--- //depot/vendor/freebsd/src/sys/kern/uipc_usrreq.c 2006/04/24 19:10:17 +++ //depot/user/rwatson/proto/src/sys/kern/uipc_usrreq.c 2006/05/06 10:02:49 @@ -88,32 +88,99 @@ struct mbuf *unp_addsockcred(struct thread *, struct mbuf *); /* - * Currently, UNIX domain sockets are protected by a single subsystem lock, - * which covers global data structures and variables, the contents of each - * per-socket unpcb structure, and the so_pcb field in sockets attached to - * the UNIX domain. This provides for a moderate degree of paralellism, as - * receive operations on UNIX domain sockets do not need to acquire the - * subsystem lock. Finer grained locking to permit send() without acquiring - * a global lock would be a logical next step. + * Both send and receive buffers are allocated PIPSIZ bytes of buffering + * for stream sockets, although the total for sender and receiver is + * actually only PIPSIZ. + * Datagram sockets really use the sendspace as the maximum datagram size, + * and don't really want to reserve the sendspace. Their recvspace should + * be large enough for at least one max-size datagram plus address. + */ +#ifndef PIPSIZ +#define PIPSIZ 8192 +#endif +static u_long unpst_sendspace = PIPSIZ; +static u_long unpst_recvspace = PIPSIZ; +static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ +static u_long unpdg_recvspace = 4*1024; + +static int unp_rights; /* file descriptors in flight */ + +SYSCTL_DECL(_net_local_stream); +SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, + &unpst_sendspace, 0, ""); +SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, + &unpst_recvspace, 0, ""); +SYSCTL_DECL(_net_local_dgram); +SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, + &unpdg_sendspace, 0, ""); +SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, + &unpdg_recvspace, 0, ""); +SYSCTL_DECL(_net_local); +SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); + +/* + * Locking and synchronization: + * + * A global UNIX domain socket mutex protects all global variables in the + * implementation, as well as the linked lists tracking the set of allocated + * UNIX domain sockets. These variables/fields may be read lockless using + * atomic operations if stale values are permissible; otherwise the global + * mutex is required to read or read-modify-write. The global mutex also + * serves to prevent deadlock when multiple PCB locks may be acquired at once + * (see below). Finally, the global mutex protects uncounted references from + * vnodes to sockets bound to those vnodes: to safely dereference the + * v_socket pointer, the global mutex must be held while a full reference is + * acquired. + * + * UNIX domain sockets each have one unpcb PCB associated with them from + * pru_attach() to pru_detach() via the so_pcb pointer. The validity of that + * reference is an invariant for the lifetime of the socket, so no lock is + * required to dereference the so_pcb pointer if a valid socket reference is + * held. + * + * Each PCB has a back-pointer to its socket, unp_socket. This pointer may + * only be safely dereferenced as long as a valid reference to the PCB is + * held. Typically, this reference will be from the socket, or from another + * PCB when the referring PCB's lock is held (in order that the reference not + * be invalidated during use). In particular, to follow + * unp->unp_conn->unp_socket, you need unlock the lock on unp, not unp_conn. + * + * Fields of PCBs are locked using a per-unpcb lock, unp_mtx. Individual + * atomic reads without the lock may be performed "lockless", but more + * complex reads and read-modify-writes require the mutex to be held. No + * lock order is defined between PCB locks -- multiple PCB locks may be + * acquired at the same time only when holding the global UNIX domain socket + * mutex, which prevents deadlocks. To prevent inter-PCB references from + * becoming invalid, the lock protecting the reference must be held for the + * lifetime of use of the reference. * - * The UNIX domain socket lock preceds all socket layer locks, including the - * socket lock and socket buffer lock, permitting UNIX domain socket code to - * call into socket support routines without releasing its locks. + * Blocking with UNIX domain sockets is a tricky issue: unlike most network + * protocols, bind() is a non-atomic operation, and connect() requires + * potential sleeping in the protocol, due to potentially waiting on local or + * distributed file systems. We try to separate "lookup" operations, which + * may sleep, and the IPC operations themselves, which typically can occur + * with relative atomicity as locks can be held over the entire operation. * - * Some caution is required in areas where the UNIX domain socket code enters - * VFS in order to create or find rendezvous points. This results in - * dropping of the UNIX domain socket subsystem lock, acquisition of the - * Giant lock, and potential sleeping. This increases the chances of races, - * and exposes weaknesses in the socket->protocol API by offering poor - * failure modes. + * Another tricky issue is simultaneous multi-threaded or multi-process + * access to a single UNIX domain socket. These are handled by the flags + * UNP_CONNECTING and UNP_BINDING. */ -static struct mtx unp_mtx; -#define UNP_LOCK_INIT() \ - mtx_init(&unp_mtx, "unp", NULL, MTX_DEF) -#define UNP_LOCK() mtx_lock(&unp_mtx) -#define UNP_UNLOCK() mtx_unlock(&unp_mtx) -#define UNP_LOCK_ASSERT() mtx_assert(&unp_mtx, MA_OWNED) -#define UNP_UNLOCK_ASSERT() mtx_assert(&unp_mtx, MA_NOTOWNED) +static struct mtx unp_global_mtx; + +#define UNP_GLOBAL_LOCK_INIT() mtx_init(&unp_global_mtx, \ + "unp_global_mtx", NULL, MTX_DEF) +#define UNP_GLOBAL_LOCK() mtx_lock(&unp_global_mtx) +#define UNP_GLOBAL_UNLOCK() mtx_unlock(&unp_global_mtx) +#define UNP_GLOBAL_UNLOCK_ASSERT() mtx_assert(&unp_global_mtx, MA_NOTOWNED) +#define UNP_GLOBAL_LOCK_ASSERT() mtx_assert(&unp_global_mtx, MA_OWNED) + +#define UNP_PCB_LOCK_INIT(unp) mtx_init(&(unp)->unp_mtx, \ + "unp_mtx", "unp_mtx", \ + MTX_DUPOK|MTX_DEF|MTX_RECURSE) +#define UNP_PCB_LOCK_DESTROY(unp) mtx_destroy(&(unp)->unp_mtx) +#define UNP_PCB_LOCK(unp) mtx_lock(&(unp)->unp_mtx) +#define UNP_PCB_UNLOCK(unp) mtx_unlock(&(unp)->unp_mtx) +#define UNP_PCB_LOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_OWNED) /* * Garbage collection of cyclic file descriptor/socket references occurs @@ -123,12 +190,10 @@ */ static struct task unp_gc_task; -static int unp_attach(struct socket *); static void unp_detach(struct unpcb *); -static int unp_bind(struct unpcb *,struct sockaddr *, struct thread *); static int unp_connect(struct socket *,struct sockaddr *, struct thread *); static int unp_connect2(struct socket *so, struct socket *so2, int); -static void unp_disconnect(struct unpcb *); +static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2); static void unp_shutdown(struct unpcb *); static void unp_drop(struct unpcb *, int); static void unp_gc(__unused void *, int); @@ -137,8 +202,6 @@ static void unp_discard(struct file *); static void unp_freerights(struct file **, int); static int unp_internalize(struct mbuf **, struct thread *); -static int unp_listen(struct socket *, struct unpcb *, int, - struct thread *); static void uipc_abort(struct socket *so) @@ -147,83 +210,238 @@ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_abort: unp == NULL")); - UNP_LOCK(); + + UNP_GLOBAL_LOCK(); + UNP_PCB_LOCK(unp); unp_drop(unp, ECONNABORTED); unp_detach(unp); - UNP_UNLOCK_ASSERT(); + UNP_GLOBAL_UNLOCK_ASSERT(); } static int uipc_accept(struct socket *so, struct sockaddr **nam) { - struct unpcb *unp; + struct unpcb *unp, *unp2; const struct sockaddr *sa; /* - * Pass back name of connected socket, - * if it was bound and we are still connected - * (our peer may have closed already!). + * Pass back name of connected socket, if it was bound and we are + * still connected (our peer may have closed already!). */ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_accept: unp == NULL")); + *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); - UNP_LOCK(); - if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) + UNP_GLOBAL_LOCK(); + UNP_PCB_LOCK(unp); + unp2 = unp->unp_conn; + if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) { + UNP_PCB_LOCK(unp2); sa = (struct sockaddr *) unp->unp_conn->unp_addr; - else + bcopy(sa, *nam, sa->sa_len); + UNP_PCB_UNLOCK(unp2); + } else { sa = &sun_noname; - bcopy(sa, *nam, sa->sa_len); - UNP_UNLOCK(); + bcopy(sa, *nam, sa->sa_len); + } + UNP_PCB_UNLOCK(unp); + UNP_GLOBAL_UNLOCK(); return (0); } static int uipc_attach(struct socket *so, int proto, struct thread *td) { + u_long sendspace, recvspace; + struct unpcb *unp; + int error; + + KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL")); + if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { + switch (so->so_type) { + case SOCK_STREAM: + sendspace = unpst_sendspace; + recvspace = unpst_recvspace; + break; + + case SOCK_DGRAM: + sendspace = unpdg_sendspace; + recvspace = unpdg_recvspace; + break; + + default: + panic("uipc_attach"); + } + error = soreserve(so, sendspace, recvspace); + if (error) + return (error); + } + unp = uma_zalloc(unp_zone, M_WAITOK | M_ZERO); + if (unp == NULL) + return (ENOBUFS); + LIST_INIT(&unp->unp_refs); + UNP_PCB_LOCK_INIT(unp); + unp->unp_socket = so; + so->so_pcb = unp; + + UNP_GLOBAL_LOCK(); + unp->unp_gencnt = ++unp_gencnt; + unp_count++; + LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead + : &unp_shead, unp, unp_link); + UNP_GLOBAL_UNLOCK(); - return (unp_attach(so)); + return (0); } static int uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { + struct sockaddr_un *soun = (struct sockaddr_un *)nam; + struct vnode *vp; + struct mount *mp; + struct vattr vattr; + int error, namelen; + struct nameidata nd; struct unpcb *unp; - int error; + char *buf; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_bind: unp == NULL")); - UNP_LOCK(); - error = unp_bind(unp, nam, td); - UNP_UNLOCK(); + + namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); + if (namelen <= 0) + return (EINVAL); + + /* + * We don't allow simultaneous bind() calls on a single UNIX domain + * socket, so flag in-progress operations, and return an error if an + * operation is already in progress. + * + * Historically, we have not allowed a socket to be rebound, so this + * also returns an error. Not allowing re-binding certainly + * simplifies the implementation and avoids a great many possible + * failure modes. + */ + UNP_PCB_LOCK(unp); + if (unp->unp_vnode != NULL) { + UNP_PCB_UNLOCK(unp); + return (EINVAL); + } + if (unp->unp_flags & UNP_BINDING) { + UNP_PCB_UNLOCK(unp); + return (EALREADY); + } + unp->unp_flags |= UNP_BINDING; + UNP_PCB_UNLOCK(unp); + + buf = malloc(namelen + 1, M_TEMP, M_WAITOK); + strlcpy(buf, soun->sun_path, namelen + 1); + + mtx_lock(&Giant); +restart: + mtx_assert(&Giant, MA_OWNED); + NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE, + buf, td); +/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ + error = namei(&nd); + if (error) + goto error; + vp = nd.ni_vp; + if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (vp != NULL) { + vrele(vp); + error = EADDRINUSE; + goto error; + } + error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); + if (error) + goto error; + goto restart; + } + VATTR_NULL(&vattr); + vattr.va_type = VSOCK; + vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask); +#ifdef MAC + error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, + &vattr); +#endif + if (error == 0) { + VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); + error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); + } + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if (error) { + vn_finished_write(mp); + goto error; + } + vp = nd.ni_vp; + ASSERT_VOP_LOCKED(vp, "uipc_bind"); + soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); + + /* + * XXXRW: handle race against another consumer also frobbing + * v_socket? Or not. + */ + UNP_GLOBAL_LOCK(); + UNP_PCB_LOCK(unp); + vp->v_socket = unp->unp_socket; + unp->unp_vnode = vp; + unp->unp_addr = soun; + unp->unp_flags &= ~UNP_BINDING; + UNP_PCB_UNLOCK(unp); + UNP_GLOBAL_UNLOCK(); + VOP_UNLOCK(vp, 0, td); + vn_finished_write(mp); + mtx_unlock(&Giant); + free(buf, M_TEMP); + return (0); + +error: + UNP_PCB_LOCK(unp); + unp->unp_flags &= ~UNP_BINDING; + UNP_PCB_UNLOCK(unp); + mtx_unlock(&Giant); + free(buf, M_TEMP); return (error); } static int uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { - struct unpcb *unp; int error; KASSERT(td == curthread, ("uipc_connect: td != curthread")); - unp = sotounpcb(so); - KASSERT(unp != NULL, ("uipc_connect: unp == NULL")); - UNP_LOCK(); + + UNP_GLOBAL_LOCK(); error = unp_connect(so, nam, td); - UNP_UNLOCK(); + UNP_GLOBAL_UNLOCK(); return (error); } int uipc_connect2(struct socket *so1, struct socket *so2) { - struct unpcb *unp; + struct unpcb *unp, *unp2; int error; - unp = sotounpcb(so1); + UNP_GLOBAL_LOCK(); + unp = so1->so_pcb; KASSERT(unp != NULL, ("uipc_connect2: unp == NULL")); - UNP_LOCK(); + UNP_PCB_LOCK(unp); + unp2 = so2->so_pcb; + KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL")); + UNP_PCB_LOCK(unp2); error = unp_connect2(so1, so2, PRU_CONNECT2); - UNP_UNLOCK(); + UNP_PCB_UNLOCK(unp2); + UNP_PCB_UNLOCK(unp); + UNP_GLOBAL_UNLOCK(); return (error); } @@ -236,21 +454,29 @@ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_detach: unp == NULL")); - UNP_LOCK(); + + UNP_GLOBAL_LOCK(); + UNP_PCB_LOCK(unp); unp_detach(unp); - UNP_UNLOCK_ASSERT(); + UNP_GLOBAL_UNLOCK_ASSERT(); } static int uipc_disconnect(struct socket *so) { - struct unpcb *unp; + struct unpcb *unp, *unp2; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL")); - UNP_LOCK(); - unp_disconnect(unp); - UNP_UNLOCK(); + + UNP_GLOBAL_LOCK(); + UNP_PCB_LOCK(unp); + unp2 = unp->unp_conn; + UNP_PCB_LOCK(unp2); + unp_disconnect(unp, unp2); + UNP_PCB_UNLOCK(unp2); + UNP_PCB_UNLOCK(unp); + UNP_GLOBAL_UNLOCK(); return (0); } @@ -262,81 +488,105 @@ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_listen: unp == NULL")); - UNP_LOCK(); + + UNP_PCB_LOCK(unp); if (unp->unp_vnode == NULL) { - UNP_UNLOCK(); + UNP_PCB_UNLOCK(unp); return (EINVAL); } - error = unp_listen(so, unp, backlog, td); - UNP_UNLOCK(); + + SOCK_LOCK(so); + error = solisten_proto_check(so); + if (error == 0) { + cru2x(td->td_ucred, &unp->unp_peercred); + unp->unp_flags |= UNP_HAVEPCCACHED; + solisten_proto(so, backlog); + } + SOCK_UNLOCK(so); + UNP_PCB_UNLOCK(unp); return (error); } static int uipc_peeraddr(struct socket *so, struct sockaddr **nam) { - struct unpcb *unp; + struct unpcb *unp, *unp2; const struct sockaddr *sa; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL")); + *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); - UNP_LOCK(); - if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL) - sa = (struct sockaddr *) unp->unp_conn->unp_addr; - else { - /* - * XXX: It seems that this test always fails even when - * connection is established. So, this else clause is - * added as workaround to return PF_LOCAL sockaddr. - */ + UNP_PCB_LOCK(unp); + /* + * XXX: It seems that this test always fails even when connection is + * established. So, this else clause is added as workaround to + * return PF_LOCAL sockaddr. + */ + unp2 = unp->unp_conn; + if (unp2 != NULL) { + UNP_PCB_LOCK(unp2); + if (unp2->unp_addr != NULL) + sa = (struct sockaddr *) unp->unp_conn->unp_addr; + else + sa = &sun_noname; + bcopy(sa, *nam, sa->sa_len); + UNP_PCB_UNLOCK(unp2); + } else { sa = &sun_noname; + bcopy(sa, *nam, sa->sa_len); } - bcopy(sa, *nam, sa->sa_len); - UNP_UNLOCK(); + UNP_PCB_UNLOCK(unp); return (0); } static int uipc_rcvd(struct socket *so, int flags) { - struct unpcb *unp; + struct unpcb *unp, *unp2; struct socket *so2; u_long newhiwat; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_rcvd: unp == NULL")); - UNP_LOCK(); - switch (so->so_type) { - case SOCK_DGRAM: + + if (so->so_type == SOCK_DGRAM) panic("uipc_rcvd DGRAM?"); - /*NOTREACHED*/ - case SOCK_STREAM: - if (unp->unp_conn == NULL) - break; - so2 = unp->unp_conn->unp_socket; - SOCKBUF_LOCK(&so2->so_snd); - SOCKBUF_LOCK(&so->so_rcv); - /* - * Adjust backpressure on sender - * and wakeup any waiting to write. - */ - so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt; - unp->unp_mbcnt = so->so_rcv.sb_mbcnt; - newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - - so->so_rcv.sb_cc; - (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat, - newhiwat, RLIM_INFINITY); - unp->unp_cc = so->so_rcv.sb_cc; - SOCKBUF_UNLOCK(&so->so_rcv); - sowwakeup_locked(so2); - break; + if (so->so_type != SOCK_STREAM) + panic("uipc_rcvd unknown socktype"); - default: - panic("uipc_rcvd unknown socktype"); + /* + * Adjust backpressure on sender and wakeup any waiting to write. + * + * The consistency requirements here are a bit complex: we must + * acquire the lock for our own unpcb in order to prevent it from + * disconnecting while in use, changing the unp_conn peer. We do not + * need unp2's lock, since the unp2->unp_socket pointer will remain + * static as long as the unp2 pcb is valid, which it will be until we + * release unp's lock to allow a disconnect. We do need socket + * mutexes for both socket endpoints since we manipulate fields in + * both; we hold both locks at once since we access both + * simultaneously. + */ + UNP_PCB_LOCK(unp); + unp2 = unp->unp_conn; + if (unp2 == NULL) { + UNP_PCB_UNLOCK(unp); + return (0); } - UNP_UNLOCK(); + so2 = unp2->unp_socket; + SOCKBUF_LOCK(&so2->so_snd); + SOCKBUF_LOCK(&so->so_rcv); + so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt; + unp->unp_mbcnt = so->so_rcv.sb_mbcnt; + newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - so->so_rcv.sb_cc; + (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat, + newhiwat, RLIM_INFINITY); + unp->unp_cc = so->so_rcv.sb_cc; + SOCKBUF_UNLOCK(&so->so_rcv); + sowwakeup_locked(so2); + UNP_PCB_UNLOCK(unp); return (0); } @@ -346,13 +596,14 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { - int error = 0; - struct unpcb *unp; + struct unpcb *unp, *unp2; struct socket *so2; u_long newhiwat; + int error = 0; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_send: unp == NULL")); + if (flags & PRUS_OOB) { error = EOPNOTSUPP; goto release; @@ -361,32 +612,38 @@ if (control != NULL && (error = unp_internalize(&control, td))) goto release; - UNP_LOCK(); + UNP_GLOBAL_LOCK(); + UNP_PCB_LOCK(unp); switch (so->so_type) { case SOCK_DGRAM: { const struct sockaddr *from; + unp2 = unp->unp_conn; if (nam != NULL) { - if (unp->unp_conn != NULL) { + if (unp2 != NULL) { error = EISCONN; break; } + UNP_PCB_UNLOCK(unp); error = unp_connect(so, nam, td); + UNP_PCB_LOCK(unp); if (error) break; + unp2 = unp->unp_conn; } else { - if (unp->unp_conn == NULL) { + if (unp2 == NULL) { error = ENOTCONN; break; } } - so2 = unp->unp_conn->unp_socket; + UNP_PCB_LOCK(unp2); + so2 = unp2->unp_socket; if (unp->unp_addr != NULL) from = (struct sockaddr *)unp->unp_addr; else from = &sun_noname; - if (unp->unp_conn->unp_flags & UNP_WANTCRED) + if (unp2->unp_flags & UNP_WANTCRED) control = unp_addsockcred(td, control); SOCKBUF_LOCK(&so2->so_rcv); if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) { @@ -398,19 +655,22 @@ error = ENOBUFS; } if (nam != NULL) - unp_disconnect(unp); + unp_disconnect(unp, unp2); + UNP_PCB_UNLOCK(unp2); break; } case SOCK_STREAM: /* Connect if not connected yet. */ /* - * Note: A better implementation would complain - * if not equal to the peer's address. + * Note: A better implementation would complain if not equal + * to the peer's address. */ if ((so->so_state & SS_ISCONNECTED) == 0) { if (nam != NULL) { + UNP_PCB_UNLOCK(unp); error = unp_connect(so, nam, td); + UNP_PCB_LOCK(unp); if (error) break; /* XXX */ } else { @@ -419,22 +679,34 @@ } } + /* + * Lock order here has to be handled carefully: we hold the + * global lock, so acquiring two unpcb locks is OK. We must + * acquire both before acquiring any socket mutexes. We must + * also acquire the local socket send mutex before the remote + * socket receive mutex. The only tricky thing is making + * sure to acquire the unp2 lock before the local socket send + * lock, or we will experience deadlocks. + */ + unp2 = unp->unp_conn; + KASSERT(unp2 != NULL, + ("uipc_send connected but no connection?")); + UNP_PCB_LOCK(unp2); SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCKBUF_UNLOCK(&so->so_snd); + UNP_PCB_UNLOCK(unp2); error = EPIPE; break; } - if (unp->unp_conn == NULL) - panic("uipc_send connected but no connection?"); - so2 = unp->unp_conn->unp_socket; + so2 = unp2->unp_socket; SOCKBUF_LOCK(&so2->so_rcv); - if (unp->unp_conn->unp_flags & UNP_WANTCRED) { + if (unp2->unp_flags & UNP_WANTCRED) { /* * Credentials are passed only once on * SOCK_STREAM. */ - unp->unp_conn->unp_flags &= ~UNP_WANTCRED; + unp2->unp_flags &= ~UNP_WANTCRED; control = unp_addsockcred(td, control); } /* @@ -445,19 +717,19 @@ if (control != NULL) { if (sbappendcontrol_locked(&so2->so_rcv, m, control)) control = NULL; - } else { + } else sbappend_locked(&so2->so_rcv, m); - } so->so_snd.sb_mbmax -= so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt; - unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt; + unp2->unp_mbcnt = so2->so_rcv.sb_mbcnt; newhiwat = so->so_snd.sb_hiwat - - (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc); + (so2->so_rcv.sb_cc - unp2->unp_cc); (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, newhiwat, RLIM_INFINITY); SOCKBUF_UNLOCK(&so->so_snd); - unp->unp_conn->unp_cc = so2->so_rcv.sb_cc; + unp2->unp_cc = so2->so_rcv.sb_cc; sorwakeup_locked(so2); + UNP_PCB_UNLOCK(unp2); m = NULL; break; @@ -473,7 +745,8 @@ socantsendmore(so); unp_shutdown(unp); } - UNP_UNLOCK(); + UNP_PCB_UNLOCK(unp); + UNP_GLOBAL_UNLOCK(); if (control != NULL && error != 0) unp_dispose(control); @@ -489,22 +762,28 @@ static int uipc_sense(struct socket *so, struct stat *sb) { - struct unpcb *unp; + struct unpcb *unp, *unp2; struct socket *so2; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_sense: unp == NULL")); - UNP_LOCK(); + + UNP_GLOBAL_LOCK(); + UNP_PCB_LOCK(unp); sb->st_blksize = so->so_snd.sb_hiwat; - if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) { - so2 = unp->unp_conn->unp_socket; + unp2 = unp->unp_conn; + if (so->so_type == SOCK_STREAM && unp2 != NULL) { + UNP_PCB_LOCK(unp2); + so2 = unp2->unp_socket; sb->st_blksize += so2->so_rcv.sb_cc; + UNP_PCB_UNLOCK(unp2); } sb->st_dev = NODEV; if (unp->unp_ino == 0) unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino; sb->st_ino = unp->unp_ino; - UNP_UNLOCK(); + UNP_PCB_UNLOCK(unp); + UNP_GLOBAL_UNLOCK(); return (0); } @@ -515,10 +794,13 @@ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL")); - UNP_LOCK(); + + UNP_GLOBAL_LOCK(); + UNP_PCB_LOCK(unp); socantsendmore(so); unp_shutdown(unp); - UNP_UNLOCK(); + UNP_PCB_UNLOCK(unp); + UNP_GLOBAL_UNLOCK(); return (0); } @@ -530,14 +812,15 @@ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL")); + *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); - UNP_LOCK(); + UNP_PCB_LOCK(unp); if (unp->unp_addr != NULL) sa = (struct sockaddr *) unp->unp_addr; else sa = &sun_noname; bcopy(sa, *nam, sa->sa_len); - UNP_UNLOCK(); + UNP_PCB_UNLOCK(unp); return (0); } @@ -574,12 +857,13 @@ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL")); - UNP_LOCK(); + error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case LOCAL_PEERCRED: + UNP_PCB_LOCK(unp); if (unp->unp_flags & UNP_HAVEPC) xu = unp->unp_peercred; else { @@ -588,22 +872,31 @@ else error = EINVAL; } + UNP_PCB_UNLOCK(unp); if (error == 0) error = sooptcopyout(sopt, &xu, sizeof(xu)); break; + case LOCAL_CREDS: + UNP_PCB_LOCK(unp); optval = unp->unp_flags & UNP_WANTCRED ? 1 : 0; + UNP_PCB_UNLOCK(unp); error = sooptcopyout(sopt, &optval, sizeof(optval)); break; + case LOCAL_CONNWAIT: + UNP_PCB_LOCK(unp); optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0; + UNP_PCB_UNLOCK(unp); error = sooptcopyout(sopt, &optval, sizeof(optval)); break; + default: error = EOPNOTSUPP; break; } break; + case SOPT_SET: switch (sopt->sopt_name) { case LOCAL_CREDS: @@ -613,19 +906,24 @@ if (error) break; -#define OPTSET(bit) \ - if (optval) \ - unp->unp_flags |= bit; \ - else \ - unp->unp_flags &= ~bit; +#define OPTSET(bit) do { \ + UNP_PCB_LOCK(unp); \ + if (optval) \ + unp->unp_flags |= bit; \ + else \ + unp->unp_flags &= ~bit; \ + UNP_PCB_UNLOCK(unp); \ +} while (0) switch (sopt->sopt_name) { case LOCAL_CREDS: OPTSET(UNP_WANTCRED); break; + case LOCAL_CONNWAIT: OPTSET(UNP_CONNWAIT); break; + default: break; } @@ -636,117 +934,60 @@ break; } break; + default: error = EOPNOTSUPP; break; } - UNP_UNLOCK(); return (error); } -/* - * Both send and receive buffers are allocated PIPSIZ bytes of buffering - * for stream sockets, although the total for sender and receiver is - * actually only PIPSIZ. - * Datagram sockets really use the sendspace as the maximum datagram size, - * and don't really want to reserve the sendspace. Their recvspace should - * be large enough for at least one max-size datagram plus address. - */ -#ifndef PIPSIZ -#define PIPSIZ 8192 -#endif -static u_long unpst_sendspace = PIPSIZ; -static u_long unpst_recvspace = PIPSIZ; -static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ -static u_long unpdg_recvspace = 4*1024; - -static int unp_rights; /* file descriptors in flight */ - -SYSCTL_DECL(_net_local_stream); -SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, - &unpst_sendspace, 0, ""); -SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, - &unpst_recvspace, 0, ""); -SYSCTL_DECL(_net_local_dgram); -SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, - &unpdg_sendspace, 0, ""); -SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, - &unpdg_recvspace, 0, ""); -SYSCTL_DECL(_net_local); -SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); - -static int -unp_attach(struct socket *so) -{ - struct unpcb *unp; - int error; - - KASSERT(so->so_pcb == NULL, ("unp_attach: so_pcb != NULL")); - if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { - switch (so->so_type) { - - case SOCK_STREAM: - error = soreserve(so, unpst_sendspace, unpst_recvspace); - break; - - case SOCK_DGRAM: - error = soreserve(so, unpdg_sendspace, unpdg_recvspace); - break; - - default: - panic("unp_attach"); - } - if (error) - return (error); - } - unp = uma_zalloc(unp_zone, M_WAITOK | M_ZERO); - if (unp == NULL) - return (ENOBUFS); - LIST_INIT(&unp->unp_refs); - unp->unp_socket = so; - so->so_pcb = unp; - - UNP_LOCK(); - unp->unp_gencnt = ++unp_gencnt; - unp_count++; - LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead - : &unp_shead, unp, unp_link); - UNP_UNLOCK(); - - return (0); -} - static void unp_detach(struct unpcb *unp) { + int local_unp_rights; struct vnode *vp; - int local_unp_rights; + struct unpcb *unp2; - UNP_LOCK_ASSERT(); + UNP_GLOBAL_LOCK_ASSERT(); + UNP_PCB_LOCK_ASSERT(unp); LIST_REMOVE(unp, unp_link); unp->unp_gencnt = ++unp_gencnt; --unp_count; + + /* + * XXXRW: What if v_socket != our soket? + */ if ((vp = unp->unp_vnode) != NULL) { - /* - * XXXRW: should v_socket be frobbed only while holding - * Giant? - */ unp->unp_vnode->v_socket = NULL; unp->unp_vnode = NULL; } - if (unp->unp_conn != NULL) - unp_disconnect(unp); + unp2 = unp->unp_conn; + if (unp2 != NULL) { + UNP_PCB_LOCK(unp2); + unp_disconnect(unp, unp2); + UNP_PCB_UNLOCK(unp2); + } + + /* + * We hold the global lock, so it's OK to acquire multiple pcb locks + * at a time. + */ while (!LIST_EMPTY(&unp->unp_refs)) { struct unpcb *ref = LIST_FIRST(&unp->unp_refs); + + UNP_PCB_LOCK(ref); unp_drop(ref, ECONNRESET); + UNP_PCB_UNLOCK(ref); } + UNP_GLOBAL_UNLOCK(); soisdisconnected(unp->unp_socket); unp->unp_socket->so_pcb = NULL; local_unp_rights = unp_rights; - UNP_UNLOCK(); if (unp->unp_addr != NULL) FREE(unp->unp_addr, M_SONAME); + UNP_PCB_LOCK_DESTROY(unp); uma_zfree(unp_zone, unp); if (vp) { int vfslocked; @@ -760,97 +1001,6 @@ } static int -unp_bind(struct unpcb *unp, struct sockaddr *nam, struct thread *td) -{ - struct sockaddr_un *soun = (struct sockaddr_un *)nam; - struct vnode *vp; - struct mount *mp; - struct vattr vattr; - int error, namelen; - struct nameidata nd; - char *buf; - - UNP_LOCK_ASSERT(); - - /* - * XXXRW: This test-and-set of unp_vnode is non-atomic; the - * unlocked read here is fine, but the value of unp_vnode needs - * to be tested again after we do all the lookups to see if the - * pcb is still unbound? - */ - if (unp->unp_vnode != NULL) - return (EINVAL); - - namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); - if (namelen <= 0) - return (EINVAL); - - UNP_UNLOCK(); - - buf = malloc(namelen + 1, M_TEMP, M_WAITOK); - strlcpy(buf, soun->sun_path, namelen + 1); - - mtx_lock(&Giant); -restart: - mtx_assert(&Giant, MA_OWNED); - NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE, - buf, td); -/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ - error = namei(&nd); - if (error) - goto done; - vp = nd.ni_vp; - if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { - NDFREE(&nd, NDF_ONLY_PNBUF); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (vp != NULL) { - vrele(vp); - error = EADDRINUSE; - goto done; - } - error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); - if (error) - goto done; - goto restart; - } - VATTR_NULL(&vattr); - vattr.va_type = VSOCK; - vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask); -#ifdef MAC - error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, - &vattr); -#endif - if (error == 0) { - VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); - error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); - } - NDFREE(&nd, NDF_ONLY_PNBUF); - vput(nd.ni_dvp); - if (error) { - vn_finished_write(mp); - goto done; - } - vp = nd.ni_vp; - ASSERT_VOP_LOCKED(vp, "unp_bind"); - soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); - UNP_LOCK(); - vp->v_socket = unp->unp_socket; - unp->unp_vnode = vp; - unp->unp_addr = soun; - UNP_UNLOCK(); - VOP_UNLOCK(vp, 0, td); - vn_finished_write(mp); -done: - mtx_unlock(&Giant); - free(buf, M_TEMP); - UNP_LOCK(); - return (error); -} - -static int unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_un *soun = (struct sockaddr_un *)nam; @@ -862,15 +1012,25 @@ char buf[SOCK_MAXADDRLEN]; struct sockaddr *sa; - UNP_LOCK_ASSERT(); + UNP_GLOBAL_LOCK_ASSERT(); + UNP_GLOBAL_UNLOCK(); unp = sotounpcb(so); KASSERT(unp != NULL, ("unp_connect: unp == NULL")); + len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); if (len <= 0) return (EINVAL); strlcpy(buf, soun->sun_path, len + 1); - UNP_UNLOCK(); + + UNP_PCB_LOCK(unp); + if (unp->unp_flags & UNP_CONNECTING) { + UNP_PCB_UNLOCK(unp); + return (EALREADY); + } + unp->unp_flags |= UNP_CONNECTING; + UNP_PCB_UNLOCK(unp); + sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); mtx_lock(&Giant); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td); @@ -892,9 +1052,15 @@ if (error) goto bad; mtx_unlock(&Giant); - UNP_LOCK(); + unp = sotounpcb(so); KASSERT(unp != NULL, ("unp_connect: unp == NULL")); + + /* + * Lock global lock for two reasons: make sure v_socket is stable, + * and to protect simultaneous locking of multiple pcbs. + */ + UNP_GLOBAL_LOCK(); so2 = vp->v_socket; if (so2 == NULL) { error = ECONNREFUSED; @@ -907,14 +1073,18 @@ if (so->so_proto->pr_flags & PR_CONNREQUIRED) { if (so2->so_options & SO_ACCEPTCONN) { /* - * NB: drop locks here so unp_attach is entered + * NB: drop locks here so uipc_attach is entered * w/o locks; this avoids a recursive lock * of the head and holding sleep locks across * a (potentially) blocking malloc. + * + * XXXRW: This is actually a non-blocking alloc. + * Lock order issue is real. Releasing lock here + * may invalidate so2 pointer, however. */ - UNP_UNLOCK(); + UNP_GLOBAL_UNLOCK(); so3 = sonewconn(so2, 0); - UNP_LOCK(); + UNP_GLOBAL_LOCK(); } else so3 = NULL; if (so3 == NULL) { @@ -924,6 +1094,9 @@ unp = sotounpcb(so); unp2 = sotounpcb(so2); unp3 = sotounpcb(so3); + UNP_PCB_LOCK(unp); + UNP_PCB_LOCK(unp2); + UNP_PCB_LOCK(unp3); if (unp2->unp_addr != NULL) { bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len); unp3->unp_addr = (struct sockaddr_un *) sa; @@ -941,7 +1114,7 @@ /* * The receiver's (server's) credentials are copied * from the unp_peercred member of socket on which the - * former called listen(); unp_listen() cached that + * former called listen(); uipc_listen() cached that * process's credentials at that time so we can use * them now. */ @@ -952,6 +1125,9 @@ unp->unp_flags |= UNP_HAVEPC; if (unp2->unp_flags & UNP_WANTCRED) unp3->unp_flags |= UNP_WANTCRED; + UNP_PCB_UNLOCK(unp3); + UNP_PCB_UNLOCK(unp2); + UNP_PCB_UNLOCK(unp); #ifdef MAC SOCK_LOCK(so); mac_set_socket_peer_from_socket(so, so3); @@ -961,9 +1137,17 @@ so2 = so3; } + unp = sotounpcb(so); + KASSERT(unp != NULL, ("unp_connect: unp == NULL")); + unp2 = sotounpcb(so2); + KASSERT(unp2 != NULL, ("unp_connect: unp2 == NULL")); + UNP_PCB_LOCK(unp); + UNP_PCB_LOCK(unp2); error = unp_connect2(so, so2, PRU_CONNECT); + UNP_PCB_UNLOCK(unp2); + UNP_PCB_UNLOCK(unp); bad2: - UNP_UNLOCK(); + UNP_GLOBAL_UNLOCK(); mtx_lock(&Giant); bad: mtx_assert(&Giant, MA_OWNED); @@ -971,25 +1155,33 @@ vput(vp); mtx_unlock(&Giant); free(sa, M_SONAME); - UNP_LOCK(); + UNP_GLOBAL_LOCK(); + UNP_PCB_LOCK(unp); + unp->unp_flags &= ~UNP_CONNECTING; + UNP_PCB_UNLOCK(unp); return (error); } static int unp_connect2(struct socket *so, struct socket *so2, int req) { - struct unpcb *unp = sotounpcb(so); + struct unpcb *unp; struct unpcb *unp2; - UNP_LOCK_ASSERT(); + unp = sotounpcb(so); + KASSERT(unp != NULL, ("unp_connect2: unp == NULL")); + unp2 = sotounpcb(so2); + KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL")); + + UNP_GLOBAL_LOCK_ASSERT(); + UNP_PCB_LOCK_ASSERT(unp); + UNP_PCB_LOCK_ASSERT(unp2); if (so2->so_type != so->so_type) return (EPROTOTYPE); - unp2 = sotounpcb(so2); - KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL")); unp->unp_conn = unp2; + switch (so->so_type) { - case SOCK_DGRAM: LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); soisconnected(so); @@ -1012,15 +1204,16 @@ } static void -unp_disconnect(struct unpcb *unp) +unp_disconnect(struct unpcb *unp, struct unpcb *unp2) { - struct unpcb *unp2 = unp->unp_conn; struct socket *so; - UNP_LOCK_ASSERT(); + KASSERT(unp2 != NULL, ("unp_disconnect: unp2 == NULL")); + + UNP_GLOBAL_LOCK_ASSERT(); + UNP_PCB_LOCK_ASSERT(unp); + UNP_PCB_LOCK_ASSERT(unp2); - if (unp2 == NULL) - return; unp->unp_conn = NULL; switch (unp->unp_socket->so_type) { case SOCK_DGRAM: @@ -1039,16 +1232,6 @@ } } -#ifdef notdef -void -unp_abort(struct unpcb *unp) -{ - - unp_detach(unp); - UNP_UNLOCK_ASSERT(); -} -#endif - /* * unp_pcblist() assumes that UNIX domain socket memory is never reclaimed * by the zone (UMA_ZONE_NOFREE), and as such potentially stale pointers @@ -1087,10 +1270,10 @@ * OK, now we're committed to doing something. */ xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK); - UNP_LOCK(); + UNP_GLOBAL_LOCK(); gencnt = unp_gencnt; n = unp_count; - UNP_UNLOCK(); + UNP_GLOBAL_UNLOCK(); xug->xug_len = sizeof *xug; xug->xug_count = n; @@ -1104,23 +1287,34 @@ unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); - UNP_LOCK(); + /* + * XXXRW: Note, this code relies very explicitly in pcb's being type + * stable. + */ + UNP_GLOBAL_LOCK(); for (unp = LIST_FIRST(head), i = 0; unp && i < n; unp = LIST_NEXT(unp, unp_link)) { + UNP_PCB_LOCK(unp); if (unp->unp_gencnt <= gencnt) { if (cr_cansee(req->td->td_ucred, unp->unp_socket->so_cred)) continue; unp_list[i++] = unp; } + UNP_PCB_UNLOCK(unp); } - UNP_UNLOCK(); + UNP_GLOBAL_UNLOCK(); n = i; /* in case we lost some during malloc */ + /* + * XXXRW: The logic below asumes that it is OK to lock a mutex in + * an unpcb that may have been freed. + */ error = 0; xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO); for (i = 0; i < n; i++) { unp = unp_list[i]; + UNP_PCB_LOCK(unp); if (unp->unp_gencnt <= gencnt) { xu->xu_len = sizeof *xu; xu->xu_unpp = unp; @@ -1138,8 +1332,10 @@ unp->unp_conn->unp_addr->sun_len); bcopy(unp, &xu->xu_unp, sizeof *unp); sotoxsocket(unp->unp_socket, &xu->xu_socket); + UNP_PCB_UNLOCK(unp); error = SYSCTL_OUT(req, xu, sizeof *xu); - } + } else + UNP_PCB_UNLOCK(unp); } free(xu, M_TEMP); if (!error) { @@ -1170,33 +1366,38 @@ static void unp_shutdown(struct unpcb *unp) { + struct unpcb *unp2; struct socket *so; - UNP_LOCK_ASSERT(); + UNP_GLOBAL_LOCK_ASSERT(); + UNP_PCB_LOCK_ASSERT(unp); - if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && - (so = unp->unp_conn->unp_socket)) - socantrcvmore(so); + unp2 = unp->unp_conn; + if (unp->unp_socket->so_type == SOCK_STREAM && unp2 != NULL) { + so = unp2->unp_socket; + if (so != NULL) + socantrcvmore(so); + } } static void unp_drop(struct unpcb *unp, int errno) { struct socket *so = unp->unp_socket; + struct unpcb *unp2; - UNP_LOCK_ASSERT(); + UNP_GLOBAL_LOCK_ASSERT(); + UNP_PCB_LOCK_ASSERT(unp); so->so_error = errno; - unp_disconnect(unp); -} + unp2 = unp->unp_conn; + if (unp2 == NULL) + return; -#ifdef notdef -void -unp_drain(void) -{ - + UNP_PCB_LOCK(unp2); + unp_disconnect(unp, unp2); + UNP_PCB_UNLOCK(unp2); } -#endif static void unp_freerights(struct file **rp, int fdcount) @@ -1205,7 +1406,6 @@ struct file *fp; for (i = 0; i < fdcount; i++) { - fp = *rp; /* * zero the pointer before calling * unp_discard since it may end up @@ -1213,7 +1413,8 @@ * * XXXRW: This is less true than it used to be. */ - *rp++ = 0; + fp = *rp; + *rp++ = NULL; unp_discard(fp); } } @@ -1233,7 +1434,7 @@ int f; u_int newlen; - UNP_UNLOCK_ASSERT(); + UNP_GLOBAL_UNLOCK_ASSERT(); error = 0; if (controlp != NULL) /* controlp == NULL => free control messages */ @@ -1338,6 +1539,7 @@ void unp_init(void) { + unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); if (unp_zone == NULL) @@ -1348,7 +1550,7 @@ LIST_INIT(&unp_dhead); LIST_INIT(&unp_shead); TASK_INIT(&unp_gc_task, 0, unp_gc, NULL); - UNP_LOCK_INIT(); + UNP_GLOBAL_LOCK_INIT(); } static int @@ -1368,7 +1570,7 @@ int error, oldfds; u_int newlen; - UNP_UNLOCK_ASSERT(); + UNP_GLOBAL_UNLOCK_ASSERT(); error = 0; *controlp = NULL; @@ -1748,25 +1950,6 @@ unp_scan(m, unp_discard); } -static int -unp_listen(struct socket *so, struct unpcb *unp, int backlog, - struct thread *td) -{ - int error; - - UNP_LOCK_ASSERT(); - - SOCK_LOCK(so); - error = solisten_proto_check(so); - if (error == 0) { - cru2x(td->td_ucred, &unp->unp_peercred); - unp->unp_flags |= UNP_HAVEPCCACHED; - solisten_proto(so, backlog); - } - SOCK_UNLOCK(so); - return (error); -} - static void unp_scan(struct mbuf *m0, void (*op)(struct file *)) { @@ -1819,6 +2002,9 @@ static void unp_mark(struct file *fp) { + + /* XXXRW: Should probably assert file list lock here. */ + if (fp->f_gcflag & FMARK) return; unp_defer++; @@ -1828,11 +2014,12 @@ static void unp_discard(struct file *fp) { - UNP_LOCK(); + + UNP_GLOBAL_LOCK(); FILE_LOCK(fp); fp->f_msgcount--; unp_rights--; FILE_UNLOCK(fp); - UNP_UNLOCK(); + UNP_GLOBAL_UNLOCK(); (void) closef(fp, (struct thread *)NULL); } --- //depot/vendor/freebsd/src/sys/sys/unpcb.h 2005/04/13 00:05:41 +++ //depot/user/rwatson/proto/src/sys/sys/unpcb.h 2006/04/27 20:47:29 @@ -78,6 +78,7 @@ unp_gen_t unp_gencnt; /* generation count of this instance */ int unp_flags; /* flags */ struct xucred unp_peercred; /* peer credentials, if applicable */ + struct mtx unp_mtx; /* mutex */ }; /* @@ -98,6 +99,14 @@ #define UNP_WANTCRED 0x004 /* credentials wanted */ #define UNP_CONNWAIT 0x008 /* connect blocks until accepted */ +/* + * These flags are used to handle non-atomicity in connect() and bind() + * operations on a socket: in particular, to avoid races between multiple + * threads or processes operating simultaneously on the same socket. + */ +#define UNP_CONNECTING 0x010 /* Currently connecting. */ +#define UNP_BINDING 0x020 /* Currently binding. */ + #define sotounpcb(so) ((struct unpcb *)((so)->so_pcb)) /* Hack alert -- this structure depends on . */