--- //depot/vendor/freebsd/src/sys/fs/fifofs/fifo_vnops.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/fs/fifofs/fifo_vnops.c 2004/04/07 20:11:34 @@ -227,6 +227,7 @@ if (ap->a_mode & FREAD) { fip->fi_readers++; if (fip->fi_readers == 1) { + /* XXXRW: socket lock? */ fip->fi_writesock->so_state &= ~SS_CANTSENDMORE; if (fip->fi_writers > 0) { wakeup(&fip->fi_writers); @@ -243,6 +244,7 @@ } fip->fi_writers++; if (fip->fi_writers == 1) { + /* XXXRW: socket lock? */ fip->fi_readsock->so_state &= ~SS_CANTRCVMORE; if (fip->fi_readers > 0) { wakeup(&fip->fi_readers); @@ -322,12 +324,14 @@ if (uio->uio_resid == 0) return (0); if (ap->a_ioflag & IO_NDELAY) + /* XXXRW: socket lock? */ rso->so_state |= SS_NBIO; VOP_UNLOCK(ap->a_vp, 0, td); error = soreceive(rso, (struct sockaddr **)0, uio, (struct mbuf **)0, (struct mbuf **)0, (int *)0); vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, td); if (ap->a_ioflag & IO_NDELAY) + /* XXXRW: socket lock? */ rso->so_state &= ~SS_NBIO; return (error); } @@ -354,12 +358,14 @@ panic("fifo_write mode"); #endif if (ap->a_ioflag & IO_NDELAY) + /* XXXRW: socket lock? */ wso->so_state |= SS_NBIO; VOP_UNLOCK(ap->a_vp, 0, td); error = sosend(wso, (struct sockaddr *)0, ap->a_uio, 0, (struct mbuf *)0, 0, td); vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, td); if (ap->a_ioflag & IO_NDELAY) + /* XXXRW: socket lock? */ wso->so_state &= ~SS_NBIO; return (error); } @@ -432,6 +438,7 @@ ap->a_kn->kn_hook = (caddr_t)so; + /* XXXRW: socket lock? */ SLIST_INSERT_HEAD(&sb->sb_sel.si_note, ap->a_kn, kn_selnext); sb->sb_flags |= SB_KNOTE; @@ -443,6 +450,7 @@ { struct socket *so = (struct socket *)kn->kn_hook; + /* XXXRW: socket lock? */ SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext); if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note)) so->so_rcv.sb_flags &= ~SB_KNOTE; @@ -453,6 +461,7 @@ { struct socket *so = (struct socket *)kn->kn_hook; + /* XXXRW: socket lock? 
*/ kn->kn_data = so->so_rcv.sb_cc; if (so->so_state & SS_CANTRCVMORE) { kn->kn_flags |= EV_EOF; @@ -467,6 +476,7 @@ { struct socket *so = (struct socket *)kn->kn_hook; + /* XXXRW: socket lock? */ SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext); if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note)) so->so_snd.sb_flags &= ~SB_KNOTE; @@ -477,6 +487,7 @@ { struct socket *so = (struct socket *)kn->kn_hook; + /* XXXRW: socket lock? */ kn->kn_data = sbspace(&so->so_snd); if (so->so_state & SS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; --- //depot/vendor/freebsd/src/sys/fs/portalfs/portal_vnops.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/fs/portalfs/portal_vnops.c 2004/04/07 20:11:34 @@ -193,6 +193,7 @@ unp2 = sotounpcb(so2); unp3 = sotounpcb(so3); + /* XXXRW: Locking? */ if (unp2->unp_addr) unp3->unp_addr = (struct sockaddr_un *) sodupsockaddr((struct sockaddr *)unp2->unp_addr, @@ -284,6 +285,7 @@ * and keep polling the reference count. XXX. */ s = splnet(); + /* XXXRW: Locking? */ while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { if (fmp->pm_server->f_count == 1) { error = ECONNREFUSED; --- //depot/vendor/freebsd/src/sys/kern/kern_descrip.c 2004/04/05 14:06:48 +++ //depot/user/rwatson/netperf/sys/kern/kern_descrip.c 2004/04/06 20:50:45 @@ -2039,6 +2039,7 @@ { NET_ASSERT_GIANT(); + SOCK_LOCK(so); sorele(so); } @@ -2064,6 +2065,10 @@ } /* We have the last ref so we can proceed without the file lock. */ FILE_UNLOCK(fp); + /* + * XXXRW: With sockets locked, can we push this Giant grab into + * the purely VFS case? 
+ */ mtx_lock(&Giant); if (fp->f_count < 0) panic("fdrop: count < 0"); --- //depot/vendor/freebsd/src/sys/kern/kern_timeout.c 2004/04/07 19:05:36 +++ //depot/user/rwatson/netperf/sys/kern/kern_timeout.c 2004/04/07 20:11:34 @@ -44,6 +44,7 @@ #include #include #include +#include #include static int avg_depth; @@ -55,6 +56,89 @@ static int avg_mpcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, "Average number of MP callouts made per softclock call. Units = 1/1000"); + +/*- + * Sampling buffer of function pointers executed by timeouts and callouts. + * This circular buffer wraps when it fills, and uses an inefficient + * sbuf-based sysctl to dump sample data to userspace. Sysctls can select + * to monitor mpsafe and !mpsafe callouts/timeouts as desired. Suggested + * use is: (1) set sample of interest (mpsafe/notmpsafe), (2) reset the + * buffer, (3) do some benchmark/test, (4) disable sampling, (5) dump + * buffer. + * + * XXX: ifdef TIMEOUT_SAMPLING? + */ + +#define MAXFUNC 200000 +static void * func_array[MAXFUNC]; +static int array_off; + +static void +push_cfunc(void *ptr) +{ + + /* XXX: unlocked; array_off is kept in [0, MAXFUNC) so it cannot overflow. */ + func_array[array_off] = ptr; + array_off = (array_off + 1) % MAXFUNC; +} + +static int +sysctl_cfunc(SYSCTL_HANDLER_ARGS) +{ + struct sbuf sb; + int error, i; + + if (req->newptr != NULL) + return (EINVAL); + + sbuf_new(&sb, NULL, 0, SBUF_AUTOEXTEND); + + for (i = 0; i < MAXFUNC; i++) { + if (func_array[i] == NULL) + break; + sbuf_printf(&sb, "%p ", func_array[i]); + } + sbuf_finish(&sb); + + error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb) + 1, req); + + sbuf_delete(&sb); + + return (error); +} + +SYSCTL_PROC(_debug, OID_AUTO, to_cfunc, CTLTYPE_STRING|CTLFLAG_RD, 0, 0, + sysctl_cfunc, "A", "callout/timeout sample"); + +static int +sysctl_cfunc_reset(SYSCTL_HANDLER_ARGS) +{ + int dummy, error; + + dummy = 0; + error = sysctl_handle_int(oidp, &dummy, 0, req); + if (error) + return (error); + + if (dummy != 0) { + bzero(func_array, sizeof(void *) 
* MAXFUNC); + array_off = 0; + } + + return (0); +} + +SYSCTL_PROC(_debug, OID_AUTO, to_cfunc_reset, CTLTYPE_INT|CTLFLAG_RW, 0, 0, + sysctl_cfunc_reset, "I", "Reset sample"); + +static int cfunc_sample_mpsafe; +static int cfunc_sample_notmpsafe; + +SYSCTL_INT(_debug, OID_AUTO, to_cfunc_mpsafe, CTLFLAG_RW, + &cfunc_sample_mpsafe, 0, "Sample mpsafe callouts"); +SYSCTL_INT(_debug, OID_AUTO, to_cfunc_notmpsafe, CTLFLAG_RW, + &cfunc_sample_notmpsafe, 0, "Sample !mpsafe callouts"); + /* * TODO: * allocate more timeout table slots when table overflows. @@ -245,8 +329,12 @@ if (!(c_flags & CALLOUT_MPSAFE)) { mtx_lock(&Giant); gcalls++; + if (cfunc_sample_notmpsafe) + push_cfunc(c_func); } else { mpcalls++; + if (cfunc_sample_mpsafe) + push_cfunc(c_func); } #ifdef DIAGNOSTIC binuptime(&bt1); --- //depot/vendor/freebsd/src/sys/kern/subr_log.c 2004/04/05 14:06:48 +++ //depot/user/rwatson/netperf/sys/kern/subr_log.c 2004/04/06 20:50:45 @@ -83,7 +83,12 @@ struct callout sc_callout; /* callout to wakeup syslog */ } logsoftc; -int log_open; /* also used in log() */ +/* + * log_mtx protects logsoftc, log_open. Note that log_mtx does *not* + * protect the structures associated with msgbuf, which require Giant. + */ +struct mtx log_mtx; +int log_open; /* also used in log() */ /* Times per second to check for a pending syslog wakeup. */ static int log_wakeups_per_second = 5; @@ -94,17 +99,24 @@ static int logopen(dev_t dev, int flags, int mode, struct thread *td) { - if (log_open) + + mtx_lock(&log_mtx); + if (log_open) { + mtx_unlock(&log_mtx); return (EBUSY); + } log_open = 1; - callout_init(&logsoftc.sc_callout, 0); + callout_init(&logsoftc.sc_callout, CALLOUT_MPSAFE); + mtx_unlock(&log_mtx); fsetown(td->td_proc->p_pid, &logsoftc.sc_sigio); /* signal process only */ + mtx_lock(&log_mtx); if (log_wakeups_per_second < 1) { printf("syslog wakeup is less than one. 
Adjusting to 1.\n"); log_wakeups_per_second = 1; } callout_reset(&logsoftc.sc_callout, hz / log_wakeups_per_second, logtimeout, NULL); + mtx_unlock(&log_mtx); return (0); } @@ -113,9 +125,11 @@ logclose(dev_t dev, int flag, int mode, struct thread *td) { + mtx_lock(&log_mtx); log_open = 0; callout_stop(&logsoftc.sc_callout); logsoftc.sc_state = 0; + mtx_unlock(&log_mtx); funsetown(&logsoftc.sc_sigio); return (0); } @@ -134,14 +148,18 @@ splx(s); return (EWOULDBLOCK); } + mtx_lock(&log_mtx); logsoftc.sc_state |= LOG_RDWAIT; + mtx_unlock(&log_mtx); if ((error = tsleep(mbp, LOG_RDPRI | PCATCH, "klog", 0))) { splx(s); return (error); } } splx(s); + mtx_lock(&log_mtx); logsoftc.sc_state &= ~LOG_RDWAIT; + mtx_unlock(&log_mtx); while (uio->uio_resid > 0) { l = imin(sizeof(buf), uio->uio_resid); @@ -178,8 +196,11 @@ logtimeout(void *arg) { - if (!log_open) + mtx_lock(&log_mtx); + if (!log_open) { + mtx_unlock(&log_mtx); return; + } if (log_wakeups_per_second < 1) { printf("syslog wakeup is less than one. Adjusting to 1.\n"); log_wakeups_per_second = 1; @@ -187,6 +208,7 @@ if (msgbuftrigger == 0) { callout_reset(&logsoftc.sc_callout, hz / log_wakeups_per_second, logtimeout, NULL); + mtx_unlock(&log_mtx); return; } msgbuftrigger = 0; @@ -199,6 +221,7 @@ } callout_reset(&logsoftc.sc_callout, hz / log_wakeups_per_second, logtimeout, NULL); + mtx_unlock(&log_mtx); } /*ARGSUSED*/ @@ -217,10 +240,12 @@ break; case FIOASYNC: + mtx_lock(&log_mtx); if (*(int *)data) logsoftc.sc_state |= LOG_ASYNC; else logsoftc.sc_state &= ~LOG_ASYNC; + mtx_unlock(&log_mtx); break; case FIOSETOWN: @@ -249,6 +274,7 @@ log_drvinit(void *unused) { + mtx_init(&log_mtx, "log_mtx", NULL, MTX_DEF); make_dev(&log_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "klog"); } --- //depot/vendor/freebsd/src/sys/kern/subr_prf.c 2004/04/05 14:06:48 +++ //depot/user/rwatson/netperf/sys/kern/subr_prf.c 2004/04/06 20:50:45 @@ -83,6 +83,9 @@ size_t remain; }; +/* + * XXXRW: We access subr_log.c's log_open variable unlocked. 
+ */ extern int log_open; static void msglogchar(int c, int pri); --- //depot/vendor/freebsd/src/sys/kern/sys_socket.c 2004/04/05 14:06:48 +++ //depot/user/rwatson/netperf/sys/kern/sys_socket.c 2004/04/07 12:09:58 @@ -77,6 +77,7 @@ NET_LOCK_GIANT(); #ifdef MAC + /* XXX: Socket lock needed here? */ error = mac_check_socket_receive(active_cred, so); if (error) { NET_UNLOCK_GIANT(); @@ -102,6 +103,7 @@ NET_LOCK_GIANT(); #ifdef MAC + /* XXX: Socket lock needed here? */ error = mac_check_socket_send(active_cred, so); if (error) { NET_UNLOCK_GIANT(); @@ -146,6 +148,7 @@ return (0); case FIONREAD: + /* Unlocked read. */ *(int *)data = so->so_rcv.sb_cc; return (0); @@ -205,6 +208,9 @@ /* * If SS_CANTRCVMORE is set, but there's still data left in the * receive buffer, the socket is still readable. + * + * XXXRW: perhaps should lock socket buffer so st_size result + * is consistent. */ if ((so->so_state & SS_CANTRCVMORE) == 0 || so->so_rcv.sb_cc != 0) --- //depot/vendor/freebsd/src/sys/kern/uipc_socket.c 2004/04/05 14:06:48 +++ //depot/user/rwatson/netperf/sys/kern/uipc_socket.c 2004/04/08 16:01:02 @@ -141,6 +141,8 @@ return so; } #endif + SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd"); + SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv"); /* XXX race condition for reentrant kernel */ so->so_gencnt = ++so_gencnt; /* sx_init(&so->so_sxlock, "socket sxlock"); */ @@ -199,6 +201,7 @@ soref(so); error = (*prp->pr_usrreqs->pru_attach)(so, proto, td); if (error) { + SOCK_LOCK(so); so->so_state |= SS_NOFDREF; sorele(so); return (error); @@ -242,6 +245,8 @@ mac_destroy_socket(so); #endif crfree(so->so_cred); + SOCKBUF_LOCK_DESTROY(&so->so_snd); + SOCKBUF_LOCK_DESTROY(&so->so_rcv); /* sx_destroy(&so->so_sxlock); */ uma_zfree(socket_zone, so); --numopensockets; @@ -266,11 +271,13 @@ splx(s); return (error); } + SOCKBUF_LOCK(&so->so_rcv); if (TAILQ_EMPTY(&so->so_comp)) so->so_options |= SO_ACCEPTCONN; if (backlog < 0 || backlog > somaxconn) backlog = somaxconn; so->so_qlimit = backlog; + 
SOCKBUF_UNLOCK(&so->so_rcv); splx(s); return (0); } @@ -280,14 +287,17 @@ struct socket *so; { struct socket *head; - int s; KASSERT(so->so_count == 0, ("socket %p so_count not 0", so)); + SOCK_LOCK_ASSERT(so); - if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) + if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) { + SOCK_UNLOCK(so); return; - if (so->so_head != NULL) { - head = so->so_head; + } + SOCK_UNLOCK(so); + SOCKBUF_LOCK(&so->so_rcv); + if ((head = so->so_head) != NULL) { if (so->so_state & SS_INCOMP) { TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; @@ -298,6 +308,7 @@ * accept(2) may hang after select(2) indicated * that the listening socket was ready. */ + SOCKBUF_UNLOCK(&so->so_rcv); return; } else { panic("sofree: not queued"); @@ -305,13 +316,14 @@ so->so_state &= ~SS_INCOMP; so->so_head = NULL; } + SOCKBUF_UNLOCK(&so->so_rcv); + SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_flags |= SB_NOINTR; (void)sblock(&so->so_snd, M_WAITOK); - s = splimp(); - socantsendmore(so); - splx(s); + socantsendmore_locked(so); sbunlock(&so->so_snd); sbrelease(&so->so_snd, so); + SOCKBUF_UNLOCK(&so->so_snd); sorflush(so); sodealloc(so); } @@ -351,11 +363,14 @@ (void) soabort(sp); } } + SOCK_LOCK(so); if (so->so_pcb == 0) goto discard; if (so->so_state & SS_ISCONNECTED) { if ((so->so_state & SS_ISDISCONNECTING) == 0) { + SOCK_UNLOCK(so); error = sodisconnect(so); + SOCK_LOCK(so); if (error) goto drop; } @@ -364,7 +379,7 @@ (so->so_state & SS_NBIO)) goto drop; while (so->so_state & SS_ISCONNECTED) { - error = tsleep(&so->so_timeo, + error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, "soclos", so->so_linger * hz); if (error) break; @@ -373,7 +388,10 @@ } drop: if (so->so_pcb) { - int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); + int error2; + SOCK_UNLOCK(so); + error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); + SOCK_LOCK(so); if (error == 0) error = error2; } @@ -397,6 +415,7 @@ error = 
(*so->so_proto->pr_usrreqs->pru_abort)(so); if (error) { + SOCK_LOCK(so); sotryfree(so); /* note: does not decrement the ref count */ return error; } @@ -408,14 +427,12 @@ struct socket *so; struct sockaddr **nam; { - int s = splnet(); int error; if ((so->so_state & SS_NOFDREF) == 0) panic("soaccept: !NOFDREF"); so->so_state &= ~SS_NOFDREF; error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); - splx(s); return (error); } @@ -425,12 +442,10 @@ struct sockaddr *nam; struct thread *td; { - int s; int error; if (so->so_options & SO_ACCEPTCONN) return (EOPNOTSUPP); - s = splnet(); /* * If protocol is connection-based, can only connect once. * Otherwise, if connected, try to disconnect first. @@ -443,7 +458,6 @@ error = EISCONN; else error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td); - splx(s); return (error); } @@ -452,11 +466,9 @@ struct socket *so1; struct socket *so2; { - int s = splnet(); int error; error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); - splx(s); return (error); } @@ -464,20 +476,13 @@ sodisconnect(so) struct socket *so; { - int s = splnet(); int error; - if ((so->so_state & SS_ISCONNECTED) == 0) { - error = ENOTCONN; - goto bad; - } - if (so->so_state & SS_ISDISCONNECTING) { - error = EALREADY; - goto bad; - } + if ((so->so_state & SS_ISCONNECTED) == 0) + return ENOTCONN; + if (so->so_state & SS_ISDISCONNECTING) + return EALREADY; error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); -bad: - splx(s); return (error); } @@ -528,7 +533,7 @@ struct mbuf **mp; struct mbuf *m; long space, len, resid; - int clen = 0, error, s, dontroute, mlen; + int clen = 0, error, dontroute, mlen; int atomic = sosendallatonce(so) || top; #ifdef ZERO_COPY_SOCKETS int cow_send; @@ -560,20 +565,18 @@ td->td_proc->p_stats->p_ru.ru_msgsnd++; if (control) clen = control->m_len; -#define snderr(errno) { error = (errno); splx(s); goto release; } +#define snderr(errno) { error = (errno); goto release; } -restart: + SOCKBUF_LOCK(&so->so_snd); error = 
sblock(&so->so_snd, SBLOCKWAIT(flags)); if (error) goto out; do { - s = splnet(); if (so->so_state & SS_CANTSENDMORE) snderr(EPIPE); if (so->so_error) { error = so->so_error; so->so_error = 0; - splx(s); goto release; } if ((so->so_state & SS_ISCONNECTED) == 0) { @@ -602,14 +605,11 @@ (atomic || space < so->so_snd.sb_lowat || space < clen)) { if (so->so_state & SS_NBIO) snderr(EWOULDBLOCK); - sbunlock(&so->so_snd); error = sbwait(&so->so_snd); - splx(s); if (error) - goto out; - goto restart; + goto release; + continue; } - splx(s); mp = &top; space -= clen; do { @@ -624,10 +624,12 @@ #ifdef ZERO_COPY_SOCKETS cow_send = 0; #endif /* ZERO_COPY_SOCKETS */ + SOCKBUF_UNLOCK(&so->so_snd); if (top == 0) { MGETHDR(m, M_TRYWAIT, MT_DATA); if (m == NULL) { error = ENOBUFS; + SOCKBUF_LOCK(&so->so_snd); /* XXX */ goto release; } mlen = MHLEN; @@ -637,6 +639,7 @@ MGET(m, M_TRYWAIT, MT_DATA); if (m == NULL) { error = ENOBUFS; + SOCKBUF_LOCK(&so->so_snd); /* XXX */ goto release; } mlen = MLEN; @@ -684,6 +687,7 @@ else #endif /* ZERO_COPY_SOCKETS */ error = uiomove(mtod(m, void *), (int)len, uio); + SOCKBUF_LOCK(&so->so_snd); resid = uio->uio_resid; m->m_len = len; *mp = m; @@ -699,13 +703,12 @@ } while (space > 0 && atomic); if (dontroute) so->so_options |= SO_DONTROUTE; - s = splnet(); /* XXX */ /* * XXX all the SS_CANTSENDMORE checks previously * done could be out of date. We could have recieved * a reset packet in an interrupt or maybe we slept * while doing page faults in uiomove() etc. We could - * probably recheck again inside the splnet() protection + * probably recheck again inside the locking protection * here, but there are probably other places that this * also happens. We must rethink this. */ @@ -723,7 +726,6 @@ /* If there is more to send set PRUS_MORETOCOME */ (resid > 0 && space > 0) ? 
PRUS_MORETOCOME : 0, top, addr, control, td); - splx(s); if (dontroute) so->so_options &= ~SO_DONTROUTE; clen = 0; @@ -738,6 +740,7 @@ release: sbunlock(&so->so_snd); out: + SOCKBUF_UNLOCK(&so->so_snd); if (top) m_freem(top); if (control) @@ -771,7 +774,7 @@ int *flagsp; { struct mbuf *m, **mp; - int flags, len, error, s, offset; + int flags, len, error, offset; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; int moff, type = 0; @@ -829,12 +832,13 @@ if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) (*pr->pr_usrreqs->pru_rcvd)(so, 0); -restart: + SOCKBUF_LOCK(&so->so_rcv); error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); if (error) - return (error); - s = splnet(); + goto out; +restart: + SOCKBUF_LOCK_ASSERT(&so->so_rcv); m = so->so_rcv.sb_mb; /* * If we have less data than requested, block awaiting more @@ -852,9 +856,8 @@ (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { - KASSERT(m != 0 || !so->so_rcv.sb_cc, - ("receive: m == %p so->so_rcv.sb_cc == %u", - m, so->so_rcv.sb_cc)); + KASSERT(!(m == 0 && so->so_rcv.sb_cc), + ("m %p so->so_rcv.sb_cc %u", m, so->so_rcv.sb_cc)); if (so->so_error) { if (m) goto dontblock; @@ -887,14 +890,14 @@ } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); - sbunlock(&so->so_rcv); error = sbwait(&so->so_rcv); - splx(s); if (error) - return (error); + goto release; goto restart; } dontblock: + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + KASSERT(error == 0, ("unexpected state, error %d", error)); if (uio->uio_td) uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++; SBLASTRECORDCHK(&so->so_rcv); @@ -903,10 +906,14 @@ if (pr->pr_flags & PR_ADDR) { KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); - orig_resid = 0; - if (psa) + if (psa) { *psa = sodupsockaddr(mtod(m, struct sockaddr *), - mp0 == NULL ? 
M_WAITOK : M_NOWAIT); + M_NOWAIT); + if (*psa == NULL) { + error = ENOMEM; + goto release; + } + } if (flags & MSG_PEEK) { m = m->m_next; } else { @@ -914,30 +921,56 @@ so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; } + orig_resid = 0; } - while (m && m->m_type == MT_CONTROL && error == 0) { - if (flags & MSG_PEEK) { - if (controlp) - *controlp = m_copy(m, 0, m->m_len); - m = m->m_next; - } else { - sbfree(&so->so_rcv, m); - so->so_rcv.sb_mb = m->m_next; - m->m_next = NULL; - if (pr->pr_domain->dom_externalize) - error = - (*pr->pr_domain->dom_externalize)(m, controlp); - else if (controlp) - *controlp = m; - else - m_freem(m); - m = so->so_rcv.sb_mb; + if (m && m->m_type == MT_CONTROL) { + struct mbuf *cm = NULL; + struct mbuf **cme = &cm; + + do { + if (flags & MSG_PEEK) { + if (controlp) { + SOCKBUF_UNLOCK(&so->so_rcv); + *controlp = m_copym(m, 0, m->m_len, + M_TRYWAIT); + SOCKBUF_LOCK(&so->so_rcv); + if (*controlp == NULL) { + error = ENOBUFS; + goto release; + } + controlp = &(*controlp)->m_next; + } + m = m->m_next; + } else { + sbfree(&so->so_rcv, m); + so->so_rcv.sb_mb = m->m_next; + m->m_next = NULL; + if (controlp) { + /* + * Collect mbufs for processing below. + */ + *cme = m; + cme = &(*cme)->m_next; + } else + m_free(m); + m = so->so_rcv.sb_mb; + } + } while (m && m->m_type == MT_CONTROL); + if (cm != NULL) { + if (pr->pr_domain->dom_externalize) { + /* + * NB: drop the lock to avoid potential LORs; + * in particular unix domain sockets grab the + * file descriptor lock which would be a LOR. 
+ */ + SOCKBUF_UNLOCK(&so->so_rcv); + error = (*pr->pr_domain->dom_externalize) + (cm, controlp); + SOCKBUF_LOCK(&so->so_rcv); + } else + m_freem(cm); } - if (controlp) { - orig_resid = 0; - while (*controlp != NULL) - controlp = &(*controlp)->m_next; - } + orig_resid = 0; } if (m) { if ((flags & MSG_PEEK) == 0) { @@ -994,7 +1027,7 @@ if (mp == 0) { SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); - splx(s); + SOCKBUF_UNLOCK(&so->so_rcv); #ifdef ZERO_COPY_SOCKETS if (so_zero_copy_receive) { vm_page_t pg; @@ -1018,7 +1051,7 @@ } else #endif /* ZERO_COPY_SOCKETS */ error = uiomove(mtod(m, char *) + moff, (int)len, uio); - s = splnet(); + SOCKBUF_LOCK(&so->so_rcv); if (error) goto release; } else @@ -1060,6 +1093,7 @@ *mp = m_copym(m, 0, len, M_TRYWAIT); m->m_data += len; m->m_len -= len; + SOCKBUF_LOCK_ASSERT(&so->so_rcv); so->so_rcv.sb_cc -= len; } } @@ -1099,9 +1133,8 @@ SBLASTMBUFCHK(&so->so_rcv); error = sbwait(&so->so_rcv); if (error) { - sbunlock(&so->so_rcv); - splx(s); - return (0); + error = 0; + goto release; } m = so->so_rcv.sb_mb; if (m) @@ -1134,17 +1167,15 @@ (*pr->pr_usrreqs->pru_rcvd)(so, flags); } if (orig_resid == uio->uio_resid && orig_resid && - (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { - sbunlock(&so->so_rcv); - splx(s); - goto restart; - } + (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) + goto restart; /* XXX multi-counts msgs */ if (flagsp) *flagsp |= flags; release: sbunlock(&so->so_rcv); - splx(s); +out: + SOCKBUF_UNLOCK(&so->so_rcv); return (error); } @@ -1171,23 +1202,23 @@ { struct sockbuf *sb = &so->so_rcv; struct protosw *pr = so->so_proto; - int s; struct sockbuf asb; + SOCKBUF_LOCK(sb); sb->sb_flags |= SB_NOINTR; (void) sblock(sb, M_WAITOK); - s = splimp(); - socantrcvmore(so); + socantrcvmore_locked(so); sbunlock(sb); asb = *sb; /* - * Invalidate/clear most of the sockbuf structure, but keep - * its selinfo structure valid. 
+ * Invalidate/clear most of the sockbuf structure, but leave + * selinfo and mutex data unchanged. */ bzero(&sb->sb_startzero, sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); - splx(s); + SOCKBUF_UNLOCK(sb); + /* XXXRW: is passing in sb_mb this way really safe? */ if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) (*pr->pr_domain->dom_dispose)(asb.sb_mb); sbrelease(&asb, so); @@ -1204,6 +1235,7 @@ struct so_accf *af = so->so_accf; int error = 0; +/* XXX locking */ /* do not set/remove accept filters on non listen sockets */ if ((so->so_options & SO_ACCEPTCONN) == 0) { error = EINVAL; @@ -1760,6 +1792,11 @@ int revents = 0; int s = splnet(); + /* + * XXXRW: Lots of unlocked reads, and some writes. Probably + * some more locking is called for here, especially when + * setting the sb_sel flags. + */ if (events & (POLLIN | POLLRDNORM)) if (soreadable(so)) revents |= events & (POLLIN | POLLRDNORM); @@ -1800,7 +1837,6 @@ { struct socket *so = kn->kn_fp->f_data; struct sockbuf *sb; - int s; switch (kn->kn_filter) { case EVFILT_READ: @@ -1818,10 +1854,10 @@ return (1); } - s = splnet(); + SOCKBUF_LOCK(sb); SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext); sb->sb_flags |= SB_KNOTE; - splx(s); + SOCKBUF_UNLOCK(sb); return (0); } @@ -1829,12 +1865,12 @@ filt_sordetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; - int s = splnet(); + SOCKBUF_LOCK(&so->so_rcv); SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext); if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note)) so->so_rcv.sb_flags &= ~SB_KNOTE; - splx(s); + SOCKBUF_UNLOCK(&so->so_rcv); } /*ARGSUSED*/ @@ -1842,8 +1878,11 @@ filt_soread(struct knote *kn, long hint) { struct socket *so = kn->kn_fp->f_data; - int result; + int needlock, result; + needlock = !SOCKBUF_OWNED(&so->so_rcv); + if (needlock) + SOCKBUF_LOCK(&so->so_rcv); kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl; if (so->so_state & SS_CANTRCVMORE) { kn->kn_flags |= EV_EOF; @@ -1855,6 +1894,8 @@ result = (kn->kn_data >= 
kn->kn_sdata); else result = (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat); + if (needlock) + SOCKBUF_UNLOCK(&so->so_rcv); return (result); } @@ -1862,12 +1903,12 @@ filt_sowdetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; - int s = splnet(); + SOCKBUF_LOCK(&so->so_snd); SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext); if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note)) so->so_snd.sb_flags &= ~SB_KNOTE; - splx(s); + SOCKBUF_UNLOCK(&so->so_snd); } /*ARGSUSED*/ @@ -1875,8 +1916,11 @@ filt_sowrite(struct knote *kn, long hint) { struct socket *so = kn->kn_fp->f_data; - int result; + int needlock, result; + needlock = !SOCKBUF_OWNED(&so->so_snd); + if (needlock) + SOCKBUF_LOCK(&so->so_snd); kn->kn_data = sbspace(&so->so_snd); if (so->so_state & SS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; @@ -1891,6 +1935,8 @@ result = (kn->kn_data >= kn->kn_sdata); else result = (kn->kn_data >= so->so_snd.sb_lowat); + if (needlock) + SOCKBUF_UNLOCK(&so->so_snd); return (result); } --- //depot/vendor/freebsd/src/sys/kern/uipc_socket2.c 2004/04/05 14:06:48 +++ //depot/user/rwatson/netperf/sys/kern/uipc_socket2.c 2004/04/07 14:18:32 @@ -104,9 +104,14 @@ soisconnecting(so) register struct socket *so; { + int need_lock = !SOCK_OWNED(so); + if (need_lock) + SOCK_LOCK(so); so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= SS_ISCONNECTING; + if (need_lock) + SOCK_UNLOCK(so); } void @@ -114,56 +119,80 @@ struct socket *so; { struct socket *head = so->so_head; + int need_lock = !SOCK_OWNED(so); + if (need_lock) + SOCK_LOCK(so); so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); so->so_state |= SS_ISCONNECTED; if (head && (so->so_state & SS_INCOMP)) { - if ((so->so_options & SO_ACCEPTFILTER) != 0) { + if ((so->so_options & SO_ACCEPTFILTER) == 0) { + if (need_lock) + SOCK_UNLOCK(so); + SOCK_LOCK(head); + TAILQ_REMOVE(&head->so_incomp, so, so_list); + head->so_incqlen--; + so->so_state &= ~SS_INCOMP; + TAILQ_INSERT_TAIL(&head->so_comp, so, 
so_list); + head->so_qlen++; + so->so_state |= SS_COMP; + sorwakeup_locked(head); + wakeup_one(&head->so_timeo); + SOCK_UNLOCK(head); + } else { +/* XXX locking */ so->so_upcall = head->so_accf->so_accept_filter->accf_callback; so->so_upcallarg = head->so_accf->so_accept_filter_arg; so->so_rcv.sb_flags |= SB_UPCALL; so->so_options &= ~SO_ACCEPTFILTER; so->so_upcall(so, so->so_upcallarg, M_TRYWAIT); - return; } - TAILQ_REMOVE(&head->so_incomp, so, so_list); - head->so_incqlen--; - so->so_state &= ~SS_INCOMP; - TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); - head->so_qlen++; - so->so_state |= SS_COMP; - sorwakeup(head); - wakeup_one(&head->so_timeo); } else { wakeup(&so->so_timeo); + SOCK_UNLOCK(so); sorwakeup(so); sowwakeup(so); } + if (!need_lock) + SOCK_LOCK(so); } void soisdisconnecting(so) register struct socket *so; { + int need_lock = !SOCK_OWNED(so); + if (need_lock) + SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTING; so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); wakeup(&so->so_timeo); + SOCK_UNLOCK(so); sowwakeup(so); sorwakeup(so); + if (!need_lock) + SOCK_LOCK(so); } void soisdisconnected(so) register struct socket *so; { + int need_lock = !SOCK_OWNED(so); + if (need_lock) + SOCK_LOCK(so); so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED); wakeup(&so->so_timeo); + SOCK_UNLOCK(so); + /* Unlocked read. 
*/ sbdrop(&so->so_snd, so->so_snd.sb_cc); sowwakeup(so); sorwakeup(so); + if (!need_lock) + SOCK_LOCK(so); } /* @@ -182,8 +211,12 @@ int connstatus; { register struct socket *so; + int over; - if (head->so_qlen > 3 * head->so_qlimit / 2) + SOCK_LOCK(head); + over = (head->so_qlen > 3 * head->so_qlimit / 2); + SOCK_UNLOCK(head); + if (over) return ((struct socket *)0); so = soalloc(M_NOWAIT); if (so == NULL) @@ -201,12 +234,13 @@ #ifdef MAC mac_create_socket_from_socket(head, so); #endif + if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) || (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { sodealloc(so); return ((struct socket *)0); } - + SOCKBUF_LOCK(&head->so_rcv); if (connstatus) { TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); so->so_state |= SS_COMP; @@ -221,10 +255,11 @@ so->so_state |= SS_INCOMP; head->so_incqlen++; } + SOCKBUF_UNLOCK(&head->so_rcv); if (connstatus) { + so->so_state |= connstatus; sorwakeup(head); wakeup(&head->so_timeo); - so->so_state |= connstatus; } return (so); } @@ -249,6 +284,16 @@ } void +socantsendmore_locked(so) + struct socket *so; +{ + SOCKBUF_LOCK_ASSERT(&so->so_snd); + + so->so_state |= SS_CANTSENDMORE; + sowwakeup_locked(so); +} + +void socantrcvmore(so) struct socket *so; { @@ -257,6 +302,16 @@ sorwakeup(so); } +void +socantrcvmore_locked(so) + struct socket *so; +{ + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + + so->so_state |= SS_CANTRCVMORE; + sorwakeup_locked(so); +} + /* * Wait for data to arrive at/drain from a socket buffer. */ @@ -264,9 +319,10 @@ sbwait(sb) struct sockbuf *sb; { + SOCKBUF_LOCK_ASSERT(sb); sb->sb_flags |= SB_WAIT; - return (tsleep(&sb->sb_cc, + return (msleep(&sb->sb_cc, &sb->sb_mtx, (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", sb->sb_timeo)); } @@ -281,9 +337,11 @@ { int error; + SOCKBUF_LOCK_ASSERT(sb); + while (sb->sb_flags & SB_LOCK) { sb->sb_flags |= SB_WANT; - error = tsleep(&sb->sb_flags, + error = msleep(&sb->sb_flags, &sb->sb_mtx, (sb->sb_flags & SB_NOINTR) ? 
PSOCK : PSOCK|PCATCH, "sblock", 0); if (error) @@ -294,9 +352,53 @@ } /* + * The part of sowakeup that must be done while + * holding the sockbuf lock. + */ +static __inline void +sowakeup_under_lock(struct socket *so, struct sockbuf *sb) +{ + SOCKBUF_LOCK_ASSERT(sb); + + selwakeuppri(&sb->sb_sel, PSOCK); + sb->sb_flags &= ~SB_SEL; + if (sb->sb_flags & SB_WAIT) { + sb->sb_flags &= ~SB_WAIT; + wakeup(&sb->sb_cc); + } +} + +/* + * Wakeup processes waiting on a socket buffer. + * Do asynchronous notification via SIGIO + * if the socket has the SS_ASYNC flag set. + * + * The caller is assumed to hold the necessary + * sockbuf lock. + */ +void +sowakeup_locked(so, sb) + register struct socket *so; + register struct sockbuf *sb; +{ + + sowakeup_under_lock(so, sb); + + if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) + pgsigio(&so->so_sigio, SIGIO, 0); + if (sb->sb_flags & SB_UPCALL) + (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT); + if (sb->sb_flags & SB_AIO) /* XXX locking */ + aio_swake(so, sb); + KNOTE(&sb->sb_sel.si_note, 0); /* XXX locking? */ +} + +/* * Wakeup processes waiting on a socket buffer. * Do asynchronous notification via SIGIO * if the socket has the SS_ASYNC flag set. + * + * The caller does not hold the sockbuf lock. 
*/ void sowakeup(so, sb) @@ -304,19 +406,25 @@ register struct sockbuf *sb; { - selwakeuppri(&sb->sb_sel, PSOCK); - sb->sb_flags &= ~SB_SEL; - if (sb->sb_flags & SB_WAIT) { - sb->sb_flags &= ~SB_WAIT; - wakeup(&sb->sb_cc); - } + SOCKBUF_LOCK(sb); + sowakeup_under_lock(so, sb); + SOCKBUF_UNLOCK(sb); + if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) pgsigio(&so->so_sigio, SIGIO, 0); if (sb->sb_flags & SB_UPCALL) (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT); - if (sb->sb_flags & SB_AIO) + if (sb->sb_flags & SB_AIO) /* XXX locking */ aio_swake(so, sb); - KNOTE(&sb->sb_sel.si_note, 0); + + /* + * XXXRW: KQueue requires attention in the locking department; + * however, we do rely on the socket buffer lock being held + * in filt_soread(), so we need to grab it here. + */ + SOCKBUF_LOCK(sb); + KNOTE(&sb->sb_sel.si_note, 0); /* XXX locking? */ + SOCKBUF_UNLOCK(sb); } /* @@ -356,8 +464,12 @@ register struct socket *so; u_long sndcc, rcvcc; { - struct thread *td = curthread; + struct thread *td = curthread; /* XXX */ + /* + * XXXRW: A lot of read-modify-write going in here without any + * socket buffer locks. + */ if (sbreserve(&so->so_snd, sndcc, so, td) == 0) goto bad; if (sbreserve(&so->so_rcv, rcvcc, so, td) == 0) @@ -412,6 +524,13 @@ /* * td will only be NULL when we're in an interrupt * (e.g. in tcp_input()) + * + * XXXRW: This comment is true, but only because the caller passed + * in NULL, not for the 4.x reason that there is no thread + * available. Need to be careful of callers that do this wrong; + * I suspect many do it wrong, and therefore many socket buffers + * end up with the wrong limits, especially via the soreserve() + * path. 
*/ if (cc > sb_max_adj) return (0); @@ -476,6 +595,8 @@ { struct mbuf *m = sb->sb_mb; + SOCKBUF_LOCK_ASSERT(sb); + while (m && m->m_nextpkt) m = m->m_nextpkt; @@ -495,6 +616,8 @@ struct mbuf *m = sb->sb_mb; struct mbuf *n; + SOCKBUF_LOCK_ASSERT(sb); + while (m && m->m_nextpkt) m = m->m_nextpkt; @@ -517,6 +640,7 @@ #endif /* SOCKBUF_DEBUG */ #define SBLINKRECORD(sb, m0) do { \ + SOCKBUF_LOCK_ASSERT(sb); \ if ((sb)->sb_lastrecord != NULL) \ (sb)->sb_lastrecord->m_nextpkt = (m0); \ else \ @@ -531,7 +655,7 @@ * discarded and mbufs are compacted where possible. */ void -sbappend(sb, m) +sbappend_locked(sb, m) struct sockbuf *sb; struct mbuf *m; { @@ -539,6 +663,9 @@ if (m == 0) return; + + SOCKBUF_LOCK_ASSERT(sb); + SBLASTRECORDCHK(sb); n = sb->sb_mb; if (n) { @@ -546,7 +673,7 @@ n = n->m_nextpkt; do { if (n->m_flags & M_EOR) { - sbappendrecord(sb, m); /* XXXXXX!!!! */ + sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); @@ -559,7 +686,7 @@ if ((n = sb->sb_lastrecord) != NULL) { do { if (n->m_flags & M_EOR) { - sbappendrecord(sb, m); /* XXXXXX!!!! */ + sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); @@ -576,13 +703,33 @@ } /* + * Append mbuf chain m to the last record in the + * socket buffer sb. The additional space associated + * the mbuf chain is recorded in sb. Empty mbufs are + * discarded and mbufs are compacted where possible. + */ +void +sbappend(sb, m) + struct sockbuf *sb; + struct mbuf *m; +{ + if (!SOCKBUF_OWNED(sb)) { + SOCKBUF_LOCK(sb); + sbappend_locked(sb, m); + SOCKBUF_UNLOCK(sb); + } else + sbappend_locked(sb, m); +} + +/* * This version of sbappend() should only be used when the caller * absolutely knows that there will never be more than one record * in the socket buffer, that is, a stream protocol (such as TCP). 
*/ void -sbappendstream(struct sockbuf *sb, struct mbuf *m) +sbappendstream_locked(struct sockbuf *sb, struct mbuf *m) { + SOCKBUF_LOCK_ASSERT(sb); KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); @@ -595,6 +742,22 @@ SBLASTRECORDCHK(sb); } +/* + * This version of sbappend() should only be used when the caller + * absolutely knows that there will never be more than one record + * in the socket buffer, that is, a stream protocol (such as TCP). + */ +void +sbappendstream(struct sockbuf *sb, struct mbuf *m) +{ + if (!SOCKBUF_OWNED(sb)) { + SOCKBUF_LOCK(sb); + sbappendstream_locked(sb, m); + SOCKBUF_UNLOCK(sb); + } else + sbappendstream_locked(sb, m); +} + #ifdef SOCKBUF_DEBUG void sbcheck(sb) @@ -604,6 +767,8 @@ struct mbuf *n = 0; u_long len = 0, mbcnt = 0; + SOCKBUF_LOCK_ASSERT(sb); + for (m = sb->sb_mb; m; m = n) { n = m->m_nextpkt; for (; m; m = m->m_next) { @@ -626,12 +791,14 @@ * begins a new record. */ void -sbappendrecord(sb, m0) +sbappendrecord_locked(sb, m0) register struct sockbuf *sb; register struct mbuf *m0; { register struct mbuf *m; + SOCKBUF_LOCK_ASSERT(sb); + if (m0 == 0) return; m = sb->sb_mb; @@ -659,18 +826,37 @@ } /* + * As above, except the mbuf chain + * begins a new record. + */ +void +sbappendrecord(sb, m0) + register struct sockbuf *sb; + register struct mbuf *m0; +{ + if (!SOCKBUF_OWNED(sb)) { + SOCKBUF_LOCK(sb); + sbappendrecord_locked(sb, m0); + SOCKBUF_UNLOCK(sb); + } else + sbappendrecord_locked(sb, m0); +} + +/* * As above except that OOB data * is inserted at the beginning of the sockbuf, * but after any other OOB data. 
*/ void -sbinsertoob(sb, m0) +sbinsertoob_locked(sb, m0) register struct sockbuf *sb; register struct mbuf *m0; { register struct mbuf *m; register struct mbuf **mp; + SOCKBUF_LOCK_ASSERT(sb); + if (m0 == 0) return; for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) { @@ -705,13 +891,31 @@ } /* + * As above except that OOB data + * is inserted at the beginning of the sockbuf, + * but after any other OOB data. + */ +void +sbinsertoob(sb, m0) + register struct sockbuf *sb; + register struct mbuf *m0; +{ + if (!SOCKBUF_OWNED(sb)) { + SOCKBUF_LOCK(sb); + sbinsertoob_locked(sb, m0); + SOCKBUF_UNLOCK(sb); + } else + sbinsertoob_locked(sb, m0); +} + +/* * Append address and data, and optionally, control (ancillary) data * to the receive queue of a socket. If present, * m0 must include a packet header with total length. * Returns 0 if no space in sockbuf or insufficient mbufs. */ int -sbappendaddr(sb, asa, m0, control) +sbappendaddr_locked(sb, asa, m0, control) struct sockbuf *sb; struct sockaddr *asa; struct mbuf *m0, *control; @@ -719,11 +923,14 @@ struct mbuf *m, *n, *nlast; int space = asa->sa_len; + SOCKBUF_LOCK_ASSERT(sb); + if (m0 && (m0->m_flags & M_PKTHDR) == 0) panic("sbappendaddr"); if (m0) space += m0->m_pkthdr.len; space += m_length(control, &n); + if (space > sbspace(sb)) return (0); #if MSIZE <= 256 @@ -745,25 +952,50 @@ sballoc(sb, n); nlast = n; SBLINKRECORD(sb, m); + sb->sb_mbtail = nlast; - sb->sb_mbtail = nlast; SBLASTMBUFCHK(sb); - SBLASTRECORDCHK(sb); return (1); } +/* + * Append address and data, and optionally, control (ancillary) data + * to the receive queue of a socket. If present, + * m0 must include a packet header with total length. + * Returns 0 if no space in sockbuf or insufficient mbufs. 
+ */ int -sbappendcontrol(sb, m0, control) +sbappendaddr(sb, asa, m0, control) + struct sockbuf *sb; + struct sockaddr *asa; + struct mbuf *m0, *control; +{ + int retval; + + if (!SOCKBUF_OWNED(sb)) { + SOCKBUF_LOCK(sb); + retval = sbappendaddr_locked(sb, asa, m0, control); + SOCKBUF_UNLOCK(sb); + } else + retval = sbappendaddr_locked(sb, asa, m0, control); + return (retval); +} + +int +sbappendcontrol_locked(sb, m0, control) struct sockbuf *sb; struct mbuf *control, *m0; { struct mbuf *m, *n, *mlast; int space; + SOCKBUF_LOCK_ASSERT(sb); + if (control == 0) panic("sbappendcontrol"); space = m_length(control, &n) + m_length(m0, NULL); + if (space > sbspace(sb)) return (0); n->m_next = m0; /* concatenate data to control */ @@ -775,14 +1007,30 @@ sballoc(sb, m); mlast = m; SBLINKRECORD(sb, control); + sb->sb_mbtail = mlast; - sb->sb_mbtail = mlast; SBLASTMBUFCHK(sb); + SBLASTRECORDCHK(sb); - SBLASTRECORDCHK(sb); return (1); } +int +sbappendcontrol(sb, m0, control) + struct sockbuf *sb; + struct mbuf *control, *m0; +{ + int retval; + + if (!SOCKBUF_OWNED(sb)) { + SOCKBUF_LOCK(sb); + retval = sbappendcontrol_locked(sb, m0, control); + SOCKBUF_UNLOCK(sb); + } else + retval = sbappendcontrol_locked(sb, m0, control); + return (retval); +} + /* * Compress mbuf chain m into the socket * buffer sb following mbuf n. If n @@ -796,6 +1044,8 @@ register int eor = 0; register struct mbuf *o; + SOCKBUF_LOCK_ASSERT(sb); + while (m) { eor |= m->m_flags & M_EOR; if (m->m_len == 0 && @@ -852,6 +1102,13 @@ register struct sockbuf *sb; { + /* + * XXXRW: It appears the socket buffer really ought to be held + * here, since we read a lot of socket buffer fields and might + * get inconsistent results. However, that's not 100% clear.
+ */ + SOCKBUF_LOCK_ASSERT(sb); + if (sb->sb_flags & SB_LOCK) panic("sbflush: locked"); while (sb->sb_mbcnt) { @@ -864,7 +1121,8 @@ sbdrop(sb, (int)sb->sb_cc); } if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt) - panic("sbflush: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt); + panic("sbflush: cc %u || mb %p || mbcnt %u", + sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt); } /* @@ -877,6 +1135,10 @@ { register struct mbuf *m; struct mbuf *next; + int need_lock = !SOCKBUF_OWNED(sb); + + if (need_lock) + SOCKBUF_LOCK(sb); next = (m = sb->sb_mb) ? m->m_nextpkt : 0; while (len > 0) { @@ -921,6 +1183,9 @@ } else if (m->m_nextpkt == NULL) { sb->sb_lastrecord = m; } + + if (need_lock) + SOCKBUF_UNLOCK(sb); } /* @@ -932,6 +1197,10 @@ register struct sockbuf *sb; { register struct mbuf *m; + int need_lock = !SOCKBUF_OWNED(sb); + + if (need_lock) + SOCKBUF_LOCK(sb); m = sb->sb_mb; if (m) { @@ -942,6 +1211,9 @@ } while (m); } SB_EMPTY_FIXUP(sb); + + if (need_lock) + SOCKBUF_UNLOCK(sb); } /* --- //depot/vendor/freebsd/src/sys/kern/uipc_syscalls.c 2004/04/08 00:15:32 +++ //depot/user/rwatson/netperf/sys/kern/uipc_syscalls.c 2004/04/08 16:02:58 @@ -174,6 +174,7 @@ if ((error = fgetsock(td, fd, &so, NULL)) != 0) goto done2; #ifdef MAC + /* XXXRW: MAC requires socket lock? */ error = mac_check_socket_bind(td->td_ucred, so, sa); if (error) goto done1; @@ -207,6 +208,7 @@ NET_LOCK_GIANT(); if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { #ifdef MAC + /* XXXRW: MAC requires socket lock? 
*/ error = mac_check_socket_listen(td->td_ucred, so); if (error) goto done; @@ -261,7 +263,9 @@ if (error) goto done2; s = splnet(); + SOCK_LOCK(head); if ((head->so_options & SO_ACCEPTCONN) == 0) { + SOCK_UNLOCK(head); splx(s); error = EINVAL; goto done; @@ -275,9 +279,10 @@ head->so_error = EWOULDBLOCK; break; } - error = tsleep(&head->so_timeo, PSOCK | PCATCH, + error = msleep(&head->so_timeo, SOCK_MTX(head), PSOCK | PCATCH, "accept", 0); if (error) { + SOCK_UNLOCK(head); splx(s); goto done; } @@ -285,6 +290,7 @@ if (head->so_error) { error = head->so_error; head->so_error = 0; + SOCK_UNLOCK(head); splx(s); goto done; } @@ -294,11 +300,13 @@ * ready to be accepted. Remove it from the queue prior to * allocating the file descriptor for it since falloc() may * block allowing another process to accept the connection - * instead. + * instead. The reference previously owned by the socket queue + * is now thread-local, letting us release the lock on the head. */ so = TAILQ_FIRST(&head->so_comp); TAILQ_REMOVE(&head->so_comp, so, so_list); head->so_qlen--; + SOCK_UNLOCK(head); error = falloc(td, &nfp, &fd); if (error) { @@ -306,20 +314,27 @@ * Probably ran out of file descriptors. Put the * unaccepted connection back onto the queue and * do another wakeup so some other process might - * have a chance at it. + * have a chance at it. Note that strict ordering + * is lost. */ + SOCK_LOCK(head); TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); head->so_qlen++; wakeup_one(&head->so_timeo); + SOCK_UNLOCK(head); splx(s); goto done; } /* An extra reference on `nfp' has been held for us by falloc(). */ td->td_retval[0] = fd; + /* XXX lock? */ /* connection has been removed from the listen queue */ KNOTE(&head->so_rcv.sb_sel.si_note, 0); + /* + * XXXRW: so should be locked to modify so_state here? + */ so->so_state &= ~SS_COMP; so->so_head = NULL; pgid = fgetown(&head->so_sigio); @@ -477,6 +492,7 @@ goto done1; } #ifdef MAC + /* XXXRW: MAC requires socket lock? 
*/ error = mac_check_socket_connect(td->td_ucred, so, sa); if (error) goto bad; @@ -489,8 +505,10 @@ goto done1; } s = splnet(); + SOCK_LOCK(so); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { - error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0); + error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, + "connec", 0); if (error) { if (error == EINTR || error == ERESTART) interrupted = 1; @@ -501,6 +519,7 @@ error = so->so_error; so->so_error = 0; } + SOCK_UNLOCK(so); splx(s); bad: if (!interrupted) @@ -696,6 +715,7 @@ goto bad2; #ifdef MAC + /* XXXRW: MAC requires socket lock? */ error = mac_check_socket_send(td->td_ucred, so); if (error) goto bad; @@ -939,6 +959,7 @@ } #ifdef MAC + /* XXXRW: MAC requires socket lock? */ error = mac_check_socket_receive(td->td_ucred, so); if (error) { fputsock(so); @@ -1737,6 +1758,7 @@ } #ifdef MAC + /* XXXRW: MAC requires socket lock? */ error = mac_check_socket_send(td->td_ucred, so); if (error) goto done; @@ -1776,7 +1798,9 @@ /* * Protect against multiple writers to the socket. */ + SOCKBUF_LOCK(&so->so_snd); (void) sblock(&so->so_snd, M_WAITOK); + SOCKBUF_UNLOCK(&so->so_snd); /* * Loop through the pages in the file, starting with the requested @@ -1816,14 +1840,17 @@ * Optimize the non-blocking case by looking at the socket space * before going to the extra work of constituting the sf_buf. */ + SOCKBUF_LOCK(&so->so_snd); if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { if (so->so_state & SS_CANTSENDMORE) error = EPIPE; else error = EAGAIN; sbunlock(&so->so_snd); + SOCKBUF_UNLOCK(&so->so_snd); goto done; } + SOCKBUF_UNLOCK(&so->so_snd); VM_OBJECT_LOCK(obj); /* * Attempt to look up the page. 
@@ -1912,7 +1939,9 @@ } vm_page_unlock_queues(); VM_OBJECT_UNLOCK(obj); + SOCKBUF_LOCK(&so->so_snd); sbunlock(&so->so_snd); + SOCKBUF_UNLOCK(&so->so_snd); goto done; } vm_page_unlock_queues(); @@ -1928,7 +1957,9 @@ if (pg->wire_count == 0 && pg->object == NULL) vm_page_free(pg); vm_page_unlock_queues(); + SOCKBUF_LOCK(&so->so_snd); sbunlock(&so->so_snd); + SOCKBUF_UNLOCK(&so->so_snd); error = EINTR; goto done; } @@ -1965,6 +1996,7 @@ * Add the buffer to the socket buffer chain. */ s = splnet(); + SOCKBUF_LOCK(&so->so_snd); retry_space: /* * Make sure that the socket is still able to take more data. @@ -1986,6 +2018,7 @@ } m_freem(m); sbunlock(&so->so_snd); + SOCKBUF_UNLOCK(&so->so_snd); splx(s); goto done; } @@ -1998,6 +2031,7 @@ if (so->so_state & SS_NBIO) { m_freem(m); sbunlock(&so->so_snd); + SOCKBUF_UNLOCK(&so->so_snd); splx(s); error = EAGAIN; goto done; @@ -2017,14 +2051,20 @@ goto retry_space; } error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); + /* XXX: Why release and re-grab? */ + SOCKBUF_UNLOCK(&so->so_snd); splx(s); if (error) { + SOCKBUF_LOCK(&so->so_snd); sbunlock(&so->so_snd); + SOCKBUF_UNLOCK(&so->so_snd); goto done; } headersent = 1; } + SOCKBUF_LOCK(&so->so_snd); sbunlock(&so->so_snd); + SOCKBUF_UNLOCK(&so->so_snd); /* * Send trailers. Wimp out and use writev(2). 
--- //depot/vendor/freebsd/src/sys/kern/uipc_usrreq.c 2004/04/05 14:06:48 +++ //depot/user/rwatson/netperf/sys/kern/uipc_usrreq.c 2004/04/07 20:45:01 @@ -81,6 +81,39 @@ static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; static ino_t unp_ino; /* prototype for fake inode numbers */ +static struct mtx unp_mtx; +#define UNP_HEAD_LOCK_INIT() \ + mtx_init(&unp_mtx, "unp head", NULL, MTX_DEF) +#define UNP_HEAD_LOCK() mtx_lock(&unp_mtx) +#define UNP_HEAD_UNLOCK() mtx_unlock(&unp_mtx) +#define UNP_HEAD_LOCK_ASSERT() mtx_assert(&unp_mtx, MA_OWNED) + +/* NB: DUPOK is to cover the connect2 case XXX */ +#define UNP_LOCK_INIT(_unp) \ + mtx_init(&(_unp)->unp_mtx, "unp", NULL, MTX_DEF | MTX_DUPOK) +#define UNP_LOCK_DESTROY(_unp) mtx_destroy(&(_unp)->unp_mtx) +#define UNP_LOCK(_unp) mtx_lock(&(_unp)->unp_mtx) +#define UNP_UNLOCK(_unp) mtx_unlock(&(_unp)->unp_mtx) +#define UNP_LOCK_ASSERT(_unp) mtx_assert(&(_unp)->unp_mtx, MA_OWNED) + +/* + * A unp lock is always preceded by locking the head. + * Since this occurs often we define convenience macros + * for entry, exit, and validation in lower-level routines.
+ */ +#define UNP_ENTER(_unp) do { \ + UNP_HEAD_LOCK(); \ + UNP_LOCK(_unp); \ +} while (0) +#define UNP_EXIT(_unp) do { \ + UNP_UNLOCK(_unp); \ + UNP_HEAD_UNLOCK(); \ +} while (0) +#define UNP_ASSERT(_unp) do { \ + UNP_HEAD_LOCK_ASSERT(); \ + UNP_LOCK_ASSERT(_unp); \ +} while (0) + static int unp_attach(struct socket *); static void unp_detach(struct unpcb *); static int unp_bind(struct unpcb *,struct sockaddr *, struct thread *); @@ -104,8 +137,10 @@ if (unp == NULL) return (EINVAL); + UNP_ENTER(unp); unp_drop(unp, ECONNABORTED); - unp_detach(unp); + unp_detach(unp); /* NB: unlocks unp + head */ + SOCK_LOCK(so); sotryfree(so); return (0); } @@ -114,6 +149,7 @@ uipc_accept(struct socket *so, struct sockaddr **nam) { struct unpcb *unp = sotounpcb(so); + struct sockaddr *sa; if (unp == NULL) return (EINVAL); @@ -123,13 +159,14 @@ * if it was bound and we are still connected * (our peer may have closed already!). */ - if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) { - *nam = sodupsockaddr( - (struct sockaddr *)unp->unp_conn->unp_addr, M_WAITOK); - } else { - *nam = sodupsockaddr((struct sockaddr *)&sun_noname, - M_WAITOK); - } + *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); + UNP_ENTER(unp); + if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) + sa = (struct sockaddr *) unp->unp_conn->unp_addr; + else + sa = &sun_noname; + bcopy(sa, *nam, sa->sa_len); + UNP_EXIT(unp); return (0); } @@ -150,7 +187,6 @@ if (unp == NULL) return (EINVAL); - return (unp_bind(unp, nam, td)); } @@ -158,21 +194,30 @@ uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct unpcb *unp = sotounpcb(so); + int retval; - if (unp == NULL) - return (EINVAL); - return (unp_connect(so, nam, curthread)); + if (unp != NULL) { + UNP_ENTER(unp); + retval = unp_connect(so, nam, curthread); + UNP_EXIT(unp); + } else + retval = EINVAL; + return (retval); } int uipc_connect2(struct socket *so1, struct socket *so2) { struct unpcb *unp = 
sotounpcb(so1); + int retval; - if (unp == NULL) - return (EINVAL); - - return (unp_connect2(so1, so2)); + if (unp != NULL) { + UNP_ENTER(unp); + retval = unp_connect2(so1, so2); + UNP_EXIT(unp); + } else + retval = EINVAL; + return (retval); } /* control is EOPNOTSUPP */ @@ -184,8 +229,8 @@ if (unp == NULL) return (EINVAL); - - unp_detach(unp); + UNP_ENTER(unp); + unp_detach(unp); /* NB: unlocks unp + head */ return (0); } @@ -194,41 +239,52 @@ { struct unpcb *unp = sotounpcb(so); - if (unp == NULL) + if (unp != NULL) { + UNP_ENTER(unp); + unp_disconnect(unp); + UNP_EXIT(unp); + return (0); + } else return (EINVAL); - unp_disconnect(unp); - return (0); } static int uipc_listen(struct socket *so, struct thread *td) { struct unpcb *unp = sotounpcb(so); + int retval; - if (unp == NULL || unp->unp_vnode == NULL) + if (unp != NULL && unp->unp_vnode != NULL) { + UNP_ENTER(unp); + retval = unp_listen(unp, td); + UNP_EXIT(unp); + } else return (EINVAL); - return (unp_listen(unp, td)); + return (retval); } static int uipc_peeraddr(struct socket *so, struct sockaddr **nam) { struct unpcb *unp = sotounpcb(so); + struct sockaddr *sa; if (unp == NULL) return (EINVAL); - if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) - *nam = sodupsockaddr( - (struct sockaddr *)unp->unp_conn->unp_addr, M_WAITOK); + *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); + UNP_ENTER(unp); + if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL) + sa = (struct sockaddr *) unp->unp_conn->unp_addr; else { /* * XXX: It seems that this test always fails even when * connection is established. So, this else clause is * added as workaround to return PF_LOCAL sockaddr. */ - *nam = sodupsockaddr((struct sockaddr *)&sun_noname, - M_WAITOK); + sa = &sun_noname; } + bcopy(sa, *nam, sa->sa_len); + UNP_EXIT(unp); return (0); } @@ -241,15 +297,27 @@ if (unp == NULL) return (EINVAL); + /* + * Reorder locks to avoid LORs. 
Note that we + * delay re-locking so_rcv to below so it can + * be done only once. + */ + SOCKBUF_UNLOCK(&so->so_rcv); + UNP_ENTER(unp); switch (so->so_type) { case SOCK_DGRAM: panic("uipc_rcvd DGRAM?"); /*NOTREACHED*/ case SOCK_STREAM: - if (unp->unp_conn == NULL) + if (unp->unp_conn == NULL) { + SOCKBUF_LOCK(&so->so_rcv); break; + } so2 = unp->unp_conn->unp_socket; + /* NB: careful of order here */ + SOCKBUF_LOCK(&so2->so_snd); + SOCKBUF_LOCK(&so->so_rcv); /* * Adjust backpressure on sender * and wakeup any waiting to write. @@ -261,12 +329,14 @@ (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat, newhiwat, RLIM_INFINITY); unp->unp_cc = so->so_rcv.sb_cc; - sowwakeup(so2); + sowwakeup_locked(so2); + SOCKBUF_UNLOCK(&so2->so_snd); break; default: panic("uipc_rcvd unknown socktype"); } + UNP_EXIT(unp); return (0); } @@ -293,6 +363,13 @@ if (control != NULL && (error = unp_internalize(&control, td))) goto release; + /* + * Reorder locks to avoid LORs. + */ + SOCKBUF_UNLOCK(&so->so_snd); + UNP_ENTER(unp); + SOCKBUF_LOCK(&so->so_snd); + switch (so->so_type) { case SOCK_DGRAM: { @@ -303,7 +380,9 @@ error = EISCONN; break; } + SOCKBUF_UNLOCK(&so->so_snd); error = unp_connect(so, nam, td); + SOCKBUF_LOCK(&so->so_snd); if (error) break; } else { @@ -317,13 +396,14 @@ from = (struct sockaddr *)unp->unp_addr; else from = &sun_noname; - if (sbappendaddr(&so2->so_rcv, from, m, control)) { - sorwakeup(so2); + SOCKBUF_LOCK(&so2->so_rcv); + if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) { + sorwakeup_locked(so2); m = NULL; control = NULL; - } else { + } else error = ENOBUFS; - } + SOCKBUF_UNLOCK(&so2->so_rcv); if (nam != NULL) unp_disconnect(unp); break; @@ -337,7 +417,9 @@ */ if ((so->so_state & SS_ISCONNECTED) == 0) { if (nam != NULL) { + SOCKBUF_UNLOCK(&so->so_snd); error = unp_connect(so, nam, td); + SOCKBUF_LOCK(&so->so_snd); if (error) break; /* XXX */ } else { @@ -353,17 +435,17 @@ if (unp->unp_conn == NULL) panic("uipc_send connected but no 
connection?"); so2 = unp->unp_conn->unp_socket; + SOCKBUF_LOCK(&so2->so_rcv); /* * Send to paired receive port, and then reduce * send buffer hiwater marks to maintain backpressure. * Wake up readers. */ if (control != NULL) { - if (sbappendcontrol(&so2->so_rcv, m, control)) + if (sbappendcontrol_locked(&so2->so_rcv, m, control)) control = NULL; - } else { - sbappend(&so2->so_rcv, m); - } + } else + sbappend_locked(&so2->so_rcv, m); so->so_snd.sb_mbmax -= so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt; unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt; @@ -372,7 +454,8 @@ (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, newhiwat, RLIM_INFINITY); unp->unp_conn->unp_cc = so2->so_rcv.sb_cc; - sorwakeup(so2); + sorwakeup_locked(so2); + SOCKBUF_UNLOCK(&so2->so_rcv); m = NULL; break; @@ -385,13 +468,13 @@ * a SHUTDOWN. */ if (flags & PRUS_EOF) { - socantsendmore(so); + socantsendmore_locked(so); unp_shutdown(unp); } + UNP_EXIT(unp); if (control != NULL && error != 0) - unp_dispose(control); - + unp_dispose(control); /* XXX need head lock? */ release: if (control != NULL) m_freem(control); @@ -408,15 +491,18 @@ if (unp == NULL) return (EINVAL); + UNP_ENTER(unp); sb->st_blksize = so->so_snd.sb_hiwat; if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) { so2 = unp->unp_conn->unp_socket; + /* Unlocked read. */ sb->st_blksize += so2->so_rcv.sb_cc; } sb->st_dev = NOUDEV; if (unp->unp_ino == 0) unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino; sb->st_ino = unp->unp_ino; + UNP_EXIT(unp); return (0); } @@ -427,8 +513,11 @@ if (unp == NULL) return (EINVAL); + UNP_ENTER(unp); + /* XXX socket lock? 
*/ socantsendmore(so); unp_shutdown(unp); + UNP_EXIT(unp); return (0); } @@ -436,15 +525,18 @@ uipc_sockaddr(struct socket *so, struct sockaddr **nam) { struct unpcb *unp = sotounpcb(so); + struct sockaddr *sa; if (unp == NULL) return (EINVAL); + *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); + UNP_ENTER(unp); if (unp->unp_addr != NULL) - *nam = sodupsockaddr((struct sockaddr *)unp->unp_addr, - M_WAITOK); + sa = (struct sockaddr *) unp->unp_addr; else - *nam = sodupsockaddr((struct sockaddr *)&sun_noname, - M_WAITOK); + sa = &sun_noname; + bcopy(sa, *nam, sa->sa_len); + UNP_EXIT(unp); return (0); } @@ -466,6 +558,7 @@ switch (sopt->sopt_dir) { case SOPT_GET: + UNP_ENTER(unp); switch (sopt->sopt_name) { case LOCAL_PEERCRED: if (unp->unp_flags & UNP_HAVEPC) @@ -482,6 +575,7 @@ error = EOPNOTSUPP; break; } + UNP_EXIT(unp); break; case SOPT_SET: default: @@ -554,8 +648,13 @@ unp_count++; LIST_INIT(&unp->unp_refs); unp->unp_socket = so; + UNP_LOCK_INIT(unp); + + UNP_HEAD_LOCK(); LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? 
&unp_dhead : &unp_shead, unp, unp_link); + UNP_HEAD_UNLOCK(); + so->so_pcb = unp; return (0); } @@ -564,18 +663,25 @@ unp_detach(unp) register struct unpcb *unp; { + struct vnode *vp; + + UNP_ASSERT(unp); + LIST_REMOVE(unp, unp_link); unp->unp_gencnt = ++unp_gencnt; --unp_count; - if (unp->unp_vnode != NULL) { + if ((vp = unp->unp_vnode) != NULL) { unp->unp_vnode->v_socket = NULL; - vrele(unp->unp_vnode); unp->unp_vnode = NULL; } if (unp->unp_conn != NULL) unp_disconnect(unp); - while (!LIST_EMPTY(&unp->unp_refs)) - unp_drop(LIST_FIRST(&unp->unp_refs), ECONNRESET); + while (!LIST_EMPTY(&unp->unp_refs)) { + struct unpcb *ref = LIST_FIRST(&unp->unp_refs); + UNP_LOCK(ref); + unp_drop(ref, ECONNRESET); + UNP_UNLOCK(ref); + } soisdisconnected(unp->unp_socket); unp->unp_socket->so_pcb = NULL; if (unp_rights) { @@ -591,7 +697,11 @@ } if (unp->unp_addr != NULL) FREE(unp->unp_addr, M_SONAME); + UNP_LOCK_DESTROY(unp); + UNP_HEAD_UNLOCK(); uma_zfree(unp_zone, unp); + if (vp) + vrele(vp); } static int @@ -618,15 +728,14 @@ buf = malloc(namelen + 1, M_TEMP, M_WAITOK); strlcpy(buf, soun->sun_path, namelen + 1); + mtx_lock(&Giant); restart: NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE, buf, td); /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ error = namei(&nd); - if (error) { - free(buf, M_TEMP); - return (error); - } + if (error) + goto done; vp = nd.ni_vp; if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); @@ -636,14 +745,12 @@ vput(nd.ni_dvp); if (vp != NULL) { vrele(vp); - free(buf, M_TEMP); - return (EADDRINUSE); + error = EADDRINUSE; + goto done; } error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); - if (error) { - free(buf, M_TEMP); - return (error); - } + if (error) + goto done; goto restart; } VATTR_NULL(&vattr); @@ -659,18 +766,21 @@ } NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); - if (error) { - free(buf, M_TEMP); - return (error); - } + if (error) + goto done; vp = nd.ni_vp; + soun 
= (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); + UNP_ENTER(unp); vp->v_socket = unp->unp_socket; unp->unp_vnode = vp; - unp->unp_addr = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); + unp->unp_addr = soun; + UNP_EXIT(unp); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); +done: + mtx_unlock(&Giant); free(buf, M_TEMP); - return (0); + return (error); } static int @@ -682,20 +792,32 @@ register struct sockaddr_un *soun = (struct sockaddr_un *)nam; register struct vnode *vp; register struct socket *so2, *so3; - struct unpcb *unp, *unp2, *unp3; + struct unpcb *unp = sotounpcb(so); + struct unpcb *unp2, *unp3; int error, len; struct nameidata nd; char buf[SOCK_MAXADDRLEN]; + struct sockaddr *sa; + UNP_ASSERT(unp); + len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); if (len <= 0) return (EINVAL); strlcpy(buf, soun->sun_path, len + 1); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td); + /* drop locks across namei */ + UNP_EXIT(unp); + mtx_lock(&Giant); error = namei(&nd); + if (!error) + sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); + else + sa = NULL; + UNP_ENTER(unp); if (error) - return (error); + goto bad2; vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); if (vp->v_type != VSOCK) { @@ -715,19 +837,30 @@ goto bad; } if (so->so_proto->pr_flags & PR_CONNREQUIRED) { - if ((so2->so_options & SO_ACCEPTCONN) == 0 || - (so3 = sonewconn(so2, 0)) == NULL) { + if (so2->so_options & SO_ACCEPTCONN) { + /* + * NB: drop locks here so unp_attach is entered + * w/o locks; this avoids a recursive lock + * of the head and holding sleep locks across + * a (potentially) blocking malloc. 
+ */ + UNP_EXIT(unp); + so3 = sonewconn(so2, 0); + UNP_ENTER(unp); + } else + so3 = NULL; + if (so3 == NULL) { error = ECONNREFUSED; goto bad; } unp = sotounpcb(so); unp2 = sotounpcb(so2); unp3 = sotounpcb(so3); - if (unp2->unp_addr != NULL) - unp3->unp_addr = (struct sockaddr_un *) - sodupsockaddr((struct sockaddr *)unp2->unp_addr, - M_WAITOK); - + if (unp2->unp_addr != NULL) { + bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len); + unp3->unp_addr = (struct sockaddr_un *) sa; + sa = NULL; + } /* * unp_peercred management: * @@ -759,6 +892,10 @@ error = unp_connect2(so, so2); bad: vput(vp); +bad2: + if (sa) + free(sa, M_SONAME); + mtx_unlock(&Giant); return (error); } @@ -770,9 +907,12 @@ register struct unpcb *unp = sotounpcb(so); register struct unpcb *unp2; + UNP_ASSERT(unp); + if (so2->so_type != so->so_type) return (EPROTOTYPE); unp2 = sotounpcb(so2); + UNP_LOCK(unp2); unp->unp_conn = unp2; switch (so->so_type) { @@ -790,6 +930,7 @@ default: panic("unp_connect2"); } + UNP_UNLOCK(unp2); return (0); } @@ -799,6 +940,8 @@ { register struct unpcb *unp2 = unp->unp_conn; + UNP_ASSERT(unp); + if (unp2 == NULL) return; unp->unp_conn = NULL; @@ -811,10 +954,13 @@ case SOCK_STREAM: soisdisconnected(unp->unp_socket); + UNP_LOCK(unp2); unp2->unp_conn = NULL; soisdisconnected(unp2->unp_socket); + UNP_UNLOCK(unp2); break; } + return; } #ifdef notdef @@ -857,8 +1003,10 @@ * OK, now we're committed to doing something. 
*/ xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK); + UNP_HEAD_LOCK(); gencnt = unp_gencnt; n = unp_count; + UNP_HEAD_UNLOCK(); xug->xug_len = sizeof *xug; xug->xug_count = n; @@ -872,6 +1020,7 @@ unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); + UNP_HEAD_LOCK(); for (unp = LIST_FIRST(head), i = 0; unp && i < n; unp = LIST_NEXT(unp, unp_link)) { if (unp->unp_gencnt <= gencnt) { @@ -881,6 +1030,7 @@ unp_list[i++] = unp; } } + UNP_HEAD_UNLOCK(); n = i; /* in case we lost some during malloc */ error = 0; @@ -939,6 +1089,8 @@ { struct socket *so; + UNP_ASSERT(unp); + if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && (so = unp->unp_conn->unp_socket)) socantrcvmore(so); @@ -951,6 +1103,8 @@ { struct socket *so = unp->unp_socket; + UNP_ASSERT(unp); + so->so_error = errno; unp_disconnect(unp); } @@ -1102,6 +1256,8 @@ uma_zone_set_max(unp_zone, nmbclusters); LIST_INIT(&unp_dhead); LIST_INIT(&unp_shead); + + UNP_HEAD_LOCK_INIT(); } static int @@ -1258,6 +1414,8 @@ struct file **extra_ref, **fpp; int nunref, i; + UNP_HEAD_LOCK_ASSERT(); /* NB: this serializes entry */ + if (unp_gcing) return; unp_gcing = 1; @@ -1348,7 +1506,9 @@ * message buffers. Follow those links and mark them * as accessible too. 
*/ + SOCKBUF_LOCK(&so->so_rcv); unp_scan(so->so_rcv.sb_mb, unp_mark); + SOCKBUF_UNLOCK(&so->so_rcv); } } while (unp_defer); sx_sunlock(&filelist_lock); @@ -1452,6 +1612,7 @@ struct unpcb *unp; struct thread *td; { + UNP_ASSERT(unp); cru2x(td->td_ucred, &unp->unp_peercred); unp->unp_flags |= UNP_HAVEPCCACHED; --- //depot/vendor/freebsd/src/sys/net/bpf.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/net/bpf.c 2004/04/07 20:11:34 @@ -553,7 +553,7 @@ struct ifnet *ifp; struct mbuf *m; int error; - static struct sockaddr dst; + struct sockaddr dst; int datlen; if (d->bd_bif == 0) @@ -564,6 +564,7 @@ if (uio->uio_resid == 0) return (0); + bzero(&dst, sizeof(dst)); error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, &dst, &datlen); if (error) return (error); @@ -579,12 +580,10 @@ mac_create_mbuf_from_bpfdesc(d, m); BPFD_UNLOCK(d); #endif - mtx_lock(&Giant); + /* NB: the driver frees the mbuf */ + NET_LOCK_GIANT(); error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0); - mtx_unlock(&Giant); - /* - * The driver frees the mbuf. - */ + NET_UNLOCK_GIANT(); return (error); } --- //depot/vendor/freebsd/src/sys/net/if.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/net/if.c 2004/04/07 20:11:34 @@ -655,6 +655,7 @@ /* * Create a clone network interface. + * XXXRW: Locking? */ int if_clone_create(char *name, int len) @@ -727,6 +728,7 @@ /* * Destroy a clone network interface. + * XXXRW: Locking? */ int if_clone_destroy(const char *name) @@ -764,6 +766,7 @@ /* * Look up a network interface cloner. + * XXXRW: Locking? */ static struct if_clone * if_clone_lookup(const char *name, int *unitp) @@ -805,6 +808,7 @@ /* * Register a network interface cloner. + * XXXRW: Locking? */ void if_clone_attach(struct if_clone *ifc) @@ -847,6 +851,7 @@ /* * Unregister a network interface cloner. + * XXXRW: Locking? */ void if_clone_detach(struct if_clone *ifc) @@ -859,6 +864,7 @@ /* * Provide list of interface cloners to userspace. + * XXXRW: Locking? 
*/ static int if_clone_list(struct if_clonereq *ifcr) --- //depot/vendor/freebsd/src/sys/net/if_gif.c 2004/04/05 09:55:34 +++ //depot/user/rwatson/netperf/sys/net/if_gif.c 2004/04/06 20:50:45 @@ -87,6 +87,10 @@ * gif_mtx protects the global gif_softc_list. * XXX: Per-softc locking is still required. */ +/* + * XXXRW: Note that gif_mtx only protects global gif-related data, not + * per-softc data. See also netinet/in_gif.c for locking needs. + */ static struct mtx gif_mtx; static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface"); static LIST_HEAD(, gif_softc) gif_softc_list; @@ -499,6 +503,9 @@ } /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */ +/* + * XXXRW: per-gif softc locking required. + */ int gif_ioctl(ifp, cmd, data) struct ifnet *ifp; @@ -755,8 +762,9 @@ int s; int error = 0; - s = splnet(); - + /* + * XXXRW: per-gif softc locking required. + */ mtx_lock(&gif_mtx); LIST_FOREACH(sc2, &gif_softc_list, gif_list) { if (sc2 == sc) @@ -785,6 +793,9 @@ } mtx_unlock(&gif_mtx); + /* + * XXXRW: Lock gif softc fields. + */ /* XXX we can detach from both, but be polite just in case */ if (sc->gif_psrc) switch (sc->gif_psrc->sa_family) { --- //depot/vendor/freebsd/src/sys/net/if_gre.c 2004/03/22 08:06:54 +++ //depot/user/rwatson/netperf/sys/net/if_gre.c 2004/03/22 08:18:59 @@ -93,7 +93,8 @@ /* * gre_mtx protects all global variables in if_gre.c. - * XXX: gre_softc data not protected yet. + * + * XXXRW: It does not protect softc-specific data. */ struct mtx gre_mtx; static MALLOC_DEFINE(M_GRE, GRENAME, "Generic Routing Encapsulation"); --- //depot/vendor/freebsd/src/sys/net/if_gre.h 2004/03/22 08:06:54 +++ //depot/user/rwatson/netperf/sys/net/if_gre.h 2004/03/22 08:18:59 @@ -54,6 +54,12 @@ WCCP_V2 } wccp_ver_t; +/* + * XXXRW: softc fields need locking. + * + * XXXRW: gre's notion of a 'called' count is not MP-safe, as it assumes + * only one packet can be processed at a time. 
+ */ struct gre_softc { struct ifnet sc_if; LIST_ENTRY(gre_softc) sc_list; --- //depot/vendor/freebsd/src/sys/net/if_sl.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/net/if_sl.c 2004/04/07 20:11:34 @@ -162,6 +162,7 @@ #define ABT_WINDOW (ABT_COUNT*2+2) /* in seconds - time to count */ static LIST_HEAD(sl_list, sl_softc) sl_list; +static struct mtx slip_mtx; #define FRAME_END 0xc0 /* Frame End */ #define FRAME_ESCAPE 0xdb /* Frame Esc */ @@ -197,9 +198,10 @@ sl_modevent(module_t mod, int type, void *data) { switch (type) { - case MOD_LOAD: + case MOD_LOAD: + mtx_init(&slip_mtx, "slip_mtx", NULL, MTX_DEF); + LIST_INIT(&sl_list); linesw[SLIPDISC] = slipdisc; - LIST_INIT(&sl_list); break; case MOD_UNLOAD: printf("if_sl module unload - not possible for this module type\n"); @@ -216,6 +218,7 @@ DECLARE_MODULE(if_sl, sl_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +/* Locked using slip_mtx. */ static int *st_unit_list; static size_t st_unit_max = 0; @@ -224,6 +227,7 @@ { struct sl_softc *nc; + mtx_assert(&slip_mtx, MA_OWNED); LIST_FOREACH(nc, &sl_list, sl_next) { if (nc->sc_if.if_dunit == unit) return (0); @@ -237,6 +241,7 @@ { size_t i; + mtx_assert(&slip_mtx, MA_OWNED); for (i = 0; i < st_unit_max; i++) if (st_unit_list[i] == unit) return 1; @@ -249,6 +254,7 @@ { int *t; + mtx_assert(&slip_mtx, MA_OWNED); if (slisstatic(unit)) return; @@ -311,10 +317,12 @@ sc->sc_if.if_linkmib = sc; sc->sc_if.if_linkmiblen = sizeof *sc; mtx_init(&sc->sc_fastq.ifq_mtx, "sl_fastq", NULL, MTX_DEF); + mtx_init(&sc->sc_mtx, "slip sc_mtx", NULL, MTX_DEF); /* * Find a suitable unit number. 
*/ + mtx_lock(&slip_mtx); for (unit=0; ; unit++) { if (slisstatic(unit)) continue; @@ -324,6 +332,7 @@ } if_initname(&sc->sc_if, "sl", unit); LIST_INSERT_HEAD(&sl_list, sc, sl_next); + mtx_unlock(&slip_mtx); if_attach(&sc->sc_if); bpfattach(&sc->sc_if, DLT_SLIP, SLIP_HDRLEN); @@ -386,10 +395,20 @@ static void sldestroy(struct sl_softc *sc) { + + /* + * XXXRW: Slight race here: we may detach bpf/if before we + * attach. This appears to be a property of the unit selection + * process, which might be better handled by the interface + * cloning subsystem? + */ bpfdetach(&sc->sc_if); if_detach(&sc->sc_if); + mtx_lock(&slip_mtx); LIST_REMOVE(sc, sl_next); + mtx_unlock(&slip_mtx); m_free(sc->sc_mbuf); + mtx_destroy(&sc->sc_mtx); mtx_destroy(&sc->sc_fastq.ifq_mtx); if (sc->bpfbuf) free(sc->bpfbuf, M_SL); @@ -419,6 +438,10 @@ tp->t_line = 0; sc = (struct sl_softc *)tp->t_sc; if (sc != NULL) { + /* + * XXXRW: tear-down race between timeout and slclose()? + */ + mtx_lock(&sc->sc_mtx); if (sc->sc_outfill) { sc->sc_outfill = 0; untimeout(sl_outfill, sc, sc->sc_ofhandle); @@ -427,6 +450,7 @@ sc->sc_keepalive = 0; untimeout(sl_keepalive, sc, sc->sc_kahandle); } + mtx_unlock(&sc->sc_mtx); if_down(&sc->sc_if); sc->sc_ttyp = NULL; tp->t_sc = NULL; @@ -464,12 +488,21 @@ splx(s); return (ENXIO); } + /* + * XXXRW: we hold the mutex over all of this to protect + * the unit change and global list consistency. However, + * some of these functions probably sleep, making this + * wrong. If we have to support renumbering, we probably + * need a way to reserve both numbers to prevent them + * from being reused during the change, or a way to sleep + * waiting for a change to end (i.e., a CV). 
+ */ + mtx_lock(&slip_mtx); if (sc->sc_if.if_dunit != unit) { if (!slisunitfree(unit)) { - splx(s); + mtx_unlock(&slip_mtx); return (ENXIO); } - wasup = sc->sc_if.if_flags & IFF_UP; bpfdetach(&sc->sc_if); if_detach(&sc->sc_if); @@ -487,9 +520,11 @@ SLIP_HIWAT + 2 * sc->sc_if.if_mtu + 1); } slmarkstatic(unit); + mtx_unlock(&slip_mtx); break; case SLIOCSKEEPAL: + mtx_lock(&sc->sc_mtx); sc->sc_keepalive = *(u_int *)data * hz; if (sc->sc_keepalive) { sc->sc_flags |= SC_KEEPALIVE; @@ -501,6 +536,7 @@ sc->sc_flags &= ~SC_KEEPALIVE; } } + mtx_unlock(&sc->sc_mtx); break; case SLIOCGKEEPAL: @@ -508,6 +544,7 @@ break; case SLIOCSOUTFILL: + mtx_lock(&sc->sc_mtx); sc->sc_outfill = *(u_int *)data * hz; if (sc->sc_outfill) { sc->sc_flags |= SC_OUTWAIT; @@ -519,9 +556,11 @@ sc->sc_flags &= ~SC_OUTWAIT; } } + mtx_unlock(&sc->sc_mtx); break; case SLIOCGOUTFILL: + /* Unlocked read. */ *(int *)data = sc->sc_outfill / hz; break; @@ -614,8 +653,11 @@ (*tp->t_oproc)(tp); if (tp->t_outq.c_cc != 0) { - if (sc != NULL) + if (sc != NULL) { + mtx_lock(&sc->sc_mtx); sc->sc_flags &= ~SC_OUTWAIT; + mtx_unlock(&sc->sc_mtx); + } if (tp->t_outq.c_cc > SLIP_HIWAT) return 0; } @@ -645,6 +687,7 @@ * queueing, and the connection id compression will get * munged when this happens. 
*/ + mtx_lock(&sc->sc_mtx); if (sc->sc_if.if_bpf) { /* * We need to save the TCP/IP header before it's @@ -675,9 +718,10 @@ } ip = mtod(m, struct ip *); if (ip->ip_v == IPVERSION && ip->ip_p == IPPROTO_TCP) { - if (sc->sc_if.if_flags & SC_COMPRESS) + if (sc->sc_if.if_flags & SC_COMPRESS) { *mtod(m, u_char *) |= sl_compress_tcp(m, ip, &sc->sc_comp, 1); + } } if (sc->sc_if.if_bpf && sc->bpfbuf) { /* @@ -689,6 +733,7 @@ bcopy(mtod(m, caddr_t), &sc->bpfbuf[SLX_CHDR], CHDR_LEN); BPF_TAP(&sc->sc_if, sc->bpfbuf, len + SLIP_HDRLEN); } + mtx_unlock(&sc->sc_mtx); /* * If system is getting low on clists, just flush our @@ -704,7 +749,9 @@ continue; } + mtx_lock(&sc->sc_mtx); sc->sc_flags &= ~SC_OUTWAIT; + mtx_unlock(&sc->sc_mtx); /* * The extra FRAME_END will start up a new packet, and thus * will flush any accumulated garbage. We do this whenever @@ -794,6 +841,8 @@ { struct mbuf *m, *newm; + mtx_assert(&sc->sc_mtx, MA_OWNED); + MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return (NULL); @@ -849,13 +898,16 @@ if (sc == NULL) return 0; if (c & TTY_ERRORMASK || (tp->t_state & TS_CONNECTED) == 0) { + mtx_lock(&sc->sc_mtx); sc->sc_flags |= SC_ERROR; + mtx_unlock(&sc->sc_mtx); return 0; } c &= TTY_CHARMASK; ++sc->sc_if.if_ibytes; + mtx_lock(&sc->sc_mtx); if (sc->sc_if.if_flags & IFF_DEBUG) { if (c == ABT_ESC) { /* @@ -874,6 +926,7 @@ sc->sc_starttime = time_second; if (sc->sc_abortcount >= ABT_COUNT) { slclose(tp,0); + mtx_unlock(&sc->sc_mtx); return 0; } } @@ -896,6 +949,7 @@ case FRAME_ESCAPE: sc->sc_escape = 1; + mtx_unlock(&sc->sc_mtx); return 0; case FRAME_END: @@ -980,6 +1034,7 @@ if (sc->sc_mp < sc->sc_ep) { *sc->sc_mp++ = c; sc->sc_escape = 0; + mtx_unlock(&sc->sc_mtx); return 0; } @@ -991,6 +1046,7 @@ newpack: sc->sc_mp = sc->sc_buf = sc->sc_ep - SLRMAX; sc->sc_escape = 0; + mtx_unlock(&sc->sc_mtx); return 0; } @@ -1074,6 +1130,7 @@ { struct sl_softc *sc = chan; + mtx_lock(&sc->sc_mtx); if (sc->sc_keepalive) { if (sc->sc_flags & SC_KEEPALIVE) { if (sc->sc_ttyp->t_pgrp 
!= NULL) { @@ -1087,6 +1144,7 @@ } else { sc->sc_flags &= ~SC_KEEPALIVE; } + mtx_unlock(&sc->sc_mtx); } static void @@ -1097,6 +1155,7 @@ register struct tty *tp = sc->sc_ttyp; int s; + mtx_lock(&sc->sc_mtx); if (sc->sc_outfill && tp != NULL) { if (sc->sc_flags & SC_OUTWAIT) { s = splimp (); @@ -1110,4 +1169,5 @@ } else { sc->sc_flags &= ~SC_OUTWAIT; } + mtx_unlock(&sc->sc_mtx); } --- //depot/vendor/freebsd/src/sys/net/if_slvar.h 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/net/if_slvar.h 2004/04/07 20:11:34 @@ -34,13 +34,16 @@ #ifndef _NET_IF_SLVAR_H_ #define _NET_IF_SLVAR_H_ +#include #include /* * Definitions for SLIP interface data structures * * (This exists so programs like slstats can get at the definition - * of sl_softc.) + * of sl_softc.) Fields owned by the SLIP subsystem are protected + * using sc_mtx, with the exception of sc_next, which is protected + * by the global slip_mtx. */ struct sl_softc { struct ifnet sc_if; /* network-visible interface */ @@ -66,6 +69,7 @@ struct slcompress sc_comp; /* tcp compression data */ LIST_ENTRY(sl_softc) sl_next; u_char *bpfbuf; /* hang buffer for bpf here */ + struct mtx sc_mtx; }; /* internal flags */ --- //depot/vendor/freebsd/src/sys/net/if_spppsubr.c 2004/03/13 17:35:36 +++ //depot/user/rwatson/netperf/sys/net/if_spppsubr.c 2004/03/13 21:52:56 @@ -92,12 +92,8 @@ #include #if defined(__FreeBSD__) && __FreeBSD__ >= 3 -# define UNTIMEOUT(fun, arg, handle) untimeout(fun, arg, handle) -# define TIMEOUT(fun, arg1, arg2, handle) handle = timeout(fun, arg1, arg2) # define IOCTL_CMD_T u_long #else -# define UNTIMEOUT(fun, arg, handle) untimeout(fun, arg) -# define TIMEOUT(fun, arg1, arg2, handle) timeout(fun, arg1, arg2) # define IOCTL_CMD_T int #endif @@ -259,10 +255,11 @@ void (*scr)(struct sppp *sp); }; +struct mtx sppp_mtx; +MTX_SYSINIT(sppp_mtx, &sppp_mtx, "sppp_mtx", MTX_DEF); + static struct sppp *spppq; -#if defined(__FreeBSD__) && __FreeBSD__ >= 3 -static struct callout_handle keepalive_ch; -#endif 
+static struct callout keepalive_callout; #if defined(__FreeBSD__) && __FreeBSD__ >= 3 && __FreeBSD_version < 501113 #define SPP_FMT "%s%d: " @@ -960,13 +957,18 @@ { struct sppp *sp = (struct sppp*) ifp; + mtx_lock(&sppp_mtx); /* Initialize keepalive handler. */ - if (spppq != NULL) - TIMEOUT(sppp_keepalive, 0, hz * 10, keepalive_ch); + if (spppq == NULL) { + callout_init(&keepalive_callout, 0); + callout_reset(&keepalive_callout, hz * 10, sppp_keepalive, + NULL); + } /* Insert new entry into the keepalive list. */ sp->pp_next = spppq; spppq = sp; + mtx_unlock(&sppp_mtx); sp->pp_if.if_mtu = PP_MTU; sp->pp_if.if_flags = IFF_POINTOPOINT | IFF_MULTICAST; @@ -1012,6 +1014,7 @@ struct sppp **q, *p, *sp = (struct sppp*) ifp; int i; + mtx_lock(&sppp_mtx); /* Remove the entry from the keepalive list. */ for (q = &spppq; (p = *q); q = &p->pp_next) if (p == sp) { @@ -1020,12 +1023,13 @@ } /* Stop keepalive handler. */ - if (spppq != NULL) - UNTIMEOUT(sppp_keepalive, 0, keepalive_ch); + if (spppq == NULL) + callout_stop(&keepalive_callout); + mtx_unlock(&sppp_mtx); for (i = 0; i < IDX_COUNT; i++) - UNTIMEOUT((cps[i])->TO, (void *)sp, sp->ch[i]); - UNTIMEOUT(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch); + untimeout((cps[i])->TO, (void *)sp, sp->ch[i]); + untimeout(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch); mtx_destroy(&sp->pp_cpq.ifq_mtx); mtx_destroy(&sp->pp_fastq.ifq_mtx); } @@ -2004,8 +2008,8 @@ case STATE_STOPPING: sppp_cp_send(sp, cp->proto, TERM_REQ, ++sp->pp_seq[cp->protoidx], 0, 0); - TIMEOUT(cp->TO, (void *)sp, sp->lcp.timeout, - sp->ch[cp->protoidx]); + sp->ch[cp->protoidx] = timeout(cp->TO, (void *)sp, + sp->lcp.timeout); break; case STATE_REQ_SENT: case STATE_ACK_RCVD: @@ -2015,8 +2019,8 @@ break; case STATE_ACK_SENT: (cp->scr)(sp); - TIMEOUT(cp->TO, (void *)sp, sp->lcp.timeout, - sp->ch[cp->protoidx]); + sp->ch[cp->protoidx] = timeout(cp->TO, (void *)sp, + sp->lcp.timeout); break; } @@ -2032,7 +2036,7 @@ { sp->state[cp->protoidx] = newstate; - 
UNTIMEOUT(cp->TO, (void *)sp, sp->ch[cp->protoidx]); + untimeout(cp->TO, (void *)sp, sp->ch[cp->protoidx]); switch (newstate) { case STATE_INITIAL: case STATE_STARTING: @@ -2045,8 +2049,8 @@ case STATE_REQ_SENT: case STATE_ACK_RCVD: case STATE_ACK_SENT: - TIMEOUT(cp->TO, (void *)sp, sp->lcp.timeout, - sp->ch[cp->protoidx]); + sp->ch[cp->protoidx] = timeout(cp->TO, (void *)sp, + sp->lcp.timeout); break; } } @@ -4142,7 +4146,7 @@ * a number between 300 and 810 seconds. */ i = 300 + ((unsigned)(random() & 0xff00) >> 7); - TIMEOUT(chap.TO, (void *)sp, i * hz, sp->ch[IDX_CHAP]); + sp->ch[IDX_CHAP] = timeout(chap.TO, (void *)sp, i * hz); } if (debug) { @@ -4186,7 +4190,7 @@ if (debug) log(LOG_DEBUG, SPP_FMT "chap tld\n", SPP_ARGS(ifp)); - UNTIMEOUT(chap.TO, (void *)sp, sp->ch[IDX_CHAP]); + untimeout(chap.TO, (void *)sp, sp->ch[IDX_CHAP]); sp->lcp.protos &= ~(1 << IDX_CHAP); lcp.Close(sp); @@ -4322,7 +4326,7 @@ /* ack and nak are his authproto */ case PAP_ACK: - UNTIMEOUT(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch); + untimeout(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch); if (debug) { log(LOG_DEBUG, SPP_FMT "pap success", SPP_ARGS(ifp)); @@ -4351,7 +4355,7 @@ break; case PAP_NAK: - UNTIMEOUT(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch); + untimeout(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch); if (debug) { log(LOG_INFO, SPP_FMT "pap failure", SPP_ARGS(ifp)); @@ -4408,8 +4412,8 @@ if (sp->myauth.proto == PPP_PAP) { /* we are peer, send a request, and start a timer */ pap.scr(sp); - TIMEOUT(sppp_pap_my_TO, (void *)sp, sp->lcp.timeout, - sp->pap_my_to_ch); + sp->pap_my_to_ch = timeout(sppp_pap_my_TO, (void *)sp, + sp->lcp.timeout); } } @@ -4512,8 +4516,8 @@ if (debug) log(LOG_DEBUG, SPP_FMT "pap tld\n", SPP_ARGS(ifp)); - UNTIMEOUT(pap.TO, (void *)sp, sp->ch[IDX_PAP]); - UNTIMEOUT(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch); + untimeout(pap.TO, (void *)sp, sp->ch[IDX_PAP]); + untimeout(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch); sp->lcp.protos &= ~(1 << IDX_PAP); 
lcp.Close(sp); @@ -4640,7 +4644,12 @@ struct sppp *sp; int s; + /* + * XXXRW: It would be nice to avoid calling all this stuff while + * holding sppp_mtx, or we risk lock order reversals. + */ s = splimp(); + mtx_lock(&sppp_mtx); for (sp=spppq; sp; sp=sp->pp_next) { struct ifnet *ifp = &sp->pp_if; @@ -4679,8 +4688,9 @@ sp->lcp.echoid, 4, &nmagic); } } + callout_reset(&keepalive_callout, hz * 10, sppp_keepalive, NULL); + mtx_unlock(&sppp_mtx); splx(s); - TIMEOUT(sppp_keepalive, 0, hz * 10, keepalive_ch); } /* --- //depot/vendor/freebsd/src/sys/net/if_stf.c 2004/03/09 12:30:36 +++ //depot/user/rwatson/netperf/sys/net/if_stf.c 2004/03/12 06:09:05 @@ -136,13 +136,11 @@ #define sc_ro __sc_ro46.__sc_ro4 const struct encaptab *encap_cookie; LIST_ENTRY(stf_softc) sc_list; /* all stf's are linked */ + struct mtx sc_mtx; /* protect sc_ro */ }; /* * All mutable global variables in if_stf.c are protected by stf_mtx. - * XXXRW: Note that mutable fields in the softc are not currently locked: - * in particular, sc_ro needs to be protected from concurrent entrance - * of stf_output(). */ static struct mtx stf_mtx; static LIST_HEAD(, stf_softc) stf_softc_list; @@ -196,6 +194,7 @@ free(sc, M_STF); return (ENOMEM); } + mtx_init(&sc->sc_mtx, "stf sc_mtx", NULL, MTX_DEF); sc->sc_if.if_mtu = IPV6_MMTU; sc->sc_if.if_ioctl = stf_ioctl; @@ -220,6 +219,7 @@ bpfdetach(&sc->sc_if); if_detach(&sc->sc_if); + mtx_destroy(&sc->sc_mtx); free(sc, M_STF); } @@ -391,9 +391,10 @@ struct ip *ip; struct ip6_hdr *ip6; struct in6_ifaddr *ia6; -#ifdef MAC + struct route ro; int error; +#ifdef MAC error = mac_check_ifnet_transmit(ifp, m); if (error) { m_freem(m); @@ -495,9 +496,7 @@ else ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos); - /* - * XXXRW: Locking of sc_ro required. 
- */ + mtx_lock(&sc->sc_mtx); dst4 = (struct sockaddr_in *)&sc->sc_ro.ro_dst; if (dst4->sin_family != AF_INET || bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) { @@ -516,12 +515,21 @@ if (sc->sc_ro.ro_rt == NULL) { m_freem(m); ifp->if_oerrors++; + mtx_unlock(&sc->sc_mtx); return ENETUNREACH; } } + /* + * XXXRW: Holding mutex over call to ip_output(): potential lock + * order issue? Hard to resolve cleanly with the current route + * caching model, as we have to synchronize access to shared softc + * state. + */ ifp->if_opackets++; - return ip_output(m, NULL, &sc->sc_ro, 0, NULL, NULL); + error = ip_output(m, NULL, &ro, 0, NULL, NULL); + mtx_unlock(&sc->sc_mtx); + return (error); } static int --- //depot/vendor/freebsd/src/sys/net/if_tap.c 2004/03/18 06:20:27 +++ //depot/user/rwatson/netperf/sys/net/if_tap.c 2004/03/19 03:01:30 @@ -112,6 +112,10 @@ * All global variables in if_tap.c are locked with tapmtx, with the * exception of tapdebug, which is accessed unlocked; tapclones is * static at runtime. + * + * XXXRW: si_flags appears not to be protected from concurrent access, + * and is written at run-time. + * XXXRW: si_drv1 is also used for test-and-set, and isn't synchronized. */ static struct mtx tapmtx; static int tapdebug = 0; /* debug flag */ @@ -161,6 +165,7 @@ * The EBUSY algorithm here can't quite atomically * guarantee that this is race-free since we have to * release the tap mtx to deregister the clone handler. + * XXXRW: is this true? */ mtx_lock(&tapmtx); SLIST_FOREACH(tp, &taphead, tap_next) { @@ -692,6 +697,7 @@ case SIOCSIFADDR: /* set MAC address of the remote side */ mtx_lock(&tp->tap_mtx); + /* XXXRW: Does this actually do anything? */ bcopy(data, tp->ether_addr, sizeof(tp->ether_addr)); mtx_unlock(&tp->tap_mtx); break; @@ -746,6 +752,7 @@ if (flag & IO_NDELAY) return (EWOULDBLOCK); + /* This looks like a wanna-be condition variable. 
*/ mtx_lock(&tp->tap_mtx); tp->tap_flags |= TAP_RWAIT; mtx_unlock(&tp->tap_mtx); --- //depot/vendor/freebsd/src/sys/net/if_tun.c 2004/03/29 14:20:33 +++ //depot/user/rwatson/netperf/sys/net/if_tun.c 2004/03/29 16:17:24 @@ -59,6 +59,12 @@ * tun_list is protected by global tunmtx. Other mutable fields are * protected by tun->tun_mtx, or by their owning subsystem. tun_dev is * static for the duration of a tunnel interface. + * + * XXXRW: we allocate si_drv1 for the dev_t on demand, rather than when + * the dev_t is instantiated. Nothing serializes the test/set of that + * field. + * + * XXXRW: what serializes access to si_flags? */ struct tun_softc { TAILQ_ENTRY(tun_softc) tun_list; @@ -121,6 +127,9 @@ static d_ioctl_t tunioctl; static d_poll_t tunpoll; +/* + * XXXRW: can remove D_NEEDGIANT? Probably not because of si_drv1 for now. + */ static struct cdevsw tun_cdevsw = { .d_version = D_VERSION, .d_flags = D_PSEUDO | D_NEEDGIANT, @@ -364,6 +373,9 @@ ifp->if_flags |= IFF_UP | IFF_RUNNING; getmicrotime(&ifp->if_lastchange); + /* + * XXXRW: interface locking. 
+ */ for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa; ifa = TAILQ_NEXT(ifa, ifa_link)) { if (ifa->ifa_addr == NULL) --- //depot/vendor/freebsd/src/sys/net/raw_cb.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/net/raw_cb.c 2004/04/07 20:11:34 @@ -32,7 +32,9 @@ #include #include +#include #include +#include #include #include #include @@ -49,10 +51,11 @@ * redo address binding to allow wildcards */ +struct mtx rawcb_mtx; struct rawcb_list_head rawcb_list; -static u_long raw_sendspace = RAWSNDQ; -static u_long raw_recvspace = RAWRCVQ; +static const u_long raw_sendspace = RAWSNDQ; +static const u_long raw_recvspace = RAWRCVQ; /* * Allocate a control block and a nominal amount @@ -79,7 +82,9 @@ rp->rcb_socket = so; rp->rcb_proto.sp_family = so->so_proto->pr_domain->dom_family; rp->rcb_proto.sp_protocol = proto; + mtx_lock(&rawcb_mtx); LIST_INSERT_HEAD(&rawcb_list, rp, list); + mtx_unlock(&rawcb_mtx); return (0); } @@ -93,6 +98,7 @@ { struct socket *so = rp->rcb_socket; + SOCK_LOCK(so); so->so_pcb = 0; sotryfree(so); LIST_REMOVE(rp, list); --- //depot/vendor/freebsd/src/sys/net/raw_cb.h 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/net/raw_cb.h 2004/04/07 20:11:34 @@ -57,6 +57,7 @@ #ifdef _KERNEL extern LIST_HEAD(rawcb_list_head, rawcb) rawcb_list; +extern struct mtx rawcb_mtx; /* protosw entries */ pr_ctlinput_t raw_ctlinput; --- //depot/vendor/freebsd/src/sys/net/raw_usrreq.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/net/raw_usrreq.c 2004/04/07 20:11:34 @@ -31,9 +31,12 @@ */ #include +#include #include #include #include +#include +#include #include #include #include @@ -43,12 +46,15 @@ #include +MTX_SYSINIT(rawcb_mtx, &rawcb_mtx, "rawcb_mtx", MTX_DEF); + /* * Initialize raw connection block q. */ void raw_init() { + LIST_INIT(&rawcb_list); } @@ -71,7 +77,12 @@ register struct mbuf *m = m0; struct socket *last; + /* + * XXXRW: Potential lock order issues due to holding the + * rawcb_mtx across all this stuff. Need to revisit. 
+ */ last = 0; + mtx_lock(&rawcb_mtx); LIST_FOREACH(rp, &rawcb_list, list) { if (rp->rcb_proto.sp_family != proto->sp_family) continue; @@ -116,6 +127,7 @@ } } else m_freem(m); + mtx_unlock(&rawcb_mtx); } /*ARGSUSED*/ @@ -139,8 +151,12 @@ if (rp == 0) return EINVAL; raw_disconnect(rp); - sotryfree(so); - soisdisconnected(so); /* XXX huh? called after the sofree()? */ + SOCK_LOCK(so); + if (so->so_count != 0) { + soisdisconnected(so); + SOCK_UNLOCK(so); + } else + sofree(so); return 0; } --- //depot/vendor/freebsd/src/sys/net/rtsock.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/net/rtsock.c 2004/04/07 20:11:34 @@ -51,10 +51,18 @@ MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); /* NB: these are not modified */ +/* + * XXXRW: It would be really nice to add const to these, but that may + * not be possible due to where they are passed in. We might need + * to const-poison a whole boatload of APIs...? + */ static struct sockaddr route_dst = { 2, PF_ROUTE, }; static struct sockaddr route_src = { 2, PF_ROUTE, }; static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, }; +/* + * XXXRW: These fields are locked by RTSOCK_LOCK(). + */ static struct { int ip_count; /* attacked w/ AF_INET */ int ip6_count; /* attached w/ AF_INET6 */ --- //depot/vendor/freebsd/src/sys/netatalk/aarp.c 2004/04/08 18:40:31 +++ //depot/user/rwatson/netperf/sys/netatalk/aarp.c 2004/04/08 18:52:42 @@ -63,6 +63,9 @@ #define AARPT_KILLC 20 #define AARPT_KILLI 3 +/* + * XXXRW: wot? + */ # if !defined(__FreeBSD__) extern u_char etherbroadcastaddr[6]; # endif /* __FreeBSD__ */ @@ -72,7 +75,7 @@ }; /* - * Not used? + * XXXRW: unused? */ u_char at_org_code[ 3 ] = { 0x08, 0x00, 0x07, @@ -81,6 +84,9 @@ 0x00, 0x00, 0x00, }; +/* + * XXXRW: Make this use callouts, not timeouts. 
+ */ static struct callout_handle aarptimer_ch = CALLOUT_HANDLE_INITIALIZER(&aarptimer_ch); @@ -399,7 +405,7 @@ } } - AARPTAB_LOCK(); + AARPTAB_LOCK(); /* XXXRW */ AARPTAB_LOOK(aat, spa); if (aat != NULL) { if (op == AARPOP_PROBE) { @@ -642,6 +648,9 @@ struct aarptab *aat; int i; + /* + * XXXRW: Should grab mutex before untimeout? + */ untimeout(aarptimer, 0, aarptimer_ch); AARPTAB_LOCK(); for (i = 0, aat = aarptab; i < AARPTAB_SIZE; i++, aat++) { --- //depot/vendor/freebsd/src/sys/netatalk/at_control.c 2004/03/21 20:56:26 +++ //depot/user/rwatson/netperf/sys/netatalk/at_control.c 2004/03/21 21:14:09 @@ -21,6 +21,9 @@ #include #include +/* + * XXXRW: Requires synchronization. + */ struct at_ifaddr *at_ifaddr_list; static int aa_dorangeroute(struct ifaddr *ifa, --- //depot/vendor/freebsd/src/sys/netatalk/at_rmx.c 2004/03/21 20:01:33 +++ //depot/user/rwatson/netperf/sys/netatalk/at_rmx.c 2004/03/21 20:08:45 @@ -40,11 +40,23 @@ int at_inithead(void **head, int off); -static char hexbuf[256]; +/* + * XXXRW: hexbuf was a static global variable, but I moved it into the + * stack rather than stick a mutex around it. 256 bytes is smaller than + * it used to be, but this still might be a problem. Needs to be + * revisited. Should probably just use the new hexdump(9). + * + * XXXRW: All this appears to be present just so as to printf debugging + * information. Assuming that this code is known to work, we could just + * scrap all this. In fact, this code isn't even used as it stands, it's + * here for debugging purposes only and requires modifications to + * at_proto.c. 
+ */ static char * prsockaddr(void *v) { + static char hexbuf[256]; char *bp = &hexbuf[0]; u_char *cp = v; --- //depot/vendor/freebsd/src/sys/netatalk/ddp_input.c 2004/03/21 20:56:26 +++ //depot/user/rwatson/netperf/sys/netatalk/ddp_input.c 2004/03/21 21:14:09 @@ -29,6 +29,12 @@ static volatile int ddp_forward = 1; static volatile int ddp_firewall = 0; static struct ddpstat ddpstat; + +/* + * XXXRW: If we're going to keep this cached route data, we'll need to lock it + * down, and change later function-local use of it to grab an extra reference + * after deciding it is useful. + */ static struct route forwro; static void ddp_input(struct mbuf *, struct ifnet *, struct elaphdr *, int); --- //depot/vendor/freebsd/src/sys/netatalk/ddp_pcb.c 2004/03/21 20:56:26 +++ //depot/user/rwatson/netperf/sys/netatalk/ddp_pcb.c 2004/03/22 05:14:14 @@ -22,12 +22,18 @@ #include #include +struct mtx ddp_list_mtx; static struct ddpcb *ddp_ports[ ATPORT_LAST ]; -struct ddpcb *ddpcb_list = NULL; +struct ddpcb *ddpcb_list = NULL; void at_sockaddr(struct ddpcb *ddp, struct sockaddr **addr) { + + /* + * Prevent modification of ddp during copy of addr. + */ + DDP_LOCK_ASSERT(ddp); *addr = sodupsockaddr((struct sockaddr *)&ddp->ddp_lsat, M_NOWAIT); } @@ -38,6 +44,12 @@ struct at_ifaddr *aa; struct ddpcb *ddpp; + /* + * We read and write both the ddp passed in, and also ddp_ports. 
+ */ + DDP_LIST_XLOCK_ASSERT(); + DDP_LOCK_ASSERT(ddp); + if (ddp->ddp_lsat.sat_port != ATADDR_ANYPORT) { /* shouldn't be bound */ return (EINVAL); } @@ -134,6 +146,9 @@ struct ifnet *ifp; u_short hintnet = 0, net; + DDP_LIST_XLOCK_ASSERT(); + DDP_LOCK_ASSERT(ddp); + if (sat->sat_family != AF_APPLETALK) { return (EAFNOSUPPORT); } @@ -222,6 +237,9 @@ void at_pcbdisconnect(struct ddpcb *ddp) { + + DDP_LOCK_ASSERT(ddp); + ddp->ddp_fsat.sat_addr.s_net = ATADDR_ANYNET; ddp->ddp_fsat.sat_addr.s_node = ATADDR_ANYNODE; ddp->ddp_fsat.sat_port = ATADDR_ANYPORT; @@ -233,8 +251,17 @@ struct ddpcb *ddp; MALLOC(ddp, struct ddpcb *, sizeof *ddp, M_PCB, M_WAITOK | M_ZERO); + DDP_LOCK_INIT(ddp); ddp->ddp_lsat.sat_port = ATADDR_ANYPORT; + /* + * XXXRW: Is this unlocked assignment pair for socket and + * back-pointer something that needs to be protected? + */ + ddp->ddp_socket = so; + so->so_pcb = (caddr_t)ddp; + + DDP_LIST_XLOCK(); ddp->ddp_next = ddpcb_list; ddp->ddp_prev = NULL; ddp->ddp_pprev = NULL; @@ -243,15 +270,21 @@ ddpcb_list->ddp_prev = ddp; } ddpcb_list = ddp; + DDP_LIST_XUNLOCK(); - ddp->ddp_socket = so; - so->so_pcb = (caddr_t)ddp; - return (0); + return(0); } void at_pcbdetach(struct socket *so, struct ddpcb *ddp) { + + /* + * We modify ddp, ddp_ports, and the global list. + */ + DDP_LIST_XLOCK_ASSERT(); + DDP_LOCK_ASSERT(ddp); + soisdisconnected(so); so->so_pcb = NULL; sotryfree(so); @@ -281,6 +314,8 @@ if (ddp->ddp_next) { ddp->ddp_next->ddp_prev = ddp->ddp_prev; } + DDP_UNLOCK(ddp); + DDP_LOCK_DESTROY(ddp); FREE(ddp, M_PCB); } @@ -296,6 +331,8 @@ { struct ddpcb *ddp; + DDP_LIST_SLOCK_ASSERT(); + /* * Check for bad ports. */ @@ -308,11 +345,13 @@ * the interface? */ for (ddp = ddp_ports[ to->sat_port - 1 ]; ddp; ddp = ddp->ddp_pnext) { + DDP_LOCK(ddp); /* XXX should we handle 0.YY? 
*/ /* XXXX.YY to socket on destination interface */ if (to->sat_addr.s_net == ddp->ddp_lsat.sat_addr.s_net && to->sat_addr.s_node == ddp->ddp_lsat.sat_addr.s_node) { + DDP_UNLOCK(ddp); break; } @@ -320,6 +359,7 @@ if (to->sat_addr.s_node == ATADDR_BCAST && (to->sat_addr.s_net == 0 || to->sat_addr.s_net == ddp->ddp_lsat.sat_addr.s_net) && ddp->ddp_lsat.sat_addr.s_net == AA_SAT(aa)->sat_addr.s_net) { + DDP_UNLOCK(ddp); break; } @@ -330,8 +370,10 @@ ntohs(aa->aa_firstnet) && ntohs(ddp->ddp_lsat.sat_addr.s_net) <= ntohs(aa->aa_lastnet)) { + DDP_UNLOCK(ddp); break; } + DDP_UNLOCK(ddp); } return (ddp); } --- //depot/vendor/freebsd/src/sys/netatalk/ddp_pcb.h 2004/03/18 23:25:31 +++ //depot/user/rwatson/netperf/sys/netatalk/ddp_pcb.h 2004/03/21 20:20:48 @@ -17,4 +17,23 @@ struct thread *td); void at_sockaddr(struct ddpcb *ddp, struct sockaddr **addr); +/* Lock macros for per-pcb locks. */ +#define DDP_LOCK_INIT(ddp) mtx_init(&(ddp)->ddp_mtx, "ddp_mtx", \ + NULL, MTX_DEF) +#define DDP_LOCK_DESTROY(ddp) mtx_destroy(&(ddp)->ddp_mtx) +#define DDP_LOCK(ddp) mtx_lock(&(ddp)->ddp_mtx) +#define DDP_UNLOCK(ddp) mtx_unlock(&(ddp)->ddp_mtx) +#define DDP_LOCK_ASSERT(ddp) mtx_assert(&(ddp)->ddp_mtx, MA_OWNED) + +/* Lock macros for global pcb list lock. 
*/ +#define DDP_LIST_LOCK_INIT() mtx_init(&ddp_list_mtx, "ddp_list_mtx", \ + NULL, MTX_DEF) +#define DDP_LIST_LOCK_DESTROY() mtx_destroy(&ddp_list_mtx) +#define DDP_LIST_XLOCK() mtx_lock(&ddp_list_mtx) +#define DDP_LIST_XUNLOCK() mtx_unlock(&ddp_list_mtx) +#define DDP_LIST_XLOCK_ASSERT() mtx_assert(&ddp_list_mtx, MA_OWNED) +#define DDP_LIST_SLOCK() mtx_lock(&ddp_list_mtx) +#define DDP_LIST_SUNLOCK() mtx_unlock(&ddp_list_mtx) +#define DDP_LIST_SLOCK_ASSERT() mtx_assert(&ddp_list_mtx, MA_OWNED) + #endif --- //depot/vendor/freebsd/src/sys/netatalk/ddp_usrreq.c 2004/03/21 20:56:26 +++ //depot/user/rwatson/netperf/sys/netatalk/ddp_usrreq.c 2004/03/21 21:14:09 @@ -22,6 +22,9 @@ #include #include +/* + * XXXRW: These structures are currently not mutable. + */ static u_long ddp_sendspace = DDP_MAXSZ; /* Max ddp size + 1 (ddp_type) */ static u_long ddp_recvspace = 10 * (587 + sizeof(struct sockaddr_at)); @@ -32,36 +35,38 @@ { struct ddpcb *ddp; int error = 0; - int s; + ddp = sotoddpcb(so); + if (ddp != NULL) + return (EINVAL); - ddp = sotoddpcb(so); - if (ddp != NULL) { - return (EINVAL); - } + /* + * Allocate socket buffer space first so that it's present + * before first use. 
+ */ + error = soreserve(so, ddp_sendspace, ddp_recvspace); + if (error) + return (error); - s = splnet(); + DDP_LIST_XLOCK(); error = at_pcballoc(so); - splx(s); - if (error) { - return (error); - } - return (soreserve(so, ddp_sendspace, ddp_recvspace)); + DDP_LIST_XUNLOCK(); + return (error); } static int ddp_detach(struct socket *so) { struct ddpcb *ddp; - int s; ddp = sotoddpcb(so); - if (ddp == NULL) { + if (ddp == NULL) return (EINVAL); - } - s = splnet(); + + DDP_LIST_XLOCK(); + DDP_LOCK(ddp); at_pcbdetach(so, ddp); - splx(s); + DDP_LIST_XUNLOCK(); return (0); } @@ -70,15 +75,16 @@ { struct ddpcb *ddp; int error = 0; - int s; ddp = sotoddpcb(so); if (ddp == NULL) { return (EINVAL); } - s = splnet(); + DDP_LIST_XLOCK(); + DDP_LOCK(ddp); error = at_pcbsetaddr(ddp, nam, td); - splx(s); + DDP_UNLOCK(ddp); + DDP_LIST_XUNLOCK(); return (error); } @@ -87,20 +93,22 @@ { struct ddpcb *ddp; int error = 0; - int s; ddp = sotoddpcb(so); if (ddp == NULL) { return (EINVAL); } + DDP_LIST_XLOCK(); + DDP_LOCK(ddp); if (ddp->ddp_fsat.sat_port != ATADDR_ANYPORT) { + DDP_UNLOCK(ddp); return (EISCONN); } - s = splnet(); - error = at_pcbconnect(ddp, nam, td); - splx(s); + error = at_pcbconnect( ddp, nam, td ); + DDP_UNLOCK(ddp); + DDP_LIST_XUNLOCK(); if (error == 0) soisconnected(so); return (error); @@ -111,20 +119,20 @@ { struct ddpcb *ddp; - int s; ddp = sotoddpcb(so); if (ddp == NULL) { return (EINVAL); } + DDP_LOCK(ddp); if (ddp->ddp_fsat.sat_addr.s_node == ATADDR_ANYNODE) { + DDP_UNLOCK(ddp); return (ENOTCONN); } - s = splnet(); at_pcbdisconnect(ddp); ddp->ddp_fsat.sat_addr.s_node = ATADDR_ANYNODE; - splx(s); + DDP_UNLOCK(ddp); soisdisconnected(so); return (0); } @@ -142,13 +150,19 @@ return (0); } +/* + * XXXRW: If an explicit address is specified, then we temporarily change + * the address on the pcb for sending. This is inefficient because it + * requires us to perform global rather than pcb-local operations. 
It + * may also create a race if other users of the socket are simultaneously + * sending. + */ static int ddp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct ddpcb *ddp; int error = 0; - int s; ddp = sotoddpcb(so); if (ddp == NULL) { @@ -160,28 +174,29 @@ } if (addr != NULL) { + DDP_LIST_XLOCK(); + DDP_LOCK(ddp); if (ddp->ddp_fsat.sat_port != ATADDR_ANYPORT) { - return (EISCONN); + error = EISCONN; + goto out; } - s = splnet(); error = at_pcbconnect(ddp, addr, td); - splx(s); - if (error) { - return (error); + if (error == 0) { + error = ddp_output(m, so); + at_pcbdisconnect(ddp); } +out: + DDP_UNLOCK(ddp); + DDP_LIST_XUNLOCK(); } else { - if (ddp->ddp_fsat.sat_port == ATADDR_ANYPORT) { - return (ENOTCONN); - } + DDP_LOCK(ddp); + if (ddp->ddp_fsat.sat_port == ATADDR_ANYPORT) + error = ENOTCONN; + else + error = ddp_output(m, so); + DDP_UNLOCK(ddp); } - - s = splnet(); - error = ddp_output(m, so); - if (addr != NULL) { - at_pcbdisconnect(ddp); - } - splx(s); return (error); } @@ -189,29 +204,29 @@ ddp_abort(struct socket *so) { struct ddpcb *ddp; - int s; ddp = sotoddpcb(so); if (ddp == NULL) { return (EINVAL); } soisdisconnected(so); - s = splnet(); + DDP_LIST_XLOCK(); + DDP_LOCK(ddp); at_pcbdetach(so, ddp); - splx(s); + DDP_LIST_XUNLOCK(); return (0); } void ddp_init(void) { - atintrq1.ifq_maxlen = IFQ_MAXLEN; atintrq2.ifq_maxlen = IFQ_MAXLEN; aarpintrq.ifq_maxlen = IFQ_MAXLEN; mtx_init(&atintrq1.ifq_mtx, "at1_inq", NULL, MTX_DEF); mtx_init(&atintrq2.ifq_mtx, "at2_inq", NULL, MTX_DEF); mtx_init(&aarpintrq.ifq_mtx, "aarp_inq", NULL, MTX_DEF); + DDP_LIST_LOCK_INIT(); netisr_register(NETISR_ATALK1, at1intr, &atintrq1, 0); netisr_register(NETISR_ATALK2, at2intr, &atintrq2, 0); netisr_register(NETISR_AARP, aarpintr, &aarpintrq, 0); @@ -226,6 +241,7 @@ for (ddp = ddpcb_list; ddp != NULL; ddp = ddp->ddp_next) { at_pcbdetach(ddp->ddp_socket, ddp); } + DDP_LIST_LOCK_DESTROY(); } #endif @@ -244,7 
+260,9 @@ if (ddp == NULL) { return (EINVAL); } + DDP_LOCK(ddp); at_sockaddr(ddp, nam); + DDP_UNLOCK(ddp); return (0); } --- //depot/vendor/freebsd/src/sys/netatalk/ddp_var.h 2004/03/21 20:56:26 +++ //depot/user/rwatson/netperf/sys/netatalk/ddp_var.h 2004/03/21 21:14:09 @@ -13,6 +13,7 @@ struct socket *ddp_socket; struct ddpcb *ddp_prev, *ddp_next; struct ddpcb *ddp_pprev, *ddp_pnext; + struct mtx ddp_mtx; }; #define sotoddpcb(so) ((struct ddpcb *)(so)->so_pcb) @@ -34,5 +35,6 @@ extern int ddp_cksum; extern struct ddpcb *ddpcb_list; extern struct pr_usrreqs ddp_usrreqs; +extern struct mtx ddp_list_mtx; #endif #endif /* _NETATALK_DDP_VAR_H_ */ --- //depot/vendor/freebsd/src/sys/netatm/atm_socket.c 2003/10/31 10:36:05 +++ //depot/user/rwatson/netperf/sys/netatm/atm_socket.c 2004/02/28 14:29:37 @@ -173,6 +173,7 @@ /* * Break links and free control blocks */ + SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); --- //depot/vendor/freebsd/src/sys/netgraph/ng_ksocket.c 2004/01/26 06:10:37 +++ //depot/user/rwatson/netperf/sys/netgraph/ng_ksocket.c 2004/04/04 14:05:41 @@ -1005,6 +1005,9 @@ * before dereferencing the socket pointer. */ +/* + * XXXRW: ng_ksocket_incoming() is called without Giant. Is that OK? + */ static void ng_ksocket_incoming(struct socket *so, void *arg, int waitflag) { --- //depot/vendor/freebsd/src/sys/netgraph/ng_socket.c 2004/01/27 14:05:28 +++ //depot/user/rwatson/netperf/sys/netgraph/ng_socket.c 2004/03/13 19:02:19 @@ -153,6 +153,9 @@ SYSCTL_INT(_net_graph, OID_AUTO, recvspace, CTLFLAG_RW, &ngpdg_recvspace , 0, "Maximum space for incoming Netgraph datagrams"); +/* + * XXXRW: Locking? 
+ */ /* List of all sockets */ static LIST_HEAD(, ngpcb) ngsocklist; --- //depot/vendor/freebsd/src/sys/netinet/if_ether.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet/if_ether.c 2004/04/07 20:11:34 @@ -98,6 +98,11 @@ #define la_timer la_rt->rt_rmx.rmx_expire /* deletion time in seconds */ }; +/* + * XXXRW: Need to document (and/or fix) locking for this. We always + * seem to hold a lock (and assert) when referencing this list, but it's + * not clear it's always the same lock. + */ static LIST_HEAD(, llinfo_arp) llinfo_arp; static struct ifqueue arpintrq; --- //depot/vendor/freebsd/src/sys/netinet/igmp.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet/igmp.c 2004/04/07 20:11:34 @@ -80,10 +80,28 @@ SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RW, &igmpstat, igmpstat, ""); +/* + * igmp_mtx protects all mutable global variables in igmp.c, as well as + * the data fields in struct router_info. In general, a router_info + * structure will be valid as long as the referencing struct in_multi is + * valid, so no reference counting is used. We allow unlocked reads of + * router_info data when accessed via an in_multi read-only. + */ +static struct mtx igmp_mtx; static SLIST_HEAD(, router_info) router_info_head; static int igmp_timers_are_running; + +/* + * XXXRW: can we define these such that these can be made const? In any + * case, these shouldn't be changed after igmp_init() and therefore don't + * need locking. + */ static u_long igmp_all_hosts_group; static u_long igmp_all_rtrs_group; + +/* + * XXXRW: These variables make me vaguely nervous. + */ static struct mbuf *router_alert; static struct route igmprt; @@ -108,6 +126,7 @@ /* * Construct a Router Alert option to use in outgoing packets + * XXXRW: This might actually need a MAC label. 
*/ MGET(router_alert, M_DONTWAIT, MT_DATA); ra = mtod(router_alert, struct ipoption *); @@ -118,6 +137,7 @@ ra->ipopt_list[3] = 0x00; router_alert->m_len = sizeof(ra->ipopt_dst) + ra->ipopt_list[1]; + mtx_init(&igmp_mtx, "igmp_mtx", NULL, MTX_DEF); SLIST_INIT(&router_info_head); } @@ -126,6 +146,7 @@ { struct router_info *rti; + mtx_assert(&igmp_mtx, MA_OWNED); IGMP_PRINTF("[igmp.c, _find_rti] --> entering \n"); SLIST_FOREACH(rti, &router_info_head, rti_list) { if (rti->rti_ifp == ifp) { @@ -134,6 +155,9 @@ return rti; } } + /* + * XXXRW: return value of malloc not checked, despite M_NOWAIT. + */ MALLOC(rti, struct router_info *, sizeof *rti, M_IGMP, M_NOWAIT); rti->rti_ifp = ifp; rti->rti_type = IGMP_V2_ROUTER; @@ -197,7 +221,6 @@ timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE; if (timer == 0) timer = 1; - rti = find_rti(ifp); /* * In the IGMPv2 specification, there are 3 states and a flag. @@ -224,8 +247,11 @@ * value in RFC 1112. */ + mtx_lock(&igmp_mtx); + rti = find_rti(ifp); rti->rti_type = IGMP_V1_ROUTER; rti->rti_time = 0; + mtx_unlock(&igmp_mtx); timer = IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ; @@ -344,7 +370,9 @@ inm->inm_timer = 0; inm->inm_state = IGMP_OTHERMEMBER; } else { + mtx_lock(&igmp_mtx); inm->inm_rti = find_rti(inm->inm_ifp); + mtx_unlock(&igmp_mtx); igmp_sendpkt(inm, inm->inm_rti->rti_type, 0); inm->inm_timer = IGMP_RANDOM_DELAY( IGMP_MAX_HOST_REPORT_DELAY*PR_FASTHZ); @@ -404,6 +432,7 @@ struct router_info *rti; IGMP_PRINTF("[igmp.c,_slowtimo] -- > entering \n"); + mtx_lock(&igmp_mtx); SLIST_FOREACH(rti, &router_info_head, rti_list) { if (rti->rti_type == IGMP_V1_ROUTER) { rti->rti_time++; @@ -411,6 +440,7 @@ rti->rti_type = IGMP_V2_ROUTER; } } + mtx_unlock(&igmp_mtx); IGMP_PRINTF("[igmp.c,_slowtimo] -- > exiting \n"); splx(s); } --- //depot/vendor/freebsd/src/sys/netinet/in_gif.c 2003/10/29 07:10:52 +++ //depot/user/rwatson/netperf/sys/netinet/in_gif.c 2004/03/09 15:48:47 @@ -174,6 +174,9 @@ } bcopy(&iphdr, mtod(m, struct ip *), 
sizeof(struct ip)); + /* + * XXXRW: locking of gif's softc. + */ if (dst->sin_family != sin_dst->sin_family || dst->sin_addr.s_addr != sin_dst->sin_addr.s_addr) { /* cache route doesn't match */ @@ -320,6 +323,10 @@ case 0: case 127: case 255: return 0; } + + /* + * XXXRW: Lock in_ifaddrhead walking. + */ /* reject packets with broadcast on source */ TAILQ_FOREACH(ia4, &in_ifaddrhead, ia_link) { if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0) @@ -328,6 +335,7 @@ return 0; } + /* XXXRW: unlocked read. */ /* ingress filters on outer source */ if ((sc->gif_if.if_flags & IFF_LINK2) == 0 && ifp) { struct sockaddr_in sin; @@ -383,6 +391,11 @@ in_gif_attach(sc) struct gif_softc *sc; { + + /* + * XXXRW: Technically, NULL can also be returned for ENOMEM, + * not just EEXIST. + */ sc->encap_cookie4 = encap_attach_func(AF_INET, -1, gif_encapcheck, &in_gif_protosw, sc); if (sc->encap_cookie4 == NULL) --- //depot/vendor/freebsd/src/sys/netinet/in_pcb.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet/in_pcb.c 2004/04/07 20:11:34 @@ -683,6 +683,7 @@ inp->inp_gencnt = ++ipi->ipi_gencnt; in_pcbremlists(inp); if (so) { + SOCK_LOCK(so); so->so_pcb = 0; sotryfree(so); } --- //depot/vendor/freebsd/src/sys/netinet/in_pcb.h 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet/in_pcb.h 2004/04/07 20:11:34 @@ -244,9 +244,14 @@ #define INP_LOCK(inp) mtx_lock(&(inp)->inp_mtx) #define INP_UNLOCK(inp) mtx_unlock(&(inp)->inp_mtx) #ifndef INET6 -#define INP_LOCK_ASSERT(inp) mtx_assert(&(inp)->inp_mtx, MA_OWNED) +#define INP_LOCK_ASSERT(inp) do { \ + mtx_assert(&(inp)->inp_mtx, MA_OWNED); \ + NET_ASSERT_GIANT(); \ +} while (0) #else -#define INP_LOCK_ASSERT(inp) +#define INP_LOCK_ASSERT(inp) do { \ + NET_ASSERT_GIANT(); \ +} while (0) #endif #define INP_INFO_LOCK_INIT(ipi, d) \ @@ -256,11 +261,21 @@ #define INP_INFO_RUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_mtx) #define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_mtx) #ifndef INET6 -#define 
INP_INFO_RLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_mtx, MA_OWNED) -#define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_mtx, MA_OWNED) +#define INP_INFO_RLOCK_ASSERT(ipi) do { \ + mtx_assert(&(ipi)->ipi_mtx, MA_OWNED); \ + NET_ASSERT_GIANT(); \ +} while (0) +#define INP_INFO_WLOCK_ASSERT(ipi) do { \ + mtx_assert(&(ipi)->ipi_mtx, MA_OWNED); \ + NET_ASSERT_GIANT(); \ +} while (0) #else -#define INP_INFO_RLOCK_ASSERT(ipi) -#define INP_INFO_WLOCK_ASSERT(ipi) +#define INP_INFO_RLOCK_ASSERT(ipi) do { \ + NET_ASSERT_GIANT(); \ +} while (0) +#define INP_INFO_WLOCK_ASSERT(ipi) do { \ + NET_ASSERT_GIANT(); \ +} while (0) #endif #define INP_PCBHASH(faddr, lport, fport, mask) \ --- //depot/vendor/freebsd/src/sys/netinet/in_proto.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet/in_proto.c 2004/04/07 20:11:34 @@ -178,7 +178,7 @@ { SOCK_RAW, &inetdomain, IPPROTO_IPV4, PR_ATOMIC|PR_ADDR|PR_LASTHDR, encap4_input, 0, 0, rip_ctloutput, 0, - encap_init, 0, 0, 0, + encap_init, 0, 0, 0, &rip_usrreqs }, { SOCK_RAW, &inetdomain, IPPROTO_MOBILE, PR_ATOMIC|PR_ADDR|PR_LASTHDR, --- //depot/vendor/freebsd/src/sys/netinet/ip_divert.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet/ip_divert.c 2004/04/07 20:11:34 @@ -219,20 +219,6 @@ sizeof(divsrc.sin_zero)); } - /* - * XXX sbappendaddr must be protected by Giant until - * we have locking at the socket layer. When entered - * from below we come in w/o Giant and must take it - * here. Unfortunately we cannot tell whether we're - * entering from above (already holding Giant), - * below (potentially without Giant), or otherwise - * (e.g. from tcp_syncache through a timeout) so we - * have to grab it regardless. This causes a LOR with - * the tcp lock, at least, and possibly others. For - * the moment we're ignoring this. Once sockets are - * locked this cruft can be removed. 
- */ - mtx_lock(&Giant); /* Put packet on socket queue, if any */ sa = NULL; nport = htons((u_int16_t)divert_info(mtag)); @@ -254,7 +240,6 @@ INP_UNLOCK(inp); } INP_INFO_RUNLOCK(&divcbinfo); - mtx_unlock(&Giant); if (sa == NULL) { m_freem(m); ipstat.ips_noproto++; --- //depot/vendor/freebsd/src/sys/netinet/ip_dummynet.c 2004/03/02 17:35:21 +++ //depot/user/rwatson/netperf/sys/netinet/ip_dummynet.c 2004/03/03 20:00:06 @@ -171,7 +171,10 @@ #define DUMMYNET_LOCK_DESTROY() mtx_destroy(&dummynet_mtx) #define DUMMYNET_LOCK() mtx_lock(&dummynet_mtx) #define DUMMYNET_UNLOCK() mtx_unlock(&dummynet_mtx) -#define DUMMYNET_LOCK_ASSERT() mtx_assert(&dummynet_mtx, MA_OWNED) +#define DUMMYNET_LOCK_ASSERT() do { \ + mtx_assert(&dummynet_mtx, MA_OWNED); \ + NET_ASSERT_GIANT(); \ +} while (0) static int config_pipe(struct dn_pipe *p); static int ip_dn_ctl(struct sockopt *sopt); --- //depot/vendor/freebsd/src/sys/netinet/ip_encap.c 2004/03/09 18:50:38 +++ //depot/user/rwatson/netperf/sys/netinet/ip_encap.c 2004/03/09 19:47:45 @@ -1,4 +1,4 @@ -/* $FreeBSD: src/sys/netinet/ip_encap.c,v 1.19 2004/03/10 02:48:50 rwatson Exp $ */ +/* $FreeBSD: src/sys/netinet/ip_encap.c,v 1.18 2003/06/01 09:20:38 phk Exp $ */ /* $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $ */ /* @@ -106,8 +106,7 @@ LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab); /* - * We currently keey encap_init() for source code compatibility reasons -- - * it's referenced by KAME pieces in netinet6. + * XXXRW: encap_init() was entirely useless, so I deleted it. */ void encap_init() @@ -185,6 +184,10 @@ } mtx_unlock(&encapmtx); + /* + * XXXRW: Need drain mechanism to prevent the encapsulation + * entry from being released while in use. + */ if (match) { /* found a match, "match" has the best one */ psw = match->psw; @@ -255,6 +258,10 @@ } mtx_unlock(&encapmtx); + /* + * XXXRW: Need drain mechanism so the encap entry isn't freed + * while in use. 
+ */ if (match) { /* found a match */ psw = (const struct ip6protosw *)match->psw; --- //depot/vendor/freebsd/src/sys/netinet/ip_encap.h 2002/03/19 13:30:39 +++ //depot/user/rwatson/netperf/sys/netinet/ip_encap.h 2004/03/11 22:46:13 @@ -35,6 +35,15 @@ #ifdef _KERNEL +/* + * This structure is entirely static after registration, and other than + * its entry in the encapsulation table, requires no locking. The chain + * field is locked using the global encapmtx. + * + * XXXRW: Need to add a refcount/drain mechanism so that encapsulation + * entries can't be removed while in use. This likely requires a + * refcount and cv to wait for it to drain, or an sx lock. + */ struct encaptab { LIST_ENTRY(encaptab) chain; int af; --- //depot/vendor/freebsd/src/sys/netinet/ip_fastfwd.c 2004/02/25 11:55:45 +++ //depot/user/rwatson/netperf/sys/netinet/ip_fastfwd.c 2004/02/28 14:29:37 @@ -609,6 +609,7 @@ sizeof(struct sockaddr_in *), M_NOWAIT); if (mtag == NULL) { + /* XXX statistic */ if (ro.ro_rt) RTFREE(ro.ro_rt); goto drop; --- //depot/vendor/freebsd/src/sys/netinet/ip_fw2.c 2004/02/25 11:55:45 +++ //depot/user/rwatson/netperf/sys/netinet/ip_fw2.c 2004/02/29 22:51:24 @@ -122,7 +122,10 @@ #define IPFW_LOCK_DESTROY(_chain) mtx_destroy(&(_chain)->mtx) #define IPFW_LOCK(_chain) mtx_lock(&(_chain)->mtx) #define IPFW_UNLOCK(_chain) mtx_unlock(&(_chain)->mtx) -#define IPFW_LOCK_ASSERT(_chain) mtx_assert(&(_chain)->mtx, MA_OWNED) +#define IPFW_LOCK_ASSERT(_chain) do { \ + mtx_assert(&(_chain)->mtx, MA_OWNED); \ + NET_ASSERT_GIANT(); \ +} while (0) /* * list of rules for layer 3 @@ -1296,7 +1299,8 @@ } static int -check_uidgid(ipfw_insn_u32 *insn, +check_uidgid(struct ip_fw_chain *chain, + ipfw_insn_u32 *insn, int proto, struct ifnet *oif, struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip, u_int16_t src_port) @@ -1317,7 +1321,10 @@ match = 0; - INP_INFO_RLOCK(pi); /* XXX LOR with IPFW */ + /* NB: reorder to avoid LOR between IPFW and inp */ + IPFW_UNLOCK(chain); + 
INP_INFO_RLOCK(pi); + IPFW_LOCK(chain); pcb = (oif) ? in_pcblookup_hash(pi, dst_ip, htons(dst_port), @@ -1657,7 +1664,7 @@ break; if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) - match = check_uidgid( + match = check_uidgid(chain, (ipfw_insn_u32 *)cmd, proto, oif, dst_ip, dst_port, --- //depot/vendor/freebsd/src/sys/netinet/ip_id.c 2004/02/25 19:55:40 +++ //depot/user/rwatson/netperf/sys/netinet/ip_id.c 2004/03/11 19:03:57 @@ -79,6 +79,9 @@ 2729 }; +/* + * XXXRW: Locking? + */ static u_int16_t ru_x; static u_int16_t ru_seed, ru_seed2; static u_int16_t ru_a, ru_b; --- //depot/vendor/freebsd/src/sys/netinet/ip_input.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet/ip_input.c 2004/04/07 20:11:34 @@ -927,8 +927,10 @@ /* attach next hop info for TCP */ struct m_tag *mtag = m_tag_get(PACKET_TAG_IPFORWARD, sizeof(struct sockaddr_in *), M_NOWAIT); - if (mtag == NULL) + if (mtag == NULL) { + /* XXX statistic */ goto bad; + } *(struct sockaddr_in **)(mtag+1) = args.next_hop; m_tag_prepend(m, mtag); } @@ -1850,6 +1852,7 @@ struct m_tag *mtag = m_tag_get(PACKET_TAG_IPFORWARD, sizeof(struct sockaddr_in *), M_NOWAIT); if (mtag == NULL) { + /* XXX statistic */ m_freem(m); return; } --- //depot/vendor/freebsd/src/sys/netinet/ip_mroute.c 2004/03/07 23:51:05 +++ //depot/user/rwatson/netperf/sys/netinet/ip_mroute.c 2004/03/09 11:24:20 @@ -103,7 +103,10 @@ static struct mtx mfc_mtx; #define MFC_LOCK() mtx_lock(&mfc_mtx) #define MFC_UNLOCK() mtx_unlock(&mfc_mtx) -#define MFC_LOCK_ASSERT() mtx_assert(&mfc_mtx, MA_OWNED) +#define MFC_LOCK_ASSERT() do { \ + mtx_assert(&mfc_mtx, MA_OWNED); \ + NET_ASSERT_GIANT(); \ +} while (0) #define MFC_LOCK_INIT() mtx_init(&mfc_mtx, "mroute mfc table", NULL, MTX_DEF) #define MFC_LOCK_DESTROY() mtx_destroy(&mfc_mtx) @@ -1303,13 +1306,10 @@ socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) { if (s) { - mtx_lock(&Giant); /* XXX until sockets are locked */ if (sbappendaddr(&s->so_rcv, (struct sockaddr *)src, mm, 
NULL) != 0) { sorwakeup(s); - mtx_unlock(&Giant); return 0; } - mtx_unlock(&Giant); } m_freem(mm); return -1; --- //depot/vendor/freebsd/src/sys/netinet/ip_output.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet/ip_output.c 2004/04/07 20:11:34 @@ -157,6 +157,12 @@ M_ASSERTPKTHDR(m); + /* + * When packet comes from dummynet restore state from + * previous processing instead of the header. Yech! + * + * XXX add conditional compilation? + */ args.next_hop = ip_claim_next_hop(m); dummytag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); if (dummytag != NULL) { @@ -177,6 +183,12 @@ ifp = dt->ifp; } +#ifdef IPSEC + /* XXXRW: so is not defined -- merge error from netperf+sockets? */ + so = ipsec_getsocket(m); + (void)ipsec_setsocket(m, NULL); +#endif /*IPSEC*/ + if (ro == NULL) { ro = &iproute; bzero(ro, sizeof (*ro)); @@ -871,6 +883,7 @@ PACKET_TAG_IPFORWARD, sizeof(struct sockaddr_in *), M_NOWAIT); if (mtag == NULL) { + /* XXX statistic */ error = ENOBUFS; goto bad; } @@ -889,6 +902,7 @@ CSUM_IP_CHECKED | CSUM_IP_VALID; ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); + /* XXX netisr_queue(NETISR_IP, m); */ ip_input(m); goto done; } --- //depot/vendor/freebsd/src/sys/netinet/raw_ip.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet/raw_ip.c 2004/04/07 20:11:34 @@ -86,6 +86,9 @@ * so leave them not initialized and rely on BSS being set to 0. */ +/* + * XXXRW: Locking for mrouter bits? + */ /* The socket used to communicate with the multicast routing daemon. 
*/ struct socket *ip_mrouter; --- //depot/vendor/freebsd/src/sys/netinet/tcp_debug.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet/tcp_debug.c 2004/04/07 20:11:34 @@ -50,6 +50,7 @@ #include #include #include +#include #include #include --- //depot/vendor/freebsd/src/sys/netinet/tcp_input.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet/tcp_input.c 2004/04/07 20:11:34 @@ -424,7 +424,7 @@ struct tcpopt to; /* options in this segment */ struct rmxp_tao tao; /* our TAO cache entry */ int headlocked = 0; - struct sockaddr_in *next_hop = NULL; + struct sockaddr_in *next_hop; int rstreason; /* For badport_bandlim accounting purposes */ struct ip6_hdr *ip6 = NULL; @@ -1162,6 +1162,7 @@ acked = th->th_ack - tp->snd_una; tcpstat.tcps_rcvackpack++; tcpstat.tcps_rcvackbyte += acked; + SOCKBUF_LOCK(&so->so_snd); sbdrop(&so->so_snd, acked); if (SEQ_GT(tp->snd_una, tp->snd_recover) && SEQ_LEQ(th->th_ack, tp->snd_recover)) @@ -1199,7 +1200,9 @@ tp->t_rxtcur, tcp_timer_rexmt, tp); - sowwakeup(so); + sowwakeup_locked(so); + SOCKBUF_UNLOCK(&so->so_snd); + /* Unlocked read. */ if (so->so_snd.sb_cc) (void) tcp_output(tp); goto check_delack; @@ -2089,6 +2092,7 @@ incr = incr * incr / cw; tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<snd_scale); } + SOCKBUF_LOCK(&so->so_snd); if (acked > so->so_snd.sb_cc) { tp->snd_wnd -= so->so_snd.sb_cc; sbdrop(&so->so_snd, (int)so->so_snd.sb_cc); @@ -2098,7 +2102,8 @@ tp->snd_wnd -= acked; ourfinisacked = 0; } - sowwakeup(so); + sowwakeup_locked(so); + SOCKBUF_UNLOCK(&so->so_snd); /* detect una wraparound */ if (tcp_do_newreno && !IN_FASTRECOVERY(tp) && SEQ_GT(tp->snd_una, tp->snd_recover) && @@ -2214,6 +2219,7 @@ * soreceive. It's hard to imagine someone * actually wanting to send this much urgent data. */ + /* Unlocked read. 
*/ if (th->th_urp + so->so_rcv.sb_cc > sb_max) { th->th_urp = 0; /* XXX */ thflags &= ~TH_URG; /* XXX */ @@ -2233,6 +2239,8 @@ * of data past the urgent section as the original * spec states (in one of two places). */ + /* Unlocked read of sb_cc. */ + /* XXXRW: Unlocked assignment of so_oobmark, so_state. */ if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) { tp->rcv_up = th->th_seq + th->th_urp; so->so_oobmark = so->so_rcv.sb_cc + @@ -2925,6 +2933,10 @@ } tp->t_maxseg = mss; + /* + * XXXRW: read-modify-write on socket buffer without acquiring + * the socket buffer lock. + */ if ((so->so_rcv.sb_hiwat == tcp_recvspace) && metrics.rmx_recvpipe) bufsize = metrics.rmx_recvpipe; else --- //depot/vendor/freebsd/src/sys/netinet/tcp_output.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet/tcp_output.c 2004/04/07 20:11:34 @@ -210,6 +210,7 @@ * to send then the probe will be the FIN * itself. */ + /* Unlocked read of sb_cc. */ if (off < so->so_snd.sb_cc) flags &= ~TH_FIN; sendwin = 1; @@ -231,6 +232,7 @@ * be set to snd_una, the offset will be 0, and the length may * wind up 0. */ + /* Unlocked read of sb_cc. */ len = (long)ulmin(so->so_snd.sb_cc, sendwin) - off; @@ -292,6 +294,7 @@ len = tp->t_maxseg; sendalot = 1; } + /* Unlocked read of sb_cc. */ if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) flags &= ~TH_FIN; @@ -319,6 +322,7 @@ * * note: the len + off check is almost certainly unnecessary. */ + /* Unlocked read of sb_cc. */ if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */ (idle || (tp->t_flags & TF_NODELAY)) && len + off >= so->so_snd.sb_cc && @@ -397,6 +401,7 @@ * if window is nonzero, transmit what we can, * otherwise force out a byte. */ + /* Unlocked read of sb_cc. */ if (so->so_snd.sb_cc && !callout_active(tp->tt_rexmt) && !callout_active(tp->tt_persist)) { tp->t_rxtshift = 0; @@ -664,6 +669,7 @@ * give data to the user when a buffer fills or * a PUSH comes in.) */ + /* Unlocked read of sb_cc. 
*/ if (off + len == so->so_snd.sb_cc) flags |= TH_PUSH; } else { --- //depot/vendor/freebsd/src/sys/netinet/tcp_subr.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet/tcp_subr.c 2004/04/07 20:11:34 @@ -557,6 +557,7 @@ #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ + int callout_flag; tm = uma_zalloc(tcpcb_zone, M_NOWAIT | M_ZERO); if (tm == NULL) @@ -570,11 +571,17 @@ tcp_mssdflt; /* Set up our timeouts. */ - callout_init(tp->tt_rexmt = &tm->tcpcb_mem_rexmt, 0); - callout_init(tp->tt_persist = &tm->tcpcb_mem_persist, 0); - callout_init(tp->tt_keep = &tm->tcpcb_mem_keep, 0); - callout_init(tp->tt_2msl = &tm->tcpcb_mem_2msl, 0); - callout_init(tp->tt_delack = &tm->tcpcb_mem_delack, 0); + /* + * XXXRW: Are these actually MPSAFE? I think so, but need to + * review the timed wait code, as it has some list variables, + * etc, that are global. + */ + callout_flag = debug_mpsafenet ? CALLOUT_MPSAFE : 0; + callout_init(tp->tt_rexmt = &tm->tcpcb_mem_rexmt, callout_flag); + callout_init(tp->tt_persist = &tm->tcpcb_mem_persist, callout_flag); + callout_init(tp->tt_keep = &tm->tcpcb_mem_keep, callout_flag); + callout_init(tp->tt_2msl = &tm->tcpcb_mem_2msl, callout_flag); + callout_init(tp->tt_delack = &tm->tcpcb_mem_delack, callout_flag); if (tcp_do_rfc1323) tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); @@ -1539,7 +1546,7 @@ /* * Move a TCP connection into TIME_WAIT state. - * tcbinfo is unlocked. + * tcbinfo is locked. * inp is locked, and is unlocked before returning. 
*/ void @@ -1551,6 +1558,11 @@ int tw_time, acknow; struct socket *so; + INP_INFO_WLOCK_ASSERT(&tcbinfo); +#if 0 + INP_LOCK_ASSERT(tp); +#endif + tw = uma_zalloc(tcptw_zone, M_NOWAIT); if (tw == NULL) { tw = tcp_timer_2msl_tw(1); @@ -1601,13 +1613,19 @@ } tcp_discardcb(tp); so = inp->inp_socket; + SOCK_LOCK(so); so->so_pcb = NULL; tw->tw_cred = crhold(so->so_cred); tw->tw_so_options = so->so_options; + sotryfree(so); /* NB: drops lock */ + inp->inp_socket = NULL; if (acknow) tcp_twrespond(tw, TH_ACK); +#if 0 + /* XXXRW: Sam removed this, need to check why. */ sotryfree(so); inp->inp_socket = NULL; +#endif inp->inp_ppcb = (caddr_t)tw; inp->inp_vflag |= INP_TIMEWAIT; tcp_timer_2msl_reset(tw, tw_time); @@ -1683,6 +1701,8 @@ int isipv6 = inp->inp_inc.inc_isipv6; #endif + INP_LOCK_ASSERT(inp); + m = m_gethdr(M_DONTWAIT, MT_HEADER); if (m == NULL) return (ENOBUFS); --- //depot/vendor/freebsd/src/sys/netinet/tcp_syncache.c 2004/03/27 13:10:42 +++ //depot/user/rwatson/netperf/sys/netinet/tcp_syncache.c 2004/03/28 07:24:45 @@ -540,7 +540,7 @@ struct socket *so; struct tcpcb *tp; - GIANT_REQUIRED; /* XXX until socket locking */ + NET_ASSERT_GIANT(); INP_INFO_WLOCK_ASSERT(&tcbinfo); /* --- //depot/vendor/freebsd/src/sys/netinet/tcp_timer.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet/tcp_timer.c 2004/04/07 20:11:34 @@ -269,6 +269,9 @@ } } +/* + * XXXRW: This doesn't look MPSAFE. + */ void tcp_timer_2msl_reset(struct tcptw *tw, int timeo) { @@ -283,6 +286,9 @@ LIST_INSERT_BEFORE(tw_tail, tw, tw_2msl); } +/* + * XXXRW: This doesn't look MPSAFE. + */ void tcp_timer_2msl_stop(struct tcptw *tw) { @@ -291,6 +297,9 @@ LIST_REMOVE(tw, tw_2msl); } +/* + * XXXRW: This doesn't look MPSAFE. 
+ */ struct tcptw * tcp_timer_2msl_tw(int reuse) { --- //depot/vendor/freebsd/src/sys/netinet/tcp_usrreq.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet/tcp_usrreq.c 2004/04/07 20:11:34 @@ -116,7 +116,6 @@ static int tcp_usr_attach(struct socket *so, int proto, struct thread *td) { - int s = splnet(); int error; struct inpcb *inp; struct tcpcb *tp = 0; @@ -142,11 +141,71 @@ out: TCPDEBUG2(PRU_ATTACH); INP_INFO_WUNLOCK(&tcbinfo); - splx(s); return error; } /* + * Common code to setup and teardown locking. Most + * code begins with a COMMON_START macro and finishes + * with COMMON_END. You indicate whether the inpcb + * and enclosing head are to be locked read or write + * and whether there is an existing sockbuf lock that + * needs to be re-ordered. + */ +#define INI_NOLOCK 0 /* no head lock */ +#define INI_READ 1 /* read head lock */ +#define INI_WRITE 2 /* write head lock */ +#define SBI_NONE 0 /* no sockbuf lock to reorder */ +#define SBI_SND 1 /* reorder so->so_snd lock */ +#define SBI_RCV 2 /* reorder so->so_rcv lock */ + +#define COMMON_START0(_headrw, _sbrw) do { \ + if (_sbrw == SBI_SND) \ + SOCKBUF_UNLOCK(&so->so_snd); \ + else if (_sbrw == SBI_RCV) \ + SOCKBUF_UNLOCK(&so->so_rcv); \ + if (_headrw == INI_READ) \ + INP_INFO_RLOCK(&tcbinfo); \ + else if (_headrw == INI_WRITE) \ + INP_INFO_WLOCK(&tcbinfo); \ + inp = sotoinpcb(so); \ + if (inp == 0) { \ + if (_sbrw == SBI_SND) \ + SOCKBUF_LOCK(&so->so_snd); \ + else if (_sbrw == SBI_RCV) \ + SOCKBUF_LOCK(&so->so_rcv); \ + if (_headrw == INI_READ) \ + INP_INFO_RUNLOCK(&tcbinfo); \ + else if (_headrw == INI_WRITE) \ + INP_INFO_WUNLOCK(&tcbinfo); \ + return EINVAL; \ + } \ + INP_LOCK(inp); \ + if (_sbrw == SBI_SND) \ + SOCKBUF_LOCK(&so->so_snd); \ + else if (_sbrw == SBI_RCV) \ + SOCKBUF_LOCK(&so->so_rcv); \ + if (_headrw == INI_READ) \ + INP_INFO_RUNLOCK(&tcbinfo); \ + tp = intotcpcb(inp); \ + TCPDEBUG1(); \ +} while(0) + +#define COMMON_START(_headrw, _sbrw) do { \ + TCPDEBUG0; \ + 
COMMON_START0(_headrw, _sbrw); \ +} while (0) + +#define COMMON_END(_headrw, req) \ + TCPDEBUG2(req); \ + do { \ + if (tp) \ + INP_UNLOCK(inp); \ + if (_headrw == INI_WRITE) \ + INP_INFO_WUNLOCK(&tcbinfo); \ + } while(0) + +/* * pru_detach() detaches the TCP protocol from the socket. * If the protocol state is non-embryonic, then can't * do this directly: have to initiate a pru_disconnect(), @@ -156,83 +215,26 @@ static int tcp_usr_detach(struct socket *so) { - int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; - TCPDEBUG0; - INP_INFO_WLOCK(&tcbinfo); - inp = sotoinpcb(so); - if (inp == 0) { - INP_INFO_WUNLOCK(&tcbinfo); - splx(s); - return EINVAL; /* XXX */ - } - INP_LOCK(inp); - tp = intotcpcb(inp); - TCPDEBUG1(); + COMMON_START(INI_WRITE, SBI_NONE); tp = tcp_disconnect(tp); - - TCPDEBUG2(PRU_DETACH); - if (tp) - INP_UNLOCK(inp); - INP_INFO_WUNLOCK(&tcbinfo); - splx(s); + COMMON_END(INI_WRITE, PRU_DETACH); return error; } -#define INI_NOLOCK 0 -#define INI_READ 1 -#define INI_WRITE 2 - -#define COMMON_START() \ - TCPDEBUG0; \ - do { \ - if (inirw == INI_READ) \ - INP_INFO_RLOCK(&tcbinfo); \ - else if (inirw == INI_WRITE) \ - INP_INFO_WLOCK(&tcbinfo); \ - inp = sotoinpcb(so); \ - if (inp == 0) { \ - if (inirw == INI_READ) \ - INP_INFO_RUNLOCK(&tcbinfo); \ - else if (inirw == INI_WRITE) \ - INP_INFO_WUNLOCK(&tcbinfo); \ - splx(s); \ - return EINVAL; \ - } \ - INP_LOCK(inp); \ - if (inirw == INI_READ) \ - INP_INFO_RUNLOCK(&tcbinfo); \ - tp = intotcpcb(inp); \ - TCPDEBUG1(); \ -} while(0) - -#define COMMON_END(req) \ -out: TCPDEBUG2(req); \ - do { \ - if (tp) \ - INP_UNLOCK(inp); \ - if (inirw == INI_WRITE) \ - INP_INFO_WUNLOCK(&tcbinfo); \ - splx(s); \ - return error; \ - goto out; \ -} while(0) - /* * Give the socket an address. 
*/ static int tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { - int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in *sinp; - const int inirw = INI_WRITE; sinp = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sinp)) @@ -245,23 +247,20 @@ IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) return (EAFNOSUPPORT); - COMMON_START(); + COMMON_START(INI_WRITE, SBI_NONE); error = in_pcbbind(inp, nam, td->td_ucred); - if (error) - goto out; - COMMON_END(PRU_BIND); + COMMON_END(INI_WRITE, PRU_BIND); + return error; } #ifdef INET6 static int tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { - int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in6 *sin6p; - const int inirw = INI_WRITE; sin6p = (struct sockaddr_in6 *)nam; if (nam->sa_len != sizeof (*sin6p)) @@ -274,7 +273,7 @@ IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) return (EAFNOSUPPORT); - COMMON_START(); + COMMON_START(INI_WRITE, SBI_NONE); inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { @@ -292,9 +291,9 @@ } } error = in6_pcbbind(inp, nam, td->td_ucred); - if (error) - goto out; - COMMON_END(PRU_BIND); +out: + COMMON_END(INI_WRITE, PRU_BIND); + return error; } #endif /* INET6 */ @@ -304,31 +303,28 @@ static int tcp_usr_listen(struct socket *so, struct thread *td) { - int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; - const int inirw = INI_WRITE; - COMMON_START(); + COMMON_START(INI_WRITE, SBI_NONE); if (inp->inp_lport == 0) error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); if (error == 0) tp->t_state = TCPS_LISTEN; - COMMON_END(PRU_LISTEN); + COMMON_END(INI_WRITE, PRU_LISTEN); + return error; } #ifdef INET6 static int tcp6_usr_listen(struct socket *so, struct thread *td) { - int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; - const int inirw = INI_WRITE; - COMMON_START(); + 
COMMON_START(INI_WRITE, SBI_NONE); if (inp->inp_lport == 0) { inp->inp_vflag &= ~INP_IPV4; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) @@ -337,7 +333,8 @@ } if (error == 0) tp->t_state = TCPS_LISTEN; - COMMON_END(PRU_LISTEN); + COMMON_END(INI_WRITE, PRU_LISTEN); + return error; } #endif /* INET6 */ @@ -351,12 +348,10 @@ static int tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { - int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in *sinp; - const int inirw = INI_WRITE; sinp = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sinp)) @@ -370,23 +365,23 @@ if (td && jailed(td->td_ucred)) prison_remote_ip(td->td_ucred, 0, &sinp->sin_addr.s_addr); - COMMON_START(); + COMMON_START(INI_WRITE, SBI_NONE); if ((error = tcp_connect(tp, nam, td)) != 0) goto out; error = tcp_output(tp); - COMMON_END(PRU_CONNECT); +out: + COMMON_END(INI_WRITE, PRU_CONNECT); + return error; } #ifdef INET6 static int tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { - int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in6 *sin6p; - const int inirw = INI_WRITE; sin6p = (struct sockaddr_in6 *)nam; if (nam->sa_len != sizeof (*sin6p)) @@ -398,7 +393,7 @@ && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) return (EAFNOSUPPORT); - COMMON_START(); + COMMON_START(INI_WRITE, SBI_NONE); if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { struct sockaddr_in sin; @@ -421,7 +416,9 @@ if ((error = tcp6_connect(tp, nam, td)) != 0) goto out; error = tcp_output(tp); - COMMON_END(PRU_CONNECT); +out: + COMMON_END(INI_WRITE, PRU_CONNECT); + return error; } #endif /* INET6 */ @@ -439,15 +436,14 @@ static int tcp_usr_disconnect(struct socket *so) { - int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; - const int inirw = INI_WRITE; - COMMON_START(); + COMMON_START(INI_WRITE, SBI_NONE); tp = tcp_disconnect(tp); - COMMON_END(PRU_DISCONNECT); + COMMON_END(INI_WRITE, 
PRU_DISCONNECT); + return error; } /* @@ -458,7 +454,6 @@ static int tcp_usr_accept(struct socket *so, struct sockaddr **nam) { - int s; int error = 0; struct inpcb *inp = NULL; struct tcpcb *tp = NULL; @@ -468,34 +463,21 @@ if (so->so_state & SS_ISDISCONNECTED) { error = ECONNABORTED; - goto out; + goto out; /* NB: ok 'cuz tp is NULL */ } - s = splnet(); - INP_INFO_RLOCK(&tcbinfo); - inp = sotoinpcb(so); - if (!inp) { - INP_INFO_RUNLOCK(&tcbinfo); - splx(s); - return (EINVAL); - } - INP_LOCK(inp); - INP_INFO_RUNLOCK(&tcbinfo); - tp = intotcpcb(inp); - TCPDEBUG1(); + COMMON_START0(INI_READ, SBI_NONE); /* - * We inline in_setpeeraddr and COMMON_END here, so that we can - * copy the data of interest and defer the malloc until after we - * release the lock. + * We inline in_setpeeraddr so that we can copy the + * data of interest and defer the malloc until after + * we release the lock. */ port = inp->inp_fport; addr = inp->inp_faddr; -out: TCPDEBUG2(PRU_ACCEPT); - if (tp) - INP_UNLOCK(inp); - splx(s); +out: + COMMON_END(INI_READ, PRU_ACCEPT); if (error == 0) *nam = in_sockaddr(port, &addr); return error; @@ -505,7 +487,6 @@ static int tcp6_usr_accept(struct socket *so, struct sockaddr **nam) { - int s; struct inpcb *inp = NULL; int error = 0; struct tcpcb *tp = NULL; @@ -517,25 +498,14 @@ if (so->so_state & SS_ISDISCONNECTED) { error = ECONNABORTED; - goto out; + goto out; /* NB: ok 'cuz tp is NULL */ } - s = splnet(); - INP_INFO_RLOCK(&tcbinfo); - inp = sotoinpcb(so); - if (inp == 0) { - INP_INFO_RUNLOCK(&tcbinfo); - splx(s); - return (EINVAL); - } - INP_LOCK(inp); - INP_INFO_RUNLOCK(&tcbinfo); - tp = intotcpcb(inp); - TCPDEBUG1(); + COMMON_START0(INI_READ, SBI_NONE); /* - * We inline in6_mapped_peeraddr and COMMON_END here, so that we can - * copy the data of interest and defer the malloc until after we - * release the lock. + * We inline in6_mapped_peeraddr so that we can + * copy the data of interest and defer the malloc + * until after we release the lock. 
*/ if (inp->inp_vflag & INP_IPV4) { v4 = 1; @@ -546,10 +516,8 @@ addr6 = inp->in6p_faddr; } -out: TCPDEBUG2(PRU_ACCEPT); - if (tp) - INP_UNLOCK(inp); - splx(s); +out: + COMMON_END(INI_READ, PRU_ACCEPT); if (error == 0) { if (v4) *nam = in6_v4mapsin6_sockaddr(port, &addr); @@ -587,18 +555,17 @@ static int tcp_usr_shutdown(struct socket *so) { - int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; - const int inirw = INI_WRITE; - COMMON_START(); + COMMON_START(INI_WRITE, SBI_NONE); socantsendmore(so); tp = tcp_usrclosed(tp); if (tp) error = tcp_output(tp); - COMMON_END(PRU_SHUTDOWN); + COMMON_END(INI_WRITE, PRU_SHUTDOWN); + return error; } /* @@ -607,15 +574,14 @@ static int tcp_usr_rcvd(struct socket *so, int flags) { - int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; - const int inirw = INI_READ; - COMMON_START(); + COMMON_START(INI_READ, SBI_RCV); tcp_output(tp); - COMMON_END(PRU_RCVD); + COMMON_END(INI_READ, PRU_RCVD); + return error; } /* @@ -629,11 +595,9 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { - int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; - const int inirw = INI_WRITE; #ifdef INET6 int isipv6; #endif @@ -645,9 +609,11 @@ * We really want to have to this function upgrade from read lock * to write lock. XXX */ + SOCKBUF_UNLOCK(&so->so_snd); INP_INFO_WLOCK(&tcbinfo); inp = sotoinpcb(so); if (inp == NULL) { + SOCKBUF_LOCK(&so->so_snd); /* * OOPS! we lost a race, the TCP session got reset after * we checked SS_CANTSENDMORE, eg: while doing uiomove or a @@ -663,6 +629,7 @@ goto out; } INP_LOCK(inp); + SOCKBUF_LOCK(&so->so_snd); #ifdef INET6 isipv6 = nam && nam->sa_family == AF_INET6; #endif /* INET6 */ @@ -748,13 +715,16 @@ tp->snd_wnd = TTCP_CLIENT_SND_WND; tcp_mss(tp, -1); } + /* Unlocked read of sb_cc. 
*/ tp->snd_up = tp->snd_una + so->so_snd.sb_cc; tp->t_force = 1; error = tcp_output(tp); tp->t_force = 0; } - COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : +out: + COMMON_END(INI_WRITE, (flags & PRUS_OOB) ? PRU_SENDOOB : ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); + return error; } /* @@ -763,15 +733,14 @@ static int tcp_usr_abort(struct socket *so) { - int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; - const int inirw = INI_WRITE; - COMMON_START(); + COMMON_START(INI_WRITE, SBI_NONE); tp = tcp_drop(tp, ECONNABORTED); - COMMON_END(PRU_ABORT); + COMMON_END(INI_WRITE, PRU_ABORT); + return error; } /* @@ -780,13 +749,11 @@ static int tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) { - int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; - const int inirw = INI_READ; - COMMON_START(); + COMMON_START(INI_READ, SBI_NONE); if ((so->so_oobmark == 0 && (so->so_state & SS_RCVATMARK) == 0) || so->so_options & SO_OOBINLINE || @@ -802,7 +769,9 @@ *mtod(m, caddr_t) = tp->t_iobc; if ((flags & MSG_PEEK) == 0) tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); - COMMON_END(PRU_RCVOOB); +out: + COMMON_END(INI_READ, PRU_RCVOOB); + return error; } /* xxx - should be const */ @@ -1021,17 +990,15 @@ struct socket *so; struct sockopt *sopt; { - int error, opt, optval, s; + int error, opt, optval; struct inpcb *inp; struct tcpcb *tp; error = 0; - s = splnet(); /* XXX */ INP_INFO_RLOCK(&tcbinfo); inp = sotoinpcb(so); if (inp == NULL) { INP_INFO_RUNLOCK(&tcbinfo); - splx(s); return (ECONNRESET); } INP_LOCK(inp); @@ -1044,7 +1011,6 @@ #endif /* INET6 */ error = ip_ctloutput(so, sopt); INP_UNLOCK(inp); - splx(s); return (error); } tp = intotcpcb(inp); @@ -1151,7 +1117,6 @@ break; } INP_UNLOCK(inp); - splx(s); return (error); } @@ -1238,7 +1203,9 @@ tp = tcp_drop(tp, 0); else { soisdisconnecting(so); + SOCKBUF_LOCK(&so->so_rcv); sbflush(&so->so_rcv); + SOCKBUF_UNLOCK(&so->so_rcv); tp = tcp_usrclosed(tp); if (tp) (void) tcp_output(tp); 
@@ -1291,4 +1258,3 @@ } return (tp); } - --- //depot/vendor/freebsd/src/sys/netinet/udp_usrreq.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet/udp_usrreq.c 2004/04/07 20:11:34 @@ -897,24 +897,67 @@ SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, &udp_recvspace, 0, "Maximum space for incoming UDP datagrams"); +/* + * Common code to setup and teardown locking. Most + * code begins with a COMMON_START macro and finishes + * with COMMON_END. You indicate whether the inpcb + * and enclosing head are to be locked read or write + * and whether there is an existing sockbuf lock that + * needs to be re-ordered. + */ +#define INI_NOLOCK 0 /* no head lock */ +#define INI_READ 1 /* read head lock */ +#define INI_WRITE 2 /* write head lock */ +#define SBI_NONE 0 /* no sockbuf lock to reorder */ +#define SBI_SND 1 /* reorder so->so_snd lock */ +#define SBI_RCV 2 /* reorder so->so_rcv lock */ + +#define COMMON_START(_headrw, _sbrw) do { \ + if (_sbrw == SBI_SND) \ + SOCKBUF_UNLOCK(&so->so_snd); \ + else if (_sbrw == SBI_RCV) \ + SOCKBUF_UNLOCK(&so->so_rcv); \ + if (_headrw == INI_READ) \ + INP_INFO_RLOCK(&udbinfo); \ + else if (_headrw == INI_WRITE) \ + INP_INFO_WLOCK(&udbinfo); \ + inp = sotoinpcb(so); \ + if (inp == 0) { \ + if (_sbrw == SBI_SND) \ + SOCKBUF_LOCK(&so->so_snd); \ + else if (_sbrw == SBI_RCV) \ + SOCKBUF_LOCK(&so->so_rcv); \ + if (_headrw == INI_READ) \ + INP_INFO_RUNLOCK(&udbinfo); \ + else if (_headrw == INI_WRITE) \ + INP_INFO_WUNLOCK(&udbinfo); \ + return EINVAL; \ + } \ + INP_LOCK(inp); \ + if (_sbrw == SBI_SND) \ + SOCKBUF_LOCK(&so->so_snd); \ + else if (_sbrw == SBI_RCV) \ + SOCKBUF_LOCK(&so->so_rcv); \ + if (_headrw == INI_READ) \ + INP_INFO_RUNLOCK(&udbinfo); \ +} while(0) + +#define COMMON_END(_headrw) \ + do { \ + INP_UNLOCK(inp); \ + if (_headrw == INI_WRITE) \ + INP_INFO_WUNLOCK(&udbinfo); \ + } while(0) + static int udp_abort(struct socket *so) { struct inpcb *inp; - int s; - INP_INFO_WLOCK(&udbinfo); - inp = 
sotoinpcb(so); - if (inp == 0) { - INP_INFO_WUNLOCK(&udbinfo); - return EINVAL; /* ??? possible? panic instead? */ - } - INP_LOCK(inp); + COMMON_START(INI_WRITE, SBI_NONE); soisdisconnected(so); - s = splnet(); in_pcbdetach(inp); INP_INFO_WUNLOCK(&udbinfo); - splx(s); return 0; } @@ -956,20 +999,11 @@ udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; - int s, error; + int error; - INP_INFO_WLOCK(&udbinfo); - inp = sotoinpcb(so); - if (inp == 0) { - INP_INFO_WUNLOCK(&udbinfo); - return EINVAL; - } - INP_LOCK(inp); - s = splnet(); + COMMON_START(INI_WRITE, SBI_NONE); error = in_pcbbind(inp, nam, td->td_ucred); - splx(s); - INP_UNLOCK(inp); - INP_INFO_WUNLOCK(&udbinfo); + COMMON_END(INI_WRITE); return error; } @@ -977,31 +1011,22 @@ udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; - int s, error; + int error; struct sockaddr_in *sin; - INP_INFO_WLOCK(&udbinfo); - inp = sotoinpcb(so); - if (inp == 0) { - INP_INFO_WUNLOCK(&udbinfo); - return EINVAL; - } - INP_LOCK(inp); + COMMON_START(INI_WRITE, SBI_NONE); if (inp->inp_faddr.s_addr != INADDR_ANY) { - INP_UNLOCK(inp); - INP_INFO_WUNLOCK(&udbinfo); - return EISCONN; + error = EISCONN; + goto out; } - s = splnet(); sin = (struct sockaddr_in *)nam; if (td && jailed(td->td_ucred)) prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr); error = in_pcbconnect(inp, nam, td->td_ucred); - splx(s); if (error == 0) soisconnected(so); - INP_UNLOCK(inp); - INP_INFO_WUNLOCK(&udbinfo); +out: + COMMON_END(INI_WRITE); return error; } @@ -1009,49 +1034,31 @@ udp_detach(struct socket *so) { struct inpcb *inp; - int s; - INP_INFO_WLOCK(&udbinfo); - inp = sotoinpcb(so); - if (inp == 0) { - INP_INFO_WUNLOCK(&udbinfo); - return EINVAL; - } - INP_LOCK(inp); - s = splnet(); + COMMON_START(INI_WRITE, SBI_NONE); in_pcbdetach(inp); INP_INFO_WUNLOCK(&udbinfo); - splx(s); return 0; } static int udp_disconnect(struct socket *so) { + int error = 0; struct inpcb *inp; 
- int s; - INP_INFO_WLOCK(&udbinfo); - inp = sotoinpcb(so); - if (inp == 0) { - INP_INFO_WUNLOCK(&udbinfo); - return EINVAL; - } - INP_LOCK(inp); + COMMON_START(INI_WRITE, SBI_NONE); if (inp->inp_faddr.s_addr == INADDR_ANY) { - INP_INFO_WUNLOCK(&udbinfo); - INP_UNLOCK(inp); - return ENOTCONN; + error = ENOTCONN; + goto out; } - s = splnet(); in_pcbdisconnect(inp); inp->inp_laddr.s_addr = INADDR_ANY; - INP_UNLOCK(inp); - INP_INFO_WUNLOCK(&udbinfo); - splx(s); so->so_state &= ~SS_ISCONNECTED; /* XXX */ - return 0; +out: + COMMON_END(INI_WRITE); + return error; } static int @@ -1059,20 +1066,23 @@ struct mbuf *control, struct thread *td) { struct inpcb *inp; - int ret; + int error; + SOCKBUF_UNLOCK(&so->so_snd); INP_INFO_WLOCK(&udbinfo); inp = sotoinpcb(so); if (inp == 0) { + SOCKBUF_LOCK(&so->so_snd); INP_INFO_WUNLOCK(&udbinfo); m_freem(m); return EINVAL; } INP_LOCK(inp); - ret = udp_output(inp, m, addr, control, td); + SOCKBUF_LOCK(&so->so_snd); + error = udp_output(inp, m, addr, control, td); INP_UNLOCK(inp); INP_INFO_WUNLOCK(&udbinfo); - return ret; + return error; } int @@ -1080,16 +1090,9 @@ { struct inpcb *inp; - INP_INFO_RLOCK(&udbinfo); - inp = sotoinpcb(so); - if (inp == 0) { - INP_INFO_RUNLOCK(&udbinfo); - return EINVAL; - } - INP_LOCK(inp); - INP_INFO_RUNLOCK(&udbinfo); + COMMON_START(INI_READ, SBI_NONE); socantsendmore(so); - INP_UNLOCK(inp); + COMMON_END(INI_READ); return 0; } --- //depot/vendor/freebsd/src/sys/netinet6/in6_gif.c 2003/10/29 07:10:52 +++ //depot/user/rwatson/netperf/sys/netinet6/in6_gif.c 2004/03/09 15:48:47 @@ -82,6 +82,11 @@ &rip6_usrreqs }; +/* + * XXXRW: in6_gif per-softc locking required. Need to lock both the + * members, and also prevent the softc from disappearing during use + * (including the route). + */ int in6_gif_output(ifp, family, m) struct ifnet *ifp; @@ -379,6 +384,10 @@ in6_gif_attach(sc) struct gif_softc *sc; { + + /* + * XXXRW: Technically, encap_attach() can return NULL due to ENOMEM? 
+ */ sc->encap_cookie6 = encap_attach_func(AF_INET6, -1, gif_encapcheck, (struct protosw *)&in6_gif_protosw, sc); if (sc->encap_cookie6 == NULL) --- //depot/vendor/freebsd/src/sys/netinet6/in6_ifattach.c 2004/02/25 19:55:40 +++ //depot/user/rwatson/netperf/sys/netinet6/in6_ifattach.c 2004/03/07 10:23:16 @@ -226,8 +226,8 @@ struct sockaddr_dl *sdl; u_int8_t *addr; size_t addrlen; - static u_int8_t allzero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - static u_int8_t allone[8] = + static const u_int8_t allzero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + static const u_int8_t allone[8] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; for (ifa = ifp->if_addrlist.tqh_first; --- //depot/vendor/freebsd/src/sys/netinet6/in6_pcb.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet6/in6_pcb.c 2004/04/07 20:11:34 @@ -438,6 +438,7 @@ in_pcbremlists(inp); if (so) { + SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); } --- //depot/vendor/freebsd/src/sys/netinet6/in6_prefix.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet6/in6_prefix.c 2004/04/07 20:11:34 @@ -96,6 +96,8 @@ static int link_stray_ia6s __P((struct rr_prefix *rpp)); static void rp_remove __P((struct rr_prefix *rpp)); +static int delete_each_prefix __P((struct rr_prefix *rpp, u_char origin)); + /* * Copy bits from src to tgt, from off bit for len bits. * Caller must specify collect tgtsize and srcsize. 
@@ -951,7 +953,7 @@ } } -int +static int delete_each_prefix(struct rr_prefix *rpp, u_char origin) { int error = 0; --- //depot/vendor/freebsd/src/sys/netinet6/in6_prefix.h 2001/07/17 00:20:42 +++ //depot/user/rwatson/netperf/sys/netinet6/in6_prefix.h 2004/03/07 11:31:09 @@ -88,4 +88,3 @@ void in6_rr_timer __P((void *)); extern struct callout in6_rr_timer_ch; -int delete_each_prefix __P((struct rr_prefix *rpp, u_char origin)); --- //depot/vendor/freebsd/src/sys/netinet6/in6_proto.c 2004/04/07 13:52:05 +++ //depot/user/rwatson/netperf/sys/netinet6/in6_proto.c 2004/04/07 20:11:34 @@ -67,6 +67,8 @@ #include "opt_random_ip_id.h" #include +#include +#include #include #include #include --- //depot/vendor/freebsd/src/sys/netinet6/in6_rmx.c 2003/11/20 12:10:44 +++ //depot/user/rwatson/netperf/sys/netinet6/in6_rmx.c 2004/02/28 14:29:37 @@ -79,6 +79,8 @@ #include #include #include +#include +#include #include #include #include --- //depot/vendor/freebsd/src/sys/netinet6/nd6.c 2004/01/28 07:06:38 +++ //depot/user/rwatson/netperf/sys/netinet6/nd6.c 2004/03/11 19:06:45 @@ -98,6 +98,9 @@ /* for debugging? */ static int nd6_inuse, nd6_allocated; +/* + * XXXRW: What follows requires locking. + */ struct llinfo_nd6 llinfo_nd6 = {&llinfo_nd6, &llinfo_nd6}; struct nd_drhead nd_defrouter; struct nd_prhead nd_prefix = { 0 }; --- //depot/vendor/freebsd/src/sys/netipx/ipx.c 2003/06/10 22:25:41 +++ //depot/user/rwatson/netperf/sys/netipx/ipx.c 2004/03/18 06:20:12 @@ -50,6 +50,9 @@ #include #include +/* + * XXXRW: Requires synchronization. 
+ */ struct ipx_ifaddr *ipx_ifaddr; static void ipx_ifscrub(struct ifnet *ifp, struct ipx_ifaddr *ia); --- //depot/vendor/freebsd/src/sys/netipx/ipx_input.c 2003/11/08 14:30:39 +++ //depot/user/rwatson/netperf/sys/netipx/ipx_input.c 2004/03/19 02:34:30 @@ -72,23 +72,39 @@ SYSCTL_INT(_net_ipx, OID_AUTO, ipxnetbios, CTLFLAG_RW, &ipxnetbios, 0, ""); -union ipx_net ipx_zeronet; -union ipx_host ipx_zerohost; +const union ipx_net ipx_zeronet; +const union ipx_host ipx_zerohost; -union ipx_net ipx_broadnet; -union ipx_host ipx_broadhost; +const union ipx_net ipx_broadnet = { .s_net[0] = 0xffff, + .s_net[1] = 0xffff }; +const union ipx_host ipx_broadhost = { .s_host[0] = 0xffff, + .s_host[1] = 0xffff, + .s_host[2] = 0xffff }; +/* + * XXXRW: Locking needed here. + */ struct ipxstat ipxstat; + +/* + * XXXRW: These should/could also be const, since they're set only at + * init time. + */ struct sockaddr_ipx ipx_netmask, ipx_hostmask; -static u_short allones[] = {-1, -1, -1}; +/* + * XXXRW: Locking needed here. + */ +u_short ipxpcb_lport_cache; +struct ipxpcbhead ipxpcb_list; +struct ipxpcbhead ipxrawpcb_list; -struct ipxpcb ipxpcb; -struct ipxpcb ipxrawpcb; - static int ipxqmaxlen = IFQ_MAXLEN; static struct ifqueue ipxintrq; +/* + * XXXRW: Locking needed here. + */ long ipx_pexseq; static int ipx_do_route(struct ipx_addr *src, struct route *ro); @@ -103,13 +119,14 @@ void ipx_init() { - ipx_broadnet = *(union ipx_net *)allones; - ipx_broadhost = *(union ipx_host *)allones; read_random(&ipx_pexseq, sizeof ipx_pexseq); - ipxpcb.ipxp_next = ipxpcb.ipxp_prev = &ipxpcb; - ipxrawpcb.ipxp_next = ipxrawpcb.ipxp_prev = &ipxrawpcb; - + LIST_INIT(&ipxpcb_list); + LIST_INIT(&ipxrawpcb_list); + + /* + * XXXRW: These should be const? + */ ipx_netmask.sipx_len = 6; ipx_netmask.sipx_addr.x_net = ipx_broadnet; @@ -133,6 +150,9 @@ struct ipx_ifaddr *ia; int len; + /* + * XXXRW: Would be nice to remove this. 
+ */ GIANT_REQUIRED; /* @@ -153,8 +173,7 @@ /* * Give any raw listeners a crack at the packet */ - for (ipxp = ipxrawpcb.ipxp_next; ipxp != &ipxrawpcb; - ipxp = ipxp->ipxp_next) { + LIST_FOREACH(ipxp, &ipxrawpcb_list, ipxp_list) { struct mbuf *m1 = m_copy(m, 0, (int)M_COPYALL); if (m1 != NULL) ipx_input(m1, ipxp); @@ -467,8 +486,7 @@ /* * Give any raw listeners a crack at the packet */ - for (ipxp = ipxrawpcb.ipxp_next; ipxp != &ipxrawpcb; - ipxp = ipxp->ipxp_next) { + LIST_FOREACH(ipxp, &ipxrawpcb_list, ipxp_list) { struct mbuf *m0 = m_copy(m, 0, (int)M_COPYALL); if (m0 != NULL) { register struct ipx *ipx; --- //depot/vendor/freebsd/src/sys/netipx/ipx_pcb.c 2004/02/29 19:15:33 +++ //depot/user/rwatson/netperf/sys/netipx/ipx_pcb.c 2004/03/19 02:34:30 @@ -56,7 +56,7 @@ int ipx_pcballoc(so, head, td) struct socket *so; - struct ipxpcb *head; + struct ipxpcbhead *head; struct thread *td; { register struct ipxpcb *ipxp; @@ -107,13 +107,16 @@ } ipxp->ipxp_laddr = sipx->sipx_addr; noname: + /* + * XXXRW: I wonder what causes this loop to terminate... 
+ */ if (lport == 0) do { - ipxpcb.ipxp_lport++; - if ((ipxpcb.ipxp_lport < IPXPORT_RESERVED) || - (ipxpcb.ipxp_lport >= IPXPORT_WELLKNOWN)) - ipxpcb.ipxp_lport = IPXPORT_RESERVED; - lport = htons(ipxpcb.ipxp_lport); + ipxpcb_lport_cache++; + if ((ipxpcb_lport_cache < IPXPORT_RESERVED) || + (ipxpcb_lport_cache >= IPXPORT_WELLKNOWN)) + ipxpcb_lport_cache = IPXPORT_RESERVED; + lport = htons(ipxpcb_lport_cache); } while (ipx_pcblookup(&zeroipx_addr, lport, 0)); ipxp->ipxp_lport = lport; return (0); @@ -268,6 +271,7 @@ { struct socket *so = ipxp->ipxp_socket; + SOCK_LOCK(so); so->so_pcb = 0; sotryfree(so); if (ipxp->ipxp_route.ro_rt != NULL) @@ -323,18 +327,27 @@ register struct ipxpcb *ipxp, *oinp; int s = splimp(); - for (ipxp = (&ipxpcb)->ipxp_next; ipxp != (&ipxpcb);) { + for (ipxp = LIST_FIRST(&ipxpcb_list); ipxp != NULL;) { if (!ipx_hosteq(*dst,ipxp->ipxp_faddr)) { - next: - ipxp = ipxp->ipxp_next; +next: + /* Advance, then re-test the loop condition: ipxp may now be NULL. */ + ipxp = LIST_NEXT(ipxp, ipxp_list); continue; } if (ipxp->ipxp_socket == 0) goto next; if (errno) ipxp->ipxp_socket->so_error = errno; + /* + * XXXRW: I can't find any consumers of this interface, and + * so don't know if calling the notify function could result + * in the ipxp list pointers changing. Before moving this to + * the queue(9) macros, there was some fancy footwork here + * that didn't seem to be useful. If the list can be changed + * by a notification, it will make locking very difficult. 
+ */ oinp = ipxp; - ipxp = ipxp->ipxp_next; + ipxp = LIST_NEXT(ipxp, ipxp_list); oinp->ipxp_notify_param = param; (*notify)(oinp); } @@ -372,7 +385,7 @@ u_short fport; fport = faddr->x_port; - for (ipxp = (&ipxpcb)->ipxp_next; ipxp != (&ipxpcb); ipxp = ipxp->ipxp_next) { + LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) { if (ipxp->ipxp_lport != lport) continue; wildcard = 0; --- //depot/vendor/freebsd/src/sys/netipx/ipx_pcb.h 2003/01/01 10:50:52 +++ //depot/user/rwatson/netperf/sys/netipx/ipx_pcb.h 2004/03/19 02:34:30 @@ -43,9 +43,7 @@ * IPX protocol interface control block. */ struct ipxpcb { - struct ipxpcb *ipxp_next; /* doubly linked list */ - struct ipxpcb *ipxp_prev; - struct ipxpcb *ipxp_head; + LIST_ENTRY(ipxpcb) ipxp_list; /* list of ipxpcbs */ struct socket *ipxp_socket; /* back pointer to socket */ struct ipx_addr ipxp_faddr; /* destination address */ struct ipx_addr ipxp_laddr; /* socket's address */ @@ -58,6 +56,11 @@ u_char ipxp_rpt; /* last received packet type by ipx_input() */ }; +LIST_HEAD(ipxpcbhead, ipxpcb); +extern struct ipxpcbhead ipxpcb_list; +extern struct ipxpcbhead ipxrawpcb_list; +extern u_short ipxpcb_lport_cache; + /* possible flags */ #define IPXP_IN_ABORT 0x1 /* calling abort through socket */ @@ -82,7 +85,7 @@ #ifdef _KERNEL extern struct ipxpcb ipxpcb; /* head of list */ -int ipx_pcballoc(struct socket *so, struct ipxpcb *head, +int ipx_pcballoc(struct socket *so, struct ipxpcbhead *head, struct thread *p); int ipx_pcbbind(struct ipxpcb *ipxp, struct sockaddr *nam, struct thread *p); --- //depot/vendor/freebsd/src/sys/netipx/ipx_usrreq.c 2003/11/17 16:40:43 +++ //depot/user/rwatson/netperf/sys/netipx/ipx_usrreq.c 2004/03/19 02:34:30 @@ -423,8 +423,12 @@ s = splnet(); ipx_pcbdetach(ipxp); splx(s); - sotryfree(so); - soisdisconnected(so); + SOCK_LOCK(so); + if (so->so_count != 0) { + soisdisconnected(so); + SOCK_UNLOCK(so); + } else + sofree(so); return (0); } @@ -441,7 +445,7 @@ if (ipxp != NULL) return (EINVAL); s = splnet(); - error 
= ipx_pcballoc(so, &ipxpcb, td); + error = ipx_pcballoc(so, &ipxpcb_list, td); splx(s); if (error == 0) error = soreserve(so, ipxsendspace, ipxrecvspace); @@ -602,7 +606,7 @@ if (td != NULL && (error = suser(td)) != 0) return (error); s = splnet(); - error = ipx_pcballoc(so, &ipxrawpcb, td); + error = ipx_pcballoc(so, &ipxrawpcb_list, td); splx(s); if (error) return (error); --- //depot/vendor/freebsd/src/sys/netipx/ipx_var.h 2003/03/04 15:20:46 +++ //depot/user/rwatson/netperf/sys/netipx/ipx_var.h 2004/03/19 02:34:30 @@ -66,19 +66,19 @@ extern int ipxcksum; extern long ipx_pexseq; extern struct ipxstat ipxstat; -extern struct ipxpcb ipxrawpcb; extern struct pr_usrreqs ipx_usrreqs; extern struct pr_usrreqs ripx_usrreqs; extern struct sockaddr_ipx ipx_netmask; extern struct sockaddr_ipx ipx_hostmask; -extern union ipx_net ipx_zeronet; -extern union ipx_host ipx_zerohost; -extern union ipx_net ipx_broadnet; -extern union ipx_host ipx_broadhost; +extern const union ipx_net ipx_zeronet; +extern const union ipx_host ipx_zerohost; +extern const union ipx_net ipx_broadnet; +extern const union ipx_host ipx_broadhost; struct ifnet; struct ipx_addr; +struct ipxpcb; struct mbuf; struct thread; struct route; --- //depot/vendor/freebsd/src/sys/netipx/spx_usrreq.c 2004/02/29 19:15:33 +++ //depot/user/rwatson/netperf/sys/netipx/spx_usrreq.c 2004/03/19 02:34:30 @@ -77,7 +77,7 @@ #define spxstat spx_istat.newstats #endif -static int spx_backoff[SPX_MAXRXTSHIFT+1] = +static const int spx_backoff[SPX_MAXRXTSHIFT+1] = { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; static struct spxpcb *spx_close(struct spxpcb *cb); @@ -1325,7 +1325,7 @@ if (ipxp != NULL) return (EISCONN); s = splnet(); - error = ipx_pcballoc(so, &ipxpcb, td); + error = ipx_pcballoc(so, &ipxpcb_list, td); if (error) goto spx_attach_end; if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { @@ -1746,9 +1746,7 @@ register struct spxpcb *cb; int s = splnet(); - ipxp = ipxpcb.ipxp_next; - if (ipxp != NULL) - for 
(; ipxp != &ipxpcb; ipxp = ipxp->ipxp_next) + LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) { if ((cb = (struct spxpcb *)ipxp->ipxp_pcb) != NULL && (cb->s_flags & SF_DELACK)) { cb->s_flags &= ~SF_DELACK; @@ -1756,6 +1754,7 @@ spxstat.spxs_delack++; spx_output(cb, (struct mbuf *)NULL); } + } splx(s); } @@ -1774,15 +1773,15 @@ /* * Search through tcb's and update active timers. + * + * XXXRW: spx_timers() may remove an ipxpcb entry, so we have to be + * ready to continue despite that. The logic here is a bit + * obfuscated. */ - ip = ipxpcb.ipxp_next; - if (ip == NULL) { - splx(s); - return; - } - while (ip != &ipxpcb) { + ip = LIST_FIRST(&ipxpcb_list); + while (ip != NULL) { + ipnxt = LIST_NEXT(ip, ipxp_list); cb = ipxtospxpcb(ip); - ipnxt = ip->ipxp_next; if (cb == NULL) goto tpgone; for (i = 0; i < SPXT_NTIMERS; i++) { @@ -1796,7 +1795,8 @@ if (cb->s_rtt) cb->s_rtt++; tpgone: + /* ip may have been removed by spx_timers(); use the saved successor. */ ip = ipnxt; } spx_iss += SPX_ISSINCR/PR_SLOWHZ; /* increment iss */ splx(s); --- //depot/vendor/freebsd/src/sys/netnatm/natm.c 2004/02/29 19:15:33 +++ //depot/user/rwatson/netperf/sys/netnatm/natm.c 2004/04/02 20:09:09 @@ -61,11 +61,11 @@ #include -static u_long natm5_sendspace = 16*1024; -static u_long natm5_recvspace = 16*1024; +static const u_long natm5_sendspace = 16*1024; +static const u_long natm5_recvspace = 16*1024; -static u_long natm0_sendspace = 16*1024; -static u_long natm0_recvspace = 16*1024; +static const u_long natm0_sendspace = 16*1024; +static const u_long natm0_recvspace = 16*1024; /* * user requests @@ -135,6 +135,7 @@ * we turn on 'drain' *before* we sofree. 
*/ npcb_free(npcb, NPCB_DESTROY); /* drain */ + SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); out: @@ -463,6 +464,7 @@ */ npcb_free(npcb, NPCB_DESTROY); /* drain */ + SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); --- //depot/vendor/freebsd/src/sys/netsmb/smb_trantcp.c 2004/02/29 19:15:33 +++ //depot/user/rwatson/netperf/sys/netsmb/smb_trantcp.c 2004/04/06 13:53:21 @@ -184,6 +184,9 @@ return 0; } +/* + * XXXRW: nb_upcall() is called without Giant, which is probably safeish. + */ static void nb_upcall(struct socket *so, void *arg, int waitflag) { --- //depot/vendor/freebsd/src/sys/nfsclient/bootp_subr.c 2004/03/12 12:39:01 +++ //depot/user/rwatson/netperf/sys/nfsclient/bootp_subr.c 2004/03/12 21:34:00 @@ -591,7 +591,7 @@ int retry; const char *s; - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); /* * Create socket and set its recieve timeout. @@ -983,7 +983,7 @@ struct ifaddr *ifa; struct sockaddr_dl *sdl; - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); error = socreate(AF_INET, &ifctx->so, SOCK_DGRAM, 0, td->td_ucred, td); if (error != 0) --- //depot/vendor/freebsd/src/sys/nfsclient/krpc_subr.c 2003/11/14 16:30:34 +++ //depot/user/rwatson/netperf/sys/nfsclient/krpc_subr.c 2004/02/28 14:29:37 @@ -215,8 +215,6 @@ nam = mhead = NULL; from = NULL; - GIANT_REQUIRED; /* XXX until socket locking done */ - /* * Create socket and set its recieve timeout. 
*/ --- //depot/vendor/freebsd/src/sys/nfsclient/nfs_socket.c 2004/04/06 22:01:20 +++ //depot/user/rwatson/netperf/sys/nfsclient/nfs_socket.c 2004/04/07 20:11:34 @@ -156,12 +156,12 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep) { struct socket *so; - int s, error, rcvreserve, sndreserve; + int error, rcvreserve, sndreserve; int pktscale; struct sockaddr *saddr; struct thread *td = &thread0; /* only used for socreate and sobind */ - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); nmp->nm_so = NULL; saddr = nmp->nm_nam; @@ -241,25 +241,25 @@ * connect system call but with the wait timing out so * that interruptible mounts don't hang here for a long time. */ - s = splnet(); + SOCK_LOCK(so); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { - (void) tsleep(&so->so_timeo, + (void) msleep(&so->so_timeo, SOCK_MTX(so), PSOCK, "nfscon", 2 * hz); if ((so->so_state & SS_ISCONNECTING) && so->so_error == 0 && rep && (error = nfs_sigintr(nmp, rep, rep->r_td)) != 0) { so->so_state &= ~SS_ISCONNECTING; - splx(s); + SOCK_UNLOCK(so); goto bad; } } if (so->so_error) { error = so->so_error; so->so_error = 0; - splx(s); + SOCK_UNLOCK(so); goto bad; } - splx(s); + SOCK_UNLOCK(so); } so->so_rcv.sb_timeo = 5 * hz; so->so_snd.sb_timeo = 5 * hz; @@ -379,7 +379,7 @@ { struct socket *so; - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); if (nmp->nm_so) { so = nmp->nm_so; @@ -415,7 +415,7 @@ struct sockaddr *sendnam; int error, soflags, flags; - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); KASSERT(rep, ("nfs_send: called with rep == NULL")); @@ -498,7 +498,7 @@ int error, sotype, rcvflg; struct thread *td = curthread; /* XXX */ - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); /* * Set up arguments for soreceive() @@ -1147,6 +1147,7 @@ * Set r_rtt to -1 in case we fail to send it now. 
*/ rep->r_rtt = -1; + SOCKBUF_LOCK(&so->so_snd); if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && ((nmp->nm_flag & NFSMNT_DUMBTIMR) || (rep->r_flags & R_SENT) || @@ -1182,6 +1183,7 @@ rep->r_rtt = 0; } } + SOCKBUF_UNLOCK(&so->so_snd); } splx(s); callout_reset(&nfs_callout, nfs_ticks, nfs_timer, NULL); --- //depot/vendor/freebsd/src/sys/nfsclient/nfs_vfsops.c 2004/04/06 22:01:20 +++ //depot/user/rwatson/netperf/sys/nfsclient/nfs_vfsops.c 2004/04/07 20:11:34 @@ -384,7 +384,7 @@ u_long l; char buf[128]; - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); #if defined(BOOTP_NFSROOT) && defined(BOOTP) bootpc_init(); /* use bootp to get nfs_diskless filled in */ --- //depot/vendor/freebsd/src/sys/nfsserver/nfs.h 2004/04/06 22:01:20 +++ //depot/user/rwatson/netperf/sys/nfsserver/nfs.h 2004/04/07 20:11:34 @@ -116,6 +116,13 @@ #ifdef _KERNEL +extern struct mtx nfsd_mtx; +#define NFSD_LOCK_ASSERT() mtx_assert(&nfsd_mtx, MA_OWNED) +#define NFSD_UNLOCK_ASSERT() mtx_assert(&nfsd_mtx, MA_NOTOWNED) +#define NFSD_LOCK_DONTCARE() +#define NFSD_LOCK() mtx_lock(&nfsd_mtx) +#define NFSD_UNLOCK() mtx_unlock(&nfsd_mtx) + #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_NFSRVDESC); MALLOC_DECLARE(M_NFSD); @@ -144,8 +151,8 @@ nfsrv_rpc_autherr; /* Procedure table data */ -extern int nfsrvv2_procid[NFS_NPROCS]; -extern int nfsrv_nfsv3_procid[NFS_NPROCS]; +extern const int nfsrvv2_procid[NFS_NPROCS]; +extern const int nfsrv_nfsv3_procid[NFS_NPROCS]; extern int32_t (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd, struct nfssvc_sock *slp, struct thread *td, struct mbuf **mreqp); --- //depot/vendor/freebsd/src/sys/nfsserver/nfs_serv.c 2004/04/06 22:01:20 +++ //depot/user/rwatson/netperf/sys/nfsserver/nfs_serv.c 2004/04/07 20:11:34 @@ -179,6 +179,8 @@ u_long testmode, nfsmode; int v3 = (nfsd->nd_flag & ND_NFSV3); + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); if (!v3) panic("nfsrv3_access: v3 proc called on a v2 connection"); @@ -211,9 +213,13 
@@ if ((nfsmode & testmode) && nfsrv_access(vp, VEXEC, cred, rdonly, td, 0)) nfsmode &= ~testmode; + NFSD_UNLOCK(); + mtx_lock(&Giant); getret = VOP_GETATTR(vp, vap, cred, td); vput(vp); + mtx_unlock(&Giant); vp = NULL; + NFSD_LOCK(); nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, vap); tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); @@ -245,6 +251,8 @@ int error = 0, rdonly; struct mbuf *mb, *mreq; + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); @@ -254,9 +262,13 @@ error = 0; goto nfsmout; } + NFSD_UNLOCK(); + mtx_lock(&Giant); error = VOP_GETATTR(vp, vap, cred, td); vput(vp); + mtx_unlock(&Giant); vp = NULL; + NFSD_LOCK(); nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3)); if (error) { error = 0; @@ -299,14 +311,21 @@ struct timespec guard; struct mount *mp = NULL; + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); + /* XXXRW: need to drop NFSD lock here? 
*/ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) { error = ESTALE; goto out; } + NFSD_UNLOCK(); + mtx_lock(&Giant); (void) vn_start_write(NULL, &mp, V_WAIT); + mtx_unlock(&Giant); + NFSD_LOCK(); VATTR_NULL(vap); if (v3) { nfsm_srvsattr(vap); @@ -363,20 +382,26 @@ * vp now an active resource, pay careful attention to cleanup */ if (v3) { + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ error = preat_ret = VOP_GETATTR(vp, &preat, cred, td); if (!error && gcheck && (preat.va_ctime.tv_sec != guard.tv_sec || preat.va_ctime.tv_nsec != guard.tv_nsec)) error = NFSERR_NOT_SYNC; if (error) { + mtx_unlock(&Giant); /* VFS */ vput(vp); vp = NULL; + NFSD_LOCK(); nfsm_reply(NFSX_WCCDATA(v3)); if (v3) nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); error = 0; goto nfsmout; } + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); } /* @@ -396,13 +421,23 @@ td, 0)) != 0) goto out; } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ error = VOP_SETATTR(vp, vap, cred, td); postat_ret = VOP_GETATTR(vp, vap, cred, td); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); if (!error) error = postat_ret; out: - if (vp != NULL) + if (vp != NULL) { + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ vput(vp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); + } + vp = NULL; nfsm_reply(NFSX_WCCORFATTR(v3)); if (v3) { @@ -416,9 +451,13 @@ /* fall through */ nfsmout: + NFSD_UNLOCK(); + mtx_lock(&Giant); if (vp) vput(vp); vn_finished_write(mp); + mtx_unlock(&Giant); + NFSD_LOCK(); return(error); } @@ -444,6 +483,8 @@ struct mbuf *mb, *mreq; struct vattr va, dirattr, *vap = &va; + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); ndclear(&nd); @@ -464,11 +505,15 @@ * structure in case macros jump to nfsmout. 
*/ + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (error) { if (dirp) { vrele(dirp); dirp = NULL; } + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); nfsm_reply(NFSX_POSTOPATTR(v3)); if (v3) nfsm_srvpostop_attr(dirattr_ret, &dirattr); @@ -549,6 +594,8 @@ */ if (error) { + mtx_unlock(&Giant); + NFSD_LOCK(); nfsm_reply(NFSX_POSTOPATTR(v3)); if (v3) nfsm_srvpostop_attr(dirattr_ret, &dirattr); @@ -577,7 +624,9 @@ error = VOP_GETATTR(vp, vap, cred, td); vput(vp); + mtx_unlock(&Giant); /* VFS */ ndp->ni_vp = NULL; + NFSD_LOCK(); nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3)); if (error) { if (v3) @@ -595,6 +644,8 @@ } nfsmout: + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (dirp) vrele(dirp); NDFREE(&nd, NDF_ONLY_PNBUF); @@ -602,6 +653,8 @@ vrele(ndp->ni_startdir); if (ndp->ni_vp) vput(ndp->ni_vp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); return (error); } @@ -630,6 +683,8 @@ fhandle_t *fhp; struct uio io, *uiop = &io; + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); #ifndef nolint mp = NULL; @@ -639,6 +694,7 @@ nfsm_srvmtofh(fhp); len = 0; i = 0; + NFSD_UNLOCK(); while (len < NFS_MAXPATHLEN) { MGET(nmp, M_TRYWAIT, MT_DATA); MCLGET(nmp, M_TRYWAIT); @@ -666,6 +722,7 @@ uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = NULL; + NFSD_LOCK(); error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE); if (error) { nfsm_reply(2 * NFSX_UNSIGNED); @@ -674,18 +731,20 @@ error = 0; goto nfsmout; } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (vp->v_type != VLNK) { if (v3) error = EINVAL; else error = ENXIO; - goto out; - } - error = VOP_READLINK(vp, uiop, cred); -out: + } else + error = VOP_READLINK(vp, uiop, cred); getret = VOP_GETATTR(vp, &attr, cred, td); vput(vp); + mtx_unlock(&Giant); /* VFS */ vp = NULL; + NFSD_LOCK(); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED); if (v3) nfsm_srvpostop_attr(getret, &attr); @@ -705,8 +764,13 @@ nfsmout: if (mp3) m_freem(mp3); - if (vp) + if (vp) { + 
NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ vput(vp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); + } return(error); } @@ -741,6 +805,8 @@ off_t off; int ioflag = 0; + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); @@ -779,18 +845,24 @@ if ((error = nfsrv_access(vp, VREAD, cred, rdonly, td, 1)) != 0) error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 1); } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ getret = VOP_GETATTR(vp, vap, cred, td); if (!error) error = getret; if (error) { vput(vp); + mtx_unlock(&Giant); /* VFS */ vp = NULL; + NFSD_LOCK(); nfsm_reply(NFSX_POSTOPATTR(v3)); if (v3) nfsm_srvpostop_attr(getret, vap); error = 0; goto nfsmout; } + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); /* * Calculate byte count to read @@ -870,6 +942,7 @@ tl += (NFSX_V2FATTR / sizeof (u_int32_t)); } len = left = nfsm_rndup(cnt); + NFSD_UNLOCK(); if (cnt > 0) { /* * Generate the mbuf list with the uio_iov ref. to it. @@ -915,6 +988,7 @@ uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; + mtx_lock(&Giant); /* VFS */ error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); off = uiop->uio_offset; nh->nh_nextr = off; @@ -924,6 +998,8 @@ error = getret; m_freem(mreq); vput(vp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); vp = NULL; nfsm_reply(NFSX_POSTOPATTR(v3)); if (v3) @@ -933,9 +1009,13 @@ } } else { uiop->uio_resid = 0; + mtx_lock(&Giant); /* VFS */ } + mtx_assert(&Giant, MA_OWNED); /* VFS */ vput(vp); + mtx_unlock(&Giant); /* VFS */ vp = NULL; + NFSD_LOCK(); nfsm_srvfillattr(vap, fp); tlen = len - uiop->uio_resid; cnt = cnt < tlen ? 
cnt : tlen; @@ -951,8 +1031,13 @@ } *tl = txdr_unsigned(cnt); nfsmout: - if (vp) + if (vp) { + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ vput(vp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); + } return(error); } @@ -988,6 +1073,8 @@ off_t off; struct mount *mntp = NULL; + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); if (mrep == NULL) { *mrq = NULL; @@ -1000,7 +1087,11 @@ error = ESTALE; goto ereply; } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ (void) vn_start_write(NULL, &mntp, V_WAIT); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); if (v3) { tl = nfsm_dissect(u_int32_t *, 5 * NFSX_UNSIGNED); off = fxdr_hyper(tl); @@ -1063,8 +1154,13 @@ error = 0; goto nfsmout; } - if (v3) + if (v3) { + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ forat_ret = VOP_GETATTR(vp, &forat, cred, td); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); + } if (vp->v_type != VREG) { if (v3) error = EINVAL; @@ -1074,7 +1170,11 @@ if (!error) error = nfsrv_access(vp, VWRITE, cred, rdonly, td, 1); if (error) { + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ vput(vp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); vp = NULL; nfsm_reply(NFSX_WCCDATA(v3)); if (v3) @@ -1083,6 +1183,7 @@ goto nfsmout; } + NFSD_UNLOCK(); if (len > 0) { MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP, M_WAITOK); @@ -1116,12 +1217,18 @@ uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = NULL; uiop->uio_offset = off; + mtx_lock(&Giant); /* VFS */ error = VOP_WRITE(vp, uiop, ioflags, cred); + /* XXXRW: unlocked write. 
*/ nfsrvstats.srvvop_writes++; FREE((caddr_t)iv, M_TEMP); - } + } else + mtx_lock(&Giant); /* VFS */ + mtx_assert(&Giant, MA_OWNED); /* VFS */ aftat_ret = VOP_GETATTR(vp, vap, cred, td); vput(vp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); vp = NULL; if (!error) error = aftat_ret; @@ -1159,9 +1266,13 @@ } error = 0; nfsmout: + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (vp) vput(vp); vn_finished_write(mntp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); return(error); } @@ -1195,6 +1306,8 @@ u_quad_t cur_usec; struct mount *mntp = NULL; + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); #ifndef nolint i = 0; @@ -1348,8 +1461,13 @@ error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, cred, slp, nfsd->nd_nam, &rdonly, TRUE); if (!error) { - if (v3) + if (v3) { + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ forat_ret = VOP_GETATTR(vp, &forat, cred, td); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); + } if (vp->v_type != VREG) { if (v3) error = EINVAL; @@ -1361,6 +1479,7 @@ } if (!error) error = nfsrv_access(vp, VWRITE, cred, rdonly, td, 1); + NFSD_UNLOCK(); if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE) ioflags = IO_NODELOCKED; else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC) @@ -1372,6 +1491,7 @@ uiop->uio_td = NULL; uiop->uio_offset = nfsd->nd_off; uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off; + mtx_lock(&Giant); /* VFS */ if (uiop->uio_resid > 0) { mp = mrep; i = 0; @@ -1402,6 +1522,7 @@ } if (!error) { error = VOP_WRITE(vp, uiop, ioflags, cred); + /* XXXRW: unlocked write. 
*/ nfsrvstats.srvvop_writes++; vn_finished_write(mntp); } @@ -1413,6 +1534,8 @@ vput(vp); vp = NULL; } + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); /* * Loop around generating replies for all write rpcs that have @@ -1507,6 +1630,8 @@ struct mbuf *mp; struct nfsrv_descript *p; + NFSD_LOCK_ASSERT(); + NFS_DPF(WG, ("C%03x-%03x", nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff)); LIST_REMOVE(nfsd, nd_hash); @@ -1573,6 +1698,8 @@ u_char cverf[NFSX_V3CREATEVERF]; struct mount *mp = NULL; + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); #ifndef nolint rdev = 0; @@ -1585,7 +1712,11 @@ error = ESTALE; goto ereply; } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ (void) vn_start_write(NULL, &mp, V_WAIT); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; @@ -1604,7 +1735,11 @@ error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); if (dirp && !v3) { + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ vrele(dirp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); dirp = NULL; } if (error) { @@ -1679,6 +1814,8 @@ * The only possible error we can have at this point is EEXIST. * nd.ni_vp will also be non-NULL in that case. 
*/ + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (nd.ni_vp == NULL) { if (vap->va_mode == (mode_t)VNOVAL) vap->va_mode = 0; @@ -1797,6 +1934,8 @@ } } ereply: + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { @@ -1813,6 +1952,8 @@ error = 0; nfsmout: + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (nd.ni_startdir) { vrele(nd.ni_startdir); nd.ni_startdir = NULL; @@ -1829,6 +1970,8 @@ if (nd.ni_vp) vput(nd.ni_vp); vn_finished_write(mp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); return (error); } @@ -1858,6 +2001,8 @@ struct mount *mp = NULL; int v3 = (nfsd->nd_flag & ND_NFSV3); + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); if (!v3) panic("nfsrv_mknod: v3 proc called on a v2 connection"); @@ -1869,7 +2014,11 @@ error = ESTALE; goto ereply; } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ (void) vn_start_write(NULL, &mp, V_WAIT); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; @@ -1915,6 +2064,8 @@ vap->va_type = vtyp; if (vap->va_mode == (mode_t)VNOVAL) vap->va_mode = 0; + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (vtyp == VSOCK) { vrele(nd.ni_startdir); nd.ni_startdir = NULL; @@ -1987,6 +2138,8 @@ VOP_UNLOCK(dirp, 0, td); } ereply: + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1)); if (v3) { if (!error) { @@ -1995,9 +2148,15 @@ } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ vn_finished_write(mp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); return (0); nfsmout: + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (dirp) vrele(dirp); if (nd.ni_startdir) @@ -2012,6 +2171,8 @@ if (nd.ni_vp) vput(nd.ni_vp); vn_finished_write(mp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); return (error); } @@ -2037,6 +2198,8 @@ fhandle_t *fhp; struct mount *mp = NULL; + NFSD_LOCK_ASSERT(); + 
nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); ndclear(&nd); @@ -2046,7 +2209,11 @@ error = ESTALE; goto ereply; } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ (void) vn_start_write(NULL, &mp, V_WAIT); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; @@ -2054,6 +2221,8 @@ nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (dirp && !v3) { vrele(dirp); dirp = NULL; @@ -2099,6 +2268,8 @@ vrele(dirp); dirp = NULL; } + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); ereply: nfsm_reply(NFSX_WCCDATA(v3)); if (v3) { @@ -2106,6 +2277,8 @@ error = 0; } nfsmout: + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp) { if (nd.ni_dvp == nd.ni_vp) @@ -2116,6 +2289,8 @@ if (nd.ni_vp) vput(nd.ni_vp); vn_finished_write(mp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); return(error); } @@ -2144,6 +2319,8 @@ uid_t saved_uid; struct mount *mp = NULL; + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); #ifndef nolint fvp = NULL; @@ -2176,7 +2353,11 @@ error = nfs_namei(&fromnd, ffhp, len, slp, nam, &md, &dpos, &fdirp, v3, &fdirfor, &fdirfor_ret, td, FALSE); if (fdirp && !v3) { + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ vrele(fdirp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); fdirp = NULL; } if (error) { @@ -2197,6 +2378,8 @@ tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART; error = nfs_namei(&tond, tfhp, len2, slp, nam, &md, &dpos, &tdirp, v3, &tdirfor, &tdirfor_ret, td, FALSE); + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (tdirp && !v3) { vrele(tdirp); tdirp = NULL; @@ -2282,9 +2465,13 @@ /* fall through */ out1: + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); nfsm_reply(2 * NFSX_WCCDATA(v3)); if (v3) { /* Release existing locks to prevent deadlock. 
*/ + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (tond.ni_dvp) { if (tond.ni_dvp == tond.ni_vp) vrele(tond.ni_dvp); @@ -2306,6 +2493,8 @@ tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, td); VOP_UNLOCK(tdirp, 0, td); } + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } @@ -2316,6 +2505,8 @@ /* * Clear out tond related fields */ + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (tdirp) vrele(tdirp); if (tond.ni_startdir) @@ -2344,6 +2535,8 @@ vrele(fromnd.ni_vp); vn_finished_write(mp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); return (error); } @@ -2369,6 +2562,8 @@ fhandle_t *fhp, *dfhp; struct mount *mp = NULL; + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); ndclear(&nd); @@ -2379,7 +2574,11 @@ error = ESTALE; goto ereply; } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ (void) vn_start_write(NULL, &mp, V_WAIT); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); nfsm_srvmtofh(dfhp); nfsm_srvnamesiz(len); @@ -2394,6 +2593,8 @@ error = 0; goto nfsmout; } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (v3) getret = VOP_GETATTR(vp, &at, cred, td); if (vp->v_type == VDIR) { @@ -2404,8 +2605,12 @@ nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT; + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); error = nfs_namei(&nd, dfhp, len, slp, nam, &md, &dpos, &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (dirp && !v3) { vrele(dirp); dirp = NULL; @@ -2459,6 +2664,8 @@ VOP_UNLOCK(dirp, 0, td); } } + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); ereply: nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { @@ -2469,6 +2676,8 @@ /* fall through */ nfsmout: + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ NDFREE(&nd, NDF_ONLY_PNBUF); if (dirp) vrele(dirp); @@ -2483,6 +2692,8 @@ if (nd.ni_vp) vrele(nd.ni_vp); vn_finished_write(mp); +
mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); return(error); } @@ -2512,6 +2723,8 @@ fhandle_t *fhp; struct mount *mp = NULL; + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); ndclear(&nd); @@ -2521,13 +2734,19 @@ error = ESTALE; goto out; } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ (void) vn_start_write(NULL, &mp, V_WAIT); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (dirp && !v3) { vrele(dirp); dirp = NULL; @@ -2626,6 +2845,8 @@ vrele(nd.ni_startdir); nd.ni_startdir = NULL; } + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { @@ -2638,6 +2859,8 @@ /* fall through */ nfsmout: + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp) { if (nd.ni_dvp == nd.ni_vp) @@ -2655,6 +2878,8 @@ FREE(pathcp, M_TEMP); vn_finished_write(mp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); return (error); } @@ -2684,6 +2909,8 @@ fhandle_t *fhp; struct mount *mp = NULL; + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); ndclear(&nd); @@ -2693,7 +2920,11 @@ error = ESTALE; goto out; } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ (void) vn_start_write(NULL, &mp, V_WAIT); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; @@ -2702,7 +2933,11 @@ error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, v3, &dirfor, &dirfor_ret, td, FALSE); if (dirp && !v3) { + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ vrele(dirp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); dirp = NULL; } if (error) { @@ -2725,6 +2960,8 @@ * nd.ni_vp, if it exists, is referenced but not locked. 
*/ + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ vap->va_type = VDIR; if (nd.ni_vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); @@ -2779,6 +3016,8 @@ VOP_UNLOCK(dirp, 0, td); } } + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { @@ -2796,6 +3035,8 @@ /* fall through */ nfsmout: + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (dirp) vrele(dirp); if (nd.ni_dvp) { @@ -2812,6 +3053,8 @@ vrele(nd.ni_vp); } vn_finished_write(mp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); return (error); } @@ -2837,6 +3080,8 @@ struct nameidata nd; struct mount *mp = NULL; + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); ndclear(&nd); @@ -2846,7 +3091,11 @@ error = ESTALE; goto out; } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ (void) vn_start_write(NULL, &mp, V_WAIT); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = DELETE; @@ -2886,6 +3135,8 @@ * Issue or abort op. Since SAVESTART is not set, path name * component is freed by the VOP after either. 
*/ + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (!error) error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); NDFREE(&nd, NDF_ONLY_PNBUF); @@ -2910,6 +3161,8 @@ VOP_UNLOCK(dirp, 0, td); } } + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); nfsm_reply(NFSX_WCCDATA(v3)); error = 0; if (v3) @@ -2917,6 +3170,8 @@ /* fall through */ nfsmout: + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ NDFREE(&nd, NDF_ONLY_PNBUF); if (dirp) vrele(dirp); @@ -2930,6 +3185,8 @@ vput(nd.ni_vp); vn_finished_write(mp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); return(error); } @@ -2999,6 +3256,8 @@ u_quad_t off, toff, verf; u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */ + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); @@ -3025,7 +3284,11 @@ error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE); if (!error && vp->v_type != VDIR) { error = ENOTDIR; + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ vput(vp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); vp = NULL; } if (error) { @@ -3039,6 +3302,8 @@ /* * Obtain lock on vnode for this section of the code */ + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (v3) { error = getret = VOP_GETATTR(vp, &at, cred, td); #if 0 @@ -3049,10 +3314,17 @@ error = NFSERR_BAD_COOKIE; #endif } - if (!error) + if (!error) { + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 0); + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ + } if (error) { vput(vp); + mtx_unlock(&Giant); + NFSD_LOCK(); vp = NULL; nfsm_reply(NFSX_POSTOPATTR(v3)); if (v3) @@ -3094,10 +3366,12 @@ VOP_UNLOCK(vp, 0, td); if (error) { vrele(vp); + mtx_unlock(&Giant); /* VFS */ vp = NULL; free((caddr_t)rbuf, M_TEMP); if (cookies) free((caddr_t)cookies, M_TEMP); + NFSD_LOCK(); nfsm_reply(NFSX_POSTOPATTR(v3)); if (v3) nfsm_srvpostop_attr(getret, &at); @@ -3113,7 +3387,9 @@ */ if (siz == 0) { vrele(vp); + mtx_unlock(&Giant); /* VFS */ vp = NULL; + NFSD_LOCK(); 
nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + 2 * NFSX_UNSIGNED); if (v3) { @@ -3161,6 +3437,8 @@ goto again; } + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); len = 3 * NFSX_UNSIGNED; /* paranoia, probably can be 0 */ nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz); if (v3) { @@ -3237,7 +3515,11 @@ cookiep++; ncookies--; } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ vrele(vp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); vp = NULL; nfsm_clget; *tl = nfsrv_nfs_false; @@ -3257,8 +3539,13 @@ FREE((caddr_t)cookies, M_TEMP); nfsmout: - if (vp) + if (vp) { + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ vrele(vp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); + } return(error); } @@ -3292,6 +3579,8 @@ u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */ int v3 = (nfsd->nd_flag & ND_NFSV3); + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); if (!v3) panic("nfsrv_readdirplus: v3 proc called on a v2 connection"); @@ -3315,8 +3604,12 @@ error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE); if (!error && vp->v_type != VDIR) { error = ENOTDIR; + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ vput(vp); + mtx_unlock(&Giant); /* VFS */ vp = NULL; + NFSD_LOCK(); } if (error) { nfsm_reply(NFSX_UNSIGNED); @@ -3324,6 +3617,8 @@ error = 0; goto nfsmout; } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ error = getret = VOP_GETATTR(vp, &at, cred, td); #if 0 /* @@ -3332,10 +3627,17 @@ if (!error && toff && verf && verf != at.va_filerev) error = NFSERR_BAD_COOKIE; #endif - if (!error) + if (!error) { + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 0); + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ + } if (error) { vput(vp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); vp = NULL; nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); @@ -3370,10 +3672,12 @@ error = getret; if (error) { vrele(vp); + mtx_unlock(&Giant); /* VFS */ vp = NULL; if (cookies) 
free((caddr_t)cookies, M_TEMP); free((caddr_t)rbuf, M_TEMP); + NFSD_LOCK(); nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); error = 0; @@ -3388,6 +3692,8 @@ */ if (siz == 0) { vrele(vp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); vp = NULL; nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED); @@ -3441,19 +3747,23 @@ EOPNOTSUPP) { error = NFSERR_NOTSUPP; vrele(vp); + mtx_unlock(&Giant); /* VFS */ vp = NULL; free((caddr_t)cookies, M_TEMP); free((caddr_t)rbuf, M_TEMP); + NFSD_LOCK(); nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); error = 0; goto nfsmout; } vput(nvp); + mtx_unlock(&Giant); /* VFS */ nvp = NULL; dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED; + NFSD_LOCK(); nfsm_reply(cnt); nfsm_srvpostop_attr(getret, &at); tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED); @@ -3462,6 +3772,8 @@ bp = bpos; be = bp + M_TRAILINGSPACE(mp); + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ /* Loop through the records and build reply */ while (cpos < cend && ncookies > 0) { if (dp->d_fileno != 0 && dp->d_type != DT_WHT) { @@ -3517,16 +3829,16 @@ fl.fl_off.nfsuquad[0] = 0; fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep); - nfsm_clget; + nfsm_clget_nolock; *tl = nfsrv_nfs_true; bp += NFSX_UNSIGNED; - nfsm_clget; + nfsm_clget_nolock; *tl = 0; bp += NFSX_UNSIGNED; - nfsm_clget; + nfsm_clget_nolock; *tl = txdr_unsigned(dp->d_fileno); bp += NFSX_UNSIGNED; - nfsm_clget; + nfsm_clget_nolock; *tl = txdr_unsigned(nlen); bp += NFSX_UNSIGNED; @@ -3555,7 +3867,7 @@ xfer = sizeof (struct flrep); cp = (caddr_t)&fl; while (xfer > 0) { - nfsm_clget; + nfsm_clget_nolock; if ((bp + xfer) > be) tsiz = be - bp; else @@ -3574,10 +3886,12 @@ ncookies--; } vrele(vp); + mtx_unlock(&Giant); /* VFS */ vp = NULL; - nfsm_clget; + nfsm_clget_nolock; *tl = nfsrv_nfs_false; bp += NFSX_UNSIGNED; + NFSD_LOCK(); nfsm_clget; if (eofflag) *tl = nfsrv_nfs_true; @@ -3592,8 +3906,13 @@ FREE((caddr_t)cookies, M_TEMP); FREE((caddr_t)rbuf, 
M_TEMP); nfsmout: - if (vp) + if (vp) { + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ vrele(vp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); + } return(error); } @@ -3620,6 +3939,8 @@ struct mount *mp = NULL; int v3 = (nfsd->nd_flag & ND_NFSV3); + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); if (!v3) panic("nfsrv_commit: v3 proc called on a v2 connection"); @@ -3629,7 +3950,11 @@ error = ESTALE; goto ereply; } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ (void) vn_start_write(NULL, &mp, V_WAIT); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); /* @@ -3646,6 +3971,8 @@ error = 0; goto nfsmout; } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ for_ret = VOP_GETATTR(vp, &bfor, cred, td); if (cnt > MAX_COMMIT_COUNT) { @@ -3733,7 +4060,9 @@ aft_ret = VOP_GETATTR(vp, &aft, cred, td); vput(vp); + mtx_unlock(&Giant); /* VFS */ vp = NULL; + NFSD_LOCK(); ereply: nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF); nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft); @@ -3747,9 +4076,13 @@ error = 0; } nfsmout: + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (vp) vput(vp); vn_finished_write(mp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); return(error); } @@ -3777,6 +4110,8 @@ struct statfs statfs; u_quad_t tval; + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); @@ -3789,10 +4124,14 @@ goto nfsmout; } sf = &statfs; + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ error = VFS_STATFS(vp->v_mount, sf, td); getret = VOP_GETATTR(vp, &at, cred, td); vput(vp); + mtx_unlock(&Giant); /* VFS */ vp = NULL; + NFSD_LOCK(); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3)); if (v3) nfsm_srvpostop_attr(getret, &at); @@ -3826,8 +4165,13 @@ sfp->sf_bavail = txdr_unsigned(sf->f_bavail); } nfsmout: - if (vp) + if (vp) { + NFSD_UNLOCK(); + mtx_lock(&Giant); vput(vp); + mtx_unlock(&Giant); + NFSD_LOCK(); + } return(error); } @@ -3854,6 +4198,8 @@ struct statfs sb; 
int v3 = (nfsd->nd_flag & ND_NFSV3); + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); if (!v3) panic("nfsrv_fsinfo: v3 proc called on a v2 connection"); @@ -3867,13 +4213,17 @@ goto nfsmout; } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ /* XXX Try to make a guess on the max file size. */ VFS_STATFS(vp->v_mount, &sb, td); maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1; getret = VOP_GETATTR(vp, &at, cred, td); vput(vp); + mtx_unlock(&Giant); /* VFS */ vp = NULL; + NFSD_LOCK(); nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO); nfsm_srvpostop_attr(getret, &at); sip = nfsm_build(struct nfsv3_fsinfo *, NFSX_V3FSINFO); @@ -3901,8 +4251,13 @@ NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS | NFSV3FSINFO_CANSETTIME); nfsmout: - if (vp) + if (vp) { + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ vput(vp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); + } return(error); } @@ -3928,6 +4283,8 @@ fhandle_t *fhp; int v3 = (nfsd->nd_flag & ND_NFSV3); + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); if (!v3) panic("nfsrv_pathconf: v3 proc called on a v2 connection"); @@ -3940,6 +4297,8 @@ error = 0; goto nfsmout; } + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax); if (!error) error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax); @@ -3949,7 +4308,9 @@ error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc); getret = VOP_GETATTR(vp, &at, cred, td); vput(vp); + mtx_unlock(&Giant); /* VFS */ vp = NULL; + NFSD_LOCK(); nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF); nfsm_srvpostop_attr(getret, &at); if (error) { @@ -3971,8 +4332,13 @@ pc->pc_caseinsensitive = nfsrv_nfs_false; pc->pc_casepreserving = nfsrv_nfs_true; nfsmout: - if (vp) + if (vp) { + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ vput(vp); + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); + } return(error); } @@ -3989,6 +4355,8 @@ int error = NFSERR_RETVOID; struct mbuf *mb, *mreq; + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
nfsm_reply(0); nfsmout: @@ -4008,6 +4376,8 @@ int error; struct mbuf *mb, *mreq; + NFSD_LOCK_ASSERT(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); if (nfsd->nd_repstat) error = nfsd->nd_repstat; @@ -4040,6 +4410,9 @@ struct vattr vattr; int error; + NFSD_LOCK_ASSERT(); + NFSD_UNLOCK(); + nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); if (flags & VWRITE) { /* Just vn_writechk() changed to check rdonly */ @@ -4053,7 +4426,8 @@ case VREG: case VDIR: case VLNK: - return (EROFS); + NFSD_LOCK(); + return (EROFS); default: break; } @@ -4062,12 +4436,15 @@ * If there's shared text associated with * the inode, we can't allow writing. */ - if (vp->v_vflag & VV_TEXT) + if (vp->v_vflag & VV_TEXT) { + NFSD_LOCK(); return (ETXTBSY); + } } + mtx_lock(&Giant); /* VFS */ error = VOP_GETATTR(vp, &vattr, cred, td); if (error) - return (error); + goto out; error = VOP_ACCESS(vp, flags, cred, td); /* * Allow certain operations for the owner (reads and writes @@ -4075,5 +4452,8 @@ */ if (override && error == EACCES && cred->cr_uid == vattr.va_uid) error = 0; +out: + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); return error; } --- //depot/vendor/freebsd/src/sys/nfsserver/nfs_srvcache.c 2004/04/06 22:01:20 +++ //depot/user/rwatson/netperf/sys/nfsserver/nfs_srvcache.c 2004/04/07 20:11:34 @@ -44,7 +44,9 @@ #include #include #include +#include #include +#include #include #include /* for sodupsockaddr */ @@ -72,7 +74,7 @@ /* * Static array that defines which nfs rpc's are nonidempotent */ -static int nonidempotent[NFS_NPROCS] = { +static const int nonidempotent[NFS_NPROCS] = { FALSE, FALSE, TRUE, @@ -99,7 +101,7 @@ }; /* True iff the rpc reply is an nfs status ONLY! */ -static int nfsv2_repstat[NFS_NPROCS] = { +static const int nfsv2_repstat[NFS_NPROCS] = { FALSE, FALSE, FALSE, @@ -154,6 +156,8 @@ caddr_t bpos; int ret; + NFSD_LOCK_ASSERT(); + /* * Don't cache recent requests for reliable transport protocols. * (Maybe we should for the case of a reconnect, but..)
@@ -167,7 +171,8 @@ NFS_DPF(RC, ("H%03x", rp->rc_xid & 0xfff)); if ((rp->rc_flag & RC_LOCKED) != 0) { rp->rc_flag |= RC_WANTED; - (void) tsleep(rp, PZERO-1, "nfsrc", 0); + (void) msleep(rp, &nfsd_mtx, PZERO-1, + "nfsrc", 0); goto loop; } rp->rc_flag |= RC_LOCKED; @@ -188,8 +193,10 @@ ret = RC_REPLY; } else if (rp->rc_flag & RC_REPMBUF) { nfsrvstats.srvcache_nonidemdonehits++; + NFSD_UNLOCK(); *repp = m_copym(rp->rc_reply, 0, M_COPYALL, M_TRYWAIT); + NFSD_LOCK(); ret = RC_REPLY; } else { nfsrvstats.srvcache_idemdonehits++; @@ -207,15 +214,17 @@ nfsrvstats.srvcache_misses++; NFS_DPF(RC, ("M%03x", nd->nd_retxid & 0xfff)); if (numnfsrvcache < desirednfsrvcache) { + NFSD_UNLOCK(); rp = (struct nfsrvcache *)malloc((u_long)sizeof *rp, M_NFSD, M_WAITOK | M_ZERO); + NFSD_LOCK(); numnfsrvcache++; rp->rc_flag = RC_LOCKED; } else { rp = TAILQ_FIRST(&nfsrvlruhead); while ((rp->rc_flag & RC_LOCKED) != 0) { rp->rc_flag |= RC_WANTED; - (void) tsleep(rp, PZERO-1, "nfsrc", 0); + (void) msleep(rp, &nfsd_mtx, PZERO-1, "nfsrc", 0); rp = TAILQ_FIRST(&nfsrvlruhead); } rp->rc_flag |= RC_LOCKED; @@ -261,6 +270,8 @@ { struct nfsrvcache *rp; + NFSD_LOCK_ASSERT(); + if (!nd->nd_nam2) return; loop: @@ -270,7 +281,8 @@ NFS_DPF(RC, ("U%03x", rp->rc_xid & 0xfff)); if ((rp->rc_flag & RC_LOCKED) != 0) { rp->rc_flag |= RC_WANTED; - (void) tsleep(rp, PZERO-1, "nfsrc", 0); + (void) msleep(rp, &nfsd_mtx, PZERO-1, + "nfsrc", 0); goto loop; } rp->rc_flag |= RC_LOCKED; @@ -298,8 +310,10 @@ rp->rc_status = nd->nd_repstat; rp->rc_flag |= RC_REPSTATUS; } else { + NFSD_UNLOCK(); rp->rc_reply = m_copym(repmbuf, 0, M_COPYALL, M_TRYWAIT); + NFSD_LOCK(); rp->rc_flag |= RC_REPMBUF; } } @@ -322,6 +336,8 @@ { struct nfsrvcache *rp, *nextrp; + NFSD_LOCK_ASSERT(); + for (rp = TAILQ_FIRST(&nfsrvlruhead); rp != 0; rp = nextrp) { nextrp = TAILQ_NEXT(rp, rc_lru); LIST_REMOVE(rp, rc_hash); --- //depot/vendor/freebsd/src/sys/nfsserver/nfs_srvsock.c 2004/04/06 22:01:20 +++ 
//depot/user/rwatson/netperf/sys/nfsserver/nfs_srvsock.c 2004/04/07 20:11:34 @@ -97,7 +97,7 @@ static void nfs_realign(struct mbuf **pm, int hsiz); /* XXX SHARED */ static int nfsrv_getstream(struct nfssvc_sock *, int); -int (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd, +int32_t (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd, struct nfssvc_sock *slp, struct thread *td, struct mbuf **mreqp) = { @@ -140,9 +140,13 @@ caddr_t bpos; struct mbuf *mb; + /* XXXRW: not 100% clear the lock is needed here. */ + NFSD_LOCK_ASSERT(); + nd->nd_repstat = err; if (err && (nd->nd_flag & ND_NFSV3) == 0) /* XXX recheck */ siz = 0; + NFSD_UNLOCK(); MGETHDR(mreq, M_TRYWAIT, MT_DATA); mb = mreq; /* @@ -155,6 +159,7 @@ MCLGET(mreq, M_TRYWAIT); } else mreq->m_data += min(max_hdr, M_TRAILINGSPACE(mreq)); + NFSD_LOCK(); tl = mtod(mreq, u_int32_t *); bpos = ((caddr_t)tl) + mreq->m_len; *tl++ = txdr_unsigned(nd->nd_retxid); @@ -236,13 +241,18 @@ struct mbuf *n = NULL; int off = 0; + /* XXXRW: may not need lock? */ + NFSD_LOCK_ASSERT(); + ++nfs_realign_test; while ((m = *pm) != NULL) { if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) { + NFSD_UNLOCK(); MGET(n, M_TRYWAIT, MT_DATA); if (m->m_len >= MINCLSIZE) { MCLGET(n, M_TRYWAIT); } + NFSD_LOCK(); n->m_len = 0; break; } @@ -281,6 +291,8 @@ int error = 0; struct mbuf *mrep, *md; + NFSD_LOCK_ASSERT(); + mrep = nd->nd_mrep; md = nd->nd_md; dpos = nd->nd_dpos; @@ -410,6 +422,9 @@ struct uio auio; int flags, error; + NFSD_UNLOCK_ASSERT(); + + /* XXXRW: Unlocked read. */ if ((slp->ns_flag & SLP_VALID) == 0) return; #ifdef notdef @@ -417,12 +432,29 @@ * Define this to test for nfsds handling this under heavy load. */ if (waitflag == M_DONTWAIT) { + NFSD_LOCK(); slp->ns_flag |= SLP_NEEDQ; goto dorecs; } #endif - GIANT_REQUIRED; /* XXX until socket locking is done */ + + /* + * XXXRW: Oh crap! 
This code is called without Giant, but touches + * the NFS global socket list, and also the socket entries + * themselves along with record queues, flags, etc. For now, try + * to minimize the race by processing asynchronously, but there + * is still a problem. We probably need to queue events + * asynchronously using taskqueues from here, or add some + * locking to the NFS code. We can't grab Giant because the + * caller is likely holding a lock on the PCB for the socket, + * not to mention the socket buffer, etc. + */ +#if 0 + slp->ns_flag |= SLP_NEEDQ; + goto dorecs; +#endif + NFSD_LOCK(); auio.uio_td = NULL; if (so->so_type == SOCK_STREAM) { /* @@ -441,8 +473,10 @@ */ auio.uio_resid = 1000000000; flags = MSG_DONTWAIT; + NFSD_UNLOCK(); error = so->so_proto->pr_usrreqs->pru_soreceive (so, &nam, &auio, &mp, NULL, &flags); + NFSD_LOCK(); if (error || mp == NULL) { if (error == EWOULDBLOCK) slp->ns_flag |= SLP_NEEDQ; @@ -476,6 +510,7 @@ do { auio.uio_resid = 1000000000; flags = MSG_DONTWAIT; + NFSD_UNLOCK(); error = so->so_proto->pr_usrreqs->pru_soreceive (so, &nam, &auio, &mp, NULL, &flags); if (mp) { @@ -487,13 +522,16 @@ if (nam) FREE(nam, M_SONAME); m_freem(mp); + NFSD_LOCK(); continue; } + NFSD_LOCK(); nfs_realign(&mp, 10 * NFSX_UNSIGNED); rec->nr_address = nam; rec->nr_packet = mp; STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link); - } + } else + NFSD_LOCK(); if (error) { if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && error != EWOULDBLOCK) { @@ -512,6 +550,7 @@ (STAILQ_FIRST(&slp->ns_rec) != NULL || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)))) nfsrv_wakenfsd(slp); + NFSD_UNLOCK(); } /* @@ -528,6 +567,8 @@ struct mbuf *om, *m2, *recm; u_int32_t recmark; + NFSD_LOCK_ASSERT(); + if (slp->ns_flag & SLP_GETSTREAM) panic("nfs getstream"); slp->ns_flag |= SLP_GETSTREAM; @@ -586,8 +627,10 @@ while (len < slp->ns_reclen) { if ((len + m->m_len) > slp->ns_reclen) { + NFSD_UNLOCK(); m2 = m_copym(m, 0, slp->ns_reclen - len, waitflag); + NFSD_LOCK(); if (m2) { if (om) { 
om->m_next = m2; @@ -630,8 +673,10 @@ *mpp = recm; if (slp->ns_flag & SLP_LASTFRAG) { struct nfsrv_rec *rec; + NFSD_UNLOCK(); rec = malloc(sizeof(struct nfsrv_rec), M_NFSRVDESC, waitflag == M_DONTWAIT ? M_NOWAIT : M_WAITOK); + NFSD_LOCK(); if (!rec) { m_freem(slp->ns_frag); } else { @@ -658,6 +703,8 @@ struct nfsrv_descript *nd; int error; + NFSD_LOCK_ASSERT(); + *ndp = NULL; if ((slp->ns_flag & SLP_VALID) == 0 || STAILQ_FIRST(&slp->ns_rec) == NULL) @@ -667,8 +714,10 @@ nam = rec->nr_address; m = rec->nr_packet; free(rec, M_NFSRVDESC); + NFSD_UNLOCK(); MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK); + NFSD_LOCK(); nd->nd_md = nd->nd_mrep = m; nd->nd_nam2 = nam; nd->nd_dpos = mtod(m, caddr_t); @@ -695,6 +744,8 @@ { struct nfsd *nd; + NFSD_LOCK_ASSERT(); + if ((slp->ns_flag & SLP_VALID) == 0) return; TAILQ_FOREACH(nd, &nfsd_head, nfsd_chain) { @@ -725,7 +776,15 @@ struct sockaddr *sendnam; int error, soflags, flags; - GIANT_REQUIRED; /* XXX until socket locking is done */ + NET_ASSERT_GIANT(); + /* + * XXXRW: I think this is a good idea to avoid socket locking + * problems. There's certainly no need to hold the lock here, + * assuming the socket won't fall away from under us (which + * should be ensured by the caller). However, I'm not 100% + * sure: the BSD/OS code doesn't appear to. + */ + NFSD_UNLOCK_ASSERT(); soflags = so->so_proto->pr_flags; if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) @@ -766,6 +825,7 @@ u_quad_t cur_usec; s = splnet(); + NFSD_LOCK(); /* * Scan the write gathering queues for writes that need to be * completed now. 
@@ -776,6 +836,7 @@ LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec) nfsrv_wakenfsd(slp); } + NFSD_UNLOCK(); splx(s); callout_reset(&nfsrv_callout, nfsrv_ticks, nfsrv_timer, NULL); } --- //depot/vendor/freebsd/src/sys/nfsserver/nfs_srvsubs.c 2004/04/06 22:01:20 +++ //depot/user/rwatson/netperf/sys/nfsserver/nfs_srvsubs.c 2004/04/07 20:11:34 @@ -85,8 +85,8 @@ u_int32_t nfsrv_nfs_prog, nfsrv_nfs_true, nfsrv_nfs_false; /* And other global data */ -static nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, - NFNON, NFCHR, NFNON }; +static const nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, + NFLNK, NFNON, NFCHR, NFNON }; #define vtonfsv2_type(a) txdr_unsigned(nfsv2_type[((int32_t)(a))]) #define vtonfsv3_mode(m) txdr_unsigned((m) & ALLPERMS) @@ -100,10 +100,12 @@ static int nfs_prev_nfssvc_sy_narg; static sy_call_t *nfs_prev_nfssvc_sy_call; +struct mtx nfsd_mtx; + /* * Mapping of old NFS Version 2 RPC numbers to generic numbers. */ -int nfsrv_nfsv3_procid[NFS_NPROCS] = { +const int nfsrv_nfsv3_procid[NFS_NPROCS] = { NFSPROC_NULL, NFSPROC_GETATTR, NFSPROC_SETATTR, @@ -132,7 +134,7 @@ /* * and the reverse mapping from generic to Version 2 procedure numbers */ -int nfsrvv2_procid[NFS_NPROCS] = { +const int nfsrvv2_procid[NFS_NPROCS] = { NFSV2PROC_NULL, NFSV2PROC_GETATTR, NFSV2PROC_SETATTR, @@ -163,7 +165,7 @@ * Use 0 (which gets converted to NFSERR_IO) as the catch all for ones not * specifically defined in RFC 1094. */ -static u_char nfsrv_v2errmap[ELAST] = { +static const u_char nfsrv_v2errmap[ELAST] = { NFSERR_PERM, NFSERR_NOENT, 0, 0, 0, NFSERR_NXIO, 0, 0, 0, 0, 0, 0, NFSERR_ACCES, 0, 0, @@ -192,12 +194,12 @@ * The first entry is the default error return and the rest are the valid * errors for that RPC in increasing numeric order. 
*/ -static short nfsv3err_null[] = { +static const short nfsv3err_null[] = { 0, 0, }; -static short nfsv3err_getattr[] = { +static const short nfsv3err_getattr[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, @@ -206,7 +208,7 @@ 0, }; -static short nfsv3err_setattr[] = { +static const short nfsv3err_setattr[] = { NFSERR_IO, NFSERR_PERM, NFSERR_IO, @@ -222,7 +224,7 @@ 0, }; -static short nfsv3err_lookup[] = { +static const short nfsv3err_lookup[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, @@ -235,7 +237,7 @@ 0, }; -static short nfsv3err_access[] = { +static const short nfsv3err_access[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, @@ -244,7 +246,7 @@ 0, }; -static short nfsv3err_readlink[] = { +static const short nfsv3err_readlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, @@ -256,7 +258,7 @@ 0, }; -static short nfsv3err_read[] = { +static const short nfsv3err_read[] = { NFSERR_IO, NFSERR_IO, NFSERR_NXIO, @@ -268,7 +270,7 @@ 0, }; -static short nfsv3err_write[] = { +static const short nfsv3err_write[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, @@ -283,7 +285,7 @@ 0, }; -static short nfsv3err_create[] = { +static const short nfsv3err_create[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, @@ -300,7 +302,7 @@ 0, }; -static short nfsv3err_mkdir[] = { +static const short nfsv3err_mkdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, @@ -317,7 +319,7 @@ 0, }; -static short nfsv3err_symlink[] = { +static const short nfsv3err_symlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, @@ -334,7 +336,7 @@ 0, }; -static short nfsv3err_mknod[] = { +static const short nfsv3err_mknod[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, @@ -352,7 +354,7 @@ 0, }; -static short nfsv3err_remove[] = { +static const short nfsv3err_remove[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, @@ -366,7 +368,7 @@ 0, }; -static short nfsv3err_rmdir[] = { +static const short nfsv3err_rmdir[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, @@ -384,7 +386,7 @@ 0, }; -static short nfsv3err_rename[] = { +static const short nfsv3err_rename[] = { NFSERR_IO, 
NFSERR_NOENT, NFSERR_IO, @@ -407,7 +409,7 @@ 0, }; -static short nfsv3err_link[] = { +static const short nfsv3err_link[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, @@ -427,7 +429,7 @@ 0, }; -static short nfsv3err_readdir[] = { +static const short nfsv3err_readdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, @@ -440,7 +442,7 @@ 0, }; -static short nfsv3err_readdirplus[] = { +static const short nfsv3err_readdirplus[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, @@ -454,7 +456,7 @@ 0, }; -static short nfsv3err_fsstat[] = { +static const short nfsv3err_fsstat[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, @@ -463,7 +465,7 @@ 0, }; -static short nfsv3err_fsinfo[] = { +static const short nfsv3err_fsinfo[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, @@ -471,7 +473,7 @@ 0, }; -static short nfsv3err_pathconf[] = { +static const short nfsv3err_pathconf[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, @@ -479,7 +481,7 @@ 0, }; -static short nfsv3err_commit[] = { +static const short nfsv3err_commit[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, @@ -488,7 +490,7 @@ 0, }; -static short *nfsrv_v3errmap[] = { +static const short *nfsrv_v3errmap[] = { nfsv3err_null, nfsv3err_getattr, nfsv3err_setattr, @@ -520,8 +522,11 @@ nfsrv_modevent(module_t mod, int type, void *data) { + NET_LOCK_GIANT(); + switch (type) { case MOD_LOAD: + mtx_init(&nfsd_mtx, "nfsd_mtx", NULL, MTX_DEF); nfsrv_rpc_vers = txdr_unsigned(RPC_VER2); nfsrv_rpc_call = txdr_unsigned(RPC_CALL); nfsrv_rpc_reply = txdr_unsigned(RPC_REPLY); @@ -538,10 +543,11 @@ if (nfsrv_ticks < 1) nfsrv_ticks = 1; + nfsrv_initcache(); /* Init the server request cache */ + NFSD_LOCK(); nfsrv_init(0); /* Init server data structures */ - nfsrv_initcache(); /* Init the server request cache */ - callout_init(&nfsrv_callout, 0); + NFSD_UNLOCK(); nfsrv_timer(0); nfs_prev_nfssvc_sy_narg = sysent[SYS_nfssvc].sy_narg; @@ -555,8 +561,10 @@ callout_stop(&nfsrv_callout); sysent[SYS_nfssvc].sy_narg = nfs_prev_nfssvc_sy_narg; sysent[SYS_nfssvc].sy_call = 
nfs_prev_nfssvc_sy_call; + mtx_destroy(&nfsd_mtx); break; } + NET_UNLOCK_GIANT(); return 0; } static moduledata_t nfsserver_mod = { @@ -601,6 +609,10 @@ struct componentname *cnp = &ndp->ni_cnd; int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0; + NFSD_LOCK_ASSERT(); + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ + *retdirp = NULL; cnp->cn_flags |= NOMACCHECK; cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); @@ -644,8 +656,12 @@ /* * Extract and set starting directory. */ + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp, nam, &rdonly, pubflag); + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ if (error) goto out; if (dp->v_type != VDIR) { @@ -866,6 +882,8 @@ } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { ndp->ni_dvp = NULL; } + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); return (error); } @@ -880,6 +898,8 @@ int count, i; char *cp; + NFSD_LOCK_DONTCARE(); + /* * Trim from tail. Scan the mbuf chain, * calculating its length and finding the last mbuf. 
@@ -1041,6 +1061,8 @@ struct sockaddr_int *saddr; #endif + NFSD_LOCK_ASSERT(); + *vpp = NULL; if (nfs_ispublicfh(fhp)) { @@ -1052,12 +1074,14 @@ mp = vfs_getvfs(&fhp->fh_fsid); if (!mp) return (ESTALE); + NFSD_UNLOCK(); + mtx_lock(&Giant); /* VFS */ error = VFS_CHECKEXP(mp, nam, &exflags, &credanon); if (error) - return (error); + goto out; error = VFS_FHTOVP(mp, &fhp->fh_fid, vpp); if (error) - return (error); + goto out; #ifdef MNT_EXNORESPORT if (!(exflags & (MNT_EXNORESPORT|MNT_EXPUBLIC))) { saddr = (struct sockaddr_in *)nam; @@ -1067,7 +1091,7 @@ ntohs(saddr->sin_port) >= IPPORT_RESERVED) { vput(*vpp); *vpp = NULL; - return (NFSERR_AUTHERR | AUTH_TOOWEAK); + error = NFSERR_AUTHERR | AUTH_TOOWEAK; } } #endif @@ -1089,6 +1113,9 @@ if (!lockflag) VOP_UNLOCK(*vpp, 0, td); +out: + mtx_unlock(&Giant); /* VFS */ + NFSD_LOCK(); return (0); } @@ -1104,6 +1131,8 @@ char *cp = (char *)fhp; int i; + NFSD_LOCK_DONTCARE(); + for (i = 0; i < NFSX_V3FH; i++) if (*cp++ != 0) return (FALSE); @@ -1122,6 +1151,8 @@ { struct sockaddr_in *inetaddr; + NFSD_LOCK_DONTCARE(); + switch (family) { case AF_INET: inetaddr = (struct sockaddr_in *)nam; @@ -1157,9 +1188,11 @@ int nfsrv_errmap(struct nfsrv_descript *nd, int err) { - short *defaulterrp, *errp; + const short *defaulterrp, *errp; int e; + NFSD_LOCK_DONTCARE(); + if (nd->nd_flag & ND_NFSV3) { if (nd->nd_procnum <= NFSPROC_COMMIT) { errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; @@ -1185,6 +1218,13 @@ nfsrv_object_create(struct vnode *vp) { + GIANT_REQUIRED; + /* + * XXXRW: Don't want to hold nfsd_mtx across VFS operation that + * may block. + */ + NFSD_UNLOCK_ASSERT(); + if (vp == NULL || vp->v_type != VREG) return (1); return (vfs_object_create(vp, curthread, curthread->td_ucred)); @@ -1201,6 +1241,8 @@ int i, j; gid_t v; + NFSD_LOCK_DONTCARE(); + /* Insertion sort. 
*/ for (i = 1; i < num; i++) { v = list[i]; @@ -1219,6 +1261,8 @@ { int i; + NFSD_LOCK_DONTCARE(); + bzero((caddr_t)outcred, sizeof (struct ucred)); outcred->cr_ref = 1; outcred->cr_uid = incred->cr_uid; @@ -1237,6 +1281,8 @@ { u_int32_t *tl; + NFSD_LOCK_DONTCARE(); + if (v3) { tl = nfsm_build_xx(NFSX_UNSIGNED + NFSX_V3FH, mb, bpos); *tl++ = txdr_unsigned(NFSX_V3FH); @@ -1263,6 +1309,8 @@ { u_int32_t *tl; + NFSD_LOCK_DONTCARE(); + tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; @@ -1277,6 +1325,8 @@ { u_int32_t *tl; + NFSD_LOCK_DONTCARE(); + tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; @@ -1290,15 +1340,26 @@ void nfsm_clget_xx(u_int32_t **tl, struct mbuf *mb, struct mbuf **mp, - char **bp, char **be, caddr_t bpos) + char **bp, char **be, caddr_t bpos, int droplock) { struct mbuf *nmp; + NFSD_LOCK_DONTCARE(); + + if (droplock) + NFSD_LOCK_ASSERT(); + else + NFSD_UNLOCK_ASSERT(); + if (*bp >= *be) { if (*mp == mb) (*mp)->m_len += *bp - bpos; + if (droplock) + NFSD_UNLOCK(); MGET(nmp, M_TRYWAIT, MT_DATA); MCLGET(nmp, M_TRYWAIT); + if (droplock) + NFSD_LOCK(); nmp->m_len = NFSMSIZ(nmp); (*mp)->m_next = nmp; *mp = nmp; @@ -1315,6 +1376,8 @@ u_int32_t *tl; int fhlen; + NFSD_LOCK_DONTCARE(); + if (nfsd->nd_flag & ND_NFSV3) { tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); if (tl == NULL) @@ -1341,6 +1404,8 @@ { u_int32_t *tl; + NFSD_LOCK_DONTCARE(); + tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; --- //depot/vendor/freebsd/src/sys/nfsserver/nfs_syscalls.c 2004/04/06 22:01:20 +++ //depot/user/rwatson/netperf/sys/nfsserver/nfs_syscalls.c 2004/04/07 20:11:34 @@ -142,11 +142,14 @@ error = suser(td); if (error) return (error); - mtx_lock(&Giant); + NET_LOCK_GIANT(); + NFSD_LOCK(); while (nfssvc_sockhead_flag & SLP_INIT) { nfssvc_sockhead_flag |= SLP_WANTINIT; - (void) tsleep(&nfssvc_sockhead, PSOCK, "nfsd init", 0); + (void) msleep(&nfssvc_sockhead, &nfsd_mtx, PSOCK, + "nfsd 
init", 0); } + NFSD_UNLOCK(); if (uap->flag & NFSSVC_ADDSOCK) { error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg)); if (error) @@ -180,7 +183,7 @@ if (error == EINTR || error == ERESTART) error = 0; done2: - mtx_unlock(&Giant); + NET_UNLOCK_GIANT(); return (error); } @@ -195,10 +198,14 @@ struct socket *so; int error, s; - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); so = fp->f_data; #if 0 + /* + * XXXRW: If this code is ever enabled, there's a race when running + * MPSAFE. + */ tslp = NULL; /* * Add it to the list, as required. @@ -263,12 +270,16 @@ malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK | M_ZERO); STAILQ_INIT(&slp->ns_rec); + NFSD_LOCK(); TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain); slp->ns_so = so; slp->ns_nam = mynam; fhold(fp); slp->ns_fp = fp; + /* + * XXXRW: Socket locking here? + */ s = splnet(); so->so_upcallarg = (caddr_t)slp; so->so_upcall = nfsrv_rcv; @@ -276,6 +287,7 @@ slp->ns_flag = (SLP_VALID | SLP_NEEDQ); nfsrv_wakenfsd(slp); splx(s); + NFSD_UNLOCK(); return (0); } @@ -295,6 +307,8 @@ int procrastinate; u_quad_t cur_usec; + NET_ASSERT_GIANT(); + #ifndef nolint cacherep = RC_DOIT; writes_todo = 0; @@ -302,6 +316,8 @@ nfsd = (struct nfsd *) malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK | M_ZERO); s = splnet(); + NFSD_LOCK(); + nfsd->nfsd_td = td; TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain); nfs_numnfsd++; @@ -315,8 +331,8 @@ (nfsd_head_flag & NFSD_CHECKSLP) == 0) { nfsd->nfsd_flag |= NFSD_WAITING; nfsd_waiting++; - error = tsleep(nfsd, PSOCK | PCATCH, - "-", 0); + error = msleep(nfsd, &nfsd_mtx, + PSOCK | PCATCH, "-", 0); nfsd_waiting--; if (error) goto done; @@ -343,8 +359,10 @@ else if (slp->ns_flag & SLP_NEEDQ) { slp->ns_flag &= ~SLP_NEEDQ; (void) nfs_slplock(slp, 1); + NFSD_UNLOCK(); nfsrv_rcv(slp->ns_so, (caddr_t)slp, M_TRYWAIT); + NFSD_LOCK(); nfs_slpunlock(slp); } error = nfsrv_dorec(slp, nfsd, &nd); @@ -458,6 +476,7 @@ nd->nd_mrep = NULL; /* FALLTHROUGH */ case RC_REPLY: + 
NFSD_UNLOCK(); siz = m_length(mreq, NULL); if (siz <= 0 || siz > NFS_MAXPACKET) { printf("mbuf siz=%d\n",siz); @@ -474,11 +493,16 @@ M_PREPEND(m, NFSX_UNSIGNED, M_TRYWAIT); *mtod(m, u_int32_t *) = htonl(0x80000000 | siz); } + NFSD_LOCK(); if (slp->ns_so->so_proto->pr_flags & PR_CONNREQUIRED) (void) nfs_slplock(slp, 1); - if (slp->ns_flag & SLP_VALID) + if (slp->ns_flag & SLP_VALID) { + NFSD_UNLOCK(); + NET_LOCK_GIANT(); error = nfsrv_send(slp->ns_so, nd->nd_nam2, m); - else { + NET_UNLOCK_GIANT(); + NFSD_LOCK(); + } else { error = EPIPE; m_freem(m); } @@ -535,6 +559,7 @@ free((caddr_t)nfsd, M_NFSD); if (--nfs_numnfsd == 0) nfsrv_init(TRUE); /* Reinitialize everything */ + NFSD_UNLOCK(); return (error); } @@ -554,9 +579,18 @@ struct nfsrv_rec *rec; int s; + NET_ASSERT_GIANT(); + NFSD_LOCK_ASSERT(); + + /* + * XXXRW: By clearing all flags, other threads/etc should ignore + * this slp and we can safely release nfsd_mtx so we can clean + * up the slp safely. + */ slp->ns_flag &= ~SLP_ALLFLAGS; fp = slp->ns_fp; if (fp) { + NFSD_UNLOCK(); slp->ns_fp = NULL; so = slp->ns_so; so->so_rcv.sb_flags &= ~SB_UPCALL; @@ -564,6 +598,7 @@ so->so_upcallarg = NULL; soshutdown(so, SHUT_RDWR); closef(fp, NULL); + NFSD_LOCK(); if (slp->ns_nam) FREE(slp->ns_nam, M_SONAME); m_freem(slp->ns_raw); @@ -593,6 +628,8 @@ nfsrv_slpderef(struct nfssvc_sock *slp) { + NFSD_LOCK_ASSERT(); + if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) { TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain); free((caddr_t)slp, M_NFSSVC); @@ -601,17 +638,22 @@ /* * Lock a socket against others. + * + * XXXRW: Wait argument is always 1 in the caller. Replace with a real + * sleep lock? 
*/ int nfs_slplock(struct nfssvc_sock *slp, int wait) { int *statep = &slp->ns_solock; + NFSD_LOCK_ASSERT(); + if (!wait && (*statep & NFSRV_SNDLOCK)) return(0); /* already locked, fail */ while (*statep & NFSRV_SNDLOCK) { *statep |= NFSRV_WANTSND; - (void) tsleep(statep, PZERO - 1, "nfsslplck", 0); + (void) msleep(statep, &nfsd_mtx, PZERO - 1, "nfsslplck", 0); } *statep |= NFSRV_SNDLOCK; return (1); @@ -625,6 +667,8 @@ { int *statep = &slp->ns_solock; + NFSD_LOCK_ASSERT(); + if ((*statep & NFSRV_SNDLOCK) == 0) panic("nfs slpunlock"); *statep &= ~NFSRV_SNDLOCK; @@ -644,6 +688,9 @@ { struct nfssvc_sock *slp, *nslp; + NET_ASSERT_GIANT(); + NFSD_LOCK_ASSERT(); + if (nfssvc_sockhead_flag & SLP_INIT) panic("nfsd init"); nfssvc_sockhead_flag |= SLP_INIT; --- //depot/vendor/freebsd/src/sys/nfsserver/nfsm_subs.h 2004/04/06 22:01:20 +++ //depot/user/rwatson/netperf/sys/nfsserver/nfsm_subs.h 2004/04/07 20:11:34 @@ -160,7 +160,7 @@ caddr_t *bpos); void nfsm_srvpostop_fh_xx(fhandle_t *f, struct mbuf **mb, caddr_t *bpos); void nfsm_clget_xx(u_int32_t **tl, struct mbuf *mb, struct mbuf **mp, - char **bp, char **be, caddr_t bpos); + char **bp, char **be, caddr_t bpos, int droplock); #define nfsm_srvfhtom(f, v3) \ nfsm_srvfhtom_xx((f), (v3), &mb, &bpos) @@ -178,6 +178,9 @@ nfsm_srvfattr(nfsd, (a), (f)) #define nfsm_clget \ - nfsm_clget_xx(&tl, mb, &mp, &bp, &be, bpos) + nfsm_clget_xx(&tl, mb, &mp, &bp, &be, bpos, 1) + +#define nfsm_clget_nolock \ + nfsm_clget_xx(&tl, mb, &mp, &bp, &be, bpos, 0) #endif --- //depot/vendor/freebsd/src/sys/rpc/rpcclnt.c 2004/03/27 21:56:16 +++ //depot/user/rwatson/netperf/sys/rpc/rpcclnt.c 2004/03/28 07:24:45 @@ -360,7 +360,7 @@ RPC_RETURN(EFAULT); } - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); /* create the socket */ rpc->rc_so = NULL; @@ -618,7 +618,7 @@ { struct socket *so; - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); if (rpc->rc_so) { so = rpc->rc_so; @@ -669,7 +669,7 @@ #endif int 
error, soflags, flags; - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); if (rep) { if (rep->r_flags & R_SOFTTERM) { @@ -754,7 +754,7 @@ #endif int error, sotype, rcvflg; - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); /* * Set up arguments for soreceive() @@ -1439,6 +1439,7 @@ * Set r_rtt to -1 in case we fail to send it now. */ rep->r_rtt = -1; + SOCKBUF_LOCK(&so->so_snd); if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && ((rpc->rc_flag & RPCCLNT_DUMBTIMR) || (rep->r_flags & R_SENT) || @@ -1473,6 +1474,7 @@ rep->r_rtt = 0; } } + SOCKBUF_UNLOCK(&so->so_snd); } splx(s); --- //depot/vendor/freebsd/src/sys/sys/mutex.h 2004/03/28 15:15:25 +++ //depot/user/rwatson/netperf/sys/sys/mutex.h 2004/03/28 17:06:00 @@ -350,7 +350,10 @@ * without special recursion handling. * * This mechanism is intended as temporary until everything of - * importance is properly locked. + * importance is properly locked. Note: the semantics for + * NET_{LOCK,UNLOCK}_GIANT() are not the same as DROP_GIANT() + * and PICKUP_GIANT(), as they are plain mutex operations + * without a recursion counter. */ extern int debug_mpsafenet; /* defined in net/netisr.c */ #define NET_LOCK_GIANT() do { \ --- //depot/vendor/freebsd/src/sys/sys/socketvar.h 2004/04/06 21:20:57 +++ //depot/user/rwatson/netperf/sys/sys/socketvar.h 2004/04/07 20:11:34 @@ -35,6 +35,8 @@ #include /* for TAILQ macros */ #include /* for struct selinfo */ +#include +#include /* * Kernel structure per socket. @@ -44,19 +46,6 @@ */ typedef u_quad_t so_gen_t; -/* - * List of locks: - * (c) const, inited in either socreate() or sonewconn() - * (m) sb_mtx mutex - * (mr) so_rcv.sb_mtx mutex - * (sg) sigio_lock sx - * (sh) sohead_lock sx - * - * Lock of so_rcv.sb_mtx can duplicate, provided that sohead_lock - * is exclusively locked. - * - * Brackets mean that this data is not protected yet. 
- */ struct socket { int so_count; /* reference count */ short so_type; /* generic type, see socket.h */ @@ -95,6 +84,7 @@ */ struct sockbuf { struct selinfo sb_sel; /* process selecting read/write */ + struct mtx sb_mtx; /* sockbuf lock */ #define sb_startzero sb_mb struct mbuf *sb_mb; /* the mbuf chain */ struct mbuf *sb_mbtail; /* the last mbuf in the chain */ @@ -142,6 +132,22 @@ } \ } while (/*CONSTCOND*/0) +#define SOCKBUF_MTX(_sb) (&(_sb)->sb_mtx) +#define SOCKBUF_LOCK_INIT(_sb, _name) \ + mtx_init(SOCKBUF_MTX(_sb), _name, NULL, MTX_DEF) +#define SOCKBUF_LOCK_DESTROY(_sb) mtx_destroy(SOCKBUF_MTX(_sb)) +#define SOCKBUF_LOCK(_sb) mtx_lock(SOCKBUF_MTX(_sb)) +#define SOCKBUF_OWNED(_sb) mtx_owned(SOCKBUF_MTX(_sb)) +#define SOCKBUF_UNLOCK(_sb) mtx_unlock(SOCKBUF_MTX(_sb)) +#define SOCKBUF_LOCK_ASSERT(_sb) mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED) + +/* NB: we reuse the rcv sockbuf's lock for other items in the socket */ +#define SOCK_MTX(_so) SOCKBUF_MTX(&(_so)->so_rcv) +#define SOCK_LOCK(_so) SOCKBUF_LOCK(&(_so)->so_rcv) +#define SOCK_OWNED(_so) SOCKBUF_OWNED(&(_so)->so_rcv) +#define SOCK_UNLOCK(_so) SOCKBUF_UNLOCK(&(_so)->so_rcv) +#define SOCK_LOCK_ASSERT(_so) SOCKBUF_LOCK_ASSERT(&(_so)->so_rcv) + /* * Socket state bits. */ @@ -193,6 +199,7 @@ uid_t so_uid; /* XXX */ }; +#ifdef _KERNEL /* * Macros for sockets and socket buffering. */ @@ -263,47 +270,61 @@ ((sb)->sb_flags |= SB_LOCK), 0) /* release lock on sockbuf sb */ -#define sbunlock(sb) { \ +#define sbunlock(sb) do { \ (sb)->sb_flags &= ~SB_LOCK; \ if ((sb)->sb_flags & SB_WANT) { \ (sb)->sb_flags &= ~SB_WANT; \ wakeup(&(sb)->sb_flags); \ } \ -} +} while (0) /* * soref()/sorele() ref-count the socket structure. Note that you must * still explicitly close the socket, but the last ref count will free * the structure. 
*/ -#define soref(so) do { \ - ++(so)->so_count; \ - } while (0) +#define soref(so) do { \ + ++(so)->so_count; \ +} while (0) -#define sorele(so) do { \ - if ((so)->so_count <= 0) \ - panic("sorele");\ - if (--(so)->so_count == 0)\ - sofree(so); \ - } while (0) +#define sorele(so) do { \ + SOCK_LOCK_ASSERT(so); \ + if ((so)->so_count <= 0) \ + panic("sorele"); \ + if (--(so)->so_count == 0) \ + sofree(so); \ + else \ + SOCK_UNLOCK(so); \ + so = NULL; \ +} while (0) -#define sotryfree(so) do { \ - if ((so)->so_count == 0) \ - sofree(so); \ - } while(0) +#define sotryfree(so) do { \ + SOCK_LOCK_ASSERT(so); \ + if ((so)->so_count == 0) \ + sofree(so); \ + else \ + SOCK_UNLOCK(so); \ + so = NULL; \ +} while (0) -#define sorwakeup(so) do { \ - if (sb_notify(&(so)->so_rcv)) \ - sowakeup((so), &(so)->so_rcv); \ - } while (0) +#define sorwakeup(so) do { \ + if (sb_notify(&(so)->so_rcv)) \ + sowakeup((so), &(so)->so_rcv); \ +} while (0) +#define sorwakeup_locked(so) do { \ + if (sb_notify(&(so)->so_rcv)) \ + sowakeup_locked((so), &(so)->so_rcv); \ +} while (0) -#define sowwakeup(so) do { \ - if (sb_notify(&(so)->so_snd)) \ - sowakeup((so), &(so)->so_snd); \ - } while (0) +#define sowwakeup(so) do { \ + if (sb_notify(&(so)->so_snd)) \ + sowakeup((so), &(so)->so_snd); \ +} while (0) +#define sowwakeup_locked(so) do { \ + if (sb_notify(&(so)->so_snd)) \ + sowakeup_locked((so), &(so)->so_snd); \ +} while (0) -#ifdef _KERNEL - /* * Argument structure for sosetopt et seq. This is in the KERNEL * section because it will never be visible to user code. 
@@ -351,12 +372,19 @@ int sockargs(struct mbuf **mp, caddr_t buf, int buflen, int type); int getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len); void sbappend(struct sockbuf *sb, struct mbuf *m); +void sbappend_locked(struct sockbuf *sb, struct mbuf *m); void sbappendstream(struct sockbuf *sb, struct mbuf *m); +void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m); int sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, struct mbuf *control); +int sbappendaddr_locked(struct sockbuf *sb, struct sockaddr *asa, + struct mbuf *m0, struct mbuf *control); int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control); +int sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, + struct mbuf *control); void sbappendrecord(struct sockbuf *sb, struct mbuf *m0); +void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0); void sbcheck(struct sockbuf *sb); void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n); struct mbuf * @@ -365,6 +393,7 @@ void sbdroprecord(struct sockbuf *sb); void sbflush(struct sockbuf *sb); void sbinsertoob(struct sockbuf *sb, struct mbuf *m0); +void sbinsertoob_locked(struct sockbuf *sb, struct mbuf *m0); void sbrelease(struct sockbuf *sb, struct socket *so); int sbreserve(struct sockbuf *sb, u_long cc, struct socket *so, struct thread *td); @@ -377,7 +406,9 @@ int socheckuid(struct socket *so, uid_t uid); int sobind(struct socket *so, struct sockaddr *nam, struct thread *td); void socantrcvmore(struct socket *so); +void socantrcvmore_locked(struct socket *so); void socantsendmore(struct socket *so); +void socantsendmore_locked(struct socket *so); int soclose(struct socket *so); int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td); int soconnect2(struct socket *so1, struct socket *so2); @@ -418,6 +449,7 @@ int soshutdown(struct socket *so, int how); void sotoxsocket(struct socket *so, struct xsocket *xso); void sowakeup(struct socket *so, struct 
sockbuf *sb); +void sowakeup_locked(struct socket *so, struct sockbuf *sb); #ifdef SOCKBUF_DEBUG void sblastrecordchk(struct sockbuf *, const char *, int); --- //depot/vendor/freebsd/src/sys/sys/unpcb.h 2004/04/06 21:20:57 +++ //depot/user/rwatson/netperf/sys/sys/unpcb.h 2004/04/07 20:11:34 @@ -78,6 +78,7 @@ unp_gen_t unp_gencnt; /* generation count of this instance */ int unp_flags; /* flags */ struct xucred unp_peercred; /* peer credentials, if applicable */ + struct mtx unp_mtx; /* locking */ }; /*