--- //depot/vendor/freebsd/src/sys/dev/firewire/if_fwe.c 2004/07/20 04:30:46 +++ //depot/user/rwatson/netperf/sys/dev/firewire/if_fwe.c 2004/07/25 19:09:59 @@ -215,7 +215,8 @@ ifp->if_start = fwe_start; ifp->if_ioctl = fwe_ioctl; ifp->if_mtu = ETHERMTU; - ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST); + ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST| + IFF_NEEDSGIANT); ifp->if_snd.ifq_maxlen = TX_MAX_QUEUE; s = splimp(); @@ -487,6 +488,8 @@ struct fwe_softc *fwe = ((struct fwe_eth_softc *)ifp->if_softc)->fwe; int s; + GIANT_REQUIRED; + FWEDEBUG(ifp, "starting\n"); if (fwe->dma_ch < 0) { --- //depot/vendor/freebsd/src/sys/dev/random/randomdev_soft.c 2004/07/18 09:10:43 +++ //depot/user/rwatson/netperf/sys/dev/random/randomdev_soft.c 2004/08/05 14:02:15 @@ -53,7 +53,7 @@ #include #include -#define RANDOM_FIFO_MAX 256 /* How many events to queue up */ +#define RANDOM_FIFO_MAX 4 /* How many events to queue up */ static void random_kthread(void *); static void @@ -296,6 +296,10 @@ { struct harvest *event; + /* Lockless read to avoid lock operations if fifo is full. */ + if (harvestfifo[origin].count >= RANDOM_FIFO_MAX) + return; + /* Lock the particular fifo */ mtx_lock_spin(&harvestfifo[origin].lock); --- //depot/vendor/freebsd/src/sys/dev/usb/if_aue.c 2004/06/27 12:45:37 +++ //depot/user/rwatson/netperf/sys/dev/usb/if_aue.c 2004/07/25 19:09:59 @@ -717,7 +717,8 @@ ifp->if_softc = sc; if_initname(ifp, "aue", sc->aue_unit); ifp->if_mtu = ETHERMTU; - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | + IFF_NEEDSGIANT; ifp->if_ioctl = aue_ioctl; ifp->if_start = aue_start; ifp->if_watchdog = aue_watchdog; --- //depot/vendor/freebsd/src/sys/dev/usb/if_axe.c 2004/07/18 06:50:35 +++ //depot/user/rwatson/netperf/sys/dev/usb/if_axe.c 2004/07/25 19:09:59 @@ -476,7 +476,8 @@ ifp->if_softc = sc; if_initname(ifp, "axe", sc->axe_unit); ifp->if_mtu = ETHERMTU; - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | + IFF_NEEDSGIANT; ifp->if_ioctl = axe_ioctl; ifp->if_start = axe_start; ifp->if_watchdog = axe_watchdog; --- //depot/vendor/freebsd/src/sys/dev/usb/if_cue.c 2004/06/27 12:45:37 +++ //depot/user/rwatson/netperf/sys/dev/usb/if_cue.c 2004/07/25 19:09:59 @@ -507,7 +507,8 @@ ifp->if_softc = sc; if_initname(ifp, "cue", sc->cue_unit); ifp->if_mtu = ETHERMTU; - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | + IFF_NEEDSGIANT; ifp->if_ioctl = cue_ioctl; ifp->if_start = cue_start; ifp->if_watchdog = cue_watchdog; --- //depot/vendor/freebsd/src/sys/dev/usb/if_kue.c 2004/06/27 12:45:37 +++ //depot/user/rwatson/netperf/sys/dev/usb/if_kue.c 2004/07/25 19:09:59 @@ -481,7 +481,8 @@ ifp->if_softc = sc; if_initname(ifp, "kue", sc->kue_unit); ifp->if_mtu = ETHERMTU; - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | + IFF_NEEDSGIANT; ifp->if_ioctl = kue_ioctl; ifp->if_start = kue_start; ifp->if_watchdog = kue_watchdog; --- //depot/vendor/freebsd/src/sys/dev/usb/if_rue.c 2004/06/27 12:45:37 +++ //depot/user/rwatson/netperf/sys/dev/usb/if_rue.c 2004/07/25 19:09:59 @@ -660,7 +660,8 @@ ifp->if_softc = sc; if_initname(ifp, "rue", sc->rue_unit); ifp->if_mtu = ETHERMTU; - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST || + IFF_NEEDSGIANT; ifp->if_ioctl = rue_ioctl; ifp->if_start = rue_start; ifp->if_watchdog = rue_watchdog; --- //depot/vendor/freebsd/src/sys/dev/usb/if_udav.c 2004/06/27 12:45:37 +++ //depot/user/rwatson/netperf/sys/dev/usb/if_udav.c 2004/07/25 19:09:59 @@ -400,7 +400,8 @@ #elif defined(__FreeBSD__) if_initname(ifp, "udav", device_get_unit(self)); #endif - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | + IFF_NEEDSGIANT; ifp->if_start = udav_start; ifp->if_ioctl = udav_ioctl; ifp->if_watchdog = udav_watchdog; --- //depot/vendor/freebsd/src/sys/fs/portalfs/portal_vnops.c 2004/06/24 00:50:46 +++ //depot/user/rwatson/netperf/sys/fs/portalfs/portal_vnops.c 2004/06/24 03:49:05 @@ -194,6 +194,7 @@ unp2 = sotounpcb(so2); unp3 = sotounpcb(so3); + /* XXXRW: Locking? */ if (unp2->unp_addr) unp3->unp_addr = (struct sockaddr_un *) sodupsockaddr((struct sockaddr *)unp2->unp_addr, --- //depot/vendor/freebsd/src/sys/i386/conf/GENERIC 2004/08/03 19:25:40 +++ //depot/user/rwatson/netperf/sys/i386/conf/GENERIC 2004/08/04 20:26:26 @@ -29,7 +29,8 @@ makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols -options SCHED_ULE # ULE scheduler +#options SCHED_ULE # ULE scheduler +options SCHED_4BSD options INET # InterNETworking options INET6 # IPv6 communications protocols options FFS # Berkeley Fast Filesystem @@ -68,6 +69,7 @@ options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS options WITNESS # Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed +options BREAK_TO_DEBUGGER # To make an SMP kernel, the next two are needed options SMP # Symmetric MultiProcessor Kernel --- //depot/vendor/freebsd/src/sys/i386/i386/trap.c 2004/07/10 22:15:40 +++ //depot/user/rwatson/netperf/sys/i386/i386/trap.c 2004/08/05 23:43:24 @@ -983,6 +983,9 @@ ktrsyscall(code, narg, args); #endif + CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td, + td->td_proc->p_pid, td->td_proc->p_comm, code); + /* * Try to run the syscall without Giant if the syscall * is MP safe. @@ -1050,6 +1053,9 @@ */ userret(td, &frame, sticks); + CTR4(KTR_SYSC, "syscall exit thread %p pid %d proc %s code %d", td, + td->td_proc->p_pid, td->td_proc->p_comm, code); + #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) ktrsysret(code, error, td->td_retval[0]); --- //depot/vendor/freebsd/src/sys/kern/kern_descrip.c 2004/08/04 18:35:46 +++ //depot/user/rwatson/netperf/sys/kern/kern_descrip.c 2004/08/04 20:26:26 @@ -333,12 +333,29 @@ struct vnode *vp; u_int newmin; int error, flg, tmp; + int giant_locked; + /* + * XXXRW: Some fcntl() calls require Giant -- others don't. Try to + * avoid grabbing Giant for calls we know don't need it. + */ + switch (cmd) { + case F_DUPFD: + case F_GETFD: + case F_SETFD: + case F_GETFL: + giant_locked = 0; + break; + + default: + giant_locked = 1; + mtx_lock(&Giant); + } + error = 0; flg = F_POSIX; p = td->td_proc; fdp = p->p_fd; - mtx_lock(&Giant); FILEDESC_LOCK(fdp); if ((unsigned)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) { @@ -350,6 +367,7 @@ switch (cmd) { case F_DUPFD: + mtx_assert(&Giant, MA_NOTOWNED); FILEDESC_UNLOCK(fdp); newmin = arg; PROC_LOCK(p); @@ -364,17 +382,21 @@ break; case F_GETFD: + mtx_assert(&Giant, MA_NOTOWNED); td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0; FILEDESC_UNLOCK(fdp); break; case F_SETFD: + mtx_assert(&Giant, MA_NOTOWNED); *pop = (*pop &~ UF_EXCLOSE) | (arg & FD_CLOEXEC ? UF_EXCLOSE : 0); FILEDESC_UNLOCK(fdp); break; case F_GETFL: + /* MPSAFE */ + mtx_assert(&Giant, MA_NOTOWNED); FILE_LOCK(fp); FILEDESC_UNLOCK(fdp); td->td_retval[0] = OFLAGS(fp->f_flag); @@ -382,6 +404,7 @@ break; case F_SETFL: + mtx_assert(&Giant, MA_OWNED); FILE_LOCK(fp); FILEDESC_UNLOCK(fdp); fhold_locked(fp); @@ -409,6 +432,7 @@ break; case F_GETOWN: + mtx_assert(&Giant, MA_OWNED); fhold(fp); FILEDESC_UNLOCK(fdp); error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); @@ -418,6 +442,7 @@ break; case F_SETOWN: + mtx_assert(&Giant, MA_OWNED); fhold(fp); FILEDESC_UNLOCK(fdp); tmp = arg; @@ -426,10 +451,12 @@ break; case F_SETLKW: + mtx_assert(&Giant, MA_OWNED); flg |= F_WAIT; /* FALLTHROUGH F_SETLK */ case F_SETLK: + mtx_assert(&Giant, MA_OWNED); if (fp->f_type != DTYPE_VNODE) { FILEDESC_UNLOCK(fdp); error = EBADF; @@ -503,6 +530,7 @@ break; case F_GETLK: + mtx_assert(&Giant, MA_OWNED); if (fp->f_type != DTYPE_VNODE) { FILEDESC_UNLOCK(fdp); error = EBADF; @@ -542,7 +570,8 @@ break; } done2: - mtx_unlock(&Giant); + if (giant_locked) + mtx_unlock(&Giant); return (error); } @@ -667,10 +696,7 @@ * XXX this duplicates parts of close(). */ if (delfp != NULL) { - /* XXX need to call knote_fdclose() */ - mtx_lock(&Giant); (void) closef(delfp, td); - mtx_unlock(&Giant); if (holdleaders) { FILEDESC_LOCK(fdp); fdp->fd_holdleaderscount--; @@ -934,12 +960,10 @@ error = 0; holdleaders = 0; fdp = td->td_proc->p_fd; - mtx_lock(&Giant); FILEDESC_LOCK(fdp); if ((unsigned)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) { FILEDESC_UNLOCK(fdp); - mtx_unlock(&Giant); return (EBADF); } fdp->fd_ofiles[fd] = NULL; @@ -960,12 +984,16 @@ */ if (fd < fdp->fd_knlistsize) { FILEDESC_UNLOCK(fdp); + /* + * XXXRW: Acquire Giant around kqueue related shutdown for the + * fd. + */ + mtx_lock(&Giant); knote_fdclose(td, fd); + mtx_unlock(&Giant); } else FILEDESC_UNLOCK(fdp); - error = closef(fp, td); - mtx_unlock(&Giant); if (holdleaders) { FILEDESC_LOCK(fdp); fdp->fd_holdleaderscount--; @@ -1348,6 +1376,8 @@ /* * Free a file descriptor. + * + * ffree() is MPSAFE. */ void ffree(fp) @@ -1472,6 +1502,9 @@ /* * Release a filedesc structure. + * + * XXXRW: Requires Giant because it calls into VFS. Status of p_fdtol is + * unclear to me. */ void fdfree(td) @@ -1612,10 +1645,16 @@ * * Since setugidsafety calls this only for fd 0, 1 and 2, this check is * sufficient. We also don't for check setugidness since we know we are. + * + * XXXRW: Requires Giant due to check of v_vflag. Actually, this can + * probably become a lockless read. */ static int is_unsafe(struct file *fp) { + + GIANT_REQUIRED; + if (fp->f_type == DTYPE_VNODE) { struct vnode *vp = fp->f_vnode; @@ -1627,6 +1666,9 @@ /* * Make this setguid thing safe, if at all possible. + * + * XXXRW: Requires Giant because it calls knote_fdclose(). closef() is now + * MPSAFE. */ void setugidsafety(td) @@ -1676,6 +1718,8 @@ /* * Close any files on exec? + * + * XXXRW: Requires Giant due to knote_fdclosed(). closef() is now MPSAFE. */ void fdcloseexec(td) @@ -1684,6 +1728,8 @@ struct filedesc *fdp; int i; + GIANT_REQUIRED; + /* Certain daemons might not have file descriptors. */ fdp = td->td_proc->p_fd; if (fdp == NULL) @@ -1727,6 +1773,8 @@ * significance in the Standard C library. fdcheckstd() will create a * descriptor referencing /dev/null for each of stdin, stdout, and * stderr that is not already open. + * + * XXXRW: Requires Giant for VFS access. do_dup is MPSAFE. */ int fdcheckstd(td) @@ -1798,6 +1846,11 @@ * Decrement reference count on file structure. * Note: td may be NULL when closing a file * that was being passed in a message. + * + * XXXRW: This function may now be called without Giant. If it is called + * with Giant, then it will recurse Giant to guarantee the right locks + * are held to enter VFS. This is somewhat expensive, but is required to + * make close() Giant-free for sockets, pipes, et al. */ int closef(fp, td) @@ -1821,6 +1874,7 @@ */ if (td != NULL && fp->f_type == DTYPE_VNODE) { + mtx_lock(&Giant); if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { lf.l_whence = SEEK_SET; lf.l_start = 0; @@ -1864,6 +1918,7 @@ } FILEDESC_UNLOCK(fdp); } + mtx_unlock(&Giant); } return (fdrop(fp, td)); } @@ -1871,6 +1926,8 @@ /* * Drop reference on struct file passed in, may call closef if the * reference hits zero. + * + * XXXRW: fdrop() is MPSAFE. */ int fdrop(fp, td) @@ -1962,6 +2019,8 @@ * the descriptor does not represent a vnode. Note that pipes use vnodes * but never have VM objects (so VOP_GETVOBJECT() calls will return an * error). The returned vnode will be vref()d. + * + * XXXRW: _fgetvp() is not MPSAFE because of VFS access. */ static __inline int _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags) @@ -2011,6 +2070,8 @@ * * We bump the ref count on the returned socket. XXX Also obtain the SX * lock in the future. + * + * XXXRW: fgetsock() is MPSAFE. */ int fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp) @@ -2056,6 +2117,9 @@ * Drop reference on struct file passed in, may call closef if the * reference hits zero. * Expects struct file locked, and will unlock it. + * + * XXXRW: fdrop_locked() is MPSAFE, as fo_close() will acquire Giant if it + * needs it. */ int fdrop_locked(fp, td) @@ -2072,6 +2136,7 @@ } /* We have the last ref so we can proceed without the file lock. */ FILE_UNLOCK(fp); + if (fp->f_count < 0) panic("fdrop: count < 0"); if (fp->f_ops != &badfileops) --- //depot/vendor/freebsd/src/sys/kern/kern_mbuf.c 2004/08/02 00:20:28 +++ //depot/user/rwatson/netperf/sys/kern/kern_mbuf.c 2004/08/02 22:40:15 @@ -213,7 +213,7 @@ #endif } else m->m_data = m->m_dat; - mbstat.m_mbufs += 1; /* XXX */ + atomic_add_long(&mbstat.m_mbufs, 1); return (0); } @@ -228,7 +228,7 @@ m = (struct mbuf *)mem; if ((m->m_flags & M_PKTHDR) != 0) m_tag_delete_chain(m, NULL); - mbstat.m_mbufs -= 1; /* XXX */ + atomic_subtract_long(&mbstat.m_mbufs, 1); } /* XXX Only because of stats */ @@ -240,8 +240,8 @@ m = (struct mbuf *)mem; if ((m->m_flags & M_PKTHDR) != 0) m_tag_delete_chain(m, NULL); - mbstat.m_mbufs -= 1; /* XXX */ - mbstat.m_mclusts -= 1; /* XXX */ + atomic_subtract_long(&mbstat.m_mbufs, 1); + atomic_subtract_long(&mbstat.m_mclusts, 1); } /* @@ -266,7 +266,7 @@ m->m_ext.ref_cnt = (u_int *)uma_find_refcnt(zone_clust, m->m_ext.ext_buf); *(m->m_ext.ref_cnt) = 1; - mbstat.m_mclusts += 1; /* XXX */ + atomic_add_long(&mbstat.m_mclusts, 1); return (0); } @@ -274,7 +274,7 @@ static void mb_dtor_clust(void *mem, int size, void *arg) { - mbstat.m_mclusts -= 1; /* XXX */ + atomic_subtract_long(&mbstat.m_mclusts, 1); } /* @@ -291,7 +291,7 @@ uma_zalloc_arg(zone_clust, m, how); if (m->m_ext.ext_buf == NULL) return (ENOMEM); - mbstat.m_mclusts -= 1; /* XXX */ + atomic_subtract_long(&mbstat.m_mclusts, 1); return (0); } @@ -307,7 +307,7 @@ m = (struct mbuf *)mem; uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); m->m_ext.ext_buf = NULL; - mbstat.m_mclusts += 1; /* XXX */ + atomic_add_long(&mbstat.m_mclusts, 1); } /* @@ -351,8 +351,8 @@ return (error); #endif } - mbstat.m_mbufs += 1; /* XXX */ - mbstat.m_mclusts += 1; /* XXX */ + atomic_add_long(&mbstat.m_mbufs, 1); + atomic_add_long(&mbstat.m_mclusts, 1); return (0); } @@ -372,7 +372,7 @@ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, "mb_reclaim()"); - mbstat.m_drain++; + atomic_add_long(&mbstat.m_drain, 1); for (dp = domains; dp != NULL; dp = dp->dom_next) for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_drain != NULL) --- //depot/vendor/freebsd/src/sys/kern/kern_resource.c 2004/08/04 18:20:34 +++ //depot/user/rwatson/netperf/sys/kern/kern_resource.c 2004/08/04 20:26:26 @@ -1142,9 +1142,9 @@ return (0); } uip->ui_sbsize = new; + UIDINFO_UNLOCK(uip); *hiwat = to; - if (uip->ui_sbsize < 0) + if (new < 0) printf("negative sbsize for uid = %d\n", uip->ui_uid); - UIDINFO_UNLOCK(uip); return (1); } --- //depot/vendor/freebsd/src/sys/kern/kern_thread.c 2004/08/02 00:20:28 +++ //depot/user/rwatson/netperf/sys/kern/kern_thread.c 2004/08/06 18:08:56 @@ -613,7 +613,8 @@ KASSERT(ke != NULL, ("thread exiting without a kse")); KASSERT(kg != NULL, ("thread exiting without a kse group")); PROC_LOCK_ASSERT(p, MA_OWNED); - CTR1(KTR_PROC, "thread_exit: thread %p", td); + CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td, + (long)p->p_pid, p->p_comm); mtx_assert(&Giant, MA_NOTOWNED); if (td->td_standin != NULL) { --- //depot/vendor/freebsd/src/sys/kern/kern_timeout.c 2004/08/06 02:45:37 +++ //depot/user/rwatson/netperf/sys/kern/kern_timeout.c 2004/08/06 20:41:48 @@ -42,10 +42,14 @@ #include #include #include +#include #include #include +#include #include +#include "opt_timeout.h" + static int avg_depth; SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0, "Average number of items examined per softclock call. Units = 1/1000"); @@ -55,6 +59,7 @@ static int avg_mpcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, "Average number of MP callouts made per softclock call. Units = 1/1000"); + /* * TODO: * allocate more timeout table slots when table overflows. @@ -245,8 +250,11 @@ if (!(c_flags & CALLOUT_MPSAFE)) { mtx_lock(&Giant); gcalls++; + CTR1(KTR_CALLOUT, "callout %p", c_func); } else { mpcalls++; + CTR1(KTR_CALLOUT, "callout mpsafe %p", + c_func); } #ifdef DIAGNOSTIC binuptime(&bt1); --- //depot/vendor/freebsd/src/sys/kern/sched_4bsd.c 2004/07/23 23:10:40 +++ //depot/user/rwatson/netperf/sys/kern/sched_4bsd.c 2004/07/27 00:55:10 @@ -819,9 +819,13 @@ */ kg = td->td_ksegrp; if (td->td_priority != kg->kg_user_pri) { +#if 0 mtx_lock_spin(&sched_lock); +#endif td->td_priority = kg->kg_user_pri; +#if 0 mtx_unlock_spin(&sched_lock); +#endif } } --- //depot/vendor/freebsd/src/sys/kern/subr_log.c 2004/06/16 09:52:13 +++ //depot/user/rwatson/netperf/sys/kern/subr_log.c 2004/06/18 00:24:39 @@ -83,7 +83,12 @@ struct callout sc_callout; /* callout to wakeup syslog */ } logsoftc; -int log_open; /* also used in log() */ +/* + * log_mtx protects logsoftc, log_open. Note that log_mtx does *not* + * protect the structures associated with msgbuf, which require Giant. + */ +struct mtx log_mtx; +int log_open; /* also used in log() */ /* Times per second to check for a pending syslog wakeup. */ static int log_wakeups_per_second = 5; @@ -94,17 +99,24 @@ static int logopen(struct cdev *dev, int flags, int mode, struct thread *td) { - if (log_open) + + mtx_lock(&log_mtx); + if (log_open) { + mtx_unlock(&log_mtx); return (EBUSY); + } log_open = 1; - callout_init(&logsoftc.sc_callout, 0); + callout_init(&logsoftc.sc_callout, CALLOUT_MPSAFE); + mtx_unlock(&log_mtx); fsetown(td->td_proc->p_pid, &logsoftc.sc_sigio); /* signal process only */ + mtx_lock(&log_mtx); if (log_wakeups_per_second < 1) { printf("syslog wakeup is less than one. Adjusting to 1.\n"); log_wakeups_per_second = 1; } callout_reset(&logsoftc.sc_callout, hz / log_wakeups_per_second, logtimeout, NULL); + mtx_unlock(&log_mtx); return (0); } @@ -113,9 +125,11 @@ logclose(struct cdev *dev, int flag, int mode, struct thread *td) { + mtx_lock(&log_mtx); log_open = 0; callout_stop(&logsoftc.sc_callout); logsoftc.sc_state = 0; + mtx_unlock(&log_mtx); funsetown(&logsoftc.sc_sigio); return (0); } @@ -134,14 +148,18 @@ splx(s); return (EWOULDBLOCK); } + mtx_lock(&log_mtx); logsoftc.sc_state |= LOG_RDWAIT; + mtx_unlock(&log_mtx); if ((error = tsleep(mbp, LOG_RDPRI | PCATCH, "klog", 0))) { splx(s); return (error); } } splx(s); + mtx_lock(&log_mtx); logsoftc.sc_state &= ~LOG_RDWAIT; + mtx_unlock(&log_mtx); while (uio->uio_resid > 0) { l = imin(sizeof(buf), uio->uio_resid); @@ -178,8 +196,11 @@ logtimeout(void *arg) { - if (!log_open) + mtx_lock(&log_mtx); + if (!log_open) { + mtx_unlock(&log_mtx); return; + } if (log_wakeups_per_second < 1) { printf("syslog wakeup is less than one. Adjusting to 1.\n"); log_wakeups_per_second = 1; @@ -187,6 +208,7 @@ if (msgbuftrigger == 0) { callout_reset(&logsoftc.sc_callout, hz / log_wakeups_per_second, logtimeout, NULL); + mtx_unlock(&log_mtx); return; } msgbuftrigger = 0; @@ -199,6 +221,7 @@ } callout_reset(&logsoftc.sc_callout, hz / log_wakeups_per_second, logtimeout, NULL); + mtx_unlock(&log_mtx); } /*ARGSUSED*/ @@ -217,10 +240,12 @@ break; case FIOASYNC: + mtx_lock(&log_mtx); if (*(int *)data) logsoftc.sc_state |= LOG_ASYNC; else logsoftc.sc_state &= ~LOG_ASYNC; + mtx_unlock(&log_mtx); break; case FIOSETOWN: @@ -249,6 +274,7 @@ log_drvinit(void *unused) { + mtx_init(&log_mtx, "log_mtx", NULL, MTX_DEF); make_dev(&log_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "klog"); } --- //depot/vendor/freebsd/src/sys/kern/subr_prf.c 2004/07/10 21:40:38 +++ //depot/user/rwatson/netperf/sys/kern/subr_prf.c 2004/07/10 22:08:23 @@ -84,6 +84,9 @@ size_t remain; }; +/* + * XXXRW: We access subr_log.c's log_open variable unlocked. + */ extern int log_open; static void msglogchar(int c, int pri); --- //depot/vendor/freebsd/src/sys/kern/subr_witness.c 2004/08/04 20:35:39 +++ //depot/user/rwatson/netperf/sys/kern/subr_witness.c 2004/08/05 21:29:14 @@ -316,6 +316,7 @@ */ { "ddp_list_mtx", &lock_class_mtx_sleep }, { "ddp_mtx", &lock_class_mtx_sleep }, + { "at_ifaddr", &lock_class_mtx_sleep }, { NULL, NULL }, /* * spin locks --- //depot/vendor/freebsd/src/sys/kern/sys_generic.c 2004/07/10 15:45:28 +++ //depot/user/rwatson/netperf/sys/kern/sys_generic.c 2004/07/27 18:12:39 @@ -483,9 +483,8 @@ return (error); mtx_lock(&Giant); if ((fp->f_flag & (FREAD | FWRITE)) == 0) { - fdrop(fp, td); - mtx_unlock(&Giant); - return (EBADF); + error = EBADF; + goto done; } fdp = td->td_proc->p_fd; switch (com = uap->com) { @@ -493,16 +492,14 @@ FILEDESC_LOCK(fdp); fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; FILEDESC_UNLOCK(fdp); - fdrop(fp, td); - mtx_unlock(&Giant); - return (0); + error = 0; + goto done; case FIOCLEX: FILEDESC_LOCK(fdp); fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; FILEDESC_UNLOCK(fdp); - fdrop(fp, td); - mtx_unlock(&Giant); - return (0); + error = 0; + goto done; } /* @@ -511,9 +508,8 @@ */ size = IOCPARM_LEN(com); if (size > IOCPARM_MAX) { - fdrop(fp, td); - mtx_unlock(&Giant); - return (ENOTTY); + error = ENOTTY; + goto done; } memp = NULL; @@ -529,7 +525,6 @@ if (error) { if (memp) free(memp, M_IOCTLOPS); - fdrop(fp, td); goto done; } } else { @@ -579,9 +574,9 @@ } if (memp) free(memp, M_IOCTLOPS); - fdrop(fp, td); done: mtx_unlock(&Giant); + fdrop(fp, td); return (error); } --- //depot/vendor/freebsd/src/sys/kern/sys_pipe.c 2004/08/03 03:01:00 +++ //depot/user/rwatson/netperf/sys/kern/sys_pipe.c 2004/08/04 20:26:26 @@ -394,6 +394,9 @@ * This routine will 'realloc' the size of a pipe safely, if it fails * it will retain the old buffer. * If it fails it will return ENOMEM. + * + * This version of the call should not be called directly except via + * pipe_create(), as it doesn't assert the pipe lock. */ static int pipespace_new(cpipe, size) --- //depot/vendor/freebsd/src/sys/kern/uipc_mbuf.c 2004/07/21 21:05:26 +++ //depot/user/rwatson/netperf/sys/kern/uipc_mbuf.c 2004/07/24 17:51:01 @@ -415,12 +415,12 @@ np = &n->m_next; } if (top == NULL) - mbstat.m_mcfail++; /* XXX: No consistency. */ + atomic_add_long(&mbstat.m_mcfail, 1); return (top); nospace: m_freem(top); - mbstat.m_mcfail++; /* XXX: No consistency. */ + atomic_add_long(&mbstat.m_mcfail, 1); return (NULL); } @@ -481,7 +481,7 @@ return top; nospace: m_freem(top); - mbstat.m_mcfail++; /* XXX: No consistency. */ + atomic_add_long(&mbstat.m_mcfail, 1); return (NULL); } @@ -584,7 +584,7 @@ nospace: m_freem(top); - mbstat.m_mcfail++; /* XXX: No consistency. */ + atomic_add_long(&mbstat.m_mcfail, 1); return (NULL); } @@ -744,7 +744,7 @@ return (m); bad: m_freem(n); - mbstat.m_mpfail++; /* XXX: No consistency. */ + atomic_add_long(&mbstat.m_mpfail, 1); return (NULL); } --- //depot/vendor/freebsd/src/sys/kern/uipc_socket.c 2004/07/25 23:30:37 +++ //depot/user/rwatson/netperf/sys/kern/uipc_socket.c 2004/08/02 01:36:03 @@ -295,6 +295,7 @@ * SO_ACCEPTCONN before the call to pru_listen()? * XXXRW: General atomic test-and-set concerns here also. */ + /* Unlocked read. */ if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) return (EINVAL); @@ -323,6 +324,7 @@ KASSERT(so->so_count == 0, ("socket %p so_count not 0", so)); SOCK_LOCK_ASSERT(so); + /* XXXRW: Why would SS_NOFDREF be unset here? so_count is 0. */ if (so->so_pcb != NULL || (so->so_state & SS_NOFDREF) == 0) { SOCK_UNLOCK(so); return; @@ -381,6 +383,45 @@ sodealloc(so); } +#if 0 +static void +dump_socket(struct socket *so) +{ + + printf("so = %p\n", so); + printf(" so_count = %d\n", so->so_count); + printf(" so_type = %d\n", so->so_type); + printf(" so_options = %d\n", so->so_options); + printf(" so_linger = %d\n", so->so_linger); + printf(" so_state = %d\n", so->so_state); + printf(" so_qstate = %d\n", so->so_qstate); + printf(" so_pcb = %p\n", so->so_pcb); + printf(" so_proto = %p\n", so->so_proto); + printf(" pr_type = %d\n", so->so_proto->pr_type); + printf(" pr_domain = %p\n", so->so_proto->pr_domain); + printf(" dom_family = %d\n", so->so_proto->pr_domain->dom_family); + printf(" dom_name = %s\n", so->so_proto->pr_domain->dom_name); + printf(" pr_protocol = %d\n", so->so_proto->pr_protocol); + printf(" pr_flags = %d\n", so->so_proto->pr_flags); + printf(" so_head = %p\n", so->so_head); + printf(" TAILQ_FIRST(so_incomp) = %p\n", TAILQ_FIRST(&so->so_incomp)); + printf(" TAILQ_FIRST(so_comp) = %p\n", TAILQ_FIRST(&so->so_comp)); + printf(" so_qlen = %d\n", so->so_qlen); + printf(" so_incqlen = %d\n", so->so_incqlen); + printf(" so_qlimit = %d\n", so->so_qlimit); + printf(" so_timeo = %d\n", so->so_timeo); + printf(" so_error = %d\n", so->so_error); + printf(" so_sigio = %p\n", so->so_sigio); + printf(" so_oobmark = %lu\n", so->so_oobmark); + printf(" so_rcv.sb_state = %d\n", so->so_rcv.sb_state); + printf(" so_snd.sb_state = %d\n", so->so_snd.sb_state); + printf(" so_upcall = %p\n", so->so_upcall); + printf(" so_upcallarg = %p\n", so->so_upcallarg); + printf(" so_cred = %p\n", so->so_cred); + printf(" so_gencnt = %d\n", (int)so->so_gencnt); +} +#endif + /* * Close a socket on last file table reference removal. * Initiate disconnect if connected. @@ -399,6 +440,23 @@ KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter")); funsetown(&so->so_sigio); + /*- + * XXXRW: Lots of locking problems here. For one thing, + * we should probably clear the SO_ACCEPTCONN flag and + * push that down to the protocol layer. Also, the locking + * for the queue draining is probably not right either: + * what prevents new ones from being inserted after we get + * past this? Ideally, the downcall would prevent this, but + * there isn't one. + */ + /* + * XXXRW: soclose() should be split into two parts: one to handle + * listen sockets, and the other to handle the remainder. To + * prevent races with the protocol code, we need to detach before + * draining the queues. For non-listen sockets, the disconnect + * has to happen before the detach. + */ + /* Unlocked read. */ if (so->so_options & SO_ACCEPTCONN) { struct socket *sp; ACCEPT_LOCK(); @@ -424,6 +482,7 @@ } if (so->so_pcb == NULL) goto discard; + /* XXXRW: so_state locking? */ if (so->so_state & SS_ISCONNECTED) { if ((so->so_state & SS_ISDISCONNECTING) == 0) { error = sodisconnect(so); @@ -508,6 +567,7 @@ * This allows user to disconnect by connecting to, e.g., * a null address. */ + /* Unlocked read. */ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && ((so->so_proto->pr_flags & PR_CONNREQUIRED) || (error = sodisconnect(so)))) @@ -532,6 +592,7 @@ { int error; + /* Unlocked read. */ if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); if (so->so_state & SS_ISDISCONNECTING) @@ -622,8 +683,6 @@ #define snderr(errno) { error = (errno); goto release; } SOCKBUF_LOCK(&so->so_snd); -restart: - SOCKBUF_LOCK_ASSERT(&so->so_snd); error = sblock(&so->so_snd, SBLOCKWAIT(flags)); if (error) goto out_locked; @@ -662,11 +721,10 @@ (atomic || space < so->so_snd.sb_lowat || space < clen)) { if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) snderr(EWOULDBLOCK); - sbunlock(&so->so_snd); error = sbwait(&so->so_snd); if (error) - goto out_locked; - goto restart; + goto release; + continue; } SOCKBUF_UNLOCK(&so->so_snd); mp = ⊤ @@ -778,6 +836,15 @@ break; } } while (space > 0 && atomic); + /* + * XXXRW: There may be a race condition here regarding + * SO_DONTROUTE. We hold the sblock() so in theory + * there won't be other consumers of the socket doing + * a pru_send(), but SO_DONTROUTE is also consumed + * from the protocol side. It sounds like we might + * want to use MSG_DONTROUTE here, if the semantics are + * right. + */ if (dontroute) { SOCK_LOCK(so); so->so_options |= SO_DONTROUTE; @@ -788,7 +855,7 @@ * done could be out of date. We could have recieved * a reset packet in an interrupt or maybe we slept * while doing page faults in uiomove() etc. We could - * probably recheck again inside the splnet() protection + * probably recheck again inside the locking protection * here, but there are probably other places that this * also happens. We must rethink this. */ @@ -806,6 +873,10 @@ /* If there is more to send set PRUS_MORETOCOME */ (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0, top, addr, control, td); + /* + * XXXRW: Second half of above comment on SO_DONTROUTE. + * and so_options. + */ if (dontroute) { SOCK_LOCK(so); so->so_options &= ~SO_DONTROUTE; @@ -976,16 +1047,17 @@ return (soreceive_rcvoob(so, uio, flags)); if (mp != NULL) *mp = NULL; + /* Unlocked read. */ if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) (*pr->pr_usrreqs->pru_rcvd)(so, 0); SOCKBUF_LOCK(&so->so_rcv); -restart: - SOCKBUF_LOCK_ASSERT(&so->so_rcv); error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); if (error) goto out; +restart: + SOCKBUF_LOCK_ASSERT(&so->so_rcv); m = so->so_rcv.sb_mb; /* * If we have less data than requested, block awaiting more @@ -1026,6 +1098,7 @@ m = so->so_rcv.sb_mb; goto dontblock; } + /* XXXRW: so_state locking? */ if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && (so->so_proto->pr_flags & PR_CONNREQUIRED)) { error = ENOTCONN; @@ -1033,6 +1106,7 @@ } if (uio->uio_resid == 0) goto release; + /* XXXRW: so_state locking? */ if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { error = EWOULDBLOCK; @@ -1040,10 +1114,9 @@ } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); - sbunlock(&so->so_rcv); error = sbwait(&so->so_rcv); if (error) - goto out; + goto release; goto restart; } dontblock: @@ -1072,10 +1145,25 @@ if (pr->pr_flags & PR_ADDR) { KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); - orig_resid = 0; - if (psa != NULL) + if (psa != NULL) { *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_NOWAIT); + if (*psa == NULL) { + error = ENOMEM; + goto release; + } + /* + * XXXRW: In the rwatson_netperf branch, we don't + * release the socket buffer lock here because + * we always use M_NOWAIT. In the main tree, + * or if we restore conditional waiting, we + * need to refresh nextpacket from the socket + * buffer version of the head mbuf, or we may + * have a stale value if it was previously NULL + * and now isn't. + */ + nextrecord = m->m_nextpkt; + } if (flags & MSG_PEEK) { m = m->m_next; } else { @@ -1084,6 +1172,7 @@ m = so->so_rcv.sb_mb; sockbuf_pushsync(&so->so_rcv, nextrecord); } + orig_resid = 0; } /* @@ -1098,6 +1187,14 @@ do { if (flags & MSG_PEEK) { + /* + * XXXRW: In the BSD/OS version of this + * code, m_copym() is called with M_TRYWAIT, + * and we catch allcation failures and + * jump to release with error of ENOBUFS. + * For consistency with the current + * implementation, we don't. + */ if (controlp != NULL) { *controlp = m_copy(m, 0, m->m_len); controlp = &(*controlp)->m_next; @@ -1112,6 +1209,12 @@ m = so->so_rcv.sb_mb; } } while (m != NULL && m->m_type == MT_CONTROL); + /* + * XXXRW: Since we're dropping the socket buffer locks, and + * may have modified the mbuf list on the socket buffer, push + * out the latest version so it can be seen by any other code + * that accesses the buffer in the mean time. + */ if ((flags & MSG_PEEK) == 0) sockbuf_pushsync(&so->so_rcv, nextrecord); while (cm != NULL) { @@ -1133,10 +1236,21 @@ } cm = cmn; } + /* + * XXXRW: During externalization, additional mbuf chains + * may have been added to the socket buffer. Update our + * local cache to avoid using a stale value and corrupting + * the list. + */ nextrecord = so->so_rcv.sb_mb->m_nextpkt; orig_resid = 0; } if (m != NULL) { + /* + * XXXRW: Before the advent of sockbuf_pushsync() above, it + * was necessary to do the equivilent work here. Now, we + * just assert that it is the case. + */ if ((flags & MSG_PEEK) == 0) { KASSERT(m->m_nextpkt == nextrecord, ("soreceive: post-control, nextrecord !sync")); @@ -1151,6 +1265,11 @@ if (type == MT_OOBDATA) flags |= MSG_OOB; } else { + /* + * XXXRW: Before the advent of sockbuf_pushsync() above, it + * was necessary to update sb_mb to nextrecord here. Now, we + * just assert that is the case. + */ if ((flags & MSG_PEEK) == 0) { KASSERT(so->so_rcv.sb_mb == nextrecord, ("soreceive: sb_mb != nextrecord")); @@ -1254,14 +1373,7 @@ so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; } - if (m != NULL) { - m->m_nextpkt = nextrecord; - if (nextrecord == NULL) - so->so_rcv.sb_lastrecord = m; - } else { - so->so_rcv.sb_mb = nextrecord; - SB_EMPTY_FIXUP(&so->so_rcv); - } + sockbuf_pushsync(&so->so_rcv, nextrecord); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); } @@ -1310,6 +1422,12 @@ /* * Notify the protocol that some data has been * drained before blocking. + * + * XXXRW: We drop the socket buffer lock here + * because we know the protocol will need to do its + * own locking. However, after calling pru_rcvd() + * and re-grabbing the buffer mutex, we don't + * re-check the condition before sleeping. */ if (pr->pr_flags & PR_WANTRCVD && so->so_pcb != NULL) { SOCKBUF_UNLOCK(&so->so_rcv); @@ -1319,8 +1437,10 @@ SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); error = sbwait(&so->so_rcv); - if (error) + if (error) { + error = 0; goto release; + } m = so->so_rcv.sb_mb; if (m != NULL) nextrecord = m->m_nextpkt; @@ -1349,6 +1469,11 @@ } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); + /* + * XXXRW: We drop the socket buffer lock before calling + * down into the protocol. Is that OK in the calling + * context? + */ if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) { SOCKBUF_UNLOCK(&so->so_rcv); (*pr->pr_usrreqs->pru_rcvd)(so, flags); @@ -1357,10 +1482,8 @@ } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (orig_resid == uio->uio_resid && orig_resid && - (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) { - sbunlock(&so->so_rcv); - goto restart; - } + (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) + goto restart; /* XXX multi-counts msgs */ if (flagsp != NULL) *flagsp |= flags; @@ -1428,6 +1551,7 @@ sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); SOCKBUF_UNLOCK(sb); + /* XXXRW: is passing in sb_mb this way really safe? */ SOCKBUF_LOCK_INIT(&asb, "so_rcv"); if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) (*pr->pr_domain->dom_dispose)(asb.sb_mb); @@ -1879,6 +2003,7 @@ case SO_TIMESTAMP: case SO_BINTIME: case SO_NOSIGPIPE: + /* Unlocked read. */ optval = so->so_options & sopt->sopt_name; integer: error = sooptcopyout(sopt, &optval, sizeof optval); @@ -2084,10 +2209,16 @@ { int revents = 0; + /* + * XXXRW: Lots of unlocked reads, and some writes. Probably + * some more locking is called for here, especially when + * setting the sb_sel flags. + */ if (events & (POLLIN | POLLRDNORM)) if (soreadable(so)) revents |= events & (POLLIN | POLLRDNORM); + /* Unlocked read. */ if (events & POLLINIGNEOF) if (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat || !TAILQ_EMPTY(&so->so_comp) || so->so_error) @@ -2130,6 +2261,7 @@ switch (kn->kn_filter) { case EVFILT_READ: + /* Unlocked read. */ if (so->so_options & SO_ACCEPTCONN) kn->kn_fop = &solisten_filtops; else @@ -2246,6 +2378,7 @@ { struct socket *so = kn->kn_fp->f_data; + /* Unlocked read. */ kn->kn_data = so->so_qlen; return (! TAILQ_EMPTY(&so->so_comp)); } --- //depot/vendor/freebsd/src/sys/kern/uipc_socket2.c 2004/06/26 19:15:28 +++ //depot/user/rwatson/netperf/sys/kern/uipc_socket2.c 2004/07/16 01:43:15 @@ -106,21 +106,39 @@ { SOCK_LOCK(so); + /* + * XXXRW: so_state locking? + * + * XXXRW: Why not clear SS_ISDISCONNECTED, SS_ISCONFIRMING? + */ so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= SS_ISCONNECTING; SOCK_UNLOCK(so); } +/* + * soisconnected() transitions a socket to a fully connected state. If + * so->so_head is non-NULL, we transition it from the incompletely + * connected queue to the completely connected queue. Otherwise, we + * just wake up the socket since it was an out-bound connection. + */ void soisconnected(so) struct socket *so; { struct socket *head; + /* XXXRW: so_state locking? */ SOCK_LOCK(so); so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); so->so_state |= SS_ISCONNECTED; SOCK_UNLOCK(so); + + /* + * XXXRW: Maybe an unlocked read of so_head would be desirable + * here? Unlocked read of so_qstate probably not a good idea + * regardless. so_options handling here is a little unsavory. + */ ACCEPT_LOCK(); head = so->so_head; if (head != NULL && (so->so_qstate & SQ_INCOMP)) { @@ -135,15 +153,37 @@ sorwakeup(head); wakeup_one(&head->so_timeo); } else { - ACCEPT_UNLOCK(); + void (*so_upcall)(struct socket *, void *, int); + void *so_upcallarg; + /* + * XXXRW: Should probably copy the upcall fields to + * local stack variables while holding the socket + * lock (and clear the option), then release the + * lock and make the call. This will prevent lock + * order reversals/recursion between this code and + * the upcall implementation, which will likely + * also want to frob the socket using locks. + * + * NOTE: We keep a local copy of the function + * pointer so we can invoke the upcall without + * holding locks. However, we also have to copy + * in the upcall fields from the head because the + * filter may need to be called more than once. + */ SOCK_LOCK(so); - so->so_upcall = + so_upcall = so->so_upcall = head->so_accf->so_accept_filter->accf_callback; - so->so_upcallarg = head->so_accf->so_accept_filter_arg; + so_upcallarg = so->so_upcallarg = + head->so_accf->so_accept_filter_arg; so->so_rcv.sb_flags |= SB_UPCALL; so->so_options &= ~SO_ACCEPTFILTER; SOCK_UNLOCK(so); - so->so_upcall(so, so->so_upcallarg, M_TRYWAIT); + ACCEPT_UNLOCK(); + /* + * Call with our existing reference, but without + * any locks. + */ + so_upcall(so, so_upcallarg, M_TRYWAIT); } return; } @@ -233,6 +273,8 @@ so->so_type = head->so_type; so->so_options = head->so_options &~ SO_ACCEPTCONN; so->so_linger = head->so_linger; + /* XXXRW: so_state locking? */ + /* XXXRW: should mask additional head->so_state bits here? */ so->so_state = head->so_state | SS_NOFDREF; so->so_proto = head->so_proto; so->so_timeo = head->so_timeo; @@ -277,6 +319,7 @@ } ACCEPT_UNLOCK(); if (connstatus) { + /* XXXRW: so_state locking? */ so->so_state |= connstatus; sorwakeup(head); wakeup_one(&head->so_timeo); @@ -405,6 +448,10 @@ } KNOTE(&sb->sb_sel.si_note, 0); SOCKBUF_UNLOCK(sb); + /* + * XXXRW: What locks should be held over what parts of the + * following? Currently we hold none. + */ if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) pgsigio(&so->so_sigio, SIGIO, 0); if (sb->sb_flags & SB_UPCALL) @@ -451,7 +498,7 @@ register struct socket *so; u_long sndcc, rcvcc; { - struct thread *td = curthread; + struct thread *td = curthread; /* XXX */ SOCKBUF_LOCK(&so->so_snd); SOCKBUF_LOCK(&so->so_rcv); @@ -515,6 +562,13 @@ /* * td will only be NULL when we're in an interrupt * (e.g. in tcp_input()) + * + * XXXRW: This comment is true, but only because the caller passed + * in NULL, not for the 4.x reason that there is no thread + * available. Need to be careful of callers that do this wrong; + * I suspect many do it wrong, and therefore many socket buffers + * end up with the wrong limits, especially via the soreserve() + * path. */ if (cc > sb_max_adj) return (0); @@ -1368,6 +1422,7 @@ xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; + /* Unlocked read. */ xso->so_state = so->so_state; xso->so_pcb = so->so_pcb; xso->xso_protocol = so->so_proto->pr_protocol; --- //depot/vendor/freebsd/src/sys/kern/uipc_syscalls.c 2004/07/17 21:10:41 +++ //depot/user/rwatson/netperf/sys/kern/uipc_syscalls.c 2004/07/19 22:28:50 @@ -278,6 +278,7 @@ error = fgetsock(td, uap->s, &head, &fflag); if (error) goto done2; + /* Unlocked read. */ if ((head->so_options & SO_ACCEPTCONN) == 0) { error = EINVAL; goto done; @@ -285,6 +286,10 @@ error = falloc(td, &nfp, &fd); if (error) goto done; + /* + * Unlocked reads. + * XXXRW: Dubious use of so->so_error. + */ ACCEPT_LOCK(); if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { ACCEPT_UNLOCK(); @@ -333,6 +338,11 @@ /* An extra reference on `nfp' has been held for us by falloc(). */ td->td_retval[0] = fd; + /* + * XXXRW: Might make life simpler to grab the socket buffer lock + * here so it's held when KNOTE() calls back into the socket + * code. + */ /* connection has been removed from the listen queue */ KNOTE(&head->so_rcv.sb_sel.si_note, 0); @@ -482,6 +492,7 @@ NET_LOCK_GIANT(); if ((error = fgetsock(td, fd, &so, NULL)) != 0) goto done2; + /* XXXRW: so_state locking? */ if (so->so_state & SS_ISCONNECTING) { error = EALREADY; goto done1; @@ -496,12 +507,14 @@ error = soconnect(so, sa, td); if (error) goto bad; + /* XXXRW: so_state locking? */ if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { error = EINPROGRESS; goto done1; } s = splnet(); SOCK_LOCK(so); + /* XXXRW: so_state locking? */ while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, "connec", 0); @@ -518,6 +531,7 @@ SOCK_UNLOCK(so); splx(s); bad: + /* XXXRW: so_state locking? */ if (!interrupted) so->so_state &= ~SS_ISCONNECTING; if (error == ERESTART) @@ -1481,6 +1495,7 @@ NET_LOCK_GIANT(); if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) goto done2; + /* XXXRW: so_state locking? */ if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { error = ENOTCONN; goto done1; @@ -1722,6 +1737,7 @@ error = EINVAL; goto done; } + /* XXXRW: so_state locking? */ if ((so->so_state & SS_ISCONNECTED) == 0) { error = ENOTCONN; goto done; @@ -1813,6 +1829,7 @@ * before going to the extra work of constituting the sf_buf. */ SOCKBUF_LOCK(&so->so_snd); + /* XXXRW: so_state locking? */ if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { if (so->so_snd.sb_state & SBS_CANTSENDMORE) error = EPIPE; @@ -1878,6 +1895,7 @@ * Get the page from backing store. */ bsize = vp->v_mount->mnt_stat.f_iosize; + mtx_lock(&Giant); /* VFS */ vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); /* * XXXMAC: Because we don't have fp->f_cred here, @@ -1889,6 +1907,7 @@ IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), td->td_ucred, NOCRED, &resid, td); VOP_UNLOCK(vp, 0, td); + mtx_unlock(&Giant); /* VFS */ if (error) VM_OBJECT_LOCK(obj); vm_page_lock_queues(); @@ -1999,6 +2018,7 @@ * after checking the connection state above in order to avoid * a race condition with sbwait(). */ + /* XXXRW: so_state locking? */ if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { if (so->so_state & SS_NBIO) { m_freem(m); --- //depot/vendor/freebsd/src/sys/kern/uipc_usrreq.c 2004/07/25 23:35:28 +++ //depot/user/rwatson/netperf/sys/kern/uipc_usrreq.c 2004/08/02 01:36:03 @@ -263,6 +263,8 @@ return (0); } +int old_locking = 1; + static int uipc_rcvd(struct socket *so, int flags) { @@ -272,15 +274,17 @@ if (unp == NULL) return (EINVAL); - UNP_LOCK(); switch (so->so_type) { case SOCK_DGRAM: panic("uipc_rcvd DGRAM?"); /*NOTREACHED*/ case SOCK_STREAM: - if (unp->unp_conn == NULL) + UNP_LOCK(); + if (unp->unp_conn == NULL) { + UNP_UNLOCK(); break; + } so2 = unp->unp_conn->unp_socket; SOCKBUF_LOCK(&so2->so_snd); SOCKBUF_LOCK(&so->so_rcv); @@ -295,14 +299,22 @@ (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat, newhiwat, RLIM_INFINITY); unp->unp_cc = so->so_rcv.sb_cc; + + /* + * XXXRW: It would be desirable to release the UNP lock + * before doing the socket wakeup. However, to do that we + * need to acquire a reference to so2 so it doesn't evaporate + * if the peer closes it; this may be more expensive than the + * wakeup causing contention on the socket mutex. + */ SOCKBUF_UNLOCK(&so->so_rcv); sowwakeup_locked(so2); + UNP_UNLOCK(); break; default: panic("uipc_rcvd unknown socktype"); } - UNP_UNLOCK(); return (0); } @@ -374,6 +386,7 @@ * Note: A better implementation would complain * if not equal to the peer's address. */ + /* Unlocked read. */ if ((so->so_state & SS_ISCONNECTED) == 0) { if (nam != NULL) { error = unp_connect(so, nam, td); @@ -385,6 +398,7 @@ } } + /* Unlocked read. */ if (so->so_snd.sb_state & SBS_CANTSENDMORE) { error = EPIPE; break; @@ -453,6 +467,7 @@ sb->st_blksize = so->so_snd.sb_hiwat; if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) { so2 = unp->unp_conn->unp_socket; + /* Unlocked read. */ sb->st_blksize += so2->so_rcv.sb_cc; } sb->st_dev = NODEV; @@ -1402,6 +1417,10 @@ UNP_LOCK_ASSERT(); + /* + * XXXRW: unp_gcing seems like a bad idea. Use a real lock + * instead? + */ if (unp_gcing) return; unp_gcing = 1; @@ -1484,6 +1503,9 @@ * of a signal. If sbwait does return * an error, we'll go into an infinite * loop. Delete all of this for now. + * + * XXXRW: We will need to lock the socket + * buffer here if we enable this code. */ (void) sbwait(&so->so_rcv); goto restart; --- //depot/vendor/freebsd/src/sys/kern/vfs_aio.c 2004/07/27 03:55:33 +++ //depot/user/rwatson/netperf/sys/kern/vfs_aio.c 2004/08/02 01:36:03 @@ -1223,6 +1223,8 @@ /* * Wake up aio requests that may be serviceable now. + * + * XXXRW: This doesn't look MPSAFE at all. */ static void aio_swake_cb(struct socket *so, struct sockbuf *sb) --- //depot/vendor/freebsd/src/sys/kern/vfs_vnops.c 2004/07/22 20:40:37 +++ //depot/user/rwatson/netperf/sys/kern/vfs_vnops.c 2004/07/24 17:51:01 @@ -117,6 +117,8 @@ exclusive = 0; #endif + GIANT_REQUIRED; + restart: fmode = *flagp; if (fmode & O_CREAT) { @@ -315,6 +317,8 @@ { int error; + GIANT_REQUIRED; + if (flags & FWRITE) vp->v_writecount--; error = VOP_CLOSE(vp, flags, file_cred, td); @@ -387,6 +391,8 @@ struct ucred *cred; int error; + GIANT_REQUIRED; + if ((ioflg & IO_NODELOCKED) == 0) { mp = NULL; if (rw == UIO_WRITE) { @@ -474,6 +480,8 @@ int error = 0; int iaresid; + GIANT_REQUIRED; + do { int chunk; @@ -518,7 +526,6 @@ struct vnode *vp; int error, ioflag; - mtx_lock(&Giant); KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); vp = fp->f_vnode; @@ -527,6 +534,7 @@ ioflag |= IO_NDELAY; if (fp->f_flag & O_DIRECT) ioflag |= IO_DIRECT; + mtx_lock(&Giant); VOP_LEASE(vp, td, fp->f_cred, LEASE_READ); /* * According to McKusick the vn lock is protecting f_offset here. @@ -568,10 +576,10 @@ struct mount *mp; int error, ioflag; - mtx_lock(&Giant); KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); vp = fp->f_vnode; + mtx_lock(&Giant); if (vp->v_type == VREG) bwillwrite(); ioflag = IO_UNIT; @@ -647,6 +655,8 @@ int error; u_short mode; + GIANT_REQUIRED; + #ifdef MAC error = mac_check_vnode_stat(active_cred, file_cred, vp); if (error) @@ -769,6 +779,8 @@ struct vattr vattr; int error; + GIANT_REQUIRED; + switch (vp->v_type) { case VREG: @@ -846,6 +858,8 @@ int error; #endif + GIANT_REQUIRED; + vp = fp->f_vnode; #ifdef MAC vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); @@ -1000,6 +1014,8 @@ { int error; + GIANT_REQUIRED; + if (vp != NULL) { if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) { if (error != EOPNOTSUPP) @@ -1054,6 +1070,8 @@ struct thread *td = curthread; int error; + GIANT_REQUIRED; + if (mp->mnt_kern_flag & MNTK_SUSPEND) return (0); mp->mnt_kern_flag |= MNTK_SUSPEND; @@ -1075,6 +1093,8 @@ struct mount *mp; { + GIANT_REQUIRED; + if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0) return; mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPENDED); @@ -1089,6 +1109,8 @@ vn_kqfilter(struct file *fp, struct knote *kn) { + GIANT_REQUIRED; + return (VOP_KQFILTER(fp->f_vnode, kn)); } --- //depot/vendor/freebsd/src/sys/net/if.c 2004/08/06 09:10:25 +++ //depot/user/rwatson/netperf/sys/net/if.c 2004/08/06 20:41:48 @@ -264,6 +264,14 @@ if_clone_init(); } +/* + * Grow the global ifindex_table by a power of two over the current side. + * Note that this call assumes ifindex_table (and hence the if_indexentry + * entries in it) can be relocated in memory without causing a problem. + * + * XXXRW: Locking assertion needed here? What protects against untimely + * indirection through ifindex_table? Note M_WAITOK. + */ static void if_grow(void) { @@ -280,6 +288,10 @@ ifindex_table = e; } +/* + * if_check() scans the list of registered network interfaces to check that + * initialization-time invariants are met. + */ /* ARGSUSED*/ static void if_check(void *dummy __unused) @@ -306,6 +318,9 @@ if_slowtimo(0); } +/* + * if_findindex ... XXXRW: what? + */ static int if_findindex(struct ifnet *ifp) { @@ -319,6 +334,10 @@ case IFT_XETHER: case IFT_ISO88025: case IFT_L2VLAN: + /* + * XXXRW: Assumes that arpcom always has its ifnet at the + * head of its structure. + */ snprintf(eaddr, 18, "%6D", IFP2AC(ifp)->ac_enaddr, ":"); break; default: @@ -367,6 +386,10 @@ struct sockaddr_dl *sdl; struct ifaddr *ifa; + /* + * XXXRW: Shouldn't we add to the global list only once the ifnet + * is ready for use? + */ TASK_INIT(&ifp->if_starttask, 0, if_start_deferred, ifp); IF_AFDATA_LOCK_INIT(ifp); ifp->if_afdata_initialized = 0; --- //depot/vendor/freebsd/src/sys/net/if_ethersubr.c 2004/07/27 23:25:38 +++ //depot/user/rwatson/netperf/sys/net/if_ethersubr.c 2004/08/05 04:24:12 @@ -615,9 +615,6 @@ } ether_demux(ifp, m); - /* First chunk of an mbuf contains good entropy */ - if (harvest.ethernet) - random_harvest(m, 16, 3, 0, RANDOM_NET); } /* --- //depot/vendor/freebsd/src/sys/net/if_gif.c 2004/07/15 08:30:31 +++ //depot/user/rwatson/netperf/sys/net/if_gif.c 2004/07/15 22:19:17 @@ -89,6 +89,10 @@ * gif_mtx protects the global gif_softc_list. * XXX: Per-softc locking is still required. */ +/* + * XXXRW: Note that gif_mtx only protects global gif-related data, not + * per-softc data. See also netinet/in_gif.c for locking needs. + */ static struct mtx gif_mtx; static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface"); static LIST_HEAD(, gif_softc) gif_softc_list; @@ -501,6 +505,9 @@ } /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */ +/* + * XXXRW: per-gif softc locking required. + */ int gif_ioctl(ifp, cmd, data) struct ifnet *ifp; @@ -757,8 +764,10 @@ int s; int error = 0; + /* + * XXXRW: per-gif softc locking required. + */ s = splnet(); - mtx_lock(&gif_mtx); LIST_FOREACH(sc2, &gif_softc_list, gif_list) { if (sc2 == sc) @@ -787,6 +796,9 @@ } mtx_unlock(&gif_mtx); + /* + * XXXRW: Lock gif softc fields. + */ /* XXX we can detach from both, but be polite just in case */ if (sc->gif_psrc) switch (sc->gif_psrc->sa_family) { --- //depot/vendor/freebsd/src/sys/net/if_gre.c 2004/08/05 08:15:37 +++ //depot/user/rwatson/netperf/sys/net/if_gre.c 2004/08/05 21:29:14 @@ -95,7 +95,8 @@ /* * gre_mtx protects all global variables in if_gre.c. - * XXX: gre_softc data not protected yet. + * + * XXXRW: It does not protect softc-specific data. */ struct mtx gre_mtx; static MALLOC_DEFINE(M_GRE, GRENAME, "Generic Routing Encapsulation"); --- //depot/vendor/freebsd/src/sys/net/if_gre.h 2004/03/22 16:06:54 +++ //depot/user/rwatson/netperf/sys/net/if_gre.h 2004/03/22 16:18:59 @@ -54,6 +54,12 @@ WCCP_V2 } wccp_ver_t; +/* + * XXXRW: softc fields need locking. + * + * XXXRW: gre's notion of a 'called' count is not MP-safe, as it assumes + * only one packet can be processed at a time. + */ struct gre_softc { struct ifnet sc_if; LIST_ENTRY(gre_softc) sc_list; --- //depot/vendor/freebsd/src/sys/net/if_sl.c 2004/07/15 08:30:31 +++ //depot/user/rwatson/netperf/sys/net/if_sl.c 2004/07/15 22:19:17 @@ -79,6 +79,7 @@ #include #include #include +#include #include #include @@ -162,6 +163,7 @@ #define ABT_WINDOW (ABT_COUNT*2+2) /* in seconds - time to count */ static LIST_HEAD(sl_list, sl_softc) sl_list; +static struct mtx slip_mtx; #define FRAME_END 0xc0 /* Frame End */ #define FRAME_ESCAPE 0xdb /* Frame Esc */ @@ -182,7 +184,9 @@ static int slopen(struct cdev *, struct tty *); static int sloutput(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); -static int slstart(struct tty *); +static int slstart(struct tty *tp); +static void slstart_schedule(struct sl_softc *); +static void slstart_task(void *, int); static struct linesw slipdisc = { .l_open = slopen, @@ -203,6 +207,7 @@ { switch (type) { case MOD_LOAD: + mtx_init(&slip_mtx, "slip_mtx", NULL, MTX_DEF); ldisc_register(SLIPDISC, &slipdisc); LIST_INIT(&sl_list); break; @@ -224,6 +229,7 @@ DECLARE_MODULE(if_sl, sl_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +/* Locked using slip_mtx. */ static int *st_unit_list; static size_t st_unit_max = 0; @@ -232,6 +238,7 @@ { struct sl_softc *nc; + mtx_assert(&slip_mtx, MA_OWNED); LIST_FOREACH(nc, &sl_list, sl_next) { if (nc->sc_if.if_dunit == unit) return (0); @@ -245,6 +252,7 @@ { size_t i; + mtx_assert(&slip_mtx, MA_OWNED); for (i = 0; i < st_unit_max; i++) if (st_unit_list[i] == unit) return 1; @@ -257,6 +265,7 @@ { int *t; + mtx_assert(&slip_mtx, MA_OWNED); if (slisstatic(unit)) return; @@ -319,10 +328,12 @@ sc->sc_if.if_linkmib = sc; sc->sc_if.if_linkmiblen = sizeof *sc; mtx_init(&sc->sc_fastq.ifq_mtx, "sl_fastq", NULL, MTX_DEF); + mtx_init(&sc->sc_mtx, "slip sc_mtx", NULL, MTX_DEF); /* * Find a suitable unit number. */ + mtx_lock(&slip_mtx); for (unit=0; ; unit++) { if (slisstatic(unit)) continue; @@ -332,6 +343,7 @@ } if_initname(&sc->sc_if, "sl", unit); LIST_INSERT_HEAD(&sl_list, sc, sl_next); + mtx_unlock(&slip_mtx); if_attach(&sc->sc_if); bpfattach(&sc->sc_if, DLT_SLIP, SLIP_HDRLEN); @@ -364,6 +376,7 @@ tp->t_sc = (caddr_t)sc; sc->sc_ttyp = tp; sc->sc_if.if_baudrate = tp->t_ospeed; + TASK_INIT(&sc->sc_task, 0, slstart_task, sc->sc_ttyp); ttyflush(tp, FREAD | FWRITE); /* @@ -391,10 +404,20 @@ static void sldestroy(struct sl_softc *sc) { + + /* + * XXXRW: Slight race here: we may detach bpf/if before we + * attach. This appears to be a property of the unit selection + * process, which might be better handled by the interface + * cloning subsystem? + */ bpfdetach(&sc->sc_if); if_detach(&sc->sc_if); + mtx_lock(&slip_mtx); LIST_REMOVE(sc, sl_next); + mtx_unlock(&slip_mtx); m_free(sc->sc_mbuf); + mtx_destroy(&sc->sc_mtx); mtx_destroy(&sc->sc_fastq.ifq_mtx); if (sc->bpfbuf) free(sc->bpfbuf, M_SL); @@ -423,6 +446,10 @@ clist_free_cblocks(&tp->t_outq); sc = (struct sl_softc *)tp->t_sc; if (sc != NULL) { + /* + * XXXRW: tear-down race between timeout and slclose()? + */ + mtx_lock(&sc->sc_mtx); if (sc->sc_outfill) { sc->sc_outfill = 0; untimeout(sl_outfill, sc, sc->sc_ofhandle); @@ -431,6 +458,7 @@ sc->sc_keepalive = 0; untimeout(sl_keepalive, sc, sc->sc_kahandle); } + mtx_unlock(&sc->sc_mtx); if_down(&sc->sc_if); sc->sc_ttyp = NULL; tp->t_sc = NULL; @@ -468,12 +496,21 @@ splx(s); return (ENXIO); } + /* + * XXXRW: we hold the mutex over all of this to protect + * the unit change and global list consistency. However, + * some of these functions probably sleep, making this + * wrong. If we have to support renumbering, we probably + * need a way to reserve both numbers to prevent them + * from being reused during the change, or a way to sleep + * waiting for a change to end (i.e., a CV). + */ + mtx_lock(&slip_mtx); if (sc->sc_if.if_dunit != unit) { if (!slisunitfree(unit)) { - splx(s); + mtx_unlock(&slip_mtx); return (ENXIO); } - wasup = sc->sc_if.if_flags & IFF_UP; bpfdetach(&sc->sc_if); if_detach(&sc->sc_if); @@ -491,9 +528,11 @@ SLIP_HIWAT + 2 * sc->sc_if.if_mtu + 1); } slmarkstatic(unit); + mtx_unlock(&slip_mtx); break; case SLIOCSKEEPAL: + mtx_lock(&sc->sc_mtx); sc->sc_keepalive = *(u_int *)data * hz; if (sc->sc_keepalive) { sc->sc_flags |= SC_KEEPALIVE; @@ -505,6 +544,7 @@ sc->sc_flags &= ~SC_KEEPALIVE; } } + mtx_unlock(&sc->sc_mtx); break; case SLIOCGKEEPAL: @@ -512,6 +552,7 @@ break; case SLIOCSOUTFILL: + mtx_lock(&sc->sc_mtx); sc->sc_outfill = *(u_int *)data * hz; if (sc->sc_outfill) { sc->sc_flags |= SC_OUTWAIT; @@ -523,9 +564,11 @@ sc->sc_flags &= ~SC_OUTWAIT; } } + mtx_unlock(&sc->sc_mtx); break; case SLIOCGOUTFILL: + /* Unlocked read. */ *(int *)data = sc->sc_outfill / hz; break; @@ -588,12 +631,26 @@ return (ENOBUFS); } s = splimp(); + /* + * XXXRW: We run slstart() in a task queue to avoid running tty code + * out of a network context. However, there's a race here wherein + * the task may not run until after the SLIP interface is reused or + * destroyed. + */ if (sc->sc_ttyp->t_outq.c_cc == 0) slstart(sc->sc_ttyp); splx(s); return (0); } +static void +slstart_schedule(sc) + struct sl_softc *sc; +{ + + taskqueue_enqueue(taskqueue_swi_giant, &sc->sc_task); +} + /* * Start output on interface. Get another datagram * to send from the interface queue and map it to @@ -604,6 +661,18 @@ register struct tty *tp; { register struct sl_softc *sc = (struct sl_softc *)tp->t_sc; + + slstart_schedule(sc); + return 0; +} + +static void +slstart_task(context, pending) + void *context; + int pending; +{ + register struct sl_softc *sc = (struct sl_softc *)context; + register struct tty *tp = sc->sc_ttyp; register struct mbuf *m; register u_char *cp; register struct ip *ip; @@ -619,17 +688,20 @@ (*tp->t_oproc)(tp); if (tp->t_outq.c_cc != 0) { - if (sc != NULL) + if (sc != NULL) { + mtx_lock(&sc->sc_mtx); sc->sc_flags &= ~SC_OUTWAIT; + mtx_unlock(&sc->sc_mtx); + } if (tp->t_outq.c_cc > SLIP_HIWAT) - return 0; + return; } /* * This happens briefly when the line shuts down. */ if (sc == NULL) - return 0; + return; /* * Get a packet and send it to the interface. @@ -642,7 +714,7 @@ IF_DEQUEUE(&sc->sc_if.if_snd, m); splx(s); if (m == NULL) - return 0; + return; /* * We do the header compression here rather than in sloutput @@ -650,6 +722,7 @@ * queueing, and the connection id compression will get * munged when this happens. */ + mtx_lock(&sc->sc_mtx); if (sc->sc_if.if_bpf) { /* * We need to save the TCP/IP header before it's @@ -680,9 +753,10 @@ } ip = mtod(m, struct ip *); if (ip->ip_v == IPVERSION && ip->ip_p == IPPROTO_TCP) { - if (sc->sc_if.if_flags & SC_COMPRESS) + if (sc->sc_if.if_flags & SC_COMPRESS) { *mtod(m, u_char *) |= sl_compress_tcp(m, ip, &sc->sc_comp, 1); + } } if (sc->sc_if.if_bpf && sc->bpfbuf) { /* @@ -694,6 +768,7 @@ bcopy(mtod(m, caddr_t), &sc->bpfbuf[SLX_CHDR], CHDR_LEN); BPF_TAP(&sc->sc_if, sc->bpfbuf, len + SLIP_HDRLEN); } + mtx_unlock(&sc->sc_mtx); /* * If system is getting low on clists, just flush our @@ -709,7 +784,9 @@ continue; } + mtx_lock(&sc->sc_mtx); sc->sc_flags &= ~SC_OUTWAIT; + mtx_unlock(&sc->sc_mtx); /* * The extra FRAME_END will start up a new packet, and thus * will flush any accumulated garbage. We do this whenever @@ -786,7 +863,6 @@ sc->sc_if.if_opackets++; } } - return 0; } /* @@ -799,6 +875,8 @@ { struct mbuf *m, *newm; + mtx_assert(&sc->sc_mtx, MA_OWNED); + MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return (NULL); @@ -854,13 +932,16 @@ if (sc == NULL) return 0; if (c & TTY_ERRORMASK || (tp->t_state & TS_CONNECTED) == 0) { + mtx_lock(&sc->sc_mtx); sc->sc_flags |= SC_ERROR; + mtx_unlock(&sc->sc_mtx); return 0; } c &= TTY_CHARMASK; ++sc->sc_if.if_ibytes; + mtx_lock(&sc->sc_mtx); if (sc->sc_if.if_flags & IFF_DEBUG) { if (c == ABT_ESC) { /* @@ -879,6 +960,7 @@ sc->sc_starttime = time_second; if (sc->sc_abortcount >= ABT_COUNT) { slclose(tp,0); + mtx_unlock(&sc->sc_mtx); return 0; } } @@ -901,6 +983,7 @@ case FRAME_ESCAPE: sc->sc_escape = 1; + mtx_unlock(&sc->sc_mtx); return 0; case FRAME_END: @@ -985,6 +1068,7 @@ if (sc->sc_mp < sc->sc_ep) { *sc->sc_mp++ = c; sc->sc_escape = 0; + mtx_unlock(&sc->sc_mtx); return 0; } @@ -996,6 +1080,7 @@ newpack: sc->sc_mp = sc->sc_buf = sc->sc_ep - SLRMAX; sc->sc_escape = 0; + mtx_unlock(&sc->sc_mtx); return 0; } @@ -1079,6 +1164,7 @@ { struct sl_softc *sc = chan; + mtx_lock(&sc->sc_mtx); if (sc->sc_keepalive) { if (sc->sc_flags & SC_KEEPALIVE) { if (sc->sc_ttyp->t_pgrp != NULL) { @@ -1092,6 +1178,7 @@ } else { sc->sc_flags &= ~SC_KEEPALIVE; } + mtx_unlock(&sc->sc_mtx); } static void @@ -1102,6 +1189,7 @@ register struct tty *tp = sc->sc_ttyp; int s; + mtx_lock(&sc->sc_mtx); if (sc->sc_outfill && tp != NULL) { if (sc->sc_flags & SC_OUTWAIT) { s = splimp (); @@ -1115,4 +1203,5 @@ } else { sc->sc_flags &= ~SC_OUTWAIT; } + mtx_unlock(&sc->sc_mtx); } --- //depot/vendor/freebsd/src/sys/net/if_slvar.h 2004/04/07 20:52:05 +++ //depot/user/rwatson/netperf/sys/net/if_slvar.h 2004/07/14 06:07:17 @@ -34,13 +34,17 @@ #ifndef _NET_IF_SLVAR_H_ #define _NET_IF_SLVAR_H_ +#include +#include #include /* * Definitions for SLIP interface data structures * * (This exists so programs like slstats can get at the definition - * of sl_softc.) + * of sl_softc.) Fields owned by the SLIP subsystem are protected + * using sc_mtx, with the exception of sc_next, which is protected + * by the global slip_mtx. */ struct sl_softc { struct ifnet sc_if; /* network-visible interface */ @@ -66,6 +70,8 @@ struct slcompress sc_comp; /* tcp compression data */ LIST_ENTRY(sl_softc) sl_next; u_char *bpfbuf; /* hang buffer for bpf here */ + struct task sc_task; /* run slstart() w/Giant for ttys */ + struct mtx sc_mtx; }; /* internal flags */ --- //depot/vendor/freebsd/src/sys/net/if_spppsubr.c 2004/07/15 08:30:31 +++ //depot/user/rwatson/netperf/sys/net/if_spppsubr.c 2004/07/15 22:19:17 @@ -92,12 +92,8 @@ #include #if defined(__FreeBSD__) && __FreeBSD__ >= 3 -# define UNTIMEOUT(fun, arg, handle) untimeout(fun, arg, handle) -# define TIMEOUT(fun, arg1, arg2, handle) handle = timeout(fun, arg1, arg2) # define IOCTL_CMD_T u_long #else -# define UNTIMEOUT(fun, arg, handle) untimeout(fun, arg) -# define TIMEOUT(fun, arg1, arg2, handle) timeout(fun, arg1, arg2) # define IOCTL_CMD_T int #endif @@ -259,10 +255,11 @@ void (*scr)(struct sppp *sp); }; +struct mtx sppp_mtx; +MTX_SYSINIT(sppp_mtx, &sppp_mtx, "sppp_mtx", MTX_DEF); + static struct sppp *spppq; -#if defined(__FreeBSD__) && __FreeBSD__ >= 3 -static struct callout_handle keepalive_ch; -#endif +static struct callout keepalive_callout; #if defined(__FreeBSD__) && __FreeBSD__ >= 3 && __FreeBSD_version < 501113 #define SPP_FMT "%s%d: " @@ -964,13 +961,18 @@ { struct sppp *sp = (struct sppp*) ifp; + mtx_lock(&sppp_mtx); /* Initialize keepalive handler. */ - if (spppq == NULL) - TIMEOUT(sppp_keepalive, 0, hz * 10, keepalive_ch); + if (spppq == NULL) { + callout_init(&keepalive_callout, 0); + callout_reset(&keepalive_callout, hz * 10, sppp_keepalive, + NULL); + } /* Insert new entry into the keepalive list. */ sp->pp_next = spppq; spppq = sp; + mtx_unlock(&sppp_mtx); sp->pp_if.if_mtu = PP_MTU; sp->pp_if.if_flags = IFF_POINTOPOINT | IFF_MULTICAST; @@ -1016,6 +1018,7 @@ struct sppp **q, *p, *sp = (struct sppp*) ifp; int i; + mtx_lock(&sppp_mtx); /* Remove the entry from the keepalive list. */ for (q = &spppq; (p = *q); q = &p->pp_next) if (p == sp) { @@ -1025,11 +1028,12 @@ /* Stop keepalive handler. */ if (spppq == NULL) - UNTIMEOUT(sppp_keepalive, 0, keepalive_ch); + callout_stop(&keepalive_callout); + mtx_unlock(&sppp_mtx); for (i = 0; i < IDX_COUNT; i++) - UNTIMEOUT((cps[i])->TO, (void *)sp, sp->ch[i]); - UNTIMEOUT(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch); + untimeout((cps[i])->TO, (void *)sp, sp->ch[i]); + untimeout(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch); mtx_destroy(&sp->pp_cpq.ifq_mtx); mtx_destroy(&sp->pp_fastq.ifq_mtx); } @@ -2008,8 +2012,8 @@ case STATE_STOPPING: sppp_cp_send(sp, cp->proto, TERM_REQ, ++sp->pp_seq[cp->protoidx], 0, 0); - TIMEOUT(cp->TO, (void *)sp, sp->lcp.timeout, - sp->ch[cp->protoidx]); + sp->ch[cp->protoidx] = timeout(cp->TO, (void *)sp, + sp->lcp.timeout); break; case STATE_REQ_SENT: case STATE_ACK_RCVD: @@ -2019,8 +2023,8 @@ break; case STATE_ACK_SENT: (cp->scr)(sp); - TIMEOUT(cp->TO, (void *)sp, sp->lcp.timeout, - sp->ch[cp->protoidx]); + sp->ch[cp->protoidx] = timeout(cp->TO, (void *)sp, + sp->lcp.timeout); break; } @@ -2036,7 +2040,7 @@ { sp->state[cp->protoidx] = newstate; - UNTIMEOUT(cp->TO, (void *)sp, sp->ch[cp->protoidx]); + untimeout(cp->TO, (void *)sp, sp->ch[cp->protoidx]); switch (newstate) { case STATE_INITIAL: case STATE_STARTING: @@ -2049,8 +2053,8 @@ case STATE_REQ_SENT: case STATE_ACK_RCVD: case STATE_ACK_SENT: - TIMEOUT(cp->TO, (void *)sp, sp->lcp.timeout, - sp->ch[cp->protoidx]); + sp->ch[cp->protoidx] = timeout(cp->TO, (void *)sp, + sp->lcp.timeout); break; } } @@ -4147,7 +4151,7 @@ * a number between 300 and 810 seconds. */ i = 300 + ((unsigned)(random() & 0xff00) >> 7); - TIMEOUT(chap.TO, (void *)sp, i * hz, sp->ch[IDX_CHAP]); + sp->ch[IDX_CHAP] = timeout(chap.TO, (void *)sp, i * hz); } if (debug) { @@ -4191,7 +4195,7 @@ if (debug) log(LOG_DEBUG, SPP_FMT "chap tld\n", SPP_ARGS(ifp)); - UNTIMEOUT(chap.TO, (void *)sp, sp->ch[IDX_CHAP]); + untimeout(chap.TO, (void *)sp, sp->ch[IDX_CHAP]); sp->lcp.protos &= ~(1 << IDX_CHAP); lcp.Close(sp); @@ -4327,7 +4331,7 @@ /* ack and nak are his authproto */ case PAP_ACK: - UNTIMEOUT(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch); + untimeout(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch); if (debug) { log(LOG_DEBUG, SPP_FMT "pap success", SPP_ARGS(ifp)); @@ -4356,7 +4360,7 @@ break; case PAP_NAK: - UNTIMEOUT(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch); + untimeout(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch); if (debug) { log(LOG_INFO, SPP_FMT "pap failure", SPP_ARGS(ifp)); @@ -4413,8 +4417,8 @@ if (sp->myauth.proto == PPP_PAP) { /* we are peer, send a request, and start a timer */ pap.scr(sp); - TIMEOUT(sppp_pap_my_TO, (void *)sp, sp->lcp.timeout, - sp->pap_my_to_ch); + sp->pap_my_to_ch = timeout(sppp_pap_my_TO, (void *)sp, + sp->lcp.timeout); } } @@ -4517,8 +4521,8 @@ if (debug) log(LOG_DEBUG, SPP_FMT "pap tld\n", SPP_ARGS(ifp)); - UNTIMEOUT(pap.TO, (void *)sp, sp->ch[IDX_PAP]); - UNTIMEOUT(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch); + untimeout(pap.TO, (void *)sp, sp->ch[IDX_PAP]); + untimeout(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch); sp->lcp.protos &= ~(1 << IDX_PAP); lcp.Close(sp); @@ -4645,7 +4649,12 @@ struct sppp *sp; int s; + /* + * XXXRW: It would be nice to avoid calling all this stuff while + * holding sppp_mtx, or we risk lock order reversals. + */ s = splimp(); + mtx_lock(&sppp_mtx); for (sp=spppq; sp; sp=sp->pp_next) { struct ifnet *ifp = &sp->pp_if; @@ -4684,8 +4693,9 @@ sp->lcp.echoid, 4, &nmagic); } } + callout_reset(&keepalive_callout, hz * 10, sppp_keepalive, NULL); + mtx_unlock(&sppp_mtx); splx(s); - TIMEOUT(sppp_keepalive, 0, hz * 10, keepalive_ch); } /* --- //depot/vendor/freebsd/src/sys/net/if_stf.c 2004/07/15 08:30:31 +++ //depot/user/rwatson/netperf/sys/net/if_stf.c 2004/07/15 22:19:17 @@ -139,13 +139,11 @@ #define sc_ro __sc_ro46.__sc_ro4 const struct encaptab *encap_cookie; LIST_ENTRY(stf_softc) sc_list; /* all stf's are linked */ + struct mtx sc_mtx; /* protect sc_ro */ }; /* * All mutable global variables in if_stf.c are protected by stf_mtx. - * XXXRW: Note that mutable fields in the softc are not currently locked: - * in particular, sc_ro needs to be protected from concurrent entrance - * of stf_output(). */ static struct mtx stf_mtx; static LIST_HEAD(, stf_softc) stf_softc_list; @@ -231,6 +229,7 @@ ifc_free_unit(ifc, unit); return (ENOMEM); } + mtx_init(&sc->sc_mtx, "stf sc_mtx", NULL, MTX_DEF); ifp->if_mtu = IPV6_MMTU; ifp->if_ioctl = stf_ioctl; @@ -255,6 +254,7 @@ bpfdetach(&sc->sc_if); if_detach(&sc->sc_if); + mtx_destroy(&sc->sc_mtx); free(sc, M_STF); } @@ -430,9 +430,10 @@ struct ip *ip; struct ip6_hdr *ip6; struct in6_ifaddr *ia6; -#ifdef MAC + struct route ro; int error; +#ifdef MAC error = mac_check_ifnet_transmit(ifp, m); if (error) { m_freem(m); @@ -534,9 +535,7 @@ else ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos); - /* - * XXXRW: Locking of sc_ro required. - */ + mtx_lock(&sc->sc_mtx); dst4 = (struct sockaddr_in *)&sc->sc_ro.ro_dst; if (dst4->sin_family != AF_INET || bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) { @@ -555,12 +554,21 @@ if (sc->sc_ro.ro_rt == NULL) { m_freem(m); ifp->if_oerrors++; + mtx_unlock(&sc->sc_mtx); return ENETUNREACH; } } + /* + * XXXRW: Holding mutex over call to ip_output(): potential lock + * order issue? Hard to resolve cleanly with the current route + * caching model, as we have to synchronize access to shared softc + * state. + */ ifp->if_opackets++; - return ip_output(m, NULL, &sc->sc_ro, 0, NULL, NULL); + error = ip_output(m, NULL, &ro, 0, NULL, NULL); + mtx_unlock(&sc->sc_mtx); + return (error); } static int --- //depot/vendor/freebsd/src/sys/net/if_tap.c 2004/06/17 17:21:12 +++ //depot/user/rwatson/netperf/sys/net/if_tap.c 2004/06/18 00:24:39 @@ -113,6 +113,10 @@ * All global variables in if_tap.c are locked with tapmtx, with the * exception of tapdebug, which is accessed unlocked; tapclones is * static at runtime. + * + * XXXRW: si_flags appears not to be protected from concurrent access, + * and is written at run-time. + * XXXRW: si_drv1 is also used for test-and-set, and isn't synchronized. */ static struct mtx tapmtx; static int tapdebug = 0; /* debug flag */ @@ -162,6 +166,7 @@ * The EBUSY algorithm here can't quite atomically * guarantee that this is race-free since we have to * release the tap mtx to deregister the clone handler. + * XXXRW: is this true? */ mtx_lock(&tapmtx); SLIST_FOREACH(tp, &taphead, tap_next) { @@ -693,6 +698,7 @@ case SIOCSIFADDR: /* set MAC address of the remote side */ mtx_lock(&tp->tap_mtx); + /* XXXRW: Does this actually do anything? */ bcopy(data, tp->ether_addr, sizeof(tp->ether_addr)); mtx_unlock(&tp->tap_mtx); break; @@ -747,6 +753,7 @@ if (flag & IO_NDELAY) return (EWOULDBLOCK); + /* This looks like a wanna-be condition variable. */ mtx_lock(&tp->tap_mtx); tp->tap_flags |= TAP_RWAIT; mtx_unlock(&tp->tap_mtx); --- //depot/vendor/freebsd/src/sys/net/if_tun.c 2004/07/15 08:30:31 +++ //depot/user/rwatson/netperf/sys/net/if_tun.c 2004/07/15 22:19:17 @@ -59,6 +59,12 @@ * tun_list is protected by global tunmtx. Other mutable fields are * protected by tun->tun_mtx, or by their owning subsystem. tun_dev is * static for the duration of a tunnel interface. + * + * XXXRW: we allocate si_drv1 for the dev_t on demand, rather than when + * the dev_t is instantiated. Nothing serializes the test/set of that + * field. + * + * XXXRW: what serializes access to si_flags? */ struct tun_softc { TAILQ_ENTRY(tun_softc) tun_list; @@ -121,6 +127,9 @@ static d_ioctl_t tunioctl; static d_poll_t tunpoll; +/* + * XXXRW: can remove D_NEEDGIANT? Probably not because of si_drv1 for now. + */ static struct cdevsw tun_cdevsw = { .d_version = D_VERSION, .d_flags = D_PSEUDO | D_NEEDGIANT, @@ -382,6 +391,9 @@ ifp->if_flags |= IFF_UP | IFF_RUNNING; getmicrotime(&ifp->if_lastchange); + /* + * XXXRW: interface locking. + */ for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa; ifa = TAILQ_NEXT(ifa, ifa_link)) { if (ifa->ifa_addr == NULL) --- //depot/vendor/freebsd/src/sys/net/raw_cb.c 2004/06/15 04:15:33 +++ //depot/user/rwatson/netperf/sys/net/raw_cb.c 2004/06/18 00:24:39 @@ -123,6 +123,7 @@ m_freem(dtom(rp->rcb_faddr)); rp->rcb_faddr = 0; #endif + /* Unlocked read. */ if (rp->rcb_socket->so_state & SS_NOFDREF) raw_detach(rp); } --- //depot/vendor/freebsd/src/sys/net/raw_usrreq.c 2004/06/15 04:15:33 +++ //depot/user/rwatson/netperf/sys/net/raw_usrreq.c 2004/06/18 00:24:39 @@ -76,6 +76,10 @@ register struct mbuf *m = m0; struct socket *last; + /* + * XXXRW: Potential lock order issues due to holding the + * rawcb_mtx across all this stuff. Need to revisit. + */ last = 0; mtx_lock(&rawcb_mtx); LIST_FOREACH(rp, &rawcb_list, list) { --- //depot/vendor/freebsd/src/sys/net/rtsock.c 2004/07/06 03:30:34 +++ //depot/user/rwatson/netperf/sys/net/rtsock.c 2004/07/09 21:19:53 @@ -54,10 +54,18 @@ MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); /* NB: these are not modified */ +/* + * XXXRW: It would be really nice to add const to these, but that may + * not be possible due to where they are passed in. We might need + * to const-poison a whole boatload of APIs...? + */ static struct sockaddr route_dst = { 2, PF_ROUTE, }; static struct sockaddr route_src = { 2, PF_ROUTE, }; static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, }; +/* + * XXXRW: These fields are locked by RTSOCK_LOCK(). + */ static struct { int ip_count; /* attacked w/ AF_INET */ int ip6_count; /* attached w/ AF_INET6 */ --- //depot/vendor/freebsd/src/sys/netatalk/aarp.c 2004/07/12 18:35:53 +++ //depot/user/rwatson/netperf/sys/netatalk/aarp.c 2004/07/20 01:38:51 @@ -1,4 +1,5 @@ /* + * Copyright (c) 2004 Robert N. M. Watson * Copyright (c) 1990,1991 Regents of The University of Michigan. * All Rights Reserved. * @@ -63,10 +64,6 @@ #define AARPT_KILLC 20 #define AARPT_KILLI 3 -# if !defined(__FreeBSD__) -extern u_char etherbroadcastaddr[6]; -# endif /* __FreeBSD__ */ - static const u_char atmulticastaddr[ 6 ] = { 0x09, 0x00, 0x07, 0xff, 0xff, 0xff, }; @@ -78,6 +75,9 @@ 0x00, 0x00, 0x00, }; +/* + * XXXRW: Make use callouts, not timeouts. + */ static struct callout_handle aarptimer_ch = CALLOUT_HANDLE_INITIALIZER(&aarptimer_ch); @@ -112,6 +112,7 @@ struct at_ifaddr *aa; struct sockaddr_at *sat2; + AT_IFADDR_LIST_LOCK(); for (aa = at_ifaddr_list; aa != NULL; aa = aa->aa_next) { sat2 = &(aa->aa_addr); if (sat2->sat_addr.s_net == sat->sat_addr.s_net) { @@ -123,6 +124,8 @@ break; } } + IFAREF((struct ifaddr *)aa); + AT_IFADDR_LIST_UNLOCK(); return (aa); } @@ -205,6 +208,7 @@ ntohs(AA_SAT(aa)->sat_addr.s_net), AA_SAT(aa)->sat_addr.s_node); #endif /* NETATALKDEBUG */ + IFAFREE((struct ifaddr *)aa); sa.sa_len = sizeof(struct sockaddr); sa.sa_family = AF_UNSPEC; @@ -233,6 +237,7 @@ bcopy(ifp->if_broadcastaddr, (caddr_t)desten, sizeof(ifp->if_addrlen)); } + IFAFREE((struct ifaddr *)aa); return (1); } @@ -316,8 +321,6 @@ int op; u_short net; - GIANT_REQUIRED; - ea = mtod(m, struct ether_aarp *); /* Check to see if from my hardware address */ @@ -344,6 +347,8 @@ /* * Since we don't know the net, we just look for the first * phase 1 address on the interface. + * + * XXXRW: Note, if_addrhead not locked here. */ for (aa = (struct at_ifaddr *)TAILQ_FIRST(&ifp->if_addrhead); aa; aa = (struct at_ifaddr *)aa->aa_ifa.ifa_link.tqe_next) { @@ -377,6 +382,7 @@ untimeout(aarpprobe, ifp, aa->aa_ch); wakeup(aa); m_freem(m); + IFAFREE((struct ifaddr *)aa); return; } else if (op != AARPOP_PROBE) { /* @@ -389,6 +395,7 @@ ea->aarp_sha[ 0 ], ea->aarp_sha[ 1 ], ea->aarp_sha[ 2 ], ea->aarp_sha[ 3 ], ea->aarp_sha[ 4 ], ea->aarp_sha[ 5 ]); m_freem(m); + IFAFREE((struct ifaddr *)aa); return; } } @@ -405,6 +412,7 @@ aarptfree(aat); AARPTAB_UNLOCK(); m_freem(m); + IFAFREE((struct ifaddr *)aa); return; } @@ -440,6 +448,7 @@ if (tpa.s_net != ma.s_net || tpa.s_node != ma.s_node || op == AARPOP_RESPONSE || (aa->aa_flags & AFA_PROBING)) { m_freem(m); + IFAFREE((struct ifaddr *)aa); return; } @@ -458,6 +467,7 @@ sizeof(struct ether_aarp)); M_PREPEND(m, sizeof(struct llc), M_DONTWAIT); if (m == NULL) { + IFAFREE((struct ifaddr *)aa); return; } llc = mtod(m, struct llc *); @@ -471,6 +481,7 @@ } else { eh->ether_type = htons(ETHERTYPE_AARP); } + IFAFREE((struct ifaddr *)aa); ea->aarp_tpnode = ea->aarp_spnode; ea->aarp_spnode = ma.s_node; @@ -636,6 +647,9 @@ struct aarptab *aat; int i; + /* + * XXXRW: Should grab mutex before untimeout? + */ untimeout(aarptimer, 0, aarptimer_ch); AARPTAB_LOCK(); for (i = 0, aat = aarptab; i < AARPTAB_SIZE; i++, aat++) { --- //depot/vendor/freebsd/src/sys/netatalk/at_control.c 2004/07/19 17:20:31 +++ //depot/user/rwatson/netperf/sys/netatalk/at_control.c 2004/07/20 01:38:51 @@ -21,7 +21,15 @@ #include #include +/* + * Access to at_ifaddr_list and the aa_next pointer in struct at_ifaddr are + * locked with this global mutex. + * + * XXXRW: We'll need something better in the future. + */ +struct mtx at_ifaddr_mtx; struct at_ifaddr *at_ifaddr_list; +MTX_SYSINIT(at_ifaddr_mtx, &at_ifaddr_mtx, "at_ifaddr_mtx", MTX_DEF); static int aa_dorangeroute(struct ifaddr *ifa, u_int first, u_int last, int cmd); @@ -53,10 +61,12 @@ struct at_ifaddr *aa0; struct at_ifaddr *aa = NULL; struct ifaddr *ifa, *ifa0; + int error; /* * If we have an ifp, then find the matching at_ifaddr if it exists */ + mtx_lock(&at_ifaddr_mtx); if (ifp != NULL) { for (aa = at_ifaddr_list; aa != NULL; aa = aa->aa_next) { if (aa->aa_ifp == ifp) @@ -90,16 +100,20 @@ * If we a retrying to delete an addres but didn't find such, * then rewurn with an error */ - if (cmd == SIOCDIFADDR && aa == NULL) + if (cmd == SIOCDIFADDR && aa == NULL) { + mtx_unlock(&at_ifaddr_mtx); return (EADDRNOTAVAIL); + } /*FALLTHROUGH*/ case SIOCSIFADDR: /* * If we are not superuser, then we don't get to do these ops. */ - if (suser(td)) + if (suser(td)) { + mtx_unlock(&at_ifaddr_mtx); return (EPERM); + } sat = satosat(&ifr->ifr_addr); nr = (struct netrange *)sat->sat_zero; @@ -127,8 +141,7 @@ } } - if (ifp == NULL) - panic("at_control"); + KASSERT(ifp != NULL, ("at_control: ifp == NULL")); /* * If we failed to find an existing at_ifaddr entry, then we @@ -193,6 +206,9 @@ /* * If we DID find one then we clobber any routes * dependent on it.. + * + * XXXRW: Do we really want to hold at_ifaddr_mtx + * over at_scrub()? */ at_scrub(ifp, aa); } @@ -222,8 +238,10 @@ } } - if (aa == NULL) + if (aa == NULL) { + mtx_unlock(&at_ifaddr_mtx); return (EADDRNOTAVAIL); + } break; } @@ -231,6 +249,7 @@ * By the time this switch is run we should be able to assume that * the "aa" pointer is valid when needed. */ + error = 0; switch (cmd) { case SIOCGIFADDR: @@ -252,18 +271,30 @@ break; case SIOCSIFADDR: - return (at_ifinit(ifp, aa, - (struct sockaddr_at *)&ifr->ifr_addr)); + /* + * XXXRW: Do we really want to hold at_ifaddr_mtx over + * at_ifinit(). + */ + error = at_ifinit(ifp, aa, + (struct sockaddr_at *)&ifr->ifr_addr); + break; case SIOCAIFADDR: if (sateqaddr(&ifra->ifra_addr, &aa->aa_addr)) - return (0); - return (at_ifinit(ifp, aa, - (struct sockaddr_at *)&ifr->ifr_addr)); + break; + /* + * XXXRW: Do we really want to hold at_ifaddr_mtx over + * at_ifinit(). + */ + error = at_ifinit(ifp, aa, + (struct sockaddr_at *)&ifr->ifr_addr); + break; case SIOCDIFADDR: /* * scrub all routes.. didn't we just DO this? XXX yes, del it + * + * XXXRW: Do we want to hold at_ifaddr_mtx over at_scrub()? */ at_scrub(ifp, aa); @@ -287,10 +318,9 @@ /* * if we found it, remove it, otherwise we screwed up. */ - if (aa->aa_next) - aa->aa_next = aa0->aa_next; - else - panic("at_control"); + KASSERT(aa->aa_next != NULL, + ("at_control: aa->aa_next == NULL")); + aa->aa_next = aa0->aa_next; } /* @@ -300,11 +330,14 @@ break; default: - if (ifp == NULL || ifp->if_ioctl == NULL) - return (EOPNOTSUPP); - return ((*ifp->if_ioctl)(ifp, cmd, data)); + if (ifp == NULL || ifp->if_ioctl == NULL) { + error = EOPNOTSUPP; + break; + } + error = (*ifp->if_ioctl)(ifp, cmd, data); } - return (0); + mtx_unlock(&at_ifaddr_mtx); + return (error); } /* @@ -318,6 +351,14 @@ { int error; + /* + * XXXRW: We don't asser the at_ifaddr_mtx here, but we do rely on + * the caller holding a reference to 'aa' in such a way that it won't + * go away as at_scrub() runs. In practice this means that either + * the caller has to hold an ifaddr reference or at_ifaddr_mtx. + * + * mtx_assert(&at_ifaddr_mtx, MA_OWNED); + */ if (aa->aa_flags & AFA_ROUTE) { if (ifp->if_flags & IFF_LOOPBACK) { if ((error = aa_delsingleroute(&aa->aa_ifa, @@ -352,6 +393,7 @@ int netinc, nodeinc, nnets; u_short net; + mtx_assert(&at_ifaddr_mtx, MA_OWNED); /* * save the old addresses in the at_ifaddr just in case we need them. */ @@ -495,11 +537,15 @@ /* * start off the probes as an asynchronous * activity. though why wait 200mSec? + * + * XXXRW: This probably opens up a race, but + * the race already existed it and so I + * haven't fixed it. */ aa->aa_ch = timeout(aarpprobe, (caddr_t)ifp, hz / 5); - if (tsleep(aa, PPAUSE|PCATCH, "at_ifinit", - 0)) { + if (msleep(aa, &at_ifaddr_mtx, PPAUSE|PCATCH, + "at_ifinit", 0)) { /* * theoretically we shouldn't time * out here so if we returned with an @@ -547,6 +593,9 @@ /* * Now that we have selected an address, we need to tell the interface * about it, just in case it needs to adjust something. + * + * XXXRW: Should we release at_ifaddr_mtx before calling into the + * interface ioctl() code? */ if (ifp->if_ioctl != NULL && (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)aa))) { @@ -619,6 +668,10 @@ * getting risky by now XXX */ if (error) { + /* + * XXXRW: Do we want to hold at_ifaddr_mtx over calls to + * at_scrub()? + */ at_scrub(ifp, aa); aa->aa_addr = oldaddr; aa->aa_firstnet = onr.nr_firstnet; @@ -641,6 +694,7 @@ at_broadcast(struct sockaddr_at *sat) { struct at_ifaddr *aa; + int ret; /* * If the node is not right, it can't be a broadcast @@ -657,13 +711,18 @@ /* * failing that, if the net is one we have, it's a broadcast as well. */ + ret = 0; + mtx_lock(&at_ifaddr_mtx); for (aa = at_ifaddr_list; aa != NULL; aa = aa->aa_next) { if ((aa->aa_ifp->if_flags & IFF_BROADCAST) && (ntohs(sat->sat_addr.s_net) >= ntohs(aa->aa_firstnet) - && ntohs(sat->sat_addr.s_net) <= ntohs(aa->aa_lastnet))) - return (1); + && ntohs(sat->sat_addr.s_net) <= ntohs(aa->aa_lastnet))) { + ret = 1; + break; + } } - return (0); + mtx_unlock(&at_ifaddr_mtx); + return (ret); } /* @@ -785,6 +844,10 @@ struct ifaddr *ifa; struct ifnet *ifp; + /* + * XXXRW: #if 0, and hence untested. + */ + mtx_lock(&at_ifaddr_mtx); while ((aa = at_ifaddr_list) != NULL) { ifp = aa->aa_ifp; at_scrub(ifp, aa); @@ -795,13 +858,12 @@ while (ifa->ifa_next && (ifa->ifa_next != (struct ifaddr *)aa)) ifa = ifa->ifa_next; - if (ifa->ifa_next) - ifa->ifa_next = - ((struct ifaddr *)aa)->ifa_next; - else - panic("at_entry"); + KASSERT(ifa->ifa_next != NULL, + ("aa_clean: ifa_next == NULL")); + ifa->ifa_next = ((struct ifaddr *)aa)->ifa_next; } } + mtx_unlock(&at_ifaddr_mtx); } #endif --- //depot/vendor/freebsd/src/sys/netatalk/at_rmx.c 2004/07/12 18:40:50 +++ //depot/user/rwatson/netperf/sys/netatalk/at_rmx.c 2004/07/12 19:43:55 @@ -40,6 +40,19 @@ int at_inithead(void **head, int off); +/* + * XXXRW: hexdump was a static global variable, but I moved it into the + * stack rather than stick a mutex around it. 256 bytes is smaller than + * it used to be, but this still might be a problem. Needs to be + * revisited. Should probably just use the new hexdump(9). + * + * XXXRW: All this appears to be present just so as to printf debugging + * information. Assuming that this code is known to work, we could just + * scrap all this. In fact, this code isn't even used as it stands, it's + * here for debugging purposes only and requires modifications to + * at_proto.c. + */ + #define HEXBUF_LEN 256 static const char * --- //depot/vendor/freebsd/src/sys/netatalk/at_var.h 2004/03/22 04:56:26 +++ //depot/user/rwatson/netperf/sys/netatalk/at_var.h 2004/07/20 01:38:51 @@ -60,6 +60,11 @@ #define AFA_PHASE2 0x0004 #ifdef _KERNEL +extern struct mtx at_ifaddr_mtx; +#define AT_IFADDR_LIST_ASSERT() mtx_assert(&at_ifaddr_mtx, MA_OWNED) +#define AT_IFADDR_LIST_LOCK() mtx_lock(&at_ifaddr_mtx) +#define AT_IFADDR_LIST_UNLOCK() mtx_unlock(&at_ifaddr_mtx) + extern struct at_ifaddr *at_ifaddr_list; #endif --- //depot/vendor/freebsd/src/sys/netatalk/ddp_input.c 2004/07/12 18:40:50 +++ //depot/user/rwatson/netperf/sys/netatalk/ddp_input.c 2004/07/20 01:38:51 @@ -32,6 +32,11 @@ static volatile int ddp_firewall = 0; static struct ddpstat ddpstat; +/* + * XXXRW: If we're going to keep this cached route data, we'll need to lock it + * down, and change later function-local use of it to grab an extra reference + * after deciding it is useful. + */ static struct route forwro; static void ddp_input(struct mbuf *, struct ifnet *, struct elaphdr *, int); @@ -42,7 +47,6 @@ void at2intr(struct mbuf *m) { - GIANT_REQUIRED; /* * Phase 2 packet handling @@ -70,8 +74,6 @@ elhp = mtod(m, struct elaphdr *); m_adj(m, SZ_ELAPHDR); - GIANT_REQUIRED; - if (elhp->el_type == ELAP_DDPEXTEND) { ddp_input(m, m->m_pkthdr.rcvif, NULL, 1); } else { @@ -129,6 +131,7 @@ * Make sure that we point to the phase1 ifaddr info * and that it's valid for this packet. */ + AT_IFADDR_LIST_LOCK(); for (aa = at_ifaddr_list; aa != NULL; aa = aa->aa_next) { if ((aa->aa_ifp == ifp) && ((aa->aa_flags & AFA_PHASE2) == 0) @@ -144,6 +147,8 @@ m_freem(m); return; } + IFAREF((struct ifaddr *)aa); + AT_IFADDR_LIST_UNLOCK(); } else { /* * There was no 'elh' passed on. This could still be @@ -188,6 +193,7 @@ * this node number will match (which may NOT be what we want, * but it's probably safe in 99.999% of cases. */ + AT_IFADDR_LIST_LOCK(); for (aa = at_ifaddr_list; aa != NULL; aa = aa->aa_next) { if (phase == 1 && (aa->aa_flags & AFA_PHASE2)) { continue; @@ -202,11 +208,15 @@ break; } } + if (aa != NULL) + IFAREF((struct ifaddr *)aa); + AT_IFADDR_LIST_UNLOCK(); } else { /* * A destination network was given. We just try to find * which ifaddr info matches it. */ + AT_IFADDR_LIST_LOCK(); for (aa = at_ifaddr_list; aa != NULL; aa = aa->aa_next) { /* * This is a kludge. Accept packets that are @@ -240,6 +250,9 @@ } break; } + if (aa != NULL) + IFAREF((struct ifaddr *)aa); + AT_IFADDR_LIST_UNLOCK(); } } @@ -252,6 +265,8 @@ if (mlen < dlen) { ddpstat.ddps_toosmall++; m_freem(m); + if (aa != NULL) + IFAFREE((struct ifaddr *)aa); return; } if (mlen > dlen) { @@ -274,6 +289,8 @@ */ if (ddp_forward == 0) { m_freem(m); + if (aa != NULL) + IFAFREE((struct ifaddr *)aa); return; } /* @@ -318,6 +335,8 @@ && ((forwro.ro_rt == NULL) || (forwro.ro_rt->rt_ifp != ifp))) { m_freem(m); + if (aa != NULL) + IFAFREE((struct ifaddr *)aa); return; } @@ -335,6 +354,8 @@ } else { ddpstat.ddps_forward++; } + if (aa != NULL) + IFAFREE((struct ifaddr *)aa); return; } @@ -397,6 +418,8 @@ DDP_LIST_SUNLOCK(); if (m != NULL) m_freem(m); + if (aa != NULL) + IFAFREE((struct ifaddr *)aa); } #if 0 --- //depot/vendor/freebsd/src/sys/netatalk/ddp_output.c 2004/06/13 02:50:44 +++ //depot/user/rwatson/netperf/sys/netatalk/ddp_output.c 2004/07/20 01:38:51 @@ -151,6 +151,7 @@ && (ro->ro_rt->rt_ifa) && (ifp = ro->ro_rt->rt_ifa->ifa_ifp)) { net = ntohs(satosat(ro->ro_rt->rt_gateway)->sat_addr.s_net); + AT_IFADDR_LIST_LOCK(); for (aa = at_ifaddr_list; aa != NULL; aa = aa->aa_next) { if (((net == 0) || (aa->aa_ifp == ifp)) && net >= ntohs(aa->aa_firstnet) && @@ -158,6 +159,8 @@ break; } } + IFAREF((struct ifaddr *)aa); + AT_IFADDR_LIST_UNLOCK(); } else { m_freem(m); #ifdef NETATALK_DEBUG @@ -177,6 +180,8 @@ ifp->if_xname); #endif m_freem(m); + if (aa != NULL) + IFAFREE((struct ifaddr *)aa); return (ENETUNREACH); } @@ -203,6 +208,8 @@ MGET(m0, M_TRYWAIT, MT_HEADER); if (m0 == NULL) { m_freem(m); + if (aa != NULL) + IFAFREE((struct ifaddr *)aa); printf("ddp_route: no buffers\n"); return (ENOBUFS); } @@ -236,9 +243,13 @@ if ((satosat(&aa->aa_addr)->sat_addr.s_net == satosat(&ro->ro_dst)->sat_addr.s_net) && (satosat(&aa->aa_addr)->sat_addr.s_node == satosat(&ro->ro_dst)->sat_addr.s_node)) { + if (aa != NULL) + IFAFREE((struct ifaddr *)aa); return (if_simloop(ifp, m, gate.sat_family, 0)); } + if (aa != NULL) + IFAFREE((struct ifaddr *)aa); return ((*ifp->if_output)(ifp, m, (struct sockaddr *)&gate, NULL)); /* XXX */ } --- //depot/vendor/freebsd/src/sys/netatalk/ddp_pcb.c 2004/07/12 18:40:50 +++ //depot/user/rwatson/netperf/sys/netatalk/ddp_pcb.c 2004/07/20 01:38:51 @@ -44,6 +44,7 @@ struct sockaddr_at lsat, *sat; struct at_ifaddr *aa; struct ddpcb *ddpp; + int error; /* * We read and write both the ddp passed in, and also ddp_ports. @@ -51,37 +52,46 @@ DDP_LIST_XLOCK_ASSERT(); DDP_LOCK_ASSERT(ddp); - if (ddp->ddp_lsat.sat_port != ATADDR_ANYPORT) { /* shouldn't be bound */ - return (EINVAL); + error = 0; + if (ddp->ddp_lsat.sat_port != ATADDR_ANYPORT) { + /* shouldn't be bound */ + error = EINVAL; + goto out; } if (addr != NULL) { /* validate passed address */ sat = (struct sockaddr_at *)addr; if (sat->sat_family != AF_APPLETALK) { - return (EAFNOSUPPORT); + error = EAFNOSUPPORT; + goto out; } if (sat->sat_addr.s_node != ATADDR_ANYNODE || sat->sat_addr.s_net != ATADDR_ANYNET) { + AT_IFADDR_LIST_LOCK(); for (aa = at_ifaddr_list; aa != NULL; aa = aa->aa_next) { if ((sat->sat_addr.s_net == AA_SAT(aa)->sat_addr.s_net) && (sat->sat_addr.s_node == AA_SAT(aa)->sat_addr.s_node)) { break; } } - if (!aa) { - return (EADDRNOTAVAIL); + AT_IFADDR_LIST_UNLOCK(); + if (aa != NULL) { + error = EADDRNOTAVAIL; + goto out; } } if (sat->sat_port != ATADDR_ANYPORT) { if (sat->sat_port < ATPORT_FIRST || sat->sat_port >= ATPORT_LAST) { - return (EINVAL); + error = EINVAL; + goto out; } if (sat->sat_port < ATPORT_RESERVED && suser(td)) { - return (EACCES); + error = EACCES; + goto out; } } } else { @@ -96,7 +106,8 @@ if (sat->sat_addr.s_node == ATADDR_ANYNODE && sat->sat_addr.s_net == ATADDR_ANYNET) { if (at_ifaddr_list == NULL) { - return (EADDRNOTAVAIL); + error = EADDRNOTAVAIL; + goto out; } sat->sat_addr = AA_SAT(at_ifaddr_list)->sat_addr; } @@ -113,7 +124,8 @@ } } if (sat->sat_port == ATPORT_LAST) { - return (EADDRNOTAVAIL); + error = EADDRNOTAVAIL; + goto out; } ddp->ddp_lsat.sat_port = sat->sat_port; ddp_ports[ sat->sat_port - 1 ] = ddp; @@ -126,7 +138,8 @@ } } if (ddpp != NULL) { - return (EADDRINUSE); + error = EADDRINUSE; + goto out; } ddp->ddp_pnext = ddp_ports[ sat->sat_port - 1 ]; ddp_ports[ sat->sat_port - 1 ] = ddp; @@ -135,7 +148,8 @@ } } - return (0); +out: + return (error); } int @@ -181,6 +195,7 @@ } aa = NULL; if ((ifp = ro->ro_rt->rt_ifp) != NULL) { + AT_IFADDR_LIST_LOCK(); for (aa = at_ifaddr_list; aa != NULL; aa = aa->aa_next) { if (aa->aa_ifp == ifp && ntohs(net) >= ntohs(aa->aa_firstnet) && @@ -188,6 +203,7 @@ break; } } + AT_IFADDR_LIST_UNLOCK(); } if (aa == NULL || (satosat(&ro->ro_dst)->sat_addr.s_net != (hintnet ? hintnet : sat->sat_addr.s_net) || @@ -218,11 +234,13 @@ */ aa = NULL; if (ro->ro_rt && (ifp = ro->ro_rt->rt_ifp)) { + AT_IFADDR_LIST_LOCK(); for (aa = at_ifaddr_list; aa != NULL; aa = aa->aa_next) { if (aa->aa_ifp == ifp) { break; } } + AT_IFADDR_LIST_UNLOCK(); } if (aa == NULL) { return (ENETUNREACH); @@ -257,6 +275,10 @@ DDP_LOCK_INIT(ddp); ddp->ddp_lsat.sat_port = ATADDR_ANYPORT; + /* + * XXXRW: Is this unlocked assignment payer for socket and + * back-pointer something that needs to be protected? + */ ddp->ddp_socket = so; so->so_pcb = (caddr_t)ddp; --- //depot/vendor/freebsd/src/sys/netatalk/ddp_usrreq.c 2004/07/12 18:40:50 +++ //depot/user/rwatson/netperf/sys/netatalk/ddp_usrreq.c 2004/07/12 19:43:55 @@ -23,6 +23,11 @@ #include #include +/* + * XXXRW: These structures are currently not mutable. They're not marked + * with 'const' because I suspect that consumers of this code would like for + * them to be mutable someday. + */ static u_long ddp_sendspace = DDP_MAXSZ; /* Max ddp size + 1 (ddp_type) */ static u_long ddp_recvspace = 10 * (587 + sizeof(struct sockaddr_at)); @@ -149,6 +154,13 @@ return (0); } +/* + * XXXRW: If an explicit address is specified, then we temporarily change + * the address on the pcb for sending. This is inefficient because it + * requires us to perform global rather than pcb-local operations. It + * may also create a race if other users of the socket are simultaneously + * sending. + */ static int ddp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) --- //depot/vendor/freebsd/src/sys/netgraph/ng_ksocket.c 2004/06/25 19:25:33 +++ //depot/user/rwatson/netperf/sys/netgraph/ng_ksocket.c 2004/06/26 16:46:29 @@ -998,6 +998,9 @@ * before dereferencing the socket pointer. */ +/* + * XXXRW: ng_ksocket_incoming() is called without Giant. Is that OK? + */ static void ng_ksocket_incoming(struct socket *so, void *arg, int waitflag) { --- //depot/vendor/freebsd/src/sys/netinet/accf_http.c 2004/06/14 18:20:38 +++ //depot/user/rwatson/netperf/sys/netinet/accf_http.c 2004/06/14 19:35:13 @@ -161,6 +161,7 @@ sohashttpget(struct socket *so, void *arg, int waitflag) { + /* Unlocked read. */ if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0 && !sbfull(&so->so_rcv)) { struct mbuf *m; char *cmp; @@ -214,6 +215,7 @@ struct mbuf *m, *n; int i, cc, spaces, inspaces; + /* Unlocked read. */ if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) != 0 || sbfull(&so->so_rcv)) goto fallout; @@ -301,6 +303,7 @@ int ccleft, copied; DPRINT("start"); + /* Unlocked read. */ if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) != 0 || sbfull(&so->so_rcv)) goto gotit; --- //depot/vendor/freebsd/src/sys/netinet/if_ether.c 2004/06/13 10:56:09 +++ //depot/user/rwatson/netperf/sys/netinet/if_ether.c 2004/06/13 20:00:27 @@ -98,6 +98,11 @@ #define la_timer la_rt->rt_rmx.rmx_expire /* deletion time in seconds */ }; +/* + * XXXRW: Need to document (and/or fix) locking for this. We always + * seem to hold a lock (and assert) when referencing this list, but it's + * not clear it's always the same lock. + */ static LIST_HEAD(, llinfo_arp) llinfo_arp; static struct ifqueue arpintrq; --- //depot/vendor/freebsd/src/sys/netinet/igmp.c 2004/06/11 03:45:34 +++ //depot/user/rwatson/netperf/sys/netinet/igmp.c 2004/06/11 13:24:13 @@ -99,6 +99,9 @@ static u_long igmp_all_hosts_group; static u_long igmp_all_rtrs_group; +/* + * XXXRW: These variables make me vaguely nervous. + */ static struct mbuf *router_alert; static struct route igmprt; @@ -123,6 +126,7 @@ /* * Construct a Router Alert option to use in outgoing packets + * XXXRW: This might actually need a MAC label. */ MGET(router_alert, M_DONTWAIT, MT_DATA); ra = mtod(router_alert, struct ipoption *); --- //depot/vendor/freebsd/src/sys/netinet/in_gif.c 2004/06/18 02:06:42 +++ //depot/user/rwatson/netperf/sys/netinet/in_gif.c 2004/06/18 03:27:04 @@ -174,6 +174,9 @@ } bcopy(&iphdr, mtod(m, struct ip *), sizeof(struct ip)); + /* + * XXXRW: locking of gif's softc. + */ if (dst->sin_family != sin_dst->sin_family || dst->sin_addr.s_addr != sin_dst->sin_addr.s_addr) { /* cache route doesn't match */ @@ -321,6 +324,10 @@ case 0: case 127: case 255: return 0; } + + /* + * XXXRW: Lock in_ifaddrhead walking. + */ /* reject packets with broadcast on source */ TAILQ_FOREACH(ia4, &in_ifaddrhead, ia_link) { if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0) @@ -329,6 +336,7 @@ return 0; } + /* XXXRW: unlocked read. */ /* ingress filters on outer source */ if ((sc->gif_if.if_flags & IFF_LINK2) == 0 && ifp) { struct sockaddr_in sin; @@ -384,6 +392,11 @@ in_gif_attach(sc) struct gif_softc *sc; { + + /* + * XXXRW: Technically, NULL can also be returned for ENOMEM, + * not just EEXIST. + */ sc->encap_cookie4 = encap_attach_func(AF_INET, -1, gif_encapcheck, &in_gif_protosw, sc); if (sc->encap_cookie4 == NULL) --- //depot/vendor/freebsd/src/sys/netinet/in_pcb.c 2004/07/28 13:05:41 +++ //depot/user/rwatson/netperf/sys/netinet/in_pcb.c 2004/08/06 05:46:33 @@ -461,6 +461,9 @@ in_addr_t laddr, faddr; int anonport, error; + INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); + INP_LOCK_ASSERT(inp); + lport = inp->inp_lport; laddr = inp->inp_laddr.s_addr; anonport = (lport == 0); @@ -530,6 +533,9 @@ u_short lport, fport; int error; + INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); + INP_LOCK_ASSERT(inp); + if (oinpp != NULL) *oinpp = NULL; if (nam->sa_len != sizeof (*sin)) @@ -662,6 +668,7 @@ #ifdef IPSEC ipsec_pcbdisconn(inp->inp_sp); #endif + /* Unlocked read. */ if (inp->inp_socket->so_state & SS_NOFDREF) in_pcbdetach(inp); } @@ -732,6 +739,10 @@ struct in_addr addr; in_port_t port; + /* + * XXXRW: Why not use the inpcb lock instead of the info lock + * here? + */ s = splnet(); INP_INFO_RLOCK(pcbinfo); inp = sotoinpcb(so); @@ -740,10 +751,8 @@ splx(s); return ECONNRESET; } - INP_LOCK(inp); port = inp->inp_lport; addr = inp->inp_laddr; - INP_UNLOCK(inp); INP_INFO_RUNLOCK(pcbinfo); splx(s); @@ -765,6 +774,10 @@ struct in_addr addr; in_port_t port; + /* + * XXXRW: Why not use the inpcb lock instead of the info lock + * here? + */ s = splnet(); INP_INFO_RLOCK(pcbinfo); inp = sotoinpcb(so); @@ -773,10 +786,8 @@ splx(s); return ECONNRESET; } - INP_LOCK(inp); port = inp->inp_fport; addr = inp->inp_faddr; - INP_UNLOCK(inp); INP_INFO_RUNLOCK(pcbinfo); splx(s); @@ -1113,7 +1124,7 @@ u_int32_t hashkey_faddr; INP_INFO_WLOCK_ASSERT(pcbinfo); - /* XXX? INP_LOCK_ASSERT(inp); */ + INP_LOCK_ASSERT(inp); #ifdef INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; --- //depot/vendor/freebsd/src/sys/netinet/ip_divert.c 2004/08/03 12:35:30 +++ //depot/user/rwatson/netperf/sys/netinet/ip_divert.c 2004/08/04 20:26:26 @@ -358,6 +358,9 @@ m->m_pkthdr.rcvif = ifa->ifa_ifp; } #ifdef MAC + /* + * XXXRW: perhaps should be mac_create_mbuf_from_inpcb()? + */ SOCK_LOCK(so); mac_create_mbuf_from_socket(so, m); SOCK_UNLOCK(so); @@ -429,6 +432,7 @@ /* The socket is always "connected" because we always know "where" to send the packet */ INP_UNLOCK(inp); + /* XXXRW: Not clear if this is adequate. */ SOCK_LOCK(so); so->so_state |= SS_ISCONNECTED; SOCK_UNLOCK(so); @@ -473,6 +477,8 @@ static int div_disconnect(struct socket *so) { + + /* Unlocked read. */ if ((so->so_state & SS_ISCONNECTED) == 0) return ENOTCONN; return div_abort(so); --- //depot/vendor/freebsd/src/sys/netinet/ip_encap.c 2004/03/10 02:50:38 +++ //depot/user/rwatson/netperf/sys/netinet/ip_encap.c 2004/03/10 03:47:45 @@ -1,4 +1,4 @@ -/* $FreeBSD: src/sys/netinet/ip_encap.c,v 1.19 2004/03/10 02:48:50 rwatson Exp $ */ +/* $FreeBSD: src/sys/netinet/ip_encap.c,v 1.18 2003/06/01 09:20:38 phk Exp $ */ /* $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $ */ /* @@ -106,8 +106,7 @@ LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab); /* - * We currently keey encap_init() for source code compatibility reasons -- - * it's referenced by KAME pieces in netinet6. + * XXXRW: encap_init() was entirely useless, so I deleted it. */ void encap_init() @@ -185,6 +184,10 @@ } mtx_unlock(&encapmtx); + /* + * XXXRW: Need drain mechanism to prevent the encapsulation + * entry from being released while in use. + */ if (match) { /* found a match, "match" has the best one */ psw = match->psw; @@ -255,6 +258,10 @@ } mtx_unlock(&encapmtx); + /* + * XXXRW: Need drain mechanism so the encap entry isn't freed + * while in use. + */ if (match) { /* found a match */ psw = (const struct ip6protosw *)match->psw; --- //depot/vendor/freebsd/src/sys/netinet/ip_encap.h 2002/03/19 21:30:39 +++ //depot/user/rwatson/netperf/sys/netinet/ip_encap.h 2004/03/12 06:46:13 @@ -35,6 +35,15 @@ #ifdef _KERNEL +/* + * This structure is entirely static after registration, and other than + * its entry in the encapsulation table, requires no locking. The chain + * field is locked using the global encapmtx. + * + * XXXRW: Need to add a refcount/drain mechanism so that encapsulation + * entries can't be removed while in use. This likely requires a + * refcount and cv to wait for it to drain, or an sx lock. + */ struct encaptab { LIST_ENTRY(encaptab) chain; int af; --- //depot/vendor/freebsd/src/sys/netinet/ip_fastfwd.c 2004/06/27 09:10:34 +++ //depot/user/rwatson/netperf/sys/netinet/ip_fastfwd.c 2004/06/27 16:41:59 @@ -642,6 +642,7 @@ /* * Return packet for processing by ip_input */ + /* XXX statistic */ if (ro.ro_rt) RTFREE(ro.ro_rt); return 0; --- //depot/vendor/freebsd/src/sys/netinet/ip_fw2.c 2004/07/21 19:55:40 +++ //depot/user/rwatson/netperf/sys/netinet/ip_fw2.c 2004/07/24 17:51:01 @@ -1535,7 +1535,8 @@ } static int -check_uidgid(ipfw_insn_u32 *insn, +check_uidgid(struct ip_fw_chain *chain, + ipfw_insn_u32 *insn, int proto, struct ifnet *oif, struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip, u_int16_t src_port, @@ -1565,7 +1566,9 @@ return 0; match = 0; if (*lookup == 0) { + IPFW_UNLOCK(chain); INP_INFO_RLOCK(pi); /* XXX LOR with IPFW */ + IPFW_LOCK(chain); pcb = (oif) ? in_pcblookup_hash(pi, dst_ip, htons(dst_port), @@ -1930,7 +1933,7 @@ break; if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) - match = check_uidgid( + match = check_uidgid(chain, (ipfw_insn_u32 *)cmd, proto, oif, dst_ip, dst_port, --- //depot/vendor/freebsd/src/sys/netinet/ip_id.c 2004/02/26 03:55:40 +++ //depot/user/rwatson/netperf/sys/netinet/ip_id.c 2004/03/12 03:03:57 @@ -79,6 +79,9 @@ 2729 }; +/* + * XXXRW: Locking? + */ static u_int16_t ru_x; static u_int16_t ru_seed, ru_seed2; static u_int16_t ru_a, ru_b; --- //depot/vendor/freebsd/src/sys/netinet/ip_input.c 2004/08/03 12:35:30 +++ //depot/user/rwatson/netperf/sys/netinet/ip_input.c 2004/08/04 20:26:26 @@ -857,8 +857,10 @@ /* attach next hop info for TCP */ struct m_tag *mtag = m_tag_get(PACKET_TAG_IPFORWARD, sizeof(struct sockaddr_in *), M_NOWAIT); - if (mtag == NULL) + if (mtag == NULL) { + /* XXX statistic */ goto bad; + } *(struct sockaddr_in **)(mtag+1) = args.next_hop; m_tag_prepend(m, mtag); } @@ -1878,6 +1880,7 @@ struct m_tag *mtag = m_tag_get(PACKET_TAG_IPFORWARD, sizeof(struct sockaddr_in *), M_NOWAIT); if (mtag == NULL) { + /* XXX statistic */ m_freem(m); return; } --- //depot/vendor/freebsd/src/sys/netinet/ip_output.c 2004/08/03 14:15:35 +++ //depot/user/rwatson/netperf/sys/netinet/ip_output.c 2004/08/04 20:26:26 @@ -157,6 +157,12 @@ M_ASSERTPKTHDR(m); + /* + * When packet comes from dummynet restore state from + * previous processing instead of the header. Yech! + * + * XXX add conditional compilation? + */ args.next_hop = m_claim_next(m, PACKET_TAG_IPFORWARD); dummytag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); if (dummytag != NULL) { @@ -868,6 +874,7 @@ PACKET_TAG_IPFORWARD, sizeof(struct sockaddr_in *), M_NOWAIT); if (mtag == NULL) { + /* XXX statistic */ error = ENOBUFS; goto bad; } @@ -886,6 +893,7 @@ CSUM_IP_CHECKED | CSUM_IP_VALID; ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); + /* XXX netisr_queue(NETISR_IP, m); */ ip_input(m); goto done; } @@ -1400,9 +1408,11 @@ m->m_len = sopt->sopt_valsize; error = sooptcopyin(sopt, mtod(m, char *), m->m_len, m->m_len); - - return (ip_pcbopts(sopt->sopt_name, &inp->inp_options, - m)); + INP_LOCK(inp); + error = ip_pcbopts(sopt->sopt_name, &inp->inp_options, + m); + INP_UNLOCK(inp); + return (error); } case IP_TOS: @@ -1473,7 +1483,15 @@ case IP_MULTICAST_LOOP: case IP_ADD_MEMBERSHIP: case IP_DROP_MEMBERSHIP: + /* + * XXXRW: ip_setmoptions() will calling a blocking + * memory allocation, so the inpcb lock should + * really be acquired in ip_setmoptions(), which + * isn't possible with the current API. + */ + INP_LOCK(inp); error = ip_setmoptions(sopt, &inp->inp_moptions); + INP_UNLOCK(inp); break; case IP_PORTRANGE: @@ -1524,7 +1542,9 @@ req = mtod(m, caddr_t); len = m->m_len; optname = sopt->sopt_name; + INP_LOCK(inp); error = ipsec4_set_policy(inp, optname, req, len, priv); + INP_UNLOCK(inp); m_freem(m); break; } @@ -1540,6 +1560,12 @@ switch (sopt->sopt_name) { case IP_OPTIONS: case IP_RETOPTS: + /* + * XXXRW: We should make a local copy of the + * options while holding the inpcb lock, and then + * copy out without holding the lock. Currently, + * we race. + */ if (inp->inp_options) error = sooptcopyout(sopt, mtod(inp->inp_options, @@ -1617,7 +1643,15 @@ case IP_MULTICAST_LOOP: case IP_ADD_MEMBERSHIP: case IP_DROP_MEMBERSHIP: + /* + * XXXRW: ip_getmoptions() may perform a copy out + * to user space, and should do that without + * holding the inpcb lock. However, we don't pass + * the inpcb down, so it can't do that. + */ + INP_LOCK(inp); error = ip_getmoptions(sopt, inp->inp_moptions); + INP_UNLOCK(inp); break; #if defined(IPSEC) || defined(FAST_IPSEC) --- //depot/vendor/freebsd/src/sys/netinet/raw_ip.c 2004/07/26 07:25:53 +++ //depot/user/rwatson/netperf/sys/netinet/raw_ip.c 2004/08/02 01:36:03 @@ -87,6 +87,9 @@ * so leave them not initialized and rely on BSS being set to 0. */ +/* + * XXXRW: Locking for mrouter bits? + */ /* The socket used to communicate with the multicast routing daemon. */ struct socket *ip_mrouter; @@ -632,6 +635,7 @@ } INP_LOCK(inp); soisdisconnected(so); + /* Unlocked read. */ if (so->so_state & SS_NOFDREF) rip_pcbdetach(so, inp); else @@ -643,6 +647,8 @@ static int rip_disconnect(struct socket *so) { + + /* Unlocked read. */ if ((so->so_state & SS_ISCONNECTED) == 0) return ENOTCONN; return rip_abort(so); @@ -739,6 +745,7 @@ INP_INFO_WLOCK(&ripcbinfo); inp = sotoinpcb(so); + /* Unlocked read. */ if (so->so_state & SS_ISCONNECTED) { if (nam) { INP_INFO_WUNLOCK(&ripcbinfo); --- //depot/vendor/freebsd/src/sys/netinet/tcp_debug.c 2004/04/07 20:52:05 +++ //depot/user/rwatson/netperf/sys/netinet/tcp_debug.c 2004/04/08 03:11:34 @@ -50,6 +50,7 @@ #include #include #include +#include #include #include --- //depot/vendor/freebsd/src/sys/netinet/tcp_input.c 2004/07/12 19:30:33 +++ //depot/user/rwatson/netperf/sys/netinet/tcp_input.c 2004/07/12 19:43:55 @@ -429,7 +429,7 @@ struct tcpopt to; /* options in this segment */ struct rmxp_tao tao; /* our TAO cache entry */ int headlocked = 0; - struct sockaddr_in *next_hop = NULL; + struct sockaddr_in *next_hop; int rstreason; /* For badport_bandlim accounting purposes */ struct ip6_hdr *ip6 = NULL; @@ -1226,6 +1226,7 @@ tcp_timer_rexmt, tp); sowwakeup(so); + /* Unlocked read. */ if (so->so_snd.sb_cc) (void) tcp_output(tp); goto check_delack; @@ -1744,6 +1745,7 @@ * If new data are received on a connection after the * user processes are gone, then RST the other end. */ + /* Unlocked read. */ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && tlen) { tp = tcp_close(tp); @@ -2219,6 +2221,7 @@ * we should release the tp also, and use a * compressed state. */ + /* Unlocked read. */ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { soisdisconnected(so); callout_reset(tp->tt_2msl, tcp_maxidle, @@ -2305,6 +2308,7 @@ * soreceive. It's hard to imagine someone * actually wanting to send this much urgent data. */ + /* Unlocked read. */ if (th->th_urp + so->so_rcv.sb_cc > sb_max) { th->th_urp = 0; /* XXX */ thflags &= ~TH_URG; /* XXX */ @@ -2324,6 +2328,7 @@ * of data past the urgent section as the original * spec states (in one of two places). */ + /* Unlocked read of sb_cc. */ if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) { tp->rcv_up = th->th_seq + th->th_urp; SOCKBUF_LOCK(&so->so_rcv); --- //depot/vendor/freebsd/src/sys/netinet/tcp_output.c 2004/07/28 02:15:32 +++ //depot/user/rwatson/netperf/sys/netinet/tcp_output.c 2004/08/02 01:36:03 @@ -271,6 +271,7 @@ * to send then the probe will be the FIN * itself. */ + /* Unlocked read of sb_cc. */ if (off < so->so_snd.sb_cc) flags &= ~TH_FIN; sendwin = 1; @@ -295,6 +296,7 @@ * If sack_rxmit is true we are retransmitting from the scoreboard * in which case len is already set. */ + /* Unlocked read of sb_cc. */ if (!sack_rxmit) len = ((long)ulmin(so->so_snd.sb_cc, sendwin) - off); @@ -356,6 +358,7 @@ len = tp->t_maxseg; sendalot = 1; } + /* Unlocked read of sb_cc. */ if (sack_rxmit) { if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc)) flags &= ~TH_FIN; @@ -388,6 +391,7 @@ * * note: the len + off check is almost certainly unnecessary. */ + /* Unlocked read of sb_cc. */ if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */ (idle || (tp->t_flags & TF_NODELAY)) && len + off >= so->so_snd.sb_cc && @@ -479,6 +483,7 @@ * if window is nonzero, transmit what we can, * otherwise force out a byte. */ + /* Unlocked read of sb_cc. */ if (so->so_snd.sb_cc && !callout_active(tp->tt_rexmt) && !callout_active(tp->tt_persist)) { tp->t_rxtshift = 0; @@ -785,6 +790,7 @@ * give data to the user when a buffer fills or * a PUSH comes in.) */ + /* Unlocked read of sb_cc. */ if (off + len == so->so_snd.sb_cc) flags |= TH_PUSH; } else { --- //depot/vendor/freebsd/src/sys/netinet/tcp_subr.c 2004/08/06 03:50:29 +++ //depot/user/rwatson/netperf/sys/netinet/tcp_subr.c 2004/08/06 20:41:48 @@ -1618,7 +1618,7 @@ /* * Move a TCP connection into TIME_WAIT state. - * tcbinfo is unlocked. + * tcbinfo is locked. * inp is locked, and is unlocked before returning. */ void @@ -1630,6 +1630,11 @@ int tw_time, acknow; struct socket *so; + INP_INFO_WLOCK_ASSERT(&tcbinfo); +#if 0 + INP_LOCK_ASSERT(tp); +#endif + tw = uma_zalloc(tcptw_zone, M_NOWAIT); if (tw == NULL) { tw = tcp_timer_2msl_tw(1); @@ -1763,6 +1768,8 @@ int isipv6 = inp->inp_inc.inc_isipv6; #endif + INP_LOCK_ASSERT(inp); + m = m_gethdr(M_DONTWAIT, MT_HEADER); if (m == NULL) return (ENOBUFS); --- //depot/vendor/freebsd/src/sys/netinet/tcp_syncache.c 2004/07/17 21:40:38 +++ //depot/user/rwatson/netperf/sys/netinet/tcp_syncache.c 2004/07/19 22:28:50 @@ -561,6 +561,9 @@ goto abort2; } #ifdef MAC + /* + * XXXRW: Would prefer inpcb. + */ SOCK_LOCK(so); mac_set_socket_peer_from_mbuf(m, so); SOCK_UNLOCK(so); @@ -734,6 +737,14 @@ abort: INP_UNLOCK(inp); abort2: + /* + * XXXRW: Technically speaking, this soabort() likely doesn't + * do anything, since we insert sockets into the accept queues + * in an already completed state, and soabort() leaves it to + * the close() on the listen socket to remove completed + * connections. However, this means a TCP socket without + * full TCP state could slip through...? + */ if (so != NULL) (void) soabort(so); return (NULL); --- //depot/vendor/freebsd/src/sys/netinet/tcp_timer.c 2004/06/23 21:05:32 +++ //depot/user/rwatson/netperf/sys/netinet/tcp_timer.c 2004/08/06 04:34:32 @@ -250,6 +250,9 @@ splx(s); } +/* + * XXXRW: What locks this list? + */ struct twlist { LIST_HEAD(, tcptw) tw_list; struct tcptw tw_tail; @@ -277,10 +280,15 @@ int i; struct tcptw *tw_tail; + INP_LOCK_ASSERT(tw->tw_inpcb); + if (tw->tw_time != 0) LIST_REMOVE(tw, tw_2msl); tw->tw_time = timeo + ticks; i = timeo > tcp_msl ? 1 : 0; + /* + * XXXRW: Locking? + */ tw_tail = &twl_2msl[i].tw_tail; LIST_INSERT_BEFORE(tw_tail, tw, tw_2msl); } @@ -289,6 +297,9 @@ tcp_timer_2msl_stop(struct tcptw *tw) { + /* + * XXXRW: Locking? + */ if (tw->tw_time != 0) LIST_REMOVE(tw, tw_2msl); } @@ -299,7 +310,10 @@ struct tcptw *tw, *tw_tail; struct twlist *twl; int i; - + + /* + * XXXRW: Locking? + */ for (i = 0; i < 2; i++) { twl = tw_2msl_list[i]; tw_tail = &twl->tw_tail; --- //depot/vendor/freebsd/src/sys/netinet/tcp_usrreq.c 2004/07/26 21:30:49 +++ //depot/user/rwatson/netperf/sys/netinet/tcp_usrreq.c 2004/08/02 01:36:03 @@ -146,6 +146,50 @@ } /* + * Common code to setup and teardown locking. Most + * code begins with a COMMON_START macro and finishes + * with COMMON_END. You indicate whether the inpcb + * and enclosing head are to be locked read or write. + */ +#define INI_NOLOCK 0 /* no head lock */ +#define INI_READ 1 /* read head lock */ +#define INI_WRITE 2 /* write head lock */ + +#define COMMON_START0(_headrw) do { \ + if (_headrw == INI_READ) \ + INP_INFO_RLOCK(&tcbinfo); \ + else if (_headrw == INI_WRITE) \ + INP_INFO_WLOCK(&tcbinfo); \ + inp = sotoinpcb(so); \ + if (inp == 0) { \ + if (_headrw == INI_READ) \ + INP_INFO_RUNLOCK(&tcbinfo); \ + else if (_headrw == INI_WRITE) \ + INP_INFO_WUNLOCK(&tcbinfo); \ + return EINVAL; \ + } \ + INP_LOCK(inp); \ + if (_headrw == INI_READ) \ + INP_INFO_RUNLOCK(&tcbinfo); \ + tp = intotcpcb(inp); \ + TCPDEBUG1(); \ +} while(0) + +#define COMMON_START(_headrw) do { \ + TCPDEBUG0; \ + COMMON_START0(_headrw); \ +} while (0) + +#define COMMON_END(_headrw, req) \ + TCPDEBUG2(req); \ + do { \ + if (tp) \ + INP_UNLOCK(inp); \ + if (_headrw == INI_WRITE) \ + INP_INFO_WUNLOCK(&tcbinfo); \ + } while(0) + +/* * pru_detach() detaches the TCP protocol from the socket. * If the protocol state is non-embryonic, then can't * do this directly: have to initiate a pru_disconnect(), @@ -158,63 +202,13 @@ int error = 0; struct inpcb *inp; struct tcpcb *tp; - TCPDEBUG0; - INP_INFO_WLOCK(&tcbinfo); - inp = sotoinpcb(so); - if (inp == NULL) { - INP_INFO_WUNLOCK(&tcbinfo); - return error; - } - INP_LOCK(inp); - tp = intotcpcb(inp); - TCPDEBUG1(); + COMMON_START(INI_WRITE); tp = tcp_disconnect(tp); - - TCPDEBUG2(PRU_DETACH); - if (tp) - INP_UNLOCK(inp); - INP_INFO_WUNLOCK(&tcbinfo); + COMMON_END(INI_WRITE, PRU_DETACH); return error; } -#define INI_NOLOCK 0 -#define INI_READ 1 -#define INI_WRITE 2 - -#define COMMON_START() \ - TCPDEBUG0; \ - do { \ - if (inirw == INI_READ) \ - INP_INFO_RLOCK(&tcbinfo); \ - else if (inirw == INI_WRITE) \ - INP_INFO_WLOCK(&tcbinfo); \ - inp = sotoinpcb(so); \ - if (inp == 0) { \ - if (inirw == INI_READ) \ - INP_INFO_RUNLOCK(&tcbinfo); \ - else if (inirw == INI_WRITE) \ - INP_INFO_WUNLOCK(&tcbinfo); \ - return EINVAL; \ - } \ - INP_LOCK(inp); \ - if (inirw == INI_READ) \ - INP_INFO_RUNLOCK(&tcbinfo); \ - tp = intotcpcb(inp); \ - TCPDEBUG1(); \ -} while(0) - -#define COMMON_END(req) \ -out: TCPDEBUG2(req); \ - do { \ - if (tp) \ - INP_UNLOCK(inp); \ - if (inirw == INI_WRITE) \ - INP_INFO_WUNLOCK(&tcbinfo); \ - return error; \ - goto out; \ -} while(0) - /* * Give the socket an address. */ @@ -225,7 +219,6 @@ struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in *sinp; - const int inirw = INI_WRITE; sinp = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sinp)) @@ -238,11 +231,10 @@ IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) return (EAFNOSUPPORT); - COMMON_START(); + COMMON_START(INI_WRITE); error = in_pcbbind(inp, nam, td->td_ucred); - if (error) - goto out; - COMMON_END(PRU_BIND); + COMMON_END(INI_WRITE, PRU_BIND); + return error; } #ifdef INET6 @@ -253,7 +245,6 @@ struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in6 *sin6p; - const int inirw = INI_WRITE; sin6p = (struct sockaddr_in6 *)nam; if (nam->sa_len != sizeof (*sin6p)) @@ -266,7 +257,7 @@ IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) return (EAFNOSUPPORT); - COMMON_START(); + COMMON_START(INI_WRITE); inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { @@ -284,9 +275,9 @@ } } error = in6_pcbbind(inp, nam, td->td_ucred); - if (error) - goto out; - COMMON_END(PRU_BIND); +out: + COMMON_END(INI_WRITE, PRU_BIND); + return error; } #endif /* INET6 */ @@ -299,14 +290,14 @@ int error = 0; struct inpcb *inp; struct tcpcb *tp; - const int inirw = INI_WRITE; - COMMON_START(); + COMMON_START(INI_WRITE); if (inp->inp_lport == 0) error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); if (error == 0) tp->t_state = TCPS_LISTEN; - COMMON_END(PRU_LISTEN); + COMMON_END(INI_WRITE, PRU_LISTEN); + return error; } #ifdef INET6 @@ -316,9 +307,8 @@ int error = 0; struct inpcb *inp; struct tcpcb *tp; - const int inirw = INI_WRITE; - COMMON_START(); + COMMON_START(INI_WRITE); if (inp->inp_lport == 0) { inp->inp_vflag &= ~INP_IPV4; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) @@ -327,7 +317,8 @@ } if (error == 0) tp->t_state = TCPS_LISTEN; - COMMON_END(PRU_LISTEN); + COMMON_END(INI_WRITE, PRU_LISTEN); + return error; } #endif /* INET6 */ @@ -345,7 +336,6 @@ struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in *sinp; - const int inirw = INI_WRITE; sinp = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sinp)) @@ -359,11 +349,13 @@ if (td && jailed(td->td_ucred)) prison_remote_ip(td->td_ucred, 0, &sinp->sin_addr.s_addr); - COMMON_START(); + COMMON_START(INI_WRITE); if ((error = tcp_connect(tp, nam, td)) != 0) goto out; error = tcp_output(tp); - COMMON_END(PRU_CONNECT); +out: + COMMON_END(INI_WRITE, PRU_CONNECT); + return error; } #ifdef INET6 @@ -374,7 +366,6 @@ struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in6 *sin6p; - const int inirw = INI_WRITE; sin6p = (struct sockaddr_in6 *)nam; if (nam->sa_len != sizeof (*sin6p)) @@ -386,7 +377,7 @@ && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) return (EAFNOSUPPORT); - COMMON_START(); + COMMON_START(INI_WRITE); if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { struct sockaddr_in sin; @@ -409,7 +400,9 @@ if ((error = tcp6_connect(tp, nam, td)) != 0) goto out; error = tcp_output(tp); - COMMON_END(PRU_CONNECT); +out: + COMMON_END(INI_WRITE, PRU_CONNECT); + return error; } #endif /* INET6 */ @@ -430,11 +423,11 @@ int error = 0; struct inpcb *inp; struct tcpcb *tp; - const int inirw = INI_WRITE; - COMMON_START(); + COMMON_START(INI_WRITE); tp = tcp_disconnect(tp); - COMMON_END(PRU_DISCONNECT); + COMMON_END(INI_WRITE, PRU_DISCONNECT); + return error; } /* @@ -452,33 +445,24 @@ in_port_t port = 0; TCPDEBUG0; + /* Unlocked read. */ if (so->so_state & SS_ISDISCONNECTED) { error = ECONNABORTED; - goto out; + goto out; /* NB: ok 'cuz tp is NULL */ } - INP_INFO_RLOCK(&tcbinfo); - inp = sotoinpcb(so); - if (!inp) { - INP_INFO_RUNLOCK(&tcbinfo); - return (EINVAL); - } - INP_LOCK(inp); - INP_INFO_RUNLOCK(&tcbinfo); - tp = intotcpcb(inp); - TCPDEBUG1(); + COMMON_START0(INI_READ); /* - * We inline in_setpeeraddr and COMMON_END here, so that we can - * copy the data of interest and defer the malloc until after we - * release the lock. + * We inline in_setpeeraddr so that we can copy the + * data of interest and defer the malloc until after + * we release the lock. */ port = inp->inp_fport; addr = inp->inp_faddr; -out: TCPDEBUG2(PRU_ACCEPT); - if (tp) - INP_UNLOCK(inp); +out: + COMMON_END(INI_READ, PRU_ACCEPT); if (error == 0) *nam = in_sockaddr(port, &addr); return error; @@ -497,25 +481,17 @@ int v4 = 0; TCPDEBUG0; + /* Unlocked read. */ if (so->so_state & SS_ISDISCONNECTED) { error = ECONNABORTED; - goto out; + goto out; /* NB: ok 'cuz tp is NULL */ } - INP_INFO_RLOCK(&tcbinfo); - inp = sotoinpcb(so); - if (inp == 0) { - INP_INFO_RUNLOCK(&tcbinfo); - return (EINVAL); - } - INP_LOCK(inp); - INP_INFO_RUNLOCK(&tcbinfo); - tp = intotcpcb(inp); - TCPDEBUG1(); + COMMON_START0(INI_READ); /* - * We inline in6_mapped_peeraddr and COMMON_END here, so that we can - * copy the data of interest and defer the malloc until after we - * release the lock. + * We inline in6_mapped_peeraddr so that we can + * copy the data of interest and defer the malloc + * until after we release the lock. */ if (inp->inp_vflag & INP_IPV4) { v4 = 1; @@ -526,9 +502,8 @@ addr6 = inp->in6p_faddr; } -out: TCPDEBUG2(PRU_ACCEPT); - if (tp) - INP_UNLOCK(inp); +out: + COMMON_END(INI_READ, PRU_ACCEPT); if (error == 0) { if (v4) *nam = in6_v4mapsin6_sockaddr(port, &addr); @@ -569,14 +544,14 @@ int error = 0; struct inpcb *inp; struct tcpcb *tp; - const int inirw = INI_WRITE; - COMMON_START(); + COMMON_START(INI_WRITE); socantsendmore(so); tp = tcp_usrclosed(tp); if (tp) error = tcp_output(tp); - COMMON_END(PRU_SHUTDOWN); + COMMON_END(INI_WRITE, PRU_SHUTDOWN); + return error; } /* @@ -588,11 +563,11 @@ int error = 0; struct inpcb *inp; struct tcpcb *tp; - const int inirw = INI_READ; - COMMON_START(); + COMMON_START(INI_READ); tcp_output(tp); - COMMON_END(PRU_RCVD); + COMMON_END(INI_READ, PRU_RCVD); + return error; } /* @@ -609,7 +584,6 @@ int error = 0; struct inpcb *inp; struct tcpcb *tp; - const int inirw = INI_WRITE; #ifdef INET6 int isipv6; #endif @@ -724,13 +698,16 @@ tp->snd_wnd = TTCP_CLIENT_SND_WND; tcp_mss(tp, -1); } + /* Unlocked read of sb_cc. */ tp->snd_up = tp->snd_una + so->so_snd.sb_cc; tp->t_force = 1; error = tcp_output(tp); tp->t_force = 0; } - COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : +out: + COMMON_END(INI_WRITE, (flags & PRUS_OOB) ? PRU_SENDOOB : ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); + return error; } /* @@ -742,11 +719,11 @@ int error = 0; struct inpcb *inp; struct tcpcb *tp; - const int inirw = INI_WRITE; - COMMON_START(); + COMMON_START(INI_WRITE); tp = tcp_drop(tp, ECONNABORTED); - COMMON_END(PRU_ABORT); + COMMON_END(INI_WRITE, PRU_ABORT); + return error; } /* @@ -758,9 +735,9 @@ int error = 0; struct inpcb *inp; struct tcpcb *tp; - const int inirw = INI_READ; - COMMON_START(); + COMMON_START(INI_READ); + /* Unlocked read. */ if ((so->so_oobmark == 0 && (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) || so->so_options & SO_OOBINLINE || @@ -776,7 +753,9 @@ *mtod(m, caddr_t) = tp->t_iobc; if ((flags & MSG_PEEK) == 0) tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); - COMMON_END(PRU_RCVOOB); +out: + COMMON_END(INI_READ, PRU_RCVOOB); + return error; } /* xxx - should be const */ @@ -1179,16 +1158,28 @@ inp->inp_vflag |= INP_IPV4; tp = tcp_newtcpcb(inp); if (tp == 0) { - int nofd = so->so_state & SS_NOFDREF; /* XXX */ - - so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ + int nofd; + /* + * XXXRW: This is a potentially racy scenario: we perform + * a read-update-write on so_state but don't hold a lock, + * not to mention calling out to external code that may + * grab locks. This section requires attention. + */ + SOCK_LOCK(so); + nofd = so->so_state & SS_NOFDREF; + /* don't free the socket yet */ + if (nofd) + so->so_state &= ~SS_NOFDREF; + SOCK_UNLOCK(so); #ifdef INET6 if (isipv6) in6_pcbdetach(inp); else #endif in_pcbdetach(inp); + SOCK_LOCK(so); so->so_state |= nofd; + SOCK_UNLOCK(so); return (ENOBUFS); } tp->t_state = TCPS_CLOSED; @@ -1268,4 +1259,3 @@ } return (tp); } - --- //depot/vendor/freebsd/src/sys/netinet/udp_usrreq.c 2004/08/06 02:10:37 +++ //depot/user/rwatson/netperf/sys/netinet/udp_usrreq.c 2004/08/06 20:41:48 @@ -903,24 +903,50 @@ SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, &udp_recvspace, 0, "Maximum space for incoming UDP datagrams"); +/* + * Common code to setup and teardown locking. Most + * code begins with a COMMON_START macro and finishes + * with COMMON_END. You indicate whether the inpcb + * and enclosing head are to be locked read or write . + */ +#define INI_NOLOCK 0 /* no head lock */ +#define INI_READ 1 /* read head lock */ +#define INI_WRITE 2 /* write head lock */ + +#define COMMON_START(_headrw) do { \ + if (_headrw == INI_READ) \ + INP_INFO_RLOCK(&udbinfo); \ + else if (_headrw == INI_WRITE) \ + INP_INFO_WLOCK(&udbinfo); \ + inp = sotoinpcb(so); \ + if (inp == 0) { \ + if (_headrw == INI_READ) \ + INP_INFO_RUNLOCK(&udbinfo); \ + else if (_headrw == INI_WRITE) \ + INP_INFO_WUNLOCK(&udbinfo); \ + return EINVAL; \ + } \ + INP_LOCK(inp); \ + if (_headrw == INI_READ) \ + INP_INFO_RUNLOCK(&udbinfo); \ +} while(0) + +#define COMMON_END(_headrw) \ + do { \ + INP_UNLOCK(inp); \ + if (_headrw == INI_WRITE) \ + INP_INFO_WUNLOCK(&udbinfo); \ + } while(0) + static int udp_abort(struct socket *so) { struct inpcb *inp; - int s; - INP_INFO_WLOCK(&udbinfo); - inp = sotoinpcb(so); - if (inp == 0) { - INP_INFO_WUNLOCK(&udbin