--- //depot/vendor/freebsd/src/sys/compat/svr4/svr4_stream.c	2003/10/20 10:40:41
+++ //depot/user/rwatson/netperf/sys/compat/svr4/svr4_stream.c	2004/05/25 01:58:02
@@ -171,7 +171,9 @@
 		return (error);
 
 #ifdef MAC
+	SOCK_LOCK(so);
 	error = mac_check_socket_send(td->td_ucred, so);
+	SOCK_UNLOCK(so);
 	if (error)
 		goto done1;
 #endif
@@ -275,7 +277,9 @@
 		return (error);
 
 #ifdef MAC
+	SOCK_LOCK(so);
 	error = mac_check_socket_receive(td->td_ucred, so);
+	SOCK_UNLOCK(so);
 	if (error)
 		goto done1;
 #endif
--- //depot/vendor/freebsd/src/sys/fs/fifofs/fifo_vnops.c	2004/06/01 01:20:38
+++ //depot/user/rwatson/netperf/sys/fs/fifofs/fifo_vnops.c	2004/06/01 03:01:56
@@ -211,7 +211,9 @@
 		}
 		fip->fi_readers = fip->fi_writers = 0;
 		wso->so_snd.sb_lowat = PIPE_BUF;
-		rso->so_state |= SS_CANTRCVMORE;
+		SOCKBUF_LOCK(&rso->so_rcv);
+		rso->so_rcv.sb_state |= SBS_CANTRCVMORE;
+		SOCKBUF_UNLOCK(&rso->so_rcv);
 		vp->v_fifoinfo = fip;
 	}
 
@@ -229,7 +231,9 @@
 	if (ap->a_mode & FREAD) {
 		fip->fi_readers++;
 		if (fip->fi_readers == 1) {
-			fip->fi_writesock->so_state &= ~SS_CANTSENDMORE;
+			SOCKBUF_LOCK(&fip->fi_writesock->so_snd);
+			fip->fi_writesock->so_snd.sb_state &= ~SBS_CANTSENDMORE;
+			SOCKBUF_UNLOCK(&fip->fi_writesock->so_snd);
 			if (fip->fi_writers > 0) {
 				wakeup(&fip->fi_writers);
 				sowwakeup(fip->fi_writesock);
@@ -243,7 +247,9 @@
 		}
 		fip->fi_writers++;
 		if (fip->fi_writers == 1) {
-			fip->fi_readsock->so_state &= ~SS_CANTRCVMORE;
+			SOCKBUF_LOCK(&fip->fi_readsock->so_rcv);
+			fip->fi_readsock->so_rcv.sb_state &= ~SBS_CANTRCVMORE;
+			SOCKBUF_UNLOCK(&fip->fi_readsock->so_rcv);
 			if (fip->fi_readers > 0) {
 				wakeup(&fip->fi_readers);
 				sorwakeup(fip->fi_writesock);
@@ -425,8 +431,10 @@
 
 	ap->a_kn->kn_hook = (caddr_t)so;
 
+	SOCKBUF_LOCK(sb);
 	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, ap->a_kn, kn_selnext);
 	sb->sb_flags |= SB_KNOTE;
+	SOCKBUF_UNLOCK(sb);
 
 	return (0);
 }
@@ -436,23 +444,34 @@
 {
 	struct socket *so = (struct socket *)kn->kn_hook;
 
+	SOCKBUF_LOCK(&so->so_rcv);
 	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
 	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
 		so->so_rcv.sb_flags &= ~SB_KNOTE;
+	SOCKBUF_UNLOCK(&so->so_rcv);
 }
 
 static int
 filt_fiforead(struct knote *kn, long hint)
 {
 	struct socket *so = (struct socket *)kn->kn_hook;
+	int needlock, result;
 
+	needlock = !SOCKBUF_OWNED(&so->so_rcv);
+	if (needlock)
+		SOCKBUF_LOCK(&so->so_rcv);
 	kn->kn_data = so->so_rcv.sb_cc;
-	if (so->so_state & SS_CANTRCVMORE) {
+	/* sb_state is read with the so_rcv lock held (see needlock). */
+	if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 		kn->kn_flags |= EV_EOF;
-		return (1);
+		result = 1;
+	} else {
+		kn->kn_flags &= ~EV_EOF;
+		result = (kn->kn_data > 0);
 	}
-	kn->kn_flags &= ~EV_EOF;
-	return (kn->kn_data > 0);
+	if (needlock)
+		SOCKBUF_UNLOCK(&so->so_rcv);
+	return (result);
 }
 
 static void
@@ -460,23 +479,34 @@
 {
 	struct socket *so = (struct socket *)kn->kn_hook;
 
+	SOCKBUF_LOCK(&so->so_snd);
 	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
 	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
 		so->so_snd.sb_flags &= ~SB_KNOTE;
+	SOCKBUF_UNLOCK(&so->so_snd);
 }
 
 static int
 filt_fifowrite(struct knote *kn, long hint)
 {
 	struct socket *so = (struct socket *)kn->kn_hook;
+	int needlock, result;
 
+	needlock = !SOCKBUF_OWNED(&so->so_snd);
+	if (needlock)
+		SOCKBUF_LOCK(&so->so_snd);
 	kn->kn_data = sbspace(&so->so_snd);
-	if (so->so_state & SS_CANTSENDMORE) {
+	/* sb_state is read with the so_snd lock held (see needlock). */
+	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 		kn->kn_flags |= EV_EOF;
-		return (1);
+		result = 1;
+	} else {
+		kn->kn_flags &= ~EV_EOF;
+	        result = (kn->kn_data >= so->so_snd.sb_lowat);
 	}
-	kn->kn_flags &= ~EV_EOF;
-	return (kn->kn_data >= so->so_snd.sb_lowat);
+	if (needlock)
+		SOCKBUF_UNLOCK(&so->so_snd);
+	return (result);
 }
 
 /* ARGSUSED */
--- //depot/vendor/freebsd/src/sys/fs/portalfs/portal_vnops.c	2004/04/07 20:52:05
+++ //depot/user/rwatson/netperf/sys/fs/portalfs/portal_vnops.c	2004/05/31 04:42:57
@@ -193,6 +193,7 @@
 
 	unp2 = sotounpcb(so2);
 	unp3 = sotounpcb(so3);
+	/* XXXRW: Locking? */
 	if (unp2->unp_addr)
 		unp3->unp_addr = (struct sockaddr_un *)
 		    sodupsockaddr((struct sockaddr *)unp2->unp_addr,
@@ -284,6 +285,7 @@
 	 * and keep polling the reference count.   XXX.
 	 */
 	s = splnet();
+	/* XXXRW: Locking? */
 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
 		if (fmp->pm_server->f_count == 1) {
 			error = ECONNREFUSED;
@@ -304,8 +306,12 @@
 	 */
 	so->so_rcv.sb_timeo = 0;
 	so->so_snd.sb_timeo = 0;
+	SOCKBUF_LOCK(&so->so_rcv);
 	so->so_rcv.sb_flags |= SB_NOINTR;
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	SOCKBUF_LOCK(&so->so_snd);
 	so->so_snd.sb_flags |= SB_NOINTR;
+	SOCKBUF_UNLOCK(&so->so_snd);
 
 
 	pcred.pcr_flag = ap->a_mode;
--- //depot/vendor/freebsd/src/sys/i386/conf/GENERIC	2004/05/28 00:26:29
+++ //depot/user/rwatson/netperf/sys/i386/conf/GENERIC	2004/05/31 01:41:33
@@ -66,6 +66,7 @@
 options 	INVARIANT_SUPPORT	# Extra sanity checks of internal structures, required by INVARIANTS
 options 	WITNESS			# Enable checks to detect deadlocks and cycles
 options 	WITNESS_SKIPSPIN	# Don't run witness on spinlocks for speed
+options 	BREAK_TO_DEBUGGER
 
 # To make an SMP kernel, the next two are needed
 options 	SMP		# Symmetric MultiProcessor Kernel
--- //depot/vendor/freebsd/src/sys/kern/kern_descrip.c	2004/06/01 18:05:37
+++ //depot/user/rwatson/netperf/sys/kern/kern_descrip.c	2004/06/02 04:53:21
@@ -2024,7 +2024,9 @@
 		*spp = fp->f_data;
 		if (fflagp)
 			*fflagp = fp->f_flag;
+		SOCK_LOCK(*spp);
 		soref(*spp);
+		SOCK_UNLOCK(*spp);
 	}
 	FILEDESC_UNLOCK(td->td_proc->p_fd);
 	return (error);
@@ -2039,6 +2041,7 @@
 {
 
 	NET_ASSERT_GIANT();
+	SOCK_LOCK(so);
 	sorele(so);
 }
 
@@ -2052,7 +2055,7 @@
 	struct file *fp;
 	struct thread *td;
 {
-	int error;
+	int error, type;
 
 	FILE_LOCK_ASSERT(fp, MA_OWNED);
 
@@ -2062,15 +2065,35 @@
 	}
 	/* We have the last ref so we can proceed without the file lock. */
 	FILE_UNLOCK(fp);
+
+	/*
+	 * XXXRW: It's not pretty, but this way we can avoid holding Giant
+	 * over operation vectors that don't require it.  Note that
+	 * technically, this is slightly conservative as badops doesn't
+	 * need Giant.
+	 */
+	type = fp->f_type;
+	switch(type) {
+	case DTYPE_SOCKET:
+	case DTYPE_PIPE:
+		break;
+	default:
+		mtx_lock(&Giant);
+	}
 	if (fp->f_count < 0)
 		panic("fdrop: count < 0");
-	mtx_lock(&Giant);
 	if (fp->f_ops != &badfileops)
 		error = fo_close(fp, td);
 	else
 		error = 0;
 	ffree(fp);
-	mtx_unlock(&Giant);
+	switch(type) {
+	case DTYPE_SOCKET:
+	case DTYPE_PIPE:
+		break;
+	default:
+		mtx_unlock(&Giant);
+	}
 	return (error);
 }
 
--- //depot/vendor/freebsd/src/sys/kern/kern_mbuf.c	2004/06/01 16:20:40
+++ //depot/user/rwatson/netperf/sys/kern/kern_mbuf.c	2004/06/08 21:28:41
@@ -214,7 +214,7 @@
 #endif
 	} else
 		m->m_data = m->m_dat;
-	mbstat.m_mbufs += 1;	/* XXX */
+	atomic_add_long(&mbstat.m_mbufs, 1);
 /*	return 1;
 */
 }
@@ -230,7 +230,7 @@
 	m = (struct mbuf *)mem;
 	if ((m->m_flags & M_PKTHDR) != 0)
 		m_tag_delete_chain(m, NULL);
-	mbstat.m_mbufs -= 1;	/* XXX */
+	atomic_subtract_long(&mbstat.m_mbufs, 1);
 }
 
 /* XXX Only because of stats */
@@ -242,8 +242,8 @@
 	m = (struct mbuf *)mem;
 	if ((m->m_flags & M_PKTHDR) != 0)
 		m_tag_delete_chain(m, NULL);
-	mbstat.m_mbufs -= 1;	/* XXX */
-	mbstat.m_mclusts -= 1;	/* XXX */
+	atomic_subtract_long(&mbstat.m_mbufs, 1);
+	atomic_subtract_long(&mbstat.m_mclusts, 1);
 }
 
 /*
@@ -268,7 +268,7 @@
 	m->m_ext.ref_cnt = (u_int *)uma_find_refcnt(zone_clust,
 	    m->m_ext.ext_buf);
 	*(m->m_ext.ref_cnt) = 1;
-	mbstat.m_mclusts += 1;	/* XXX */
+	atomic_add_long(&mbstat.m_mclusts, 1);
 /*	return 1;
 */
 }
@@ -277,7 +277,7 @@
 static void
 mb_dtor_clust(void *mem, int size, void *arg)
 {
-	mbstat.m_mclusts -= 1;	/* XXX */
+	atomic_subtract_long(&mbstat.m_mclusts, 1);
 }
 
 /*
@@ -294,7 +294,7 @@
 	uma_zalloc_arg(zone_clust, m, M_NOWAIT);
 	if (m->m_ext.ext_buf == NULL)	/* XXX */
 		panic("mb_init_pack(): Can't deal with failure yet.");
-	mbstat.m_mclusts -= 1;	/* XXX */
+	atomic_subtract_long(&mbstat.m_mclusts, 1);
 }
 
 /*
@@ -309,7 +309,7 @@
 	m = (struct mbuf *)mem;
 	uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL);
 	m->m_ext.ext_buf = NULL;
-	mbstat.m_mclusts += 1;	/* XXX */
+	atomic_add_long(&mbstat.m_mclusts, 1);
 }
 
 /*
@@ -353,8 +353,8 @@
 		}
 #endif
 	}
-	mbstat.m_mbufs += 1;	/* XXX */
-	mbstat.m_mclusts += 1;	/* XXX */
+	atomic_add_long(&mbstat.m_mbufs, 1);
+	atomic_add_long(&mbstat.m_mclusts, 1);
 /*	return 1;
 */
 }
@@ -375,7 +375,7 @@
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL,
 	    "mb_reclaim()");
 
-	mbstat.m_drain++;
+	atomic_add_long(&mbstat.m_drain, 1);
 	for (dp = domains; dp != NULL; dp = dp->dom_next)
 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
 			if (pr->pr_drain != NULL)
--- //depot/vendor/freebsd/src/sys/kern/kern_prot.c	2004/04/05 21:06:48
+++ //depot/user/rwatson/netperf/sys/kern/kern_prot.c	2004/05/08 02:12:27
@@ -1685,7 +1685,9 @@
 	if (error)
 		return (ENOENT);
 #ifdef MAC
+	SOCK_LOCK(so);
 	error = mac_check_socket_visible(cred, so);
+	SOCK_UNLOCK(so);
 	if (error)
 		return (error);
 #endif
--- //depot/vendor/freebsd/src/sys/kern/kern_timeout.c	2004/04/25 04:10:43
+++ //depot/user/rwatson/netperf/sys/kern/kern_timeout.c	2004/05/04 02:32:28
@@ -44,6 +44,7 @@
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
+#include <sys/sbuf.h>
 #include <sys/sysctl.h>
 
 static int avg_depth;
@@ -55,6 +56,89 @@
 static int avg_mpcalls;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
     "Average number of MP callouts made per softclock call. Units = 1/1000");
+
+/*-
+ * Sampling buffer of function pointers executed by timeouts and callouts.
+ * This circular buffer wraps when it fills, and uses an inefficient
+ * sbuf-based sysctl to dump sample data to userspace.  Sysctls can select
+ * to monitor mpsafe and !mpsafe callouts/timeouts as desired.  Suggested
+ * use is: (1) set sample of interest (mpsafe/notmpsafe), (2) reset the
+ * buffer, (3) do some benchmark/test, (4) disable sampling, (5) dump
+ * buffer.
+ *
+ * XXX: ifdef TIMEOUT_SAMPLING?
+ */
+
+#define	MAXFUNC	200000
+static void * func_array[MAXFUNC];
+static int array_off;
+
+static void
+push_cfunc(void *ptr)
+{
+
+	/* XXX: no locking.  Index stays in [0, MAXFUNC): no int overflow. */
+	func_array[array_off] = ptr;
+	array_off = (array_off + 1) % MAXFUNC;
+}
+
+static int
+sysctl_cfunc(SYSCTL_HANDLER_ARGS)
+{
+	struct sbuf sb;
+	int error, i;
+
+	if (req->newptr != NULL)
+		return (EINVAL);
+
+	sbuf_new(&sb, NULL, 0, SBUF_AUTOEXTEND);
+
+	for (i = 0; i < MAXFUNC; i++) {
+		if (func_array[i] == NULL)
+			break;
+		sbuf_printf(&sb, "%p ", func_array[i]);
+	}
+	sbuf_finish(&sb);
+
+	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb) + 1, req);
+
+	sbuf_delete(&sb);
+
+	return (error);
+}
+
+SYSCTL_PROC(_debug, OID_AUTO, to_cfunc, CTLTYPE_STRING|CTLFLAG_RD, 0, 0,
+    sysctl_cfunc, "A", "callout/timeout sample");
+
+static int
+sysctl_cfunc_reset(SYSCTL_HANDLER_ARGS)
+{
+	int dummy, error;
+
+	dummy = 0;
+	error = sysctl_handle_int(oidp, &dummy, 0, req);
+	if (error)
+		return (error);
+
+	if (dummy != 0) {
+		bzero(func_array, sizeof(void *) * MAXFUNC);
+		array_off = 0;
+	}
+
+	return (0);
+}
+
+SYSCTL_PROC(_debug, OID_AUTO, to_cfunc_reset, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
+    sysctl_cfunc_reset, "I", "Reset sample");
+
+static int cfunc_sample_mpsafe;
+static int cfunc_sample_notmpsafe;
+
+SYSCTL_INT(_debug, OID_AUTO, to_cfunc_mpsafe, CTLFLAG_RW,
+    &cfunc_sample_mpsafe, 0, "Sample mpsafe callouts");
+SYSCTL_INT(_debug, OID_AUTO, to_cfunc_notmpsafe, CTLFLAG_RW,
+    &cfunc_sample_notmpsafe, 0, "Sample !mpsafe callouts");
+
 /*
  * TODO:
  *	allocate more timeout table slots when table overflows.
@@ -245,8 +329,12 @@
 				if (!(c_flags & CALLOUT_MPSAFE)) {
 					mtx_lock(&Giant);
 					gcalls++;
+					if (cfunc_sample_notmpsafe)
+						push_cfunc(c_func);
 				} else {
 					mpcalls++;
+					if (cfunc_sample_mpsafe)
+						push_cfunc(c_func);
 				}
 #ifdef DIAGNOSTIC
 				binuptime(&bt1);
--- //depot/vendor/freebsd/src/sys/kern/subr_log.c	2004/04/05 21:06:48
+++ //depot/user/rwatson/netperf/sys/kern/subr_log.c	2004/04/07 03:50:45
@@ -83,7 +83,12 @@
 	struct	callout sc_callout;	/* callout to wakeup syslog  */
 } logsoftc;
 
-int	log_open;			/* also used in log() */
+/*
+ * log_mtx protects logsoftc, log_open.  Note that log_mtx does *not*
+ * protect the structures associated with msgbuf, which require Giant.
+ */
+struct mtx	log_mtx;
+int		log_open;		/* also used in log() */
 
 /* Times per second to check for a pending syslog wakeup. */
 static int	log_wakeups_per_second = 5;
@@ -94,17 +99,24 @@
 static	int
 logopen(dev_t dev, int flags, int mode, struct thread *td)
 {
-	if (log_open)
+
+	mtx_lock(&log_mtx);
+	if (log_open) {
+		mtx_unlock(&log_mtx);
 		return (EBUSY);
+	}
 	log_open = 1;
-	callout_init(&logsoftc.sc_callout, 0);
+	callout_init(&logsoftc.sc_callout, CALLOUT_MPSAFE);
+	mtx_unlock(&log_mtx);
 	fsetown(td->td_proc->p_pid, &logsoftc.sc_sigio);	/* signal process only */
+	mtx_lock(&log_mtx);
 	if (log_wakeups_per_second < 1) {
 		printf("syslog wakeup is less than one.  Adjusting to 1.\n");
 		log_wakeups_per_second = 1;
 	}
 	callout_reset(&logsoftc.sc_callout, hz / log_wakeups_per_second,
 	    logtimeout, NULL);
+	mtx_unlock(&log_mtx);
 	return (0);
 }
 
@@ -113,9 +125,11 @@
 logclose(dev_t dev, int flag, int mode, struct thread *td)
 {
 
+	mtx_lock(&log_mtx);
 	log_open = 0;
 	callout_stop(&logsoftc.sc_callout);
 	logsoftc.sc_state = 0;
+	mtx_unlock(&log_mtx);
 	funsetown(&logsoftc.sc_sigio);
 	return (0);
 }
@@ -134,14 +148,18 @@
 			splx(s);
 			return (EWOULDBLOCK);
 		}
+		mtx_lock(&log_mtx);
 		logsoftc.sc_state |= LOG_RDWAIT;
+		mtx_unlock(&log_mtx);
 		if ((error = tsleep(mbp, LOG_RDPRI | PCATCH, "klog", 0))) {
 			splx(s);
 			return (error);
 		}
 	}
 	splx(s);
+	mtx_lock(&log_mtx);
 	logsoftc.sc_state &= ~LOG_RDWAIT;
+	mtx_unlock(&log_mtx);
 
 	while (uio->uio_resid > 0) {
 		l = imin(sizeof(buf), uio->uio_resid);
@@ -178,8 +196,11 @@
 logtimeout(void *arg)
 {
 
-	if (!log_open)
+	mtx_lock(&log_mtx);
+	if (!log_open) {
+		mtx_unlock(&log_mtx);
 		return;
+	}
 	if (log_wakeups_per_second < 1) {
 		printf("syslog wakeup is less than one.  Adjusting to 1.\n");
 		log_wakeups_per_second = 1;
@@ -187,6 +208,7 @@
 	if (msgbuftrigger == 0) {
 		callout_reset(&logsoftc.sc_callout,
 		    hz / log_wakeups_per_second, logtimeout, NULL);
+		mtx_unlock(&log_mtx);
 		return;
 	}
 	msgbuftrigger = 0;
@@ -199,6 +221,7 @@
 	}
 	callout_reset(&logsoftc.sc_callout, hz / log_wakeups_per_second,
 	    logtimeout, NULL);
+	mtx_unlock(&log_mtx);
 }
 
 /*ARGSUSED*/
@@ -217,10 +240,12 @@
 		break;
 
 	case FIOASYNC:
+		mtx_lock(&log_mtx);
 		if (*(int *)data)
 			logsoftc.sc_state |= LOG_ASYNC;
 		else
 			logsoftc.sc_state &= ~LOG_ASYNC;
+		mtx_unlock(&log_mtx);
 		break;
 
 	case FIOSETOWN:
@@ -249,6 +274,7 @@
 log_drvinit(void *unused)
 {
 
+	mtx_init(&log_mtx, "log_mtx", NULL, MTX_DEF);
 	make_dev(&log_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "klog");
 }
 
--- //depot/vendor/freebsd/src/sys/kern/subr_prf.c	2004/04/05 21:06:48
+++ //depot/user/rwatson/netperf/sys/kern/subr_prf.c	2004/04/07 03:50:45
@@ -83,6 +83,9 @@
 	size_t	remain;
 };
 
+/*
+ * XXXRW: We access subr_log.c's log_open variable unlocked.
+ */
 extern	int log_open;
 
 static void  msglogchar(int c, int pri);
--- //depot/vendor/freebsd/src/sys/kern/subr_witness.c	2004/06/03 20:10:43
+++ //depot/user/rwatson/netperf/sys/kern/subr_witness.c	2004/06/04 03:56:30
@@ -272,37 +272,45 @@
 	 */
 	{ "filedesc structure", &lock_class_mtx_sleep },
 	{ "accept", &lock_class_mtx_sleep },
+	{ "so_snd", &lock_class_mtx_sleep },
+	{ "so_rcv", &lock_class_mtx_sleep },
 	{ "sellck", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * Routing
 	 */
+	{ "so_rcv", &lock_class_mtx_sleep },
 	{ "radix node head", &lock_class_mtx_sleep },
 	{ "rtentry", &lock_class_mtx_sleep },
 	{ "ifaddr", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * UNIX Domain Sockets
+	 */
+	{ "unp head", &lock_class_mtx_sleep },
+	{ "unp", &lock_class_mtx_sleep },
+	{ "so_snd", &lock_class_mtx_sleep },
 	{ NULL, NULL },
-	 */
 	/*
 	 * UDP/IP
 	 */
 	{ "udp", &lock_class_mtx_sleep },
 	{ "udpinp", &lock_class_mtx_sleep },
+	{ "so_snd", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * TCP/IP
 	 */
 	{ "tcp", &lock_class_mtx_sleep },
 	{ "tcpinp", &lock_class_mtx_sleep },
+	{ "so_snd", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * SLIP
 	 */
 	{ "slip_mtx", &lock_class_mtx_sleep },
 	{ "slip sc_mtx", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
 	 * spin locks
 	 */
--- //depot/vendor/freebsd/src/sys/kern/sys_socket.c	2004/04/05 21:06:48
+++ //depot/user/rwatson/netperf/sys/kern/sys_socket.c	2004/05/30 18:22:04
@@ -77,7 +77,9 @@
 
 	NET_LOCK_GIANT();
 #ifdef MAC
+	SOCK_LOCK(so);
 	error = mac_check_socket_receive(active_cred, so);
+	SOCK_UNLOCK(so);
 	if (error) {
 		NET_UNLOCK_GIANT();
 		return (error);
@@ -102,7 +104,9 @@
 
 	NET_LOCK_GIANT();
 #ifdef MAC
+	SOCK_LOCK(so);
 	error = mac_check_socket_send(active_cred, so);
+	SOCK_UNLOCK(so);
 	if (error) {
 		NET_UNLOCK_GIANT();
 		return (error);
@@ -127,25 +131,40 @@
 	switch (cmd) {
 
 	case FIONBIO:
+		SOCK_LOCK(so);
 		if (*(int *)data)
 			so->so_state |= SS_NBIO;
 		else
 			so->so_state &= ~SS_NBIO;
+		SOCK_UNLOCK(so);
 		return (0);
 
 	case FIOASYNC:
+		/*
+		 * XXXRW: Implicit assumption that SOCK_LOCK(so)
+		 * == SOCKBUF_LOCK(&so->so_rcv);
+		 */
 		if (*(int *)data) {
+			SOCK_LOCK(so);
 			so->so_state |= SS_ASYNC;
 			so->so_rcv.sb_flags |= SB_ASYNC;
+			SOCK_UNLOCK(so);
+			SOCKBUF_LOCK(&so->so_snd);
 			so->so_snd.sb_flags |= SB_ASYNC;
+			SOCKBUF_UNLOCK(&so->so_snd);
 		} else {
+			SOCK_LOCK(so);
 			so->so_state &= ~SS_ASYNC;
 			so->so_rcv.sb_flags &= ~SB_ASYNC;
+			SOCK_UNLOCK(so);
+			SOCKBUF_LOCK(&so->so_snd);
 			so->so_snd.sb_flags &= ~SB_ASYNC;
+			SOCKBUF_UNLOCK(&so->so_snd);
 		}
 		return (0);
 
 	case FIONREAD:
+		/* Unlocked read. */
 		*(int *)data = so->so_rcv.sb_cc;
 		return (0);
 
@@ -164,7 +183,8 @@
 		return (0);
 
 	case SIOCATMARK:
-		*(int *)data = (so->so_state&SS_RCVATMARK) != 0;
+		/* Unlocked read. */
+		*(int *)data = (so->so_rcv.sb_state & SBS_RCVATMARK) != 0;
 		return (0);
 	}
 	/*
@@ -203,13 +223,17 @@
 	bzero((caddr_t)ub, sizeof (*ub));
 	ub->st_mode = S_IFSOCK;
 	/*
-	 * If SS_CANTRCVMORE is set, but there's still data left in the
+	 * If SBS_CANTRCVMORE is set, but there's still data left in the
 	 * receive buffer, the socket is still readable.
+	 *
+	 * XXXRW: perhaps should lock socket buffer so st_size result
+	 * is consistent.
 	 */
-	if ((so->so_state & SS_CANTRCVMORE) == 0 ||
+	/* Unlocked read. */
+	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0 ||
 	    so->so_rcv.sb_cc != 0)
 		ub->st_mode |= S_IRUSR | S_IRGRP | S_IROTH;
-	if ((so->so_state & SS_CANTSENDMORE) == 0)
+	if ((so->so_snd.sb_state & SBS_CANTSENDMORE) == 0)
 		ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH;
 	ub->st_size = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
 	ub->st_uid = so->so_cred->cr_uid;
--- //depot/vendor/freebsd/src/sys/kern/uipc_mbuf.c	2004/05/31 21:50:45
+++ //depot/user/rwatson/netperf/sys/kern/uipc_mbuf.c	2004/06/08 21:28:41
@@ -397,12 +397,12 @@
 		np = &n->m_next;
 	}
 	if (top == NULL)
-		mbstat.m_mcfail++;	/* XXX: No consistency. */
+		atomic_add_long(&mbstat.m_mcfail, 1);
 
 	return (top);
 nospace:
 	m_freem(top);
-	mbstat.m_mcfail++;	/* XXX: No consistency. */
+	atomic_add_long(&mbstat.m_mcfail, 1);
 	return (NULL);
 }
 
@@ -462,7 +462,7 @@
 	return top;
 nospace:
 	m_freem(top);
-	mbstat.m_mcfail++;	/* XXX: No consistency. */ 
+	atomic_add_long(&mbstat.m_mcfail, 1);
 	return (NULL);
 }
 
@@ -564,7 +564,7 @@
 
 nospace:
 	m_freem(top);
-	mbstat.m_mcfail++;	/* XXX: No consistency. */
+	atomic_add_long(&mbstat.m_mcfail, 1);
 	return (NULL);
 }
 
@@ -724,7 +724,7 @@
 	return (m);
 bad:
 	m_freem(n);
-	mbstat.m_mpfail++;	/* XXX: No consistency. */
+	atomic_add_long(&mbstat.m_mpfail, 1);
 	return (NULL);
 }
 
--- //depot/vendor/freebsd/src/sys/kern/uipc_socket.c	2004/06/08 13:10:43
+++ //depot/user/rwatson/netperf/sys/kern/uipc_socket.c	2004/06/09 02:50:26
@@ -109,7 +109,6 @@
 struct mtx accept_mtx;
 MTX_SYSINIT(accept_mtx, &accept_mtx, "accept", MTX_DEF);
 
-
 /*
  * Socket operation routines.
  * These routines are called by the routines in
@@ -144,6 +143,8 @@
 			return so;
 		}
 #endif
+		SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
+		SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
 		/* XXX race condition for reentrant kernel */
 		so->so_gencnt = ++so_gencnt;
 		/* sx_init(&so->so_sxlock, "socket sxlock"); */
@@ -199,9 +200,12 @@
 #ifdef MAC
 	mac_create_socket(cred, so);
 #endif
+	SOCK_LOCK(so);
 	soref(so);
+	SOCK_UNLOCK(so);
 	error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
 	if (error) {
+		SOCK_LOCK(so);
 		so->so_state |= SS_NOFDREF;
 		sorele(so);
 		return (error);
@@ -245,6 +249,8 @@
 	mac_destroy_socket(so);
 #endif
 	crfree(so->so_cred);
+	SOCKBUF_LOCK_DESTROY(&so->so_snd);
+	SOCKBUF_LOCK_DESTROY(&so->so_rcv);
 	/* sx_destroy(&so->so_sxlock); */
 	uma_zfree(socket_zone, so);
 	--numopensockets;
@@ -259,19 +265,28 @@
 	int s, error;
 
 	s = splnet();
+	/* Unlocked read. */
 	if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING |
 			    SS_ISDISCONNECTING)) {
 		splx(s);
 		return (EINVAL);
 	}
+	/*
+	 * XXXRW: Ordering issue here -- perhaps we need to set
+	 * SO_ACCEPTCONN before the call to pru_listen()?
+	 * XXXRW: General atomic test-and-set concerns here also.
+	 */
 	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, td);
 	if (error) {
 		splx(s);
 		return (error);
 	}
 	ACCEPT_LOCK();
-	if (TAILQ_EMPTY(&so->so_comp))
+	if (TAILQ_EMPTY(&so->so_comp)) {
+		SOCK_LOCK(so);
 		so->so_options |= SO_ACCEPTCONN;
+		SOCK_UNLOCK(so);
+	}
 	if (backlog < 0 || backlog > somaxconn)
 		backlog = somaxconn;
 	so->so_qlimit = backlog;
@@ -285,13 +300,17 @@
 	struct socket *so;
 {
 	struct socket *head;
-	int s;
 
 	KASSERT(so->so_count == 0, ("socket %p so_count not 0", so));
+	SOCK_LOCK_ASSERT(so);
 
-	if (so->so_pcb != NULL || (so->so_state & SS_NOFDREF) == 0)
+	/* XXXRW: Why would SS_NOFDREF be unset here?  so_count is 0. */
+	if (so->so_pcb != NULL || (so->so_state & SS_NOFDREF) == 0) {
+		SOCK_UNLOCK(so);
 		return;
+	}
 
+	SOCK_UNLOCK(so);
 	ACCEPT_LOCK();
 	head = so->so_head;
 	if (head != NULL) {
@@ -327,17 +346,56 @@
 	    ("sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)",
 	    so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP));
 	ACCEPT_UNLOCK();
+	SOCKBUF_LOCK(&so->so_snd);
 	so->so_snd.sb_flags |= SB_NOINTR;
 	(void)sblock(&so->so_snd, M_WAITOK);
-	s = splimp();
-	socantsendmore(so);
-	splx(s);
+	socantsendmore_locked(so);
 	sbunlock(&so->so_snd);
 	sbrelease(&so->so_snd, so);
+	SOCKBUF_UNLOCK(&so->so_snd);
 	sorflush(so);
 	sodealloc(so);
 }
 
+#if 0
+static void
+dump_socket(struct socket *so)
+{
+
+	printf("so = %p\n", so);
+	printf("  so_count = %d\n", so->so_count);
+	printf("  so_type = %d\n", so->so_type);
+	printf("  so_options = %d\n", so->so_options);
+	printf("  so_linger = %d\n", so->so_linger);
+	printf("  so_state = %d\n", so->so_state);
+	printf("  so_qstate = %d\n", so->so_qstate);
+	printf("  so_pcb = %p\n", so->so_pcb);
+	printf("  so_proto = %p\n", so->so_proto);
+	printf("    pr_type = %d\n", so->so_proto->pr_type);
+	printf("    pr_domain = %p\n", so->so_proto->pr_domain);
+	printf("      dom_family = %d\n", so->so_proto->pr_domain->dom_family);
+	printf("      dom_name = %s\n", so->so_proto->pr_domain->dom_name);
+	printf("    pr_protocol = %d\n", so->so_proto->pr_protocol);
+	printf("    pr_flags = %d\n", so->so_proto->pr_flags);
+	printf("  so_head = %p\n", so->so_head);
+	printf("  TAILQ_FIRST(so_incomp) = %p\n", TAILQ_FIRST(&so->so_incomp));
+	printf("  TAILQ_FIRST(so_comp) = %p\n", TAILQ_FIRST(&so->so_comp));
+	printf("  so_qlen = %d\n", so->so_qlen);
+	printf("  so_incqlen = %d\n", so->so_incqlen);
+	printf("  so_qlimit = %d\n", so->so_qlimit);
+	printf("  so_timeo = %d\n", so->so_timeo);
+	printf("  so_error = %d\n", so->so_error);
+	printf("  so_sigio = %p\n", so->so_sigio);
+	printf("  so_oobmark = %lu\n", so->so_oobmark);
+	printf("  so_rcv.sb_state = %d\n", so->so_rcv.sb_state);
+	printf("  so_snd.sb_state = %d\n", so->so_snd.sb_state);
+	printf("  so_upcall = %p\n", so->so_upcall);
+	printf("  so_upcallarg = %p\n", so->so_upcallarg);
+	printf("  so_cred = %p\n", so->so_cred);
+	printf("  so_gencnt = %d\n", (int)so->so_gencnt);
+}
+#endif
+
 /*
  * Close a socket on last file table reference removal.
  * Initiate disconnect if connected.
@@ -354,7 +412,26 @@
 	int s = splnet();		/* conservative */
 	int error = 0;
 
+	KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter"));
+
 	funsetown(&so->so_sigio);
+	/*-
+	 * XXXRW: Lots of locking problems here.  For one thing,
+	 * we should probably clear the SO_ACCEPTCONN flag and
+	 * push that down to the protocol layer.  Also, the locking
+	 * for the queue draining is probably not right either:
+	 * what prevents new ones from being inserted after we get
+	 * past this?  Ideally, the downcall would prevent this, but
+	 * there isn't one.
+	 */
+	/*
+	 * XXXRW: soclose() should be split into two parts: one to handle
+	 * listen sockets, and the other to handle the remainder.  To
+	 * prevent races with the protocol code, we need to detach before
+	 * draining the queues.  For non-listen sockets, the disconnect
+	 * has to happen before the detach.
+	 */
+	/* Unlocked read. */
 	if (so->so_options & SO_ACCEPTCONN) {
 		struct socket *sp;
 		ACCEPT_LOCK();
@@ -380,6 +457,7 @@
 	}
 	if (so->so_pcb == NULL)
 		goto discard;
+	/* XXXRW: so_state locking? */
 	if (so->so_state & SS_ISCONNECTED) {
 		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
 			error = sodisconnect(so);
@@ -400,13 +478,14 @@
 	}
 drop:
 	if (so->so_pcb != NULL) {
-		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
+		int error2;
+		error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
 		if (error == 0)
 			error = error2;
 	}
 discard:
-	if (so->so_state & SS_NOFDREF)
-		panic("soclose: NOFDREF");
+	SOCK_LOCK(so);
+	KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF"));
 	so->so_state |= SS_NOFDREF;
 	sorele(so);
 	splx(s);
@@ -414,7 +493,9 @@
 }
 
 /*
- * Must be called at splnet...
+ * XXXRW: soabort() must not be called with any locks held, as the protocol
+ * will need to be able to grab socket and socket buffer locks, and we also
+ * try to free the socket if the protocol generates an error.
  */
 int
 soabort(so)
@@ -424,6 +505,7 @@
 
 	error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
 	if (error) {
+		SOCK_LOCK(so);
 		sotryfree(so);	/* note: does not decrement the ref count */
 		return error;
 	}
@@ -435,14 +517,13 @@
 	struct socket *so;
 	struct sockaddr **nam;
 {
-	int s = splnet();
 	int error;
 
-	if ((so->so_state & SS_NOFDREF) == 0)
-		panic("soaccept: !NOFDREF");
+	SOCK_LOCK(so);
+	KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF"));
 	so->so_state &= ~SS_NOFDREF;
+	SOCK_UNLOCK(so);
 	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
-	splx(s);
 	return (error);
 }
 
@@ -452,25 +533,23 @@
 	struct sockaddr *nam;
 	struct thread *td;
 {
-	int s;
 	int error;
 
 	if (so->so_options & SO_ACCEPTCONN)
 		return (EOPNOTSUPP);
-	s = splnet();
 	/*
 	 * If protocol is connection-based, can only connect once.
 	 * Otherwise, if connected, try to disconnect first.
 	 * This allows user to disconnect by connecting to, e.g.,
 	 * a null address.
 	 */
+	/* Unlocked read. */
 	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
 	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
 	    (error = sodisconnect(so))))
 		error = EISCONN;
 	else
 		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td);
-	splx(s);
 	return (error);
 }
 
@@ -479,11 +558,9 @@
 	struct socket *so1;
 	struct socket *so2;
 {
-	int s = splnet();
 	int error;
 
 	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
-	splx(s);
 	return (error);
 }
 
@@ -491,20 +568,14 @@
 sodisconnect(so)
 	struct socket *so;
 {
-	int s = splnet();
 	int error;
 
-	if ((so->so_state & SS_ISCONNECTED) == 0) {
-		error = ENOTCONN;
-		goto bad;
-	}
-	if (so->so_state & SS_ISDISCONNECTING) {
-		error = EALREADY;
-		goto bad;
-	}
+	/* Unlocked read. */
+	if ((so->so_state & SS_ISCONNECTED) == 0)
+		return ENOTCONN;
+	if (so->so_state & SS_ISDISCONNECTING)
+		return EALREADY;
 	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
-bad:
-	splx(s);
 	return (error);
 }
 
@@ -555,7 +626,7 @@
 	struct mbuf **mp;
 	struct mbuf *m;
 	long space, len = 0, resid;
-	int clen = 0, error, s, dontroute;
+	int clen = 0, error, dontroute;
 	int atomic = sosendallatonce(so) || top;
 #ifdef ZERO_COPY_SOCKETS
 	int cow_send;
@@ -587,20 +658,19 @@
 		td->td_proc->p_stats->p_ru.ru_msgsnd++;
 	if (control != NULL)
 		clen = control->m_len;
-#define	snderr(errno)	{ error = (errno); splx(s); goto release; }
+#define	snderr(errno)	{ error = (errno); goto release; }
 
-restart:
+	SOCKBUF_LOCK(&so->so_snd);
 	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
 	if (error)
 		goto out;
 	do {
-		s = splnet();
-		if (so->so_state & SS_CANTSENDMORE)
+		SOCKBUF_LOCK_ASSERT(&so->so_snd);
+		if (so->so_snd.sb_state & SBS_CANTSENDMORE)
 			snderr(EPIPE);
 		if (so->so_error) {
 			error = so->so_error;
 			so->so_error = 0;
-			splx(s);
 			goto release;
 		}
 		if ((so->so_state & SS_ISCONNECTED) == 0) {
@@ -629,14 +699,12 @@
 		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
 			if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO))
 				snderr(EWOULDBLOCK);
-			sbunlock(&so->so_snd);
 			error = sbwait(&so->so_snd);
-			splx(s);
 			if (error)
-				goto out;
-			goto restart;
+				goto release;
+			continue;
 		}
-		splx(s);
+		SOCKBUF_UNLOCK(&so->so_snd);
 		mp = &top;
 		space -= clen;
 		do {
@@ -657,6 +725,7 @@
 					MGETHDR(m, M_TRYWAIT, MT_DATA);
 					if (m == NULL) {
 						error = ENOBUFS;
+						SOCKBUF_LOCK(&so->so_snd);
 						goto release;
 					}
 					m->m_pkthdr.len = 0;
@@ -665,6 +734,7 @@
 					MGET(m, M_TRYWAIT, MT_DATA);
 					if (m == NULL) {
 						error = ENOBUFS;
+						SOCKBUF_LOCK(&so->so_snd);
 						goto release;
 					}
 				}
@@ -718,6 +788,7 @@
 			}
 			if (m == NULL) {
 				error = ENOBUFS;
+				SOCKBUF_LOCK(&so->so_snd);
 				goto release;
 			}
 
@@ -732,8 +803,10 @@
 			m->m_len = len;
 			*mp = m;
 			top->m_pkthdr.len += len;
-			if (error)
+			if (error) {
+				SOCKBUF_LOCK(&so->so_snd);
 				goto release;
+			}
 			mp = &m->m_next;
 			if (resid <= 0) {
 				if (flags & MSG_EOR)
@@ -741,15 +814,26 @@
 				break;
 			}
 		    } while (space > 0 && atomic);
-		    if (dontroute)
+		    /*
+		     * XXXRW: There may be a race condition here regarding
+		     * SO_DONTROUTE.  We hold the sblock() so in theory
+		     * there won't be other consumers of the socket doing
+		     * a pru_send(), but SO_DONTROUTE is also consumed
+		     * from the protocol side.  It sounds like we might
+		     * want to use MSG_DONTROUTE here, if the semantics are
+		     * right.
+		     */
+		    if (dontroute) {
+			    SOCK_LOCK(so);
 			    so->so_options |= SO_DONTROUTE;
-		    s = splnet();				/* XXX */
+			    SOCK_UNLOCK(so);
+		    }
 		    /*
-		     * XXX all the SS_CANTSENDMORE checks previously
+		     * XXX all the SBS_CANTSENDMORE checks previously
 		     * done could be out of date.  We could have recieved
 		     * a reset packet in an interrupt or maybe we slept
 		     * while doing page faults in uiomove() etc. We could
-		     * probably recheck again inside the splnet() protection
+		     * probably recheck again inside the locking protection
 		     * here, but there are probably other places that this
 		     * also happens.  We must rethink this.
 		     */
@@ -767,21 +851,32 @@
 			/* If there is more to send set PRUS_MORETOCOME */
 			(resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
 			top, addr, control, td);
-		    splx(s);
-		    if (dontroute)
+		    /*
+		     * XXXRW: Second half of above comment on SO_DONTROUTE
+		     * and so_options.
+		     */
+		    if (dontroute) {
+			    SOCK_LOCK(so);
 			    so->so_options &= ~SO_DONTROUTE;
+			    SOCK_UNLOCK(so);
+		    }
 		    clen = 0;
 		    control = NULL;
 		    top = NULL;
 		    mp = &top;
-		    if (error)
+		    if (error) {
+			SOCKBUF_LOCK(&so->so_snd);
 			goto release;
+		    }
 		} while (resid && space > 0);
+		SOCKBUF_LOCK(&so->so_snd);
 	} while (resid);
 
 release:
+	SOCKBUF_LOCK_ASSERT(&so->so_snd);
 	sbunlock(&so->so_snd);
 out:
+	SOCKBUF_UNLOCK(&so->so_snd);
 	if (top != NULL)
 		m_freem(top);
 	if (control != NULL)
@@ -815,7 +910,7 @@
 	int *flagsp;
 {
 	struct mbuf *m, **mp;
-	int flags, len, error, s, offset;
+	int flags, len, error, offset;
 	struct protosw *pr = so->so_proto;
 	struct mbuf *nextrecord;
 	int moff, type = 0;
@@ -870,15 +965,17 @@
 	}
 	if (mp != NULL)
 		*mp = NULL;
+	/* Unlocked read. */
 	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
 		(*pr->pr_usrreqs->pru_rcvd)(so, 0);
 
-restart:
+	SOCKBUF_LOCK(&so->so_rcv);
 	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
 	if (error)
-		return (error);
-	s = splnet();
+		goto out;
 
+restart:
+	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	m = so->so_rcv.sb_mb;
 	/*
 	 * If we have less data than requested, block awaiting more
@@ -896,9 +993,8 @@
 	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
 	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
 	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
-		KASSERT(m != NULL || !so->so_rcv.sb_cc,
-		    ("receive: m == %p so->so_rcv.sb_cc == %u",
-		    m, so->so_rcv.sb_cc));
+		KASSERT(!(m == NULL && so->so_rcv.sb_cc),
+		    ("m %p so->so_rcv.sb_cc %u", m, so->so_rcv.sb_cc));
 		if (so->so_error) {
 			if (m != NULL)
 				goto dontblock;
@@ -907,7 +1003,8 @@
 				so->so_error = 0;
 			goto release;
 		}
-		if (so->so_state & SS_CANTRCVMORE) {
+		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+		if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 			if (m)
 				goto dontblock;
 			else
@@ -918,6 +1015,7 @@
 				m = so->so_rcv.sb_mb;
 				goto dontblock;
 			}
+		/* XXXRW: so_state locking? */
 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
 		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
 			error = ENOTCONN;
@@ -925,6 +1023,7 @@
 		}
 		if (uio->uio_resid == 0)
 			goto release;
+		/* XXXRW: so_state locking? */
 		if ((so->so_state & SS_NBIO) ||
 		    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
 			error = EWOULDBLOCK;
@@ -932,14 +1031,14 @@
 		}
 		SBLASTRECORDCHK(&so->so_rcv);
 		SBLASTMBUFCHK(&so->so_rcv);
-		sbunlock(&so->so_rcv);
 		error = sbwait(&so->so_rcv);
-		splx(s);
 		if (error)
-			return (error);
+			goto release;
 		goto restart;
 	}
 dontblock:
+	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+	KASSERT(error == 0, ("unexpected state, error %u", error));
 	if (uio->uio_td)
 		uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++;
 	SBLASTRECORDCHK(&so->so_rcv);
@@ -948,41 +1047,110 @@
 	if (pr->pr_flags & PR_ADDR) {
 		KASSERT(m->m_type == MT_SONAME,
 		    ("m->m_type == %d", m->m_type));
-		orig_resid = 0;
-		if (psa != NULL)
+		if (psa != NULL) {
 			*psa = sodupsockaddr(mtod(m, struct sockaddr *),
-			    mp0 == NULL ? M_WAITOK : M_NOWAIT);
+					M_NOWAIT);
+			if (*psa == NULL) {
+				error = ENOMEM;
+				goto release;
+			}
+			/*
+			 * XXXRW: In the rwatson_netperf branch, we don't
+			 * release the socket buffer lock here because
+			 * we always use M_NOWAIT.  In the main tree,
+			 * or if we restore conditional waiting, we
+			 * need to refresh nextrecord from the socket
+			 * buffer version of the head mbuf, or we may
+			 * have a stale value if it was previously NULL
+			 * and now isn't.
+			 */
+			nextrecord = m->m_nextpkt;
+		}
 		if (flags & MSG_PEEK) {
 			m = m->m_next;
 		} else {
 			sbfree(&so->so_rcv, m);
 			so->so_rcv.sb_mb = m_free(m);
 			m = so->so_rcv.sb_mb;
+			/*
+			 * XXXRW: When running MPSAFE, we may release
+			 * locks after this point, and therefore have
+			 * effective preemption relative to the socket
+			 * buffer mbuf chain.  Since we've modified
+			 * the head, we have to make sure the new head
+			 * has the right nextpkt copied from the
+			 * original head.
+			 */
+			m->m_nextpkt = nextrecord;
 		}
+		orig_resid = 0;
 	}
-	while (m != NULL && m->m_type == MT_CONTROL && error == 0) {
-		if (flags & MSG_PEEK) {
-			if (controlp != NULL)
-				*controlp = m_copy(m, 0, m->m_len);
-			m = m->m_next;
-		} else {
-			sbfree(&so->so_rcv, m);
-			so->so_rcv.sb_mb = m->m_next;
-			m->m_next = NULL;
-			if (pr->pr_domain->dom_externalize)
-				error =
-				(*pr->pr_domain->dom_externalize)(m, controlp);
-			else if (controlp != NULL)
-				*controlp = m;
-			else
-				m_freem(m);
-			m = so->so_rcv.sb_mb;
+	if (m != NULL && m->m_type == MT_CONTROL) {
+		struct mbuf *cm = NULL;
+		struct mbuf **cme = &cm;
+
+		do {
+			if (flags & MSG_PEEK) {
+				/*
+				 * XXXRW: In the BSD/OS version of this
+				 * code, m_copym() is called with M_TRYWAIT,
+				 * and we catch allocation failures and
+				 * jump to release with error of ENOBUFS.
+				 * For consistency with the current
+				 * implementation, we don't.
+				 */
+				if (controlp != NULL) {
+					*controlp = m_copym(m, 0, m->m_len,
+						M_DONTWAIT);
+					controlp = &(*controlp)->m_next;
+				}
+				m = m->m_next;
+			} else {
+				sbfree(&so->so_rcv, m);
+				so->so_rcv.sb_mb = m->m_next;
+				m->m_next = NULL;
+				if (controlp) {
+					/*
+					 * Collect mbufs for processing below.
+					 */
+					*cme = m;
+					cme = &(*cme)->m_next;
+				} else
+					m_free(m);
+				m = so->so_rcv.sb_mb;
+			}
+		} while (m != NULL && m->m_type == MT_CONTROL);
+		/*
+		 * XXXRW: Since we're dropping the socket buffer locks, and
+		 * may have modified the mbuf list on the socket buffer,
+		 * push out the latest version so it can be seen by any
+		 * other code that accesses the buffer in the mean time.
+		 */
+		so->so_rcv.sb_mb->m_nextpkt = nextrecord;
+		if (nextrecord == NULL)
+			so->so_rcv.sb_lastrecord = so->so_rcv.sb_mb;
+		if (cm != NULL) {
+			if (pr->pr_domain->dom_externalize != NULL) {
+				/*
+				 * NB: drop the lock to avoid potential LORs;
+				 * in particular unix domain sockets grab the
+				 * file descriptor lock which would be a LOR.
+				 */
+				SOCKBUF_UNLOCK(&so->so_rcv);
+				error = (*pr->pr_domain->dom_externalize)
+						(cm, controlp);
+				SOCKBUF_LOCK(&so->so_rcv);
+			} else
+				m_freem(cm);
 		}
-		if (controlp != NULL) {
-			orig_resid = 0;
-			while (*controlp != NULL)
-				controlp = &(*controlp)->m_next;
-		}
+		/*
+		 * XXXRW: During externalization, additional mbuf chains
+		 * may have been added to the socket buffer.  Update our
+		 * local cache to avoid using a stale value and corrupting
+		 * the list.
+		 */
+		nextrecord = so->so_rcv.sb_mb->m_nextpkt;
+		orig_resid = 0;
 	}
 	if (m != NULL) {
 		if ((flags & MSG_PEEK) == 0) {
@@ -1008,6 +1176,7 @@
 			SB_EMPTY_FIXUP(&so->so_rcv);
 		}
 	}
+	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	SBLASTRECORDCHK(&so->so_rcv);
 	SBLASTMBUFCHK(&so->so_rcv);
 
@@ -1022,7 +1191,7 @@
 		else
 		    KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
 			("m->m_type == %d", m->m_type));
-		so->so_state &= ~SS_RCVATMARK;
+		so->so_rcv.sb_state &= ~SBS_RCVATMARK;
 		len = uio->uio_resid;
 		if (so->so_oobmark && len > so->so_oobmark - offset)
 			len = so->so_oobmark - offset;
@@ -1039,7 +1208,7 @@
 		if (mp == NULL) {
 			SBLASTRECORDCHK(&so->so_rcv);
 			SBLASTMBUFCHK(&so->so_rcv);
-			splx(s);
+			SOCKBUF_UNLOCK(&so->so_rcv);
 #ifdef ZERO_COPY_SOCKETS
 			if (so_zero_copy_receive) {
 				vm_page_t pg;
@@ -1063,7 +1232,7 @@
 			} else
 #endif /* ZERO_COPY_SOCKETS */
 			error = uiomove(mtod(m, char *) + moff, (int)len, uio);
-			s = splnet();
+			SOCKBUF_LOCK(&so->so_rcv);
 			if (error)
 				goto release;
 		} else
@@ -1105,6 +1274,7 @@
 					*mp = m_copym(m, 0, len, M_TRYWAIT);
 				m->m_data += len;
 				m->m_len -= len;
+				SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 				so->so_rcv.sb_cc -= len;
 			}
 		}
@@ -1112,7 +1282,9 @@
 			if ((flags & MSG_PEEK) == 0) {
 				so->so_oobmark -= len;
 				if (so->so_oobmark == 0) {
-					so->so_state |= SS_RCVATMARK;
+					/* XXXRW: so_state locking? */
+					SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+					so->so_rcv.sb_state |= SBS_RCVATMARK;
 					break;
 				}
 			} else {
@@ -1132,21 +1304,30 @@
 		 */
 		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
 		    !sosendallatonce(so) && nextrecord == NULL) {
-			if (so->so_error || so->so_state & SS_CANTRCVMORE)
+			SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+			if (so->so_error || so->so_rcv.sb_state & SBS_CANTRCVMORE)
 				break;
 			/*
 			 * Notify the protocol that some data has been
 			 * drained before blocking.
+			 *
+			 * XXXRW: We drop the socket buffer lock here
+			 * because we know the protocol will need to do its
+			 * own locking.  However, after calling pru_rcvd()
+			 * and re-grabbing the buffer mutex, we don't
+			 * re-check the condition before sleeping.
 			 */
-			if (pr->pr_flags & PR_WANTRCVD && so->so_pcb != NULL)
+			if (pr->pr_flags & PR_WANTRCVD && so->so_pcb != NULL) {
+				SOCKBUF_UNLOCK(&so->so_rcv);
 				(*pr->pr_usrreqs->pru_rcvd)(so, flags);
+				SOCKBUF_LOCK(&so->so_rcv);
+			}
 			SBLASTRECORDCHK(&so->so_rcv);
 			SBLASTMBUFCHK(&so->so_rcv);
 			error = sbwait(&so->so_rcv);
 			if (error) {
-				sbunlock(&so->so_rcv);
-				splx(s);
-				return (0);
+				error = 0;
+				goto release;
 			}
 			m = so->so_rcv.sb_mb;
 			if (m != NULL)
@@ -1175,21 +1356,28 @@
 		}
 		SBLASTRECORDCHK(&so->so_rcv);
 		SBLASTMBUFCHK(&so->so_rcv);
-		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
+		/*
+		 * XXXRW: We drop the socket buffer lock before calling
+		 * down into the protocol.  Is that OK in the calling
+		 * context?
+		 */
+		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) {
+			SOCKBUF_UNLOCK(&so->so_rcv);
 			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
+			SOCKBUF_LOCK(&so->so_rcv);
+		}
 	}
+	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	if (orig_resid == uio->uio_resid && orig_resid &&
-	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
-		sbunlock(&so->so_rcv);
-		splx(s);
-		goto restart;
-	}
+	    (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0)
+		goto restart;		/* XXX multi-counts msgs */
 
 	if (flagsp != NULL)
 		*flagsp |= flags;
 release:
 	sbunlock(&so->so_rcv);
-	splx(s);
+out:
+	SOCKBUF_UNLOCK(&so->so_rcv);
 	return (error);
 }
 
@@ -1216,23 +1404,23 @@
 {
 	struct sockbuf *sb = &so->so_rcv;
 	struct protosw *pr = so->so_proto;
-	int s;
 	struct sockbuf asb;
 
+	SOCKBUF_LOCK(sb);
 	sb->sb_flags |= SB_NOINTR;
 	(void) sblock(sb, M_WAITOK);
-	s = splimp();
-	socantrcvmore(so);
+	socantrcvmore_locked(so);
 	sbunlock(sb);
 	asb = *sb;
 	/*
-	 * Invalidate/clear most of the sockbuf structure, but keep
-	 * its selinfo structure valid.
+	 * Invalidate/clear most of the sockbuf structure, but leave
+	 * selinfo and mutex data unchanged.
 	 */
 	bzero(&sb->sb_startzero,
 	    sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
-	splx(s);
+	SOCKBUF_UNLOCK(sb);
 
+	/* XXXRW: is passing in sb_mb this way really safe? */
 	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
 		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
 	sbrelease(&asb, so);
@@ -1244,20 +1432,31 @@
 	struct	socket *so;
 	struct	sockopt *sopt;
 {
-	struct accept_filter_arg	*afap = NULL;
+	struct accept_filter_arg	*afap;
 	struct accept_filter	*afp;
-	struct so_accf	*af = so->so_accf;
+	struct so_accf	*newaf;
 	int	error = 0;
 
+	newaf = NULL;
+	afap = NULL;
+
+	/*
+	 * XXXRW: Configuring accept filters should be an atomic
+	 * test-and-set operation to prevent races during setup and
+	 * and attach.  There may be more general issues of racing
+	 * and ordering here that are not yet addressed by locking.
+	 */
 	/* do not set/remove accept filters on non listen sockets */
+	SOCK_LOCK(so);
 	if ((so->so_options & SO_ACCEPTCONN) == 0) {
-		error = EINVAL;
-		goto out;
+		SOCK_UNLOCK(so);
+		return (EINVAL);
 	}
 
 	/* removing the filter */
 	if (sopt == NULL) {
-		if (af != NULL) {
+		if (so->so_accf != NULL) {
+			struct so_accf *af = so->so_accf;
 			if (af->so_accept_filter != NULL &&
 				af->so_accept_filter->accf_destroy != NULL) {
 				af->so_accept_filter->accf_destroy(so);
@@ -1269,47 +1468,80 @@
 			so->so_accf = NULL;
 		}
 		so->so_options &= ~SO_ACCEPTFILTER;
+		SOCK_UNLOCK(so);
 		return (0);
 	}
-	/* adding a filter */
-	/* must remove previous filter first */
-	if (af != NULL) {
-		error = EINVAL;
-		goto out;
-	}
+	SOCK_UNLOCK(so);
+
+	/*-
+	 * Adding a filter.
+	 *
+	 * Do memory allocation, copyin, and filter lookup now while we're
+	 * not holding any locks.  Avoids sleeping with a mutex, as well
+	 * as introducing a lock order between accept filter locks and
+	 * socket locks here.
+	 */
+	MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), M_TEMP,
+	    M_WAITOK);
 	/* don't put large objects on the kernel stack */
-	MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), M_TEMP, M_WAITOK);
 	error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap);
 	afap->af_name[sizeof(afap->af_name)-1] = '\0';
 	afap->af_arg[sizeof(afap->af_arg)-1] = '\0';
-	if (error)
-		goto out;
+	if (error) {
+		FREE(afap, M_TEMP);
+		return (error);
+	}
 	afp = accept_filt_get(afap->af_name);
 	if (afp == NULL) {
-		error = ENOENT;
+		FREE(afap, M_TEMP);
+		return (ENOENT);
+	}
+
+	/*
+	 * Allocate the new accept filter instance storage.  We may have
+	 * to free it again later if we fail to attach it.  If attached
+	 * properly, 'newaf' is NULLed to avoid a free() while in use.
+	 */
+	MALLOC(newaf, struct so_accf *, sizeof(*newaf), M_ACCF, M_WAITOK |
+	    M_ZERO);
+	if (afp->accf_create != NULL && afap->af_name[0] != '\0') {
+		int len = strlen(afap->af_name) + 1;
+		MALLOC(newaf->so_accept_filter_str, char *, len, M_ACCF,
+		    M_WAITOK);
+		strcpy(newaf->so_accept_filter_str, afap->af_name);
+	}
+
+	SOCK_LOCK(so);
+	/* must remove previous filter first */
+	if (so->so_accf != NULL) {
+		error = EINVAL;
 		goto out;
 	}
-	MALLOC(af, struct so_accf *, sizeof(*af), M_ACCF, M_WAITOK | M_ZERO);
+	/*
+	 * Invoke the accf_create() method of the filter if required.
+	 * XXXRW: the socket mutex is held over this call, so the
+	 * create method cannot block.  This may be something we have
+	 * to change, but it would require addressing possible races.
+	 */
 	if (afp->accf_create != NULL) {
-		if (afap->af_name[0] != '\0') {
-			int len = strlen(afap->af_name) + 1;
-
-			MALLOC(af->so_accept_filter_str, char *, len, M_ACCF, M_WAITOK);
-			strcpy(af->so_accept_filter_str, afap->af_name);
-		}
-		af->so_accept_filter_arg = afp->accf_create(so, afap->af_arg);
-		if (af->so_accept_filter_arg == NULL) {
-			FREE(af->so_accept_filter_str, M_ACCF);
-			FREE(af, M_ACCF);
-			so->so_accf = NULL;
+		newaf->so_accept_filter_arg =
+		    afp->accf_create(so, afap->af_arg);
+		if (newaf->so_accept_filter_arg == NULL) {
 			error = EINVAL;
 			goto out;
 		}
 	}
-	af->so_accept_filter = afp;
-	so->so_accf = af;
+	newaf->so_accept_filter = afp;
+	so->so_accf = newaf;
 	so->so_options |= SO_ACCEPTFILTER;
+	newaf = NULL;
 out:
+	SOCK_UNLOCK(so);
+	if (newaf != NULL) {
+		if (newaf->so_accept_filter_str != NULL)
+			FREE(newaf->so_accept_filter_str, M_ACCF);
+		FREE(newaf, M_ACCF);
+	}
 	if (afap != NULL)
 		FREE(afap, M_TEMP);
 	return (error);
@@ -1383,11 +1615,13 @@
 			if (error)
 				goto bad;
 
+			SOCK_LOCK(so);
 			so->so_linger = l.l_linger;
 			if (l.l_onoff)
 				so->so_options |= SO_LINGER;
 			else
 				so->so_options &= ~SO_LINGER;
+			SOCK_UNLOCK(so);
 			break;
 
 		case SO_DEBUG:
@@ -1405,10 +1639,12 @@
 					    sizeof optval);
 			if (error)
 				goto bad;
+			SOCK_LOCK(so);
 			if (optval)
 				so->so_options |= sopt->sopt_name;
 			else
 				so->so_options &= ~sopt->sopt_name;
+			SOCK_UNLOCK(so);
 			break;
 
 		case SO_SNDBUF:
@@ -1570,23 +1806,33 @@
 		switch (sopt->sopt_name) {
 #ifdef INET
 		case SO_ACCEPTFILTER:
+			/* Unlocked read. */
 			if ((so->so_options & SO_ACCEPTCONN) == 0)
 				return (EINVAL);
 			MALLOC(afap, struct accept_filter_arg *, sizeof(*afap),
 				M_TEMP, M_WAITOK | M_ZERO);
+			SOCK_LOCK(so);
 			if ((so->so_options & SO_ACCEPTFILTER) != 0) {
 				strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name);
 				if (so->so_accf->so_accept_filter_str != NULL)
 					strcpy(afap->af_arg, so->so_accf->so_accept_filter_str);
 			}
+			SOCK_UNLOCK(so);
 			error = sooptcopyout(sopt, afap, sizeof(*afap));
 			FREE(afap, M_TEMP);
 			break;
 #endif
 
 		case SO_LINGER:
+			/*
+			 * XXXRW: We grab the lock here to get a consistent
+			 * snapshot of both fields.  This may not really
+			 * be necessary.
+			 */
+			SOCK_LOCK(so);
 			l.l_onoff = so->so_options & SO_LINGER;
 			l.l_linger = so->so_linger;
+			SOCK_UNLOCK(so);
 			error = sooptcopyout(sopt, &l, sizeof l);
 			break;
 
@@ -1601,6 +1847,7 @@
 		case SO_TIMESTAMP:
 		case SO_BINTIME:
 		case SO_NOSIGPIPE:
+			/* Unlocked read. */
 			optval = so->so_options & sopt->sopt_name;
 integer:
 			error = sooptcopyout(sopt, &optval, sizeof optval);
@@ -1807,10 +2054,16 @@
 	int revents = 0;
 	int s = splnet();
 
+	/*
+	 * XXXRW: Lots of unlocked reads, and some writes.  Probably
+	 * some more locking is called for here, especially when
+	 * setting the sb_sel flags.
+	 */
 	if (events & (POLLIN | POLLRDNORM))
 		if (soreadable(so))
 			revents |= events & (POLLIN | POLLRDNORM);
 
+	/* Unlocked read. */
 	if (events & POLLINIGNEOF)
 		if (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
 		    !TAILQ_EMPTY(&so->so_comp) || so->so_error)
@@ -1821,20 +2074,30 @@
 			revents |= events & (POLLOUT | POLLWRNORM);
 
 	if (events & (POLLPRI | POLLRDBAND))
-		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
+		if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK))
 			revents |= events & (POLLPRI | POLLRDBAND);
 
 	if (revents == 0) {
 		if (events &
 		    (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM |
 		     POLLRDBAND)) {
+			/*
+			 * XXXRW: Should lock also be held over selrecord()?
+			 */
 			selrecord(td, &so->so_rcv.sb_sel);
+			SOCKBUF_LOCK(&so->so_rcv);
 			so->so_rcv.sb_flags |= SB_SEL;
+			SOCKBUF_UNLOCK(&so->so_rcv);
 		}
 
 		if (events & (POLLOUT | POLLWRNORM)) {
+			/*
+			 * XXXRW: Should lock also be held over selrecord()?
+			 */
 			selrecord(td, &so->so_snd.sb_sel);
+			SOCKBUF_LOCK(&so->so_snd);
 			so->so_snd.sb_flags |= SB_SEL;
+			SOCKBUF_UNLOCK(&so->so_snd);
 		}
 	}
 
@@ -1847,10 +2110,10 @@
 {
 	struct socket *so = kn->kn_fp->f_data;
 	struct sockbuf *sb;
-	int s;
 
 	switch (kn->kn_filter) {
 	case EVFILT_READ:
+		/* Unlocked read. */
 		if (so->so_options & SO_ACCEPTCONN)
 			kn->kn_fop = &solisten_filtops;
 		else
@@ -1865,10 +2128,10 @@
 		return (1);
 	}
 
-	s = splnet();
+	SOCKBUF_LOCK(sb);
 	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
 	sb->sb_flags |= SB_KNOTE;
-	splx(s);
+	SOCKBUF_UNLOCK(sb);
 	return (0);
 }
 
@@ -1876,12 +2139,12 @@
 filt_sordetach(struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
-	int s = splnet();
 
+	SOCKBUF_LOCK(&so->so_rcv);
 	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
 	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
 		so->so_rcv.sb_flags &= ~SB_KNOTE;
-	splx(s);
+	SOCKBUF_UNLOCK(&so->so_rcv);
 }
 
 /*ARGSUSED*/
@@ -1889,10 +2152,13 @@
 filt_soread(struct knote *kn, long hint)
 {
 	struct socket *so = kn->kn_fp->f_data;
-	int result;
+	int needlock, result;
 
+	needlock = !SOCKBUF_OWNED(&so->so_rcv);
+	if (needlock)
+		SOCKBUF_LOCK(&so->so_rcv);
 	kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
-	if (so->so_state & SS_CANTRCVMORE) {
+	if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
 		result = 1;
@@ -1902,6 +2168,8 @@
 		result = (kn->kn_data >= kn->kn_sdata);
 	else
 		result = (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat);
+	if (needlock)
+		SOCKBUF_UNLOCK(&so->so_rcv);
 	return (result);
 }
 
@@ -1909,12 +2177,12 @@
 filt_sowdetach(struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
-	int s = splnet();
 
+	SOCKBUF_LOCK(&so->so_snd);
 	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
 	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
 		so->so_snd.sb_flags &= ~SB_KNOTE;
-	splx(s);
+	SOCKBUF_UNLOCK(&so->so_snd);
 }
 
 /*ARGSUSED*/
@@ -1922,10 +2190,13 @@
 filt_sowrite(struct knote *kn, long hint)
 {
 	struct socket *so = kn->kn_fp->f_data;
-	int result;
+	int needlock, result;
 
+	needlock = !SOCKBUF_OWNED(&so->so_snd);
+	if (needlock)
+		SOCKBUF_LOCK(&so->so_snd);
 	kn->kn_data = sbspace(&so->so_snd);
-	if (so->so_state & SS_CANTSENDMORE) {
+	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
 		result = 1;
@@ -1938,6 +2209,8 @@
 		result = (kn->kn_data >= kn->kn_sdata);
 	else
 		result = (kn->kn_data >= so->so_snd.sb_lowat);
+	if (needlock)
+		SOCKBUF_UNLOCK(&so->so_snd);
 	return (result);
 }
 
@@ -1947,6 +2220,7 @@
 {
 	struct socket *so = kn->kn_fp->f_data;
 
+	/* Unlocked read. */
 	kn->kn_data = so->so_qlen;
 	return (! TAILQ_EMPTY(&so->so_comp));
 }
--- //depot/vendor/freebsd/src/sys/kern/uipc_socket2.c	2004/06/04 04:10:43
+++ //depot/user/rwatson/netperf/sys/kern/uipc_socket2.c	2004/06/09 02:50:26
@@ -105,18 +105,40 @@
 	register struct socket *so;
 {
 
+	SOCK_LOCK(so);
+	/* XXXRW: so_state locking? */
 	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
 	so->so_state |= SS_ISCONNECTING;
+	SOCK_UNLOCK(so);
 }
 
+/*
+ * soisconnected() transitions a socket to a fully connected state.  If
+ * so->so_head is non-NULL, we transition it from the incompletely
+ * connected queue to the completely connected queue.  Otherwise, we
+ * just wake up the socket since it was an out-bound connection.
+ */
 void
 soisconnected(so)
 	struct socket *so;
 {
 	struct socket *head;
+#ifdef INVARIANTS
+	int need_lock = !SOCK_OWNED(so);
+#endif
+	KASSERT(need_lock == 1, ("soisconnected(): called with lock!"));
 
+	/* XXXRW: so_state locking? */
+	SOCK_LOCK(so);
 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
 	so->so_state |= SS_ISCONNECTED;
+	SOCK_UNLOCK(so);
+
+	/*
+	 * XXXRW: Maybe an unlocked read of so_head would be desirable
+	 * here?  Unlocked read of so_qstate probably not a good idea
+	 * regardless.  so_options handling here is a little unsavory.
+	 */
 	ACCEPT_LOCK();
 	head = so->so_head;
 	if (head != NULL && (so->so_qstate & SQ_INCOMP)) {
@@ -131,18 +153,47 @@
 			sorwakeup(head);
 			wakeup_one(&head->so_timeo);
 		} else {
-			ACCEPT_UNLOCK();
-			so->so_upcall =
+			void (*so_upcall)(struct socket *, void *, int);
+			void *so_upcallarg;
+			/*
+			 * XXXRW: Should probably copy the upcall fields to
+			 * local stack variables while holding the socket
+			 * lock (and clear the option), then release the
+			 * lock and make the call.  This will prevent lock
+			 * order reversals/recursion between this code and
+			 * the upcall implementation, which will likely
+			 * also want to frob the socket using locks.
+			 *
+			 * NOTE: We keep a local copy of the function
+			 * pointer so we can invoke the upcall without
+			 * holding locks.  However, we also have to copy
+			 * in the upcall fields from the head because the
+			 * filter may need to be called more than once.
+			 */
+			SOCK_LOCK(so);
+			so_upcall = so->so_upcall =
 			    head->so_accf->so_accept_filter->accf_callback;
-			so->so_upcallarg = head->so_accf->so_accept_filter_arg;
+			so_upcallarg = so->so_upcallarg =
+			    head->so_accf->so_accept_filter_arg;
 			so->so_rcv.sb_flags |= SB_UPCALL;
 			so->so_options &= ~SO_ACCEPTFILTER;
-			so->so_upcall(so, so->so_upcallarg, M_TRYWAIT);
+			SOCK_UNLOCK(so);
+			ACCEPT_UNLOCK();
+			/*
+			 * Call with our existing reference, but without
+			 * any locks.
+			 */
+			so_upcall(so, so_upcallarg, M_TRYWAIT);
 		}
 		return;
 	}
 	ACCEPT_UNLOCK();
 	wakeup(&so->so_timeo);
+	/*
+	 * XXXRW: If assuming socket lock is socket buffer receive
+	 * lock, should use a _locked variant of sorwakeup() and
+	 * avoid recursion here.
+	 */
 	sorwakeup(so);
 	sowwakeup(so);
 }
@@ -152,9 +203,27 @@
 	register struct socket *so;
 {
 
+	/*
+	 * XXXRW: Assuming we do need SOCK_LOCK(so) here, and the receive
+	 * and base socket lock remain identical, then we should combine
+	 * the SOCK_LOCK() and SOCKBUF_LOCK(...rcv) sections here.
+	 */
+	/* XXXRW: so_state locking? */
+	SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTING;
-	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
+	so->so_state |= SS_ISDISCONNECTING;
+	SOCK_UNLOCK(so);
+	SOCKBUF_LOCK(&so->so_rcv);
+	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	SOCKBUF_LOCK(&so->so_snd);
+	so->so_snd.sb_state |= SBS_CANTSENDMORE;
+	SOCKBUF_UNLOCK(&so->so_snd);
 	wakeup(&so->so_timeo);
+	/*
+	 * XXXRW: Multiple socket buffer lock/unlock here could be avoided
+	 * by coalescing with the above?
+	 */
 	sowwakeup(so);
 	sorwakeup(so);
 }
@@ -163,11 +232,30 @@
 soisdisconnected(so)
 	register struct socket *so;
 {
-
+	
+	/*
+	 * XXXRW: Assuming we do need SOCK_LOCK(so) here, and the receive
+	 * and base socket lock remain identical, then we should combine
+	 * the SOCK_LOCK() and SOCKBUF_LOCK(...rcv) sections here.
+	 */
+	/* XXXRW: so_state locking?  Keep both so_state updates under it. */
+	SOCK_LOCK(so);
 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
-	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
+	so->so_state |= SS_ISDISCONNECTED;
+	SOCK_UNLOCK(so);
+	SOCKBUF_LOCK(&so->so_rcv);
+	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	SOCKBUF_LOCK(&so->so_snd);
+	so->so_snd.sb_state |= SBS_CANTSENDMORE;
+	SOCKBUF_UNLOCK(&so->so_snd);
 	wakeup(&so->so_timeo);
+	/* Unlocked read of sb_cc. */
 	sbdrop(&so->so_snd, so->so_snd.sb_cc);
+	/*
+	 * XXXRW: Avoid multiple lock/unlock of socket buffer locks here
+	 * by coalescing with the above?
+	 */
 	sowwakeup(so);
 	sorwakeup(so);
 }
@@ -204,13 +292,17 @@
 	so->so_type = head->so_type;
 	so->so_options = head->so_options &~ SO_ACCEPTCONN;
 	so->so_linger = head->so_linger;
+	/* XXXRW: so_state locking? */
 	so->so_state = head->so_state | SS_NOFDREF;
 	so->so_proto = head->so_proto;
 	so->so_timeo = head->so_timeo;
 	so->so_cred = crhold(head->so_cred);
 #ifdef MAC
+	SOCK_LOCK(head);
 	mac_create_socket_from_socket(head, so);
+	SOCK_UNLOCK(head);
 #endif
+
 	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) ||
 	    (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
 		sodealloc(so);
@@ -246,6 +338,7 @@
 	}
 	ACCEPT_UNLOCK();
 	if (connstatus) {
+		/* XXXRW: so_state locking? */
 		so->so_state |= connstatus;
 		sorwakeup(head);
 		wakeup_one(&head->so_timeo);
@@ -268,8 +361,20 @@
 	struct socket *so;
 {
 
-	so->so_state |= SS_CANTSENDMORE;
-	sowwakeup(so);
+	/* XXXRW: so_state locking? */
+	SOCKBUF_LOCK(&so->so_snd);
+	socantsendmore_locked(so);
+	SOCKBUF_UNLOCK(&so->so_snd);
+}
+
+void
+socantsendmore_locked(so)
+	struct socket *so;
+{
+	SOCKBUF_LOCK_ASSERT(&so->so_snd);
+
+	so->so_snd.sb_state |= SBS_CANTSENDMORE;
+	sowwakeup_locked(so);
 }
 
 void
@@ -277,8 +382,20 @@
 	struct socket *so;
 {
 
-	so->so_state |= SS_CANTRCVMORE;
-	sorwakeup(so);
+	/* XXXRW: so_state locking? */
+	SOCKBUF_LOCK(&so->so_rcv);
+	socantrcvmore_locked(so);
+	SOCKBUF_UNLOCK(&so->so_rcv);
+}
+
+void
+socantrcvmore_locked(so)
+	struct socket *so;
+{
+	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+
+	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
+	sorwakeup_locked(so);
 }
 
 /*
@@ -288,9 +405,10 @@
 sbwait(sb)
 	struct sockbuf *sb;
 {
+	SOCKBUF_LOCK_ASSERT(sb);
 
 	sb->sb_flags |= SB_WAIT;
-	return (tsleep(&sb->sb_cc,
+	return (msleep(&sb->sb_cc, &sb->sb_mtx,
 	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
 	    sb->sb_timeo));
 }
@@ -305,9 +423,11 @@
 {
 	int error;
 
+	SOCKBUF_LOCK_ASSERT(sb);
+
 	while (sb->sb_flags & SB_LOCK) {
 		sb->sb_flags |= SB_WANT;
-		error = tsleep(&sb->sb_flags,
+		error = msleep(&sb->sb_flags, &sb->sb_mtx,
 		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
 		    "sblock", 0);
 		if (error)
@@ -318,9 +438,56 @@
 }
 
 /*
+ * The part of sowakeup that must be done while
+ * holding the sockbuf lock.
+ */
+static __inline void
+sowakeup_under_lock(struct socket *so, struct sockbuf *sb)
+{
+	SOCKBUF_LOCK_ASSERT(sb);
+
+	selwakeuppri(&sb->sb_sel, PSOCK);
+	sb->sb_flags &= ~SB_SEL;
+	if (sb->sb_flags & SB_WAIT) {
+		sb->sb_flags &= ~SB_WAIT;
+		wakeup(&sb->sb_cc);
+	}
+}
+
+/*
+ * Wakeup processes waiting on a socket buffer.
+ * Do asynchronous notification via SIGIO
+ * if the socket has the SS_ASYNC flag set.
+ *
+ * The caller is assumed to hold the necessary
+ * sockbuf lock.
+ */
+void
+sowakeup_locked(so, sb)
+	register struct socket *so;
+	register struct sockbuf *sb;
+{
+
+	SOCKBUF_LOCK_ASSERT(sb);
+
+	sowakeup_under_lock(so, sb);
+
+	/* XXXRW: so_state locking? */
+	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
+		pgsigio(&so->so_sigio, SIGIO, 0);
+	if (sb->sb_flags & SB_UPCALL)
+		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
+	if (sb->sb_flags & SB_AIO)		/* XXX locking */
+		aio_swake(so, sb);
+	KNOTE(&sb->sb_sel.si_note, 0);
+}
+
+/*
  * Wakeup processes waiting on a socket buffer.
  * Do asynchronous notification via SIGIO
  * if the socket has the SS_ASYNC flag set.
+ *
+ * The caller does not hold the sockbuf lock.
  */
 void
 sowakeup(so, sb)
@@ -328,19 +495,30 @@
 	register struct sockbuf *sb;
 {
 
-	selwakeuppri(&sb->sb_sel, PSOCK);
-	sb->sb_flags &= ~SB_SEL;
-	if (sb->sb_flags & SB_WAIT) {
-		sb->sb_flags &= ~SB_WAIT;
-		wakeup(&sb->sb_cc);
-	}
+	SOCKBUF_LOCK(sb);
+	sowakeup_under_lock(so, sb);
+	SOCKBUF_UNLOCK(sb);
+
+	/* Unlocked read. */
 	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
 		pgsigio(&so->so_sigio, SIGIO, 0);
+	/*
+	 * XXXRW: Need to hold a lock over so_upcall to prevent it from
+	 * changing while in process?
+	 */
 	if (sb->sb_flags & SB_UPCALL)
 		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
-	if (sb->sb_flags & SB_AIO)
+	if (sb->sb_flags & SB_AIO)		/* XXX locking */
 		aio_swake(so, sb);
+
+	/*
+	 * XXXRW: More efficient to do this with the sowakeup_under_lock()
+	 * code above to avoid multiple lock/unlock.  Does order relative
+	 * to so_upcall(), et al, matter?
+	 */
+	SOCKBUF_LOCK(sb);
 	KNOTE(&sb->sb_sel.si_note, 0);
+	SOCKBUF_UNLOCK(sb);
 }
 
 /*
@@ -380,18 +558,22 @@
 	register struct socket *so;
 	u_long sndcc, rcvcc;
 {
-	struct thread *td = curthread;
+	struct thread *td = curthread;		/* XXX */
 
 	if (sbreserve(&so->so_snd, sndcc, so, td) == 0)
 		goto bad;
 	if (sbreserve(&so->so_rcv, rcvcc, so, td) == 0)
 		goto bad2;
+	SOCKBUF_LOCK(&so->so_rcv);
 	if (so->so_rcv.sb_lowat == 0)
 		so->so_rcv.sb_lowat = 1;
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	SOCKBUF_LOCK(&so->so_snd);
 	if (so->so_snd.sb_lowat == 0)
 		so->so_snd.sb_lowat = MCLBYTES;
 	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
 		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
+	SOCKBUF_UNLOCK(&so->so_snd);
 	return (0);
 bad2:
 	sbrelease(&so->so_snd, so);
@@ -436,6 +618,13 @@
 	/*
 	 * td will only be NULL when we're in an interrupt
 	 * (e.g. in tcp_input())
+	 *
+	 * XXXRW: This comment is true, but only because the caller passed
+	 * in NULL, not for the 4.x reason that there is no thread
+	 * available.  Need to be careful of callers that do this wrong;
+	 * I suspect many do it wrong, and therefore many socket buffers
+	 * end up with the wrong limits, especially via the soreserve()
+	 * path.
 	 */
 	if (cc > sb_max_adj)
 		return (0);
@@ -500,6 +689,8 @@
 {
 	struct mbuf *m = sb->sb_mb;
 
+	SOCKBUF_LOCK_ASSERT(sb);
+
 	while (m && m->m_nextpkt)
 		m = m->m_nextpkt;
 
@@ -519,6 +710,8 @@
 	struct mbuf *m = sb->sb_mb;
 	struct mbuf *n;
 
+	SOCKBUF_LOCK_ASSERT(sb);
+
 	while (m && m->m_nextpkt)
 		m = m->m_nextpkt;
 
@@ -541,6 +734,7 @@
 #endif /* SOCKBUF_DEBUG */
 
 #define SBLINKRECORD(sb, m0) do {					\
+	SOCKBUF_LOCK_ASSERT(sb);					\
 	if ((sb)->sb_lastrecord != NULL)				\
 		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
 	else								\
@@ -555,7 +749,7 @@
  * discarded and mbufs are compacted where possible.
  */
 void
-sbappend(sb, m)
+sbappend_locked(sb, m)
 	struct sockbuf *sb;
 	struct mbuf *m;
 {
@@ -563,6 +757,9 @@
 
 	if (m == 0)
 		return;
+
+	SOCKBUF_LOCK_ASSERT(sb);
+
 	SBLASTRECORDCHK(sb);
 	n = sb->sb_mb;
 	if (n) {
@@ -570,7 +767,7 @@
 			n = n->m_nextpkt;
 		do {
 			if (n->m_flags & M_EOR) {
-				sbappendrecord(sb, m); /* XXXXXX!!!! */
+				sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
 				return;
 			}
 		} while (n->m_next && (n = n->m_next));
@@ -583,7 +780,7 @@
 		if ((n = sb->sb_lastrecord) != NULL) {
 			do {
 				if (n->m_flags & M_EOR) {
-					sbappendrecord(sb, m); /* XXXXXX!!!! */
+					sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
 					return;
 				}
 			} while (n->m_next && (n = n->m_next));
@@ -600,13 +797,33 @@
 }
 
 /*
+ * Append mbuf chain m to the last record in the
+ * socket buffer sb.  The additional space associated with
+ * the mbuf chain is recorded in sb.  Empty mbufs are
+ * discarded and mbufs are compacted where possible.
+ */
+void
+sbappend(sb, m)
+	struct sockbuf *sb;
+	struct mbuf *m;
+{
+	if (!SOCKBUF_OWNED(sb)) {
+		SOCKBUF_LOCK(sb);
+		sbappend_locked(sb, m);
+		SOCKBUF_UNLOCK(sb);
+	} else
+		sbappend_locked(sb, m);
+}
+
+/*
  * This version of sbappend() should only be used when the caller
  * absolutely knows that there will never be more than one record
  * in the socket buffer, that is, a stream protocol (such as TCP).
  */
 void
-sbappendstream(struct sockbuf *sb, struct mbuf *m)
+sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
 {
+	SOCKBUF_LOCK_ASSERT(sb);
 
 	KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
 	KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
@@ -619,6 +836,22 @@
 	SBLASTRECORDCHK(sb);
 }
 
+/*
+ * This version of sbappend() should only be used when the caller
+ * absolutely knows that there will never be more than one record
+ * in the socket buffer, that is, a stream protocol (such as TCP).
+ */
+void
+sbappendstream(struct sockbuf *sb, struct mbuf *m)
+{
+	if (!SOCKBUF_OWNED(sb)) {
+		SOCKBUF_LOCK(sb);
+		sbappendstream_locked(sb, m);
+		SOCKBUF_UNLOCK(sb);
+	} else
+		sbappendstream_locked(sb, m);
+}
+
 #ifdef SOCKBUF_DEBUG
 void
 sbcheck(sb)
@@ -628,6 +861,8 @@
 	struct mbuf *n = 0;
 	u_long len = 0, mbcnt = 0;
 
+	SOCKBUF_LOCK_ASSERT(sb);
+
 	for (m = sb->sb_mb; m; m = n) {
 	    n = m->m_nextpkt;
 	    for (; m; m = m->m_next) {
@@ -650,12 +885,14 @@
  * begins a new record.
  */
 void
-sbappendrecord(sb, m0)
+sbappendrecord_locked(sb, m0)
 	register struct sockbuf *sb;
 	register struct mbuf *m0;
 {
 	register struct mbuf *m;
 
+	SOCKBUF_LOCK_ASSERT(sb);
+
 	if (m0 == 0)
 		return;
 	m = sb->sb_mb;
@@ -683,18 +920,37 @@
 }
 
 /*
+ * As above, except the mbuf chain
+ * begins a new record.
+ */
+void
+sbappendrecord(sb, m0)
+	register struct sockbuf *sb;
+	register struct mbuf *m0;
+{
+	if (!SOCKBUF_OWNED(sb)) {
+		SOCKBUF_LOCK(sb);
+		sbappendrecord_locked(sb, m0);
+		SOCKBUF_UNLOCK(sb);
+	} else
+		sbappendrecord_locked(sb, m0);
+}
+
+/*
  * As above except that OOB data
  * is inserted at the beginning of the sockbuf,
  * but after any other OOB data.
  */
 void
-sbinsertoob(sb, m0)
+sbinsertoob_locked(sb, m0)
 	register struct sockbuf *sb;
 	register struct mbuf *m0;
 {
 	register struct mbuf *m;
 	register struct mbuf **mp;
 
+	SOCKBUF_LOCK_ASSERT(sb);
+
 	if (m0 == 0)
 		return;
 	for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
@@ -729,13 +985,31 @@
 }
 
 /*
+ * As above except that OOB data
+ * is inserted at the beginning of the sockbuf,
+ * but after any other OOB data.
+ */
+void
+sbinsertoob(sb, m0)
+	register struct sockbuf *sb;
+	register struct mbuf *m0;
+{
+	if (!SOCKBUF_OWNED(sb)) {
+		SOCKBUF_LOCK(sb);
+		sbinsertoob_locked(sb, m0);
+		SOCKBUF_UNLOCK(sb);
+	} else
+		sbinsertoob_locked(sb, m0);
+}
+
+/*
  * Append address and data, and optionally, control (ancillary) data
  * to the receive queue of a socket.  If present,
  * m0 must include a packet header with total length.
  * Returns 0 if no space in sockbuf or insufficient mbufs.
  */
 int
-sbappendaddr(sb, asa, m0, control)
+sbappendaddr_locked(sb, asa, m0, control)
 	struct sockbuf *sb;
 	const struct sockaddr *asa;
 	struct mbuf *m0, *control;
@@ -743,11 +1017,14 @@
 	struct mbuf *m, *n, *nlast;
 	int space = asa->sa_len;
 
+	SOCKBUF_LOCK_ASSERT(sb);
+
 	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
 		panic("sbappendaddr");
 	if (m0)
 		space += m0->m_pkthdr.len;
 	space += m_length(control, &n);
+
 	if (space > sbspace(sb))
 		return (0);
 #if MSIZE <= 256
@@ -769,25 +1046,50 @@
 	sballoc(sb, n);
 	nlast = n;
 	SBLINKRECORD(sb, m);
+	sb->sb_mbtail = nlast;
 
-	sb->sb_mbtail = nlast;
 	SBLASTMBUFCHK(sb);
-
 	SBLASTRECORDCHK(sb);
 	return (1);
 }
 
+/*
+ * Append address and data, and optionally, control (ancillary) data
+ * to the receive queue of a socket.  If present,
+ * m0 must include a packet header with total length.
+ * Returns 0 if no space in sockbuf or insufficient mbufs.
+ */
 int
-sbappendcontrol(sb, m0, control)
+sbappendaddr(sb, asa, m0, control)
+	struct sockbuf *sb;
+	const struct sockaddr *asa;
+	struct mbuf *m0, *control;
+{
+	int retval;
+
+	if (!SOCKBUF_OWNED(sb)) {
+		SOCKBUF_LOCK(sb);
+		retval = sbappendaddr_locked(sb, asa, m0, control);
+		SOCKBUF_UNLOCK(sb);
+	} else
+		retval = sbappendaddr_locked(sb, asa, m0, control);
+	return (retval);
+}
+
+int
+sbappendcontrol_locked(sb, m0, control)
 	struct sockbuf *sb;
 	struct mbuf *control, *m0;
 {
 	struct mbuf *m, *n, *mlast;
 	int space;
 
+	SOCKBUF_LOCK_ASSERT(sb);
+
 	if (control == 0)
 		panic("sbappendcontrol");
 	space = m_length(control, &n) + m_length(m0, NULL);
+
 	if (space > sbspace(sb))
 		return (0);
 	n->m_next = m0;			/* concatenate data to control */
@@ -799,14 +1101,30 @@
 	sballoc(sb, m);
 	mlast = m;
 	SBLINKRECORD(sb, control);
+	sb->sb_mbtail = mlast;
 
-	sb->sb_mbtail = mlast;
 	SBLASTMBUFCHK(sb);
+	SBLASTRECORDCHK(sb);
 
-	SBLASTRECORDCHK(sb);
 	return (1);
 }
 
+int
+sbappendcontrol(sb, m0, control)
+	struct sockbuf *sb;
+	struct mbuf *control, *m0;
+{
+	int retval;
+
+	if (!SOCKBUF_OWNED(sb)) {
+		SOCKBUF_LOCK(sb);
+		retval = sbappendcontrol_locked(sb, m0, control);
+		SOCKBUF_UNLOCK(sb);
+	} else
+		retval = sbappendcontrol_locked(sb, m0, control);
+	return (retval);
+}
+
 /*
  * Compress mbuf chain m into the socket
  * buffer sb following mbuf n.  If n
@@ -820,6 +1138,8 @@
 	register int eor = 0;
 	register struct mbuf *o;
 
+	SOCKBUF_LOCK_ASSERT(sb);
+
 	while (m) {
 		eor |= m->m_flags & M_EOR;
 		if (m->m_len == 0 &&
@@ -876,6 +1196,8 @@
 	register struct sockbuf *sb;
 {
 
+	SOCKBUF_LOCK_ASSERT(sb);
+
 	if (sb->sb_flags & SB_LOCK)
 		panic("sbflush: locked");
 	while (sb->sb_mbcnt) {
@@ -888,7 +1210,8 @@
 		sbdrop(sb, (int)sb->sb_cc);
 	}
 	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
-		panic("sbflush: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
+		panic("sbflush: cc %u || mb %p || mbcnt %u",
+			sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
 }
 
 /*
@@ -901,7 +1224,11 @@
 {
 	register struct mbuf *m;
 	struct mbuf *next;
+	int need_lock = !SOCKBUF_OWNED(sb);
 
+	if (need_lock)
+		SOCKBUF_LOCK(sb);
+
 	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
 	while (len > 0) {
 		if (m == 0) {
@@ -945,6 +1272,9 @@
 	} else if (m->m_nextpkt == NULL) {
 		sb->sb_lastrecord = m;
 	}
+
+	if (need_lock)
+		SOCKBUF_UNLOCK(sb);
 }
 
 /*
@@ -956,6 +1286,10 @@
 	register struct sockbuf *sb;
 {
 	register struct mbuf *m;
+	int need_lock = !SOCKBUF_OWNED(sb);
+
+	if (need_lock)
+		SOCKBUF_LOCK(sb);
 
 	m = sb->sb_mb;
 	if (m) {
@@ -966,6 +1300,9 @@
 		} while (m);
 	}
 	SB_EMPTY_FIXUP(sb);
+
+	if (need_lock)
+		SOCKBUF_UNLOCK(sb);
 }
 
 /*
@@ -1100,6 +1437,7 @@
 	xso->so_type = so->so_type;
 	xso->so_options = so->so_options;
 	xso->so_linger = so->so_linger;
+	/* Unlocked read. */
 	xso->so_state = so->so_state;
 	xso->so_pcb = so->so_pcb;
 	xso->xso_protocol = so->so_proto->pr_protocol;
--- //depot/vendor/freebsd/src/sys/kern/uipc_syscalls.c	2004/06/07 21:50:35
+++ //depot/user/rwatson/netperf/sys/kern/uipc_syscalls.c	2004/06/09 02:50:26
@@ -190,7 +190,9 @@
 	if ((error = fgetsock(td, fd, &so, NULL)) != 0)
 		goto done2;
 #ifdef MAC
+	SOCK_LOCK(so);
 	error = mac_check_socket_bind(td->td_ucred, so, sa);
+	SOCK_UNLOCK(so);
 	if (error)
 		goto done1;
 #endif
@@ -223,7 +225,9 @@
 	NET_LOCK_GIANT();
 	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
 #ifdef MAC
+		SOCK_LOCK(so);
 		error = mac_check_socket_listen(td->td_ucred, so);
+		SOCK_UNLOCK(so);
 		if (error)
 			goto done;
 #endif
@@ -274,6 +278,7 @@
 	error = fgetsock(td, uap->s, &head, &fflag);
 	if (error)
 		goto done2;
+	/* Unlocked read. */
 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
 		error = EINVAL;
 		goto done;
@@ -281,6 +286,10 @@
 	error = falloc(td, &nfp, &fd);
 	if (error)
 		goto done;
+	/*
+	 * Unlocked reads.
+	 * XXXRW: Dubious use of so->so_error.
+	 */
 	ACCEPT_LOCK();
 	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
 		ACCEPT_UNLOCK();
@@ -288,7 +297,7 @@
 		goto noconnection;
 	}
 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
-		if (head->so_state & SS_CANTRCVMORE) {
+		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
 			head->so_error = ECONNABORTED;
 			break;
 		}
@@ -309,8 +318,15 @@
 	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
 	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
 
+	/*
+	 * XXXRW: Before changing the flags on the socket, we have to bump
+	 * the reference count.  Otherwise, if the protocol calls sofree(),
+	 * the socket will be released due to a zero refcount.
+	 */
+	SOCK_LOCK(so);
 	soref(so);			/* file descriptor reference */
-	
+	SOCK_UNLOCK(so);
+
 	TAILQ_REMOVE(&head->so_comp, so, so_list);
 	head->so_qlen--;
 	so->so_qstate &= ~SQ_COMP;
@@ -321,6 +337,11 @@
 	/* An extra reference on `nfp' has been held for us by falloc(). */
 	td->td_retval[0] = fd;
 
+	/*
+	 * XXXRW: Might make life simpler to grab the socket buffer lock
+	 * here so it's held when KNOTE() calls back into the socket
+	 * code.
+	 */
 	/* connection has been removed from the listen queue */
 	KNOTE(&head->so_rcv.sb_sel.si_note, 0);
 
@@ -470,25 +491,32 @@
 	NET_LOCK_GIANT();
 	if ((error = fgetsock(td, fd, &so, NULL)) != 0)
 		goto done2;
+	/* XXXRW: so_state locking? */
 	if (so->so_state & SS_ISCONNECTING) {
 		error = EALREADY;
 		goto done1;
 	}
 #ifdef MAC
+	SOCK_LOCK(so);
 	error = mac_check_socket_connect(td->td_ucred, so, sa);
+	SOCK_UNLOCK(so);
 	if (error)
 		goto bad;
 #endif
 	error = soconnect(so, sa, td);
 	if (error)
 		goto bad;
+	/* XXXRW: so_state locking? */
 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
 		error = EINPROGRESS;
 		goto done1;
 	}
 	s = splnet();
+	SOCK_LOCK(so);
+	/* XXXRW: so_state locking? */
 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
-		error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0);
+		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
+		    "connec", 0);
 		if (error) {
 			if (error == EINTR || error == ERESTART)
 				interrupted = 1;
@@ -499,8 +527,10 @@
 		error = so->so_error;
 		so->so_error = 0;
 	}
+	SOCK_UNLOCK(so);
 	splx(s);
 bad:
+	/* XXXRW: so_state locking? */
 	if (!interrupted)
 		so->so_state &= ~SS_ISCONNECTING;
 	if (error == ERESTART)
@@ -694,7 +724,9 @@
 		goto bad2;
 
 #ifdef MAC
+	SOCK_LOCK(so);
 	error = mac_check_socket_send(td->td_ucred, so);
+	SOCK_UNLOCK(so);
 	if (error)
 		goto bad;
 #endif
@@ -937,7 +969,9 @@
 	}
 
 #ifdef MAC
+	SOCK_LOCK(so);
 	error = mac_check_socket_receive(td->td_ucred, so);
+	SOCK_UNLOCK(so);
 	if (error) {
 		fputsock(so);
 		NET_UNLOCK_GIANT();
@@ -1491,6 +1525,7 @@
 	NET_LOCK_GIANT();
 	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
 		goto done2;
+	/* XXXRW: so_state locking? */
 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 		error = ENOTCONN;
 		goto done1;
@@ -1733,6 +1768,7 @@
 		error = EINVAL;
 		goto done;
 	}
+	/* XXXRW: so_state locking? */
 	if ((so->so_state & SS_ISCONNECTED) == 0) {
 		error = ENOTCONN;
 		goto done;
@@ -1743,7 +1779,9 @@
 	}
 
 #ifdef MAC
+	SOCK_LOCK(so);
 	error = mac_check_socket_send(td->td_ucred, so);
+	SOCK_UNLOCK(so);
 	if (error)
 		goto done;
 #endif
@@ -1782,7 +1820,9 @@
 	/*
 	 * Protect against multiple writers to the socket.
 	 */
+	SOCKBUF_LOCK(&so->so_snd);
 	(void) sblock(&so->so_snd, M_WAITOK);
+	SOCKBUF_UNLOCK(&so->so_snd);
 
 	/*
 	 * Loop through the pages in the file, starting with the requested
@@ -1822,14 +1862,18 @@
 		 * Optimize the non-blocking case by looking at the socket space
 		 * before going to the extra work of constituting the sf_buf.
 		 */
+		SOCKBUF_LOCK(&so->so_snd);
+		/* XXXRW: so_state locking? */
 		if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
-			if (so->so_state & SS_CANTSENDMORE)
+			if (so->so_snd.sb_state & SBS_CANTSENDMORE)
 				error = EPIPE;
 			else
 				error = EAGAIN;
 			sbunlock(&so->so_snd);
+			SOCKBUF_UNLOCK(&so->so_snd);
 			goto done;
 		}
+		SOCKBUF_UNLOCK(&so->so_snd);
 		VM_OBJECT_LOCK(obj);
 		/*
 		 * Attempt to look up the page.
@@ -1885,6 +1929,7 @@
 			 * Get the page from backing store.
 			 */
 			bsize = vp->v_mount->mnt_stat.f_iosize;
+			mtx_lock(&Giant);	/* VFS */
 			vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
 			/*
 			 * XXXMAC: Because we don't have fp->f_cred here,
@@ -1896,6 +1941,7 @@
 			    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
 			    td->td_ucred, NOCRED, &resid, td);
 			VOP_UNLOCK(vp, 0, td);
+			mtx_unlock(&Giant);	/* VFS */
 			if (error)
 				VM_OBJECT_LOCK(obj);
 			vm_page_lock_queues();
@@ -1917,7 +1963,9 @@
 			}
 			vm_page_unlock_queues();
 			VM_OBJECT_UNLOCK(obj);
+			SOCKBUF_LOCK(&so->so_snd);
 			sbunlock(&so->so_snd);
+			SOCKBUF_UNLOCK(&so->so_snd);
 			goto done;
 		}
 		vm_page_unlock_queues();
@@ -1933,7 +1981,9 @@
 			if (pg->wire_count == 0 && pg->object == NULL)
 				vm_page_free(pg);
 			vm_page_unlock_queues();
+			SOCKBUF_LOCK(&so->so_snd);
 			sbunlock(&so->so_snd);
+			SOCKBUF_UNLOCK(&so->so_snd);
 			error = EINTR;
 			goto done;
 		}
@@ -1970,6 +2020,7 @@
 		 * Add the buffer to the socket buffer chain.
 		 */
 		s = splnet();
+		SOCKBUF_LOCK(&so->so_snd);
 retry_space:
 		/*
 		 * Make sure that the socket is still able to take more data.
@@ -1982,8 +2033,9 @@
 		 * blocks before the pru_send (or more accurately, any blocking
 		 * results in a loop back to here to re-check).
 		 */
-		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
-			if (so->so_state & SS_CANTSENDMORE) {
+		SOCKBUF_LOCK_ASSERT(&so->so_snd);
+		if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) {
+			if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 				error = EPIPE;
 			} else {
 				error = so->so_error;
@@ -1991,6 +2043,7 @@
 			}
 			m_freem(m);
 			sbunlock(&so->so_snd);
+			SOCKBUF_UNLOCK(&so->so_snd);
 			splx(s);
 			goto done;
 		}
@@ -1999,10 +2052,12 @@
 		 * after checking the connection state above in order to avoid
 		 * a race condition with sbwait().
 		 */
+		/* XXXRW: so_state locking? */
 		if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
 			if (so->so_state & SS_NBIO) {
 				m_freem(m);
 				sbunlock(&so->so_snd);
+				SOCKBUF_UNLOCK(&so->so_snd);
 				splx(s);
 				error = EAGAIN;
 				goto done;
@@ -2021,15 +2076,20 @@
 			}
 			goto retry_space;
 		}
+		SOCKBUF_UNLOCK(&so->so_snd);
 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
 		splx(s);
 		if (error) {
+			SOCKBUF_LOCK(&so->so_snd);
 			sbunlock(&so->so_snd);
+			SOCKBUF_UNLOCK(&so->so_snd);
 			goto done;
 		}
 		headersent = 1;
 	}
+	SOCKBUF_LOCK(&so->so_snd);
 	sbunlock(&so->so_snd);
+	SOCKBUF_UNLOCK(&so->so_snd);
 
 	/*
 	 * Send trailers. Wimp out and use writev(2).
--- //depot/vendor/freebsd/src/sys/kern/uipc_usrreq.c	2004/06/04 04:10:43
+++ //depot/user/rwatson/netperf/sys/kern/uipc_usrreq.c	2004/06/09 02:51:49
@@ -81,6 +81,13 @@
 static const struct	sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
 static ino_t	unp_ino;		/* prototype for fake inode numbers */
 
+static struct mtx unp_mtx;
+#define	UNP_LOCK_INIT() \
+	mtx_init(&unp_mtx, "unp", NULL, MTX_DEF)
+#define	UNP_LOCK()		mtx_lock(&unp_mtx)
+#define	UNP_UNLOCK()		mtx_unlock(&unp_mtx)
+#define	UNP_LOCK_ASSERT()	mtx_assert(&unp_mtx, MA_OWNED)
+
 static int     unp_attach(struct socket *);
 static void    unp_detach(struct unpcb *);
 static int     unp_bind(struct unpcb *,struct sockaddr *, struct thread *);
@@ -104,8 +111,10 @@
 
 	if (unp == NULL)
 		return (EINVAL);
+	UNP_LOCK();
 	unp_drop(unp, ECONNABORTED);
-	unp_detach(unp);
+	unp_detach(unp);	/* NB: unlocks */
+	SOCK_LOCK(so);
 	sotryfree(so);
 	return (0);
 }
@@ -114,6 +123,7 @@
 uipc_accept(struct socket *so, struct sockaddr **nam)
 {
 	struct unpcb *unp = sotounpcb(so);
+	const struct sockaddr *sa;
 
 	if (unp == NULL)
 		return (EINVAL);
@@ -123,13 +133,14 @@
 	 * if it was bound and we are still connected
 	 * (our peer may have closed already!).
 	 */
-	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
-		*nam = sodupsockaddr(
-		    (struct sockaddr *)unp->unp_conn->unp_addr, M_WAITOK);
-	} else {
-		*nam = sodupsockaddr((const struct sockaddr *)&sun_noname,
-		    M_WAITOK);
-	}
+	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
+	UNP_LOCK();
+	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL)
+		sa = (struct sockaddr *) unp->unp_conn->unp_addr;
+	else
+		sa = &sun_noname;
+	bcopy(sa, *nam, sa->sa_len);
+	UNP_UNLOCK();
 	return (0);
 }
 
@@ -158,21 +169,29 @@
 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct unpcb *unp = sotounpcb(so);
+	int retval;
 
 	if (unp == NULL)
 		return (EINVAL);
-	return (unp_connect(so, nam, curthread));
+	UNP_LOCK();
+	retval = unp_connect(so, nam, curthread);
+	UNP_UNLOCK();
+	return (retval);
 }
 
 int
 uipc_connect2(struct socket *so1, struct socket *so2)
 {
 	struct unpcb *unp = sotounpcb(so1);
+	int retval;
 
 	if (unp == NULL)
 		return (EINVAL);
 
-	return (unp_connect2(so1, so2));
+	UNP_LOCK();
+	retval = unp_connect2(so1, so2);
+	UNP_UNLOCK();
+	return (retval);
 }
 
 /* control is EOPNOTSUPP */
@@ -185,7 +204,8 @@
 	if (unp == NULL)
 		return (EINVAL);
 
-	unp_detach(unp);
+	UNP_LOCK();
+	unp_detach(unp);	/* NB: unlocks unp */
 	return (0);
 }
 
@@ -196,7 +216,9 @@
 
 	if (unp == NULL)
 		return (EINVAL);
+	UNP_LOCK();
 	unp_disconnect(unp);
+	UNP_UNLOCK();
 	return (0);
 }
 
@@ -204,31 +226,38 @@
 uipc_listen(struct socket *so, struct thread *td)
 {
 	struct unpcb *unp = sotounpcb(so);
+	int retval;
 
 	if (unp == NULL || unp->unp_vnode == NULL)
 		return (EINVAL);
-	return (unp_listen(unp, td));
+	UNP_LOCK();
+	retval = unp_listen(unp, td);
+	UNP_UNLOCK();
+	return (retval);
 }
 
 static int
 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	struct unpcb *unp = sotounpcb(so);
+	const struct sockaddr *sa;
 
 	if (unp == NULL)
 		return (EINVAL);
-	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL)
-		*nam = sodupsockaddr(
-		    (struct sockaddr *)unp->unp_conn->unp_addr, M_WAITOK);
+	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
+	UNP_LOCK();
+	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL)
+		sa = (struct sockaddr *) unp->unp_conn->unp_addr;
 	else {
 		/*
 		 * XXX: It seems that this test always fails even when
 		 * connection is established.  So, this else clause is
 		 * added as workaround to return PF_LOCAL sockaddr.
 		 */
-		*nam = sodupsockaddr((const struct sockaddr *)&sun_noname,
-		    M_WAITOK);
+		sa = &sun_noname;
 	}
+	bcopy(sa, *nam, sa->sa_len);
+	UNP_UNLOCK();
 	return (0);
 }
 
@@ -241,6 +270,7 @@
 
 	if (unp == NULL)
 		return (EINVAL);
+	UNP_LOCK();
 	switch (so->so_type) {
 	case SOCK_DGRAM:
 		panic("uipc_rcvd DGRAM?");
@@ -250,6 +280,9 @@
 		if (unp->unp_conn == NULL)
 			break;
 		so2 = unp->unp_conn->unp_socket;
+		/* NB: careful of order here */
+		SOCKBUF_LOCK(&so2->so_snd);
+		SOCKBUF_LOCK(&so->so_rcv);
 		/*
 		 * Adjust backpressure on sender
 		 * and wakeup any waiting to write.
@@ -261,12 +294,15 @@
 		(void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
 		    newhiwat, RLIM_INFINITY);
 		unp->unp_cc = so->so_rcv.sb_cc;
-		sowwakeup(so2);
+		sowwakeup_locked(so2);
+		SOCKBUF_UNLOCK(&so->so_rcv);
+		SOCKBUF_UNLOCK(&so2->so_snd);
 		break;
 
 	default:
 		panic("uipc_rcvd unknown socktype");
 	}
+	UNP_UNLOCK();
 	return (0);
 }
 
@@ -293,6 +329,7 @@
 	if (control != NULL && (error = unp_internalize(&control, td)))
 		goto release;
 
+	UNP_LOCK();
 	switch (so->so_type) {
 	case SOCK_DGRAM:
 	{
@@ -317,13 +354,14 @@
 			from = (struct sockaddr *)unp->unp_addr;
 		else
 			from = &sun_noname;
-		if (sbappendaddr(&so2->so_rcv, from, m, control)) {
-			sorwakeup(so2);
+		SOCKBUF_LOCK(&so2->so_rcv);
+		if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) {
+			sorwakeup_locked(so2);
 			m = NULL;
 			control = NULL;
-		} else {
+		} else
 			error = ENOBUFS;
-		}
+		SOCKBUF_UNLOCK(&so2->so_rcv);
 		if (nam != NULL)
 			unp_disconnect(unp);
 		break;
@@ -335,6 +373,7 @@
 		 * Note: A better implementation would complain
 		 * if not equal to the peer's address.
 		 */
+		/* Unlocked read. */
 		if ((so->so_state & SS_ISCONNECTED) == 0) {
 			if (nam != NULL) {
 				error = unp_connect(so, nam, td);
@@ -346,24 +385,25 @@
 			}
 		}
 
-		if (so->so_state & SS_CANTSENDMORE) {
+		/* Unlocked read. */
+		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 			error = EPIPE;
 			break;
 		}
 		if (unp->unp_conn == NULL)
 			panic("uipc_send connected but no connection?");
 		so2 = unp->unp_conn->unp_socket;
+		SOCKBUF_LOCK(&so2->so_rcv);
 		/*
 		 * Send to paired receive port, and then reduce
 		 * send buffer hiwater marks to maintain backpressure.
 		 * Wake up readers.
 		 */
 		if (control != NULL) {
-			if (sbappendcontrol(&so2->so_rcv, m, control))
+			if (sbappendcontrol_locked(&so2->so_rcv, m, control))
 				control = NULL;
-		} else {
-			sbappend(&so2->so_rcv, m);
-		}
+		} else
+			sbappend_locked(&so2->so_rcv, m);
 		so->so_snd.sb_mbmax -=
 			so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt;
 		unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt;
@@ -372,7 +412,8 @@
 		(void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
 		    newhiwat, RLIM_INFINITY);
 		unp->unp_conn->unp_cc = so2->so_rcv.sb_cc;
-		sorwakeup(so2);
+		sorwakeup_locked(so2);
+		SOCKBUF_UNLOCK(&so2->so_rcv);
 		m = NULL;
 		break;
 
@@ -388,10 +429,10 @@
 		socantsendmore(so);
 		unp_shutdown(unp);
 	}
+	UNP_UNLOCK();
 
 	if (control != NULL && error != 0)
-		unp_dispose(control);
-
+		unp_dispose(control);		/* XXX need head lock? */
 release:
 	if (control != NULL)
 		m_freem(control);
@@ -408,15 +449,18 @@
 
 	if (unp == NULL)
 		return (EINVAL);
+	UNP_LOCK();
 	sb->st_blksize = so->so_snd.sb_hiwat;
 	if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) {
 		so2 = unp->unp_conn->unp_socket;
+		/* Unlocked read. */
 		sb->st_blksize += so2->so_rcv.sb_cc;
 	}
 	sb->st_dev = NOUDEV;
 	if (unp->unp_ino == 0)
 		unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
 	sb->st_ino = unp->unp_ino;
+	UNP_UNLOCK();
 	return (0);
 }
 
@@ -427,8 +471,10 @@
 
 	if (unp == NULL)
 		return (EINVAL);
+	UNP_LOCK();
 	socantsendmore(so);
 	unp_shutdown(unp);
+	UNP_UNLOCK();
 	return (0);
 }
 
@@ -436,15 +482,18 @@
 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	struct unpcb *unp = sotounpcb(so);
+	const struct sockaddr *sa;
 
 	if (unp == NULL)
 		return (EINVAL);
+	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
+	UNP_LOCK();
 	if (unp->unp_addr != NULL)
-		*nam = sodupsockaddr((struct sockaddr *)unp->unp_addr,
-		    M_WAITOK);
+		sa = (struct sockaddr *) unp->unp_addr;
 	else
-		*nam = sodupsockaddr((const struct sockaddr *)&sun_noname,
-		    M_WAITOK);
+		sa = &sun_noname;
+	bcopy(sa, *nam, sa->sa_len);
+	UNP_UNLOCK();
 	return (0);
 }
 
@@ -462,21 +511,26 @@
 	struct sockopt *sopt;
 {
 	struct unpcb *unp = sotounpcb(so);
+	struct xucred xu;
 	int error;
 
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case LOCAL_PEERCRED:
+			error = 0;
+			UNP_LOCK();
 			if (unp->unp_flags & UNP_HAVEPC)
-				error = sooptcopyout(sopt, &unp->unp_peercred,
-				    sizeof(unp->unp_peercred));
+				xu = unp->unp_peercred;
 			else {
 				if (so->so_type == SOCK_STREAM)
 					error = ENOTCONN;
 				else
 					error = EINVAL;
 			}
+			UNP_UNLOCK();
+			if (error == 0)
+				error = sooptcopyout(sopt, &xu, sizeof(xu));
 			break;
 		default:
 			error = EOPNOTSUPP;
@@ -547,15 +601,22 @@
 			return (error);
 	}
 	unp = uma_zalloc(unp_zone, M_WAITOK);
+	/*
+	 * XXXRW: Why check result if M_WAITOK?
+	 */
 	if (unp == NULL)
 		return (ENOBUFS);
 	bzero(unp, sizeof *unp);
+	LIST_INIT(&unp->unp_refs);
+	unp->unp_socket = so;
+
+	UNP_LOCK();
 	unp->unp_gencnt = ++unp_gencnt;
 	unp_count++;
-	LIST_INIT(&unp->unp_refs);
-	unp->unp_socket = so;
 	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
 			 : &unp_shead, unp, unp_link);
+	UNP_UNLOCK();
+
 	so->so_pcb = unp;
 	return (0);
 }
@@ -564,18 +625,27 @@
 unp_detach(unp)
 	register struct unpcb *unp;
 {
+	struct vnode *vp;
+
+	UNP_LOCK_ASSERT();
+
 	LIST_REMOVE(unp, unp_link);
 	unp->unp_gencnt = ++unp_gencnt;
 	--unp_count;
-	if (unp->unp_vnode != NULL) {
+	if ((vp = unp->unp_vnode) != NULL) {
+		/*
+		 * XXXRW: should v_socket be frobbed only while holding
+		 * Giant?
+		 */
 		unp->unp_vnode->v_socket = NULL;
-		vrele(unp->unp_vnode);
 		unp->unp_vnode = NULL;
 	}
 	if (unp->unp_conn != NULL)
 		unp_disconnect(unp);
-	while (!LIST_EMPTY(&unp->unp_refs))
-		unp_drop(LIST_FIRST(&unp->unp_refs), ECONNRESET);
+	while (!LIST_EMPTY(&unp->unp_refs)) {
+		struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
+		unp_drop(ref, ECONNRESET);
+	}
 	soisdisconnected(unp->unp_socket);
 	unp->unp_socket->so_pcb = NULL;
 	if (unp_rights) {
@@ -591,7 +661,13 @@
 	}
 	if (unp->unp_addr != NULL)
 		FREE(unp->unp_addr, M_SONAME);
+	UNP_UNLOCK();
 	uma_zfree(unp_zone, unp);
+	if (vp) {
+		mtx_lock(&Giant);
+		vrele(vp);
+		mtx_unlock(&Giant);
+	}
 }
 
 static int
@@ -608,6 +684,12 @@
 	struct nameidata nd;
 	char *buf;
 
+	/*
+	 * XXXRW: This test-and-set of unp_vnode is non-atomic; the
+	 * unlocked read here is fine, but the value of unp_vnode needs
+	 * to be tested again after we do all the lookups to see if the
+	 * pcb is still unbound?
+	 */
 	if (unp->unp_vnode != NULL)
 		return (EINVAL);
 
@@ -618,15 +700,14 @@
 	buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
 	strlcpy(buf, soun->sun_path, namelen + 1);
 
+	mtx_lock(&Giant);
 restart:
 	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE,
 	    buf, td);
 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
 	error = namei(&nd);
-	if (error) {
-		free(buf, M_TEMP);
-		return (error);
-	}
+	if (error)
+		goto done;
 	vp = nd.ni_vp;
 	if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
@@ -636,14 +717,12 @@
 			vput(nd.ni_dvp);
 		if (vp != NULL) {
 			vrele(vp);
-			free(buf, M_TEMP);
-			return (EADDRINUSE);
+			error = EADDRINUSE;
+			goto done;
 		}
 		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
-		if (error) {
-			free(buf, M_TEMP);
-			return (error);
-		}
+		if (error)
+			goto done;
 		goto restart;
 	}
 	VATTR_NULL(&vattr);
@@ -659,18 +738,22 @@
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
-	if (error) {
-		free(buf, M_TEMP);
-		return (error);
-	}
+	if (error)
+		goto done;
 	vp = nd.ni_vp;
+	ASSERT_VOP_LOCKED(vp, "unp_bind");
+	soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
+	UNP_LOCK();
 	vp->v_socket = unp->unp_socket;
 	unp->unp_vnode = vp;
-	unp->unp_addr = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
+	unp->unp_addr = soun;
+	UNP_UNLOCK();
 	VOP_UNLOCK(vp, 0, td);
 	vn_finished_write(mp);
+done:
+	mtx_unlock(&Giant);
 	free(buf, M_TEMP);
-	return (0);
+	return (error);
 }
 
 static int
@@ -682,22 +765,32 @@
 	register struct sockaddr_un *soun = (struct sockaddr_un *)nam;
 	register struct vnode *vp;
 	register struct socket *so2, *so3;
-	struct unpcb *unp, *unp2, *unp3;
+	struct unpcb *unp = sotounpcb(so);
+	struct unpcb *unp2, *unp3;
 	int error, len;
 	struct nameidata nd;
 	char buf[SOCK_MAXADDRLEN];
+	struct sockaddr *sa;
 
+	UNP_LOCK_ASSERT();
+
 	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
 	if (len <= 0)
 		return (EINVAL);
 	strlcpy(buf, soun->sun_path, len + 1);
-
+	UNP_UNLOCK();
+	sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
+	mtx_lock(&Giant);
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td);
 	error = namei(&nd);
 	if (error)
-		return (error);
-	vp = nd.ni_vp;
+		vp = NULL;
+	else
+		vp = nd.ni_vp;
 	NDFREE(&nd, NDF_ONLY_PNBUF);
+	if (error)
+		goto bad;
+
 	if (vp->v_type != VSOCK) {
 		error = ENOTSOCK;
 		goto bad;
@@ -714,20 +807,33 @@
 		error = EPROTOTYPE;
 		goto bad;
 	}
+	mtx_unlock(&Giant);
+	UNP_LOCK();
 	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
-		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
-		    (so3 = sonewconn(so2, 0)) == NULL) {
+		if (so2->so_options & SO_ACCEPTCONN) {
+			/*
+			 * NB: drop locks here so unp_attach is entered
+			 *     w/o locks; this avoids a recursive lock
+			 *     of the head and holding sleep locks across
+			 *     a (potentially) blocking malloc.
+			 */
+			UNP_UNLOCK();
+			so3 = sonewconn(so2, 0);
+			UNP_LOCK();
+		} else
+			so3 = NULL;
+		if (so3 == NULL) {
 			error = ECONNREFUSED;
-			goto bad;
+			goto bad2;
 		}
 		unp = sotounpcb(so);
 		unp2 = sotounpcb(so2);
 		unp3 = sotounpcb(so3);
-		if (unp2->unp_addr != NULL)
-			unp3->unp_addr = (struct sockaddr_un *)
-			    sodupsockaddr((struct sockaddr *)unp2->unp_addr,
-			    M_WAITOK);
-
+		if (unp2->unp_addr != NULL) {
+			bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
+			unp3->unp_addr = (struct sockaddr_un *) sa;
+			sa = NULL;
+		}
 		/*
 		 * unp_peercred management:
 		 *
@@ -750,15 +856,24 @@
 		    sizeof(unp->unp_peercred));
 		unp->unp_flags |= UNP_HAVEPC;
 #ifdef MAC
+		SOCK_LOCK(so);
 		mac_set_socket_peer_from_socket(so, so3);
 		mac_set_socket_peer_from_socket(so3, so);
+		SOCK_UNLOCK(so);
 #endif
 
 		so2 = so3;
 	}
 	error = unp_connect2(so, so2);
+bad2:
+	UNP_UNLOCK();
+	mtx_lock(&Giant);
 bad:
-	vput(vp);
+	if (vp != NULL)
+		vput(vp);
+	mtx_unlock(&Giant);
+	free(sa, M_SONAME);
+	UNP_LOCK();
 	return (error);
 }
 
@@ -770,6 +885,8 @@
 	register struct unpcb *unp = sotounpcb(so);
 	register struct unpcb *unp2;
 
+	UNP_LOCK_ASSERT();
+
 	if (so2->so_type != so->so_type)
 		return (EPROTOTYPE);
 	unp2 = sotounpcb(so2);
@@ -799,6 +916,8 @@
 {
 	register struct unpcb *unp2 = unp->unp_conn;
 
+	UNP_LOCK_ASSERT();
+
 	if (unp2 == NULL)
 		return;
 	unp->unp_conn = NULL;
@@ -806,6 +925,7 @@
 
 	case SOCK_DGRAM:
 		LIST_REMOVE(unp, unp_reflink);
+		/* XXXRW: so_state locking? */
 		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
 		break;
 
@@ -815,6 +935,7 @@
 		soisdisconnected(unp2->unp_socket);
 		break;
 	}
+	return;
 }
 
 #ifdef notdef
@@ -857,8 +978,10 @@
 	 * OK, now we're committed to doing something.
 	 */
 	xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
+	UNP_LOCK();
 	gencnt = unp_gencnt;
 	n = unp_count;
+	UNP_UNLOCK();
 
 	xug->xug_len = sizeof *xug;
 	xug->xug_count = n;
@@ -872,6 +995,7 @@
 
 	unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
 
+	UNP_LOCK();
 	for (unp = LIST_FIRST(head), i = 0; unp && i < n;
 	     unp = LIST_NEXT(unp, unp_link)) {
 		if (unp->unp_gencnt <= gencnt) {
@@ -881,6 +1005,7 @@
 			unp_list[i++] = unp;
 		}
 	}
+	UNP_UNLOCK();
 	n = i;			/* in case we lost some during malloc */
 
 	error = 0;
@@ -939,6 +1064,8 @@
 {
 	struct socket *so;
 
+	UNP_LOCK_ASSERT();
+
 	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
 	    (so = unp->unp_conn->unp_socket))
 		socantrcvmore(so);
@@ -951,6 +1078,8 @@
 {
 	struct socket *so = unp->unp_socket;
 
+	UNP_LOCK_ASSERT();
+
 	so->so_error = errno;
 	unp_disconnect(unp);
 }
@@ -1102,6 +1231,8 @@
 	uma_zone_set_max(unp_zone, nmbclusters);
 	LIST_INIT(&unp_dhead);
 	LIST_INIT(&unp_shead);
+
+	UNP_LOCK_INIT();
 }
 
 static int
@@ -1258,6 +1389,12 @@
 	struct file **extra_ref, **fpp;
 	int nunref, i;
 
+	UNP_LOCK_ASSERT();
+
+	/*
+	 * XXXRW: unp_gcing seems like a bad idea.  Use a real lock
+	 * instead?
+	 */
 	if (unp_gcing)
 		return;
 	unp_gcing = 1;
@@ -1266,6 +1403,10 @@
 	 * before going through all this, set all FDs to
 	 * be NOT defered and NOT externally accessible
 	 */
+	/*
+	 * XXXRW: Acquiring a sleep lock while holding UNP
+	 * mutex cannot be a good thing.
+	 */
 	sx_slock(&filelist_lock);
 	LIST_FOREACH(fp, &filehead, f_list)
 		fp->f_gcflag &= ~(FMARK|FDEFER);
@@ -1348,7 +1489,9 @@
 			 * message buffers. Follow those links and mark them
 			 * as accessible too.
 			 */
+			SOCKBUF_LOCK(&so->so_rcv);
 			unp_scan(so->so_rcv.sb_mb, unp_mark);
+			SOCKBUF_UNLOCK(&so->so_rcv);
 		}
 	} while (unp_defer);
 	sx_sunlock(&filelist_lock);
@@ -1452,7 +1595,11 @@
 	struct unpcb *unp;
 	struct thread *td;
 {
+	UNP_LOCK_ASSERT();
 
+	/*
+	 * XXXRW: Why populate the local peer cred with our own credential?
+	 */
 	cru2x(td->td_ucred, &unp->unp_peercred);
 	unp->unp_flags |= UNP_HAVEPCCACHED;
 	return (0);
--- //depot/vendor/freebsd/src/sys/kern/vfs_aio.c	2004/05/30 20:35:40
+++ //depot/user/rwatson/netperf/sys/kern/vfs_aio.c	2004/05/31 05:13:02
@@ -566,8 +566,12 @@
 			so = fp->f_data;
 			TAILQ_REMOVE(&so->so_aiojobq, aiocbe, list);
 			if (TAILQ_EMPTY(&so->so_aiojobq)) {
+				SOCKBUF_LOCK(&so->so_snd);
 				so->so_snd.sb_flags &= ~SB_AIO;
+				SOCKBUF_UNLOCK(&so->so_snd);
+				SOCKBUF_LOCK(&so->so_rcv);
 				so->so_rcv.sb_flags &= ~SB_AIO;
+				SOCKBUF_UNLOCK(&so->so_rcv);
 			}
 		}
 		TAILQ_REMOVE(&ki->kaio_sockqueue, aiocbe, plist);
@@ -1231,10 +1235,14 @@
 
 	if (sb == &so->so_snd) {
 		opcode = LIO_WRITE;
+		SOCKBUF_LOCK(&so->so_snd);
 		so->so_snd.sb_flags &= ~SB_AIO;
+		SOCKBUF_UNLOCK(&so->so_snd);
 	} else {
 		opcode = LIO_READ;
+		SOCKBUF_LOCK(&so->so_rcv);
 		so->so_rcv.sb_flags &= ~SB_AIO;
+		SOCKBUF_UNLOCK(&so->so_rcv);
 	}
 
 	for (cb = TAILQ_FIRST(&so->so_aiojobq); cb; cb = cbn) {
@@ -1443,10 +1451,15 @@
 		    LIO_WRITE) && (!sowriteable(so)))) {
 			TAILQ_INSERT_TAIL(&so->so_aiojobq, aiocbe, list);
 			TAILQ_INSERT_TAIL(&ki->kaio_sockqueue, aiocbe, plist);
-			if (opcode == LIO_READ)
+			if (opcode == LIO_READ) {
+				SOCKBUF_LOCK(&so->so_rcv);
 				so->so_rcv.sb_flags |= SB_AIO;
-			else
+				SOCKBUF_UNLOCK(&so->so_rcv);
+			} else {
+				SOCKBUF_LOCK(&so->so_snd);
 				so->so_snd.sb_flags |= SB_AIO;
+				SOCKBUF_UNLOCK(&so->so_snd);
+			}
 			aiocbe->jobstate = JOBST_JOBQGLOBAL; /* XXX */
 			ki->kaio_queue_count++;
 			num_queue_count++;
--- //depot/vendor/freebsd/src/sys/net/bpf.c	2004/04/07 20:52:05
+++ //depot/user/rwatson/netperf/sys/net/bpf.c	2004/04/08 03:11:34
@@ -553,7 +553,7 @@
 	struct ifnet *ifp;
 	struct mbuf *m;
 	int error;
-	static struct sockaddr dst;
+	struct sockaddr dst;
 	int datlen;
 
 	if (d->bd_bif == 0)
@@ -564,6 +564,7 @@
 	if (uio->uio_resid == 0)
 		return (0);
 
+	bzero(&dst, sizeof(dst));
 	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, &dst, &datlen);
 	if (error)
 		return (error);
@@ -579,12 +580,10 @@
 	mac_create_mbuf_from_bpfdesc(d, m);
 	BPFD_UNLOCK(d);
 #endif
-	mtx_lock(&Giant);
+	/* NB: the driver frees the mbuf */
+	NET_LOCK_GIANT();
 	error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0);
-	mtx_unlock(&Giant);
-	/*
-	 * The driver frees the mbuf.
-	 */
+	NET_UNLOCK_GIANT();
 	return (error);
 }
 
--- //depot/vendor/freebsd/src/sys/net/if.c	2004/04/24 22:25:43
+++ //depot/user/rwatson/netperf/sys/net/if.c	2004/05/04 02:32:28
@@ -367,6 +367,10 @@
 	struct sockaddr_dl *sdl;
 	struct ifaddr *ifa;
 
+	/*
+	 * XXXRW: Shouldn't we add to the global list only once the ifnet
+	 * is ready for use?
+	 */
 	IF_AFDATA_LOCK_INIT(ifp);
 	ifp->if_afdata_initialized = 0;
 	IFNET_WLOCK();
@@ -657,6 +661,7 @@
 
 /*
  * Create a clone network interface.
+ * XXXRW: Locking?
  */
 int
 if_clone_create(char *name, int len)
@@ -729,6 +734,7 @@
 
 /*
  * Destroy a clone network interface.
+ * XXXRW: Locking?
  */
 int
 if_clone_destroy(const char *name)
@@ -766,6 +772,7 @@
 
 /*
  * Look up a network interface cloner.
+ * XXXRW: Locking?
  */
 static struct if_clone *
 if_clone_lookup(const char *name, int *unitp)
@@ -807,6 +814,7 @@
 
 /*
  * Register a network interface cloner.
+ * XXXRW: Locking?
  */
 void
 if_clone_attach(struct if_clone *ifc)
@@ -849,6 +857,7 @@
 
 /*
  * Unregister a network interface cloner.
+ * XXXRW: Locking?
  */
 void
 if_clone_detach(struct if_clone *ifc)
@@ -861,6 +870,7 @@
 
 /*
  * Provide list of interface cloners to userspace.
+ * XXXRW: Locking?
  */
 static int
 if_clone_list(struct if_clonereq *ifcr)
--- //depot/vendor/freebsd/src/sys/net/if_gif.c	2004/05/30 20:25:31
+++ //depot/user/rwatson/netperf/sys/net/if_gif.c	2004/05/31 01:41:33
@@ -88,6 +88,10 @@
  * gif_mtx protects the global gif_softc_list.
  * XXX: Per-softc locking is still required.
  */
+/*
+ * XXXRW: Note that gif_mtx only protects global gif-related data, not
+ * per-softc data.  See also netinet/in_gif.c for locking needs.
+ */
 static struct mtx gif_mtx;
 static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
 static LIST_HEAD(, gif_softc) gif_softc_list;
@@ -500,6 +504,9 @@
 }
 
 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
+/*
+ * XXXRW: per-gif softc locking required.
+ */
 int
 gif_ioctl(ifp, cmd, data)
 	struct ifnet *ifp;
@@ -756,8 +763,9 @@
 	int s;
 	int error = 0; 
 
-	s = splnet();
-
+	/*
+	 * XXXRW: per-gif softc locking required.
+	 */
 	mtx_lock(&gif_mtx);
 	LIST_FOREACH(sc2, &gif_softc_list, gif_list) {
 		if (sc2 == sc)
@@ -786,6 +794,9 @@
 	}
 	mtx_unlock(&gif_mtx);
 
+	/*
+	 * XXXRW: Lock gif softc fields.
+	 */
 	/* XXX we can detach from both, but be polite just in case */
 	if (sc->gif_psrc)
 		switch (sc->gif_psrc->sa_family) {
--- //depot/vendor/freebsd/src/sys/net/if_gre.c	2004/05/30 20:25:31
+++ //depot/user/rwatson/netperf/sys/net/if_gre.c	2004/05/31 01:41:33
@@ -94,7 +94,8 @@
 
 /*
  * gre_mtx protects all global variables in if_gre.c.
- * XXX: gre_softc data not protected yet.
+ *
+ * XXXRW: It does not protect softc-specific data.
  */
 struct mtx gre_mtx;
 static MALLOC_DEFINE(M_GRE, GRENAME, "Generic Routing Encapsulation");
--- //depot/vendor/freebsd/src/sys/net/if_gre.h	2004/03/22 16:06:54
+++ //depot/user/rwatson/netperf/sys/net/if_gre.h	2004/03/22 16:18:59
@@ -54,6 +54,12 @@
 	WCCP_V2
 } wccp_ver_t;
 
+/*
+ * XXXRW: softc fields need locking.
+ *
+ * XXXRW: gre's notion of a 'called' count is not MP-safe, as it assumes
+ * only one packet can be processed at a time.
+ */
 struct gre_softc {
 	struct ifnet sc_if;
 	LIST_ENTRY(gre_softc) sc_list;
--- //depot/vendor/freebsd/src/sys/net/if_sl.c	2004/06/07 20:45:32
+++ //depot/user/rwatson/netperf/sys/net/if_sl.c	2004/06/09 02:50:26
@@ -162,6 +162,7 @@
 #define	ABT_WINDOW	(ABT_COUNT*2+2)	/* in seconds - time to count */
 
 static LIST_HEAD(sl_list, sl_softc) sl_list;
+static struct mtx slip_mtx;
 
 #define FRAME_END	 	0xc0		/* Frame End */
 #define FRAME_ESCAPE		0xdb		/* Frame Esc */
@@ -198,6 +199,7 @@
 { 
 	switch (type) { 
 	case MOD_LOAD: 
+		mtx_init(&slip_mtx, "slip_mtx", NULL, MTX_DEF);
 		ldisc_register(SLIPDISC, &slipdisc);
 		LIST_INIT(&sl_list);
 		break; 
@@ -217,6 +219,7 @@
 
 DECLARE_MODULE(if_sl, sl_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 
+/* Locked using slip_mtx. */
 static int *st_unit_list;
 static size_t st_unit_max = 0;
 
@@ -225,6 +228,7 @@
 {
 	struct sl_softc *nc;
 
+	mtx_assert(&slip_mtx, MA_OWNED);
 	LIST_FOREACH(nc, &sl_list, sl_next) {
 		if (nc->sc_if.if_dunit == unit)
 			return (0);
@@ -238,6 +242,7 @@
 {
 	size_t i;
 
+	mtx_assert(&slip_mtx, MA_OWNED);
 	for (i = 0; i < st_unit_max; i++)
 		if (st_unit_list[i] == unit)
 			return 1;
@@ -250,6 +255,7 @@
 {
 	int *t;
 
+	mtx_assert(&slip_mtx, MA_OWNED);
 	if (slisstatic(unit))
 		return;
 
@@ -312,10 +318,12 @@
 	sc->sc_if.if_linkmib = sc;
 	sc->sc_if.if_linkmiblen = sizeof *sc;
 	mtx_init(&sc->sc_fastq.ifq_mtx, "sl_fastq", NULL, MTX_DEF);
+	mtx_init(&sc->sc_mtx, "slip sc_mtx", NULL, MTX_DEF);
 
 	/*
 	 * Find a suitable unit number.
 	 */
+	mtx_lock(&slip_mtx);
 	for (unit=0; ; unit++) {
 		if (slisstatic(unit))
 			continue;
@@ -325,6 +333,7 @@
 	}
 	if_initname(&sc->sc_if, "sl", unit);
 	LIST_INSERT_HEAD(&sl_list, sc, sl_next);
+	mtx_unlock(&slip_mtx);
 
 	if_attach(&sc->sc_if);
 	bpfattach(&sc->sc_if, DLT_SLIP, SLIP_HDRLEN);
@@ -387,10 +396,20 @@
 static void
 sldestroy(struct sl_softc *sc)
 {
+
+	/*
+	 * XXXRW: Slight race here: we may detach bpf/if before we
+	 * attach.  This appears to be a property of the unit selection
+	 * process, which might be better handled by the interface
+	 * cloning subsystem?
+	 */
 	bpfdetach(&sc->sc_if);
 	if_detach(&sc->sc_if);
+	mtx_lock(&slip_mtx);
 	LIST_REMOVE(sc, sl_next);
+	mtx_unlock(&slip_mtx);
 	m_free(sc->sc_mbuf);
+	mtx_destroy(&sc->sc_mtx);
 	mtx_destroy(&sc->sc_fastq.ifq_mtx);
 	if (sc->bpfbuf)
 		free(sc->bpfbuf, M_SL);
@@ -420,6 +439,10 @@
 	tp->t_line = 0;
 	sc = (struct sl_softc *)tp->t_sc;
 	if (sc != NULL) {
+		/*
+		 * XXXRW: tear-down race between timeout and slclose()?
+		 */
+		mtx_lock(&sc->sc_mtx);
 		if (sc->sc_outfill) {
 			sc->sc_outfill = 0;
 			untimeout(sl_outfill, sc, sc->sc_ofhandle);
@@ -428,6 +451,7 @@
 			sc->sc_keepalive = 0;
 			untimeout(sl_keepalive, sc, sc->sc_kahandle);
 		}
+		mtx_unlock(&sc->sc_mtx);
 		if_down(&sc->sc_if);
 		sc->sc_ttyp = NULL;
 		tp->t_sc = NULL;
@@ -465,12 +489,21 @@
 			splx(s);
 			return (ENXIO);
 		}
+		/*
+		 * XXXRW: we hold the mutex over all of this to protect
+		 * the unit change and global list consistency.  However,
+		 * some of these functions probably sleep, making this
+		 * wrong.  If we have to support renumbering, we probably
+		 * need a way to reserve both numbers to prevent them
+		 * from being reused during the change, or a way to sleep
+		 * waiting for a change to end (i.e., a CV).
+		 */
+		mtx_lock(&slip_mtx);
 		if (sc->sc_if.if_dunit != unit) {
 			if (!slisunitfree(unit)) {
-				splx(s);
+				mtx_unlock(&slip_mtx);
 				return (ENXIO);
 			}
-
 			wasup = sc->sc_if.if_flags & IFF_UP;
 			bpfdetach(&sc->sc_if);
 			if_detach(&sc->sc_if);
@@ -488,9 +521,11 @@
 			    SLIP_HIWAT + 2 * sc->sc_if.if_mtu + 1);
 		}
 		slmarkstatic(unit);
+		mtx_unlock(&slip_mtx);
 		break;
 
 	case SLIOCSKEEPAL:
+		mtx_lock(&sc->sc_mtx);
 		sc->sc_keepalive = *(u_int *)data * hz;
 		if (sc->sc_keepalive) {
 			sc->sc_flags |= SC_KEEPALIVE;
@@ -502,6 +537,7 @@
 				sc->sc_flags &= ~SC_KEEPALIVE;
 			}
 		}
+		mtx_unlock(&sc->sc_mtx);
 		break;
 
 	case SLIOCGKEEPAL:
@@ -509,6 +545,7 @@
 		break;
 
 	case SLIOCSOUTFILL:
+		mtx_lock(&sc->sc_mtx);
 		sc->sc_outfill = *(u_int *)data * hz;
 		if (sc->sc_outfill) {
 			sc->sc_flags |= SC_OUTWAIT;
@@ -520,9 +557,11 @@
 				sc->sc_flags &= ~SC_OUTWAIT;
 			}
 		}
+		mtx_unlock(&sc->sc_mtx);
 		break;
 
 	case SLIOCGOUTFILL:
+		/* Unlocked read. */
 		*(int *)data = sc->sc_outfill / hz;
 		break;
 
@@ -615,8 +654,11 @@
 		(*tp->t_oproc)(tp);
 
 		if (tp->t_outq.c_cc != 0) {
-			if (sc != NULL)
+			if (sc != NULL) {
+				mtx_lock(&sc->sc_mtx);
 				sc->sc_flags &= ~SC_OUTWAIT;
+				mtx_unlock(&sc->sc_mtx);
+			}
 			if (tp->t_outq.c_cc > SLIP_HIWAT)
 				return 0;
 		}
@@ -646,6 +688,7 @@
 		 * queueing, and the connection id compression will get
 		 * munged when this happens.
 		 */
+		mtx_lock(&sc->sc_mtx);
 		if (sc->sc_if.if_bpf) {
 			/*
 			 * We need to save the TCP/IP header before it's
@@ -676,9 +719,10 @@
 		}
 		ip = mtod(m, struct ip *);
 		if (ip->ip_v == IPVERSION && ip->ip_p == IPPROTO_TCP) {
-			if (sc->sc_if.if_flags & SC_COMPRESS)
+			if (sc->sc_if.if_flags & SC_COMPRESS) {
 				*mtod(m, u_char *) |= sl_compress_tcp(m, ip,
 				    &sc->sc_comp, 1);
+			}
 		}
 		if (sc->sc_if.if_bpf && sc->bpfbuf) {
 			/*
@@ -690,6 +734,7 @@
 			bcopy(mtod(m, caddr_t), &sc->bpfbuf[SLX_CHDR], CHDR_LEN);
 			BPF_TAP(&sc->sc_if, sc->bpfbuf, len + SLIP_HDRLEN);
 		}
+		mtx_unlock(&sc->sc_mtx);
 
 		/*
 		 * If system is getting low on clists, just flush our
@@ -705,7 +750,9 @@
 			continue;
 		}
 
+		mtx_lock(&sc->sc_mtx);
 		sc->sc_flags &= ~SC_OUTWAIT;
+		mtx_unlock(&sc->sc_mtx);
 		/*
 		 * The extra FRAME_END will start up a new packet, and thus
 		 * will flush any accumulated garbage.  We do this whenever
@@ -795,6 +842,8 @@
 {
 	struct mbuf *m, *newm;
 
+	mtx_assert(&sc->sc_mtx, MA_OWNED);
+
 	MGETHDR(m, M_DONTWAIT, MT_DATA);
 	if (m == NULL)
 		return (NULL);
@@ -850,13 +899,16 @@
 	if (sc == NULL)
 		return 0;
 	if (c & TTY_ERRORMASK || (tp->t_state & TS_CONNECTED) == 0) {
+		mtx_lock(&sc->sc_mtx);
 		sc->sc_flags |= SC_ERROR;
+		mtx_unlock(&sc->sc_mtx);
 		return 0;
 	}
 	c &= TTY_CHARMASK;
 
 	++sc->sc_if.if_ibytes;
 
+	mtx_lock(&sc->sc_mtx);
 	if (sc->sc_if.if_flags & IFF_DEBUG) {
 		if (c == ABT_ESC) {
 			/*
@@ -875,6 +927,7 @@
 					sc->sc_starttime = time_second;
 				if (sc->sc_abortcount >= ABT_COUNT) {
+					mtx_unlock(&sc->sc_mtx);
 					slclose(tp,0);
 					return 0;
 				}
 			}
@@ -897,6 +950,7 @@
 
 	case FRAME_ESCAPE:
 		sc->sc_escape = 1;
+		mtx_unlock(&sc->sc_mtx);
 		return 0;
 
 	case FRAME_END:
@@ -981,6 +1035,7 @@
 	if (sc->sc_mp < sc->sc_ep) {
 		*sc->sc_mp++ = c;
 		sc->sc_escape = 0;
+		mtx_unlock(&sc->sc_mtx);
 		return 0;
 	}
 
@@ -992,6 +1047,7 @@
 newpack:
 	sc->sc_mp = sc->sc_buf = sc->sc_ep - SLRMAX;
 	sc->sc_escape = 0;
+	mtx_unlock(&sc->sc_mtx);
 	return 0;
 }
 
@@ -1075,6 +1131,7 @@
 {
 	struct sl_softc *sc = chan;
 
+	mtx_lock(&sc->sc_mtx);
 	if (sc->sc_keepalive) {
 		if (sc->sc_flags & SC_KEEPALIVE) {
 			if (sc->sc_ttyp->t_pgrp != NULL) {
@@ -1088,6 +1145,7 @@
 	} else {
 		sc->sc_flags &= ~SC_KEEPALIVE;
 	}
+	mtx_unlock(&sc->sc_mtx);
 }
 
 static void
@@ -1098,6 +1156,7 @@
 	register struct tty *tp = sc->sc_ttyp;
 	int s;
 
+	mtx_lock(&sc->sc_mtx);
 	if (sc->sc_outfill && tp != NULL) {
 		if (sc->sc_flags & SC_OUTWAIT) {
 			s = splimp ();
@@ -1111,4 +1170,5 @@
 	} else {
 		sc->sc_flags &= ~SC_OUTWAIT;
 	}
+	mtx_unlock(&sc->sc_mtx);
 }
--- //depot/vendor/freebsd/src/sys/net/if_slvar.h	2004/04/07 20:52:05
+++ //depot/user/rwatson/netperf/sys/net/if_slvar.h	2004/04/08 03:11:34
@@ -34,13 +34,16 @@
 #ifndef _NET_IF_SLVAR_H_
 #define _NET_IF_SLVAR_H_
 
+#include <sys/_mutex.h>
 #include <sys/callout.h>
 
 /*
  * Definitions for SLIP interface data structures
  *
  * (This exists so programs like slstats can get at the definition
- *  of sl_softc.)
+ *  of sl_softc.)  Fields owned by the SLIP subsystem are protected
+ * using sc_mtx, with the exception of sl_next, which is protected
+ * by the global slip_mtx.
  */
 struct sl_softc {
 	struct	ifnet sc_if;		/* network-visible interface */
@@ -66,6 +69,7 @@
 	struct	slcompress sc_comp;	/* tcp compression data */
 	LIST_ENTRY(sl_softc) sl_next;
 	u_char	*bpfbuf;		/* hang buffer for bpf here */
+	struct	mtx sc_mtx;
 };
 
 /* internal flags */
--- //depot/vendor/freebsd/src/sys/net/if_spppsubr.c	2004/05/25 21:55:56
+++ //depot/user/rwatson/netperf/sys/net/if_spppsubr.c	2004/05/31 01:41:33
@@ -92,12 +92,8 @@
 #include <net/if_sppp.h>
 
 #if defined(__FreeBSD__) && __FreeBSD__ >= 3
-# define UNTIMEOUT(fun, arg, handle) untimeout(fun, arg, handle)
-# define TIMEOUT(fun, arg1, arg2, handle) handle = timeout(fun, arg1, arg2)
 # define IOCTL_CMD_T	u_long
 #else
-# define UNTIMEOUT(fun, arg, handle) untimeout(fun, arg)
-# define TIMEOUT(fun, arg1, arg2, handle) timeout(fun, arg1, arg2)
 # define IOCTL_CMD_T	int
 #endif
 
@@ -259,10 +255,11 @@
 	void	(*scr)(struct sppp *sp);
 };
 
+struct mtx sppp_mtx;
+MTX_SYSINIT(sppp_mtx, &sppp_mtx, "sppp_mtx", MTX_DEF);
+
 static struct sppp *spppq;
-#if defined(__FreeBSD__) && __FreeBSD__ >= 3
-static struct callout_handle keepalive_ch;
-#endif
+static struct callout keepalive_callout;
 
 #if defined(__FreeBSD__) && __FreeBSD__ >= 3 && __FreeBSD_version < 501113
 #define	SPP_FMT		"%s%d: "
@@ -960,13 +957,18 @@
 {
 	struct sppp *sp = (struct sppp*) ifp;
 
+	mtx_lock(&sppp_mtx);
 	/* Initialize keepalive handler. */
-	if (spppq == NULL)
-		TIMEOUT(sppp_keepalive, 0, hz * 10, keepalive_ch);
+	if (spppq == NULL) {
+		callout_init(&keepalive_callout, 0);
+		callout_reset(&keepalive_callout, hz * 10, sppp_keepalive,
+		    NULL); 
+	}
 
 	/* Insert new entry into the keepalive list. */
 	sp->pp_next = spppq;
 	spppq = sp;
+	mtx_unlock(&sppp_mtx);
 
 	sp->pp_if.if_mtu = PP_MTU;
 	sp->pp_if.if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
@@ -1012,6 +1014,7 @@
 	struct sppp **q, *p, *sp = (struct sppp*) ifp;
 	int i;
 
+	mtx_lock(&sppp_mtx);
 	/* Remove the entry from the keepalive list. */
 	for (q = &spppq; (p = *q); q = &p->pp_next)
 		if (p == sp) {
@@ -1021,11 +1024,12 @@
 
 	/* Stop keepalive handler. */
 	if (spppq == NULL)
-		UNTIMEOUT(sppp_keepalive, 0, keepalive_ch);
+		callout_stop(&keepalive_callout);
+	mtx_unlock(&sppp_mtx);
 
 	for (i = 0; i < IDX_COUNT; i++)
-		UNTIMEOUT((cps[i])->TO, (void *)sp, sp->ch[i]);
-	UNTIMEOUT(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch);
+		untimeout((cps[i])->TO, (void *)sp, sp->ch[i]);
+	untimeout(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch);
 	mtx_destroy(&sp->pp_cpq.ifq_mtx);
 	mtx_destroy(&sp->pp_fastq.ifq_mtx);
 }
@@ -2004,8 +2008,8 @@
 		case STATE_STOPPING:
 			sppp_cp_send(sp, cp->proto, TERM_REQ,
 				     ++sp->pp_seq[cp->protoidx], 0, 0);
-			TIMEOUT(cp->TO, (void *)sp, sp->lcp.timeout,
-			    sp->ch[cp->protoidx]);
+			sp->ch[cp->protoidx] = timeout(cp->TO, (void *)sp,
+			    sp->lcp.timeout);
 			break;
 		case STATE_REQ_SENT:
 		case STATE_ACK_RCVD:
@@ -2015,8 +2019,8 @@
 			break;
 		case STATE_ACK_SENT:
 			(cp->scr)(sp);
-			TIMEOUT(cp->TO, (void *)sp, sp->lcp.timeout,
-			    sp->ch[cp->protoidx]);
+			sp->ch[cp->protoidx] = timeout(cp->TO, (void *)sp,
+			    sp->lcp.timeout);
 			break;
 		}
 
@@ -2032,7 +2036,7 @@
 {
 	sp->state[cp->protoidx] = newstate;
 
-	UNTIMEOUT(cp->TO, (void *)sp, sp->ch[cp->protoidx]);
+	untimeout(cp->TO, (void *)sp, sp->ch[cp->protoidx]);
 	switch (newstate) {
 	case STATE_INITIAL:
 	case STATE_STARTING:
@@ -2045,8 +2049,8 @@
 	case STATE_REQ_SENT:
 	case STATE_ACK_RCVD:
 	case STATE_ACK_SENT:
-		TIMEOUT(cp->TO, (void *)sp, sp->lcp.timeout,
-		    sp->ch[cp->protoidx]);
+		sp->ch[cp->protoidx] = timeout(cp->TO, (void *)sp,
+		    sp->lcp.timeout);
 		break;
 	}
 }
@@ -4142,7 +4146,7 @@
 		 * a number between 300 and 810 seconds.
 		 */
 		i = 300 + ((unsigned)(random() & 0xff00) >> 7);
-		TIMEOUT(chap.TO, (void *)sp, i * hz, sp->ch[IDX_CHAP]);
+		sp->ch[IDX_CHAP] = timeout(chap.TO, (void *)sp, i * hz);
 	}
 
 	if (debug) {
@@ -4186,7 +4190,7 @@
 
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "chap tld\n", SPP_ARGS(ifp));
-	UNTIMEOUT(chap.TO, (void *)sp, sp->ch[IDX_CHAP]);
+	untimeout(chap.TO, (void *)sp, sp->ch[IDX_CHAP]);
 	sp->lcp.protos &= ~(1 << IDX_CHAP);
 
 	lcp.Close(sp);
@@ -4322,7 +4326,7 @@
 
 	/* ack and nak are his authproto */
 	case PAP_ACK:
-		UNTIMEOUT(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch);
+		untimeout(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch);
 		if (debug) {
 			log(LOG_DEBUG, SPP_FMT "pap success",
 			    SPP_ARGS(ifp));
@@ -4351,7 +4355,7 @@
 		break;
 
 	case PAP_NAK:
-		UNTIMEOUT(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch);
+		untimeout(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch);
 		if (debug) {
 			log(LOG_INFO, SPP_FMT "pap failure",
 			    SPP_ARGS(ifp));
@@ -4408,8 +4412,8 @@
 	if (sp->myauth.proto == PPP_PAP) {
 		/* we are peer, send a request, and start a timer */
 		pap.scr(sp);
-		TIMEOUT(sppp_pap_my_TO, (void *)sp, sp->lcp.timeout,
-		    sp->pap_my_to_ch);
+		sp->pap_my_to_ch = timeout(sppp_pap_my_TO, (void *)sp,
+		    sp->lcp.timeout);
 	}
 }
 
@@ -4512,8 +4516,8 @@
 
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "pap tld\n", SPP_ARGS(ifp));
-	UNTIMEOUT(pap.TO, (void *)sp, sp->ch[IDX_PAP]);
-	UNTIMEOUT(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch);
+	untimeout(pap.TO, (void *)sp, sp->ch[IDX_PAP]);
+	untimeout(sppp_pap_my_TO, (void *)sp, sp->pap_my_to_ch);
 	sp->lcp.protos &= ~(1 << IDX_PAP);
 
 	lcp.Close(sp);
@@ -4640,7 +4644,12 @@
 	struct sppp *sp;
 	int s;
 
+	/*
+	 * XXXRW: It would be nice to avoid calling all this stuff while
+	 * holding sppp_mtx, or we risk lock order reversals.
+	 */
 	s = splimp();
+	mtx_lock(&sppp_mtx);
 	for (sp=spppq; sp; sp=sp->pp_next) {
 		struct ifnet *ifp = &sp->pp_if;
 
@@ -4679,8 +4688,9 @@
 				sp->lcp.echoid, 4, &nmagic);
 		}
 	}
+	callout_reset(&keepalive_callout, hz * 10, sppp_keepalive, NULL);
+	mtx_unlock(&sppp_mtx);
 	splx(s);
-	TIMEOUT(sppp_keepalive, 0, hz * 10, keepalive_ch);
 }
 
 /*
--- //depot/vendor/freebsd/src/sys/net/if_stf.c	2004/05/30 20:25:31
+++ //depot/user/rwatson/netperf/sys/net/if_stf.c	2004/05/31 01:41:33
@@ -137,13 +137,11 @@
 #define sc_ro	__sc_ro46.__sc_ro4
 	const struct encaptab *encap_cookie;
 	LIST_ENTRY(stf_softc) sc_list;	/* all stf's are linked */
+	struct mtx	sc_mtx;		/* protect sc_ro */
 };
 
 /*
  * All mutable global variables in if_stf.c are protected by stf_mtx.
- * XXXRW: Note that mutable fields in the softc are not currently locked:
- * in particular, sc_ro needs to be protected from concurrent entrance
- * of stf_output().
  */
 static struct mtx stf_mtx;
 static LIST_HEAD(, stf_softc) stf_softc_list;
@@ -199,6 +197,7 @@
 		free(sc, M_STF);
 		return (ENOMEM);
 	}
+	mtx_init(&sc->sc_mtx, "stf sc_mtx", NULL, MTX_DEF);
 
 	ifp->if_mtu    = IPV6_MMTU;
 	ifp->if_ioctl  = stf_ioctl;
@@ -223,6 +222,7 @@
 	bpfdetach(&sc->sc_if);
 	if_detach(&sc->sc_if);
 
+	mtx_destroy(&sc->sc_mtx);
 	free(sc, M_STF);
 }
 
@@ -394,9 +394,9 @@
 	struct ip *ip;
 	struct ip6_hdr *ip6;
 	struct in6_ifaddr *ia6;
-#ifdef MAC
 	int error;
 
+#ifdef MAC
 	error = mac_check_ifnet_transmit(ifp, m);
 	if (error) {
 		m_freem(m);
@@ -498,9 +498,7 @@
 	else
 		ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
 
-	/*
-	 * XXXRW: Locking of sc_ro required.
-	 */
+	mtx_lock(&sc->sc_mtx);
 	dst4 = (struct sockaddr_in *)&sc->sc_ro.ro_dst;
 	if (dst4->sin_family != AF_INET ||
 	    bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) {
@@ -519,12 +517,21 @@
 		if (sc->sc_ro.ro_rt == NULL) {
 			m_freem(m);
 			ifp->if_oerrors++;
+			mtx_unlock(&sc->sc_mtx);
 			return ENETUNREACH;
 		}
 	}
 
+	/*
+	 * XXXRW: Holding mutex over call to ip_output(): potential lock
+	 * order issue?  Hard to resolve cleanly with the current route
+	 * caching model, as we have to synchronize access to shared softc
+	 * state.
+	 */
 	ifp->if_opackets++;
-	return ip_output(m, NULL, &sc->sc_ro, 0, NULL, NULL);
+	error = ip_output(m, NULL, &sc->sc_ro, 0, NULL, NULL);
+	mtx_unlock(&sc->sc_mtx);
+	return (error);
 }
 
 static int
--- //depot/vendor/freebsd/src/sys/net/if_tap.c	2004/05/30 20:25:31
+++ //depot/user/rwatson/netperf/sys/net/if_tap.c	2004/05/31 01:41:33
@@ -113,6 +113,10 @@
  * All global variables in if_tap.c are locked with tapmtx, with the
  * exception of tapdebug, which is accessed unlocked; tapclones is
  * static at runtime.
+ *
+ * XXXRW: si_flags appears not to be protected from concurrent access,
+ * and is written at run-time.
+ * XXXRW: si_drv1 is also used for test-and-set, and isn't synchronized.
  */
 static struct mtx		tapmtx;
 static int			tapdebug = 0;        /* debug flag   */
@@ -162,6 +166,7 @@
 		 * The EBUSY algorithm here can't quite atomically
 		 * guarantee that this is race-free since we have to
 		 * release the tap mtx to deregister the clone handler.
+		 * XXXRW: is this true?
 		 */
 		mtx_lock(&tapmtx);
 		SLIST_FOREACH(tp, &taphead, tap_next) {
@@ -693,6 +698,7 @@
 
 		case SIOCSIFADDR:	/* set MAC address of the remote side */
 			mtx_lock(&tp->tap_mtx);
+			/* XXXRW: Does this actually do anything? */
 			bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
 			mtx_unlock(&tp->tap_mtx);
 			break;
@@ -747,6 +753,7 @@
 			if (flag & IO_NDELAY)
 				return (EWOULDBLOCK);
 
+			/* This looks like a wanna-be condition variable. */
 			mtx_lock(&tp->tap_mtx);
 			tp->tap_flags |= TAP_RWAIT;
 			mtx_unlock(&tp->tap_mtx);
--- //depot/vendor/freebsd/src/sys/net/if_tun.c	2004/03/29 22:20:33
+++ //depot/user/rwatson/netperf/sys/net/if_tun.c	2004/03/30 00:17:24
@@ -59,6 +59,12 @@
  * tun_list is protected by global tunmtx.  Other mutable fields are
  * protected by tun->tun_mtx, or by their owning subsystem.  tun_dev is
  * static for the duration of a tunnel interface.
+ *
+ * XXXRW: we allocate si_drv1 for the dev_t on demand, rather than when
+ * the dev_t is instantiated.  Nothing serializes the test/set of that
+ * field.
+ *
+ * XXXRW: what serializes access to si_flags?
  */
 struct tun_softc {
 	TAILQ_ENTRY(tun_softc)	tun_list;
@@ -121,6 +127,9 @@
 static d_ioctl_t	tunioctl;
 static d_poll_t		tunpoll;
 
+/*
+ * XXXRW: can remove D_NEEDGIANT?  Probably not because of si_drv1 for now.
+ */
 static struct cdevsw tun_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_flags =	D_PSEUDO | D_NEEDGIANT,
@@ -364,6 +373,9 @@
 	ifp->if_flags |= IFF_UP | IFF_RUNNING;
 	getmicrotime(&ifp->if_lastchange);
 
+	/*
+	 * XXXRW: interface locking.
+	 */
 	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa;
 	     ifa = TAILQ_NEXT(ifa, ifa_link)) {
 		if (ifa->ifa_addr == NULL)
--- //depot/vendor/freebsd/src/sys/net/raw_cb.c	2004/04/07 20:52:05
+++ //depot/user/rwatson/netperf/sys/net/raw_cb.c	2004/04/23 01:08:25
@@ -32,7 +32,9 @@
 
 #include <sys/param.h>
 #include <sys/domain.h>
+#include <sys/lock.h>
 #include <sys/malloc.h>
+#include <sys/mutex.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
@@ -49,10 +51,11 @@
  *	redo address binding to allow wildcards
  */
 
+struct mtx rawcb_mtx;
 struct rawcb_list_head rawcb_list;
 
-static u_long	raw_sendspace = RAWSNDQ;
-static u_long	raw_recvspace = RAWRCVQ;
+static const u_long	raw_sendspace = RAWSNDQ;
+static const u_long	raw_recvspace = RAWRCVQ;
 
 /*
  * Allocate a control block and a nominal amount
@@ -79,7 +82,9 @@
 	rp->rcb_socket = so;
 	rp->rcb_proto.sp_family = so->so_proto->pr_domain->dom_family;
 	rp->rcb_proto.sp_protocol = proto;
+	mtx_lock(&rawcb_mtx);
 	LIST_INSERT_HEAD(&rawcb_list, rp, list);
+	mtx_unlock(&rawcb_mtx);
 	return (0);
 }
 
@@ -93,6 +98,9 @@
 {
 	struct socket *so = rp->rcb_socket;
 
+	SOCK_LOCK(so);
 	so->so_pcb = 0;
 	sotryfree(so);
+	mtx_lock(&rawcb_mtx);
 	LIST_REMOVE(rp, list);
+	mtx_unlock(&rawcb_mtx);
@@ -117,6 +125,7 @@
 		m_freem(dtom(rp->rcb_faddr));
 	rp->rcb_faddr = 0;
 #endif
+	/* Unlocked read. */
 	if (rp->rcb_socket->so_state & SS_NOFDREF)
 		raw_detach(rp);
 }
--- //depot/vendor/freebsd/src/sys/net/raw_cb.h	2004/04/07 20:52:05
+++ //depot/user/rwatson/netperf/sys/net/raw_cb.h	2004/04/08 03:11:34
@@ -57,6 +57,7 @@
 
 #ifdef _KERNEL
 extern LIST_HEAD(rawcb_list_head, rawcb) rawcb_list;
+extern struct mtx rawcb_mtx;
 
 /* protosw entries */
 pr_ctlinput_t	raw_ctlinput;
--- //depot/vendor/freebsd/src/sys/net/raw_usrreq.c	2004/04/07 20:52:05
+++ //depot/user/rwatson/netperf/sys/net/raw_usrreq.c	2004/05/29 04:16:19
@@ -31,9 +31,12 @@
  */
 
 #include <sys/param.h>
+#include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
 #include <sys/protosw.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
@@ -43,12 +46,15 @@
 
 #include <net/raw_cb.h>
 
+MTX_SYSINIT(rawcb_mtx, &rawcb_mtx, "rawcb", MTX_DEF);
+
 /*
  * Initialize raw connection block q.
  */
 void
 raw_init()
 {
+
 	LIST_INIT(&rawcb_list);
 }
 
@@ -71,7 +77,12 @@
 	register struct mbuf *m = m0;
 	struct socket *last;
 
+	/*
+	 * XXXRW: Potential lock order issues due to holding the
+	 * rawcb_mtx across all this stuff.  Need to revisit.
+	 */
 	last = 0;
+	mtx_lock(&rawcb_mtx);
 	LIST_FOREACH(rp, &rawcb_list, list) {
 		if (rp->rcb_proto.sp_family != proto->sp_family)
 			continue;
@@ -116,6 +127,7 @@
 		}
 	} else
 		m_freem(m);
+	mtx_unlock(&rawcb_mtx);
 }
 
 /*ARGSUSED*/
@@ -139,8 +151,8 @@
 	if (rp == 0)
 		return EINVAL;
 	raw_disconnect(rp);
+	soisdisconnected(so);
 	sotryfree(so);
-	soisdisconnected(so);	/* XXX huh? called after the sofree()? */
 	return 0;
 }
 
--- //depot/vendor/freebsd/src/sys/net/rtsock.c	2004/06/09 02:50:37
+++ //depot/user/rwatson/netperf/sys/net/rtsock.c	2004/06/09 03:06:49
@@ -54,10 +54,18 @@
 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
 
 /* NB: these are not modified */
+/*
+ * XXXRW: It would be really nice to add const to these, but that may
+ * not be possible due to where they are passed in.  We might need
+ * to const-poison a whole boatload of APIs...?
+ */
 static struct	sockaddr route_dst = { 2, PF_ROUTE, };
 static struct	sockaddr route_src = { 2, PF_ROUTE, };
 static struct	sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
 
+/*
+ * XXXRW: These fields are locked by RTSOCK_LOCK().
+ */
 static struct {
 	int	ip_count;	/* attacked w/ AF_INET */
 	int	ip6_count;	/* attached w/ AF_INET6 */
--- //depot/vendor/freebsd/src/sys/netatalk/aarp.c	2004/04/25 09:25:44
+++ //depot/user/rwatson/netperf/sys/netatalk/aarp.c	2004/05/05 03:19:17
@@ -63,6 +63,9 @@
 #define AARPT_KILLC	20
 #define AARPT_KILLI	3
 
+/*
+ * XXXRW: wot?
+ */
 # if !defined(__FreeBSD__)
 extern u_char			etherbroadcastaddr[6];
 # endif /* __FreeBSD__ */
@@ -72,7 +75,7 @@
 };
 
 /*
- * Not used?
+ * XXXRW: unused?
  */
 u_char	at_org_code[ 3 ] = {
     0x08, 0x00, 0x07,
@@ -81,6 +84,9 @@
     0x00, 0x00, 0x00,
 };
 
+/*
+ * XXXRW: Convert this to use callout(9) rather than timeout(9).
+ */
 static struct callout_handle aarptimer_ch =
     CALLOUT_HANDLE_INITIALIZER(&aarptimer_ch);
 
@@ -639,6 +645,9 @@
     struct aarptab	*aat;
     int			i;
 
+    /*
+     * XXXRW: Should grab mutex before untimeout?
+     */
     untimeout(aarptimer, 0, aarptimer_ch);
     AARPTAB_LOCK();
     for (i = 0, aat = aarptab; i < AARPTAB_SIZE; i++, aat++) {
--- //depot/vendor/freebsd/src/sys/netatalk/at_control.c	2004/03/22 04:56:26
+++ //depot/user/rwatson/netperf/sys/netatalk/at_control.c	2004/03/22 05:14:09
@@ -21,6 +21,9 @@
 #include <netatalk/at_var.h>
 #include <netatalk/at_extern.h>
 
+/*
+ * XXXRW: Requires synchronization.
+ */
 struct at_ifaddr	*at_ifaddr_list;
 
 static int aa_dorangeroute(struct ifaddr *ifa,
--- //depot/vendor/freebsd/src/sys/netatalk/at_rmx.c	2004/03/22 04:01:33
+++ //depot/user/rwatson/netperf/sys/netatalk/at_rmx.c	2004/03/22 04:08:45
@@ -40,11 +40,23 @@
 
 int at_inithead(void **head, int off);
 
-static char hexbuf[256];
+/*
+ * XXXRW: hexbuf was a file-scope static; it is now a function-local
+ * static in prsockaddr() rather than being guarded by a mutex.  It is
+ * still shared static storage (not on the stack), so concurrent callers
+ * can still race.  Needs revisiting; probably use the new hexdump(9).
+ *
+ * XXXRW: All this appears to be present just so as to printf debugging
+ * information.  Assuming that this code is known to work, we could just
+ * scrap all this.  In fact, this code isn't even used as it stands, it's
+ * here for debugging purposes only and requires modifications to
+ * at_proto.c.
+ */
 
 static char *
 prsockaddr(void *v)
 {
+	static char hexbuf[256];
 	char *bp = &hexbuf[0];
 	u_char *cp = v;
 
--- //depot/vendor/freebsd/src/sys/netatalk/ddp_input.c	2004/03/22 04:56:26
+++ //depot/user/rwatson/netperf/sys/netatalk/ddp_input.c	2004/05/25 01:53:13
@@ -29,6 +29,12 @@
 static volatile int	ddp_forward = 1;
 static volatile int	ddp_firewall = 0;
 static struct ddpstat	ddpstat;
+
+/*
+ * XXXRW: If we're going to keep this cached route data, we'll need to lock it
+ * down, and change later function-local use of it to grab an extra reference
+ * after deciding it is useful.
+ */
 static struct route	forwro;
 
 static void     ddp_input(struct mbuf *, struct ifnet *, struct elaphdr *, int);
@@ -366,10 +372,13 @@
     }
 
 #ifdef MAC
+    SOCK_LOCK(ddp->ddp_socket);
     if (mac_check_socket_deliver(ddp->ddp_socket, m) != 0) {
+        SOCK_UNLOCK(ddp->ddp_socket);
 	m_freem(m);
 	return;
     }
+    SOCK_UNLOCK(ddp->ddp_socket);
 #endif
 
     /* 
--- //depot/vendor/freebsd/src/sys/netatalk/ddp_output.c	2004/03/22 04:56:26
+++ //depot/user/rwatson/netperf/sys/netatalk/ddp_output.c	2004/05/25 01:53:13
@@ -52,7 +52,9 @@
     struct ddpcb *ddp = sotoddpcb(so);
 
 #ifdef MAC
+    SOCK_LOCK(so);
     mac_create_mbuf_from_socket(so, m);
+    SOCK_UNLOCK(so);
 #endif
 
     M_PREPEND(m, sizeof(struct ddpehdr), M_TRYWAIT);
--- //depot/vendor/freebsd/src/sys/netatalk/ddp_pcb.c	2004/03/22 04:56:26
+++ //depot/user/rwatson/netperf/sys/netatalk/ddp_pcb.c	2004/03/22 13:14:14
@@ -22,12 +22,18 @@
 #include <netatalk/ddp_pcb.h>
 #include <netatalk/at_extern.h>
 
+struct mtx		 ddp_list_mtx;
 static struct ddpcb	*ddp_ports[ ATPORT_LAST ];
-struct ddpcb	*ddpcb_list = NULL;
+struct ddpcb		*ddpcb_list = NULL;
 
 void
 at_sockaddr(struct ddpcb *ddp, struct sockaddr **addr)
 {
+
+    /*
+     * Prevent modification of ddp during copy of addr.
+     */
+    DDP_LOCK_ASSERT(ddp);
     *addr = sodupsockaddr((struct sockaddr *)&ddp->ddp_lsat, M_NOWAIT);
 }
 
@@ -38,6 +44,12 @@
     struct at_ifaddr	*aa;
     struct ddpcb	*ddpp;
 
+    /*
+     * We read and write both the ddp passed in, and also ddp_ports.
+     */
+    DDP_LIST_XLOCK_ASSERT();
+    DDP_LOCK_ASSERT(ddp);
+
     if (ddp->ddp_lsat.sat_port != ATADDR_ANYPORT) { /* shouldn't be bound */
 	return (EINVAL);
     }
@@ -134,6 +146,9 @@
     struct ifnet	*ifp;
     u_short		hintnet = 0, net;
 
+    DDP_LIST_XLOCK_ASSERT();
+    DDP_LOCK_ASSERT(ddp);
+
     if (sat->sat_family != AF_APPLETALK) {
 	return (EAFNOSUPPORT);
     }
@@ -222,6 +237,9 @@
 void 
 at_pcbdisconnect(struct ddpcb	*ddp)
 {
+
+    DDP_LOCK_ASSERT(ddp);
+
     ddp->ddp_fsat.sat_addr.s_net = ATADDR_ANYNET;
     ddp->ddp_fsat.sat_addr.s_node = ATADDR_ANYNODE;
     ddp->ddp_fsat.sat_port = ATADDR_ANYPORT;
@@ -233,8 +251,17 @@
 	struct ddpcb	*ddp;
 
 	MALLOC(ddp, struct ddpcb *, sizeof *ddp, M_PCB, M_WAITOK | M_ZERO);
+	DDP_LOCK_INIT(ddp);
 	ddp->ddp_lsat.sat_port = ATADDR_ANYPORT;
 
+	/*
+	 * XXXRW: Is this unlocked assignment pair for socket and
+	 * back-pointer something that needs to be protected?
+	 */
+	ddp->ddp_socket = so;
+	so->so_pcb = (caddr_t)ddp;
+
+	DDP_LIST_XLOCK();
 	ddp->ddp_next = ddpcb_list;
 	ddp->ddp_prev = NULL;
 	ddp->ddp_pprev = NULL;
@@ -243,15 +270,21 @@
 		ddpcb_list->ddp_prev = ddp;
 	}
 	ddpcb_list = ddp;
+	DDP_LIST_XUNLOCK();
 
-	ddp->ddp_socket = so;
-	so->so_pcb = (caddr_t)ddp;
-	return (0);
+	return(0);
 }
 
 void
 at_pcbdetach(struct socket *so, struct ddpcb *ddp)
 {
+
+    /*
+     * We modify ddp, ddp_ports, and the global list.
+     */
+    DDP_LIST_XLOCK_ASSERT();
+    DDP_LOCK_ASSERT(ddp);
+
     soisdisconnected(so);
     so->so_pcb = NULL;
     sotryfree(so);
@@ -281,6 +314,8 @@
     if (ddp->ddp_next) {
 	ddp->ddp_next->ddp_prev = ddp->ddp_prev;
     }
+    DDP_UNLOCK(ddp);
+    DDP_LOCK_DESTROY(ddp);
     FREE(ddp, M_PCB);
 }
 
@@ -296,6 +331,8 @@
 {
     struct ddpcb	*ddp;
 
+    DDP_LIST_SLOCK_ASSERT();
+
     /*
      * Check for bad ports.
      */
@@ -308,11 +345,13 @@
      * the interface?
      */
     for (ddp = ddp_ports[ to->sat_port - 1 ]; ddp; ddp = ddp->ddp_pnext) {
+	DDP_LOCK(ddp);
 	/* XXX should we handle 0.YY? */
 
 	/* XXXX.YY to socket on destination interface */
 	if (to->sat_addr.s_net == ddp->ddp_lsat.sat_addr.s_net &&
 		to->sat_addr.s_node == ddp->ddp_lsat.sat_addr.s_node) {
+	    DDP_UNLOCK(ddp);
 	    break;
 	}
 
@@ -320,6 +359,7 @@
 	if (to->sat_addr.s_node == ATADDR_BCAST && (to->sat_addr.s_net == 0 ||
 		to->sat_addr.s_net == ddp->ddp_lsat.sat_addr.s_net) &&
 		ddp->ddp_lsat.sat_addr.s_net == AA_SAT(aa)->sat_addr.s_net) {
+	    DDP_UNLOCK(ddp);
 	    break;
 	}
 
@@ -330,8 +370,10 @@
 		ntohs(aa->aa_firstnet) &&
 		ntohs(ddp->ddp_lsat.sat_addr.s_net) <=
 		ntohs(aa->aa_lastnet)) {
+	    DDP_UNLOCK(ddp);
 	    break;
 	}
+	DDP_UNLOCK(ddp);
     }
     return (ddp);
 }
--- //depot/vendor/freebsd/src/sys/netatalk/ddp_pcb.h	2004/03/19 07:25:31
+++ //depot/user/rwatson/netperf/sys/netatalk/ddp_pcb.h	2004/03/22 04:20:48
@@ -17,4 +17,23 @@
 	    struct thread *td);
 void	at_sockaddr(struct ddpcb *ddp, struct sockaddr **addr);
 
+/* Lock macros for per-pcb locks. */
+#define	DDP_LOCK_INIT(ddp)	mtx_init(&(ddp)->ddp_mtx, "ddp_mtx",	\
+				    NULL, MTX_DEF)
+#define	DDP_LOCK_DESTROY(ddp)	mtx_destroy(&(ddp)->ddp_mtx)
+#define	DDP_LOCK(ddp)		mtx_lock(&(ddp)->ddp_mtx)
+#define	DDP_UNLOCK(ddp)		mtx_unlock(&(ddp)->ddp_mtx)
+#define	DDP_LOCK_ASSERT(ddp)	mtx_assert(&(ddp)->ddp_mtx, MA_OWNED)
+
+/* Lock macros for global pcb list lock. */
+#define	DDP_LIST_LOCK_INIT()	mtx_init(&ddp_list_mtx, "ddp_list_mtx",	\
+				    NULL, MTX_DEF)
+#define	DDP_LIST_LOCK_DESTROY()	mtx_destroy(&ddp_list_mtx)
+#define	DDP_LIST_XLOCK()	mtx_lock(&ddp_list_mtx)
+#define	DDP_LIST_XUNLOCK()	mtx_unlock(&ddp_list_mtx)
+#define	DDP_LIST_XLOCK_ASSERT()	mtx_assert(&ddp_list_mtx, MA_OWNED)
+#define	DDP_LIST_SLOCK()	mtx_lock(&ddp_list_mtx)
+#define	DDP_LIST_SUNLOCK()	mtx_unlock(&ddp_list_mtx)
+#define	DDP_LIST_SLOCK_ASSERT()	mtx_assert(&ddp_list_mtx, MA_OWNED)
+
 #endif
--- //depot/vendor/freebsd/src/sys/netatalk/ddp_usrreq.c	2004/05/05 03:36:28
+++ //depot/user/rwatson/netperf/sys/netatalk/ddp_usrreq.c	2004/05/23 16:56:02
@@ -22,6 +22,9 @@
 #include <netatalk/ddp_pcb.h>
 #include <netatalk/at_extern.h>
 
+/*
+ * XXXRW: These structures are currently not mutable.
+ */
 static u_long	ddp_sendspace = DDP_MAXSZ; /* Max ddp size + 1 (ddp_type) */
 static u_long	ddp_recvspace = 10 * (587 + sizeof(struct sockaddr_at));
 
@@ -32,36 +35,38 @@
 {
 	struct ddpcb	*ddp;
 	int		error = 0;
-	int		s;
 	
+	ddp = sotoddpcb(so);
+	if (ddp != NULL)
+		return (EINVAL);
 
-	ddp = sotoddpcb(so);
-	if (ddp != NULL) {
-	    return (EINVAL);
-	}
+	/*
+	 * Allocate socket buffer space first so that it's present
+	 * before first use.
+	 */
+	error = soreserve(so, ddp_sendspace, ddp_recvspace);
+	if (error)
+		return (error);
 
-	s = splnet();
+	DDP_LIST_XLOCK();
 	error = at_pcballoc(so);
-	splx(s);
-	if (error) {
-	    return (error);
-	}
-	return (soreserve(so, ddp_sendspace, ddp_recvspace));
+	DDP_LIST_XUNLOCK();
+	return (error);
 }
 
 static int
 ddp_detach(struct socket *so)
 {
 	struct ddpcb	*ddp;
-	int		s;
 	
 	ddp = sotoddpcb(so);
-	if (ddp == NULL) {
+	if (ddp == NULL)
 	    return (EINVAL);
-	}
-	s = splnet();
+
+	DDP_LIST_XLOCK();
+	DDP_LOCK(ddp);
 	at_pcbdetach(so, ddp);
-	splx(s);
+	DDP_LIST_XUNLOCK();
 	return (0);
 }
 
@@ -70,15 +75,16 @@
 {
 	struct ddpcb	*ddp;
 	int		error = 0;
-	int		s;
 	
 	ddp = sotoddpcb(so);
 	if (ddp == NULL) {
 	    return (EINVAL);
 	}
-	s = splnet();
+	DDP_LIST_XLOCK();
+	DDP_LOCK(ddp);
 	error = at_pcbsetaddr(ddp, nam, td);
-	splx(s);
+	DDP_UNLOCK(ddp);
+	DDP_LIST_XUNLOCK();
 	return (error);
 }
     
@@ -87,20 +93,23 @@
 {
 	struct ddpcb	*ddp;
 	int		error = 0;
-	int		s;
 	
 	ddp = sotoddpcb(so);
 	if (ddp == NULL) {
 	    return (EINVAL);
 	}
 
+	DDP_LIST_XLOCK();
+	DDP_LOCK(ddp);
 	if (ddp->ddp_fsat.sat_port != ATADDR_ANYPORT) {
+	    DDP_UNLOCK(ddp);
+	    DDP_LIST_XUNLOCK();
 	    return (EISCONN);
 	}
 
-	s = splnet();
-	error = at_pcbconnect(ddp, nam, td);
-	splx(s);
+	error = at_pcbconnect( ddp, nam, td );
+	DDP_UNLOCK(ddp);
+	DDP_LIST_XUNLOCK();
 	if (error == 0)
 	    soisconnected(so);
 	return (error);
@@ -111,20 +120,20 @@
 {
 
 	struct ddpcb	*ddp;
-	int		s;
 	
 	ddp = sotoddpcb(so);
 	if (ddp == NULL) {
 	    return (EINVAL);
 	}
+	DDP_LOCK(ddp);
 	if (ddp->ddp_fsat.sat_addr.s_node == ATADDR_ANYNODE) {
+	    DDP_UNLOCK(ddp);
 	    return (ENOTCONN);
 	}
 
-	s = splnet();
 	at_pcbdisconnect(ddp);
 	ddp->ddp_fsat.sat_addr.s_node = ATADDR_ANYNODE;
-	splx(s);
+	DDP_UNLOCK(ddp);
 	soisdisconnected(so);
 	return (0);
 }
@@ -142,13 +151,19 @@
 	return (0);
 }
 
+/*
+ * XXXRW: If an explicit address is specified, then we temporarily change
+ * the address on the pcb for sending.  This is inefficient because it
+ * requires us to perform global rather than pcb-local operations.  It
+ * may also create a race if other users of the socket are simultaneously
+ * sending.
+ */
 static int
 ddp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
             struct mbuf *control, struct thread *td)
 {
 	struct ddpcb	*ddp;
 	int		error = 0;
-	int		s;
 	
 	ddp = sotoddpcb(so);
 	if (ddp == NULL) {
@@ -160,28 +175,29 @@
     	}
 
 	if (addr != NULL) {
+		DDP_LIST_XLOCK();
+		DDP_LOCK(ddp);
 		if (ddp->ddp_fsat.sat_port != ATADDR_ANYPORT) {
-			return (EISCONN);
+			error = EISCONN;
+			goto out;
 		}
 
-		s = splnet();
 		error = at_pcbconnect(ddp, addr, td);
-		splx(s);
-		if (error) {
-			return (error);
+		if (error == 0) {
+			error = ddp_output(m, so);
+			at_pcbdisconnect(ddp);
 		}
+out:
+		DDP_UNLOCK(ddp);
+		DDP_LIST_XUNLOCK();
 	} else {
-		if (ddp->ddp_fsat.sat_port == ATADDR_ANYPORT) {
-			return (ENOTCONN);
-		}
+		DDP_LOCK(ddp);
+		if (ddp->ddp_fsat.sat_port == ATADDR_ANYPORT)
+			error = ENOTCONN;
+		else
+			error = ddp_output(m, so);
+		DDP_UNLOCK(ddp);
 	}
-
-	s = splnet();
-	error = ddp_output(m, so);
-	if (addr != NULL) {
-	    at_pcbdisconnect(ddp);
-	}
-	splx(s);
 	return (error);
 }
 
@@ -189,28 +205,28 @@
 ddp_abort(struct socket *so)
 {
 	struct ddpcb	*ddp;
-	int		s;
 	
 	ddp = sotoddpcb(so);
 	if (ddp == NULL) {
 		return (EINVAL);
 	}
-	s = splnet();
+	DDP_LIST_XLOCK();
+	DDP_LOCK(ddp);
 	at_pcbdetach(so, ddp);
-	splx(s);
+	DDP_LIST_XUNLOCK();
 	return (0);
 }
 
 void 
 ddp_init(void)
 {
-
 	atintrq1.ifq_maxlen = IFQ_MAXLEN;
 	atintrq2.ifq_maxlen = IFQ_MAXLEN;
 	aarpintrq.ifq_maxlen = IFQ_MAXLEN;
 	mtx_init(&atintrq1.ifq_mtx, "at1_inq", NULL, MTX_DEF);
 	mtx_init(&atintrq2.ifq_mtx, "at2_inq", NULL, MTX_DEF);
 	mtx_init(&aarpintrq.ifq_mtx, "aarp_inq", NULL, MTX_DEF);
+	DDP_LIST_LOCK_INIT();
 	netisr_register(NETISR_ATALK1, at1intr, &atintrq1, 0);
 	netisr_register(NETISR_ATALK2, at2intr, &atintrq2, 0);
 	netisr_register(NETISR_AARP, aarpintr, &aarpintrq, 0);
@@ -225,6 +241,7 @@
     for (ddp = ddpcb_list; ddp != NULL; ddp = ddp->ddp_next) {
 	at_pcbdetach(ddp->ddp_socket, ddp);
     }
+    DDP_LIST_LOCK_DESTROY();
 }
 #endif
 
@@ -243,7 +260,9 @@
 	if (ddp == NULL) {
 	    return (EINVAL);
 	}
+	DDP_LOCK(ddp);
 	at_sockaddr(ddp, nam);
+	DDP_UNLOCK(ddp);
 	return (0);
 }
 
--- //depot/vendor/freebsd/src/sys/netatalk/ddp_var.h	2004/03/22 04:56:26
+++ //depot/user/rwatson/netperf/sys/netatalk/ddp_var.h	2004/03/22 05:14:09
@@ -13,6 +13,7 @@
     struct socket	*ddp_socket;
     struct ddpcb	*ddp_prev, *ddp_next;
     struct ddpcb	*ddp_pprev, *ddp_pnext;
+    struct mtx		 ddp_mtx;
 };
 
 #define sotoddpcb(so)	((struct ddpcb *)(so)->so_pcb)
@@ -34,5 +35,6 @@
 extern int	ddp_cksum;
 extern struct ddpcb		*ddpcb_list;
 extern struct pr_usrreqs	ddp_usrreqs;
+extern struct mtx		 ddp_list_mtx;
 #endif
 #endif /* _NETATALK_DDP_VAR_H_ */
--- //depot/vendor/freebsd/src/sys/netatm/atm_aal5.c	2003/11/18 00:40:43
+++ //depot/user/rwatson/netperf/sys/netatm/atm_aal5.c	2004/05/30 18:22:04
@@ -767,7 +767,7 @@
 	 * that there's room in the socket buffer
 	 */
 	if (((so->so_state & SS_ISCONNECTED) == 0) ||
-	    (so->so_state & SS_CANTRCVMORE) ||
+	    (so->so_rcv.sb_state & SBS_CANTRCVMORE) ||
 	    (len > sbspace(&so->so_rcv))) {
 		atm_sock_stat.as_indrop[atp->atp_type]++;
 		KB_FREEALL(m);
--- //depot/vendor/freebsd/src/sys/netatm/atm_socket.c	2003/10/31 18:36:05
+++ //depot/user/rwatson/netperf/sys/netatm/atm_socket.c	2004/02/28 22:29:37
@@ -173,6 +173,7 @@
 	/*
 	 * Break links and free control blocks
 	 */
+	SOCK_LOCK(so);
 	so->so_pcb = NULL;
 	sotryfree(so);
 
--- //depot/vendor/freebsd/src/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c	2004/06/02 04:20:47
+++ //depot/user/rwatson/netperf/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c	2004/06/02 04:53:21
@@ -996,8 +996,12 @@
 			/* Close L2CAP socket */
 			s->l2so->so_upcallarg = NULL;
 			s->l2so->so_upcall = NULL;
+			SOCKBUF_LOCK(&s->l2so->so_rcv);
 			s->l2so->so_rcv.sb_flags &= ~SB_UPCALL;
+			SOCKBUF_UNLOCK(&s->l2so->so_rcv);
+			SOCKBUF_LOCK(&s->l2so->so_snd);
 			s->l2so->so_snd.sb_flags &= ~SB_UPCALL;
+			SOCKBUF_UNLOCK(&s->l2so->so_snd);
 			soclose(s->l2so);
 
 			mtx_unlock(&s->session_mtx);
@@ -1023,7 +1027,7 @@
 {
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
-	if (s->l2so->so_state & SS_CANTRCVMORE) {
+	if (s->l2so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: L2CAP connection has been terminated, so=%p, so_state=%#x, so_count=%d, " \
 "state=%d, flags=%#x\n", __func__, s->l2so, s->l2so->so_state, 
@@ -1236,8 +1240,12 @@
 	/* Prepare L2CAP socket */
 	l2so->so_upcallarg = NULL;
 	l2so->so_upcall = ng_btsocket_rfcomm_upcall;
+	SOCKBUF_LOCK(&l2so->so_rcv);
 	l2so->so_rcv.sb_flags |= SB_UPCALL;
+	SOCKBUF_UNLOCK(&l2so->so_rcv);
+	SOCKBUF_LOCK(&l2so->so_snd);
 	l2so->so_snd.sb_flags |= SB_UPCALL;
+	SOCKBUF_UNLOCK(&l2so->so_snd);
 	l2so->so_state |= SS_NBIO;
 	s->l2so = l2so;
 
@@ -1316,8 +1324,12 @@
 	/* Return L2CAP socket back to its original state */
 	l2so->so_upcallarg = NULL;
 	l2so->so_upcall = NULL;
+	SOCKBUF_LOCK(&l2so->so_rcv);
 	l2so->so_rcv.sb_flags &= ~SB_UPCALL;
+	SOCKBUF_UNLOCK(&l2so->so_rcv);
+	SOCKBUF_LOCK(&l2so->so_snd);
 	l2so->so_snd.sb_flags &= ~SB_UPCALL;
+	SOCKBUF_UNLOCK(&l2so->so_snd);
 	l2so->so_state &= ~SS_NBIO;
 
 	mtx_destroy(&s->session_mtx);
@@ -1356,7 +1368,7 @@
 	ACCEPT_LOCK();
 	if (TAILQ_EMPTY(&s0->l2so->so_comp)) {
 		ACCEPT_UNLOCK();
-		if (s0->l2so->so_state & SS_CANTRCVMORE)
+		if (s0->l2so->so_rcv.sb_state & SBS_CANTRCVMORE)
 			return (ECONNABORTED);
 		return (EWOULDBLOCK);
 	}
@@ -1370,8 +1382,10 @@
 	s0->l2so->so_qlen --;
 	l2so->so_qstate &= ~SQ_COMP;
 	l2so->so_head = NULL;
+	SOCK_LOCK(l2so);
 	soref(l2so);
 	l2so->so_state |= SS_NBIO;
+	SOCK_UNLOCK(l2so);
 	ACCEPT_UNLOCK();
 
 	error = soaccept(l2so, (struct sockaddr **) &l2sa);
--- //depot/vendor/freebsd/src/sys/netgraph/ng_base.c	2004/05/29 07:25:30
+++ //depot/user/rwatson/netperf/sys/netgraph/ng_base.c	2004/05/31 01:41:33
@@ -170,6 +170,7 @@
 #define NG_IDHASH_FN(ID) ((ID) % (NG_ID_HASH_SIZE))
 #define NG_IDHASH_FIND(ID, node)					\
 	do { 								\
+		mtx_assert(&ng_idhash_mtx, MA_OWNED);			\
 		LIST_FOREACH(node, &ng_ID_hash[NG_IDHASH_FN(ID)],	\
 						nd_idnodes) {		\
 			if (NG_NODE_IS_VALID(node)			\
@@ -3231,10 +3232,12 @@
 {
 	node_p node;
 	int i = 1;
+	mtx_lock(&ng_nodelist_mtx);
 	SLIST_FOREACH(node, &ng_allnodes, nd_all) {
 		printf("[%d] ", i++);
 		dumpnode(node, NULL, 0);
 	}
+	mtx_unlock(&ng_nodelist_mtx);
 }
 
 static void
@@ -3242,10 +3245,12 @@
 {
 	hook_p hook;
 	int i = 1;
+	mtx_lock(&ng_nodelist_mtx);
 	SLIST_FOREACH(hook, &ng_allhooks, hk_all) {
 		printf("[%d] ", i++);
 		dumphook(hook, NULL, 0);
 	}
+	mtx_unlock(&ng_nodelist_mtx);
 }
 
 static int
--- //depot/vendor/freebsd/src/sys/netgraph/ng_ksocket.c	2004/06/02 04:20:47
+++ //depot/user/rwatson/netperf/sys/netgraph/ng_ksocket.c	2004/06/02 04:53:21
@@ -609,9 +609,15 @@
 	/* Add our hook for incoming data and other events */
 	priv->so->so_upcallarg = (caddr_t)node;
 	priv->so->so_upcall = ng_ksocket_incoming;
+	SOCKBUF_LOCK(&priv->so->so_rcv);
 	priv->so->so_rcv.sb_flags |= SB_UPCALL;
+	SOCKBUF_UNLOCK(&priv->so->so_rcv);
+	SOCKBUF_LOCK(&priv->so->so_snd);
 	priv->so->so_snd.sb_flags |= SB_UPCALL;
+	SOCKBUF_UNLOCK(&priv->so->so_snd);
+	SOCK_LOCK(priv->so);
 	priv->so->so_state |= SS_NBIO;
+	SOCK_UNLOCK(priv->so);
 	/*
 	 * --Original comment--
 	 * On a cloned socket we may have already received one or more
@@ -941,8 +947,12 @@
 	/* Close our socket (if any) */
 	if (priv->so != NULL) {
 		priv->so->so_upcall = NULL;
+		SOCKBUF_LOCK(&priv->so->so_rcv);
 		priv->so->so_rcv.sb_flags &= ~SB_UPCALL;
+		SOCKBUF_UNLOCK(&priv->so->so_rcv);
+		SOCKBUF_LOCK(&priv->so->so_snd);
 		priv->so->so_snd.sb_flags &= ~SB_UPCALL;
+		SOCKBUF_UNLOCK(&priv->so->so_snd);
 		soclose(priv->so);
 		priv->so = NULL;
 	}
@@ -1003,6 +1013,9 @@
  * before dereferencing the socket pointer.
  */
 
+/*
+ * XXXRW: ng_ksocket_incoming() is called without Giant.  Is that OK?
+ */
 static void
 ng_ksocket_incoming(struct socket *so, void *arg, int waitflag)
 {
@@ -1144,7 +1157,7 @@
 	 * If the peer has closed the connection, forward a 0-length mbuf
 	 * to indicate end-of-file.
 	 */
-	if (so->so_state & SS_CANTRCVMORE && !(priv->flags & KSF_EOFSEEN)) {
+	if (so->so_rcv.sb_state & SBS_CANTRCVMORE && !(priv->flags & KSF_EOFSEEN)) {
 		MGETHDR(m, waitflag, MT_DATA);
 		if (m != NULL) {
 			m->m_len = m->m_pkthdr.len = 0;
@@ -1171,7 +1184,7 @@
 	}
 	/* Unlocked read. */
 	if (TAILQ_EMPTY(&head->so_comp)) {
-		if (head->so_state & SS_CANTRCVMORE)
+		if (head->so_rcv.sb_state & SBS_CANTRCVMORE)
 			return ECONNABORTED;
 		return EWOULDBLOCK;
 	}
@@ -1205,8 +1218,10 @@
 	head->so_qlen--;
 	so->so_qstate &= ~SQ_COMP;
 	so->so_head = NULL;
+	SOCK_LOCK(so);
 	soref(so);
 	so->so_state |= SS_NBIO;
+	SOCK_UNLOCK(so);
 	ACCEPT_UNLOCK();
 
 	/* XXX KNOTE(&head->so_rcv.sb_sel.si_note, 0); */
@@ -1255,8 +1270,12 @@
 
 	so->so_upcallarg = (caddr_t)node;
 	so->so_upcall = ng_ksocket_incoming;
+	SOCKBUF_LOCK(&so->so_rcv);
 	so->so_rcv.sb_flags |= SB_UPCALL;
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	SOCKBUF_LOCK(&so->so_snd);
 	so->so_snd.sb_flags |= SB_UPCALL;
+	SOCKBUF_UNLOCK(&so->so_snd);
 
 	/* Fill in the response data and send it or return it to the caller */
 	resp_data = (struct ng_ksocket_accept *)resp->data;
--- //depot/vendor/freebsd/src/sys/netgraph/ng_socket.c	2004/05/29 00:56:26
+++ //depot/user/rwatson/netperf/sys/netgraph/ng_socket.c	2004/05/31 01:41:33
@@ -151,6 +151,9 @@
 SYSCTL_INT(_net_graph, OID_AUTO, recvspace, CTLFLAG_RW,
     &ngpdg_recvspace , 0, "Maximum space for incoming Netgraph datagrams");
 
+/*
+ * XXXRW: Locking?
+ */
 /* List of all sockets */
 static LIST_HEAD(, ngpcb) ngsocklist;
 
--- //depot/vendor/freebsd/src/sys/netinet/accf_http.c	2004/05/30 20:25:31
+++ //depot/user/rwatson/netperf/sys/netinet/accf_http.c	2004/05/31 01:41:33
@@ -161,7 +161,8 @@
 sohashttpget(struct socket *so, void *arg, int waitflag)
 {
 
-	if ((so->so_state & SS_CANTRCVMORE) == 0 && !sbfull(&so->so_rcv)) {
+	/* Unlocked read. */
+	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0 && !sbfull(&so->so_rcv)) {
 		struct mbuf *m;
 		char *cmp;
 		int	cmplen, cc;
@@ -214,7 +215,8 @@
 	struct mbuf *m, *n;
 	int	i, cc, spaces, inspaces;
 
-	if ((so->so_state & SS_CANTRCVMORE) != 0 || sbfull(&so->so_rcv))
+	/* Unlocked read. */
+	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) != 0 || sbfull(&so->so_rcv))
 		goto fallout;
 
 	m = so->so_rcv.sb_mb;
@@ -301,7 +303,8 @@
 	int ccleft, copied;
 
 	DPRINT("start");
-	if ((so->so_state & SS_CANTRCVMORE) != 0 || sbfull(&so->so_rcv))
+	/* Unlocked read. */
+	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) != 0 || sbfull(&so->so_rcv))
 		goto gotit;
 
 	/*
--- //depot/vendor/freebsd/src/sys/netinet/if_ether.c	2004/04/25 15:02:49
+++ //depot/user/rwatson/netperf/sys/netinet/if_ether.c	2004/05/04 02:32:28
@@ -98,6 +98,11 @@
 #define la_timer la_rt->rt_rmx.rmx_expire /* deletion time in seconds */
 };
 
+/*
+ * XXXRW: Need to document (and/or fix) locking for this.  We always
+ * seem to hold a lock (and assert) when referencing this list, but it's
+ * not clear it's always the same lock.
+ */
 static	LIST_HEAD(, llinfo_arp) llinfo_arp;
 
 static struct	ifqueue arpintrq;
--- //depot/vendor/freebsd/src/sys/netinet/igmp.c	2004/04/07 20:52:05
+++ //depot/user/rwatson/netperf/sys/netinet/igmp.c	2004/04/08 03:11:34
@@ -80,10 +80,28 @@
 SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RW, &igmpstat,
     igmpstat, "");
 
+/*
+ * igmp_mtx protects all mutable global variables in igmp.c, as well as
+ * the data fields in struct router_info.  In general, a router_info
+ * structure will be valid as long as the referencing struct in_multi is
+ * valid, so no reference counting is used.  We allow unlocked reads of
+ * router_info data when accessed via an in_multi read-only.
+ */
+static struct mtx igmp_mtx;
 static SLIST_HEAD(, router_info) router_info_head;
 static int igmp_timers_are_running;
+
+/*
+ * XXXRW: can we define these such that these can be made const?  In any
+ * case, these shouldn't be changed after igmp_init() and therefore don't
+ * need locking.
+ */
 static u_long igmp_all_hosts_group;
 static u_long igmp_all_rtrs_group;
+
+/*
+ * XXXRW: These variables make me vaguely nervous.
+ */
 static struct mbuf *router_alert;
 static struct route igmprt;
 
@@ -108,6 +126,7 @@
 
 	/*
 	 * Construct a Router Alert option to use in outgoing packets
+	 * XXXRW: This might actually need a MAC label.
 	 */
 	MGET(router_alert, M_DONTWAIT, MT_DATA);
 	ra = mtod(router_alert, struct ipoption *);
@@ -118,6 +137,7 @@
 	ra->ipopt_list[3] = 0x00;
 	router_alert->m_len = sizeof(ra->ipopt_dst) + ra->ipopt_list[1];
 
+	mtx_init(&igmp_mtx, "igmp_mtx", NULL, MTX_DEF);
 	SLIST_INIT(&router_info_head);
 }
 
@@ -126,6 +146,7 @@
 {
 	struct router_info *rti;
 
+	mtx_assert(&igmp_mtx, MA_OWNED);
 	IGMP_PRINTF("[igmp.c, _find_rti] --> entering \n");
 	SLIST_FOREACH(rti, &router_info_head, rti_list) {
 		if (rti->rti_ifp == ifp) {
@@ -134,6 +155,9 @@
 			return rti;
 		}
 	}
+	/*
+	 * XXXRW: return value of malloc not checked, despite M_NOWAIT.
+	 */
 	MALLOC(rti, struct router_info *, sizeof *rti, M_IGMP, M_NOWAIT);
 	rti->rti_ifp = ifp;
 	rti->rti_type = IGMP_V2_ROUTER;
@@ -197,7 +221,6 @@
 	timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE;
 	if (timer == 0)
 		timer = 1;
-	rti = find_rti(ifp);
 
 	/*
 	 * In the IGMPv2 specification, there are 3 states and a flag.
@@ -224,8 +247,11 @@
 			 * value in RFC 1112.
 			 */
 
+			mtx_lock(&igmp_mtx);
+			rti = find_rti(ifp);
 			rti->rti_type = IGMP_V1_ROUTER;
 			rti->rti_time = 0;
+			mtx_unlock(&igmp_mtx);
 
 			timer = IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ;
 
@@ -344,7 +370,9 @@
 		inm->inm_timer = 0;
 		inm->inm_state = IGMP_OTHERMEMBER;
 	} else {
+		mtx_lock(&igmp_mtx);
 		inm->inm_rti = find_rti(inm->inm_ifp);
+		mtx_unlock(&igmp_mtx);
 		igmp_sendpkt(inm, inm->inm_rti->rti_type, 0);
 		inm->inm_timer = IGMP_RANDOM_DELAY(
 					IGMP_MAX_HOST_REPORT_DELAY*PR_FASTHZ);
@@ -404,6 +432,7 @@
 	struct router_info *rti;
 
 	IGMP_PRINTF("[igmp.c,_slowtimo] -- > entering \n");
+	mtx_lock(&igmp_mtx);
 	SLIST_FOREACH(rti, &router_info_head, rti_list) {
 		if (rti->rti_type == IGMP_V1_ROUTER) {
 			rti->rti_time++;
@@ -411,6 +440,7 @@
 				rti->rti_type = IGMP_V2_ROUTER;
 		}
 	}
+	mtx_unlock(&igmp_mtx);
 	IGMP_PRINTF("[igmp.c,_slowtimo] -- > exiting \n");
 	splx(s);
 }
--- //depot/vendor/freebsd/src/sys/netinet/in_gif.c	2004/04/14 01:15:27
+++ //depot/user/rwatson/netperf/sys/netinet/in_gif.c	2004/04/17 01:57:58
@@ -174,6 +174,9 @@
 	}
 	bcopy(&iphdr, mtod(m, struct ip *), sizeof(struct ip));
 
+	/*
+	 * XXXRW: locking of gif's softc.
+	 */
 	if (dst->sin_family != sin_dst->sin_family ||
 	    dst->sin_addr.s_addr != sin_dst->sin_addr.s_addr) {
 		/* cache route doesn't match */
@@ -320,6 +323,10 @@
 	case 0: case 127: case 255:
 		return 0;
 	}
+
+	/*
+	 * XXXRW: Lock in_ifaddrhead walking.
+	 */
 	/* reject packets with broadcast on source */
 	TAILQ_FOREACH(ia4, &in_ifaddrhead, ia_link) {
 		if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
@@ -328,6 +335,7 @@
 			return 0;
 	}
 
+	/* XXXRW: unlocked read. */
 	/* ingress filters on outer source */
 	if ((sc->gif_if.if_flags & IFF_LINK2) == 0 && ifp) {
 		struct sockaddr_in sin;
@@ -383,6 +391,11 @@
 in_gif_attach(sc)
 	struct gif_softc *sc;
 {
+	
+	/*
+	 * XXXRW: Technically, NULL can also be returned for ENOMEM,
+	 * not just EEXIST.
+	 */
 	sc->encap_cookie4 = encap_attach_func(AF_INET, -1, gif_encapcheck,
 	    &in_gif_protosw, sc);
 	if (sc->encap_cookie4 == NULL)
--- //depot/vendor/freebsd/src/sys/netinet/in_pcb.c	2004/05/20 06:35:28
+++ //depot/user/rwatson/netperf/sys/netinet/in_pcb.c	2004/05/23 16:56:02
@@ -176,7 +176,9 @@
 	error = mac_init_inpcb(inp, M_NOWAIT);
 	if (error != 0)
 		goto out;
+	SOCK_LOCK(so);
 	mac_create_inpcb_from_socket(so, inp);
+	SOCK_UNLOCK(so);
 #endif
 #if defined(IPSEC) || defined(FAST_IPSEC)
 #ifdef FAST_IPSEC
@@ -671,6 +673,7 @@
 #ifdef IPSEC
 	ipsec_pcbdisconn(inp->inp_sp);
 #endif
+	/* Unlocked read. */
 	if (inp->inp_socket->so_state & SS_NOFDREF)
 		in_pcbdetach(inp);
 }
@@ -690,6 +693,7 @@
 	inp->inp_gencnt = ++ipi->ipi_gencnt;
 	in_pcbremlists(inp);
 	if (so) {
+		SOCK_LOCK(so);
 		so->so_pcb = 0;
 		sotryfree(so);
 	}
@@ -1174,10 +1178,11 @@
 #ifdef MAC
 	struct inpcb *inp;
 
-	/* XXX: Will assert socket lock when we have them. */
 	inp = (struct inpcb *)so->so_pcb;
 	INP_LOCK(inp);
+	SOCK_LOCK(so);
 	mac_inpcb_sosetlabel(so, inp);
+	SOCK_UNLOCK(so);
 	INP_UNLOCK(inp);
 #endif
 }
--- //depot/vendor/freebsd/src/sys/netinet/in_pcb.h	2004/04/07 20:52:05
+++ //depot/user/rwatson/netperf/sys/netinet/in_pcb.h	2004/04/08 03:11:34
@@ -244,9 +244,14 @@
 #define INP_LOCK(inp)		mtx_lock(&(inp)->inp_mtx)
 #define INP_UNLOCK(inp)		mtx_unlock(&(inp)->inp_mtx)
 #ifndef INET6
-#define INP_LOCK_ASSERT(inp)	mtx_assert(&(inp)->inp_mtx, MA_OWNED)
+#define INP_LOCK_ASSERT(inp)	do {					\
+	mtx_assert(&(inp)->inp_mtx, MA_OWNED);				\
+	NET_ASSERT_GIANT();						\
+} while (0)
 #else
-#define INP_LOCK_ASSERT(inp)
+#define INP_LOCK_ASSERT(inp)	do {					\
+	NET_ASSERT_GIANT();						\
+} while (0)
 #endif
 
 #define INP_INFO_LOCK_INIT(ipi, d) \
@@ -256,11 +261,21 @@
 #define INP_INFO_RUNLOCK(ipi)	mtx_unlock(&(ipi)->ipi_mtx)
 #define INP_INFO_WUNLOCK(ipi)	mtx_unlock(&(ipi)->ipi_mtx)
 #ifndef INET6
-#define INP_INFO_RLOCK_ASSERT(ipi)	mtx_assert(&(ipi)->ipi_mtx, MA_OWNED)
-#define INP_INFO_WLOCK_ASSERT(ipi)	mtx_assert(&(ipi)->ipi_mtx, MA_OWNED)
+#define INP_INFO_RLOCK_ASSERT(ipi)	do {				\
+	mtx_assert(&(ipi)->ipi_mtx, MA_OWNED);				\
+	NET_ASSERT_GIANT();						\
+} while (0)
+#define INP_INFO_WLOCK_ASSERT(ipi)	do {				\
+	mtx_assert(&(ipi)->ipi_mtx, MA_OWNED);				\
+	NET_ASSERT_GIANT();						\
+} while (0)
 #else
-#define INP_INFO_RLOCK_ASSERT(ipi)
-#define INP_INFO_WLOCK_ASSERT(ipi)
+#define INP_INFO_RLOCK_ASSERT(ipi)	do {				\
+	NET_ASSERT_GIANT();						\
+} while (0)
+#define INP_INFO_WLOCK_ASSERT(ipi)	do {				\
+	NET_ASSERT_GIANT();						\
+} while (0)
 #endif
 
 #define INP_PCBHASH(faddr, lport, fport, mask) \
--- //depot/vendor/freebsd/src/sys/netinet/in_proto.c	2004/04/07 20:52:05
+++ //depot/user/rwatson/netperf/sys/netinet/in_proto.c	2004/04/08 03:11:34
@@ -178,7 +178,7 @@
 { SOCK_RAW,	&inetdomain,	IPPROTO_IPV4,	PR_ATOMIC|PR_ADDR|PR_LASTHDR,
   encap4_input,	0,	 	0,		rip_ctloutput,
   0,
-  encap_init,		0,		0,		0,
+  encap_init,	0,		0,		0,
   &rip_usrreqs
 },
 { SOCK_RAW,	&inetdomain,	IPPROTO_MOBILE,	PR_ATOMIC|PR_ADDR|PR_LASTHDR,
--- //depot/vendor/freebsd/src/sys/netinet/ip_divert.c	2004/04/07 20:52:05
+++ //depot/user/rwatson/netperf/sys/netinet/ip_divert.c	2004/05/08 02:12:27
@@ -219,20 +219,6 @@
 		    sizeof(divsrc.sin_zero));
 	}
 
-	/*
-	 * XXX sbappendaddr must be protected by Giant until
-	 * we have locking at the socket layer.  When entered
-	 * from below we come in w/o Giant and must take it
-	 * here.  Unfortunately we cannot tell whether we're
-	 * entering from above (already holding Giant),
-	 * below (potentially without Giant), or otherwise
-	 * (e.g. from tcp_syncache through a timeout) so we
-	 * have to grab it regardless.  This causes a LOR with
-	 * the tcp lock, at least, and possibly others.  For
-	 * the moment we're ignoring this. Once sockets are
-	 * locked this cruft can be removed.
-	 */
-	mtx_lock(&Giant);
 	/* Put packet on socket queue, if any */
 	sa = NULL;
 	nport = htons((u_int16_t)divert_info(mtag));
@@ -254,7 +240,6 @@
 		INP_UNLOCK(inp);
 	}
 	INP_INFO_RUNLOCK(&divcbinfo);
-	mtx_unlock(&Giant);
 	if (sa == NULL) {
 		m_freem(m);
 		ipstat.ips_noproto++;
@@ -277,10 +262,6 @@
 
 	KASSERT(m->m_pkthdr.rcvif == NULL, ("rcvif not null"));
 
-#ifdef MAC
-	mac_create_mbuf_from_socket(so, m);
-#endif
-
 	if (control)
 		m_freem(control);		/* XXX */
 
@@ -337,6 +318,9 @@
 			/* Send packet to output processing */
 			ipstat.ips_rawout++;			/* XXX */
 
+#ifdef MAC
+			mac_create_mbuf_from_inpcb(inp, m);
+#endif
 			error = ip_output(m,
 				    inp->inp_options, NULL,
 				    (so->so_options & SO_DONTROUTE) |
@@ -363,6 +347,14 @@
 			}
 			m->m_pkthdr.rcvif = ifa->ifa_ifp;
 		}
+#ifdef MAC
+		/*
+		 * XXXRW: perhaps should be mac_create_mbuf_from_inpcb()?
+		 */
+		SOCK_LOCK(so);
+		mac_create_mbuf_from_socket(so, m);
+		SOCK_UNLOCK(so);
+#endif
 		/* Send packet to input processing */
 		ip_input(m);
 	}
@@ -430,6 +422,7 @@
 	/* The socket is always "connected" because
 	   we always know "where" to send the packet */
 	INP_UNLOCK(inp);
+	/* XXXRW: so_state locking. */
 	so->so_state |= SS_ISCONNECTED;
 	return 0;
 }
@@ -472,6 +465,8 @@
 static int
 div_disconnect(struct socket *so)
 {
+
+	/* Unlocked read. */
 	if ((so->so_state & SS_ISCONNECTED) == 0)
 		return ENOTCONN;
 	return div_abort(so);
--- //depot/vendor/freebsd/src/sys/netinet/ip_dummynet.c	2004/03/03 01:35:21
+++ //depot/user/rwatson/netperf/sys/netinet/ip_dummynet.c	2004/03/04 04:00:06
@@ -171,7 +171,10 @@
 #define	DUMMYNET_LOCK_DESTROY()	mtx_destroy(&dummynet_mtx)
 #define	DUMMYNET_LOCK()		mtx_lock(&dummynet_mtx)
 #define	DUMMYNET_UNLOCK()	mtx_unlock(&dummynet_mtx)
-#define	DUMMYNET_LOCK_ASSERT()	mtx_assert(&dummynet_mtx, MA_OWNED)
+#define	DUMMYNET_LOCK_ASSERT()	do {				\
+	mtx_assert(&dummynet_mtx, MA_OWNED);			\
+	NET_ASSERT_GIANT();					\
+} while (0)
 
 static int config_pipe(struct dn_pipe *p);
 static int ip_dn_ctl(struct sockopt *sopt);
--- //depot/vendor/freebsd/src/sys/netinet/ip_encap.c	2004/03/10 02:50:38
+++ //depot/user/rwatson/netperf/sys/netinet/ip_encap.c	2004/03/10 03:47:45
@@ -1,4 +1,4 @@
-/*	$FreeBSD: src/sys/netinet/ip_encap.c,v 1.19 2004/03/10 02:48:50 rwatson Exp $	*/
+/*	$FreeBSD: src/sys/netinet/ip_encap.c,v 1.18 2003/06/01 09:20:38 phk Exp $	*/
 /*	$KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $	*/
 
 /*
@@ -106,8 +106,7 @@
 LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab);
 
 /*
- * We currently keey encap_init() for source code compatibility reasons --
- * it's referenced by KAME pieces in netinet6.
+ * XXXRW: encap_init() is entirely useless, but in_proto.c still references it.
  */
 void
 encap_init()
@@ -185,6 +184,10 @@
 	}
 	mtx_unlock(&encapmtx);
 
+	/*
+	 * XXXRW: Need drain mechanism to prevent the encapsulation
+	 * entry from being released while in use.
+	 */
 	if (match) {
 		/* found a match, "match" has the best one */
 		psw = match->psw;
@@ -255,6 +258,10 @@
 	}
 	mtx_unlock(&encapmtx);
 
+	/*
+	 * XXXRW: Need drain mechanism so the encap entry isn't freed
+	 * while in use.
+	 */
 	if (match) {
 		/* found a match */
 		psw = (const struct ip6protosw *)match->psw;
--- //depot/vendor/freebsd/src/sys/netinet/ip_encap.h	2002/03/19 21:30:39
+++ //depot/user/rwatson/netperf/sys/netinet/ip_encap.h	2004/03/12 06:46:13
@@ -35,6 +35,15 @@
 
 #ifdef _KERNEL
 
+/*
+ * This structure is entirely static after registration, and other than
+ * its entry in the encapsulation table, requires no locking.  The chain
+ * field is locked using the global encapmtx.
+ *
+ * XXXRW: Need to add a refcount/drain mechanism so that encapsulation
+ * entries can't be removed while in use.  This likely requires a
+ * refcount and cv to wait for it to drain, or an sx lock.
+ */
 struct encaptab {
 	LIST_ENTRY(encaptab) chain;
 	int af;
--- //depot/vendor/freebsd/src/sys/netinet/ip_fastfwd.c	2004/05/06 18:50:39
+++ //depot/user/rwatson/netperf/sys/netinet/ip_fastfwd.c	2004/05/23 16:56:02
@@ -644,6 +644,7 @@
 				/*
 				 * Return packet for processing by ip_input
 				 */
+				/* XXX statistic */
 				if (ro.ro_rt)
 					RTFREE(ro.ro_rt);
 				return 0;
--- //depot/vendor/freebsd/src/sys/netinet/ip_fw2.c	2004/05/30 18:02:05
+++ //depot/user/rwatson/netperf/sys/netinet/ip_fw2.c	2004/05/31 01:41:33
@@ -123,7 +123,10 @@
 #define	IPFW_LOCK_DESTROY(_chain)	mtx_destroy(&(_chain)->mtx)
 #define	IPFW_LOCK(_chain)	mtx_lock(&(_chain)->mtx)
 #define	IPFW_UNLOCK(_chain)	mtx_unlock(&(_chain)->mtx)
-#define	IPFW_LOCK_ASSERT(_chain)	mtx_assert(&(_chain)->mtx, MA_OWNED)
+#define	IPFW_LOCK_ASSERT(_chain)	do {				\
+	mtx_assert(&(_chain)->mtx, MA_OWNED);				\
+	NET_ASSERT_GIANT();						\
+} while (0)
 
 /*
  * list of rules for layer 3
@@ -1314,7 +1317,8 @@
 }
 
 static int
-check_uidgid(ipfw_insn_u32 *insn,
+check_uidgid(struct ip_fw_chain *chain,
+	ipfw_insn_u32 *insn,
 	int proto, struct ifnet *oif,
 	struct in_addr dst_ip, u_int16_t dst_port,
 	struct in_addr src_ip, u_int16_t src_port)
@@ -1335,7 +1339,10 @@
 
 	match = 0;
 
-	INP_INFO_RLOCK(pi);	/* XXX LOR with IPFW */
+	/* NB: reorder to avoid LOR between IPFW and inp */
+	IPFW_UNLOCK(chain);
+	INP_INFO_RLOCK(pi);
+	IPFW_LOCK(chain);
 	pcb =  (oif) ?
 		in_pcblookup_hash(pi,
 		    dst_ip, htons(dst_port),
@@ -1675,7 +1682,7 @@
 					break;
 				if (proto == IPPROTO_TCP ||
 				    proto == IPPROTO_UDP)
-					match = check_uidgid(
+					match = check_uidgid(chain,
 						    (ipfw_insn_u32 *)cmd,
 						    proto, oif,
 						    dst_ip, dst_port,
--- //depot/vendor/freebsd/src/sys/netinet/ip_id.c	2004/02/26 03:55:40
+++ //depot/user/rwatson/netperf/sys/netinet/ip_id.c	2004/03/12 03:03:57
@@ -79,6 +79,9 @@
 	2729
 };
 
+/*
+ * XXXRW: Locking?
+ */
 static u_int16_t ru_x;
 static u_int16_t ru_seed, ru_seed2;
 static u_int16_t ru_a, ru_b;
--- //depot/vendor/freebsd/src/sys/netinet/ip_input.c	2004/05/06 18:50:39
+++ //depot/user/rwatson/netperf/sys/netinet/ip_input.c	2004/05/23 16:56:02
@@ -931,8 +931,10 @@
 		/* attach next hop info for TCP */
 		struct m_tag *mtag = m_tag_get(PACKET_TAG_IPFORWARD,
 		    sizeof(struct sockaddr_in *), M_NOWAIT);
-		if (mtag == NULL)
+		if (mtag == NULL) {
+			/* XXX statistic */
 			goto bad;
+		}
 		*(struct sockaddr_in **)(mtag+1) = args.next_hop;
 		m_tag_prepend(m, mtag);
 	}
@@ -1863,6 +1865,7 @@
 		struct m_tag *mtag = m_tag_get(PACKET_TAG_IPFORWARD,
 		    sizeof(struct sockaddr_in *), M_NOWAIT);
 		if (mtag == NULL) {
+			/* XXX statistic */
 			m_freem(m);
 			return;
 		}
--- //depot/vendor/freebsd/src/sys/netinet/ip_mroute.c	2004/05/30 20:25:31
+++ //depot/user/rwatson/netperf/sys/netinet/ip_mroute.c	2004/05/31 01:41:33
@@ -104,7 +104,10 @@
 static struct mtx mfc_mtx;
 #define	MFC_LOCK()	mtx_lock(&mfc_mtx)
 #define	MFC_UNLOCK()	mtx_unlock(&mfc_mtx)
-#define	MFC_LOCK_ASSERT()	mtx_assert(&mfc_mtx, MA_OWNED)
+#define	MFC_LOCK_ASSERT()	do {					\
+	mtx_assert(&mfc_mtx, MA_OWNED);					\
+	NET_ASSERT_GIANT();						\
+} while (0)
 #define	MFC_LOCK_INIT()	mtx_init(&mfc_mtx, "mroute mfc table", NULL, MTX_DEF)
 #define	MFC_LOCK_DESTROY()	mtx_destroy(&mfc_mtx)
 
@@ -1304,13 +1307,10 @@
 socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)
 {
     if (s) {
-	mtx_lock(&Giant);		/* XXX until sockets are locked */
 	if (sbappendaddr(&s->so_rcv, (struct sockaddr *)src, mm, NULL) != 0) {
 	    sorwakeup(s);
-	    mtx_unlock(&Giant);
 	    return 0;
 	}
-	mtx_unlock(&Giant);
     }
     m_freem(mm);
     return -1;
--- //depot/vendor/freebsd/src/sys/netinet/ip_output.c	2004/05/11 19:15:32
+++ //depot/user/rwatson/netperf/sys/netinet/ip_output.c	2004/05/23 16:56:02
@@ -157,6 +157,12 @@
 
 	M_ASSERTPKTHDR(m);
 	
+	/*
+	 * When packet comes from dummynet restore state from
+	 * previous processing instead of the header.  Yech!
+	 *
+	 * XXX add conditional compilation?
+	 */
 	args.next_hop = m_claim_next(m, PACKET_TAG_IPFORWARD);
 	dummytag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL);
 	if (dummytag != NULL) {
@@ -871,6 +877,7 @@
 				    PACKET_TAG_IPFORWARD,
 				    sizeof(struct sockaddr_in *), M_NOWAIT);
 				if (mtag == NULL) {
+					/* XXX statistic */
 					error = ENOBUFS;
 					goto bad;
 				}
@@ -889,6 +896,7 @@
 				    CSUM_IP_CHECKED | CSUM_IP_VALID;
 				ip->ip_len = htons(ip->ip_len);
 				ip->ip_off = htons(ip->ip_off);
+				/* XXX netisr_queue(NETISR_IP, m); */
 				ip_input(m);
 				goto done;
 			}
--- //depot/vendor/freebsd/src/sys/netinet/raw_ip.c	2004/06/03 03:20:41
+++ //depot/user/rwatson/netperf/sys/netinet/raw_ip.c	2004/06/04 03:56:30
@@ -87,6 +87,9 @@
  * so leave them not initialized and rely on BSS being set to 0.
  */
 
+/*
+ * XXXRW: Locking for mrouter bits?
+ */
 /* The socket used to communicate with the multicast routing daemon.  */
 struct socket  *ip_mrouter;
 
@@ -623,6 +626,7 @@
 	}
 	INP_LOCK(inp);
 	soisdisconnected(so);
+	/* Unlocked read. */
 	if (so->so_state & SS_NOFDREF)
 		rip_pcbdetach(so, inp);
 	else
@@ -634,6 +638,8 @@
 static int
 rip_disconnect(struct socket *so)
 {
+
+	/* Unlocked read. */
 	if ((so->so_state & SS_ISCONNECTED) == 0)
 		return ENOTCONN;
 	return rip_abort(so);
@@ -730,6 +736,7 @@
 
 	INP_INFO_WLOCK(&ripcbinfo);
 	inp = sotoinpcb(so);
+	/* Unlocked read. */
 	if (so->so_state & SS_ISCONNECTED) {
 		if (nam) {
 			INP_INFO_WUNLOCK(&ripcbinfo);
--- //depot/vendor/freebsd/src/sys/netinet/tcp_input.c	2004/05/02 15:10:41
+++ //depot/user/rwatson/netperf/sys/netinet/tcp_input.c	2004/05/30 18:22:04
@@ -355,7 +355,8 @@
 		flags = q->tqe_th->th_flags & TH_FIN;
 		nq = LIST_NEXT(q, tqe_q);
 		LIST_REMOVE(q, tqe_q);
-		if (so->so_state & SS_CANTRCVMORE)
+		/* Unlocked read. */
+		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 			m_freem(q->tqe_m);
 		else
 			sbappendstream(&so->so_rcv, q->tqe_m);
@@ -424,7 +425,7 @@
 	struct tcpopt to;		/* options in this segment */
 	struct rmxp_tao tao;		/* our TAO cache entry */
 	int headlocked = 0;
-	struct sockaddr_in *next_hop = NULL;
+	struct sockaddr_in *next_hop;
 	int rstreason; /* For badport_bandlim accounting purposes */
 
 	struct ip6_hdr *ip6 = NULL;
@@ -747,6 +748,7 @@
 		tiwin = th->th_win;
 
 #ifdef MAC
+	INP_LOCK_ASSERT(inp);
 	if (mac_check_inpcb_deliver(inp, m))
 		goto drop;
 #endif
@@ -1162,6 +1164,7 @@
 				acked = th->th_ack - tp->snd_una;
 				tcpstat.tcps_rcvackpack++;
 				tcpstat.tcps_rcvackbyte += acked;
+				SOCKBUF_LOCK(&so->so_snd);
 				sbdrop(&so->so_snd, acked);
 				if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
 				    SEQ_LEQ(th->th_ack, tp->snd_recover))
@@ -1199,7 +1202,9 @@
 						      tp->t_rxtcur,
 						      tcp_timer_rexmt, tp);
 
-				sowwakeup(so);
+				sowwakeup_locked(so);
+				SOCKBUF_UNLOCK(&so->so_snd);
+				/* Unlocked read. */
 				if (so->so_snd.sb_cc)
 					(void) tcp_output(tp);
 				goto check_delack;
@@ -1237,7 +1242,8 @@
 #endif
 			 * Add data to socket buffer.
 			 */
-			if (so->so_state & SS_CANTRCVMORE) {
+			/* Unlocked read. */
+			if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 				m_freem(m);
 			} else {
 				m_adj(m, drop_hdrlen);	/* delayed header drop */
@@ -1352,7 +1358,10 @@
 			tcpstat.tcps_connects++;
 			soisconnected(so);
 #ifdef MAC
+			/* XXXRW: lock order? */
+			SOCK_LOCK(so);
 			mac_set_socket_peer_from_mbuf(m, so);
+			SOCK_UNLOCK(so);
 #endif
 			/* Do window scaling on this connection? */
 			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
@@ -1707,6 +1716,7 @@
 	 * If new data are received on a connection after the
 	 * user processes are gone, then RST the other end.
 	 */
+	/* Unlocked read. */
 	if ((so->so_state & SS_NOFDREF) &&
 	    tp->t_state > TCPS_CLOSE_WAIT && tlen) {
 		tp = tcp_close(tp);
@@ -2099,6 +2109,7 @@
 				incr = incr * incr / cw;
 			tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
 		}
+		SOCKBUF_LOCK(&so->so_snd);
 		if (acked > so->so_snd.sb_cc) {
 			tp->snd_wnd -= so->so_snd.sb_cc;
 			sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
@@ -2108,7 +2119,8 @@
 			tp->snd_wnd -= acked;
 			ourfinisacked = 0;
 		}
-		sowwakeup(so);
+		sowwakeup_locked(so);
+		SOCKBUF_UNLOCK(&so->so_snd);
 		/* detect una wraparound */
 		if (tcp_do_newreno && !IN_FASTRECOVERY(tp) &&
 		    SEQ_GT(tp->snd_una, tp->snd_recover) &&
@@ -2141,7 +2153,8 @@
 		 * we should release the tp also, and use a
 		 * compressed state.
 		 */
-				if (so->so_state & SS_CANTRCVMORE) {
+				/* Unlocked read. */
+				if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 					soisdisconnected(so);
 					callout_reset(tp->tt_2msl, tcp_maxidle,
 						      tcp_timer_2msl, tp);
@@ -2224,6 +2237,7 @@
 		 * soreceive.  It's hard to imagine someone
 		 * actually wanting to send this much urgent data.
 		 */
+		/* Unlocked read. */
 		if (th->th_urp + so->so_rcv.sb_cc > sb_max) {
 			th->th_urp = 0;			/* XXX */
 			thflags &= ~TH_URG;		/* XXX */
@@ -2243,12 +2257,17 @@
 		 * of data past the urgent section as the original
 		 * spec states (in one of two places).
 		 */
+		/* Unlocked read of sb_cc. */
+		/* XXXRW: so_oobmark assigned unlocked; sb_state is locked. */
 		if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
 			tp->rcv_up = th->th_seq + th->th_urp;
 			so->so_oobmark = so->so_rcv.sb_cc +
 			    (tp->rcv_up - tp->rcv_nxt) - 1;
-			if (so->so_oobmark == 0)
-				so->so_state |= SS_RCVATMARK;
+			if (so->so_oobmark == 0) {
+				SOCKBUF_LOCK(&so->so_rcv);
+				so->so_rcv.sb_state |= SBS_RCVATMARK;
+				SOCKBUF_UNLOCK(&so->so_rcv);
+			}
 			sohasoutofband(so);
 			tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
 		}
@@ -2309,7 +2328,8 @@
 			tcpstat.tcps_rcvpack++;
 			tcpstat.tcps_rcvbyte += tlen;
 			ND6_HINT(tp);
-			if (so->so_state & SS_CANTRCVMORE)
+			/* Unlocked read. */
+			if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 				m_freem(m);
 			else
 				sbappendstream(&so->so_rcv, m);
@@ -2935,6 +2955,10 @@
 	}
 	tp->t_maxseg = mss;
 
+	/*
+	 * XXXRW: read-modify-write on socket buffer without acquiring
+	 * the socket buffer lock.
+	 */
 	if ((so->so_rcv.sb_hiwat == tcp_recvspace) && metrics.rmx_recvpipe)
 		bufsize = metrics.rmx_recvpipe;
 	else
--- //depot/vendor/freebsd/src/sys/netinet/tcp_output.c	2004/05/04 02:15:32
+++ //depot/user/rwatson/netperf/sys/netinet/tcp_output.c	2004/05/04 02:32:28
@@ -210,6 +210,7 @@
 			 * to send then the probe will be the FIN
 			 * itself.
 			 */
+			/* Unlocked read of sb_cc. */
 			if (off < so->so_snd.sb_cc)
 				flags &= ~TH_FIN;
 			sendwin = 1;
@@ -231,6 +232,7 @@
 	 * be set to snd_una, the offset will be 0, and the length may
 	 * wind up 0.
 	 */
+	/* Unlocked read of sb_cc. */
 	len = (long)ulmin(so->so_snd.sb_cc, sendwin) - off;
 
 
@@ -292,6 +294,7 @@
 		len = tp->t_maxseg;
 		sendalot = 1;
 	}
+	/* Unlocked read of sb_cc. */
 	if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
 		flags &= ~TH_FIN;
 
@@ -319,6 +322,7 @@
 		 *
 		 * note: the len + off check is almost certainly unnecessary.
 		 */
+		/* Unlocked read of sb_cc. */
 		if (!(tp->t_flags & TF_MORETOCOME) &&	/* normal case */
 		    (idle || (tp->t_flags & TF_NODELAY)) &&
 		    len + off >= so->so_snd.sb_cc &&
@@ -397,6 +401,7 @@
 	 * if window is nonzero, transmit what we can,
 	 * otherwise force out a byte.
 	 */
+	/* Unlocked read of sb_cc. */
 	if (so->so_snd.sb_cc && !callout_active(tp->tt_rexmt) &&
 	    !callout_active(tp->tt_persist)) {
 		tp->t_rxtshift = 0;
@@ -664,6 +669,7 @@
 		 * give data to the user when a buffer fills or
 		 * a PUSH comes in.)
 		 */
+		/* Unlocked read of sb_cc. */
 		if (off + len == so->so_snd.sb_cc)
 			flags |= TH_PUSH;
 	} else {
--- //depot/vendor/freebsd/src/sys/netinet/tcp_subr.c	2004/05/04 02:15:32
+++ //depot/user/rwatson/netperf/sys/netinet/tcp_subr.c	2004/05/04 02:32:28
@@ -576,6 +576,7 @@
 #ifdef INET6
 	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
+	int callout_flag;
 
 	tm = uma_zalloc(tcpcb_zone, M_NOWAIT | M_ZERO);
 	if (tm == NULL)
@@ -589,11 +590,17 @@
 		tcp_mssdflt;
 
 	/* Set up our timeouts. */
-	callout_init(tp->tt_rexmt = &tm->tcpcb_mem_rexmt, 0);
-	callout_init(tp->tt_persist = &tm->tcpcb_mem_persist, 0);
-	callout_init(tp->tt_keep = &tm->tcpcb_mem_keep, 0);
-	callout_init(tp->tt_2msl = &tm->tcpcb_mem_2msl, 0);
-	callout_init(tp->tt_delack = &tm->tcpcb_mem_delack, 0);
+	/*
+	 * XXXRW: Are these actually MPSAFE?  I think so, but need to
+	 * review the timed wait code, as it has some list variables,
+	 * etc, that are global.
+	 */
+	callout_flag = debug_mpsafenet ? CALLOUT_MPSAFE : 0;
+	callout_init(tp->tt_rexmt = &tm->tcpcb_mem_rexmt, callout_flag);
+	callout_init(tp->tt_persist = &tm->tcpcb_mem_persist, callout_flag);
+	callout_init(tp->tt_keep = &tm->tcpcb_mem_keep, callout_flag);
+	callout_init(tp->tt_2msl = &tm->tcpcb_mem_2msl, callout_flag);
+	callout_init(tp->tt_delack = &tm->tcpcb_mem_delack, callout_flag);
 
 	if (tcp_do_rfc1323)
 		tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
@@ -1595,7 +1602,7 @@
 
 /*
  * Move a TCP connection into TIME_WAIT state.
- *    tcbinfo is unlocked.
+ *    tcbinfo is locked.
  *    inp is locked, and is unlocked before returning.
  */
 void
@@ -1607,6 +1614,11 @@
 	int tw_time, acknow;
 	struct socket *so;
 
+	INP_INFO_WLOCK_ASSERT(&tcbinfo);
+#if 0
+	INP_LOCK_ASSERT(tp->t_inpcb);
+#endif
+
 	tw = uma_zalloc(tcptw_zone, M_NOWAIT);
 	if (tw == NULL) {
 		tw = tcp_timer_2msl_tw(1);
@@ -1657,13 +1669,19 @@
 	}
 	tcp_discardcb(tp);
 	so = inp->inp_socket;
+	SOCK_LOCK(so);
 	so->so_pcb = NULL;
 	tw->tw_cred = crhold(so->so_cred);
 	tw->tw_so_options = so->so_options;
+	sotryfree(so);			/* NB: drops lock */
+	inp->inp_socket = NULL;
 	if (acknow)
 		tcp_twrespond(tw, TH_ACK);
+#if 0
+	/* XXXRW: Sam removed this, need to check why. */
 	sotryfree(so);
 	inp->inp_socket = NULL;
+#endif
 	inp->inp_ppcb = (caddr_t)tw;
 	inp->inp_vflag |= INP_TIMEWAIT;
 	tcp_timer_2msl_reset(tw, tw_time);
@@ -1739,6 +1757,8 @@
 	int isipv6 = inp->inp_inc.inc_isipv6;
 #endif
 
+	INP_LOCK_ASSERT(inp);
+
 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
 	if (m == NULL)
 		return (ENOBUFS);
--- //depot/vendor/freebsd/src/sys/netinet/tcp_syncache.c	2004/05/04 02:15:32
+++ //depot/user/rwatson/netperf/sys/netinet/tcp_syncache.c	2004/06/02 04:26:26
@@ -540,7 +540,7 @@
 	struct socket *so;
 	struct tcpcb *tp;
 
-	GIANT_REQUIRED;			/* XXX until socket locking */
+	NET_ASSERT_GIANT();
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);
 
 	/*
@@ -559,7 +559,12 @@
 		goto abort2;
 	}
 #ifdef MAC
+	/*
+	 * XXXRW: Would prefer inpcb -- also, lock order concerns.
+	 */
+	SOCK_LOCK(so);
 	mac_set_socket_peer_from_mbuf(m, so);
+	SOCK_UNLOCK(so);
 #endif
 
 	inp = sotoinpcb(so);
@@ -724,6 +729,14 @@
 abort:
 	INP_UNLOCK(inp);
 abort2:
+	/*
+	 * XXXRW: Technically speaking, this soabort() likely doesn't
+	 * do anything, since we insert sockets into the accept queues
+	 * in an already completed state, and soabort() leaves it to
+	 * the close() on the listen socket to remove completed
+	 * connections.  However, this means a TCP socket without
+	 * full TCP state could slip through...?
+	 */
 	if (so != NULL)
 		(void) soabort(so);
 	return (NULL);
--- //depot/vendor/freebsd/src/sys/netinet/tcp_timer.c	2004/04/07 20:52:05
+++ //depot/user/rwatson/netperf/sys/netinet/tcp_timer.c	2004/04/08 03:11:34
@@ -269,6 +269,9 @@
 	}
 }
 
+/*
+ * XXXRW: This doesn't look MPSAFE.
+ */
 void
 tcp_timer_2msl_reset(struct tcptw *tw, int timeo)
 {
@@ -283,6 +286,9 @@
 	LIST_INSERT_BEFORE(tw_tail, tw, tw_2msl);
 }
 
+/*
+ * XXXRW: This doesn't look MPSAFE.
+ */
 void
 tcp_timer_2msl_stop(struct tcptw *tw)
 {
@@ -291,6 +297,9 @@
 		LIST_REMOVE(tw, tw_2msl);
 }
 
+/*
+ * XXXRW: This doesn't look MPSAFE.
+ */
 struct tcptw *
 tcp_timer_2msl_tw(int reuse)
 {
--- //depot/vendor/freebsd/src/sys/netinet/tcp_usrreq.c	2004/04/07 20:52:05
+++ //depot/user/rwatson/netperf/sys/netinet/tcp_usrreq.c	2004/06/03 00:15:54
@@ -116,7 +116,6 @@
 static int
 tcp_usr_attach(struct socket *so, int proto, struct thread *td)
 {
-	int s = splnet();
 	int error;
 	struct inpcb *inp;
 	struct tcpcb *tp = 0;
@@ -142,11 +141,71 @@
 out:
 	TCPDEBUG2(PRU_ATTACH);
 	INP_INFO_WUNLOCK(&tcbinfo);
-	splx(s);
 	return error;
 }
 
 /*
+ * Common code to set up and tear down locking.  Most
+ * code begins with a COMMON_START macro and finishes
+ * with COMMON_END.  You indicate whether the inpcb
+ * and enclosing head are to be locked read or write
+ * and whether there is an existing sockbuf lock that
+ * needs to be re-ordered.
+ */
+#define INI_NOLOCK	0		/* no head lock */
+#define INI_READ	1		/* read head lock */
+#define INI_WRITE	2		/* write head lock */
+#define SBI_NONE	0		/* no sockbuf lock to reorder */
+#define SBI_SND		1		/* reorder so->so_snd lock */
+#define SBI_RCV		2		/* reorder so->so_rcv lock */
+
+#define	COMMON_START0(_headrw, _sbrw) do {			\
+	if ((_sbrw) == SBI_SND)	/* drop sockbuf lock first */	\
+		SOCKBUF_UNLOCK(&so->so_snd);			\
+	else if ((_sbrw) == SBI_RCV)				\
+		SOCKBUF_UNLOCK(&so->so_rcv);			\
+	if ((_headrw) == INI_READ)				\
+		INP_INFO_RLOCK(&tcbinfo);			\
+	else if ((_headrw) == INI_WRITE)			\
+		INP_INFO_WLOCK(&tcbinfo);			\
+	inp = sotoinpcb(so);					\
+	if (inp == NULL) {	/* no pcb: undo and bail */	\
+		if ((_sbrw) == SBI_SND)				\
+			SOCKBUF_LOCK(&so->so_snd);		\
+		else if ((_sbrw) == SBI_RCV)			\
+			SOCKBUF_LOCK(&so->so_rcv);		\
+		if ((_headrw) == INI_READ)			\
+			INP_INFO_RUNLOCK(&tcbinfo);		\
+		else if ((_headrw) == INI_WRITE)		\
+			INP_INFO_WUNLOCK(&tcbinfo);		\
+		return EINVAL;					\
+	}							\
+	INP_LOCK(inp);						\
+	if ((_sbrw) == SBI_SND)	/* retake sockbuf lock */	\
+		SOCKBUF_LOCK(&so->so_snd);			\
+	else if ((_sbrw) == SBI_RCV)				\
+		SOCKBUF_LOCK(&so->so_rcv);			\
+	if ((_headrw) == INI_READ) /* write lock stays held */	\
+		INP_INFO_RUNLOCK(&tcbinfo);			\
+	tp = intotcpcb(inp);					\
+	TCPDEBUG1();						\
+} while(0)
+
+#define	COMMON_START(_headrw, _sbrw) do {			\
+	TCPDEBUG0;						\
+	COMMON_START0(_headrw, _sbrw);				\
+} while (0)
+
+/* NB: wrapped whole in do/while so it expands to a single statement. */
+#define COMMON_END(_headrw, req) do {				\
+	TCPDEBUG2(req);						\
+	if (tp)		/* skip inpcb unlock when tp is NULL */	\
+		INP_UNLOCK(inp);				\
+	if ((_headrw) == INI_WRITE)				\
+		INP_INFO_WUNLOCK(&tcbinfo);			\
+} while (0)
+
+/*
  * pru_detach() detaches the TCP protocol from the socket.
  * If the protocol state is non-embryonic, then can't
  * do this directly: have to initiate a pru_disconnect(),
@@ -156,83 +215,26 @@
 static int
 tcp_usr_detach(struct socket *so)
 {
-	int s = splnet();
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
-	TCPDEBUG0;
 
-	INP_INFO_WLOCK(&tcbinfo);
-	inp = sotoinpcb(so);
-	if (inp == 0) {
-		INP_INFO_WUNLOCK(&tcbinfo);
-		splx(s);
-		return EINVAL;	/* XXX */
-	}
-	INP_LOCK(inp);
-	tp = intotcpcb(inp);
-	TCPDEBUG1();
+	COMMON_START(INI_WRITE, SBI_NONE);
 	tp = tcp_disconnect(tp);
-
-	TCPDEBUG2(PRU_DETACH);
-	if (tp)
-		INP_UNLOCK(inp);
-	INP_INFO_WUNLOCK(&tcbinfo);
-	splx(s);
+	COMMON_END(INI_WRITE, PRU_DETACH);
 	return error;
 }
 
-#define INI_NOLOCK	0
-#define INI_READ	1
-#define INI_WRITE	2
-
-#define	COMMON_START()						\
-	TCPDEBUG0;						\
-	do {							\
-		if (inirw == INI_READ)				\
-			INP_INFO_RLOCK(&tcbinfo);		\
-		else if (inirw == INI_WRITE)			\
-			INP_INFO_WLOCK(&tcbinfo);		\
-		inp = sotoinpcb(so);				\
-		if (inp == 0) {					\
-			if (inirw == INI_READ)			\
-				INP_INFO_RUNLOCK(&tcbinfo);	\
-			else if (inirw == INI_WRITE)		\
-				INP_INFO_WUNLOCK(&tcbinfo);	\
-			splx(s);				\
-			return EINVAL;				\
-		}						\
-		INP_LOCK(inp);					\
-		if (inirw == INI_READ)				\
-			INP_INFO_RUNLOCK(&tcbinfo);		\
-		tp = intotcpcb(inp);				\
-		TCPDEBUG1();					\
-} while(0)
-
-#define COMMON_END(req)						\
-out:	TCPDEBUG2(req);						\
-	do {							\
-		if (tp)						\
-			INP_UNLOCK(inp);			\
-		if (inirw == INI_WRITE)				\
-			INP_INFO_WUNLOCK(&tcbinfo);		\
-		splx(s);					\
-		return error;					\
-		goto out;					\
-} while(0)
-
 /*
  * Give the socket an address.
  */
 static int
 tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
-	int s = splnet();
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct sockaddr_in *sinp;
-	const int inirw = INI_WRITE;
 
 	sinp = (struct sockaddr_in *)nam;
 	if (nam->sa_len != sizeof (*sinp))
@@ -245,23 +247,20 @@
 	    IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
 		return (EAFNOSUPPORT);
 
-	COMMON_START();
+	COMMON_START(INI_WRITE, SBI_NONE);
 	error = in_pcbbind(inp, nam, td->td_ucred);
-	if (error)
-		goto out;
-	COMMON_END(PRU_BIND);
+	COMMON_END(INI_WRITE, PRU_BIND);
+	return error;
 }
 
 #ifdef INET6
 static int
 tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
-	int s = splnet();
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct sockaddr_in6 *sin6p;
-	const int inirw = INI_WRITE;
 
 	sin6p = (struct sockaddr_in6 *)nam;
 	if (nam->sa_len != sizeof (*sin6p))
@@ -274,7 +273,7 @@
 	    IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
 		return (EAFNOSUPPORT);
 
-	COMMON_START();
+	COMMON_START(INI_WRITE, SBI_NONE);
 	inp->inp_vflag &= ~INP_IPV4;
 	inp->inp_vflag |= INP_IPV6;
 	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
@@ -292,9 +291,9 @@
 		}
 	}
 	error = in6_pcbbind(inp, nam, td->td_ucred);
-	if (error)
-		goto out;
-	COMMON_END(PRU_BIND);
+out:
+	COMMON_END(INI_WRITE, PRU_BIND);
+	return error;
 }
 #endif /* INET6 */
 
@@ -304,31 +303,28 @@
 static int
 tcp_usr_listen(struct socket *so, struct thread *td)
 {
-	int s = splnet();
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
-	const int inirw = INI_WRITE;
 
-	COMMON_START();
+	COMMON_START(INI_WRITE, SBI_NONE);
 	if (inp->inp_lport == 0)
 		error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
 	if (error == 0)
 		tp->t_state = TCPS_LISTEN;
-	COMMON_END(PRU_LISTEN);
+	COMMON_END(INI_WRITE, PRU_LISTEN);
+	return error;
 }
 
 #ifdef INET6
 static int
 tcp6_usr_listen(struct socket *so, struct thread *td)
 {
-	int s = splnet();
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
-	const int inirw = INI_WRITE;
 
-	COMMON_START();
+	COMMON_START(INI_WRITE, SBI_NONE);
 	if (inp->inp_lport == 0) {
 		inp->inp_vflag &= ~INP_IPV4;
 		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
@@ -337,7 +333,8 @@
 	}
 	if (error == 0)
 		tp->t_state = TCPS_LISTEN;
-	COMMON_END(PRU_LISTEN);
+	COMMON_END(INI_WRITE, PRU_LISTEN);
+	return error;
 }
 #endif /* INET6 */
 
@@ -351,12 +348,10 @@
 static int
 tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
-	int s = splnet();
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct sockaddr_in *sinp;
-	const int inirw = INI_WRITE;
 
 	sinp = (struct sockaddr_in *)nam;
 	if (nam->sa_len != sizeof (*sinp))
@@ -370,23 +365,23 @@
 	if (td && jailed(td->td_ucred))
 		prison_remote_ip(td->td_ucred, 0, &sinp->sin_addr.s_addr);
 
-	COMMON_START();
+	COMMON_START(INI_WRITE, SBI_NONE);
 	if ((error = tcp_connect(tp, nam, td)) != 0)
 		goto out;
 	error = tcp_output(tp);
-	COMMON_END(PRU_CONNECT);
+out:
+	COMMON_END(INI_WRITE, PRU_CONNECT);
+	return error;
 }
 
 #ifdef INET6
 static int
 tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
-	int s = splnet();
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct sockaddr_in6 *sin6p;
-	const int inirw = INI_WRITE;
 
 	sin6p = (struct sockaddr_in6 *)nam;
 	if (nam->sa_len != sizeof (*sin6p))
@@ -398,7 +393,7 @@
 	    && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
 		return (EAFNOSUPPORT);
 
-	COMMON_START();
+	COMMON_START(INI_WRITE, SBI_NONE);
 	if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
 		struct sockaddr_in sin;
 
@@ -421,7 +416,9 @@
 	if ((error = tcp6_connect(tp, nam, td)) != 0)
 		goto out;
 	error = tcp_output(tp);
-	COMMON_END(PRU_CONNECT);
+out:
+	COMMON_END(INI_WRITE, PRU_CONNECT);
+	return error;
 }
 #endif /* INET6 */
 
@@ -439,15 +436,14 @@
 static int
 tcp_usr_disconnect(struct socket *so)
 {
-	int s = splnet();
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
-	const int inirw = INI_WRITE;
 
-	COMMON_START();
+	COMMON_START(INI_WRITE, SBI_NONE);
 	tp = tcp_disconnect(tp);
-	COMMON_END(PRU_DISCONNECT);
+	COMMON_END(INI_WRITE, PRU_DISCONNECT);
+	return error;
 }
 
 /*
@@ -458,7 +454,6 @@
 static int
 tcp_usr_accept(struct socket *so, struct sockaddr **nam)
 {
-	int s;
 	int error = 0;
 	struct inpcb *inp = NULL;
 	struct tcpcb *tp = NULL;
@@ -466,36 +461,24 @@
 	in_port_t port = 0;
 	TCPDEBUG0;
 
+	/* Unlocked read. */
 	if (so->so_state & SS_ISDISCONNECTED) {
 		error = ECONNABORTED;
-		goto out;
+		goto out;	/* NB: ok 'cuz tp is NULL */
 	}
 
-	s = splnet();
-	INP_INFO_RLOCK(&tcbinfo);
-	inp = sotoinpcb(so);
-	if (!inp) {
-		INP_INFO_RUNLOCK(&tcbinfo);
-		splx(s);
-		return (EINVAL);
-	}
-	INP_LOCK(inp);
-	INP_INFO_RUNLOCK(&tcbinfo);
-	tp = intotcpcb(inp);
-	TCPDEBUG1();
+	COMMON_START0(INI_READ, SBI_NONE);
 
 	/* 
-	 * We inline in_setpeeraddr and COMMON_END here, so that we can
-	 * copy the data of interest and defer the malloc until after we
-	 * release the lock.
+	 * We inline in_setpeeraddr so that we can copy the
+	 * data of interest and defer the malloc until after
+	 * we release the lock.
 	 */
 	port = inp->inp_fport;
 	addr = inp->inp_faddr;
 
-out:	TCPDEBUG2(PRU_ACCEPT);
-	if (tp)
-		INP_UNLOCK(inp);
-	splx(s);
+out:
+	COMMON_END(INI_READ, PRU_ACCEPT);
 	if (error == 0)
 		*nam = in_sockaddr(port, &addr);
 	return error;
@@ -505,7 +488,6 @@
 static int
 tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
 {
-	int s;
 	struct inpcb *inp = NULL;
 	int error = 0;
 	struct tcpcb *tp = NULL;
@@ -515,27 +497,17 @@
 	int v4 = 0;
 	TCPDEBUG0;
 
+	/* Unlocked read. */
 	if (so->so_state & SS_ISDISCONNECTED) {
 		error = ECONNABORTED;
-		goto out;
+		goto out;		/* NB: ok 'cuz tp is NULL */
 	}
 
-	s = splnet();
-	INP_INFO_RLOCK(&tcbinfo);
-	inp = sotoinpcb(so);
-	if (inp == 0) {
-		INP_INFO_RUNLOCK(&tcbinfo);
-		splx(s);
-		return (EINVAL);
-	}
-	INP_LOCK(inp);
-	INP_INFO_RUNLOCK(&tcbinfo);
-	tp = intotcpcb(inp);
-	TCPDEBUG1();
+	COMMON_START0(INI_READ, SBI_NONE);
 	/* 
-	 * We inline in6_mapped_peeraddr and COMMON_END here, so that we can
-	 * copy the data of interest and defer the malloc until after we
-	 * release the lock.
+	 * We inline in6_mapped_peeraddr so that we can
+	 * copy the data of interest and defer the malloc
+	 * until after we release the lock.
 	 */
 	if (inp->inp_vflag & INP_IPV4) {
 		v4 = 1;
@@ -546,10 +518,8 @@
 		addr6 = inp->in6p_faddr;
 	}
 
-out:	TCPDEBUG2(PRU_ACCEPT);
-	if (tp)
-		INP_UNLOCK(inp);
-	splx(s);
+out:
+	COMMON_END(INI_READ, PRU_ACCEPT);
 	if (error == 0) {
 		if (v4)
 			*nam = in6_v4mapsin6_sockaddr(port, &addr);
@@ -587,18 +557,17 @@
 static int
 tcp_usr_shutdown(struct socket *so)
 {
-	int s = splnet();
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
-	const int inirw = INI_WRITE;
 
-	COMMON_START();
+	COMMON_START(INI_WRITE, SBI_NONE);
 	socantsendmore(so);
 	tp = tcp_usrclosed(tp);
 	if (tp)
 		error = tcp_output(tp);
-	COMMON_END(PRU_SHUTDOWN);
+	COMMON_END(INI_WRITE, PRU_SHUTDOWN);
+	return error;
 }
 
 /*
@@ -607,15 +576,14 @@
 static int
 tcp_usr_rcvd(struct socket *so, int flags)
 {
-	int s = splnet();
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
-	const int inirw = INI_READ;
 
-	COMMON_START();
+	COMMON_START(INI_READ, SBI_NONE);
 	tcp_output(tp);
-	COMMON_END(PRU_RCVD);
+	COMMON_END(INI_READ, PRU_RCVD);
+	return error;
 }
 
 /*
@@ -629,11 +597,9 @@
 tcp_usr_send(struct socket *so, int flags, struct mbuf *m, 
 	     struct sockaddr *nam, struct mbuf *control, struct thread *td)
 {
-	int s = splnet();
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
-	const int inirw = INI_WRITE;
 #ifdef INET6
 	int isipv6;
 #endif
@@ -650,7 +616,7 @@
 	if (inp == NULL) {
 		/*
 		 * OOPS! we lost a race, the TCP session got reset after
-		 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
+		 * we checked SBS_CANTSENDMORE, eg: while doing uiomove or a
 		 * network interrupt in the non-splnet() section of sosend().
 		 */
 		if (m)
@@ -748,13 +714,16 @@
 			tp->snd_wnd = TTCP_CLIENT_SND_WND;
 			tcp_mss(tp, -1);
 		}
+		/* Unlocked read of sb_cc. */
 		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
 		tp->t_force = 1;
 		error = tcp_output(tp);
 		tp->t_force = 0;
 	}
-	COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : 
+out:
+	COMMON_END(INI_WRITE, (flags & PRUS_OOB) ? PRU_SENDOOB : 
 		   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
+	return error;
 }
 
 /*
@@ -763,15 +732,14 @@
 static int
 tcp_usr_abort(struct socket *so)
 {
-	int s = splnet();
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
-	const int inirw = INI_WRITE;
 
-	COMMON_START();
+	COMMON_START(INI_WRITE, SBI_NONE);
 	tp = tcp_drop(tp, ECONNABORTED);
-	COMMON_END(PRU_ABORT);
+	COMMON_END(INI_WRITE, PRU_ABORT);
+	return error;
 }
 
 /*
@@ -780,15 +748,14 @@
 static int
 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
 {
-	int s = splnet();
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp;
-	const int inirw = INI_READ;
 
-	COMMON_START();
+	COMMON_START(INI_READ, SBI_NONE);
+	/* Unlocked read. */
 	if ((so->so_oobmark == 0 &&
-	     (so->so_state & SS_RCVATMARK) == 0) ||
+	     (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
 	    so->so_options & SO_OOBINLINE ||
 	    tp->t_oobflags & TCPOOB_HADDATA) {
 		error = EINVAL;
@@ -802,7 +769,9 @@
 	*mtod(m, caddr_t) = tp->t_iobc;
 	if ((flags & MSG_PEEK) == 0)
 		tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
-	COMMON_END(PRU_RCVOOB);
+out:
+	COMMON_END(INI_READ, PRU_RCVOOB);
+	return error;
 }
 
 /* xxx - should be const */
@@ -1021,17 +990,15 @@
 	struct socket *so;
 	struct sockopt *sopt;
 {
-	int	error, opt, optval, s;
+	int	error, opt, optval;
 	struct	inpcb *inp;
 	struct	tcpcb *tp;
 
 	error = 0;
-	s = splnet();		/* XXX */
 	INP_INFO_RLOCK(&tcbinfo);
 	inp = sotoinpcb(so);
 	if (inp == NULL) {
 		INP_INFO_RUNLOCK(&tcbinfo);
-		splx(s);
 		return (ECONNRESET);
 	}
 	INP_LOCK(inp);
@@ -1044,7 +1011,6 @@
 #endif /* INET6 */
 		error = ip_ctloutput(so, sopt);
 		INP_UNLOCK(inp);
-		splx(s);
 		return (error);
 	}
 	tp = intotcpcb(inp);
@@ -1151,7 +1117,6 @@
 		break;
 	}
 	INP_UNLOCK(inp);
-	splx(s);
 	return (error);
 }
 
@@ -1202,16 +1167,28 @@
 	inp->inp_vflag |= INP_IPV4;
 	tp = tcp_newtcpcb(inp);
 	if (tp == 0) {
-		int nofd = so->so_state & SS_NOFDREF;	/* XXX */
-
-		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
+		int nofd;
+		/*
+		 * XXXRW: Potentially racy: SS_NOFDREF is saved and cleared
+		 * under the socket lock, but the lock is dropped across the
+		 * pcb detach (which may grab other locks) before the flag is
+		 * restored.  This section requires attention.
+		 */
+		SOCK_LOCK(so);
+		nofd = so->so_state & SS_NOFDREF;
+		/* don't free the socket yet */
+		if (nofd)
+			so->so_state &= ~SS_NOFDREF;
+		SOCK_UNLOCK(so);
 #ifdef INET6
 		if (isipv6)
 			in6_pcbdetach(inp);
 		else
 #endif
 		in_pcbdetach(inp);
+		SOCK_LOCK(so);
 		so->so_state |= nofd;
+		SOCK_UNLOCK(so);
 		return (ENOBUFS);
 	}
 	tp->t_state = TCPS_CLOSED;
@@ -1238,7 +1215,9 @@
 		tp = tcp_drop(tp, 0);
 	else {
 		soisdisconnecting(so);
+		SOCKBUF_LOCK(&so->so_rcv);
 		sbflush(&so->so_rcv);
+		SOCKBUF_UNLOCK(&so->so_rcv);
 		tp = tcp_usrclosed(tp);
 		if (tp)
 			(void) tcp_output(tp);
@@ -1291,4 +1270,3 @@
 	}
 	return (tp);
 }
-
--- //depot/vendor/freebsd/src/sys/netinet/udp_usrreq.c	2004/05/04 02:15:32
+++ //depot/user/rwatson/netperf/sys/netinet/udp_usrreq.c	2004/05/30 18:22:04
@@ -899,24 +899,67 @@
 SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
     &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
 
+/*
+ * Common code to set up and tear down locking.  Most
+ * code begins with a COMMON_START macro and finishes
+ * with COMMON_END.  You indicate whether the inpcb
+ * and enclosing head are to be locked read or write 
+ * and whether there is an existing sockbuf lock that
+ * needs to be re-ordered.
+ */
+#define INI_NOLOCK	0		/* no head lock */
+#define INI_READ	1		/* read head lock */
+#define INI_WRITE	2		/* write head lock */
+#define SBI_NONE	0		/* no sockbuf lock to reorder */
+#define SBI_SND		1		/* reorder so->so_snd lock */
+#define SBI_RCV		2		/* reorder so->so_rcv lock */
+
+#define	COMMON_START(_headrw, _sbrw) do {			\
+	if (_sbrw == SBI_SND)					\
+		SOCKBUF_UNLOCK(&so->so_snd);			\
+	else if (_sbrw == SBI_RCV)				\
+		SOCKBUF_UNLOCK(&so->so_rcv);			\
+	if (_headrw == INI_READ)				\
+		INP_INFO_RLOCK(&udbinfo);			\
+	else if (_headrw == INI_WRITE)				\
+		INP_INFO_WLOCK(&udbinfo);			\
+	inp = sotoinpcb(so);					\
+	if (inp == 0) {						\
+		if (_sbrw == SBI_SND)				\
+			SOCKBUF_LOCK(&so->so_snd);		\
+		else if (_sbrw == SBI_RCV)			\
+			SOCKBUF_LOCK(&so->so_rcv);		\
+		if (_headrw == INI_READ)			\
+			INP_INFO_RUNLOCK(&udbinfo);		\
+		else if (_headrw == INI_WRITE)			\
+			INP_INFO_WUNLOCK(&udbinfo);		\
+		return EINVAL;					\
+	}							\
+	INP_LOCK(inp);						\
+	if (_sbrw == SBI_SND)					\
+		SOCKBUF_LOCK(&so->so_snd);			\
+	else if (_sbrw == SBI_RCV)				\
+		SOCKBUF_LOCK(&so->so_rcv);			\
+	if (_headrw == INI_READ)				\
+		INP_INFO_RUNLOCK(&udbinfo);			\
+} while(0)
+
+#define COMMON_END(_headrw)					\
+	do {							\
+		INP_UNLOCK(inp);				\
+		if (_headrw == INI_WRITE)			\
+			INP_INFO_WUNLOCK(&udbinfo);		\
+	} while(0)
+
 static int
 udp_abort(struct socket *so)
 {
 	struct inpcb *inp;
-	int s;
 
-	INP_INFO_WLOCK(&udbinfo);
-	inp = sotoinpcb(so);
-	if (inp == 0) {
-		INP_INFO_WUNLOCK(&udbinfo);
-		return EINVAL;	/* ??? possible? panic instead? */
-	}
-	INP_LOCK(inp);
+	COMMON_START(INI_WRITE, SBI_NONE);
 	soisdisconnected(so);
-	s = splnet();
 	in_pcbdetach(inp);
 	INP_INFO_WUNLOCK(&udbinfo);
-	splx(s);
 	return 0;
 }
 
@@ -958,20 +1001,11 @@
 udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcb *inp;
-	int s, error;
+	int error;
 
-	INP_INFO_WLOCK(&udbinfo);
-	inp = sotoinpcb(so);
-	if (inp == 0) {
-		INP_INFO_WUNLOCK(&udbinfo);
-		return EINVAL;
-	}
-	INP_LOCK(inp);
-	s = splnet();
+	COMMON_START(INI_WRITE, SBI_NONE);
 	error = in_pcbbind(inp, nam, td->td_ucred);
-	splx(s);
-	INP_UNLOCK(inp);
-	INP_INFO_WUNLOCK(&udbinfo);
+	COMMON_END(INI_WRITE);
 	return error;
 }
 
@@ -979,31 +1013,22 @@
 udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcb *inp;
-	int s, error;
+	int error;
 	struct sockaddr_in *sin;
 
-	INP_INFO_WLOCK(&udbinfo);
-	inp = sotoinpcb(so);
-	if (inp == 0) {
-		INP_INFO_WUNLOCK(&udbinfo);
-		return EINVAL;
-	}
-	INP_LOCK(inp);
+	COMMON_START(INI_WRITE, SBI_NONE);
 	if (inp->inp_faddr.s_addr != INADDR_ANY) {
-		INP_UNLOCK(inp);
-		INP_INFO_WUNLOCK(&udbinfo);
-		return EISCONN;
+		error = EISCONN;
+		goto out;
 	}
-	s = splnet();
 	sin = (struct sockaddr_in *)nam;
 	if (td && jailed(td->td_ucred))
 		prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr);
 	error = in_pcbconnect(inp, nam, td->td_ucred);
-	splx(s);
 	if (error == 0)
 		soisconnected(so);
-	INP_UNLOCK(inp);
-	INP_INFO_WUNLOCK(&udbinfo);
+out:
+	COMMON_END(INI_WRITE);
 	return error;
 }
 
@@ -1011,49 +1036,31 @@
 udp_detach(struct socket *so)
 {
 	struct inpcb *inp;
-	int s;
 
-	INP_INFO_WLOCK(&udbinfo);
-	inp = sotoinpcb(so);
-	if (inp == 0) {
-		INP_INFO_WUNLOCK(&udbinfo);
-		return EINVAL;
-	}
-	INP_LOCK(inp);
-	s = splnet();
+	COMMON_START(INI_WRITE, SBI_NONE);
 	in_pcbdetach(inp);
 	INP_INFO_WUNLOCK(&udbinfo);
-	splx(s);
 	return 0;
 }
 
 static int
 udp_disconnect(struct socket *so)
 {
+	int error = 0;
 	struct inpcb *inp;
-	int s;
 
-	INP_INFO_WLOCK(&udbinfo);
-	inp = sotoinpcb(so);
-	if (inp == 0) {
-		INP_INFO_WUNLOCK(&udbinfo);
-		return EINVAL;
-	}
-	INP_LOCK(inp);
+	COMMON_START(INI_WRITE, SBI_NONE);
 	if (inp->inp_faddr.s_addr == INADDR_ANY) {
-		INP_INFO_WUNLOCK(&udbinfo);
-		INP_UNLOCK(inp);
-		return ENOTCONN;
+		error = ENOTCONN;
+		goto out;
 	}
 
-	s = splnet();
 	in_pcbdisconnect(inp);
 	inp->inp_laddr.s_addr = INADDR_ANY;
-	INP_UNLOCK(inp);
-	INP_INFO_WUNLOCK(&udbinfo);
-	splx(s);
 	so->so_state &= ~SS_ISCONNECTED;		/* XXX */
-	return 0;
+out:
+	COMMON_END(INI_WRITE);	/* reached on ENOTCONN too, so locks are always dropped */
+	return error;
 }
 
 static int
@@ -1061,20 +1068,21 @@
 	    struct mbuf *control, struct thread *td)
 {
 	struct inpcb *inp;
-	int ret;
+	int error;
 
 	INP_INFO_WLOCK(&udbinfo);
 	inp = sotoinpcb(so);
 	if (inp == 0) {
+		/* Only the pcbinfo lock was taken above; release just that. */
 		INP_INFO_WUNLOCK(&udbinfo);
 		m_freem(m);
 		return EINVAL;
 	}
 	INP_LOCK(inp);
-	ret = udp_output(inp, m, addr, control, td);
+	error = udp_output(inp, m, addr, control, td);
 	INP_UNLOCK(inp);
 	INP_INFO_WUNLOCK(&udbinfo);
-	return ret; 
+	return error; 
 }
 
 int
@@ -1082,16 +1090,9 @@
 {
 	struct inpcb *inp;
 
-	INP_INFO_RLOCK(&udbinfo);
-	inp = sotoinpcb(so);
-	if (inp == 0) {
-		INP_INFO_RUNLOCK(&udbinfo);
-		return EINVAL;
-	}
-	INP_LOCK(inp);
-	INP_INFO_RUNLOCK(&udbinfo);
+	COMMON_START(INI_READ, SBI_NONE);
 	socantsendmore(so);
-	INP_UNLOCK(inp);
+	COMMON_END(INI_READ);
 	return 0;
 }
 
--- //depot/vendor/freebsd/src/sys/netinet6/in6_gif.c	2003/10/29 15:10:52
+++ //depot/user/rwatson/netperf/sys/netinet6/in6_gif.c	2004/03/09 23:48:47
@@ -82,6 +82,11 @@
   &rip6_usrreqs
 };
 
+/*
+ * XXXRW: in6_gif per-softc locking required.  Need to lock both the
+ * members, and also prevent the softc from disappearing during use
+ * (including the route).
+ */
 int
 in6_gif_output(ifp, family, m)
 	struct ifnet *ifp;
@@ -379,6 +384,10 @@
 in6_gif_attach(sc)
 	struct gif_softc *sc;
 {
+
+	/*
+	 * XXXRW: Technically, encap_attach() can return NULL due to ENOMEM?
+	 */
 	sc->encap_cookie6 = encap_attach_func(AF_INET6, -1, gif_encapcheck,
 	    (struct protosw *)&in6_gif_protosw, sc);
 	if (sc->encap_cookie6 == NULL)
--- //depot/vendor/freebsd/src/sys/netinet6/in6_ifattach.c	2004/02/26 03:55:40
+++ //depot/user/rwatson/netperf/sys/netinet6/in6_ifattach.c	2004/03/07 18:23:16
@@ -226,8 +226,8 @@
 	struct sockaddr_dl *sdl;
 	u_int8_t *addr;
 	size_t addrlen;
-	static u_int8_t allzero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
-	static u_int8_t allone[8] =
+	static const u_int8_t allzero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+	static const u_int8_t allone[8] =
 		{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
 	for (ifa = ifp->if_addrlist.tqh_first;
--- //depot/vendor/freebsd/src/sys/netinet6/in6_pcb.c	2004/04/07 20:52:05
+++ //depot/user/rwatson/netperf/sys/netinet6/in6_pcb.c	2004/04/23 01:08:25
@@ -419,6 +419,7 @@
 #ifdef IPSEC
 	ipsec_pcbdisconn(inp->inp_sp);
 #endif
+	/* Unlocked read. */
 	if (inp->inp_socket->so_state & SS_NOFDREF)
 		in6_pcbdetach(inp);
 }
@@ -438,6 +439,7 @@
 	in_pcbremlists(inp);
 
 	if (so) {
+		SOCK_LOCK(so);
 		so->so_pcb = NULL;
 		sotryfree(so);
 	}
--- //depot/vendor/freebsd/src/sys/netinet6/in6_prefix.c	2004/04/07 20:52:05
+++ //depot/user/rwatson/netperf/sys/netinet6/in6_prefix.c	2004/04/08 03:11:34
@@ -96,6 +96,8 @@
 static int link_stray_ia6s __P((struct rr_prefix *rpp));
 static void	rp_remove __P((struct rr_prefix *rpp));
 
+static int	delete_each_prefix __P((struct rr_prefix *rpp, u_char origin));
+
 /*
  * Copy bits from src to tgt, from off bit for len bits.
  * Caller must specify collect tgtsize and srcsize.
@@ -951,7 +953,7 @@
 	}
 }
 
-int
+static int
 delete_each_prefix(struct rr_prefix *rpp, u_char origin)
 {
 	int error = 0;
--- //depot/vendor/freebsd/src/sys/netinet6/in6_prefix.h	2001/07/17 07:20:42
+++ //depot/user/rwatson/netperf/sys/netinet6/in6_prefix.h	2004/03/07 19:31:09
@@ -88,4 +88,3 @@
 
 void in6_rr_timer __P((void *));
 extern struct callout in6_rr_timer_ch;
-int delete_each_prefix  __P((struct rr_prefix *rpp, u_char origin));
--- //depot/vendor/freebsd/src/sys/netinet6/in6_proto.c	2004/04/07 20:52:05
+++ //depot/user/rwatson/netperf/sys/netinet6/in6_proto.c	2004/04/08 03:11:34
@@ -67,6 +67,8 @@
 #include "opt_random_ip_id.h"
 
 #include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
--- //depot/vendor/freebsd/src/sys/netinet6/in6_rmx.c	2003/11/20 20:10:44
+++ //depot/user/rwatson/netperf/sys/netinet6/in6_rmx.c	2004/02/28 22:29:37
@@ -79,6 +79,8 @@
 #include <sys/sysctl.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
 #include <sys/socketvar.h>
 #include <sys/mbuf.h>
 #include <sys/syslog.h>
--- //depot/vendor/freebsd/src/sys/netinet6/ip6_output.c	2004/05/14 04:01:38
+++ //depot/user/rwatson/netperf/sys/netinet6/ip6_output.c	2004/05/23 16:56:02
@@ -2068,6 +2068,7 @@
 
 				bzero(&sro, sizeof(sro));
 
+				/* Unlocked read. */
 				if (!(so->so_state & SS_ISCONNECTED))
 					return (ENOTCONN);
 				/*
--- //depot/vendor/freebsd/src/sys/netinet6/nd6.c	2004/04/26 20:35:32
+++ //depot/user/rwatson/netperf/sys/netinet6/nd6.c	2004/05/04 02:32:28
@@ -98,6 +98,9 @@
 /* for debugging? */
 static int nd6_inuse, nd6_allocated;
 
+/*
+ * XXXRW: What follows requires locking.
+ */
 struct llinfo_nd6 llinfo_nd6 = {&llinfo_nd6, &llinfo_nd6};
 struct nd_drhead nd_defrouter;
 struct nd_prhead nd_prefix = { 0 };
--- //depot/vendor/freebsd/src/sys/netinet6/raw_ip6.c	2004/04/07 20:52:05
+++ //depot/user/rwatson/netperf/sys/netinet6/raw_ip6.c	2004/04/23 01:08:25
@@ -603,6 +603,7 @@
 {
 	struct inpcb *inp = sotoinpcb(so);
 
+	/* Unlocked read. */
 	if ((so->so_state & SS_ISCONNECTED) == 0)
 		return ENOTCONN;
 	inp->in6p_faddr = in6addr_any;
@@ -691,6 +692,7 @@
 	struct sockaddr_in6 *dst;
 
 	/* always copy sockaddr to avoid overwrites */
+	/* Unlocked read. */
 	if (so->so_state & SS_ISCONNECTED) {
 		if (nam) {
 			m_freem(m);
--- //depot/vendor/freebsd/src/sys/netinet6/udp6_usrreq.c	2004/04/07 20:52:05
+++ //depot/user/rwatson/netperf/sys/netinet6/udp6_usrreq.c	2004/04/23 01:08:25
@@ -671,6 +671,7 @@
 	in6_pcbdisconnect(inp);
 	inp->in6p_laddr = in6addr_any;
 	splx(s);
+	/* XXXRW: so_state locking? */
 	so->so_state &= ~SS_ISCONNECTED;		/* XXX */
 	return 0;
 }
--- //depot/vendor/freebsd/src/sys/netipx/ipx.c	2003/06/11 05:25:41
+++ //depot/user/rwatson/netperf/sys/netipx/ipx.c	2004/03/18 14:20:12
@@ -50,6 +50,9 @@
 #include <netipx/ipx_if.h>
 #include <netipx/ipx_var.h>
 
+/*
+ * XXXRW: Requires synchronization.
+ */
 struct ipx_ifaddr *ipx_ifaddr;
 
 static	void ipx_ifscrub(struct ifnet *ifp, struct ipx_ifaddr *ia);
--- //depot/vendor/freebsd/src/sys/netipx/ipx_input.c	2003/11/08 22:30:39
+++ //depot/user/rwatson/netperf/sys/netipx/ipx_input.c	2004/03/19 10:34:30
@@ -72,23 +72,39 @@
 SYSCTL_INT(_net_ipx, OID_AUTO, ipxnetbios, CTLFLAG_RW,
 	   &ipxnetbios, 0, "");
 
-union	ipx_net ipx_zeronet;
-union	ipx_host ipx_zerohost;
+const union	ipx_net ipx_zeronet;
+const union	ipx_host ipx_zerohost;
 
-union	ipx_net	ipx_broadnet;
-union	ipx_host ipx_broadhost;
+const union	ipx_net	ipx_broadnet = { .s_net[0] = 0xffff,
+					    .s_net[1] = 0xffff };
+const union	ipx_host ipx_broadhost = { .s_host[0] = 0xffff,
+					    .s_host[1] = 0xffff,
+					    .s_host[2] = 0xffff };
 
+/*
+ * XXXRW: Locking needed here.
+ */
 struct	ipxstat ipxstat;
+
+/*
+ * XXXRW: These should/could also be const, since they're set only at
+ * init time.
+ */
 struct	sockaddr_ipx ipx_netmask, ipx_hostmask;
 
-static	u_short allones[] = {-1, -1, -1};
+/*
+ * XXXRW: Locking needed here.
+ */
+u_short			ipxpcb_lport_cache;
+struct ipxpcbhead	ipxpcb_list;
+struct ipxpcbhead	ipxrawpcb_list;
 
-struct	ipxpcb ipxpcb;
-struct	ipxpcb ipxrawpcb;
-
 static int ipxqmaxlen = IFQ_MAXLEN;
 static	struct ifqueue ipxintrq;
 
+/*
+ * XXXRW: Locking needed here.
+ */
 long	ipx_pexseq;
 
 static	int ipx_do_route(struct ipx_addr *src, struct route *ro);
@@ -103,13 +119,14 @@
 void
 ipx_init()
 {
-	ipx_broadnet = *(union ipx_net *)allones;
-	ipx_broadhost = *(union ipx_host *)allones;
 
 	read_random(&ipx_pexseq, sizeof ipx_pexseq);
-	ipxpcb.ipxp_next = ipxpcb.ipxp_prev = &ipxpcb;
-	ipxrawpcb.ipxp_next = ipxrawpcb.ipxp_prev = &ipxrawpcb;
-
+	LIST_INIT(&ipxpcb_list);
+	LIST_INIT(&ipxrawpcb_list);
+	
+	/*
+	 * XXXRW: These should be const?
+	 */
 	ipx_netmask.sipx_len = 6;
 	ipx_netmask.sipx_addr.x_net = ipx_broadnet;
 
@@ -133,6 +150,9 @@
 	struct ipx_ifaddr *ia;
 	int len;
 
+	/*
+	 * XXXRW: Would be nice to remove this.
+	 */
 	GIANT_REQUIRED;
 
 	/*
@@ -153,8 +173,7 @@
 	/*
 	 * Give any raw listeners a crack at the packet
 	 */
-	for (ipxp = ipxrawpcb.ipxp_next; ipxp != &ipxrawpcb;
-	     ipxp = ipxp->ipxp_next) {
+	LIST_FOREACH(ipxp, &ipxrawpcb_list, ipxp_list) {
 		struct mbuf *m1 = m_copy(m, 0, (int)M_COPYALL);
 		if (m1 != NULL)
 			ipx_input(m1, ipxp);
@@ -467,8 +486,7 @@
 	/*
 	 * Give any raw listeners a crack at the packet
 	 */
-	for (ipxp = ipxrawpcb.ipxp_next; ipxp != &ipxrawpcb;
-	     ipxp = ipxp->ipxp_next) {
+	LIST_FOREACH(ipxp, &ipxrawpcb_list, ipxp_list) {
 		struct mbuf *m0 = m_copy(m, 0, (int)M_COPYALL);
 		if (m0 != NULL) {
 			register struct ipx *ipx;
--- //depot/vendor/freebsd/src/sys/netipx/ipx_pcb.c	2004/03/01 03:15:33
+++ //depot/user/rwatson/netperf/sys/netipx/ipx_pcb.c	2004/03/19 10:34:30
@@ -56,7 +56,7 @@
 int
 ipx_pcballoc(so, head, td)
 	struct socket *so;
-	struct ipxpcb *head;
+	struct ipxpcbhead *head;
 	struct thread *td;
 {
 	register struct ipxpcb *ipxp;
@@ -107,13 +107,16 @@
 	}
 	ipxp->ipxp_laddr = sipx->sipx_addr;
 noname:
+	/*
+	 * XXXRW: I wonder what causes this loop to terminate...
+	 */
 	if (lport == 0)
 		do {
-			ipxpcb.ipxp_lport++;
-			if ((ipxpcb.ipxp_lport < IPXPORT_RESERVED) ||
-			    (ipxpcb.ipxp_lport >= IPXPORT_WELLKNOWN))
-				ipxpcb.ipxp_lport = IPXPORT_RESERVED;
-			lport = htons(ipxpcb.ipxp_lport);
+			ipxpcb_lport_cache++;
+			if ((ipxpcb_lport_cache < IPXPORT_RESERVED) ||
+			    (ipxpcb_lport_cache >= IPXPORT_WELLKNOWN))
+				ipxpcb_lport_cache = IPXPORT_RESERVED;
+			lport = htons(ipxpcb_lport_cache);
 		} while (ipx_pcblookup(&zeroipx_addr, lport, 0));
 	ipxp->ipxp_lport = lport;
 	return (0);
@@ -268,6 +271,7 @@
 {
 	struct socket *so = ipxp->ipxp_socket;
 
+	SOCK_LOCK(so);
 	so->so_pcb = 0;
 	sotryfree(so);
 	if (ipxp->ipxp_route.ro_rt != NULL)
@@ -323,18 +327,27 @@
 	register struct ipxpcb *ipxp, *oinp;
 	int s = splimp();
 
-	for (ipxp = (&ipxpcb)->ipxp_next; ipxp != (&ipxpcb);) {
+	for (ipxp = LIST_FIRST(&ipxpcb_list); ipxp != NULL;) {
 		if (!ipx_hosteq(*dst,ipxp->ipxp_faddr)) {
-	next:
-			ipxp = ipxp->ipxp_next;
+next:
+			ipxp = LIST_NEXT(ipxp, ipxp_list);
 			continue;
 		}
-		if (ipxp->ipxp_socket == 0)
-			goto next;
+		if (ipxp->ipxp_socket == 0) {
+			goto next;	/* advance, then re-test the loop condition */
+		}
 		if (errno) 
 			ipxp->ipxp_socket->so_error = errno;
+		/*
+		 * XXXRW: I can't find any consumers of this interface, and
+		 * so don't know if calling the notify function could result
+		 * in the ipxp list pointers changing.  Before moving this to
+		 * the queue(9) macros, there was some fancy footwork here
+		 * that didn't seem to be useful.  If the list can be changed
+		 * by a notification, it will make locking very difficult.
+		 */
 		oinp = ipxp;
-		ipxp = ipxp->ipxp_next;
+		ipxp = LIST_NEXT(ipxp, ipxp_list);
 		oinp->ipxp_notify_param = param;
 		(*notify)(oinp);
 	}
@@ -372,7 +385,7 @@
 	u_short fport;
 
 	fport = faddr->x_port;
-	for (ipxp = (&ipxpcb)->ipxp_next; ipxp != (&ipxpcb); ipxp = ipxp->ipxp_next) {
+	LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
 		if (ipxp->ipxp_lport != lport)
 			continue;
 		wildcard = 0;
--- //depot/vendor/freebsd/src/sys/netipx/ipx_pcb.h	2003/01/01 18:50:52
+++ //depot/user/rwatson/netperf/sys/netipx/ipx_pcb.h	2004/03/19 10:34:30
@@ -43,9 +43,7 @@
  * IPX protocol interface control block.
  */
 struct ipxpcb {
-	struct	ipxpcb *ipxp_next;	/* doubly linked list */
-	struct	ipxpcb *ipxp_prev;
-	struct	ipxpcb *ipxp_head;
+	LIST_ENTRY(ipxpcb)	ipxp_list;	/* list of ipxpcbs */
 	struct	socket *ipxp_socket;	/* back pointer to socket */
 	struct	ipx_addr ipxp_faddr;	/* destination address */
 	struct	ipx_addr ipxp_laddr;	/* socket's address */
@@ -58,6 +56,11 @@
 	u_char	ipxp_rpt;		/* last received packet type by ipx_input() */
 };
 
+LIST_HEAD(ipxpcbhead, ipxpcb);
+extern struct ipxpcbhead ipxpcb_list;
+extern struct ipxpcbhead ipxrawpcb_list;
+extern u_short ipxpcb_lport_cache;
+
 /* possible flags */
 
 #define IPXP_IN_ABORT		0x1	/* calling abort through socket */
@@ -82,7 +85,7 @@
 #ifdef _KERNEL
 extern struct ipxpcb ipxpcb;			/* head of list */
 
-int	ipx_pcballoc(struct socket *so, struct ipxpcb *head,
+int	ipx_pcballoc(struct socket *so, struct ipxpcbhead *head,
 			  struct thread *p);
 int	ipx_pcbbind(struct ipxpcb *ipxp, struct sockaddr *nam,
 			 struct thread *p);
--- //depot/vendor/freebsd/src/sys/netipx/ipx_usrreq.c	2003/11/18 00:40:43
+++ //depot/user/rwatson/netperf/sys/netipx/ipx_usrreq.c	2004/05/29 04:16:19
@@ -423,8 +423,8 @@
 	s = splnet();
 	ipx_pcbdetach(ipxp);
 	splx(s);
+	soisdisconnected(so);
 	sotryfree(so);
-	soisdisconnected(so);
 	return (0);
 }
 
@@ -441,7 +441,7 @@
 	if (ipxp != NULL)
 		return (EINVAL);
 	s = splnet();
-	error = ipx_pcballoc(so, &ipxpcb, td);
+	error = ipx_pcballoc(so, &ipxpcb_list, td);
 	splx(s);
 	if (error == 0)
 		error = soreserve(so, ipxsendspace, ipxrecvspace);
@@ -602,7 +602,7 @@
 	if (td != NULL && (error = suser(td)) != 0)
 		return (error);
 	s = splnet();
-	error = ipx_pcballoc(so, &ipxrawpcb, td);
+	error = ipx_pcballoc(so, &ipxrawpcb_list, td);
 	splx(s);
 	if (error)
 		return (error);
--- //depot/vendor/freebsd/src/sys/netipx/ipx_var.h	2003/03/04 23:20:46
+++ //depot/user/rwatson/netperf/sys/netipx/ipx_var.h	2004/03/19 10:34:30
@@ -66,19 +66,19 @@
 extern int ipxcksum;
 extern long ipx_pexseq;
 extern struct ipxstat ipxstat;
-extern struct ipxpcb ipxrawpcb;
 extern struct pr_usrreqs ipx_usrreqs;
 extern struct pr_usrreqs ripx_usrreqs;
 extern struct sockaddr_ipx ipx_netmask;
 extern struct sockaddr_ipx ipx_hostmask;
 
-extern union ipx_net ipx_zeronet;
-extern union ipx_host ipx_zerohost;
-extern union ipx_net ipx_broadnet;
-extern union ipx_host ipx_broadhost;
+extern const union ipx_net ipx_zeronet;
+extern const union ipx_host ipx_zerohost;
+extern const union ipx_net ipx_broadnet;
+extern const union ipx_host ipx_broadhost;
 
 struct ifnet;
 struct ipx_addr;
+struct ipxpcb;
 struct mbuf;
 struct thread;
 struct route;
--- //depot/vendor/freebsd/src/sys/netipx/spx_usrreq.c	2004/03/01 03:15:33
+++ //depot/user/rwatson/netperf/sys/netipx/spx_usrreq.c	2004/06/02 04:24:11
@@ -77,7 +77,7 @@
 #define spxstat spx_istat.newstats
 #endif  
 
-static int spx_backoff[SPX_MAXRXTSHIFT+1] =
+static const int spx_backoff[SPX_MAXRXTSHIFT+1] =
     { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
 
 static	struct spxpcb *spx_close(struct spxpcb *cb);
@@ -314,8 +314,19 @@
 	return;
 
 dropwithreset:
-	if (dropsocket)
+	if (dropsocket) {
+		struct socket *head;
+		ACCEPT_LOCK();
+		KASSERT((so->so_qstate & SQ_INCOMP) != 0,
+		    ("spx_input: nascent socket not SQ_INCOMP on soabort()"));
+		head = so->so_head;
+		TAILQ_REMOVE(&head->so_incomp, so, so_list);
+		head->so_incqlen--;
+		so->so_qstate &= ~SQ_INCOMP;
+		so->so_head = NULL;
+		ACCEPT_UNLOCK();
 		soabort(so);
+	}
 	si->si_seq = ntohs(si->si_seq);
 	si->si_ack = ntohs(si->si_ack);
 	si->si_alo = ntohs(si->si_alo);
@@ -564,10 +575,14 @@
 			m = dtom(q);
 			if (SI(q)->si_cc & SPX_OB) {
 				cb->s_oobflags &= ~SF_IOOB;
+				/* Unlocked read. */
 				if (so->so_rcv.sb_cc)
 					so->so_oobmark = so->so_rcv.sb_cc;
-				else
-					so->so_state |= SS_RCVATMARK;
+				else {
+					SOCKBUF_LOCK(&so->so_rcv);
+					so->so_rcv.sb_state |= SBS_RCVATMARK;
+					SOCKBUF_UNLOCK(&so->so_rcv);
+				}
 			}
 			q = q->si_prev;
 			remque(q->si_next);
@@ -597,7 +612,9 @@
 					MCHTYPE(m, MT_OOBDATA);
 					spx_newchecks[1]++;
 					so->so_oobmark = 0;
-					so->so_state &= ~SS_RCVATMARK;
+					SOCKBUF_LOCK(&so->so_rcv);
+					so->so_rcv.sb_state &= ~SBS_RCVATMARK;
+					SOCKBUF_UNLOCK(&so->so_rcv);
 				}
 				if (packetp == 0) {
 					m->m_data += SPINC;
@@ -1325,7 +1342,7 @@
 	if (ipxp != NULL)
 		return (EISCONN);
 	s = splnet();
-	error = ipx_pcballoc(so, &ipxpcb, td);
+	error = ipx_pcballoc(so, &ipxpcb_list, td);
 	if (error)
 		goto spx_attach_end;
 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
@@ -1536,8 +1553,9 @@
 	ipxp = sotoipxpcb(so);
 	cb = ipxtospxpcb(ipxp);
 
+	/* XXXRW: sb_state locking? */
 	if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
-	    (so->so_state & SS_RCVATMARK)) {
+	    (so->so_rcv.sb_state & SBS_RCVATMARK)) {
 		m->m_len = 1;
 		*mtod(m, caddr_t) = cb->s_iobc;
 		return (0);
@@ -1746,9 +1764,7 @@
 	register struct spxpcb *cb;
 	int s = splnet();
 
-	ipxp = ipxpcb.ipxp_next;
-	if (ipxp != NULL)
-	for (; ipxp != &ipxpcb; ipxp = ipxp->ipxp_next)
+	LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
 		if ((cb = (struct spxpcb *)ipxp->ipxp_pcb) != NULL &&
 		    (cb->s_flags & SF_DELACK)) {
 			cb->s_flags &= ~SF_DELACK;
@@ -1756,6 +1772,7 @@
 			spxstat.spxs_delack++;
 			spx_output(cb, (struct mbuf *)NULL);
 		}
+	}
 	splx(s);
 }
 
@@ -1774,15 +1791,15 @@
 
 	/*
 	 * Search through tcb's and update active timers.
+	 *
+	 * XXXRW: spx_timers() may remove an ipxpcb entry, so we have to be
+	 * ready to continue despite that.  The logic here is a bit
+	 * obfuscated.
 	 */
-	ip = ipxpcb.ipxp_next;
-	if (ip == NULL) {
-		splx(s);
-		return;
-	}
-	while (ip != &ipxpcb) {
+	ip = LIST_FIRST(&ipxpcb_list);
+	while (ip != NULL) {
+		ipnxt = LIST_NEXT(ip, ipxp_list);
 		cb = ipxtospxpcb(ip);
-		ipnxt = ip->ipxp_next;
 		if (cb == NULL)
 			goto tpgone;
 		for (i = 0; i < SPXT_NTIMERS; i++) {
@@ -1796,7 +1813,7 @@
 		if (cb->s_rtt)
 			cb->s_rtt++;
 tpgone:
-		ip = ipnxt;
+		ip = ipnxt;	/* saved at loop top; ip may have been removed by spx_timers() */
 	}
 	spx_iss += SPX_ISSINCR/PR_SLOWHZ;		/* increment iss */
 	splx(s);
--- //depot/vendor/freebsd/src/sys/netnatm/natm.c	2004/03/01 03:15:33
+++ //depot/user/rwatson/netperf/sys/netnatm/natm.c	2004/04/03 04:09:09
@@ -61,11 +61,11 @@
 
 #include <netnatm/natm.h>
 
-static u_long natm5_sendspace = 16*1024;
-static u_long natm5_recvspace = 16*1024;
+static const u_long natm5_sendspace = 16*1024;
+static const u_long natm5_recvspace = 16*1024;
 
-static u_long natm0_sendspace = 16*1024;
-static u_long natm0_recvspace = 16*1024;
+static const u_long natm0_sendspace = 16*1024;
+static const u_long natm0_recvspace = 16*1024;
 
 /*
  * user requests
@@ -135,6 +135,7 @@
      * we turn on 'drain' *before* we sofree.
      */
     npcb_free(npcb, NPCB_DESTROY);	/* drain */
+    SOCK_LOCK(so);
     so->so_pcb = NULL;
     sotryfree(so);
  out:
@@ -463,6 +464,7 @@
        */
 
       npcb_free(npcb, NPCB_DESTROY);	/* drain */
+      SOCK_LOCK(so);
       so->so_pcb = NULL;
       sotryfree(so);
 
--- //depot/vendor/freebsd/src/sys/netsmb/smb_trantcp.c	2004/03/01 03:15:33
+++ //depot/user/rwatson/netperf/sys/netsmb/smb_trantcp.c	2004/05/31 05:39:37
@@ -184,6 +184,9 @@
 	return 0;
 }
 
+/*
+ * XXXRW: nb_upcall() is called without Giant, which is probably safeish.
+ */
 static void
 nb_upcall(struct socket *so, void *arg, int waitflag)
 {
@@ -238,7 +241,9 @@
 	nbp->nbp_tso = so;
 	so->so_upcallarg = (caddr_t)nbp;
 	so->so_upcall = nb_upcall;
+	SOCKBUF_LOCK(&so->so_rcv);
 	so->so_rcv.sb_flags |= SB_UPCALL;
+	SOCKBUF_UNLOCK(&so->so_rcv);
 	so->so_rcv.sb_timeo = (5 * hz);
 	so->so_snd.sb_timeo = (5 * hz);
 	error = soreserve(so, nbp->nbp_sndbuf, nbp->nbp_rcvbuf);
@@ -246,8 +251,12 @@
 		goto bad;
 	nb_setsockopt_int(so, SOL_SOCKET, SO_KEEPALIVE, 1);
 	nb_setsockopt_int(so, IPPROTO_TCP, TCP_NODELAY, 1);
+	SOCKBUF_LOCK(&so->so_rcv);
 	so->so_rcv.sb_flags &= ~SB_NOINTR;
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	SOCKBUF_LOCK(&so->so_snd);
 	so->so_snd.sb_flags &= ~SB_NOINTR;
+	SOCKBUF_UNLOCK(&so->so_snd);
 	error = soconnect(so, (struct sockaddr*)to, td);
 	if (error)
 		goto bad;
@@ -414,8 +423,8 @@
 		 * If we don't have one waiting, return.
 		 */
 		error = nbssn_recvhdr(nbp, &len, &rpcode, MSG_DONTWAIT, td);
-		if (so->so_state &
-		    (SS_ISDISCONNECTING | SS_ISDISCONNECTED | SS_CANTRCVMORE)) {
+		if (so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED) ||
+		    so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 			nbp->nbp_state = NBST_CLOSED;
 			NBDEBUG("session closed by peer\n");
 			return ECONNRESET;
--- //depot/vendor/freebsd/src/sys/nfsclient/bootp_subr.c	2004/03/12 20:39:01
+++ //depot/user/rwatson/netperf/sys/nfsclient/bootp_subr.c	2004/03/13 05:34:00
@@ -591,7 +591,7 @@
 	int retry;
 	const char *s;
 
-	GIANT_REQUIRED;		/* XXX until socket locking done */
+	NET_ASSERT_GIANT();
 
 	/*
 	 * Create socket and set its recieve timeout.
@@ -983,7 +983,7 @@
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
 
-	GIANT_REQUIRED;		/* XXX until socket locking done */
+	NET_ASSERT_GIANT();
 
 	error = socreate(AF_INET, &ifctx->so, SOCK_DGRAM, 0, td->td_ucred, td);
 	if (error != 0)
--- //depot/vendor/freebsd/src/sys/nfsclient/krpc_subr.c	2003/11/15 00:30:34
+++ //depot/user/rwatson/netperf/sys/nfsclient/krpc_subr.c	2004/02/28 22:29:37
@@ -215,8 +215,6 @@
 	nam = mhead = NULL;
 	from = NULL;
 
-	GIANT_REQUIRED;		/* XXX until socket locking done */
-
 	/*
 	 * Create socket and set its recieve timeout.
 	 */
--- //depot/vendor/freebsd/src/sys/nfsclient/nfs_socket.c	2004/04/07 05:01:20
+++ //depot/user/rwatson/netperf/sys/nfsclient/nfs_socket.c	2004/06/01 04:11:50
@@ -156,12 +156,12 @@
 nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
 {
 	struct socket *so;
-	int s, error, rcvreserve, sndreserve;
+	int error, rcvreserve, sndreserve;
 	int pktscale;
 	struct sockaddr *saddr;
 	struct thread *td = &thread0; /* only used for socreate and sobind */
 
-	GIANT_REQUIRED;		/* XXX until socket locking done */
+	NET_ASSERT_GIANT();
 
 	nmp->nm_so = NULL;
 	saddr = nmp->nm_nam;
@@ -241,25 +241,25 @@
 		 * connect system call but with the wait timing out so
 		 * that interruptible mounts don't hang here for a long time.
 		 */
-		s = splnet();
+		SOCK_LOCK(so);
 		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
-			(void) tsleep(&so->so_timeo,
+			(void) msleep(&so->so_timeo, SOCK_MTX(so),
 			    PSOCK, "nfscon", 2 * hz);
 			if ((so->so_state & SS_ISCONNECTING) &&
 			    so->so_error == 0 && rep &&
 			    (error = nfs_sigintr(nmp, rep, rep->r_td)) != 0) {
 				so->so_state &= ~SS_ISCONNECTING;
-				splx(s);
+				SOCK_UNLOCK(so);
 				goto bad;
 			}
 		}
 		if (so->so_error) {
 			error = so->so_error;
 			so->so_error = 0;
-			splx(s);
+			SOCK_UNLOCK(so);
 			goto bad;
 		}
-		splx(s);
+		SOCK_UNLOCK(so);
 	}
 	so->so_rcv.sb_timeo = 5 * hz;
 	so->so_snd.sb_timeo = 5 * hz;
@@ -319,8 +319,12 @@
 	error = soreserve(so, sndreserve, rcvreserve);
 	if (error)
 		goto bad;
+	SOCKBUF_LOCK(&so->so_rcv);
 	so->so_rcv.sb_flags |= SB_NOINTR;
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	SOCKBUF_LOCK(&so->so_snd);
 	so->so_snd.sb_flags |= SB_NOINTR;
+	SOCKBUF_UNLOCK(&so->so_snd);
 
 	/* Initialize other non-zero congestion variables */
 	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
@@ -379,7 +383,7 @@
 {
 	struct socket *so;
 
-	GIANT_REQUIRED;		/* XXX until socket locking done */
+	NET_ASSERT_GIANT();
 
 	if (nmp->nm_so) {
 		so = nmp->nm_so;
@@ -415,7 +419,7 @@
 	struct sockaddr *sendnam;
 	int error, soflags, flags;
 
-	GIANT_REQUIRED;		/* XXX until socket locking done */
+	NET_ASSERT_GIANT();
 
 	KASSERT(rep, ("nfs_send: called with rep == NULL"));
 
@@ -498,7 +502,7 @@
 	int error, sotype, rcvflg;
 	struct thread *td = curthread;	/* XXX */
 
-	GIANT_REQUIRED;		/* XXX until socket locking done */
+	NET_ASSERT_GIANT();
 
 	/*
 	 * Set up arguments for soreceive()
@@ -1147,6 +1151,7 @@
 		 * Set r_rtt to -1 in case we fail to send it now.
 		 */
 		rep->r_rtt = -1;
+		/* XXXRW: Unlocked reads. */
 		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
 		   ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
 		    (rep->r_flags & R_SENT) ||
--- //depot/vendor/freebsd/src/sys/nfsclient/nfs_vfsops.c	2004/04/07 05:01:20
+++ //depot/user/rwatson/netperf/sys/nfsclient/nfs_vfsops.c	2004/04/08 03:11:34
@@ -384,7 +384,7 @@
 	u_long l;
 	char buf[128];
 
-	GIANT_REQUIRED;		/* XXX until socket locking done */
+	NET_ASSERT_GIANT();
 
 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
--- //depot/vendor/freebsd/src/sys/nfsserver/nfs_syscalls.c	2004/05/31 16:35:36
+++ //depot/user/rwatson/netperf/sys/nfsserver/nfs_syscalls.c	2004/05/31 22:13:06
@@ -263,10 +263,14 @@
 		val = 1;
 		sosetopt(so, &sopt);
 	}
+	SOCKBUF_LOCK(&so->so_rcv);
 	so->so_rcv.sb_flags &= ~SB_NOINTR;
 	so->so_rcv.sb_timeo = 0;
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	SOCKBUF_LOCK(&so->so_snd);
 	so->so_snd.sb_flags &= ~SB_NOINTR;
 	so->so_snd.sb_timeo = 0;
+	SOCKBUF_UNLOCK(&so->so_snd);
 
 	slp = (struct nfssvc_sock *)
 		malloc(sizeof (struct nfssvc_sock), M_NFSSVC,
@@ -285,7 +289,9 @@
 	s = splnet();
 	so->so_upcallarg = (caddr_t)slp;
 	so->so_upcall = nfsrv_rcv;
+	SOCKBUF_LOCK(&so->so_rcv);
 	so->so_rcv.sb_flags |= SB_UPCALL;
+	SOCKBUF_UNLOCK(&so->so_rcv);
 	slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
 	nfsrv_wakenfsd(slp);
 	splx(s);
@@ -601,7 +607,13 @@
 		NFSD_UNLOCK();
 		slp->ns_fp = NULL;
 		so = slp->ns_so;
+		/*
+		 * XXXRW: More general locking issues here relating to
+		 * upcalls.
+		 */
+		SOCKBUF_LOCK(&so->so_rcv);
 		so->so_rcv.sb_flags &= ~SB_UPCALL;
+		SOCKBUF_UNLOCK(&so->so_rcv);
 		so->so_upcall = NULL;
 		so->so_upcallarg = NULL;
 		soshutdown(so, SHUT_RDWR);
--- //depot/vendor/freebsd/src/sys/pci/if_xl.c	2004/05/30 20:01:55
+++ //depot/user/rwatson/netperf/sys/pci/if_xl.c	2004/05/31 01:41:33
@@ -1769,7 +1769,7 @@
 	ether_ifattach(ifp, eaddr);
 
 	/* Hook interrupt last to avoid having to lock softc */
-	error = bus_setup_intr(dev, sc->xl_irq, INTR_TYPE_NET,
+	error = bus_setup_intr(dev, sc->xl_irq, INTR_TYPE_NET | INTR_MPSAFE,
 	    xl_intr, sc, &sc->xl_intrhand);
 	if (error) {
 		printf("xl%d: couldn't set up irq\n", unit);
--- //depot/vendor/freebsd/src/sys/pci/if_xlreg.h	2003/11/14 19:01:11
+++ //depot/user/rwatson/netperf/sys/pci/if_xlreg.h	2004/04/25 22:02:25
@@ -606,16 +606,10 @@
 	struct mtx		xl_mtx;
 };
 
-#if 0
 /* These are a bit premature.  The driver still tries to sleep with locks. */
 #define XL_LOCK(_sc)		mtx_lock(&(_sc)->xl_mtx)
 #define XL_UNLOCK(_sc)		mtx_unlock(&(_sc)->xl_mtx)
 #define XL_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->xl_mtx, MA_OWNED)
-#else
-#define XL_LOCK(x)		do { } while (0)
-#define XL_UNLOCK(x)		do { } while (0)
-#define XL_LOCK_ASSERT(x)	do { } while (0)
-#endif
 
 #define xl_rx_goodframes(x) \
 	((x.xl_upper_frames_ok & 0x03) << 8) | x.xl_rx_frames_ok
--- //depot/vendor/freebsd/src/sys/rpc/rpcclnt.c	2004/03/28 05:56:16
+++ //depot/user/rwatson/netperf/sys/rpc/rpcclnt.c	2004/03/28 15:24:45
@@ -360,7 +360,7 @@
 		RPC_RETURN(EFAULT);
 	}
 
-	GIANT_REQUIRED;		/* XXX until socket locking done */
+	NET_ASSERT_GIANT();
 
 	/* create the socket */
 	rpc->rc_so = NULL;
@@ -618,7 +618,7 @@
 {
 	struct socket  *so;
 
-	GIANT_REQUIRED;		/* XXX until socket locking done */
+	NET_ASSERT_GIANT();
 
 	if (rpc->rc_so) {
 		so = rpc->rc_so;
@@ -669,7 +669,7 @@
 #endif
 	int error, soflags, flags;
 
-	GIANT_REQUIRED;		/* XXX until socket locking done */
+	NET_ASSERT_GIANT();
 
 	if (rep) {
 		if (rep->r_flags & R_SOFTTERM) {
@@ -754,7 +754,7 @@
 #endif
 	int error, sotype, rcvflg;
 
-	GIANT_REQUIRED;		/* XXX until socket locking done */
+	NET_ASSERT_GIANT();
 
 	/*
 	 * Set up arguments for soreceive()
@@ -1439,6 +1439,7 @@
 		 * Set r_rtt to -1 in case we fail to send it now.
 		 */
 		rep->r_rtt = -1;
+		SOCKBUF_LOCK(&so->so_snd);
 		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
 		    ((rpc->rc_flag & RPCCLNT_DUMBTIMR) ||
 		     (rep->r_flags & R_SENT) ||
@@ -1473,6 +1474,7 @@
 				rep->r_rtt = 0;
 			}
 		}
+		SOCKBUF_UNLOCK(&so->so_snd);
 	}
 	splx(s);
 
--- //depot/vendor/freebsd/src/sys/security/mac/mac_net.c	2004/05/03 23:38:47
+++ //depot/user/rwatson/netperf/sys/security/mac/mac_net.c	2004/06/07 01:03:29
@@ -83,6 +83,16 @@
     &nmacmbufs, 0, "number of mbufs in use");
 #endif
 
+/*
+ * XXXRW: struct ifnet locking is incomplete in the network code, so we
+ * use our own global mutex for struct ifnet.  Non-ideal, but should help
+ * in the SMP environment.
+ */
+static struct mtx mac_ifnet_mtx;
+MTX_SYSINIT(mac_ifnet_mtx, &mac_ifnet_mtx, "mac_ifnet", MTX_DEF);
+#define	MAC_IFNET_LOCK(ifp)	mtx_lock(&mac_ifnet_mtx)
+#define	MAC_IFNET_UNLOCK(ifp)	mtx_unlock(&mac_ifnet_mtx)
+
 struct label *
 mac_mbuf_to_label(struct mbuf *mbuf)
 {
@@ -243,6 +253,18 @@
 	MAC_PERFORM(copy_mbuf_label, src_label, dest_label);
 }
 
+static void
+mac_copy_ifnet_label(struct label *src, struct label *dest)
+{
+
+	MAC_PERFORM(copy_ifnet_label, src, dest);
+}
+
+/*
+ * XXXRW: Need to use a routine to copy the ifnet label while holding the
+ * ifnet mutex, similar to sockets, pipes, vnodes, et al, to avoid holding
+ * the mutex over the copyin/copyout.
+ */
 static int
 mac_externalize_ifnet_label(struct label *label, char *elements,
     char *outbuf, size_t outbuflen)
@@ -254,6 +276,9 @@
 	return (error);
 }
 
+/*
+ * XXXRW: See comment for mac_externalize_ifnet_label().
+ */
 static int
 mac_internalize_ifnet_label(struct label *label, char *string)
 {
@@ -268,7 +293,9 @@
 mac_create_ifnet(struct ifnet *ifnet)
 {
 
+	MAC_IFNET_LOCK(ifnet);
 	MAC_PERFORM(create_ifnet, ifnet, ifnet->if_label);
+	MAC_IFNET_UNLOCK(ifnet);
 }
 
 void
@@ -310,8 +337,10 @@
 
 	label = mac_mbuf_to_label(mbuf);
 
+	MAC_IFNET_LOCK(ifnet);
 	MAC_PERFORM(create_mbuf_linklayer, ifnet, ifnet->if_label, mbuf,
 	    label);
+	MAC_IFNET_UNLOCK(ifnet);
 }
 
 void
@@ -321,8 +350,10 @@
 
 	label = mac_mbuf_to_label(mbuf);
 
+	MAC_IFNET_LOCK(ifnet);
 	MAC_PERFORM(create_mbuf_from_ifnet, ifnet, ifnet->if_label, mbuf,
 	    label);
+	MAC_IFNET_UNLOCK(ifnet);
 }
 
 void
@@ -334,8 +365,10 @@
 	oldmbuflabel = mac_mbuf_to_label(oldmbuf);
 	newmbuflabel = mac_mbuf_to_label(newmbuf);
 
+	MAC_IFNET_LOCK(ifnet);
 	MAC_PERFORM(create_mbuf_multicast_encap, oldmbuf, oldmbuflabel,
 	    ifnet, ifnet->if_label, newmbuf, newmbuflabel);
+	MAC_IFNET_UNLOCK(ifnet);
 }
 
 void
@@ -360,8 +393,10 @@
 	if (!mac_enforce_network)
 		return (0);
 
+	MAC_IFNET_LOCK(ifnet);
 	MAC_CHECK(check_bpfdesc_receive, bpf_d, bpf_d->bd_label, ifnet,
 	    ifnet->if_label);
+	MAC_IFNET_UNLOCK(ifnet);
 
 	return (error);
 }
@@ -379,8 +414,10 @@
 
 	label = mac_mbuf_to_label(mbuf);
 
+	MAC_IFNET_LOCK(ifnet);
 	MAC_CHECK(check_ifnet_transmit, ifnet, ifnet->if_label, mbuf,
 	    label);
+	MAC_IFNET_UNLOCK(ifnet);
 
 	return (error);
 }
@@ -390,6 +427,7 @@
     struct ifnet *ifnet)
 {
 	char *elements, *buffer;
+	struct label *intlabel;
 	struct mac mac;
 	int error;
 
@@ -409,8 +447,13 @@
 	}
 
 	buffer = malloc(mac.m_buflen, M_MACTEMP, M_WAITOK | M_ZERO);
-	error = mac_externalize_ifnet_label(ifnet->if_label, elements,
+	intlabel = mac_ifnet_label_alloc();
+	MAC_IFNET_LOCK(ifnet);
+	mac_copy_ifnet_label(ifnet->if_label, intlabel);
+	MAC_IFNET_UNLOCK(ifnet);
+	error = mac_externalize_ifnet_label(intlabel, elements,
 	    buffer, mac.m_buflen);
+	mac_ifnet_label_free(intlabel);
 	if (error == 0)
 		error = copyout(buffer, mac.m_string, strlen(buffer)+1);
 
@@ -463,14 +506,17 @@
 		return (error);
 	}
 
+	MAC_IFNET_LOCK(ifnet);
 	MAC_CHECK(check_ifnet_relabel, cred, ifnet, ifnet->if_label,
 	    intlabel);
 	if (error) {
+		MAC_IFNET_UNLOCK(ifnet);
 		mac_ifnet_label_free(intlabel);
 		return (error);
 	}
 
 	MAC_PERFORM(relabel_ifnet, cred, ifnet, ifnet->if_label, intlabel);
+	MAC_IFNET_UNLOCK(ifnet);
 
 	mac_ifnet_label_free(intlabel);
 	return (0);
--- //depot/vendor/freebsd/src/sys/security/mac/mac_socket.c	2004/02/26 03:55:40
+++ //depot/user/rwatson/netperf/sys/security/mac/mac_socket.c	2004/05/08 02:12:27
@@ -218,6 +218,7 @@
     struct socket *newsocket)
 {
 
+	SOCK_LOCK_ASSERT(oldsocket);
 	MAC_PERFORM(create_socket_from_socket, oldsocket, oldsocket->so_label,
 	    newsocket, newsocket->so_label);
 }
@@ -227,6 +228,7 @@
     struct label *newlabel)
 {
 
+	SOCK_LOCK_ASSERT(socket);
 	MAC_PERFORM(relabel_socket, cred, socket, socket->so_label, newlabel);
 }
 
@@ -237,6 +239,7 @@
 
 	label = mac_mbuf_to_label(mbuf);
 
+	/* XXXRW: SOCK_LOCK_ASSERT(socket); */
 	MAC_PERFORM(set_socket_peer_from_mbuf, mbuf, label, socket,
 	    socket->so_peerlabel);
 }
@@ -246,6 +249,7 @@
     struct socket *newsocket)
 {
 
+	/* XXXRW: Socket locking for both sockets? */
 	MAC_PERFORM(set_socket_peer_from_socket, oldsocket,
 	    oldsocket->so_label, newsocket, newsocket->so_peerlabel);
 }
@@ -257,6 +261,7 @@
 
 	label = mac_mbuf_to_label(mbuf);
 
+	SOCK_LOCK_ASSERT(socket);
 	MAC_PERFORM(create_mbuf_from_socket, socket, socket->so_label, mbuf,
 	    label);
 }
@@ -267,6 +272,8 @@
 {
 	int error;
 
+	SOCK_LOCK_ASSERT(socket);
+
 	if (!mac_enforce_socket)
 		return (0);
 
@@ -282,6 +289,8 @@
 {
 	int error;
 
+	SOCK_LOCK_ASSERT(socket);
+
 	if (!mac_enforce_socket)
 		return (0);
 
@@ -297,6 +306,8 @@
 	struct label *label;
 	int error;
 
+	SOCK_LOCK_ASSERT(socket);
+
 	if (!mac_enforce_socket)
 		return (0);
 
@@ -313,6 +324,8 @@
 {
 	int error;
 
+	SOCK_LOCK_ASSERT(socket);
+
 	if (!mac_enforce_socket)
 		return (0);
 
@@ -325,6 +338,8 @@
 {
 	int error;
 
+	SOCK_LOCK_ASSERT(so);
+
 	if (!mac_enforce_socket)
 		return (0);
 
@@ -339,6 +354,8 @@
 {
 	int error;
 
+	SOCK_LOCK_ASSERT(socket);
+
 	MAC_CHECK(check_socket_relabel, cred, socket, socket->so_label,
 	    newlabel);
 
@@ -350,6 +367,8 @@
 {
 	int error;
 
+	SOCK_LOCK_ASSERT(so);
+
 	if (!mac_enforce_socket)
 		return (0);
 
@@ -363,6 +382,8 @@
 {
 	int error;
 
+	SOCK_LOCK_ASSERT(socket);
+
 	if (!mac_enforce_socket)
 		return (0);
 
@@ -377,12 +398,24 @@
 {
 	int error;
 
+	/*
+	 * We acquire the socket lock when we perform the test and set,
+	 * but have to release it as the pcb code needs to acquire the
+	 * pcb lock, which will precede the socket lock in the lock
+	 * order.  However, this is fine, as any race will simply
+	 * result in the inpcb being refreshed twice, but still
+	 * consistently, as the inpcb code will acquire the socket lock
+	 * before refreshing, holding both locks.
+	 */
+	SOCK_LOCK(so);
 	error = mac_check_socket_relabel(cred, so, label);
-	if (error)
+	if (error) {
+		SOCK_UNLOCK(so);
 		return (error);
+	}
 
 	mac_relabel_socket(cred, so, label);
-
+	SOCK_UNLOCK(so);
 	/*
 	 * If the protocol has expressed interest in socket layer changes,
 	 * such as if it needs to propagate changes to a cached pcb
@@ -392,7 +425,7 @@
 	if (so->so_proto->pr_usrreqs->pru_sosetlabel != NULL)
 		(so->so_proto->pr_usrreqs->pru_sosetlabel)(so);
 
-	return (0);
+	return (error);
 }
 
 int
@@ -419,9 +452,7 @@
 	if (error)
 		goto out;
 
-	/* XXX: Socket lock here. */
 	error = mac_socket_label_set(cred, so, intlabel);
-	/* XXX: Socket unlock here. */
 out:
 	mac_socket_label_free(intlabel);
 	return (error);
@@ -431,6 +462,7 @@
 mac_getsockopt_label(struct ucred *cred, struct socket *so, struct mac *mac)
 {
 	char *buffer, *elements;
+	struct label *intlabel;
 	int error;
 
 	error = mac_check_structmac_consistent(mac);
@@ -445,8 +477,13 @@
 	}
 
 	buffer = malloc(mac->m_buflen, M_MACTEMP, M_WAITOK | M_ZERO);
-	error = mac_externalize_socket_label(so->so_label, elements,
+	intlabel = mac_socket_label_alloc(M_WAITOK);
+	SOCK_LOCK(so);
+	mac_copy_socket_label(so->so_label, intlabel);
+	SOCK_UNLOCK(so);
+	error = mac_externalize_socket_label(intlabel, elements,
 	    buffer, mac->m_buflen);
+	mac_socket_label_free(intlabel);
 	if (error == 0)
 		error = copyout(buffer, mac->m_string, strlen(buffer)+1);
 
@@ -461,6 +498,7 @@
     struct mac *mac)
 {
 	char *elements, *buffer;
+	struct label *intlabel;
 	int error;
 
 	error = mac_check_structmac_consistent(mac);
@@ -475,8 +513,13 @@
 	}
 
 	buffer = malloc(mac->m_buflen, M_MACTEMP, M_WAITOK | M_ZERO);
-	error = mac_externalize_socket_peer_label(so->so_peerlabel,
+	intlabel = mac_socket_label_alloc(M_WAITOK);
+	SOCK_LOCK(so);
+	mac_copy_socket_label(so->so_peerlabel, intlabel);
+	SOCK_UNLOCK(so);
+	error = mac_externalize_socket_peer_label(intlabel,
 	    elements, buffer, mac->m_buflen);
+	mac_socket_label_free(intlabel);
 	if (error == 0)
 		error = copyout(buffer, mac->m_string, strlen(buffer)+1);
 
--- //depot/vendor/freebsd/src/sys/sys/mac_policy.h	2004/05/10 18:40:36
+++ //depot/user/rwatson/netperf/sys/sys/mac_policy.h	2004/06/07 01:03:29
@@ -122,6 +122,8 @@
 	void	(*mpo_destroy_vnode_label)(struct label *label);
 	void	(*mpo_copy_cred_label)(struct label *src,
 		    struct label *dest);
+	void	(*mpo_copy_ifnet_label)(struct label *src,
+		    struct label *dest);
 	void	(*mpo_copy_mbuf_label)(struct label *src,
 		    struct label *dest);
 	void	(*mpo_copy_pipe_label)(struct label *src,
--- //depot/vendor/freebsd/src/sys/sys/mutex.h	2004/03/28 23:15:25
+++ //depot/user/rwatson/netperf/sys/sys/mutex.h	2004/03/29 01:06:00
@@ -350,7 +350,10 @@
  * without special recursion handling.
  *
  * This mechanism is intended as temporary until everything of
- * importance is properly locked.
+ * importance is properly locked.  Note: the semantics for
+ * NET_{LOCK,UNLOCK}_GIANT() are not the same as DROP_GIANT()
+ * and PICKUP_GIANT(), as they are plain mutex operations
+ * without a recursion counter.
  */
 extern	int debug_mpsafenet;		/* defined in net/netisr.c */
 #define	NET_LOCK_GIANT() do {						\
--- //depot/vendor/freebsd/src/sys/sys/socketvar.h	2004/06/04 04:10:43
+++ //depot/user/rwatson/netperf/sys/sys/socketvar.h	2004/06/09 02:50:26
@@ -35,6 +35,8 @@
 
 #include <sys/queue.h>			/* for TAILQ macros */
 #include <sys/selinfo.h>		/* for struct selinfo */
+#include <sys/_lock.h>
+#include <sys/_mutex.h>
 
 /*
  * Kernel structure per socket.
@@ -55,11 +57,11 @@
  * (g) used only as a sleep/wakeup address, no value.
  */
 struct socket {
-	int	so_count;		/* reference count */
+	int	so_count;		/* (b) reference count */
 	short	so_type;		/* (a) generic type, see socket.h */
 	short	so_options;		/* from socket call, see socket.h */
 	short	so_linger;		/* time to linger while closing */
-	short	so_state;		/* internal state flags SS_* */
+	short	so_state;		/* (b) internal state flags SS_* */
 	int	so_qstate;		/* (e) internal state flags SQ_* */
 	void	*so_pcb;		/* protocol control block */
 	struct	protosw *so_proto;	/* (a) protocol handle */
@@ -82,8 +84,8 @@
 	short	so_incqlen;		/* (e) number of unaccepted incomplete
 					   connections */
 	short	so_qlimit;		/* (e) max number queued connections */
-	short	so_timeo;		/* connection timeout */
-	u_short	so_error;		/* error affecting connection */
+	short	so_timeo;		/* (g) connection timeout */
+	u_short	so_error;		/* (f) error affecting connection */
 	struct	sigio *so_sigio;	/* [sg] information for async I/O or
 					   out of band data (SIGURG) */
 	u_long	so_oobmark;		/* chars to oob mark */
@@ -93,21 +95,26 @@
  */
 	struct sockbuf {
 		struct	selinfo sb_sel;	/* process selecting read/write */
+		struct	mtx sb_mtx;	/* sockbuf lock */
+		int	sb_state;	/* (c/d) socket-related state locked on sockbuf */
 #define	sb_startzero	sb_mb
-		struct	mbuf *sb_mb;	/* the mbuf chain */
-		struct	mbuf *sb_mbtail; /* the last mbuf in the chain */
-		struct	mbuf *sb_lastrecord;	/* first mbuf of last record in
-						 * socket buffer */
-		u_int	sb_cc;		/* actual chars in buffer */
-		u_int	sb_hiwat;	/* max actual char count */
-		u_int	sb_mbcnt;	/* chars of mbufs used */
-		u_int	sb_mbmax;	/* max chars of mbufs to use */
-		u_int	sb_ctl;		/* non-data chars in buffer */
-		int	sb_lowat;	/* low water mark */
-		int	sb_timeo;	/* timeout for read/write */
-		short	sb_flags;	/* flags, see below */
+		struct	mbuf *sb_mb;	/* (c/d) the mbuf chain */
+		struct	mbuf *sb_mbtail; /* (c/d) the last mbuf in the chain */
+		struct	mbuf *sb_lastrecord;	/* (c/d) first mbuf of last
+						 * record in socket buffer */
+		u_int	sb_cc;		/* (c/d) actual chars in buffer */
+		u_int	sb_hiwat;	/* (c/d) max actual char count */
+		u_int	sb_mbcnt;	/* (c/d) chars of mbufs used */
+		u_int	sb_mbmax;	/* (c/d) max chars of mbufs to use */
+		u_int	sb_ctl;		/* (c/d) non-data chars in buffer */
+		int	sb_lowat;	/* (c/d) low water mark */
+		int	sb_timeo;	/* (c/d) timeout for read/write */
+		short	sb_flags;	/* (c/d) flags, see below */
 	} so_rcv, so_snd;
 #define	SB_MAX		(256*1024)	/* default for max chars in sockbuf */
+/*
+ * Constants for sb_flags field of struct sockbuf.
+ */
 #define	SB_LOCK		0x01		/* lock on data queue */
 #define	SB_WANT		0x02		/* someone is waiting to lock */
 #define	SB_WAIT		0x04		/* someone is waiting for data/space */
@@ -120,9 +127,9 @@
 
 	void	(*so_upcall)(struct socket *, void *, int);
 	void	*so_upcallarg;
-	struct	ucred *so_cred;		/* user credentials */
-	struct	label *so_label;	/* MAC label for socket */
-	struct	label *so_peerlabel;	/* cached MAC label for socket peer */
+	struct	ucred *so_cred;		/* (a) user credentials */
+	struct	label *so_label;	/* (b) MAC label for socket */
+	struct	label *so_peerlabel;	/* (b) cached MAC label for peer */
 	/* NB: generation count must not be first; easiest to make it last. */
 	so_gen_t so_gencnt;		/* generation count */
 	void	*so_emuldata;		/* private data for emulators */
@@ -147,25 +154,52 @@
  * until such time as it proves to be a good idea.
  */
 extern struct mtx accept_mtx;
-#define ACCEPT_LOCK()			mtx_lock(&accept_mtx)
-#define ACCEPT_UNLOCK()			mtx_unlock(&accept_mtx)
+#define	ACCEPT_LOCK()			mtx_lock(&accept_mtx)
+#define	ACCEPT_UNLOCK()			mtx_unlock(&accept_mtx)
+
+#define	SOCKBUF_MTX(_sb)		(&(_sb)->sb_mtx)
+#define	SOCKBUF_LOCK_INIT(_sb, _name) \
+	mtx_init(SOCKBUF_MTX(_sb), _name, NULL, MTX_DEF)
+#define	SOCKBUF_LOCK_DESTROY(_sb)	mtx_destroy(SOCKBUF_MTX(_sb))
+#define	SOCKBUF_LOCK(_sb)		mtx_lock(SOCKBUF_MTX(_sb))
+#define	SOCKBUF_OWNED(_sb)		mtx_owned(SOCKBUF_MTX(_sb))
+#define	SOCKBUF_UNLOCK(_sb)		mtx_unlock(SOCKBUF_MTX(_sb))
+#define	SOCKBUF_LOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED)
+
+/* NB: we reuse the rcv sockbuf's lock for other items in the socket */
+#define	SOCK_MTX(_so)			SOCKBUF_MTX(&(_so)->so_rcv)
+#define	SOCK_LOCK(_so)			SOCKBUF_LOCK(&(_so)->so_rcv)
+#define	SOCK_OWNED(_so)			SOCKBUF_OWNED(&(_so)->so_rcv)
+#define	SOCK_UNLOCK(_so)		SOCKBUF_UNLOCK(&(_so)->so_rcv)
+#define	SOCK_LOCK_ASSERT(_so)		SOCKBUF_LOCK_ASSERT(&(_so)->so_rcv)
 
-/*
+/*-
  * Socket state bits.
+ *
+ * Historically, these bits were all kept in the so_state field.  For
+ * locking reasons, they are now in multiple fields, as they are
+ * locked differently.  so_state maintains basic socket state protected
+ * by the socket lock.  so_qstate holds information about the socket
+ * accept queues.  Each socket buffer also has a state field holding
+ * information relevant to that socket buffer (can't send, rcv).  Many
+ * fields will be read without locks to improve performance and avoid
+ * lock order issues.  However, this approach must be used with caution.
  */
 #define	SS_NOFDREF		0x0001	/* no file table ref any more */
 #define	SS_ISCONNECTED		0x0002	/* socket connected to a peer */
 #define	SS_ISCONNECTING		0x0004	/* in process of connecting to peer */
 #define	SS_ISDISCONNECTING	0x0008	/* in process of disconnecting */
-#define	SS_CANTSENDMORE		0x0010	/* can't send more data to peer */
-#define	SS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
-#define	SS_RCVATMARK		0x0040	/* at mark on input */
-
 #define	SS_NBIO			0x0100	/* non-blocking ops */
 #define	SS_ASYNC		0x0200	/* async i/o notify */
 #define	SS_ISCONFIRMING		0x0400	/* deciding to accept connection req */
+#define	SS_ISDISCONNECTED	0x2000	/* socket disconnected from peer */
 
-#define	SS_ISDISCONNECTED	0x2000	/* socket disconnected from peer */
+/*
+ * Socket state bits now stored in the socket buffer state field.
+ */
+#define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */
+#define	SBS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
+#define	SBS_RCVATMARK		0x0040	/* at mark on input */
 
 /*
  * Socket state bits stored in so_qstate.
@@ -205,6 +239,7 @@
 	uid_t	so_uid;		/* XXX */
 };
 
+#ifdef _KERNEL
 /*
  * Macros for sockets and socket buffering.
  */
@@ -232,7 +267,7 @@
 /* can we read something from so? */
 #define	soreadable(so) \
     ((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \
-	((so)->so_state & SS_CANTRCVMORE) || \
+	((so)->so_rcv.sb_state & SBS_CANTRCVMORE) || \
 	!TAILQ_EMPTY(&(so)->so_comp) || (so)->so_error)
 
 /* can we write something to so? */
@@ -240,7 +275,7 @@
     ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \
 	(((so)->so_state&SS_ISCONNECTED) || \
 	  ((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \
-     ((so)->so_state & SS_CANTSENDMORE) || \
+     ((so)->so_snd.sb_state & SBS_CANTSENDMORE) || \
      (so)->so_error)
 
 /* adjust counters in sb reflecting allocation of m */
@@ -275,47 +310,63 @@
 		((sb)->sb_flags |= SB_LOCK), 0)
 
 /* release lock on sockbuf sb */
-#define	sbunlock(sb) { \
+#define	sbunlock(sb) do { \
+	SOCKBUF_LOCK_ASSERT(sb); \
 	(sb)->sb_flags &= ~SB_LOCK; \
 	if ((sb)->sb_flags & SB_WANT) { \
 		(sb)->sb_flags &= ~SB_WANT; \
 		wakeup(&(sb)->sb_flags); \
 	} \
-}
+} while (0)
 
 /*
  * soref()/sorele() ref-count the socket structure.  Note that you must
  * still explicitly close the socket, but the last ref count will free
  * the structure.
  */
-#define soref(so)	do {			\
-				++(so)->so_count; \
-			} while (0)
+#define soref(so) do {					\
+	SOCK_LOCK_ASSERT(so);				\
+	++(so)->so_count;				\
+} while (0)
 
-#define sorele(so)	do {				\
-				if ((so)->so_count <= 0)	\
-					panic("sorele");\
-				if (--(so)->so_count == 0)\
-					sofree(so);	\
-			} while (0)
+#define sorele(so) do {					\
+	SOCK_LOCK_ASSERT(so);				\
+	if ((so)->so_count <= 0)			\
+		panic("sorele");			\
+	if (--(so)->so_count == 0)			\
+		sofree(so);				\
+	else						\
+		SOCK_UNLOCK(so);			\
+	so = NULL;					\
+} while (0)
 
-#define sotryfree(so)	do {				\
-				if ((so)->so_count == 0)	\
-					sofree(so);	\
-			} while(0)
+#define sotryfree(so) do {				\
+	SOCK_LOCK_ASSERT(so);				\
+	if ((so)->so_count == 0)			\
+		sofree(so);				\
+	else						\
+		SOCK_UNLOCK(so);			\
+	so = NULL;					\
+} while (0)
 
-#define	sorwakeup(so)	do {					\
-				if (sb_notify(&(so)->so_rcv))	\
-					sowakeup((so), &(so)->so_rcv); \
-			} while (0)
+#define	sorwakeup(so) do {				\
+	if (sb_notify(&(so)->so_rcv))			\
+		sowakeup((so), &(so)->so_rcv);		\
+} while (0)
+#define	sorwakeup_locked(so) do {			\
+	if (sb_notify(&(so)->so_rcv))			\
+		sowakeup_locked((so), &(so)->so_rcv);	\
+} while (0)
 
-#define	sowwakeup(so)	do {					\
-				if (sb_notify(&(so)->so_snd))	\
-					sowakeup((so), &(so)->so_snd); \
-			} while (0)
+#define	sowwakeup(so) do {				\
+	if (sb_notify(&(so)->so_snd))			\
+		sowakeup((so), &(so)->so_snd);		\
+} while (0)
+#define	sowwakeup_locked(so)	do {			\
+	if (sb_notify(&(so)->so_snd))			\
+		sowakeup_locked((so), &(so)->so_snd);	\
+} while (0)
 
-#ifdef _KERNEL
-
 /*
  * Argument structure for sosetopt et seq.  This is in the KERNEL
  * section because it will never be visible to user code.
@@ -363,12 +414,19 @@
 int	sockargs(struct mbuf **mp, caddr_t buf, int buflen, int type);
 int	getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len);
 void	sbappend(struct sockbuf *sb, struct mbuf *m);
+void	sbappend_locked(struct sockbuf *sb, struct mbuf *m);
 void	sbappendstream(struct sockbuf *sb, struct mbuf *m);
+void	sbappendstream_locked(struct sockbuf *sb, struct mbuf *m);
 int	sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
 	    struct mbuf *m0, struct mbuf *control);
+int	sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
+	    struct mbuf *m0, struct mbuf *control);
 int	sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
 	    struct mbuf *control);
+int	sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
+	    struct mbuf *control);
 void	sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
+void	sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
 void	sbcheck(struct sockbuf *sb);
 void	sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
 struct mbuf *
@@ -377,6 +435,7 @@
 void	sbdroprecord(struct sockbuf *sb);
 void	sbflush(struct sockbuf *sb);
 void	sbinsertoob(struct sockbuf *sb, struct mbuf *m0);
+void	sbinsertoob_locked(struct sockbuf *sb, struct mbuf *m0);
 void	sbrelease(struct sockbuf *sb, struct socket *so);
 int	sbreserve(struct sockbuf *sb, u_long cc, struct socket *so,
 	    struct thread *td);
@@ -389,7 +448,9 @@
 int	socheckuid(struct socket *so, uid_t uid);
 int	sobind(struct socket *so, struct sockaddr *nam, struct thread *td);
 void	socantrcvmore(struct socket *so);
+void	socantrcvmore_locked(struct socket *so);
 void	socantsendmore(struct socket *so);
+void	socantsendmore_locked(struct socket *so);
 int	soclose(struct socket *so);
 int	soconnect(struct socket *so, struct sockaddr *nam, struct thread *td);
 int	soconnect2(struct socket *so1, struct socket *so2);
@@ -430,6 +491,7 @@
 int	soshutdown(struct socket *so, int how);
 void	sotoxsocket(struct socket *so, struct xsocket *xso);
 void	sowakeup(struct socket *so, struct sockbuf *sb);
+void	sowakeup_locked(struct socket *so, struct sockbuf *sb);
 
 #ifdef SOCKBUF_DEBUG
 void	sblastrecordchk(struct sockbuf *, const char *, int);