--- //depot/vendor/freebsd/src/sys/kern/kern_malloc.c	2005/04/12 23:55:38
+++ //depot/user/rwatson/percpu/sys/kern/kern_malloc.c	2005/04/17 18:30:41
@@ -1,4 +1,5 @@
 /*-
+ * Copyright (c) 2005 Robert N. M. Watson
  * Copyright (c) 1987, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -44,6 +45,7 @@
 #include <sys/mutex.h>
 #include <sys/vmmeter.h>
 #include <sys/proc.h>
+#include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
 
@@ -133,6 +135,33 @@
 	{0, NULL},
 };
 
+/*
+ * Two malloc type structures are present: malloc_type, which is used by a
+ * type owner to declare the type, and malloc_type_internal, which holds
+ * malloc-owned statistics and other ABI-sensitive fields, such as the set of
+ * malloc statistics indexed by the compile-time MAXCPU constant.
+ *
+ * The malloc_type ks_next field is protected by malloc_mtx.  Other fields in
+ * malloc_type are static after initialization so unsynchronized.
+ *
+ * Statistics in malloc_type_stats are written only when holding a critical
+ * section, but read lock-free resulting in possible (minor) races, which the
+ * monitoring app should take into account.
+ */
+struct malloc_type_stats {
+	u_long		mts_memalloced;	/* Bytes allocated on CPU. */
+	u_long		mts_memfreed;	/* Bytes freed on CPU. */
+	u_long		mts_numallocs;	/* Number of allocates on CPU. */
+	u_long		mts_numfrees;	/* Number of frees on CPU. */
+	u_long		mts_size;	/* Bitmask of sizes allocated on CPU. */
+};
+
+struct malloc_type_internal {
+	struct malloc_type_stats	 mti_stats[MAXCPU];
+};
+
+uma_zone_t mt_zone;
+
 #ifdef DEBUG_MEMGUARD
 u_int vm_memguard_divisor;
 SYSCTL_UINT(_vm, OID_AUTO, memguard_divisor, CTLFLAG_RD, &vm_memguard_divisor,
@@ -197,41 +226,44 @@
  * Add this to the informational malloc_type bucket.
  */
 static void
-malloc_type_zone_allocated(struct malloc_type *ksp, unsigned long size,
+malloc_type_zone_allocated(struct malloc_type *type, unsigned long size,
     int zindx)
 {
-	mtx_lock(&ksp->ks_mtx);
-	ksp->ks_calls++;
+	struct malloc_type_internal *mti;
+	struct malloc_type_stats *mts;
+
+	critical_enter();
+	mti = (struct malloc_type_internal *)(type->ks_handle);
+	mts = &mti->mti_stats[curcpu];
+	mts->mts_memalloced += size;
+	mts->mts_numallocs++;
 	if (zindx != -1)
-		ksp->ks_size |= 1 << zindx;
-	if (size != 0) {
-		ksp->ks_memuse += size;
-		ksp->ks_inuse++;
-		if (ksp->ks_memuse > ksp->ks_maxused)
-			ksp->ks_maxused = ksp->ks_memuse;
-	}
-	mtx_unlock(&ksp->ks_mtx);
+		mts->mts_size |= 1 << zindx;
+	critical_exit();
 }
 
 void
-malloc_type_allocated(struct malloc_type *ksp, unsigned long size)
+malloc_type_allocated(struct malloc_type *type, unsigned long size)
 {
-	malloc_type_zone_allocated(ksp, size, -1);
+
+	malloc_type_zone_allocated(type, size, -1);
 }
 
 /*
  * Remove this allocation from the informational malloc_type bucket.
  */
 void
-malloc_type_freed(struct malloc_type *ksp, unsigned long size)
+malloc_type_freed(struct malloc_type *type, unsigned long size)
 {
-	mtx_lock(&ksp->ks_mtx);
-	KASSERT(size <= ksp->ks_memuse,
-		("malloc(9)/free(9) confusion.\n%s",
-		 "Probably freeing with wrong type, but maybe not here."));
-	ksp->ks_memuse -= size;
-	ksp->ks_inuse--;
-	mtx_unlock(&ksp->ks_mtx);
+	struct malloc_type_internal *mti;
+	struct malloc_type_stats *mts;
+
+	critical_enter();
+	mti = (struct malloc_type_internal *)type->ks_handle;
+	mts = &mti->mti_stats[curcpu];
+	mts->mts_memfreed += size;
+	mts->mts_numfrees++;
+	critical_exit();
 }
 
 /*
@@ -351,9 +383,6 @@
 	}
 #endif
 
-	KASSERT(type->ks_memuse > 0,
-		("malloc(9)/free(9) confusion.\n%s",
-		 "Probably freeing with wrong type, but maybe not here."));
 	size = 0;
 
 	slab = vtoslab((vm_offset_t)addr & (~UMA_SLAB_MASK));
@@ -405,6 +434,11 @@
 	if (addr == NULL)
 		return (malloc(size, type, flags));
 
+	/*
+	 * XXX: Should report free of old memory and alloc of new memory to
+	 * per-CPU stats.
+	 */
+
 #ifdef DEBUG_MEMGUARD
 /* XXX: CHANGEME! */
 if (type == M_SUBPROC) {
@@ -543,6 +577,13 @@
 
 	uma_startup2();
 
+	mt_zone = uma_zcreate("mt_zone", sizeof(struct malloc_type_internal),
+#ifdef INVARIANTS
+		    mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini,
+#else
+		    NULL, NULL, NULL, NULL,
+#endif
+	    UMA_ALIGN_PTR, UMA_ZONE_MALLOC);
 	for (i = 0, indx = 0; kmemzones[indx].kz_size != 0; indx++) {
 		int size = kmemzones[indx].kz_size;
 		char *name = kmemzones[indx].kz_name;
@@ -562,127 +603,145 @@
 }
 
 void
-malloc_init(void *data)
+malloc_init(void *type)
 {
-	struct malloc_type *type = (struct malloc_type *)data;
+	struct malloc_type_internal *mti;
+	struct malloc_type *mt;
 
-	mtx_lock(&malloc_mtx);
-	if (type->ks_magic != M_MAGIC)
-		panic("malloc type lacks magic");
+	KASSERT(cnt.v_page_count != 0, ("malloc_register before vm_init"));
 
-	if (cnt.v_page_count == 0)
-		panic("malloc_init not allowed before vm init");
+	mt = type;
+	mti = uma_zalloc(mt_zone, M_WAITOK | M_ZERO);
+	mt->ks_handle = mti;
 
-	if (type->ks_next != NULL)
-		return;
-
-	type->ks_next = kmemstatistics;	
+	mtx_lock(&malloc_mtx);
+	mt->ks_next = kmemstatistics;
 	kmemstatistics = type;
-	mtx_init(&type->ks_mtx, type->ks_shortdesc, "Malloc Stats", MTX_DEF);
 	mtx_unlock(&malloc_mtx);
 }
 
 void
-malloc_uninit(void *data)
+malloc_uninit(void *type)
 {
-	struct malloc_type *type = (struct malloc_type *)data;
-	struct malloc_type *t;
+	struct malloc_type_internal *mti;
+	struct malloc_type *mt, *temp;
 
+	mt = type;
+	KASSERT(mt->ks_handle != NULL, ("malloc_deregister: cookie NULL"));
 	mtx_lock(&malloc_mtx);
-	mtx_lock(&type->ks_mtx);
-	if (type->ks_magic != M_MAGIC)
-		panic("malloc type lacks magic");
-
-	if (cnt.v_page_count == 0)
-		panic("malloc_uninit not allowed before vm init");
-
-	if (type == kmemstatistics)
-		kmemstatistics = type->ks_next;
-	else {
-		for (t = kmemstatistics; t->ks_next != NULL; t = t->ks_next) {
-			if (t->ks_next == type) {
-				t->ks_next = type->ks_next;
-				break;
-			}
+	mti = mt->ks_handle;
+	mt->ks_handle = NULL;
+	if (mt != kmemstatistics) {
+		for (temp = kmemstatistics; temp != NULL;
+		    temp = temp->ks_next) {
+			if (temp->ks_next == mt)
+				temp->ks_next = mt->ks_next;
 		}
-	}
-	type->ks_next = NULL;
-	mtx_destroy(&type->ks_mtx);
+	} else
+		kmemstatistics = mt->ks_next;
 	mtx_unlock(&malloc_mtx);
+	uma_zfree(mt_zone, mti);
 }
 
 static int
 sysctl_kern_malloc(SYSCTL_HANDLER_ARGS)
 {
+	struct malloc_type_stats *mts, mts_local;
+	struct malloc_type_internal *mti;
+	long temp_allocs, temp_bytes;
 	struct malloc_type *type;
 	int linesize = 128;
-	int curline;
+	struct sbuf sbuf;
 	int bufsize;
 	int first;
 	int error;
 	char *buf;
-	char *p;
 	int cnt;
-	int len;
 	int i;
 
 	cnt = 0;
 
+	/* Guess at how much room is needed. */
 	mtx_lock(&malloc_mtx);
 	for (type = kmemstatistics; type != NULL; type = type->ks_next)
 		cnt++;
+	mtx_unlock(&malloc_mtx);
 
-	mtx_unlock(&malloc_mtx);
 	bufsize = linesize * (cnt + 1);
-	p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
+	buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
+	sbuf_new(&sbuf, buf, bufsize, SBUF_FIXEDLEN);
+
 	mtx_lock(&malloc_mtx);
 
-	len = snprintf(p, linesize,
+
+	sbuf_printf(&sbuf,
 	    "\n        Type  InUse MemUse HighUse Requests  Size(s)\n");
-	p += len;
-
 	for (type = kmemstatistics; cnt != 0 && type != NULL;
 	    type = type->ks_next, cnt--) {
-		if (type->ks_calls == 0)
+		mti = type->ks_handle;
+		bzero(&mts_local, sizeof(mts_local));
+		for (i = 0; i < MAXCPU; i++) {
+			mts = &mti->mti_stats[i];
+			mts_local.mts_memalloced += mts->mts_memalloced;
+			mts_local.mts_memfreed += mts->mts_memfreed;
+			mts_local.mts_numallocs += mts->mts_numallocs;
+			mts_local.mts_numfrees += mts->mts_numfrees;
+			mts_local.mts_size |= mts->mts_size;
+		}
+		if (mts_local.mts_numallocs == 0)
 			continue;
 
-		curline = linesize - 2;	/* Leave room for the \n */
-		len = snprintf(p, curline, "%13s%6lu%6luK%7luK%9llu",
-			type->ks_shortdesc,
-			type->ks_inuse,
-			(type->ks_memuse + 1023) / 1024,
-			(type->ks_maxused + 1023) / 1024,
-			(long long unsigned)type->ks_calls);
-		curline -= len;
-		p += len;
+		/*
+		 * Due to races in per-CPU statistics gather, it's possible to
+		 * get a slightly negative number here.  If we do, approximate
+		 * with 0.
+		 */
+		if (mts_local.mts_numallocs > mts_local.mts_numfrees)
+			temp_allocs = mts_local.mts_numallocs -
+			    mts_local.mts_numfrees;
+		else
+			temp_allocs = 0;
+
+		/*
+		 * Ditto for bytes allocated.
+		 */
+		if (mts_local.mts_memalloced > mts_local.mts_memfreed)
+			temp_bytes = mts_local.mts_memalloced -
+			    mts_local.mts_memfreed;
+		else
+			temp_bytes = 0;
+
+		/*
+		 * XXXRW: High-watermark is no longer easily available, so
+		 * we just print '-' for that column.
+		 */
+		sbuf_printf(&sbuf, "%13s%6lu%6luK       -%9lu",
+		    type->ks_shortdesc,
+		    temp_allocs,
+		    (temp_bytes + 1023) / 1024,
+		    mts_local.mts_numallocs);
 
 		first = 1;
 		for (i = 0; i < sizeof(kmemzones) / sizeof(kmemzones[0]) - 1;
 		    i++) {
-			if (type->ks_size & (1 << i)) {
+			if (mts_local.mts_size & (1 << i)) {
 				if (first)
-					len = snprintf(p, curline, "  ");
+					sbuf_printf(&sbuf, "  ");
 				else
-					len = snprintf(p, curline, ",");
-				curline -= len;
-				p += len;
-
-				len = snprintf(p, curline,
-				    "%s", kmemzones[i].kz_name);
-				curline -= len;
-				p += len;
-
+					sbuf_printf(&sbuf, ",");
+				sbuf_printf(&sbuf, "%s",
+				    kmemzones[i].kz_name);
 				first = 0;
 			}
 		}
-
-		len = snprintf(p, 2, "\n");
-		p += len;
+		sbuf_printf(&sbuf, "\n");
 	}
+	sbuf_finish(&sbuf);
+	mtx_unlock(&malloc_mtx);
 
-	mtx_unlock(&malloc_mtx);
-	error = SYSCTL_OUT(req, buf, p - buf);
+	error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
 
+	sbuf_delete(&sbuf);
 	free(buf, M_TEMP);
 	return (error);
 }
@@ -696,6 +755,7 @@
 sysctl_kern_mprof(SYSCTL_HANDLER_ARGS)
 {
 	int linesize = 64;
+	struct sbuf sbuf;
 	uint64_t count;
 	uint64_t waste;
 	uint64_t mem;
@@ -704,7 +764,6 @@
 	char *buf;
 	int rsize;
 	int size;
-	char *p;
 	int len;
 	int i;
 
@@ -714,34 +773,30 @@
 	waste = 0;
 	mem = 0;
 
-	p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
-	len = snprintf(p, bufsize,
+	buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
+	sbuf_new(&sbuf, buf, bufsize, SBUF_FIXEDLEN);
+	sbuf_printf(&sbuf, 
 	    "\n  Size                    Requests  Real Size\n");
-	bufsize -= len;
-	p += len;
-
 	for (i = 0; i < KMEM_ZSIZE; i++) {
 		size = i << KMEM_ZSHIFT;
 		rsize = kmemzones[kmemsize[i]].kz_size;
 		count = (long long unsigned)krequests[i];
 
-		len = snprintf(p, bufsize, "%6d%28llu%11d\n",
-		    size, (unsigned long long)count, rsize);
-		bufsize -= len;
-		p += len;
+		sbuf_printf(&sbuf, "%6d%28llu%11d\n", size,
+		    (unsigned long long)count, rsize);
 
 		if ((rsize * count) > (size * count))
 			waste += (rsize * count) - (size * count);
 		mem += (rsize * count);
 	}
-
-	len = snprintf(p, bufsize,
+	sbuf_printf(&sbuf,
 	    "\nTotal memory used:\t%30llu\nTotal Memory wasted:\t%30llu\n",
 	    (unsigned long long)mem, (unsigned long long)waste);
-	p += len;
+	sbuf_finish(&sbuf);
 
-	error = SYSCTL_OUT(req, buf, p - buf);
+	error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
 
+	sbuf_delete(&sbuf);
 	free(buf, M_TEMP);
 	return (error);
 }
--- //depot/vendor/freebsd/src/sys/kern/kern_mbuf.c	2005/02/16 21:50:29
+++ //depot/user/rwatson/percpu/sys/kern/kern_mbuf.c	2005/04/15 11:11:26
@@ -1,6 +1,7 @@
 /*-
- * Copyright (c) 2004, 2005,
- * 	Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
+ * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
+ * Copyright (c) 2005 Robert N. M. Watson
+ * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -31,6 +32,9 @@
 #include "opt_mac.h"
 #include "opt_param.h"
 
+/* Need mbstat_percpu definition from mbuf.h. */
+#define	WANT_MBSTAT_PERCPU
+
 #include <sys/param.h>
 #include <sys/mac.h>
 #include <sys/malloc.h>
@@ -39,6 +43,7 @@
 #include <sys/domain.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
+#include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
@@ -79,7 +84,18 @@
  */
 
 int nmbclusters;
+
+/*
+ * mbstat is the mbuf statistics structure exposed to userspace.
+ *
+ * mbstat_percpu is the per-CPU statistics structure in which many of the
+ * mbstat measurements are gathered before being combined for exposure to
+ * userspace.  mbstat_percpu is read lockless, so subject to small
+ * consistency races.  It is modified holding a critical section to avoid
+ * read-modify-write races in the presence of preemption.
+ */
 struct mbstat mbstat;
+struct mbstat_percpu mbstat_percpu[MAXCPU];
 
 static void
 tunable_mbinit(void *dummy)
@@ -91,11 +107,13 @@
 }
 SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL);
 
+static int sysctl_kern_ipc_mbstat(SYSCTL_HANDLER_ARGS);
+
 SYSCTL_DECL(_kern_ipc);
 SYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RW, &nmbclusters, 0,
     "Maximum number of mbuf clusters allowed");
-SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
-    "Mbuf general information and statistics");
+SYSCTL_PROC(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, NULL, 0,
+    sysctl_kern_ipc_mbstat, "", "Mbuf general information and statistics");
 
 /*
  * Zones from which we allocate.
@@ -170,8 +188,69 @@
 	mbstat.m_mcfail = mbstat.m_mpfail = 0;
 	mbstat.sf_iocnt = 0;
 	mbstat.sf_allocwait = mbstat.sf_allocfail = 0;
+
+	/* mbstat_percpu is zero'd by BSS. */
 }
 
+static int
+sysctl_kern_ipc_mbstat(SYSCTL_HANDLER_ARGS)
+{
+	struct mbstat_percpu *mbp, mbp_local;
+	u_int cpu;	/* Wide enough that cpu < MAXCPU can become false. */
+
+	bzero(&mbp_local, sizeof(mbp_local));
+	for (cpu = 0; cpu < MAXCPU; cpu++) {
+		mbp = &mbstat_percpu[cpu];
+		mbp_local.mbp_mbuf_allocs += mbp->mbp_mbuf_allocs;
+		mbp_local.mbp_mbuf_frees += mbp->mbp_mbuf_frees;
+		mbp_local.mbp_mbuf_fails += mbp->mbp_mbuf_fails;
+		mbp_local.mbp_mbuf_drains += mbp->mbp_mbuf_drains;
+		mbp_local.mbp_clust_allocs += mbp->mbp_clust_allocs;
+		mbp_local.mbp_clust_frees += mbp->mbp_clust_frees;
+
+		mbp_local.mbp_copy_fails += mbp->mbp_copy_fails;
+		mbp_local.mbp_pullup_fails += mbp->mbp_pullup_fails;
+
+		mbp_local.sfp_iocnt += mbp->sfp_iocnt;
+		mbp_local.sfp_alloc_fails += mbp->sfp_alloc_fails;
+		mbp_local.sfp_alloc_waits += mbp->sfp_alloc_waits;
+	}
+
+	/*
+	 * If, due to races, the number of frees for mbufs or clusters is
+	 * greater than the number of allocs, adjust alloc stats to 0.  This
+	 * isn't quite accurate, but for the time being, we consider the
+	 * performance win of races worth the occasional inaccuracy.
+	 */
+	if (mbp_local.mbp_mbuf_allocs > mbp_local.mbp_mbuf_frees)
+		mbstat.m_mbufs = mbp_local.mbp_mbuf_allocs -
+		    mbp_local.mbp_mbuf_frees;
+	else
+		mbstat.m_mbufs = 0;
+
+	if (mbp_local.mbp_clust_allocs > mbp_local.mbp_clust_frees)
+		mbstat.m_mclusts = mbp_local.mbp_clust_allocs -
+		    mbp_local.mbp_clust_frees;
+	else
+		mbstat.m_mclusts = 0;
+
+	mbstat.m_drain = mbp_local.mbp_mbuf_drains;
+	mbstat.m_mcfail = mbp_local.mbp_copy_fails;
+	mbstat.m_mpfail = mbp_local.mbp_pullup_fails;
+
+	mbstat.sf_iocnt = mbp_local.sfp_iocnt;
+	mbstat.sf_allocfail = mbp_local.sfp_alloc_fails;
+	/*
+	 * sf_allocwait is protected by per-architecture mutex sf_buf_lock,
+	 * which is held whenever sf_allocwait is updated, so don't use the
+	 * per-cpu version here
+	 *
+	 * mbstat.sf_allocwait = mbp_local.sfp_alloc_waits;
+	 */
+
+	return (SYSCTL_OUT(req, &mbstat, sizeof(mbstat)));
+}
+
 /*
  * Constructor for Mbuf master zone.
  *
@@ -212,7 +291,10 @@
 #endif
 	} else
 		m->m_data = m->m_dat;
-	mbstat.m_mbufs += 1;	/* XXX */
+
+	critical_enter();
+	mbstat_percpu[curcpu].mbp_mbuf_allocs++;
+	critical_exit();
 	return (0);
 }
 
@@ -227,7 +309,9 @@
 	m = (struct mbuf *)mem;
 	if ((m->m_flags & M_PKTHDR) != 0)
 		m_tag_delete_chain(m, NULL);
-	mbstat.m_mbufs -= 1;	/* XXX */
+	critical_enter();
+	mbstat_percpu[curcpu].mbp_mbuf_frees++;
+	critical_exit();
 }
 
 /* XXX Only because of stats */
@@ -235,12 +319,16 @@
 mb_dtor_pack(void *mem, int size, void *arg)
 {
 	struct mbuf *m;
+	u_char cpu;
 
 	m = (struct mbuf *)mem;
 	if ((m->m_flags & M_PKTHDR) != 0)
 		m_tag_delete_chain(m, NULL);
-	mbstat.m_mbufs -= 1;	/* XXX */
-	mbstat.m_mclusts -= 1;	/* XXX */
+	critical_enter();
+	cpu = curcpu;
+	mbstat_percpu[cpu].mbp_mbuf_frees++;
+	mbstat_percpu[cpu].mbp_clust_frees++;
+	critical_exit();
 }
 
 /*
@@ -263,7 +351,9 @@
 	m->m_ext.ext_size = MCLBYTES;
 	m->m_ext.ext_type = EXT_CLUSTER;
 	m->m_ext.ref_cnt = NULL;	/* Lazy counter assign. */
-	mbstat.m_mclusts += 1;	/* XXX */
+	critical_enter();
+	mbstat_percpu[curcpu].mbp_clust_allocs++;
+	critical_exit();
 	return (0);
 }
 
@@ -271,7 +361,10 @@
 static void
 mb_dtor_clust(void *mem, int size, void *arg)
 {
-	mbstat.m_mclusts -= 1;	/* XXX */
+
+	critical_enter();
+	mbstat_percpu[curcpu].mbp_clust_frees++;
+	critical_exit();
 }
 
 /*
@@ -288,7 +381,9 @@
 	uma_zalloc_arg(zone_clust, m, how);
 	if (m->m_ext.ext_buf == NULL)
 		return (ENOMEM);
-	mbstat.m_mclusts -= 1;	/* XXX */
+	critical_enter();
+	mbstat_percpu[curcpu].mbp_clust_frees++;
+	critical_exit();
 	return (0);
 }
 
@@ -304,7 +399,9 @@
 	m = (struct mbuf *)mem;
 	uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL);
 	m->m_ext.ext_buf = NULL;
-	mbstat.m_mclusts += 1;	/* XXX */
+	critical_enter();
+	mbstat_percpu[curcpu].mbp_clust_allocs++;
+	critical_exit();
 }
 
 /*
@@ -320,6 +417,7 @@
 #endif
 	int flags;
 	short type;
+	u_char cpu;
 
 	m = (struct mbuf *)mem;
 	args = (struct mb_args *)arg;
@@ -348,8 +446,11 @@
 			return (error);
 #endif
 	}
-	mbstat.m_mbufs += 1;	/* XXX */
-	mbstat.m_mclusts += 1;	/* XXX */
+	critical_enter();
+	cpu = curcpu;
+	mbstat_percpu[cpu].mbp_mbuf_allocs++;
+	mbstat_percpu[cpu].mbp_clust_allocs++;
+	critical_exit();
 	return (0);
 }
 
@@ -369,7 +470,9 @@
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL,
 	    "mb_reclaim()");
 
-	mbstat.m_drain++;
+	critical_enter();
+	mbstat_percpu[curcpu].mbp_mbuf_drains++;
+	critical_exit();
 	for (dp = domains; dp != NULL; dp = dp->dom_next)
 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
 			if (pr->pr_drain != NULL)
--- //depot/vendor/freebsd/src/sys/kern/uipc_mbuf.c	2005/03/17 19:35:19
+++ //depot/user/rwatson/percpu/sys/kern/uipc_mbuf.c	2005/04/15 10:55:44
@@ -36,6 +36,9 @@
 #include "opt_param.h"
 #include "opt_mbuf_stress_test.h"
 
+/* Need mbstat_percpu definition from mbuf.h. */
+#define	WANT_MBSTAT_PERCPU
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
@@ -44,8 +47,10 @@
 #include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/pcpu.h>
 #include <sys/sysctl.h>
 #include <sys/domain.h>
+#include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/uio.h>
 
@@ -428,13 +433,18 @@
 		m = m->m_next;
 		np = &n->m_next;
 	}
-	if (top == NULL)
-		mbstat.m_mcfail++;	/* XXX: No consistency. */
+	if (top == NULL) {
+		critical_enter();
+		mbstat_percpu[curcpu].mbp_copy_fails++;
+		critical_exit();
+	}
 
 	return (top);
 nospace:
 	m_freem(top);
-	mbstat.m_mcfail++;	/* XXX: No consistency. */
+	critical_enter();
+	mbstat_percpu[curcpu].mbp_copy_fails++;
+	critical_exit();
 	return (NULL);
 }
 
@@ -497,7 +507,9 @@
 	return top;
 nospace:
 	m_freem(top);
-	mbstat.m_mcfail++;	/* XXX: No consistency. */ 
+	critical_enter();
+	mbstat_percpu[curcpu].mbp_copy_fails++;
+	critical_exit();
 	return (NULL);
 }
 
@@ -600,7 +612,9 @@
 
 nospace:
 	m_freem(top);
-	mbstat.m_mcfail++;	/* XXX: No consistency. */
+	critical_enter();
+	mbstat_percpu[curcpu].mbp_copy_fails++;
+	critical_exit();
 	return (NULL);
 }
 
@@ -762,7 +776,9 @@
 	return (m);
 bad:
 	m_freem(n);
-	mbstat.m_mpfail++;	/* XXX: No consistency. */
+	critical_enter();
+	mbstat_percpu[curcpu].mbp_pullup_fails++;
+	critical_exit();
 	return (NULL);
 }
 
--- //depot/vendor/freebsd/src/sys/kern/uipc_syscalls.c	2005/04/16 18:50:30
+++ //depot/user/rwatson/percpu/sys/kern/uipc_syscalls.c	2005/04/25 10:22:44
@@ -39,6 +39,9 @@
 #include "opt_ktrace.h"
 #include "opt_mac.h"
 
+/* Need mbstat_percpu definition from mbuf.h. */
+#define WANT_MBSTAT_PERCPU
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
@@ -1933,7 +1936,9 @@
 			vm_page_io_finish(pg);
 			if (!error)
 				VM_OBJECT_UNLOCK(obj);
-			mbstat.sf_iocnt++;
+			critical_enter();
+			mbstat_percpu[curcpu].sfp_iocnt++;
+			critical_exit();
 		}
 	
 		if (error) {
@@ -1961,7 +1966,9 @@
 		 * but this wait can be interrupted.
 		 */
 		if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) {
-			mbstat.sf_allocfail++;
+			critical_enter();
+			mbstat_percpu[curcpu].sfp_alloc_fails++;
+			critical_exit();
 			vm_page_lock_queues();
 			vm_page_unwire(pg, 0);
 			if (pg->wire_count == 0 && pg->object == NULL)
--- //depot/vendor/freebsd/src/sys/sys/malloc.h	2005/01/07 02:32:16
+++ //depot/user/rwatson/percpu/sys/sys/malloc.h	2005/04/14 12:54:00
@@ -50,25 +50,52 @@
 
 #define	M_MAGIC		877983977	/* time when first defined :-) */
 
+/*
+ * ABI-compatible version of the old 'struct malloc_type', only all stats are
+ * now malloc-managed in malloc-owned memory rather than in caller memory, so
+ * as to avoid ABI issues.  The lo_class pointer of the old trailing mutex is
+ * reused as a pointer to the internal data handle (ks_handle).
+ *
+ * XXXRW: Why is this not ifdef _KERNEL?
+ *
+ * XXXRW: Use of ks_shortdesc has leaked out of kern_malloc.c.
+ */
 struct malloc_type {
-	struct malloc_type *ks_next;	/* next in list */
-	u_long 	ks_memuse;	/* total memory held in bytes */
-	u_long	ks_size;	/* sizes of this thing that are allocated */
-	u_long	ks_inuse;	/* # of packets of this type currently in use */
-	uint64_t ks_calls;	/* total packets of this type ever allocated */
-	u_long	ks_maxused;	/* maximum number ever used */
-	u_long	ks_magic;	/* if it's not magic, don't touch it */
-	const char *ks_shortdesc;	/* short description */
-	struct mtx ks_mtx;	/* lock for stats */
+	struct malloc_type	*ks_next;	/* Next in global chain. */
+	u_long			 _ks_memuse;	/* No longer used. */
+	u_long			 _ks_size;	/* No longer used. */
+	u_long			 _ks_inuse;	/* No longer used. */
+	uint64_t		 _ks_calls;	/* No longer used. */
+	u_long			 _ks_maxused;	/* No longer used. */
+	u_long			 ks_magic;	/* Detect programmer error. */
+	const char		*ks_shortdesc;	/* Printable type name. */
+
+	/*
+	 * struct malloc_type was terminated with a struct mtx, which is no
+	 * longer required.  For ABI reasons, continue to flesh out the full
+	 * size of the old structure, but reuse the _lo_class field for our
+	 * internal data handle.
+	 */
+	void			*ks_handle;	/* Priv. data, was lo_class. */
+	const char		*_lo_name;
+	const char		*_lo_type;
+	u_int			 _lo_flags;
+	void			*_lo_list_next;
+	struct witness		*_lo_witness;
+	uintptr_t		 _mtx_lock;
+	u_int			 _mtx_recurse;
 };
 
 #ifdef _KERNEL
-#define	MALLOC_DEFINE(type, shortdesc, longdesc) \
-	struct malloc_type type[1] = { \
-		{ NULL, 0, 0, 0, 0, 0, M_MAGIC, shortdesc, {} } \
-	}; \
-	SYSINIT(type##_init, SI_SUB_KMEM, SI_ORDER_SECOND, malloc_init, type); \
-	SYSUNINIT(type##_uninit, SI_SUB_KMEM, SI_ORDER_ANY, malloc_uninit, type)
+#define	MALLOC_DEFINE(type, shortdesc, longdesc)			\
+	struct malloc_type type[1] = {					\
+		{ NULL, 0, 0, 0, 0, 0, M_MAGIC, shortdesc, NULL, NULL,	\
+		    NULL, 0, NULL, NULL, 0, 0 }				\
+	};								\
+	SYSINIT(type##_init, SI_SUB_KMEM, SI_ORDER_SECOND, malloc_init,	\
+	    type);							\
+	SYSUNINIT(type##_uninit, SI_SUB_KMEM, SI_ORDER_ANY,		\
+	    malloc_uninit, type)
 
 #define	MALLOC_DECLARE(type) \
 	extern struct malloc_type type[1]
@@ -112,6 +139,7 @@
 	    int flags);
 void	*reallocf(void *addr, unsigned long size, struct malloc_type *type,
 	    int flags);
+
 #endif /* _KERNEL */
 
 #endif /* !_SYS_MALLOC_H_ */
--- //depot/vendor/freebsd/src/sys/sys/mbuf.h	2005/03/17 19:35:19
+++ //depot/user/rwatson/percpu/sys/sys/mbuf.h	2005/04/15 10:55:44
@@ -243,6 +243,29 @@
 #define	MT_NTYPES	16	/* number of mbuf types for mbtypes[] */
 
 /*
+ * Per-CPU mbuf allocator statistics, which are collated to construct the
+ * global statistics.  They are read lockless, but written to while in a
+ * critical section to prevent read-modify-write races.
+ *
+ * XXXRW: As with comments below, maybe sendfile stats should be elsewhere.
+ */
+struct mbstat_percpu {
+	u_long	mbp_mbuf_allocs;	/* mbufs alloc'd on CPU. */
+	u_long	mbp_mbuf_frees;		/* mbufs freed on CPU. */
+	u_long	mbp_mbuf_fails;		/* mbuf alloc failures on CPU. */
+	u_long	mbp_mbuf_drains;	/* mbuf drains on CPU. */
+	u_long	mbp_clust_allocs;	/* clusters alloc'd on CPU. */
+	u_long	mbp_clust_frees;	/* clusters freed on CPU. */
+
+	u_long	mbp_copy_fails;		/* mbuf copy failures on CPU. */
+	u_long	mbp_pullup_fails;	/* mbuf pullup failures on CPU. */
+
+	u_long	sfp_iocnt;		/* sendfile I/O's on CPU. */
+	u_long	sfp_alloc_fails;	/* sendfile alloc failures on CPU. */
+	u_long	sfp_alloc_waits;	/* sendfile alloc waits on CPU. */
+};
+
+/*
  * General mbuf allocator statistics structure.
  */
 struct mbstat {
@@ -550,6 +573,15 @@
 extern	struct mbstat mbstat;		/* General mbuf stats/infos */
 extern	int nmbclusters;		/* Maximum number of clusters */
 
+/*
+ * Avoid exposing the mbstat_percpu declaration outside of a very limited set
+ * of files, so that the compile-time value of MAXCPU doesn't become part of
+ * the exposed kernel ABI.
+ */
+#ifdef WANT_MBSTAT_PERCPU
+extern	struct mbstat_percpu mbstat_percpu[MAXCPU];
+#endif
+
 struct uio;
 
 void		 m_adj(struct mbuf *, int);
--- //depot/vendor/freebsd/src/sys/sys/pcpu.h	2005/01/07 02:32:16
+++ //depot/user/rwatson/percpu/sys/sys/pcpu.h	2005/04/17 14:42:36
@@ -81,6 +81,7 @@
 extern struct cpuhead cpuhead;
 
 #define	CURPROC		(curthread->td_proc)
+#define	curcpu		PCPU_GET(cpuid)
 #define	curkse		(curthread->td_kse)
 #define	curksegrp	(curthread->td_ksegrp)
 #define	curproc		(curthread->td_proc)
--- //depot/vendor/freebsd/src/sys/vm/uma_core.c	2005/02/24 06:30:36
+++ //depot/user/rwatson/percpu/sys/vm/uma_core.c	2005/04/18 12:47:40
@@ -1,4 +1,5 @@
 /*-
+ * Copyright (c) 2004-2005 Robert N. M. Watson
  * Copyright (c) 2004, 2005,
  *     Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
  * Copyright (c) 2002, 2003, 2004, 2005,
@@ -119,9 +120,6 @@
 /* This mutex protects the keg list */
 static struct mtx uma_mtx;
 
-/* These are the pcpu cache locks */
-static struct mtx uma_pcpu_mtx[MAXCPU];
-
 /* Linked list of boot time pages */
 static LIST_HEAD(,uma_slab) uma_boot_pages =
     LIST_HEAD_INITIALIZER(&uma_boot_pages);
@@ -384,48 +382,19 @@
 zone_timeout(uma_zone_t zone)
 {
 	uma_keg_t keg;
-	uma_cache_t cache;
 	u_int64_t alloc;
-	int cpu;
 
 	keg = zone->uz_keg;
 	alloc = 0;
 
 	/*
-	 * Aggregate per cpu cache statistics back to the zone.
-	 *
-	 * XXX This should be done in the sysctl handler.
-	 *
-	 * I may rewrite this to set a flag in the per cpu cache instead of
-	 * locking.  If the flag is not cleared on the next round I will have
-	 * to lock and do it here instead so that the statistics don't get too
-	 * far out of sync.
-	 */
-	if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL)) {
-		for (cpu = 0; cpu <= mp_maxid; cpu++) {
-			if (CPU_ABSENT(cpu))
-				continue;
-			CPU_LOCK(cpu);
-			cache = &zone->uz_cpu[cpu];
-			/* Add them up, and reset */
-			alloc += cache->uc_allocs;
-			cache->uc_allocs = 0;
-			CPU_UNLOCK(cpu);
-		}
-	}
-
-	/* Now push these stats back into the zone.. */
-	ZONE_LOCK(zone);
-	zone->uz_allocs += alloc;
-
-	/*
 	 * Expand the zone hash table.
 	 *
 	 * This is done if the number of slabs is larger than the hash size.
 	 * What I'm trying to do here is completely reduce collisions.  This
 	 * may be a little aggressive.  Should I allow for two collisions max?
 	 */
-
+	ZONE_LOCK(zone);
 	if (keg->uk_flags & UMA_ZONE_HASH &&
 	    keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
 		struct uma_hash newhash;
@@ -613,6 +582,10 @@
 /*
  * Drains the per cpu caches for a zone.
  *
+ * NOTE: This may only be called while the zone is being torn down, and not
+ * during normal operation.  This is necessary in order that we do not have
+ * to migrate CPUs to drain the per-CPU caches.
+ *
  * Arguments:
  *	zone     The zone to drain, must be unlocked.
  *
@@ -626,12 +599,20 @@
 	int cpu;
 
 	/*
-	 * We have to lock each cpu cache before locking the zone
+	 * XXX: It is safe to not lock the per-CPU caches, because we're
+	 * tearing down the zone anyway.  I.e., there will be no further use
+	 * of the caches at this point.
+	 *
+	 * XXX: It would be good to be able to assert that the zone is being
+	 * torn down to prevent improper use of cache_drain().
+	 *
+	 * XXX: We lock the zone before passing into bucket_cache_drain() as
+	 * it is used elsewhere.  Should the tear-down path be made special
+	 * there in some form?
 	 */
 	for (cpu = 0; cpu <= mp_maxid; cpu++) {
 		if (CPU_ABSENT(cpu))
 			continue;
-		CPU_LOCK(cpu);
 		cache = &zone->uz_cpu[cpu];
 		bucket_drain(zone, cache->uc_allocbucket);
 		bucket_drain(zone, cache->uc_freebucket);
@@ -644,11 +625,6 @@
 	ZONE_LOCK(zone);
 	bucket_cache_drain(zone);
 	ZONE_UNLOCK(zone);
-	for (cpu = 0; cpu <= mp_maxid; cpu++) {
-		if (CPU_ABSENT(cpu))
-			continue;
-		CPU_UNLOCK(cpu);
-	}
 }
 
 /*
@@ -828,7 +804,8 @@
 	    &flags, wait);
 	if (mem == NULL) {
 		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
-			uma_zfree_internal(keg->uk_slabzone, slab, NULL, 0);
+			uma_zfree_internal(keg->uk_slabzone, slab, NULL,
+			    SKIP_NONE);
 		ZONE_LOCK(zone);
 		return (NULL);
 	}
@@ -1643,10 +1620,6 @@
 #ifdef UMA_DEBUG
 	printf("Initializing pcpu cache locks.\n");
 #endif
-	/* Initialize the pcpu cache lock set once and for all */
-	for (i = 0; i <= mp_maxid; i++)
-		CPU_LOCK_INIT(i);
-
 #ifdef UMA_DEBUG
 	printf("Creating slab and hash zones.\n");
 #endif
@@ -1793,6 +1766,9 @@
 	uma_cache_t cache;
 	uma_bucket_t bucket;
 	int cpu;
+#ifdef INVARIANTS
+	int count;
+#endif
 	int badness;
 
 	/* This is the fast path allocation */
@@ -1827,12 +1803,33 @@
 		}
 	}
 
+	/*
+	 * If possible, allocate from the per-CPU cache.  There are two
+	 * requirements for safe access to the per-CPU cache: (1) the thread
+	 * accessing the cache must not be preempted or yield during access,
+	 * and (2) the thread must not migrate CPUs without switching which
+	 * cache it accesses.  We rely on a critical section to prevent
+	 * preemption and migration.  We release the critical section in
+	 * order to acquire the zone mutex if we are unable to allocate from
+	 * the current cache; when we re-acquire the critical section, we
+	 * must detect and handle migration if it has occurred.
+	 */
+#ifdef INVARIANTS
+	count = 0;
+#endif
 zalloc_restart:
-	cpu = PCPU_GET(cpuid);
-	CPU_LOCK(cpu);
+	critical_enter();
+	cpu = curcpu;
 	cache = &zone->uz_cpu[cpu];
 
 zalloc_start:
+#ifdef INVARIANTS
+	count++;
+	KASSERT(count < 10, ("uma_zalloc_arg: count == 10"));
+#endif
+#if 0
+	critical_assert();
+#endif
 	bucket = cache->uc_allocbucket;
 
 	if (bucket) {
@@ -1845,12 +1842,12 @@
 			KASSERT(item != NULL,
 			    ("uma_zalloc: Bucket pointer mangled."));
 			cache->uc_allocs++;
+			critical_exit();
 #ifdef INVARIANTS
 			ZONE_LOCK(zone);
 			uma_dbg_alloc(zone, NULL, item);
 			ZONE_UNLOCK(zone);
 #endif
-			CPU_UNLOCK(cpu);
 			if (zone->uz_ctor != NULL) {
 				if (zone->uz_ctor(item, zone->uz_keg->uk_size,
 				    udata, flags) != 0) {
@@ -1880,7 +1877,33 @@
 			}
 		}
 	}
+	/*
+	 * Attempt to retrieve the item from the per-CPU cache has failed, so
+	 * we must go back to the zone.  This requires the zone lock, so we
+	 * must drop the critical section, then re-acquire it when we go back
+	 * to the cache.  Since the critical section is released, we may be
+	 * preempted or migrate.  As such, make sure not to maintain any
+	 * thread-local state specific to the cache from prior to releasing
+	 * the critical section.
+	 */
+	critical_exit();
 	ZONE_LOCK(zone);
+	critical_enter();
+	cpu = curcpu;
+	cache = &zone->uz_cpu[cpu];
+	bucket = cache->uc_allocbucket;
+	if (bucket != NULL) {
+		if (bucket->ub_cnt > 0) {
+			ZONE_UNLOCK(zone);
+			goto zalloc_start;
+		}
+		bucket = cache->uc_freebucket;
+		if (bucket != NULL && bucket->ub_cnt > 0) {
+			ZONE_UNLOCK(zone);
+			goto zalloc_start;
+		}
+	}
+
 	/* Since we have locked the zone we may as well send back our stats */
 	zone->uz_allocs += cache->uc_allocs;
 	cache->uc_allocs = 0;
@@ -1904,8 +1927,8 @@
 		ZONE_UNLOCK(zone);
 		goto zalloc_start;
 	}
-	/* We are no longer associated with this cpu!!! */
-	CPU_UNLOCK(cpu);
+	/* We are no longer associated with this CPU. */
+	critical_exit();
 
 	/* Bump up our uz_count so we get here less */
 	if (zone->uz_count < BUCKET_MAX)
@@ -2228,10 +2251,10 @@
 	uma_bucket_t bucket;
 	int bflags;
 	int cpu;
-	enum zfreeskip skip;
+#ifdef INVARIANTS
+	int count;
+#endif
 
-	/* This is the fast path free */
-	skip = SKIP_NONE;
 	keg = zone->uz_keg;
 
 #ifdef UMA_DEBUG_ALLOC_1
@@ -2240,25 +2263,50 @@
 	CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
 	    zone->uz_name);
 
+	if (zone->uz_dtor)
+		zone->uz_dtor(item, keg->uk_size, udata);
+#ifdef INVARIANTS
+	ZONE_LOCK(zone);
+	if (keg->uk_flags & UMA_ZONE_MALLOC)
+		uma_dbg_free(zone, udata, item);
+	else
+		uma_dbg_free(zone, NULL, item);
+	ZONE_UNLOCK(zone);
+#endif
 	/*
 	 * The race here is acceptable.  If we miss it we'll just have to wait
 	 * a little longer for the limits to be reset.
 	 */
-
 	if (keg->uk_flags & UMA_ZFLAG_FULL)
 		goto zfree_internal;
 
-	if (zone->uz_dtor) {
-		zone->uz_dtor(item, keg->uk_size, udata);
-		skip = SKIP_DTOR;
-	}
-
+#ifdef INVARIANTS
+	count = 0;
+#endif
+	/*
+	 * If possible, free to the per-CPU cache.  There are two
+	 * requirements for safe access to the per-CPU cache: (1) the thread
+	 * accessing the cache must not be preempted or yield during access,
+	 * and (2) the thread must not migrate CPUs without switching which
+	 * cache it accesses.  We rely on a critical section to prevent
+	 * preemption and migration.  We release the critical section in
+	 * order to acquire the zone mutex if we are unable to free to the
+	 * current cache; when we re-acquire the critical section, we must
+	 * detect and handle migration if it has occurred.
+	 */
 zfree_restart:
-	cpu = PCPU_GET(cpuid);
-	CPU_LOCK(cpu);
+	critical_enter();
+	cpu = curcpu;
 	cache = &zone->uz_cpu[cpu];
 
 zfree_start:
+#ifdef INVARIANTS
+	count++;
+	KASSERT(count < 10, ("uma_zfree_arg: count == 10"));
+#endif
+#if 0
+	critical_assert();
+#endif
 	bucket = cache->uc_freebucket;
 
 	if (bucket) {
@@ -2272,15 +2320,7 @@
 			    ("uma_zfree: Freeing to non free bucket index."));
 			bucket->ub_bucket[bucket->ub_cnt] = item;
 			bucket->ub_cnt++;
-#ifdef INVARIANTS
-			ZONE_LOCK(zone);
-			if (keg->uk_flags & UMA_ZONE_MALLOC)
-				uma_dbg_free(zone, udata, item);
-			else
-				uma_dbg_free(zone, NULL, item);
-			ZONE_UNLOCK(zone);
-#endif
-			CPU_UNLOCK(cpu);
+			critical_exit();
 			return;
 		} else if (cache->uc_allocbucket) {
 #ifdef UMA_DEBUG_ALLOC
@@ -2304,9 +2344,32 @@
 	 *
 	 * 1) The buckets are NULL
 	 * 2) The alloc and free buckets are both somewhat full.
+	 *
+	 * We must go back to the zone, which requires acquiring the zone lock,
+	 * which in turn means we must release and re-acquire the critical
+	 * section.  Since the critical section is released, we may be
+	 * preempted or migrate.  As such, make sure not to maintain any
+	 * thread-local state specific to the cache from prior to releasing
+	 * the critical section.
 	 */
-
+	critical_exit();
 	ZONE_LOCK(zone);
+	critical_enter();
+	cpu = curcpu;
+	cache = &zone->uz_cpu[cpu];
+	if (cache->uc_freebucket != NULL) {
+		if (cache->uc_freebucket->ub_cnt <
+		    cache->uc_freebucket->ub_entries) {
+			ZONE_UNLOCK(zone);
+			goto zfree_start;
+		}
+		if (cache->uc_allocbucket != NULL &&
+		    (cache->uc_allocbucket->ub_cnt <
+		    cache->uc_freebucket->ub_cnt)) {
+			ZONE_UNLOCK(zone);
+			goto zfree_start;
+		}
+	}
 
 	bucket = cache->uc_freebucket;
 	cache->uc_freebucket = NULL;
@@ -2328,8 +2391,8 @@
 		cache->uc_freebucket = bucket;
 		goto zfree_start;
 	}
-	/* We're done with this CPU now */
-	CPU_UNLOCK(cpu);
+	/* We are no longer associated with this CPU. */
+	critical_exit();
 
 	/* And the zone.. */
 	ZONE_UNLOCK(zone);
@@ -2353,26 +2416,8 @@
 	/*
 	 * If nothing else caught this, we'll just do an internal free.
 	 */
-
 zfree_internal:
-
-#ifdef INVARIANTS
-	/*
-	 * If we need to skip the dtor and the uma_dbg_free in
-	 * uma_zfree_internal because we've already called the dtor
-	 * above, but we ended up here, then we need to make sure
-	 * that we take care of the uma_dbg_free immediately.
-	 */
-	if (skip) {
-		ZONE_LOCK(zone);
-		if (keg->uk_flags & UMA_ZONE_MALLOC)
-			uma_dbg_free(zone, udata, item);
-		else
-			uma_dbg_free(zone, NULL, item);
-		ZONE_UNLOCK(zone);
-	}
-#endif
-	uma_zfree_internal(zone, item, udata, skip);
+	uma_zfree_internal(zone, item, udata, SKIP_DTOR);
 
 	return;
 }
@@ -2655,7 +2700,7 @@
 		slab->us_flags = flags | UMA_SLAB_MALLOC;
 		slab->us_size = size;
 	} else {
-		uma_zfree_internal(slabzone, slab, NULL, 0);
+		uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE);
 	}
 
 	return (mem);
@@ -2666,7 +2711,7 @@
 {
 	vsetobj((vm_offset_t)slab->us_data, kmem_object);
 	page_free(slab->us_data, slab->us_size, slab->us_flags);
-	uma_zfree_internal(slabzone, slab, NULL, 0);
+	uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE);
 }
 
 void
@@ -2743,6 +2788,7 @@
 	int cachefree;
 	uma_bucket_t bucket;
 	uma_cache_t cache;
+	u_int64_t alloc;
 
 	cnt = 0;
 	mtx_lock(&uma_mtx);
@@ -2766,15 +2812,9 @@
 	  LIST_FOREACH(z, &zk->uk_zones, uz_link) {
 		if (cnt == 0)	/* list may have changed size */
 			break;
-		if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
-			for (cpu = 0; cpu <= mp_maxid; cpu++) {
-				if (CPU_ABSENT(cpu))
-					continue;
-				CPU_LOCK(cpu);
-			}
-		}
 		ZONE_LOCK(z);
 		cachefree = 0;
+		alloc = 0;
 		if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
 			for (cpu = 0; cpu <= mp_maxid; cpu++) {
 				if (CPU_ABSENT(cpu))
@@ -2784,9 +2824,12 @@
 					cachefree += cache->uc_allocbucket->ub_cnt;
 				if (cache->uc_freebucket != NULL)
 					cachefree += cache->uc_freebucket->ub_cnt;
-				CPU_UNLOCK(cpu);
+				alloc += cache->uc_allocs;
+				cache->uc_allocs = 0;
 			}
 		}
+		alloc += z->uz_allocs;
+
 		LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
 			cachefree += bucket->ub_cnt;
 		}
@@ -2797,7 +2840,7 @@
 		    zk->uk_maxpages * zk->uk_ipers,
 		    (zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
 		    totalfree,
-		    (unsigned long long)z->uz_allocs);
+		    (unsigned long long)alloc);
 		ZONE_UNLOCK(z);
 		for (p = offset + 12; p > offset && *p == ' '; --p)
 			/* nothing */ ;
--- //depot/vendor/freebsd/src/sys/vm/uma_int.h	2005/02/16 21:50:29
+++ //depot/user/rwatson/percpu/sys/vm/uma_int.h	2005/03/15 19:57:24
@@ -342,16 +342,6 @@
 #define	ZONE_LOCK(z)	mtx_lock((z)->uz_lock)
 #define ZONE_UNLOCK(z)	mtx_unlock((z)->uz_lock)
 
-#define	CPU_LOCK_INIT(cpu)					\
-	mtx_init(&uma_pcpu_mtx[(cpu)], "UMA pcpu", "UMA pcpu",	\
-	    MTX_DEF | MTX_DUPOK)
-
-#define CPU_LOCK(cpu)						\
-	mtx_lock(&uma_pcpu_mtx[(cpu)])
-
-#define CPU_UNLOCK(cpu)						\
-	mtx_unlock(&uma_pcpu_mtx[(cpu)])
-
 /*
  * Find a slab within a hash table.  This is used for OFFPAGE zones to lookup
  * the slab structure.