--- //depot/vendor/freebsd/src/usr.bin/vmstat/vmstat.c 2005/02/21 14:38:18 +++ //depot/user/rwatson/percpu/src/usr.bin/vmstat/vmstat.c 2005/04/30 10:39:05 @@ -46,6 +46,8 @@ #include __FBSDID("$FreeBSD: src/usr.bin/vmstat/vmstat.c,v 1.84 2005/02/21 14:35:00 ps Exp $"); +#define WANT_MALLOC_TYPE_INTERNAL + #include #include #include @@ -897,10 +899,22 @@ (long long)inttotal, (long long)(inttotal / uptime)); } +/* + * domem() replicates the kernel implementation of kern.malloc by inspecting + * kernel data structures, which is appropriate for use on a core dump. + * domem() must identify the set of malloc types, walk the list, and coalesce + * per-CPU state to report. It relies on direct access to internal kernel + * data structures that have a fragile (and intentionally unexposed) ABI. + * This logic should not be used by live monitoring tools, which should + * instead rely solely on the sysctl interface. + */ static void domem(void) { + struct malloc_type_stats *mts, mts_local; + struct malloc_type_internal mti; struct malloc_type type; + int i; if (kd == NULL) { dosysctl("kern.malloc"); @@ -924,15 +938,34 @@ sizeof(type)) errx(1, "%s: %p: %s", __func__, type.ks_next, kvm_geterr(kd)); - if (type.ks_calls == 0) + if (kvm_read(kd, (u_long)type.ks_handle, &mti, sizeof(mti)) != + sizeof(mti)) + errx(1, "%s: %p: %s", __func__, type.ks_handle, + kvm_geterr(kd)); + + bzero(&mts_local, sizeof(mts_local)); + for (i = 0; i < MAXCPU; i++) { + mts = &mti.mti_stats[i]; + mts_local.mts_memalloced += mts->mts_memalloced; + mts_local.mts_memfreed += mts->mts_memfreed; + mts_local.mts_numallocs += mts->mts_numallocs; + mts_local.mts_numfrees += mts->mts_numfrees; + mts_local.mts_size |= mts->mts_size; + } + if (mts_local.mts_numallocs == 0) continue; + + /* + * Unlike in kern_malloc.c, we don't mask inter-CPU races, as * vmstat on a core is likely for debugging purposes. 
+ */ + str = kgetstr(type.ks_shortdesc); - (void)printf("%13s%6lu%6luK%7luK%9llu", + (void)printf("%13s%6lu%6luK -%9lu", str, - type.ks_inuse, - (type.ks_memuse + 1023) / 1024, - (type.ks_maxused + 1023) / 1024, - (long long unsigned)type.ks_calls); + mts_local.mts_numallocs - mts_local.mts_numfrees, + ((mts_local.mts_memalloced - mts_local.mts_memfreed) + + 1023) / 1024, + mts_local.mts_numallocs); free(str); for (kmemzonenum = 0, first = 1; ; kmemzonenum++) { kreado(X_KMEMZONES, &kz, sizeof(kz), @@ -941,7 +974,7 @@ (void)printf("\n"); break; } - if (!(type.ks_size & (1 << kmemzonenum))) + if (!(mts_local.mts_size & (1 << kmemzonenum))) continue; if (first) (void)printf(" "); --- //depot/vendor/freebsd/src/sys/conf/files 2005/05/05 23:45:18 +++ //depot/user/rwatson/percpu/sys/conf/files 2005/05/14 06:07:05 @@ -1766,6 +1766,9 @@ security/mac_seeotheruids/mac_seeotheruids.c optional mac_seeotheruids security/mac_stub/mac_stub.c optional mac_stub security/mac_test/mac_test.c optional mac_test +test/test.c standard +test/test_mem_timing.c standard +test/test_synch_timing.c standard ufs/ffs/ffs_alloc.c optional ffs ufs/ffs/ffs_balloc.c optional ffs ufs/ffs/ffs_inode.c optional ffs --- //depot/vendor/freebsd/src/sys/dev/aic7xxx/aicasm/Makefile 2004/12/21 08:50:46 +++ //depot/user/rwatson/percpu/sys/dev/aic7xxx/aicasm/Makefile 2005/04/09 19:51:53 @@ -33,6 +33,7 @@ .ifdef MAKESRCPATH CFLAGS+= -I${MAKESRCPATH} .endif +NOMAN= NO_MAN= YFLAGS= -b ${.TARGET:R} ${.TARGET:M*macro*:S/$(.TARGET)/-p mm/} -d LFLAGS+= ${.TARGET:M*macro*:S/$(.TARGET)/-Pmm/} --- //depot/vendor/freebsd/src/sys/i386/conf/GENERIC 2005/03/31 20:25:16 +++ //depot/user/rwatson/percpu/sys/i386/conf/GENERIC 2005/04/09 20:31:56 @@ -29,6 +29,8 @@ makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols +options BREAK_TO_DEBUGGER + #options SCHED_ULE # ULE scheduler options SCHED_4BSD # 4BSD scheduler options PREEMPTION # Enable kernel thread preemption --- //depot/vendor/freebsd/src/sys/kern/kern_malloc.c 
2005/04/12 23:55:38 +++ //depot/user/rwatson/percpu/sys/kern/kern_malloc.c 2005/05/15 04:30:34 @@ -1,6 +1,7 @@ /*- * Copyright (c) 1987, 1991, 1993 * The Regents of the University of California. All rights reserved. + * Copyright (c) 2005 Robert N. M. Watson * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -44,6 +45,7 @@ #include #include #include +#include #include #include @@ -133,6 +135,8 @@ {0, NULL}, }; +uma_zone_t mt_zone; + #ifdef DEBUG_MEMGUARD u_int vm_memguard_divisor; SYSCTL_UINT(_vm, OID_AUTO, memguard_divisor, CTLFLAG_RD, &vm_memguard_divisor, @@ -197,41 +201,44 @@ * Add this to the informational malloc_type bucket. */ static void -malloc_type_zone_allocated(struct malloc_type *ksp, unsigned long size, +malloc_type_zone_allocated(struct malloc_type *mtp, unsigned long size, int zindx) { - mtx_lock(&ksp->ks_mtx); - ksp->ks_calls++; + struct malloc_type_internal *mtip; + struct malloc_type_stats *mtsp; + + critical_enter(); + mtip = mtp->ks_handle; + mtsp = &mtip->mti_stats[curcpu]; + mtsp->mts_memalloced += size; + mtsp->mts_numallocs += (size != 0); /* Size 0 means the allocation failed; it is never freed. */ if (zindx != -1) - ksp->ks_size |= 1 << zindx; - if (size != 0) { - ksp->ks_memuse += size; - ksp->ks_inuse++; - if (ksp->ks_memuse > ksp->ks_maxused) - ksp->ks_maxused = ksp->ks_memuse; - } - mtx_unlock(&ksp->ks_mtx); + mtsp->mts_size |= 1 << zindx; + critical_exit(); } void -malloc_type_allocated(struct malloc_type *ksp, unsigned long size) +malloc_type_allocated(struct malloc_type *mtp, unsigned long size) { - malloc_type_zone_allocated(ksp, size, -1); + + malloc_type_zone_allocated(mtp, size, -1); } /* * Remove this allocation from the informational malloc_type bucket. 
*/ void -malloc_type_freed(struct malloc_type *ksp, unsigned long size) +malloc_type_freed(struct malloc_type *mtp, unsigned long size) { - mtx_lock(&ksp->ks_mtx); - KASSERT(size <= ksp->ks_memuse, - ("malloc(9)/free(9) confusion.\n%s", - "Probably freeing with wrong type, but maybe not here.")); - ksp->ks_memuse -= size; - ksp->ks_inuse--; - mtx_unlock(&ksp->ks_mtx); + struct malloc_type_internal *mtip; + struct malloc_type_stats *mtsp; + + critical_enter(); + mtip = mtp->ks_handle; + mtsp = &mtip->mti_stats[curcpu]; + mtsp->mts_memfreed += size; + mtsp->mts_numfrees++; + critical_exit(); } /* @@ -243,7 +250,7 @@ * the allocation fails. */ void * -malloc(unsigned long size, struct malloc_type *type, int flags) +malloc(unsigned long size, struct malloc_type *mtp, int flags) { int indx; caddr_t va; @@ -290,7 +297,7 @@ #ifdef DEBUG_MEMGUARD /* XXX CHANGEME! */ - if (type == M_SUBPROC) + if (mtp == M_SUBPROC) return memguard_alloc(size, flags); #endif @@ -306,13 +313,13 @@ va = uma_zalloc(zone, flags); if (va != NULL) size = keg->uk_size; - malloc_type_zone_allocated(type, va == NULL ? 0 : size, indx); + malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx); } else { size = roundup(size, PAGE_SIZE); zone = NULL; keg = NULL; va = uma_large_malloc(size, flags); - malloc_type_allocated(type, va == NULL ? 0 : size); + malloc_type_allocated(mtp, va == NULL ? 0 : size); } if (flags & M_WAITOK) KASSERT(va != NULL, ("malloc(M_WAITOK) returned NULL")); @@ -334,7 +341,7 @@ * This routine may not block. */ void -free(void *addr, struct malloc_type *type) +free(void *addr, struct malloc_type *mtp) { uma_slab_t slab; u_long size; @@ -345,15 +352,12 @@ #ifdef DEBUG_MEMGUARD /* XXX CHANGEME! 
*/ - if (type == M_SUBPROC) { + if (mtp == M_SUBPROC) { memguard_free(addr); return; } #endif - KASSERT(type->ks_memuse > 0, - ("malloc(9)/free(9) confusion.\n%s", - "Probably freeing with wrong type, but maybe not here.")); size = 0; slab = vtoslab((vm_offset_t)addr & (~UMA_SLAB_MASK)); @@ -365,7 +369,7 @@ if (!(slab->us_flags & UMA_SLAB_MALLOC)) { #ifdef INVARIANTS - struct malloc_type **mtp = addr; + struct malloc_type **mtpp = addr; #endif size = slab->us_keg->uk_size; #ifdef INVARIANTS @@ -377,25 +381,25 @@ * This code assumes that size is a multiple of 8 bytes for * 64 bit machines */ - mtp = (struct malloc_type **) - ((unsigned long)mtp & ~UMA_ALIGN_PTR); - mtp += (size - sizeof(struct malloc_type *)) / + mtpp = (struct malloc_type **) + ((unsigned long)mtpp & ~UMA_ALIGN_PTR); + mtpp += (size - sizeof(struct malloc_type *)) / sizeof(struct malloc_type *); - *mtp = type; + *mtpp = mtp; #endif uma_zfree_arg(LIST_FIRST(&slab->us_keg->uk_zones), addr, slab); } else { size = slab->us_size; uma_large_free(slab); } - malloc_type_freed(type, size); + malloc_type_freed(mtp, size); } /* * realloc: change the size of a memory block */ void * -realloc(void *addr, unsigned long size, struct malloc_type *type, int flags) +realloc(void *addr, unsigned long size, struct malloc_type *mtp, int flags) { uma_slab_t slab; unsigned long alloc; @@ -403,11 +407,16 @@ /* realloc(NULL, ...) is equivalent to malloc(...) */ if (addr == NULL) - return (malloc(size, type, flags)); + return (malloc(size, mtp, flags)); + + /* + * XXX: Should report free of old memory and alloc of new memory to + * per-CPU stats. + */ #ifdef DEBUG_MEMGUARD /* XXX: CHANGEME! 
*/ -if (type == M_SUBPROC) { +if (mtp == M_SUBPROC) { slab = NULL; alloc = size; } else { @@ -435,12 +444,12 @@ #endif /* Allocate a new, bigger (or smaller) block */ - if ((newaddr = malloc(size, type, flags)) == NULL) + if ((newaddr = malloc(size, mtp, flags)) == NULL) return (NULL); /* Copy over original contents */ bcopy(addr, newaddr, min(size, alloc)); - free(addr, type); + free(addr, mtp); return (newaddr); } @@ -448,12 +457,12 @@ * reallocf: same as realloc() but free memory on failure. */ void * -reallocf(void *addr, unsigned long size, struct malloc_type *type, int flags) +reallocf(void *addr, unsigned long size, struct malloc_type *mtp, int flags) { void *mem; - if ((mem = realloc(addr, size, type, flags)) == NULL) - free(addr, type); + if ((mem = realloc(addr, size, mtp, flags)) == NULL) + free(addr, mtp); return (mem); } @@ -543,6 +552,13 @@ uma_startup2(); + mt_zone = uma_zcreate("mt_zone", sizeof(struct malloc_type_internal), +#ifdef INVARIANTS + mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini, +#else + NULL, NULL, NULL, NULL, +#endif + UMA_ALIGN_PTR, UMA_ZONE_MALLOC); for (i = 0, indx = 0; kmemzones[indx].kz_size != 0; indx++) { int size = kmemzones[indx].kz_size; char *name = kmemzones[indx].kz_name; @@ -564,125 +580,141 @@ void malloc_init(void *data) { - struct malloc_type *type = (struct malloc_type *)data; + struct malloc_type_internal *mtip; + struct malloc_type *mtp; - mtx_lock(&malloc_mtx); - if (type->ks_magic != M_MAGIC) - panic("malloc type lacks magic"); + KASSERT(cnt.v_page_count != 0, ("malloc_register before vm_init")); - if (cnt.v_page_count == 0) - panic("malloc_init not allowed before vm init"); + mtp = data; + mtip = uma_zalloc(mt_zone, M_WAITOK | M_ZERO); + mtp->ks_handle = mtip; - if (type->ks_next != NULL) - return; - - type->ks_next = kmemstatistics; - kmemstatistics = type; - mtx_init(&type->ks_mtx, type->ks_shortdesc, "Malloc Stats", MTX_DEF); + mtx_lock(&malloc_mtx); + mtp->ks_next = kmemstatistics; + kmemstatistics = 
mtp; mtx_unlock(&malloc_mtx); } void malloc_uninit(void *data) { - struct malloc_type *type = (struct malloc_type *)data; - struct malloc_type *t; + struct malloc_type_internal *mtip; + struct malloc_type *mtp, *temp; + mtp = data; + KASSERT(mtp->ks_handle != NULL, ("malloc_deregister: cookie NULL")); mtx_lock(&malloc_mtx); - mtx_lock(&type->ks_mtx); - if (type->ks_magic != M_MAGIC) - panic("malloc type lacks magic"); - - if (cnt.v_page_count == 0) - panic("malloc_uninit not allowed before vm init"); - - if (type == kmemstatistics) - kmemstatistics = type->ks_next; - else { - for (t = kmemstatistics; t->ks_next != NULL; t = t->ks_next) { - if (t->ks_next == type) { - t->ks_next = type->ks_next; - break; - } + mtip = mtp->ks_handle; + mtp->ks_handle = NULL; + if (mtp != kmemstatistics) { + for (temp = kmemstatistics; temp != NULL; + temp = temp->ks_next) { + if (temp->ks_next == mtp) + temp->ks_next = mtp->ks_next; } - } - type->ks_next = NULL; - mtx_destroy(&type->ks_mtx); + } else + kmemstatistics = mtp->ks_next; mtx_unlock(&malloc_mtx); + uma_zfree(mt_zone, mtip); /* Free the stats block allocated in malloc_init(). */ } static int sysctl_kern_malloc(SYSCTL_HANDLER_ARGS) { - struct malloc_type *type; + struct malloc_type_stats mts_local, *mtsp; + struct malloc_type_internal *mtip; + struct malloc_type *mtp; + struct sbuf sbuf; + long temp_allocs, temp_bytes; int linesize = 128; - int curline; int bufsize; int first; int error; char *buf; - char *p; int cnt; - int len; int i; cnt = 0; + /* Guess at how much room is needed. 
*/ mtx_lock(&malloc_mtx); - for (type = kmemstatistics; type != NULL; type = type->ks_next) + for (mtp = kmemstatistics; mtp != NULL; mtp = mtp->ks_next) cnt++; + mtx_unlock(&malloc_mtx); - mtx_unlock(&malloc_mtx); bufsize = linesize * (cnt + 1); - p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO); + buf = malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO); + sbuf_new(&sbuf, buf, bufsize, SBUF_FIXEDLEN); + mtx_lock(&malloc_mtx); + sbuf_printf(&sbuf, + "\n Type InUse MemUse HighUse Requests Size(s)\n"); + for (mtp = kmemstatistics; cnt != 0 && mtp != NULL; + mtp = mtp->ks_next, cnt--) { + mtip = mtp->ks_handle; + bzero(&mts_local, sizeof(mts_local)); + for (i = 0; i < MAXCPU; i++) { + mtsp = &mtip->mti_stats[i]; + mts_local.mts_memalloced += mtsp->mts_memalloced; + mts_local.mts_memfreed += mtsp->mts_memfreed; + mts_local.mts_numallocs += mtsp->mts_numallocs; + mts_local.mts_numfrees += mtsp->mts_numfrees; + mts_local.mts_size |= mtsp->mts_size; + } + if (mts_local.mts_numallocs == 0) + continue; - len = snprintf(p, linesize, - "\n Type InUse MemUse HighUse Requests Size(s)\n"); - p += len; + /* + * Due to races in per-CPU statistics gather, it's possible to + * get a slightly negative number here. If we do, approximate + * with 0. + */ + if (mts_local.mts_numallocs > mts_local.mts_numfrees) + temp_allocs = mts_local.mts_numallocs - + mts_local.mts_numfrees; + else + temp_allocs = 0; - for (type = kmemstatistics; cnt != 0 && type != NULL; - type = type->ks_next, cnt--) { - if (type->ks_calls == 0) - continue; + /* + * Ditto for bytes allocated. 
+ */ + if (mts_local.mts_memalloced > mts_local.mts_memfreed) + temp_bytes = mts_local.mts_memalloced - + mts_local.mts_memfreed; + else + temp_bytes = 0; - curline = linesize - 2; /* Leave room for the \n */ - len = snprintf(p, curline, "%13s%6lu%6luK%7luK%9llu", - type->ks_shortdesc, - type->ks_inuse, - (type->ks_memuse + 1023) / 1024, - (type->ks_maxused + 1023) / 1024, - (long long unsigned)type->ks_calls); - curline -= len; - p += len; + /* + * XXXRW: High-watermark is no longer easily available, so + * we just print '-' for that column. + */ + sbuf_printf(&sbuf, "%13s%6lu%6luK -%9lu", + mtp->ks_shortdesc, + temp_allocs, + (temp_bytes + 1023) / 1024, + mts_local.mts_numallocs); first = 1; for (i = 0; i < sizeof(kmemzones) / sizeof(kmemzones[0]) - 1; i++) { - if (type->ks_size & (1 << i)) { + if (mts_local.mts_size & (1 << i)) { if (first) - len = snprintf(p, curline, " "); + sbuf_printf(&sbuf, " "); else - len = snprintf(p, curline, ","); - curline -= len; - p += len; - - len = snprintf(p, curline, - "%s", kmemzones[i].kz_name); - curline -= len; - p += len; - + sbuf_printf(&sbuf, ","); + sbuf_printf(&sbuf, "%s", + kmemzones[i].kz_name); first = 0; } } - - len = snprintf(p, 2, "\n"); - p += len; + sbuf_printf(&sbuf, "\n"); } + sbuf_finish(&sbuf); + mtx_unlock(&malloc_mtx); - mtx_unlock(&malloc_mtx); - error = SYSCTL_OUT(req, buf, p - buf); + error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf)); + sbuf_delete(&sbuf); free(buf, M_TEMP); return (error); } @@ -696,6 +728,7 @@ sysctl_kern_mprof(SYSCTL_HANDLER_ARGS) { int linesize = 64; + struct sbuf sbuf; uint64_t count; uint64_t waste; uint64_t mem; @@ -704,7 +737,6 @@ char *buf; int rsize; int size; - char *p; int len; int i; @@ -714,34 +746,30 @@ waste = 0; mem = 0; - p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO); - len = snprintf(p, bufsize, + buf = malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO); + sbuf_new(&sbuf, buf, bufsize, SBUF_FIXEDLEN); + sbuf_printf(&sbuf, "\n Size Requests Real Size\n"); - 
bufsize -= len; - p += len; - for (i = 0; i < KMEM_ZSIZE; i++) { size = i << KMEM_ZSHIFT; rsize = kmemzones[kmemsize[i]].kz_size; count = (long long unsigned)krequests[i]; - len = snprintf(p, bufsize, "%6d%28llu%11d\n", - size, (unsigned long long)count, rsize); - bufsize -= len; - p += len; + sbuf_printf(&sbuf, "%6d%28llu%11d\n", size, + (unsigned long long)count, rsize); if ((rsize * count) > (size * count)) waste += (rsize * count) - (size * count); mem += (rsize * count); } - - len = snprintf(p, bufsize, + sbuf_printf(&sbuf, "\nTotal memory used:\t%30llu\nTotal Memory wasted:\t%30llu\n", (unsigned long long)mem, (unsigned long long)waste); - p += len; + sbuf_finish(&sbuf); - error = SYSCTL_OUT(req, buf, p - buf); + error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf)); + sbuf_delete(&sbuf); free(buf, M_TEMP); return (error); } --- //depot/vendor/freebsd/src/sys/kern/kern_mbuf.c 2005/02/16 21:50:29 +++ //depot/user/rwatson/percpu/sys/kern/kern_mbuf.c 2005/04/15 11:11:26 @@ -1,6 +1,7 @@ /*- - * Copyright (c) 2004, 2005, - * Bosko Milekic . All rights reserved. + * Copyright (c) 2004, 2005 Bosko Milekic + * Copyright (c) 2005 Robert N. M. Watson + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -31,6 +32,9 @@ #include "opt_mac.h" #include "opt_param.h" +/* Need mbstat_percpu definition from mbuf.h. */ +#define WANT_MBSTAT_PERCPU + #include #include #include @@ -39,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -79,7 +84,18 @@ */ int nmbclusters; + +/* + * mbstat is the mbuf statistics structure exposed to userspace. + * + * mbstat_percpu is the per-CPU statistics structure in which many of the + * mbstat measurements are gathered before being combined for exposure to + * userspace. mbstat_percpu is read lockless, so subject to small + * consistency races. 
It is modified holding a critical section to avoid + * read-modify-write races in the presence of preemption. + */ struct mbstat mbstat; +struct mbstat_percpu mbstat_percpu[MAXCPU]; static void tunable_mbinit(void *dummy) @@ -91,11 +107,13 @@ } SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL); +static int sysctl_kern_ipc_mbstat(SYSCTL_HANDLER_ARGS); + SYSCTL_DECL(_kern_ipc); SYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RW, &nmbclusters, 0, "Maximum number of mbuf clusters allowed"); -SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat, - "Mbuf general information and statistics"); +SYSCTL_PROC(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, NULL, 0, + sysctl_kern_ipc_mbstat, "", "Mbuf general information and statistics"); /* * Zones from which we allocate. @@ -170,8 +188,69 @@ mbstat.m_mcfail = mbstat.m_mpfail = 0; mbstat.sf_iocnt = 0; mbstat.sf_allocwait = mbstat.sf_allocfail = 0; + + /* mbstat_percpu is zero'd by BSS. */ } +static int +sysctl_kern_ipc_mbstat(SYSCTL_HANDLER_ARGS) +{ + struct mbstat_percpu *mbp, mbp_local; + u_char cpu; + + bzero(&mbp_local, sizeof(mbp_local)); + for (cpu = 0; cpu < MAXCPU; cpu++) { + mbp = &mbstat_percpu[cpu]; + mbp_local.mbp_mbuf_allocs += mbp->mbp_mbuf_allocs; + mbp_local.mbp_mbuf_frees += mbp->mbp_mbuf_frees; + mbp_local.mbp_mbuf_fails += mbp->mbp_mbuf_fails; + mbp_local.mbp_mbuf_drains += mbp->mbp_mbuf_drains; + mbp_local.mbp_clust_allocs += mbp->mbp_clust_allocs; + mbp_local.mbp_clust_frees += mbp->mbp_clust_frees; + + mbp_local.mbp_copy_fails += mbp->mbp_copy_fails; + mbp_local.mbp_pullup_fails += mbp->mbp_pullup_fails; + + mbp_local.sfp_iocnt += mbp->sfp_iocnt; + mbp_local.sfp_alloc_fails += mbp->sfp_alloc_fails; + mbp_local.sfp_alloc_waits += mbp->sfp_alloc_waits; + } + + /* + * If, due to races, the number of frees for mbufs or clusters is + * greater than the number of allocs, adjust alloc stats to 0. 
This + * isn't quite accurate, but for the time being, we consider the + * performance win of races worth the occasional inaccuracy. + */ + if (mbp_local.mbp_mbuf_allocs > mbp_local.mbp_mbuf_frees) + mbstat.m_mbufs = mbp_local.mbp_mbuf_allocs - + mbp_local.mbp_mbuf_frees; + else + mbstat.m_mbufs = 0; + + if (mbp_local.mbp_clust_allocs > mbp_local.mbp_clust_frees) + mbstat.m_mclusts = mbp_local.mbp_clust_allocs - + mbp_local.mbp_clust_frees; + else + mbstat.m_mclusts = 0; + + mbstat.m_drain = mbp_local.mbp_mbuf_drains; + mbstat.m_mcfail = mbp_local.mbp_copy_fails; + mbstat.m_mpfail = mbp_local.mbp_pullup_fails; + + mbstat.sf_iocnt = mbp_local.sfp_iocnt; + mbstat.sf_allocfail = mbp_local.sfp_alloc_fails; + /* + * sf_allocwait is protected by per-architecture mutex sf_buf_lock, + * which is held whenever sf_allocwait is updated, so don't use the + * per-cpu version here + * + * mbstat.sf_allocwait = mbp_local.sfp_alloc_waits; + */ + + return (SYSCTL_OUT(req, &mbstat, sizeof(mbstat))); +} + /* * Constructor for Mbuf master zone. 
* @@ -212,7 +291,10 @@ #endif } else m->m_data = m->m_dat; - mbstat.m_mbufs += 1; /* XXX */ + + critical_enter(); + mbstat_percpu[curcpu].mbp_mbuf_allocs++; + critical_exit(); return (0); } @@ -227,7 +309,9 @@ m = (struct mbuf *)mem; if ((m->m_flags & M_PKTHDR) != 0) m_tag_delete_chain(m, NULL); - mbstat.m_mbufs -= 1; /* XXX */ + critical_enter(); + mbstat_percpu[curcpu].mbp_mbuf_frees++; + critical_exit(); } /* XXX Only because of stats */ @@ -235,12 +319,16 @@ mb_dtor_pack(void *mem, int size, void *arg) { struct mbuf *m; + u_char cpu; m = (struct mbuf *)mem; if ((m->m_flags & M_PKTHDR) != 0) m_tag_delete_chain(m, NULL); - mbstat.m_mbufs -= 1; /* XXX */ - mbstat.m_mclusts -= 1; /* XXX */ + critical_enter(); + cpu = curcpu; + mbstat_percpu[cpu].mbp_mbuf_frees++; + mbstat_percpu[cpu].mbp_clust_frees++; + critical_exit(); } /* @@ -263,7 +351,9 @@ m->m_ext.ext_size = MCLBYTES; m->m_ext.ext_type = EXT_CLUSTER; m->m_ext.ref_cnt = NULL; /* Lazy counter assign. */ - mbstat.m_mclusts += 1; /* XXX */ + critical_enter(); + mbstat_percpu[curcpu].mbp_clust_allocs++; + critical_exit(); return (0); } @@ -271,7 +361,10 @@ static void mb_dtor_clust(void *mem, int size, void *arg) { - mbstat.m_mclusts -= 1; /* XXX */ + + critical_enter(); + mbstat_percpu[curcpu].mbp_clust_frees++; + critical_exit(); } /* @@ -288,7 +381,9 @@ uma_zalloc_arg(zone_clust, m, how); if (m->m_ext.ext_buf == NULL) return (ENOMEM); - mbstat.m_mclusts -= 1; /* XXX */ + critical_enter(); + mbstat_percpu[curcpu].mbp_clust_frees++; + critical_exit(); return (0); } @@ -304,7 +399,9 @@ m = (struct mbuf *)mem; uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); m->m_ext.ext_buf = NULL; - mbstat.m_mclusts += 1; /* XXX */ + critical_enter(); + mbstat_percpu[curcpu].mbp_clust_allocs++; + critical_exit(); } /* @@ -320,6 +417,7 @@ #endif int flags; short type; + u_char cpu; m = (struct mbuf *)mem; args = (struct mb_args *)arg; @@ -348,8 +446,11 @@ return (error); #endif } - mbstat.m_mbufs += 1; /* XXX */ - 
mbstat.m_mclusts += 1; /* XXX */ + critical_enter(); + cpu = curcpu; + mbstat_percpu[cpu].mbp_mbuf_allocs++; + mbstat_percpu[cpu].mbp_clust_allocs++; + critical_exit(); return (0); } @@ -369,7 +470,9 @@ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, "mb_reclaim()"); - mbstat.m_drain++; + critical_enter(); + mbstat_percpu[curcpu].mbp_mbuf_drains++; + critical_exit(); for (dp = domains; dp != NULL; dp = dp->dom_next) for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_drain != NULL) --- //depot/vendor/freebsd/src/sys/kern/uipc_mbuf.c 2005/05/04 18:55:20 +++ //depot/user/rwatson/percpu/sys/kern/uipc_mbuf.c 2005/05/14 06:07:05 @@ -36,6 +36,9 @@ #include "opt_param.h" #include "opt_mbuf_stress_test.h" +/* Need mbstat_percpu definition from mbuf.h. */ +#define WANT_MBSTAT_PERCPU + #include #include #include @@ -44,8 +47,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -428,13 +433,18 @@ m = m->m_next; np = &n->m_next; } - if (top == NULL) - mbstat.m_mcfail++; /* XXX: No consistency. */ + if (top == NULL) { + critical_enter(); + mbstat_percpu[curcpu].mbp_copy_fails++; + critical_exit(); + } return (top); nospace: m_freem(top); - mbstat.m_mcfail++; /* XXX: No consistency. */ + critical_enter(); + mbstat_percpu[curcpu].mbp_copy_fails++; + critical_exit(); return (NULL); } @@ -497,7 +507,9 @@ return top; nospace: m_freem(top); - mbstat.m_mcfail++; /* XXX: No consistency. */ + critical_enter(); + mbstat_percpu[curcpu].mbp_copy_fails++; + critical_exit(); return (NULL); } @@ -600,7 +612,9 @@ nospace: m_freem(top); - mbstat.m_mcfail++; /* XXX: No consistency. */ + critical_enter(); + mbstat_percpu[curcpu].mbp_copy_fails++; + critical_exit(); return (NULL); } @@ -762,7 +776,9 @@ return (m); bad: m_freem(n); - mbstat.m_mpfail++; /* XXX: No consistency. 
*/ + critical_enter(); + mbstat_percpu[curcpu].mbp_pullup_fails++; + critical_exit(); return (NULL); } --- //depot/vendor/freebsd/src/sys/kern/uipc_syscalls.c 2005/05/04 18:55:20 +++ //depot/user/rwatson/percpu/sys/kern/uipc_syscalls.c 2005/05/14 06:07:05 @@ -39,6 +39,9 @@ #include "opt_ktrace.h" #include "opt_mac.h" +/* Need mbstat_percpu definition from mbuf.h. */ +#define WANT_MBSTAT_PERCPU + #include #include #include @@ -1933,7 +1936,9 @@ vm_page_io_finish(pg); if (!error) VM_OBJECT_UNLOCK(obj); - mbstat.sf_iocnt++; + critical_enter(); + mbstat_percpu[curcpu].sfp_iocnt++; + critical_exit(); } if (error) { @@ -1961,7 +1966,9 @@ * but this wait can be interrupted. */ if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) { - mbstat.sf_allocfail++; + critical_enter(); + mbstat_percpu[curcpu].sfp_alloc_fails++; + critical_exit(); vm_page_lock_queues(); vm_page_unwire(pg, 0); if (pg->wire_count == 0 && pg->object == NULL) --- //depot/vendor/freebsd/src/sys/sys/malloc.h 2005/01/07 02:32:16 +++ //depot/user/rwatson/percpu/sys/sys/malloc.h 2005/05/15 04:30:34 @@ -50,25 +50,76 @@ #define M_MAGIC 877983977 /* time when first defined :-) */ +/* + * Two malloc type structures are present: malloc_type, which is used by a + * type owner to declare the type, and malloc_type_internal, which holds + * malloc-owned statistics and other ABI-sensitive fields, such as the set of + * malloc statistics indexed by the compile-time MAXCPU constant. + * + * The malloc_type ks_next field is protected by malloc_mtx. Other fields in + * malloc_type are static after initialization so unsynchronized. + * + * Statistics in malloc_type_stats are written only when holding a critical + * section, but read lock-free, resulting in possible (minor) races, which the + * monitoring app should take into account. + */ +struct malloc_type_stats { + u_long mts_memalloced; /* Bytes allocated on CPU. */ + u_long mts_memfreed; /* Bytes freed on CPU. 
*/ + u_long mts_numallocs; /* Number of allocations on CPU. */ + u_long mts_numfrees; /* Number of frees on CPU. */ + u_long mts_size; /* Bitmask of sizes allocated on CPU. */ + u_long _mts_reserved1; /* Reserved field. */ + u_long _mts_reserved2; /* Reserved field. */ + u_long _mts_reserved3; /* Reserved field. */ +}; + +struct malloc_type_internal { + struct malloc_type_stats mti_stats[MAXCPU]; +}; + +/* + * ABI-compatible version of the old 'struct malloc_type', only all stats are + * now malloc-managed in malloc-owned memory rather than in caller memory, so + * as to avoid ABI issues. The slot of the old mutex's lo_class field is reused + * (as ks_handle) as a pointer to the internal data handle. + */ struct malloc_type { - struct malloc_type *ks_next; /* next in list */ - u_long ks_memuse; /* total memory held in bytes */ - u_long ks_size; /* sizes of this thing that are allocated */ - u_long ks_inuse; /* # of packets of this type currently in use */ - uint64_t ks_calls; /* total packets of this type ever allocated */ - u_long ks_maxused; /* maximum number ever used */ - u_long ks_magic; /* if it's not magic, don't touch it */ - const char *ks_shortdesc; /* short description */ - struct mtx ks_mtx; /* lock for stats */ + struct malloc_type *ks_next; /* Next in global chain. */ + u_long _ks_memuse; /* No longer used. */ + u_long _ks_size; /* No longer used. */ + u_long _ks_inuse; /* No longer used. */ + uint64_t _ks_calls; /* No longer used. */ + u_long _ks_maxused; /* No longer used. */ + u_long ks_magic; /* Detect programmer error. */ + const char *ks_shortdesc; /* Printable type name. */ + + /* + * struct malloc_type was terminated with a struct mtx, which is no + * longer required. For ABI reasons, continue to flesh out the full + * size of the old structure, but reuse the _lo_class field for our + * internal data handle. + */ + void *ks_handle; /* Priv. data, was lo_class. 
*/ + const char *_lo_name; + const char *_lo_type; + u_int _lo_flags; + void *_lo_list_next; + struct witness *_lo_witness; + uintptr_t _mtx_lock; + u_int _mtx_recurse; }; #ifdef _KERNEL -#define MALLOC_DEFINE(type, shortdesc, longdesc) \ - struct malloc_type type[1] = { \ - { NULL, 0, 0, 0, 0, 0, M_MAGIC, shortdesc, {} } \ - }; \ - SYSINIT(type##_init, SI_SUB_KMEM, SI_ORDER_SECOND, malloc_init, type); \ - SYSUNINIT(type##_uninit, SI_SUB_KMEM, SI_ORDER_ANY, malloc_uninit, type) +#define MALLOC_DEFINE(type, shortdesc, longdesc) \ + struct malloc_type type[1] = { \ + { NULL, 0, 0, 0, 0, 0, M_MAGIC, shortdesc, NULL, NULL, \ + NULL, 0, NULL, NULL, 0, 0 } \ + }; \ + SYSINIT(type##_init, SI_SUB_KMEM, SI_ORDER_SECOND, malloc_init, \ + type); \ + SYSUNINIT(type##_uninit, SI_SUB_KMEM, SI_ORDER_ANY, \ + malloc_uninit, type); #define MALLOC_DECLARE(type) \ extern struct malloc_type type[1] --- //depot/vendor/freebsd/src/sys/sys/mbuf.h 2005/05/04 18:55:20 +++ //depot/user/rwatson/percpu/sys/sys/mbuf.h 2005/05/14 06:07:05 @@ -243,6 +243,29 @@ #define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */ /* + * Per-CPU mbuf allocator statistics, which are collated to construct the + * global statistics. They are read lockless, but written to while in a + * critical section to prevent read-modify-write races. + * + * XXXRW: As with comments below, maybe sendfile stats should be elsewhere. + */ +struct mbstat_percpu { + u_long mbp_mbuf_allocs; /* mbufs alloc'd on CPU. */ + u_long mbp_mbuf_frees; /* mbufs freed on CPU. */ + u_long mbp_mbuf_fails; /* mbuf alloc failures on CPU. */ + u_long mbp_mbuf_drains; /* mbuf drains on CPU. */ + u_long mbp_clust_allocs; /* clusters alloc'd on CPU. */ + u_long mbp_clust_frees; /* clusters freed on CPU. */ + + u_long mbp_copy_fails; /* mbuf copy failures on CPU. */ + u_long mbp_pullup_fails; /* mbuf pullup failures on CPU. */ + + u_long sfp_iocnt; /* sendfile I/O's on CPU. */ + u_long sfp_alloc_fails; /* sendfile alloc failures on CPU. 
*/ + u_long sfp_alloc_waits; /* sendfile alloc waits on CPU. */ +}; + +/* * General mbuf allocator statistics structure. */ struct mbstat { @@ -550,6 +573,15 @@ extern struct mbstat mbstat; /* General mbuf stats/infos */ extern int nmbclusters; /* Maximum number of clusters */ +/* + * Avoid exposing the per-CPU definition outside of a very limited set of + * files, so that the compile-time value of MAXCPU doesn't become part of + * the exposed kernel ABI. + */ +#ifdef WANT_MBSTAT_PERCPU +extern struct mbstat_percpu mbstat_percpu[MAXCPU]; +#endif + struct uio; void m_adj(struct mbuf *, int); --- //depot/vendor/freebsd/src/sys/vm/uma_core.c 2005/04/29 19:00:37 +++ //depot/user/rwatson/percpu/sys/vm/uma_core.c 2005/04/29 21:33:32 @@ -1817,6 +1817,9 @@ cache = &zone->uz_cpu[cpu]; zalloc_start: +#if 0 + critical_assert(); +#endif bucket = cache->uc_allocbucket; if (bucket) { @@ -2281,6 +2284,9 @@ cache = &zone->uz_cpu[cpu]; zfree_start: +#if 0 + critical_assert(); +#endif bucket = cache->uc_freebucket; if (bucket) {