--- //depot/vendor/freebsd/src/sys/kern/kern_malloc.c 2005/04/12 23:55:38
+++ //depot/user/rwatson/percpu/sys/kern/kern_malloc.c 2005/04/14 22:38:16
@@ -1,4 +1,5 @@
 /*-
+ * Copyright (c) 2005 Robert N. M. Watson
  * Copyright (c) 1987, 1991, 1993
  *	The Regents of the University of California. All rights reserved.
  *
@@ -44,6 +45,7 @@
 #include
 #include
 #include
+#include <sys/sbuf.h>
 #include
 #include
 
@@ -133,6 +135,34 @@
 	{0, NULL},
 };
 
+/*
+ * Two malloc type structures are present: malloc_type, which is used by a
+ * type owner to declare the type, and malloc_type_internal, which holds
+ * malloc-owned statistics and other ABI-sensitive fields, such as the set of
+ * malloc statistics indexed by the compile-time MAXCPU constant.
+ *
+ * The malloc_type ks_next field is protected by malloc_mtx. Other fields in
+ * malloc_type are static after initialization so unsynchronized.
+ *
+ * Statistics in malloc_type_stats are written only when holding a critical
+ * section, but read lock-free resulting in possible (minor) races, which the
+ * monitoring app should take into account.
+ */
+struct malloc_type_stats {
+	u_long	mts_memalloced;	/* Bytes allocated on CPU. */
+	u_long	mts_memfreed;	/* Bytes freed on CPU. */
+	u_long	mts_numallocs;	/* Number of allocates on CPU. */
+	u_long	mts_numfrees;	/* Number of frees on CPU. */
+	u_long	mts_size;	/* Bitmask of sizes allocated on CPU. */
+};
+
+struct malloc_type_internal {
+	struct malloc_type_stats	mti_stats[MAXCPU];
+};
+
+/* UMA zone from which each type's malloc_type_internal is allocated. */
+static uma_zone_t mt_zone;
+
 #ifdef DEBUG_MEMGUARD
 u_int vm_memguard_divisor;
 SYSCTL_UINT(_vm, OID_AUTO, memguard_divisor, CTLFLAG_RD, &vm_memguard_divisor,
@@ -197,41 +227,52 @@
  * Add this to the informational malloc_type bucket.
  */
 static void
-malloc_type_zone_allocated(struct malloc_type *ksp, unsigned long size,
+malloc_type_zone_allocated(struct malloc_type *type, unsigned long size,
     int zindx)
 {
-	mtx_lock(&ksp->ks_mtx);
-	ksp->ks_calls++;
+	struct malloc_type_internal *mti;
+	struct malloc_type_stats *mts;
+	u_char cpu;
+
+	/*
+	 * Per-CPU statistics are only written inside a critical section; see
+	 * the locking notes above struct malloc_type_stats.
+	 */
+	critical_enter();
+	cpu = curthread->td_oncpu;
+	mti = (struct malloc_type_internal *)(type->ks_handle);
+	mts = &mti->mti_stats[cpu];
+	mts->mts_memalloced += size;
+	mts->mts_numallocs++;
 	if (zindx != -1)
-		ksp->ks_size |= 1 << zindx;
-	if (size != 0) {
-		ksp->ks_memuse += size;
-		ksp->ks_inuse++;
-		if (ksp->ks_memuse > ksp->ks_maxused)
-			ksp->ks_maxused = ksp->ks_memuse;
-	}
-	mtx_unlock(&ksp->ks_mtx);
+		mts->mts_size |= 1UL << zindx;
+	critical_exit();
 }
 
 void
-malloc_type_allocated(struct malloc_type *ksp, unsigned long size)
+malloc_type_allocated(struct malloc_type *type, unsigned long size)
 {
-	malloc_type_zone_allocated(ksp, size, -1);
+
+	malloc_type_zone_allocated(type, size, -1);
 }
 
 /*
  * Remove this allocation from the informational malloc_type bucket.
  */
 void
-malloc_type_freed(struct malloc_type *ksp, unsigned long size)
+malloc_type_freed(struct malloc_type *type, unsigned long size)
 {
-	mtx_lock(&ksp->ks_mtx);
-	KASSERT(size <= ksp->ks_memuse,
-	    ("malloc(9)/free(9) confusion.\n%s",
-	     "Probably freeing with wrong type, but maybe not here."));
-	ksp->ks_memuse -= size;
-	ksp->ks_inuse--;
-	mtx_unlock(&ksp->ks_mtx);
+	struct malloc_type_internal *mti;
+	struct malloc_type_stats *mts;
+	u_char cpu;
+
+	critical_enter();
+	cpu = curthread->td_oncpu;
+	mti = (struct malloc_type_internal *)type->ks_handle;
+	mts = &mti->mti_stats[cpu];
+	mts->mts_memfreed += size;
+	mts->mts_numfrees++;
+	critical_exit();
 }
 
 /*
@@ -351,9 +392,6 @@
 	}
 #endif
 
-	KASSERT(type->ks_memuse > 0,
-		("malloc(9)/free(9) confusion.\n%s",
-		 "Probably freeing with wrong type, but maybe not here."));
 	size = 0;
 
 	slab = vtoslab((vm_offset_t)addr & (~UMA_SLAB_MASK));
@@ -405,6 +443,11 @@
 	if (addr == NULL)
 		return (malloc(size, type, flags));
 
+	/*
+	 * XXX: Should report free of old memory and alloc of new memory to
+	 * per-CPU stats.
+	 */
+
 #ifdef DEBUG_MEMGUARD
 /* XXX: CHANGEME! */
 	if (type == M_SUBPROC) {
@@ -543,6 +586,13 @@
 
 	uma_startup2();
 
+	mt_zone = uma_zcreate("mt_zone", sizeof(struct malloc_type_internal),
+#ifdef INVARIANTS
+	    mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini,
+#else
+	    NULL, NULL, NULL, NULL,
+#endif
+	    UMA_ALIGN_PTR, UMA_ZONE_MALLOC);
 	for (i = 0, indx = 0; kmemzones[indx].kz_size != 0; indx++) {
 		int size = kmemzones[indx].kz_size;
 		char *name = kmemzones[indx].kz_name;
@@ -562,127 +612,149 @@
 }
 
+/*
+ * SYSINIT hook for MALLOC_DEFINE(): allocate zeroed per-CPU statistics for
+ * the type, hang them off ks_handle, and link the type onto kmemstatistics.
+ */
 void
-malloc_init(void *data)
+malloc_init(void *type)
 {
-	struct malloc_type *type = (struct malloc_type *)data;
+	struct malloc_type_internal *mti;
+	struct malloc_type *mt;
 
-	mtx_lock(&malloc_mtx);
-	if (type->ks_magic != M_MAGIC)
-		panic("malloc type lacks magic");
+	KASSERT(cnt.v_page_count != 0, ("malloc_register before vm_init"));
 
-	if (cnt.v_page_count == 0)
-		panic("malloc_init not allowed before vm init");
+	mt = type;
+	mti = uma_zalloc(mt_zone, M_WAITOK | M_ZERO);
+	mt->ks_handle = mti;
 
-	if (type->ks_next != NULL)
-		return;
-
-	type->ks_next = kmemstatistics;
+	mtx_lock(&malloc_mtx);
+	mt->ks_next = kmemstatistics;
 	kmemstatistics = type;
-	mtx_init(&type->ks_mtx, type->ks_shortdesc, "Malloc Stats", MTX_DEF);
 	mtx_unlock(&malloc_mtx);
 }
 
+/*
+ * SYSUNINIT hook for MALLOC_DEFINE(): unlink the type from kmemstatistics
+ * and return its statistics block (not the caller-owned malloc_type) to
+ * mt_zone.
+ */
 void
-malloc_uninit(void *data)
+malloc_uninit(void *type)
 {
-	struct malloc_type *type = (struct malloc_type *)data;
-	struct malloc_type *t;
+	struct malloc_type_internal *mti;
+	struct malloc_type *mt, *temp;
 
+	mt = type;
+	KASSERT(mt->ks_handle != NULL, ("malloc_deregister: cookie NULL"));
 	mtx_lock(&malloc_mtx);
-	mtx_lock(&type->ks_mtx);
-	if (type->ks_magic != M_MAGIC)
-		panic("malloc type lacks magic");
-
-	if (cnt.v_page_count == 0)
-		panic("malloc_uninit not allowed before vm init");
-
-	if (type == kmemstatistics)
-		kmemstatistics = type->ks_next;
-	else {
-		for (t = kmemstatistics; t->ks_next != NULL; t = t->ks_next) {
-			if (t->ks_next == type) {
-				t->ks_next = type->ks_next;
-				break;
-			}
+	mti = mt->ks_handle;
+	mt->ks_handle = NULL;
+	if (mt != kmemstatistics) {
+		for (temp = kmemstatistics; temp != NULL;
+		    temp = temp->ks_next) {
+			if (temp->ks_next == mt)
+				temp->ks_next = mt->ks_next;
 		}
-	}
-	type->ks_next = NULL;
-	mtx_destroy(&type->ks_mtx);
+	} else
+		kmemstatistics = mt->ks_next;
 	mtx_unlock(&malloc_mtx);
+	uma_zfree(mt_zone, mti);
 }
 
 static int
 sysctl_kern_malloc(SYSCTL_HANDLER_ARGS)
 {
+	struct malloc_type_stats *mts, mts_local;
+	struct malloc_type_internal *mti;
+	u_long temp_allocs, temp_bytes;
 	struct malloc_type *type;
 	int linesize = 128;
-	int curline;
+	struct sbuf sbuf;
 	int bufsize;
 	int first;
 	int error;
 	char *buf;
-	char *p;
 	int cnt;
-	int len;
 	int i;
 
 	cnt = 0;
 
+	/* Guess at how much room is needed. */
 	mtx_lock(&malloc_mtx);
 	for (type = kmemstatistics; type != NULL; type = type->ks_next)
 		cnt++;
+	mtx_unlock(&malloc_mtx);
 
-	mtx_unlock(&malloc_mtx);
 	bufsize = linesize * (cnt + 1);
-	p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
+	buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
+	sbuf_new(&sbuf, buf, bufsize, SBUF_FIXEDLEN);
+
 	mtx_lock(&malloc_mtx);
 
-	len = snprintf(p, linesize,
+	sbuf_printf(&sbuf,
 	    "\n Type InUse MemUse HighUse Requests Size(s)\n");
-	p += len;
-
 	for (type = kmemstatistics; cnt != 0 && type != NULL;
 	    type = type->ks_next, cnt--) {
-		if (type->ks_calls == 0)
+		mti = type->ks_handle;
+		bzero(&mts_local, sizeof(mts_local));
+		for (i = 0; i < MAXCPU; i++) {
+			mts = &mti->mti_stats[i];
+			mts_local.mts_memalloced += mts->mts_memalloced;
+			mts_local.mts_memfreed += mts->mts_memfreed;
+			mts_local.mts_numallocs += mts->mts_numallocs;
+			mts_local.mts_numfrees += mts->mts_numfrees;
+			mts_local.mts_size |= mts->mts_size;
+		}
+		if (mts_local.mts_numallocs == 0)
 			continue;
 
-		curline = linesize - 2;	/* Leave room for the \n */
-		len = snprintf(p, curline, "%13s%6lu%6luK%7luK%9llu",
-			type->ks_shortdesc,
-			type->ks_inuse,
-			(type->ks_memuse + 1023) / 1024,
-			(type->ks_maxused + 1023) / 1024,
-			(long long unsigned)type->ks_calls);
-		curline -= len;
-		p += len;
+		/*
+		 * Due to races in per-CPU statistics gather, it's possible to
+		 * get a slightly negative number here. If we do, approximate
+		 * with 0.
+		 */
+		if (mts_local.mts_numallocs > mts_local.mts_numfrees)
+			temp_allocs = mts_local.mts_numallocs -
+			    mts_local.mts_numfrees;
+		else
+			temp_allocs = 0;
+
+		/*
+		 * Ditto for bytes allocated.
+		 */
+		if (mts_local.mts_memalloced > mts_local.mts_memfreed)
+			temp_bytes = mts_local.mts_memalloced -
+			    mts_local.mts_memfreed;
+		else
+			temp_bytes = 0;
+
+		sbuf_printf(&sbuf, "%13s%6lu%6luK%7luK%9lu",
+		    type->ks_shortdesc,
+		    temp_allocs,
+		    (temp_bytes + 1023) / 1024,
+		    0UL,	/* XXX: Not available currently. */
+		    mts_local.mts_numallocs);
 
 		first = 1;
 		for (i = 0; i < sizeof(kmemzones) / sizeof(kmemzones[0]) - 1;
 		    i++) {
-			if (type->ks_size & (1 << i)) {
+			if (mts_local.mts_size & (1UL << i)) {
 				if (first)
-					len = snprintf(p, curline, " ");
+					sbuf_printf(&sbuf, " ");
 				else
-					len = snprintf(p, curline, ",");
-				curline -= len;
-				p += len;
-
-				len = snprintf(p, curline,
-				    "%s", kmemzones[i].kz_name);
-				curline -= len;
-				p += len;
-
+					sbuf_printf(&sbuf, ",");
+				sbuf_printf(&sbuf, "%s",
+				    kmemzones[i].kz_name);
 				first = 0;
 			}
 		}
-
-		len = snprintf(p, 2, "\n");
-		p += len;
+		sbuf_printf(&sbuf, "\n");
 	}
+	sbuf_finish(&sbuf);
+	mtx_unlock(&malloc_mtx);
 
-	mtx_unlock(&malloc_mtx);
-	error = SYSCTL_OUT(req, buf, p - buf);
+	error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
 
+	sbuf_delete(&sbuf);
 	free(buf, M_TEMP);
 	return (error);
 }
@@ -696,6 +768,7 @@
 sysctl_kern_mprof(SYSCTL_HANDLER_ARGS)
 {
 	int linesize = 64;
+	struct sbuf sbuf;
 	uint64_t count;
 	uint64_t waste;
 	uint64_t mem;
@@ -704,7 +777,5 @@
 	char *buf;
 	int rsize;
 	int size;
-	char *p;
-	int len;
 	int i;
 
@@ -714,34 +785,30 @@
 	waste = 0;
 	mem = 0;
 
-	p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
-	len = snprintf(p, bufsize,
+	buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
+	sbuf_new(&sbuf, buf, bufsize, SBUF_FIXEDLEN);
+	sbuf_printf(&sbuf,
 	    "\n Size Requests Real Size\n");
-	bufsize -= len;
-	p += len;
-
 	for (i = 0; i < KMEM_ZSIZE; i++) {
 		size = i << KMEM_ZSHIFT;
 		rsize = kmemzones[kmemsize[i]].kz_size;
 		count = (long long unsigned)krequests[i];
 
-		len = snprintf(p, bufsize, "%6d%28llu%11d\n",
-		    size, (unsigned long long)count, rsize);
-		bufsize -= len;
-		p += len;
+		sbuf_printf(&sbuf, "%6d%28llu%11d\n", size,
+		    (unsigned long long)count, rsize);
 
 		if ((rsize * count) > (size * count))
 			waste += (rsize * count) - (size * count);
 		mem += (rsize * count);
 	}
-
-	len = snprintf(p, bufsize,
+	sbuf_printf(&sbuf,
 	    "\nTotal memory used:\t%30llu\nTotal Memory wasted:\t%30llu\n",
 	    (unsigned long long)mem, (unsigned long long)waste);
-	p += len;
+	sbuf_finish(&sbuf);
 
-	error = SYSCTL_OUT(req, buf, p - buf);
+	error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
 
+	sbuf_delete(&sbuf);
 	free(buf, M_TEMP);
 	return (error);
 }
--- //depot/vendor/freebsd/src/sys/sys/malloc.h 2005/01/07 02:32:16
+++ //depot/user/rwatson/percpu/sys/sys/malloc.h 2005/04/14 12:54:00
@@ -50,25 +50,52 @@
 
 #define	M_MAGIC		877983977	/* time when first defined :-) */
 
+/*
+ * ABI-compatible version of the old 'struct malloc_type', only all stats are
+ * now malloc-managed in malloc-owned memory rather than in caller memory, so
+ * as to avoid ABI issues. The lo_class slot of the old trailing mutex is
+ * reused as a pointer to the internal data handle (ks_handle).
+ *
+ * XXXRW: Why is this not ifdef _KERNEL?
+ *
+ * XXXRW: Use of ks_shortdesc has leaked out of kern_malloc.c.
+ */
 struct malloc_type {
-	struct malloc_type *ks_next;	/* next in list */
-	u_long	ks_memuse;	/* total memory held in bytes */
-	u_long	ks_size;	/* sizes of this thing that are allocated */
-	u_long	ks_inuse;	/* # of packets of this type currently in use */
-	uint64_t ks_calls;	/* total packets of this type ever allocated */
-	u_long	ks_maxused;	/* maximum number ever used */
-	u_long	ks_magic;	/* if it's not magic, don't touch it */
-	const char *ks_shortdesc;	/* short description */
-	struct	mtx ks_mtx;	/* lock for stats */
+	struct malloc_type *ks_next;	/* Next in global chain. */
+	u_long		 _ks_memuse;	/* No longer used. */
+	u_long		 _ks_size;	/* No longer used. */
+	u_long		 _ks_inuse;	/* No longer used. */
+	uint64_t	 _ks_calls;	/* No longer used. */
+	u_long		 _ks_maxused;	/* No longer used. */
+	u_long		 ks_magic;	/* Detect programmer error. */
+	const char	*ks_shortdesc;	/* Printable type name. */
+
+	/*
+	 * struct malloc_type was terminated with a struct mtx, which is no
+	 * longer required. For ABI reasons, continue to flesh out the full
+	 * size of the old structure, but reuse the _lo_class field for our
+	 * internal data handle.
+	 */
+	void		*ks_handle;	/* Priv. data, was lo_class. */
+	const char	*_lo_name;
+	const char	*_lo_type;
+	u_int		 _lo_flags;
+	void		*_lo_list_next;
+	struct witness	*_lo_witness;
+	uintptr_t	 _mtx_lock;
+	u_int		 _mtx_recurse;
 };
 
 #ifdef _KERNEL
-#define	MALLOC_DEFINE(type, shortdesc, longdesc) \
-	struct malloc_type type[1] = { \
-		{ NULL, 0, 0, 0, 0, 0, M_MAGIC, shortdesc, {} } \
-	}; \
-	SYSINIT(type##_init, SI_SUB_KMEM, SI_ORDER_SECOND, malloc_init, type); \
-	SYSUNINIT(type##_uninit, SI_SUB_KMEM, SI_ORDER_ANY, malloc_uninit, type)
+#define	MALLOC_DEFINE(type, shortdesc, longdesc)			\
+	struct malloc_type type[1] = {					\
+		{ NULL, 0, 0, 0, 0, 0, M_MAGIC, shortdesc, NULL, NULL,	\
+		    NULL, 0, NULL, NULL, 0, 0 }				\
+	};								\
+	SYSINIT(type##_init, SI_SUB_KMEM, SI_ORDER_SECOND, malloc_init,	\
+	    type);							\
+	SYSUNINIT(type##_uninit, SI_SUB_KMEM, SI_ORDER_ANY,		\
+	    malloc_uninit, type)
 
 #define	MALLOC_DECLARE(type) \
 	extern struct malloc_type type[1]
@@ -112,6 +139,7 @@
 	    int flags);
 void	*reallocf(void *addr, unsigned long size, struct malloc_type *type,
 	    int flags);
+
 #endif /* _KERNEL */
 
 #endif /* !_SYS_MALLOC_H_ */