--- //depot/user/rwatson/netperf/sys/kern/kern_intr.c 2004/09/05 16:49:28 +++ //depot/user/rwatson/umaperthread/sys/kern/kern_intr.c 2004/09/07 20:50:31 @@ -57,6 +57,12 @@ #include #endif +/* + * XXXRW + */ +#include +#include + struct int_entropy { struct proc *proc; uintptr_t vector; @@ -75,6 +81,12 @@ &intr_storm_threshold, 0, "Number of consecutive interrupts before storm protection is enabled"); +static int ithread_uma_perthread_cache; +TUNABLE_INT("vm.ithread_uma_perthread_cache", &ithread_uma_perthread_cache); +SYSCTL_INT(_vm, OID_AUTO, ithread_uma_perthread_cache, CTLFLAG_RD, + &ithread_uma_perthread_cache, 0, + "Enable a per-thread mbuf zone cache on each ithread"); + static void ithread_loop(void *); static void ithread_update(struct ithd *); static void start_softintr(void *); @@ -496,6 +508,16 @@ warned = 0; /* + * XXXRW: To prove a point, do this for all ithreads because we don't + * know which will service network interrupts. + */ + if (ithread_uma_perthread_cache) { + uma_perthread_alloc(td, zone_mbuf); + uma_perthread_alloc(td, zone_clust); + uma_perthread_alloc(td, zone_pack); + } + + /* * As long as we have interrupts outstanding, go through the * list of handlers, giving each one a go at it. */ @@ -616,6 +638,12 @@ } mtx_unlock_spin(&sched_lock); } + + if (ithread_uma_perthread_cache) { + uma_perthread_free(curthread, zone_pack); + uma_perthread_free(curthread, zone_clust); + uma_perthread_free(curthread, zone_mbuf); + } } #ifdef DDB --- //depot/user/rwatson/netperf/sys/kern/kern_mbuf.c 2004/08/10 02:11:38 +++ //depot/user/rwatson/umaperthread/sys/kern/kern_mbuf.c 2004/09/05 22:24:23 @@ -135,9 +135,10 @@ * Configure UMA zones for Mbufs, Clusters, and Packets. 
*/ zone_mbuf = uma_zcreate("Mbuf", MSIZE, mb_ctor_mbuf, mb_dtor_mbuf, - NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_MAXBUCKET); + NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_MAXBUCKET | UMA_ZONE_PERTHREAD); zone_clust = uma_zcreate("MbufClust", MCLBYTES, mb_ctor_clust, - mb_dtor_clust, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_REFCNT); + mb_dtor_clust, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_REFCNT | + UMA_ZONE_PERTHREAD); if (nmbclusters > 0) uma_zone_set_max(zone_clust, nmbclusters); zone_pack = uma_zsecond_create("Packet", mb_ctor_pack, mb_dtor_pack, --- //depot/user/rwatson/netperf/sys/net/netisr.c 2004/09/05 16:49:28 +++ //depot/user/rwatson/umaperthread/sys/net/netisr.c 2004/09/05 22:24:23 @@ -320,6 +320,12 @@ const int polling = 0; #endif +#if 0 + uma_perthread_alloc(curthread, zone_mbuf); + uma_perthread_alloc(curthread, zone_clust); + uma_perthread_alloc(curthread, zone_pack); +#endif + do { bits = atomic_readandclear_int(&netisr); if (bits == 0) @@ -348,6 +354,12 @@ } } } while (polling); + +#if 0 + uma_perthread_free(curthread, zone_pack); + uma_perthread_free(curthread, zone_clust); + uma_perthread_free(curthread, zone_mbuf); +#endif } static void --- //depot/user/rwatson/netperf/sys/sys/proc.h 2004/09/05 16:49:28 +++ //depot/user/rwatson/umaperthread/sys/sys/proc.h 2004/09/05 22:24:23 @@ -289,6 +289,7 @@ int td_xsig; /* (c) Signal for ptrace */ u_long td_profil_addr; /* (k) Temporary addr until AST. */ u_int td_profil_ticks; /* (k) Temporary ticks until AST. 
*/ + void *td_uma; /* (k) Per-thread UMA cache */ #define td_endzero td_base_pri --- //depot/user/rwatson/netperf/sys/vm/uma.h 2004/08/02 22:40:15 +++ //depot/user/rwatson/umaperthread/sys/vm/uma.h 2004/09/05 15:37:13 @@ -226,6 +226,7 @@ #define UMA_ZONE_SECONDARY 0x0200 /* Zone is a Secondary Zone */ #define UMA_ZONE_REFCNT 0x0400 /* Allocate refcnts in slabs */ #define UMA_ZONE_MAXBUCKET 0x0800 /* Use largest buckets */ +#define UMA_ZONE_PERTHREAD 0x1000 /* Allowed in per-thread buckets */ /* Definitions for align */ #define UMA_ALIGN_PTR (sizeof(void *) - 1) /* Alignment fit for ptr */ @@ -245,6 +246,21 @@ void uma_zdestroy(uma_zone_t zone); /* + * XXXRW: All this will change. + * + * Allocate a per-thread cache for a uma zone. Can only be used with UMA + * zones that have UMA_ZONE_PERTHREAD set, and that will not be destroyed. + * Currently, per-thread zones aren't properly drained, etc, etc, and only + * one may be allocated per-thread (later requests ignored). + */ +void uma_perthread_alloc(struct thread *td, uma_zone_t zone); + +/* + * Releases a per-thread cache for a uma zone. Caveats as above. + */ +void uma_perthread_free(struct thread *td, uma_zone_t zone); + +/* * Allocates an item out of a zone * * Arguments: --- //depot/user/rwatson/netperf/sys/vm/uma_core.c 2004/08/07 02:40:42 +++ //depot/user/rwatson/umaperthread/sys/vm/uma_core.c 2004/09/07 20:50:31 @@ -1750,18 +1750,144 @@ void uma_zdestroy(uma_zone_t zone) { + + KASSERT((zone->uz_keg->uk_flags & UMA_ZONE_PERTHREAD) == 0, + ("uma_zdestroy: can't destroy a zone with per-thread caches")); uma_zfree_internal(zones, zone, NULL, SKIP_NONE); } +/* + * XXXRW: This belongs somewhere else, and is a patently bad idea. Allow up + * to three zones to be cached per-thread. Three because that's how many + * mbuf zones there are. This is just a proof-of-concept, though, so it's + * OK. 
+ */
+struct uma_perthread_set {
+	uma_zone_t		upt_zone1;	/* Zone cached in slot 1, or NULL. */
+	struct uma_cache	upt_cache1;	/* Buckets for upt_zone1. */
+	uma_zone_t		upt_zone2;	/* Zone cached in slot 2, or NULL. */
+	struct uma_cache	upt_cache2;	/* Buckets for upt_zone2. */
+	uma_zone_t		upt_zone3;	/* Zone cached in slot 3, or NULL. */
+	struct uma_cache	upt_cache3;	/* Buckets for upt_zone3. */
+};
+
+/*
+ * XXXRW: Some basic stats.  A "hit" is an allocation or free that found a
+ * per-thread cache for the zone; a "miss" fell back to the per-CPU cache.
+ * The counters are bumped with plain ++ and are therefore only approximate.
+ */
+static int perthread_alloc_zone_hit;
+static int perthread_alloc_zone_miss;
+static int perthread_free_zone_hit;
+static int perthread_free_zone_miss;
+
+SYSCTL_DECL(_vm_stats_misc);
+SYSCTL_INT(_vm_stats_misc, OID_AUTO, perthread_alloc_zone_hit, CTLFLAG_RD,
+    &perthread_alloc_zone_hit, 0, "");
+SYSCTL_INT(_vm_stats_misc, OID_AUTO, perthread_alloc_zone_miss, CTLFLAG_RD,
+    &perthread_alloc_zone_miss, 0, "");
+SYSCTL_INT(_vm_stats_misc, OID_AUTO, perthread_free_zone_hit, CTLFLAG_RD,
+    &perthread_free_zone_hit, 0, "");
+SYSCTL_INT(_vm_stats_misc, OID_AUTO, perthread_free_zone_miss, CTLFLAG_RD,
+    &perthread_free_zone_miss, 0, "");
+
+/* See uma.h */
+void
+uma_perthread_alloc(struct thread *td, uma_zone_t zone)
+{
+	struct uma_perthread_set *upt;
+
+	KASSERT(td == curthread, ("uma_perthread_alloc: not curthread!"));
+	KASSERT(zone->uz_keg->uk_flags & UMA_ZONE_PERTHREAD,
+	    ("uma_perthread_alloc: can't cache zone %s per-thread",
+	    zone->uz_name));
+
+	/*
+	 * XXXRW: at most three zones can be cached per thread; a fourth
+	 * request only prints a warning and is otherwise ignored.
+	 * XXXRW: need a real type here, and maybe an internal zone.
+	 * XXXRW: does uma_cache need more initialization than this?
+	 * XXXRW: this surely behaves badly in the presence of zone
+	 * destruction.
+	 */
+	upt = td->td_uma;
+	if (upt == NULL) {
+		/* First request on this thread: create a zeroed slot set. */
+		upt = malloc(sizeof(*upt), M_TEMP, M_WAITOK | M_ZERO);
+		td->td_uma = upt;
+	}
+
+	/* Claim the first empty slot; its cache is expected to be zeroed. */
+	if (upt->upt_zone1 == NULL) {
+		upt->upt_zone1 = zone;
+		printf("Per-thread cache for thread %s zone %s added slot 1\n",
+		    td->td_proc->p_comm, zone->uz_name);
+	} else if (upt->upt_zone2 == NULL) {
+		upt->upt_zone2 = zone;
+		printf("Per-thread cache for thread %s zone %s added slot 2\n",
+		    td->td_proc->p_comm, zone->uz_name);
+	} else if (upt->upt_zone3 == NULL) {
+		upt->upt_zone3 = zone;
+		printf("Per-thread cache for thread %s zone %s added slot 3\n",
+		    td->td_proc->p_comm, zone->uz_name);
+	} else
+		printf("Warning: cache pool for thread %p (%d %s) full\n",
+		    curthread, curthread->td_proc->p_pid,
+		    curthread->td_proc->p_comm);
+}
+
+/*
+ * Drain both buckets of a per-thread cache with bucket_drain() and release
+ * the bucket structures with bucket_free().  The bucket pointers in 'cache'
+ * are left stale; the caller must clear the cache before the slot is reused.
+ */
+static void
+uma_perthread_free_cache(struct uma_cache *cache, struct uma_zone *zone)
+{
+
+	/* Never hand a NULL bucket to bucket_drain() or bucket_free(). */
+	if (cache->uc_allocbucket != NULL) {
+		bucket_drain(zone, cache->uc_allocbucket);
+		bucket_free(cache->uc_allocbucket);
+	}
+	if (cache->uc_freebucket != NULL) {
+		bucket_drain(zone, cache->uc_freebucket);
+		bucket_free(cache->uc_freebucket);
+	}
+}
+
 /* See uma.h */
+void
+uma_perthread_free(struct thread *td, uma_zone_t zone)
+{
+	struct uma_perthread_set *upt;
+
+	KASSERT(td == curthread, ("uma_perthread_free: not curthread!"));
+	upt = td->td_uma;
+	KASSERT(upt != NULL, ("uma_perthread_free: no upt")); 
+
+	/*
+	 * Release every slot caching 'zone'.  The cache itself must be
+	 * zeroed, not just the zone pointer: its buckets have been freed,
+	 * and the slot may be handed out again by uma_perthread_alloc()
+	 * while other slots keep the set alive.
+	 */
+	if (upt->upt_zone1 == zone) {
+		uma_perthread_free_cache(&upt->upt_cache1, zone);
+		upt->upt_zone1 = NULL;
+		bzero(&upt->upt_cache1, sizeof(upt->upt_cache1));
+	}
+	if (upt->upt_zone2 == zone) {
+		uma_perthread_free_cache(&upt->upt_cache2, zone);
+		upt->upt_zone2 = NULL;
+		bzero(&upt->upt_cache2, sizeof(upt->upt_cache2));
+	}
+	if (upt->upt_zone3 == zone) {
+		uma_perthread_free_cache(&upt->upt_cache3, zone);
+		upt->upt_zone3 = NULL;
+		bzero(&upt->upt_cache3, sizeof(upt->upt_cache3));
+	}
+	/* Last slot gone: drop the set so td_uma lookups see NULL. */
+	if (upt->upt_zone1 == NULL && upt->upt_zone2 == NULL &&
+	    upt->upt_zone3 == NULL) {
+		free(upt, M_TEMP);
+		td->td_uma = NULL;
+	}
+}
+
+/* See uma.h */ void * uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) { void *item; uma_cache_t cache; uma_bucket_t bucket; - int cpu; + int cpu = -1; int badness; + int perthread_cache; + struct thread *td; + struct uma_perthread_set *upt; /* This is the fast path allocation */ #ifdef UMA_DEBUG_ALLOC_1 @@ -1795,10 +1921,31 @@ } } + td = curthread; zalloc_restart: - cpu = PCPU_GET(cpuid); - CPU_LOCK(cpu); - cache = &zone->uz_cpu[cpu]; + cache = NULL; + perthread_cache = 0; + upt = td->td_uma; + if (upt != NULL) { + if (zone == upt->upt_zone1) + cache = &upt->upt_cache1; + else if (zone == upt->upt_zone2) + cache = &upt->upt_cache2; + else if (zone == upt->upt_zone3) + cache = &upt->upt_cache3; + if (cache != NULL) + perthread_cache = 1; + } + if (cache == NULL) { + cpu = PCPU_GET(cpuid); + CPU_LOCK(cpu); + cache = &zone->uz_cpu[cpu]; + perthread_alloc_zone_miss++; + } else { + KASSERT(zone->uz_keg->uk_flags & UMA_ZONE_PERTHREAD, + ("uma_zalloc_arg: zone has per-thread cache but isn't per-thread")); + perthread_alloc_zone_hit++; + } zalloc_start: bucket = cache->uc_allocbucket; @@ -1818,7 +1965,8 @@ uma_dbg_alloc(zone, NULL, item); ZONE_UNLOCK(zone); #endif - CPU_UNLOCK(cpu); + if (!perthread_cache) + CPU_UNLOCK(cpu); if (zone->uz_ctor != NULL) { if (zone->uz_ctor(item, zone->uz_keg->uk_size, udata, flags) != 0) { @@ -1873,7 +2021,8 @@ goto zalloc_start; } /* We are no longer associated with this cpu!!! 
*/ - CPU_UNLOCK(cpu); + if (!perthread_cache) + CPU_UNLOCK(cpu); /* Bump up our uz_count so we get here less */ if (zone->uz_count < BUCKET_MAX) @@ -2175,8 +2324,11 @@ uma_cache_t cache; uma_bucket_t bucket; int bflags; - int cpu; + int cpu = -1; enum zfreeskip skip; + int perthread_cache; + struct thread *td; + struct uma_perthread_set *upt; /* This is the fast path free */ skip = SKIP_NONE; @@ -2201,10 +2353,31 @@ skip = SKIP_DTOR; } + td = curthread; zfree_restart: - cpu = PCPU_GET(cpuid); - CPU_LOCK(cpu); - cache = &zone->uz_cpu[cpu]; + cache = NULL; + perthread_cache = 0; + upt = td->td_uma; + if (upt != NULL) { + if (zone == upt->upt_zone1) + cache = &upt->upt_cache1; + else if (zone == upt->upt_zone2) + cache = &upt->upt_cache2; + else if (zone == upt->upt_zone3) + cache = &upt->upt_cache3; + if (cache != NULL) + perthread_cache = 1; + } + if (cache == NULL) { + cpu = PCPU_GET(cpuid); + CPU_LOCK(cpu); + cache = &zone->uz_cpu[cpu]; + perthread_free_zone_miss++; + } else { + KASSERT(zone->uz_keg->uk_flags & UMA_ZONE_PERTHREAD, + ("uma_zfree_arg: zone has per-thread cache but isn't per-thread")); + perthread_free_zone_hit++; + } zfree_start: bucket = cache->uc_freebucket; @@ -2228,7 +2401,8 @@ uma_dbg_free(zone, NULL, item); ZONE_UNLOCK(zone); #endif - CPU_UNLOCK(cpu); + if (!perthread_cache) + CPU_UNLOCK(cpu); return; } else if (cache->uc_allocbucket) { #ifdef UMA_DEBUG_ALLOC @@ -2277,7 +2451,8 @@ goto zfree_start; } /* We're done with this CPU now */ - CPU_UNLOCK(cpu); + if (!perthread_cache) + CPU_UNLOCK(cpu); /* And the zone.. */ ZONE_UNLOCK(zone);