--- //depot/user/rwatson/netperf/sys/conf/files 2004/11/03 14:10:39 +++ //depot/user/rwatson/percpu/sys/conf/files 2004/11/04 00:18:33 @@ -1675,6 +1675,7 @@ security/mac_seeotheruids/mac_seeotheruids.c optional mac_seeotheruids security/mac_stub/mac_stub.c optional mac_stub security/mac_test/mac_test.c optional mac_test +test/test_synch_timing.c standard ufs/ffs/ffs_alloc.c optional ffs ufs/ffs/ffs_balloc.c optional ffs ufs/ffs/ffs_inode.c optional ffs --- //depot/user/rwatson/netperf/sys/kern/kern_switch.c 2004/10/19 18:01:38 +++ //depot/user/rwatson/percpu/sys/kern/kern_switch.c 2004/11/07 18:32:56 @@ -564,6 +564,18 @@ * Kernel thread preemption implementation. Critical sections mark * regions of code in which preemptions are not allowed. */ +#ifdef INVARIANT_SUPPORT +void +_critical_assert(const char *file, int line) +{ + + if (curthread->td_critnest == 0) { + panic("critical_asssert: critical section not owned " + "at %s:%d", file, line); + } +} +#endif + void critical_enter(void) { @@ -573,6 +585,8 @@ if (td->td_critnest == 0) cpu_critical_enter(td); td->td_critnest++; + CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td, + (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest); } void @@ -601,6 +615,8 @@ } else { td->td_critnest--; } + CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td, + (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest); } /* --- //depot/user/rwatson/netperf/sys/sys/ktr.h 2004/11/03 14:10:39 +++ //depot/user/rwatson/percpu/sys/sys/ktr.h 2004/11/07 18:32:56 @@ -76,7 +76,8 @@ #define KTR_CALLOUT 0x02000000 /* Callouts and timeouts */ #define KTR_GEOM 0x04000000 /* GEOM I/O events */ #define KTR_BUSDMA 0x08000000 /* busdma(9) events */ -#define KTR_ALL 0x0fffffff +#define KTR_CRITICAL 0010000000 /* Critical sections */ +#define KTR_ALL 0x1fffffff /* * Trace classes which can be assigned to particular use at compile time --- //depot/user/rwatson/netperf/sys/sys/systm.h 2004/11/03 14:10:39 +++ //depot/user/rwatson/percpu/sys/sys/systm.h 2004/11/06 17:55:50 @@ -86,6 +86,12 @@ #define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1] #endif +#ifdef INVARIANTS +#define critical_assert() _critical_assert(__FILE__, __LINE__) +#else +#define critical_assert() +#endif + /* * XXX the hints declarations are even more misplaced than most declarations * in this file, since they are needed in one file (per arch) and only used @@ -139,6 +145,9 @@ void cpu_rootconf(void); extern uint32_t crc32_tab[]; uint32_t crc32(const void *buf, size_t size); +#ifdef INVARIANT_SUPPORT +void _critical_assert(const char *file, int line); +#endif void critical_enter(void); void critical_exit(void); void init_param1(void); --- //depot/user/rwatson/netperf/sys/vm/uma_core.c 2004/11/06 12:44:50 +++ //depot/user/rwatson/percpu/sys/vm/uma_core.c 2004/11/06 19:46:49 @@ -67,6 +67,7 @@ #include #include #include +#include #include #include @@ -382,48 +383,19 @@ zone_timeout(uma_zone_t zone) { uma_keg_t keg; - uma_cache_t cache; u_int64_t alloc; - int cpu; keg = zone->uz_keg; alloc = 0; /* - * Aggregate per cpu cache statistics back to the zone. - * - * XXX This should be done in the sysctl handler. - * - * I may rewrite this to set a flag in the per cpu cache instead of - * locking. If the flag is not cleared on the next round I will have - * to lock and do it here instead so that the statistics don't get too - * far out of sync. - */ - if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL)) { - for (cpu = 0; cpu <= mp_maxid; cpu++) { - if (CPU_ABSENT(cpu)) - continue; - CPU_LOCK(cpu); - cache = &zone->uz_cpu[cpu]; - /* Add them up, and reset */ - alloc += cache->uc_allocs; - cache->uc_allocs = 0; - CPU_UNLOCK(cpu); - } - } - - /* Now push these stats back into the zone.. */ - ZONE_LOCK(zone); - zone->uz_allocs += alloc; - - /* * Expand the zone hash table. * * This is done if the number of slabs is larger than the hash size. * What I'm trying to do here is completely reduce collisions. This * may be a little aggressive. Should I allow for two collisions max? */ - + ZONE_LOCK(zone); if (keg->uk_flags & UMA_ZONE_HASH && keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) { struct uma_hash newhash; @@ -611,6 +583,10 @@ /* * Drains the per cpu caches for a zone. * + * NOTE: This may only be called while the zone is being turn down, and not + * during normal operation. This is necessary in order that we do not have + * to migrate CPUs to drain the per-CPU caches. + * * Arguments: * zone The zone to drain, must be unlocked. * @@ -624,12 +600,20 @@ int cpu; /* - * We have to lock each cpu cache before locking the zone + * XXX: It is safe to not lock the per-CPU caches, because we're + * tearing down the zone anyway. I.e., there will be no further use + * of the caches at this point. + * + * XXX: It would good to be able to assert that the zone is being + * torn down to prevent improper use of cache_drain(). + * + * XXX: We lock the zone before passing into bucket_cache_drain() as + * it is used elsewhere. Should the tear-down path be made special + * there in some form? */ for (cpu = 0; cpu <= mp_maxid; cpu++) { if (CPU_ABSENT(cpu)) continue; - CPU_LOCK(cpu); cache = &zone->uz_cpu[cpu]; bucket_drain(zone, cache->uc_allocbucket); bucket_drain(zone, cache->uc_freebucket); @@ -642,11 +626,6 @@ ZONE_LOCK(zone); bucket_cache_drain(zone); ZONE_UNLOCK(zone); - for (cpu = 0; cpu <= mp_maxid; cpu++) { - if (CPU_ABSENT(cpu)) - continue; - CPU_UNLOCK(cpu); - } } /* @@ -1784,6 +1763,7 @@ uma_cache_t cache; uma_bucket_t bucket; int cpu; + int count; int badness; /* This is the fast path allocation */ @@ -1818,12 +1798,28 @@ } } + /* + * If possible, allocate from the per-CPU cache. We require two + * types of protection: (1) we use critical sections to protect the + * cache from corruption and races, and (2) we rely on CPU pinning + * to fix which cache we work from in the event that we must + * release the critical section. Neither alone is sufficient due + * to preemption and possible processor migration for the current + * thread. We may need to exist the critical section if we have to + * acquire the zone mutex, which may require sleeping. + */ + count = 0; zalloc_restart: + sched_pin(); + critical_enter(); cpu = PCPU_GET(cpuid); - CPU_LOCK(cpu); cache = &zone->uz_cpu[cpu]; zalloc_start: + count++; + KASSERT(count < 10, ("uma_zalloc_arg: count == 10")); + /* XXXRW: assert pinned? */ + critical_assert(); bucket = cache->uc_allocbucket; if (bucket) { @@ -1836,12 +1832,13 @@ KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled.")); cache->uc_allocs++; + critical_exit(); + sched_unpin(); #ifdef INVARIANTS ZONE_LOCK(zone); uma_dbg_alloc(zone, NULL, item); ZONE_UNLOCK(zone); #endif - CPU_UNLOCK(cpu); if (zone->uz_ctor != NULL) { if (zone->uz_ctor(item, zone->uz_keg->uk_size, udata, flags) != 0) { @@ -1871,7 +1868,34 @@ } } } + /* + * Attempt to retrieve the item from the per-CPU cache has failed, + * so we must go back to the zone. This requires the zone lock, so + * we must drop the critical section, then re-acquire it when we go + * back to the cache. Since the critical section is released, we + * may be preempted. However, as we are pinned, we should be able + * to return to the same cache. If things have changed + * sufficiently that our assumptions about need to allocate have + * changed, restart. + */ + critical_exit(); ZONE_LOCK(zone); + critical_enter(); + KASSERT(cache == &zone->uz_cpu[PCPU_GET(cpuid)], + ("uma_zalloc_arg: CPU changed!")); + bucket = cache->uc_allocbucket; + if (bucket != NULL) { + if (bucket != NULL && bucket->ub_cnt > 0) { + ZONE_UNLOCK(zone); + goto zalloc_start; + } + bucket = cache->uc_freebucket; + if (bucket != NULL && bucket->ub_cnt > 0) { + ZONE_UNLOCK(zone); + goto zalloc_start; + } + } + /* Since we have locked the zone we may as well send back our stats */ zone->uz_allocs += cache->uc_allocs; cache->uc_allocs = 0; @@ -1896,7 +1920,8 @@ goto zalloc_start; } /* We are no longer associated with this cpu!!! */ - CPU_UNLOCK(cpu); + critical_exit(); + sched_unpin(); /* Bump up our uz_count so we get here less */ if (zone->uz_count < BUCKET_MAX) @@ -2209,6 +2234,7 @@ uma_bucket_t bucket; int bflags; int cpu; + int count; enum zfreeskip skip; /* This is the fast path free */ @@ -2234,12 +2260,18 @@ skip = SKIP_DTOR; } + count = 0; zfree_restart: + sched_pin(); + critical_enter(); cpu = PCPU_GET(cpuid); - CPU_LOCK(cpu); cache = &zone->uz_cpu[cpu]; zfree_start: + /* XXXRW: assert pinned? */ + count++; + KASSERT(count < 10, ("uma_zfree_arg: count == 10")); + critical_assert(); bucket = cache->uc_freebucket; if (bucket) { @@ -2253,6 +2285,8 @@ ("uma_zfree: Freeing to non free bucket index.")); bucket->ub_bucket[bucket->ub_cnt] = item; bucket->ub_cnt++; + critical_exit(); + sched_unpin(); #ifdef INVARIANTS ZONE_LOCK(zone); if (keg->uk_flags & UMA_ZONE_MALLOC) @@ -2261,7 +2295,6 @@ uma_dbg_free(zone, NULL, item); ZONE_UNLOCK(zone); #endif - CPU_UNLOCK(cpu); return; } else if (cache->uc_allocbucket) { #ifdef UMA_DEBUG_ALLOC @@ -2285,9 +2318,32 @@ * * 1) The buckets are NULL * 2) The alloc and free buckets are both somewhat full. + * + * Since we acquire the zone lock, we must first release the + * critical section, as we might otherwise sleep. As a result, we + * may be preempted, so check if that happened and restart is so. + * Note that we remain pinned, so the same cache should still be + * appropriate. */ + critical_exit(); + ZONE_LOCK(zone); + critical_enter(); + KASSERT(cache == &zone->uz_cpu[PCPU_GET(cpuid)], + ("uma_zalloc_arg: CPU changed!")); - ZONE_LOCK(zone); + if (cache->uc_freebucket != NULL) { + if (cache->uc_freebucket->ub_cnt < + cache->uc_freebucket->ub_entries) { + ZONE_UNLOCK(zone); + goto zfree_start; + } + if (cache->uc_allocbucket != NULL && + (cache->uc_allocbucket->ub_cnt < + cache->uc_freebucket->ub_cnt)) { + ZONE_UNLOCK(zone); + goto zfree_start; + } + } bucket = cache->uc_freebucket; cache->uc_freebucket = NULL; @@ -2310,7 +2366,8 @@ goto zfree_start; } /* We're done with this CPU now */ - CPU_UNLOCK(cpu); + critical_exit(); + sched_unpin(); /* And the zone.. */ ZONE_UNLOCK(zone); @@ -2724,6 +2781,7 @@ int cachefree; uma_bucket_t bucket; uma_cache_t cache; + u_int64_t alloc; cnt = 0; mtx_lock(&uma_mtx); @@ -2747,6 +2805,13 @@ LIST_FOREACH(z, &zk->uk_zones, uz_link) { if (cnt == 0) /* list may have changed size */ break; + /* + * XXXRW: This is the last remaining use of a per-CPU mutex + * to protect access to a per-CPU zone cache. We should be + * migrating CPUs here to pick up data from each cache + * instead. Because this locking no longer protects other + * use of the cache, we can now get inconsistent results. + */ if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) { for (cpu = 0; cpu <= mp_maxid; cpu++) { if (CPU_ABSENT(cpu)) @@ -2756,6 +2821,7 @@ } ZONE_LOCK(z); cachefree = 0; + alloc = 0; if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) { for (cpu = 0; cpu <= mp_maxid; cpu++) { if (CPU_ABSENT(cpu)) @@ -2765,9 +2831,15 @@ cachefree += cache->uc_allocbucket->ub_cnt; if (cache->uc_freebucket != NULL) cachefree += cache->uc_freebucket->ub_cnt; + alloc += cache->uc_allocs; + cache->uc_allocs = 0; CPU_UNLOCK(cpu); } } + + /* Now push alloc stats back into zone. */ + z->uz_allocs += alloc; + LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) { cachefree += bucket->ub_cnt; }