--- //depot/vendor/freebsd/src/sys/kern/kern_mbuf.c 2005/02/16 21:50:29 +++ //depot/user/rwatson/percpu/sys/kern/kern_mbuf.c 2005/04/15 11:11:26 @@ -1,6 +1,7 @@ /*- - * Copyright (c) 2004, 2005, - * Bosko Milekic . All rights reserved. + * Copyright (c) 2004, 2005 Bosko Milekic + * Copyright (c) 2005 Robert N. M. Watson + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -31,6 +32,9 @@ #include "opt_mac.h" #include "opt_param.h" +/* Need mbstat_percpu definition from mbuf.h. */ +#define WANT_MBSTAT_PERCPU + #include #include #include @@ -39,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -79,7 +84,18 @@ */ int nmbclusters; + +/* + * mbstat is the mbuf statistics structure exposed to userspace. + * + * mbstat_percpu is the per-CPU statistics structure in which many of the + * mbstat measurements are gathered before being combined for exposure to + * userspace. mbstat_percpu is read without locks, so it is subject to small + * consistency races. It is modified holding a critical section to avoid + * read-modify-write races in the presence of preemption. + */ struct mbstat mbstat; +struct mbstat_percpu mbstat_percpu[MAXCPU]; static void tunable_mbinit(void *dummy) @@ -91,11 +107,13 @@ } SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL); +static int sysctl_kern_ipc_mbstat(SYSCTL_HANDLER_ARGS); + SYSCTL_DECL(_kern_ipc); SYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RW, &nmbclusters, 0, "Maximum number of mbuf clusters allowed"); -SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat, - "Mbuf general information and statistics"); +SYSCTL_PROC(_kern_ipc, OID_AUTO, mbstat, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, + sysctl_kern_ipc_mbstat, "S,mbstat", "Mbuf general information and statistics"); /* * Zones from which we allocate. 
@@ -170,8 +188,69 @@ mbstat.m_mcfail = mbstat.m_mpfail = 0; mbstat.sf_iocnt = 0; mbstat.sf_allocwait = mbstat.sf_allocfail = 0; + + /* mbstat_percpu is zero'd by BSS. */ } +static int +sysctl_kern_ipc_mbstat(SYSCTL_HANDLER_ARGS) +{ + struct mbstat_percpu *mbp, mbp_local; + u_int cpu; + + bzero(&mbp_local, sizeof(mbp_local)); + for (cpu = 0; cpu < MAXCPU; cpu++) { + mbp = &mbstat_percpu[cpu]; + mbp_local.mbp_mbuf_allocs += mbp->mbp_mbuf_allocs; + mbp_local.mbp_mbuf_frees += mbp->mbp_mbuf_frees; + mbp_local.mbp_mbuf_fails += mbp->mbp_mbuf_fails; + mbp_local.mbp_mbuf_drains += mbp->mbp_mbuf_drains; + mbp_local.mbp_clust_allocs += mbp->mbp_clust_allocs; + mbp_local.mbp_clust_frees += mbp->mbp_clust_frees; + + mbp_local.mbp_copy_fails += mbp->mbp_copy_fails; + mbp_local.mbp_pullup_fails += mbp->mbp_pullup_fails; + + mbp_local.sfp_iocnt += mbp->sfp_iocnt; + mbp_local.sfp_alloc_fails += mbp->sfp_alloc_fails; + mbp_local.sfp_alloc_waits += mbp->sfp_alloc_waits; + } + + /* + * If, due to races, the number of frees for mbufs or clusters is + * not less than the number of allocs, report the derived count as 0. This + * isn't quite accurate, but for the time being, we consider the + * performance win of races worth the occasional inaccuracy. 
+ */ + if (mbp_local.mbp_mbuf_allocs > mbp_local.mbp_mbuf_frees) + mbstat.m_mbufs = mbp_local.mbp_mbuf_allocs - + mbp_local.mbp_mbuf_frees; + else + mbstat.m_mbufs = 0; + + if (mbp_local.mbp_clust_allocs > mbp_local.mbp_clust_frees) + mbstat.m_mclusts = mbp_local.mbp_clust_allocs - + mbp_local.mbp_clust_frees; + else + mbstat.m_mclusts = 0; + + mbstat.m_drain = mbp_local.mbp_mbuf_drains; + mbstat.m_mcfail = mbp_local.mbp_copy_fails; + mbstat.m_mpfail = mbp_local.mbp_pullup_fails; + + mbstat.sf_iocnt = mbp_local.sfp_iocnt; + mbstat.sf_allocfail = mbp_local.sfp_alloc_fails; + /* + * sf_allocwait is protected by per-architecture mutex sf_buf_lock, + * which is held whenever sf_allocwait is updated, so don't use the + * per-cpu version here + * + * mbstat.sf_allocwait = mbp_local.sfp_alloc_waits; + */ + + return (SYSCTL_OUT(req, &mbstat, sizeof(mbstat))); +} + /* * Constructor for Mbuf master zone. * @@ -212,7 +291,10 @@ #endif } else m->m_data = m->m_dat; - mbstat.m_mbufs += 1; /* XXX */ + + critical_enter(); + mbstat_percpu[curcpu].mbp_mbuf_allocs++; + critical_exit(); return (0); } @@ -227,7 +309,9 @@ m = (struct mbuf *)mem; if ((m->m_flags & M_PKTHDR) != 0) m_tag_delete_chain(m, NULL); - mbstat.m_mbufs -= 1; /* XXX */ + critical_enter(); + mbstat_percpu[curcpu].mbp_mbuf_frees++; + critical_exit(); } /* XXX Only because of stats */ @@ -235,12 +319,16 @@ mb_dtor_pack(void *mem, int size, void *arg) { struct mbuf *m; + u_char cpu; m = (struct mbuf *)mem; if ((m->m_flags & M_PKTHDR) != 0) m_tag_delete_chain(m, NULL); - mbstat.m_mbufs -= 1; /* XXX */ - mbstat.m_mclusts -= 1; /* XXX */ + critical_enter(); + cpu = curcpu; + mbstat_percpu[cpu].mbp_mbuf_frees++; + mbstat_percpu[cpu].mbp_clust_frees++; + critical_exit(); } /* @@ -263,7 +351,9 @@ m->m_ext.ext_size = MCLBYTES; m->m_ext.ext_type = EXT_CLUSTER; m->m_ext.ref_cnt = NULL; /* Lazy counter assign. 
*/ - mbstat.m_mclusts += 1; /* XXX */ + critical_enter(); + mbstat_percpu[curcpu].mbp_clust_allocs++; + critical_exit(); return (0); } @@ -271,7 +361,10 @@ static void mb_dtor_clust(void *mem, int size, void *arg) { - mbstat.m_mclusts -= 1; /* XXX */ + + critical_enter(); + mbstat_percpu[curcpu].mbp_clust_frees++; + critical_exit(); } /* @@ -288,7 +381,9 @@ uma_zalloc_arg(zone_clust, m, how); if (m->m_ext.ext_buf == NULL) return (ENOMEM); - mbstat.m_mclusts -= 1; /* XXX */ + critical_enter(); + mbstat_percpu[curcpu].mbp_clust_frees++; + critical_exit(); return (0); } @@ -304,7 +399,9 @@ m = (struct mbuf *)mem; uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); m->m_ext.ext_buf = NULL; - mbstat.m_mclusts += 1; /* XXX */ + critical_enter(); + mbstat_percpu[curcpu].mbp_clust_allocs++; + critical_exit(); } /* @@ -320,6 +417,7 @@ #endif int flags; short type; + u_char cpu; m = (struct mbuf *)mem; args = (struct mb_args *)arg; @@ -348,8 +446,11 @@ return (error); #endif } - mbstat.m_mbufs += 1; /* XXX */ - mbstat.m_mclusts += 1; /* XXX */ + critical_enter(); + cpu = curcpu; + mbstat_percpu[cpu].mbp_mbuf_allocs++; + mbstat_percpu[cpu].mbp_clust_allocs++; + critical_exit(); return (0); } @@ -369,7 +470,9 @@ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, "mb_reclaim()"); - mbstat.m_drain++; + critical_enter(); + mbstat_percpu[curcpu].mbp_mbuf_drains++; + critical_exit(); for (dp = domains; dp != NULL; dp = dp->dom_next) for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_drain != NULL) --- //depot/vendor/freebsd/src/sys/kern/uipc_mbuf.c 2005/03/17 19:35:19 +++ //depot/user/rwatson/percpu/sys/kern/uipc_mbuf.c 2005/04/15 10:55:44 @@ -36,6 +36,9 @@ #include "opt_param.h" #include "opt_mbuf_stress_test.h" +/* Need mbstat_percpu definition from mbuf.h. 
*/ +#define WANT_MBSTAT_PERCPU + #include #include #include @@ -44,8 +47,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -428,13 +433,18 @@ m = m->m_next; np = &n->m_next; } - if (top == NULL) - mbstat.m_mcfail++; /* XXX: No consistency. */ + if (top == NULL) { + critical_enter(); + mbstat_percpu[curcpu].mbp_copy_fails++; + critical_exit(); + } return (top); nospace: m_freem(top); - mbstat.m_mcfail++; /* XXX: No consistency. */ + critical_enter(); + mbstat_percpu[curcpu].mbp_copy_fails++; + critical_exit(); return (NULL); } @@ -497,7 +507,9 @@ return top; nospace: m_freem(top); - mbstat.m_mcfail++; /* XXX: No consistency. */ + critical_enter(); + mbstat_percpu[curcpu].mbp_copy_fails++; + critical_exit(); return (NULL); } @@ -600,7 +612,9 @@ nospace: m_freem(top); - mbstat.m_mcfail++; /* XXX: No consistency. */ + critical_enter(); + mbstat_percpu[curcpu].mbp_copy_fails++; + critical_exit(); return (NULL); } @@ -762,7 +776,9 @@ return (m); bad: m_freem(n); - mbstat.m_mpfail++; /* XXX: No consistency. */ + critical_enter(); + mbstat_percpu[curcpu].mbp_pullup_fails++; + critical_exit(); return (NULL); } --- //depot/vendor/freebsd/src/sys/kern/uipc_syscalls.c 2005/03/31 04:35:16 +++ //depot/user/rwatson/percpu/sys/kern/uipc_syscalls.c 2005/04/15 10:55:44 @@ -39,6 +39,9 @@ #include "opt_ktrace.h" #include "opt_mac.h" +/* Need mbstat_percpu definition from mbuf.h. */ +#define WANT_MBSTAT_PERCPU + #include #include #include @@ -1926,7 +1929,9 @@ vm_page_io_finish(pg); if (!error) VM_OBJECT_UNLOCK(obj); - mbstat.sf_iocnt++; + critical_enter(); + mbstat_percpu[curcpu].sfp_iocnt++; + critical_exit(); } if (error) { @@ -1954,7 +1959,9 @@ * but this wait can be interrupted. 
*/ if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) { - mbstat.sf_allocfail++; + critical_enter(); + mbstat_percpu[curcpu].sfp_alloc_fails++; + critical_exit(); vm_page_lock_queues(); vm_page_unwire(pg, 0); if (pg->wire_count == 0 && pg->object == NULL) --- //depot/vendor/freebsd/src/sys/sys/mbuf.h 2005/03/17 19:35:19 +++ //depot/user/rwatson/percpu/sys/sys/mbuf.h 2005/04/15 10:55:44 @@ -243,6 +243,29 @@ #define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */ /* + * Per-CPU mbuf allocator statistics, which are collated to construct the + * global statistics. They are read without locks, but written to while in a + * critical section to prevent read-modify-write races. + * + * XXXRW: As with comments below, maybe sendfile stats should be elsewhere. + */ +struct mbstat_percpu { + u_long mbp_mbuf_allocs; /* mbufs alloc'd on CPU. */ + u_long mbp_mbuf_frees; /* mbufs freed on CPU. */ + u_long mbp_mbuf_fails; /* mbuf alloc failures on CPU. */ + u_long mbp_mbuf_drains; /* mbuf drains on CPU. */ + u_long mbp_clust_allocs; /* clusters alloc'd on CPU. */ + u_long mbp_clust_frees; /* clusters freed on CPU. */ + + u_long mbp_copy_fails; /* mbuf copy failures on CPU. */ + u_long mbp_pullup_fails; /* mbuf pullup failures on CPU. */ + + u_long sfp_iocnt; /* sendfile I/O's on CPU. */ + u_long sfp_alloc_fails; /* sendfile alloc failures on CPU. */ + u_long sfp_alloc_waits; /* sendfile alloc waits on CPU. */ +}; + +/* * General mbuf allocator statistics structure. */ struct mbstat { @@ -550,6 +573,15 @@ extern struct mbstat mbstat; /* General mbuf stats/infos */ extern int nmbclusters; /* Maximum number of clusters */ +/* + * Avoid exposing PERCPU definition outside of a very limited set of files, + * so that the compile-time value of PERCPU doesn't become part of the + * exposed kernel ABI. 
+ */ +#ifdef WANT_MBSTAT_PERCPU +extern struct mbstat_percpu mbstat_percpu[MAXCPU]; +#endif + struct uio; void m_adj(struct mbuf *, int); --- //depot/vendor/freebsd/src/sys/sys/pcpu.h 2005/01/07 02:32:16 +++ //depot/user/rwatson/percpu/sys/sys/pcpu.h 2005/04/15 10:55:44 @@ -81,6 +81,7 @@ extern struct cpuhead cpuhead; #define CURPROC (curthread->td_proc) +#define curcpu (curthread->td_oncpu) #define curkse (curthread->td_kse) #define curksegrp (curthread->td_ksegrp) #define curproc (curthread->td_proc)