diff -ruN vendor/src/sys/conf/files mbuma2/src/sys/conf/files --- vendor/src/sys/conf/files Thu May 27 21:58:07 2004 +++ mbuma2/src/sys/conf/files Thu May 27 21:59:12 2004 @@ -1075,6 +1075,7 @@ kern/kern_lockf.c standard kern/kern_mac.c standard kern/kern_malloc.c standard +kern/kern_mbuf.c standard kern/kern_mib.c standard kern/kern_module.c standard kern/kern_mutex.c standard @@ -1116,7 +1117,6 @@ kern/subr_kobj.c standard kern/subr_log.c standard kern/subr_mbpool.c optional libmbpool -kern/subr_mbuf.c standard kern/subr_mchain.c optional libmchain kern/subr_module.c standard kern/subr_msgbuf.c standard diff -ruN vendor/src/sys/i386/i386/vm_machdep.c mbuma2/src/sys/i386/i386/vm_machdep.c --- vendor/src/sys/i386/i386/vm_machdep.c Wed May 26 10:00:08 2004 +++ mbuma2/src/sys/i386/i386/vm_machdep.c Wed May 26 10:01:23 2004 @@ -94,6 +94,10 @@ #include #endif +#ifndef NSFBUFS +#define NSFBUFS (512 + maxusers * 16) +#endif + static void cpu_reset_real(void); #ifdef SMP static void cpu_reset_proxy(void); @@ -582,6 +586,9 @@ struct sf_buf *sf_bufs; vm_offset_t sf_base; int i; + + nsfbufs = NSFBUFS; + TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs); sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask); TAILQ_INIT(&sf_buf_freelist); diff -ruN vendor/src/sys/kern/kern_malloc.c mbuma2/src/sys/kern/kern_malloc.c --- vendor/src/sys/kern/kern_malloc.c Tue May 25 11:49:28 2004 +++ mbuma2/src/sys/kern/kern_malloc.c Tue Apr 20 15:00:39 2004 @@ -191,6 +191,7 @@ int indx; caddr_t va; uma_zone_t zone; + uma_keg_t keg; #ifdef DIAGNOSTIC unsigned long osize = size; #endif @@ -235,6 +236,7 @@ size = (size & ~KMEM_ZMASK) + KMEM_ZBASE; indx = kmemsize[size >> KMEM_ZSHIFT]; zone = kmemzones[indx].kz_zone; + keg = zone->uz_keg; #ifdef MALLOC_PROFILE krequests[size >> KMEM_ZSHIFT]++; #endif @@ -244,10 +246,11 @@ goto out; ksp->ks_size |= 1 << indx; - size = zone->uz_size; + size = keg->uk_size; } else { size = roundup(size, PAGE_SIZE); zone = NULL; + keg = NULL; va = uma_large_malloc(size, flags); mtx_lock(&ksp->ks_mtx); if (va == NULL) @@ -309,7 +312,7 @@ #ifdef INVARIANTS struct malloc_type **mtp = addr; #endif - size = slab->us_zone->uz_size; + size = slab->us_keg->uk_size; #ifdef INVARIANTS /* * Cache a pointer to the malloc_type that most recently freed @@ -325,7 +328,7 @@ sizeof(struct malloc_type *); *mtp = type; #endif - uma_zfree_arg(slab->us_zone, addr, slab); + uma_zfree_arg(LIST_FIRST(&slab->us_keg->uk_zones), addr, slab); } else { size = slab->us_size; uma_large_free(slab); @@ -364,8 +367,8 @@ ("realloc: address %p out of range", (void *)addr)); /* Get the size of the original block */ - if (slab->us_zone) - alloc = slab->us_zone->uz_size; + if (slab->us_keg) + alloc = slab->us_keg->uk_size; else alloc = slab->us_size; @@ -410,7 +413,6 @@ void *dummy; { u_int8_t indx; - u_long npg; u_long mem_size; int i; @@ -462,17 +464,8 @@ */ init_param3(vm_kmem_size / PAGE_SIZE); - /* - * In mbuf_init(), we set up submaps for mbufs and clusters, in which - * case we rounddown() (nmbufs * MSIZE) and (nmbclusters * MCLBYTES), - * respectively. Mathematically, this means that what we do here may - * amount to slightly more address space than we need for the submaps, - * but it never hurts to have an extra page in kmem_map. 
- */ - npg = (nmbufs*MSIZE + nmbclusters*MCLBYTES + vm_kmem_size) / PAGE_SIZE; - kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase, - (vm_offset_t *)&kmemlimit, (vm_size_t)(npg * PAGE_SIZE)); + (vm_offset_t *)&kmemlimit, vm_kmem_size); kmem_map->system_map = 1; uma_startup2(); diff -ruN vendor/src/sys/kern/kern_mbuf.c mbuma2/src/sys/kern/kern_mbuf.c --- vendor/src/sys/kern/kern_mbuf.c Wed Dec 31 19:00:00 1969 +++ mbuma2/src/sys/kern/kern_mbuf.c Thu May 27 11:54:36 2004 @@ -0,0 +1,385 @@ +/*- + * Copyright (c) 2004 + * Bosko Milekic . + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of contributors may be + * used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_mac.h" +#include "opt_param.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA + * Zones. + * + * Mbuf Clusters (2K, contiguous) are allocated from the Cluster + * Zone. The Zone can be capped at kern.ipc.nmbclusters, if the + * administrator so desires. + * + * Mbufs are allocated from a UMA Master Zone called the Mbuf + * Zone. + * + * Additionally, FreeBSD provides a Packet Zone, which it + * configures as a Secondary Zone to the Mbuf Master Zone, + * thus sharing backend Slab kegs with the Mbuf Master Zone. + * + * Thus common-case allocations and locking are simplified: + * + * m_clget() m_getcl() + * | | + * | .------------>[(Packet Cache)] m_get(), m_gethdr() + * | | [ Packet ] | + * [(Cluster Cache)] [ Secondary ] [ (Mbuf Cache) ] + * [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ] + * | \________ | + * [ Cluster Keg ] \ / + * | [ Mbuf Keg ] + * [ Cluster Slabs ] | + * | [ Mbuf Slabs ] + * \____________(VM)_________________/ + */ + +int nmbclusters; +struct mbstat mbstat; + +static void +tunable_mbinit(void *dummy) +{ + + /* This has to be done before VM init. 
*/ + nmbclusters = 1024 + maxusers * 64; + TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); +} +SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL); + +SYSCTL_DECL(_kern_ipc); +SYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RW, &nmbclusters, 0, + "Maximum number of mbuf clusters allowed"); +SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat, + "Mbuf general information and statistics"); + +/* + * Zones from which we allocate. + */ +uma_zone_t zone_mbuf; +uma_zone_t zone_clust; +uma_zone_t zone_pack; + +/* + * Local prototypes. + */ +static void mb_ctor_mbuf(void *, int, void *); +static void mb_ctor_clust(void *, int, void *); +static void mb_ctor_pack(void *, int, void *); +static void mb_dtor_mbuf(void *, int, void *); +static void mb_dtor_clust(void *, int, void *); /* XXX */ +static void mb_dtor_pack(void *, int, void *); /* XXX */ +static void mb_init_pack(void *, int); +static void mb_fini_pack(void *, int); + +static void mb_reclaim(void *); +static void mbuf_init(void *); + +/* + * Initialize FreeBSD Network buffer allocation. + */ +SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL) +static void +mbuf_init(void *dummy) +{ + + /* + * Configure UMA zones for Mbufs, Clusters, and Packets. + */ + zone_mbuf = uma_zcreate("Mbuf", MSIZE, mb_ctor_mbuf, mb_dtor_mbuf, + NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_MAXBUCKET); + zone_clust = uma_zcreate("MbufClust", MCLBYTES, mb_ctor_clust, + mb_dtor_clust, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_REFCNT); + if (nmbclusters > 0) + uma_zone_set_max(zone_clust, nmbclusters); + zone_pack = uma_zsecond_create("Packet", mb_ctor_pack, mb_dtor_pack, + mb_init_pack, mb_fini_pack, zone_mbuf); + + /* uma_prealloc() goes here */ + + /* + * Hook event handler for low-memory situation, used to + * drain protocols and push data back to the caches (UMA + * later pushes it back to VM). + */ + EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, + EVENTHANDLER_PRI_FIRST); + + /* + * [Re]set counters and local statistics knobs. + * XXX Some of these should go and be replaced, but UMA stat + * gathering needs to be revised. + */ + mbstat.m_mbufs = 0; + mbstat.m_mclusts = 0; + mbstat.m_drain = 0; + mbstat.m_msize = MSIZE; + mbstat.m_mclbytes = MCLBYTES; + mbstat.m_minclsize = MINCLSIZE; + mbstat.m_mlen = MLEN; + mbstat.m_mhlen = MHLEN; + mbstat.m_numtypes = MT_NTYPES; + + mbstat.m_mcfail = mbstat.m_mpfail = 0; + mbstat.sf_iocnt = 0; + mbstat.sf_allocwait = mbstat.sf_allocfail = 0; +} + +/* + * Constructor for Mbuf master zone. + * + * The 'arg' pointer points to a mb_args structure which + * contains call-specific information required to support the + * mbuf allocation API. 
+ */ +static void +mb_ctor_mbuf(void *mem, int size, void *arg) +{ + struct mbuf *m; + struct mb_args *args; + int flags; + int how; + short type; + + m = (struct mbuf *)mem; + args = (struct mb_args *)arg; + flags = args->flags; + how = args->how; + type = args->type; + + m->m_type = type; + m->m_next = NULL; + m->m_nextpkt = NULL; + if (flags & M_PKTHDR) { + m->m_data = m->m_pktdat; + m->m_flags = M_PKTHDR; + m->m_pkthdr.rcvif = NULL; + m->m_pkthdr.csum_flags = 0; + SLIST_INIT(&m->m_pkthdr.tags); +#ifdef MAC + /* If the label init fails, fail the alloc */ + if (mac_init_mbuf(m, how) != 0) { + m_free(m); +/* XXX*/ panic("mb_ctor_mbuf(): can't deal with failure!"); +/* return 0; */ + } +#endif + } else { + m->m_data = m->m_dat; + m->m_flags = 0; + } + mbstat.m_mbufs += 1; /* XXX */ +/* return 1; +*/ +} + +/* + * The Mbuf master zone and Packet secondary zone destructor. + */ +static void +mb_dtor_mbuf(void *mem, int size, void *arg) +{ + struct mbuf *m; + + m = (struct mbuf *)mem; + if ((m->m_flags & M_PKTHDR) != 0) + m_tag_delete_chain(m, NULL); + mbstat.m_mbufs -= 1; /* XXX */ +} + +/* XXX Only because of stats */ +static void +mb_dtor_pack(void *mem, int size, void *arg) +{ + struct mbuf *m; + + m = (struct mbuf *)mem; + if ((m->m_flags & M_PKTHDR) != 0) + m_tag_delete_chain(m, NULL); + mbstat.m_mbufs -= 1; /* XXX */ + mbstat.m_mclusts -= 1; /* XXX */ +} + +/* + * The Cluster zone constructor. + * + * Here the 'arg' pointer points to the Mbuf which we + * are configuring cluster storage for. + */ +static void +mb_ctor_clust(void *mem, int size, void *arg) +{ + struct mbuf *m; + + m = (struct mbuf *)arg; + m->m_ext.ext_buf = (caddr_t)mem; + m->m_data = m->m_ext.ext_buf; + m->m_flags |= M_EXT; + m->m_ext.ext_free = NULL; + m->m_ext.ext_args = NULL; + m->m_ext.ext_size = MCLBYTES; + m->m_ext.ext_type = EXT_CLUSTER; + m->m_ext.ref_cnt = (u_int *)uma_find_refcnt(zone_clust, + m->m_ext.ext_buf); + *(m->m_ext.ref_cnt) = 1; + mbstat.m_mclusts += 1; /* XXX */ +/* return 1; +*/ +} + +/* XXX */ +static void +mb_dtor_clust(void *mem, int size, void *arg) +{ + mbstat.m_mclusts -= 1; /* XXX */ +} + +/* + * The Packet secondary zone's init routine, executed on the + * object's transition from keg slab to zone cache. + */ +static void +mb_init_pack(void *mem, int size) +{ + struct mbuf *m; + + m = (struct mbuf *)mem; + m->m_ext.ext_buf = NULL; + uma_zalloc_arg(zone_clust, m, M_NOWAIT); + if (m->m_ext.ext_buf == NULL) /* XXX */ + panic("mb_init_pack(): Can't deal with failure yet."); + mbstat.m_mclusts -= 1; /* XXX */ +} + +/* + * The Packet secondary zone's fini routine, executed on the + * object's transition from zone cache to keg slab. + */ +static void +mb_fini_pack(void *mem, int size) +{ + struct mbuf *m; + + m = (struct mbuf *)mem; + uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); + m->m_ext.ext_buf = NULL; + mbstat.m_mclusts += 1; /* XXX */ +} + +/* + * The "packet" keg constructor. 
+ */ +static void +mb_ctor_pack(void *mem, int size, void *arg) +{ + struct mbuf *m; + struct mb_args *args; + int flags, how; + short type; + + m = (struct mbuf *)mem; + args = (struct mb_args *)arg; + flags = args->flags; + type = args->type; + how = args->how; + + m->m_type = type; + m->m_next = NULL; + m->m_data = m->m_ext.ext_buf; + m->m_flags = flags|M_EXT; + m->m_ext.ext_free = NULL; + m->m_ext.ext_args = NULL; + m->m_ext.ext_size = MCLBYTES; + m->m_ext.ext_type = EXT_CLUSTER; + *(m->m_ext.ref_cnt) = 1; + + if (flags & M_PKTHDR) { + m->m_nextpkt = NULL; + m->m_pkthdr.rcvif = NULL; + m->m_pkthdr.csum_flags = 0; + SLIST_INIT(&m->m_pkthdr.tags); +#ifdef MAC + /* If the label init fails, fail the alloc */ + if (mac_init_mbuf(m, how) != 0) { + m_free(m); +/* XXX*/ panic("mb_ctor_pack(): can't deal with failure!"); +/* return 0; */ + } +#endif + } + mbstat.m_mbufs += 1; /* XXX */ + mbstat.m_mclusts += 1; /* XXX */ +/* return 1; +*/ +} + +/* + * This is the protocol drain routine. + * + * No locks should be held when this is called. The drain routines have to + * presently acquire some locks which raises the possibility of lock order + * reversal. + */ +static void +mb_reclaim(void *junk) +{ + struct domain *dp; + struct protosw *pr; + + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, + "mb_reclaim()"); + + mbstat.m_drain++; + for (dp = domains; dp != NULL; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_drain != NULL) + (*pr->pr_drain)(); +} diff -ruN vendor/src/sys/kern/subr_mbuf.c mbuma2/src/sys/kern/subr_mbuf.c --- vendor/src/sys/kern/subr_mbuf.c Tue May 25 11:49:29 2004 +++ mbuma2/src/sys/kern/subr_mbuf.c Wed Dec 31 19:00:00 1969 @@ -1,1548 +0,0 @@ -/*- - * Copyright (c) 2001, 2002, 2003 - * Bosko Milekic . All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include -__FBSDID("$FreeBSD: src/sys/kern/subr_mbuf.c,v 1.59 2004/04/16 14:35:11 rwatson Exp $"); - -#include "opt_mac.h" -#include "opt_param.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -/* - * mb_alloc: network buffer allocator - * - * XXX: currently, the "low watermark" sysctl is marked read-only as its - * effects are not completely implemented. To be fixed soon. - */ - -/* - * Maximum number of PCPU containers. If you know what you're doing you could - * explicitly define MBALLOC_NCPU to be exactly the number of CPUs on your - * system during compilation, and thus prevent kernel structure bloat. - * - * SMP and non-SMP kernels clearly have a different number of possible CPUs, - * but because we cannot assume a dense array of CPUs, we always allocate - * and traverse PCPU containers up to NCPU amount and merely check for - * CPU availability. - */ -#ifdef MBALLOC_NCPU -#define NCPU MBALLOC_NCPU -#else -#define NCPU MAXCPU -#endif - -/*- - * The mbuf allocator is based on Alfred Perlstein's - * "memcache" proof-of-concept allocator which was itself based on - * several well-known SMP-friendly allocators. - * - * The mb_alloc mbuf allocator is a special when compared to other - * general-purpose allocators. Some things to take note of: - * - * Mbufs and mbuf clusters are two different objects. Sometimes we - * will allocate a single mbuf, other times a single cluster, - * other times both. Further, we may sometimes wish to allocate a - * whole chain of mbufs with clusters. This allocator will perform - * the common case of each scenario in one function call (this - * includes constructing or destructing the object) while only - * locking/unlocking the cache once, if it can get away with it. - * The caches consist of pure mbufs and pure clusters; that is - * there are no 'zones' containing mbufs with already pre-hooked - * clusters. Since we can allocate both objects atomically anyway, - * we don't bother fragmenting our caches for any particular 'scenarios.' - * - * We allocate from seperate sub-maps of kmem_map, thus imposing - * an ultimate upper-limit on the number of allocatable clusters - * and mbufs and also, since the clusters all come from a - * virtually contiguous region, we can keep reference counters - * for them and "allocate" them purely by indexing into a - * dense refcount vector. - * - * We call out to protocol drain routines (which can be hooked - * into us) when we're low on space. - * - * The mbuf allocator keeps all objects that it allocates in mb_buckets. - * The buckets keep a number of objects (an object can be an mbuf or an - * mbuf cluster) and facilitate moving larger sets of contiguous objects - * from the per-CPU caches to the global cache. The buckets also have - * the added advantage that objects, when migrated from cache to cache, - * are migrated in chunks that keep contiguous objects together, - * minimizing TLB pollution. - * - * The buckets are kept on singly-linked lists called "containers." A container - * is protected by a mutex in order to ensure consistency. The mutex - * itself is allocated separately and attached to the container at boot time, - * thus allowing for certain containers to share the same lock. Per-CPU - * containers for mbufs and mbuf clusters all share the same per-CPU - * lock whereas the global cache containers for these objects share one - * global lock. 
- */ -struct mb_bucket { - SLIST_ENTRY(mb_bucket) mb_blist; - int mb_owner; - int mb_numfree; - void *mb_free[0]; -}; - -struct mb_container { - SLIST_HEAD(mc_buckethd, mb_bucket) mc_bhead; - struct mtx *mc_lock; - int mc_numowner; - u_int mc_starved; - long *mc_types; - u_long *mc_objcount; - u_long *mc_numbucks; -}; - -struct mb_gen_list { - struct mb_container mb_cont; - struct cv mgl_mstarved; -}; - -struct mb_pcpu_list { - struct mb_container mb_cont; -}; - -/* - * Boot-time configurable object counts that will determine the maximum - * number of permitted objects in the mbuf and mcluster cases. In the - * ext counter (nmbcnt) case, it's just an indicator serving to scale - * kmem_map size properly - in other words, we may be allowed to allocate - * more than nmbcnt counters, whereas we will never be allowed to allocate - * more than nmbufs mbufs or nmbclusters mclusters. - * As for nsfbufs, it is used to indicate how many sendfile(2) buffers will be - * allocatable by the sfbuf allocator (found in uipc_syscalls.c) - */ -#ifndef NMBCLUSTERS -#define NMBCLUSTERS (1024 + maxusers * 64) -#endif -#ifndef NMBUFS -#define NMBUFS (nmbclusters * 2) -#endif -#ifndef NSFBUFS -#define NSFBUFS (512 + maxusers * 16) -#endif -#ifndef NMBCNTS -#define NMBCNTS (nmbclusters + nsfbufs) -#endif -int nmbufs; -int nmbclusters; -int nmbcnt; -int nsfbufs; -int nsfbufspeak; -int nsfbufsused; - -/* - * Sizes of objects per bucket. There are this size's worth of mbufs - * or clusters in each bucket. Please keep these a power-of-2. - */ -#define MBUF_BUCK_SZ (PAGE_SIZE * 2) -#define CLUST_BUCK_SZ (PAGE_SIZE * 4) - -/* - * Perform sanity checks of tunables declared above. - */ -static void -tunable_mbinit(void *dummy) -{ - - /* - * This has to be done before VM init. - */ - nmbclusters = NMBCLUSTERS; - TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); - nmbufs = NMBUFS; - TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs); - nsfbufs = NSFBUFS; - TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs); - nmbcnt = NMBCNTS; - TUNABLE_INT_FETCH("kern.ipc.nmbcnt", &nmbcnt); - /* Sanity checks */ - if (nmbufs < nmbclusters * 2) - nmbufs = nmbclusters * 2; - if (nmbcnt < nmbclusters + nsfbufs) - nmbcnt = nmbclusters + nsfbufs; -} -SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL); - -/* - * The freelist structures and mutex locks. The number statically declared - * here depends on the number of CPUs. - * - * We set up in such a way that all the objects (mbufs, clusters) - * share the same mutex lock. It has been established that we do not benefit - * from different locks for different objects, so we use the same lock, - * regardless of object type. This also allows us to do optimised - * multi-object allocations without dropping the lock in between. - */ -struct mb_lstmngr { - struct mb_gen_list *ml_genlist; - struct mb_pcpu_list *ml_cntlst[NCPU]; - struct mb_bucket **ml_btable; - vm_map_t ml_map; - vm_offset_t ml_mapbase; - vm_offset_t ml_maptop; - int ml_mapfull; - u_int ml_objsize; - u_int ml_objbucks; - u_int *ml_wmhigh; - u_int *ml_wmlow; -}; -static struct mb_lstmngr mb_list_mbuf, mb_list_clust; -static struct mtx mbuf_gen, mbuf_pcpu[NCPU]; -static u_int *cl_refcntmap; - -/* - * Local macros for internal allocator structure manipulations. 
- */ -#ifdef SMP -#define MB_GET_PCPU_LIST(mb_lst) (mb_lst)->ml_cntlst[PCPU_GET(cpuid)] -#else -#define MB_GET_PCPU_LIST(mb_lst) (mb_lst)->ml_cntlst[0] -#endif - -#define MB_GET_GEN_LIST(mb_lst) (mb_lst)->ml_genlist - -#define MB_LOCK_CONT(mb_cnt) mtx_lock((mb_cnt)->mb_cont.mc_lock) - -#define MB_UNLOCK_CONT(mb_cnt) mtx_unlock((mb_cnt)->mb_cont.mc_lock) - -#define MB_GET_PCPU_LIST_NUM(mb_lst, num) \ - (mb_lst)->ml_cntlst[(num)] - -#define MB_BUCKET_INDX(mb_obj, mb_lst) \ - (int)(((caddr_t)(mb_obj) - (caddr_t)(mb_lst)->ml_mapbase) / \ - ((mb_lst)->ml_objbucks * (mb_lst)->ml_objsize)) - -#define MB_GET_OBJECT(mb_objp, mb_bckt, mb_lst) \ -{ \ - struct mc_buckethd *_mchd = &((mb_lst)->mb_cont.mc_bhead); \ - \ - (mb_bckt)->mb_numfree--; \ - (mb_objp) = (mb_bckt)->mb_free[((mb_bckt)->mb_numfree)]; \ - (*((mb_lst)->mb_cont.mc_objcount))--; \ - if ((mb_bckt)->mb_numfree == 0) { \ - SLIST_REMOVE_HEAD(_mchd, mb_blist); \ - SLIST_NEXT((mb_bckt), mb_blist) = NULL; \ - (mb_bckt)->mb_owner |= MB_BUCKET_FREE; \ - } \ -} - -#define MB_PUT_OBJECT(mb_objp, mb_bckt, mb_lst) \ - (mb_bckt)->mb_free[((mb_bckt)->mb_numfree)] = (mb_objp); \ - (mb_bckt)->mb_numfree++; \ - (*((mb_lst)->mb_cont.mc_objcount))++; - -#define MB_MBTYPES_INC(mb_cnt, mb_type, mb_num) \ - if ((mb_type) != MT_NOTMBUF) \ - (*((mb_cnt)->mb_cont.mc_types + (mb_type))) += (mb_num) - -#define MB_MBTYPES_DEC(mb_cnt, mb_type, mb_num) \ - if ((mb_type) != MT_NOTMBUF) \ - (*((mb_cnt)->mb_cont.mc_types + (mb_type))) -= (mb_num) - -/* - * Ownership of buckets/containers is represented by integers. The PCPU - * lists range from 0 to NCPU-1. We need a free numerical id for the general - * list (we use NCPU). We also need a non-conflicting free bit to indicate - * that the bucket is free and removed from a container, while not losing - * the bucket's originating container id. We use the highest bit - * for the free marker. - */ -#define MB_GENLIST_OWNER (NCPU) -#define MB_BUCKET_FREE (1 << (sizeof(int) * 8 - 1)) - -/* Statistics structures for allocator (per-CPU and general). */ -static struct mbpstat mb_statpcpu[NCPU + 1]; -struct mbstat mbstat; - -/* Sleep time for wait code (in ticks). */ -static int mbuf_wait = 64; - -static u_int mbuf_hiwm = 512; /* High wm on # of mbufs per cache */ -static u_int mbuf_lowm = 128; /* Low wm on # of mbufs per cache */ -static u_int clust_hiwm = 128; /* High wm on # of clusters per cache */ -static u_int clust_lowm = 16; /* Low wm on # of clusters per cache */ - -/* - * Objects exported by sysctl(8). 
- */ -SYSCTL_DECL(_kern_ipc); -SYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RDTUN, &nmbclusters, 0, - "Maximum number of mbuf clusters available"); -SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RDTUN, &nmbufs, 0, - "Maximum number of mbufs available"); -SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RDTUN, &nmbcnt, 0, - "Number used to scale kmem_map to ensure sufficient space for counters"); -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, - "Maximum number of sendfile(2) sf_bufs available"); -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, - "Number of sendfile(2) sf_bufs at peak usage"); -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, - "Number of sendfile(2) sf_bufs in use"); -SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW, &mbuf_wait, 0, - "Sleep time of mbuf subsystem wait allocations during exhaustion"); -SYSCTL_UINT(_kern_ipc, OID_AUTO, mbuf_hiwm, CTLFLAG_RW, &mbuf_hiwm, 0, - "Upper limit of number of mbufs allowed in each cache"); -SYSCTL_UINT(_kern_ipc, OID_AUTO, mbuf_lowm, CTLFLAG_RD, &mbuf_lowm, 0, - "Lower limit of number of mbufs allowed in each cache"); -SYSCTL_UINT(_kern_ipc, OID_AUTO, clust_hiwm, CTLFLAG_RW, &clust_hiwm, 0, - "Upper limit of number of mbuf clusters allowed in each cache"); -SYSCTL_UINT(_kern_ipc, OID_AUTO, clust_lowm, CTLFLAG_RD, &clust_lowm, 0, - "Lower limit of number of mbuf clusters allowed in each cache"); -SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat, - "Mbuf general information and statistics"); -SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mb_statpcpu, CTLFLAG_RD, mb_statpcpu, - sizeof(mb_statpcpu), "S,", "Mbuf allocator per CPU statistics"); - -/* - * Prototypes of local allocator routines. - */ -static void *mb_alloc_wait(struct mb_lstmngr *, short); -static struct mb_bucket *mb_pop_cont(struct mb_lstmngr *, int, - struct mb_pcpu_list *); -static void mb_reclaim(void); -static void mbuf_init(void *); - -/* - * Initial allocation numbers. Each parameter represents the number of buckets - * of each object that will be placed initially in each PCPU container for - * said object. - */ -#define NMB_MBUF_INIT 2 -#define NMB_CLUST_INIT 8 - -/* - * Internal flags that allow for cache locks to remain "persistent" across - * allocation and free calls. They may be used in combination. - */ -#define MBP_PERSIST 0x1 /* Return with lock still held. */ -#define MBP_PERSISTENT 0x2 /* Cache lock is already held coming in. */ - -/* - * Initialize the mbuf subsystem. - * - * We sub-divide the kmem_map into several submaps; this way, we don't have - * to worry about artificially limiting the number of mbuf or mbuf cluster - * allocations, due to fear of one type of allocation "stealing" address - * space initially reserved for another. - * - * Set up both the general containers and all the PCPU containers. Populate - * the PCPU containers with initial numbers. - */ -MALLOC_DEFINE(M_MBUF, "mbufmgr", "mbuf subsystem management structures"); -SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL) -static void -mbuf_init(void *dummy) -{ - struct mb_pcpu_list *pcpu_cnt; - vm_size_t mb_map_size; - int i, j; - - /* - * Set up all the submaps, for each type of object that we deal - * with in this allocator. 
- */ - mb_map_size = (vm_size_t)(nmbufs * MSIZE); - mb_map_size = rounddown(mb_map_size, MBUF_BUCK_SZ); - mb_list_mbuf.ml_btable = malloc((unsigned long)mb_map_size / - MBUF_BUCK_SZ * sizeof(struct mb_bucket *), M_MBUF, M_NOWAIT); - if (mb_list_mbuf.ml_btable == NULL) - goto bad; - mb_list_mbuf.ml_map = kmem_suballoc(kmem_map,&(mb_list_mbuf.ml_mapbase), - &(mb_list_mbuf.ml_maptop), mb_map_size); - mb_list_mbuf.ml_map->system_map = 1; - mb_list_mbuf.ml_mapfull = 0; - mb_list_mbuf.ml_objsize = MSIZE; - mb_list_mbuf.ml_objbucks = MBUF_BUCK_SZ / mb_list_mbuf.ml_objsize; - mb_list_mbuf.ml_wmhigh = &mbuf_hiwm; - mb_list_mbuf.ml_wmlow = &mbuf_lowm; - - mb_map_size = (vm_size_t)(nmbclusters * MCLBYTES); - mb_map_size = rounddown(mb_map_size, CLUST_BUCK_SZ); - mb_list_clust.ml_btable = malloc((unsigned long)mb_map_size / - CLUST_BUCK_SZ * sizeof(struct mb_bucket *), M_MBUF, M_NOWAIT); - if (mb_list_clust.ml_btable == NULL) - goto bad; - mb_list_clust.ml_map = kmem_suballoc(kmem_map, - &(mb_list_clust.ml_mapbase), &(mb_list_clust.ml_maptop), - mb_map_size); - mb_list_clust.ml_map->system_map = 1; - mb_list_clust.ml_mapfull = 0; - mb_list_clust.ml_objsize = MCLBYTES; - mb_list_clust.ml_objbucks = CLUST_BUCK_SZ / mb_list_clust.ml_objsize; - mb_list_clust.ml_wmhigh = &clust_hiwm; - mb_list_clust.ml_wmlow = &clust_lowm; - - /* - * Allocate required general (global) containers for each object type. - */ - mb_list_mbuf.ml_genlist = malloc(sizeof(struct mb_gen_list), M_MBUF, - M_NOWAIT); - mb_list_clust.ml_genlist = malloc(sizeof(struct mb_gen_list), M_MBUF, - M_NOWAIT); - if ((mb_list_mbuf.ml_genlist == NULL) || - (mb_list_clust.ml_genlist == NULL)) - goto bad; - - /* - * Initialize condition variables and general container mutex locks. - */ - mtx_init(&mbuf_gen, "mbuf subsystem general lists lock", NULL, MTX_DEF); - cv_init(&(mb_list_mbuf.ml_genlist->mgl_mstarved), "mbuf pool starved"); - cv_init(&(mb_list_clust.ml_genlist->mgl_mstarved), - "mcluster pool starved"); - mb_list_mbuf.ml_genlist->mb_cont.mc_lock = - mb_list_clust.ml_genlist->mb_cont.mc_lock = &mbuf_gen; - - /* - * Set up the general containers for each object. - */ - mb_list_mbuf.ml_genlist->mb_cont.mc_numowner = - mb_list_clust.ml_genlist->mb_cont.mc_numowner = MB_GENLIST_OWNER; - mb_list_mbuf.ml_genlist->mb_cont.mc_starved = - mb_list_clust.ml_genlist->mb_cont.mc_starved = 0; - mb_list_mbuf.ml_genlist->mb_cont.mc_objcount = - &(mb_statpcpu[MB_GENLIST_OWNER].mb_mbfree); - mb_list_clust.ml_genlist->mb_cont.mc_objcount = - &(mb_statpcpu[MB_GENLIST_OWNER].mb_clfree); - mb_list_mbuf.ml_genlist->mb_cont.mc_numbucks = - &(mb_statpcpu[MB_GENLIST_OWNER].mb_mbbucks); - mb_list_clust.ml_genlist->mb_cont.mc_numbucks = - &(mb_statpcpu[MB_GENLIST_OWNER].mb_clbucks); - mb_list_mbuf.ml_genlist->mb_cont.mc_types = - &(mb_statpcpu[MB_GENLIST_OWNER].mb_mbtypes[0]); - mb_list_clust.ml_genlist->mb_cont.mc_types = NULL; - SLIST_INIT(&(mb_list_mbuf.ml_genlist->mb_cont.mc_bhead)); - SLIST_INIT(&(mb_list_clust.ml_genlist->mb_cont.mc_bhead)); - - /* - * Allocate all the required counters for clusters. This makes - * cluster allocations/deallocations much faster. - */ - cl_refcntmap = malloc(nmbclusters * sizeof(u_int), M_MBUF, M_NOWAIT); - if (cl_refcntmap == NULL) - goto bad; - - /* - * Initialize general mbuf statistics. 
- */ - mbstat.m_msize = mb_list_mbuf.ml_objsize; - mbstat.m_mclbytes = mb_list_clust.ml_objsize; - mbstat.m_minclsize = MINCLSIZE; - mbstat.m_mlen = MLEN; - mbstat.m_mhlen = MHLEN; - mbstat.m_numtypes = MT_NTYPES; - mbstat.m_mbperbuck = mb_list_mbuf.ml_objbucks; - mbstat.m_clperbuck = mb_list_clust.ml_objbucks; - - /* - * Allocate and initialize PCPU containers. - */ - for (i = 0; i < NCPU; i++) { - if (CPU_ABSENT(i)) { - mb_statpcpu[i].mb_active = 0; - continue; - } - - mb_list_mbuf.ml_cntlst[i] = malloc(sizeof(struct mb_pcpu_list), - M_MBUF, M_NOWAIT); - mb_list_clust.ml_cntlst[i] = malloc(sizeof(struct mb_pcpu_list), - M_MBUF, M_NOWAIT); - if ((mb_list_mbuf.ml_cntlst[i] == NULL) || - (mb_list_clust.ml_cntlst[i] == NULL)) - goto bad; - - mtx_init(&mbuf_pcpu[i], "mbuf PCPU list lock", NULL, MTX_DEF); - mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_lock = - mb_list_clust.ml_cntlst[i]->mb_cont.mc_lock = &mbuf_pcpu[i]; - - mb_statpcpu[i].mb_active = 1; - mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_numowner = - mb_list_clust.ml_cntlst[i]->mb_cont.mc_numowner = i; - mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_starved = - mb_list_clust.ml_cntlst[i]->mb_cont.mc_starved = 0; - mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_objcount = - &(mb_statpcpu[i].mb_mbfree); - mb_list_clust.ml_cntlst[i]->mb_cont.mc_objcount = - &(mb_statpcpu[i].mb_clfree); - mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_numbucks = - &(mb_statpcpu[i].mb_mbbucks); - mb_list_clust.ml_cntlst[i]->mb_cont.mc_numbucks = - &(mb_statpcpu[i].mb_clbucks); - mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_types = - &(mb_statpcpu[i].mb_mbtypes[0]); - mb_list_clust.ml_cntlst[i]->mb_cont.mc_types = NULL; - - SLIST_INIT(&(mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_bhead)); - SLIST_INIT(&(mb_list_clust.ml_cntlst[i]->mb_cont.mc_bhead)); - - /* - * Perform initial allocations. - */ - pcpu_cnt = MB_GET_PCPU_LIST_NUM(&mb_list_mbuf, i); - MB_LOCK_CONT(pcpu_cnt); - for (j = 0; j < NMB_MBUF_INIT; j++) { - if (mb_pop_cont(&mb_list_mbuf, M_DONTWAIT, pcpu_cnt) - == NULL) - goto bad; - } - MB_UNLOCK_CONT(pcpu_cnt); - - pcpu_cnt = MB_GET_PCPU_LIST_NUM(&mb_list_clust, i); - MB_LOCK_CONT(pcpu_cnt); - for (j = 0; j < NMB_CLUST_INIT; j++) { - if (mb_pop_cont(&mb_list_clust, M_DONTWAIT, pcpu_cnt) - == NULL) - goto bad; - } - MB_UNLOCK_CONT(pcpu_cnt); - } - - return; -bad: - panic("mbuf_init(): failed to initialize mbuf subsystem!"); -} - -/* - * Populate a given mbuf PCPU container with a bucket full of fresh new - * buffers. Return a pointer to the new bucket (already in the container if - * successful), or return NULL on failure. - * - * LOCKING NOTES: - * PCPU container lock must be held when this is called. - * The lock is dropped here so that we can cleanly call the underlying VM - * code. If we fail, we return with no locks held. If we succeed (i.e., return - * non-NULL), we return with the PCPU lock held, ready for allocation from - * the returned bucket. - */ -static struct mb_bucket * -mb_pop_cont(struct mb_lstmngr *mb_list, int how, struct mb_pcpu_list *cnt_lst) -{ - struct mb_bucket *bucket; - caddr_t p; - int i; - - MB_UNLOCK_CONT(cnt_lst); - /* - * If our object's (finite) map is starved now (i.e., no more address - * space), bail out now. 
- */ - if (mb_list->ml_mapfull) - return (NULL); - - bucket = malloc(sizeof(struct mb_bucket) + - mb_list->ml_objbucks * sizeof(void *), M_MBUF, MBTOM(how)); - if (bucket == NULL) - return (NULL); - - p = (caddr_t)kmem_malloc(mb_list->ml_map, mb_list->ml_objsize * - mb_list->ml_objbucks, MBTOM(how)); - if (p == NULL) { - free(bucket, M_MBUF); - if (how == M_TRYWAIT) - mb_list->ml_mapfull = 1; - return (NULL); - } - - bucket->mb_numfree = 0; - mb_list->ml_btable[MB_BUCKET_INDX(p, mb_list)] = bucket; - for (i = 0; i < mb_list->ml_objbucks; i++) { - bucket->mb_free[i] = p; - bucket->mb_numfree++; - p += mb_list->ml_objsize; - } - - MB_LOCK_CONT(cnt_lst); - bucket->mb_owner = cnt_lst->mb_cont.mc_numowner; - SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead), bucket, mb_blist); - (*(cnt_lst->mb_cont.mc_numbucks))++; - *(cnt_lst->mb_cont.mc_objcount) += bucket->mb_numfree; - - return (bucket); -} - -/* - * Allocate a network buffer. - * The general case is very easy. Complications only arise if our PCPU - * container is empty. Things get worse if the PCPU container is empty, - * the general container is empty, and we've run out of address space - * in our map; then we try to block if we're willing to (M_TRYWAIT). - */ -static -void * -mb_alloc(struct mb_lstmngr *mb_list, int how, short type, short persist, - int *pers_list) -{ - static int last_report; - struct mb_pcpu_list *cnt_lst; - struct mb_bucket *bucket; - void *m; - -#ifdef INVARIANTS - int flags; - - flags = how & (M_WAITOK | M_NOWAIT | M_DONTWAIT | M_TRYWAIT); - if (flags != M_DONTWAIT && flags != M_TRYWAIT) { - static struct timeval lasterr; - static int curerr; - if (ppsratecheck(&lasterr, &curerr, 1)) { - printf("Bad mbuf alloc flags: %x\n", flags); - backtrace(); - how = M_TRYWAIT; - } - } -#endif - - m = NULL; - if ((persist & MBP_PERSISTENT) != 0) { - /* - * If we're a "persistent" call, then the per-CPU #(pers_list) - * cache lock is already held, and we just need to refer to - * the correct cache descriptor. - */ - cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, *pers_list); - } else { - cnt_lst = MB_GET_PCPU_LIST(mb_list); - MB_LOCK_CONT(cnt_lst); - } - - if ((bucket = SLIST_FIRST(&(cnt_lst->mb_cont.mc_bhead))) != NULL) { - /* - * This is the easy allocation case. We just grab an object - * from a bucket in the PCPU container. At worst, we - * have just emptied the bucket and so we remove it - * from the container. - */ - MB_GET_OBJECT(m, bucket, cnt_lst); - MB_MBTYPES_INC(cnt_lst, type, 1); - - /* If asked to persist, do not drop the lock. */ - if ((persist & MBP_PERSIST) == 0) - MB_UNLOCK_CONT(cnt_lst); - else - *pers_list = cnt_lst->mb_cont.mc_numowner; - } else { - struct mb_gen_list *gen_list; - - /* - * This is the less-common more difficult case. We must - * first verify if the general list has anything for us - * and if that also fails, we must allocate a page from - * the map and create a new bucket to place in our PCPU - * container (already locked). If the map is starved then - * we're really in for trouble, as we have to wait on - * the general container's condition variable. - */ - gen_list = MB_GET_GEN_LIST(mb_list); - MB_LOCK_CONT(gen_list); - - if ((bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead))) - != NULL) { - /* - * Give ownership of the bucket to our CPU's - * container, but only actually put the bucket - * in the container if it doesn't become free - * upon removing an mbuf from it. 
- */ - SLIST_REMOVE_HEAD(&(gen_list->mb_cont.mc_bhead), - mb_blist); - bucket->mb_owner = cnt_lst->mb_cont.mc_numowner; - (*(gen_list->mb_cont.mc_numbucks))--; - (*(cnt_lst->mb_cont.mc_numbucks))++; - *(gen_list->mb_cont.mc_objcount) -= bucket->mb_numfree; - bucket->mb_numfree--; - m = bucket->mb_free[(bucket->mb_numfree)]; - if (bucket->mb_numfree == 0) { - SLIST_NEXT(bucket, mb_blist) = NULL; - bucket->mb_owner |= MB_BUCKET_FREE; - } else { - SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead), - bucket, mb_blist); - *(cnt_lst->mb_cont.mc_objcount) += - bucket->mb_numfree; - } - MB_UNLOCK_CONT(gen_list); - MB_MBTYPES_INC(cnt_lst, type, 1); - - /* If asked to persist, do not drop the lock. */ - if ((persist & MBP_PERSIST) == 0) - MB_UNLOCK_CONT(cnt_lst); - else - *pers_list = cnt_lst->mb_cont.mc_numowner; - } else { - /* - * We'll have to allocate a new page. - */ - MB_UNLOCK_CONT(gen_list); - bucket = mb_pop_cont(mb_list, how, cnt_lst); - if (bucket != NULL) { - MB_GET_OBJECT(m, bucket, cnt_lst); - MB_MBTYPES_INC(cnt_lst, type, 1); - - /* If asked to persist, do not drop the lock. */ - if ((persist & MBP_PERSIST) == 0) - MB_UNLOCK_CONT(cnt_lst); - else - *pers_list=cnt_lst->mb_cont.mc_numowner; - } else { - if (how == M_TRYWAIT) { - /* - * Absolute worst-case scenario. - * We block if we're willing to, but - * only after trying to steal from - * other lists. - */ - m = mb_alloc_wait(mb_list, type); - } else { - /* XXX: No consistency. */ - mbstat.m_drops++; - - if (ticks < last_report || - (ticks - last_report) >= hz) { - last_report = ticks; - printf( -"All mbufs or mbuf clusters exhausted, please see tuning(7).\n"); - } - - } - if (m != NULL && (persist & MBP_PERSIST) != 0) { - cnt_lst = MB_GET_PCPU_LIST(mb_list); - MB_LOCK_CONT(cnt_lst); - *pers_list=cnt_lst->mb_cont.mc_numowner; - } - } - } - } - - return (m); -} - -/* - * This is the worst-case scenario called only if we're allocating with - * M_TRYWAIT. We first drain all the protocols, then try to find an mbuf - * by looking in every PCPU container. If we're still unsuccesful, we - * try the general container one last time and possibly block on our - * starved cv. - */ -static void * -mb_alloc_wait(struct mb_lstmngr *mb_list, short type) -{ - struct mb_pcpu_list *cnt_lst; - struct mb_gen_list *gen_list; - struct mb_bucket *bucket; - void *m; - int i, cv_ret; - - /* - * Try to reclaim mbuf-related objects (mbufs, clusters). - */ - mb_reclaim(); - - /* - * Cycle all the PCPU containers. Increment starved counts if found - * empty. - */ - for (i = 0; i < NCPU; i++) { - if (CPU_ABSENT(i)) - continue; - cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, i); - MB_LOCK_CONT(cnt_lst); - - /* - * If container is non-empty, get a single object from it. - * If empty, increment starved count. - */ - if ((bucket = SLIST_FIRST(&(cnt_lst->mb_cont.mc_bhead))) != - NULL) { - MB_GET_OBJECT(m, bucket, cnt_lst); - MB_MBTYPES_INC(cnt_lst, type, 1); - MB_UNLOCK_CONT(cnt_lst); - mbstat.m_wait++; /* XXX: No consistency. */ - return (m); - } else - cnt_lst->mb_cont.mc_starved++; - - MB_UNLOCK_CONT(cnt_lst); - } - - /* - * We're still here, so that means it's time to get the general - * container lock, check it one more time (now that mb_reclaim() - * has been called) and if we still get nothing, block on the cv. 
- */ - gen_list = MB_GET_GEN_LIST(mb_list); - MB_LOCK_CONT(gen_list); - if ((bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead))) != NULL) { - MB_GET_OBJECT(m, bucket, gen_list); - MB_MBTYPES_INC(gen_list, type, 1); - MB_UNLOCK_CONT(gen_list); - mbstat.m_wait++; /* XXX: No consistency. */ - return (m); - } - - gen_list->mb_cont.mc_starved++; - cv_ret = cv_timedwait(&(gen_list->mgl_mstarved), - gen_list->mb_cont.mc_lock, mbuf_wait); - gen_list->mb_cont.mc_starved--; - - if ((cv_ret == 0) && - ((bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead))) != NULL)) { - MB_GET_OBJECT(m, bucket, gen_list); - MB_MBTYPES_INC(gen_list, type, 1); - mbstat.m_wait++; /* XXX: No consistency. */ - } else { - mbstat.m_drops++; /* XXX: No consistency. */ - m = NULL; - } - - MB_UNLOCK_CONT(gen_list); - - return (m); -} - -/*- - * Free an object to its rightful container. - * In the very general case, this operation is really very easy. - * Complications arise primarily if: - * (a) We've hit the high limit on number of free objects allowed in - * our PCPU container. - * (b) We're in a critical situation where our container has been - * marked 'starved' and we need to issue wakeups on the starved - * condition variable. - * (c) Minor (odd) cases: our bucket has migrated while we were - * waiting for the lock; our bucket is in the general container; - * our bucket is empty. - */ -static -void -mb_free(struct mb_lstmngr *mb_list, void *m, short type, short persist, - int *pers_list) -{ - struct mb_pcpu_list *cnt_lst; - struct mb_gen_list *gen_list; - struct mb_bucket *bucket; - u_int owner; - - bucket = mb_list->ml_btable[MB_BUCKET_INDX(m, mb_list)]; - - /* - * Make sure that if after we lock the bucket's present container the - * bucket has migrated, that we drop the lock and get the new one. - */ -retry_lock: - owner = bucket->mb_owner & ~MB_BUCKET_FREE; - switch (owner) { - case MB_GENLIST_OWNER: - gen_list = MB_GET_GEN_LIST(mb_list); - if (((persist & MBP_PERSISTENT) != 0) && (*pers_list >= 0)) { - if (*pers_list != MB_GENLIST_OWNER) { - cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, - *pers_list); - MB_UNLOCK_CONT(cnt_lst); - MB_LOCK_CONT(gen_list); - } - } else { - MB_LOCK_CONT(gen_list); - } - if (owner != (bucket->mb_owner & ~MB_BUCKET_FREE)) { - MB_UNLOCK_CONT(gen_list); - *pers_list = -1; - goto retry_lock; - } - - /* - * If we're intended for the general container, this is - * real easy: no migrating required. The only `bogon' - * is that we're now contending with all the threads - * dealing with the general list, but this is expected. 
- */ - MB_PUT_OBJECT(m, bucket, gen_list); - MB_MBTYPES_DEC(gen_list, type, 1); - if (bucket->mb_owner & MB_BUCKET_FREE) { - SLIST_INSERT_HEAD(&(gen_list->mb_cont.mc_bhead), - bucket, mb_blist); - bucket->mb_owner = MB_GENLIST_OWNER; - } - if (gen_list->mb_cont.mc_starved > 0) - cv_signal(&(gen_list->mgl_mstarved)); - if ((persist & MBP_PERSIST) == 0) - MB_UNLOCK_CONT(gen_list); - else - *pers_list = MB_GENLIST_OWNER; - break; - - default: - cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, owner); - if (((persist & MBP_PERSISTENT) != 0) && (*pers_list >= 0)) { - if (*pers_list == MB_GENLIST_OWNER) { - gen_list = MB_GET_GEN_LIST(mb_list); - MB_UNLOCK_CONT(gen_list); - MB_LOCK_CONT(cnt_lst); - } else { - cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, - *pers_list); - owner = *pers_list; - } - } else { - MB_LOCK_CONT(cnt_lst); - } - if (owner != (bucket->mb_owner & ~MB_BUCKET_FREE)) { - MB_UNLOCK_CONT(cnt_lst); - *pers_list = -1; - goto retry_lock; - } - - MB_PUT_OBJECT(m, bucket, cnt_lst); - MB_MBTYPES_DEC(cnt_lst, type, 1); - if ((*(cnt_lst->mb_cont.mc_objcount) > *(mb_list->ml_wmhigh)) || - (cnt_lst->mb_cont.mc_starved > 0)) { - /* - * We've hit the high limit of allowed numbers of mbufs - * on this PCPU list or we've been flagged that we need - * to transfer a bucket over to the general cache. - * We must now migrate a bucket over to the general - * container. - */ - gen_list = MB_GET_GEN_LIST(mb_list); - MB_LOCK_CONT(gen_list); - if ((bucket->mb_owner & MB_BUCKET_FREE) == 0) { - bucket = - SLIST_FIRST(&(cnt_lst->mb_cont.mc_bhead)); - SLIST_REMOVE_HEAD(&(cnt_lst->mb_cont.mc_bhead), - mb_blist); - } - SLIST_INSERT_HEAD(&(gen_list->mb_cont.mc_bhead), - bucket, mb_blist); - bucket->mb_owner = MB_GENLIST_OWNER; - *(cnt_lst->mb_cont.mc_objcount) -= bucket->mb_numfree; - *(gen_list->mb_cont.mc_objcount) += bucket->mb_numfree; - (*(cnt_lst->mb_cont.mc_numbucks))--; - (*(gen_list->mb_cont.mc_numbucks))++; - - /* - * While we're at it, transfer some of the mbtypes - * "count load" onto the general list's mbtypes - * array, seeing as how we're moving the bucket - * there now, meaning that the freeing of objects - * there will now decrement the _general list's_ - * mbtypes counters, and no longer our PCPU list's - * mbtypes counters. We do this for the type presently - * being freed in an effort to keep the mbtypes - * counters approximately balanced across all lists. - */ - MB_MBTYPES_DEC(cnt_lst, type, - mb_list->ml_objbucks - bucket->mb_numfree); - MB_MBTYPES_INC(gen_list, type, - mb_list->ml_objbucks - bucket->mb_numfree); - - if (cnt_lst->mb_cont.mc_starved > 0) { - /* - * Determine whether or not to keep - * transferring buckets to the general list - * or whether we've transferred enough already. - * The thread that is blocked may end up waking - * up in the meantime, but transferring an - * extra bucket in a constrained situation - * is not so bad, as we're likely to need - * it soon anyway. 
- */ - if (gen_list->mb_cont.mc_starved > 0) { - cnt_lst->mb_cont.mc_starved--; - cv_signal(&(gen_list->mgl_mstarved)); - } else - cnt_lst->mb_cont.mc_starved = 0; - } - MB_UNLOCK_CONT(gen_list); - if ((persist & MBP_PERSIST) == 0) - MB_UNLOCK_CONT(cnt_lst); - else - *pers_list = owner; - break; - } - - if (bucket->mb_owner & MB_BUCKET_FREE) { - SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead), - bucket, mb_blist); - bucket->mb_owner = cnt_lst->mb_cont.mc_numowner; - } - - if ((persist & MBP_PERSIST) == 0) - MB_UNLOCK_CONT(cnt_lst); - else - *pers_list = owner; - break; - } -} - -/* - * Drain protocols in hopes to free up some resources. - * - * LOCKING NOTES: - * No locks should be held when this is called. The drain routines have to - * presently acquire some locks which raises the possibility of lock order - * violation if we're holding any mutex if that mutex is acquired in reverse - * order relative to one of the locks in the drain routines. - */ -static void -mb_reclaim(void) -{ - struct domain *dp; - struct protosw *pr; - - WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, - "mb_reclaim()"); - - mbstat.m_drain++; /* XXX: No consistency. */ - - for (dp = domains; dp != NULL; dp = dp->dom_next) - for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) - if (pr->pr_drain != NULL) - (*pr->pr_drain)(); -} - -/****************************************************************************** - * Internal setup macros. - */ - -#define _mb_setup(m, type) do { \ - (m)->m_type = (type); \ - (m)->m_next = NULL; \ - (m)->m_nextpkt = NULL; \ - (m)->m_data = (m)->m_dat; \ - (m)->m_flags = 0; \ -} while (0) - -#define _mbhdr_setup(m, type) do { \ - (m)->m_type = (type); \ - (m)->m_next = NULL; \ - (m)->m_nextpkt = NULL; \ - (m)->m_data = (m)->m_pktdat; \ - (m)->m_flags = M_PKTHDR; \ - (m)->m_pkthdr.rcvif = NULL; \ - (m)->m_pkthdr.csum_flags = 0; \ - SLIST_INIT(&(m)->m_pkthdr.tags); \ -} while (0) - -#define _mcl_setup(m) do { \ - (m)->m_data = (m)->m_ext.ext_buf; \ - (m)->m_flags |= M_EXT; \ - (m)->m_ext.ext_free = NULL; \ - (m)->m_ext.ext_args = NULL; \ - (m)->m_ext.ext_size = MCLBYTES; \ - (m)->m_ext.ext_type = EXT_CLUSTER; \ -} while (0) - -#define _mext_init_ref(m, ref) do { \ - (m)->m_ext.ref_cnt = ((ref) == NULL) ? \ - malloc(sizeof(u_int), M_MBUF, M_NOWAIT) : (u_int *)(ref); \ - if ((m)->m_ext.ref_cnt != NULL) { \ - *((m)->m_ext.ref_cnt) = 0; \ - MEXT_ADD_REF((m)); \ - } \ -} while (0) - -#define cl2ref(cl) \ - (((uintptr_t)(cl) - (uintptr_t)mb_list_clust.ml_mapbase) >> MCLSHIFT) - -#define _mext_dealloc_ref(m) \ - if ((m)->m_ext.ext_type != EXT_EXTREF) \ - free((m)->m_ext.ref_cnt, M_MBUF) - -/****************************************************************************** - * Internal routines. - * - * Because mb_alloc() and mb_free() are inlines (to keep the common - * cases down to a maximum of one function call), below are a few - * routines used only internally for the sole purpose of making certain - * functions smaller. - * - * - _mext_free(): frees associated storage when the ref. count is - * exactly one and we're freeing. - * - * - _mgetm_internal(): common "persistent-lock" routine that allocates - * an mbuf and a cluster in one shot, but where the lock is already - * held coming in (which is what makes it different from the exported - * m_getcl()). The lock is dropped when done. This is used by m_getm() - * and, therefore, is very m_getm()-specific. 
- */ -static struct mbuf *_mgetm_internal(int, short, short, int); - -void -_mext_free(struct mbuf *mb) -{ - - if (mb->m_ext.ext_type == EXT_CLUSTER) { - mb_free(&mb_list_clust, (caddr_t)mb->m_ext.ext_buf, MT_NOTMBUF, - 0, NULL); - } else { - (*(mb->m_ext.ext_free))(mb->m_ext.ext_buf, mb->m_ext.ext_args); - _mext_dealloc_ref(mb); - } -} - -static struct mbuf * -_mgetm_internal(int how, short type, short persist, int cchnum) -{ - struct mbuf *mb; - - mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, persist,&cchnum); - if (mb == NULL) - return NULL; - _mb_setup(mb, type); - - if ((persist & MBP_PERSIST) != 0) { - mb->m_ext.ext_buf = (caddr_t)mb_alloc(&mb_list_clust, - how, MT_NOTMBUF, MBP_PERSISTENT, &cchnum); - if (mb->m_ext.ext_buf == NULL) { - (void)m_free(mb); - mb = NULL; - } - _mcl_setup(mb); - _mext_init_ref(mb, &cl_refcntmap[cl2ref(mb->m_ext.ext_buf)]); - } - return (mb); -} - -/****************************************************************************** - * Exported buffer allocation and de-allocation routines. - */ - -/* - * Allocate and return a single (normal) mbuf. NULL is returned on failure. - * - * Arguments: - * - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks - * if really starved for memory. M_DONTWAIT to never block. - * - type: the type of the mbuf being allocated. - */ -struct mbuf * -m_get(int how, short type) -{ - struct mbuf *mb; - - mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, 0, NULL); - if (mb != NULL) - _mb_setup(mb, type); - return (mb); -} - -/* - * Allocate a given length worth of mbufs and/or clusters (whatever fits - * best) and return a pointer to the top of the allocated chain. If an - * existing mbuf chain is provided, then we will append the new chain - * to the existing one but still return the top of the newly allocated - * chain. NULL is returned on failure, in which case the [optional] - * provided chain is left untouched, and any memory already allocated - * is freed. - * - * Arguments: - * - m: existing chain to which to append new chain (optional). - * - len: total length of data to append, either in mbufs or clusters - * (we allocate whatever combination yields the best fit). - * - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks - * if really starved for memory. M_DONTWAIT to never block. - * - type: the type of the mbuf being allocated. - */ -struct mbuf * -m_getm(struct mbuf *m, int len, int how, short type) -{ - struct mbuf *mb, *top, *cur, *mtail; - int num, rem, cchnum; - short persist; - int i; - - KASSERT(len >= 0, ("m_getm(): len is < 0")); - - /* If m != NULL, we will append to the end of that chain. */ - if (m != NULL) - for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next); - else - mtail = NULL; - - /* - * In the best-case scenario (which should be the common case - * unless we're in a starvation situation), we will be able to - * go through the allocation of all the desired mbufs and clusters - * here without dropping our per-CPU cache lock in between. - */ - num = len / MCLBYTES; - rem = len % MCLBYTES; - persist = 0; - cchnum = -1; - top = cur = NULL; - for (i = 0; i < num; i++) { - mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, - MBP_PERSIST | persist, &cchnum); - if (mb == NULL) - goto failed; - _mb_setup(mb, type); - mb->m_len = 0; - - persist = (i != (num - 1) || rem > 0) ? 
MBP_PERSIST : 0; - mb->m_ext.ext_buf = (caddr_t)mb_alloc(&mb_list_clust, - how, MT_NOTMBUF, persist | MBP_PERSISTENT, &cchnum); - if (mb->m_ext.ext_buf == NULL) { - (void)m_free(mb); - goto failed; - } - _mcl_setup(mb); - _mext_init_ref(mb, &cl_refcntmap[cl2ref(mb->m_ext.ext_buf)]); - persist = MBP_PERSISTENT; - - if (cur == NULL) - top = cur = mb; - else - cur = (cur->m_next = mb); - } - if (rem > 0) { - if (cchnum >= 0) { - persist = MBP_PERSISTENT; - persist |= (rem > MINCLSIZE) ? MBP_PERSIST : 0; - mb = _mgetm_internal(how, type, persist, cchnum); - if (mb == NULL) - goto failed; - } else if (rem > MINCLSIZE) { - mb = m_getcl(how, type, 0); - } else { - mb = m_get(how, type); - } - if (mb != NULL) { - mb->m_len = 0; - if (cur == NULL) - top = mb; - else - cur->m_next = mb; - } else - goto failed; - } - - if (mtail != NULL) - mtail->m_next = top; - return top; -failed: - if (top != NULL) - m_freem(top); - return NULL; -} - -/* - * Allocate and return a single M_PKTHDR mbuf. NULL is returned on failure. - * - * Arguments: - * - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks - * if really starved for memory. M_DONTWAIT to never block. - * - type: the type of the mbuf being allocated. - */ -struct mbuf * -m_gethdr(int how, short type) -{ - struct mbuf *mb; - - mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, 0, NULL); - if (mb != NULL) { - _mbhdr_setup(mb, type); -#ifdef MAC - if (mac_init_mbuf(mb, MBTOM(how)) != 0) { - m_free(mb); - return (NULL); - } -#endif - } - return (mb); -} - -/* - * Allocate and return a single (normal) pre-zero'd mbuf. NULL is - * returned on failure. - * - * Arguments: - * - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks - * if really starved for memory. M_DONTWAIT to never block. - * - type: the type of the mbuf being allocated. - */ -struct mbuf * -m_get_clrd(int how, short type) -{ - struct mbuf *mb; - - mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, 0, NULL); - if (mb != NULL) { - _mb_setup(mb, type); - bzero(mtod(mb, caddr_t), MLEN); - } - return (mb); -} - -/* - * Allocate and return a single M_PKTHDR pre-zero'd mbuf. NULL is - * returned on failure. - * - * Arguments: - * - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks - * if really starved for memory. M_DONTWAIT to never block. - * - type: the type of the mbuf being allocated. - */ -struct mbuf * -m_gethdr_clrd(int how, short type) -{ - struct mbuf *mb; - - mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, 0, NULL); - if (mb != NULL) { - _mbhdr_setup(mb, type); -#ifdef MAC - if (mac_init_mbuf(mb, MBTOM(how)) != 0) { - m_free(mb); - return (NULL); - } -#endif - bzero(mtod(mb, caddr_t), MHLEN); - } - return (mb); -} - -/* - * Free a single mbuf and any associated storage that it may have attached - * to it. The associated storage may not be immediately freed if its - * reference count is above 1. Returns the next mbuf in the chain following - * the mbuf being freed. - * - * Arguments: - * - mb: the mbuf to free. 
- */ -struct mbuf * -m_free(struct mbuf *mb) -{ - struct mbuf *nb; - int cchnum; - short persist = 0; - -#ifdef INVARIANTS - if (mb->m_flags & M_FREELIST) - panic("m_free detected a mbuf double-free"); - mb->m_flags |= M_FREELIST; -#endif - if ((mb->m_flags & M_PKTHDR) != 0) - m_tag_delete_chain(mb, NULL); - nb = mb->m_next; - if ((mb->m_flags & M_EXT) != 0) { - MEXT_REM_REF(mb); - if (atomic_cmpset_int(mb->m_ext.ref_cnt, 0, 1)) { - if (mb->m_ext.ext_type == EXT_CLUSTER) { - mb_free(&mb_list_clust, - (caddr_t)mb->m_ext.ext_buf, MT_NOTMBUF, - MBP_PERSIST, &cchnum); - persist = MBP_PERSISTENT; - } else { - (*(mb->m_ext.ext_free))(mb->m_ext.ext_buf, - mb->m_ext.ext_args); - _mext_dealloc_ref(mb); - persist = 0; - } - } - } - mb_free(&mb_list_mbuf, mb, mb->m_type, persist, &cchnum); - return (nb); -} - -/* - * Free an entire chain of mbufs and associated external buffers, if - * applicable. Right now, we only optimize a little so that the cache - * lock may be held across a single mbuf+cluster free. Hopefully, - * we'll eventually be holding the lock across more than merely two - * consecutive frees but right now this is hard to implement because of - * things like _mext_dealloc_ref (may do a free()) and atomic ops in the - * loop. - * - * - mb: the mbuf chain to free. - */ -void -m_freem(struct mbuf *mb) -{ - - while (mb != NULL) - mb = m_free(mb); -} - -/* - * Fetch an mbuf with a cluster attached to it. If one of the - * allocations fails, the entire allocation fails. This routine is - * the preferred way of fetching both the mbuf and cluster together, - * as it avoids having to unlock/relock between allocations. Returns - * NULL on failure. - * - * Arguments: - * - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks - * if really starved for memory. M_DONTWAIT to never block. - * - type: the type of the mbuf being allocated. - * - flags: any flags to pass to the mbuf being allocated; if this includes - * the M_PKTHDR bit, then the mbuf is configured as a M_PKTHDR mbuf. - */ -struct mbuf * -m_getcl(int how, short type, int flags) -{ - struct mbuf *mb; - int cchnum; - - mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, - MBP_PERSIST, &cchnum); - if (mb == NULL) - return NULL; - mb->m_type = type; - mb->m_next = NULL; - mb->m_flags = flags; - if ((flags & M_PKTHDR) != 0) { - mb->m_nextpkt = NULL; - mb->m_pkthdr.rcvif = NULL; - mb->m_pkthdr.csum_flags = 0; - SLIST_INIT(&mb->m_pkthdr.tags); - } - - mb->m_ext.ext_buf = (caddr_t)mb_alloc(&mb_list_clust, how, - MT_NOTMBUF, MBP_PERSISTENT, &cchnum); - if (mb->m_ext.ext_buf == NULL) { - (void)m_free(mb); - mb = NULL; - } else { - _mcl_setup(mb); - _mext_init_ref(mb, &cl_refcntmap[cl2ref(mb->m_ext.ext_buf)]); -#ifdef MAC - if (flags & M_PKTHDR) { - if (mac_init_mbuf(mb, MBTOM(how)) != 0) { - m_free(mb); - return (NULL); - } - } -#endif - } - return (mb); -} - -/* - * Fetch a single mbuf cluster and attach it to an existing mbuf. If - * successfull, configures the provided mbuf to have mbuf->m_ext.ext_buf - * pointing to the cluster, and sets the M_EXT bit in the mbuf's flags. - * The M_EXT bit is not set on failure. - * - * Arguments: - * - mb: the existing mbuf to which to attach the allocated cluster. - * - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks - * if really starved for memory. M_DONTWAIT to never block. 
- */ -void -m_clget(struct mbuf *mb, int how) -{ - - mb->m_ext.ext_buf= (caddr_t)mb_alloc(&mb_list_clust,how,MT_NOTMBUF, - 0, NULL); - if (mb->m_ext.ext_buf != NULL) { - _mcl_setup(mb); - _mext_init_ref(mb, &cl_refcntmap[cl2ref(mb->m_ext.ext_buf)]); - } -} - -/* - * Configure a provided mbuf to refer to the provided external storage - * buffer and setup a reference count for said buffer. If the setting - * up of the reference count fails, the M_EXT bit will not be set. If - * successfull, the M_EXT bit is set in the mbuf's flags. - * - * Arguments: - * - mb: the existing mbuf to which to attach the provided buffer. - * - buf: the address of the provided external storage buffer. - * - size: the size of the provided buffer. - * - freef: a pointer to a routine that is responsible for freeing the - * provided external storage buffer. - * - args: a pointer to an argument structure (of any type) to be passed - * to the provided freef routine (may be NULL). - * - flags: any other flags to be passed to the provided mbuf. - * - type: the type that the external storage buffer should be labeled with. - */ -void -m_extadd(struct mbuf *mb, caddr_t buf, u_int size, - void (*freef)(void *, void *), void *args, int flags, int type) -{ - u_int *ref_cnt = NULL; - - if (type == EXT_CLUSTER) - ref_cnt = &cl_refcntmap[cl2ref(mb->m_ext.ext_buf)]; - else if (type == EXT_EXTREF) - ref_cnt = mb->m_ext.ref_cnt; - _mext_init_ref(mb, ref_cnt); - if (mb->m_ext.ref_cnt != NULL) { - mb->m_flags |= (M_EXT | flags); - mb->m_ext.ext_buf = buf; - mb->m_data = mb->m_ext.ext_buf; - mb->m_ext.ext_size = size; - mb->m_ext.ext_free = freef; - mb->m_ext.ext_args = args; - mb->m_ext.ext_type = type; - } -} - -/* - * Change type of provided mbuf. This is a relatively expensive operation - * (due to the cost of statistics manipulations) and should be avoided, where - * possible. - * - * Arguments: - * - mb: the provided mbuf for which the type needs to be changed. - * - new_type: the new type to change the mbuf to. - */ -void -m_chtype(struct mbuf *mb, short new_type) -{ - struct mb_gen_list *gen_list; - - gen_list = MB_GET_GEN_LIST(&mb_list_mbuf); - MB_LOCK_CONT(gen_list); - MB_MBTYPES_DEC(gen_list, mb->m_type, 1); - MB_MBTYPES_INC(gen_list, new_type, 1); - MB_UNLOCK_CONT(gen_list); - mb->m_type = new_type; -} diff -ruN vendor/src/sys/kern/uipc_mbuf.c mbuma2/src/sys/kern/uipc_mbuf.c --- vendor/src/sys/kern/uipc_mbuf.c Tue May 25 11:49:30 2004 +++ mbuma2/src/sys/kern/uipc_mbuf.c Fri May 28 16:41:12 2004 @@ -86,6 +86,157 @@ #endif /* + * Malloc-type for external ext_buf ref counts. + */ +MALLOC_DEFINE(M_MBUF, "mbextcnt", "mbuf external ref counts"); + +/* + * Allocate a given length worth of mbufs and/or clusters (whatever fits + * best) and return a pointer to the top of the allocated chain. If an + * existing mbuf chain is provided, then we will append the new chain + * to the existing one but still return the top of the newly allocated + * chain. + */ +struct mbuf * +m_getm(struct mbuf *m, int len, int how, short type) +{ + struct mbuf *mb, *top, *cur, *mtail; + int num, rem; + int i; + + KASSERT(len >= 0, ("m_getm(): len is < 0")); + + /* If m != NULL, we will append to the end of that chain. */ + if (m != NULL) + for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next); + else + mtail = NULL; + + /* + * Calculate how many mbufs+clusters ("packets") we need and how much + * leftover there is after that and allocate the first mbuf+cluster + * if required. 
+ */ + num = len / MCLBYTES; + rem = len % MCLBYTES; + top = cur = NULL; + if (num > 0) { + if ((top = cur = m_getcl(how, type, 0)) == NULL) + goto failed; + } + num--; + top->m_len = 0; + + for (i = 0; i < num; i++) { + mb = m_getcl(how, type, 0); + if (mb == NULL) + goto failed; + mb->m_len = 0; + cur = (cur->m_next = mb); + } + if (rem > 0) { + mb = (rem > MINCLSIZE) ? + m_getcl(how, type, 0) : m_get(how, type); + if (mb == NULL) + goto failed; + mb->m_len = 0; + if (cur == NULL) + top = mb; + else + cur->m_next = mb; + } + + if (mtail != NULL) + mtail->m_next = top; + return top; +failed: + if (top != NULL) + m_freem(top); + return NULL; +} + +/* + * Free an entire chain of mbufs and associated external buffers, if + * applicable. + */ +void +m_freem(struct mbuf *mb) +{ + + while (mb != NULL) + mb = m_free(mb); +} + +/*- + * Configure a provided mbuf to refer to the provided external storage + * buffer and setup a reference count for said buffer. If the setting + * up of the reference count fails, the M_EXT bit will not be set. If + * successfull, the M_EXT bit is set in the mbuf's flags. + * + * Arguments: + * mb The existing mbuf to which to attach the provided buffer. + * buf The address of the provided external storage buffer. + * size The size of the provided buffer. + * freef A pointer to a routine that is responsible for freeing the + * provided external storage buffer. + * args A pointer to an argument structure (of any type) to be passed + * to the provided freef routine (may be NULL). + * flags Any other flags to be passed to the provided mbuf. + * type The type that the external storage buffer should be + * labeled with. + * + * Returns: + * Nothing. + */ +void +m_extadd(struct mbuf *mb, caddr_t buf, u_int size, + void (*freef)(void *, void *), void *args, int flags, int type) +{ + u_int *ref_cnt = NULL; + + /* XXX Shouldn't be adding EXT_CLUSTER with this API */ + if (type == EXT_CLUSTER) + ref_cnt = (u_int *)uma_find_refcnt(zone_clust, + mb->m_ext.ext_buf); + else if (type == EXT_EXTREF) + ref_cnt = mb->m_ext.ref_cnt; + mb->m_ext.ref_cnt = (ref_cnt == NULL) ? + malloc(sizeof(u_int), M_MBUF, M_NOWAIT) : (u_int *)ref_cnt; + if (mb->m_ext.ref_cnt != NULL) { + *(mb->m_ext.ref_cnt) = 1; + mb->m_flags |= (M_EXT | flags); + mb->m_ext.ext_buf = buf; + mb->m_data = mb->m_ext.ext_buf; + mb->m_ext.ext_size = size; + mb->m_ext.ext_free = freef; + mb->m_ext.ext_args = args; + mb->m_ext.ext_type = type; + } +} + +/* + * Non-directly-exported function to clean up after mbufs with M_EXT + * storage attached to them if the reference count hits 0. + */ +void +mb_free_ext(struct mbuf *m) +{ + + MEXT_REM_REF(m); + if (atomic_cmpset_int(m->m_ext.ref_cnt, 0, 1)) { + if (m->m_ext.ext_type == EXT_CLUSTER) { + uma_zfree(zone_pack, m); + return; + } + (*(m->m_ext.ext_free))(m->m_ext.ext_buf, + m->m_ext.ext_args); + if (m->m_ext.ext_type != EXT_EXTREF) + free(m->m_ext.ref_cnt, M_MBUF); + } + uma_zfree(zone_mbuf, m); +} + +/* * "Move" mbuf pkthdr from "from" to "to". * "from" must have M_PKTHDR set, and "to" must be empty. 
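As a usage illustration of the reworked m_extadd() above (this sketch is not part of the patch): a driver-style routine attaching its own buffer. The names my_buf_free(), my_attach_buf() and the sizes are hypothetical; EXT_NET_DRV is the existing driver-buffer ext type from sys/mbuf.h. With the new code the reference counter is malloc'd from the M_MBUF type for anything other than EXT_CLUSTER/EXT_EXTREF, and M_EXT is left clear if that allocation fails, so callers only need the one flag check.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

static void
my_buf_free(void *buf, void *args)
{
	/* Hand the buffer back to the driver's private pool (not shown). */
}

static struct mbuf *
my_attach_buf(caddr_t buf, u_int size, u_int len, int how)
{
	struct mbuf *m;

	m = m_gethdr(how, MT_DATA);
	if (m == NULL)
		return (NULL);
	/*
	 * m_extadd() now allocates and initializes the reference count
	 * itself (malloc type M_MBUF) for driver buffers; if that fails,
	 * M_EXT simply stays clear.
	 */
	m_extadd(m, buf, size, my_buf_free, NULL, 0, EXT_NET_DRV);
	if ((m->m_flags & M_EXT) == 0) {
		m_free(m);
		return (NULL);
	}
	m->m_len = m->m_pkthdr.len = len;
	return (m);
}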
*/ @@ -364,22 +515,22 @@ struct mbuf *n; /* Get the next new mbuf */ - MGET(n, how, m->m_type); + if (remain >= MINCLSIZE) { + n = m_getcl(how, m->m_type, 0); + nsize = MCLBYTES; + } else { + n = m_get(how, m->m_type); + nsize = MLEN; + } if (n == NULL) goto nospace; - if (top == NULL) { /* first one, must be PKTHDR */ - if (!m_dup_pkthdr(n, m, how)) - goto nospace; - nsize = MHLEN; - } else /* not the first one */ - nsize = MLEN; - if (remain >= MINCLSIZE) { - MCLGET(n, how); - if ((n->m_flags & M_EXT) == 0) { - (void)m_free(n); + + if (top == NULL) { /* First one, must be PKTHDR */ + if (!m_dup_pkthdr(n, m, how)) { + m_free(n); goto nospace; } - nsize = MCLBYTES; + nsize = MHLEN; } n->m_len = 0; @@ -651,39 +802,42 @@ void (*copy)(char *from, caddr_t to, u_int len)) { struct mbuf *m; - struct mbuf *top = 0, **mp = ⊤ + struct mbuf *top = NULL, **mp = ⊤ int len; if (off < 0 || off > MHLEN) return (NULL); - MGETHDR(m, M_DONTWAIT, MT_DATA); - if (m == NULL) - return (NULL); - m->m_pkthdr.rcvif = ifp; - m->m_pkthdr.len = totlen; - len = MHLEN; - while (totlen > 0) { - if (top) { - MGET(m, M_DONTWAIT, MT_DATA); - if (m == NULL) { - m_freem(top); - return (NULL); - } - len = MLEN; - } - if (totlen + off >= MINCLSIZE) { - MCLGET(m, M_DONTWAIT); - if (m->m_flags & M_EXT) + if (top == NULL) { /* First one, must be PKTHDR */ + if (totlen + off >= MINCLSIZE) { + m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); len = MCLBYTES; + } else { + m = m_gethdr(M_DONTWAIT, MT_DATA); + len = MHLEN; + + /* Place initial small packet/header at end of mbuf */ + if (m && totlen + off + max_linkhdr <= MLEN) { + m->m_data += max_linkhdr; + len -= max_linkhdr; + } + } + if (m == NULL) + return NULL; + m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.len = totlen; } else { - /* - * Place initial small packet/header at end of mbuf. - */ - if (top == NULL && totlen + off + max_linkhdr <= len) { - m->m_data += max_linkhdr; - len -= max_linkhdr; + if (totlen + off >= MINCLSIZE) { + m = m_getcl(M_DONTWAIT, MT_DATA, 0); + len = MCLBYTES; + } else { + m = m_get(M_DONTWAIT, MT_DATA); + len = MLEN; + } + if (m == NULL) { + m_freem(top); + return NULL; } } if (off) { @@ -722,9 +876,10 @@ off -= mlen; totlen += mlen; if (m->m_next == NULL) { - n = m_get_clrd(M_DONTWAIT, m->m_type); + n = m_get(M_DONTWAIT, m->m_type); if (n == NULL) goto out; + bzero(mtod(n, caddr_t), MLEN); n->m_len = min(MLEN, len + off); m->m_next = n; } diff -ruN vendor/src/sys/kern/uipc_mbuf2.c mbuma2/src/sys/kern/uipc_mbuf2.c --- vendor/src/sys/kern/uipc_mbuf2.c Tue May 25 11:49:30 2004 +++ mbuma2/src/sys/kern/uipc_mbuf2.c Mon May 10 17:09:58 2004 @@ -230,14 +230,10 @@ * now, we need to do the hard way. don't m_copy as there's no room * on both end. 
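The m_copym()/m_dup() and m_devget() hunks above all apply the same conversion, sketched here with a hypothetical helper (copy_one_buf_*() is not in the tree): the legacy MGET-then-MCLGET sequence, which can fail halfway and leaves cleanup to the caller, becomes a single m_getcl() or m_get() chosen by comparing the remaining length against MINCLSIZE.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

static struct mbuf *
copy_one_buf_old(int remain, int how, short type)
{
	struct mbuf *n;

	MGET(n, how, type);
	if (n == NULL)
		return (NULL);
	if (remain >= MINCLSIZE) {
		MCLGET(n, how);		/* may silently fail ... */
		if ((n->m_flags & M_EXT) == 0) {
			(void)m_free(n);	/* ... so check and back out */
			return (NULL);
		}
	}
	return (n);
}

static struct mbuf *
copy_one_buf_new(int remain, int how, short type)
{
	struct mbuf *n;

	/*
	 * One call now grabs the mbuf and, when the data is at least
	 * MINCLSIZE, a cluster along with it from the packet zone;
	 * either both succeed or NULL comes back, so no
	 * partial-allocation cleanup is needed.
	 */
	if (remain >= MINCLSIZE)
		n = m_getcl(how, type, 0);
	else
		n = m_get(how, type);
	return (n);
}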
*/ - MGET(o, M_DONTWAIT, m->m_type); - if (o && len > MLEN) { - MCLGET(o, M_DONTWAIT); - if ((o->m_flags & M_EXT) == 0) { - m_free(o); - o = NULL; - } - } + if (len > MLEN) + o = m_getcl(M_DONTWAIT, m->m_type, 0); + else + o = m_get(M_DONTWAIT, m->m_type); if (!o) { m_freem(m); return NULL; /* ENOBUFS */ @@ -274,29 +270,27 @@ m_dup1(struct mbuf *m, int off, int len, int wait) { struct mbuf *n; - int l; int copyhdr; if (len > MCLBYTES) return NULL; - if (off == 0 && (m->m_flags & M_PKTHDR) != 0) { + if (off == 0 && (m->m_flags & M_PKTHDR) != 0) copyhdr = 1; - MGETHDR(n, wait, m->m_type); - l = MHLEN; - } else { + else copyhdr = 0; - MGET(n, wait, m->m_type); - l = MLEN; - } - if (n && len > l) { - MCLGET(n, wait); - if ((n->m_flags & M_EXT) == 0) { - m_free(n); - n = NULL; - } + if (len >= MINCLSIZE) { + if (copyhdr == 1) + n = m_getcl(wait, m->m_type, M_PKTHDR); + else + n = m_getcl(wait, m->m_type, 0); + } else { + if (copyhdr == 1) + n = m_gethdr(wait, m->m_type); + else + n = m_get(wait, m->m_type); } if (!n) - return NULL; + return NULL; /* ENOBUFS */ if (copyhdr && !m_dup_pkthdr(n, m, wait)) { m_free(n); diff -ruN vendor/src/sys/kern/uipc_socket.c mbuma2/src/sys/kern/uipc_socket.c --- vendor/src/sys/kern/uipc_socket.c Tue May 25 11:49:30 2004 +++ mbuma2/src/sys/kern/uipc_socket.c Tue May 25 17:17:15 2004 @@ -527,8 +527,8 @@ { struct mbuf **mp; struct mbuf *m; - long space, len, resid; - int clen = 0, error, s, dontroute, mlen; + long space, len = 0, resid; + int clen = 0, error, s, dontroute; int atomic = sosendallatonce(so) || top; #ifdef ZERO_COPY_SOCKETS int cow_send; @@ -624,25 +624,23 @@ #ifdef ZERO_COPY_SOCKETS cow_send = 0; #endif /* ZERO_COPY_SOCKETS */ - if (top == 0) { - MGETHDR(m, M_TRYWAIT, MT_DATA); - if (m == NULL) { - error = ENOBUFS; - goto release; - } - mlen = MHLEN; - m->m_pkthdr.len = 0; - m->m_pkthdr.rcvif = (struct ifnet *)0; - } else { - MGET(m, M_TRYWAIT, MT_DATA); - if (m == NULL) { - error = ENOBUFS; - goto release; - } - mlen = MLEN; - } if (resid >= MINCLSIZE) { #ifdef ZERO_COPY_SOCKETS + if (top == NULL) { + MGETHDR(m, M_TRYWAIT, MT_DATA); + if (m == NULL) { + error = ENOBUFS; + goto release; + } + m->m_pkthdr.len = 0; + m->m_pkthdr.rcvif = (struct ifnet *)0; + } else { + MGET(m, M_TRYWAIT, MT_DATA); + if (m == NULL) { + error = ENOBUFS; + goto release; + } + } if (so_zero_copy_send && resid>=PAGE_SIZE && space>=PAGE_SIZE && @@ -654,29 +652,48 @@ cow_send = socow_setup(m, uio); } } - if (!cow_send){ + if (!cow_send) { + MCLGET(m, M_TRYWAIT); + if ((m->m_flags & M_EXT) == 0) { + m_free(m); + m = NULL; + } else { + len = min(min(MCLBYTES, resid), space); + } + } else + len = PAGE_SIZE; +#else /* ZERO_COPY_SOCKETS */ + if (top == NULL) { + m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR); + m->m_pkthdr.len = 0; + m->m_pkthdr.rcvif = (struct ifnet *)0; + } else + m = m_getcl(M_TRYWAIT, MT_DATA, 0); + len = min(min(MCLBYTES, resid), space); #endif /* ZERO_COPY_SOCKETS */ - MCLGET(m, M_TRYWAIT); - if ((m->m_flags & M_EXT) == 0) - goto nopages; - mlen = MCLBYTES; - len = min(min(mlen, resid), space); } else { -#ifdef ZERO_COPY_SOCKETS - len = PAGE_SIZE; - } + if (top == NULL) { + m = m_gethdr(M_TRYWAIT, MT_DATA); + m->m_pkthdr.len = 0; + m->m_pkthdr.rcvif = (struct ifnet *)0; - } else { -#endif /* ZERO_COPY_SOCKETS */ -nopages: - len = min(min(mlen, resid), space); - /* - * For datagram protocols, leave room - * for protocol headers in first mbuf. 
- */ - if (atomic && top == 0 && len < mlen) - MH_ALIGN(m, len); + len = min(min(MHLEN, resid), space); + /* + * For datagram protocols, leave room + * for protocol headers in first mbuf. + */ + if (atomic && m && len < MHLEN) + MH_ALIGN(m, len); + } else { + m = m_get(M_TRYWAIT, MT_DATA); + len = min(min(MLEN, resid), space); + } } + if (m == NULL) { + error = ENOBUFS; + goto release; + } + space -= len; #ifdef ZERO_COPY_SOCKETS if (cow_send) diff -ruN vendor/src/sys/kern/uipc_socket2.c mbuma2/src/sys/kern/uipc_socket2.c --- vendor/src/sys/kern/uipc_socket2.c Tue May 25 11:49:30 2004 +++ mbuma2/src/sys/kern/uipc_socket2.c Thu May 20 10:11:54 2004 @@ -959,15 +959,12 @@ if (CMSG_SPACE((u_int)size) > MCLBYTES) return ((struct mbuf *) NULL); - if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) + if (CMSG_SPACE((u_int)size > MLEN)) + m = m_getcl(M_DONTWAIT, MT_CONTROL, 0); + else + m = m_get(M_DONTWAIT, MT_CONTROL); + if (m == NULL) return ((struct mbuf *) NULL); - if (CMSG_SPACE((u_int)size) > MLEN) { - MCLGET(m, M_DONTWAIT); - if ((m->m_flags & M_EXT) == 0) { - m_free(m); - return ((struct mbuf *) NULL); - } - } cp = mtod(m, struct cmsghdr *); m->m_len = 0; KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m), diff -ruN vendor/src/sys/kern/uipc_syscalls.c mbuma2/src/sys/kern/uipc_syscalls.c --- vendor/src/sys/kern/uipc_syscalls.c Tue May 25 11:49:30 2004 +++ mbuma2/src/sys/kern/uipc_syscalls.c Mon May 10 17:09:58 2004 @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #ifdef KTRACE @@ -83,6 +84,21 @@ int compat); static int getpeername1(struct thread *td, struct getpeername_args *uap, int compat); + +/* + * NSFBUFS-related variables and associated sysctls + */ +int nsfbufs; +int nsfbufspeak; +int nsfbufsused; + +SYSCTL_DECL(_kern_ipc); +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, + "Maximum number of sendfile(2) sf_bufs available"); +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, + "Number of sendfile(2) sf_bufs at peak usage"); +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, + "Number of sendfile(2) sf_bufs in use"); /* * System call interface to the socket abstraction. diff -ruN vendor/src/sys/sparc64/sparc64/vm_machdep.c mbuma2/src/sys/sparc64/sparc64/vm_machdep.c --- vendor/src/sys/sparc64/sparc64/vm_machdep.c Wed May 26 10:00:09 2004 +++ mbuma2/src/sys/sparc64/sparc64/vm_machdep.c Wed May 26 10:01:24 2004 @@ -86,6 +86,10 @@ #include #include +#ifndef NSFBUFS +#define NSFBUFS (512 + maxusers * 16) +#endif + static void sf_buf_init(void *arg); SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) @@ -350,6 +354,9 @@ struct sf_buf *sf_bufs; vm_offset_t sf_base; int i; + + nsfbufs = NSFBUFS; + TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs); mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF); SLIST_INIT(&sf_freelist.sf_head); diff -ruN vendor/src/sys/sys/mbuf.h mbuma2/src/sys/sys/mbuf.h --- vendor/src/sys/sys/mbuf.h Fri May 28 10:41:54 2004 +++ mbuma2/src/sys/sys/mbuf.h Fri May 28 10:43:46 2004 @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -33,7 +33,12 @@ #ifndef _SYS_MBUF_H_ #define _SYS_MBUF_H_ +/* XXX: These includes suck. Sorry! */ #include +#ifdef _KERNEL +#include +#include +#endif /* * Mbufs are of a single size, MSIZE (sys/param.h), which @@ -57,6 +62,16 @@ */ #define mtod(m, t) ((t)((m)->m_data)) #define dtom(x) ((struct mbuf *)((intptr_t)(x) & ~(MSIZE-1))) + +/* + * Argument structure passed to UMA routines during mbuf and packet + * allocations. + */ +struct mb_args { + int flags; /* Flags for mbuf being allocated */ + int how; /* How to allocate: M_WAITOK or M_DONTWAIT */ + short type; /* Type of mbuf being allocated */ +}; #endif /* _KERNEL */ /* @@ -223,28 +238,12 @@ #define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */ /* - * Mbuf and cluster allocation statistics PCPU structure. - */ -struct mbpstat { - u_long mb_mbfree; - u_long mb_mbbucks; - u_long mb_clfree; - u_long mb_clbucks; - long mb_mbtypes[MT_NTYPES]; - short mb_active; -}; - -/* * General mbuf allocator statistics structure. - * XXX: Modifications of these are not protected by any mutex locks nor by - * any atomic() manipulations. As a result, we may occasionally lose - * a count or two. Luckily, not all of these fields are modified at all - * and remain static, and those that are manipulated are only manipulated - * in failure situations, which do not occur (hopefully) very often. */ struct mbstat { - u_long m_drops; /* times failed to allocate */ - u_long m_wait; /* times succesfully returned from wait */ + u_long m_mbufs; /* XXX */ + u_long m_mclusts; /* XXX */ + u_long m_drain; /* times drained protocols for space */ u_long m_mcfail; /* XXX: times m_copym failed */ u_long m_mpfail; /* XXX: times m_pullup failed */ @@ -253,10 +252,10 @@ u_long m_minclsize; /* min length of data to allocate a cluster */ u_long m_mlen; /* length of data in an mbuf */ u_long m_mhlen; /* length of data in a header mbuf */ - u_int m_mbperbuck; /* number of mbufs per "bucket" */ - u_int m_clperbuck; /* number of clusters per "bucket" */ - /* Number of mbtypes (gives # elems in mbpstat's mb_mbtypes[] array: */ + + /* Number of mbtypes (gives # elems in mbtypes[] array: */ short m_numtypes; + /* XXX: Sendfile stats should eventually move to their own struct */ u_long sf_iocnt; /* times sendfile had to do disk I/O */ u_long sf_allocfail; /* times sfbuf allocation failed */ @@ -265,14 +264,23 @@ /* * Flags specifying how an allocation should be made. - * M_DONTWAIT means "don't block if nothing is available" whereas - * M_TRYWAIT means "block for mbuf_wait ticks at most if nothing is - * available." - */ -#define M_DONTWAIT 0x4 /* don't conflict with M_NOWAIT */ -#define M_TRYWAIT 0x8 /* or M_WAITOK */ -#define M_WAIT M_TRYWAIT /* XXX: deprecated */ -#define MBTOM(how) ((how) & M_TRYWAIT ? M_WAITOK : M_NOWAIT) + * + * The flag to use is as follows: + * - M_DONTWAIT or M_NOWAIT from an interrupt handler to not block allocation. + * - M_WAIT or M_WAITOK or M_TRYWAIT from wherever it is safe to block. + * + * M_DONTWAIT/M_NOWAIT means that we will not block the thread explicitly + * and if we cannot allocate immediately we may return NULL, + * whereas M_WAIT/M_WAITOK/M_TRYWAIT means that if we cannot allocate + * resources we will block until they are available, and thus never + * return NULL. + * + * XXX Eventually just phase this out to use M_WAITOK/M_NOWAIT. 
+ */ +#define MBTOM(how) (how) +#define M_DONTWAIT M_NOWAIT +#define M_TRYWAIT M_WAITOK +#define M_WAIT M_WAITOK #ifdef _KERNEL /*- @@ -296,12 +304,113 @@ #define MEXT_ADD_REF(m) atomic_add_int((m)->m_ext.ref_cnt, 1) /* + * Network buffer allocation API + * + * The rest of it is defined in kern/subr_mbuf.c + */ + +extern uma_zone_t zone_mbuf; +extern uma_zone_t zone_clust; +extern uma_zone_t zone_pack; + +static __inline struct mbuf *m_get(int how, short type); +static __inline struct mbuf *m_gethdr(int how, short type); +static __inline struct mbuf *m_getcl(int how, short type, int flags); +static __inline struct mbuf *m_free(struct mbuf *m); +static __inline void m_clget(struct mbuf *m, int how); +static __inline void m_chtype(struct mbuf *m, short new_type); +void mb_free_ext(struct mbuf *); + +static __inline +struct mbuf * +m_get(int how, short type) +{ + struct mb_args args; + + args.flags = 0; + args.how = how; + args.type = type; + return (uma_zalloc_arg(zone_mbuf, &args, how)); +} + +/* XXX This should be depracated, very little use */ +static __inline +struct mbuf * +m_getclr(int how, short type) +{ + struct mbuf *m; + struct mb_args args; + + args.flags = 0; + args.how = how; + args.type = type; + m = uma_zalloc_arg(zone_mbuf, &args, how); + if (m != NULL) + bzero(m->m_data, MLEN); + return m; +} + +static __inline +struct mbuf * +m_gethdr(int how, short type) +{ + struct mb_args args; + + args.flags = M_PKTHDR; + args.how = how; + args.type = type; + return (uma_zalloc_arg(zone_mbuf, &args, how)); +} + +static __inline +struct mbuf * +m_getcl(int how, short type, int flags) +{ + struct mb_args args; + + args.flags = flags; + args.how = how; + args.type = type; + return (uma_zalloc_arg(zone_pack, &args, how)); +} + +static __inline +struct mbuf * +m_free(struct mbuf *m) +{ + struct mbuf *n = m->m_next; + +#ifdef INVARIANTS + m->m_flags |= M_FREELIST; +#endif + if (m->m_flags & M_EXT) + mb_free_ext(m); + else + uma_zfree(zone_mbuf, m); + return n; +} + +static __inline +void +m_clget(struct mbuf *m, int how) +{ + m->m_ext.ext_buf = NULL; + uma_zalloc_arg(zone_clust, m, how); +} + +static __inline +void +m_chtype(struct mbuf *m, short new_type) +{ + m->m_type = new_type; +} + +/* * mbuf, cluster, and external object allocation macros * (for compatibility purposes). */ /* NB: M_COPY_PKTHDR is deprecated. Use M_MOVE_PKTHDR or m_dup_pktdr. */ #define M_MOVE_PKTHDR(to, from) m_move_pkthdr((to), (from)) -#define m_getclr(how, type) m_get_clrd((how), (type)) #define MGET(m, how, type) ((m) = m_get((how), (type))) #define MGETHDR(m, how, type) ((m) = m_gethdr((how), (type))) #define MCLGET(m, how) m_clget((m), (how)) @@ -309,23 +418,6 @@ m_extadd((m), (caddr_t)(buf), (size), (free), (args), (flags), (type)) /* - * MEXTFREE(m): disassociate (and possibly free) an external object from (m). - * - * If the atomic_cmpset_int() returns 0, then we effectively do nothing - * in terms of "cleaning up" (freeing the ext buf and ref. counter) as - * this means that either there are still references, or another thread - * is taking care of the clean-up. - */ -#define MEXTFREE(m) do { \ - struct mbuf *_mb = (m); \ - \ - MEXT_REM_REF(_mb); \ - if (atomic_cmpset_int(_mb->m_ext.ref_cnt, 0, 1)) \ - _mext_free(_mb); \ - _mb->m_flags &= ~M_EXT; \ -} while (0) - -/* * Evaluate TRUE if it's safe to write to the mbuf m's data region (this * can be both the local data payload, or an external buffer area, * depending on whether M_EXT is set). 
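A minimal consumer-side sketch of the inline API above (build_pkt() and its sizing policy are illustrative only, not part of the patch): a single call now reaches the right UMA zone directly, and the compatibility defines make the old M_DONTWAIT/M_TRYWAIT names equivalent to M_NOWAIT/M_WAITOK.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

static struct mbuf *
build_pkt(int len, int how)
{
	struct mbuf *m;

	if (len + max_linkhdr >= MINCLSIZE)
		m = m_getcl(how, MT_DATA, M_PKTHDR);	/* zone_pack */
	else
		m = m_gethdr(how, MT_DATA);		/* zone_mbuf */
	if (m == NULL)
		return (NULL);
	m->m_data += max_linkhdr;	/* leave room for link-layer header */
	m->m_pkthdr.len = m->m_len = 0;	/* caller appends the data */
	return (m);
}

On the free side, m_free()/m_freem() hand mbufs carrying a cluster back to zone_pack via mb_free_ext() once the reference count drops, and plain mbufs go straight to zone_mbuf.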
@@ -425,18 +517,13 @@ extern int max_protohdr; /* Largest protocol header */ extern struct mbstat mbstat; /* General mbuf stats/infos */ extern int nmbclusters; /* Maximum number of clusters */ -extern int nmbcnt; /* Scale kmem_map for counter space */ -extern int nmbufs; /* Maximum number of mbufs */ struct uio; -void _mext_free(struct mbuf *); void m_adj(struct mbuf *, int); int m_apply(struct mbuf *, int, int, int (*)(void *, void *, u_int), void *); void m_cat(struct mbuf *, struct mbuf *); -void m_chtype(struct mbuf *, short); -void m_clget(struct mbuf *, int); void m_extadd(struct mbuf *, caddr_t, u_int, void (*)(void *, void *), void *, int, int); void m_copyback(struct mbuf *, int, int, c_caddr_t); @@ -451,13 +538,7 @@ int m_dup_pkthdr(struct mbuf *, struct mbuf *, int); u_int m_fixhdr(struct mbuf *); struct mbuf *m_fragment(struct mbuf *, int, int); -struct mbuf *m_free(struct mbuf *); void m_freem(struct mbuf *); -struct mbuf *m_get(int, short); -struct mbuf *m_get_clrd(int, short); -struct mbuf *m_getcl(int, short, int); -struct mbuf *m_gethdr(int, short); -struct mbuf *m_gethdr_clrd(int, short); struct mbuf *m_getm(struct mbuf *, int, int, short); struct mbuf *m_getptr(struct mbuf *, int, int *); u_int m_length(struct mbuf *, struct mbuf **); @@ -470,7 +551,7 @@ struct mbuf *m_uiotombuf(struct uio *, int, int); /*- - * Packets may have annotations attached by affixing a list + * Network packets may have annotations attached by affixing a list * of "packet tags" to the pkthdr structure. Packet tags are * dynamically allocated semi-opaque data structures that have * a fixed header (struct m_tag) that specifies the size of the diff -ruN vendor/src/sys/vm/uma.h mbuma2/src/sys/vm/uma.h --- vendor/src/sys/vm/uma.h Tue May 25 11:51:13 2004 +++ mbuma2/src/sys/vm/uma.h Sat Mar 27 10:48:28 2004 @@ -43,7 +43,7 @@ /* Types and type defs */ -struct uma_zone; +struct uma_zone; /* Opaque type used as a handle to the zone */ typedef struct uma_zone * uma_zone_t; @@ -157,12 +157,46 @@ * A pointer to a structure which is intended to be opaque to users of * the interface. The value may be null if the wait flag is not set. */ - uma_zone_t uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor, uma_init uminit, uma_fini fini, int align, u_int16_t flags); /* + * Create a secondary uma zone + * + * Arguments: + * name The text name of the zone for debugging and stats, this memory + * should not be freed until the zone has been deallocated. + * ctor The constructor that is called when the object is allocated + * dtor The destructor that is called when the object is freed. + * zinit An initializer that sets up the initial state of the memory + * as the object passes from the Keg's slab to the Zone's cache. + * zfini A discard function that undoes initialization done by init + * as the object passes from the Zone's cache to the Keg's slab. + * + * ctor/dtor/zinit/zfini may all be null, see notes above. + * Note that the zinit and zfini specified here are NOT + * exactly the same as the init/fini specified to uma_zcreate() + * when creating a master zone. These zinit/zfini are called + * on the TRANSITION from keg to zone (and vice-versa). Once + * these are set, the primary zone may alter its init/fini + * (which are called when the object passes from VM to keg) + * using uma_zone_set_init/fini()) as well as its own + * zinit/zfini (unset by default for master zone) with + * uma_zone_set_zinit/zfini() (note subtle 'z' prefix). 
+ * + * align A bitmask that corisponds to the requested alignment + * eg 4 would be 0x3 + * flags A set of parameters that control the behavior of the zone + * + * Returns: + * A pointer to a structure which is intended to be opaque to users of + * the interface. The value may be null if the wait flag is not set. + */ +uma_zone_t uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor, + uma_init zinit, uma_fini zfini, uma_zone_t master); + +/* * Definitions for uma_zcreate flags * * These flags share space with UMA_ZFLAGs in uma_int.h. Be careful not to @@ -185,6 +219,9 @@ * Use a hash table instead of caching * information in the vm_page. */ +#define UMA_ZONE_SECONDARY 0x0200 /* Zone is a Secondary Zone */ +#define UMA_ZONE_REFCNT 0x0400 /* Allocate refcnts in slabs */ +#define UMA_ZONE_MAXBUCKET 0x0800 /* Use largest buckets */ /* Definitions for align */ #define UMA_ALIGN_PTR (sizeof(void *) - 1) /* Alignment fit for ptr */ @@ -201,7 +238,6 @@ * zone The zone we want to destroy. * */ - void uma_zdestroy(uma_zone_t zone); /* @@ -376,6 +412,28 @@ void uma_zone_set_max(uma_zone_t zone, int nitems); /* + * The following two routines (uma_zone_set_init/fini) + * are used to set the backend init/fini pair which acts on an + * object as it becomes allocated and is placed in a slab within + * the specified zone's backing keg. These should probably not + * be changed once allocations have already begun and only + * immediately upon zone creation. + */ +void uma_zone_set_init(uma_zone_t zone, uma_init uminit); +void uma_zone_set_fini(uma_zone_t zone, uma_fini fini); + +/* + * The following two routines (uma_zone_set_zinit/zfini) are + * used to set the zinit/zfini pair which acts on an object as + * it passes from the backing Keg's slab cache to the + * specified Zone's bucket cache. These should probably not + * be changed once allocations have already begun and + * only immediately upon zone creation. + */ +void uma_zone_set_zinit(uma_zone_t zone, uma_init zinit); +void uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini); + +/* * Replaces the standard page_alloc or obj_alloc functions for this zone * * Arguments: @@ -430,5 +488,19 @@ */ void uma_prealloc(uma_zone_t zone, int itemcnt); +/* + * Used to lookup the reference counter allocated for an item + * from a UMA_ZONE_REFCNT zone. For UMA_ZONE_REFCNT zones, + * reference counters are allocated for items and stored in + * the underlying slab header. + * + * Arguments: + * zone The UMA_ZONE_REFCNT zone to which the item belongs. + * item The address of the item for which we want a refcnt. + * + * Returns: + * A pointer to a u_int32_t reference counter. + */ +u_int32_t *uma_find_refcnt(uma_zone_t zone, void *item); #endif diff -ruN vendor/src/sys/vm/uma_core.c mbuma2/src/sys/vm/uma_core.c --- vendor/src/sys/vm/uma_core.c Tue May 25 11:51:13 2004 +++ mbuma2/src/sys/vm/uma_core.c Fri May 28 11:49:16 2004 @@ -84,15 +84,19 @@ #include /* - * This is the zone from which all zones are spawned. The idea is that even - * the zone heads are allocated from the allocator, so we use the bss section - * to bootstrap us. - */ -static struct uma_zone masterzone; -static uma_zone_t zones = &masterzone; + * This is the zone and keg from which all zones are spawned. The idea is that + * even the zone & keg heads are allocated from the allocator, so we use the + * bss section to bootstrap us. 
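A hypothetical consumer of the interfaces documented above (the foo_* names, the item size and the SYSINIT placement are illustrative, not from the patch): a master zone whose keg allocates per-item reference counters via UMA_ZONE_REFCNT, a secondary zone sharing that keg with its own zinit/zfini, and a refcount lookup through uma_find_refcnt().

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <machine/atomic.h>
#include <vm/uma.h>

#define	FOO_SIZE	256		/* assumed item size */

static uma_zone_t foo_zone;		/* master zone */
static uma_zone_t foo_cooked_zone;	/* secondary zone, same keg */

static void
foo_zinit(void *item, int size)
{
	/* Runs as items move from the keg's slabs into this zone. */
}

static void
foo_zfini(void *item, int size)
{
	/* Undoes foo_zinit() on the way back to the keg. */
}

static void
foo_init_zones(void *dummy)
{
	foo_zone = uma_zcreate("foo", FOO_SIZE, NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	foo_cooked_zone = uma_zsecond_create("foo cooked", NULL, NULL,
	    foo_zinit, foo_zfini, foo_zone);
}
SYSINIT(foo_zones, SI_SUB_KMEM, SI_ORDER_ANY, foo_init_zones, NULL);

static void
foo_ref(void *item)
{
	u_int32_t *cnt;

	/* REFCNT kegs keep one counter per item in the slab header. */
	cnt = uma_find_refcnt(foo_zone, item);
	atomic_add_32(cnt, 1);
}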
+ */ +static struct uma_keg masterkeg; +static struct uma_zone masterzone_k; +static struct uma_zone masterzone_z; +static uma_zone_t kegs = &masterzone_k; +static uma_zone_t zones = &masterzone_z; /* This is the zone from which all of uma_slab_t's are allocated. */ static uma_zone_t slabzone; +static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */ /* * The initial hash tables come out of this zone so they can be allocated @@ -107,10 +111,10 @@ */ static int bucketdisable = 1; -/* Linked list of all zones in the system */ -static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones); +/* Linked list of all kegs in the system */ +static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs); -/* This mutex protects the zone list */ +/* This mutex protects the keg list */ static struct mtx uma_mtx; /* These are the pcpu cache locks */ @@ -144,6 +148,16 @@ uma_dtor dtor; uma_init uminit; uma_fini fini; + uma_keg_t keg; + int align; + u_int16_t flags; +}; + +struct uma_kctor_args { + uma_zone_t zone; + size_t size; + uma_init uminit; + uma_fini fini; int align; u_int16_t flags; }; @@ -179,6 +193,8 @@ static void cache_drain(uma_zone_t); static void bucket_drain(uma_zone_t, uma_bucket_t); static void bucket_cache_drain(uma_zone_t zone); +static void keg_ctor(void *, int, void *); +static void keg_dtor(void *, int, void *); static void zone_ctor(void *, int, void *); static void zone_dtor(void *, int, void *); static void zero_init(void *, int); @@ -202,6 +218,8 @@ static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags); static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab); static void zone_drain(uma_zone_t); +static void uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, + uma_fini fini, int align, u_int16_t flags); void uma_print_zone(uma_zone_t); void uma_print_stats(void); @@ -328,10 +346,12 @@ static void zone_timeout(uma_zone_t zone) { + uma_keg_t keg; uma_cache_t cache; u_int64_t alloc; int cpu; + keg = zone->uz_keg; alloc = 0; /* @@ -344,7 +364,7 @@ * to lock and do it here instead so that the statistics don't get too * far out of sync. */ - if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) { + if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL)) { for (cpu = 0; cpu <= mp_maxid; cpu++) { if (CPU_ABSENT(cpu)) continue; @@ -369,8 +389,8 @@ * may be a little aggressive. Should I allow for two collisions max? */ - if (zone->uz_flags & UMA_ZONE_HASH && - zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) { + if (keg->uk_flags & UMA_ZONE_HASH && + keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) { struct uma_hash newhash; struct uma_hash oldhash; int ret; @@ -381,14 +401,14 @@ * I have to do everything in stages and check for * races. */ - newhash = zone->uz_hash; + newhash = keg->uk_hash; ZONE_UNLOCK(zone); ret = hash_alloc(&newhash); ZONE_LOCK(zone); if (ret) { - if (hash_expand(&zone->uz_hash, &newhash)) { - oldhash = zone->uz_hash; - zone->uz_hash = newhash; + if (hash_expand(&keg->uk_hash, &newhash)) { + oldhash = keg->uk_hash; + keg->uk_hash = newhash; } else oldhash = newhash; @@ -530,7 +550,7 @@ mzone = 0; /* We have to lookup the slab again for malloc.. 
*/ - if (zone->uz_flags & UMA_ZONE_MALLOC) + if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC) mzone = 1; while (bucket->ub_cnt > 0) { @@ -636,29 +656,32 @@ zone_drain(uma_zone_t zone) { struct slabhead freeslabs = {}; + uma_keg_t keg; uma_slab_t slab; uma_slab_t n; u_int8_t flags; u_int8_t *mem; int i; + keg = zone->uz_keg; + /* - * We don't want to take pages from staticly allocated zones at this + * We don't want to take pages from statically allocated zones at this * time */ - if (zone->uz_flags & UMA_ZONE_NOFREE || zone->uz_freef == NULL) + if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL) return; ZONE_LOCK(zone); #ifdef UMA_DEBUG - printf("%s free items: %u\n", zone->uz_name, zone->uz_free); + printf("%s free items: %u\n", zone->uz_name, keg->uk_free); #endif bucket_cache_drain(zone); - if (zone->uz_free == 0) + if (keg->uk_free == 0) goto finished; - slab = LIST_FIRST(&zone->uz_free_slab); + slab = LIST_FIRST(&keg->uk_free_slab); while (slab) { n = LIST_NEXT(slab, us_link); @@ -669,11 +692,11 @@ } LIST_REMOVE(slab, us_link); - zone->uz_pages -= zone->uz_ppera; - zone->uz_free -= zone->uz_ipers; + keg->uk_pages -= keg->uk_ppera; + keg->uk_free -= keg->uk_ipers; - if (zone->uz_flags & UMA_ZONE_HASH) - UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data); + if (keg->uk_flags & UMA_ZONE_HASH) + UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data); SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink); @@ -684,34 +707,34 @@ while ((slab = SLIST_FIRST(&freeslabs)) != NULL) { SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink); - if (zone->uz_fini) - for (i = 0; i < zone->uz_ipers; i++) - zone->uz_fini( - slab->us_data + (zone->uz_rsize * i), - zone->uz_size); + if (keg->uk_fini) + for (i = 0; i < keg->uk_ipers; i++) + keg->uk_fini( + slab->us_data + (keg->uk_rsize * i), + keg->uk_size); flags = slab->us_flags; mem = slab->us_data; - if (zone->uz_flags & UMA_ZONE_OFFPAGE) - uma_zfree_internal(slabzone, slab, NULL, 0); - if (zone->uz_flags & UMA_ZONE_MALLOC) { + if ((keg->uk_flags & UMA_ZONE_MALLOC) || + (keg->uk_flags & UMA_ZONE_REFCNT)) { vm_object_t obj; if (flags & UMA_SLAB_KMEM) obj = kmem_object; else obj = NULL; - for (i = 0; i < zone->uz_ppera; i++) + for (i = 0; i < keg->uk_ppera; i++) vsetobj((vm_offset_t)mem + (i * PAGE_SIZE), obj); } + if (keg->uk_flags & UMA_ZONE_OFFPAGE) + uma_zfree_internal(keg->uk_slabzone, slab, NULL, 0); #ifdef UMA_DEBUG printf("%s: Returning %d bytes.\n", - zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera); + zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera); #endif - zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags); + keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags); } - } /* @@ -728,20 +751,23 @@ static uma_slab_t slab_zalloc(uma_zone_t zone, int wait) { - uma_slab_t slab; /* Starting slab */ + uma_slabrefcnt_t slabref; + uma_slab_t slab; + uma_keg_t keg; u_int8_t *mem; u_int8_t flags; int i; slab = NULL; + keg = zone->uz_keg; #ifdef UMA_DEBUG printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name); #endif ZONE_UNLOCK(zone); - if (zone->uz_flags & UMA_ZONE_OFFPAGE) { - slab = uma_zalloc_internal(slabzone, NULL, wait); + if (keg->uk_flags & UMA_ZONE_OFFPAGE) { + slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait); if (slab == NULL) { ZONE_LOCK(zone); return NULL; @@ -755,12 +781,12 @@ * Malloced items are zeroed in uma_zalloc. 
*/ - if ((zone->uz_flags & UMA_ZONE_MALLOC) == 0) + if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) wait |= M_ZERO; else wait &= ~M_ZERO; - mem = zone->uz_allocf(zone, zone->uz_ppera * UMA_SLAB_SIZE, + mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait); if (mem == NULL) { ZONE_LOCK(zone); @@ -768,32 +794,39 @@ } /* Point the slab into the allocated memory */ - if (!(zone->uz_flags & UMA_ZONE_OFFPAGE)) - slab = (uma_slab_t )(mem + zone->uz_pgoff); + if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) + slab = (uma_slab_t )(mem + keg->uk_pgoff); - if (zone->uz_flags & UMA_ZONE_MALLOC) - for (i = 0; i < zone->uz_ppera; i++) + if ((keg->uk_flags & UMA_ZONE_MALLOC) || + (keg->uk_flags & UMA_ZONE_REFCNT)) + for (i = 0; i < keg->uk_ppera; i++) vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab); - slab->us_zone = zone; + slab->us_keg = keg; slab->us_data = mem; - slab->us_freecount = zone->uz_ipers; + slab->us_freecount = keg->uk_ipers; slab->us_firstfree = 0; slab->us_flags = flags; - for (i = 0; i < zone->uz_ipers; i++) - slab->us_freelist[i] = i+1; + for (i = 0; i < keg->uk_ipers; i++) + slab->us_freelist[i].us_item = i+1; + + if (keg->uk_flags & UMA_ZONE_REFCNT) { + slabref = (uma_slabrefcnt_t)slab; + for (i = 0; i < keg->uk_ipers; i++) + slabref->us_freelist[i].us_refcnt = 0; + } - if (zone->uz_init) - for (i = 0; i < zone->uz_ipers; i++) - zone->uz_init(slab->us_data + (zone->uz_rsize * i), - zone->uz_size); + if (keg->uk_init) + for (i = 0; i < keg->uk_ipers; i++) + keg->uk_init(slab->us_data + (keg->uk_rsize * i), + keg->uk_size); ZONE_LOCK(zone); - if (zone->uz_flags & UMA_ZONE_HASH) - UMA_HASH_INSERT(&zone->uz_hash, slab, mem); + if (keg->uk_flags & UMA_ZONE_HASH) + UMA_HASH_INSERT(&keg->uk_hash, slab, mem); - zone->uz_pages += zone->uz_ppera; - zone->uz_free += zone->uz_ipers; + keg->uk_pages += keg->uk_ppera; + keg->uk_free += keg->uk_ipers; return (slab); } @@ -806,6 +839,10 @@ static void * startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) { + uma_keg_t keg; + + keg = zone->uz_keg; + /* * Check our small startup cache to see if it has pages remaining. */ @@ -827,11 +864,11 @@ * Now that we've booted reset these users to their real allocator. */ #ifdef UMA_MD_SMALL_ALLOC - zone->uz_allocf = uma_small_alloc; + keg->uk_allocf = uma_small_alloc; #else - zone->uz_allocf = page_alloc; + keg->uk_allocf = page_alloc; #endif - return zone->uz_allocf(zone, bytes, pflag, wait); + return keg->uk_allocf(zone, bytes, pflag, wait); } /* @@ -877,7 +914,7 @@ vm_page_t p; int pages, startpages; - object = zone->uz_obj; + object = zone->uz_keg->uk_obj; retkva = 0; /* @@ -887,7 +924,7 @@ p = TAILQ_LAST(&object->memq, pglist); pages = p != NULL ? 
p->pindex + 1 : 0; startpages = pages; - zkva = zone->uz_kva + pages * PAGE_SIZE; + zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE; for (; bytes > 0; bytes -= PAGE_SIZE) { p = vm_page_alloc(object, pages, VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED); @@ -965,29 +1002,33 @@ static void zone_small_init(uma_zone_t zone) { + uma_keg_t keg; int rsize; int memused; int ipers; - rsize = zone->uz_size; + keg = zone->uz_keg; + KASSERT(keg != NULL, ("Keg is null in zone_small_init")); + rsize = keg->uk_size; if (rsize < UMA_SMALLEST_UNIT) rsize = UMA_SMALLEST_UNIT; - if (rsize & zone->uz_align) - rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1); + if (rsize & keg->uk_align) + rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1); - zone->uz_rsize = rsize; + keg->uk_rsize = rsize; rsize += 1; /* Account for the byte of linkage */ - zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize; - zone->uz_ppera = 1; + keg->uk_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize; + keg->uk_ppera = 1; - KASSERT(zone->uz_ipers != 0, ("zone_small_init: ipers is 0, uh-oh!")); - memused = zone->uz_ipers * zone->uz_rsize; + KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0, uh-oh!")); + memused = keg->uk_ipers * keg->uk_rsize; /* Can we do any better? */ - if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) { + if ((keg->uk_flags & UMA_ZONE_REFCNT) || + ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE)) { /* * We can't do this if we're internal or if we've been * asked to not go to the VM for buckets. If we do this we @@ -995,15 +1036,16 @@ * do not want to do if we're UMA_ZFLAG_CACHEONLY as a * result of UMA_ZONE_VM, which clearly forbids it. */ - if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) || - (zone->uz_flags & UMA_ZFLAG_CACHEONLY)) + if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) || + (keg->uk_flags & UMA_ZFLAG_CACHEONLY)) return; - ipers = UMA_SLAB_SIZE / zone->uz_rsize; - if (ipers > zone->uz_ipers) { - zone->uz_flags |= UMA_ZONE_OFFPAGE; - if ((zone->uz_flags & UMA_ZONE_MALLOC) == 0) - zone->uz_flags |= UMA_ZONE_HASH; - zone->uz_ipers = ipers; + ipers = UMA_SLAB_SIZE / keg->uk_rsize; + if ((keg->uk_flags & UMA_ZONE_REFCNT) || + (ipers > keg->uk_ipers)) { + keg->uk_flags |= UMA_ZONE_OFFPAGE; + if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) + keg->uk_flags |= UMA_ZONE_HASH; + keg->uk_ipers = ipers; } } } @@ -1022,179 +1064,288 @@ static void zone_large_init(uma_zone_t zone) { + uma_keg_t keg; int pages; - KASSERT((zone->uz_flags & UMA_ZFLAG_CACHEONLY) == 0, + keg = zone->uz_keg; + + KASSERT(keg != NULL, ("Keg is null in zone_large_init")); + KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0, ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone")); - pages = zone->uz_size / UMA_SLAB_SIZE; + pages = keg->uk_size / UMA_SLAB_SIZE; /* Account for remainder */ - if ((pages * UMA_SLAB_SIZE) < zone->uz_size) + if ((pages * UMA_SLAB_SIZE) < keg->uk_size) pages++; - zone->uz_ppera = pages; - zone->uz_ipers = 1; + keg->uk_ppera = pages; + keg->uk_ipers = 1; - zone->uz_flags |= UMA_ZONE_OFFPAGE; - if ((zone->uz_flags & UMA_ZONE_MALLOC) == 0) - zone->uz_flags |= UMA_ZONE_HASH; + keg->uk_flags |= UMA_ZONE_OFFPAGE; + if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) + keg->uk_flags |= UMA_ZONE_HASH; - zone->uz_rsize = zone->uz_size; + keg->uk_rsize = keg->uk_size; } /* - * Zone header ctor. This initializes all fields, locks, etc. And inserts - * the zone onto the global zone list. + * Keg header ctor. This initializes all fields, locks, etc. And inserts + * the keg onto the global keg list. 
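The arithmetic in zone_small_init() above is easy to check by hand; the small userland program below redoes it for one case. The 4 KB slab, 64-byte slab header and pointer alignment mask are assumptions picked for illustration, not values taken from the patch.

#include <stdio.h>

#define	SLAB_SIZE	4096	/* assumed UMA_SLAB_SIZE (= PAGE_SIZE) */
#define	SLAB_HDR	64	/* assumed sizeof(struct uma_slab) */
#define	ALIGN		7	/* UMA_ALIGN_PTR on a 64-bit machine */

int
main(void)
{
	int size = 250;		/* requested item size */
	int rsize, ipers, slack;

	rsize = size;
	if (rsize & ALIGN)	/* round up to the alignment boundary */
		rsize = (rsize & ~ALIGN) + (ALIGN + 1);
	/* One extra byte per item pays for the freelist linkage. */
	ipers = (SLAB_SIZE - SLAB_HDR) / (rsize + 1);
	slack = SLAB_SIZE - ipers * rsize;
	printf("rsize %d -> %d items per slab, %d bytes of slack\n",
	    rsize, ipers, slack);	/* 256 -> 15 items, 256 bytes */
	return (0);
}

When that slack reaches UMA_MAX_WASTE (and always for UMA_ZONE_REFCNT kegs), zone_small_init() reconsiders with off-page slab headers, which is exactly the new branch added above.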
* * Arguments/Returns follow uma_ctor specifications - * udata Actually uma_zcreat_args + * udata Actually uma_kctor_args */ - static void -zone_ctor(void *mem, int size, void *udata) +keg_ctor(void *mem, int size, void *udata) { - struct uma_zctor_args *arg = udata; - uma_zone_t zone = mem; - int privlc; + struct uma_kctor_args *arg = udata; + uma_keg_t keg = mem; + uma_zone_t zone; - bzero(zone, size); - zone->uz_name = arg->name; - zone->uz_size = arg->size; - zone->uz_ctor = arg->ctor; - zone->uz_dtor = arg->dtor; - zone->uz_init = arg->uminit; - zone->uz_fini = arg->fini; - zone->uz_align = arg->align; - zone->uz_free = 0; - zone->uz_pages = 0; - zone->uz_flags = arg->flags; - zone->uz_allocf = page_alloc; - zone->uz_freef = page_free; + bzero(keg, size); + keg->uk_size = arg->size; + keg->uk_init = arg->uminit; + keg->uk_fini = arg->fini; + keg->uk_align = arg->align; + keg->uk_free = 0; + keg->uk_pages = 0; + keg->uk_flags = arg->flags; + keg->uk_allocf = page_alloc; + keg->uk_freef = page_free; + keg->uk_recurse = 0; + keg->uk_slabzone = NULL; - if (arg->flags & UMA_ZONE_ZINIT) - zone->uz_init = zero_init; + /* + * The master zone is passed to us at keg-creation time. + */ + zone = arg->zone; + zone->uz_keg = keg; if (arg->flags & UMA_ZONE_VM) - zone->uz_flags |= UMA_ZFLAG_CACHEONLY; + keg->uk_flags |= UMA_ZFLAG_CACHEONLY; + + if (arg->flags & UMA_ZONE_ZINIT) + keg->uk_init = zero_init; /* - * XXX: - * The +1 byte added to uz_size is to account for the byte of + * The +1 byte added to uk_size is to account for the byte of * linkage that is added to the size in zone_small_init(). If * we don't account for this here then we may end up in * zone_small_init() with a calculated 'ipers' of 0. */ - if ((zone->uz_size+1) > (UMA_SLAB_SIZE - sizeof(struct uma_slab))) + if ((keg->uk_size+1) > (UMA_SLAB_SIZE - sizeof(struct uma_slab))) zone_large_init(zone); else zone_small_init(zone); + + if (keg->uk_flags & UMA_ZONE_REFCNT) + keg->uk_slabzone = slabrefzone; + else if (keg->uk_flags & UMA_ZONE_OFFPAGE) + keg->uk_slabzone = slabzone; + /* * If we haven't booted yet we need allocations to go through the * startup cache until the vm is ready. */ - if (zone->uz_ppera == 1) { + if (keg->uk_ppera == 1) { #ifdef UMA_MD_SMALL_ALLOC - zone->uz_allocf = uma_small_alloc; - zone->uz_freef = uma_small_free; + keg->uk_allocf = uma_small_alloc; + keg->uk_freef = uma_small_free; #endif if (booted == 0) - zone->uz_allocf = startup_alloc; + keg->uk_allocf = startup_alloc; } + + /* + * Initialize keg's lock (shared among zones) through + * Master zone + */ + zone->uz_lock = &keg->uk_lock; if (arg->flags & UMA_ZONE_MTXCLASS) - privlc = 1; + ZONE_LOCK_INIT(zone, 1); else - privlc = 0; + ZONE_LOCK_INIT(zone, 0); /* * If we're putting the slab header in the actual page we need to * figure out where in each page it goes. This calculates a right * justified offset into the memory on an ALIGN_PTR boundary. 
*/ - if (!(zone->uz_flags & UMA_ZONE_OFFPAGE)) { + if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) { int totsize; /* Size of the slab struct and free list */ - totsize = sizeof(struct uma_slab) + zone->uz_ipers; + totsize = sizeof(struct uma_slab) + keg->uk_ipers; if (totsize & UMA_ALIGN_PTR) totsize = (totsize & ~UMA_ALIGN_PTR) + (UMA_ALIGN_PTR + 1); - zone->uz_pgoff = UMA_SLAB_SIZE - totsize; - totsize = zone->uz_pgoff + sizeof(struct uma_slab) - + zone->uz_ipers; + keg->uk_pgoff = UMA_SLAB_SIZE - totsize; + totsize = keg->uk_pgoff + sizeof(struct uma_slab) + + keg->uk_ipers; /* I don't think it's possible, but I'll make sure anyway */ if (totsize > UMA_SLAB_SIZE) { printf("zone %s ipers %d rsize %d size %d\n", - zone->uz_name, zone->uz_ipers, zone->uz_rsize, - zone->uz_size); + zone->uz_name, keg->uk_ipers, keg->uk_rsize, + keg->uk_size); panic("UMA slab won't fit.\n"); } } - if (zone->uz_flags & UMA_ZONE_HASH) - hash_alloc(&zone->uz_hash); + if (keg->uk_flags & UMA_ZONE_HASH) + hash_alloc(&keg->uk_hash); #ifdef UMA_DEBUG printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n", zone->uz_name, zone, - zone->uz_size, zone->uz_ipers, - zone->uz_ppera, zone->uz_pgoff); + keg->uk_size, keg->uk_ipers, + keg->uk_ppera, keg->uk_pgoff); #endif - ZONE_LOCK_INIT(zone, privlc); + + LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link); mtx_lock(&uma_mtx); - LIST_INSERT_HEAD(&uma_zones, zone, uz_link); + LIST_INSERT_HEAD(&uma_kegs, keg, uk_link); mtx_unlock(&uma_mtx); +} + +/* + * Zone header ctor. This initializes all fields, locks, etc. + * + * Arguments/Returns follow uma_ctor specifications + * udata Actually uma_zctor_args + */ + +static void +zone_ctor(void *mem, int size, void *udata) +{ + struct uma_zctor_args *arg = udata; + uma_zone_t zone = mem; + uma_zone_t z; + uma_keg_t keg; + + bzero(zone, size); + zone->uz_name = arg->name; + zone->uz_ctor = arg->ctor; + zone->uz_dtor = arg->dtor; + zone->uz_init = NULL; + zone->uz_fini = NULL; + zone->uz_allocs = 0; + zone->uz_fills = zone->uz_count = 0; + + if (arg->flags & UMA_ZONE_SECONDARY) { + KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg")); + keg = arg->keg; + zone->uz_keg = keg; + zone->uz_init = arg->uminit; + zone->uz_fini = arg->fini; + zone->uz_lock = &keg->uk_lock; + mtx_lock(&uma_mtx); + ZONE_LOCK(zone); + LIST_FOREACH(z, &keg->uk_zones, uz_link) { + if (LIST_NEXT(z, uz_link) == NULL) { + LIST_INSERT_AFTER(z, zone, uz_link); + break; + } + } + ZONE_UNLOCK(zone); + mtx_unlock(&uma_mtx); + } else if (arg->keg == NULL) { + uma_kcreate(zone, arg->size, arg->uminit, arg->fini, + arg->align, arg->flags); + } else { + struct uma_kctor_args karg; + + /* We should only be here from uma_startup() */ + karg.size = arg->size; + karg.uminit = arg->uminit; + karg.fini = arg->fini; + karg.align = arg->align; + karg.flags = arg->flags; + karg.zone = zone; + keg_ctor(arg->keg, sizeof(struct uma_keg), &karg); + } + keg = zone->uz_keg; + zone->uz_lock = &keg->uk_lock; /* * Some internal zones don't have room allocated for the per cpu * caches. If we're internal, bail out here. 
*/ - if (zone->uz_flags & UMA_ZFLAG_INTERNAL) + if (keg->uk_flags & UMA_ZFLAG_INTERNAL) { + KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0, + ("Secondary zone requested UMA_ZFLAG_INTERNAL")); return; + } - if (zone->uz_ipers <= BUCKET_MAX) - zone->uz_count = zone->uz_ipers; + if (keg->uk_flags & UMA_ZONE_MAXBUCKET) + zone->uz_count = BUCKET_MAX; + else if (keg->uk_ipers <= BUCKET_MAX) + zone->uz_count = keg->uk_ipers; else zone->uz_count = BUCKET_MAX; } /* - * Zone header dtor. This frees all data, destroys locks, frees the hash table - * and removes the zone from the global list. + * Keg header dtor. This frees all data, destroys locks, frees the hash + * table and removes the keg from the global list. * * Arguments/Returns follow uma_dtor specifications * udata unused */ +static void +keg_dtor(void *arg, int size, void *udata) +{ + uma_keg_t keg; + keg = (uma_keg_t)arg; + mtx_lock(&keg->uk_lock); + if (keg->uk_free != 0) { + printf("Freed UMA keg was not empty (%d items). " + " Lost %d pages of memory.\n", + keg->uk_free, keg->uk_pages); + } + mtx_unlock(&keg->uk_lock); + + if (keg->uk_flags & UMA_ZONE_HASH) + hash_free(&keg->uk_hash); + + mtx_destroy(&keg->uk_lock); +} + +/* + * Zone header dtor. + * + * Arguments/Returns follow uma_dtor specifications + * udata unused + */ static void zone_dtor(void *arg, int size, void *udata) { uma_zone_t zone; + uma_keg_t keg; zone = (uma_zone_t)arg; + keg = zone->uz_keg; - if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) + if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL)) cache_drain(zone); - mtx_lock(&uma_mtx); - LIST_REMOVE(zone, uz_link); - zone_drain(zone); - mtx_unlock(&uma_mtx); - ZONE_LOCK(zone); - if (zone->uz_free != 0) { - printf("Zone %s was not empty (%d items). " - " Lost %d pages of memory.\n", - zone->uz_name, zone->uz_free, zone->uz_pages); - uma_print_zone(zone); + if (keg->uk_flags & UMA_ZONE_SECONDARY) { + mtx_lock(&uma_mtx); + LIST_REMOVE(zone, uz_link); + mtx_unlock(&uma_mtx); + } else { + mtx_lock(&uma_mtx); + LIST_REMOVE(keg, uk_link); + zone_drain(zone); + LIST_REMOVE(zone, uz_link); + mtx_unlock(&uma_mtx); + uma_zfree_internal(kegs, keg, NULL, 0); } - - ZONE_UNLOCK(zone); - if (zone->uz_flags & UMA_ZONE_HASH) - hash_free(&zone->uz_hash); - - ZONE_LOCK_FINI(zone); + zone->uz_keg = NULL; } + /* * Traverses every zone in the system and calls a callback * @@ -1208,11 +1359,14 @@ static void zone_foreach(void (*zfunc)(uma_zone_t)) { + uma_keg_t keg; uma_zone_t zone; mtx_lock(&uma_mtx); - LIST_FOREACH(zone, &uma_zones, uz_link) - zfunc(zone); + LIST_FOREACH(keg, &uma_kegs, uk_link) { + LIST_FOREACH(zone, &keg->uk_zones, uz_link) + zfunc(zone); + } mtx_unlock(&uma_mtx); } @@ -1227,25 +1381,23 @@ int i; #ifdef UMA_DEBUG - printf("Creating uma zone headers zone.\n"); + printf("Creating uma keg headers zone and keg.\n"); #endif mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF); - /* "manually" Create the initial zone */ - args.name = "UMA Zones"; - args.size = sizeof(struct uma_zone) + - (sizeof(struct uma_cache) * (mp_maxid + 1)); - args.ctor = zone_ctor; - args.dtor = zone_dtor; + + /* "manually" create the initial zone */ + args.name = "UMA Kegs"; + args.size = sizeof(struct uma_keg); + args.ctor = keg_ctor; + args.dtor = keg_dtor; args.uminit = zero_init; args.fini = NULL; + args.keg = &masterkeg; args.align = 32 - 1; args.flags = UMA_ZFLAG_INTERNAL; /* The initial zone has no Per cpu queues so it's smaller */ - zone_ctor(zones, sizeof(struct uma_zone), &args); + zone_ctor(kegs, sizeof(struct uma_zone), &args); - /* Initialize the pcpu cache 
lock set once and for all */ - for (i = 0; i <= mp_maxid; i++) - CPU_LOCK_INIT(i); #ifdef UMA_DEBUG printf("Filling boot free list.\n"); #endif @@ -1258,7 +1410,30 @@ } #ifdef UMA_DEBUG - printf("Creating slab zone.\n"); + printf("Creating uma zone headers zone and keg.\n"); +#endif + args.name = "UMA Zones"; + args.size = sizeof(struct uma_zone) + + (sizeof(struct uma_cache) * (mp_maxid + 1)); + args.ctor = zone_ctor; + args.dtor = zone_dtor; + args.uminit = zero_init; + args.fini = NULL; + args.keg = NULL; + args.align = 32 - 1; + args.flags = UMA_ZFLAG_INTERNAL; + /* The initial zone has no Per cpu queues so it's smaller */ + zone_ctor(zones, sizeof(struct uma_zone), &args); + +#ifdef UMA_DEBUG + printf("Initializing pcpu cache locks.\n"); +#endif + /* Initialize the pcpu cache lock set once and for all */ + for (i = 0; i <= mp_maxid; i++) + CPU_LOCK_INIT(i); + +#ifdef UMA_DEBUG + printf("Creating slab and hash zones.\n"); #endif /* @@ -1276,6 +1451,20 @@ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); + /* + * We also create a zone for the bigger slabs with reference + * counts in them, to accomodate UMA_ZONE_REFCNT zones. + */ + slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt); + slabsize /= UMA_MAX_WASTE; + slabsize++; + slabsize += 4 * slabsize; + slabsize += sizeof(struct uma_slab_refcnt); + slabrefzone = uma_zcreate("UMA RCntSlabs", + slabsize, + NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); + hashzone = uma_zcreate("UMA Hash", sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT, NULL, NULL, NULL, NULL, @@ -1321,6 +1510,21 @@ #endif } +static void +uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini, + int align, u_int16_t flags) +{ + struct uma_kctor_args args; + + args.size = size; + args.uminit = uminit; + args.fini = fini; + args.align = align; + args.flags = flags; + args.zone = zone; + zone = uma_zalloc_internal(kegs, &args, M_WAITOK); +} + /* See uma.h */ uma_zone_t uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor, @@ -1338,6 +1542,27 @@ args.fini = fini; args.align = align; args.flags = flags; + args.keg = NULL; + + return (uma_zalloc_internal(zones, &args, M_WAITOK)); +} + +/* See uma.h */ +uma_zone_t +uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor, + uma_init zinit, uma_fini zfini, uma_zone_t master) +{ + struct uma_zctor_args args; + + args.name = name; + args.size = master->uz_keg->uk_size; + args.ctor = ctor; + args.dtor = dtor; + args.uminit = zinit; + args.fini = zfini; + args.align = master->uz_keg->uk_align; + args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY; + args.keg = master->uz_keg; return (uma_zalloc_internal(zones, &args, M_WAITOK)); } @@ -1357,35 +1582,25 @@ uma_cache_t cache; uma_bucket_t bucket; int cpu; + int badness = 1; /* This is the fast path allocation */ #ifdef UMA_DEBUG_ALLOC_1 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone); #endif -#ifdef INVARIANTS - /* - * To make sure that WAITOK or NOWAIT is set, but not more than - * one, and check against the API botches that are common. - * The uma code implies M_WAITOK if M_NOWAIT is not set, so - * we default to waiting if none of the flags is set. 
- */ - cpu = flags & (M_WAITOK | M_NOWAIT | M_DONTWAIT | M_TRYWAIT); - if (cpu != M_NOWAIT && cpu != M_WAITOK) { - static struct timeval lasterr; - static int curerr, once; - if (once == 0 && ppsratecheck(&lasterr, &curerr, 1)) { - printf("Bad uma_zalloc flags: %x\n", cpu); - backtrace(); - once++; - } - } -#endif if (!(flags & M_NOWAIT)) { KASSERT(curthread->td_intr_nesting_level == 0, ("malloc(M_WAITOK) in interrupt context")); - WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, - "malloc() of \"%s\"", zone->uz_name); +#ifdef WITNESS + badness = WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, + "malloc(M_WAITOK) of \"%s\", forcing M_NOWAIT", + zone->uz_name); +#endif + if (badness) { + flags &= ~M_WAITOK; + flags |= M_NOWAIT; + } } zalloc_restart: @@ -1413,9 +1628,9 @@ #endif CPU_UNLOCK(cpu); if (zone->uz_ctor) - zone->uz_ctor(item, zone->uz_size, udata); + zone->uz_ctor(item,zone->uz_keg->uk_size,udata); if (flags & M_ZERO) - bzero(item, zone->uz_size); + bzero(item, zone->uz_keg->uk_size); return (item); } else if (cache->uc_freebucket) { /* @@ -1465,6 +1680,7 @@ /* Bump up our uz_count so we get here less */ if (zone->uz_count < BUCKET_MAX) zone->uz_count++; + /* * Now lets just fill a bucket and put it on the free list. If that * works we'll restart the allocation from the begining. @@ -1488,6 +1704,9 @@ uma_zone_slab(uma_zone_t zone, int flags) { uma_slab_t slab; + uma_keg_t keg; + + keg = zone->uz_keg; /* * This is to prevent us from recursively trying to allocate @@ -1498,7 +1717,7 @@ * things happen. So instead we return a NULL bucket, and make * the code that allocates buckets smart enough to deal with it */ - if (zone->uz_flags & UMA_ZFLAG_INTERNAL && zone->uz_recurse != 0) + if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0) return (NULL); slab = NULL; @@ -1509,14 +1728,14 @@ * used over those that are totally full. This helps to reduce * fragmentation. */ - if (zone->uz_free != 0) { - if (!LIST_EMPTY(&zone->uz_part_slab)) { - slab = LIST_FIRST(&zone->uz_part_slab); + if (keg->uk_free != 0) { + if (!LIST_EMPTY(&keg->uk_part_slab)) { + slab = LIST_FIRST(&keg->uk_part_slab); } else { - slab = LIST_FIRST(&zone->uz_free_slab); + slab = LIST_FIRST(&keg->uk_free_slab); LIST_REMOVE(slab, us_link); - LIST_INSERT_HEAD(&zone->uz_part_slab, slab, - us_link); + LIST_INSERT_HEAD(&keg->uk_part_slab, slab, + us_link); } return (slab); } @@ -1527,27 +1746,28 @@ if (flags & M_NOVM) break; - if (zone->uz_maxpages && - zone->uz_pages >= zone->uz_maxpages) { - zone->uz_flags |= UMA_ZFLAG_FULL; + if (keg->uk_maxpages && + keg->uk_pages >= keg->uk_maxpages) { + keg->uk_flags |= UMA_ZFLAG_FULL; if (flags & M_NOWAIT) break; else - msleep(zone, &zone->uz_lock, PVM, + msleep(keg, &keg->uk_lock, PVM, "zonelimit", 0); continue; } - zone->uz_recurse++; + keg->uk_recurse++; slab = slab_zalloc(zone, flags); - zone->uz_recurse--; + keg->uk_recurse--; + /* * If we got a slab here it's safe to mark it partially used * and return. We assume that the caller is going to remove * at least one item. 
*/ if (slab) { - LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link); + LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link); return (slab); } /* @@ -1564,22 +1784,25 @@ static void * uma_slab_alloc(uma_zone_t zone, uma_slab_t slab) { + uma_keg_t keg; void *item; u_int8_t freei; + keg = zone->uz_keg; + freei = slab->us_firstfree; - slab->us_firstfree = slab->us_freelist[freei]; - item = slab->us_data + (zone->uz_rsize * freei); + slab->us_firstfree = slab->us_freelist[freei].us_item; + item = slab->us_data + (keg->uk_rsize * freei); slab->us_freecount--; - zone->uz_free--; + keg->uk_free--; #ifdef INVARIANTS uma_dbg_alloc(zone, slab, item); #endif /* Move this slab to the full list */ if (slab->us_freecount == 0) { LIST_REMOVE(slab, us_link); - LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link); + LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link); } return (item); @@ -1590,6 +1813,7 @@ { uma_bucket_t bucket; uma_slab_t slab; + int16_t saved; int max; /* @@ -1603,7 +1827,7 @@ int bflags; bflags = (flags & ~M_ZERO); - if (zone->uz_flags & UMA_ZFLAG_CACHEONLY) + if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY) bflags |= M_NOVM; ZONE_UNLOCK(zone); @@ -1628,18 +1852,36 @@ max = MIN(bucket->ub_entries, zone->uz_count); /* Try to keep the buckets totally full */ + saved = bucket->ub_cnt; while (bucket->ub_cnt < max && (slab = uma_zone_slab(zone, flags)) != NULL) { while (slab->us_freecount && bucket->ub_cnt < max) { bucket->ub_bucket[bucket->ub_cnt++] = uma_slab_alloc(zone, slab); } + /* Don't block on the next fill */ flags |= M_NOWAIT; } - zone->uz_fills--; + /* + * We unlock here because we need to call the zone's init. + * It should be safe to unlock because the slab dealt with + * above is already on the appropriate list within the keg + * and the bucket we filled is not yet on any list, so we + * own it. + */ + if (zone->uz_init != NULL) { + int i; + + ZONE_UNLOCK(zone); + for (i = saved; i < bucket->ub_cnt; i++) + zone->uz_init(bucket->ub_bucket[i], + zone->uz_keg->uk_size); + ZONE_LOCK(zone); + } + zone->uz_fills--; if (bucket->ub_cnt != 0) { LIST_INSERT_HEAD(&zone->uz_full_bucket, bucket, ub_link); @@ -1668,10 +1910,12 @@ static void * uma_zalloc_internal(uma_zone_t zone, void *udata, int flags) { + uma_keg_t keg; uma_slab_t slab; void *item; item = NULL; + keg = zone->uz_keg; #ifdef UMA_DEBUG_ALLOC printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone); @@ -1688,10 +1932,18 @@ ZONE_UNLOCK(zone); + /* + * We have to call both the zone's init (not the keg's init) + * and the zone's ctor. This is because the item is going from + * a keg slab directly to the user, and the user is expecting it + * to be both zone-init'd as well as zone-ctor'd. + */ + if (zone->uz_init != NULL) + zone->uz_init(item, keg->uk_size); if (zone->uz_ctor != NULL) - zone->uz_ctor(item, zone->uz_size, udata); + zone->uz_ctor(item, keg->uk_size, udata); if (flags & M_ZERO) - bzero(item, zone->uz_size); + bzero(item, keg->uk_size); return (item); } @@ -1700,6 +1952,7 @@ void uma_zfree_arg(uma_zone_t zone, void *item, void *udata) { + uma_keg_t keg; uma_cache_t cache; uma_bucket_t bucket; int bflags; @@ -1708,6 +1961,8 @@ /* This is the fast path free */ skip = 0; + keg = zone->uz_keg; + #ifdef UMA_DEBUG_ALLOC_1 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone); #endif @@ -1716,11 +1971,11 @@ * a little longer for the limits to be reset. 
*/ - if (zone->uz_flags & UMA_ZFLAG_FULL) + if (keg->uk_flags & UMA_ZFLAG_FULL) goto zfree_internal; if (zone->uz_dtor) { - zone->uz_dtor(item, zone->uz_size, udata); + zone->uz_dtor(item, keg->uk_size, udata); skip = 1; } @@ -1745,7 +2000,7 @@ bucket->ub_cnt++; #ifdef INVARIANTS ZONE_LOCK(zone); - if (zone->uz_flags & UMA_ZONE_MALLOC) + if (keg->uk_flags & UMA_ZONE_MALLOC) uma_dbg_free(zone, udata, item); else uma_dbg_free(zone, NULL, item); @@ -1810,7 +2065,7 @@ #endif bflags = M_NOWAIT; - if (zone->uz_flags & UMA_ZFLAG_CACHEONLY) + if (keg->uk_flags & UMA_ZFLAG_CACHEONLY) bflags |= M_NOVM; bucket = bucket_alloc(zone->uz_count, bflags); if (bucket) { @@ -1836,7 +2091,7 @@ */ if (skip) { ZONE_LOCK(zone); - if (zone->uz_flags & UMA_ZONE_MALLOC) + if (keg->uk_flags & UMA_ZONE_MALLOC) uma_dbg_free(zone, udata, item); else uma_dbg_free(zone, NULL, item); @@ -1846,7 +2101,6 @@ uma_zfree_internal(zone, item, udata, skip); return; - } /* @@ -1862,20 +2116,25 @@ uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip) { uma_slab_t slab; + uma_keg_t keg; u_int8_t *mem; u_int8_t freei; + keg = zone->uz_keg; + if (!skip && zone->uz_dtor) - zone->uz_dtor(item, zone->uz_size, udata); + zone->uz_dtor(item, keg->uk_size, udata); + if (zone->uz_fini) + zone->uz_fini(item, keg->uk_size); ZONE_LOCK(zone); - if (!(zone->uz_flags & UMA_ZONE_MALLOC)) { + if (!(keg->uk_flags & UMA_ZONE_MALLOC)) { mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK)); - if (zone->uz_flags & UMA_ZONE_HASH) - slab = hash_sfind(&zone->uz_hash, mem); + if (keg->uk_flags & UMA_ZONE_HASH) + slab = hash_sfind(&keg->uk_hash, mem); else { - mem += zone->uz_pgoff; + mem += keg->uk_pgoff; slab = (uma_slab_t)mem; } } else { @@ -1883,36 +2142,36 @@ } /* Do we need to remove from any lists? 
*/ - if (slab->us_freecount+1 == zone->uz_ipers) { + if (slab->us_freecount+1 == keg->uk_ipers) { LIST_REMOVE(slab, us_link); - LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link); + LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link); } else if (slab->us_freecount == 0) { LIST_REMOVE(slab, us_link); - LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link); + LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link); } /* Slab management stuff */ freei = ((unsigned long)item - (unsigned long)slab->us_data) - / zone->uz_rsize; + / keg->uk_rsize; #ifdef INVARIANTS if (!skip) uma_dbg_free(zone, slab, item); #endif - slab->us_freelist[freei] = slab->us_firstfree; + slab->us_freelist[freei].us_item = slab->us_firstfree; slab->us_firstfree = freei; slab->us_freecount++; /* Zone statistics */ - zone->uz_free++; + keg->uk_free++; - if (zone->uz_flags & UMA_ZFLAG_FULL) { - if (zone->uz_pages < zone->uz_maxpages) - zone->uz_flags &= ~UMA_ZFLAG_FULL; + if (keg->uk_flags & UMA_ZFLAG_FULL) { + if (keg->uk_pages < keg->uk_maxpages) + keg->uk_flags &= ~UMA_ZFLAG_FULL; /* We can handle one more allocation */ - wakeup_one(zone); + wakeup_one(keg); } ZONE_UNLOCK(zone); @@ -1922,15 +2181,62 @@ void uma_zone_set_max(uma_zone_t zone, int nitems) { + uma_keg_t keg; + + keg = zone->uz_keg; ZONE_LOCK(zone); - if (zone->uz_ppera > 1) - zone->uz_maxpages = nitems * zone->uz_ppera; + if (keg->uk_ppera > 1) + keg->uk_maxpages = nitems * keg->uk_ppera; else - zone->uz_maxpages = nitems / zone->uz_ipers; + keg->uk_maxpages = nitems / keg->uk_ipers; + + if (keg->uk_maxpages * keg->uk_ipers < nitems) + keg->uk_maxpages++; + + ZONE_UNLOCK(zone); +} + +/* See uma.h */ +void +uma_zone_set_init(uma_zone_t zone, uma_init uminit) +{ + ZONE_LOCK(zone); + KASSERT(zone->uz_keg->uk_pages == 0, + ("uma_zone_set_init on non-empty keg")); + zone->uz_keg->uk_init = uminit; + ZONE_UNLOCK(zone); +} + +/* See uma.h */ +void +uma_zone_set_fini(uma_zone_t zone, uma_fini fini) +{ + ZONE_LOCK(zone); + KASSERT(zone->uz_keg->uk_pages == 0, + ("uma_zone_set_fini on non-empty keg")); + zone->uz_keg->uk_fini = fini; + ZONE_UNLOCK(zone); +} - if (zone->uz_maxpages * zone->uz_ipers < nitems) - zone->uz_maxpages++; +/* See uma.h */ +void +uma_zone_set_zinit(uma_zone_t zone, uma_init zinit) +{ + ZONE_LOCK(zone); + KASSERT(zone->uz_keg->uk_pages == 0, + ("uma_zone_set_zinit on non-empty keg")); + zone->uz_init = zinit; + ZONE_UNLOCK(zone); +} +/* See uma.h */ +void +uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini) +{ + ZONE_LOCK(zone); + KASSERT(zone->uz_keg->uk_pages == 0, + ("uma_zone_set_zfini on non-empty keg")); + zone->uz_fini = zfini; ZONE_UNLOCK(zone); } @@ -1939,7 +2245,7 @@ uma_zone_set_freef(uma_zone_t zone, uma_free freef) { ZONE_LOCK(zone); - zone->uz_freef = freef; + zone->uz_keg->uk_freef = freef; ZONE_UNLOCK(zone); } @@ -1948,8 +2254,8 @@ uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf) { ZONE_LOCK(zone); - zone->uz_flags |= UMA_ZFLAG_PRIVALLOC; - zone->uz_allocf = allocf; + zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC; + zone->uz_keg->uk_allocf = allocf; ZONE_UNLOCK(zone); } @@ -1957,12 +2263,14 @@ int uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count) { - int pages; + uma_keg_t keg; vm_offset_t kva; + int pages; - pages = count / zone->uz_ipers; + keg = zone->uz_keg; + pages = count / keg->uk_ipers; - if (pages * zone->uz_ipers < count) + if (pages * keg->uk_ipers < count) pages++; kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE); @@ -1978,11 +2286,11 @@ pages, obj); } ZONE_LOCK(zone); - zone->uz_kva = 
kva; - zone->uz_obj = obj; - zone->uz_maxpages = pages; - zone->uz_allocf = obj_alloc; - zone->uz_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC; + keg->uk_kva = kva; + keg->uk_obj = obj; + keg->uk_maxpages = pages; + keg->uk_allocf = obj_alloc; + keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC; ZONE_UNLOCK(zone); return (1); } @@ -1993,20 +2301,41 @@ { int slabs; uma_slab_t slab; + uma_keg_t keg; + keg = zone->uz_keg; ZONE_LOCK(zone); - slabs = items / zone->uz_ipers; - if (slabs * zone->uz_ipers < items) + slabs = items / keg->uk_ipers; + if (slabs * keg->uk_ipers < items) slabs++; while (slabs > 0) { slab = slab_zalloc(zone, M_WAITOK); - LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link); + LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link); slabs--; } ZONE_UNLOCK(zone); } /* See uma.h */ +u_int32_t * +uma_find_refcnt(uma_zone_t zone, void *item) +{ + uma_slabrefcnt_t slab; + uma_keg_t keg; + u_int32_t *refcnt; + int idx; + + keg = zone->uz_keg; + slab = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK)); + KASSERT(slab != NULL, + ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT")); + idx = ((unsigned long)item - (unsigned long)slab->us_data) + / keg->uk_rsize; + refcnt = &(slab->us_freelist[idx].us_refcnt); + return refcnt; +} + +/* See uma.h */ void uma_reclaim(void) { @@ -2021,6 +2350,7 @@ * zones are drained. We have to do the same for buckets. */ zone_drain(slabzone); + zone_drain(slabrefzone); bucket_zone_drain(); } @@ -2044,7 +2374,6 @@ uma_zfree_internal(slabzone, slab, NULL, 0); } - return (mem); } @@ -2065,8 +2394,8 @@ static void slab_print(uma_slab_t slab) { - printf("slab: zone %p, data %p, freecount %d, firstfree %d\n", - slab->us_zone, slab->us_data, slab->us_freecount, + printf("slab: keg %p, data %p, freecount %d, firstfree %d\n", + slab->us_keg, slab->us_data, slab->us_freecount, slab->us_firstfree); } @@ -2084,21 +2413,23 @@ uma_print_zone(uma_zone_t zone) { uma_cache_t cache; + uma_keg_t keg; uma_slab_t slab; int i; + keg = zone->uz_keg; printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n", - zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags, - zone->uz_ipers, zone->uz_ppera, - (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free); + zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags, + keg->uk_ipers, keg->uk_ppera, + (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free); printf("Part slabs:\n"); - LIST_FOREACH(slab, &zone->uz_part_slab, us_link) + LIST_FOREACH(slab, &keg->uk_part_slab, us_link) slab_print(slab); printf("Free slabs:\n"); - LIST_FOREACH(slab, &zone->uz_free_slab, us_link) + LIST_FOREACH(slab, &keg->uk_free_slab, us_link) slab_print(slab); printf("Full slabs:\n"); - LIST_FOREACH(slab, &zone->uz_full_slab, us_link) + LIST_FOREACH(slab, &keg->uk_full_slab, us_link) slab_print(slab); for (i = 0; i <= mp_maxid; i++) { if (CPU_ABSENT(i)) @@ -2122,6 +2453,7 @@ int totalfree; char *tmpbuf, *offset; uma_zone_t z; + uma_keg_t zk; char *p; int cpu; int cachefree; @@ -2130,8 +2462,10 @@ cnt = 0; mtx_lock(&uma_mtx); - LIST_FOREACH(z, &uma_zones, uz_link) - cnt++; + LIST_FOREACH(zk, &uma_kegs, uk_link) { + LIST_FOREACH(z, &zk->uk_zones, uz_link) + cnt++; + } mtx_unlock(&uma_mtx); MALLOC(tmpbuf, char *, (cnt == 0 ? 
1 : cnt) * linesize, M_TEMP, M_WAITOK); @@ -2144,10 +2478,11 @@ goto out; offset = tmpbuf; mtx_lock(&uma_mtx); - LIST_FOREACH(z, &uma_zones, uz_link) { + LIST_FOREACH(zk, &uma_kegs, uk_link) { + LIST_FOREACH(z, &zk->uk_zones, uz_link) { if (cnt == 0) /* list may have changed size */ break; - if (!(z->uz_flags & UMA_ZFLAG_INTERNAL)) { + if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) { for (cpu = 0; cpu <= mp_maxid; cpu++) { if (CPU_ABSENT(cpu)) continue; @@ -2156,7 +2491,7 @@ } ZONE_LOCK(z); cachefree = 0; - if (!(z->uz_flags & UMA_ZFLAG_INTERNAL)) { + if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) { for (cpu = 0; cpu <= mp_maxid; cpu++) { if (CPU_ABSENT(cpu)) continue; @@ -2171,12 +2506,12 @@ LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) { cachefree += bucket->ub_cnt; } - totalfree = z->uz_free + cachefree; + totalfree = zk->uk_free + cachefree; len = snprintf(offset, linesize, "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n", - z->uz_name, z->uz_size, - z->uz_maxpages * z->uz_ipers, - (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree, + z->uz_name, zk->uk_size, + zk->uk_maxpages * zk->uk_ipers, + (zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree, totalfree, (unsigned long long)z->uz_allocs); ZONE_UNLOCK(z); @@ -2185,6 +2520,7 @@ p[1] = ':'; cnt--; offset += len; + } } mtx_unlock(&uma_mtx); *offset++ = '\0'; diff -ruN vendor/src/sys/vm/uma_dbg.c mbuma2/src/sys/vm/uma_dbg.c --- vendor/src/sys/vm/uma_dbg.c Tue May 25 11:51:13 2004 +++ mbuma2/src/sys/vm/uma_dbg.c Sat Mar 20 18:39:21 2004 @@ -192,15 +192,17 @@ uma_dbg_getslab(uma_zone_t zone, void *item) { uma_slab_t slab; + uma_keg_t keg; u_int8_t *mem; + keg = zone->uz_keg; mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK)); - if (zone->uz_flags & UMA_ZONE_MALLOC) { + if (keg->uk_flags & UMA_ZONE_MALLOC) { slab = vtoslab((vm_offset_t)mem); - } else if (zone->uz_flags & UMA_ZONE_HASH) { - slab = hash_sfind(&zone->uz_hash, mem); + } else if (keg->uk_flags & UMA_ZONE_HASH) { + slab = hash_sfind(&keg->uk_hash, mem); } else { - mem += zone->uz_pgoff; + mem += keg->uk_pgoff; slab = (uma_slab_t)mem; } @@ -215,8 +217,10 @@ void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item) { + uma_keg_t keg; int freei; + keg = zone->uz_keg; if (slab == NULL) { slab = uma_dbg_getslab(zone, item); if (slab == NULL) @@ -225,9 +229,9 @@ } freei = ((unsigned long)item - (unsigned long)slab->us_data) - / zone->uz_rsize; + / keg->uk_rsize; - slab->us_freelist[freei] = 255; + slab->us_freelist[freei].us_item = 255; return; } @@ -241,8 +245,10 @@ void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item) { + uma_keg_t keg; int freei; + keg = zone->uz_keg; if (slab == NULL) { slab = uma_dbg_getslab(zone, item); if (slab == NULL) @@ -251,22 +257,22 @@ } freei = ((unsigned long)item - (unsigned long)slab->us_data) - / zone->uz_rsize; + / keg->uk_rsize; - if (freei >= zone->uz_ipers) + if (freei >= keg->uk_ipers) panic("zone: %s(%p) slab %p freelist %d out of range 0-%d\n", - zone->uz_name, zone, slab, freei, zone->uz_ipers-1); + zone->uz_name, zone, slab, freei, keg->uk_ipers-1); - if (((freei * zone->uz_rsize) + slab->us_data) != item) { + if (((freei * keg->uk_rsize) + slab->us_data) != item) { printf("zone: %s(%p) slab %p freed address %p unaligned.\n", zone->uz_name, zone, slab, item); panic("should be %p\n", - (freei * zone->uz_rsize) + slab->us_data); + (freei * keg->uk_rsize) + slab->us_data); } - if (slab->us_freelist[freei] != 255) { + if (slab->us_freelist[freei].us_item != 255) { printf("Slab at %p, freei %d = %d.\n", - slab, 
freei, slab->us_freelist[freei]); + slab, freei, slab->us_freelist[freei].us_item); panic("Duplicate free of item %p from zone %p(%s)\n", item, zone, zone->uz_name); } @@ -276,5 +282,5 @@ * Until then the count of valid slabs will make sure we don't * accidentally follow this and assume it's a valid index. */ - slab->us_freelist[freei] = 0; + slab->us_freelist[freei].us_item = 0; } diff -ruN vendor/src/sys/vm/uma_int.h mbuma2/src/sys/vm/uma_int.h --- vendor/src/sys/vm/uma_int.h Tue May 25 11:51:13 2004 +++ mbuma2/src/sys/vm/uma_int.h Thu May 27 21:58:07 2004 @@ -35,10 +35,10 @@ /* * Here's a quick description of the relationship between the objects: * - * Zones contain lists of slabs which are stored in either the full bin, empty + * Kegs contain lists of slabs which are stored in either the full bin, empty * bin, or partially allocated bin, to reduce fragmentation. They also contain * the user supplied value for size, which is adjusted for alignment purposes - * and rsize is the result of that. The zone also stores information for + * and rsize is the result of that. The Keg also stores information for * managing a hash of page addresses that maps pages to uma_slab_t structures * for pages that don't have embedded uma_slab_t's. * @@ -67,6 +67,20 @@ * so at this time it may not make sense to optimize for it. This can, of * course, be solved with dynamic slab sizes. * + * Kegs may serve multiple Zones but by far most of the time they only serve + * one. When a Zone is created, a Keg is allocated and setup for it. While + * the backing Keg stores slabs, the Zone caches Buckets of items allocated + * from the slabs. Each Zone is equipped with an init/fini and ctor/dtor + * pair, as well as with its own set of small per-CPU caches, layered above + * the Zone's general Bucket cache. + * + * The PCPU caches are protected by their own locks, while the Zones backed + * by the same Keg all share a common Keg lock (to coalesce contention on + * the backing slabs). The backing Keg typically only serves one Zone but + * in the case of multiple Zones, one of the Zones is considered the + * Master Zone and all Zone-related stats from the Keg are done in the + * Master Zone. For an example of a Multi-Zone setup, refer to the + * Mbuf allocation code. */ /* @@ -134,28 +148,6 @@ SLIST_REMOVE(&(h)->uh_slab_hash[UMA_HASH((h), \ (mem))], (s), uma_slab, us_hlink); -/* Page management structure */ - -/* Sorry for the union, but space efficiency is important */ -struct uma_slab { - uma_zone_t us_zone; /* Zone we live in */ - union { - LIST_ENTRY(uma_slab) _us_link; /* slabs in zone */ - unsigned long _us_size; /* Size of allocation */ - } us_type; - SLIST_ENTRY(uma_slab) us_hlink; /* Link for hash table */ - u_int8_t *us_data; /* First item */ - u_int8_t us_flags; /* Page flags see uma.h */ - u_int8_t us_freecount; /* How many are free? 
*/ - u_int8_t us_firstfree; /* First free item index */ - u_int8_t us_freelist[1]; /* Free List (actually larger) */ -}; - -#define us_link us_type._us_link -#define us_size us_type._us_size - -typedef struct uma_slab * uma_slab_t; - /* Hash table for freed address -> slab translation */ SLIST_HEAD(slabhead, uma_slab); @@ -188,6 +180,97 @@ typedef struct uma_cache * uma_cache_t; /* + * Keg management structure + * + * TODO: Optimize for cache line size + * + */ +struct uma_keg { + LIST_ENTRY(uma_keg) uk_link; /* List of all kegs */ + + struct mtx uk_lock; /* Lock for the keg */ + struct uma_hash uk_hash; + + LIST_HEAD(,uma_zone) uk_zones; /* Keg's zones */ + LIST_HEAD(,uma_slab) uk_part_slab; /* partially allocated slabs */ + LIST_HEAD(,uma_slab) uk_free_slab; /* empty slab list */ + LIST_HEAD(,uma_slab) uk_full_slab; /* full slabs */ + + u_int32_t uk_recurse; /* Allocation recursion count */ + u_int32_t uk_align; /* Alignment mask */ + u_int32_t uk_pages; /* Total page count */ + u_int32_t uk_free; /* Count of items free in slabs */ + u_int32_t uk_size; /* Requested size of each item */ + u_int32_t uk_rsize; /* Real size of each item */ + u_int32_t uk_maxpages; /* Maximum number of pages to alloc */ + + uma_init uk_init; /* Keg's init routine */ + uma_fini uk_fini; /* Keg's fini routine */ + uma_alloc uk_allocf; /* Allocation function */ + uma_free uk_freef; /* Free routine */ + + struct vm_object *uk_obj; /* Zone specific object */ + vm_offset_t uk_kva; /* Base kva for zones with objs */ + uma_zone_t uk_slabzone; /* Slab zone backing us, if OFFPAGE */ + + u_int16_t uk_pgoff; /* Offset to uma_slab struct */ + u_int16_t uk_ppera; /* pages per allocation from backend */ + u_int16_t uk_ipers; /* Items per slab */ + u_int16_t uk_flags; /* Internal flags */ +}; + +/* Simpler reference to uma_keg for internal use. */ +typedef struct uma_keg * uma_keg_t; + +/* Page management structure */ + +/* Sorry for the union, but space efficiency is important */ +struct uma_slab_head { + uma_keg_t us_keg; /* Keg we live in */ + union { + LIST_ENTRY(uma_slab) _us_link; /* slabs in zone */ + unsigned long _us_size; /* Size of allocation */ + } us_type; + SLIST_ENTRY(uma_slab) us_hlink; /* Link for hash table */ + u_int8_t *us_data; /* First item */ + u_int8_t us_flags; /* Page flags see uma.h */ + u_int8_t us_freecount; /* How many are free? */ + u_int8_t us_firstfree; /* First free item index */ +}; + +/* The standard slab structure */ +struct uma_slab { + struct uma_slab_head us_head; /* slab header data */ + struct { + u_int8_t us_item; + } us_freelist[1]; /* actual number bigger */ +}; + +/* + * The slab structure for UMA_ZONE_REFCNT zones for whose items we + * maintain reference counters in the slab for. 
+ */ +struct uma_slab_refcnt { + struct uma_slab_head us_head; /* slab header data */ + struct { + u_int8_t us_item; + u_int32_t us_refcnt; + } us_freelist[1]; /* actual number bigger */ +}; + +#define us_keg us_head.us_keg +#define us_link us_head.us_type._us_link +#define us_size us_head.us_type._us_size +#define us_hlink us_head.us_hlink +#define us_data us_head.us_data +#define us_flags us_head.us_flags +#define us_freecount us_head.us_freecount +#define us_firstfree us_head.us_firstfree + +typedef struct uma_slab * uma_slab_t; +typedef struct uma_slab_refcnt * uma_slabrefcnt_t; + +/* * Zone management structure * * TODO: Optimize for cache line size @@ -195,42 +278,22 @@ */ struct uma_zone { char *uz_name; /* Text name of the zone */ - LIST_ENTRY(uma_zone) uz_link; /* List of all zones */ - u_int32_t uz_align; /* Alignment mask */ - u_int32_t uz_pages; /* Total page count */ - -/* Used during alloc / free */ - struct mtx uz_lock; /* Lock for the zone */ - u_int32_t uz_free; /* Count of items free in slabs */ - u_int16_t uz_ipers; /* Items per slab */ - u_int16_t uz_flags; /* Internal flags */ - - LIST_HEAD(,uma_slab) uz_part_slab; /* partially allocated slabs */ - LIST_HEAD(,uma_slab) uz_free_slab; /* empty slab list */ - LIST_HEAD(,uma_slab) uz_full_slab; /* full slabs */ + struct mtx *uz_lock; /* Lock for the zone (keg's lock) */ + uma_keg_t uz_keg; /* Our underlying Keg */ + + LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */ LIST_HEAD(,uma_bucket) uz_full_bucket; /* full buckets */ LIST_HEAD(,uma_bucket) uz_free_bucket; /* Buckets for frees */ - u_int32_t uz_size; /* Requested size of each item */ - u_int32_t uz_rsize; /* Real size of each item */ - - struct uma_hash uz_hash; - u_int16_t uz_pgoff; /* Offset to uma_slab struct */ - u_int16_t uz_ppera; /* pages per allocation from backend */ uma_ctor uz_ctor; /* Constructor for each allocation */ uma_dtor uz_dtor; /* Destructor */ - u_int64_t uz_allocs; /* Total number of allocations */ - uma_init uz_init; /* Initializer for each item */ uma_fini uz_fini; /* Discards memory */ - uma_alloc uz_allocf; /* Allocation function */ - uma_free uz_freef; /* Free routine */ - struct vm_object *uz_obj; /* Zone specific object */ - vm_offset_t uz_kva; /* Base kva for zones with objs */ - u_int32_t uz_maxpages; /* Maximum number of pages to alloc */ - int uz_recurse; /* Allocation recursion count */ + + u_int64_t uz_allocs; /* Total number of allocations */ uint16_t uz_fills; /* Outstanding bucket fills */ uint16_t uz_count; /* Highest value ub_ptr can have */ + /* * This HAS to be the last item because we adjust the zone size * based on NCPU and then allocate the space for the zones. 
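
To make the Keg/Zone relationship described above a bit more concrete, here is a rough sketch, not taken from the patch, of how a hypothetical kernel consumer could layer a secondary Zone over a Master Zone using the interfaces added here (uma_zcreate(), uma_zsecond_create(), uma_zone_set_max()). All of the "myobj" names and sizes are invented for illustration, and the fragment is only meant as a sketch against a kernel carrying this patch, not as stand-alone code.

#include <sys/param.h>
#include <sys/malloc.h>		/* M_WAITOK */
#include <vm/uma.h>

struct myobj {
	int	mo_state;
	char	mo_payload[240];
};

static uma_zone_t myobj_zone;		/* Master Zone; creates/owns the Keg */
static uma_zone_t myobj_cache_zone;	/* Secondary Zone on the same Keg */

/*
 * Zone-level init for the secondary zone, applied when an item comes
 * out of the backing keg into this zone (signature as used in this patch).
 */
static void
myobj_cache_zinit(void *mem, int size)
{
	struct myobj *mo = mem;

	mo->mo_state = 1;
}

static void
myobj_zones_init(void)
{
	/* The master zone allocates and sets up the backing keg. */
	myobj_zone = uma_zcreate("myobj", sizeof(struct myobj),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);

	/*
	 * The secondary zone shares that keg (and the keg lock) but keeps
	 * its own buckets and its own zone-level init.
	 */
	myobj_cache_zone = uma_zsecond_create("myobj cache", NULL, NULL,
	    myobj_cache_zinit, NULL, myobj_zone);

	/* The limit lives in the keg, so this caps items for both zones. */
	uma_zone_set_max(myobj_zone, 4096);
}

static struct myobj *
myobj_get(void)
{
	/* May still return NULL, e.g. when WITNESS downgrades M_WAITOK. */
	return (uma_zalloc(myobj_cache_zone, M_WAITOK));
}

static void
myobj_put(struct myobj *mo)
{
	uma_zfree(myobj_cache_zone, mo);
}
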
@@ -256,16 +319,16 @@ #define ZONE_LOCK_INIT(z, lc) \ do { \ if ((lc)) \ - mtx_init(&(z)->uz_lock, (z)->uz_name, \ + mtx_init((z)->uz_lock, (z)->uz_name, \ (z)->uz_name, MTX_DEF | MTX_DUPOK); \ else \ - mtx_init(&(z)->uz_lock, (z)->uz_name, \ + mtx_init((z)->uz_lock, (z)->uz_name, \ "UMA zone", MTX_DEF | MTX_DUPOK); \ } while (0) -#define ZONE_LOCK_FINI(z) mtx_destroy(&(z)->uz_lock) -#define ZONE_LOCK(z) mtx_lock(&(z)->uz_lock) -#define ZONE_UNLOCK(z) mtx_unlock(&(z)->uz_lock) +#define ZONE_LOCK_FINI(z) mtx_destroy((z)->uz_lock) +#define ZONE_LOCK(z) mtx_lock((z)->uz_lock) +#define ZONE_UNLOCK(z) mtx_unlock((z)->uz_lock) #define CPU_LOCK_INIT(cpu) \ mtx_init(&uma_pcpu_mtx[(cpu)], "UMA pcpu", "UMA pcpu", \ diff -ruN vendor/src/sys/vm/vm_kern.c mbuma2/src/sys/vm/vm_kern.c --- vendor/src/sys/vm/vm_kern.c Tue May 25 11:51:13 2004 +++ mbuma2/src/sys/vm/vm_kern.c Fri May 28 15:38:31 2004 @@ -320,16 +320,6 @@ vm_map_lock(map); if (vm_map_findspace(map, vm_map_min(map), size, &addr)) { vm_map_unlock(map); - if (map != kmem_map) { - static int last_report; /* when we did it (in ticks) */ - if (ticks < last_report || - (ticks - last_report) >= hz) { - last_report = ticks; - printf("Out of mbuf address space!\n"); - printf("Consider increasing NMBCLUSTERS\n"); - } - return (0); - } if ((flags & M_NOWAIT) == 0) panic("kmem_malloc(%ld): kmem_map too small: %ld total allocated", (long)size, (long)map->size); diff -ruN vendor/src/usr.bin/netstat/main.c mbuma2/src/usr.bin/netstat/main.c --- vendor/src/usr.bin/netstat/main.c Tue May 25 11:29:39 2004 +++ mbuma2/src/usr.bin/netstat/main.c Fri May 21 15:34:55 2004 @@ -256,7 +256,6 @@ int Aflag; /* show addresses of protocol control block */ int aflag; /* show all sockets (including servers) */ int bflag; /* show i/f total bytes in/out */ -int cflag; /* show mbuf cache information */ int dflag; /* show i/f dropped packets */ int gflag; /* show group (multicast) routing or stats */ int iflag; /* show interfaces */ @@ -297,9 +296,6 @@ case 'b': bflag = 1; break; - case 'c': - cflag = 1; - break; case 'd': dflag = 1; break; @@ -425,10 +421,6 @@ if (nlistf != NULL || memf != NULL) setgid(getgid()); - if (cflag && !mflag) { - (void)fprintf(stderr, "-c only valid with -m\n"); - usage(); - } if (mflag) { if (memf != NULL) { if (kread(0, 0, 0) == 0) diff -ruN vendor/src/usr.bin/netstat/mbuf.c mbuma2/src/usr.bin/netstat/mbuf.c --- vendor/src/usr.bin/netstat/mbuf.c Tue May 25 11:29:39 2004 +++ mbuma2/src/usr.bin/netstat/mbuf.c Fri May 21 15:34:55 2004 @@ -99,17 +99,12 @@ u_long mbhiaddr, u_long clhiaddr, u_long mbloaddr, u_long clloaddr, u_long cpusaddr __unused, u_long pgsaddr, u_long mbpaddr) { - int i, j, nmbufs, nmbclusters, page_size, num_objs; + int i, nmbclusters; int nsfbufs, nsfbufspeak, nsfbufsused; - u_int mbuf_hiwm, clust_hiwm, mbuf_lowm, clust_lowm; - u_long totspace[2], totused[2]; - u_long gentotnum, gentotfree, totnum, totfree; - u_long totmem, totmemalloced, totmemused; short nmbtypes; size_t mlen; long *mbtypes = NULL; struct mbstat *mbstat = NULL; - struct mbpstat **mbpstat = NULL; struct mbtypenames *mp; bool *seen = NULL; @@ -119,50 +114,12 @@ goto err; } - /* - * XXX: Unfortunately, for the time being, we have to fetch - * the total length of the per-CPU stats area via sysctl - * (regardless of whether we're looking at a core or not. 
- */ - if (sysctlbyname("kern.ipc.mb_statpcpu", NULL, &mlen, NULL, 0) < 0) { - warn("sysctl: retrieving mb_statpcpu len"); - goto err; - } - num_objs = (int)(mlen / sizeof(struct mbpstat)); - if ((mbpstat = calloc(num_objs, sizeof(struct mbpstat *))) == NULL) { - warn("calloc: cannot allocate memory for mbpstats pointers"); - goto err; - } - if ((mbpstat[0] = calloc(num_objs, sizeof(struct mbpstat))) == NULL) { - warn("calloc: cannot allocate memory for mbpstats"); - goto err; - } - if (mbaddr) { - if (kread(mbpaddr, (char *)mbpstat[0], mlen)) - goto err; if (kread(mbaddr, (char *)mbstat, sizeof mbstat)) goto err; if (kread(nmbcaddr, (char *)&nmbclusters, sizeof(int))) goto err; - if (kread(nmbufaddr, (char *)&nmbufs, sizeof(int))) - goto err; - if (kread(mbhiaddr, (char *)&mbuf_hiwm, sizeof(u_int))) - goto err; - if (kread(clhiaddr, (char *)&clust_hiwm, sizeof(u_int))) - goto err; - if (kread(mbloaddr, (char *)&mbuf_lowm, sizeof(u_int))) - goto err; - if (kread(clloaddr, (char *)&clust_lowm, sizeof(u_int))) - goto err; - if (kread(pgsaddr, (char *)&page_size, sizeof(int))) - goto err; } else { - if (sysctlbyname("kern.ipc.mb_statpcpu", mbpstat[0], &mlen, - NULL, 0) < 0) { - warn("sysctl: retrieving mb_statpcpu"); - goto err; - } mlen = sizeof *mbstat; if (sysctlbyname("kern.ipc.mbstat", mbstat, &mlen, NULL, 0) < 0) { @@ -175,43 +132,9 @@ warn("sysctl: retrieving nmbclusters"); goto err; } - mlen = sizeof(int); - if (sysctlbyname("kern.ipc.nmbufs", &nmbufs, &mlen, NULL, 0) - < 0) { - warn("sysctl: retrieving nmbufs"); - goto err; - } - mlen = sizeof(u_int); - if (sysctlbyname("kern.ipc.mbuf_hiwm", &mbuf_hiwm, &mlen, - NULL, 0) < 0) { - warn("sysctl: retrieving mbuf_hiwm"); - goto err; - } - mlen = sizeof(u_int); - if (sysctlbyname("kern.ipc.clust_hiwm", &clust_hiwm, &mlen, - NULL, 0) < 0) { - warn("sysctl: retrieving clust_hiwm"); - goto err; - } - mlen = sizeof(u_int); - if (sysctlbyname("kern.ipc.mbuf_lowm", &mbuf_lowm, &mlen, - NULL, 0) < 0) { - warn("sysctl: retrieving mbuf_lowm"); - goto err; - } - mlen = sizeof(u_int); - if (sysctlbyname("kern.ipc.clust_lowm", &clust_lowm, &mlen, - NULL, 0) < 0) { - warn("sysctl: retrieving clust_lowm"); - goto err; - } - mlen = sizeof(int); - if (sysctlbyname("hw.pagesize", &page_size, &mlen, NULL, 0) - < 0) { - warn("sysctl: retrieving hw.pagesize"); - goto err; - } } + if (mbstat->m_mbufs < 0) mbstat->m_mbufs = 0; /* XXX */ + if (mbstat->m_mclusts < 0) mbstat->m_mclusts = 0; /* XXX */ nmbtypes = mbstat->m_numtypes; if ((seen = calloc(nmbtypes, sizeof(*seen))) == NULL) { @@ -223,59 +146,13 @@ goto err; } - for (i = 0; i < num_objs; i++) - mbpstat[i] = mbpstat[0] + i; - #undef MSIZE #define MSIZE (mbstat->m_msize) #undef MCLBYTES #define MCLBYTES (mbstat->m_mclbytes) -#define GENLST (num_objs - 1) - totnum = mbpstat[GENLST]->mb_mbbucks * mbstat->m_mbperbuck; - totfree = mbpstat[GENLST]->mb_mbfree; - for (j = 1; j < nmbtypes; j++) - mbtypes[j] += mbpstat[GENLST]->mb_mbtypes[j]; - totspace[0] = mbpstat[GENLST]->mb_mbbucks * mbstat->m_mbperbuck * MSIZE; - for (i = 0; i < (num_objs - 1); i++) { - if (mbpstat[i]->mb_active == 0) - continue; - totspace[0] += mbpstat[i]->mb_mbbucks*mbstat->m_mbperbuck*MSIZE; - totnum += mbpstat[i]->mb_mbbucks * mbstat->m_mbperbuck; - totfree += mbpstat[i]->mb_mbfree; - for (j = 1; j < nmbtypes; j++) - mbtypes[j] += mbpstat[i]->mb_mbtypes[j]; - } - totused[0] = totnum - totfree; - if (cflag) { - printf("mbuf usage:\n" - "\tTotal:\t\t%lu/%lu/%d (in use/in pool/max)\n", - totused[0], totnum, nmbufs); - gentotnum = 
mbpstat[GENLST]->mb_mbbucks * mbstat->m_mbperbuck; - gentotfree = mbpstat[GENLST]->mb_mbfree; - printf("\tGEN cache:\t%lu/%lu (in use/in pool)\n", - gentotnum - gentotfree, gentotnum); - } else { - /* XXX: peak is now wrong. */ - printf("%lu/%lu/%d mbufs in use (current/peak/max):\n", - totused[0], totnum, nmbufs); - } + printf("%lu mbufs in use\n", mbstat->m_mbufs); - for (i = 0; cflag && i < (num_objs - 1); i++) { - if (mbpstat[i]->mb_active == 0) - continue; - printf("\tCPU #%d cache:\t%lu/%lu (in use/in pool)\n", - i, - (mbpstat[i]->mb_mbbucks * mbstat->m_mbperbuck - - mbpstat[i]->mb_mbfree), - (mbpstat[i]->mb_mbbucks * mbstat->m_mbperbuck)); - } - if (cflag) { - printf("\tMbuf cache high watermark: %d\n", mbuf_hiwm); -#ifdef NOTYET - printf("\tMbuf cache low watermark: %d\n", mbuf_lowm); -#endif - } for (mp = mbtypenames; mp->mt_name; mp++) { if (mbtypes[mp->mt_type]) { seen[mp->mt_type] = YES; @@ -288,53 +165,10 @@ printf("\t %lu mbufs allocated to \n", mbtypes[i], i); } - if (cflag) - printf("\t%.1f%% of mbuf map consumed\n", - totspace[0] * 100.0 / (nmbufs * MSIZE)); - totnum = mbpstat[GENLST]->mb_clbucks * mbstat->m_clperbuck; - totfree = mbpstat[GENLST]->mb_clfree; - totspace[1] = mbpstat[GENLST]->mb_clbucks*mbstat->m_clperbuck*MCLBYTES; - for (i = 0; i < (num_objs - 1); i++) { - if (mbpstat[i]->mb_active == 0) - continue; - totspace[1] += mbpstat[i]->mb_clbucks * mbstat->m_clperbuck - * MCLBYTES; - totnum += mbpstat[i]->mb_clbucks * mbstat->m_clperbuck; - totfree += mbpstat[i]->mb_clfree; - } - totused[1] = totnum - totfree; - if (cflag) { - printf("mbuf cluster usage:\n" - "\tTotal:\t\t%lu/%lu/%d (in use/in pool/max)\n", - totused[1], totnum, nmbclusters); - gentotnum = mbpstat[GENLST]->mb_clbucks * mbstat->m_clperbuck; - gentotfree = mbpstat[GENLST]->mb_clfree; - printf("\tGEN cache:\t%lu/%lu (in use/in pool)\n", - gentotnum - gentotfree, gentotnum); - } else { - /* XXX: peak is now wrong. 
*/ - printf("%lu/%lu/%d mbuf clusters in use (current/peak/max)\n", - totused[1], totnum, nmbclusters); - } - for (i = 0; cflag && i < (num_objs - 1); i++) { - if (mbpstat[i]->mb_active == 0) - continue; - printf("\tCPU #%d cache:\t%lu/%lu (in use/in pool)\n", - i, - (mbpstat[i]->mb_clbucks * mbstat->m_clperbuck - - mbpstat[i]->mb_clfree), - (mbpstat[i]->mb_clbucks * mbstat->m_clperbuck)); - } - if (cflag) { - printf("\tCluster cache high watermark: %d\n", clust_hiwm); -#ifdef NOTYET - printf("\tCluster cache low watermark: %d\n", clust_lowm); -#endif - } - if (cflag) - printf("\t%.1f%% of cluster map consumed\n", - totspace[1] * 100.0 / (nmbclusters * MCLBYTES)); + printf("%lu/%d mbuf clusters in use (current/max)\n", + mbstat->m_mclusts, nmbclusters); + mlen = sizeof(nsfbufs); if (!sysctlbyname("kern.ipc.nsfbufs", &nsfbufs, &mlen, NULL, 0) && !sysctlbyname("kern.ipc.nsfbufsused", &nsfbufsused, &mlen, NULL, @@ -344,15 +178,8 @@ printf("%d/%d/%d sfbufs in use (current/peak/max)\n", nsfbufsused, nsfbufspeak, nsfbufs); } - totmem = nmbufs * MSIZE + nmbclusters * MCLBYTES; - totmemalloced = totspace[0] + totspace[1]; - totmemused = totused[0] * MSIZE + totused[1] * MCLBYTES; - printf( - "%lu KBytes allocated to network (%.1f%% in use, %.1f%% wired)\n", - totmem / 1024, totmemused * 100.0 / totmem, - totmemalloced * 100.0 / totmem); - printf("%lu requests for memory denied\n", mbstat->m_drops); - printf("%lu requests for memory delayed\n", mbstat->m_wait); + printf("%lu KBytes allocated to network\n", (mbstat->m_mbufs * MSIZE + + mbstat->m_mclusts * MCLBYTES) / 1024); printf("%lu requests for sfbufs denied\n", mbstat->sf_allocfail); printf("%lu requests for sfbufs delayed\n", mbstat->sf_allocwait); printf("%lu requests for I/O initiated by sendfile\n", @@ -366,9 +193,4 @@ free(seen); if (mbstat != NULL) free(mbstat); - if (mbpstat != NULL) { - if (mbpstat[0] != NULL) - free(mbpstat[0]); - free(mbpstat); - } } diff -ruN vendor/src/usr.bin/netstat/netstat.1 mbuma2/src/usr.bin/netstat/netstat.1 --- vendor/src/usr.bin/netstat/netstat.1 Tue May 25 11:29:39 2004 +++ mbuma2/src/usr.bin/netstat/netstat.1 Fri May 21 16:37:34 2004 @@ -181,7 +181,6 @@ .Bk -words .Nm .Fl m -.Op Fl c .Op Fl M Ar core .Op Fl N Ar system .Ek @@ -189,9 +188,6 @@ Show statistics recorded by the memory management routines .Pq Xr mbuf 9 . The network manages a private pool of memory buffers. -The -.Fl c -option shows per-CPU statistics for caching. 
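
The simplified statistics path that the trimmed-down netstat -m relies on can also be exercised directly from userland. The short program below is a sketch along the lines of the new mbuf.c code above; it assumes a kernel carrying this patch, where kern.ipc.mbstat exports the m_mbufs/m_mclusts totals along with m_msize/m_mclbytes.

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>		/* struct mbstat */

#include <err.h>
#include <stdio.h>

int
main(void)
{
	struct mbstat mbstat;
	size_t mlen;
	int nmbclusters;

	mlen = sizeof(mbstat);
	if (sysctlbyname("kern.ipc.mbstat", &mbstat, &mlen, NULL, 0) == -1)
		err(1, "sysctl: kern.ipc.mbstat");

	mlen = sizeof(nmbclusters);
	if (sysctlbyname("kern.ipc.nmbclusters", &nmbclusters, &mlen,
	    NULL, 0) == -1)
		err(1, "sysctl: kern.ipc.nmbclusters");

	printf("%lu mbufs in use\n", (u_long)mbstat.m_mbufs);
	printf("%lu/%d mbuf clusters in use (current/max)\n",
	    (u_long)mbstat.m_mclusts, nmbclusters);
	printf("%lu KBytes allocated to network\n",
	    (u_long)(mbstat.m_mbufs * mbstat.m_msize +
	    mbstat.m_mclusts * mbstat.m_mclbytes) / 1024);
	return (0);
}
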
.It Xo .Bk -words .Nm diff -ruN vendor/src/usr.bin/netstat/netstat.h mbuma2/src/usr.bin/netstat/netstat.h --- vendor/src/usr.bin/netstat/netstat.h Tue May 25 11:29:39 2004 +++ mbuma2/src/usr.bin/netstat/netstat.h Fri May 21 15:34:55 2004 @@ -39,7 +39,6 @@ extern int Aflag; /* show addresses of protocol control block */ extern int aflag; /* show all sockets (including servers) */ extern int bflag; /* show i/f total bytes in/out */ -extern int cflag; /* show mbuf cache information */ extern int dflag; /* show i/f dropped packets */ extern int gflag; /* show group (multicast) routing or stats */ extern int iflag; /* show interfaces */ diff -ruN vendor/src/usr.bin/systat/mbufs.c mbuma2/src/usr.bin/systat/mbufs.c --- vendor/src/usr.bin/systat/mbufs.c Tue May 25 14:18:07 2004 +++ mbuma2/src/usr.bin/systat/mbufs.c Tue May 25 15:31:43 2004 @@ -52,12 +52,9 @@ #include "systat.h" #include "extern.h" -static struct mbpstat **mbpstat; static struct mbstat *mbstat; -static int num_objs; static long *m_mbtypes; static short nmbtypes; -#define GENLST (num_objs - 1) static struct mtnames { short mt_type; @@ -101,20 +98,11 @@ showmbufs() { int i, j, max, idx; - u_long totfree; + u_long totmbufs; char buf[10]; const char *mtname; - totfree = mbpstat[GENLST]->mb_mbfree; - for (i = 1; i < nmbtypes; i++) - m_mbtypes[i] += mbpstat[GENLST]->mb_mbtypes[i]; - for (i = 0; i < GENLST; i++) { - if (mbpstat[i]->mb_active == 0) - continue; - totfree += mbpstat[i]->mb_mbfree; - for (j = 1; j < nmbtypes; j++) - m_mbtypes[j] += mbpstat[i]->mb_mbtypes[j]; - } + totmbufs = mbstat->m_mbufs; /* * Print totals for different mbuf types. @@ -159,16 +147,16 @@ /* * Print total number of free mbufs. */ - if (totfree > 0) { - mvwprintw(wnd, 1+j, 0, "%-10.10s", "free"); - if (totfree > 60) { - snprintf(buf, sizeof(buf), " %lu", totfree); - totfree = 60; - while(totfree--) + if (totmbufs > 0) { + mvwprintw(wnd, 1+j, 0, "%-10.10s", "Mbufs"); + if (totmbufs > 60) { + snprintf(buf, sizeof(buf), " %lu", totmbufs); + totmbufs = 60; + while(totmbufs--) waddch(wnd, 'X'); waddstr(wnd, buf); } else { - while(totfree--) + while(totmbufs--) waddch(wnd, 'X'); } wclrtoeol(wnd); @@ -198,23 +186,6 @@ return 0; } - if (sysctlbyname("kern.ipc.mb_statpcpu", NULL, &len, NULL, 0) < 0) { - error("sysctl getting mbpstat total size failed"); - return 0; - } - num_objs = (int)(len / sizeof(struct mbpstat)); - if ((mbpstat = calloc(num_objs, sizeof(struct mbpstat *))) == NULL) { - error("calloc mbpstat pointers failed"); - return 0; - } - if ((mbpstat[0] = calloc(num_objs, sizeof(struct mbpstat))) == NULL) { - error("calloc mbpstat structures failed"); - return 0; - } - - for (i = 0; i < num_objs; i++) - mbpstat[i] = mbpstat[0] + i; - return 1; } @@ -223,7 +194,7 @@ { size_t len; - len = num_objs * sizeof(struct mbpstat); - if (sysctlbyname("kern.ipc.mb_statpcpu", mbpstat[0], &len, NULL, 0) < 0) - printw("sysctl: mbpstat: %s", strerror(errno)); + len = sizeof *mbstat; + if (sysctlbyname("kern.ipc.mbstat", mbstat, &len, NULL, 0) < 0) + printw("sysctl: mbstat: %s", strerror(errno)); }
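
Finally, to illustrate the uma_slab_refcnt layout introduced in uma_int.h and the index arithmetic used by uma_slab_alloc(), uma_zfree_internal() and uma_find_refcnt(), here is a small stand-alone userland model. The structure and constants are deliberately simplified stand-ins, not the kernel definitions, and in the kernel the slab itself is located via vtoslab()/hash_sfind(), which the model skips.

#include <stdio.h>
#include <stdint.h>

#define MODEL_IPERS	4	/* items per slab (uk_ipers) */
#define MODEL_RSIZE	64	/* rounded item size (uk_rsize) */

/*
 * Cut-down stand-in for struct uma_slab_refcnt: only the fields the
 * arithmetic below needs.  The real structure also carries the keg
 * pointer, list linkage, flags, and so on.
 */
struct model_slab_refcnt {
	uint8_t	*us_data;		/* address of the first item */
	uint8_t	 us_firstfree;		/* head of the embedded free list */
	struct {
		uint8_t	 us_item;	/* index of the next free item */
		uint32_t us_refcnt;	/* per-item reference count */
	} us_freelist[MODEL_IPERS];
};

int
main(void)
{
	static uint8_t storage[MODEL_IPERS * MODEL_RSIZE];
	struct model_slab_refcnt slab = { .us_data = storage };
	uint8_t freei;
	void *item;
	size_t idx;
	int i;

	/* Chain the embedded free list: each entry names the next index. */
	slab.us_firstfree = 0;
	for (i = 0; i < MODEL_IPERS; i++)
		slab.us_freelist[i].us_item = i + 1;

	/* As in uma_slab_alloc(): pop the first free index, compute item. */
	freei = slab.us_firstfree;
	slab.us_firstfree = slab.us_freelist[freei].us_item;
	item = slab.us_data + (size_t)MODEL_RSIZE * freei;

	/* As in uma_find_refcnt(): invert the address back to an index. */
	idx = ((uint8_t *)item - slab.us_data) / MODEL_RSIZE;
	slab.us_freelist[idx].us_refcnt = 1;

	printf("item %p -> index %zu, refcnt %u, next free index %u\n",
	    item, idx, (unsigned)slab.us_freelist[idx].us_refcnt,
	    (unsigned)slab.us_firstfree);
	return (0);
}
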