Index: src/sys/conf/files =================================================================== RCS file: /home/ncvs/src/sys/conf/files,v retrieving revision 1.534 diff -u -r1.534 files --- src/sys/conf/files 2001/06/11 12:38:50 1.534 +++ src/sys/conf/files 2001/06/21 17:47:01 @@ -796,6 +796,7 @@ kern/subr_eventhandler.c standard kern/subr_kobj.c standard kern/subr_log.c standard +kern/subr_mbuf.c standard kern/subr_mchain.c optional libmchain kern/subr_module.c standard kern/subr_pcpu.c standard Index: src/sys/conf/param.c =================================================================== RCS file: /home/ncvs/src/sys/conf/param.c,v retrieving revision 1.40 diff -u -r1.40 param.c --- src/sys/conf/param.c 2000/10/29 16:57:27 1.40 +++ src/sys/conf/param.c 2001/06/21 17:47:01 @@ -64,17 +64,10 @@ #define MAXFILES (NPROC*2) #endif int maxproc = NPROC; /* maximum # of processes */ -int maxprocperuid = NPROC-1; /* maximum # of processes per user */ -int maxfiles = MAXFILES; /* system wide open files limit */ -int maxfilesperproc = MAXFILES; /* per-process open files limit */ +int maxprocperuid = NPROC-1; /* max # of procs per user */ +int maxfiles = MAXFILES; /* sys. wide open files limit */ +int maxfilesperproc = MAXFILES; /* per-proc open files limit */ int ncallout = 16 + NPROC + MAXFILES; /* maximum # of timer events */ -int mbuf_wait = 32; /* mbuf sleep time in ticks */ - -/* maximum # of sf_bufs (sendfile(2) zero-copy virtual buffers) */ -#ifndef NSFBUFS -#define NSFBUFS (512 + MAXUSERS * 16) -#endif -int nsfbufs = NSFBUFS; /* * These may be set to nonzero here or by patching. Index: src/sys/kern/kern_malloc.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_malloc.c,v retrieving revision 1.88 diff -u -r1.88 kern_malloc.c --- src/sys/kern/kern_malloc.c 2001/06/08 05:24:16 1.88 +++ src/sys/kern/kern_malloc.c 2001/06/21 17:47:01 @@ -474,8 +474,15 @@ if ((vm_kmem_size / 2) > (cnt.v_page_count * PAGE_SIZE)) vm_kmem_size = 2 * cnt.v_page_count * PAGE_SIZE; + /* + * In mb_init(), we set up submaps for mbufs and clusters, in which + * case we rounddown() (nmbufs * MSIZE) and (nmbclusters * MCLBYTES), + * respectively. Mathematically, this means that what we do here may + * amount to slightly more address space than we need for the submaps, + * but it never hurts to have an extra page in kmem_map. + */ npg = (nmbufs * MSIZE + nmbclusters * MCLBYTES + nmbcnt * - sizeof(union mext_refcnt) + vm_kmem_size) / PAGE_SIZE; + sizeof(u_int) + vm_kmem_size) / PAGE_SIZE; kmemusage = (struct kmemusage *) kmem_alloc(kernel_map, (vm_size_t)(npg * sizeof(struct kmemusage))); Index: src/sys/kern/subr_mbuf.c =================================================================== RCS file: subr_mbuf.c diff -N subr_mbuf.c --- /dev/null Thu Jun 21 22:06:36 2001 +++ subr_mbuf.c Thu Jun 21 10:53:31 2001 @@ -0,0 +1,1029 @@ +/* + * Copyright (c) 2001 + * Bosko Milekic . All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include "opt_param.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Maximum number of PCPU containers. If you know what you're doing you could + * explicitly define MBALLOC_NCPU to be exactly the number of CPUs on your + * system during compilation, and thus prevent kernel structure bloats. + */ +#ifdef MBALLOC_NCPU +#define NCPU MBALLOC_NCPU +#else +#define NCPU MAXCPU +#endif + +/* + * The mbuf allocator is heavily based on Alfred Perlstein's + * (alfred@FreeBSD.org) "memcache" allocator which is itself based + * on concepts from several per-CPU memory allocators. The difference + * between this allocator and memcache is that, among other things: + * + * (i) We don't free back to the map from the free() routine - we leave the + * option of implementing lazy freeing (from a kproc) in the future. + * + * (ii) We allocate from separate sub-maps of kmem_map, thus limiting the + * maximum number of allocatable objects of a given type. Further, + * we handle blocking on a cv in the case that the map is starved and + * we have to rely solely on cached (circulating) objects. + * + * The mbuf allocator keeps all objects that it allocates in mb_buckets. + * The buckets keep a page worth of objects (an object can be an mbuf or an + * mbuf cluster) and facilitate moving larger sets of contiguous objects + * from the per-CPU lists to the main list for the given object. The buckets + * also have an added advantage in that after several moves from a per-CPU + * list to the main list and back to the per-CPU list, contiguous objects + * are kept together, thus trying to put the TLB cache to good use. + * + * The buckets are kept on singly-linked lists called "containers." A container + * is protected by a mutex lock in order to ensure consistency. The mutex lock + * itself is allocated seperately and attached to the container at boot time, + * thus allowing for certain containers to share the same mutex lock. Per-CPU + * containers for mbufs and mbuf clusters all share the same per-CPU + * lock whereas the "general system" containers (i.e. the "main lists") for + * these objects share one global lock. 
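
/*
 * A minimal userland sketch of the layout described above: a bucket tracks
 * one page worth of equal-sized objects, and buckets hang off per-CPU
 * containers plus one general container.  All names (toy_*) and sizes are
 * assumptions for illustration only; locking, the bucket table and the real
 * kernel types are omitted, and malloc() stands in for kmem_malloc().
 */
#include <stdio.h>
#include <stdlib.h>

#define TOY_PAGE_SIZE	4096
#define TOY_OBJSIZE	256			/* stand-in for MSIZE */
#define TOY_PER_PAGE	(TOY_PAGE_SIZE / TOY_OBJSIZE)
#define TOY_NCPU	2

struct toy_bucket {
	struct toy_bucket *next;		/* singly linked, like SLIST_ENTRY */
	int owner;				/* owning container: CPU id or general */
	int numfree;				/* objects still free in this bucket */
	void *free[TOY_PER_PAGE];		/* free objects within the page */
};

struct toy_container {
	struct toy_bucket *head;		/* buckets that still have free objects */
	long objcount;				/* total free objects in this container */
};

static struct toy_container toy_pcpu[TOY_NCPU], toy_gen;

/* Carve one "page" into objects and hang the resulting bucket off cnt. */
static struct toy_bucket *
toy_add_bucket(struct toy_container *cnt, int owner)
{
	struct toy_bucket *b;
	char *page;
	int i;

	b = malloc(sizeof(*b));
	page = malloc(TOY_PAGE_SIZE);
	if (b == NULL || page == NULL)
		return (NULL);
	b->owner = owner;
	b->numfree = 0;
	for (i = 0; i < TOY_PER_PAGE; i++)
		b->free[b->numfree++] = page + i * TOY_OBJSIZE;
	b->next = cnt->head;
	cnt->head = b;
	cnt->objcount += b->numfree;
	return (b);
}

int
main(void)
{
	toy_add_bucket(&toy_pcpu[0], 0);
	toy_add_bucket(&toy_gen, TOY_NCPU);	/* the general list gets id NCPU */
	printf("cpu0 free objects: %ld, general: %ld\n",
	    toy_pcpu[0].objcount, toy_gen.objcount);
	return (0);
}
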
+ * + */ +struct mb_bucket { + SLIST_ENTRY(mb_bucket) mb_blist; + int mb_owner; + int mb_numfree; + void *mb_free[0]; +}; + +struct mb_container { + SLIST_HEAD(mc_buckethd, mb_bucket) mc_bhead; + struct mtx *mc_lock; + int mc_numowner; + u_int mc_starved; + u_long *mc_objcount; + u_long *mc_numpgs; +}; + +struct mb_gen_list { + struct mb_container mb_cont; + struct cv mgl_mstarved; +}; + +struct mb_pcpu_list { + struct mb_container mb_cont; +}; + +/* + * Boot-time configurable object counts that will determine the maximum + * number of permitted objects in the mbuf and mcluster cases. In the + * ext counter (nmbcnt) case, it's just an indicator serving to scale + * kmem_map size properly - in other words, we may be allowed to allocate + * more than nmbcnt counters, whereas we will never be allowed to allocate + * more than nmbufs mbufs or nmbclusters mclusters. + * As for nsfbufs, it is used to indicate how many sendfile(2) buffers will be + * allocatable by the sfbuf allocator (found in uipc_syscalls.c) + */ +#ifndef NMBCLUSTERS +#define NMBCLUSTERS (1024 + MAXUSERS * 16) +#endif +#ifndef NMBUFS +#define NMBUFS (NMBCLUSTERS * 2) +#endif +#ifndef NSFBUFS +#define NSFBUFS (512 + MAXUSERS * 16) +#endif +#ifndef NMBCNTS +#define NMBCNTS (NMBCLUSTERS + NSFBUFS) +#endif +int nmbufs = NMBUFS; +int nmbclusters = NMBCLUSTERS; +int nmbcnt = NMBCNTS; +int nsfbufs = NSFBUFS; +TUNABLE_INT("kern.ipc.nmbufs", &nmbufs); +TUNABLE_INT("kern.ipc.nmbclusters", &nmbclusters); +TUNABLE_INT("kern.ipc.nmbcnt", &nmbcnt); +TUNABLE_INT("kern.ipc.nsfbufs", &nsfbufs); + +/* + * Perform sanity checks of tunables declared above. + */ +static void +tunable_mbinit(void *dummy) +{ + /* + * This has to be done before VM init. + */ + if (nmbufs < nmbclusters * 2) + nmbufs = nmbclusters * 2; + if (nmbcnt < nmbclusters + nsfbufs) + nmbcnt = nmbclusters + nsfbufs; + + return; +} +SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL); + +/* + * The freelist structures and mutex locks. The number statically declared + * here depends on the number of CPUs. + * + * We setup in such a way that all the objects (mbufs, clusters) + * share the same mutex lock. It has been established that we do not benefit + * from different locks for different objects, so we use the same lock, + * regardless of object type. + */ +struct mb_lstmngr { + struct mb_gen_list *ml_genlist; + struct mb_pcpu_list *ml_cntlst[NCPU]; + struct mb_bucket **ml_btable; + vm_map_t ml_map; + vm_offset_t ml_mapbase; + vm_offset_t ml_maptop; + int ml_mapfull; + u_int ml_objsize; + u_int *ml_wmhigh; +}; +struct mb_lstmngr mb_list_mbuf, mb_list_clust; +struct mtx mbuf_gen, mbuf_pcpu[NCPU]; + +/* + * Local macros for internal allocator structure manipulations. 
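
/*
 * A small sketch of the default sizing arithmetic above, assuming a
 * hypothetical MAXUSERS of 64, followed by the same clamps that
 * tunable_mbinit() applies after loader tunables have been read.  The
 * MAXUSERS value and the printout are illustrative assumptions only.
 */
#include <stdio.h>

#define MAXUSERS	64			/* hypothetical */
#define NMBCLUSTERS	(1024 + MAXUSERS * 16)
#define NMBUFS		(NMBCLUSTERS * 2)
#define NSFBUFS		(512 + MAXUSERS * 16)
#define NMBCNTS		(NMBCLUSTERS + NSFBUFS)

int
main(void)
{
	int nmbclusters = NMBCLUSTERS;		/* 1024 + 1024 = 2048 */
	int nmbufs = NMBUFS;			/* 4096 */
	int nsfbufs = NSFBUFS;			/* 512 + 1024 = 1536 */
	int nmbcnt = NMBCNTS;			/* 2048 + 1536 = 3584 */

	/* The same invariants tunable_mbinit() enforces. */
	if (nmbufs < nmbclusters * 2)
		nmbufs = nmbclusters * 2;
	if (nmbcnt < nmbclusters + nsfbufs)
		nmbcnt = nmbclusters + nsfbufs;

	printf("nmbufs %d nmbclusters %d nmbcnt %d nsfbufs %d\n",
	    nmbufs, nmbclusters, nmbcnt, nsfbufs);
	return (0);
}
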
+ */ +#define MB_GET_PCPU_LIST(mb_lst) (mb_lst)->ml_cntlst[PCPU_GET(cpuid)] + +#define MB_GET_PCPU_LIST_NUM(mb_lst, num) (mb_lst)->ml_cntlst[(num)] + +#define MB_GET_GEN_LIST(mb_lst) (mb_lst)->ml_genlist + +#define MB_LOCK_CONT(mb_cnt) mtx_lock((mb_cnt)->mb_cont.mc_lock) + +#define MB_UNLOCK_CONT(mb_cnt) mtx_unlock((mb_cnt)->mb_cont.mc_lock) + +#define MB_BUCKET_INDX(mb_obj, mb_lst) \ + (int)(((caddr_t)(mb_obj) - (caddr_t)(mb_lst)->ml_mapbase) / PAGE_SIZE) + +#define MB_GET_OBJECT(mb_objp, mb_bckt, mb_lst) \ +{ \ + struct mc_buckethd *_mchd = &((mb_lst)->mb_cont.mc_bhead); \ + \ + (mb_bckt)->mb_numfree--; \ + (mb_objp) = (mb_bckt)->mb_free[((mb_bckt)->mb_numfree)]; \ + (*((mb_lst)->mb_cont.mc_objcount))--; \ + if ((mb_bckt)->mb_numfree == 0) { \ + SLIST_REMOVE_HEAD(_mchd, mb_blist); \ + SLIST_NEXT((mb_bckt), mb_blist) = NULL; \ + (mb_bckt)->mb_owner |= MB_BUCKET_FREE; \ + } \ +} + +#define MB_PUT_OBJECT(mb_objp, mb_bckt, mb_lst) \ + (mb_bckt)->mb_free[((mb_bckt)->mb_numfree)] = (mb_objp); \ + (mb_bckt)->mb_numfree++; \ + (*((mb_lst)->mb_cont.mc_objcount))++; + +/* + * Ownership of buckets/containers is represented by integers. The PCPU + * lists range from 0 to NCPU-1. We need a free numerical id for the general + * list (we use NCPU). We also need a non-conflicting free bit to indicate + * that the bucket is free and removed from a container, while not losing + * the bucket's originating container id. We use the highest bit + * for the free marker. + */ +#define MB_GENLIST_OWNER (NCPU) +#define MB_BUCKET_FREE (1 << (sizeof(int) * 8 - 1)) + +/* + * sysctl(8) exported objects + */ +struct mbstat mbstat; /* General stats + infos. */ +struct mbpstat mb_statpcpu[NCPU+1]; /* PCPU + Gen. container alloc stats */ +int mbuf_wait = 64; /* Sleep time for wait code (ticks) */ +u_int mbuf_limit = 512; /* Upper lim. on # of mbufs per CPU */ +u_int clust_limit = 128; /* Upper lim. on # of clusts per CPU */ +SYSCTL_DECL(_kern_ipc); +SYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, + "Maximum number of mbuf clusters available"); +SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0, + "Maximum number of mbufs available"); +SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0, + "Number used to scale kmem_map to ensure sufficient space for counters"); +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RD, &nsfbufs, 0, + "Maximum number of sendfile(2) sf_bufs available"); +SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW, &mbuf_wait, 0, + "Sleep time of mbuf subsystem wait allocations during exhaustion"); +SYSCTL_UINT(_kern_ipc, OID_AUTO, mbuf_limit, CTLFLAG_RW, &mbuf_limit, 0, + "Upper limit of number of mbufs allowed on each PCPU list"); +SYSCTL_UINT(_kern_ipc, OID_AUTO, clust_limit, CTLFLAG_RW, &clust_limit, 0, + "Upper limit of number of mbuf clusters allowed on each PCPU list"); +SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat, + "Mbuf general information and statistics"); +SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mb_statpcpu, CTLFLAG_RD, mb_statpcpu, + sizeof(mb_statpcpu), "S,", "Mbuf allocator per CPU statistics"); + +/* + * Prototypes of local allocator routines. + */ +static __inline void *mb_alloc(struct mb_lstmngr *, int); +void *mb_alloc_wait(struct mb_lstmngr *); +static __inline void mb_free(struct mb_lstmngr *, void *); +static void mb_init(void *); +struct mb_bucket *mb_pop_cont(struct mb_lstmngr *, int, + struct mb_pcpu_list *); +void mb_reclaim(void); + +/* + * Initial allocation numbers. 
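
/*
 * A small sketch of the owner encoding described above: the low bits hold a
 * container id (0..NCPU-1 for per-CPU lists, NCPU for the general list) and
 * the high bit marks a bucket that has been pulled off its list while still
 * remembering where it came from.  The NCPU value here is hypothetical.
 */
#include <stdio.h>

#define NCPU			4		/* hypothetical */
#define MB_GENLIST_OWNER	(NCPU)
#define MB_BUCKET_FREE		(1 << (sizeof(int) * 8 - 1))

int
main(void)
{
	int owner = 2;				/* bucket owned by CPU 2 */

	owner |= MB_BUCKET_FREE;		/* bucket emptied, off its list */
	printf("marked free? %s, originating container %d\n",
	    (owner & MB_BUCKET_FREE) ? "yes" : "no",
	    owner & ~MB_BUCKET_FREE);

	owner = MB_GENLIST_OWNER;		/* migrated to the general list */
	printf("owned by general list? %s\n",
	    owner == MB_GENLIST_OWNER ? "yes" : "no");
	return (0);
}
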
Each parameter represents the number of buckets + * of each object that will be placed initially in each PCPU container for + * said object. + */ +#define NMB_MBUF_INIT 4 +#define NMB_CLUST_INIT 16 + +/* + * Initialize the mbuf subsystem. + * + * We sub-divide the kmem_map into several submaps; this way, we don't have + * to worry about artificially limiting the number of mbuf or mbuf cluster + * allocations, due to fear of one type of allocation "stealing" address + * space initially reserved for another. + * + * Setup both the general containers and all the PCPU containers. Populate + * the PCPU containers with initial numbers. + */ +MALLOC_DEFINE(M_MBUF, "mbufmgr", "mbuf subsystem management structures"); +SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mb_init, NULL) +void +mb_init(void *dummy) +{ + struct mb_pcpu_list *pcpu_cnt; + vm_size_t mb_map_size; + int i, j; + + /* + * Setup all the submaps, for each type of object that we deal + * with in this allocator. + */ + mb_map_size = (vm_size_t)(nmbufs * MSIZE); + mb_map_size = rounddown(mb_map_size, PAGE_SIZE); + mb_list_mbuf.ml_btable = malloc((unsigned long)mb_map_size / PAGE_SIZE * + sizeof(struct mb_bucket *), M_MBUF, M_NOWAIT); + if (mb_list_mbuf.ml_btable == NULL) + goto bad; + mb_list_mbuf.ml_map = kmem_suballoc(kmem_map,&(mb_list_mbuf.ml_mapbase), + &(mb_list_mbuf.ml_maptop), mb_map_size); + mb_list_mbuf.ml_mapfull = 0; + mb_list_mbuf.ml_objsize = MSIZE; + mb_list_mbuf.ml_wmhigh = &mbuf_limit; + + mb_map_size = (vm_size_t)(nmbclusters * MCLBYTES); + mb_map_size = rounddown(mb_map_size, PAGE_SIZE); + mb_list_clust.ml_btable = malloc((unsigned long)mb_map_size / PAGE_SIZE + * sizeof(struct mb_bucket *), M_MBUF, M_NOWAIT); + if (mb_list_clust.ml_btable == NULL) + goto bad; + mb_list_clust.ml_map = kmem_suballoc(kmem_map, + &(mb_list_clust.ml_mapbase), &(mb_list_clust.ml_maptop), + mb_map_size); + mb_list_clust.ml_mapfull = 0; + mb_list_clust.ml_objsize = MCLBYTES; + mb_list_clust.ml_wmhigh = &clust_limit; + + /* XXX XXX XXX: mbuf_map->system_map = clust_map->system_map = 1 */ + + /* + * Allocate required general (global) containers for each object type. + */ + mb_list_mbuf.ml_genlist = malloc(sizeof(struct mb_gen_list), M_MBUF, + M_NOWAIT); + mb_list_clust.ml_genlist = malloc(sizeof(struct mb_gen_list), M_MBUF, + M_NOWAIT); + if ((mb_list_mbuf.ml_genlist == NULL) || + (mb_list_clust.ml_genlist == NULL)) + goto bad; + + /* + * Initialize condition variables and general container mutex locks. + */ + mtx_init(&mbuf_gen, "mbuf subsystem general lists lock", 0); + cv_init(&(mb_list_mbuf.ml_genlist->mgl_mstarved), "mbuf pool starved"); + cv_init(&(mb_list_clust.ml_genlist->mgl_mstarved), + "mcluster pool starved"); + mb_list_mbuf.ml_genlist->mb_cont.mc_lock = + mb_list_clust.ml_genlist->mb_cont.mc_lock = &mbuf_gen; + + /* + * Setup the general containers for each object. 
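
/*
 * A sketch of the submap sizing and bucket-table indexing used above: the
 * map covers rounddown(nmbufs * MSIZE, PAGE_SIZE) bytes, one bucket pointer
 * is kept per page, and MB_BUCKET_INDX() is simply the page number of an
 * object within the map.  Every numeric value here is hypothetical.
 */
#include <stdio.h>

#define PAGE_SIZE	4096
#define MSIZE		256			/* hypothetical mbuf size */
#define rounddown(x, y)	(((x) / (y)) * (y))

int
main(void)
{
	unsigned long nmbufs = 4096;		/* hypothetical limit */
	unsigned long mapbase = 0xc2000000UL;	/* hypothetical map start */
	unsigned long map_size, nbuckets, obj;

	map_size = rounddown(nmbufs * MSIZE, PAGE_SIZE);
	nbuckets = map_size / PAGE_SIZE;	/* slots needed in ml_btable */

	/* An object 10000 bytes into the map lands in page (bucket) 2. */
	obj = mapbase + 10000;
	printf("map %lu bytes, %lu buckets, object -> bucket %lu\n",
	    map_size, nbuckets, (obj - mapbase) / PAGE_SIZE);
	return (0);
}
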
+ */ + mb_list_mbuf.ml_genlist->mb_cont.mc_numowner = + mb_list_clust.ml_genlist->mb_cont.mc_numowner = MB_GENLIST_OWNER; + mb_list_mbuf.ml_genlist->mb_cont.mc_starved = + mb_list_clust.ml_genlist->mb_cont.mc_starved = 0; + mb_list_mbuf.ml_genlist->mb_cont.mc_objcount = + &(mb_statpcpu[MB_GENLIST_OWNER].mb_mbfree); + mb_list_clust.ml_genlist->mb_cont.mc_objcount = + &(mb_statpcpu[MB_GENLIST_OWNER].mb_clfree); + mb_list_mbuf.ml_genlist->mb_cont.mc_numpgs = + &(mb_statpcpu[MB_GENLIST_OWNER].mb_mbpgs); + mb_list_clust.ml_genlist->mb_cont.mc_numpgs = + &(mb_statpcpu[MB_GENLIST_OWNER].mb_clpgs); + SLIST_INIT(&(mb_list_mbuf.ml_genlist->mb_cont.mc_bhead)); + SLIST_INIT(&(mb_list_clust.ml_genlist->mb_cont.mc_bhead)); + + /* + * Initialize general mbuf statistics + */ + mbstat.m_msize = MSIZE; + mbstat.m_mclbytes = MCLBYTES; + mbstat.m_minclsize = MINCLSIZE; + mbstat.m_mlen = MLEN; + mbstat.m_mhlen = MHLEN; + + /* + * Allocate and initialize PCPU containers. + */ + for (i = 0; i < mp_ncpus; i++) { + mb_list_mbuf.ml_cntlst[i] = malloc(sizeof(struct mb_pcpu_list), + M_MBUF, M_NOWAIT); + mb_list_clust.ml_cntlst[i] = malloc(sizeof(struct mb_pcpu_list), + M_MBUF, M_NOWAIT); + if ((mb_list_mbuf.ml_cntlst[i] == NULL) || + (mb_list_clust.ml_cntlst[i] == NULL)) + goto bad; + + mtx_init(&mbuf_pcpu[i], "mbuf PCPU list lock", 0); + mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_lock = + mb_list_clust.ml_cntlst[i]->mb_cont.mc_lock = &mbuf_pcpu[i]; + + mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_numowner = + mb_list_clust.ml_cntlst[i]->mb_cont.mc_numowner = i; + mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_starved = + mb_list_clust.ml_cntlst[i]->mb_cont.mc_starved = 0; + mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_objcount = + &(mb_statpcpu[i].mb_mbfree); + mb_list_clust.ml_cntlst[i]->mb_cont.mc_objcount = + &(mb_statpcpu[i].mb_clfree); + mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_numpgs = + &(mb_statpcpu[i].mb_mbpgs); + mb_list_clust.ml_cntlst[i]->mb_cont.mc_numpgs = + &(mb_statpcpu[i].mb_clpgs); + + SLIST_INIT(&(mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_bhead)); + SLIST_INIT(&(mb_list_clust.ml_cntlst[i]->mb_cont.mc_bhead)); + + /* + * Perform initial allocations. + */ + pcpu_cnt = MB_GET_PCPU_LIST_NUM(&mb_list_mbuf, i); + MB_LOCK_CONT(pcpu_cnt); + for (j = 0; j < NMB_MBUF_INIT; j++) { + if (mb_pop_cont(&mb_list_mbuf, M_DONTWAIT, pcpu_cnt) + == NULL) + goto bad; + } + MB_UNLOCK_CONT(pcpu_cnt); + + pcpu_cnt = MB_GET_PCPU_LIST_NUM(&mb_list_clust, i); + MB_LOCK_CONT(pcpu_cnt); + for (j = 0; j < NMB_CLUST_INIT; j++) { + if (mb_pop_cont(&mb_list_clust, M_DONTWAIT, pcpu_cnt) + == NULL) + goto bad; + } + MB_UNLOCK_CONT(pcpu_cnt); + } + + return; +bad: + panic("mb_init(): failed to initialize mbuf subsystem!"); +} + +/* + * Populate a given mbuf PCPU container with a bucket full of fresh new + * buffers. Return a pointer to the new bucket (already in the container if + * successful), or return NULL on failure. + * + * LOCKING NOTES: + * PCPU container lock must be held when this is called. + * The lock is dropped here so that we can cleanly call the underlying VM + * code. If we fail, we return with no locks held. If we succeed (i.e. return + * non-NULL), we return with the PCPU lock held, ready for allocation from + * the returned bucket. + */ +struct mb_bucket * +mb_pop_cont(struct mb_lstmngr *mb_list, int how, struct mb_pcpu_list *cnt_lst) +{ + struct mb_bucket *bucket; + caddr_t p; + int i; + + MB_UNLOCK_CONT(cnt_lst); + /* + * If our object's (finite) map is starved now (i.e. no more address + * space), bail out now. 
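
/*
 * A sketch of the locking convention documented above for mb_pop_cont():
 * enter with the container lock held, drop it around the possibly blocking
 * page allocation, and re-acquire it only on success.  pthread_mutex_t and
 * malloc() stand in for the kernel mtx and kmem_malloc(); this is purely
 * illustrative.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t cont_lock = PTHREAD_MUTEX_INITIALIZER;

/* Returns a "page" with cont_lock held, or NULL with cont_lock dropped. */
static void *
pop_cont_locked(size_t pagesz)
{
	void *page;

	pthread_mutex_unlock(&cont_lock);	/* don't hold it across the allocation */
	page = malloc(pagesz);
	if (page == NULL)
		return (NULL);			/* failure: no locks held */
	pthread_mutex_lock(&cont_lock);		/* success: lock re-held for insertion */
	return (page);
}

int
main(void)
{
	void *p;

	pthread_mutex_lock(&cont_lock);
	p = pop_cont_locked(4096);
	if (p != NULL) {
		/* ...insert the new bucket while still holding the lock... */
		pthread_mutex_unlock(&cont_lock);
		free(p);
	}
	printf("allocation %s\n", p != NULL ? "succeeded" : "failed");
	return (0);
}
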
+ */ + if (mb_list->ml_mapfull) + return (NULL); + + bucket = malloc(sizeof(struct mb_bucket) + + PAGE_SIZE / mb_list->ml_objsize * sizeof(void *), M_MBUF, + how == M_TRYWAIT ? M_WAITOK : M_NOWAIT); + if (bucket == NULL) + return (NULL); + + p = (caddr_t)kmem_malloc(mb_list->ml_map, PAGE_SIZE, + how == M_TRYWAIT ? M_WAITOK : M_NOWAIT); + if (p == NULL) { + free(bucket, M_MBUF); + return (NULL); + } + + bucket->mb_numfree = 0; + mb_list->ml_btable[MB_BUCKET_INDX(p, mb_list)] = bucket; + for (i = 0; i < (PAGE_SIZE / mb_list->ml_objsize); i++) { + bucket->mb_free[i] = p; + bucket->mb_numfree++; + p += mb_list->ml_objsize; + } + + MB_LOCK_CONT(cnt_lst); + bucket->mb_owner = cnt_lst->mb_cont.mc_numowner; + SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead), bucket, mb_blist); + (*(cnt_lst->mb_cont.mc_numpgs))++; + *(cnt_lst->mb_cont.mc_objcount) += bucket->mb_numfree; + + return (bucket); +} + +/* + * Allocate an mbuf-subsystem type object. + * The general case is very easy. Complications only arise if our PCPU + * container is empty. Things get worse if the PCPU container is empty, + * the general container is empty, and we've run out of address space + * in our map; then we try to block if we're willing to (M_TRYWAIT). + */ +static __inline +void * +mb_alloc(struct mb_lstmngr *mb_list, int how) +{ + struct mb_pcpu_list *cnt_lst; + struct mb_bucket *bucket; + void *m; + + m = NULL; + cnt_lst = MB_GET_PCPU_LIST(mb_list); + MB_LOCK_CONT(cnt_lst); + + if ((bucket = SLIST_FIRST(&(cnt_lst->mb_cont.mc_bhead))) != NULL) { + /* + * This is the easy allocation case. We just grab an object + * from a bucket in the PCPU container. At worst, we + * have just emptied the bucket and so we remove it + * from the container. + */ + MB_GET_OBJECT(m, bucket, cnt_lst); + MB_UNLOCK_CONT(cnt_lst); + } else { + struct mb_gen_list *gen_list; + + /* + * This is the less-common more difficult case. We must + * first verify if the general list has anything for us + * and if that also fails, we must allocate a page from + * the map and create a new bucket to place in our PCPU + * container (already locked). If the map is starved then + * we're really in for trouble, as we have to wait on + * the general container's condition variable. + */ + gen_list = MB_GET_GEN_LIST(mb_list); + MB_LOCK_CONT(gen_list); + + if ((bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead))) + != NULL) { + /* + * Give ownership of the bucket to our CPU's + * container, but only actually put the bucket + * in the container if it doesn't become free + * upon removing an mbuf from it. + */ + SLIST_REMOVE_HEAD(&(gen_list->mb_cont.mc_bhead), + mb_blist); + bucket->mb_owner = cnt_lst->mb_cont.mc_numowner; + (*(gen_list->mb_cont.mc_numpgs))--; + (*(cnt_lst->mb_cont.mc_numpgs))++; + *(gen_list->mb_cont.mc_objcount) -= bucket->mb_numfree; + bucket->mb_numfree--; + m = bucket->mb_free[(bucket->mb_numfree)]; + if (bucket->mb_numfree == 0) { + SLIST_NEXT(bucket, mb_blist) = NULL; + bucket->mb_owner |= MB_BUCKET_FREE; + } else { + SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead), + bucket, mb_blist); + *(cnt_lst->mb_cont.mc_objcount) += + bucket->mb_numfree; + } + MB_UNLOCK_CONT(gen_list); + MB_UNLOCK_CONT(cnt_lst); + } else { + /* + * We'll have to allocate a new page. 
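
/*
 * A sketch of the bookkeeping performed when a bucket is handed from the
 * general list to the current CPU (the second case above): the page and
 * free-object counts move between containers, one object is taken for the
 * caller right away, and a bucket that this empties is only flagged, not
 * listed.  The names and numbers here are hypothetical.
 */
#include <stdio.h>

struct toy_cont {
	long objcount;		/* like *mc_objcount */
	long numpgs;		/* like *mc_numpgs */
};

int
main(void)
{
	struct toy_cont gen = { 16, 1 };	/* one bucket, 16 free objects */
	struct toy_cont cpu0 = { 0, 0 };
	int bucket_numfree = 16;

	/* Hand the bucket from the general container to CPU 0. */
	gen.numpgs--;
	cpu0.numpgs++;
	gen.objcount -= bucket_numfree;

	/* Take one object for the caller before listing the bucket. */
	bucket_numfree--;
	if (bucket_numfree > 0)
		cpu0.objcount += bucket_numfree;	/* bucket goes on CPU 0's list */
	/* else the bucket would only be marked MB_BUCKET_FREE */

	printf("gen: %ld objs/%ld pgs, cpu0: %ld objs/%ld pgs, handed out 1\n",
	    gen.objcount, gen.numpgs, cpu0.objcount, cpu0.numpgs);
	return (0);
}
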
+ */ + MB_UNLOCK_CONT(gen_list); + bucket = mb_pop_cont(mb_list, how, cnt_lst); + if (bucket != NULL) { + bucket->mb_numfree--; + m = bucket->mb_free[(bucket->mb_numfree)]; + (*(cnt_lst->mb_cont.mc_objcount))--; + MB_UNLOCK_CONT(cnt_lst); + } else { + if (how == M_TRYWAIT) { + /* + * Absolute worst-case scenario. We block if + * we're willing to, but only after trying to + * steal from other lists. + */ + mb_list->ml_mapfull = 1; + m = mb_alloc_wait(mb_list); + } else + /* XXX: No consistency. */ + mbstat.m_drops++; + } + } + } + + return (m); +} + +/* + * This is the worst-case scenario called only if we're allocating with + * M_TRYWAIT. We first drain all the protocols, then try to find an mbuf + * by looking in every PCPU container. If we're still unsuccesful, we + * try the general container one last time and possibly block on our + * starved cv. + */ +void * +mb_alloc_wait(struct mb_lstmngr *mb_list) +{ + struct mb_pcpu_list *cnt_lst; + struct mb_gen_list *gen_list; + struct mb_bucket *bucket; + void *m; + int i, cv_ret; + + /* + * Try to reclaim mbuf-related objects (mbufs, clusters). + */ + mb_reclaim(); + + /* + * Cycle all the PCPU containers. Increment starved counts if found + * empty. + */ + for (i = 0; i < mp_ncpus; i++) { + cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, i); + MB_LOCK_CONT(cnt_lst); + + /* + * If container is non-empty, get a single object from it. + * If empty, increment starved count. + */ + if ((bucket = SLIST_FIRST(&(cnt_lst->mb_cont.mc_bhead))) != + NULL) { + MB_GET_OBJECT(m, bucket, cnt_lst); + MB_UNLOCK_CONT(cnt_lst); + mbstat.m_wait++; /* XXX: No consistency. */ + return (m); + } else + cnt_lst->mb_cont.mc_starved++; + + MB_UNLOCK_CONT(cnt_lst); + } + + /* + * We're still here, so that means it's time to get the general + * container lock, check it one more time (now that mb_reclaim() + * has been called) and if we still get nothing, block on the cv. + */ + gen_list = MB_GET_GEN_LIST(mb_list); + MB_LOCK_CONT(gen_list); + if ((bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead))) != NULL) { + MB_GET_OBJECT(m, bucket, gen_list); + MB_UNLOCK_CONT(gen_list); + mbstat.m_wait++; /* XXX: No consistency. */ + return (m); + } + + gen_list->mb_cont.mc_starved++; + cv_ret = cv_timedwait(&(gen_list->mgl_mstarved), + gen_list->mb_cont.mc_lock, mbuf_wait); + gen_list->mb_cont.mc_starved--; + + if ((cv_ret == 0) && + ((bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead))) != NULL)) { + MB_GET_OBJECT(m, bucket, gen_list); + mbstat.m_wait++; /* XXX: No consistency. */ + } else { + mbstat.m_drops++; /* XXX: No consistency. */ + m = NULL; + } + + MB_UNLOCK_CONT(gen_list); + + return (m); +} + +/* + * Free an object to its rightful container. + * In the very general case, this operation is really very easy. + * Complications arise primarily if: + * (a) We've hit the high limit on number of free objects allowed in + * our PCPU container. + * (b) We're in a critical situation where our container has been + * marked 'starved' and we need to issue wakeups on the starved + * condition variable. + * (c) Minor (odd) cases: our bucket has migrated while we were + * waiting for the lock; our bucket is in the general container; + * our bucket is empty. 
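
/*
 * A sketch of the timed-wait step in mb_alloc_wait() above, with POSIX
 * condition variables standing in for cv_timedwait() on the starved cv.
 * The one second timeout approximates "mbuf_wait ticks" and, like the shared
 * nfree counter, is an arbitrary assumption for illustration.
 */
#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t starved = PTHREAD_COND_INITIALIZER;
static int nfree = 0;			/* objects on the (currently empty) free list */

int
main(void)
{
	struct timespec deadline;
	void *obj = NULL;
	int rc = 0;

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 1;		/* roughly "mbuf_wait ticks" */

	pthread_mutex_lock(&lock);
	while (nfree == 0 && rc == 0)	/* until something is freed or we time out */
		rc = pthread_cond_timedwait(&starved, &lock, &deadline);
	if (nfree > 0) {
		nfree--;		/* woken in time: take the freed object */
		obj = &nfree;		/* placeholder for a real object */
	}
	pthread_mutex_unlock(&lock);

	printf("wait %s\n", obj != NULL ? "satisfied" : "timed out (drop)");
	return (0);
}
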
+ */ +static __inline +void +mb_free(struct mb_lstmngr *mb_list, void *m) +{ + struct mb_pcpu_list *cnt_lst; + struct mb_gen_list *gen_list; + struct mb_bucket *bucket; + u_int owner; + + bucket = mb_list->ml_btable[MB_BUCKET_INDX(m, mb_list)]; + + /* + * Make sure that if after we lock the bucket's present container the + * bucket has migrated, that we drop the lock and get the new one. + */ +retry_lock: + owner = bucket->mb_owner & ~MB_BUCKET_FREE; + switch (owner) { + case MB_GENLIST_OWNER: + gen_list = MB_GET_GEN_LIST(mb_list); + MB_LOCK_CONT(gen_list); + if (owner != (bucket->mb_owner & ~MB_BUCKET_FREE)) { + MB_UNLOCK_CONT(gen_list); + goto retry_lock; + } + + /* + * If we're intended for the general container, this is + * real easy: no migrating required. The only `bogon' + * is that we're now contending with all the threads + * dealing with the general list, but this is expected. + */ + MB_PUT_OBJECT(m, bucket, gen_list); + if (gen_list->mb_cont.mc_starved > 0) + cv_signal(&(gen_list->mgl_mstarved)); + MB_UNLOCK_CONT(gen_list); + break; + + default: + cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, owner); + MB_LOCK_CONT(cnt_lst); + if (owner != (bucket->mb_owner & ~MB_BUCKET_FREE)) { + MB_UNLOCK_CONT(cnt_lst); + goto retry_lock; + } + + MB_PUT_OBJECT(m, bucket, cnt_lst); + + if (cnt_lst->mb_cont.mc_starved > 0) { + /* + * This is a tough case. It means that we've + * been flagged at least once to indicate that + * we're empty, and that the system is in a critical + * situation, so we ought to migrate at least one + * bucket over to the general container. + * There may or may not be a thread blocking on + * the starved condition variable, but chances + * are that one will eventually come up soon so + * it's better to migrate now than never. + */ + gen_list = MB_GET_GEN_LIST(mb_list); + MB_LOCK_CONT(gen_list); + KASSERT((bucket->mb_owner & MB_BUCKET_FREE) != 0, + ("mb_free: corrupt bucket %p\n", bucket)); + SLIST_INSERT_HEAD(&(gen_list->mb_cont.mc_bhead), + bucket, mb_blist); + bucket->mb_owner = MB_GENLIST_OWNER; + (*(cnt_lst->mb_cont.mc_objcount))--; + (*(gen_list->mb_cont.mc_objcount))++; + (*(cnt_lst->mb_cont.mc_numpgs))--; + (*(gen_list->mb_cont.mc_numpgs))++; + + /* + * Determine whether or not to keep transferring + * buckets to the general list or whether we've + * transferred enough already. + * We realize that although we may flag another + * bucket to be migrated to the general container + * that in the meantime, the thread that was + * blocked on the cv is already woken up and + * long gone. But in that case, the worst + * consequence is that we will end up migrating + * one bucket too many, which is really not a big + * deal, especially if we're close to a critical + * situation. + */ + if (gen_list->mb_cont.mc_starved > 0) { + cnt_lst->mb_cont.mc_starved--; + cv_signal(&(gen_list->mgl_mstarved)); + } else + cnt_lst->mb_cont.mc_starved = 0; + + MB_UNLOCK_CONT(gen_list); + MB_UNLOCK_CONT(cnt_lst); + break; + } + + if (*(cnt_lst->mb_cont.mc_objcount) > *(mb_list->ml_wmhigh)) { + /* + * We've hit the high limit of allowed numbers of mbufs + * on this PCPU list. We must now migrate a bucket + * over to the general container. 
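
/*
 * A sketch of the free-side watermark behaviour described in case (a) above:
 * once a per-CPU container holds more free objects than its limit
 * (mbuf_limit / clust_limit), a bucket's worth is pushed back to the general
 * container so other CPUs can reach it.  The fixed PER_BUCKET transfer and
 * the numbers are simplifications; the real code moves one specific bucket.
 */
#include <stdio.h>

#define PER_BUCKET	16		/* free objects carried by one bucket */

int
main(void)
{
	long cpu_free = 0, gen_free = 0;
	long limit = 64;		/* like mbuf_limit, sysctl-tunable */
	int i;

	/* Simulate a burst of frees landing on one CPU's container. */
	for (i = 0; i < 80; i++) {
		cpu_free++;
		if (cpu_free > limit) {
			/* Migrate one bucket's worth to the general list. */
			cpu_free -= PER_BUCKET;
			gen_free += PER_BUCKET;
		}
	}
	printf("per-CPU free: %ld, general free: %ld\n", cpu_free, gen_free);
	return (0);
}
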
+ */ + gen_list = MB_GET_GEN_LIST(mb_list); + MB_LOCK_CONT(gen_list); + if ((bucket->mb_owner & MB_BUCKET_FREE) == 0) { + bucket = + SLIST_FIRST(&(cnt_lst->mb_cont.mc_bhead)); + SLIST_REMOVE_HEAD(&(cnt_lst->mb_cont.mc_bhead), + mb_blist); + } + SLIST_INSERT_HEAD(&(gen_list->mb_cont.mc_bhead), + bucket, mb_blist); + bucket->mb_owner = MB_GENLIST_OWNER; + *(cnt_lst->mb_cont.mc_objcount) -= bucket->mb_numfree; + *(gen_list->mb_cont.mc_objcount) += bucket->mb_numfree; + (*(cnt_lst->mb_cont.mc_numpgs))--; + (*(gen_list->mb_cont.mc_numpgs))++; + + MB_UNLOCK_CONT(gen_list); + MB_UNLOCK_CONT(cnt_lst); + break; + } + + if (bucket->mb_owner & MB_BUCKET_FREE) { + SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead), + bucket, mb_blist); + bucket->mb_owner = cnt_lst->mb_cont.mc_numowner; + } + + MB_UNLOCK_CONT(cnt_lst); + break; + } + + return; +} + +/* + * Drain protocols in hopes to free up some resources. + * + * LOCKING NOTES: + * No locks should be held when this is called. The drain routines have to + * presently acquire some locks which raises the possibility of lock order + * violation if we're holding any mutex if that mutex is acquired in reverse + * order relative to one of the locks in the drain routines. + */ +void +mb_reclaim(void) +{ + struct domain *dp; + struct protosw *pr; + +/* + * XXX: Argh, we almost always trip here with witness turned on now-a-days + * XXX: because we often come in with Giant held. For now, there's no way + * XXX: to avoid this. + */ +#ifdef WITNESS + KASSERT(witness_list(curproc) == 0, + ("mb_reclaim() called with locks held")); +#endif + + mbstat.m_drain++; /* XXX: No consistency. */ + + for (dp = domains; dp; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_drain) + (*pr->pr_drain)(); + +} + +/* + * Local mbuf & cluster alloc macros and routines. + * Local macro and function names begin with an underscore ("_"). + */ +void _mext_free(struct mbuf *); +void _mclfree(struct mbuf *); + +#define _m_get(m, how, type) do { \ + (m) = (struct mbuf *)mb_alloc(&mb_list_mbuf, (how)); \ + if ((m) != NULL) { \ + (m)->m_type = (type); \ + (m)->m_next = NULL; \ + (m)->m_nextpkt = NULL; \ + (m)->m_data = (m)->m_dat; \ + (m)->m_flags = 0; \ + } \ +} while (0) + +#define _m_gethdr(m, how, type) do { \ + (m) = (struct mbuf *)mb_alloc(&mb_list_mbuf, (how)); \ + if ((m) != NULL) { \ + (m)->m_type = (type); \ + (m)->m_next = NULL; \ + (m)->m_nextpkt = NULL; \ + (m)->m_data = (m)->m_pktdat; \ + (m)->m_flags = M_PKTHDR; \ + (m)->m_pkthdr.rcvif = NULL; \ + (m)->m_pkthdr.csum_flags = 0; \ + (m)->m_pkthdr.aux = NULL; \ + } \ +} while (0) + +/* XXX: Check for M_PKTHDR && m_pkthdr.aux is bogus... 
please fix (see KAME) */ +#define _m_free(m, n) do { \ + (n) = (m)->m_next; \ + if ((m)->m_flags & M_EXT) \ + MEXTFREE((m)); \ + if (((m)->m_flags & M_PKTHDR) != 0 && (m)->m_pkthdr.aux) { \ + m_freem((m)->m_pkthdr.aux); \ + (m)->m_pkthdr.aux = NULL; \ + } \ + mb_free(&mb_list_mbuf, (m)); \ +} while (0) + +#define _mext_init_ref(m) do { \ + (m)->m_ext.ref_cnt = malloc(sizeof(u_int), M_MBUF, M_NOWAIT); \ + if ((m)->m_ext.ref_cnt != NULL) { \ + *((m)->m_ext.ref_cnt) = 0; \ + MEXT_ADD_REF((m)); \ + } \ +} while (0) + +#define _mext_dealloc_ref(m) \ + free((m)->m_ext.ref_cnt, M_MBUF) + +void +_mext_free(struct mbuf *mb) +{ + + if (mb->m_ext.ext_type == EXT_CLUSTER) + mb_free(&mb_list_clust, (caddr_t)mb->m_ext.ext_buf); + else + (*(mb->m_ext.ext_free))(mb->m_ext.ext_buf, mb->m_ext.ext_args); + + _mext_dealloc_ref(mb); + return; +} + +/* We only include this here to avoid making m_clget() excessively large + * due to too much inlined code. */ +void +_mclfree(struct mbuf *mb) +{ + + mb_free(&mb_list_clust, (caddr_t)mb->m_ext.ext_buf); + mb->m_ext.ext_buf = NULL; + return; +} + +/* + * Exported space allocation and de-allocation routines. + */ +struct mbuf * +m_get(int how, int type) +{ + struct mbuf *mb; + + _m_get(mb, how, type); + return (mb); +} + +struct mbuf * +m_gethdr(int how, int type) +{ + struct mbuf *mb; + + _m_gethdr(mb, how, type); + return (mb); +} + +struct mbuf * +m_get_clrd(int how, int type) +{ + struct mbuf *mb; + + _m_get(mb, how, type); + + if (mb != NULL) + bzero(mtod(mb, caddr_t), MLEN); + + return (mb); +} + +struct mbuf * +m_gethdr_clrd(int how, int type) +{ + struct mbuf *mb; + + _m_gethdr(mb, how, type); + + if (mb != NULL) + bzero(mtod(mb, caddr_t), MHLEN); + + return (mb); +} + +struct mbuf * +m_free(struct mbuf *mb) +{ + struct mbuf *nb; + + _m_free(mb, nb); + return (nb); +} + +void +m_clget(struct mbuf *mb, int how) +{ + + mb->m_ext.ext_buf = (caddr_t)mb_alloc(&mb_list_clust, how); + if (mb->m_ext.ext_buf != NULL) { + _mext_init_ref(mb); + if (mb->m_ext.ref_cnt == NULL) + _mclfree(mb); + else { + mb->m_data = mb->m_ext.ext_buf; + mb->m_flags |= M_EXT; + mb->m_ext.ext_free = NULL; + mb->m_ext.ext_args = NULL; + mb->m_ext.ext_size = MCLBYTES; + mb->m_ext.ext_type = EXT_CLUSTER; + } + } + return; +} + +void +m_extadd(struct mbuf *mb, caddr_t buf, u_int size, + void (*freef)(caddr_t, void *), void *args, short flags, int type) +{ + + _mext_init_ref(mb); + if (mb->m_ext.ref_cnt != NULL) { + mb->m_flags |= (M_EXT | flags); + mb->m_ext.ext_buf = buf; + mb->m_data = mb->m_ext.ext_buf; + mb->m_ext.ext_size = size; + mb->m_ext.ext_free = freef; + mb->m_ext.ext_args = args; + mb->m_ext.ext_type = type; + } + return; +} Index: src/sys/kern/uipc_mbuf.c =================================================================== RCS file: /home/ncvs/src/sys/kern/uipc_mbuf.c,v retrieving revision 1.85 diff -u -r1.85 uipc_mbuf.c --- src/sys/kern/uipc_mbuf.c 2001/06/20 19:48:14 1.85 +++ src/sys/kern/uipc_mbuf.c 2001/06/21 17:47:01 @@ -37,50 +37,20 @@ #include "opt_param.h" #include #include -#include #include #include #include #include -#include #include #include #include -#include -#include -#include - -#ifndef NMBCLUSTERS -#define NMBCLUSTERS (512 + MAXUSERS * 16) -#endif - -static void mbinit(void *); -SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL) - -struct mbuf *mbutl; -struct mbstat mbstat; -u_long mbtypes[MT_NTYPES]; int max_linkhdr; int max_protohdr; int max_hdr; int max_datalen; -int nmbclusters = NMBCLUSTERS; -int nmbufs = NMBCLUSTERS * 4; -int nmbcnt; -u_long 
m_mballoc_wid = 0; -u_long m_clalloc_wid = 0; /* - * freelist header structures... - * mbffree_lst, mclfree_lst, mcntfree_lst - */ -struct mbffree_lst mmbfree; -struct mclfree_lst mclfree; -struct mcntfree_lst mcntfree; -struct mtx mbuf_mtx; - -/* * sysctl(8) exported objects */ SYSCTL_DECL(_kern_ipc); @@ -91,421 +61,8 @@ SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW, &max_datalen, 0, ""); -SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW, - &mbuf_wait, 0, ""); -SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD, &mbstat, mbstat, ""); -SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes, - sizeof(mbtypes), "LU", ""); -SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, - &nmbclusters, 0, "Maximum number of mbuf clusters available"); -SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0, - "Maximum number of mbufs available"); -SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0, - "Maximum number of ext_buf counters available"); - -TUNABLE_INT("kern.ipc.nmbclusters", &nmbclusters); -TUNABLE_INT("kern.ipc.nmbufs", &nmbufs); -TUNABLE_INT("kern.ipc.nmbcnt", &nmbcnt); - -static void m_reclaim(void); - -/* Initial allocation numbers */ -#define NCL_INIT 2 -#define NMB_INIT 16 -#define REF_INIT NMBCLUSTERS - -static void -tunable_mbinit(void *dummy) -{ - - /* - * Sanity checks and pre-initialization for non-constants. - * This has to be done before VM initialization. - */ - if (nmbufs < nmbclusters * 2) - nmbufs = nmbclusters * 2; - if (nmbcnt == 0) - nmbcnt = EXT_COUNTERS; -} -SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL); - -/* - * Full mbuf subsystem initialization done here. - * - * XXX: If ever we have system specific map setups to do, then move them to - * machdep.c - for now, there is no reason for this stuff to go there. - */ -static void -mbinit(void *dummy) -{ - vm_offset_t maxaddr; - vm_size_t mb_map_size; - - /* - * Setup the mb_map, allocate requested VM space. - */ - mb_map_size = (vm_size_t)(nmbufs * MSIZE + nmbclusters * MCLBYTES + - nmbcnt * sizeof(union mext_refcnt)); - mb_map_size = rounddown(mb_map_size, PAGE_SIZE); - mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, - mb_map_size); - /* XXX XXX XXX: mb_map->system_map = 1; */ - - /* - * Initialize the free list headers, and setup locks for lists. - */ - mmbfree.m_head = NULL; - mclfree.m_head = NULL; - mcntfree.m_head = NULL; - mtx_init(&mbuf_mtx, "mbuf free list lock", MTX_DEF); - cv_init(&mmbfree.m_starved, "mbuf free list starved cv"); - cv_init(&mclfree.m_starved, "mbuf cluster free list starved cv"); - - /* - * Initialize mbuf subsystem (sysctl exported) statistics structure. - */ - mbstat.m_msize = MSIZE; - mbstat.m_mclbytes = MCLBYTES; - mbstat.m_minclsize = MINCLSIZE; - mbstat.m_mlen = MLEN; - mbstat.m_mhlen = MHLEN; - - /* - * Perform some initial allocations. - */ - mtx_lock(&mbuf_mtx); - if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0) - goto bad; - if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0) - goto bad; - if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0) - goto bad; - mtx_unlock(&mbuf_mtx); - - return; -bad: - panic("mbinit: failed to initialize mbuf subsystem!"); -} - -/* - * Allocate at least nmb reference count structs and place them - * on the ref cnt free list. - * - * Must be called with the mcntfree lock held. 
- */ -int -m_alloc_ref(u_int nmb, int how) -{ - caddr_t p; - u_int nbytes; - int i; - - /* - * We don't cap the amount of memory that can be used - * by the reference counters, like we do for mbufs and - * mbuf clusters. In fact, we're absolutely sure that we - * won't ever be going over our allocated space. We keep enough - * space in mb_map to accomodate maximum values of allocatable - * external buffers including, but not limited to, clusters. - * (That's also why we won't have to have wait routines for - * counters). - * - * If we're in here, we're absolutely certain to be returning - * succesfully, as long as there is physical memory to accomodate - * us. And if there isn't, but we're willing to wait, then - * kmem_malloc() will do the only waiting needed. - */ - - nbytes = round_page(nmb * sizeof(union mext_refcnt)); - if (1 /* XXX: how == M_TRYWAIT */) - mtx_unlock(&mbuf_mtx); - if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ? - M_WAITOK : M_NOWAIT)) == NULL) { - if (1 /* XXX: how == M_TRYWAIT */) - mtx_lock(&mbuf_mtx); - return (0); - } - nmb = nbytes / sizeof(union mext_refcnt); - - /* - * We don't let go of the mutex in order to avoid a race. - * It is up to the caller to let go of the mutex. - */ - if (1 /* XXX: how == M_TRYWAIT */) - mtx_lock(&mbuf_mtx); - for (i = 0; i < nmb; i++) { - ((union mext_refcnt *)p)->next_ref = mcntfree.m_head; - mcntfree.m_head = (union mext_refcnt *)p; - p += sizeof(union mext_refcnt); - mbstat.m_refree++; - } - mbstat.m_refcnt += nmb; - - return (1); -} - -/* - * Allocate at least nmb mbufs and place on mbuf free list. - * - * Must be called with the mmbfree lock held. - */ -int -m_mballoc(int nmb, int how) -{ - caddr_t p; - int i; - int nbytes; - - nbytes = round_page(nmb * MSIZE); - nmb = nbytes / MSIZE; - - /* - * If we've hit the mbuf limit, stop allocating from mb_map. - * Also, once we run out of map space, it will be impossible to - * get any more (nothing is ever freed back to the map). - */ - if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs)) - return (0); - - if (1 /* XXX: how == M_TRYWAIT */) - mtx_unlock(&mbuf_mtx); - p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ? - M_WAITOK : M_NOWAIT); - if (1 /* XXX: how == M_TRYWAIT */) { - mtx_lock(&mbuf_mtx); - if (p == NULL) - mbstat.m_wait++; - } - - /* - * Either the map is now full, or `how' is M_DONTWAIT and there - * are no pages left. - */ - if (p == NULL) - return (0); - - /* - * We don't let go of the mutex in order to avoid a race. - * It is up to the caller to let go of the mutex when done - * with grabbing the mbuf from the free list. - */ - for (i = 0; i < nmb; i++) { - ((struct mbuf *)p)->m_next = mmbfree.m_head; - mmbfree.m_head = (struct mbuf *)p; - p += MSIZE; - } - mbstat.m_mbufs += nmb; - mbtypes[MT_FREE] += nmb; - return (1); -} - -/* - * Once the mb_map has been exhausted and if the call to the allocation macros - * (or, in some cases, functions) is with M_TRYWAIT, then it is necessary to - * rely solely on reclaimed mbufs. - * - * Here we request for the protocols to free up some resources and, if we - * still cannot get anything, then we wait for an mbuf to be freed for a - * designated (mbuf_wait) time, at most. - * - * Must be called with the mmbfree mutex held. - */ -struct mbuf * -m_mballoc_wait(void) -{ - struct mbuf *p = NULL; - - /* - * See if we can drain some resources out of the protocols. - * We drop the mmbfree mutex to avoid recursing into it in some of - * the drain routines. 
Clearly, we're faced with a race here because - * once something is freed during the drain, it may be grabbed right - * from under us by some other thread. But we accept this possibility - * in order to avoid a potentially large lock recursion and, more - * importantly, to avoid a potential lock order reversal which may - * result in deadlock (See comment above m_reclaim()). - */ - mtx_unlock(&mbuf_mtx); - m_reclaim(); - - mtx_lock(&mbuf_mtx); - _MGET(p, M_DONTWAIT); - - if (p == NULL) { - int retval; - m_mballoc_wid++; - retval = cv_timedwait(&mmbfree.m_starved, &mbuf_mtx, - mbuf_wait); - m_mballoc_wid--; - - /* - * If we got signaled (i.e. didn't time out), allocate. - */ - if (retval == 0) - _MGET(p, M_DONTWAIT); - } - - if (p != NULL) { - mbstat.m_wait++; - if (mmbfree.m_head != NULL) - MBWAKEUP(m_mballoc_wid, &mmbfree.m_starved); - } - - return (p); -} - -/* - * Allocate some number of mbuf clusters - * and place on cluster free list. - * - * Must be called with the mclfree lock held. - */ -int -m_clalloc(int ncl, int how) -{ - caddr_t p; - int i; - int npg_sz; - - npg_sz = round_page(ncl * MCLBYTES); - ncl = npg_sz / MCLBYTES; - - /* - * If the map is now full (nothing will ever be freed to it). - * If we've hit the mcluster number limit, stop allocating from - * mb_map. - */ - if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters)) - return (0); - - if (1 /* XXX: how == M_TRYWAIT */) - mtx_unlock(&mbuf_mtx); - p = (caddr_t)kmem_malloc(mb_map, npg_sz, - how == M_TRYWAIT ? M_WAITOK : M_NOWAIT); - if (1 /* XXX: how == M_TRYWAIT */) - mtx_lock(&mbuf_mtx); - - /* - * Either the map is now full, or `how' is M_DONTWAIT and there - * are no pages left. - */ - if (p == NULL) - return (0); - - for (i = 0; i < ncl; i++) { - ((union mcluster *)p)->mcl_next = mclfree.m_head; - mclfree.m_head = (union mcluster *)p; - p += MCLBYTES; - mbstat.m_clfree++; - } - mbstat.m_clusters += ncl; - return (1); -} - -/* - * Once the mb_map submap has been exhausted and the allocation is called with - * M_TRYWAIT, we rely on the mclfree list. If nothing is free, we will - * block on a cv for a designated amount of time (mbuf_wait) or until we're - * signaled due to sudden mcluster availability. - * - * Must be called with the mclfree lock held. - */ -caddr_t -m_clalloc_wait(void) -{ - caddr_t p = NULL; - int retval; - - m_clalloc_wid++; - retval = cv_timedwait(&mclfree.m_starved, &mbuf_mtx, mbuf_wait); - m_clalloc_wid--; - - /* - * Now that we (think) that we've got something, try again. - */ - if (retval == 0) - _MCLALLOC(p, M_DONTWAIT); - - if (p != NULL) { - mbstat.m_wait++; - if (mclfree.m_head != NULL) - MBWAKEUP(m_clalloc_wid, &mclfree.m_starved); - } - - return (p); -} - -/* - * m_reclaim: drain protocols in hopes to free up some resources... - * - * XXX: No locks should be held going in here. The drain routines have - * to presently acquire some locks which raises the possibility of lock - * order violation if we're holding any mutex if that mutex is acquired in - * reverse order relative to one of the locks in the drain routines. - */ -static void -m_reclaim(void) -{ - struct domain *dp; - struct protosw *pr; - -#ifdef WITNESS - KASSERT(witness_list(curproc) == 0, - ("m_reclaim called with locks held")); -#endif - - for (dp = domains; dp; dp = dp->dom_next) - for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) - if (pr->pr_drain) - (*pr->pr_drain)(); - mbstat.m_drain++; -} - /* - * Space allocation routines. - * Some of these are also available as macros - * for critical paths. 
- */ -struct mbuf * -m_get(int how, int type) -{ - struct mbuf *m; - - MGET(m, how, type); - return (m); -} - -struct mbuf * -m_gethdr(int how, int type) -{ - struct mbuf *m; - - MGETHDR(m, how, type); - return (m); -} - -struct mbuf * -m_getclr(int how, int type) -{ - struct mbuf *m; - - MGET(m, how, type); - if (m != NULL) - bzero(mtod(m, caddr_t), MLEN); - return (m); -} - -struct mbuf * -m_free(struct mbuf *m) -{ - struct mbuf *n; - - MFREE(m, n); - return (n); -} - -/* * struct mbuf * * m_getm(m, len, how, type) * @@ -672,17 +229,13 @@ m = m->m_next; np = &n->m_next; } - if (top == NULL) { - mtx_lock(&mbuf_mtx); - mbstat.m_mcfail++; - mtx_unlock(&mbuf_mtx); - } + if (top == NULL) + mbstat.m_mcfail++; /* XXX: No consistency. */ + return (top); nospace: m_freem(top); - mtx_lock(&mbuf_mtx); - mbstat.m_mcfail++; - mtx_unlock(&mbuf_mtx); + mbstat.m_mcfail++; /* XXX: No consistency. */ return (NULL); } @@ -741,9 +294,7 @@ return top; nospace: m_freem(top); - mtx_lock(&mbuf_mtx); - mbstat.m_mcfail++; - mtx_unlock(&mbuf_mtx); + mbstat.m_mcfail++; /* XXX: No consistency. */ return (NULL); } @@ -844,9 +395,7 @@ nospace: m_freem(top); - mtx_lock(&mbuf_mtx); - mbstat.m_mcfail++; - mtx_unlock(&mbuf_mtx); + mbstat.m_mcfail++; /* XXX: No consistency. */ return (NULL); } @@ -1008,9 +557,7 @@ return (m); bad: m_freem(n); - mtx_lock(&mbuf_mtx); - mbstat.m_mpfail++; - mtx_unlock(&mbuf_mtx); + mbstat.m_mpfail++; /* XXX: No consistency. */ return (NULL); } @@ -1157,7 +704,7 @@ off -= mlen; totlen += mlen; if (m->m_next == NULL) { - n = m_getclr(M_DONTWAIT, m->m_type); + n = m_get_clrd(M_DONTWAIT, m->m_type); if (n == NULL) goto out; n->m_len = min(MLEN, len + off); Index: src/sys/sys/mbuf.h =================================================================== RCS file: /home/ncvs/src/sys/sys/mbuf.h,v retrieving revision 1.81 diff -u -r1.81 mbuf.h --- src/sys/sys/mbuf.h 2001/06/13 18:53:53 1.81 +++ src/sys/sys/mbuf.h 2001/06/21 17:47:01 @@ -37,42 +37,20 @@ #ifndef _SYS_MBUF_H_ #define _SYS_MBUF_H_ -#ifdef _KERNEL -#include /* XXX */ -#include -#include -#endif /* _KERNEL */ - /* * Mbufs are of a single size, MSIZE (machine/param.h), which * includes overhead. An mbuf may add a single "mbuf cluster" of size * MCLBYTES (also in machine/param.h), which has no additional overhead * and is used instead of the internal data area; this is done when - * at least MINCLSIZE of data must be stored. + * at least MINCLSIZE of data must be stored. Additionally, it is possible + * to allocate a separate buffer externally and attach it to the mbuf in + * a way similar to that of mbuf clusters. */ - #define MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */ #define MHLEN (MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */ - #define MINCLSIZE (MHLEN + 1) /* smallest amount to put in cluster */ #define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */ -/* - * Maximum number of allocatable counters for external buffers. This - * ensures enough VM address space for the allocation of counters - * in the extreme case where all possible external buffers are allocated. - * - * Note: When new types of external storage are allocated, EXT_COUNTERS - * must be tuned accordingly. Practically, this isn't a big deal - * as each counter is only a word long, so we can fit - * (PAGE_SIZE / length of word) counters in a single page. - * - * XXX: Must increase this if using any of if_ti, if_wb, if_sk drivers, - * or any other drivers which may manage their own buffers and - * eventually attach them to mbufs. 
- */ -#define EXT_COUNTERS (nmbclusters + nsfbufs) - #ifdef _KERNEL /* * Macros for type conversion @@ -83,7 +61,9 @@ #define dtom(x) ((struct mbuf *)((intptr_t)(x) & ~(MSIZE-1))) #endif /* _KERNEL */ -/* header at beginning of each mbuf: */ +/* + * Header present at the beginning of every mbuf. + */ struct m_hdr { struct mbuf *mh_next; /* next buffer in chain */ struct mbuf *mh_nextpkt; /* next chain in queue/record */ @@ -93,7 +73,9 @@ short mh_flags; /* flags; see below */ }; -/* record/packet header in first mbuf of chain; valid if M_PKTHDR set */ +/* + * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set. + */ struct pkthdr { struct ifnet *rcvif; /* rcv interface */ int len; /* total packet length */ @@ -105,17 +87,23 @@ struct mbuf *aux; /* extra data buffer; ipsec/others */ }; -/* description of external storage mapped into mbuf, valid if M_EXT set */ +/* + * Description of external storage mapped into mbuf; valid only if M_EXT is set. + */ struct m_ext { caddr_t ext_buf; /* start of buffer */ void (*ext_free) /* free routine if not the usual */ (caddr_t, void *); void *ext_args; /* optional argument pointer */ u_int ext_size; /* size of buffer, for ext_free */ - union mext_refcnt *ref_cnt; /* pointer to ref count info */ + u_int *ref_cnt; /* pointer to ref count info */ int ext_type; /* type of external storage */ }; +/* + * The core of the mbuf object along with some shortcut defines for + * practical purposes. + */ struct mbuf { struct m_hdr m_hdr; union { @@ -141,7 +129,9 @@ #define m_pktdat M_dat.MH.MH_dat.MH_databuf #define m_dat M_dat.M_databuf -/* mbuf flags */ +/* + * mbuf flags + */ #define M_EXT 0x0001 /* has associated external storage */ #define M_PKTHDR 0x0002 /* start of record */ #define M_EOR 0x0004 /* end of record */ @@ -152,24 +142,32 @@ #define M_PROTO4 0x0080 /* protocol-specific */ #define M_PROTO5 0x0100 /* protocol-specific */ -/* mbuf pkthdr flags, also in m_flags */ +/* + * mbuf pkthdr flags (also stored in m_flags) + */ #define M_BCAST 0x0200 /* send/received as link-level broadcast */ #define M_MCAST 0x0400 /* send/received as link-level multicast */ #define M_FRAG 0x0800 /* packet is a fragment of a larger packet */ #define M_FIRSTFRAG 0x1000 /* packet is first fragment */ #define M_LASTFRAG 0x2000 /* packet is last fragment */ -/* external buffer types: identify ext_buf type */ +/* + * External buffer types: identify ext_buf type + */ #define EXT_CLUSTER 1 /* mbuf cluster */ #define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */ #define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */ #define EXT_MOD_TYPE 200 /* custom module's ext_buf type */ -/* flags copied when copying m_pkthdr */ +/* + * Flags copied when copying m_pkthdr + */ #define M_COPYFLAGS (M_PKTHDR|M_EOR|M_PROTO1|M_PROTO1|M_PROTO2|M_PROTO3 | \ M_PROTO4|M_PROTO5|M_BCAST|M_MCAST|M_FRAG|M_RDONLY) -/* flags indicating hw checksum support and sw checksum requirements */ +/* + * Flags indicating hw checksum support and sw checksum requirements + */ #define CSUM_IP 0x0001 /* will csum IP */ #define CSUM_TCP 0x0002 /* will csum TCP */ #define CSUM_UDP 0x0004 /* will csum UDP */ @@ -184,7 +182,9 @@ #define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP) #define CSUM_DELAY_IP (CSUM_IP) /* XXX add ipv6 here too? 
*/ -/* mbuf types */ +/* + * mbuf types + */ #define MT_FREE 0 /* should be on free list */ #define MT_DATA 1 /* dynamic (data) allocation */ #define MT_HEADER 2 /* packet header */ @@ -208,18 +208,27 @@ #define MT_OOBDATA 15 /* expedited data */ #define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */ +/* + * Mbuf and cluster allocation statistics PCPU structure. + */ +struct mbpstat { + u_long mb_mbfree; + u_long mb_mbpgs; + u_long mb_clfree; + u_long mb_clpgs; +}; + /* - * mbuf statistics + * General mbuf statistics structure. + * XXX: Modifications of these are not protected by any mutex locks nor by + * any atomic() manipulations. As a result, we may occasionally lose + * a count or two. Luckily, not all of these fields are modified at all + * and remain static, and those that are manipulated are only manipulated + * in failure situations, which do not occur (hopefully) very often. */ struct mbstat { - u_long m_mbufs; /* # mbufs obtained from page pool */ - u_long m_clusters; /* # clusters obtained from page pool */ - u_long m_clfree; /* # clusters on freelist (cache) */ - u_long m_refcnt; /* # ref counters obtained from page pool */ - u_long m_refree; /* # ref counters on freelist (cache) */ - u_long m_spare; /* spare field */ - u_long m_drops; /* times failed to find space */ - u_long m_wait; /* times waited for space */ + u_long m_drops; /* times failed to allocate */ + u_long m_wait; /* times succesfully returned from wait */ u_long m_drain; /* times drained protocols for space */ u_long m_mcfail; /* times m_copym failed */ u_long m_mpfail; /* times m_pullup failed */ @@ -230,329 +239,75 @@ u_long m_mhlen; /* length of data in a header mbuf */ }; -/* flags to m_get/MGET */ +/* + * Flags specifying how an allocation should be made. + * M_DONTWAIT means "don't block if nothing is available" whereas + * M_TRYWAIT means "block for mbuf_wait ticks at most if nothing is + * available." + */ #define M_DONTWAIT 1 #define M_TRYWAIT 0 #define M_WAIT M_TRYWAIT /* XXX: Deprecated. */ -/* - * Normal mbuf clusters are normally treated as character arrays - * after allocation, but use the first word of the buffer as a free list - * pointer while on the free list. - */ -union mcluster { - union mcluster *mcl_next; - char mcl_buf[MCLBYTES]; -}; - -/* - * The m_ext object reference counter structure. - */ -union mext_refcnt { - union mext_refcnt *next_ref; - u_int refcnt; -}; - #ifdef _KERNEL -/* - * The freelists for mbufs and mbuf clusters include condition variables - * that are used in cases of depletion/starvation. - * The counter freelist does not require a condition variable as we never - * expect to consume more than the reserved address space for counters. - * All are presently protected by the mbuf_mtx lock. - */ -struct mbffree_lst { - struct mbuf *m_head; - struct cv m_starved; -}; - -struct mclfree_lst { - union mcluster *m_head; - struct cv m_starved; -}; - -struct mcntfree_lst { - union mext_refcnt *m_head; -}; - /* - * Signal a single instance (if any) blocked on a m_starved cv (i.e. an - * instance waiting for an {mbuf, cluster} to be freed to the global - * cache lists). + * mbuf external reference count management macros * - * Must be called with mbuf_mtx held. 
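
/*
 * A sketch of why the unlocked statistics updates noted above can "lose a
 * count or two": two threads bumping a plain counter race with each other,
 * while atomic increments do not.  __sync_fetch_and_add() plays the role an
 * atomic(9) operation would; the thread and iteration counts are arbitrary
 * assumptions.
 */
#include <pthread.h>
#include <stdio.h>

#define NTHREADS	2
#define NITER		1000000

static unsigned long racy;			/* like an mbstat field */
static unsigned long safe;

static void *
bump(void *arg)
{
	int i;

	(void)arg;
	for (i = 0; i < NITER; i++) {
		racy++;				/* unsynchronized, may lose updates */
		__sync_fetch_and_add(&safe, 1);	/* never loses updates */
	}
	return (NULL);
}

int
main(void)
{
	pthread_t tid[NTHREADS];
	int i;

	for (i = 0; i < NTHREADS; i++)
		pthread_create(&tid[i], NULL, bump, NULL);
	for (i = 0; i < NTHREADS; i++)
		pthread_join(tid[i], NULL);
	printf("racy %lu vs atomic %lu (expected %d)\n",
	    racy, safe, NTHREADS * NITER);
	return (0);
}
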
- */ -#define MBWAKEUP(m_wid, m_cv) do { \ - if ((m_wid) > 0) \ - cv_signal((m_cv)); \ -} while (0) - -/* - * mbuf external reference count management macros: - * * MEXT_IS_REF(m): true if (m) is not the only mbuf referencing * the external buffer ext_buf + * * MEXT_REM_REF(m): remove reference to m_ext object + * * MEXT_ADD_REF(m): add reference to m_ext object already * referred to by (m) - * MEXT_INIT_REF(m): allocate and initialize an external - * object reference counter for (m) */ -#define MEXT_IS_REF(m) ((m)->m_ext.ref_cnt->refcnt > 1) +#define MEXT_IS_REF(m) (*((m)->m_ext.ref_cnt) > 1) #define MEXT_REM_REF(m) do { \ - KASSERT((m)->m_ext.ref_cnt->refcnt > 0, ("m_ext refcnt < 0")); \ - atomic_subtract_int(&((m)->m_ext.ref_cnt->refcnt), 1); \ + KASSERT(*((m)->m_ext.ref_cnt) > 0, ("m_ext refcnt < 0")); \ + atomic_subtract_int((m)->m_ext.ref_cnt, 1); \ } while(0) -#define MEXT_ADD_REF(m) atomic_add_int(&((m)->m_ext.ref_cnt->refcnt), 1) - -#define _MEXT_ALLOC_CNT(m_cnt, how) do { \ - union mext_refcnt *__mcnt; \ - \ - mtx_lock(&mbuf_mtx); \ - if (mcntfree.m_head == NULL) \ - m_alloc_ref(1, (how)); \ - __mcnt = mcntfree.m_head; \ - if (__mcnt != NULL) { \ - mcntfree.m_head = __mcnt->next_ref; \ - mbstat.m_refree--; \ - __mcnt->refcnt = 0; \ - } \ - mtx_unlock(&mbuf_mtx); \ - (m_cnt) = __mcnt; \ -} while (0) - -#define _MEXT_DEALLOC_CNT(m_cnt) do { \ - union mext_refcnt *__mcnt = (m_cnt); \ - \ - mtx_lock(&mbuf_mtx); \ - __mcnt->next_ref = mcntfree.m_head; \ - mcntfree.m_head = __mcnt; \ - mbstat.m_refree++; \ - mtx_unlock(&mbuf_mtx); \ -} while (0) +#define MEXT_ADD_REF(m) atomic_add_int((m)->m_ext.ref_cnt, 1) -#define MEXT_INIT_REF(m, how) do { \ - struct mbuf *__mmm = (m); \ - \ - _MEXT_ALLOC_CNT(__mmm->m_ext.ref_cnt, (how)); \ - if (__mmm->m_ext.ref_cnt != NULL) \ - MEXT_ADD_REF(__mmm); \ -} while (0) - /* - * mbuf allocation/deallocation macros: - * - * MGET(struct mbuf *m, int how, int type) - * allocates an mbuf and initializes it to contain internal data. - * - * MGETHDR(struct mbuf *m, int how, int type) - * allocates an mbuf and initializes it to contain a packet header - * and internal data. + * mbuf, cluster, and external object allocation macros + * (for compatibility purposes) */ -/* - * Lower-level macros for MGET(HDR)... Not to be used outside the - * subsystem ("non-exportable" macro names are prepended with "_"). 
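
/*
 * A sketch of the reference-count scheme behind the MEXT_* macros above: the
 * counter is a separately allocated u_int shared by every mbuf referencing
 * the same external buffer, adjusted atomically, and the buffer is released
 * only by the holder of the last reference.  The GCC __sync builtins stand
 * in for atomic_add_int()/atomic_subtract_int(), and the malloc()ed blob and
 * toy_* names are assumptions for illustration.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_ext {
	char *buf;			/* like m_ext.ext_buf */
	unsigned int *ref_cnt;		/* like m_ext.ref_cnt */
};

static void
toy_ext_addref(struct toy_ext *e)
{
	__sync_fetch_and_add(e->ref_cnt, 1);		/* like MEXT_ADD_REF */
}

static void
toy_ext_free(struct toy_ext *e)
{
	if (__sync_sub_and_fetch(e->ref_cnt, 1) == 0) {	/* drop ref, check for last */
		free(e->buf);				/* last reference gone */
		free(e->ref_cnt);
	}
	e->buf = NULL;
}

int
main(void)
{
	struct toy_ext a, b;

	a.buf = malloc(2048);
	a.ref_cnt = malloc(sizeof(*a.ref_cnt));		/* as in _mext_init_ref() */
	*a.ref_cnt = 0;
	toy_ext_addref(&a);

	b = a;						/* a second mbuf shares the buffer */
	toy_ext_addref(&b);

	toy_ext_free(&a);				/* buffer survives: b still refers */
	printf("refs left: %u\n", *b.ref_cnt);
	toy_ext_free(&b);				/* last ref: buffer and counter freed */
	return (0);
}
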
- */ -#define _MGET_SETUP(m_set, m_set_type) do { \ - (m_set)->m_type = (m_set_type); \ - (m_set)->m_next = NULL; \ - (m_set)->m_nextpkt = NULL; \ - (m_set)->m_data = (m_set)->m_dat; \ - (m_set)->m_flags = 0; \ -} while (0) +#define MGET(m, how, type) \ + (m) = m_get((how), (type)) -#define _MGET(m_mget, m_get_how) do { \ - if (mmbfree.m_head == NULL) \ - m_mballoc(1, (m_get_how)); \ - (m_mget) = mmbfree.m_head; \ - if ((m_mget) != NULL) { \ - mmbfree.m_head = (m_mget)->m_next; \ - mbtypes[MT_FREE]--; \ - } else { \ - if ((m_get_how) == M_TRYWAIT) \ - (m_mget) = m_mballoc_wait(); \ - } \ -} while (0) +#define MGETHDR(m, how, type) \ + (m) = m_gethdr((how), (type)) -#define MGET(m, how, type) do { \ - struct mbuf *_mm; \ - int _mhow = (how); \ - int _mtype = (type); \ - \ - mtx_lock(&mbuf_mtx); \ - _MGET(_mm, _mhow); \ - if (_mm != NULL) { \ - mbtypes[_mtype]++; \ - mtx_unlock(&mbuf_mtx); \ - _MGET_SETUP(_mm, _mtype); \ - } else { \ - mbstat.m_drops++; \ - mtx_unlock(&mbuf_mtx); \ - } \ - (m) = _mm; \ -} while (0) +#define MCLGET(m, how) \ + m_clget((m), (how)) -#define _MGETHDR_SETUP(m_set, m_set_type) do { \ - (m_set)->m_type = (m_set_type); \ - (m_set)->m_next = NULL; \ - (m_set)->m_nextpkt = NULL; \ - (m_set)->m_data = (m_set)->m_pktdat; \ - (m_set)->m_flags = M_PKTHDR; \ - (m_set)->m_pkthdr.rcvif = NULL; \ - (m_set)->m_pkthdr.csum_flags = 0; \ - (m_set)->m_pkthdr.aux = NULL; \ -} while (0) +#define MEXTADD(m, buf, size, free, args, flags, type) \ + m_extadd((m), (caddr_t)(buf), (size), (free), (args), (flags), (type)) -#define MGETHDR(m, how, type) do { \ - struct mbuf *_mm; \ - int _mhow = (how); \ - int _mtype = (type); \ - \ - mtx_lock(&mbuf_mtx); \ - _MGET(_mm, _mhow); \ - if (_mm != NULL) { \ - mbtypes[_mtype]++; \ - mtx_unlock(&mbuf_mtx); \ - _MGETHDR_SETUP(_mm, _mtype); \ - } else { \ - mbstat.m_drops++; \ - mtx_unlock(&mbuf_mtx); \ - } \ - (m) = _mm; \ -} while (0) - -/* - * mbuf external storage macros: - * - * MCLGET allocates and refers an mcluster to an mbuf - * MEXTADD sets up pre-allocated external storage and refers to mbuf - * MEXTFREE removes reference to external object and frees it if - * necessary - */ -#define _MCLALLOC(p, how) do { \ - caddr_t _mp; \ - int _mhow = (how); \ - \ - if (mclfree.m_head == NULL) \ - m_clalloc(1, _mhow); \ - _mp = (caddr_t)mclfree.m_head; \ - if (_mp != NULL) { \ - mbstat.m_clfree--; \ - mclfree.m_head = ((union mcluster *)_mp)->mcl_next; \ - } else { \ - if (_mhow == M_TRYWAIT) \ - _mp = m_clalloc_wait(); \ - } \ - (p) = _mp; \ -} while (0) - -#define MCLGET(m, how) do { \ - struct mbuf *_mm = (m); \ - \ - mtx_lock(&mbuf_mtx); \ - _MCLALLOC(_mm->m_ext.ext_buf, (how)); \ - if (_mm->m_ext.ext_buf != NULL) { \ - mtx_unlock(&mbuf_mtx); \ - MEXT_INIT_REF(_mm, (how)); \ - if (_mm->m_ext.ref_cnt == NULL) { \ - _MCLFREE(_mm->m_ext.ext_buf); \ - _mm->m_ext.ext_buf = NULL; \ - } else { \ - _mm->m_data = _mm->m_ext.ext_buf; \ - _mm->m_flags |= M_EXT; \ - _mm->m_ext.ext_free = NULL; \ - _mm->m_ext.ext_args = NULL; \ - _mm->m_ext.ext_size = MCLBYTES; \ - _mm->m_ext.ext_type = EXT_CLUSTER; \ - } \ - } else { \ - mbstat.m_drops++; \ - mtx_unlock(&mbuf_mtx); \ - } \ -} while (0) - -#define MEXTADD(m, buf, size, free, args, flags, type) do { \ - struct mbuf *_mm = (m); \ - \ - MEXT_INIT_REF(_mm, M_TRYWAIT); \ - if (_mm->m_ext.ref_cnt != NULL) { \ - _mm->m_flags |= (M_EXT | (flags)); \ - _mm->m_ext.ext_buf = (caddr_t)(buf); \ - _mm->m_data = _mm->m_ext.ext_buf; \ - _mm->m_ext.ext_size = (size); \ - _mm->m_ext.ext_free = (free); \ - _mm->m_ext.ext_args = 
(args); \ - _mm->m_ext.ext_type = (type); \ - } \ +#define MFREE(m, n) do { \ + (n) = m_free((m)); \ + (m) = NULL; \ } while (0) -#define _MCLFREE(p) do { \ - union mcluster *_mp = (union mcluster *)(p); \ - \ - mtx_lock(&mbuf_mtx); \ - _mp->mcl_next = mclfree.m_head; \ - mclfree.m_head = _mp; \ - mbstat.m_clfree++; \ - MBWAKEUP(m_clalloc_wid, &mclfree.m_starved); \ - mtx_unlock(&mbuf_mtx); \ -} while (0) +#define m_getclr m_get_clrd -/* MEXTFREE: +/* + * MEXTFREE(m): disassociate (and possibly free) an external object from `m' + * * If the atomic_cmpset_int() returns 0, then we effectively do nothing * in terms of "cleaning up" (freeing the ext buf and ref. counter) as * this means that either there are still references, or another thread * is taking care of the clean-up. */ #define MEXTFREE(m) do { \ - struct mbuf *_mmm = (m); \ - \ - MEXT_REM_REF(_mmm); \ - if (atomic_cmpset_int(&_mmm->m_ext.ref_cnt->refcnt, 0, 1)) { \ - if (_mmm->m_ext.ext_type != EXT_CLUSTER) { \ - (*(_mmm->m_ext.ext_free))(_mmm->m_ext.ext_buf, \ - _mmm->m_ext.ext_args); \ - } else \ - _MCLFREE(_mmm->m_ext.ext_buf); \ - _MEXT_DEALLOC_CNT(_mmm->m_ext.ref_cnt); \ - } \ - _mmm->m_flags &= ~M_EXT; \ -} while (0) - -/* - * MFREE(struct mbuf *m, struct mbuf *n) - * Free a single mbuf and associated external storage. - * Place the successor, if any, in n. - * - * we do need to check non-first mbuf for m_aux, since some of existing - * code does not call M_PREPEND properly. - * (example: call to bpf_mtap from drivers) - */ -#define MFREE(m, n) do { \ - struct mbuf *_mm = (m); \ - struct mbuf *_aux; \ + struct mbuf *_mb = (m); \ \ - KASSERT(_mm->m_type != MT_FREE, ("freeing free mbuf")); \ - if (_mm->m_flags & M_EXT) \ - MEXTFREE(_mm); \ - mtx_lock(&mbuf_mtx); \ - mbtypes[_mm->m_type]--; \ - if ((_mm->m_flags & M_PKTHDR) != 0 && _mm->m_pkthdr.aux) { \ - _aux = _mm->m_pkthdr.aux; \ - _mm->m_pkthdr.aux = NULL; \ - } else { \ - _aux = NULL; \ - } \ - _mm->m_type = MT_FREE; \ - mbtypes[MT_FREE]++; \ - (n) = _mm->m_next; \ - _mm->m_next = mmbfree.m_head; \ - mmbfree.m_head = _mm; \ - MBWAKEUP(m_mballoc_wid, &mmbfree.m_starved); \ - mtx_unlock(&mbuf_mtx); \ - if (_aux) \ - m_freem(_aux); \ + MEXT_REM_REF(_mb); \ + if (atomic_cmpset_int(_mb->m_ext.ref_cnt, 0, 1)) \ + _mext_free(_mb); \ + _mb->m_flags &= ~M_EXT; \ } while (0) /* @@ -570,8 +325,8 @@ * aux pointer will be moved to `to'. */ #define M_COPY_PKTHDR(to, from) do { \ - struct mbuf *_mfrom = (from); \ - struct mbuf *_mto = (to); \ + struct mbuf *_mfrom = (from); \ + struct mbuf *_mto = (to); \ \ _mto->m_data = _mto->m_pktdat; \ _mto->m_flags = _mfrom->m_flags & M_COPYFLAGS; \ @@ -621,10 +376,10 @@ * set to NULL. 
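To show that existing consumers keep working through these compatibility wrappers, here is a sketch of the classic receive-buffer pattern; it is not part of the patch and example_getcl() is a hypothetical name. MGETHDR() and MCLGET() now simply expand to m_gethdr() and m_clget(), so the usual M_EXT check after MCLGET() is unchanged.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

/*
 * Allocate a packet header mbuf with a cluster attached, without
 * blocking; returns NULL if either allocation fails.
 */
static struct mbuf *
example_getcl(void)
{
	struct mbuf *m;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	MCLGET(m, M_DONTWAIT);
	if ((m->m_flags & M_EXT) == 0) {
		/* No cluster could be attached; give the mbuf back. */
		m_freem(m);
		return (NULL);
	}
	return (m);
}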
*/ #define M_PREPEND(m, plen, how) do { \ - struct mbuf **_mmp = &(m); \ - struct mbuf *_mm = *_mmp; \ - int _mplen = (plen); \ - int __mhow = (how); \ + struct mbuf **_mmp = &(m); \ + struct mbuf *_mm = *_mmp; \ + int _mplen = (plen); \ + int __mhow = (how); \ \ if (M_LEADINGSPACE(_mm) >= _mplen) { \ _mm->m_data -= _mplen; \ @@ -639,16 +394,7 @@ /* * change mbuf to new type */ -#define MCHTYPE(m, t) do { \ - struct mbuf *_mm = (m); \ - int _mt = (t); \ - \ - mtx_lock(&mbuf_mtx); \ - mbtypes[_mm->m_type]--; \ - mbtypes[_mt]++; \ - mtx_unlock(&mbuf_mtx); \ - _mm->m_type = (_mt); \ -} while (0) +#define MCHTYPE(m, t) (m)->m_type = (t) /* length to m_copy to copy all */ #define M_COPYALL 1000000000 @@ -665,55 +411,46 @@ void* p; }; -extern u_long m_clalloc_wid; /* mbuf cluster wait count */ -extern u_long m_mballoc_wid; /* mbuf wait count */ -extern int max_datalen; /* MHLEN - max_hdr */ -extern int max_hdr; /* largest link+protocol header */ -extern int max_linkhdr; /* largest link-level header */ -extern int max_protohdr; /* largest protocol header */ -extern struct mbstat mbstat; -extern u_long mbtypes[MT_NTYPES]; /* per-type mbuf allocations */ -extern int mbuf_wait; /* mbuf sleep time */ -extern struct mtx mbuf_mtx; -extern struct mbuf *mbutl; /* virtual address of mclusters */ -extern struct mclfree_lst mclfree; -extern struct mcntfree_lst mcntfree; -extern struct mbffree_lst mmbfree; -extern int nmbclusters; -extern int nmbcnt; -extern int nmbufs; -extern int nsfbufs; - -void m_adj(struct mbuf *, int); -int m_alloc_ref(u_int, int); -struct mbuf *m_aux_add2 __P((struct mbuf *, int, int, void *)); -struct mbuf *m_aux_find2 __P((struct mbuf *, int, int, void *)); -struct mbuf *m_aux_add(struct mbuf *, int, int); -void m_aux_delete(struct mbuf *, struct mbuf *); -struct mbuf *m_aux_find(struct mbuf *, int, int); -void m_cat(struct mbuf *, struct mbuf *); -int m_clalloc(int, int); -caddr_t m_clalloc_wait(void); -void m_copyback(struct mbuf *, int, int, caddr_t); -void m_copydata(struct mbuf *, int, int, caddr_t); -struct mbuf *m_copym(struct mbuf *, int, int, int); -struct mbuf *m_copypacket(struct mbuf *, int); -struct mbuf *m_devget(char *, int, int, struct ifnet *, - void (*copy)(char *, caddr_t, u_int)); -struct mbuf *m_dup(struct mbuf *, int); -struct mbuf *m_free(struct mbuf *); -void m_freem(struct mbuf *); -struct mbuf *m_get(int, int); -struct mbuf *m_getclr(int, int); -struct mbuf *m_gethdr(int, int); -struct mbuf *m_getm(struct mbuf *, int, int, int); -int m_mballoc(int, int); -struct mbuf *m_mballoc_wait(void); -struct mbuf *m_prepend(struct mbuf *, int, int); -void m_print(const struct mbuf *m); -struct mbuf *m_pulldown(struct mbuf *, int, int, int *); -struct mbuf *m_pullup(struct mbuf *, int); -struct mbuf *m_split(struct mbuf *, int, int); +extern int max_datalen; /* MHLEN - max_hdr */ +extern int max_hdr; /* largest link + protocol header */ +extern int max_linkhdr; /* largest link-level header */ +extern int max_protohdr; /* largest protocol header */ +extern struct mbpstat mb_statpcpu[]; /* Per-CPU allocation stats. */ +extern struct mbstat mbstat; /* General mbuf stats/infos. 
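One more sketch, not part of the patch: M_PREPEND(), which is only reindented here, still frees the chain and sets the mbuf pointer to NULL when it cannot obtain the requested leading space, so a single NULL check suffices. The header structure and function name below are hypothetical.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

struct example_hdr {			/* hypothetical protocol header */
	u_int32_t	eh_field;
};

/*
 * Prepend an example_hdr to the chain; on failure the chain has
 * already been freed and NULL is returned.
 */
static struct mbuf *
example_prepend(struct mbuf *m)
{
	struct example_hdr *eh;

	M_PREPEND(m, sizeof(struct example_hdr), M_DONTWAIT);
	if (m == NULL)
		return (NULL);
	eh = mtod(m, struct example_hdr *);
	eh->eh_field = 0;
	return (m);
}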
*/ +extern int nmbclusters; /* Maximum number of clusters */ +extern int nmbcnt; /* Scale kmem_map for counter space */ +extern int nmbufs; /* Maximum number of mbufs */ +extern int nsfbufs; /* Number of sendfile(2) bufs */ + +void m_adj(struct mbuf *, int); +struct mbuf *m_aux_add(struct mbuf *, int, int); +struct mbuf *m_aux_add2(struct mbuf *, int, int, void *); +void m_aux_delete(struct mbuf *, struct mbuf *); +struct mbuf *m_aux_find(struct mbuf *, int, int); +struct mbuf *m_aux_find2(struct mbuf *, int, int, void *); +void m_cat(struct mbuf *, struct mbuf *); +void m_clget(struct mbuf *, int); +void m_extadd(struct mbuf *, caddr_t, u_int, + void (*free)(caddr_t, void *), void *, short, int); +void m_copyback(struct mbuf *, int, int, caddr_t); +void m_copydata(struct mbuf *, int, int, caddr_t); +struct mbuf *m_copym(struct mbuf *, int, int, int); +struct mbuf *m_copypacket(struct mbuf *, int); +struct mbuf *m_devget(char *, int, int, struct ifnet *, + void (*copy)(char *, caddr_t, u_int)); +struct mbuf *m_dup(struct mbuf *, int); +struct mbuf *m_free(struct mbuf *); +void m_freem(struct mbuf *); +struct mbuf *m_get(int, int); +struct mbuf *m_get_clrd(int, int); +struct mbuf *m_gethdr(int, int); +struct mbuf *m_gethdr_clrd(int, int); +struct mbuf *m_getm(struct mbuf *, int, int, int); +struct mbuf *m_prepend(struct mbuf *, int, int); +void m_print(const struct mbuf *m); +struct mbuf *m_pulldown(struct mbuf *, int, int, int *); +struct mbuf *m_pullup(struct mbuf *, int); +struct mbuf *m_split(struct mbuf *, int, int); #endif /* _KERNEL */ #endif /* !_SYS_MBUF_H_ */ Index: src/sys/sys/sysctl.h =================================================================== RCS file: /home/ncvs/src/sys/sys/sysctl.h,v retrieving revision 1.96 diff -u -r1.96 sysctl.h --- src/sys/sys/sysctl.h 2001/06/18 21:07:04 1.96 +++ src/sys/sys/sysctl.h 2001/06/21 17:47:01 @@ -402,8 +402,6 @@ #define KIPC_MAX_PROTOHDR 5 /* int: max length of network header */ #define KIPC_MAX_HDR 6 /* int: max total length of headers */ #define KIPC_MAX_DATALEN 7 /* int: max length of data? */ -#define KIPC_MBSTAT 8 /* struct: mbuf usage statistics */ -#define KIPC_NMBCLUSTERS 9 /* int: maximum mbuf clusters */ /* * CTL_HW identifiers Index: src/sys/vm/vm_kern.c =================================================================== RCS file: /home/ncvs/src/sys/vm/vm_kern.c,v retrieving revision 1.70 diff -u -r1.70 vm_kern.c --- src/sys/vm/vm_kern.c 2001/05/19 01:28:09 1.70 +++ src/sys/vm/vm_kern.c 2001/06/21 17:47:01 @@ -89,8 +89,6 @@ vm_map_t exec_map=0; vm_map_t clean_map=0; vm_map_t buffer_map=0; -vm_map_t mb_map=0; -int mb_map_full=0; /* * kmem_alloc_pageable: @@ -331,6 +329,9 @@ * * NOTE: This routine is not supposed to block if M_NOWAIT is set, but * I have not verified that it actually does not block. + * + * `map' is ONLY allowed to be kmem_map or one of the mbuf submaps to + * which we never free. 
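To illustrate the VM-side shift away from mb_map, here is a rough sketch, not part of the patch, of how a dedicated submap for a fixed-size pool can be carved out of kmem_map; the function and variable names are hypothetical, and the patch's own mbuf and cluster submaps are set up elsewhere in the new allocator code.

#include <sys/param.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

/*
 * Carve a page-aligned submap out of kmem_map large enough for `nobj'
 * objects of `objsize' bytes each.  kmem_malloc() can then be pointed
 * at the returned map, which bounds the address space the pool may
 * ever consume.
 */
static vm_map_t
example_submap(int nobj, int objsize)
{
	vm_offset_t minaddr, maxaddr;
	vm_size_t size;

	size = trunc_page((vm_size_t)nobj * objsize);
	return (kmem_suballoc(kmem_map, &minaddr, &maxaddr, size));
}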
*/ vm_offset_t kmem_malloc(map, size, flags) @@ -344,9 +345,6 @@ vm_page_t m; int hadvmlock; - if (map != kmem_map && map != mb_map) - panic("kmem_malloc: map != {kmem,mb}_map"); - hadvmlock = mtx_owned(&vm_mtx); if (!hadvmlock) mtx_lock(&vm_mtx); @@ -362,9 +360,9 @@ vm_map_lock(map); if (vm_map_findspace(map, vm_map_min(map), size, &addr)) { vm_map_unlock(map); - if (map == mb_map) { - mb_map_full = TRUE; - printf("Out of mbuf clusters - adjust NMBCLUSTERS or increase maxusers!\n"); + if (map != kmem_map) { + printf("Out of mbuf address space!\n"); + printf("Consider increasing NMBCLUSTERS\n"); goto bad; } if ((flags & M_NOWAIT) == 0) Index: src/sys/vm/vm_kern.h =================================================================== RCS file: /home/ncvs/src/sys/vm/vm_kern.h,v retrieving revision 1.22 diff -u -r1.22 vm_kern.h --- src/sys/vm/vm_kern.h 2000/02/16 21:11:31 1.22 +++ src/sys/vm/vm_kern.h 2001/06/21 17:47:01 @@ -71,8 +71,6 @@ extern vm_map_t buffer_map; extern vm_map_t kernel_map; extern vm_map_t kmem_map; -extern vm_map_t mb_map; -extern int mb_map_full; extern vm_map_t clean_map; extern vm_map_t exec_map; extern u_int vm_kmem_size; Index: src/sys/vm/vm_map.c =================================================================== RCS file: /home/ncvs/src/sys/vm/vm_map.c,v retrieving revision 1.203 diff -u -r1.203 vm_map.c --- src/sys/vm/vm_map.c 2001/06/11 19:17:05 1.203 +++ src/sys/vm/vm_map.c 2001/06/21 17:47:01 @@ -726,14 +726,14 @@ mtx_assert(&vm_mtx, MA_OWNED); start = *addr; - if (map == kmem_map || map == mb_map) + if (map == kmem_map) s = splvm(); vm_map_lock(map); if (find_space) { if (vm_map_findspace(map, start, length, addr)) { vm_map_unlock(map); - if (map == kmem_map || map == mb_map) + if (map == kmem_map) splx(s); return (KERN_NO_SPACE); } @@ -743,7 +743,7 @@ start, start + length, prot, max, cow); vm_map_unlock(map); - if (map == kmem_map || map == mb_map) + if (map == kmem_map) splx(s); return (result); @@ -1951,7 +1951,7 @@ int result, s = 0; mtx_assert(&vm_mtx, MA_OWNED); - if (map == kmem_map || map == mb_map) + if (map == kmem_map) s = splvm(); vm_map_lock(map); @@ -1959,7 +1959,7 @@ result = vm_map_delete(map, start, end); vm_map_unlock(map); - if (map == kmem_map || map == mb_map) + if (map == kmem_map) splx(s); return (result); Index: src/sys/vm/vm_object.c =================================================================== RCS file: /home/ncvs/src/sys/vm/vm_object.c,v retrieving revision 1.192 diff -u -r1.192 vm_object.c --- src/sys/vm/vm_object.c 2001/05/23 22:42:10 1.192 +++ src/sys/vm/vm_object.c 2001/06/21 17:47:01 @@ -1696,8 +1696,6 @@ return 1; if( _vm_object_in_map( buffer_map, object, 0)) return 1; - if( _vm_object_in_map( mb_map, object, 0)) - return 1; return 0; } Index: src/usr.bin/netstat/main.c =================================================================== RCS file: /home/ncvs/src/usr.bin/netstat/main.c,v retrieving revision 1.48 diff -u -r1.48 main.c --- src/usr.bin/netstat/main.c 2001/06/15 23:55:45 1.48 +++ src/usr.bin/netstat/main.c 2001/06/21 17:47:10 @@ -153,6 +153,16 @@ { "_nmbclusters" }, #define N_NMBUFS 41 { "_nmbufs" }, +#define N_MBLIM 42 + { "_mbuf_limit" }, +#define N_CLLIM 43 + { "_clust_limit" }, +#define N_NCPUS 44 + { "_smp_cpus" }, +#define N_PAGESZ 45 + { "_pagesize" }, +#define N_MBPSTAT 46 + { "_mb_statpcpu" }, { "" }, }; @@ -486,9 +496,14 @@ mbpr(nl[N_MBSTAT].n_value, nl[N_MBTYPES].n_value, nl[N_NMBCLUSTERS].n_value, - nl[N_NMBUFS].n_value); + nl[N_NMBUFS].n_value, + nl[N_MBLIM].n_value, + nl[N_CLLIM].n_value, + 
nl[N_NCPUS].n_value, + nl[N_PAGESZ].n_value, + nl[N_MBPSTAT].n_value); } else - mbpr(0, 0, 0, 0); + mbpr(0, 0, 0, 0, 0, 0, 0, 0, 0); exit(0); } #if 0 Index: src/usr.bin/netstat/mbuf.c =================================================================== RCS file: /home/ncvs/src/usr.bin/netstat/mbuf.c,v retrieving revision 1.24 diff -u -r1.24 mbuf.c --- src/usr.bin/netstat/mbuf.c 2001/06/15 23:35:13 1.24 +++ src/usr.bin/netstat/mbuf.c 2001/06/22 05:15:14 @@ -48,11 +48,14 @@ #include #include #include +#include #include "netstat.h" #define YES 1 typedef int bool; +/* XXX: mbtypes stats temporarily disabled. */ +#if 0 static struct mbtypenames { int mt_type; char *mt_name; @@ -89,25 +92,31 @@ #endif { 0, 0 } }; +#endif /* 0 */ /* * Print mbuf statistics. */ void -mbpr(u_long mbaddr, u_long mbtaddr, u_long nmbcaddr, u_long nmbufaddr) +mbpr(u_long mbaddr, u_long mbtaddr, u_long nmbcaddr, u_long nmbufaddr, + u_long mblimaddr, u_long cllimaddr, u_long cpusaddr, u_long pgsaddr, + u_long mbpaddr) { - u_long totmem, totpossible, totmbufs; - register int i; - struct mbstat mbstat; + int i, nmbufs, nmbclusters, ncpu, page_size, num_objs; + u_int mbuf_limit, clust_limit; + u_long totspace, totnum, totfree; + size_t mlen; + struct mbstat *mbstat = NULL; + struct mbpstat **mbpstat = NULL; + +/* XXX: mbtypes stats temporarily disabled. */ +#if 0 + int nmbtypes; + size_t mbtypeslen; struct mbtypenames *mp; - int name[3], nmbclusters, nmbufs, nmbcnt, nmbtypes; - size_t nmbclen, nmbuflen, nmbcntlen, mbstatlen, mbtypeslen; - u_long *mbtypes; - bool *seen; /* "have we seen this type yet?" */ + u_long *mbtypes = NULL; + bool *seen = NULL; - mbtypes = NULL; - seen = NULL; - /* * XXX * We can't kread() mbtypeslen from a core image so we'll @@ -127,92 +136,182 @@ warn("calloc"); goto err; } +#endif + + mlen = sizeof *mbstat; + if ((mbstat = malloc(mlen)) == NULL) { + warn("malloc: cannot allocate memory for mbstat"); + goto err; + } + + /* + * XXX: Unfortunately, for the time being, we have to fetch + * the total length of the per-CPU stats area via sysctl + * (regardless of whether we're looking at a core or not).
+ */ + if (sysctlbyname("kern.ipc.mb_statpcpu", NULL, &mlen, NULL, 0) < 0) { + warn("sysctl: retrieving mb_statpcpu len"); + goto err; + } + num_objs = (int)(mlen / sizeof(struct mbpstat)); + if ((mbpstat = calloc(num_objs, sizeof(struct mbpstat *))) == NULL) { + warn("calloc: cannot allocate memory for mbpstats pointers"); + goto err; + } + if ((mbpstat[0] = calloc(num_objs, sizeof(struct mbpstat))) == NULL) { + warn("calloc: cannot allocate memory for mbpstats"); + goto err; + } if (mbaddr) { - if (kread(mbaddr, (char *)&mbstat, sizeof mbstat)) + if (kread(mbpaddr, (char *)mbpstat[0], mlen)) + goto err; + if (kread(mbaddr, (char *)mbstat, sizeof mbstat)) goto err; +#if 0 if (kread(mbtaddr, (char *)mbtypes, mbtypeslen)) goto err; +#endif if (kread(nmbcaddr, (char *)&nmbclusters, sizeof(int))) goto err; if (kread(nmbufaddr, (char *)&nmbufs, sizeof(int))) goto err; + if (kread(mblimaddr, (char *)&mbuf_limit, sizeof(u_int))) + goto err; + if (kread(cllimaddr, (char *)&clust_limit, sizeof(u_int))) + goto err; + if (kread(cpusaddr, (char *)&ncpu, sizeof(int))) + goto err; + if (kread(pgsaddr, (char *)&page_size, sizeof(int))) + goto err; } else { - name[0] = CTL_KERN; - name[1] = KERN_IPC; - name[2] = KIPC_MBSTAT; - mbstatlen = sizeof mbstat; - if (sysctl(name, 3, &mbstat, &mbstatlen, 0, 0) < 0) { + if (sysctlbyname("kern.ipc.mb_statpcpu", mbpstat[0], &mlen, + NULL, 0) < 0) { + warn("sysctl: retrieving mb_statpcpu"); + goto err; + } + if (sysctlbyname("kern.ipc.mbstat", mbstat, &mlen, NULL, 0) + < 0) { warn("sysctl: retrieving mbstat"); goto err; } - +#if 0 if (sysctlbyname("kern.ipc.mbtypes", mbtypes, &mbtypeslen, NULL, 0) < 0) { warn("sysctl: retrieving mbtypes"); goto err; } - - name[2] = KIPC_NMBCLUSTERS; - nmbclen = sizeof(int); - if (sysctl(name, 3, &nmbclusters, &nmbclen, 0, 0) < 0) { +#endif + mlen = sizeof(int); + if (sysctlbyname("kern.ipc.nmbclusters", &nmbclusters, &mlen, + NULL, 0) < 0) { warn("sysctl: retrieving nmbclusters"); goto err; } - - nmbuflen = sizeof(int); - if (sysctlbyname("kern.ipc.nmbufs", &nmbufs, &nmbuflen, 0, 0) < 0) { + mlen = sizeof(int); + if (sysctlbyname("kern.ipc.nmbufs", &nmbufs, &mlen, NULL, 0) + < 0) { warn("sysctl: retrieving nmbufs"); goto err; } + mlen = sizeof(u_int); + if (sysctlbyname("kern.ipc.mbuf_limit", &mbuf_limit, &mlen, + NULL, 0) < 0) { + warn("sysctl: retrieving mbuf_limit"); + goto err; + } + mlen = sizeof(u_int); + if (sysctlbyname("kern.ipc.clust_limit", &clust_limit, &mlen, + NULL, 0) < 0) { + warn("sysctl: retrieving clust_limit"); + goto err; + } + mlen = sizeof(int); + if (sysctlbyname("kern.smp.cpus", &ncpu, &mlen, NULL, 0) < 0) { + warn("sysctl: retrieving kern.smp.cpus"); + goto err; + } + mlen = sizeof(int); + if (sysctlbyname("hw.pagesize", &page_size, &mlen, NULL, 0) + < 0) { + warn("sysctl: retrieving hw.pagesize"); + goto err; + } } - nmbcntlen = sizeof(int); - if (sysctlbyname("kern.ipc.nmbcnt", &nmbcnt, &nmbcntlen, 0, 0) < 0) { - warn("sysctl: retrieving nmbcnt"); - goto err; - } + for (i = 0; i < num_objs; i++) + mbpstat[i] = mbpstat[0] + i; #undef MSIZE -#define MSIZE (mbstat.m_msize) +#define MSIZE (mbstat->m_msize) #undef MCLBYTES -#define MCLBYTES (mbstat.m_mclbytes) - - totmbufs = 0; - for (mp = mbtypenames; mp->mt_name; mp++) - totmbufs += mbtypes[mp->mt_type]; - printf("%lu/%lu/%u mbufs in use (current/peak/max):\n", totmbufs, - mbstat.m_mbufs, nmbufs); - for (mp = mbtypenames; mp->mt_name; mp++) - if (mbtypes[mp->mt_type]) { - seen[mp->mt_type] = YES; - printf("\t%lu mbufs allocated to %s\n", - 
mbtypes[mp->mt_type], mp->mt_name); - } - seen[MT_FREE] = YES; - for (i = 0; i < nmbtypes; i++) - if (!seen[i] && mbtypes[i]) { - printf("\t%lu mbufs allocated to \n", - mbtypes[i], i); - } - printf("%lu/%lu/%u mbuf clusters in use (current/peak/max)\n", - mbstat.m_clusters - mbstat.m_clfree, mbstat.m_clusters, - nmbclusters); - printf("%lu/%lu m_ext reference counters (in use/allocated)\n", - mbstat.m_refcnt - mbstat.m_refree, mbstat.m_refcnt); - totmem = mbstat.m_mbufs * MSIZE + mbstat.m_clusters * MCLBYTES + - mbstat.m_refcnt * sizeof(union mext_refcnt); - totpossible = nmbclusters * MCLBYTES + nmbufs * MSIZE + - nmbcnt * sizeof(union mext_refcnt); - printf("%lu Kbytes allocated to network (%lu%% of mb_map in use)\n", - totmem / 1024, (totmem * 100) / totpossible); - printf("%lu requests for memory denied\n", mbstat.m_drops); - printf("%lu requests for memory delayed\n", mbstat.m_wait); - printf("%lu calls to protocol drain routines\n", mbstat.m_drain); +#define MCLBYTES (mbstat->m_mclbytes) +#define MBPERPG (page_size / MSIZE) +#define CLPERPG (page_size / MCLBYTES) +#define GENLST (num_objs - 1) + + printf("mbuf usage:\n"); + printf("\tGEN list:\t%lu/%lu (in use/in pool)\n", + (mbpstat[GENLST]->mb_mbpgs * MBPERPG - mbpstat[GENLST]->mb_mbfree), + (mbpstat[GENLST]->mb_mbpgs * MBPERPG)); + totnum = mbpstat[GENLST]->mb_mbpgs * MBPERPG; + totfree = mbpstat[GENLST]->mb_mbfree; + totspace = mbpstat[GENLST]->mb_mbpgs * page_size; + for (i = 0; i < ncpu; i++) { + printf("\tCPU #%d list:\t%lu/%lu (in use/in pool)\n", i, + (mbpstat[i]->mb_mbpgs * MBPERPG - mbpstat[i]->mb_mbfree), + (mbpstat[i]->mb_mbpgs * MBPERPG)); + totspace += mbpstat[i]->mb_mbpgs * page_size; + totnum += mbpstat[i]->mb_mbpgs * MBPERPG; + totfree += mbpstat[i]->mb_mbfree; + } + printf("\tTotal:\t\t%lu/%lu (in use/in pool)\n", (totnum - totfree), + totnum); + printf("\tMaximum number allowed on each CPU list: %d\n", mbuf_limit); + printf("\tMaximum possible: %d\n", nmbufs); + printf("\t%lu%% of mbuf map consumed\n", ((totspace * 100) / (nmbufs + * MSIZE))); + + printf("mbuf cluster usage:\n"); + printf("\tGEN list:\t%lu/%lu (in use/in pool)\n", + (mbpstat[GENLST]->mb_clpgs * CLPERPG - mbpstat[GENLST]->mb_clfree), + (mbpstat[GENLST]->mb_clpgs * CLPERPG)); + totnum = mbpstat[GENLST]->mb_clpgs * CLPERPG; + totfree = mbpstat[GENLST]->mb_clfree; + totspace = mbpstat[GENLST]->mb_clpgs * page_size; + for (i = 0; i < ncpu; i++) { + printf("\tCPU #%d list:\t%lu/%lu (in use/in pool)\n", i, + (mbpstat[i]->mb_clpgs * CLPERPG - mbpstat[i]->mb_clfree), + (mbpstat[i]->mb_clpgs * CLPERPG)); + totspace += mbpstat[i]->mb_clpgs * page_size; + totnum += mbpstat[i]->mb_clpgs * CLPERPG; + totfree += mbpstat[i]->mb_clfree; + } + printf("\tTotal:\t\t%lu/%lu (in use/in pool)\n", (totnum - totfree), + totnum); + printf("\tMaximum number allowed on each CPU list: %d\n", clust_limit); + printf("\tMaximum possible: %d\n", nmbclusters); + printf("\t%lu%% of cluster map consumed\n", ((totspace * 100) / + (nmbclusters * MCLBYTES))); + + printf("%lu requests for memory denied\n", mbstat->m_drops); + printf("%lu requests for memory delayed\n", mbstat->m_wait); + printf("%lu calls to protocol drain routines\n", mbstat->m_drain); err: +#if 0 if (mbtypes != NULL) free(mbtypes); if (seen != NULL) free(seen); +#endif + if (mbstat != NULL) + free(mbstat); + if (mbpstat != NULL) { + if (mbpstat[0] != NULL) + free(mbpstat[0]); + free(mbpstat); + } + + return; } Index: src/usr.bin/netstat/netstat.h =================================================================== RCS 
file: /home/ncvs/src/usr.bin/netstat/netstat.h,v retrieving revision 1.28 diff -u -r1.28 netstat.h --- src/usr.bin/netstat/netstat.h 2001/06/15 23:55:45 1.28 +++ src/usr.bin/netstat/netstat.h 2001/06/21 17:47:10 @@ -95,7 +95,8 @@ void bdg_stats (u_long, char *, int); -void mbpr (u_long, u_long, u_long, u_long); +void mbpr (u_long, u_long, u_long, u_long, u_long, u_long, + u_long, u_long, u_long); void hostpr (u_long, u_long); void impstats (u_long, u_long); Index: src/usr.bin/systat/mbufs.c =================================================================== RCS file: /home/ncvs/src/usr.bin/systat/mbufs.c,v retrieving revision 1.11 diff -u -r1.11 mbufs.c --- src/usr.bin/systat/mbufs.c 2000/07/15 16:24:21 1.11 +++ src/usr.bin/systat/mbufs.c 2001/06/22 03:18:51 @@ -49,7 +49,12 @@ #include "systat.h" #include "extern.h" -static struct mbstat *mb; +static struct mbpstat **mbpstat; +static int num_objs, ncpu; +#define GENLST (num_objs - 1) + +/* XXX: mbtypes stats temporarily disabled. */ +#if 0 static u_long *m_mbtypes; static int nmbtypes; @@ -66,6 +71,7 @@ }; #define NNAMES (sizeof (mtnames) / sizeof (mtnames[0])) +#endif WINDOW * openmbufs() @@ -95,12 +101,13 @@ void showmbufs() { - register int i, j, max, index; + int i, j, max, index; + u_long totfree; char buf[10]; char *mtname; - if (mb == 0) - return; +/* XXX: mbtypes stats temporarily disabled (will be back soon!) */ +#if 0 for (j = 0; j < wnd->_maxy; j++) { max = 0, index = -1; for (i = 0; i < wnd->_maxy; i++) { @@ -135,19 +142,27 @@ while (max--) waddch(wnd, 'X'); wclrtoeol(wnd); - mb->m_mbufs -= m_mbtypes[index]; m_mbtypes[index] = 0; } - if (mb->m_mbufs) { +#endif + + /* + * Print total number of free mbufs. + */ + totfree = mbpstat[GENLST]->mb_mbfree; + for (i = 0; i < ncpu; i++) + totfree += mbpstat[i]->mb_mbfree; + j = 0; /* XXX */ + if (totfree > 0) { mvwprintw(wnd, 1+j, 0, "%-10.10s", "free"); - if (mb->m_mbufs > 60) { - snprintf(buf, sizeof(buf), " %ld", mb->m_mbufs); - mb->m_mbufs = 60; - while (mb->m_mbufs--) + if (totfree > 60) { + snprintf(buf, sizeof(buf), " %lu", totfree); + totfree = 60; + while(totfree--) waddch(wnd, 'X'); waddstr(wnd, buf); } else { - while(mb->m_mbufs--) + while(totfree--) waddch(wnd, 'X'); } wclrtoeol(wnd); @@ -159,7 +174,10 @@ int initmbufs() { - size_t len, mbtypeslen; + int i; + size_t len; +#if 0 + size_t mbtypeslen; if (sysctlbyname("kern.ipc.mbtypes", NULL, &mbtypeslen, NULL, 0) < 0) { error("sysctl getting mbtypes size failed"); @@ -170,15 +188,28 @@ return 0; } nmbtypes = mbtypeslen / sizeof(*m_mbtypes); - - len = 0; - if (sysctlbyname("kern.ipc.mbstat", 0, &len, 0, 0) < 0) { - error("sysctl getting mbstat size failed"); +#endif + len = sizeof(int); + if (sysctlbyname("kern.smp.cpus", &ncpu, &len, NULL, 0) < 0) { + error("sysctl getting number of cpus"); + return 0; + } + if (sysctlbyname("kern.ipc.mb_statpcpu", NULL, &len, NULL, 0) < 0) { + error("sysctl getting mbpstat total size failed"); + return 0; + } + num_objs = (int)(len / sizeof(struct mbpstat)); + if ((mbpstat = calloc(num_objs, sizeof(struct mbpstat *))) == NULL) { + error("calloc mbpstat pointers failed"); return 0; } + if ((mbpstat[0] = calloc(num_objs, sizeof(struct mbpstat))) == NULL) { + error("calloc mbpstat structures failed"); + return 0; + } + for (i = 0; i < num_objs; i++) + mbpstat[i] = mbpstat[0] + i; - if (mb == 0) - mb = (struct mbstat *)calloc(1, sizeof *mb); return 1; } @@ -186,12 +217,13 @@ fetchmbufs() { size_t len; - - len = sizeof *mb; - if (sysctlbyname("kern.ipc.mbstat", mb, &len, 0, 0) < 0) - 
printw("sysctl: mbstat: %s", strerror(errno)); + len = num_objs * sizeof(struct mbpstat); + if (sysctlbyname("kern.ipc.mb_statpcpu", mbpstat[0], &len, NULL, 0) < 0) + printw("sysctl: mbpstat: %s", strerror(errno)); +#if 0 len = nmbtypes * sizeof *m_mbtypes; if (sysctlbyname("kern.ipc.mbtypes", m_mbtypes, &len, 0, 0) < 0) printw("sysctl: mbtypes: %s", strerror(errno)); +#endif }