Index: src/sys/conf/files
===================================================================
RCS file: /home/ncvs/src/sys/conf/files,v
retrieving revision 1.532
diff -u -r1.532 files
--- src/sys/conf/files	2001/06/07 20:12:11	1.532
+++ src/sys/conf/files	2001/06/09 05:31:04
@@ -795,6 +795,7 @@
 kern/subr_eventhandler.c	standard
 kern/subr_kobj.c	standard
 kern/subr_log.c		standard
+kern/subr_mbuf.c	standard
 kern/subr_mchain.c	optional libmchain
 kern/subr_module.c	standard
 kern/subr_pcpu.c	standard
Index: src/sys/conf/param.c
===================================================================
RCS file: /home/ncvs/src/sys/conf/param.c,v
retrieving revision 1.40
diff -u -r1.40 param.c
--- src/sys/conf/param.c	2000/10/29 16:57:27	1.40
+++ src/sys/conf/param.c	2001/06/09 05:31:04
@@ -64,17 +64,35 @@
 #define MAXFILES (NPROC*2)
 #endif
 int	maxproc = NPROC;			/* maximum # of processes */
-int	maxprocperuid = NPROC-1;		/* maximum # of processes per user */
-int	maxfiles = MAXFILES;			/* system wide open files limit */
-int	maxfilesperproc = MAXFILES;		/* per-process open files limit */
+int	maxprocperuid = NPROC-1;		/* max # of procs per user */
+int	maxfiles = MAXFILES;			/* sys. wide open files limit */
+int	maxfilesperproc = MAXFILES;		/* per-proc open files limit */
 int	ncallout = 16 + NPROC + MAXFILES;	/* maximum # of timer events */
-int	mbuf_wait = 32;				/* mbuf sleep time in ticks */
+int	mbuf_wait = 64;				/* mbuf sleep time in ticks */
+u_int	mbuf_limit = 512;			/* max # of mbufs per CPU lst */
+u_int	clust_limit = 128;			/* max # clusts per CPU lst */
 
-/* maximum # of sf_bufs (sendfile(2) zero-copy virtual buffers) */
+/*
+ * Default values for nmbclusters, nmbufs, nmbcnt, and nsfbufs.
+ * See src/sys/kern/subr_mbuf.c for information regarding the significance
+ * of nmbclusters, nmbcnt, and nmbufs.
+ */
+#ifndef	NMBCLUSTERS
+#define	NMBCLUSTERS	(1024 + MAXUSERS * 16)
+#endif
+#ifndef	NMBUFS
+#define	NMBUFS		(NMBCLUSTERS * 3)
+#endif
 #ifndef NSFBUFS
 #define	NSFBUFS (512 + MAXUSERS * 16)
+#endif
+#ifndef	NMBCNTS
+#define	NMBCNTS		(NMBCLUSTERS + NSFBUFS)
 #endif
-int	nsfbufs = NSFBUFS;
+int	nmbclusters = NMBCLUSTERS;
+int	nmbufs = NMBUFS;
+int	nmbcnt = NMBCNTS;
+int	nsfbufs = NSFBUFS;	/* max. # of sf_bufs (sendfile(2) buffers) */
 
 /*
  * These may be set to nonzero here or by patching.
Index: src/sys/dev/ed/if_ed.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/ed/if_ed.c,v
retrieving revision 1.200
diff -u -r1.200 if_ed.c
--- src/sys/dev/ed/if_ed.c	2001/03/03 08:31:06	1.200
+++ src/sys/dev/ed/if_ed.c	2001/06/09 05:31:04
@@ -40,6 +40,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sockio.h>
+#include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
 #include <sys/socket.h>
Index: src/sys/dev/vx/if_vx.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/vx/if_vx.c,v
retrieving revision 1.32
diff -u -r1.32 if_vx.c
--- src/sys/dev/vx/if_vx.c	2000/12/07 23:30:51	1.32
+++ src/sys/dev/vx/if_vx.c	2001/06/09 05:31:04
@@ -31,6 +31,10 @@
  *
  */
 
+#if 0
+#error "This driver is broken. Until it's fixed, you can't use it."
+#endif
+
 /*
  * Created from if_ep.c driver by Fred Gray (fgray@rice.edu) to support
  * the 3c590 family.
@@ -691,15 +695,20 @@
     {
 	struct mbuf		*m0;
 
+#ifdef BROKEN
 	m0 = m_devget(mtod(m, char *) - ETHER_ALIGN,
 	    m->m_pkthdr.len + ETHER_ALIGN, 0, ifp, NULL);
+#endif
+	m0 = m_devget(mtod(m, char *), m->m_pkthdr.len, 0, ifp, NULL);
 
 	if (m0 == NULL) {
 		ifp->if_ierrors++;
 		goto abort;
 	}
 
+#ifdef BROKEN
 	m_adj(m0, ETHER_ALIGN);
+#endif
 	m_freem(m);
 	m = m0;
     }
Index: src/sys/kern/kern_malloc.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_malloc.c,v
retrieving revision 1.88
diff -u -r1.88 kern_malloc.c
--- src/sys/kern/kern_malloc.c	2001/06/08 05:24:16	1.88
+++ src/sys/kern/kern_malloc.c	2001/06/09 05:31:04
@@ -474,8 +474,15 @@
 	if ((vm_kmem_size / 2) > (cnt.v_page_count * PAGE_SIZE))
 		vm_kmem_size = 2 * cnt.v_page_count * PAGE_SIZE;
 
+	/*
+	 * In mb_init(), we set up submaps for mbufs and clusters, in which
+	 * case we rounddown() (nmbufs * MSIZE) and (nmbclusters * MCLBYTES),
+	 * respectively. Mathematically, this means that what we do here may
+	 * amount to slightly more address space than we need for the submaps,
+	 * but it never hurts to have an extra page in kmem_map.
+	 */
 	npg = (nmbufs * MSIZE + nmbclusters * MCLBYTES + nmbcnt *
-	    sizeof(union mext_refcnt) + vm_kmem_size) / PAGE_SIZE;
+	    sizeof(u_int) + vm_kmem_size) / PAGE_SIZE;
 
 	kmemusage = (struct kmemusage *) kmem_alloc(kernel_map,
 		(vm_size_t)(npg * sizeof(struct kmemusage)));
Index: src/sys/kern/subr_mbuf.c
===================================================================
RCS file: subr_mbuf.c
diff -N subr_mbuf.c
--- /dev/null	Fri Jun  8 22:16:22 2001
+++ subr_mbuf.c	Fri Jun  8 22:31:04 2001
@@ -0,0 +1,817 @@
+/*
+ * Copyright (c) 2001
+ * 	Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by members and contributors
+ *	of The FreeBSD Project (http://www.FreeBSD.org/)
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "opt_param.h"
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/condvar.h>
+#include <sys/smp.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_extern.h>
+
+/*
+ * Maximum number of PCPU containers. If you know what you're doing you could
+ * explicitly define MBALLOC_NCPU to be exactly the number of CPUs on your
+ * system during compilation, and thus prevent kernel structure bloats.
+ */
+#ifdef	MBALLOC_NCPU
+#define	NCPU	MBALLOC_NCPU
+#else
+#define	NCPU	MAXCPU
+#endif
+
+/*
+ * The mbuf allocator is heavily based on Alfred Perlstein's
+ * (alfred@FreeBSD.org) "memcache" allocator which is itself based
+ * on concepts from several per-CPU memory allocators. The difference
+ * between this allocator and memcache is that, among other things:
+ *
+ * (i) We don't free back to the map from the free() routine - we leave the
+ *     option of implementing lazy freeing (from a kproc) in the future. 
+ *
+ * (ii) We want to leave room for future optimizations which may allow us
+ *      to inline a portion of "the easy allocation," provided that the
+ *      generated code is small enough.
+ *
+ * (iii) We allocate from separate sub-maps of kmem_map, thus limiting the
+ *	 maximum number of allocatable objects of a given type. Further,
+ *	 we handle blocking on a cv in the case that the map is starved and
+ *	 we have to rely solely on cached (circulating) objects.
+ *
+ * The mbuf allocator keeps all objects that it allocates in mb_buckets.
+ * The buckets keep a page worth of objects (an object can be an mbuf or an
+ * mbuf cluster) and facilitate moving larger sets of contiguous objects
+ * from the per-CPU lists to the main list for the given object. The buckets
+ * also have an added advantage in that after several moves from a per-CPU
+ * list to the main list and back to the per-CPU list, contiguous objects
+ * are kept together, thus trying to put the TLB cache to good use.
+ *
+ * The buckets are kept on singly-linked lists called "containers." A container
+ * is protected by a mutex lock in order to ensure consistency. The mutex lock
+ * itself is allocated separately and attached to the container at boot time,
+ * thus allowing for certain containers to share the same mutex lock. Per-CPU
+ * containers for mbufs and mbuf clusters all share the same per-CPU
+ * lock whereas the "general system" containers (i.e. the "main lists") for
+ * these objects share one global lock.
+ *
+ */
+struct mb_bucket {
+	SLIST_ENTRY(mb_bucket)	mb_blist;	/* link on a container's list */
+	int 			mb_owner;	/* container id; MB_BUCKET_FREE or'ed in while unlinked */
+	int			mb_numfree;	/* # of valid entries in mb_free[] */
+	void 			*mb_free[0];	/* stack of free object pointers */
+};
+
+struct mb_container {
+	SLIST_HEAD(mc_buckethd, mb_bucket)	mc_bhead;	/* buckets holding free objects */
+	struct	mtx				*mc_lock;	/* lock for this container (may be shared) */
+	int					mc_numowner;	/* this container's owner id */
+	u_int					mc_starved;	/* bumped when found empty / waited on */
+	u_long					*mc_objcount;	/* -> free object count in mb_statpcpu[] */
+	u_long					*mc_numpgs;	/* -> bucket (page) count in mb_statpcpu[] */
+};
+
+struct mb_gen_list {
+	struct	mb_container	mb_cont;	/* the general ("main") list */
+	struct	cv		mgl_mstarved;	/* mb_alloc_wait() sleeps here */
+};
+
+struct mb_pcpu_list {
+	struct	mb_container	mb_cont;	/* one CPU's private list */
+};
+
+/*
+ * Boot-time configurable object counts that will determine the maximum
+ * number of permitted objects in the mbuf and mcluster cases. In the
+ * ext counter (nmbcnt) case, it's just an indicator serving to scale
+ * kmem_map size properly - in other words, we may be allowed to allocate
+ * more than nmbcnt counters, whereas we will never be allowed to allocate
+ * more than nmbufs mbufs or nmbclusters mclusters.
+ */
+TUNABLE_INT("kern.ipc.nmbufs", &nmbufs);
+TUNABLE_INT("kern.ipc.nmbclusters", &nmbclusters);
+TUNABLE_INT("kern.ipc.nmbcnt", &nmbcnt);
+
+/*
+ * The freelist structures and mutex locks. The number statically declared
+ * here depends on the number of CPUs.
+ *
+ * We setup in such a way that all the objects (mbufs, clusters)
+ * share the same mutex lock. It has been established that we do not benefit
+ * from different locks for different objects, so we use the same lock,
+ * regardless of object type.
+ */
+struct mb_lstmngr {
+	struct	mb_gen_list	*ml_genlist;	/* general container */
+	struct	mb_pcpu_list	*ml_cntlst[NCPU]; /* per-CPU containers */
+	struct	mb_bucket	**ml_btable;	/* page index -> bucket */
+	vm_map_t		ml_map;		/* submap of kmem_map */
+	vm_offset_t		ml_mapbase;	/* base for MB_BUCKET_INDX */
+	vm_offset_t		ml_maptop;	/* set by kmem_suballoc() */
+	int			ml_mapfull;	/* set once a page alloc failed */
+	u_int			ml_objsize;	/* MSIZE or MCLBYTES */
+	u_int			*ml_wmhigh;	/* -> PCPU free object limit */
+};
+struct	mb_lstmngr	mb_list_mbuf, mb_list_clust;
+struct	mtx		mbuf_gen, mbuf_pcpu[NCPU];
+
+/*
+ * Local macros for internal allocator structure manipulations.
+ */
+#define	MB_GET_PCPU_LIST(mb_lst)	  (mb_lst)->ml_cntlst[PCPU_GET(cpuid)]
+
+#define	MB_GET_PCPU_LIST_NUM(mb_lst, num) (mb_lst)->ml_cntlst[(num)]
+
+#define	MB_GET_GEN_LIST(mb_lst)		  (mb_lst)->ml_genlist
+
+#define	MB_LOCK_CONT(mb_cnt)	 	  mtx_lock((mb_cnt)->mb_cont.mc_lock)
+
+#define	MB_UNLOCK_CONT(mb_cnt)		  mtx_unlock((mb_cnt)->mb_cont.mc_lock)
+
+#define	MB_BUCKET_INDX(mb_obj, mb_lst)					\
+    (int)(((caddr_t)(mb_obj) - (caddr_t)(mb_lst)->ml_mapbase) / PAGE_SIZE)
+
+#define	MB_GET_OBJECT(mb_objp, mb_bckt, mb_lst)				\
+{									\
+	struct	mc_buckethd	*_mchd = &((mb_lst)->mb_cont.mc_bhead);	\
+									\
+	(mb_bckt)->mb_numfree--;	/* pop the top of the free stack */ \
+	(mb_objp) = (mb_bckt)->mb_free[((mb_bckt)->mb_numfree)];	\
+	(*((mb_lst)->mb_cont.mc_objcount))--;				\
+	if ((mb_bckt)->mb_numfree == 0) {				\
+		SLIST_REMOVE_HEAD(_mchd, mb_blist); /* mb_bckt must be head */ \
+		SLIST_NEXT((mb_bckt), mb_blist) = NULL;			\
+		(mb_bckt)->mb_owner |= MB_BUCKET_FREE; /* now off-list */ \
+	}								\
+}
+
+#define	MB_PUT_OBJECT(mb_objp, mb_bckt, mb_lst) do {	/* push on free stack */ \
+	(mb_bckt)->mb_free[((mb_bckt)->mb_numfree)++] = (mb_objp);	\
+	(*((mb_lst)->mb_cont.mc_objcount))++;	/* container's free count */ \
+} while (0)	/* one statement, so it is safe under an unbraced if/else */
+
+/*
+ * Ownership of buckets/containers is represented by integers. The PCPU
+ * lists range from 0 to NCPU-1. We need a free numerical id for the general
+ * list (we use NCPU). We also need a non-conflicting free bit to indicate
+ * that the bucket is free and removed from a container, while not losing
+ * the bucket's originating container id. We use the highest bit
+ * for the free marker.
+ */
+#define	MB_GENLIST_OWNER	(NCPU)
+#define	MB_BUCKET_FREE		(1 << (sizeof(int) * 8 - 1))
+
+/*
+ * sysctl(8) exported objects
+ */
+struct	mbstat	mbstat;			/* General stats + infos. */
+struct	mbpstat	mb_statpcpu[NCPU+1];	/* PCPU + Gen. container alloc stats */
+SYSCTL_DECL(_kern_ipc);
+SYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, 
+    "Maximum number of mbuf clusters available");
+SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
+    "Maximum number of mbufs available"); 
+SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0,
+    "Number used to scale kmem_map to ensure sufficient space for counters");
+SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW, &mbuf_wait, 0,
+    "Sleep time of mbuf subsystem wait allocations during exhaustion");
+SYSCTL_UINT(_kern_ipc, OID_AUTO, mbuf_limit, CTLFLAG_RW, &mbuf_limit, 0,
+    "Upper limit of number of mbufs allowed on each PCPU list");
+SYSCTL_UINT(_kern_ipc, OID_AUTO, clust_limit, CTLFLAG_RW, &clust_limit, 0,
+    "Upper limit of number of mbuf clusters allowed on each PCPU list");
+SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
+    "Mbuf general information and statistics");
+SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mb_statpcpu, CTLFLAG_RD, mb_statpcpu,
+    sizeof(mb_statpcpu), "S,", "Mbuf allocator per CPU statistics");
+
+/*
+ * Prototypes of local (internal) routines.
+ */
+void			*mb_alloc_wait(struct mb_lstmngr *);
+static	void		mb_init(void *);
+struct	mb_bucket	*mb_pop_cont(struct mb_lstmngr *, int,
+			    struct mb_pcpu_list *);
+void			mb_reclaim(void);
+
+/*
+ * Initial allocation numbers. Each parameter represents the number of buckets
+ * of each object that will be placed initially in each PCPU container for
+ * said object.
+ */
+#define	NMB_MBUF_INIT	4
+#define	NMB_CLUST_INIT	16
+
+/*
+ * Initialize the mbuf subsystem.
+ *
+ * We sub-divide the kmem_map into several submaps; this way, we don't have
+ * to worry about artificially limiting the number of mbuf or mbuf cluster
+ * allocations, due to fear of one type of allocation "stealing" address
+ * space initially reserved for another.
+ *
+ * Setup both the general containers and all the PCPU containers. Populate
+ * the PCPU containers with initial numbers.
+ */
+MALLOC_DEFINE(M_MBUF, "mbufmgr", "mbuf subsystem management structures");
+SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mb_init, NULL)
+static void
+mb_init(void *dummy)
+{
+	struct	mb_pcpu_list	*pcpu_cnt;
+	vm_size_t		mb_map_size;
+	int			i, j;
+
+	/*
+	 * Setup all the submaps, for each type of object that we deal with.
+	 * Multiply in vm_size_t: an int product overflows for big tunables.
+	 */
+	mb_map_size = (vm_size_t)nmbufs * MSIZE;
+	mb_map_size = rounddown(mb_map_size, PAGE_SIZE);
+	mb_list_mbuf.ml_btable = malloc((unsigned long)mb_map_size / PAGE_SIZE *
+	    sizeof(struct mb_bucket *), M_MBUF, M_NOWAIT);
+	if (mb_list_mbuf.ml_btable == NULL)
+		goto bad;
+	mb_list_mbuf.ml_map = kmem_suballoc(kmem_map,&(mb_list_mbuf.ml_mapbase),
+	    &(mb_list_mbuf.ml_maptop), mb_map_size);
+	mb_list_mbuf.ml_mapfull = 0;
+	mb_list_mbuf.ml_objsize = MSIZE;
+	mb_list_mbuf.ml_wmhigh = &mbuf_limit;
+
+	mb_map_size = (vm_size_t)nmbclusters * MCLBYTES;
+	mb_map_size = rounddown(mb_map_size, PAGE_SIZE);
+	mb_list_clust.ml_btable = malloc((unsigned long)mb_map_size / PAGE_SIZE
+	    * sizeof(struct mb_bucket *), M_MBUF, M_NOWAIT);
+	if (mb_list_clust.ml_btable == NULL)
+		goto bad;
+	mb_list_clust.ml_map = kmem_suballoc(kmem_map,
+	    &(mb_list_clust.ml_mapbase), &(mb_list_clust.ml_maptop),
+	    mb_map_size);
+	mb_list_clust.ml_mapfull = 0;
+	mb_list_clust.ml_objsize = MCLBYTES;
+	mb_list_clust.ml_wmhigh = &clust_limit;
+
+	/* XXX XXX XXX: mbuf_map->system_map = clust_map->system_map = 1 */
+
+	/*
+	 * Allocate required general (global) containers for each object type.
+	 */
+	mb_list_mbuf.ml_genlist = malloc(sizeof(struct mb_gen_list), M_MBUF,
+	    M_NOWAIT);
+	mb_list_clust.ml_genlist = malloc(sizeof(struct mb_gen_list), M_MBUF,
+	    M_NOWAIT);
+	if ((mb_list_mbuf.ml_genlist == NULL) ||
+	    (mb_list_clust.ml_genlist == NULL))
+		goto bad;
+
+	/*
+	 * Initialize condition variables and general container mutex locks.
+	 */
+	mtx_init(&mbuf_gen, "mbuf subsystem general lists lock", 0);
+	cv_init(&(mb_list_mbuf.ml_genlist->mgl_mstarved), "mbuf pool starved");
+	cv_init(&(mb_list_clust.ml_genlist->mgl_mstarved),
+	    "mcluster pool starved");
+	mb_list_mbuf.ml_genlist->mb_cont.mc_lock =
+	    mb_list_clust.ml_genlist->mb_cont.mc_lock = &mbuf_gen;
+
+	/*
+	 * Setup the general containers for each object.
+	 */
+	mb_list_mbuf.ml_genlist->mb_cont.mc_numowner =
+	    mb_list_clust.ml_genlist->mb_cont.mc_numowner = MB_GENLIST_OWNER;
+	mb_list_mbuf.ml_genlist->mb_cont.mc_starved =
+	    mb_list_clust.ml_genlist->mb_cont.mc_starved = 0;
+	mb_list_mbuf.ml_genlist->mb_cont.mc_objcount =
+	    &(mb_statpcpu[MB_GENLIST_OWNER].mb_mbfree);
+	mb_list_clust.ml_genlist->mb_cont.mc_objcount =
+	    &(mb_statpcpu[MB_GENLIST_OWNER].mb_clfree);
+	mb_list_mbuf.ml_genlist->mb_cont.mc_numpgs =
+	    &(mb_statpcpu[MB_GENLIST_OWNER].mb_mbpgs);
+	mb_list_clust.ml_genlist->mb_cont.mc_numpgs =
+	    &(mb_statpcpu[MB_GENLIST_OWNER].mb_clpgs);
+	SLIST_INIT(&(mb_list_mbuf.ml_genlist->mb_cont.mc_bhead));
+	SLIST_INIT(&(mb_list_clust.ml_genlist->mb_cont.mc_bhead));
+
+	/*
+	 * Initialize general mbuf statistics
+	 */
+	mbstat.m_msize = MSIZE;
+	mbstat.m_mclbytes = MCLBYTES;
+	mbstat.m_minclsize = MINCLSIZE;
+	mbstat.m_mlen = MLEN;
+	mbstat.m_mhlen = MHLEN;
+
+	/*
+	 * Allocate and initialize PCPU containers.
+	 */
+	for (i = 0; i < mp_ncpus; i++) {
+		mb_list_mbuf.ml_cntlst[i] = malloc(sizeof(struct mb_pcpu_list),
+		    M_MBUF, M_NOWAIT);
+		mb_list_clust.ml_cntlst[i] = malloc(sizeof(struct mb_pcpu_list),
+		    M_MBUF, M_NOWAIT);
+		if ((mb_list_mbuf.ml_cntlst[i] == NULL) ||
+		    (mb_list_clust.ml_cntlst[i] == NULL))
+			goto bad;
+
+		mtx_init(&mbuf_pcpu[i], "mbuf PCPU list lock", 0);
+		mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_lock =
+		    mb_list_clust.ml_cntlst[i]->mb_cont.mc_lock = &mbuf_pcpu[i];
+
+		mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_numowner =
+		    mb_list_clust.ml_cntlst[i]->mb_cont.mc_numowner = i;
+		mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_starved =
+		    mb_list_clust.ml_cntlst[i]->mb_cont.mc_starved = 0;
+		mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_objcount =
+		    &(mb_statpcpu[i].mb_mbfree);
+		mb_list_clust.ml_cntlst[i]->mb_cont.mc_objcount =
+		    &(mb_statpcpu[i].mb_clfree);
+		mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_numpgs =
+		    &(mb_statpcpu[i].mb_mbpgs);
+		mb_list_clust.ml_cntlst[i]->mb_cont.mc_numpgs =
+		    &(mb_statpcpu[i].mb_clpgs);
+
+		SLIST_INIT(&(mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_bhead));
+		SLIST_INIT(&(mb_list_clust.ml_cntlst[i]->mb_cont.mc_bhead));
+
+		/*
+		 * Perform initial allocations.
+		 */
+		pcpu_cnt = MB_GET_PCPU_LIST_NUM(&mb_list_mbuf, i);
+		MB_LOCK_CONT(pcpu_cnt);
+		for (j = 0; j < NMB_MBUF_INIT; j++) {
+			if (mb_pop_cont(&mb_list_mbuf, M_DONTWAIT, pcpu_cnt)
+			    == NULL)
+				goto bad;
+		}
+		MB_UNLOCK_CONT(pcpu_cnt);
+
+		pcpu_cnt = MB_GET_PCPU_LIST_NUM(&mb_list_clust, i);
+		MB_LOCK_CONT(pcpu_cnt);
+		for (j = 0; j < NMB_CLUST_INIT; j++) {
+			if (mb_pop_cont(&mb_list_clust, M_DONTWAIT, pcpu_cnt)
+			    == NULL)
+				goto bad;
+		}
+		MB_UNLOCK_CONT(pcpu_cnt);
+	}
+
+	return;
+bad:
+	panic("mb_init(): failed to initialize mbuf subsystem!");
+}
+
+/*
+ * Populate a given mbuf PCPU container with a bucket full of fresh new
+ * buffers. Return a pointer to the new bucket (already in the container if
+ * successful), or return NULL on failure.
+ *
+ * LOCKING NOTES:
+ * PCPU container lock must be held when this is called.
+ * The lock is dropped here so that we can cleanly call the underlying VM
+ * code. If we fail, we return with no locks held. If we succeed (i.e. return
+ * non-NULL), we return with the PCPU lock held, ready for allocation from
+ * the returned bucket.
+ */
+struct mb_bucket *
+mb_pop_cont(struct mb_lstmngr *mb_list, int how, struct mb_pcpu_list *cnt_lst)
+{
+	struct	mb_bucket	*nbkt;
+	caddr_t			obj;
+	u_int			n, nobjs;
+	int			mflags;
+
+	/* Drop the container lock before calling into malloc(9) and the VM. */
+	MB_UNLOCK_CONT(cnt_lst);
+	/* A map already flagged as out of address space cannot grow. */
+	if (mb_list->ml_mapfull)
+		return (NULL);
+
+	/* Only an M_TRYWAIT caller gets M_WAITOK from the allocators below. */
+	mflags = (how == M_TRYWAIT) ? M_WAITOK : M_NOWAIT;
+	nobjs = PAGE_SIZE / mb_list->ml_objsize;
+
+	nbkt = malloc(sizeof(struct mb_bucket) + nobjs * sizeof(void *),
+	    M_MBUF, mflags);
+	if (nbkt == NULL)
+		return (NULL);
+
+	obj = (caddr_t)kmem_malloc(mb_list->ml_map, PAGE_SIZE, mflags);
+	if (obj == NULL) {
+		free(nbkt, M_MBUF);
+		return (NULL);
+	}
+
+	/* Index the bucket by its page, then carve the page into objects. */
+	mb_list->ml_btable[MB_BUCKET_INDX(obj, mb_list)] = nbkt;
+	for (n = 0; n < nobjs; n++, obj += mb_list->ml_objsize)
+		nbkt->mb_free[n] = obj;
+	nbkt->mb_numfree = nobjs;
+
+	/* Retake the lock; on success the caller gets it back held. */
+	MB_LOCK_CONT(cnt_lst);
+	SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead), nbkt, mb_blist);
+	nbkt->mb_owner = cnt_lst->mb_cont.mc_numowner;
+	(*(cnt_lst->mb_cont.mc_numpgs))++;
+	*(cnt_lst->mb_cont.mc_objcount) += nbkt->mb_numfree;
+
+	return (nbkt);
+}
+
+/*
+ * Allocate an mbuf-subsystem type object.
+ * The general case is very easy. Complications only arise if our PCPU
+ * container is empty. Things get worse if the PCPU container is empty,
+ * the general container is empty, and we've run out of address space
+ * in our map; then we try to block if we're willing to (M_TRYWAIT).
+ */
+void *
+mb_alloc(struct mb_lstmngr *mb_list, int how)
+{
+	struct	mb_pcpu_list	*cnt_lst;
+	struct	mb_bucket 	*bucket;
+	void			*m;
+
+	m = NULL;
+	cnt_lst = MB_GET_PCPU_LIST(mb_list);
+	MB_LOCK_CONT(cnt_lst);
+
+	if ((bucket = SLIST_FIRST(&(cnt_lst->mb_cont.mc_bhead))) != NULL) {
+		/*
+		 * This is the easy allocation case. We just grab an object
+		 * from a bucket in the PCPU container. At worst, we
+		 * have just emptied the bucket and so we remove it
+		 * from the container.
+		 */
+		MB_GET_OBJECT(m, bucket, cnt_lst);
+		MB_UNLOCK_CONT(cnt_lst);
+	} else {
+		struct	mb_gen_list *gen_list;
+
+		/*
+		 * This is the less-common more difficult case. We must
+		 * first verify if the general list has anything for us
+		 * and if that also fails, we must allocate a page from
+		 * the map and create a new bucket to place in our PCPU
+		 * container (already locked). If the map is starved then
+		 * we're really in for trouble, as we have to wait on
+		 * the general container's condition variable.
+		 */
+		gen_list = MB_GET_GEN_LIST(mb_list);
+		MB_LOCK_CONT(gen_list);
+
+		if ((bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead)))
+		    != NULL) {
+			/*
+			 * Give ownership of the bucket to our CPU's
+			 * container, but only actually put the bucket
+			 * in the container if it doesn't become free
+			 * upon removing an mbuf from it.
+			 */
+			SLIST_REMOVE_HEAD(&(gen_list->mb_cont.mc_bhead),
+			    mb_blist);
+			bucket->mb_owner = cnt_lst->mb_cont.mc_numowner;
+			(*(gen_list->mb_cont.mc_numpgs))--;
+			(*(cnt_lst->mb_cont.mc_numpgs))++;
+			*(gen_list->mb_cont.mc_objcount) -= bucket->mb_numfree;
+			bucket->mb_numfree--;
+			m = bucket->mb_free[(bucket->mb_numfree)];
+			if (bucket->mb_numfree == 0) {
+				SLIST_NEXT(bucket, mb_blist) = NULL;
+				bucket->mb_owner |= MB_BUCKET_FREE;
+			} else {
+				SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead),
+				     bucket, mb_blist);
+				*(cnt_lst->mb_cont.mc_objcount) +=
+				    bucket->mb_numfree;
+			}
+			MB_UNLOCK_CONT(gen_list);
+			MB_UNLOCK_CONT(cnt_lst);
+		} else {
+			/*
+			 * We'll have to allocate a new page. Take the object
+			 * with MB_GET_OBJECT so that a bucket holding just one
+			 * object is unlinked and flagged once it is emptied.
+			 */
+			MB_UNLOCK_CONT(gen_list);
+			bucket = mb_pop_cont(mb_list, how, cnt_lst);
+			if (bucket != NULL) {
+				MB_GET_OBJECT(m, bucket, cnt_lst);
+				MB_UNLOCK_CONT(cnt_lst);
+			} else {
+				if (how == M_TRYWAIT) {
+				  /*
+			 	   * Absolute worst-case scenario. We block if
+			 	   * we're willing to, but only after trying to
+				   * steal from other lists.
+				   */
+					mb_list->ml_mapfull = 1;
+					m = mb_alloc_wait(mb_list);
+				} else
+					/* XXX: No consistency. */
+					mbstat.m_drops++;
+			}
+		}
+	}
+
+	return (m);
+}
+
+/*
+ * This is the worst-case scenario called only if we're allocating with
+ * M_TRYWAIT. We first drain all the protocols, then try to find an mbuf
+ * by looking in every PCPU container. If we're still unsuccessful, we
+ * try the general container one last time and possibly block on our
+ * starved cv.
+ */
+void *
+mb_alloc_wait(struct mb_lstmngr *mb_list)
+{
+	struct	mb_pcpu_list	*cpu_lst;
+	struct	mb_gen_list 	*genl;
+	struct	mb_bucket 	*bkt;
+	void			*obj;
+	int			cpu, error;
+
+	/*
+	 * Ask the protocols to hand back whatever mbufs and clusters
+	 * they can spare before we go looking.
+	 */
+	mb_reclaim();
+
+	/*
+	 * Visit each CPU's container in turn and take the first object
+	 * found. Every container found empty gets its starved count bumped.
+	 */
+	for (cpu = 0; cpu < mp_ncpus; cpu++) {
+		cpu_lst = MB_GET_PCPU_LIST_NUM(mb_list, cpu);
+		MB_LOCK_CONT(cpu_lst);
+		bkt = SLIST_FIRST(&(cpu_lst->mb_cont.mc_bhead));
+		if (bkt == NULL) {
+			/* Nothing here: note it and try the next CPU. */
+			cpu_lst->mb_cont.mc_starved++;
+			MB_UNLOCK_CONT(cpu_lst);
+			continue;
+		}
+		MB_GET_OBJECT(obj, bkt, cpu_lst);
+		MB_UNLOCK_CONT(cpu_lst);
+		mbstat.m_wait++;	/* XXX: No consistency. */
+		return (obj);
+	}
+
+	/*
+	 * Every PCPU container was empty. Look at the general container
+	 * once more, since mb_reclaim() has run by now, and sleep on its
+	 * condition variable if it has nothing for us either.
+	 */
+	genl = MB_GET_GEN_LIST(mb_list);
+	MB_LOCK_CONT(genl);
+	bkt = SLIST_FIRST(&(genl->mb_cont.mc_bhead));
+	if (bkt != NULL) {
+		MB_GET_OBJECT(obj, bkt, genl);
+		MB_UNLOCK_CONT(genl);
+		mbstat.m_wait++;	/* XXX: No consistency. */
+		return (obj);
+	}
+
+	/* Keep the general starved count raised for as long as we sleep. */
+	genl->mb_cont.mc_starved++;
+	error = cv_timedwait(&(genl->mgl_mstarved), genl->mb_cont.mc_lock,
+	    mbuf_wait);
+	genl->mb_cont.mc_starved--;
+
+	/* Recheck the list only if cv_timedwait() reported success (0). */
+	obj = NULL;
+	bkt = NULL;
+	if (error == 0)
+		bkt = SLIST_FIRST(&(genl->mb_cont.mc_bhead));
+	if (bkt != NULL) {
+		MB_GET_OBJECT(obj, bkt, genl);
+		mbstat.m_wait++;	/* XXX: No consistency. */
+	} else
+		mbstat.m_drops++;	/* XXX: No consistency. */
+	MB_UNLOCK_CONT(genl);
+
+	return (obj);
+}
+
+/*
+ * Free an object to its rightful container.
+ * In the very general case, this operation is really very easy.
+ * Complications arise primarily if:
+ *	(a) We've hit the high limit on number of free objects allowed in
+ *	    our PCPU container.
+ *	(b) We're in a critical situation where our container has been
+ *	    marked 'starved' and we need to issue wakeups on the starved
+ *	    condition variable.
+ *	(c) Minor (odd) cases: our bucket has migrated while we were
+ *	    waiting for the lock; our bucket is in the general container;
+ *	    our bucket is empty.
+ */
+void
+mb_free(struct mb_lstmngr *mb_list, void *m)
+{
+	struct	mb_pcpu_list	*cnt_lst;
+	struct	mb_gen_list 	*gen_list;
+	struct	mb_bucket 	*bucket;
+	u_int			owner;
+
+	bucket = mb_list->ml_btable[MB_BUCKET_INDX(m, mb_list)];
+
+	/*
+	 * Make sure that if after we lock the bucket's present container the
+	 * bucket has migrated, that we drop the lock and get the new one.
+	 */
+retry_lock:
+	owner = bucket->mb_owner & ~MB_BUCKET_FREE;
+	switch (owner) {
+	case MB_GENLIST_OWNER:
+		gen_list = MB_GET_GEN_LIST(mb_list);
+		MB_LOCK_CONT(gen_list);
+		if (owner != (bucket->mb_owner & ~MB_BUCKET_FREE)) {
+			MB_UNLOCK_CONT(gen_list);
+			goto retry_lock;
+		}
+
+		/*
+		 * No migration needed. But a bucket that had been emptied is
+		 * off the list (see MB_GET_OBJECT): re-link it or lose it.
+		 */
+		MB_PUT_OBJECT(m, bucket, gen_list);
+		if (bucket->mb_owner & MB_BUCKET_FREE)
+			SLIST_INSERT_HEAD(&(gen_list->mb_cont.mc_bhead),
+			    bucket, mb_blist);
+		bucket->mb_owner = MB_GENLIST_OWNER;	/* clears the free bit */
+		if (gen_list->mb_cont.mc_starved > 0)
+			cv_signal(&(gen_list->mgl_mstarved));
+		MB_UNLOCK_CONT(gen_list);
+		break;
+
+	default:
+		cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, owner);
+		MB_LOCK_CONT(cnt_lst);
+		if (owner != (bucket->mb_owner & ~MB_BUCKET_FREE)) {
+			MB_UNLOCK_CONT(cnt_lst);
+			goto retry_lock;
+		}
+
+		MB_PUT_OBJECT(m, bucket, cnt_lst);
+
+		if (cnt_lst->mb_cont.mc_starved > 0) {
+			/*
+			 * This is a tough case. It means that we've
+			 * been flagged at least once to indicate that
+			 * we're empty, and that the system is in a critical
+			 * situation, so we ought to migrate at least one
+			 * bucket over to the general container.
+			 * There may or may not be a thread blocking on
+			 * the starved condition variable, but chances
+			 * are that one will eventually come up soon so
+			 * it's better to migrate now than never.
+			 */
+			gen_list = MB_GET_GEN_LIST(mb_list);
+			MB_LOCK_CONT(gen_list);
+			KASSERT((bucket->mb_owner & MB_BUCKET_FREE) != 0,
+			    ("mb_free: corrupt bucket %p\n", bucket));
+			SLIST_INSERT_HEAD(&(gen_list->mb_cont.mc_bhead),
+			    bucket, mb_blist);
+			bucket->mb_owner = MB_GENLIST_OWNER;
+			(*(cnt_lst->mb_cont.mc_objcount))--;
+			(*(gen_list->mb_cont.mc_objcount))++;
+			(*(cnt_lst->mb_cont.mc_numpgs))--;
+			(*(gen_list->mb_cont.mc_numpgs))++;
+
+			/*
+			 * Determine whether or not to keep transferring
+			 * buckets to the general list or whether we've
+			 * transferred enough already.
+			 * We realize that although we may flag another
+			 * bucket to be migrated to the general container
+			 * that in the meantime, the thread that was
+			 * blocked on the cv is already woken up and
+			 * long gone. But in that case, the worst
+			 * consequence is that we will end up migrating
+			 * one bucket too many, which is really not a big
+			 * deal, especially if we're close to a critical
+			 * situation.
+			 */
+			if (gen_list->mb_cont.mc_starved > 0) {
+				cnt_lst->mb_cont.mc_starved--;
+				cv_signal(&(gen_list->mgl_mstarved));
+			} else
+				cnt_lst->mb_cont.mc_starved = 0;
+
+			MB_UNLOCK_CONT(gen_list);
+			MB_UNLOCK_CONT(cnt_lst);
+			break;
+		}
+
+		if (*(cnt_lst->mb_cont.mc_objcount) > *(mb_list->ml_wmhigh)) {
+			/*
+			 * We've hit the high limit of allowed numbers of mbufs
+			 * on this PCPU list. We must now migrate a bucket
+			 * over to the general container.
+			 */
+			gen_list = MB_GET_GEN_LIST(mb_list);
+			MB_LOCK_CONT(gen_list);
+			if ((bucket->mb_owner & MB_BUCKET_FREE) == 0) {
+				bucket =
+				    SLIST_FIRST(&(cnt_lst->mb_cont.mc_bhead));
+				SLIST_REMOVE_HEAD(&(cnt_lst->mb_cont.mc_bhead),
+				    mb_blist);
+			}
+			SLIST_INSERT_HEAD(&(gen_list->mb_cont.mc_bhead),
+			    bucket, mb_blist);
+			bucket->mb_owner = MB_GENLIST_OWNER;
+			*(cnt_lst->mb_cont.mc_objcount) -= bucket->mb_numfree;
+			*(gen_list->mb_cont.mc_objcount) += bucket->mb_numfree;
+			(*(cnt_lst->mb_cont.mc_numpgs))--;
+			(*(gen_list->mb_cont.mc_numpgs))++;
+
+			MB_UNLOCK_CONT(gen_list);
+			MB_UNLOCK_CONT(cnt_lst);
+			break;
+		}
+
+		if (bucket->mb_owner & MB_BUCKET_FREE) {
+			SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead),
+			    bucket, mb_blist);
+			bucket->mb_owner = cnt_lst->mb_cont.mc_numowner;
+		}
+
+		MB_UNLOCK_CONT(cnt_lst);
+		break;
+	}
+}
+
+/*
+ * Drain protocols in hopes to free up some resources.
+ *
+ * LOCKING NOTES:
+ * No locks should be held when this is called. The drain routines have to
+ * presently acquire some locks which raises the possibility of lock order
+ * violation if we're holding any mutex if that mutex is acquired in reverse
+ * order relative to one of the locks in the drain routines.
+ */
+void
+mb_reclaim(void)
+{
+	struct	protosw	*proto;
+	struct	domain	*dom;
+
+	/*
+	 * XXX: With witness enabled this check trips almost every time,
+	 * XXX: since callers commonly still hold Giant on the way in here.
+	 * XXX: For the moment nothing can be done to avoid that.
+	 */
+#ifdef WITNESS
+	KASSERT(witness_list(curproc) == 0,
+	    ("mb_reclaim() called with locks held"));
+#endif
+
+	mbstat.m_drain++;	/* XXX: No consistency. */
+
+	for (dom = domains; dom != NULL; dom = dom->dom_next)
+		for (proto = dom->dom_protosw;
+		    proto < dom->dom_protoswNPROTOSW; proto++)
+			if (proto->pr_drain != NULL)
+				proto->pr_drain();
+}
Index: src/sys/kern/uipc_mbuf.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/uipc_mbuf.c,v
retrieving revision 1.81
diff -u -r1.81 uipc_mbuf.c
--- src/sys/kern/uipc_mbuf.c	2001/06/08 05:24:16	1.81
+++ src/sys/kern/uipc_mbuf.c	2001/06/09 05:31:04
@@ -37,50 +37,20 @@
 #include "opt_param.h"
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/condvar.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
-#include <sys/mutex.h>
 #include <sys/sysctl.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 
-#include <vm/vm.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_extern.h>
-
-#ifndef NMBCLUSTERS
-#define NMBCLUSTERS	(512 + MAXUSERS * 16)
-#endif
-
-static void mbinit(void *);
-SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)
-
-struct mbuf *mbutl;
-struct mbstat mbstat;
-u_long	mbtypes[MT_NTYPES];
 int	max_linkhdr;
 int	max_protohdr;
 int	max_hdr;
 int	max_datalen;
-int	nmbclusters = NMBCLUSTERS;
-int	nmbufs = NMBCLUSTERS * 4;
-int	nmbcnt;
-u_long	m_mballoc_wid = 0;
-u_long	m_clalloc_wid = 0;
 
 /*
- * freelist header structures...
- * mbffree_lst, mclfree_lst, mcntfree_lst
- */
-struct mbffree_lst mmbfree;
-struct mclfree_lst mclfree;
-struct mcntfree_lst mcntfree;
-struct mtx	mbuf_mtx;
-
-/*
  * sysctl(8) exported objects
  */
 SYSCTL_DECL(_kern_ipc);
@@ -91,369 +61,8 @@
 SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
 SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
 	   &max_datalen, 0, "");
-SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
-	   &mbuf_wait, 0, "");
-SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD, &mbstat, mbstat, "");
-SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes,
-	   sizeof(mbtypes), "LU", "");
-SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, 
-	   &nmbclusters, 0, "Maximum number of mbuf clusters available");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
-	   "Maximum number of mbufs available"); 
-SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0,
-	   "Maximum number of ext_buf counters available");
-
-TUNABLE_INT("kern.ipc.nmbclusters", &nmbclusters);
-TUNABLE_INT("kern.ipc.nmbufs", &nmbufs);
-TUNABLE_INT("kern.ipc.nmbcnt", &nmbcnt);
-
-static void	m_reclaim(void);
-
-/* Initial allocation numbers */
-#define NCL_INIT	2
-#define NMB_INIT	16
-#define REF_INIT	NMBCLUSTERS 
-
-/*
- * Full mbuf subsystem initialization done here.
- *
- * XXX: If ever we have system specific map setups to do, then move them to
- *      machdep.c - for now, there is no reason for this stuff to go there.
- */
-static void
-mbinit(void *dummy)
-{
-	vm_offset_t maxaddr;
-	vm_size_t mb_map_size;
-
-	/* Sanity checks and pre-initialization for non-constants */
-	if (nmbufs < nmbclusters * 2)
-		nmbufs = nmbclusters * 2;
-	if (nmbcnt == 0)
-		nmbcnt = EXT_COUNTERS;
-
-	/*
-	 * Setup the mb_map, allocate requested VM space.
-	 */
-	mb_map_size = (vm_size_t)(nmbufs * MSIZE + nmbclusters * MCLBYTES +
-	    nmbcnt * sizeof(union mext_refcnt));
-	mb_map_size = rounddown(mb_map_size, PAGE_SIZE);
-	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
-	    mb_map_size);
-	/* XXX XXX XXX: mb_map->system_map = 1; */
-
-	/*
-	 * Initialize the free list headers, and setup locks for lists.
-	 */
-	mmbfree.m_head = NULL;
-	mclfree.m_head = NULL;
-	mcntfree.m_head = NULL;
-	mtx_init(&mbuf_mtx, "mbuf free list lock", MTX_DEF);
-	cv_init(&mmbfree.m_starved, "mbuf free list starved cv");
-	cv_init(&mclfree.m_starved, "mbuf cluster free list starved cv");
- 
-	/*
-	 * Initialize mbuf subsystem (sysctl exported) statistics structure.
-	 */
-	mbstat.m_msize = MSIZE;
-	mbstat.m_mclbytes = MCLBYTES;
-	mbstat.m_minclsize = MINCLSIZE;
-	mbstat.m_mlen = MLEN;
-	mbstat.m_mhlen = MHLEN;
-
-	/*
-	 * Perform some initial allocations.
-	 */
-	mtx_lock(&mbuf_mtx);
-	if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0)
-		goto bad;
-	if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
-		goto bad;
-	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
-		goto bad;
-	mtx_unlock(&mbuf_mtx);
-
-	return;
-bad:
-	panic("mbinit: failed to initialize mbuf subsystem!");
-}
-
-/*
- * Allocate at least nmb reference count structs and place them
- * on the ref cnt free list.
- *
- * Must be called with the mcntfree lock held.
- */
-int
-m_alloc_ref(u_int nmb, int how)
-{
-	caddr_t p;
-	u_int nbytes;
-	int i;
-
-	/*
-	 * We don't cap the amount of memory that can be used
-	 * by the reference counters, like we do for mbufs and
-	 * mbuf clusters. In fact, we're absolutely sure that we
-	 * won't ever be going over our allocated space. We keep enough
-	 * space in mb_map to accomodate maximum values of allocatable
-	 * external buffers including, but not limited to, clusters.
-	 * (That's also why we won't have to have wait routines for
-	 * counters).
-	 *
-	 * If we're in here, we're absolutely certain to be returning
-	 * succesfully, as long as there is physical memory to accomodate
-	 * us. And if there isn't, but we're willing to wait, then
-	 * kmem_malloc() will do the only waiting needed.
-	 */
-
-	nbytes = round_page(nmb * sizeof(union mext_refcnt));
-	if (1 /* XXX: how == M_TRYWAIT */)
-		mtx_unlock(&mbuf_mtx);
-	if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
-	    M_WAITOK : M_NOWAIT)) == NULL) {
-		if (1 /* XXX: how == M_TRYWAIT */)
-			mtx_lock(&mbuf_mtx);
-		return (0);
-	}
-	nmb = nbytes / sizeof(union mext_refcnt);
-
-	/*
-	 * We don't let go of the mutex in order to avoid a race.
-	 * It is up to the caller to let go of the mutex.
-	 */
-	if (1 /* XXX: how == M_TRYWAIT */)
-		mtx_lock(&mbuf_mtx);
-	for (i = 0; i < nmb; i++) {
-		((union mext_refcnt *)p)->next_ref = mcntfree.m_head;
-		mcntfree.m_head = (union mext_refcnt *)p;
-		p += sizeof(union mext_refcnt);
-		mbstat.m_refree++;
-	}
-	mbstat.m_refcnt += nmb;
-
-	return (1);
-}
-
-/*
- * Allocate at least nmb mbufs and place on mbuf free list.
- *
- * Must be called with the mmbfree lock held.
- */
-int
-m_mballoc(int nmb, int how)
-{
-	caddr_t p;
-	int i;
-	int nbytes;
-
-	nbytes = round_page(nmb * MSIZE);
-	nmb = nbytes / MSIZE;
-
-	/*
-	 * If we've hit the mbuf limit, stop allocating from mb_map.
-	 * Also, once we run out of map space, it will be impossible to
-	 * get any more (nothing is ever freed back to the map).
-	 */
-	if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs))
-		return (0);
-
-	if (1 /* XXX: how == M_TRYWAIT */)
-		mtx_unlock(&mbuf_mtx);
-	p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
-		M_WAITOK : M_NOWAIT);
-	if (1 /* XXX: how == M_TRYWAIT */) {
-		mtx_lock(&mbuf_mtx);
-		if (p == NULL)
-			mbstat.m_wait++;
-	}
-
-	/*
-	 * Either the map is now full, or `how' is M_DONTWAIT and there
-	 * are no pages left.
-	 */
-	if (p == NULL)
-		return (0);
-
-	/*
-	 * We don't let go of the mutex in order to avoid a race.
-	 * It is up to the caller to let go of the mutex when done
-	 * with grabbing the mbuf from the free list.
-	 */
-	for (i = 0; i < nmb; i++) {
-		((struct mbuf *)p)->m_next = mmbfree.m_head;
-		mmbfree.m_head = (struct mbuf *)p;
-		p += MSIZE;
-	}
-	mbstat.m_mbufs += nmb;
-	mbtypes[MT_FREE] += nmb;
-	return (1);
-}
 
 /*
- * Once the mb_map has been exhausted and if the call to the allocation macros
- * (or, in some cases, functions) is with M_TRYWAIT, then it is necessary to
- * rely solely on reclaimed mbufs.
- *
- * Here we request for the protocols to free up some resources and, if we
- * still cannot get anything, then we wait for an mbuf to be freed for a 
- * designated (mbuf_wait) time, at most.
- *
- * Must be called with the mmbfree mutex held.
- */
-struct mbuf *
-m_mballoc_wait(void)
-{
-	struct mbuf *p = NULL;
-
-	/*
-	 * See if we can drain some resources out of the protocols.
-	 * We drop the mmbfree mutex to avoid recursing into it in some of
-	 * the drain routines. Clearly, we're faced with a race here because
-	 * once something is freed during the drain, it may be grabbed right
-	 * from under us by some other thread. But we accept this possibility
-	 * in order to avoid a potentially large lock recursion and, more
-	 * importantly, to avoid a potential lock order reversal which may
-	 * result in deadlock (See comment above m_reclaim()).
-	 */
-	mtx_unlock(&mbuf_mtx);
-	m_reclaim();
-
-	mtx_lock(&mbuf_mtx);
-	_MGET(p, M_DONTWAIT);
-
-	if (p == NULL) {
-		int retval;
-
-		m_mballoc_wid++;
-		retval = cv_timedwait(&mmbfree.m_starved, &mbuf_mtx,
-		    mbuf_wait);
-		m_mballoc_wid--;
-
-		/*
-		 * If we got signaled (i.e. didn't time out), allocate.
-		 */
-		if (retval == 0)
-			_MGET(p, M_DONTWAIT);
-	}
-
-	if (p != NULL) {
-		mbstat.m_wait++;
-		if (mmbfree.m_head != NULL)
-			MBWAKEUP(m_mballoc_wid, &mmbfree.m_starved);
-	}
-
-	return (p);
-}
-
-/*
- * Allocate some number of mbuf clusters
- * and place on cluster free list.
- *
- * Must be called with the mclfree lock held.
- */
-int
-m_clalloc(int ncl, int how)
-{
-	caddr_t p;
-	int i;
-	int npg_sz;
-
-	npg_sz = round_page(ncl * MCLBYTES);
-	ncl = npg_sz / MCLBYTES;
-
-	/*
-	 * If the map is now full (nothing will ever be freed to it).
-	 * If we've hit the mcluster number limit, stop allocating from
-	 * mb_map.
-	 */
-	if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters))
-		return (0);
-
-	if (1 /* XXX: how == M_TRYWAIT */)
-		mtx_unlock(&mbuf_mtx);
-	p = (caddr_t)kmem_malloc(mb_map, npg_sz,
-				 how == M_TRYWAIT ? M_WAITOK : M_NOWAIT);
-	if (1 /* XXX: how == M_TRYWAIT */)
-		mtx_lock(&mbuf_mtx);
-
-	/*
-	 * Either the map is now full, or `how' is M_DONTWAIT and there
-	 * are no pages left.
-	 */
-	if (p == NULL)
-		return (0);
-
-	for (i = 0; i < ncl; i++) {
-		((union mcluster *)p)->mcl_next = mclfree.m_head;
-		mclfree.m_head = (union mcluster *)p;
-		p += MCLBYTES;
-		mbstat.m_clfree++;
-	}
-	mbstat.m_clusters += ncl;
-	return (1);
-}
-
-/*
- * Once the mb_map submap has been exhausted and the allocation is called with
- * M_TRYWAIT, we rely on the mclfree list. If nothing is free, we will
- * block on a cv for a designated amount of time (mbuf_wait) or until we're
- * signaled due to sudden mcluster availability.
- *
- * Must be called with the mclfree lock held.
- */
-caddr_t
-m_clalloc_wait(void)
-{
-	caddr_t p = NULL;
-	int retval;
-
-	m_clalloc_wid++;
-	retval = cv_timedwait(&mclfree.m_starved, &mbuf_mtx, mbuf_wait);
-	m_clalloc_wid--;
-
-	/*
-	 * Now that we (think) that we've got something, try again.
-	 */
-	if (retval == 0)
-		_MCLALLOC(p, M_DONTWAIT);
-
-	if (p != NULL) {
-		mbstat.m_wait++;
-		if (mclfree.m_head != NULL)
-			MBWAKEUP(m_clalloc_wid, &mclfree.m_starved);
-	}
-
-	return (p);
-}
-
-/*
- * m_reclaim: drain protocols in hopes to free up some resources...
- *
- * XXX: No locks should be held going in here. The drain routines have
- * to presently acquire some locks which raises the possibility of lock
- * order violation if we're holding any mutex if that mutex is acquired in
- * reverse order relative to one of the locks in the drain routines.
- */
-static void
-m_reclaim(void)
-{
-	struct domain *dp;
-	struct protosw *pr;
-
-#ifdef WITNESS
-	KASSERT(witness_list(curproc) == 0,
-	    ("m_reclaim called with locks held"));
-#endif
-
-	for (dp = domains; dp; dp = dp->dom_next)
-		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
-			if (pr->pr_drain)
-				(*pr->pr_drain)();
-	mbstat.m_drain++;
-}
-
-/*
  * Space allocation routines.
  * Some of these are also available as macros
  * for critical paths.
@@ -671,18 +280,14 @@
 		off = 0;
 		m = m->m_next;
 		np = &n->m_next;
-	}
-	if (top == NULL) {
-		mtx_lock(&mbuf_mtx);
-		mbstat.m_mcfail++;
-		mtx_unlock(&mbuf_mtx);
 	}
+	if (top == NULL)
+		mbstat.m_mcfail++;	/* XXX: No consistency. */
+
 	return (top);
 nospace:
 	m_freem(top);
-	mtx_lock(&mbuf_mtx);
-	mbstat.m_mcfail++;
-	mtx_unlock(&mbuf_mtx);
+	mbstat.m_mcfail++;	/* XXX: No consistency. */
 	return (NULL);
 }
 
@@ -741,9 +346,7 @@
 	return top;
 nospace:
 	m_freem(top);
-	mtx_lock(&mbuf_mtx);
-	mbstat.m_mcfail++;
-	mtx_unlock(&mbuf_mtx);
+	mbstat.m_mcfail++;	/* XXX: No consistency. */ 
 	return (NULL);
 }
 
@@ -844,9 +447,7 @@
 
 nospace:
 	m_freem(top);
-	mtx_lock(&mbuf_mtx);
-	mbstat.m_mcfail++;
-	mtx_unlock(&mbuf_mtx);
+	mbstat.m_mcfail++;	/* XXX: No consistency. */
 	return (NULL);
 }
 
@@ -1008,9 +609,7 @@
 	return (m);
 bad:
 	m_freem(n);
-	mtx_lock(&mbuf_mtx);
-	mbstat.m_mpfail++;
-	mtx_unlock(&mbuf_mtx);
+	mbstat.m_mpfail++;	/* XXX: No consistency. */
 	return (NULL);
 }
 
Index: src/sys/net/if_tun.c
===================================================================
RCS file: /home/ncvs/src/sys/net/if_tun.c,v
retrieving revision 1.94
diff -u -r1.94 if_tun.c
--- src/sys/net/if_tun.c	2001/06/01 15:51:10	1.94
+++ src/sys/net/if_tun.c	2001/06/09 05:31:04
@@ -21,6 +21,7 @@
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/systm.h>
+#include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/socket.h>
Index: src/sys/nfs/nfs_subs.c
===================================================================
RCS file: /home/ncvs/src/sys/nfs/nfs_subs.c,v
retrieving revision 1.101
diff -u -r1.101 nfs_subs.c
--- src/sys/nfs/nfs_subs.c	2001/05/19 01:28:07	1.101
+++ src/sys/nfs/nfs_subs.c	2001/06/09 05:31:05
@@ -51,10 +51,10 @@
 #include <sys/mount.h>
 #include <sys/vnode.h>
 #include <sys/namei.h>
+#include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
-#include <sys/malloc.h>
 #include <sys/sysent.h>
 #include <sys/syscall.h>
 
Index: src/sys/sys/mbuf.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/mbuf.h,v
retrieving revision 1.78
diff -u -r1.78 mbuf.h
--- src/sys/sys/mbuf.h	2001/05/01 08:13:17	1.78
+++ src/sys/sys/mbuf.h	2001/06/09 05:31:05
@@ -38,9 +38,9 @@
 #define	_SYS_MBUF_H_
 
 #ifdef _KERNEL
-#include <sys/condvar.h>	/* XXX */
-#include <sys/_lock.h>
-#include <sys/_mutex.h>
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_MBUF);
+#endif
 #endif /* _KERNEL */
 
 /*
@@ -48,31 +48,15 @@
  * includes overhead.  An mbuf may add a single "mbuf cluster" of size
  * MCLBYTES (also in machine/param.h), which has no additional overhead
  * and is used instead of the internal data area; this is done when
- * at least MINCLSIZE of data must be stored.
+ * at least MINCLSIZE of data must be stored. Additionally, it is possible
+ * to allocate a separate buffer externally and attach it to the mbuf in
+ * a way similar to that of mbuf clusters.
  */
-
 #define	MLEN		(MSIZE - sizeof(struct m_hdr))	/* normal data len */
 #define	MHLEN		(MLEN - sizeof(struct pkthdr))	/* data len w/pkthdr */
-
 #define	MINCLSIZE	(MHLEN + 1)	/* smallest amount to put in cluster */
 #define	M_MAXCOMPRESS	(MHLEN / 2)	/* max amount to copy for compression */
 
-/*
- * Maximum number of allocatable counters for external buffers. This
- * ensures enough VM address space for the allocation of counters
- * in the extreme case where all possible external buffers are allocated.
- *
- * Note: When new types of external storage are allocated, EXT_COUNTERS
- * 	 must be tuned accordingly. Practically, this isn't a big deal
- *	 as each counter is only a word long, so we can fit
- *	 (PAGE_SIZE / length of word) counters in a single page.
- *
- * XXX: Must increase this if using any of if_ti, if_wb, if_sk drivers,
- *	or any other drivers which may manage their own buffers and
- *	eventually attach them to mbufs. 
- */
-#define EXT_COUNTERS (nmbclusters + nsfbufs)
-
 #ifdef _KERNEL
 /*
  * Macros for type conversion
@@ -83,7 +67,9 @@
 #define	dtom(x)		((struct mbuf *)((intptr_t)(x) & ~(MSIZE-1)))
 #endif /* _KERNEL */
 
-/* header at beginning of each mbuf: */
+/*
+ * Header present at the beginning of every mbuf.
+ */
 struct m_hdr {
 	struct	mbuf *mh_next;		/* next buffer in chain */
 	struct	mbuf *mh_nextpkt;	/* next chain in queue/record */
@@ -93,7 +79,9 @@
 	short	mh_flags;		/* flags; see below */
 };
 
-/* record/packet header in first mbuf of chain; valid if M_PKTHDR set */
+/*
+ * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set.
+ */
 struct pkthdr {
 	struct	ifnet *rcvif;		/* rcv interface */
 	int	len;			/* total packet length */
@@ -105,17 +93,23 @@
 	struct	mbuf *aux;		/* extra data buffer; ipsec/others */
 };
 
-/* description of external storage mapped into mbuf, valid if M_EXT set */
+/*
+ * Description of external storage mapped into mbuf; valid only if M_EXT is set.
+ */
 struct m_ext {
 	caddr_t	ext_buf;		/* start of buffer */
 	void	(*ext_free)		/* free routine if not the usual */
 		    (caddr_t, void *);
 	void	*ext_args;		/* optional argument pointer */
 	u_int	ext_size;		/* size of buffer, for ext_free */
-	union	mext_refcnt *ref_cnt;	/* pointer to ref count info */
+	u_int	*ref_cnt;		/* pointer to ref count info */
 	int	ext_type;		/* type of external storage */
 };
 
+/*
+ * The core of the mbuf object along with some shortcut defines for
+ * practical purposes.
+ */
 struct mbuf {
 	struct	m_hdr m_hdr;
 	union {
@@ -141,7 +135,9 @@
 #define	m_pktdat	M_dat.MH.MH_dat.MH_databuf
 #define	m_dat		M_dat.M_databuf
 
-/* mbuf flags */
+/*
+ * mbuf flags
+ */
 #define	M_EXT		0x0001	/* has associated external storage */
 #define	M_PKTHDR	0x0002	/* start of record */
 #define	M_EOR		0x0004	/* end of record */
@@ -152,24 +148,32 @@
 #define	M_PROTO4	0x0080	/* protocol-specific */
 #define	M_PROTO5	0x0100	/* protocol-specific */
 
-/* mbuf pkthdr flags, also in m_flags */
+/*
+ * mbuf pkthdr flags (also stored in m_flags)
+ */
 #define	M_BCAST		0x0200	/* send/received as link-level broadcast */
 #define	M_MCAST		0x0400	/* send/received as link-level multicast */
 #define	M_FRAG		0x0800	/* packet is a fragment of a larger packet */
 #define	M_FIRSTFRAG	0x1000	/* packet is first fragment */
 #define	M_LASTFRAG	0x2000	/* packet is last fragment */
 
-/* external buffer types: identify ext_buf type */
+/*
+ * External buffer types: identify ext_buf type
+ */
 #define	EXT_CLUSTER	1	/* mbuf cluster */
 #define	EXT_SFBUF	2	/* sendfile(2)'s sf_bufs */
 #define	EXT_NET_DRV	100	/* custom ext_buf provided by net driver(s) */
 #define	EXT_MOD_TYPE	200	/* custom module's ext_buf type */
 
-/* flags copied when copying m_pkthdr */
+/*
+ * Flags copied when copying m_pkthdr
+ */
 #define	M_COPYFLAGS	(M_PKTHDR|M_EOR|M_PROTO1|M_PROTO1|M_PROTO2|M_PROTO3 | \
 			    M_PROTO4|M_PROTO5|M_BCAST|M_MCAST|M_FRAG|M_RDONLY)
 
-/* flags indicating hw checksum support and sw checksum requirements */
+/*
+ * Flags indicating hw checksum support and sw checksum requirements
+ */
 #define CSUM_IP			0x0001		/* will csum IP */
 #define CSUM_TCP		0x0002		/* will csum TCP */
 #define CSUM_UDP		0x0004		/* will csum UDP */
@@ -184,7 +188,9 @@
 #define CSUM_DELAY_DATA		(CSUM_TCP | CSUM_UDP)
 #define CSUM_DELAY_IP		(CSUM_IP)	/* XXX add ipv6 here too? */
 
-/* mbuf types */
+/*
+ * mbuf types
+ */
 #define	MT_FREE		0	/* should be on free list */
 #define	MT_DATA		1	/* dynamic (data) allocation */
 #define	MT_HEADER	2	/* packet header */
@@ -209,17 +215,26 @@
 #define	MT_NTYPES	16	/* number of mbuf types for mbtypes[] */
 
 /*
- * mbuf statistics
+ * Mbuf and cluster allocation statistics PCPU structure.
  */
+struct mbpstat {
+	u_long	mb_mbfree;	/* presumably # of free mbufs (TODO confirm) */
+	u_long	mb_mbpgs;	/* presumably # of mbuf pages (TODO confirm) */
+	u_long	mb_clfree;	/* presumably # of free clusters (TODO confirm) */
+	u_long	mb_clpgs;	/* presumably # of cluster pages (TODO confirm) */
+};
+
+/*
+ * General mbuf statistics structure.
+ * XXX: Modifications of these are not protected by any mutex locks nor by
+ *	any atomic() manipulations. As a result, we may occasionally lose
+ *	a count or two. Luckily, not all of these fields are modified at all
+ *	and remain static, and those that are manipulated are only manipulated
+ *	in failure situations, which do not occur (hopefully) very often.
+ */
 struct mbstat {
-	u_long	m_mbufs;	/* # mbufs obtained from page pool */
-	u_long	m_clusters;	/* # clusters obtained from page pool */
-	u_long	m_clfree;	/* # clusters on freelist (cache) */
-	u_long	m_refcnt;	/* # ref counters obtained from page pool */
-	u_long	m_refree;	/* # ref counters on freelist (cache) */
-	u_long	m_spare;	/* spare field */
-	u_long	m_drops;	/* times failed to find space */
-	u_long	m_wait;		/* times waited for space */
+	u_long	m_drops;	/* times failed to allocate */
+	u_long	m_wait;		/* times successfully returned from wait */
 	u_long	m_drain;	/* times drained protocols for space */
 	u_long	m_mcfail;	/* times m_copym failed */
 	u_long	m_mpfail;	/* times m_pullup failed */
@@ -230,64 +245,18 @@
 	u_long	m_mhlen;	/* length of data in a header mbuf */
 };
 
-/* flags to m_get/MGET */
+/*
+ * Flags specifying how an allocation should be made.
+ * M_DONTWAIT means "don't block if nothing is available" whereas
+ * M_TRYWAIT means "block for mbuf_wait ticks at most if nothing is
+ * available."
+ */
 #define	M_DONTWAIT	1
 #define	M_TRYWAIT	0
 #define	M_WAIT		M_TRYWAIT	/* XXX: Deprecated. */
 
-/*
- * Normal mbuf clusters are normally treated as character arrays
- * after allocation, but use the first word of the buffer as a free list
- * pointer while on the free list.
- */
-union mcluster {
-	union	mcluster *mcl_next;
-	char	mcl_buf[MCLBYTES];
-};
-
-/*
- * The m_ext object reference counter structure.
- */
-union mext_refcnt {
-	union	mext_refcnt *next_ref;
-	u_int	refcnt;
-};
-
 #ifdef _KERNEL
 /*
- * The freelists for mbufs and mbuf clusters include condition variables
- * that are used in cases of depletion/starvation.
- * The counter freelist does not require a condition variable as we never
- * expect to consume more than the reserved address space for counters.
- * All are presently protected by the mbuf_mtx lock.
- */
-struct mbffree_lst {
-	struct	mbuf *m_head;
-	struct	cv m_starved;
-};
-
-struct mclfree_lst {
-	union	mcluster *m_head;
-	struct	cv m_starved;
-};
-  
-struct mcntfree_lst {
-	union	mext_refcnt *m_head;
-};
-
-/*
- * Signal a single instance (if any) blocked on a m_starved cv (i.e. an
- * instance waiting for an {mbuf, cluster} to be freed to the global
- * cache lists).
- *
- * Must be called with mbuf_mtx held.
- */
-#define	MBWAKEUP(m_wid, m_cv) do {					\
-	if ((m_wid) > 0)						\
-		cv_signal((m_cv));					\
-} while (0)
-
-/*
  * mbuf external reference count management macros:
  *
  * MEXT_IS_REF(m): true if (m) is not the only mbuf referencing
@@ -298,47 +267,25 @@
  * MEXT_INIT_REF(m): allocate and initialize an external
  *     object reference counter for (m)
  */
-#define MEXT_IS_REF(m) ((m)->m_ext.ref_cnt->refcnt > 1)
+#define MEXT_IS_REF(m) (*((m)->m_ext.ref_cnt) > 1)
 
 #define MEXT_REM_REF(m) do {						\
-	KASSERT((m)->m_ext.ref_cnt->refcnt > 0, ("m_ext refcnt < 0"));	\
-	atomic_subtract_int(&((m)->m_ext.ref_cnt->refcnt), 1);		\
+	KASSERT(*((m)->m_ext.ref_cnt) > 0, ("m_ext refcnt < 0"));	\
+	atomic_subtract_int((m)->m_ext.ref_cnt, 1);			\
 } while(0)
 
-#define MEXT_ADD_REF(m) atomic_add_int(&((m)->m_ext.ref_cnt->refcnt), 1)
+#define MEXT_ADD_REF(m) atomic_add_int((m)->m_ext.ref_cnt, 1)
 
-#define _MEXT_ALLOC_CNT(m_cnt, how) do {				\
-	union mext_refcnt *__mcnt;					\
-									\
-	mtx_lock(&mbuf_mtx);						\
-	if (mcntfree.m_head == NULL)					\
-		m_alloc_ref(1, (how));					\
-	__mcnt = mcntfree.m_head;					\
-	if (__mcnt != NULL) {						\
-		mcntfree.m_head = __mcnt->next_ref;			\
-		mbstat.m_refree--;					\
-		__mcnt->refcnt = 0;					\
-	}								\
-	mtx_unlock(&mbuf_mtx);						\
-	(m_cnt) = __mcnt;						\
-} while (0)
+#define	MEXT_DEALLOC_CNT(m_cnt)	free((m_cnt), M_MBUF)
 
-#define _MEXT_DEALLOC_CNT(m_cnt) do {					\
-	union mext_refcnt *__mcnt = (m_cnt);				\
+#define MEXT_INIT_REF(m) do {						\
+	struct	mbuf *__mmm = (m);					\
 									\
-	mtx_lock(&mbuf_mtx);						\
-	__mcnt->next_ref = mcntfree.m_head;				\
-	mcntfree.m_head = __mcnt;					\
-	mbstat.m_refree++;						\
-	mtx_unlock(&mbuf_mtx);						\
-} while (0)
-
-#define MEXT_INIT_REF(m, how) do {					\
-	struct mbuf *__mmm = (m);					\
-									\
-	_MEXT_ALLOC_CNT(__mmm->m_ext.ref_cnt, (how));			\
-	if (__mmm->m_ext.ref_cnt != NULL)				\
+	__mmm->m_ext.ref_cnt = malloc(sizeof(u_int), M_MBUF, M_NOWAIT);	\
+	if (__mmm->m_ext.ref_cnt != NULL) {				\
+		*(__mmm->m_ext.ref_cnt) = 0;				\
 		MEXT_ADD_REF(__mmm);					\
+	}								\
 } while (0)
 
 /*
@@ -351,76 +298,35 @@
  * allocates an mbuf and initializes it to contain a packet header
  * and internal data.
  */
-/*
- * Lower-level macros for MGET(HDR)... Not to be used outside the
- * subsystem ("non-exportable" macro names are prepended with "_").
- */
-#define _MGET_SETUP(m_set, m_set_type) do {				\
-	(m_set)->m_type = (m_set_type);					\
-	(m_set)->m_next = NULL;						\
-	(m_set)->m_nextpkt = NULL;					\
-	(m_set)->m_data = (m_set)->m_dat;				\
-	(m_set)->m_flags = 0;						\
-} while (0)
-
-#define	_MGET(m_mget, m_get_how) do {					\
-	if (mmbfree.m_head == NULL)					\
-		m_mballoc(1, (m_get_how));				\
-	(m_mget) = mmbfree.m_head;					\
-	if ((m_mget) != NULL) {						\
-		mmbfree.m_head = (m_mget)->m_next;			\
-		mbtypes[MT_FREE]--;					\
-	} else {							\
-		if ((m_get_how) == M_TRYWAIT)				\
-			(m_mget) = m_mballoc_wait();			\
-	}								\
-} while (0)
-
-#define MGET(m, how, type) do {						\
-	struct mbuf *_mm;						\
-	int _mhow = (how);						\
-	int _mtype = (type);						\
+#define	MGET(m, how, type) do {						\
+	struct	mbuf *_mb;						\
 									\
-	mtx_lock(&mbuf_mtx);						\
-	_MGET(_mm, _mhow);						\
-	if (_mm != NULL) {						\
-		mbtypes[_mtype]++;					\
-		mtx_unlock(&mbuf_mtx);					\
-		_MGET_SETUP(_mm, _mtype);				\
-	} else {							\
-		mbstat.m_drops++;					\
-		mtx_unlock(&mbuf_mtx);					\
+	_mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, (how));		\
+	if (_mb != NULL) { 						\
+		_mb->m_type = (type);					\
+		_mb->m_next = NULL;					\
+		_mb->m_nextpkt = NULL;					\
+		_mb->m_data = _mb->m_dat;				\
+		_mb->m_flags = 0;					\
 	}								\
-	(m) = _mm;							\
+	(m) = _mb;							\
 } while (0)
 
-#define _MGETHDR_SETUP(m_set, m_set_type) do {				\
-	(m_set)->m_type = (m_set_type);					\
-	(m_set)->m_next = NULL;						\
-	(m_set)->m_nextpkt = NULL;					\
-	(m_set)->m_data = (m_set)->m_pktdat;				\
-	(m_set)->m_flags = M_PKTHDR;					\
-	(m_set)->m_pkthdr.rcvif = NULL;					\
-	(m_set)->m_pkthdr.csum_flags = 0;				\
-	(m_set)->m_pkthdr.aux = NULL;					\
-} while (0)
-
-#define MGETHDR(m, how, type) do {					\
-	struct mbuf *_mm;						\
-	int _mhow = (how);						\
-	int _mtype = (type);						\
+#define	MGETHDR(m, how, type) do {					\
+	struct	mbuf *_mb;						\
 									\
-	mtx_lock(&mbuf_mtx);						\
-	_MGET(_mm, _mhow);						\
-	if (_mm != NULL) {						\
-		mbtypes[_mtype]++;					\
-		mtx_unlock(&mbuf_mtx);					\
-		_MGETHDR_SETUP(_mm, _mtype);				\
-	} else {							\
-		mbstat.m_drops++;					\
-		mtx_unlock(&mbuf_mtx);					\
+	_mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, (how)); 		\
+	if (_mb != NULL) {						\
+		_mb->m_type = (type);					\
+		_mb->m_next = NULL;					\
+		_mb->m_nextpkt = NULL;					\
+		_mb->m_data = _mb->m_pktdat;				\
+		_mb->m_flags = M_PKTHDR;				\
+		_mb->m_pkthdr.rcvif = NULL;				\
+		_mb->m_pkthdr.csum_flags = 0;				\
+		_mb->m_pkthdr.aux = NULL;				\
 	}								\
-	(m) = _mm;							\
+	(m) = _mb;							\
 } while (0)
 
 /*
@@ -431,33 +337,14 @@
  *   MEXTFREE removes reference to external object and frees it if
  *       necessary
  */
-#define	_MCLALLOC(p, how) do {						\
-	caddr_t _mp;							\
-	int _mhow = (how);						\
-									\
-	if (mclfree.m_head == NULL)					\
-		m_clalloc(1, _mhow);					\
-	_mp = (caddr_t)mclfree.m_head;					\
-	if (_mp != NULL) {						\
-		mbstat.m_clfree--;					\
-		mclfree.m_head = ((union mcluster *)_mp)->mcl_next;	\
-	} else {							\
-		if (_mhow == M_TRYWAIT)					\
-			_mp = m_clalloc_wait();				\
-	}								\
-	(p) = _mp;							\
-} while (0)
-
 #define	MCLGET(m, how) do {						\
-	struct mbuf *_mm = (m);						\
+	struct	mbuf *_mm = (m);					\
 									\
-	mtx_lock(&mbuf_mtx);						\
-	_MCLALLOC(_mm->m_ext.ext_buf, (how));				\
+	_mm->m_ext.ext_buf = (caddr_t)mb_alloc(&mb_list_clust, (how));  \
 	if (_mm->m_ext.ext_buf != NULL) {				\
-		mtx_unlock(&mbuf_mtx);					\
-		MEXT_INIT_REF(_mm, (how));				\
+		MEXT_INIT_REF(_mm);					\
 		if (_mm->m_ext.ref_cnt == NULL) {			\
-			_MCLFREE(_mm->m_ext.ext_buf);			\
+			MCLFREE(_mm->m_ext.ext_buf);			\
 			_mm->m_ext.ext_buf = NULL;			\
 		} else {						\
 			_mm->m_data = _mm->m_ext.ext_buf;		\
@@ -467,16 +354,13 @@
 			_mm->m_ext.ext_size = MCLBYTES;			\
 			_mm->m_ext.ext_type = EXT_CLUSTER;		\
 		}							\
-	} else {							\
-		mbstat.m_drops++;					\
-		mtx_unlock(&mbuf_mtx);					\
 	}								\
 } while (0)
 
 #define MEXTADD(m, buf, size, free, args, flags, type) do {		\
-	struct mbuf *_mm = (m);						\
+	struct	mbuf *_mm = (m);					\
 									\
-	MEXT_INIT_REF(_mm, M_TRYWAIT);					\
+	MEXT_INIT_REF(_mm);						\
 	if (_mm->m_ext.ref_cnt != NULL) {				\
 		_mm->m_flags |= (M_EXT | (flags));			\
 		_mm->m_ext.ext_buf = (caddr_t)(buf);			\
@@ -488,34 +372,26 @@
 	}								\
 } while (0)
 
-#define	_MCLFREE(p) do {						\
-	union mcluster *_mp = (union mcluster *)(p);			\
-									\
-	mtx_lock(&mbuf_mtx);						\
-	_mp->mcl_next = mclfree.m_head;					\
-	mclfree.m_head = _mp;						\
-	mbstat.m_clfree++;						\
-	MBWAKEUP(m_clalloc_wid, &mclfree.m_starved);			\
-	mtx_unlock(&mbuf_mtx); 						\
-} while (0)
+#define	MCLFREE(p)	mb_free(&mb_list_clust, (caddr_t)(p))
 
-/* MEXTFREE:
+/*
+ * MEXTFREE:
  * If the atomic_cmpset_int() returns 0, then we effectively do nothing
  * in terms of "cleaning up" (freeing the ext buf and ref. counter) as
  * this means that either there are still references, or another thread
  * is taking care of the clean-up.
  */
 #define	MEXTFREE(m) do {						\
-	struct mbuf *_mmm = (m);					\
+	struct	mbuf *_mmm = (m);					\
 									\
 	MEXT_REM_REF(_mmm);						\
-	if (atomic_cmpset_int(&_mmm->m_ext.ref_cnt->refcnt, 0, 1)) {	\
+	if (atomic_cmpset_int(_mmm->m_ext.ref_cnt, 0, 1)) {		\
 		if (_mmm->m_ext.ext_type != EXT_CLUSTER) {		\
 			(*(_mmm->m_ext.ext_free))(_mmm->m_ext.ext_buf,	\
 			    _mmm->m_ext.ext_args);			\
 		} else							\
-			_MCLFREE(_mmm->m_ext.ext_buf);			\
-		_MEXT_DEALLOC_CNT(_mmm->m_ext.ref_cnt);			\
+			MCLFREE(_mmm->m_ext.ext_buf);			\
+		MEXT_DEALLOC_CNT(_mmm->m_ext.ref_cnt);			\
 	}								\
 	_mmm->m_flags &= ~M_EXT;					\
 } while (0)
@@ -526,20 +402,13 @@
  * Place the successor, if any, in n.
  */
 #define	MFREE(m, n) do {						\
-	struct mbuf *_mm = (m);						\
+	struct	mbuf *_mm = (m);					\
 									\
-	KASSERT(_mm->m_type != MT_FREE, ("freeing free mbuf"));		\
+	(n) = _mm->m_next;						\
 	if (_mm->m_flags & M_EXT)					\
 		MEXTFREE(_mm);						\
-	mtx_lock(&mbuf_mtx);						\
-	mbtypes[_mm->m_type]--;						\
-	_mm->m_type = MT_FREE;						\
-	mbtypes[MT_FREE]++;						\
-	(n) = _mm->m_next;						\
-	_mm->m_next = mmbfree.m_head;					\
-	mmbfree.m_head = _mm;						\
-	MBWAKEUP(m_mballoc_wid, &mmbfree.m_starved);			\
-	mtx_unlock(&mbuf_mtx); 						\
+	mb_free(&mb_list_mbuf, _mm);					\
+	(m) = NULL;							\
 } while (0)
 
 /*
@@ -557,8 +426,8 @@
  * aux pointer will be moved to `to'.
  */
 #define	M_COPY_PKTHDR(to, from) do {					\
-	struct mbuf *_mfrom = (from);					\
-	struct mbuf *_mto = (to);					\
+	struct	mbuf *_mfrom = (from);					\
+	struct	mbuf *_mto = (to);					\
 									\
 	_mto->m_data = _mto->m_pktdat;					\
 	_mto->m_flags = _mfrom->m_flags & M_COPYFLAGS;			\
@@ -608,10 +477,10 @@
  * set to NULL.
  */
 #define	M_PREPEND(m, plen, how) do {					\
-	struct mbuf **_mmp = &(m);					\
-	struct mbuf *_mm = *_mmp;					\
-	int _mplen = (plen);						\
-	int __mhow = (how);						\
+	struct	mbuf **_mmp = &(m);					\
+	struct	mbuf *_mm = *_mmp;					\
+	int	_mplen = (plen);					\
+	int	__mhow = (how);						\
 									\
 	if (M_LEADINGSPACE(_mm) >= _mplen) {				\
 		_mm->m_data -= _mplen;					\
@@ -626,16 +495,7 @@
 /*
  * change mbuf to new type
  */
-#define	MCHTYPE(m, t) do {						\
-	struct mbuf *_mm = (m);						\
-	int _mt = (t);							\
-									\
-	mtx_lock(&mbuf_mtx);						\
-	mbtypes[_mm->m_type]--;						\
-	mbtypes[_mt]++;							\
-	mtx_unlock(&mbuf_mtx);						\
-	_mm->m_type = (_mt);						\
-} while (0)
+#define	MCHTYPE(m, t)	((m)->m_type = (t))
 
 /* length to m_copy to copy all */
 #define	M_COPYALL	1000000000
@@ -651,53 +511,49 @@
 	int	type;
 };
 
-extern	u_long		 m_clalloc_wid;	/* mbuf cluster wait count */
-extern	u_long		 m_mballoc_wid;	/* mbuf wait count */
-extern	int		 max_datalen;	/* MHLEN - max_hdr */
-extern	int		 max_hdr;	/* largest link+protocol header */
-extern	int		 max_linkhdr;	/* largest link-level header */
-extern	int		 max_protohdr;	/* largest protocol header */
-extern	struct mbstat	 mbstat;
-extern	u_long		 mbtypes[MT_NTYPES]; /* per-type mbuf allocations */
-extern	int		 mbuf_wait;	/* mbuf sleep time */
-extern	struct mtx	 mbuf_mtx;
-extern	struct mbuf	*mbutl;		/* virtual address of mclusters */
-extern	struct mclfree_lst	mclfree;
-extern	struct mcntfree_lst	mcntfree;
-extern	struct mbffree_lst	mmbfree;
-extern	int		 nmbclusters;
-extern	int		 nmbcnt;
-extern	int		 nmbufs;
-extern	int		 nsfbufs;
-
-void	m_adj(struct mbuf *, int);
-int	m_alloc_ref(u_int, int);
-struct	mbuf *m_aux_add(struct mbuf *, int, int);
-void	m_aux_delete(struct mbuf *, struct mbuf *);
-struct	mbuf *m_aux_find(struct mbuf *, int, int);
-void	m_cat(struct mbuf *, struct mbuf *);
-int	m_clalloc(int, int);
-caddr_t	m_clalloc_wait(void);
-void	m_copyback(struct mbuf *, int, int, caddr_t);
-void	m_copydata(struct mbuf *, int, int, caddr_t);
-struct	mbuf *m_copym(struct mbuf *, int, int, int);
-struct	mbuf *m_copypacket(struct mbuf *, int);
-struct	mbuf *m_devget(char *, int, int, struct ifnet *,
-    void (*copy)(char *, caddr_t, u_int));
-struct	mbuf *m_dup(struct mbuf *, int);
-struct	mbuf *m_free(struct mbuf *);
-void	m_freem(struct mbuf *);
-struct	mbuf *m_get(int, int);
-struct	mbuf *m_getclr(int, int);
-struct	mbuf *m_gethdr(int, int);
-struct	mbuf *m_getm(struct mbuf *, int, int, int);
-int	m_mballoc(int, int);
-struct	mbuf *m_mballoc_wait(void);
-struct	mbuf *m_prepend(struct mbuf *, int, int);
-void	m_print(const struct mbuf *m);
-struct	mbuf *m_pulldown(struct mbuf *, int, int, int *);
-struct	mbuf *m_pullup(struct mbuf *, int);
-struct	mbuf *m_split(struct mbuf *, int, int);
+extern	int		max_datalen;	/* MHLEN - max_hdr */
+extern	int		max_hdr;	/* largest link + protocol header */
+extern	int		max_linkhdr;	/* largest link-level header */
+extern	int		max_protohdr;	/* largest protocol header */
+extern	struct mbpstat	mb_statpcpu[];	/* Per-CPU allocation stats. */
+extern	struct mbstat	mbstat;		/* General mbuf stats/infos. */
+extern	int		mbuf_wait;	/* mbuf sleep time */
+extern	int		nmbclusters;	/* Maximum number of clusters */
+extern	int		nmbufs;		/* Maximum number of mbufs */
+extern	int		nmbcnt;		/* Scale kmem_map for mext cntr space */
+extern	int		nsfbufs;
+extern	u_int		mbuf_limit;	/* High limit on num. mbufs per CPU */ 
+extern	u_int		clust_limit;	/* High limit on num. clusts per CPU */ 
+extern	struct		mb_lstmngr mb_list_mbuf, mb_list_clust;
+
+/*
+ * Exported function prototypes.
+ */
+void		*mb_alloc(struct mb_lstmngr *, int);
+void		 mb_free(struct mb_lstmngr *, void *);
+void		 m_adj(struct mbuf *, int);
+struct	mbuf	*m_aux_add(struct mbuf *, int, int);
+void		 m_aux_delete(struct mbuf *, struct mbuf *);
+struct	mbuf	*m_aux_find(struct mbuf *, int, int);
+void		 m_cat(struct mbuf *, struct mbuf *);
+void		 m_copyback(struct mbuf *, int, int, caddr_t);
+void		 m_copydata(struct mbuf *, int, int, caddr_t);
+struct	mbuf	*m_copym(struct mbuf *, int, int, int);
+struct	mbuf	*m_copypacket(struct mbuf *, int);
+struct	mbuf	*m_devget(char *, int, int, struct ifnet *,
+		    void (*copy)(char *, caddr_t, u_int));
+struct	mbuf	*m_dup(struct mbuf *, int);
+struct	mbuf	*m_free(struct mbuf *);
+void		 m_freem(struct mbuf *);
+struct	mbuf	*m_get(int, int);
+struct	mbuf	*m_getclr(int, int);
+struct	mbuf	*m_gethdr(int, int);
+struct	mbuf	*m_getm(struct mbuf *, int, int, int);
+struct	mbuf	*m_prepend(struct mbuf *, int, int);
+void		 m_print(const struct mbuf *m);
+struct	mbuf	*m_pulldown(struct mbuf *, int, int, int *);
+struct	mbuf	*m_pullup(struct mbuf *, int);
+struct	mbuf	*m_split(struct mbuf *, int, int);
 #endif /* _KERNEL */
 
 #endif /* !_SYS_MBUF_H_ */
Index: src/sys/sys/sysctl.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/sysctl.h,v
retrieving revision 1.92
diff -u -r1.92 sysctl.h
--- src/sys/sys/sysctl.h	2001/05/19 05:45:55	1.92
+++ src/sys/sys/sysctl.h	2001/06/09 05:31:05
@@ -397,8 +397,6 @@
 #define	KIPC_MAX_PROTOHDR	5	/* int: max length of network header */
 #define	KIPC_MAX_HDR		6	/* int: max total length of headers */
 #define	KIPC_MAX_DATALEN	7	/* int: max length of data? */
-#define	KIPC_MBSTAT		8	/* struct: mbuf usage statistics */
-#define	KIPC_NMBCLUSTERS	9	/* int: maximum mbuf clusters */
 
 /*
  * CTL_HW identifiers
Index: src/sys/vm/vm_kern.c
===================================================================
RCS file: /home/ncvs/src/sys/vm/vm_kern.c,v
retrieving revision 1.70
diff -u -r1.70 vm_kern.c
--- src/sys/vm/vm_kern.c	2001/05/19 01:28:09	1.70
+++ src/sys/vm/vm_kern.c	2001/06/09 05:31:05
@@ -89,8 +89,6 @@
 vm_map_t exec_map=0;
 vm_map_t clean_map=0;
 vm_map_t buffer_map=0;
-vm_map_t mb_map=0;
-int mb_map_full=0;
 
 /*
  *	kmem_alloc_pageable:
@@ -331,6 +329,9 @@
  *
  *	NOTE:  This routine is not supposed to block if M_NOWAIT is set, but
  *	I have not verified that it actually does not block.
+ *
+ *	`map' is ONLY allowed to be kmem_map or one of the mbuf submaps
+ *	(memory is never freed back to the latter).
  */
 vm_offset_t
 kmem_malloc(map, size, flags)
@@ -344,9 +345,6 @@
 	vm_page_t m;
 	int hadvmlock;
 
-	if (map != kmem_map && map != mb_map)
-		panic("kmem_malloc: map != {kmem,mb}_map");
-
 	hadvmlock = mtx_owned(&vm_mtx);
 	if (!hadvmlock)
 		mtx_lock(&vm_mtx);
@@ -362,9 +360,9 @@
 	vm_map_lock(map);
 	if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
 		vm_map_unlock(map);
-		if (map == mb_map) {
-			mb_map_full = TRUE;
-			printf("Out of mbuf clusters - adjust NMBCLUSTERS or increase maxusers!\n");
+		if (map != kmem_map) {
+			printf("Out of mbuf address space!\n");
+			printf("Consider increasing NMBCLUSTERS\n");
 			goto bad;
 		}
 		if ((flags & M_NOWAIT) == 0)
Index: src/sys/vm/vm_kern.h
===================================================================
RCS file: /home/ncvs/src/sys/vm/vm_kern.h,v
retrieving revision 1.22
diff -u -r1.22 vm_kern.h
--- src/sys/vm/vm_kern.h	2000/02/16 21:11:31	1.22
+++ src/sys/vm/vm_kern.h	2001/06/09 05:31:05
@@ -71,8 +71,6 @@
 extern vm_map_t buffer_map;
 extern vm_map_t kernel_map;
 extern vm_map_t kmem_map;
-extern vm_map_t mb_map;
-extern int mb_map_full;
 extern vm_map_t clean_map;
 extern vm_map_t exec_map;
 extern u_int vm_kmem_size;
Index: src/sys/vm/vm_map.c
===================================================================
RCS file: /home/ncvs/src/sys/vm/vm_map.c,v
retrieving revision 1.201
diff -u -r1.201 vm_map.c
--- src/sys/vm/vm_map.c	2001/05/23 22:38:00	1.201
+++ src/sys/vm/vm_map.c	2001/06/09 05:31:05
@@ -692,14 +692,14 @@
 	mtx_assert(&vm_mtx, MA_OWNED);
 	start = *addr;
 
-	if (map == kmem_map || map == mb_map)
+	if (map == kmem_map)
 		s = splvm();
 
 	vm_map_lock(map);
 	if (find_space) {
 		if (vm_map_findspace(map, start, length, addr)) {
 			vm_map_unlock(map);
-			if (map == kmem_map || map == mb_map)
+			if (map == kmem_map)
 				splx(s);
 			return (KERN_NO_SPACE);
 		}
@@ -709,7 +709,7 @@
 		start, start + length, prot, max, cow);
 	vm_map_unlock(map);
 
-	if (map == kmem_map || map == mb_map)
+	if (map == kmem_map)
 		splx(s);
 
 	return (result);
@@ -1917,7 +1917,7 @@
 	int result, s = 0;
 
 	mtx_assert(&vm_mtx, MA_OWNED);
-	if (map == kmem_map || map == mb_map)
+	if (map == kmem_map)
 		s = splvm();
 
 	vm_map_lock(map);
@@ -1925,7 +1925,7 @@
 	result = vm_map_delete(map, start, end);
 	vm_map_unlock(map);
 
-	if (map == kmem_map || map == mb_map)
+	if (map == kmem_map)
 		splx(s);
 
 	return (result);
Index: src/sys/vm/vm_object.c
===================================================================
RCS file: /home/ncvs/src/sys/vm/vm_object.c,v
retrieving revision 1.192
diff -u -r1.192 vm_object.c
--- src/sys/vm/vm_object.c	2001/05/23 22:42:10	1.192
+++ src/sys/vm/vm_object.c	2001/06/09 05:31:05
@@ -1696,8 +1696,6 @@
 		return 1;
 	if( _vm_object_in_map( buffer_map, object, 0))
 		return 1;
-	if( _vm_object_in_map( mb_map, object, 0))
-		return 1;
 	return 0;
 }
 
Index: src/usr.bin/netstat/mbuf.c
===================================================================
RCS file: /home/ncvs/src/usr.bin/netstat/mbuf.c,v
retrieving revision 1.20
diff -u -r1.20 mbuf.c
--- src/usr.bin/netstat/mbuf.c	2000/10/15 06:29:22	1.20
+++ src/usr.bin/netstat/mbuf.c	2001/06/09 05:30:39
@@ -48,13 +48,13 @@
 #include <err.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include "netstat.h"
 
 #define	YES	1
 typedef int bool;
 
-struct	mbstat mbstat;
-
+#if 0
 static struct mbtypenames {
 	int	mt_type;
 	char	*mt_name;
@@ -91,6 +91,7 @@
 #endif
 	{ 0, 0 }
 };
+#endif
 
 /*
  * Print mbuf statistics.
@@ -98,105 +99,166 @@
 void
 mbpr()
 {
-	u_long totmem, totpossible, totmbufs;
-	register int i;
-	struct mbtypenames *mp;
-	int name[3], nmbclusters, nmbufs, nmbcnt, nmbtypes;
-	size_t nmbclen, nmbuflen, nmbcntlen, mbstatlen, mbtypeslen;
-	u_long *mbtypes;
-	bool *seen;	/* "have we seen this type yet?" */
-
-	mbtypes = NULL;
-	seen = NULL;
-
-	name[0] = CTL_KERN;
-	name[1] = KERN_IPC;
-	name[2] = KIPC_MBSTAT;
-	mbstatlen = sizeof mbstat;
-	if (sysctl(name, 3, &mbstat, &mbstatlen, 0, 0) < 0) {
+	int i;
+	int nmbclusters, nmbufs, ncpu, page_size, num_objs;
+	u_int mbuf_limit, clust_limit;
+	u_long totspace, totnum, totfree;
+	size_t mlen;
+	struct mbstat *mbstat = NULL;
+	struct mbpstat **mbpstat = NULL;
+
+	/* Ask for the size of mbstat first, then fetch it into a buffer. */
+	if (sysctlbyname("kern.ipc.mbstat", NULL, &mlen, NULL, 0) < 0) {
+		warn("sysctl: retrieving mbstat len");
+		goto err;
+	}
+
+	if ((mbstat = malloc(mlen)) == NULL) {
+		warn("malloc: cannot allocate memory for mbstat");
+		goto err;
+	}
+
+	if (sysctlbyname("kern.ipc.mbstat", mbstat, &mlen, NULL, 0) < 0) {
 		warn("sysctl: retrieving mbstat");
 		goto err;
 	}
+
+	/* Same two-step approach for the per-list statistics array. */
+	if (sysctlbyname("kern.ipc.mb_statpcpu", NULL, &mlen, NULL, 0) < 0) {
+		warn("sysctl: retrieving mb_statpcpu len");
+		goto err;
+	}
 
-	if (sysctlbyname("kern.ipc.mbtypes", NULL, &mbtypeslen, NULL, 0) < 0) {
-		warn("sysctl: retrieving mbtypes length");
+	num_objs = (int)(mlen / sizeof(struct mbpstat));
+	if (num_objs < 1) {
+		warnx("sysctl: kern.ipc.mb_statpcpu returned no entries");
+		goto err;
+	}
+	if ((mbpstat = calloc(num_objs, sizeof(struct mbpstat *))) == NULL) {
+		warn("calloc: cannot allocate memory for mbpstats pointers");
 		goto err;
 	}
-	if ((mbtypes = malloc(mbtypeslen)) == NULL) {
-		warn("malloc: %lu bytes for mbtypes", (u_long)mbtypeslen);
+
+	if ((mbpstat[0] = calloc(num_objs, sizeof(struct mbpstat))) == NULL) {
+		warn("calloc: cannot allocate memory for mbpstats");
 		goto err;
 	}
-	if (sysctlbyname("kern.ipc.mbtypes", mbtypes, &mbtypeslen, NULL,
-	    0) < 0) {
-		warn("sysctl: retrieving mbtypes");
+
+	if (sysctlbyname("kern.ipc.mb_statpcpu", mbpstat[0],&mlen,NULL,0) < 0) {
+		warn("sysctl: retrieving mb_statpcpu");
 		goto err;
 	}
+
+	/* Make mbpstat[i] point at the i-th entry of the flat array. */
+	for (i = 0; i < num_objs; i++)
+		mbpstat[i] = mbpstat[0] + i;
 
-	nmbtypes = mbtypeslen / sizeof(*mbtypes);
-	if ((seen = calloc(nmbtypes, sizeof(*seen))) == NULL) {
-		warn("calloc");
+	mlen = sizeof(int);
+	if (sysctlbyname("kern.smp.cpus", &ncpu, &mlen, NULL, 0) < 0) {
+		warn("sysctl: retrieving kern.smp.cpus");
 		goto err;
 	}
-		
-	name[2] = KIPC_NMBCLUSTERS;
-	nmbclen = sizeof(int);
-	if (sysctl(name, 3, &nmbclusters, &nmbclen, 0, 0) < 0) {
+
+	mlen = sizeof(int);
+	if (sysctlbyname("kern.ipc.nmbclusters", &nmbclusters, &mlen, NULL, 0)
+	    < 0) {
 		warn("sysctl: retrieving nmbclusters");
 		goto err;
 	}
 
-	nmbuflen = sizeof(int);
-	if (sysctlbyname("kern.ipc.nmbufs", &nmbufs, &nmbuflen, 0, 0) < 0) {
+	mlen = sizeof(int);
+	if (sysctlbyname("kern.ipc.nmbufs", &nmbufs, &mlen, NULL, 0) < 0) {
 		warn("sysctl: retrieving nmbufs");
 		goto err;
 	}
 
-	nmbcntlen = sizeof(int);
-	if (sysctlbyname("kern.ipc.nmbcnt", &nmbcnt, &nmbcntlen, 0, 0) < 0) {
-		warn("sysctl: retrieving nmbcnt");
+	mlen = sizeof(u_int);
+	if (sysctlbyname("kern.ipc.mbuf_limit", &mbuf_limit,&mlen,NULL,0) < 0) {
+		warn("sysctl: retrieving mbuf_limit");
 		goto err;
 	}
 
+	mlen = sizeof(u_int);
+	if (sysctlbyname("kern.ipc.clust_limit",&clust_limit,&mlen,NULL,0) <0) {
+		warn("sysctl: retrieving clust_limit");
+		goto err;
+	}
+
+	mlen = sizeof(int);
+	if (sysctlbyname("hw.pagesize", &page_size, &mlen, NULL, 0) < 0) {
+		warn("sysctl: retrieving hw.pagesize");
+		goto err;
+	}
+
+/*
+ * Some useful defines for later.
+ * GENLST indexes the last array entry, reported below as the GEN list.
+ */
 #undef MSIZE
-#define MSIZE		(mbstat.m_msize)
+#define MSIZE		(mbstat->m_msize)
 #undef MCLBYTES
-#define	MCLBYTES	(mbstat.m_mclbytes)
-
-	totmbufs = 0;
-	for (mp = mbtypenames; mp->mt_name; mp++)
-		totmbufs += mbtypes[mp->mt_type];
-	printf("%lu/%lu/%u mbufs in use (current/peak/max):\n", totmbufs,
-	    mbstat.m_mbufs, nmbufs);
-	for (mp = mbtypenames; mp->mt_name; mp++)
-		if (mbtypes[mp->mt_type]) {
-			seen[mp->mt_type] = YES;
-			printf("\t%lu mbufs allocated to %s\n",
-			    mbtypes[mp->mt_type], mp->mt_name);
-		}
-	seen[MT_FREE] = YES;
-	for (i = 0; i < nmbtypes; i++)
-		if (!seen[i] && mbtypes[i]) {
-			printf("\t%lu mbufs allocated to <mbuf type %d>\n",
-			    mbtypes[i], i);
-		}
-	printf("%lu/%lu/%u mbuf clusters in use (current/peak/max)\n",
-		mbstat.m_clusters - mbstat.m_clfree, mbstat.m_clusters,
-		nmbclusters);
-	printf("%lu/%lu m_ext reference counters (in use/allocated)\n",
-		mbstat.m_refcnt - mbstat.m_refree, mbstat.m_refcnt);
-	totmem = mbstat.m_mbufs * MSIZE + mbstat.m_clusters * MCLBYTES +
-	    mbstat.m_refcnt * sizeof(union mext_refcnt);
-	totpossible = nmbclusters * MCLBYTES + nmbufs * MSIZE +
-	    nmbcnt * sizeof(union mext_refcnt); 
-	printf("%lu Kbytes allocated to network (%lu%% of mb_map in use)\n",
-		totmem / 1024, (totmem * 100) / totpossible);
-	printf("%lu requests for memory denied\n", mbstat.m_drops);
-	printf("%lu requests for memory delayed\n", mbstat.m_wait);
-	printf("%lu calls to protocol drain routines\n", mbstat.m_drain);
+#define	MCLBYTES	(mbstat->m_mclbytes)
+#undef PAGE_SIZE
+#define	PAGE_SIZE	(page_size)
+#define	MBPERPG		(PAGE_SIZE / MSIZE)
+#define	CLPERPG		(PAGE_SIZE / MCLBYTES)
+#define	GENLST		(num_objs - 1)
+
+	printf("mbuf usage:\n");
+	printf("\tGEN list:\t%lu/%lu (in use/in pool)\n",
+	    (mbpstat[GENLST]->mb_mbpgs * MBPERPG - mbpstat[GENLST]->mb_mbfree),
+	    (mbpstat[GENLST]->mb_mbpgs * MBPERPG));
+	totnum = mbpstat[GENLST]->mb_mbpgs * MBPERPG;
+	totfree = mbpstat[GENLST]->mb_mbfree;
+	totspace = mbpstat[GENLST]->mb_mbpgs * PAGE_SIZE;
+	for (i = 0; i < ncpu; i++) {
+		printf("\tCPU #%d list:\t%lu/%lu (in use/in pool)\n", i,
+		    (mbpstat[i]->mb_mbpgs * MBPERPG - mbpstat[i]->mb_mbfree),
+		    (mbpstat[i]->mb_mbpgs * MBPERPG));
+		totspace += mbpstat[i]->mb_mbpgs * PAGE_SIZE;
+		totnum += mbpstat[i]->mb_mbpgs * MBPERPG;
+		totfree += mbpstat[i]->mb_mbfree;
+	}
+	printf("\tTotal:\t\t%lu/%lu (in use/in pool)\n", (totnum - totfree),
+	    totnum);
+	printf("\tMaximum number allowed on each CPU list: %u\n", mbuf_limit);
+	printf("\tMaximum possible: %d\n", nmbufs);
+	printf("\t%lu%% of mbuf map consumed\n", ((totspace * 100) / (nmbufs
+	    * MSIZE)));
+
+	printf("mbuf cluster usage:\n");
+	printf("\tGEN list:\t%lu/%lu (in use/in pool)\n",
+	    (mbpstat[GENLST]->mb_clpgs * CLPERPG - mbpstat[GENLST]->mb_clfree),
+	    (mbpstat[GENLST]->mb_clpgs * CLPERPG));
+	totnum = mbpstat[GENLST]->mb_clpgs * CLPERPG;
+	totfree = mbpstat[GENLST]->mb_clfree;
+	totspace = mbpstat[GENLST]->mb_clpgs * PAGE_SIZE;
+	for (i = 0; i < ncpu; i++) {
+		printf("\tCPU #%d list:\t%lu/%lu (in use/in pool)\n", i,
+		    (mbpstat[i]->mb_clpgs * CLPERPG - mbpstat[i]->mb_clfree),
+		    (mbpstat[i]->mb_clpgs * CLPERPG));
+		totspace += mbpstat[i]->mb_clpgs * PAGE_SIZE;
+		totnum += mbpstat[i]->mb_clpgs * CLPERPG;
+		totfree += mbpstat[i]->mb_clfree;
+	}
+	printf("\tTotal:\t\t%lu/%lu (in use/in pool)\n", (totnum - totfree),
+	    totnum);
+	printf("\tMaximum number allowed on each CPU list: %u\n", clust_limit);
+	printf("\tMaximum possible: %d\n", nmbclusters);
+	printf("\t%lu%% of cluster map consumed\n", ((totspace * 100) /
+	    (nmbclusters * MCLBYTES)));
+
+	printf("%lu requests for memory denied\n", mbstat->m_drops);
+	printf("%lu requests for memory delayed\n", mbstat->m_wait);
+	printf("%lu calls to protocol drain routines\n", mbstat->m_drain);
 
 err:
-	if (mbtypes != NULL)
-		free(mbtypes);
-	if (seen != NULL)
-		free(seen);
+	if (mbstat != NULL)
+		free(mbstat);
+	if (mbpstat != NULL) {
+		if (mbpstat[0] != NULL)
+			free(mbpstat[0]);
+		free(mbpstat);
+	}
+	return;
 }