diff -ru /usr/src/sys.old/kern/subr_mbuf.c /usr/src/sys/kern/subr_mbuf.c
--- /usr/src/sys.old/kern/subr_mbuf.c	Thu Feb 13 18:33:36 2003
+++ /usr/src/sys/kern/subr_mbuf.c	Sun Feb 16 21:06:30 2003
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -70,26 +71,30 @@
 #endif
 
 /*-
- * The mbuf allocator is heavily based on Alfred Perlstein's
+ * The mbuf allocator is somewhat based on Alfred Perlstein's
  * (alfred@FreeBSD.org) "memcache" allocator which is itself based
- * on concepts from several per-CPU memory allocators.  The difference
- * between this allocator and memcache is that, among other things:
- *
- * (i) We don't free back to the map from the free() routine - we leave the
- *     option of implementing lazy freeing (from a kproc) in the future.
- *
- * (ii) We allocate from separate sub-maps of kmem_map, thus limiting the
- *      maximum number of allocatable objects of a given type.  Further,
- *      we handle blocking on a cv in the case that the map is starved and
- *      we have to rely solely on cached (circulating) objects.
+ * on concepts from several per-CPU memory allocators.  There are
+ * a few fundamental differences that warrant the existence of this
+ * mbuf-specific allocator, though:
+ *
+ * (i) We don't free back to the map from the free() routine - we manage
+ *     the balancing of the per-CPU and global caches via an "mbuf daemon"
+ *     kproc.  This allows us to have a very fast free() routine and
+ *     M_DONTWAIT code that doesn't need to ever enter the VM layer
+ *     (XXX: currently it may but it is easily modifiable).
+ *
+ * (ii) We allocate from separate sub-maps of kmem_map, thus limiting the
+ *      maximum number of allocatable objects of a given type.  Further,
+ *      we handle blocking on a cv in the case that the map is starved and
+ *      we have to rely solely on cached (circulating) objects.
  *
  * The mbuf allocator keeps all objects that it allocates in mb_buckets.
  * The buckets keep a page worth of objects (an object can be an mbuf or an
  * mbuf cluster) and facilitate moving larger sets of contiguous objects
- * from the per-CPU lists to the main list for the given object.  The buckets
+ * from the per-CPU caches to the main cache for the given object.  The buckets
  * also have an added advantage in that after several moves from a per-CPU
- * list to the main list and back to the per-CPU list, contiguous objects
- * are kept together, thus trying to put the TLB cache to good use.
+ * cache to the main cache and back to the per-CPU cache, contiguous objects
+ * are kept together, thus maximizing hardware cache use.
  *
  * The buckets are kept on singly-linked lists called "containers."  A container
  * is protected by a mutex lock in order to ensure consistency.  The mutex lock
@@ -116,13 +121,9 @@
         u_long  *mc_numpgs;
 };
 
-struct mb_gen_list {
-        struct mb_container mb_cont;
-        struct cv mgl_mstarved;
-};
-
-struct mb_pcpu_list {
+struct mb_cache {
         struct mb_container mb_cont;
+        struct cv *mb_mstarved;
 };
 
 /*
@@ -189,14 +190,15 @@
  * multi-object allocations without dropping the lock in between.
  */
 struct mb_lstmngr {
-        struct mb_gen_list *ml_genlist;
-        struct mb_pcpu_list *ml_cntlst[NCPU];
+        struct mb_cache *ml_genlist;
+        struct mb_cache *ml_cntlst[NCPU];
         struct mb_bucket **ml_btable;
         vm_map_t        ml_map;
         vm_offset_t     ml_mapbase;
         vm_offset_t     ml_maptop;
         int             ml_mapfull;
         u_int           ml_objsize;
+        u_int           *ml_wmlow;
         u_int           *ml_wmhigh;
 };
 static struct mb_lstmngr mb_list_mbuf, mb_list_clust;
@@ -251,6 +253,15 @@
         if ((mb_type) != MT_NOTMBUF)                                    \
             (*((mb_cnt)->mb_cont.mc_types + (mb_type))) -= (mb_num)
 
+#define MBD_DELTA(_mbd_low, _mbd_high)                                  \
+    ((u_long)(*_mbd_high - *_mbd_low) / 2)
+
+#define mb_low(_cntlst, _lst)                                           \
+    ((*(_cntlst)->mb_cont.mc_objcount) < (u_long)(*(_lst)->ml_wmlow))
+
+#define mb_high(_cntlst, _lst)                                          \
+    ((*(_cntlst)->mb_cont.mc_objcount) > (u_long)(*(_lst)->ml_wmhigh))
+
 /*
  * Ownership of buckets/containers is represented by integers.  The PCPU
  * lists range from 0 to NCPU-1.  We need a free numerical id for the general
@@ -269,9 +280,17 @@
 
 /* Sleep time for wait code (in ticks). */
 static int mbuf_wait = 64;
+static u_int mbuf_lowm = 128;   /* Cache low watermark for mbufs. */
+static u_int clust_lowm = 16;   /* Cache low watermark for clusters. */
 static u_int mbuf_limit = 512;  /* Upper limit on # of mbufs per CPU. */
 static u_int clust_limit = 128; /* Upper limit on # of clusters per CPU. */
 
+/* Local state variables for mbuf cache balancer/garbage collector. */
+static int mbufd_needed;
+static int mbuf_daemon_ran;
+static struct mtx mbufd_lock;
+static struct proc *mbufdproc;
+
 /*
  * Objects exported by sysctl(8).
  */
@@ -286,10 +305,16 @@
     "Maximum number of sendfile(2) sf_bufs available");
 SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW, &mbuf_wait, 0,
     "Sleep time of mbuf subsystem wait allocations during exhaustion");
+SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_daemon_ran, CTLFLAG_RD, &mbuf_daemon_ran, 0,
+    "Number of times the mbuf daemon has run throughout uptime");
+SYSCTL_UINT(_kern_ipc, OID_AUTO, mbuf_lowm, CTLFLAG_RW, &mbuf_lowm, 0,
+    "Low watermark on caches for mbufs");
+SYSCTL_UINT(_kern_ipc, OID_AUTO, clust_lowm, CTLFLAG_RW, &clust_lowm, 0,
+    "Low watermark on caches for clusters");
 SYSCTL_UINT(_kern_ipc, OID_AUTO, mbuf_limit, CTLFLAG_RW, &mbuf_limit, 0,
-    "Upper limit of number of mbufs allowed on each PCPU list");
+    "Upper limit of number of mbufs allowed on each PCPU list (high watermark)");
 SYSCTL_UINT(_kern_ipc, OID_AUTO, clust_limit, CTLFLAG_RW, &clust_limit, 0,
-    "Upper limit of number of mbuf clusters allowed on each PCPU list");
+    "Upper limit of number of mbuf clusters allowed on each PCPU list (high watermark)");
 SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
     "Mbuf general information and statistics");
 SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mb_statpcpu, CTLFLAG_RD, mb_statpcpu,
@@ -300,9 +325,13 @@
  */
 static void             *mb_alloc_wait(struct mb_lstmngr *, short);
 static struct mb_bucket *mb_pop_cont(struct mb_lstmngr *, int,
-                            struct mb_pcpu_list *);
+                            struct mb_cache *);
 static void              mb_reclaim(void);
 static void              mbuf_init(void *);
+static void              mbd_vm2cache(struct mb_lstmngr *, struct mb_cache *);
+static void              mbd_cache2cache(struct mb_lstmngr *, struct mb_cache *);
+static void              mbd_cache2vm(struct mb_lstmngr *);
+static void              mbuf_daemon(void);
 
 /*
  * Initial allocation numbers.  Each parameter represents the number of buckets
@@ -335,7 +364,7 @@
 static void
 mbuf_init(void *dummy)
 {
-        struct mb_pcpu_list *pcpu_cnt;
+        struct mb_cache *pcpu_cnt;
         vm_size_t mb_map_size;
         int i, j;
 
@@ -356,6 +385,7 @@
         mb_list_mbuf.ml_map->system_map = 1;
         mb_list_mbuf.ml_mapfull = 0;
         mb_list_mbuf.ml_objsize = MSIZE;
+        mb_list_mbuf.ml_wmlow = &mbuf_lowm;
         mb_list_mbuf.ml_wmhigh = &mbuf_limit;
 
         mb_map_size = (vm_size_t)(nmbclusters * MCLBYTES);
@@ -370,14 +400,15 @@
         mb_list_clust.ml_map->system_map = 1;
         mb_list_clust.ml_mapfull = 0;
         mb_list_clust.ml_objsize = MCLBYTES;
+        mb_list_clust.ml_wmlow = &clust_lowm;
         mb_list_clust.ml_wmhigh = &clust_limit;
 
         /*
          * Allocate required general (global) containers for each object type.
          */
-        mb_list_mbuf.ml_genlist = malloc(sizeof(struct mb_gen_list), M_MBUF,
+        mb_list_mbuf.ml_genlist = malloc(sizeof(struct mb_cache), M_MBUF,
             M_NOWAIT);
-        mb_list_clust.ml_genlist = malloc(sizeof(struct mb_gen_list), M_MBUF,
+        mb_list_clust.ml_genlist = malloc(sizeof(struct mb_cache), M_MBUF,
             M_NOWAIT);
         if ((mb_list_mbuf.ml_genlist == NULL) ||
             (mb_list_clust.ml_genlist == NULL))
@@ -386,14 +417,34 @@
         /*
          * Initialize condition variables and general container mutex locks.
          */
+        if (((mb_list_mbuf.ml_genlist->mb_mstarved =
+            malloc(sizeof(struct cv), M_MBUF, M_NOWAIT)) == NULL) ||
+            (mb_list_clust.ml_genlist->mb_mstarved =
+            malloc(sizeof(struct cv), M_MBUF, M_NOWAIT)) == NULL)
+                goto bad;
         mtx_init(&mbuf_gen, "mbuf subsystem general lists lock", NULL, 0);
-        cv_init(&(mb_list_mbuf.ml_genlist->mgl_mstarved), "mbuf pool starved");
-        cv_init(&(mb_list_clust.ml_genlist->mgl_mstarved),
+        cv_init(mb_list_mbuf.ml_genlist->mb_mstarved, "mbuf pool starved");
+        cv_init(mb_list_clust.ml_genlist->mb_mstarved,
             "mcluster pool starved");
         mb_list_mbuf.ml_genlist->mb_cont.mc_lock =
             mb_list_clust.ml_genlist->mb_cont.mc_lock = &mbuf_gen;
 
         /*
+         * Initialize mbuf daemon cache balancer & garbage collector mutex
+         * and state variables.  mbufd_needed is initialized to 1 because
+         * it turns out that our mbuf_daemon thread only really gets
+         * going (gets picked up by a CPU) long after mbuf_init and
+         * kproc_start run... in the meantime some mbuf allocations may
+         * have happened and incremented mbufd_needed.  But setting it to
+         * 1 here initially ensures that they won't try to wakeup() on
+         * a bogus ident before the mbuf_daemon() is actually ready.  When
+         * the daemon becomes ready, it'll re-set it to zero, as required.
+         */
+        mtx_init(&mbufd_lock, "mbuf daemon lock", NULL, 0);
+        mbuf_daemon_ran = 0;
+        mbufd_needed = 1;
+
+        /*
          * Set up the general containers for each object.
          */
         mb_list_mbuf.ml_genlist->mb_cont.mc_numowner =
@@ -440,9 +491,9 @@
                 if (CPU_ABSENT(i))
                         continue;
 
-                mb_list_mbuf.ml_cntlst[i] = malloc(sizeof(struct mb_pcpu_list),
+                mb_list_mbuf.ml_cntlst[i] = malloc(sizeof(struct mb_cache),
                     M_MBUF, M_NOWAIT);
-                mb_list_clust.ml_cntlst[i] = malloc(sizeof(struct mb_pcpu_list),
+                mb_list_clust.ml_cntlst[i] = malloc(sizeof(struct mb_cache),
                     M_MBUF, M_NOWAIT);
                 if ((mb_list_mbuf.ml_cntlst[i] == NULL) ||
                     (mb_list_clust.ml_cntlst[i] == NULL))
@@ -468,6 +519,8 @@
                 mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_types =
                     &(mb_statpcpu[i].mb_mbtypes[0]);
                 mb_list_clust.ml_cntlst[i]->mb_cont.mc_types = NULL;
+                mb_list_mbuf.ml_cntlst[i]->mb_mstarved =
+                    mb_list_clust.ml_cntlst[i]->mb_mstarved = NULL;
 
                 SLIST_INIT(&(mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_bhead));
                 SLIST_INIT(&(mb_list_clust.ml_cntlst[i]->mb_cont.mc_bhead));
@@ -500,19 +553,19 @@
 }
 
 /*
- * Populate a given mbuf PCPU container with a bucket full of fresh new
+ * Populate a given cache container with a bucket full of fresh new
  * buffers.  Return a pointer to the new bucket (already in the container if
  * successful), or return NULL on failure.
  *
  * LOCKING NOTES:
- * PCPU container lock must be held when this is called.
+ * Cache container lock must be held when this is called.
  * The lock is dropped here so that we can cleanly call the underlying VM
  * code.  If we fail, we return with no locks held.  If we succeed (i.e., return
  * non-NULL), we return with the PCPU lock held, ready for allocation from
  * the returned bucket.
  */
 static struct mb_bucket *
-mb_pop_cont(struct mb_lstmngr *mb_list, int how, struct mb_pcpu_list *cnt_lst)
+mb_pop_cont(struct mb_lstmngr *mb_list, int how, struct mb_cache *cnt_lst)
 {
         struct mb_bucket *bucket;
         caddr_t p;
@@ -521,7 +574,12 @@
         MB_UNLOCK_CONT(cnt_lst);
         /*
          * If our object's (finite) map is starved now (i.e., no more address
-         * space), bail out now.
+         * space), bail out now.  It is theoretically possible that we are in a
+         * race here where the ml_mapfull is going to be re-set to zero but we
+         * perform the check below just before it is; we then end up returning
+         * NULL even though the map is not starved.  However, the race should
+         * NOT happen if the system is properly tuned with respect to
+         * the mbuf and cluster cache watermarks.
          */
         if (mb_list->ml_mapfull)
                 return (NULL);
@@ -571,7 +629,7 @@
     int *pers_list)
 {
         static int last_report;
-        struct mb_pcpu_list *cnt_lst;
+        struct mb_cache *cnt_lst;
         struct mb_bucket *bucket;
         void *m;
 
@@ -603,8 +661,16 @@
                         MB_UNLOCK_CONT(cnt_lst);
                 else
                         *pers_list = cnt_lst->mb_cont.mc_numowner;
+
+                if (mb_low(cnt_lst, mb_list)) {
+                        mtx_lock(&mbufd_lock);
+                        mbufd_needed++;
+                        if (mbufd_needed == 1)
+                                wakeup(&mbufd_needed);
+                        mtx_unlock(&mbufd_lock);
+                }
         } else {
-                struct mb_gen_list *gen_list;
+                struct mb_cache *gen_list;
 
                 /*
                  * This is the less-common more difficult case.  We must
@@ -651,6 +717,14 @@
                                 MB_UNLOCK_CONT(cnt_lst);
                         else
                                 *pers_list = cnt_lst->mb_cont.mc_numowner;
+
+                        if (mb_low(cnt_lst, mb_list)) {
+                                mtx_lock(&mbufd_lock);
+                                mbufd_needed++;
+                                if (mbufd_needed == 1)
+                                        wakeup(&mbufd_needed);
+                                mtx_unlock(&mbufd_lock);
+                        }
                 } else {
                         /*
                          * We'll have to allocate a new page.
@@ -666,6 +740,14 @@
                                         MB_UNLOCK_CONT(cnt_lst);
                                 else
                                         *pers_list=cnt_lst->mb_cont.mc_numowner;
+
+                                if (mb_low(cnt_lst, mb_list)) {
+                                        mtx_lock(&mbufd_lock);
+                                        mbufd_needed++;
+                                        if (mbufd_needed == 1)
+                                                wakeup(&mbufd_needed);
+                                        mtx_unlock(&mbufd_lock);
+                                }
                         } else {
                                 if (how == 0) {
                                         /*
@@ -685,17 +767,25 @@
                                         printf(
 "All mbufs or mbuf clusters exhausted, please see tuning(7).\n");
                                 }
 
-                        }
-                        if (m != NULL && (persist & MBP_PERSIST) != 0) {
-                                cnt_lst = MB_GET_PCPU_LIST(mb_list);
-                                MB_LOCK_CONT(cnt_lst);
-                                *pers_list=cnt_lst->mb_cont.mc_numowner;
+                        if (m != NULL) {
+                                if ((persist & MBP_PERSIST) != 0) {
+                                        cnt_lst = MB_GET_PCPU_LIST(mb_list);
+                                        MB_LOCK_CONT(cnt_lst);
+                                        *pers_list=cnt_lst->mb_cont.mc_numowner;
+
+                                }
+                                if (mb_low(cnt_lst, mb_list)) {
+                                        mtx_lock(&mbufd_lock);
+                                        mbufd_needed++;
+                                        if (mbufd_needed == 1)
+                                                wakeup(&mbufd_needed);
+                                        mtx_unlock(&mbufd_lock);
+                                }
                         }
                 }
         }
-
         return (m);
 }
@@ -709,8 +799,7 @@
 static void *
 mb_alloc_wait(struct mb_lstmngr *mb_list, short type)
 {
-        struct mb_pcpu_list *cnt_lst;
-        struct mb_gen_list *gen_list;
+        struct mb_cache *cnt_lst, *gen_list;
         struct mb_bucket *bucket;
         void *m;
         int i, cv_ret;
@@ -743,7 +832,6 @@
                         return (m);
                 } else
                         cnt_lst->mb_cont.mc_starved++;
-
                 MB_UNLOCK_CONT(cnt_lst);
         }
 
@@ -763,7 +851,7 @@
         }
 
         gen_list->mb_cont.mc_starved++;
-        cv_ret = cv_timedwait(&(gen_list->mgl_mstarved),
+        cv_ret = cv_timedwait(gen_list->mb_mstarved,
             gen_list->mb_cont.mc_lock, mbuf_wait);
         gen_list->mb_cont.mc_starved--;
 
@@ -771,12 +859,23 @@
             ((bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead))) != NULL)) {
                 MB_GET_OBJECT(m, bucket, gen_list);
                 MB_MBTYPES_INC(gen_list, type, 1);
+                /*
+                 * If there are more waiters and we have more available objects
+                 * then wake them up.  Yes, it's theoretically possible that
+                 * threads blocked on the cv get signaled only to wake up and
+                 * find that there's actually nothing in the cache anymore
+                 * because some other thread blocked on the genlist cache lock
+                 * before the cv_signal was issued.  But these are rare (not to
+                 * mention pretty helpless) situations.
+                 */
+                if ((gen_list->mb_cont.mc_starved > 0) &&
+                    (*(gen_list->mb_cont.mc_objcount) > 0))
+                        cv_signal(gen_list->mb_mstarved);
                 mbstat.m_wait++;        /* XXX: No consistency. */
         } else {
                 mbstat.m_drops++;       /* XXX: No consistency. */
                 m = NULL;
         }
-
         MB_UNLOCK_CONT(gen_list);
 
         return (m);
@@ -800,8 +899,7 @@
 mb_free(struct mb_lstmngr *mb_list, void *m, short type, short persist,
     int *pers_list)
 {
-        struct mb_pcpu_list *cnt_lst;
-        struct mb_gen_list *gen_list;
+        struct mb_cache *cnt_lst, *gen_list;
         struct mb_bucket *bucket;
         u_int owner;
 
@@ -841,7 +939,7 @@
                 MB_PUT_OBJECT(m, bucket, gen_list);
                 MB_MBTYPES_DEC(gen_list, type, 1);
                 if (gen_list->mb_cont.mc_starved > 0)
-                        cv_signal(&(gen_list->mgl_mstarved));
+                        cv_signal(gen_list->mb_mstarved);
                 if ((persist & MBP_PERSIST) == 0)
                         MB_UNLOCK_CONT(gen_list);
                 else
@@ -912,7 +1010,7 @@
                  */
                 if (gen_list->mb_cont.mc_starved > 0) {
                         cnt_lst->mb_cont.mc_starved--;
-                        cv_signal(&(gen_list->mgl_mstarved));
+                        cv_signal(gen_list->mb_mstarved);
                 } else
                         cnt_lst->mb_cont.mc_starved = 0;
 
@@ -1018,6 +1116,185 @@
 }
 
 /******************************************************************************
+ * The mbuf subsystem garbage collecting/resource balancing daemon.
+ */
+
+/*
+ * Allocate pages from VM and populate a given cache with either
+ * mbufs or clusters, as specified by the mb_list.  We allocate
+ * just enough to fill up the cache to:
+ *     low_watermark + MBD_DELTA(low_watermark, high_watermark)
+ * The given cache may be either a PCPU cache or the general cache.
+ */
+static void
+mbd_vm2cache(struct mb_lstmngr *mb_list, struct mb_cache *cnt_lst)
+{
+
+        while (*(cnt_lst->mb_cont.mc_objcount) < ((u_long)*mb_list->ml_wmlow +
+            MBD_DELTA(mb_list->ml_wmlow, mb_list->ml_wmhigh))) {
+                if (mb_pop_cont(mb_list, M_NOWAIT, cnt_lst) == NULL) {
+                        MB_LOCK_CONT(cnt_lst);
+                        break;
+                }
+        }
+}
+
+/*
+ * Move either mbufs or clusters (as specified by the mb_list)
+ * from the general cache to the specified cache.  We move
+ * enough to fill up the specified PCPU cache to:
+ *     low_watermark + MBD_DELTA(low_watermark, high_watermark)
+ */
+static void
+mbd_cache2cache(struct mb_lstmngr *mb_list, struct mb_cache *cnt_lst)
+{
+        struct mb_cache *gen_list;
+        struct mb_bucket *bucket;
+
+        gen_list = MB_GET_GEN_LIST(mb_list);
+        MB_LOCK_CONT(gen_list);
+        while ((*gen_list->mb_cont.mc_objcount > 0) &&
+            (*cnt_lst->mb_cont.mc_objcount < ((u_long)*mb_list->ml_wmlow +
+            MBD_DELTA(mb_list->ml_wmlow, mb_list->ml_wmhigh)))) {
+                bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead));
+                SLIST_REMOVE_HEAD(&(gen_list->mb_cont.mc_bhead), mb_blist);
+                bucket->mb_owner = cnt_lst->mb_cont.mc_numowner;
+                (*(gen_list->mb_cont.mc_numpgs))--;
+                (*(cnt_lst->mb_cont.mc_numpgs))++;
+                *(gen_list->mb_cont.mc_objcount) -= bucket->mb_numfree;
+                SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead),
+                    bucket, mb_blist);
+                *(cnt_lst->mb_cont.mc_objcount) += bucket->mb_numfree;
+        }
+        MB_UNLOCK_CONT(gen_list);
+}
+
+/*
+ * Flush out a number of mbufs or clusters to VM (as specified by
+ * the mb_list) until we balance out the general cache so that
+ * the number of objects reaches:
+ *     low_watermark + MBD_DELTA(low_watermark, high_watermark)
+ * The general cache must be locked when this is called.
+ */
+static void
+mbd_cache2vm(struct mb_lstmngr *mb_list)
+{
+        struct mb_cache *gen_list;
+        struct mb_bucket *bucket, **bucketp;
+        u_int objs;
+
+        /*
+         * XXX: We drop the gen cache lock, get Giant, and re-get
+         * the gen cache lock here because kmem_free() still needs
+         * Giant but we need to obey lock order too (Giant->gencache).
+         */
+        gen_list = MB_GET_GEN_LIST(mb_list);
+        MB_UNLOCK_CONT(gen_list);
+        mtx_lock(&Giant);
+        MB_LOCK_CONT(gen_list);
+
+        objs = PAGE_SIZE / mb_list->ml_objsize;
+        for (bucketp = &SLIST_FIRST(&(gen_list->mb_cont.mc_bhead));
+            (bucket = *bucketp) != NULL;) {
+                if (*gen_list->mb_cont.mc_objcount <= (*mb_list->ml_wmlow +
+                    MBD_DELTA(mb_list->ml_wmlow, mb_list->ml_wmhigh)))
+                        break;
+                if (bucket->mb_numfree == objs) {
+                        *bucketp = SLIST_NEXT(bucket, mb_blist);
+                        (*(gen_list->mb_cont.mc_numpgs))--;
+                        *(gen_list->mb_cont.mc_objcount) -= objs;
+                        mb_list->ml_btable[MB_BUCKET_INDX(bucket->mb_free[0],
+                            mb_list)] = NULL;
+                        kmem_free(mb_list->ml_map,
+                            (vm_offset_t)bucket->mb_free[0], PAGE_SIZE);
+                        free(bucket, M_MBUF);
+                        mb_list->ml_mapfull = 0;
+                        continue;
+                }
+                bucketp = &SLIST_NEXT(bucket, mb_blist);
+        }
+
+        mtx_unlock(&Giant);     /* XXX */
+}
+
+/*-
+ * The "mbuf daemon" kproc does the following:
+ * (1) Re-balance per-CPU caches, if necessary;
+ * (2) Re-balance global cache, if necessary;
+ * (3) Attempt to auto-tune watermarks based on bookmarked history.
+ */
+static void
+mbuf_daemon(void)
+{
+        struct mb_lstmngr *mb_list;
+        struct mb_cache *gen_list, *cnt_lst;
+        int i, j;
+
+        mtx_lock(&mbufd_lock);
+        mbufd_needed = 0;
+
+        for (;;) {
+                msleep(&mbufd_needed, &mbufd_lock, PVM, "psleep", 0);
+                while (mbufd_needed > 0) {
+                        mbufd_needed = 0;
+                        mtx_unlock(&mbufd_lock);
+
+                        /*
+                         * We check both mbuf and mbuf cluster cache watermarks.
+                         */
+                        for (mb_list = &mb_list_mbuf, j = 0; j < 2;
+                            j++, mb_list = &mb_list_clust) {
+                                gen_list = MB_GET_GEN_LIST(mb_list);
+                                for (i = 0; i < NCPU; i++) {
+                                        if (CPU_ABSENT(i))
+                                                continue;
+                                        cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, i);
+                                        if (mb_low(cnt_lst, mb_list)) {
+                                                MB_LOCK_CONT(cnt_lst);
+                                                if (mb_low(cnt_lst, mb_list)) {
+                                                        if (*gen_list->mb_cont.mc_objcount > 0)
+                                                                mbd_cache2cache(
+                                                                    mb_list, cnt_lst);
+                                                        if (mb_low(cnt_lst, mb_list))
+                                                                mbd_vm2cache(mb_list, cnt_lst);
+                                                }
+                                                MB_UNLOCK_CONT(cnt_lst);
+                                        }
+                                }
+
+                                MB_LOCK_CONT(gen_list);
+                                if (mb_low(gen_list, mb_list))
+                                        mbd_vm2cache(mb_list, gen_list);
+                                else if (mb_high(gen_list, mb_list))
+                                        mbd_cache2vm(mb_list);
+                                if ((gen_list->mb_cont.mc_starved > 0) &&
+                                    (*gen_list->mb_cont.mc_objcount > 0))
+                                        cv_signal(gen_list->mb_mstarved);
+                                MB_UNLOCK_CONT(gen_list);
+
+                                /* XXX: Auto-tune missing here. */
+                        }
+                        mtx_lock(&mbufd_lock);
+                }
+
+                /* XXX MORE STATS (?) missing here. */
+                mbuf_daemon_ran++;
+        }
+
+        /* NOTREACHED */
+        mtx_unlock(&mbufd_lock);
+}
+
+static struct kproc_desc mbuf_kp = {
+        "mbufd",
+        mbuf_daemon,
+        &mbufdproc
+};
+SYSINIT(mbufd, SI_SUB_MBUF, SI_ORDER_SECOND, kproc_start, &mbuf_kp);
+
+/******************************************************************************
  * Internal setup macros.
  */
@@ -1539,7 +1816,7 @@
 void
 m_chtype(struct mbuf *mb, short new_type)
 {
-        struct mb_gen_list *gen_list;
+        struct mb_cache *gen_list;
 
         gen_list = MB_GET_GEN_LIST(&mb_list_mbuf);
         MB_LOCK_CONT(gen_list);
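
A note for reviewers on the refill target used above: mbd_vm2cache(), mbd_cache2cache() and
mbd_cache2vm() all balance a cache toward low_watermark + MBD_DELTA(low, high), i.e. halfway
between the two watermarks.  The standalone sketch below is not part of the patch (refill_target()
is a made-up name); it only illustrates that arithmetic with the default tunables.

    /*
     * Minimal userland sketch of the refill target implied by MBD_DELTA():
     * the daemon tries to fill a cache to low + (high - low) / 2.
     */
    #include <stdio.h>

    static unsigned long
    refill_target(unsigned int low, unsigned int high)
    {
            /* Same arithmetic as MBD_DELTA(): low + (high - low) / 2. */
            return ((unsigned long)low + (unsigned long)(high - low) / 2);
    }

    int
    main(void)
    {
            /* Patch defaults: mbuf_lowm = 128, mbuf_limit = 512. */
            printf("mbuf refill target: %lu\n", refill_target(128, 512));       /* 320 */
            /* Patch defaults: clust_lowm = 16, clust_limit = 128. */
            printf("cluster refill target: %lu\n", refill_target(16, 128));     /* 72 */
            return (0);
    }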
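The allocation and free paths only poke the daemon on the 0 -> 1 transition of mbufd_needed, so a
burst of low-watermark hits produces a single wakeup().  Below is a rough userland analogue of that
hand-off using pthreads (hypothetical names, plain condition variables standing in for
msleep()/wakeup()); it is only meant to show the shape of the protocol, not kernel code.

    /*
     * Userland sketch of the mbufd wakeup protocol: consumers bump "needed"
     * under a lock and wake the daemon only on the 0 -> 1 transition; the
     * daemon drains the counter, dropping the lock while it does the work.
     */
    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t needed_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t needed_cv = PTHREAD_COND_INITIALIZER;
    static int needed = 1;              /* Start at 1 until the daemon is ready. */

    static void *
    daemon_thread(void *arg)
    {
            pthread_mutex_lock(&needed_lock);
            needed = 0;                 /* Daemon is now ready. */
            for (;;) {
                    pthread_cond_wait(&needed_cv, &needed_lock);
                    while (needed > 0) {
                            needed = 0;
                            pthread_mutex_unlock(&needed_lock);
                            printf("daemon: rebalancing caches\n");
                            pthread_mutex_lock(&needed_lock);
                    }
            }
            return (NULL);
    }

    static void
    consumer_hit_low_watermark(void)
    {
            pthread_mutex_lock(&needed_lock);
            needed++;
            if (needed == 1)            /* Wake only on the 0 -> 1 transition. */
                    pthread_cond_signal(&needed_cv);
            pthread_mutex_unlock(&needed_lock);
    }

    int
    main(void)
    {
            pthread_t td;

            pthread_create(&td, NULL, daemon_thread, NULL);
            sleep(1);
            consumer_hit_low_watermark();
            sleep(1);
            return (0);
    }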
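For testing, the new knobs appear under kern.ipc next to the existing limits.  Assuming the sysctl
declarations above land unchanged, raising the cluster watermarks on a busy box would look
something like:

    sysctl kern.ipc.clust_lowm=64
    sysctl kern.ipc.clust_limit=256

mbuf_limit and clust_limit keep their role as the per-CPU high watermarks, so each lowm value
should stay below its corresponding limit.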