diff -ruN /usr/src.org/sys/alpha/alpha/machdep.c /usr/src/sys/alpha/alpha/machdep.c --- /usr/src.org/sys/alpha/alpha/machdep.c Fri Sep 15 20:44:24 2000 +++ /usr/src/sys/alpha/alpha/machdep.c Fri Sep 15 20:49:36 2000 @@ -185,8 +185,6 @@ static void cpu_startup __P((void *)); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) -static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); - struct msgbuf *msgbufp=0; int bootverbose = 0, Maxmem = 0; @@ -373,18 +371,12 @@ (16*(ARG_MAX+(PAGE_SIZE*3)))); /* - * Finally, allocate mbuf pool. + * Initialize mbuf system. + * Doing this early on (as opposed to through SYSINIT) is good as + * we want to make sure that the mutex locks are setup prior to + * network device drivers doing their stuff. */ - { - vm_offset_t mb_map_size; - - mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES + - (nmbclusters + nmbufs / 4) * sizeof(union mext_refcnt); - mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); - mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, - &maxaddr, mb_map_size); - mb_map->system_map = 1; - } + mbinit(); /* * Initialize callouts diff -ruN /usr/src.org/sys/i386/i386/machdep.c /usr/src/sys/i386/i386/machdep.c --- /usr/src.org/sys/i386/i386/machdep.c Fri Sep 15 20:45:02 2000 +++ /usr/src/sys/i386/i386/machdep.c Fri Sep 15 20:49:45 2000 @@ -139,8 +139,6 @@ static void cpu_startup __P((void *)); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) -static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); - int _udatasel, _ucodesel; u_int atdevbase; @@ -399,18 +397,12 @@ (16*(ARG_MAX+(PAGE_SIZE*3)))); /* - * Finally, allocate mbuf pool. + * Initialize mbuf system. + * Doing this early on (as opposed to through SYSINIT) is good + * as we want to make sure that the mutex locks are setup prior to + * network device drivers doing their stuff. */ - { - vm_offset_t mb_map_size; - - mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES + - (nmbclusters + nmbufs / 4) * sizeof(union mext_refcnt); - mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); - mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, - &maxaddr, mb_map_size); - mb_map->system_map = 1; - } + mbinit(); /* * Initialize callouts diff -ruN /usr/src.org/sys/kern/uipc_mbuf.c /usr/src/sys/kern/uipc_mbuf.c --- /usr/src.org/sys/kern/uipc_mbuf.c Fri Sep 15 20:45:05 2000 +++ /usr/src/sys/kern/uipc_mbuf.c Tue Sep 19 18:21:55 2000 @@ -48,28 +48,39 @@ #include #include +#include +#include +#include +#include +#include + #ifdef INVARIANTS #include #endif -static void mbinit __P((void *)); -SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL) - struct mbuf *mbutl; struct mbstat mbstat; u_long mbtypes[MT_NTYPES]; -struct mbuf *mmbfree; -union mcluster *mclfree; -union mext_refcnt *mext_refcnt_free; int max_linkhdr; int max_protohdr; int max_hdr; int max_datalen; int nmbclusters; int nmbufs; -u_int m_mballoc_wid = 0; -u_int m_clalloc_wid = 0; +u_long m_mballoc_wid = 0; +u_long m_clalloc_wid = 0; + +/* + * freelist header structures... 
+ * mbffree_lst, mclfree_lst, mcntfree_lst + */ +struct mbffree_lst mbffree_lst_hdr, *mmbfree; +struct mclfree_lst mclfree_lst_hdr, *mclfree; +struct mcntfree_lst mcntfree_lst_hdr, *mcntfree; +/* + * sysctl(8) exported objects + */ SYSCTL_DECL(_kern_ipc); SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW, &max_linkhdr, 0, ""); @@ -95,41 +106,76 @@ static void m_reclaim __P((void)); +/* Initial allocation numbers */ #define NCL_INIT 2 #define NMB_INIT 16 -#define REF_INIT (NMBCLUSTERS * 2) +#define REF_INIT NMBCLUSTERS -/* ARGSUSED*/ -static void -mbinit(dummy) - void *dummy; +/* + * Full mbuf subsystem initialization done here. + * + * XXX: If ever we have system specific map setups to do, then move them to + * machdep.c - for now, there is no reason for this stuff to go there. + * We just call this explicitly, as most of the stuff that needs to get + * done here should be done early on (i.e. from cpu_startup) anyway. + */ +void +mbinit(void) { - int s; + vm_offset_t maxaddr, mb_map_size; - mmbfree = NULL; - mclfree = NULL; - mext_refcnt_free = NULL; + /* + * Setup the mb_map, allocate requested VM space. + */ + mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES + (nmbclusters + + nmbufs / 4) * sizeof(union mext_refcnt); + mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); + mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, + mb_map_size); + /* XXX: mb_map->system_map = 1; */ + /* + * Initialize the free list headers, and setup locks for lists. + */ + mmbfree = (struct mbffree_lst *)&mbffree_lst_hdr; + mclfree = (struct mclfree_lst *)&mclfree_lst_hdr; + mcntfree = (struct mcntfree_lst *)&mcntfree_lst_hdr; + mmbfree->m_head = NULL; + mclfree->m_head = NULL; + mcntfree->m_head = NULL; + mtx_init(&mmbfree->m_mtx, "mbuf free list lock", MTX_DEF); + mtx_init(&mclfree->m_mtx, "mcluster free list lock", MTX_DEF); + mtx_init(&mcntfree->m_mtx, "m_ext counter free list lock", MTX_DEF); + + /* + * Initialize mbuf subsystem (sysctl exported) statistics structure. + */ mbstat.m_msize = MSIZE; mbstat.m_mclbytes = MCLBYTES; mbstat.m_minclsize = MINCLSIZE; mbstat.m_mlen = MLEN; mbstat.m_mhlen = MHLEN; - s = splimp(); - if (m_alloc_ref(REF_INIT) == 0) + /* + * Perform some initial allocations. + * If allocations are succesful, locks are obtained in the allocation + * routines, so we must release them if it works. + */ + if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0) goto bad; + else + mtx_exit(&mcntfree->m_mtx, MTX_DEF); + if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0) goto bad; -#if MCLBYTES <= PAGE_SIZE + else + mtx_exit(&mmbfree->m_mtx, MTX_DEF); + if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0) goto bad; -#else - /* It's OK to call contigmalloc in this context. */ - if (m_clalloc(16, M_WAIT) == 0) - goto bad; -#endif - splx(s); + else + mtx_exit(&mclfree->m_mtx, MTX_DEF); + return; bad: panic("mbinit: failed to initialize mbuf subsystem!"); @@ -138,37 +184,51 @@ /* * Allocate at least nmb reference count structs and place them * on the ref cnt free list. - * Must be called at splimp. */ int -m_alloc_ref(nmb) +m_alloc_ref(nmb, how) u_int nmb; + int how; { caddr_t p; u_int nbytes; int i; /* - * XXX: * We don't cap the amount of memory that can be used * by the reference counters, like we do for mbufs and - * mbuf clusters. The reason is that we don't really expect - * to have to be allocating too many of these guys with m_alloc_ref(), - * and if we are, we're probably not out of the woods anyway, - * so leave this way for now. + * mbuf clusters. 
In fact, we're absolutely sure that we + * won't ever be going over our allocated space. We keep enough + * space in mb_map to accomodate maximum values of allocatable + * external buffers including, but not limited to, clusters. + * (That's also why we won't have to have wait routines for + * counters). + * + * If we're in here, we're absolutely certain to be returning + * succesfully, as long as there is physical memory to accomodate + * us. */ - if (mb_map_full) - return (0); - nbytes = round_page(nmb * sizeof(union mext_refcnt)); - if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT)) == NULL) + mtx_enter(&Giant, MTX_DEF); + if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_WAIT ? M_WAIT : + M_NOWAIT)) == NULL) { + mtx_exit(&Giant, MTX_DEF); return (0); + } + mtx_exit(&Giant, MTX_DEF); nmb = nbytes / sizeof(union mext_refcnt); + /* + * We don't let go of the mutex in order to avoid a race. + * It is up to the caller to let go of the mutex if the call + * was successful or just do nothing if it failed, because in that + * case, we wouldn't have grabbed the mutex at all. + */ + mtx_enter(&mcntfree->m_mtx, MTX_DEF); for (i = 0; i < nmb; i++) { - ((union mext_refcnt *)p)->next_ref = mext_refcnt_free; - mext_refcnt_free = (union mext_refcnt *)p; + ((union mext_refcnt *)p)->next_ref = mcntfree->m_head; + mcntfree->m_head = (union mext_refcnt *)p; p += sizeof(union mext_refcnt); mbstat.m_refree++; } @@ -179,9 +239,7 @@ /* * Allocate at least nmb mbufs and place on mbuf free list. - * Must be called at splimp. */ -/* ARGSUSED */ int m_mballoc(nmb, how) register int nmb; @@ -192,44 +250,43 @@ int nbytes; /* - * If we've hit the mbuf limit, stop allocating from mb_map, - * (or trying to) in order to avoid dipping into the section of - * mb_map which we've "reserved" for clusters. - */ - if ((nmb + mbstat.m_mbufs) > nmbufs) - return (0); - - /* - * Once we run out of map space, it will be impossible to get - * any more (nothing is ever freed back to the map) - * -- however you are not dead as m_reclaim might - * still be able to free a substantial amount of space. - * - * XXX Furthermore, we can also work with "recycled" mbufs (when - * we're calling with M_WAIT the sleep procedure will be woken - * up when an mbuf is freed. See m_mballoc_wait()). + * If we've hit the mbuf limit, stop allocating from mb_map. + * Also, once we run out of map space, it will be impossible to + * get any more (nothing is ever freed back to the map). */ - if (mb_map_full) + if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs)) { + atomic_add_long(&mbstat.m_drops, 1); return (0); + } nbytes = round_page(nmb * MSIZE); + mtx_enter(&Giant, MTX_DEF); p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT); if (p == 0 && how == M_WAIT) { - mbstat.m_wait++; + atomic_add_long(&mbstat.m_wait, 1); p = (caddr_t)kmem_malloc(mb_map, nbytes, M_WAITOK); } + mtx_exit(&Giant, MTX_DEF); /* - * Either the map is now full, or `how' is M_NOWAIT and there + * Either the map is now full, or `how' is M_DONTWAIT and there * are no pages left. */ if (p == NULL) return (0); nmb = nbytes / MSIZE; + + /* + * We don't let go of the mutex in order to avoid a race. + * It is up to the caller to let go of the mutex if the call + * was successful or just do nothing if it failed, because in that + * case, we wouldn't have grabbed the mutex at all. 
+ */ + mtx_enter(&mmbfree->m_mtx, MTX_DEF); for (i = 0; i < nmb; i++) { - ((struct mbuf *)p)->m_next = mmbfree; - mmbfree = (struct mbuf *)p; + ((struct mbuf *)p)->m_next = mmbfree->m_head; + mmbfree->m_head = (struct mbuf *)p; p += MSIZE; } mbstat.m_mbufs += nmb; @@ -244,81 +301,125 @@ * designated (mbuf_wait) time. */ struct mbuf * -m_mballoc_wait(int caller, int type) +m_mballoc_wait(int type) { - struct mbuf *p; - int s; + struct mbuf *p = NULL; - s = splimp(); - m_mballoc_wid++; - if ((tsleep(&m_mballoc_wid, PVM, "mballc", mbuf_wait)) == EWOULDBLOCK) - m_mballoc_wid--; - splx(s); - - /* - * Now that we (think) that we've got something, we will redo an - * MGET, but avoid getting into another instance of m_mballoc_wait() - * XXX: We retry to fetch _even_ if the sleep timed out. This is left - * this way, purposely, in the [unlikely] case that an mbuf was - * freed but the sleep was not awakened in time. + /* + * See if we can drain some resources out of the protocols. */ - p = NULL; - switch (caller) { - case MGET_C: + mtx_enter(&mmbfree->m_mtx, MTX_RLIKELY); + m_reclaim(); + mtx_exit(&mmbfree->m_mtx, MTX_RLIKELY); + + /* + * Try again. Even though we may have already lost from a race, + * it's not so much of a big deal - at least somebody profited + * from our drain. + */ + MGET(p, M_DONTWAIT, type); + + /* + * This avoids a potential race. What we do is first place ourselves + * in the queue (with asleep) and then later do an actual await() so + * that if we happen to get a wakeup() in between, that the await() + * does effectively nothing. Otherwise, what could happen is that + * we increment m_mballoc_wid, at which point it will be decremented + * by a (racing) MBWAKEUP(), yet we will sleep on it nonetheless and + * risk never being woken up (i.e. sleep on a m_mballoc_wid of 0)! + */ + if (p == NULL) { + asleep(&m_mballoc_wid, PVM, "mballc", mbuf_wait); + atomic_add_long(&m_mballoc_wid, 1); + if (await(PVM, mbuf_wait) == EWOULDBLOCK) + atomic_subtract_long(&m_mballoc_wid, 1); + + /* + * Try again (one last time). + * + * XXX: We retry to fetch _even_ if the sleep timed out. This + * is left this way, purposely, in the [unlikely] case + * that an mbuf was freed but the sleep was not awoken + * in time; then we are willing to race for it. If the + * sleep didn't time out (i.e. we got woken up) then + * we race for our mbuf with the other processors. + */ MGET(p, M_DONTWAIT, type); - break; - case MGETHDR_C: - MGETHDR(p, M_DONTWAIT, type); - break; - default: - panic("m_mballoc_wait: invalid caller (%d)", caller); } - s = splimp(); - if (p != NULL) { /* We waited and got something... */ - mbstat.m_wait++; - /* Wake up another if we have more free. */ - if (mmbfree != NULL) - MMBWAKEUP(); - } - splx(s); + /* If we waited and got something... */ + if (p != NULL) { + atomic_add_long(&mbstat.m_wait, 1); + mtx_enter(&mmbfree->m_mtx, MTX_DEF); + if (mmbfree->m_head != NULL) + MBWAKEUP(m_mballoc_wid); + mtx_exit(&mmbfree->m_mtx, MTX_DEF); + } else + atomic_add_long(&mbstat.m_drops, 1); + return (p); } -#if MCLBYTES > PAGE_SIZE -static int i_want_my_mcl; - -static void -kproc_mclalloc(void) +/* + * Same as above, except expanded out (in our tradition) for MGETHDR. + */ +struct mbuf * +m_mballoc_wait_hdr(int type) { - int status; + struct mbuf *p = NULL; - while (1) { - tsleep(&i_want_my_mcl, PVM, "mclalloc", 0); + /* + * Try again. Even though we may have already lost from a race, + * it's not so much of a big deal - at least somebody profited + * from our drain. 
+ */ + MGETHDR(p, M_DONTWAIT, type); - for (; i_want_my_mcl; i_want_my_mcl--) { - if (m_clalloc(1, M_WAIT) == 0) - printf("m_clalloc failed even in process context!\n"); - } + /* + * This avoids a potential race. What we do is first place ourselves + * in the queue (with asleep) and then later do an actual await() so + * that if we happen to get a wakeup() in between, that the await() + * does effectively nothing. Otherwise, what could happen is that + * we increment m_mballoc_wid, at which point it will be decremented + * by a (racing) MBWAKEUP(), yet we will sleep on it nonetheless and + * risk never being woken up (i.e. sleep on a m_mballoc_wid of 0)! + */ + if (p == NULL) { + asleep(&m_mballoc_wid, PVM, "mballc", mbuf_wait); + atomic_add_long(&m_mballoc_wid, 1); + if (await(PVM, mbuf_wait) == EWOULDBLOCK) + atomic_subtract_long(&m_mballoc_wid, 1); + + /* + * Try again (one last time). + * + * XXX: We retry to fetch _even_ if the sleep timed out. This + * is left this way, purposely, in the [unlikely] case + * that an mbuf was freed but the sleep was not awoken + * in time; then we are willing to race for it. If the + * sleep didn't time out (i.e. we got woken up) then + * we race for our mbuf with the other processors. + */ + MGETHDR(p, M_DONTWAIT, type); } -} -static struct proc *mclallocproc; -static struct kproc_desc mclalloc_kp = { - "mclalloc", - kproc_mclalloc, - &mclallocproc -}; -SYSINIT(mclallocproc, SI_SUB_KTHREAD_UPDATE, SI_ORDER_ANY, kproc_start, - &mclalloc_kp); -#endif + /* If we waited and got something... */ + if (p != NULL) { + atomic_add_long(&mbstat.m_wait, 1); + mtx_enter(&mmbfree->m_mtx, MTX_DEF); + if (mmbfree->m_head != NULL) + MBWAKEUP(m_mballoc_wid); + mtx_exit(&mmbfree->m_mtx, MTX_DEF); + } else + atomic_add_long(&mbstat.m_drops, 1); + + return (p); +} /* * Allocate some number of mbuf clusters * and place on cluster free list. - * Must be called at splimp. */ -/* ARGSUSED */ int m_clalloc(ncl, how) register int ncl; @@ -329,54 +430,41 @@ int npg; /* + * If the map is now full (nothing will ever be freed to it). * If we've hit the mcluster number limit, stop allocating from - * mb_map, (or trying to) in order to avoid dipping into the section - * of mb_map which we've "reserved" for mbufs. + * mb_map. */ - if ((ncl + mbstat.m_clusters) > nmbclusters) { - mbstat.m_drops++; + if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters)) { + atomic_add_long(&mbstat.m_drops, 1); return (0); } - /* - * Once we run out of map space, it will be impossible - * to get any more (nothing is ever freed back to the - * map). From this point on, we solely rely on freed - * mclusters. - */ - if (mb_map_full) { - mbstat.m_drops++; - return (0); - } - -#if MCLBYTES > PAGE_SIZE - if (how != M_WAIT) { - i_want_my_mcl += ncl; - wakeup(&i_want_my_mcl); - mbstat.m_wait++; - p = 0; - } else { - p = contigmalloc1(MCLBYTES * ncl, M_DEVBUF, M_WAITOK, 0ul, - ~0ul, PAGE_SIZE, 0, mb_map); - } -#else npg = ncl; + mtx_enter(&Giant, MTX_DEF); p = (caddr_t)kmem_malloc(mb_map, ctob(npg), how != M_WAIT ? M_NOWAIT : M_WAITOK); + mtx_exit(&Giant, MTX_DEF); ncl = ncl * PAGE_SIZE / MCLBYTES; -#endif + /* - * Either the map is now full, or `how' is M_NOWAIT and there + * Either the map is now full, or `how' is M_DONTWAIT and there * are no pages left. */ if (p == NULL) { - mbstat.m_drops++; + atomic_add_long(&mbstat.m_drops, 1); return (0); } + /* + * We don't let go of the mutex in order to avoid a race. 
+ * It is up to the caller to let go of the mutex if the call + * was successful or just do nothing if it failed, because in that + * case, we wouldn't have grabbed the mutex at all. + */ + mtx_enter(&mclfree->m_mtx, MTX_DEF); for (i = 0; i < ncl; i++) { - ((union mcluster *)p)->mcl_next = mclfree; - mclfree = (union mcluster *)p; + ((union mcluster *)p)->mcl_next = mclfree->m_head; + mclfree->m_head = (union mcluster *)p; p += MCLBYTES; mbstat.m_clfree++; } @@ -389,128 +477,65 @@ * M_WAIT, we rely on the mclfree union pointers. If nothing is free, we will * sleep for a designated amount of time (mbuf_wait) or until we're woken up * due to sudden mcluster availability. + * + * Must be called with no held mutexes... may block. */ caddr_t m_clalloc_wait(void) { caddr_t p; - int s; - -#ifdef __i386__ - /* If in interrupt context, and INVARIANTS, maintain sanity and die. */ - KASSERT(intr_nesting_level == 0, ("CLALLOC: CANNOT WAIT IN INTERRUPT")); -#endif - /* Sleep until something's available or until we expire. */ - m_clalloc_wid++; - if ((tsleep(&m_clalloc_wid, PVM, "mclalc", mbuf_wait)) == EWOULDBLOCK) - m_clalloc_wid--; + /* + * This avoids a potential race. What we do is first place ourselves + * in the queue (with asleep) and then later do an actual await() so + * that if we happen to get a wakeup() in between, that the await() + * does effectively nothing. Otherwise, what could happen is that + * we increment m_clalloc_wid, at which point it will be decremented + * by a (racing) MBWAKEUP(), yet we will sleep on it nonetheless and + * risk never being woken up (i.e. sleep on a m_clalloc_wid of 0)! + */ + asleep(&m_clalloc_wid, PVM, "mclalc", mbuf_wait); + atomic_add_long(&m_clalloc_wid, 1); + if (await(PVM, mbuf_wait) == EWOULDBLOCK) + atomic_subtract_long(&m_clalloc_wid, 1); /* - * Now that we (think) that we've got something, we will redo and - * MGET, but avoid getting into another instance of m_clalloc_wait() + * Now that we (think) that we've got something, try again. */ p = NULL; _MCLALLOC(p, M_DONTWAIT); - s = splimp(); - if (p != NULL) { /* We waited and got something... */ - mbstat.m_wait++; - /* Wake up another if we have more free. */ - if (mclfree != NULL) - MCLWAKEUP(); - } + /* If we waited and got something ... */ + if (p != NULL) { + atomic_add_long(&mbstat.m_wait, 1); + mtx_enter(&mclfree->m_mtx, MTX_DEF); + if (mclfree->m_head != NULL) + MBWAKEUP(m_clalloc_wid); + mtx_exit(&mclfree->m_mtx, MTX_DEF); + } else + atomic_add_long(&mbstat.m_drops, 1); - splx(s); return (p); } /* - * When MGET fails, ask protocols to free space when short of memory, - * then re-attempt to allocate an mbuf. + * m_reclaim: drain protocols in hopes to free up some resources... + * + * Should be called with mmbfree->m_mtx mutex held. We will most likely + * recursively grab it from within some drain routines, but that's Okay, + * as the mutex will never be completely released until we let go of it + * after our m_reclaim() is over. */ -struct mbuf * -m_retry(i, t) - int i, t; -{ - register struct mbuf *m; - - /* - * Must only do the reclaim if not in an interrupt context. - */ - if (i == M_WAIT) { -#ifdef __i386__ - KASSERT(intr_nesting_level == 0, - ("MBALLOC: CANNOT WAIT IN INTERRUPT")); -#endif - m_reclaim(); - } - - /* - * Both m_mballoc_wait and m_retry must be nulled because - * when the MGET macro is run from here, we deffinately do _not_ - * want to enter an instance of m_mballoc_wait() or m_retry() (again!) 
- */ -#define m_mballoc_wait(caller,type) (struct mbuf *)0 -#define m_retry(i, t) (struct mbuf *)0 - MGET(m, i, t); -#undef m_retry -#undef m_mballoc_wait - - if (m != NULL) - mbstat.m_wait++; - else - mbstat.m_drops++; - - return (m); -} - -/* - * As above; retry an MGETHDR. - */ -struct mbuf * -m_retryhdr(i, t) - int i, t; -{ - register struct mbuf *m; - - /* - * Must only do the reclaim if not in an interrupt context. - */ - if (i == M_WAIT) { -#ifdef __i386__ - KASSERT(intr_nesting_level == 0, - ("MBALLOC: CANNOT WAIT IN INTERRUPT")); -#endif - m_reclaim(); - } - -#define m_mballoc_wait(caller,type) (struct mbuf *)0 -#define m_retryhdr(i, t) (struct mbuf *)0 - MGETHDR(m, i, t); -#undef m_retryhdr -#undef m_mballoc_wait - - if (m != NULL) - mbstat.m_wait++; - else - mbstat.m_drops++; - - return (m); -} - static void m_reclaim() { register struct domain *dp; register struct protosw *pr; - int s = splimp(); for (dp = domains; dp; dp = dp->dom_next) for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_drain) (*pr->pr_drain)(); - splx(s); mbstat.m_drain++; } @@ -685,11 +710,11 @@ np = &n->m_next; } if (top == 0) - MCFail++; + atomic_add_long(&MCFail, 1); return (top); nospace: m_freem(top); - MCFail++; + atomic_add_long(&MCFail, 1); return (0); } @@ -746,7 +771,7 @@ return top; nospace: m_freem(top); - MCFail++; + atomic_add_long(&MCFail, 1); return 0; } @@ -853,7 +878,7 @@ nospace: m_freem(top); - MCFail++; + atomic_add_long(&MCFail, 1); return (0); } @@ -1022,7 +1047,7 @@ return (m); bad: m_freem(n); - MPFail++; + atomic_add_long(&MPFail, 1); return (0); } diff -ruN /usr/src.org/sys/pc98/i386/machdep.c /usr/src/sys/pc98/i386/machdep.c --- /usr/src.org/sys/pc98/i386/machdep.c Fri Sep 15 20:45:23 2000 +++ /usr/src/sys/pc98/i386/machdep.c Fri Sep 15 20:50:02 2000 @@ -145,8 +145,6 @@ static void cpu_startup __P((void *)); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) -static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); - #ifdef PC98 int need_pre_dma_flush; /* If 1, use wbinvd befor DMA transfer. */ int need_post_dma_flush; /* If 1, use invd after DMA transfer. */ @@ -413,18 +411,12 @@ (16*(ARG_MAX+(PAGE_SIZE*3)))); /* - * Finally, allocate mbuf pool. + * Initialize mbuf system. + * Doing this early on (as opposed to through SYSINIT) is good + * as we want to make sure that the mutex locks are setup prior to + * network device drivers doing their stuff. */ - { - vm_offset_t mb_map_size; - - mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES + - (nmbclusters + nmbufs / 4) * sizeof(union mext_refcnt); - mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); - mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, - &maxaddr, mb_map_size); - mb_map->system_map = 1; - } + mbinit(); /* * Initialize callouts diff -ruN /usr/src.org/sys/sys/mbuf.h /usr/src/sys/sys/mbuf.h --- /usr/src.org/sys/sys/mbuf.h Fri Sep 15 20:45:28 2000 +++ /usr/src/sys/sys/mbuf.h Thu Sep 21 00:40:41 2000 @@ -37,6 +37,11 @@ #ifndef _SYS_MBUF_H_ #define _SYS_MBUF_H_ +#ifdef _KERNEL +#include +#include +#endif + /* * Mbufs are of a single size, MSIZE (machine/param.h), which * includes overhead. 
An mbuf may add a single "mbuf cluster" of size @@ -181,11 +186,11 @@ * mbuf statistics */ struct mbstat { - u_long m_mbufs; /* mbufs obtained from page pool */ - u_long m_clusters; /* clusters obtained from page pool */ - u_long m_clfree; /* free clusters */ - u_long m_refcnt; /* refcnt structs obtained from page pool */ - u_long m_refree; /* free refcnt structs */ + u_long m_mbufs; /* # mbufs obtained from page pool */ + u_long m_clusters; /* # clusters obtained from page pool */ + u_long m_clfree; /* # clusters on freelist (cache) */ + u_long m_refcnt; /* # ref counters obtained from page pool */ + u_long m_refree; /* # ref counters on freelist (cache) */ u_long m_spare; /* spare field */ u_long m_drops; /* times failed to find space */ u_long m_wait; /* times waited for space */ @@ -203,8 +208,7 @@ #define M_DONTWAIT 1 #define M_WAIT 0 -/* Freelists: - * +/* * Normal mbuf clusters are normally treated as character arrays * after allocation, but use the first word of the buffer as a free list * pointer while on the free list. @@ -214,15 +218,6 @@ char mcl_buf[MCLBYTES]; }; - -/* - * These are identifying numbers passed to the m_mballoc_wait function, - * allowing us to determine whether the call came from an MGETHDR or - * an MGET. - */ -#define MGETHDR_C 1 -#define MGET_C 2 - /* * The m_ext object reference counter structure. */ @@ -232,41 +227,32 @@ }; /* - * Wake up the next instance (if any) of m_mballoc_wait() which is - * waiting for an mbuf to be freed. This should be called at splimp(). - * - * XXX: If there is another free mbuf, this routine will be called [again] - * from the m_mballoc_wait routine in order to wake another sleep instance. + * free list header definitions: mbffree_lst, mclfree_lst, mcntfree_lst */ -#define MMBWAKEUP() do { \ - if (m_mballoc_wid) { \ - m_mballoc_wid--; \ - wakeup_one(&m_mballoc_wid); \ - } \ -} while (0) +struct mbffree_lst { + struct mbuf *m_head; + struct mtx m_mtx; +}; -/* - * Same as above, but for mbuf cluster(s). - */ -#define MCLWAKEUP() do { \ - if (m_clalloc_wid) { \ - m_clalloc_wid--; \ - wakeup_one(&m_clalloc_wid); \ - } \ -} while (0) +struct mclfree_lst { + union mcluster *m_head; + struct mtx m_mtx; +}; + +struct mcntfree_lst { + union mext_refcnt *m_head; + struct mtx m_mtx; +}; /* - * mbuf utility macros: - * - * MBUFLOCK(code) - * prevents a section of code from from being interrupted by network - * drivers. + * Wake up the next instance (if any) of a sleeping allocation - which is + * waiting for a {cluster, mbuf} to be freed. 
*/ -#define MBUFLOCK(code) do { \ - int _ms = splimp(); \ - \ - { code } \ - splx(_ms); \ +#define MBWAKEUP(m_wid) do { \ + if ((m_wid)) { \ + atomic_subtract_long(&(m_wid), 1); \ + wakeup_one(&(m_wid)); \ + } \ } while (0) /* @@ -286,31 +272,41 @@ #define MEXT_ADD_REF(m) atomic_add_long(&((m)->m_ext.ref_cnt->refcnt), 1) -#define _MEXT_ALLOC_CNT(m_cnt) MBUFLOCK( \ +#define _MEXT_ALLOC_CNT(m_cnt, how) do { \ union mext_refcnt *__mcnt; \ \ - if ((mext_refcnt_free == NULL) && (m_alloc_ref(1) == 0)) \ - panic("mbuf subsystem: out of ref counts!"); \ - __mcnt = mext_refcnt_free; \ - mext_refcnt_free = __mcnt->next_ref; \ - __mcnt->refcnt = 0; \ + mtx_enter(&mcntfree->m_mtx, MTX_DEF); \ + if (mcntfree->m_head == NULL) { \ + mtx_exit(&mcntfree->m_mtx, MTX_DEF); \ + if (m_alloc_ref(1, (how)) == 0) \ + mtx_enter(&mcntfree->m_mtx, MTX_DEF); \ + } \ + __mcnt = mcntfree->m_head; \ + if (__mcnt != NULL) { \ + mcntfree->m_head = __mcnt->next_ref; \ + mbstat.m_refree--; \ + __mcnt->refcnt = 0; \ + } \ + mtx_exit(&mcntfree->m_mtx, MTX_DEF); \ (m_cnt) = __mcnt; \ - mbstat.m_refree--; \ -) +} while (0) #define _MEXT_DEALLOC_CNT(m_cnt) do { \ union mext_refcnt *__mcnt = (m_cnt); \ \ - __mcnt->next_ref = mext_refcnt_free; \ - mext_refcnt_free = __mcnt; \ + mtx_enter(&mcntfree->m_mtx, MTX_DEF); \ + __mcnt->next_ref = mcntfree->m_head; \ + mcntfree->m_head = __mcnt; \ mbstat.m_refree++; \ + mtx_exit(&mcntfree->m_mtx, MTX_DEF); \ } while (0) -#define MEXT_INIT_REF(m) do { \ +#define MEXT_INIT_REF(m, how) do { \ struct mbuf *__mmm = (m); \ \ - _MEXT_ALLOC_CNT(__mmm->m_ext.ref_cnt); \ - MEXT_ADD_REF(__mmm); \ + _MEXT_ALLOC_CNT(__mmm->m_ext.ref_cnt, (how)); \ + if (__mmm != NULL) \ + MEXT_ADD_REF(__mmm); \ } while (0) /* @@ -327,26 +323,28 @@ struct mbuf *_mm; \ int _mhow = (how); \ int _mtype = (type); \ - int _ms = splimp(); \ \ - if (mmbfree == NULL) \ - (void)m_mballoc(1, _mhow); \ - _mm = mmbfree; \ + mtx_enter(&mmbfree->m_mtx, MTX_DEF); \ + if (mmbfree->m_head == NULL) { \ + mtx_exit(&mmbfree->m_mtx, MTX_DEF); \ + if (m_mballoc(1, _mhow) == 0) \ + mtx_enter(&mmbfree->m_mtx, MTX_DEF); \ + } \ + _mm = mmbfree->m_head; \ if (_mm != NULL) { \ - mmbfree = _mm->m_next; \ + mmbfree->m_head = _mm->m_next; \ mbtypes[MT_FREE]--; \ mbtypes[_mtype]++; \ - splx(_ms); \ + mtx_exit(&mmbfree->m_mtx, MTX_DEF); \ _mm->m_type = _mtype; \ _mm->m_next = NULL; \ _mm->m_nextpkt = NULL; \ _mm->m_data = _mm->m_dat; \ _mm->m_flags = 0; \ } else { \ - splx(_ms); \ - _mm = m_retry(_mhow, _mtype); \ - if (_mm == NULL && _mhow == M_WAIT) \ - _mm = m_mballoc_wait(MGET_C, _mtype); \ + mtx_exit(&mmbfree->m_mtx, MTX_DEF); \ + if (_mhow == M_WAIT) \ + _mm = m_mballoc_wait(_mtype); \ } \ (m) = _mm; \ } while (0) @@ -355,16 +353,19 @@ struct mbuf *_mm; \ int _mhow = (how); \ int _mtype = (type); \ - int _ms = splimp(); \ \ - if (mmbfree == NULL) \ - (void)m_mballoc(1, _mhow); \ - _mm = mmbfree; \ + mtx_enter(&mmbfree->m_mtx, MTX_DEF); \ + if (mmbfree->m_head == NULL) { \ + mtx_exit(&mmbfree->m_mtx, MTX_DEF); \ + if (m_mballoc(1, _mhow) == 0) \ + mtx_enter(&mmbfree->m_mtx, MTX_DEF); \ + } \ + _mm = mmbfree->m_head; \ if (_mm != NULL) { \ - mmbfree = _mm->m_next; \ + mmbfree->m_head = _mm->m_next; \ mbtypes[MT_FREE]--; \ mbtypes[_mtype]++; \ - splx(_ms); \ + mtx_exit(&mmbfree->m_mtx, MTX_DEF); \ _mm->m_type = _mtype; \ _mm->m_next = NULL; \ _mm->m_nextpkt = NULL; \ @@ -374,10 +375,9 @@ _mm->m_pkthdr.csum_flags = 0; \ _mm->m_pkthdr.aux = NULL; \ } else { \ - splx(_ms); \ - _mm = m_retryhdr(_mhow, _mtype); \ - if (_mm == NULL && _mhow == M_WAIT) \ - 
_mm = m_mballoc_wait(MGETHDR_C, _mtype); \ + mtx_exit(&mmbfree->m_mtx, MTX_DEF); \ + if (_mhow == M_WAIT) \ + _mm = m_mballoc_wait_hdr(_mtype); \ } \ (m) = _mm; \ } while (0) @@ -393,17 +393,20 @@ #define _MCLALLOC(p, how) do { \ caddr_t _mp; \ int _mhow = (how); \ - int _ms = splimp(); \ \ - if (mclfree == NULL) \ - (void)m_clalloc(1, _mhow); \ - _mp = (caddr_t)mclfree; \ + mtx_enter(&mclfree->m_mtx, MTX_DEF); \ + if (mclfree->m_head == NULL) { \ + mtx_exit(&mclfree->m_mtx, MTX_DEF); \ + if (m_clalloc(1, _mhow) == 0) \ + mtx_enter(&mclfree->m_mtx, MTX_DEF); \ + } \ + _mp = (caddr_t)mclfree->m_head; \ if (_mp != NULL) { \ mbstat.m_clfree--; \ - mclfree = ((union mcluster *)_mp)->mcl_next; \ - splx(_ms); \ + mclfree->m_head = ((union mcluster *)_mp)->mcl_next; \ + mtx_exit(&mclfree->m_mtx, MTX_DEF); \ } else { \ - splx(_ms); \ + mtx_exit(&mclfree->m_mtx, MTX_DEF); \ if (_mhow == M_WAIT) \ _mp = m_clalloc_wait(); \ } \ @@ -415,37 +418,46 @@ \ _MCLALLOC(_mm->m_ext.ext_buf, (how)); \ if (_mm->m_ext.ext_buf != NULL) { \ - _mm->m_data = _mm->m_ext.ext_buf; \ - _mm->m_flags |= M_EXT; \ - _mm->m_ext.ext_free = NULL; \ - _mm->m_ext.ext_args = NULL; \ - _mm->m_ext.ext_size = MCLBYTES; \ - MEXT_INIT_REF(_mm); \ + MEXT_INIT_REF(_mm, (how)); \ + if (_mm->m_ext.ref_cnt == NULL) { \ + _MCLFREE(_mm->m_ext.ext_buf); \ + _mm->m_ext.ext_buf = NULL; \ + } else { \ + _mm->m_data = _mm->m_ext.ext_buf; \ + _mm->m_flags |= M_EXT; \ + _mm->m_ext.ext_free = NULL; \ + _mm->m_ext.ext_args = NULL; \ + _mm->m_ext.ext_size = MCLBYTES; \ + } \ } \ } while (0) #define MEXTADD(m, buf, size, free, args) do { \ struct mbuf *_mm = (m); \ \ - _mm->m_flags |= M_EXT; \ - _mm->m_ext.ext_buf = (caddr_t)(buf); \ - _mm->m_data = _mm->m_ext.ext_buf; \ - _mm->m_ext.ext_size = (size); \ - _mm->m_ext.ext_free = (free); \ - _mm->m_ext.ext_args = (args); \ - MEXT_INIT_REF(_mm); \ + MEXT_INIT_REF(_mm, M_WAIT); \ + if (_mm->m_ext.ref_cnt != NULL) { \ + _mm->m_flags |= M_EXT; \ + _mm->m_ext.ext_buf = (caddr_t)(buf); \ + _mm->m_data = _mm->m_ext.ext_buf; \ + _mm->m_ext.ext_size = (size); \ + _mm->m_ext.ext_free = (free); \ + _mm->m_ext.ext_args = (args); \ + } \ } while (0) -#define _MCLFREE(p) MBUFLOCK( \ +#define _MCLFREE(p) do { \ union mcluster *_mp = (union mcluster *)(p); \ \ - _mp->mcl_next = mclfree; \ - mclfree = _mp; \ + mtx_enter(&mclfree->m_mtx, MTX_DEF); \ + _mp->mcl_next = mclfree->m_head; \ + mclfree->m_head = _mp; \ mbstat.m_clfree++; \ - MCLWAKEUP(); \ -) + MBWAKEUP(m_clalloc_wid); \ + mtx_exit(&mclfree->m_mtx, MTX_DEF); \ +} while (0) -#define _MEXTFREE(m) do { \ +#define MEXTFREE(m) do { \ struct mbuf *_mmm = (m); \ \ if (MEXT_IS_REF(_mmm)) \ @@ -461,29 +473,27 @@ _mmm->m_flags &= ~M_EXT; \ } while (0) -#define MEXTFREE(m) MBUFLOCK( \ - _MEXTFREE(m); \ -) - /* * MFREE(struct mbuf *m, struct mbuf *n) * Free a single mbuf and associated external storage. * Place the successor, if any, in n. */ -#define MFREE(m, n) MBUFLOCK( \ +#define MFREE(m, n) do { \ struct mbuf *_mm = (m); \ \ KASSERT(_mm->m_type != MT_FREE, ("freeing free mbuf")); \ if (_mm->m_flags & M_EXT) \ - _MEXTFREE(_mm); \ + MEXTFREE(_mm); \ + mtx_enter(&mmbfree->m_mtx, MTX_DEF); \ mbtypes[_mm->m_type]--; \ _mm->m_type = MT_FREE; \ mbtypes[MT_FREE]++; \ (n) = _mm->m_next; \ - _mm->m_next = mmbfree; \ - mmbfree = _mm; \ - MMBWAKEUP(); \ -) + _mm->m_next = mmbfree->m_head; \ + mmbfree->m_head = _mm; \ + MBWAKEUP(m_mballoc_wid); \ + mtx_exit(&mmbfree->m_mtx, MTX_DEF); \ +} while (0) /* * Copy mbuf pkthdr from "from" to "to". 
@@ -557,15 +567,15 @@ *_mmp = _mm; \ } while (0) -/* change mbuf to new type */ +/* + * change mbuf to new type + */ #define MCHTYPE(m, t) do { \ struct mbuf *_mm = (m); \ int _mt = (t); \ - int _ms = splimp(); \ \ - mbtypes[_mm->m_type]--; \ - mbtypes[_mt]++; \ - splx(_ms); \ + atomic_subtract_long(mbtypes[_mm->m_type], 1); \ + atomic_add_long(mbtypes[_mt], 1); \ _mm->m_type = (_mt); \ } while (0) @@ -584,8 +594,8 @@ }; #ifdef _KERNEL -extern u_int m_clalloc_wid; /* mbuf cluster wait count */ -extern u_int m_mballoc_wid; /* mbuf wait count */ +extern u_long m_clalloc_wid; /* mbuf cluster wait count */ +extern u_long m_mballoc_wid; /* mbuf wait count */ extern int max_linkhdr; /* largest link-level header */ extern int max_protohdr; /* largest protocol header */ extern int max_hdr; /* largest link+protocol header */ @@ -594,15 +604,16 @@ extern u_long mbtypes[MT_NTYPES]; /* per-type mbuf allocations */ extern int mbuf_wait; /* mbuf sleep time */ extern struct mbuf *mbutl; /* virtual address of mclusters */ -extern union mcluster *mclfree; -extern struct mbuf *mmbfree; -extern union mext_refcnt *mext_refcnt_free; +extern struct mclfree_lst mclfree_lst_hdr, *mclfree; +extern struct mbffree_lst mbffree_lst_hdr, *mmbfree; +extern struct mcntfree_lst mcntfree_lst_hdr, *mcntfree; extern int nmbclusters; extern int nmbufs; extern int nsfbufs; void m_adj __P((struct mbuf *, int)); -int m_alloc_ref __P((u_int)); +int m_alloc_ref __P((u_int, int)); +void mbinit __P((void)); void m_cat __P((struct mbuf *,struct mbuf *)); int m_clalloc __P((int, int)); caddr_t m_clalloc_wait __P((void)); @@ -619,13 +630,12 @@ struct mbuf *m_getclr __P((int, int)); struct mbuf *m_gethdr __P((int, int)); int m_mballoc __P((int, int)); -struct mbuf *m_mballoc_wait __P((int, int)); +struct mbuf *m_mballoc_wait __P((int)); +struct mbuf *m_mballoc_wait_hdr __P((int)); struct mbuf *m_prepend __P((struct mbuf *,int,int)); struct mbuf *m_pulldown __P((struct mbuf *, int, int, int *)); void m_print __P((const struct mbuf *m)); struct mbuf *m_pullup __P((struct mbuf *, int)); -struct mbuf *m_retry __P((int, int)); -struct mbuf *m_retryhdr __P((int, int)); struct mbuf *m_split __P((struct mbuf *,int,int)); struct mbuf *m_aux_add __P((struct mbuf *, int, int)); struct mbuf *m_aux_find __P((struct mbuf *, int, int));
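
A note on the data-structure change the patch is built around: the splimp()-protected global pointers (mmbfree, mclfree, mext_refcnt_free) become small free-list headers, each pairing the list head with its own mutex (mbffree_lst, mclfree_lst, mcntfree_lst), so the mbuf, cluster and reference-counter lists can be worked on concurrently from different CPUs. The following is a minimal userland sketch of that discipline only; pthreads stands in for the kernel's struct mtx, and the freelist/node/fl_* names are illustrative, not taken from the tree.

/*
 * Userland sketch (not kernel code) of the per-free-list locking scheme:
 * one head pointer plus one lock per list, pop and push under the lock.
 */
#include <pthread.h>
#include <stddef.h>

struct node {
        struct node *next;
};

struct freelist {
        struct node *head;
        pthread_mutex_t lock;
};

/* Pop one object, or NULL if the list is empty (caller then allocates more). */
static struct node *
fl_pop(struct freelist *fl)
{
        struct node *n;

        pthread_mutex_lock(&fl->lock);
        n = fl->head;
        if (n != NULL)
                fl->head = n->next;
        pthread_mutex_unlock(&fl->lock);
        return (n);
}

/* Push a freed object back; mirrors the MFREE/_MCLFREE path. */
static void
fl_push(struct freelist *fl, struct node *n)
{
        pthread_mutex_lock(&fl->lock);
        n->next = fl->head;
        fl->head = n;
        pthread_mutex_unlock(&fl->lock);
}

The same pop-under-lock / push-under-lock pairing is what the rewritten MGET, MCLGET and MFREE macros expand to.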
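
When the list is empty, the rewritten MGET/_MCLALLOC macros drop the list mutex, call the back-end allocator (m_mballoc()/m_clalloc()), and rely on the allocator returning with the mutex re-acquired on success, so the subsequent head check always happens with the lock held. Below is a hedged sketch of that hand-off, reusing the freelist/node types from the sketch above; fl_grow() and fl_alloc() are invented names and calloc() merely stands in for kmem_malloc().

#include <stdlib.h>

/* Refill the list; on success this returns with fl->lock still held. */
static int
fl_grow(struct freelist *fl, int n)
{
        struct node *batch;
        int i;

        batch = calloc(n, sizeof(*batch));      /* stand-in for kmem_malloc() */
        if (batch == NULL)
                return (0);                     /* failure: lock is NOT held */
        pthread_mutex_lock(&fl->lock);
        for (i = 0; i < n; i++) {
                batch[i].next = fl->head;
                fl->head = &batch[i];
        }
        return (1);                             /* success: caller must unlock */
}

static struct node *
fl_alloc(struct freelist *fl)
{
        struct node *n;

        pthread_mutex_lock(&fl->lock);
        if (fl->head == NULL) {
                pthread_mutex_unlock(&fl->lock);
                if (fl_grow(fl, 16) == 0)
                        pthread_mutex_lock(&fl->lock);  /* failed: re-check, someone may have freed */
                /* otherwise fl_grow() left fl->lock held for us */
        }
        n = fl->head;
        if (n != NULL)
                fl->head = n->next;
        pthread_mutex_unlock(&fl->lock);
        return (n);     /* NULL here is where m_mballoc_wait()/m_clalloc_wait() would take over */
}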
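
The asleep()/await() sequence in m_mballoc_wait() and m_clalloc_wait() exists so that the waiter is queued before it commits to sleeping, and MBWAKEUP() only pays for a wakeup_one() when the counter says somebody is actually waiting. The model below expresses the same rules with a pthread condition variable (an assumption of the sketch; the kernel interface is asleep()/await(), not pthreads): bump the waiter count before blocking, let the waker decrement it when it signals, and let a timed-out waiter deregister itself.

#include <errno.h>
#include <pthread.h>
#include <time.h>

static pthread_mutex_t fl_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t fl_cv = PTHREAD_COND_INITIALIZER;
static unsigned long fl_waiters;        /* plays the role of m_mballoc_wid */
static void *fl_head;                   /* non-NULL when an object is free */

/* Free side: equivalent of MBWAKEUP(m_mballoc_wid); called with fl_lock held. */
static void
fl_wakeup_one(void)
{
        if (fl_waiters > 0) {
                fl_waiters--;
                pthread_cond_signal(&fl_cv);
        }
}

static void
fl_free(void *obj)
{
        pthread_mutex_lock(&fl_lock);
        fl_head = obj;
        fl_wakeup_one();
        pthread_mutex_unlock(&fl_lock);
}

/* Allocation slow path: register, sleep a bounded time, then try once more. */
static void *
fl_wait(const struct timespec *deadline)
{
        void *p;

        pthread_mutex_lock(&fl_lock);
        if (fl_head == NULL) {
                fl_waiters++;           /* register before blocking */
                if (pthread_cond_timedwait(&fl_cv, &fl_lock, deadline) ==
                    ETIMEDOUT && fl_waiters > 0)
                        fl_waiters--;   /* timed out: deregister ourselves */
        }
        p = fl_head;                    /* may still be NULL; caller gives up */
        fl_head = NULL;
        pthread_mutex_unlock(&fl_lock);
        return (p);
}

As in the patch, a wakeup that races with the timeout is tolerated rather than prevented: the waiter simply retries once and, if it still gets nothing, counts a drop.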
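
Statistics that used to be bumped under splimp() (mbstat.m_drops, mbstat.m_wait, the mbtypes[] array in MCHTYPE) are now updated with atomic_add_long()/atomic_subtract_long(), which operate on the address of the counter, so no list lock is needed just to account for a drop or a type change. A small C11 equivalent, with invented counter names:

#include <stdatomic.h>

static _Atomic unsigned long stat_drops;        /* cf. mbstat.m_drops */
static _Atomic unsigned long type_counts[16];   /* cf. mbtypes[MT_NTYPES] */

static void
record_drop(void)
{
        /* Same effect as atomic_add_long(&mbstat.m_drops, 1). */
        atomic_fetch_add_explicit(&stat_drops, 1, memory_order_relaxed);
}

/* Same bookkeeping as MCHTYPE: move one buffer from type 'from' to type 'to'. */
static void
change_type(int from, int to)
{
        atomic_fetch_sub_explicit(&type_counts[from], 1, memory_order_relaxed);
        atomic_fetch_add_explicit(&type_counts[to], 1, memory_order_relaxed);
}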