diff -ruN /usr/src.org/sys/alpha/alpha/machdep.c /usr/src/sys/alpha/alpha/machdep.c --- /usr/src.org/sys/alpha/alpha/machdep.c Mon Sep 11 20:34:19 2000 +++ /usr/src/sys/alpha/alpha/machdep.c Mon Sep 11 21:59:04 2000 @@ -185,8 +185,6 @@ static void cpu_startup __P((void *)); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) -static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); - struct msgbuf *msgbufp=0; int bootverbose = 0, Maxmem = 0; @@ -373,18 +371,12 @@ (16*(ARG_MAX+(PAGE_SIZE*3)))); /* - * Finally, allocate mbuf pool. + * Initialize mbuf system. + * Doing this early on (as opposed to through SYSINIT) is good as + * we want to make sure that the mutex locks are setup prior to + * network device drivers doing their stuff. */ - { - vm_offset_t mb_map_size; - - mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES + - (nmbclusters + nmbufs / 4) * sizeof(union mext_refcnt); - mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); - mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, - &maxaddr, mb_map_size); - mb_map->system_map = 1; - } + mbinit(); /* * Initialize callouts diff -ruN /usr/src.org/sys/i386/i386/machdep.c /usr/src/sys/i386/i386/machdep.c --- /usr/src.org/sys/i386/i386/machdep.c Mon Sep 11 20:35:23 2000 +++ /usr/src/sys/i386/i386/machdep.c Mon Sep 11 21:58:02 2000 @@ -139,8 +139,6 @@ static void cpu_startup __P((void *)); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) -static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); - int _udatasel, _ucodesel; u_int atdevbase; @@ -399,18 +397,12 @@ (16*(ARG_MAX+(PAGE_SIZE*3)))); /* - * Finally, allocate mbuf pool. + * Initialize mbuf system. + * Doing this early on (as opposed to through SYSINIT) is good + * as we want to make sure that the mutex locks are setup prior to + * network device drivers doing their stuff. */ - { - vm_offset_t mb_map_size; - - mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES + - (nmbclusters + nmbufs / 4) * sizeof(union mext_refcnt); - mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); - mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, - &maxaddr, mb_map_size); - mb_map->system_map = 1; - } + mbinit(); /* * Initialize callouts diff -ruN /usr/src.org/sys/kern/uipc_mbuf.c /usr/src/sys/kern/uipc_mbuf.c --- /usr/src.org/sys/kern/uipc_mbuf.c Mon Sep 11 20:35:26 2000 +++ /usr/src/sys/kern/uipc_mbuf.c Wed Sep 13 21:56:16 2000 @@ -48,28 +48,40 @@ #include #include +#include +#include +#include +#include +#include + #ifdef INVARIANTS #include #endif -static void mbinit __P((void *)); -SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL) - struct mbuf *mbutl; struct mbstat mbstat; u_long mbtypes[MT_NTYPES]; -struct mbuf *mmbfree; -union mcluster *mclfree; -union mext_refcnt *mext_refcnt_free; int max_linkhdr; int max_protohdr; int max_hdr; int max_datalen; int nmbclusters; int nmbufs; -u_int m_mballoc_wid = 0; -u_int m_clalloc_wid = 0; +u_long m_mballoc_wid = 0; +u_long m_clalloc_wid = 0; +u_long m_refalloc_wid = 0; +/* + * freelist header structures... 
+ * mbffree_lst, mclfree_lst, mcntfree_lst + */ +struct mbffree_lst mbffree_lst_hdr, *mmbfree; +struct mclfree_lst mclfree_lst_hdr, *mclfree; +struct mcntfree_lst mcntfree_lst_hdr, *mcntfree; + +/* + * sysctl(8) exported objects + */ SYSCTL_DECL(_kern_ipc); SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW, &max_linkhdr, 0, ""); @@ -95,41 +107,90 @@ static void m_reclaim __P((void)); +/* Initial allocation numbers */ #define NCL_INIT 2 #define NMB_INIT 16 #define REF_INIT (NMBCLUSTERS * 2) -/* ARGSUSED*/ -static void -mbinit(dummy) - void *dummy; +/* + * Full mbuf subsystem initialization done here. + * + * XXX: If ever we have system specific map setups to do, then move them to + * machdep.c - for now, there is no reason for this stuff to go there. + * We just call this explicitly, as most of the stuff that needs to get + * done here should be done early on (i.e. from cpu_startup) anyway. + */ +void +mbinit(void) { - int s; + vm_offset_t maxaddr, mb_map_size; - mmbfree = NULL; - mclfree = NULL; - mext_refcnt_free = NULL; + /* + * Setup the mb_map, allocate requested VM space. + */ + mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES + (nmbclusters + + nmbufs / 4) * sizeof(union mext_refcnt); + mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); + mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, + mb_map_size); + /* XXX: mb_map->system_map = 1; */ + /* + * Initialize the free list headers, and setup locks for lists. + */ + mmbfree = (struct mbffree_lst *)&mbffree_lst_hdr; + mclfree = (struct mclfree_lst *)&mclfree_lst_hdr; + mcntfree = (struct mcntfree_lst *)&mcntfree_lst_hdr; + mmbfree->m_head = NULL; + mclfree->m_head = NULL; + mcntfree->m_head = NULL; + mtx_init(&mmbfree->m_mtx, "mbuf free list lock", MTX_DEF); + mtx_init(&mclfree->m_mtx, "mcluster free list lock", MTX_DEF); + mtx_init(&mcntfree->m_mtx, "m_ext counter free list lock", MTX_DEF); + + /* + * Initialize mbuf subsystem (sysctl exported) statistics structure. + */ mbstat.m_msize = MSIZE; mbstat.m_mclbytes = MCLBYTES; mbstat.m_minclsize = MINCLSIZE; mbstat.m_mlen = MLEN; mbstat.m_mhlen = MHLEN; - s = splimp(); - if (m_alloc_ref(REF_INIT) == 0) + /* + * Perform some initial allocations. + * If allocations are succesful, locks are obtained in the allocation + * routines, so we must release them if it works. + * + * XXX: We try to allocate as many reference counters as we'll + * most need throughout the system's lifespan. + * XXXXXX: Make sure we check whether the MCLBYTES > PAGE_SIZE + * is still useful before bringing this in. + */ + if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0) goto bad; + else + mtx_exit(&mcntfree->m_mtx, MTX_DEF); + if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0) goto bad; + else + mtx_exit(&mmbfree->m_mtx, MTX_DEF); + #if MCLBYTES <= PAGE_SIZE if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0) goto bad; + else + mtx_exit(&mclfree->m_mtx, MTX_DEF); #else + /* XXXXXX */ /* It's OK to call contigmalloc in this context. */ if (m_clalloc(16, M_WAIT) == 0) goto bad; + else + mtx_exit(&mclfree->m_mtx, MTX_DEF); #endif - splx(s); + return; bad: panic("mbinit: failed to initialize mbuf subsystem!"); @@ -138,11 +199,11 @@ /* * Allocate at least nmb reference count structs and place them * on the ref cnt free list. - * Must be called at splimp. */ int -m_alloc_ref(nmb) +m_alloc_ref(nmb, how) u_int nmb; + int how; { caddr_t p; u_int nbytes; @@ -157,18 +218,29 @@ * and if we are, we're probably not out of the woods anyway, * so leave this way for now. 
*/ - if (mb_map_full) return (0); nbytes = round_page(nmb * sizeof(union mext_refcnt)); - if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT)) == NULL) + mtx_enter(&Giant, MTX_DEF); + if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_WAIT ? M_WAIT : + M_NOWAIT)) == NULL) { + mtx_exit(&Giant, MTX_DEF); return (0); + } + mtx_exit(&Giant, MTX_DEF); nmb = nbytes / sizeof(union mext_refcnt); + /* + * We don't let go of the mutex in order to avoid a race. + * It is up to the caller to let go of the mutex if the call + * was successful or just do nothing if it failed, because in that + * case, we wouldn't have grabbed the mutex at all. + */ + mtx_enter(&mcntfree->m_mtx, MTX_DEF); for (i = 0; i < nmb; i++) { - ((union mext_refcnt *)p)->next_ref = mext_refcnt_free; - mext_refcnt_free = (union mext_refcnt *)p; + ((union mext_refcnt *)p)->next_ref = mcntfree->m_head; + mcntfree->m_head = (union mext_refcnt *)p; p += sizeof(union mext_refcnt); mbstat.m_refree++; } @@ -178,10 +250,50 @@ } /* + * External reference counter allocator's wait routine... + * (analoguous to m_mballoc_wait and m_clalloc_wait). + */ +union mext_refcnt * +m_cntalloc_wait(void) +{ + union mext_refcnt *p; + + /* + * This avoids a potential race. What we do is first place ourselves + * in the queue (with asleep) and then later do an actual await() so + * that if we happen to get a wakeup in between, that the await() does + * effectively nothing. Otherwise, what could happen is that we + * increment m_refalloc_wid, at which point it will be decremented + * by a (racing) MBWAKEUP(), yet we will sleep on it nonetheless and + * risk never being woken up (i.e. sleep on a m_refalloc_wid of 0)! + */ + asleep(&m_refalloc_wid, PVM, "refalc", mbuf_wait); + atomic_add_long(&m_refalloc_wid, 1); + if (await(PVM, mbuf_wait) == EWOULDBLOCK) + atomic_subtract_long(&m_refalloc_wid, 1); + + /* + * Now that we (think) that we've got something, try again. + */ + p = NULL; + _MEXT_ALLOC_CNT(p, M_DONTWAIT); + + /* If we waited and got something ... */ + if (p != NULL) { + atomic_add_long(&mbstat.m_wait, 1); + mtx_enter(&mcntfree->m_mtx, MTX_DEF); + if (mcntfree->m_head != NULL) + MBWAKEUP(m_refalloc_wid); + mtx_exit(&mcntfree->m_mtx, MTX_DEF); + } else + atomic_add_long(&mbstat.m_drops, 1); + + return (p); +} + +/* * Allocate at least nmb mbufs and place on mbuf free list. - * Must be called at splimp. */ -/* ARGSUSED */ int m_mballoc(nmb, how) register int nmb; @@ -192,44 +304,43 @@ int nbytes; /* - * If we've hit the mbuf limit, stop allocating from mb_map, - * (or trying to) in order to avoid dipping into the section of - * mb_map which we've "reserved" for clusters. - */ - if ((nmb + mbstat.m_mbufs) > nmbufs) - return (0); - - /* - * Once we run out of map space, it will be impossible to get - * any more (nothing is ever freed back to the map) - * -- however you are not dead as m_reclaim might - * still be able to free a substantial amount of space. - * - * XXX Furthermore, we can also work with "recycled" mbufs (when - * we're calling with M_WAIT the sleep procedure will be woken - * up when an mbuf is freed. See m_mballoc_wait()). + * If we've hit the mbuf limit, stop allocating from mb_map. + * Also, once we run out of map space, it will be impossible to + * get any more (nothing is ever freed back to the map). 
*/ - if (mb_map_full) + if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs)) { + atomic_add_long(&mbstat.m_drops, 1); return (0); + } nbytes = round_page(nmb * MSIZE); + mtx_enter(&Giant, MTX_DEF); p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT); if (p == 0 && how == M_WAIT) { - mbstat.m_wait++; + atomic_add_long(&mbstat.m_wait, 1); p = (caddr_t)kmem_malloc(mb_map, nbytes, M_WAITOK); } + mtx_exit(&Giant, MTX_DEF); /* - * Either the map is now full, or `how' is M_NOWAIT and there + * Either the map is now full, or `how' is M_DONTWAIT and there * are no pages left. */ if (p == NULL) return (0); nmb = nbytes / MSIZE; + + /* + * We don't let go of the mutex in order to avoid a race. + * It is up to the caller to let go of the mutex if the call + * was successful or just do nothing if it failed, because in that + * case, we wouldn't have grabbed the mutex at all. + */ + mtx_enter(&mmbfree->m_mtx, MTX_DEF); for (i = 0; i < nmb; i++) { - ((struct mbuf *)p)->m_next = mmbfree; - mmbfree = (struct mbuf *)p; + ((struct mbuf *)p)->m_next = mmbfree->m_head; + mmbfree->m_head = (struct mbuf *)p; p += MSIZE; } mbstat.m_mbufs += nmb; @@ -244,47 +355,120 @@ * designated (mbuf_wait) time. */ struct mbuf * -m_mballoc_wait(int caller, int type) +m_mballoc_wait(int type) { - struct mbuf *p; - int s; + struct mbuf *p = NULL; - s = splimp(); - m_mballoc_wid++; - if ((tsleep(&m_mballoc_wid, PVM, "mballc", mbuf_wait)) == EWOULDBLOCK) - m_mballoc_wid--; - splx(s); - - /* - * Now that we (think) that we've got something, we will redo an - * MGET, but avoid getting into another instance of m_mballoc_wait() - * XXX: We retry to fetch _even_ if the sleep timed out. This is left - * this way, purposely, in the [unlikely] case that an mbuf was - * freed but the sleep was not awakened in time. + /* + * See if we can drain some resources out of the protocols. */ - p = NULL; - switch (caller) { - case MGET_C: + m_reclaim(); + + /* + * Try again. Even though we may have already lost from a race, + * it's not so much of a big deal - at least somebody profited + * from our drain. + */ + MGET(p, M_DONTWAIT, type); + + /* + * This avoids a potential race. What we do is first place ourselves + * in the queue (with asleep) and then later do an actual await() so + * that if we happen to get a wakeup() in between, that the await() + * does effectively nothing. Otherwise, what could happen is that + * we increment m_mballoc_wid, at which point it will be decremented + * by a (racing) MBWAKEUP(), yet we will sleep on it nonetheless and + * risk never being woken up (i.e. sleep on a m_mballoc_wid of 0)! + */ + if (p == NULL) { + asleep(&m_mballoc_wid, PVM, "mballc", mbuf_wait); + atomic_add_long(&m_mballoc_wid, 1); + if (await(PVM, mbuf_wait) == EWOULDBLOCK) + atomic_subtract_long(&m_mballoc_wid, 1); + + /* + * Try again (one last time). + * + * XXX: We retry to fetch _even_ if the sleep timed out. This + * is left this way, purposely, in the [unlikely] case + * that an mbuf was freed but the sleep was not awoken + * in time; then we are willing to race for it. If the + * sleep didn't time out (i.e. we got woken up) then + * we race for our mbuf with the other processors. + */ MGET(p, M_DONTWAIT, type); - break; - case MGETHDR_C: - MGETHDR(p, M_DONTWAIT, type); - break; - default: - panic("m_mballoc_wait: invalid caller (%d)", caller); } - s = splimp(); - if (p != NULL) { /* We waited and got something... */ - mbstat.m_wait++; - /* Wake up another if we have more free. 
*/ - if (mmbfree != NULL) - MMBWAKEUP(); + /* If we waited and got something... */ + if (p != NULL) { + atomic_add_long(&mbstat.m_wait, 1); + mtx_enter(&mmbfree->m_mtx, MTX_DEF); + if (mmbfree->m_head != NULL) + MBWAKEUP(m_mballoc_wid); + mtx_exit(&mmbfree->m_mtx, MTX_DEF); + } else + atomic_add_long(&mbstat.m_drops, 1); + + return (p); +} + +/* + * Same as above, except expanded out (in our tradition) for MGETHDR. + */ +struct mbuf * +m_mballoc_wait_hdr(int type) +{ + struct mbuf *p = NULL; + + /* + * Try again. Even though we may have already lost from a race, + * it's not so much of a big deal - at least somebody profited + * from our drain. + */ + MGETHDR(p, M_DONTWAIT, type); + + /* + * This avoids a potential race. What we do is first place ourselves + * in the queue (with asleep) and then later do an actual await() so + * that if we happen to get a wakeup() in between, that the await() + * does effectively nothing. Otherwise, what could happen is that + * we increment m_mballoc_wid, at which point it will be decremented + * by a (racing) MBWAKEUP(), yet we will sleep on it nonetheless and + * risk never being woken up (i.e. sleep on a m_mballoc_wid of 0)! + */ + if (p == NULL) { + asleep(&m_mballoc_wid, PVM, "mballc", mbuf_wait); + atomic_add_long(&m_mballoc_wid, 1); + if (await(PVM, mbuf_wait) == EWOULDBLOCK) + atomic_subtract_long(&m_mballoc_wid, 1); + + /* + * Try again (one last time). + * + * XXX: We retry to fetch _even_ if the sleep timed out. This + * is left this way, purposely, in the [unlikely] case + * that an mbuf was freed but the sleep was not awoken + * in time; then we are willing to race for it. If the + * sleep didn't time out (i.e. we got woken up) then + * we race for our mbuf with the other processors. + */ + MGETHDR(p, M_DONTWAIT, type); } - splx(s); + + /* If we waited and got something... */ + if (p != NULL) { + atomic_add_long(&mbstat.m_wait, 1); + mtx_enter(&mmbfree->m_mtx, MTX_DEF); + if (mmbfree->m_head != NULL) + MBWAKEUP(m_mballoc_wid); + mtx_exit(&mmbfree->m_mtx, MTX_DEF); + } else + atomic_add_long(&mbstat.m_drops, 1); + return (p); } +/* XXXXXX */ #if MCLBYTES > PAGE_SIZE static int i_want_my_mcl; @@ -312,13 +496,12 @@ SYSINIT(mclallocproc, SI_SUB_KTHREAD_UPDATE, SI_ORDER_ANY, kproc_start, &mclalloc_kp); #endif +/* XXXXXX */ /* * Allocate some number of mbuf clusters * and place on cluster free list. - * Must be called at splimp. */ -/* ARGSUSED */ int m_clalloc(ncl, how) register int ncl; @@ -329,54 +512,53 @@ int npg; /* + * If the map is now full (nothing will ever be freed to it). * If we've hit the mcluster number limit, stop allocating from - * mb_map, (or trying to) in order to avoid dipping into the section - * of mb_map which we've "reserved" for mbufs. - */ - if ((ncl + mbstat.m_clusters) > nmbclusters) { - mbstat.m_drops++; - return (0); - } - - /* - * Once we run out of map space, it will be impossible - * to get any more (nothing is ever freed back to the - * map). From this point on, we solely rely on freed - * mclusters. + * mb_map. 
*/ - if (mb_map_full) { - mbstat.m_drops++; + if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters)) { + atomic_add_long(&mbstat.m_drops, 1); return (0); } +/* XXXXXX */ #if MCLBYTES > PAGE_SIZE if (how != M_WAIT) { i_want_my_mcl += ncl; wakeup(&i_want_my_mcl); - mbstat.m_wait++; + atomic_add_long(&mbstat.m_wait, 1); p = 0; } else { p = contigmalloc1(MCLBYTES * ncl, M_DEVBUF, M_WAITOK, 0ul, ~0ul, PAGE_SIZE, 0, mb_map); } -#else +#else /* XXXXXX */ npg = ncl; + mtx_enter(&Giant, MTX_DEF); p = (caddr_t)kmem_malloc(mb_map, ctob(npg), how != M_WAIT ? M_NOWAIT : M_WAITOK); + mtx_exit(&Giant, MTX_DEF); ncl = ncl * PAGE_SIZE / MCLBYTES; #endif /* - * Either the map is now full, or `how' is M_NOWAIT and there + * Either the map is now full, or `how' is M_DONTWAIT and there * are no pages left. */ if (p == NULL) { - mbstat.m_drops++; + atomic_add_long(&mbstat.m_drops, 1); return (0); } + /* + * We don't let go of the mutex in order to avoid a race. + * It is up to the caller to let go of the mutex if the call + * was successful or just do nothing if it failed, because in that + * case, we wouldn't have grabbed the mutex at all. + */ + mtx_enter(&mclfree->m_mtx, MTX_DEF); for (i = 0; i < ncl; i++) { - ((union mcluster *)p)->mcl_next = mclfree; - mclfree = (union mcluster *)p; + ((union mcluster *)p)->mcl_next = mclfree->m_head; + mclfree->m_head = (union mcluster *)p; p += MCLBYTES; mbstat.m_clfree++; } @@ -389,129 +571,63 @@ * M_WAIT, we rely on the mclfree union pointers. If nothing is free, we will * sleep for a designated amount of time (mbuf_wait) or until we're woken up * due to sudden mcluster availability. + * + * Must be called with no held mutexes... may block. */ caddr_t m_clalloc_wait(void) { caddr_t p; - int s; - -#ifdef __i386__ - /* If in interrupt context, and INVARIANTS, maintain sanity and die. */ - KASSERT(intr_nesting_level == 0, ("CLALLOC: CANNOT WAIT IN INTERRUPT")); -#endif - /* Sleep until something's available or until we expire. */ - m_clalloc_wid++; - if ((tsleep(&m_clalloc_wid, PVM, "mclalc", mbuf_wait)) == EWOULDBLOCK) - m_clalloc_wid--; + /* + * This avoids a potential race. What we do is first place ourselves + * in the queue (with asleep) and then later do an actual await() so + * that if we happen to get a wakeup() in between, that the await() + * does effectively nothing. Otherwise, what could happen is that + * we increment m_clalloc_wid, at which point it will be decremented + * by a (racing) MBWAKEUP(), yet we will sleep on it nonetheless and + * risk never being woken up (i.e. sleep on a m_clalloc_wid of 0)! + */ + asleep(&m_clalloc_wid, PVM, "mclalc", mbuf_wait); + atomic_add_long(&m_clalloc_wid, 1); + if (await(PVM, mbuf_wait) == EWOULDBLOCK) + atomic_subtract_long(&m_clalloc_wid, 1); /* - * Now that we (think) that we've got something, we will redo and - * MGET, but avoid getting into another instance of m_clalloc_wait() + * Now that we (think) that we've got something, try again. */ p = NULL; _MCLALLOC(p, M_DONTWAIT); - s = splimp(); - if (p != NULL) { /* We waited and got something... */ - mbstat.m_wait++; - /* Wake up another if we have more free. */ - if (mclfree != NULL) - MCLWAKEUP(); - } + /* If we waited and got something ... 
*/ + if (p != NULL) { + atomic_add_long(&mbstat.m_wait, 1); + mtx_enter(&mclfree->m_mtx, MTX_DEF); + if (mclfree->m_head != NULL) + MBWAKEUP(m_clalloc_wid); + mtx_exit(&mclfree->m_mtx, MTX_DEF); + } else + atomic_add_long(&mbstat.m_drops, 1); - splx(s); return (p); } -/* - * When MGET fails, ask protocols to free space when short of memory, - * then re-attempt to allocate an mbuf. - */ -struct mbuf * -m_retry(i, t) - int i, t; -{ - register struct mbuf *m; - - /* - * Must only do the reclaim if not in an interrupt context. - */ - if (i == M_WAIT) { -#ifdef __i386__ - KASSERT(intr_nesting_level == 0, - ("MBALLOC: CANNOT WAIT IN INTERRUPT")); -#endif - m_reclaim(); - } - - /* - * Both m_mballoc_wait and m_retry must be nulled because - * when the MGET macro is run from here, we deffinately do _not_ - * want to enter an instance of m_mballoc_wait() or m_retry() (again!) - */ -#define m_mballoc_wait(caller,type) (struct mbuf *)0 -#define m_retry(i, t) (struct mbuf *)0 - MGET(m, i, t); -#undef m_retry -#undef m_mballoc_wait - - if (m != NULL) - mbstat.m_wait++; - else - mbstat.m_drops++; - - return (m); -} - -/* - * As above; retry an MGETHDR. - */ -struct mbuf * -m_retryhdr(i, t) - int i, t; -{ - register struct mbuf *m; - - /* - * Must only do the reclaim if not in an interrupt context. - */ - if (i == M_WAIT) { -#ifdef __i386__ - KASSERT(intr_nesting_level == 0, - ("MBALLOC: CANNOT WAIT IN INTERRUPT")); -#endif - m_reclaim(); - } - -#define m_mballoc_wait(caller,type) (struct mbuf *)0 -#define m_retryhdr(i, t) (struct mbuf *)0 - MGETHDR(m, i, t); -#undef m_retryhdr -#undef m_mballoc_wait - - if (m != NULL) - mbstat.m_wait++; - else - mbstat.m_drops++; - - return (m); -} - static void m_reclaim() { register struct domain *dp; register struct protosw *pr; - int s = splimp(); + /* + * !!!XXX!!!: Decide what to do: either grab mmbfree's lock and + * recurse into it from within or let things run their way and + * allow people to grab our freed stuff as we go along. + */ for (dp = domains; dp; dp = dp->dom_next) for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_drain) (*pr->pr_drain)(); - splx(s); - mbstat.m_drain++; + atomic_add_long(&mbstat.m_drain, 1); } /* @@ -685,11 +801,11 @@ np = &n->m_next; } if (top == 0) - MCFail++; + atomic_add_long(&MCFail, 1); return (top); nospace: m_freem(top); - MCFail++; + atomic_add_long(&MCFail, 1); return (0); } @@ -746,7 +862,7 @@ return top; nospace: m_freem(top); - MCFail++; + atomic_add_long(&MCFail, 1); return 0; } @@ -853,7 +969,7 @@ nospace: m_freem(top); - MCFail++; + atomic_add_long(&MCFail, 1); return (0); } @@ -1022,7 +1138,7 @@ return (m); bad: m_freem(n); - MPFail++; + atomic_add_long(&MPFail, 1); return (0); } diff -ruN /usr/src.org/sys/pc98/i386/machdep.c /usr/src/sys/pc98/i386/machdep.c --- /usr/src.org/sys/pc98/i386/machdep.c Mon Sep 11 20:35:46 2000 +++ /usr/src/sys/pc98/i386/machdep.c Mon Sep 11 21:57:40 2000 @@ -145,8 +145,6 @@ static void cpu_startup __P((void *)); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) -static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); - #ifdef PC98 int need_pre_dma_flush; /* If 1, use wbinvd befor DMA transfer. */ int need_post_dma_flush; /* If 1, use invd after DMA transfer. */ @@ -413,18 +411,12 @@ (16*(ARG_MAX+(PAGE_SIZE*3)))); /* - * Finally, allocate mbuf pool. + * Initialize mbuf system. + * Doing this early on (as opposed to through SYSINIT) is good + * as we want to make sure that the mutex locks are setup prior to + * network device drivers doing their stuff. 
*/ - { - vm_offset_t mb_map_size; - - mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES + - (nmbclusters + nmbufs / 4) * sizeof(union mext_refcnt); - mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); - mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, - &maxaddr, mb_map_size); - mb_map->system_map = 1; - } + mbinit(); /* * Initialize callouts diff -ruN /usr/src.org/sys/sys/mbuf.h /usr/src/sys/sys/mbuf.h --- /usr/src.org/sys/sys/mbuf.h Mon Sep 11 20:35:50 2000 +++ /usr/src/sys/sys/mbuf.h Wed Sep 13 21:52:41 2000 @@ -37,6 +37,11 @@ #ifndef _SYS_MBUF_H_ #define _SYS_MBUF_H_ +#ifdef _KERNEL +#include +#include +#endif + /* * Mbufs are of a single size, MSIZE (machine/param.h), which * includes overhead. An mbuf may add a single "mbuf cluster" of size @@ -181,11 +186,11 @@ * mbuf statistics */ struct mbstat { - u_long m_mbufs; /* mbufs obtained from page pool */ - u_long m_clusters; /* clusters obtained from page pool */ - u_long m_clfree; /* free clusters */ - u_long m_refcnt; /* refcnt structs obtained from page pool */ - u_long m_refree; /* free refcnt structs */ + u_long m_mbufs; /* # mbufs obtained from page pool */ + u_long m_clusters; /* # clusters obtained from page pool */ + u_long m_clfree; /* # clusters on freelist (cache) */ + u_long m_refcnt; /* # ref counters obtained from page pool */ + u_long m_refree; /* # ref counters on freelist (cache) */ u_long m_spare; /* spare field */ u_long m_drops; /* times failed to find space */ u_long m_wait; /* times waited for space */ @@ -203,8 +208,7 @@ #define M_DONTWAIT 1 #define M_WAIT 0 -/* Freelists: - * +/* * Normal mbuf clusters are normally treated as character arrays * after allocation, but use the first word of the buffer as a free list * pointer while on the free list. @@ -214,15 +218,6 @@ char mcl_buf[MCLBYTES]; }; - -/* - * These are identifying numbers passed to the m_mballoc_wait function, - * allowing us to determine whether the call came from an MGETHDR or - * an MGET. - */ -#define MGETHDR_C 1 -#define MGET_C 2 - /* * The m_ext object reference counter structure. */ @@ -232,41 +227,32 @@ }; /* - * Wake up the next instance (if any) of m_mballoc_wait() which is - * waiting for an mbuf to be freed. This should be called at splimp(). - * - * XXX: If there is another free mbuf, this routine will be called [again] - * from the m_mballoc_wait routine in order to wake another sleep instance. + * free list header definitions: mbffree_lst, mclfree_lst, mcntfree_lst */ -#define MMBWAKEUP() do { \ - if (m_mballoc_wid) { \ - m_mballoc_wid--; \ - wakeup_one(&m_mballoc_wid); \ - } \ -} while (0) +struct mbffree_lst { + struct mbuf *m_head; + mtx_t m_mtx; +}; -/* - * Same as above, but for mbuf cluster(s). - */ -#define MCLWAKEUP() do { \ - if (m_clalloc_wid) { \ - m_clalloc_wid--; \ - wakeup_one(&m_clalloc_wid); \ - } \ -} while (0) +struct mclfree_lst { + union mcluster *m_head; + mtx_t m_mtx; +}; + +struct mcntfree_lst { + union mext_refcnt *m_head; + mtx_t m_mtx; +}; /* - * mbuf utility macros: - * - * MBUFLOCK(code) - * prevents a section of code from from being interrupted by network - * drivers. + * Wake up the next instance (if any) of a sleeping allocation - which is + * waiting for a {cluster, mbuf, counter} to be freed. 
*/ -#define MBUFLOCK(code) do { \ - int _ms = splimp(); \ - \ - { code } \ - splx(_ms); \ +#define MBWAKEUP(m_wid) do { \ + if ((m_wid)) { \ + atomic_subtract_long(&(m_wid), 1); \ + wakeup_one(&(m_wid)); \ + } \ } while (0) /* @@ -286,30 +272,44 @@ #define MEXT_ADD_REF(m) atomic_add_long(&((m)->m_ext.ref_cnt->refcnt), 1) -#define _MEXT_ALLOC_CNT(m_cnt) MBUFLOCK( \ +#define _MEXT_ALLOC_CNT(m_cnt, how) do { \ union mext_refcnt *__mcnt; \ \ - if ((mext_refcnt_free == NULL) && (m_alloc_ref(1) == 0)) \ - panic("mbuf subsystem: out of ref counts!"); \ - __mcnt = mext_refcnt_free; \ - mext_refcnt_free = __mcnt->next_ref; \ - __mcnt->refcnt = 0; \ + mtx_enter(&mcntfree->m_mtx, MTX_DEF); \ + if (mcntfree->m_head == NULL) { \ + mtx_exit(&mcntfree->m_mtx, MTX_DEF); \ + if (m_alloc_ref(1, (how)) == 0) \ + mtx_enter(&mcntfree->m_mtx, MTX_DEF); \ + } \ + __mcnt = mcntfree->m_head; \ + if (__mcnt != NULL) { \ + mcntfree->m_head = __mcnt->next_ref; \ + mbstat.m_refree--; \ + mtx_exit(&mcntfree->m_mtx, MTX_DEF); \ + __mcnt->refcnt = 0; \ + } else { \ + mtx_exit(&mcntfree->m_mtx, MTX_DEF); \ + if ((how) == M_WAIT) \ + __mcnt = m_cntalloc_wait(); \ + } \ (m_cnt) = __mcnt; \ - mbstat.m_refree--; \ -) +} while (0) #define _MEXT_DEALLOC_CNT(m_cnt) do { \ union mext_refcnt *__mcnt = (m_cnt); \ \ - __mcnt->next_ref = mext_refcnt_free; \ - mext_refcnt_free = __mcnt; \ + mtx_enter(&mcntfree->m_mtx, MTX_DEF); \ + __mcnt->next_ref = mcntfree->m_head; \ + mcntfree->m_head = __mcnt; \ mbstat.m_refree++; \ + MBWAKEUP(m_refalloc_wid); \ + mtx_exit(&mcntfree->m_mtx, MTX_DEF); \ } while (0) -#define MEXT_INIT_REF(m) do { \ +#define MEXT_INIT_REF(m, how) do { \ struct mbuf *__mmm = (m); \ \ - _MEXT_ALLOC_CNT(__mmm->m_ext.ref_cnt); \ + _MEXT_ALLOC_CNT(__mmm->m_ext.ref_cnt, (how)); \ MEXT_ADD_REF(__mmm); \ } while (0) @@ -327,26 +327,28 @@ struct mbuf *_mm; \ int _mhow = (how); \ int _mtype = (type); \ - int _ms = splimp(); \ \ - if (mmbfree == NULL) \ - (void)m_mballoc(1, _mhow); \ - _mm = mmbfree; \ + mtx_enter(&mmbfree->m_mtx, MTX_DEF); \ + if (mmbfree->m_head == NULL) { \ + mtx_exit(&mmbfree->m_mtx, MTX_DEF); \ + if (m_mballoc(1, _mhow) == 0) \ + mtx_enter(&mmbfree->m_mtx, MTX_DEF); \ + } \ + _mm = mmbfree->m_head; \ if (_mm != NULL) { \ - mmbfree = _mm->m_next; \ + mmbfree->m_head = _mm->m_next; \ mbtypes[MT_FREE]--; \ mbtypes[_mtype]++; \ - splx(_ms); \ + mtx_exit(&mmbfree->m_mtx, MTX_DEF); \ _mm->m_type = _mtype; \ _mm->m_next = NULL; \ _mm->m_nextpkt = NULL; \ _mm->m_data = _mm->m_dat; \ _mm->m_flags = 0; \ } else { \ - splx(_ms); \ - _mm = m_retry(_mhow, _mtype); \ - if (_mm == NULL && _mhow == M_WAIT) \ - _mm = m_mballoc_wait(MGET_C, _mtype); \ + mtx_exit(&mmbfree->m_mtx, MTX_DEF); \ + if (_mhow == M_WAIT) \ + _mm = m_mballoc_wait(_mtype); \ } \ (m) = _mm; \ } while (0) @@ -355,16 +357,19 @@ struct mbuf *_mm; \ int _mhow = (how); \ int _mtype = (type); \ - int _ms = splimp(); \ \ - if (mmbfree == NULL) \ - (void)m_mballoc(1, _mhow); \ - _mm = mmbfree; \ + mtx_enter(&mmbfree->m_mtx, MTX_DEF); \ + if (mmbfree->m_head == NULL) { \ + mtx_exit(&mmbfree->m_mtx, MTX_DEF); \ + if (m_mballoc(1, _mhow) == 0) \ + mtx_enter(&mmbfree->m_mtx, MTX_DEF); \ + } \ + _mm = mmbfree->m_head; \ if (_mm != NULL) { \ - mmbfree = _mm->m_next; \ + mmbfree->m_head = _mm->m_next; \ mbtypes[MT_FREE]--; \ mbtypes[_mtype]++; \ - splx(_ms); \ + mtx_exit(&mmbfree->m_mtx, MTX_DEF); \ _mm->m_type = _mtype; \ _mm->m_next = NULL; \ _mm->m_nextpkt = NULL; \ @@ -374,10 +379,9 @@ _mm->m_pkthdr.csum_flags = 0; \ _mm->m_pkthdr.aux = NULL; \ } else { \ - 
splx(_ms); \ - _mm = m_retryhdr(_mhow, _mtype); \ - if (_mm == NULL && _mhow == M_WAIT) \ - _mm = m_mballoc_wait(MGETHDR_C, _mtype); \ + mtx_exit(&mmbfree->m_mtx, MTX_DEF); \ + if (_mhow == M_WAIT) \ + _mm = m_mballoc_wait_hdr(_mtype); \ } \ (m) = _mm; \ } while (0) @@ -393,17 +397,20 @@ #define _MCLALLOC(p, how) do { \ caddr_t _mp; \ int _mhow = (how); \ - int _ms = splimp(); \ \ - if (mclfree == NULL) \ - (void)m_clalloc(1, _mhow); \ - _mp = (caddr_t)mclfree; \ + mtx_enter(&mclfree->m_mtx, MTX_DEF); \ + if (mclfree->m_head == NULL) { \ + mtx_exit(&mclfree->m_mtx, MTX_DEF); \ + if (m_clalloc(1, _mhow) == 0) \ + mtx_enter(&mclfree->m_mtx, MTX_DEF); \ + } \ + _mp = (caddr_t)mclfree->m_head; \ if (_mp != NULL) { \ mbstat.m_clfree--; \ - mclfree = ((union mcluster *)_mp)->mcl_next; \ - splx(_ms); \ + mclfree->m_head = ((union mcluster *)_mp)->mcl_next; \ + mtx_exit(&mclfree->m_mtx, MTX_DEF); \ } else { \ - splx(_ms); \ + mtx_exit(&mclfree->m_mtx, MTX_DEF); \ if (_mhow == M_WAIT) \ _mp = m_clalloc_wait(); \ } \ @@ -420,7 +427,7 @@ _mm->m_ext.ext_free = NULL; \ _mm->m_ext.ext_args = NULL; \ _mm->m_ext.ext_size = MCLBYTES; \ - MEXT_INIT_REF(_mm); \ + MEXT_INIT_REF(_mm, (how)); \ } \ } while (0) @@ -433,19 +440,21 @@ _mm->m_ext.ext_size = (size); \ _mm->m_ext.ext_free = (free); \ _mm->m_ext.ext_args = (args); \ - MEXT_INIT_REF(_mm); \ + MEXT_INIT_REF(_mm, M_DONTWAIT); \ } while (0) -#define _MCLFREE(p) MBUFLOCK( \ +#define _MCLFREE(p) do { \ union mcluster *_mp = (union mcluster *)(p); \ \ - _mp->mcl_next = mclfree; \ - mclfree = _mp; \ + mtx_enter(&mclfree->m_mtx, MTX_DEF); \ + _mp->mcl_next = mclfree->m_head; \ + mclfree->m_head = _mp; \ mbstat.m_clfree++; \ - MCLWAKEUP(); \ -) + MBWAKEUP(m_clalloc_wid); \ + mtx_exit(&mclfree->m_mtx, MTX_DEF); \ +} while (0) -#define _MEXTFREE(m) do { \ +#define MEXTFREE(m) do { \ struct mbuf *_mmm = (m); \ \ if (MEXT_IS_REF(_mmm)) \ @@ -461,29 +470,27 @@ _mmm->m_flags &= ~M_EXT; \ } while (0) -#define MEXTFREE(m) MBUFLOCK( \ - _MEXTFREE(m); \ -) - /* * MFREE(struct mbuf *m, struct mbuf *n) * Free a single mbuf and associated external storage. * Place the successor, if any, in n. */ -#define MFREE(m, n) MBUFLOCK( \ +#define MFREE(m, n) do { \ struct mbuf *_mm = (m); \ \ KASSERT(_mm->m_type != MT_FREE, ("freeing free mbuf")); \ if (_mm->m_flags & M_EXT) \ - _MEXTFREE(_mm); \ + MEXTFREE(_mm); \ + mtx_enter(&mmbfree->m_mtx, MTX_DEF); \ mbtypes[_mm->m_type]--; \ _mm->m_type = MT_FREE; \ mbtypes[MT_FREE]++; \ (n) = _mm->m_next; \ - _mm->m_next = mmbfree; \ - mmbfree = _mm; \ - MMBWAKEUP(); \ -) + _mm->m_next = mmbfree->m_head; \ + mmbfree->m_head = _mm; \ + MBWAKEUP(m_mballoc_wid); \ + mtx_exit(&mmbfree->m_mtx, MTX_DEF); \ +} while (0) /* * Copy mbuf pkthdr from "from" to "to". 
@@ -557,15 +564,15 @@ *_mmp = _mm; \ } while (0) -/* change mbuf to new type */ +/* + * change mbuf to new type + */ #define MCHTYPE(m, t) do { \ struct mbuf *_mm = (m); \ int _mt = (t); \ - int _ms = splimp(); \ \ - mbtypes[_mm->m_type]--; \ - mbtypes[_mt]++; \ - splx(_ms); \ + atomic_subtract_long(mbtypes[_mm->m_type], 1); \ + atomic_add_long(mbtypes[_mt], 1); \ _mm->m_type = (_mt); \ } while (0) @@ -584,8 +591,9 @@ }; #ifdef _KERNEL -extern u_int m_clalloc_wid; /* mbuf cluster wait count */ -extern u_int m_mballoc_wid; /* mbuf wait count */ +extern u_long m_clalloc_wid; /* mbuf cluster wait count */ +extern u_long m_mballoc_wid; /* mbuf wait count */ +extern u_long m_refalloc_wid; /* ext ref counter wait count */ extern int max_linkhdr; /* largest link-level header */ extern int max_protohdr; /* largest protocol header */ extern int max_hdr; /* largest link+protocol header */ @@ -594,18 +602,20 @@ extern u_long mbtypes[MT_NTYPES]; /* per-type mbuf allocations */ extern int mbuf_wait; /* mbuf sleep time */ extern struct mbuf *mbutl; /* virtual address of mclusters */ -extern union mcluster *mclfree; -extern struct mbuf *mmbfree; -extern union mext_refcnt *mext_refcnt_free; +extern struct mclfree_lst mclfree_lst_hdr, *mclfree; +extern struct mbffree_lst mbffree_lst_hdr, *mmbfree; +extern struct mcntfree_lst mcntfree_lst_hdr, *mcntfree; extern int nmbclusters; extern int nmbufs; extern int nsfbufs; void m_adj __P((struct mbuf *, int)); -int m_alloc_ref __P((u_int)); +int m_alloc_ref __P((u_int, int)); +void mbinit __P((void)); void m_cat __P((struct mbuf *,struct mbuf *)); int m_clalloc __P((int, int)); caddr_t m_clalloc_wait __P((void)); +union mext_refcnt *m_cntalloc_wait __P((void)); void m_copyback __P((struct mbuf *, int, int, caddr_t)); void m_copydata __P((struct mbuf *,int,int,caddr_t)); struct mbuf *m_copym __P((struct mbuf *, int, int, int)); @@ -619,13 +629,12 @@ struct mbuf *m_getclr __P((int, int)); struct mbuf *m_gethdr __P((int, int)); int m_mballoc __P((int, int)); -struct mbuf *m_mballoc_wait __P((int, int)); +struct mbuf *m_mballoc_wait __P((int)); +struct mbuf *m_mballoc_wait_hdr __P((int)); struct mbuf *m_prepend __P((struct mbuf *,int,int)); struct mbuf *m_pulldown __P((struct mbuf *, int, int, int *)); void m_print __P((const struct mbuf *m)); struct mbuf *m_pullup __P((struct mbuf *, int)); -struct mbuf *m_retry __P((int, int)); -struct mbuf *m_retryhdr __P((int, int)); struct mbuf *m_split __P((struct mbuf *,int,int)); struct mbuf *m_aux_add __P((struct mbuf *, int, int)); struct mbuf *m_aux_find __P((struct mbuf *, int, int));
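
Notes on the patterns used above (illustrative sketches, not part of the patch):

1) Wait-side pattern.  m_mballoc_wait(), m_mballoc_wait_hdr(), m_clalloc_wait()
   and m_cntalloc_wait() all follow the same asleep()/await() sequence to avoid
   a lost wakeup against MBWAKEUP().  A minimal sketch of that sequence follows;
   "struct freelist", try_alloc() and alloc_or_wait() are hypothetical stand-ins
   for the real per-type lists and the MGET/_MCLALLOC/_MEXT_ALLOC_CNT retry, and
   the sketch assumes the sys/mbuf.h definitions added by this patch.

/*
 * Sketch only -- not part of the patch.
 */
struct freelist {
	void	*fl_head;		/* first free object, or NULL */
	mtx_t	fl_mtx;			/* protects fl_head */
};

static void *try_alloc(struct freelist *);	/* hypothetical M_DONTWAIT retry */

void *
alloc_or_wait(struct freelist *fl, u_long *widp)
{
	void *p;

	/*
	 * Get on the sleep queue with asleep() _before_ advertising
	 * ourselves in the wait counter.  If the counter were bumped
	 * first, a racing MBWAKEUP() on another CPU could decrement it
	 * and call wakeup_one() before we are actually asleep, and we
	 * would then sleep on a counter of zero and never be woken.
	 */
	asleep(widp, PVM, "mbwait", mbuf_wait);
	atomic_add_long(widp, 1);
	if (await(PVM, mbuf_wait) == EWOULDBLOCK)
		atomic_subtract_long(widp, 1);

	/* Retry once; we race the other CPUs for whatever was freed. */
	p = try_alloc(fl);
	if (p != NULL) {
		atomic_add_long(&mbstat.m_wait, 1);
		mtx_enter(&fl->fl_mtx, MTX_DEF);
		if (fl->fl_head != NULL)	/* more left: chain the wakeup */
			MBWAKEUP(*widp);
		mtx_exit(&fl->fl_mtx, MTX_DEF);
	} else
		atomic_add_long(&mbstat.m_drops, 1);

	return (p);
}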
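
2) Allocator locking convention.  m_mballoc(), m_clalloc() and m_alloc_ref()
   return with the corresponding free-list mutex still held when they succeed,
   and never take it when they fail.  The sketch below is a simplified,
   stand-alone version of what the reworked MGET() macro does inline (the real
   macro also maintains mbtypes[] and initializes the mbuf); grab_one_mbuf()
   is a hypothetical name used only to show how a consumer follows the
   convention.

/*
 * Sketch only -- not part of the patch.
 */
struct mbuf *
grab_one_mbuf(int how)
{
	struct mbuf *m;

	mtx_enter(&mmbfree->m_mtx, MTX_DEF);
	if (mmbfree->m_head == NULL) {
		/*
		 * List is empty: drop our lock so m_mballoc() can take it.
		 * On success it returns with the lock held; on failure it
		 * never touched it, so only then do we re-acquire it.
		 */
		mtx_exit(&mmbfree->m_mtx, MTX_DEF);
		if (m_mballoc(1, how) == 0)
			mtx_enter(&mmbfree->m_mtx, MTX_DEF);
	}
	m = mmbfree->m_head;
	if (m != NULL)
		mmbfree->m_head = m->m_next;
	mtx_exit(&mmbfree->m_mtx, MTX_DEF);

	return (m);
}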
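
3) Free-side counterpart.  Every free path (MFREE(), _MCLFREE(),
   _MEXT_DEALLOC_CNT()) returns the object to its list and calls MBWAKEUP()
   while the list mutex is still held, so at most one sleeper is woken per
   freed object.  A minimal sketch for the plain mbuf list; put_one_mbuf() is
   hypothetical, and the real MFREE() additionally updates mbtypes[] and frees
   external storage via MEXTFREE().

/*
 * Sketch only -- not part of the patch.
 */
void
put_one_mbuf(struct mbuf *m)
{
	mtx_enter(&mmbfree->m_mtx, MTX_DEF);
	m->m_type = MT_FREE;
	m->m_next = mmbfree->m_head;
	mmbfree->m_head = m;
	MBWAKEUP(m_mballoc_wid);	/* wakeup_one() a sleeper, if any */
	mtx_exit(&mmbfree->m_mtx, MTX_DEF);
}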