Hey Alfred,

This is the design I had in mind, with your suggestions, in rough code.
Look at the dramatic code size difference, particularly in MFREE().

mb_fast is just the name of the fast-lists header structure, which looks
like this:

struct mb_lists {
        struct mbuf     *mb_first[2];
        u_long          free_cnt[2];
        struct mtx      f_mtx;
};

struct mb_lists *mb_fast;
...

Notice that there is only ONE mutex lock in it, because the structure is
per-CPU and that's all we need.  Can you imagine the _additional_
complexity/code bloat if we were to implement (2)?

Note that in the F2 list (mb_fast[cpuid]->mb_first[1]) all the mbufs have
their m_nextpkt pointing to the tail (last mbuf) of the list.  This was the
least costly way I could think of to make it easy to transfer the F2 list
to the general mmbfree list later, when necessary (there is a small
standalone sketch of that splice just before MFREE below).  I'm not certain
how much of a gain this would be in terms of cache efficiency, but it would
certainly be a gain as far as lock contention is concerned.

I look forward to reading your comments/suggestions/modifications/etc.

Thanks,
--
Bosko Milekic
bmilekic@freebsd.org


#define _MGET_SETUP(m_set, m_set_type) do { \
        (m_set)->m_type = (m_set_type); \
        (m_set)->m_next = NULL; \
        (m_set)->m_nextpkt = NULL; \
        (m_set)->m_data = (m_set)->m_dat; \
        (m_set)->m_flags = 0; \
} while (0)

/* Slow path: take an mbuf from the general mmbfree list. */
#define _MGET_GEN(m_mget, m_get_how) do { \
        mtx_enter(&mmbfree.m_mtx, MTX_DEF); \
        if (mmbfree.m_head == NULL) \
                m_mballoc(1, (m_get_how)); \
        (m_mget) = mmbfree.m_head; \
        if ((m_mget) != NULL) \
                mmbfree.m_head = (m_mget)->m_next; \
        else { \
                if ((m_get_how) == M_WAIT) \
                        (m_mget) = m_mballoc_wait(); \
        } \
        mtx_exit(&mmbfree.m_mtx, MTX_DEF); \
} while (0)

/*
 * Fast path: try this CPU's F2 list first, then F1, then fall back to
 * the general mmbfree list.
 */
#define _MGET(m, how, type) do { \
        struct mbuf *_mm; \
        int _mhow = (how); \
        int _mtype = (type); \
 \
        mtx_enter(&mb_fast[cpuid]->f_mtx, MTX_DEF); \
        if ((_mm = mb_fast[cpuid]->mb_first[1]) == NULL) { \
                if ((_mm = mb_fast[cpuid]->mb_first[0]) == NULL) { \
                        mtx_exit(&mb_fast[cpuid]->f_mtx, MTX_DEF); \
                        _MGET_GEN(_mm, _mhow); \
                } else { \
                        mb_fast[cpuid]->mb_first[0] = _mm->m_next; \
                        mb_fast[cpuid]->free_cnt[0]--; \
                        mtx_exit(&mb_fast[cpuid]->f_mtx, MTX_DEF); \
                } \
        } else { \
                mb_fast[cpuid]->mb_first[1] = _mm->m_next; \
                mb_fast[cpuid]->free_cnt[1]--; \
                mtx_exit(&mb_fast[cpuid]->f_mtx, MTX_DEF); \
        } \
        (m) = _mm;      /* hand the result back to the caller */ \
} while (0)

#define MGET(mb_ptr, mb_how, mb_type) do { \
        _MGET((mb_ptr), (mb_how), (mb_type)); \
        if ((mb_ptr) != NULL) { \
                atomic_subtract_long(&mbtypes[MT_FREE], 1); \
                atomic_add_long(&mbtypes[(mb_type)], 1); \
                _MGET_SETUP((mb_ptr), (mb_type)); \
        } \
} while (0)

#define _MGETHDR_SETUP(m_set, m_set_type) do { \
        (m_set)->m_type = (m_set_type); \
        (m_set)->m_next = NULL; \
        (m_set)->m_nextpkt = NULL; \
        (m_set)->m_data = (m_set)->m_pktdat; \
        (m_set)->m_flags = M_PKTHDR; \
        (m_set)->m_pkthdr.rcvif = NULL; \
        (m_set)->m_pkthdr.csum_flags = 0; \
        (m_set)->m_pkthdr.aux = NULL; \
} while (0)

#define MGETHDR(mb_ptr, mb_how, mb_type) do { \
        _MGET((mb_ptr), (mb_how), (mb_type)); \
        if ((mb_ptr) != NULL) { \
                atomic_subtract_long(&mbtypes[MT_FREE], 1); \
                atomic_add_long(&mbtypes[(mb_type)], 1); \
                _MGETHDR_SETUP((mb_ptr), (mb_type)); \
        } \
} while (0)
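(Aside, not part of the patch: to make the F2 tail trick concrete, here is a
small standalone sketch of the splice that MFREE below performs inline.  The
function name mb_flush_f2() and the 'ml' argument are made up for the
example; everything else uses the structures and lock calls above.)

/*
 * Sketch only: flush one CPU's F2 list onto the global mmbfree list.
 * Because every mbuf on F2 keeps m_nextpkt pointing at the list tail,
 * the tail is found in O(1) and the whole chain is spliced onto the
 * head of mmbfree with two pointer stores.
 */
static void
mb_flush_f2(struct mb_lists *ml)
{
        struct mbuf *head, *tail;

        mtx_enter(&ml->f_mtx, MTX_DEF);
        head = ml->mb_first[1];
        if (head == NULL) {
                mtx_exit(&ml->f_mtx, MTX_DEF);
                return;
        }
        tail = head->m_nextpkt;         /* every F2 mbuf points at the tail */
        ml->mb_first[1] = NULL;
        ml->free_cnt[1] = 0;
        mtx_exit(&ml->f_mtx, MTX_DEF);

        mtx_enter(&mmbfree.m_mtx, MTX_DEF);
        tail->m_next = mmbfree.m_head;  /* splice F2 in front of mmbfree */
        mmbfree.m_head = head;
        mtx_exit(&mmbfree.m_mtx, MTX_DEF);
}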
#define MFREE(m, n) do { \
        struct mbuf *_mm = (m); \
 \
        KASSERT(_mm->m_type != MT_FREE, ("freeing free mbuf")); \
        if (_mm->m_flags & M_EXT) \
                MEXTFREE(_mm); \
        atomic_subtract_long(&mbtypes[_mm->m_type], 1); \
        _mm->m_type = MT_FREE; \
        atomic_add_long(&mbtypes[MT_FREE], 1); \
        (n) = _mm->m_next; \
        if (m_mballoc_wid) { \
                /* A thread is sleeping on mmbfree; give it this mbuf. */ \
                mtx_enter(&mmbfree.m_mtx, MTX_DEF); \
                _mm->m_next = mmbfree.m_head; \
                mmbfree.m_head = _mm; \
                m_mballoc_wid--; \
                wakeup_one(&m_mballoc_wid); \
                mtx_exit(&mmbfree.m_mtx, MTX_DEF); \
        } else { \
                mtx_enter(&mb_fast[cpuid]->f_mtx, MTX_DEF); \
                if ((mb_fast[cpuid]->free_cnt[0] + 1) > mb_lowat) { \
                        /* F1 is at mb_lowat; push onto F2, keeping */ \
                        /* m_nextpkt pointed at the F2 tail. */ \
                        if ((_mm->m_next = mb_fast[cpuid]->mb_first[1]) \
                            != NULL) \
                                _mm->m_nextpkt = \
                                    mb_fast[cpuid]->mb_first[1]-> \
                                    m_nextpkt; \
                        else \
                                _mm->m_nextpkt = _mm; \
                        if (mb_fast[cpuid]->free_cnt[1] > (mb_hiwat - \
                            mb_lowat)) { \
                                /* F2 hit its limit; splice the whole */ \
                                /* F2 chain onto mmbfree. */ \
                                mb_fast[cpuid]->free_cnt[1] = 0; \
                                mb_fast[cpuid]->mb_first[1] = NULL; \
                                mtx_exit(&mb_fast[cpuid]->f_mtx, \
                                    MTX_DEF); \
                                mtx_enter(&mmbfree.m_mtx, MTX_DEF); \
                                _mm->m_nextpkt->m_next = \
                                    mmbfree.m_head; \
                                mmbfree.m_head = _mm; \
                                mtx_exit(&mmbfree.m_mtx, MTX_DEF); \
                        } else { \
                                mb_fast[cpuid]->mb_first[1] = _mm; \
                                mb_fast[cpuid]->free_cnt[1]++; \
                                mtx_exit(&mb_fast[cpuid]->f_mtx, \
                                    MTX_DEF); \
                        } \
                } else { \
                        /* There is room on F1; just push the mbuf there. */ \
                        _mm->m_next = mb_fast[cpuid]->mb_first[0]; \
                        mb_fast[cpuid]->mb_first[0] = _mm; \
                        mb_fast[cpuid]->free_cnt[0]++; \
                        mtx_exit(&mb_fast[cpuid]->f_mtx, MTX_DEF); \
                } \
        } \
} while (0)
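(Aside, not part of the patch: a minimal caller-side sketch of how the
macros above would be used.  mb_fast_example() is a made-up name; M_DONTWAIT
and MT_DATA are the usual mbuf constants.)

/*
 * Usage sketch only: grab a packet-header mbuf without sleeping, then
 * free it back through the per-CPU fast lists.
 */
static void
mb_fast_example(void)
{
        struct mbuf *m, *n;

        MGETHDR(m, M_DONTWAIT, MT_DATA);
        if (m == NULL)
                return;                 /* allocation failed */
        m->m_len = 0;
        m->m_pkthdr.len = 0;
        MFREE(m, n);                    /* n receives m->m_next (NULL here) */
}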