Index: kern/uipc_sockbuf.c =================================================================== --- kern/uipc_sockbuf.c (revision 185196) +++ kern/uipc_sockbuf.c (working copy) @@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$"); #include #include /* for aio_swake proto */ #include +#include #include #include #include @@ -552,9 +553,7 @@ sbcheck(struct sockbuf *sb) n = m->m_nextpkt; for (; m; m = m->m_next) { len += m->m_len; - mbcnt += MSIZE; - if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ - mbcnt += m->m_ext.ext_size; + mbcnt += m->m_size + sizeof(struct mbuf); } } if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { @@ -750,6 +749,7 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, str int eor = 0; struct mbuf *o; + CTR3(KTR_NET, "sb %p m %p n %p", sb, m, n); SOCKBUF_LOCK_ASSERT(sb); while (m) { @@ -764,11 +764,12 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, str continue; } if (n && (n->m_flags & M_EOR) == 0 && - M_WRITABLE(n) && + (n->m_flags & M_RDONLY) == 0 && ((sb->sb_flags & SB_NOCOALESCE) == 0) && m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ m->m_len <= M_TRAILINGSPACE(n) && n->m_type == m->m_type) { + CTR1(KTR_NET, "copy %d bytes\n", m->m_len); bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, (unsigned)m->m_len); n->m_len += m->m_len; Index: kern/kern_mbuf.c =================================================================== --- kern/kern_mbuf.c (revision 185196) +++ kern/kern_mbuf.c (working copy) @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -206,31 +207,28 @@ SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG /* * Zones from which we allocate. */ -uma_zone_t zone_mbuf; uma_zone_t zone_clust; -uma_zone_t zone_pack; +uma_zone_t zone_ext; +uma_zone_t zone_iclust; uma_zone_t zone_jumbop; uma_zone_t zone_jumbo9; uma_zone_t zone_jumbo16; -uma_zone_t zone_ext_refcnt; +uma_zone_t zone_mbuf; +uma_zone_t zone_pack; /* * Local prototypes. 
*/ -static int mb_ctor_mbuf(void *, int, void *, int); -static int mb_ctor_clust(void *, int, void *, int); -static int mb_ctor_pack(void *, int, void *, int); -static void mb_dtor_mbuf(void *, int, void *); -static void mb_dtor_clust(void *, int, void *); -static void mb_dtor_pack(void *, int, void *); -static int mb_zinit_pack(void *, int, int); -static void mb_zfini_pack(void *, int); - static void mb_reclaim(void *); static void mbuf_init(void *); +static void mb_dtor_pack(void *mem, int size, void *arg); +static int mb_zinit_pack(void *mem, int size, int how); +static void mb_zfini_pack(void *mem, int size); + + static void *mbuf_jumbo_alloc(uma_zone_t, int, u_int8_t *, int); static void mbuf_jumbo_free(void *, int, u_int8_t); - +static void *mb_alloc_iclust(uma_zone_t, int, u_int8_t *, int); static MALLOC_DEFINE(M_JUMBOFRAME, "jumboframes", "mbuf jumbo frame buffers"); /* Ensure that MSIZE doesn't break dtom() - it must be a power of 2 */ @@ -248,73 +246,77 @@ mbuf_init(void *dummy) * Configure UMA zones for Mbufs, Clusters, and Packets. 
*/ zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE, - mb_ctor_mbuf, mb_dtor_mbuf, #ifdef INVARIANTS - trash_init, trash_fini, + trash_ctor, trash_dtor, trash_init, trash_fini, #else - NULL, NULL, + NULL, NULL, NULL, NULL, #endif MSIZE - 1, UMA_ZONE_MAXBUCKET); + zone_ext = uma_zcreate(MBUF_EXT_NAME, MEXTHSIZE, +#ifdef INVARIANTS + trash_ctor, trash_dtor, trash_init, trash_fini, +#else + NULL, NULL, NULL, NULL, +#endif + UMA_ALIGN_PTR, UMA_ZONE_MAXBUCKET); + + zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES, - mb_ctor_clust, mb_dtor_clust, #ifdef INVARIANTS - trash_init, trash_fini, + trash_ctor, trash_dtor, trash_init, trash_fini, #else - NULL, NULL, + NULL, NULL, NULL, NULL, #endif - UMA_ALIGN_PTR, UMA_ZONE_REFCNT); + UMA_ALIGN_PTR, UMA_ZONE_MAXBUCKET); if (nmbclusters > 0) uma_zone_set_max(zone_clust, nmbclusters); + zone_iclust = uma_zcreate("mbuf_icluster", MICLBYTES, + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, + UMA_ZONE_MAXBUCKET | UMA_ZONE_STRIPEBUCKET | UMA_ZONE_NOFREE); + uma_zone_set_ppera(zone_iclust, + (MICLBYTES * (PAGE_SIZE / 64)) / PAGE_SIZE); + uma_zone_set_allocf(zone_iclust, mb_alloc_iclust); + uma_prealloc(zone_iclust, 8192); - zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack, - mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf); + zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, NULL, + mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_ext); /* Make jumbo frame zone too. Page size, 9k and 16k. 
*/ zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE, - mb_ctor_clust, mb_dtor_clust, #ifdef INVARIANTS - trash_init, trash_fini, + trash_ctor, trash_dtor, trash_init, trash_fini, #else - NULL, NULL, + NULL, NULL, NULL, NULL, #endif - UMA_ALIGN_PTR, UMA_ZONE_REFCNT); + UMA_ALIGN_PTR, 0); if (nmbjumbop > 0) uma_zone_set_max(zone_jumbop, nmbjumbop); - zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES, - mb_ctor_clust, mb_dtor_clust, + zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES + MEXTHSIZE, #ifdef INVARIANTS - trash_init, trash_fini, + trash_ctor, trash_dtor, trash_init, trash_fini, #else - NULL, NULL, + NULL, NULL, NULL, NULL, #endif - UMA_ALIGN_PTR, UMA_ZONE_REFCNT); + UMA_ALIGN_PTR, 0); if (nmbjumbo9 > 0) uma_zone_set_max(zone_jumbo9, nmbjumbo9); uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc); uma_zone_set_freef(zone_jumbo9, mbuf_jumbo_free); zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES, - mb_ctor_clust, mb_dtor_clust, #ifdef INVARIANTS - trash_init, trash_fini, + trash_ctor, trash_dtor, trash_init, trash_fini, #else - NULL, NULL, + NULL, NULL, NULL, NULL, #endif - UMA_ALIGN_PTR, UMA_ZONE_REFCNT); + UMA_ALIGN_PTR, 0); if (nmbjumbo16 > 0) uma_zone_set_max(zone_jumbo16, nmbjumbo16); uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc); uma_zone_set_freef(zone_jumbo16, mbuf_jumbo_free); - zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int), - NULL, NULL, - NULL, NULL, - UMA_ALIGN_PTR, UMA_ZONE_ZINIT); - - /* uma_prealloc() goes here... 
*/ - /* * Hook event handler for low-memory situation, used to * drain protocols and push data back to the caches (UMA @@ -343,6 +345,16 @@ mbuf_init(void *dummy) mbstat.sf_allocwait = mbstat.sf_allocfail = 0; } +static MALLOC_DEFINE(M_ICLUST, "iclust", "inline clusters"); + +static void * +mb_alloc_iclust(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + + *flags = UMA_SLAB_PRIV; + return (contigmalloc(bytes, M_ICLUST, wait, 0, ~0, 1, 0)); /* honor caller's wait flags */ +} + /* * UMA backend page allocator for the jumbo frame zones. * @@ -370,86 +382,6 @@ mbuf_jumbo_free(void *mem, int size, u_int8_t flag } /* - * Constructor for Mbuf master zone. - * - * The 'arg' pointer points to a mb_args structure which - * contains call-specific information required to support the - * mbuf allocation API. See mbuf.h. - */ -static int -mb_ctor_mbuf(void *mem, int size, void *arg, int how) -{ - struct mbuf *m; - struct mb_args *args; -#ifdef MAC - int error; -#endif - int flags; - short type; - -#ifdef INVARIANTS - trash_ctor(mem, size, arg, how); -#endif - m = (struct mbuf *)mem; - args = (struct mb_args *)arg; - flags = args->flags; - type = args->type; - - /* - * The mbuf is initialized later. The caller has the - * responsibility to set up any MAC labels too. - */ - if (type == MT_NOINIT) - return (0); - - m->m_next = NULL; - m->m_nextpkt = NULL; - m->m_len = 0; - m->m_flags = flags; - m->m_type = type; - if (flags & M_PKTHDR) { - m->m_data = m->m_pktdat; - m->m_pkthdr.rcvif = NULL; - m->m_pkthdr.header = NULL; - m->m_pkthdr.len = 0; - m->m_pkthdr.csum_flags = 0; - m->m_pkthdr.csum_data = 0; - m->m_pkthdr.tso_segsz = 0; - m->m_pkthdr.ether_vtag = 0; - SLIST_INIT(&m->m_pkthdr.tags); -#ifdef MAC - /* If the label init fails, fail the alloc */ - error = mac_mbuf_init(m, how); - if (error) - return (error); -#endif - } else - m->m_data = m->m_dat; - return (0); -} - -/* - * The Mbuf master zone destructor. 
- */ -static void -mb_dtor_mbuf(void *mem, int size, void *arg) -{ - struct mbuf *m; - unsigned long flags; - - m = (struct mbuf *)mem; - flags = (unsigned long)arg; - - if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0) - m_tag_delete_chain(m, NULL); - KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); - KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); -#ifdef INVARIANTS - trash_dtor(mem, size, arg); -#endif -} - -/* * The Mbuf Packet zone destructor. */ static void @@ -458,9 +390,6 @@ mb_dtor_pack(void *mem, int size, void *arg) struct mbuf *m; m = (struct mbuf *)mem; - if ((m->m_flags & M_PKTHDR) != 0) - m_tag_delete_chain(m, NULL); - /* Make sure we've got a clean cluster back. */ KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__)); @@ -469,10 +398,7 @@ mb_dtor_pack(void *mem, int size, void *arg) KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__)); KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__)); KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__)); - KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__)); -#ifdef INVARIANTS - trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg); -#endif + /* * If there are processes blocked on zone_clust, waiting for pages * to be freed up, * cause them to be woken up by draining the @@ -486,85 +412,6 @@ mb_dtor_pack(void *mem, int size, void *arg) } /* - * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor. - * - * Here the 'arg' pointer points to the Mbuf which we - * are configuring cluster storage for. If 'arg' is - * empty we allocate just the cluster without setting - * the mbuf to it. See mbuf.h. 
- */ -static int -mb_ctor_clust(void *mem, int size, void *arg, int how) -{ - struct mbuf *m; - u_int *refcnt; - int type; - uma_zone_t zone; - -#ifdef INVARIANTS - trash_ctor(mem, size, arg, how); -#endif - switch (size) { - case MCLBYTES: - type = EXT_CLUSTER; - zone = zone_clust; - break; -#if MJUMPAGESIZE != MCLBYTES - case MJUMPAGESIZE: - type = EXT_JUMBOP; - zone = zone_jumbop; - break; -#endif - case MJUM9BYTES: - type = EXT_JUMBO9; - zone = zone_jumbo9; - break; - case MJUM16BYTES: - type = EXT_JUMBO16; - zone = zone_jumbo16; - break; - default: - panic("unknown cluster size"); - break; - } - - m = (struct mbuf *)arg; - refcnt = uma_find_refcnt(zone, mem); - *refcnt = 1; - if (m != NULL) { - m->m_ext.ext_buf = (caddr_t)mem; - m->m_data = m->m_ext.ext_buf; - m->m_flags |= M_EXT; - m->m_ext.ext_free = NULL; - m->m_ext.ext_arg1 = NULL; - m->m_ext.ext_arg2 = NULL; - m->m_ext.ext_size = size; - m->m_ext.ext_type = type; - m->m_ext.ref_cnt = refcnt; - } - - return (0); -} - -/* - * The Mbuf Cluster zone destructor. - */ -static void -mb_dtor_clust(void *mem, int size, void *arg) -{ -#ifdef INVARIANTS - uma_zone_t zone; - - zone = m_getzone(size); - KASSERT(*(uma_find_refcnt(zone, mem)) <= 1, - ("%s: refcnt incorrect %u", __func__, - *(uma_find_refcnt(zone, mem))) ); - - trash_dtor(mem, size, arg); -#endif -} - -/* - * The Packet secondary zone's init routine, executed on the * object's transition from mbuf keg slab to zone cache. */ @@ -574,13 +421,14 @@ mb_zinit_pack(void *mem, int size, int how) struct mbuf *m; m = (struct mbuf *)mem; /* m is virgin. */ - if (uma_zalloc_arg(zone_clust, m, how) == NULL || - m->m_ext.ext_buf == NULL) + /* + * Allocate the cluster to attach to the ext. m is virgin, so + * m->m_ext must not be read before m_extadd() initializes it. + */ + if ((mem = uma_zalloc(zone_clust, how)) == NULL) return (ENOMEM); - m->m_ext.ext_type = EXT_PACKET; /* Override. 
*/ -#ifdef INVARIANTS - trash_init(m->m_ext.ext_buf, MCLBYTES, how); -#endif + m_extadd(m, mem, MCLBYTES, m_ext_free_nop, NULL, NULL, 0, EXT_PACKET); + return (0); } @@ -603,52 +451,28 @@ mb_zfini_pack(void *mem, int size) #endif } -/* - * The "packet" keg constructor. - */ -static int -mb_ctor_pack(void *mem, int size, void *arg, int how) +int +m_pkthdr_init(struct mbuf *m, int how) { - struct mbuf *m; - struct mb_args *args; #ifdef MAC int error; #endif - int flags; - short type; - m = (struct mbuf *)mem; - args = (struct mb_args *)arg; - flags = args->flags; - type = args->type; - -#ifdef INVARIANTS - trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how); -#endif - m->m_next = NULL; - m->m_nextpkt = NULL; - m->m_data = m->m_ext.ext_buf; - m->m_len = 0; - m->m_flags = (flags | M_EXT); - m->m_type = type; - - if (flags & M_PKTHDR) { - m->m_pkthdr.rcvif = NULL; - m->m_pkthdr.len = 0; - m->m_pkthdr.header = NULL; - m->m_pkthdr.csum_flags = 0; - m->m_pkthdr.csum_data = 0; - m->m_pkthdr.tso_segsz = 0; - m->m_pkthdr.ether_vtag = 0; - SLIST_INIT(&m->m_pkthdr.tags); + m->m_data = m->m_pktdat; + m->m_pkthdr.rcvif = NULL; + m->m_pkthdr.len = 0; + m->m_pkthdr.header = NULL; + m->m_pkthdr.csum_flags = 0; + m->m_pkthdr.csum_data = 0; + m->m_pkthdr.tso_segsz = 0; + m->m_pkthdr.ether_vtag = 0; + SLIST_INIT(&m->m_pkthdr.tags); #ifdef MAC - /* If the label init fails, fail the alloc */ - error = mac_mbuf_init(m, how); - if (error) - return (error); + /* If the label init fails, fail the alloc */ + error = mac_mbuf_init(m, how); + if (error) + return (error); #endif - } - /* m_ext is already initialized. */ return (0); } Index: kern/uipc_mbuf2.c =================================================================== --- kern/uipc_mbuf2.c (revision 185196) +++ kern/uipc_mbuf2.c (working copy) @@ -150,14 +150,7 @@ m_pulldown(struct mbuf *m, int off, int len, int * * M_WRITABLE(). For now, we only evaluate once at the beginning and * live with this. */ - /* - * XXX: This is dumb. 
If we're just a regular mbuf with no M_EXT, - * then we're not "writable," according to this code. - */ - writable = 0; - if ((n->m_flags & M_EXT) == 0 || - (n->m_ext.ext_type == EXT_CLUSTER && M_WRITABLE(n))) - writable = 1; + writable = M_WRITABLE(n); /* * the target data is on . Index: kern/uipc_mbuf.c =================================================================== --- kern/uipc_mbuf.c (revision 185196) +++ kern/uipc_mbuf.c (working copy) @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -87,6 +88,8 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfail &m_defragrandomfailures, 0, ""); #endif +void m_refm(struct mbuf *mb, struct mbuf *m); + /* * Allocate a given length worth of mbufs and/or clusters (whatever fits * best) and return a pointer to the top of the allocated chain. If an @@ -128,8 +131,7 @@ m_getm2(struct mbuf *m, int len, int how, short ty } /* Book keeping. */ - len -= (mb->m_flags & M_EXT) ? mb->m_ext.ext_size : - ((mb->m_flags & M_PKTHDR) ? 
MHLEN : MLEN); + len -= mb->m_size; if (mtail != NULL) mtail->m_next = mb; else @@ -189,122 +191,64 @@ void m_extadd(struct mbuf *mb, caddr_t buf, u_int size, void (*freef)(void *, void *), void *arg1, void *arg2, int flags, int type) { - KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__)); + mb->m_flags |= (M_EXT | flags); + mb->m_data = buf; + mb->m_size = size; + mb->m_ext.ext_buf = buf; + mb->m_ext.ext_size = size; + mb->m_ext.ext_free = freef; + mb->m_ext.ext_arg1 = arg1; + mb->m_ext.ext_arg2 = arg2; + mb->m_ext.ext_type = type; - if (type != EXT_EXTREF) - mb->m_ext.ref_cnt = (u_int *)uma_zalloc(zone_ext_refcnt, M_NOWAIT); - if (mb->m_ext.ref_cnt != NULL) { - *(mb->m_ext.ref_cnt) = 1; - mb->m_flags |= (M_EXT | flags); - mb->m_ext.ext_buf = buf; - mb->m_data = mb->m_ext.ext_buf; - mb->m_ext.ext_size = size; - mb->m_ext.ext_free = freef; - mb->m_ext.ext_arg1 = arg1; - mb->m_ext.ext_arg2 = arg2; - mb->m_ext.ext_type = type; - } + CTR3(KTR_NET, "m_extadd: %p ref %d buf %p", + mb, mb->m_ref, mb->m_ext.ext_buf); } -/* - * Non-directly-exported function to clean up after mbufs with M_EXT - * storage attached to them if the reference count hits 1. - */ void -mb_free_ext(struct mbuf *m) +m_refm(struct mbuf *mb, struct mbuf *m) { - int skipmbuf; - - KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); - KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__)); + if (m->m_ref > 1) + atomic_add_int(&m->m_ref, 1); + else + m->m_ref++; + mb->m_flags |= M_EXT | M_RDONLY; + mb->m_data = m->m_data; + mb->m_size = m->m_len; /* Only existing data is visible. 
*/ + mb->m_ext.ext_buf = m->m_data; + mb->m_ext.ext_size = m->m_len; + mb->m_ext.ext_free = m_ext_free_mbuf; + mb->m_ext.ext_arg1 = m; + mb->m_ext.ext_arg2 = NULL; + mb->m_ext.ext_type = EXT_MBUF; + CTR3(KTR_NET, "m_refm: %p ref %d buf %p", + mb, mb->m_ref, mb->m_ext.ext_buf); +} +void +m_ext_free_zone(void *arg1, void *arg2) +{ + + uma_zfree(arg1, arg2); +} + +void +m_ext_free_mbuf(void *arg1, void *arg2) +{ + /* - * check if the header is embedded in the cluster - */ - skipmbuf = (m->m_flags & M_NOFREE); - - /* Free attached storage if this mbuf is the only reference to it. */ - if (*(m->m_ext.ref_cnt) == 1 || - atomic_fetchadd_int(m->m_ext.ref_cnt, -1) == 1) { - switch (m->m_ext.ext_type) { - case EXT_PACKET: /* The packet zone is special. */ - if (*(m->m_ext.ref_cnt) == 0) - *(m->m_ext.ref_cnt) = 1; - uma_zfree(zone_pack, m); - return; /* Job done. */ - case EXT_CLUSTER: - uma_zfree(zone_clust, m->m_ext.ext_buf); - break; - case EXT_JUMBOP: - uma_zfree(zone_jumbop, m->m_ext.ext_buf); - break; - case EXT_JUMBO9: - uma_zfree(zone_jumbo9, m->m_ext.ext_buf); - break; - case EXT_JUMBO16: - uma_zfree(zone_jumbo16, m->m_ext.ext_buf); - break; - case EXT_SFBUF: - case EXT_NET_DRV: - case EXT_MOD_TYPE: - case EXT_DISPOSABLE: - *(m->m_ext.ref_cnt) = 0; - uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *, - m->m_ext.ref_cnt)); - /* FALLTHROUGH */ - case EXT_EXTREF: - KASSERT(m->m_ext.ext_free != NULL, - ("%s: ext_free not set", __func__)); - (*(m->m_ext.ext_free))(m->m_ext.ext_arg1, - m->m_ext.ext_arg2); - break; - default: - KASSERT(m->m_ext.ext_type == 0, - ("%s: unknown ext_type", __func__)); - } - } - if (skipmbuf) - return; - - /* - * Free this mbuf back to the mbuf zone with all m_ext - * information purged. + * Release one more reference to this mbuf. If it is the last it + * will be freed. 
*/ - m->m_ext.ext_buf = NULL; - m->m_ext.ext_free = NULL; - m->m_ext.ext_arg1 = NULL; - m->m_ext.ext_arg2 = NULL; - m->m_ext.ref_cnt = NULL; - m->m_ext.ext_size = 0; - m->m_ext.ext_type = 0; - m->m_flags &= ~M_EXT; - uma_zfree(zone_mbuf, m); + m_free(arg1); } -/* - * Attach the the cluster from *m to *n, set up m_ext in *n - * and bump the refcount of the cluster. - */ -static void -mb_dupcl(struct mbuf *n, struct mbuf *m) +void +m_ext_free_nop(void *arg1, void *arg2) { - KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); - KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__)); - KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); - if (*(m->m_ext.ref_cnt) == 1) - *(m->m_ext.ref_cnt) += 1; - else - atomic_add_int(m->m_ext.ref_cnt, 1); - n->m_ext.ext_buf = m->m_ext.ext_buf; - n->m_ext.ext_free = m->m_ext.ext_free; - n->m_ext.ext_arg1 = m->m_ext.ext_arg1; - n->m_ext.ext_arg2 = m->m_ext.ext_arg2; - n->m_ext.ext_size = m->m_ext.ext_size; - n->m_ext.ref_cnt = m->m_ext.ref_cnt; - n->m_ext.ext_type = m->m_ext.ext_type; - n->m_flags |= M_EXT; + /* Nothing to do. 
*/ } /* @@ -532,6 +476,7 @@ m_copym(struct mbuf *m, int off0, int len, int wai struct mbuf *top; int copyhdr = 0; + CTR3(KTR_NET, "m_copym(%p, %d, %d)", m, off0, len); KASSERT(off >= 0, ("m_copym, negative off %d", off)); KASSERT(len >= 0, ("m_copym, negative len %d", len)); MBUF_CHECKSLEEP(wait); @@ -569,12 +514,8 @@ m_copym(struct mbuf *m, int off0, int len, int wai copyhdr = 0; } n->m_len = min(len, m->m_len - off); - if (m->m_flags & M_EXT) { - n->m_data = m->m_data + off; - mb_dupcl(n, m); - } else - bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), - (u_int)n->m_len); + m_refm(n, m); + n->m_data += off; if (len != M_COPYALL) len -= n->m_len; off = 0; @@ -618,6 +559,7 @@ m_copymdata(struct mbuf *m, struct mbuf *n, int of int i, nlen = 0; caddr_t buf[MLEN]; + CTR4(KTR_NET, "m_copymdata(%p, %p, %d, %d)", m, n, off, len); KASSERT(m != NULL && n != NULL, ("m_copymdata, no target or source")); KASSERT(off >= 0, ("m_copymdata, negative off %d", off)); KASSERT(len >= 0, ("m_copymdata, negative len %d", len)); @@ -637,7 +579,10 @@ m_copymdata(struct mbuf *m, struct mbuf *n, int of if (off + len > nlen || len < 1) return NULL; + /* XXX This assumes !WRITABLE are MCLBYTES size and have M_EXT set. */ if (!M_WRITABLE(mm)) { + if ((mm->m_flags & M_EXT) == 0) + panic("m_copymdata: invalid mbuf"); /* XXX: Use proper m_xxx function instead. */ x = m_getcl(how, MT_DATA, mm->m_flags); if (x == NULL) @@ -672,7 +617,7 @@ m_copymdata(struct mbuf *m, struct mbuf *n, int of } /* Expand first/last mbuf to cluster if possible. 
*/ - if (!prep && !(mm->m_flags & M_EXT) && len > M_TRAILINGSPACE(mm)) { + if (!prep && mm->m_size < MCLBYTES && len > M_TRAILINGSPACE(mm)) { bcopy(mm->m_data, &buf, mm->m_len); m_clget(mm, how); if (!(mm->m_flags & M_EXT)) @@ -681,7 +626,7 @@ m_copymdata(struct mbuf *m, struct mbuf *n, int of mm->m_data = mm->m_ext.ext_buf; mm->m_pkthdr.header = NULL; } - if (prep && !(mm->m_flags & M_EXT) && len > M_LEADINGSPACE(mm)) { + if (prep && mm->m_size < MCLBYTES && len > M_LEADINGSPACE(mm)) { bcopy(mm->m_data, &buf, mm->m_len); m_clget(mm, how); if (!(mm->m_flags & M_EXT)) @@ -703,8 +648,7 @@ m_copymdata(struct mbuf *m, struct mbuf *n, int of return NULL; i = 0; for (x = z; x != NULL; x = x->m_next) { - i += x->m_flags & M_EXT ? x->m_ext.ext_size : - (x->m_flags & M_PKTHDR ? MHLEN : MLEN); + i += x->m_size; if (!x->m_next) break; } @@ -753,6 +697,7 @@ m_copypacket(struct mbuf *m, int how) { struct mbuf *top, *n, *o; + CTR1(KTR_NET, "m_copypacket(%p)", m); MBUF_CHECKSLEEP(how); MGET(n, how, m->m_type); top = n; @@ -762,14 +707,7 @@ m_copypacket(struct mbuf *m, int how) if (!m_dup_pkthdr(n, m, how)) goto nospace; n->m_len = m->m_len; - if (m->m_flags & M_EXT) { - n->m_data = m->m_data; - mb_dupcl(n, m); - } else { - n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat ); - bcopy(mtod(m, char *), mtod(n, char *), n->m_len); - } - + m_refm(n, m); m = m->m_next; while (m) { MGET(o, how, m->m_type); @@ -780,13 +718,7 @@ m_copypacket(struct mbuf *m, int how) n = n->m_next; n->m_len = m->m_len; - if (m->m_flags & M_EXT) { - n->m_data = m->m_data; - mb_dupcl(n, m); - } else { - bcopy(mtod(m, char *), mtod(n, char *), n->m_len); - } - + m_refm(n, m); /* new mbuf n references source m, as for the head */ m = m->m_next; } return top; @@ -805,6 +737,7 @@ m_copydata(const struct mbuf *m, int off, int len, { u_int count; + CTR3(KTR_NET, "m_copydata(%p, %d, %d)", m, off, len); KASSERT(off >= 0, ("m_copydata, negative off %d", off)); KASSERT(len >= 0, ("m_copydata, negative len %d", len)); while (off > 0) { @@ -834,8 +767,9 @@ struct mbuf * 
m_dup(struct mbuf *m, int how) { struct mbuf **p, *top = NULL; - int remain, moff, nsize; + int remain, moff; + CTR1(KTR_NET, "m_dup(%p)", m); MBUF_CHECKSLEEP(how); /* Sanity check */ if (m == NULL) @@ -852,10 +786,8 @@ m_dup(struct mbuf *m, int how) /* Get the next new mbuf */ if (remain >= MINCLSIZE) { n = m_getcl(how, m->m_type, 0); - nsize = MCLBYTES; } else { n = m_get(how, m->m_type); - nsize = MLEN; } if (n == NULL) goto nospace; @@ -865,8 +797,6 @@ m_dup(struct mbuf *m, int how) m_free(n); goto nospace; } - if ((n->m_flags & M_EXT) == 0) - nsize = MHLEN; } n->m_len = 0; @@ -875,8 +805,8 @@ m_dup(struct mbuf *m, int how) p = &n->m_next; /* Copy data from original mbuf(s) into new mbuf */ - while (n->m_len < nsize && m != NULL) { - int chunk = min(nsize - n->m_len, m->m_len - moff); + while (n->m_len < n->m_size && m != NULL) { + int chunk = min(n->m_size - n->m_len, m->m_len - moff); bcopy(m->m_data + moff, n->m_data + n->m_len, chunk); moff += chunk; @@ -908,11 +838,13 @@ nospace: void m_cat(struct mbuf *m, struct mbuf *n) { + CTR2(KTR_NET, "m_cat(%p, %p)", m, n); + while (m->m_next) m = m->m_next; while (n) { if (m->m_flags & M_EXT || - m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) { + m->m_len + n->m_len > m->m_size) { /* just join the two chains */ m->m_next = n; return; @@ -932,6 +864,7 @@ m_adj(struct mbuf *mp, int req_len) struct mbuf *m; int count; + CTR2(KTR_NET, "m_adj(%p, %d)", mp, req_len); if ((m = mp) == NULL) return; if (len >= 0) { @@ -1014,13 +947,13 @@ m_pullup(struct mbuf *n, int len) int count; int space; + CTR2(KTR_NET, "m_pullup(%p, %d)", n, len); /* * If first mbuf has no cluster, and has room for len bytes * without shifting current data, pullup into it, * otherwise allocate a new mbuf to prepend to the chain. 
*/ - if ((n->m_flags & M_EXT) == 0 && - n->m_data + len < &n->m_dat[MLEN] && n->m_next) { + if ((n->m_flags & M_EXT) == 0 && len < n->m_size && n->m_next) { if (n->m_len >= len) return (n); m = n; @@ -1036,7 +969,8 @@ m_pullup(struct mbuf *n, int len) if (n->m_flags & M_PKTHDR) M_MOVE_PKTHDR(m, n); } - space = &m->m_dat[MLEN] - (m->m_data + m->m_len); + /* XXX M_TRAILINGSPACE without M_WRITABLE */ + space = (M_START(m) + m->m_size) - (m->m_data + (m)->m_len); do { count = min(min(max(len, max_protohdr), space), n->m_len); bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, @@ -1075,6 +1009,7 @@ m_copyup(struct mbuf *n, int len, int dstoff) struct mbuf *m; int count, space; + CTR2(KTR_NET, "m_copyup(%p, %d)", n, len); if (len > (MHLEN - dstoff)) goto bad; MGET(m, M_DONTWAIT, n->m_type); @@ -1084,7 +1019,8 @@ m_copyup(struct mbuf *n, int len, int dstoff) if (n->m_flags & M_PKTHDR) M_MOVE_PKTHDR(m, n); m->m_data += dstoff; - space = &m->m_dat[MLEN] - (m->m_data + m->m_len); + /* XXX M_TRAILINGSPACE without M_WRITABLE */ + space = (M_START(m) + m->m_size) - (m->m_data + (m)->m_len); do { count = min(min(max(len, max_protohdr), space), n->m_len); memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t), @@ -1126,6 +1062,7 @@ m_split(struct mbuf *m0, int len0, int wait) struct mbuf *m, *n; u_int len = len0, remain; + CTR2(KTR_NET, "m_split(%p, %d)", m0, len0); MBUF_CHECKSLEEP(wait); for (m = m0; m && len > m->m_len; m = m->m_next) len -= m->m_len; @@ -1139,8 +1076,8 @@ m_split(struct mbuf *m0, int len0, int wait) n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; n->m_pkthdr.len = m0->m_pkthdr.len - len0; m0->m_pkthdr.len = len0; - if (m->m_flags & M_EXT) - goto extpacket; + if (m->m_size >= MCLBYTES) + goto refpacket; if (remain > MHLEN) { /* m can't be the lead packet */ MH_ALIGN(n, 0); @@ -1164,13 +1101,9 @@ m_split(struct mbuf *m0, int len0, int wait) return (NULL); M_ALIGN(n, remain); } -extpacket: - if (m->m_flags & M_EXT) { - n->m_data = m->m_data + len; - mb_dupcl(n, m); - } 
else { - bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); - } +refpacket: + m_refm(n, m); + n->m_data += len; n->m_len = remain; m->m_len = len; n->m_next = m->m_next; @@ -1255,6 +1188,7 @@ m_copyback(struct mbuf *m0, int off, int len, c_ca struct mbuf *m = m0, *n; int totlen = 0; + CTR3(KTR_NET, "m_copyback(%p, %d, %d)", m0, off, len); if (m0 == NULL) return; while (off > (mlen = m->m_len)) { @@ -1306,6 +1240,7 @@ m_append(struct mbuf *m0, int len, c_caddr_t cp) struct mbuf *m, *n; int remainder, space; + CTR2(KTR_NET, "m_append(%p, %d)", m0, len); for (m = m0; m->m_next != NULL; m = m->m_next) ; remainder = len; @@ -1776,12 +1711,7 @@ m_align(struct mbuf *m, int len) { int adjust; - if (m->m_flags & M_EXT) - adjust = m->m_ext.ext_size - len; - else if (m->m_flags & M_PKTHDR) - adjust = MHLEN - len; - else - adjust = MLEN - len; + adjust = m->m_size - len; m->m_data += adjust &~ (sizeof(long)-1); } Index: netinet/sctp_os_bsd.h =================================================================== --- netinet/sctp_os_bsd.h (revision 185196) +++ netinet/sctp_os_bsd.h (working copy) @@ -360,7 +360,7 @@ typedef struct callout sctp_os_timer_t; /* return the base ext data pointer */ #define SCTP_BUF_EXTEND_BASE(m) (m->m_ext.ext_buf) /* return the refcnt of the data pointer */ -#define SCTP_BUF_EXTEND_REFCNT(m) (*m->m_ext.ref_cnt) +#define SCTP_BUF_EXTEND_REFCNT(m) (m->m_ref) /* return any buffer related flags, this is * used beyond logging for apple only. 
*/ Index: dev/cxgb/cxgb_sge.c =================================================================== --- dev/cxgb/cxgb_sge.c (revision 185196) +++ dev/cxgb/cxgb_sge.c (working copy) @@ -515,7 +515,7 @@ refill_fl(adapter_t *sc, struct sge_fl *q, int n) struct refill_fl_cb_arg cb_arg; caddr_t cl; int err, count = 0; - int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t); + int header_size = sizeof(struct mbuf) + sizeof(uint32_t); cb_arg.error = 0; while (n--) { @@ -2365,7 +2365,7 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int npo q->rspq.size = p->rspq_size; - header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t); + header_size = sizeof(struct mbuf) + sizeof(uint32_t); q->txq[TXQ_ETH].stop_thres = nports * flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); @@ -2541,8 +2541,7 @@ init_cluster_mbuf(caddr_t cl, int flags, int type, struct mbuf *m; int header_size; - header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + - sizeof(struct m_ext_) + sizeof(uint32_t); + header_size = sizeof(struct mbuf) + sizeof(uint32_t); bzero(cl, header_size); m = (struct mbuf *)cl; Index: vm/uma.h =================================================================== --- vm/uma.h (revision 185196) +++ vm/uma.h (working copy) @@ -230,6 +230,7 @@ uma_zone_t uma_zsecond_create(char *name, uma_ctor #define UMA_ZONE_SECONDARY 0x0200 /* Zone is a Secondary Zone */ #define UMA_ZONE_REFCNT 0x0400 /* Allocate refcnts in slabs */ #define UMA_ZONE_MAXBUCKET 0x0800 /* Use largest buckets */ +#define UMA_ZONE_STRIPEBUCKET 0x2000 /* Stripe buckets across slabs. 
*/ /* Definitions for align */ #define UMA_ALIGN_PTR (sizeof(void *) - 1) /* Alignment fit for ptr */ @@ -509,6 +510,8 @@ void uma_zone_set_freef(uma_zone_t zone, uma_free */ void uma_prealloc(uma_zone_t zone, int itemcnt); +void uma_zone_set_ppera(uma_zone_t zone, int pages); + /* * Used to lookup the reference counter allocated for an item * from a UMA_ZONE_REFCNT zone. For UMA_ZONE_REFCNT zones, Index: vm/uma_core.c =================================================================== --- vm/uma_core.c (revision 185196) +++ vm/uma_core.c (working copy) @@ -239,7 +239,7 @@ static uma_bucket_t bucket_alloc(int, int); static void bucket_free(uma_bucket_t); static void bucket_zone_drain(void); static int uma_zalloc_bucket(uma_zone_t zone, int flags); -static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags); +static uma_slab_t uma_zone_slab(uma_zone_t zone, uma_slab_t slab, int flags); static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab); static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini, int align, u_int32_t flags); @@ -1164,6 +1164,27 @@ zone_large_init(uma_zone_t zone) keg->uk_rsize = keg->uk_size; } +void +uma_zone_set_ppera(uma_zone_t zone, int pages) +{ + uma_keg_t keg; + + ZONE_LOCK(zone); + keg = zone->uz_keg; + if (keg->uk_ppera < pages) { + keg->uk_ppera = pages; + keg->uk_ipers = (pages * PAGE_SIZE) / keg->uk_rsize; + keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC; + keg->uk_slabzone = slabzone; + KASSERT(keg->uk_ipers <= uma_max_ipers, + ("uma_zone_set_ppera: keg->uk_ipers too high!")); + printf("ppera: req pages %d, ipers %d, rsize %d\n", + pages, keg->uk_ipers, keg->uk_rsize); + } + ZONE_UNLOCK(zone); +} + + /* * Keg header ctor. This initializes all fields, locks, etc. And inserts * the keg onto the global keg list. 
@@ -1557,7 +1578,7 @@ uma_startup(void *bootmem, int boot_pages) } if (objsize > UMA_SMALLEST_UNIT) objsize--; - uma_max_ipers = UMA_SLAB_SIZE / objsize; + uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64); wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE; totsize = wsize; @@ -1937,7 +1958,7 @@ zalloc_start: } static uma_slab_t -uma_zone_slab(uma_zone_t zone, int flags) +uma_zone_slab(uma_zone_t zone, uma_slab_t last, int flags) { uma_slab_t slab; uma_keg_t keg; @@ -1970,6 +1991,14 @@ static uma_slab_t for (;;) { /* + * Stripe bucket allocations across slabs. + */ + if ((keg->uk_flags & UMA_ZONE_STRIPEBUCKET) && last) { + if (last->us_freecount != 0 && + (slab = LIST_NEXT(last, us_link)) != NULL) + return (slab); + } + /* * Find a slab with some space. Prefer slabs that are partially * used over those that are totally full. This helps to reduce * fragmentation. @@ -2105,11 +2134,14 @@ uma_zalloc_bucket(uma_zone_t zone, int flags) max = MIN(bucket->ub_entries, zone->uz_count); /* Try to keep the buckets totally full */ saved = bucket->ub_cnt; + slab = NULL; while (bucket->ub_cnt < max && - (slab = uma_zone_slab(zone, flags)) != NULL) { + (slab = uma_zone_slab(zone, slab, flags)) != NULL) { while (slab->us_freecount && bucket->ub_cnt < max) { bucket->ub_bucket[bucket->ub_cnt++] = uma_slab_alloc(zone, slab); + if (zone->uz_keg->uk_flags & UMA_ZONE_STRIPEBUCKET) + break; } /* Don't block on the next fill */ @@ -2191,7 +2223,7 @@ uma_zalloc_internal(uma_zone_t zone, void *udata, #endif ZONE_LOCK(zone); - slab = uma_zone_slab(zone, flags); + slab = uma_zone_slab(zone, NULL, flags); if (slab == NULL) { zone->uz_fails++; ZONE_UNLOCK(zone); @@ -2446,7 +2478,10 @@ uma_zfree_internal(uma_zone_t zone, void *item, vo slab = (uma_slab_t)mem; } } else { - slab = (uma_slab_t)udata; + if (udata == NULL) + slab = vtoslab((vm_offset_t)item); + else + slab = (uma_slab_t)udata; } /* Do we need to remove from any lists? 
*/ Index: amd64/conf/GENERIC =================================================================== --- amd64/conf/GENERIC (revision 185196) +++ amd64/conf/GENERIC (working copy) @@ -85,6 +85,8 @@ device cpufreq device acpi device pci +device hwpmc + # Floppy drives device fdc Index: sys/mbuf.h =================================================================== --- sys/mbuf.h (revision 185196) +++ sys/mbuf.h (working copy) @@ -53,10 +53,15 @@ * externally and attach it to the mbuf in a way similar to that of mbuf * clusters. */ -#define MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */ -#define MHLEN (MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */ +#define MHSIZE offsetof(struct mbuf, M_dat.M_databuf) +#define MPKTHSIZE offsetof(struct mbuf, M_dat.MH.MH_dat.MH_databuf) +#define MEXTHSIZE sizeof(struct mbuf) +#define MLEN (MSIZE - MHSIZE) /* normal data len */ +#define MHLEN (MSIZE - MPKTHSIZE) /* data len w/pkthdr */ #define MINCLSIZE (MHLEN + 1) /* smallest amount to put in cluster */ #define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */ +#define MDATAALIGN (64) /* Starting alignment of data. */ +#define MICLBYTES (MCLBYTES + roundup2(MEXTHSIZE, MDATAALIGN)) #ifdef _KERNEL /*- @@ -67,36 +72,9 @@ #define mtod(m, t) ((t)((m)->m_data)) #define dtom(x) ((struct mbuf *)((intptr_t)(x) & ~(MSIZE-1))) -/* - * Argument structure passed to UMA routines during mbuf and packet - * allocations. - */ -struct mb_args { - int flags; /* Flags for mbuf being allocated */ - short type; /* Type of mbuf being allocated */ -}; #endif /* _KERNEL */ -#if defined(__LP64__) -#define M_HDR_PAD 6 -#else -#define M_HDR_PAD 2 -#endif - /* - * Header present at the beginning of every mbuf. 
- */ -struct m_hdr { - struct mbuf *mh_next; /* next buffer in chain */ - struct mbuf *mh_nextpkt; /* next chain in queue/record */ - caddr_t mh_data; /* location of data */ - int mh_len; /* amount of data in this mbuf */ - int mh_flags; /* flags; see below */ - short mh_type; /* type of data in this mbuf */ - uint8_t pad[M_HDR_PAD];/* word align */ -}; - -/* * Packet tag structure (see below for details). */ struct m_tag { @@ -130,15 +108,14 @@ struct pkthdr { * Description of external storage mapped into mbuf; valid only if M_EXT is * set. */ -struct m_ext { +struct mb_ext { caddr_t ext_buf; /* start of buffer */ + u_int ext_size; /* size of buffer, for ext_free */ + int ext_type; /* type of external storage */ void (*ext_free) /* free routine if not the usual */ (void *, void *); void *ext_arg1; /* optional argument pointer */ void *ext_arg2; /* optional argument pointer */ - u_int ext_size; /* size of buffer, for ext_free */ - volatile u_int *ref_cnt; /* pointer to ref count info */ - int ext_type; /* type of external storage */ }; /* @@ -146,28 +123,30 @@ struct pkthdr { * purposes. */ struct mbuf { - struct m_hdr m_hdr; + struct mbuf *m_next; /* next buffer in chain */ + struct mbuf *m_nextpkt; /* next chain in queue/record */ + uma_zone_t m_zone; /* Zone allocated from. */ + caddr_t m_data; /* location of valid data */ + volatile int m_ref; /* Reference count. */ + int m_len; /* amount of data in this mbuf */ + int m_flags; /* flags; see below */ + short m_type; /* type of data in this mbuf */ + short m_size; /* Actual size of buffer. 
*/ union { struct { struct pkthdr MH_pkthdr; /* M_PKTHDR set */ union { - struct m_ext MH_ext; /* M_EXT set */ - char MH_databuf[MHLEN]; + struct mb_ext MH_ext; /* M_EXT set */ + char MH_databuf[0]; } MH_dat; } MH; - char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */ + char M_databuf[0]; /* !M_PKTHDR, !M_EXT */ } M_dat; }; -#define m_next m_hdr.mh_next -#define m_len m_hdr.mh_len -#define m_data m_hdr.mh_data -#define m_type m_hdr.mh_type -#define m_flags m_hdr.mh_flags -#define m_nextpkt m_hdr.mh_nextpkt #define m_act m_nextpkt #define m_pkthdr M_dat.MH.MH_pkthdr #define m_ext M_dat.MH.MH_dat.MH_ext -#define m_pktdat M_dat.MH.MH_dat.MH_databuf +#define m_pktdat M_dat.MH.MH_dat.MH_databuf #define m_dat M_dat.M_databuf /* @@ -229,7 +208,6 @@ struct mbuf { #define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */ #define EXT_MOD_TYPE 200 /* custom module's ext_buf type */ #define EXT_DISPOSABLE 300 /* can throw this buffer away w/page flipping */ -#define EXT_EXTREF 400 /* has externally maintained ref_cnt ptr */ /* * Flags indicating hw checksum support and sw checksum requirements. This @@ -318,15 +296,16 @@ struct mbstat { * !_KERNEL so that monitoring tools can look up the zones with * libmemstat(3). 
*/ -#define MBUF_MEM_NAME "mbuf" #define MBUF_CLUSTER_MEM_NAME "mbuf_cluster" -#define MBUF_PACKET_MEM_NAME "mbuf_packet" -#define MBUF_JUMBOP_MEM_NAME "mbuf_jumbo_page" +#define MBUF_EXT_NAME "mbuf_ext" #define MBUF_JUMBO9_MEM_NAME "mbuf_jumbo_9k" #define MBUF_JUMBO16_MEM_NAME "mbuf_jumbo_16k" +#define MBUF_JUMBOP_MEM_NAME "mbuf_jumbo_page" +#define MBUF_MEM_NAME "mbuf" +#define MBUF_PACKET_MEM_NAME "mbuf_packet" #define MBUF_TAG_MEM_NAME "mbuf_tag" -#define MBUF_EXTREFCNT_MEM_NAME "mbuf_ext_refcnt" + #ifdef _KERNEL #ifdef WITNESS @@ -345,44 +324,114 @@ struct mbstat { * The rest of it is defined in kern/kern_mbuf.c */ -extern uma_zone_t zone_mbuf; extern uma_zone_t zone_clust; -extern uma_zone_t zone_pack; -extern uma_zone_t zone_jumbop; +extern uma_zone_t zone_ext; +extern uma_zone_t zone_iclust; extern uma_zone_t zone_jumbo9; extern uma_zone_t zone_jumbo16; -extern uma_zone_t zone_ext_refcnt; +extern uma_zone_t zone_jumbop; +extern uma_zone_t zone_mbuf; +extern uma_zone_t zone_pack; +static __inline struct mbuf *m_alloc(uma_zone_t zone, int size, int how, + short type, int flags); +void m_extadd(struct mbuf *, caddr_t, u_int, + void (*)(void *, void *), void *, void *, + int, int); +void m_ext_free_zone(void *arg1, void *arg2); +void m_ext_free_mbuf(void *arg1, void *arg2); +void m_ext_free_nop(void *arg1, void *arg2); +static __inline struct mbuf *m_free(struct mbuf *m); static __inline struct mbuf *m_getcl(int how, short type, int flags); static __inline struct mbuf *m_get(int how, short type); static __inline struct mbuf *m_gethdr(int how, short type); static __inline struct mbuf *m_getjcl(int how, short type, int flags, int size); static __inline struct mbuf *m_getclr(int how, short type); /* XXX */ -static __inline struct mbuf *m_free(struct mbuf *m); +static __inline int m_init(struct mbuf *m, uma_zone_t zone, + int size, int how, short type, int flags); static __inline void m_clget(struct mbuf *m, int how); static __inline void *m_cljget(struct mbuf *m, 
int how, int size); static __inline void m_chtype(struct mbuf *m, short new_type); -void mb_free_ext(struct mbuf *); static __inline struct mbuf *m_last(struct mbuf *m); +int m_pkthdr_init(struct mbuf *m, int how); + +#include + +static __inline struct mbuf * +m_alloc(uma_zone_t zone, int size, int how, short type, int flags) +{ + struct mbuf *m; + + m = uma_zalloc(zone, how); + if (m == NULL) + return (NULL); + CTR3(KTR_NET, "m_alloc: %p zone %p size %d", m, zone, size); + if (type != MT_NOINIT) { + if (m_init(m, zone, size, how, type, flags)) { + uma_zfree(zone, m); + return (NULL); + } + } + return (m); +} + +static __inline void +m_dataalign(struct mbuf *m) +{ + m->m_data = (void *)roundup2((uintptr_t)m->m_data, MDATAALIGN); +} + +/* + * Initialize an mbuf with linear storage. + * + * Inline because the consumer text overhead will be roughly the same to + * initialize or call a function with this many parameters and M_PKTHDR + * should go away with constant propagation for !MGETHDR. 
+ */ static __inline int +m_init(struct mbuf *m, uma_zone_t zone, int size, int how, short type, + int flags) +{ + int error; + + m->m_next = NULL; + m->m_nextpkt = NULL; + m->m_zone = zone; + m->m_data = m->m_dat; + m->m_ref = 1; + m->m_len = 0; + m->m_flags = flags; + m->m_type = type; + m->m_size = size; + if (flags & M_PKTHDR) { + m->m_data = m->m_pktdat; + if ((error = m_pkthdr_init(m, how)) != 0) + return (error); + } + + return (0); +} + +static __inline int m_gettype(int size) { int type; switch (size) { case MSIZE: + case MLEN: + /* FALLTHROUGH */ + case MHLEN: type = EXT_MBUF; break; case MCLBYTES: type = EXT_CLUSTER; break; -#if MJUMPAGESIZE != MCLBYTES case MJUMPAGESIZE: type = EXT_JUMBOP; break; -#endif case MJUM9BYTES: type = EXT_JUMBO9; break; @@ -403,16 +452,17 @@ m_getzone(int size) switch (size) { case MSIZE: + case MLEN: + /* FALLTHROUGH */ + case MHLEN: zone = zone_mbuf; break; case MCLBYTES: zone = zone_clust; break; -#if MJUMPAGESIZE != MCLBYTES case MJUMPAGESIZE: zone = zone_jumbop; break; -#endif case MJUM9BYTES: zone = zone_jumbo9; break; @@ -429,11 +479,8 @@ m_getzone(int size) static __inline struct mbuf * m_get(int how, short type) { - struct mb_args args; - args.flags = 0; - args.type = type; - return ((struct mbuf *)(uma_zalloc_arg(zone_mbuf, &args, how))); + return (m_alloc(zone_mbuf, MLEN, how, type, 0)); } /* @@ -443,11 +490,8 @@ static __inline struct mbuf * m_getclr(int how, short type) { struct mbuf *m; - struct mb_args args; - args.flags = 0; - args.type = type; - m = uma_zalloc_arg(zone_mbuf, &args, how); + m = m_alloc(zone_mbuf, MLEN, how, type, 0); if (m != NULL) bzero(m->m_data, MLEN); return (m); @@ -456,72 +500,127 @@ m_getclr(int how, short type) static __inline struct mbuf * m_gethdr(int how, short type) { - struct mb_args args; - args.flags = M_PKTHDR; - args.type = type; - return ((struct mbuf *)(uma_zalloc_arg(zone_mbuf, &args, how))); + return (m_alloc(zone_mbuf, MHLEN, how, type, M_PKTHDR)); } static __inline struct 
mbuf * +m_getpack(int how, short type, int flags) +{ + struct mbuf *m; + + m = m_alloc(zone_pack, MCLBYTES, how, type, flags | M_EXT); + /* Restore the data pointer clobbered by m_init. */ + if (m && type != MT_NOINIT) + m->m_data = m->m_ext.ext_buf; + + return (m); +} + +static __inline struct mbuf * m_getcl(int how, short type, int flags) { - struct mb_args args; + struct mbuf *m; - args.flags = flags; - args.type = type; - return ((struct mbuf *)(uma_zalloc_arg(zone_pack, &args, how))); + m = m_alloc(zone_iclust, MCLBYTES, how, type, flags); + if (m != NULL) + m_dataalign(m); + else + m = m_getpack(how, type, flags); + + return (m); } /* * m_getjcl() returns an mbuf with a cluster of the specified size attached. * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. - * - * XXX: This is rather large, should be real function maybe. */ static __inline struct mbuf * m_getjcl(int how, short type, int flags, int size) { - struct mb_args args; - struct mbuf *m, *n; uma_zone_t zone; + struct mbuf *m; + void *mem; - args.flags = flags; - args.type = type; - - m = uma_zalloc_arg(zone_mbuf, &args, how); - if (m == NULL) - return (NULL); zone = m_getzone(size); - n = uma_zalloc_arg(zone, m, how); - if (n == NULL) { - uma_zfree(zone_mbuf, m); - return (NULL); + switch (size) { + case MCLBYTES: + m = m_getcl(how, type, flags); + break; + case MJUMPAGESIZE: + /* FALLTHROUGH */ +#ifdef MJUM16BYTES + case MJUM16BYTES: +#endif + /* + * Allocate the memory and header separate for these sizes. + */ + mem = uma_zalloc(zone, how); + if (mem == NULL) + return (NULL); + m = m_alloc(zone_ext, 0, how, type, flags); + if (m == NULL) { + uma_zfree(zone, mem); + return (NULL); + } + m_extadd(m, mem, size, m_ext_free_zone, zone, mem, + flags, m_gettype(size)); + break; + default: + /* + * Allocate contiguous header and memory from the given + * zone. 
+ */ + m = m_alloc(m_getzone(size), size, how, type, flags); + if (m) + m_dataalign(m); + break; } + return (m); } +void m_tag_delete_chain(struct mbuf *, struct m_tag *); + static __inline void -m_free_fast(struct mbuf *m) +m_free_fast(uma_zone_t zone, struct mbuf *m) { + #ifdef INVARIANTS + KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); if (m->m_flags & M_PKTHDR) KASSERT(SLIST_EMPTY(&m->m_pkthdr.tags), ("doing fast free of mbuf with tags")); #endif - - uma_zfree_arg(zone_mbuf, m, (void *)MB_NOTAGS); + + uma_zfree(zone, m); } +static __inline void +_m_free(struct mbuf *m) +{ + + CTR4(KTR_NET, "m_free: %p ref %d zone %p size %d", + m, m->m_ref, m->m_zone, m->m_size); + if (m->m_flags & M_PKTHDR && !SLIST_EMPTY(&m->m_pkthdr.tags)) + m_tag_delete_chain(m, NULL); + /* + * Free attached storage if this mbuf is the only reference to it. + */ + if (m->m_flags & M_EXT) + m->m_ext.ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2); + if ((m->m_flags & M_NOFREE) == 0) + uma_zfree(m->m_zone, m); +} + static __inline struct mbuf * m_free(struct mbuf *m) { struct mbuf *n = m->m_next; - if (m->m_flags & M_EXT) - mb_free_ext(m); - else if ((m->m_flags & M_NOFREE) == 0) - uma_zfree(zone_mbuf, m); + if (m->m_ref == 1 || atomic_fetchadd_int(&m->m_ref, -1) == 1) + _m_free(m); + return (n); } @@ -529,18 +627,7 @@ static __inline void m_clget(struct mbuf *m, int how) { - if (m->m_flags & M_EXT) - printf("%s: %p mbuf already has cluster\n", __func__, m); - m->m_ext.ext_buf = (char *)NULL; - uma_zalloc_arg(zone_clust, m, how); - /* - * On a cluster allocation failure, drain the packet zone and retry, - * we might be able to loosen a few clusters up on the drain. 
- */ - if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) { - zone_drain(zone_pack); - uma_zalloc_arg(zone_clust, m, how); - } + m_cljget(m, how, MCLBYTES); } /* @@ -554,14 +641,21 @@ static __inline void * m_cljget(struct mbuf *m, int how, int size) { uma_zone_t zone; + void *mem; if (m && m->m_flags & M_EXT) printf("%s: %p mbuf already has cluster\n", __func__, m); if (m != NULL) m->m_ext.ext_buf = NULL; zone = m_getzone(size); - return (uma_zalloc_arg(zone, m, how)); + mem = uma_zalloc(zone, how); + if (mem == NULL) + return (NULL); + if (m) + m_extadd(m, mem, size, m_ext_free_zone, zone, mem, 0, + m_gettype(size)); + return (mem); } static __inline void @@ -572,35 +665,26 @@ m_cljset(struct mbuf *m, void *cl, int type) switch (type) { case EXT_CLUSTER: + zone = zone_clust; size = MCLBYTES; - zone = zone_clust; break; -#if MJUMPAGESIZE != MCLBYTES case EXT_JUMBOP: + zone = zone_jumbop; size = MJUMPAGESIZE; - zone = zone_jumbop; break; -#endif case EXT_JUMBO9: + zone = zone_jumbo9; size = MJUM9BYTES; - zone = zone_jumbo9; break; case EXT_JUMBO16: + zone = zone_jumbo16; size = MJUM16BYTES; - zone = zone_jumbo16; break; default: panic("unknown cluster type"); break; } - - m->m_data = m->m_ext.ext_buf = cl; - m->m_ext.ext_free = m->m_ext.ext_arg1 = m->m_ext.ext_arg2 = NULL; - m->m_ext.ext_size = size; - m->m_ext.ext_type = type; - m->m_ext.ref_cnt = uma_find_refcnt(zone, cl); - m->m_flags |= M_EXT; - + m_extadd(m, cl, size, m_ext_free_zone, zone, cl, 0, type); } static __inline void @@ -637,9 +721,7 @@ m_last(struct mbuf *m) * be both the local data payload, or an external buffer area, depending on * whether M_EXT is set). */ -#define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \ - (!(((m)->m_flags & M_EXT)) || \ - (*((m)->m_ext.ref_cnt) == 1)) ) \ +#define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && (m)->m_ref == 1) /* Check if the supplied mbuf has a packet header, or else panic. 
*/ #define M_ASSERTPKTHDR(m) \ @@ -659,25 +741,14 @@ m_last(struct mbuf *m) * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place an * object of the specified size at the end of the mbuf, longword aligned. */ -#define M_ALIGN(m, len) do { \ - KASSERT(!((m)->m_flags & (M_PKTHDR|M_EXT)), \ - ("%s: M_ALIGN not normal mbuf", __func__)); \ - KASSERT((m)->m_data == (m)->m_dat, \ - ("%s: M_ALIGN not a virgin mbuf", __func__)); \ - (m)->m_data += (MLEN - (len)) & ~(sizeof(long) - 1); \ -} while (0) +#define M_ALIGN(m, len) \ + ((m)->m_data += ((m)->m_size - (len)) & ~(sizeof(long) - 1)) /* * As above, for mbufs allocated with m_gethdr/MGETHDR or initialized by * M_DUP/MOVE_PKTHDR. */ -#define MH_ALIGN(m, len) do { \ - KASSERT((m)->m_flags & M_PKTHDR && !((m)->m_flags & M_EXT), \ - ("%s: MH_ALIGN not PKTHDR mbuf", __func__)); \ - KASSERT((m)->m_data == (m)->m_pktdat, \ - ("%s: MH_ALIGN not a virgin mbuf", __func__)); \ - (m)->m_data += (MHLEN - (len)) & ~(sizeof(long) - 1); \ -} while (0) +#define MH_ALIGN(m, len) M_ALIGN(m, len) /* * Compute the amount of space available before the current start of data in @@ -686,11 +757,7 @@ m_last(struct mbuf *m) * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. */ -#define M_LEADINGSPACE(m) \ - ((m)->m_flags & M_EXT ? \ - (M_WRITABLE(m) ? (m)->m_data - (m)->m_ext.ext_buf : 0): \ - (m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat : \ - (m)->m_data - (m)->m_dat) +#define M_LEADINGSPACE(m) (M_WRITABLE(m) ? (m)->m_data - M_START(m) : 0) /* * Compute the amount of space available after the end of data in an mbuf. @@ -699,11 +766,13 @@ m_last(struct mbuf *m) * of checking writability of the mbuf data area rests solely with the caller. */ #define M_TRAILINGSPACE(m) \ - ((m)->m_flags & M_EXT ? \ - (M_WRITABLE(m) ? 
(m)->m_ext.ext_buf + (m)->m_ext.ext_size \ - - ((m)->m_data + (m)->m_len) : 0) : \ - &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len)) + (M_WRITABLE(m) ? \ + (M_START(m) + (m)->m_size) - ((m)->m_data + (m)->m_len) : 0) +#define M_START(m) \ + ((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf : \ + (m)->m_flags & M_PKTHDR ? (m)->m_pktdat : (m)->m_dat) + /* * Arrange to prepend space of size plen to mbuf m. If a new mbuf must be * allocated, how specifies whether to wait. If the allocation fails, the @@ -753,8 +822,6 @@ int m_apply(struct mbuf *, int, int, int (*)(void *, void *, u_int), void *); int m_append(struct mbuf *, int, c_caddr_t); void m_cat(struct mbuf *, struct mbuf *); -void m_extadd(struct mbuf *, caddr_t, u_int, - void (*)(void *, void *), void *, void *, int, int); struct mbuf *m_collapse(struct mbuf *, int, int); void m_copyback(struct mbuf *, int, int, c_caddr_t); void m_copydata(const struct mbuf *, int, int, caddr_t); @@ -867,7 +934,6 @@ struct mbuf *m_unshare(struct mbuf *, int how); /* Packet tag routines. 
*/ struct m_tag *m_tag_alloc(u_int32_t, int, int, int); void m_tag_delete(struct mbuf *, struct m_tag *); -void m_tag_delete_chain(struct mbuf *, struct m_tag *); void m_tag_free_default(struct m_tag *); struct m_tag *m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *); struct m_tag *m_tag_copy(struct m_tag *, int); Index: sys/sockbuf.h =================================================================== --- sys/sockbuf.h (revision 185196) +++ sys/sockbuf.h (working copy) @@ -171,12 +171,10 @@ void sbunlock(struct sockbuf *sb); (sb)->sb_cc += (m)->m_len; \ if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA) \ (sb)->sb_ctl += (m)->m_len; \ - (sb)->sb_mbcnt += MSIZE; \ (sb)->sb_mcnt += 1; \ - if ((m)->m_flags & M_EXT) { \ - (sb)->sb_mbcnt += (m)->m_ext.ext_size; \ + if ((m)->m_flags & M_EXT) \ (sb)->sb_ccnt += 1; \ - } \ + (sb)->sb_mbcnt += (m)->m_size + sizeof(struct mbuf); \ } /* adjust counters in sb reflecting freeing of m */ @@ -184,12 +182,10 @@ void sbunlock(struct sockbuf *sb); (sb)->sb_cc -= (m)->m_len; \ if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA) \ (sb)->sb_ctl -= (m)->m_len; \ - (sb)->sb_mbcnt -= MSIZE; \ (sb)->sb_mcnt -= 1; \ - if ((m)->m_flags & M_EXT) { \ - (sb)->sb_mbcnt -= (m)->m_ext.ext_size; \ + if ((m)->m_flags & M_EXT) \ (sb)->sb_ccnt -= 1; \ - } \ + (sb)->sb_mbcnt -= (m)->m_size + sizeof(struct mbuf); \ if ((sb)->sb_sndptr == (m)) { \ (sb)->sb_sndptr = NULL; \ (sb)->sb_sndptroff = 0; \