Index: nfsclient/nfsm_subs.h
===================================================================
--- nfsclient/nfsm_subs.h	(revision 194477)
+++ nfsclient/nfsm_subs.h	(working copy)
@@ -59,18 +59,8 @@ struct mbuf *nfsm_rpchead(struct ucred *cr, int nm
 		    struct mbuf *mrest, int mrest_len, struct mbuf **mbp,
 		    u_int32_t **xidpp);

-#define	M_HASCL(m)	((m)->m_flags & M_EXT)
-#define	NFSMINOFF(m) \
-	do { \
-		if (M_HASCL(m)) \
-			(m)->m_data = (m)->m_ext.ext_buf; \
-		else if ((m)->m_flags & M_PKTHDR) \
-			(m)->m_data = (m)->m_pktdat; \
-		else \
-			(m)->m_data = (m)->m_dat; \
-	} while (0)
-#define	NFSMSIZ(m)	((M_HASCL(m))?MCLBYTES: \
-				(((m)->m_flags & M_PKTHDR)?MHLEN:MLEN))
+#define	NFSMINOFF(m)	M_START((m))
+#define	NFSMSIZ(m)	(m)->m_size

 /*
  * Now for the macros that do the simple stuff and call the functions
Index: kern/uipc_sockbuf.c
===================================================================
--- kern/uipc_sockbuf.c	(revision 194477)
+++ kern/uipc_sockbuf.c	(working copy)
@@ -562,9 +562,7 @@ sbcheck(struct sockbuf *sb)
 		n = m->m_nextpkt;
 		for (; m; m = m->m_next) {
 			len += m->m_len;
-			mbcnt += MSIZE;
-			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
-				mbcnt += m->m_ext.ext_size;
+			mbcnt += m->m_size + sizeof(struct mbuf);
 		}
 	}
 	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
@@ -768,7 +766,7 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, str
 			continue;
 		}
 		if (n && (n->m_flags & M_EOR) == 0 &&
-		    M_WRITABLE(n) &&
+		    (n->m_flags & M_RDONLY) == 0 &&
 		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
 		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
 		    m->m_len <= M_TRAILINGSPACE(n) &&
Index: kern/kern_mbuf.c
===================================================================
--- kern/kern_mbuf.c	(revision 194477)
+++ kern/kern_mbuf.c	(working copy)
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -211,27 +212,24 @@ SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG

 /*
  * Zones from which we allocate.
  */
-uma_zone_t	zone_mbuf;
 uma_zone_t	zone_clust;
-uma_zone_t	zone_pack;
 uma_zone_t	zone_jumbop;
 uma_zone_t	zone_jumbo9;
 uma_zone_t	zone_jumbo16;
-uma_zone_t	zone_ext_refcnt;
+uma_zone_t	zone_mbuf;
+uma_zone_t	zone_pack;

 /*
  * Local prototypes.
  */
-static int	mb_ctor_mbuf(void *, int, void *, int);
-static int	mb_ctor_clust(void *, int, void *, int);
-static int	mb_ctor_pack(void *, int, void *, int);
-static void	mb_dtor_mbuf(void *, int, void *);
-static void	mb_dtor_clust(void *, int, void *);
-static void	mb_dtor_pack(void *, int, void *);
-static int	mb_zinit_pack(void *, int, int);
-static void	mb_zfini_pack(void *, int);
-
+#ifdef INVARIANTS
+static int	mb_ctor_pack(void *mem, int size, void *arg, int how);
+#endif
+static void	mb_dtor_pack(void *mem, int size, void *arg);
 static void	mb_reclaim(void *);
+static int	mb_zinit_pack(void *mem, int size, int how);
+static void	mb_zfini_pack(void *mem, int size);
+
 static void	mbuf_init(void *);
 static void	*mbuf_jumbo_alloc(uma_zone_t, int, u_int8_t *, int);
@@ -250,71 +248,64 @@ mbuf_init(void *dummy)
 	 * Configure UMA zones for Mbufs, Clusters, and Packets.
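	 *
	 * [Editorial note, not part of the patch: with m_size carried in
	 * every mbuf, consumers no longer switch on M_EXT/M_PKTHDR to learn
	 * the buffer size; the NFSMSIZ() and sbcheck() hunks above reduce
	 * to reads of m->m_size, e.g. a hypothetical space check becomes
	 *
	 *	space = m->m_size - m->m_len;
	 *
	 * cf. M_TRAILINGSPACE.]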
	 */
	zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE,
-	    mb_ctor_mbuf, mb_dtor_mbuf,
 #ifdef INVARIANTS
-	    trash_init, trash_fini,
+	    trash_ctor, trash_dtor, trash_init, trash_fini,
 #else
-	    NULL, NULL,
+	    NULL, NULL, NULL, NULL,
 #endif
 	    MSIZE - 1, UMA_ZONE_MAXBUCKET);

 	zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
-	    mb_ctor_clust, mb_dtor_clust,
 #ifdef INVARIANTS
-	    trash_init, trash_fini,
+	    trash_ctor, trash_dtor, trash_init, trash_fini,
 #else
-	    NULL, NULL,
+	    NULL, NULL, NULL, NULL,
 #endif
-	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+	    UMA_ALIGN_PTR, UMA_ZONE_MAXBUCKET);
 	if (nmbclusters > 0)
 		uma_zone_set_max(zone_clust, nmbclusters);

-	zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
+	zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME,
+#ifdef INVARIANTS
+	    mb_ctor_pack,
+#else
+	    NULL,
+#endif
 	    mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);

 	/* Make jumbo frame zone too. Page size, 9k and 16k. */
 	zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE,
-	    mb_ctor_clust, mb_dtor_clust,
 #ifdef INVARIANTS
-	    trash_init, trash_fini,
+	    trash_ctor, trash_dtor, trash_init, trash_fini,
 #else
-	    NULL, NULL,
+	    NULL, NULL, NULL, NULL,
 #endif
-	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+	    UMA_ALIGN_PTR, 0);
 	if (nmbjumbop > 0)
 		uma_zone_set_max(zone_jumbop, nmbjumbop);

 	zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
-	    mb_ctor_clust, mb_dtor_clust,
 #ifdef INVARIANTS
-	    trash_init, trash_fini,
+	    trash_ctor, trash_dtor, trash_init, trash_fini,
 #else
-	    NULL, NULL,
+	    NULL, NULL, NULL, NULL,
 #endif
-	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+	    UMA_ALIGN_PTR, 0);
 	if (nmbjumbo9 > 0)
 		uma_zone_set_max(zone_jumbo9, nmbjumbo9);
 	uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);

 	zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
-	    mb_ctor_clust, mb_dtor_clust,
 #ifdef INVARIANTS
-	    trash_init, trash_fini,
+	    trash_ctor, trash_dtor, trash_init, trash_fini,
 #else
-	    NULL, NULL,
+	    NULL, NULL, NULL, NULL,
 #endif
-	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+	    UMA_ALIGN_PTR, 0);
 	if (nmbjumbo16 > 0)
 		uma_zone_set_max(zone_jumbo16, nmbjumbo16);
 	uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);

-	zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
-	    NULL, NULL,
-	    NULL, NULL,
-	    UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
-
-	/* uma_prealloc() goes here... */
-
 	/*
 	 * Hook event handler for low-memory situation, used to
 	 * drain protocols and push data back to the caches (UMA
@@ -359,86 +350,18 @@ mbuf_jumbo_alloc(uma_zone_t zone, int bytes, u_int
 	    (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0));
 }

-/*
- * Constructor for Mbuf master zone.
- *
- * The 'arg' pointer points to a mb_args structure which
- * contains call-specific information required to support the
- * mbuf allocation API.  See mbuf.h.
- */
+#ifdef INVARIANTS
 static int
-mb_ctor_mbuf(void *mem, int size, void *arg, int how)
+mb_ctor_pack(void *mem, int size, void *arg, int how)
 {
 	struct mbuf *m;
-	struct mb_args *args;
-#ifdef MAC
-	int error;
-#endif
-	int flags;
-	short type;

-#ifdef INVARIANTS
-	trash_ctor(mem, size, arg, how);
-#endif
 	m = (struct mbuf *)mem;
-	args = (struct mb_args *)arg;
-	flags = args->flags;
-	type = args->type;
+	trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);

-	/*
-	 * The mbuf is initialized later.  The caller has the
-	 * responsibility to set up any MAC labels too.
-	 */
-	if (type == MT_NOINIT)
-		return (0);
-
-	m->m_next = NULL;
-	m->m_nextpkt = NULL;
-	m->m_len = 0;
-	m->m_flags = flags;
-	m->m_type = type;
-	if (flags & M_PKTHDR) {
-		m->m_data = m->m_pktdat;
-		m->m_pkthdr.rcvif = NULL;
-		m->m_pkthdr.header = NULL;
-		m->m_pkthdr.len = 0;
-		m->m_pkthdr.csum_flags = 0;
-		m->m_pkthdr.csum_data = 0;
-		m->m_pkthdr.tso_segsz = 0;
-		m->m_pkthdr.ether_vtag = 0;
-		m->m_pkthdr.flowid = 0;
-		SLIST_INIT(&m->m_pkthdr.tags);
-#ifdef MAC
-		/* If the label init fails, fail the alloc */
-		error = mac_mbuf_init(m, how);
-		if (error)
-			return (error);
-#endif
-	} else
-		m->m_data = m->m_dat;
 	return (0);
 }
-
-/*
- * The Mbuf master zone destructor.
- */
-static void
-mb_dtor_mbuf(void *mem, int size, void *arg)
-{
-	struct mbuf *m;
-	unsigned long flags;
-
-	m = (struct mbuf *)mem;
-	flags = (unsigned long)arg;
-
-	if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0)
-		m_tag_delete_chain(m, NULL);
-	KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
-	KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
-#ifdef INVARIANTS
-	trash_dtor(mem, size, arg);
 #endif
-}

 /*
  * The Mbuf Packet zone destructor.
@@ -449,21 +372,21 @@ mb_dtor_pack(void *mem, int size, void *arg)
 	struct mbuf *m;

 	m = (struct mbuf *)mem;
-	if ((m->m_flags & M_PKTHDR) != 0)
-		m_tag_delete_chain(m, NULL);
-
 	/* Make sure we've got a clean cluster back. */
 	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
 	KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__));
-	KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__));
+	KASSERT(m->m_ext.ext_free == m_ext_free_nop,
+	    ("%s: ext_free != m_ext_free_nop", __func__));
 	KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__));
 	KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__));
-	KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__));
-	KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__));
-	KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__));
-#ifdef INVARIANTS
+	KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES",
+	    __func__));
+	KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET",
+	    __func__));
+#ifdef INVARIANTS
 	trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
 #endif
+
 	/*
 	 * If there are processes blocked on zone_clust, waiting for pages
 	 * to be freed up, cause them to be woken up by draining the
@@ -477,85 +400,6 @@ mb_dtor_pack(void *mem, int size, void *arg)
 }

 /*
- * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor.
- *
- * Here the 'arg' pointer points to the Mbuf which we
- * are configuring cluster storage for.  If 'arg' is
- * empty we allocate just the cluster without setting
- * the mbuf to it.  See mbuf.h.
- */
-static int
-mb_ctor_clust(void *mem, int size, void *arg, int how)
-{
-	struct mbuf *m;
-	u_int *refcnt;
-	int type;
-	uma_zone_t zone;
-
-#ifdef INVARIANTS
-	trash_ctor(mem, size, arg, how);
-#endif
-	switch (size) {
-	case MCLBYTES:
-		type = EXT_CLUSTER;
-		zone = zone_clust;
-		break;
-#if MJUMPAGESIZE != MCLBYTES
-	case MJUMPAGESIZE:
-		type = EXT_JUMBOP;
-		zone = zone_jumbop;
-		break;
-#endif
-	case MJUM9BYTES:
-		type = EXT_JUMBO9;
-		zone = zone_jumbo9;
-		break;
-	case MJUM16BYTES:
-		type = EXT_JUMBO16;
-		zone = zone_jumbo16;
-		break;
-	default:
-		panic("unknown cluster size");
-		break;
-	}
-
-	m = (struct mbuf *)arg;
-	refcnt = uma_find_refcnt(zone, mem);
-	*refcnt = 1;
-	if (m != NULL) {
-		m->m_ext.ext_buf = (caddr_t)mem;
-		m->m_data = m->m_ext.ext_buf;
-		m->m_flags |= M_EXT;
-		m->m_ext.ext_free = NULL;
-		m->m_ext.ext_arg1 = NULL;
-		m->m_ext.ext_arg2 = NULL;
-		m->m_ext.ext_size = size;
-		m->m_ext.ext_type = type;
-		m->m_ext.ref_cnt = refcnt;
-	}
-
-	return (0);
-}
-
-/*
- * The Mbuf Cluster zone destructor.
- */
-static void
-mb_dtor_clust(void *mem, int size, void *arg)
-{
-#ifdef INVARIANTS
-	uma_zone_t zone;
-
-	zone = m_getzone(size);
-	KASSERT(*(uma_find_refcnt(zone, mem)) <= 1,
-	    ("%s: refcnt incorrect %u", __func__,
-	    *(uma_find_refcnt(zone, mem))) );
-
-	trash_dtor(mem, size, arg);
-#endif
-}
-
-/*
  * The Packet secondary zone's init routine, executed on the
  * object's transition from mbuf keg slab to zone cache.
  */
@@ -565,13 +409,16 @@ mb_zinit_pack(void *mem, int size, int how)
 	struct mbuf *m;

 	m = (struct mbuf *)mem;		/* m is virgin. */
-	if (uma_zalloc_arg(zone_clust, m, how) == NULL ||
-	    m->m_ext.ext_buf == NULL)
+	/*
+	 * Allocate and attach the cluster to the ext.
+	 */
+	if ((mem = uma_zalloc(zone_clust, how)) == NULL)
 		return (ENOMEM);
-	m->m_ext.ext_type = EXT_PACKET;	/* Override. */
+	m_extadd(m, mem, MCLBYTES, m_ext_free_nop, NULL, NULL, 0, EXT_PACKET);
 #ifdef INVARIANTS
-	trash_init(m->m_ext.ext_buf, MCLBYTES, how);
+	return trash_init(m->m_ext.ext_buf, MCLBYTES, how);
 #endif
+
 	return (0);
 }

@@ -594,57 +441,34 @@ mb_zfini_pack(void *mem, int size)
 #endif
 }

-/*
- * The "packet" keg constructor.
- */
-static int
-mb_ctor_pack(void *mem, int size, void *arg, int how)
+int
+m_pkthdr_init(struct mbuf *m, int how)
 {
-	struct mbuf *m;
-	struct mb_args *args;
 #ifdef MAC
 	int error;
 #endif
-	int flags;
-	short type;

-	m = (struct mbuf *)mem;
-	args = (struct mb_args *)arg;
-	flags = args->flags;
-	type = args->type;
-
-#ifdef INVARIANTS
-	trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
-#endif
-	m->m_next = NULL;
-	m->m_nextpkt = NULL;
-	m->m_data = m->m_ext.ext_buf;
-	m->m_len = 0;
-	m->m_flags = (flags | M_EXT);
-	m->m_type = type;
-
-	if (flags & M_PKTHDR) {
-		m->m_pkthdr.rcvif = NULL;
-		m->m_pkthdr.len = 0;
-		m->m_pkthdr.header = NULL;
-		m->m_pkthdr.csum_flags = 0;
-		m->m_pkthdr.csum_data = 0;
-		m->m_pkthdr.tso_segsz = 0;
-		m->m_pkthdr.ether_vtag = 0;
-		m->m_pkthdr.flowid = 0;
-		SLIST_INIT(&m->m_pkthdr.tags);
+	m->m_data = m->m_pktdat;
+	SLIST_INIT(&m->m_pkthdr.tags);
+	m->m_pkthdr.rcvif = NULL;
+	m->m_pkthdr.header = NULL;
+	m->m_pkthdr.len = 0;
+	m->m_pkthdr.flowid = 0;
+	m->m_pkthdr.csum_flags = 0;
+	m->m_pkthdr.csum_data = 0;
+	m->m_pkthdr.tso_segsz = 0;
+	m->m_pkthdr.ether_vtag = 0;
 #ifdef MAC
-		/* If the label init fails, fail the alloc */
-		error = mac_mbuf_init(m, how);
-		if (error)
-			return (error);
+	/* If the label init fails, fail the alloc */
+	error = mac_mbuf_init(m, how);
+	if (error)
+		return (error);
 #endif
-	}
-	/* m_ext is already initialized. */

 	return (0);
 }
+
 /*
  * This is the protocol drain routine.
  *
@@ -666,3 +490,45 @@ mb_reclaim(void *junk)
 		if (pr->pr_drain != NULL)
 			(*pr->pr_drain)();
 }
+
+struct mbuf *
+_m_getjcl(int how, short type, int flags, int size, uma_zone_t zone,
+    int exttype)
+{
+	struct mbuf *m;
+	void *mem;
+
+	if (size == MCLBYTES)
+		return m_getcl(how, type, flags);
+	/*
+	 * Allocate the memory and header separately for these sizes.
+	 */
+	mem = uma_zalloc(zone, how);
+	if (mem == NULL)
+		return (NULL);
+	m = m_alloc(zone_mbuf, 0, how, type, flags);
+	if (m == NULL) {
+		uma_zfree(zone, mem);
+		return (NULL);
+	}
+	m_extadd(m, mem, size, m_ext_free_zone, zone, mem, flags, exttype);
+
+	return (m);
+}
+
+void *
+_m_cljget(struct mbuf *m, int how, int size, uma_zone_t zone, int exttype)
+{
+	void *mem;
+
+	if (m && m->m_flags & M_EXT)
+		printf("%s: %p mbuf already has cluster\n", __func__, m);
+	if (m != NULL)
+		m->m_ext.ext_buf = NULL;
+	mem = uma_zalloc(zone, how);
+	if (mem == NULL)
+		return (NULL);
+	if (m)
+		m_extadd(m, mem, size, m_ext_free_zone, zone, mem, 0, exttype);
+	return (mem);
+}
Index: kern/uipc_mbuf2.c
===================================================================
--- kern/uipc_mbuf2.c	(revision 194477)
+++ kern/uipc_mbuf2.c	(working copy)
@@ -148,14 +148,7 @@ m_pulldown(struct mbuf *m, int off, int len, int *
 	 * M_WRITABLE().  For now, we only evaluate once at the beginning and
 	 * live with this.
 	 */
-	/*
-	 * XXX: This is dumb.  If we're just a regular mbuf with no M_EXT,
-	 * then we're not "writable," according to this code.
-	 */
-	writable = 0;
-	if ((n->m_flags & M_EXT) == 0 ||
-	    (n->m_ext.ext_type == EXT_CLUSTER && M_WRITABLE(n)))
-		writable = 1;
+	writable = M_WRITABLE(n);

 	/*
 	 * the target data is on <n, off>.
Index: kern/uipc_mbuf.c
===================================================================
--- kern/uipc_mbuf.c	(revision 194477)
+++ kern/uipc_mbuf.c	(working copy)
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -84,6 +85,8 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfail
 	   &m_defragrandomfailures, 0, "");
 #endif

+static void	m_refm(struct mbuf *mb, struct mbuf *m);
+
 /*
  * Allocate a given length worth of mbufs and/or clusters (whatever fits
  * best) and return a pointer to the top of the allocated chain.  If an
@@ -125,8 +128,7 @@ m_getm2(struct mbuf *m, int len, int how, short ty
 		}

 		/* Book keeping. */
-		len -= (mb->m_flags & M_EXT) ? mb->m_ext.ext_size :
-		    ((mb->m_flags & M_PKTHDR) ? MHLEN : MLEN);
+		len -= mb->m_size;
 		if (mtail != NULL)
 			mtail->m_next = mb;
 		else
@@ -161,147 +163,63 @@ m_freem(struct mbuf *mb)
 		mb = m_free(mb);
 }

-/*-
- * Configure a provided mbuf to refer to the provided external storage
- * buffer and setup a reference count for said buffer.  If the setting
- * up of the reference count fails, the M_EXT bit will not be set.  If
- * successfull, the M_EXT bit is set in the mbuf's flags.
- *
- * Arguments:
- *    mb     The existing mbuf to which to attach the provided buffer.
- *    buf    The address of the provided external storage buffer.
- *    size   The size of the provided buffer.
- *    freef  A pointer to a routine that is responsible for freeing the
- *           provided external storage buffer.
- *    args   A pointer to an argument structure (of any type) to be passed
- *           to the provided freef routine (may be NULL).
- *    flags  Any other flags to be passed to the provided mbuf.
- *    type   The type that the external storage buffer should be
- *           labeled with.
- *
- * Returns:
- *    Nothing.
+/*
+ * Reference the existing storage area of an mbuf.  The reference is
+ * read-only and the referenced data cannot be freed until the referencing
+ * mbuf is freed.
  */
-void
-m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
-    void (*freef)(void *, void *), void *arg1, void *arg2, int flags, int type)
+static void
+m_refm(struct mbuf *mb, struct mbuf *m)
 {
-	KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__));
-	if (type != EXT_EXTREF)
-		mb->m_ext.ref_cnt = (u_int *)uma_zalloc(zone_ext_refcnt, M_NOWAIT);
-	if (mb->m_ext.ref_cnt != NULL) {
-		*(mb->m_ext.ref_cnt) = 1;
-		mb->m_flags |= (M_EXT | flags);
-		mb->m_ext.ext_buf = buf;
-		mb->m_data = mb->m_ext.ext_buf;
-		mb->m_ext.ext_size = size;
-		mb->m_ext.ext_free = freef;
-		mb->m_ext.ext_arg1 = arg1;
-		mb->m_ext.ext_arg2 = arg2;
-		mb->m_ext.ext_type = type;
-	}
+	if (m->m_ref > 1)
+		atomic_add_int(&m->m_ref, 1);
+	else
+		m->m_ref++;
+	mb->m_flags |= M_EXT | M_RDONLY;
+	mb->m_data = m->m_data;
+	mb->m_size = m->m_size;
+	mb->m_ext.ext_buf = M_START(m);
+	mb->m_ext.ext_size = m->m_size;
+	mb->m_ext.ext_free = m_ext_free_mbuf;
+	mb->m_ext.ext_arg1 = m;
+	mb->m_ext.ext_arg2 = NULL;
+	mb->m_ext.ext_type = EXT_MBUF;
+
+	CTR3(KTR_NET, "m_refm: %p ref %d buf %p",
+	    mb, mb->m_ref, mb->m_ext.ext_buf);
 }

 /*
- * Non-directly-exported function to clean up after mbufs with M_EXT
- * storage attached to them if the reference count hits 1.
+ * Free the ext area of an mbuf assuming a uma zone and argument are
+ * presented.
  */
 void
-mb_free_ext(struct mbuf *m)
+m_ext_free_zone(void *arg1, void *arg2)
 {
-	int skipmbuf;
-
-	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
-	KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));
+	uma_zfree(arg1, arg2);
+}

+/*
+ * Free the ext area of an mbuf assuming it has been acquired with m_refm().
+ */
+void
+m_ext_free_mbuf(void *arg1, void *arg2)
+{
+
 	/*
-	 * check if the header is embedded in the cluster
-	 */
-	skipmbuf = (m->m_flags & M_NOFREE);
-
-	/* Free attached storage if this mbuf is the only reference to it. */
-	if (*(m->m_ext.ref_cnt) == 1 ||
-	    atomic_fetchadd_int(m->m_ext.ref_cnt, -1) == 1) {
-		switch (m->m_ext.ext_type) {
-		case EXT_PACKET:	/* The packet zone is special. */
-			if (*(m->m_ext.ref_cnt) == 0)
-				*(m->m_ext.ref_cnt) = 1;
-			uma_zfree(zone_pack, m);
-			return;		/* Job done. */
-		case EXT_CLUSTER:
-			uma_zfree(zone_clust, m->m_ext.ext_buf);
-			break;
-		case EXT_JUMBOP:
-			uma_zfree(zone_jumbop, m->m_ext.ext_buf);
-			break;
-		case EXT_JUMBO9:
-			uma_zfree(zone_jumbo9, m->m_ext.ext_buf);
-			break;
-		case EXT_JUMBO16:
-			uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
-			break;
-		case EXT_SFBUF:
-		case EXT_NET_DRV:
-		case EXT_MOD_TYPE:
-		case EXT_DISPOSABLE:
-			*(m->m_ext.ref_cnt) = 0;
-			uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *,
-				m->m_ext.ref_cnt));
-			/* FALLTHROUGH */
-		case EXT_EXTREF:
-			KASSERT(m->m_ext.ext_free != NULL,
-				("%s: ext_free not set", __func__));
-			(*(m->m_ext.ext_free))(m->m_ext.ext_arg1,
-			    m->m_ext.ext_arg2);
-			break;
-		default:
-			KASSERT(m->m_ext.ext_type == 0,
-				("%s: unknown ext_type", __func__));
-		}
-	}
-	if (skipmbuf)
-		return;
-
-	/*
-	 * Free this mbuf back to the mbuf zone with all m_ext
-	 * information purged.
+	 * Release one more reference to this mbuf.  If it is the last it
+	 * will be freed.
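+	 *
+	 * [Editorial sketch, not in the original patch: a caller that
+	 * references rather than copies would do
+	 *
+	 *	m_refm(n, m);	(n now maps m's data, read-only)
+	 *	m_free(n);	(drops the reference via this hook)
+	 *
+	 * with ext_arg1 pointing back at the donor mbuf m.]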
	 */
-	m->m_ext.ext_buf = NULL;
-	m->m_ext.ext_free = NULL;
-	m->m_ext.ext_arg1 = NULL;
-	m->m_ext.ext_arg2 = NULL;
-	m->m_ext.ref_cnt = NULL;
-	m->m_ext.ext_size = 0;
-	m->m_ext.ext_type = 0;
-	m->m_flags &= ~M_EXT;
-	uma_zfree(zone_mbuf, m);
+	m_free(arg1);
 }

-/*
- * Attach the the cluster from *m to *n, set up m_ext in *n
- * and bump the refcount of the cluster.
- */
-static void
-mb_dupcl(struct mbuf *n, struct mbuf *m)
+void
+m_ext_free_nop(void *arg1, void *arg2)
 {
-	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
-	KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));
-	KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
-	if (*(m->m_ext.ref_cnt) == 1)
-		*(m->m_ext.ref_cnt) += 1;
-	else
-		atomic_add_int(m->m_ext.ref_cnt, 1);
-	n->m_ext.ext_buf = m->m_ext.ext_buf;
-	n->m_ext.ext_free = m->m_ext.ext_free;
-	n->m_ext.ext_arg1 = m->m_ext.ext_arg1;
-	n->m_ext.ext_arg2 = m->m_ext.ext_arg2;
-	n->m_ext.ext_size = m->m_ext.ext_size;
-	n->m_ext.ref_cnt = m->m_ext.ref_cnt;
-	n->m_ext.ext_type = m->m_ext.ext_type;
-	n->m_flags |= M_EXT;
+	/* Nothing to do. */
 }

 /*
@@ -354,11 +272,8 @@ m_sanity(struct mbuf *m0, int sanitize)
 	 * unrelated kernel memory before or after us is trashed.
 	 * No way to recover from that.
 	 */
-	a = ((m->m_flags & M_EXT) ? m->m_ext.ext_buf :
-	    ((m->m_flags & M_PKTHDR) ? (caddr_t)(&m->m_pktdat) :
-	    (caddr_t)(&m->m_dat)) );
-	b = (caddr_t)(a + (m->m_flags & M_EXT ? m->m_ext.ext_size :
-	    ((m->m_flags & M_PKTHDR) ? MHLEN : MLEN)));
+	a = M_START(m);
+	b = (caddr_t)(a + m->m_size);
 	if ((caddr_t)m->m_data < a)
 		M_SANITY_ACTION("m_data outside mbuf data range left");
 	if ((caddr_t)m->m_data > b)
@@ -529,6 +444,7 @@ m_copym(struct mbuf *m, int off0, int len, int wai
 	struct mbuf *top;
 	int copyhdr = 0;

+	CTR3(KTR_NET, "m_copym(%p, %d, %d)", m, off0, len);
 	KASSERT(off >= 0, ("m_copym, negative off %d", off));
 	KASSERT(len >= 0, ("m_copym, negative len %d", len));
 	MBUF_CHECKSLEEP(wait);
@@ -565,13 +481,16 @@
 			n->m_pkthdr.len = len;
 			copyhdr = 0;
 		}
+		/*
+		 * If the copied data will fit in the space of a standard
+		 * mbuf, prefer to copy rather than reference.
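+		 *
+		 * [Editorial example, not in the patch: a 100 byte
+		 * fragment is bcopy()d into the new mbuf, while a
+		 * cluster-sized fragment takes the m_refm() path and only
+		 * advances m_data by the offset.]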
+		 */
 		n->m_len = min(len, m->m_len - off);
-		if (m->m_flags & M_EXT) {
-			n->m_data = m->m_data + off;
-			mb_dupcl(n, m);
+		if (n->m_len > n->m_size) {
+			m_refm(n, m);
+			n->m_data += off;
 		} else
-			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
-			    (u_int)n->m_len);
+			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), n->m_len);
 		if (len != M_COPYALL)
 			len -= n->m_len;
 		off = 0;
@@ -749,7 +668,9 @@ struct mbuf *
 m_copypacket(struct mbuf *m, int how)
 {
 	struct mbuf *top, *n, *o;
+	int leading;

+	CTR1(KTR_NET, "m_copypacket(%p)", m);
 	MBUF_CHECKSLEEP(how);
 	MGET(n, how, m->m_type);
 	top = n;
@@ -759,13 +680,10 @@ m_copypacket(struct mbuf *m, int how)
 	if (!m_dup_pkthdr(n, m, how))
 		goto nospace;
 	n->m_len = m->m_len;
-	if (m->m_flags & M_EXT) {
-		n->m_data = m->m_data;
-		mb_dupcl(n, m);
-	} else {
-		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat );
-		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
-	}
+	if (n->m_len > n->m_size)
+		m_refm(n, m);
+	else
+		bcopy(mtod(m, caddr_t), mtod(n, caddr_t), m->m_len);

 	m = m->m_next;
 	while (m) {
@@ -777,13 +695,13 @@ m_copypacket(struct mbuf *m, int how)
 		n = n->m_next;

 		n->m_len = m->m_len;
-		if (m->m_flags & M_EXT) {
-			n->m_data = m->m_data;
-			mb_dupcl(n, m);
+		leading = M_LEADINGSPACE(m);
+		if (n->m_len + leading > n->m_size) {
+			m_refm(n, m);
 		} else {
-			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
+			n->m_data = M_START(n) + leading;
+			bcopy(mtod(m, caddr_t), mtod(n, caddr_t), n->m_len);
 		}
-
 		m = m->m_next;
 	}
 	return top;
@@ -802,6 +720,7 @@ m_copydata(const struct mbuf *m, int off, int len,
 {
 	u_int count;

+	CTR3(KTR_NET, "m_copydata(%p, %d, %d)", m, off, len);
 	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
 	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
 	while (off > 0) {
@@ -831,8 +750,9 @@ struct mbuf *
 m_dup(struct mbuf *m, int how)
 {
 	struct mbuf **p, *top = NULL;
-	int remain, moff, nsize;
+	int remain, moff;

+	CTR1(KTR_NET, "m_dup(%p)", m);
 	MBUF_CHECKSLEEP(how);
 	/* Sanity check */
 	if (m == NULL)
@@ -847,13 +767,10 @@ m_dup(struct mbuf *m, int how)
 		struct mbuf *n;

 		/* Get the next new mbuf */
-		if (remain >= MINCLSIZE) {
+		if (remain >= MINCLSIZE)
 			n = m_getcl(how, m->m_type, 0);
-			nsize = MCLBYTES;
-		} else {
+		else
 			n = m_get(how, m->m_type);
-			nsize = MLEN;
-		}
 		if (n == NULL)
 			goto nospace;

 				m_free(n);
 				goto nospace;
 			}
-			if ((n->m_flags & M_EXT) == 0)
-				nsize = MHLEN;
 		}
 		n->m_len = 0;

 		/* Link it into the new chain */
 		*p = n;
 		p = &n->m_next;

 		/* Copy data from original mbuf(s) into new mbuf */
-		while (n->m_len < nsize && m != NULL) {
-			int chunk = min(nsize - n->m_len, m->m_len - moff);
+		while (n->m_len < n->m_size && m != NULL) {
+			int chunk = min(n->m_size - n->m_len, m->m_len - moff);

 			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
 			moff += chunk;
@@ -905,11 +820,13 @@ nospace:
 void
 m_cat(struct mbuf *m, struct mbuf *n)
 {
+	CTR2(KTR_NET, "m_cat(%p, %p)", m, n);
+
 	while (m->m_next)
 		m = m->m_next;
 	while (n) {
 		if (m->m_flags & M_EXT ||
-		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
+		    m->m_len + n->m_len > m->m_size) {
 			/* just join the two chains */
 			m->m_next = n;
 			return;
@@ -929,6 +846,7 @@ m_adj(struct mbuf *mp, int req_len)
 	struct mbuf *m;
 	int count;

+	CTR2(KTR_NET, "m_adj(%p, %d)", mp, req_len);
 	if ((m = mp) == NULL)
 		return;
 	if (len >= 0) {
@@ -1011,13 +929,13 @@ m_pullup(struct mbuf *n, int len)
 	int count;
 	int space;

+	CTR2(KTR_NET, "m_pullup(%p, %d)", n, len);
 	/*
 	 * If first mbuf has no cluster, and has room for len bytes
 	 * without shifting current data, pullup into it,
 	 * otherwise allocate a new mbuf to prepend to the chain.
 	 */
-	if ((n->m_flags & M_EXT) == 0 &&
-	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
+	if ((n->m_flags & M_EXT) == 0 && len < n->m_size && n->m_next) {
 		if (n->m_len >= len)
 			return (n);
 		m = n;
@@ -1033,7 +951,8 @@ m_pullup(struct mbuf *n, int len)
 		if (n->m_flags & M_PKTHDR)
 			M_MOVE_PKTHDR(m, n);
 	}
-	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
+	/* XXX M_TRAILINGSPACE without M_WRITABLE */
+	space = (M_START(m) + m->m_size) - (m->m_data + (m)->m_len);
 	do {
 		count = min(min(max(len, max_protohdr), space), n->m_len);
 		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
@@ -1072,6 +991,7 @@ m_copyup(struct mbuf *n, int len, int dstoff)
 	struct mbuf *m;
 	int count, space;

+	CTR2(KTR_NET, "m_copyup(%p, %d)", n, len);
 	if (len > (MHLEN - dstoff))
 		goto bad;
 	MGET(m, M_DONTWAIT, n->m_type);
@@ -1081,7 +1001,8 @@ m_copyup(struct mbuf *n, int len, int dstoff)
 	if (n->m_flags & M_PKTHDR)
 		M_MOVE_PKTHDR(m, n);
 	m->m_data += dstoff;
-	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
+	/* XXX M_TRAILINGSPACE without M_WRITABLE */
+	space = (M_START(m) + m->m_size) - (m->m_data + (m)->m_len);
 	do {
 		count = min(min(max(len, max_protohdr), space), n->m_len);
 		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
@@ -1123,6 +1044,7 @@ m_split(struct mbuf *m0, int len0, int wait)
 	struct mbuf *m, *n;
 	u_int len = len0, remain;

+	CTR2(KTR_NET, "m_split(%p, %d)", m0, len0);
 	MBUF_CHECKSLEEP(wait);
 	for (m = m0; m && len > m->m_len; m = m->m_next)
 		len -= m->m_len;
@@ -1136,8 +1058,8 @@ m_split(struct mbuf *m0, int len0, int wait)
 		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
 		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
 		m0->m_pkthdr.len = len0;
-		if (m->m_flags & M_EXT)
-			goto extpacket;
+		if (m->m_size >= MCLBYTES)
+			goto refpacket;
 		if (remain > MHLEN) {
 			/* m can't be the lead packet */
 			MH_ALIGN(n, 0);
@@ -1161,13 +1083,13 @@ m_split(struct mbuf *m0, int len0, int wait)
 			return (NULL);
 		M_ALIGN(n, remain);
 	}
-extpacket:
-	if (m->m_flags & M_EXT) {
-		n->m_data = m->m_data + len;
-		mb_dupcl(n, m);
-	} else {
+refpacket:
+	if (remain > n->m_size) {
+		m_refm(n, m);
+		n->m_data += len;
+	} else
 		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
-	}
+
 	n->m_len = remain;
 	m->m_len = len;
 	n->m_next = m->m_next;
@@ -1252,6 +1174,7 @@ m_copyback(struct mbuf *m0, int off, int len, c_ca
 	struct mbuf *m = m0, *n;
 	int totlen = 0;

+	CTR3(KTR_NET, "m_copyback(%p, %d, %d)", m0, off, len);
 	if (m0 == NULL)
 		return;
 	while (off > (mlen = m->m_len)) {
@@ -1307,6 +1230,7 @@ m_append(struct mbuf *m0, int len, c_caddr_t cp)
 	struct mbuf *m, *n;
 	int remainder, space;

+	CTR2(KTR_NET, "m_append(%p, %d)", m0, len);
 	for (m = m0; m->m_next != NULL; m = m->m_next)
 		;
 	remainder = len;
@@ -1577,7 +1501,7 @@ again:
 		n = m->m_next;
 		if (n == NULL)
 			break;
-		if ((m->m_flags & M_RDONLY) == 0 &&
+		if (M_WRITABLE(m) &&
 		    n->m_len < M_TRAILINGSPACE(m)) {
 			bcopy(mtod(n, void *), mtod(m, char *) + m->m_len,
 				n->m_len);
@@ -1777,12 +1701,7 @@ m_align(struct mbuf *m, int len)
 {
 	int adjust;

-	if (m->m_flags & M_EXT)
-		adjust = m->m_ext.ext_size - len;
-	else if (m->m_flags & M_PKTHDR)
-		adjust = MHLEN - len;
-	else
-		adjust = MLEN - len;
+	adjust = m->m_size - len;
 	m->m_data += adjust &~ (sizeof(long)-1);
 }
Index: netinet/sctp_os_bsd.h
===================================================================
--- netinet/sctp_os_bsd.h	(revision 194477)
+++ netinet/sctp_os_bsd.h	(working copy)
@@ -355,7 +355,7 @@ typedef struct callout sctp_os_timer_t;
 /* return the base ext data pointer */
 #define	SCTP_BUF_EXTEND_BASE(m)	(m->m_ext.ext_buf)
 /* return the refcnt of the data pointer */
-#define	SCTP_BUF_EXTEND_REFCNT(m)	(*m->m_ext.ref_cnt)
+#define	SCTP_BUF_EXTEND_REFCNT(m)	(m->m_ref)
 /* return any buffer related flags, this is
  * used beyond logging for apple only.
  */
Index: netinet/ip_options.c
===================================================================
--- netinet/ip_options.c	(revision 194477)
+++ netinet/ip_options.c	(working copy)
@@ -502,7 +502,7 @@ ip_insertoptions(struct mbuf *m, struct mbuf *opt,
 	}
 	if (p->ipopt_dst.s_addr)
 		ip->ip_dst = p->ipopt_dst;
-	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
+	if (!M_WRITABLE(m) || M_LEADINGSPACE(m) < optlen) {
 		MGETHDR(n, M_DONTWAIT, MT_DATA);
 		if (n == NULL) {
 			*phlen = 0;
Index: modules/Makefile
===================================================================
--- modules/Makefile	(revision 194477)
+++ modules/Makefile	(working copy)
@@ -62,7 +62,6 @@ SUBDIR=	${_3dfx} \
 	${_cryptodev} \
 	${_cs} \
 	${_ctau} \
-	cxgb \
 	${_cyclic} \
 	dc \
 	dcons \
Index: netgraph/ng_tty.c
===================================================================
--- netgraph/ng_tty.c	(revision 194477)
+++ netgraph/ng_tty.c	(working copy)
@@ -442,7 +442,7 @@ ngt_rint_bypass(struct tty *tp, const void *buf, s
 		 * Odd, we have changed from non-bypass to bypass. It is
 		 * unlikely but not impossible, flush the data first.
 		 */
-		sc->m->m_data = sc->m->m_pktdat;
+		sc->m->m_data = M_START(sc->m);
 		NG_SEND_DATA_ONLY(error, sc->hook, sc->m);
 		sc->m = NULL;
 	}
@@ -498,7 +498,7 @@ ngt_rint(struct tty *tp, char c, int flags)
 	/* Ship off mbuf if it's time */
 	if (sc->hotchar == -1 || c == sc->hotchar || m->m_len >= MHLEN) {
-		m->m_data = m->m_pktdat;
+		m->m_data = M_START(m);
 		sc->m = NULL;
 		NG_SEND_DATA_ONLY(error, sc->hook, m);	/* Will queue */
 	}
Index: nfsserver/nfsm_subs.h
===================================================================
--- nfsserver/nfsm_subs.h	(revision 194477)
+++ nfsserver/nfsm_subs.h	(working copy)
@@ -50,9 +50,7 @@
  * First define what the actual subs. return
  */

-#define	M_HASCL(m)	((m)->m_flags & M_EXT)
-#define	NFSMSIZ(m)	((M_HASCL(m))?MCLBYTES: \
-				(((m)->m_flags & M_PKTHDR)?MHLEN:MLEN))
+#define	NFSMSIZ(m)	(m)->m_size

 /*
  * Now for the macros that do the simple stuff and call the functions
Index: dev/ti/if_ti.c
===================================================================
--- dev/ti/if_ti.c	(revision 194477)
+++ dev/ti/if_ti.c	(working copy)
@@ -1308,12 +1308,11 @@ ti_newbuf_mini(sc, i, m)
 		if (m_new == NULL) {
 			return (ENOBUFS);
 		}
-		m_new->m_len = m_new->m_pkthdr.len = MHLEN;
 	} else {
 		m_new = m;
-		m_new->m_data = m_new->m_pktdat;
-		m_new->m_len = m_new->m_pkthdr.len = MHLEN;
+		m_new->m_data = M_START(m);
 	}
+	m_new->m_len = m_new->m_pkthdr.len = m_new->m_size;

 	m_adj(m_new, ETHER_ALIGN);
 	r = &sc->ti_rdata->ti_rx_mini_ring[i];
Index: dev/hatm/if_hatm_intr.c
===================================================================
--- dev/hatm/if_hatm_intr.c	(revision 194477)
+++ dev/hatm/if_hatm_intr.c	(working copy)
@@ -456,9 +456,8 @@ hatm_rx_buffer(struct hatm_softc *sc, u_int group,
 		c0->hdr.flags &= ~MBUF_CARD;

 		if (m != NULL) {
-			m->m_ext.ref_cnt = &c0->hdr.ref_cnt;
 			MEXTADD(m, (void *)c0, MBUF0_SIZE,
-			    hatm_mbuf0_free, c0, sc, M_PKTHDR, EXT_EXTREF);
+			    hatm_mbuf0_free, c0, sc, M_PKTHDR, EXT_NET_DRV);
 			m->m_data += MBUF0_OFFSET;
 		} else
 			hatm_mbuf0_free(c0, sc);
@@ -480,9 +479,8 @@ hatm_rx_buffer(struct hatm_softc *sc, u_int group,
 		c1->hdr.flags &= ~MBUF_CARD;

 		if (m != NULL) {
-			m->m_ext.ref_cnt = &c1->hdr.ref_cnt;
 			MEXTADD(m, (void *)c1, MBUF1_SIZE,
-			    hatm_mbuf1_free, c1, sc, M_PKTHDR, EXT_EXTREF);
+			    hatm_mbuf1_free, c1, sc, M_PKTHDR, EXT_NET_DRV);
 			m->m_data += MBUF1_OFFSET;
 		} else
 			hatm_mbuf1_free(c1, sc);
Index: dev/hatm/if_hatmvar.h
===================================================================
--- dev/hatm/if_hatmvar.h	(revision 194477)
+++ dev/hatm/if_hatmvar.h	(working copy)
@@ -286,7 +286,6 @@ struct mbuf_chunk_hdr {
 	uint16_t	pageno;
 	uint8_t		chunkno;
 	uint8_t		flags;
-	u_int		ref_cnt;
 };
 #define	MBUF_CARD	0x01	/* buffer is on card */
 #define	MBUF_USED	0x02	/* buffer is somewhere in the system */
Index: dev/cxgb/cxgb_sge.c
===================================================================
--- dev/cxgb/cxgb_sge.c	(revision 194477)
+++ dev/cxgb/cxgb_sge.c	(working copy)
@@ -515,7 +515,7 @@ refill_fl(adapter_t *sc, struct sge_fl *q, int n)
 	struct refill_fl_cb_arg cb_arg;
 	caddr_t cl;
 	int err, count = 0;
-	int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
+	int header_size = sizeof(struct mbuf) + sizeof(uint32_t);

 	cb_arg.error = 0;
 	while (n--) {
@@ -2360,7 +2360,7 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int npo

 	q->rspq.size = p->rspq_size;

-	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
+	header_size = sizeof(struct mbuf) + sizeof(uint32_t);
 	q->txq[TXQ_ETH].stop_thres = nports *
 	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);

@@ -2535,8 +2535,7 @@ init_cluster_mbuf(caddr_t cl, int flags, int type,
 	struct mbuf *m;
 	int header_size;

-	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) +
-	    sizeof(struct m_ext_) + sizeof(uint32_t);
+	header_size = sizeof(struct mbuf) + sizeof(uint32_t);

 	bzero(cl, header_size);
 	m = (struct mbuf *)cl;
Index: dev/vx/if_vx.c
===================================================================
--- dev/vx/if_vx.c	(revision 194477)
+++ dev/vx/if_vx.c	(working copy)
@@ -825,8 +825,11 @@ vx_get(struct vx_softc *sc, u_int totlen)
 		/* Convert one of our saved mbuf's. */
 		sc->vx_next_mb = (sc->vx_next_mb + 1) % MAX_MBS;
 		m->m_data = m->m_pktdat;
-		m->m_flags = M_PKTHDR;
-		bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
+		m->m_flags |= M_PKTHDR;
+		if (m_pkthdr_init(m, M_NOWAIT)) {
+			m_free(m);
+			return NULL;
+		}
 	}
 	m->m_pkthdr.rcvif = ifp;
 	m->m_pkthdr.len = totlen;
Index: dev/iscsi/initiator/isc_soc.c
===================================================================
--- dev/iscsi/initiator/isc_soc.c	(revision 194477)
+++ dev/iscsi/initiator/isc_soc.c	(working copy)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -67,7 +68,7 @@ __FBSDID("$FreeBSD$");

 #ifdef USE_MBUF

-static int ou_refcnt = 0;
+volatile u_int ou_refcnt;

 /*
  | function for freeing external storage for mbuf
@@ -77,6 +78,8 @@ ext_free(void *a, void *b)
 {
      pduq_t *pq = b;

+     if (refcount_release(&ou_refcnt) == 0)
+	  return;
      if(pq->buf != NULL) {
	  debug(3, "ou_refcnt=%d a=%p b=%p", ou_refcnt, a, pq->buf);
	  free(pq->buf, M_ISCSI);
@@ -137,11 +140,12 @@ isc_sendPDU(isc_session_t *sp, pduq_t *pq)
	  while(len > 0) {
	       int	l;

+	       /* XXX Does not handle allocation failure. */
	       MGET(md, M_TRYWAIT, MT_DATA);
-	       md->m_ext.ref_cnt = &ou_refcnt;
	       l = min(MCLBYTES, len);
	       debug(5, "setting ext_free(arg=%p len/l=%d/%d)", pq->buf, len, l);
-	       MEXTADD(md, pp->ds + off, l, ext_free, pp->ds + off, pq, 0, EXT_EXTREF);
+	       MEXTADD(md, pp->ds + off, l, ext_free, pp->ds + off, pq, 0, EXT_NET_DRV);
+	       refcount_acquire(&ou_refcnt);
	       md->m_len = l;
	       md->m_next = NULL;
	       mh->m_pkthdr.len += l;
Index: dev/xen/netback/netback.c
===================================================================
--- dev/xen/netback/netback.c	(revision 194477)
+++ dev/xen/netback/netback.c	(working copy)
@@ -873,8 +873,7 @@ netif_rx(netif_t *netif)
 		pkts_dequeued++;

 		/* Check if we need to copy the data */
-		if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) ||
-			(*m->m_ext.ref_cnt > 1) || m->m_next != NULL) {
+		if (M_WRITABLE(m) == 0 || m->m_next != NULL) {
 			struct mbuf *n;

 			DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n",
Index: vm/uma_int.h
===================================================================
--- vm/uma_int.h	(revision 194477)
+++ vm/uma_int.h	(working copy)
@@ -214,7 +214,6 @@ struct uma_keg {
 	struct vm_object	*uk_obj;	/* Zone specific object */

 	vm_offset_t	uk_kva;		/* Base kva for zones with objs */
-	uma_zone_t	uk_slabzone;	/* Slab zone backing us, if OFFPAGE */

 	u_int16_t	uk_pgoff;	/* Offset to uma_slab struct */
 	u_int16_t	uk_ppera;	/* pages per allocation from backend */
@@ -223,10 +222,8 @@ struct uma_keg {
 };
 typedef struct uma_keg	* uma_keg_t;

-/* Page management structure */
-
-/* Sorry for the union, but space efficiency is important */
-struct uma_slab_head {
+/* The slab/page management structure. */
+struct uma_slab {
 	uma_keg_t	us_keg;			/* Keg we live in */
 	union {
 		LIST_ENTRY(uma_slab)	_us_link;	/* slabs in zone */
@@ -237,51 +234,23 @@ typedef struct uma_keg	* uma_keg_t;
 	u_int8_t	us_flags;		/* Page flags see uma.h */
 	u_int8_t	us_freecount;	/* How many are free? */
 	u_int8_t	us_firstfree;	/* First free item index */
-};
-
-/* The standard slab structure */
-struct uma_slab {
-	struct uma_slab_head	us_head;	/* slab header data */
 	struct {
 		u_int8_t	us_item;
-	} us_freelist[1];			/* actual number bigger */
+	} us_freelist[0];			/* actual number bigger */
 };

-/*
- * The slab structure for UMA_ZONE_REFCNT zones for whose items we
- * maintain reference counters in the slab for.
- */
-struct uma_slab_refcnt {
-	struct uma_slab_head	us_head;	/* slab header data */
-	struct {
-		u_int8_t	us_item;
-		u_int32_t	us_refcnt;
-	} us_freelist[1];			/* actual number bigger */
-};
+#define	us_link	us_type._us_link
+#define	us_size	us_type._us_size

-#define	us_keg		us_head.us_keg
-#define	us_link		us_head.us_type._us_link
-#define	us_size		us_head.us_type._us_size
-#define	us_hlink	us_head.us_hlink
-#define	us_data		us_head.us_data
-#define	us_flags	us_head.us_flags
-#define	us_freecount	us_head.us_freecount
-#define	us_firstfree	us_head.us_firstfree

 typedef struct uma_slab * uma_slab_t;
-typedef struct uma_slab_refcnt * uma_slabrefcnt_t;
 typedef uma_slab_t (*uma_slaballoc)(uma_zone_t, uma_keg_t, int);

-
 /*
  * These give us the size of one free item reference within our corresponding
- * uma_slab structures, so that our calculations during zone setup are correct
- * regardless of what the compiler decides to do with padding the structure
- * arrays within uma_slab.
+ * uma_slab structures.
  */
-#define	UMA_FRITM_SZ	(sizeof(struct uma_slab) - sizeof(struct uma_slab_head))
-#define	UMA_FRITMREF_SZ	(sizeof(struct uma_slab_refcnt) - \
-    sizeof(struct uma_slab_head))
+#define	UMA_FRITM_SZ	1

 struct uma_klink {
 	LIST_ENTRY(uma_klink)	kl_link;
Index: vm/uma_dbg.c
===================================================================
--- vm/uma_dbg.c	(revision 194477)
+++ vm/uma_dbg.c	(working copy)
@@ -221,7 +221,6 @@ void
 uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
 {
 	uma_keg_t keg;
-	uma_slabrefcnt_t slabref;
 	int freei;

 	if (slab == NULL) {
@@ -234,14 +233,8 @@ uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, vo
 	freei = ((unsigned long)item - (unsigned long)slab->us_data)
 	    / keg->uk_rsize;

+	slab->us_freelist[freei].us_item = 255;
-	if (keg->uk_flags & UMA_ZONE_REFCNT) {
-		slabref = (uma_slabrefcnt_t)slab;
-		slabref->us_freelist[freei].us_item = 255;
-	} else {
-		slab->us_freelist[freei].us_item = 255;
-	}
-
 	return;
 }

@@ -255,7 +248,6 @@ void
 uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
 {
 	uma_keg_t keg;
-	uma_slabrefcnt_t slabref;
 	int freei;

 	if (slab == NULL) {
@@ -280,34 +272,17 @@ uma_dbg_free(uma_zone_t zone, uma_slab_t slab, voi
 		    (freei * keg->uk_rsize) + slab->us_data);
 	}

-	if (keg->uk_flags & UMA_ZONE_REFCNT) {
-		slabref = (uma_slabrefcnt_t)slab;
-		if (slabref->us_freelist[freei].us_item != 255) {
-			printf("Slab at %p, freei %d = %d.\n",
-			    slab, freei, slabref->us_freelist[freei].us_item);
-			panic("Duplicate free of item %p from zone %p(%s)\n",
-			    item, zone, zone->uz_name);
-		}
-
-		/*
-		 * When this is actually linked into the slab this will change.
-		 * Until then the count of valid slabs will make sure we don't
-		 * accidentally follow this and assume it's a valid index.
-		 */
-		slabref->us_freelist[freei].us_item = 0;
-	} else {
-		if (slab->us_freelist[freei].us_item != 255) {
-			printf("Slab at %p, freei %d = %d.\n",
-			    slab, freei, slab->us_freelist[freei].us_item);
-			panic("Duplicate free of item %p from zone %p(%s)\n",
-			    item, zone, zone->uz_name);
-		}
-
-		/*
-		 * When this is actually linked into the slab this will change.
-		 * Until then the count of valid slabs will make sure we don't
-		 * accidentally follow this and assume it's a valid index.
-		 */
-		slab->us_freelist[freei].us_item = 0;
+	if (slab->us_freelist[freei].us_item != 255) {
+		printf("Slab at %p, freei %d = %d.\n",
+		    slab, freei, slab->us_freelist[freei].us_item);
+		panic("Duplicate free of item %p from zone %p(%s)\n",
+		    item, zone, zone->uz_name);
 	}
+
+	/*
+	 * When this is actually linked into the slab this will change.
+	 * Until then the count of valid slabs will make sure we don't
+	 * accidentally follow this and assume it's a valid index.
+	 */
+	slab->us_freelist[freei].us_item = 0;
 }
Index: vm/uma.h
===================================================================
--- vm/uma.h	(revision 194477)
+++ vm/uma.h	(working copy)
@@ -239,7 +239,6 @@ int uma_zsecond_add(uma_zone_t zone, uma_zone_t ma
					 * information in the vm_page.
					 */
 #define	UMA_ZONE_SECONDARY	0x0200	/* Zone is a Secondary Zone */
-#define	UMA_ZONE_REFCNT		0x0400	/* Allocate refcnts in slabs */
 #define	UMA_ZONE_MAXBUCKET	0x0800	/* Use largest buckets */
 #define	UMA_ZONE_CACHESPREAD	0x1000	/*
					 * Spread memory start locations across
@@ -255,8 +254,7 @@ int uma_zsecond_add(uma_zone_t zone, uma_zone_t ma
 * physical parameters of the request and may not be provided by the consumer.
 */
 #define	UMA_ZONE_INHERIT						\
-    (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_HASH |		\
-    UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB)
+    (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_HASH | UMA_ZONE_VTOSLAB)

 /* Definitions for align */
 #define	UMA_ALIGN_PTR	(sizeof(void *) - 1)	/* Alignment fit for ptr */
@@ -537,21 +535,6 @@ void uma_zone_set_freef(uma_zone_t zone, uma_free
 void uma_prealloc(uma_zone_t zone, int itemcnt);

 /*
- * Used to lookup the reference counter allocated for an item
- * from a UMA_ZONE_REFCNT zone.  For UMA_ZONE_REFCNT zones,
- * reference counters are allocated for items and stored in
- * the underlying slab header.
- *
- * Arguments:
- *	zone  The UMA_ZONE_REFCNT zone to which the item belongs.
- *	item  The address of the item for which we want a refcnt.
- *
- * Returns:
- *	A pointer to a u_int32_t reference counter.
- */
-u_int32_t *uma_find_refcnt(uma_zone_t zone, void *item);
-
-/*
 * Used to determine if a fixed-size zone is exhausted.
 *
 * Arguments:
Index: vm/vm_radix_tree.c
===================================================================
Index: vm/uma_core.c
===================================================================
--- vm/uma_core.c	(revision 194477)
+++ vm/uma_core.c	(working copy)
@@ -103,7 +103,6 @@ static uma_zone_t zones = &masterzone_z;

 /* This is the zone from which all of uma_slab_t's are allocated.
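 *
 * [Editorial note, not part of the patch: with UMA_ZONE_REFCNT gone
 * there is no slabrefzone; code that relied on uma_find_refcnt() must
 * keep its own counter, as the dev/iscsi hunk above does with
 * refcount(9):
 *
 *	refcount_acquire(&ou_refcnt);
 *	...
 *	if (refcount_release(&ou_refcnt) == 0)
 *		return;
 * ]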
 */
 static uma_zone_t slabzone;
-static uma_zone_t slabrefzone;	/* With refcounters (for UMA_ZONE_REFCNT) */

 /*
  * The initial hash tables come out of this zone so they can be allocated
@@ -139,7 +138,6 @@ static int booted = 0;

 /* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
 static u_int uma_max_ipers;
-static u_int uma_max_ipers_ref;

 /*
  * This is the handle used to schedule events that need to happen
@@ -742,7 +740,7 @@ finished:
			    obj);
	}
	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
-		zone_free_item(keg->uk_slabzone, slab, NULL,
+		zone_free_item(slabzone, slab, NULL,
		    SKIP_NONE, ZFREE_STATFREE);
 #ifdef UMA_DEBUG
	printf("%s: Returning %d bytes.\n",
@@ -806,7 +804,6 @@ zone_drain(uma_zone_t zone)
 static uma_slab_t
 keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
 {
-	uma_slabrefcnt_t slabref;
	uma_alloc allocf;
	uma_slab_t slab;
	u_int8_t *mem;
@@ -823,7 +820,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int
	KEG_UNLOCK(keg);

	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
-		slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
+		slab = zone_alloc_item(slabzone, NULL, wait);
		if (slab == NULL) {
			KEG_LOCK(keg);
			return NULL;
@@ -846,7 +843,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int
	mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
	if (mem == NULL) {
		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
-			zone_free_item(keg->uk_slabzone, slab, NULL,
+			zone_free_item(slabzone, slab, NULL,
			    SKIP_NONE, ZFREE_STATFREE);
		KEG_LOCK(keg);
		return (NULL);
@@ -866,16 +863,8 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int
	slab->us_firstfree = 0;
	slab->us_flags = flags;

-	if (keg->uk_flags & UMA_ZONE_REFCNT) {
-		slabref = (uma_slabrefcnt_t)slab;
-		for (i = 0; i < keg->uk_ipers; i++) {
-			slabref->us_freelist[i].us_refcnt = 0;
-			slabref->us_freelist[i].us_item = i+1;
-		}
-	} else {
-		for (i = 0; i < keg->uk_ipers; i++)
-			slab->us_freelist[i].us_item = i+1;
-	}
+	for (i = 0; i < keg->uk_ipers; i++)
+		slab->us_freelist[i].us_item = i+1;

	if (keg->uk_init != NULL) {
		for (i = 0; i < keg->uk_ipers; i++)
@@ -903,7 +892,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int
				    (i * PAGE_SIZE), obj);
		}
		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
-			zone_free_item(keg->uk_slabzone, slab,
+			zone_free_item(slabzone, slab,
			    NULL, SKIP_NONE, ZFREE_STATFREE);
		keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
		    flags);
@@ -1109,13 +1098,8 @@ keg_small_init(uma_keg_t keg)
	keg->uk_rsize = rsize;
	keg->uk_ppera = 1;

-	if (keg->uk_flags & UMA_ZONE_REFCNT) {
-		rsize += UMA_FRITMREF_SZ;	/* linkage & refcnt */
-		shsize = sizeof(struct uma_slab_refcnt);
-	} else {
-		rsize += UMA_FRITM_SZ;	/* Account for linkage */
-		shsize = sizeof(struct uma_slab);
-	}
+	rsize += UMA_FRITM_SZ;	/* Account for linkage */
+	shsize = sizeof(struct uma_slab);

	keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
	KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0"));
@@ -1246,7 +1230,6 @@ keg_ctor(void *mem, int size, void *udata, int fla
	keg->uk_allocf = page_alloc;
	keg->uk_freef = page_free;
	keg->uk_recurse = 0;
-	keg->uk_slabzone = NULL;

	/*
	 * The master zone is passed to us at keg-creation time.
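
[Editorial aside, not part of the patch: after this change a zone that
formerly passed UMA_ZONE_REFCNT simply omits the flag, since the item
layout no longer reserves a per-item counter in the slab.  A minimal
sketch, with a hypothetical zone name:

	zone = uma_zcreate("frob", sizeof(struct frob), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);

Reference counting, where still needed, now lives in the object itself,
as m_ref does in struct mbuf.]
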
@@ -1260,7 +1243,7 @@ keg_ctor(void *mem, int size, void *udata, int fla
	if (arg->flags & UMA_ZONE_ZINIT)
		keg->uk_init = zero_init;

-	if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
+	if (arg->flags & UMA_ZONE_MALLOC)
		keg->uk_flags |= UMA_ZONE_VTOSLAB;

	/*
@@ -1269,31 +1252,14 @@ keg_ctor(void *mem, int size, void *udata, int fla
	 * we don't account for this here then we may end up in
	 * keg_small_init() with a calculated 'ipers' of 0.
	 */
-	if (keg->uk_flags & UMA_ZONE_REFCNT) {
-		if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
-			keg_cachespread_init(keg);
-		else if ((keg->uk_size+UMA_FRITMREF_SZ) >
-		    (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
-			keg_large_init(keg);
-		else
-			keg_small_init(keg);
-	} else {
-		if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
-			keg_cachespread_init(keg);
-		else if ((keg->uk_size+UMA_FRITM_SZ) >
-		    (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
-			keg_large_init(keg);
-		else
-			keg_small_init(keg);
-	}
+	if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
+		keg_cachespread_init(keg);
+	else if ((keg->uk_size+UMA_FRITM_SZ) >
+	    (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
+		keg_large_init(keg);
+	else
+		keg_small_init(keg);

-	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
-		if (keg->uk_flags & UMA_ZONE_REFCNT)
-			keg->uk_slabzone = slabrefzone;
-		else
-			keg->uk_slabzone = slabzone;
-	}
-
	/*
	 * If we haven't booted yet we need allocations to go through the
	 * startup cache until the vm is ready.
@@ -1324,25 +1290,16 @@ keg_ctor(void *mem, int size, void *udata, int fla
		u_int totsize;

		/* Size of the slab struct and free list */
-		if (keg->uk_flags & UMA_ZONE_REFCNT)
-			totsize = sizeof(struct uma_slab_refcnt) +
-			    keg->uk_ipers * UMA_FRITMREF_SZ;
-		else
-			totsize = sizeof(struct uma_slab) +
-			    keg->uk_ipers * UMA_FRITM_SZ;
+		totsize = sizeof(struct uma_slab) +
+		    keg->uk_ipers * UMA_FRITM_SZ;

		if (totsize & UMA_ALIGN_PTR)
			totsize = (totsize & ~UMA_ALIGN_PTR) +
			    (UMA_ALIGN_PTR + 1);
		keg->uk_pgoff = UMA_SLAB_SIZE - totsize;
+		totsize = keg->uk_pgoff + sizeof(struct uma_slab) +
+		    keg->uk_ipers * UMA_FRITM_SZ;

-		if (keg->uk_flags & UMA_ZONE_REFCNT)
-			totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
-			    + keg->uk_ipers * UMA_FRITMREF_SZ;
-		else
-			totsize = keg->uk_pgoff + sizeof(struct uma_slab)
-			    + keg->uk_ipers * UMA_FRITM_SZ;
-
		/*
		 * The only way the following is possible is if with our
		 * UMA_ALIGN_PTR adjustments we are now bigger than
@@ -1635,26 +1592,11 @@ uma_startup(void *bootmem, int boot_pages)
		objsize--;
	uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64);

-	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
-	totsize = wsize;
-	objsize = UMA_SMALLEST_UNIT;
-	while (totsize >= wsize) {
-		totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
-		    (objsize + UMA_FRITMREF_SZ);
-		totsize *= (UMA_FRITMREF_SZ + objsize);
-		objsize++;
-	}
-	if (objsize > UMA_SMALLEST_UNIT)
-		objsize--;
-	uma_max_ipers_ref = MAX(UMA_SLAB_SIZE / objsize, 64);
+	KASSERT(uma_max_ipers <= 255,
+	    ("uma_startup: calculated uma_max_ipers value too large!"));

-	KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
-	    ("uma_startup: calculated uma_max_ipers values too large!"));
-
 #ifdef UMA_DEBUG
	printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
-	printf("Calculated uma_max_ipers_slab (for OFFPAGE) is %d\n",
-	    uma_max_ipers_ref);
 #endif

	/* "manually" create the initial zone */
@@ -1717,18 +1659,6 @@ uma_startup(void *bootmem, int boot_pages)
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);

-	/*
-	 * We also create a zone for the bigger slabs with reference
-	 * counts in them, to accomodate UMA_ZONE_REFCNT zones.
-	 */
-	slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
-	slabsize += sizeof(struct uma_slab_refcnt);
-	slabrefzone = uma_zcreate("UMA RCntSlabs",
-	    slabsize,
-	    NULL, NULL, NULL, NULL,
-	    UMA_ALIGN_PTR,
-	    UMA_ZFLAG_INTERNAL);
-
	hashzone = uma_zcreate("UMA Hash",
	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
@@ -1891,14 +1821,6 @@ uma_zsecond_add(uma_zone_t zone, uma_zone_t master
		goto out;
	}
	/*
-	 * Both must either be refcnt, or not be refcnt.
-	 */
-	if ((zone->uz_flags & UMA_ZONE_REFCNT) !=
-	    (master->uz_flags & UMA_ZONE_REFCNT)) {
-		error = EINVAL;
-		goto out;
-	}
-	/*
	 * The underlying object must be the same size.  rsize
	 * may be different.
	 */
@@ -2299,7 +2221,6 @@ static void *
 slab_alloc_item(uma_zone_t zone, uma_slab_t slab)
 {
	uma_keg_t keg;
-	uma_slabrefcnt_t slabref;
	void *item;
	u_int8_t freei;

@@ -2307,12 +2228,7 @@ slab_alloc_item(uma_zone_t zone, uma_slab_t slab)
	mtx_assert(&keg->uk_lock, MA_OWNED);

	freei = slab->us_firstfree;
-	if (keg->uk_flags & UMA_ZONE_REFCNT) {
-		slabref = (uma_slabrefcnt_t)slab;
-		slab->us_firstfree = slabref->us_freelist[freei].us_item;
-	} else {
-		slab->us_firstfree = slab->us_freelist[freei].us_item;
-	}
+	slab->us_firstfree = slab->us_freelist[freei].us_item;
	item = slab->us_data + (keg->uk_rsize * freei);

	slab->us_freecount--;
@@ -2690,7 +2606,6 @@ zone_free_item(uma_zone_t zone, void *item, void *
    enum zfreeskip skip, int flags)
 {
	uma_slab_t slab;
-	uma_slabrefcnt_t slabref;
	uma_keg_t keg;
	u_int8_t *mem;
	u_int8_t freei;
@@ -2747,12 +2662,7 @@ zone_free_item(uma_zone_t zone, void *item, void *
	uma_dbg_free(zone, slab, item);
 #endif

-	if (keg->uk_flags & UMA_ZONE_REFCNT) {
-		slabref = (uma_slabrefcnt_t)slab;
-		slabref->us_freelist[freei].us_item = slab->us_firstfree;
-	} else {
-		slab->us_freelist[freei].us_item = slab->us_firstfree;
-	}
+	slab->us_freelist[freei].us_item = slab->us_firstfree;
	slab->us_firstfree = freei;
	slab->us_freecount++;

@@ -2934,26 +2844,6 @@ uma_prealloc(uma_zone_t zone, int items)
 }

 /* See uma.h */
-u_int32_t *
-uma_find_refcnt(uma_zone_t zone, void *item)
-{
-	uma_slabrefcnt_t slabref;
-	uma_keg_t keg;
-	u_int32_t *refcnt;
-	int idx;
-
-	slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
-	    (~UMA_SLAB_MASK));
-	keg = slabref->us_keg;
-	KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
-	    ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
-	idx = ((unsigned long)item - (unsigned long)slabref->us_data)
-	    / keg->uk_rsize;
-	refcnt = &slabref->us_freelist[idx].us_refcnt;
-	return refcnt;
-}
-
-/* See uma.h */
 void
 uma_reclaim(void)
 {
@@ -2968,7 +2858,6 @@ uma_reclaim(void)
	 * zones are drained.  We have to do the same for buckets.
	 */
	zone_drain(slabzone);
-	zone_drain(slabrefzone);
	bucket_zone_drain();
 }
Index: vm/vm_radix_tree.h
===================================================================
Index: net/if_gre.c
===================================================================
--- net/if_gre.c	(revision 194477)
+++ net/if_gre.c	(working copy)
@@ -331,7 +331,7 @@ gre_output(struct ifnet *ifp, struct mbuf *m, stru
		mob_h.proto = htons(mob_h.proto);
		mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);

-		if ((m->m_data - msiz) < m->m_pktdat) {
+		if (M_LEADINGSPACE(m) < msiz) {
			/* need new mbuf */
			MGETHDR(m0, M_DONTWAIT, MT_DATA);
			if (m0 == NULL) {
Index: netipsec/key_debug.c
===================================================================
--- netipsec/key_debug.c	(revision 194477)
+++ netipsec/key_debug.c	(working copy)
@@ -663,9 +663,9 @@ kdebug_mbufhdr(m)

	if (m->m_flags & M_EXT) {
		printf("  m_ext{ ext_buf:%p ext_free:%p "
-		       "ext_size:%u ref_cnt:%p }\n",
+		       "ext_size:%u }\n",
			m->m_ext.ext_buf, m->m_ext.ext_free,
-			m->m_ext.ext_size, m->m_ext.ref_cnt);
+			m->m_ext.ext_size);
	}

	return;
Index: sys/mbuf.h
===================================================================
--- sys/mbuf.h	(revision 194477)
+++ sys/mbuf.h	(working copy)
@@ -53,8 +53,10 @@
 * externally and attach it to the mbuf in a way similar to that of mbuf
 * clusters.
 */
-#define	MLEN		(MSIZE - sizeof(struct m_hdr))	/* normal data len */
-#define	MHLEN		(MLEN - sizeof(struct pkthdr))	/* data len w/pkthdr */
+#define	MHSIZE		offsetof(struct mbuf, M_dat.M_databuf)
+#define	MPKTHSIZE	offsetof(struct mbuf, M_dat.MH.MH_dat.MH_databuf)
+#define	MLEN		(MSIZE - MHSIZE)	/* normal data len */
+#define	MHLEN		(MSIZE - MPKTHSIZE)	/* data len w/pkthdr */
 #define	MINCLSIZE	(MHLEN + 1)	/* smallest amount to put in cluster */
 #define	M_MAXCOMPRESS	(MHLEN / 2)	/* max amount to copy for compression */

@@ -67,36 +69,9 @@
 #define	mtod(m, t)	((t)((m)->m_data))
 #define	dtom(x)		((struct mbuf *)((intptr_t)(x) & ~(MSIZE-1)))

-/*
- * Argument structure passed to UMA routines during mbuf and packet
- * allocations.
- */
-struct mb_args {
-	int	flags;	/* Flags for mbuf being allocated */
-	short	type;	/* Type of mbuf being allocated */
-};
 #endif /* _KERNEL */

-#if defined(__LP64__)
-#define M_HDR_PAD    6
-#else
-#define M_HDR_PAD    2
-#endif
-
 /*
- * Header present at the beginning of every mbuf.
- */
-struct m_hdr {
-	struct mbuf	*mh_next;	/* next buffer in chain */
-	struct mbuf	*mh_nextpkt;	/* next chain in queue/record */
-	caddr_t		 mh_data;	/* location of data */
-	int		 mh_len;	/* amount of data in this mbuf */
-	int		 mh_flags;	/* flags; see below */
-	short		 mh_type;	/* type of data in this mbuf */
-	uint8_t          pad[M_HDR_PAD];/* word align                  */
-};
-
-/*
 * Packet tag structure (see below for details).
 */
 struct m_tag {
@@ -111,6 +86,7 @@ struct m_tag {
 * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set.
 */
 struct pkthdr {
+	SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
	struct ifnet	*rcvif;		/* rcv interface */
	/* variables for ip and tcp reassembly */
	void		*header;	/* pointer to packet header */
@@ -126,7 +102,9 @@ struct pkthdr {
		u_int16_t vt_vtag;	/* Ethernet 802.1p+q vlan tag */
		u_int16_t vt_nrecs;	/* # of IGMPv3 records in this chain */
	} PH_vt;
-	SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
+#if defined(__LP64__)
+	uint32_t	pad;
+#endif
 };
 #define	ether_vtag	PH_vt.vt_vtag

@@ -134,15 +112,17 @@ struct pkthdr {
 * Description of external storage mapped into mbuf; valid only if M_EXT is
 * set.
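 *
 * [Editorial sketch, not in the patch: a driver attaches private storage
 * with the new inline m_extadd(); my_free, my_buf and MY_BUFSIZE are
 * hypothetical:
 *
 *	m_extadd(m, my_buf, MY_BUFSIZE, my_free, arg1, NULL, 0,
 *	    EXT_NET_DRV);
 *
 * Note there is no longer a ref_cnt pointer to hook up, cf. the
 * dev/hatm hunks above.]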
 */
-struct m_ext {
-	caddr_t		 ext_buf;	/* start of buffer */
+struct mb_ext {
+	caddr_t		 ext_buf;	/* start of buffer */
+	uint16_t	 ext_size;	/* size of buffer, for ext_free */
+	uint16_t	 ext_type;	/* type of external storage */
+#if defined(__LP64__)
+	uint32_t	 ext_pad;
+#endif
	void		(*ext_free)	/* free routine if not the usual */
			    (void *, void *);
	void		*ext_arg1;	/* optional argument pointer */
	void		*ext_arg2;	/* optional argument pointer */
-	u_int		 ext_size;	/* size of buffer, for ext_free */
-	volatile u_int	*ref_cnt;	/* pointer to ref count info */
-	int		 ext_type;	/* type of external storage */
 };

 /*
@@ -150,28 +130,33 @@ struct pkthdr {
 * purposes.
 */
 struct mbuf {
-	struct m_hdr	m_hdr;
+	struct mbuf	*m_next;	/* next buffer in chain */
+	struct mbuf	*m_nextpkt;	/* next chain in queue/record */
+	void		*m_zone;	/* Zone allocated from. */
+	caddr_t		 m_data;	/* location of valid data */
+	volatile int	 m_ref;		/* Reference count. */
+	int		 m_len;		/* amount of data in this mbuf */
+	int		 m_flags;	/* flags; see below */
+#if defined(__LP64__)
+	uint32_t	 pad;
+#endif
+	short		 m_type;	/* type of data in this mbuf */
+	u_short		 m_size;	/* Actual size of buffer. */
	union {
		struct {
			struct pkthdr	MH_pkthdr;	/* M_PKTHDR set */
			union {
-				struct m_ext	MH_ext;	/* M_EXT set */
-				char		MH_databuf[MHLEN];
+				struct mb_ext	MH_ext;	/* M_EXT set */
+				char		MH_databuf[0];
			} MH_dat;
		} MH;
-		char	M_databuf[MLEN];		/* !M_PKTHDR, !M_EXT */
+		char	M_databuf[0];			/* !M_PKTHDR, !M_EXT */
	} M_dat;
 };
-#define	m_next		m_hdr.mh_next
-#define	m_len		m_hdr.mh_len
-#define	m_data		m_hdr.mh_data
-#define	m_type		m_hdr.mh_type
-#define	m_flags		m_hdr.mh_flags
-#define	m_nextpkt	m_hdr.mh_nextpkt
 #define	m_act		m_nextpkt
 #define	m_pkthdr	M_dat.MH.MH_pkthdr
 #define	m_ext		M_dat.MH.MH_dat.MH_ext
-#define	m_pktdat	M_dat.MH.MH_dat.MH_databuf
+#define	m_pktdat	M_dat.MH.MH_dat.MH_databuf
 #define	m_dat		M_dat.M_databuf

 /*
@@ -234,7 +219,6 @@ struct mbuf {
 #define	EXT_NET_DRV	100	/* custom ext_buf provided by net driver(s) */
 #define	EXT_MOD_TYPE	200	/* custom module's ext_buf type */
 #define	EXT_DISPOSABLE	300	/* can throw this buffer away w/page flipping */
-#define	EXT_EXTREF	400	/* has externally maintained ref_cnt ptr */

 /*
 * Flags indicating hw checksum support and sw checksum requirements.  This
@@ -325,15 +309,15 @@ struct mbstat {
 * !_KERNEL so that monitoring tools can look up the zones with
 * libmemstat(3).
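 *
 * [Editorial sketch, not part of the patch: a monitoring tool looks a
 * zone up by these names roughly as follows (libmemstat API):
 *
 *	struct memory_type_list *mtlp = memstat_mtl_alloc();
 *	memstat_sysctl_uma(mtlp, 0);
 *	mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_MEM_NAME);
 *
 * which is why the names stay visible to !_KERNEL consumers.]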
@@ -325,15 +309,15 @@ struct mbstat {
 * !_KERNEL so that monitoring tools can look up the zones with
 * libmemstat(3).
 */
-#define	MBUF_MEM_NAME		"mbuf"
 #define	MBUF_CLUSTER_MEM_NAME	"mbuf_cluster"
-#define	MBUF_PACKET_MEM_NAME	"mbuf_packet"
-#define	MBUF_JUMBOP_MEM_NAME	"mbuf_jumbo_page"
 #define	MBUF_JUMBO9_MEM_NAME	"mbuf_jumbo_9k"
 #define	MBUF_JUMBO16_MEM_NAME	"mbuf_jumbo_16k"
+#define	MBUF_JUMBOP_MEM_NAME	"mbuf_jumbo_page"
+#define	MBUF_MEM_NAME		"mbuf"
+#define	MBUF_PACKET_MEM_NAME	"mbuf_packet"
 #define	MBUF_TAG_MEM_NAME	"mbuf_tag"
-#define	MBUF_EXTREFCNT_MEM_NAME	"mbuf_ext_refcnt"
+
 
 #ifdef _KERNEL
 #ifdef WITNESS
@@ -352,36 +336,46 @@ struct mbstat {
 * The rest of it is defined in kern/kern_mbuf.c
 */
-extern uma_zone_t	zone_mbuf;
 extern uma_zone_t	zone_clust;
-extern uma_zone_t	zone_pack;
-extern uma_zone_t	zone_jumbop;
+extern uma_zone_t	zone_ext;
 extern uma_zone_t	zone_jumbo9;
 extern uma_zone_t	zone_jumbo16;
-extern uma_zone_t	zone_ext_refcnt;
+extern uma_zone_t	zone_jumbop;
+extern uma_zone_t	zone_mbuf;
+extern uma_zone_t	zone_pack;
 
-static __inline struct mbuf	*m_getcl(int how, short type, int flags);
+static __inline struct mbuf	*m_alloc(uma_zone_t zone, int size, int how,
+				    short type, int flags);
+static __inline void		 m_extadd(struct mbuf *, caddr_t, u_int,
+				    void (*)(void *, void *), void *, void *,
+				    int, int);
+void				 m_ext_free_zone(void *arg1, void *arg2);
+void				 m_ext_free_mbuf(void *arg1, void *arg2);
+void				 m_ext_free_nop(void *arg1, void *arg2);
+static __inline struct mbuf	*m_free(struct mbuf *m);
 static __inline struct mbuf	*m_get(int how, short type);
 static __inline struct mbuf	*m_gethdr(int how, short type);
-static __inline struct mbuf	*m_getjcl(int how, short type, int flags,
-				    int size);
+struct mbuf			*_m_getjcl(int how, short type, int flags,
+				    int size, uma_zone_t zone, int exttype);
 static __inline struct mbuf	*m_getclr(int how, short type);	/* XXX */
-static __inline struct mbuf	*m_free(struct mbuf *m);
+static __inline int		 m_init(struct mbuf *m, uma_zone_t zone,
+				    int size, int how, short type, int flags);
 static __inline void		 m_clget(struct mbuf *m, int how);
-static __inline void		*m_cljget(struct mbuf *m, int how, int size);
+void				*_m_cljget(struct mbuf *m, int how, int size,
+				    uma_zone_t zone, int exttype);
 static __inline void		 m_chtype(struct mbuf *m, short new_type);
-void				 mb_free_ext(struct mbuf *);
 static __inline struct mbuf	*m_last(struct mbuf *m);
+int				 m_pkthdr_init(struct mbuf *m, int how);
 
+/*
+ * Determine the type of cluster to allocate for an ext mbuf.
+ */
 static __inline int
m_gettype(int size)
{
	int type;

	switch (size) {
-	case MSIZE:
-		type = EXT_MBUF;
-		break;
	case MCLBYTES:
		type = EXT_CLUSTER;
		break;
@@ -397,21 +391,21 @@ m_gettype(int size)
		type = EXT_JUMBO16;
		break;
	default:
-		panic("%s: m_getjcl: invalid cluster size", __func__);
+		panic("%s: invalid cluster size", __func__);
	}

	return (type);
}

+/*
+ * Determine the zone to use when allocating an ext mbuf.
+ */
static __inline uma_zone_t
m_getzone(int size)
{
	uma_zone_t zone;

	switch (size) {
-	case MSIZE:
-		zone = zone_mbuf;
-		break;
	case MCLBYTES:
		zone = zone_clust;
		break;
@@ -427,20 +421,105 @@ m_getzone(int size)
		zone = zone_jumbo16;
		break;
	default:
-		panic("%s: m_getjcl: invalid cluster type", __func__);
+		panic("%s: invalid cluster type", __func__);
	}

	return (zone);
}
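Every existing caller passes a compile-time-constant size to these selectors, so after inlining each switch collapses to a single zone/type pair; that is what lets the m_getjcl() and m_cljget() wrappers below stay thin shims around non-inlined helpers. Roughly, under that assumption:

	/*
	 * m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES) should compile
	 * down to the equivalent of:
	 */
	m = _m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES,
	    zone_jumbo9, EXT_JUMBO9);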
+/*
+ * Allocate an mbuf from the provided zone and initialize the size header
+ * area if requested.  Delayed initialization may be performed by calling
+ * m_init() later with the required arguments.
+ *
+ * Returns NULL on failure and an mbuf on success.
+ */
 static __inline struct mbuf *
+m_alloc(uma_zone_t zone, int size, int how, short type, int flags)
+{
+	struct mbuf *m;
+
+	m = uma_zalloc(zone, how);
+	if (m == NULL)
+		return (NULL);
+	if (type == MT_NOINIT)
+		return (m);
+	if (m_init(m, zone, size, how, type, flags)) {
+		uma_zfree(zone, m);
+		return (NULL);
+	}
+	return (m);
+}
+
+/*
+ * Configure a provided mbuf to refer to the provided external storage
+ * buffer and set up a reference count for said buffer.
+ *
+ * Arguments:
+ *    mb     The existing mbuf to which to attach the provided buffer.
+ *    buf    The address of the provided external storage buffer.
+ *    size   The size of the provided buffer.
+ *    freef  A pointer to a routine that is responsible for freeing the
+ *           provided external storage buffer.
+ *    arg{1,2}  Pointers to arguments to be passed to the provided freef
+ *           routine (may be NULL).
+ *    flags  Any other flags to be passed to the provided mbuf.
+ *    type   The type that the external storage buffer should be
+ *           labeled with.
+ *
+ * Returns:
+ *    Nothing.
+ */
+static __inline void
+m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
+    void (*freef)(void *, void *), void *arg1, void *arg2, int flags, int type)
+{
+	mb->m_flags |= (M_EXT | flags);
+	mb->m_data = buf;
+	mb->m_size = size;
+	mb->m_ext.ext_buf = buf;
+	mb->m_ext.ext_size = size;
+	mb->m_ext.ext_free = freef;
+	mb->m_ext.ext_arg1 = arg1;
+	mb->m_ext.ext_arg2 = arg2;
+	mb->m_ext.ext_type = type;
+}
+
+/*
+ * Initialize an mbuf with linear storage.
+ *
+ * Inline because the consumer text overhead will be roughly the same to
+ * initialize or call a function with this many parameters and M_PKTHDR
+ * should go away with constant propagation for !MGETHDR.
+ */
+static __inline int
+m_init(struct mbuf *m, uma_zone_t zone, int size, int how, short type,
+    int flags)
+{
+	int error;
+
+	m->m_next = NULL;
+	m->m_nextpkt = NULL;
+	m->m_zone = zone;
+	m->m_data = m->m_dat;
+	m->m_ref = 1;
+	m->m_len = 0;
+	m->m_flags = flags;
+	m->m_type = type;
+	m->m_size = size;
+	if (flags & M_PKTHDR) {
+		if ((error = m_pkthdr_init(m, how)) != 0)
+			return (error);
+	}
+
+	return (0);
+}
+
+static __inline struct mbuf *
 m_get(int how, short type)
 {
-	struct mb_args args;
 
-	args.flags = 0;
-	args.type = type;
-	return ((struct mbuf *)(uma_zalloc_arg(zone_mbuf, &args, how)));
+	return (m_alloc(zone_mbuf, MLEN, how, type, 0));
 }
 
 /*
@@ -450,11 +529,8 @@ static __inline struct mbuf *
 m_getclr(int how, short type)
 {
 	struct mbuf *m;
-	struct mb_args args;
 
-	args.flags = 0;
-	args.type = type;
-	m = uma_zalloc_arg(zone_mbuf, &args, how);
+	m = m_alloc(zone_mbuf, MLEN, how, type, 0);
 	if (m != NULL)
 		bzero(m->m_data, MLEN);
 	return (m);
@@ -463,91 +539,74 @@ m_getclr(int how, short type)
 static __inline struct mbuf *
 m_gethdr(int how, short type)
 {
-	struct mb_args args;
 
-	args.flags = M_PKTHDR;
-	args.type = type;
-	return ((struct mbuf *)(uma_zalloc_arg(zone_mbuf, &args, how)));
+	return (m_alloc(zone_mbuf, MHLEN, how, type, M_PKTHDR));
 }
 
 static __inline struct mbuf *
 m_getcl(int how, short type, int flags)
 {
-	struct mb_args args;
+	struct mbuf *m;
 
-	args.flags = flags;
-	args.type = type;
-	return ((struct mbuf *)(uma_zalloc_arg(zone_pack, &args, how)));
+	m = m_alloc(zone_pack, MCLBYTES, how, type, flags | M_EXT);
+	/* Restore the data pointer clobbered by m_init. */
+	if (m != NULL && type != MT_NOINIT)
+		m->m_data = m->m_ext.ext_buf;
+
+	return (m);
 }
 
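Because m_extadd() is now a straightforward inline that fills in the mb_ext fields and m_size, a driver can attach private storage without any refcount-zone bookkeeping. A minimal sketch, assuming a driver-managed receive buffer; rx_buf_free(), rx_pool_put(), rx_attach_buf(), and RXBUF_SIZE are hypothetical names, not part of this patch:

	static void
	rx_buf_free(void *arg1, void *arg2)
	{
		/* arg1/arg2 are the pool and buffer passed to m_extadd(). */
		rx_pool_put(arg1, arg2);
	}

	static struct mbuf *
	rx_attach_buf(void *pool, caddr_t buf, int how)
	{
		struct mbuf *m;

		m = m_gethdr(how, MT_DATA);
		if (m == NULL)
			return (NULL);
		m_extadd(m, buf, RXBUF_SIZE, rx_buf_free, pool, buf,
		    0, EXT_NET_DRV);
		return (m);
	}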
-/*
- * m_getjcl() returns an mbuf with a cluster of the specified size attached.
- * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
- *
- * XXX: This is rather large, should be real function maybe.
- */
 static __inline struct mbuf *
 m_getjcl(int how, short type, int flags, int size)
 {
-	struct mb_args args;
-	struct mbuf *m, *n;
-	uma_zone_t zone;
 
-	args.flags = flags;
-	args.type = type;
+	/*
+	 * Rely on constant propagation to resolve zone and type before
+	 * calling the non-inlined function.
+	 */
+	return (_m_getjcl(how, type, flags, size, m_getzone(size),
+	    m_gettype(size)));
+}
 
-	m = uma_zalloc_arg(zone_mbuf, &args, how);
-	if (m == NULL)
-		return (NULL);
+void	m_tag_delete_chain(struct mbuf *, struct m_tag *);
 
-	zone = m_getzone(size);
-	n = uma_zalloc_arg(zone, m, how);
-	if (n == NULL) {
-		uma_zfree(zone_mbuf, m);
-		return (NULL);
-	}
-	return (m);
-}
-
 static __inline void
-m_free_fast(struct mbuf *m)
+m_free_fast(uma_zone_t zone, struct mbuf *m)
 {
+
 #ifdef INVARIANTS
+	KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
 	if (m->m_flags & M_PKTHDR)
 		KASSERT(SLIST_EMPTY(&m->m_pkthdr.tags),
 		    ("doing fast free of mbuf with tags"));
 #endif
-
-	uma_zfree_arg(zone_mbuf, m, (void *)MB_NOTAGS);
+
+	uma_zfree(zone, m);
 }
 
-static __inline struct mbuf *
-m_free(struct mbuf *m)
+static __inline void
+_m_free(struct mbuf *m)
 {
-	struct mbuf *n = m->m_next;
 
+	if ((m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags))
+		m_tag_delete_chain(m, NULL);
+	/*
+	 * Free attached storage if this mbuf is the only reference to it.
+	 */
 	if (m->m_flags & M_EXT)
-		mb_free_ext(m);
-	else if ((m->m_flags & M_NOFREE) == 0)
-		uma_zfree(zone_mbuf, m);
-	return (n);
+		m->m_ext.ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2);
+	if ((m->m_flags & M_NOFREE) == 0)
+		uma_zfree(m->m_zone, m);
 }
 
-static __inline void
-m_clget(struct mbuf *m, int how)
+static __inline struct mbuf *
+m_free(struct mbuf *m)
 {
+	struct mbuf *n = m->m_next;
 
-	if (m->m_flags & M_EXT)
-		printf("%s: %p mbuf already has cluster\n", __func__, m);
-	m->m_ext.ext_buf = (char *)NULL;
-	uma_zalloc_arg(zone_clust, m, how);
-	/*
-	 * On a cluster allocation failure, drain the packet zone and retry,
-	 * we might be able to loosen a few clusters up on the drain.
-	 */
-	if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) {
-		zone_drain(zone_pack);
-		uma_zalloc_arg(zone_clust, m, how);
-	}
+	if (m->m_ref == 1 || atomic_fetchadd_int(&m->m_ref, -1) == 1)
+		_m_free(m);
+
+	return (n);
 }
 
 /*
@@ -560,15 +619,12 @@ static __inline void
 static __inline void *
 m_cljget(struct mbuf *m, int how, int size)
 {
-	uma_zone_t zone;
 
-	if (m && m->m_flags & M_EXT)
-		printf("%s: %p mbuf already has cluster\n", __func__, m);
-	if (m != NULL)
-		m->m_ext.ext_buf = NULL;
-
-	zone = m_getzone(size);
-	return (uma_zalloc_arg(zone, m, how));
+	/*
+	 * Rely on constant propagation to resolve zone and type before
+	 * calling the non-inlined function.
	 */
+	return (_m_cljget(m, how, size, m_getzone(size), m_gettype(size)));
 }
 
 static __inline void
@@ -600,14 +656,14 @@ m_cljset(struct mbuf *m, void *cl, int type)
 		panic("unknown cluster type");
 		break;
 	}
+	m_extadd(m, cl, size, m_ext_free_zone, zone, cl, 0, type);
+}
 
-	m->m_data = m->m_ext.ext_buf = cl;
-	m->m_ext.ext_free = m->m_ext.ext_arg1 = m->m_ext.ext_arg2 = NULL;
-	m->m_ext.ext_size = size;
-	m->m_ext.ext_type = type;
-	m->m_ext.ref_cnt = uma_find_refcnt(zone, cl);
-	m->m_flags |= M_EXT;
+static __inline void
+m_clget(struct mbuf *m, int how)
+{
+	m_cljget(m, how, MCLBYTES);
 }
 
 static __inline void
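m_free() now implements the reference-counted free directly: the m_ref == 1 test keeps the unshared case free of atomics, and only the last release runs _m_free(), which calls ext_free exactly once. A sketch of the sharing protocol this implies; taking a reference with a bare atomic add is my reading of the code, not something the patch states, and m_share_ref is a hypothetical helper:

	static struct mbuf *
	m_share_ref(struct mbuf *m)
	{
		atomic_fetchadd_int(&m->m_ref, 1);	/* extra reference */
		return (m);
	}

	n = m_share_ref(m);
	m_free(m);	/* m_ref drops 2 -> 1; nothing is freed */
	m_free(n);	/* last reference; _m_free() releases storage */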
@@ -644,9 +700,7 @@ m_last(struct mbuf *m)
 * be both the local data payload, or an external buffer area, depending on
 * whether M_EXT is set).
 */
-#define	M_WRITABLE(m)	(!((m)->m_flags & M_RDONLY) &&			\
-			 (!(((m)->m_flags & M_EXT)) ||			\
-			 (*((m)->m_ext.ref_cnt) == 1)) )		\
+#define	M_WRITABLE(m)	(!((m)->m_flags & M_RDONLY) && (m)->m_ref == 1)
 
 /* Check if the supplied mbuf has a packet header, or else panic. */
 #define	M_ASSERTPKTHDR(m)						\
@@ -666,25 +720,14 @@ m_last(struct mbuf *m)
 * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place an
 * object of the specified size at the end of the mbuf, longword aligned.
 */
-#define	M_ALIGN(m, len) do {						\
-	KASSERT(!((m)->m_flags & (M_PKTHDR|M_EXT)),			\
-		("%s: M_ALIGN not normal mbuf", __func__));		\
-	KASSERT((m)->m_data == (m)->m_dat,				\
-		("%s: M_ALIGN not a virgin mbuf", __func__));		\
-	(m)->m_data += (MLEN - (len)) & ~(sizeof(long) - 1);		\
-} while (0)
+#define	M_ALIGN(m, len)							\
+	((m)->m_data += ((m)->m_size - (len)) & ~(sizeof(long) - 1))
 
 /*
 * As above, for mbufs allocated with m_gethdr/MGETHDR or initialized by
 * M_DUP/MOVE_PKTHDR.
 */
-#define	MH_ALIGN(m, len) do {						\
-	KASSERT((m)->m_flags & M_PKTHDR && !((m)->m_flags & M_EXT),	\
-		("%s: MH_ALIGN not PKTHDR mbuf", __func__));		\
-	KASSERT((m)->m_data == (m)->m_pktdat,				\
-		("%s: MH_ALIGN not a virgin mbuf", __func__));		\
-	(m)->m_data += (MHLEN - (len)) & ~(sizeof(long) - 1);		\
-} while (0)
+#define	MH_ALIGN(m, len)	M_ALIGN(m, len)
 
 /*
 * Compute the amount of space available before the current start of data in
@@ -693,11 +736,7 @@ m_last(struct mbuf *m)
 * The M_WRITABLE() is a temporary, conservative safety measure: the burden
 * of checking writability of the mbuf data area rests solely with the caller.
 */
-#define	M_LEADINGSPACE(m)						\
-	((m)->m_flags & M_EXT ?						\
-	    (M_WRITABLE(m) ? (m)->m_data - (m)->m_ext.ext_buf : 0) :	\
-	    (m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat :	\
-	    (m)->m_data - (m)->m_dat)
+#define	M_LEADINGSPACE(m)	(M_WRITABLE(m) ? (m)->m_data - M_START(m) : 0)
 
 /*
 * Compute the amount of space available after the end of data in an mbuf.
@@ -706,11 +745,13 @@ m_last(struct mbuf *m)
 * of checking writability of the mbuf data area rests solely with the caller.
 */
 #define	M_TRAILINGSPACE(m)						\
-	((m)->m_flags & M_EXT ?						\
-	    (M_WRITABLE(m) ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size	\
-		- ((m)->m_data + (m)->m_len) : 0) :			\
-	    &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len))
+	(M_WRITABLE(m) ?						\
+	    (M_START(m) + (m)->m_size) - ((m)->m_data + (m)->m_len) : 0)
 
+#define	M_START(m)							\
+	((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf :			\
+	 (m)->m_flags & M_PKTHDR ? (m)->m_pktdat : (m)->m_dat)
+
 /*
 * Arrange to prepend space of size plen to mbuf m.  If a new mbuf must be
 * allocated, how specifies whether to wait.  If the allocation fails, the
@@ -760,8 +801,6 @@ int		 m_apply(struct mbuf *, int, int,
 		    int (*)(void *, void *, u_int), void *);
 int		 m_append(struct mbuf *, int, c_caddr_t);
 void		 m_cat(struct mbuf *, struct mbuf *);
-void		 m_extadd(struct mbuf *, caddr_t, u_int,
-		    void (*)(void *, void *), void *, void *, int, int);
 struct mbuf	*m_collapse(struct mbuf *, int, int);
 void		 m_copyback(struct mbuf *, int, int, c_caddr_t);
 void		 m_copydata(const struct mbuf *, int, int, caddr_t);
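With M_START() factoring the start-of-storage computation out of the old three-way conditionals, leading and trailing space become plain pointer arithmetic guarded by M_WRITABLE(). A minimal sketch of the in-place append pattern these macros support (m_append_bytes is a hypothetical name, not part of this patch):

	static int
	m_append_bytes(struct mbuf *m, const char *src, int len)
	{
		if (!M_WRITABLE(m) || M_TRAILINGSPACE(m) < len)
			return (0);	/* caller must grow the chain instead */
		bcopy(src, mtod(m, char *) + m->m_len, len);
		m->m_len += len;
		return (1);
	}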
@@ -875,7 +914,6 @@ struct mbuf	*m_unshare(struct mbuf *, int how);
 
 /* Packet tag routines. */
 struct m_tag	*m_tag_alloc(u_int32_t, int, int, int);
 void		 m_tag_delete(struct mbuf *, struct m_tag *);
-void		 m_tag_delete_chain(struct mbuf *, struct m_tag *);
 void		 m_tag_free_default(struct m_tag *);
 struct m_tag	*m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *);
 struct m_tag	*m_tag_copy(struct m_tag *, int);
Index: sys/sockbuf.h
===================================================================
--- sys/sockbuf.h	(revision 194477)
+++ sys/sockbuf.h	(working copy)
@@ -173,12 +173,10 @@ void	sbunlock(struct sockbuf *sb);
 	(sb)->sb_cc += (m)->m_len;					\
 	if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA)	\
 		(sb)->sb_ctl += (m)->m_len;				\
-	(sb)->sb_mbcnt += MSIZE;					\
 	(sb)->sb_mcnt += 1;						\
-	if ((m)->m_flags & M_EXT) {					\
-		(sb)->sb_mbcnt += (m)->m_ext.ext_size;			\
+	if ((m)->m_flags & M_EXT)					\
 		(sb)->sb_ccnt += 1;					\
-	}								\
+	(sb)->sb_mbcnt += (m)->m_size + sizeof(struct mbuf);		\
 }
 
 /* adjust counters in sb reflecting freeing of m */
@@ -186,12 +184,10 @@ void	sbunlock(struct sockbuf *sb);
 	(sb)->sb_cc -= (m)->m_len;					\
 	if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA)	\
 		(sb)->sb_ctl -= (m)->m_len;				\
-	(sb)->sb_mbcnt -= MSIZE;					\
 	(sb)->sb_mcnt -= 1;						\
-	if ((m)->m_flags & M_EXT) {					\
-		(sb)->sb_mbcnt -= (m)->m_ext.ext_size;			\
+	if ((m)->m_flags & M_EXT)					\
 		(sb)->sb_ccnt -= 1;					\
-	}								\
+	(sb)->sb_mbcnt -= (m)->m_size + sizeof(struct mbuf);		\
 	if ((sb)->sb_sndptr == (m)) {					\
 		(sb)->sb_sndptr = NULL;					\
 		(sb)->sb_sndptroff = 0;					\
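Under the new accounting every mbuf contributes sizeof(struct mbuf) for the header plus m_size for the storage it fronts, so the M_EXT special case in the sb_mbcnt bookkeeping disappears on both the alloc and free paths. A minimal sketch of the resulting invariant for one chain (the helper name sb_chain_mbcnt is illustrative, not part of this patch):

	static u_int
	sb_chain_mbcnt(struct mbuf *m)
	{
		u_int mbcnt;

		mbcnt = 0;
		for (; m != NULL; m = m->m_next)
			mbcnt += m->m_size + sizeof(struct mbuf);
		return (mbcnt);	/* must match the sb_mbcnt bookkeeping */
	}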