Index: nfsclient/nfsm_subs.h =================================================================== --- nfsclient/nfsm_subs.h (revision 187994) +++ nfsclient/nfsm_subs.h (working copy) @@ -59,18 +59,8 @@ struct mbuf *mrest, int mrest_len, struct mbuf **mbp, u_int32_t **xidpp); -#define M_HASCL(m) ((m)->m_flags & M_EXT) -#define NFSMINOFF(m) \ - do { \ - if (M_HASCL(m)) \ - (m)->m_data = (m)->m_ext.ext_buf; \ - else if ((m)->m_flags & M_PKTHDR) \ - (m)->m_data = (m)->m_pktdat; \ - else \ - (m)->m_data = (m)->m_dat; \ - } while (0) -#define NFSMSIZ(m) ((M_HASCL(m))?MCLBYTES: \ - (((m)->m_flags & M_PKTHDR)?MHLEN:MLEN)) +#define NFSMINOFF(m) M_START((m)) +#define NFSMSIZ(m) (m)->m_size /* * Now for the macros that do the simple stuff and call the functions Index: kern/uipc_sockbuf.c =================================================================== --- kern/uipc_sockbuf.c (revision 187994) +++ kern/uipc_sockbuf.c (working copy) @@ -552,9 +552,7 @@ n = m->m_nextpkt; for (; m; m = m->m_next) { len += m->m_len; - mbcnt += MSIZE; - if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ - mbcnt += m->m_ext.ext_size; + mbcnt += m->m_size + sizeof(struct mbuf); } } if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { @@ -764,7 +762,7 @@ continue; } if (n && (n->m_flags & M_EOR) == 0 && - M_WRITABLE(n) && + (n->m_flags & M_RDONLY) == 0 && ((sb->sb_flags & SB_NOCOALESCE) == 0) && m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ m->m_len <= M_TRAILINGSPACE(n) && Index: kern/kern_mbuf.c =================================================================== --- kern/kern_mbuf.c (revision 187994) +++ kern/kern_mbuf.c (working copy) @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -210,27 +211,24 @@ /* * Zones from which we allocate. */ -uma_zone_t zone_mbuf; uma_zone_t zone_clust; -uma_zone_t zone_pack; uma_zone_t zone_jumbop; uma_zone_t zone_jumbo9; uma_zone_t zone_jumbo16; -uma_zone_t zone_ext_refcnt; +uma_zone_t zone_mbuf; +uma_zone_t zone_pack; /* * Local prototypes. */ -static int mb_ctor_mbuf(void *, int, void *, int); -static int mb_ctor_clust(void *, int, void *, int); -static int mb_ctor_pack(void *, int, void *, int); -static void mb_dtor_mbuf(void *, int, void *); -static void mb_dtor_clust(void *, int, void *); -static void mb_dtor_pack(void *, int, void *); -static int mb_zinit_pack(void *, int, int); -static void mb_zfini_pack(void *, int); - +#ifdef INVARIANTS +static int mb_ctor_pack(void *mem, int size, void *arg, int how); +#endif +static void mb_dtor_pack(void *mem, int size, void *arg); static void mb_reclaim(void *); +static int mb_zinit_pack(void *mem, int size, int how); +static void mb_zfini_pack(void *mem, int size); + static void mbuf_init(void *); static void *mbuf_jumbo_alloc(uma_zone_t, int, u_int8_t *, int); static void mbuf_jumbo_free(void *, int, u_int8_t); @@ -252,73 +250,66 @@ * Configure UMA zones for Mbufs, Clusters, and Packets. */ zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE, - mb_ctor_mbuf, mb_dtor_mbuf, #ifdef INVARIANTS - trash_init, trash_fini, + trash_ctor, trash_dtor, trash_init, trash_fini, #else - NULL, NULL, + NULL, NULL, NULL, NULL, #endif MSIZE - 1, UMA_ZONE_MAXBUCKET); zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES, - mb_ctor_clust, mb_dtor_clust, #ifdef INVARIANTS - trash_init, trash_fini, + trash_ctor, trash_dtor, trash_init, trash_fini, #else - NULL, NULL, + NULL, NULL, NULL, NULL, #endif - UMA_ALIGN_PTR, UMA_ZONE_REFCNT); + UMA_ALIGN_PTR, UMA_ZONE_MAXBUCKET); if (nmbclusters > 0) uma_zone_set_max(zone_clust, nmbclusters); - zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack, + zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, +#ifdef INVARIANTS + mb_ctor_pack, +#else + NULL, +#endif mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf); /* Make jumbo frame zone too. Page size, 9k and 16k. */ zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE, - mb_ctor_clust, mb_dtor_clust, #ifdef INVARIANTS - trash_init, trash_fini, + trash_ctor, trash_dtor, trash_init, trash_fini, #else - NULL, NULL, + NULL, NULL, NULL, NULL, #endif - UMA_ALIGN_PTR, UMA_ZONE_REFCNT); + UMA_ALIGN_PTR, 0); if (nmbjumbop > 0) uma_zone_set_max(zone_jumbop, nmbjumbop); zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES, - mb_ctor_clust, mb_dtor_clust, #ifdef INVARIANTS - trash_init, trash_fini, + trash_ctor, trash_dtor, trash_init, trash_fini, #else - NULL, NULL, + NULL, NULL, NULL, NULL, #endif - UMA_ALIGN_PTR, UMA_ZONE_REFCNT); + UMA_ALIGN_PTR, 0); if (nmbjumbo9 > 0) uma_zone_set_max(zone_jumbo9, nmbjumbo9); uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc); uma_zone_set_freef(zone_jumbo9, mbuf_jumbo_free); zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES, - mb_ctor_clust, mb_dtor_clust, #ifdef INVARIANTS - trash_init, trash_fini, + trash_ctor, trash_dtor, trash_init, trash_fini, #else - NULL, NULL, + NULL, NULL, NULL, NULL, #endif - UMA_ALIGN_PTR, UMA_ZONE_REFCNT); + UMA_ALIGN_PTR, 0); if (nmbjumbo16 > 0) uma_zone_set_max(zone_jumbo16, nmbjumbo16); uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc); uma_zone_set_freef(zone_jumbo16, mbuf_jumbo_free); - zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int), - NULL, NULL, - NULL, NULL, - UMA_ALIGN_PTR, UMA_ZONE_ZINIT); - - /* uma_prealloc() goes here... */ - /* * Hook event handler for low-memory situation, used to * drain protocols and push data back to the caches (UMA @@ -373,86 +364,18 @@ contigfree(mem, size, M_JUMBOFRAME); } -/* - * Constructor for Mbuf master zone. - * - * The 'arg' pointer points to a mb_args structure which - * contains call-specific information required to support the - * mbuf allocation API. See mbuf.h. - */ +#ifdef INVARIANTS static int -mb_ctor_mbuf(void *mem, int size, void *arg, int how) +mb_ctor_pack(void *mem, int size, void *arg, int how) { struct mbuf *m; - struct mb_args *args; -#ifdef MAC - int error; -#endif - int flags; - short type; -#ifdef INVARIANTS - trash_ctor(mem, size, arg, how); -#endif m = (struct mbuf *)mem; - args = (struct mb_args *)arg; - flags = args->flags; - type = args->type; + trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how); - /* - * The mbuf is initialized later. The caller has the - * responsibility to set up any MAC labels too. - */ - if (type == MT_NOINIT) - return (0); - - m->m_next = NULL; - m->m_nextpkt = NULL; - m->m_len = 0; - m->m_flags = flags; - m->m_type = type; - if (flags & M_PKTHDR) { - m->m_data = m->m_pktdat; - m->m_pkthdr.rcvif = NULL; - m->m_pkthdr.header = NULL; - m->m_pkthdr.len = 0; - m->m_pkthdr.csum_flags = 0; - m->m_pkthdr.csum_data = 0; - m->m_pkthdr.tso_segsz = 0; - m->m_pkthdr.ether_vtag = 0; - m->m_pkthdr.flowid = 0; - SLIST_INIT(&m->m_pkthdr.tags); -#ifdef MAC - /* If the label init fails, fail the alloc */ - error = mac_mbuf_init(m, how); - if (error) - return (error); -#endif - } else - m->m_data = m->m_dat; return (0); } - -/* - * The Mbuf master zone destructor. - */ -static void -mb_dtor_mbuf(void *mem, int size, void *arg) -{ - struct mbuf *m; - unsigned long flags; - - m = (struct mbuf *)mem; - flags = (unsigned long)arg; - - if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0) - m_tag_delete_chain(m, NULL); - KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); - KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); -#ifdef INVARIANTS - trash_dtor(mem, size, arg); #endif -} /* * The Mbuf Packet zone destructor. @@ -463,21 +386,21 @@ struct mbuf *m; m = (struct mbuf *)mem; - if ((m->m_flags & M_PKTHDR) != 0) - m_tag_delete_chain(m, NULL); - /* Make sure we've got a clean cluster back. */ KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__)); - KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__)); + KASSERT(m->m_ext.ext_free == m_ext_free_nop, + ("%s: ext_free != m_ext_free_nop", __func__)); KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__)); KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__)); - KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__)); - KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__)); - KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__)); -#ifdef INVARIANTS + KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", + __func__)); + KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", + __func__)); +#ifdef INVARIANTS trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg); #endif + /* * If there are processes blocked on zone_clust, waiting for pages * to be freed up, * cause them to be woken up by draining the @@ -491,85 +414,6 @@ } /* - * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor. - * - * Here the 'arg' pointer points to the Mbuf which we - * are configuring cluster storage for. If 'arg' is - * empty we allocate just the cluster without setting - * the mbuf to it. See mbuf.h. - */ -static int -mb_ctor_clust(void *mem, int size, void *arg, int how) -{ - struct mbuf *m; - u_int *refcnt; - int type; - uma_zone_t zone; - -#ifdef INVARIANTS - trash_ctor(mem, size, arg, how); -#endif - switch (size) { - case MCLBYTES: - type = EXT_CLUSTER; - zone = zone_clust; - break; -#if MJUMPAGESIZE != MCLBYTES - case MJUMPAGESIZE: - type = EXT_JUMBOP; - zone = zone_jumbop; - break; -#endif - case MJUM9BYTES: - type = EXT_JUMBO9; - zone = zone_jumbo9; - break; - case MJUM16BYTES: - type = EXT_JUMBO16; - zone = zone_jumbo16; - break; - default: - panic("unknown cluster size"); - break; - } - - m = (struct mbuf *)arg; - refcnt = uma_find_refcnt(zone, mem); - *refcnt = 1; - if (m != NULL) { - m->m_ext.ext_buf = (caddr_t)mem; - m->m_data = m->m_ext.ext_buf; - m->m_flags |= M_EXT; - m->m_ext.ext_free = NULL; - m->m_ext.ext_arg1 = NULL; - m->m_ext.ext_arg2 = NULL; - m->m_ext.ext_size = size; - m->m_ext.ext_type = type; - m->m_ext.ref_cnt = refcnt; - } - - return (0); -} - -/* - * The Mbuf Cluster zone destructor. - */ -static void -mb_dtor_clust(void *mem, int size, void *arg) -{ -#ifdef INVARIANTS - uma_zone_t zone; - - zone = m_getzone(size); - KASSERT(*(uma_find_refcnt(zone, mem)) <= 1, - ("%s: refcnt incorrect %u", __func__, - *(uma_find_refcnt(zone, mem))) ); - - trash_dtor(mem, size, arg); -#endif -} - -/* * The Packet secondary zone's init routine, executed on the * object's transition from mbuf keg slab to zone cache. */ @@ -579,13 +423,16 @@ struct mbuf *m; m = (struct mbuf *)mem; /* m is virgin. */ - if (uma_zalloc_arg(zone_clust, m, how) == NULL || - m->m_ext.ext_buf == NULL) + /* + * Allocate and attach the cluster to the ext. + */ + if ((mem = uma_zalloc(zone_clust, how)) == NULL) return (ENOMEM); - m->m_ext.ext_type = EXT_PACKET; /* Override. */ + m_extadd(m, mem, MCLBYTES, m_ext_free_nop, NULL, NULL, 0, EXT_PACKET); #ifdef INVARIANTS - trash_init(m->m_ext.ext_buf, MCLBYTES, how); + return trash_init(m->m_ext.ext_buf, MCLBYTES, how); #endif + return (0); } @@ -608,57 +455,34 @@ #endif } -/* - * The "packet" keg constructor. - */ -static int -mb_ctor_pack(void *mem, int size, void *arg, int how) +int +m_pkthdr_init(struct mbuf *m, int how) { - struct mbuf *m; - struct mb_args *args; #ifdef MAC int error; #endif - int flags; - short type; - m = (struct mbuf *)mem; - args = (struct mb_args *)arg; - flags = args->flags; - type = args->type; - -#ifdef INVARIANTS - trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how); -#endif - m->m_next = NULL; - m->m_nextpkt = NULL; - m->m_data = m->m_ext.ext_buf; - m->m_len = 0; - m->m_flags = (flags | M_EXT); - m->m_type = type; - - if (flags & M_PKTHDR) { - m->m_pkthdr.rcvif = NULL; - m->m_pkthdr.len = 0; - m->m_pkthdr.header = NULL; - m->m_pkthdr.csum_flags = 0; - m->m_pkthdr.csum_data = 0; - m->m_pkthdr.tso_segsz = 0; - m->m_pkthdr.ether_vtag = 0; - m->m_pkthdr.flowid = 0; - SLIST_INIT(&m->m_pkthdr.tags); + m->m_data = m->m_pktdat; + SLIST_INIT(&m->m_pkthdr.tags); + m->m_pkthdr.rcvif = NULL; + m->m_pkthdr.header = NULL; + m->m_pkthdr.len = 0; + m->m_pkthdr.flowid = 0; + m->m_pkthdr.csum_flags = 0; + m->m_pkthdr.csum_data = 0; + m->m_pkthdr.tso_segsz = 0; + m->m_pkthdr.ether_vtag = 0; #ifdef MAC - /* If the label init fails, fail the alloc */ - error = mac_mbuf_init(m, how); - if (error) - return (error); + /* If the label init fails, fail the alloc */ + error = mac_mbuf_init(m, how); + if (error) + return (error); #endif - } - /* m_ext is already initialized. */ return (0); } + /* * This is the protocol drain routine. * @@ -680,3 +504,45 @@ if (pr->pr_drain != NULL) (*pr->pr_drain)(); } + +struct mbuf * +_m_getjcl(int how, short type, int flags, int size, uma_zone_t zone, + int exttype) +{ + struct mbuf *m; + void *mem; + + if (size == MCLBYTES) + return m_getcl(how, type, flags); + /* + * Allocate the memory and header seperate for these sizes. + */ + mem = uma_zalloc(zone, how); + if (mem == NULL) + return (NULL); + m = m_alloc(zone_mbuf, 0, how, type, flags); + if (m == NULL) { + uma_zfree(zone, mem); + return (NULL); + } + m_extadd(m, mem, size, m_ext_free_zone, zone, mem, flags, exttype); + + return (m); +} + +void * +_m_cljget(struct mbuf *m, int how, int size, uma_zone_t zone, int exttype) +{ + void *mem; + + if (m && m->m_flags & M_EXT) + printf("%s: %p mbuf already has cluster\n", __func__, m); + if (m != NULL) + m->m_ext.ext_buf = NULL; + mem = uma_zalloc(zone, how); + if (mem == NULL) + return (NULL); + if (m) + m_extadd(m, mem, size, m_ext_free_zone, zone, mem, 0, exttype); + return (mem); +} Index: kern/uipc_mbuf2.c =================================================================== --- kern/uipc_mbuf2.c (revision 187994) +++ kern/uipc_mbuf2.c (working copy) @@ -150,14 +150,7 @@ * M_WRITABLE(). For now, we only evaluate once at the beginning and * live with this. */ - /* - * XXX: This is dumb. If we're just a regular mbuf with no M_EXT, - * then we're not "writable," according to this code. - */ - writable = 0; - if ((n->m_flags & M_EXT) == 0 || - (n->m_ext.ext_type == EXT_CLUSTER && M_WRITABLE(n))) - writable = 1; + writable = M_WRITABLE(n); /* * the target data is on . Index: kern/uipc_mbuf.c =================================================================== --- kern/uipc_mbuf.c (revision 187994) +++ kern/uipc_mbuf.c (working copy) @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -87,6 +88,8 @@ &m_defragrandomfailures, 0, ""); #endif +static void m_refm(struct mbuf *mb, struct mbuf *m); + /* * Allocate a given length worth of mbufs and/or clusters (whatever fits * best) and return a pointer to the top of the allocated chain. If an @@ -128,8 +131,7 @@ } /* Book keeping. */ - len -= (mb->m_flags & M_EXT) ? mb->m_ext.ext_size : - ((mb->m_flags & M_PKTHDR) ? MHLEN : MLEN); + len -= mb->m_size; if (mtail != NULL) mtail->m_next = mb; else @@ -164,147 +166,63 @@ mb = m_free(mb); } -/*- - * Configure a provided mbuf to refer to the provided external storage - * buffer and setup a reference count for said buffer. If the setting - * up of the reference count fails, the M_EXT bit will not be set. If - * successfull, the M_EXT bit is set in the mbuf's flags. - * - * Arguments: - * mb The existing mbuf to which to attach the provided buffer. - * buf The address of the provided external storage buffer. - * size The size of the provided buffer. - * freef A pointer to a routine that is responsible for freeing the - * provided external storage buffer. - * args A pointer to an argument structure (of any type) to be passed - * to the provided freef routine (may be NULL). - * flags Any other flags to be passed to the provided mbuf. - * type The type that the external storage buffer should be - * labeled with. - * - * Returns: - * Nothing. +/* + * Reference the existing storage area of an mbuf. The reference is readonly + * and the referenced data can not be freed until the referencing mbuf is + * freed. */ -void -m_extadd(struct mbuf *mb, caddr_t buf, u_int size, - void (*freef)(void *, void *), void *arg1, void *arg2, int flags, int type) +static void +m_refm(struct mbuf *mb, struct mbuf *m) { - KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__)); - if (type != EXT_EXTREF) - mb->m_ext.ref_cnt = (u_int *)uma_zalloc(zone_ext_refcnt, M_NOWAIT); - if (mb->m_ext.ref_cnt != NULL) { - *(mb->m_ext.ref_cnt) = 1; - mb->m_flags |= (M_EXT | flags); - mb->m_ext.ext_buf = buf; - mb->m_data = mb->m_ext.ext_buf; - mb->m_ext.ext_size = size; - mb->m_ext.ext_free = freef; - mb->m_ext.ext_arg1 = arg1; - mb->m_ext.ext_arg2 = arg2; - mb->m_ext.ext_type = type; - } + if (m->m_ref > 1) + atomic_add_int(&m->m_ref, 1); + else + m->m_ref++; + mb->m_flags |= M_EXT | M_RDONLY; + mb->m_data = m->m_data; + mb->m_size = m->m_len; /* Only existing data is visible. */ + mb->m_ext.ext_buf = m->m_data; + mb->m_ext.ext_size = m->m_len; + mb->m_ext.ext_free = m_ext_free_mbuf; + mb->m_ext.ext_arg1 = m; + mb->m_ext.ext_arg2 = NULL; + mb->m_ext.ext_type = EXT_MBUF; + + CTR3(KTR_NET, "m_refm: %p ref %d buf %p", + mb, mb->m_ref, mb->m_ext.ext_buf); } /* - * Non-directly-exported function to clean up after mbufs with M_EXT - * storage attached to them if the reference count hits 1. + * Free the ext area of a mbuf assuming a uma zone and argument are + * presented. */ void -mb_free_ext(struct mbuf *m) +m_ext_free_zone(void *arg1, void *arg2) { - int skipmbuf; - - KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); - KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__)); + uma_zfree(arg1, arg2); +} +/* + * Free the ext area of a mbuf assuming it has been acquired with m_refm(). + */ +void +m_ext_free_mbuf(void *arg1, void *arg2) +{ + /* - * check if the header is embedded in the cluster - */ - skipmbuf = (m->m_flags & M_NOFREE); - - /* Free attached storage if this mbuf is the only reference to it. */ - if (*(m->m_ext.ref_cnt) == 1 || - atomic_fetchadd_int(m->m_ext.ref_cnt, -1) == 1) { - switch (m->m_ext.ext_type) { - case EXT_PACKET: /* The packet zone is special. */ - if (*(m->m_ext.ref_cnt) == 0) - *(m->m_ext.ref_cnt) = 1; - uma_zfree(zone_pack, m); - return; /* Job done. */ - case EXT_CLUSTER: - uma_zfree(zone_clust, m->m_ext.ext_buf); - break; - case EXT_JUMBOP: - uma_zfree(zone_jumbop, m->m_ext.ext_buf); - break; - case EXT_JUMBO9: - uma_zfree(zone_jumbo9, m->m_ext.ext_buf); - break; - case EXT_JUMBO16: - uma_zfree(zone_jumbo16, m->m_ext.ext_buf); - break; - case EXT_SFBUF: - case EXT_NET_DRV: - case EXT_MOD_TYPE: - case EXT_DISPOSABLE: - *(m->m_ext.ref_cnt) = 0; - uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *, - m->m_ext.ref_cnt)); - /* FALLTHROUGH */ - case EXT_EXTREF: - KASSERT(m->m_ext.ext_free != NULL, - ("%s: ext_free not set", __func__)); - (*(m->m_ext.ext_free))(m->m_ext.ext_arg1, - m->m_ext.ext_arg2); - break; - default: - KASSERT(m->m_ext.ext_type == 0, - ("%s: unknown ext_type", __func__)); - } - } - if (skipmbuf) - return; - - /* - * Free this mbuf back to the mbuf zone with all m_ext - * information purged. + * Release one more reference to this mbuf. If it is the last it + * will be freed. */ - m->m_ext.ext_buf = NULL; - m->m_ext.ext_free = NULL; - m->m_ext.ext_arg1 = NULL; - m->m_ext.ext_arg2 = NULL; - m->m_ext.ref_cnt = NULL; - m->m_ext.ext_size = 0; - m->m_ext.ext_type = 0; - m->m_flags &= ~M_EXT; - uma_zfree(zone_mbuf, m); + m_free(arg1); } -/* - * Attach the the cluster from *m to *n, set up m_ext in *n - * and bump the refcount of the cluster. - */ -static void -mb_dupcl(struct mbuf *n, struct mbuf *m) +void +m_ext_free_nop(void *arg1, void *arg2) { - KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); - KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__)); - KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); - if (*(m->m_ext.ref_cnt) == 1) - *(m->m_ext.ref_cnt) += 1; - else - atomic_add_int(m->m_ext.ref_cnt, 1); - n->m_ext.ext_buf = m->m_ext.ext_buf; - n->m_ext.ext_free = m->m_ext.ext_free; - n->m_ext.ext_arg1 = m->m_ext.ext_arg1; - n->m_ext.ext_arg2 = m->m_ext.ext_arg2; - n->m_ext.ext_size = m->m_ext.ext_size; - n->m_ext.ref_cnt = m->m_ext.ref_cnt; - n->m_ext.ext_type = m->m_ext.ext_type; - n->m_flags |= M_EXT; + /* Nothing to do. */ } /* @@ -357,11 +275,8 @@ * unrelated kernel memory before or after us is trashed. * No way to recover from that. */ - a = ((m->m_flags & M_EXT) ? m->m_ext.ext_buf : - ((m->m_flags & M_PKTHDR) ? (caddr_t)(&m->m_pktdat) : - (caddr_t)(&m->m_dat)) ); - b = (caddr_t)(a + (m->m_flags & M_EXT ? m->m_ext.ext_size : - ((m->m_flags & M_PKTHDR) ? MHLEN : MLEN))); + a = M_START(m); + b = (caddr_t)(a + m->m_size); if ((caddr_t)m->m_data < a) M_SANITY_ACTION("m_data outside mbuf data range left"); if ((caddr_t)m->m_data > b) @@ -532,6 +447,7 @@ struct mbuf *top; int copyhdr = 0; + CTR3(KTR_NET, "m_copym(%p, %d, %d)", m, off0, len); KASSERT(off >= 0, ("m_copym, negative off %d", off)); KASSERT(len >= 0, ("m_copym, negative len %d", len)); MBUF_CHECKSLEEP(wait); @@ -568,13 +484,16 @@ n->m_pkthdr.len = len; copyhdr = 0; } + /* + * If the copied data will fit in the space of standard + * mbuf prefer to copy rather than reference. + */ n->m_len = min(len, m->m_len - off); - if (m->m_flags & M_EXT) { - n->m_data = m->m_data + off; - mb_dupcl(n, m); + if (n->m_len > n->m_size) { + m_refm(n, m); + n->m_data += off; } else - bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), - (u_int)n->m_len); + bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), n->m_len); if (len != M_COPYALL) len -= n->m_len; off = 0; @@ -752,7 +671,9 @@ m_copypacket(struct mbuf *m, int how) { struct mbuf *top, *n, *o; + int leading; + CTR1(KTR_NET, "m_copypacket(%p)", m); MBUF_CHECKSLEEP(how); MGET(n, how, m->m_type); top = n; @@ -762,13 +683,10 @@ if (!m_dup_pkthdr(n, m, how)) goto nospace; n->m_len = m->m_len; - if (m->m_flags & M_EXT) { - n->m_data = m->m_data; - mb_dupcl(n, m); - } else { - n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat ); - bcopy(mtod(m, char *), mtod(n, char *), n->m_len); - } + if (n->m_len > n->m_size) + m_refm(n, m); + else + bcopy(mtod(m, caddr_t), mtod(n, caddr_t), m->m_len); m = m->m_next; while (m) { @@ -780,13 +698,13 @@ n = n->m_next; n->m_len = m->m_len; - if (m->m_flags & M_EXT) { - n->m_data = m->m_data; - mb_dupcl(n, m); + leading = M_LEADINGSPACE(m); + if (n->m_len + leading > n->m_size) { + m_refm(n, m); } else { - bcopy(mtod(m, char *), mtod(n, char *), n->m_len); + n->m_data = M_START(n) + leading; + bcopy(mtod(m, caddr_t), mtod(n, caddr_t), n->m_len); } - m = m->m_next; } return top; @@ -805,6 +723,7 @@ { u_int count; + CTR3(KTR_NET, "m_copydata(%p, %d, %d)", m, off, len); KASSERT(off >= 0, ("m_copydata, negative off %d", off)); KASSERT(len >= 0, ("m_copydata, negative len %d", len)); while (off > 0) { @@ -834,8 +753,9 @@ m_dup(struct mbuf *m, int how) { struct mbuf **p, *top = NULL; - int remain, moff, nsize; + int remain, moff; + CTR1(KTR_NET, "m_dup(%p)", m); MBUF_CHECKSLEEP(how); /* Sanity check */ if (m == NULL) @@ -850,13 +770,10 @@ struct mbuf *n; /* Get the next new mbuf */ - if (remain >= MINCLSIZE) { + if (remain >= MINCLSIZE) n = m_getcl(how, m->m_type, 0); - nsize = MCLBYTES; - } else { + else n = m_get(how, m->m_type); - nsize = MLEN; - } if (n == NULL) goto nospace; @@ -865,8 +782,6 @@ m_free(n); goto nospace; } - if ((n->m_flags & M_EXT) == 0) - nsize = MHLEN; } n->m_len = 0; @@ -875,8 +790,8 @@ p = &n->m_next; /* Copy data from original mbuf(s) into new mbuf */ - while (n->m_len < nsize && m != NULL) { - int chunk = min(nsize - n->m_len, m->m_len - moff); + while (n->m_len < n->m_size && m != NULL) { + int chunk = min(n->m_size - n->m_len, m->m_len - moff); bcopy(m->m_data + moff, n->m_data + n->m_len, chunk); moff += chunk; @@ -908,11 +823,13 @@ void m_cat(struct mbuf *m, struct mbuf *n) { + CTR2(KTR_NET, "m_cat(%p, %p)", m, n); + while (m->m_next) m = m->m_next; while (n) { if (m->m_flags & M_EXT || - m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) { + m->m_len + n->m_len > m->m_size) { /* just join the two chains */ m->m_next = n; return; @@ -932,6 +849,7 @@ struct mbuf *m; int count; + CTR2(KTR_NET, "m_adj(%p, %d)", mp, req_len); if ((m = mp) == NULL) return; if (len >= 0) { @@ -1014,13 +932,13 @@ int count; int space; + CTR2(KTR_NET, "m_pullup(%p, %d)", n, len); /* * If first mbuf has no cluster, and has room for len bytes * without shifting current data, pullup into it, * otherwise allocate a new mbuf to prepend to the chain. */ - if ((n->m_flags & M_EXT) == 0 && - n->m_data + len < &n->m_dat[MLEN] && n->m_next) { + if ((n->m_flags & M_EXT) == 0 && len < n->m_size && n->m_next) { if (n->m_len >= len) return (n); m = n; @@ -1036,7 +954,8 @@ if (n->m_flags & M_PKTHDR) M_MOVE_PKTHDR(m, n); } - space = &m->m_dat[MLEN] - (m->m_data + m->m_len); + /* XXX M_TRAILINGSPACE without M_WRITABLE */ + space = (M_START(m) + m->m_size) - (m->m_data + (m)->m_len); do { count = min(min(max(len, max_protohdr), space), n->m_len); bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, @@ -1075,6 +994,7 @@ struct mbuf *m; int count, space; + CTR2(KTR_NET, "m_copyup(%p, %d)", n, len); if (len > (MHLEN - dstoff)) goto bad; MGET(m, M_DONTWAIT, n->m_type); @@ -1084,7 +1004,8 @@ if (n->m_flags & M_PKTHDR) M_MOVE_PKTHDR(m, n); m->m_data += dstoff; - space = &m->m_dat[MLEN] - (m->m_data + m->m_len); + /* XXX M_TRAILINGSPACE without M_WRITABLE */ + space = (M_START(m) + m->m_size) - (m->m_data + (m)->m_len); do { count = min(min(max(len, max_protohdr), space), n->m_len); memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t), @@ -1126,6 +1047,7 @@ struct mbuf *m, *n; u_int len = len0, remain; + CTR2(KTR_NET, "m_split(%p, %d)", m0, len0); MBUF_CHECKSLEEP(wait); for (m = m0; m && len > m->m_len; m = m->m_next) len -= m->m_len; @@ -1139,8 +1061,8 @@ n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; n->m_pkthdr.len = m0->m_pkthdr.len - len0; m0->m_pkthdr.len = len0; - if (m->m_flags & M_EXT) - goto extpacket; + if (m->m_size >= MCLBYTES) + goto refpacket; if (remain > MHLEN) { /* m can't be the lead packet */ MH_ALIGN(n, 0); @@ -1164,13 +1086,13 @@ return (NULL); M_ALIGN(n, remain); } -extpacket: - if (m->m_flags & M_EXT) { - n->m_data = m->m_data + len; - mb_dupcl(n, m); - } else { +refpacket: + if (remain > n->m_size) { + m_refm(n, m); + n->m_data += len; + } else bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); - } + n->m_len = remain; m->m_len = len; n->m_next = m->m_next; @@ -1255,6 +1177,7 @@ struct mbuf *m = m0, *n; int totlen = 0; + CTR3(KTR_NET, "m_copyback(%p, %d, %d)", m0, off, len); if (m0 == NULL) return; while (off > (mlen = m->m_len)) { @@ -1310,6 +1233,7 @@ struct mbuf *m, *n; int remainder, space; + CTR2(KTR_NET, "m_append(%p, %d)", m0, len); for (m = m0; m->m_next != NULL; m = m->m_next) ; remainder = len; @@ -1580,7 +1504,7 @@ n = m->m_next; if (n == NULL) break; - if ((m->m_flags & M_RDONLY) == 0 && + if (M_WRITABLE(m) && n->m_len < M_TRAILINGSPACE(m)) { bcopy(mtod(n, void *), mtod(m, char *) + m->m_len, n->m_len); @@ -1780,12 +1704,7 @@ { int adjust; - if (m->m_flags & M_EXT) - adjust = m->m_ext.ext_size - len; - else if (m->m_flags & M_PKTHDR) - adjust = MHLEN - len; - else - adjust = MLEN - len; + adjust = m->m_size - len; m->m_data += adjust &~ (sizeof(long)-1); } Index: netinet/sctp_os_bsd.h =================================================================== --- netinet/sctp_os_bsd.h (revision 187994) +++ netinet/sctp_os_bsd.h (working copy) @@ -357,7 +357,7 @@ /* return the base ext data pointer */ #define SCTP_BUF_EXTEND_BASE(m) (m->m_ext.ext_buf) /* return the refcnt of the data pointer */ -#define SCTP_BUF_EXTEND_REFCNT(m) (*m->m_ext.ref_cnt) +#define SCTP_BUF_EXTEND_REFCNT(m) (m->m_ref) /* return any buffer related flags, this is * used beyond logging for apple only. */ Index: netinet/ip_options.c =================================================================== --- netinet/ip_options.c (revision 187994) +++ netinet/ip_options.c (working copy) @@ -503,7 +503,7 @@ } if (p->ipopt_dst.s_addr) ip->ip_dst = p->ipopt_dst; - if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { + if (!M_WRITABLE(m) || M_LEADINGSPACE(m) < optlen) { MGETHDR(n, M_DONTWAIT, MT_DATA); if (n == NULL) { *phlen = 0; Index: netgraph/ng_tty.c =================================================================== --- netgraph/ng_tty.c (revision 187994) +++ netgraph/ng_tty.c (working copy) @@ -442,7 +442,7 @@ * Odd, we have changed from non-bypass to bypass. It is * unlikely but not impossible, flush the data first. */ - sc->m->m_data = sc->m->m_pktdat; + sc->m->m_data = M_START(sc->m); NG_SEND_DATA_ONLY(error, sc->hook, sc->m); sc->m = NULL; } @@ -498,7 +498,7 @@ /* Ship off mbuf if it's time */ if (sc->hotchar == -1 || c == sc->hotchar || m->m_len >= MHLEN) { - m->m_data = m->m_pktdat; + m->m_data = M_START(m); sc->m = NULL; NG_SEND_DATA_ONLY(error, sc->hook, m); /* Will queue */ } Index: nfsserver/nfsm_subs.h =================================================================== --- nfsserver/nfsm_subs.h (revision 187994) +++ nfsserver/nfsm_subs.h (working copy) @@ -50,9 +50,7 @@ * First define what the actual subs. return */ -#define M_HASCL(m) ((m)->m_flags & M_EXT) -#define NFSMSIZ(m) ((M_HASCL(m))?MCLBYTES: \ - (((m)->m_flags & M_PKTHDR)?MHLEN:MLEN)) +#define NFSMSIZ(m) (m)->m_size /* * Now for the macros that do the simple stuff and call the functions Index: dev/ti/if_ti.c =================================================================== --- dev/ti/if_ti.c (revision 187994) +++ dev/ti/if_ti.c (working copy) @@ -1308,12 +1308,11 @@ if (m_new == NULL) { return (ENOBUFS); } - m_new->m_len = m_new->m_pkthdr.len = MHLEN; } else { m_new = m; - m_new->m_data = m_new->m_pktdat; - m_new->m_len = m_new->m_pkthdr.len = MHLEN; + m_new->m_data = M_START(m); } + m_new->m_len = m_new->m_pkthdr.len = m_new->m_size; m_adj(m_new, ETHER_ALIGN); r = &sc->ti_rdata->ti_rx_mini_ring[i]; Index: dev/cxgb/cxgb_sge.c =================================================================== --- dev/cxgb/cxgb_sge.c (revision 187994) +++ dev/cxgb/cxgb_sge.c (working copy) @@ -515,7 +515,7 @@ struct refill_fl_cb_arg cb_arg; caddr_t cl; int err, count = 0; - int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t); + int header_size = sizeof(struct mbuf) + sizeof(uint32_t); cb_arg.error = 0; while (n--) { @@ -2360,7 +2360,7 @@ q->rspq.size = p->rspq_size; - header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t); + header_size = sizeof(struct mbuf) + sizeof(uint32_t); q->txq[TXQ_ETH].stop_thres = nports * flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); @@ -2535,8 +2535,7 @@ struct mbuf *m; int header_size; - header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + - sizeof(struct m_ext_) + sizeof(uint32_t); + header_size = sizeof(struct mbuf) + sizeof(uint32_t); bzero(cl, header_size); m = (struct mbuf *)cl; Index: dev/vx/if_vx.c =================================================================== --- dev/vx/if_vx.c (revision 187994) +++ dev/vx/if_vx.c (working copy) @@ -825,8 +825,11 @@ /* Convert one of our saved mbuf's. */ sc->vx_next_mb = (sc->vx_next_mb + 1) % MAX_MBS; m->m_data = m->m_pktdat; - m->m_flags = M_PKTHDR; - bzero(&m->m_pkthdr, sizeof(m->m_pkthdr)); + m->m_flags |= M_PKTHDR; + if (m_pkthdr_init(m, M_NOWAIT)) { + m_free(m); + return NULL; + } } m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = totlen; Index: dev/xen/netback/netback.c =================================================================== --- dev/xen/netback/netback.c (revision 187994) +++ dev/xen/netback/netback.c (working copy) @@ -863,8 +863,7 @@ pkts_dequeued++; /* Check if we need to copy the data */ - if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) || - (*m->m_ext.ref_cnt > 1) || m->m_next != NULL) { + if (M_WRITABLE(m) == 0 || m->m_next != NULL) { struct mbuf *n; DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n", Index: vm/uma_int.h =================================================================== --- vm/uma_int.h (revision 187994) +++ vm/uma_int.h (working copy) @@ -214,7 +214,6 @@ struct vm_object *uk_obj; /* Zone specific object */ vm_offset_t uk_kva; /* Base kva for zones with objs */ - uma_zone_t uk_slabzone; /* Slab zone backing us, if OFFPAGE */ u_int16_t uk_pgoff; /* Offset to uma_slab struct */ u_int16_t uk_ppera; /* pages per allocation from backend */ @@ -223,10 +222,8 @@ }; typedef struct uma_keg * uma_keg_t; -/* Page management structure */ - -/* Sorry for the union, but space efficiency is important */ -struct uma_slab_head { +/* The slab/page management structure. */ +struct uma_slab { uma_keg_t us_keg; /* Keg we live in */ union { LIST_ENTRY(uma_slab) _us_link; /* slabs in zone */ @@ -237,51 +234,23 @@ u_int8_t us_flags; /* Page flags see uma.h */ u_int8_t us_freecount; /* How many are free? */ u_int8_t us_firstfree; /* First free item index */ -}; - -/* The standard slab structure */ -struct uma_slab { - struct uma_slab_head us_head; /* slab header data */ struct { u_int8_t us_item; - } us_freelist[1]; /* actual number bigger */ + } us_freelist[0]; /* actual number bigger */ }; -/* - * The slab structure for UMA_ZONE_REFCNT zones for whose items we - * maintain reference counters in the slab for. - */ -struct uma_slab_refcnt { - struct uma_slab_head us_head; /* slab header data */ - struct { - u_int8_t us_item; - u_int32_t us_refcnt; - } us_freelist[1]; /* actual number bigger */ -}; +#define us_link us_type._us_link +#define us_size us_type._us_size -#define us_keg us_head.us_keg -#define us_link us_head.us_type._us_link -#define us_size us_head.us_type._us_size -#define us_hlink us_head.us_hlink -#define us_data us_head.us_data -#define us_flags us_head.us_flags -#define us_freecount us_head.us_freecount -#define us_firstfree us_head.us_firstfree typedef struct uma_slab * uma_slab_t; -typedef struct uma_slab_refcnt * uma_slabrefcnt_t; typedef uma_slab_t (*uma_slaballoc)(uma_zone_t, uma_keg_t, int); - /* * These give us the size of one free item reference within our corresponding - * uma_slab structures, so that our calculations during zone setup are correct - * regardless of what the compiler decides to do with padding the structure - * arrays within uma_slab. + * uma_slab structures. */ -#define UMA_FRITM_SZ (sizeof(struct uma_slab) - sizeof(struct uma_slab_head)) -#define UMA_FRITMREF_SZ (sizeof(struct uma_slab_refcnt) - \ - sizeof(struct uma_slab_head)) +#define UMA_FRITM_SZ 1 struct uma_klink { LIST_ENTRY(uma_klink) kl_link; Index: vm/uma_dbg.c =================================================================== --- vm/uma_dbg.c (revision 187994) +++ vm/uma_dbg.c (working copy) @@ -221,7 +221,6 @@ uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item) { uma_keg_t keg; - uma_slabrefcnt_t slabref; int freei; if (slab == NULL) { @@ -234,14 +233,8 @@ freei = ((unsigned long)item - (unsigned long)slab->us_data) / keg->uk_rsize; + slab->us_freelist[freei].us_item = 255; - if (keg->uk_flags & UMA_ZONE_REFCNT) { - slabref = (uma_slabrefcnt_t)slab; - slabref->us_freelist[freei].us_item = 255; - } else { - slab->us_freelist[freei].us_item = 255; - } - return; } @@ -255,7 +248,6 @@ uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item) { uma_keg_t keg; - uma_slabrefcnt_t slabref; int freei; if (slab == NULL) { @@ -280,34 +272,17 @@ (freei * keg->uk_rsize) + slab->us_data); } - if (keg->uk_flags & UMA_ZONE_REFCNT) { - slabref = (uma_slabrefcnt_t)slab; - if (slabref->us_freelist[freei].us_item != 255) { - printf("Slab at %p, freei %d = %d.\n", - slab, freei, slabref->us_freelist[freei].us_item); - panic("Duplicate free of item %p from zone %p(%s)\n", - item, zone, zone->uz_name); - } - - /* - * When this is actually linked into the slab this will change. - * Until then the count of valid slabs will make sure we don't - * accidentally follow this and assume it's a valid index. - */ - slabref->us_freelist[freei].us_item = 0; - } else { - if (slab->us_freelist[freei].us_item != 255) { - printf("Slab at %p, freei %d = %d.\n", - slab, freei, slab->us_freelist[freei].us_item); - panic("Duplicate free of item %p from zone %p(%s)\n", - item, zone, zone->uz_name); - } - - /* - * When this is actually linked into the slab this will change. - * Until then the count of valid slabs will make sure we don't - * accidentally follow this and assume it's a valid index. - */ - slab->us_freelist[freei].us_item = 0; + if (slab->us_freelist[freei].us_item != 255) { + printf("Slab at %p, freei %d = %d.\n", + slab, freei, slab->us_freelist[freei].us_item); + panic("Duplicate free of item %p from zone %p(%s)\n", + item, zone, zone->uz_name); } + + /* + * When this is actually linked into the slab this will change. + * Until then the count of valid slabs will make sure we don't + * accidentally follow this and assume it's a valid index. + */ + slab->us_freelist[freei].us_item = 0; } Index: vm/uma.h =================================================================== --- vm/uma.h (revision 187994) +++ vm/uma.h (working copy) @@ -239,7 +239,6 @@ * information in the vm_page. */ #define UMA_ZONE_SECONDARY 0x0200 /* Zone is a Secondary Zone */ -#define UMA_ZONE_REFCNT 0x0400 /* Allocate refcnts in slabs */ #define UMA_ZONE_MAXBUCKET 0x0800 /* Use largest buckets */ #define UMA_ZONE_CACHESPREAD 0x1000 /* * Spread memory start locations across @@ -255,8 +254,7 @@ * physical parameters of the request and may not be provided by the consumer. */ #define UMA_ZONE_INHERIT \ - (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_HASH | \ - UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB) + (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_HASH | UMA_ZONE_VTOSLAB) /* Definitions for align */ #define UMA_ALIGN_PTR (sizeof(void *) - 1) /* Alignment fit for ptr */ @@ -537,21 +535,6 @@ void uma_prealloc(uma_zone_t zone, int itemcnt); /* - * Used to lookup the reference counter allocated for an item - * from a UMA_ZONE_REFCNT zone. For UMA_ZONE_REFCNT zones, - * reference counters are allocated for items and stored in - * the underlying slab header. - * - * Arguments: - * zone The UMA_ZONE_REFCNT zone to which the item belongs. - * item The address of the item for which we want a refcnt. - * - * Returns: - * A pointer to a u_int32_t reference counter. - */ -u_int32_t *uma_find_refcnt(uma_zone_t zone, void *item); - -/* * Used to determine if a fixed-size zone is exhausted. * * Arguments: Index: vm/uma_core.c =================================================================== --- vm/uma_core.c (revision 187994) +++ vm/uma_core.c (working copy) @@ -103,7 +103,6 @@ /* This is the zone from which all of uma_slab_t's are allocated. */ static uma_zone_t slabzone; -static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */ /* * The initial hash tables come out of this zone so they can be allocated @@ -139,7 +138,6 @@ /* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */ static u_int uma_max_ipers; -static u_int uma_max_ipers_ref; /* * This is the handle used to schedule events that need to happen @@ -742,7 +740,7 @@ obj); } if (keg->uk_flags & UMA_ZONE_OFFPAGE) - zone_free_item(keg->uk_slabzone, slab, NULL, + zone_free_item(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE); #ifdef UMA_DEBUG printf("%s: Returning %d bytes.\n", @@ -806,7 +804,6 @@ static uma_slab_t keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait) { - uma_slabrefcnt_t slabref; uma_alloc allocf; uma_slab_t slab; u_int8_t *mem; @@ -823,7 +820,7 @@ KEG_UNLOCK(keg); if (keg->uk_flags & UMA_ZONE_OFFPAGE) { - slab = zone_alloc_item(keg->uk_slabzone, NULL, wait); + slab = zone_alloc_item(slabzone, NULL, wait); if (slab == NULL) { KEG_LOCK(keg); return NULL; @@ -846,7 +843,7 @@ mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait); if (mem == NULL) { if (keg->uk_flags & UMA_ZONE_OFFPAGE) - zone_free_item(keg->uk_slabzone, slab, NULL, + zone_free_item(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE); KEG_LOCK(keg); return (NULL); @@ -866,16 +863,8 @@ slab->us_firstfree = 0; slab->us_flags = flags; - if (keg->uk_flags & UMA_ZONE_REFCNT) { - slabref = (uma_slabrefcnt_t)slab; - for (i = 0; i < keg->uk_ipers; i++) { - slabref->us_freelist[i].us_refcnt = 0; - slabref->us_freelist[i].us_item = i+1; - } - } else { - for (i = 0; i < keg->uk_ipers; i++) - slab->us_freelist[i].us_item = i+1; - } + for (i = 0; i < keg->uk_ipers; i++) + slab->us_freelist[i].us_item = i+1; if (keg->uk_init != NULL) { for (i = 0; i < keg->uk_ipers; i++) @@ -903,7 +892,7 @@ (i * PAGE_SIZE), obj); } if (keg->uk_flags & UMA_ZONE_OFFPAGE) - zone_free_item(keg->uk_slabzone, slab, + zone_free_item(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE); keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags); @@ -1107,13 +1096,8 @@ keg->uk_rsize = rsize; keg->uk_ppera = 1; - if (keg->uk_flags & UMA_ZONE_REFCNT) { - rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */ - shsize = sizeof(struct uma_slab_refcnt); - } else { - rsize += UMA_FRITM_SZ; /* Account for linkage */ - shsize = sizeof(struct uma_slab); - } + rsize += UMA_FRITM_SZ; /* Account for linkage */ + shsize = sizeof(struct uma_slab); keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize; KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0")); @@ -1244,7 +1228,6 @@ keg->uk_allocf = page_alloc; keg->uk_freef = page_free; keg->uk_recurse = 0; - keg->uk_slabzone = NULL; /* * The master zone is passed to us at keg-creation time. @@ -1258,7 +1241,7 @@ if (arg->flags & UMA_ZONE_ZINIT) keg->uk_init = zero_init; - if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC) + if (arg->flags & UMA_ZONE_MALLOC) keg->uk_flags |= UMA_ZONE_VTOSLAB; /* @@ -1267,31 +1250,14 @@ * we don't account for this here then we may end up in * keg_small_init() with a calculated 'ipers' of 0. */ - if (keg->uk_flags & UMA_ZONE_REFCNT) { - if (keg->uk_flags & UMA_ZONE_CACHESPREAD) - keg_cachespread_init(keg); - else if ((keg->uk_size+UMA_FRITMREF_SZ) > - (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt))) - keg_large_init(keg); - else - keg_small_init(keg); - } else { - if (keg->uk_flags & UMA_ZONE_CACHESPREAD) - keg_cachespread_init(keg); - else if ((keg->uk_size+UMA_FRITM_SZ) > - (UMA_SLAB_SIZE - sizeof(struct uma_slab))) - keg_large_init(keg); - else - keg_small_init(keg); - } + if (keg->uk_flags & UMA_ZONE_CACHESPREAD) + keg_cachespread_init(keg); + else if ((keg->uk_size+UMA_FRITM_SZ) > + (UMA_SLAB_SIZE - sizeof(struct uma_slab))) + keg_large_init(keg); + else + keg_small_init(keg); - if (keg->uk_flags & UMA_ZONE_OFFPAGE) { - if (keg->uk_flags & UMA_ZONE_REFCNT) - keg->uk_slabzone = slabrefzone; - else - keg->uk_slabzone = slabzone; - } - /* * If we haven't booted yet we need allocations to go through the * startup cache until the vm is ready. @@ -1322,25 +1288,16 @@ u_int totsize; /* Size of the slab struct and free list */ - if (keg->uk_flags & UMA_ZONE_REFCNT) - totsize = sizeof(struct uma_slab_refcnt) + - keg->uk_ipers * UMA_FRITMREF_SZ; - else - totsize = sizeof(struct uma_slab) + - keg->uk_ipers * UMA_FRITM_SZ; + totsize = sizeof(struct uma_slab) + + keg->uk_ipers * UMA_FRITM_SZ; if (totsize & UMA_ALIGN_PTR) totsize = (totsize & ~UMA_ALIGN_PTR) + (UMA_ALIGN_PTR + 1); keg->uk_pgoff = UMA_SLAB_SIZE - totsize; + totsize = keg->uk_pgoff + sizeof(struct uma_slab) + + keg->uk_ipers * UMA_FRITM_SZ; - if (keg->uk_flags & UMA_ZONE_REFCNT) - totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt) - + keg->uk_ipers * UMA_FRITMREF_SZ; - else - totsize = keg->uk_pgoff + sizeof(struct uma_slab) - + keg->uk_ipers * UMA_FRITM_SZ; - /* * The only way the following is possible is if with our * UMA_ALIGN_PTR adjustments we are now bigger than @@ -1633,26 +1590,11 @@ objsize--; uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64); - wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE; - totsize = wsize; - objsize = UMA_SMALLEST_UNIT; - while (totsize >= wsize) { - totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) / - (objsize + UMA_FRITMREF_SZ); - totsize *= (UMA_FRITMREF_SZ + objsize); - objsize++; - } - if (objsize > UMA_SMALLEST_UNIT) - objsize--; - uma_max_ipers_ref = MAX(UMA_SLAB_SIZE / objsize, 64); + KASSERT(uma_max_ipers <= 255, + ("uma_startup: calculated uma_max_ipers value too large!")); - KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255), - ("uma_startup: calculated uma_max_ipers values too large!")); - #ifdef UMA_DEBUG printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers); - printf("Calculated uma_max_ipers_slab (for OFFPAGE) is %d\n", - uma_max_ipers_ref); #endif /* "manually" create the initial zone */ @@ -1715,18 +1657,6 @@ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); - /* - * We also create a zone for the bigger slabs with reference - * counts in them, to accomodate UMA_ZONE_REFCNT zones. - */ - slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ; - slabsize += sizeof(struct uma_slab_refcnt); - slabrefzone = uma_zcreate("UMA RCntSlabs", - slabsize, - NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, - UMA_ZFLAG_INTERNAL); - hashzone = uma_zcreate("UMA Hash", sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT, NULL, NULL, NULL, NULL, @@ -1889,14 +1819,6 @@ goto out; } /* - * Both must either be refcnt, or not be refcnt. - */ - if ((zone->uz_flags & UMA_ZONE_REFCNT) != - (master->uz_flags & UMA_ZONE_REFCNT)) { - error = EINVAL; - goto out; - } - /* * The underlying object must be the same size. rsize * may be different. */ @@ -2297,7 +2219,6 @@ slab_alloc_item(uma_zone_t zone, uma_slab_t slab) { uma_keg_t keg; - uma_slabrefcnt_t slabref; void *item; u_int8_t freei; @@ -2305,12 +2226,7 @@ mtx_assert(&keg->uk_lock, MA_OWNED); freei = slab->us_firstfree; - if (keg->uk_flags & UMA_ZONE_REFCNT) { - slabref = (uma_slabrefcnt_t)slab; - slab->us_firstfree = slabref->us_freelist[freei].us_item; - } else { - slab->us_firstfree = slab->us_freelist[freei].us_item; - } + slab->us_firstfree = slab->us_freelist[freei].us_item; item = slab->us_data + (keg->uk_rsize * freei); slab->us_freecount--; @@ -2688,7 +2604,6 @@ enum zfreeskip skip, int flags) { uma_slab_t slab; - uma_slabrefcnt_t slabref; uma_keg_t keg; u_int8_t *mem; u_int8_t freei; @@ -2745,12 +2660,7 @@ uma_dbg_free(zone, slab, item); #endif - if (keg->uk_flags & UMA_ZONE_REFCNT) { - slabref = (uma_slabrefcnt_t)slab; - slabref->us_freelist[freei].us_item = slab->us_firstfree; - } else { - slab->us_freelist[freei].us_item = slab->us_firstfree; - } + slab->us_freelist[freei].us_item = slab->us_firstfree; slab->us_firstfree = freei; slab->us_freecount++; @@ -2932,26 +2842,6 @@ } /* See uma.h */ -u_int32_t * -uma_find_refcnt(uma_zone_t zone, void *item) -{ - uma_slabrefcnt_t slabref; - uma_keg_t keg; - u_int32_t *refcnt; - int idx; - - slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item & - (~UMA_SLAB_MASK)); - keg = slabref->us_keg; - KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT, - ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT")); - idx = ((unsigned long)item - (unsigned long)slabref->us_data) - / keg->uk_rsize; - refcnt = &slabref->us_freelist[idx].us_refcnt; - return refcnt; -} - -/* See uma.h */ void uma_reclaim(void) { @@ -2966,7 +2856,6 @@ * zones are drained. We have to do the same for buckets. */ zone_drain(slabzone); - zone_drain(slabrefzone); bucket_zone_drain(); } Index: net/ppp_tty.c =================================================================== --- net/ppp_tty.c (revision 187994) +++ net/ppp_tty.c (working copy) @@ -117,19 +117,6 @@ void pppasyncdetach(void); /* - * Some useful mbuf macros not in mbuf.h. - */ -#define M_IS_CLUSTER(m) ((m)->m_flags & M_EXT) - -#define M_DATASTART(m) \ - (M_IS_CLUSTER(m) ? (m)->m_ext.ext_buf : \ - (m)->m_flags & M_PKTHDR ? (m)->m_pktdat : (m)->m_dat) - -#define M_DATASIZE(m) \ - (M_IS_CLUSTER(m) ? (m)->m_ext.ext_size : \ - (m)->m_flags & M_PKTHDR ? MHLEN: MLEN) - -/* * Does c need to be escaped? */ #define ESCAPE_P(c) (sc->sc_asyncmap[(c) >> 5] & (1 << ((c) & 0x1F))) @@ -792,7 +779,7 @@ *mp = m; MCLGET(m, M_DONTWAIT); } - len -= M_DATASIZE(m); + len -= m->m_size; mp = &m->m_next; } } @@ -981,7 +968,7 @@ } m = sc->sc_m; m->m_len = 0; - m->m_data = M_DATASTART(sc->sc_m); + m->m_data = M_START(sc->sc_m); sc->sc_mc = m; sc->sc_mp = mtod(m, char *); sc->sc_fcs = PPP_INITFCS; @@ -1036,7 +1023,7 @@ } sc->sc_mc = m = m->m_next; m->m_len = 0; - m->m_data = M_DATASTART(m); + m->m_data = M_START(m); sc->sc_mp = mtod(m, char *); } Index: net/if_ppp.c =================================================================== --- net/if_ppp.c (revision 187994) +++ net/if_ppp.c (working copy) @@ -166,19 +166,6 @@ IFC_SIMPLE_DECLARE(ppp, 0); /* - * Some useful mbuf macros not in mbuf.h. - */ -#define M_IS_CLUSTER(m) ((m)->m_flags & M_EXT) - -#define M_DATASTART(m) \ - (M_IS_CLUSTER(m) ? (m)->m_ext.ext_buf : \ - (m)->m_flags & M_PKTHDR ? (m)->m_pktdat : (m)->m_dat) - -#define M_DATASIZE(m) \ - (M_IS_CLUSTER(m) ? (m)->m_ext.ext_size : \ - (m)->m_flags & M_PKTHDR ? MHLEN: MLEN) - -/* * We steal two bits in the mbuf m_flags, to mark high-priority packets * for output, and received packets following lost/corrupted packets. */ Index: net/if_gre.c =================================================================== --- net/if_gre.c (revision 187994) +++ net/if_gre.c (working copy) @@ -330,7 +330,7 @@ mob_h.proto = htons(mob_h.proto); mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz); - if ((m->m_data - msiz) < m->m_pktdat) { + if (M_LEADINGSPACE(m) < msiz) { /* need new mbuf */ MGETHDR(m0, M_DONTWAIT, MT_DATA); if (m0 == NULL) { Index: sys/mbuf.h =================================================================== --- sys/mbuf.h (revision 187994) +++ sys/mbuf.h (working copy) @@ -53,8 +53,10 @@ * externally and attach it to the mbuf in a way similar to that of mbuf * clusters. */ -#define MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */ -#define MHLEN (MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */ +#define MHSIZE offsetof(struct mbuf, M_dat.M_databuf) +#define MPKTHSIZE offsetof(struct mbuf, M_dat.MH.MH_dat.MH_databuf) +#define MLEN (MSIZE - MHSIZE) /* normal data len */ +#define MHLEN (MSIZE - MPKTHSIZE) /* data len w/pkthdr */ #define MINCLSIZE (MHLEN + 1) /* smallest amount to put in cluster */ #define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */ @@ -67,36 +69,9 @@ #define mtod(m, t) ((t)((m)->m_data)) #define dtom(x) ((struct mbuf *)((intptr_t)(x) & ~(MSIZE-1))) -/* - * Argument structure passed to UMA routines during mbuf and packet - * allocations. - */ -struct mb_args { - int flags; /* Flags for mbuf being allocated */ - short type; /* Type of mbuf being allocated */ -}; #endif /* _KERNEL */ -#if defined(__LP64__) -#define M_HDR_PAD 6 -#else -#define M_HDR_PAD 2 -#endif - /* - * Header present at the beginning of every mbuf. - */ -struct m_hdr { - struct mbuf *mh_next; /* next buffer in chain */ - struct mbuf *mh_nextpkt; /* next chain in queue/record */ - caddr_t mh_data; /* location of data */ - int mh_len; /* amount of data in this mbuf */ - int mh_flags; /* flags; see below */ - short mh_type; /* type of data in this mbuf */ - uint8_t pad[M_HDR_PAD];/* word align */ -}; - -/* * Packet tag structure (see below for details). */ struct m_tag { @@ -111,6 +86,7 @@ * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set. */ struct pkthdr { + SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ struct ifnet *rcvif; /* rcv interface */ /* variables for ip and tcp reassembly */ void *header; /* pointer to packet header */ @@ -123,22 +99,26 @@ int csum_data; /* data field used by csum routines */ u_int16_t tso_segsz; /* TSO segment size */ u_int16_t ether_vtag; /* Ethernet 802.1p+q vlan tag */ - SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ +#if defined(__LP64__) + uint32_t pad; +#endif }; /* * Description of external storage mapped into mbuf; valid only if M_EXT is * set. */ -struct m_ext { +struct mb_ext { caddr_t ext_buf; /* start of buffer */ + u_short ext_size; /* size of buffer, for ext_free */ + u_short ext_type; /* type of external storage */ +#if defined(__LP64__) + uint32_t pad; +#endif void (*ext_free) /* free routine if not the usual */ (void *, void *); void *ext_arg1; /* optional argument pointer */ void *ext_arg2; /* optional argument pointer */ - u_int ext_size; /* size of buffer, for ext_free */ - volatile u_int *ref_cnt; /* pointer to ref count info */ - int ext_type; /* type of external storage */ }; /* @@ -146,28 +126,33 @@ * purposes. */ struct mbuf { - struct m_hdr m_hdr; + struct mbuf *m_next; /* next buffer in chain */ + struct mbuf *m_nextpkt; /* next chain in queue/record */ + uma_zone_t m_zone; /* Zone allocated from. */ + caddr_t m_data; /* location of valid data */ + volatile int m_ref; /* Reference count. */ + int m_len; /* amount of data in this mbuf */ + int m_flags; /* flags; see below */ +#if defined(__LP64__) + uint32_t pad; +#endif + short m_type; /* type of data in this mbuf */ + u_short m_size; /* Actual size of buffer. */ union { struct { struct pkthdr MH_pkthdr; /* M_PKTHDR set */ union { - struct m_ext MH_ext; /* M_EXT set */ - char MH_databuf[MHLEN]; + struct mb_ext MH_ext; /* M_EXT set */ + char MH_databuf[0]; } MH_dat; } MH; - char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */ + char M_databuf[0]; /* !M_PKTHDR, !M_EXT */ } M_dat; }; -#define m_next m_hdr.mh_next -#define m_len m_hdr.mh_len -#define m_data m_hdr.mh_data -#define m_type m_hdr.mh_type -#define m_flags m_hdr.mh_flags -#define m_nextpkt m_hdr.mh_nextpkt #define m_act m_nextpkt #define m_pkthdr M_dat.MH.MH_pkthdr #define m_ext M_dat.MH.MH_dat.MH_ext -#define m_pktdat M_dat.MH.MH_dat.MH_databuf +#define m_pktdat M_dat.MH.MH_dat.MH_databuf #define m_dat M_dat.M_databuf /* @@ -229,7 +214,6 @@ #define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */ #define EXT_MOD_TYPE 200 /* custom module's ext_buf type */ #define EXT_DISPOSABLE 300 /* can throw this buffer away w/page flipping */ -#define EXT_EXTREF 400 /* has externally maintained ref_cnt ptr */ /* * Flags indicating hw checksum support and sw checksum requirements. This @@ -320,15 +304,15 @@ * !_KERNEL so that monitoring tools can look up the zones with * libmemstat(3). */ -#define MBUF_MEM_NAME "mbuf" #define MBUF_CLUSTER_MEM_NAME "mbuf_cluster" -#define MBUF_PACKET_MEM_NAME "mbuf_packet" -#define MBUF_JUMBOP_MEM_NAME "mbuf_jumbo_page" #define MBUF_JUMBO9_MEM_NAME "mbuf_jumbo_9k" #define MBUF_JUMBO16_MEM_NAME "mbuf_jumbo_16k" +#define MBUF_JUMBOP_MEM_NAME "mbuf_jumbo_page" +#define MBUF_MEM_NAME "mbuf" +#define MBUF_PACKET_MEM_NAME "mbuf_packet" #define MBUF_TAG_MEM_NAME "mbuf_tag" -#define MBUF_EXTREFCNT_MEM_NAME "mbuf_ext_refcnt" + #ifdef _KERNEL #ifdef WITNESS @@ -347,44 +331,52 @@ * The rest of it is defined in kern/kern_mbuf.c */ -extern uma_zone_t zone_mbuf; extern uma_zone_t zone_clust; -extern uma_zone_t zone_pack; -extern uma_zone_t zone_jumbop; +extern uma_zone_t zone_ext; extern uma_zone_t zone_jumbo9; extern uma_zone_t zone_jumbo16; -extern uma_zone_t zone_ext_refcnt; +extern uma_zone_t zone_jumbop; +extern uma_zone_t zone_mbuf; +extern uma_zone_t zone_pack; -static __inline struct mbuf *m_getcl(int how, short type, int flags); +static __inline struct mbuf *m_alloc(uma_zone_t zone, int size, int how, + short type, int flags); +static __inline void m_extadd(struct mbuf *, caddr_t, u_int, + void (*)(void *, void *), void *, void *, + int, int); +void m_ext_free_zone(void *arg1, void *arg2); +void m_ext_free_mbuf(void *arg1, void *arg2); +void m_ext_free_nop(void *arg1, void *arg2); +static __inline struct mbuf *m_free(struct mbuf *m); static __inline struct mbuf *m_get(int how, short type); static __inline struct mbuf *m_gethdr(int how, short type); -static __inline struct mbuf *m_getjcl(int how, short type, int flags, - int size); +struct mbuf *_m_getjcl(int how, short type, int flags, + int size, uma_zone_t zone, int exttype); static __inline struct mbuf *m_getclr(int how, short type); /* XXX */ -static __inline struct mbuf *m_free(struct mbuf *m); +static __inline int m_init(struct mbuf *m, uma_zone_t zone, + int size, int how, short type, int flags); static __inline void m_clget(struct mbuf *m, int how); -static __inline void *m_cljget(struct mbuf *m, int how, int size); +void *_m_cljget(struct mbuf *m, int how, int size, + uma_zone_t zone, int exttype); static __inline void m_chtype(struct mbuf *m, short new_type); -void mb_free_ext(struct mbuf *); static __inline struct mbuf *m_last(struct mbuf *m); +int m_pkthdr_init(struct mbuf *m, int how); +/* + * Determine the type of cluster to allocate for an ext mbuf. + */ static __inline int m_gettype(int size) { int type; switch (size) { - case MSIZE: - type = EXT_MBUF; - break; case MCLBYTES: type = EXT_CLUSTER; break; -#if MJUMPAGESIZE != MCLBYTES case MJUMPAGESIZE: type = EXT_JUMBOP; break; -#endif case MJUM9BYTES: type = EXT_JUMBO9; break; @@ -392,21 +384,21 @@ type = EXT_JUMBO16; break; default: - panic("%s: m_getjcl: invalid cluster size", __func__); + panic("%s: m_gettype: invalid cluster size", __func__); } return (type); } +/* + * Determine the zone to use when allocating an ext mbuf. + */ static __inline uma_zone_t m_getzone(int size) { uma_zone_t zone; switch (size) { - case MSIZE: - zone = zone_mbuf; - break; case MCLBYTES: zone = zone_clust; break; @@ -422,20 +414,105 @@ zone = zone_jumbo16; break; default: - panic("%s: m_getjcl: invalid cluster type", __func__); + panic("%s: m_getzone: invalid cluster type", __func__); } return (zone); } +/* + * Allocate an mbuf from the provided zone and initialize the size header + * area if requested. Delayed initialization may be performed by calling + * m_init() later with the required arguments. + * + * Returns NULL on failure and an mbuf on success. + */ static __inline struct mbuf * +m_alloc(uma_zone_t zone, int size, int how, short type, int flags) +{ + struct mbuf *m; + + m = uma_zalloc(zone, how); + if (m == NULL) + return (NULL); + if (type == MT_NOINIT) + return (m); + if (m_init(m, zone, size, how, type, flags)) { + uma_zfree(zone, m); + return (NULL); + } + return (m); +} + +/* + * Configure a provided mbuf to refer to the provided external storage + * buffer and setup a reference count for said buffer. + * + * Arguments: + * mb The existing mbuf to which to attach the provided buffer. + * buf The address of the provided external storage buffer. + * size The size of the provided buffer. + * freef A pointer to a routine that is responsible for freeing the + * provided external storage buffer. + * arg{1,2} Pointers to arguments to be passed to the provided freef + * routine (may be NULL). + * flags Any other flags to be passed to the provided mbuf. + * type The type that the external storage buffer should be + * labeled with. + * + * Returns: + * Nothing. + */ +static __inline void +m_extadd(struct mbuf *mb, caddr_t buf, u_int size, + void (*freef)(void *, void *), void *arg1, void *arg2, int flags, int type) +{ + mb->m_flags |= (M_EXT | flags); + mb->m_data = buf; + mb->m_size = size; + mb->m_ext.ext_buf = buf; + mb->m_ext.ext_size = size; + mb->m_ext.ext_free = freef; + mb->m_ext.ext_arg1 = arg1; + mb->m_ext.ext_arg2 = arg2; + mb->m_ext.ext_type = type; +} + +/* + * Initialize an mbuf with linear storage. + * + * Inline because the consumer text overhead will be roughly the same to + * initialize or call a function with this many parameters and M_PKTHDR + * should go away with constant propagation for !MGETHDR. + */ +static __inline int +m_init(struct mbuf *m, uma_zone_t zone, int size, int how, short type, + int flags) +{ + int error; + + m->m_next = NULL; + m->m_nextpkt = NULL; + m->m_zone = zone; + m->m_data = m->m_dat; + m->m_ref = 1; + m->m_len = 0; + m->m_flags = flags; + m->m_type = type; + m->m_size = size; + if (flags & M_PKTHDR) { + if ((error = m_pkthdr_init(m, how)) != 0) + return (error); + } + + return (0); +} + +static __inline struct mbuf * m_get(int how, short type) { - struct mb_args args; - args.flags = 0; - args.type = type; - return ((struct mbuf *)(uma_zalloc_arg(zone_mbuf, &args, how))); + return m_alloc(zone_mbuf, MLEN, how, type, 0); } /* @@ -445,11 +522,8 @@ m_getclr(int how, short type) { struct mbuf *m; - struct mb_args args; - args.flags = 0; - args.type = type; - m = uma_zalloc_arg(zone_mbuf, &args, how); + m = m_alloc(zone_mbuf, MLEN, how, type, 0); if (m != NULL) bzero(m->m_data, MLEN); return (m); @@ -458,91 +532,74 @@ static __inline struct mbuf * m_gethdr(int how, short type) { - struct mb_args args; - args.flags = M_PKTHDR; - args.type = type; - return ((struct mbuf *)(uma_zalloc_arg(zone_mbuf, &args, how))); + return m_alloc(zone_mbuf, MHLEN, how, type, M_PKTHDR); } static __inline struct mbuf * m_getcl(int how, short type, int flags) { - struct mb_args args; + struct mbuf *m; - args.flags = flags; - args.type = type; - return ((struct mbuf *)(uma_zalloc_arg(zone_pack, &args, how))); + m = m_alloc(zone_pack, MCLBYTES, how, type, flags | M_EXT); + /* Restore the data pointer clobbered by m_init. */ + if (m && type != MT_NOINIT) + m->m_data = m->m_ext.ext_buf; + + return (m); } -/* - * m_getjcl() returns an mbuf with a cluster of the specified size attached. - * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. - * - * XXX: This is rather large, should be real function maybe. - */ static __inline struct mbuf * m_getjcl(int how, short type, int flags, int size) { - struct mb_args args; - struct mbuf *m, *n; - uma_zone_t zone; - args.flags = flags; - args.type = type; + /* + * Rely on constant propagation to resolve zone and type before + * calling the non-inlined function. + */ + return _m_getjcl(how, type, flags, size, m_getzone(size), + m_gettype(size)); +} - m = uma_zalloc_arg(zone_mbuf, &args, how); - if (m == NULL) - return (NULL); +void m_tag_delete_chain(struct mbuf *, struct m_tag *); - zone = m_getzone(size); - n = uma_zalloc_arg(zone, m, how); - if (n == NULL) { - uma_zfree(zone_mbuf, m); - return (NULL); - } - return (m); -} - static __inline void -m_free_fast(struct mbuf *m) +m_free_fast(uma_zone_t zone, struct mbuf *m) { + #ifdef INVARIANTS + KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); if (m->m_flags & M_PKTHDR) KASSERT(SLIST_EMPTY(&m->m_pkthdr.tags), ("doing fast free of mbuf with tags")); #endif - - uma_zfree_arg(zone_mbuf, m, (void *)MB_NOTAGS); + + uma_zfree(zone, m); } -static __inline struct mbuf * -m_free(struct mbuf *m) +static __inline void +_m_free(struct mbuf *m) { - struct mbuf *n = m->m_next; + if (m->m_flags & M_PKTHDR && !SLIST_EMPTY(&m->m_pkthdr.tags)) + m_tag_delete_chain(m, NULL); + /* + * Free attached storage if this mbuf is the only reference to it. + */ if (m->m_flags & M_EXT) - mb_free_ext(m); - else if ((m->m_flags & M_NOFREE) == 0) - uma_zfree(zone_mbuf, m); - return (n); + m->m_ext.ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2); + if ((m->m_flags & M_NOFREE) == 0) + uma_zfree(m->m_zone, m); } -static __inline void -m_clget(struct mbuf *m, int how) +static __inline struct mbuf * +m_free(struct mbuf *m) { + struct mbuf *n = m->m_next; - if (m->m_flags & M_EXT) - printf("%s: %p mbuf already has cluster\n", __func__, m); - m->m_ext.ext_buf = (char *)NULL; - uma_zalloc_arg(zone_clust, m, how); - /* - * On a cluster allocation failure, drain the packet zone and retry, - * we might be able to loosen a few clusters up on the drain. - */ - if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) { - zone_drain(zone_pack); - uma_zalloc_arg(zone_clust, m, how); - } + if (m->m_ref == 1 || atomic_fetchadd_int(&m->m_ref, -1) == 1) + _m_free(m); + + return (n); } /* @@ -555,15 +612,12 @@ static __inline void * m_cljget(struct mbuf *m, int how, int size) { - uma_zone_t zone; - if (m && m->m_flags & M_EXT) - printf("%s: %p mbuf already has cluster\n", __func__, m); - if (m != NULL) - m->m_ext.ext_buf = NULL; - - zone = m_getzone(size); - return (uma_zalloc_arg(zone, m, how)); + /* + * Rely on constant propagation to resolve zone and type before + * calling the non-inlined function. + */ + return _m_cljget(m, how, size, m_getzone(size), m_gettype(size)); } static __inline void @@ -595,14 +649,14 @@ panic("unknown cluster type"); break; } + m_extadd(m, cl, size, m_ext_free_zone, zone, cl, 0, type); +} - m->m_data = m->m_ext.ext_buf = cl; - m->m_ext.ext_free = m->m_ext.ext_arg1 = m->m_ext.ext_arg2 = NULL; - m->m_ext.ext_size = size; - m->m_ext.ext_type = type; - m->m_ext.ref_cnt = uma_find_refcnt(zone, cl); - m->m_flags |= M_EXT; +static __inline void +m_clget(struct mbuf *m, int how) +{ + m_cljget(m, how, MCLBYTES); } static __inline void @@ -639,9 +693,7 @@ * be both the local data payload, or an external buffer area, depending on * whether M_EXT is set). */ -#define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \ - (!(((m)->m_flags & M_EXT)) || \ - (*((m)->m_ext.ref_cnt) == 1)) ) \ +#define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && (m)->m_ref == 1) /* Check if the supplied mbuf has a packet header, or else panic. */ #define M_ASSERTPKTHDR(m) \ @@ -661,25 +713,14 @@ * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place an * object of the specified size at the end of the mbuf, longword aligned. */ -#define M_ALIGN(m, len) do { \ - KASSERT(!((m)->m_flags & (M_PKTHDR|M_EXT)), \ - ("%s: M_ALIGN not normal mbuf", __func__)); \ - KASSERT((m)->m_data == (m)->m_dat, \ - ("%s: M_ALIGN not a virgin mbuf", __func__)); \ - (m)->m_data += (MLEN - (len)) & ~(sizeof(long) - 1); \ -} while (0) +#define M_ALIGN(m, len) \ + (m)->m_data += ((m)->m_size - (len)) & ~(sizeof(long) - 1); /* * As above, for mbufs allocated with m_gethdr/MGETHDR or initialized by * M_DUP/MOVE_PKTHDR. */ -#define MH_ALIGN(m, len) do { \ - KASSERT((m)->m_flags & M_PKTHDR && !((m)->m_flags & M_EXT), \ - ("%s: MH_ALIGN not PKTHDR mbuf", __func__)); \ - KASSERT((m)->m_data == (m)->m_pktdat, \ - ("%s: MH_ALIGN not a virgin mbuf", __func__)); \ - (m)->m_data += (MHLEN - (len)) & ~(sizeof(long) - 1); \ -} while (0) +#define MH_ALIGN(m, len) M_ALIGN(m, len) /* * Compute the amount of space available before the current start of data in @@ -688,11 +729,7 @@ * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. */ -#define M_LEADINGSPACE(m) \ - ((m)->m_flags & M_EXT ? \ - (M_WRITABLE(m) ? (m)->m_data - (m)->m_ext.ext_buf : 0): \ - (m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat : \ - (m)->m_data - (m)->m_dat) +#define M_LEADINGSPACE(m) (M_WRITABLE(m) ? (m)->m_data - M_START(m) : 0) /* * Compute the amount of space available after the end of data in an mbuf. @@ -701,11 +738,13 @@ * of checking writability of the mbuf data area rests solely with the caller. */ #define M_TRAILINGSPACE(m) \ - ((m)->m_flags & M_EXT ? \ - (M_WRITABLE(m) ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size \ - - ((m)->m_data + (m)->m_len) : 0) : \ - &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len)) + (M_WRITABLE(m) ? \ + (M_START(m) + (m)->m_size) - ((m)->m_data + (m)->m_len) : 0) +#define M_START(m) \ + ((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf : \ + (m)->m_flags & M_PKTHDR ? (m)->m_pktdat : (m)->m_dat) + /* * Arrange to prepend space of size plen to mbuf m. If a new mbuf must be * allocated, how specifies whether to wait. If the allocation fails, the @@ -755,8 +794,6 @@ int (*)(void *, void *, u_int), void *); int m_append(struct mbuf *, int, c_caddr_t); void m_cat(struct mbuf *, struct mbuf *); -void m_extadd(struct mbuf *, caddr_t, u_int, - void (*)(void *, void *), void *, void *, int, int); struct mbuf *m_collapse(struct mbuf *, int, int); void m_copyback(struct mbuf *, int, int, c_caddr_t); void m_copydata(const struct mbuf *, int, int, caddr_t); @@ -869,7 +906,6 @@ /* Packet tag routines. */ struct m_tag *m_tag_alloc(u_int32_t, int, int, int); void m_tag_delete(struct mbuf *, struct m_tag *); -void m_tag_delete_chain(struct mbuf *, struct m_tag *); void m_tag_free_default(struct m_tag *); struct m_tag *m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *); struct m_tag *m_tag_copy(struct m_tag *, int); Index: sys/sockbuf.h =================================================================== --- sys/sockbuf.h (revision 187994) +++ sys/sockbuf.h (working copy) @@ -171,12 +171,10 @@ (sb)->sb_cc += (m)->m_len; \ if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA) \ (sb)->sb_ctl += (m)->m_len; \ - (sb)->sb_mbcnt += MSIZE; \ (sb)->sb_mcnt += 1; \ - if ((m)->m_flags & M_EXT) { \ - (sb)->sb_mbcnt += (m)->m_ext.ext_size; \ + if ((m)->m_flags & M_EXT) \ (sb)->sb_ccnt += 1; \ - } \ + (sb)->sb_mbcnt += m->m_size + sizeof(struct mbuf); \ } /* adjust counters in sb reflecting freeing of m */ @@ -184,12 +182,10 @@ (sb)->sb_cc -= (m)->m_len; \ if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA) \ (sb)->sb_ctl -= (m)->m_len; \ - (sb)->sb_mbcnt -= MSIZE; \ (sb)->sb_mcnt -= 1; \ - if ((m)->m_flags & M_EXT) { \ - (sb)->sb_mbcnt -= (m)->m_ext.ext_size; \ + if ((m)->m_flags & M_EXT) \ (sb)->sb_ccnt -= 1; \ - } \ + (sb)->sb_mbcnt -= m->m_size + sizeof(struct mbuf); \ if ((sb)->sb_sndptr == (m)) { \ (sb)->sb_sndptr = NULL; \ (sb)->sb_sndptroff = 0; \