Index: sys/dev/en/midway.c =================================================================== RCS file: /home/ncvs/src/sys/dev/en/midway.c,v retrieving revision 1.25 diff -u -r1.25 midway.c --- sys/dev/en/midway.c 2000/11/08 05:45:46 1.25 +++ sys/dev/en/midway.c 2000/11/11 22:51:43 @@ -1827,7 +1827,7 @@ m = *mm; if (m->m_flags & M_EXT) { - if (m->m_ext.ext_free) { + if (m->m_ext.ext_type != EXT_CLUSTER) { /* external buffer isn't an ordinary mbuf cluster! */ printf("%s: mfix: special buffer! can't make a copy!\n", sc->sc_dev.dv_xname); Index: sys/kern/uipc_mbuf2.c =================================================================== RCS file: /home/ncvs/src/sys/kern/uipc_mbuf2.c,v retrieving revision 1.4 diff -u -r1.4 uipc_mbuf2.c --- sys/kern/uipc_mbuf2.c 2000/10/29 13:56:51 1.4 +++ sys/kern/uipc_mbuf2.c 2000/11/11 22:51:51 @@ -81,7 +81,7 @@ * * on error return (NULL return value), original "m" will be freed. * - * XXX M_TRAILINGSPACE/M_LEADINGSPACE on shared cluster (sharedcluster) + * XXX: M_TRAILINGSPACE/M_LEADINGSPACE only permitted on writable ext_buf. */ struct mbuf * m_pulldown(m, off, len, offp) @@ -91,7 +91,7 @@ { struct mbuf *n, *o; int hlen, tlen, olen; - int sharedcluster; + int writable; /* check invalid arguments. */ if (m == NULL) @@ -176,25 +176,42 @@ * easy cases first. * we need to use m_copydata() to get data from <n->m_next, 0>. */ - if ((n->m_flags & M_EXT) == 0) - sharedcluster = 0; - else { - if (n->m_ext.ext_free) - sharedcluster = 1; - else if (MEXT_IS_REF(n)) - sharedcluster = 1; - else - sharedcluster = 0; - } + /* + * XXX: This code is flawed because it considers a "writable" mbuf + * data region to require all of the following: + * (i) mbuf _has_ to have M_EXT set; if it is just a regular + * mbuf, it is still not considered "writable." + * (ii) since mbuf has M_EXT, the ext_type _has_ to be + * EXT_CLUSTER. Anything else makes it non-writable. + * (iii) M_WRITABLE() must evaluate true. + * Ideally, the requirement should only be (iii). 
+ * + * If we're writable, we're sure we're writable, because the ref. count + * cannot increase from 1, as that would require possession of mbuf + * n by someone else (which is impossible). However, if we're _not_ + * writable, we may eventually become writable (if the ref. count drops + * to 1), but we'll fail to notice it unless we re-evaluate + * M_WRITABLE(). For now, we only evaluate once at the beginning and + * live with this. + */ + /* + * XXX: This is dumb. If we're just a regular mbuf with no M_EXT, + * then we're not "writable," according to this code. + */ + writable = 0; + if ((n->m_flags & M_EXT) && (n->m_ext.ext_type == EXT_CLUSTER) && + M_WRITABLE(n)) + writable = 1; + if ((off == 0 || offp) && M_TRAILINGSPACE(n) >= tlen - && !sharedcluster) { + && writable) { m_copydata(n->m_next, 0, tlen, mtod(n, caddr_t) + n->m_len); n->m_len += tlen; m_adj(n->m_next, tlen); goto ok; } if ((off == 0 || offp) && M_LEADINGSPACE(n->m_next) >= hlen - && !sharedcluster) { + && writable) { n->m_next->m_data -= hlen; n->m_next->m_len += hlen; bcopy(mtod(n, caddr_t) + off, mtod(n->m_next, caddr_t), hlen); Index: sys/kern/uipc_syscalls.c =================================================================== RCS file: /home/ncvs/src/sys/kern/uipc_syscalls.c,v retrieving revision 1.74 diff -u -r1.74 uipc_syscalls.c --- sys/kern/uipc_syscalls.c 2000/11/04 21:55:25 1.74 +++ sys/kern/uipc_syscalls.c 2000/11/11 22:51:58 @@ -1625,7 +1625,8 @@ /* * Setup external storage for mbuf. 
*/ - MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL); + MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY, + EXT_SFBUF); m->m_data = (char *) sf->kva + pgoff; m->m_pkthdr.len = m->m_len = xfsize; /* Index: sys/netinet/ip_fil.c =================================================================== RCS file: /home/ncvs/src/sys/netinet/ip_fil.c,v retrieving revision 1.24 diff -u -r1.24 ip_fil.c --- sys/netinet/ip_fil.c 2000/10/26 12:33:42 1.24 +++ sys/netinet/ip_fil.c 2000/11/11 22:52:04 @@ -1162,7 +1162,7 @@ m->m_len = 0; avail = M_TRAILINGSPACE(m); # else - avail = (m->m_flags & M_EXT) ? MCLBYTES : MHLEN; + avail = MCLBYTES; # endif xtra = MIN(ntohs(oip6->ip6_plen) + sizeof(ip6_t), avail - hlen - sizeof(*icmp) - max_linkhdr); @@ -1382,11 +1382,7 @@ # if BSD >= 199306 int i = 0; -# ifdef MCLISREFERENCED - if ((m->m_flags & M_EXT) && MCLISREFERENCED(m)) -# else - if (m->m_flags & M_EXT) -# endif + if ((m->m_flags & M_EXT) && MEXT_IS_REF(m)) i = 1; # endif # ifndef sparc Index: sys/netinet6/ipsec.c =================================================================== RCS file: /home/ncvs/src/sys/netinet6/ipsec.c,v retrieving revision 1.8 diff -u -r1.8 ipsec.c --- sys/netinet6/ipsec.c 2000/11/09 17:55:17 1.8 +++ sys/netinet6/ipsec.c 2000/11/11 22:52:10 @@ -3219,14 +3219,11 @@ for (n = m, mpp = &m; n; n = n->m_next) { if (n->m_flags & M_EXT) { /* - * Make a copy only if there are more than one references - * to the cluster. + * Make a copy only if there are more than one + * references to the cluster. * XXX: is this approach effective? 
*/ - if ( - n->m_ext.ext_free || - MEXT_IS_REF(n) - ) + if (n->m_ext.ext_type != EXT_CLUSTER || MEXT_IS_REF(n)) { int remain, copied; struct mbuf *mm; Index: sys/pci/if_sk.c =================================================================== RCS file: /home/ncvs/src/sys/pci/if_sk.c,v retrieving revision 1.34 diff -u -r1.34 if_sk.c --- sys/pci/if_sk.c 2000/11/02 00:00:30 1.34 +++ sys/pci/if_sk.c 2000/11/11 22:52:15 @@ -699,7 +699,7 @@ /* Attach the buffer to the mbuf */ MEXTADD(m_new, buf, SK_JLEN, sk_jfree, - (struct sk_if_softc *)sc_if); + (struct sk_if_softc *)sc_if, 0, EXT_NET_DRV); m_new->m_data = (void *)buf; m_new->m_pkthdr.len = m_new->m_len = SK_JLEN; } else { Index: sys/pci/if_ti.c =================================================================== RCS file: /home/ncvs/src/sys/pci/if_ti.c,v retrieving revision 1.39 diff -u -r1.39 if_ti.c --- sys/pci/if_ti.c 2000/10/21 00:13:35 1.39 +++ sys/pci/if_ti.c 2000/11/11 22:52:18 @@ -815,7 +815,7 @@ m_new->m_data = (void *) buf; m_new->m_len = m_new->m_pkthdr.len = TI_JUMBO_FRAMELEN; MEXTADD(m_new, buf, TI_JUMBO_FRAMELEN, ti_jfree, - (struct ti_softc *)sc); + (struct ti_softc *)sc, 0, EXT_NET_DRV); } else { m_new = m; m_new->m_data = m_new->m_ext.ext_buf; Index: sys/pci/if_wb.c =================================================================== RCS file: /home/ncvs/src/sys/pci/if_wb.c,v retrieving revision 1.34 diff -u -r1.34 if_wb.c --- sys/pci/if_wb.c 2000/10/15 14:19:00 1.34 +++ sys/pci/if_wb.c 2000/11/11 22:52:22 @@ -1106,7 +1106,8 @@ } m_new->m_data = c->wb_buf; m_new->m_pkthdr.len = m_new->m_len = WB_BUFBYTES; - MEXTADD(m_new, c->wb_buf, WB_BUFBYTES, wb_bfree, NULL); + MEXTADD(m_new, c->wb_buf, WB_BUFBYTES, wb_bfree, NULL, 0, + EXT_NET_DRV); } else { m_new = m; m_new->m_len = m_new->m_pkthdr.len = WB_BUFBYTES; Index: sys/sys/mbuf.h =================================================================== RCS file: /home/ncvs/src/sys/sys/mbuf.h,v retrieving revision 1.62 diff -u -r1.62 mbuf.h --- sys/sys/mbuf.h 
2000/10/20 07:58:15 1.62 +++ sys/sys/mbuf.h 2000/11/11 22:52:26 @@ -107,6 +107,7 @@ void *ext_args; /* optional argument pointer */ u_int ext_size; /* size of buffer, for ext_free */ union mext_refcnt *ref_cnt; /* pointer to ref count info */ + short ext_type; /* type of external storage */ }; struct mbuf { @@ -138,22 +139,28 @@ #define M_EXT 0x0001 /* has associated external storage */ #define M_PKTHDR 0x0002 /* start of record */ #define M_EOR 0x0004 /* end of record */ -#define M_PROTO1 0x0008 /* protocol-specific */ -#define M_PROTO2 0x0010 /* protocol-specific */ -#define M_PROTO3 0x0020 /* protocol-specific */ -#define M_PROTO4 0x0040 /* protocol-specific */ -#define M_PROTO5 0x0080 /* protocol-specific */ +#define M_RDONLY 0x0008 /* associated data is marked read-only */ +#define M_PROTO1 0x0010 /* protocol-specific */ +#define M_PROTO2 0x0020 /* protocol-specific */ +#define M_PROTO3 0x0040 /* protocol-specific */ +#define M_PROTO4 0x0080 /* protocol-specific */ +#define M_PROTO5 0x0100 /* protocol-specific */ /* mbuf pkthdr flags, also in m_flags */ -#define M_BCAST 0x0100 /* send/received as link-level broadcast */ -#define M_MCAST 0x0200 /* send/received as link-level multicast */ -#define M_FRAG 0x0400 /* packet is a fragment of a larger packet */ -#define M_FIRSTFRAG 0x0800 /* packet is first fragment */ -#define M_LASTFRAG 0x1000 /* packet is last fragment */ +#define M_BCAST 0x0200 /* send/received as link-level broadcast */ +#define M_MCAST 0x0400 /* send/received as link-level multicast */ +#define M_FRAG 0x0800 /* packet is a fragment of a larger packet */ +#define M_FIRSTFRAG 0x1000 /* packet is first fragment */ +#define M_LASTFRAG 0x2000 /* packet is last fragment */ + +/* external buffer types: identify ext_buf type */ +#define EXT_CLUSTER 1 /* mbuf cluster */ +#define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */ +#define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */ /* flags copied when copying m_pkthdr */ #define M_COPYFLAGS 
(M_PKTHDR|M_EOR|M_PROTO1|M_PROTO1|M_PROTO2|M_PROTO3 | \ - M_PROTO4|M_PROTO5|M_BCAST|M_MCAST|M_FRAG) + M_PROTO4|M_PROTO5|M_BCAST|M_MCAST|M_FRAG|M_RDONLY) /* flags indicating hw checksum support and sw checksum requirements */ #define CSUM_IP 0x0001 /* will csum IP */ @@ -444,21 +451,23 @@ _mm->m_ext.ext_free = NULL; \ _mm->m_ext.ext_args = NULL; \ _mm->m_ext.ext_size = MCLBYTES; \ + _mm->m_ext.ext_type = EXT_CLUSTER; \ } \ } \ } while (0) -#define MEXTADD(m, buf, size, free, args) do { \ +#define MEXTADD(m, buf, size, free, args, flags, type) do { \ struct mbuf *_mm = (m); \ \ MEXT_INIT_REF(_mm, M_WAIT); \ if (_mm->m_ext.ref_cnt != NULL) { \ - _mm->m_flags |= M_EXT; \ + _mm->m_flags |= (M_EXT | (flags)); \ _mm->m_ext.ext_buf = (caddr_t)(buf); \ _mm->m_data = _mm->m_ext.ext_buf; \ _mm->m_ext.ext_size = (size); \ _mm->m_ext.ext_free = (free); \ _mm->m_ext.ext_args = (args); \ + _mm->m_ext.ext_type = (type); \ } \ } while (0) @@ -478,7 +487,7 @@ \ if (MEXT_IS_REF(_mmm)) \ MEXT_REM_REF(_mmm); \ - else if (_mmm->m_ext.ext_free != NULL) { \ + else if (_mmm->m_ext.ext_type != EXT_CLUSTER) { \ (*(_mmm->m_ext.ext_free))(_mmm->m_ext.ext_buf, \ _mmm->m_ext.ext_args); \ _MEXT_DEALLOC_CNT(_mmm->m_ext.ref_cnt); \ @@ -510,6 +519,15 @@ MBWAKEUP(m_mballoc_wid); \ mtx_exit(&mmbfree.m_mtx, MTX_DEF); \ } while (0) + +/* + * M_WRITABLE(m) + * Evaluate TRUE if it's safe to write to the mbuf m's data region (this + * can be both the local data payload, or an external buffer area, + * depending on whether M_EXT is set). + */ +#define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && (!((m)->m_flags \ + & M_EXT) || !MEXT_IS_REF(m))) /* * Copy mbuf pkthdr from "from" to "to".