! ! It turns out that too many drivers are not only parsing the ! L2/3/4 headers for TSO but also for generic checksum offloading. ! Ideally we would only have one common function shared amongst ! all drivers, and perhaps when updating them for IPv6 we should ! introduce that. Eventually we should provide the meta information ! along with mbufs to avoid (re-)parsing entirely. ! ! To not break IPv6 (checksums and offload) and to be able to MFC ! the changes without risking breakage of 3rd party drivers, duplicate ! the v4 framework, as other OSes have done as well. ! ! Introduce interface capability flags for TX/RX checksum offload ! with IPv6, to allow independent toggling (where possible). Add ! CSUM_*_IPV6 flags for UDP/TCP over IPv6, and reserve further ones for ! SCTP and IPv6 fragmentation. Define CSUM_DELAY_DATA_IPV6 as we ! do for legacy IP and add CSUM_DATA_VALID_IPV6 as an alias for ! CSUM_DATA_VALID. ! ! This pretty much brings IPv6 handling in line with IPv4. ! TSO is still handled in a different way and not via if_hwassist. ! ! Update ifconfig to allow (un)setting of the new capability flags. ! Update loopback to announce the new capabilities and if_hwassist ! flags. ! ! Individual driver updates will have to follow. ! ! Reported by: gallatin, dim, .. ! Reviewed by: ! Tested by: ! Index: sbin/ifconfig/ifconfig.8 =================================================================== --- sbin/ifconfig/ifconfig.8 (revision 236149) +++ sbin/ifconfig/ifconfig.8 (working copy) @@ -28,7 +28,7 @@ .\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94 .\" $FreeBSD$ .\" -.Dd May 19, 2012 +.Dd May 26, 2012 .Dt IFCONFIG 8 .Os .Sh NAME @@ -372,16 +372,32 @@ This is useful for devices which have multiple phy .It Cm name Ar name Set the interface name to .Ar name . -.It Cm rxcsum , txcsum +.It Cm rxcsum , txcsum , rxcsum6 , txcsum6 If the driver supports user-configurable checksum offloading, enable receive (or transmit) checksum offloading on the interface. 
+The feature can be turned on selectively per protocol family. +Use +.Cm rxcsum6 , txcsum6 +for +.Xr ip6 4 +or +.Cm rxcsum , txcsum +otherwise. Some drivers may not be able to enable these flags independently of each other, so setting one may also set the other. The driver will offload as much checksum work as it can reliably support, the exact level of offloading varies between drivers. -.It Fl rxcsum , txcsum +.It Fl rxcsum , txcsum , rxcsum6 , txcsum6 If the driver supports user-configurable checksum offloading, disable receive (or transmit) checksum offloading on the interface. +The feature can be turned off selectively per protocol family. +Use +.Fl rxcsum6 , txcsum6 +for +.Xr ip6 4 +or +.Fl rxcsum , txcsum +otherwise. These settings may not always be independent of each other. .It Cm tso If the driver supports Index: sbin/ifconfig/ifconfig.c =================================================================== --- sbin/ifconfig/ifconfig.c (revision 236149) +++ sbin/ifconfig/ifconfig.c (working copy) @@ -916,7 +916,8 @@ unsetifdescr(const char *val, int value, int s, co #define IFCAPBITS \ "\020\1RXCSUM\2TXCSUM\3NETCONS\4VLAN_MTU\5VLAN_HWTAGGING\6JUMBO_MTU\7POLLING" \ "\10VLAN_HWCSUM\11TSO4\12TSO6\13LRO\14WOL_UCAST\15WOL_MCAST\16WOL_MAGIC" \ -"\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" +"\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" \ +"\26IFCAP_RXCSUM_IPV6\27IFCAP_TXCSUM_IPV6" /* * Print the status of the interface. 
If an address family was @@ -1193,6 +1194,10 @@ static struct cmd basic_cmds[] = { DEF_CMD("-monitor", -IFF_MONITOR, setifflags), DEF_CMD("staticarp", IFF_STATICARP, setifflags), DEF_CMD("-staticarp", -IFF_STATICARP, setifflags), + DEF_CMD("rxcsum6", IFCAP_RXCSUM_IPV6, setifcap), + DEF_CMD("-rxcsum6", -IFCAP_RXCSUM_IPV6, setifcap), + DEF_CMD("txcsum6", IFCAP_TXCSUM_IPV6, setifcap), + DEF_CMD("-txcsum6", -IFCAP_TXCSUM_IPV6, setifcap), DEF_CMD("rxcsum", IFCAP_RXCSUM, setifcap), DEF_CMD("-rxcsum", -IFCAP_RXCSUM, setifcap), DEF_CMD("txcsum", IFCAP_TXCSUM, setifcap), Index: sys/netinet/tcp_input.c =================================================================== --- sys/netinet/tcp_input.c (revision 236149) +++ sys/netinet/tcp_input.c (working copy) @@ -589,7 +589,7 @@ tcp_input(struct mbuf *m, int off0) ip6 = mtod(m, struct ip6_hdr *); th = (struct tcphdr *)((caddr_t)ip6 + off0); tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0; - if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { + if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) th->th_sum = m->m_pkthdr.csum_data; else Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c (revision 236149) +++ sys/netinet/tcp_subr.c (working copy) @@ -619,10 +619,10 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct nth->th_win = htons((u_short)win); nth->th_urp = 0; - m->m_pkthdr.csum_flags = CSUM_TCP; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 if (isipv6) { + m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; nth->th_sum = in6_cksum_pseudo(ip6, tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0); ip6->ip6_hlim = in6_selecthlim(tp != NULL ? 
tp->t_inpcb : @@ -634,6 +634,7 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct #endif #ifdef INET { + m->m_pkthdr.csum_flags = CSUM_TCP; nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); } Index: sys/netinet/tcp_syncache.c =================================================================== --- sys/netinet/tcp_syncache.c (revision 236149) +++ sys/netinet/tcp_syncache.c (working copy) @@ -1473,10 +1473,10 @@ syncache_respond(struct syncache *sc) optlen = 0; M_SETFIB(m, sc->sc_inc.inc_fibnum); - m->m_pkthdr.csum_flags = CSUM_TCP; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 if (sc->sc_inc.inc_flags & INC_ISIPV6) { + m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen, IPPROTO_TCP, 0); ip6->ip6_hlim = in6_selecthlim(NULL, NULL); @@ -1488,6 +1488,7 @@ syncache_respond(struct syncache *sc) #endif #ifdef INET { + m->m_pkthdr.csum_flags = CSUM_TCP; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(tlen + optlen - hlen + IPPROTO_TCP)); error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL); Index: sys/netinet/tcp_timewait.c =================================================================== --- sys/netinet/tcp_timewait.c (revision 236149) +++ sys/netinet/tcp_timewait.c (working copy) @@ -574,10 +574,10 @@ tcp_twrespond(struct tcptw *tw, int flags) th->th_flags = flags; th->th_win = htons(tw->last_win); - m->m_pkthdr.csum_flags = CSUM_TCP; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 if (isipv6) { + m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0); ip6->ip6_hlim = in6_selecthlim(inp, NULL); @@ -590,6 +590,7 @@ tcp_twrespond(struct tcptw *tw, int flags) #endif #ifdef INET { + m->m_pkthdr.csum_flags = CSUM_TCP; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) + optlen + 
IPPROTO_TCP)); ip->ip_len = m->m_pkthdr.len; Index: sys/netinet/tcp_output.c =================================================================== --- sys/netinet/tcp_output.c (revision 236149) +++ sys/netinet/tcp_output.c (working copy) @@ -1047,7 +1047,6 @@ send: * checksum extended header and data. */ m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */ - m->m_pkthdr.csum_flags = CSUM_TCP; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 if (isipv6) { @@ -1055,6 +1054,7 @@ send: * ip6_plen is not need to be filled now, and will be filled * in ip6_output. */ + m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) + optlen + len, IPPROTO_TCP, 0); } @@ -1064,6 +1064,7 @@ send: #endif #ifdef INET { + m->m_pkthdr.csum_flags = CSUM_TCP; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen)); Index: sys/netinet6/udp6_usrreq.c =================================================================== --- sys/netinet6/udp6_usrreq.c (revision 236149) +++ sys/netinet6/udp6_usrreq.c (working copy) @@ -230,7 +230,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) goto badunlocked; } - if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { + if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) uh_sum = m->m_pkthdr.csum_data; else @@ -784,7 +784,7 @@ udp6_output(struct inpcb *inp, struct mbuf *m, str ip6->ip6_dst = *faddr; udp6->uh_sum = in6_cksum_pseudo(ip6, plen, IPPROTO_UDP, 0); - m->m_pkthdr.csum_flags = CSUM_UDP; + m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); flags = 0; Index: sys/netinet6/ip6_ipsec.c =================================================================== --- sys/netinet6/ip6_ipsec.c (revision 236149) +++ sys/netinet6/ip6_ipsec.c (working copy) @@ -291,6 +291,7 @@ ip6_ipsec_output(struct mbuf **m, struct inpcb *in /* * Do delayed checksums 
now because we send before * this is done in the normal processing path. + * XXX-BZ CSUM_DELAY_DATA_IPV6? */ if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { ipseclog((LOG_DEBUG, Index: sys/netinet6/ip6_output.c =================================================================== --- sys/netinet6/ip6_output.c (revision 236149) +++ sys/netinet6/ip6_output.c (working copy) @@ -190,7 +190,7 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u u_short csum; csum = in_cksum_skip(m, offset + plen, offset); - if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) + if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6 && csum == 0) csum = 0xffff; offset += m->m_pkthdr.csum_data; /* checksum offset */ @@ -885,9 +885,9 @@ again: m->m_flags |= M_FASTFWD_OURS; if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= - CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP @@ -905,9 +905,9 @@ again: if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= - CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP @@ -960,8 +960,8 @@ passout: * XXX-BZ Need a framework to know when the NIC can handle it, even * with ext. hdrs. */ - if (sw_csum & CSUM_DELAY_DATA) { - sw_csum &= ~CSUM_DELAY_DATA; + if (sw_csum & CSUM_DELAY_DATA_IPV6) { + sw_csum &= ~CSUM_DELAY_DATA_IPV6; in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr)); } #ifdef SCTP @@ -1076,9 +1076,9 @@ passout: * fragmented packets, then do it here. * XXX-BZ handle the hw offloading case. Need flags. 
*/ - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { in6_delayed_cksum(m, plen, hlen); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP) { Index: sys/netinet6/ip6_forward.c =================================================================== --- sys/netinet6/ip6_forward.c (revision 236149) +++ sys/netinet6/ip6_forward.c (working copy) @@ -581,9 +581,9 @@ skip_routing: m->m_flags |= M_FASTFWD_OURS; if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= - CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP @@ -601,9 +601,9 @@ skip_routing: if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= - CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP Index: sys/sys/mbuf.h =================================================================== --- sys/sys/mbuf.h (revision 236149) +++ sys/sys/mbuf.h (working copy) @@ -283,15 +283,24 @@ struct mbuf { #define CSUM_FRAGMENT 0x0010 /* will do IP fragmentation */ #define CSUM_TSO 0x0020 /* will do TSO */ #define CSUM_SCTP 0x0040 /* will csum SCTP */ +/* CSUM_SCTP_IPV6 0x0080 will csum IPv6/SCTP */ #define CSUM_IP_CHECKED 0x0100 /* did csum IP */ #define CSUM_IP_VALID 0x0200 /* ... 
the csum is valid */ #define CSUM_DATA_VALID 0x0400 /* csum_data field is valid */ #define CSUM_PSEUDO_HDR 0x0800 /* csum_data has pseudo hdr */ #define CSUM_SCTP_VALID 0x1000 /* SCTP checksum is valid */ +#define CSUM_UDP_IPV6 0x2000 /* will csum IPv6/UDP */ +#define CSUM_TCP_IPV6 0x4000 /* will csum IPv6/TCP */ +/* CSUM_TSO_IPV6 0x8000 will do IPv6/TSO */ +/* CSUM_FRAGMENT_IPV6 0x10000 will do IPv6 fragmentation */ + +#define CSUM_DELAY_DATA_IPV6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6) +#define CSUM_DATA_VALID_IPV6 CSUM_DATA_VALID + #define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP) -#define CSUM_DELAY_IP (CSUM_IP) /* XXX add ipv6 here too? */ +#define CSUM_DELAY_IP (CSUM_IP) /* Only v4, no v6 IP hdr csum */ /* * mbuf types. Index: sys/net/if.h =================================================================== --- sys/net/if.h (revision 236149) +++ sys/net/if.h (working copy) @@ -230,7 +230,11 @@ struct if_data { #define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */ #define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */ #define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */ +#define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */ +#define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */ +#define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) + #define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) #define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6) #define IFCAP_WOL (IFCAP_WOL_UCAST | IFCAP_WOL_MCAST | IFCAP_WOL_MAGIC) Index: sys/net/if_loop.c =================================================================== --- sys/net/if_loop.c (revision 236149) +++ sys/net/if_loop.c (working copy) @@ -92,7 +92,9 @@ #endif #define LO_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP) -#define LO_CSUM_SET (CSUM_DATA_VALID | CSUM_PSEUDO_HDR | \ +#define LO_CSUM_FEATURES6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP) +#define LO_CSUM_SET (CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \ + CSUM_PSEUDO_HDR | \ 
CSUM_IP_CHECKED | CSUM_IP_VALID | \ CSUM_SCTP_VALID) @@ -143,8 +145,9 @@ lo_clone_create(struct if_clone *ifc, int unit, ca ifp->if_ioctl = loioctl; ifp->if_output = looutput; ifp->if_snd.ifq_maxlen = ifqmaxlen; - ifp->if_capabilities = ifp->if_capenable = IFCAP_HWCSUM; - ifp->if_hwassist = LO_CSUM_FEATURES; + ifp->if_capabilities = ifp->if_capenable = + IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6; + ifp->if_hwassist = LO_CSUM_FEATURES | LO_CSUM_FEATURES6; if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); if (V_loif == NULL) @@ -247,12 +250,19 @@ looutput(struct ifnet *ifp, struct mbuf *m, struct #if 1 /* XXX */ switch (dst->sa_family) { case AF_INET: - case AF_INET6: if (ifp->if_capenable & IFCAP_RXCSUM) { m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = LO_CSUM_SET; } m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES; + break; + case AF_INET6: + if (ifp->if_capenable & IFCAP_RXCSUM_IPV6) { + m->m_pkthdr.csum_data = 0xffff; + m->m_pkthdr.csum_flags = LO_CSUM_SET; + } + m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES6; + break; case AF_IPX: case AF_APPLETALK: break; @@ -436,10 +446,15 @@ loioctl(struct ifnet *ifp, u_long cmd, caddr_t dat ifp->if_capenable ^= IFCAP_RXCSUM; if ((mask & IFCAP_TXCSUM) != 0) ifp->if_capenable ^= IFCAP_TXCSUM; + if ((mask & IFCAP_RXCSUM_IPV6) != 0) + ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; + if ((mask & IFCAP_TXCSUM_IPV6) != 0) + ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; + ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist = LO_CSUM_FEATURES; - else - ifp->if_hwassist = 0; + if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) + ifp->if_hwassist |= LO_CSUM_FEATURES6; break; default: