! ! Proposed MFC patch of the intial IPv6 offload support changes ! (no SCTP yet but to come soon as well, as more locking improvements ! etc.) matchingt he changes in HEAD. ! ! For testing only. ! Index: sys =================================================================== --- sys (revision 236006) +++ sys (working copy) Property changes on: sys ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys:r231317,235916,235921,235924,235941,235944,235950,235953-235956,235958-235964,235981 Index: sys/sparc64/include/in_cksum.h =================================================================== --- sys/sparc64/include/in_cksum.h (revision 236006) +++ sys/sparc64/include/in_cksum.h (working copy) @@ -65,6 +65,7 @@ #define in_cksum(m, len) in_cksum_skip(m, len, 0) +#if defined(IPVERSION) && (IPVERSION == 4) static __inline void in_cksum_update(struct ip *ip) { @@ -73,6 +74,7 @@ in_cksum_update(struct ip *ip) __tmp = (int)ip->ip_sum + 1; ip->ip_sum = __tmp + (__tmp >> 16); } +#endif static __inline u_short in_addword(u_short sum, u_short b) @@ -106,6 +108,7 @@ in_pseudo(u_int sum, u_int b, u_int c) return (sum); } +#if defined(IPVERSION) && (IPVERSION == 4) static __inline u_int in_cksum_hdr(struct ip *ip) { @@ -163,6 +166,7 @@ in_cksum_hdr(struct ip *ip) #undef __LD_ADD return (__ret); } +#endif #ifdef _KERNEL u_short in_cksum_skip(struct mbuf *m, int len, int skip); Index: sys/ia64/include/in_cksum.h =================================================================== --- sys/ia64/include/in_cksum.h (revision 236006) +++ sys/ia64/include/in_cksum.h (working copy) @@ -39,6 +39,7 @@ #define in_cksum(m, len) in_cksum_skip(m, len, 0) +#if defined(IPVERSION) && (IPVERSION == 4) /* * It it useful to have an Internet checksum routine which is inlineable * and optimized specifically for the task of computing IP header checksums @@ -65,9 +66,12 @@ in_cksum_update(struct ip *ip) } while(0) #endif +#endif #ifdef _KERNEL +#if defined(IPVERSION) && (IPVERSION == 4) u_int in_cksum_hdr(const struct ip *ip); +#endif u_short in_addword(u_short sum, u_short b); u_short in_pseudo(u_int sum, u_int b, u_int c); u_short in_cksum_skip(struct mbuf *m, int len, int skip); Index: sys/netinet/tcp_input.c =================================================================== --- sys/netinet/tcp_input.c (revision 236006) +++ sys/netinet/tcp_input.c (working copy) @@ -608,13 +608,31 @@ tcp_input(struct mbuf *m, int off0) #ifdef INET6 if (isipv6) { /* IP6_EXTHDR_CHECK() is already done at tcp6_input(). */ + + if (m->m_len < (sizeof(*ip6) + sizeof(*th))) { + m = m_pullup(m, sizeof(*ip6) + sizeof(*th)); + if (m == NULL) { + TCPSTAT_INC(tcps_rcvshort); + return; + } + } + ip6 = mtod(m, struct ip6_hdr *); + th = (struct tcphdr *)((caddr_t)ip6 + off0); tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0; - if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) { + if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { + if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) + th->th_sum = m->m_pkthdr.csum_data; + else + th->th_sum = in6_cksum_pseudo(ip6, tlen, + IPPROTO_TCP, m->m_pkthdr.csum_data); + th->th_sum ^= 0xffff; + } else + th->th_sum = in6_cksum(m, IPPROTO_TCP, off0, tlen); + if (th->th_sum) { TCPSTAT_INC(tcps_rcvbadsum); goto drop; } - th = (struct tcphdr *)((caddr_t)ip6 + off0); /* * Be proactive about unspecified IPv6 address in source. @@ -3573,7 +3591,6 @@ tcp_mssopt(struct in_conninfo *inc) if (inc->inc_flags & INC_ISIPV6) { mss = V_tcp_v6mssdflt; maxmtu = tcp_maxmtu6(inc, NULL); - thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */ min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); } #endif @@ -3584,10 +3601,13 @@ tcp_mssopt(struct in_conninfo *inc) { mss = V_tcp_mssdflt; maxmtu = tcp_maxmtu(inc, NULL); - thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */ min_protoh = sizeof(struct tcpiphdr); } #endif +#if defined(INET6) || defined(INET) + thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */ +#endif + if (maxmtu && thcmtu) mss = min(maxmtu, thcmtu) - min_protoh; else if (maxmtu || thcmtu) Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c (revision 236006) +++ sys/netinet/tcp_subr.c (working copy) @@ -573,8 +573,7 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct ip6->ip6_flow = 0; ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_nxt = IPPROTO_TCP; - ip6->ip6_plen = htons((u_short)(sizeof (struct tcphdr) + - tlen)); + ip6->ip6_plen = 0; /* Set in ip6_output(). */ tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr); } #endif @@ -619,12 +618,13 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct else nth->th_win = htons((u_short)win); nth->th_urp = 0; + + m->m_pkthdr.csum_flags = CSUM_TCP; + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 if (isipv6) { - nth->th_sum = 0; - nth->th_sum = in6_cksum(m, IPPROTO_TCP, - sizeof(struct ip6_hdr), - tlen - sizeof(struct ip6_hdr)); + nth->th_sum = in6_cksum_pseudo(ip6, + tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0); ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb : NULL, NULL); } @@ -636,8 +636,6 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct { nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); - m->m_pkthdr.csum_flags = CSUM_TCP; - m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); } #endif /* INET */ #ifdef TCPDEBUG Index: sys/netinet/tcp_syncache.c =================================================================== --- sys/netinet/tcp_syncache.c (revision 236006) +++ sys/netinet/tcp_syncache.c (working copy) @@ -1472,11 +1472,12 @@ syncache_respond(struct syncache *sc) optlen = 0; M_SETFIB(m, sc->sc_inc.inc_fibnum); + m->m_pkthdr.csum_flags = CSUM_TCP; + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 if (sc->sc_inc.inc_flags & INC_ISIPV6) { - th->th_sum = 0; - th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen, - tlen + optlen - hlen); + th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen, + IPPROTO_TCP, 0); ip6->ip6_hlim = in6_selecthlim(NULL, NULL); error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); } @@ -1488,8 +1489,6 @@ syncache_respond(struct syncache *sc) { th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(tlen + optlen - hlen + IPPROTO_TCP)); - m->m_pkthdr.csum_flags = CSUM_TCP; - m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL); } #endif Index: sys/netinet/tcp_timewait.c =================================================================== --- sys/netinet/tcp_timewait.c (revision 236006) +++ sys/netinet/tcp_timewait.c (working copy) @@ -574,10 +574,12 @@ tcp_twrespond(struct tcptw *tw, int flags) th->th_flags = flags; th->th_win = htons(tw->last_win); + m->m_pkthdr.csum_flags = CSUM_TCP; + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 if (isipv6) { - th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr), - sizeof(struct tcphdr) + optlen); + th->th_sum = in6_cksum_pseudo(ip6, + sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0); ip6->ip6_hlim = in6_selecthlim(inp, NULL); error = ip6_output(m, inp->in6p_outputopts, NULL, (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp); @@ -590,8 +592,6 @@ tcp_twrespond(struct tcptw *tw, int flags) { th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP)); - m->m_pkthdr.csum_flags = CSUM_TCP; - m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); ip->ip_len = m->m_pkthdr.len; if (V_path_mtu_discovery) ip->ip_off |= IP_DF; Index: sys/netinet/tcp_output.c =================================================================== --- sys/netinet/tcp_output.c (revision 236006) +++ sys/netinet/tcp_output.c (working copy) @@ -1052,19 +1052,23 @@ send: * checksum extended header and data. */ m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */ + m->m_pkthdr.csum_flags = CSUM_TCP; + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 - if (isipv6) + if (isipv6) { /* * ip6_plen is not need to be filled now, and will be filled * in ip6_output. */ - th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr), - sizeof(struct tcphdr) + optlen + len); + th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) + + optlen + len, IPPROTO_TCP, 0); + } +#endif +#if defined(INET6) && defined(INET) else -#endif /* INET6 */ +#endif +#ifdef INET { - m->m_pkthdr.csum_flags = CSUM_TCP; - m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen)); @@ -1072,6 +1076,7 @@ send: KASSERT(ip->ip_v == IPVERSION, ("%s: IP version incorrect: %d", __func__, ip->ip_v)); } +#endif /* * Enable TSO and specify the size of the segments. Index: sys/netinet/tcp_lro.c =================================================================== --- sys/netinet/tcp_lro.c (revision 236006) +++ sys/netinet/tcp_lro.c (working copy) @@ -1,397 +1,606 @@ -/****************************************************************************** +/*- + * Copyright (c) 2007, Myricom Inc. + * Copyright (c) 2008, Intel Corporation. + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by Bjoern Zeeb + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ -Copyright (c) 2007, Myricom Inc. -Copyright (c) 2008, Intel Corporation. -All rights reserved. +#include +__FBSDID("$FreeBSD$"); -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: +#include "opt_inet.h" +#include "opt_inet6.h" - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Neither the name of the Myricom Inc, nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - 3. Neither the name of the Intel Corporation, nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -$FreeBSD$ -***************************************************************************/ - #include #include -#include #include #include #include #include +#include #include -#include #include #include +#include #include +#include #include #include -#include +#include + #include +#ifndef LRO_ENTRIES +#define LRO_ENTRIES 8 /* # of LRO entries per RX queue. */ +#endif -static uint16_t do_csum_data(uint16_t *raw, int len) -{ - uint32_t csum; - csum = 0; - while (len > 0) { - csum += *raw; - raw++; - csum += *raw; - raw++; - len -= 4; - } - csum = (csum >> 16) + (csum & 0xffff); - csum = (csum >> 16) + (csum & 0xffff); - return (uint16_t)csum; -} +#define TCP_LRO_UPDATE_CSUM 1 +#ifndef TCP_LRO_UPDATE_CSUM +#define TCP_LRO_INVALID_CSUM 0x0000 +#endif -/* - * Allocate and init the LRO data structures - */ int -tcp_lro_init(struct lro_ctrl *cntl) +tcp_lro_init(struct lro_ctrl *lc) { - struct lro_entry *lro; - int i, error = 0; + struct lro_entry *le; + int error, i; - SLIST_INIT(&cntl->lro_free); - SLIST_INIT(&cntl->lro_active); + lc->lro_bad_csum = 0; + lc->lro_queued = 0; + lc->lro_flushed = 0; + lc->lro_cnt = 0; + SLIST_INIT(&lc->lro_free); + SLIST_INIT(&lc->lro_active); - cntl->lro_bad_csum = 0; - cntl->lro_queued = 0; - cntl->lro_flushed = 0; - + error = 0; for (i = 0; i < LRO_ENTRIES; i++) { - lro = (struct lro_entry *) malloc(sizeof (struct lro_entry), - M_DEVBUF, M_NOWAIT | M_ZERO); - if (lro == NULL) { + le = (struct lro_entry *)malloc(sizeof(*le), M_DEVBUF, + M_NOWAIT | M_ZERO); + if (le == NULL) { if (i == 0) error = ENOMEM; break; } - cntl->lro_cnt = i; - SLIST_INSERT_HEAD(&cntl->lro_free, lro, next); + lc->lro_cnt = i + 1; + SLIST_INSERT_HEAD(&lc->lro_free, le, next); } return (error); } void -tcp_lro_free(struct lro_ctrl *cntl) +tcp_lro_free(struct lro_ctrl *lc) { - struct lro_entry *entry; + struct lro_entry *le; - while (!SLIST_EMPTY(&cntl->lro_free)) { - entry = SLIST_FIRST(&cntl->lro_free); - SLIST_REMOVE_HEAD(&cntl->lro_free, next); - free(entry, M_DEVBUF); + while (!SLIST_EMPTY(&lc->lro_free)) { + le = SLIST_FIRST(&lc->lro_free); + SLIST_REMOVE_HEAD(&lc->lro_free, next); + free(le, M_DEVBUF); } } +#ifdef TCP_LRO_UPDATE_CSUM +static uint16_t +tcp_lro_csum_th(struct tcphdr *th) +{ + uint32_t ch; + uint16_t *p, l; + + ch = th->th_sum = 0x0000; + l = th->th_off; + p = (uint16_t *)th; + while (l > 0) { + ch += *p; + p++; + ch += *p; + p++; + l--; + } + while (ch > 0xffff) + ch = (ch >> 16) + (ch & 0xffff); + + return (ch & 0xffff); +} + +static uint16_t +tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th, + uint16_t tcp_data_len, uint16_t csum) +{ + uint32_t c; + uint16_t cs; + + c = csum; + + /* Remove length from checksum. */ + switch (le->eh_type) { +#ifdef INET6 + case ETHERTYPE_IPV6: + { + struct ip6_hdr *ip6; + + ip6 = (struct ip6_hdr *)l3hdr; + if (le->append_cnt == 0) + cs = ip6->ip6_plen; + else { + uint32_t cx; + + cx = ntohs(ip6->ip6_plen); + cs = in6_cksum_pseudo(ip6, cx, ip6->ip6_nxt, 0); + } + break; + } +#endif +#ifdef INET + case ETHERTYPE_IP: + { + struct ip *ip4; + + ip4 = (struct ip *)l3hdr; + if (le->append_cnt == 0) + cs = ip4->ip_len; + else { + cs = in_addword(ntohs(ip4->ip_len) - sizeof(*ip4), + IPPROTO_TCP); + cs = in_pseudo(ip4->ip_src.s_addr, ip4->ip_dst.s_addr, + htons(cs)); + } + break; + } +#endif + default: + cs = 0; /* Keep compiler happy. */ + } + + cs = ~cs; + c += cs; + + /* Remove TCP header csum. */ + cs = ~tcp_lro_csum_th(th); + c += cs; + while (c > 0xffff) + c = (c >> 16) + (c & 0xffff); + + return (c & 0xffff); +} +#endif + void -tcp_lro_flush(struct lro_ctrl *cntl, struct lro_entry *lro) +tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le) { - struct ifnet *ifp; - struct ip *ip; - struct tcphdr *tcp; - uint32_t *ts_ptr; - uint32_t tcplen, tcp_csum; + if (le->append_cnt > 0) { + struct tcphdr *th; + uint16_t p_len; - if (lro->append_cnt) { - /* incorporate the new len into the ip header and - * re-calculate the checksum */ - ip = lro->ip; - ip->ip_len = htons(lro->len - ETHER_HDR_LEN); - ip->ip_sum = 0; - ip->ip_sum = 0xffff ^ - do_csum_data((uint16_t*)ip, - sizeof (*ip)); + p_len = htons(le->p_len); + switch (le->eh_type) { +#ifdef INET6 + case ETHERTYPE_IPV6: + { + struct ip6_hdr *ip6; - lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED | - CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; - lro->m_head->m_pkthdr.csum_data = 0xffff; - lro->m_head->m_pkthdr.len = lro->len; + ip6 = le->le_ip6; + ip6->ip6_plen = p_len; + th = (struct tcphdr *)(ip6 + 1); + le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | + CSUM_PSEUDO_HDR; + le->p_len += ETHER_HDR_LEN + sizeof(*ip6); + break; + } +#endif +#ifdef INET + case ETHERTYPE_IP: + { + struct ip *ip4; +#ifdef TCP_LRO_UPDATE_CSUM + uint32_t cl; + uint16_t c; +#endif - /* incorporate the latest ack into the tcp header */ - tcp = (struct tcphdr *) (ip + 1); - tcp->th_ack = lro->ack_seq; - tcp->th_win = lro->window; - /* incorporate latest timestamp into the tcp header */ - if (lro->timestamp) { - ts_ptr = (uint32_t *)(tcp + 1); - ts_ptr[1] = htonl(lro->tsval); - ts_ptr[2] = lro->tsecr; + ip4 = le->le_ip4; +#ifdef TCP_LRO_UPDATE_CSUM + /* Fix IP header checksum for new length. */ + c = ~ip4->ip_sum; + cl = c; + c = ~ip4->ip_len; + cl += c + p_len; + while (cl > 0xffff) + cl = (cl >> 16) + (cl & 0xffff); + c = cl; + ip4->ip_sum = ~c; +#else + ip4->ip_sum = TCP_LRO_INVALID_CSUM; +#endif + ip4->ip_len = p_len; + th = (struct tcphdr *)(ip4 + 1); + le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | + CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID; + le->p_len += ETHER_HDR_LEN; + break; } - /* - * update checksum in tcp header by re-calculating the - * tcp pseudoheader checksum, and adding it to the checksum - * of the tcp payload data - */ - tcp->th_sum = 0; - tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN; - tcp_csum = lro->data_csum; - tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, - htons(tcplen + IPPROTO_TCP)); - tcp_csum += do_csum_data((uint16_t*)tcp, - tcp->th_off << 2); - tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16); - tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16); - tcp->th_sum = 0xffff ^ tcp_csum; +#endif + default: + th = NULL; /* Keep compiler happy. */ + } + le->m_head->m_pkthdr.csum_data = 0xffff; + le->m_head->m_pkthdr.len = le->p_len; + + /* Incorporate the latest ACK into the TCP header. */ + th->th_ack = le->ack_seq; + th->th_win = le->window; + /* Incorporate latest timestamp into the TCP header. */ + if (le->timestamp != 0) { + uint32_t *ts_ptr; + + ts_ptr = (uint32_t *)(th + 1); + ts_ptr[1] = htonl(le->tsval); + ts_ptr[2] = le->tsecr; + } +#ifdef TCP_LRO_UPDATE_CSUM + /* Update the TCP header checksum. */ + le->ulp_csum += p_len; + le->ulp_csum += tcp_lro_csum_th(th); + while (le->ulp_csum > 0xffff) + le->ulp_csum = (le->ulp_csum >> 16) + + (le->ulp_csum & 0xffff); + th->th_sum = (le->ulp_csum & 0xffff); + th->th_sum = ~th->th_sum; +#else + th->th_sum = TCP_LRO_INVALID_CSUM; +#endif } - ifp = cntl->ifp; - (*ifp->if_input)(cntl->ifp, lro->m_head); - cntl->lro_queued += lro->append_cnt + 1; - cntl->lro_flushed++; - lro->m_head = NULL; - lro->timestamp = 0; - lro->append_cnt = 0; - SLIST_INSERT_HEAD(&cntl->lro_free, lro, next); + + (*lc->ifp->if_input)(lc->ifp, le->m_head); + lc->lro_queued += le->append_cnt + 1; + lc->lro_flushed++; + bzero(le, sizeof(*le)); + SLIST_INSERT_HEAD(&lc->lro_free, le, next); } -int -tcp_lro_rx(struct lro_ctrl *cntl, struct mbuf *m_head, uint32_t csum) +#ifdef INET6 +static int +tcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6, + struct tcphdr **th) { - struct ether_header *eh; - struct ip *ip; - struct tcphdr *tcp; - uint32_t *ts_ptr; - struct mbuf *m_nxt, *m_tail; - struct lro_entry *lro; - int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len; - int opt_bytes, trim, csum_flags; - uint32_t seq, tmp_csum, device_mtu; + /* XXX-BZ we should check the flow-label. */ - eh = mtod(m_head, struct ether_header *); - if (eh->ether_type != htons(ETHERTYPE_IP)) - return 1; - ip = (struct ip *) (eh + 1); - if (ip->ip_p != IPPROTO_TCP) - return 1; - - /* ensure there are no options */ - if ((ip->ip_hl << 2) != sizeof (*ip)) - return -1; + /* XXX-BZ We do not yet support ext. hdrs. */ + if (ip6->ip6_nxt != IPPROTO_TCP) + return (TCP_LRO_NOT_SUPPORTED); - /* .. and the packet is not fragmented */ - if (ip->ip_off & htons(IP_MF|IP_OFFMASK)) - return -1; + /* Find the TCP header. */ + *th = (struct tcphdr *)(ip6 + 1); - /* verify that the IP header checksum is correct */ - csum_flags = m_head->m_pkthdr.csum_flags; + return (0); +} +#endif + +#ifdef INET +static int +tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4, + struct tcphdr **th) +{ + int csum_flags; + uint16_t csum; + + if (ip4->ip_p != IPPROTO_TCP) + return (TCP_LRO_NOT_SUPPORTED); + + /* Ensure there are no options. */ + if ((ip4->ip_hl << 2) != sizeof (*ip4)) + return (TCP_LRO_CANNOT); + + /* .. and the packet is not fragmented. */ + if (ip4->ip_off & htons(IP_MF|IP_OFFMASK)) + return (TCP_LRO_CANNOT); + + /* Legacy IP has a header checksum that needs to be correct. */ + csum_flags = m->m_pkthdr.csum_flags; if (csum_flags & CSUM_IP_CHECKED) { if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) { - cntl->lro_bad_csum++; - return -1; + lc->lro_bad_csum++; + return (TCP_LRO_CANNOT); } } else { - tmp_csum = do_csum_data((uint16_t *)ip, sizeof (*ip)); - if (__predict_false((tmp_csum ^ 0xffff) != 0)) { - cntl->lro_bad_csum++; - return -1; + csum = in_cksum_hdr(ip4); + if (__predict_false((csum ^ 0xffff) != 0)) { + lc->lro_bad_csum++; + return (TCP_LRO_CANNOT); } } - - /* find the TCP header */ - tcp = (struct tcphdr *) (ip + 1); - /* Get the TCP checksum if we dont have it */ - if (!csum) - csum = tcp->th_sum; + /* Find the TCP header (we assured there are no IP options). */ + *th = (struct tcphdr *)(ip4 + 1); - /* ensure no bits set besides ack or psh */ - if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0) - return -1; + return (0); +} +#endif - /* check for timestamps. Since the only option we handle are - timestamps, we only have to handle the simple case of - aligned timestamps */ +int +tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum) +{ + struct lro_entry *le; + struct ether_header *eh; +#ifdef INET6 + struct ip6_hdr *ip6 = NULL; /* Keep compiler happy. */ +#endif +#ifdef INET + struct ip *ip4 = NULL; /* Keep compiler happy. */ +#endif + struct tcphdr *th; + void *l3hdr = NULL; /* Keep compiler happy. */ + uint32_t *ts_ptr; + tcp_seq seq; + int error, ip_len, l; + uint16_t eh_type, tcp_data_len; - opt_bytes = (tcp->th_off << 2) - sizeof (*tcp); - tcp_hdr_len = sizeof (*tcp) + opt_bytes; - ts_ptr = (uint32_t *)(tcp + 1); - if (opt_bytes != 0) { - if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) || - (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16| - TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))) - return -1; + /* We expect a contiguous header [eh, ip, tcp]. */ + + eh = mtod(m, struct ether_header *); + eh_type = ntohs(eh->ether_type); + switch (eh_type) { +#ifdef INET6 + case ETHERTYPE_IPV6: + if (V_ip6_forwarding != 0) { + /* XXX-BZ stats but changing lro_ctrl is a problem. */ + return (TCP_LRO_CANNOT); + } + l3hdr = ip6 = (struct ip6_hdr *)(eh + 1); + error = tcp_lro_rx_ipv6(lc, m, ip6, &th); + if (error != 0) + return (error); + tcp_data_len = ntohs(ip6->ip6_plen); + ip_len = sizeof(*ip6) + tcp_data_len; + break; +#endif +#ifdef INET + case ETHERTYPE_IP: + if (V_ipforwarding != 0) { + /* XXX-BZ stats but changing lro_ctrl is a problem. */ + return (TCP_LRO_CANNOT); + } + l3hdr = ip4 = (struct ip *)(eh + 1); + error = tcp_lro_rx_ipv4(lc, m, ip4, &th); + if (error != 0) + return (error); + ip_len = ntohs(ip4->ip_len); + tcp_data_len = ip_len - sizeof(*ip4); + break; +#endif + /* XXX-BZ what happens in case of VLAN(s)? */ + default: + return (TCP_LRO_NOT_SUPPORTED); } - ip_len = ntohs(ip->ip_len); - tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip); - + /* + * If the frame is padded beyond the end of the IP packet, then we must + * trim the extra bytes off. + */ + l = m->m_pkthdr.len - (ETHER_HDR_LEN + ip_len); + if (l != 0) { + if (l < 0) + /* Truncated packet. */ + return (TCP_LRO_CANNOT); - /* - * If frame is padded beyond the end of the IP packet, - * then we must trim the extra bytes off the end. + m_adj(m, -l); + } + + /* + * Check TCP header constraints. */ - tot_len = m_head->m_pkthdr.len; - trim = tot_len - (ip_len + ETHER_HDR_LEN); - if (trim != 0) { - if (trim < 0) { - /* truncated packet */ - return -1; + /* Ensure no bits set besides ACK or PSH. */ + if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) + return (TCP_LRO_CANNOT); + + /* XXX-BZ We lose a AKC|PUSH flag concatinating multiple segments. */ + /* XXX-BZ Ideally we'd flush on PUSH? */ + + /* + * Check for timestamps. + * Since the only option we handle are timestamps, we only have to + * handle the simple case of aligned timestamps. + */ + l = (th->th_off << 2); + tcp_data_len -= l; + l -= sizeof(*th); + ts_ptr = (uint32_t *)(th + 1); + if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) || + (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16| + TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) + return (TCP_LRO_CANNOT); + + /* If the driver did not pass in the checksum, set it now. */ + if (csum == 0x0000) + csum = th->th_sum; + + seq = ntohl(th->th_seq); + + /* Try to find a matching previous segment. */ + SLIST_FOREACH(le, &lc->lro_active, next) { + if (le->eh_type != eh_type) + continue; + if (le->source_port != th->th_sport || + le->dest_port != th->th_dport) + continue; + switch (eh_type) { +#ifdef INET6 + case ETHERTYPE_IPV6: + if (bcmp(&le->source_ip6, &ip6->ip6_src, + sizeof(struct in6_addr)) != 0 || + bcmp(&le->dest_ip6, &ip6->ip6_dst, + sizeof(struct in6_addr)) != 0) + continue; + break; +#endif +#ifdef INET + case ETHERTYPE_IP: + if (le->source_ip4 != ip4->ip_src.s_addr || + le->dest_ip4 != ip4->ip_dst.s_addr) + continue; + break; +#endif } - m_adj(m_head, -trim); - tot_len = m_head->m_pkthdr.len; - } - m_nxt = m_head; - m_tail = NULL; /* -Wuninitialized */ - while (m_nxt != NULL) { - m_tail = m_nxt; - m_nxt = m_tail->m_next; - } + /* Flush now if appending will result in overflow. */ + if (le->p_len > (65535 - tcp_data_len)) { + SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); + tcp_lro_flush(lc, le); + break; + } - hlen = ip_len + ETHER_HDR_LEN - tcp_data_len; - seq = ntohl(tcp->th_seq); + /* Try to append the new segment. */ + if (__predict_false(seq != le->next_seq || + (tcp_data_len == 0 && le->ack_seq == th->th_ack))) { + /* Out of order packet or duplicate ACK. */ + SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); + tcp_lro_flush(lc, le); + return (TCP_LRO_CANNOT); + } - SLIST_FOREACH(lro, &cntl->lro_active, next) { - if (lro->source_port == tcp->th_sport && - lro->dest_port == tcp->th_dport && - lro->source_ip == ip->ip_src.s_addr && - lro->dest_ip == ip->ip_dst.s_addr) { - /* Flush now if appending will result in overflow. */ - if (lro->len > (65535 - tcp_data_len)) { - SLIST_REMOVE(&cntl->lro_active, lro, - lro_entry, next); - tcp_lro_flush(cntl, lro); - break; - } + if (l != 0) { + uint32_t tsval = ntohl(*(ts_ptr + 1)); + /* Make sure timestamp values are increasing. */ + /* XXX-BZ flip and use TSTMP_GEQ macro for this? */ + if (__predict_false(le->tsval > tsval || + *(ts_ptr + 2) == 0)) + return (TCP_LRO_CANNOT); + le->tsval = tsval; + le->tsecr = *(ts_ptr + 2); + } - /* Try to append it */ + le->next_seq += tcp_data_len; + le->ack_seq = th->th_ack; + le->window = th->th_win; + le->append_cnt++; - if (__predict_false(seq != lro->next_seq || - (tcp_data_len == 0 && - lro->ack_seq == tcp->th_ack))) { - /* out of order packet or dup ack */ - SLIST_REMOVE(&cntl->lro_active, lro, - lro_entry, next); - tcp_lro_flush(cntl, lro); - return -1; - } +#ifdef TCP_LRO_UPDATE_CSUM + le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th, + tcp_data_len, ~csum); +#endif - if (opt_bytes) { - uint32_t tsval = ntohl(*(ts_ptr + 1)); - /* make sure timestamp values are increasing */ - if (__predict_false(lro->tsval > tsval || - *(ts_ptr + 2) == 0)) { - return -1; - } - lro->tsval = tsval; - lro->tsecr = *(ts_ptr + 2); - } + if (tcp_data_len == 0) { + m_freem(m); + return (0); + } - lro->next_seq += tcp_data_len; - lro->ack_seq = tcp->th_ack; - lro->window = tcp->th_win; - lro->append_cnt++; - if (tcp_data_len == 0) { - m_freem(m_head); - return 0; - } - /* subtract off the checksum of the tcp header - * from the hardware checksum, and add it to the - * stored tcp data checksum. Byteswap the checksum - * if the total length so far is odd - */ - tmp_csum = do_csum_data((uint16_t*)tcp, - tcp_hdr_len); - csum = csum + (tmp_csum ^ 0xffff); - csum = (csum & 0xffff) + (csum >> 16); - csum = (csum & 0xffff) + (csum >> 16); - if (lro->len & 0x1) { - /* Odd number of bytes so far, flip bytes */ - csum = ((csum << 8) | (csum >> 8)) & 0xffff; - } - csum = csum + lro->data_csum; - csum = (csum & 0xffff) + (csum >> 16); - csum = (csum & 0xffff) + (csum >> 16); - lro->data_csum = csum; + le->p_len += tcp_data_len; - lro->len += tcp_data_len; + /* + * Adjust the mbuf so that m_data points to the first byte of + * the ULP payload. Adjust the mbuf to avoid complications and + * append new segment to existing mbuf chain. + */ + m_adj(m, m->m_pkthdr.len - tcp_data_len); + m->m_flags &= ~M_PKTHDR; - /* adjust mbuf so that m->m_data points to - the first byte of the payload */ - m_adj(m_head, hlen); - /* append mbuf chain */ - lro->m_tail->m_next = m_head; - /* advance the last pointer */ - lro->m_tail = m_tail; - /* flush packet if required */ - device_mtu = cntl->ifp->if_mtu; - if (lro->len > (65535 - device_mtu)) { - SLIST_REMOVE(&cntl->lro_active, lro, - lro_entry, next); - tcp_lro_flush(cntl, lro); - } - return 0; + le->m_tail->m_next = m; + le->m_tail = m_last(m); + + /* + * If a possible next full length packet would cause an + * overflow, pro-actively flush now. + */ + if (le->p_len > (65535 - lc->ifp->if_mtu)) { + SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); + tcp_lro_flush(lc, le); } + + return (0); } - if (SLIST_EMPTY(&cntl->lro_free)) - return -1; + /* Try to find an empty slot. */ + if (SLIST_EMPTY(&lc->lro_free)) + return (TCP_LRO_CANNOT); - /* start a new chain */ - lro = SLIST_FIRST(&cntl->lro_free); - SLIST_REMOVE_HEAD(&cntl->lro_free, next); - SLIST_INSERT_HEAD(&cntl->lro_active, lro, next); - lro->source_port = tcp->th_sport; - lro->dest_port = tcp->th_dport; - lro->source_ip = ip->ip_src.s_addr; - lro->dest_ip = ip->ip_dst.s_addr; - lro->next_seq = seq + tcp_data_len; - lro->mss = tcp_data_len; - lro->ack_seq = tcp->th_ack; - lro->window = tcp->th_win; + /* Start a new segment chain. */ + le = SLIST_FIRST(&lc->lro_free); + SLIST_REMOVE_HEAD(&lc->lro_free, next); + SLIST_INSERT_HEAD(&lc->lro_active, le, next); - /* save the checksum of just the TCP payload by - * subtracting off the checksum of the TCP header from - * the entire hardware checksum - * Since IP header checksum is correct, checksum over - * the IP header is -0. Substracting -0 is unnecessary. + /* Start filling in details. */ + switch (eh_type) { +#ifdef INET6 + case ETHERTYPE_IPV6: + le->le_ip6 = ip6; + le->source_ip6 = ip6->ip6_src; + le->dest_ip6 = ip6->ip6_dst; + le->eh_type = eh_type; + le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6); + break; +#endif +#ifdef INET + case ETHERTYPE_IP: + le->le_ip4 = ip4; + le->source_ip4 = ip4->ip_src.s_addr; + le->dest_ip4 = ip4->ip_dst.s_addr; + le->eh_type = eh_type; + le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN; + break; +#endif + } + le->source_port = th->th_sport; + le->dest_port = th->th_dport; + + le->next_seq = seq + tcp_data_len; + le->ack_seq = th->th_ack; + le->window = th->th_win; + if (l != 0) { + le->timestamp = 1; + le->tsval = ntohl(*(ts_ptr + 1)); + le->tsecr = *(ts_ptr + 2); + } + +#ifdef TCP_LRO_UPDATE_CSUM + /* + * Do not touch the csum of the first packet. However save the + * "adjusted" checksum of just the source and destination addresses, + * the next header and the TCP payload. The length and TCP header + * parts may change, so we remove those from the saved checksum and + * re-add with final values on tcp_lro_flush() if needed. */ - tmp_csum = do_csum_data((uint16_t*)tcp, tcp_hdr_len); - csum = csum + (tmp_csum ^ 0xffff); - csum = (csum & 0xffff) + (csum >> 16); - csum = (csum & 0xffff) + (csum >> 16); - lro->data_csum = csum; - - lro->ip = ip; - /* record timestamp if it is present */ - if (opt_bytes) { - lro->timestamp = 1; - lro->tsval = ntohl(*(ts_ptr + 1)); - lro->tsecr = *(ts_ptr + 2); - } - lro->len = tot_len; - lro->m_head = m_head; - lro->m_tail = m_tail; - return 0; + KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n", + __func__, le, le->ulp_csum)); + + le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len, + ~csum); + th->th_sum = csum; /* Restore checksum on first packet. */ +#endif + + le->m_head = m; + le->m_tail = m_last(m); + + return (0); } + +/* end */ Index: sys/netinet/tcp_lro.h =================================================================== --- sys/netinet/tcp_lro.h (revision 236006) +++ sys/netinet/tcp_lro.h (working copy) @@ -1,67 +1,75 @@ -/******************************************************************************* +/*- + * Copyright (c) 2006, Myricom Inc. + * Copyright (c) 2008, Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ -Copyright (c) 2006, Myricom Inc. -Copyright (c) 2008, Intel Corporation. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Neither the name of the Myricom Inc, nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - 2. Neither the name of the Intel Corporation, nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - - -$FreeBSD$ - -***************************************************************************/ #ifndef _TCP_LRO_H_ #define _TCP_LRO_H_ -struct lro_entry; struct lro_entry { - SLIST_ENTRY(lro_entry) next; - struct mbuf *m_head; - struct mbuf *m_tail; - int timestamp; - struct ip *ip; - uint32_t tsval; - uint32_t tsecr; - uint32_t source_ip; - uint32_t dest_ip; - uint32_t next_seq; - uint32_t ack_seq; - uint32_t len; - uint32_t data_csum; - uint16_t window; - uint16_t source_port; - uint16_t dest_port; - uint16_t append_cnt; - uint16_t mss; - + SLIST_ENTRY(lro_entry) next; + struct mbuf *m_head; + struct mbuf *m_tail; + union { + struct ip *ip4; + struct ip6_hdr *ip6; + } leip; + union { + in_addr_t s_ip4; + struct in6_addr s_ip6; + } lesource; + union { + in_addr_t d_ip4; + struct in6_addr d_ip6; + } ledest; + uint16_t source_port; + uint16_t dest_port; + uint16_t eh_type; /* EthernetHeader type. */ + uint16_t append_cnt; + uint32_t p_len; /* IP header payload length. */ + uint32_t ulp_csum; /* TCP, etc. checksum. */ + uint32_t next_seq; /* tcp_seq */ + uint32_t ack_seq; /* tcp_seq */ + uint32_t tsval; + uint32_t tsecr; + uint16_t window; + uint16_t timestamp; /* flag, not a TCP hdr field. */ }; SLIST_HEAD(lro_head, lro_entry); +#define le_ip4 leip.ip4 +#define le_ip6 leip.ip6 +#define source_ip4 lesource.s_ip4 +#define dest_ip4 ledest.d_ip4 +#define source_ip6 lesource.s_ip6 +#define dest_ip6 ledest.d_ip6 + +/* NB: This is part of driver structs. */ struct lro_ctrl { struct ifnet *ifp; int lro_queued; @@ -73,13 +81,12 @@ struct lro_ctrl { struct lro_head lro_free; }; - int tcp_lro_init(struct lro_ctrl *); void tcp_lro_free(struct lro_ctrl *); void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *); int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t); -/* Number of LRO entries - these are per rx queue */ -#define LRO_ENTRIES 8 +#define TCP_LRO_CANNOT -1 +#define TCP_LRO_NOT_SUPPORTED 1 #endif /* _TCP_LRO_H_ */ Index: sys/modules =================================================================== --- sys/modules (revision 236006) +++ sys/modules (working copy) Property changes on: sys/modules ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/modules:r235963-235964 Index: sys/modules/ixgbe/Makefile =================================================================== --- sys/modules/ixgbe/Makefile (revision 236006) +++ sys/modules/ixgbe/Makefile (working copy) @@ -2,10 +2,23 @@ .PATH: ${.CURDIR}/../../dev/ixgbe KMOD = ixgbe SRCS = device_if.h bus_if.h pci_if.h +SRCS += opt_inet.h opt_inet6.h SRCS += ixgbe.c ixv.c # Shared source SRCS += ixgbe_common.c ixgbe_api.c ixgbe_phy.c ixgbe_mbx.c ixgbe_vf.c SRCS += ixgbe_82599.c ixgbe_82598.c ixgbe_x540.c CFLAGS+= -I${.CURDIR}/../../dev/ixgbe -DSMP -DIXGBE_FDIR +.if !defined(KERNBUILDDIR) +.if ${MK_INET_SUPPORT} != "no" +opt_inet.h: + @echo "#define INET 1" > ${.TARGET} +.endif + +.if ${MK_INET6_SUPPORT} != "no" +opt_inet6.h: + @echo "#define INET6 1" > ${.TARGET} +.endif +.endif + .include Index: sys/modules/cxgb/cxgb/Makefile =================================================================== --- sys/modules/cxgb/cxgb/Makefile (revision 236006) +++ sys/modules/cxgb/cxgb/Makefile (working copy) @@ -8,9 +8,21 @@ SRCS= cxgb_mc5.c cxgb_vsc8211.c cxgb_ael1002.c cxg SRCS+= cxgb_xgmac.c cxgb_vsc7323.c cxgb_t3_hw.c cxgb_main.c cxgb_aq100x.c SRCS+= cxgb_sge.c cxgb_offload.c cxgb_tn1010.c SRCS+= device_if.h bus_if.h pci_if.h -SRCS+= opt_inet.h opt_zero.h opt_sched.h +SRCS+= opt_inet.h opt_inet6.h opt_zero.h opt_sched.h SRCS+= uipc_mvec.c CFLAGS+= -g -DDEFAULT_JUMBO -I${CXGB} +.if !defined(KERNBUILDDIR) +.if ${MK_INET_SUPPORT} != "no" +opt_inet.h: + @echo "#define INET 1" > ${.TARGET} +.endif + +.if ${MK_INET6_SUPPORT} != "no" +opt_inet6.h: + @echo "#define INET6 1" > ${.TARGET} +.endif +.endif + .include Index: sys/i386/include/in_cksum.h =================================================================== --- sys/i386/include/in_cksum.h (revision 236006) +++ sys/i386/include/in_cksum.h (working copy) @@ -54,6 +54,7 @@ * therefore always exactly five 32-bit words. */ #if defined(__GNUCLIKE_ASM) && !defined(__INTEL_COMPILER) +#if defined(IPVERSION) && (IPVERSION == 4) static __inline u_int in_cksum_hdr(const struct ip *ip) { @@ -88,6 +89,7 @@ in_cksum_update(struct ip *ip) __tmpsum = (int)ntohs(ip->ip_sum) + 256; ip->ip_sum = htons(__tmpsum + (__tmpsum >> 16)); } +#endif static __inline u_short in_addword(u_short sum, u_short b) @@ -121,6 +123,7 @@ in_pseudo(u_int sum, u_int b, u_int c) } #else +#if defined(IPVERSION) && (IPVERSION == 4) #define in_cksum_update(ip) \ do { \ int __tmpsum; \ @@ -129,10 +132,13 @@ in_pseudo(u_int sum, u_int b, u_int c) } while(0) #endif +#endif #ifdef _KERNEL #if !defined(__GNUCLIKE_ASM) || defined(__INTEL_COMPILER) +#if defined(IPVERSION) && (IPVERSION == 4) u_int in_cksum_hdr(const struct ip *ip); +#endif u_short in_addword(u_short sum, u_short b); u_short in_pseudo(u_int sum, u_int b, u_int c); #endif Index: sys/netinet6/udp6_usrreq.c =================================================================== --- sys/netinet6/udp6_usrreq.c (revision 236006) +++ sys/netinet6/udp6_usrreq.c (working copy) @@ -185,6 +185,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) #ifdef IPFIREWALL_FORWARD struct m_tag *fwd_tag; #endif + uint16_t uh_sum; ifp = m->m_pkthdr.rcvif; ip6 = mtod(m, struct ip6_hdr *); @@ -228,7 +229,18 @@ udp6_input(struct mbuf **mp, int *offp, int proto) UDPSTAT_INC(udps_nosum); goto badunlocked; } - if (in6_cksum(m, IPPROTO_UDP, off, ulen) != 0) { + + if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { + if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) + uh_sum = m->m_pkthdr.csum_data; + else + uh_sum = in6_cksum_pseudo(ip6, ulen, + IPPROTO_UDP, m->m_pkthdr.csum_data); + uh_sum ^= 0xffff; + } else + uh_sum = in6_cksum(m, IPPROTO_UDP, off, ulen); + + if (uh_sum != 0) { UDPSTAT_INC(udps_badsum); goto badunlocked; } @@ -771,10 +783,9 @@ udp6_output(struct inpcb *inp, struct mbuf *m, str ip6->ip6_src = *laddr; ip6->ip6_dst = *faddr; - if ((udp6->uh_sum = in6_cksum(m, IPPROTO_UDP, - sizeof(struct ip6_hdr), plen)) == 0) { - udp6->uh_sum = 0xffff; - } + udp6->uh_sum = in6_cksum_pseudo(ip6, plen, IPPROTO_UDP, 0); + m->m_pkthdr.csum_flags = CSUM_UDP; + m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); flags = 0; Index: sys/netinet6/in6.h =================================================================== --- sys/netinet6/in6.h (revision 236006) +++ sys/netinet6/in6.h (working copy) @@ -632,7 +632,9 @@ struct ip6_mtuinfo { #ifdef _KERNEL struct cmsghdr; +struct ip6_hdr; +int in6_cksum_pseudo(struct ip6_hdr *, uint32_t, uint8_t, uint16_t); int in6_cksum __P((struct mbuf *, u_int8_t, u_int32_t, u_int32_t)); int in6_localaddr __P((struct in6_addr *)); int in6_localip(struct in6_addr *); Index: sys/netinet6/in6_src.c =================================================================== --- sys/netinet6/in6_src.c (revision 236006) +++ sys/netinet6/in6_src.c (working copy) @@ -873,8 +873,7 @@ in6_selecthlim(struct inpcb *in6p, struct ifnet *i RTFREE(ro6.ro_rt); if (lifp) return (ND_IFINFO(lifp)->chlim); - } else - return (V_ip6_defhlim); + } } return (V_ip6_defhlim); } Index: sys/netinet6/scope6_var.h =================================================================== --- sys/netinet6/scope6_var.h (revision 236006) +++ sys/netinet6/scope6_var.h (working copy) @@ -54,6 +54,7 @@ int sa6_embedscope __P((struct sockaddr_in6 *, int int sa6_recoverscope __P((struct sockaddr_in6 *)); int in6_setscope __P((struct in6_addr *, struct ifnet *, u_int32_t *)); int in6_clearscope __P((struct in6_addr *)); +uint16_t in6_getscope(struct in6_addr *); #endif /* _KERNEL */ #endif /* _NETINET6_SCOPE6_VAR_H_ */ Index: sys/netinet6/ip6_input.c =================================================================== --- sys/netinet6/ip6_input.c (revision 236006) +++ sys/netinet6/ip6_input.c (working copy) @@ -146,6 +146,9 @@ RW_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6 static void ip6_init2(void *); static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *); +static struct ip6aux *ip6_addaux(struct mbuf *); +static struct ip6aux *ip6_findaux(struct mbuf *m); +static void ip6_delaux (struct mbuf *); static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); #ifdef PULLDOWN_TEST static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int); @@ -326,6 +329,83 @@ ip6_init2(void *dummy) /* This must be after route_init(), which is now SI_ORDER_THIRD */ SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL); +static int +ip6_input_hbh(struct mbuf *m, uint32_t *plen, uint32_t *rtalert, int *off, + int *nxt, int *ours) +{ + struct ip6_hdr *ip6; + struct ip6_hbh *hbh; + + if (ip6_hopopts_input(plen, rtalert, &m, off)) { +#if 0 /*touches NULL pointer*/ + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); +#endif + goto out; /* m have already been freed */ + } + + /* adjust pointer */ + ip6 = mtod(m, struct ip6_hdr *); + + /* + * if the payload length field is 0 and the next header field + * indicates Hop-by-Hop Options header, then a Jumbo Payload + * option MUST be included. + */ + if (ip6->ip6_plen == 0 && *plen == 0) { + /* + * Note that if a valid jumbo payload option is + * contained, ip6_hopopts_input() must set a valid + * (non-zero) payload length to the variable plen. + */ + V_ip6stat.ip6s_badoptions++; + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); + icmp6_error(m, ICMP6_PARAM_PROB, + ICMP6_PARAMPROB_HEADER, + (caddr_t)&ip6->ip6_plen - (caddr_t)ip6); + goto out; + } +#ifndef PULLDOWN_TEST + /* ip6_hopopts_input() ensures that mbuf is contiguous */ + hbh = (struct ip6_hbh *)(ip6 + 1); +#else + IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), + sizeof(struct ip6_hbh)); + if (hbh == NULL) { + V_ip6stat.ip6s_tooshort++; + goto out; + } +#endif + *nxt = hbh->ip6h_nxt; + + /* + * If we are acting as a router and the packet contains a + * router alert option, see if we know the option value. + * Currently, we only support the option value for MLD, in which + * case we should pass the packet to the multicast routing + * daemon. + */ + if (*rtalert != ~0) { + switch (*rtalert) { + case IP6OPT_RTALERT_MLD: + if (V_ip6_forwarding) + *ours = 1; + break; + default: + /* + * RFC2711 requires unrecognized values must be + * silently ignored. + */ + break; + } + } + + return (0); + +out: + return (1); +} + void ip6_input(struct mbuf *m) { @@ -820,71 +900,11 @@ passin: */ plen = (u_int32_t)ntohs(ip6->ip6_plen); if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { - struct ip6_hbh *hbh; + int error; - if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) { -#if 0 /*touches NULL pointer*/ - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); -#endif - goto out; /* m have already been freed */ - } - - /* adjust pointer */ - ip6 = mtod(m, struct ip6_hdr *); - - /* - * if the payload length field is 0 and the next header field - * indicates Hop-by-Hop Options header, then a Jumbo Payload - * option MUST be included. - */ - if (ip6->ip6_plen == 0 && plen == 0) { - /* - * Note that if a valid jumbo payload option is - * contained, ip6_hopopts_input() must set a valid - * (non-zero) payload length to the variable plen. - */ - V_ip6stat.ip6s_badoptions++; - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); - icmp6_error(m, ICMP6_PARAM_PROB, - ICMP6_PARAMPROB_HEADER, - (caddr_t)&ip6->ip6_plen - (caddr_t)ip6); + error = ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours); + if (error != 0) goto out; - } -#ifndef PULLDOWN_TEST - /* ip6_hopopts_input() ensures that mbuf is contiguous */ - hbh = (struct ip6_hbh *)(ip6 + 1); -#else - IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), - sizeof(struct ip6_hbh)); - if (hbh == NULL) { - V_ip6stat.ip6s_tooshort++; - goto out; - } -#endif - nxt = hbh->ip6h_nxt; - - /* - * If we are acting as a router and the packet contains a - * router alert option, see if we know the option value. - * Currently, we only support the option value for MLD, in which - * case we should pass the packet to the multicast routing - * daemon. - */ - if (rtalert != ~0) { - switch (rtalert) { - case IP6OPT_RTALERT_MLD: - if (V_ip6_forwarding) - ours = 1; - break; - default: - /* - * RFC2711 requires unrecognized values must be - * silently ignored. - */ - break; - } - } } else nxt = ip6->ip6_nxt; @@ -1770,7 +1790,7 @@ ip6_lasthdr(struct mbuf *m, int off, int proto, in } } -struct ip6aux * +static struct ip6aux * ip6_addaux(struct mbuf *m) { struct m_tag *mtag; @@ -1787,7 +1807,7 @@ ip6_addaux(struct mbuf *m) return mtag ? (struct ip6aux *)(mtag + 1) : NULL; } -struct ip6aux * +static struct ip6aux * ip6_findaux(struct mbuf *m) { struct m_tag *mtag; @@ -1796,7 +1816,7 @@ ip6_findaux(struct mbuf *m) return mtag ? (struct ip6aux *)(mtag + 1) : NULL; } -void +static void ip6_delaux(struct mbuf *m) { struct m_tag *mtag; Index: sys/netinet6/route6.c =================================================================== --- sys/netinet6/route6.c (revision 236006) +++ sys/netinet6/route6.c (working copy) @@ -62,6 +62,7 @@ route6_input(struct mbuf **mp, int *offp, int prot struct mbuf *m = *mp; struct ip6_rthdr *rh; int off = *offp, rhlen; +#ifdef __notyet__ struct ip6aux *ip6a; ip6a = ip6_findaux(m); @@ -73,6 +74,7 @@ route6_input(struct mbuf **mp, int *offp, int prot return IPPROTO_DONE; } } +#endif #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(*rh), IPPROTO_DONE); Index: sys/netinet6/in6_cksum.c =================================================================== --- sys/netinet6/in6_cksum.c (revision 236006) +++ sys/netinet6/in6_cksum.c (working copy) @@ -80,6 +80,66 @@ __FBSDID("$FreeBSD$"); #define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) #define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; (void)ADDCARRY(sum);} +static int +_in6_cksum_pseudo(struct ip6_hdr *ip6, uint32_t len, uint8_t nxt, uint16_t csum) +{ + int sum; + uint16_t scope, *w; + union { + u_int16_t phs[4]; + struct { + u_int32_t ph_len; + u_int8_t ph_zero[3]; + u_int8_t ph_nxt; + } __packed ph; + } uph; + + sum = csum; + + /* + * First create IP6 pseudo header and calculate a summary. + */ + uph.ph.ph_len = htonl(len); + uph.ph.ph_zero[0] = uph.ph.ph_zero[1] = uph.ph.ph_zero[2] = 0; + uph.ph.ph_nxt = nxt; + + /* Payload length and upper layer identifier. */ + sum += uph.phs[0]; sum += uph.phs[1]; + sum += uph.phs[2]; sum += uph.phs[3]; + + /* IPv6 source address. */ + scope = in6_getscope(&ip6->ip6_src); + w = (u_int16_t *)&ip6->ip6_src; + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; + if (scope != 0) + sum -= scope; + + /* IPv6 destination address. */ + scope = in6_getscope(&ip6->ip6_dst); + w = (u_int16_t *)&ip6->ip6_dst; + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; + if (scope != 0) + sum -= scope; + + return (sum); +} + +int +in6_cksum_pseudo(struct ip6_hdr *ip6, uint32_t len, uint8_t nxt, uint16_t csum) +{ + int sum; + union { + u_int16_t s[2]; + u_int32_t l; + } l_util; + + sum = _in6_cksum_pseudo(ip6, len, nxt, csum); + REDUCE; + return (sum); +} + /* * m MUST contain a contiguous IP6 header. * off is an offset where TCP/UDP/ICMP6 header starts. @@ -89,12 +149,10 @@ __FBSDID("$FreeBSD$"); int in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len) { - u_int16_t *w; - int sum = 0; - int mlen = 0; - int byte_swapped = 0; struct ip6_hdr *ip6; - struct in6_addr in6; + u_int16_t *w, scope; + int byte_swapped, mlen; + int sum; union { u_int16_t phs[4]; struct { @@ -112,43 +170,39 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t u_int32_t l; } l_util; - /* sanity check */ - if (m->m_pkthdr.len < off + len) { - panic("in6_cksum: mbuf len (%d) < off+len (%d+%d)", - m->m_pkthdr.len, off, len); - } + /* Sanity check. */ + KASSERT(m->m_pkthdr.len >= off + len, ("%s: mbuf len (%d) < off(%d)+" + "len(%d)", __func__, m->m_pkthdr.len, off, len)); - bzero(&uph, sizeof(uph)); - /* * First create IP6 pseudo header and calculate a summary. */ - ip6 = mtod(m, struct ip6_hdr *); uph.ph.ph_len = htonl(len); + uph.ph.ph_zero[0] = uph.ph.ph_zero[1] = uph.ph.ph_zero[2] = 0; uph.ph.ph_nxt = nxt; - /* - * IPv6 source address. - * XXX: we'd like to avoid copying the address, but we can't due to - * the possibly embedded scope zone ID. - */ - in6 = ip6->ip6_src; - in6_clearscope(&in6); - w = (u_int16_t *)&in6; + /* Payload length and upper layer identifier. */ + sum = uph.phs[0]; sum += uph.phs[1]; + sum += uph.phs[2]; sum += uph.phs[3]; + + ip6 = mtod(m, struct ip6_hdr *); + + /* IPv6 source address. */ + scope = in6_getscope(&ip6->ip6_src); + w = (u_int16_t *)&ip6->ip6_src; sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; + if (scope != 0) + sum -= scope; - /* IPv6 destination address */ - in6 = ip6->ip6_dst; - in6_clearscope(&in6); - w = (u_int16_t *)&in6; + /* IPv6 destination address. */ + scope = in6_getscope(&ip6->ip6_dst); + w = (u_int16_t *)&ip6->ip6_dst; sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; + if (scope != 0) + sum -= scope; - /* Payload length and upper layer identifier */ - sum += uph.phs[0]; sum += uph.phs[1]; - sum += uph.phs[2]; sum += uph.phs[3]; - /* * Secondly calculate a summary of the first mbuf excluding offset. */ @@ -167,14 +221,16 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t /* * Force to even boundary. */ - if ((1 & (long) w) && (mlen > 0)) { + if ((1 & (long)w) && (mlen > 0)) { REDUCE; sum <<= 8; s_util.c[0] = *(u_char *)w; w = (u_int16_t *)((char *)w + 1); mlen--; byte_swapped = 1; - } + } else + byte_swapped = 0; + /* * Unroll the loop to make overhead from * branches &c small. Index: sys/netinet6/scope6.c =================================================================== --- sys/netinet6/scope6.c (revision 236006) +++ sys/netinet6/scope6.c (working copy) @@ -494,3 +494,16 @@ in6_clearscope(struct in6_addr *in6) return (modified); } + +/* + * Return the scope identifier or zero. + */ +uint16_t +in6_getscope(struct in6_addr *in6) +{ + + if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) + return (in6->s6_addr16[1]); + + return (0); +} Index: sys/netinet6/ip6_var.h =================================================================== --- sys/netinet6/ip6_var.h (revision 236006) +++ sys/netinet6/ip6_var.h (working copy) @@ -388,9 +388,9 @@ char * ip6_get_prevhdr __P((struct mbuf *, int)); int ip6_nexthdr __P((struct mbuf *, int, int, int *)); int ip6_lasthdr __P((struct mbuf *, int, int, int *)); -struct ip6aux *ip6_addaux __P((struct mbuf *)); +#ifdef __notyet__ struct ip6aux *ip6_findaux __P((struct mbuf *)); -void ip6_delaux __P((struct mbuf *)); +#endif extern int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *); Index: sys/netinet6/icmp6.c =================================================================== --- sys/netinet6/icmp6.c (revision 236006) +++ sys/netinet6/icmp6.c (working copy) @@ -2418,23 +2418,23 @@ icmp6_redirect_input(struct mbuf *m, int off) if (rt) { if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6) { + RTFREE_LOCKED(rt); nd6log((LOG_ERR, "ICMP6 redirect rejected; no route " "with inet6 gateway found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); - RTFREE_LOCKED(rt); goto bad; } gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr); if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) { + RTFREE_LOCKED(rt); nd6log((LOG_ERR, "ICMP6 redirect rejected; " "not equal to gw-for-src=%s (must be same): " "%s\n", ip6_sprintf(ip6buf, gw6), icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); - RTFREE_LOCKED(rt); goto bad; } } else { Index: sys/netinet6/ip6_output.c =================================================================== --- sys/netinet6/ip6_output.c (revision 236006) +++ sys/netinet6/ip6_output.c (working copy) @@ -83,6 +83,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include + #include #include #include @@ -182,6 +184,29 @@ static int copypktopts(struct ip6_pktopts *, struc }\ } while (/*CONSTCOND*/ 0) +static void +in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset) +{ + u_short csum; + + csum = in_cksum_skip(m, ntohl(plen), offset); + if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) + csum = 0xffff; + offset += m->m_pkthdr.csum_data; /* checksum offset */ + + if (offset + sizeof(u_short) > m->m_len) { + printf("%s: delayed m_pullup, m->len: %d off: %d\n", + __func__, m->m_len, offset); + /* + * XXX this should not happen, but if it does, the correct + * behavior may be to insert the checksum in the appropriate + * next mbuf in the chain. + */ + return; + } + *(u_short *)(m->m_data + offset) = csum; +} + /* * IP6 output. The packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). @@ -221,9 +246,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *op int flevalid = 0; int hdrsplit = 0; int needipsec = 0; -#ifdef SCTP - int sw_csum; -#endif + int sw_csum, tso; #ifdef IPSEC struct ipsec_output_state state; struct ip6_rthdr *rh = NULL; @@ -867,8 +890,6 @@ again: CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } - m->m_pkthdr.csum_flags |= - CSUM_IP_CHECKED | CSUM_IP_VALID; #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; @@ -891,7 +912,7 @@ again: } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP) - m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; + m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; @@ -927,16 +948,32 @@ passout: * 4: if dontfrag == 1 && alwaysfrag == 1 * error, as we cannot handle this conflicting request */ + sw_csum = m->m_pkthdr.csum_flags; + if (!hdrsplit) { + tso = ((sw_csum & ifp->if_hwassist & CSUM_TSO) != 0) ? 1 : 0; + sw_csum &= ~ifp->if_hwassist; + } else + tso = 0; + /* + * If we added extension headers, we will not do TSO and calculate the + * checksums ourselves for now. + * XXX-BZ Need a framework to know when the NIC can handle it, even + * with ext. hdrs. + */ + if (sw_csum & CSUM_DELAY_DATA) { + sw_csum &= ~CSUM_DELAY_DATA; + in6_delayed_cksum(m, ip6->ip6_plen, sizeof(struct ip6_hdr)); + } #ifdef SCTP - sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; if (sw_csum & CSUM_SCTP) { + sw_csum &= ~CSUM_SCTP; sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); - sw_csum &= ~CSUM_SCTP; } #endif + m->m_pkthdr.csum_flags &= ifp->if_hwassist; tlen = m->m_pkthdr.len; - if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) + if ((opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) || tso) dontfrag = 1; else dontfrag = 0; @@ -945,7 +982,7 @@ passout: error = EMSGSIZE; goto bad; } - if (dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */ + if (dontfrag && tlen > IN6_LINKMTU(ifp) && !tso) { /* case 2-b */ /* * Even if the DONTFRAG option is specified, we cannot send the * packet when the data length is larger than the MTU of the @@ -1033,6 +1070,22 @@ passout: goto bad; } + + /* + * If the interface will not calculate checksums on + * fragmented packets, then do it here. + * XXX-BZ handle the hw offloading case. Need flags. + */ + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + in6_delayed_cksum(m, ip6->ip6_plen, sizeof(*ip6)); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } +#ifdef SCTP + if (m->m_pkthdr.csum_flags & CSUM_SCTP) { + sctp_delayed_cksum(m, hlen); + m->m_pkthdr.csum_flags &= ~CSUM_SCTP; + } +#endif mnext = &m->m_nextpkt; /* Index: sys/netinet6/ip6_forward.c =================================================================== --- sys/netinet6/ip6_forward.c (revision 236006) +++ sys/netinet6/ip6_forward.c (working copy) @@ -586,8 +586,6 @@ skip_routing: CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } - m->m_pkthdr.csum_flags |= - CSUM_IP_CHECKED | CSUM_IP_VALID; #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; Index: sys/amd64/include/in_cksum.h =================================================================== --- sys/amd64/include/in_cksum.h (revision 236006) +++ sys/amd64/include/in_cksum.h (working copy) @@ -43,6 +43,7 @@ #define in_cksum(m, len) in_cksum_skip(m, len, 0) +#if defined(IPVERSION) && (IPVERSION == 4) /* * It it useful to have an Internet checksum routine which is inlineable * and optimized specifically for the task of computing IP header checksums @@ -69,9 +70,12 @@ in_cksum_update(struct ip *ip) } while(0) #endif +#endif #ifdef _KERNEL +#if defined(IPVERSION) && (IPVERSION == 4) u_int in_cksum_hdr(const struct ip *ip); +#endif u_short in_addword(u_short sum, u_short b); u_short in_pseudo(u_int sum, u_int b, u_int c); u_short in_cksum_skip(struct mbuf *m, int len, int skip); Index: sys/powerpc/include/in_cksum.h =================================================================== --- sys/powerpc/include/in_cksum.h (revision 236006) +++ sys/powerpc/include/in_cksum.h (working copy) @@ -39,6 +39,7 @@ #define in_cksum(m, len) in_cksum_skip(m, len, 0) +#if defined(IPVERSION) && (IPVERSION == 4) /* * It it useful to have an Internet checksum routine which is inlineable * and optimized specifically for the task of computing IP header checksums @@ -65,9 +66,12 @@ in_cksum_update(struct ip *ip) } while(0) #endif +#endif #ifdef _KERNEL +#if defined(IPVERSION) && (IPVERSION == 4) u_int in_cksum_hdr(const struct ip *ip); +#endif u_short in_addword(u_short sum, u_short b); u_short in_pseudo(u_int sum, u_int b, u_int c); u_short in_cksum_skip(struct mbuf *m, int len, int skip); Index: sys/arm/include/in_cksum.h =================================================================== --- sys/arm/include/in_cksum.h (revision 236006) +++ sys/arm/include/in_cksum.h (working copy) @@ -46,7 +46,9 @@ u_short in_cksum(struct mbuf *m, int len); u_short in_addword(u_short sum, u_short b); u_short in_cksum_skip(struct mbuf *m, int len, int skip); u_int do_cksum(const void *, int); +#if defined(IPVERSION) && (IPVERSION == 4) u_int in_cksum_hdr(const struct ip *); +#endif static __inline u_short in_pseudo(u_int sum, u_int b, u_int c) Index: sys/mips/include/in_cksum.h =================================================================== --- sys/mips/include/in_cksum.h (revision 236006) +++ sys/mips/include/in_cksum.h (working copy) @@ -40,6 +40,7 @@ #define in_cksum(m, len) in_cksum_skip(m, len, 0) +#if defined(IPVERSION) && (IPVERSION == 4) /* * It it useful to have an Internet checksum routine which is inlineable * and optimized specifically for the task of computing IP header checksums @@ -66,9 +67,12 @@ in_cksum_update(struct ip *ip) } while(0) #endif +#endif #ifdef _KERNEL +#if defined(IPVERSION) && (IPVERSION == 4) u_int in_cksum_hdr(const struct ip *ip); +#endif u_short in_addword(u_short sum, u_short b); u_short in_pseudo(u_int sum, u_int b, u_int c); u_short in_cksum_skip(struct mbuf *m, int len, int skip); Index: sys/dev =================================================================== --- sys/dev (revision 236006) +++ sys/dev (working copy) Property changes on: sys/dev ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/dev:r231317,235944,235963-235964 Index: sys/dev/cxgbe/adapter.h =================================================================== --- sys/dev/cxgbe/adapter.h (revision 236006) +++ sys/dev/cxgbe/adapter.h (working copy) @@ -44,6 +44,7 @@ #include #include #include +#include #include #include "offload.h" Index: sys/dev/ixgbe =================================================================== --- sys/dev/ixgbe (revision 236006) +++ sys/dev/ixgbe (working copy) Property changes on: sys/dev/ixgbe ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/dev/ixgbe:r235964 Index: sys/dev/ixgbe/ixgbe.c =================================================================== --- sys/dev/ixgbe/ixgbe.c (revision 236006) +++ sys/dev/ixgbe/ixgbe.c (working copy) @@ -162,7 +162,7 @@ static void ixgbe_dma_free(struct adapter *, s static void ixgbe_add_rx_process_limit(struct adapter *, const char *, const char *, int *, int); static bool ixgbe_tx_ctx_setup(struct tx_ring *, struct mbuf *); -static bool ixgbe_tso_setup(struct tx_ring *, struct mbuf *, u32 *); +static bool ixgbe_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *); static void ixgbe_set_ivar(struct adapter *, u8, u8, s8); static void ixgbe_configure_ivars(struct adapter *); static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); @@ -997,6 +997,8 @@ ixgbe_ioctl(struct ifnet * ifp, u_long command, ca ifp->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; + if (mask & IFCAP_TSO6) + ifp->if_capenable ^= IFCAP_TSO6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) @@ -1061,7 +1063,7 @@ ixgbe_init_locked(struct adapter *adapter) /* Set the various hardware offload abilities */ ifp->if_hwassist = 0; - if (ifp->if_capenable & IFCAP_TSO4) + if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); @@ -1767,9 +1769,8 @@ ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_he ** a packet. */ if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { - if (ixgbe_tso_setup(txr, m_head, &paylen)) { + if (ixgbe_tso_setup(txr, m_head, &paylen, &olinfo_status)) { cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; - olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT; ++adapter->tso_tx; @@ -2562,7 +2563,7 @@ ixgbe_setup_interface(device_t dev, struct adapter */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); - ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_VLAN_HWCSUM; + ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM; ifp->if_capabilities |= IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO @@ -3234,6 +3235,7 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbu case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); ip_hlen = sizeof(struct ip6_hdr); + /* XXX-BZ this will go badly in case of ext hdrs. */ ipproto = ip6->ip6_nxt; type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; break; @@ -3292,17 +3294,23 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbu * **********************************************************************/ static bool -ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *paylen) +ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *paylen, + u32 *olinfo_status) { struct adapter *adapter = txr->adapter; struct ixgbe_adv_tx_context_desc *TXD; struct ixgbe_tx_buf *tx_buffer; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; - u32 mss_l4len_idx = 0; - u16 vtag = 0; - int ctxd, ehdrlen, hdrlen, ip_hlen, tcp_hlen; + u32 mss_l4len_idx = 0, len; + u16 vtag = 0, eh_type; + int ctxd, ehdrlen, ip_hlen, tcp_hlen; struct ether_vlan_header *eh; +#ifdef INET6 + struct ip6_hdr *ip6; +#endif +#ifdef INET struct ip *ip; +#endif struct tcphdr *th; @@ -3311,32 +3319,62 @@ static bool * Jump over vlan headers if already present */ eh = mtod(mp, struct ether_vlan_header *); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - else + eh_type = eh->evl_proto; + } else { ehdrlen = ETHER_HDR_LEN; + eh_type = eh->evl_encap_proto; + } /* Ensure we have at least the IP+TCP header in the first mbuf. */ - if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr)) - return FALSE; + len = ehdrlen + sizeof(struct tcphdr); + switch (ntohs(eh_type)) { +#ifdef INET6 + case ETHERTYPE_IPV6: + if (mp->m_len < len + sizeof(struct ip6_hdr)) + return FALSE; + ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); + /* XXX-BZ For now we do not pretend to support ext. hdrs. */ + if (ip6->ip6_nxt != IPPROTO_TCP) + return FALSE; + ip_hlen = sizeof(struct ip6_hdr); + th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); + th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; + break; +#endif +#ifdef INET + case ETHERTYPE_IP: + if (mp->m_len < len + sizeof(struct ip)) + return FALSE; + ip = (struct ip *)(mp->m_data + ehdrlen); + if (ip->ip_p != IPPROTO_TCP) + return FALSE; + ip->ip_sum = 0; + ip_hlen = ip->ip_hl << 2; + th = (struct tcphdr *)((caddr_t)ip + ip_hlen); + th->th_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htons(IPPROTO_TCP)); + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; + /* Tell transmit desc to also do IPv4 checksum. */ + *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; + break; +#endif + default: + panic("%s: CSUM_TSO but no supported IP version (0x%04x)", + __func__, ntohs(eh_type)); + break; + } ctxd = txr->next_avail_desc; tx_buffer = &txr->tx_buffers[ctxd]; TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; - ip = (struct ip *)(mp->m_data + ehdrlen); - if (ip->ip_p != IPPROTO_TCP) - return FALSE; /* 0 */ - ip->ip_sum = 0; - ip_hlen = ip->ip_hl << 2; - th = (struct tcphdr *)((caddr_t)ip + ip_hlen); - th->th_sum = in_pseudo(ip->ip_src.s_addr, - ip->ip_dst.s_addr, htons(IPPROTO_TCP)); tcp_hlen = th->th_off << 2; - hdrlen = ehdrlen + ip_hlen + tcp_hlen; /* This is used in the transmit desc in encap */ - *paylen = mp->m_pkthdr.len - hdrlen; + *paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; /* VLAN MACLEN IPLEN */ if (mp->m_flags & M_VLANTAG) { @@ -3351,10 +3389,8 @@ static bool /* ADV DTYPE TUCMD */ type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl); - /* MSS L4LEN IDX */ mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT); mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); @@ -4295,15 +4331,17 @@ ixgbe_rx_input(struct rx_ring *rxr, struct ifnet * { /* - * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet + * ATM LRO is only for IP/TCP packets and TCP checksum of the packet * should be computed by hardware. Also it should not have VLAN tag in - * ethernet header. + * ethernet header. In case of IPv6 we do not yet support ext. hdrs. */ if (rxr->lro_enabled && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && - (ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) == - (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) && + ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) == + (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) || + (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) == + (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) && (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { /* Index: sys/dev/cxgb/cxgb_sge.c =================================================================== --- sys/dev/cxgb/cxgb_sge.c (revision 236006) +++ sys/dev/cxgb/cxgb_sge.c (working copy) @@ -30,6 +30,7 @@ POSSIBILITY OF SUCH DAMAGE. #include __FBSDID("$FreeBSD$"); +#include "opt_inet6.h" #include "opt_inet.h" #include @@ -62,6 +63,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -1492,10 +1494,10 @@ t3_encap(struct sge_qset *qs, struct mbuf **m) check_ring_tx_db(sc, txq, 0); return (0); } else if (tso_info) { - int eth_type; + uint16_t eth_type; struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; struct ether_header *eh; - struct ip *ip; + void *l3hdr; struct tcphdr *tcp; txd->flit[2] = 0; @@ -1521,18 +1523,37 @@ t3_encap(struct sge_qset *qs, struct mbuf **m) } eh = mtod(m0, struct ether_header *); - if (eh->ether_type == htons(ETHERTYPE_VLAN)) { - eth_type = CPL_ETH_II_VLAN; - ip = (struct ip *)((struct ether_vlan_header *)eh + 1); + eth_type = eh->ether_type; + if (eth_type == htons(ETHERTYPE_VLAN)) { + struct ether_vlan_header *evh = (void *)eh; + + tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN); + l3hdr = evh + 1; + eth_type = evh->evl_proto; } else { - eth_type = CPL_ETH_II; - ip = (struct ip *)(eh + 1); + tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II); + l3hdr = eh + 1; } - tcp = (struct tcphdr *)(ip + 1); - tso_info |= V_LSO_ETH_TYPE(eth_type) | - V_LSO_IPHDR_WORDS(ip->ip_hl) | - V_LSO_TCPHDR_WORDS(tcp->th_off); + if (eth_type == htons(ETHERTYPE_IP)) { + struct ip *ip = l3hdr; + + tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl); + tcp = (struct tcphdr *)(ip + 1); + } else if (eth_type == htons(ETHERTYPE_IPV6)) { + struct ip6_hdr *ip6 = l3hdr; + + KASSERT(ip6->ip6_nxt == IPPROTO_TCP, + ("%s: CSUM_TSO with ip6_nxt %d", + __func__, ip6->ip6_nxt)); + + tso_info |= F_LSO_IPV6; + tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2); + tcp = (struct tcphdr *)(ip6 + 1); + } else + panic("%s: CSUM_TSO but neither ip nor ip6", __func__); + + tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off); hdr->lso_info = htonl(tso_info); if (__predict_false(mlen <= PIO_LEN)) { @@ -2065,7 +2086,7 @@ t3_free_qset(adapter_t *sc, struct sge_qset *q) MTX_DESTROY(&q->rspq.lock); } -#ifdef INET +#if defined(INET6) || defined(INET) tcp_lro_free(&q->lro.ctrl); #endif @@ -2648,7 +2669,7 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int npo /* Allocate and setup the lro_ctrl structure */ q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); -#ifdef INET +#if defined(INET6) || defined(INET) ret = tcp_lro_init(&q->lro.ctrl); if (ret) { printf("error %d from tcp_lro_init\n", ret); @@ -2941,9 +2962,11 @@ process_responses(adapter_t *adap, struct sge_qset struct rsp_desc *r = &rspq->desc[rspq->cidx]; int budget_left = budget; unsigned int sleeping = 0; +#if defined(INET6) || defined(INET) int lro_enabled = qs->lro.enabled; int skip_lro; struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; +#endif struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; int ngathered = 0; struct t3_mbuf_hdr *mh = &rspq->rspq_mh; @@ -3062,15 +3085,16 @@ process_responses(adapter_t *adap, struct sge_qset * The mbuf's rcvif was derived from the cpl header and * is accurate. Skip LRO and just use that. */ +#if defined(INET6) || defined(INET) skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro -#ifdef INET && (tcp_lro_rx(lro_ctrl, m, 0) == 0) -#endif ) { /* successfully queue'd for LRO */ - } else { + } else +#endif + { /* * LRO not enabled, packet unsuitable for LRO, * or unable to queue. Pass it up right now in @@ -3089,7 +3113,7 @@ process_responses(adapter_t *adap, struct sge_qset deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); -#ifdef INET +#if defined(INET6) || defined(INET) /* Flush LRO */ while (!SLIST_EMPTY(&lro_ctrl->lro_active)) { struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active); Index: sys/dev/cxgb/cxgb_main.c =================================================================== --- sys/dev/cxgb/cxgb_main.c (revision 236006) +++ sys/dev/cxgb/cxgb_main.c (working copy) @@ -982,7 +982,7 @@ cxgb_makedev(struct port_info *pi) #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \ IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \ IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE) -#define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6) +#define CXGB_CAP_ENABLE CXGB_CAP static int cxgb_port_attach(device_t dev) @@ -2059,8 +2059,8 @@ fail: } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; - if (mask & IFCAP_TSO4) { - ifp->if_capenable ^= IFCAP_TSO4; + if (mask & IFCAP_TSO) { + ifp->if_capenable ^= IFCAP_TSO; if (IFCAP_TSO & ifp->if_capenable) { if (IFCAP_TXCSUM & ifp->if_capenable) Index: sys/dev/cxgb/cxgb_adapter.h =================================================================== --- sys/dev/cxgb/cxgb_adapter.h (revision 236006) +++ sys/dev/cxgb/cxgb_adapter.h (working copy) @@ -46,6 +46,7 @@ $FreeBSD$ #include #include #include +#include #include #include Index: sys/net/if_loop.c =================================================================== --- sys/net/if_loop.c (revision 236006) +++ sys/net/if_loop.c (working copy) @@ -247,12 +247,12 @@ looutput(struct ifnet *ifp, struct mbuf *m, struct #if 1 /* XXX */ switch (dst->sa_family) { case AF_INET: + case AF_INET6: if (ifp->if_capenable & IFCAP_RXCSUM) { m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = LO_CSUM_SET; } m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES; - case AF_INET6: case AF_IPX: case AF_APPLETALK: break; Index: sbin/ifconfig =================================================================== --- sbin/ifconfig (revision 236006) +++ sbin/ifconfig (working copy) Property changes on: sbin/ifconfig ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sbin/ifconfig:r235671 Index: sbin/ifconfig/ifconfig.8 =================================================================== --- sbin/ifconfig/ifconfig.8 (revision 236006) +++ sbin/ifconfig/ifconfig.8 (working copy) @@ -28,7 +28,7 @@ .\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94 .\" $FreeBSD$ .\" -.Dd March 4, 2012 +.Dd May 19, 2012 .Dt IFCONFIG 8 .Os .Sh NAME @@ -400,6 +400,22 @@ It will always disable TSO for .Xr ip 4 and .Xr ip6 4 . +.It Cm tso6 , tso4 +If the driver supports +.Xr tcp 4 +segmentation offloading for +.Xr ip6 4 +or +.Xr ip 4 +use one of these to selectively enabled it only for one protocol family. +.It Fl tso6 , tso4 +If the driver supports +.Xr tcp 4 +segmentation offloading for +.Xr ip6 4 +or +.Xr ip 4 +use one of these to selectively disable it only for one protocol family. .It Cm lro If the driver supports .Xr tcp 4 Index: sbin/ifconfig/ifconfig.c =================================================================== --- sbin/ifconfig/ifconfig.c (revision 236006) +++ sbin/ifconfig/ifconfig.c (working copy) @@ -1178,6 +1178,10 @@ static struct cmd basic_cmds[] = { DEF_CMD("-netcons", -IFCAP_NETCONS, setifcap), DEF_CMD("polling", IFCAP_POLLING, setifcap), DEF_CMD("-polling", -IFCAP_POLLING, setifcap), + DEF_CMD("tso6", IFCAP_TSO6, setifcap), + DEF_CMD("-tso6", -IFCAP_TSO6, setifcap), + DEF_CMD("tso4", IFCAP_TSO4, setifcap), + DEF_CMD("-tso4", -IFCAP_TSO4, setifcap), DEF_CMD("tso", IFCAP_TSO, setifcap), DEF_CMD("-tso", -IFCAP_TSO, setifcap), DEF_CMD("lro", IFCAP_LRO, setifcap),