Index: usr.bin/netstat/inet.c =================================================================== --- usr.bin/netstat/inet.c (revision 221564) +++ usr.bin/netstat/inet.c (working copy) @@ -622,6 +622,7 @@ "\t\t%lu data packet%s (%lu byte%s) retransmitted\n"); p(tcps_sndrexmitbad, "\t\t%lu data packet%s unnecessarily retransmitted\n"); + p(tcps_sndearlyrexmit, "\t%lu packet%s early retransmitted\n"); p(tcps_mturesent, "\t\t%lu resend%s initiated by MTU discovery\n"); p2a(tcps_sndacks, tcps_delack, "\t\t%lu ack-only packet%s (%lu delayed)\n"); Index: share/man/man4/Makefile =================================================================== --- share/man/man4/Makefile (revision 221564) +++ share/man/man4/Makefile (working copy) @@ -140,6 +140,7 @@ gpib.4 \ gre.4 \ h_ertt.4 \ + h_tcper.4 \ harp.4 \ hatm.4 \ hfa.4 \ Index: share/man/man4/h_tcper.4 =================================================================== --- share/man/man4/h_tcper.4 (revision 0) +++ share/man/man4/h_tcper.4 (revision 0) @@ -0,0 +1,60 @@ +.\" +.\" Copyright (c) 2011 Weongyo Jeong +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR +.\" ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd May 4, 2011 +.Dt H_TCPER 4 +.Os +.Sh NAME +.Nm h_tcper +.Nd Segment Boundary Tracker Khelp module +.Sh SYNOPSIS +.In netinet/khelp/h_tcper.h +.Sh DESCRIPTION +The +.Nm +Khelp module works within the +.Xr khelp 9 +framework to provide TCP with a per-connection segment boundary tracker. +Its purpose is to determine how many segments have actually been +transmitted but not yet acknowledged. +The sender does this by tracking the boundaries of +the four segments at the right edge of the current window. +.Sh SEE ALSO +.Xr hhook 9 , +.Xr khelp 9 +.Sh HISTORY +The +.Nm +module first appeared in +.Fx 9.0 . +The module was first released in 2011 by Weongyo Jeong. +.Sh AUTHORS +.An -nosplit +The +.Nm +Khelp module and this manual page were written by +.An Weongyo Jeong Aq weongyo@freebsd.org . 
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c	(revision 221564)
+++ sys/netinet/tcp_input.c	(working copy)
@@ -55,6 +55,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include		/* for proc0 declaration */
@@ -101,6 +102,7 @@
 #ifdef TCPDEBUG
 #include
 #endif /* TCPDEBUG */
+#include
 
 #ifdef IPSEC
 #include
@@ -161,6 +163,11 @@
     &VNET_NAME(tcp_abc_l_var), 2,
     "Cap the max cwnd increment during slow-start to this number of segments");
 
+VNET_DEFINE(int, tcp_do_rfc5827) = 0;
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc5827, CTLFLAG_RW,
+    &VNET_NAME(tcp_do_rfc5827), 0,
+    "Enable RFC 5827 (Early Retransmit)");
+
 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, CTLFLAG_RW, 0, "TCP ECN");
 
 VNET_DEFINE(int, tcp_do_ecn) = 0;
@@ -260,6 +267,92 @@
 	}
 }
 
+/*
+ * Return the duplicate ACK threshold at which fast retransmit is triggered
+ * for this connection.
+ *
+ * If net.inet.tcp.rfc5827 is enabled, data is outstanding and no new data
+ * can be sent, the reduced Early Retransmit threshold (RFC 5827) is
+ * returned; in every other case the default tcprexmtthresh is returned.
+ * The segment-based variant is used when the "tcper" Khelp module has data
+ * attached to the connection, the byte-based variant otherwise.
+ */
+static int
+tcp_getrexmtthresh(struct tcpcb *tp)
+{
+#define	iceildiv(n, d)	(((n) + (d) - 1) / (d))
+	struct inpcb *inp = tp->t_inpcb;
+	struct socket *so = inp->inp_socket;
+	struct tcper *te;
+	uint32_t oseg, ownd, sent;
+	int32_t tcper_id;
+	int ER_thresh;
+
+	if (!V_tcp_do_rfc5827)
+		goto def;
+	if ((sent = tp->snd_max - tp->snd_una) == 0)
+		goto def;
+	/*
+	 * Both variants require that no new data can be sent: either all
+	 * buffered data is already in flight or the send window is closed.
+	 */
+	if (sent != so->so_snd.sb_cc && tp->snd_wnd != 0)
+		goto def;
+
+	tcper_id = khelp_get_id("tcper");
+	/*
+	 * NOTE(review): presumably a connection can lack tcper data (for
+	 * example one created before the module was loaded -- confirm), so
+	 * a NULL result falls back to the byte-based variant instead of
+	 * being asserted and dereferenced.
+	 */
+	te = (tcper_id > 0) ? khelp_get_osd(tp->osd, tcper_id) : NULL;
+	if (te != NULL) {
+		/*
+		 * Segment-Based Early Retransmit
+		 *
+		 * A single outstanding segment would give a threshold of
+		 * zero.  The caller pre-increments t_dupacks before comparing,
+		 * so zero can never match its "== threshold" test and the
+		 * connection would be handled as if already in fast recovery
+		 * without retransmitting.  Require at least two segments.
+		 */
+		oseg = te->numseg;
+		if (oseg < 2 || oseg >= 4)
+			goto def;
+		ER_thresh = oseg - 1;
+		if ((tp->t_flags & TF_SACK_PERMIT) == 0 ||
+		    tcp_sack_oseg(tp, te->segb, te->sege,
+		    TCP_ER_MAXSEGB, ER_thresh)) {
+			TCPSTAT_INC(tcps_sndearlyrexmit);
+			return (ER_thresh);
+		}
+		goto def;
+	}
+
+	/*
+	 * Byte-Based Early Retransmit
+	 *
+	 * As above, a threshold below one (at most one MSS outstanding) is
+	 * rejected; this also guarantees ownd > t_maxseg for the subtraction.
+	 */
+	ownd = sent;
+	if (ownd >= (4 * tp->t_maxseg))
+		goto def;
+	ER_thresh = iceildiv(ownd, tp->t_maxseg) - 1;
+	if (ER_thresh < 1)
+		goto def;
+	if (((tp->t_flags & TF_SACK_PERMIT) == 0 ||
+	    tcp_sack_ownd(tp, ownd - tp->t_maxseg)) &&
+	    tp->t_dupacks + 1 >= ER_thresh) {
+		TCPSTAT_INC(tcps_sndearlyrexmit);
+		return (ER_thresh);
+	}
+def:
+	return (tcprexmtthresh);
+#undef iceildiv
+}
+
 /*
  * CC wrapper hook functions
  */
@@ -2372,6 +2465,10 @@
 
 		if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
 			if (tlen == 0 && tiwin == tp->snd_wnd) {
+				int rexmtthresh;
+
+				rexmtthresh = tcp_getrexmtthresh(tp);
+
 				TCPSTAT_INC(tcps_rcvdupack);
 				/*
 				 * If we have outstanding data (other than
@@ -2403,7 +2500,7 @@
 				if (!tcp_timer_active(tp, TT_REXMT) ||
 				    th->th_ack != tp->snd_una)
 					tp->t_dupacks = 0;
-				else if (++tp->t_dupacks > tcprexmtthresh ||
+				else if (++tp->t_dupacks > rexmtthresh ||
 				    IN_FASTRECOVERY(tp->t_flags)) {
 					cc_ack_received(tp, th, CC_DUPACK);
 					if ((tp->t_flags & TF_SACK_PERMIT) &&
@@ -2427,7 +2524,7 @@
 						tp->snd_cwnd += tp->t_maxseg;
 					(void) tcp_output(tp);
 					goto drop;
-				} else if (tp->t_dupacks == tcprexmtthresh) {
+				} else if (tp->t_dupacks == rexmtthresh) {
 					tcp_seq onxt = tp->snd_nxt;
 
 					/*
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h	(revision 221564)
+++ sys/netinet/tcp_var.h	(working copy)
@@ -493,6 +493,9 @@
 	u_long	tcps_sig_err_sigopt;	/* No signature expected by socket */
 	u_long	tcps_sig_err_nosigopt;	/* No signature provided by segment */
 
+	/* Early Retransmit */
+	u_long	tcps_sndearlyrexmit;	/* early Fast Retransmissions */
+
 	u_long	_pad[12];		/* 6 UTO, 6 TBD */
 };
 
@@ -610,6 +613,7 @@
 VNET_DECLARE(int, ss_fltsz_local);
 VNET_DECLARE(int, tcp_do_rfc3465);
 VNET_DECLARE(int, tcp_abc_l_var);
+VNET_DECLARE(int, tcp_do_rfc5827);
 #define	V_tcb			VNET(tcb)
 #define	V_tcbinfo		VNET(tcbinfo)
 #define	V_tcpstat		VNET(tcpstat)
@@ -622,6 +626,7 @@
 #define	V_ss_fltsz_local	VNET(ss_fltsz_local)
 #define	V_tcp_do_rfc3465	VNET(tcp_do_rfc3465)
 #define	V_tcp_abc_l_var		VNET(tcp_abc_l_var)
+#define	V_tcp_do_rfc5827	VNET(tcp_do_rfc5827)
 
 VNET_DECLARE(int, tcp_do_sack);			/* SACK enabled/disabled */
 VNET_DECLARE(int, tcp_sc_rst_sock_fail);	/* RST on
sock alloc failure */
@@ -726,6 +731,9 @@
 struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
 void	 tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
 void	 tcp_free_sackholes(struct tcpcb *tp);
+boolean_t tcp_sack_ownd(struct tcpcb *tp, uint32_t amount);
+boolean_t tcp_sack_oseg(struct tcpcb *tp, tcp_seq *segb, tcp_seq *sege,
+	    uint32_t numseq, uint32_t amount);
 int	 tcp_newreno(struct tcpcb *, struct tcphdr *);
 u_long	 tcp_seq_subtract(u_long, u_long );
 
Index: sys/netinet/khelp/h_tcper.c
===================================================================
--- sys/netinet/khelp/h_tcper.c	(revision 0)
+++ sys/netinet/khelp/h_tcper.c	(revision 0)
@@ -0,0 +1,214 @@
+/*-
+ * Copyright (c) 2011 Weongyo Jeong
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+/*
+ * HHOOK_TCP_EST_IN hook: update the tracked segment boundaries for the ACK
+ * carried by an incoming segment.
+ *
+ * Every slot whose starting sequence number is below th_ack is treated as
+ * acknowledged and collapsed to an empty range at snd_una.  When the most
+ * recently recorded segment (te->max) is among them, tracking is disarmed
+ * until tcper_output() records a new segment.  Always returns 0.
+ */
+static int
+tcper_input(int hhook_type, int hhook_id, void *udata, void *ctx_data,
+    void *hdata, struct osd *hosd)
+{
+	struct tcper *te;
+	struct tcp_hhook_data *thdp;
+	struct tcpcb *tp;
+	struct tcphdr *th;
+	int acked, i;
+
+	KASSERT(ctx_data != NULL, ("%s: ctx_data is NULL!", __func__));
+	KASSERT(hdata != NULL, ("%s: hdata is NULL!", __func__));
+
+	te = (struct tcper *)hdata;
+	thdp = ctx_data;
+	tp = thdp->tp;
+	th = thdp->th;
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	if ((te->flags & TCP_ER_HAVEONE) == 0)
+		return (0);
+
+	acked = 0;
+	for (i = 0; i < TCP_ER_MAXSEGB; i++) {
+		if (!SEQ_GT(th->th_ack, te->segb[i]))
+			continue;
+		if (te->segb[i] == te->max) {
+			te->max = tp->snd_una;
+			te->flags &= ~TCP_ER_HAVEONE;
+		}
+		te->segb[i] = tp->snd_una;
+		te->sege[i] = tp->snd_una;
+		acked = 1;
+	}
+	if (acked) {
+		/*
+		 * Recount rather than decrement.  Segments shifted out of the
+		 * window by tcper_output() are older than every tracked one,
+		 * so a cumulative ACK that reaches a tracked slot covers them
+		 * too; decrementing only for tracked slots would leave numseg
+		 * too high for the rest of the connection.
+		 */
+		te->numseg = 0;
+		for (i = 0; i < TCP_ER_MAXSEGB; i++) {
+			if (te->segb[i] != te->sege[i])
+				te->numseg++;
+		}
+	}
+	return (0);
+}
+
+/*
+ * HHOOK_TCP_EST_OUT hook: record the boundaries of a newly sent data segment.
+ *
+ * The boundaries are kept oldest first in segb[]/sege[], with the newest
+ * segment in the last slot and its starting sequence number in te->max.
+ * Zero-length segments and segments starting at or below te->max
+ * (retransmissions) are ignored.  Always returns 0.
+ */
+static int
+tcper_output(int hhook_type, int hhook_id, void *udata, void *ctx_data,
+    void *hdata, struct osd *hosd)
+{
+	struct tcper *te;
+	struct tcp_hhook_data *thdp;
+	struct tcpcb *tp;
+	struct tcphdr *th;
+	tcp_seq seq;
+	long len;
+	int i;
+
+	KASSERT(ctx_data != NULL, ("%s: ctx_data is NULL!", __func__));
+	KASSERT(hdata != NULL, ("%s: hdata is NULL!", __func__));
+
+	te = (struct tcper *)hdata;
+	thdp = ctx_data;
+	tp = thdp->tp;
+	th = thdp->th;
+	len = thdp->len;
+	seq = ntohl(th->th_seq);
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	/*
+	 * Only segments carrying data count as outstanding.  A zero-length
+	 * segment must not be recorded either: it would set te->max to the
+	 * sequence number at which the next data segment is likely to start,
+	 * and that segment would then be skipped as a retransmission.
+	 */
+	if (len <= 0)
+		return (0);
+
+	if ((te->flags & TCP_ER_HAVEONE) == 0) {
+		/* Nothing tracked yet: start from an empty window. */
+		for (i = 0; i < TCP_ER_MAXSEGB - 1; i++) {
+			te->segb[i] = tp->snd_una;
+			te->sege[i] = tp->snd_una;
+		}
+		te->numseg = 0;
+		te->flags |= TCP_ER_HAVEONE;
+	} else {
+		if (SEQ_LEQ(seq, te->max))
+			return (0);
+		/*
+		 * Make room by discarding the oldest boundary; the last slot
+		 * is overwritten below.
+		 */
+		for (i = 0; i < TCP_ER_MAXSEGB - 1; i++) {
+			te->segb[i] = te->segb[i + 1];
+			te->sege[i] = te->sege[i + 1];
+		}
+	}
+	te->segb[TCP_ER_MAXSEGB - 1] = seq;
+	te->sege[TCP_ER_MAXSEGB - 1] = seq + len;
+	te->max = seq;
+	te->numseg++;
+	return (0);
+}
+
+/*
+ * Constructor for the per-connection tracker state: no segment recorded and
+ * none outstanding.  The boundary slots are set up by tcper_output().
+ */
+static int
+tcper_ctor(void *mem, int size, void *arg, int flags)
+{
+	struct tcper *te = mem;
+
+	te->flags = 0;
+	te->numseg = 0;
+	return (0);
+}
+
+static struct helper tcper_helper = {
+	.h_flags = HELPER_NEEDS_OSD,
+	.h_classes = HELPER_CLASS_TCP
+};
+
+/* Define the helper hook info required by tcper. */
+static struct hookinfo tcper_hooks[] = {
+	{
+		.hook_type = HHOOK_TYPE_TCP,
+		.hook_id = HHOOK_TCP_EST_IN,
+		.hook_udata = NULL,
+		.hook_func = &tcper_input
+	},
+	{
+		.hook_type = HHOOK_TYPE_TCP,
+		.hook_id = HHOOK_TCP_EST_OUT,
+		.hook_udata = NULL,
+		.hook_func = &tcper_output
+	}
+};
+
+KHELP_DECLARE_MOD_UMA(tcper, &tcper_helper, tcper_hooks, 1,
+    sizeof(struct tcper), tcper_ctor, NULL);

Property changes on: sys/netinet/khelp/h_tcper.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + Id
Added: svn:eol-style
   + native

Index: sys/netinet/khelp/h_tcper.h
===================================================================
--- sys/netinet/khelp/h_tcper.h	(revision 0)
+++ sys/netinet/khelp/h_tcper.h	(revision 0)
@@ -0,0 +1,42 @@
+/*-
+ * Copyright (c) 2011 Weongyo Jeong
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_KHELP_H_TCPER_
+#define	_NETINET_KHELP_H_TCPER_
+
+struct tcper {
+#define	TCP_ER_MAXSEGB	4	/* number of segment boundaries tracked */
+	uint32_t	flags;
+#define	TCP_ER_HAVEONE	0x00000001	/* set if at least one seg in */
+	tcp_seq		segb[TCP_ER_MAXSEGB];	/* first seq of each segment */
+	tcp_seq		sege[TCP_ER_MAXSEGB];	/* seq just past each segment */
+	tcp_seq		max;	/* first seq of the newest recorded segment */
+	int		numseg;	/* count of outstanding data segments */
+};
+
+#endif

Property changes on: sys/netinet/khelp/h_tcper.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + Id
Added: svn:eol-style
   + native

Index: sys/netinet/tcp_sack.c
===================================================================
--- sys/netinet/tcp_sack.c	(revision 221564)
+++ sys/netinet/tcp_sack.c	(working copy)
@@ -684,3 +684,73 @@
 		return;
 	tp->snd_nxt = tp->snd_fack;
 }
+
+/*
+ * True if at least "amount" bytes have been SACKed.
+ * Used by Early Retransmit.
+ *
+ * The SACKed byte count is the span from snd_una to snd_fack minus the
+ * bytes covered by the holes on tp->snd_holes.
+ */
+boolean_t
+tcp_sack_ownd(struct tcpcb *tp, uint32_t amount)
+{
+	struct sackhole *p;
+	uint32_t ackedbyte;
+
+	ackedbyte = tp->snd_fack - tp->snd_una;
+	TAILQ_FOREACH(p, &tp->snd_holes, scblink) {
+		KASSERT(ackedbyte >= p->end - p->start,
+		    ("%s: assert failed", __func__));
+		ackedbyte -= p->end - p->start;
+	}
+	if (ackedbyte >= amount)
+		return (TRUE);
+	return (FALSE);
+}
+
+/*
+ * True if at least "amount" of the "numseq" segments described by
+ * [segb[i], sege[i]) have been SACKed.
+ * Used by Early Retransmit.
+ *
+ * A segment counts as SACKed when it is non-empty, ends at or below snd_fack
+ * and is not contained in any hole on tp->snd_holes.
+ */
+boolean_t
+tcp_sack_oseg(struct tcpcb *tp, tcp_seq *segb, tcp_seq *sege, uint32_t numseq,
+    uint32_t amount)
+{
+	struct sackhole *p;
+	uint32_t i, segment_sacked = 0;
+	int inhole;
+
+	for (i = 0; i < numseq; i++) {
+		/* Unused slots hold an empty range and are not segments. */
+		if (segb[i] == sege[i])
+			continue;
+		/*
+		 * Data beyond snd_fack cannot have been SACKed.  The end of
+		 * the segment is tested, not its start, so that a segment
+		 * starting exactly at snd_fack (e.g. the one at snd_una when
+		 * there are no holes) is not counted.
+		 * NOTE(review): assumes snd_fack == snd_una while nothing has
+		 * been SACKed -- confirm.
+		 */
+		if (SEQ_GT(sege[i], tp->snd_fack))
+			continue;
+		inhole = 0;
+		TAILQ_FOREACH(p, &tp->snd_holes, scblink) {
+			if (SEQ_GEQ(segb[i], p->start) &&
+			    SEQ_LEQ(sege[i], p->end)) {
+				inhole = 1;
+				break;
+			}
+		}
+		if (inhole)
+			continue;
+		if (++segment_sacked >= amount)
+			return (TRUE);
+	}
+	return (FALSE);
+}
Index: sys/modules/khelp/h_tcper/Makefile
===================================================================
--- sys/modules/khelp/h_tcper/Makefile	(revision 0)
+++ sys/modules/khelp/h_tcper/Makefile	(revision 0)
@@ -0,0 +1,9 @@
+# $FreeBSD$
+
+.include
+
+.PATH: ${.CURDIR}/../../../netinet/khelp
+KMOD=	h_tcper
+SRCS=	h_tcper.h h_tcper.c
+
+.include

Property changes on: sys/modules/khelp/h_tcper/Makefile
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + Id
Added: svn:eol-style
   + native

Index: sys/modules/khelp/Makefile
===================================================================
--- sys/modules/khelp/Makefile	(revision 221564)
+++ sys/modules/khelp/Makefile	(working copy)
@@ -1,5 +1,5 @@
 # $FreeBSD$
 
-SUBDIR=	h_ertt
+SUBDIR=	h_ertt h_tcper
 
 .include