Index: tcp_input.c =================================================================== --- sys/netinet/tcp_input.c (revision 186471) +++ sys/netinet/tcp_input.c (revision 186482) @@ -117,6 +117,8 @@ int tcp_do_autorcvbuf; int tcp_autorcvbuf_inc; int tcp_autorcvbuf_max; +int tcp_do_rfc3465; +int tcp_abc_l_var; #endif SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_tcp, TCPCTL_STATS, stats, @@ -144,6 +146,13 @@ tcp_do_rfc3390, 0, "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW, + tcp_do_rfc3465, 0, + "Enable RFC 3465 (Appropriate Byte Counting)"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp, OID_AUTO, abc_l_var, CTLFLAG_RW, + tcp_abc_l_var, 2, + "Cap the max cwnd increment during slow-start to this number of segments"); + SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, CTLFLAG_RW, 0, "TCP ECN"); SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_RW, tcp_do_ecn, 0, "TCP ECN support"); @@ -2293,20 +2302,59 @@ /* * When new data is acked, open the congestion window. - * If the window gives us less than ssthresh packets - * in flight, open exponentially (maxseg per packet). - * Otherwise open linearly: maxseg per window - * (maxseg^2 / cwnd per packet). - * If cwnd > maxseg^2, fix the cwnd increment at 1 byte - * to avoid capping cwnd (as suggested in RFC2581). + * Method depends on which congestion control state we're + * in (slow start or cong avoid) and if ABC (RFC 3465) is + * enabled. + * + * slow start: cwnd <= ssthresh + * cong avoid: cwnd > ssthresh + * + * slow start and ABC (RFC 3465): + * Grow cwnd exponentially by the amount of data + * ACKed capping the max increment per ACK to + * (abc_l_var * maxseg) bytes. + * + * slow start without ABC (RFC 2581): + * Grow cwnd exponentially by maxseg per ACK. + * + * cong avoid and ABC (RFC 3465): + * Grow cwnd linearly by maxseg per RTT for each + * cwnd worth of ACKed data. 
+ * + * cong avoid without ABC (RFC 2581): + * Grow cwnd linearly by approximately maxseg per RTT using + * maxseg^2 / cwnd per ACK as the increment. + * If cwnd > maxseg^2, fix the cwnd increment at 1 byte to + * avoid capping cwnd. */ if ((!V_tcp_do_newreno && !(tp->t_flags & TF_SACK_PERMIT)) || !IN_FASTRECOVERY(tp)) { u_int cw = tp->snd_cwnd; u_int incr = tp->t_maxseg; - if (cw > tp->snd_ssthresh) - incr = max((incr * incr / cw), 1); - tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale); + /* In congestion avoidance? */ + if (cw > tp->snd_ssthresh) { + if (V_tcp_do_rfc3465) { + tp->t_bytes_acked += acked; + if (tp->t_bytes_acked >= tp->snd_cwnd) + tp->t_bytes_acked -= cw; + else + incr = 0; + } + else + incr = max((incr * incr / cw), 1); + /* + * In slow-start with ABC enabled and no RTO in sight? + * (Must not use abc_l_var > 1 if slow starting after an + * RTO. On RTO, snd_nxt = snd_una, so the snd_nxt == + * snd_max check is sufficient to handle this). + */ + } else if (V_tcp_do_rfc3465 && + tp->snd_nxt == tp->snd_max) + incr = min(acked, + V_tcp_abc_l_var * tp->t_maxseg); + /* ABC is on by default, so (incr == 0) frequently. 
*/ + if (incr > 0) + tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale); } SOCKBUF_LOCK(&so->so_snd); if (acked > so->so_snd.sb_cc) { @@ -2328,8 +2376,10 @@ tp->snd_recover = th->th_ack - 1; if ((V_tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) && IN_FASTRECOVERY(tp) && - SEQ_GEQ(th->th_ack, tp->snd_recover)) + SEQ_GEQ(th->th_ack, tp->snd_recover)) { EXIT_FASTRECOVERY(tp); + tp->t_bytes_acked = 0; + } tp->snd_una = th->th_ack; if (tp->t_flags & TF_SACK_PERMIT) { if (SEQ_GT(tp->snd_una, tp->snd_recover)) Index: tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c (revision 186471) +++ sys/netinet/tcp_subr.c (revision 186482) @@ -316,6 +316,8 @@ V_tcp_do_autorcvbuf = 1; V_tcp_autorcvbuf_inc = 16*1024; V_tcp_autorcvbuf_max = 256*1024; + V_tcp_do_rfc3465 = 1; + V_tcp_abc_l_var = 2; V_tcp_mssdflt = TCP_MSS; #ifdef INET6 Index: tcp_timer.c =================================================================== --- sys/netinet/tcp_timer.c (revision 186471) +++ sys/netinet/tcp_timer.c (revision 186482) @@ -587,6 +587,7 @@ tp->t_dupacks = 0; } EXIT_FASTRECOVERY(tp); + tp->t_bytes_acked = 0; (void) tcp_output(tp); out: Index: tcp_var.h =================================================================== --- sys/netinet/tcp_var.h (revision 186471) +++ sys/netinet/tcp_var.h (revision 186482) @@ -189,6 +189,7 @@ void *t_pspare[3]; /* toe usrreqs / toepcb * / congestion algo / vimage / 1 general use */ struct toe_usrreqs *t_tu; /* offload operations vector */ void *t_toe; /* TOE pcb pointer */ + int t_bytes_acked; /* # bytes acked during current RTT */ }; /* Index: vinet.h =================================================================== --- sys/netinet/vinet.h (revision 186471) +++ sys/netinet/vinet.h (revision 186482) @@ -127,6 +127,8 @@ int _drop_synfin; int _tcp_do_rfc3042; int _tcp_do_rfc3390; + int _tcp_do_rfc3465; + int _tcp_abc_l_var; int _tcp_do_ecn; int _tcp_ecn_maxretries; int _tcp_insecure_rst; @@ -291,6 +293,7 @@ 
#define V_subnetsarelocal VNET_INET(subnetsarelocal) #define V_tcb VNET_INET(tcb) #define V_tcbinfo VNET_INET(tcbinfo) +#define V_tcp_abc_l_var VNET_INET(tcp_abc_l_var) #define V_tcp_autorcvbuf_inc VNET_INET(tcp_autorcvbuf_inc) #define V_tcp_autorcvbuf_max VNET_INET(tcp_autorcvbuf_max) #define V_tcp_autosndbuf_inc VNET_INET(tcp_autosndbuf_inc) @@ -303,6 +306,7 @@ #define V_tcp_do_rfc1323 VNET_INET(tcp_do_rfc1323) #define V_tcp_do_rfc3042 VNET_INET(tcp_do_rfc3042) #define V_tcp_do_rfc3390 VNET_INET(tcp_do_rfc3390) +#define V_tcp_do_rfc3465 VNET_INET(tcp_do_rfc3465) #define V_tcp_do_sack VNET_INET(tcp_do_sack) #define V_tcp_do_tso VNET_INET(tcp_do_tso) #define V_tcp_ecn_maxretries VNET_INET(tcp_ecn_maxretries)