Eifel detection (RFC 3522) of spurious TCP retransmissions.

The save/restore of the congestion state around a retransmission is
factored out into tcp_save_congestion_state() and
tcp_revert_congestion_state().  When the first acceptable ACK after a
retransmission echoes a timestamp older than the retransmission itself,
the retransmission was spurious and the saved state is restored.  When
timestamps are not usable, the older "bad retransmit" RTT heuristic is
still consulted, so t_badrxtwin keeps being armed by the RTO timer.
Echoed timestamps are compared modulo 2^32 with TSTMP_LT() because they
are derived from the wrapping ticks counter.

Index: sys/netinet/tcp_input.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.335
diff -u -p -u -r1.335 tcp_input.c
--- sys/netinet/tcp_input.c	11 Apr 2007 09:45:16 -0000	1.335
+++ sys/netinet/tcp_input.c	11 Apr 2007 10:33:27 -0000
@@ -1,4 +1,36 @@
 /*-
+ * Copyright (c) 2002, 2003, 2004 Jeffrey M. Hsu.  All rights reserved.
+ * Copyright (c) 2002, 2003, 2004 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Jeffrey M. Hsu.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -129,6 +161,11 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3
     &tcp_do_rfc3390, 0,
     "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
 
+static int tcp_do_eifel_detect = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, eifel, CTLFLAG_RW,
+    &tcp_do_eifel_detect, 0,
+    "Eifel detection algorithm (RFC 3522)");
+
 static int tcp_insecure_rst = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_rst, CTLFLAG_RW,
     &tcp_insecure_rst, 0,
@@ -1159,19 +1196,27 @@ tcp_do_segment(struct mbuf *m, struct tc
 				++tcpstat.tcps_predack;
 				/*
 				 * "bad retransmit" recovery
-				 */
-				if (tp->t_rxtshift == 1 &&
-				    ticks < tp->t_badrxtwin) {
-					++tcpstat.tcps_sndrexmitbad;
-					tp->snd_cwnd = tp->snd_cwnd_prev;
-					tp->snd_ssthresh =
-					    tp->snd_ssthresh_prev;
-					tp->snd_recover = tp->snd_recover_prev;
-					if (tp->t_flags & TF_WASFRECOVERY)
-					    ENTER_FASTRECOVERY(tp);
-					tp->snd_nxt = tp->snd_max;
-					tp->t_badrxtwin = 0;
+				 *
+				 * If Eifel detection applies, then
+				 * it is deterministic, so use it
+				 * unconditionally over the old heuristic.
+				 * Otherwise, fall back to the old heuristic.
+				 */
+				if (tcp_do_eifel_detect &&
+				    (to.to_flags & TOF_TS) && to.to_tsecr &&
+				    (tp->t_flags & TF_FIRSTACCACK)) {
+					/* Eifel detection applicable. */
+					if (TSTMP_LT(to.to_tsecr,
+					    tp->t_rexmtTS)) {
+						tcp_revert_congestion_state(tp);
+						++tcpstat.tcps_eifeldetected;
+					}
+				} else if (tp->t_rxtshift == 1 &&
+				    ticks < tp->t_badrxtwin) {
+					tcp_revert_congestion_state(tp);
+					++tcpstat.tcps_rttdetected;
 				}
+				tp->t_flags &= ~(TF_FIRSTACCACK | TF_FASTREXMT);
 
 				/*
 				 * Recalculate the transmit timer / rtt.
@@ -1974,6 +2019,11 @@ tcp_do_segment(struct mbuf *m, struct tc
 						break;
 					}
 				}
+				if (tcp_do_eifel_detect &&
+				    (tp->t_flags & TF_RCVD_TSTMP)) {
+					tcp_save_congestion_state(tp);
+					tp->t_flags |= TF_FASTREXMT;
+				}
 				win = min(tp->snd_wnd, tp->snd_cwnd) /
 				    2 / tp->t_maxseg;
 				if (win < 2)
@@ -2115,15 +2165,17 @@ process_ACK:
 		 * original cwnd and ssthresh, and proceed to transmit where
 		 * we left off.
 		 */
-		if (tp->t_rxtshift == 1 && ticks < tp->t_badrxtwin) {
-			++tcpstat.tcps_sndrexmitbad;
-			tp->snd_cwnd = tp->snd_cwnd_prev;
-			tp->snd_ssthresh = tp->snd_ssthresh_prev;
-			tp->snd_recover = tp->snd_recover_prev;
-			if (tp->t_flags & TF_WASFRECOVERY)
-				ENTER_FASTRECOVERY(tp);
-			tp->snd_nxt = tp->snd_max;
-			tp->t_badrxtwin = 0;	/* XXX probably not required */
+		if (tcp_do_eifel_detect && acked &&
+		    (to.to_flags & TOF_TS) && to.to_tsecr &&
+		    (tp->t_flags & TF_FIRSTACCACK)) {
+			/* Eifel detection applicable. */
+			if (TSTMP_LT(to.to_tsecr, tp->t_rexmtTS)) {
+				tcp_revert_congestion_state(tp);
+				++tcpstat.tcps_eifeldetected;
+			}
+		} else if (tp->t_rxtshift == 1 && ticks < tp->t_badrxtwin) {
+			tcp_revert_congestion_state(tp);
+			++tcpstat.tcps_rttdetected;
 		}
 
 		/*
@@ -2171,6 +2223,9 @@ process_ACK:
 		if (acked == 0)
 			goto step6;
 
+		/* Stop looking for an acceptable ACK since one was received. */
+		tp->t_flags &= ~(TF_FIRSTACCACK | TF_FASTREXMT);
+
 		/*
 		 * When new data is acked, open the congestion window.
 		 * If the window gives us less than ssthresh packets
Index: sys/netinet/tcp_timer.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_timer.c,v
retrieving revision 1.90
diff -u -p -u -r1.90 tcp_timer.c
--- sys/netinet/tcp_timer.c	11 Apr 2007 09:45:16 -0000	1.90
+++ sys/netinet/tcp_timer.c	11 Apr 2007 10:35:08 -0000
@@ -1,4 +1,36 @@
 /*-
+ * Copyright (c) 2002, 2003, 2004 Jeffrey M. Hsu.  All rights reserved.
+ * Copyright (c) 2002, 2003, 2004 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Jeffrey M. Hsu.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -617,6 +649,54 @@ tcp_timer_persist(struct tcpcb *tp, stru
 	return (0);
 }
 
+/*
+ * Snapshot snd_cwnd, snd_ssthresh, snd_recover and the fast-recovery
+ * status into their *_prev / TF_WASFRECOVERY counterparts so that
+ * tcp_revert_congestion_state() can restore them later.  If
+ * TF_RCVD_TSTMP is set, additionally record the current ticks value in
+ * t_rexmtTS and set TF_FIRSTACCACK, which lets the input path compare
+ * the timestamp echoed by the next acceptable ACK against it.
+ */
+void
+tcp_save_congestion_state(struct tcpcb *tp)
+{
+	tp->snd_cwnd_prev = tp->snd_cwnd;
+	tp->snd_ssthresh_prev = tp->snd_ssthresh;
+	tp->snd_recover_prev = tp->snd_recover;
+	if (IN_FASTRECOVERY(tp))
+		tp->t_flags |= TF_WASFRECOVERY;
+	else
+		tp->t_flags &= ~TF_WASFRECOVERY;
+	if (tp->t_flags & TF_RCVD_TSTMP) {
+		tp->t_rexmtTS = ticks;
+		tp->t_flags |= TF_FIRSTACCACK;
+	}
+}
+
+/*
+ * Restore the values stored by tcp_save_congestion_state(), re-entering
+ * fast recovery if TF_WASFRECOVERY is set.  The event is counted as a
+ * spurious Fast Retransmit when TF_FASTREXMT is set and as a spurious
+ * RTO otherwise.  t_badrxtwin and t_rxtshift are reset and snd_nxt is
+ * moved back up to snd_max.
+ */
+void
+tcp_revert_congestion_state(struct tcpcb *tp)
+{
+	tp->snd_cwnd = tp->snd_cwnd_prev;
+	tp->snd_ssthresh = tp->snd_ssthresh_prev;
+	tp->snd_recover = tp->snd_recover_prev;
+	if (tp->t_flags & TF_WASFRECOVERY)
+		ENTER_FASTRECOVERY(tp);
+	if (tp->t_flags & TF_FASTREXMT)
+		++tcpstat.tcps_sndfastrexmitbad;
+	else
+		++tcpstat.tcps_sndrtobad;
+	tp->t_badrxtwin = 0;
+	tp->t_rxtshift = 0;
+	tp->snd_nxt = tp->snd_max;
+}
+
 static int
 tcp_timer_rexmt(struct tcpcb *tp, struct inpcb *inp)
 {
@@ -647,14 +727,9 @@ tcp_timer_rexmt(struct tcpcb *tp, struct
 		 * "On Estimating End-to-End Network Path Properties" by
 		 * Allman and Paxson for more details.
 		 */
-		tp->snd_cwnd_prev = tp->snd_cwnd;
-		tp->snd_ssthresh_prev = tp->snd_ssthresh;
-		tp->snd_recover_prev = tp->snd_recover;
-		if (IN_FASTRECOVERY(tp))
-		  tp->t_flags |= TF_WASFRECOVERY;
-		else
-		  tp->t_flags &= ~TF_WASFRECOVERY;
+		tcp_save_congestion_state(tp);
+		tp->t_flags &= ~TF_FASTREXMT;
 		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
 	}
 	tcpstat.tcps_rexmttimeo++;
 	if (tp->t_state == TCPS_SYN_SENT)
Index: sys/netinet/tcp_var.h
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.142
diff -u -p -u -r1.142 tcp_var.h
--- sys/netinet/tcp_var.h	11 Apr 2007 09:45:16 -0000	1.142
+++ sys/netinet/tcp_var.h	11 Apr 2007 10:33:27 -0000
@@ -111,6 +111,8 @@ struct tcpcb {
 #define	TF_SIGNATURE	0x400000	/* require MD5 digests (RFC2385) */
 #define	TF_FORCEDATA	0x800000	/* force out a byte */
 #define	TF_TSO		0x1000000	/* TSO enabled on this connection */
+#define	TF_FIRSTACCACK	0x2000000	/* Look for 1st acceptable ACK. */
+#define	TF_FASTREXMT	0x4000000	/* Did Fast Retransmit. */
 
 	tcp_seq	snd_una;		/* send unacknowledged */
 	tcp_seq	snd_max;		/* highest sequence number sent;
@@ -181,6 +183,7 @@ struct tcpcb {
 	u_long	snd_ssthresh_prev;	/* ssthresh prior to retransmit */
 	tcp_seq	snd_recover_prev;	/* snd_recover prior to retransmit */
 	u_long	t_badrxtwin;		/* window for retransmit recovery */
+	u_long	t_rexmtTS;		/* timestamp of last retransmit */
 	u_char	snd_limited;		/* segments limited transmitted */
 /* SACK related state */
 	int	sack_enable;		/* enable SACK for this connection */
@@ -346,7 +349,10 @@ struct tcpstat {
 	u_long	tcps_sndbyte;		/* data bytes sent */
 	u_long	tcps_sndrexmitpack;	/* data packets retransmitted */
 	u_long	tcps_sndrexmitbyte;	/* data bytes retransmitted */
-	u_long	tcps_sndrexmitbad;	/* unnecessary packet retransmissions */
+	u_long	tcps_sndrtobad;		/* spurious RTO retransmissions */
+	u_long	tcps_sndfastrexmitbad;	/* spurious Fast Retransmissions */
+	u_long	tcps_eifeldetected;	/* Eifel-detected spurious rexmits */
+	u_long	tcps_rttdetected;	/* RTT-detected spurious RTO rexmits */
 	u_long	tcps_sndacks;		/* ack-only packets sent */
 	u_long	tcps_sndprobe;		/* window probes sent */
 	u_long	tcps_sndurg;		/* packets sent with URG only */
@@ -526,6 +532,8 @@ int	 tcp_output(struct tcpcb *);
 void	 tcp_respond(struct tcpcb *, void *,
 	    struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int);
 int	 tcp_twrespond(struct tcptw *, int);
+void	 tcp_save_congestion_state(struct tcpcb *tp);
+void	 tcp_revert_congestion_state(struct tcpcb *tp);
 void	 tcp_setpersist(struct tcpcb *);
 #ifdef TCP_SIGNATURE
 int	 tcp_signature_compute(struct mbuf *, int, int, int, u_char *, u_int);
Index: usr.bin/netstat/inet.c
===================================================================
RCS file: /home/ncvs/src/usr.bin/netstat/inet.c,v
retrieving revision 1.74
diff -u -p -u -r1.74 inet.c
--- usr.bin/netstat/inet.c	26 Feb 2007 22:25:21 -0000	1.74
+++ usr.bin/netstat/inet.c	11 Apr 2007 10:08:28 -0000
@@ -385,8 +385,10 @@ tcp_stats(u_long off __unused, const cha
 		"\t\t%lu data packet%s (%lu byte%s)\n");
 	p2(tcps_sndrexmitpack, tcps_sndrexmitbyte,
 		"\t\t%lu data packet%s (%lu byte%s) retransmitted\n");
-	p(tcps_sndrexmitbad,
-		"\t\t%lu data packet%s unnecessarily retransmitted\n");
+	p(tcps_sndrtobad, "\t\t%lu spurious RTO retransmit%s\n");
+	p(tcps_sndfastrexmitbad, "\t\t%lu spurious Fast Retransmit%s\n");
+	p(tcps_eifeldetected, "\t\t%lu Eifel-detected spurious retransmit%s\n");
+	p(tcps_rttdetected, "\t\t%lu RTT-detected spurious retransmit%s\n");
 	p(tcps_mturesent, "\t\t%lu resend%s initiated by MTU discovery\n");
 	p2a(tcps_sndacks, tcps_delack,
 		"\t\t%lu ack-only packet%s (%lu delayed)\n");