/* * Copyright (c) 2013, 2014 * Inferno Nettverk A/S, Norway. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. The above copyright notice, this list of conditions and the following * disclaimer must appear in all copies of the software, derivative works * or modified versions, and any portions thereof, aswell as in all * supporting documentation. * 2. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by * Inferno Nettverk A/S, Norway. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Inferno Nettverk A/S requests users of this software to return to * * Software Distribution Coordinator or sdc@inet.no * Inferno Nettverk A/S * Oslo Research Park * Gaustadalléen 21 * NO-0349 Oslo * Norway * * any improvements or extensions that they make and grant Inferno Nettverk A/S * the rights to redistribute these changes. * */ #include "common.h" #if HAVE_NETINET_TCP_FSM_H #include #endif /* HAVE_NETINET_TCP_FSM_H */ static const char rcsid[] = "$Id: tcpinfo.c,v 1.15.4.1 2014/08/15 18:16:41 karls Exp $"; #if HAVE_TCP_INFO const char *tcpi_ca_state2string(const unsigned long val); const char *tcpi_state2string(const unsigned long val); const char *tcpi_options2string(const unsigned long val); #endif /* HAVE_TCP_INFO */ char * get_tcpinfo(fdc, fdv, buf, buflen) const size_t fdc; int fdv[]; char *buf; size_t buflen; { const char *function = "get_tcpinfo()"; #if HAVE_TCP_INFO struct tcp_info info_0, info_1; socklen_t len; size_t bufused; if (fdc == 0) return NULL; if (buf == NULL || buflen == 0) { static char bufmem[ (31 + 10) /* number of keywords + 10 extra keyw. */ * (20 /* length of keyword */ + 10 /* key value */) * 2 /* max fdc. */]; CTASSERT(sizeof(bufmem) > MAXTCPINFOLEN); buf = bufmem; buflen = sizeof(bufmem); } if (fdc > 0) { len = sizeof(info_0); if (getsockopt(fdv[0], IPPROTO_TCP, TCP_INFO, &info_0, &len) != 0) { slog(LOG_DEBUG, "%s: getsockopt(TCP_INFO) on fd %d (fdv[0]) failed: %s", function, fdv[0], strerror(errno)); fdv[0] = -1; } } if (fdc > 1) { len = sizeof(info_1); if (getsockopt(fdv[1], IPPROTO_TCP, TCP_INFO, &info_1, &len) != 0) { slog(LOG_DEBUG, "%s: getsockopt(TCP_INFO) on fd %d (fdv[1]) failed: %s", function, fdv[1], strerror(errno)); fdv[1] = -1; } } switch (fdc) { case 1: if (fdv[0] == -1) return NULL; break; case 2: if (fdv[0] == -1 && fdv[1] == -1) return NULL; break; default: SERRX(fdc); } bufused = 0; #define ADDATTR(fdc, fdv, attr, _func, tcpinfo_0, tcpinfo_1) \ do { \ const char * (*func)(unsigned long value) = (_func); \ \ if (fdc == 1) { \ bufused += snprintf(&buf[bufused], buflen - bufused, \ "%-20s : %lu%s%s%s\n", \ #attr, \ (unsigned long)((tcpinfo_0)->attr), \ func == NULL ? "" : " (", \ func == NULL ? "" : func((tcpinfo_0)->attr), \ func == NULL ? "" : ")"); \ } \ else if (fdc == 2) { \ if (fdv[0] >= 0 && fdv[1] >= 0) \ bufused += snprintf(&buf[bufused], buflen - bufused, \ "%-20s : %lu%s%s%s <-> %lu%s%s%s\n", \ #attr, \ (unsigned long)((tcpinfo_0)->attr), \ func == NULL ? "" : " (", \ func == NULL ? "" : func((tcpinfo_0)->attr), \ func == NULL ? "" : ")", \ (unsigned long)((tcpinfo_1)->attr), \ func == NULL ? "" : " (", \ func == NULL ? "" : func((tcpinfo_1)->attr), \ func == NULL ? "" : ")"); \ else { \ if (fdv[0] >= 0) { \ SASSERTX(fdv[1] == -1); \ \ bufused += snprintf(&buf[bufused], buflen - bufused, \ "%-20s : %lu%s%s%s <-> N/A\n", \ #attr, \ (unsigned long)((tcpinfo_0)->attr), \ func == NULL ? "" : " (", \ func == NULL ? "" : func((tcpinfo_0)->attr), \ func == NULL ? "" : ")"); \ } \ else if (fdv[1] >= 0) { \ SASSERTX(fdv[0] == -1); \ \ bufused += snprintf(&buf[bufused], buflen - bufused, \ "%-20s : N/A <-> %lu%s%s%s\n", \ #attr, \ (unsigned long)((tcpinfo_1)->attr), \ func == NULL ? "" : " (", \ func == NULL ? "" : func((tcpinfo_1)->attr), \ func == NULL ? "" : ")"); \ } \ else \ SERRX(0); \ } \ } \ else \ SERRX(fdc); \ } while (/* CONSTCOND */ 0) #define ADDNL() \ do { \ bufused += snprintf(&buf[bufused], buflen - bufused, "\n"); \ } while (/* CONSTCOND */ 0) /* * XXX FreeBSD has some further extensions. Add support * for them when we have finished the analysis, as Linux * is what customer is using. * * XXX most comments regarding meaning are as of currently * unverified and just guesses. Add a "verified" comment * once verified. */ ADDATTR(fdc, fdv, tcpi_state, tcpi_state2string, &info_0, &info_1); #if HAVE_TCP_INFO_TCPI_CA_STATE /* * enum tcp_ca_state. */ ADDATTR(fdc, fdv, tcpi_ca_state, tcpi_ca_state2string, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_CA_STATE */ #if HAVE_TCP_INFO_TCPI_RETRANSMITS /* * Number of times we have retransmitted currently outstanding * data, based on ACK timeouts? Reset every time successfully ACK'ed * and things are moving on? (and possibly in a few other cases too?) * [Mostly verified] */ ADDATTR(fdc, fdv, tcpi_retransmits, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_CA_STATE */ #if HAVE_TCP_INFO_TCPI_PROBES /* * Number of tcp zero window probes sent. * Sent if peer advertizes a zero receive window size, * preventing us from sending it any more data. * * Think this is sent if the peer receive window has been zero * for a while (without updates to refresh it as zero), and * this counts for how long (how many probes) it has currently * been zero. * Not sure if the max value, before we trigger a send error * is the sysctl tcp_retries2, or if the source code comment * is still correct in that there is no max value. */ ADDATTR(fdc, fdv, tcpi_probes, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_PROBES */ #if HAVE_TCP_INFO_TCPI_BACKOFF /* * Current backoff value. Used to calculate how long to wait * before retransmitting un-acked data. * The wait is calculated as a factor of this value, so would * think this should have the same value as tcpi_retransmits, * but looks like this value is reset in some cases where * tcpi_retransmits is not. * * Looks like it is also limited by sysctl tcp_retries2. */ ADDATTR(fdc, fdv, tcpi_backoff, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_BACKOFF */ /* * Options set on socket. */ ADDATTR(fdc, fdv, tcpi_options, tcpi_options2string, &info_0, &info_1); /* * how much peer told us to scale his receive windows size by? */ ADDATTR(fdc, fdv, tcpi_snd_wscale, NULL, &info_0, &info_1); /* * how much we told peer to scale our windows receive size by? */ ADDATTR(fdc, fdv, tcpi_rcv_wscale, NULL, &info_0, &info_1); /* * Retransmission timeout. Microseconds. */ ADDATTR(fdc, fdv, tcpi_rto, NULL, &info_0, &info_1); #if HAVE_TCP_INFO_TCPI_ATO /* * Ack timeout. Microseconds. */ ADDATTR(fdc, fdv, tcpi_ato, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_ATO */ /* * Maximum segment size for outgoing TCP packets. Bytes. */ ADDATTR(fdc, fdv, tcpi_snd_mss, NULL, &info_0, &info_1); /* * Maximum segment size for outgoing TCP packets used by peer. Bytes. * A guess made by our side. */ ADDATTR(fdc, fdv, tcpi_rcv_mss, NULL, &info_0, &info_1); #if HAVE_TCP_INFO_TCPI_UNACKED /* * Number of packets sent by us, but not yet ack'ed by peer. */ ADDATTR(fdc, fdv, tcpi_unacked, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_UNACKED */ #if HAVE_TCP_INFO_TCPI_SACKED /* * Packets that arrived at peer out of order. * If TCPI_OPT_SACK is not enabled, this value is still set, * but based on an estimate (duplicate acks)? * Reset ... when? */ ADDATTR(fdc, fdv, tcpi_sacked, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_SACKED */ #if HAVE_TCP_INFO_TCPI_LOST /* * Packets lost by network. * Reset when the lost data has been retransmitted? */ ADDATTR(fdc, fdv, tcpi_lost, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_LOST */ #if HAVE_TCP_INFO_TCPI_RETRANS /* * Number of packets retransmitted due to non-timeout reasons (e.g., * the TCP Fast Retransmit feature). Reset ... when? */ ADDATTR(fdc, fdv, tcpi_retrans, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_RETRANS */ #if HAVE_TCP_INFO_TCPI_FACKETS /* * Forward ack. Packets. */ ADDATTR(fdc, fdv, tcpi_fackets, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_FACKETS */ /* * Times since last sent/received. All in milliseconds. * Note that some (all?) of these seems to contain some random value * initially. At least for tcpi_last_data_recv that appears to be * the case. */ ADDNL(); #if HAVE_TCP_INFO_TCPI_LAST_DATA_SENT /* * Time since we last sent any data. */ ADDATTR(fdc, fdv, tcpi_last_data_sent, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_LAST_DATA_SENT */ #if HAVE_TCP_INFO_TCPI_LAST_ACK_SENT /* * Time since we last sent an ack. * NOTE: Linux does not appears to keep track of this value; always zero. */ ADDATTR(fdc, fdv, tcpi_last_ack_sent, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_LAST_ACK_SENT */ #if HAVE_TCP_INFO_TCPI_LAST_DATA_RECV /* * Time since we last received any data? */ ADDATTR(fdc, fdv, tcpi_last_data_recv, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_LAST_DATA_RECV */ #if HAVE_TCP_INFO_TCPI_LAST_ACK_RECV /* * Time since we last received an ack. [Verified] */ ADDATTR(fdc, fdv, tcpi_last_ack_recv, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_LAST_ACK_RECV */ /* * Metrics. */ ADDNL(); #if HAVE_TCP_INFO_TCPI_PMTU /* * Path MTU. Bytes. * What if not known? Guessed? */ ADDATTR(fdc, fdv, tcpi_pmtu, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_PMTU */ #if HAVE_TCP_INFO_TCPI_RCV_SSTHRESH /* * receive slow start threshold? * Something related to the receive window we advertise, * based on how well we have performed (how fast we've * read data out of our socket buffer?) in the past and * how much memory is available? */ ADDATTR(fdc, fdv, tcpi_rcv_ssthresh, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_RCV_SSTHRESH */ /* * Estimated round-trip-time? * Possibly max of the moving average and last rtt calculation * done (which is not necessarily the rtt of last packet). * Microseconds. If no value calculated, set to 1000. */ ADDATTR(fdc, fdv, tcpi_rtt, NULL, &info_0, &info_1); /* * median deviation for tcpi_rtt? Microseconds. */ ADDATTR(fdc, fdv, tcpi_rttvar, NULL, &info_0, &info_1); /* * send slow start threshold? */ ADDATTR(fdc, fdv, tcpi_snd_ssthresh, NULL, &info_0, &info_1); /* * Our current congestion window towards peer. Packets. */ ADDATTR(fdc, fdv, tcpi_snd_cwnd, NULL, &info_0, &info_1); #if HAVE_TCP_INFO_TCPI_ADVMSS /* * MSS we advertise to peer. If advertised, advertised as part * of TCP options. Otherwise peer has to assume minimum, 536? * Number of bytes. */ ADDATTR(fdc, fdv, tcpi_advmss, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_ADVMSS */ #if HAVE_TCP_INFO_TCPI_REORDERING /* * Same as sysctl tcp_reordering? I.e. not dynamically updated? * If so, why here? */ ADDATTR(fdc, fdv, tcpi_reordering, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_REORDERING */ #if HAVE_TCP_INFO_TCPI_RCV_RTT /* * "receivers (ours) estimated rtt". Same as tcpi_rtt, but * estimated another way? (without timestamps?) Microseconds. */ ADDATTR(fdc, fdv, tcpi_rcv_rtt, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_RCV_RTT */ #if HAVE_TCP_INFO_TCPI_RCV_SPACE /* * Size of our receive buffer? * Tuned/modified while session is running? * Number of bytes. */ ADDATTR(fdc, fdv, tcpi_rcv_space, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_RCV_SPACE */ /* end of metrics. */ ADDNL(); #if HAVE_TCP_INFO_TCPI_TOTAL_RETRANS /* * Total number of packets retransmitted on this connection, * regardless of reason. * Note that this appears to be calculated based on the data, so if e.g. * a large packet is retransmitted as two smaller packets, that counts as * two retransmits, not one. * [Verified] */ ADDATTR(fdc, fdv, tcpi_total_retrans, NULL, &info_0, &info_1); #endif /* HAVE_TCP_INFO_TCPI_TOTAL_RETRANS */ return buf; #else return NULL; #endif /* HAVE_TCP_INFO */ } #if HAVE_TCP_INFO const char *tcpi_ca_state2string(val) const unsigned long val; { #if defined __linux__ switch (val) { case TCP_CA_Open: return "TCP_CA_Open"; case TCP_CA_Disorder: return "TCP_CA_Disorder"; case TCP_CA_CWR: return "TCP_CA_CWR"; case TCP_CA_Recovery: return "TCP_CA_Recovery"; case TCP_CA_Loss: return "TCP_CA_Loss"; } #endif /* __linux__ */ return ""; } const char *tcpi_state2string(val) const unsigned long val; { #if defined(__FreeBSD__) switch (val) { case TCPS_CLOSED: return "CLOSED"; case TCPS_LISTEN: return "LISTEN"; case TCPS_SYN_SENT: return "SYN_SENT"; case TCPS_SYN_RECEIVED: return "SYN_RECEIVED"; case TCPS_ESTABLISHED: return "ESTABLISHED"; case TCPS_CLOSE_WAIT: return "CLOSE_WAIT"; case TCPS_FIN_WAIT_1: return "FIN_WAIT_1"; case TCPS_CLOSING: return "CLOSING"; case TCPS_LAST_ACK: return "LAST_ACK"; case TCPS_FIN_WAIT_2: return "FIN_WAIT_2"; case TCPS_TIME_WAIT: return "TIME_WAIT"; } #elif defined(__linux__) switch (val) { case TCP_ESTABLISHED: return "ESTABLISHED"; case TCP_SYN_SENT: return "SYN_SENT"; case TCP_SYN_RECV: return "SYN_RECV"; case TCP_FIN_WAIT1: return "FIN_WAIT1"; case TCP_FIN_WAIT2: return "FIN_WAIT2"; case TCP_TIME_WAIT: return "TIME_WAIT"; case TCP_CLOSE: return "CLOSE"; case TCP_CLOSE_WAIT: return "CLOSE_WAIT"; case TCP_LAST_ACK: return "LAST_ACK"; case TCP_LISTEN: return "LISTEN"; case TCP_CLOSING: return "CLOSING"; } #endif /* __linux__ */ return ""; } const char *tcpi_options2string(val) const unsigned long val; { static char buf[256]; size_t bufused; *buf = NUL; bufused = 0; if (val & TCPI_OPT_TIMESTAMPS) bufused += snprintf(&buf[bufused], sizeof(buf) - bufused, "%sTS", *buf == NUL ? "" : ", "); if (val & TCPI_OPT_SACK) bufused += snprintf(&buf[bufused], sizeof(buf) - bufused, "%sSACK", *buf == NUL ? "" : ", "); if (val & TCPI_OPT_WSCALE) bufused += snprintf(&buf[bufused], sizeof(buf) - bufused, "%sWscale", *buf == NUL ? "" : ", "); if (val & TCPI_OPT_ECN) bufused += snprintf(&buf[bufused], sizeof(buf) - bufused, "%sECN", *buf == NUL ? "" : ", "); return buf; } #endif /* HAVE_TCP_INFO */