diff --git a/cddl/lib/libdtrace/Makefile b/cddl/lib/libdtrace/Makefile index bb7801d..46f7046 100644 --- a/cddl/lib/libdtrace/Makefile +++ b/cddl/lib/libdtrace/Makefile @@ -51,6 +51,7 @@ DSRCS= errno.d \ ip.d \ psinfo.d \ signal.d \ + tcp.d \ udp.d \ unistd.d diff --git a/cddl/lib/libdtrace/tcp.d b/cddl/lib/libdtrace/tcp.d new file mode 100644 index 0000000..fe76f1d --- /dev/null +++ b/cddl/lib/libdtrace/tcp.d @@ -0,0 +1,194 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 2013 Mark Johnston + */ + +#pragma D depends_on library ip.d +#pragma D depends_on provider tcp + +/* + * Convert a TCP state value to a string. 
+ */ + +#pragma D binding "1.0" TCPS_CLOSED +inline int TCPS_CLOSED = 0; +#pragma D binding "1.0" TCPS_LISTEN +inline int TCPS_LISTEN = 1; +#pragma D binding "1.0" TCPS_SYN_SENT +inline int TCPS_SYN_SENT = 2; +#pragma D binding "1.0" TCPS_SYN_RECEIVED +inline int TCPS_SYN_RECEIVED = 3; +#pragma D binding "1.0" TCPS_ESTABLISHED +inline int TCPS_ESTABLISHED = 4; +#pragma D binding "1.0" TCPS_CLOSE_WAIT +inline int TCPS_CLOSE_WAIT = 5; +#pragma D binding "1.0" TCPS_FIN_WAIT_1 +inline int TCPS_FIN_WAIT_1 = 6; +#pragma D binding "1.0" TCPS_CLOSING +inline int TCPS_CLOSING = 7; +#pragma D binding "1.0" TCPS_LAST_ACK +inline int TCPS_LAST_ACK = 8; +#pragma D binding "1.0" TCPS_FIN_WAIT_2 +inline int TCPS_FIN_WAIT_2 = 9; +#pragma D binding "1.0" TCPS_TIME_WAIT +inline int TCPS_TIME_WAIT = 10; + +#pragma D binding "1.0" TH_FIN +inline uint8_t TH_FIN = 0x01; +#pragma D binding "1.0" TH_SYN +inline uint8_t TH_SYN = 0x02; +#pragma D binding "1.0" TH_RST +inline uint8_t TH_RST = 0x04; +#pragma D binding "1.0" TH_PUSH +inline uint8_t TH_PUSH = 0x08; +#pragma D binding "1.0" TH_ACK +inline uint8_t TH_ACK = 0x10; +#pragma D binding "1.0" TH_URG +inline uint8_t TH_URG = 0x20; + +#pragma D binding "1.0" tcp_state_string +inline string tcp_state_string[int32_t state] = + state == TCPS_CLOSED ? "state-closed" : + state == TCPS_LISTEN ? "state-listen" : + state == TCPS_SYN_SENT ? "state-syn-sent" : + state == TCPS_SYN_RECEIVED ? "state-syn-received" : + state == TCPS_ESTABLISHED ? "state-established" : + state == TCPS_CLOSE_WAIT ? "state-close-wait" : + state == TCPS_FIN_WAIT_1 ? "state-fin-wait-1" : + state == TCPS_CLOSING ? "state-closing" : + state == TCPS_LAST_ACK ? "state-last-ack" : + state == TCPS_FIN_WAIT_2 ? "state-fin-wait-2" : + state == TCPS_TIME_WAIT ? "state-time-wait" : + ""; + +/* + * tcpsinfo contains stable TCP details from tcp_t. 
+ */ +typedef struct tcpsinfo { + uintptr_t tcps_addr; + int tcps_local; /* is delivered locally, boolean */ + int tcps_active; /* active open (from here), boolean */ + uint16_t tcps_lport; /* local port */ + uint16_t tcps_rport; /* remote port */ + string tcps_laddr; /* local address, as a string */ + string tcps_raddr; /* remote address, as a string */ + int32_t tcps_state; /* TCP state */ + uint32_t tcps_iss; /* Initial sequence # sent */ + uint32_t tcps_suna; /* sequence # sent but unacked */ + uint32_t tcps_snxt; /* next sequence # to send */ + uint32_t tcps_rack; /* sequence # we have acked */ + uint32_t tcps_rnxt; /* next sequence # expected */ + uint32_t tcps_swnd; /* send window size */ + int32_t tcps_snd_ws; /* send window scaling */ + uint32_t tcps_rwnd; /* receive window size */ + int32_t tcps_rcv_ws; /* receive window scaling */ + uint32_t tcps_cwnd; /* congestion window */ + uint32_t tcps_cwnd_ssthresh; /* threshold for congestion avoidance */ + uint32_t tcps_sack_fack; /* SACK sequence # we have acked */ + uint32_t tcps_sack_snxt; /* next SACK seq # for retransmission */ + uint32_t tcps_rto; /* round-trip timeout, msec */ + uint32_t tcps_mss; /* max segment size */ + int tcps_retransmit; /* retransmit send event, boolean */ +} tcpsinfo_t; + +/* + * tcplsinfo provides the old tcp state for state changes. + */ +typedef struct tcplsinfo { + int32_t tcps_state; /* previous TCP state */ +} tcplsinfo_t; + +/* + * tcpinfo is the TCP header fields. 
+ */ +typedef struct tcpinfo { + uint16_t tcp_sport; /* source port */ + uint16_t tcp_dport; /* destination port */ + uint32_t tcp_seq; /* sequence number */ + uint32_t tcp_ack; /* acknowledgment number */ + uint8_t tcp_offset; /* data offset, in bytes */ + uint8_t tcp_flags; /* flags */ + uint16_t tcp_window; /* window size */ + uint16_t tcp_checksum; /* checksum */ + uint16_t tcp_urgent; /* urgent data pointer */ + struct tcphdr *tcp_hdr; /* raw TCP header */ +} tcpinfo_t; + +#pragma D binding "1.0" translator +translator tcpsinfo_t < struct tcpcb *p > { + tcps_addr = (uintptr_t)p; + tcps_local = -1; /* XXX */ + tcps_active = -1; /* XXX */ + tcps_lport = p == NULL ? 0 : ntohs(p->t_inpcb->inp_inc.inc_ie.ie_lport); + tcps_rport = p == NULL ? 0 : ntohs(p->t_inpcb->inp_inc.inc_ie.ie_fport); + tcps_laddr = p == NULL ? 0 : + p->t_inpcb->inp_vflag == INP_IPV4 ? + inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.ie46_local.ia46_addr4.s_addr) : + p->t_inpcb->inp_vflag == INP_IPV6 ? + inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.ie6_local) : + ""; + tcps_raddr = p == NULL ? 0 : + p->t_inpcb->inp_vflag == INP_IPV4 ? + inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.ie46_foreign.ia46_addr4.s_addr) : + p->t_inpcb->inp_vflag == INP_IPV6 ? + inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.ie6_foreign) : + ""; + tcps_state = p == NULL ? -1 : p->t_state; + tcps_iss = p == NULL ? 0 : p->iss; + tcps_suna = p == NULL ? 0 : p->snd_una; + tcps_snxt = p == NULL ? 0 : p->snd_nxt; + tcps_rack = p == NULL ? 0 : p->last_ack_sent; + tcps_rnxt = p == NULL ? 0 : p->rcv_nxt; + tcps_swnd = p == NULL ? -1 : p->snd_wnd; + tcps_snd_ws = p == NULL ? -1 : p->snd_scale; + tcps_rwnd = p == NULL ? -1 : p->rcv_wnd; + tcps_rcv_ws = p == NULL ? -1 : p->rcv_scale; + tcps_cwnd = p == NULL ? -1 : p->snd_cwnd; + tcps_cwnd_ssthresh = p == NULL ? -1 : p->snd_ssthresh; + tcps_sack_fack = p == NULL ? 0 : p->snd_fack; + tcps_sack_snxt = p == NULL ? 
0 : p->sack_newdata; + tcps_rto = p == NULL ? -1 : p->t_rxtcur / 1000; /* XXX */ + tcps_mss = p == NULL ? -1 : p->t_maxseg; + tcps_retransmit = -1; /* XXX */ +}; + +#pragma D binding "1.0" translator +translator tcpinfo_t < struct tcphdr *p > { + tcp_sport = p == NULL ? 0 : ntohs(p->th_sport); + tcp_dport = p == NULL ? 0 : ntohs(p->th_dport); + tcp_seq = p == NULL ? -1 : ntohl(p->th_seq); + tcp_ack = p == NULL ? -1 : ntohl(p->th_ack); + tcp_offset = p == NULL ? -1 : (p->th_off << 2); /* th_off is a 4-bit count of 32-bit words, not a network-order short; scale it to bytes */ + tcp_flags = p == NULL ? 0 : p->th_flags; + tcp_window = p == NULL ? 0 : ntohs(p->th_win); + tcp_checksum = p == NULL ? 0 : ntohs(p->th_sum); + tcp_urgent = p == NULL ? 0 : ntohs(p->th_urp); + tcp_hdr = (struct tcphdr *)p; +}; + +#pragma D binding "1.0" translator +translator tcplsinfo_t < int s > { + tcps_state = s; +}; diff --git a/sys/cddl/contrib/opensolaris/uts/common/dtrace/sdt_subr.c b/sys/cddl/contrib/opensolaris/uts/common/dtrace/sdt_subr.c index a1fc2b1..2316458 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/dtrace/sdt_subr.c +++ b/sys/cddl/contrib/opensolaris/uts/common/dtrace/sdt_subr.c @@ -92,6 +92,7 @@ sdt_provider_t sdt_providers[] = { { "proc", "__proc_", &stab_attr, 0 }, { "io", "__io_", &stab_attr, 0 }, { "ip", "__ip_", &stab_attr, 0 }, + { "tcp", "__tcp_", &stab_attr, 0 }, { "udp", "__udp_", &stab_attr, 0 }, { "mib", "__mib_", &stab_attr, 0 }, { "fsinfo", "__fsinfo_", &fsinfo_attr, 0 }, @@ -802,6 +803,49 @@ sdt_argdesc_t sdt_args[] = { { "ip", "receive", 3, 3, "struct ifnet *", "ifinfo_t *" }, { "ip", "receive", 4, 4, "struct ip *", "ipv4info_t *" }, { "ip", "receive", 5, 5, "struct ip6_hdr *", "ipv6info_t *" }, + + { "tcp", "connect-established", 0, 0, "void *", "pktinfo_t *" }, + { "tcp", "connect-established", 1, 1, "void *", "csinfo_t *" }, + { "tcp", "connect-established", 2, 2, "struct mbuf *", "ipinfo_t *" }, + { "tcp", "connect-established", 3, 3, "struct tcpcb *", "tcpsinfo_t *" }, + { "tcp", "connect-established", 4, 4, "struct tcphdr *", "tcpinfo_t 
*" }, + { "tcp", "connect-refused", 0, 0, "void *", "pktinfo_t *" }, + { "tcp", "connect-refused", 1, 1, "void *", "csinfo_t *" }, + { "tcp", "connect-refused", 2, 2, "struct mbuf *", "ipinfo_t *" }, + { "tcp", "connect-refused", 3, 3, "struct tcpcb *", "tcpsinfo_t *" }, + { "tcp", "connect-refused", 4, 4, "struct tcphdr *", "tcpinfo_t *" }, + { "tcp", "connect-request", 0, 0, "void *", "pktinfo_t *" }, + { "tcp", "connect-request", 1, 1, "void *", "csinfo_t *" }, + { "tcp", "connect-request", 2, 2, "struct mbuf *", "ipinfo_t *" }, + { "tcp", "connect-request", 3, 3, "struct tcpcb *", "tcpsinfo_t *" }, + { "tcp", "connect-request", 4, 4, "struct tcphdr *", "tcpinfo_t *" }, + { "tcp", "accept-established", 0, 0, "void *", "pktinfo_t *" }, + { "tcp", "accept-established", 1, 1, "void *", "csinfo_t *" }, + { "tcp", "accept-established", 2, 2, "struct mbuf *", "ipinfo_t *" }, + { "tcp", "accept-established", 3, 3, "struct tcpcb *", "tcpsinfo_t *" }, + { "tcp", "accept-established", 4, 4, "struct tcphdr *", "tcpinfo_t *" }, + { "tcp", "accept-refused", 0, 0, "void *", "pktinfo_t *" }, + { "tcp", "accept-refused", 1, 1, "void *", "csinfo_t *" }, + { "tcp", "accept-refused", 2, 2, "struct mbuf *", "ipinfo_t *" }, + { "tcp", "accept-refused", 3, 3, "struct tcpcb *", "tcpsinfo_t *" }, + { "tcp", "accept-refused", 4, 4, "struct tcphdr *", "tcpinfo_t *" }, + { "tcp", "state-change", 0, 0, "void *", "pktinfo_t *" }, + { "tcp", "state-change", 1, 1, "void *", "csinfo_t *" }, + { "tcp", "state-change", 2, 2, "struct mbuf *", "ipinfo_t *" }, + { "tcp", "state-change", 3, 3, "struct tcpcb *", "tcpsinfo_t *" }, + { "tcp", "state-change", 4, 4, "struct tcphdr *", "tcpinfo_t *" }, + { "tcp", "state-change", 5, 5, "int", "tcplsinfo_t *" }, + { "tcp", "send", 0, 0, "void *", "pktinfo_t *" }, + { "tcp", "send", 1, 1, "void *", "csinfo_t *" }, + { "tcp", "send", 2, 2, "struct mbuf *", "ipinfo_t *" }, + { "tcp", "send", 3, 3, "struct tcpcb *", "tcpsinfo_t *" }, /* arg3 is the tcpcb and arg4 the header, as at the SDT_PROBE5 call sites */ + { "tcp", "send", 4, 4, 
"struct tcphdr *", "tcpinfo_t *" }, + { "tcp", "receive", 0, 0, "void *", "pktinfo_t *" }, + { "tcp", "receive", 1, 1, "void *", "csinfo_t *" }, + { "tcp", "receive", 2, 2, "struct mbuf *", "ipinfo_t *" }, + { "tcp", "receive", 3, 3, "struct tcpcb *", "tcpsinfo_t *" }, + { "tcp", "receive", 4, 4, "struct tcphdr *", "tcpinfo_t *" }, + { "udp", "send", 0, 0, "void *", "void *" }, { "udp", "send", 1, 1, "void *", "void *" }, { "udp", "send", 2, 2, "struct mbuf *", "ipinfo_t *" }, diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index a3122a1..fef85ed 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_kdtrace.h" #include "opt_tcpdebug.h" #include @@ -63,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include #include /* for proc0 declaration */ #include +#include #include #include #include @@ -246,6 +248,19 @@ static void inline hhook_run_tcp_est_in(struct tcpcb *tp, */ VNET_DEFINE(struct tcpstat_p, tcpstatp); +SDT_PROVIDER_DEFINE(tcp); + +SDT_PROBE_DEFINE5(tcp, , , receive, receive, "void *", "void *", + "struct mbuf *", "struct tcpcb *", "struct tcphdr *"); +SDT_PROBE_DEFINE5(tcp, , , connect_established, connect-established, "void *", + "void *", "struct mbuf *", "struct tcpcb *", "struct tcphdr *"); +SDT_PROBE_DEFINE5(tcp, , , connect_refused, connect-refused, "void *", "void *", + "struct mbuf *", "struct tcpcb *", "struct tcphdr *"); +SDT_PROBE_DEFINE5(tcp, , , accept_established, accept-established, "void *", + "void *", "struct mbuf *", "struct tcpcb *", "struct tcphdr *"); +SDT_PROBE_DEFINE5(tcp, , , accept_refused, accept-refused, "void *", "void *", + "struct mbuf *", "struct tcpcb *", "struct tcphdr *"); + static void vnet_tcpstatp_init(const void *unused) { @@ -635,6 +650,7 @@ tcp_input(struct mbuf *m, int off0) int ti_locked; #define TI_UNLOCKED 1 #define TI_WLOCKED 2 + int rprobe_fired = 0; #ifdef TCPDEBUG /* @@ 
-936,6 +952,7 @@ findpcb: goto dropunlock; rstreason = BANDLIM_RST_CLOSEDPORT; + SDT_PROBE5(tcp, , , accept_refused, 0, 0, m, tp, th); goto dropwithreset; } INP_WLOCK_ASSERT(inp); @@ -1066,6 +1083,9 @@ relocked: INP_INFO_WLOCK_ASSERT(&V_tcbinfo); } + SDT_PROBE5(tcp, , , receive, 0, 0, m, tp, th); + rprobe_fired = 1; + #ifdef MAC INP_WLOCK_ASSERT(inp); if (mac_inpcb_check_deliver(inp, m)) @@ -1482,6 +1502,8 @@ dropunlock: INP_WUNLOCK(inp); drop: + if (!rprobe_fired) + SDT_PROBE5(tcp, , , receive, 0, 0, m, tp, th); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); if (s != NULL) free(s, M_TCPLOG); @@ -1940,8 +1962,10 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, rstreason = BANDLIM_UNLIMITED; goto dropwithreset; } - if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) + if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) { + SDT_PROBE5(tcp, , , connect_refused, 0, 0, m, tp, th); tp = tcp_drop(tp, ECONNREFUSED); + } if (thflags & TH_RST) goto drop; if (!(thflags & TH_SYN)) @@ -1986,11 +2010,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, */ tp->t_starttime = ticks; if (tp->t_flags & TF_NEEDFIN) { - tp->t_state = TCPS_FIN_WAIT_1; + tcp_state_change(tp, TCPS_FIN_WAIT_1); tp->t_flags &= ~TF_NEEDFIN; thflags &= ~TH_SYN; } else { - tp->t_state = TCPS_ESTABLISHED; + tcp_state_change(tp, TCPS_ESTABLISHED); + SDT_PROBE5(tcp, , , connect_established, 0, 0, + m, tp, th); cc_conn_init(tp); tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); @@ -2008,7 +2034,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, */ tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN); tcp_timer_activate(tp, TT_REXMT, 0); - tp->t_state = TCPS_SYN_RECEIVED; + tcp_state_change(tp, TCPS_SYN_RECEIVED); } KASSERT(ti_locked == TI_WLOCKED, ("%s: trimthenstep6: " @@ -2146,7 +2172,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, ti_locked)); INP_INFO_WLOCK_ASSERT(&V_tcbinfo); - tp->t_state = TCPS_CLOSED; + tcp_state_change(tp, TCPS_CLOSED); 
TCPSTAT_INC(tcps_drops); tp = tcp_close(tp); break; @@ -2392,10 +2418,11 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, */ tp->t_starttime = ticks; if (tp->t_flags & TF_NEEDFIN) { - tp->t_state = TCPS_FIN_WAIT_1; + tcp_state_change(tp, TCPS_FIN_WAIT_1); tp->t_flags &= ~TF_NEEDFIN; } else { - tp->t_state = TCPS_ESTABLISHED; + tcp_state_change(tp, TCPS_ESTABLISHED); + SDT_PROBE5(tcp, , , accept_established, 0, 0, m, tp, th); cc_conn_init(tp); tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); } @@ -2783,7 +2810,7 @@ process_ACK: tcp_finwait2_timeout : TP_MAXIDLE(tp))); } - tp->t_state = TCPS_FIN_WAIT_2; + tcp_state_change(tp, TCPS_FIN_WAIT_2); } break; @@ -3009,7 +3036,7 @@ dodata: /* XXX */ tp->t_starttime = ticks; /* FALLTHROUGH */ case TCPS_ESTABLISHED: - tp->t_state = TCPS_CLOSE_WAIT; + tcp_state_change(tp, TCPS_CLOSE_WAIT); break; /* @@ -3017,7 +3044,7 @@ dodata: /* XXX */ * enter the CLOSING state. */ case TCPS_FIN_WAIT_1: - tp->t_state = TCPS_CLOSING; + tcp_state_change(tp, TCPS_CLOSING); break; /* diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 16038cb..905a240 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_kdtrace.h" #include "opt_tcpdebug.h" #include @@ -46,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -121,6 +123,13 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_RW, &VNET_NAME(tcp_autosndbuf_max), 0, "Max size of automatic send buffer"); +SDT_PROVIDER_DECLARE(tcp); + +SDT_PROBE_DEFINE5(tcp, , , send, send, "void *", "void *", "struct mbuf *", + "struct tcpcb *", "struct tcphdr *"); +SDT_PROBE_DEFINE5(tcp, , , connect_request, connect-request, "void *", "void *", + "struct mbuf *", "struct tcpcb *", "struct tcphdr *"); /* argument types are type-name strings, as for the other tcp probes */ + static void inline hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th, struct 
tcpopt *to, long len, int tso); @@ -1173,6 +1182,11 @@ send: */ ip6->ip6_hlim = in6_selecthlim(tp->t_inpcb, NULL); + if (tp->t_state == TCPS_SYN_SENT) + SDT_PROBE5(tcp, , , connect_request, 0, 0, m, tp, th); + + SDT_PROBE5(tcp, , , send, 0, 0, m, tp, th); + /* TODO: IPv6 IP6TOS_ECT bit on */ error = ip6_output(m, tp->t_inpcb->in6p_outputopts, &ro, ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), @@ -1207,6 +1221,11 @@ send: if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss) ip->ip_off |= htons(IP_DF); + if (tp->t_state == TCPS_SYN_SENT) + SDT_PROBE5(tcp, , , connect_request, 0, 0, m, tp, th); + + SDT_PROBE5(tcp, , , send, 0, 0, m, tp, th); + error = ip_output(m, tp->t_inpcb->inp_options, &ro, ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0, tp->t_inpcb); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 05030fd..93b9e3d 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_kdtrace.h" #include "opt_tcpdebug.h" #include @@ -53,6 +54,7 @@ __FBSDID("$FreeBSD$"); #endif #include #include +#include #include #include #include @@ -119,6 +121,12 @@ VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS; VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS; #endif +SDT_PROVIDER_DECLARE(tcp); + +SDT_PROBE_DEFINE6(tcp, , , state_change, state-change, "void *", "void *", + "void *", "struct tcpcb *", "void *", "int"); +SDT_PROBE_DECLARE(tcp, , , send); + static int sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS) { @@ -702,6 +710,7 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0); #endif + SDT_PROBE5(tcp, , , send, 0, inp, m, tp, th); #ifdef INET6 if (isipv6) (void) ip6_output(m, NULL, NULL, ipflags, NULL, NULL, inp); @@ -882,7 +891,7 @@ tcp_drop(struct tcpcb *tp, int errno) 
INP_WLOCK_ASSERT(tp->t_inpcb); if (TCPS_HAVERCVDSYN(tp->t_state)) { - tp->t_state = TCPS_CLOSED; + tcp_state_change(tp, TCPS_CLOSED); (void) tcp_output(tp); TCPSTAT_INC(tcps_drops); } else @@ -2374,3 +2383,19 @@ tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, panic("%s: string too long", __func__); return (s); } + +/* + * A subroutine which makes it easy to track TCP state changes with DTrace. + * This function shouldn't be called for t_state initializations that don't + * correspond to actual TCP state transitions. + */ +void +tcp_state_change(struct tcpcb *tp, int newstate) +{ +#if defined(KDTRACE_HOOKS) + int pstate = tp->t_state; +#endif + + tp->t_state = newstate; + SDT_PROBE6(tcp, , , state_change, 0, 0, 0, tp, 0, pstate); +} diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 441c269..072c33e 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -814,7 +814,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) #endif /* INET */ INP_HASH_WUNLOCK(&V_tcbinfo); tp = intotcpcb(inp); - tp->t_state = TCPS_SYN_RECEIVED; + tcp_state_change(tp, TCPS_SYN_RECEIVED); tp->iss = sc->sc_iss; tp->irs = sc->sc_irs; tcp_rcvseqinit(tp); diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index ef0aad1..88755e7 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -367,7 +367,7 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td) error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); if (error == 0) { - tp->t_state = TCPS_LISTEN; + tcp_state_change(tp, TCPS_LISTEN); solisten_proto(so, backlog); #ifdef TCP_OFFLOAD if ((so->so_options & SO_NO_OFFLOAD) == 0) @@ -412,7 +412,7 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) } INP_HASH_WUNLOCK(&V_tcbinfo); if (error == 0) { - tp->t_state = TCPS_LISTEN; + tcp_state_change(tp, TCPS_LISTEN); solisten_proto(so, backlog); #ifdef TCP_OFFLOAD if 
((so->so_options & SO_NO_OFFLOAD) == 0) @@ -1152,7 +1152,7 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) soisconnecting(so); TCPSTAT_INC(tcps_connattempt); - tp->t_state = TCPS_SYN_SENT; + tcp_state_change(tp, TCPS_SYN_SENT); tp->iss = tcp_new_isn(tp); tcp_sendseqinit(tp); @@ -1224,7 +1224,7 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) soisconnecting(so); TCPSTAT_INC(tcps_connattempt); - tp->t_state = TCPS_SYN_SENT; + tcp_state_change(tp, TCPS_SYN_SENT); tp->iss = tcp_new_isn(tp); tcp_sendseqinit(tp); @@ -1704,7 +1704,7 @@ tcp_usrclosed(struct tcpcb *tp) #endif /* FALLTHROUGH */ case TCPS_CLOSED: - tp->t_state = TCPS_CLOSED; + tcp_state_change(tp, TCPS_CLOSED); tp = tcp_close(tp); /* * tcp_close() should never return NULL here as the socket is @@ -1720,11 +1720,11 @@ tcp_usrclosed(struct tcpcb *tp) break; case TCPS_ESTABLISHED: - tp->t_state = TCPS_FIN_WAIT_1; + tcp_state_change(tp, TCPS_FIN_WAIT_1); break; case TCPS_CLOSE_WAIT: - tp->t_state = TCPS_LAST_ACK; + tcp_state_change(tp, TCPS_LAST_ACK); break; } if (tp->t_state >= TCPS_FIN_WAIT_2) { diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index eddbd3c..c8700e6 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -795,6 +795,7 @@ struct inpcb * struct tcpcb * tcp_newtcpcb(struct inpcb *); int tcp_output(struct tcpcb *); +void tcp_state_change(struct tcpcb *, int); void tcp_respond(struct tcpcb *, void *, struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int); void tcp_tw_init(void);