diff -r 522068e580f7 -r 9ddbb125c2e2 sbin/ifconfig/ifconfig.c --- a/sbin/ifconfig/ifconfig.c Wed Oct 12 17:19:20 2016 -0700 +++ b/sbin/ifconfig/ifconfig.c Thu Oct 13 17:11:04 2016 -0700 @@ -1145,7 +1145,7 @@ unsetifdescr(const char *val, int value, "\020\1RXCSUM\2TXCSUM\3NETCONS\4VLAN_MTU\5VLAN_HWTAGGING\6JUMBO_MTU\7POLLING" \ "\10VLAN_HWCSUM\11TSO4\12TSO6\13LRO\14WOL_UCAST\15WOL_MCAST\16WOL_MAGIC" \ "\17TOE4\20TOE6\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" \ -"\26RXCSUM_IPV6\27TXCSUM_IPV6\31TXRTLMT" +"\26RXCSUM_IPV6\27TXCSUM_IPV6\30TXRTLMT" /* * Print the status of the interface. If an address family was diff -r 522068e580f7 -r 9ddbb125c2e2 sys/net/if.c --- a/sys/net/if.c Wed Oct 12 17:19:20 2016 -0700 +++ b/sys/net/if.c Thu Oct 13 17:11:04 2016 -0700 @@ -2767,17 +2767,6 @@ ifioctl(struct socket *so, u_long cmd, c ifr = (struct ifreq *)data; switch (cmd) { - /* - * The TX rate limiting IOCTLs should only be used - * within the kernel. Prevent user-space from using - * them: - */ - case SIOCARATECTL: - case SIOCSRATECTL: - case SIOCDRATECTL: - CURVNET_RESTORE(); - return (EOPNOTSUPP); - #ifdef VIMAGE case SIOCSIFRVNET: error = priv_check(td, PRIV_NET_SETIFVNET); diff -r 522068e580f7 -r 9ddbb125c2e2 sys/net/if.h --- a/sys/net/if.h Wed Oct 12 17:19:20 2016 -0700 +++ b/sys/net/if.h Thu Oct 13 17:11:04 2016 -0700 @@ -372,16 +372,6 @@ struct ifreq_buffer { }; /* - * Interface to create/delete/modify TX rate limiting. - */ -struct ifreq_txrtlmt { - uint32_t txring_max_rate; /* limit in bytes per second */ - uint32_t txring_id; /* driver specific value */ - uint32_t txring_flow_id; /* current flowid */ - uint32_t txring_flow_type; /* current flowtype */ -}; - -/* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The diff -r 522068e580f7 -r 9ddbb125c2e2 sys/net/if_var.h --- a/sys/net/if_var.h Wed Oct 12 17:19:20 2016 -0700 +++ b/sys/net/if_var.h Thu Oct 13 17:11:04 2016 -0700 @@ -176,6 +176,12 @@ struct if_encap_req { #define IFENCAP_FLAG_BROADCAST 0x02 /* Destination is broadcast */ +struct ifnet_txrtlmt_ops { + int (*ito_alloc)(struct ifnet *, u_int, void **); + int (*ito_update)(struct ifnet *, u_int, void **); + int (*ito_free)(struct ifnet *, void *); +}; + /* * Structure defining a network interface. */ @@ -303,12 +309,14 @@ struct ifnet { u_int if_hw_tsomaxsegcount; /* TSO maximum segment count */ u_int if_hw_tsomaxsegsize; /* TSO maximum segment size in bytes */ + struct ifnet_txrtlmt_ops *if_txrtlmt_ops; + /* * Spare fields to be added before branching a stable branch, so * that structure can be enhanced without changing the kernel * binary interface. */ - void *if_pspare[4]; /* packet pacing / general use */ + void *if_pspare[3]; /* packet pacing / general use */ int if_ispare[4]; /* packet pacing / general use */ }; diff -r 522068e580f7 -r 9ddbb125c2e2 sys/netinet/in_pcb.c --- a/sys/netinet/in_pcb.c Wed Oct 12 17:19:20 2016 -0700 +++ b/sys/netinet/in_pcb.c Thu Oct 13 17:11:04 2016 -0700 @@ -1146,7 +1146,7 @@ in_pcbdetach(struct inpcb *inp) KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__)); #ifdef RATELIMIT - if (inp->inp_txring_ifp != NULL) + if (inp->inp_txrtlmt_ifp != NULL) in_pcbdetach_txrtlmt(inp); #endif inp->inp_socket->so_pcb = NULL; @@ -2695,29 +2695,28 @@ DB_SHOW_COMMAND(inpcb, db_show_inpcb) #ifdef RATELIMIT /* - * Modify existing TX rate limit on inp_txring_ifp and update + * Modify existing TX rate limit on inp_txrtlmt_ifp and update * inpcb info: */ static int in_pcbmodify_txrtlmt(struct inpcb *inp, struct ifnet *ifp, uint32_t max_pacing_rate) { - struct ifreq_txrtlmt req; int error; INP_WLOCK_ASSERT(inp); - - req.txring_max_rate = max_pacing_rate; - req.txring_id = inp->inp_txring_id; - req.txring_flow_id = inp->inp_flowid; - req.txring_flow_type = inp->inp_flowtype; + MPASS(ifp->if_txrtlmt_ops != NULL); + MPASS(ifp->if_txrtlmt_ops->ito_update != NULL); - error = ifp->if_ioctl(ifp, SIOCSRATECTL, (caddr_t)&req); + if (ifp->if_txrtlmt_ops->ito_update != NULL) { + error = (*ifp->if_txrtlmt_ops->ito_update)(ifp, max_pacing_rate, + &inp->inp_txrtlmt_cookie); + if (error) + return (error); + inp->inp_txrtlmt_max_rate = max_pacing_rate; + } else + return (ENOTSUP); - if (error) - return (error); - - inp->inp_txring_max_rate = max_pacing_rate; return (0); } @@ -2728,61 +2727,48 @@ static int in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp, uint32_t max_pacing_rate) { - struct ifreq_txrtlmt req; int error; INP_WLOCK_ASSERT(inp); - KASSERT(inp->inp_txring_ifp == NULL, - ("%s: inp_txring_ifp != NULL", __func__)); - - req.txring_max_rate = max_pacing_rate; - req.txring_flow_id = inp->inp_flowid; - req.txring_flow_type = inp->inp_flowtype; + KASSERT(inp->inp_txrtlmt_ifp == NULL, + ("%s: inp %p inp_txrtlmt_ifp != NULL", __func__, inp)); + MPASS(ifp->if_txrtlmt_ops != NULL); + MPASS(ifp->if_txrtlmt_ops->ito_alloc != NULL); if_ref(ifp); - error = ifp->if_ioctl(ifp, SIOCARATECTL, (caddr_t)&req); - + error = (*ifp->if_txrtlmt_ops->ito_alloc)(ifp, max_pacing_rate, + &inp->inp_txrtlmt_cookie); if (error) { if_rele(ifp); return (error); } - inp->inp_txring_ifp = ifp; - inp->inp_txring_max_rate = max_pacing_rate; - inp->inp_txring_id = req.txring_id; + inp->inp_txrtlmt_ifp = ifp; + inp->inp_txrtlmt_max_rate = max_pacing_rate; + return (0); } /* - * Remove TX rate limit from inp_txring_ifp and detach it from + * Remove TX rate limit from inp_txrtlmt_ifp and detach it from * the inpcb: */ static void in_pcbdetach_txrtlmt(struct inpcb *inp) { - struct ifreq_txrtlmt req; struct ifnet *ifp; INP_WLOCK_ASSERT(inp); - - KASSERT(inp->inp_txring_ifp != NULL, - ("%s: inp->inp_txring_ifp == NULL", __func__)); - - ifp = inp->inp_txring_ifp; - req.txring_id = inp->inp_txring_id; - req.txring_flow_id = inp->inp_flowid; - req.txring_flow_type = inp->inp_flowtype; + KASSERT(inp->inp_txrtlmt_ifp != NULL, + ("%s: inp %p inp_txrtlmt_ifp == NULL", __func__, inp)); - inp->inp_txring_ifp = NULL; - inp->inp_txring_id = 0; - inp->inp_txring_max_rate = 0; + ifp = inp->inp_txrtlmt_ifp; + MPASS(ifp != NULL); - /* - * If the device was detached while we still had reference on - * ifp, we assume if_dead() was called and replaced callbacks - * with stubs. - */ - ifp->if_ioctl(ifp, SIOCDRATECTL, (caddr_t)&req); + (*ifp->if_txrtlmt_ops->ito_free)(ifp, inp->inp_txrtlmt_cookie); + + inp->inp_txrtlmt_ifp = NULL; + inp->inp_txrtlmt_max_rate = 0; if_rele(ifp); } @@ -2791,18 +2777,19 @@ in_pcbdetach_txrtlmt(struct inpcb *inp) * mbuf to match what the network driver expects. */ void -in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb) +in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m) { - struct socket *socket; + struct socket *so; uint32_t max_pacing_rate; int error; if (inp == NULL) return; - socket = inp->inp_socket; - if (socket == NULL) - return; + M_ASSERTPKTHDR(m); + INP_LOCK_ASSERT(inp); + so = inp->inp_socket; + MPASS(so != NULL); /* * NOTE: The so_max_pacing_rate value is read unlocked, @@ -2810,9 +2797,10 @@ in_pcboutput_txrtlmt(struct inpcb *inp, * is checked at every mbuf we send. It is assumed that the * variable read itself will be atomic. */ - max_pacing_rate = socket->so_max_pacing_rate; + max_pacing_rate = so->so_max_pacing_rate; + m->m_pkthdr.rcvif = NULL; - if (max_pacing_rate == 0 && inp->inp_txring_ifp == NULL) + if (max_pacing_rate == 0 && inp->inp_txrtlmt_ifp == NULL) return; /* @@ -2822,7 +2810,7 @@ in_pcboutput_txrtlmt(struct inpcb *inp, * pointers compared below represent valid network interfaces, * except when comparing towards NULL. */ - if (ifp != inp->inp_txring_ifp) { + if (ifp != inp->inp_txrtlmt_ifp) { bool wlocked = INP_WLOCKED(inp); if (!wlocked) { @@ -2836,25 +2824,9 @@ in_pcboutput_txrtlmt(struct inpcb *inp, return; } - if (inp->inp_txring_ifp != NULL) + if (inp->inp_txrtlmt_ifp != NULL) in_pcbdetach_txrtlmt(inp); - /* - * In order to utilize packet pacing with RSS, we need - * to wait until there is a valid RSS hash before we - * can proceed: - */ - if (inp->inp_flowtype == M_HASHTYPE_NONE) { - if (M_HASHTYPE_GET(mb) == M_HASHTYPE_NONE) { - if (!wlocked) - INP_DOWNGRADE(inp); - return; - } - /* typically UDP ends up here */ - inp->inp_flowid = mb->m_pkthdr.flowid; - inp->inp_flowtype = M_HASHTYPE_GET(mb); - } - error = in_pcbattach_txrtlmt(inp, ifp, max_pacing_rate); if (!wlocked) @@ -2862,7 +2834,7 @@ in_pcboutput_txrtlmt(struct inpcb *inp, if (error) return; - } else if (inp->inp_txring_max_rate != max_pacing_rate) { + } else if (inp->inp_txrtlmt_max_rate != max_pacing_rate) { bool wlocked = INP_WLOCKED(inp); if (!wlocked) { @@ -2882,10 +2854,8 @@ in_pcboutput_txrtlmt(struct inpcb *inp, goto done; /* use old rate */ } done: - /* - * Update the flow ID and RSS hash for the transmitted mbuf. - */ - mb->m_pkthdr.flowid = inp->inp_txring_id; - M_HASHTYPE_SET(mb, M_HASHTYPE_TXRTLMT); + /* XXXNP: review rcvif down the stack. */ + m->m_pkthdr.rcvif = (void *)inp->inp_txrtlmt_cookie; + return; } #endif /* RATELIMIT */ diff -r 522068e580f7 -r 9ddbb125c2e2 sys/netinet/in_pcb.h --- a/sys/netinet/in_pcb.h Wed Oct 12 17:19:20 2016 -0700 +++ b/sys/netinet/in_pcb.h Thu Oct 13 17:11:04 2016 -0700 @@ -202,14 +202,13 @@ struct inpcb { u_char inp_ip_minttl; /* (i) minimum TTL or drop */ uint32_t inp_flowid; /* (x) flow id / queue id */ u_int inp_refcount; /* (i) refcount */ - struct ifnet *inp_txring_ifp; /* (i) ifp of TX ring */ - void *inp_pspare[4]; /* (x) packet pacing / general use */ + struct ifnet *inp_txrtlmt_ifp; /* (i) ifp doing tx rate limiting */ + void *inp_txrtlmt_cookie; /* (i) txrtlmt_ifp specific cookie */ + void *inp_pspare[3]; /* (x) general use */ uint32_t inp_flowtype; /* (x) M_HASHTYPE value */ uint32_t inp_rss_listen_bucket; /* (x) overridden RSS listen bucket */ - uint32_t inp_txring_max_rate; /* (i) driver TX ring rate */ - uint32_t inp_txring_id; /* (i) driver TX ring ID */ - u_int inp_ispare[2]; /* (x) packet pacing / user cookie / - * general use */ + u_int inp_txrtlmt_max_rate; /* (i) desired TX drain rate */ + u_int inp_ispare[3]; /* (x) user cookie / general use */ /* Local and foreign ports, local and foreign addr. */ struct in_conninfo inp_inc; /* (i) list for PCB's local port */ diff -r 522068e580f7 -r 9ddbb125c2e2 sys/sys/mbuf.h --- a/sys/sys/mbuf.h Wed Oct 12 17:19:20 2016 -0700 +++ b/sys/sys/mbuf.h Thu Oct 13 17:11:04 2016 -0700 @@ -345,7 +345,6 @@ struct mbuf { #define M_HASHTYPE_RSS_UDP_IPV6_EX M_HASHTYPE_HASH(10)/* IPv6 UDP 4-tuple + * ext hdrs */ -#define M_HASHTYPE_TXRTLMT 62 /* rate limited TX traffic */ #define M_HASHTYPE_OPAQUE 63 /* ordering, not affinity */ #define M_HASHTYPE_OPAQUE_HASH M_HASHTYPE_HASH(M_HASHTYPE_OPAQUE) /* ordering+hash, not affinity*/ diff -r 522068e580f7 -r 9ddbb125c2e2 sys/sys/sockio.h --- a/sys/sys/sockio.h Wed Oct 12 17:19:20 2016 -0700 +++ b/sys/sys/sockio.h Thu Oct 13 17:11:04 2016 -0700 @@ -133,8 +133,4 @@ #define SIOCGIFGMEMB _IOWR('i', 138, struct ifgroupreq) /* get members */ #define SIOCGIFXMEDIA _IOWR('i', 139, struct ifmediareq) /* get net xmedia */ -#define SIOCARATECTL _IOWR('i', 140, struct ifreq_txrtlmt) /* add TX rate limit */ -#define SIOCSRATECTL _IOWR('i', 141, struct ifreq_txrtlmt) /* set TX rate limit */ -#define SIOCDRATECTL _IOW('i', 142, struct ifreq_txrtlmt) /* del TX rate limit */ - #endif /* !_SYS_SOCKIO_H_ */