Record the mbuf hash type alongside the flow id in the inpcb and use it to
pick a CPU through RSS.

  * in_pcb.h: take one slot from inp_ispare[] for the new inp_flowtype field.
  * in_rss.c/in_rss.h: split rss_hash2cpuid(hash, hashtype) out of
    rss_m2cpuid() so callers that have no mbuf can do the lookup.
  * ip_output.c: stamp outgoing mbufs with inp_flowtype and add the
    read-only socket options IP_FLOWID, IP_FLOWTYPE and (RSS kernels only)
    IP_RSSCPUID.
  * tcp_input.c, tcp_syncache.c: save M_HASHTYPE_GET(m) when the hardware
    flow id is saved.
  * tcp_timer.c: replace the INP_CPU() macro with inp_to_cpuid().

Review notes:
  * The new socket options are numbered 90-92.  Numbering them 69-71 makes
    IP_FLOWTYPE equal to IP_ADD_SOURCE_MEMBERSHIP (70, visible in the in.h
    hunk context).  90-92 are assumed to be unused; confirm against the
    full in.h before committing.
  * inp_to_cpuid() returns 0 when per_cpu_timers is off in both the RSS and
    the non-RSS build.  Without the shared return the RSS build falls off
    the end of a non-void function.
  * Whitespace inside context lines was reconstructed; if a hunk does not
    apply cleanly, retry with "patch -l".

Index: sys/netinet/in.h
===================================================================
--- sys/netinet/in.h	(revision 265880)
+++ sys/netinet/in.h	(working copy)
@@ -468,6 +468,9 @@
 #define	IP_MINTTL		66   /* minimum TTL for packet or drop */
 #define	IP_DONTFRAG		67   /* don't fragment packet */
 #define	IP_RECVTOS		68   /* bool; receive IP TOS w/dgram */
+#define	IP_FLOWID		90   /* get flow id of the connection */
+#define	IP_FLOWTYPE		91   /* get flow type (M_HASHTYPE value) */
+#define	IP_RSSCPUID		92   /* get CPU the RSS hash maps to */
 
 /* IPv4 Source Filter Multicast API [RFC3678] */
 #define	IP_ADD_SOURCE_MEMBERSHIP	70   /* join a source-specific group */
Index: sys/netinet/in_pcb.h
===================================================================
--- sys/netinet/in_pcb.h	(revision 265880)
+++ sys/netinet/in_pcb.h	(working copy)
@@ -180,7 +180,8 @@
 	uint32_t inp_flowid;		/* (x) flow id / queue id */
 	u_int	inp_refcount;		/* (i) refcount */
 	void	*inp_pspare[5];		/* (x) route caching / general use */
-	u_int	inp_ispare[6];		/* (x) route caching / user cookie /
+	uint32_t inp_flowtype;		/* (x) M_HASHTYPE value */
+	u_int	inp_ispare[5];		/* (x) route caching / user cookie /
 					 *     general use */
 
 	/* Local and foreign ports, local and foreign addr. */
Index: sys/netinet/in_rss.c
===================================================================
--- sys/netinet/in_rss.c	(revision 265880)
+++ sys/netinet/in_rss.c	(working copy)
@@ -407,27 +407,34 @@
 }
 
 /*
- * netisr CPU affinity lookup routine for use by protocols.
+ * netisr CPU affinity lookup given just the hash and hashtype.
  */
-struct mbuf *
-rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid)
+u_int
+rss_hash2cpuid(uint32_t hash_val, uint32_t hash_type)
 {
 
-	M_ASSERTPKTHDR(m);
-
-	switch (M_HASHTYPE_GET(m)) {
+	switch (hash_type) {
 	case M_HASHTYPE_RSS_IPV4:
 	case M_HASHTYPE_RSS_TCP_IPV4:
-		*cpuid = rss_getcpu(rss_getbucket(m->m_pkthdr.flowid));
-		return (m);
-
+		return (rss_getcpu(rss_getbucket(hash_val)));
 	default:
-		*cpuid = NETISR_CPUID_NONE;
-		return (m);
+		return (NETISR_CPUID_NONE);
 	}
 }
 
 /*
+ * netisr CPU affinity lookup routine for use by protocols.
+ */
+struct mbuf *
+rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid)
+{
+
+	M_ASSERTPKTHDR(m);
+	*cpuid = rss_hash2cpuid(m->m_pkthdr.flowid, M_HASHTYPE_GET(m));
+	return (m);
+}
+
+/*
  * Query the RSS hash algorithm.
  */
 u_int
Index: sys/netinet/in_rss.h
===================================================================
--- sys/netinet/in_rss.h	(revision 265880)
+++ sys/netinet/in_rss.h	(working copy)
@@ -90,5 +90,6 @@
  * Network stack interface to query desired CPU affinity of a packet.
  */
 struct mbuf *	rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid);
+u_int		rss_hash2cpuid(uint32_t hash_val, uint32_t hash_type);
 
 #endif /* !_NETINET_IN_RSS_H_ */
Index: sys/netinet/ip_output.c
===================================================================
--- sys/netinet/ip_output.c	(revision 265880)
+++ sys/netinet/ip_output.c	(working copy)
@@ -144,6 +144,7 @@
 		M_SETFIB(m, inp->inp_inc.inc_fibnum);
 		if (inp->inp_flags & (INP_HW_FLOWID|INP_SW_FLOWID)) {
 			m->m_pkthdr.flowid = inp->inp_flowid;
+			M_HASHTYPE_SET(m, inp->inp_flowtype);
 			m->m_flags |= M_FLOWID;
 		}
 	}
@@ -1171,6 +1172,11 @@
 		case IP_DONTFRAG:
 		case IP_BINDANY:
 		case IP_RECVTOS:
+		case IP_FLOWID:
+		case IP_FLOWTYPE:
+#ifdef	RSS
+		case IP_RSSCPUID:
+#endif
 			switch (sopt->sopt_name) {
 
 			case IP_TOS:
@@ -1232,6 +1238,18 @@
 			case IP_RECVTOS:
 				optval = OPTBIT(INP_RECVTOS);
 				break;
+			case IP_FLOWID:
+				optval = inp->inp_flowid;
+				break;
+			case IP_FLOWTYPE:
+				optval = inp->inp_flowtype;
+				break;
+#ifdef	RSS
+			case IP_RSSCPUID:
+				optval = rss_hash2cpuid(inp->inp_flowid,
+				    inp->inp_flowtype);
+				break;
+#endif
 			}
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c	(revision 265880)
+++ sys/netinet/tcp_input.c	(working copy)
@@ -905,6 +905,7 @@
 		inp->inp_flags |= INP_HW_FLOWID;
 		inp->inp_flags &= ~INP_SW_FLOWID;
 		inp->inp_flowid = m->m_pkthdr.flowid;
+		inp->inp_flowtype = M_HASHTYPE_GET(m);
 	}
 #ifdef IPSEC
 #ifdef INET6
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c	(revision 265880)
+++ sys/netinet/tcp_syncache.c	(working copy)
@@ -718,6 +718,7 @@
 		inp->inp_flags |= INP_HW_FLOWID;
 		inp->inp_flags &= ~INP_SW_FLOWID;
 		inp->inp_flowid = m->m_pkthdr.flowid;
+		inp->inp_flowtype = M_HASHTYPE_GET(m);
 	}
 
 	/*
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c	(revision 265880)
+++ sys/netinet/tcp_timer.c	(working copy)
@@ -128,10 +128,50 @@
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
     &per_cpu_timers , 0, "run tcp timers on all cpus");
 
+#if 0
 #define	INP_CPU(inp)	(per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
 		((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
+#endif
 
 /*
+ * Map the given inp to a CPU id.
+ *
+ * This queries RSS if it's compiled in, else it defaults to the current
+ * CPU ID.
+ */
+static inline int
+inp_to_cpuid(struct inpcb *inp)
+{
+	u_int cpuid;
+
+#ifdef	RSS
+	if (per_cpu_timers) {
+		cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
+		if (cpuid == NETISR_CPUID_NONE)
+			return (curcpu);	/* XXX */
+		else
+			return (cpuid);
+	}
+#else
+	/* Legacy, pre-RSS behaviour */
+	if (per_cpu_timers) {
+		/*
+		 * We don't have a flowid -> cpuid mapping, so cheat and
+		 * just map unknown cpuids to curcpu.  Not the best, but
+		 * apparently better than defaulting to swi 0.
+		 */
+		cpuid = inp->inp_flowid % (mp_maxid + 1);
+		if (! CPU_ABSENT(cpuid))
+			return (cpuid);
+		return (curcpu);
+	}
+#endif
+
+	/* per_cpu_timers is off (RSS or not): default to swi 0 */
+	return (0);
+}
+
+/*
  * Tcp protocol timeout routine called every 500 ms.
  * Updates timestamps used for TCP
  * causes finite state machine actions if timers expire.
@@ -271,7 +311,8 @@
 		if (tp->t_state != TCPS_TIME_WAIT &&
 		    ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
 			callout_reset_on(&tp->t_timers->tt_2msl,
-			    TP_KEEPINTVL(tp), tcp_timer_2msl, tp, INP_CPU(inp));
+			    TP_KEEPINTVL(tp), tcp_timer_2msl, tp,
+			    inp_to_cpuid(inp));
 		else
 			tp = tcp_close(tp);
 	}
@@ -361,10 +402,10 @@
 			free(t_template, M_TEMP);
 		}
 		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
-		    tcp_timer_keep, tp, INP_CPU(inp));
+		    tcp_timer_keep, tp, inp_to_cpuid(inp));
 	} else
 		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
-		    tcp_timer_keep, tp, INP_CPU(inp));
+		    tcp_timer_keep, tp, inp_to_cpuid(inp));
 
 #ifdef TCPDEBUG
 	if (inp->inp_socket->so_options & SO_DEBUG)
@@ -649,7 +690,7 @@
 	struct callout *t_callout;
 	void *f_callout;
 	struct inpcb *inp = tp->t_inpcb;
-	int cpu = INP_CPU(inp);
+	int cpu = inp_to_cpuid(inp);
 
 #ifdef TCP_OFFLOAD
 	if (tp->t_flags & TF_TOE)