diff --git a/sys/conf/files b/sys/conf/files index fe6f8cfc22d..a458d740fb1 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4110,6 +4110,8 @@ net/slcompress.c optional netgraph_vjc | sppp | \ netgraph_sppp net/toeplitz.c optional inet rss | inet6 rss net/vnet.c optional vimage +net/yandex.c standard \ + compile-with "${NORMAL_C} -I$S/contrib/ck/include" net80211/ieee80211.c optional wlan net80211/ieee80211_acl.c optional wlan wlan_acl net80211/ieee80211_action.c optional wlan diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index 3e7b736ede2..9f6b009451f 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -195,6 +196,10 @@ struct vi_info { struct ifnet *ifp; struct pfil_head *pfil; + struct yndx_conf *yndx; + eventhandler_tag vlan_c; + eventhandler_tag vlan_u; + unsigned long flags; int if_flags; diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index b088b5899dc..bcfc9b79332 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -688,6 +688,8 @@ static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *); static void vi_refresh_stats(struct adapter *, struct vi_info *); static void cxgbe_refresh_stats(struct adapter *, struct port_info *); static void cxgbe_tick(void *); +static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t); +static void cxgbe_vlan_unconfig(void *, struct ifnet *, uint16_t); static void cxgbe_sysctls(struct port_info *); static int sysctl_int_array(SYSCTL_HANDLER_ARGS); static int sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS); @@ -1775,6 +1777,14 @@ cxgbe_vi_attach(device_t dev, struct vi_info *vi) } #endif + vi->yndx = yndx_conf_alloc(true); + yndx_conf_init(vi->yndx, dev, ifp->if_dname, NULL); + yndx_ifcap_init(vi->yndx, ifp); + vi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, + ifp, EVENTHANDLER_PRI_ANY); + vi->vlan_u = EVENTHANDLER_REGISTER(vlan_unconfig, + 
cxgbe_vlan_unconfig, ifp, EVENTHANDLER_PRI_ANY); + ether_ifattach(ifp, vi->hw_addr); #ifdef DEV_NETMAP if (vi->nnmrxq != 0) @@ -1872,6 +1882,12 @@ cxgbe_vi_detach(struct vi_info *vi) callout_drain(&vi->tick); vi_full_uninit(vi); + if (vi->vlan_c) + EVENTHANDLER_DEREGISTER(vlan_config, vi->vlan_c); + if (vi->vlan_u) + EVENTHANDLER_DEREGISTER(vlan_unconfig, vi->vlan_u); + yndx_conf_free(vi->yndx); + if_free(vi->ifp); vi->ifp = NULL; } @@ -2099,6 +2115,7 @@ cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) if (mask & IFCAP_TXTLS) ifp->if_capenable ^= (mask & IFCAP_TXTLS); #endif + yndx_ifcap_update(vi->yndx, ifp, ifr->ifr_reqcap); #ifdef VLAN_CAPABILITIES VLAN_CAPABILITIES(ifp); @@ -2187,8 +2204,14 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m) /* Select a txq. */ txq = &sc->sge.txq[vi->first_txq]; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) + txq += yndx_flowid2queue(vi->yndx, m->m_pkthdr.flowid, + ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) + + vi->rsrv_noflowq), + vi->ntxq); +#if 0 txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) + vi->rsrv_noflowq); +#endif items[0] = m; rc = mp_ring_enqueue(txq->r, items, 1, 256); @@ -5589,6 +5612,10 @@ t4_setup_intr_handlers(struct adapter *sc) bus_bind_intr(sc->dev, irq->res, rss_getcpu(q % nbuckets)); } +#else + bus_bind_intr(sc->dev, irq->res, + yndx_queue2cpuid(vi->yndx, + q, q % mp_ncpus)); #endif irq++; rid++; @@ -5605,6 +5632,10 @@ t4_setup_intr_handlers(struct adapter *sc) #ifdef RSS bus_bind_intr(sc->dev, irq->res, rss_getcpu(q % nbuckets)); +#else + bus_bind_intr(sc->dev, irq->res, + yndx_queue2cpuid(vi->yndx, + q, q % mp_ncpus)); #endif irq++; rid++; @@ -6211,6 +6242,31 @@ vi_tick(void *arg) callout_schedule(&vi->tick, hz); } +static void +cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid) +{ + struct vi_info *vi; + + if (arg != ifp) + return; + + vi = ifp->if_softc; + yndx_vlan_register(vi->yndx, vid, ifp); +} + +static void +cxgbe_vlan_unconfig(void *arg, struct 
ifnet *ifp, uint16_t vid) +{ + struct vi_info *vi; + + if (arg != ifp) + return; + + vi = ifp->if_softc; + yndx_vlan_unregister(vi->yndx, vid, ifp); +} + + /* * Should match fw_caps_config_ enums in t4fw_interface.h */ diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index 2cbfc8d5ae5..c0e3ecc2b1a 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ -1953,8 +1953,9 @@ eth_rx(struct adapter *sc, struct sge_rxq *rxq, const struct iq_desc *d, have_mbuf: m0->m_pkthdr.rcvif = ifp; - M_HASHTYPE_SET(m0, sw_hashtype[d->rss.hash_type][d->rss.ipv6]); - m0->m_pkthdr.flowid = be32toh(d->rss.hash_val); + yndx_flowid_set(vi->yndx, m0, be32toh(d->rss.hash_val), + rxq - &sc->sge.rxq[vi->first_rxq], + sw_hashtype[d->rss.hash_type][d->rss.ipv6]); cpl = (const void *)(&d->rss + 1); if (cpl->csum_calc && !(cpl->err_vec & sc->params.tp.err_vec_mask)) { @@ -1977,8 +1978,7 @@ eth_rx(struct adapter *sc, struct sge_rxq *rxq, const struct iq_desc *d, } if (cpl->vlan_ex) { - m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan); - m0->m_flags |= M_VLANTAG; + yndx_vlan_handle(vi->yndx, m0, be16toh(cpl->vlan)); rxq->vlan_extraction++; } diff --git a/sys/dev/if_intel/igb/if_igb.c b/sys/dev/if_intel/igb/if_igb.c index fe97c9de793..b0a617f2831 100644 --- a/sys/dev/if_intel/igb/if_igb.c +++ b/sys/dev/if_intel/igb/if_igb.c @@ -426,6 +426,8 @@ igb_attach(device_t dev) adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; + adapter->yndx = yndx_conf_alloc(true); + yndx_conf_init(adapter->yndx, dev, "igb", NULL); IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); /* SYSCTLs */ @@ -677,6 +679,7 @@ igb_attach(device_t dev) igb_free_pci_resources(adapter); if (adapter->ifp != NULL) if_free(adapter->ifp); + yndx_conf_free(adapter->yndx); IGB_CORE_LOCK_DESTROY(adapter); return (error); @@ -762,6 +765,7 @@ igb_detach(device_t dev) free(adapter->mta, M_DEVBUF); adapter->mta = NULL; + yndx_conf_free(adapter->yndx); IGB_CORE_LOCK_DESTROY(adapter); return (0); @@ 
-950,12 +954,14 @@ igb_mq_start(struct ifnet *ifp, struct mbuf *m) i = bucket_id % adapter->num_queues; } else { #endif - i = m->m_pkthdr.flowid % adapter->num_queues; + i = yndx_flowid2queue(adapter->yndx, m->m_pkthdr.flowid, + m->m_pkthdr.flowid, adapter->num_queues); #ifdef RSS } #endif } else { - i = curcpu % adapter->num_queues; + i = yndx_flowid2queue(adapter->yndx, m->m_pkthdr.flowid, + curcpu, adapter->num_queues); } txr = &adapter->tx_rings[i]; que = &adapter->queues[i]; @@ -1250,6 +1256,7 @@ igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data) } if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) igb_init(adapter); + yndx_ifcap_update(adapter->yndx, ifp, ifr->ifr_reqcap); VLAN_CAPABILITIES(ifp); break; } @@ -2421,9 +2428,7 @@ igb_allocate_msix(struct adapter *adapter) struct igb_queue *que = adapter->queues; int error, rid, vector = 0; int cpu_id = 0; -#ifdef RSS cpuset_t cpu_mask; -#endif /* Be sure to start with all interrupts disabled */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); @@ -2496,7 +2501,8 @@ igb_allocate_msix(struct adapter *adapter) if (adapter->num_queues > 1) { if (igb_last_bind_cpu < 0) igb_last_bind_cpu = CPU_FIRST(); - cpu_id = igb_last_bind_cpu; + cpu_id = yndx_queue2cpuid(adapter->yndx, i, + igb_last_bind_cpu); } #endif @@ -2523,25 +2529,15 @@ igb_allocate_msix(struct adapter *adapter) taskqueue_thread_enqueue, &que->tq); if (adapter->num_queues > 1) { /* - * Only pin the taskqueue thread to a CPU if - * RSS is in use. - * - * This again just happens to match the default RSS - * round-robin bucket -> queue -> CPU allocation. + * Pin the taskqueue thread to the same CPU as + * queue. 
*/ -#ifdef RSS CPU_SETOF(cpu_id, &cpu_mask); taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, &cpu_mask, - "%s que (bucket %d)", - device_get_nameunit(adapter->dev), - cpu_id); -#else - taskqueue_start_threads(&que->tq, 1, PI_NET, - "%s que (qid %d)", + "%s que (cpu %d)", device_get_nameunit(adapter->dev), cpu_id); -#endif } else { taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); @@ -3244,6 +3240,7 @@ igb_setup_interface(device_t dev, struct adapter *adapter) IFM_ETHER | IFM_1000_T, 0, NULL); } } + yndx_ifcap_init(adapter->yndx, ifp); ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); @@ -5106,8 +5103,8 @@ igb_rxeof(struct igb_queue *que, int count, int *done) if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (staterr & E1000_RXD_STAT_VP) != 0) { - rxr->fmp->m_pkthdr.ether_vtag = vtag; - rxr->fmp->m_flags |= M_VLANTAG; + yndx_vlan_handle(adapter->yndx, + rxr->fmp, vtag); } /* @@ -5145,7 +5142,10 @@ igb_rxeof(struct igb_queue *que, int count, int *done) break; default: /* XXX fallthrough */ - M_HASHTYPE_SET(rxr->fmp, + yndx_flowid_set(adapter->yndx, + rxr->fmp, + rxr->fmp->m_pkthdr.flowid, + i, #if __FreeBSD_version > 1100000 M_HASHTYPE_OPAQUE_HASH); #else @@ -5283,6 +5283,7 @@ igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) return; IGB_CORE_LOCK(adapter); + yndx_vlan_register(adapter->yndx, vtag, ifp); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); @@ -5310,6 +5311,7 @@ igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) return; IGB_CORE_LOCK(adapter); + yndx_vlan_unregister(adapter->yndx, vtag, ifp); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); diff --git a/sys/dev/if_intel/igb/if_igb.h b/sys/dev/if_intel/igb/if_igb.h index d236a7e2832..5d1b04a352c 100644 --- a/sys/dev/if_intel/igb/if_igb.h +++ b/sys/dev/if_intel/igb/if_igb.h @@ -74,6 
+74,7 @@ #include #include #endif +#include #include #include @@ -515,6 +516,8 @@ struct adapter { void *tag; struct resource *res; + struct yndx_conf *yndx; + struct ifmedia media; struct callout timer; int msix; diff --git a/sys/dev/if_intel/ix/if_ix.c b/sys/dev/if_intel/ix/if_ix.c index 9b9ff4ec8a1..2df3710fa4f 100644 --- a/sys/dev/if_intel/ix/if_ix.c +++ b/sys/dev/if_intel/ix/if_ix.c @@ -718,6 +718,8 @@ ixgbe_attach(device_t dev) adapter = device_get_softc(dev); adapter->hw.back = adapter; adapter->dev = dev; + adapter->yndx = yndx_conf_alloc(true); + yndx_conf_init(adapter->yndx, dev, "ix", "ixgbe"); hw = &adapter->hw; /* Core Lock Init*/ @@ -980,6 +982,7 @@ ixgbe_attach(device_t dev) err_out: if (adapter->ifp != NULL) if_free(adapter->ifp); + yndx_conf_free(adapter->yndx); ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext); @@ -1103,6 +1106,7 @@ ixgbe_setup_interface(device_t dev, struct adapter *adapter) */ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; + yndx_ifcap_init(adapter->yndx, ifp); /* * Specify the media types supported by this adapter and register * callbacks to update media and link information @@ -1765,6 +1769,7 @@ ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) return; IXGBE_CORE_LOCK(adapter); + yndx_vlan_register(adapter->yndx, vtag, ifp); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); @@ -1791,6 +1796,7 @@ ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) return; IXGBE_CORE_LOCK(adapter); + yndx_vlan_unregister(adapter->yndx, vtag, ifp); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); @@ -2722,6 +2728,7 @@ ixgbe_detach(device_t dev) free(adapter->queues, M_IXGBE); free(adapter->mta, M_IXGBE); + yndx_conf_free(adapter->yndx); IXGBE_CORE_LOCK_DESTROY(adapter); return (0); @@ -5030,6 +5037,7 @@ ixgbe_ioctl(struct ifnet *ifp, u_long command, 
caddr_t data) ixgbe_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); } + yndx_ifcap_update(adapter->yndx, ifp, ifr->ifr_reqcap); VLAN_CAPABILITIES(ifp); break; } @@ -5245,7 +5253,8 @@ ixgbe_allocate_msix(struct adapter *adapter) * round-robin bucket -> queue -> CPU allocation. */ if (adapter->num_queues > 1) - cpu_id = i; + cpu_id = yndx_queue2cpuid(adapter->yndx, + i, i); } if (adapter->num_queues > 1) bus_bind_intr(dev, que->res, cpu_id); @@ -5269,14 +5278,10 @@ ixgbe_allocate_msix(struct adapter *adapter) taskqueue_start_threads(&que->tq, 1, PI_NET, "%s:q%d", device_get_nameunit(adapter->dev), i); #else - if (adapter->feat_en & IXGBE_FEATURE_RSS) - taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, - &cpu_mask, "%s (bucket %d)", - device_get_nameunit(adapter->dev), cpu_id); - else - taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, - NULL, "%s:q%d", device_get_nameunit(adapter->dev), - i); + CPU_SETOF(cpu_id, &cpu_mask); + taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, + &cpu_mask, "%s:q%d (cpu %d)", + device_get_nameunit(adapter->dev), i, cpu_id); #endif } diff --git a/sys/dev/if_intel/ix/ix_txrx.c b/sys/dev/if_intel/ix/ix_txrx.c index d75ed425cf6..a29ccf79dd8 100644 --- a/sys/dev/if_intel/ix/ix_txrx.c +++ b/sys/dev/if_intel/ix/ix_txrx.c @@ -185,9 +185,11 @@ ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m) bucket_id, adapter->num_queues); #endif } else - i = m->m_pkthdr.flowid % adapter->num_queues; + i = yndx_flowid2queue(adapter->yndx, m->m_pkthdr.flowid, + m->m_pkthdr.flowid, adapter->num_queues); } else - i = curcpu % adapter->num_queues; + i = yndx_flowid2queue(adapter->yndx, m->m_pkthdr.flowid, + curcpu, adapter->num_queues); /* Check for a hung queue and pick alternative */ if (((1 << i) & adapter->active_queues) == 0) @@ -1837,8 +1839,7 @@ ixgbe_rxeof(struct ix_queue *que) if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP)) vtag = le16toh(cur->wb.upper.vlan); if (vtag) { - sendmp->m_pkthdr.ether_vtag = vtag; - sendmp->m_flags |= M_VLANTAG; 
+ yndx_vlan_handle(adapter->yndx, sendmp, vtag); } if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) ixgbe_rx_checksum(staterr, sendmp, ptype); @@ -1891,11 +1892,13 @@ ixgbe_rxeof(struct ix_queue *que) break; #endif default: + yndx_flowid_set(adapter->yndx, + sendmp, + sendmp->m_pkthdr.flowid, + i, #if __FreeBSD_version < 1100116 - M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); #else - M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE_HASH); #endif } diff --git a/sys/dev/if_intel/ix/ixgbe.h b/sys/dev/if_intel/ix/ixgbe.h index 8e2a082bc39..1b315214716 100644 --- a/sys/dev/if_intel/ix/ixgbe.h +++ b/sys/dev/if_intel/ix/ixgbe.h @@ -60,6 +60,7 @@ #include #include #include +#include #include #include @@ -422,6 +423,8 @@ struct adapter { void *tag; struct resource *res; + struct yndx_conf *yndx; + struct ifmedia media; struct callout timer; int link_rid; diff --git a/sys/net/if.h b/sys/net/if.h index 1e7430263fc..a262c6b3119 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -249,6 +249,7 @@ struct if_data { #define IFCAP_NOMAP 0x4000000 /* can TX unmapped mbufs */ #define IFCAP_TXTLS4 0x8000000 /* can do TLS encryption and segmentation for TCP */ #define IFCAP_TXTLS6 0x10000000 /* can do TLS encryption and segmentation for TCP6 */ +#define IFCAP_VLANRXHACK 0x20000000 /* use short VLANs call chain */ #define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) diff --git a/sys/net/yandex.c b/sys/net/yandex.c new file mode 100644 index 00000000000..3e0dcf60c9b --- /dev/null +++ b/sys/net/yandex.c @@ -0,0 +1,300 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2017-2020 Yandex LLC + * Copyright (c) 2017-2020 Andrey V. Elsukov + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+static MALLOC_DEFINE(M_YNDX, "yndx", "buffers for Yandex's structures");
+
+/*
+ * Per-interface configuration.  The four one-byte knobs are read from
+ * loader tunables by yndx_conf_init(); vifp[] is indexed by VLAN id.
+ */
+struct yndx_conf {
+	uint8_t		qtof_mul;	/* queue to flow multiplier */
+	uint8_t		qtof_add;	/* queue to flow addition */
+	uint8_t		ftoq_div;	/* flow to queue divider */
+	uint8_t		ftoq_add;	/* flow to queue addition */
+	uint32_t	flags;
+#define	YNDX_DISABLED		0x00000001
+#define	YNDX_VLANRXHACK		0x00000002
+	struct ifnet	*vifp[4096];	/* vlans mapping */
+};
+
+/* Largest value that fits into the uint8_t knobs of struct yndx_conf. */
+#define	YNDX_KNOB_MAX		0xffU
+
+/*
+ * Allocate a zeroed configuration with the multiplier and the divider
+ * preset to 1.  Sleeps for memory when waitok is true; otherwise the
+ * allocation may fail and NULL is returned.
+ */
+struct yndx_conf*
+yndx_conf_alloc(bool waitok)
+{
+	struct yndx_conf *conf;
+
+	conf = malloc(sizeof(*conf), M_YNDX,
+	    M_ZERO | (waitok ? M_WAITOK: M_NOWAIT));
+	if (conf != NULL) {
+		conf->qtof_mul = 1;
+		conf->ftoq_div = 1;
+	}
+	return (conf);
+}
+
+/*
+ * Release a configuration obtained from yndx_conf_alloc().
+ */
+void
+yndx_conf_free(struct yndx_conf *conf)
+{
+
+	free(conf, M_YNDX);
+}
+
+/*
+ * Read the hw.<ifname>.<unit>.* tunables into conf.
+ *
+ * Only the length of the "yndx_disabled" name is verified: it is the
+ * longest suffix, so once it fits every other name fits as well.
+ * A tunable that is not set leaves the corresponding field untouched.
+ * A value that does not fit into the one-byte field is reported and
+ * ignored instead of being silently truncated.
+ *
+ * Returns 0 on success or ENAMETOOLONG when the tunable name does not
+ * fit into the local buffer (nothing is read in that case).
+ */
+static int
+yndx_conf_load(struct yndx_conf *conf, device_t dev, const char *ifname)
+{
+	const struct {
+		const char	*knob;
+		uint8_t		*field;
+	} knobs[] = {
+		{ "qtof_mul", &conf->qtof_mul },
+		{ "qtof_add", &conf->qtof_add },
+		{ "ftoq_div", &conf->ftoq_div },
+		{ "ftoq_add", &conf->ftoq_add },
+	};
+	char tunable[255];
+	unsigned int i, unit, v;
+	int len;
+
+	unit = device_get_unit(dev);
+	len = snprintf(tunable, sizeof(tunable),
+	    "hw.%s.%u.yndx_disabled", ifname, unit);
+	if (len < 0 || (size_t)len >= sizeof(tunable))
+		return (ENAMETOOLONG);
+	if (getenv_uint(tunable, &v) != 0 && v > 0)
+		conf->flags |= YNDX_DISABLED;
+
+	for (i = 0; i < sizeof(knobs) / sizeof(knobs[0]); i++) {
+		snprintf(tunable, sizeof(tunable), "hw.%s.%u.%s",
+		    ifname, unit, knobs[i].knob);
+		/* getenv_uint() returns zero when the tunable is not set. */
+		if (getenv_uint(tunable, &v) == 0)
+			continue;
+		if (v > YNDX_KNOB_MAX) {
+			device_printf(dev,
+			    "YNDX: %s=%u exceeds %u, ignored\n",
+			    tunable, v, YNDX_KNOB_MAX);
+			continue;
+		}
+		*knobs[i].field = (uint8_t)v;
+	}
+	return (0);
+}
+
+/*
+ * Initialize conf from loader tunables.
+ *
+ *	conf  - configuration to fill in, must not be NULL;
+ *	dev   - device whose unit number is used in the tunable names and
+ *		which is used as the prefix for diagnostics;
+ *	name  - interface name used in the tunable names;
+ *	alias - optional second name.  It is read first, so values found
+ *		under name override the ones found under alias.
+ *
+ * A zero ftoq_div is replaced with 1 because it is used as a divisor.
+ *
+ * Returns EINVAL when conf is NULL or when both names are NULL,
+ * ENAMETOOLONG when the alias name is too long, or when name is too
+ * long and no alias was given, and 0 otherwise.
+ */
+int
+yndx_conf_init(struct yndx_conf *conf, device_t dev, const char *name,
+    const char *alias)
+{
+	int error, ret;
+
+	if (conf == NULL)
+		return (EINVAL);
+
+	if (name == NULL && alias == NULL)
+		return (EINVAL);
+
+	ret = 0;
+	if (alias != NULL)
+		ret = yndx_conf_load(conf, dev, alias);
+	if (name != NULL) {
+		error = yndx_conf_load(conf, dev, name);
+		/* A failure for name is reported only without an alias. */
+		if (error != 0 && alias == NULL)
+			ret = error;
+	}
+	if (conf->ftoq_div == 0)
+		conf->ftoq_div = 1;
+
+	if (ret != 0) {
+		device_printf(dev,
+		    "YNDX: failed to obtain config with error %d\n", ret);
+	} else if (bootverbose) {
+		device_printf(dev, "YNDX: ftoq_div=%u, ftoq_add=%u, "
+		    "qtof_mul=%u, qtof_add=%u%s\n", conf->ftoq_div,
+		    conf->ftoq_add, conf->qtof_mul, conf->qtof_add,
+		    (conf->flags & YNDX_DISABLED) ? ", disabled": "");
+	}
+
+	return (ret);
+}
+
+/*
+ * Map a queue number to a CPU id: (qtof_mul * queue + qtof_add) modulo
+ * mp_ncpus.  default_cpuid is returned unchanged when conf is NULL or
+ * the configuration is disabled.
+ */
+unsigned int
+yndx_queue2cpuid(const struct yndx_conf *conf, unsigned int queue,
+    unsigned int default_cpuid)
+{
+
+	if (conf == NULL || (conf->flags & YNDX_DISABLED))
+		return (default_cpuid);
+	return ((conf->qtof_mul * queue + conf->qtof_add) % mp_ncpus);
+}
+
+/*
+ * Map a flowid to a queue number: (flowid / ftoq_div + ftoq_add) modulo
+ * num_queues.  default_queue is returned unchanged when conf is NULL or
+ * the configuration is disabled.  num_queues must not be zero.
+ */
+unsigned int
+yndx_flowid2queue(const struct yndx_conf *conf, unsigned int flowid,
+    unsigned int default_queue, unsigned int num_queues)
+{
+
+	if (conf == NULL || (conf->flags & YNDX_DISABLED))
+		return (default_queue);
+	return ((flowid / conf->ftoq_div + conf->ftoq_add) % num_queues);
+}
+
+/*
+ * Store flowid and hash type into the packet header of m.  With an
+ * enabled configuration the supplied values are replaced: the flowid
+ * becomes qtof_mul * queue + qtof_add and the hash type becomes
+ * M_HASHTYPE_OPAQUE.
+ */
+void
+yndx_flowid_set(const struct yndx_conf *conf, struct mbuf *m,
+    unsigned int flowid, unsigned int queue, unsigned int default_hashtype)
+{
+
+	if (conf != NULL && (conf->flags & YNDX_DISABLED) == 0) {
+		flowid = conf->qtof_mul * queue + conf->qtof_add;
+		default_hashtype = M_HASHTYPE_OPAQUE;
+	}
+	m->m_pkthdr.flowid = flowid;
+	M_HASHTYPE_SET(m, default_hashtype);
+}
+
+
+/*
+ * When an interface is part of lagg(4), its if_type is not IFT_ETHER.
+ * lagg(4) changes it to IFT_IEEE8023ADLAG and to find the actual
+ * vlan(4) interface we should use VLAN_DEVAT() on parent interface. 
+ */
+static struct ifnet *
+yndx_vlan_devat(struct ifnet *ifp, uint16_t vid)
+{
+	struct lagg_port *lp;
+	struct lagg_softc *sc;
+
+	/* Skip lagg nesting */
+	while (ifp->if_type == IFT_IEEE8023ADLAG) {
+		lp = ifp->if_lagg;
+		sc = lp->lp_softc;
+		ifp = sc->sc_ifp;
+	}
+	/* Get vlan interface based on tag */
+	return (VLAN_DEVAT(ifp, vid));
+}
+
+/*
+ * Remember which vlan(4) interface serves vid on ifp.  The interface is
+ * looked up with yndx_vlan_devat() and published in slot vid % 4096 of
+ * the table; a NULL ifp clears the slot.  A NULL conf is ignored, as in
+ * the other yndx_* entry points.
+ */
+void
+yndx_vlan_register(struct yndx_conf *conf, uint16_t vid, struct ifnet *ifp)
+{
+
+	if (conf == NULL)
+		return;
+
+	if (ifp != NULL)
+		ifp = yndx_vlan_devat(ifp, vid);
+	/* Store fence first, then publish the pointer for the RX path. */
+	ck_pr_fence_store();
+	ck_pr_store_ptr(&conf->vifp[vid % 4096], ifp);
+}
+
+/*
+ * Forget the vlan(4) interface recorded for vid.  ifp is unused.
+ * A NULL conf is ignored.
+ */
+void
+yndx_vlan_unregister(struct yndx_conf *conf, uint16_t vid,
+    struct ifnet *ifp __unused)
+{
+
+	if (conf == NULL)
+		return;
+
+	ck_pr_fence_store();
+	ck_pr_store_ptr(&conf->vifp[vid % 4096], NULL);
+}
+
+/*
+ * Attach the hardware-extracted VLAN tag vid to mbuf m.
+ *
+ * When YNDX_VLANRXHACK is set and an interface is recorded for vid, the
+ * mbuf is handed to that interface directly: rcvif is rewritten and
+ * M_VLANTAG is cleared.  In every other case the tag is stored in
+ * ether_vtag and M_VLANTAG is set.
+ */
+void
+yndx_vlan_handle(const struct yndx_conf *conf, struct mbuf *m, uint16_t vid)
+{
+	struct ifnet *ifp;
+
+	if (conf != NULL && (conf->flags & YNDX_VLANRXHACK) != 0) {
+		ifp = ck_pr_load_ptr(&conf->vifp[vid % 4096]);
+		if (ifp != NULL) {
+			m->m_pkthdr.rcvif = ifp;
+			m->m_flags &= ~M_VLANTAG;
+			return;
+		}
+	}
+	m->m_pkthdr.ether_vtag = vid;
+	m->m_flags |= M_VLANTAG;
+}
+
+/*
+ * Advertise IFCAP_VLANRXHACK in the capabilities of ifp.  The
+ * capability is only advertised here, it is not enabled: the code that
+ * would enable it by default is compiled out.
+ */
+void
+yndx_ifcap_init(struct yndx_conf *conf, struct ifnet *ifp)
+{
+
+	if (conf == NULL || ifp == NULL)
+		return;
+
+	ifp->if_capabilities |= IFCAP_VLANRXHACK;
+#if 0
+	ifp->if_capenable |= IFCAP_VLANRXHACK;
+	conf->flags |= YNDX_VLANRXHACK;
+#endif
+}
+
+/*
+ * Apply a capability change request.  cap is the requested set of
+ * enabled capabilities.  When its IFCAP_VLANRXHACK bit differs from the
+ * current if_capenable, the bit is toggled and mirrored into
+ * YNDX_VLANRXHACK of conf.  A NULL conf or ifp is ignored.
+ */
+void
+yndx_ifcap_update(struct yndx_conf *conf, struct ifnet *ifp, int cap)
+{
+
+	if (conf == NULL || ifp == NULL)
+		return;
+
+	if (((ifp->if_capenable ^ cap) & IFCAP_VLANRXHACK) == 0)
+		return;
+
+	ifp->if_capenable ^= IFCAP_VLANRXHACK;
+	if (ifp->if_capenable & IFCAP_VLANRXHACK)
+		conf->flags |= YNDX_VLANRXHACK;
+	else
+		conf->flags &= ~YNDX_VLANRXHACK;
+}
+
diff --git a/sys/net/yandex.h b/sys/net/yandex.h
new file mode 100644
index 00000000000..14eee6bf08c
--- /dev/null
+++ b/sys/net/yandex.h
@@ -0,0 +1,95 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2017-2020 Yandex LLC
+ * Copyright (c) 
2017-2020 Andrey V. Elsukov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_YANDEX_H_
+#define _NET_YANDEX_H_
+
+/*
+ * Forward declarations, so that the prototypes below refer to the
+ * file-scope structure tags no matter which headers were included
+ * before this one.
+ */
+struct ifnet;
+struct mbuf;
+struct yndx_conf;
+
+/*
+ * yndx_conf_alloc()
+ *	allocate yndx_conf structure to keep Yandex's specific
+ *	interface configuration. Driver should keep pointer to this
+ *	configuration and use it in the yndx_* functions.
+ *	Returns NULL when waitok is false and the allocation fails.
+ */
+struct yndx_conf *yndx_conf_alloc(bool waitok);
+
+/*
+ * yndx_conf_free()
+ *	free the memory allocated for the configuration.
+ */
+void yndx_conf_free(struct yndx_conf *conf);
+
+/*
+ * yndx_conf_init()
+ *	initialize interface configuration.
+ *	dev - the device_t related to the interface;
+ *	name - name of interface to read tunables;
+ *	alias - optional name of interface (for compatibility with
+ *	    old configs).
+ *	Returns 0 on success or an errno value.
+ */
+int yndx_conf_init(struct yndx_conf *conf, device_t dev,
+    const char *name, const char *alias);
+
+/*
+ * yndx_queue2cpuid()
+ *	return CPU id based on queue number and configuration.
+ *	default_cpuid is returned when conf is NULL or disabled.
+ */
+unsigned int yndx_queue2cpuid(const struct yndx_conf *conf,
+    unsigned int queue, unsigned int default_cpuid);
+
+/*
+ * yndx_flowid2queue()
+ *	return queue number based on flowid, number of queues and
+ *	configuration.
+ *	default_queue is returned when conf is NULL or disabled.
+ */
+unsigned int yndx_flowid2queue(const struct yndx_conf *conf,
+    unsigned int flowid, unsigned int default_queue, unsigned int num_queues);
+
+/*
+ * yndx_flowid_set()
+ *	set hash type and flowid for specified mbuf based on current
+ *	configuration.
+ */
+void yndx_flowid_set(const struct yndx_conf *conf, struct mbuf *m,
+    unsigned int flowid, unsigned int queue, unsigned int default_hashtype);
+
+/*
+ * yndx_vlan_register()
+ *	record the vlan(4) interface configured for vid on top of ifp,
+ *	for later use by yndx_vlan_handle().
+ */
+void yndx_vlan_register(struct yndx_conf *conf, uint16_t vid,
+    struct ifnet *ifp);
+
+/*
+ * yndx_vlan_unregister()
+ *	drop the record made by yndx_vlan_register() for vid.
+ *	ifp is not used.
+ */
+void yndx_vlan_unregister(struct yndx_conf *conf, uint16_t vid,
+    struct ifnet *ifp);
+
+/*
+ * yndx_vlan_handle()
+ *	attach the VLAN tag extracted by hardware to the mbuf. When the
+ *	VLAN RX hack is enabled and an interface is recorded for the tag,
+ *	the mbuf's rcvif is switched to that interface and M_VLANTAG is
+ *	cleared; otherwise ether_vtag and M_VLANTAG are set.
+ */
+void yndx_vlan_handle(const struct yndx_conf *conf, struct mbuf *m,
+    uint16_t vtag);
+
+/*
+ * yndx_ifcap_init()
+ *	advertise IFCAP_VLANRXHACK in the interface capabilities.
+ */
+void yndx_ifcap_init(struct yndx_conf *conf, struct ifnet *ifp);
+
+/*
+ * yndx_ifcap_update()
+ *	toggle IFCAP_VLANRXHACK in if_capenable when the requested
+ *	capabilities (cap) differ from the current ones, and keep the
+ *	configuration flags in sync.
+ */
+void yndx_ifcap_update(struct yndx_conf *conf, struct ifnet *ifp,
+    int cap);
+
+#endif /* _NET_YANDEX_H_ */