diff --git a/sys/dev/vmware/vmxnet3/if_vmx.c b/sys/dev/vmware/vmxnet3/if_vmx.c
index abfe2216d50a..a113b7e33bdc 100644
--- a/sys/dev/vmware/vmxnet3/if_vmx.c
+++ b/sys/dev/vmware/vmxnet3/if_vmx.c
@@ -320,6 +320,13 @@ vmxnet3_register(device_t dev)
 	return (&vmxnet3_sctx_init);
 }
 
+static int
+trunc_powerof2(int val)
+{
+
+	return (1U << (fls(val) - 1));
+}
+
 static int
 vmxnet3_attach_pre(if_ctx_t ctx)
 {
@@ -349,12 +356,16 @@ vmxnet3_attach_pre(if_ctx_t ctx)
 	/* If 0, the iflib tunable was not set, so set to the default */
 	if (scctx->isc_nrxqsets == 0)
 		scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES;
+	scctx->isc_nrxqsets = trunc_powerof2(scctx->isc_nrxqsets);
 	scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus);
+	scctx->isc_nrxqsets_max = trunc_powerof2(scctx->isc_nrxqsets_max);
 
 	/* If 0, the iflib tunable was not set, so set to the default */
 	if (scctx->isc_ntxqsets == 0)
 		scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES;
+	scctx->isc_ntxqsets = trunc_powerof2(scctx->isc_ntxqsets);
 	scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus);
+	scctx->isc_ntxqsets_max = trunc_powerof2(scctx->isc_ntxqsets_max);
 
 	/*
 	 * Enforce that the transmit completion queue descriptor count is
@@ -381,6 +392,12 @@
 	scctx->isc_rxqsizes[2] = sizeof(struct vmxnet3_rxdesc) *
 	    scctx->isc_nrxd[2];
 
+	/*
+	 * Initialize the max frame size and descriptor queue buffer
+	 * sizes.
+	 */
+	vmxnet3_mtu_set(ctx, if_getmtu(sc->vmx_ifp));
+
 	scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
 
 	/* Map PCI BARs */
@@ -1494,6 +1511,7 @@ vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
 	int cqidx;
 	uint16_t total_len;
 	uint8_t nfrags;
+	uint8_t i;
 	uint8_t flid;
 
 	sc = vsc;
@@ -1517,6 +1535,7 @@
 		KASSERT(rxcd->sop && rxcd->eop,
 		    ("%s: zero-length packet without both sop and eop set", __func__));
+		rxc->vxcr_zero_length++;
 		if (++cqidx == rxc->vxcr_ndesc) {
 			cqidx = 0;
 			rxc->vxcr_gen ^= 1;
 		}
@@ -1572,31 +1591,6 @@
 		}
 	}
 
-	/* VLAN */
-	if (rxcd->vlan) {
-		ri->iri_flags |= M_VLANTAG;
-		ri->iri_vtag = rxcd->vtag;
-	}
-
-	/* Checksum offload */
-	if (!rxcd->no_csum) {
-		uint32_t csum_flags = 0;
-
-		if (rxcd->ipv4) {
-			csum_flags |= CSUM_IP_CHECKED;
-			if (rxcd->ipcsum_ok)
-				csum_flags |= CSUM_IP_VALID;
-		}
-		if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
-			csum_flags |= CSUM_L4_CALC;
-			if (rxcd->csum_ok) {
-				csum_flags |= CSUM_L4_VALID;
-				ri->iri_csum_data = 0xffff;
-			}
-		}
-		ri->iri_csum_flags = csum_flags;
-	}
-
 	/*
 	 * The queue numbering scheme used for rxcd->qid is as follows:
 	 *  - All of the command ring 0s are numbered [0, nrxqsets - 1]
@@ -1632,6 +1626,46 @@
 	ri->iri_nfrags = nfrags;
 	ri->iri_len = total_len;
 
+	/*
+	 * If there's an error, the last descriptor in the packet will
+	 * have the error indicator set.  In this case, set all
+	 * fragment lengths to zero.  This will cause iflib to discard
+	 * the packet, but process all associated descriptors through
+	 * the refill mechanism.
+	 */
+	if (__predict_false(rxcd->error)) {
+		rxc->vxcr_pkt_errors++;
+		for (i = 0; i < nfrags; i++) {
+			frag = &ri->iri_frags[i];
+			frag->irf_len = 0;
+		}
+	} else {
+		/* Checksum offload information is in the last descriptor. */
+		if (!rxcd->no_csum) {
+			uint32_t csum_flags = 0;
+
+			if (rxcd->ipv4) {
+				csum_flags |= CSUM_IP_CHECKED;
+				if (rxcd->ipcsum_ok)
+					csum_flags |= CSUM_IP_VALID;
+			}
+			if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
+				csum_flags |= CSUM_L4_CALC;
+				if (rxcd->csum_ok) {
+					csum_flags |= CSUM_L4_VALID;
+					ri->iri_csum_data = 0xffff;
+				}
+			}
+			ri->iri_csum_flags = csum_flags;
+		}
+
+		/* VLAN information is in the last descriptor. */
+		if (rxcd->vlan) {
+			ri->iri_flags |= M_VLANTAG;
+			ri->iri_vtag = rxcd->vtag;
+		}
+	}
+
 	return (0);
 }
 
@@ -1645,14 +1679,13 @@
 	uint64_t *paddrs;
 	int count;
 	int len;
-	int pidx;
+	int idx;
 	int i;
 	uint8_t flid;
 	uint8_t btype;
 
 	count = iru->iru_count;
 	len = iru->iru_buf_size;
-	pidx = iru->iru_pidx;
 	flid = iru->iru_flidx;
 	paddrs = iru->iru_paddrs;
 
@@ -1666,17 +1699,32 @@
 	 * command ring 1 is filled with BTYPE_BODY descriptors.
 	 */
 	btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
-	for (i = 0; i < count; i++) {
-		rxd[pidx].addr = paddrs[i];
-		rxd[pidx].len = len;
-		rxd[pidx].btype = btype;
-		rxd[pidx].gen = rxr->vxrxr_gen;
-
-		if (++pidx == rxr->vxrxr_ndesc) {
-			pidx = 0;
+	/*
+	 * The refill entries from iflib will advance monotonically,
+	 * but the refilled descriptors may not be contiguous due to
+	 * earlier skipping of descriptors by the device.  The refill
+	 * entries from iflib need an entire state update, while the
+	 * descriptors previously skipped by the device only need to
+	 * have their generation numbers updated.
+	 */
+	idx = rxr->vxrxr_refill_start;
+	i = 0;
+	do {
+		if (idx == iru->iru_idxs[i]) {
+			rxd[idx].addr = paddrs[i];
+			rxd[idx].len = len;
+			rxd[idx].btype = btype;
+			i++;
+		} else
+			rxr->vxrxr_desc_skips++;
+		rxd[idx].gen = rxr->vxrxr_gen;
+
+		if (++idx == rxr->vxrxr_ndesc) {
+			idx = 0;
 			rxr->vxrxr_gen ^= 1;
 		}
-	}
+	} while (i != count);
+	rxr->vxrxr_refill_start = idx;
 }
 
 static void
@@ -1825,6 +1873,8 @@ vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
 	for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
 		rxr = &rxq->vxrxq_cmd_ring[i];
 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
+		rxr->vxrxr_desc_skips = 0;
+		rxr->vxrxr_refill_start = 0;
 		/*
 		 * iflib has zeroed out the descriptor array during the
 		 * prior attach or stop
@@ -1834,6 +1884,8 @@
 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
 		rxr = &rxq->vxrxq_cmd_ring[i];
 		rxr->vxrxr_gen = 0;
+		rxr->vxrxr_desc_skips = 0;
+		rxr->vxrxr_refill_start = 0;
 		bzero(rxr->vxrxr_rxd,
 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
 	}
@@ -1841,6 +1893,8 @@
 	rxc = &rxq->vxrxq_comp_ring;
 	rxc->vxcr_next = 0;
 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
+	rxc->vxcr_zero_length = 0;
+	rxc->vxcr_pkt_errors = 0;
 	/*
 	 * iflib has zeroed out the descriptor array during the prior attach
 	 * or stop
@@ -1906,13 +1960,8 @@ static void
 vmxnet3_init(if_ctx_t ctx)
 {
 	struct vmxnet3_softc *sc;
-	if_softc_ctx_t scctx;
 
 	sc = iflib_get_softc(ctx);
-	scctx = sc->vmx_scctx;
-
-	scctx->isc_max_frame_size = if_getmtu(iflib_get_ifp(ctx)) +
-	    ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
 
 	/* Use the current MAC address. */
 	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
@@ -1938,11 +1987,37 @@ vmxnet3_multi_set(if_ctx_t ctx)
 
 static int
 vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu)
 {
+	struct vmxnet3_softc *sc;
+	if_softc_ctx_t scctx;
+
+	sc = iflib_get_softc(ctx);
+	scctx = sc->vmx_scctx;
 
 	if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
 	    ETHER_CRC_LEN))
 		return (EINVAL);
 
+	/*
+	 * Update the max frame size so that the rx mbuf size is
+	 * chosen based on the new mtu during the interface init that
+	 * will occur after this routine returns.
+	 */
+	scctx->isc_max_frame_size = mtu +
+	    ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
+	/* RX completion queue - n/a */
+	scctx->isc_rxd_buf_size[0] = 0;
+	/*
+	 * For header-type descriptors (used for first segment of
+	 * packet), let iflib determine the buffer size based on the
+	 * max frame size.
+	 */
+	scctx->isc_rxd_buf_size[1] = 0;
+	/*
+	 * For body-type descriptors (used for jumbo frames and LRO),
+	 * always use page-sized buffers.
+	 */
+	scctx->isc_rxd_buf_size[2] = MJUMPAGESIZE;
+
 	return (0);
 }
 
@@ -2288,14 +2363,22 @@ vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
 		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
 		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
+		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips", CTLFLAG_RD,
+		    &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0, "");
 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
 		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
 		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
+		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips", CTLFLAG_RD,
+		    &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0, "");
 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
 		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0,"");
 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
 		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
+		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length", CTLFLAG_RD,
+		    &rxq->vxrxq_comp_ring.vxcr_zero_length, 0, "");
+		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors", CTLFLAG_RD,
+		    &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0, "");
 	}
 }
 
diff --git a/sys/dev/vmware/vmxnet3/if_vmxvar.h b/sys/dev/vmware/vmxnet3/if_vmxvar.h
index 20e73af69004..e39b552ab8cf 100644
--- a/sys/dev/vmware/vmxnet3/if_vmxvar.h
+++ b/sys/dev/vmware/vmxnet3/if_vmxvar.h
@@ -41,7 +41,7 @@ struct vmxnet3_softc;
 #define VMXNET3_MAX_TX_NDESC		4096
 #define VMXNET3_MIN_TX_NDESC		32
 #define VMXNET3_MASK_TX_NDESC		0x1F
-#define VMXNET3_DEF_RX_NDESC		256
+#define VMXNET3_DEF_RX_NDESC		512
 #define VMXNET3_MAX_RX_NDESC		2048
 #define VMXNET3_MIN_RX_NDESC		32
 #define VMXNET3_MASK_RX_NDESC		0x1F
@@ -63,6 +63,8 @@ struct vmxnet3_rxring {
 	u_int vxrxr_ndesc;
 	int vxrxr_gen;
 	bus_addr_t vxrxr_paddr;
+	uint64_t vxrxr_desc_skips;
+	uint16_t vxrxr_refill_start;
 };
 
 struct vmxnet3_comp_ring {
@@ -78,6 +80,8 @@ struct vmxnet3_comp_ring {
 	u_int vxcr_ndesc;
 	int vxcr_gen;
 	bus_addr_t vxcr_paddr;
+	uint64_t vxcr_zero_length;
+	uint64_t vxcr_pkt_errors;
 };
 
 struct vmxnet3_txqueue {
diff --git a/sys/net/iflib.c b/sys/net/iflib.c
index 5c006c17bce0..1ebbe60186c0 100644
--- a/sys/net/iflib.c
+++ b/sys/net/iflib.c
@@ -207,8 +206,6 @@ struct iflib_ctx {
 #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get
 #define isc_rxd_refill ifc_txrx.ift_rxd_refill
 #define isc_rxd_flush ifc_txrx.ift_rxd_flush
-#define isc_rxd_refill ifc_txrx.ift_rxd_refill
-#define isc_rxd_refill ifc_txrx.ift_rxd_refill
 #define isc_legacy_intr ifc_txrx.ift_legacy_intr
 	eventhandler_tag ifc_vlan_attach_event;
 	eventhandler_tag ifc_vlan_detach_event;
@@ -394,8 +392,7 @@ struct iflib_fl {
 	bus_dma_tag_t	ifl_buf_tag;
 	iflib_dma_info_t	ifl_ifdi;
 	uint64_t	ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE);
-	caddr_t		ifl_vm_addrs[IFLIB_MAX_RX_REFRESH];
-	qidx_t	ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH];
+	qidx_t		ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH];
 }  __aligned(CACHE_LINE_SIZE);
 
 static inline qidx_t
@@ -453,7 +450,6 @@ typedef struct if_rxsd {
 	caddr_t *ifsd_cl;
 	struct mbuf **ifsd_m;
 	iflib_fl_t ifsd_fl;
-	qidx_t ifsd_cidx;
 } *if_rxsd_t;
 
 /* multiple of word size */
@@ -713,6 +709,7 @@ static int iflib_altq_if_transmit(if_t ifp, struct mbuf *m);
 static int iflib_register(if_ctx_t);
 static void iflib_deregister(if_ctx_t);
 static void iflib_unregister_vlan_handlers(if_ctx_t ctx);
+static uint16_t iflib_get_mbuf_size_for(unsigned int size);
 static void iflib_init_locked(if_ctx_t ctx);
 static void iflib_add_device_sysctl_pre(if_ctx_t ctx);
 static void iflib_add_device_sysctl_post(if_ctx_t ctx);
@@ -859,7 +856,6 @@ netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, boo
 		if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
 			return netmap_ring_reinit(kring);
 
-		fl->ifl_vm_addrs[tmp_pidx] = addr;
 		if (__predict_false(init)) {
 			netmap_load_map(na, fl->ifl_buf_tag,
 			    map[nic_i], addr);
@@ -1333,7 +1329,6 @@ iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid)
 
 	fl = &rxq->ifr_fl[flid];
 	iru->iru_paddrs = fl->ifl_bus_addrs;
-	iru->iru_vaddrs = &fl->ifl_vm_addrs[0];
 	iru->iru_idxs = fl->ifl_rxd_idxs;
 	iru->iru_qsidx = rxq->ifr_id;
 	iru->iru_buf_size = fl->ifl_buf_size;
@@ -1987,7 +1982,7 @@ _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
 }
 
 /**
- * _iflib_fl_refill - refill an rxq free-buffer list
+ * iflib_fl_refill - refill an rxq free-buffer list
  * @ctx: the iflib context
  * @fl: the free list to refill
  * @count: the number of new buffers to allocate
@@ -1996,7 +1991,7 @@ _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
  * The caller must assure that @count does not exceed the queue's capacity.
  */
 static uint8_t
-_iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
+iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
 {
 	struct if_rxd_update iru;
 	struct rxq_refill_cb_arg cb_arg;
@@ -2033,12 +2028,13 @@ _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
 	if (n > 8)
 		DBG_COUNTER_INC(fl_refills_large);
 	iru_init(&iru, fl->ifl_rxq, fl->ifl_id);
-	while (n--) {
+	while (n-- > 0) {
 		/*
 		 * We allocate an uninitialized mbuf + cluster, mbuf is
 		 * initialized after rx.
 		 *
-		 * If the cluster is still set then we know a minimum sized packet was received
+		 * If the cluster is still set then we know a minimum sized
+		 * packet was received
 		 */
 		bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size,
 		    &frag_idx);
@@ -2046,7 +2042,8 @@ _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
 			bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx);
 		MPASS(frag_idx >= 0);
 		if ((cl = sd_cl[frag_idx]) == NULL) {
-			if ((cl = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL)
+			cl = uma_zalloc(fl->ifl_zone, M_NOWAIT);
+			if (__predict_false(cl == NULL))
 				break;
 
 			cb_arg.error = 0;
@@ -2054,16 +2051,12 @@ _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
 			err = bus_dmamap_load(fl->ifl_buf_tag, sd_map[frag_idx],
 			    cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg,
 			    BUS_DMA_NOWAIT);
-			if (err != 0 || cb_arg.error) {
-				/*
-				 * !zone_pack ?
-				 */
-				if (fl->ifl_zone == zone_pack)
-					uma_zfree(fl->ifl_zone, cl);
+			if (__predict_false(err != 0 || cb_arg.error)) {
+				uma_zfree(fl->ifl_zone, cl);
 				break;
 			}
 
-			sd_ba[frag_idx] =  bus_addr = cb_arg.seg.ds_addr;
+			sd_ba[frag_idx] = bus_addr = cb_arg.seg.ds_addr;
 			sd_cl[frag_idx] = cl;
 #if MEMORY_LOGGING
 			fl->ifl_cl_enqueued++;
@@ -2075,9 +2068,9 @@ _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
 		    BUS_DMASYNC_PREREAD);
 
 		MPASS(sd_m[frag_idx] == NULL);
-		if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
+		m = m_gethdr(M_NOWAIT, MT_NOINIT);
+		if (__predict_false(m == NULL))
 			break;
-		}
 		sd_m[frag_idx] = m;
 		bit_set(fl->ifl_rx_bitmap, frag_idx);
 #if MEMORY_LOGGING
@@ -2087,50 +2080,58 @@ _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
 		DBG_COUNTER_INC(rx_allocs);
 		fl->ifl_rxd_idxs[i] = frag_idx;
 		fl->ifl_bus_addrs[i] = bus_addr;
-		fl->ifl_vm_addrs[i] = cl;
 		credits++;
 		i++;
 		MPASS(credits <= fl->ifl_size);
 		if (++idx == fl->ifl_size) {
+#ifdef INVARIANTS
 			fl->ifl_gen = 1;
+#endif
 			idx = 0;
 		}
 		if (n == 0 || i == IFLIB_MAX_RX_REFRESH) {
 			iru.iru_pidx = pidx;
 			iru.iru_count = i;
 			ctx->isc_rxd_refill(ctx->ifc_softc, &iru);
-			i = 0;
-			pidx = idx;
 			fl->ifl_pidx = idx;
 			fl->ifl_credits = credits;
+			pidx = idx;
+			i = 0;
 		}
 	}
 
-	if (i) {
-		iru.iru_pidx = pidx;
-		iru.iru_count = i;
-		ctx->isc_rxd_refill(ctx->ifc_softc, &iru);
-		fl->ifl_pidx = idx;
-		fl->ifl_credits = credits;
-	}
-	DBG_COUNTER_INC(rxd_flush);
-	if (fl->ifl_pidx == 0)
-		pidx = fl->ifl_size - 1;
-	else
-		pidx = fl->ifl_pidx - 1;
+	if (n < count - 1) {
+		if (i != 0) {
+			iru.iru_pidx = pidx;
+			iru.iru_count = i;
+			ctx->isc_rxd_refill(ctx->ifc_softc, &iru);
+			fl->ifl_pidx = idx;
+			fl->ifl_credits = credits;
+		}
+		DBG_COUNTER_INC(rxd_flush);
+		if (fl->ifl_pidx == 0)
+			pidx = fl->ifl_size - 1;
+		else
+			pidx = fl->ifl_pidx - 1;
 
-	bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
-	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
-	ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx);
-	fl->ifl_fragidx = frag_idx + 1;
-	if (fl->ifl_fragidx == fl->ifl_size)
-		fl->ifl_fragidx = 0;
+		bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+		ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id,
+		    fl->ifl_id, pidx);
+		if (__predict_true(bit_test(fl->ifl_rx_bitmap, frag_idx))) {
+			fl->ifl_fragidx = frag_idx + 1;
+			if (fl->ifl_fragidx == fl->ifl_size)
+				fl->ifl_fragidx = 0;
+		} else {
+			fl->ifl_fragidx = frag_idx;
+		}
+	}
 
 	return (n == -1 ? 0 : IFLIB_RXEOF_EMPTY);
 }
 
-static __inline uint8_t
-__iflib_fl_refill_lt(if_ctx_t ctx, iflib_fl_t fl, int max)
+static inline uint8_t
+iflib_fl_refill_all(if_ctx_t ctx, iflib_fl_t fl)
 {
 	/* we avoid allowing pidx to catch up with cidx as it confuses ixl */
 	int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1;
@@ -2142,7 +2143,7 @@ __iflib_fl_refill_lt(if_ctx_t ctx, iflib_fl_t fl, int max)
 	MPASS(reclaimable == delta);
 
 	if (reclaimable > 0)
-		return (_iflib_fl_refill(ctx, fl, min(max, reclaimable)));
+		return (iflib_fl_refill(ctx, fl, reclaimable));
 
 	return (0);
 }
@@ -2173,22 +2174,20 @@ iflib_fl_bufs_free(iflib_fl_t fl)
 				bus_dmamap_sync(fl->ifl_buf_tag, sd_map,
 				    BUS_DMASYNC_POSTREAD);
 				bus_dmamap_unload(fl->ifl_buf_tag, sd_map);
-				if (*sd_cl != NULL)
-					uma_zfree(fl->ifl_zone, *sd_cl);
+				uma_zfree(fl->ifl_zone, *sd_cl);
+				*sd_cl = NULL;
 				if (*sd_m != NULL) {
 					m_init(*sd_m, M_NOWAIT, MT_DATA, 0);
 					uma_zfree(zone_mbuf, *sd_m);
+					*sd_m = NULL;
 				}
 			} else {
-				MPASS(*sd_cl == NULL);
 				MPASS(*sd_m == NULL);
 			}
#if MEMORY_LOGGING
 			fl->ifl_m_dequeued++;
 			fl->ifl_cl_dequeued++;
#endif
-			*sd_cl = NULL;
-			*sd_m = NULL;
 		}
#ifdef INVARIANTS
 		for (i = 0; i < fl->ifl_size; i++) {
@@ -2213,6 +2212,8 @@ iflib_fl_setup(iflib_fl_t fl)
 {
 	iflib_rxq_t rxq = fl->ifl_rxq;
 	if_ctx_t ctx = rxq->ifr_ctx;
+	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
+	int qidx;
 
 	bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size - 1);
 	/*
@@ -2221,7 +2222,16 @@ iflib_fl_setup(iflib_fl_t fl)
 	iflib_fl_bufs_free(fl);
 	/* Now replenish the mbufs */
 	MPASS(fl->ifl_credits == 0);
-	fl->ifl_buf_size = ctx->ifc_rx_mbuf_sz;
+	qidx = rxq->ifr_fl_offset + fl->ifl_id;
+	if (scctx->isc_rxd_buf_size[qidx] != 0)
+		fl->ifl_buf_size = scctx->isc_rxd_buf_size[qidx];
+	else
+		fl->ifl_buf_size = ctx->ifc_rx_mbuf_sz;
+	/*
+	 * ifl_buf_size may be a driver-supplied value, so pull it up
+	 * to the selected mbuf size.
+	 */
+	fl->ifl_buf_size = iflib_get_mbuf_size_for(fl->ifl_buf_size);
 	if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size)
 		ctx->ifc_max_fl_buf_size = fl->ifl_buf_size;
 	fl->ifl_cltype = m_gettype(fl->ifl_buf_size);
@@ -2231,7 +2241,7 @@ iflib_fl_setup(iflib_fl_t fl)
 	/* avoid pre-allocating zillions of clusters to an idle card
 	 * potentially speeding up attach
 	 */
-	(void) _iflib_fl_refill(ctx, fl, min(128, fl->ifl_size));
+	(void)iflib_fl_refill(ctx, fl, min(128, fl->ifl_size));
 	MPASS(min(128, fl->ifl_size) == fl->ifl_credits);
 	if (min(128, fl->ifl_size) != fl->ifl_credits)
 		return (ENOBUFS);
@@ -2353,6 +2363,16 @@ iflib_timer(void *arg)
 	STATE_UNLOCK(ctx);
 }
 
+static uint16_t
+iflib_get_mbuf_size_for(unsigned int size)
+{
+
+	if (size <= MCLBYTES)
+		return (MCLBYTES);
+	else
+		return (MJUMPAGESIZE);
+}
+
 static void
 iflib_calc_rx_mbuf_sz(if_ctx_t ctx)
 {
@@ -2362,10 +2382,8 @@ iflib_calc_rx_mbuf_sz(if_ctx_t ctx)
 	 * XXX don't set the max_frame_size to larger
 	 * than the hardware can handle
 	 */
-	if (sctx->isc_max_frame_size <= MCLBYTES)
-		ctx->ifc_rx_mbuf_sz = MCLBYTES;
-	else
-		ctx->ifc_rx_mbuf_sz = MJUMPAGESIZE;
+	ctx->ifc_rx_mbuf_sz =
+	    iflib_get_mbuf_size_for(sctx->isc_max_frame_size);
 }
 
 uint32_t
@@ -2578,7 +2596,6 @@ rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd)
 	cidx = irf->irf_idx;
 	fl = &rxq->ifr_fl[flid];
 	sd->ifsd_fl = fl;
-	sd->ifsd_cidx = cidx;
 	sd->ifsd_m = &fl->ifl_sds.ifsd_m[cidx];
 	sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx];
 	fl->ifl_credits--;
@@ -2590,12 +2607,10 @@ rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd)
 	next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1);
 	prefetch(&fl->ifl_sds.ifsd_map[next]);
 	map = fl->ifl_sds.ifsd_map[cidx];
-	next = (cidx + CACHE_LINE_SIZE) & (fl->ifl_size-1);
-	/* not valid assert if bxe really does SGE from non-contiguous elements */
-	MPASS(fl->ifl_cidx == cidx);
 	bus_dmamap_sync(fl->ifl_buf_tag, map, BUS_DMASYNC_POSTREAD);
-	if (unload)
+
+	if (unload && irf->irf_len != 0)
 		bus_dmamap_unload(fl->ifl_buf_tag, map);
 	fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1);
 	if (__predict_false(fl->ifl_cidx == 0))
@@ -2667,6 +2682,7 @@ iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri)
 
 	/* should I merge this back in now that the two paths are basically duplicated? */
 	if (ri->iri_nfrags == 1 &&
+	    ri->iri_frags[0].irf_len != 0 &&
 	    ri->iri_frags[0].irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) {
 		rxd_frag_to_sd(rxq, &ri->iri_frags[0], FALSE, &sd);
 		m = *sd.ifsd_m;
@@ -2680,6 +2696,8 @@ iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri)
 		m->m_len = ri->iri_frags[0].irf_len;
 	} else {
 		m = assemble_segments(rxq, ri, &sd);
+		if (m == NULL)
+			return (NULL);
 	}
 	m->m_pkthdr.len = ri->iri_len;
 	m->m_pkthdr.rcvif = ri->iri_ifp;
@@ -2780,7 +2798,7 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget)
 		cidxp = &rxq->ifr_fl[0].ifl_cidx;
 	if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) {
 		for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++)
-			retval |= __iflib_fl_refill_lt(ctx, fl, budget + 8);
+			retval |= iflib_fl_refill_all(ctx, fl);
 		DBG_COUNTER_INC(rx_unavail);
 		return (retval);
 	}
@@ -2836,7 +2854,7 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget)
 	}
 	/* make sure that we can refill faster than drain */
 	for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++)
-		retval |= __iflib_fl_refill_lt(ctx, fl, budget + 8);
+		retval |= iflib_fl_refill_all(ctx, fl);
 
 	lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO);
 	if (lro_enabled)
@@ -6702,6 +6720,9 @@ iflib_add_device_sysctl_post(if_ctx_t ctx)
 				SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits",
 				       CTLFLAG_RD,
 				       &fl->ifl_credits, 1, "credits available");
+				SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "buf_size",
+				       CTLFLAG_RD,
+				       &fl->ifl_buf_size, 1, "buffer size");
#if MEMORY_LOGGING
 				SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued",
 				       CTLFLAG_RD,
diff --git a/sys/net/iflib.h b/sys/net/iflib.h
index 6b076ad9485b..d013a34ab4ac 100644
--- a/sys/net/iflib.h
+++ b/sys/net/iflib.h
@@ -95,7 +95,6 @@ typedef struct if_rxd_info {
 
 typedef struct if_rxd_update {
 	uint64_t	*iru_paddrs;
-	caddr_t		*iru_vaddrs;
 	qidx_t		*iru_idxs;
 	qidx_t		iru_pidx;
 	uint16_t	iru_qsidx;
@@ -221,6 +220,9 @@ typedef struct if_softc_ctx {
 	uint32_t isc_tx_qdepth;
 	iflib_intr_mode_t isc_intr;
 
+	uint16_t isc_rxd_buf_size[8];	/* set at init time by driver, 0
+					   means use iflib-calculated size
+					   based on isc_max_frame_size */
 	uint16_t isc_max_frame_size;	/* set at init time by driver */
 	uint16_t isc_min_frame_size;	/* set at init time by driver, only
 					   used if IFLIB_NEED_ETHER_PAD is set. */
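Editor's note, not part of the patch: trunc_powerof2() rounds the iflib-supplied RX/TX queue counts down to a power of two before they are programmed into the device, so, for example, a 6-vCPU guest ends up with 4 queue pairs rather than 6. The standalone sketch below reproduces that rounding with a plain loop instead of the kernel's fls(9); it is illustrative only, not the driver code.

/*
 * Illustrative userland sketch of the queue-count rounding done by the
 * patch's trunc_powerof2(); assumes val >= 1, as the driver guarantees
 * after applying its defaults.
 */
#include <stdio.h>

static int
trunc_powerof2(int val)
{
	int result = 1;

	while (val > 1) {	/* keep only the highest set bit */
		val >>= 1;
		result <<= 1;
	}
	return (result);	/* 1 -> 1, 6 -> 4, 8 -> 8, 15 -> 8 */
}

int
main(void)
{
	int v;

	for (v = 1; v <= 16; v++)
		printf("%2d -> %2d\n", v, trunc_powerof2(v));
	return (0);
}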
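Also not part of the patch: the reworked vmxnet3_isc_rxd_refill() walk is easier to follow in isolation. iflib passes a monotonically increasing list of descriptor indexes in iru_idxs; any slot the device skipped earlier is absent from that list and only needs its generation bit refreshed, which is what the new vxrxr_desc_skips counter records. The self-contained sketch below mimics that walk with made-up ring contents and hypothetical field names.

/*
 * Illustrative sketch of the skip-aware refill walk; the ring size,
 * index list, and structure fields are invented for the example.
 */
#include <stdint.h>
#include <stdio.h>

#define NDESC	8

struct desc {
	uint64_t addr;
	int gen;
};

int
main(void)
{
	struct desc ring[NDESC] = {{ 0 }};
	uint16_t idxs[] = { 2, 4, 5 };		/* refill list; slot 3 was skipped */
	uint64_t paddrs[] = { 0x1000, 0x2000, 0x3000 };
	int count = 3, gen = 1, skips = 0, i = 0;
	uint16_t idx = 2;			/* analogous to vxrxr_refill_start */

	do {
		if (idx == idxs[i]) {		/* refilled slot: full update */
			ring[idx].addr = paddrs[i];
			i++;
		} else				/* skipped slot: generation only */
			skips++;
		ring[idx].gen = gen;

		if (++idx == NDESC) {		/* wrap the ring, flip generation */
			idx = 0;
			gen ^= 1;
		}
	} while (i != count);

	printf("next refill start %u, %d skipped\n", idx, skips);
	return (0);
}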