diff -r 4b083efde4bb sys/dev/cxgbe/t4_main.c --- a/sys/dev/cxgbe/t4_main.c Thu Feb 26 14:22:27 2015 +0000 +++ b/sys/dev/cxgbe/t4_main.c Fri Feb 27 07:59:30 2015 -0800 @@ -591,7 +591,9 @@ t4_attach(device_t dev) if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) { uint32_t v; - pci_set_max_read_req(dev, 4096); +#if 0 + pci_set_max_read_req(dev, 2048); +#endif v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2); v |= PCIEM_CTL_RELAXED_ORD_ENABLE; pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2); diff -r 4b083efde4bb sys/dev/cxgbe/t4_netmap.c --- a/sys/dev/cxgbe/t4_netmap.c Thu Feb 26 14:22:27 2015 +0000 +++ b/sys/dev/cxgbe/t4_netmap.c Fri Feb 27 07:59:30 2015 -0800 @@ -58,6 +58,9 @@ extern int fl_pad; /* XXXNM */ extern int spg_len; /* XXXNM */ extern int fl_pktshift; /* XXXNM */ +int rx_blackhole = 0; +TUNABLE_INT("hw.cxgbe.nm_rx_blackhole", &rx_blackhole); + /* netmap ifnet routines */ static void cxgbe_nm_init(void *); static int cxgbe_nm_ioctl(struct ifnet *, unsigned long, caddr_t); @@ -269,7 +272,7 @@ alloc_nm_rxq_hwq(struct port_info *pi, s c.iqaddr = htobe64(nm_rxq->iq_ba); if (cong >= 0) { c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN | - V_FW_IQ_CMD_FL0CNGCHMAP(cong) | F_FW_IQ_CMD_FL0CONGCIF | + V_FW_IQ_CMD_FL0CNGCHMAP(0) | F_FW_IQ_CMD_FL0CONGCIF | F_FW_IQ_CMD_FL0CONGEN); } c.iqns_to_fl0congen |= @@ -491,13 +494,13 @@ cxgbe_netmap_on(struct adapter *sc, stru /* We deal with 8 bufs at a time */ MPASS((na->num_rx_desc & 7) == 0); MPASS(na->num_rx_desc == nm_rxq->fl_sidx); - for (j = 0; j < nm_rxq->fl_sidx - 8; j++) { + for (j = 0; j < nm_rxq->fl_sidx; j++) { uint64_t ba; PNMB(na, &slot[j], &ba); nm_rxq->fl_desc[j] = htobe64(ba | hwidx); } - nm_rxq->fl_pidx = j; + j = nm_rxq->fl_pidx = nm_rxq->fl_sidx - 8; MPASS((j & 7) == 0); j /= 8; /* driver pidx to hardware pidx */ wmb(); @@ -903,6 +906,12 @@ cxgbe_netmap_rxsync(struct netmap_kring u_int const head = nm_rxsync_prologue(kring); u_int n; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; +#ifdef KTR + u_int k1 = kring->nr_hwtail; +#endif + + if (rx_blackhole) + return (0); /* No updates ever. */ if (netmap_no_pendintr || force_update) { kring->nr_hwtail = atomic_load_acq_32(&nm_rxq->fl_cidx); @@ -912,6 +921,9 @@ cxgbe_netmap_rxsync(struct netmap_kring /* Userspace done with buffers from kring->nr_hwcur to head */ n = head >= kring->nr_hwcur ? head - kring->nr_hwcur : kring->nkr_num_slots - kring->nr_hwcur + head; + CTR5(KTR_CXGBE, "%s: rxb_new_seen %u (%u - %u), rxb_read %u", __func__, + IDXDIFF(kring->nr_hwtail, k1, kring->nkr_num_slots), k1, + kring->nr_hwtail, n); n &= ~7U; if (n > 0) { u_int fl_pidx = nm_rxq->fl_pidx; @@ -1095,6 +1107,7 @@ handle_nm_fw6_msg(struct adapter *sc, st netmap_tx_irq(ifp, nm_txq->nid); } +int foo = 256; void t4_nm_intr(void *arg) { @@ -1107,10 +1120,10 @@ t4_nm_intr(void *arg) struct netmap_ring *ring = kring->ring; struct iq_desc *d = &nm_rxq->iq_desc[nm_rxq->iq_cidx]; uint32_t lq; - u_int n = 0; - int processed = 0; + u_int n = 0, work = 0; uint8_t opcode; uint32_t fl_cidx = atomic_load_acq_32(&nm_rxq->fl_cidx); + u_int fl_credits = fl_cidx & 7; while ((d->rsp.u.type_gen & F_RSPD_GEN) == nm_rxq->iq_gen) { @@ -1140,6 +1153,7 @@ t4_nm_intr(void *arg) ring->slot[fl_cidx].flags = kring->nkr_slot_flags; if (__predict_false(++fl_cidx == nm_rxq->fl_sidx)) fl_cidx = 0; + fl_credits++; break; default: panic("%s: unexpected opcode 0x%x on nm_rxq %p", @@ -1164,17 +1178,51 @@ t4_nm_intr(void *arg) nm_rxq->iq_gen ^= F_RSPD_GEN; } - if (__predict_false(++n == 64)) { /* XXXNM: tune */ + if (__predict_false(++n == foo)) { /* XXXNM: tune */ + atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx); + if (rx_blackhole && fl_credits >= 8) { + fl_credits /= 8; + IDXINCR(nm_rxq->fl_pidx, fl_credits * 8, + nm_rxq->fl_sidx); + + CTR4(KTR_CXGBE, + "%s: nm_rxq %p fl_pidx += %u (now %u)", + __func__, nm_rxq, fl_credits * 8, + nm_rxq->fl_pidx); + + t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), + nm_rxq->fl_db_val | V_PIDX(fl_credits)); + fl_credits = fl_cidx & 7; + } else if (!rx_blackhole) { + netmap_rx_irq(ifp, nm_rxq->nid, &work); + MPASS(work != 0); + } + + CTR4(KTR_CXGBE, "%s: nm_rxq %p iq_cidx += %u (now %u)", + __func__, nm_rxq, n, nm_rxq->iq_cidx); + t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(n) | V_INGRESSQID(nm_rxq->iq_cntxt_id) | V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); n = 0; } } - if (fl_cidx != nm_rxq->fl_cidx) { - atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx); - netmap_rx_irq(ifp, nm_rxq->nid, &processed); - } + + atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx); + if (rx_blackhole) { + fl_credits /= 8; + IDXINCR(nm_rxq->fl_pidx, fl_credits * 8, nm_rxq->fl_sidx); + + CTR4(KTR_CXGBE, "%s: nm_rxq %p fl_pidx += %u (now %u)", + __func__, nm_rxq, fl_credits * 8, nm_rxq->fl_pidx); + + t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), + nm_rxq->fl_db_val | V_PIDX(fl_credits)); + } else + netmap_rx_irq(ifp, nm_rxq->nid, &work); + + CTR4(KTR_CXGBE, "%s: nm_rxq %p iq_cidx += %u (now %u) ARMED", + __func__, nm_rxq, n, nm_rxq->iq_cidx); t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(n) | V_INGRESSQID((u32)nm_rxq->iq_cntxt_id) | V_SEINTARM(V_QINTR_TIMER_IDX(1)));