Index: dev/e1000/if_igb.c
===================================================================
--- dev/e1000/if_igb.c	(revision 225402)
+++ dev/e1000/if_igb.c	(working copy)
@@ -175,7 +175,6 @@
 static int	igb_mq_start_locked(struct ifnet *,
 		    struct tx_ring *, struct mbuf *);
 static void	igb_qflush(struct ifnet *);
-static void	igb_deferred_mq_start(void *, int);
 #else
 static void	igb_start(struct ifnet *);
 static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
@@ -934,8 +933,15 @@
 		err = igb_mq_start_locked(ifp, txr, m);
 		IGB_TX_UNLOCK(txr);
 	} else {
-		err = drbr_enqueue(ifp, txr->br, m);
-		taskqueue_enqueue(que->tq, &txr->txq_task);
+		err = drbr_enqueue_pending(ifp, txr->br, m);
+		if (err == EINPROGRESS) {
+			IGB_TX_LOCK(txr);
+			drbr_clear_pending(ifp, txr->br);
+			if (!drbr_empty(ifp, txr->br))
+				igb_mq_start_locked(ifp, txr, NULL);
+			IGB_TX_UNLOCK(txr);
+			err = 0;
+		}
 	}
 
 	return (err);
@@ -996,22 +1002,6 @@
 }
 
 /*
- * Called from a taskqueue to drain queued transmit packets.
- */
-static void
-igb_deferred_mq_start(void *arg, int pending)
-{
-	struct tx_ring *txr = arg;
-	struct adapter *adapter = txr->adapter;
-	struct ifnet *ifp = adapter->ifp;
-
-	IGB_TX_LOCK(txr);
-	if (!drbr_empty(ifp, txr->br))
-		igb_mq_start_locked(ifp, txr, NULL);
-	IGB_TX_UNLOCK(txr);
-}
-
-/*
 ** Flush all ring buffers
 */
 static void
@@ -2231,7 +2221,6 @@
 {
 	device_t dev = adapter->dev;
 	struct igb_queue *que = adapter->queues;
-	struct tx_ring *txr = adapter->tx_rings;
 	int error, rid = 0;
 
 	/* Turn off all interrupts */
@@ -2250,10 +2239,6 @@
 		return (ENXIO);
 	}
 
-#if __FreeBSD_version >= 800000
-	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
-#endif
-
 	/*
 	 * Try allocating a fast interrupt and the associated deferred
 	 * processing contexts.
@@ -2326,10 +2311,7 @@
 		 */
 		if (adapter->num_queues > 1)
 			bus_bind_intr(dev, que->res, i);
-#if __FreeBSD_version >= 800000
-		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
-		    que->txr);
-#endif
+
 		/* Make tasklet for deferred handling */
 		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
 		que->tq = taskqueue_create("igb_que", M_NOWAIT,
@@ -2551,9 +2533,6 @@
 
 	for (int i = 0; i < adapter->num_queues; i++, que++) {
 		if (que->tq != NULL) {
-#if __FreeBSD_version >= 800000
-			taskqueue_drain(que->tq, &que->txr->txq_task);
-#endif
			taskqueue_drain(que->tq, &que->que_task);
 			taskqueue_free(que->tq);
 		}
Index: dev/e1000/if_igb.h
===================================================================
--- dev/e1000/if_igb.h	(revision 225402)
+++ dev/e1000/if_igb.h	(working copy)
@@ -297,7 +297,6 @@
 	struct buf_ring		*br;
 #endif
 	bus_dma_tag_t		txtag;
-	struct task		txq_task;
 	u32			bytes;
 	u32			packets;
Index: dev/e1000/if_em.c
===================================================================
--- dev/e1000/if_em.c	(revision 225402)
+++ dev/e1000/if_em.c	(working copy)
@@ -934,8 +934,17 @@
 	if (EM_TX_TRYLOCK(txr)) {
 		error = em_mq_start_locked(ifp, txr, m);
 		EM_TX_UNLOCK(txr);
-	} else
-		error = drbr_enqueue(ifp, txr->br, m);
+	} else {
+		error = drbr_enqueue_pending(ifp, txr->br, m);
+		if (error == EINPROGRESS) {
+			EM_TX_LOCK(txr);
+			drbr_clear_pending(ifp, txr->br);
+			if (!drbr_empty(ifp, txr->br))
+				em_mq_start_locked(ifp, txr, NULL);
+			EM_TX_UNLOCK(txr);
+			error = 0;
+		}
+	}
 
 	return (error);
 }
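Note: the igb/em changes above remove the per-ring taskqueue that used to
guarantee an enqueued mbuf was eventually drained. Without it, a bare
drbr_enqueue() after a failed trylock can strand the mbuf: the lock holder
may already have made its final drbr_empty() check and be on its way to
unlocking, so nobody ever looks at the ring again. drbr_enqueue_pending()
closes that window: its cmpset on the pending flag either fails (another
thread owns the drain obligation and will re-check the ring after clearing
the flag) or succeeds (EINPROGRESS), in which case the enqueuing thread must
take the TX lock and drain the ring itself. A distilled sketch of the
protocol follows; the xx_* names are placeholders for the per-driver lock
and drain routines, not part of this patch:

static int
xx_mq_start(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	int err;

	if (XX_TX_TRYLOCK(txr)) {
		/* Fast path: we hold the lock, transmit directly. */
		err = xx_mq_start_locked(ifp, txr, m);
		XX_TX_UNLOCK(txr);
		return (err);
	}
	/* Slow path: queue the mbuf and try to become the drainer. */
	err = drbr_enqueue_pending(ifp, txr->br, m);
	if (err == EINPROGRESS) {
		/* We won the cmpset: the deferred drain is our job now. */
		XX_TX_LOCK(txr);
		drbr_clear_pending(ifp, txr->br);
		if (!drbr_empty(ifp, txr->br))
			xx_mq_start_locked(ifp, txr, NULL);
		XX_TX_UNLOCK(txr);
		err = 0;	/* don't leak EINPROGRESS to the stack */
	}
	return (err);
}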
Index: dev/cxgbe/t4_main.c
===================================================================
--- dev/cxgbe/t4_main.c	(revision 225402)
+++ dev/cxgbe/t4_main.c	(working copy)
@@ -140,7 +140,6 @@
 /* ifnet + media interface */
 static void cxgbe_init(void *);
 static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t);
-static void cxgbe_start(struct ifnet *);
 static int cxgbe_transmit(struct ifnet *, struct mbuf *);
 static void cxgbe_qflush(struct ifnet *);
 static int cxgbe_media_change(struct ifnet *);
@@ -855,14 +854,9 @@
 
 	ifp->if_init = cxgbe_init;
 	ifp->if_ioctl = cxgbe_ioctl;
-	ifp->if_start = cxgbe_start;
 	ifp->if_transmit = cxgbe_transmit;
 	ifp->if_qflush = cxgbe_qflush;
 
-	ifp->if_snd.ifq_drv_maxlen = 1024;
-	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
-	IFQ_SET_READY(&ifp->if_snd);
-
 	ifp->if_capabilities = T4_CAP;
 	ifp->if_capenable = T4_CAP_ENABLE;
 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
@@ -1129,21 +1123,6 @@
 	return (rc);
 }
 
-static void
-cxgbe_start(struct ifnet *ifp)
-{
-	struct port_info *pi = ifp->if_softc;
-	struct sge_txq *txq;
-	int i;
-
-	for_each_txq(pi, i, txq) {
-		if (TXQ_TRYLOCK(txq)) {
-			txq_start(ifp, txq);
-			TXQ_UNLOCK(txq);
-		}
-	}
-}
-
 static int
 cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
 {
@@ -1165,13 +1144,26 @@
 	br = txq->br;
 
 	if (TXQ_TRYLOCK(txq) == 0) {
+
 		/*
-		 * XXX: make sure that this packet really is sent out.  There is
-		 * a small race where t4_eth_tx may stop draining the drbr and
-		 * goes away, just before we enqueued this mbuf.
+		 * It is possible that t4_eth_tx finishes up and releases the
+		 * lock between the TRYLOCK above and the drbr_enqueue here.  We
+		 * need to make sure that this mbuf doesn't just sit there in
+		 * the drbr.
 		 */
-		return (drbr_enqueue(ifp, br, m));
+		rc = drbr_enqueue_pending(ifp, br, m);
+		if (rc == EINPROGRESS) {
+			TXQ_LOCK(txq);
+			drbr_clear_pending(ifp, br);
+			if (!drbr_empty(ifp, br) || txq->m) {
+				m = txq->m ? txq->m : drbr_dequeue(ifp, br);
+				(void) t4_eth_tx(ifp, txq, m);
+			}
+			TXQ_UNLOCK(txq);
+			rc = 0;
+		}
+		return (rc);
 	}
 
 	/*
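Note: two details in the cxgbe version above are worth calling out. First,
a drainer that won the flag must clear it *before* re-checking the ring, and
both steps must happen under the TX lock. If the order were reversed, a
second producer could enqueue after the emptiness check yet still lose its
cmpset against the not-yet-cleared flag, stranding its mbuf. Roughly, as an
illustrative timeline in comment form (not patch code):

	/*
	 * Drainer D (lock held,          Producer P (trylock failed)
	 * won an earlier cmpset)
	 * -----------------------        ---------------------------
	 * drbr_empty() -> true           drbr_enqueue(m)
	 * drbr_clear_pending()           cmpset(0 -> 1) fails: flag still 1
	 * TX_UNLOCK()                     -> m is stranded
	 *
	 * Clearing first means a failed cmpset by P implies D has not yet
	 * made its final emptiness check, so D is guaranteed to see m.
	 */

Second, unlike igb/em, t4_eth_tx() can leave an mbuf it could not finish
parked in txq->m, so the EINPROGRESS path checks txq->m as well as the drbr
and resumes from the parked mbuf first.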
Index: dev/cxgb/cxgb_sge.c
===================================================================
--- dev/cxgb/cxgb_sge.c	(revision 225402)
+++ dev/cxgb/cxgb_sge.c	(working copy)
@@ -1706,16 +1725,22 @@
 		if (t3_encap(qs, &m_head) || m_head == NULL)
 			break;
 
 		m_head = NULL;
 	}
 
 	if (txq->db_pending)
 		check_ring_tx_db(pi->adapter, txq, 1);
 
+#if 1
+	/*
+	 * XXX: Should be handled by the ISR noticing TX completions
+	 * and thus more room in the queue?
+	 */
 	if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
 	    pi->link_config.link_ok)
 		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 		    qs, txq->txq_timer.c_cpu);
+#endif
 	if (m_head != NULL)
 		m_freem(m_head);
 }
@@ -1762,9 +1790,11 @@
 	if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
 	    (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
 		cxgb_start_locked(qs);
+#if 0
 	else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
 		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 		    qs, txq->txq_timer.c_cpu);
+#endif
 
 	return (0);
 }
@@ -1790,24 +1824,21 @@
 		/* XXX running */
 		error = cxgb_transmit_locked(ifp, qs, m);
 		TXQ_UNLOCK(qs);
-	} else
-		error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
+	} else {
+		error = drbr_enqueue_pending(ifp, qs->txq[TXQ_ETH].txq_mr, m);
+		cxgb_tx_deferred++;
+		if (error == EINPROGRESS) {
+			TXQ_LOCK(qs);
+			drbr_clear_pending(ifp, qs->txq[TXQ_ETH].txq_mr);
+			if (!TXQ_RING_EMPTY(qs))
+				cxgb_start_locked(qs);
+			TXQ_UNLOCK(qs);
+			error = 0;
+		}
+	}
 
 	return (error);
 }
 
-void
-cxgb_start(struct ifnet *ifp)
-{
-	struct port_info *pi = ifp->if_softc;
-	struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset];
-
-	if (!pi->link_config.link_ok)
-		return;
-	TXQ_LOCK(qs);
-	cxgb_start_locked(qs);
-	TXQ_UNLOCK(qs);
-}
-
 void
 cxgb_qflush(struct ifnet *ifp)
 {
Index: dev/cxgb/cxgb_main.c
===================================================================
--- dev/cxgb/cxgb_main.c	(revision 225402)
+++ dev/cxgb/cxgb_main.c	(working copy)
@@ -1175,12 +1175,9 @@
 	ifp->if_softc = p;
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_ioctl = cxgb_ioctl;
-	ifp->if_start = cxgb_start;
+	ifp->if_transmit = cxgb_transmit;
+	ifp->if_qflush = cxgb_qflush;
 
-	ifp->if_snd.ifq_drv_maxlen = max(cxgb_snd_queue_len, ifqmaxlen);
-	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
-	IFQ_SET_READY(&ifp->if_snd);
-
 	ifp->if_capabilities = CXGB_CAP;
 	ifp->if_capenable = CXGB_CAP_ENABLE;
 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
@@ -1195,8 +1192,6 @@
 	}
 
 	ether_ifattach(ifp, p->hw_addr);
-	ifp->if_transmit = cxgb_transmit;
-	ifp->if_qflush = cxgb_qflush;
 
 #ifdef DEFAULT_JUMBO
 	if (sc->params.nports <= 2)
Index: dev/cxgb/cxgb_adapter.h
===================================================================
--- dev/cxgb/cxgb_adapter.h	(revision 225402)
+++ dev/cxgb/cxgb_adapter.h	(working copy)
@@ -316,6 +316,7 @@
 	int idx; /* qset # */
 	int qs_flags;
 	int coalescing;
+	int txq_thread_pending;
 	struct cv qs_cv;
 	struct mtx lock;
 #define QS_NAME_LEN 32
@@ -600,5 +601,4 @@
 void cxgb_tx_watchdog(void *arg);
 int cxgb_transmit(struct ifnet *ifp, struct mbuf *m);
 void cxgb_qflush(struct ifnet *ifp);
-void cxgb_start(struct ifnet *ifp);
 #endif
Index: net/if_var.h
===================================================================
--- net/if_var.h	(revision 225402)
+++ net/if_var.h	(working copy)
@@ -599,7 +599,25 @@
 	return (error);
 }
 
+static __inline int
+drbr_enqueue_pending(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m)
+{
+	int error;
+
+	error = drbr_enqueue(ifp, br, m);
+	if (error == 0 && buf_ring_set_pending(br))
+		error = EINPROGRESS;
+	return (error);
+}
+
 static __inline void
+drbr_clear_pending(struct ifnet *ifp, struct buf_ring *br)
+{
+
+	buf_ring_clear_pending(br);
+}
+
+static __inline void
 drbr_flush(struct ifnet *ifp, struct buf_ring *br)
 {
 	struct mbuf *m;
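Note: the contract of drbr_enqueue_pending() is: 0 means the mbuf was queued
and some other thread is obliged to drain it; any other error from the
underlying drbr_enqueue() is returned as-is; EINPROGRESS means the mbuf was
queued *and* the caller now owns the drain obligation, so it must take the
TX lock, call drbr_clear_pending(), drain, and report 0 upward. A driver
that wanted to centralize that boilerplate could use a wrapper along these
lines (a hypothetical helper, not part of this patch):

static __inline int
drbr_enqueue_or_drain(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m,
    struct mtx *txlock, void (*drain)(void *), void *arg)
{
	int error;

	error = drbr_enqueue_pending(ifp, br, m);
	if (error == EINPROGRESS) {
		/* We own the deferred drain: lock, clear, re-check, drain. */
		mtx_lock(txlock);
		drbr_clear_pending(ifp, br);
		if (!drbr_empty(ifp, br))
			(*drain)(arg);
		mtx_unlock(txlock);
		error = 0;
	}
	return (error);
}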
Index: sys/buf_ring.h
===================================================================
--- sys/buf_ring.h	(revision 225402)
+++ sys/buf_ring.h	(working copy)
@@ -49,10 +49,12 @@
 	uint64_t	br_drops;
 	uint64_t	br_prod_bufs;
 	uint64_t	br_prod_bytes;
+	int		br_thread_pending;
 	/*
 	 * Pad out to next L2 cache line
 	 */
-	uint64_t	_pad0[11];
+	uint32_t	_pad0[1];
+	uint64_t	_pad1[10];
 
 	volatile uint32_t	br_cons_head;
 	volatile uint32_t	br_cons_tail;
@@ -62,7 +64,7 @@
 	/*
 	 * Pad out to next L2 cache line
 	 */
-	uint64_t	_pad1[14];
+	uint64_t	_pad2[14];
 #ifdef DEBUG_BUFRING
 	struct mtx	*br_lock;
 #endif
@@ -130,6 +132,20 @@
 	return (buf_ring_enqueue_bytes(br, buf, 0));
 }
 
+static __inline int
+buf_ring_set_pending(struct buf_ring *br)
+{
+
+	return (atomic_cmpset_int(&br->br_thread_pending, 0, 1));
+}
+
+static __inline void
+buf_ring_clear_pending(struct buf_ring *br)
+{
+
+	atomic_store_rel_int(&br->br_thread_pending, 0);
+}
+
 /*
  * multi-consumer safe dequeue
 *
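Note: buf_ring_set_pending() is a cmpset 0 -> 1, so at most one thread holds
the drain obligation at a time and the return value tells the caller whether
it just acquired it; buf_ring_clear_pending() is a release store, so the
drainer's prior ring manipulation is visible to the next thread whose cmpset
wins (the surrounding TX lock supplies the rest of the ordering). The
padding arithmetic still works out: the new 4-byte field plus uint32_t
_pad0[1] plus uint64_t _pad1[10] occupies the same 88 bytes as the old
uint64_t _pad0[11], leaving br_cons_head at the same offset. One possible
nit: atomic_cmpset_int() operates on a volatile u_int *, so declaring
br_thread_pending as plain int may draw a pointer-type warning. For readers
without the FreeBSD atomic(9) API at hand, a rough userland analogue of the
two primitives in C11 atomics (illustrative only, not patch code):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int thread_pending;

/* Returns true for exactly one caller while the flag is clear. */
static bool
set_pending(void)
{
	int expected = 0;

	return (atomic_compare_exchange_strong(&thread_pending, &expected, 1));
}

/*
 * Release store: work done before the clear happens-before anything done
 * by the next thread whose compare-exchange observes the cleared flag.
 */
static void
clear_pending(void)
{
	atomic_store_explicit(&thread_pending, 0, memory_order_release);
}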