Index: sys/sys/buf_ring.h =================================================================== --- sys/sys/buf_ring.h (revision 244608) +++ sys/sys/buf_ring.h (working copy) @@ -193,9 +193,10 @@ #ifdef PREFETCH_DEFINED if (cons_next != prod_tail) { - prefetch(br->br_ring[cons_next]); + prefetch(br->br_ring[cons_next], PRFTCH_RD, PRFTCH_L3); if (cons_next_next != prod_tail) - prefetch(br->br_ring[cons_next_next]); + prefetch(br->br_ring[cons_next_next], PRFTCH_RD, + PRFTCH_L3); } #endif br->br_cons_head = cons_next; Index: sys/sys/cdefs.h =================================================================== --- sys/sys/cdefs.h (revision 244608) +++ sys/sys/cdefs.h (working copy) @@ -229,6 +229,28 @@ #define __alignof(x) __offsetof(struct { char __a; x __b; }, __b) #endif +/* + * Portable data prefetch hint. The second and third arguments of + * prefetch() are the rw and locality arguments of __builtin_prefetch(). + * PRFTCH_L0..PRFTCH_L3 are temporal-locality degrees, not cache levels: + * L0 means no temporal locality and L3 means keep the data in as many + * cache levels as possible. Where the builtin is unavailable prefetch() + * expands to a no-op, so callers need no compiler checks of their own. + */ +#define PRFTCH_RD 0 +#define PRFTCH_WR 1 + +#define PRFTCH_L0 0 +#define PRFTCH_L1 1 +#define PRFTCH_L2 2 +#define PRFTCH_L3 3 + +#if defined(__GNUC__) && __GNUC_PREREQ__(3, 0) +#define prefetch(x, y, z) __builtin_prefetch(x, y, z) +#else +#define prefetch(x, y, z) ((void)0) +#endif + /* * Keywords added in C11.
*/ Index: sys/ofed/include/linux/list.h =================================================================== --- sys/ofed/include/linux/list.h (revision 244608) +++ sys/ofed/include/linux/list.h (working copy) @@ -61,6 +61,9 @@ #include #include +#ifdef prefetch +#undef prefetch +#endif #define prefetch(x) struct list_head { Index: sys/dev/e1000/e1000_osdep.h =================================================================== --- sys/dev/e1000/e1000_osdep.h (revision 244608) +++ sys/dev/e1000/e1000_osdep.h (working copy) @@ -119,16 +119,6 @@ #endif #endif /*__FreeBSD_version < 800000 */ -#if defined(__i386__) || defined(__amd64__) -static __inline -void prefetch(void *x) -{ - __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); -} -#else -#define prefetch(x) -#endif - struct e1000_osdep { bus_space_tag_t mem_bus_space_tag; Index: sys/dev/cxgbe/t4_sge.c =================================================================== --- sys/dev/cxgbe/t4_sge.c (revision 244608) +++ sys/dev/cxgbe/t4_sge.c (working copy) @@ -1300,8 +1300,8 @@ KASSERT((eq->flags & EQ_TYPEMASK) == EQ_ETH, ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK)); - prefetch(&eq->desc[eq->pidx]); - prefetch(&txq->sdesc[eq->pidx]); + cxgbe_prefetch(&eq->desc[eq->pidx]); + cxgbe_prefetch(&txq->sdesc[eq->pidx]); txpkts.npkt = 0;/* indicates there's nothing in txpkts */ coalescing = 0; @@ -3373,7 +3373,7 @@ txmaps = &txq->txmaps; txm = &txmaps->maps[txmaps->map_cidx]; if (maps) - prefetch(txm->m); + cxgbe_prefetch(txm->m); eq->avail += reclaimed; KASSERT(eq->avail < eq->cap, /* avail tops out at (cap - 1) */ @@ -3389,7 +3389,7 @@ next = txm + 1; if (__predict_false(txmaps->map_cidx + 1 == txmaps->map_total)) next = txmaps->maps; - prefetch(next->m); + cxgbe_prefetch(next->m); bus_dmamap_unload(txq->tx_tag, txm->map); m_freem(txm->m); Index: sys/dev/cxgbe/adapter.h =================================================================== --- sys/dev/cxgbe/adapter.h (revision 244608) +++ 
sys/dev/cxgbe/adapter.h (working copy) @@ -57,15 +57,7 @@ #define CXGBE_UNIMPLEMENTED(s) \ panic("%s (%s, line %d) not implemented yet.", s, __FILE__, __LINE__) -#if defined(__i386__) || defined(__amd64__) -static __inline void -prefetch(void *x) -{ - __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); -} -#else -#define prefetch(x) -#endif +#define cxgbe_prefetch(x) prefetch(x, PRFTCH_RD, PRFTCH_L3) #ifndef SYSCTL_ADD_UQUAD #define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD Index: sys/dev/netmap/ixgbe_netmap.h =================================================================== --- sys/dev/netmap/ixgbe_netmap.h (revision 244608) +++ sys/dev/netmap/ixgbe_netmap.h (working copy) @@ -263,9 +263,9 @@ */ j = kring->nr_hwcur; if (j != k) { /* we have new packets to send */ - prefetch(&ring->slot[j]); + ixgbe_prefetch(&ring->slot[j]); l = netmap_idx_k2n(kring, j); /* NIC index */ - prefetch(&txr->tx_buffers[l]); + ixgbe_prefetch(&txr->tx_buffers[l]); for (n = 0; j != k; n++) { /* * Collect per-slot info. @@ -294,8 +294,8 @@ j = (j == lim) ? 0 : j + 1; l = (l == lim) ? 0 : l + 1; - prefetch(&ring->slot[j]); - prefetch(&txr->tx_buffers[l]); + ixgbe_prefetch(&ring->slot[j]); + ixgbe_prefetch(&txr->tx_buffers[l]); /* * Quick check for valid addr and len. 
Index: sys/dev/netmap/netmap.c =================================================================== --- sys/dev/netmap/netmap.c (revision 244608) +++ sys/dev/netmap/netmap.c (working copy) @@ -158,7 +158,6 @@ #include #include #endif /* __FreeBSD__ */ -#define prefetch(x) __builtin_prefetch(x) #endif /* !linux */ static void bdg_netmap_attach(struct ifnet *ifp); @@ -2133,7 +2132,7 @@ int len = ft[ft_i].len = slot->len; char *buf = ft[ft_i].buf = NMB(slot); - prefetch(buf); + prefetch(buf, PRFTCH_RD, PRFTCH_L3); if (unlikely(len < 14)) continue; if (unlikely(++ft_i == netmap_bridge)) Index: sys/dev/sfxge/common/efsys.h =================================================================== --- sys/dev/sfxge/common/efsys.h (revision 244608) +++ sys/dev/sfxge/common/efsys.h (working copy) @@ -95,61 +95,20 @@ /* Memory type to use on FreeBSD */ MALLOC_DECLARE(M_SFXGE); -/* Machine dependend prefetch wrappers */ -#if defined(__i386__) || defined(__amd64__) static __inline void prefetch_read_many(void *addr) { - __asm__( - "prefetcht0 (%0)" - : - : "r" (addr)); + prefetch(addr, PRFTCH_RD, PRFTCH_L3); } static __inline void prefetch_read_once(void *addr) { - __asm__( - "prefetchnta (%0)" - : - : "r" (addr)); + prefetch(addr, PRFTCH_RD, PRFTCH_L0); } -#elif defined(__sparc64__) -static __inline void -prefetch_read_many(void *addr) -{ - __asm__( - "prefetch [%0], 0" - : - : "r" (addr)); -} - -static __inline void -prefetch_read_once(void *addr) -{ - - __asm__( - "prefetch [%0], 1" - : - : "r" (addr)); -} -#else -static __inline void -prefetch_read_many(void *addr) -{ - -} - -static __inline void -prefetch_read_once(void *addr) -{ - -} -#endif - #if defined(__i386__) || defined(__amd64__) #include #include Index: sys/dev/bxe/if_bxe.h =================================================================== --- sys/dev/bxe/if_bxe.h (revision 244608) +++ sys/dev/bxe/if_bxe.h (working copy) @@ -1806,16 +1806,10 @@ #define mb() __asm volatile("mfence" ::: "memory") #define wmb() __asm 
volatile("sfence" ::: "memory") #define rmb() __asm volatile("lfence" ::: "memory") -static __inline void -prefetch(void *x) -{ - __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); -} #else #define mb() #define rmb() #define wmb() -#define prefetch() #endif #endif Index: sys/dev/ixgbe/ixgbe.c =================================================================== --- sys/dev/ixgbe/ixgbe.c (revision 244608) +++ sys/dev/ixgbe/ixgbe.c (working copy) @@ -3654,7 +3654,7 @@ buf = txr->tx_buffers; txd = txr->tx_base; } - prefetch(txd); + ixgbe_prefetch(txd); } while (__predict_true(--limit)); bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, @@ -4447,7 +4447,7 @@ nextp = 0; } nbuf = &rxr->rx_buffers[nextp]; - prefetch(nbuf); + ixgbe_prefetch(nbuf); } /* ** Rather than using the fmp/lmp global pointers Index: sys/dev/ixgbe/ixv.c =================================================================== --- sys/dev/ixgbe/ixv.c (revision 244608) +++ sys/dev/ixgbe/ixv.c (working copy) @@ -3367,7 +3367,7 @@ if (nextp == adapter->num_rx_desc) nextp = 0; nbuf = &rxr->rx_buffers[nextp]; - prefetch(nbuf); + ixgbe_prefetch(nbuf); } /* ** The header mbuf is ONLY used when header Index: sys/dev/ixgbe/ixgbe_osdep.h =================================================================== --- sys/dev/ixgbe/ixgbe_osdep.h (revision 244608) +++ sys/dev/ixgbe/ixgbe_osdep.h (working copy) @@ -138,15 +138,7 @@ #endif #endif -#if defined(__i386__) || defined(__amd64__) -static __inline -void prefetch(void *x) -{ - __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); -} -#else -#define prefetch(x) -#endif +#define ixgbe_prefetch(x) prefetch(x, PRFTCH_RD, PRFTCH_L3) /* * Optimized bcopy thanks to Luigi Rizzo's investigative work. 
Assumes Index: sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ib_intfc.h =================================================================== --- sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ib_intfc.h (revision 244608) +++ sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ib_intfc.h (working copy) @@ -3,7 +3,7 @@ /* $FreeBSD$ */ -#undef prefetch +#undef cxgb_prefetch #undef WARN_ON #undef max_t #undef udelay Index: sys/dev/cxgb/cxgb_sge.c =================================================================== --- sys/dev/cxgb/cxgb_sge.c (revision 244608) +++ sys/dev/cxgb/cxgb_sge.c (working copy) @@ -1381,7 +1381,7 @@ txsd = &txq->sdesc[txq->pidx]; sgl = txq->txq_sgl; - prefetch(txd); + cxgb_prefetch(txd); m0 = *m; mtx_assert(&qs->lock, MA_OWNED); @@ -2139,8 +2139,8 @@ mtx_assert(&qs->lock, MA_OWNED); while (reclaimable--) { - prefetch(q->sdesc[(cidx + 1) & mask].m); - prefetch(q->sdesc[(cidx + 2) & mask].m); + cxgb_prefetch(q->sdesc[(cidx + 1) & mask].m); + cxgb_prefetch(q->sdesc[(cidx + 2) & mask].m); if (txsd->m != NULL) { if (txsd->flags & TX_SW_DESC_MAPPED) { @@ -2700,10 +2700,10 @@ int ret = 0; mask = fl->size - 1; - prefetch(fl->sdesc[(cidx + 1) & mask].m); - prefetch(fl->sdesc[(cidx + 2) & mask].m); - prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); - prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); + cxgb_prefetch(fl->sdesc[(cidx + 1) & mask].m); + cxgb_prefetch(fl->sdesc[(cidx + 2) & mask].m); + cxgb_prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); + cxgb_prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); fl->credits--; bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); Index: sys/dev/cxgb/cxgb_osdep.h =================================================================== --- sys/dev/cxgb/cxgb_osdep.h (revision 244608) +++ sys/dev/cxgb/cxgb_osdep.h (working copy) @@ -121,12 +121,6 @@ */ #if defined(__i386__) || defined(__amd64__) -static __inline -void prefetch(void *x) -{ - __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); -} - #define smp_mb() mb() #define L1_CACHE_BYTES 128 @@ 
-141,10 +135,11 @@ #else #define smp_mb() -#define prefetch(x) #define L1_CACHE_BYTES 32 #endif +#define cxgb_prefetch(x) prefetch(x, PRFTCH_RD, PRFTCH_L3) + #define DBG_RX (1 << 0) static const int debug_flags = DBG_RX; Index: sys/dev/cxgb/sys/mvec.h =================================================================== --- sys/dev/cxgb/sys/mvec.h (revision 244608) +++ sys/dev/cxgb/sys/mvec.h (working copy) @@ -58,7 +58,7 @@ while (m != NULL) { n = m->m_nextpkt; if (n != NULL) - prefetch(n); + cxgb_prefetch(n); m_freem(m); m = n; }