From 8c4ea03d56d4fa55089c0978eb6833c6b3bfc680 Mon Sep 17 00:00:00 2001 From: Kristof Provost Date: Thu, 26 Mar 2020 20:16:08 +0100 Subject: [PATCH 3/3] bridge: epoch-ification Run the bridge datapath under epoch, rather than under the BRIDGE_LOCK(). We still take the BRIDGE_LOCK() whenever we insert or delete items in the relevant lists, but we use epoch callbacks to free items so that it's safe to iterate the lists without the BRIDGE_LOCK. Tests on mercat5/6 shows this increases bridge throughput significantly, from 3.7Mpps to 18.6Mpps. Sponsored by: The FreeBSD Foundation --- sys/net/if_bridge.c | 175 ++++++++++++++++++++++++++++++-------------- 1 file changed, 119 insertions(+), 56 deletions(-) diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 84786875889..83c453090bb 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -237,6 +237,8 @@ struct bridge_iflist { uint32_t bif_addrmax; /* max # of addresses */ uint32_t bif_addrcnt; /* cur. # of addresses */ uint32_t bif_addrexceeded;/* # of address violations */ + + struct epoch_context bif_epoch_ctx; }; /* @@ -250,6 +252,9 @@ struct bridge_rtnode { uint8_t brt_flags; /* address flags */ uint8_t brt_addr[ETHER_ADDR_LEN]; uint16_t brt_vlan; /* vlan id */ + + struct vnet *brt_vnet; + struct epoch_context brt_epoch_ctx; }; #define brt_ifp brt_dst->bif_ifp @@ -276,6 +281,8 @@ struct bridge_softc { uint32_t sc_brtexceeded; /* # of cache drops */ struct ifnet *sc_ifaddr; /* member mac copied from */ struct ether_addr sc_defaddr; /* Default MAC address */ + + struct epoch_context sc_epoch_ctx; }; VNET_DEFINE_STATIC(struct mtx, bridge_list_mtx); @@ -595,6 +602,11 @@ vnet_bridge_uninit(const void *unused __unused) if_clone_detach(V_bridge_cloner); V_bridge_cloner = NULL; BRIDGE_LIST_LOCK_DESTROY(); + + /* Before we can destroy the uma zone, because there are callbacks that + * use it. */ + NET_EPOCH_WAIT(); + uma_zdestroy(V_bridge_rtnode_zone); } VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, @@ -757,6 +769,17 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params) return (0); } +static void +bridge_clone_destroy_cb(struct epoch_context *ctx) +{ + struct bridge_softc *sc; + + sc = __containerof(ctx, struct bridge_softc, sc_epoch_ctx); + + BRIDGE_LOCK_DESTROY(sc); + free(sc, M_DEVBUF); +} + /* * bridge_clone_destroy: * @@ -795,8 +818,7 @@ bridge_clone_destroy(struct ifnet *ifp) ether_ifdetach(ifp); if_free(ifp); - BRIDGE_LOCK_DESTROY(sc); - free(sc, M_DEVBUF); + epoch_call(net_epoch_preempt, &sc->sc_epoch_ctx, bridge_clone_destroy_cb); } /* @@ -822,6 +844,9 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) struct ifdrv *ifd = (struct ifdrv *) data; const struct bridge_control *bc; int error = 0, oldmtu; + struct epoch_tracker et; + + NET_EPOCH_ENTER_ET(et); switch (cmd) { @@ -943,6 +968,8 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; } + NET_EPOCH_EXIT_ET(et); + return (error); } @@ -957,6 +984,8 @@ bridge_mutecaps(struct bridge_softc *sc) struct bridge_iflist *bif; int enabled, mask; + BRIDGE_LOCK_ASSERT(sc); + /* Initial bitmask of capabilities to test */ mask = BRIDGE_IFCAPS_MASK; @@ -1018,7 +1047,7 @@ bridge_lookup_member(struct bridge_softc *sc, const char *name) struct bridge_iflist *bif; struct ifnet *ifp; - BRIDGE_LOCK_ASSERT(sc); + MPASS(in_epoch(net_epoch_preempt)); CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { ifp = bif->bif_ifp; @@ -1039,7 +1068,7 @@ bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp) { struct bridge_iflist *bif; - BRIDGE_LOCK_ASSERT(sc); + MPASS(in_epoch(net_epoch_preempt)); CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (bif->bif_ifp == member_ifp) @@ -1049,6 +1078,16 @@ bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp) return (NULL); } +static void +bridge_delete_member_cb(struct epoch_context *ctx) +{ + struct bridge_iflist *bif; + + bif = __containerof(ctx, struct bridge_iflist, bif_epoch_ctx); + + free(bif, M_DEVBUF); +} + /* * bridge_delete_member: * @@ -1129,7 +1168,9 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif, } bstp_destroy(&bif->bif_stp); /* prepare to free */ BRIDGE_LOCK(sc); - free(bif, M_DEVBUF); + + epoch_call(net_epoch_preempt, &bif->bif_epoch_ctx, + bridge_delete_member_cb); } /* @@ -1146,7 +1187,9 @@ bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif) ("%s: not a span interface", __func__)); CK_LIST_REMOVE(bif, bif_next); - free(bif, M_DEVBUF); + + epoch_call(net_epoch_preempt, &bif->bif_epoch_ctx, + bridge_delete_member_cb); } static int @@ -1529,12 +1572,17 @@ bridge_ioctl_saddr(struct bridge_softc *sc, void *arg) struct bridge_iflist *bif; int error; + MPASS(in_epoch(net_epoch_preempt)); + bif = bridge_lookup_member(sc, req->ifba_ifsname); if (bif == NULL) return (ENOENT); + /* bridge_rtupdate() may acquire the lock. */ + BRIDGE_UNLOCK(sc); error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1, req->ifba_flags); + BRIDGE_LOCK(sc); return (error); } @@ -1873,6 +1921,7 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_iflist *bif; + struct epoch_tracker et; if (ifp->if_flags & IFF_RENAMING) return; @@ -1883,6 +1932,7 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp) */ return; } + NET_EPOCH_ENTER_ET(et); /* Check if the interface is a bridge member */ if (sc != NULL) { BRIDGE_LOCK(sc); @@ -1892,6 +1942,7 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp) bridge_delete_member(sc, bif, 1); BRIDGE_UNLOCK(sc); + NET_EPOCH_EXIT_ET(et); return; } @@ -1908,6 +1959,7 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp) BRIDGE_UNLOCK(sc); } BRIDGE_LIST_UNLOCK(); + NET_EPOCH_EXIT_ET(et); } /* @@ -2067,6 +2119,8 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, struct bridge_softc *sc; uint16_t vlan; + MPASS(in_epoch(net_epoch_preempt)); + if (m->m_len < ETHER_HDR_LEN) { m = m_pullup(m, ETHER_HDR_LEN); if (m == NULL) @@ -2077,8 +2131,6 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, sc = ifp->if_bridge; vlan = VLANTAGOF(m); - BRIDGE_LOCK(sc); - /* * If bridge is down, but the original output interface is up, * go ahead and send out that interface. Otherwise, the packet @@ -2100,16 +2152,10 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, if (dst_if == NULL) { struct bridge_iflist *bif; struct mbuf *mc; - int error = 0, used = 0; + int used = 0; bridge_span(sc, m); - BRIDGE_LOCK2REF(sc, error); - if (error) { - m_freem(m); - return (0); - } - CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { dst_if = bif->bif_ifp; @@ -2143,7 +2189,6 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, } if (used == 0) m_freem(m); - BRIDGE_UNREF(sc); return (0); } @@ -2155,11 +2200,9 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, bridge_span(sc, m); if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) { m_freem(m); - BRIDGE_UNLOCK(sc); return (0); } - BRIDGE_UNLOCK(sc); bridge_enqueue(sc, dst_if, m); return (0); } @@ -2184,10 +2227,8 @@ bridge_transmit(struct ifnet *ifp, struct mbuf *m) eh = mtod(m, struct ether_header *); - BRIDGE_LOCK(sc); if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) && (dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1)) != NULL) { - BRIDGE_UNLOCK(sc); error = bridge_enqueue(sc, dst_if, m); } else bridge_broadcast(sc, ifp, m, 0); @@ -2221,6 +2262,8 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, uint8_t *dst; int error; + MPASS(in_epoch(net_epoch_preempt)); + src_if = m->m_pkthdr.rcvif; ifp = sc->sc_ifp; @@ -2299,12 +2342,10 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, || PFIL_HOOKED(&V_inet6_pfil_hook) #endif ) { - BRIDGE_UNLOCK(sc); if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0) return; if (m == NULL) return; - BRIDGE_LOCK(sc); } if (dst_if == NULL) { @@ -2332,8 +2373,6 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) goto drop; - BRIDGE_UNLOCK(sc); - if (PFIL_HOOKED(&V_inet_pfil_hook) #ifdef INET6 || PFIL_HOOKED(&V_inet6_pfil_hook) @@ -2349,7 +2388,6 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, return; drop: - BRIDGE_UNLOCK(sc); m_freem(m); } @@ -2370,6 +2408,8 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) uint16_t vlan; int error; + MPASS(in_epoch(net_epoch_preempt)); + if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return (m); @@ -2390,10 +2430,8 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) m_freem(m); return (NULL); } - BRIDGE_LOCK(sc); bif = bridge_lookup_member_if(sc, ifp); if (bif == NULL) { - BRIDGE_UNLOCK(sc); return (m); } @@ -2406,13 +2444,11 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) if (memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) { bstp_input(&bif->bif_stp, ifp, m); /* consumes mbuf */ - BRIDGE_UNLOCK(sc); return (NULL); } if ((bif->bif_flags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) { - BRIDGE_UNLOCK(sc); return (m); } @@ -2423,7 +2459,6 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) */ mc = m_dup(m, M_NOWAIT); if (mc == NULL) { - BRIDGE_UNLOCK(sc); return (m); } @@ -2455,7 +2490,6 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) if ((bif->bif_flags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) { - BRIDGE_UNLOCK(sc); return (m); } @@ -2495,7 +2529,6 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) OR_PFIL_HOOKED_INET6)) { \ if (bridge_pfil(&m, NULL, ifp, \ PFIL_IN) != 0 || m == NULL) { \ - BRIDGE_UNLOCK(sc); \ return (NULL); \ } \ eh = mtod(m, struct ether_header *); \ @@ -2505,13 +2538,11 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) error = bridge_rtupdate(sc, eh->ether_shost, \ vlan, bif, 0, IFBAF_DYNAMIC); \ if (error && bif->bif_addrmax) { \ - BRIDGE_UNLOCK(sc); \ m_freem(m); \ return (NULL); \ } \ } \ m->m_pkthdr.rcvif = iface; \ - BRIDGE_UNLOCK(sc); \ return (m); \ } \ \ @@ -2519,7 +2550,6 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) if (memcmp(IF_LLADDR((iface)), eh->ether_shost, ETHER_ADDR_LEN) == 0 \ OR_CARP_CHECK_WE_ARE_SRC((iface)) \ ) { \ - BRIDGE_UNLOCK(sc); \ m_freem(m); \ return (NULL); \ } @@ -2570,15 +2600,11 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, struct bridge_iflist *dbif, *sbif; struct mbuf *mc; struct ifnet *dst_if; - int error = 0, used = 0, i; + int used = 0, i; - sbif = bridge_lookup_member_if(sc, src_if); + MPASS(in_epoch(net_epoch_preempt)); - BRIDGE_LOCK2REF(sc, error); - if (error) { - m_freem(m); - return; - } + sbif = bridge_lookup_member_if(sc, src_if); /* Filter on the bridge interface before broadcasting */ if (runfilt && (PFIL_HOOKED(&V_inet_pfil_hook) @@ -2587,9 +2613,9 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, #endif )) { if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0) - goto out; + return; if (m == NULL) - goto out; + return; } CK_LIST_FOREACH(dbif, &sc->sc_iflist, bif_next) { @@ -2652,9 +2678,6 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, } if (used == 0) m_freem(m); - -out: - BRIDGE_UNREF(sc); } /* @@ -2670,6 +2693,8 @@ bridge_span(struct bridge_softc *sc, struct mbuf *m) struct ifnet *dst_if; struct mbuf *mc; + MPASS(in_epoch(net_epoch_preempt)); + if (CK_LIST_EMPTY(&sc->sc_spanlist)) return; @@ -2701,7 +2726,8 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan, struct bridge_rtnode *brt; int error; - BRIDGE_LOCK_ASSERT(sc); + MPASS(in_epoch(net_epoch_preempt)); + BRIDGE_UNLOCK_ASSERT(sc); /* Check the source address is valid and not multicast. */ if (ETHER_IS_MULTICAST(dst) || @@ -2718,13 +2744,24 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan, * update it, otherwise create a new one. */ if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) { + BRIDGE_LOCK(sc); + + /* Check again, now that we have the lock. There could have + * been a race and we only want to insert this once. */ + if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) != NULL) { + BRIDGE_UNLOCK(sc); + return (0); + } + if (sc->sc_brtcnt >= sc->sc_brtmax) { sc->sc_brtexceeded++; + BRIDGE_UNLOCK(sc); return (ENOSPC); } /* Check per interface address limits (if enabled) */ if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) { bif->bif_addrexceeded++; + BRIDGE_UNLOCK(sc); return (ENOSPC); } @@ -2734,8 +2771,11 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan, * address. */ brt = uma_zalloc(V_bridge_rtnode_zone, M_NOWAIT | M_ZERO); - if (brt == NULL) + if (brt == NULL) { + BRIDGE_UNLOCK(sc); return (ENOMEM); + } + brt->brt_vnet = curvnet; if (bif->bif_flags & IFBIF_STICKY) brt->brt_flags = IFBAF_STICKY; @@ -2747,17 +2787,22 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan, if ((error = bridge_rtnode_insert(sc, brt)) != 0) { uma_zfree(V_bridge_rtnode_zone, brt); + BRIDGE_UNLOCK(sc); return (error); } brt->brt_dst = bif; bif->bif_addrcnt++; + + BRIDGE_UNLOCK(sc); } if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC && brt->brt_dst != bif) { + BRIDGE_LOCK(sc); brt->brt_dst->bif_addrcnt--; brt->brt_dst = bif; brt->brt_dst->bif_addrcnt++; + BRIDGE_UNLOCK(sc); } if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) @@ -2778,7 +2823,7 @@ bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan) { struct bridge_rtnode *brt; - BRIDGE_LOCK_ASSERT(sc); + MPASS(in_epoch(net_epoch_preempt)); if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) return (NULL); @@ -3017,7 +3062,7 @@ bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan uint32_t hash; int dir; - BRIDGE_LOCK_ASSERT(sc); + MPASS(in_epoch(net_epoch_preempt)); hash = bridge_rthash(sc, addr); CK_LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) { @@ -3080,6 +3125,18 @@ bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt) return (0); } +static void +bridge_rtnode_destroy_cb(struct epoch_context *ctx) +{ + struct bridge_rtnode *brt; + + brt = __containerof(ctx, struct bridge_rtnode, brt_epoch_ctx); + + CURVNET_SET(brt->brt_vnet); + uma_zfree(V_bridge_rtnode_zone, brt); + CURVNET_RESTORE(); +} + /* * bridge_rtnode_destroy: * @@ -3095,7 +3152,9 @@ bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt) CK_LIST_REMOVE(brt, brt_list); sc->sc_brtcnt--; brt->brt_dst->bif_addrcnt--; - uma_zfree(V_bridge_rtnode_zone, brt); + + epoch_call(net_epoch_preempt, &brt->brt_epoch_ctx, + bridge_rtnode_destroy_cb); } /* @@ -3640,17 +3699,20 @@ bridge_linkstate(struct ifnet *ifp) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_iflist *bif; + struct epoch_tracker et; + + NET_EPOCH_ENTER_ET(et); - BRIDGE_LOCK(sc); bif = bridge_lookup_member_if(sc, ifp); if (bif == NULL) { - BRIDGE_UNLOCK(sc); + NET_EPOCH_EXIT_ET(et); return; } bridge_linkcheck(sc); - BRIDGE_UNLOCK(sc); bstp_linkstate(&bif->bif_stp); + + NET_EPOCH_EXIT_ET(et); } static void @@ -3659,7 +3721,8 @@ bridge_linkcheck(struct bridge_softc *sc) struct bridge_iflist *bif; int new_link, hasls; - BRIDGE_LOCK_ASSERT(sc); + MPASS(in_epoch(net_epoch_preempt)); + new_link = LINK_STATE_DOWN; hasls = 0; /* Our link is considered up if at least one of our ports is active */ -- 2.25.2