/*-
 * Copyright (c) 2017 Andrey V. Elsukov
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * nd6_watcher: watch inbound IPv6 traffic via a pfil(9) hook and
 * proactively send Neighbor Solicitations for source addresses that do
 * not yet have a valid link-layer (ND6) entry on the receiving interface.
 * Requests are queued into a hash table and processed by a dedicated
 * kernel thread; unresolved requests are expired by a periodic timer
 * in that thread.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/eventhandler.h>
#include <sys/fnv_hash.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/time.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_llatbl.h>
#include <net/pfil.h>

#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>

MALLOC_DEFINE(M_ND6W, "nd6w", "ND6 watcher");

LIST_HEAD(nd6w_list, nd6w_entry);
struct nd6w_entry {
	struct in6_addr	addr;		/* requested IPv6 address */
	struct ifnet	*ifp;		/* outbound interface */
	uint16_t	index;		/* index in the array of entries */
	uint8_t		expire;		/* retry count */

	LIST_ENTRY(nd6w_entry)	drainq;	/* used by expire timer */
	LIST_ENTRY(nd6w_entry)	chain;	/* used by worker thread */
	LIST_ENTRY(nd6w_entry)	hash;	/* used by pfil handler */
};

#define	ND6W_EXPIRE	5
#define	ND6W_COUNT	4096

struct nd6w_entry entries[ND6W_COUNT];	/* preallocated array of entries */
uint64_t emask[ND6W_COUNT / NBBY / sizeof(uint64_t)];

static struct nd6w_list nd6w_wq;
static struct nd6w_list *nd6w_hashtbl;
static u_long nd6w_hashmask;

#define	ADDRHASH_NHASH_LOG2	7
#define	ADDRHASH_NHASH		(1 << ADDRHASH_NHASH_LOG2)
#define	ADDRHASH_HASHVAL(addr)	(nd6w_addrhash(addr) & nd6w_hashmask)
#define	ADDRHASH_HASH(addr)	&nd6w_hashtbl[ADDRHASH_HASHVAL(addr)]

static struct mtx hash_lock;
#define	ND6W_HASH_LOCK_INIT()	mtx_init(&hash_lock, \
    "nd6w_hash_lock", "ND6 watcher hash lock", MTX_DEF)
#define	ND6W_HASH_LOCK_DESTROY()	mtx_destroy(&hash_lock)
#define	ND6W_HASH_LOCK()	mtx_lock(&hash_lock)
#define	ND6W_HASH_UNLOCK()	mtx_unlock(&hash_lock)
#define	ND6W_HASH_LOCK_ASSERT()	mtx_assert(&hash_lock, MA_OWNED)

static volatile int wq_running = 0;
static struct proc *wq_proc = NULL;
static struct mtx wq_lock;
#define	ND6W_WQ_LOCK_INIT()	mtx_init(&wq_lock, "nd6w_wq_lock", \
    "ND6 watcher work queue lock", MTX_DEF)
#define	ND6W_WQ_LOCK_DESTROY()	mtx_destroy(&wq_lock)
#define	ND6W_WQ_LOCK()		mtx_lock(&wq_lock)
#define	ND6W_WQ_UNLOCK()	mtx_unlock(&wq_lock)
#define	ND6W_WQ_LOCK_ASSERT()	mtx_assert(&wq_lock, MA_OWNED)
#define	ND6W_WQ_SLEEP()		mtx_sleep(&nd6w_wq, &wq_lock, 0, \
    "nd6wq", hz)

static volatile int departure_occurred = 0;
static eventhandler_tag nd6w_ifp_cookie = NULL;
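/*
 * Interface departure handling: nd6w_ifnet_detach() (registered via
 * nd6w_ifp_cookie) sets departure_occurred when a non-p2p interface goes
 * away.  The worker and expire paths re-check the flag under
 * ND6W_IFP_LOCK() and flush the whole hash instead of touching entries
 * that may still reference the departed ifp.
 */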
"nd6w_ifp_lock", \ "ND6 watcher ifnet departure lock", MTX_DEF) #define ND6W_IFP_LOCK_DESTROY() mtx_destroy(&ifp_lock) #define ND6W_IFP_LOCK() mtx_lock(&ifp_lock) #define ND6W_IFP_UNLOCK() mtx_unlock(&ifp_lock) #define ND6W_IFP_LOCK_ASSERT() mtx_assert(&ifp_lock, MA_OWNED) static int debug_level = 0; static int count = 0; #define DBG(arg) do { if (debug_level != 0) printf arg; } while (0) SYSCTL_DECL(_net_inet6_icmp6); SYSCTL_NODE(_net_inet6_icmp6, OID_AUTO, nd6watcher, CTLFLAG_RW, 0, "ND6 watcher"); SYSCTL_INT(_net_inet6_icmp6_nd6watcher, OID_AUTO, debug, CTLFLAG_RW, &debug_level, 0, "Enable debug output"); SYSCTL_INT(_net_inet6_icmp6_nd6watcher, OID_AUTO, count, CTLFLAG_RD, &count, 0, "Number of active entries"); static uint32_t nd6w_addrhash(const struct in6_addr *addr) { return (fnv_32_buf(addr, sizeof(*addr), FNV1_32_INIT)); } static struct nd6w_entry * nd6w_get_entry(void) { u_int i, n; ND6W_HASH_LOCK_ASSERT(); for (i = 0; i < nitems(emask); i++) { if ((n = ffsll(emask[i])) != 0) { emask[i] &= ~(1 << (n - 1)); /* mark as used */ n += i * NBBY * sizeof(uint64_t) - 1; memset(&entries[n], 0, sizeof(entries[0])); entries[n].index = n; return (&entries[n]); } } return (NULL); } static void nd6w_flush_hash(void) { ND6W_IFP_LOCK_ASSERT(); departure_occurred = 0; ND6W_HASH_LOCK(); memset(emask, 0xff, sizeof(emask)); count = 0; ND6W_HASH_UNLOCK(); DBG(("%s: hash table was flused.\n", __func__)); } static void nd6w_register_request(struct in6_addr *addr, struct ifnet *ifp) { char saddr[INET6_ADDRSTRLEN]; struct nd6w_entry *entry; /* * Check in the hash table, that this address is not queued yet. */ ND6W_HASH_LOCK(); LIST_FOREACH(entry, ADDRHASH_HASH(addr), hash) { if (IN6_ARE_ADDR_EQUAL(&entry->addr, addr) && entry->ifp == ifp) { ND6W_HASH_UNLOCK(); DBG(("%s: %s already queued on %s\n", __func__, inet_ntop(AF_INET6, addr, saddr, sizeof(saddr)), if_name(ifp))); return; /* already queued */ } } entry = nd6w_get_entry(); if (entry == NULL) { ND6W_HASH_UNLOCK(); return; } memcpy(&entry->addr, addr, sizeof(*addr)); entry->ifp = ifp; entry->expire = ND6W_EXPIRE; count++; LIST_INSERT_HEAD(ADDRHASH_HASH(addr), entry, hash); ND6W_HASH_UNLOCK(); DBG(("%s: %s queued on %s\n", __func__, inet_ntop(AF_INET6, addr, saddr, sizeof(saddr)), if_name(ifp))); ND6W_WQ_LOCK(); LIST_INSERT_HEAD(&nd6w_wq, entry, chain); ND6W_WQ_UNLOCK(); wakeup(&nd6w_wq); } /* * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous, * then it sets p to point at the offset "len" in the mbuf. WARNING: the * pointer might become stale after other pullups (but we never use it * this way). */ #define PULLUP_TO(_len, p, T) PULLUP_LEN(_len, p, sizeof(T)) #define PULLUP_LEN(_len, p, T) \ do { \ int x = (_len) + T; \ if ((m)->m_pkthdr.len < x) { \ m_freem(m), *m0 = NULL; \ return (EACCES); \ } \ if ((m)->m_len < x) { \ *m0 = m = m_pullup(m, x); \ if (m == NULL) \ return (ENOBUFS); \ } \ p = mtodo(m, (_len)); \ } while (0) static int nd6w_pfil_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, struct inpcb *inp) { struct ether_header *eh; struct ip6_hdr *ip6; struct llentry *lle; struct mbuf *m; m = *m0; /* Only unicast IPv6 datagrams are interested for us */ if (m->m_flags & (M_BCAST | M_MCAST)) return (0); /* We don't have to do link-layer address resolution on a p2p link. */ if (ifp->if_flags & IFF_POINTOPOINT) return (0); /* We work only on IPv6 capable interfaces. 
	if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
		return (0);
	eh = mtod(m, struct ether_header *);
	if (ntohs(eh->ether_type) != ETHERTYPE_IPV6)
		return (0);
	PULLUP_TO(ETHER_HDR_LEN, ip6, struct ip6_hdr);
	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
	    IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
		return (0);
	IF_AFDATA_RLOCK(ifp);
	lle = nd6_lookup(&ip6->ip6_src, 0, ifp);
	IF_AFDATA_RUNLOCK(ifp);
	if (lle == NULL ||
	    (lle->ln_state != ND6_LLINFO_REACHABLE &&
	    lle->ln_state != ND6_LLINFO_DELAY)) {
		if (lle != NULL)
			LLE_RUNLOCK(lle);
		/* Queue NS request. */
		nd6w_register_request(&ip6->ip6_src, ifp);
		return (0);
	}
	if (lle != NULL)
		LLE_RUNLOCK(lle);
	return (0);
}

static void
nd6w_ifnet_detach(void *arg __unused, struct ifnet *ifp)
{

	ND6W_IFP_LOCK();
	if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
		departure_occurred = 1;
	DBG(("%s: ifnet departure event: %s\n", __func__, if_name(ifp)));
	ND6W_IFP_UNLOCK();
}

static void
nd6w_send(struct ifnet *ifp, struct in6_addr *addr)
{
	char saddr[INET6_ADDRSTRLEN];
	struct llentry *lle;

	ND6W_IFP_LOCK_ASSERT();
	/*
	 * Check that we have an LLE.  nd6_na_input() drops the NA if
	 * an LLE does not exist for the requested address.
	 */
	IF_AFDATA_RLOCK(ifp);
	lle = nd6_lookup(addr, 0, ifp);
	IF_AFDATA_RUNLOCK(ifp);
	if (lle == NULL) {
		/* Create a new LLE. */
		IF_AFDATA_WLOCK(ifp);
		lle = nd6_lookup(addr, ND6_EXCLUSIVE | ND6_CREATE, ifp);
		IF_AFDATA_WUNLOCK(ifp);
		if (lle == NULL)
			return;
		/*
		 * Mark the entry as INCOMPLETE and start the ND6 timer
		 * to make retransmits if needed.
		 */
		lle->ln_state = ND6_LLINFO_INCOMPLETE;
		lle->la_asked++;
		nd6_llinfo_settimer_locked(lle,
		    (long)ND_IFINFO(ifp)->retrans * hz / 1000);
		LLE_WUNLOCK(lle);
		DBG(("%s: created new LLE for %s on %s\n", __func__,
		    inet_ntop(AF_INET6, addr, saddr, sizeof(saddr)),
		    if_name(ifp)));
	} else {
		/* Check the state of the LLE. */
		if (lle->ln_state <= ND6_LLINFO_REACHABLE) {
			LLE_RUNLOCK(lle);
			return;
		}
		LLE_RUNLOCK(lle);
		/*
		 * STALE - address is stale;
		 * DELAY - NUD (unreachability detection) is in progress.
		 *
		 * Send NS to speed up validation.
		 */
	}
	DBG(("%s: send NS for %s via %s\n", __func__,
	    inet_ntop(AF_INET6, addr, saddr, sizeof(saddr)), if_name(ifp)));
	nd6_ns_output(ifp, NULL, addr, NULL, 0);
}

static void
nd6w_expire(void)
{
	char saddr[INET6_ADDRSTRLEN];
	struct nd6w_list wq, drainq;
	struct nd6w_entry *entry, *next;
	struct llentry *lle;
	struct ifnet *ifp;
	int i;

	LIST_INIT(&wq);
	LIST_INIT(&drainq);
	/*
	 * It should be safe to walk the hash without a lock, because
	 * new entries are inserted at the head of the lists.
	 */
	for (i = 0; i < nd6w_hashmask + 1; i++) {
		LIST_FOREACH(entry, &nd6w_hashtbl[i], hash) {
			/* Skip fresh entries. */
			if (entry->expire == ND6W_EXPIRE)
				continue;
			/*
			 * Add entries that are not yet expired to the
			 * wq list to check them later.
			 */
			if (entry->expire > 1) {
				LIST_INSERT_HEAD(&wq, entry, drainq);
				continue;
			}
			/* Schedule expired entries for deletion. */
			if (entry->expire < 2)
				LIST_INSERT_HEAD(&drainq, entry, drainq);
		}
	}
	/*
	 * Check whether entries in the wq list have been successfully
	 * resolved since the previous run and can now be deleted.
	 * Hold ND6W_IFP_LOCK to protect against ifnet departures.
	 */
	ND6W_IFP_LOCK();
	if (departure_occurred != 0) {
		nd6w_flush_hash();
		ND6W_IFP_UNLOCK();
		return;
	}
	entry = LIST_FIRST(&wq);
	while (entry != NULL) {
		next = LIST_NEXT(entry, drainq);
		ifp = entry->ifp;
		IF_AFDATA_RLOCK(ifp);
		lle = nd6_lookup(&entry->addr, 0, ifp);
		IF_AFDATA_RUNLOCK(ifp);
		if (lle != NULL &&
		    (lle->ln_state == ND6_LLINFO_REACHABLE ||
		    lle->ln_state == ND6_LLINFO_DELAY)) {
			/* Remove from wq list and schedule for deletion. */
			LIST_REMOVE(entry, drainq);
			LIST_INSERT_HEAD(&drainq, entry, drainq);
		} else {
			/* Address is still unresolved.  Decrement counter. */
			entry->expire--;
		}
		if (lle != NULL)
			LLE_RUNLOCK(lle);
		entry = next;
	}
	ND6W_IFP_UNLOCK();

	if (LIST_EMPTY(&drainq))
		return;	/* Nothing to do. */

	/*
	 * Now remove scheduled entries from the hash.
	 */
	ND6W_IFP_LOCK();
	if (departure_occurred != 0) {
		nd6w_flush_hash();
		ND6W_IFP_UNLOCK();
		return;
	}
	ND6W_HASH_LOCK();
	LIST_FOREACH(entry, &drainq, drainq) {
		DBG(("%s: %s removed from the hash\n", __func__,
		    inet_ntop(AF_INET6, &entry->addr, saddr, sizeof(saddr))));
		LIST_REMOVE(entry, hash);
		emask[entry->index / NBBY / sizeof(uint64_t)] |=
		    (1ULL << (entry->index % (NBBY * sizeof(uint64_t))));
		count--;
		/* We don't care about flushing drainq here. */
	}
	ND6W_HASH_UNLOCK();
	ND6W_IFP_UNLOCK();
}

static void
nd6w_worker(void *arg __unused)
{
	struct nd6w_list wq;
	struct nd6w_entry *entry;
	time_t last_timer;

	last_timer = 0;
	while (1) {
		LIST_INIT(&wq);
		ND6W_WQ_LOCK();
		while (LIST_EMPTY(&nd6w_wq)) {
			if (wq_running == 0)
				goto done;
			/*
			 * Wait for an event from nd6w_register_request() or
			 * from the user (kldunload).  If there are no events
			 * within hz ticks, check for work at the timer label.
			 */
			if (ND6W_WQ_SLEEP() == EWOULDBLOCK) {
				ND6W_WQ_UNLOCK();
				goto timer;
			}
		}
		LIST_SWAP(&nd6w_wq, &wq, nd6w_entry, chain);
		ND6W_WQ_UNLOCK();
		/*
		 * Now send ND6 NS.
		 * Hold ND6W_IFP_LOCK to protect against ifnet departures.
		 */
		ND6W_IFP_LOCK();
		/*
		 * If a departure event has occurred, do not try to send NS.
		 * Instead, ignore all remaining requests.
		 */
		if (departure_occurred != 0) {
			nd6w_flush_hash();
			ND6W_IFP_UNLOCK();
			continue;
		}
		LIST_FOREACH(entry, &wq, chain)
			nd6w_send(entry->ifp, &entry->addr);
		ND6W_IFP_UNLOCK();
timer:
		if (time_second - last_timer < hz)
			continue;
		nd6w_expire();
		last_timer = time_second;
	}
done:
	wq_running = -1;
	ND6W_WQ_UNLOCK();
	kproc_exit(0);
}

static int
nd6w_modevent(module_t mod, int type, void *data)
{
	struct pfil_head *ph;
	int ret;

	switch (type) {
	case MOD_LOAD:
		ph = pfil_head_get(PFIL_TYPE_AF, AF_LINK);
		if (ph == NULL)
			return (ENOENT);
		nd6w_hashtbl = hashinit(ADDRHASH_NHASH, M_ND6W,
		    &nd6w_hashmask);
		memset(emask, 0xff, sizeof(emask));
		ND6W_HASH_LOCK_INIT();
		ND6W_WQ_LOCK_INIT();
		ND6W_IFP_LOCK_INIT();
		ret = pfil_add_hook(nd6w_pfil_hook, NULL,
		    PFIL_IN | PFIL_WAITOK, ph);
		if (ret != 0)
			goto cleanup;
		wq_running = 1;
		ret = kproc_create(&nd6w_worker, NULL, &wq_proc, 0, 0,
		    "nd6_watcher");
		if (ret != 0) {
			/* Undo the pfil registration before cleanup. */
			pfil_remove_hook(nd6w_pfil_hook, NULL,
			    PFIL_IN | PFIL_WAITOK, ph);
			goto cleanup;
		}
		nd6w_ifp_cookie = EVENTHANDLER_REGISTER(ifnet_departure_event,
		    nd6w_ifnet_detach, NULL, EVENTHANDLER_PRI_ANY);
		break;
	case MOD_UNLOAD:
		ph = pfil_head_get(PFIL_TYPE_AF, AF_LINK);
		if (ph == NULL)
			return (ENOENT);
		pfil_remove_hook(nd6w_pfil_hook, NULL,
		    PFIL_IN | PFIL_WAITOK, ph);
		if (wq_running > 0) {
			ND6W_WQ_LOCK();
			wq_running = 0;
			wakeup(&nd6w_wq);
			while (wq_running != -1)
				ND6W_WQ_SLEEP();
			ND6W_WQ_UNLOCK();
		}
		if (nd6w_ifp_cookie != NULL)
			EVENTHANDLER_DEREGISTER(ifnet_departure_event,
			    nd6w_ifp_cookie);
		ret = 0;
		goto cleanup;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
cleanup:
	hashdestroy(nd6w_hashtbl, M_ND6W, nd6w_hashmask);
	ND6W_HASH_LOCK_DESTROY();
	ND6W_WQ_LOCK_DESTROY();
	ND6W_IFP_LOCK_DESTROY();
	return (ret);
}

static moduledata_t nd6w_mod = {
	"nd6_watcher",
	nd6w_modevent,
	0
};

DECLARE_MODULE(nd6_watcher, nd6w_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(nd6_watcher, 1);
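/*
 * Example usage, a minimal sketch: the module file name depends on the
 * Makefile and is an assumption here; the sysctl names come from the
 * SYSCTL_NODE()/SYSCTL_INT() declarations above.
 *
 *	# kldload ./nd6_watcher.ko
 *	# sysctl net.inet6.icmp6.nd6watcher.debug=1	(enable debug output)
 *	# sysctl net.inet6.icmp6.nd6watcher.count	(active entries)
 *	# kldunload nd6_watcher
 */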