diff --git a/sbin/ifconfig/Makefile b/sbin/ifconfig/Makefile index 54339260039..e09b058a962 100644 --- a/sbin/ifconfig/Makefile +++ b/sbin/ifconfig/Makefile @@ -35,6 +35,8 @@ SRCS+= ifvxlan.c # VXLAN support SRCS+= ifgre.c # GRE keys etc SRCS+= ifgif.c # GIF reversed header workaround SRCS+= ifipsec.c # IPsec VTI +SRCS+= ifmgif.c # Multipoint GIF +SRCS+= ifmpls.c # MPLS SRCS+= sfp.c # SFP/SFP+ information LIBADD+= m diff --git a/sbin/ifconfig/ifmgif.c b/sbin/ifconfig/ifmgif.c new file mode 100644 index 00000000000..5e662dffacb --- /dev/null +++ b/sbin/ifconfig/ifmgif.c @@ -0,0 +1,198 @@ +/*- + * Copyright (c) 2015 Yandex LLC + * Copyright (c) 2015 Andrey V. Elsukov + * Copyright (c) 2015 Alexander V. Chernikov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "ifconfig.h"
+static void show_addrmaps(struct mgif_listmap *ml);
+
+/*
+ * Adds or updates mgif address map (s1 is the IPv4 side, s2 the IPv6 side):
+ * 'addmap 192.0.2.1 2001:DB8::177'
+ */
+static
+DECL_CMD_FUNC2(addmap, s1, s2)
+{
+	struct mgif_reqmap mp;
+	struct ifreq req;
+
+	bzero(&req, sizeof(req));
+	bzero(&mp, sizeof(mp));
+	strlcpy(req.ifr_name, name, sizeof(req.ifr_name));
+	req.ifr_data = (caddr_t)&mp;
+
+	mp.size = sizeof(mp);
+	mp.action = MGIFMAP_ACT_ADD;
+
+	/* inet_pton() returns 0 on bad input without setting errno: errx() */
+	if (inet_pton(AF_INET, s1, &mp.item.addr4) != 1)
+		errx(1, "%s does not look like valid IPv4 address", s1);
+	if (inet_pton(AF_INET6, s2, &mp.item.addr6) != 1)
+		errx(1, "%s does not look like valid IPv6 address", s2);
+
+	if (ioctl(s, MGIFSMAPPING, &req) != 0)
+		err(1, "MGIFSMAPPING");
+}
+
+/*
+ * Deletes mgif address map based on IPv4 address:
+ * 'delmap 192.0.2.1'
+ */
+static
+DECL_CMD_FUNC(delmap, s1, d)
+{
+	struct mgif_reqmap mp;
+	struct ifreq req;
+
+	bzero(&req, sizeof(req));
+	bzero(&mp, sizeof(mp));
+	strlcpy(req.ifr_name, name, sizeof(req.ifr_name));
+	req.ifr_data = (caddr_t)&mp;
+	mp.size = sizeof(mp);
+	mp.action = MGIFMAP_ACT_DEL;
+
+	if (inet_pton(AF_INET, s1, &mp.item.addr4) != 1)
+		errx(1, "%s does not look like valid IPv4 address", s1);
+
+	if (ioctl(s, MGIFSMAPPING, &req) != 0)
+		err(1, "MGIFSMAPPING");
+}
+
+static void
+show_addrmaps(struct mgif_listmap *ml)
+{
+	char a6[INET6_ADDRSTRLEN], a4[INET_ADDRSTRLEN];
+	int i;
+
+	for (i = 0; i < ml->count; i++) {
+		inet_ntop(AF_INET, &ml->item[i].addr4, a4, sizeof(a4));
+		inet_ntop(AF_INET6, &ml->item[i].addr6, a6, sizeof(a6));
+		printf("\t%s <> %s\n", a4, a6);
+	}
+}
+
+/*
+ * Lists all mgif address maps.  The request buffer is regrown until the
+ * size written back by MGIFGMAPPINGS fits into it.
+ */
+static
+DECL_CMD_FUNC(listmap, val, d)
+{
+	char *buf;
+	struct mgif_listmap *ml;
+	struct ifreq req;
+	size_t bufsize;
+
+	bzero(&req, sizeof(req));
+	strlcpy(req.ifr_name, name, sizeof(req.ifr_name));
+
+	/* Start with reasonable default: 16 maps */
+	bufsize = sizeof(struct mgif_listmap);
+	bufsize += sizeof(struct mgifmapitem) * 16;
+	buf = NULL;
+	for (;;) {
+		free(buf);	/* NULL on the first pass: no-op */
+		buf = calloc(1, bufsize);
+		if (buf == NULL)
+			errx(1, "Unable to alloc %zu bytes", bufsize);
+
+		ml = (struct mgif_listmap *)buf;
+		ml->size = bufsize;
+		req.ifr_data = (caddr_t)ml;
+		if (ioctl(s, MGIFGMAPPINGS, &req) != 0) {
+			free(buf);
+			return;
+		}
+		if (ml->size <= bufsize) {
+			/* Done */
+			break;
+		}
+
+		bufsize = ml->size;
+	}
+
+	show_addrmaps(ml);
+	free(buf);
+}
+
+static void
+mgif_status(int s)
+{
+	if (verbose > 0)
+		listmap(NULL, 0, s, NULL);
+}
+
+
+static struct cmd mgif_cmds[] = {
+	DEF_CMD_ARG2("addmap", addmap),
+	DEF_CMD_ARG("delmap", delmap),
+	DEF_CMD("listmap", 0, listmap),
+};
+
+static struct afswtch af_mgif = {
+	.af_name = "af_mgif",
+	.af_af = AF_UNSPEC,
+	.af_other_status = mgif_status,
+};
+
+static __constructor void
+mgif_ctor(void)
+{
+#define N(a) (sizeof(a) / sizeof(a[0]))
+	size_t i;
+
+	for (i = 0; i < N(mgif_cmds); i++)
+		cmd_register(&mgif_cmds[i]);
+	af_register(&af_mgif);
+#undef N
+}
diff --git a/sbin/ifconfig/ifmpls.c b/sbin/ifconfig/ifmpls.c
new file mode 100644
index 00000000000..8c1fc70895f
--- /dev/null
+++ b/sbin/ifconfig/ifmpls.c
@@ -0,0 +1,195 @@
+/*-
+ * Copyright (c) 2016 Yandex LLC
+ * Copyright (c) 2016 Andrey V. 
Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "ifconfig.h"
+
+static void
+mpls_print_labels(uint32_t *l, size_t sz)
+{
+	int i;
+
+	if (sz == 0)
+		return;
+	printf("\tlabels: ");
+	for (i = sz; i > MPLS_HDRLEN; i -= MPLS_HDRLEN, l++)
+		printf("%u,", *l);
+	printf("%u\n", *l);
+}
+
+static void
+mpls_status(int s)
+{
+	void *buf;
+	size_t sz;
+	uint8_t ttl;
+
+	ifr.ifr_data = (caddr_t)&ttl;
+	if (ioctl(s, MPLSGTTL, &ifr) == -1)
+		return;
+	printf("\tttl: %u\n", ttl);
+	sz = 64;
+	while (1) {
+		buf = calloc(1, sz);
+		if (buf == NULL)
+			errx(1, "Unable to alloc %zu bytes", sz);
+		ifr.ifr_buffer.buffer = buf;
+		ifr.ifr_buffer.length = sz;
+		if (ioctl(s, MPLSGLABELS, &ifr) != 0) {
+			free(buf);
+			if (errno != EMSGSIZE)
+				return;
+			sz = ifr.ifr_buffer.length;
+			errno = 0;
+			continue;
+		}
+		sz = ifr.ifr_buffer.length;
+		mpls_print_labels(buf, sz);
+		free(buf);
+		break;
+	}
+}
+
+static
+DECL_CMD_FUNC(setttl, val, arg)
+{
+	unsigned long v;
+	char *ep;
+	uint8_t ttl;
+
+	v = strtoul(val, &ep, 0);
+	if (*ep != '\0' || v == 0 || v > UINT8_MAX) {
+		/* Validation failure, not an errno one: warnx(), not warn() */
+		warnx("Invalid ttl value %s", val);
+		return;
+	}
+	ttl = (uint8_t)v;
+	ifr.ifr_data = (caddr_t)&ttl;
+	if (ioctl(s, MPLSSTTL, &ifr) == -1)
+		warn("ioctl(MPLSSTTL)");
+}
+
+static void
+clearlabels(const char *val, int d, int s, const struct afswtch *afp)
+{
+
+	ifr.ifr_buffer.buffer = NULL;
+	ifr.ifr_buffer.length = 0;
+	if (ioctl(s, MPLSSLABELS, &ifr) != 0)
+		err(1, "ioctl(MPLSSLABELS)");
+}
+
+static
+DECL_CMD_FUNC(setlabels, val, arg)
+{
+	uint32_t *labels;
+	char *ep, *str, *f, *l;
+	unsigned long v;
+	size_t sz;
+	int i;
+
+	/* Count number of labels */
+	for (sz = MPLS_HDRLEN, i = 0; val[i] != '\0'; i++)
+		if (val[i] == ',')
+			sz += MPLS_HDRLEN;
+	if (sz > 2048)
+		errx(1, "Labels list is too long");
+	labels = calloc(1, sz);
+	if (labels == NULL)
+		err(1, "calloc() failed");
+
+	f = str = strdup(val);
+	if (f == NULL)
+		err(1, "strdup() failed");
+	i = 0;
+	while ((l = strsep(&str, ",")) != NULL) {
+		if (*l == '\0')
+			errx(1, "Empty labels aren't allowed");
+		/* Range-check the full value: a uint32_t cast could wrap */
+		v = strtoul(l, &ep, 0);
+		if (*ep != '\0' || v <= MPLS_LABEL_RESERVED_MAX ||
+		    v > MPLS_LABEL_MAX)
+			errx(1, "Invalid label %s", l);
+		labels[i++] = (uint32_t)v;
+	}
+	ifr.ifr_buffer.buffer = labels;
+	ifr.ifr_buffer.length = sz;
+	if (ioctl(s, MPLSSLABELS, &ifr) != 0)
+		err(1, "ioctl(MPLSSLABELS)");
+	free(labels);
+	free(f);
+}
+
+static struct cmd mpls_cmds[] = {
+	DEF_CMD("-label", 0, clearlabels),
+	DEF_CMD("-labels", 0, clearlabels),
+	DEF_CMD_ARG("ttl", setttl),
+	DEF_CMD_ARG("label", setlabels),
+	DEF_CMD_ARG("labels", setlabels),
+};
+
+static struct afswtch af_mpls = {
+	.af_name = "af_mpls",
+	.af_af = AF_MPLS,
+	.af_other_status = mpls_status,
+};
+
+static __constructor void
+mpls_ctor(void)
+{
+#define N(a) (sizeof(a) / sizeof(a[0]))
+	size_t i;
+
+	for (i = 0; i < N(mpls_cmds); i++)
+		cmd_register(&mpls_cmds[i]);
+	af_register(&af_mpls);
+#undef N
+}
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
index 21d68575da4..787054cf852 100644
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -163,6 +163,8 @@ SUBDIR=	\
 	${_if_gif} \
 	${_if_gre} \
 	${_if_me} \
+	${_if_mgif} \
+	${_if_mpls} \
 	if_lagg \
 	${_if_ndis} \
 	${_if_stf} \
@@ -443,6 +445,11 @@ _random_other=	random_other
 SUBDIR+=	cuse
 .endif
 
+.if (${MK_INET_SUPPORT} != "no" && ${MK_INET6_SUPPORT} != "no" ) || \
+	defined(ALL_MODULES)
+_if_mgif=	if_mgif
+.endif
+
 .if (${MK_INET_SUPPORT} != "no" || ${MK_INET6_SUPPORT} != "no") || \
 	defined(ALL_MODULES)
 _carp=		carp
@@ -450,6 +457,7 @@ _toecore=	toecore
 _if_enc=	if_enc
 _if_gif=	if_gif
 _if_gre=	if_gre
+_if_mpls=	if_mpls
 _ipfw_pmod=	ipfw_pmod
 .if ${MK_IPSEC_SUPPORT} != "no"
 _ipsec=		ipsec
diff --git a/sys/modules/if_mgif/Makefile b/sys/modules/if_mgif/Makefile
new file mode 100644
index 
00000000000..7ea5a640b32 --- /dev/null +++ b/sys/modules/if_mgif/Makefile @@ -0,0 +1,11 @@ +# $FreeBSD$ + +SYSDIR?=${SRCTOP}/sys + +.PATH: ${SYSDIR}/netinet6 + +KMOD= if_mgif +SRCS= in6_mgif.c +CFLAGS+= -DMGIF_FAST_OUTPUT + +.include diff --git a/sys/modules/if_mpls/Makefile b/sys/modules/if_mpls/Makefile new file mode 100644 index 00000000000..7c072c2780d --- /dev/null +++ b/sys/modules/if_mpls/Makefile @@ -0,0 +1,10 @@ +# $FreeBSD$ + +SYSDIR?=${SRCTOP}/sys + +.PATH: ${SYSDIR}/net + +KMOD= if_mpls +SRCS= if_mpls.c opt_inet.h opt_inet6.h + +.include diff --git a/sys/net/if_mpls.c b/sys/net/if_mpls.c new file mode 100644 index 00000000000..b4c3e8b93ab --- /dev/null +++ b/sys/net/if_mpls.c @@ -0,0 +1,1144 @@ +/*- + * Copyright (c) 2016 Yandex LLC + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#ifdef INET
+#include
+#include
+#include
+#include
+#endif /* INET */
+
+#ifdef INET6
+#ifndef INET
+#include
+#endif
+#include
+#include
+#include
+#include
+#include
+#include
+#endif /* INET6 */
+
+#include
+
+static const char mplsname[] = "mpls";
+struct mpls_softc {
+	struct ifnet *ifp; /* Interface pointer */
+	u_int fibnum; /* Tunnel FIB */
+	u_int family; /* Tunnel AF */
+	uint8_t ttl; /* Outgoing TTL of label */
+	struct rmlock lock; /* Taken via MPLS_RLOCK()/MPLS_WLOCK() */
+	union { /* Tunnel source */
+#ifdef INET
+		struct in_addr src4;
+#endif
+#ifdef INET6
+		struct in6_addr src6;
+#endif
+	};
+	union { /* Tunnel destination */
+#ifdef INET
+		struct in_addr dst4;
+#endif
+#ifdef INET6
+		struct in6_addr dst6;
+#endif
+	};
+#ifdef INET6
+	uint32_t zoneid; /* Passed to fib6_lookup_nh_basic() */
+#endif
+	struct ifnet *oifp; /* Outgoing Interface */
+	struct ether_header *hdr; /* L2 header */
+	struct mpls_label *labels; /* Pointer to labels */
+	size_t lsz; /* Size of labels buffer */
+#ifdef INET
+	struct mpls_label null4; /* Explicit IPv4 NULL label */
+#endif
+#ifdef INET6
+	struct mpls_label null6; /* Explicit IPv6 NULL label */
+#endif
+	LIST_ENTRY(mpls_softc) entry; /* Link in V_mpls_softc_list */
+};
+
+#define	MPLS_MTU	(1500)	/* Default MTU */
+#define	MPLS_MTU_MIN	(1280)	/* Minimum MTU */
+#define	MPLS_MTU_MAX	(8192)	/* Maximum MTU */
+#define	MPLS_DEFTTL	1	/* TTL given to a newly created interface */
+
+/*
+ * mpls_mtx protects a per-vnet mpls_softc_list.
+ */
+static VNET_DEFINE(struct mtx, mpls_mtx);
+#define	V_mpls_mtx	VNET(mpls_mtx)
+static MALLOC_DEFINE(M_MPLS, "mpls", "MPLS Label Imposition Interface");
+static VNET_DEFINE(LIST_HEAD(, mpls_softc), mpls_softc_list);
+#define	V_mpls_softc_list	VNET(mpls_softc_list)
+static struct sx mpls_ioctl_sx; /* Held by mpls_ioctl() and clone destroy */
+SX_SYSINIT(mpls_ioctl_sx, &mpls_ioctl_sx, "mpls_ioctl");
+
+static eventhandler_tag ifdetach_tag;
+static eventhandler_tag iflladdr_tag;
+static eventhandler_tag lleevent_tag;
+
+#define	MPLS_LIST_LOCK_INIT(x)	mtx_init(&V_mpls_mtx, "mpls_mtx", \
+	    NULL, MTX_DEF)
+#define	MPLS_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_mpls_mtx)
+#define	MPLS_LIST_LOCK(x)	mtx_lock(&V_mpls_mtx)
+#define	MPLS_LIST_UNLOCK(x)	mtx_unlock(&V_mpls_mtx)
+
+#define	MPLS_LOCK_INIT(sc)	rm_init(&(sc)->lock, "mpls softc")
+#define	MPLS_LOCK_DESTROY(sc)	rm_destroy(&(sc)->lock)
+#define	MPLS_RLOCK_TRACKER	struct rm_priotracker mpls_tracker
+#define	MPLS_RLOCK(sc)	rm_rlock(&(sc)->lock, &mpls_tracker)
+#define	MPLS_RUNLOCK(sc)	rm_runlock(&(sc)->lock, &mpls_tracker)
+#define	MPLS_RLOCK_ASSERT(sc)	rm_assert(&(sc)->lock, RA_RLOCKED)
+#define	MPLS_WLOCK(sc)	rm_wlock(&(sc)->lock)
+#define	MPLS_WUNLOCK(sc)	rm_wunlock(&(sc)->lock)
+#define	MPLS_WLOCK_ASSERT(sc)	rm_assert(&(sc)->lock, RA_WLOCKED)
+
+static int mpls_update_route(struct mpls_softc *sc, uint16_t fibnum);
+static int mpls_set_tunnel(struct ifnet *, struct sockaddr *,
+    struct sockaddr *);
+static void mpls_delete_tunnel(struct ifnet *);
+static int mpls_output(struct ifnet *ifp, struct mbuf *m,
+    const struct sockaddr *dst, struct route *ro);
+static int mpls_ioctl(struct ifnet *, u_long, caddr_t);
+static int mpls_transmit(struct ifnet *, struct mbuf *);
+static void mpls_qflush(struct ifnet *);
+static int mpls_clone_create(struct if_clone *, int, caddr_t);
+static void mpls_clone_destroy(struct ifnet *);
+static VNET_DEFINE(struct if_clone *, mpls_cloner);
+#define	V_mpls_cloner	VNET(mpls_cloner)
+
+static int mpls_modevent(module_t, int, void *);
+static void mpls_ifdetach(void *arg, struct ifnet *ifp);
+static void mpls_iflladdr(void *arg, struct ifnet *ifp);
+static void mpls_lleevent(void *arg, struct llentry *lle, int evt);
+
+static int
+mpls_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+	struct mpls_softc *sc;
+	struct ifnet *ifp;
+
+	sc = malloc(sizeof(struct mpls_softc), M_MPLS, M_WAITOK | M_ZERO);
+	sc->fibnum = curthread->td_proc->p_fibnum;
+	sc->ttl = MPLS_DEFTTL;
+#ifdef INET
+	sc->null4.label = htonl(MPLS_LABEL_IPV4NULL << 12) |
+	    MPLS_BOS_MASK | htonl(sc->ttl);
+#endif
+#ifdef INET6
+	sc->null6.label = htonl(MPLS_LABEL_IPV6NULL << 12) |
+	    MPLS_BOS_MASK | htonl(sc->ttl);
+#endif
+	ifp = sc->ifp = if_alloc(IFT_MPLSTUNNEL);
+	MPLS_LOCK_INIT(sc);
+	ifp->if_softc = sc;
+	if_initname(ifp, mplsname, unit);
+
+	ifp->if_addrlen = 0;
+	ifp->if_mtu = MPLS_MTU;
+	ifp->if_flags = IFF_POINTOPOINT;
+	ifp->if_ioctl = mpls_ioctl;
+	ifp->if_transmit = mpls_transmit;
+	ifp->if_qflush = mpls_qflush;
+	ifp->if_output = mpls_output;
+	if_attach(ifp);
+	bpfattach(ifp, DLT_NULL, sizeof(uint32_t));
+	MPLS_LIST_LOCK();
+	LIST_INSERT_HEAD(&V_mpls_softc_list, sc, entry);
+	MPLS_LIST_UNLOCK();
+	return (0);
+}
+
+static void
+mpls_clone_destroy(struct ifnet *ifp)
+{
+	struct mpls_softc *sc;
+
+	sx_xlock(&mpls_ioctl_sx);
+	sc = ifp->if_softc;
+	mpls_delete_tunnel(ifp);
+	MPLS_LIST_LOCK();
+	LIST_REMOVE(sc, entry);
+	MPLS_LIST_UNLOCK();
+	bpfdetach(ifp);
+	if_detach(ifp);
+	ifp->if_softc = NULL;	/* mpls_ioctl()/mpls_transmit() test for this */
+	sx_xunlock(&mpls_ioctl_sx);
+
+	if_free(ifp);
+	MPLS_LOCK_DESTROY(sc);
+	free(sc, M_MPLS);
+}
+
+static void
+vnet_mpls_init(const void *unused __unused)
+{
+
+	LIST_INIT(&V_mpls_softc_list);
+	MPLS_LIST_LOCK_INIT();
+	V_mpls_cloner = if_clone_simple(mplsname, mpls_clone_create,
+	    mpls_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_mpls_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    vnet_mpls_init, NULL);
+
+static void
+vnet_mpls_uninit(const void *unused __unused)
+{
+
+	if_clone_detach(V_mpls_cloner);
+	MPLS_LIST_LOCK_DESTROY();
+}
+VNET_SYSUNINIT(vnet_mpls_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    vnet_mpls_uninit, NULL);
+
+/*
+ * Tunnels transmit directly through sc->oifp; forget it when it departs.
+ */
+static void
+mpls_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+	struct mpls_softc *sc;
+	struct ether_header *eh;
+
+	/* If the ifnet is just being renamed, don't do anything. */
+	if (ifp->if_flags & IFF_RENAMING)
+		return;
+	/* Ignore non-Ethernet interfaces */
+	if (ifp->if_alloctype != IFT_ETHER)
+		return;
+	MPLS_LIST_LOCK();
+	LIST_FOREACH(sc, &V_mpls_softc_list, entry) {
+		if (sc->oifp != ifp)
+			continue;
+		/* Reset precalculated header and outgoing interface */
+		MPLS_WLOCK(sc);
+		eh = sc->hdr;
+		sc->oifp = NULL;
+		sc->hdr = NULL;
+		MPLS_WUNLOCK(sc);
+		if (eh != NULL)
+			free(eh, M_MPLS);
+	}
+	MPLS_LIST_UNLOCK();
+}
+
+/*
+ * If outgoing interface has changed L2 address, we need to recalculate
+ * L2 header to change source L2 address.
+ */
+static void
+mpls_iflladdr(void *arg __unused, struct ifnet *ifp)
+{
+	struct mpls_softc *sc;
+	struct ether_header *eh;
+
+	/* Ignore non-Ethernet interfaces */
+	if (ifp->if_alloctype != IFT_ETHER)
+		return;
+	MPLS_LIST_LOCK();
+	LIST_FOREACH(sc, &V_mpls_softc_list, entry) {
+		if (sc->oifp != ifp)
+			continue;
+		/* Drop the stale header; mpls_transmit() re-resolves it */
+		MPLS_WLOCK(sc);
+		eh = sc->hdr;
+		sc->oifp = NULL;
+		sc->hdr = NULL;
+		MPLS_WUNLOCK(sc);
+		if (eh != NULL)
+			free(eh, M_MPLS);
+	}
+	MPLS_LIST_UNLOCK();
+}
+
+/*
+ * If llentry with our destination address has been expired, we
+ * need to recalculate L2 header.
+ */
+static void
+mpls_lleevent(void *arg __unused, struct llentry *lle, int evt)
+{
+	MPLS_RLOCK_TRACKER;
+#ifdef INET6
+	struct sockaddr_in6 sin;
+#else
+	struct sockaddr_in sin;
+#endif
+	struct sockaddr *sa;
+	struct mpls_softc *sc;
+	struct ether_header *eh;
+	struct ifnet *ifp;
+	int family;
+
+	if (evt == LLENTRY_RESOLVED)
+		return;
+
+	ifp = lltable_get_ifp(lle->lle_tbl);
+	family = lltable_get_af(lle->lle_tbl);
+	sa = (struct sockaddr *)&sin;
+	lltable_fill_sa_entry(lle, sa);
+
+	MPLS_LIST_LOCK();
+	LIST_FOREACH(sc, &V_mpls_softc_list, entry) {
+		if (sc->family != family)
+			continue;
+		if (sc->oifp != ifp)
+			continue;
+#ifdef INET
+		if (family == AF_INET) {
+			if (sc->dst4.s_addr != satosin(sa)->sin_addr.s_addr)
+				continue;
+			goto reset;
+		}
+#endif
+#ifdef INET6
+		if (family == AF_INET6) {
+			MPLS_RLOCK(sc);
+			if (IN6_ARE_ADDR_EQUAL(&sc->dst6,
+			    &satosin6(sa)->sin6_addr) == 0) {
+				MPLS_RUNLOCK(sc);
+				continue;
+			}
+			MPLS_RUNLOCK(sc);
+			goto reset;
+		}
+#endif
+		continue;
+reset:
+		MPLS_WLOCK(sc);
+		eh = sc->hdr;
+		sc->oifp = NULL;
+		sc->hdr = NULL;
+		MPLS_WUNLOCK(sc);
+		if (eh != NULL)
+			free(eh, M_MPLS);
+	}
+	MPLS_LIST_UNLOCK();
+}
+
+static int
+mpls_modevent(module_t mod, int type, void *data)
+{
+
+	switch (type) {
+	case MOD_LOAD:
+		ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
+		    mpls_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
+		if (ifdetach_tag == NULL)
+			return (ENOMEM);
+		iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
+		    mpls_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
+		if (iflladdr_tag == NULL) {
+			EVENTHANDLER_DEREGISTER(ifnet_departure_event,
+			    ifdetach_tag);
+			return (ENOMEM);
+		}
+		lleevent_tag = EVENTHANDLER_REGISTER(lle_event,
+		    mpls_lleevent, NULL, EVENTHANDLER_PRI_ANY);
+		if (lleevent_tag == NULL) {
+			EVENTHANDLER_DEREGISTER(ifnet_departure_event,
+			    ifdetach_tag);
+			EVENTHANDLER_DEREGISTER(iflladdr_event,
+			    iflladdr_tag);
+			return (ENOMEM);
+		}
+		break;
+	case MOD_UNLOAD:
+		EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag);
+		EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag);
+		EVENTHANDLER_DEREGISTER(lle_event, lleevent_tag);
+		break;
+	default:
+		return (EOPNOTSUPP);
+	}
+	return (0);
+}
+
+static moduledata_t mpls_mod = {
+	"if_mpls",
+	mpls_modevent,
+	0
+};
+
+DECLARE_MODULE(if_mpls, mpls_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_mpls, 1);
+
+static int
+mpls_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+	MPLS_RLOCK_TRACKER;
+	struct mpls_softc *sc;
+	struct ifnet *oifp;
+	void *l;
+	uint32_t af;
+	int error, plen, hlen;
+
+	l = NULL;
+	plen = m->m_pkthdr.len;
+	sc = ifp->if_softc;
+#ifdef MAC
+	error = mac_ifnet_check_transmit(ifp, m);
+	if (error != 0) {
+		m_freem(m);
+		goto err;
+	}
+#endif
+	if (sc == NULL || sc->family == 0 ||
+	    (ifp->if_flags & IFF_MONITOR) != 0 ||
+	    (ifp->if_flags & IFF_UP) == 0) {
+		error = ENETDOWN;
+		m_freem(m);
+		goto err;
+	}
+	if (sc->oifp == NULL) {
+		error = mpls_update_route(sc, M_GETFIB(m));
+		if (error != 0) {
+			m_freem(m);
+			goto err;
+		}
+	}
+	af = m->m_pkthdr.PH_loc.thirtytwo[1];
+	m->m_flags &= ~(M_BCAST | M_MCAST);
+	M_SETFIB(m, sc->fibnum);
+	BPF_MTAP2(ifp, &af, sizeof(af), m);
+
+	MPLS_RLOCK(sc);
+	if (sc->lsz == 0
+#ifdef INET
+	    && af != AF_INET
+#endif
+#ifdef INET6
+	    && af != AF_INET6
+#endif
+	    ) {
+		/* No labels are configured and the AF is unknown as well */
+		m_freem(m);
+		error = ENETDOWN;
+		goto fail;
+	}
+	if (sc->oifp == NULL || sc->hdr == NULL) {
+		m_freem(m);
+		error = ENETDOWN;
+		goto fail;
+	}
+	hlen = sizeof(*sc->hdr);
+	hlen += sc->lsz == 0 ? MPLS_HDRLEN: sc->lsz;
+	M_PREPEND(m, hlen, M_NOWAIT);
+	if (m == NULL) {
+		error = ENOBUFS;
+		goto fail;
+	}
+	bcopy(sc->hdr, mtod(m, void *), sizeof(*sc->hdr));
+	if (sc->lsz == 0) {
+		hlen = sizeof(struct mpls_label);
+#ifdef INET
+		if (af == AF_INET)
+			l = &sc->null4;
+#endif
+#ifdef INET6
+		if (af == AF_INET6)
+			l = &sc->null6;
+#endif
+	} else {
+		l = sc->labels;
+		hlen = sc->lsz;
+	}
+	if (l == NULL) {
+		/*
+		 * Unknown destination AF and no labels: drop, don't leak m.
+		 */
+		m_freem(m);
+		error = EAFNOSUPPORT;
+		goto fail;
+	}
+	bcopy(l, mtodo(m, sizeof(*sc->hdr)), hlen);
+	oifp = sc->oifp;
+	MPLS_RUNLOCK(sc);
+
+	error = ether_output_frame(oifp, m);
+	goto err;
+fail:
+	MPLS_RUNLOCK(sc);
+err:
+	if (error != 0)
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+	else {
+		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
+	}
+	return (error);
+}
+
+static void
+mpls_qflush(struct ifnet *ifp __unused)
+{
+
+}
+
+static int
+mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+    struct route *ro)
+{
+
+	/* Save address family into mbuf's local storage */
+	m->m_pkthdr.PH_loc.thirtytwo[1] = dst->sa_family;
+	return (ifp->if_transmit(ifp, m));
+}
+
+static int
+mpls_update_route(struct mpls_softc *sc, uint16_t fibnum)
+{
+	union {
+		struct {
+			struct ifnet* nh_ifp;
+			uint16_t nh_mtu;
+			uint16_t nh_flags;
+		};
+#ifdef INET
+		struct nhop4_basic nh4;
+#endif
+#ifdef INET6
+		struct nhop6_basic nh6;
+#endif
+	} nh;
+#ifdef INET
+	struct sockaddr_in dst4;
+#endif
+#ifdef INET6
+	struct sockaddr_in6 dst6;
+#endif
+	struct ether_header *buf, *hdr;
+	int error;
+
+	buf = NULL;
+	error = 0;
+	switch (sc->family) {
+	case 0:
+		return (ENETDOWN);
+#ifdef INET
+	case AF_INET:
+		error = fib4_lookup_nh_basic(fibnum, sc->dst4, 0, 0, &nh.nh4);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		error = fib6_lookup_nh_basic(fibnum, &sc->dst6, sc->zoneid, 0,
+		    0, &nh.nh6);
+		break;
+#endif
+	default:
+		return (EAFNOSUPPORT);
+	}
+	/* Allow only directly connected neighbors */
+	if (error != 0 || nh.nh_ifp == sc->ifp ||
+	    (nh.nh_flags & (NHF_GATEWAY | NHF_REJECT | NHF_BLACKHOLE)) != 0) {
+		error = ENETDOWN;
+		goto reset;
+	}
+	/* Construct L2 header */
+	buf = malloc(sizeof(*buf), M_MPLS, M_NOWAIT | M_ZERO);
+	if (buf == NULL) {
+		error = ENOBUFS;
+		goto reset;
+	}
+	switch (sc->family) {
+#ifdef INET
+	case AF_INET:
+		bzero(&dst4, sizeof(dst4));
+		dst4.sin_family = AF_INET;
+		dst4.sin_len = sizeof(dst4);
+		dst4.sin_addr = nh.nh4.nh_addr;
+		error = arpresolve(nh.nh_ifp, 0, NULL,
+		    (const struct sockaddr *)&dst4, (u_char *)buf,
+		    NULL, NULL);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		bzero(&dst6, sizeof(dst6));
+		dst6.sin6_family = AF_INET6;
+		dst6.sin6_len = sizeof(dst6);
+		dst6.sin6_addr = nh.nh6.nh_addr;
+		/* XXX: IPv6 LLA must be in embedded form */
+		if (IN6_IS_SCOPE_LINKLOCAL(&dst6.sin6_addr))
+			dst6.sin6_addr.s6_addr16[1] =
+			    htons(nh.nh_ifp->if_index & 0xffff);
+		error = nd6_resolve(nh.nh_ifp, 0, NULL,
+		    (const struct sockaddr *)&dst6, (u_char *)buf,
+		    NULL, NULL);
+		break;
+#endif
+	default:
+		error = EAFNOSUPPORT;
+	}
+	if (error != 0)
+		goto reset;
+	buf->ether_type = htons(ETHERTYPE_MPLS);
+	MPLS_WLOCK(sc);
+	hdr = sc->hdr;
+	sc->hdr = buf;
+	sc->oifp = nh.nh_ifp;
+	MPLS_WUNLOCK(sc);
+	if (hdr != NULL)
+		free(hdr, M_MPLS);
+	return (error);
+reset:
+	if (buf != NULL)
+		free(buf, M_MPLS);
+	if (sc->hdr != NULL) {
+		MPLS_WLOCK(sc);
+		buf = sc->hdr;
+		sc->hdr = NULL;
+		sc->oifp = NULL;
+		MPLS_WUNLOCK(sc);
+		if (buf != NULL)
+			free(buf, M_MPLS);
+	}
+	return (error);
+}
+
+/*
+ * Convert labels into array of uint32_t in host byte order.
+ */ +static __noinline int +mpls_labels2buf(struct mpls_softc *sc, void *ubuf) +{ + struct mpls_label *p; + uint32_t *l, *kbuf; + int error; + + sx_assert(&mpls_ioctl_sx, SA_LOCKED); + + if (sc->lsz == 0) + return (0); + kbuf = l = malloc(sc->lsz, M_TEMP, M_NOWAIT | M_ZERO); + if (l == NULL) + return (ENOMEM); + for (p = sc->labels; + p < sc->labels + (sc->lsz / MPLS_HDRLEN);p++, l++) + *l = ntohl(p->label & MPLS_LABEL_MASK) >> 12; + error = copyout(kbuf, ubuf, sc->lsz); + free(kbuf, M_TEMP); + return (error); +} + +/* + * Convert array of uint32_t labels in host byte order into + * array of MPLS labels. + */ +static int +mpls_buf2labels(struct mpls_softc *sc, void *ubuf, size_t size) +{ + struct mpls_label *kbuf; + size_t i; + uint32_t label, *l, *s; + int error; + + sx_assert(&mpls_ioctl_sx, SA_LOCKED); + + if (size == 0) { + /* Remove label stack */ + kbuf = NULL; + goto done; + } + if (size > 2048 || (size % MPLS_HDRLEN) != 0) + return (EINVAL); + l = malloc(size, M_TEMP, M_NOWAIT | M_ZERO); + if (l == NULL) + return (ENOMEM); + kbuf = (struct mpls_label *)l; + error = copyin(ubuf, kbuf, size); + if (error != 0) { + free(kbuf, M_TEMP); + return (error); + } + for (i = 0; i < size; i += MPLS_HDRLEN, l++) { + label = *l; + if (label <= MPLS_LABEL_RESERVED_MAX || + label > MPLS_LABEL_MAX) { + free(kbuf, M_TEMP); + return (EINVAL); + } + *l = htonl(label << 12) | htonl(sc->ttl); + s = l; + } + *s |= MPLS_BOS_MASK; +done: + MPLS_WLOCK(sc); + ubuf = sc->lsz == 0 ? 
NULL: sc->labels; + sc->labels = kbuf; + sc->lsz = size; + MPLS_WUNLOCK(sc); + if (ubuf != NULL) + free(ubuf, M_TEMP); + return (0); +} + +static int +mpls_update_ttl(struct mpls_softc *sc, uint8_t ttl) +{ + struct mpls_label *l; + size_t i; + + sx_assert(&mpls_ioctl_sx, SA_LOCKED); + if (ttl == 0) + return (EINVAL); + if (ttl == sc->ttl) + return (0); + sc->ttl = ttl; + if (sc->lsz == 0) { + /* Update only Explicit NULL labels */ + MPLS_WLOCK(sc); +#ifdef INET + sc->null4.label &= ~MPLS_TTL_MASK; + sc->null4.label |= htonl(ttl); +#endif +#ifdef INET6 + sc->null6.label &= ~MPLS_TTL_MASK; + sc->null6.label |= htonl(ttl); +#endif + MPLS_WUNLOCK(sc); + return (0); + } + MPLS_WLOCK(sc); + for (l = sc->labels, i = 0; i < sc->lsz; i += MPLS_HDRLEN, l++) { + l->label &= ~MPLS_TTL_MASK; + l->label |= htonl(ttl); + } + MPLS_WUNLOCK(sc); + return (0); +} + +int +mpls_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + MPLS_RLOCK_TRACKER; + struct ifreq *ifr = (struct ifreq*)data; + struct ifreq_buffer *ifrb; + struct sockaddr *dst, *src; + struct mpls_softc *sc; +#ifdef INET + struct sockaddr_in *sin = NULL; +#endif +#ifdef INET6 + struct sockaddr_in6 *sin6 = NULL; +#endif + int error; + uint8_t ttl; + + switch (cmd) { + case SIOCSIFADDR: + ifp->if_flags |= IFF_UP; + case SIOCADDMULTI: + case SIOCDELMULTI: + case SIOCGIFMTU: + case SIOCSIFFLAGS: + return (0); + case SIOCSIFMTU: + if (ifr->ifr_mtu < MPLS_MTU_MIN || + ifr->ifr_mtu > MPLS_MTU_MAX) + return (EINVAL); + else + ifp->if_mtu = ifr->ifr_mtu; + return (0); + } + sx_xlock(&mpls_ioctl_sx); + sc = ifp->if_softc; + if (sc == NULL) { + error = ENXIO; + goto bad; + } + error = 0; + switch (cmd) { + case MPLSGLABELS: + ifrb = &ifr->ifr_buffer; + if (ifrb->length < sc->lsz) + error = EMSGSIZE; + else + error = mpls_labels2buf(sc, ifrb->buffer); + ifrb->length = sc->lsz; + copyout(ifrb, ifr->ifr_data, sizeof(*ifrb)); + break; + case MPLSSLABELS: + ifrb = &ifr->ifr_buffer; + error = mpls_buf2labels(sc, ifrb->buffer, 
ifrb->length); + break; + case MPLSGTTL: + error = copyout(&sc->ttl, ifr->ifr_data, sizeof(sc->ttl)); + break; + case MPLSSTTL: + error = copyin(ifr->ifr_data, &ttl, sizeof(ttl)); + if (error != 0) + break; + error = mpls_update_ttl(sc, ttl); + break; + case SIOCSIFPHYADDR: +#ifdef INET6 + case SIOCSIFPHYADDR_IN6: +#endif + error = EINVAL; + switch (cmd) { +#ifdef INET + case SIOCSIFPHYADDR: + src = (struct sockaddr *) + &(((struct in_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr *) + &(((struct in_aliasreq *)data)->ifra_dstaddr); + break; +#endif +#ifdef INET6 + case SIOCSIFPHYADDR_IN6: + src = (struct sockaddr *) + &(((struct in6_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr *) + &(((struct in6_aliasreq *)data)->ifra_dstaddr); + break; +#endif + default: + goto bad; + } + /* sa_family must be equal */ + if (src->sa_family != dst->sa_family || + src->sa_len != dst->sa_len) + goto bad; + + /* validate sa_len */ + switch (src->sa_family) { +#ifdef INET + case AF_INET: + if (src->sa_len != sizeof(struct sockaddr_in)) + goto bad; + break; +#endif +#ifdef INET6 + case AF_INET6: + if (src->sa_len != sizeof(struct sockaddr_in6)) + goto bad; + break; +#endif + default: + error = EAFNOSUPPORT; + goto bad; + } + /* check sa_family looks sane for the cmd */ + error = EAFNOSUPPORT; + switch (cmd) { +#ifdef INET + case SIOCSIFPHYADDR: + if (src->sa_family == AF_INET) + break; + goto bad; +#endif +#ifdef INET6 + case SIOCSIFPHYADDR_IN6: + if (src->sa_family == AF_INET6) + break; + goto bad; +#endif + } + error = EADDRNOTAVAIL; + switch (src->sa_family) { +#ifdef INET + case AF_INET: + if (satosin(src)->sin_addr.s_addr == INADDR_ANY || + satosin(dst)->sin_addr.s_addr == INADDR_ANY) + goto bad; + break; +#endif +#ifdef INET6 + case AF_INET6: + if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) + || + IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) + goto bad; + /* + * Check validity of the scope zone ID of the + * addresses, and convert it into the 
kernel + * internal form if necessary. + */ + error = sa6_embedscope(satosin6(src), 0); + if (error != 0) + goto bad; + error = sa6_embedscope(satosin6(dst), 0); + if (error != 0) + goto bad; +#endif + }; + error = mpls_set_tunnel(ifp, src, dst); + break; + case SIOCDIFPHYADDR: + mpls_delete_tunnel(ifp); + break; + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: +#endif + if (sc->family == 0) { + error = EADDRNOTAVAIL; + break; + } + MPLS_RLOCK(sc); + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: + if (sc->family != AF_INET) { + error = EADDRNOTAVAIL; + break; + } + sin = (struct sockaddr_in *)&ifr->ifr_addr; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + break; +#endif +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + if (sc->family != AF_INET6) { + error = EADDRNOTAVAIL; + break; + } + sin6 = (struct sockaddr_in6 *) + &(((struct in6_ifreq *)data)->ifr_addr); + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + break; +#endif + default: + error = EAFNOSUPPORT; + } + if (error == 0) { + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + sin->sin_addr.s_addr = sc->src4.s_addr; + break; + case SIOCGIFPDSTADDR: + sin->sin_addr.s_addr = sc->dst4.s_addr; + break; +#endif +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + sin6->sin6_addr = sc->src6; + break; + case SIOCGIFPDSTADDR_IN6: + sin6->sin6_addr = sc->dst6; + break; +#endif + } + } + MPLS_RUNLOCK(sc); + if (error != 0) + break; + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: + error = prison_if(curthread->td_ucred, + (struct sockaddr *)sin); + if (error != 0) + memset(sin, 0, sizeof(*sin)); + break; +#endif +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + error = prison_if(curthread->td_ucred, + (struct sockaddr *)sin6); + if (error == 0) + error = 
sa6_recoverscope(sin6); + if (error != 0) + memset(sin6, 0, sizeof(*sin6)); +#endif + } + break; + case SIOCGTUNFIB: + ifr->ifr_fib = sc->fibnum; + break; + case SIOCSTUNFIB: + if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0) + break; + if (ifr->ifr_fib >= rt_numfibs) + error = EINVAL; + else + sc->fibnum = ifr->ifr_fib; + break; + default: + error = EINVAL; + break; + } +bad: + sx_xunlock(&mpls_ioctl_sx); + return (error); +} + +static int +mpls_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) +{ + struct mpls_softc *sc = ifp->if_softc; + struct mpls_softc *tsc; + struct ether_header *eh; + int error = 0; + + if (sc == NULL) + return (ENXIO); + MPLS_LIST_LOCK(); + LIST_FOREACH(tsc, &V_mpls_softc_list, entry) { + if (tsc == sc || tsc->family != src->sa_family) + continue; +#ifdef INET + if (tsc->family == AF_INET && + tsc->src4.s_addr == satosin(src)->sin_addr.s_addr && + tsc->dst4.s_addr == satosin(dst)->sin_addr.s_addr) { + error = EADDRNOTAVAIL; + MPLS_LIST_UNLOCK(); + goto bad; + } +#endif +#ifdef INET6 + if (tsc->family == AF_INET6 && + IN6_ARE_ADDR_EQUAL(&tsc->src6, + &satosin6(src)->sin6_addr) && + IN6_ARE_ADDR_EQUAL(&tsc->dst6, + &satosin6(dst)->sin6_addr)) { + error = EADDRNOTAVAIL; + MPLS_LIST_UNLOCK(); + goto bad; + } +#endif + } + MPLS_LIST_UNLOCK(); + MPLS_WLOCK(sc); + eh = sc->hdr; + sc->hdr = NULL; + sc->oifp = NULL; + switch (src->sa_family) { +#ifdef INET + case AF_INET: + sc->src4.s_addr = satosin(src)->sin_addr.s_addr; + sc->dst4.s_addr = satosin(dst)->sin_addr.s_addr; + sc->family = src->sa_family; + break; +#endif +#ifdef INET6 + case AF_INET6: + sc->src6 = satosin6(src)->sin6_addr; + sc->dst6 = satosin6(dst)->sin6_addr; + sc->family = src->sa_family; + break; +#endif + default: + error = EAFNOSUPPORT; + sc->family = 0; + }; + MPLS_WUNLOCK(sc); + if (eh != NULL) + free(eh, M_MPLS); +#if defined(INET) || defined(INET6) +bad: +#endif + if (error == 0 && sc->family != 0) + ifp->if_drv_flags |= IFF_DRV_RUNNING; + 
else + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + return (error); +} + +static void +mpls_delete_tunnel(struct ifnet *ifp) +{ + struct mpls_softc *sc = ifp->if_softc; + struct ether_header *eh; + + if (sc == NULL) + return; + + MPLS_WLOCK(sc); + eh = sc->hdr; + sc->family = 0; + sc->oifp = NULL; + sc->hdr = NULL; + MPLS_WUNLOCK(sc); + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + if (eh != NULL) + free(eh, M_MPLS); +} diff --git a/sys/net/mpls.h b/sys/net/mpls.h new file mode 100644 index 00000000000..535b2fdb6dd --- /dev/null +++ b/sys/net/mpls.h @@ -0,0 +1,59 @@ +/*- + * Copyright (c) 2016 Yandex LLC + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef _NET_MPLS_H_ +#define _NET_MPLS_H_ + +struct mpls_label { + uint32_t label; /* 20 bit label, 4 bit exp & BoS, 8 bit TTL */ +} __packed; + +#define MPLS_LABEL_MAX ((1 << 20) - 1) +#define MPLS_HDRLEN sizeof(struct mpls_label) +#define MPLS_LABEL_MASK (htonl(0xfffff000U)) +#define MPLS_EXP_MASK (htonl(0x00000e00U)) +#define MPLS_BOS_MASK (htonl(0x00000100U)) +#define MPLS_TTL_MASK (htonl(0x000000ffU)) + +#define MPLS_BOS_ISSET(l) (((l) & MPLS_BOS_MASK) == MPLS_BOS_MASK) + +/* + * Reserved label values (RFC3032) + */ +#define MPLS_LABEL_IPV4NULL 0 /* IPv4 Explicit NULL Label */ +#define MPLS_LABEL_RTALERT 1 /* Router Alert Label */ +#define MPLS_LABEL_IPV6NULL 2 /* IPv6 Explicit NULL Label */ +#define MPLS_LABEL_IMPLNULL 3 /* Implicit NULL Label */ +/* MPLS_LABEL_RESERVED 4-15 * Values 4-15 are reserved */ +#define MPLS_LABEL_RESERVED_MAX 15 + +#define MPLSGLABELS _IOWR('i', 152, struct ifreq) +#define MPLSSLABELS _IOW('i', 153, struct ifreq) +#define MPLSGTTL _IOWR('i', 154, struct ifreq) +#define MPLSSTTL _IOW('i', 155, struct ifreq) + +#endif /* _NET_MPLS_H_ */ + diff --git a/sys/netinet6/in6_mgif.c b/sys/netinet6/in6_mgif.c new file mode 100644 index 00000000000..992a6b27460 --- /dev/null +++ b/sys/netinet6/in6_mgif.c @@ -0,0 +1,967 @@ +/*- + * Copyright (c) 2015 Yandex LLC + * Copyright (c) 2015 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define MGIF_MTU (1500 - sizeof(struct ip6_hdr)) +#define MGIF_HLIM 30 +static VNET_DEFINE(int, ip6_mgif_hlim) = MGIF_HLIM; +#define V_ip6_mgif_hlim VNET(ip6_mgif_hlim) + +SYSCTL_DECL(_net_inet6_ip6); +SYSCTL_INT(_net_inet6_ip6, OID_AUTO, mgifhlim, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(ip6_mgif_hlim), 0, ""); + +struct mgif_mapitem { + struct mgif_mapitem *next; + struct in_addr addr4; + uint32_t spare; + struct in6_addr addr6; /* XXX: scope zoneid */ +}; + +struct mgif_softc { + struct ifnet *ifp; + struct rmlock lock; + const struct encaptab *ecookie; + u_int fibnum; + uint8_t options; +#define MGIF_READY 0x01 +#define MGIF_IGNORE_SOURCE 0x02 + uint16_t a4hsize; + uint32_t zoneid; + struct ip6_hdr ip6hdr; + struct mgif_mapitem *addrmap[A4_HSIZE]; + + 
LIST_ENTRY(mgif_softc) link; +}; + +#define MGIF_OPTMASK MGIF_IGNORE_SOURCE + + +#define MGIF2IFP(sc) ((sc)->ifp) +#define MGIF_IS_READY(sc) ((sc)->options & MGIF_READY) +#define MGIF_LOCK_INIT(sc) rm_init(&(sc)->lock, "mgif softc") +#define MGIF_LOCK_DESTROY(sc) rm_destroy(&(sc)->lock) +#define MGIF_RLOCK_TRACKER struct rm_priotracker mgif_tracker +#define MGIF_RLOCK(sc) rm_rlock(&(sc)->lock, &mgif_tracker) +#define MGIF_RUNLOCK(sc) rm_runlock(&(sc)->lock, &mgif_tracker) +#define MGIF_RLOCK_ASSERT(sc) rm_assert(&(sc)->lock, RA_RLOCKED) +#define MGIF_WLOCK(sc) rm_wlock(&(sc)->lock) +#define MGIF_WUNLOCK(sc) rm_wunlock(&(sc)->lock) +#define MGIF_WLOCK_ASSERT(sc) rm_assert(&(sc)->lock, RA_WLOCKED) + +static VNET_DEFINE(struct mtx, mgif_mtx); +#define V_mgif_mtx VNET(mgif_mtx) +static MALLOC_DEFINE(M_MGIF, "mgif", "Multipoint Generic Tunnel Interface"); +static VNET_DEFINE(LIST_HEAD(, mgif_softc), mgif_softc_list); +#define V_mgif_softc_list VNET(mgif_softc_list) +static struct sx mgif_ioctl_sx; +SX_SYSINIT(mgif_ioctl_sx, &mgif_ioctl_sx, "mgif_ioctl"); + +#define MGIF_LIST_LOCK_INIT(x) mtx_init(&V_mgif_mtx, "mgif_mtx", \ + NULL, MTX_DEF) +#define MGIF_LIST_LOCK_DESTROY(x) mtx_destroy(&V_mgif_mtx) +#define MGIF_LIST_LOCK(x) mtx_lock(&V_mgif_mtx) +#define MGIF_LIST_UNLOCK(x) mtx_unlock(&V_mgif_mtx) + +static const char mgifname[] = "mgif"; +static int mgif_check_nesting(struct ifnet *, struct mbuf *); +static int mgif_set_tunnel(struct ifnet *, struct sockaddr *, + struct sockaddr *); +static void mgif_delete_tunnel(struct ifnet *); +static int mgif_ioctl(struct ifnet *, u_long, caddr_t); +static int mgif_input(struct mbuf **, int *, int); +static int mgif_output(struct ifnet *, struct mbuf *, + const struct sockaddr *, struct route *); +static int mgif_transmit(struct ifnet *, struct mbuf *); +static void mgif_qflush(struct ifnet *); +static int mgif_clone_create(struct if_clone *, int, caddr_t); +static void mgif_clone_destroy(struct ifnet *); +static 
VNET_DEFINE(struct if_clone *, mgif_cloner); +#define V_mgif_cloner VNET(mgif_cloner) + +static int mgifmodevent(module_t, int, void *); +static int mgif_encapcheck(const struct mbuf *m, int off, int proto, void *arg); + +extern struct domain inet6domain; +static struct protosw mgif_protosw = { + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = 0, /* IPPROTO_IPV[46] */ + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = mgif_input, + .pr_output = rip6_output, + .pr_ctloutput = rip6_ctloutput, + .pr_usrreqs = &rip6_usrreqs +}; + +static int +mgif_clone_create(struct if_clone *ifc, int unit, caddr_t params) +{ + struct mgif_softc *sc; + + sc = malloc(sizeof(struct mgif_softc), M_MGIF, M_WAITOK | M_ZERO); + MGIF2IFP(sc) = if_alloc(IFT_TUNNEL); + MGIF_LOCK_INIT(sc); + MGIF2IFP(sc)->if_softc = sc; + if_initname(MGIF2IFP(sc), mgifname, unit); + + sc->a4hsize = A4_HSIZE; + sc->ecookie = encap_attach_func(AF_INET6, -1, mgif_encapcheck, + (void *)&mgif_protosw, sc); + //sc->addrmap = malloc(sizeof(void *) * sc->a4hsize, M_MGIF, + // M_WAITOK | M_ZERO); + MGIF2IFP(sc)->if_addrlen = 0; + MGIF2IFP(sc)->if_hdrlen = sizeof(struct ip); + MGIF2IFP(sc)->if_mtu = MGIF_MTU; + MGIF2IFP(sc)->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; + MGIF2IFP(sc)->if_ioctl = mgif_ioctl; + MGIF2IFP(sc)->if_transmit = mgif_transmit; + MGIF2IFP(sc)->if_qflush = mgif_qflush; + MGIF2IFP(sc)->if_output = mgif_output; + if_attach(MGIF2IFP(sc)); + bpfattach(MGIF2IFP(sc), DLT_NULL, sizeof(u_int32_t)); + MGIF_LIST_LOCK(); + LIST_INSERT_HEAD(&V_mgif_softc_list, sc, link); + MGIF_LIST_UNLOCK(); + return (0); +} + +static void +mgif_clone_destroy(struct ifnet *ifp) +{ + struct mgif_softc *sc; + + sx_xlock(&mgif_ioctl_sx); + sc = ifp->if_softc; + if (sc->ecookie != NULL) + encap_detach(sc->ecookie); + MGIF_LIST_LOCK(); + LIST_REMOVE(sc, link); + MGIF_LIST_UNLOCK(); + bpfdetach(ifp); + if_detach(ifp); + ifp->if_softc = NULL; + sx_xunlock(&mgif_ioctl_sx); + + if_free(ifp); + //free(sc->addrmap, 
M_MGIF);
+	/*
+	 * Free every address-map entry still chained in the hash table.
+	 * Entries are allocated by mgif_add_mreq() and are otherwise only
+	 * released by an explicit delete request, so without this loop
+	 * they would leak when the interface is destroyed.
+	 */
+	{
+		struct mgif_mapitem *mi;
+		unsigned int i;
+
+		for (i = 0; i < sc->a4hsize; i++) {
+			while ((mi = sc->addrmap[i]) != NULL) {
+				sc->addrmap[i] = mi->next;
+				free(mi, M_MGIF);
+			}
+		}
+	}
+	MGIF_LOCK_DESTROY(sc);
+	free(sc, M_MGIF);
+}
+
+/*
+ * Per-VNET setup: initialise the softc list and its mutex, then register
+ * the "mgif" interface cloner.
+ */
+static void
+vnet_mgif_init(const void *unused __unused)
+{
+
+	LIST_INIT(&V_mgif_softc_list);
+	MGIF_LIST_LOCK_INIT();
+	V_mgif_cloner = if_clone_simple(mgifname, mgif_clone_create,
+	    mgif_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_mgif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    vnet_mgif_init, NULL);
+
+/*
+ * Per-VNET teardown: detach the cloner and destroy the softc list mutex.
+ */
+static void
+vnet_mgif_uninit(const void *unused __unused)
+{
+
+	if_clone_detach(V_mgif_cloner);
+	MGIF_LIST_LOCK_DESTROY();
+}
+VNET_SYSUNINIT(vnet_mgif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    vnet_mgif_uninit, NULL);
+
+/*
+ * Module event handler.  Load and unload need no work here; any other
+ * event type is rejected with EOPNOTSUPP.
+ */
+static int
+mgifmodevent(module_t mod, int type, void *data)
+{
+
+	switch (type) {
+	case MOD_LOAD:
+	case MOD_UNLOAD:
+		break;
+	default:
+		return (EOPNOTSUPP);
+	}
+	return (0);
+}
+
+static moduledata_t mgif_mod = {
+	"if_mgif",
+	mgifmodevent,
+	0
+};
+
+DECLARE_MODULE(if_mgif, mgif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_mgif, 1);
+
+/*
+ * Glue for the generic CHT_* chained hash table macros: items are
+ * struct mgif_mapitem, keyed by the 4-byte IPv4 address.  Bucket locking
+ * is a no-op; callers serialise access themselves.
+ */
+#define A4_hash(x)		(djb_hash((const unsigned char *)(x), 4))
+#define A4_first(_mh, h)	((_mh)[(h)])
+#define A4_next(x)		((x)->next)
+#define A4_val(x)		(&(x)->addr4.s_addr)
+#define A4_cmp(a, b)		(*(a) == *(b))
+#define A4_lock(a, b)
+#define A4_unlock(a, b)
+
+#define A4HASH_FIND(_sc, _res, _a)					\
+	CHT_FIND((_sc)->addrmap, (_sc)->a4hsize, A4_, _res, _a)
+#define A4HASH_INSERT(_sc, _i)						\
+	CHT_INSERT_HEAD((_sc)->addrmap, (_sc)->a4hsize, A4_, _i)
+#define A4HASH_REMOVE(_sc, _res, _tmp, _a)				\
+	CHT_REMOVE((_sc)->addrmap, (_sc)->a4hsize, A4_, _res, _tmp, _a)
+
+#define A4HASH_FOREACH_SAFE(_sc, _x, _tmp, _cb, _arg)			\
+	CHT_FOREACH_SAFE((_sc)->addrmap, (_sc)->a4hsize, A4_, _x, _tmp, _cb, _arg)
+
+/*
+ * Hash `len' bytes starting at `h': for each byte,
+ * result = (33 * result) ^ byte, starting from 0.
+ */
+static unsigned
+djb_hash(const unsigned char *h, const int len)
+{
+	unsigned int result = 0;
+	int i;
+
+	for (i = 0; i < len; i++)
+		result = 33 * result ^ h[i];
+
+	return (result);
+}
+
+
+/*
+ * Add the IPv4 -> IPv6 mapping described by `mreq', or update the IPv6
+ * address of an existing entry for the same IPv4 address.
+ */
+static int
+mgif_add_mreq(struct mgif_softc *sc, struct mgif_reqmap *mreq)
+{
+	struct mgif_mapitem *mi, *mi_new;
+
+	mi_new = malloc(sizeof(struct mgif_mapitem),
M_MGIF, M_WAITOK|M_ZERO);
+	mi_new->addr4 = mreq->item.addr4;
+	mi_new->addr6 = mreq->item.addr6;
+
+	MGIF_WLOCK(sc);
+
+	A4HASH_FIND(sc, mi, &mreq->item.addr4.s_addr);
+	if (mi == NULL) {
+		/* Insert new item */
+		A4HASH_INSERT(sc, mi_new);
+		MGIF_WUNLOCK(sc);
+		return (0);
+	}
+
+	/* Update existing mapping */
+	mi->addr6 = mi_new->addr6;
+	MGIF_WUNLOCK(sc);
+
+	/* The preallocated item was not linked in; release it. */
+	free(mi_new, M_MGIF);
+
+	return (0);
+}
+
+/*
+ * Remove the mapping for the IPv4 address in `mreq'.
+ * Returns ESRCH when no such mapping exists.
+ */
+static int
+mgif_del_mreq(struct mgif_softc *sc, struct mgif_reqmap *mreq)
+{
+	struct mgif_mapitem *mi, *mi_tmp;
+
+	MGIF_WLOCK(sc);
+
+	A4HASH_FIND(sc, mi, &mreq->item.addr4.s_addr);
+	if (mi == NULL) {
+		MGIF_WUNLOCK(sc);
+		return (ESRCH);
+	}
+
+	A4HASH_REMOVE(sc, mi, mi_tmp, &mreq->item.addr4.s_addr);
+	MGIF_WUNLOCK(sc);
+
+	free(mi, M_MGIF);
+
+	return (0);
+}
+
+/*
+ * Dispatch a mapping request on its action field.
+ * Unknown actions yield EINVAL.
+ */
+static int
+mgif_handle_mreq(struct mgif_softc *sc, struct mgif_reqmap *mreq)
+{
+	int error = 0;
+
+	switch (mreq->action) {
+	case MGIFMAP_ACT_ADD:
+		error = mgif_add_mreq(sc, mreq);
+		break;
+	case MGIFMAP_ACT_DEL:
+		error = mgif_del_mreq(sc, mreq);
+		break;
+	default:
+		error = EINVAL;
+	}
+
+	return (error);
+}
+
+/* State carried through the hash table walk done by mgif_list_map(). */
+struct mgif_periodic_data {
+	struct mgif_listmap	*mp;	/* request header from the caller */
+	struct mgif_listmap	*pml;	/* output buffer being filled */
+	int			count;	/* items visited so far */
+	int			max_count; /* items that fit in the buffer */
+};
+
+/*
+ * Walk callback: copy one map item into the output buffer while there is
+ * room, and always count it so the caller learns the total.
+ * Returns 0 so the walk never unlinks the item.
+ */
+static int
+mgif_export_item(struct mgif_mapitem *mi, struct mgif_periodic_data *d)
+{
+	struct mgifmapitem *ext;
+
+	/*
+	 * Valid slots are 0 .. max_count - 1; using "<=" here would write
+	 * one item past the end of the buffer.
+	 */
+	if (d->count < d->max_count) {
+		ext = &d->pml->item[d->count];
+		ext->addr4 = mi->addr4;
+		ext->addr6 = mi->addr6;
+	}
+
+	d->count++;
+
+	return (0);
+}
+
+/*
+ * Export the address map into `pml', a buffer of mp->size bytes.
+ * On return pml->count holds the total number of mappings and pml->size
+ * the number of bytes needed to hold all of them, even when the buffer
+ * was too small to receive every item.
+ * Returns EINVAL when the buffer cannot even hold the list header.
+ */
+static int
+mgif_list_map(struct mgif_softc *sc, struct mgif_listmap *mp,
+    struct mgif_listmap *pml)
+{
+	struct mgif_mapitem *mi, *mi_tmp;
+	struct mgif_periodic_data d;
+
+	/*
+	 * The subtraction below is unsigned: a size smaller than the header
+	 * would wrap around to a huge item count, and the header stores
+	 * further down would already overrun the buffer.
+	 */
+	if (mp->size < sizeof(*pml))
+		return (EINVAL);
+
+	memset(&d, 0, sizeof(d));
+	d.mp = mp;
+	d.pml = pml;
+	d.max_count = (mp->size - sizeof(*pml)) / sizeof(struct mgifmapitem);
+
+	A4HASH_FOREACH_SAFE(sc, mi, mi_tmp, mgif_export_item, &d);
+
+	pml->count = d.count;
+	pml->size = sizeof(struct mgif_listmap);
+	pml->size += d.count * sizeof(struct mgifmapitem);
+
+
return (0); +} + +/* + * Lookup IPv6 tunnel endpoint address based on IPv4 dst address. + */ +static int +mgif_getdst6(const struct mgif_softc *sc, in_addr_t ip, struct in6_addr *dst) +{ + struct mgif_mapitem *mi; + + A4HASH_FIND(sc, mi, &ip); + if (mi != NULL) { + *dst = mi->addr6; + return (0); + } + + return (EHOSTUNREACH); +} + +#ifdef MGIF_FAST_OUTPUT +static int +mgif_fastoutput(struct mbuf *m) +{ + struct nhop6_basic nh; + struct sockaddr_in6 dst; + struct ip6_hdr *ip6; + int error; + + /* It is safe to use mtod without m_pullup here */ + ip6 = mtod(m, struct ip6_hdr *); + bzero(&dst, sizeof(dst)); + dst.sin6_len = sizeof(struct sockaddr_in6); + dst.sin6_family = AF_INET6; + dst.sin6_addr = ip6->ip6_dst; + /* + * Find route to destination. + */ + if (fib6_lookup_nh_basic(M_GETFIB(m), &dst.sin6_addr, + dst.sin6_scope_id, 0, dst.sin6_flowinfo, &nh) != 0) { + IP6STAT_INC(ip6s_noroute); + error = EHOSTUNREACH; + goto drop; + } + if (nh.nh_flags & NHF_BLACKHOLE) { + error = EHOSTUNREACH; + goto drop; + } + + if (nh.nh_flags & NHF_REJECT) { + error = EHOSTUNREACH; + goto drop; + } + if (m->m_pkthdr.len > nh.nh_mtu) { + in6_ifstat_inc(nh.nh_ifp, ifs6_in_toobig); + error = EMSGSIZE; + goto drop; + } + /* + * Outgoing packet firewall processing. + */ + if (!PFIL_HOOKED(&V_inet6_pfil_hook)) + goto passout; + if (pfil_run_hooks(&V_inet6_pfil_hook, &m, nh.nh_ifp, PFIL_OUT, + NULL) != 0 || m == NULL) { + error = EACCES; + goto drop; + } + if (m->m_flags & M_FASTFWD_OURS) { + error = EINVAL; + goto drop; + } +passout: + /* + * XXX: we need to use destination address with embedded scope + * zone id, because LLTABLE uses such form of addresses for lookup. 
+ */ + dst.sin6_addr = nh.nh_addr; + if (IN6_IS_SCOPE_LINKLOCAL(&dst.sin6_addr)) + dst.sin6_addr.s6_addr16[1] = + htons(nh.nh_ifp->if_index & 0xffff); + + error = (*nh.nh_ifp->if_output)(nh.nh_ifp, m, + (struct sockaddr *)&dst, NULL); + if (error != 0) + in6_ifstat_inc(nh.nh_ifp, ifs6_out_discard); + else + IP6STAT_INC(ip6s_total); + return (error); +drop: + if (m != NULL) + m_freem(m); + return (error); +} +#endif + +static int +mgif_transmit(struct ifnet *ifp, struct mbuf *m) +{ + MGIF_RLOCK_TRACKER; + struct mgif_softc *sc; + struct ip6_hdr *ip6; + struct ifnet *oif; + struct ip *ip; + in_addr_t dst4; + uint32_t zoneid; + int error; + +#ifdef MAC + error = mac_ifnet_check_transmit(ifp, m); + if (error) { + m_freem(m); + goto err; + } +#endif + error = ENETDOWN; + sc = ifp->if_softc; + if ((ifp->if_flags & IFF_MONITOR) != 0 || + (ifp->if_flags & IFF_UP) == 0 || + m->m_pkthdr.csum_data != AF_INET || /* XXX */ + !MGIF_IS_READY(sc) || /* XXX */ + (error = mgif_check_nesting(ifp, m)) != 0) { + m_freem(m); + goto err; + } + + m->m_flags &= ~(M_BCAST|M_MCAST); + M_SETFIB(m, sc->fibnum); + BPF_MTAP2(ifp, &m->m_pkthdr.csum_data, sizeof(uint32_t), m); + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); + + if (m->m_len < sizeof(struct ip)) + m = m_pullup(m, sizeof(struct ip)); + if (m == NULL) { + error = ENOBUFS; + goto err; + } + ip = mtod(m, struct ip *); + dst4 = ip->ip_dst.s_addr; + + /* prepend new IP header */ + M_PREPEND(m, sizeof(struct ip6_hdr), M_NOWAIT); + if (m == NULL) { + error = ENOBUFS; + goto err; + } + ip6 = mtod(m, struct ip6_hdr *); + MGIF_RLOCK(sc); + if (!MGIF_IS_READY(sc)) { + m_freem(m); + MGIF_RUNLOCK(sc); + error = ENETDOWN; + goto err; + } + bcopy(&sc->ip6hdr, ip6, sizeof(struct ip6_hdr)); + error = mgif_getdst6(sc, dst4, &ip6->ip6_dst); + ip6->ip6_hlim = V_ip6_mgif_hlim; + zoneid = sc->zoneid; + MGIF_RUNLOCK(sc); + + if (error == 0) { + /* When we have zoneid, we should suggest outgoing 
interface */ + if (zoneid != 0) + oif = in6_getlinkifnet(zoneid); + else + oif = NULL; + if (oif != NULL) { + in6_setscope(&ip6->ip6_src, oif, NULL); + in6_setscope(&ip6->ip6_dst, oif, NULL); + } +#ifndef MGIF_FAST_OUTPUT + /* reuse error variable for flags */ + if (ifp->if_mtu == IPV6_MMTU) + error = IPV6_MINMTU; + else + error = 0; + error = ip6_output(m, 0, NULL, error, 0, NULL, NULL); +#else + ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); + error = mgif_fastoutput(m); +#endif + } else + m_freem(m); /* dst6 not found */ +err: + if (error) + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + return (error); +} + +static void +mgif_qflush(struct ifnet *ifp __unused) +{ + +} + +#define MGIF_MAX_NEST 1 +SYSCTL_DECL(_net_link); +static SYSCTL_NODE(_net_link, OID_AUTO, mgif, CTLFLAG_RW, 0, + "Multipoint Generic Tunnel Interface"); +static VNET_DEFINE(int, max_mgif_nesting) = MGIF_MAX_NEST; +#define V_max_mgif_nesting VNET(max_mgif_nesting) +SYSCTL_INT(_net_link_mgif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(max_mgif_nesting), 0, "Max nested tunnels"); + +#define MTAG_MGIF 1434491246 +static int +mgif_check_nesting(struct ifnet *ifp, struct mbuf *m) +{ + struct m_tag *mtag; + int count; + + /* + * mgif may cause infinite recursion calls when misconfigured. + * We'll prevent this by detecting loops. 
+ */ + count = 1; + mtag = NULL; + while ((mtag = m_tag_locate(m, MTAG_MGIF, 0, mtag)) != NULL) { + if (*(struct ifnet **)(mtag + 1) == ifp) { + log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp)); + return (EIO); + } + count++; + } + if (count > V_max_mgif_nesting) { + log(LOG_NOTICE, + "%s: if_output recursively called too many times(%d)\n", + if_name(ifp), count); + return (EIO); + } + mtag = m_tag_alloc(MTAG_MGIF, 0, sizeof(struct ifnet *), M_NOWAIT); + if (mtag == NULL) + return (ENOMEM); + *(struct ifnet **)(mtag + 1) = ifp; + m_tag_prepend(m, mtag); + return (0); +} + +static int +mgif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + struct route *ro) +{ + + /* + * Now save the af in the inbound pkt csum data, this is a cheat since + * we are using the inbound csum_data field to carry the af over to + * the mgif_transmit() routine, avoiding using yet another mtag. + */ + m->m_pkthdr.csum_data = (uint32_t)dst->sa_family; + return (ifp->if_transmit(ifp, m)); +} + +static int +mgif_input(struct mbuf **mp, int *offp, int proto) +{ + struct mbuf *m = *mp; + struct mgif_softc *sc; + struct ifnet *ifp; + uint32_t af = AF_INET; + + sc = encap_getarg(m); + if (sc == NULL || + (MGIF2IFP(sc)->if_flags & IFF_UP) == 0 || + MGIF_IS_READY(sc) == 0) { + m_freem(m); + return (IPPROTO_DONE); + } + m->m_pkthdr.rcvif = ifp = MGIF2IFP(sc); +#ifdef MAC + mac_ifnet_create_mbuf(ifp, m); +#endif + m_clrprotoflags(m); + m_adj(m, *offp); + M_SETFIB(m, ifp->if_fib); + BPF_MTAP2(ifp, &af, sizeof(af), m); + if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); + if ((ifp->if_flags & IFF_MONITOR) != 0) { + m_freem(m); + return (IPPROTO_DONE); + } + netisr_dispatch(NETISR_IP, m); + return (IPPROTO_DONE); +} + +static int +mgif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + MGIF_RLOCK_TRACKER; + struct ifreq *ifr = (struct ifreq*)data; + struct sockaddr *dst, *src; + struct mgif_softc *sc; + struct sockaddr_in6 
*sin6 = NULL; + uint32_t zoneid; + u_int options; + struct mgif_listmap ml, *pml; + int error; + + switch (cmd) { + case SIOCSIFADDR: + ifp->if_flags |= IFF_UP; + case SIOCADDMULTI: + case SIOCDELMULTI: + case SIOCGIFMTU: + case SIOCSIFFLAGS: + return (0); + case SIOCSIFMTU: + if (ifr->ifr_mtu < IPV6_MMTU) + return (EINVAL); + ifp->if_mtu = ifr->ifr_mtu; + return (0); + } + sx_xlock(&mgif_ioctl_sx); + sc = ifp->if_softc; + if (sc == NULL) { + error = ENXIO; + goto bad; + } + error = 0; + switch (cmd) { + case SIOCSIFPHYADDR: + error = EAFNOSUPPORT; + break; + case SIOCSIFPHYADDR_IN6: + error = EINVAL; + src = (struct sockaddr *) + &(((struct in6_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr *) + &(((struct in6_aliasreq *)data)->ifra_dstaddr); + /* sa_family must be equal */ + if (src->sa_family != dst->sa_family || + src->sa_len != dst->sa_len) + goto bad; + if (src->sa_len != sizeof(struct sockaddr_in6)) + goto bad; + + /* check sa_family looks sane for the cmd */ + error = EAFNOSUPPORT; + if (src->sa_family != AF_INET6) + goto bad; + + error = EADDRNOTAVAIL; + if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) + goto bad; + /* + * Check validity of the scope zone ID of the + * addresses. 
+		 */
+		error = sa6_checkzone(satosin6(src));
+		if (error != 0)
+			goto bad;
+		error = sa6_checkzone(satosin6(dst));
+		if (error != 0)
+			goto bad;
+		zoneid = satosin6(src)->sin6_scope_id;
+		if (zoneid == 0)
+			zoneid = satosin6(dst)->sin6_scope_id;
+		else if (satosin6(dst)->sin6_scope_id != 0 &&
+		    satosin6(dst)->sin6_scope_id != zoneid) {
+			/*
+			 * Source and destination name different zones.
+			 * error is 0 at this point, so set it explicitly;
+			 * otherwise the request would report success
+			 * without configuring the tunnel.
+			 */
+			error = EINVAL;
+			goto bad;
+		}
+		error = mgif_set_tunnel(ifp, src, dst);
+		break;
+	case SIOCDIFPHYADDR:
+		mgif_delete_tunnel(ifp);
+		break;
+	case SIOCGIFPSRCADDR:
+	case SIOCGIFPDSTADDR:
+		/* The outer header is IPv6 only. */
+		error = EAFNOSUPPORT;
+		break;
+	case SIOCGIFPSRCADDR_IN6:
+	case SIOCGIFPDSTADDR_IN6:
+		if (MGIF_IS_READY(sc) == 0) {
+			error = EADDRNOTAVAIL;
+			goto bad;
+		}
+		/* Snapshot the address and zone under the softc lock. */
+		MGIF_RLOCK(sc);
+		sin6 = (struct sockaddr_in6 *)
+		    &(((struct in6_ifreq *)data)->ifr_addr);
+		memset(sin6, 0, sizeof(*sin6));
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_len = sizeof(*sin6);
+		switch (cmd) {
+		case SIOCGIFPSRCADDR_IN6:
+			sin6->sin6_addr = sc->ip6hdr.ip6_src;
+			break;
+		case SIOCGIFPDSTADDR_IN6:
+			sin6->sin6_addr = sc->ip6hdr.ip6_dst;
+			break;
+		}
+		zoneid = sc->zoneid;
+		MGIF_RUNLOCK(sc);
+		error = prison_if(curthread->td_ucred,
+		    (struct sockaddr *)sin6);
+		if (error == 0 && IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
+			sin6->sin6_scope_id = zoneid;
+		/* Do not hand back the address when prison_if() refuses. */
+		if (error != 0)
+			memset(sin6, 0, sizeof(*sin6));
+		break;
+	case MGIFGOPTS:
+		options = sc->options & MGIF_OPTMASK;
+		error = copyout(&options, ifr->ifr_data, sizeof(options));
+		break;
+	case MGIFSOPTS:
+		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
+			break;
+		error = copyin(ifr->ifr_data, &options, sizeof(options));
+		if (error)
+			break;
+		if (options & ~MGIF_OPTMASK)
+			error = EINVAL;
+		else {
+			/*
+			 * sc->options is read under the softc read lock on
+			 * the packet paths; take the write lock like the
+			 * other writers do.
+			 */
+			MGIF_WLOCK(sc);
+			sc->options &= ~MGIF_OPTMASK;
+			sc->options |= options;
+			MGIF_WUNLOCK(sc);
+		}
+		break;
+	case MGIFGMAPPINGS:
+		error = copyin(ifr->ifr_data, &ml, sizeof(ml));
+		if (error != 0)
+			break;
+		/*
+		 * The buffer must hold at least the list header, which is
+		 * always written back.
+		 */
+		if (ml.size < sizeof(ml)) {
+			error = EINVAL;
+			break;
+		}
+		if (ml.size >= 65536) {
+			error = E2BIG;
+			break;
+		}
+		pml = malloc(ml.size, M_TEMP, M_WAITOK | M_ZERO);
+		error = mgif_list_map(sc, &ml, pml);
+		/* Report a failed copy to user space instead of ignoring it. */
+		if (error == 0)
+			error = copyout(pml,
ifr->ifr_data, ml.size); + free(pml, M_TEMP); + break; + case MGIFSMAPPING: + if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0) + break; + + struct mgif_reqmap mreq; + error = copyin(ifr->ifr_data, &mreq, sizeof(mreq)); + if (error != 0) + break; + error = mgif_handle_mreq(sc, &mreq); + break; + + + default: + error = EINVAL; + break; + } +bad: + sx_xunlock(&mgif_ioctl_sx); + return (error); +} + +static int +mgif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) +{ + struct ip6_hdr ip6; + struct mgif_softc *sc = ifp->if_softc; + struct mgif_softc *tsc; + uint32_t zoneid; + int error = 0; + + if (sc == NULL) + return (ENXIO); + + MGIF_LIST_LOCK(); + LIST_FOREACH(tsc, &V_mgif_softc_list, link) { + if (tsc == sc || MGIF_IS_READY(tsc) == 0) + continue; + if (IN6_ARE_ADDR_EQUAL(&tsc->ip6hdr.ip6_src, + &satosin6(src)->sin6_addr) && + IN6_ARE_ADDR_EQUAL(&tsc->ip6hdr.ip6_dst, + &satosin6(dst)->sin6_addr)) { + error = EADDRNOTAVAIL; + MGIF_LIST_UNLOCK(); + goto bad; + } + } + MGIF_LIST_UNLOCK(); + + memset(&ip6, 0, sizeof(ip6)); + ip6.ip6_src = satosin6(src)->sin6_addr; + ip6.ip6_dst = satosin6(dst)->sin6_addr; + ip6.ip6_vfc = IPV6_VERSION; + ip6.ip6_nxt = IPPROTO_IPV4; + /* ip6_plen will be initialized in ip6_output */ + zoneid = satosin6(src)->sin6_scope_id; + if (zoneid == 0) + zoneid = satosin6(dst)->sin6_scope_id; + + MGIF_WLOCK(sc); + sc->ip6hdr = ip6; + sc->options |= MGIF_READY; + sc->zoneid = zoneid; + MGIF_WUNLOCK(sc); +bad: + if (error == 0 && MGIF_IS_READY(sc) != 0) + ifp->if_drv_flags |= IFF_DRV_RUNNING; + else + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + return (error); +} + +static void +mgif_delete_tunnel(struct ifnet *ifp) +{ + struct mgif_softc *sc = ifp->if_softc; + + if (sc == NULL) + return; + + MGIF_WLOCK(sc); + sc->options &= ~MGIF_READY; + MGIF_WUNLOCK(sc); + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; +} + +static int +mgif_encapcheck(const struct mbuf *m, int off, int proto, void *arg) +{ + MGIF_RLOCK_TRACKER; + const struct 
ip6_hdr *ip6; + struct mgif_softc *sc; + int ret; + + ret = 0; + sc = (struct mgif_softc *)arg; + if (sc == NULL || + (MGIF2IFP(sc)->if_flags & IFF_UP) == 0 || + proto != IPPROTO_IPV4) + return (0); + + /* Bail on short packets */ + if (m->m_pkthdr.len < sizeof(struct ip6_hdr) + sizeof(struct ip)) + return (0); + + M_ASSERTPKTHDR(m); + MGIF_RLOCK(sc); + + if (MGIF_IS_READY(sc) == 0) + goto done; + /* + * Check for address match. Note that the check is for an incoming + * packet. We should compare the *source* address in our configuration + * and the *destination* address of the packet, and vice versa. + */ + ip6 = mtod(m, const struct ip6_hdr *); + if (!IN6_ARE_ADDR_EQUAL(&sc->ip6hdr.ip6_src, &ip6->ip6_dst)) + goto done; + ret = 128; + if (!IN6_ARE_ADDR_EQUAL(&sc->ip6hdr.ip6_dst, &ip6->ip6_src)) { + if ((sc->options & MGIF_IGNORE_SOURCE) == 0) + ret = 0; + } else + ret += 128; +done: + MGIF_RUNLOCK(sc); + return (ret); +} diff --git a/sys/netinet6/in6_mgif.h b/sys/netinet6/in6_mgif.h new file mode 100644 index 00000000000..448ac830b6f --- /dev/null +++ b/sys/netinet6/in6_mgif.h @@ -0,0 +1,143 @@ +/*- + * Copyright (c) 2015 Yandex LLC + * Copyright (c) 2015 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NETINET6_IN6_MGIF_H_
+#define _NETINET6_IN6_MGIF_H_
+
+/*
+ * Hash setup. Use static non-resizable hash size.
+ * The CHT_* macros below select a bucket with "hash & (size - 1)", which
+ * only reaches every bucket when the size is a power of two; presumably
+ * this is the size handed to them.
+ */
+#define	A4_HSIZE	256
+
+#define	MGIFGOPTS	_IOWR('i', 150, struct ifreq)
+#define	MGIFSOPTS	_IOW('i', 151, struct ifreq)
+
+/* One IPv4 <-> IPv6 address mapping entry. */
+struct mgifmapitem {
+	uint16_t	flags;
+	uint16_t	spare;
+	struct in_addr	addr4;
+	struct in6_addr	addr6;
+};
+
+/* Values for mgif_reqmap.action. */
+#define	MGIFMAP_ACT_ADD	1
+#define	MGIFMAP_ACT_DEL	2
+/*
+ * Request to add or delete a single mapping.  ifconfig(8) points ifr_data
+ * of the struct ifreq at this structure and sets size to
+ * sizeof(struct mgif_reqmap) before issuing MGIFSMAPPING.
+ */
+struct mgif_reqmap {
+	uint32_t	size;
+	uint8_t		action;
+	uint8_t		spare[3];
+	struct mgifmapitem	item;
+};
+
+/*
+ * Fixed header followed by a variable-length array of mappings
+ * (count is presumably the number of entries in item[]).
+ */
+struct mgif_listmap {
+	uint32_t	size;
+	uint32_t	count;
+	struct mgifmapitem	item[0];
+};
+#define	MGIFSMAPPING	_IOW('i', 152, struct ifreq)
+#define	MGIFGMAPPINGS	_IOWR('i', 153, struct ifreq)
+
+
+/*
+ * Chained hash table.
+ *
+ * Generic macros for a hash table whose buckets are singly linked chains.
+ * The concrete layout is supplied by the includer through a family of
+ * accessors sharing the token prefix _PX:
+ *
+ *   _PX##hash(key)		hash value of a key
+ *   _PX##cmp(key, val)		non-zero when key matches an item's value
+ *   _PX##val(item)		value (key) stored in an item
+ *   _PX##first(ph, bucket)	lvalue: head of the bucket's chain
+ *   _PX##next(item)		lvalue: link to the following item
+ *   _PX##lock(ph, bucket)	per-bucket lock hook
+ *   _PX##unlock(ph, bucket)	per-bucket unlock hook
+ *
+ * Bucket numbers are computed as "hash & (size - 1)", so table sizes must
+ * be powers of two.  _x, _tmp and _y are item pointers provided by the
+ * caller and used as cursors; none of the macros frees an item.
+ */
+
+/*
+ * Look up _key.  On a hit _x points at the matching item and the bucket is
+ * left locked (CHT_UNLOCK_KEY / CHT_UNLOCK_BUCK release it).  On a miss
+ * _x is NULL and the bucket has already been unlocked.
+ */
+#define	CHT_FIND(_ph, _hsize, _PX, _x, _key)	do {			\
+	unsigned int _buck = _PX##hash(_key) & ((_hsize) - 1);		\
+	_PX##lock(_ph, _buck);						\
+	_x = _PX##first(_ph, _buck);					\
+	for ( ; _x != NULL; _x = _PX##next(_x)) {			\
+		if (_PX##cmp(_key, _PX##val(_x)))			\
+			break;						\
+	}								\
+	if (_x == NULL)							\
+		_PX##unlock(_ph, _buck);				\
+} while(0)
+
+/* Unlock bucket number _buck.  Note: the expansion already ends in ';'. */
+#define	CHT_UNLOCK_BUCK(_ph, _PX, _buck)				\
+	_PX##unlock(_ph, _buck);
+
+/* Unlock the bucket that _key hashes to. */
+#define	CHT_UNLOCK_KEY(_ph, _hsize, _PX, _key)	do {			\
+	unsigned int _buck = _PX##hash(_key) & ((_hsize) - 1);		\
+	_PX##unlock(_ph, _buck);					\
+} while(0)
+
+/* Link item _i at the head of its bucket; no duplicate check is made. */
+#define	CHT_INSERT_HEAD(_ph, _hsize, _PX, _i)	do {			\
+	unsigned int _buck = _PX##hash(_PX##val(_i)) & ((_hsize) - 1);	\
+	_PX##lock(_ph, _buck);						\
+	_PX##next(_i) = _PX##first(_ph, _buck);				\
+	_PX##first(_ph, _buck) = _i;					\
+	_PX##unlock(_ph, _buck);					\
+} while(0)
+
+/*
+ * Unlink the first item matching _key.  Afterwards _x is the unlinked item,
+ * or NULL when nothing matched; _tmp is scratch (the predecessor).
+ */
+#define	CHT_REMOVE(_ph, _hsize, _PX, _x, _tmp, _key)	do {		\
+	unsigned int _buck = _PX##hash(_key) & ((_hsize) - 1);		\
+	_PX##lock(_ph, _buck);						\
+	_x = _PX##first(_ph, _buck);					\
+	_tmp = NULL;							\
+	for ( ; _x != NULL; _tmp = _x, _x = _PX##next(_x)) {		\
+		if (_PX##cmp(_key, _PX##val(_x)))			\
+			break;						\
+	}								\
+	if (_x != NULL) {						\
+		if (_tmp == NULL)					\
+			_PX##first(_ph, _buck) = _PX##next(_x);		\
+		else							\
+			_PX##next(_tmp) = _PX##next(_x);		\
+	}								\
+	_PX##unlock(_ph, _buck);					\
+} while(0)
+
+/*
+ * Call _cb(item, _arg) for every item, one locked bucket at a time.  A
+ * non-zero return unlinks the item from its chain.  _tmp tracks the last
+ * item that was kept, i.e. the predecessor whose link has to be patched
+ * when a later item is dropped; it must not advance over dropped items.
+ * The dropped item's own next link is read after _cb returns, so _cb has
+ * to leave the item's storage valid.
+ */
+#define	CHT_FOREACH_SAFE(_ph, _hsize, _PX, _x, _tmp, _cb, _arg)	do {	\
+	for (unsigned int _i = 0; _i < (_hsize); _i++) {		\
+		_PX##lock(_ph, _i);					\
+		_x = _PX##first(_ph, _i);				\
+		_tmp = NULL;						\
+		for (; _x != NULL; _x = _PX##next(_x)) {		\
+			if (_cb(_x, _arg) == 0) {			\
+				/* Kept: becomes the predecessor. */	\
+				_tmp = _x;				\
+				continue;				\
+			}						\
+			/* Dropped: route the chain around _x. */	\
+			if (_tmp == NULL)				\
+				_PX##first(_ph, _i) = _PX##next(_x);	\
+			else						\
+				_PX##next(_tmp) = _PX##next(_x);	\
+		}							\
+		_PX##unlock(_ph, _i);					\
+	}								\
+} while(0)
+
+/*
+ * Move every item of table _ph (_hsize buckets) into table _nph (_nhsize
+ * buckets).  No bucket locks are taken and the old bucket heads are not
+ * cleared.  _y holds the successor while _x is being relinked.
+ */
+#define	CHT_RESIZE(_ph, _hsize, _nph, _nhsize, _PX, _x, _y)	do {	\
+	unsigned int _buck;						\
+	for (unsigned int _i = 0; _i < (_hsize); _i++) {		\
+		_x = _PX##first(_ph, _i);				\
+		while (_x != NULL) {					\
+			_buck = _PX##hash(_PX##val(_x)) &		\
+			    ((_nhsize) - 1);				\
+			/* Save the successor before relinking. */	\
+			_y = _PX##next(_x);				\
+			_PX##next(_x) = _PX##first(_nph, _buck);	\
+			_PX##first(_nph, _buck) = _x;			\
+			_x = _y;					\
+		}							\
+	}								\
+} while(0)
+
+
+
+#endif
+
diff --git a/sys/sys/socket.h b/sys/sys/socket.h
index 33678c5d769..524019aec45 100644
--- a/sys/sys/socket.h
+++ b/sys/sys/socket.h
@@ -259,6 +259,7 @@ struct accept_filter_arg {
 #define	AF_ARP		35
 #define	AF_BLUETOOTH	36		/* Bluetooth sockets */
 #define	AF_IEEE80211	37		/* IEEE 802.11 protocol */
+#define	AF_MPLS		38		/* Multiprotocol Label Switching */
 #define	AF_INET_SDP	40		/* OFED Socket Direct Protocol ipv4 */
 #define	AF_INET6_SDP	42		/* OFED Socket Direct Protocol ipv6 */
 #define	AF_MAX		42
@@ -386,6 +387,7 @@ struct sockproto {
 #define	PF_IEEE80211
AF_IEEE80211 #define PF_INET_SDP AF_INET_SDP #define PF_INET6_SDP AF_INET6_SDP +#define PF_MPLS AF_MPLS #define PF_MAX AF_MAX