Index: sbin/ifconfig/Makefile =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sbin/ifconfig/Makefile,v retrieving revision 1.25 diff -u -r1.25 Makefile --- sbin/ifconfig/Makefile 23 Feb 2004 20:13:52 -0000 1.25 +++ sbin/ifconfig/Makefile 8 Aug 2004 10:12:47 -0000 @@ -17,6 +17,10 @@ SRCS+= ifieee80211.c CFLAGS+=-DUSE_IEEE80211 +#comment out to exclude SIOC[GS]VH support +SRCS+= ifcarp.c +CFLAGS+=-DUSE_CARP + #comment out to exclude MAC support SRCS+= ifmac.c CFLAGS+=-DUSE_MAC Index: sbin/ifconfig/ifcarp.c =================================================================== RCS file: sbin/ifconfig/ifcarp.c diff -N sbin/ifconfig/ifcarp.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sbin/ifconfig/ifcarp.c 8 Aug 2004 10:12:47 -0000 @@ -0,0 +1,169 @@ +/* $FreeBSD$ */ +/* from $OpenBSD: ifconfig.c,v 1.82 2003/10/19 05:43:35 mcbride Exp $ */ + +/* + * Copyright (c) 2002 Michael Shalayeff. All rights reserved. + * Copyright (c) 2003 Ryan McBride. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "ifconfig.h" + +static const char *carp_states[] = { CARP_STATES }; + +void +carp_status(int s, struct rt_addrinfo *info __unused) +{ + const char *state; + struct carpreq carpr; + + memset((char *)&carpr, 0, sizeof(struct carpreq)); + ifr.ifr_data = (caddr_t)&carpr; + + if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1) + return; + + if (carpr.carpr_vhid > 0) { + if (carpr.carpr_state > CARP_MAXSTATE) + state = ""; + else + state = carp_states[carpr.carpr_state]; + + printf("\tcarp: %s vhid %d advbase %d advskew %d\n", + state, carpr.carpr_vhid, carpr.carpr_advbase, + carpr.carpr_advskew); + } + + return; + +} + +void +setcarp_passwd(const char *val, int d, int s, const struct afswtch *afp) +{ + struct carpreq carpr; + + memset((char *)&carpr, 0, sizeof(struct carpreq)); + ifr.ifr_data = (caddr_t)&carpr; + + if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1) + err(1, "SIOCGVH"); + + /* XXX Should hash the password into the key here, perhaps? 
*/ + strlcpy(carpr.carpr_key, val, CARP_KEY_LEN); + + if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1) + err(1, "SIOCSVH"); + + return; +} + +void +setcarp_vhid(const char *val, int d, int s, const struct afswtch *afp) +{ + int vhid; + struct carpreq carpr; + + vhid = atoi(val); + + if (vhid <= 0) + errx(1, "vhid must be greater than 0"); + + memset((char *)&carpr, 0, sizeof(struct carpreq)); + ifr.ifr_data = (caddr_t)&carpr; + + if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1) + err(1, "SIOCGVH"); + + carpr.carpr_vhid = vhid; + + if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1) + err(1, "SIOCSVH"); + + return; +} + +void +setcarp_advskew(const char *val, int d, int s, const struct afswtch *afp) +{ + int advskew; + struct carpreq carpr; + + advskew = atoi(val); + + memset((char *)&carpr, 0, sizeof(struct carpreq)); + ifr.ifr_data = (caddr_t)&carpr; + + if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1) + err(1, "SIOCGVH"); + + carpr.carpr_advskew = advskew; + + if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1) + err(1, "SIOCSVH"); + + return; +} + +void +setcarp_advbase(const char *val, int d, int s, const struct afswtch *afp) +{ + int advbase; + struct carpreq carpr; + + advbase = atoi(val); + + memset((char *)&carpr, 0, sizeof(struct carpreq)); + ifr.ifr_data = (caddr_t)&carpr; + + if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1) + err(1, "SIOCGVH"); + + carpr.carpr_advbase = advbase; + + if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1) + err(1, "SIOCSVH"); + + return; +} Index: sbin/ifconfig/ifconfig.c =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sbin/ifconfig/ifconfig.c,v retrieving revision 1.106 diff -u -r1.106 ifconfig.c --- sbin/ifconfig/ifconfig.c 9 Aug 2004 03:13:57 -0000 1.106 +++ sbin/ifconfig/ifconfig.c 10 Aug 2004 16:14:48 -0000 @@ -282,6 +282,12 @@ #ifdef USE_MAC { "maclabel", NEXTARG, setifmaclabel }, #endif +#ifdef USE_CARP + { "advbase", NEXTARG, setcarp_advbase }, + { "advskew", NEXTARG, setcarp_advskew }, + { "pass", 
NEXTARG, setcarp_passwd }, + { "vhid", NEXTARG, setcarp_vhid }, +#endif { "rxcsum", IFCAP_RXCSUM, setifcap }, { "-rxcsum", -IFCAP_RXCSUM, setifcap }, { "txcsum", IFCAP_TXCSUM, setifcap }, @@ -368,6 +374,9 @@ #ifdef USE_IEEE80211 { "ieee80211", AF_UNSPEC, ieee80211_status, NULL, NULL, }, /* XXX not real!! */ #endif +#ifdef USE_CARP + { "carp", AF_UNSPEC, carp_status, NULL, NULL, }, /* XXX not real!! */ +#endif #ifdef USE_MAC { "maclabel", AF_UNSPEC, maclabel_status, NULL, NULL, }, #endif @@ -1173,6 +1182,10 @@ if (allfamilies || afp->af_status == ieee80211_status) ieee80211_status(s, NULL); #endif +#ifdef USE_CARP + if (allfamilies || afp->af_status == carp_status) + carp_status(s, NULL); +#endif #ifdef USE_MAC if (allfamilies || afp->af_status == maclabel_status) maclabel_status(s, NULL); @@ -1189,6 +1202,9 @@ #ifdef USE_VLANS && afp->af_status != vlan_status #endif +#ifdef USE_CARP + && afp->af_status != carp_status +#endif ) warnx("%s has no %s interface address!", name, afp->af_name); Index: sbin/ifconfig/ifconfig.h =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sbin/ifconfig/ifconfig.h,v retrieving revision 1.12 diff -u -r1.12 ifconfig.h --- sbin/ifconfig/ifconfig.h 30 Mar 2004 22:59:22 -0000 1.12 +++ sbin/ifconfig/ifconfig.h 8 Aug 2004 10:12:47 -0000 @@ -70,3 +70,9 @@ extern void ieee80211_status(int s, struct rt_addrinfo *); extern void maclabel_status(int s, struct rt_addrinfo *); extern void setifmaclabel(const char *, int, int, const struct afswtch *rafp); + +extern void carp_status(int s, struct rt_addrinfo *); +extern void setcarp_advbase(const char *,int, int, const struct afswtch *rafp); +extern void setcarp_advskew(const char *, int, int, const struct afswtch *rafp); +extern void setcarp_passwd(const char *, int, int, const struct afswtch *rafp); +extern void setcarp_vhid(const char *, int, int, const struct afswtch *rafp); Index: sys/conf/files 
=================================================================== RCS file: /usr/store/mlaier/fcvs/src/sys/conf/files,v retrieving revision 1.943 diff -u -r1.943 files --- sys/conf/files 17 Aug 2004 22:05:53 -0000 1.943 +++ sys/conf/files 23 Aug 2004 20:55:08 -0000 @@ -1471,6 +1471,8 @@ netinet/igmp.c optional inet netinet/in.c optional inet netinet/in_gif.c optional gif inet +netinet/ip_carp.c optional carp +crypto/sha1.c optional carp netinet/ip_gre.c optional gre inet netinet/ip_id.c optional inet netinet/in_pcb.c optional inet Index: sys/conf/options =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sys/conf/options,v retrieving revision 1.475 diff -u -r1.475 options --- sys/conf/options 17 Aug 2004 22:05:53 -0000 1.475 +++ sys/conf/options 23 Aug 2004 20:55:09 -0000 @@ -604,6 +604,7 @@ DEV_BPF opt_bpf.h DEV_ISA opt_isa.h DEV_MCA opt_mca.h +DEV_CARP opt_carp.h DEV_SPLASH opt_splash.h EISA_SLOTS opt_eisa.h Index: sys/net/if.c =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sys/net/if.c,v retrieving revision 1.199 diff -u -r1.199 if.c --- sys/net/if.c 15 Aug 2004 06:24:42 -0000 1.199 +++ sys/net/if.c 23 Aug 2004 20:58:14 -0000 @@ -34,6 +34,7 @@ #include "opt_inet6.h" #include "opt_inet.h" #include "opt_mac.h" +#include "opt_carp.h" #include #include @@ -76,6 +77,9 @@ #ifdef INET #include #endif +#ifdef DEV_CARP +#include +#endif struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL; @@ -524,6 +528,12 @@ int found; EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); +#ifdef DEV_CARP + /* XXX_CARP: hook to above?!? */ + if (ifp->if_carp) + carp_ifdetach(ifp); +#endif + /* * Remove routes and flush queues. 
*/ Index: sys/net/if_ethersubr.c =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sys/net/if_ethersubr.c,v retrieving revision 1.177 diff -u -r1.177 if_ethersubr.c --- sys/net/if_ethersubr.c 27 Jul 2004 23:20:45 -0000 1.177 +++ sys/net/if_ethersubr.c 8 Aug 2004 10:11:11 -0000 @@ -37,6 +37,7 @@ #include "opt_bdg.h" #include "opt_mac.h" #include "opt_netgraph.h" +#include "opt_carp.h" #include #include @@ -73,6 +74,10 @@ #include #endif +#ifdef DEV_CARP +#include +#endif + #ifdef IPX #include #include @@ -315,6 +320,12 @@ } } +#ifdef DEV_CARP + if (ifp->if_carp && + (error = carp_output(ifp, m, dst, NULL))) + goto bad; +#endif + /* Handle ng_ether(4) processing, if any */ if (ng_ether_output_p != NULL) { if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) { @@ -648,6 +659,19 @@ if (!(BDG_ACTIVE(ifp)) && !(ether_type == ETHERTYPE_VLAN && ifp->if_nvlans > 0)) { +#ifdef DEV_CARP +/* + * XXX: Okay, we need to call carp_forus() and - if it is for us - jump over + * code that does the normal check "ac_enaddr == ether_dhost". + * The check sequence is a bit different from OpenBSD, so we jump over + * as few code as possible, to catch _all_ sanity checks. This needs + * evaluation, to see if the carp ether_dhost values break any of these + * checks! + */ + if (ifp->if_carp && carp_forus(ifp->if_carp, eh->ether_dhost)) + goto pre_stats; +#endif + /* * Discard packet if upper layers shouldn't see it because it * was unicast to a different Ethernet address. 
If the driver @@ -670,6 +694,7 @@ } } +pre_stats: /* Discard packet if interface is not up */ if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); Index: sys/net/if_var.h =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sys/net/if_var.h,v retrieving revision 1.84 diff -u -r1.84 if_var.h --- sys/net/if_var.h 15 Aug 2004 06:24:42 -0000 1.84 +++ sys/net/if_var.h 23 Aug 2004 20:58:16 -0000 @@ -68,6 +68,7 @@ struct rt_addrinfo; struct socket; struct ether_header; +struct carp_if; #endif #include /* get TAILQ macros */ @@ -148,7 +149,7 @@ */ struct knlist if_klist; /* events attached to this if */ int if_pcount; /* number of promiscuous listeners */ - void *if_carp; /* carp (tbd) interface pointer */ + struct carp_if *if_carp; /* carp interface structure */ struct bpf_if *if_bpf; /* packet filter structure */ u_short if_index; /* numeric abbreviation for this if */ short if_timer; /* time 'til if_watchdog called */ Index: sys/netinet/if_ether.c =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sys/netinet/if_ether.c,v retrieving revision 1.128 diff -u -r1.128 if_ether.c --- sys/netinet/if_ether.c 13 Jun 2004 10:54:36 -0000 1.128 +++ sys/netinet/if_ether.c 8 Aug 2004 10:11:14 -0000 @@ -39,6 +39,7 @@ #include "opt_inet.h" #include "opt_bdg.h" #include "opt_mac.h" +#include "opt_carp.h" #include #include @@ -69,6 +70,10 @@ #include #include +#ifdef DEV_CARP +#include +#endif + #define SIN(s) ((struct sockaddr_in *)s) #define SDL(s) ((struct sockaddr_dl *)s) @@ -117,8 +122,8 @@ static void arp_init(void); static void arp_rtrequest(int, struct rtentry *, struct rt_addrinfo *); -static void arprequest(struct ifnet *, - struct in_addr *, struct in_addr *, u_char *); +static void arprequest(struct ifnet *, struct in_addr *, struct in_addr *, + u_char *); static void arpintr(struct mbuf *); static void arptfree(struct llinfo_arp *); static void arptimer(void *); @@ -528,6 
+533,7 @@ struct sockaddr_dl *sdl; struct sockaddr sa; struct in_addr isaddr, itaddr, myaddr; + u_int8_t *enaddr = NULL; int op, rif_len; int req_len; @@ -546,14 +552,23 @@ #else #define BRIDGE_TEST (0) /* cc will optimise the test away */ #endif + /* * For a bridge, we want to check the address irrespective * of the receive interface. (This will change slightly * when we have clusters of interfaces). + * If the interface does not match, but the receiving interface + * is part of carp, we call carp_iamatch to see if this is a + * request for the virtual host ip. + * XXX: This is really ugly! */ LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) - if ((BRIDGE_TEST || (ia->ia_ifp == ifp)) && - itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) + if ((BRIDGE_TEST || (ia->ia_ifp == ifp) +#ifdef DEV_CARP + || (ifp->if_carp + && carp_iamatch(ifp->if_carp, ia, &isaddr, &enaddr)) +#endif + ) && itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) goto match; LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash) if ((BRIDGE_TEST || (ia->ia_ifp == ifp)) && @@ -568,14 +583,17 @@ ia = ifatoia(ifa); goto match; } + /* * If bridging, fall back to using any inet address. */ if (!BRIDGE_TEST || (ia = TAILQ_FIRST(&in_ifaddrhead)) == NULL) goto drop; match: + if (!enaddr) + enaddr = (u_int8_t *)IF_LLADDR(ifp); myaddr = ia->ia_addr.sin_addr; - if (!bcmp(ar_sha(ah), IF_LLADDR(ifp), ifp->if_addrlen)) + if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen)) goto drop; /* it's from me, ignore it. 
*/ if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) { log(LOG_ERR, @@ -692,7 +710,7 @@ if (itaddr.s_addr == myaddr.s_addr) { /* I am the target */ (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); - (void)memcpy(ar_sha(ah), IF_LLADDR(ifp), ah->ar_hln); + (void)memcpy(ar_sha(ah), enaddr, ah->ar_hln); } else { la = arplookup(itaddr.s_addr, 0, SIN_PROXY); if (la == NULL) { @@ -719,7 +737,7 @@ goto drop; } (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); - (void)memcpy(ar_sha(ah), IF_LLADDR(ifp), ah->ar_hln); + (void)memcpy(ar_sha(ah), enaddr, ah->ar_hln); rtfree(rt); /* @@ -861,6 +879,19 @@ ifa->ifa_flags |= RTF_CLONING; } +void +arp_ifinit2(ifp, ifa, enaddr) + struct ifnet *ifp; + struct ifaddr *ifa; + u_char *enaddr; +{ + if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) + arprequest(ifp, &IA_SIN(ifa)->sin_addr, + &IA_SIN(ifa)->sin_addr, enaddr); + ifa->ifa_rtrequest = arp_rtrequest; + ifa->ifa_flags |= RTF_CLONING; +} + static void arp_init(void) { Index: sys/netinet/if_ether.h =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sys/netinet/if_ether.h,v retrieving revision 1.30 diff -u -r1.30 if_ether.h --- sys/netinet/if_ether.h 7 Apr 2004 20:46:13 -0000 1.30 +++ sys/netinet/if_ether.h 8 Aug 2004 10:11:14 -0000 @@ -112,6 +112,7 @@ int arpresolve(struct ifnet *ifp, struct rtentry *rt, struct mbuf *m, struct sockaddr *dst, u_char *desten); void arp_ifinit(struct ifnet *, struct ifaddr *); +void arp_ifinit2(struct ifnet *, struct ifaddr *, u_char *); #endif #endif Index: sys/netinet/in.c =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sys/netinet/in.c,v retrieving revision 1.77 diff -u -r1.77 in.c --- sys/netinet/in.c 16 Aug 2004 18:32:07 -0000 1.77 +++ sys/netinet/in.c 23 Aug 2004 20:58:27 -0000 @@ -1,4 +1,32 @@ /* + * Copyright (C) 2001 WIDE Project. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. 
* @@ -55,6 +83,8 @@ static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t, struct ifnet *, struct thread *); +static int in_addprefix(struct in_ifaddr *, int); +static int in_scrubprefix(struct in_ifaddr *); static void in_socktrim(struct sockaddr_in *); static int in_ifinit(struct ifnet *, struct in_ifaddr *, struct sockaddr_in *, int); @@ -654,14 +684,7 @@ register struct ifnet *ifp; register struct in_ifaddr *ia; { - - if ((ia->ia_flags & IFA_ROUTE) == 0) - return; - if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) - rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST); - else - rtinit(&(ia->ia_ifa), (int)RTM_DELETE, 0); - ia->ia_flags &= ~IFA_ROUTE; + in_scrubprefix(ia); } /* @@ -743,26 +766,7 @@ return (0); flags |= RTF_HOST; } - - /*- - * Don't add host routes for interface addresses of - * 0.0.0.0 --> 0.255.255.255 netmask 255.0.0.0. This makes it - * possible to assign several such address pairs with consistent - * results (no host route) and is required by BOOTP. - * - * XXX: This is ugly ! There should be a way for the caller to - * say that they don't want a host route. - */ - if (ia->ia_addr.sin_addr.s_addr != INADDR_ANY || - ia->ia_netmask != IN_CLASSA_NET || - ia->ia_dstaddr.sin_addr.s_addr != htonl(IN_CLASSA_HOST)) { - if ((error = rtinit(&ia->ia_ifa, (int)RTM_ADD, flags)) != 0) { - ia->ia_addr = oldaddr; - return (error); - } - ia->ia_flags |= IFA_ROUTE; - } - + error = in_addprefix(ia, flags); /* * If the interface supports multicast, join the "all hosts" * multicast group on that interface. @@ -776,6 +780,120 @@ return (error); } +#define rtinitflags(x) \ + ((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \ + ? RTF_HOST : 0) +/* + * add a route to prefix ("connected route" in cisco terminology). + * does nothing if there's some interface address with the same prefix already. 
+ */ +static int +in_addprefix(target, flags) + struct in_ifaddr *target; + int flags; +{ + struct in_ifaddr *ia; + struct in_addr prefix, mask, p; + int error; + + if ((flags & RTF_HOST) != 0) + prefix = target->ia_dstaddr.sin_addr; + else { + prefix = target->ia_addr.sin_addr; + mask = target->ia_sockmask.sin_addr; + prefix.s_addr &= mask.s_addr; + } + +/* for (ia = in_ifaddr.tqh_first; ia; ia = ia->ia_list.tqe_next) { */ + TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { + if (rtinitflags(ia)) + p = ia->ia_dstaddr.sin_addr; + else { + p = ia->ia_addr.sin_addr; + p.s_addr &= ia->ia_sockmask.sin_addr.s_addr; + } + + if (prefix.s_addr != p.s_addr) + continue; + + /* + * if we got a matching prefix route inserted by other + * interface address, we don't need to bother + */ + if (ia->ia_flags & IFA_ROUTE) + return 0; + } + + /* + * no one seems to have the prefix route. insert it. + */ + error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags); + if (!error) + target->ia_flags |= IFA_ROUTE; + return error; +} + +/* + * remove a route to prefix ("connected route" in cisco terminology). + * re-installs the route by using another interface address, if there's one + * with the same prefix (otherwise we lose the route mistakenly). 
+ */ +static int +in_scrubprefix(target) + struct in_ifaddr *target; +{ + struct in_ifaddr *ia; + struct in_addr prefix, mask, p; + int error; + + if ((target->ia_flags & IFA_ROUTE) == 0) + return 0; + + if (rtinitflags(target)) + prefix = target->ia_dstaddr.sin_addr; + else { + prefix = target->ia_addr.sin_addr; + mask = target->ia_sockmask.sin_addr; + prefix.s_addr &= mask.s_addr; + } + +/* for (ia = in_ifaddr.tqh_first; ia; ia = ia->ia_list.tqe_next) { */ + TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { + if (rtinitflags(ia)) + p = ia->ia_dstaddr.sin_addr; + else { + p = ia->ia_addr.sin_addr; + p.s_addr &= ia->ia_sockmask.sin_addr.s_addr; + } + + if (prefix.s_addr != p.s_addr) + continue; + + /* + * if we got a matching prefix route, move IFA_ROUTE to it + */ + if ((ia->ia_flags & IFA_ROUTE) == 0) { + rtinit(&(target->ia_ifa), (int)RTM_DELETE, + rtinitflags(target)); + target->ia_flags &= ~IFA_ROUTE; + + error = rtinit(&ia->ia_ifa, (int)RTM_ADD, + rtinitflags(ia) | RTF_UP); + if (error == 0) + ia->ia_flags |= IFA_ROUTE; + return error; + } + } + + /* + * no one seems to have the prefix route. remove it. + */ + rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target)); + target->ia_flags &= ~IFA_ROUTE; + return 0; +} + +#undef rtinitflags /* * Return 1 if the address might be a local broadcast address. 
Index: sys/netinet/in.h =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sys/netinet/in.h,v retrieving revision 1.87 diff -u -r1.87 in.h --- sys/netinet/in.h 16 Aug 2004 18:32:07 -0000 1.87 +++ sys/netinet/in.h 23 Aug 2004 20:58:27 -0000 @@ -230,6 +230,7 @@ #define IPPROTO_IPCOMP 108 /* payload compression (IPComp) */ /* 101-254: Partly Unassigned */ #define IPPROTO_PIM 103 /* Protocol Independent Mcast */ +#define IPPROTO_CARP 112 /* CARP */ #define IPPROTO_PGM 113 /* PGM */ #define IPPROTO_PFSYNC 240 /* PFSYNC */ /* 255: Reserved */ @@ -351,6 +352,7 @@ #define INADDR_UNSPEC_GROUP (u_int32_t)0xe0000000 /* 224.0.0.0 */ #define INADDR_ALLHOSTS_GROUP (u_int32_t)0xe0000001 /* 224.0.0.1 */ #define INADDR_ALLRTRS_GROUP (u_int32_t)0xe0000002 /* 224.0.0.2 */ +#define INADDR_CARP_GROUP (u_int32_t)0xe0000012 /* 224.0.0.18 */ #define INADDR_PFSYNC_GROUP (u_int32_t)0xe00000f0 /* 224.0.0.240 */ #define INADDR_ALLMDNS_GROUP (u_int32_t)0xe00000fb /* 224.0.0.251 */ #define INADDR_MAX_LOCAL_GROUP (u_int32_t)0xe00000ff /* 224.0.0.255 */ Index: sys/netinet/in_proto.c =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sys/netinet/in_proto.c,v retrieving revision 1.73 diff -u -r1.73 in_proto.c --- sys/netinet/in_proto.c 16 Aug 2004 18:32:07 -0000 1.73 +++ sys/netinet/in_proto.c 23 Aug 2004 20:58:28 -0000 @@ -36,6 +36,7 @@ #include "opt_ipsec.h" #include "opt_inet6.h" #include "opt_pf.h" +#include "opt_carp.h" #include #include @@ -92,6 +93,10 @@ #include #endif +#ifdef DEV_CARP +#include +#endif + extern struct domain inetdomain; static struct pr_usrreqs nousrreqs; @@ -239,7 +244,15 @@ &rip_usrreqs }, #endif /* DEV_PFSYNC */ - /* raw wildcard */ +#ifdef DEV_CARP +{ SOCK_RAW, &inetdomain, IPPROTO_CARP, PR_ATOMIC|PR_ADDR, + carp_input, (pr_output_t*)rip_output, 0, rip_ctloutput, + 0, + 0, 0, 0, 0, + &rip_usrreqs +}, +#endif /* DEV_CARP */ +/* raw wildcard */ { SOCK_RAW, 
&inetdomain, 0, PR_ATOMIC|PR_ADDR, rip_input, 0, 0, rip_ctloutput, 0, @@ -286,3 +299,6 @@ #ifdef PIM SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM"); #endif +#ifdef DEV_CARP +SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP"); +#endif Index: sys/netinet/ip_carp.c =================================================================== RCS file: sys/netinet/ip_carp.c diff -N sys/netinet/ip_carp.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/netinet/ip_carp.c 14 Aug 2004 23:01:11 -0000 @@ -0,0 +1,2024 @@ +/* $FreeBSD$ */ + +/* + * Copyright (c) 2002 Michael Shalayeff. All rights reserved. + * Copyright (c) 2003 Ryan McBride. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "opt_carp.h" +#include "opt_bpf.h" +#include "opt_inet.h" +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef INET +#include +#include +#include +#include +#include +#include +#include +#endif + +#ifdef INET6 +#include +#include +#include +#include +#include +#endif + +#include +#include + +#define CARP_IFNAME "carp" +static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); +SYSCTL_DECL(_net_inet_carp); + +struct carp_softc { + struct arpcom sc_ac; /* Interface clue */ + int if_flags; /* UP/DOWN */ + struct ifnet *sc_ifp; /* Parent */ + struct in_ifaddr *sc_ia; /* primary iface address */ + struct ip_moptions sc_imo; +#ifdef INET6 + struct in6_ifaddr *sc_ia6; /* primary iface address v6 */ + struct ip6_moptions sc_im6o; +#endif /* INET6 */ + TAILQ_ENTRY(carp_softc) sc_list; + + enum { INIT = 0, BACKUP, MASTER } sc_state; + + int sc_flags_backup; + int sc_suppress; + + int sc_sendad_errors; +#define CARP_SENDAD_MAX_ERRORS 3 + int sc_sendad_success; +#define CARP_SENDAD_MIN_SUCCESS 3 + + int sc_vhid; + int sc_advskew; + int sc_naddrs; + int sc_naddrs6; + int sc_advbase; /* seconds */ + int sc_init_counter; + u_int64_t sc_counter; + + /* authentication */ +#define CARP_HMAC_PAD 64 + unsigned char sc_key[CARP_KEY_LEN]; + unsigned char sc_pad[CARP_HMAC_PAD]; + SHA1_CTX sc_sha1; + + struct callout sc_ad_tmo; /* advertisement timeout */ + struct callout sc_md_tmo; /* master down timeout */ + struct callout sc_md6_tmo; /* master down timeout */ + + LIST_ENTRY(carp_softc) sc_next; /* Interface clue */ +}; +#define sc_if sc_ac.ac_if + +int carp_number; +int carp_suppress_preempt = 0; +int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */ +SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, 
CTLFLAG_RW, + &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets"); +SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW, + &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode"); +SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW, + &carp_opts[CARPCTL_LOG], 0, "log bad carp packets"); +SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW, + &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses"); + +struct carpstats carpstats; +SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, + &carpstats, carpstats, + "CARP statistics (struct carpstats, netinet/ip_carp.h)"); + +struct carp_if { + TAILQ_HEAD(, carp_softc) vhif_vrs; + int vhif_nvrs; + + struct ifnet *vhif_ifp; + struct mtx vhif_mtx; +}; +/* lock per carp_if queue */ +#define CARP_LOCK_INIT(cif) mtx_init(&(cif)->vhif_mtx, "carp", \ + NULL, MTX_DEF) +#define CARP_LOCK_DESTROY(cif) mtx_destroy(&(cif->vhif_mtx)) +#define CARP_LOCK_ASSERT(cif) mtx_assert(&(cif)->vhif_mtx, MA_OWNED) +#define CARP_LOCK(cif) mtx_lock(&(cif)->vhif_mtx) +#define CARP_UNLOCK(cif) mtx_unlock(&(cif)->vhif_mtx) + +#define CARP_LOG(s,a) if (carp_opts[CARPCTL_LOG]) \ + log(LOG_INFO, "carp: " s "\n", (a)); +#define CARP_LOG1(sc,s,a) if (carp_opts[CARPCTL_LOG]) \ + log(LOG_INFO, "%s: " s "\n", (sc)->sc_if.if_xname, (a)); +#define CARP_LOG2(s) if (carp_opts[CARPCTL_LOG]) \ + log(LOG_INFO, "carp: " s "\n"); + +void carp_hmac_prepare(struct carp_softc *); +void carp_hmac_generate(struct carp_softc *, u_int32_t *, + unsigned char *); +int carp_hmac_verify(struct carp_softc *, u_int32_t *, + unsigned char *); +void carp_setroute(struct carp_softc *, int); +void carp_input_c(struct mbuf *, struct carp_softc *, + struct carp_header *, sa_family_t); +int carp_clone_create(struct if_clone *, int); +void carp_clone_destroy(struct ifnet *); +void carpdetach(struct carp_softc *); +int carp_prepare_ad(struct mbuf *, struct carp_softc *, + struct carp_header *); +void 
carp_send_ad_all(void); +void carp_send_ad(void *); +void carp_send_arp(struct carp_softc *); +void carp_master_down(void *); +int carp_ioctl(struct ifnet *, u_long, caddr_t); +int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *, + struct rtentry *); +void carp_start(struct ifnet *); +void carp_setrun(struct carp_softc *, sa_family_t); +void carp_set_state(struct carp_softc *, int); +int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); +enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; + +int carp_set_addr(struct carp_softc *, struct sockaddr_in *); +int carp_del_addr(struct carp_softc *, struct sockaddr_in *); +#ifdef INET6 +void carp_send_na(struct carp_softc *); +int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); +int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); +#endif + +static LIST_HEAD(, carp_softc) carpif_list; +IFC_SIMPLE_DECLARE(carp, 1); + +static __inline u_int16_t +carp_cksum(struct mbuf *m, int len) +{ + return (in_cksum(m, len)); +} + +void +carp_hmac_prepare(struct carp_softc *sc) +{ + u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; + u_int8_t vhid = sc->sc_vhid & 0xff; + struct ifaddr *ifa; + int i; +#ifdef INET6 + struct in6_addr in6; +#endif + + /* compute ipad from key */ + bzero(sc->sc_pad, sizeof(sc->sc_pad)); + bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); + for (i = 0; i < sizeof(sc->sc_pad); i++) + sc->sc_pad[i] ^= 0x36; + + /* precompute first part of inner hash */ + SHA1Init(&sc->sc_sha1); + SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); + SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); + SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); + SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); +#ifdef INET + TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { + if (ifa->ifa_addr->sa_family == AF_INET) + SHA1Update(&sc->sc_sha1, + (void *)&ifatoia(ifa)->ia_addr.sin_addr.s_addr, + sizeof(struct in_addr)); + } +#endif /* INET */ +#ifdef INET6 
+ TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { + if (ifa->ifa_addr->sa_family == AF_INET6) { + in6 = ifatoia6(ifa)->ia_addr.sin6_addr; + if (IN6_IS_ADDR_LINKLOCAL(&in6)) + in6.s6_addr16[1] = 0; + SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6)); + } + } +#endif /* INET6 */ + + /* convert ipad to opad */ + for (i = 0; i < sizeof(sc->sc_pad); i++) + sc->sc_pad[i] ^= 0x36 ^ 0x5c; +} + +void +carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2], + unsigned char md[20]) +{ + SHA1_CTX sha1ctx; + + /* fetch first half of inner hash */ + bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); + + SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); + SHA1Final(md, &sha1ctx); + + /* outer hash */ + SHA1Init(&sha1ctx); + SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); + SHA1Update(&sha1ctx, md, 20); + SHA1Final(md, &sha1ctx); +} + +int +carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2], + unsigned char md[20]) +{ + unsigned char md2[20]; + + carp_hmac_generate(sc, counter, md2); + + return (bcmp(md, md2, sizeof(md2))); +} + +void +carp_setroute(struct carp_softc *sc, int cmd) +{ + struct ifaddr *ifa; + int s; + + s = splnet(); + TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { + if (ifa->ifa_addr->sa_family == AF_INET && sc->sc_ifp != NULL) { + int count = carp_addrcount( + (struct carp_if *)sc->sc_ifp->if_carp, + ifatoia(ifa), CARP_COUNT_MASTER); + + if ((cmd == RTM_ADD && count == 1) || + (cmd == RTM_DELETE && count == 0)) + rtinit(ifa, cmd, RTF_UP | RTF_HOST); + } +#ifdef INET6 +/* + * XXX: LATER + * + * if (ifa->ifa_addr->sa_family == AF_INET6) { + * if (cmd == RTM_ADD) + * in6_ifaddloop(ifa); + * else + * in6_ifremloop(ifa); + * } + */ +#endif /* INET6 */ + } + splx(s); +} + +int +carp_clone_create(struct if_clone *ifc, int unit) +{ + + struct carp_softc *sc; + struct ifnet *ifp; + + MALLOC(sc, struct carp_softc *, sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); + + carp_number = unit; + + sc->sc_flags_backup = 0; + sc->sc_suppress 
= 0; + sc->sc_advbase = CARP_DFLTINTV; + sc->sc_vhid = -1; /* required setting */ + sc->sc_advskew = 0; + sc->sc_init_counter = 1; + sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */ +#ifdef INET6 + sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; +#endif + + callout_init(&sc->sc_ad_tmo, 0); + callout_init(&sc->sc_md_tmo, 0); + callout_init(&sc->sc_md6_tmo, 0); + + ifp = &sc->sc_if; + ifp->if_softc = sc; + if_initname(ifp, CARP_IFNAME, unit); + ifp->if_mtu = ETHERMTU; + ifp->if_flags = 0; + ifp->if_ioctl = carp_ioctl; + ifp->if_output = carp_looutput; + ifp->if_start = carp_start; + ifp->if_type = IFT_PROPVIRTUAL; + ifp->if_snd.ifq_maxlen = ifqmaxlen; + ifp->if_hdrlen = 0; + if_attach(ifp); + LIST_INSERT_HEAD(&carpif_list, sc, sc_next); + bpfattach(&sc->sc_if, DLT_LOOP, sizeof(u_int32_t)); + return (0); +} + +void +carp_clone_destroy(struct ifnet *ifp) +{ + struct carp_softc *sc = ifp->if_softc; + struct carp_if *cif; + struct ip_moptions *imo = &sc->sc_imo; +#ifdef INET6 + struct ip6_moptions *im6o = &sc->sc_im6o; +#endif + +/* carpdetach(sc); */ + + callout_stop(&sc->sc_ad_tmo); + callout_stop(&sc->sc_md_tmo); + callout_stop(&sc->sc_md6_tmo); + + if (imo->imo_num_memberships) { + in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); + imo->imo_multicast_ifp = NULL; + } +#ifdef INET6 + while (!LIST_EMPTY(&im6o->im6o_memberships)) { + struct in6_multi_mship *imm = + LIST_FIRST(&im6o->im6o_memberships); + LIST_REMOVE(imm, i6mm_chain); + in6_leavegroup(imm); + } + im6o->im6o_multicast_ifp = NULL; +#endif + + /* Remove ourself from parents if_carp queue */ + if (sc->sc_ifp && (cif = sc->sc_ifp->if_carp)) { + CARP_LOCK(cif); + TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); + if (!--cif->vhif_nvrs) { + sc->sc_ifp->if_carp = NULL; + CARP_LOCK_DESTROY(cif); + FREE(cif, M_CARP); + } else { + CARP_UNLOCK(cif); + } + } + + bpfdetach(ifp); + if_detach(ifp); + LIST_REMOVE(sc, sc_next); + free(sc, M_CARP); +} + +/* + * process input packet. 
+ * we have rearranged checks order compared to the rfc, + * but it seems more efficient this way or not possible otherwise. + */ +void +carp_input(struct mbuf *m, int hlen) +{ + struct carp_softc *sc = NULL; + struct ip *ip = mtod(m, struct ip *); + struct carp_header *ch; + int iplen, len; + + carpstats.carps_ipackets++; + + if (!carp_opts[CARPCTL_ALLOW]) { + m_freem(m); + return; + } + + /* check if received on a valid carp interface */ + if (m->m_pkthdr.rcvif->if_carp == NULL) { + carpstats.carps_badif++; + CARP_LOG("packet received on non-carp interface: %s", + m->m_pkthdr.rcvif->if_xname); + m_freem(m); + return; + } + + /* verify that the IP TTL is 255. */ + if (ip->ip_ttl != CARP_DFLTTL) { + carpstats.carps_badttl++; + CARP_LOG("received ttl %d != CARP_DFLTTL", ip->ip_ttl); + m_freem(m); + return; + } + + iplen = ip->ip_hl << 2; + + if (m->m_pkthdr.len < iplen + sizeof(*ch)) { + carpstats.carps_badlen++; + CARP_LOG("received len %d < sizeof(struct carp_header)", + m->m_len - sizeof(struct ip)); + m_freem(m); + return; + } + + if (iplen + sizeof(*ch) < m->m_len) { + if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { + carpstats.carps_hdrops++; + /* CARP_LOG ? 
*/ + return; + } + ip = mtod(m, struct ip *); + } + ch = (struct carp_header *)((char *)ip + iplen); + + /* + * verify that the received packet length is + * equal to the CARP header + */ + len = iplen + sizeof(*ch); + if (len > m->m_pkthdr.len) { + carpstats.carps_badlen++; + CARP_LOG("packet too short %d", m->m_pkthdr.len); + m_freem(m); + return; + } + + if ((m = m_pullup(m, len)) == NULL) { + carpstats.carps_hdrops++; + return; + } + ip = mtod(m, struct ip *); + ch = (struct carp_header *)((char *)ip + iplen); + + /* verify the CARP checksum */ + m->m_data += iplen; + if (carp_cksum(m, len - iplen)) { + carpstats.carps_badsum++; + CARP_LOG2("checksum failed"); + m_freem(m); + return; + } + m->m_data -= iplen; + + carp_input_c(m, sc, ch, AF_INET); +} + +#ifdef INET6 +int +carp6_input(struct mbuf **mp, int *offp, int proto) +{ + struct carp_softc *sc = NULL; + struct mbuf *m = *mp; + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + struct carp_header *ch; + u_int len; + + carpstats.carps_ipackets6++; + + if (!carp_opts[CARPCTL_ALLOW]) { + m_freem(m); + return (IPPROTO_DONE); + } + + /* check if received on a valid carp interface */ + if (m->m_pkthdr.rcvif->if_carp == NULL) { + carpstats.carps_badif++; + CARP_LOG("packet received on non-carp interface: %s", + m->m_pkthdr.rcvif->if_xname); + m_freem(m); + return (IPPROTO_DONE); + } + + /* verify that the IP TTL is 255 */ + if (ip6->ip6_hlim != CARP_DFLTTL) { + carpstats.carps_badttl++; + CARP_LOG("received ttl %d != 255", ip6->ip6_hlim); + m_freem(m); + return (IPPROTO_DONE); + } + + /* verify that we have a complete carp packet */ + len = m->m_len; + IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); + if (ch == NULL) { + carpstats.carps_badlen++; + CARP_LOG("packet size %d too small", len); + return (IPPROTO_DONE); + } + + + /* verify the CARP checksum */ + m->m_data += *offp; + if (carp_cksum(m, sizeof(*ch))) { + carpstats.carps_badsum++; + CARP_LOG2("checksum failed"); + m_freem(m); + return 
(IPPROTO_DONE); + } + m->m_data -= *offp; + + carp_input_c(m, sc, ch, AF_INET6); + return (IPPROTO_DONE); +} +#endif /* INET6 */ + +void +carp_input_c(struct mbuf *m, struct carp_softc *sc, + struct carp_header *ch, sa_family_t af) +{ + struct ifnet *ifp = m->m_pkthdr.rcvif; + u_int64_t tmp_counter; + struct timeval sc_tv, ch_tv; + + /* verify that the VHID is valid on the receiving interface */ + CARP_LOCK(ifp->if_carp); + TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list) + if (sc->sc_vhid == ch->carp_vhid) + break; + CARP_UNLOCK(ifp->if_carp); + if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != + (IFF_UP|IFF_RUNNING)) { + carpstats.carps_badvhid++; + m_freem(m); + return; + } + + getmicrotime(&sc->sc_if.if_lastchange); + sc->sc_if.if_ipackets++; + sc->sc_if.if_ibytes += m->m_pkthdr.len; + + if (sc->sc_if.if_bpf) { + /* + * We need to prepend the address family as + * a four byte field. Cons up a dummy header + * to pacify bpf. This is safe because bpf + * will only read from the mbuf (i.e., it won't + * try to free it or keep a pointer to it). + */ + struct mbuf m0; + u_int32_t maf = htonl(af); + + m0.m_next = m; + m0.m_len = sizeof(maf); + m0.m_data = (char *)&maf; + BPF_MTAP(&sc->sc_if, &m0); + } + + /* verify the CARP version. 
*/ + if (ch->carp_version != CARP_VERSION) { + carpstats.carps_badver++; + sc->sc_if.if_ierrors++; + CARP_LOG1(sc, "invalid version %d", ch->carp_version); + m_freem(m); + return; + } + + /* verify the hash */ + if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { + carpstats.carps_badauth++; + sc->sc_if.if_ierrors++; + CARP_LOG2("incorrect hash"); + m_freem(m); + return; + } + + tmp_counter = ntohl(ch->carp_counter[0]); + tmp_counter = tmp_counter<<32; + tmp_counter += ntohl(ch->carp_counter[1]); + + /* XXX Replay protection goes here */ + + sc->sc_init_counter = 0; + sc->sc_counter = tmp_counter; + + sc_tv.tv_sec = sc->sc_advbase; + if (carp_suppress_preempt && sc->sc_advskew < 240) + sc_tv.tv_usec = 240 * 1000000 / 256; + else + sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; + ch_tv.tv_sec = ch->carp_advbase; + ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; + + switch (sc->sc_state) { + case INIT: + break; + case MASTER: + /* + * If we receive an advertisement from a master who's going to + * be more frequent than us, go into BACKUP state. + */ + if (timevalcmp(&sc_tv, &ch_tv, >) || + timevalcmp(&sc_tv, &ch_tv, ==)) { + callout_stop(&sc->sc_ad_tmo); + carp_set_state(sc, BACKUP); + carp_setrun(sc, 0); + carp_setroute(sc, RTM_DELETE); + } + break; + case BACKUP: + /* + * If we're pre-empting masters who advertise slower than us, + * and this one claims to be slower, treat him as down. + */ + if (carp_opts[CARPCTL_PREEMPT] && + timevalcmp(&sc_tv, &ch_tv, <)) { + carp_master_down(sc); + break; + } + + /* + * If the master is going to advertise at such a low frequency + * that he's guaranteed to time out, we'd might as well just + * treat him as timed out now. + */ + sc_tv.tv_sec = sc->sc_advbase * 3; + if (timevalcmp(&sc_tv, &ch_tv, <)) { + carp_master_down(sc); + break; + } + + /* + * Otherwise, we reset the counter and wait for the next + * advertisement. 
+		 */
+		carp_setrun(sc, af);
+		break;
+	}
+
+	m_freem(m);
+	return;
+}
+
+/*
+ * Stop the softc's three callouts and remove every AF_INET address that is
+ * configured on the carp interface.
+ *
+ * Only AF_INET entries are touched; entries of any other family stay on
+ * if_addrlist.  The successor is saved before an entry is unlinked, because
+ * re-reading TAILQ_FIRST() on every pass would spin forever as soon as the
+ * head of the list is not an AF_INET entry.
+ */
+void
+carpdetach(struct carp_softc *sc)
+{
+	struct ifaddr *ifa, *next;
+
+	callout_stop(&sc->sc_ad_tmo);
+	callout_stop(&sc->sc_md_tmo);
+	callout_stop(&sc->sc_md6_tmo);
+
+	for (ifa = TAILQ_FIRST(&sc->sc_if.if_addrlist); ifa != NULL;
+	    ifa = next) {
+		struct in_ifaddr *ia;
+
+		/* fetch the successor first: ifa may be unlinked below */
+		next = TAILQ_NEXT(ifa, ifa_link);
+		if (ifa->ifa_addr->sa_family != AF_INET)
+			continue;
+
+		ia = ifatoia(ifa);
+		carp_del_addr(sc, &ia->ia_addr);
+
+		/* ripped screaming from in_control(SIOCDIFADDR) */
+		in_ifscrub(&sc->sc_if, ia);
+		TAILQ_REMOVE(&sc->sc_if.if_addrlist, ifa, ifa_link);
+		TAILQ_REMOVE(&in_ifaddrhead, ia, ia_link);
+		IFAFREE((&ia->ia_ifa));
+	}
+}
+
+/*
+ * Detach an interface from the carp: run carpdetach() on every softc
+ * queued on the interface's carp_if.
+ *
+ * NOTE(review): carpdetach() calls carp_del_addr(), which itself takes
+ * CARP_LOCK(cif), unlinks sc from vhif_vrs and may FREE cif, while this
+ * loop holds the same lock and is still walking that list -- confirm that
+ * this is safe.
+ */
+void
+carp_ifdetach(struct ifnet *ifp)
+{
+	struct carp_softc *sc;
+	struct carp_if *cif = (struct carp_if *)ifp->if_carp;
+
+	CARP_LOCK(cif);
+	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
+		carpdetach(sc);
+	CARP_UNLOCK(cif);
+}
+
+/*
+ * Fill in the counter and HMAC of the advertisement header ch and tag the
+ * mbuf m with a pointer to the sending softc.
+ *
+ * The counter is seeded from arc4random() while sc_init_counter is set and
+ * incremented otherwise.  Returns 0 on success; if no tag can be allocated
+ * m is freed, if_oerrors is bumped and ENOMEM is returned.
+ */
+int
+carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
+{
+	struct m_tag *mtag;
+
+	if (sc->sc_init_counter) {
+		/* this could also be seconds since unix epoch */
+		sc->sc_counter = arc4random();
+		sc->sc_counter = sc->sc_counter << 32;
+		sc->sc_counter += arc4random();
+	} else
+		sc->sc_counter++;
+
+	/* the 64-bit counter travels as two 32-bit words, high word first */
+	ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
+	ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
+
+	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
+
+	/* Tag packet for carp_output */
+	mtag = m_tag_get(PACKET_TAG_CARP,
+	    sizeof(struct carp_softc *), M_NOWAIT);
+	if (mtag == NULL) {
+		m_freem(m);
+		sc->sc_if.if_oerrors++;
+		return (ENOMEM);
+	}
+	bcopy(&sc, (caddr_t)(mtag + 1), sizeof(struct carp_softc *));
+	m_tag_prepend(m, mtag);
+
+	return (0);
+}
+
+void
+carp_send_ad_all(void)
+{
+	struct ifnet *ifp;
+	struct carp_if *cif;
+	struct carp_softc *vh;
+
+	TAILQ_FOREACH(ifp, &ifnet, if_list) {
+		if (ifp->if_carp == NULL)
+			continue;
+
+		cif = (struct carp_if *)ifp->if_carp;
+		CARP_LOCK(cif);
+
TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { + if ((vh->sc_ac.ac_if.if_flags & (IFF_UP|IFF_RUNNING)) && + vh->sc_state == MASTER) + carp_send_ad(vh); + } + CARP_UNLOCK(cif); + } +} + +void +carp_send_ad(void *v) +{ + struct carp_header ch; + struct timeval tv; + struct carp_softc *sc = v; + struct carp_header *ch_ptr; + struct mbuf *m; + int len, advbase, advskew; + + /* bow out if we've lost our UPness or RUNNINGuiness */ + if ((sc->sc_if.if_flags & + (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) { + advbase = 255; + advskew = 255; + } else { + advbase = sc->sc_advbase; + if (!carp_suppress_preempt || sc->sc_advskew > 240) + advskew = sc->sc_advskew; + else + advskew = 240; + tv.tv_sec = advbase; + tv.tv_usec = advskew * 1000000 / 256; + } + + ch.carp_version = CARP_VERSION; + ch.carp_type = CARP_ADVERTISEMENT; + ch.carp_vhid = sc->sc_vhid; + ch.carp_advbase = advbase; + ch.carp_advskew = advskew; + ch.carp_authlen = 7; /* XXX DEFINE */ + ch.carp_pad1 = 0; /* must be zero */ + ch.carp_cksum = 0; + +#ifdef INET + if (sc->sc_ia) { + struct ip *ip; + + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (m == NULL) { + sc->sc_ac.ac_if.if_oerrors++; + carpstats.carps_onomem++; + /* XXX maybe less ? 
*/ + if (advbase != 255 || advskew != 255) + callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), + carp_send_ad, sc); + return; + } + len = sizeof(*ip) + sizeof(ch); + m->m_pkthdr.len = len; + m->m_pkthdr.rcvif = NULL; + m->m_len = len; + MH_ALIGN(m, m->m_len); + m->m_flags |= M_MCAST; + ip = mtod(m, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(*ip) >> 2; + ip->ip_tos = IPTOS_LOWDELAY; +/* XXX: FreeBSD expects host byte order for ip_output() */ + ip->ip_len = len; + ip->ip_id = ip_newid(); + ip->ip_off = IP_DF; + ip->ip_ttl = CARP_DFLTTL; + ip->ip_p = IPPROTO_CARP; + ip->ip_sum = 0; + ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr; + ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); + + ch_ptr = (struct carp_header *)(&ip[1]); + bcopy(&ch, ch_ptr, sizeof(ch)); + if (carp_prepare_ad(m, sc, ch_ptr)) + return; + + m->m_data += sizeof(*ip); + ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); + m->m_data -= sizeof(*ip); + + getmicrotime(&sc->sc_if.if_lastchange); + sc->sc_ac.ac_if.if_opackets++; + sc->sc_ac.ac_if.if_obytes += len; + carpstats.carps_opackets++; + + if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { + sc->sc_if.if_oerrors++; + if (sc->sc_sendad_errors < INT_MAX) + sc->sc_sendad_errors++; + if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { + carp_suppress_preempt++; + if (carp_suppress_preempt == 1) + carp_send_ad_all(); + } + sc->sc_sendad_success = 0; + } else { + if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { + if (++sc->sc_sendad_success >= + CARP_SENDAD_MIN_SUCCESS) { + carp_suppress_preempt--; + sc->sc_sendad_errors = 0; + } + } else + sc->sc_sendad_errors = 0; + } + } +#endif /* INET */ +#ifdef INET6 + if (sc->sc_ia6) { + struct ip6_hdr *ip6; + + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (m == NULL) { + sc->sc_ac.ac_if.if_oerrors++; + carpstats.carps_onomem++; + /* XXX maybe less ? 
*/ + if (advbase != 255 || advskew != 255) + callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), + carp_send_ad, sc); + return; + } + len = sizeof(*ip6) + sizeof(ch); + m->m_pkthdr.len = len; + m->m_pkthdr.rcvif = NULL; + m->m_len = len; + MH_ALIGN(m, m->m_len); + m->m_flags |= M_MCAST; + ip6 = mtod(m, struct ip6_hdr *); + bzero(ip6, sizeof(*ip6)); + ip6->ip6_vfc |= IPV6_VERSION; + ip6->ip6_hlim = CARP_DFLTTL; + ip6->ip6_nxt = IPPROTO_CARP; + bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, + sizeof(struct in6_addr)); + /* set the multicast destination */ + + ip6->ip6_dst.s6_addr8[0] = 0xff; + ip6->ip6_dst.s6_addr8[1] = 0x02; + ip6->ip6_dst.s6_addr8[15] = 0x12; + + ch_ptr = (struct carp_header *)(&ip6[1]); + bcopy(&ch, ch_ptr, sizeof(ch)); + if (carp_prepare_ad(m, sc, ch_ptr)) + return; + + m->m_data += sizeof(*ip6); + ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); + m->m_data -= sizeof(*ip6); + + getmicrotime(&sc->sc_if.if_lastchange); + sc->sc_if.if_opackets++; + sc->sc_if.if_obytes += len; + carpstats.carps_opackets6++; + + if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { + sc->sc_if.if_oerrors++; + if (sc->sc_sendad_errors < INT_MAX) + sc->sc_sendad_errors++; + if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { + carp_suppress_preempt++; + if (carp_suppress_preempt == 1) + carp_send_ad_all(); + } + sc->sc_sendad_success = 0; + } else { + if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { + if (++sc->sc_sendad_success >= + CARP_SENDAD_MIN_SUCCESS) { + carp_suppress_preempt--; + sc->sc_sendad_errors = 0; + } + } else + sc->sc_sendad_errors = 0; + } + } +#endif /* INET6 */ + + if (advbase != 255 || advskew != 255) + callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), + carp_send_ad, sc); + +} + +/* + * Broadcast a gratuitous ARP request containing + * the virtual router MAC address for each IP address + * associated with the virtual router. 
+ */ +void +carp_send_arp(struct carp_softc *sc) +{ + struct ifaddr *ifa; + + TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { + + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + +/* arprequest(sc->sc_ifp, &in, &in, sc->sc_ac.ac_enaddr); */ + arp_ifinit2(sc->sc_ifp, ifa, sc->sc_ac.ac_enaddr); + + DELAY(1000); /* XXX */ + } +} + +#ifdef INET6 +void +carp_send_na(struct carp_softc *sc) +{ + struct ifaddr *ifa; + struct in6_addr *in6; + static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; + + TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { + + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + + in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; + nd6_na_output(sc->sc_ifp, &mcast, in6, + ND_NA_FLAG_OVERRIDE, 1, NULL); + DELAY(1000); /* XXX */ + } +} +#endif /* INET6 */ + +int +carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) +{ + struct carp_softc *vh; + struct ifaddr *ifa; + int count = 0; + + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { + if ((type == CARP_COUNT_RUNNING && + (vh->sc_ac.ac_if.if_flags & (IFF_UP|IFF_RUNNING)) == + (IFF_UP|IFF_RUNNING)) || + (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { + TAILQ_FOREACH(ifa, &vh->sc_ac.ac_if.if_addrlist, + ifa_list) { + if (ifa->ifa_addr->sa_family == AF_INET && + ia->ia_addr.sin_addr.s_addr == + ifatoia(ifa)->ia_addr.sin_addr.s_addr) + count++; + } + } + } + return (count); +} + +int +carp_iamatch(void *v, struct in_ifaddr *ia, + struct in_addr *isaddr, u_int8_t **enaddr) +{ + struct carp_if *cif = v; + struct carp_softc *vh; + int index, count = 0; + struct ifaddr *ifa; + + CARP_LOCK(cif); + + if (carp_opts[CARPCTL_ARPBALANCE]) { + /* + * XXX proof of concept implementation. + * We use the source ip to decide which virtual host should + * handle the request. If we're master of that virtual host, + * then we respond, otherwise, just drop the arp packet on + * the floor. 
+ */ + count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING); + if (count == 0) { + /* should never reach this */ + CARP_UNLOCK(cif); + return (0); + } + + /* this should be a hash, like pf_hash() */ + index = isaddr->s_addr % count; + count = 0; + + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { + if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == + (IFF_UP|IFF_RUNNING)) { + TAILQ_FOREACH(ifa, &vh->sc_if.if_addrlist, + ifa_list) { + if (ifa->ifa_addr->sa_family == + AF_INET && + ia->ia_addr.sin_addr.s_addr == + ifatoia(ifa)->ia_addr.sin_addr.s_addr) { + if (count == index) { + if (vh->sc_state == + MASTER) { + *enaddr = vh->sc_ac.ac_enaddr; + CARP_UNLOCK(cif); + return (1); + } else { + CARP_UNLOCK(cif); + return (0); + } + } + count++; + } + } + } + } + } else { + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { + if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == + (IFF_UP|IFF_RUNNING) && ia->ia_ifp == + &vh->sc_if) { + *enaddr = vh->sc_ac.ac_enaddr; + CARP_UNLOCK(cif); + return (1); + } + } + } + CARP_UNLOCK(cif); + return (0); +} + +#ifdef INET6 +struct ifaddr * +carp_iamatch6(void *v, struct in6_addr *taddr) +{ + struct carp_if *cif = v; + struct carp_softc *vh; + struct ifaddr *ifa; + + CARP_LOCK(cif); + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { + TAILQ_FOREACH(ifa, &vh->sc_if.if_addrlist, ifa_list) { + if (IN6_ARE_ADDR_EQUAL(taddr, + &ifatoia6(ifa)->ia_addr.sin6_addr) && + ((vh->sc_if.if_flags & + (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))) { + CARP_UNLOCK(cif); + return (ifa); + } + } + } + CARP_UNLOCK(cif); + + return (NULL); +} + +void * +carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr) +{ + struct m_tag *mtag; + struct carp_if *cif = v; + struct carp_softc *sc; + struct ifaddr *ifa; + + CARP_LOCK(cif); + TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { + TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { + if (IN6_ARE_ADDR_EQUAL(taddr, + &ifatoia6(ifa)->ia_addr.sin6_addr) && + ((sc->sc_if.if_flags & + (IFF_UP|IFF_RUNNING)) == 
(IFF_UP|IFF_RUNNING))) { + mtag = m_tag_get(PACKET_TAG_CARP, + sizeof(struct carp_softc *), M_NOWAIT); + if (mtag == NULL) { + /* better a bit than nothing */ + CARP_UNLOCK(cif); + return (sc->sc_ac.ac_enaddr); + } + bcopy(&sc, (caddr_t)(mtag + 1), + sizeof(struct carp_softc *)); + m_tag_prepend(m, mtag); + + CARP_UNLOCK(cif); + return (sc->sc_ac.ac_enaddr); + } + } + } + CARP_UNLOCK(cif); + + return (NULL); +} +#endif + +struct ifnet * +carp_forus(void *v, void *dhost) +{ + struct carp_if *cif = v; + struct carp_softc *vh; + u_int8_t *ena = dhost; + + if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) + return (NULL); + + CARP_LOCK(cif); + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) + if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == + (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER && + !bcmp(dhost, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN)) { + CARP_UNLOCK(cif); + return (&vh->sc_if); + } + + CARP_UNLOCK(cif); + return (NULL); +} + +void +carp_master_down(void *v) +{ + struct carp_softc *sc = v; + + switch (sc->sc_state) { + case INIT: + printf("%s: master_down event in INIT state\n", + sc->sc_if.if_xname); + break; + case MASTER: + break; + case BACKUP: + carp_set_state(sc, MASTER); + carp_send_ad(sc); + carp_send_arp(sc); +#ifdef INET6 + carp_send_na(sc); +#endif /* INET6 */ + carp_setrun(sc, 0); + carp_setroute(sc, RTM_ADD); + break; + } +} + +/* + * When in backup state, af indicates whether to reset the master down timer + * for v4 or v6. If it's set to zero, reset the ones which are already pending. 
+ */ +void +carp_setrun(struct carp_softc *sc, sa_family_t af) +{ + struct timeval tv; + + if (sc->sc_if.if_flags & IFF_UP && + sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6)) + sc->sc_if.if_flags |= IFF_RUNNING; + else { + sc->sc_if.if_flags &= ~IFF_RUNNING; + carp_setroute(sc, RTM_DELETE); + return; + } + + switch (sc->sc_state) { + case INIT: + if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { + carp_send_ad(sc); + carp_send_arp(sc); +#ifdef INET6 + carp_send_na(sc); +#endif /* INET6 */ + carp_set_state(sc, MASTER); + carp_setroute(sc, RTM_ADD); + } else { + carp_set_state(sc, BACKUP); + carp_setroute(sc, RTM_DELETE); + carp_setrun(sc, 0); + } + break; + case BACKUP: + callout_stop(&sc->sc_ad_tmo); + tv.tv_sec = 3 * sc->sc_advbase; + tv.tv_usec = sc->sc_advskew * 1000000 / 256; + switch (af) { +#ifdef INET + case AF_INET: + callout_reset(&sc->sc_md_tmo, tvtohz(&tv), + carp_master_down, sc); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), + carp_master_down, sc); + break; +#endif /* INET6 */ + default: + if (sc->sc_naddrs) + callout_reset(&sc->sc_md_tmo, tvtohz(&tv), + carp_master_down, sc); + if (sc->sc_naddrs6) + callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), + carp_master_down, sc); + break; + } + break; + case MASTER: + tv.tv_sec = sc->sc_advbase; + tv.tv_usec = sc->sc_advskew * 1000000 / 256; + callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), + carp_send_ad, sc); + break; + } +} + +int +carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) +{ + struct ifnet *ifp; + struct carp_if *cif; + struct in_ifaddr *ia, *ia_if; + struct ip_moptions *imo = &sc->sc_imo; + struct in_addr addr; + u_long iaddr = htonl(sin->sin_addr.s_addr); + int own, error; + + if (sin->sin_addr.s_addr == 0) { + if (!(sc->sc_if.if_flags & IFF_UP)) + carp_set_state(sc, INIT); + if (sc->sc_naddrs) + sc->sc_if.if_flags |= IFF_UP; + carp_setrun(sc, 0); + return (0); + } + + /* we have to do it by hands to check we won't 
match on us */ + ia_if = NULL; own = 0; + TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { + /* and, yeah, we need a multicast-capable iface too */ + if (ia->ia_ifp != &sc->sc_if && + (ia->ia_ifp->if_flags & IFF_MULTICAST) && + (iaddr & ia->ia_subnetmask) == ia->ia_subnet) { + if (!ia_if) + ia_if = ia; + if (sin->sin_addr.s_addr == + ia->ia_addr.sin_addr.s_addr) + own++; + } + } + + if (!ia_if) + return (EADDRNOTAVAIL); + + ia = ia_if; + ifp = ia->ia_ifp; + + if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || + (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) + return (EADDRNOTAVAIL); + + if (imo->imo_num_memberships == 0) { + addr.s_addr = htonl(INADDR_CARP_GROUP); + if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL) + return (ENOBUFS); + imo->imo_num_memberships++; + imo->imo_multicast_ifp = ifp; + imo->imo_multicast_ttl = CARP_DFLTTL; + imo->imo_multicast_loop = 0; + } + + if (!ifp->if_carp) { + + MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP, + M_WAITOK|M_ZERO); + if (!cif) { + error = ENOBUFS; + goto cleanup; + } + if ((error = ifpromisc(ifp, 1))) { + FREE(cif, M_CARP); + goto cleanup; + } + + CARP_LOCK_INIT(cif); + CARP_LOCK(cif); + cif->vhif_ifp = ifp; + TAILQ_INIT(&cif->vhif_vrs); + ifp->if_carp = cif; + + } else { + struct carp_softc *vr; + + cif = (struct carp_if *)ifp->if_carp; + CARP_LOCK(cif); + TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) + if (vr != sc && vr->sc_vhid == sc->sc_vhid) { + CARP_UNLOCK(cif); + error = EINVAL; + goto cleanup; + } + } + sc->sc_ia = ia; + sc->sc_ifp = ifp; + + { /* XXX prevent endless loop if already in queue */ + struct carp_softc *vr, *after = NULL; + int myself = 0; + cif = (struct carp_if *)ifp->if_carp; + + /* XXX: cif should not change, right? 
So we still hold the lock */
+		CARP_LOCK_ASSERT(cif);
+
+		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
+			if (vr == sc)
+				myself = 1;
+			if (vr->sc_vhid < sc->sc_vhid)
+				after = vr;
+		}
+
+		if (!myself) {
+			/* We're trying to keep things in order */
+			if (after == NULL) {
+				TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
+			} else {
+				TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
+			}
+			cif->vhif_nvrs++;
+		}
+	}
+
+	CARP_UNLOCK(cif);
+
+	sc->sc_naddrs++;
+	sc->sc_if.if_flags |= IFF_UP;
+	if (own)
+		sc->sc_advskew = 0;
+	carp_set_state(sc, INIT);
+	carp_setrun(sc, 0);
+
+	return (0);
+
+cleanup:
+	in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
+	return (error);
+}
+
+/*
+ * Account for the removal of one IPv4 address from the carp interface.
+ *
+ * When the last address goes away (sc_naddrs drops to zero) the
+ * advertisement callout is stopped, IFF_UP and IFF_RUNNING are cleared,
+ * the vhid is reset to -1, the multicast membership is dropped and sc is
+ * unlinked from the parent's carp_if.  The carp_if itself is released once
+ * its last softc is gone.
+ *
+ * sin is not examined.  Always returns 0.
+ */
+int
+carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
+{
+	int error = 0;
+
+	if (!--sc->sc_naddrs) {
+		struct carp_if *cif = (struct carp_if *)sc->sc_ifp->if_carp;
+		struct ip_moptions *imo = &sc->sc_imo;
+
+		callout_stop(&sc->sc_ad_tmo);
+		sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
+		sc->sc_vhid = -1;
+		in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
+		imo->imo_multicast_ifp = NULL;
+		CARP_LOCK(cif);
+		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
+		if (!--cif->vhif_nvrs) {
+			sc->sc_ifp->if_carp = NULL;
+			CARP_LOCK_DESTROY(cif);
+			/*
+			 * cif is allocated with M_CARP in carp_set_addr()
+			 * and carp_set_addr6(); free it with the same
+			 * malloc type.
+			 */
+			FREE(cif, M_CARP);
+		} else {
+			CARP_UNLOCK(cif);
+		}
+	}
+
+	return (error);
+}
+
+#ifdef INET6
+int
+carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
+{
+	struct ifnet *ifp;
+	struct carp_if *cif;
+	struct in6_ifaddr *ia, *ia_if;
+	struct ip6_moptions *im6o = &sc->sc_im6o;
+	struct in6_multi_mship *imm;
+	struct sockaddr_in6 addr;
+	int own, error;
+
+	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+		if (!(sc->sc_if.if_flags & IFF_UP))
+			carp_set_state(sc, INIT);
+		if (sc->sc_naddrs6)
+			sc->sc_if.if_flags |= IFF_UP;
+		carp_setrun(sc, 0);
+		return (0);
+	}
+
+	/* we have to do it by hands to check we won't match on us */
+	ia_if = NULL; own = 0;
+	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
+		int i;
+
for (i = 0; i < 4; i++) { + if ((sin6->sin6_addr.s6_addr32[i] & + ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != + (ia->ia_addr.sin6_addr.s6_addr32[i] & + ia->ia_prefixmask.sin6_addr.s6_addr32[i])) + break; + } + /* and, yeah, we need a multicast-capable iface too */ + if (ia->ia_ifp != &sc->sc_ac.ac_if && + (ia->ia_ifp->if_flags & IFF_MULTICAST) && + (i == 4)) { + if (!ia_if) + ia_if = ia; + if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, + &ia->ia_addr.sin6_addr)) + own++; + } + } + + if (!ia_if) + return (EADDRNOTAVAIL); + ia = ia_if; + ifp = ia->ia_ifp; + + if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || + (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) + return (EADDRNOTAVAIL); + + if (!sc->sc_naddrs6) { + im6o->im6o_multicast_ifp = ifp; + + /* join CARP multicast address */ + bzero(&addr, sizeof(addr)); + addr.sin6_family = AF_INET6; + addr.sin6_len = sizeof(addr); + addr.sin6_addr.s6_addr16[0] = htons(0xff02); + addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); + addr.sin6_addr.s6_addr8[15] = 0x12; + if ((imm = in6_joingroup(ifp, &addr.sin6_addr, &error)) == NULL) + goto cleanup; + LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); + + /* join solicited multicast address */ + bzero(&addr.sin6_addr, sizeof(addr.sin6_addr)); + addr.sin6_addr.s6_addr16[0] = htons(0xff02); + addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); + addr.sin6_addr.s6_addr32[1] = 0; + addr.sin6_addr.s6_addr32[2] = htonl(1); + addr.sin6_addr.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; + addr.sin6_addr.s6_addr8[12] = 0xff; + if ((imm = in6_joingroup(ifp, &addr.sin6_addr, &error)) == NULL) + goto cleanup; + LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); + } + + if (!ifp->if_carp) { + MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP, + M_WAITOK|M_ZERO); + if (!cif) { + error = ENOBUFS; + goto cleanup; + } + if ((error = ifpromisc(ifp, 1))) { + FREE(cif, M_CARP); + goto cleanup; + } + + CARP_LOCK_INIT(cif); + CARP_LOCK(cif); + cif->vhif_ifp = 
ifp;
+		TAILQ_INIT(&cif->vhif_vrs);
+		ifp->if_carp = cif;
+
+	} else {
+		struct carp_softc *vr;
+
+		cif = (struct carp_if *)ifp->if_carp;
+		CARP_LOCK(cif);
+		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
+			if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
+				CARP_UNLOCK(cif);
+				error = EINVAL;
+				goto cleanup;
+			}
+	}
+	sc->sc_ia6 = ia;
+	sc->sc_ifp = ifp;
+
+	{ /* XXX prevent endless loop if already in queue */
+		struct carp_softc *vr, *after = NULL;
+		int myself = 0;
+		cif = (struct carp_if *)ifp->if_carp;
+		CARP_LOCK_ASSERT(cif);
+
+		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
+			if (vr == sc)
+				myself = 1;
+			if (vr->sc_vhid < sc->sc_vhid)
+				after = vr;
+		}
+
+		if (!myself) {
+			/* We're trying to keep things in order */
+			if (after == NULL) {
+				TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
+			} else {
+				TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
+			}
+			cif->vhif_nvrs++;
+		}
+	}
+
+	CARP_UNLOCK(cif);
+	sc->sc_naddrs6++;
+	sc->sc_ac.ac_if.if_flags |= IFF_UP;
+	if (own)
+		sc->sc_advskew = 0;
+	carp_set_state(sc, INIT);
+	carp_setrun(sc, 0);
+
+	return (0);
+
+cleanup:
+	/* clean up multicast memberships */
+	if (!sc->sc_naddrs6) {
+		while (!LIST_EMPTY(&im6o->im6o_memberships)) {
+			imm = LIST_FIRST(&im6o->im6o_memberships);
+			LIST_REMOVE(imm, i6mm_chain);
+			in6_leavegroup(imm);
+		}
+	}
+	return (error);
+}
+
+/*
+ * Account for the removal of one IPv6 address from the carp interface.
+ *
+ * When the last address goes away (sc_naddrs6 drops to zero) the
+ * advertisement callout is stopped, IFF_UP and IFF_RUNNING are cleared,
+ * the vhid is reset to -1, every IPv6 multicast membership is left and sc
+ * is unlinked from the parent's carp_if.  The carp_if itself is released
+ * once its last softc is gone.
+ *
+ * sin6 is not examined.  Always returns 0.
+ */
+int
+carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
+{
+	int error = 0;
+
+	if (!--sc->sc_naddrs6) {
+		struct carp_if *cif = (struct carp_if *)sc->sc_ifp->if_carp;
+		struct ip6_moptions *im6o = &sc->sc_im6o;
+
+		callout_stop(&sc->sc_ad_tmo);
+		sc->sc_ac.ac_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
+		sc->sc_vhid = -1;
+		CARP_LOCK(cif);
+		while (!LIST_EMPTY(&im6o->im6o_memberships)) {
+			struct in6_multi_mship *imm =
+			    LIST_FIRST(&im6o->im6o_memberships);
+
+			LIST_REMOVE(imm, i6mm_chain);
+			in6_leavegroup(imm);
+		}
+		im6o->im6o_multicast_ifp = NULL;
+		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
+		if (!--cif->vhif_nvrs) {
+			CARP_LOCK_DESTROY(cif);
+			sc->sc_ifp->if_carp = NULL;
+			/*
+			 * cif is allocated with M_CARP in carp_set_addr()
+			 * and carp_set_addr6(); free it with the same
+			 * malloc type.
+			 */
+			FREE(cif, M_CARP);
+		} else
+			CARP_UNLOCK(cif);
+	}
+
+	return (error);
+}
+#endif /* INET6 */
+
+int
+carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
+{
+	struct carp_softc *sc = ifp->if_softc, *vr;
+	struct carpreq carpr;
+	struct ifaddr *ifa;
+	struct ifreq *ifr;
+	struct ifaliasreq *ifra;
+	int error = 0;
+
+	ifa = (struct ifaddr *)addr;
+	ifra = (struct ifaliasreq *)addr;
+	ifr = (struct ifreq *)addr;
+
+	switch (cmd) {
+	case SIOCSIFADDR:
+		switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+		case AF_INET:
+			sc->sc_if.if_flags |= IFF_UP;
+			bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
+			    sizeof(struct sockaddr));
+			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
+			break;
+#endif /* INET */
+#ifdef INET6
+		case AF_INET6:
+			sc->sc_if.if_flags |= IFF_UP;
+			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
+			break;
+#endif /* INET6 */
+		default:
+			error = EAFNOSUPPORT;
+			break;
+		}
+		break;
+
+	case SIOCAIFADDR:
+		switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+		case AF_INET:
+			sc->sc_if.if_flags |= IFF_UP;
+			bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
+			    sizeof(struct sockaddr));
+			error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
+			break;
+#endif /* INET */
+#ifdef INET6
+		case AF_INET6:
+			sc->sc_if.if_flags |= IFF_UP;
+			error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
+			break;
+#endif /* INET6 */
+		default:
+			error = EAFNOSUPPORT;
+			break;
+		}
+		break;
+
+	case SIOCDIFADDR:
+		sc->if_flags &= ~IFF_UP;
+		switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+		case AF_INET:
+			error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
+			break;
+#endif /* INET */
+#ifdef INET6
+		case AF_INET6:
+			error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
+			break;
+#endif /* INET6 */
+		default:
+			error = EAFNOSUPPORT;
+			break;
+		}
+		break;
+
+	case SIOCSIFFLAGS:
+		if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
+			sc->if_flags &= ~IFF_UP;
+			callout_stop(&sc->sc_ad_tmo);
+			callout_stop(&sc->sc_md_tmo);
+
callout_stop(&sc->sc_md6_tmo); + if (sc->sc_state == MASTER) + carp_send_ad(sc); + carp_set_state(sc, INIT); + carp_setrun(sc, 0); + } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { + sc->sc_if.if_flags |= IFF_UP; + carp_setrun(sc, 0); + } + break; + + case SIOCSVH: + if ((error = suser(curthread)) != 0) + break; + if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) + break; + error = 1; + if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { + switch (carpr.carpr_state) { + case BACKUP: + callout_stop(&sc->sc_ad_tmo); + carp_set_state(sc, BACKUP); + carp_setrun(sc, 0); + carp_setroute(sc, RTM_DELETE); + break; + case MASTER: + carp_master_down(sc); + break; + default: + break; + } + } + if (carpr.carpr_vhid > 0) { + if (carpr.carpr_vhid > 255) { + error = EINVAL; + break; + } + if (sc->sc_ifp) { + struct carp_if *cif; + cif = (struct carp_if *)sc->sc_ifp->if_carp; + CARP_LOCK(cif); + TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) + if (vr != sc && + vr->sc_vhid == carpr.carpr_vhid) { + CARP_UNLOCK(cif); + return EINVAL; + } + CARP_UNLOCK(cif); + } + sc->sc_vhid = carpr.carpr_vhid; + sc->sc_ac.ac_enaddr[0] = 0; + sc->sc_ac.ac_enaddr[1] = 0; + sc->sc_ac.ac_enaddr[2] = 0x5e; + sc->sc_ac.ac_enaddr[3] = 0; + sc->sc_ac.ac_enaddr[4] = 1; + sc->sc_ac.ac_enaddr[5] = sc->sc_vhid; + error--; + } + if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { + if (carpr.carpr_advskew >= 255) { + error = EINVAL; + break; + } + if (carpr.carpr_advbase > 255) { + error = EINVAL; + break; + } + sc->sc_advbase = carpr.carpr_advbase; + sc->sc_advskew = carpr.carpr_advskew; + error--; + } + bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); + if (error > 0) + error = EINVAL; + else { + error = 0; + carp_setrun(sc, 0); + } + break; + + case SIOCGVH: + bzero(&carpr, sizeof(carpr)); + carpr.carpr_state = sc->sc_state; + carpr.carpr_vhid = sc->sc_vhid; + carpr.carpr_advbase = sc->sc_advbase; + carpr.carpr_advskew = sc->sc_advskew; + if (suser(curthread) == 
0) + bcopy(sc->sc_key, carpr.carpr_key, + sizeof(carpr.carpr_key)); + error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); + break; + + default: + error = EINVAL; + } + + carp_hmac_prepare(sc); + return (error); +} + +/* + * XXX: this is looutput. We should eventually use it from there. + */ +int +carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, + struct rtentry *rt) +{ + M_ASSERTPKTHDR(m); /* check if we have the packet header */ + + if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { + m_freem(m); + return (rt->rt_flags & RTF_BLACKHOLE ? 0 : + rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); + } + + ifp->if_opackets++; + ifp->if_obytes += m->m_pkthdr.len; +#if 1 /* XXX */ + switch (dst->sa_family) { + case AF_INET: + case AF_INET6: + case AF_IPX: + case AF_APPLETALK: + break; + default: + printf("looutput: af=%d unexpected\n", dst->sa_family); + m_freem(m); + return (EAFNOSUPPORT); + } +#endif + return(if_simloop(ifp, m, dst->sa_family, 0)); +} + +/* + * Start output on carp interface. This function should never be called. 
+ */ +void +carp_start(struct ifnet *ifp) +{ +#ifdef DEBUG + printf("%s: start called\n", ifp->if_xname); +#endif +} +/* Rewrite the source MAC of a CARP-tagged frame to the virtual router address; returns 0, or EOPNOTSUPP when the interface type is not handled. */ +int +carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, + struct rtentry *rt) +{ + struct m_tag *mtag; + struct carp_softc *sc; + + if (!sa) + return (0); + + switch (sa->sa_family) { +#ifdef INET + case AF_INET: + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + break; +#endif /* INET6 */ + default: + return (0); + } + + mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); + if (mtag == NULL) + return (0); /* no CARP tag: leave the frame untouched */ + + bcopy(mtag + 1, &sc, sizeof(struct carp_softc *)); /* tag payload is a struct carp_softc pointer */ + + /* Set the source MAC address to Virtual Router MAC Address */ + switch (ifp->if_type) { + case IFT_ETHER: { + struct ether_header *eh; + + eh = mtod(m, struct ether_header *); + eh->ether_shost[0] = 0; + eh->ether_shost[1] = 0; + eh->ether_shost[2] = 0x5e; + eh->ether_shost[3] = 0; + eh->ether_shost[4] = 1; + eh->ether_shost[5] = sc->sc_vhid; + } + break; + case IFT_FDDI: { + struct fddi_header *fh; + + fh = mtod(m, struct fddi_header *); + fh->fddi_shost[0] = 0; + fh->fddi_shost[1] = 0; + fh->fddi_shost[2] = 0x5e; + fh->fddi_shost[3] = 0; + fh->fddi_shost[4] = 1; + fh->fddi_shost[5] = sc->sc_vhid; + } + break; + case IFT_ISO88025: { + struct iso88025_header *th; + th = mtod(m, struct iso88025_header *); + th->iso88025_shost[0] = 3; + th->iso88025_shost[1] = 0; + th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1); /* NOTE(review): shift count is sc_vhid - 1; confirm it always stays below the width of int */ + th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1); + th->iso88025_shost[4] = 0; + th->iso88025_shost[5] = 0; + } + break; + default: + printf("%s: carp is not supported for this interface type\n", + ifp->if_xname); + return (EOPNOTSUPP); + } + + return (0); +} + +void +carp_set_state(struct carp_softc *sc, int state) +{ + if (sc->sc_state == state) + return; + + sc->sc_state = state; + switch (state) { + case BACKUP: + sc->sc_ac.ac_if.if_link_state = LINK_STATE_DOWN; + break; + case MASTER: + sc->sc_ac.ac_if.if_link_state = LINK_STATE_UP; + break; +
default: + sc->sc_ac.ac_if.if_link_state = LINK_STATE_UNKNOWN; + break; + } + rt_ifmsg(&sc->sc_ac.ac_if); +} + +void +carp_carpdev_state(void *v) +{ + struct carp_if *cif = v; + struct carp_softc *sc; + + CARP_LOCK(cif); + TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { + if (sc->sc_ifp->if_link_state == LINK_STATE_DOWN || + !(sc->sc_ifp->if_flags & IFF_UP)) { + sc->sc_flags_backup = sc->sc_if.if_flags; + sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); + callout_stop(&sc->sc_ad_tmo); + callout_stop(&sc->sc_md_tmo); + callout_stop(&sc->sc_md6_tmo); + carp_set_state(sc, INIT); + carp_setrun(sc, 0); + if (!sc->sc_suppress) { + carp_suppress_preempt++; + if (carp_suppress_preempt == 1) + carp_send_ad_all(); + } + sc->sc_suppress = 1; + } else { + sc->sc_if.if_flags |= sc->sc_flags_backup; + carp_set_state(sc, INIT); + carp_setrun(sc, 0); + if (sc->sc_suppress) + carp_suppress_preempt--; + sc->sc_suppress = 0; + } + } + CARP_UNLOCK(cif); +} + +static int +carp_modevent(module_t mod, int type, void *data) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + LIST_INIT(&carpif_list); + if_clone_attach(&carp_cloner); + printf("carp: attached\n"); + break; + + case MOD_UNLOAD: + if_clone_detach(&carp_cloner); + while (!LIST_EMPTY(&carpif_list)) + carp_clone_destroy( + &LIST_FIRST(&carpif_list)->sc_if); + break; + + default: + error = EINVAL; + break; + } + + return error; +} + +static moduledata_t carp_mod = { + "carp", + carp_modevent, + 0 +}; + +DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); Index: sys/netinet/ip_carp.h =================================================================== RCS file: sys/netinet/ip_carp.h diff -N sys/netinet/ip_carp.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/netinet/ip_carp.h 13 Aug 2004 13:50:13 -0000 @@ -0,0 +1,162 @@ +/* $OpenBSD: ip_carp.h,v 1.8 2004/07/29 22:12:15 mcbride Exp $ */ + +/* + * Copyright (c) 2002 Michael Shalayeff. All rights reserved. + * Copyright (c) 2003 Ryan McBride. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _IP_CARP_H +#define _IP_CARP_H + +/* + * The CARP header layout is as follows: + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |Version| Type | VirtualHostID | AdvSkew | Auth Len | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | AdvBase | Checksum | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Counter (1) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Counter (2) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SHA-1 HMAC (1) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SHA-1 HMAC (2) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SHA-1 HMAC (3) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SHA-1 HMAC (4) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SHA-1 HMAC (5) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + */ + +struct carp_header { +#if BYTE_ORDER == LITTLE_ENDIAN + u_int8_t carp_type:4, + carp_version:4; +#endif +#if BYTE_ORDER == BIG_ENDIAN + u_int8_t carp_version:4, + carp_type:4; +#endif + u_int8_t carp_vhid; /* virtual host id */ + u_int8_t carp_advskew; /* advertisement skew */ + u_int8_t carp_authlen; /* size of counter+md, 32bit chunks */ + u_int8_t carp_pad1; /* reserved */ + u_int8_t carp_advbase; /* advertisement interval */ + u_int16_t carp_cksum; + u_int32_t carp_counter[2]; + unsigned char carp_md[20]; /* SHA1 HMAC */ +} __packed; + +#define CARP_DFLTTL 255 + +/* carp_version */ +#define CARP_VERSION 2 + +/* carp_type */ +#define CARP_ADVERTISEMENT 0x01 + +#define CARP_KEY_LEN 20 /* a sha1 hash of a passphrase */ + +/* carp_advbase */ +#define CARP_DFLTINTV 1 + +/* + * Statistics. 
+ */ +struct carpstats { + uint64_t carps_ipackets; /* total input packets, IPv4 */ + uint64_t carps_ipackets6; /* total input packets, IPv6 */ + uint64_t carps_badif; /* wrong interface */ + uint64_t carps_badttl; /* TTL is not CARP_DFLTTL */ + uint64_t carps_hdrops; /* packets shorter than hdr */ + uint64_t carps_badsum; /* bad checksum */ + uint64_t carps_badver; /* bad (incl unsupp) version */ + uint64_t carps_badlen; /* data length does not match */ + uint64_t carps_badauth; /* bad authentication */ + uint64_t carps_badvhid; /* bad VHID */ + uint64_t carps_badaddrs; /* bad address list */ + + uint64_t carps_opackets; /* total output packets, IPv4 */ + uint64_t carps_opackets6; /* total output packets, IPv6 */ + uint64_t carps_onomem; /* no memory for an mbuf */ + uint64_t carps_ostates; /* total state updates sent */ + + uint64_t carps_preempt; /* if enabled, preemptions */ +}; + +/* + * Configuration structure for SIOCSVH SIOCGVH + */ +struct carpreq { + int carpr_state; +#define CARP_STATES "INIT", "BACKUP", "MASTER" +#define CARP_MAXSTATE 2 + int carpr_vhid; + int carpr_advskew; + int carpr_advbase; + unsigned char carpr_key[CARP_KEY_LEN]; +}; +#define SIOCSVH _IOWR('i', 245, struct ifreq) +#define SIOCGVH _IOWR('i', 246, struct ifreq) + +/* + * Names for CARP sysctl objects + */ +#define CARPCTL_ALLOW 1 /* accept incoming CARP packets */ +#define CARPCTL_PREEMPT 2 /* high-pri backup preemption mode */ +#define CARPCTL_LOG 3 /* log bad packets */ +#define CARPCTL_STATS 4 /* statistics (read-only) */ +#define CARPCTL_ARPBALANCE 5 /* balance arp responses */ +#define CARPCTL_MAXID 6 + +#define CARPCTL_NAMES { \ + { 0, 0 }, \ + { "allow", CTLTYPE_INT }, \ + { "preempt", CTLTYPE_INT }, \ + { "log", CTLTYPE_INT }, \ + { "stats", CTLTYPE_STRUCT }, \ + { "arpbalance", CTLTYPE_INT }, \ +} + +#ifdef _KERNEL +void carp_ifdetach (struct ifnet *); +void carp_carpdev_state(void *); +void carp_input (struct mbuf *, int); +int carp6_input (struct mbuf **, int *, int); 
+int carp_output (struct ifnet *, struct mbuf *, struct sockaddr *, + struct rtentry *); +int carp_iamatch (void *, struct in_ifaddr *, struct in_addr *, + u_int8_t **); +struct ifaddr *carp_iamatch6(void *, struct in6_addr *); +void *carp_macmatch6(void *, struct mbuf *, const struct in6_addr *); +struct ifnet *carp_forus (void *, void *); +#endif +#endif /* _IP_CARP_H */ Index: sys/netinet/ip_input.c =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sys/netinet/ip_input.c,v retrieving revision 1.283.2.2 diff -u -r1.283.2.2 ip_input.c --- sys/netinet/ip_input.c 20 Aug 2004 01:40:42 -0000 1.283.2.2 +++ sys/netinet/ip_input.c 23 Aug 2004 21:16:17 -0000 @@ -36,6 +36,7 @@ #include "opt_ipsec.h" #include "opt_mac.h" #include "opt_pfil_hooks.h" +#include "opt_carp.h" #include #include @@ -66,6 +67,9 @@ #include #include #include +#ifdef DEV_CARP +#include +#endif #include @@ -497,10 +501,17 @@ * XXX - Checking is incompatible with IP aliases added * to the loopback interface instead of the interface where * the packets are received. + * + * XXX - This is the case for carp vhost IPs as well so we + * insert a workaround. If the packet got here, we already + * checked with carp_iamatch() and carp_forus(). 
*/ checkif = ip_checkinterface && (ipforwarding == 0) && m->m_pkthdr.rcvif != NULL && ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) && +#ifdef DEV_CARP + !m->m_pkthdr.rcvif->if_carp && +#endif (dchg == 0); /* Index: sys/netinet6/in6.c =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sys/netinet6/in6.c,v retrieving revision 1.47 diff -u -r1.47 in6.c --- sys/netinet6/in6.c 7 Apr 2004 20:46:15 -0000 1.47 +++ sys/netinet6/in6.c 8 Aug 2004 10:11:15 -0000 @@ -1558,6 +1558,39 @@ return (error); } +struct in6_multi_mship * +in6_joingroup(ifp, addr, errorp) + struct ifnet *ifp; + struct in6_addr *addr; + int *errorp; +{ + struct in6_multi_mship *imm; + + imm = malloc(sizeof(*imm), M_IPMADDR, M_NOWAIT); + if (!imm) { + *errorp = ENOBUFS; + return NULL; + } + imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp); + if (!imm->i6mm_maddr) { + /* *errorp is already set */ + free(imm, M_IPMADDR); + return NULL; + } + return imm; +} +/* Drop imm's multicast reference, if any, and free the record; always returns 0. */ +int +in6_leavegroup(imm) + struct in6_multi_mship *imm; +{ + + if (imm->i6mm_maddr) + in6_delmulti(imm->i6mm_maddr); + free(imm, M_IPMADDR); + return 0; +} + /* * Find an IPv6 interface link-local address specific to an interface.
*/ Index: sys/netinet6/in6_proto.c =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sys/netinet6/in6_proto.c,v retrieving revision 1.29 diff -u -r1.29 in6_proto.c --- sys/netinet6/in6_proto.c 14 Aug 2004 15:32:20 -0000 1.29 +++ sys/netinet6/in6_proto.c 14 Aug 2004 23:07:06 -0000 @@ -64,6 +64,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_carp.h" #include #include @@ -122,6 +123,10 @@ #endif #endif /* IPSEC */ +#ifdef DEV_CARP +#include +#endif + #ifdef FAST_IPSEC #include #define IPSEC @@ -242,6 +247,14 @@ 0, 0, 0, 0, &rip6_usrreqs }, +#ifdef DEV_CARP +{ SOCK_RAW, &inet6domain, IPPROTO_CARP, PR_ATOMIC|PR_ADDR, + carp6_input, rip6_output, 0, rip6_ctloutput, + 0, + 0, 0, 0, 0, + &rip6_usrreqs +}, +#endif /* DEV_CARP */ /* raw wildcard */ { SOCK_RAW, &inet6domain, 0, PR_ATOMIC|PR_ADDR, rip6_input, rip6_output, 0, rip6_ctloutput, Index: sys/netinet6/in6_var.h =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sys/netinet6/in6_var.h,v retrieving revision 1.19 diff -u -r1.19 in6_var.h --- sys/netinet6/in6_var.h 7 Apr 2004 20:46:15 -0000 1.19 +++ sys/netinet6/in6_var.h 8 Aug 2004 10:11:15 -0000 @@ -578,6 +578,8 @@ struct in6_multi *in6_addmulti __P((struct in6_addr *, struct ifnet *, int *)); void in6_delmulti __P((struct in6_multi *)); +struct in6_multi_mship *in6_joingroup(struct ifnet *, struct in6_addr *, int *); +int in6_leavegroup(struct in6_multi_mship *); int in6_mask2len __P((struct in6_addr *, u_char *)); int in6_control __P((struct socket *, u_long, caddr_t, struct ifnet *, struct thread *)); Index: sys/netinet6/nd6_nbr.c =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sys/netinet6/nd6_nbr.c,v retrieving revision 1.26 diff -u -r1.26 nd6_nbr.c --- sys/netinet6/nd6_nbr.c 19 Apr 2004 08:02:52 -0000 1.26 +++ sys/netinet6/nd6_nbr.c 8 Aug 2004 10:11:16 -0000 @@ 
-32,6 +32,8 @@ #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_ipsec.h" +#include "opt_carp.h" #include #include @@ -59,6 +61,10 @@ #include #include +#ifdef DEV_CARP +#include +#endif + #include #define SDL(s) ((struct sockaddr_dl *)s) @@ -94,7 +100,7 @@ struct in6_addr taddr6; struct in6_addr myaddr6; char *lladdr = NULL; - struct ifaddr *ifa; + struct ifaddr *ifa = NULL; int lladdrlen = 0; int anycast = 0, proxy = 0, tentative = 0; int tlladdr; @@ -193,7 +199,14 @@ * (3) "tentative" address on which DAD is being performed. */ /* (1) and (3) check. */ +#ifdef DEV_CARP + if (ifp->if_carp) + ifa = carp_iamatch6(ifp->if_carp, &taddr6); + if (!ifa) + ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); +#else ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); +#endif /* (2) check. */ if (!ifa) { @@ -888,9 +901,16 @@ * lladdr in sdl0. If we are not proxying (sending NA for * my address) use lladdr configured for the interface. */ - if (sdl0 == NULL) + if (sdl0 == NULL) { +#ifdef DEV_CARP + if (ifp->if_carp) + mac = carp_macmatch6(ifp->if_carp, m, taddr6); + if (mac == NULL) + mac = nd6_ifptomac(ifp); +#else mac = nd6_ifptomac(ifp); - else if (sdl0->sa_family == AF_LINK) { +#endif + } else if (sdl0->sa_family == AF_LINK) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)sdl0; if (sdl->sdl_alen == ifp->if_addrlen) Index: sys/sys/mbuf.h =================================================================== RCS file: /usr/store/mlaier/fcvs/src/sys/sys/mbuf.h,v retrieving revision 1.157 diff -u -r1.157 mbuf.h --- sys/sys/mbuf.h 17 Aug 2004 22:05:54 -0000 1.157 +++ sys/sys/mbuf.h 23 Aug 2004 20:59:08 -0000 @@ -642,6 +642,7 @@ #define PACKET_TAG_PF_TAG 24 /* PF tagged */ #define PACKET_TAG_RTSOCKFAM 25 /* rtsock sa family */ #define PACKET_TAG_PF_TRANSLATE_LOCALHOST 26 /* PF translate localhost */ +#define PACKET_TAG_CARP 27 /* CARP info */ /* Packet tag routines. 
*/ struct m_tag *m_tag_alloc(u_int32_t, int, int, int); Index: usr.bin/netstat/inet.c =================================================================== RCS file: /usr/store/mlaier/fcvs/src/usr.bin/netstat/inet.c,v retrieving revision 1.67 diff -u -r1.67 inet.c --- usr.bin/netstat/inet.c 26 Jul 2004 20:18:11 -0000 1.67 +++ usr.bin/netstat/inet.c 10 Aug 2004 15:40:41 -0000 @@ -51,6 +51,7 @@ #include #include #include +#include #ifdef INET6 #include #endif /* INET6 */ @@ -525,6 +526,48 @@ #undef p1a } +/* + * Dump CARP statistics, read (and, with zflag, reset) through sysctl. + */ +void +carp_stats(u_long off, const char *name, int af1 __unused) +{ + struct carpstats carpstat, zerostat; + size_t len = sizeof(struct carpstats); + + if (zflag) + memset(&zerostat, 0, len); + if (sysctlbyname("net.inet.carp.stats", &carpstat, &len, + zflag ? &zerostat : NULL, zflag ? len : 0) < 0) { + warn("sysctl: net.inet.carp.stats"); + return; + } + + printf("%s:\n", name); + +#define p(f, m) if (carpstat.f || sflag <= 1) \ + printf(m, (unsigned long long)carpstat.f, plural((int)carpstat.f)) +#define p2(f, m) if (carpstat.f || sflag <= 1) \ + printf(m, (unsigned long long)carpstat.f) + + p(carps_ipackets, "\t%llu packet%s received (IPv4)\n"); + p(carps_ipackets6, "\t%llu packet%s received (IPv6)\n"); + p(carps_hdrops, "\t\t%llu packet%s shorter than header\n"); + p(carps_badsum, "\t\t%llu discarded for bad checksum%s\n"); + p(carps_badver, "\t\t%llu discarded packet%s with a bad version\n"); + p2(carps_badlen, "\t\t%llu discarded because packet too short\n"); + p2(carps_badauth, "\t\t%llu discarded for bad authentication\n"); + p2(carps_badvhid, "\t\t%llu discarded for bad vhid\n"); + p2(carps_badaddrs, "\t\t%llu discarded because of a bad address list\n"); + p(carps_opackets, "\t%llu packet%s sent (IPv4)\n"); + p(carps_opackets6, "\t%llu packet%s sent (IPv6)\n"); +#if notyet + p(carps_ostates, "\t\t%llu state update%s sent\n"); /* p() passes an unsigned long long first, so %llu, not %s */ +#endif +#undef p +#undef p2 +} + /* * Dump IP statistics structure.
*/ Index: usr.bin/netstat/main.c =================================================================== RCS file: /usr/store/mlaier/fcvs/src/usr.bin/netstat/main.c,v retrieving revision 1.69 diff -u -r1.69 main.c --- usr.bin/netstat/main.c 26 Jul 2004 20:18:11 -0000 1.69 +++ usr.bin/netstat/main.c 8 Aug 2004 10:12:09 -0000 @@ -136,6 +136,8 @@ { "_mbuf_lowm" }, #define N_CLLO 32 { "_clust_lowm" }, +#define N_CARPSTAT 33 + { "_carpstats" }, { "" }, }; @@ -171,6 +173,8 @@ bdg_stats, NULL, "bdg", 1 /* bridging... */ }, { -1, -1, 1, protopr, pim_stats, NULL, "pim", IPPROTO_PIM }, + { -1, N_CARPSTAT, 1, 0, + carp_stats, NULL, "carp", 0}, { -1, -1, 0, NULL, NULL, NULL, NULL, 0 } }; Index: usr.bin/netstat/netstat.h =================================================================== RCS file: /usr/store/mlaier/fcvs/src/usr.bin/netstat/netstat.h,v retrieving revision 1.40 diff -u -r1.40 netstat.h --- usr.bin/netstat/netstat.h 31 May 2004 21:46:06 -0000 1.40 +++ usr.bin/netstat/netstat.h 8 Aug 2004 10:12:09 -0000 @@ -71,6 +71,7 @@ void icmp_stats(u_long, const char *, int); void igmp_stats(u_long, const char *, int); void pim_stats(u_long, const char *, int); +void carp_stats (u_long, const char *, int); #ifdef IPSEC void ipsec_stats(u_long, const char *, int); #endif