Add and document the IP_SENDIF option. Add support for it to the IP broadcast regression test. Rework, and add comments to, the source interface selection logic in the output path. Neither IP_SENDIF nor IP_SENDSRCADDR checks whether the socket is bound. The way temporary binding is implemented causes IP_SENDSRCADDR to fail if the socket is unbound, which is contrary to the documentation. This needs to be fixed before we can do DHCP without BPF. TODO: Fix IP_SENDIF and IP_SENDSRCADDR for unbound sockets. TODO: Allow IP_SENDIF to be used from the raw IP output path. TODO: Add a specific privilege level for IP_SENDIF. Currently it requires the 'open raw socket' privilege. TODO: We *may* need to turn hardware checksums off if the ifp is changed to something other than what the stack normally expects. ==== //depot/user/bms/netdev/share/man/man4/ip.4#1 - /home/bms/p4/netdev/share/man/man4/ip.4 ==== --- /tmp/tmp.1243.0 Sun Mar 4 22:10:48 2007 +++ /home/bms/p4/netdev/share/man/man4/ip.4 Sun Mar 4 22:05:37 2007 @@ -32,7 +32,7 @@ .\" @(#)ip.4 8.2 (Berkeley) 11/30/93 .\" $FreeBSD: src/share/man/man4/ip.4,v 1.47 2006/05/14 14:13:30 bms Exp $ .\" -.Dd May 14, 2006 +.Dd March 4, 2007 .Dt IP 4 .Os .Sh NAME @@ -169,7 +169,7 @@ cmsg_level = IPPROTO_IP cmsg_type = IP_RECVDSTADDR .Ed -.Pp +\" The source address to be used for outgoing .Tn UDP datagrams on a socket that is not bound to a specific @@ -199,6 +199,7 @@ .Xr recvmsg 2 can be used directly as a control message for .Xr sendmsg 2 . +.\" .Pp If the .Dv IP_ONESBCAST @@ -262,6 +263,7 @@ cmsg_level = IPPROTO_IP cmsg_type = IP_RECVTTL .Ed +.\" .Pp If the .Dv IP_RECVIF @@ -290,6 +292,34 @@ cmsg_type = IP_RECVIF .Ed .Pp +.\" +.Pp +The source interface to be used for outgoing +.Tn UDP +datagrams on a socket that is not bound to a specific +.Tn IP +address may be specified as ancillary data with a type code of +.Dv IP_SENDIF . 
+The msg_control field in the msghdr structure should point to a buffer +that contains a +.Vt cmsghdr +structure followed by a +.Vt "struct sockaddr_dl" +structure. +Either the +.Va sdl_index +field, or the name portion of the +.Va sdl_data +member, should be filled out with either the index of the +interface or its name respectively. +The cmsghdr fields should have the following values: +.Bd -literal +cmsg_len = CMSG_LEN(sizeof(struct sockaddr_dl)) +cmsg_level = IPPROTO_IP +cmsg_type = IP_SENDIF +.Ed +.Pp +.\" .Dv IP_PORTRANGE may be used to set the port range used for selecting a local port number on a socket with an unspecified (zero) port number. ==== //depot/user/bms/netdev/sys/netinet/in.h#1 - /home/bms/p4/netdev/sys/netinet/in.h ==== --- /tmp/tmp.1243.1 Sun Mar 4 22:10:48 2007 +++ /home/bms/p4/netdev/sys/netinet/in.h Sun Mar 4 16:40:15 2007 @@ -407,6 +407,7 @@ #define IP_FAITH 22 /* bool; accept FAITH'ed connections */ #define IP_ONESBCAST 23 /* bool: send all-ones broadcast */ +#define IP_SENDIF 24 /* send packet via named interface */ #define IP_FW_TABLE_ADD 40 /* add entry */ #define IP_FW_TABLE_DEL 41 /* delete entry */ ==== //depot/user/bms/netdev/sys/netinet/ip_output.c#2 - /home/bms/p4/netdev/sys/netinet/ip_output.c ==== --- /tmp/tmp.1243.2 Sun Mar 4 22:10:48 2007 +++ /home/bms/p4/netdev/sys/netinet/ip_output.c Sun Mar 4 21:53:27 2007 @@ -47,6 +47,7 @@ #include #include +#include #include #include #include @@ -162,6 +163,40 @@ hlen = ip->ip_hl << 2; } + /* + * Force source interface selection for datagrams with the + * IP_SENDTOIF option set. The caller specified the interface by + * passing a sockaddr_dl in the struct route passed to this function, + * which MAY be overwritten by this code. + * Multicast datagrams using IP_MULTICAST_IF are rejected (EINVAL). + */ + if (flags & IP_SENDTOIF) { + struct sockaddr_dl *sdl; + + if (ro == &iproute) { + /* Caller specified IP_SENDTOIF w/o a route. 
*/ + error = EINVAL; + goto bad; + } + sdl = (struct sockaddr_dl *)&ro->ro_dst; + if (sdl->sdl_family != AF_LINK) { + error = EINVAL; + goto bad; + } + KASSERT(sdl->sdl_len == sizeof(struct sockaddr_dl), + ("%s: invalid sdl_len")); + if (sdl->sdl_index != 0) + ifp = ifnet_byindex(sdl->sdl_index); + else if (sdl->sdl_nlen != 0) { + sdl->sdl_data[sdl->sdl_nlen] = '\0'; + ifp = ifunit((char *)(&sdl->sdl_data)); + } + if (ifp == NULL) { + error = EINVAL; + goto bad; + } + } + dst = (struct sockaddr_in *)&ro->ro_dst; again: /* @@ -188,47 +223,81 @@ dst->sin_addr = ip->ip_dst; } /* - * If routing to interface only, short circuit routing lookup. - * The use of an all-ones broadcast address implies this; an - * interface is specified by the broadcast address of an interface, - * or the destination address of a ptp interface. + * Perform source interface selection and/or layer 2 next-hop + * resolution for undirected broadcast, SO_DONTROUTE, directed + * multicast and other traffic in that order. */ if (flags & IP_SENDONES) { - if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL && - (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) { - ipstat.ips_noroute++; - error = ENETUNREACH; - goto bad; + /* + * Send to the undirected broadcast address, 255.255.255.255. + * If an interface was already specified by IP_SENDTOIF, + * skip L2 next-hop resolution. + * Otherwise, look up the L2 next-hop based on the + * destination (a network directed broadcast address). + * Match broadcast interfaces first, followed by interfaces + * which may have the same network address configured. + */ + if ((flags & IP_SENDTOIF) == 0) { + ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst))); + if (ia == NULL) + ia = ifatoia(ifa_ifwithnet(sintosa(dst))); + if (ia == NULL) { + ipstat.ips_noroute++; + error = ENETUNREACH; + goto bad; + } + ifp = ia->ia_ifp; } + /* + * Rewrite destination as 255.255.255.255. Force hops to 1. + * Force datagram to be link-layer broadcast. 
+ */ ip->ip_dst.s_addr = INADDR_BROADCAST; dst->sin_addr = ip->ip_dst; - ifp = ia->ia_ifp; ip->ip_ttl = 1; isbroadcast = 1; } else if (flags & IP_ROUTETOIF) { - if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL && - (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) { - ipstat.ips_noroute++; - error = ENETUNREACH; - goto bad; + /* + * Provide traditional BSD SO_DONTROUTE semantics. + * Datagram is to be sent on the local link only, and only + * to a directly attached neighbor. Force hops to 1. + * Try to match point-to-point interfaces first, as they + * are more specific than broadcast interfaces. + */ + if ((flags & IP_SENDTOIF) == 0) { + ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst))); + if (ia == NULL) + ia = ifatoia(ifa_ifwithnet(sintosa(dst))); + if (ia == NULL) { + ipstat.ips_noroute++; + error = ENETUNREACH; + goto bad; + } + ifp = ia->ia_ifp; } - ifp = ia->ia_ifp; ip->ip_ttl = 1; isbroadcast = in_broadcast(dst->sin_addr, ifp); } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && imo != NULL && imo->imo_multicast_ifp != NULL) { /* - * Bypass the normal routing lookup for multicast - * packets if the interface is specified. + * Handle the IP_MULTICAST_IF option if specified. + * Next-hop resolution for multicast destinations is + * performed independently of the unicast output code. */ + if (flags & IP_SENDTOIF) { + error = EINVAL; + goto bad; + } ifp = imo->imo_multicast_ifp; IFP_TO_IA(ifp, ia); isbroadcast = 0; /* fool gcc */ } else { /* + * Layer 2 next-hop resolution for all other traffic. * We want to do any cloning requested by the link layer, * as this is probably required in all cases for correct * operation (as it is for ARP). + * TODO: Source address selection policy. */ if (ro->ro_rt == NULL) rtalloc_ign(ro, 0); @@ -238,7 +307,9 @@ goto bad; } ia = ifatoia(ro->ro_rt->rt_ifa); - ifp = ro->ro_rt->rt_ifp; + /* If the source interface was already overridden, keep it. 
*/ + if ((flags & IP_SENDTOIF) == 0) + ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_rmx.rmx_pksent++; if (ro->ro_rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; @@ -247,6 +318,7 @@ else isbroadcast = in_broadcast(dst->sin_addr, ifp); } + KASSERT(ifp != NULL, ("%s: invalid ifp", __func__)); /* * Calculate MTU. If we have a route that is up, use that, * otherwise use the interface's MTU. @@ -276,7 +348,8 @@ */ dst = (struct sockaddr_in *)&ro->ro_dst; /* - * See if the caller provided any multicast options + * See if the caller provided any multicast options. + * Source address selection may be explicitly overridden here. */ if (imo != NULL) { ip->ip_ttl = imo->imo_multicast_ttl; ==== //depot/user/bms/netdev/sys/netinet/ip_var.h#1 - /home/bms/p4/netdev/sys/netinet/ip_var.h ==== --- /tmp/tmp.1243.3 Sun Mar 4 22:10:48 2007 +++ /home/bms/p4/netdev/sys/netinet/ip_var.h Sun Mar 4 16:43:08 2007 @@ -131,6 +131,7 @@ #define IP_FORWARDING 0x1 /* most of ip header exists */ #define IP_RAWOUTPUT 0x2 /* raw ip header exists */ #define IP_SENDONES 0x4 /* send all-ones broadcast */ +#define IP_SENDTOIF 0x8 /* send on specific ifnet */ #define IP_ROUTETOIF SO_DONTROUTE /* bypass routing tables */ #define IP_ALLOWBROADCAST SO_BROADCAST /* can send broadcast packets */ ==== //depot/user/bms/netdev/sys/netinet/udp_usrreq.c#1 - /home/bms/p4/netdev/sys/netinet/udp_usrreq.c ==== --- /tmp/tmp.1243.4 Sun Mar 4 22:10:48 2007 +++ /home/bms/p4/netdev/sys/netinet/udp_usrreq.c Sun Mar 4 22:03:44 2007 @@ -58,6 +58,7 @@ #include #include +#include #include #include @@ -729,10 +730,18 @@ struct in_addr faddr, laddr; struct cmsghdr *cm; struct sockaddr_in *sin, src; + struct route *ro; int error = 0; int ipflags; u_short fport, lport; int unlock_udbinfo; + struct { + struct rtentry *ro_rt; + struct sockaddr_dl ro_dst; /* XXX alignment? 
*/ + } sro_sdl; + + ro = NULL; + ipflags = 0; /* * udp_output() may need to temporarily bind or connect the current @@ -783,6 +792,23 @@ src.sin_port = inp->inp_lport; src.sin_addr = *(struct in_addr *)CMSG_DATA(cm); break; + case IP_SENDIF: + if (cm->cmsg_len != + CMSG_LEN(sizeof(struct sockaddr_dl))) { + error = EINVAL; + break; + } + if (priv_check_cred(td->td_ucred, + PRIV_NETINET_RAW, SUSER_ALLOWJAIL) != 0) { + error = EPERM; + break; + } + ro = (struct route *)&sro_sdl; + ro->ro_rt = NULL; + bcopy(CMSG_DATA(cm), &ro->ro_dst, + sizeof(struct sockaddr_dl)); + ipflags |= IP_SENDTOIF; + break; default: error = ENOPROTOOPT; break; @@ -897,7 +923,6 @@ ip->ip_off |= IP_DF; } - ipflags = 0; if (inp->inp_socket->so_options & SO_DONTROUTE) ipflags |= IP_ROUTETOIF; if (inp->inp_socket->so_options & SO_BROADCAST) @@ -924,7 +949,7 @@ if (unlock_udbinfo) INP_INFO_WUNLOCK(&udbinfo); - error = ip_output(m, inp->inp_options, NULL, ipflags, + error = ip_output(m, inp->inp_options, ro, ipflags, inp->inp_moptions, inp); INP_UNLOCK(inp); return (error); ==== //depot/user/bms/netdev/tools/regression/netinet/ipbroadcast/ipbroadcast.c#2 - /home/bms/p4/netdev/tools/regression/netinet/ipbroadcast/ipbroadcast.c ==== --- /tmp/tmp.1243.5 Sun Mar 4 22:10:49 2007 +++ /home/bms/p4/netdev/tools/regression/netinet/ipbroadcast/ipbroadcast.c Sun Mar 4 21:37:46 2007 @@ -36,6 +36,8 @@ #include #include +#include +#include #include #include @@ -54,10 +56,17 @@ #include #include +#ifndef IP_SENDIF +#define IP_SENDIF 24 /* XXX */ +#endif + #define DEFAULT_PORT 6698 #define DEFAULT_PAYLOAD_SIZE 24 #define DEFAULT_TTL 1 -#define MY_CMSG_SIZE CMSG_SPACE(sizeof(struct in_addr)) + +#define MY_CMSG_SIZE \ + CMSG_SPACE(sizeof(struct in_addr)) + \ + CMSG_SPACE(sizeof(struct sockaddr_dl)) static char *progname = NULL; @@ -66,8 +75,8 @@ { fprintf(stderr, -"usage: %s [-1] [-b] [-B] [-d] [-l len] [-p port] [-r] [-s srcaddr] [-t ttl]\n" -" \n", +"usage: %s [-1] [-b] [-B] [-d] [-i iface] [-l len] [-p port] 
[-r]\n" +" [-s srcaddr] [-t ttl] \n", progname); fprintf(stderr, "IPv4 broadcast test program. Sends a %d byte UDP " "datagram to :.\n", DEFAULT_PAYLOAD_SIZE); @@ -75,12 +84,13 @@ fprintf(stderr, "-b: bind socket to INADDR_ANY:\n"); fprintf(stderr, "-B: Set SO_BROADCAST\n"); fprintf(stderr, "-d: Set SO_DONTROUTE\n"); -#if 0 - fprintf(stderr, "-r: Fill datagram with random bytes\n"); -#endif + fprintf(stderr, "-i: Set IP_SENDIF \n"); fprintf(stderr, "-l: Set payload size to \n"); fprintf(stderr, "-p: Set source and destination port (default: %d)\n", DEFAULT_PORT); +#if 0 + fprintf(stderr, "-r: Fill datagram with random bytes\n"); +#endif fprintf(stderr, "-s: Set IP_SENDSRCADDR to \n"); fprintf(stderr, "-t: Set IP_TTL to \n"); @@ -95,9 +105,11 @@ struct iovec iov[1]; struct msghdr msg; struct sockaddr_in dsin; + struct sockaddr_dl *sdl; struct cmsghdr *cmsgp; struct in_addr dstaddr; struct in_addr *srcaddrp; + char *ifname; char *srcaddr_s; int ch; int dobind; @@ -120,6 +132,7 @@ doonesbcast = 0; dorandom = 0; + ifname = NULL; dstaddr.s_addr = INADDR_ANY; srcaddr_s = NULL; portno = DEFAULT_PORT; @@ -129,7 +142,7 @@ buflen = DEFAULT_PAYLOAD_SIZE; progname = basename(argv[0]); - while ((ch = getopt(argc, argv, "1bBdl:p:rs:t:")) != -1) { + while ((ch = getopt(argc, argv, "1bBdi:l:p:rs:t:")) != -1) { switch (ch) { case '1': doonesbcast = 1; @@ -143,6 +156,9 @@ case 'd': dontroute = 1; break; + case 'i': + ifname = optarg; + break; case 'l': buflen = atoi(optarg); break; @@ -170,6 +186,9 @@ usage(); if (argv[0] == NULL || inet_aton(argv[0], &dstaddr) == 0) usage(); + /* IP_SENDSRCADDR and IP_SENDIF are mutually exclusive just now. 
*/ + if (srcaddr_s != NULL && ifname != NULL) + usage(); s = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); if (s == -1) { perror("socket"); @@ -257,10 +276,14 @@ msg.msg_iov = iov; msg.msg_iovlen = 1; + /* Assume we fill out a control msg; macros need to see buf ptr */ + msg.msg_control = cmsgbuf; + msg.msg_controllen = 0; + memset(cmsgbuf, 0, MY_CMSG_SIZE); + + /* IP_SENDSRCADDR and IP_SENDIF are mutually exclusive just now. */ if (srcaddr_s != NULL) { - memset(cmsgbuf, 0, MY_CMSG_SIZE); - msg.msg_control = cmsgbuf; - msg.msg_controllen = sizeof(cmsgbuf); + msg.msg_controllen += CMSG_SPACE(sizeof(struct in_addr)); cmsgp = CMSG_FIRSTHDR(&msg); cmsgp->cmsg_len = CMSG_LEN(sizeof(struct in_addr)); cmsgp->cmsg_level = IPPROTO_IP; @@ -268,6 +291,41 @@ srcaddrp = (struct in_addr *)CMSG_DATA(cmsgp); srcaddrp->s_addr = inet_addr(srcaddr_s); } + + if (ifname != NULL) { +#ifdef IP_SENDIF + msg.msg_controllen += CMSG_SPACE(sizeof(struct sockaddr_dl)); + cmsgp = CMSG_FIRSTHDR(&msg); + cmsgp->cmsg_len = CMSG_LEN(sizeof(struct sockaddr_dl)); + cmsgp->cmsg_level = IPPROTO_IP; + cmsgp->cmsg_type = IP_SENDIF; + +#ifdef DIAGNOSTIC + fprintf(stderr, "DEBUG: cmsgp->cmsg_len is %d\n", + cmsgp->cmsg_len); +#endif + + sdl = (struct sockaddr_dl *)CMSG_DATA(cmsgp); + memset(sdl, 0, sizeof(struct sockaddr_dl)); + sdl->sdl_family = AF_LINK; + sdl->sdl_len = sizeof(struct sockaddr_dl); + sdl->sdl_index = if_nametoindex(ifname); + +#ifdef DIAGNOSTIC + fprintf(stderr, "DEBUG: sdl->sdl_family is %d\n", + sdl->sdl_family); + fprintf(stderr, "DEBUG: sdl->sdl_len is %d\n", + sdl->sdl_len); + fprintf(stderr, "DEBUG: sdl->sdl_index is %d\n", + sdl->sdl_index); +#endif +#else + fprintf(stderr, "WARNING: IP_SENDIF not supported, ignored.\n"); +#endif + } + + if (msg.msg_controllen == 0) + msg.msg_control = NULL; nbytes = sendmsg(s, &msg, (dontroute ? MSG_DONTROUTE : 0)); if (nbytes == -1) {