diff --git a/etc/mtree/BSD.include.dist b/etc/mtree/BSD.include.dist index fbffe0ece868..21fb28798b45 100644 --- a/etc/mtree/BSD.include.dist +++ b/etc/mtree/BSD.include.dist @@ -290,6 +290,8 @@ netinet cc .. + netdump + .. .. netinet6 .. diff --git a/include/Makefile b/include/Makefile index 97124d65370e..a7581f058a73 100644 --- a/include/Makefile +++ b/include/Makefile @@ -56,6 +56,7 @@ LSUBDIRS= cam/ata cam/mmc cam/nvme cam/scsi \ net/altq \ netgraph/atm netgraph/netflow \ netinet/cc \ + netinet/netdump \ security/audit \ security/mac_biba security/mac_bsdextended security/mac_lomac \ security/mac_mls security/mac_partition \ diff --git a/sbin/dumpon/dumpon.8 b/sbin/dumpon/dumpon.8 index e6aace1bbbc0..b13b4b0e87e9 100644 --- a/sbin/dumpon/dumpon.8 +++ b/sbin/dumpon/dumpon.8 @@ -28,7 +28,7 @@ .\" From: @(#)swapon.8 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd February 13, 2018 +.Dd March 6, 2018 .Dt DUMPON 8 .Os .Sh NAME @@ -37,10 +37,19 @@ .Sh SYNOPSIS .Nm .Op Fl v -.Op Fl k Ar public_key_file +.Op Fl k Ar pubkey +.Op Fl Z .Op Fl z +.Ar device +.Nm +.Op Fl v +.Op Fl k Ar pubkey .Op Fl Z -.Ar special_file +.Op Fl z +.Op Fl g Ar gateway | Li default +.Fl s Ar server +.Fl c Ar client +.Ar iface .Nm .Op Fl v .Cm off @@ -60,7 +69,7 @@ normally occur from the system multi-user initialization file controlled by the .Dq dumpdev and -.Dq dumppubkey +.Dq dumpon_flags variables in the boot time configuration file .Pa /etc/rc.conf . .Pp @@ -72,8 +81,7 @@ Alternatively, full memory dumps can be enabled by setting the variable to 0. .Pp For systems using full memory dumps, the size of the specified dump -device must be at -least the size of physical memory. +device must be at least the size of physical memory. 
Even though an additional 64 kB header is added to the dump, the BIOS for a platform typically holds back some memory, so it is not usually necessary to size the dump device larger than the actual amount of RAM @@ -86,8 +94,35 @@ total amount of physical memory as reported by the .Xr sysctl 8 variable. .Pp +.Nm +is used to configure a local storage device as the dump device. +With additional parameters, the kernel can instead be configured to +transmit a dump to a remote server using +.Xr netdump 4 . +This eliminates the need to reserve space for saving crash dumps and +is especially useful in diskless environments. +The +.Xr netdump 4 +server address is specified with +.Fl s Ar server , +and the local address is specified with +.Fl c Ar client . +The +.Fl g Ar gateway +parameter may be used to specify a first-hop router to the server, +or to specify that the currently configured default gateway is to +be used. +Note that the +.Xr netdump 4 +configuration is not automatically updated if any network configuration +(e.g., the default route) changes after the +.Nm +invocation. +The name of the interface to be used must be specified as +.Ar iface . +.Pp The -.Op Fl k Ar public_key_file +.Fl k Ar pubkey flag causes .Nm to generate a one-time key for kernel crash dump encryption. @@ -95,16 +130,16 @@ The key will be replaced by a new one when the .Nm utility is run again. The key is encrypted using -.Ar public_key_file . +.Ar pubkey . This process is sandboxed using .Xr capsicum 4 . Both plain and encrypted keys are sent to the kernel using .Dv DIOCSKERNELDUMP .Xr ioctl 2 . A user can specify the -.Ar public_key_file +.Ar pubkey in the -.Dq dumppubkey +.Dq dumpon_flags variable defined in .Pa /etc/rc.conf for use with the @@ -172,13 +207,13 @@ should be used as the dump device. The .Nm utility operates by opening -.Ar special_file +.Ar device and making a .Dv DIOCSKERNELDUMP .Xr ioctl 2 request on it to save kernel crash dumps. 
If -.Ar special_file +.Ar device is the text string: .Dq Li off , .Nm diff --git a/sbin/dumpon/dumpon.c b/sbin/dumpon/dumpon.c index 0ae95e6e1847..0f536fba88ef 100644 --- a/sbin/dumpon/dumpon.c +++ b/sbin/dumpon/dumpon.c @@ -46,12 +46,15 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include #include #include +#include +#include #include #include #include @@ -61,6 +64,15 @@ __FBSDID("$FreeBSD$"); #include #include +#include + +#include +#include +#include + +#include +#include + #ifdef HAVE_CRYPTO #include #include @@ -72,13 +84,106 @@ static int verbose; static void usage(void) { - fprintf(stderr, "%s\n%s\n%s\n", - "usage: dumpon [-v] [-k public_key_file] [-Zz] special_file", - " dumpon [-v] off", - " dumpon [-v] -l"); + fprintf(stderr, + "usage: dumpon [-v] [-k ] [-Zz] \n" + " dumpon [-v] [-k ] [-Zz]\n" + " [-g |default] -s -c \n" + " dumpon [-v] off\n" + " dumpon [-v] -l\n"); exit(EX_USAGE); } +/* + * Look for a default route on the specified interface. + */ +static char * +find_gateway(const char *ifname) +{ + struct ifaddrs *ifa, *ifap; + struct rt_msghdr *rtm; + struct sockaddr *sa; + struct sockaddr_dl *sdl; + struct sockaddr_in *dst, *mask, *gw; + char *buf, *next, *ret; + size_t sz; + int error, i, ifindex, mib[7]; + + ret = NULL; + + /* First look up the interface index. */ + if (getifaddrs(&ifap) != 0) + err(EX_OSERR, "getifaddrs"); + for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { + if (ifa->ifa_addr->sa_family != AF_LINK) + continue; + if (strcmp(ifa->ifa_name, ifname) == 0) { + sdl = (struct sockaddr_dl *)(void *)ifa->ifa_addr; + ifindex = sdl->sdl_index; + break; + } + } + if (ifa == NULL) + errx(1, "couldn't find interface index for '%s'", ifname); + freeifaddrs(ifap); + + /* Now get the IPv4 routing table. 
*/ + mib[0] = CTL_NET; + mib[1] = PF_ROUTE; + mib[2] = 0; + mib[3] = AF_INET; + mib[4] = NET_RT_DUMP; + mib[5] = 0; + mib[6] = -1; /* FIB */ + + for (;;) { + if (sysctl(mib, nitems(mib), NULL, &sz, NULL, 0) != 0) + err(EX_OSERR, "sysctl(NET_RT_DUMP)"); + buf = malloc(sz); + error = sysctl(mib, nitems(mib), buf, &sz, NULL, 0); + if (error == 0) + break; + if (errno != ENOMEM) + err(EX_OSERR, "sysctl(NET_RT_DUMP)"); + free(buf); + } + + for (next = buf; next < buf + sz; next += rtm->rtm_msglen) { + rtm = (struct rt_msghdr *)(void *)next; + if (rtm->rtm_version != RTM_VERSION) + continue; + if ((rtm->rtm_flags & RTF_GATEWAY) == 0 || + rtm->rtm_index != ifindex) + continue; + + dst = gw = mask = NULL; + sa = (struct sockaddr *)(rtm + 1); + for (i = 0; i < RTAX_MAX; i++) { + if ((rtm->rtm_addrs & (1 << i)) != 0) { + switch (i) { + case RTAX_DST: + dst = (void *)sa; + break; + case RTAX_GATEWAY: + gw = (void *)sa; + break; + case RTAX_NETMASK: + mask = (void *)sa; + break; + } + sa = (struct sockaddr *)((char *)sa + SA_SIZE(sa)); + } + } + + if (dst->sin_addr.s_addr == INADDR_ANY && + mask->sin_addr.s_addr == 0) { + ret = inet_ntoa(gw->sin_addr); + break; + } + } + free(buf); + return (ret); +} + static void check_size(int fd, const char *fn) { @@ -107,13 +212,13 @@ check_size(int fd, const char *fn) #ifdef HAVE_CRYPTO static void -genkey(const char *pubkeyfile, struct diocskerneldump_arg *kda) +genkey(const char *pubkeyfile, struct diocskerneldump_arg *kdap) { FILE *fp; RSA *pubkey; assert(pubkeyfile != NULL); - assert(kda != NULL); + assert(kdap != NULL); fp = NULL; pubkey = NULL; @@ -137,21 +242,21 @@ genkey(const char *pubkeyfile, struct diocskerneldump_arg *kda) if (pubkey == NULL) errx(1, "Unable to read data from %s.", pubkeyfile); - kda->kda_encryptedkeysize = RSA_size(pubkey); - if (kda->kda_encryptedkeysize > KERNELDUMP_ENCKEY_MAX_SIZE) { + kdap->kda_encryptedkeysize = RSA_size(pubkey); + if (kdap->kda_encryptedkeysize > KERNELDUMP_ENCKEY_MAX_SIZE) { errx(1, 
"Public key has to be at most %db long.", 8 * KERNELDUMP_ENCKEY_MAX_SIZE); } - kda->kda_encryptedkey = calloc(1, kda->kda_encryptedkeysize); - if (kda->kda_encryptedkey == NULL) + kdap->kda_encryptedkey = calloc(1, kdap->kda_encryptedkeysize); + if (kdap->kda_encryptedkey == NULL) err(1, "Unable to allocate encrypted key"); - kda->kda_encryption = KERNELDUMP_ENC_AES_256_CBC; - arc4random_buf(kda->kda_key, sizeof(kda->kda_key)); - if (RSA_public_encrypt(sizeof(kda->kda_key), kda->kda_key, - kda->kda_encryptedkey, pubkey, - RSA_PKCS1_PADDING) != (int)kda->kda_encryptedkeysize) { + kdap->kda_encryption = KERNELDUMP_ENC_AES_256_CBC; + arc4random_buf(kdap->kda_key, sizeof(kdap->kda_key)); + if (RSA_public_encrypt(sizeof(kdap->kda_key), kdap->kda_key, + kdap->kda_encryptedkey, pubkey, + RSA_PKCS1_PADDING) != (int)kdap->kda_encryptedkeysize) { errx(1, "Unable to encrypt the one-time key."); } RSA_free(pubkey); @@ -162,8 +267,10 @@ static void listdumpdev(void) { char dumpdev[PATH_MAX]; + struct netdump_conf ndconf; size_t len; const char *sysctlname = "kern.shutdown.dumpdevname"; + int fd; len = sizeof(dumpdev); if (sysctlbyname(sysctlname, &dumpdev, &len, NULL, 0) != 0) { @@ -174,36 +281,88 @@ listdumpdev(void) err(EX_OSERR, "Sysctl get '%s'\n", sysctlname); } } - if (verbose) { + if (strlen(dumpdev) == 0) + (void)strlcpy(dumpdev, _PATH_DEVNULL, sizeof(dumpdev)); + + if (verbose) printf("kernel dumps on "); + printf("%s\n", dumpdev); + + /* If netdump is enabled, print the configuration parameters. 
*/ + if (verbose) { + fd = open(_PATH_NETDUMP, O_RDONLY); + if (fd < 0) { + if (errno != ENOENT) + err(EX_OSERR, "opening %s", _PATH_NETDUMP); + return; + } + if (ioctl(fd, NETDUMPGCONF, &ndconf) != 0) { + if (errno != ENXIO) + err(EX_OSERR, "ioctl(NETDUMPGCONF)"); + (void)close(fd); + return; + } + + printf("server address: %s\n", inet_ntoa(ndconf.ndc_server)); + printf("client address: %s\n", inet_ntoa(ndconf.ndc_client)); + printf("gateway address: %s\n", inet_ntoa(ndconf.ndc_gateway)); + (void)close(fd); } - if (strlen(dumpdev) == 0) { - printf("%s\n", _PATH_DEVNULL); - } else { - printf("%s\n", dumpdev); +} + +static int +opendumpdev(const char *arg, char *dumpdev) +{ + int fd, i; + + if (strncmp(arg, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) + strlcpy(dumpdev, arg, PATH_MAX); + else { + i = snprintf(dumpdev, PATH_MAX, "%s%s", _PATH_DEV, arg); + if (i < 0) + err(EX_OSERR, "%s", arg); + if (i >= PATH_MAX) + errc(EX_DATAERR, EINVAL, "%s", arg); } + + fd = open(dumpdev, O_RDONLY); + if (fd < 0) + err(EX_OSFILE, "%s", dumpdev); + return (fd); } int main(int argc, char *argv[]) { - struct diocskerneldump_arg kda; - const char *pubkeyfile; - int ch; - int i, fd; - int do_listdumpdev = 0; - bool enable, gzip, zstd; - - gzip = zstd = false; + char dumpdev[PATH_MAX]; + struct diocskerneldump_arg _kda, *kdap; + struct netdump_conf ndconf; + struct addrinfo hints, *res; + const char *dev, *pubkeyfile, *server, *client, *gateway; + int ch, error, fd; + bool enable, gzip, list, netdump, zstd; + + gzip = list = netdump = zstd = false; + kdap = NULL; pubkeyfile = NULL; + server = client = gateway = NULL; - while ((ch = getopt(argc, argv, "k:lvZz")) != -1) - switch((char)ch) { + while ((ch = getopt(argc, argv, "c:g:k:ls:vZz")) != -1) + switch ((char)ch) { + case 'c': + client = optarg; + break; + case 'g': + gateway = optarg; + break; case 'k': pubkeyfile = optarg; break; case 'l': - do_listdumpdev = 1; + list = true; + break; + case 's': + server = optarg; break; case 'v': 
verbose = 1; @@ -224,7 +383,7 @@ main(int argc, char *argv[]) argc -= optind; argv += optind; - if (do_listdumpdev) { + if (list) { listdumpdev(); exit(EX_OK); } @@ -232,72 +391,104 @@ main(int argc, char *argv[]) if (argc != 1) usage(); - enable = (strcmp(argv[0], "off") != 0); #ifndef HAVE_CRYPTO - if (pubkeyfile != NULL) { - enable = false; - warnx("Unable to use the public key. Recompile dumpon with OpenSSL support."); - } + if (pubkeyfile != NULL) + errx(EX_UNAVAILABLE, "Unable to use the public key. Recompile dumpon with OpenSSL support."); #endif - if (enable) { - char tmp[PATH_MAX]; - char *dumpdev; + if (server != NULL && client != NULL) { + enable = true; + dev = _PATH_NETDUMP; + netdump = true; + kdap = &ndconf.ndc_kda; + } else if (server == NULL && client == NULL && argc > 0) { + enable = strcmp(argv[0], "off") != 0; + dev = enable ? argv[0] : _PATH_DEVNULL; + netdump = false; + kdap = &_kda; + } else + usage(); - if (strncmp(argv[0], _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) { - dumpdev = argv[0]; - } else { - i = snprintf(tmp, PATH_MAX, "%s%s", _PATH_DEV, argv[0]); - if (i < 0) { - err(EX_OSERR, "%s", argv[0]); - } else if (i >= PATH_MAX) { - errno = EINVAL; - err(EX_DATAERR, "%s", argv[0]); - } - dumpdev = tmp; - } - fd = open(dumpdev, O_RDONLY); - if (fd < 0) - err(EX_OSFILE, "%s", dumpdev); + fd = opendumpdev(dev, dumpdev); + if (!netdump && !gzip && !zstd) + check_size(fd, dumpdev); - if (!gzip && !zstd) - check_size(fd, dumpdev); + bzero(kdap, sizeof(*kdap)); + kdap->kda_enable = 0; + if (ioctl(fd, DIOCSKERNELDUMP, kdap) != 0) + err(EX_OSERR, "ioctl(DIOCSKERNELDUMP)"); + if (!enable) + exit(EX_OK); - bzero(&kda, sizeof(kda)); - kda.kda_enable = 0; - i = ioctl(fd, DIOCSKERNELDUMP, &kda); - explicit_bzero(&kda, sizeof(kda)); + explicit_bzero(kdap, sizeof(*kdap)); + kdap->kda_enable = 1; + kdap->kda_compression = KERNELDUMP_COMP_NONE; + if (zstd) + kdap->kda_compression = KERNELDUMP_COMP_ZSTD; + else if (gzip) + kdap->kda_compression = KERNELDUMP_COMP_GZIP; + + if (netdump) 
{ + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_INET; + hints.ai_protocol = IPPROTO_UDP; + res = NULL; + error = getaddrinfo(server, NULL, &hints, &res); + if (error != 0) + errx(1, "%s", gai_strerror(error)); + if (res == NULL) + errx(1, "failed to resolve '%s'", server); + server = inet_ntoa( + ((struct sockaddr_in *)(void *)res->ai_addr)->sin_addr); + freeaddrinfo(res); + + if (strlcpy(ndconf.ndc_iface, argv[0], + sizeof(ndconf.ndc_iface)) >= sizeof(ndconf.ndc_iface)) + errx(EX_USAGE, "invalid interface name '%s'", argv[0]); + if (inet_aton(server, &ndconf.ndc_server) == 0) + errx(EX_USAGE, "invalid server address '%s'", server); + if (inet_aton(client, &ndconf.ndc_client) == 0) + errx(EX_USAGE, "invalid client address '%s'", client); + + if (gateway == NULL) + gateway = server; + else if (strcmp(gateway, "default") == 0 && + (gateway = find_gateway(argv[0])) == NULL) + errx(EX_NOHOST, + "failed to look up next-hop router for %s", server); + if (inet_aton(gateway, &ndconf.ndc_gateway) == 0) + errx(EX_USAGE, "invalid gateway address '%s'", gateway); #ifdef HAVE_CRYPTO if (pubkeyfile != NULL) - genkey(pubkeyfile, &kda); + genkey(pubkeyfile, kdap); #endif - - kda.kda_enable = 1; - kda.kda_compression = KERNELDUMP_COMP_NONE; - if (zstd) - kda.kda_compression = KERNELDUMP_COMP_ZSTD; - else if (gzip) - kda.kda_compression = KERNELDUMP_COMP_GZIP; - i = ioctl(fd, DIOCSKERNELDUMP, &kda); - explicit_bzero(kda.kda_encryptedkey, kda.kda_encryptedkeysize); - free(kda.kda_encryptedkey); - explicit_bzero(&kda, sizeof(kda)); - if (i == 0 && verbose) - printf("kernel dumps on %s\n", dumpdev); + error = ioctl(fd, NETDUMPSCONF, &ndconf); + if (error != 0) + error = errno; + explicit_bzero(kdap->kda_encryptedkey, + kdap->kda_encryptedkeysize); + free(kdap->kda_encryptedkey); + explicit_bzero(kdap, sizeof(*kdap)); + if (error != 0) + errc(EX_OSERR, error, "ioctl(NETDUMPSCONF)"); } else { - fd = open(_PATH_DEVNULL, O_RDONLY); - if (fd < 0) - err(EX_OSFILE, "%s", 
_PATH_DEVNULL); - - kda.kda_enable = 0; - i = ioctl(fd, DIOCSKERNELDUMP, &kda); - explicit_bzero(&kda, sizeof(kda)); - if (i == 0 && verbose) - printf("kernel dumps disabled\n"); +#ifdef HAVE_CRYPTO + if (pubkeyfile != NULL) + genkey(pubkeyfile, kdap); +#endif + error = ioctl(fd, DIOCSKERNELDUMP, kdap); + if (error != 0) + error = errno; + explicit_bzero(kdap->kda_encryptedkey, + kdap->kda_encryptedkeysize); + free(kdap->kda_encryptedkey); + explicit_bzero(kdap, sizeof(*kdap)); + if (error != 0) + errc(EX_OSERR, error, "ioctl(DIOCSKERNELDUMP)"); } - if (i < 0) - err(EX_OSERR, "ioctl(DIOCSKERNELDUMP)"); + if (verbose) + printf("kernel dumps on %s\n", dumpdev); - exit (0); + exit(EX_OK); } diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile index 35ef9527cdec..9cff653b033a 100644 --- a/share/man/man4/Makefile +++ b/share/man/man4/Makefile @@ -322,6 +322,7 @@ MAN= aac.4 \ ncv.4 \ ${_ndis.4} \ net80211.4 \ + netdump.4 \ netfpga10g_nf10bmac.4 \ netgraph.4 \ netintro.4 \ diff --git a/share/man/man4/netdump.4 b/share/man/man4/netdump.4 new file mode 100644 index 000000000000..352f58ebbdac --- /dev/null +++ b/share/man/man4/netdump.4 @@ -0,0 +1,125 @@ +.\"- +.\" Copyright (c) 2018 Mark Johnston +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd March 27, 2018 +.Dt NETDUMP 4 +.Os +.Sh NAME +.Nm netdump +.Nd protocol for transmitting kernel dumps to a remote server +.Sh SYNOPSIS +To compile netdump client support into the kernel, place the following line in +your kernel configuration file: +.Bd -ragged -offset indent +.Cd "options NETDUMP" +.Ed +.Pp +Debug output can be enabled by adding the following line: +.Bd -ragged -offset indent +.Cd "options NETDUMP_DEBUG" +.Ed +.Sh DESCRIPTION +netdump is a UDP-based protocol for transmitting kernel dumps to a remote host. +A netdump client is a panicking kernel, and a netdump server is a host +running the +.Nm +daemon, available in ports as +.Pa ports/ftp/netdumpd . +.Nm +clients are configured using the +.Xr dumpon 8 +utility. +.Pp +.Nm +client messages consist of a fixed-size header followed by a variable-sized +payload. +The header contains the message type, a sequence number, the offset of +the payload data in the kernel dump, and the length of the payload data +(not including the header). +The message types are +.Dv HERALD , FINISHED , KDH , VMCORE , +and +.Dv EKCD_KEY . 
+.Nm +server messages have a fixed size and contain only the sequence number of +the client message. +These messages indicate that the server has successfully processed the +client message with the corresponding sequence number. +All client messages are acknowledged this way. +Server messages are always sent to port 20024 of the client. +.Pp +To initiate a +.Nm , +the client sends a +.Dv HERALD +message to the server at port 20023. +The client may include a relative path in its payload, in which case the +.Nm +server should attempt to save the dump at that path relative to its configured +dump directory. +The server will acknowledge the +.Dv HERALD +using a random source port, and the client must send all subsequent messages +to that port. +.Pp +The +.Dv KDH , VMCORE , +and +.Dv EKCD_KEY +message payloads contain the kernel dump header, dump contents, and +dump encryption key respectively. +The offset in the message header should be treated as a seek offset +in the corresponding file. +There are no ordering requirements for these messages. +.Pp +A +.Nm +is completed by sending the +.Dv FINISHED +message to the server. +.Pp +The following network drivers support netdump: +.Xr alc 4 , +.Xr bge 4 , +.Xr bxe 4 , +.Xr cxgb 4 , +.Xr em 4 , +.Xr igb 4 , +.Xr ix 4 , +.Xr mlx4en 4 , +.Xr re 4 , +.Xr vtnet 4 . +.Sh SEE ALSO +.Xr decryptcore 8 , +.Xr dumpon 8 , +.Xr savecore 8 +.Sh HISTORY +.Nm +client support first appeared in +.Fx 12.0 . +.Sh BUGS +Only IPv4 is supported. 
diff --git a/sys/amd64/amd64/minidump_machdep.c b/sys/amd64/amd64/minidump_machdep.c index 7c1cbc5286eb..42a888340cfe 100644 --- a/sys/amd64/amd64/minidump_machdep.c +++ b/sys/amd64/amd64/minidump_machdep.c @@ -337,13 +337,13 @@ minidumpsys(struct dumperinfo *di) dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, dumpsize); - printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20, - ptoa((uintmax_t)physmem) / 1048576); - error = dump_start(di, &kdh); if (error != 0) goto fail; + printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20, + ptoa((uintmax_t)physmem) / 1048576); + /* Dump my header */ bzero(&fakepd, sizeof(fakepd)); bcopy(&mdhdr, &fakepd, sizeof(mdhdr)); diff --git a/sys/arm/arm/minidump_machdep.c b/sys/arm/arm/minidump_machdep.c index 49bd4c7447df..f9e3e838104a 100644 --- a/sys/arm/arm/minidump_machdep.c +++ b/sys/arm/arm/minidump_machdep.c @@ -246,13 +246,13 @@ minidumpsys(struct dumperinfo *di) dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_ARM_VERSION, dumpsize); - printf("Physical memory: %u MB\n", ptoa((uintmax_t)physmem) / 1048576); - printf("Dumping %llu MB:", (long long)dumpsize >> 20); - error = dump_start(di, &kdh); if (error != 0) goto fail; + printf("Physical memory: %u MB\n", ptoa((uintmax_t)physmem) / 1048576); + printf("Dumping %llu MB:", (long long)dumpsize >> 20); + /* Dump my header */ bzero(dumpbuf, sizeof(dumpbuf)); bcopy(&mdhdr, dumpbuf, sizeof(mdhdr)); diff --git a/sys/arm64/arm64/minidump_machdep.c b/sys/arm64/arm64/minidump_machdep.c index 34295be87a08..55062dcd0660 100644 --- a/sys/arm64/arm64/minidump_machdep.c +++ b/sys/arm64/arm64/minidump_machdep.c @@ -289,13 +289,13 @@ minidumpsys(struct dumperinfo *di) dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_AARCH64_VERSION, dumpsize); - printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20, - ptoa((uintmax_t)physmem) / 1048576); - error = dump_start(di, &kdh); if (error != 0) goto fail; + printf("Dumping %llu out of %ju 
MB:", (long long)dumpsize >> 20, + ptoa((uintmax_t)physmem) / 1048576); + /* Dump my header */ bzero(&tmpbuffer, sizeof(tmpbuffer)); bcopy(&mdhdr, &tmpbuffer, sizeof(mdhdr)); diff --git a/sys/conf/NOTES b/sys/conf/NOTES index bcc7f935290f..2cc56df87189 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -1028,6 +1028,9 @@ options TCP_SIGNATURE #include support for RFC 2385 # a smooth scheduling of the traffic. options DUMMYNET +options NETDUMP +options NETDUMP_DEBUG + ##################################################################### # FILESYSTEM OPTIONS diff --git a/sys/conf/files b/sys/conf/files index 1425b5c5c29f..0451514979f3 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4363,6 +4363,7 @@ netinet/libalias/alias_mod.c optional libalias | netgraph_nat netinet/libalias/alias_proxy.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_util.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_sctp.c optional libalias inet | netgraph_nat inet +netinet/netdump/netdump_client.c optional inet netdump netinet6/dest6.c optional inet6 netinet6/frag6.c optional inet6 netinet6/icmp6.c optional inet6 diff --git a/sys/conf/options b/sys/conf/options index 2c50d3b47f3f..6e203dc8ea74 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -312,6 +312,9 @@ NFS_ROOT opt_nfsroot.h # SMB/CIFS requester NETSMB opt_netsmb.h +NETDUMP opt_global.h +NETDUMP_DEBUG opt_netdump.h + # Options used only in subr_param.c. 
HZ opt_param.h MAXFILES opt_param.h diff --git a/sys/dev/alc/if_alc.c b/sys/dev/alc/if_alc.c index af10c1abc138..ead3741838f7 100644 --- a/sys/dev/alc/if_alc.c +++ b/sys/dev/alc/if_alc.c @@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -199,6 +200,7 @@ static int alc_shutdown(device_t); static void alc_start(struct ifnet *); static void alc_start_locked(struct ifnet *); static void alc_start_queue(struct alc_softc *); +static void alc_start_tx(struct alc_softc *); static void alc_stats_clear(struct alc_softc *); static void alc_stats_update(struct alc_softc *); static void alc_stop(struct alc_softc *); @@ -213,6 +215,8 @@ static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int, int); static int sysctl_hw_alc_proc_limit(SYSCTL_HANDLER_ARGS); static int sysctl_hw_alc_int_mod(SYSCTL_HANDLER_ARGS); +NETDUMP_DEFINE(alc); + static device_method_t alc_methods[] = { /* Device interface. */ DEVMETHOD(device_probe, alc_probe), @@ -227,7 +231,7 @@ static device_method_t alc_methods[] = { DEVMETHOD(miibus_writereg, alc_miibus_writereg), DEVMETHOD(miibus_statchg, alc_miibus_statchg), - { NULL, NULL } + DEVMETHOD_END }; static driver_t alc_driver = { @@ -1651,6 +1655,9 @@ alc_attach(device_t dev) goto fail; } + /* Attach driver netdump methods. */ + NETDUMP_SET(ifp, alc); + fail: if (error != 0) alc_detach(dev); @@ -2974,22 +2981,28 @@ alc_start_locked(struct ifnet *ifp) ETHER_BPF_MTAP(ifp, m_head); } - if (enq > 0) { - /* Sync descriptors. */ - bus_dmamap_sync(sc->alc_cdata.alc_tx_ring_tag, - sc->alc_cdata.alc_tx_ring_map, BUS_DMASYNC_PREWRITE); - /* Kick. Assume we're using normal Tx priority queue. */ - if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) - CSR_WRITE_2(sc, ALC_MBOX_TD_PRI0_PROD_IDX, - (uint16_t)sc->alc_cdata.alc_tx_prod); - else - CSR_WRITE_4(sc, ALC_MBOX_TD_PROD_IDX, - (sc->alc_cdata.alc_tx_prod << - MBOX_TD_PROD_LO_IDX_SHIFT) & - MBOX_TD_PROD_LO_IDX_MASK); - /* Set a timeout in case the chip goes out to lunch. 
*/ - sc->alc_watchdog_timer = ALC_TX_TIMEOUT; - } + if (enq > 0) + alc_start_tx(sc); +} + +static void +alc_start_tx(struct alc_softc *sc) +{ + + /* Sync descriptors. */ + bus_dmamap_sync(sc->alc_cdata.alc_tx_ring_tag, + sc->alc_cdata.alc_tx_ring_map, BUS_DMASYNC_PREWRITE); + /* Kick. Assume we're using normal Tx priority queue. */ + if ((sc->alc_flags & ALC_FLAG_AR816X_FAMILY) != 0) + CSR_WRITE_2(sc, ALC_MBOX_TD_PRI0_PROD_IDX, + (uint16_t)sc->alc_cdata.alc_tx_prod); + else + CSR_WRITE_4(sc, ALC_MBOX_TD_PROD_IDX, + (sc->alc_cdata.alc_tx_prod << + MBOX_TD_PROD_LO_IDX_SHIFT) & + MBOX_TD_PROD_LO_IDX_MASK); + /* Set a timeout in case the chip goes out to lunch. */ + sc->alc_watchdog_timer = ALC_TX_TIMEOUT; } static void @@ -4642,3 +4655,54 @@ sysctl_hw_alc_int_mod(SYSCTL_HANDLER_ARGS) return (sysctl_int_range(oidp, arg1, arg2, req, ALC_IM_TIMER_MIN, ALC_IM_TIMER_MAX)); } + +#ifdef NETDUMP +static void +alc_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) +{ + struct alc_softc *sc; + + sc = if_getsoftc(ifp); + KASSERT(sc->alc_buf_size <= MCLBYTES, ("incorrect cluster size")); + + *nrxr = ALC_RX_RING_CNT; + *ncl = NETDUMP_MAX_IN_FLIGHT; + *clsize = MCLBYTES; +} + +static void +alc_netdump_event(struct ifnet *ifp __unused, enum netdump_ev event __unused) +{ +} + +static int +alc_netdump_transmit(struct ifnet *ifp, struct mbuf *m) +{ + struct alc_softc *sc; + int error; + + sc = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return (EBUSY); + + error = alc_encap(sc, &m); + if (error == 0) + alc_start_tx(sc); + return (error); +} + +static int +alc_netdump_poll(struct ifnet *ifp, int count) +{ + struct alc_softc *sc; + + sc = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return (EBUSY); + + alc_txeof(sc); + return (alc_rxintr(sc, count)); +} +#endif /* NETDUMP */ diff --git a/sys/dev/bge/if_bge.c b/sys/dev/bge/if_bge.c index 
1edf2a274632..f736eeb00790 100644 --- a/sys/dev/bge/if_bge.c +++ b/sys/dev/bge/if_bge.c @@ -100,6 +100,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -426,8 +427,9 @@ static int bge_encap(struct bge_softc *, struct mbuf **, uint32_t *); static void bge_intr(void *); static int bge_msi_intr(void *); static void bge_intr_task(void *, int); -static void bge_start_locked(if_t); static void bge_start(if_t); +static void bge_start_locked(if_t); +static void bge_start_tx(struct bge_softc *, uint32_t); static int bge_ioctl(if_t, u_long, caddr_t); static void bge_init_locked(struct bge_softc *); static void bge_init(void *); @@ -517,6 +519,8 @@ static void bge_add_sysctl_stats(struct bge_softc *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static int bge_sysctl_stats(SYSCTL_HANDLER_ARGS); +NETDUMP_DEFINE(bge); + static device_method_t bge_methods[] = { /* Device interface */ DEVMETHOD(device_probe, bge_probe), @@ -3941,8 +3945,12 @@ bge_attach(device_t dev) if (error) { ether_ifdetach(ifp); device_printf(sc->bge_dev, "couldn't set up irq\n"); + goto fail; } + /* Attach driver netdump methods. */ + NETDUMP_SET(ifp, bge); + fail: if (error) bge_detach(dev); @@ -5389,22 +5397,26 @@ bge_start_locked(if_t ifp) if_bpfmtap(ifp, m_head); } - if (count > 0) { - bus_dmamap_sync(sc->bge_cdata.bge_tx_ring_tag, - sc->bge_cdata.bge_tx_ring_map, BUS_DMASYNC_PREWRITE); - /* Transmit. */ + if (count > 0) + bge_start_tx(sc, prodidx); +} + +static void +bge_start_tx(struct bge_softc *sc, uint32_t prodidx) +{ + + bus_dmamap_sync(sc->bge_cdata.bge_tx_ring_tag, + sc->bge_cdata.bge_tx_ring_map, BUS_DMASYNC_PREWRITE); + /* Transmit. 
*/ + bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, prodidx); + /* 5700 b2 errata */ + if (sc->bge_chiprev == BGE_CHIPREV_5700_BX) bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, prodidx); - /* 5700 b2 errata */ - if (sc->bge_chiprev == BGE_CHIPREV_5700_BX) - bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, prodidx); - sc->bge_tx_prodidx = prodidx; + sc->bge_tx_prodidx = prodidx; - /* - * Set a timeout in case the chip goes out to lunch. - */ - sc->bge_timer = BGE_TX_TIMEOUT; - } + /* Set a timeout in case the chip goes out to lunch. */ + sc->bge_timer = BGE_TX_TIMEOUT; } /* @@ -6796,3 +6808,74 @@ bge_get_counter(if_t ifp, ift_counter cnt) return (if_get_counter_default(ifp, cnt)); } } + +#ifdef NETDUMP +static void +bge_netdump_init(if_t ifp, int *nrxr, int *ncl, int *clsize) +{ + struct bge_softc *sc; + + sc = if_getsoftc(ifp); + BGE_LOCK(sc); + *nrxr = sc->bge_return_ring_cnt; + *ncl = NETDUMP_MAX_IN_FLIGHT; + if ((sc->bge_flags & BGE_FLAG_JUMBO_STD) != 0 && + (if_getmtu(sc->bge_ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN + + ETHER_VLAN_ENCAP_LEN > (MCLBYTES - ETHER_ALIGN))) + *clsize = MJUM9BYTES; + else + *clsize = MCLBYTES; + BGE_UNLOCK(sc); +} + +static void +bge_netdump_event(if_t ifp __unused, enum netdump_ev event __unused) +{ +} + +static int +bge_netdump_transmit(if_t ifp, struct mbuf *m) +{ + struct bge_softc *sc; + uint32_t prodidx; + int error; + + sc = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return (1); + + prodidx = sc->bge_tx_prodidx; + error = bge_encap(sc, &m, &prodidx); + if (error == 0) + bge_start_tx(sc, prodidx); + return (error); +} + +static int +bge_netdump_poll(if_t ifp, int count) +{ + struct bge_softc *sc; + uint32_t rx_prod, tx_cons; + + sc = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return (1); + + bus_dmamap_sync(sc->bge_cdata.bge_status_tag, + sc->bge_cdata.bge_status_map, + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 
+ + rx_prod = sc->bge_ldata.bge_status_block->bge_idx[0].bge_rx_prod_idx; + tx_cons = sc->bge_ldata.bge_status_block->bge_idx[0].bge_tx_cons_idx; + + bus_dmamap_sync(sc->bge_cdata.bge_status_tag, + sc->bge_cdata.bge_status_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + + (void)bge_rxeof(sc, rx_prod, 0); + bge_txeof(sc, tx_cons); + return (0); +} +#endif /* NETDUMP */ diff --git a/sys/dev/bxe/bxe.c b/sys/dev/bxe/bxe.c index 795b42184d9a..6ea0ec28496d 100644 --- a/sys/dev/bxe/bxe.c +++ b/sys/dev/bxe/bxe.c @@ -236,6 +236,8 @@ MODULE_DEPEND(bxe, pci, 1, 1, 1); MODULE_DEPEND(bxe, ether, 1, 1, 1); DRIVER_MODULE(bxe, pci, bxe_driver, bxe_devclass, 0, 0); +NETDUMP_DEFINE(bxe); + /* resources needed for unloading a previously loaded device */ #define BXE_PREV_WAIT_NEEDED 1 @@ -12767,6 +12769,9 @@ bxe_init_ifnet(struct bxe_softc *sc) /* attach to the Ethernet interface list */ ether_ifattach(ifp, sc->link_params.mac_addr); + /* Attach driver netdump methods. */ + NETDUMP_SET(ifp, bxe); + return (0); } @@ -19164,3 +19169,57 @@ bxe_eioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, return (rval); } + +#ifdef NETDUMP +static void +bxe_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) +{ + struct bxe_softc *sc; + + sc = if_getsoftc(ifp); + BXE_CORE_LOCK(sc); + *nrxr = sc->num_queues; + *ncl = NETDUMP_MAX_IN_FLIGHT; + *clsize = sc->fp[0].mbuf_alloc_size; + BXE_CORE_UNLOCK(sc); +} + +static void +bxe_netdump_event(struct ifnet *ifp __unused, enum netdump_ev event __unused) +{ +} + +static int +bxe_netdump_transmit(struct ifnet *ifp, struct mbuf *m) +{ + struct bxe_softc *sc; + int error; + + sc = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING || !sc->link_vars.link_up) + return (ENOENT); + + error = bxe_tx_encap(&sc->fp[0], &m); + if (error != 0 && m != NULL) + m_freem(m); + return (error); +} + +static int +bxe_netdump_poll(struct ifnet *ifp, int count) +{ + struct bxe_softc *sc; + int i; 
+ + sc = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 || + !sc->link_vars.link_up) + return (ENOENT); + + for (i = 0; i < sc->num_queues; i++) + (void)bxe_rxeof(sc, &sc->fp[i]); + (void)bxe_txeof(sc, &sc->fp[0]); + return (0); +} +#endif /* NETDUMP */ diff --git a/sys/dev/bxe/bxe.h b/sys/dev/bxe/bxe.h index c63fb1ccbbd0..7a2a112b104a 100644 --- a/sys/dev/bxe/bxe.h +++ b/sys/dev/bxe/bxe.h @@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include diff --git a/sys/dev/cxgb/cxgb_adapter.h b/sys/dev/cxgb/cxgb_adapter.h index e6f22baf3ae0..39abeba2fa5d 100644 --- a/sys/dev/cxgb/cxgb_adapter.h +++ b/sys/dev/cxgb/cxgb_adapter.h @@ -576,4 +576,11 @@ int cxgb_transmit(struct ifnet *ifp, struct mbuf *m); void cxgb_qflush(struct ifnet *ifp); void t3_iterate(void (*)(struct adapter *, void *), void *); void cxgb_refresh_stats(struct port_info *); + +#ifdef NETDUMP +int cxgb_netdump_encap(struct sge_qset *qs, struct mbuf **m); +int cxgb_netdump_poll_rx(adapter_t *adap, struct sge_qset *qs); +int cxgb_netdump_poll_tx(struct sge_qset *qs); +#endif + #endif diff --git a/sys/dev/cxgb/cxgb_main.c b/sys/dev/cxgb/cxgb_main.c index 622fbd196276..127a61d61c21 100644 --- a/sys/dev/cxgb/cxgb_main.c +++ b/sys/dev/cxgb/cxgb_main.c @@ -74,6 +74,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -191,6 +192,8 @@ static devclass_t cxgb_port_devclass; DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0); MODULE_VERSION(cxgb, 1); +NETDUMP_DEFINE(cxgb); + static struct mtx t3_list_lock; static SLIST_HEAD(, adapter) t3_list; #ifdef TCP_OFFLOAD @@ -1045,6 +1048,9 @@ cxgb_port_attach(device_t dev) ether_ifattach(ifp, p->hw_addr); + /* Attach driver netdump methods.
*/ + NETDUMP_SET(ifp, cxgb); + #ifdef DEFAULT_JUMBO if (sc->params.nports <= 2) ifp->if_mtu = ETHERMTU_JUMBO; @@ -3578,3 +3584,72 @@ cxgbc_mod_event(module_t mod, int cmd, void *arg) return (rc); } + +#ifdef NETDUMP +static void +cxgb_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) +{ + struct port_info *pi; + adapter_t *adap; + + pi = if_getsoftc(ifp); + adap = pi->adapter; + ADAPTER_LOCK(adap); + *nrxr = SGE_QSETS; + *ncl = adap->sge.qs[0].fl[1].size; + *clsize = adap->sge.qs[0].fl[1].buf_size; + ADAPTER_UNLOCK(adap); +} + +static void +cxgb_netdump_event(struct ifnet *ifp, enum netdump_ev event) +{ + struct port_info *pi; + struct sge_qset *qs; + int i; + + pi = if_getsoftc(ifp); + if (event == NETDUMP_START) + for (i = 0; i < SGE_QSETS; i++) { + qs = &pi->adapter->sge.qs[i]; + + /* Need to reinit after netdump_mbuf_dump(). */ + qs->fl[0].zone = zone_pack; + qs->fl[1].zone = zone_clust; + qs->lro.enabled = 0; + } +} + +static int +cxgb_netdump_transmit(struct ifnet *ifp, struct mbuf *m) +{ + struct port_info *pi; + struct sge_qset *qs; + + pi = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return (ENOENT); + + qs = &pi->adapter->sge.qs[pi->first_qset]; + return (cxgb_netdump_encap(qs, &m)); +} + +static int +cxgb_netdump_poll(struct ifnet *ifp, int count) +{ + struct port_info *pi; + adapter_t *adap; + int i; + + pi = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) + return (ENOENT); + + adap = pi->adapter; + for (i = 0; i < SGE_QSETS; i++) + (void)cxgb_netdump_poll_rx(adap, &adap->sge.qs[i]); + (void)cxgb_netdump_poll_tx(&adap->sge.qs[pi->first_qset]); + return (0); +} +#endif /* NETDUMP */ diff --git a/sys/dev/cxgb/cxgb_sge.c b/sys/dev/cxgb/cxgb_sge.c index 40d0cd65a503..74c3b873f593 100644 --- a/sys/dev/cxgb/cxgb_sge.c +++ b/sys/dev/cxgb/cxgb_sge.c @@ -390,6 +390,15 @@ reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue) return (reclaim); } 
+#ifdef NETDUMP +int +cxgb_netdump_poll_tx(struct sge_qset *qs) +{ + + return (reclaim_completed_tx(qs, TX_RECLAIM_MAX, TXQ_ETH)); +} +#endif + /** * should_restart_tx - are there enough resources to restart a Tx queue? * @q: the Tx queue @@ -1586,6 +1595,23 @@ t3_encap(struct sge_qset *qs, struct mbuf **m) return (0); } +#ifdef NETDUMP +int +cxgb_netdump_encap(struct sge_qset *qs, struct mbuf **m) +{ + int error; + + error = t3_encap(qs, m); + if (error == 0) + check_ring_tx_db(qs->port->adapter, &qs->txq[TXQ_ETH], 1); + else if (*m != NULL) { + m_freem(*m); + *m = NULL; + } + return (error); +} +#endif + void cxgb_tx_watchdog(void *arg) { @@ -3014,6 +3040,14 @@ process_responses_gts(adapter_t *adap, struct sge_rspq *rq) return (work); } +#ifdef NETDUMP +int +cxgb_netdump_poll_rx(adapter_t *adap, struct sge_qset *qs) +{ + + return (process_responses_gts(adap, &qs->rspq)); +} +#endif /* * Interrupt handler for legacy INTx interrupts for T3B-based cards. diff --git a/sys/dev/mlx4/mlx4_en/en.h b/sys/dev/mlx4/mlx4_en/en.h index b4abecb84c24..9fae5a43bf1a 100644 --- a/sys/dev/mlx4/mlx4_en/en.h +++ b/sys/dev/mlx4/mlx4_en/en.h @@ -54,6 +54,7 @@ #include #include +#include #include "en_port.h" #include @@ -814,6 +815,8 @@ void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget); +int mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq); +int mlx4_en_xmit(struct mlx4_en_priv *priv, int tx_ind, struct mbuf **m); void mlx4_en_poll_tx_cq(unsigned long data); void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, int is_tx, int rss, int qpn, int cqn, int user_prio, diff --git a/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c b/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c index cdf7abef2d65..1c2d0a5116f2 100644 --- a/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c +++ b/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c @@ -53,6 +53,8 @@ #include "en.h" #include "en_port.h" 
+NETDUMP_DEFINE(mlx4_en); + static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv); static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv); @@ -2298,6 +2300,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO); + NETDUMP_SET(dev, mlx4_en); + en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); @@ -2879,3 +2883,56 @@ static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv) CTLFLAG_RD, &rx_ring->errors, 0, "RX soft errors"); } } + +#ifdef NETDUMP +static void +mlx4_en_netdump_init(struct ifnet *dev, int *nrxr, int *ncl, int *clsize) +{ + struct mlx4_en_priv *priv; + + priv = if_getsoftc(dev); + mutex_lock(&priv->mdev->state_lock); + *nrxr = priv->rx_ring_num; + *ncl = NETDUMP_MAX_IN_FLIGHT; + *clsize = priv->rx_mb_size; + mutex_unlock(&priv->mdev->state_lock); +} + +static void +mlx4_en_netdump_event(struct ifnet *dev, enum netdump_ev event) +{ +} + +static int +mlx4_en_netdump_transmit(struct ifnet *dev, struct mbuf *m) +{ + struct mlx4_en_priv *priv; + int err; + + priv = if_getsoftc(dev); + if ((if_getdrvflags(dev) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING || !priv->link_state) + return (ENOENT); + + err = mlx4_en_xmit(priv, 0, &m); + if (err != 0 && m != NULL) + m_freem(m); + return (err); +} + +static int +mlx4_en_netdump_poll(struct ifnet *dev, int count) +{ + struct mlx4_en_priv *priv; + int i; + + priv = if_getsoftc(dev); + if ((if_getdrvflags(dev) & IFF_DRV_RUNNING) == 0 || !priv->link_state) + return (ENOENT); + + (void)mlx4_en_process_tx_cq(dev, priv->tx_cq[0]); + for (i = 0; i < priv->rx_ring_num; i++) + (void)mlx4_en_process_rx_cq(dev, priv->rx_cq[i], 0); + return (0); +} +#endif /* NETDUMP */ diff --git a/sys/dev/mlx4/mlx4_en/mlx4_en_tx.c b/sys/dev/mlx4/mlx4_en/mlx4_en_tx.c index a3692712767a..ac94b14cc47f 100644 --- 
a/sys/dev/mlx4/mlx4_en/mlx4_en_tx.c +++ b/sys/dev/mlx4/mlx4_en/mlx4_en_tx.c @@ -356,8 +356,7 @@ mlx4_en_tx_ring_is_full(struct mlx4_en_tx_ring *ring) return (wqs < (HEADROOM + (2 * MLX4_EN_TX_WQE_MAX_WQEBBS))); } -static int mlx4_en_process_tx_cq(struct net_device *dev, - struct mlx4_en_cq *cq) +int mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cq *mcq = &cq->mcq; @@ -648,7 +647,7 @@ static void mlx4_bf_copy(void __iomem *dst, volatile unsigned long *src, unsigne __iowrite64_copy(dst, __DEVOLATILE(void *, src), bytecnt / 8); } -static int mlx4_en_xmit(struct mlx4_en_priv *priv, int tx_ind, struct mbuf **mbp) +int mlx4_en_xmit(struct mlx4_en_priv *priv, int tx_ind, struct mbuf **mbp) { enum { DS_FACT = TXBB_SIZE / DS_SIZE_ALIGNMENT, diff --git a/sys/dev/null/null.c b/sys/dev/null/null.c index c11a6fd8a102..c1e81ed24024 100644 --- a/sys/dev/null/null.c +++ b/sys/dev/null/null.c @@ -107,14 +107,14 @@ null_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data __unused, int flags __unused, struct thread *td) { int error; - error = 0; + error = 0; switch (cmd) { #ifdef COMPAT_FREEBSD11 case DIOCSKERNELDUMP_FREEBSD11: #endif case DIOCSKERNELDUMP: - error = set_dumper(NULL, NULL, td, 0, 0, NULL, 0, NULL); + error = clear_dumper(td); break; case FIONBIO: break; diff --git a/sys/dev/re/if_re.c b/sys/dev/re/if_re.c index e9328b5ec1d1..50b194fd544c 100644 --- a/sys/dev/re/if_re.c +++ b/sys/dev/re/if_re.c @@ -139,6 +139,8 @@ __FBSDID("$FreeBSD$"); #include +#include + #include #include #include @@ -279,6 +281,7 @@ static void re_tick (void *); static void re_int_task (void *, int); static void re_start (struct ifnet *); static void re_start_locked (struct ifnet *); +static void re_start_tx (struct rl_softc *); static int re_ioctl (struct ifnet *, u_long, caddr_t); static void re_init (void *); static void re_init_locked (struct rl_softc *); @@ -307,6 +310,8 @@ static void re_setwol (struct 
rl_softc *); static void re_clrwol (struct rl_softc *); static void re_set_linkspeed (struct rl_softc *); +NETDUMP_DEFINE(re); + #ifdef DEV_NETMAP /* see ixgbe.c for details */ #include MODULE_DEPEND(re, netmap, 1, 1, 1); @@ -1737,8 +1742,11 @@ re_attach(device_t dev) if (error) { device_printf(dev, "couldn't set up irq\n"); ether_ifdetach(ifp); + goto fail; } + NETDUMP_SET(ifp, re); + fail: if (error) re_detach(dev); @@ -2981,8 +2989,14 @@ re_start_locked(struct ifnet *ifp) return; } - /* Flush the TX descriptors */ + re_start_tx(sc); +} +static void +re_start_tx(struct rl_softc *sc) +{ + + /* Flush the TX descriptors */ bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD); @@ -4078,3 +4092,59 @@ sysctl_hw_re_int_mod(SYSCTL_HANDLER_ARGS) return (sysctl_int_range(oidp, arg1, arg2, req, RL_TIMER_MIN, RL_TIMER_MAX)); } + +#ifdef NETDUMP +static void +re_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) +{ + struct rl_softc *sc; + + sc = if_getsoftc(ifp); + RL_LOCK(sc); + *nrxr = sc->rl_ldata.rl_rx_desc_cnt; + *ncl = NETDUMP_MAX_IN_FLIGHT; + *clsize = (ifp->if_mtu > RL_MTU && + (sc->rl_flags & RL_FLAG_JUMBOV2) != 0) ? 
MJUM9BYTES : MCLBYTES; + RL_UNLOCK(sc); +} + +static void +re_netdump_event(struct ifnet *ifp __unused, enum netdump_ev event __unused) +{ +} + +static int +re_netdump_transmit(struct ifnet *ifp, struct mbuf *m) +{ + struct rl_softc *sc; + int error; + + sc = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0) + return (EBUSY); + + error = re_encap(sc, &m); + if (error == 0) + re_start_tx(sc); + return (error); +} + +static int +re_netdump_poll(struct ifnet *ifp, int count) +{ + struct rl_softc *sc; + int error; + + sc = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 || + (sc->rl_flags & RL_FLAG_LINK) == 0) + return (EBUSY); + + re_txeof(sc); + error = re_rxeof(sc, NULL); + if (error != 0 && error != EAGAIN) + return (error); + return (0); +} +#endif /* NETDUMP */ diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c index 2a7174177d56..fa69fa8a6fa4 100644 --- a/sys/dev/virtio/network/if_vtnet.c +++ b/sys/dev/virtio/network/if_vtnet.c @@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -143,7 +144,7 @@ static struct mbuf * struct virtio_net_hdr *); static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **, struct vtnet_tx_header *); -static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **); +static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int); #ifdef VTNET_LEGACY_TX static void vtnet_start_locked(struct vtnet_txq *, struct ifnet *); static void vtnet_start(struct ifnet *); @@ -231,6 +232,8 @@ static void vtnet_disable_interrupts(struct vtnet_softc *); static int vtnet_tunable_int(struct vtnet_softc *, const char *, int); +NETDUMP_DEFINE(vtnet); + /* Tunables. 
*/ static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD, 0, "VNET driver parameters"); static int vtnet_csum_disable = 0; @@ -1026,6 +1029,8 @@ vtnet_setup_interface(struct vtnet_softc *sc) vtnet_set_rx_process_limit(sc); vtnet_set_tx_intr_threshold(sc); + NETDUMP_SET(ifp, vtnet); + return (0); } @@ -2176,7 +2181,7 @@ vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, } static int -vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head) +vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags) { struct vtnet_tx_header *txhdr; struct virtio_net_hdr *hdr; @@ -2186,7 +2191,7 @@ vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head) m = *m_head; M_ASSERTPKTHDR(m); - txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO); + txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO); if (txhdr == NULL) { m_freem(m); *m_head = NULL; @@ -2260,7 +2265,7 @@ vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp) if (m0 == NULL) break; - if (vtnet_txq_encap(txq, &m0) != 0) { + if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) { if (m0 != NULL) IFQ_DRV_PREPEND(&ifp->if_snd, m0); break; @@ -2337,7 +2342,7 @@ vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m) break; } - if (vtnet_txq_encap(txq, &m) != 0) { + if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) { if (m != NULL) drbr_putback(ifp, br, m); else @@ -3976,3 +3981,69 @@ vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def) return (def); } + +#ifdef NETDUMP +static void +vtnet_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) +{ + struct vtnet_softc *sc; + + sc = if_getsoftc(ifp); + + VTNET_CORE_LOCK(sc); + *nrxr = sc->vtnet_max_vq_pairs; + *ncl = NETDUMP_MAX_IN_FLIGHT; + *clsize = sc->vtnet_rx_clsize; + VTNET_CORE_UNLOCK(sc); + + /* + * We need to allocate from this zone in the transmit path, so ensure + * that we have at least one item per header available. + * XXX add a separate zone like we do for mbufs? 
otherwise we may alloc + * buckets + */ + uma_zone_reserve(vtnet_tx_header_zone, NETDUMP_MAX_IN_FLIGHT * 2); + uma_prealloc(vtnet_tx_header_zone, NETDUMP_MAX_IN_FLIGHT * 2); +} + +static void +vtnet_netdump_event(struct ifnet *ifp __unused, enum netdump_ev event __unused) +{ +} + +static int +vtnet_netdump_transmit(struct ifnet *ifp, struct mbuf *m) +{ + struct vtnet_softc *sc; + struct vtnet_txq *txq; + int error; + + sc = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return (EBUSY); + + txq = &sc->vtnet_txqs[0]; + error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE); + if (error == 0) + error = vtnet_txq_notify(txq); + return (error); +} + +static int +vtnet_netdump_poll(struct ifnet *ifp, int count) +{ + struct vtnet_softc *sc; + int i; + + sc = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return (EBUSY); + + (void)vtnet_txq_eof(&sc->vtnet_txqs[0]); + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) + (void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]); + return (0); +} +#endif /* NETDUMP */ diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c index e7c0420d3316..2a5aca6b4237 100644 --- a/sys/geom/geom_dev.c +++ b/sys/geom/geom_dev.c @@ -138,10 +138,11 @@ g_dev_setdumpdev(struct cdev *dev, struct diocskerneldump_arg *kda, int error, len; if (dev == NULL || kda == NULL) - return (set_dumper(NULL, NULL, td, 0, 0, NULL, 0, NULL)); + return (clear_dumper(td)); cp = dev->si_drv2; len = sizeof(kd); + memset(&kd, 0, len); kd.offset = 0; kd.length = OFF_MAX; error = g_io_getattr("GEOM::kerneldump", cp, &len, &kd); @@ -833,7 +834,7 @@ g_dev_orphan(struct g_consumer *cp) /* Reset any dump-area set on this device */ if (dev->si_flags & SI_DUMPDEV) - (void)set_dumper(NULL, NULL, curthread, 0, 0, NULL, 0, NULL); + (void)clear_dumper(curthread); /* Destroy the struct cdev *so we get no more requests */ destroy_dev_sched_cb(dev, g_dev_callback, cp); diff --git 
a/sys/i386/i386/minidump_machdep.c b/sys/i386/i386/minidump_machdep.c index 3f1ba361501f..9aa17d0e3f36 100644 --- a/sys/i386/i386/minidump_machdep.c +++ b/sys/i386/i386/minidump_machdep.c @@ -254,13 +254,13 @@ minidumpsys(struct dumperinfo *di) dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION, dumpsize); - printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576); - printf("Dumping %llu MB:", (long long)dumpsize >> 20); - error = dump_start(di, &kdh); if (error != 0) goto fail; + printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576); + printf("Dumping %llu MB:", (long long)dumpsize >> 20); + /* Dump my header */ bzero(&fakept, sizeof(fakept)); bcopy(&mdhdr, &fakept, sizeof(mdhdr)); diff --git a/sys/kern/kern_dump.c b/sys/kern/kern_dump.c index 8d2e8ab19a8c..be5339e3a9e5 100644 --- a/sys/kern/kern_dump.c +++ b/sys/kern/kern_dump.c @@ -330,13 +330,13 @@ dumpsys_generic(struct dumperinfo *di) dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_ARCH_VERSION, dumpsize); - printf("Dumping %ju MB (%d chunks)\n", (uintmax_t)dumpsize >> 20, - ehdr.e_phnum - DUMPSYS_NUM_AUX_HDRS); - error = dump_start(di, &kdh); if (error != 0) goto fail; + printf("Dumping %ju MB (%d chunks)\n", (uintmax_t)dumpsize >> 20, + ehdr.e_phnum - DUMPSYS_NUM_AUX_HDRS); + /* Dump ELF header */ error = dumpsys_buf_write(di, (char*)&ehdr, sizeof(ehdr)); if (error) diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c index aa3fed74697a..3d263634aa75 100644 --- a/sys/kern/kern_mbuf.c +++ b/sys/kern/kern_mbuf.c @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -379,6 +380,191 @@ mbuf_init(void *dummy) } SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); +#ifdef NETDUMP +static struct mbufq nd_mbufq = + { STAILQ_HEAD_INITIALIZER(nd_mbufq.mq_head), 0, INT_MAX }; +static struct mbufq nd_clustq = + { STAILQ_HEAD_INITIALIZER(nd_clustq.mq_head), 0, INT_MAX }; + +static int nd_clsize; 
+static uma_zone_t nd_zone_mbuf; +static uma_zone_t nd_zone_clust; +static uma_zone_t nd_zone_pack; + +static int +nd_buf_import(void *arg, void **store, int count, int domain __unused, + int flags) +{ + struct mbufq *q; + struct mbuf *m; + int i; + + q = arg; + + for (i = 0; i < count; i++) { + m = mbufq_dequeue(q); + if (m == NULL) + break; + trash_init(m, q == &nd_mbufq ? MSIZE : nd_clsize, flags); + store[i] = m; + } + return (i); +} + +static void +nd_buf_release(void *arg, void **store, int count) +{ + struct mbufq *q; + struct mbuf *m; + int i; + + q = arg; + + for (i = 0; i < count; i++) { + m = store[i]; + (void)mbufq_enqueue(q, m); + } +} + +static int +nd_pack_import(void *arg __unused, void **store, int count, int domain __unused, + int flags __unused) +{ + struct mbuf *m; + void *clust; + int i; + + for (i = 0; i < count; i++) { + m = m_get(MT_DATA, M_NOWAIT); + if (m == NULL) + break; + clust = uma_zalloc(nd_zone_clust, M_NOWAIT); + if (clust == NULL) { + m_free(m); + break; + } + mb_ctor_clust(clust, nd_clsize, m, 0); + store[i] = m; + } + return (i); +} + +static void +nd_pack_release(void *arg __unused, void **store, int count) +{ + struct mbuf *m; + void *clust; + int i; + + for (i = 0; i < count; i++) { + m = store[i]; + clust = m->m_ext.ext_buf; + uma_zfree(nd_zone_clust, clust); + uma_zfree(nd_zone_mbuf, m); + } +} + +void +netdump_mbuf_drain(void) +{ + struct mbuf *m; + void *item; + + if (nd_zone_mbuf != NULL) { + uma_zdestroy(nd_zone_mbuf); + nd_zone_mbuf = NULL; + } + if (nd_zone_clust != NULL) { + uma_zdestroy(nd_zone_clust); + nd_zone_clust = NULL; + } + if (nd_zone_pack != NULL) { + uma_zdestroy(nd_zone_pack); + nd_zone_pack = NULL; + } + + while ((m = mbufq_dequeue(&nd_mbufq)) != NULL) + m_free(m); + while ((item = mbufq_dequeue(&nd_clustq)) != NULL) + uma_zfree(m_getzone(nd_clsize), item); +} + +/* + * Callback invoked immediately prior to starting a netdump. 
+ */ +void +netdump_mbuf_dump(void) +{ + + /* + * All cluster zones return buffers of the size requested by the + * drivers. It's up to the driver to reinitialize the zones if the + * MTU of a netdump-enabled interface changes. + */ + printf("netdump: overwriting mbuf zone pointers\n"); + zone_mbuf = nd_zone_mbuf; + zone_clust = nd_zone_clust; + zone_pack = nd_zone_pack; + zone_jumbop = nd_zone_clust; + zone_jumbo9 = nd_zone_clust; + zone_jumbo16 = nd_zone_clust; +} + +/* + * (Re-)Initialize zones used to cache netdump packet buffers. At panic-time, we + * swap out the regular mbuf/cluster zones with these, ensuring that drivers and + * the protocol code can allocate buffers from a preallocated pool, rather than + * relying on memory allocation to succeed after a panic. + * + * We keep mbufs and clusters in a pair of mbuf queues. In particular, for the + * purpose of caching clusters, we treat them as mbufs. + */ +void +netdump_mbuf_reinit(int nmbuf, int nclust, int clsize) +{ + struct mbuf *m; + void *item; + + netdump_mbuf_drain(); + + nd_clsize = clsize; + + nd_zone_mbuf = uma_zcache_create("netdump_" MBUF_MEM_NAME, + MSIZE, mb_ctor_mbuf, mb_dtor_mbuf, +#ifdef INVARIANTS + trash_init, trash_fini, +#else + NULL, NULL, +#endif + nd_buf_import, nd_buf_release, + &nd_mbufq, UMA_ZONE_NOBUCKET); + + nd_zone_clust = uma_zcache_create("netdump_" MBUF_CLUSTER_MEM_NAME, + clsize, mb_ctor_clust, +#ifdef INVARIANTS + trash_dtor, trash_init, trash_fini, +#else + NULL, NULL, NULL, +#endif + nd_buf_import, nd_buf_release, + &nd_clustq, UMA_ZONE_NOBUCKET); + + nd_zone_pack = uma_zcache_create("netdump_" MBUF_PACKET_MEM_NAME, + MCLBYTES, mb_ctor_pack, mb_dtor_pack, NULL, NULL, + nd_pack_import, nd_pack_release, + NULL, UMA_ZONE_NOBUCKET); + + while (nmbuf-- > 0) { + m = m_get(MT_DATA, M_WAITOK); + uma_zfree(nd_zone_mbuf, m); + } + while (nclust-- > 0) { + item = uma_zalloc(m_getzone(nd_clsize), M_WAITOK); + uma_zfree(nd_zone_clust, item); + } +} +#endif /* NETDUMP */ + /* * UMA 
backend page allocator for the jumbo frame zones. * @@ -682,18 +868,18 @@ mb_free_ext(struct mbuf *m) case EXT_MOD_TYPE: case EXT_DISPOSABLE: KASSERT(mref->m_ext.ext_free != NULL, - ("%s: ext_free not set", __func__)); + ("%s: ext_free not set", __func__)); mref->m_ext.ext_free(mref); uma_zfree(zone_mbuf, mref); break; case EXT_EXTREF: KASSERT(m->m_ext.ext_free != NULL, - ("%s: ext_free not set", __func__)); + ("%s: ext_free not set", __func__)); m->m_ext.ext_free(m); break; default: KASSERT(m->m_ext.ext_type == 0, - ("%s: unknown ext_type", __func__)); + ("%s: unknown ext_type", __func__)); } } diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c index ea16f1fa99d3..c803073d78d0 100644 --- a/sys/kern/kern_shutdown.c +++ b/sys/kern/kern_shutdown.c @@ -191,6 +191,11 @@ SYSCTL_INT(_kern, OID_AUTO, kerneldump_gzlevel, CTLFLAG_RWTUN, &kerneldump_gzlevel, 0, "Kernel crash dump compression level"); +#ifdef NETDUMP +/* Defined in kern_mbuf.c. */ +void netdump_mbuf_drain(void); +#endif + /* * Variable panicstr contains argument to first call to panic; used as flag * to indicate that the kernel has already called panic. 
@@ -1065,10 +1070,6 @@ set_dumper(struct dumperinfo *di, const char *devname, struct thread *td, if (error != 0) return (error); - if (di == NULL) { - error = 0; - goto cleanup; - } if (dumper.dumper != NULL) return (EBUSY); dumper = *di; @@ -1114,7 +1115,25 @@ set_dumper(struct dumperinfo *di, const char *devname, struct thread *td, dumper.blockbuf = malloc(di->blocksize, M_DUMPER, M_WAITOK | M_ZERO); return (0); + cleanup: + (void)clear_dumper(td); + return (error); +} + +int +clear_dumper(struct thread *td) +{ + int error; + + error = priv_check(td, PRIV_SETDUMPER); + if (error != 0) + return (error); + +#ifdef NETDUMP + netdump_mbuf_drain(); +#endif + #ifdef EKCD if (dumper.kdcrypto != NULL) { explicit_bzero(dumper.kdcrypto, sizeof(*dumper.kdcrypto) + @@ -1131,14 +1150,14 @@ set_dumper(struct dumperinfo *di, const char *devname, struct thread *td, } explicit_bzero(&dumper, sizeof(dumper)); dumpdevname[0] = '\0'; - return (error); + return (0); } static int dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length) { - if (length != 0 && (offset < di->mediaoffset || + if (di->mediasize > 0 && length != 0 && (offset < di->mediaoffset || offset - di->mediaoffset + length > di->mediasize)) { if (di->kdcomp != NULL && offset >= di->mediaoffset) { printf( @@ -1219,18 +1238,6 @@ dump_encrypted_write(struct dumperinfo *di, void *virtual, return (0); } - -static int -dump_write_key(struct dumperinfo *di, off_t offset) -{ - struct kerneldumpcrypto *kdc; - - kdc = di->kdcrypto; - if (kdc == NULL) - return (0); - return (dump_write(di, kdc->kdc_dumpkey, 0, offset, - kdc->kdc_dumpkeysize)); -} #endif /* EKCD */ static int @@ -1264,20 +1271,42 @@ kerneldumpcomp_write_cb(void *base, size_t length, off_t offset, void *arg) } /* - * Write a kerneldumpheader at the specified offset. The header structure is 512 - * bytes in size, but we must pad to the device sector size. + * Write kernel dump headers at the beginning and end of the dump extent. 
+ * Write the kernel dump encryption key after the leading header if we were + * configured to do so. */ static int -dump_write_header(struct dumperinfo *di, struct kerneldumpheader *kdh, - off_t offset) +dump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh) { - void *buf; +#ifdef EKCD + struct kerneldumpcrypto *kdc; +#endif + void *buf, *key; size_t hdrsz; + uint64_t extent; + uint32_t keysize; + int error; hdrsz = sizeof(*kdh); if (hdrsz > di->blocksize) return (ENOMEM); +#ifdef EKCD + kdc = di->kdcrypto; + key = kdc != NULL ? kdc->kdc_dumpkey : NULL; + keysize = kerneldumpcrypto_dumpkeysize(kdc); +#else + key = NULL; + keysize = 0; +#endif + + /* + * If the dump device has special handling for headers, let it take care + * of writing them out. + */ + if (di->dumper_hdr != NULL) + return (di->dumper_hdr(di, kdh, key, keysize)); + if (hdrsz == di->blocksize) buf = kdh; else { @@ -1286,7 +1315,24 @@ dump_write_header(struct dumperinfo *di, struct kerneldumpheader *kdh, memcpy(buf, kdh, hdrsz); } - return (dump_write(di, buf, 0, offset, di->blocksize)); + extent = dtoh64(kdh->dumpextent); +#ifdef EKCD + if (kdc != NULL) { + error = dump_write(di, kdc->kdc_dumpkey, 0, + di->mediaoffset + di->mediasize - di->blocksize - extent - + keysize, keysize); + if (error != 0) + return (error); + } +#endif + + error = dump_write(di, buf, 0, + di->mediaoffset + di->mediasize - 2 * di->blocksize - extent - + keysize, di->blocksize); + if (error == 0) + error = dump_write(di, buf, 0, di->mediaoffset + di->mediasize - + di->blocksize, di->blocksize); + return (error); } /* @@ -1311,26 +1357,37 @@ dump_write_header(struct dumperinfo *di, struct kerneldumpheader *kdh, * Uncompressed dumps will use the entire extent, but compressed dumps typically * will not. The true length of the dump is recorded in the leading and trailing * headers once the dump has been completed.
+ * + * The dump device may provide a callback, in which case it will initialize + * dumpoff and take care of laying out the headers. */ int dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh) { - uint64_t dumpextent; + uint64_t dumpextent, span; uint32_t keysize; + int error; #ifdef EKCD - int error = kerneldumpcrypto_init(di->kdcrypto); + error = kerneldumpcrypto_init(di->kdcrypto); if (error != 0) return (error); keysize = kerneldumpcrypto_dumpkeysize(di->kdcrypto); #else + error = 0; keysize = 0; #endif - dumpextent = dtoh64(kdh->dumpextent); - if (di->mediasize < SIZEOF_METADATA + dumpextent + 2 * di->blocksize + - keysize) { - if (di->kdcomp != NULL) { + if (di->dumper_start != NULL) { + error = di->dumper_start(di); + } else { + dumpextent = dtoh64(kdh->dumpextent); + span = SIZEOF_METADATA + dumpextent + 2 * di->blocksize + + keysize; + if (di->mediasize < span) { + if (di->kdcomp == NULL) + return (E2BIG); + /* * We don't yet know how much space the compressed dump * will occupy, so try to use the whole swap partition @@ -1339,18 +1396,18 @@ dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh) * be enough, the bounds checking in dump_write() * will catch us and cause the dump to fail. */ - dumpextent = di->mediasize - SIZEOF_METADATA - - 2 * di->blocksize - keysize; + dumpextent = di->mediasize - span + dumpextent; kdh->dumpextent = htod64(dumpextent); - } else - return (E2BIG); - } - - /* The offset at which to begin writing the dump. */ - di->dumpoff = di->mediaoffset + di->mediasize - di->blocksize - - dumpextent; + } - return (0); + /* + * The offset at which to begin writing the dump. 
+ */ + di->dumpoff = di->mediaoffset + di->mediasize - di->blocksize - + dumpextent; + } + di->origdumpoff = di->dumpoff; + return (error); } static int @@ -1418,17 +1475,10 @@ int dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh) { uint64_t extent; - uint32_t keysize; int error; extent = dtoh64(kdh->dumpextent); -#ifdef EKCD - keysize = kerneldumpcrypto_dumpkeysize(di->kdcrypto); -#else - keysize = 0; -#endif - if (di->kdcomp != NULL) { error = compressor_flush(di->kdcomp->kdc_stream); if (error == EAGAIN) { @@ -1445,33 +1495,14 @@ dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh) * We now know the size of the compressed dump, so update the * header accordingly and recompute parity. */ - kdh->dumplength = htod64(di->dumpoff - - (di->mediaoffset + di->mediasize - di->blocksize - extent)); + kdh->dumplength = htod64(di->dumpoff - di->origdumpoff); kdh->parity = 0; kdh->parity = kerneldump_parity(kdh); compressor_reset(di->kdcomp->kdc_stream); } - /* - * Write kerneldump headers at the beginning and end of the dump extent. - * Write the key after the leading header. 
- */ - error = dump_write_header(di, kdh, - di->mediaoffset + di->mediasize - 2 * di->blocksize - extent - - keysize); - if (error != 0) - return (error); - -#ifdef EKCD - error = dump_write_key(di, - di->mediaoffset + di->mediasize - di->blocksize - extent - keysize); - if (error != 0) - return (error); -#endif - - error = dump_write_header(di, kdh, - di->mediaoffset + di->mediasize - di->blocksize); + error = dump_write_headers(di, kdh); if (error != 0) return (error); diff --git a/sys/mips/mips/minidump_machdep.c b/sys/mips/mips/minidump_machdep.c index 989570bf449a..66697a7dfdcd 100644 --- a/sys/mips/mips/minidump_machdep.c +++ b/sys/mips/mips/minidump_machdep.c @@ -264,13 +264,13 @@ minidumpsys(struct dumperinfo *di) dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_MIPS_VERSION, dumpsize); - printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20, - ptoa((uintmax_t)physmem) / 1048576); - error = dump_start(di, &kdh); if (error != 0) goto fail; + printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20, + ptoa((uintmax_t)physmem) / 1048576); + /* Dump my header */ bzero(tmpbuffer, sizeof(tmpbuffer)); bcopy(&mdhdr, tmpbuffer, sizeof(mdhdr)); diff --git a/sys/net/if.c b/sys/net/if.c index a09bd1c3df0a..620273086fe8 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -87,6 +87,7 @@ #include #ifdef INET #include +#include #endif /* INET */ #ifdef INET6 #include @@ -2754,6 +2755,9 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) if (error == 0) { getmicrotime(&ifp->if_lastchange); rt_ifmsg(ifp); +#ifdef INET + NETDUMP_REINIT(ifp); +#endif } /* * If the link MTU changed, do network layer specific procedure. diff --git a/sys/net/if_var.h b/sys/net/if_var.h index fbb09ca5d269..45499970de14 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -70,6 +70,7 @@ struct route; /* if_output */ struct vnet; struct ifmedia; struct netmap_adapter; +struct netdump_methods; #ifdef _KERNEL #include /* ifqueue only? 
*/ @@ -368,6 +369,11 @@ struct ifnet { /* Ethernet PCP */ uint8_t if_pcp; + /* + * Netdump hooks to be called while dumping. + */ + struct netdump_methods *if_netdump_methods; + /* * Spare fields to be added before branching a stable branch, so * that structure can be enhanced without changing the kernel diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 31d900ec3bab..2d183dcbfb75 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -52,7 +52,6 @@ __FBSDID("$FreeBSD$"); #include #include - #include #include #include @@ -71,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -727,6 +727,8 @@ static void iflib_if_init_locked(if_ctx_t ctx); static struct mbuf * iflib_fixup_rx(struct mbuf *m); #endif +NETDUMP_DEFINE(iflib); + #ifdef DEV_NETMAP #include #include @@ -3367,7 +3369,7 @@ iflib_tx_desc_free(iflib_txq_t txq, int n) ifsd_map = txq->ift_sds.ifsd_map; do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH); - while (n--) { + while (n-- > 0) { if (do_prefetch) { prefetch(ifsd_m[(cidx + 3) & mask]); prefetch(ifsd_m[(cidx + 4) & mask]); @@ -4402,6 +4404,8 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct } *ctxp = ctx; + NETDUMP_SET(ctx->ifc_ifp, iflib); + if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); ctx->ifc_flags |= IFC_INIT_DONE; @@ -5967,3 +5971,89 @@ iflib_fixup_rx(struct mbuf *m) return (n); } #endif + +#ifdef NETDUMP +static void +iflib_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) +{ + if_ctx_t ctx; + + ctx = if_getsoftc(ifp); + CTX_LOCK(ctx); + *nrxr = NRXQSETS(ctx); + *ncl = ctx->ifc_rxqs[0].ifr_fl->ifl_size; + *clsize = ctx->ifc_rxqs[0].ifr_fl->ifl_buf_size; + CTX_UNLOCK(ctx); +} + +static void +iflib_netdump_event(struct ifnet *ifp, enum netdump_ev event) +{ + if_ctx_t ctx; + if_softc_ctx_t scctx; + iflib_fl_t fl; + iflib_rxq_t rxq; + int i, j; + + ctx = if_getsoftc(ifp); + scctx = &ctx->ifc_softc_ctx; + + switch 
(event) { + case NETDUMP_START: + for (i = 0; i < scctx->isc_nrxqsets; i++) { + rxq = &ctx->ifc_rxqs[i]; + for (j = 0; j < rxq->ifr_nfl; j++) { + fl = &rxq->ifr_fl[j]; /* every free list of this queue, not only the first */ + fl->ifl_zone = m_getzone(fl->ifl_buf_size); + } + } + iflib_no_tx_batch = 1; + break; + default: + break; + } +} + +static int +iflib_netdump_transmit(struct ifnet *ifp, struct mbuf *m) +{ + if_ctx_t ctx; + iflib_txq_t txq; + int error; + + ctx = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return (EBUSY); + + txq = &ctx->ifc_txqs[0]; + error = iflib_encap(txq, &m); + if (error == 0) + (void)iflib_txd_db_check(ctx, txq, true, txq->ift_in_use); + return (error); +} + +static int +iflib_netdump_poll(struct ifnet *ifp, int count) +{ + if_ctx_t ctx; + if_softc_ctx_t scctx; + iflib_txq_t txq; + int i; + + ctx = if_getsoftc(ifp); + scctx = &ctx->ifc_softc_ctx; + + if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return (EBUSY); + + txq = &ctx->ifc_txqs[0]; + (void)iflib_tx_credits_update(ctx, txq); + (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); + + for (i = 0; i < scctx->isc_nrxqsets; i++) + (void)iflib_rxeof(&ctx->ifc_rxqs[i], 16 /* XXX */); + return (0); +} +#endif /* NETDUMP */ diff --git a/sys/netinet/netdump/netdump.h b/sys/netinet/netdump/netdump.h new file mode 100644 index 000000000000..11f181ff6d69 --- /dev/null +++ b/sys/netinet/netdump/netdump.h @@ -0,0 +1,130 @@ +/*- + * Copyright (c) 2005-2014 Sandvine Incorporated + * Copyright (c) 2000 Darrell Anderson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NETINET_NETDUMP_H_ +#define _NETINET_NETDUMP_H_ + +#include +#include +#include + +#include +#include + +#define NETDUMP_PORT 20023 /* Server udp port number for data. */ +#define NETDUMP_ACKPORT 20024 /* Client udp port number for acks. */ + +#define NETDUMP_HERALD 1 /* Broadcast before starting a dump. */ +#define NETDUMP_FINISHED 2 /* Send after finishing a dump. */ +#define NETDUMP_VMCORE 3 /* Contains dump data. */ +#define NETDUMP_KDH 4 /* Contains kernel dump header. */ +#define NETDUMP_EKCD_KEY 5 /* Contains kernel dump key. */ + +#define NETDUMP_DATASIZE 4096 /* Arbitrary packet size limit. */ + +struct netdump_msg_hdr { + uint32_t mh_type; /* Netdump message type. */ + uint32_t mh_seqno; /* Match acks with msgs. */ + uint64_t mh_offset; /* vmcore offset (bytes). */ + uint32_t mh_len; /* Attached data (bytes). */ + uint32_t mh__pad; +} __packed; + +struct netdump_ack { + uint32_t na_seqno; /* Match acks with msgs. 
*/ +} __packed; + +struct netdump_conf { + struct diocskerneldump_arg ndc_kda; + char ndc_iface[IFNAMSIZ]; + struct in_addr ndc_server; + struct in_addr ndc_client; + struct in_addr ndc_gateway; +}; + +#define _PATH_NETDUMP "/dev/netdump" + +#define NETDUMPGCONF _IOR('n', 1, struct netdump_conf) +#define NETDUMPSCONF _IOW('n', 2, struct netdump_conf) + +#ifdef _KERNEL +#ifdef NETDUMP + +#define NETDUMP_MAX_IN_FLIGHT 64 + +enum netdump_ev { + NETDUMP_START, + NETDUMP_END, +}; + +struct ifnet; +struct mbuf; + +void netdump_reinit(struct ifnet *); + +typedef void netdump_init_t(struct ifnet *, int *nrxr, int *ncl, int *clsize); +typedef void netdump_event_t(struct ifnet *, enum netdump_ev); +typedef int netdump_transmit_t(struct ifnet *, struct mbuf *); +typedef int netdump_poll_t(struct ifnet *, int); + +struct netdump_methods { + netdump_init_t *nd_init; + netdump_event_t *nd_event; + netdump_transmit_t *nd_transmit; + netdump_poll_t *nd_poll; +}; + +#define NETDUMP_DEFINE(driver) \ + static netdump_init_t driver##_netdump_init; \ + static netdump_event_t driver##_netdump_event; \ + static netdump_transmit_t driver##_netdump_transmit; \ + static netdump_poll_t driver##_netdump_poll; \ + \ + static struct netdump_methods driver##_netdump_methods = { \ + .nd_init = driver##_netdump_init, \ + .nd_event = driver##_netdump_event, \ + .nd_transmit = driver##_netdump_transmit, \ + .nd_poll = driver##_netdump_poll, \ + } + +#define NETDUMP_REINIT(ifp) netdump_reinit(ifp) + +#define NETDUMP_SET(ifp, driver) \ + (ifp)->if_netdump_methods = &driver##_netdump_methods + +#else /* !NETDUMP */ + +#define NETDUMP_DEFINE(driver) +#define NETDUMP_REINIT(ifp) +#define NETDUMP_SET(ifp, driver) + +#endif /* NETDUMP */ +#endif /* _KERNEL */ + +#endif /* _NETINET_NETDUMP_H_ */ diff --git a/sys/netinet/netdump/netdump_client.c b/sys/netinet/netdump/netdump_client.c new file mode 100644 index 000000000000..900b3157a36a --- /dev/null +++ b/sys/netinet/netdump/netdump_client.c @@ -0,0 +1,1260 
@@ +/*- + * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved. + * Copyright (c) 2000 Darrell Anderson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * netdump_client.c + * FreeBSD subsystem supporting netdump network dumps. + * A dedicated server must be running to accept client dumps. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_netdump.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef NETDUMP_DEBUG +#define NETDDEBUG(f, ...) \ + printf(("%s: " f), __func__, ## __VA_ARGS__) +#define NETDDEBUG_IF(i, f, ...) \ + if_printf((i), ("%s: " f), __func__, ## __VA_ARGS__) +#if NETDUMP_DEBUG > 1 +#define NETDDEBUGV(f, ...) \ + printf(("%s: " f), __func__, ## __VA_ARGS__) +#define NETDDEBUGV_IF(i, f, ...) \ + if_printf((i), ("%s: " f), __func__, ## __VA_ARGS__) +#else +#define NETDDEBUGV(f, ...) +#define NETDDEBUGV_IF(i, f, ...) +#endif +#else +#define NETDDEBUG(f, ...) +#define NETDDEBUG_IF(i, f, ...) +#define NETDDEBUGV(f, ...) +#define NETDDEBUGV_IF(i, f, ...) +#endif + +static int netdump_arp_gw(void); +static void netdump_cleanup(void); +static int netdump_configure(struct netdump_conf *); +static int netdump_dumper(void *priv __unused, void *virtual, + vm_offset_t physical __unused, off_t offset, size_t length); +static int netdump_ether_output(struct mbuf *m, struct ifnet *ifp, + struct ether_addr dst, u_short etype); +static void netdump_handle_arp(struct mbuf **mb); +static void netdump_handle_ip(struct mbuf **mb); +static int netdump_ioctl(struct cdev *dev __unused, u_long cmd, + caddr_t addr, int flags __unused, struct thread *td); +static int netdump_modevent(module_t mod, int type, void *priv); +static void netdump_network_poll(void); +static void netdump_pkt_in(struct ifnet *ifp, struct mbuf *m); +static int netdump_send(uint32_t type, off_t offset, unsigned char *data, + uint32_t datalen); +static int netdump_send_arp(in_addr_t dst); +static int netdump_start(struct dumperinfo *di); +static int netdump_udp_output(struct mbuf *m); + +/* Must be at least 
as big as the chunks dumpsys() gives us. */ +static unsigned char nd_buf[MAXDUMPPGS * PAGE_SIZE]; +static uint32_t nd_seqno; +static int dump_failed, have_gw_mac; +static void (*drv_if_input)(struct ifnet *, struct mbuf *); +static int restore_gw_addr; + +static uint64_t rcvd_acks; +CTASSERT(sizeof(rcvd_acks) * NBBY == NETDUMP_MAX_IN_FLIGHT); + +/* + * Times to poll the NIC (0.5ms each poll) before assuming packet loss + * occurred (default to 1s). + */ +static int nd_polls = 2000; + +/* Times to retransmit lost packets. */ +static int nd_retries = 10; + +/* Number of ARP retries. */ +static int nd_arp_retries = 3; + +/* Configuration parameters. */ +static struct netdump_conf nd_conf; +#define nd_server nd_conf.ndc_server +#define nd_client nd_conf.ndc_client +#define nd_gateway nd_conf.ndc_gateway + +/* General dynamic settings. */ +static struct ether_addr nd_gw_mac; +static struct ifnet *nd_ifp; +static uint16_t nd_server_port = NETDUMP_PORT; + +FEATURE(netdump, "Netdump client support"); + +static SYSCTL_NODE(_net, OID_AUTO, netdump, CTLFLAG_RD, NULL, + "netdump parameters"); + +static int nd_enabled; +SYSCTL_INT(_net_netdump, OID_AUTO, enabled, CTLFLAG_RD, + &nd_enabled, 0, + "netdump configuration status"); +static char nd_path[MAXPATHLEN]; +SYSCTL_STRING(_net_netdump, OID_AUTO, path, CTLFLAG_RW, + nd_path, sizeof(nd_path), + "Server path for output files"); + +/* + * Checks for netdump support on a network interface + * + * Parameters: + * ifp The network interface that is being tested for support + * + * Returns: + * bool true if the interface has netdump methods set, false if not + */ +static bool +netdump_supported_nic(struct ifnet *ifp) +{ + + return (ifp->if_netdump_methods != NULL); +} + +/*- + * Network specific primitives.
+ * Following down the code they are divided ordered as: + * - Packet buffer primitives + * - Output primitives + * - Input primitives + * - Polling primitives + */ + +/* + * Handles creation of the ethernet header, then places outgoing packets into + * the tx buffer for the NIC + * + * Parameters: + * m The mbuf containing the packet to be sent (will be freed by + * this function or the NIC driver) + * ifp The interface to send on + * dst The destination ethernet address (source address will be looked + * up using ifp) + * etype The ETHERTYPE_* value for the protocol that is being sent + * + * Returns: + * int see errno.h, 0 for success + */ +static int +netdump_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst, + u_short etype) +{ + struct ether_header *eh; + + if (((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) || + (ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) { + if_printf(ifp, "netdump_ether_output: interface isn't up\n"); + m_freem(m); + return (ENETDOWN); + } + + /* Fill in the ethernet header. 
*/ + M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); + if (m == NULL) { + printf("%s: out of mbufs\n", __func__); + return (ENOBUFS); + } + eh = mtod(m, struct ether_header *); + memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); + memcpy(eh->ether_dhost, dst.octet, ETHER_ADDR_LEN); + eh->ether_type = htons(etype); + return ((ifp->if_netdump_methods->nd_transmit)(ifp, m)); +} + +/* + * Unreliable transmission of an mbuf chain to the netdump server + * Note: can't handle fragmentation; fails if the packet is larger than + * nd_ifp->if_mtu after adding the UDP/IP headers + * + * Parameters: + * m mbuf chain + * + * Returns: + * int see errno.h, 0 for success + */ +static int +netdump_udp_output(struct mbuf *m) +{ + struct udpiphdr *ui; + struct ip *ip; + + MPASS(nd_ifp != NULL); + + M_PREPEND(m, sizeof(struct udpiphdr), M_NOWAIT); + if (m == NULL) { + printf("%s: out of mbufs\n", __func__); + return (ENOBUFS); + } + + if (m->m_pkthdr.len > nd_ifp->if_mtu) { + printf("netdump_udp_output: Packet is too big: %d > MTU %u\n", + m->m_pkthdr.len, nd_ifp->if_mtu); + m_freem(m); + return (ENOBUFS); + } + + ui = mtod(m, struct udpiphdr *); + bzero(ui->ui_x1, sizeof(ui->ui_x1)); + ui->ui_pr = IPPROTO_UDP; + ui->ui_len = htons(m->m_pkthdr.len - sizeof(struct ip)); + ui->ui_ulen = ui->ui_len; + ui->ui_src = nd_client; + ui->ui_dst = nd_server; + /* Use this src port so that the server can connect() the socket */ + ui->ui_sport = htons(NETDUMP_ACKPORT); + ui->ui_dport = htons(nd_server_port); + ui->ui_sum = 0; + if ((ui->ui_sum = in_cksum(m, m->m_pkthdr.len)) == 0) + ui->ui_sum = 0xffff; + + ip = mtod(m, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(struct ip) >> 2; + ip->ip_tos = 0; + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_id = 0; + ip->ip_off = htons(IP_DF); + ip->ip_ttl = 255; + ip->ip_sum = 0; + ip->ip_sum = in_cksum(m, sizeof(struct ip)); + + return (netdump_ether_output(m, nd_ifp, nd_gw_mac, ETHERTYPE_IP)); +} + +/* + * Builds and sends a single ARP request 
to locate the server + * + * Return value: + * 0 on success + * errno on error + */ +static int +netdump_send_arp(in_addr_t dst) +{ + struct ether_addr bcast; + struct mbuf *m; + struct arphdr *ah; + int pktlen; + + MPASS(nd_ifp != NULL); + + /* Fill-up a broadcast address. */ + memset(&bcast, 0xFF, ETHER_ADDR_LEN); + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) { + printf("netdump_send_arp: Out of mbufs\n"); + return (ENOBUFS); + } + pktlen = arphdr_len2(ETHER_ADDR_LEN, sizeof(struct in_addr)); + m->m_len = pktlen; + m->m_pkthdr.len = pktlen; + MH_ALIGN(m, pktlen); + ah = mtod(m, struct arphdr *); + ah->ar_hrd = htons(ARPHRD_ETHER); + ah->ar_pro = htons(ETHERTYPE_IP); + ah->ar_hln = ETHER_ADDR_LEN; + ah->ar_pln = sizeof(struct in_addr); + ah->ar_op = htons(ARPOP_REQUEST); + memcpy(ar_sha(ah), IF_LLADDR(nd_ifp), ETHER_ADDR_LEN); + ((struct in_addr *)ar_spa(ah))->s_addr = nd_client.s_addr; + bzero(ar_tha(ah), ETHER_ADDR_LEN); + ((struct in_addr *)ar_tpa(ah))->s_addr = dst; + return (netdump_ether_output(m, nd_ifp, bcast, ETHERTYPE_ARP)); +} + +/* + * Sends ARP requests to locate the server and waits for a response. + * We first try to ARP the server itself, and fall back to the provided + * gateway if the server appears to be off-link. 
+ * + * Return value: + * 0 on success + * errno on error + */ +static int +netdump_arp_gw(void) +{ + in_addr_t dst; + int error, polls, retries; + + dst = nd_server.s_addr; +restart: + for (retries = 0; retries < nd_arp_retries && have_gw_mac == 0; + retries++) { + error = netdump_send_arp(dst); + if (error != 0) + return (error); + for (polls = 0; polls < nd_polls && have_gw_mac == 0; polls++) { + netdump_network_poll(); + DELAY(500); + } + if (have_gw_mac == 0) + printf("(ARP retry)"); + } + if (have_gw_mac != 0) + return (0); + if (dst == nd_server.s_addr && nd_server.s_addr != nd_gateway.s_addr) { + printf("Failed to ARP server, trying to reach gateway...\n"); + dst = nd_gateway.s_addr; + goto restart; + } + + printf("\nARP timed out.\n"); + return (ETIMEDOUT); +} + +/* + * Dummy free function for netdump clusters. + */ +static void +netdump_mbuf_free(struct mbuf *m __unused) +{ +} + +/* + * Construct and reliably send a netdump packet. May fail from a resource + * shortage or extreme number of unacknowledged retransmissions. Wait for + * an acknowledgement before returning. Splits packets into chunks small + * enough to be sent without fragmentation (looks up the interface MTU) + * + * Parameters: + * type netdump packet type (HERALD, FINISHED, or VMCORE) + * offset vmcore data offset (bytes) + * data vmcore data + * datalen vmcore data size (bytes) + * + * Returns: + * int see errno.h, 0 for success + */ +static int +netdump_send(uint32_t type, off_t offset, unsigned char *data, uint32_t datalen) +{ + struct netdump_msg_hdr *nd_msg_hdr; + struct mbuf *m, *m2; + uint64_t want_acks; + uint32_t i, pktlen, sent_so_far; + int retries, polls, error; + + want_acks = 0; + rcvd_acks = 0; + retries = 0; + + MPASS(nd_ifp != NULL); + +retransmit: + /* Chunks can be too big to fit in packets. */ + for (i = sent_so_far = 0; sent_so_far < datalen || + (i == 0 && datalen == 0); i++) { + pktlen = datalen - sent_so_far; + + /* First bound: the packet structure. 
*/ + pktlen = min(pktlen, NETDUMP_DATASIZE); + + /* Second bound: the interface MTU (assume no IP options). */ + pktlen = min(pktlen, nd_ifp->if_mtu - sizeof(struct udpiphdr) - + sizeof(struct netdump_msg_hdr)); + + /* + * Check if it is retransmitting and this has been ACKed + * already. + */ + if ((rcvd_acks & ((uint64_t)1 << i)) != 0) { + sent_so_far += pktlen; + continue; + } + + /* + * Get and fill a header mbuf, then chain data as an extended + * mbuf. + */ + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) { + printf("netdump_send: Out of mbufs\n"); + return (ENOBUFS); + } + m->m_len = sizeof(struct netdump_msg_hdr); + m->m_pkthdr.len = sizeof(struct netdump_msg_hdr); + MH_ALIGN(m, sizeof(struct netdump_msg_hdr)); + nd_msg_hdr = mtod(m, struct netdump_msg_hdr *); + nd_msg_hdr->mh_seqno = htonl(nd_seqno + i); + nd_msg_hdr->mh_type = htonl(type); + nd_msg_hdr->mh_offset = htobe64(offset + sent_so_far); + nd_msg_hdr->mh_len = htonl(pktlen); + nd_msg_hdr->mh__pad = 0; + + if (pktlen != 0) { + m2 = m_get(M_NOWAIT, MT_DATA); + if (m2 == NULL) { + m_freem(m); + printf("netdump_send: Out of mbufs\n"); + return (ENOBUFS); + } + MEXTADD(m2, data + sent_so_far, pktlen, + netdump_mbuf_free, NULL, NULL, 0, EXT_DISPOSABLE); + m2->m_len = pktlen; + + m_cat(m, m2); + m->m_pkthdr.len += pktlen; + } + error = netdump_udp_output(m); + if (error != 0) + return (error); + + /* Note that we're waiting for this packet in the 64-bit bitfield. */ + want_acks |= ((uint64_t)1 << i); + sent_so_far += pktlen; + } + if (i >= NETDUMP_MAX_IN_FLIGHT) + printf("Warning: Sent more than %d packets (%d). " + "Acknowledgements will fail unless the size of " + "rcvd_acks/want_acks is increased.\n", + NETDUMP_MAX_IN_FLIGHT, i); + + /* + * Wait for acks. A *real* window would speed things up considerably. + */ + polls = 0; + while (rcvd_acks != want_acks) { + if (polls++ > nd_polls) { + if (retries++ > nd_retries) + return (ETIMEDOUT); + printf(".
"); + goto retransmit; + } + netdump_network_poll(); + DELAY(500); + } + nd_seqno += i; + return (0); +} + +/* + * Handler for IP packets: checks their sanity and then processes any netdump + * ACK packets it finds. + * + * It needs to replicate partially the behaviour of ip_input() and + * udp_input(). + * + * Parameters: + * mb a pointer to an mbuf * containing the packet received + * Updates *mb if m_pullup et al change the pointer + * Assumes the calling function will take care of freeing the mbuf + */ +static void +netdump_handle_ip(struct mbuf **mb) +{ + struct ip *ip; + struct udpiphdr *udp; + struct netdump_ack *nd_ack; + struct mbuf *m; + int rcv_ackno; + unsigned short hlen; + + /* IP processing. */ + m = *mb; + if (m->m_pkthdr.len < sizeof(struct ip)) { + NETDDEBUG("dropping packet too small for IP header\n"); + return; + } + if (m->m_len < sizeof(struct ip)) { + m = m_pullup(m, sizeof(struct ip)); + *mb = m; + if (m == NULL) { + NETDDEBUG("m_pullup failed\n"); + return; + } + } + ip = mtod(m, struct ip *); + + /* IP version. */ + if (ip->ip_v != IPVERSION) { + NETDDEBUG("bad IP version %d\n", ip->ip_v); + return; + } + + /* Header length. */ + hlen = ip->ip_hl << 2; + if (hlen < sizeof(struct ip)) { + NETDDEBUG("bad IP header length (%hu)\n", hlen); + return; + } + if (hlen > m->m_len) { + m = m_pullup(m, hlen); + *mb = m; + if (m == NULL) { + NETDDEBUG("m_pullup failed\n"); + return; + } + ip = mtod(m, struct ip *); + } + /* Ignore packets with IP options. */ + if (hlen > sizeof(struct ip)) { + NETDDEBUG("drop packet with IP options\n"); + return; + } + +#ifdef INVARIANTS + if (((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || + (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) && + (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { + NETDDEBUG("Bad IP header (RFC1122)\n"); + return; + } +#endif + + /* Checksum. 
*/ + if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) { + if ((m->m_pkthdr.csum_flags & CSUM_IP_VALID) == 0) { + NETDDEBUG("bad IP checksum\n"); + return; + } + } else { + /* XXX */ ; + } + + /* Convert fields to host byte order. */ + ip->ip_len = ntohs(ip->ip_len); + if (ip->ip_len < hlen) { + NETDDEBUG("IP packet smaller (%hu) than header (%hu)\n", + ip->ip_len, hlen); + return; + } + if (m->m_pkthdr.len < ip->ip_len) { + NETDDEBUG("IP packet bigger (%hu) than ethernet packet (%d)\n", + ip->ip_len, m->m_pkthdr.len); + return; + } + if (m->m_pkthdr.len > ip->ip_len) { + + /* Truncate the packet to the IP length. */ + if (m->m_len == m->m_pkthdr.len) { + m->m_len = ip->ip_len; + m->m_pkthdr.len = ip->ip_len; + } else + m_adj(m, ip->ip_len - m->m_pkthdr.len); + } + + ip->ip_off = ntohs(ip->ip_off); + + /* Check that the source is the server's IP. */ + if (ip->ip_src.s_addr != nd_server.s_addr) { + NETDDEBUG("drop packet not from server (from 0x%x)\n", + ip->ip_src.s_addr); + return; + } + + /* Check if the destination IP is ours. */ + if (ip->ip_dst.s_addr != nd_client.s_addr) { + NETDDEBUGV("drop packet not to our IP\n"); + return; + } + + if (ip->ip_p != IPPROTO_UDP) { + NETDDEBUG("drop non-UDP packet\n"); + return; + } + + /* Do not deal with fragments. */ + if ((ip->ip_off & (IP_MF | IP_OFFMASK)) != 0) { + NETDDEBUG("drop fragmented packet\n"); + return; + } + + /* UDP custom is to have packet length not include IP header. */ + ip->ip_len -= hlen; + + /* UDP processing. */ + + /* Get IP and UDP headers together, along with the netdump packet. 
*/ + if (m->m_pkthdr.len < + sizeof(struct udpiphdr) + sizeof(struct netdump_ack)) { + NETDDEBUG("ignoring small packet\n"); + return; + } + if (m->m_len < sizeof(struct udpiphdr) + sizeof(struct netdump_ack)) { + m = m_pullup(m, sizeof(struct udpiphdr) + + sizeof(struct netdump_ack)); + *mb = m; + if (m == NULL) { + NETDDEBUG("m_pullup failed\n"); + return; + } + } + udp = mtod(m, struct udpiphdr *); + + if (ntohs(udp->ui_u.uh_dport) != NETDUMP_ACKPORT) { + NETDDEBUG("not on the netdump port.\n"); + return; + } + + /* Netdump processing. */ + + /* + * Packet is meant for us. Extract the ack sequence number and the + * port number if necessary. + */ + nd_ack = (struct netdump_ack *)(mtod(m, caddr_t) + + sizeof(struct udpiphdr)); + rcv_ackno = ntohl(nd_ack->na_seqno); + if (nd_server_port == NETDUMP_PORT) + nd_server_port = ntohs(udp->ui_u.uh_sport); + if (rcv_ackno >= nd_seqno + NETDUMP_MAX_IN_FLIGHT) + printf("%s: ACK %d too far in future!\n", __func__, rcv_ackno); + else if (rcv_ackno >= nd_seqno) { + /* We're interested in this ack. Record it in the 64-bit window. */ + rcvd_acks |= (uint64_t)1 << (rcv_ackno - nd_seqno); + } +} + +/* + * Handler for ARP packets: checks their sanity and then + * 1. If the ARP is a request for our IP, respond with our MAC address + * 2. If the ARP is a response from our server, record its MAC address + * + * It needs to replicate partially the behaviour of arpintr() and + * in_arpinput().
+ * + * Parameters: + * mb a pointer to an mbuf * containing the packet received + * Updates *mb if m_pullup et al change the pointer + * Assumes the calling function will take care of freeing the mbuf + */ +static void +netdump_handle_arp(struct mbuf **mb) +{ + char buf[INET_ADDRSTRLEN]; + struct in_addr isaddr, itaddr, myaddr; + struct ether_addr dst; + struct mbuf *m; + struct arphdr *ah; + struct ifnet *ifp; + uint8_t *enaddr; + int req_len, op; + + m = *mb; + ifp = m->m_pkthdr.rcvif; + if (m->m_len < sizeof(struct arphdr)) { + m = m_pullup(m, sizeof(struct arphdr)); + *mb = m; + if (m == NULL) { + NETDDEBUG("runt packet: m_pullup failed\n"); + return; + } + } + + ah = mtod(m, struct arphdr *); + if (ntohs(ah->ar_hrd) != ARPHRD_ETHER) { + NETDDEBUG("unknown hardware address 0x%2D)\n", + (unsigned char *)&ah->ar_hrd, ""); + return; + } + if (ntohs(ah->ar_pro) != ETHERTYPE_IP) { + NETDDEBUG("drop ARP for unknown protocol %d\n", + ntohs(ah->ar_pro)); + return; + } + req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr)); + if (m->m_len < req_len) { + m = m_pullup(m, req_len); + *mb = m; + if (m == NULL) { + NETDDEBUG("runt packet: m_pullup failed\n"); + return; + } + } + ah = mtod(m, struct arphdr *); + + op = ntohs(ah->ar_op); + memcpy(&isaddr, ar_spa(ah), sizeof(isaddr)); + memcpy(&itaddr, ar_tpa(ah), sizeof(itaddr)); + enaddr = (uint8_t *)IF_LLADDR(ifp); + myaddr = nd_client; + + if (memcmp(ar_sha(ah), enaddr, ifp->if_addrlen) == 0) { + NETDDEBUG("ignoring ARP from myself\n"); + return; + } + + if (isaddr.s_addr == nd_client.s_addr) { + printf("%s: %*D is using my IP address %s!\n", __func__, + ifp->if_addrlen, (u_char *)ar_sha(ah), ":", + inet_ntoa_r(isaddr, buf)); + return; + } + + if (memcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen) == 0) { + NETDDEBUG("ignoring ARP from broadcast address\n"); + return; + } + + if (op == ARPOP_REPLY) { + if (isaddr.s_addr != nd_gateway.s_addr && + isaddr.s_addr != nd_server.s_addr) { + inet_ntoa_r(isaddr, 
buf); + NETDDEBUG( + "ignoring ARP reply from %s (not netdump server)\n", + buf); + return; + } + memcpy(nd_gw_mac.octet, ar_sha(ah), + min(ah->ar_hln, ETHER_ADDR_LEN)); + have_gw_mac = 1; + NETDDEBUG("got server MAC address %6D\n", nd_gw_mac.octet, ":"); + return; + } + + if (op != ARPOP_REQUEST) { + NETDDEBUG("ignoring ARP non-request/reply\n"); + return; + } + + if (itaddr.s_addr != nd_client.s_addr) { + NETDDEBUG("ignoring ARP not to our IP\n"); + return; + } + + memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); + memcpy(ar_sha(ah), enaddr, ah->ar_hln); + memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln); + memcpy(ar_spa(ah), &itaddr, ah->ar_pln); + ah->ar_op = htons(ARPOP_REPLY); + ah->ar_pro = htons(ETHERTYPE_IP); + m->m_flags &= ~(M_BCAST|M_MCAST); + m->m_len = arphdr_len(ah); + m->m_pkthdr.len = m->m_len; + + memcpy(dst.octet, ar_tha(ah), ETHER_ADDR_LEN); + netdump_ether_output(m, ifp, dst, ETHERTYPE_ARP); + *mb = NULL; +} + +/* + * Handler for incoming packets directly from the network adapter + * Identifies the packet type (IP or ARP) and passes it along to one of the + * helper functions netdump_handle_ip or netdump_handle_arp. + * + * It needs to replicate partially the behaviour of ether_input() and + * ether_demux(). + * + * Parameters: + * ifp the interface the packet came from (should be nd_ifp) + * m an mbuf containing the packet received + */ +static void +netdump_pkt_in(struct ifnet *ifp, struct mbuf *m) +{ + struct ether_header *eh; + u_short etype; + + /* Ethernet processing. 
*/ + if ((m->m_flags & M_PKTHDR) == 0) { + NETDDEBUG_IF(ifp, "discard frame without packet header\n"); + goto done; + } + if (m->m_len < ETHER_HDR_LEN) { + NETDDEBUG_IF(ifp, + "discard frame without leading eth header (len %u pktlen %u)\n", + m->m_len, m->m_pkthdr.len); + goto done; + } + if ((m->m_flags & M_HASFCS) != 0) { + m_adj(m, -ETHER_CRC_LEN); + m->m_flags &= ~M_HASFCS; + } + eh = mtod(m, struct ether_header *); + etype = ntohs(eh->ether_type); + if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) { + NETDDEBUG_IF(ifp, "ignoring vlan packets\n"); + goto done; + } + + /* XXX: Probably must also check if we're the recipient MAC address. */ + + /* Done ethernet processing. Strip off the ethernet header. */ + m_adj(m, ETHER_HDR_LEN); + switch (etype) { + case ETHERTYPE_ARP: + netdump_handle_arp(&m); + break; + case ETHERTYPE_IP: + netdump_handle_ip(&m); + break; + default: + NETDDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype); + break; + } +done: + if (m != NULL) + m_freem(m); +} + +/* + * After trapping, instead of assuming that most of the network stack is sane, + * we just poll the driver directly for packets. + */ +static void +netdump_network_poll(void) +{ + + MPASS(nd_ifp != NULL); + + nd_ifp->if_netdump_methods->nd_poll(nd_ifp, 1000); +} + +/*- + * Dumping specific primitives. + */ + +/* + * Callback from dumpsys() to dump a chunk of memory. + * Copies it out to our static buffer then sends it across the network. + * A NULL virtual address signals the end of the dump and closes the transaction. + * + * Parameters: + * priv Unused. Optional private pointer. + * virtual Virtual address (where to read the data from) + * physical Unused. Physical memory address.
+ * offset Offset from start of core file + * length Data length + * + * Return value: + * 0 on success + * errno on error (ENOSPC if length exceeds sizeof(nd_buf)) + */ +static int +netdump_dumper(void *priv __unused, void *virtual, + vm_offset_t physical __unused, off_t offset, size_t length) +{ + int error; + + NETDDEBUGV("netdump_dumper(NULL, %p, NULL, %ju, %zu)\n", + virtual, (uintmax_t)offset, length); + + if (virtual == NULL) { + if (dump_failed != 0) + printf("failed to dump the kernel core\n"); + else if (netdump_send(NETDUMP_FINISHED, 0, NULL, 0) != 0) + printf("failed to close the transaction\n"); + else + printf("\nnetdump finished.\n"); + netdump_cleanup(); + return (0); + } + if (length > sizeof(nd_buf)) + return (ENOSPC); + + memmove(nd_buf, virtual, length); + error = netdump_send(NETDUMP_VMCORE, offset, nd_buf, length); + if (error != 0) { + dump_failed = 1; + return (error); + } + return (0); +} + +/* + * Perform any initialization needed prior to transmitting the kernel core. + */ +static int +netdump_start(struct dumperinfo *di) +{ + char *path; + char buf[INET_ADDRSTRLEN]; + uint32_t len; + int error; + + error = 0; + + /* Check if the dumping is allowed to continue. */ + if (nd_enabled == 0) + return (EINVAL); + + MPASS(nd_ifp != NULL); + + if (nd_server.s_addr == INADDR_ANY) { + printf("netdump_start: can't netdump; no server IP given\n"); + return (EINVAL); + } + if (nd_client.s_addr == INADDR_ANY) { + printf("netdump_start: can't netdump; no client IP given\n"); + return (EINVAL); + } + + /* We start dumping at offset 0. */ + di->dumpoff = 0; + + nd_seqno = 1; + + /* + * nd_server_port could have switched after the first ack the + * first time it gets called. Adjust it accordingly. + */ + nd_server_port = NETDUMP_PORT; + + /* Switch to the netdump mbuf zones. */ + netdump_mbuf_dump(); + + nd_ifp->if_netdump_methods->nd_event(nd_ifp, NETDUMP_START); + + /* Make the card use *our* receive callback.
*/
+	drv_if_input = nd_ifp->if_input;	/* saved so netdump_cleanup() can restore it */
+	nd_ifp->if_input = netdump_pkt_in;
+
+	if (nd_gateway.s_addr == INADDR_ANY) {	/* no gateway configured */
+		restore_gw_addr = 1;	/* makes netdump_cleanup() reset nd_gateway */
+		nd_gateway.s_addr = nd_server.s_addr;
+	}
+
+	printf("netdump in progress. searching for server...\n");
+	if (netdump_arp_gw()) {
+		printf("failed to locate server MAC address\n");
+		error = EINVAL;
+		goto trig_abort;
+	}
+
+	if (nd_path[0] != '\0') {
+		path = nd_path;
+		len = strlen(path) + 1;	/* include the terminating NUL */
+	} else {
+		path = NULL;
+		len = 0;
+	}
+	if (netdump_send(NETDUMP_HERALD, 0, path, len) != 0) {
+		printf("failed to contact netdump server\n");
+		error = EINVAL;
+		goto trig_abort;
+	}
+	printf("netdumping to %s (%6D)\n", inet_ntoa_r(nd_server, buf),
+	    nd_gw_mac.octet, ":");
+	return (0);
+
+trig_abort:
+	netdump_cleanup();
+	return (error);
+}
+/*
+ * Dumper header callback (installed as dumper_hdr by netdump_ioctl()).
+ * Sends the kernel dump header as a NETDUMP_KDH message and, if that
+ * succeeded and keysize is non-zero, the key as a NETDUMP_EKCD_KEY message.
+ * Both payloads are staged through nd_buf.  The di argument is not used.
+ *
+ * Returns 0 on success, EINVAL if the key does not fit in nd_buf, and
+ * otherwise the error returned by netdump_send().
+ */
+static int
+netdump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh,
+    void *key, uint32_t keysize)
+{
+	int error;
+
+	memcpy(nd_buf, kdh, sizeof(*kdh));
+	error = netdump_send(NETDUMP_KDH, 0, nd_buf, sizeof(*kdh));
+	if (error == 0 && keysize > 0) {
+		if (keysize > sizeof(nd_buf))
+			return (EINVAL);
+		memcpy(nd_buf, key, keysize);
+		error = netdump_send(NETDUMP_EKCD_KEY, 0, nd_buf, keysize);
+	}
+	return (error);
+}
+
+/*
+ * Cleanup routine for a possibly failed netdump.
+ *
+ * Undoes what netdump_start() set up: clears the gateway address if it was
+ * only borrowed from the server address, puts the driver's original if_input
+ * hook back if it was replaced, and signals NETDUMP_END to the driver.
+ * Called from netdump_start() on failure and from netdump_dumper() when the
+ * dump ends.
+ */
+static void
+netdump_cleanup(void)
+{
+
+	if (restore_gw_addr != 0) {
+		nd_gateway.s_addr = INADDR_ANY;
+		restore_gw_addr = 0;
+	}
+	if (drv_if_input != NULL) {
+		nd_ifp->if_input = drv_if_input;
+		drv_if_input = NULL;
+	}
+	nd_ifp->if_netdump_methods->nd_event(nd_ifp, NETDUMP_END);
+}
+
+/*-
+ * KLD specific code.
+ */
+
+static struct cdevsw netdump_cdevsw = {
+	.d_version =	D_VERSION,
+	.d_ioctl =	netdump_ioctl,
+	.d_name =	"netdump",
+};
+
+static struct cdev *netdump_cdev;
+/*
+ * Make the interface named conf->ndc_iface the netdump interface.
+ *
+ * The interface list is searched by name under the ifnet read lock.  The
+ * interface must be of type IFT_ETHER and be accepted by
+ * netdump_supported_nic().  On success nd_ifp is set, the mbuf pool is sized
+ * through netdump_reinit(), *conf is copied into nd_conf and nd_enabled is
+ * set.
+ *
+ * Returns 0 on success and 1 (not an errno value) on failure, after printing
+ * a message; netdump_ioctl() maps a failure to EINVAL and netdump_modevent()
+ * ignores it.
+ */
+static int
+netdump_configure(struct netdump_conf *conf)
+{
+	struct ifnet *ifp;
+
+	IFNET_RLOCK_NOSLEEP();
+	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+		if (strcmp(ifp->if_xname, conf->ndc_iface) == 0)
+			break;
+	}
+	/* XXX ref: ifp is used below, after the lock is dropped, without a reference. */
+	IFNET_RUNLOCK_NOSLEEP();
+
+	if (ifp == NULL) {
+		printf("netdump: unknown interface '%s'\n", conf->ndc_iface);
+		return (1);
+	} else if (!netdump_supported_nic(ifp) || ifp->if_type != IFT_ETHER) {
+		printf("netdump: unsupported interface '%s'\n",
+		    conf->ndc_iface);
+		return (1);
+	}
+
+	nd_ifp = ifp;	/* must be set first: netdump_reinit() ignores any other ifp */
+	netdump_reinit(ifp);
+	memcpy(&nd_conf, conf, sizeof(nd_conf));
+	nd_enabled = 1;
+	return (0);
+}
+
+/*
+ * Reinitialize the mbuf pool used by drivers while dumping. This is called
+ * from the generic ioctl handler for SIOCSIFMTU after the driver has
+ * reconfigured itself.  It is also called from netdump_configure().
+ *
+ * Does nothing unless ifp is the configured netdump interface.  The receive
+ * ring count, cluster count and cluster size are obtained from the driver's
+ * nd_init method.
+ */
+void
+netdump_reinit(struct ifnet *ifp)
+{
+	int clsize, nmbuf, ncl, nrxr;
+
+	if (ifp != nd_ifp)
+		return;
+
+	ifp->if_netdump_methods->nd_init(ifp, &nrxr, &ncl, &clsize);
+	KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr));
+
+	/*
+	 * We need two headers per message on the transmit side. Multiply by
+	 * four to give us some breathing room.
+	 *
+	 * nmbuf is derived from the cluster count reported by the driver;
+	 * only afterwards is ncl itself scaled by the number of receive rings.
+	 */
+	nmbuf = ncl * (4 + nrxr);
+	ncl *= nrxr;
+	netdump_mbuf_reinit(nmbuf, ncl, clsize);
+}
+
+/*
+ * ioctl(2) handler for the netdump device. This is currently only used to
+ * register netdump as a dump device.
+ *
+ * Three commands are handled: DIOCSKERNELDUMP (disable only), NETDUMPGCONF
+ * (report the current configuration) and NETDUMPSCONF (configure and enable,
+ * or disable).  Any other command fails with EINVAL.
+ *
+ * Parameters:
+ *	dev, Unused.
+ *	cmd, The ioctl to be handled.
+ *	addr, The parameter for the ioctl.
+ *	flags, Unused.
+ *	td, The thread invoking this ioctl.
+ *
+ * Returns:
+ *	0 on success, and an errno value on failure.
+ */
+static int
+netdump_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t addr,
+    int flags __unused, struct thread *td)
+{
+	struct dumperinfo dumper;
+	struct netdump_conf *conf;
+	int error;
+	u_int u;
+
+	error = 0;
+	switch (cmd) {
+	case DIOCSKERNELDUMP:
+		/* Only "disable" (a zero argument) is accepted here. */
+		u = *(u_int *)addr;
+		if (u != 0) {
+			error = ENXIO;
+			break;
+		}
+
+		if (nd_enabled) {
+			nd_enabled = 0;
+			netdump_mbuf_drain();
+		}
+		break;
+	case NETDUMPGCONF:
+		/* Report the active configuration; fails if none is active. */
+		conf = (struct netdump_conf *)addr;
+		if (!nd_enabled) {
+			error = ENXIO;
+			break;
+		}
+
+		strlcpy(conf->ndc_iface, nd_ifp->if_xname,
+		    sizeof(conf->ndc_iface));
+		memcpy(&conf->ndc_server, &nd_server, sizeof(nd_server));
+		memcpy(&conf->ndc_client, &nd_client, sizeof(nd_client));
+		memcpy(&conf->ndc_gateway, &nd_gateway, sizeof(nd_gateway));
+		break;
+	case NETDUMPSCONF:
+		conf = (struct netdump_conf *)addr;
+
+		/*
+		 * The interface name comes from the caller and is handed to
+		 * strcmp() and printf("%s") in netdump_configure() and to
+		 * set_dumper() below.  Force NUL termination so that none of
+		 * them can run past the end of the array.
+		 */
+		conf->ndc_iface[sizeof(conf->ndc_iface) - 1] = '\0';
+
+		if (conf->ndc_kda.kda_enable == 0) {
+			/* Disable request: keep nd_enabled if clearing fails. */
+			if (nd_enabled) {
+				error = clear_dumper(td);
+				if (error == 0)
+					nd_enabled = 0;
+			}
+			break;
+		}
+
+		/* netdump_configure() returns 1, not an errno, on failure. */
+		if (netdump_configure(conf) != 0) {
+			error = EINVAL;
+			break;
+		}
+
+		dumper.dumper_start = netdump_start;
+		dumper.dumper_hdr = netdump_write_headers;
+		dumper.dumper = netdump_dumper;
+		dumper.priv = NULL;
+		dumper.blocksize = NETDUMP_DATASIZE;
+		dumper.maxiosize = MAXDUMPPGS * PAGE_SIZE;
+		dumper.mediaoffset = 0;
+		dumper.mediasize = 0;
+		error = set_dumper(&dumper, conf->ndc_iface, td,
+		    conf->ndc_kda.kda_compression, conf->ndc_kda.kda_encryption,
+		    conf->ndc_kda.kda_key, conf->ndc_kda.kda_encryptedkeysize,
+		    conf->ndc_kda.kda_encryptedkey);
+		/* netdump_configure() set nd_enabled; undo it on failure. */
+		if (error != 0)
+			nd_enabled = 0;
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	return (error);
+}
+
+/*
+ * Module event handler.  On load, creates the netdump character device and,
+ * if the net.dump.iface kernel environment variable is set, configures
+ * netdump on that interface after reading the optional net.dump.server,
+ * net.dump.client and net.dump.gateway variables.  On unload, destroys the
+ * device and clears the dumper if netdump was enabled.
+ *
+ * Parameters:
+ *	mod, Unused.
+ *	what, The module event type.
+ *	priv, Unused.
+ * + * Returns: + * int, An errno value if an error occured, 0 otherwise. + */ +static int +netdump_modevent(module_t mod __unused, int what, void *priv __unused) +{ + struct netdump_conf conf; + char *arg; + int error; + + error = 0; + switch (what) { + case MOD_LOAD: + error = make_dev_p(MAKEDEV_WAITOK, &netdump_cdev, + &netdump_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "netdump"); + if (error != 0) + return (error); + + if ((arg = kern_getenv("net.dump.iface")) != NULL) { + strlcpy(conf.ndc_iface, arg, sizeof(conf.ndc_iface)); + freeenv(arg); + + if ((arg = kern_getenv("net.dump.server")) != NULL) { + inet_aton(arg, &conf.ndc_server); + freeenv(arg); + } + if ((arg = kern_getenv("net.dump.client")) != NULL) { + inet_aton(arg, &conf.ndc_server); + freeenv(arg); + } + if ((arg = kern_getenv("net.dump.gateway")) != NULL) { + inet_aton(arg, &conf.ndc_server); + freeenv(arg); + } + + /* Ignore errors; we print a message to the console. */ + (void)netdump_configure(&conf); + } + break; + case MOD_UNLOAD: + destroy_dev(netdump_cdev); + if (nd_enabled) { + printf("netdump: disabling dump device for unload\n"); + (void)clear_dumper(curthread); + nd_enabled = 0; + } + break; + default: + error = EOPNOTSUPP; + break; + } + return (error); +} + +static moduledata_t netdump_mod = { + "netdump", + netdump_modevent, + NULL, +}; + +MODULE_VERSION(netdump, 1); +DECLARE_MODULE(netdump, netdump_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); diff --git a/sys/sparc64/sparc64/dump_machdep.c b/sys/sparc64/sparc64/dump_machdep.c index 8d35b3e2ba2c..2129485257cc 100644 --- a/sys/sparc64/sparc64/dump_machdep.c +++ b/sys/sparc64/sparc64/dump_machdep.c @@ -98,12 +98,12 @@ dumpsys(struct dumperinfo *di) dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_SPARC64_VERSION, size); - printf("Dumping %lu MB (%d chunks)\n", (u_long)(size >> 20), nreg); - error = dump_start(di, &kdh); if (error != 0) goto fail; + printf("Dumping %lu MB (%d chunks)\n", (u_long)(size >> 20), nreg); + /* Dump the private header. 
*/ hdr.dh_hdr_size = hdrsize; hdr.dh_tsb_pa = tsb_kernel_phys; diff --git a/sys/sys/conf.h b/sys/sys/conf.h index be3dee3d3de7..190562c6ef47 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h @@ -101,6 +101,8 @@ struct cdev { struct bio; struct buf; +struct dumperinfo; +struct kerneldumpheader; struct thread; struct uio; struct knote; @@ -131,6 +133,9 @@ typedef int dumper_t( vm_offset_t _physical, /* Physical address of virtual. */ off_t _offset, /* Byte-offset to write at. */ size_t _length); /* Number of bytes to dump. */ +typedef int dumper_start_t(struct dumperinfo *di); +typedef int dumper_hdr_t(struct dumperinfo *di, struct kerneldumpheader *kdh, + void *key, uint32_t keylen); #endif /* _KERNEL */ @@ -332,13 +337,18 @@ struct kerneldumpheader; struct dumperinfo { dumper_t *dumper; /* Dumping function. */ + dumper_start_t *dumper_start; /* Dumper callback for dump_start(). */ + dumper_hdr_t *dumper_hdr; /* Dumper callback for writing headers. */ void *priv; /* Private parts. */ u_int blocksize; /* Size of block in bytes. */ u_int maxiosize; /* Max size allowed for an individual I/O */ off_t mediaoffset; /* Initial offset in bytes. */ off_t mediasize; /* Space available in bytes. */ + + /* MI kernel dump state. */ void *blockbuf; /* Buffer for padding shorter dump blocks */ off_t dumpoff; /* Offset of ongoing kernel dump. */ + off_t origdumpoff; /* Starting dump offset. */ struct kerneldumpcrypto *kdcrypto; /* Kernel dump crypto. */ struct kerneldumpcomp *kdcomp; /* Kernel dump compression. 
*/ }; @@ -349,6 +359,7 @@ int doadump(boolean_t); int set_dumper(struct dumperinfo *di, const char *devname, struct thread *td, uint8_t compression, uint8_t encryption, const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey); +int clear_dumper(struct thread *td); int dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh); int dump_append(struct dumperinfo *, void *, vm_offset_t, size_t); diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index ba1e88c6175d..519dfc987ab3 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -1372,5 +1372,12 @@ mbuf_tstmp2timespec(struct mbuf *m, struct timespec *ts) } #endif +#ifdef NETDUMP +/* Invoked from the netdump client code. */ +void netdump_mbuf_drain(void); +void netdump_mbuf_dump(void); +void netdump_mbuf_reinit(int nmbuf, int nclust, int clsize); +#endif + #endif /* _KERNEL */ #endif /* !_SYS_MBUF_H_ */ diff --git a/sys/vm/uma.h b/sys/vm/uma.h index 017669363c69..41d9d88bfe3a 100644 --- a/sys/vm/uma.h +++ b/sys/vm/uma.h @@ -265,8 +265,8 @@ uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor, * information in the vm_page. */ #define UMA_ZONE_SECONDARY 0x0200 /* Zone is a Secondary Zone */ -/* 0x0400 Unused */ -#define UMA_ZONE_MAXBUCKET 0x0800 /* Use largest buckets */ +#define UMA_ZONE_NOBUCKET 0x0400 /* Do not use buckets. */ +#define UMA_ZONE_MAXBUCKET 0x0800 /* Use largest buckets. */ #define UMA_ZONE_CACHESPREAD 0x1000 /* * Spread memory start locations across * all possible cache lines. 
May diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 949bb54e777a..87c4c2b2fb53 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -1681,10 +1681,15 @@ zone_ctor(void *mem, int size, void *udata, int flags) } out: - if ((arg->flags & UMA_ZONE_MAXBUCKET) == 0) - zone->uz_count = bucket_select(zone->uz_size); - else + KASSERT((arg->flags & (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET)) != + (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET), + ("Invalid zone flag combination")); + if ((arg->flags & UMA_ZONE_MAXBUCKET) != 0) zone->uz_count = BUCKET_MAX; + else if ((arg->flags & UMA_ZONE_NOBUCKET) != 0) + zone->uz_count = 0; + else + zone->uz_count = bucket_select(zone->uz_size); zone->uz_count_min = zone->uz_count; return (0);