From dcf6ed47b233ec7aa1d8d0f2d402f8a82d2b1053 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Wed, 21 Sep 2016 11:45:02 -0700 Subject: [PATCH 5/7] Add netdumpd This is a userland daemon that accepts kernel dumps from netdump clients (i.e. panicked nodes). It implements the custom netdump protocol over UDP and has some facilities to run hooks after a dump has completed. This can be used to, for example, send an email notifying the node owner of the panic. --- etc/defaults/rc.conf | 3 + etc/rc.d/Makefile | 1 + etc/rc.d/netdumpd | 19 + share/man/man5/rc.conf.5 | 18 +- usr.sbin/Makefile | 1 + usr.sbin/netdumpd/Makefile | 8 + usr.sbin/netdumpd/netdumpd.8 | 92 +++++ usr.sbin/netdumpd/netdumpd.c | 937 +++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 1078 insertions(+), 1 deletion(-) create mode 100644 etc/rc.d/netdumpd create mode 100644 usr.sbin/netdumpd/Makefile create mode 100644 usr.sbin/netdumpd/netdumpd.8 create mode 100644 usr.sbin/netdumpd/netdumpd.c diff --git a/etc/defaults/rc.conf b/etc/defaults/rc.conf index fae7523..4a467af 100644 --- a/etc/defaults/rc.conf +++ b/etc/defaults/rc.conf @@ -664,6 +664,9 @@ mixer_enable="YES" # Run the sound mixer. opensm_enable="NO" # Opensm(8) for infiniband devices defaults to off casperd_enable="YES" # casperd(8) daemon +netdumpd_enable="NO" # Run the netdump daemon at startup. +netdumpd_flags="" # netdumpd(8) flags. 
+ ############################################################## ### Jail Configuration (see rc.conf(5) manual page) ########## ############################################################## diff --git a/etc/rc.d/Makefile b/etc/rc.d/Makefile index b36b497..fa31b51 100755 --- a/etc/rc.d/Makefile +++ b/etc/rc.d/Makefile @@ -92,6 +92,7 @@ FILES= DAEMON \ mrouted \ msgs \ natd \ + netdumpd \ netif \ netoptions \ netwait \ diff --git a/etc/rc.d/netdumpd b/etc/rc.d/netdumpd new file mode 100644 index 0000000..cc1fbe0 --- /dev/null +++ b/etc/rc.d/netdumpd @@ -0,0 +1,19 @@ +#!/bin/sh +# +# $FreeBSD$ +# + +# PROVIDE: netdumpd +# REQUIRE: NETWORKING +# KEYWORD: shutdown + +. /etc/rc.subr + +name="netdumpd" +rcvar="netdumpd_enable" +rcflags="${netdumpd_flags}" +pidfile="/var/run/${name}.pid" +command="/usr/sbin/${name}" + +load_rc_config $name +run_rc_command "$1" diff --git a/share/man/man5/rc.conf.5 b/share/man/man5/rc.conf.5 index 0f6bca4..086c606 100644 --- a/share/man/man5/rc.conf.5 +++ b/share/man/man5/rc.conf.5 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 25, 2013 +.Dd July 19, 2016 .Dt RC.CONF 5 .Os .Sh NAME @@ -4429,6 +4429,21 @@ interface if desired. Defines the total number of seconds to wait for link to become usable, polled at a 1-second interval. The default is 30. +.It Va netdumpd_enable +.Pq Vt bool +If set to +.Dq Li YES , +run the +.Xr netdumpd 8 +daemon. +.It Va netdumpd_flags +.Pq Vt str +If +.Va netdumpd_enable +is set to +.Dq Li YES , +these are the flags to pass to +.Xr netdumpd 8 . .El .Sh FILES .Bl -tag -width ".Pa /etc/defaults/rc.conf" -compact @@ -4493,6 +4508,7 @@ The default is 30. 
.Xr mountd 8 , .Xr moused 8 , .Xr mrouted 8 , +.Xr netdumpd 8 , .Xr newfs 8 , .Xr newsyslog 8 , .Xr nfsd 8 , diff --git a/usr.sbin/Makefile b/usr.sbin/Makefile index caf89db..1bedb2e 100644 --- a/usr.sbin/Makefile +++ b/usr.sbin/Makefile @@ -49,6 +49,7 @@ SUBDIR= adduser \ mptutil \ mtest \ mtree \ + netdumpd \ newsyslog \ nfscbd \ nfsd \ diff --git a/usr.sbin/netdumpd/Makefile b/usr.sbin/netdumpd/Makefile new file mode 100644 index 0000000..7997e10 --- /dev/null +++ b/usr.sbin/netdumpd/Makefile @@ -0,0 +1,8 @@ +# $FreeBSD$ + +PROG= netdumpd +MAN= netdumpd.8 + +LIBADD= util + +.include diff --git a/usr.sbin/netdumpd/netdumpd.8 b/usr.sbin/netdumpd/netdumpd.8 new file mode 100644 index 0000000..8b1d1e7 --- /dev/null +++ b/usr.sbin/netdumpd/netdumpd.8 @@ -0,0 +1,92 @@ +.\" Copyright (c) 2011 Sandvine Incorporated. All rights reserved. +.\" Copyright (c) 2016 Dell EMC +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd August 26, 2016 +.Dt NETDUMPD 8 +.Os +.Sh NAME +.Nm netdumpd +.Nd receive kernel core dumps over the network +.Sh SYNOPSIS +.Nm +.Op Fl a Ar addr +.Op Fl b Ar prescript +.Op Fl D +.Op Fl d Ar dumpdir +.Op Fl i Ar postscript +.Sh DESCRIPTION +The +.Nm +utility listens on a UDP socket for incoming connections from a +.Fx +kernel core dumping over the network. +.Pp +The following options are available: +.Bl -tag -width indent +.It Fl a +Bind the daemon to the given address +.Dq Pa addr . +.It Fl b +Execute the script +.Dq Pa prescript +as soon as a dump begins. +The script accepts the following strings as parameters: its name, the reason for +invocation, the client address, the client hostname, the info file name and the +core dump file name. +.It Fl D +Run the utility in debugging mode. +The daemon does not detach from the terminal, and all output is printed on the +console. +.It Fl d +Save the core dumps to the specified +.Dq Pa dumpdir +directory. +The default directory is +.Pa /var/crash . +.It Fl i +Execute the script +.Dq Pa postscript +after each dump is received. +The script receives the same parameters as the +.Fl b +.Pa prescript . +.El +.Sh SECURITY +The +.Nm +utility does not perform any authentication of clients and should therefore +be configured to listen only to trusted networks. +.Nm +can be made to write an arbitrary amount of client data to a locally-mounted +filesystem.
+.Sh SEE ALSO +.Xr netdump 4 , +.Xr dumpon 8 +.Sh HISTORY +The +.Nm +utility appeared in +.Fx 12.0 . diff --git a/usr.sbin/netdumpd/netdumpd.c b/usr.sbin/netdumpd/netdumpd.c new file mode 100644 index 0000000..8cb0d92 --- /dev/null +++ b/usr.sbin/netdumpd/netdumpd.c @@ -0,0 +1,937 @@ +/*- + * Copyright (c) 2005-2011 Sandvine Incorporated. All rights reserved. + * Copyright (c) 2016 Dell EMC + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define MAX_DUMPS 256 /* Maximum saved dumps per remote host. */ +#define CLIENT_TIMEOUT 600 /* Netdump timeout period, in seconds. */ +#define CLIENT_TPASS 10 /* Scan for timed-out clients every 10s. */ + +#define LOGERR(m, ...) \ + (*g_phook)(LOG_ERR | LOG_DAEMON, (m), ## __VA_ARGS__) +#define LOGERR_PERROR(m) \ + (*g_phook)(LOG_ERR | LOG_DAEMON, "%s: %s\n", m, strerror(errno)) +#define LOGINFO(m, ...) \ + (*g_phook)(LOG_INFO | LOG_DAEMON, (m), ## __VA_ARGS__) +#define LOGWARN(m, ...) \ + (*g_phook)(LOG_WARNING | LOG_DAEMON, (m), ## __VA_ARGS__) + +#define client_ntoa(cl) \ + inet_ntoa((cl)->ip) +#define client_pinfo(cl, f, ...) \ + fprintf((cl)->infofile, (f), ## __VA_ARGS__) + +struct netdump_msg { + struct netdump_msg_hdr nm_hdr; + uint8_t nm_data[NETDUMP_DATASIZE]; +}; + +struct netdump_client { + char infofilename[MAXPATHLEN]; + char corefilename[MAXPATHLEN]; + char hostname[NI_MAXHOST]; + time_t last_msg; + LIST_ENTRY(netdump_client) iter; + struct in_addr ip; + FILE *infofile; + int corefd; + int sock; + unsigned short printed_port_warning: 1; + unsigned short any_data_rcvd: 1; +}; + +/* Clients list. */ +static LIST_HEAD(, netdump_client) g_clients = LIST_HEAD_INITIALIZER(g_clients); + +/* Program arguments handlers. */ +static char g_dumpdir[MAXPATHLEN]; +static char *g_handler_script; +static char *g_handler_pre_script; +static struct in_addr g_bindip; + +/* Miscellaneous handlers. */ +static struct pidfh *g_pfh; +static time_t g_now; +static time_t g_last_timeout_check; +static int g_kq; +static int g_sock = -1; +static bool g_debug = false; + +/* Daemon print functions hook. 
*/ +static void (*g_phook)(int, const char *, ...); + +static struct netdump_client *alloc_client(struct sockaddr_in *sip); +static int eventloop(void); +static void exec_handler(struct netdump_client *client, const char *reason); +static void free_client(struct netdump_client *client); +static void handle_finish(struct netdump_client *client, + struct netdump_msg *msg); +static void handle_herald(struct sockaddr_in *from, + struct netdump_client *client, + struct netdump_msg *msg); +static void handle_kdh(struct netdump_client *client, + struct netdump_msg *msg); +static bool handle_packet(struct netdump_client *client, + struct sockaddr_in *from, const char *fromstr, + struct netdump_msg *msg); +static void handle_timeout(struct netdump_client *client); +static void handle_vmcore(struct netdump_client *client, + struct netdump_msg *msg); +static void phook_printf(int priority, const char *message, ...) + __printflike(2, 3); +static ssize_t receive_message(int isock, struct sockaddr_in *from, + char *fromstr, size_t fromstrlen, struct netdump_msg *msg); +static void send_ack(struct netdump_client *client, + struct netdump_msg *msg); +static void timeout_clients(void); +static void usage(const char *cmd); + +static void +usage(const char *cmd) +{ + + warnx( + "usage: %s [-D] [-a bind_addr] [-d dumpdir] [-i script] [-b script]", + cmd); +} + +static void +phook_printf(int priority, const char *message, ...) 
+{ + va_list ap; + + va_start(ap, message); + if ((priority & LOG_INFO) != 0) { + vprintf(message, ap); + } else + vfprintf(stderr, message, ap); + va_end(ap); +} + +static struct netdump_client * +alloc_client(struct sockaddr_in *sip) +{ + struct kevent event; + struct sockaddr_in saddr; + struct netdump_client *client; + struct in_addr *ip; + char *firstdot; + int i, ecode, fd, bufsz; + + assert(sip != NULL); + + client = calloc(1, sizeof(*client)); + if (client == NULL) { + LOGERR_PERROR("calloc()"); + goto error_out; + } + ip = &sip->sin_addr; + bcopy(ip, &client->ip, sizeof(*ip)); + client->corefd = -1; + client->sock = -1; + client->last_msg = g_now; + + ecode = getnameinfo((struct sockaddr *)sip, sip->sin_len, + client->hostname, sizeof(client->hostname), NULL, 0, NI_NAMEREQD); + if (ecode != 0) { + /* Can't resolve, try with a numeric IP. */ + ecode = getnameinfo((struct sockaddr *)sip, sip->sin_len, + client->hostname, sizeof(client->hostname), NULL, 0, 0); + if (ecode != 0) { + LOGERR("getnameinfo(): %s\n", gai_strerror(ecode)); + goto error_out; + } + } else { + /* Strip off the domain name */ + firstdot = strchr(client->hostname, '.'); + if (firstdot) + *firstdot = '\0'; + } + + client->sock = socket(PF_INET, + SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_UDP); + if (client->sock == -1) { + LOGERR_PERROR("socket()"); + goto error_out; + } + bzero(&saddr, sizeof(saddr)); + saddr.sin_len = sizeof(saddr); + saddr.sin_family = AF_INET; + saddr.sin_addr.s_addr = g_bindip.s_addr; + saddr.sin_port = htons(0); + if (bind(client->sock, (struct sockaddr *)&saddr, sizeof(saddr))) { + LOGERR_PERROR("bind()"); + goto error_out; + } + saddr.sin_addr.s_addr = ip->s_addr; + saddr.sin_port = htons(NETDUMP_ACKPORT); + if (connect(client->sock, (struct sockaddr *)&saddr, sizeof(saddr))) { + LOGERR_PERROR("connect()"); + goto error_out; + } + + /* It should be enough to hold approximatively twice the chunk size. 
*/ + bufsz = 131072; + if (setsockopt(client->sock, SOL_SOCKET, SO_RCVBUF, &bufsz, + sizeof(bufsz))) { + LOGERR_PERROR("setsockopt()"); + LOGWARN( + "May drop packets from %s due to small receive buffer\n", + client->hostname); + } + + /* Try info.host.0 through info.host.255 in sequence. */ + for (i = 0; i < MAX_DUMPS; i++) { + snprintf(client->infofilename, sizeof(client->infofilename), + "%s/info.%s.%d", g_dumpdir, client->hostname, i); + snprintf(client->corefilename, sizeof(client->corefilename), + "%s/vmcore.%s.%d", g_dumpdir, client->hostname, i); + + /* Try the info file first. */ + fd = open(client->infofilename, O_WRONLY | O_CREAT | O_EXCL, + 0600); + if (fd == -1) { + if (errno != EEXIST) + LOGERR("open(\"%s\"): %s\n", + client->infofilename, strerror(errno)); + continue; + } + client->infofile = fdopen(fd, "w"); + if (client->infofile == NULL) { + LOGERR_PERROR("fdopen()"); + close(fd); + (void)unlink(client->infofilename); + continue; + } + + /* Next make the core file. */ + fd = open(client->corefilename, O_RDWR | O_CREAT | O_EXCL, + 0600); + if (fd == -1) { + /* Failed. Keep the numbers in sync. 
*/ + fclose(client->infofile); + (void)unlink(client->infofilename); + client->infofile = NULL; + if (errno != EEXIST) + LOGERR("open(\"%s\"): %s\n", + client->corefilename, strerror(errno)); + continue; + } + client->corefd = fd; + break; + } + + if (client->infofile == NULL || client->corefd == -1) { + LOGERR("Can't create output files for new client %s [%s]\n", + client->hostname, client_ntoa(client)); + goto error_out; + } + + EV_SET(&event, client->sock, EVFILT_READ, EV_ADD, 0, 0, NULL); + if (kevent(g_kq, &event, 1, NULL, 0, NULL) != 0) { + LOGERR_PERROR("kevent(EV_ADD)"); + goto error_out; + } + + LIST_INSERT_HEAD(&g_clients, client, iter); + return (client); + +error_out: + if (client != NULL) { + if (client->infofile != NULL) + fclose(client->infofile); + if (client->corefd != -1) + close(client->corefd); + if (client->sock != -1) + (void)close(client->sock); + free(client); + } + return (NULL); +} + +static void +free_client(struct netdump_client *client) +{ + struct kevent event; + + EV_SET(&event, client->sock, EVFILT_READ, EV_DELETE, 0, 0, NULL); + if (kevent(g_kq, &event, 1, NULL, 0, NULL) != 0) + LOGERR_PERROR("kevent(EV_DELETE)"); + + /* Remove from the list. Ignore errors from close() routines. 
*/ + LIST_REMOVE(client, iter); + fclose(client->infofile); + close(client->corefd); + close(client->sock); + free(client); +} + +static void +exec_script(struct netdump_client *client, const char *reason, + const char *script) +{ + const char *argv[7]; + int error; + pid_t pid; + + argv[0] = script; + argv[1] = reason; + argv[2] = client_ntoa(client); + argv[3] = client->hostname; + argv[4] = client->infofilename; + argv[5] = client->corefilename; + argv[6] = NULL; + + error = posix_spawn(&pid, script, NULL, NULL, + __DECONST(char *const *, argv), NULL); + if (error != 0) + LOGERR("posix_spawn(): %s", strerror(error)); +} + +static void +exec_handler(struct netdump_client *client, const char *reason) +{ + + if (g_handler_script != NULL) + exec_script(client, reason, g_handler_script); +} + +static void +exec_pre_script(struct netdump_client *client, const char *reason) +{ + + if (g_handler_pre_script != NULL) + exec_script(client, reason, g_handler_pre_script); +} + +static void +handle_timeout(struct netdump_client *client) +{ + + assert(client != NULL); + + LOGINFO("Client %s timed out\n", client_ntoa(client)); + client_pinfo(client, "Dump incomplete: client timed out\n"); + exec_handler(client, "timeout"); + free_client(client); +} + +static void +timeout_clients(void) +{ + struct netdump_client *client, *tmp; + + /* Only time out clients every 10 seconds. */ + if (g_now - g_last_timeout_check < CLIENT_TPASS) + return; + g_last_timeout_check = g_now; + + /* Traverse the list looking for stale clients. 
*/ + LIST_FOREACH_SAFE(client, &g_clients, iter, tmp) { + if (client->last_msg + CLIENT_TIMEOUT < g_now) { + LOGINFO("Timingout with such values: %jd + %jd < %jd\n", + (intmax_t)client->last_msg, + (intmax_t)CLIENT_TIMEOUT, (intmax_t)g_now); + handle_timeout(client); + } + } +} + +static void +send_ack(struct netdump_client *client, struct netdump_msg *msg) +{ + struct netdump_ack ack; + + assert(client != NULL && msg != NULL); + + bzero(&ack, sizeof(ack)); + ack.na_seqno = htonl(msg->nm_hdr.mh_seqno); + + if (send(client->sock, &ack, sizeof(ack), 0) == -1) + LOGERR_PERROR("send()"); + /* + * XXX: On EAGAIN, we should probably queue the packet + * to be sent when the socket is writable but + * that is too much effort, since it is mostly + * harmless to wait for the client to retransmit. + */ +} + +static void +handle_herald(struct sockaddr_in *from, struct netdump_client *client, + struct netdump_msg *msg) +{ + + assert(from != NULL && msg != NULL); + + if (client != NULL) { + if (client->any_data_rcvd == 0) { + + /* Must be a retransmit of the herald packet. */ + send_ack(client, msg); + return; + } + + /* An old connection must have timed out. Clean it up first. 
*/ + handle_timeout(client); + } + + client = alloc_client(from); + if (client == NULL) { + LOGERR("handle_herald(): new client allocation failure\n"); + return; + } + client_pinfo(client, "Dump from %s [%s]\n", client->hostname, + client_ntoa(client)); + LOGINFO("New dump from client %s [%s] (to %s)\n", client->hostname, + client_ntoa(client), client->corefilename); + exec_pre_script(client, "new dump"); + send_ack(client, msg); +} + +static void +handle_kdh(struct netdump_client *client, struct netdump_msg *msg) +{ + time_t t; + uint64_t dumplen; + struct kerneldumpheader *h; + int parity_check; + + assert(msg != NULL); + + if (client == NULL) + return; + + client->any_data_rcvd = 1; + h = (struct kerneldumpheader *)(void *)msg->nm_data; + if (msg->nm_hdr.mh_len < sizeof(struct kerneldumpheader)) { + LOGERR("Bad KDH from %s [%s]: packet too small\n", + client->hostname, client_ntoa(client)); + client_pinfo(client, "Bad KDH: packet too small\n"); + fflush(client->infofile); + return; + } + parity_check = kerneldump_parity(h); + + /* Make sure all the strings are null-terminated. 
*/ + h->architecture[sizeof(h->architecture) - 1] = '\0'; + h->hostname[sizeof(h->hostname) - 1] = '\0'; + h->versionstring[sizeof(h->versionstring) - 1] = '\0'; + h->panicstring[sizeof(h->panicstring) - 1] = '\0'; + + client_pinfo(client, " Architecture: %s\n", h->architecture); + client_pinfo(client, " Architecture version: %d\n", + dtoh32(h->architectureversion)); + dumplen = dtoh64(h->dumplength); + client_pinfo(client, " Dump length: %lldB (%lld MB)\n", + (long long)dumplen, (long long)(dumplen >> 20)); + client_pinfo(client, " blocksize: %d\n", dtoh32(h->blocksize)); + t = dtoh64(h->dumptime); + client_pinfo(client, " Dumptime: %s", ctime(&t)); + client_pinfo(client, " Hostname: %s\n", h->hostname); + client_pinfo(client, " Versionstring: %s", h->versionstring); + client_pinfo(client, " Panicstring: %s\n", h->panicstring); + client_pinfo(client, " Header parity check: %s\n", + parity_check ? "Fail" : "Pass"); + fflush(client->infofile); + + LOGINFO("(KDH from %s [%s])", client->hostname, client_ntoa(client)); + send_ack(client, msg); +} + +static void +handle_vmcore(struct netdump_client *client, struct netdump_msg *msg) +{ + + assert(msg != NULL); + + if (client == NULL) + return; + + client->any_data_rcvd = 1; + if (msg->nm_hdr.mh_seqno % (16 * 1024 * 1024 / 1456) == 0) { + /* Approximately every 16MB with MTU of 1500 */ + LOGINFO("."); + } + if (pwrite(client->corefd, msg->nm_data, msg->nm_hdr.mh_len, + msg->nm_hdr.mh_offset) == -1) { + LOGERR("pwrite (for client %s [%s]): %s\n", client->hostname, + client_ntoa(client), strerror(errno)); + client_pinfo(client, + "Dump unsuccessful: write error @ offset %08jx: %s\n", + (uintmax_t)msg->nm_hdr.mh_offset, strerror(errno)); + exec_handler(client, "error"); + free_client(client); + return; + } + send_ack(client, msg); +} + +static void +handle_finish(struct netdump_client *client, struct netdump_msg *msg) +{ + + assert(msg != NULL); + + if (client == NULL) + return; + + LOGINFO("\nCompleted dump from client %s 
[%s]\n", client->hostname, + client_ntoa(client)); + client_pinfo(client, "Dump complete\n"); + send_ack(client, msg); + (void)fsync(client->corefd); + exec_handler(client, "success"); + free_client(client); +} + + +static ssize_t +receive_message(int isock, struct sockaddr_in *from, char *fromstr, + size_t fromstrlen, struct netdump_msg *msg) +{ + socklen_t fromlen; + ssize_t len; + + assert(from != NULL && fromstr != NULL && msg != NULL); + + fromlen = sizeof(*from); + len = recvfrom(isock, msg, sizeof(*msg), 0, (struct sockaddr *)from, + &fromlen); + if (len == -1) { + + /* + * As long as some callers may discard the errors printing + * in defined circumstances, leave them the choice and avoid + * any error reporting. + */ + return (-1); + } + + snprintf(fromstr, fromstrlen, "%s:%hu", inet_ntoa(from->sin_addr), + ntohs(from->sin_port)); + if ((size_t)len < sizeof(struct netdump_msg_hdr)) { + LOGERR("Ignoring runt packet from %s (got %zu)\n", fromstr, + (size_t)len); + return (0); + } + + /* Convert byte order. 
*/ + msg->nm_hdr.mh_type = ntohl(msg->nm_hdr.mh_type); + msg->nm_hdr.mh_seqno = ntohl(msg->nm_hdr.mh_seqno); + msg->nm_hdr.mh_offset = be64toh(msg->nm_hdr.mh_offset); + msg->nm_hdr.mh_len = ntohl(msg->nm_hdr.mh_len); + + if ((size_t)len < sizeof(struct netdump_msg_hdr) + msg->nm_hdr.mh_len) { + LOGERR("Packet too small from %s (got %zu, expected %zu)\n", + fromstr, (size_t)len, + sizeof(struct netdump_msg_hdr) + msg->nm_hdr.mh_len); + return (0); + } + return (len); +} + +static bool +handle_packet(struct netdump_client *client, struct sockaddr_in *from, + const char *fromstr, struct netdump_msg *msg) +{ + bool finished; + + assert(from != NULL && fromstr != NULL && msg != NULL); + + if (client != NULL) + client->last_msg = time(NULL); + + finished = false; + switch (msg->nm_hdr.mh_type) { + case NETDUMP_HERALD: + handle_herald(from, client, msg); + break; + case NETDUMP_KDH: + handle_kdh(client, msg); + break; + case NETDUMP_VMCORE: + handle_vmcore(client, msg); + break; + case NETDUMP_FINISHED: + handle_finish(client, msg); + finished = true; + break; + default: + LOGERR("Received unknown message type %d from %s\n", + msg->nm_hdr.mh_type, fromstr); + break; + } + return (finished); +} + +/* Handle a read event on the server socket. */ +static int +server_event(void) +{ + char fromstr[INET_ADDRSTRLEN + 6]; + struct sockaddr_in from; + struct netdump_msg msg; + struct netdump_client *client; + ssize_t len; + + while ((len = receive_message(g_sock, &from, fromstr, + sizeof(fromstr), &msg)) > 0) { + /* + * With len == 0 the packet was rejected (probably because it + * was too small) so just ignore this case. 
+ */ + + LIST_FOREACH(client, &g_clients, iter) + if (client->ip.s_addr == from.sin_addr.s_addr) + break; + + /* + * Technically, clients should not be responding on the server + * port, so client should be NULL, however, if they insist on + * doing so, it's not really going to hurt anything (except + * maybe fill up the server socket's receive buffer), so still + * accept it. The only possibly legitimate case is if there's a + * new dump starting and the previous one didn't finish cleanly. + * Handle this by suppressing the error on HERALD packets. + */ + if (client != NULL && msg.nm_hdr.mh_type != NETDUMP_HERALD && + client->printed_port_warning == 0) { + LOGWARN("Client %s responding on server port\n", + client->hostname); + client->printed_port_warning = 1; + } + if (handle_packet(client, &from, fromstr, &msg)) + break; + } + if (len < 0 && errno != EAGAIN) { + LOGERR_PERROR("recvfrom()"); + return (1); + } + return (0); +} + +/* Handle a read event on a client socket. */ +static void +client_event(struct netdump_client *client) +{ + char fromstr[INET_ADDRSTRLEN + 6]; + struct sockaddr_in from; + struct netdump_msg msg; + ssize_t len; + + while ((len = receive_message(client->sock, &from, fromstr, + sizeof(fromstr), &msg)) > 0) { + /* + * With len == 0 the packet was + * rejected (probably because it was + * too small) so just ignore this case. + */ + if (handle_packet(client, &from, fromstr, &msg)) + break; + } + if (len == -1 && errno != EAGAIN) { + LOGERR_PERROR("recvfrom()"); + handle_timeout(client); + } +} + +static int +eventloop(void) +{ + struct kevent events[8]; + struct timespec ts; + struct netdump_client *client, *tmp; + int ev, rc; + + /* We check for timed-out clients regularly. 
*/ + ts.tv_sec = CLIENT_TPASS; + ts.tv_nsec = 0; + + for (;;) { + rc = kevent(g_kq, NULL, 0, events, nitems(events), &ts); + if (rc < 0) { + LOGERR_PERROR("kevent()"); + return (1); + } + + g_now = time(NULL); + for (ev = 0; ev < rc; ev++) { + if (events[ev].filter == EVFILT_SIGNAL) + /* We received SIGINT or SIGTERM. */ + goto out; + + if ((int)events[ev].ident == g_sock) + if (server_event() != 0) + return (1); + + /* + * handle_packet() and handle_timeout() may free the client, + * handle stale pointers. + */ + LIST_FOREACH_SAFE(client, &g_clients, iter, tmp) { + if (client->sock == (int)events[ev].ident) { + client_event(client); + break; + } + } + } + + timeout_clients(); + } +out: + LOGINFO("Shutting down..."); + + /* + * Clients is the head of the list, so clients != NULL iff the list + * is not empty. Call it a timeout so that the scripts get run. + */ + while (!LIST_EMPTY(&g_clients)) + handle_timeout(LIST_FIRST(&g_clients)); + + return (0); +} + +static char * +get_script_option(void) +{ + char *script; + + script = strdup(optarg); + if (script == NULL) { + err(1, "strdup()"); + return (NULL); + } + if (access(script, F_OK | X_OK)) { + warn("cannot access %s", script); + free(script); + return (NULL); + } + return (script); +} + +int +main(int argc, char **argv) +{ + struct stat statbuf; + struct sockaddr_in bindaddr; + struct sigaction sa; + struct kevent sockev, sigev[2]; + sigset_t set; + int ch, exit_code; + + g_bindip.s_addr = INADDR_ANY; + + exit_code = 0; + while ((ch = getopt(argc, argv, "a:b:Dd:i:")) != -1) { + switch (ch) { + case 'a': + if (inet_aton(optarg, &g_bindip) == 0) { + warnx("invalid bind IP specified"); + exit_code = 1; + goto cleanup; + } + warnx("listening on IP %s", optarg); + break; + case 'b': + g_handler_pre_script = get_script_option(); + if (g_handler_pre_script == NULL) { + exit_code = 1; + goto cleanup; + } + break; + case 'D': + g_debug = true; + break; + case 'd': + if (strlcpy(g_dumpdir, optarg, sizeof(g_dumpdir)) >= + 
sizeof(g_dumpdir)) { + warnx("dumpdir '%s' is too long", optarg); + exit_code = 1; + goto cleanup; + } + break; + case 'i': + g_handler_script = get_script_option(); + if (g_handler_script == NULL) { + exit_code = 1; + goto cleanup; + } + break; + default: + usage(argv[0]); + exit_code = 1; + goto cleanup; + } + } + + g_pfh = pidfile_open(NULL, 0600, NULL); + if (g_pfh == NULL) { + if (errno == EEXIST) + errx(1, "netdumpd is already running"); + else + err(1, "pidfile_open"); + } + + if (g_bindip.s_addr == INADDR_ANY) + warnx("default: listening on all interfaces"); + if (g_dumpdir[0] == '\0') { + strcpy(g_dumpdir, "/var/crash"); + warnx("default: dumping to /var/crash/"); + } + if (g_debug) + g_phook = phook_printf; + else + g_phook = syslog; + + /* Further sanity checks on dump location. */ + if (stat(g_dumpdir, &statbuf)) { + warnx("invalid dump location specified"); + exit_code = 1; + goto cleanup; + } + if (!S_ISDIR(statbuf.st_mode)) { + fprintf(stderr, "Dump location is not a directory"); + exit_code = 1; + goto cleanup; + } + if (access(g_dumpdir, F_OK | W_OK)) + warn("warning: may be unable to write into dump location"); + + if (!g_debug && daemon(0, 0) == -1) { + warn("daemon()"); + exit_code = 1; + goto cleanup; + } + pidfile_write(g_pfh); + + g_kq = kqueue(); + if (g_kq < 0) { + LOGERR_PERROR("kqueue()"); + exit_code = 1; + goto cleanup; + } + + /* Set up the server socket. 
*/ + g_sock = socket(PF_INET, SOCK_DGRAM | SOCK_CLOEXEC, IPPROTO_UDP); + if (g_sock == -1) { + LOGERR_PERROR("socket()"); + exit_code = 1; + goto cleanup; + } + bzero(&bindaddr, sizeof(bindaddr)); + bindaddr.sin_len = sizeof(bindaddr); + bindaddr.sin_family = AF_INET; + bindaddr.sin_addr.s_addr = g_bindip.s_addr; + bindaddr.sin_port = htons(NETDUMP_PORT); + if (bind(g_sock, (struct sockaddr *)&bindaddr, sizeof(bindaddr))) { + LOGERR_PERROR("bind()"); + exit_code = 1; + goto cleanup; + } + if (fcntl(g_sock, F_SETFL, O_NONBLOCK) == -1) { + LOGERR_PERROR("fcntl()"); + exit_code = 1; + goto cleanup; + } + + EV_SET(&sockev, g_sock, EVFILT_READ, EV_ADD, 0, 0, NULL); + if (kevent(g_kq, &sockev, 1, NULL, 0, NULL) != 0) { + LOGERR_PERROR("kevent(socket)"); + exit_code = 1; + goto cleanup; + } + + /* Mask all signals. */ + sigfillset(&set); + if (sigprocmask(SIG_BLOCK, &set, NULL) != 0) { + LOGERR_PERROR("sigprocmask()"); + exit_code = 1; + goto cleanup; + } + bzero(&sa, sizeof(sa)); + sa.sa_handler = SIG_IGN; + sa.sa_flags = SA_NOCLDWAIT; + if (sigaction(SIGCHLD, &sa, NULL)) { + LOGERR_PERROR("sigaction(SIGCHLD)"); + exit_code = 1; + goto cleanup; + } + + /* Watch for SIGINT and SIGTERM. */ + EV_SET(&sigev[0], SIGINT, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); + EV_SET(&sigev[1], SIGTERM, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); + if (kevent(g_kq, sigev, nitems(sigev), NULL, 0, NULL) != 0) { + LOGERR_PERROR("kevent(signals)"); + exit_code = 1; + goto cleanup; + } + + LOGINFO("Waiting for clients.\n"); + exit_code = eventloop(); + +cleanup: + if (g_pfh != NULL) + pidfile_remove(g_pfh); + free(g_handler_pre_script); + free(g_handler_script); + if (g_sock != -1) + close(g_sock); + return (exit_code); +} -- 2.10.1