Add the core DIFFUSE kernel module code and supporting build infrastructure.

This module provides the kernel-side support for handling DIFFUSE's IPFW
configuration extensions, rule exporting, classifier/feature modules and
flow-related housekeeping.

Sponsored by:	FreeBSD Foundation
Reviewed by:	bz
MFC after:	1 month

diff -r 3abcaa91ffe8 sys/modules/Makefile
--- a/sys/modules/Makefile	Tue Oct 04 19:24:06 2011 +1100
+++ b/sys/modules/Makefile	Tue Oct 04 22:36:46 2011 +1100
@@ -78,6 +78,7 @@
 	dcons \
 	dcons_crom \
 	de \
+	${_diffuse} \
 	${_dpms} \
 	${_dpt} \
 	${_drm} \
@@ -363,6 +364,7 @@
 .endif
 
 .if ${MK_INET_SUPPORT} != "no" || defined(ALL_MODULES)
+_diffuse=	diffuse
 _if_gre=	if_gre
 .endif
 
diff -r 3abcaa91ffe8 sys/modules/diffuse/Makefile
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/modules/diffuse/Makefile	Tue Oct 04 22:36:46 2011 +1100
@@ -0,0 +1,5 @@
+# $FreeBSD$
+
+SUBDIR=	diffuse
+
+.include <bsd.subdir.mk>
diff -r 3abcaa91ffe8 sys/modules/diffuse/diffuse/Makefile
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/modules/diffuse/diffuse/Makefile	Tue Oct 04 22:36:46 2011 +1100
@@ -0,0 +1,24 @@
+# $FreeBSD$
+
+.include <bsd.own.mk>
+
+.PATH: ${.CURDIR}/../../../netinet/ipfw
+
+KMOD=	diffuse
+SRCS=	diffuse_export.c \
+	diffuse_flowtable.c \
+	ip_diffuse.c \
+	opt_inet6.h
+
+.if !defined(KERNBUILDDIR)
+.if ${MK_INET_SUPPORT} != "no"
+opt_inet.h:
+	echo "#define INET 1" > ${.TARGET}
+.endif
+.if ${MK_INET6_SUPPORT} != "no"
+opt_inet6.h:
+	echo "#define INET6 1" > ${.TARGET}
+.endif
+.endif
+
+.include <bsd.kmod.mk>
diff -r 3abcaa91ffe8 sys/netinet/ip_diffuse_export.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/netinet/ip_diffuse_export.h	Tue Oct 04 22:36:46 2011 +1100
@@ -0,0 +1,148 @@
+/*-
+ * Copyright (c) 2010-2011
+ *	Swinburne University of Technology, Melbourne, Australia.
+ * All rights reserved.
+ *
+ * This software was developed at the Centre for Advanced Internet
+ * Architectures, Swinburne University of Technology, by Sebastian Zander, made
+ * possible in part by a gift from The Cisco University Research Program Fund, a
+ * corporate advised fund of Silicon Valley Community Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * The public header file for DIFFUSE export protocol definitions.
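+ *
+ * For illustration (see prepare_header() in diffuse_export.c), an export
+ * packet is laid out roughly as:
+ *
+ *	struct dip_header | struct dip_set_header (template set)
+ *	| struct dip_templ_header | IE ids (plus length fields for
+ *	variable length IEs) | struct dip_set_header (data set)
+ *	| data records
+ *
+ * The top two bits of an IE id encode its length class, as tested by the
+ * DI_IS_*_LEN() macros below: 00 = fixed, 10 = variable, 11 = dynamic.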
+ */ + +#ifndef _NETINET_IP_DIFFUSE_EXPORT_H_ +#define _NETINET_IP_DIFFUSE_EXPORT_H_ + +/* DIFFUSE protocol version. */ +#define DIP_VERSION 1 + +/* Used if querying MTU from routing table fails. */ +#define DIP_DEFAULT_MTU 1500 + +enum dip_msg_types { + DIP_MSG_ADD = 0, + DIP_MSG_REMOVE +}; + +enum dip_timeout_types { + DIP_TIMEOUT_NONE = 0, + DIP_TIMEOUT_RULE, + DIP_TIMEOUT_FLOW +}; + +enum dip_info_element_types { + DIP_IE_NOP = 0, + DIP_IE_SRC_IPV4, + DIP_IE_DST_IPV4, + DIP_IE_SRC_PORT, + DIP_IE_DST_PORT, + DIP_IE_PROTO, + DIP_IE_SRC_IPV6, + DIP_IE_DST_IPV6, + DIP_IE_IPV4_TOS, + DIP_IE_IPV6_LABEL, + DIP_IE_CLASS_LABEL, + DIP_IE_MATCH_DIR, + DIP_IE_MSG_TYPE, /* Add or remove. */ + DIP_IE_TIMEOUT_TYPE, /* Rule timeout vs flow timeout. */ + DIP_IE_TIMEOUT, /* Timeout value. */ + DIP_IE_ACTION_FLAGS, /* Bidir. */ + DIP_IE_PCKT_CNT, /* Current number of packets. */ + DIP_IE_KBYTE_CNT, /* Current number of bytes. */ + DIP_IE_ACTION, /* Type of action. */ + DIP_IE_ACTION_PARAMS, /* Opaque, passed on to packet filter. */ + DIP_IE_EXPORT_NAME, /* Name of export. */ + DIP_IE_CLASSIFIER_NAME, /* Name of classifier. */ + DIP_IE_CLASSES /* Classifier names/classes. */ +}; + +#define DI_IS_FIXED_LEN(x) (((x) & 0xC000) == 0x0) +#define DI_IS_VARIABLE_LEN(x) (((x) & 0xC000) == 0x8000) +#define DI_IS_DYNAMIC_LEN(x) (((x) & 0xC000) == 0xC000) + +struct dip_info_element { + uint16_t idx; + uint16_t id; + int16_t len; +}; + +struct dip_info_descr { + uint16_t idx; + uint16_t id; + int16_t len; /* Length in bytes, 0/-1 = var/dynamic length. */ + char *name; +}; + +struct dip_header { + uint16_t version; + uint16_t msg_len; + uint32_t seq_no; + uint32_t time; +}; + +struct dip_set_header { + uint16_t set_id; + uint16_t set_len; +}; + +struct dip_templ_header { + uint16_t templ_id; + uint16_t flags; +}; + +#if defined(WITH_DIP_INFO) +static struct dip_info_descr dip_info[] = { + {DIP_IE_NOP, 0, 0, "NOP"}, + {DIP_IE_SRC_IPV4, 1, 4, "SrcIP"}, + {DIP_IE_DST_IPV4, 2, 4, "DstIP"}, + {DIP_IE_SRC_PORT, 3, 2, "SrcPort"}, + {DIP_IE_DST_PORT, 4, 2, "DstPort"}, + {DIP_IE_PROTO, 5, 1, "Proto"}, + {DIP_IE_SRC_IPV6, 6, 16, "SrcIP6"}, + {DIP_IE_DST_IPV6, 7, 16, "DstIP6"}, + {DIP_IE_IPV4_TOS, 8, 1, "ToS"}, + {DIP_IE_IPV6_LABEL, 9, 3, "IP6Label"}, + {DIP_IE_CLASS_LABEL, 10, 2, "Class"}, + {DIP_IE_MATCH_DIR, 11, 1, "MatchDir"}, + {DIP_IE_MSG_TYPE, 12, 1, "MsgType"}, + {DIP_IE_TIMEOUT_TYPE, 13, 1, "TimeoutType"}, + {DIP_IE_TIMEOUT, 14, 2, "TimeoutValue"}, + {DIP_IE_ACTION_FLAGS, 15, 2, "ActionFlags"}, + {DIP_IE_PCKT_CNT, 16, 4, "Packets"}, + {DIP_IE_KBYTE_CNT, 17, 4, "KBytes"}, + {DIP_IE_ACTION, 32768, 0, "Action"}, + {DIP_IE_ACTION_PARAMS, 32769, 0, "ActionParams"}, + {DIP_IE_EXPORT_NAME, 32770, 0, "ExportName"}, + {DIP_IE_CLASSIFIER_NAME, 32771, 0, "ClassName"}, + {DIP_IE_CLASSES, 49152, -1, "ClassNames"}, + {DIP_IE_NOP, 0, 0, "Unknown"} +}; +#endif + +#endif /* _NETINET_IP_DIFFUSE_EXPORT_H_ */ diff -r 3abcaa91ffe8 sys/netinet/ipfw/diffuse_classifier.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/netinet/ipfw/diffuse_classifier.h Tue Oct 04 22:36:46 2011 +1100 @@ -0,0 +1,100 @@ +/*- + * Copyright (c) 2010-2011 + * Swinburne University of Technology, Melbourne, Australia. + * All rights reserved. + * + * This software was developed at the Centre for Advanced Internet + * Architectures, Swinburne University of Technology, by Sebastian Zander, made + * possible in part by a gift from The Cisco University Research Program Fund, a + * corporate advised fund of Silicon Valley Community Foundation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NETINET_IPFW_DIFFUSE_CLASSIFIER_H_ +#define _NETINET_IPFW_DIFFUSE_CLASSIFIER_H_ + +struct di_cdata; +struct di_fdata; + +/* + * Descriptor for a classifier. Contains all function pointers for a given + * classifier. This is typically created when a module is loaded, and stored in + * a global list of classifiers. + */ +struct di_classifier_alg { + const char *name; /* Classifier name. */ + volatile int ref_count; /* Number of instances in the system. */ + + /* + * Init instance. + * param1: pointer to instance config + * param2: config from userspace + * return: non-zero on error + */ + int (*init_instance)(struct di_cdata *, struct di_oid *); + + /* + * Destroy instance. + * param1: pointer to instance config + * return: non-zero on error + */ + int (*destroy_instance)(struct di_cdata *); + + /* + * Classify packet (sub flow). + * param1: pointer to instance config + * param2: pointer to features + * param3: number of features + * return: class + */ + int (*classify)(struct di_cdata *, int32_t *, int); + + /* + * Get configuration data. + * param1: pointer to instance config + * param2: pointer to configuration + * param3: only compute size (if 1) + * return: number of stats + */ + int (*get_conf)(struct di_cdata *, struct di_oid *, int); + + /* + * Get number of features needed. + * param1: pointer to instance config + * return: number of features + */ + int (*get_feature_cnt)(struct di_cdata *); + + /* + * Get number of classes. + * param1: pointer to instance config + * return: number of classes + */ + int (*get_class_cnt)(struct di_cdata *); + + SLIST_ENTRY(di_classifier_alg) next; /* Next in the list. */ +}; + +#endif /* _NETINET_IPFW_DIFFUSE_CLASSIFIER_H_ */ diff -r 3abcaa91ffe8 sys/netinet/ipfw/diffuse_classifier_module.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/netinet/ipfw/diffuse_classifier_module.h Tue Oct 04 22:36:46 2011 +1100 @@ -0,0 +1,50 @@ +/*- + * Copyright (c) 2010-2011 + * Swinburne University of Technology, Melbourne, Australia. + * All rights reserved. 
+ * + * This software was developed at the Centre for Advanced Internet + * Architectures, Swinburne University of Technology, by Sebastian Zander, made + * possible in part by a gift from The Cisco University Research Program Fund, a + * corporate advised fund of Silicon Valley Community Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NETINET_IPFW_DIFFUSE_CLASSIFIER_MODULE_H_ +#define _NETINET_IPFW_DIFFUSE_CLASSIFIER_MODULE_H_ + +#define DECLARE_DIFFUSE_CLASSIFIER_MODULE(cname, cstruct) \ + static moduledata_t di_classifier_##cname = { \ + .name = #cname, \ + .evhand = diffuse_classifier_modevent, \ + .priv = cstruct \ + }; \ + DECLARE_MODULE(cname, di_classifier_##cname, \ + SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); \ + MODULE_DEPEND(cname, diffuse, 1, 1, 1) + +int diffuse_classifier_modevent(module_t mod, int cmd, void *arg); + +#endif /* _NETINET_IPFW_DIFFUSE_CLASSIFIER_MODULE_H_ */ diff -r 3abcaa91ffe8 sys/netinet/ipfw/diffuse_export.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/netinet/ipfw/diffuse_export.c Tue Oct 04 22:36:46 2011 +1100 @@ -0,0 +1,750 @@ +/*- + * Copyright (c) 2010-2011 + * Swinburne University of Technology, Melbourne, Australia. + * All rights reserved. + * + * This software was developed at the Centre for Advanced Internet + * Architectures, Swinburne University of Technology, by Sebastian Zander, made + * possible in part by a gift from The Cisco University Research Program Fund, a + * corporate advised fund of Silicon Valley Community Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Functions to manage the export protocol. + */ + +#include +__FBSDID("$FreeBSD$"); + +#if !defined(KLD_MODULE) +#include "opt_ipfw.h" +#include "opt_inet.h" +#ifndef INET +#error DIFFUSE requires INET. +#endif /* INET */ +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#define WITH_DIP_INFO 1 +#include +#include +#include + +#include +#include + +static VNET_DEFINE(uint32_t, ex_max_qsize); +#define V_ex_max_qsize VNET(ex_max_qsize) + +static uma_zone_t di_rec_zone; + +#ifndef __FreeBSD__ +DEFINE_SPINLOCK(di_er_mtx); +#else +static struct mtx di_er_mtx; /* Mutex guarding dynamic rules. */ +#endif + +#define DI_ER_LOCK() mtx_lock(&di_er_mtx) +#define DI_ER_UNLOCK() mtx_unlock(&di_er_mtx) +#define DI_ER_LOCK_ASSERT() mtx_assert(&di_er_mtx, MA_OWNED) +#define DI_ER_LOCK_DESTROY() mtx_destroy(&di_er_mtx) +#define DI_ER_LOCK_INIT() mtx_init(&di_er_mtx, \ + "DIFFUSE export record list", NULL, MTX_DEF) + +uint16_t def_template[15] = { + DIP_IE_EXPORT_NAME, + DIP_IE_MSG_TYPE, + DIP_IE_SRC_IPV4, + DIP_IE_DST_IPV4, + DIP_IE_SRC_PORT, + DIP_IE_DST_PORT, + DIP_IE_PROTO, + DIP_IE_PCKT_CNT, + DIP_IE_KBYTE_CNT, + DIP_IE_CLASSES, + DIP_IE_TIMEOUT_TYPE, + DIP_IE_TIMEOUT, + DIP_IE_ACTION, + DIP_IE_ACTION_FLAGS, + DIP_IE_ACTION_PARAMS +}; + +#define N_TEMPLATE_ITEMS (sizeof(def_template) / sizeof(*def_template)) + +/* + * Length of the fixed size, per-packet header on outgoing flow rule template + * based export packets. + * The packet header consists of the following parts (in order): + * - struct dip_header + * - struct dip_set_header + * - struct dip_templ_header + * - A uint16_t ID field for each information element (IE) + * - A uint16_t length field for variable length IEs (there are currently 4) + */ +#define DIP_FIXED_HDR_LEN (sizeof(struct dip_header) + \ + sizeof(struct dip_set_header) + sizeof(struct dip_templ_header) + \ + (N_TEMPLATE_ITEMS * sizeof(uint16_t)) + 4 * sizeof(uint16_t)) + +/* Size for one data set. */ +static int def_data_size; + +/* Offset into mhead where the data set header starts. */ +static int def_data_shdr_offs; + +/* Template for packet header. */ +static struct mbuf *mhead; + +#ifdef SYSCTL_NODE +SYSBEGIN(xxx) +SYSCTL_DECL(_net_inet_ip_diffuse); +SYSCTL_VNET_UINT(_net_inet_ip_diffuse, OID_AUTO, ex_max_qsize, CTLFLAG_RW, + &VNET_NAME(ex_max_qsize), 0, "Max export record queue size"); +SYSEND +#endif /* SYSCTL_NODE */ + +/* Compute static size of one data set according to template. 
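+ * Fixed length IEs contribute dip_info[].len bytes; the name and parameter
+ * strings are counted at their maximum lengths (DI_MAX_NAME_STR_LEN,
+ * DI_MAX_PARAM_STR_LEN). Dynamic length IEs such as DIP_IE_CLASSES are
+ * accounted per record by get_data_size() below.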
*/ +static int +_get_data_size(void) +{ + int i, n, size; + + size = 0; + + for (i = 0; i < N_TEMPLATE_ITEMS; i++) { + n = dip_info[def_template[i]].len; + if (n > 0) + size += n; + else if (n == 0) { + if (def_template[i] == DIP_IE_ACTION || + def_template[i] == DIP_IE_EXPORT_NAME || + def_template[i] == DIP_IE_CLASSIFIER_NAME) { + size += DI_MAX_NAME_STR_LEN; + } else if (def_template[i] == DIP_IE_ACTION_PARAMS) { + size += DI_MAX_PARAM_STR_LEN; + } + } + /* Do not count dynamic length fields here. */ + } + + return (size); +} + +/* Compute dynamic size of record r. */ +static int +get_data_size(struct di_export_rec *r) +{ + int i, l, slen; + + l = 0; + + for (i = 0; i < r->tcnt; i++) { + slen = strlen(r->class_tags[i].cname); + /* String, null, class. */ + l += slen + 1 + dip_info[DIP_IE_CLASS_LABEL].len; + } + l++; /* Field length byte. */ + + return (def_data_size + l); +} + +static inline uint32_t +tv_sub0_ms(struct timeval *num, struct timeval *sub) +{ + struct timeval rv; + + rv = tv_sub0(num, sub); + + return (tvtoms(&rv)); +} + +static void +remove_rec(struct di_export_rec *r) +{ + + DI_ER_LOCK_ASSERT(); + + TAILQ_REMOVE(&di_conf.export_rec_list, r, next); + uma_zfree(di_rec_zone, r); + di_conf.export_rec_count--; +} + +struct di_export_rec * +diffuse_export_add_rec(struct di_ft_entry *q, struct di_export *ex, + int add_command) +{ + struct di_export_rec *r, *s; + struct di_flow_class *c; + int have_entry; + + r = NULL; + have_entry = 0; + + /* We don't handle exporting v6 flow records yet. */ + if (q->id.addr_type == 6) + return (NULL); + + DI_ER_LOCK(); + + /* Update and export entry if we have one for this flow already. */ + TAILQ_FOREACH(s, &di_conf.export_rec_list, next) { + /* + * Only compare pointer for speed. If new flow with same 5-tuple + * we may add another record for same 5-tuple + */ + if (s->ft_rec == q) { + have_entry = 1; + r = s; + break; + } + } + + if (!have_entry) { + r = uma_zalloc(di_rec_zone, M_NOWAIT | M_ZERO); + if (r == NULL) { + DI_ER_UNLOCK(); + return (NULL); + } + strcpy(r->ename, ex->name); + r->ft_rec = q; + } + + getmicrotime(&r->time); + r->id = q->id; + r->fcnt = q->fcnt; + r->ftype = q->ftype; + r->mtype = add_command ? DIP_MSG_ADD : DIP_MSG_REMOVE; + r->ttype = di_conf.an_rule_removal; + r->pcnt = q->pcnt; + /* + * The flow byte count we send across the wire is in KBytes + * (see DIP_IE_KBYTE_CNT in ). + */ + r->bcnt = q->bcnt / 1000; + if (add_command) { + r->tval = (uint16_t) (q->expire > time_uptime) ? + q->expire - time_uptime : 0; + r->tval++; /* Make it slightly larger. */ + } else { + r->tval = 0; + } + + /* Class tags (only confirmed!). */ + r->tcnt = 0; + SLIST_FOREACH(c, &q->flow_classes, next) { + if (r->tcnt >= DI_MAX_CLASSES) + break; + + if (c->confirm >= ex->conf.confirm) { + strcpy(r->class_tags[r->tcnt].cname, c->cname); + r->class_tags[r->tcnt].class = c->class; + r->tcnt++; + } + } + + if (!have_entry) { + if (r->ftype & DI_FLOW_TYPE_BIDIRECTIONAL || + ex->conf.atype & DI_ACTION_TYPE_BIDIRECTIONAL) + r->action_dir = DI_ACTION_TYPE_BIDIRECTIONAL; + else + r->action_dir = DI_ACTION_TYPE_UNIDIRECTIONAL; + + strcpy(r->action, ex->conf.action); + strcpy(r->act_params, ex->conf.action_param); + TAILQ_INSERT_TAIL(&di_conf.export_rec_list, r, next); + di_conf.export_rec_count++; + r->no_earlier.tv_sec = r->no_earlier.tv_usec = 0; + } + + DI_ER_UNLOCK(); + + return (r); +} + +/* Limit total number of records. 
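+ * The sysctl controlled limit V_ex_max_qsize is clamped to [0, 65535];
+ * the oldest records, at the head of the list, are dropped first.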
*/ +void +diffuse_export_prune_recs(void) +{ + struct di_export_rec *r; + + DI_ER_LOCK(); + + if (V_ex_max_qsize > 65535) + V_ex_max_qsize = 65535; + + if (V_ex_max_qsize < 0) + V_ex_max_qsize = 0; + + while (di_conf.export_rec_count > V_ex_max_qsize) { + r = TAILQ_FIRST(&di_conf.export_rec_list); + remove_rec(r); + } + + DI_ER_UNLOCK(); +} + +int +diffuse_export_remove_recs(char *ename) +{ + struct di_export_rec *r, *tmp; + + DI_ER_LOCK(); + + TAILQ_FOREACH_SAFE(r, &di_conf.export_rec_list, next, tmp) { + if (ename == NULL || !strcmp(r->ename, ename)) + remove_rec(r); + } + + DI_ER_UNLOCK(); + + return (0); +} + +/* Open and bind socket. */ +struct socket * +diffuse_export_open(struct di_export_config *conf) +{ + struct socket *sock; + struct thread *td; + int ret; + + sock = NULL; + td = curthread; + + if (mhead != NULL) { + /* Open UDP socket. */ + ret = socreate(AF_INET, &sock, SOCK_DGRAM, IPPROTO_UDP, + td->td_ucred, td); + if (ret) + DID("socket create error %d", ret); + } + + return (sock); +} + +/* + * Prepare the packet header for later use. Every packet contains one rule spec + * template followed by data. + */ +static void +prepare_header(void) +{ +#define SET_ID_OPTS_TPL 0 +#define SET_ID_FLOWRULE_TPL 1 +#define SET_ID_DATA 256 + + struct dip_header *hdr; + struct dip_set_header *shdr; + struct dip_templ_header *thdr; + int i, offs; + char *buf; + + /* + * Ensures we will be able to shoehorn the entire fixed length header + * into a single mbuf. + */ + CTASSERT(MHLEN >= DIP_FIXED_HDR_LEN); + + offs = 0; + + mhead = m_gethdr(M_WAITOK, MT_DATA); + mhead->m_next = NULL; + buf = mtod(mhead, char *); + hdr = (struct dip_header *)buf; + hdr->version = htons((uint16_t)DIP_VERSION); + hdr->msg_len = 0; + hdr->seq_no = 0; + hdr->time = 0; + offs += sizeof(struct dip_header); + + shdr = (struct dip_set_header *)(buf + offs); + shdr->set_id = htons((uint16_t)SET_ID_FLOWRULE_TPL); + shdr->set_len = 0; + offs += sizeof(struct dip_set_header); + + thdr = (struct dip_templ_header *)(buf + offs); + thdr->templ_id = htons((uint16_t)SET_ID_DATA); + thdr->flags = 0; + offs += sizeof(struct dip_templ_header); + + for (i = 0; i < N_TEMPLATE_ITEMS; i++) { + *((uint16_t *)(buf + offs)) = + htons(dip_info[def_template[i]].id); + offs += sizeof(uint16_t); + if (def_template[i] == DIP_IE_ACTION || + def_template[i] == DIP_IE_EXPORT_NAME || + def_template[i] == DIP_IE_CLASSIFIER_NAME) { + *((uint16_t *)(buf + offs)) = + htons((uint16_t)DI_MAX_NAME_STR_LEN); + offs += sizeof(uint16_t); + } else if (def_template[i] == DIP_IE_ACTION_PARAMS) { + *((uint16_t *)(buf + offs)) = + htons((uint16_t)DI_MAX_PARAM_STR_LEN); + offs += sizeof(uint16_t); + } + } + + shdr->set_len = htons(offs - sizeof(struct dip_header)); + def_data_shdr_offs = offs; + shdr = (struct dip_set_header *)(buf + offs); + shdr->set_id = htons((uint16_t)SET_ID_DATA); + shdr->set_len = htons(sizeof(struct dip_set_header)); + offs += sizeof(struct dip_set_header); + + hdr->msg_len = htons(offs); + mhead->m_len = offs; + + m_fixhdr(mhead); + mhead->m_pkthdr.rcvif = NULL; +} + +/* + * If r is not NULL we add data and possibly send if packet length reached. + * If r is NULL we just send packet if we have an mbuf. 
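+ *
+ * For illustration, one data record is serialised in def_template order,
+ * roughly:
+ *
+ *	export name | msg type (1) | src/dst IPv4 (4+4) | src/dst port (2+2)
+ *	| proto (1) | packets (4) | kbytes (4) | classes: length byte, then
+ *	per class tag a NUL terminated name plus 16 bit class | timeout
+ *	type (1) | timeout (2) | action name | action flags (2) | action params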
+ */ +static void +add_record(struct di_export *ex, struct di_export_rec *r, + struct timeval *tv, int dyn_rsize) +{ + struct dip_header *hdr; + struct dip_set_header *shdr; + struct di_export_config *conf; + struct mbuf *md; + char *buf; + int i, new_header, offs, slen; + unsigned char *lfield; + + DI_ER_LOCK_ASSERT(); + + conf = &ex->conf; + new_header = offs = 0; + + if (ex->mh == NULL) { + ex->mh = m_dup(mhead, MT_DATA); + ex->mh->m_next = NULL; + ex->mh->m_nextpkt = NULL; + ex->mt = ex->mh; + /* Make sure IPFW/DIFFUSE skips exporter packets. */ + ex->mh->m_flags |= M_SKIP_FIREWALL; + new_header = 1; + } + + /* Update stuff in packet header. */ + buf = mtod(ex->mh, char *); + hdr = (struct dip_header *)buf; + if (new_header) + hdr->seq_no = htonl(ex->seq_no++); + + hdr->time = htonl(tv->tv_sec); + hdr->msg_len = htons(ntohs(hdr->msg_len) + dyn_rsize); + + /* Update stuff in data set header. */ + shdr = (struct dip_set_header *)(buf + def_data_shdr_offs); + shdr->set_len = htons(ntohs(shdr->set_len) + dyn_rsize); + + /* Find space for data, add new mbuf if required. */ + if (ex->mt == ex->mh || (MLEN - ex->mt->m_len) < dyn_rsize) { + /* Add new mbuf to chain. */ + md = m_get(M_NOWAIT, MT_DATA); + if (!md) + return; + md->m_next = NULL; + ex->mt->m_next = md; + ex->mt = md; + } + + buf = mtod(ex->mt, char *); + offs = ex->mt->m_len; + + /* Fill in data. */ + /* XXX: Create function with a switch for all IEs. */ + memcpy((char *)(buf + offs), r->ename, DI_MAX_NAME_STR_LEN); + offs += DI_MAX_NAME_STR_LEN; + *((uint8_t *)(buf + offs)) = r->mtype; + offs += sizeof(uint8_t); + *((uint32_t *)(buf + offs)) = htonl(r->id.src_ip); + offs += sizeof(uint32_t); + *((uint32_t *)(buf + offs)) = htonl(r->id.dst_ip); + offs += sizeof(uint32_t); + *((uint16_t *)(buf + offs)) = htons(r->id.src_port); + offs += sizeof(uint16_t); + *((uint16_t *)(buf + offs)) = htons(r->id.dst_port); + offs += sizeof(uint16_t); + *((uint8_t *)(buf + offs)) = r->id.proto; + offs += sizeof(uint8_t); + *((uint32_t *)(buf + offs)) = htonl(r->pcnt); + offs += sizeof(uint32_t); + *((uint32_t *)(buf + offs)) = htonl(r->bcnt); + offs += sizeof(uint32_t); + + lfield = buf + offs; + offs++; + *lfield = 1; + /* + * tcnt will be <= DI_MAX_CLASSES which is set so that lfield will + * never overflow. + */ + for (i = 0; i < r->tcnt; i++) { + slen = strlen(r->class_tags[i].cname); + memcpy((char *)(buf + offs), r->class_tags[i].cname, slen + 1); + offs += slen + 1; + *((uint16_t *)(buf + offs)) = htons(r->class_tags[i].class); + offs += sizeof(uint16_t); + KASSERT((*lfield + slen + 1 + sizeof(uint16_t) < + (1 << sizeof(*lfield))), ("%s: lfield overflowed", + __func__)); + *lfield += slen + 1 + sizeof(uint16_t); + } + *((uint8_t *)(buf + offs)) = r->ttype; + offs += sizeof(uint8_t); + *((uint16_t *)(buf + offs)) = htons(r->tval); + offs += sizeof(uint16_t); + memcpy((char *)(buf + offs), r->action, DI_MAX_NAME_STR_LEN); + offs += DI_MAX_NAME_STR_LEN; + *((uint16_t *)(buf + offs)) = htons((uint16_t)r->action_dir); + offs += sizeof(uint16_t); + memcpy((char *)(buf + offs), r->act_params, DI_MAX_PARAM_STR_LEN); + offs += DI_MAX_PARAM_STR_LEN; + + ex->mt->m_len = offs; + + /* Fix chain header, e.g. adjust chain length. 
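+	 * m_fixhdr() recomputes m_pkthdr.len as the sum of m_len over the
+	 * chain.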
*/ + m_fixhdr(ex->mh); + ex->mh->m_pkthdr.rcvif = NULL; +} + +static inline int +queue_tx_pkt_if(struct di_export *ex, int dyn_rsize, struct timeval *tv, + struct mbuf **tx_pkt_queue, int force) +{ + struct route sro; + struct sockaddr_in *dst; + unsigned long mtu; + int ready_to_send; + + DI_ER_LOCK_ASSERT(); + + ready_to_send = 0; + mtu = DIP_DEFAULT_MTU; + + bzero(&sro, sizeof(sro)); + dst = (struct sockaddr_in *)&sro.ro_dst; + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = ex->conf.ip; + in_rtalloc_ign(&sro, 0, ex->sock->so_fibnum); + + if (sro.ro_rt != NULL) { + if (sro.ro_rt->rt_rmx.rmx_mtu == 0) + mtu = sro.ro_rt->rt_ifp->if_mtu; + else + mtu = min(sro.ro_rt->rt_rmx.rmx_mtu, + sro.ro_rt->rt_ifp->if_mtu); + RTFREE(sro.ro_rt); + } + + /* + * If the export packet currently being constructed (chain headed by + * ex->mh) would be overfilled by adding a record of size dyn_rsize, + * move the chain to the tx_pkt_queue and set the ex chain headers to + * NULL so that add_record() will start a new chain. + */ + if (force || (ex->mt->m_len + dyn_rsize) >= + (mtu - sizeof(struct ip) + sizeof(struct udphdr))) { + /* Add to queue. */ + if (*tx_pkt_queue == NULL) { + *tx_pkt_queue = ex->mh; + } else { + while ((*tx_pkt_queue)->m_nextpkt != NULL) + tx_pkt_queue = &((*tx_pkt_queue)->m_nextpkt); + + (*tx_pkt_queue)->m_nextpkt = ex->mh; + } + + ex->mh = NULL; + ex->mt = NULL; + ex->last_pkt_time = *tv; + ready_to_send = 1; + } + + return (ready_to_send); +} + +int +diffuse_export_send(struct di_export *ex) +{ + static int waiting = 0; /* Number of records waiting to be sent. */ + struct thread *td; + struct di_export_rec *r, *tmp; + struct di_export_config *conf; + struct timeval tv; + /* Copies for sending outside lock. */ + struct socket *sock; + struct sockaddr_in sin; + struct mbuf *next_tx_pkt, *tx_pkt_queue; + int cnt, dyn_rsize, ret; + + td = curthread; + sock = NULL; + tx_pkt_queue = NULL; + conf = &ex->conf; + cnt = waiting; /* Number of records processed */ + + DI_ER_LOCK(); + + if (di_conf.export_rec_count == 0 || + waiting + di_conf.export_rec_count < conf->min_batch) { + DI_ER_UNLOCK(); + return (0); + } + + getmicrotime(&tv); + + /* Export the records that are over max delay, if max_delay set. */ + if (conf->max_delay > 0) { + TAILQ_FOREACH_SAFE(r, &di_conf.export_rec_list, next, tmp) { + if (tv_sub0_ms(&tv, &r->time) >= conf->max_delay && + tv_sub0_ms(&tv, &r->no_earlier) > 0) { + dyn_rsize = get_data_size(r); + if (queue_tx_pkt_if(ex, dyn_rsize, &tv, + &tx_pkt_queue, 0) == 1) + waiting = 0; + else + waiting++; + + add_record(ex, r, &tv, dyn_rsize); + remove_rec(r); + cnt++; + } + } + } + + /* Export up to max_batch or if max_batch is not set export the rest. */ + if ((conf->max_batch == 0 || cnt < conf->max_batch) && + di_conf.export_rec_count > 0) { + TAILQ_FOREACH_SAFE(r, &di_conf.export_rec_list, next, tmp) { + if (tv_sub0_ms(&tv, &r->no_earlier) > 0) { + dyn_rsize = get_data_size(r); + if (queue_tx_pkt_if(ex, dyn_rsize, &tv, + &tx_pkt_queue, 0) == 1) + waiting = 0; + else + waiting++; + + add_record(ex, r, &tv, dyn_rsize); + remove_rec(r); + cnt++; + } + if ((conf->max_batch > 0 && cnt >= conf->max_batch) || + di_conf.export_rec_count == 0) + break; + } + } + + DI_ER_UNLOCK(); + + if (waiting > 0 && waiting >= conf->min_batch) { + /* Force send of incomplete packet. */ + queue_tx_pkt_if(ex, 0, &tv, &tx_pkt_queue, 1); + waiting = 0; + } + + if (tx_pkt_queue != NULL) { + sock = ex->sock; + /* Set target. 
*/
+	memset(&sin, 0, sizeof(sin));
+#ifndef __linux__
+	sin.sin_len = sizeof(sin);
+#endif
+	sin.sin_family = AF_INET;
+	sin.sin_port = htons(conf->port);
+	sin.sin_addr = conf->ip;
+	}
+
+	DID("exported %d rules", cnt);
+
+	while (tx_pkt_queue != NULL) {
+		/*
+		 * sosend() consumes the mbuf chain even on error, so unlink
+		 * the next packet before sending and do not touch or free
+		 * the chain afterwards.
+		 */
+		next_tx_pkt = tx_pkt_queue->m_nextpkt;
+		tx_pkt_queue->m_nextpkt = NULL;
+		ret = sosend(sock, (struct sockaddr *)&sin, NULL,
+		    tx_pkt_queue, NULL, MSG_DONTWAIT, td);
+		DID("send packet %d", ret);
+		tx_pkt_queue = next_tx_pkt;
+	}
+
+	return (0);
+}
+
+/* Close socket. */
+void
+diffuse_export_close(struct socket *sock)
+{
+
+	soclose(sock);
+}
+
+void
+diffuse_export_init(void)
+{
+
+	V_ex_max_qsize = 256;
+
+	di_rec_zone = uma_zcreate("DIFFUSE rule recs",
+	    sizeof(struct di_export_rec), NULL, NULL, NULL, NULL,
+	    UMA_ALIGN_PTR, 0);
+
+	DI_ER_LOCK_INIT();
+
+	/* Determine size of one entry. */
+	def_data_size = _get_data_size();
+
+	/* Prepare packet header. */
+	prepare_header();
+}
+
+void
+diffuse_export_uninit(void)
+{
+
+	/* Free packet header. */
+	if (mhead)
+		m_freem(mhead);
+
+	uma_zdestroy(di_rec_zone);
+	DI_ER_LOCK_DESTROY();
+}
diff -r 3abcaa91ffe8 sys/netinet/ipfw/diffuse_feature_module.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/netinet/ipfw/diffuse_feature_module.h	Tue Oct 04 22:36:46 2011 +1100
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 2010-2011
+ *	Swinburne University of Technology, Melbourne, Australia.
+ * All rights reserved.
+ *
+ * This software was developed at the Centre for Advanced Internet
+ * Architectures, Swinburne University of Technology, by Sebastian Zander, made
+ * possible in part by a gift from The Cisco University Research Program Fund, a
+ * corporate advised fund of Silicon Valley Community Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * + * $FreeBSD$ + */ + +#ifndef _NETINET_IPFW_DIFFUSE_FEATURE_MODULE_H_ +#define _NETINET_IPFW_DIFFUSE_FEATURE_MODULE_H_ + +#define DECLARE_DIFFUSE_FEATURE_MODULE(fname, fstruct) \ + static moduledata_t di_feature_##fname = { \ + .name = #fname, \ + .evhand = diffuse_feature_modevent, \ + .priv = fstruct \ + }; \ + DECLARE_MODULE(fname, di_feature_##fname, \ + SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); \ + MODULE_DEPEND(fname, diffuse, 1, 1, 1) + +int diffuse_feature_modevent(module_t mod, int cmd, void *arg); + +#endif /* _NETINET_IPFW_DIFFUSE_FEATURE_MODULE_H_ */ diff -r 3abcaa91ffe8 sys/netinet/ipfw/diffuse_flowtable.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/netinet/ipfw/diffuse_flowtable.c Tue Oct 04 22:36:46 2011 +1100 @@ -0,0 +1,1597 @@ +/*- + * Copyright (c) 2010-2011 + * Swinburne University of Technology, Melbourne, Australia. + * All rights reserved. + * + * This software was developed at the Centre for Advanced Internet + * Architectures, Swinburne University of Technology, by Sebastian Zander, made + * possible in part by a gift from The Cisco University Research Program Fund, a + * corporate advised fund of Silicon Valley Community Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Functions to manage the flow table (originally based on ip_fw_dynamic.c). + */ + +#include +__FBSDID("$FreeBSD$"); + +#ifdef _KERNEL +#if !defined(KLD_MODULE) +#include "opt_ipfw.h" +#include "opt_inet.h" +#include "opt_inet6.h" +#ifndef INET +#error DIFFUSE requires INET. +#endif +#endif + +#include +#include +#include +#endif /* _KERNEL */ +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef INET6 +#include +#include +#endif + +#include +#include +#ifdef _KERNEL +#include +#include +#endif + +#ifdef _KERNEL +#ifdef MAC +#include +#endif +#endif + +#ifndef _KERNEL +#include +#include + +#include + +#define KPI_USER_COMPAT +#include /* Must come after stdlib.h */ +#endif + +/* + * Description of flow table. + * + * Flow features are stored in lists accessed through a hash table + * (diffuse_ft_v) whose size is ft_curr_buckets. This value can be modified + * through the sysctl variable ft_buckets which is updated when the table + * becomes empty. 
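+ *
+ * Bucket counts must be powers of two because the hash value is reduced by
+ * masking, e.g. slot = hash_packet(id) & (V_ft_curr_buckets - 1);
+ * realloc_flow_table() resets any other value.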
+ * + * When a packet is received, its address fields are first masked with the mask + * defined for the rule, then hashed, then matched against the entries in the + * corresponding list. Then all the features configured are computed. + * + * The lifetime of flow entries is regulated by ft_*_lifetime, measured in + * seconds and depending on the TCP flags. + * + * The total number of flows is stored in ft_count. The max number of flows is + * ft_max. We stop creating flow table entries when ft_max is reached. This is + * done to avoid consuming too much memory, but also too much time when + * searching on each packet (we should try instead to put a limit on the length + * of the list on each bucket...). + * + * Each flow entry holds a pointer to the creating ipfw rule. Flows are removed + * when the rule is removed. + */ + +static VNET_DEFINE(struct di_ft_entry **, diffuse_ft_v); +static VNET_DEFINE(u_int32_t, ft_buckets); +static VNET_DEFINE(u_int32_t, ft_curr_buckets); +#define V_diffuse_ft_v VNET(diffuse_ft_v) +#define V_ft_buckets VNET(ft_buckets) +#define V_ft_curr_buckets VNET(ft_curr_buckets) + +#ifdef _KERNEL + +LIST_HEAD(di_to_head, di_to_entry); + +static VNET_DEFINE(struct di_to_head *, diffuse_to_v); +static VNET_DEFINE(u_int32_t, to_buckets); +static VNET_DEFINE(u_int32_t, to_curr_buckets); +#define V_diffuse_to_v VNET(diffuse_to_v) +#define V_to_buckets VNET(to_buckets) +#define V_to_curr_buckets VNET(to_curr_buckets) + +static uma_zone_t di_ft_zone; + +#ifndef __FreeBSD__ +DEFINE_SPINLOCK(di_ft_mtx); +#else +static struct rwlock di_ft_mtx; +#endif + +#define DI_FT_LOCK_INIT() rw_init(&di_ft_mtx, "DIFFUSE flow table") +#define DI_FT_LOCK_DESTROY() rw_destroy(&di_ft_mtx) +#define DI_FT_RLOCK() rw_rlock(&di_ft_mtx) +#define DI_FT_WLOCK() rw_wlock(&di_ft_mtx) +#define DI_FT_UNLOCK() rw_unlock(&di_ft_mtx) +#define DI_FT_RLOCK_ASSERT() rw_assert(&di_ft_mtx, RA_RLOCKED) +#define DI_FT_WLOCK_ASSERT() rw_assert(&di_ft_mtx, RA_WLOCKED) +#define DI_FT_LOCK_ASSERT() rw_assert(&di_ft_mtx, RA_LOCKED) + +#endif /* _KERNEL */ + +/* Timeouts for ending flows. */ +static VNET_DEFINE(u_int32_t, ft_ack_lifetime); +static VNET_DEFINE(u_int32_t, ft_syn_lifetime); +static VNET_DEFINE(u_int32_t, ft_fin_lifetime); +static VNET_DEFINE(u_int32_t, ft_rst_lifetime); +static VNET_DEFINE(u_int32_t, ft_udp_lifetime); +static VNET_DEFINE(u_int32_t, ft_short_lifetime); +#define V_ft_ack_lifetime VNET(ft_ack_lifetime) +#define V_ft_syn_lifetime VNET(ft_syn_lifetime) +#define V_ft_fin_lifetime VNET(ft_fin_lifetime) +#define V_ft_rst_lifetime VNET(ft_rst_lifetime) +#define V_ft_udp_lifetime VNET(ft_udp_lifetime) +#define V_ft_short_lifetime VNET(ft_short_lifetime) + +static VNET_DEFINE(u_int32_t, ft_count); /* # of flows. */ +static VNET_DEFINE(u_int32_t, ft_max); /* Max # of flows. 
*/ +#define V_ft_count VNET(ft_count) +#define V_ft_max VNET(ft_max) + +#ifdef _KERNEL +#ifdef SYSCTL_NODE +SYSBEGIN(xxx) +SYSCTL_DECL(_net_inet_ip_diffuse); +SYSCTL_VNET_UINT(_net_inet_ip_diffuse, OID_AUTO, ft_buckets, + CTLFLAG_RW, &VNET_NAME(ft_buckets), 0, + "Number of buckets"); +SYSCTL_VNET_UINT(_net_inet_ip_diffuse, OID_AUTO, ft_curr_buckets, + CTLFLAG_RD, &VNET_NAME(ft_curr_buckets), 0, + "Current Number of buckets"); +SYSCTL_VNET_UINT(_net_inet_ip_diffuse, OID_AUTO, ft_count, + CTLFLAG_RD, &VNET_NAME(ft_count), 0, + "Number of entries"); +SYSCTL_VNET_UINT(_net_inet_ip_diffuse, OID_AUTO, ft_max, + CTLFLAG_RW, &VNET_NAME(ft_max), 0, + "Max number of entries"); +SYSCTL_VNET_UINT(_net_inet_ip_diffuse, OID_AUTO, ft_ack_lifetime, + CTLFLAG_RW, &VNET_NAME(ft_ack_lifetime), 0, + "Lifetime of entries for acks"); +SYSCTL_VNET_UINT(_net_inet_ip_diffuse, OID_AUTO, ft_syn_lifetime, + CTLFLAG_RW, &VNET_NAME(ft_syn_lifetime), 0, + "Lifetime of entries for syn"); +SYSCTL_VNET_UINT(_net_inet_ip_diffuse, OID_AUTO, ft_fin_lifetime, + CTLFLAG_RW, &VNET_NAME(ft_fin_lifetime), 0, + "Lifetime of entries for fin"); +SYSCTL_VNET_UINT(_net_inet_ip_diffuse, OID_AUTO, ft_rst_lifetime, + CTLFLAG_RW, &VNET_NAME(ft_rst_lifetime), 0, + "Lifetime of entries for rst"); +SYSCTL_VNET_UINT(_net_inet_ip_diffuse, OID_AUTO, ft_udp_lifetime, + CTLFLAG_RW, &VNET_NAME(ft_udp_lifetime), 0, + "Lifetime of entries for UDP"); +SYSCTL_VNET_UINT(_net_inet_ip_diffuse, OID_AUTO, ft_short_lifetime, + CTLFLAG_RW, &VNET_NAME(ft_short_lifetime), 0, + "Lifetime of entries for other situations"); +SYSCTL_VNET_UINT(_net_inet_ip_diffuse, OID_AUTO, to_buckets, + CTLFLAG_RW, &VNET_NAME(to_buckets), 0, + "Number of timeout buckets"); +SYSCTL_VNET_UINT(_net_inet_ip_diffuse, OID_AUTO, to_curr_buckets, + CTLFLAG_RD, &VNET_NAME(to_curr_buckets), 0, + "Current Number of timeout buckets"); +SYSEND +#endif /* SYSCTL_NODE */ +#else /* _KERNEL */ + /* Only for userspace (ipfw_fstats). */ + uint64_t id_cnt = 0; +#endif /* _KERNEL */ + +#ifdef DIFFUSE_DEBUG +/* Wrapper so this works in kernel and userspace. */ +static char * +_inet_ntoa_r(struct in_addr in, char *buf, int len) +{ +#ifdef _KERNEL + return (inet_ntoa_r(in, buf)); +#else + return (inet_ntoa_r(in, buf, len)); +#endif +} +#endif + +/* + * XXX: Maybe use a better hash function? Doesn't need to be commutative, + * can do two lookups. + */ +static inline int +hash_packet6(struct ipfw_flow_id *id) +{ + uint32_t i; + + i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^ + (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^ + (id->src_ip6.__u6_addr.__u6_addr32[2]) ^ + (id->src_ip6.__u6_addr.__u6_addr32[3]) ^ + (id->dst_port) ^ (id->src_port); + + return (i); +} + +/* + * IMPORTANT: the hash function for dynamic rules must be commutative + * in source and destination (ip,port), because rules are bidirectional + * and we want to find both in the same bucket. 
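+ * For example, swapping (src_ip, src_port) with (dst_ip, dst_port) leaves
+ * dst_ip ^ src_ip ^ dst_port ^ src_port unchanged, so both directions of a
+ * flow hash to the same bucket.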
+ */ +static inline int +hash_packet(struct ipfw_flow_id *id) +{ + uint32_t i; + +#ifdef INET6 + if (IS_IP6_FLOW_ID(id)) + i = hash_packet6(id); + else +#endif /* INET6 */ + i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ + (id->src_port); + i &= (V_ft_curr_buckets - 1); + + return (i); +} + +static inline void +unlink_entry_print(struct ipfw_flow_id *id) +{ +#ifdef DIFFUSE_DEBUG + struct in_addr da; +#ifdef INET6 + char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; +#else + char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; +#endif + +#ifdef INET6 + if (IS_IP6_FLOW_ID(id)) { + ip6_sprintf(src, &id->src_ip6); + ip6_sprintf(dst, &id->dst_ip6); + } else +#endif + { + da.s_addr = htonl(id->src_ip); + _inet_ntoa_r(da, src, sizeof(src)); + da.s_addr = htonl(id->dst_ip); + _inet_ntoa_r(da, dst, sizeof(dst)); + } + DID("ft unlink entry %s %d -> %s %d, %d left", + src, id->src_port, dst, id->dst_port, V_ft_count - 1); +#endif /* DIFFUSE_DEBUG */ +} + +/* + * Unlink a dynamic rule from a chain. prev is a pointer to the previous one, q + * is a pointer to the rule to delete, head is a pointer to the head of the + * queue. Modifies q and potentially also head. + */ +#define UNLINK_DYN_RULE(prev, head, q) do { \ + struct di_ft_entry *old_q = q; \ + \ + unlink_entry_print(&q->id); \ + if (prev != NULL) \ + prev->next = q = q->next; \ + else \ + head = q = q->next; \ + V_ft_count--; \ + uma_zfree(di_ft_zone, old_q); \ +} while (0) + +#define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) + +#ifdef _KERNEL + +/* Delete tags list, called under lock. */ +static void +remove_classes(struct di_ft_entry *e) +{ + struct di_flow_class *s; + + DI_FT_WLOCK_ASSERT(); + + while (!SLIST_EMPTY(&e->flow_classes)) { + s = SLIST_FIRST(&e->flow_classes); + SLIST_REMOVE_HEAD(&e->flow_classes, next); + free(s, M_DIFFUSE); + e->tcnt--; + } +} + +/* Delete tags list, called under lock. */ +static void +remove_exports(struct di_ft_entry *e) +{ + struct di_exp *s; + + DI_FT_WLOCK_ASSERT(); + + while (!SLIST_EMPTY(&e->ex_list)) { + s = SLIST_FIRST(&e->ex_list); + SLIST_REMOVE_HEAD(&e->ex_list, next); + free(s, M_DIFFUSE); + } +} + +/* Remove a rule timeout (if present). */ +static void +remove_timeout(struct di_ft_entry *e) +{ + + DI_FT_WLOCK_ASSERT(); + + if (e->to) { + LIST_REMOVE(e->to, next); + free(e->to, M_DIFFUSE); + e->to = NULL; + } +} + +/* Export end of flow record. */ +static void +remove_rule_msg(struct di_ft_entry *q) +{ + struct di_exp *s; + + DI_FT_WLOCK_ASSERT(); + + SLIST_FOREACH(s, &q->ex_list, next) { + if (q->ex_time.tv_sec > 0) + diffuse_export_add_rec(q, s->ex, 0); + } +} + +#endif /* _KERNEL */ + +/* Free memory allocated by entry. Send remove rule msg if needed. */ +static void +destroy_entry(struct di_ft_entry *q, int remove_msg) +{ + int i; + + DI_FT_WLOCK_ASSERT(); + +#ifdef _KERNEL + if (remove_msg) { + /* Signal end of flow for non-expired entries. */ + remove_rule_msg(q); + } +#endif + + /* Destroy features */ + for (i = 0; i < q->fcnt; i++) { + q->features[i]->alg->destroy_stats(&q->features[i]->conf, + &q->fwd_data[i]); + if (!(q->features[i]->alg->type & + DI_FEATURE_ALG_BIDIRECTIONAL) && + (q->ftype & DI_FLOW_TYPE_BIDIRECTIONAL)) { + q->features[i]->alg->destroy_stats(&q->features[i]->conf, + &q->bck_data[i]); + } + } + +#ifdef _KERNEL + remove_classes(q); + remove_exports(q); + remove_timeout(q); +#endif +} + +/* + * Remove flows pointing to "rule", or all of them if rule == NULL. + * + * If expired_only == 0, flows are deleted even if not expired, otherwise only + * expired flows are removed. 
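+ *
+ * Callers pass rule == NULL and expired_only == 1 to garbage collect
+ * expired flows (e.g. diffuse_ft_install_state() when the table is full),
+ * and a rule pointer with expired_only == 0 when an ipfw rule is deleted
+ * (diffuse_ft_remove_entries()).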
+ */ +static void +remove_entry(struct ip_fw *rule, int expired_only) +{ + static uint32_t last_remove = 0; + struct di_ft_entry *prev, *q; + int i; + + DI_FT_WLOCK_ASSERT(); + + if (V_diffuse_ft_v == NULL || V_ft_count == 0) + return; + + /* Do not expire more than once per second, it is useless. */ + if (expired_only != 0 && last_remove == time_uptime) + return; + + last_remove = time_uptime; + + for (i = 0; i < V_ft_curr_buckets; i++) { + for (prev = NULL, q = V_diffuse_ft_v[i]; q; ) { + if (rule != NULL && rule != q->rule) + goto next; + + if (expired_only != 0 && + !TIME_LEQ(q->expire, time_uptime)) + goto next; + + destroy_entry(q, 1); + UNLINK_DYN_RULE(prev, V_diffuse_ft_v[i], q); + continue; +next: + prev = q; + q = q->next; + } + } +} + +void +diffuse_ft_remove_entries(struct ip_fw *rule) +{ + + DI_FT_WLOCK(); + remove_entry(rule, 0); /* 0 forces removal. */ + DI_FT_UNLOCK(); +} + +/* + * Updates flow lifetime. If we don't see start of TCP flow -> very short + * timeout. + */ +static void +update_lifetime(struct di_ft_entry *q, struct ipfw_flow_id *pkt, int dir) +{ + uint32_t timeout; + unsigned char flags; +#ifdef _KERNEL + struct di_to_entry *te; + uint32_t slot, old_expire; + + old_expire = q->expire; +#endif + + DI_FT_WLOCK_ASSERT(); + + if (pkt->proto == IPPROTO_TCP) { /* Update state according to flags. */ + flags = pkt->_flags & (TH_FIN|TH_SYN|TH_RST); + +#define BOTH_SYN (TH_SYN | (TH_SYN << 8)) +#define BOTH_FIN (TH_FIN | (TH_FIN << 8)) + q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8); + switch (q->state) { + case TH_SYN: /* Opening. */ + timeout = V_ft_syn_lifetime; + break; + + case BOTH_SYN: /* Move to established. */ + case BOTH_SYN | TH_FIN : /* One side tries to close. */ + case BOTH_SYN | (TH_FIN << 8) : + timeout = V_ft_ack_lifetime; + break; + + case BOTH_SYN | BOTH_FIN: /* Both sides closed. */ + timeout = V_ft_fin_lifetime; + break; + + default: + timeout = V_ft_rst_lifetime; + break; + } + } else if (pkt->proto == IPPROTO_UDP) { + timeout = V_ft_udp_lifetime; + } else { + /* Other protocols. */ + timeout = V_ft_short_lifetime; + } + + q->expire = time_uptime + timeout; + +#ifdef _KERNEL + + if (di_conf.an_rule_removal == DIP_TIMEOUT_NONE && + (!q->to || q->expire != old_expire)) { + /* + * Guard against too big timeout values. + * XXX: use SYSCTL_PROC to check setting of timeouts + */ + if (timeout > V_to_curr_buckets) + q->expire = time_uptime + V_to_curr_buckets; + + slot = q->expire & (V_to_curr_buckets - 1); + + if (q->to) { + /* Update existing timeout. */ + DID2("updating timeout to slot %u", slot); + LIST_REMOVE(q->to, next); + LIST_INSERT_HEAD(&V_diffuse_to_v[slot], q->to, next); + } else { + /* Add new timeout. */ + DID2("inserting timeout in slot %u", slot); + te = malloc(sizeof(*te), M_DIFFUSE, M_NOWAIT | M_ZERO); + if (te != NULL) { + te->flow = q; + q->to = te; + LIST_INSERT_HEAD(&V_diffuse_to_v[slot], te, next); + } + } + } +#endif + + DID2("expiry update %d %d %d", pkt->proto, q->state, q->expire); +} + +#ifdef _KERNEL +/* We rely on this function being called regularly. 
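+ * Timeouts are kept in a wheel of V_to_curr_buckets one second slots
+ * (slot = expire & (V_to_curr_buckets - 1), see update_lifetime()); each
+ * call walks the slots for the seconds elapsed since the previous call.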
*/ +void +diffuse_ft_check_timeouts(di_to_handler_fn_t f) +{ + static uint32_t last_time = 0; + struct di_to_entry *t, *tmp; + struct di_export_rec *ex_rec; + struct di_exp *s; + uint32_t i, slot; + + if (last_time == 0) + last_time = time_uptime - 1; + + DI_FT_WLOCK(); + + if (V_diffuse_to_v != NULL && (time_uptime - last_time > 0)) { + for (i = last_time + 1; i <= time_uptime; i++) { + slot = i & (V_to_curr_buckets - 1); + + DID2("checking timeout slot %u", slot); + LIST_FOREACH_SAFE(t, &V_diffuse_to_v[slot], next, tmp) { + + SLIST_FOREACH(s, &t->flow->ex_list, next) { + if (t->flow->ex_time.tv_sec > 0) { + ex_rec = f(t->flow, s->ex, 0); + t->flow->ex_time = ex_rec->time; + } + } + + LIST_REMOVE(t, next); + t->flow->to = NULL; + free(t, M_DIFFUSE); + } + + last_time++; + } + } + + DI_FT_UNLOCK(); +} +#endif + +static void +update_features(struct di_ft_entry *q, struct ipfw_flow_id *pkt, + struct ip_fw_args *args, void *ulp, int dir) +{ + struct di_feature *f; + int i; + + DI_FT_WLOCK_ASSERT(); + + for (i = 0; i < q->fcnt; i++) { + f = q->features[i]; + + if (dir == MATCH_FORWARD || + q->features[i]->alg->type & DI_FEATURE_ALG_BIDIRECTIONAL) { + f->alg->update_stats(&f->conf, &q->fwd_data[i], args->m, + pkt->proto, ulp, dir); + } else if (dir == MATCH_REVERSE) { /* Only possible if bidir. */ + f->alg->update_stats(&f->conf, &q->bck_data[i], args->m, + pkt->proto, ulp, dir); + } + } +} + +/* Lookup a dynamic rule, locked version. */ +static struct di_ft_entry * +lookup_entry_locked(struct ipfw_flow_id *pkt, int *match_direction) +{ + struct di_ft_entry *prev, *q; + int i, dir; + + DI_FT_WLOCK_ASSERT(); + + q = NULL; + dir = MATCH_NONE; + + if (V_diffuse_ft_v == NULL) + goto done; + + i = hash_packet(pkt); + + for (prev = NULL, q = V_diffuse_ft_v[i]; q != NULL; ) { + if (TIME_LEQ(q->expire, time_uptime)) { /* Expire entry. */ + /* End of flow record already sent via timeout. */ + destroy_entry(q, 0); + UNLINK_DYN_RULE(prev, V_diffuse_ft_v[i], q); + continue; + } + if (pkt->proto == q->id.proto) { +#ifdef INET6 + if (IS_IP6_FLOW_ID(pkt)) { + if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), + &(q->id.src_ip6)) && + IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), + &(q->id.dst_ip6)) && + pkt->src_port == q->id.src_port && + pkt->dst_port == q->id.dst_port) { + dir = MATCH_FORWARD; + break; + } + if (q->ftype & DI_FLOW_TYPE_BIDIRECTIONAL && + IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), + &(q->id.dst_ip6)) && + IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), + &(q->id.src_ip6)) && + pkt->src_port == q->id.dst_port && + pkt->dst_port == q->id.src_port) { + dir = MATCH_REVERSE; + break; + } + } else +#endif + { + if (pkt->src_ip == q->id.src_ip && + pkt->dst_ip == q->id.dst_ip && + pkt->src_port == q->id.src_port && + pkt->dst_port == q->id.dst_port) { + dir = MATCH_FORWARD; + break; + } + if (q->ftype & DI_FLOW_TYPE_BIDIRECTIONAL && + pkt->src_ip == q->id.dst_ip && + pkt->dst_ip == q->id.src_ip && + pkt->src_port == q->id.dst_port && + pkt->dst_port == q->id.src_port) { + dir = MATCH_REVERSE; + break; + } + } + } + prev = q; + q = q->next; + } + + if (q == NULL) + goto done; + + if (prev != NULL) { /* Found and not in front. */ + prev->next = q->next; + q->next = V_diffuse_ft_v[i]; + V_diffuse_ft_v[i] = q; + } + + update_lifetime(q, pkt, dir); + +done: + if (match_direction) + *match_direction = dir; + + return (q); +} + +/* NOTE: function returns with lock held, caller must unlock. 
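+ * A typical caller does, roughly:
+ *
+ *	q = diffuse_ft_lookup_entry(pkt, args, ulp, pktlen, &dir);
+ *	if (q != NULL) {
+ *		... use entry ...
+ *		diffuse_ft_unlock();
+ *	}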
*/ +struct di_ft_entry * +diffuse_ft_lookup_entry(struct ipfw_flow_id *pkt, struct ip_fw_args *args, + void *ulp, int pktlen, int *match_direction) +{ + struct di_ft_entry *q; + + DI_FT_WLOCK(); + + q = lookup_entry_locked(pkt, match_direction); + if (q == NULL) { + DI_FT_UNLOCK(); + return (NULL); + } + + update_features(q, &args->f_id, args, ulp, *match_direction); + q->pcnt++; + q->bcnt += pktlen; + + return (q); +} + +/* Return a single statistic. */ +int +diffuse_ft_get_stat(struct di_ft_entry *q, int fdir, + struct di_feature *fptr, int sidx, int32_t *val) +{ + int i, ret; + + ret = 0; + + DI_FT_RLOCK(); + + for (i = 0; i < q->fcnt; i++) { + if (q->features[i] == fptr) { + if (fdir == DI_MATCH_DIR_NONE || + fdir == DI_MATCH_DIR_FWD || + (q->features[i]->alg->type & + DI_FEATURE_ALG_BIDIRECTIONAL)) { + ret = q->features[i]->alg->get_stat( + &q->features[i]->conf, &q->fwd_data[i], sidx, + val); + } else if (q->ftype & DI_FLOW_TYPE_BIDIRECTIONAL) { + ret = q->features[i]->alg->get_stat( + &q->features[i]->conf, + &q->bck_data[i], sidx, val); + } + break; + } + } + + DI_FT_UNLOCK(); + + return (ret); +} + +/* Get multiple statistics */ +int +diffuse_ft_get_stats(struct di_ft_entry *q, int fscnt, + struct di_feature_stat *fstats, struct di_feature_stat_ptr *fstats_ptr, + int32_t *fvec) +{ + int i, j, ret; + + ret = 0; + + DI_FT_RLOCK(); + + for (i = 0; i < fscnt; i++) { + for (j = 0; j < q->fcnt; j++) { + if (q->features[j] == fstats_ptr[i].fptr) { + if (fstats[i].fdir == DI_MATCH_DIR_NONE || + fstats[i].fdir == DI_MATCH_DIR_FWD || + (q->features[j]->alg->type & + DI_FEATURE_ALG_BIDIRECTIONAL)) { + ret = q->features[j]->alg->get_stat( + &q->features[j]->conf, + &q->fwd_data[j], fstats_ptr[i].sidx, + &fvec[i]); + } else if (q->ftype & + DI_FLOW_TYPE_BIDIRECTIONAL) { + ret = q->features[j]->alg->get_stat( + &q->features[j]->conf, + &q->bck_data[j], + fstats_ptr[i].sidx, + &fvec[i]); + } + /* If any statistic is still missing, return. */ + if (!ret) { + DI_FT_UNLOCK(); + return (ret); + } + } + } + } + + DI_FT_UNLOCK(); + + return (ret); +} + +void +diffuse_ft_unlock(void) +{ + + DI_FT_UNLOCK(); +} + +static void +realloc_flow_table(void) +{ +#ifdef _KERNEL + int i; +#endif + + DI_FT_WLOCK_ASSERT(); + + /* + * Try reallocation, make sure we have a power of 2 and do not allow + * more than 64k entries. In case of overflow, default to 1024. + */ + + if (V_ft_buckets > 65536) + V_ft_buckets = 1024; + + if ((V_ft_buckets & (V_ft_buckets - 1)) != 0) { + V_ft_buckets = V_ft_curr_buckets; /* Not a power of 2, reset. */ + return; + } + +#ifdef _KERNEL + if (V_to_buckets > 4096) + V_to_buckets = 4096; + + if ((V_to_buckets & (V_to_buckets - 1)) != 0) { + V_to_buckets = V_to_curr_buckets; /* Not a power of 2, reset. */ + return; + } + + /* Must be larger than all of the timeout values. */ + if (V_to_buckets < V_ft_ack_lifetime || + V_to_buckets < V_ft_syn_lifetime || + V_to_buckets < V_ft_fin_lifetime || + V_to_buckets < V_ft_rst_lifetime || + V_to_buckets < V_ft_udp_lifetime || + V_to_buckets < V_ft_short_lifetime) { + V_to_buckets = V_to_curr_buckets; /* Reset. 
*/ + return; + } +#endif + + V_ft_curr_buckets = V_ft_buckets; + if (V_diffuse_ft_v != NULL) + free(V_diffuse_ft_v, M_DIFFUSE); + + for (;;) { + V_diffuse_ft_v = malloc(V_ft_curr_buckets * + sizeof(struct di_ft_entry *), M_DIFFUSE, M_NOWAIT | M_ZERO); + + if (V_diffuse_ft_v != NULL || V_ft_curr_buckets <= 2) + break; + + V_ft_curr_buckets >>= 1; + } + +#ifdef _KERNEL + V_to_curr_buckets = V_to_buckets; + if (V_diffuse_to_v != NULL) + free(V_diffuse_to_v, M_DIFFUSE); + + V_diffuse_to_v = malloc(V_to_curr_buckets * + sizeof(struct di_to_head), M_DIFFUSE, M_NOWAIT | M_ZERO); + + if (V_diffuse_to_v != NULL) { + for(i = 0; i < V_to_curr_buckets; i++) + LIST_INIT(&V_diffuse_to_v[i]); + } +#endif +} + +/* Add entry for a new flow. */ +static struct di_ft_entry * +add_entry(struct ipfw_flow_id *id, struct ip_fw *rule) +{ + struct di_ft_entry *r; + int i; + + DI_FT_WLOCK_ASSERT(); + + if (V_diffuse_ft_v == NULL || + (V_ft_count == 0 && V_ft_buckets != V_ft_curr_buckets) +#ifdef _KERNEL + || V_diffuse_to_v == NULL || + (V_ft_count == 0 && V_to_buckets != V_to_curr_buckets) +#endif + ) { + realloc_flow_table(); + if (V_diffuse_ft_v == NULL) + return (NULL); +#ifdef _KERNEL + if (V_diffuse_to_v == NULL) + return (NULL); +#endif + } + + i = hash_packet(id); + +#ifdef _KERNEL + r = uma_zalloc(di_ft_zone, M_NOWAIT | M_ZERO); +#else + r = calloc(sizeof(struct di_ft_entry), 1); +#endif + if (r == NULL) { + DID("add entry cannot allocate state"); + return (NULL); + } + + r->id = *id; + r->expire = time_uptime + V_ft_syn_lifetime; + r->rule = rule; + r->pcnt = r->bcnt = 0; + r->bucket = i; + r->tcnt = 0; +#ifdef _KERNEL + r->ex_time.tv_sec = 0; + r->ex_time.tv_usec = 0; + SLIST_INIT(&r->flow_classes); + SLIST_INIT(&r->ex_list); + r->to = NULL; +#else + r->flow_id = id_cnt++; +#endif + r->next = V_diffuse_ft_v[i]; + V_diffuse_ft_v[i] = r; + V_ft_count++; +#ifdef DIFFUSE_DEBUG + { + struct in_addr da; +#ifdef INET6 + char src[INET6_ADDRSTRLEN]; + char dst[INET6_ADDRSTRLEN]; +#else + char src[INET_ADDRSTRLEN]; + char dst[INET_ADDRSTRLEN]; +#endif + +#ifdef INET6 + if (IS_IP6_FLOW_ID(&(r->id))) { + ip6_sprintf(src, &r->id.src_ip6); + ip6_sprintf(dst, &r->id.dst_ip6); + } else +#endif + { + da.s_addr = htonl(r->id.src_ip); + _inet_ntoa_r(da, src, sizeof(src)); + da.s_addr = htonl(r->id.dst_ip); + _inet_ntoa_r(da, dst, sizeof(dst)); + } + DID("add ft entry %s %d -> %s %d, total %d", + src, r->id.src_port, dst, r->id.dst_port, + V_ft_count); + } +#endif /* DIFFUSE_DEBUG */ + return (r); +} + +#ifdef _KERNEL + +/* + * Add a class tag to a flow entry, or change the class of an existing tag, or + * just increase confirm when class is unchanged. + */ +void +diffuse_ft_add_class(struct di_ft_entry *e, char *cname, int class, + int *prev_class, int *confirm) +{ + struct di_flow_class *s, *c; + + c = NULL; + + DI_FT_WLOCK(); + + SLIST_FOREACH(s, &e->flow_classes, next) { + if (strcmp(s->cname, cname) == 0) { + if (s->class == class) { + /* XXX: Why 0xFFFF? */ + if (s->confirm < 0xFFFF) + s->confirm++; + } else { + /* + * Memorise last class if well confirmed. + * XXX: 10 is a magic constant. + * XXX: Do something smarter like selecting the + * most frequent class within last x + * packets. 
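+				 * For example, with *confirm == 2 the
+				 * previous class is only memorised on a
+				 * change if it is still unset and the old
+				 * class was confirmed at least 2 times, or
+				 * if it was confirmed at least 20 times.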
+ */ + if ((s->prev_class < 0 && + s->confirm >= *confirm) || + (s->confirm >= *confirm * 10)) { + s->prev_class = s->class; + } + s->class = class; + s->confirm = 0; + } + + *prev_class = s->prev_class; + *confirm = s->confirm; + + DI_FT_UNLOCK(); + + return; + } + } + + c = malloc(sizeof(struct di_flow_class), M_DIFFUSE, M_NOWAIT | M_ZERO); + if (c != NULL) { + strcpy(c->cname, cname); + c->class = class; + c->prev_class = -1; + c->confirm = 0; + + SLIST_INSERT_HEAD(&e->flow_classes, c, next); + e->tcnt++; + + *prev_class = c->prev_class; + *confirm = c->confirm; + } + + DI_FT_UNLOCK(); +} + +/* Return flow class for classifier cname or -1 if not classified. */ +int +diffuse_ft_get_class(struct di_ft_entry *e, char *cname, int *prev_class, + int *confirm) +{ + struct di_flow_class *s; + int class; + + class = -1; + + DI_FT_RLOCK(); + + SLIST_FOREACH(s, &e->flow_classes, next) { + if (strcmp(s->cname, cname) == 0) { + class = s->class; + *prev_class = s->prev_class; + *confirm = s->confirm; + break; + } + } + + DI_FT_UNLOCK(); + + return (class); +} + +/* Add pointer to exporter, if not exist. */ +void +diffuse_ft_add_export(struct di_ft_entry *q, struct di_export_rec *ex_rec, + struct di_export *nex) +{ + struct di_exp *s, *e; + + e = NULL; + + DI_FT_WLOCK(); + + /* Store pointer to export record and last export record time. */ + if (ex_rec != NULL) + q->ex_time = ex_rec->time; + + /* Link pointer to export to this flow entry (if not there yet). */ + SLIST_FOREACH(s, &q->ex_list, next) { + if (nex == s->ex) { + DI_FT_UNLOCK(); + return; + } + } + + e = malloc(sizeof(struct di_exp), M_DIFFUSE, M_NOWAIT | M_ZERO); + if (e != NULL) { + e->ex = nex; + SLIST_INSERT_HEAD(&q->ex_list, e, next); + } + + DI_FT_UNLOCK(); +} +#endif /* _KERNEL */ + +/* + * Install state for new flow + * + * Returns NULL (failure) if state is not installed because of errors or because + * session limitations are enforced. + */ +struct di_ft_entry * +diffuse_ft_install_state(struct ip_fw *rule, ipfw_insn_features *cmd, + struct ip_fw_args *args, void *ulp, int pktlen) +{ + static int last_log; + struct di_ft_entry *q; + struct di_feature *f; + int i; +#ifdef DIFFUSE_DEBUG + struct in_addr da; +#ifdef INET6 + char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; +#else + char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; +#endif +#endif + + q = NULL; + + DI_FT_WLOCK(); + +#ifdef DIFFUSE_DEBUG +#ifdef INET6 + if (IS_IP6_FLOW_ID(&(args->f_id))) { + ip6_sprintf(src, &args->f_id.src_ip6); + ip6_sprintf(dst, &args->f_id.dst_ip6); + } else +#endif + { + da.s_addr = htonl(args->f_id.src_ip); + _inet_ntoa_r(da, src, sizeof(src)); + da.s_addr = htonl(args->f_id.dst_ip); + _inet_ntoa_r(da, dst, sizeof(dst)); + } + DID("ft %s: type %d %s %u -> %s %u", + __func__, cmd->o.opcode, src, args->f_id.src_port, dst, + args->f_id.dst_port); +#endif + + q = lookup_entry_locked(&args->f_id, NULL); + + if (q != NULL) { + if (last_log != time_uptime) { + last_log = time_uptime; + DID("ft entry already present, done"); + } + DI_FT_UNLOCK(); + return (q); + } + + if (V_ft_count >= V_ft_max) { + /* Run out of slots, try to remove any expired rule. */ + remove_entry(NULL, 1); + } + + if (V_ft_count >= V_ft_max) { + if (last_log != time_uptime) { + last_log = time_uptime; + DID("ft too many entries"); + } + DI_FT_UNLOCK(); + return (NULL); /* Cannot install, notify caller. */ + } + + q = add_entry(&args->f_id, rule); + if (!q) { + DI_FT_UNLOCK(); + return (NULL); + } + + /* Set lifetime. 
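diffuse_ft_add_export() above is a plain search-then-insert on a singly linked list; a failed allocation is harmless because the link can simply be retried on a later exported packet. The shape of the pattern, with an explicit error code added for clarity (illustrative):

struct exp_link {
	struct di_export	*ex;
	SLIST_ENTRY(exp_link)	next;
};
SLIST_HEAD(exp_head, exp_link);

static int
link_export(struct exp_head *head, struct di_export *nex)
{
	struct exp_link *s;

	SLIST_FOREACH(s, head, next) {
		if (s->ex == nex)
			return (0);	/* Already linked, nothing to do. */
	}
	s = malloc(sizeof(*s), M_DIFFUSE, M_NOWAIT | M_ZERO);
	if (s == NULL)
		return (ENOMEM);
	s->ex = nex;
	SLIST_INSERT_HEAD(head, s, next);
	return (0);
}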
*/ + update_lifetime(q, &args->f_id, MATCH_FORWARD); + + /* Setup everything. */ + q->fcnt = cmd->fcnt; + q->ftype = cmd->ftype; + q->sample_int = cmd->sample_int; + q->sample_prob = cmd->sample_prob; + q->pkts_after_last = cmd->sample_int; /* Ensure we match the first. */ + /* Link features. */ + memcpy(q->features, cmd->fptrs, + cmd->fcnt * sizeof(struct di_feature *)); + + for (i = 0; i < q->fcnt; i++) { + f = q->features[i]; + f->alg->init_stats(&f->conf, &q->fwd_data[i]); + if (!(f->alg->type & DI_FEATURE_ALG_BIDIRECTIONAL) && + q->ftype & DI_FLOW_TYPE_BIDIRECTIONAL) { + f->alg->init_stats(&f->conf, &q->bck_data[i]); + } + } + + /* Update features. */ + update_features(q, &args->f_id, args, ulp, MATCH_FORWARD); + + /* Update counters. */ + q->pcnt++; + q->bcnt += pktlen; + + DI_FT_UNLOCK(); + + return (q); +} + +/* + * Add all features in cmd which are not already there and update their values. + * If DI_MAX_FEATURES reached, return error. + */ +int +diffuse_ft_update_features(struct di_ft_entry *q, ipfw_insn_features *cmd, + struct ip_fw_args *args, void *ulp) +{ + struct di_feature *f; + int i, j; + + DI_FT_WLOCK(); + + for (i = 0; i < cmd->fcnt; i++) { + for (j = 0; j < q->fcnt; j++) { + if (q->features[j] == cmd->fptrs[i]) + break; + } + if (j == q->fcnt) { + /* Add missing feature. */ + if (q->fcnt == DI_MAX_FEATURES) { + DI_FT_UNLOCK(); + return (-1); + } + + f = q->features[q->fcnt] = cmd->fptrs[i]; + f->alg->init_stats(&f->conf, &q->fwd_data[q->fcnt]); + if (!(q->features[q->fcnt]->alg->type & + DI_FEATURE_ALG_BIDIRECTIONAL) && + (q->ftype & DI_FLOW_TYPE_BIDIRECTIONAL)) { + f->alg->init_stats(&f->conf, + &q->bck_data[q->fcnt]); + } + q->fcnt++; + update_features(q, &args->f_id, args, ulp, MATCH_FORWARD); + } + } + + DI_FT_UNLOCK(); + + return (0); +} + +/* Flush the flow table (or flow counters). */ +void +diffuse_ft_flush(int reset_counters_only) +{ + struct di_ft_entry *prev, *q; + int i; + + DI_FT_WLOCK(); + + if (V_diffuse_ft_v == NULL || V_ft_count == 0) { + DI_FT_UNLOCK(); + return; + } + + for (i = 0; i < V_ft_curr_buckets; i++) { + for (prev = NULL, q = V_diffuse_ft_v[i]; q; ) { + if (reset_counters_only) { + q->pcnt = 0; + q->bcnt = 0; + } else { + destroy_entry(q, 1); + UNLINK_DYN_RULE(prev, V_diffuse_ft_v[i], q); + continue; + } + prev = q; + q = q->next; + } + } + + DI_FT_UNLOCK(); +} + +#ifdef _KERNEL + +/* For rule timeouts refresh x seconds before expiration. */ +#define TIME_BEFORE_EXPIRE 2 + +/* Returns with lock held, caller must unlock. */ +int +diffuse_ft_do_export(struct di_ft_entry *q, uint16_t confirm) +{ + struct di_flow_class *c; + struct timeval now; + + if (di_conf.an_rule_removal == DIP_TIMEOUT_RULE) + getmicrotime(&now); + + DI_FT_RLOCK(); + + /* Export if rule timeout used and we are close to expiry. */ + if (di_conf.an_rule_removal == DIP_TIMEOUT_RULE && + q->ex_time.tv_sec > 0 && + now.tv_sec > q->ex_time.tv_sec && + q->expire - time_uptime < TIME_BEFORE_EXPIRE) { + return (1); + } + + /* + * Do export if one classifier has confirmed consecutive classifications. + * Do not export if we have more because class is unchanged. + * XXX: Currently it can happen that we export again even if class + * hasn't changed; to prevent that we would need to check the last + * exported class for each export,classifier combination. 
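The equality test that follows (c->confirm == confirm rather than >=) is what limits re-export: the counter advances by one per packet while the class is stable and resets to zero on a class change, so it passes through the threshold exactly once per confirmation run. Worked trace for a confirm threshold of 3 over a stable run:

	packet in run:   1    2    3    4    5    6
	c->confirm:      0    1    2    3    4    5
	do_export:       -    -    -    yes  -    -

A >= comparison would instead fire on every later packet of the run, which is exactly the repeated export the comment above wants to avoid.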
+ */ + SLIST_FOREACH(c, &q->flow_classes, next) { + if (c->confirm == confirm) + return (1); + } + + return (0); +} + +void +diffuse_ft_attach(void) +{ + + di_ft_zone = uma_zcreate("DIFFUSE flow table", + sizeof(struct di_ft_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, + 0); + + DI_FT_LOCK_INIT(); +} + +void +diffuse_ft_detach(void) +{ + + uma_zdestroy(di_ft_zone); + DI_FT_LOCK_DESTROY(); +} + +#endif + +void +diffuse_ft_init(void) +{ + + V_diffuse_ft_v = NULL; + +#ifdef _KERNEL + V_ft_buckets = 256; /* Must be power of 2. */ + V_ft_curr_buckets = 256; /* Must be power of 2. */ + V_ft_max = 4096; /* Max # of flow entries. */ +#else /* ipfw_fstats */ + V_ft_buckets = 4096; /* Must be power of 2. */ + V_ft_curr_buckets = 4096; /* Must be power of 2. */ + V_ft_max = 131072; +#endif + +#ifdef _KERNEL + V_diffuse_to_v = NULL; + V_to_buckets = 512; /* Must be power of 2. */ + V_to_curr_buckets = 512; /* Must be power of 2. */ +#endif + + V_ft_ack_lifetime = 300; + V_ft_syn_lifetime = 20; + /* Change following two defaults back to 1? */ + V_ft_fin_lifetime = 2; + V_ft_rst_lifetime = 2; + V_ft_udp_lifetime = 10; + V_ft_short_lifetime = 5; +} + +void +diffuse_ft_uninit(void) +{ +#ifdef _KERNEL + struct di_to_entry *e; + int i; +#endif + + if (V_diffuse_ft_v != NULL) + free(V_diffuse_ft_v, M_DIFFUSE); + +#ifdef _KERNEL + if (V_diffuse_to_v != NULL) { + for (i = 0; i < V_to_curr_buckets; i++) { + while (!LIST_EMPTY(&V_diffuse_to_v[i])) { + e = LIST_FIRST(&V_diffuse_to_v[i]); + LIST_REMOVE(e, next); + free(e, M_DIFFUSE); + } + } + free(V_diffuse_to_v, M_DIFFUSE); + } +#endif +} + +int +diffuse_ft_entries(void) +{ + + return (V_ft_count); +} + +#ifdef _KERNEL + +static int +get_entry_size(struct di_ft_entry *p) +{ + char **names; + int j, n, needed; + + needed = 0; + + DI_FT_RLOCK_ASSERT(); + + needed += sizeof(struct di_ft_export_entry); + needed += 2 * p->fcnt * sizeof(uint8_t); + + for (j = 0; j < p->fcnt; j++) { + n = p->features[j]->alg->get_stat_names(&names); + + if (!(p->features[j]->alg->type & + DI_FEATURE_ALG_BIDIRECTIONAL) && + p->ftype & DI_FLOW_TYPE_BIDIRECTIONAL) { + needed += 2 * n * sizeof(int32_t); + } else { + needed += n * sizeof(int32_t); + } + } + + /* Class tags. */ + needed += p->tcnt * sizeof(struct di_ft_flow_class); + + return (needed); +} + +int +diffuse_ft_len(int expired) +{ + struct di_ft_entry *p; + int i, needed; + + needed = 0; + + if (V_diffuse_ft_v == NULL) + return (0); + + DI_FT_RLOCK(); + + for (i = 0; i < V_ft_curr_buckets; i++) { + for (p = V_diffuse_ft_v[i]; p != NULL; p = p->next) { + if (!expired && TIME_LEQ(p->expire, time_uptime)) + continue; + + needed += get_entry_size(p); + } + } + + DI_FT_UNLOCK(); + + return (needed); +} + +/* Called for show command. 
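diffuse_ft_len() above and diffuse_get_ft() below form the usual two-pass sockopt export: pass one sizes the result under the lock, pass two serialises into a caller-supplied buffer and checks against the end pointer instead of overrunning. The end-pointer check matters because the table can grow between the passes; serialisation simply stops at the first entry that no longer fits. Consumer-side sketch (illustrative name):

static int
snapshot_flow_table(char **bufp, int expired)
{
	char *bp, *buf;
	int need;

	need = diffuse_ft_len(expired);			/* Pass 1: size. */
	if (need == 0)
		return (0);
	buf = bp = malloc(need, M_DIFFUSE, M_WAITOK | M_ZERO);
	diffuse_get_ft(&bp, buf + need, expired);	/* Pass 2: fill. */
	*bufp = buf;
	return (bp - buf);
}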
*/ +void +diffuse_get_ft(char **pbp, const char *ep, int expired) +{ + struct di_ft_entry *p; + struct di_ft_export_entry *dst; + struct di_flow_class *s; + char *bp, **names; + uint8_t scnt[DI_MAX_FEATURES]; + int i, idx, j, needed; + int32_t *stats; + + if (V_diffuse_ft_v == NULL) + return; + + bp = *pbp; + + DI_FT_RLOCK(); + + for (i = 0; i < V_ft_curr_buckets; i++) { + for (p = V_diffuse_ft_v[i]; p != NULL; p = p->next) { + needed = 0; + + if (!expired && TIME_LEQ(p->expire, time_uptime)) + continue; + + needed = get_entry_size(p); + + if (bp + needed <= ep) { + dst = (struct di_ft_export_entry *)bp; + + dst->ruleno = p->rule->rulenum; + dst->setno = p->rule->set; + dst->pcnt = p->pcnt; + dst->bcnt = p->bcnt; + memcpy(&dst->id, &p->id, + sizeof(struct ipfw_flow_id)); + dst->expire = TIME_LEQ(p->expire, time_uptime) ? + 0 : p->expire - time_uptime; + dst->bucket = p->bucket; + dst->state = p->state; + dst->fcnt = p->fcnt; + dst->tcnt = p->tcnt; + dst->ftype = p->ftype; + dst->final = p->next == NULL ? 1 : 0; + + bp += sizeof(struct di_ft_export_entry); + + /* + * Copy feature number (as they are in the list + * copied earlier). + */ + for (j = 0; j < p->fcnt; j++) { + idx = diffuse_get_feature_idx( + p->features[j]->name); + *bp = idx; + bp++; + } + + /* Copy number of stats per feature. */ + for (j = 0; j < p->fcnt; j++) { + scnt[j] = p->features[j]->alg-> + get_stat_names(&names); + *bp = scnt[j]; + bp++; + } + + for (j = 0; j < p->fcnt; j++) { + stats = NULL; + p->features[j]->alg->get_stats( + &p->features[j]->conf, + &p->fwd_data[j], &stats); + + if (stats != NULL) { + memcpy(bp, stats, + scnt[j] * sizeof(int32_t)); + } else { + memset(bp, 0, + scnt[j] * sizeof(int32_t)); + } + + bp += scnt[j] * sizeof(int32_t); + + if (!(p->features[j]->alg->type & + DI_FEATURE_ALG_BIDIRECTIONAL) && + (p->ftype & + DI_FLOW_TYPE_BIDIRECTIONAL)) { + stats = NULL; + p->features[j]->alg->get_stats( + &p->features[j]->conf, + &p->bck_data[j], &stats); + + if (stats != NULL) { + memcpy(bp, stats, + scnt[j] * + sizeof(int32_t)); + } else { + memset(bp, 0, scnt[j] * + sizeof(int32_t)); + } + + bp += scnt[j] * sizeof(int32_t); + } + } + + /* Class tags. */ + SLIST_FOREACH(s, &p->flow_classes, next) { + memcpy(bp, s->cname, DI_MAX_NAME_STR_LEN); + bp += DI_MAX_NAME_STR_LEN; + memcpy(bp, &s->class, sizeof(uint16_t)); + bp += sizeof(uint16_t); + } + } else { + goto done; + } + } + } + +done: + + DI_FT_UNLOCK(); + *pbp = bp; +} + +#endif /* _KERNEL */ diff -r 3abcaa91ffe8 sys/netinet/ipfw/diffuse_user_compat.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/netinet/ipfw/diffuse_user_compat.h Tue Oct 04 22:36:46 2011 +1100 @@ -0,0 +1,139 @@ +/*- + * Copyright (c) 2010-2011 + * Swinburne University of Technology, Melbourne, Australia. + * All rights reserved. + * + * This software was developed at the Centre for Advanced Internet + * Architectures, Swinburne University of Technology, by Sebastian Zander, made + * possible in part by a gift from The Cisco University Research Program Fund, a + * corporate advised fund of Silicon Valley Community Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Compatibility goo to allow kernel code to be compiled in userspace. Any file + * including this file must ensure the contents of this file are only visible if + * _KERNEL is not defined. + */ + +#ifndef _NETINET_IPFW_DIFFUSE_USER_COMPAT_H_ +#define _NETINET_IPFW_DIFFUSE_USER_COMPAT_H_ + +/* Mbuf for userspace. */ +struct mbuf { + uint8_t *data; + uint16_t len; +}; + +/* From ip_fw_private.h */ + +struct ip_fw_args { + struct mbuf *m; /* The mbuf chain. */ + struct ipfw_flow_id f_id; /* Grabbed from IP header. */ +}; + +/* Result for matching dynamic rules. */ +enum { + MATCH_REVERSE = 0, + MATCH_FORWARD, + MATCH_NONE, + MATCH_UNKNOWN, +}; + +/* From diffuse_private.h */ + +struct di_feature { + char *name; /* Instance name. */ + struct di_cdata conf; + struct di_feature_alg *alg; /* Feature algo ptr. */ +}; + +struct di_ft_entry { + struct di_ft_entry *next; /* Linked list of rules. */ + struct ip_fw *rule; /* Used to pass up the rule number. */ + struct ipfw_flow_id id; /* (masked) flow id. */ + uint64_t pcnt; /* Packet match counter. */ + uint64_t bcnt; /* Byte match counter. */ + uint32_t expire; /* Expire time. */ + uint32_t bucket; /* Which bucket in hash table. */ + uint32_t state; /* State of this rule (typically a + * combination of TCP flags). */ + uint8_t ftype; /* Bidir vs unidir, match limiting. */ + uint8_t fcnt; /* Number of features. */ + uint8_t tcnt; /* Number of class tags. */ + uint16_t sample_int; /* Sample interval. */ + uint32_t sample_prob; /* Sample probability. */ + uint16_t pkts_after_last; /* Match limiting: packets n */ + struct di_feature *features[DI_MAX_FEATURES]; /* Feature ptrs. */ + struct di_fdata fwd_data[DI_MAX_FEATURES]; + struct di_fdata bck_data[DI_MAX_FEATURES]; + uint64_t flow_id; + struct timeval start_time; /* Timestamp of first packet. */ +}; + +struct di_feature_stat_ptr { + struct di_feature *fptr; /* Feature ptr. */ + uint8_t sidx; /* Statistic index. */ +}; + +/* Redefined to get pointer to alg definition. */ +/* XXX: Maybe better to call register function here? 
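With just these two compat structures, flow code written against the kernel interfaces can be driven from a trace reader in userspace. A minimal illustration (invented harness helper):

static void
wrap_packet(struct ip_fw_args *args, struct mbuf *m, uint8_t *pkt,
    uint16_t len)
{

	m->data = pkt;		/* The compat mbuf just wraps the buffer. */
	m->len = len;
	args->m = m;
	/* args->f_id is filled separately from the parsed IP header. */
}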
*/ +#define DECLARE_DIFFUSE_FEATURE_MODULE(fname, fstruct) \ +struct di_feature_alg * diffuse_feature_##fname##_kmodule() \ +{ \ + return (fstruct); \ +} + +#define VNET_DEFINE(t, n) t n +#define VNET(n) n + +#define DI_FT_LOCK_INIT() +#define DI_FT_LOCK_DESTROY() +#define DI_FT_RLOCK() +#define DI_FT_WLOCK() +#define DI_FT_UNLOCK() +#define DI_FT_RLOCK_ASSERT() +#define DI_FT_WLOCK_ASSERT() +#define DI_FT_LOCK_ASSERT() + +/* #define this if you need to compile kernel code in userspace. */ +#ifdef KPI_USER_COMPAT + +/* Functions hacks. */ +#define free(s, n) free(s) +#define getmicrouptime(t) mygettimeofday(t, NULL) +#define m_length(m, p) m->len +#define malloc(s, n, f) calloc(s, 1) +#define microuptime(t) mygettimeofday(t, NULL) +#define mtod(m, p) (p) m->data +#define time_uptime mytime(NULL) +#define uma_zfree(z, p) free(p, M_DIFFUSE) + +time_t mytime(time_t *t); +int mygettimeofday(struct timeval *tp, struct timezone *tzp); + +#endif /* KPI_USER_COMPAT */ + +#endif /* _NETINET_IPFW_DIFFUSE_USER_COMPAT_H_ */ diff -r 3abcaa91ffe8 sys/netinet/ipfw/ip_diffuse.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/netinet/ipfw/ip_diffuse.c Tue Oct 04 22:36:46 2011 +1100 @@ -0,0 +1,2492 @@ +/*- + * Copyright (c) 2010-2011 + * Swinburne University of Technology, Melbourne, Australia. + * All rights reserved. + * + * This software was developed at the Centre for Advanced Internet + * Architectures, Swinburne University of Technology, by Sebastian Zander, made + * possible in part by a gift from The Cisco University Research Program Fund, a + * corporate advised fund of Silicon Valley Community Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Machine learning classsifier and remote action nodes support (DIFFUSE) + * http://www.caia.swin.edu.au/urp/diffuse + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +MALLOC_DEFINE(M_DIFFUSE, "DIFFUSE", "DIFFUSE heap"); + +/* Global config. */ +struct di_parms di_config; + +/* Callout stuff for timeouts. 
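Flow expiry checks and export transmission are driven by a self-rearming callout on the di_timeout handle declared below; the handler re-schedules itself at the end of each run. The pattern in a nutshell (condensed sketch of what diffuse_timeout() below does):

static void
tick(void *arg)
{

	/* ... check timeouts, kick exports ... */

	/* Re-arm: run again in ~1/10 s. */
	callout_reset(&di_timeout, hz / 10, tick, arg);
}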
*/ +static struct callout di_timeout; + +static void +diffuse_timeout(void *unused __unused) +{ + struct di_export *ex; + + if (di_config.an_rule_removal == DIP_TIMEOUT_NONE) + diffuse_ft_check_timeouts(diffuse_export_add_rec); + + DI_RLOCK(); + + /* Kick the actual export function for each export. */ + LIST_FOREACH(ex, &di_config.export_list, next) { + diffuse_export_send(ex); + } + + DI_UNLOCK(); + + callout_reset(&di_timeout, hz / 10, diffuse_timeout, NULL); +} + +#ifdef SYSCTL_NODE +SYSBEGIN(xxx) +SYSCTL_DECL(_net_inet); +SYSCTL_DECL(_net_inet_ip); +SYSCTL_DECL(_net_inet_ip_diffuse); +SYSCTL_NODE(_net_inet_ip, OID_AUTO, diffuse, CTLFLAG_RW, 0, "DIFFUSE"); +SYSEND +#endif + +/* + * Feature code. + */ + +static struct di_feature * +search_feature_instance(const char *name) +{ + struct di_feature *s = NULL; + + DI_LOCK_ASSERT(); + + LIST_FOREACH(s, &di_config.feature_inst_list, next) { + if (strcmp(s->name, name) == 0) + return (s); + } + + return (NULL); +} + +/* Return 1 if there is any feature instance that is still used. */ +static int +feature_instance_used(void) +{ + struct di_feature *s; + + DI_RLOCK(); + + LIST_FOREACH(s, &di_config.feature_inst_list, next) { + if (s->ref_count > 0) { + DI_UNLOCK(); + return (1); + } + } + + DI_UNLOCK(); + + return (0); +} + +static struct di_feature_alg * +search_feature_alg(const char *name) +{ + struct di_feature_alg *s; + + DI_LOCK_ASSERT(); + + SLIST_FOREACH(s, &di_config.feature_list, next) { + if (strcmp(s->name, name) == 0) + return (s); + } + + return (NULL); +} + +/* Return index to feature, used for flowtable show. */ +int +diffuse_get_feature_idx(const char *name) +{ + struct di_feature *s; + int c; + + c = 0; /* Index into feature list. */ + + DI_LOCK_ASSERT(); + + LIST_FOREACH(s, &di_config.feature_inst_list, next) { + if (strcmp(s->name, name) == 0) + return (c); + + c++; + } + + return (-1); +} + +static int +add_feature_instance(const char *alg_name, const char *name, + struct di_oid *params) +{ + struct di_feature *s, *f; + struct di_feature_alg *a; + + DI_WLOCK_ASSERT(); + + DID("feature add %s %s", alg_name, name); + + /* Check if instance exists. */ + s = search_feature_instance(name); + + if (s == NULL) { + /* Check algo exits. 
*/ + a = search_feature_alg(alg_name); + if (a == NULL) { + DID("feature %s algo doesn't exist", alg_name); + return (1); + } + + f = malloc(sizeof(struct di_feature), M_DIFFUSE, + M_NOWAIT | M_ZERO); + if (f == NULL) + return (ENOMEM); + + f->name = malloc(strlen(name) + 1, M_DIFFUSE, + M_NOWAIT | M_ZERO); + if (f->name == NULL) { + free(f, M_DIFFUSE); + return (ENOMEM); + } + strcpy(f->name, name); + f->ref_count = 0; + f->alg = a; + if (f->alg->init_instance(&f->conf, params)) { + free(f->name, M_DIFFUSE); + free(f, M_DIFFUSE); + return (ENOMEM); + } + + LIST_INSERT_HEAD(&di_config.feature_inst_list, f, next); + + a->ref_count++; + di_config.feature_count++; + + return (0); + } else { + DID("feature %s already exists", name); + return (1); + } +} + +static int +remove_feature_instance(char *name) +{ + struct di_feature *s, *tmp; + + DI_WLOCK_ASSERT(); + + LIST_FOREACH_SAFE(s, &di_config.feature_inst_list, next, tmp) { + if (!strcmp(s->name, name) && s->ref_count == 0) { + LIST_REMOVE(s, next); + free(s->name, M_DIFFUSE); + s->alg->destroy_instance(&s->conf); + s->alg->ref_count--; + free(s, M_DIFFUSE); + di_config.feature_count--; + return (0); + } + } + + DID("feature %s can't remove", name); + return (1); +} + +/* + * If name == NULL we try to remove all instances, otherwise we only remove + * instances of the particular module. + */ +static int +diffuse_remove_feature_instances(const char *name, int force) +{ + struct di_feature *tmp, *r; + + DI_WLOCK(); + + LIST_FOREACH_SAFE(r, &di_config.feature_inst_list, next, tmp) { + if ((name == NULL || !strcmp(r->alg->name, name)) && + (r->ref_count == 0 || force)) { + LIST_REMOVE(r, next); + free(r->name, M_DIFFUSE); + r->alg->destroy_instance(&r->conf); + r->alg->ref_count--; + free(r, M_DIFFUSE); + di_config.feature_count--; + } + } + + DI_UNLOCK(); + + return (0); +} + +/* + * Classifier code. + */ + +static struct di_classifier * +search_classifier_instance(const char *name) +{ + struct di_classifier *s; + + DI_LOCK_ASSERT(); + + LIST_FOREACH(s, &di_config.classifier_inst_list, next) { + if (strcmp(s->name, name) == 0) + return (s); + } + + return (NULL); +} + +static struct di_classifier_alg * +search_classifier_alg(const char *name) +{ + struct di_classifier_alg *s; + + DI_LOCK_ASSERT(); + + SLIST_FOREACH(s, &di_config.classifier_list, next) { + if (strcmp(s->name, name) == 0) + return (s); + } + + return (NULL); +} + +/* Return 1 if there is any feature instance that is still used. */ +static int +classifier_instance_used(void) +{ + struct di_classifier *s; + + DI_WLOCK(); + + LIST_FOREACH(s, &di_config.classifier_inst_list, next) { + if (s->ref_count > 0) { + DI_UNLOCK(); + return (1); + } + } + + DI_UNLOCK(); + + return (0); +} + +static int +add_classifier_instance(struct di_ctl_classifier *class, struct di_oid *params) +{ + static uint32_t clid_counter = 0; + struct di_classifier *s, *c; + struct di_classifier_alg *a; + char *alg_name, *name; + int needed, ret; + + alg_name = class->mod_name; + name = class->name; + ret = 0; + + DI_WLOCK_ASSERT(); + + DID("classifier add %s %s", alg_name, name); + + /* Check if instance exists. */ + s = search_classifier_instance(name); + + if (s == NULL) { + /* Check algo exits. 
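Instance removal above is reference-count guarded: an instance still bound to a rule is skipped, and the same guard reappears below for classifiers and exports. Its shape in isolation, with an explicit error code instead of the silent skip (illustrative):

static int
remove_if_unused(struct di_feature *s)
{

	if (s->ref_count > 0)
		return (EBUSY);		/* Still referenced by a rule. */
	LIST_REMOVE(s, next);
	free(s->name, M_DIFFUSE);
	s->alg->destroy_instance(&s->conf);
	s->alg->ref_count--;
	free(s, M_DIFFUSE);
	return (0);
}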
*/ + a = search_classifier_alg(alg_name); + if (a == NULL) { + DID("classifier %s algo doesn't exist", alg_name); + return (1); + } + + c = malloc(sizeof(struct di_classifier), M_DIFFUSE, + M_NOWAIT | M_ZERO); + if (c == NULL) + return (ENOMEM); + + if ((ret = a->init_instance(&c->conf, params))) + goto error; + + /* Check number of features. */ + needed = a->get_feature_cnt(&c->conf); + if (class->fscnt < needed) { + DID("classifier %s needs %d features", name, needed); + ret = EINVAL; + goto error; + } + if (class->fscnt > needed) + DID("classifier %s needs %d features, ignoring rest", + name, needed); + + c->name = malloc(strlen(name) + 1, M_DIFFUSE, + M_NOWAIT | M_ZERO); + if (c->name == NULL) { + ret = ENOMEM; + goto error; + } + + strcpy(c->name, name); + c->fscnt = class->fscnt; + c->ccnt = class->ccnt; + c->fstats = malloc((c->fscnt + c->ccnt) * + sizeof(struct di_feature_stat), M_DIFFUSE, + M_NOWAIT | M_ZERO); + if (c->fstats == NULL) { + ret = ENOMEM; + goto error; + } + + memcpy(c->fstats, class->fstats, (c->fscnt + c->ccnt) * + sizeof(struct di_feature_stat)); + + c->fstats_ptr = malloc(c->fscnt * + sizeof(struct di_feature_stat_ptr), M_DIFFUSE, + M_NOWAIT | M_ZERO); + if (c->fstats_ptr == NULL) { + ret = ENOMEM; + goto error; + } + + c->ref_count = 0; + c->alg = a; + c->confirm = class->confirm; + c->id = clid_counter++; + + LIST_INSERT_HEAD(&di_config.classifier_inst_list, c, next); + + a->ref_count++; + di_config.classifier_count++; + + return (0); + +error: + free(c->name, M_DIFFUSE); + free(c->fstats, M_DIFFUSE); + free(c->fstats_ptr, M_DIFFUSE); + a->destroy_instance(&c->conf); + free(c, M_DIFFUSE); + + return (ret); + } else { + DID("classifier %s already exists", name); + return (1); + } +} + +static int +remove_classifier_instance(char *name) +{ + struct di_classifier *s, *tmp; + + DI_WLOCK_ASSERT(); + + LIST_FOREACH_SAFE(s, &di_config.classifier_inst_list, next, tmp) { + if (!strcmp(s->name, name) && s->ref_count == 0) { + LIST_REMOVE(s, next); + free(s->name, M_DIFFUSE); + free(s->fstats, M_DIFFUSE); + free(s->fstats_ptr, M_DIFFUSE); + s->alg->destroy_instance(&s->conf); + s->alg->ref_count--; + free(s, M_DIFFUSE); + di_config.classifier_count--; + + return (0); + } + } + + DID("classifier %s can't remove", name); + return (1); +} + +/* + * If name == NULL we try to remove all instances, otherwise we only remove + * instances of the particular module. + */ +static int +diffuse_remove_classifier_instances(const char *name, int force) +{ + struct di_classifier *tmp, *r; + + DI_WLOCK(); + + LIST_FOREACH_SAFE(r, &di_config.classifier_inst_list, next, tmp) { + if ((name == NULL || !strcmp(r->alg->name, name)) && + (r->ref_count == 0 || force)) { + LIST_REMOVE(r, next); + free(r->name, M_DIFFUSE); + free(r->fstats, M_DIFFUSE); + free(r->fstats_ptr, M_DIFFUSE); + r->alg->destroy_instance(&r->conf); + r->alg->ref_count--; + free(r, M_DIFFUSE); + di_config.classifier_count--; + } + } + + DI_UNLOCK(); + + return (0); +} + +/* + * Export code. + */ + +static struct di_export * +search_export_instance(const char *name) +{ + struct di_export *s; + + DI_LOCK_ASSERT(); + + LIST_FOREACH(s, &di_config.export_list, next) { + if (strcmp(s->name, name) == 0) + return (s); + } + + return (NULL); +} + +/* Return 1 if there is any feature instance that is still used. 
*/ +static int +export_instance_used(void) +{ + struct di_export *s; + + DI_WLOCK(); + + LIST_FOREACH(s, &di_config.export_list, next) { + if (s->ref_count > 0) { + DI_UNLOCK(); + return (1); + } + } + + DI_UNLOCK(); + + return (0); +} + +static int +add_export_instance(const char *name, struct di_export *e_conf, + struct socket *sock) +{ + struct di_export *s, *e; + + DI_WLOCK_ASSERT(); + + DID("export add %s", name); + + /* Check if instance exists. */ + s = search_export_instance(name); + + if (s == NULL) { + e = malloc(sizeof(struct di_export), M_DIFFUSE, + M_NOWAIT | M_ZERO); + if (e == NULL) + return (ENOMEM); + + e->name = malloc(strlen(name) + 1, M_DIFFUSE, + M_NOWAIT | M_ZERO); + if (e->name == NULL) { + free(e, M_DIFFUSE); + return (ENOMEM); + } + + strcpy(e->name, name); + e->ref_count = 0; + e->conf = e_conf->conf; + e->sock = NULL; + e->seq_no = 0; + e->last_pkt_time.tv_sec = 0; + e->last_pkt_time.tv_usec = 0; + e->mh = NULL; + e->mt = NULL; + e->sock = sock; + + LIST_INSERT_HEAD(&di_config.export_list, e, next); + di_config.export_count++; + + return (0); + } else { + DID("export %s already exists", name); + return (1); + } +} + +static int +remove_export_instance(char *name) +{ + struct di_export *s, *tmp; + + DI_WLOCK_ASSERT(); + + LIST_FOREACH_SAFE(s, &di_config.export_list, next, tmp) { + if (s != NULL && s->ref_count == 0) { + LIST_REMOVE(s, next); + + diffuse_export_close(s->sock); + + free(s->name, M_DIFFUSE); + if (s->mh != NULL) + m_freem(s->mh); + free(s, M_DIFFUSE); + di_config.export_count--; + + return (0); + } + } + + DID("export %s can't remove", name); + return (1); +} + +static int +diffuse_remove_export_instances(int force) +{ + struct di_export *tmp, *r; + + DI_WLOCK(); + + LIST_FOREACH_SAFE(r, &di_config.export_list, next, tmp) { + if (r->ref_count == 0 || force) { + LIST_REMOVE(r, next); + DID("closing socket for export %s\n", r->name); + diffuse_export_close(r->sock); + free(r->name, M_DIFFUSE); + if (r->mh != NULL) + m_freem(r->mh); + free(r, M_DIFFUSE); + di_config.export_count--; + } + } + + DI_UNLOCK(); + + return (0); +} + +/* + * Code called from IPFW. + */ + +static int +chk_features(ipfw_insn_features *cmd) +{ + int c; + + DI_WLOCK_ASSERT(); + + for (c = 0; c < cmd->fcnt; c++) { + cmd->fptrs[c] = search_feature_instance(cmd->fnames[c]); + if (cmd->fptrs[c] == NULL) { + DID("feature %s doesn't exist", cmd->fnames[c]); + return (1); + } + } + + return (0); +} + +static int +chk_export(ipfw_insn_export *cmd) +{ + DI_WLOCK_ASSERT(); + + cmd->eptr = search_export_instance(cmd->ename); + if (cmd->eptr == NULL) { + DID("export %s doesn't exist", cmd->ename); + return (1); + } + + return (0); +} + +static int +chk_classifier(ipfw_insn_ml_classify *cmd, ipfw_insn_features *fcmd) +{ + struct di_classifier *class; + char **snames; + int i, j, scnt; + + scnt = 0; + + DI_WLOCK_ASSERT(); + + class = cmd->clptr = search_classifier_instance(cmd->cname); + if (cmd->clptr == NULL) { + DID("classifier %s doesn't exist", cmd->cname); + return (1); + } + + /* Add features to list and get indices. */ + for (i = 0; i < class->fscnt; i++) { + class->fstats_ptr[i].fptr = NULL; + + for (j = 0; j < fcmd->fcnt; j++) { + if (!strcmp(class->fstats[i].fname, fcmd->fnames[j])) { + class->fstats_ptr[i].fptr = fcmd->fptrs[j]; + break; + } + } + + if (class->fstats_ptr[i].fptr == NULL) { + /* Add feature to feature list if exists. 
*/ + fcmd->fptrs[fcmd->fcnt] = + search_feature_instance(class->fstats[i].fname); + if (fcmd->fptrs[fcmd->fcnt] == NULL) { + DID("feature %s doesn't exist", + class->fstats[i].fname); + return (1); + } + + strcpy(fcmd->fnames[fcmd->fcnt], + class->fstats[i].fname); + class->fstats_ptr[i].fptr = fcmd->fptrs[fcmd->fcnt]; + fcmd->fcnt++; + } + + /* + * If direction is indicated or feature module expects + * bidirectional, must set bidirectional. + */ + if (class->fstats[i].fdir == DI_MATCH_DIR_BCK || + class->fstats[i].fdir == DI_MATCH_DIR_FWD || + class->fstats_ptr[i].fptr->alg->type & + DI_FEATURE_ALG_BIDIRECTIONAL) { + fcmd->ftype |= DI_FLOW_TYPE_BIDIRECTIONAL; + } + /* Can't have direction with bidirectional module. */ + if (class->fstats[i].fdir != DI_MATCH_DIR_NONE && + class->fstats_ptr[i].fptr->alg->type & + DI_FEATURE_ALG_BIDIRECTIONAL) { + class->fstats[i].fdir = DI_MATCH_DIR_NONE; + } + } + + /* Get statistics indices. */ + for (i = 0; i < class->fscnt; i++) { + scnt = class->fstats_ptr[i].fptr->alg->get_stat_names(&snames); + class->fstats_ptr[i].sidx = 0; + + for (j = 0; j < scnt; j++) { + if (!strcmp(class->fstats[i].sname, snames[j])) + break; + + class->fstats_ptr[i].sidx++; + } + if (class->fstats_ptr[i].sidx >= scnt) { + DID("feature %s has no statistic %s", + class->fstats[i].fname, class->fstats[i].sname); + + return (1); + } + } + + return (0); +} + +static int +chk_class_tags(ipfw_insn_class_tags *cmd) +{ + int class_cnt; + + DI_WLOCK_ASSERT(); + + cmd->clptr = search_classifier_instance(cmd->cname); + if (cmd->clptr == NULL) { + DID("classifier %s doesn't exist", cmd->cname); + return (1); + } + + class_cnt = cmd->clptr->alg->get_class_cnt(&cmd->clptr->conf); + if (cmd->tcnt > class_cnt) { + DID("cannot have more tags (%d) than classifier %s has " + "classes (%d)", cmd->tcnt, cmd->cname, class_cnt); + return (1); + } + + return (0); +} + +static int +chk_match_if_class(ipfw_insn_match_if_class *cmd) +{ + int i, j; + + DI_WLOCK_ASSERT(); + + cmd->clptr = search_classifier_instance(cmd->cname); + if (cmd->clptr == NULL) { + DID("classifier %s doesn't exist", cmd->cname); + return (1); + } + + /* Get class numbers from names. */ + for (i = 0; i < cmd->mcnt; i++) { + if (cmd->clnames[i][0] != DI_CLASS_NO_CHAR) { + for (j = 0; j < cmd->clptr->ccnt; j++) { + if (!strcmp(cmd->clnames[i], + cmd->clptr-> + fstats[cmd->clptr->fscnt + j].fname)) { + break; + } + } + if (j < cmd->clptr->ccnt) { + cmd->match_classes[i] = j; + } else { + DID("classifier %s has no class %s", + cmd->cname, cmd->clnames[i]); + return (1); + } + } + } + + return (0); +} + +/* We can assume that we have had a prior ipfw_insn_features. */ +static int +chk_feature_match(ipfw_insn_feature_match *cmd, ipfw_insn_features *fcmd) +{ + char **snames; + int i, scnt; + + scnt = 0; + + DI_WLOCK_ASSERT(); + + cmd->fptr = NULL; + for (i = 0; i < fcmd->fcnt; i++) { + if (!strcmp(cmd->fname, fcmd->fnames[i])) { + cmd->fptr = fcmd->fptrs[i]; + break; + } + } + if (cmd->fptr == NULL) { + DID("feature %s doesn't exist", cmd->fname); + return (1); + } + + scnt = cmd->fptr->alg->get_stat_names(&snames); + cmd->sidx = 0; + for (i = 0; i < scnt; i++) { + if (!strcmp(cmd->sname, snames[i])) + break; + + cmd->sidx++; + } + if (cmd->sidx >= scnt) { + DID("feature %s has no statistic called %s", + cmd->fname, cmd->sname); + + return (1); + } + + /* Can't have direction with bidirectional module. 
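Both chk_classifier() and chk_feature_match() above resolve a statistic name to an index through the module's get_stat_names() hook, which returns the statistic count and points the caller at the module's name table. The lookup distilled (illustrative name):

static int
stat_index(struct di_feature *f, const char *sname)
{
	char **names;
	int i, scnt;

	scnt = f->alg->get_stat_names(&names);
	for (i = 0; i < scnt; i++) {
		if (strcmp(sname, names[i]) == 0)
			return (i);
	}
	return (-1);		/* No such statistic. */
}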
*/ + if (cmd->fdir != DI_MATCH_DIR_NONE&& + cmd->fptr->alg->type & DI_FEATURE_ALG_BIDIRECTIONAL) { + cmd->fdir = DI_MATCH_DIR_NONE; + } + + return (0); +} + +static int +ref_cnt_features(ipfw_insn_features *cmd, int inc) +{ + int c; + + DI_WLOCK_ASSERT(); + + for (c = 0; c < cmd->fcnt; c++) { + /* Pointers have been set before in chk_features. */ + if (inc) + cmd->fptrs[c]->ref_count++; + else + cmd->fptrs[c]->ref_count--; + + DID("feature %s ref cnt %d", cmd->fptrs[c]->name, + cmd->fptrs[c]->ref_count); + } + + return (0); +} + +static int +ref_cnt_classifier(ipfw_insn_ml_classify *cmd, int inc) +{ + DI_WLOCK_ASSERT(); + + /* Pointer has been set before in chk_classifier. */ + if (inc) + cmd->clptr->ref_count++; + else + cmd->clptr->ref_count--; + + DID("classifier %s ref cnt %d", cmd->cname, cmd->clptr->ref_count); + + return (0); +} + +static int +ref_cnt_export(ipfw_insn_export *cmd, int inc) +{ + DI_WLOCK_ASSERT(); + + /* Pointer has been set before in chk_export */ + if (inc) + cmd->eptr->ref_count++; + else + cmd->eptr->ref_count--; + + DID("export %s ref cnt %d\n", cmd->ename, cmd->eptr->ref_count); + + return (0); +} + +static int +diffuse_chk_rule_cmd(struct di_chk_rule_cmd_args *di_args, ipfw_insn *cmd, + int *have_action) +{ + switch(cmd->opcode) { + case O_DI_BEFORE_RULE_CHK: + /* Prior to checking any commands. */ + break; + + case O_DI_AFTER_RULE_CHK: + /* All rule instructions have been successfully checked. */ + break; + + case O_DI_FEATURES: + case O_DI_FEATURES_IMPLICIT: + if (cmd->len != F_INSN_SIZE(ipfw_insn_features)) + goto bad_size; + break; + + case O_DI_FLOW_TABLE: + if (cmd->len != F_INSN_SIZE(ipfw_insn)) + goto bad_size; + break; + + case O_DI_FEATURE_MATCH: + if (cmd->len != F_INSN_SIZE(ipfw_insn_feature_match)) + goto bad_size; + break; + + case O_DI_ML_CLASSIFY: + case O_DI_ML_CLASSIFY_IMPLICIT: + if (cmd->len != F_INSN_SIZE(ipfw_insn_ml_classify)) + goto bad_size; + if (cmd->opcode == O_DI_ML_CLASSIFY) + *have_action = 1; + break; + + case O_DI_CLASS_TAGS: + break; + + case O_DI_EXPORT: + if (cmd->len != F_INSN_SIZE(ipfw_insn_export)) + goto bad_size; + *have_action = 1; + break; + + case O_DI_MATCH_IF_CLASS: + if (cmd->len <= F_INSN_SIZE(ipfw_insn_match_if_class)) + goto bad_size; + break; + + default: + return (-1); + } + + return (0); + +bad_size: + DID("opcode %d size %d wrong", cmd->opcode, cmd->len); + + return (EINVAL); +} + +/* Add IPFW tag to packet. */ +static void +ipfw_tag_packet(struct mbuf *m, uint16_t tag) +{ + struct m_tag *mtag; + + mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL); + if (mtag == NULL) { + if ((mtag = m_tag_alloc(MTAG_IPFW, tag, 0, M_NOWAIT)) != NULL) + m_tag_prepend(m, mtag); + } +} + +static int +diffuse_chk_pkt_cmd(struct di_chk_pkt_args *di_args, struct ip_fw *f, + int opcode, ipfw_insn **cmd, int *cmdlen, struct ip_fw_args *args, + void *ulp, int pktlen, int *dir, int *match, int *l, int *done, int *retval) +{ + + switch(opcode) { + case O_DI_BEFORE_ALL_RULES: + /* + * We cache flow entry q for quick access to feature data, as q + * cannot be deleted while in this function. Need to take care + * when reading/writing from q! + */ + di_args->q = NULL; + di_args->tcmd = NULL; + di_args->no_class = 0; + DI_RLOCK(); + break; + + case O_DI_FEATURES: + case O_DI_FEATURES_IMPLICIT: + { + *match = 1; + + if (*dir == MATCH_UNKNOWN || *dir == MATCH_NONE) { + if ((di_args->q = diffuse_ft_install_state(f, + (ipfw_insn_features *)*cmd, args, ulp, + pktlen)) == NULL) { + /* Exit. 
*/ + *retval = IP_FW_DENY; + *l = 0; + *done = 1; + } + } else if (di_args->q) { + /* + * Check if we need to add further features for + * this rule. + */ + if (diffuse_ft_update_features(di_args->q, + (ipfw_insn_features *)*cmd, args, + ulp) < 0) { + *match = 0; + *l = 0; + } + } + } + break; + + case O_DI_FLOW_TABLE: + { + *match = 1; + + /* Lookup will trigger feature update. */ + if (*dir != MATCH_UNKNOWN || + (di_args->q = diffuse_ft_lookup_entry(&args->f_id, args, + ulp, pktlen, dir)) == NULL) { + break; + } + + if (di_args->q->ftype & DI_MATCH_ONCE) { + di_args->no_class = 1; + + } else if (di_args->q->ftype & DI_MATCH_SAMPLE_REG) { + if (di_args->q->pkts_after_last < + di_args->q->sample_int) { + di_args->q->pkts_after_last++; + di_args->no_class = 1; + } else { + di_args->q->pkts_after_last = 1; + } + + } else if (di_args->q->ftype & DI_MATCH_SAMPLE_RAND) { + if (random() >= di_args->q->sample_prob) + di_args->no_class = 1; + + } else if (di_args->q->ftype & DI_MATCH_ONCE_CLASS) { + if (!SLIST_EMPTY(&di_args->q->flow_classes)) + di_args->no_class = 1; + + } else if (di_args->q->ftype & DI_MATCH_ONCE_EXP) { + if (!SLIST_EMPTY(&di_args->q->ex_list)) + di_args->no_class = 1; + } + + diffuse_ft_unlock(); + } + break; + + case O_DI_AFTER_EACH_RULE: + /* Called after every rule. */ + break; + + case O_DI_FEATURE_MATCH: + { + ipfw_insn_feature_match *fm; + int ret; + int32_t val; + + ret = 0; + *match = 0; + fm = (ipfw_insn_feature_match *)*cmd; + + ret = diffuse_ft_get_stat(di_args->q, fm->fdir, fm->fptr, + fm->sidx, &val); + if (!ret) + break; + + switch(fm->comp) { + case DI_COMP_LT: + *match = val < fm->thresh ? 1 : 0; + break; + case DI_COMP_LE: + *match = val <= fm->thresh ? 1 : 0; + break; + case DI_COMP_EQ: + *match = val == fm->thresh ? 1 : 0; + break; + case DI_COMP_GE: + *match = val >= fm->thresh ? 1 : 0; + break; + case DI_COMP_GT: + *match = val > fm->thresh ? 1 : 0; + break; + default: + printf("invalid comparator\n"); + } + } + break; + + case O_DI_ML_CLASSIFY: + case O_DI_ML_CLASSIFY_IMPLICIT: + { + ipfw_insn_ml_classify *clcmd = (ipfw_insn_ml_classify *)*cmd; + struct di_class_tag *ctag; + struct di_classifier *cl; + int32_t fvec[clcmd->clptr->fscnt]; + int class, confirm, prev_class, ret; + + cl = clcmd->clptr; + confirm = ret = 0; + class = prev_class = -1; + *match = 1; + + /* + * We can have multiple tags per mbuf, but only one tag per + * classifier. + */ + ctag = (struct di_class_tag *)m_tag_locate(args->m, + MTAG_DIFFUSE_CLASS, cl->id, NULL); + if (ctag != NULL) { + if (di_args->tcmd && + di_args->tcmd->tcnt > ctag->class) { + /* + * Ensure we get IPFW tags based on previous + * classification. + */ + ipfw_tag_packet(args->m, + di_args->tcmd->tags[ctag->class]); + } + goto done; + } + + /* Get existing class (if any). */ + class = diffuse_ft_get_class(di_args->q, cl->name, &prev_class, + &confirm); + DID2("old class: %d", class); + if (!di_args->no_class || class == -1) { + ret = diffuse_ft_get_stats(di_args->q, cl->fscnt, + cl->fstats, cl->fstats_ptr, fvec); + if (ret) { + class = cl->alg->classify(&cl->conf, fvec, + cl->fscnt); + DID2("new class: %d", class); + + /* Add class tag in flow table. 
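The comparator dispatch in the O_DI_FEATURE_MATCH case above, reshaped as a reusable predicate (behaviour-identical sketch):

static int
feature_cmp(int comp, int32_t val, int32_t thresh)
{

	switch (comp) {
	case DI_COMP_LT:
		return (val < thresh);
	case DI_COMP_LE:
		return (val <= thresh);
	case DI_COMP_EQ:
		return (val == thresh);
	case DI_COMP_GE:
		return (val >= thresh);
	case DI_COMP_GT:
		return (val > thresh);
	default:
		return (0);	/* Unknown comparator: no match. */
	}
}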
*/ + confirm = cl->confirm; + diffuse_ft_add_class(di_args->q, cl->name, + class, &prev_class, &confirm); + } + } + + if (class != -1) { + if ((ctag = (struct di_class_tag *)m_tag_alloc( + MTAG_DIFFUSE_CLASS, + cl->id, + sizeof(struct di_class_tag) - sizeof(struct m_tag), + M_NOWAIT)) != NULL) { + ctag->class = class; + ctag->prev_class = prev_class; + ctag->confirm = confirm; + m_tag_prepend(args->m, (struct m_tag *)ctag); + } + + if (di_args->tcmd && di_args->tcmd->tcnt > class) { + /* Tag using IPFW tag. */ + ipfw_tag_packet(args->m, + di_args->tcmd->tags[class]); + } + } + +done: + if (opcode == O_DI_ML_CLASSIFY) { + /* Update stats, but only if explicit action.*/ + f->pcnt++; + f->bcnt += pktlen; + f->timestamp = time_uptime; + *l = 0; /* Continue with next rule. */ + } + + } + break; + + case O_DI_CLASS_TAGS: + *match = 1; + di_args->tcmd = (ipfw_insn_class_tags *)*cmd; + break; + + case O_DI_EXPORT: + { + ipfw_insn_export *ex; + struct di_export_rec *ex_rec; + + ex = (ipfw_insn_export *)*cmd; + *match = 1; + + if (di_args->q != NULL) { + ex_rec = NULL; + + if (diffuse_ft_do_export(di_args->q, + ex->eptr->conf.confirm)) { + ex_rec = diffuse_export_add_rec(di_args->q, + ex->eptr, 1); + } + diffuse_ft_unlock(); + diffuse_ft_add_export(di_args->q, ex_rec, ex->eptr); + + /* Update stats. */ + f->pcnt++; + f->bcnt += pktlen; + f->timestamp = time_uptime; + } + + *l = 0; /* Continue with next rule. */ + } + break; + + case O_DI_AFTER_ALL_RULES: + { + /* + * Do this bit after rule parsing outside lock, if we wanted to + * use in-kernel shortcut to add/remove rules it must be done + * after IPFW_RUNLOCK(&V_layer3_chain) + */ + + DI_UNLOCK(); + + /* Limit total number of recs. */ + diffuse_export_prune_recs(); + + DID2("export recs %u", di_config.export_rec_count); + + } + break; + + case O_DI_MATCH_IF_CLASS: + { + ipfw_insn_match_if_class *mic; + struct di_class_tag *ctag; + int i; + + mic = (ipfw_insn_match_if_class *)*cmd; + *match = 0; + +#ifdef DIFFUSE_DEBUG2 + { + struct m_tag *t; + for (t = m_tag_first(args->m); t != NULL; + t = m_tag_next(args->m, t)) { + DID2("tag id %d len %d", t->m_tag_id, + t->m_tag_len); + } + } +#endif + + ctag = (struct di_class_tag *)m_tag_locate(args->m, + MTAG_DIFFUSE_CLASS, mic->clptr->id, NULL); + if (ctag != NULL) { + for (i = 0; i < mic->mcnt; i++) { + if ((ctag->confirm >= mic->clptr->confirm && + ctag->class == mic->match_classes[i]) || + (mic->clptr->confirm > 0 && + ctag->confirm < mic->clptr->confirm && + ctag->prev_class == + mic->match_classes[i])) { + *match = 1; + break; + } + } + } + } + break; + + default: + return (-1); + } + + return (0); +} + +static void +diffuse_remove_rule(struct ip_fw *rule) +{ + int l, cmdlen; + ipfw_insn *cmd; + + cmdlen = 0; + + /* Decrease features ref counters. */ + for (l = rule->cmd_len, cmd = rule->cmd; l > 0; l -= cmdlen, + cmd += cmdlen) { + cmdlen = F_LEN(cmd); + + /* + * We don't need a check here on cmdlen because the checks were + * done previously in ipfw_chk_struct. + */ + + /* + * Don't need to handle O_DI_FEATURE_MATCH or O_DI_MATCH_IF + * because they generate O_DI_FEATURES, O_DI_ML_CLASSIFY. 
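The O_DI_MATCH_IF_CLASS test above encodes a small hysteresis: a packet matches on the current class once it is sufficiently confirmed, and otherwise falls back to the previous well-confirmed class while a fresh verdict is still settling. The predicate in isolation (illustrative):

static int
class_matches(const struct di_class_tag *t, int class, int need_confirm)
{

	if (t->confirm >= need_confirm && t->class == class)
		return (1);
	if (need_confirm > 0 && t->confirm < need_confirm &&
	    t->prev_class == class)
		return (1);
	return (0);
}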
+ */ + + switch(cmd->opcode) { + case O_DI_FEATURES: + case O_DI_FEATURES_IMPLICIT: + DI_WLOCK(); + ref_cnt_features((ipfw_insn_features *)cmd, 0); + DI_UNLOCK(); + break; + + case O_DI_ML_CLASSIFY: + case O_DI_ML_CLASSIFY_IMPLICIT: + DI_WLOCK(); + ref_cnt_classifier((ipfw_insn_ml_classify *)cmd, 0); + DI_UNLOCK(); + break; + + case O_DI_EXPORT: + DI_WLOCK(); + ref_cnt_export((ipfw_insn_export *)cmd, 0); + DI_UNLOCK(); + /* Remove all export records associated with rule. */ + diffuse_export_remove_recs( + ((ipfw_insn_export *)cmd)->ename); + break; + } + } + + /* Remove all entries in flow table associated with rule. */ + diffuse_ft_remove_entries(rule); +} + +/* We assume that past this function rule adding WILL succeed. */ +static int +diffuse_add_rule(struct ip_fw *rule) +{ + ipfw_insn_features *fcmd; + ipfw_insn *cmd; + int cmdlen, l; + + fcmd = NULL; + cmdlen = 0; + + DI_WLOCK(); + + /* + * First pass: link rules to instances of features, classifiers, + * exports. + */ + for (l = rule->cmd_len, cmd = rule->cmd; l > 0; l -= cmdlen, + cmd += cmdlen) { + cmdlen = F_LEN(cmd); + + /* + * We don't need check here on cmdlen because the checks were + * done previously in ipfw_chk_struct. + */ + + switch(cmd->opcode) { + case O_DI_FEATURES: + case O_DI_FEATURES_IMPLICIT: + if (chk_features((ipfw_insn_features *)cmd)) + goto bad; + + fcmd = (ipfw_insn_features *)cmd; + break; + + case O_DI_FEATURE_MATCH: + if (fcmd == NULL) { + /* Must have _previous_ feature instruction. */ + goto bad; + } + if (chk_feature_match((ipfw_insn_feature_match *)cmd, + fcmd)) { + goto bad; + } + break; + + case O_DI_ML_CLASSIFY: + case O_DI_ML_CLASSIFY_IMPLICIT: + if (fcmd == NULL) { + /* Must have _previous_ feature instruction. */ + goto bad; + } + + if (chk_classifier((ipfw_insn_ml_classify *)cmd, + fcmd)) { + goto bad; + } + break; + + case O_DI_CLASS_TAGS: + if (chk_class_tags((ipfw_insn_class_tags *)cmd)) + goto bad; + break; + + case O_DI_EXPORT: + if (chk_export((ipfw_insn_export *)cmd)) + goto bad; + break; + + case O_DI_MATCH_IF_CLASS: + if (chk_match_if_class((ipfw_insn_match_if_class *)cmd)) + goto bad; + break; + } + } + + cmdlen = 0; + + /* Second pass: increase ref counters. */ + for (l = rule->cmd_len, cmd = rule->cmd; l > 0; l -= cmdlen, + cmd += cmdlen) { + cmdlen = F_LEN(cmd); + + switch(cmd->opcode) { + case O_DI_FEATURES: + case O_DI_FEATURES_IMPLICIT: + ref_cnt_features((ipfw_insn_features *)cmd, 1); + break; + + case O_DI_ML_CLASSIFY: + case O_DI_ML_CLASSIFY_IMPLICIT: + ref_cnt_classifier((ipfw_insn_ml_classify *)cmd, 1); + break; + + case O_DI_EXPORT: + ref_cnt_export((ipfw_insn_export *)cmd, 1); + break; + } + } + + DI_UNLOCK(); + return (0); + +bad: + DI_UNLOCK(); + return (EINVAL); +} + +/* Functions called from ipfw */ +ipfw_ext_t diffuse_ext = { + .chk_rule_cmd = diffuse_chk_rule_cmd, + .chk_pkt_cmd = diffuse_chk_pkt_cmd, + .remove_rule = diffuse_remove_rule, + .add_rule = diffuse_add_rule, +}; + +/* + * Configuration functions + */ + +/* Config classifier. */ +static int +config_classifier(struct di_ctl_classifier *c, struct di_oid *c_conf, + struct di_oid *arg) +{ + + return (add_classifier_instance(c, c_conf)); +} + +/* Configure a feature. */ +static int +config_feature(struct di_ctl_feature *f, struct di_oid *f_conf, struct di_oid *arg) +{ + + return (add_feature_instance(f->mod_name, f->name, f_conf)); +} + +/* Config export. 
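diffuse_add_rule() above is deliberately two-pass: the first pass resolves every name and may fail with no side effects, the second pass only bumps reference counts and cannot fail, so a rejected rule never leaves dangling references behind. The generic shape (validate_all() and take_references() are invented stand-ins):

static int
two_pass_commit(ipfw_insn *cmds, int len)
{

	/* Pass 1: resolve and validate; failure leaves no state behind. */
	if (validate_all(cmds, len) != 0)
		return (EINVAL);
	/* Pass 2: infallible side effects only (refcount increments). */
	take_references(cmds, len);
	return (0);
}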
*/ +static int +config_export(struct di_export *e, struct di_oid *arg) +{ + struct socket *sock; + int ret; + + sock = diffuse_export_open(&e->conf); + if (sock == NULL) { + DID("can't open socket"); + return (1); + } + + DI_WLOCK(); + ret = add_export_instance(e->name, e, sock); + DI_UNLOCK(); + + if (ret && sock) { + /* If error need to close open socket. */ + diffuse_export_close(sock); + } + + return (ret); +} + +/* Delete/zero all objects. */ +static void +diffuse_flush(int reset_counters_only) +{ + + /* XXX: currently we only have flush/zero commands for flow table. */ + DID("flowtable flush %i", reset_counters_only); + diffuse_ft_flush(reset_counters_only); +} + +/* Compute space needed for feature config export. */ +static int +compute_space_feature(struct di_oid *cmd) +{ + struct di_feature *s; + char *name; + int needed; + + needed = 0; + + if (cmd != NULL) + name = ((struct di_feature *)cmd)->name; + else + name = "all"; + + if (strcmp(name, "all")) { + s = search_feature_instance(name); + if (s == NULL) + return (-1); + + needed += sizeof(struct di_feature); + needed += s->alg->get_conf(&s->conf, NULL, 1); + } else { + /* All feature instances. */ + LIST_FOREACH(s, &di_config.feature_inst_list, next) { + needed += sizeof(struct di_feature); + needed += s->alg->get_conf(&s->conf, NULL, 1); + } + } + + return (needed); +} + +/* + * Compute space needed for classifier config export. + * If name == "all", copy all names but not the confs. + */ +static int +compute_space_class(struct di_oid *cmd) +{ + struct di_classifier *s; + char *name; + int needed; + + needed = 0; + name = ((struct di_classifier *)cmd)->name; + + if (strcmp(name, "all")) { + s = search_classifier_instance(name); + if (s == NULL) + return (-1); + + needed += sizeof(struct di_classifier); + needed += (s->fscnt + s->ccnt)* sizeof(struct di_feature_stat); + needed += s->alg->get_conf(&s->conf, NULL, 1); + } else { + /* All classifier instances. */ + LIST_FOREACH(s, &di_config.classifier_inst_list, next) { + needed += sizeof(struct di_classifier); + needed += (s->fscnt + s->ccnt) * sizeof(struct di_feature_stat); + } + } + + return (needed); +} + +/* + * Compute space needed for export config export. + * If name == "all", copy all names but not the confs. + */ +static int +compute_space_export(struct di_oid *cmd) +{ + struct di_export *s; + char *name; + int needed; + + needed = 0; + name = ((struct di_export *)cmd)->name; + + if (strcmp(name, "all")) { + s = search_export_instance(name); + if (s == NULL) + return (-1); + + needed += sizeof(struct di_export); + } else { + /* All export instances. */ + LIST_FOREACH(s, &di_config.export_list, next) { + needed += sizeof(struct di_export); + } + } + + return (needed); +} + +/* Copy feature. */ +static int +copy_feature(char *buf, struct di_feature *s) +{ + struct di_ctl_feature *f; + + f = (struct di_ctl_feature *)buf; + + f->oid.type = DI_FEATURE; + f->oid.subtype = 0; + f->oid.len = sizeof(struct di_feature); + strcpy(f->name, s->name); + strcpy(f->mod_name, s->alg->name); + + return (sizeof(struct di_ctl_feature)); +} + +/* Copy feature configuration/parameters. */ +static int +copy_feature_conf(char *buf, struct di_feature *s) +{ + int size; + struct di_oid *fconf; + + fconf = (struct di_oid *)buf; + + size = s->alg->get_conf(&s->conf, fconf, 0); + fconf->type = DI_FEATURE_CONFIG; + fconf->subtype = 0; + fconf->len = size; + + return (size); +} + +/* Copy all features. 
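Sizing and copying both lean on the module get_conf() contract visible above: called with a NULL buffer and the size-only flag set it returns the byte count, called again with a real buffer it writes the configuration and returns the same count. Combined usage (illustrative name):

static int
conf_size_then_copy(struct di_feature *s, char *buf)
{
	int need;

	need = s->alg->get_conf(&s->conf, NULL, 1);	/* Size query. */
	if (buf == NULL)
		return (need);
	/* Second call actually serialises the configuration. */
	return (s->alg->get_conf(&s->conf, (struct di_oid *)buf, 0));
}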
*/ +static int +copy_features(struct di_oid *cmd, char **buf) +{ + struct di_feature *s; + char *name; + int size, r; + + size = 0; + + if (cmd != NULL) + name = ((struct di_feature *)cmd)->name; + else + name = "all"; + + if (strcmp(name, "all") != 0) { + s = search_feature_instance(name); + if (s == NULL) + return (-1); + + r = copy_feature(*buf, s); + *buf += r; + size += r; + r = copy_feature_conf(*buf, s); + *buf += r; + size += r; + } else { + /* All feature instances. */ + LIST_FOREACH(s, &di_config.feature_inst_list, next) { + r = copy_feature(*buf, s); + *buf += r; + size += r; + r = copy_feature_conf(*buf, s); + *buf += r; + size += r; + } + } + + return (size); +} + +/* Copy classifier. */ +static int +copy_classifier(char *buf, struct di_classifier *s) +{ + struct di_ctl_classifier *c; + + c = (struct di_ctl_classifier *)buf; + + c->oid.type = DI_CLASSIFIER; + c->oid.subtype = 0; + c->oid.len = sizeof(struct di_classifier) + + (s->fscnt + s->ccnt) * sizeof(struct di_feature_stat); + strcpy(c->name, s->name); + strcpy(c->mod_name, s->alg->name); + c->fscnt = s->fscnt; + c->ccnt = s->ccnt; + c->confirm = s->confirm; + memcpy(c->fstats, s->fstats, (s->fscnt + s->ccnt) * + sizeof(struct di_feature_stat)); + + return (c->oid.len); +} + +/* Copy classifier configuration/model. */ +static int +copy_classifier_conf(char *buf, struct di_classifier *s) +{ + struct di_oid *cconf; + int size; + + cconf = (struct di_oid *)buf; + + size = s->alg->get_conf(&s->conf, cconf, 0); + cconf->type = DI_CLASSIFIER_CONFIG; + cconf->subtype = 0; + cconf->len = size; + + return (size); +} + +/* + * Copy classifiers. + * If name == "all", copy all names but not the configurations. + */ +static int +copy_classifiers(struct di_oid *cmd, char **buf) +{ + struct di_classifier *s; + char *name; + int r, size; + + size = 0; + name = ((struct di_classifier *)cmd)->name; + + if (strcmp(name, "all")) { + s = search_classifier_instance(name); + if (s == NULL) + return (-1); + + r = copy_classifier(*buf, s); + *buf += r; + size += r; + r = copy_classifier_conf(*buf, s); + *buf += r; + size += r; + } else { + /* All classifier instances. */ + LIST_FOREACH(s, &di_config.classifier_inst_list, next) { + r = copy_classifier(*buf, s); + *buf += r; + size += r; + } + } + + return (size); +} + +/* Copy export. */ +static int +copy_export(char *buf, struct di_export *s) +{ + struct di_ctl_export *e; + + e = (struct di_ctl_export *)buf; + + e->oid.type = DI_EXPORT; + e->oid.subtype = 0; + e->oid.len = sizeof(struct di_export); + strcpy(e->name, s->name); + e->conf = s->conf; + + return (sizeof(struct di_export)); +} + +/* + * Copy exports. + * If name == "all", copy all names but not the configurations. + */ +static int +copy_exports(struct di_oid *cmd, char **buf) +{ + struct di_export *s; + char *name; + int r, size; + + size = 0; + name = ((struct di_export *)cmd)->name; + + if (strcmp(name, "all")) { + s = search_export_instance(name); + if (s == NULL) + return (-1); + + r = copy_export(*buf, s); + *buf += r; + size += r; + } else { + /* All export instances. */ + LIST_FOREACH(s, &di_config.export_list, next) { + r = copy_export(*buf, s); + *buf += r; + size += r; + } + } + + return (size); +} + +/* + * Main handler for getting info. + * XXX: Support comma separated lists of features/classifiers/exports. 
+ */ +static int +get_info(struct sockopt *sopt) +{ +#define TRIES 10 + struct di_oid *base, *cmd, *o; + struct di_oid oid; + char *start, *buf; + size_t sopt_valsize; + int error, ftable_len, have, i, l, need; + + ftable_len = have = need = 0; + l = sizeof(struct di_oid); + start = NULL; + base = cmd = NULL; + + /* Save and restore original sopt_valsize around copyin. */ + sopt_valsize = sopt->sopt_valsize; + error = sooptcopyin(sopt, &oid, l, l); + sopt->sopt_valsize = sopt_valsize; + + if (error) + return (error); + + if (oid.type != DI_CMD_GET) + return (EINVAL); + + if (oid.subtype == DI_FEATURE) { + DID("show feature"); + l += sizeof(struct di_feature); + } else if (oid.subtype == DI_CLASSIFIER) { + DID("show classifier"); + l += sizeof(struct di_classifier); + } else if (oid.subtype == DI_EXPORT) { + DID("show export"); + l += sizeof(struct di_export); + } else { + DID("show flow table"); + } + + cmd = base = malloc(l, M_DIFFUSE, M_WAITOK); + + error = sooptcopyin(sopt, cmd, l, l); + sopt->sopt_valsize = sopt_valsize; + if (error) + goto done; + + cmd = (struct di_oid *)((char *)cmd + cmd->len); + + /* + * Count space (under lock) and allocate (outside lock). + * Exit with lock held if we manage to get enough buffer. + * Try a few times then give up. + */ + /* XXX: Should rewrite this bit of code. */ + for (have = 0, i = 0; i < TRIES; i++) { + DI_WLOCK(); + if (oid.subtype == DI_FEATURE) { + need = compute_space_feature(cmd); + } else if (oid.subtype == DI_CLASSIFIER) { + need = compute_space_class(cmd); + } else if (oid.subtype == DI_EXPORT) { + need = compute_space_export(cmd); + } else if (oid.subtype == DI_FLOW_TABLE) { + need = compute_space_feature(NULL); + if (need >= 0) { + ftable_len = sizeof(struct di_oid) + + diffuse_ft_len(oid.flags & + DI_FT_GET_EXPIRED); + need += ftable_len; + } + } + + if (need < 0) { + DI_UNLOCK(); + error = EINVAL; + goto done; + } + need += sizeof(struct di_oid); + + DID2("need %d bytes", need); + DID2("have %d bytes", have); + + /* So we get this into userspace. */ + base->id = need; + if (have >= need) + break; + + DI_UNLOCK(); + + if (start) + free(start, M_DIFFUSE); + + start = NULL; + /* Stop if socket option buffer still too small or last try. */ + if (need > sopt_valsize || i == TRIES - 1) + break; + + have = need; + start = malloc(have, M_DIFFUSE, M_WAITOK | M_ZERO); + if (start == NULL) { + error = ENOMEM; + goto done; + } + } + + if (start == NULL) { + /* + * If sopt buffer too small or run out of tries tell user space + * to allocate more memory. + */ + error = sooptcopyout(sopt, base, l); + goto done; + } + +#ifdef DIFFUSE_DEBUG + if (oid.subtype == DI_FEATURE || oid.subtype == DI_FLOW_TABLE) { + DID("have %d features", di_config.feature_count); + if (oid.subtype == DI_FLOW_TABLE) + DID("have %d flows", diffuse_ft_entries()); + } else if (oid.subtype == DI_CLASSIFIER) { + DID("have %d classifiers", di_config.classifier_count); + } else if (oid.subtype == DI_EXPORT) { + DID("have %d exports", di_config.export_count); + } +#endif + + sopt->sopt_valsize = sopt_valsize; + bcopy(base, start, sizeof(struct di_oid)); + ((struct di_oid *)(start))->len = sizeof(struct di_oid); + buf = start + sizeof(struct di_oid); + /* Copy objects. 
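The userspace half of this size negotiation: when the reply does not fit, the kernel hands back just the request header with the required size in oid.id, and the caller grows its buffer and retries. A hedged sketch of that consumer loop (socket setup and error handling trimmed; layout as used in this file):

static void *
fetch_oid(int fd, const struct di_oid *req, socklen_t req_len)
{
	struct di_oid *buf;
	socklen_t alloc, len;

	alloc = req_len;
	buf = malloc(alloc);
	for (;;) {
		memcpy(buf, req, req_len);	/* (Re-)issue the request. */
		len = alloc;
		if (getsockopt(fd, IPPROTO_IP, IP_DIFFUSE, buf, &len) < 0) {
			free(buf);
			return (NULL);
		}
		if (buf->id <= alloc)
			return (buf);		/* Reply fit in the buffer. */
		alloc = buf->id;		/* Kernel-advertised size. */
		buf = realloc(buf, alloc);
	}
}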
+         */
+        if (oid.subtype == DI_FEATURE) {
+                if (copy_features(cmd, &buf) < 0) {
+                        error = EINVAL;
+                        DI_UNLOCK();
+                        goto done;
+                }
+        } else if (oid.subtype == DI_CLASSIFIER) {
+                if (copy_classifiers(cmd, &buf) < 0) {
+                        error = EINVAL;
+                        DI_UNLOCK();
+                        goto done;
+                }
+        } else if (oid.subtype == DI_EXPORT) {
+                if (copy_exports(cmd, &buf) < 0) {
+                        error = EINVAL;
+                        DI_UNLOCK();
+                        goto done;
+                }
+        } else if (oid.subtype == DI_FLOW_TABLE) {
+                if (copy_features(NULL, &buf) < 0) {
+                        error = EINVAL;
+                        DI_UNLOCK();
+                        goto done;
+                }
+
+                o = (struct di_oid *)buf;
+                o->type = DI_FLOW_TABLE;
+                o->subtype = 0;
+                o->len = ftable_len;
+                buf += sizeof(struct di_oid);
+
+                /* Copy flow table. */
+                diffuse_get_ft(&buf, buf + have,
+                    oid.flags & DI_FT_GET_EXPIRED);
+        }
+        DID("total size %ld", (long int)(buf - start));
+
+        DI_UNLOCK();
+
+        error = sooptcopyout(sopt, start, buf - start);
+done:
+        if (base)
+                free(base, M_DIFFUSE);
+
+        if (start)
+                free(start, M_DIFFUSE);
+
+        return (error);
+}
+
+/*
+ * Main handler for configuration. We are guaranteed to be called with an oid
+ * which is at least a di_oid. The first object is the command (config,
+ * delete, flush, ...).
+ */
+static int
+do_config(void *p, int l)
+{
+        struct di_oid *arg, *next, *o, *prev;
+        int action, err;
+
+        arg = NULL;
+        action = err = 0;
+
+        o = p;
+        if (o->id != DI_API_VERSION) {
+                DID("invalid api version got %d need %d", o->id,
+                    DI_API_VERSION);
+                return (EINVAL);
+        }
+
+        for (; l >= sizeof(*o); o = next) {
+                prev = arg;
+
+                if (o->len < sizeof(*o) || l < o->len) {
+                        DID("bad len o->len %d len %d", o->len, l);
+                        err = EINVAL;
+                        break;
+                }
+                l -= o->len;
+                next = (struct di_oid *)((char *)o + o->len);
+                err = 0;
+
+                switch (o->type) {
+                default:
+                        DID("cmd %d not implemented", o->type);
+                        break;
+
+                case DI_CMD_CONFIG:     /* Simply a header. */
+                        action = DI_CMD_CONFIG;
+                        break;
+
+                case DI_CMD_DELETE:
+                        action = DI_CMD_DELETE;
+                        break;
+
+                case DI_CMD_ZERO:
+                        DI_WLOCK();
+                        diffuse_flush(1);
+                        DI_UNLOCK();
+                        break;
+
+                case DI_CMD_FLUSH:
+                        DI_WLOCK();
+                        diffuse_flush(0);
+                        DI_UNLOCK();
+                        break;
+
+                case DI_FEATURE:
+                        if (action == DI_CMD_CONFIG) {
+                                DID("configure feature");
+                                DI_WLOCK();
+                                err = config_feature(
+                                    (struct di_ctl_feature *)o, next, arg);
+                                DI_UNLOCK();
+
+                                /* Eat the feature conf: skip past it. */
+                                l -= next->len;
+                                next = (struct di_oid *)((char *)next +
+                                    next->len);
+                        } else if (action == DI_CMD_DELETE) {
+                                DID("delete feature");
+                                DI_WLOCK();
+                                err = remove_feature_instance(
+                                    ((struct di_feature *)o)->name);
+                                DI_UNLOCK();
+                        }
+                        break;
+
+                case DI_CLASSIFIER:
+                        if (action == DI_CMD_CONFIG) {
+                                DID("configure classifier");
+                                DI_WLOCK();
+                                err = config_classifier(
+                                    (struct di_ctl_classifier *)o, next, arg);
+                                DI_UNLOCK();
+
+                                /* Eat the classifier conf: skip past it. */
+                                l -= next->len;
+                                next = (struct di_oid *)((char *)next +
+                                    next->len);
+                        } else if (action == DI_CMD_DELETE) {
+                                DID("delete classifier");
+                                DI_WLOCK();
+                                err = remove_classifier_instance(
+                                    ((struct di_classifier *)o)->name);
+                                DI_UNLOCK();
+                        }
+                        break;
+
+                case DI_EXPORT:
+                        if (action == DI_CMD_CONFIG) {
+                                DID("configure export");
+                                err = config_export((struct di_export *)o, arg);
+                        } else if (action == DI_CMD_DELETE) {
+                                DID("delete export");
+                                DI_WLOCK();
+                                err = remove_export_instance(
+                                    ((struct di_export *)o)->name);
+                                DI_UNLOCK();
+                        }
+                        break;
+                }
+
+                if (prev)
+                        arg = NULL;
+
+                if (err != 0)
+                        break;
+        }
+
+        return (err);
+}
+
+/*
+ * Handler for the various socket options (userspace commands).
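+ *
+ * GET requests are answered by get_info(); SET requests are length
+ * checked, copied in and dispatched to do_config(). Userspace would
+ * typically drive this via getsockopt(2)/setsockopt(2) on a raw IP
+ * socket, as ipfw(8) does. A minimal sketch (buf and len are
+ * caller-supplied, error handling omitted):
+ *
+ *      int s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
+ *      socklen_t len = sizeof(buf);
+ *
+ *      getsockopt(s, IPPROTO_IP, IP_DIFFUSE, buf, &len);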
+ */
+static int
+diffuse_ctl(struct sockopt *sopt)
+{
+        void *p;
+        int error, l;
+
+        p = NULL;
+
+        /* Use the same privileges as ipfw (see sys/priv.h). */
+        error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW);
+        if (error)
+                return (error);
+
+        /* Disallow sets in really-really secure mode. */
+        if (sopt->sopt_dir == SOPT_SET) {
+                error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
+                if (error)
+                        return (error);
+        }
+
+        switch (sopt->sopt_name) {
+        default:
+                DID("unknown option %d", sopt->sopt_name);
+                error = EINVAL;
+                break;
+
+        case IP_DIFFUSE:
+                if (sopt->sopt_dir == SOPT_GET) {
+                        error = get_info(sopt);
+                        break;
+                }
+                l = sopt->sopt_valsize;
+                if (l < sizeof(struct di_oid) || l > 65535) {
+                        /* XXX: What max value? */
+                        error = EINVAL;
+                        DID("argument len %d invalid", l);
+                        break;
+                }
+                p = malloc(l, M_TEMP, M_WAITOK);
+                error = sooptcopyin(sopt, p, l, l);
+                if (error)
+                        break;
+
+                error = do_config(p, l);
+                break;
+        }
+
+        if (p != NULL)
+                free(p, M_TEMP);
+
+        return (error);
+}
+
+static void
+diffuse_init(void)
+{
+
+        if (di_config.init_done)
+                return;
+
+        /* Initialise everything. */
+
+        diffuse_ft_attach();
+        diffuse_ft_init();
+        diffuse_export_init();
+
+        /* By default use explicit removal messages. */
+        di_config.an_rule_removal = DIP_TIMEOUT_NONE;
+
+        SLIST_INIT(&di_config.feature_list);
+        LIST_INIT(&di_config.feature_inst_list);
+        SLIST_INIT(&di_config.classifier_list);
+        LIST_INIT(&di_config.classifier_inst_list);
+        LIST_INIT(&di_config.export_list);
+        TAILQ_INIT(&di_config.export_rec_list);
+
+        DI_LOCK_INIT();
+
+        callout_init(&di_timeout, CALLOUT_MPSAFE);
+        callout_reset(&di_timeout, hz / 10, diffuse_timeout, NULL);
+
+        di_config.init_done = 1;
+        printf("DIFFUSE initialised\n");
+}
+
+#ifdef KLD_MODULE
+static void
+diffuse_destroy(int last)
+{
+
+        callout_drain(&di_timeout);
+
+        DI_WLOCK();
+        if (last) {
+                DID("DIFFUSE removed last instance");
+                diffuse_ctl_ptr = NULL;
+                ipfw_diffuse_ext = NULL;
+        }
+        DI_UNLOCK();
+
+        diffuse_remove_feature_instances(NULL, 1);
+        diffuse_remove_classifier_instances(NULL, 1);
+        diffuse_export_remove_recs(NULL);
+        diffuse_remove_export_instances(1);
+        diffuse_export_uninit();
+        diffuse_ft_uninit();
+        diffuse_ft_detach();
+        DI_LOCK_DESTROY();
+}
+#endif /* KLD_MODULE */
+
+/* The main module event function. */
+static int
+diffuse_modevent(module_t mod, int type, void *data)
+{
+
+        if (type == MOD_LOAD) {
+                if (diffuse_ctl_ptr != NULL) {
+                        DID("DIFFUSE already loaded");
+                        return (EEXIST);
+                }
+                diffuse_init();
+                diffuse_ctl_ptr = diffuse_ctl;
+                ipfw_diffuse_ext = &diffuse_ext;
+
+                return (0);
+        } else if (type == MOD_QUIESCE || type == MOD_SHUTDOWN) {
+                if (feature_instance_used()) {
+                        DID("feature(s) referenced by rules, cannot unload");
+                        return (EBUSY);
+                }
+
+                if (classifier_instance_used()) {
+                        DID("classifier(s) referenced by rules, cannot unload");
+                        return (EBUSY);
+                }
+
+                if (export_instance_used()) {
+                        DID("export(s) referenced by rules, cannot unload");
+                        return (EBUSY);
+                }
+
+                return (0);
+        } else if (type == MOD_UNLOAD) {
+#if !defined(KLD_MODULE)
+                DID("statically compiled, cannot unload");
+                return (EINVAL);
+#else
+                diffuse_destroy(1 /* last */);
+
+                return (0);
+#endif
+        } else
+                return (EOPNOTSUPP);
+}
+
+/* Modevent stuff for feature modules. */
+
+static int
+load_feature(struct di_feature_alg *f)
+{
+        struct di_feature_alg *s;
+        int error;
+
+        s = NULL;
+        error = 0;
+
+        if (f == NULL)
+                return (EINVAL);
+
+        diffuse_init(); /* Just in case, we need the lock. */
+
+        /*
+         * Check that all mandatory callbacks exist.
+         */
+        if (f->init_instance == NULL ||
+            f->destroy_instance == NULL ||
+            f->init_stats == NULL ||
+            f->destroy_stats == NULL ||
+            f->update_stats == NULL ||
+            f->reset_stats == NULL ||
+            f->get_stats == NULL ||
+            f->get_stat_names == NULL) {
+                DID("missing function for %s", f->name);
+                return (EINVAL);
+        }
+
+        /* Check whether the feature already exists. */
+        DI_WLOCK();
+        SLIST_FOREACH(s, &di_config.feature_list, next) {
+                if (strcmp(s->name, f->name) == 0) {
+                        /* Feature already exists. */
+                        DID("%s already loaded", f->name);
+                        error = EEXIST;
+                        break;
+                }
+        }
+        if (!error) {
+                SLIST_INSERT_HEAD(&di_config.feature_list, f, next);
+                error = add_feature_instance(f->name, f->name, NULL);
+        }
+
+        DI_UNLOCK();
+
+        if (error)
+                DID("%s instantiation failed", f->name);
+        else
+                DID("feature %s %sloaded", f->name, s ? "not " : "");
+
+        return (error);
+}
+
+static int
+unload_feature(struct di_feature_alg *f)
+{
+        struct di_feature_alg *tmp, *r;
+        int err;
+
+        err = EINVAL;
+
+        DID("called for %s", f->name);
+
+        DI_WLOCK();
+        SLIST_FOREACH_SAFE(r, &di_config.feature_list, next, tmp) {
+                if (strcmp(f->name, r->name) != 0)
+                        continue;
+
+                DID("ref_count = %d", r->ref_count);
+                err = (r->ref_count != 0) ? EBUSY : 0;
+                if (err == 0) {
+                        SLIST_REMOVE(&di_config.feature_list, r,
+                            di_feature_alg, next);
+                }
+                break;
+        }
+        DI_UNLOCK();
+
+        DID("feature %s %sunloaded", f->name, err ? "not " : "");
+
+        return (err);
+}
+
+int
+diffuse_feature_modevent(module_t mod, int cmd, void *arg)
+{
+        struct di_feature_alg *f;
+
+        f = arg;
+
+        if (cmd == MOD_LOAD) {
+                return (load_feature(f));
+        } else if (cmd == MOD_UNLOAD) {
+                diffuse_remove_feature_instances(f->name, 1);
+                return (unload_feature(f));
+        } else {
+                return (EOPNOTSUPP);
+        }
+}
+
+/* Modevent stuff for classifier modules. */
+
+static int
+load_classifier(struct di_classifier_alg *c)
+{
+        struct di_classifier_alg *s;
+        int error;
+
+        s = NULL;
+        error = 0;
+
+        if (c == NULL)
+                return (EINVAL);
+
+        diffuse_init(); /* Just in case, we need the lock. */
+
+        /* Check that all mandatory callbacks exist. */
+        if (c->init_instance == NULL ||
+            c->destroy_instance == NULL ||
+            c->classify == NULL ||
+            c->get_class_cnt == NULL ||
+            c->get_feature_cnt == NULL) {
+                DID("missing function for %s", c->name);
+                return (EINVAL);
+        }
+
+        /* Check whether the classifier already exists. */
+        DI_WLOCK();
+        SLIST_FOREACH(s, &di_config.classifier_list, next) {
+                if (strcmp(s->name, c->name) == 0) {
+                        DID("%s already loaded", c->name);
+                        error = EEXIST;
+                        break;
+                }
+        }
+        if (!error)
+                SLIST_INSERT_HEAD(&di_config.classifier_list, c, next);
+
+        /* Can't add default instance for classifiers. */
+
+        DI_UNLOCK();
+
+        DID("classifier %s %sloaded", c->name, s ? "not " : "");
+
+        return (error);
+}
+
+static int
+unload_classifier(struct di_classifier_alg *c)
+{
+        struct di_classifier_alg *r, *tmp;
+        int err;
+
+        err = EINVAL;
+
+        DID("called for %s", c->name);
+
+        DI_WLOCK();
+        SLIST_FOREACH_SAFE(r, &di_config.classifier_list, next, tmp) {
+                if (strcmp(c->name, r->name) != 0)
+                        continue;
+
+                DID("ref_count = %d", r->ref_count);
+                err = (r->ref_count != 0) ? EBUSY : 0;
+                if (err == 0) {
+                        SLIST_REMOVE(&di_config.classifier_list, r,
+                            di_classifier_alg, next);
+                }
+                break;
+        }
+        DI_UNLOCK();
+
+        DID("classifier %s %sunloaded", c->name, err ?
"not " : ""); + return (err); +} + +int +diffuse_classifier_modevent(module_t mod, int cmd, void *arg) +{ + struct di_classifier_alg *c; + + c = arg; + + if (cmd == MOD_LOAD) { + return (load_classifier(c)); + } else if (cmd == MOD_UNLOAD) { + diffuse_remove_classifier_instances(c->name, 1); + return (unload_classifier(c)); + } else { + return (EOPNOTSUPP); + } +} + +static moduledata_t diffuse_mod = { + .name = "diffuse", + .evhand = diffuse_modevent, + .priv = NULL +}; + +#define DI_SI_SUB SI_SUB_PROTO_IFATTACHDOMAIN +#define DI_MODEV_ORD (SI_ORDER_ANY - 128) /* After ipfw. */ +DECLARE_MODULE(diffuse, diffuse_mod, DI_SI_SUB, DI_MODEV_ORD); +MODULE_DEPEND(diffuse, ipfw, 2, 2, 2); +MODULE_VERSION(diffuse, 1); diff -r 3abcaa91ffe8 sys/sys/time.h --- a/sys/sys/time.h Tue Oct 04 19:24:06 2011 +1100 +++ b/sys/sys/time.h Tue Oct 04 22:36:46 2011 +1100 @@ -227,6 +227,8 @@ } while (0) #endif +#define tvtoms(tvp) ((tvp)->tv_sec * 1000 + (tvp)->tv_usec / 1000) + /* * Names of the interval timers, and structure * defining a timer setting.