commit 854570b001a6d67ba42fb0412047f13866d3e2a0
Author: Andrey V. Elsukov
Date:   Thu Jan 14 16:32:06 2021 +0300

    [ipfw] add ability to enable/disable skipto cache at runtime

    * Add IP_FW_SKIPTO_CACHE sockopt to enable/disable the skipto cache.
    * Add net.inet.ip.fw.skipto_cache read-only variable to check its status.
    * Modify the jump() function to honor the status of the skipto cache.
    * Fix the O_CALLRETURN opcode: keep the ruleset id on the stack and do
      not return if the current ruleset id doesn't match the saved value.

diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c
index 74eafe6cbe3..b45930efffc 100644
--- a/sbin/ipfw/ipfw2.c
+++ b/sbin/ipfw/ipfw2.c
@@ -2525,6 +2525,18 @@ ipfw_sets_handler(char *av[])
 		errx(EX_USAGE, "invalid set command %s\n", *av);
 }
 
+static void
+manage_skipto_cache(int op)
+{
+	ipfw_cmd_header req;
+
+	memset(&req, 0, sizeof(req));
+	req.size = sizeof(req);
+	req.cmd = op ? SKIPTO_CACHE_ENABLE : SKIPTO_CACHE_DISABLE;
+
+	do_set3(IP_FW_SKIPTO_CACHE, &req.opheader, sizeof(req));
+}
+
 void
 ipfw_sysctl_handler(char *av[], int which)
 {
@@ -2549,6 +2561,8 @@ ipfw_sysctl_handler(char *av[], int which)
 	} else if (_substrcmp(*av, "dyn_keepalive") == 0) {
 		sysctlbyname("net.inet.ip.fw.dyn_keepalive",
 		    NULL, 0, &which, sizeof(which));
+	} else if (_substrcmp(*av, "skipto_cache") == 0) {
+		manage_skipto_cache(which);
 #ifndef NO_ALTQ
 	} else if (_substrcmp(*av, "altq") == 0) {
 		altq_set_enabled(which);
diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h
index 9fd42cadca1..2949f9326a5 100644
--- a/sys/netinet/ip_fw.h
+++ b/sys/netinet/ip_fw.h
@@ -116,6 +116,7 @@ typedef struct _ip_fw3_opheader {
 #define	IP_FW_DUMP_SOPTCODES	116	/* Dump available sopts/versions */
 #define	IP_FW_DUMP_SRVOBJECTS	117	/* Dump existing named objects */
+#define	IP_FW_SKIPTO_CACHE	118	/* Manage skipto cache */
 
 #define	IP_FW_NAT64STL_CREATE	130	/* Create stateless NAT64 instance */
 #define	IP_FW_NAT64STL_DESTROY	131	/* Destroy stateless NAT64 instance */
@@ -421,6 +422,11 @@ enum ipfw_return_type {
 	RETURN_NEXT_RULE,
 };
 
+enum ipfw_skipto_cache_op {
+	SKIPTO_CACHE_DISABLE = 0,
+	SKIPTO_CACHE_ENABLE,
+};
+
 /*
  * This is used to forward to a given address (ip).
  */
@@ -1068,6 +1074,12 @@ typedef struct _ipfw_ta_info {
 	uint64_t	spare1;
 } ipfw_ta_info;
 
+typedef struct _ipfw_cmd_header {	/* control command header */
+	ip_fw3_opheader	opheader;	/* IP_FW3 opcode */
+	uint32_t	size;		/* Total size (incl. header) */
+	uint32_t	cmd;		/* command */
+} ipfw_cmd_header;
+
 typedef struct _ipfw_obj_header {
 	ip_fw3_opheader	opheader;	/* IP_FW3 opcode */
 	uint32_t	spare;
diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c
index ae94e77c552..0424551b858 100644
--- a/sys/netpfil/ipfw/ip_fw2.c
+++ b/sys/netpfil/ipfw/ip_fw2.c
@@ -145,15 +145,14 @@ VNET_DEFINE(unsigned int, fw_tables_sets) = 0;	/* Don't use set-aware tables */
 static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT;
 
 #ifndef LINEAR_SKIPTO
-static uint32_t jump_fast(struct ip_fw_chain *chain, struct ip_fw *f,
-    uint32_t num, int tablearg, int jump_backwards);
-#define	JUMP(ch, f, num, targ, back)	jump_fast(ch, f, num, targ, back)
+VNET_DEFINE(int, skipto_cache) = 0;
 #else
-static uint32_t jump_linear(struct ip_fw_chain *chain, struct ip_fw *f,
-    uint32_t num, int tablearg, int jump_backwards);
-#define	JUMP(ch, f, num, targ, back)	jump_linear(ch, f, num, targ, back)
+VNET_DEFINE(int, skipto_cache) = 1;
 #endif
 
+static uint32_t jump(struct ip_fw_chain *chain, struct ip_fw *f,
+    uint32_t num, int tablearg, bool jump_backwards);
+
 /*
  * Each rule belongs to one of 32 different sets (0..31).
  * The variable set_disable contains one bit per set.
@@ -164,8 +163,6 @@ static uint32_t jump_linear(struct ip_fw_chain *chain, struct ip_fw *f,
  * Rules in set RESVD_SET can only be deleted individually.
  */
 VNET_DEFINE(u_int32_t, set_disable);
-#define	V_set_disable		VNET(set_disable)
-
 VNET_DEFINE(int, fw_verbose);
 /* counter for ipfw_log(NULL...) */
 VNET_DEFINE(u_int64_t, norule_counter);
@@ -204,6 +201,9 @@ SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step,
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose,
     CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0,
     "Log matches to ipfw rules");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, skipto_cache,
+    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(skipto_cache), 0,
+    "Status of linear skipto cache: 1 - enabled, 0 - disabled.");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(verbose_limit), 0,
     "Set upper limit of matches of ipfw rules logged");
@@ -1230,60 +1230,41 @@ set_match(struct ip_fw_args *args, int slot,
 	args->flags |= IPFW_ARGS_REF;
 }
 
-#ifndef LINEAR_SKIPTO
-/*
- * Helper function to enable cached rule lookups using
- * cached_id and cached_pos fields in ipfw rule.
- */
 static uint32_t
-jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, uint32_t num,
-    int tablearg, int jump_backwards)
+jump(struct ip_fw_chain *chain, struct ip_fw *f, uint32_t num,
+    int tablearg, bool jump_backwards)
 {
-	uint32_t f_pos;
+	uint32_t f_pos, tmp;
 
-	/* If possible use cached f_pos (in f->cached_pos),
-	 * whose version is written in f->cached_id
-	 * (horrible hacks to avoid changing the ABI).
+	/*
+	 * If skipto cache is disabled and tablearg isn't used,
+	 * try to use cached f_pos.
 	 */
-	if (num != IP_FW_TARG && f->cached_id == chain->id)
+	if (V_skipto_cache == 0 &&
+	    num != IP_FW_TARG && f->cached_id == chain->id)
 		f_pos = f->cached_pos;
 	else {
-		int i = IP_FW_ARG_TABLEARG(chain, num, skipto);
-		/* make sure we do not jump backward */
-		if (jump_backwards == 0 && i <= f->rulenum)
-			i = f->rulenum + 1;
-		if (chain->idxmap != NULL)
-			f_pos = chain->idxmap[i];
-		else
-			f_pos = ipfw_find_rule(chain, i, 0);
-		/* update the cache */
-		if (num != IP_FW_TARG) {
-			f->cached_id = chain->id;
-			f->cached_pos = f_pos;
-		}
-	}
-
-	return (f_pos);
-}
-#else
-/*
- * Helper function to enable real fast rule lookups.
- */
-static uint32_t
-jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, uint32_t num,
-    int tablearg, int jump_backwards)
-{
-	uint32_t f_pos;
-
-	num = IP_FW_ARG_TABLEARG(chain, num, skipto);
-	/* make sure we do not jump backward */
-	if (jump_backwards == 0 && num <= f->rulenum)
-		num = f->rulenum + 1;
-	f_pos = chain->idxmap[num];
+		/*
+		 * Make sure we do not jump backward.
+		 */
+		tmp = IP_FW_ARG_TABLEARG(chain, num, skipto);
+		if (!jump_backwards && tmp <= f->rulenum)
+			tmp = f->rulenum + 1;
+		if (V_skipto_cache == 0) {
+			f_pos = ipfw_find_rule(chain, tmp, 0);
+			/*
+			 * Update the cache, unless tablearg is used.
+			 */
+			if (num != IP_FW_TARG) {
+				f->cached_id = chain->id;
+				f->cached_pos = f_pos;
+			}
+		} else
+			f_pos = chain->idxmap[tmp];
+	}
 
 	return (f_pos);
 }
-#endif
 
 #define	TARG(k, f)	IP_FW_ARG_TABLEARG(chain, k, f)
 
 /*
@@ -2855,8 +2836,8 @@ do { \
 		case O_SKIPTO:
 			IPFW_INC_RULE_COUNTER(f, pktlen);
-			f_pos = JUMP(chain, f,
-			    insntod(cmd, u32)->d[0], tablearg, 0);
+			f_pos = jump(chain, f,
+			    insntod(cmd, u32)->d[0], tablearg, false);
 			/*
 			 * Skip disabled rules, and re-enter
 			 * the inner loop with the correct
@@ -2906,12 +2887,31 @@ do { \
 					break;
 				mtag = m_tag_next(m, mtag);
 			}
+
+			/*
+			 * We keep the ruleset id in the first element
+			 * of the stack.  If it doesn't match chain->id,
+			 * we can't trust the information in the stack,
+			 * since the rules were changed.  We reset the
+			 * stack pointer so the tag can be reused if
+			 * needed.
+			 */
+			if (mtag != NULL) {
+				stack = (uint32_t *)(mtag + 1);
+				if (stack[0] != chain->id) {
+					stack[0] = chain->id;
+					mtag->m_tag_id = 0;
+				}
+			}
+
 			if (mtag == NULL && IS_CALL) {
 				mtag = m_tag_alloc(MTAG_IPFW_CALL, 0,
 				    IPFW_CALLSTACK_SIZE *
 				    sizeof(uint32_t), M_NOWAIT);
-				if (mtag != NULL)
-					m_tag_prepend(m, mtag);
+				if (mtag != NULL) {
+					m_tag_prepend(m, mtag);
+					stack = (uint32_t *)(mtag + 1);
+					stack[0] = chain->id;
+				}
 			}
 
 			/*
@@ -2924,7 +2924,8 @@ do { \
 				break;
 			}
 			if (IS_CALL && (mtag == NULL ||
-			    mtag->m_tag_id >= IPFW_CALLSTACK_SIZE)) {
+			    mtag->m_tag_id >=
+			    IPFW_CALLSTACK_SIZE - 1)) {
 				printf("ipfw: call stack error, "
 				    "go to next rule\n");
 				l = 0;	/* exit inner loop */
 				break;
 			}
 
 			stack = (uint32_t *)(mtag + 1);
 			if (IS_CALL) {
-				stack[mtag->m_tag_id] = f_pos;
-				mtag->m_tag_id++;
-				f_pos = JUMP(chain, f,
+				stack[++mtag->m_tag_id] = f_pos;
+				f_pos = jump(chain, f,
 				    insntod(cmd, u32)->d[0],
-				    tablearg, 1);
+				    tablearg, true);
 			} else {	/* `return' action */
-				mtag->m_tag_id--;
-				jmpto = stack[mtag->m_tag_id];
-
+				jmpto = stack[mtag->m_tag_id--];
 				if (cmd->arg1 == RETURN_NEXT_RULE)
 					f_pos = jmpto + 1;
 				else	/* RETURN_NEXT_RULENUM */
@@ -2958,6 +2956,7 @@ do { \
 			 * f_pos, f, l and cmd.
 			 * Also clear cmdlen and skip_or
 			 */
+			MPASS(f_pos < chain->n_rules);
 			for (; f_pos < chain->n_rules - 1 &&
 			    (V_set_disable &
 			    (1 << chain->map[f_pos]->set)); f_pos++)
@@ -3457,9 +3456,7 @@ vnet_ipfw_init(const void *unused)
 	ipfw_dyn_init(chain);
 	ipfw_eaction_init(chain, first);
-#ifdef LINEAR_SKIPTO
 	ipfw_init_skipto_cache(chain);
-#endif
 	ipfw_bpf_init(first);
 
 	/* First set up some values that are compile time options */
@@ -3516,9 +3513,7 @@ vnet_ipfw_uninit(const void *unused)
 	for (i = 0; i < chain->n_rules; i++)
 		ipfw_reap_add(chain, &reap, chain->map[i]);
 	free(chain->map, M_IPFW);
-#ifdef LINEAR_SKIPTO
 	ipfw_destroy_skipto_cache(chain);
-#endif
 	IPFW_WUNLOCK(chain);
 	IPFW_UH_WUNLOCK(chain);
 	ipfw_destroy_tables(chain, last);
diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h
index 773c2ccdf01..3b700b9c370 100644
--- a/sys/netpfil/ipfw/ip_fw_private.h
+++ b/sys/netpfil/ipfw/ip_fw_private.h
@@ -240,6 +240,9 @@ VNET_DECLARE(struct ip_fw_chain, layer3_chain);
 VNET_DECLARE(int, ipfw_vnet_ready);
 #define	V_ipfw_vnet_ready	VNET(ipfw_vnet_ready)
 
+VNET_DECLARE(int, skipto_cache);
+#define	V_skipto_cache		VNET(skipto_cache)
+
 VNET_DECLARE(u_int32_t, set_disable);
 #define	V_set_disable		VNET(set_disable)
 
@@ -671,6 +674,7 @@ enum ipfw_opcheck_result ipfw_check_opcode(ipfw_insn **, int *,
     struct rule_check_info *);
 void ipfw_init_skipto_cache(struct ip_fw_chain *chain);
 void ipfw_destroy_skipto_cache(struct ip_fw_chain *chain);
+void ipfw_enable_skipto_cache(struct ip_fw_chain *chain);
 int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id);
 int ipfw_ctl3(struct sockopt *sopt);
 int ipfw_add_protected_rule(struct ip_fw_chain *chain, struct ip_fw *rule,
diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c
index 4c8a32ba034..a840c0f4596 100644
--- a/sys/netpfil/ipfw/ip_fw_sockopt.c
+++ b/sys/netpfil/ipfw/ip_fw_sockopt.c
@@ -143,7 +143,8 @@ static uint64_t ctl3_refct, ctl3_gencnt;
 static int ipfw_flush_sopt_data(struct sockopt_data *sd);
 
 static sopt_handler_f dump_config, add_rules, del_rules, clear_rules,
-    move_rules, manage_sets, dump_soptcodes, dump_srvobjects;
+    move_rules, manage_sets, dump_soptcodes, dump_srvobjects,
+    manage_skiptocache;
 
 static struct ipfw_sopt_handler scodes[] = {
 	{ IP_FW_XGET,		IP_FW3_OPVER, HDIR_GET,	dump_config },
@@ -157,6 +158,7 @@ static struct ipfw_sopt_handler scodes[] = {
 	{ IP_FW_SET_ENABLE,	IP_FW3_OPVER, HDIR_SET,	manage_sets },
 	{ IP_FW_DUMP_SOPTCODES,	IP_FW3_OPVER, HDIR_GET,	dump_soptcodes },
 	{ IP_FW_DUMP_SRVOBJECTS, IP_FW3_OPVER, HDIR_GET, dump_srvobjects },
+	{ IP_FW_SKIPTO_CACHE,	IP_FW3_OPVER, HDIR_BOTH, manage_skiptocache },
 };
 
 static struct opcode_obj_rewrite *find_op_rw(ipfw_insn *cmd,
@@ -308,9 +310,9 @@ ipfw_init_skipto_cache(struct ip_fw_chain *chain)
 {
 	uint32_t *idxmap, *idxmap_back;
 
-	idxmap = malloc((IPFW_DEFAULT_RULE + 1) * sizeof(uint32_t *),
+	idxmap = malloc((IPFW_DEFAULT_RULE + 1) * sizeof(uint32_t),
 	    M_IPFW, M_WAITOK | M_ZERO);
-	idxmap_back = malloc((IPFW_DEFAULT_RULE + 1) * sizeof(uint32_t *),
+	idxmap_back = malloc((IPFW_DEFAULT_RULE + 1) * sizeof(uint32_t),
 	    M_IPFW, M_WAITOK | M_ZERO);
 
 	/*
@@ -329,7 +331,8 @@ ipfw_init_skipto_cache(struct ip_fw_chain *chain)
 
 	/* Set backup pointer first to permit building cache */
 	chain->idxmap_back = idxmap_back;
-	update_skipto_cache(chain, chain->map);
+	if (V_skipto_cache != 0)
+		update_skipto_cache(chain, chain->map);
 	IPFW_WLOCK(chain);
 	/* It is now safe to set chain->idxmap ptr */
 	chain->idxmap = idxmap;
@@ -345,10 +348,8 @@ void
 ipfw_destroy_skipto_cache(struct ip_fw_chain *chain)
 {
-	if (chain->idxmap != NULL)
-		free(chain->idxmap, M_IPFW);
-	if (chain->idxmap != NULL)
-		free(chain->idxmap_back, M_IPFW);
+	free(chain->idxmap, M_IPFW);
+	free(chain->idxmap_back, M_IPFW);
 }
 
@@ -571,7 +572,8 @@ ipfw_commit_rules(struct ip_fw_chain *chain, struct rule_check_info *rci,
 	}
 
 	krule->id = chain->id + 1;
-	update_skipto_cache(chain, map);
+	if (V_skipto_cache != 0)
+		update_skipto_cache(chain, map);
 	map = swap_map(chain, map, chain->n_rules + 1);
 	IPFW_UH_WUNLOCK(chain);
 	if (map)
@@ -2827,6 +2829,49 @@ dump_srvobjects(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
 	return (0);
 }
 
+void
+ipfw_enable_skipto_cache(struct ip_fw_chain *chain)
+{
+
+	IPFW_UH_WLOCK_ASSERT(chain);
+	update_skipto_cache(chain, chain->map);
+
+	IPFW_WLOCK(chain);
+	swap_skipto_cache(chain);
+	V_skipto_cache = 1;
+	IPFW_WUNLOCK(chain);
+}
+
+/*
+ * Enable or disable the skipto cache.
+ * Request: [ ipfw_cmd_header ] size = ipfw_cmd_header.size
+ * Reply: [ ipfw_cmd_header ]
+ * Returns 0 on success.
+ */
+static int
+manage_skiptocache(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_cmd_header *hdr;
+
+	if (sd->valsize != sizeof(*hdr))
+		return (EINVAL);
+
+	hdr = (ipfw_cmd_header *)ipfw_get_sopt_space(sd, sd->valsize);
+	if (hdr->cmd != SKIPTO_CACHE_DISABLE &&
+	    hdr->cmd != SKIPTO_CACHE_ENABLE)
+		return (EOPNOTSUPP);
+
+	IPFW_UH_WLOCK(chain);
+	if (hdr->cmd != V_skipto_cache) {
+		if (hdr->cmd == SKIPTO_CACHE_ENABLE)
+			ipfw_enable_skipto_cache(chain);
+		V_skipto_cache = hdr->cmd;
+	}
+	IPFW_UH_WUNLOCK(chain);
+	return (0);
+}
+
 /*
  * Compares two sopt handlers (code, version and handler ptr).
  * Used both as qsort() and bsearch().
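
Below is a minimal userland sketch of the new control request, mirroring what
manage_skipto_cache() does in ipfw2.c above.  The raw-socket setup and the
ip_fw3_opheader field names (opcode, version) are assumptions based on how
ipfw(8)'s do_set3() issues IP_FW3 requests, so treat this as illustrative
rather than a verified tool:

/* Hypothetical standalone toggle for the ipfw skipto cache. */
#include <sys/types.h>
#include <sys/socket.h>

#include <netinet/in.h>
#include <netinet/ip_fw.h>

#include <err.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>

static void
set_skipto_cache(bool enable)
{
	ipfw_cmd_header req;
	int s;

	/* ipfw control requests travel over a raw IP socket (assumed setup). */
	if ((s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW)) == -1)
		err(1, "socket");

	memset(&req, 0, sizeof(req));
	req.opheader.opcode = IP_FW_SKIPTO_CACHE;	/* new opcode 118 */
	req.opheader.version = IP_FW3_OPVER;		/* assumed field name */
	req.size = sizeof(req);		/* manage_skiptocache() wants an exact size match */
	req.cmd = enable ? SKIPTO_CACHE_ENABLE : SKIPTO_CACHE_DISABLE;

	/* IP_FW3 multiplexes the versioned ipfw sockopts. */
	if (setsockopt(s, IPPROTO_IP, IP_FW3, &req, sizeof(req)) == -1)
		err(1, "IP_FW_SKIPTO_CACHE");
	close(s);
}

int
main(int argc, char *argv[])
{

	set_skipto_cache(argc > 1 && strcmp(argv[1], "enable") == 0);
	return (0);
}

With the ipfw2.c hunk wired into ipfw_sysctl_handler(), the same request is
presumably what `ipfw enable skipto_cache' and `ipfw disable skipto_cache'
would send, and the resulting state can be read back with
`sysctl net.inet.ip.fw.skipto_cache'.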