Index: ipfw/ip_fw2.c
===================================================================
--- ipfw/ip_fw2.c	(revision 241789)
+++ ipfw/ip_fw2.c	(working copy)
@@ -115,11 +115,13 @@ static int default_to_accept;
 #endif
 
 VNET_DEFINE(int, autoinc_step);
+VNET_DEFINE(int, fast_skipto);
 VNET_DEFINE(int, fw_one_pass) = 1;
 
 VNET_DEFINE(unsigned int, fw_tables_max);
 /* Use 128 tables by default */
 static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT;
+static int fw_fast_skipto = 0;
 
 /*
  * Each rule belongs to one of 32 different sets (0..31).
@@ -151,6 +153,7 @@ ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
 #ifdef SYSCTL_NODE
 uint32_t dummy_def = IPFW_DEFAULT_RULE;
 static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS);
+static int sysctl_ipfw_fast_skipto(SYSCTL_HANDLER_ARGS);
 
 SYSBEGIN(f3)
 
@@ -182,6 +185,11 @@ SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_
     CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
     "Number of static rules");
 
+TUNABLE_INT("net.inet.ip.fw.fast_skipto", &fw_fast_skipto);
+SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, fast_skipto,
+    CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_ipfw_fast_skipto, "I",
+    "Enable fast skipto algorithm (requires more memory)");
+
 #ifdef INET6
 SYSCTL_DECL(_net_inet6_ip6);
 SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
@@ -2103,20 +2111,35 @@ do {								\
 				l = 0;		/* exit inner loop */
 				break;
 
-			case O_SKIPTO:
+			case O_SKIPTO: {
+				int i;
+
 			    f->pcnt++;	/* update stats */
 			    f->bcnt += pktlen;
 			    f->timestamp = time_uptime;
-			    /* If possible use cached f_pos (in f->next_rule),
-			     * whose version is written in f->next_rule
-			     * (horrible hacks to avoid changing the ABI).
-			     */
-			    if (cmd->arg1 != IP_FW_TABLEARG &&
-				    (uintptr_t)f->x_next == chain->id) {
+			    if (V_fast_skipto != 0) {
+				i = (cmd->arg1 == IP_FW_TABLEARG) ? tablearg:
+				    cmd->arg1;
+				/*
+				 * Make sure we do not jump backward and
+				 * we are not out of range.
+				 */
+				if (i <= f->rulenum)
+					i = f->rulenum + 1;
+				else if (i > IPFW_DEFAULT_RULE)
+					i = IPFW_DEFAULT_RULE;
+				f_pos = chain->map_idx[i - 1];
+			    } else if (cmd->arg1 != IP_FW_TABLEARG &&
+				(uintptr_t)f->x_next == chain->id) {
+				/* If possible use cached f_pos
+				 * (in f->next_rule), whose version is
+				 * written in f->next_rule (horrible
+				 * hacks to avoid changing the ABI).
+				 */
 				f_pos = (uintptr_t)f->next_rule;
 			    } else {
-				int i = (cmd->arg1 == IP_FW_TABLEARG) ?
-					tablearg : cmd->arg1;
+				i = (cmd->arg1 == IP_FW_TABLEARG) ?
+				    tablearg: cmd->arg1;
 				/* make sure we do not jump backward */
 				if (i <= f->rulenum)
 				    i = f->rulenum + 1;
@@ -2149,6 +2172,7 @@ do {								\
 			    skip_or = 0;
 			    continue;
 			    break;	/* not reached */
+			}
 
 			case O_CALLRETURN: {
 				/*
@@ -2519,7 +2543,21 @@ sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS)
 
 	return (ipfw_resize_tables(&V_layer3_chain, ntables));
 }
+
+static int
+sysctl_ipfw_fast_skipto(SYSCTL_HANDLER_ARGS)
+{
+	int error, newval;
+
+	newval = V_fast_skipto;
+	error = sysctl_handle_int(oidp, &newval, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	return (ipfw_enable_fast_skipto(&V_layer3_chain, newval));
+}
 #endif
+
 /*
  * Module and VNET glue
  */
@@ -2580,6 +2618,15 @@ ipfw_init(void)
 	if (default_fw_tables > IPFW_TABLES_MAX)
 		default_fw_tables = IPFW_TABLES_MAX;
 
+	/*
+	 * Automatically turn on fast skipto if we have more
+	 * than 512 MBytes of memory and it wasn't disabled via
+	 * loader tunable.
+	 */
+	if (testenv("net.inet.ip.fw.fast_skipto") == 0 &&
+	    ctob(physmem) / 1024 / 1024 > 512)
+		fw_fast_skipto = 1;
+
 	ipfw_log_bpf(1); /* init */
 	return (error);
 }
@@ -2626,8 +2673,7 @@ vnet_ipfw_init(const void *unused)
 	chain->n_rules = 1;
 	chain->static_len = sizeof(struct ip_fw);
 	chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_WAITOK | M_ZERO);
-	if (chain->map)
-		rule = malloc(chain->static_len, M_IPFW, M_WAITOK | M_ZERO);
+	rule = malloc(chain->static_len, M_IPFW, M_WAITOK | M_ZERO);
 
 	/* Set initial number of tables */
 	V_fw_tables_max = default_fw_tables;
@@ -2651,6 +2697,7 @@ vnet_ipfw_init(const void *unused)
 
 	IPFW_LOCK_INIT(chain);
 	ipfw_dyn_init();
+	ipfw_enable_fast_skipto(chain, fw_fast_skipto);
 
 	/* First set up some values that are compile time options */
 	V_ipfw_vnet_ready = 1;		/* Open for business */
@@ -2691,6 +2738,7 @@ vnet_ipfw_uninit(const void *unused)
 	 */
 	(void)ipfw_attach_hooks(0 /* detach */);
 	V_ip_fw_ctl_ptr = NULL;
+	ipfw_enable_fast_skipto(chain, 0);
 	IPFW_UH_WLOCK(chain);
 	IPFW_UH_WUNLOCK(chain);
 	IPFW_UH_WLOCK(chain);
Index: ipfw/ip_fw_private.h
===================================================================
--- ipfw/ip_fw_private.h	(revision 241789)
+++ ipfw/ip_fw_private.h	(working copy)
@@ -209,6 +209,9 @@ VNET_DECLARE(u_int32_t, set_disable);
 VNET_DECLARE(int, autoinc_step);
 #define V_autoinc_step		VNET(autoinc_step)
 
+VNET_DECLARE(int, fast_skipto);
+#define V_fast_skipto		VNET(fast_skipto)
+
 VNET_DECLARE(unsigned int, fw_tables_max);
 #define V_fw_tables_max		VNET(fw_tables_max)
 
@@ -219,6 +222,7 @@ struct ip_fw_chain {
 	int		n_rules;	/* number of static rules */
 	int		static_len;	/* total len of static rules */
 	struct ip_fw	**map;	/* array of rule ptrs to ease lookup */
+	int		*map_idx;	/* array of the map indexes for fast skipto */
 	LIST_HEAD(nat_list, cfg_nat) nat;	/* list of nat entries */
 	struct radix_node_head **tables;	/* IPv4 tables */
 	struct radix_node_head **xtables;	/* extended tables */
@@ -232,6 +236,8 @@ struct ip_fw_chain {
 #endif
 	uint32_t	id;		/* ruleset id */
 	uint32_t	gencnt;		/* generation count */
+
+	int		*map_idx2;	/* used to build new map indexes */
 };
 
 struct sockopt;	/* used by tcp_var.h */
@@ -268,6 +274,7 @@ int ipfw_find_rule(struct ip_fw_chain *chain, uint
 int ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule);
 int ipfw_ctl(struct sockopt *sopt);
 int ipfw_chk(struct ip_fw_args *args);
+int ipfw_enable_fast_skipto(struct ip_fw_chain *chain, int enable);
 void ipfw_reap_rules(struct ip_fw *head);
 
 /* In ip_fw_table.c */
Index: ipfw/ip_fw_sockopt.c
===================================================================
--- ipfw/ip_fw_sockopt.c	(revision 241789)
+++ ipfw/ip_fw_sockopt.c	(working copy)
@@ -99,6 +99,70 @@ ipfw_find_rule(struct ip_fw_chain *chain, uint32_t
 }
 
 /*
+ * Fill the idx array with indexes of map elements.
+ * The idx array has IPFW_DEFAULT_RULE number of elements, each
+ * element corresponds to the rule's number and contains index of
+ * map element with this rulenum. The space between rule numbers
+ * is filled with index of next nearest rule. E.g. if we have
+ * rules map[0]->rulenum == 100 and map[1]->rulenum == 200, then
+ * idx[0..99] = 0, idx[100..199] = 1.
+ */
+static void
+fill_map_idx(struct ip_fw** map, int map_len, int *idx)
+{
+	int i, num;
+
+	for (num = 0, i = 0; num < map_len; num++) {
+		for (; i < map[num]->rulenum && i < IPFW_DEFAULT_RULE; i++)
+			idx[i] = num;
+	}
+}
+
+int
+ipfw_enable_fast_skipto(struct ip_fw_chain *chain, int enable)
+{
+	int *idx, *oidx;
+
+	if (V_fast_skipto == enable)
+		return (0);
+
+	if (enable != 0) {
+		idx = malloc(sizeof(*chain->map_idx) * IPFW_DEFAULT_RULE,
+		    M_IPFW, M_WAITOK | M_ZERO);
+		oidx = malloc(sizeof(*chain->map_idx2) * IPFW_DEFAULT_RULE,
+		    M_IPFW, M_WAITOK | M_ZERO);
+
+		IPFW_UH_WLOCK(chain);
+		if (V_fast_skipto) {
+			/* Another thread won the race. */
+			IPFW_UH_WUNLOCK(chain);
+			free(idx, M_IPFW);
+			free(oidx, M_IPFW);
+			return (0);
+		}
+		chain->map_idx = idx;
+		chain->map_idx2 = oidx;
+		fill_map_idx(chain->map, chain->n_rules, chain->map_idx);
+		V_fast_skipto = 1;
+		IPFW_UH_WUNLOCK(chain);
+	} else {
+		IPFW_UH_WLOCK(chain);
+		if (V_fast_skipto == 0) {
+			/* Another thread won the race. */
+			IPFW_UH_WUNLOCK(chain);
+			return (0);
+		}
+		V_fast_skipto = 0;
+		idx = chain->map_idx;
+		oidx = chain->map_idx2;
+		IPFW_UH_WUNLOCK(chain);
+		free(idx, M_IPFW);
+		free(oidx, M_IPFW);
+	}
+	return (0);
+}
+
+/*
  * allocate a new map, returns the chain locked. extra is the number
  * of entries to add or delete.
  */
@@ -135,12 +199,21 @@ static struct ip_fw **
 swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len)
 {
 	struct ip_fw **old_map;
+	int *idx;
 
+	if (V_fast_skipto != 0)
+		fill_map_idx(new_map, new_len, chain->map_idx2);
+
 	IPFW_WLOCK(chain);
 	chain->id++;
 	chain->n_rules = new_len;
 	old_map = chain->map;
 	chain->map = new_map;
+	if (V_fast_skipto != 0) {
+		idx = chain->map_idx;
+		chain->map_idx = chain->map_idx2;
+		chain->map_idx2 = idx;
+	}
 	IPFW_WUNLOCK(chain);
 	return old_map;
}