diff --git a/sys/conf/NOTES b/sys/conf/NOTES index c7dd5a03984..ebc09f6cbdd 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -634,6 +634,12 @@ options IPSEC #IP security (requires device crypto) options IPSEC_SUPPORT #options IPSEC_DEBUG #debug for IP security +# Option IPSEC_NETISR enables deferred IPsec processing for outbound packets. +# Complex network configuration may require a large kernel stack for the +# processing of each packet. Netisr queuing reduces this requirement by +# shrinking the call chains. +#options IPSEC_NETISR #deferred IPsec processing + # # SMB/CIFS requester # NETSMB enables support for SMB protocol, it requires LIBMCHAIN and LIBICONV diff --git a/sys/conf/options b/sys/conf/options index ab9073e7697..fa3222c7270 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -429,6 +429,7 @@ IPFIREWALL_VERBOSE_LIMIT opt_ipfw.h IPSEC opt_ipsec.h IPSEC_DEBUG opt_ipsec.h IPSEC_SUPPORT opt_ipsec.h +IPSEC_NETISR opt_ipsec.h IPSTEALTH KRPC LIBALIAS diff --git a/sys/net/netisr.h b/sys/net/netisr.h index 63764a74f2e..b1994bca62e 100644 --- a/sys/net/netisr.h +++ b/sys/net/netisr.h @@ -59,6 +59,7 @@ #define NETISR_EPAIR 8 /* if_epair(4) */ #define NETISR_IP_DIRECT 9 /* direct-dispatch IPv4 */ #define NETISR_IPV6_DIRECT 10 /* direct-dispatch IPv6 */ +#define NETISR_IPSEC 11 /* IPsec processing queue */ /* * Protocol ordering and affinity policy constants. 
See the detailed diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c index e120f654aac..c5105745c73 100644 --- a/sys/netipsec/ipsec.c +++ b/sys/netipsec/ipsec.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -271,6 +272,39 @@ SYSCTL_VNET_PCPUSTAT(_net_inet6_ipsec6, IPSECCTL_STATS, ipsecstats, struct ipsecstat, ipsec6stat, "IPsec IPv6 statistics."); #endif /* INET6 */ +#ifdef IPSEC_NETISR +static struct netisr_handler ipsec_output_nh = { + .nh_name = "ipsec", + .nh_handler = ipsec_netisr_output, + .nh_proto = NETISR_IPSEC, + .nh_policy = NETISR_POLICY_SOURCE, +}; + +static int +sysctl_ipsec_queue_maxlen(SYSCTL_HANDLER_ARGS) +{ + int error, qlimit; + + netisr_getqlimit(&ipsec_output_nh, &qlimit); + error = sysctl_handle_int(oidp, &qlimit, 0, req); + if (error || !req->newptr) + return (error); + if (qlimit < 1) + return (EINVAL); + return (netisr_setqlimit(&ipsec_output_nh, qlimit)); +} +#ifdef INET +SYSCTL_PROC(_net_inet_ipsec, OID_AUTO, intr_queue_maxlen, + CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_ipsec_queue_maxlen, "I", + "Maximum size of the IPsec output queue"); +#endif +#ifdef INET6 +SYSCTL_PROC(_net_inet6_ipsec6, OID_AUTO, intr_queue_maxlen, + CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_ipsec_queue_maxlen, "I", + "Maximum size of the IPsec output queue"); +#endif +#endif /* IPSEC_NETISR */ + static int ipsec_in_reject(struct secpolicy *, struct inpcb *, const struct mbuf *); @@ -1374,6 +1408,14 @@ def_policy_init(const void *unused __unused) key_bumpspgen(); } else printf("%s: failed to initialize default policy\n", __func__); +#ifdef IPSEC_NETISR +#ifdef VIMAGE + if (!IS_DEFAULT_VNET(curvnet)) + netisr_register_vnet(&ipsec_output_nh); + else +#endif + netisr_register(&ipsec_output_nh); +#endif /* IPSEC_NETISR */ } @@ -1381,6 +1423,14 @@ static void def_policy_uninit(const void *unused __unused) { +#ifdef IPSEC_NETISR +#ifdef VIMAGE + if (!IS_DEFAULT_VNET(curvnet)) + netisr_unregister_vnet(&ipsec_output_nh); + else +#endif + 
netisr_unregister(&ipsec_output_nh); +#endif /* IPSEC_NETISR */ if (V_def_policy != NULL) { key_freesp(&V_def_policy); key_bumpspgen(); diff --git a/sys/netipsec/ipsec.h b/sys/netipsec/ipsec.h index 64b9e0a7df1..796eb6c7ee3 100644 --- a/sys/netipsec/ipsec.h +++ b/sys/netipsec/ipsec.h @@ -244,6 +244,10 @@ struct ipsecstat { #define IPSECCTL_ESP_RANDPAD 13 #ifdef _KERNEL +#include +#if KSTACK_PAGES < 4 +#define IPSEC_NETISR +#endif #include struct ipsec_ctx_data; @@ -336,6 +340,8 @@ int ipsec4_process_packet(struct mbuf *, struct secpolicy *, struct inpcb *); int ipsec_process_done(struct mbuf *, struct secpolicy *, struct secasvar *, u_int); +void ipsec_netisr_output(struct mbuf *); + extern void m_checkalignment(const char* where, struct mbuf *m0, int off, int len); extern struct mbuf *m_makespace(struct mbuf *m0, int skip, int hlen, int *off); diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c index 392008e9197..0595e2ef8f3 100644 --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -103,6 +104,82 @@ static int ipsec_encap(struct mbuf **mp, struct secasindex *saidx); +#ifdef IPSEC_NETISR +#define MTAG_IPSEC 1487673374 +struct ipsec_nh_ctx { + struct secpolicy *sp; + struct secasvar *sav; + u_int idx; +}; + +#ifdef INET +static int ipsec4_xform_output(struct mbuf *, struct secpolicy *, + struct secasvar *, u_int); +#endif +#ifdef INET6 +static int ipsec6_xform_output(struct mbuf *, struct secpolicy *, + struct secasvar *, u_int); +#endif + +static int +ipsec_queue_output(struct mbuf *m, struct secpolicy *sp, + struct secasvar *sav, u_int idx) +{ + struct ipsec_nh_ctx *ctx; + struct m_tag *mtag; + + mtag = m_tag_alloc(MTAG_IPSEC, 0, sizeof(*ctx), M_NOWAIT); + if (mtag == NULL) { + m_freem(m); + return (ENOMEM); + } + m_tag_prepend(m, mtag); + ctx = (struct ipsec_nh_ctx *)(mtag + 1); + ctx->sp = sp; + ctx->sav = sav; + ctx->idx = idx; + return 
(netisr_queue_src(NETISR_IPSEC, (uintptr_t)sav->spi, m)); +} + +void +ipsec_netisr_output(struct mbuf *m) +{ + struct ipsec_nh_ctx *ctx; + struct secpolicy *sp; + struct secasvar *sav; + struct m_tag *mtag; + + mtag = m_tag_locate(m, MTAG_IPSEC, 0, NULL); + if (mtag == NULL) { + m_freem(m); + return; + } + ctx = (struct ipsec_nh_ctx *)(mtag + 1); + sp = ctx->sp; + sav = ctx->sav; + switch (sav->sah->saidx.dst.sa.sa_family) { +#ifdef INET + case AF_INET: + if (ipsec4_xform_output(m, sp, sav, ctx->idx) == 0) + return; + IPSECSTAT_INC(ips_out_inval); + break; +#endif +#ifdef INET6 + case AF_INET6: + if (ipsec6_xform_output(m, sp, sav, ctx->idx) == 0) + return; + IPSEC6STAT_INC(ips_out_inval); + break; +#endif + default: + m_freem(m); + } + key_freesav(&sav); + key_freesp(&sp); +} +#endif /* IPSEC_NETISR */ + #ifdef INET static struct secasvar * ipsec4_allocsa(struct mbuf *m, struct secpolicy *sp, u_int *pidx, int *error) @@ -181,34 +258,14 @@ next: * IPsec output logic for IPv4. */ static int -ipsec4_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) +ipsec4_xform_output(struct mbuf *m, struct secpolicy *sp, + struct secasvar *sav, u_int idx) { - char sbuf[IPSEC_ADDRSTRLEN], dbuf[IPSEC_ADDRSTRLEN]; struct ipsec_ctx_data ctx; union sockaddr_union *dst; - struct secasvar *sav; struct ip *ip; int error, i, off; - IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); - - /* - * We hold the reference to SP. Content of SP couldn't be changed. - * Craft secasindex and do lookup for suitable SA. - * Then do encapsulation if needed and call xform's output. - * We need to store SP in the xform callback parameters. - * In xform callback we will extract SP and it can be used to - * determine next transform. At the end of transform we can - * release reference to SP. 
- */ - sav = ipsec4_allocsa(m, sp, &idx, &error); - if (sav == NULL) { - if (error == EJUSTRETURN) { /* No IPsec required */ - key_freesp(&sp); - return (error); - } - goto bad; - } /* * XXXAE: most likely ip_sum at this point is wrong. */ @@ -230,12 +287,8 @@ ipsec4_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) ip->ip_sum = in_cksum(m, ip->ip_hl << 2); error = ipsec_encap(&m, &sav->sah->saidx); if (error != 0) { - DPRINTF(("%s: encapsulation for SA %s->%s " - "SPI 0x%08x failed with error %d\n", __func__, - ipsec_address(&sav->sah->saidx.src, sbuf, - sizeof(sbuf)), - ipsec_address(&sav->sah->saidx.dst, dbuf, - sizeof(dbuf)), ntohl(sav->spi), error)); + DPRINTF(("%s: encapsulation for SPI 0x%08x failed\n", + __func__, ntohl(sav->spi))); /* XXXAE: IPSEC_OSTAT_INC(tunnel); */ goto bad; } @@ -273,11 +326,48 @@ ipsec4_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) goto bad; } error = (*sav->tdb_xform->xf_output)(m, sp, sav, idx, i, off); - if (error != 0) { - key_freesav(&sav); - key_freesp(&sp); - } + /* mbuf was consumed by xform_output */ + return (error); +bad: + if (m != NULL) + m_freem(m); return (error); +} + +static int +ipsec4_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) +{ + struct secasvar *sav; + int error; + + IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); + /* + * We hold the reference to SP. Content of SP couldn't be changed. + * Craft secasindex and do lookup for suitable SA. + * Then do encapsulation if needed and call xform's output. + * We need to store SP in the xform callback parameters. + * In xform callback we will extract SP and it can be used to + * determine next transform. At the end of transform we can + * release reference to SP. 
+ */ + sav = ipsec4_allocsa(m, sp, &idx, &error); + if (sav == NULL) { + if (error == EJUSTRETURN) { /* No IPsec required */ + key_freesp(&sp); + return (error); + } + goto bad; + } +#ifdef IPSEC_NETISR + error = ipsec_queue_output(m, sp, sav, idx); +#else + error = ipsec4_xform_output(m, sp, sav, idx); +#endif /* IPSEC_NETISR */ + if (error == 0) + return (error); + if (error == ENOMEM) + IPSECSTAT_INC(ips_out_nomem); + m = NULL; /* mbuf was consumed by netisr/xform_output */ bad: IPSECSTAT_INC(ips_out_inval); if (m != NULL) @@ -499,26 +589,14 @@ next: * IPsec output logic for IPv6. */ static int -ipsec6_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) +ipsec6_xform_output(struct mbuf *m, struct secpolicy *sp, + struct secasvar *sav, u_int idx) { - char sbuf[IPSEC_ADDRSTRLEN], dbuf[IPSEC_ADDRSTRLEN]; struct ipsec_ctx_data ctx; union sockaddr_union *dst; - struct secasvar *sav; struct ip6_hdr *ip6; int error, i, off; - IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); - - sav = ipsec6_allocsa(m, sp, &idx, &error); - if (sav == NULL) { - if (error == EJUSTRETURN) { /* No IPsec required */ - key_freesp(&sp); - return (error); - } - goto bad; - } - /* Fix IP length in case if it is not set yet. 
*/ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6)); @@ -543,12 +621,8 @@ ipsec6_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) } error = ipsec_encap(&m, &sav->sah->saidx); if (error != 0) { - DPRINTF(("%s: encapsulation for SA %s->%s " - "SPI 0x%08x failed with error %d\n", __func__, - ipsec_address(&sav->sah->saidx.src, sbuf, - sizeof(sbuf)), - ipsec_address(&sav->sah->saidx.dst, dbuf, - sizeof(dbuf)), ntohl(sav->spi), error)); + DPRINTF(("%s: encapsulation for SPI 0x%08x failed\n", + __func__, ntohl(sav->spi))); /* XXXAE: IPSEC_OSTAT_INC(tunnel); */ goto bad; } @@ -581,11 +655,40 @@ ipsec6_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) goto bad; } error = (*sav->tdb_xform->xf_output)(m, sp, sav, idx, i, off); - if (error != 0) { - key_freesav(&sav); - key_freesp(&sp); - } + /* mbuf was consumed by xform_output */ + return (error); +bad: + if (m != NULL) + m_freem(m); return (error); +} + +static int +ipsec6_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) +{ + struct secasvar *sav; + int error; + + IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); + + sav = ipsec6_allocsa(m, sp, &idx, &error); + if (sav == NULL) { + if (error == EJUSTRETURN) { /* No IPsec required */ + key_freesp(&sp); + return (error); + } + goto bad; + } +#ifdef IPSEC_NETISR + error = ipsec_queue_output(m, sp, sav, idx); +#else + error = ipsec6_xform_output(m, sp, sav, idx); +#endif /* IPSEC_NETISR */ + if (error == 0) + return (error); + if (error == ENOMEM) + IPSEC6STAT_INC(ips_out_nomem); + m = NULL; /* mbuf was consumed by netisr/xform_output */ bad: IPSEC6STAT_INC(ips_out_inval); if (m != NULL) @@ -968,4 +1071,3 @@ ipsec_encap(struct mbuf **mp, struct secasindex *saidx) (*mp)->m_flags &= ~(M_BCAST | M_MCAST); return (0); } -