// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * xfrm_output.c - Common IPsec encapsulation code.
 *
 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
 */

#include <linux/errno.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <net/dst.h>
#include <net/gso.h>
#include <net/icmp.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_route.h>
#include <net/ipv6_stubs.h>
#endif

#include "xfrm_inout.h"

static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb);
static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);

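/* Make sure the skb has enough head room for the transform headers that
 * will be prepended (dst->header_len plus the link-layer reserve) and
 * enough tail room for the device, reallocating via pskb_expand_head()
 * only when the existing slack is insufficient.
 */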
static int xfrm_skb_check_space(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev)
		- skb_headroom(skb);
	int ntail = dst->dev->needed_tailroom - skb_tailroom(skb);

	if (nhead <= 0) {
		if (ntail <= 0)
			return 0;
		nhead = 0;
	} else if (ntail < 0)
		ntail = 0;

	return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC);
}

/* Children define the path of the packet through the
 * Linux networking stack.  Thus, destinations are stackable.
 */

static struct dst_entry *skb_dst_pop(struct sk_buff *skb)
{
	struct dst_entry *child = dst_clone(xfrm_dst_child(skb_dst(skb)));

	skb_dst_drop(skb);
	return child;
}

/* Add encapsulation header.
 *
 * The IP header will be moved forward to make space for the encapsulation
 * header.
 */
static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	int ihl = iph->ihl * 4;

	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + ihl;
	__skb_pull(skb, ihl);
	memmove(skb_network_header(skb), iph, ihl);
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6_MIP6)
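/* Walk the IPv6 extension header chain to find the offset at which the
 * Mobile IPv6 routing or destination-options header must be inserted,
 * updating *nexthdr to point at the preceding next-header field.
 * Returns the byte offset on success, or -EINVAL on a malformed chain.
 */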
static int mip6_rthdr_offset(struct sk_buff *skb, u8 **nexthdr, int type)
{
	const unsigned char *nh = skb_network_header(skb);
	unsigned int offset = sizeof(struct ipv6hdr);
	unsigned int packet_len;
	int found_rhdr = 0;

	packet_len = skb_tail_pointer(skb) - nh;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset <= packet_len) {
		struct ipv6_opt_hdr *exthdr;

		switch (**nexthdr) {
		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			if (type == IPPROTO_ROUTING && offset + 3 <= packet_len) {
				struct ipv6_rt_hdr *rt;

				rt = (struct ipv6_rt_hdr *)(nh + offset);
				if (rt->type != 0)
					return offset;
			}
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
			/* HAO MUST NOT appear more than once.
			 * XXX: It is better to try to find by the end of
			 * XXX: packet if HAO exists.
			 */
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
				net_dbg_ratelimited("mip6: hao exists already, override\n");
				return offset;
			}

			if (found_rhdr)
				return offset;

			break;
		default:
			return offset;
		}

		if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
			return -EINVAL;

		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
		offset += ipv6_optlen(exthdr);
		if (offset > IPV6_MAXPLEN)
			return -EINVAL;
		*nexthdr = &exthdr->nexthdr;
	}

	return -EINVAL;
}
#endif

#if IS_ENABLED(CONFIG_IPV6)
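/* Locate the insertion offset for the transform header in an IPv6
 * packet: Mobile IPv6 destination-options/routing types go through
 * mip6_rthdr_offset(), everything else falls back to
 * ip6_find_1stfragopt().
 */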
static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr)
{
	switch (x->type->proto) {
#if IS_ENABLED(CONFIG_IPV6_MIP6)
	case IPPROTO_DSTOPTS:
	case IPPROTO_ROUTING:
		return mip6_rthdr_offset(skb, prevhdr, x->type->proto);
#endif
	default:
		break;
	}

	return ip6_find_1stfragopt(skb, prevhdr);
}
#endif

/* Add encapsulation header.
 *
 * The IP header and mutable extension headers will be moved forward to make
 * space for the encapsulation header.
 */
static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6hdr *iph;
	u8 *prevhdr;
	int hdr_len;

	iph = ipv6_hdr(skb);
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
	if (hdr_len < 0)
		return hdr_len;
	skb_set_mac_header(skb,
			   (prevhdr - x->props.header_len) - skb->data);
	skb_set_network_header(skb, -x->props.header_len);
	skb->transport_header = skb->network_header + hdr_len;
	__skb_pull(skb, hdr_len);
	memmove(ipv6_hdr(skb), iph, hdr_len);
	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

/* Add route optimization header space.
 *
 * The IP header and mutable extension headers will be moved forward to make
 * space for the route optimization header.
 */
static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6hdr *iph;
	u8 *prevhdr;
	int hdr_len;

	iph = ipv6_hdr(skb);

	hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
	if (hdr_len < 0)
		return hdr_len;
	skb_set_mac_header(skb,
			   (prevhdr - x->props.header_len) - skb->data);
	skb_set_network_header(skb, -x->props.header_len);
	skb->transport_header = skb->network_header + hdr_len;
	__skb_pull(skb, hdr_len);
	memmove(ipv6_hdr(skb), iph, hdr_len);

	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

/* Add encapsulation header.
 *
 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
 */
static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ip_beet_phdr *ph;
	struct iphdr *top_iph;
	int hdrlen, optlen;

	hdrlen = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	if (unlikely(optlen))
		hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);

	skb_set_network_header(skb, -x->props.header_len - hdrlen +
			       (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph)));
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);

	xfrm4_beet_make_header(skb);

	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen);

	top_iph = ip_hdr(skb);

	if (unlikely(optlen)) {
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		ph->nexthdr = top_iph->protocol;
		if (ph->padlen)
			memset(ph + 1, IPOPT_NOP, ph->padlen);

		top_iph->protocol = IPPROTO_BEETPH;
		top_iph->ihl = sizeof(struct iphdr) / 4;
	}

	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;

	return 0;
}

/* Add encapsulation header.
 *
 * The top IP header will be constructed per RFC 2401.
 */
static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	bool small_ipv6 = (skb->protocol == htons(ETH_P_IPV6)) && (skb->len <= IPV6_MIN_MTU);
	struct dst_entry *dst = skb_dst(skb);
	struct iphdr *top_iph;
	int flags;

	skb_set_inner_network_header(skb, skb_network_offset(skb));
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ip_hdr(skb);

	top_iph->ihl = 5;
	top_iph->version = 4;

	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);

	/* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
		top_iph->tos = 0;
	else
		top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
	top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
					    XFRM_MODE_SKB_CB(skb)->tos);

	flags = x->props.flags;
	if (flags & XFRM_STATE_NOECN)
		IP_ECN_clear(top_iph);

	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) || small_ipv6 ?
			    0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));

	top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));

	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;
	ip_select_ident(dev_net(dst->dev), skb, NULL);

	return 0;
}

#if IS_ENABLED(CONFIG_IPV6)
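/* Add encapsulation header.
 *
 * The outer IPv6 header is built from the state's tunnel addresses;
 * DSCP and ECN bits are propagated from the inner packet unless
 * XFRM_SA_XFLAG_DONT_ENCAP_DSCP / XFRM_STATE_NOECN disable this.
 */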
static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *top_iph;
	int dsfield;

	skb_set_inner_network_header(skb, skb_network_offset(skb));
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct ipv6hdr, nexthdr);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ipv6_hdr(skb);

	top_iph->version = 6;

	memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
	       sizeof(top_iph->flow_lbl));
	top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);

	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
		dsfield = 0;
	else
		dsfield = XFRM_MODE_SKB_CB(skb)->tos;
	dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
	if (x->props.flags & XFRM_STATE_NOECN)
		dsfield &= ~INET_ECN_MASK;
	ipv6_change_dsfield(top_iph, 0, dsfield);
	top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
	return 0;
}
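/* Add the BEET encapsulation header for an IPv6 outer family; inner
 * IPv4 options, when present, are carried in a BEET pseudo header
 * (IPPROTO_BEETPH), mirroring xfrm4_beet_encap_add().
 */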
static int xfrm6_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ipv6hdr *top_iph;
	struct ip_beet_phdr *ph;
	int optlen, hdr_len;

	hdr_len = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	if (unlikely(optlen))
		hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4);

	skb_set_network_header(skb, -x->props.header_len - hdr_len);
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	skb->mac_header = skb->network_header +
			  offsetof(struct ipv6hdr, nexthdr);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len);

	xfrm6_beet_make_header(skb);

	top_iph = ipv6_hdr(skb);
	if (unlikely(optlen)) {
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		ph->nexthdr = top_iph->nexthdr;
		if (ph->padlen)
			memset(ph + 1, IPOPT_NOP, ph->padlen);

		top_iph->nexthdr = IPPROTO_BEETPH;
	}

	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
	return 0;
}
#endif

/* Add encapsulation header.
 *
 * On exit, the transport header will be set to the start of the
 * encapsulation header to be filled in by x->type->output and the mac
 * header will be set to the nextheader (protocol for IPv4) field of the
 * extension header directly preceding the encapsulation header, or in
 * its absence, that of the top IP header.
 * The value of the network header will always point to the top IP header
 * while skb->data will point to the payload.
 */
static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
{
	int err;

	err = xfrm_inner_extract_output(x, skb);
	if (err)
		return err;

	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
	skb->protocol = htons(ETH_P_IP);

	switch (x->props.mode) {
	case XFRM_MODE_BEET:
		return xfrm4_beet_encap_add(x, skb);
	case XFRM_MODE_TUNNEL:
		return xfrm4_tunnel_encap_add(x, skb);
	}

	WARN_ON_ONCE(1);
	return -EOPNOTSUPP;
}

static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	int err;

	err = xfrm_inner_extract_output(x, skb);
	if (err)
		return err;

	skb->ignore_df = 1;
	skb->protocol = htons(ETH_P_IPV6);

	switch (x->props.mode) {
	case XFRM_MODE_BEET:
		return xfrm6_beet_encap_add(x, skb);
	case XFRM_MODE_TUNNEL:
		return xfrm6_tunnel_encap_add(x, skb);
	default:
		WARN_ON_ONCE(1);
		return -EOPNOTSUPP;
	}
#endif
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
}
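/* Dispatch to the per-family, per-mode encapsulation handler. Modes not
 * known here (e.g. IPTFS) are handled through the registered
 * x->mode_cbs->prepare_output callback.
 */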
static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
	switch (x->props.mode) {
	case XFRM_MODE_BEET:
	case XFRM_MODE_TUNNEL:
		if (x->props.family == AF_INET)
			return xfrm4_prepare_output(x, skb);
		if (x->props.family == AF_INET6)
			return xfrm6_prepare_output(x, skb);
		break;
	case XFRM_MODE_TRANSPORT:
		if (x->props.family == AF_INET)
			return xfrm4_transport_output(x, skb);
		if (x->props.family == AF_INET6)
			return xfrm6_transport_output(x, skb);
		break;
	case XFRM_MODE_ROUTEOPTIMIZATION:
		if (x->props.family == AF_INET6)
			return xfrm6_ro_output(x, skb);
		WARN_ON_ONCE(1);
		break;
	default:
		if (x->mode_cbs && x->mode_cbs->prepare_output)
			return x->mode_cbs->prepare_output(x, skb);
		WARN_ON_ONCE(1);
		break;
	}

	return -EOPNOTSUPP;
}

#if IS_ENABLED(CONFIG_NET_PKTGEN)
int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
	return xfrm_outer_mode_output(x, skb);
}
EXPORT_SYMBOL_GPL(pktgen_xfrm_outer_mode_output);
#endif
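/* Apply one transform to the skb: reserve header space, add the outer
 * mode header, run replay and lifetime accounting under x->lock, then
 * hand the packet to the transform type's output function (e.g. ESP).
 * Asynchronous crypto completions re-enter at the resume label with the
 * completion status in err.
 */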
static int xfrm_output_one(struct sk_buff *skb, int err)
{
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_state *x = dst->xfrm;
	struct net *net = xs_net(x);

	if (err <= 0 || x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
		goto resume;

	do {
		err = xfrm_skb_check_space(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			goto error_nolock;
		}

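		/* Apply the state's configured output mark so that, on
		 * systems using mark-based routing, packets emitted by
		 * this SA are routed by the SA's mark rather than the
		 * original packet's.
		 */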
		skb->mark = xfrm_smark_get(skb->mark, x);

		err = xfrm_outer_mode_output(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
			goto error_nolock;
		}

		spin_lock_bh(&x->lock);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID);
			err = -EINVAL;
			goto error;
		}

		err = xfrm_state_check_expire(x);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED);
			goto error;
		}

		err = xfrm_replay_overflow(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR);
			goto error;
		}

		x->curlft.bytes += skb->len;
		x->curlft.packets++;
		x->lastused = ktime_get_real_seconds();

		spin_unlock_bh(&x->lock);

		skb_dst_force(skb);
		if (!skb_dst(skb)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}

		if (xfrm_offload(skb)) {
			x->type_offload->encap(x, skb);
		} else {
			/* Inner headers are invalid now. */
			skb->encapsulation = 0;

			err = x->type->output(x, skb);
			if (err == -EINPROGRESS)
				goto out;
		}

resume:
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR);
			goto error_nolock;
		}

		dst = skb_dst_pop(skb);
		if (!dst) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}
		skb_dst_set(skb, dst);
		x = dst->xfrm;
	} while (x && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL));

	return 0;

error:
	spin_unlock_bh(&x->lock);
error_nolock:
	kfree_skb(skb);
out:
	return err;
}
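/* Iterate over the remaining transforms on the dst stack, re-injecting
 * the packet into the output path (netfilter POST_ROUTING, then
 * dst_output()) once no xfrm dst entries remain.
 */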
int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err)
{
	struct net *net = xs_net(skb_dst(skb)->xfrm);

	while (likely((err = xfrm_output_one(skb, err)) == 0)) {
		nf_reset_ct(skb);

		err = skb_dst(skb)->ops->local_out(net, sk, skb);
		if (unlikely(err != 1))
			goto out;

		if (!skb_dst(skb)->xfrm)
			return dst_output(net, sk, skb);

		err = nf_hook(skb_dst(skb)->ops->family,
			      NF_INET_POST_ROUTING, net, sk, skb,
			      NULL, skb_dst(skb)->dev, xfrm_output2);
		if (unlikely(err != 1))
			goto out;
	}

	if (err == -EINPROGRESS)
		err = 0;

out:
	return err;
}
EXPORT_SYMBOL_GPL(xfrm_output_resume);
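/* Packet-offload fast path: the device performs both encapsulation and
 * encryption, so after local_out the skb is handed straight to the
 * offloading netdevice instead of going through POST_ROUTING.
 */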
static int xfrm_dev_direct_output(struct sock *sk, struct xfrm_state *x,
				  struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net *net = xs_net(x);
	int err;

	dst = skb_dst_pop(skb);
	if (!dst) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
		kfree_skb(skb);
		return -EHOSTUNREACH;
	}
	skb_dst_set(skb, dst);
	nf_reset_ct(skb);

	err = skb_dst(skb)->ops->local_out(net, sk, skb);
	if (unlikely(err != 1)) {
		kfree_skb(skb);
		return err;
	}

	/* In transport mode, network destination is
	 * directly reachable, while in tunnel mode,
	 * inner packet network may not be. In packet
	 * offload type, HW is responsible for hard
	 * header packet mangling so directly xmit skb
	 * to netdevice.
	 */
	skb->dev = x->xso.dev;
	__skb_push(skb, skb->dev->hard_header_len);
	return dev_queue_xmit(skb);
}

static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return xfrm_output_resume(sk, skb, 1);
}
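/* Software-segment a GSO skb and push each resulting segment through
 * xfrm_output2() individually; used when the transform path cannot
 * handle the aggregate packet.
 */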
static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct sk_buff *segs, *nskb;

	BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET);
	BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_GSO_CB_OFFSET);
	segs = skb_gso_segment(skb, 0);
	kfree_skb(skb);
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (segs == NULL)
		return -EINVAL;

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		err = xfrm_output2(net, sk, segs);

		if (unlikely(err)) {
			kfree_skb_list(nskb);
			return err;
		}
	}

	return 0;
}

/* For partial checksum offload, the outer header checksum is calculated
 * by software and the inner header checksum is calculated by hardware.
 * This requires hardware to know the inner packet type to calculate
 * the inner header checksum. Save the inner ip protocol here to avoid
 * traversing the packet in the vendor's xmit code.
 * For IPsec tunnel mode, save the ip protocol from the IP header of the
 * plain text packet. Otherwise, if the encap type is IPPROTO, just save
 * skb->inner_ipproto; in any other case get the ip protocol from the
 * inner IP header.
 */
static void xfrm_get_inner_ipproto(struct sk_buff *skb, struct xfrm_state *x)
{
	struct xfrm_offload *xo = xfrm_offload(skb);
	const struct ethhdr *eth;

	if (!xo)
		return;

	if (x->outer_mode.encap == XFRM_MODE_TUNNEL) {
		switch (x->outer_mode.family) {
		case AF_INET:
			xo->inner_ipproto = ip_hdr(skb)->protocol;
			break;
		case AF_INET6:
			xo->inner_ipproto = ipv6_hdr(skb)->nexthdr;
			break;
		default:
			break;
		}

		return;
	}
	if (x->outer_mode.encap == XFRM_MODE_IPTFS) {
		xo->inner_ipproto = IPPROTO_AGGFRAG;
		return;
	}

	/* non-Tunnel Mode */
	if (!skb->encapsulation)
		return;

	if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) {
		xo->inner_ipproto = skb->inner_ipproto;
		return;
	}

	if (skb->inner_protocol_type != ENCAP_TYPE_ETHER)
		return;

	eth = (struct ethhdr *)skb_inner_mac_header(skb);

	switch (ntohs(eth->h_proto)) {
	case ETH_P_IPV6:
		xo->inner_ipproto = inner_ipv6_hdr(skb)->nexthdr;
		break;
	case ETH_P_IP:
		xo->inner_ipproto = inner_ip_hdr(skb)->protocol;
		break;
	}
}
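/* Main output entry point: initialize the per-family control block,
 * take the packet-offload fast path when the state supports it, set up
 * the crypto-offload secpath, and otherwise fall back to software GSO
 * and checksumming before entering the transform loop.
 */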
int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct xfrm_state *x = skb_dst(skb)->xfrm;
	int family;
	int err;

	family = (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) ? x->outer_mode.family
		: skb_dst(skb)->ops->family;

	switch (family) {
	case AF_INET:
		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
		IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
		break;
	case AF_INET6:
		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));

		IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
		break;
	}

	if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
		if (!xfrm_dev_offload_ok(skb, x)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return -EHOSTUNREACH;
		}

		/* Exclusive direct xmit for tunnel mode, as
		 * some filtering or matching rules may apply
		 * in transport mode.
		 */
		if (x->props.mode == XFRM_MODE_TUNNEL)
			return xfrm_dev_direct_output(sk, x, skb);

		return xfrm_output_resume(sk, skb, 0);
	}

	secpath_reset(skb);

	if (xfrm_dev_offload_ok(skb, x)) {
		struct sec_path *sp;

		sp = secpath_set(skb);
		if (!sp) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return -ENOMEM;
		}

		sp->olen++;
		sp->xvec[sp->len++] = x;
		xfrm_state_hold(x);

		xfrm_get_inner_ipproto(skb, x);
		skb->encapsulation = 1;

		if (skb_is_gso(skb)) {
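			/* Force software GSO only for packets that are
			 * already encapsulated and in tunnel mode;
			 * adding the tunnel header below would otherwise
			 * overwrite the inner network header and break
			 * segmentation later.
			 */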
			if (skb->inner_protocol && x->props.mode == XFRM_MODE_TUNNEL)
				return xfrm_output_gso(net, sk, skb);

			skb_shinfo(skb)->gso_type |= SKB_GSO_ESP;
			goto out;
		}

		if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)
			goto out;
	} else {
		if (skb_is_gso(skb))
			return xfrm_output_gso(net, sk, skb);
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		err = skb_checksum_help(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return err;
		}
	}

out:
	return xfrm_output2(net, sk, skb);
}
EXPORT_SYMBOL_GPL(xfrm_output);
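/* Check that a tunneled IPv4 packet fits the path MTU; if not, signal
 * the sender (xfrm_local_error() for full local sockets, otherwise an
 * ICMP fragmentation-needed message) and return -EMSGSIZE.
 */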
int xfrm4_tunnel_check_size(struct sk_buff *skb)
{
	int mtu, ret = 0;

	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
		goto out;

	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
		goto out;

	mtu = dst_mtu(skb_dst(skb));
	if ((!skb_is_gso(skb) && skb->len > mtu) ||
	    (skb_is_gso(skb) &&
	     !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) {
		skb->protocol = htons(ETH_P_IP);

		if (skb->sk && sk_fullsock(skb->sk))
			xfrm_local_error(skb, mtu);
		else
			icmp_send(skb, ICMP_DEST_UNREACH,
				  ICMP_FRAG_NEEDED, htonl(mtu));
		ret = -EMSGSIZE;
	}
out:
	return ret;
}
EXPORT_SYMBOL_GPL(xfrm4_tunnel_check_size);

static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
	int err;

	if (x->outer_mode.encap == XFRM_MODE_BEET &&
	    ip_is_fragment(ip_hdr(skb))) {
		net_warn_ratelimited("BEET mode doesn't support inner IPv4 fragments\n");
		return -EAFNOSUPPORT;
	}

	err = xfrm4_tunnel_check_size(skb);
	if (err)
		return err;

	XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol;

	xfrm4_extract_header(skb);
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6)
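/* IPv6 counterpart of xfrm4_tunnel_check_size(): enforce the path MTU
 * (clamped to IPV6_MIN_MTU) and report oversize packets via RXPMTU,
 * xfrm_local_error() or ICMPV6_PKT_TOOBIG as appropriate.
 */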
int xfrm6_tunnel_check_size(struct sk_buff *skb)
{
	int mtu, ret = 0;
	struct dst_entry *dst = skb_dst(skb);
	struct sock *sk = skb_to_full_sk(skb);

	if (skb->ignore_df)
		goto out;

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if ((!skb_is_gso(skb) && skb->len > mtu) ||
	    (skb_is_gso(skb) &&
	     !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) {
		skb->dev = dst->dev;
		skb->protocol = htons(ETH_P_IPV6);

		if (xfrm6_local_dontfrag(sk))
			ipv6_stub->xfrm6_local_rxpmtu(skb, mtu);
		else if (sk)
			xfrm_local_error(skb, mtu);
		else
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		ret = -EMSGSIZE;
	}
out:
	return ret;
}
EXPORT_SYMBOL_GPL(xfrm6_tunnel_check_size);
#endif

static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	int err;

	err = xfrm6_tunnel_check_size(skb);
	if (err)
		return err;

	XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr;

	xfrm6_extract_header(skb);
	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}
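/* Dispatch on the inner packet's protocol to validate its size and stash
 * the inner protocol and header fields in the skb control block before
 * the outer header is constructed.
 */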
static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		return xfrm4_extract_output(x, skb);
	case htons(ETH_P_IPV6):
		return xfrm6_extract_output(x, skb);
	}

	return -EAFNOSUPPORT;
}
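/* Notify the local sender about an MTU problem through the address
 * family's local_error handler (e.g. triggering socket error reporting).
 */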
void xfrm_local_error(struct sk_buff *skb, int mtu)
{
	unsigned int proto;
	struct xfrm_state_afinfo *afinfo;

	if (skb->protocol == htons(ETH_P_IP))
		proto = AF_INET;
	else if (skb->protocol == htons(ETH_P_IPV6) &&
		 skb->sk->sk_family == AF_INET6)
		proto = AF_INET6;
	else
		return;

	afinfo = xfrm_state_get_afinfo(proto);
	if (afinfo) {
		afinfo->local_error(skb, mtu);
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL_GPL(xfrm_local_error);