mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00

During ILA address translations, the L4 checksums can be handled in
different ways. One of them, adj-transport, consists in parsing the
transport layer and updating any found checksum. This logic relies on
inet_proto_csum_replace_by_diff and produces an incorrect skb->csum when
the skb is in state CHECKSUM_COMPLETE.
This bug can be reproduced with a simple ILA to SIR mapping, assuming
packets are received with CHECKSUM_COMPLETE:
$ ip a show dev eth0
14: eth0@if15: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
link/ether 62:ae:35:9e:0f:8d brd ff:ff:ff:ff:ff:ff link-netnsid 0
inet6 3333:0:0:1::c078/64 scope global
valid_lft forever preferred_lft forever
inet6 fd00:10:244:1::c078/128 scope global nodad
valid_lft forever preferred_lft forever
inet6 fe80::60ae:35ff:fe9e:f8d/64 scope link proto kernel_ll
valid_lft forever preferred_lft forever
$ ip ila add loc_match fd00:10:244:1 loc 3333:0:0:1 \
csum-mode adj-transport ident-type luid dev eth0
Then I hit [fd00:10:244:1::c078]:8000 with a server listening only on
[3333:0:0:1::c078]:8000. With the bug, the SYN packet is dropped with
SKB_DROP_REASON_TCP_CSUM after inet_proto_csum_replace_by_diff changed
skb->csum. The translation and drop are visible on pwru [1] traces:
IFACE TUPLE FUNC
eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) ipv6_rcv
eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) ip6_rcv_core
eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) nf_hook_slow
eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) inet_proto_csum_replace_by_diff
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) tcp_v6_early_demux
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_route_input
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_input
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_input_finish
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_protocol_deliver_rcu
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) raw6_local_deliver
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ipv6_raw_deliver
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) tcp_v6_rcv
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) __skb_checksum_complete
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) kfree_skb_reason(SKB_DROP_REASON_TCP_CSUM)
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_release_head_state
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_release_data
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_free_head
eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) kfree_skbmem
This is happening because inet_proto_csum_replace_by_diff is updating
skb->csum when it shouldn't. The L4 checksum is updated such that it
"cancels" the IPv6 address change in terms of checksum computation, so
the impact on skb->csum is null.
Note this would be different for an IPv4 packet since three fields
would be updated: the IPv4 address, the IP checksum, and the L4
checksum. Two would cancel each other and skb->csum would still need
to be updated to take the L4 checksum change into account.
This patch fixes it by passing an ipv6 flag to
inet_proto_csum_replace_by_diff, to skip the skb->csum update if we're
in the IPv6 case. Note the behavior of the only other user of
inet_proto_csum_replace_by_diff, the BPF subsystem, is left as is in
this patch and fixed in the subsequent patch.
With the fix, using the reproduction from above, I can confirm
skb->csum is not touched by inet_proto_csum_replace_by_diff and the TCP
SYN proceeds to the application after the ILA translation.
Link: https://github.com/cilium/pwru [1]
Fixes: 65d7ab8de5 ("net: Identifier Locator Addressing module")
Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://patch.msgid.link/b5539869e3550d46068504feb02d37653d939c0b.1748509484.git.paul.chaignon@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
191 lines
4.8 KiB
C
191 lines
4.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
/*
|
|
* INET An implementation of the TCP/IP protocol suite for the LINUX
|
|
* operating system. INET is implemented using the BSD Socket
|
|
* interface as the means of communication with the user level.
|
|
*
|
|
* Checksumming functions for IP, TCP, UDP and so on
|
|
*
|
|
* Authors: Jorge Cwik, <jorge@laser.satlink.net>
|
|
* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
|
|
* Borrows very liberally from tcp.c and ip.c, see those
|
|
* files for more names.
|
|
*/
|
|
|
|
#ifndef _CHECKSUM_H
|
|
#define _CHECKSUM_H
|
|
|
|
#include <linux/errno.h>
|
|
#include <asm/types.h>
|
|
#include <asm/byteorder.h>
|
|
#include <asm/checksum.h>
|
|
#if !defined(_HAVE_ARCH_COPY_AND_CSUM_FROM_USER) || !defined(HAVE_CSUM_COPY_USER)
|
|
#include <linux/uaccess.h>
|
|
#endif
|
|
|
|
#ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
|
|
static __always_inline
|
|
__wsum csum_and_copy_from_user (const void __user *src, void *dst,
|
|
int len)
|
|
{
|
|
if (copy_from_user(dst, src, len))
|
|
return 0;
|
|
return csum_partial(dst, len, ~0U);
|
|
}
|
|
#endif
|
|
|
|
#ifndef HAVE_CSUM_COPY_USER
|
|
/*
 * Checksum @len bytes at @src (seed ~0U), then copy them to the user
 * buffer @dst.
 *
 * Returns the checksum when copy_to_user() returns zero, and 0 when it
 * returns non-zero. The checksum is computed before the copy is attempted.
 */
static __always_inline __wsum
csum_and_copy_to_user(const void *src, void __user *dst, int len)
{
	const __wsum computed = csum_partial(src, len, ~0U);

	if (copy_to_user(dst, src, len))
		return 0;

	return computed;
}
|
|
#endif
|
|
|
|
#ifndef _HAVE_ARCH_CSUM_AND_COPY
|
|
static __always_inline __wsum
|
|
csum_partial_copy_nocheck(const void *src, void *dst, int len)
|
|
{
|
|
memcpy(dst, src, len);
|
|
return csum_partial(dst, len, 0);
|
|
}
|
|
#endif
|
|
|
|
#ifndef HAVE_ARCH_CSUM_ADD
|
|
/*
 * Add two 32-bit checksum values, feeding a carry out of bit 31 back
 * into the low end of the result.
 */
static __always_inline __wsum csum_add(__wsum csum, __wsum addend)
{
	const u32 rhs = (__force u32)addend;
	u32 total = (__force u32)csum + rhs;

	/* A wrapped 32-bit sum is smaller than the operand just added. */
	if (total < rhs)
		total++;

	return (__force __wsum)total;
}
|
|
#endif
|
|
|
|
/*
 * Subtract @addend from @csum by adding its bitwise complement
 * through csum_add().
 */
static __always_inline __wsum csum_sub(__wsum csum, __wsum addend)
{
	const __wsum complement = ~addend;

	return csum_add(csum, complement);
}
|
|
|
|
/*
 * 16-bit counterpart of csum_add(): add @addend to @csum and feed a
 * carry out of bit 15 back into the low end of the result.
 */
static __always_inline __sum16 csum16_add(__sum16 csum, __be16 addend)
{
	const u16 rhs = (__force u16)addend;
	u16 total = (__force u16)csum;

	total += rhs;
	/* A wrapped 16-bit sum is smaller than the operand just added. */
	if (total < rhs)
		total++;

	return (__force __sum16)total;
}
|
|
|
|
/*
 * Subtract @addend from @csum by adding its bitwise complement
 * through csum16_add().
 */
static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend)
{
	const __be16 complement = ~addend;

	return csum16_add(csum, complement);
}
|
|
|
|
#ifndef HAVE_ARCH_CSUM_SHIFT
|
|
/*
 * Align @sum with a 16-bit boundary: returned unchanged for an even
 * @offset, rotated right by 8 bits for an odd one.
 */
static __always_inline __wsum csum_shift(__wsum sum, int offset)
{
	if (!(offset & 1))
		return sum;

	/* odd offset: rotate sum to align it with a 16b boundary */
	return (__force __wsum)ror32((__force u32)sum, 8);
}
|
|
#endif
|
|
|
|
static __always_inline __wsum
|
|
csum_block_add(__wsum csum, __wsum csum2, int offset)
|
|
{
|
|
return csum_add(csum, csum_shift(csum2, offset));
|
|
}
|
|
|
|
static __always_inline __wsum
|
|
csum_block_sub(__wsum csum, __wsum csum2, int offset)
|
|
{
|
|
return csum_block_add(csum, ~csum2, offset);
|
|
}
|
|
|
|
/*
 * Reinterpret a 16-bit __sum16 as a __wsum. This is a type cast only;
 * no arithmetic is performed on the value.
 */
static __always_inline __wsum csum_unfold(__sum16 n)
{
	const __wsum widened = (__force __wsum)n;

	return widened;
}
|
|
|
|
/* The all-ones 16-bit checksum value (0xffff), typed as __sum16. */
#define CSUM_MANGLED_0 ((__force __sum16)0xffff)
|
|
|
|
/*
 * Update the folded checksum at @sum in place from a precomputed @diff:
 * the stored value is unfolded and complemented, @diff is added, and the
 * folded result is written back.
 */
static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff)
{
	const __wsum inverted = ~csum_unfold(*sum);

	*sum = csum_fold(csum_add(diff, inverted));
}
|
|
|
|
/*
 * Update the folded checksum at @sum in place for a 32-bit field that
 * changed from @from to @to: remove @from from the complemented sum,
 * add @to, then fold the result back.
 */
static __always_inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
{
	__wsum acc = ~csum_unfold(*sum);

	acc = csum_sub(acc, (__force __wsum)from);
	acc = csum_add(acc, (__force __wsum)to);

	*sum = csum_fold(acc);
}
|
|
|
|
/* Implements RFC 1624 (Incremental Internet Checksum)
|
|
* 3. Discussion states :
|
|
* HC' = ~(~HC + ~m + m')
|
|
* m : old value of a 16bit field
|
|
* m' : new value of a 16bit field
|
|
*/
|
|
static __always_inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new)
|
|
{
|
|
*sum = ~csum16_add(csum16_sub(~(*sum), old), new);
|
|
}
|
|
|
|
/*
 * Replace the contribution @old with @new in the 32-bit checksum at
 * @csum, in place: @old is subtracted first, then @new is added.
 */
static inline void csum_replace(__wsum *csum, __wsum old, __wsum new)
{
	const __wsum without_old = csum_sub(*csum, old);

	*csum = csum_add(without_old, new);
}
|
|
|
|
/*
 * Fold a 32-bit sum down to 16 bits with end-around carry.
 *
 * Adding the value to its own half-swapped copy leaves
 * high + low (+ carry out of the low half) in the upper 16 bits,
 * which are then returned.
 */
static inline unsigned short csum_from32to16(unsigned int sum)
{
	const unsigned int swapped = (sum << 16) | (sum >> 16);
	const unsigned int folded = sum + swapped;

	return (unsigned short)(folded >> 16);
}
|
|
|
|
struct sk_buff;
|
|
void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
|
|
__be32 from, __be32 to, bool pseudohdr);
|
|
void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
|
|
const __be32 *from, const __be32 *to,
|
|
bool pseudohdr);
|
|
void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
|
|
__wsum diff, bool pseudohdr, bool ipv6);
|
|
|
|
static __always_inline
|
|
void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
|
|
__be16 from, __be16 to, bool pseudohdr)
|
|
{
|
|
inet_proto_csum_replace4(sum, skb, (__force __be32)from,
|
|
(__force __be32)to, pseudohdr);
|
|
}
|
|
|
|
/*
 * Derive a checksum from @csum and store it in the buffer at @ptr.
 *
 * The checksum of the first @start bytes at @ptr is subtracted from
 * @csum, the remainder is folded to 16 bits and written to the __sum16
 * located @offset bytes into @ptr.
 *
 * Returns the folded value minus the value previously stored in that
 * field, both taken as __wsum (consumed by remcsum_unadjust()).
 */
static __always_inline __wsum remcsum_adjust(void *ptr, __wsum csum,
					     int start, int offset)
{
	__sum16 *field = (__sum16 *)(ptr + offset);
	/* Subtract out checksum up to start */
	const __wsum remainder = csum_sub(csum, csum_partial(ptr, start, 0));
	const __sum16 derived = csum_fold(remainder);
	/* The old field value must be read before it is overwritten below. */
	const __wsum delta = csum_sub((__force __wsum)derived,
				      (__force __wsum)*field);

	/* Set derived checksum in packet */
	*field = derived;

	return delta;
}
|
|
|
|
/*
 * Rewrite the checksum field at @psum in place as the fold of
 * (@delta - current field value).
 */
static __always_inline void remcsum_unadjust(__sum16 *psum, __wsum delta)
{
	const __wsum current = (__force __wsum)*psum;

	*psum = csum_fold(csum_sub(delta, current));
}
|
|
|
|
/*
 * Arithmetic (modulo 2^32) negation of the raw 32-bit value held in
 * @val. Note that this is not the bitwise complement.
 */
static __always_inline __wsum wsum_negate(__wsum val)
{
	const u32 raw = (__force u32)val;

	return (__force __wsum)(0U - raw);
}
|
|
#endif
|