linux/net/rxrpc/peer_event.c

444 lines
11 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/* Peer event handling, typically ICMP messages.
*
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/errqueue.h>
#include <linux/udp.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/icmp.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>
#include "ar-internal.h"
static void rxrpc_store_error(struct rxrpc_peer *, struct sk_buff *);
static void rxrpc_distribute_error(struct rxrpc_peer *, struct sk_buff *,
enum rxrpc_call_completion, int);
/*
* Find the peer associated with a local error.
*/
static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
const struct sk_buff *skb,
struct sockaddr_rxrpc *srx)
{
struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
_enter("");
memset(srx, 0, sizeof(*srx));
srx->transport_type = local->srx.transport_type;
srx->transport_len = local->srx.transport_len;
srx->transport.family = local->srx.transport.family;
/* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice
* versa?
*/
switch (srx->transport.family) {
case AF_INET:
srx->transport_len = sizeof(srx->transport.sin);
srx->transport.family = AF_INET;
srx->transport.sin.sin_port = serr->port;
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP:
memcpy(&srx->transport.sin.sin_addr,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in_addr));
break;
case SO_EE_ORIGIN_ICMP6:
memcpy(&srx->transport.sin.sin_addr,
skb_network_header(skb) + serr->addr_offset + 12,
sizeof(struct in_addr));
break;
default:
memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr,
sizeof(struct in_addr));
break;
}
break;
#ifdef CONFIG_AF_RXRPC_IPV6
case AF_INET6:
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP6:
srx->transport.sin6.sin6_port = serr->port;
memcpy(&srx->transport.sin6.sin6_addr,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in6_addr));
break;
case SO_EE_ORIGIN_ICMP:
srx->transport_len = sizeof(srx->transport.sin);
srx->transport.family = AF_INET;
srx->transport.sin.sin_port = serr->port;
memcpy(&srx->transport.sin.sin_addr,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in_addr));
break;
default:
memcpy(&srx->transport.sin6.sin6_addr,
&ipv6_hdr(skb)->saddr,
sizeof(struct in6_addr));
break;
}
break;
#endif
default:
BUG();
}
return rxrpc_lookup_peer_rcu(local, srx);
}
/*
* Handle an MTU/fragmentation problem.
*/
static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu)
{
unsigned int max_data;
/* wind down the local interface MTU */
if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu)
peer->if_mtu = mtu;
if (mtu == 0) {
/* they didn't give us a size, estimate one */
mtu = peer->if_mtu;
if (mtu > 1500) {
mtu >>= 1;
if (mtu < 1500)
mtu = 1500;
} else {
mtu -= 100;
if (mtu < peer->hdrsize)
mtu = peer->hdrsize + 4;
}
}
max_data = max_t(int, mtu - peer->hdrsize, 500);
if (max_data < peer->max_data) {
if (peer->pmtud_good > max_data)
peer->pmtud_good = max_data;
if (peer->pmtud_bad > max_data + 1)
peer->pmtud_bad = max_data + 1;
trace_rxrpc_pmtud_reduce(peer, 0, max_data, rxrpc_pmtud_reduce_icmp);
peer->max_data = max_data;
}
}
/*
* Handle an error received on the local endpoint.
*/
void rxrpc_input_error(struct rxrpc_local *local, struct sk_buff *skb)
{
struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
struct sockaddr_rxrpc srx;
struct rxrpc_peer *peer = NULL;
_enter("L=%x", local->debug_id);
if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
_leave("UDP empty message");
return;
}
rcu_read_lock();
peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx);
if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_input_error))
peer = NULL;
rcu_read_unlock();
if (!peer)
return;
trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
serr->ee.ee_type == ICMP_DEST_UNREACH &&
serr->ee.ee_code == ICMP_FRAG_NEEDED)) {
rxrpc_adjust_mtu(peer, serr->ee.ee_info);
goto out;
}
if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6 &&
serr->ee.ee_type == ICMPV6_PKT_TOOBIG &&
serr->ee.ee_code == 0)) {
rxrpc_adjust_mtu(peer, serr->ee.ee_info);
goto out;
}
rxrpc_store_error(peer, skb);
out:
rxrpc_put_peer(peer, rxrpc_peer_put_input_error);
}
/*
* Map an error report to error codes on the peer record.
*/
static void rxrpc_store_error(struct rxrpc_peer *peer, struct sk_buff *skb)
{
enum rxrpc_call_completion compl = RXRPC_CALL_NETWORK_ERROR;
struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
struct sock_extended_err *ee = &serr->ee;
int err = ee->ee_errno;
_enter("");
switch (ee->ee_origin) {
case SO_EE_ORIGIN_NONE:
case SO_EE_ORIGIN_LOCAL:
compl = RXRPC_CALL_LOCAL_ERROR;
break;
case SO_EE_ORIGIN_ICMP6:
if (err == EACCES)
err = EHOSTUNREACH;
fallthrough;
case SO_EE_ORIGIN_ICMP:
default:
break;
}
rxrpc_distribute_error(peer, skb, compl, err);
}
/*
* Distribute an error that occurred on a peer.
*/
static void rxrpc_distribute_error(struct rxrpc_peer *peer, struct sk_buff *skb,
enum rxrpc_call_completion compl, int err)
{
struct rxrpc_call *call;
HLIST_HEAD(error_targets);
spin_lock_irq(&peer->lock);
hlist_move_list(&peer->error_targets, &error_targets);
while (!hlist_empty(&error_targets)) {
call = hlist_entry(error_targets.first,
struct rxrpc_call, error_link);
hlist_del_init(&call->error_link);
spin_unlock_irq(&peer->lock);
rxrpc_see_call(call, rxrpc_call_see_distribute_error);
rxrpc_set_call_completion(call, compl, 0, -err);
2024-12-04 07:46:41 +00:00
rxrpc_input_call_event(call);
spin_lock_irq(&peer->lock);
}
spin_unlock_irq(&peer->lock);
}
/*
rxrpc: Fix the keepalive generator [ver #2] AF_RXRPC has a keepalive message generator that generates a message for a peer ~20s after the last transmission to that peer to keep firewall ports open. The implementation is incorrect in the following ways: (1) It mixes up ktime_t and time64_t types. (2) It uses ktime_get_real(), the output of which may jump forward or backward due to adjustments to the time of day. (3) If the current time jumps forward too much or jumps backwards, the generator function will crank the base of the time ring round one slot at a time (ie. a 1s period) until it catches up, spewing out VERSION packets as it goes. Fix the problem by: (1) Only using time64_t. There's no need for sub-second resolution. (2) Use ktime_get_seconds() rather than ktime_get_real() so that time isn't perceived to go backwards. (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two parts: (a) The "worker" function that manages the buckets and the timer. (b) The "dispatch" function that takes the pending peers and potentially transmits a keepalive packet before putting them back in the ring into the slot appropriate to the revised last-Tx time. (4) Taking everything that's pending out of the ring and splicing it into a temporary collector list for processing. In the case that there's been a significant jump forward, the ring gets entirely emptied and then the time base can be warped forward before the peers are processed. The warping can't happen if the ring isn't empty because the slot a peer is in is keepalive-time dependent, relative to the base time. (5) Limit the number of iterations of the bucket array when scanning it. (6) Set the timer to skip any empty slots as there's no point waking up if there's nothing to do yet. This can be triggered by an incoming call from a server after a reboot with AF_RXRPC and AFS built into the kernel causing a peer record to be set up before userspace is started. The system clock is then adjusted by userspace, thereby potentially causing the keepalive generator to have a meltdown - which leads to a message like: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23] ... Workqueue: krxrpcd rxrpc_peer_keepalive_worker EIP: lock_acquire+0x69/0x80 ... Call Trace: ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? _raw_spin_lock_bh+0x29/0x60 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? __lock_acquire+0x3d3/0x870 ? process_one_work+0x110/0x340 ? process_one_work+0x166/0x340 ? process_one_work+0x110/0x340 ? worker_thread+0x39/0x3c0 ? kthread+0xdb/0x110 ? cancel_delayed_work+0x90/0x90 ? kthread_stop+0x70/0x70 ? ret_from_fork+0x19/0x24 Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:30:02 +01:00
* Perform keep-alive pings.
*/
rxrpc: Fix the keepalive generator [ver #2] AF_RXRPC has a keepalive message generator that generates a message for a peer ~20s after the last transmission to that peer to keep firewall ports open. The implementation is incorrect in the following ways: (1) It mixes up ktime_t and time64_t types. (2) It uses ktime_get_real(), the output of which may jump forward or backward due to adjustments to the time of day. (3) If the current time jumps forward too much or jumps backwards, the generator function will crank the base of the time ring round one slot at a time (ie. a 1s period) until it catches up, spewing out VERSION packets as it goes. Fix the problem by: (1) Only using time64_t. There's no need for sub-second resolution. (2) Use ktime_get_seconds() rather than ktime_get_real() so that time isn't perceived to go backwards. (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two parts: (a) The "worker" function that manages the buckets and the timer. (b) The "dispatch" function that takes the pending peers and potentially transmits a keepalive packet before putting them back in the ring into the slot appropriate to the revised last-Tx time. (4) Taking everything that's pending out of the ring and splicing it into a temporary collector list for processing. In the case that there's been a significant jump forward, the ring gets entirely emptied and then the time base can be warped forward before the peers are processed. The warping can't happen if the ring isn't empty because the slot a peer is in is keepalive-time dependent, relative to the base time. (5) Limit the number of iterations of the bucket array when scanning it. (6) Set the timer to skip any empty slots as there's no point waking up if there's nothing to do yet. This can be triggered by an incoming call from a server after a reboot with AF_RXRPC and AFS built into the kernel causing a peer record to be set up before userspace is started. The system clock is then adjusted by userspace, thereby potentially causing the keepalive generator to have a meltdown - which leads to a message like: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23] ... Workqueue: krxrpcd rxrpc_peer_keepalive_worker EIP: lock_acquire+0x69/0x80 ... Call Trace: ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? _raw_spin_lock_bh+0x29/0x60 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? __lock_acquire+0x3d3/0x870 ? process_one_work+0x110/0x340 ? process_one_work+0x166/0x340 ? process_one_work+0x110/0x340 ? worker_thread+0x39/0x3c0 ? kthread+0xdb/0x110 ? cancel_delayed_work+0x90/0x90 ? kthread_stop+0x70/0x70 ? ret_from_fork+0x19/0x24 Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:30:02 +01:00
static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
struct list_head *collector,
time64_t base,
u8 cursor)
{
struct rxrpc_peer *peer;
rxrpc: Fix the keepalive generator [ver #2] AF_RXRPC has a keepalive message generator that generates a message for a peer ~20s after the last transmission to that peer to keep firewall ports open. The implementation is incorrect in the following ways: (1) It mixes up ktime_t and time64_t types. (2) It uses ktime_get_real(), the output of which may jump forward or backward due to adjustments to the time of day. (3) If the current time jumps forward too much or jumps backwards, the generator function will crank the base of the time ring round one slot at a time (ie. a 1s period) until it catches up, spewing out VERSION packets as it goes. Fix the problem by: (1) Only using time64_t. There's no need for sub-second resolution. (2) Use ktime_get_seconds() rather than ktime_get_real() so that time isn't perceived to go backwards. (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two parts: (a) The "worker" function that manages the buckets and the timer. (b) The "dispatch" function that takes the pending peers and potentially transmits a keepalive packet before putting them back in the ring into the slot appropriate to the revised last-Tx time. (4) Taking everything that's pending out of the ring and splicing it into a temporary collector list for processing. In the case that there's been a significant jump forward, the ring gets entirely emptied and then the time base can be warped forward before the peers are processed. The warping can't happen if the ring isn't empty because the slot a peer is in is keepalive-time dependent, relative to the base time. (5) Limit the number of iterations of the bucket array when scanning it. (6) Set the timer to skip any empty slots as there's no point waking up if there's nothing to do yet. This can be triggered by an incoming call from a server after a reboot with AF_RXRPC and AFS built into the kernel causing a peer record to be set up before userspace is started. The system clock is then adjusted by userspace, thereby potentially causing the keepalive generator to have a meltdown - which leads to a message like: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23] ... Workqueue: krxrpcd rxrpc_peer_keepalive_worker EIP: lock_acquire+0x69/0x80 ... Call Trace: ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? _raw_spin_lock_bh+0x29/0x60 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? __lock_acquire+0x3d3/0x870 ? process_one_work+0x110/0x340 ? process_one_work+0x166/0x340 ? process_one_work+0x110/0x340 ? worker_thread+0x39/0x3c0 ? kthread+0xdb/0x110 ? cancel_delayed_work+0x90/0x90 ? kthread_stop+0x70/0x70 ? ret_from_fork+0x19/0x24 Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:30:02 +01:00
const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
time64_t keepalive_at;
rxrpc: Fix locking issues in rxrpc_put_peer_locked() Now that rxrpc_put_local() may call kthread_stop(), it can't be called under spinlock as it might sleep. This can cause a problem in the peer keepalive code in rxrpc as it tries to avoid dropping the peer_hash_lock from the point it needs to re-add peer->keepalive_link to going round the loop again in rxrpc_peer_keepalive_dispatch(). Fix this by just dropping the lock when we don't need it and accepting that we'll have to take it again. This code is only called about every 20s for each peer, so not very often. This allows rxrpc_put_peer_unlocked() to be removed also. If triggered, this bug produces an oops like the following, as reproduced by a syzbot reproducer for a different oops[1]: BUG: sleeping function called from invalid context at kernel/sched/completion.c:101 ... RCU nest depth: 0, expected: 0 3 locks held by kworker/u9:0/50: #0: ffff88810e74a138 ((wq_completion)krxrpcd){+.+.}-{0:0}, at: process_one_work+0x294/0x636 #1: ffff8881013a7e20 ((work_completion)(&rxnet->peer_keepalive_work)){+.+.}-{0:0}, at: process_one_work+0x294/0x636 #2: ffff88817d366390 (&rxnet->peer_hash_lock){+.+.}-{2:2}, at: rxrpc_peer_keepalive_dispatch+0x2bd/0x35f ... Call Trace: <TASK> dump_stack_lvl+0x4c/0x5f __might_resched+0x2cf/0x2f2 __wait_for_common+0x87/0x1e8 kthread_stop+0x14d/0x255 rxrpc_peer_keepalive_dispatch+0x333/0x35f rxrpc_peer_keepalive_worker+0x2e9/0x449 process_one_work+0x3c1/0x636 worker_thread+0x25f/0x359 kthread+0x1a6/0x1b5 ret_from_fork+0x1f/0x30 Fixes: a275da62e8c1 ("rxrpc: Create a per-local endpoint receive queue and I/O thread") Signed-off-by: David Howells <dhowells@redhat.com> cc: Marc Dionne <marc.dionne@auristor.com> cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/0000000000002b4a9f05ef2b616f@google.com/ [1] Signed-off-by: David S. Miller <davem@davemloft.net>
2022-12-15 16:20:21 +00:00
bool use;
rxrpc: Fix the keepalive generator [ver #2] AF_RXRPC has a keepalive message generator that generates a message for a peer ~20s after the last transmission to that peer to keep firewall ports open. The implementation is incorrect in the following ways: (1) It mixes up ktime_t and time64_t types. (2) It uses ktime_get_real(), the output of which may jump forward or backward due to adjustments to the time of day. (3) If the current time jumps forward too much or jumps backwards, the generator function will crank the base of the time ring round one slot at a time (ie. a 1s period) until it catches up, spewing out VERSION packets as it goes. Fix the problem by: (1) Only using time64_t. There's no need for sub-second resolution. (2) Use ktime_get_seconds() rather than ktime_get_real() so that time isn't perceived to go backwards. (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two parts: (a) The "worker" function that manages the buckets and the timer. (b) The "dispatch" function that takes the pending peers and potentially transmits a keepalive packet before putting them back in the ring into the slot appropriate to the revised last-Tx time. (4) Taking everything that's pending out of the ring and splicing it into a temporary collector list for processing. In the case that there's been a significant jump forward, the ring gets entirely emptied and then the time base can be warped forward before the peers are processed. The warping can't happen if the ring isn't empty because the slot a peer is in is keepalive-time dependent, relative to the base time. (5) Limit the number of iterations of the bucket array when scanning it. (6) Set the timer to skip any empty slots as there's no point waking up if there's nothing to do yet. This can be triggered by an incoming call from a server after a reboot with AF_RXRPC and AFS built into the kernel causing a peer record to be set up before userspace is started. The system clock is then adjusted by userspace, thereby potentially causing the keepalive generator to have a meltdown - which leads to a message like: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23] ... Workqueue: krxrpcd rxrpc_peer_keepalive_worker EIP: lock_acquire+0x69/0x80 ... Call Trace: ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? _raw_spin_lock_bh+0x29/0x60 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? __lock_acquire+0x3d3/0x870 ? process_one_work+0x110/0x340 ? process_one_work+0x166/0x340 ? process_one_work+0x110/0x340 ? worker_thread+0x39/0x3c0 ? kthread+0xdb/0x110 ? cancel_delayed_work+0x90/0x90 ? kthread_stop+0x70/0x70 ? ret_from_fork+0x19/0x24 Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:30:02 +01:00
int slot;
rxrpc, afs: Fix peer hash locking vs RCU callback In its address list, afs now retains pointers to and refs on one or more rxrpc_peer objects. The address list is freed under RCU and at this time, it puts the refs on those peers. Now, when an rxrpc_peer object runs out of refs, it gets removed from the peer hash table and, for that, rxrpc has to take a spinlock. However, it is now being called from afs's RCU cleanup, which takes place in BH context - but it is just taking an ordinary spinlock. The put may also be called from non-BH context, and so there exists the possibility of deadlock if the BH-based RCU cleanup happens whilst the hash spinlock is held. This led to the attached lockdep complaint. Fix this by changing spinlocks of rxnet->peer_hash_lock back to BH-disabling locks. ================================ WARNING: inconsistent lock state 6.13.0-rc5-build2+ #1223 Tainted: G E -------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/1/0 [HC0[0]:SC1[1]:HE1:SE0] takes: ffff88810babe228 (&rxnet->peer_hash_lock){+.?.}-{3:3}, at: rxrpc_put_peer+0xcb/0x180 {SOFTIRQ-ON-W} state was registered at: mark_usage+0x164/0x180 __lock_acquire+0x544/0x990 lock_acquire.part.0+0x103/0x280 _raw_spin_lock+0x2f/0x40 rxrpc_peer_keepalive_worker+0x144/0x440 process_one_work+0x486/0x7c0 process_scheduled_works+0x73/0x90 worker_thread+0x1c8/0x2a0 kthread+0x19b/0x1b0 ret_from_fork+0x24/0x40 ret_from_fork_asm+0x1a/0x30 irq event stamp: 972402 hardirqs last enabled at (972402): [<ffffffff8244360e>] _raw_spin_unlock_irqrestore+0x2e/0x50 hardirqs last disabled at (972401): [<ffffffff82443328>] _raw_spin_lock_irqsave+0x18/0x60 softirqs last enabled at (972300): [<ffffffff810ffbbe>] handle_softirqs+0x3ee/0x430 softirqs last disabled at (972313): [<ffffffff810ffc54>] __irq_exit_rcu+0x44/0x110 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&rxnet->peer_hash_lock); <Interrupt> lock(&rxnet->peer_hash_lock); *** DEADLOCK *** 1 lock held by swapper/1/0: #0: ffffffff83576be0 (rcu_callback){....}-{0:0}, at: rcu_lock_acquire+0x7/0x30 stack backtrace: CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Tainted: G E 6.13.0-rc5-build2+ #1223 Tainted: [E]=UNSIGNED_MODULE Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Call Trace: <IRQ> dump_stack_lvl+0x57/0x80 print_usage_bug.part.0+0x227/0x240 valid_state+0x53/0x70 mark_lock_irq+0xa5/0x2f0 mark_lock+0xf7/0x170 mark_usage+0xe1/0x180 __lock_acquire+0x544/0x990 lock_acquire.part.0+0x103/0x280 _raw_spin_lock+0x2f/0x40 rxrpc_put_peer+0xcb/0x180 afs_free_addrlist+0x46/0x90 [kafs] rcu_do_batch+0x2d2/0x640 rcu_core+0x2f7/0x350 handle_softirqs+0x1ee/0x430 __irq_exit_rcu+0x44/0x110 irq_exit_rcu+0xa/0x30 sysvec_apic_timer_interrupt+0x7f/0xa0 </IRQ> Fixes: 72904d7b9bfb ("rxrpc, afs: Allow afs to pin rxrpc_peer objects") Signed-off-by: David Howells <dhowells@redhat.com> cc: Marc Dionne <marc.dionne@auristor.com> cc: Simon Horman <horms@kernel.org> cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/2095618.1737622752@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-01-23 08:59:12 +00:00
spin_lock_bh(&rxnet->peer_hash_lock);
rxrpc: Fix the keepalive generator [ver #2] AF_RXRPC has a keepalive message generator that generates a message for a peer ~20s after the last transmission to that peer to keep firewall ports open. The implementation is incorrect in the following ways: (1) It mixes up ktime_t and time64_t types. (2) It uses ktime_get_real(), the output of which may jump forward or backward due to adjustments to the time of day. (3) If the current time jumps forward too much or jumps backwards, the generator function will crank the base of the time ring round one slot at a time (ie. a 1s period) until it catches up, spewing out VERSION packets as it goes. Fix the problem by: (1) Only using time64_t. There's no need for sub-second resolution. (2) Use ktime_get_seconds() rather than ktime_get_real() so that time isn't perceived to go backwards. (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two parts: (a) The "worker" function that manages the buckets and the timer. (b) The "dispatch" function that takes the pending peers and potentially transmits a keepalive packet before putting them back in the ring into the slot appropriate to the revised last-Tx time. (4) Taking everything that's pending out of the ring and splicing it into a temporary collector list for processing. In the case that there's been a significant jump forward, the ring gets entirely emptied and then the time base can be warped forward before the peers are processed. The warping can't happen if the ring isn't empty because the slot a peer is in is keepalive-time dependent, relative to the base time. (5) Limit the number of iterations of the bucket array when scanning it. (6) Set the timer to skip any empty slots as there's no point waking up if there's nothing to do yet. This can be triggered by an incoming call from a server after a reboot with AF_RXRPC and AFS built into the kernel causing a peer record to be set up before userspace is started. The system clock is then adjusted by userspace, thereby potentially causing the keepalive generator to have a meltdown - which leads to a message like: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23] ... Workqueue: krxrpcd rxrpc_peer_keepalive_worker EIP: lock_acquire+0x69/0x80 ... Call Trace: ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? _raw_spin_lock_bh+0x29/0x60 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? __lock_acquire+0x3d3/0x870 ? process_one_work+0x110/0x340 ? process_one_work+0x166/0x340 ? process_one_work+0x110/0x340 ? worker_thread+0x39/0x3c0 ? kthread+0xdb/0x110 ? cancel_delayed_work+0x90/0x90 ? kthread_stop+0x70/0x70 ? ret_from_fork+0x19/0x24 Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:30:02 +01:00
while (!list_empty(collector)) {
peer = list_entry(collector->next,
struct rxrpc_peer, keepalive_link);
rxrpc: Fix the keepalive generator [ver #2] AF_RXRPC has a keepalive message generator that generates a message for a peer ~20s after the last transmission to that peer to keep firewall ports open. The implementation is incorrect in the following ways: (1) It mixes up ktime_t and time64_t types. (2) It uses ktime_get_real(), the output of which may jump forward or backward due to adjustments to the time of day. (3) If the current time jumps forward too much or jumps backwards, the generator function will crank the base of the time ring round one slot at a time (ie. a 1s period) until it catches up, spewing out VERSION packets as it goes. Fix the problem by: (1) Only using time64_t. There's no need for sub-second resolution. (2) Use ktime_get_seconds() rather than ktime_get_real() so that time isn't perceived to go backwards. (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two parts: (a) The "worker" function that manages the buckets and the timer. (b) The "dispatch" function that takes the pending peers and potentially transmits a keepalive packet before putting them back in the ring into the slot appropriate to the revised last-Tx time. (4) Taking everything that's pending out of the ring and splicing it into a temporary collector list for processing. In the case that there's been a significant jump forward, the ring gets entirely emptied and then the time base can be warped forward before the peers are processed. The warping can't happen if the ring isn't empty because the slot a peer is in is keepalive-time dependent, relative to the base time. (5) Limit the number of iterations of the bucket array when scanning it. (6) Set the timer to skip any empty slots as there's no point waking up if there's nothing to do yet. This can be triggered by an incoming call from a server after a reboot with AF_RXRPC and AFS built into the kernel causing a peer record to be set up before userspace is started. The system clock is then adjusted by userspace, thereby potentially causing the keepalive generator to have a meltdown - which leads to a message like: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23] ... Workqueue: krxrpcd rxrpc_peer_keepalive_worker EIP: lock_acquire+0x69/0x80 ... Call Trace: ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? _raw_spin_lock_bh+0x29/0x60 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? __lock_acquire+0x3d3/0x870 ? process_one_work+0x110/0x340 ? process_one_work+0x166/0x340 ? process_one_work+0x110/0x340 ? worker_thread+0x39/0x3c0 ? kthread+0xdb/0x110 ? cancel_delayed_work+0x90/0x90 ? kthread_stop+0x70/0x70 ? ret_from_fork+0x19/0x24 Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:30:02 +01:00
list_del_init(&peer->keepalive_link);
if (!rxrpc_get_peer_maybe(peer, rxrpc_peer_get_keepalive))
rxrpc: Fix the keepalive generator [ver #2] AF_RXRPC has a keepalive message generator that generates a message for a peer ~20s after the last transmission to that peer to keep firewall ports open. The implementation is incorrect in the following ways: (1) It mixes up ktime_t and time64_t types. (2) It uses ktime_get_real(), the output of which may jump forward or backward due to adjustments to the time of day. (3) If the current time jumps forward too much or jumps backwards, the generator function will crank the base of the time ring round one slot at a time (ie. a 1s period) until it catches up, spewing out VERSION packets as it goes. Fix the problem by: (1) Only using time64_t. There's no need for sub-second resolution. (2) Use ktime_get_seconds() rather than ktime_get_real() so that time isn't perceived to go backwards. (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two parts: (a) The "worker" function that manages the buckets and the timer. (b) The "dispatch" function that takes the pending peers and potentially transmits a keepalive packet before putting them back in the ring into the slot appropriate to the revised last-Tx time. (4) Taking everything that's pending out of the ring and splicing it into a temporary collector list for processing. In the case that there's been a significant jump forward, the ring gets entirely emptied and then the time base can be warped forward before the peers are processed. The warping can't happen if the ring isn't empty because the slot a peer is in is keepalive-time dependent, relative to the base time. (5) Limit the number of iterations of the bucket array when scanning it. (6) Set the timer to skip any empty slots as there's no point waking up if there's nothing to do yet. This can be triggered by an incoming call from a server after a reboot with AF_RXRPC and AFS built into the kernel causing a peer record to be set up before userspace is started. The system clock is then adjusted by userspace, thereby potentially causing the keepalive generator to have a meltdown - which leads to a message like: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23] ... Workqueue: krxrpcd rxrpc_peer_keepalive_worker EIP: lock_acquire+0x69/0x80 ... Call Trace: ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? _raw_spin_lock_bh+0x29/0x60 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? __lock_acquire+0x3d3/0x870 ? process_one_work+0x110/0x340 ? process_one_work+0x166/0x340 ? process_one_work+0x110/0x340 ? worker_thread+0x39/0x3c0 ? kthread+0xdb/0x110 ? cancel_delayed_work+0x90/0x90 ? kthread_stop+0x70/0x70 ? ret_from_fork+0x19/0x24 Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:30:02 +01:00
continue;
rxrpc: Fix locking issues in rxrpc_put_peer_locked() Now that rxrpc_put_local() may call kthread_stop(), it can't be called under spinlock as it might sleep. This can cause a problem in the peer keepalive code in rxrpc as it tries to avoid dropping the peer_hash_lock from the point it needs to re-add peer->keepalive_link to going round the loop again in rxrpc_peer_keepalive_dispatch(). Fix this by just dropping the lock when we don't need it and accepting that we'll have to take it again. This code is only called about every 20s for each peer, so not very often. This allows rxrpc_put_peer_unlocked() to be removed also. If triggered, this bug produces an oops like the following, as reproduced by a syzbot reproducer for a different oops[1]: BUG: sleeping function called from invalid context at kernel/sched/completion.c:101 ... RCU nest depth: 0, expected: 0 3 locks held by kworker/u9:0/50: #0: ffff88810e74a138 ((wq_completion)krxrpcd){+.+.}-{0:0}, at: process_one_work+0x294/0x636 #1: ffff8881013a7e20 ((work_completion)(&rxnet->peer_keepalive_work)){+.+.}-{0:0}, at: process_one_work+0x294/0x636 #2: ffff88817d366390 (&rxnet->peer_hash_lock){+.+.}-{2:2}, at: rxrpc_peer_keepalive_dispatch+0x2bd/0x35f ... Call Trace: <TASK> dump_stack_lvl+0x4c/0x5f __might_resched+0x2cf/0x2f2 __wait_for_common+0x87/0x1e8 kthread_stop+0x14d/0x255 rxrpc_peer_keepalive_dispatch+0x333/0x35f rxrpc_peer_keepalive_worker+0x2e9/0x449 process_one_work+0x3c1/0x636 worker_thread+0x25f/0x359 kthread+0x1a6/0x1b5 ret_from_fork+0x1f/0x30 Fixes: a275da62e8c1 ("rxrpc: Create a per-local endpoint receive queue and I/O thread") Signed-off-by: David Howells <dhowells@redhat.com> cc: Marc Dionne <marc.dionne@auristor.com> cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/0000000000002b4a9f05ef2b616f@google.com/ [1] Signed-off-by: David S. Miller <davem@davemloft.net>
2022-12-15 16:20:21 +00:00
use = __rxrpc_use_local(peer->local, rxrpc_local_use_peer_keepalive);
rxrpc, afs: Fix peer hash locking vs RCU callback In its address list, afs now retains pointers to and refs on one or more rxrpc_peer objects. The address list is freed under RCU and at this time, it puts the refs on those peers. Now, when an rxrpc_peer object runs out of refs, it gets removed from the peer hash table and, for that, rxrpc has to take a spinlock. However, it is now being called from afs's RCU cleanup, which takes place in BH context - but it is just taking an ordinary spinlock. The put may also be called from non-BH context, and so there exists the possibility of deadlock if the BH-based RCU cleanup happens whilst the hash spinlock is held. This led to the attached lockdep complaint. Fix this by changing spinlocks of rxnet->peer_hash_lock back to BH-disabling locks. ================================ WARNING: inconsistent lock state 6.13.0-rc5-build2+ #1223 Tainted: G E -------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/1/0 [HC0[0]:SC1[1]:HE1:SE0] takes: ffff88810babe228 (&rxnet->peer_hash_lock){+.?.}-{3:3}, at: rxrpc_put_peer+0xcb/0x180 {SOFTIRQ-ON-W} state was registered at: mark_usage+0x164/0x180 __lock_acquire+0x544/0x990 lock_acquire.part.0+0x103/0x280 _raw_spin_lock+0x2f/0x40 rxrpc_peer_keepalive_worker+0x144/0x440 process_one_work+0x486/0x7c0 process_scheduled_works+0x73/0x90 worker_thread+0x1c8/0x2a0 kthread+0x19b/0x1b0 ret_from_fork+0x24/0x40 ret_from_fork_asm+0x1a/0x30 irq event stamp: 972402 hardirqs last enabled at (972402): [<ffffffff8244360e>] _raw_spin_unlock_irqrestore+0x2e/0x50 hardirqs last disabled at (972401): [<ffffffff82443328>] _raw_spin_lock_irqsave+0x18/0x60 softirqs last enabled at (972300): [<ffffffff810ffbbe>] handle_softirqs+0x3ee/0x430 softirqs last disabled at (972313): [<ffffffff810ffc54>] __irq_exit_rcu+0x44/0x110 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&rxnet->peer_hash_lock); <Interrupt> lock(&rxnet->peer_hash_lock); *** DEADLOCK *** 1 lock held by swapper/1/0: #0: ffffffff83576be0 (rcu_callback){....}-{0:0}, at: rcu_lock_acquire+0x7/0x30 stack backtrace: CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Tainted: G E 6.13.0-rc5-build2+ #1223 Tainted: [E]=UNSIGNED_MODULE Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Call Trace: <IRQ> dump_stack_lvl+0x57/0x80 print_usage_bug.part.0+0x227/0x240 valid_state+0x53/0x70 mark_lock_irq+0xa5/0x2f0 mark_lock+0xf7/0x170 mark_usage+0xe1/0x180 __lock_acquire+0x544/0x990 lock_acquire.part.0+0x103/0x280 _raw_spin_lock+0x2f/0x40 rxrpc_put_peer+0xcb/0x180 afs_free_addrlist+0x46/0x90 [kafs] rcu_do_batch+0x2d2/0x640 rcu_core+0x2f7/0x350 handle_softirqs+0x1ee/0x430 __irq_exit_rcu+0x44/0x110 irq_exit_rcu+0xa/0x30 sysvec_apic_timer_interrupt+0x7f/0xa0 </IRQ> Fixes: 72904d7b9bfb ("rxrpc, afs: Allow afs to pin rxrpc_peer objects") Signed-off-by: David Howells <dhowells@redhat.com> cc: Marc Dionne <marc.dionne@auristor.com> cc: Simon Horman <horms@kernel.org> cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/2095618.1737622752@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-01-23 08:59:12 +00:00
spin_unlock_bh(&rxnet->peer_hash_lock);
rxrpc: Fix locking issues in rxrpc_put_peer_locked() Now that rxrpc_put_local() may call kthread_stop(), it can't be called under spinlock as it might sleep. This can cause a problem in the peer keepalive code in rxrpc as it tries to avoid dropping the peer_hash_lock from the point it needs to re-add peer->keepalive_link to going round the loop again in rxrpc_peer_keepalive_dispatch(). Fix this by just dropping the lock when we don't need it and accepting that we'll have to take it again. This code is only called about every 20s for each peer, so not very often. This allows rxrpc_put_peer_unlocked() to be removed also. If triggered, this bug produces an oops like the following, as reproduced by a syzbot reproducer for a different oops[1]: BUG: sleeping function called from invalid context at kernel/sched/completion.c:101 ... RCU nest depth: 0, expected: 0 3 locks held by kworker/u9:0/50: #0: ffff88810e74a138 ((wq_completion)krxrpcd){+.+.}-{0:0}, at: process_one_work+0x294/0x636 #1: ffff8881013a7e20 ((work_completion)(&rxnet->peer_keepalive_work)){+.+.}-{0:0}, at: process_one_work+0x294/0x636 #2: ffff88817d366390 (&rxnet->peer_hash_lock){+.+.}-{2:2}, at: rxrpc_peer_keepalive_dispatch+0x2bd/0x35f ... Call Trace: <TASK> dump_stack_lvl+0x4c/0x5f __might_resched+0x2cf/0x2f2 __wait_for_common+0x87/0x1e8 kthread_stop+0x14d/0x255 rxrpc_peer_keepalive_dispatch+0x333/0x35f rxrpc_peer_keepalive_worker+0x2e9/0x449 process_one_work+0x3c1/0x636 worker_thread+0x25f/0x359 kthread+0x1a6/0x1b5 ret_from_fork+0x1f/0x30 Fixes: a275da62e8c1 ("rxrpc: Create a per-local endpoint receive queue and I/O thread") Signed-off-by: David Howells <dhowells@redhat.com> cc: Marc Dionne <marc.dionne@auristor.com> cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/0000000000002b4a9f05ef2b616f@google.com/ [1] Signed-off-by: David S. Miller <davem@davemloft.net>
2022-12-15 16:20:21 +00:00
if (use) {
keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
slot = keepalive_at - base;
_debug("%02x peer %u t=%d {%pISp}",
cursor, peer->debug_id, slot, &peer->srx.transport);
if (keepalive_at <= base ||
keepalive_at > base + RXRPC_KEEPALIVE_TIME) {
rxrpc_send_keepalive(peer);
slot = RXRPC_KEEPALIVE_TIME;
}
rxrpc: Fix the keepalive generator [ver #2] AF_RXRPC has a keepalive message generator that generates a message for a peer ~20s after the last transmission to that peer to keep firewall ports open. The implementation is incorrect in the following ways: (1) It mixes up ktime_t and time64_t types. (2) It uses ktime_get_real(), the output of which may jump forward or backward due to adjustments to the time of day. (3) If the current time jumps forward too much or jumps backwards, the generator function will crank the base of the time ring round one slot at a time (ie. a 1s period) until it catches up, spewing out VERSION packets as it goes. Fix the problem by: (1) Only using time64_t. There's no need for sub-second resolution. (2) Use ktime_get_seconds() rather than ktime_get_real() so that time isn't perceived to go backwards. (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two parts: (a) The "worker" function that manages the buckets and the timer. (b) The "dispatch" function that takes the pending peers and potentially transmits a keepalive packet before putting them back in the ring into the slot appropriate to the revised last-Tx time. (4) Taking everything that's pending out of the ring and splicing it into a temporary collector list for processing. In the case that there's been a significant jump forward, the ring gets entirely emptied and then the time base can be warped forward before the peers are processed. The warping can't happen if the ring isn't empty because the slot a peer is in is keepalive-time dependent, relative to the base time. (5) Limit the number of iterations of the bucket array when scanning it. (6) Set the timer to skip any empty slots as there's no point waking up if there's nothing to do yet. This can be triggered by an incoming call from a server after a reboot with AF_RXRPC and AFS built into the kernel causing a peer record to be set up before userspace is started. The system clock is then adjusted by userspace, thereby potentially causing the keepalive generator to have a meltdown - which leads to a message like: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23] ... Workqueue: krxrpcd rxrpc_peer_keepalive_worker EIP: lock_acquire+0x69/0x80 ... Call Trace: ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? _raw_spin_lock_bh+0x29/0x60 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? __lock_acquire+0x3d3/0x870 ? process_one_work+0x110/0x340 ? process_one_work+0x166/0x340 ? process_one_work+0x110/0x340 ? worker_thread+0x39/0x3c0 ? kthread+0xdb/0x110 ? cancel_delayed_work+0x90/0x90 ? kthread_stop+0x70/0x70 ? ret_from_fork+0x19/0x24 Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:30:02 +01:00
/* A transmission to this peer occurred since last we
* examined it so put it into the appropriate future
* bucket.
*/
slot += cursor;
slot &= mask;
rxrpc, afs: Fix peer hash locking vs RCU callback In its address list, afs now retains pointers to and refs on one or more rxrpc_peer objects. The address list is freed under RCU and at this time, it puts the refs on those peers. Now, when an rxrpc_peer object runs out of refs, it gets removed from the peer hash table and, for that, rxrpc has to take a spinlock. However, it is now being called from afs's RCU cleanup, which takes place in BH context - but it is just taking an ordinary spinlock. The put may also be called from non-BH context, and so there exists the possibility of deadlock if the BH-based RCU cleanup happens whilst the hash spinlock is held. This led to the attached lockdep complaint. Fix this by changing spinlocks of rxnet->peer_hash_lock back to BH-disabling locks. ================================ WARNING: inconsistent lock state 6.13.0-rc5-build2+ #1223 Tainted: G E -------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/1/0 [HC0[0]:SC1[1]:HE1:SE0] takes: ffff88810babe228 (&rxnet->peer_hash_lock){+.?.}-{3:3}, at: rxrpc_put_peer+0xcb/0x180 {SOFTIRQ-ON-W} state was registered at: mark_usage+0x164/0x180 __lock_acquire+0x544/0x990 lock_acquire.part.0+0x103/0x280 _raw_spin_lock+0x2f/0x40 rxrpc_peer_keepalive_worker+0x144/0x440 process_one_work+0x486/0x7c0 process_scheduled_works+0x73/0x90 worker_thread+0x1c8/0x2a0 kthread+0x19b/0x1b0 ret_from_fork+0x24/0x40 ret_from_fork_asm+0x1a/0x30 irq event stamp: 972402 hardirqs last enabled at (972402): [<ffffffff8244360e>] _raw_spin_unlock_irqrestore+0x2e/0x50 hardirqs last disabled at (972401): [<ffffffff82443328>] _raw_spin_lock_irqsave+0x18/0x60 softirqs last enabled at (972300): [<ffffffff810ffbbe>] handle_softirqs+0x3ee/0x430 softirqs last disabled at (972313): [<ffffffff810ffc54>] __irq_exit_rcu+0x44/0x110 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&rxnet->peer_hash_lock); <Interrupt> lock(&rxnet->peer_hash_lock); *** DEADLOCK *** 1 lock held by swapper/1/0: #0: ffffffff83576be0 (rcu_callback){....}-{0:0}, at: rcu_lock_acquire+0x7/0x30 stack backtrace: CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Tainted: G E 6.13.0-rc5-build2+ #1223 Tainted: [E]=UNSIGNED_MODULE Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Call Trace: <IRQ> dump_stack_lvl+0x57/0x80 print_usage_bug.part.0+0x227/0x240 valid_state+0x53/0x70 mark_lock_irq+0xa5/0x2f0 mark_lock+0xf7/0x170 mark_usage+0xe1/0x180 __lock_acquire+0x544/0x990 lock_acquire.part.0+0x103/0x280 _raw_spin_lock+0x2f/0x40 rxrpc_put_peer+0xcb/0x180 afs_free_addrlist+0x46/0x90 [kafs] rcu_do_batch+0x2d2/0x640 rcu_core+0x2f7/0x350 handle_softirqs+0x1ee/0x430 __irq_exit_rcu+0x44/0x110 irq_exit_rcu+0xa/0x30 sysvec_apic_timer_interrupt+0x7f/0xa0 </IRQ> Fixes: 72904d7b9bfb ("rxrpc, afs: Allow afs to pin rxrpc_peer objects") Signed-off-by: David Howells <dhowells@redhat.com> cc: Marc Dionne <marc.dionne@auristor.com> cc: Simon Horman <horms@kernel.org> cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/2095618.1737622752@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-01-23 08:59:12 +00:00
spin_lock_bh(&rxnet->peer_hash_lock);
list_add_tail(&peer->keepalive_link,
&rxnet->peer_keepalive[slot & mask]);
rxrpc, afs: Fix peer hash locking vs RCU callback In its address list, afs now retains pointers to and refs on one or more rxrpc_peer objects. The address list is freed under RCU and at this time, it puts the refs on those peers. Now, when an rxrpc_peer object runs out of refs, it gets removed from the peer hash table and, for that, rxrpc has to take a spinlock. However, it is now being called from afs's RCU cleanup, which takes place in BH context - but it is just taking an ordinary spinlock. The put may also be called from non-BH context, and so there exists the possibility of deadlock if the BH-based RCU cleanup happens whilst the hash spinlock is held. This led to the attached lockdep complaint. Fix this by changing spinlocks of rxnet->peer_hash_lock back to BH-disabling locks. ================================ WARNING: inconsistent lock state 6.13.0-rc5-build2+ #1223 Tainted: G E -------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/1/0 [HC0[0]:SC1[1]:HE1:SE0] takes: ffff88810babe228 (&rxnet->peer_hash_lock){+.?.}-{3:3}, at: rxrpc_put_peer+0xcb/0x180 {SOFTIRQ-ON-W} state was registered at: mark_usage+0x164/0x180 __lock_acquire+0x544/0x990 lock_acquire.part.0+0x103/0x280 _raw_spin_lock+0x2f/0x40 rxrpc_peer_keepalive_worker+0x144/0x440 process_one_work+0x486/0x7c0 process_scheduled_works+0x73/0x90 worker_thread+0x1c8/0x2a0 kthread+0x19b/0x1b0 ret_from_fork+0x24/0x40 ret_from_fork_asm+0x1a/0x30 irq event stamp: 972402 hardirqs last enabled at (972402): [<ffffffff8244360e>] _raw_spin_unlock_irqrestore+0x2e/0x50 hardirqs last disabled at (972401): [<ffffffff82443328>] _raw_spin_lock_irqsave+0x18/0x60 softirqs last enabled at (972300): [<ffffffff810ffbbe>] handle_softirqs+0x3ee/0x430 softirqs last disabled at (972313): [<ffffffff810ffc54>] __irq_exit_rcu+0x44/0x110 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&rxnet->peer_hash_lock); <Interrupt> lock(&rxnet->peer_hash_lock); *** DEADLOCK *** 1 lock held by swapper/1/0: #0: ffffffff83576be0 (rcu_callback){....}-{0:0}, at: rcu_lock_acquire+0x7/0x30 stack backtrace: CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Tainted: G E 6.13.0-rc5-build2+ #1223 Tainted: [E]=UNSIGNED_MODULE Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Call Trace: <IRQ> dump_stack_lvl+0x57/0x80 print_usage_bug.part.0+0x227/0x240 valid_state+0x53/0x70 mark_lock_irq+0xa5/0x2f0 mark_lock+0xf7/0x170 mark_usage+0xe1/0x180 __lock_acquire+0x544/0x990 lock_acquire.part.0+0x103/0x280 _raw_spin_lock+0x2f/0x40 rxrpc_put_peer+0xcb/0x180 afs_free_addrlist+0x46/0x90 [kafs] rcu_do_batch+0x2d2/0x640 rcu_core+0x2f7/0x350 handle_softirqs+0x1ee/0x430 __irq_exit_rcu+0x44/0x110 irq_exit_rcu+0xa/0x30 sysvec_apic_timer_interrupt+0x7f/0xa0 </IRQ> Fixes: 72904d7b9bfb ("rxrpc, afs: Allow afs to pin rxrpc_peer objects") Signed-off-by: David Howells <dhowells@redhat.com> cc: Marc Dionne <marc.dionne@auristor.com> cc: Simon Horman <horms@kernel.org> cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/2095618.1737622752@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-01-23 08:59:12 +00:00
spin_unlock_bh(&rxnet->peer_hash_lock);
rxrpc_unuse_local(peer->local, rxrpc_local_unuse_peer_keepalive);
}
rxrpc: Fix locking issues in rxrpc_put_peer_locked() Now that rxrpc_put_local() may call kthread_stop(), it can't be called under spinlock as it might sleep. This can cause a problem in the peer keepalive code in rxrpc as it tries to avoid dropping the peer_hash_lock from the point it needs to re-add peer->keepalive_link to going round the loop again in rxrpc_peer_keepalive_dispatch(). Fix this by just dropping the lock when we don't need it and accepting that we'll have to take it again. This code is only called about every 20s for each peer, so not very often. This allows rxrpc_put_peer_unlocked() to be removed also. If triggered, this bug produces an oops like the following, as reproduced by a syzbot reproducer for a different oops[1]: BUG: sleeping function called from invalid context at kernel/sched/completion.c:101 ... RCU nest depth: 0, expected: 0 3 locks held by kworker/u9:0/50: #0: ffff88810e74a138 ((wq_completion)krxrpcd){+.+.}-{0:0}, at: process_one_work+0x294/0x636 #1: ffff8881013a7e20 ((work_completion)(&rxnet->peer_keepalive_work)){+.+.}-{0:0}, at: process_one_work+0x294/0x636 #2: ffff88817d366390 (&rxnet->peer_hash_lock){+.+.}-{2:2}, at: rxrpc_peer_keepalive_dispatch+0x2bd/0x35f ... Call Trace: <TASK> dump_stack_lvl+0x4c/0x5f __might_resched+0x2cf/0x2f2 __wait_for_common+0x87/0x1e8 kthread_stop+0x14d/0x255 rxrpc_peer_keepalive_dispatch+0x333/0x35f rxrpc_peer_keepalive_worker+0x2e9/0x449 process_one_work+0x3c1/0x636 worker_thread+0x25f/0x359 kthread+0x1a6/0x1b5 ret_from_fork+0x1f/0x30 Fixes: a275da62e8c1 ("rxrpc: Create a per-local endpoint receive queue and I/O thread") Signed-off-by: David Howells <dhowells@redhat.com> cc: Marc Dionne <marc.dionne@auristor.com> cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/0000000000002b4a9f05ef2b616f@google.com/ [1] Signed-off-by: David S. Miller <davem@davemloft.net>
2022-12-15 16:20:21 +00:00
rxrpc_put_peer(peer, rxrpc_peer_put_keepalive);
rxrpc, afs: Fix peer hash locking vs RCU callback In its address list, afs now retains pointers to and refs on one or more rxrpc_peer objects. The address list is freed under RCU and at this time, it puts the refs on those peers. Now, when an rxrpc_peer object runs out of refs, it gets removed from the peer hash table and, for that, rxrpc has to take a spinlock. However, it is now being called from afs's RCU cleanup, which takes place in BH context - but it is just taking an ordinary spinlock. The put may also be called from non-BH context, and so there exists the possibility of deadlock if the BH-based RCU cleanup happens whilst the hash spinlock is held. This led to the attached lockdep complaint. Fix this by changing spinlocks of rxnet->peer_hash_lock back to BH-disabling locks. ================================ WARNING: inconsistent lock state 6.13.0-rc5-build2+ #1223 Tainted: G E -------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/1/0 [HC0[0]:SC1[1]:HE1:SE0] takes: ffff88810babe228 (&rxnet->peer_hash_lock){+.?.}-{3:3}, at: rxrpc_put_peer+0xcb/0x180 {SOFTIRQ-ON-W} state was registered at: mark_usage+0x164/0x180 __lock_acquire+0x544/0x990 lock_acquire.part.0+0x103/0x280 _raw_spin_lock+0x2f/0x40 rxrpc_peer_keepalive_worker+0x144/0x440 process_one_work+0x486/0x7c0 process_scheduled_works+0x73/0x90 worker_thread+0x1c8/0x2a0 kthread+0x19b/0x1b0 ret_from_fork+0x24/0x40 ret_from_fork_asm+0x1a/0x30 irq event stamp: 972402 hardirqs last enabled at (972402): [<ffffffff8244360e>] _raw_spin_unlock_irqrestore+0x2e/0x50 hardirqs last disabled at (972401): [<ffffffff82443328>] _raw_spin_lock_irqsave+0x18/0x60 softirqs last enabled at (972300): [<ffffffff810ffbbe>] handle_softirqs+0x3ee/0x430 softirqs last disabled at (972313): [<ffffffff810ffc54>] __irq_exit_rcu+0x44/0x110 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&rxnet->peer_hash_lock); <Interrupt> lock(&rxnet->peer_hash_lock); *** DEADLOCK *** 1 lock held by swapper/1/0: #0: ffffffff83576be0 (rcu_callback){....}-{0:0}, at: rcu_lock_acquire+0x7/0x30 stack backtrace: CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Tainted: G E 6.13.0-rc5-build2+ #1223 Tainted: [E]=UNSIGNED_MODULE Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Call Trace: <IRQ> dump_stack_lvl+0x57/0x80 print_usage_bug.part.0+0x227/0x240 valid_state+0x53/0x70 mark_lock_irq+0xa5/0x2f0 mark_lock+0xf7/0x170 mark_usage+0xe1/0x180 __lock_acquire+0x544/0x990 lock_acquire.part.0+0x103/0x280 _raw_spin_lock+0x2f/0x40 rxrpc_put_peer+0xcb/0x180 afs_free_addrlist+0x46/0x90 [kafs] rcu_do_batch+0x2d2/0x640 rcu_core+0x2f7/0x350 handle_softirqs+0x1ee/0x430 __irq_exit_rcu+0x44/0x110 irq_exit_rcu+0xa/0x30 sysvec_apic_timer_interrupt+0x7f/0xa0 </IRQ> Fixes: 72904d7b9bfb ("rxrpc, afs: Allow afs to pin rxrpc_peer objects") Signed-off-by: David Howells <dhowells@redhat.com> cc: Marc Dionne <marc.dionne@auristor.com> cc: Simon Horman <horms@kernel.org> cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/2095618.1737622752@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-01-23 08:59:12 +00:00
spin_lock_bh(&rxnet->peer_hash_lock);
}
rxrpc, afs: Fix peer hash locking vs RCU callback In its address list, afs now retains pointers to and refs on one or more rxrpc_peer objects. The address list is freed under RCU and at this time, it puts the refs on those peers. Now, when an rxrpc_peer object runs out of refs, it gets removed from the peer hash table and, for that, rxrpc has to take a spinlock. However, it is now being called from afs's RCU cleanup, which takes place in BH context - but it is just taking an ordinary spinlock. The put may also be called from non-BH context, and so there exists the possibility of deadlock if the BH-based RCU cleanup happens whilst the hash spinlock is held. This led to the attached lockdep complaint. Fix this by changing spinlocks of rxnet->peer_hash_lock back to BH-disabling locks. ================================ WARNING: inconsistent lock state 6.13.0-rc5-build2+ #1223 Tainted: G E -------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/1/0 [HC0[0]:SC1[1]:HE1:SE0] takes: ffff88810babe228 (&rxnet->peer_hash_lock){+.?.}-{3:3}, at: rxrpc_put_peer+0xcb/0x180 {SOFTIRQ-ON-W} state was registered at: mark_usage+0x164/0x180 __lock_acquire+0x544/0x990 lock_acquire.part.0+0x103/0x280 _raw_spin_lock+0x2f/0x40 rxrpc_peer_keepalive_worker+0x144/0x440 process_one_work+0x486/0x7c0 process_scheduled_works+0x73/0x90 worker_thread+0x1c8/0x2a0 kthread+0x19b/0x1b0 ret_from_fork+0x24/0x40 ret_from_fork_asm+0x1a/0x30 irq event stamp: 972402 hardirqs last enabled at (972402): [<ffffffff8244360e>] _raw_spin_unlock_irqrestore+0x2e/0x50 hardirqs last disabled at (972401): [<ffffffff82443328>] _raw_spin_lock_irqsave+0x18/0x60 softirqs last enabled at (972300): [<ffffffff810ffbbe>] handle_softirqs+0x3ee/0x430 softirqs last disabled at (972313): [<ffffffff810ffc54>] __irq_exit_rcu+0x44/0x110 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&rxnet->peer_hash_lock); <Interrupt> lock(&rxnet->peer_hash_lock); *** DEADLOCK *** 1 lock held by swapper/1/0: #0: ffffffff83576be0 (rcu_callback){....}-{0:0}, at: rcu_lock_acquire+0x7/0x30 stack backtrace: CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Tainted: G E 6.13.0-rc5-build2+ #1223 Tainted: [E]=UNSIGNED_MODULE Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Call Trace: <IRQ> dump_stack_lvl+0x57/0x80 print_usage_bug.part.0+0x227/0x240 valid_state+0x53/0x70 mark_lock_irq+0xa5/0x2f0 mark_lock+0xf7/0x170 mark_usage+0xe1/0x180 __lock_acquire+0x544/0x990 lock_acquire.part.0+0x103/0x280 _raw_spin_lock+0x2f/0x40 rxrpc_put_peer+0xcb/0x180 afs_free_addrlist+0x46/0x90 [kafs] rcu_do_batch+0x2d2/0x640 rcu_core+0x2f7/0x350 handle_softirqs+0x1ee/0x430 __irq_exit_rcu+0x44/0x110 irq_exit_rcu+0xa/0x30 sysvec_apic_timer_interrupt+0x7f/0xa0 </IRQ> Fixes: 72904d7b9bfb ("rxrpc, afs: Allow afs to pin rxrpc_peer objects") Signed-off-by: David Howells <dhowells@redhat.com> cc: Marc Dionne <marc.dionne@auristor.com> cc: Simon Horman <horms@kernel.org> cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/2095618.1737622752@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-01-23 08:59:12 +00:00
spin_unlock_bh(&rxnet->peer_hash_lock);
rxrpc: Fix the keepalive generator [ver #2] AF_RXRPC has a keepalive message generator that generates a message for a peer ~20s after the last transmission to that peer to keep firewall ports open. The implementation is incorrect in the following ways: (1) It mixes up ktime_t and time64_t types. (2) It uses ktime_get_real(), the output of which may jump forward or backward due to adjustments to the time of day. (3) If the current time jumps forward too much or jumps backwards, the generator function will crank the base of the time ring round one slot at a time (ie. a 1s period) until it catches up, spewing out VERSION packets as it goes. Fix the problem by: (1) Only using time64_t. There's no need for sub-second resolution. (2) Use ktime_get_seconds() rather than ktime_get_real() so that time isn't perceived to go backwards. (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two parts: (a) The "worker" function that manages the buckets and the timer. (b) The "dispatch" function that takes the pending peers and potentially transmits a keepalive packet before putting them back in the ring into the slot appropriate to the revised last-Tx time. (4) Taking everything that's pending out of the ring and splicing it into a temporary collector list for processing. In the case that there's been a significant jump forward, the ring gets entirely emptied and then the time base can be warped forward before the peers are processed. The warping can't happen if the ring isn't empty because the slot a peer is in is keepalive-time dependent, relative to the base time. (5) Limit the number of iterations of the bucket array when scanning it. (6) Set the timer to skip any empty slots as there's no point waking up if there's nothing to do yet. This can be triggered by an incoming call from a server after a reboot with AF_RXRPC and AFS built into the kernel causing a peer record to be set up before userspace is started. The system clock is then adjusted by userspace, thereby potentially causing the keepalive generator to have a meltdown - which leads to a message like: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23] ... Workqueue: krxrpcd rxrpc_peer_keepalive_worker EIP: lock_acquire+0x69/0x80 ... Call Trace: ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? _raw_spin_lock_bh+0x29/0x60 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? __lock_acquire+0x3d3/0x870 ? process_one_work+0x110/0x340 ? process_one_work+0x166/0x340 ? process_one_work+0x110/0x340 ? worker_thread+0x39/0x3c0 ? kthread+0xdb/0x110 ? cancel_delayed_work+0x90/0x90 ? kthread_stop+0x70/0x70 ? ret_from_fork+0x19/0x24 Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:30:02 +01:00
}
rxrpc: Fix the keepalive generator [ver #2] AF_RXRPC has a keepalive message generator that generates a message for a peer ~20s after the last transmission to that peer to keep firewall ports open. The implementation is incorrect in the following ways: (1) It mixes up ktime_t and time64_t types. (2) It uses ktime_get_real(), the output of which may jump forward or backward due to adjustments to the time of day. (3) If the current time jumps forward too much or jumps backwards, the generator function will crank the base of the time ring round one slot at a time (ie. a 1s period) until it catches up, spewing out VERSION packets as it goes. Fix the problem by: (1) Only using time64_t. There's no need for sub-second resolution. (2) Use ktime_get_seconds() rather than ktime_get_real() so that time isn't perceived to go backwards. (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two parts: (a) The "worker" function that manages the buckets and the timer. (b) The "dispatch" function that takes the pending peers and potentially transmits a keepalive packet before putting them back in the ring into the slot appropriate to the revised last-Tx time. (4) Taking everything that's pending out of the ring and splicing it into a temporary collector list for processing. In the case that there's been a significant jump forward, the ring gets entirely emptied and then the time base can be warped forward before the peers are processed. The warping can't happen if the ring isn't empty because the slot a peer is in is keepalive-time dependent, relative to the base time. (5) Limit the number of iterations of the bucket array when scanning it. (6) Set the timer to skip any empty slots as there's no point waking up if there's nothing to do yet. This can be triggered by an incoming call from a server after a reboot with AF_RXRPC and AFS built into the kernel causing a peer record to be set up before userspace is started. The system clock is then adjusted by userspace, thereby potentially causing the keepalive generator to have a meltdown - which leads to a message like: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23] ... Workqueue: krxrpcd rxrpc_peer_keepalive_worker EIP: lock_acquire+0x69/0x80 ... Call Trace: ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? _raw_spin_lock_bh+0x29/0x60 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? __lock_acquire+0x3d3/0x870 ? process_one_work+0x110/0x340 ? process_one_work+0x166/0x340 ? process_one_work+0x110/0x340 ? worker_thread+0x39/0x3c0 ? kthread+0xdb/0x110 ? cancel_delayed_work+0x90/0x90 ? kthread_stop+0x70/0x70 ? ret_from_fork+0x19/0x24 Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:30:02 +01:00
/*
* Perform keep-alive pings with VERSION packets to keep any NAT alive.
*/
void rxrpc_peer_keepalive_worker(struct work_struct *work)
{
struct rxrpc_net *rxnet =
container_of(work, struct rxrpc_net, peer_keepalive_work);
const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
time64_t base, now, delay;
u8 cursor, stop;
LIST_HEAD(collector);
rxrpc: Fix the keepalive generator [ver #2] AF_RXRPC has a keepalive message generator that generates a message for a peer ~20s after the last transmission to that peer to keep firewall ports open. The implementation is incorrect in the following ways: (1) It mixes up ktime_t and time64_t types. (2) It uses ktime_get_real(), the output of which may jump forward or backward due to adjustments to the time of day. (3) If the current time jumps forward too much or jumps backwards, the generator function will crank the base of the time ring round one slot at a time (ie. a 1s period) until it catches up, spewing out VERSION packets as it goes. Fix the problem by: (1) Only using time64_t. There's no need for sub-second resolution. (2) Use ktime_get_seconds() rather than ktime_get_real() so that time isn't perceived to go backwards. (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two parts: (a) The "worker" function that manages the buckets and the timer. (b) The "dispatch" function that takes the pending peers and potentially transmits a keepalive packet before putting them back in the ring into the slot appropriate to the revised last-Tx time. (4) Taking everything that's pending out of the ring and splicing it into a temporary collector list for processing. In the case that there's been a significant jump forward, the ring gets entirely emptied and then the time base can be warped forward before the peers are processed. The warping can't happen if the ring isn't empty because the slot a peer is in is keepalive-time dependent, relative to the base time. (5) Limit the number of iterations of the bucket array when scanning it. (6) Set the timer to skip any empty slots as there's no point waking up if there's nothing to do yet. This can be triggered by an incoming call from a server after a reboot with AF_RXRPC and AFS built into the kernel causing a peer record to be set up before userspace is started. The system clock is then adjusted by userspace, thereby potentially causing the keepalive generator to have a meltdown - which leads to a message like: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23] ... Workqueue: krxrpcd rxrpc_peer_keepalive_worker EIP: lock_acquire+0x69/0x80 ... Call Trace: ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? _raw_spin_lock_bh+0x29/0x60 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? __lock_acquire+0x3d3/0x870 ? process_one_work+0x110/0x340 ? process_one_work+0x166/0x340 ? process_one_work+0x110/0x340 ? worker_thread+0x39/0x3c0 ? kthread+0xdb/0x110 ? cancel_delayed_work+0x90/0x90 ? kthread_stop+0x70/0x70 ? ret_from_fork+0x19/0x24 Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:30:02 +01:00
now = ktime_get_seconds();
base = rxnet->peer_keepalive_base;
cursor = rxnet->peer_keepalive_cursor;
_enter("%lld,%u", base - now, cursor);
rxrpc: Fix the keepalive generator [ver #2] AF_RXRPC has a keepalive message generator that generates a message for a peer ~20s after the last transmission to that peer to keep firewall ports open. The implementation is incorrect in the following ways: (1) It mixes up ktime_t and time64_t types. (2) It uses ktime_get_real(), the output of which may jump forward or backward due to adjustments to the time of day. (3) If the current time jumps forward too much or jumps backwards, the generator function will crank the base of the time ring round one slot at a time (ie. a 1s period) until it catches up, spewing out VERSION packets as it goes. Fix the problem by: (1) Only using time64_t. There's no need for sub-second resolution. (2) Use ktime_get_seconds() rather than ktime_get_real() so that time isn't perceived to go backwards. (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two parts: (a) The "worker" function that manages the buckets and the timer. (b) The "dispatch" function that takes the pending peers and potentially transmits a keepalive packet before putting them back in the ring into the slot appropriate to the revised last-Tx time. (4) Taking everything that's pending out of the ring and splicing it into a temporary collector list for processing. In the case that there's been a significant jump forward, the ring gets entirely emptied and then the time base can be warped forward before the peers are processed. The warping can't happen if the ring isn't empty because the slot a peer is in is keepalive-time dependent, relative to the base time. (5) Limit the number of iterations of the bucket array when scanning it. (6) Set the timer to skip any empty slots as there's no point waking up if there's nothing to do yet. This can be triggered by an incoming call from a server after a reboot with AF_RXRPC and AFS built into the kernel causing a peer record to be set up before userspace is started. The system clock is then adjusted by userspace, thereby potentially causing the keepalive generator to have a meltdown - which leads to a message like: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23] ... Workqueue: krxrpcd rxrpc_peer_keepalive_worker EIP: lock_acquire+0x69/0x80 ... Call Trace: ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? _raw_spin_lock_bh+0x29/0x60 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? __lock_acquire+0x3d3/0x870 ? process_one_work+0x110/0x340 ? process_one_work+0x166/0x340 ? process_one_work+0x110/0x340 ? worker_thread+0x39/0x3c0 ? kthread+0xdb/0x110 ? cancel_delayed_work+0x90/0x90 ? kthread_stop+0x70/0x70 ? ret_from_fork+0x19/0x24 Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:30:02 +01:00
if (!rxnet->live)
return;
rxrpc: Fix the keepalive generator [ver #2] AF_RXRPC has a keepalive message generator that generates a message for a peer ~20s after the last transmission to that peer to keep firewall ports open. The implementation is incorrect in the following ways: (1) It mixes up ktime_t and time64_t types. (2) It uses ktime_get_real(), the output of which may jump forward or backward due to adjustments to the time of day. (3) If the current time jumps forward too much or jumps backwards, the generator function will crank the base of the time ring round one slot at a time (ie. a 1s period) until it catches up, spewing out VERSION packets as it goes. Fix the problem by: (1) Only using time64_t. There's no need for sub-second resolution. (2) Use ktime_get_seconds() rather than ktime_get_real() so that time isn't perceived to go backwards. (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two parts: (a) The "worker" function that manages the buckets and the timer. (b) The "dispatch" function that takes the pending peers and potentially transmits a keepalive packet before putting them back in the ring into the slot appropriate to the revised last-Tx time. (4) Taking everything that's pending out of the ring and splicing it into a temporary collector list for processing. In the case that there's been a significant jump forward, the ring gets entirely emptied and then the time base can be warped forward before the peers are processed. The warping can't happen if the ring isn't empty because the slot a peer is in is keepalive-time dependent, relative to the base time. (5) Limit the number of iterations of the bucket array when scanning it. (6) Set the timer to skip any empty slots as there's no point waking up if there's nothing to do yet. This can be triggered by an incoming call from a server after a reboot with AF_RXRPC and AFS built into the kernel causing a peer record to be set up before userspace is started. The system clock is then adjusted by userspace, thereby potentially causing the keepalive generator to have a meltdown - which leads to a message like: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23] ... Workqueue: krxrpcd rxrpc_peer_keepalive_worker EIP: lock_acquire+0x69/0x80 ... Call Trace: ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? _raw_spin_lock_bh+0x29/0x60 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? __lock_acquire+0x3d3/0x870 ? process_one_work+0x110/0x340 ? process_one_work+0x166/0x340 ? process_one_work+0x110/0x340 ? worker_thread+0x39/0x3c0 ? kthread+0xdb/0x110 ? cancel_delayed_work+0x90/0x90 ? kthread_stop+0x70/0x70 ? ret_from_fork+0x19/0x24 Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:30:02 +01:00
/* Remove to a temporary list all the peers that are currently lodged
* in expired buckets plus all new peers.
*
* Everything in the bucket at the cursor is processed this
* second; the bucket at cursor + 1 goes at now + 1s and so
* on...
*/
rxrpc, afs: Fix peer hash locking vs RCU callback In its address list, afs now retains pointers to and refs on one or more rxrpc_peer objects. The address list is freed under RCU and at this time, it puts the refs on those peers. Now, when an rxrpc_peer object runs out of refs, it gets removed from the peer hash table and, for that, rxrpc has to take a spinlock. However, it is now being called from afs's RCU cleanup, which takes place in BH context - but it is just taking an ordinary spinlock. The put may also be called from non-BH context, and so there exists the possibility of deadlock if the BH-based RCU cleanup happens whilst the hash spinlock is held. This led to the attached lockdep complaint. Fix this by changing spinlocks of rxnet->peer_hash_lock back to BH-disabling locks. ================================ WARNING: inconsistent lock state 6.13.0-rc5-build2+ #1223 Tainted: G E -------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/1/0 [HC0[0]:SC1[1]:HE1:SE0] takes: ffff88810babe228 (&rxnet->peer_hash_lock){+.?.}-{3:3}, at: rxrpc_put_peer+0xcb/0x180 {SOFTIRQ-ON-W} state was registered at: mark_usage+0x164/0x180 __lock_acquire+0x544/0x990 lock_acquire.part.0+0x103/0x280 _raw_spin_lock+0x2f/0x40 rxrpc_peer_keepalive_worker+0x144/0x440 process_one_work+0x486/0x7c0 process_scheduled_works+0x73/0x90 worker_thread+0x1c8/0x2a0 kthread+0x19b/0x1b0 ret_from_fork+0x24/0x40 ret_from_fork_asm+0x1a/0x30 irq event stamp: 972402 hardirqs last enabled at (972402): [<ffffffff8244360e>] _raw_spin_unlock_irqrestore+0x2e/0x50 hardirqs last disabled at (972401): [<ffffffff82443328>] _raw_spin_lock_irqsave+0x18/0x60 softirqs last enabled at (972300): [<ffffffff810ffbbe>] handle_softirqs+0x3ee/0x430 softirqs last disabled at (972313): [<ffffffff810ffc54>] __irq_exit_rcu+0x44/0x110 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&rxnet->peer_hash_lock); <Interrupt> lock(&rxnet->peer_hash_lock); *** DEADLOCK *** 1 lock held by swapper/1/0: #0: ffffffff83576be0 (rcu_callback){....}-{0:0}, at: rcu_lock_acquire+0x7/0x30 stack backtrace: CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Tainted: G E 6.13.0-rc5-build2+ #1223 Tainted: [E]=UNSIGNED_MODULE Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Call Trace: <IRQ> dump_stack_lvl+0x57/0x80 print_usage_bug.part.0+0x227/0x240 valid_state+0x53/0x70 mark_lock_irq+0xa5/0x2f0 mark_lock+0xf7/0x170 mark_usage+0xe1/0x180 __lock_acquire+0x544/0x990 lock_acquire.part.0+0x103/0x280 _raw_spin_lock+0x2f/0x40 rxrpc_put_peer+0xcb/0x180 afs_free_addrlist+0x46/0x90 [kafs] rcu_do_batch+0x2d2/0x640 rcu_core+0x2f7/0x350 handle_softirqs+0x1ee/0x430 __irq_exit_rcu+0x44/0x110 irq_exit_rcu+0xa/0x30 sysvec_apic_timer_interrupt+0x7f/0xa0 </IRQ> Fixes: 72904d7b9bfb ("rxrpc, afs: Allow afs to pin rxrpc_peer objects") Signed-off-by: David Howells <dhowells@redhat.com> cc: Marc Dionne <marc.dionne@auristor.com> cc: Simon Horman <horms@kernel.org> cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/2095618.1737622752@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-01-23 08:59:12 +00:00
spin_lock_bh(&rxnet->peer_hash_lock);
rxrpc: Fix the keepalive generator [ver #2] AF_RXRPC has a keepalive message generator that generates a message for a peer ~20s after the last transmission to that peer to keep firewall ports open. The implementation is incorrect in the following ways: (1) It mixes up ktime_t and time64_t types. (2) It uses ktime_get_real(), the output of which may jump forward or backward due to adjustments to the time of day. (3) If the current time jumps forward too much or jumps backwards, the generator function will crank the base of the time ring round one slot at a time (ie. a 1s period) until it catches up, spewing out VERSION packets as it goes. Fix the problem by: (1) Only using time64_t. There's no need for sub-second resolution. (2) Use ktime_get_seconds() rather than ktime_get_real() so that time isn't perceived to go backwards. (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two parts: (a) The "worker" function that manages the buckets and the timer. (b) The "dispatch" function that takes the pending peers and potentially transmits a keepalive packet before putting them back in the ring into the slot appropriate to the revised last-Tx time. (4) Taking everything that's pending out of the ring and splicing it into a temporary collector list for processing. In the case that there's been a significant jump forward, the ring gets entirely emptied and then the time base can be warped forward before the peers are processed. The warping can't happen if the ring isn't empty because the slot a peer is in is keepalive-time dependent, relative to the base time. (5) Limit the number of iterations of the bucket array when scanning it. (6) Set the timer to skip any empty slots as there's no point waking up if there's nothing to do yet. This can be triggered by an incoming call from a server after a reboot with AF_RXRPC and AFS built into the kernel causing a peer record to be set up before userspace is started. The system clock is then adjusted by userspace, thereby potentially causing the keepalive generator to have a meltdown - which leads to a message like: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23] ... Workqueue: krxrpcd rxrpc_peer_keepalive_worker EIP: lock_acquire+0x69/0x80 ... Call Trace: ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? _raw_spin_lock_bh+0x29/0x60 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? __lock_acquire+0x3d3/0x870 ? process_one_work+0x110/0x340 ? process_one_work+0x166/0x340 ? process_one_work+0x110/0x340 ? worker_thread+0x39/0x3c0 ? kthread+0xdb/0x110 ? cancel_delayed_work+0x90/0x90 ? kthread_stop+0x70/0x70 ? ret_from_fork+0x19/0x24 Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:30:02 +01:00
list_splice_init(&rxnet->peer_keepalive_new, &collector);
stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive);
while (base <= now && (s8)(cursor - stop) < 0) {
list_splice_tail_init(&rxnet->peer_keepalive[cursor & mask],
&collector);
base++;
cursor++;
}
rxrpc: Fix the keepalive generator [ver #2] AF_RXRPC has a keepalive message generator that generates a message for a peer ~20s after the last transmission to that peer to keep firewall ports open. The implementation is incorrect in the following ways: (1) It mixes up ktime_t and time64_t types. (2) It uses ktime_get_real(), the output of which may jump forward or backward due to adjustments to the time of day. (3) If the current time jumps forward too much or jumps backwards, the generator function will crank the base of the time ring round one slot at a time (ie. a 1s period) until it catches up, spewing out VERSION packets as it goes. Fix the problem by: (1) Only using time64_t. There's no need for sub-second resolution. (2) Use ktime_get_seconds() rather than ktime_get_real() so that time isn't perceived to go backwards. (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two parts: (a) The "worker" function that manages the buckets and the timer. (b) The "dispatch" function that takes the pending peers and potentially transmits a keepalive packet before putting them back in the ring into the slot appropriate to the revised last-Tx time. (4) Taking everything that's pending out of the ring and splicing it into a temporary collector list for processing. In the case that there's been a significant jump forward, the ring gets entirely emptied and then the time base can be warped forward before the peers are processed. The warping can't happen if the ring isn't empty because the slot a peer is in is keepalive-time dependent, relative to the base time. (5) Limit the number of iterations of the bucket array when scanning it. (6) Set the timer to skip any empty slots as there's no point waking up if there's nothing to do yet. This can be triggered by an incoming call from a server after a reboot with AF_RXRPC and AFS built into the kernel causing a peer record to be set up before userspace is started. The system clock is then adjusted by userspace, thereby potentially causing the keepalive generator to have a meltdown - which leads to a message like: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23] ... Workqueue: krxrpcd rxrpc_peer_keepalive_worker EIP: lock_acquire+0x69/0x80 ... Call Trace: ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? _raw_spin_lock_bh+0x29/0x60 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? __lock_acquire+0x3d3/0x870 ? process_one_work+0x110/0x340 ? process_one_work+0x166/0x340 ? process_one_work+0x110/0x340 ? worker_thread+0x39/0x3c0 ? kthread+0xdb/0x110 ? cancel_delayed_work+0x90/0x90 ? kthread_stop+0x70/0x70 ? ret_from_fork+0x19/0x24 Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:30:02 +01:00
base = now;
rxrpc, afs: Fix peer hash locking vs RCU callback In its address list, afs now retains pointers to and refs on one or more rxrpc_peer objects. The address list is freed under RCU and at this time, it puts the refs on those peers. Now, when an rxrpc_peer object runs out of refs, it gets removed from the peer hash table and, for that, rxrpc has to take a spinlock. However, it is now being called from afs's RCU cleanup, which takes place in BH context - but it is just taking an ordinary spinlock. The put may also be called from non-BH context, and so there exists the possibility of deadlock if the BH-based RCU cleanup happens whilst the hash spinlock is held. This led to the attached lockdep complaint. Fix this by changing spinlocks of rxnet->peer_hash_lock back to BH-disabling locks. ================================ WARNING: inconsistent lock state 6.13.0-rc5-build2+ #1223 Tainted: G E -------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/1/0 [HC0[0]:SC1[1]:HE1:SE0] takes: ffff88810babe228 (&rxnet->peer_hash_lock){+.?.}-{3:3}, at: rxrpc_put_peer+0xcb/0x180 {SOFTIRQ-ON-W} state was registered at: mark_usage+0x164/0x180 __lock_acquire+0x544/0x990 lock_acquire.part.0+0x103/0x280 _raw_spin_lock+0x2f/0x40 rxrpc_peer_keepalive_worker+0x144/0x440 process_one_work+0x486/0x7c0 process_scheduled_works+0x73/0x90 worker_thread+0x1c8/0x2a0 kthread+0x19b/0x1b0 ret_from_fork+0x24/0x40 ret_from_fork_asm+0x1a/0x30 irq event stamp: 972402 hardirqs last enabled at (972402): [<ffffffff8244360e>] _raw_spin_unlock_irqrestore+0x2e/0x50 hardirqs last disabled at (972401): [<ffffffff82443328>] _raw_spin_lock_irqsave+0x18/0x60 softirqs last enabled at (972300): [<ffffffff810ffbbe>] handle_softirqs+0x3ee/0x430 softirqs last disabled at (972313): [<ffffffff810ffc54>] __irq_exit_rcu+0x44/0x110 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&rxnet->peer_hash_lock); <Interrupt> lock(&rxnet->peer_hash_lock); *** DEADLOCK *** 1 lock held by swapper/1/0: #0: ffffffff83576be0 (rcu_callback){....}-{0:0}, at: rcu_lock_acquire+0x7/0x30 stack backtrace: CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Tainted: G E 6.13.0-rc5-build2+ #1223 Tainted: [E]=UNSIGNED_MODULE Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Call Trace: <IRQ> dump_stack_lvl+0x57/0x80 print_usage_bug.part.0+0x227/0x240 valid_state+0x53/0x70 mark_lock_irq+0xa5/0x2f0 mark_lock+0xf7/0x170 mark_usage+0xe1/0x180 __lock_acquire+0x544/0x990 lock_acquire.part.0+0x103/0x280 _raw_spin_lock+0x2f/0x40 rxrpc_put_peer+0xcb/0x180 afs_free_addrlist+0x46/0x90 [kafs] rcu_do_batch+0x2d2/0x640 rcu_core+0x2f7/0x350 handle_softirqs+0x1ee/0x430 __irq_exit_rcu+0x44/0x110 irq_exit_rcu+0xa/0x30 sysvec_apic_timer_interrupt+0x7f/0xa0 </IRQ> Fixes: 72904d7b9bfb ("rxrpc, afs: Allow afs to pin rxrpc_peer objects") Signed-off-by: David Howells <dhowells@redhat.com> cc: Marc Dionne <marc.dionne@auristor.com> cc: Simon Horman <horms@kernel.org> cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/2095618.1737622752@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-01-23 08:59:12 +00:00
spin_unlock_bh(&rxnet->peer_hash_lock);
rxnet->peer_keepalive_base = base;
rxnet->peer_keepalive_cursor = cursor;
rxrpc: Fix the keepalive generator [ver #2] AF_RXRPC has a keepalive message generator that generates a message for a peer ~20s after the last transmission to that peer to keep firewall ports open. The implementation is incorrect in the following ways: (1) It mixes up ktime_t and time64_t types. (2) It uses ktime_get_real(), the output of which may jump forward or backward due to adjustments to the time of day. (3) If the current time jumps forward too much or jumps backwards, the generator function will crank the base of the time ring round one slot at a time (ie. a 1s period) until it catches up, spewing out VERSION packets as it goes. Fix the problem by: (1) Only using time64_t. There's no need for sub-second resolution. (2) Use ktime_get_seconds() rather than ktime_get_real() so that time isn't perceived to go backwards. (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two parts: (a) The "worker" function that manages the buckets and the timer. (b) The "dispatch" function that takes the pending peers and potentially transmits a keepalive packet before putting them back in the ring into the slot appropriate to the revised last-Tx time. (4) Taking everything that's pending out of the ring and splicing it into a temporary collector list for processing. In the case that there's been a significant jump forward, the ring gets entirely emptied and then the time base can be warped forward before the peers are processed. The warping can't happen if the ring isn't empty because the slot a peer is in is keepalive-time dependent, relative to the base time. (5) Limit the number of iterations of the bucket array when scanning it. (6) Set the timer to skip any empty slots as there's no point waking up if there's nothing to do yet. This can be triggered by an incoming call from a server after a reboot with AF_RXRPC and AFS built into the kernel causing a peer record to be set up before userspace is started. The system clock is then adjusted by userspace, thereby potentially causing the keepalive generator to have a meltdown - which leads to a message like: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23] ... Workqueue: krxrpcd rxrpc_peer_keepalive_worker EIP: lock_acquire+0x69/0x80 ... Call Trace: ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? _raw_spin_lock_bh+0x29/0x60 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? rxrpc_peer_keepalive_worker+0x5e/0x350 ? __lock_acquire+0x3d3/0x870 ? process_one_work+0x110/0x340 ? process_one_work+0x166/0x340 ? process_one_work+0x110/0x340 ? worker_thread+0x39/0x3c0 ? kthread+0xdb/0x110 ? cancel_delayed_work+0x90/0x90 ? kthread_stop+0x70/0x70 ? ret_from_fork+0x19/0x24 Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-08 11:30:02 +01:00
rxrpc_peer_keepalive_dispatch(rxnet, &collector, base, cursor);
ASSERT(list_empty(&collector));
/* Schedule the timer for the next occupied timeslot. */
cursor = rxnet->peer_keepalive_cursor;
stop = cursor + RXRPC_KEEPALIVE_TIME - 1;
for (; (s8)(cursor - stop) < 0; cursor++) {
if (!list_empty(&rxnet->peer_keepalive[cursor & mask]))
break;
base++;
}
now = ktime_get_seconds();
delay = base - now;
if (delay < 1)
delay = 1;
delay *= HZ;
if (rxnet->live)
timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay);
_leave("");
}
/*
* Do path MTU probing.
*/
void rxrpc_input_probe_for_pmtud(struct rxrpc_connection *conn, rxrpc_serial_t acked_serial,
bool sendmsg_fail)
{
struct rxrpc_peer *peer = conn->peer;
unsigned int max_data = peer->max_data;
int good, trial, bad, jumbo;
good = peer->pmtud_good;
trial = peer->pmtud_trial;
bad = peer->pmtud_bad;
if (good >= bad - 1) {
conn->pmtud_probe = 0;
peer->pmtud_lost = false;
return;
}
if (!peer->pmtud_probing)
goto send_probe;
if (sendmsg_fail || after(acked_serial, conn->pmtud_probe)) {
/* Retry a lost probe. */
if (!peer->pmtud_lost) {
trace_rxrpc_pmtud_lost(conn, acked_serial);
conn->pmtud_probe = 0;
peer->pmtud_lost = true;
goto send_probe;
}
/* The probed size didn't seem to get through. */
bad = trial;
peer->pmtud_bad = bad;
if (bad <= max_data)
max_data = bad - 1;
} else {
/* It did get through. */
good = trial;
peer->pmtud_good = good;
if (good > max_data)
max_data = good;
}
max_data = umin(max_data, peer->ackr_max_data);
if (max_data != peer->max_data)
peer->max_data = max_data;
jumbo = max_data + sizeof(struct rxrpc_jumbo_header);
jumbo /= RXRPC_JUMBO_SUBPKTLEN;
peer->pmtud_jumbo = jumbo;
trace_rxrpc_pmtud_rx(conn, acked_serial);
conn->pmtud_probe = 0;
peer->pmtud_lost = false;
if (good < RXRPC_JUMBO(2) && bad > RXRPC_JUMBO(2))
trial = RXRPC_JUMBO(2);
else if (good < RXRPC_JUMBO(4) && bad > RXRPC_JUMBO(4))
trial = RXRPC_JUMBO(4);
else if (good < RXRPC_JUMBO(3) && bad > RXRPC_JUMBO(3))
trial = RXRPC_JUMBO(3);
else if (good < RXRPC_JUMBO(6) && bad > RXRPC_JUMBO(6))
trial = RXRPC_JUMBO(6);
else if (good < RXRPC_JUMBO(5) && bad > RXRPC_JUMBO(5))
trial = RXRPC_JUMBO(5);
else if (good < RXRPC_JUMBO(8) && bad > RXRPC_JUMBO(8))
trial = RXRPC_JUMBO(8);
else if (good < RXRPC_JUMBO(7) && bad > RXRPC_JUMBO(7))
trial = RXRPC_JUMBO(7);
else
trial = (good + bad) / 2;
peer->pmtud_trial = trial;
if (good >= bad)
return;
send_probe:
peer->pmtud_pending = true;
}