2024-07-02 00:40:22 +01:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
|
|
|
/* Network filesystem read subrequest retrying.
|
|
|
|
*
|
|
|
|
* Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
|
|
|
|
* Written by David Howells (dhowells@redhat.com)
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include "internal.h"
|
|
|
|
|
|
|
|
static void netfs_reissue_read(struct netfs_io_request *rreq,
|
|
|
|
struct netfs_io_subrequest *subreq)
|
|
|
|
{
|
2024-12-16 20:41:17 +00:00
|
|
|
__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
|
2024-07-02 00:40:22 +01:00
|
|
|
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
|
2025-02-12 22:24:00 +00:00
|
|
|
netfs_stat(&netfs_n_rh_retry_read_subreq);
|
2024-07-02 00:40:22 +01:00
|
|
|
subreq->rreq->netfs_ops->issue_read(subreq);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Go through the list of failed/short reads, retrying all retryable ones. We
|
|
|
|
* need to switch failed cache reads to network downloads.
|
|
|
|
*/
|
|
|
|
static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
|
|
|
|
{
|
|
|
|
struct netfs_io_subrequest *subreq;
|
2024-12-16 20:41:17 +00:00
|
|
|
struct netfs_io_stream *stream = &rreq->io_streams[0];
|
|
|
|
struct list_head *next;
|
2024-07-02 00:40:22 +01:00
|
|
|
|
|
|
|
_enter("R=%x", rreq->debug_id);
|
|
|
|
|
2024-12-16 20:41:17 +00:00
|
|
|
if (list_empty(&stream->subrequests))
|
2024-07-02 00:40:22 +01:00
|
|
|
return;
|
|
|
|
|
|
|
|
if (rreq->netfs_ops->retry_request)
|
|
|
|
rreq->netfs_ops->retry_request(rreq, NULL);
|
|
|
|
|
|
|
|
/* If there's no renegotiation to do, just resend each retryable subreq
|
|
|
|
* up to the first permanently failed one.
|
|
|
|
*/
|
|
|
|
if (!rreq->netfs_ops->prepare_read &&
|
2024-12-16 20:34:45 +00:00
|
|
|
!rreq->cache_resources.ops) {
|
2024-12-16 20:41:17 +00:00
|
|
|
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
|
2024-07-02 00:40:22 +01:00
|
|
|
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
|
|
|
|
break;
|
|
|
|
if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
|
netfs: Work around recursion by abandoning retry if nothing read
syzkaller reported recursion with a loop of three calls (netfs_rreq_assess,
netfs_retry_reads and netfs_rreq_terminated) hitting the limit of the stack
during an unbuffered or direct I/O read.
There are a number of issues:
(1) There is no limit on the number of retries.
(2) A subrequest is supposed to be abandoned if it does not transfer
anything (NETFS_SREQ_NO_PROGRESS), but that isn't checked under all
circumstances.
(3) The actual root cause, which is this:
if (atomic_dec_and_test(&rreq->nr_outstanding))
netfs_rreq_terminated(rreq, ...);
When we do a retry, we bump the rreq->nr_outstanding counter to
prevent the final cleanup phase running before we've finished
dispatching the retries. The problem is if we hit 0, we have to do
the cleanup phase - but we're in the cleanup phase and end up
repeating the retry cycle, hence the recursion.
Work around the problem by limiting the number of retries. This is based
on Lizhi Xu's patch[1], and makes the following changes:
(1) Replace NETFS_SREQ_NO_PROGRESS with NETFS_SREQ_MADE_PROGRESS and make
the filesystem set it if it managed to read or write at least one byte
of data. Clear this bit before issuing a subrequest.
(2) Add a ->retry_count member to the subrequest and increment it any time
we do a retry.
(3) Remove the NETFS_SREQ_RETRYING flag as it is superfluous with
->retry_count. If the latter is non-zero, we're doing a retry.
(4) Abandon a subrequest if retry_count is non-zero and we made no
progress.
(5) Use ->retry_count in both the write-side and the read-size.
[?] Question: Should I set a hard limit on retry_count in both read and
write? Say it hits 50, we always abandon it. The problem is that
these changes only mitigate the issue. As long as it made at least one
byte of progress, the recursion is still an issue. This patch
mitigates the problem, but does not fix the underlying cause. I have
patches that will do that, but it's an intrusive fix that's currently
pending for the next merge window.
The oops generated by KASAN looks something like:
BUG: TASK stack guard page was hit at ffffc9000482ff48 (stack is ffffc90004830000..ffffc90004838000)
Oops: stack guard page: 0000 [#1] PREEMPT SMP KASAN NOPTI
...
RIP: 0010:mark_lock+0x25/0xc60 kernel/locking/lockdep.c:4686
...
mark_usage kernel/locking/lockdep.c:4646 [inline]
__lock_acquire+0x906/0x3ce0 kernel/locking/lockdep.c:5156
lock_acquire.part.0+0x11b/0x380 kernel/locking/lockdep.c:5825
local_lock_acquire include/linux/local_lock_internal.h:29 [inline]
___slab_alloc+0x123/0x1880 mm/slub.c:3695
__slab_alloc.constprop.0+0x56/0xb0 mm/slub.c:3908
__slab_alloc_node mm/slub.c:3961 [inline]
slab_alloc_node mm/slub.c:4122 [inline]
kmem_cache_alloc_noprof+0x2a7/0x2f0 mm/slub.c:4141
radix_tree_node_alloc.constprop.0+0x1e8/0x350 lib/radix-tree.c:253
idr_get_free+0x528/0xa40 lib/radix-tree.c:1506
idr_alloc_u32+0x191/0x2f0 lib/idr.c:46
idr_alloc+0xc1/0x130 lib/idr.c:87
p9_tag_alloc+0x394/0x870 net/9p/client.c:321
p9_client_prepare_req+0x19f/0x4d0 net/9p/client.c:644
p9_client_zc_rpc.constprop.0+0x105/0x880 net/9p/client.c:793
p9_client_read_once+0x443/0x820 net/9p/client.c:1570
p9_client_read+0x13f/0x1b0 net/9p/client.c:1534
v9fs_issue_read+0x115/0x310 fs/9p/vfs_addr.c:74
netfs_retry_read_subrequests fs/netfs/read_retry.c:60 [inline]
netfs_retry_reads+0x153a/0x1d00 fs/netfs/read_retry.c:232
netfs_rreq_assess+0x5d3/0x870 fs/netfs/read_collect.c:371
netfs_rreq_terminated+0xe5/0x110 fs/netfs/read_collect.c:407
netfs_retry_reads+0x155e/0x1d00 fs/netfs/read_retry.c:235
netfs_rreq_assess+0x5d3/0x870 fs/netfs/read_collect.c:371
netfs_rreq_terminated+0xe5/0x110 fs/netfs/read_collect.c:407
netfs_retry_reads+0x155e/0x1d00 fs/netfs/read_retry.c:235
netfs_rreq_assess+0x5d3/0x870 fs/netfs/read_collect.c:371
...
netfs_rreq_terminated+0xe5/0x110 fs/netfs/read_collect.c:407
netfs_retry_reads+0x155e/0x1d00 fs/netfs/read_retry.c:235
netfs_rreq_assess+0x5d3/0x870 fs/netfs/read_collect.c:371
netfs_rreq_terminated+0xe5/0x110 fs/netfs/read_collect.c:407
netfs_retry_reads+0x155e/0x1d00 fs/netfs/read_retry.c:235
netfs_rreq_assess+0x5d3/0x870 fs/netfs/read_collect.c:371
netfs_rreq_terminated+0xe5/0x110 fs/netfs/read_collect.c:407
netfs_dispatch_unbuffered_reads fs/netfs/direct_read.c:103 [inline]
netfs_unbuffered_read fs/netfs/direct_read.c:127 [inline]
netfs_unbuffered_read_iter_locked+0x12f6/0x19b0 fs/netfs/direct_read.c:221
netfs_unbuffered_read_iter+0xc5/0x100 fs/netfs/direct_read.c:256
v9fs_file_read_iter+0xbf/0x100 fs/9p/vfs_file.c:361
do_iter_readv_writev+0x614/0x7f0 fs/read_write.c:832
vfs_readv+0x4cf/0x890 fs/read_write.c:1025
do_preadv fs/read_write.c:1142 [inline]
__do_sys_preadv fs/read_write.c:1192 [inline]
__se_sys_preadv fs/read_write.c:1187 [inline]
__x64_sys_preadv+0x22d/0x310 fs/read_write.c:1187
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83
Fixes: ee4cdf7ba857 ("netfs: Speed up buffered reading")
Closes: https://syzkaller.appspot.com/bug?extid=1fc6f64c40a9d143cfb6
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20241108034020.3695718-1-lizhi.xu@windriver.com/ [1]
Link: https://lore.kernel.org/r/20241213135013.2964079-9-dhowells@redhat.com
Tested-by: syzbot+885c03ad650731743489@syzkaller.appspotmail.com
Suggested-by: Lizhi Xu <lizhi.xu@windriver.com>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Jeff Layton <jlayton@kernel.org>
cc: v9fs@lists.linux.dev
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Reported-by: syzbot+885c03ad650731743489@syzkaller.appspotmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2024-12-13 13:50:08 +00:00
|
|
|
__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
|
|
|
|
subreq->retry_count++;
|
2024-07-02 00:40:22 +01:00
|
|
|
netfs_reset_iter(subreq);
|
netfs: Fix a number of read-retry hangs
Fix a number of hangs in the netfslib read-retry code, including:
(1) netfs_reissue_read() doubles up the getting of references on
subrequests, thereby leaking the subrequest and causing inode eviction
to wait indefinitely. This can lead to the kernel reporting a hang in
the filesystem's evict_inode().
Fix this by removing the get from netfs_reissue_read() and adding one
to netfs_retry_read_subrequests() to deal with the one place that
didn't double up.
(2) The loop in netfs_retry_read_subrequests() that retries a sequence of
failed subrequests doesn't record whether or not it retried the one
that the "subreq" pointer points to when it leaves the loop. It may
not if renegotiation/repreparation of the subrequests means that fewer
subrequests are needed to span the cumulative range of the sequence.
Because it doesn't record this, the piece of code that discards
now-superfluous subrequests doesn't know whether it should discard the
one "subreq" points to - and so it doesn't.
Fix this by noting whether the last subreq it examines is superfluous
and if it is, then getting rid of it and all subsequent subrequests.
If that one one wasn't superfluous, then we would have tried to go
round the previous loop again and so there can be no further unretried
subrequests in the sequence.
(3) netfs_retry_read_subrequests() gets yet an extra ref on any additional
subrequests it has to get because it ran out of ones it could reuse to
to renegotiation/repreparation shrinking the subrequests.
Fix this by removing that extra ref.
(4) In netfs_retry_reads(), it was using wait_on_bit() to wait for
NETFS_SREQ_IN_PROGRESS to be cleared on all subrequests in the
sequence - but netfs_read_subreq_terminated() is now using a wait
queue on the request instead and so this wait will never finish.
Fix this by waiting on the wait queue instead. To make this work, a
new flag, NETFS_RREQ_RETRYING, is now set around the wait loop to tell
the wake-up code to wake up the wait queue rather than requeuing the
request's work item.
Note that this flag replaces the NETFS_RREQ_NEED_RETRY flag which is
no longer used.
(5) Whilst not strictly anything to do with the hang,
netfs_retry_read_subrequests() was also doubly incrementing the
subreq_counter and re-setting the debug index, leaving a gap in the
trace. This is also fixed.
One of these hangs was observed with 9p and with cifs. Others were forced
by manual code injection into fs/afs/file.c. Firstly, afs_prepare_read()
was created to provide an changing pattern of maximum subrequest sizes:
static int afs_prepare_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
if (!S_ISREG(subreq->rreq->inode->i_mode))
return 0;
if (subreq->retry_count < 20)
rreq->io_streams[0].sreq_max_len =
umax(200, 2222 - subreq->retry_count * 40);
else
rreq->io_streams[0].sreq_max_len = 3333;
return 0;
}
and pointed to by afs_req_ops. Then the following:
struct netfs_io_subrequest *subreq = op->fetch.subreq;
if (subreq->error == 0 &&
S_ISREG(subreq->rreq->inode->i_mode) &&
subreq->retry_count < 20) {
subreq->transferred = subreq->already_done;
__clear_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
afs_fetch_data_notify(op);
return;
}
was inserted into afs_fetch_data_success() at the beginning and struct
netfs_io_subrequest given an extra field, "already_done" that was set to
the value in "subreq->transferred" by netfs_reissue_read().
When reading a 4K file, the subrequests would get gradually smaller, a new
subrequest would be allocated around the 3rd retry and then eventually be
rendered superfluous when the 20th retry was hit and the limit on the first
subrequest was eased.
Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20250212222402.3618494-2-dhowells@redhat.com
Tested-by: Marc Dionne <marc.dionne@auristor.com>
Tested-by: Steve French <stfrench@microsoft.com>
cc: Ihor Solodrai <ihor.solodrai@pm.me>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: v9fs@lists.linux.dev
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-02-12 22:23:59 +00:00
|
|
|
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
|
2024-07-02 00:40:22 +01:00
|
|
|
netfs_reissue_read(rreq, subreq);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Okay, we need to renegotiate all the download requests and flip any
|
|
|
|
* failed cache reads over to being download requests and negotiate
|
|
|
|
* those also. All fully successful subreqs have been removed from the
|
|
|
|
* list and any spare data from those has been donated.
|
|
|
|
*
|
|
|
|
* What we do is decant the list and rebuild it one subreq at a time so
|
|
|
|
* that we don't end up with donations jumping over a gap we're busy
|
|
|
|
* populating with smaller subrequests. In the event that the subreq
|
|
|
|
* we just launched finishes before we insert the next subreq, it'll
|
|
|
|
* fill in rreq->prev_donated instead.
|
2024-12-16 20:41:17 +00:00
|
|
|
*
|
2024-07-02 00:40:22 +01:00
|
|
|
* Note: Alternatively, we could split the tail subrequest right before
|
|
|
|
* we reissue it and fix up the donations under lock.
|
|
|
|
*/
|
2024-12-16 20:41:17 +00:00
|
|
|
next = stream->subrequests.next;
|
2024-07-02 00:40:22 +01:00
|
|
|
|
|
|
|
do {
|
2024-12-16 20:41:17 +00:00
|
|
|
struct netfs_io_subrequest *from, *to, *tmp;
|
2024-07-02 00:40:22 +01:00
|
|
|
struct iov_iter source;
|
|
|
|
unsigned long long start, len;
|
2024-12-16 20:41:17 +00:00
|
|
|
size_t part;
|
netfs: Fix a number of read-retry hangs
Fix a number of hangs in the netfslib read-retry code, including:
(1) netfs_reissue_read() doubles up the getting of references on
subrequests, thereby leaking the subrequest and causing inode eviction
to wait indefinitely. This can lead to the kernel reporting a hang in
the filesystem's evict_inode().
Fix this by removing the get from netfs_reissue_read() and adding one
to netfs_retry_read_subrequests() to deal with the one place that
didn't double up.
(2) The loop in netfs_retry_read_subrequests() that retries a sequence of
failed subrequests doesn't record whether or not it retried the one
that the "subreq" pointer points to when it leaves the loop. It may
not if renegotiation/repreparation of the subrequests means that fewer
subrequests are needed to span the cumulative range of the sequence.
Because it doesn't record this, the piece of code that discards
now-superfluous subrequests doesn't know whether it should discard the
one "subreq" points to - and so it doesn't.
Fix this by noting whether the last subreq it examines is superfluous
and if it is, then getting rid of it and all subsequent subrequests.
If that one one wasn't superfluous, then we would have tried to go
round the previous loop again and so there can be no further unretried
subrequests in the sequence.
(3) netfs_retry_read_subrequests() gets yet an extra ref on any additional
subrequests it has to get because it ran out of ones it could reuse to
to renegotiation/repreparation shrinking the subrequests.
Fix this by removing that extra ref.
(4) In netfs_retry_reads(), it was using wait_on_bit() to wait for
NETFS_SREQ_IN_PROGRESS to be cleared on all subrequests in the
sequence - but netfs_read_subreq_terminated() is now using a wait
queue on the request instead and so this wait will never finish.
Fix this by waiting on the wait queue instead. To make this work, a
new flag, NETFS_RREQ_RETRYING, is now set around the wait loop to tell
the wake-up code to wake up the wait queue rather than requeuing the
request's work item.
Note that this flag replaces the NETFS_RREQ_NEED_RETRY flag which is
no longer used.
(5) Whilst not strictly anything to do with the hang,
netfs_retry_read_subrequests() was also doubly incrementing the
subreq_counter and re-setting the debug index, leaving a gap in the
trace. This is also fixed.
One of these hangs was observed with 9p and with cifs. Others were forced
by manual code injection into fs/afs/file.c. Firstly, afs_prepare_read()
was created to provide an changing pattern of maximum subrequest sizes:
static int afs_prepare_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
if (!S_ISREG(subreq->rreq->inode->i_mode))
return 0;
if (subreq->retry_count < 20)
rreq->io_streams[0].sreq_max_len =
umax(200, 2222 - subreq->retry_count * 40);
else
rreq->io_streams[0].sreq_max_len = 3333;
return 0;
}
and pointed to by afs_req_ops. Then the following:
struct netfs_io_subrequest *subreq = op->fetch.subreq;
if (subreq->error == 0 &&
S_ISREG(subreq->rreq->inode->i_mode) &&
subreq->retry_count < 20) {
subreq->transferred = subreq->already_done;
__clear_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
afs_fetch_data_notify(op);
return;
}
was inserted into afs_fetch_data_success() at the beginning and struct
netfs_io_subrequest given an extra field, "already_done" that was set to
the value in "subreq->transferred" by netfs_reissue_read().
When reading a 4K file, the subrequests would get gradually smaller, a new
subrequest would be allocated around the 3rd retry and then eventually be
rendered superfluous when the 20th retry was hit and the limit on the first
subrequest was eased.
Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20250212222402.3618494-2-dhowells@redhat.com
Tested-by: Marc Dionne <marc.dionne@auristor.com>
Tested-by: Steve French <stfrench@microsoft.com>
cc: Ihor Solodrai <ihor.solodrai@pm.me>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: v9fs@lists.linux.dev
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-02-12 22:23:59 +00:00
|
|
|
bool boundary = false, subreq_superfluous = false;
|
2024-07-02 00:40:22 +01:00
|
|
|
|
|
|
|
/* Go through the subreqs and find the next span of contiguous
|
|
|
|
* buffer that we then rejig (cifs, for example, needs the
|
|
|
|
* rsize renegotiating) and reissue.
|
|
|
|
*/
|
2024-12-16 20:41:17 +00:00
|
|
|
from = list_entry(next, struct netfs_io_subrequest, rreq_link);
|
|
|
|
to = from;
|
2024-07-02 00:40:22 +01:00
|
|
|
start = from->start + from->transferred;
|
|
|
|
len = from->len - from->transferred;
|
|
|
|
|
2024-12-16 20:41:17 +00:00
|
|
|
_debug("from R=%08x[%x] s=%llx ctl=%zx/%zx",
|
2024-07-02 00:40:22 +01:00
|
|
|
rreq->debug_id, from->debug_index,
|
2024-12-16 20:41:17 +00:00
|
|
|
from->start, from->transferred, from->len);
|
2024-07-02 00:40:22 +01:00
|
|
|
|
|
|
|
if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
|
|
|
|
!test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
|
|
|
|
goto abandon;
|
|
|
|
|
2024-12-16 20:41:17 +00:00
|
|
|
list_for_each_continue(next, &stream->subrequests) {
|
|
|
|
subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
|
|
|
|
if (subreq->start + subreq->transferred != start + len ||
|
|
|
|
test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) ||
|
2024-07-02 00:40:22 +01:00
|
|
|
!test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
|
|
|
|
break;
|
2024-12-16 20:41:17 +00:00
|
|
|
to = subreq;
|
|
|
|
len += to->len;
|
2024-07-02 00:40:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
_debug(" - range: %llx-%llx %llx", start, start + len - 1, len);
|
|
|
|
|
|
|
|
/* Determine the set of buffers we're going to use. Each
|
|
|
|
* subreq gets a subset of a single overall contiguous buffer.
|
|
|
|
*/
|
|
|
|
netfs_reset_iter(from);
|
|
|
|
source = from->io_iter;
|
|
|
|
source.count = len;
|
|
|
|
|
|
|
|
/* Work through the sublist. */
|
2024-12-16 20:41:17 +00:00
|
|
|
subreq = from;
|
|
|
|
list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
|
netfs: Fix a number of read-retry hangs
Fix a number of hangs in the netfslib read-retry code, including:
(1) netfs_reissue_read() doubles up the getting of references on
subrequests, thereby leaking the subrequest and causing inode eviction
to wait indefinitely. This can lead to the kernel reporting a hang in
the filesystem's evict_inode().
Fix this by removing the get from netfs_reissue_read() and adding one
to netfs_retry_read_subrequests() to deal with the one place that
didn't double up.
(2) The loop in netfs_retry_read_subrequests() that retries a sequence of
failed subrequests doesn't record whether or not it retried the one
that the "subreq" pointer points to when it leaves the loop. It may
not if renegotiation/repreparation of the subrequests means that fewer
subrequests are needed to span the cumulative range of the sequence.
Because it doesn't record this, the piece of code that discards
now-superfluous subrequests doesn't know whether it should discard the
one "subreq" points to - and so it doesn't.
Fix this by noting whether the last subreq it examines is superfluous
and if it is, then getting rid of it and all subsequent subrequests.
If that one one wasn't superfluous, then we would have tried to go
round the previous loop again and so there can be no further unretried
subrequests in the sequence.
(3) netfs_retry_read_subrequests() gets yet an extra ref on any additional
subrequests it has to get because it ran out of ones it could reuse to
to renegotiation/repreparation shrinking the subrequests.
Fix this by removing that extra ref.
(4) In netfs_retry_reads(), it was using wait_on_bit() to wait for
NETFS_SREQ_IN_PROGRESS to be cleared on all subrequests in the
sequence - but netfs_read_subreq_terminated() is now using a wait
queue on the request instead and so this wait will never finish.
Fix this by waiting on the wait queue instead. To make this work, a
new flag, NETFS_RREQ_RETRYING, is now set around the wait loop to tell
the wake-up code to wake up the wait queue rather than requeuing the
request's work item.
Note that this flag replaces the NETFS_RREQ_NEED_RETRY flag which is
no longer used.
(5) Whilst not strictly anything to do with the hang,
netfs_retry_read_subrequests() was also doubly incrementing the
subreq_counter and re-setting the debug index, leaving a gap in the
trace. This is also fixed.
One of these hangs was observed with 9p and with cifs. Others were forced
by manual code injection into fs/afs/file.c. Firstly, afs_prepare_read()
was created to provide an changing pattern of maximum subrequest sizes:
static int afs_prepare_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
if (!S_ISREG(subreq->rreq->inode->i_mode))
return 0;
if (subreq->retry_count < 20)
rreq->io_streams[0].sreq_max_len =
umax(200, 2222 - subreq->retry_count * 40);
else
rreq->io_streams[0].sreq_max_len = 3333;
return 0;
}
and pointed to by afs_req_ops. Then the following:
struct netfs_io_subrequest *subreq = op->fetch.subreq;
if (subreq->error == 0 &&
S_ISREG(subreq->rreq->inode->i_mode) &&
subreq->retry_count < 20) {
subreq->transferred = subreq->already_done;
__clear_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
afs_fetch_data_notify(op);
return;
}
was inserted into afs_fetch_data_success() at the beginning and struct
netfs_io_subrequest given an extra field, "already_done" that was set to
the value in "subreq->transferred" by netfs_reissue_read().
When reading a 4K file, the subrequests would get gradually smaller, a new
subrequest would be allocated around the 3rd retry and then eventually be
rendered superfluous when the 20th retry was hit and the limit on the first
subrequest was eased.
Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20250212222402.3618494-2-dhowells@redhat.com
Tested-by: Marc Dionne <marc.dionne@auristor.com>
Tested-by: Steve French <stfrench@microsoft.com>
cc: Ihor Solodrai <ihor.solodrai@pm.me>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: v9fs@lists.linux.dev
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-02-12 22:23:59 +00:00
|
|
|
if (!len) {
|
|
|
|
subreq_superfluous = true;
|
2024-12-16 20:41:17 +00:00
|
|
|
break;
|
netfs: Fix a number of read-retry hangs
Fix a number of hangs in the netfslib read-retry code, including:
(1) netfs_reissue_read() doubles up the getting of references on
subrequests, thereby leaking the subrequest and causing inode eviction
to wait indefinitely. This can lead to the kernel reporting a hang in
the filesystem's evict_inode().
Fix this by removing the get from netfs_reissue_read() and adding one
to netfs_retry_read_subrequests() to deal with the one place that
didn't double up.
(2) The loop in netfs_retry_read_subrequests() that retries a sequence of
failed subrequests doesn't record whether or not it retried the one
that the "subreq" pointer points to when it leaves the loop. It may
not if renegotiation/repreparation of the subrequests means that fewer
subrequests are needed to span the cumulative range of the sequence.
Because it doesn't record this, the piece of code that discards
now-superfluous subrequests doesn't know whether it should discard the
one "subreq" points to - and so it doesn't.
Fix this by noting whether the last subreq it examines is superfluous
and if it is, then getting rid of it and all subsequent subrequests.
If that one one wasn't superfluous, then we would have tried to go
round the previous loop again and so there can be no further unretried
subrequests in the sequence.
(3) netfs_retry_read_subrequests() gets yet an extra ref on any additional
subrequests it has to get because it ran out of ones it could reuse to
to renegotiation/repreparation shrinking the subrequests.
Fix this by removing that extra ref.
(4) In netfs_retry_reads(), it was using wait_on_bit() to wait for
NETFS_SREQ_IN_PROGRESS to be cleared on all subrequests in the
sequence - but netfs_read_subreq_terminated() is now using a wait
queue on the request instead and so this wait will never finish.
Fix this by waiting on the wait queue instead. To make this work, a
new flag, NETFS_RREQ_RETRYING, is now set around the wait loop to tell
the wake-up code to wake up the wait queue rather than requeuing the
request's work item.
Note that this flag replaces the NETFS_RREQ_NEED_RETRY flag which is
no longer used.
(5) Whilst not strictly anything to do with the hang,
netfs_retry_read_subrequests() was also doubly incrementing the
subreq_counter and re-setting the debug index, leaving a gap in the
trace. This is also fixed.
One of these hangs was observed with 9p and with cifs. Others were forced
by manual code injection into fs/afs/file.c. Firstly, afs_prepare_read()
was created to provide an changing pattern of maximum subrequest sizes:
static int afs_prepare_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
if (!S_ISREG(subreq->rreq->inode->i_mode))
return 0;
if (subreq->retry_count < 20)
rreq->io_streams[0].sreq_max_len =
umax(200, 2222 - subreq->retry_count * 40);
else
rreq->io_streams[0].sreq_max_len = 3333;
return 0;
}
and pointed to by afs_req_ops. Then the following:
struct netfs_io_subrequest *subreq = op->fetch.subreq;
if (subreq->error == 0 &&
S_ISREG(subreq->rreq->inode->i_mode) &&
subreq->retry_count < 20) {
subreq->transferred = subreq->already_done;
__clear_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
afs_fetch_data_notify(op);
return;
}
was inserted into afs_fetch_data_success() at the beginning and struct
netfs_io_subrequest given an extra field, "already_done" that was set to
the value in "subreq->transferred" by netfs_reissue_read().
When reading a 4K file, the subrequests would get gradually smaller, a new
subrequest would be allocated around the 3rd retry and then eventually be
rendered superfluous when the 20th retry was hit and the limit on the first
subrequest was eased.
Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20250212222402.3618494-2-dhowells@redhat.com
Tested-by: Marc Dionne <marc.dionne@auristor.com>
Tested-by: Steve French <stfrench@microsoft.com>
cc: Ihor Solodrai <ihor.solodrai@pm.me>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: v9fs@lists.linux.dev
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-02-12 22:23:59 +00:00
|
|
|
}
|
2024-07-02 00:40:22 +01:00
|
|
|
subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
|
|
|
|
subreq->start = start - subreq->transferred;
|
|
|
|
subreq->len = len + subreq->transferred;
|
|
|
|
__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
|
netfs: Work around recursion by abandoning retry if nothing read
syzkaller reported recursion with a loop of three calls (netfs_rreq_assess,
netfs_retry_reads and netfs_rreq_terminated) hitting the limit of the stack
during an unbuffered or direct I/O read.
There are a number of issues:
(1) There is no limit on the number of retries.
(2) A subrequest is supposed to be abandoned if it does not transfer
anything (NETFS_SREQ_NO_PROGRESS), but that isn't checked under all
circumstances.
(3) The actual root cause, which is this:
if (atomic_dec_and_test(&rreq->nr_outstanding))
netfs_rreq_terminated(rreq, ...);
When we do a retry, we bump the rreq->nr_outstanding counter to
prevent the final cleanup phase running before we've finished
dispatching the retries. The problem is if we hit 0, we have to do
the cleanup phase - but we're in the cleanup phase and end up
repeating the retry cycle, hence the recursion.
Work around the problem by limiting the number of retries. This is based
on Lizhi Xu's patch[1], and makes the following changes:
(1) Replace NETFS_SREQ_NO_PROGRESS with NETFS_SREQ_MADE_PROGRESS and make
the filesystem set it if it managed to read or write at least one byte
of data. Clear this bit before issuing a subrequest.
(2) Add a ->retry_count member to the subrequest and increment it any time
we do a retry.
(3) Remove the NETFS_SREQ_RETRYING flag as it is superfluous with
->retry_count. If the latter is non-zero, we're doing a retry.
(4) Abandon a subrequest if retry_count is non-zero and we made no
progress.
(5) Use ->retry_count in both the write-side and the read-size.
[?] Question: Should I set a hard limit on retry_count in both read and
write? Say it hits 50, we always abandon it. The problem is that
these changes only mitigate the issue. As long as it made at least one
byte of progress, the recursion is still an issue. This patch
mitigates the problem, but does not fix the underlying cause. I have
patches that will do that, but it's an intrusive fix that's currently
pending for the next merge window.
The oops generated by KASAN looks something like:
BUG: TASK stack guard page was hit at ffffc9000482ff48 (stack is ffffc90004830000..ffffc90004838000)
Oops: stack guard page: 0000 [#1] PREEMPT SMP KASAN NOPTI
...
RIP: 0010:mark_lock+0x25/0xc60 kernel/locking/lockdep.c:4686
...
mark_usage kernel/locking/lockdep.c:4646 [inline]
__lock_acquire+0x906/0x3ce0 kernel/locking/lockdep.c:5156
lock_acquire.part.0+0x11b/0x380 kernel/locking/lockdep.c:5825
local_lock_acquire include/linux/local_lock_internal.h:29 [inline]
___slab_alloc+0x123/0x1880 mm/slub.c:3695
__slab_alloc.constprop.0+0x56/0xb0 mm/slub.c:3908
__slab_alloc_node mm/slub.c:3961 [inline]
slab_alloc_node mm/slub.c:4122 [inline]
kmem_cache_alloc_noprof+0x2a7/0x2f0 mm/slub.c:4141
radix_tree_node_alloc.constprop.0+0x1e8/0x350 lib/radix-tree.c:253
idr_get_free+0x528/0xa40 lib/radix-tree.c:1506
idr_alloc_u32+0x191/0x2f0 lib/idr.c:46
idr_alloc+0xc1/0x130 lib/idr.c:87
p9_tag_alloc+0x394/0x870 net/9p/client.c:321
p9_client_prepare_req+0x19f/0x4d0 net/9p/client.c:644
p9_client_zc_rpc.constprop.0+0x105/0x880 net/9p/client.c:793
p9_client_read_once+0x443/0x820 net/9p/client.c:1570
p9_client_read+0x13f/0x1b0 net/9p/client.c:1534
v9fs_issue_read+0x115/0x310 fs/9p/vfs_addr.c:74
netfs_retry_read_subrequests fs/netfs/read_retry.c:60 [inline]
netfs_retry_reads+0x153a/0x1d00 fs/netfs/read_retry.c:232
netfs_rreq_assess+0x5d3/0x870 fs/netfs/read_collect.c:371
netfs_rreq_terminated+0xe5/0x110 fs/netfs/read_collect.c:407
netfs_retry_reads+0x155e/0x1d00 fs/netfs/read_retry.c:235
netfs_rreq_assess+0x5d3/0x870 fs/netfs/read_collect.c:371
netfs_rreq_terminated+0xe5/0x110 fs/netfs/read_collect.c:407
netfs_retry_reads+0x155e/0x1d00 fs/netfs/read_retry.c:235
netfs_rreq_assess+0x5d3/0x870 fs/netfs/read_collect.c:371
...
netfs_rreq_terminated+0xe5/0x110 fs/netfs/read_collect.c:407
netfs_retry_reads+0x155e/0x1d00 fs/netfs/read_retry.c:235
netfs_rreq_assess+0x5d3/0x870 fs/netfs/read_collect.c:371
netfs_rreq_terminated+0xe5/0x110 fs/netfs/read_collect.c:407
netfs_retry_reads+0x155e/0x1d00 fs/netfs/read_retry.c:235
netfs_rreq_assess+0x5d3/0x870 fs/netfs/read_collect.c:371
netfs_rreq_terminated+0xe5/0x110 fs/netfs/read_collect.c:407
netfs_dispatch_unbuffered_reads fs/netfs/direct_read.c:103 [inline]
netfs_unbuffered_read fs/netfs/direct_read.c:127 [inline]
netfs_unbuffered_read_iter_locked+0x12f6/0x19b0 fs/netfs/direct_read.c:221
netfs_unbuffered_read_iter+0xc5/0x100 fs/netfs/direct_read.c:256
v9fs_file_read_iter+0xbf/0x100 fs/9p/vfs_file.c:361
do_iter_readv_writev+0x614/0x7f0 fs/read_write.c:832
vfs_readv+0x4cf/0x890 fs/read_write.c:1025
do_preadv fs/read_write.c:1142 [inline]
__do_sys_preadv fs/read_write.c:1192 [inline]
__se_sys_preadv fs/read_write.c:1187 [inline]
__x64_sys_preadv+0x22d/0x310 fs/read_write.c:1187
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83
Fixes: ee4cdf7ba857 ("netfs: Speed up buffered reading")
Closes: https://syzkaller.appspot.com/bug?extid=1fc6f64c40a9d143cfb6
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20241108034020.3695718-1-lizhi.xu@windriver.com/ [1]
Link: https://lore.kernel.org/r/20241213135013.2964079-9-dhowells@redhat.com
Tested-by: syzbot+885c03ad650731743489@syzkaller.appspotmail.com
Suggested-by: Lizhi Xu <lizhi.xu@windriver.com>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Jeff Layton <jlayton@kernel.org>
cc: v9fs@lists.linux.dev
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Reported-by: syzbot+885c03ad650731743489@syzkaller.appspotmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2024-12-13 13:50:08 +00:00
|
|
|
__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
|
|
|
|
subreq->retry_count++;
|
2024-07-02 00:40:22 +01:00
|
|
|
|
|
|
|
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
|
|
|
|
|
|
|
|
/* Renegotiate max_len (rsize) */
|
2024-12-16 20:41:17 +00:00
|
|
|
stream->sreq_max_len = subreq->len;
|
2025-01-07 14:43:30 +00:00
|
|
|
if (rreq->netfs_ops->prepare_read &&
|
|
|
|
rreq->netfs_ops->prepare_read(subreq) < 0) {
|
2024-07-02 00:40:22 +01:00
|
|
|
trace_netfs_sreq(subreq, netfs_sreq_trace_reprep_failed);
|
|
|
|
__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
|
2024-12-16 20:41:17 +00:00
|
|
|
goto abandon;
|
2024-07-02 00:40:22 +01:00
|
|
|
}
|
|
|
|
|
2024-12-16 20:41:17 +00:00
|
|
|
part = umin(len, stream->sreq_max_len);
|
|
|
|
if (unlikely(stream->sreq_max_segs))
|
|
|
|
part = netfs_limit_iter(&source, 0, part, stream->sreq_max_segs);
|
2024-07-02 00:40:22 +01:00
|
|
|
subreq->len = subreq->transferred + part;
|
|
|
|
subreq->io_iter = source;
|
|
|
|
iov_iter_truncate(&subreq->io_iter, part);
|
|
|
|
iov_iter_advance(&source, part);
|
|
|
|
len -= part;
|
|
|
|
start += part;
|
|
|
|
if (!len) {
|
|
|
|
if (boundary)
|
|
|
|
__set_bit(NETFS_SREQ_BOUNDARY, &subreq->flags);
|
|
|
|
} else {
|
|
|
|
__clear_bit(NETFS_SREQ_BOUNDARY, &subreq->flags);
|
|
|
|
}
|
|
|
|
|
2024-12-16 20:41:17 +00:00
|
|
|
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
|
2024-07-02 00:40:22 +01:00
|
|
|
netfs_reissue_read(rreq, subreq);
|
netfs: Fix a number of read-retry hangs
Fix a number of hangs in the netfslib read-retry code, including:
(1) netfs_reissue_read() doubles up the getting of references on
subrequests, thereby leaking the subrequest and causing inode eviction
to wait indefinitely. This can lead to the kernel reporting a hang in
the filesystem's evict_inode().
Fix this by removing the get from netfs_reissue_read() and adding one
to netfs_retry_read_subrequests() to deal with the one place that
didn't double up.
(2) The loop in netfs_retry_read_subrequests() that retries a sequence of
failed subrequests doesn't record whether or not it retried the one
that the "subreq" pointer points to when it leaves the loop. It may
not if renegotiation/repreparation of the subrequests means that fewer
subrequests are needed to span the cumulative range of the sequence.
Because it doesn't record this, the piece of code that discards
now-superfluous subrequests doesn't know whether it should discard the
one "subreq" points to - and so it doesn't.
Fix this by noting whether the last subreq it examines is superfluous
and if it is, then getting rid of it and all subsequent subrequests.
If that one one wasn't superfluous, then we would have tried to go
round the previous loop again and so there can be no further unretried
subrequests in the sequence.
(3) netfs_retry_read_subrequests() gets yet an extra ref on any additional
subrequests it has to get because it ran out of ones it could reuse to
to renegotiation/repreparation shrinking the subrequests.
Fix this by removing that extra ref.
(4) In netfs_retry_reads(), it was using wait_on_bit() to wait for
NETFS_SREQ_IN_PROGRESS to be cleared on all subrequests in the
sequence - but netfs_read_subreq_terminated() is now using a wait
queue on the request instead and so this wait will never finish.
Fix this by waiting on the wait queue instead. To make this work, a
new flag, NETFS_RREQ_RETRYING, is now set around the wait loop to tell
the wake-up code to wake up the wait queue rather than requeuing the
request's work item.
Note that this flag replaces the NETFS_RREQ_NEED_RETRY flag which is
no longer used.
(5) Whilst not strictly anything to do with the hang,
netfs_retry_read_subrequests() was also doubly incrementing the
subreq_counter and re-setting the debug index, leaving a gap in the
trace. This is also fixed.
One of these hangs was observed with 9p and with cifs. Others were forced
by manual code injection into fs/afs/file.c. Firstly, afs_prepare_read()
was created to provide an changing pattern of maximum subrequest sizes:
static int afs_prepare_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
if (!S_ISREG(subreq->rreq->inode->i_mode))
return 0;
if (subreq->retry_count < 20)
rreq->io_streams[0].sreq_max_len =
umax(200, 2222 - subreq->retry_count * 40);
else
rreq->io_streams[0].sreq_max_len = 3333;
return 0;
}
and pointed to by afs_req_ops. Then the following:
struct netfs_io_subrequest *subreq = op->fetch.subreq;
if (subreq->error == 0 &&
S_ISREG(subreq->rreq->inode->i_mode) &&
subreq->retry_count < 20) {
subreq->transferred = subreq->already_done;
__clear_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
afs_fetch_data_notify(op);
return;
}
was inserted into afs_fetch_data_success() at the beginning and struct
netfs_io_subrequest given an extra field, "already_done" that was set to
the value in "subreq->transferred" by netfs_reissue_read().
When reading a 4K file, the subrequests would get gradually smaller, a new
subrequest would be allocated around the 3rd retry and then eventually be
rendered superfluous when the 20th retry was hit and the limit on the first
subrequest was eased.
Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20250212222402.3618494-2-dhowells@redhat.com
Tested-by: Marc Dionne <marc.dionne@auristor.com>
Tested-by: Steve French <stfrench@microsoft.com>
cc: Ihor Solodrai <ihor.solodrai@pm.me>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: v9fs@lists.linux.dev
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-02-12 22:23:59 +00:00
|
|
|
if (subreq == to) {
|
|
|
|
subreq_superfluous = false;
|
2024-07-02 00:40:22 +01:00
|
|
|
break;
|
netfs: Fix a number of read-retry hangs
Fix a number of hangs in the netfslib read-retry code, including:
(1) netfs_reissue_read() doubles up the getting of references on
subrequests, thereby leaking the subrequest and causing inode eviction
to wait indefinitely. This can lead to the kernel reporting a hang in
the filesystem's evict_inode().
Fix this by removing the get from netfs_reissue_read() and adding one
to netfs_retry_read_subrequests() to deal with the one place that
didn't double up.
(2) The loop in netfs_retry_read_subrequests() that retries a sequence of
failed subrequests doesn't record whether or not it retried the one
that the "subreq" pointer points to when it leaves the loop. It may
not if renegotiation/repreparation of the subrequests means that fewer
subrequests are needed to span the cumulative range of the sequence.
Because it doesn't record this, the piece of code that discards
now-superfluous subrequests doesn't know whether it should discard the
one "subreq" points to - and so it doesn't.
Fix this by noting whether the last subreq it examines is superfluous
and if it is, then getting rid of it and all subsequent subrequests.
If that one one wasn't superfluous, then we would have tried to go
round the previous loop again and so there can be no further unretried
subrequests in the sequence.
(3) netfs_retry_read_subrequests() gets yet an extra ref on any additional
subrequests it has to get because it ran out of ones it could reuse to
to renegotiation/repreparation shrinking the subrequests.
Fix this by removing that extra ref.
(4) In netfs_retry_reads(), it was using wait_on_bit() to wait for
NETFS_SREQ_IN_PROGRESS to be cleared on all subrequests in the
sequence - but netfs_read_subreq_terminated() is now using a wait
queue on the request instead and so this wait will never finish.
Fix this by waiting on the wait queue instead. To make this work, a
new flag, NETFS_RREQ_RETRYING, is now set around the wait loop to tell
the wake-up code to wake up the wait queue rather than requeuing the
request's work item.
Note that this flag replaces the NETFS_RREQ_NEED_RETRY flag which is
no longer used.
(5) Whilst not strictly anything to do with the hang,
netfs_retry_read_subrequests() was also doubly incrementing the
subreq_counter and re-setting the debug index, leaving a gap in the
trace. This is also fixed.
One of these hangs was observed with 9p and with cifs. Others were forced
by manual code injection into fs/afs/file.c. Firstly, afs_prepare_read()
was created to provide an changing pattern of maximum subrequest sizes:
static int afs_prepare_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
if (!S_ISREG(subreq->rreq->inode->i_mode))
return 0;
if (subreq->retry_count < 20)
rreq->io_streams[0].sreq_max_len =
umax(200, 2222 - subreq->retry_count * 40);
else
rreq->io_streams[0].sreq_max_len = 3333;
return 0;
}
and pointed to by afs_req_ops. Then the following:
struct netfs_io_subrequest *subreq = op->fetch.subreq;
if (subreq->error == 0 &&
S_ISREG(subreq->rreq->inode->i_mode) &&
subreq->retry_count < 20) {
subreq->transferred = subreq->already_done;
__clear_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
afs_fetch_data_notify(op);
return;
}
was inserted into afs_fetch_data_success() at the beginning and struct
netfs_io_subrequest given an extra field, "already_done" that was set to
the value in "subreq->transferred" by netfs_reissue_read().
When reading a 4K file, the subrequests would get gradually smaller, a new
subrequest would be allocated around the 3rd retry and then eventually be
rendered superfluous when the 20th retry was hit and the limit on the first
subrequest was eased.
Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20250212222402.3618494-2-dhowells@redhat.com
Tested-by: Marc Dionne <marc.dionne@auristor.com>
Tested-by: Steve French <stfrench@microsoft.com>
cc: Ihor Solodrai <ihor.solodrai@pm.me>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: v9fs@lists.linux.dev
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-02-12 22:23:59 +00:00
|
|
|
}
|
2024-07-02 00:40:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* If we managed to use fewer subreqs, we can discard the
|
2024-12-16 20:41:17 +00:00
|
|
|
* excess; if we used the same number, then we're done.
|
2024-07-02 00:40:22 +01:00
|
|
|
*/
|
2024-12-16 20:41:17 +00:00
|
|
|
if (!len) {
|
netfs: Fix a number of read-retry hangs
Fix a number of hangs in the netfslib read-retry code, including:
(1) netfs_reissue_read() doubles up the getting of references on
subrequests, thereby leaking the subrequest and causing inode eviction
to wait indefinitely. This can lead to the kernel reporting a hang in
the filesystem's evict_inode().
Fix this by removing the get from netfs_reissue_read() and adding one
to netfs_retry_read_subrequests() to deal with the one place that
didn't double up.
(2) The loop in netfs_retry_read_subrequests() that retries a sequence of
failed subrequests doesn't record whether or not it retried the one
that the "subreq" pointer points to when it leaves the loop. It may
not if renegotiation/repreparation of the subrequests means that fewer
subrequests are needed to span the cumulative range of the sequence.
Because it doesn't record this, the piece of code that discards
now-superfluous subrequests doesn't know whether it should discard the
one "subreq" points to - and so it doesn't.
Fix this by noting whether the last subreq it examines is superfluous
and if it is, then getting rid of it and all subsequent subrequests.
If that one one wasn't superfluous, then we would have tried to go
round the previous loop again and so there can be no further unretried
subrequests in the sequence.
(3) netfs_retry_read_subrequests() gets yet an extra ref on any additional
subrequests it has to get because it ran out of ones it could reuse to
to renegotiation/repreparation shrinking the subrequests.
Fix this by removing that extra ref.
(4) In netfs_retry_reads(), it was using wait_on_bit() to wait for
NETFS_SREQ_IN_PROGRESS to be cleared on all subrequests in the
sequence - but netfs_read_subreq_terminated() is now using a wait
queue on the request instead and so this wait will never finish.
Fix this by waiting on the wait queue instead. To make this work, a
new flag, NETFS_RREQ_RETRYING, is now set around the wait loop to tell
the wake-up code to wake up the wait queue rather than requeuing the
request's work item.
Note that this flag replaces the NETFS_RREQ_NEED_RETRY flag which is
no longer used.
(5) Whilst not strictly anything to do with the hang,
netfs_retry_read_subrequests() was also doubly incrementing the
subreq_counter and re-setting the debug index, leaving a gap in the
trace. This is also fixed.
One of these hangs was observed with 9p and with cifs. Others were forced
by manual code injection into fs/afs/file.c. Firstly, afs_prepare_read()
was created to provide an changing pattern of maximum subrequest sizes:
static int afs_prepare_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
if (!S_ISREG(subreq->rreq->inode->i_mode))
return 0;
if (subreq->retry_count < 20)
rreq->io_streams[0].sreq_max_len =
umax(200, 2222 - subreq->retry_count * 40);
else
rreq->io_streams[0].sreq_max_len = 3333;
return 0;
}
and pointed to by afs_req_ops. Then the following:
struct netfs_io_subrequest *subreq = op->fetch.subreq;
if (subreq->error == 0 &&
S_ISREG(subreq->rreq->inode->i_mode) &&
subreq->retry_count < 20) {
subreq->transferred = subreq->already_done;
__clear_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
afs_fetch_data_notify(op);
return;
}
was inserted into afs_fetch_data_success() at the beginning and struct
netfs_io_subrequest given an extra field, "already_done" that was set to
the value in "subreq->transferred" by netfs_reissue_read().
When reading a 4K file, the subrequests would get gradually smaller, a new
subrequest would be allocated around the 3rd retry and then eventually be
rendered superfluous when the 20th retry was hit and the limit on the first
subrequest was eased.
Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20250212222402.3618494-2-dhowells@redhat.com
Tested-by: Marc Dionne <marc.dionne@auristor.com>
Tested-by: Steve French <stfrench@microsoft.com>
cc: Ihor Solodrai <ihor.solodrai@pm.me>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: v9fs@lists.linux.dev
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-02-12 22:23:59 +00:00
|
|
|
if (!subreq_superfluous)
|
2024-12-16 20:41:17 +00:00
|
|
|
continue;
|
|
|
|
list_for_each_entry_safe_from(subreq, tmp,
|
|
|
|
&stream->subrequests, rreq_link) {
|
netfs: Fix a number of read-retry hangs
Fix a number of hangs in the netfslib read-retry code, including:
(1) netfs_reissue_read() doubles up the getting of references on
subrequests, thereby leaking the subrequest and causing inode eviction
to wait indefinitely. This can lead to the kernel reporting a hang in
the filesystem's evict_inode().
Fix this by removing the get from netfs_reissue_read() and adding one
to netfs_retry_read_subrequests() to deal with the one place that
didn't double up.
(2) The loop in netfs_retry_read_subrequests() that retries a sequence of
failed subrequests doesn't record whether or not it retried the one
that the "subreq" pointer points to when it leaves the loop. It may
not if renegotiation/repreparation of the subrequests means that fewer
subrequests are needed to span the cumulative range of the sequence.
Because it doesn't record this, the piece of code that discards
now-superfluous subrequests doesn't know whether it should discard the
one "subreq" points to - and so it doesn't.
Fix this by noting whether the last subreq it examines is superfluous
and if it is, then getting rid of it and all subsequent subrequests.
If that one one wasn't superfluous, then we would have tried to go
round the previous loop again and so there can be no further unretried
subrequests in the sequence.
(3) netfs_retry_read_subrequests() gets yet an extra ref on any additional
subrequests it has to get because it ran out of ones it could reuse to
to renegotiation/repreparation shrinking the subrequests.
Fix this by removing that extra ref.
(4) In netfs_retry_reads(), it was using wait_on_bit() to wait for
NETFS_SREQ_IN_PROGRESS to be cleared on all subrequests in the
sequence - but netfs_read_subreq_terminated() is now using a wait
queue on the request instead and so this wait will never finish.
Fix this by waiting on the wait queue instead. To make this work, a
new flag, NETFS_RREQ_RETRYING, is now set around the wait loop to tell
the wake-up code to wake up the wait queue rather than requeuing the
request's work item.
Note that this flag replaces the NETFS_RREQ_NEED_RETRY flag which is
no longer used.
(5) Whilst not strictly anything to do with the hang,
netfs_retry_read_subrequests() was also doubly incrementing the
subreq_counter and re-setting the debug index, leaving a gap in the
trace. This is also fixed.
One of these hangs was observed with 9p and with cifs. Others were forced
by manual code injection into fs/afs/file.c. Firstly, afs_prepare_read()
was created to provide an changing pattern of maximum subrequest sizes:
static int afs_prepare_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
if (!S_ISREG(subreq->rreq->inode->i_mode))
return 0;
if (subreq->retry_count < 20)
rreq->io_streams[0].sreq_max_len =
umax(200, 2222 - subreq->retry_count * 40);
else
rreq->io_streams[0].sreq_max_len = 3333;
return 0;
}
and pointed to by afs_req_ops. Then the following:
struct netfs_io_subrequest *subreq = op->fetch.subreq;
if (subreq->error == 0 &&
S_ISREG(subreq->rreq->inode->i_mode) &&
subreq->retry_count < 20) {
subreq->transferred = subreq->already_done;
__clear_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
afs_fetch_data_notify(op);
return;
}
was inserted into afs_fetch_data_success() at the beginning and struct
netfs_io_subrequest given an extra field, "already_done" that was set to
the value in "subreq->transferred" by netfs_reissue_read().
When reading a 4K file, the subrequests would get gradually smaller, a new
subrequest would be allocated around the 3rd retry and then eventually be
rendered superfluous when the 20th retry was hit and the limit on the first
subrequest was eased.
Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20250212222402.3618494-2-dhowells@redhat.com
Tested-by: Marc Dionne <marc.dionne@auristor.com>
Tested-by: Steve French <stfrench@microsoft.com>
cc: Ihor Solodrai <ihor.solodrai@pm.me>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: v9fs@lists.linux.dev
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-02-12 22:23:59 +00:00
|
|
|
trace_netfs_sreq(subreq, netfs_sreq_trace_superfluous);
|
2024-12-16 20:41:17 +00:00
|
|
|
list_del(&subreq->rreq_link);
|
|
|
|
netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
|
|
|
|
if (subreq == to)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
continue;
|
2024-07-02 00:40:22 +01:00
|
|
|
}
|
|
|
|
|
2024-12-16 20:41:17 +00:00
|
|
|
/* We ran out of subrequests, so we need to allocate some more
|
|
|
|
* and insert them after.
|
|
|
|
*/
|
|
|
|
do {
|
|
|
|
subreq = netfs_alloc_subrequest(rreq);
|
|
|
|
if (!subreq) {
|
|
|
|
subreq = to;
|
|
|
|
goto abandon_after;
|
|
|
|
}
|
|
|
|
subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
|
|
|
|
subreq->start = start;
|
|
|
|
subreq->len = len;
|
|
|
|
subreq->stream_nr = stream->stream_nr;
|
|
|
|
subreq->retry_count = 1;
|
|
|
|
|
|
|
|
trace_netfs_sreq_ref(rreq->debug_id, subreq->debug_index,
|
|
|
|
refcount_read(&subreq->ref),
|
|
|
|
netfs_sreq_trace_new);
|
|
|
|
|
|
|
|
list_add(&subreq->rreq_link, &to->rreq_link);
|
|
|
|
to = list_next_entry(to, rreq_link);
|
|
|
|
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
|
|
|
|
|
|
|
|
stream->sreq_max_len = umin(len, rreq->rsize);
|
|
|
|
stream->sreq_max_segs = 0;
|
|
|
|
if (unlikely(stream->sreq_max_segs))
|
|
|
|
part = netfs_limit_iter(&source, 0, part, stream->sreq_max_segs);
|
|
|
|
|
|
|
|
netfs_stat(&netfs_n_rh_download);
|
|
|
|
if (rreq->netfs_ops->prepare_read(subreq) < 0) {
|
|
|
|
trace_netfs_sreq(subreq, netfs_sreq_trace_reprep_failed);
|
|
|
|
__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
|
|
|
|
goto abandon;
|
|
|
|
}
|
|
|
|
|
|
|
|
part = umin(len, stream->sreq_max_len);
|
|
|
|
subreq->len = subreq->transferred + part;
|
|
|
|
subreq->io_iter = source;
|
|
|
|
iov_iter_truncate(&subreq->io_iter, part);
|
|
|
|
iov_iter_advance(&source, part);
|
|
|
|
|
|
|
|
len -= part;
|
|
|
|
start += part;
|
|
|
|
if (!len && boundary) {
|
|
|
|
__set_bit(NETFS_SREQ_BOUNDARY, &to->flags);
|
|
|
|
boundary = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
netfs_reissue_read(rreq, subreq);
|
|
|
|
} while (len);
|
|
|
|
|
|
|
|
} while (!list_is_head(next, &stream->subrequests));
|
2024-07-02 00:40:22 +01:00
|
|
|
|
|
|
|
return;
|
|
|
|
|
2024-12-16 20:41:17 +00:00
|
|
|
/* If we hit an error, fail all remaining incomplete subrequests */
|
|
|
|
abandon_after:
|
|
|
|
if (list_is_last(&subreq->rreq_link, &stream->subrequests))
|
|
|
|
return;
|
|
|
|
subreq = list_next_entry(subreq, rreq_link);
|
2024-07-02 00:40:22 +01:00
|
|
|
abandon:
|
2024-12-16 20:41:17 +00:00
|
|
|
list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
|
|
|
|
if (!subreq->error &&
|
|
|
|
!test_bit(NETFS_SREQ_FAILED, &subreq->flags) &&
|
|
|
|
!test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
|
|
|
|
continue;
|
|
|
|
subreq->error = -ENOMEM;
|
|
|
|
__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
|
2024-07-02 00:40:22 +01:00
|
|
|
__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Retry reads.
|
|
|
|
*/
|
|
|
|
void netfs_retry_reads(struct netfs_io_request *rreq)
|
|
|
|
{
|
2024-12-16 20:41:17 +00:00
|
|
|
struct netfs_io_subrequest *subreq;
|
|
|
|
struct netfs_io_stream *stream = &rreq->io_streams[0];
|
netfs: Fix a number of read-retry hangs
Fix a number of hangs in the netfslib read-retry code, including:
(1) netfs_reissue_read() doubles up the getting of references on
subrequests, thereby leaking the subrequest and causing inode eviction
to wait indefinitely. This can lead to the kernel reporting a hang in
the filesystem's evict_inode().
Fix this by removing the get from netfs_reissue_read() and adding one
to netfs_retry_read_subrequests() to deal with the one place that
didn't double up.
(2) The loop in netfs_retry_read_subrequests() that retries a sequence of
failed subrequests doesn't record whether or not it retried the one
that the "subreq" pointer points to when it leaves the loop. It may
not if renegotiation/repreparation of the subrequests means that fewer
subrequests are needed to span the cumulative range of the sequence.
Because it doesn't record this, the piece of code that discards
now-superfluous subrequests doesn't know whether it should discard the
one "subreq" points to - and so it doesn't.
Fix this by noting whether the last subreq it examines is superfluous
and if it is, then getting rid of it and all subsequent subrequests.
If that one one wasn't superfluous, then we would have tried to go
round the previous loop again and so there can be no further unretried
subrequests in the sequence.
(3) netfs_retry_read_subrequests() gets yet an extra ref on any additional
subrequests it has to get because it ran out of ones it could reuse to
to renegotiation/repreparation shrinking the subrequests.
Fix this by removing that extra ref.
(4) In netfs_retry_reads(), it was using wait_on_bit() to wait for
NETFS_SREQ_IN_PROGRESS to be cleared on all subrequests in the
sequence - but netfs_read_subreq_terminated() is now using a wait
queue on the request instead and so this wait will never finish.
Fix this by waiting on the wait queue instead. To make this work, a
new flag, NETFS_RREQ_RETRYING, is now set around the wait loop to tell
the wake-up code to wake up the wait queue rather than requeuing the
request's work item.
Note that this flag replaces the NETFS_RREQ_NEED_RETRY flag which is
no longer used.
(5) Whilst not strictly anything to do with the hang,
netfs_retry_read_subrequests() was also doubly incrementing the
subreq_counter and re-setting the debug index, leaving a gap in the
trace. This is also fixed.
One of these hangs was observed with 9p and with cifs. Others were forced
by manual code injection into fs/afs/file.c. Firstly, afs_prepare_read()
was created to provide an changing pattern of maximum subrequest sizes:
static int afs_prepare_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
if (!S_ISREG(subreq->rreq->inode->i_mode))
return 0;
if (subreq->retry_count < 20)
rreq->io_streams[0].sreq_max_len =
umax(200, 2222 - subreq->retry_count * 40);
else
rreq->io_streams[0].sreq_max_len = 3333;
return 0;
}
and pointed to by afs_req_ops. Then the following:
struct netfs_io_subrequest *subreq = op->fetch.subreq;
if (subreq->error == 0 &&
S_ISREG(subreq->rreq->inode->i_mode) &&
subreq->retry_count < 20) {
subreq->transferred = subreq->already_done;
__clear_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
afs_fetch_data_notify(op);
return;
}
was inserted into afs_fetch_data_success() at the beginning and struct
netfs_io_subrequest given an extra field, "already_done" that was set to
the value in "subreq->transferred" by netfs_reissue_read().
When reading a 4K file, the subrequests would get gradually smaller, a new
subrequest would be allocated around the 3rd retry and then eventually be
rendered superfluous when the 20th retry was hit and the limit on the first
subrequest was eased.
Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20250212222402.3618494-2-dhowells@redhat.com
Tested-by: Marc Dionne <marc.dionne@auristor.com>
Tested-by: Steve French <stfrench@microsoft.com>
cc: Ihor Solodrai <ihor.solodrai@pm.me>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: v9fs@lists.linux.dev
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-02-12 22:23:59 +00:00
|
|
|
DEFINE_WAIT(myself);
|
|
|
|
|
2025-02-12 22:24:00 +00:00
|
|
|
netfs_stat(&netfs_n_rh_retry_read_req);
|
|
|
|
|
netfs: Fix a number of read-retry hangs
Fix a number of hangs in the netfslib read-retry code, including:
(1) netfs_reissue_read() doubles up the getting of references on
subrequests, thereby leaking the subrequest and causing inode eviction
to wait indefinitely. This can lead to the kernel reporting a hang in
the filesystem's evict_inode().
Fix this by removing the get from netfs_reissue_read() and adding one
to netfs_retry_read_subrequests() to deal with the one place that
didn't double up.
(2) The loop in netfs_retry_read_subrequests() that retries a sequence of
failed subrequests doesn't record whether or not it retried the one
that the "subreq" pointer points to when it leaves the loop. It may
not if renegotiation/repreparation of the subrequests means that fewer
subrequests are needed to span the cumulative range of the sequence.
Because it doesn't record this, the piece of code that discards
now-superfluous subrequests doesn't know whether it should discard the
one "subreq" points to - and so it doesn't.
Fix this by noting whether the last subreq it examines is superfluous
and if it is, then getting rid of it and all subsequent subrequests.
If that one one wasn't superfluous, then we would have tried to go
round the previous loop again and so there can be no further unretried
subrequests in the sequence.
(3) netfs_retry_read_subrequests() gets yet an extra ref on any additional
subrequests it has to get because it ran out of ones it could reuse to
to renegotiation/repreparation shrinking the subrequests.
Fix this by removing that extra ref.
(4) In netfs_retry_reads(), it was using wait_on_bit() to wait for
NETFS_SREQ_IN_PROGRESS to be cleared on all subrequests in the
sequence - but netfs_read_subreq_terminated() is now using a wait
queue on the request instead and so this wait will never finish.
Fix this by waiting on the wait queue instead. To make this work, a
new flag, NETFS_RREQ_RETRYING, is now set around the wait loop to tell
the wake-up code to wake up the wait queue rather than requeuing the
request's work item.
Note that this flag replaces the NETFS_RREQ_NEED_RETRY flag which is
no longer used.
(5) Whilst not strictly anything to do with the hang,
netfs_retry_read_subrequests() was also doubly incrementing the
subreq_counter and re-setting the debug index, leaving a gap in the
trace. This is also fixed.
One of these hangs was observed with 9p and with cifs. Others were forced
by manual code injection into fs/afs/file.c. Firstly, afs_prepare_read()
was created to provide an changing pattern of maximum subrequest sizes:
static int afs_prepare_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
if (!S_ISREG(subreq->rreq->inode->i_mode))
return 0;
if (subreq->retry_count < 20)
rreq->io_streams[0].sreq_max_len =
umax(200, 2222 - subreq->retry_count * 40);
else
rreq->io_streams[0].sreq_max_len = 3333;
return 0;
}
and pointed to by afs_req_ops. Then the following:
struct netfs_io_subrequest *subreq = op->fetch.subreq;
if (subreq->error == 0 &&
S_ISREG(subreq->rreq->inode->i_mode) &&
subreq->retry_count < 20) {
subreq->transferred = subreq->already_done;
__clear_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
afs_fetch_data_notify(op);
return;
}
was inserted into afs_fetch_data_success() at the beginning and struct
netfs_io_subrequest given an extra field, "already_done" that was set to
the value in "subreq->transferred" by netfs_reissue_read().
When reading a 4K file, the subrequests would get gradually smaller, a new
subrequest would be allocated around the 3rd retry and then eventually be
rendered superfluous when the 20th retry was hit and the limit on the first
subrequest was eased.
Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20250212222402.3618494-2-dhowells@redhat.com
Tested-by: Marc Dionne <marc.dionne@auristor.com>
Tested-by: Steve French <stfrench@microsoft.com>
cc: Ihor Solodrai <ihor.solodrai@pm.me>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: v9fs@lists.linux.dev
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-02-12 22:23:59 +00:00
|
|
|
set_bit(NETFS_RREQ_RETRYING, &rreq->flags);
|
2024-07-02 00:40:22 +01:00
|
|
|
|
2024-12-16 20:41:17 +00:00
|
|
|
/* Wait for all outstanding I/O to quiesce before performing retries as
|
|
|
|
* we may need to renegotiate the I/O sizes.
|
|
|
|
*/
|
|
|
|
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
|
netfs: Fix a number of read-retry hangs
Fix a number of hangs in the netfslib read-retry code, including:
(1) netfs_reissue_read() doubles up the getting of references on
subrequests, thereby leaking the subrequest and causing inode eviction
to wait indefinitely. This can lead to the kernel reporting a hang in
the filesystem's evict_inode().
Fix this by removing the get from netfs_reissue_read() and adding one
to netfs_retry_read_subrequests() to deal with the one place that
didn't double up.
(2) The loop in netfs_retry_read_subrequests() that retries a sequence of
failed subrequests doesn't record whether or not it retried the one
that the "subreq" pointer points to when it leaves the loop. It may
not if renegotiation/repreparation of the subrequests means that fewer
subrequests are needed to span the cumulative range of the sequence.
Because it doesn't record this, the piece of code that discards
now-superfluous subrequests doesn't know whether it should discard the
one "subreq" points to - and so it doesn't.
Fix this by noting whether the last subreq it examines is superfluous
and if it is, then getting rid of it and all subsequent subrequests.
If that one one wasn't superfluous, then we would have tried to go
round the previous loop again and so there can be no further unretried
subrequests in the sequence.
(3) netfs_retry_read_subrequests() gets yet an extra ref on any additional
subrequests it has to get because it ran out of ones it could reuse to
to renegotiation/repreparation shrinking the subrequests.
Fix this by removing that extra ref.
(4) In netfs_retry_reads(), it was using wait_on_bit() to wait for
NETFS_SREQ_IN_PROGRESS to be cleared on all subrequests in the
sequence - but netfs_read_subreq_terminated() is now using a wait
queue on the request instead and so this wait will never finish.
Fix this by waiting on the wait queue instead. To make this work, a
new flag, NETFS_RREQ_RETRYING, is now set around the wait loop to tell
the wake-up code to wake up the wait queue rather than requeuing the
request's work item.
Note that this flag replaces the NETFS_RREQ_NEED_RETRY flag which is
no longer used.
(5) Whilst not strictly anything to do with the hang,
netfs_retry_read_subrequests() was also doubly incrementing the
subreq_counter and re-setting the debug index, leaving a gap in the
trace. This is also fixed.
One of these hangs was observed with 9p and with cifs. Others were forced
by manual code injection into fs/afs/file.c. Firstly, afs_prepare_read()
was created to provide an changing pattern of maximum subrequest sizes:
static int afs_prepare_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
if (!S_ISREG(subreq->rreq->inode->i_mode))
return 0;
if (subreq->retry_count < 20)
rreq->io_streams[0].sreq_max_len =
umax(200, 2222 - subreq->retry_count * 40);
else
rreq->io_streams[0].sreq_max_len = 3333;
return 0;
}
and pointed to by afs_req_ops. Then the following:
struct netfs_io_subrequest *subreq = op->fetch.subreq;
if (subreq->error == 0 &&
S_ISREG(subreq->rreq->inode->i_mode) &&
subreq->retry_count < 20) {
subreq->transferred = subreq->already_done;
__clear_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
afs_fetch_data_notify(op);
return;
}
was inserted into afs_fetch_data_success() at the beginning and struct
netfs_io_subrequest given an extra field, "already_done" that was set to
the value in "subreq->transferred" by netfs_reissue_read().
When reading a 4K file, the subrequests would get gradually smaller, a new
subrequest would be allocated around the 3rd retry and then eventually be
rendered superfluous when the 20th retry was hit and the limit on the first
subrequest was eased.
Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20250212222402.3618494-2-dhowells@redhat.com
Tested-by: Marc Dionne <marc.dionne@auristor.com>
Tested-by: Steve French <stfrench@microsoft.com>
cc: Ihor Solodrai <ihor.solodrai@pm.me>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: v9fs@lists.linux.dev
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-02-12 22:23:59 +00:00
|
|
|
if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
|
|
|
|
for (;;) {
|
|
|
|
prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
|
|
|
|
|
|
|
|
if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
|
|
|
|
break;
|
|
|
|
|
|
|
|
trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for);
|
|
|
|
schedule();
|
|
|
|
trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
|
|
|
|
}
|
|
|
|
|
|
|
|
finish_wait(&rreq->waitq, &myself);
|
2024-12-16 20:41:17 +00:00
|
|
|
}
|
netfs: Fix a number of read-retry hangs
Fix a number of hangs in the netfslib read-retry code, including:
(1) netfs_reissue_read() doubles up the getting of references on
subrequests, thereby leaking the subrequest and causing inode eviction
to wait indefinitely. This can lead to the kernel reporting a hang in
the filesystem's evict_inode().
Fix this by removing the get from netfs_reissue_read() and adding one
to netfs_retry_read_subrequests() to deal with the one place that
didn't double up.
(2) The loop in netfs_retry_read_subrequests() that retries a sequence of
failed subrequests doesn't record whether or not it retried the one
that the "subreq" pointer points to when it leaves the loop. It may
not if renegotiation/repreparation of the subrequests means that fewer
subrequests are needed to span the cumulative range of the sequence.
Because it doesn't record this, the piece of code that discards
now-superfluous subrequests doesn't know whether it should discard the
one "subreq" points to - and so it doesn't.
Fix this by noting whether the last subreq it examines is superfluous
and if it is, then getting rid of it and all subsequent subrequests.
If that one one wasn't superfluous, then we would have tried to go
round the previous loop again and so there can be no further unretried
subrequests in the sequence.
(3) netfs_retry_read_subrequests() gets yet an extra ref on any additional
subrequests it has to get because it ran out of ones it could reuse to
to renegotiation/repreparation shrinking the subrequests.
Fix this by removing that extra ref.
(4) In netfs_retry_reads(), it was using wait_on_bit() to wait for
NETFS_SREQ_IN_PROGRESS to be cleared on all subrequests in the
sequence - but netfs_read_subreq_terminated() is now using a wait
queue on the request instead and so this wait will never finish.
Fix this by waiting on the wait queue instead. To make this work, a
new flag, NETFS_RREQ_RETRYING, is now set around the wait loop to tell
the wake-up code to wake up the wait queue rather than requeuing the
request's work item.
Note that this flag replaces the NETFS_RREQ_NEED_RETRY flag which is
no longer used.
(5) Whilst not strictly anything to do with the hang,
netfs_retry_read_subrequests() was also doubly incrementing the
subreq_counter and re-setting the debug index, leaving a gap in the
trace. This is also fixed.
One of these hangs was observed with 9p and with cifs. Others were forced
by manual code injection into fs/afs/file.c. Firstly, afs_prepare_read()
was created to provide an changing pattern of maximum subrequest sizes:
static int afs_prepare_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
if (!S_ISREG(subreq->rreq->inode->i_mode))
return 0;
if (subreq->retry_count < 20)
rreq->io_streams[0].sreq_max_len =
umax(200, 2222 - subreq->retry_count * 40);
else
rreq->io_streams[0].sreq_max_len = 3333;
return 0;
}
and pointed to by afs_req_ops. Then the following:
struct netfs_io_subrequest *subreq = op->fetch.subreq;
if (subreq->error == 0 &&
S_ISREG(subreq->rreq->inode->i_mode) &&
subreq->retry_count < 20) {
subreq->transferred = subreq->already_done;
__clear_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
afs_fetch_data_notify(op);
return;
}
was inserted into afs_fetch_data_success() at the beginning and struct
netfs_io_subrequest given an extra field, "already_done" that was set to
the value in "subreq->transferred" by netfs_reissue_read().
When reading a 4K file, the subrequests would get gradually smaller, a new
subrequest would be allocated around the 3rd retry and then eventually be
rendered superfluous when the 20th retry was hit and the limit on the first
subrequest was eased.
Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20250212222402.3618494-2-dhowells@redhat.com
Tested-by: Marc Dionne <marc.dionne@auristor.com>
Tested-by: Steve French <stfrench@microsoft.com>
cc: Ihor Solodrai <ihor.solodrai@pm.me>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: v9fs@lists.linux.dev
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-02-12 22:23:59 +00:00
|
|
|
clear_bit(NETFS_RREQ_RETRYING, &rreq->flags);
|
2024-07-02 00:40:22 +01:00
|
|
|
|
2024-12-16 20:41:17 +00:00
|
|
|
trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);
|
2024-07-02 00:40:22 +01:00
|
|
|
netfs_retry_read_subrequests(rreq);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Unlock any the pages that haven't been unlocked yet due to abandoned
|
|
|
|
* subrequests.
|
|
|
|
*/
|
|
|
|
void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq)
|
|
|
|
{
|
|
|
|
struct folio_queue *p;
|
|
|
|
|
2024-12-16 20:40:55 +00:00
|
|
|
for (p = rreq->buffer.tail; p; p = p->next) {
|
2024-07-02 00:40:22 +01:00
|
|
|
for (int slot = 0; slot < folioq_count(p); slot++) {
|
|
|
|
struct folio *folio = folioq_folio(p, slot);
|
|
|
|
|
|
|
|
if (folio && !folioq_is_marked2(p, slot)) {
|
|
|
|
trace_netfs_folio(folio, netfs_folio_trace_abandon);
|
|
|
|
folio_unlock(folio);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|