// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "sqpoll.h"
#include "fdinfo.h"
#include "cancel.h"
#include "rsrc.h"

#ifdef CONFIG_PROC_FS
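/*
 * Dump a single registered personality: the numeric id followed by its
 * uid/gid sets, supplementary groups and effective capability mask, in a
 * layout similar to /proc/<pid>/status.
 */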
static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id,
                const struct cred *cred)
{
        struct user_namespace *uns = seq_user_ns(m);
        struct group_info *gi;
        kernel_cap_t cap;
        int g;

        seq_printf(m, "%5d\n", id);
        seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid));
        seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid));
        seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid));
        seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid));
        seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid));
        seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid));
        seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid));
        seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid));
        seq_puts(m, "\n\tGroups:\t");
        gi = cred->group_info;
        for (g = 0; g < gi->ngroups; g++) {
                seq_put_decimal_ull(m, g ? " " : "",
                                    from_kgid_munged(uns, gi->gid[g]));
        }
        seq_puts(m, "\n\tCapEff:\t");
        cap = cred->cap_effective;
        seq_put_hex_ll(m, NULL, cap.val, 16);
        seq_putc(m, '\n');
        return 0;
}

/*
 * Caller holds a reference to the file already, we don't need to do
 * anything else to get an extra reference.
 */
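/*
 * This is wired up as the ->show_fdinfo() handler for the io_uring file, so
 * everything printed below ends up in /proc/<pid>/fdinfo/<ring fd>.
 */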
__cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
{
        struct io_ring_ctx *ctx = file->private_data;
        struct io_overflow_cqe *ocqe;
        struct io_rings *r = ctx->rings;
        struct rusage sq_usage;
        unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
        unsigned int sq_head = READ_ONCE(r->sq.head);
        unsigned int sq_tail = READ_ONCE(r->sq.tail);
        unsigned int cq_head = READ_ONCE(r->cq.head);
        unsigned int cq_tail = READ_ONCE(r->cq.tail);
        unsigned int cq_shift = 0;
        unsigned int sq_shift = 0;
        unsigned int sq_entries, cq_entries;
        int sq_pid = -1, sq_cpu = -1;
        u64 sq_total_time = 0, sq_work_time = 0;
        bool has_lock;
        unsigned int i;
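
        /*
         * With IORING_SETUP_CQE32 / IORING_SETUP_SQE128 the CQ/SQ entries are
         * doubly sized, so each one occupies two slots in the ring and the
         * index has to be shifted by 1 when dumping.
         */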
        if (ctx->flags & IORING_SETUP_CQE32)
                cq_shift = 1;
        if (ctx->flags & IORING_SETUP_SQE128)
                sq_shift = 1;

        /*
         * We may get imprecise SQE and CQE info if the ring is actively
         * running: cached_sq_head and cached_cq_tail are read without
         * uring_lock, and sq_tail and cq_head are updated by userspace.
         * That's fine, since this output is normally only inspected when a
         * ring appears to be stuck.
         */
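        /*
         * An abridged example of the dump below, as seen when reading
         * /proc/<pid>/fdinfo/<ring fd> for an idle SQPOLL ring (the values
         * will of course differ per ring):
         *
         *      SqMask:         0xff
         *      SqHead:         633
         *      SqTail:         633
         *      CachedSqHead:   633
         *      CqMask:         0x3fff
         *      CqHead:         430250
         *      CqTail:         430250
         *      CachedCqTail:   430250
         *      SQEs:           0
         *      CQEs:           0
         *      SqThread:       885
         *      SqThreadCpu:    0
         *      SqTotalTime:    52793826
         *      SqWorkTime:     3590465
         *      UserFiles:      0
         *      UserBufs:       0
         *      PollList:
         *       op=10, task_works=0
         *      CqOverflowList:
         */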
        seq_printf(m, "SqMask:\t0x%x\n", sq_mask);
        seq_printf(m, "SqHead:\t%u\n", sq_head);
        seq_printf(m, "SqTail:\t%u\n", sq_tail);
        seq_printf(m, "CachedSqHead:\t%u\n", ctx->cached_sq_head);
        seq_printf(m, "CqMask:\t0x%x\n", cq_mask);
        seq_printf(m, "CqHead:\t%u\n", cq_head);
        seq_printf(m, "CqTail:\t%u\n", cq_tail);
        seq_printf(m, "CachedCqTail:\t%u\n", ctx->cached_cq_tail);
        seq_printf(m, "SQEs:\t%u\n", sq_tail - sq_head);
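        /*
         * Dump the pending SQEs by walking the sq_array indirection. Rings
         * created with IORING_SETUP_NO_SQARRAY have no such array, so SQE
         * dumping is skipped for them. With IORING_SETUP_SQE128 each SQE
         * occupies two slots, hence the sq_shift when indexing sq_sqes.
         */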
        sq_entries = min(sq_tail - sq_head, ctx->sq_entries);
        for (i = 0; i < sq_entries; i++) {
                unsigned int entry = i + sq_head;
                struct io_uring_sqe *sqe;
                unsigned int sq_idx;

                if (ctx->flags & IORING_SETUP_NO_SQARRAY)
                        break;
                sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]);
                if (sq_idx > sq_mask)
                        continue;
                sqe = &ctx->sq_sqes[sq_idx << sq_shift];
                seq_printf(m, "%5u: opcode:%s, fd:%d, flags:%x, off:%llu, "
                              "addr:0x%llx, rw_flags:0x%x, buf_index:%d "
                              "user_data:%llu",
                           sq_idx, io_uring_get_opcode(sqe->opcode), sqe->fd,
                           sqe->flags, (unsigned long long) sqe->off,
                           (unsigned long long) sqe->addr, sqe->rw_flags,
                           sqe->buf_index, sqe->user_data);
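                /*
                 * For 128-byte SQEs the second half is command specific, so
                 * dump it as raw u64 words (e0..e7) rather than guessing at
                 * a structure.
                 */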
                if (sq_shift) {
                        u64 *sqeb = (void *) (sqe + 1);
                        int size = sizeof(struct io_uring_sqe) / sizeof(u64);
                        int j;

                        for (j = 0; j < size; j++) {
                                seq_printf(m, ", e%d:0x%llx", j,
                                           (unsigned long long) *sqeb);
                                sqeb++;
                        }
                }
                seq_printf(m, "\n");
        }
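        /*
         * Same idea for the CQ ring: IORING_SETUP_CQE32 doubles the CQE size,
         * so the index is shifted and the two extra fields are printed too.
         */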
        seq_printf(m, "CQEs:\t%u\n", cq_tail - cq_head);
        cq_entries = min(cq_tail - cq_head, ctx->cq_entries);
        for (i = 0; i < cq_entries; i++) {
                unsigned int entry = i + cq_head;
                struct io_uring_cqe *cqe = &r->cqes[(entry & cq_mask) << cq_shift];

                seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x",
                           entry & cq_mask, cqe->user_data, cqe->res,
                           cqe->flags);
                if (cq_shift)
                        seq_printf(m, ", extra1:%llu, extra2:%llu",
                                   cqe->big_cqe[0], cqe->big_cqe[1]);
                seq_printf(m, "\n");
        }

        /*
         * Avoid ABBA deadlock between the seq lock and the io_uring mutex,
         * since the fdinfo case grabs it in the opposite order from normal
         * use cases. If we fail to get the lock, we just don't iterate any
         * structures that could be going away outside the io_uring mutex.
         */
        has_lock = mutex_trylock(&ctx->uring_lock);
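
        /*
         * With SQPOLL, report the polling thread's pid/cpu and how much of
         * its CPU time went into actual work: getrusage() gives the total
         * system time in microseconds, sq->work_time the busy portion as
         * accounted by the sqpoll thread itself.
         */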
        if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
                struct io_sq_data *sq = ctx->sq_data;

                /*
                 * sq->thread might be NULL if we raced with the sqpoll
                 * thread termination.
                 */
                if (sq->thread) {
                        sq_pid = sq->task_pid;
                        sq_cpu = sq->sq_cpu;
                        getrusage(sq->thread, RUSAGE_SELF, &sq_usage);
                        sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000
                                         + sq_usage.ru_stime.tv_usec);
                        sq_work_time = sq->work_time;
                }
        }

        seq_printf(m, "SqThread:\t%d\n", sq_pid);
        seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu);
        seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time);
        seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time);
        seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
        for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
                struct file *f = io_file_from_index(&ctx->file_table, i);

                if (f)
                        seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname);
                else
                        seq_printf(m, "%5u: <none>\n", i);
        }
        seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs);
        for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) {
                struct io_mapped_ubuf *buf = ctx->user_bufs[i]->buf;

                seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, buf->len);
        }
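        /*
         * Credentials registered with IORING_REGISTER_PERSONALITY are kept
         * in ctx->personalities; dump each one via io_uring_show_cred().
         */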
        if (has_lock && !xa_empty(&ctx->personalities)) {
                unsigned long index;
                const struct cred *cred;

                seq_printf(m, "Personalities:\n");
                xa_for_each(&ctx->personalities, index, cred)
                        io_uring_show_cred(m, index, cred);
        }
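
        /*
         * PollList walks the cancel hash buckets, i.e. requests with an armed
         * poll handler, printing each opcode and whether the owning task has
         * task_work pending. Only done if we managed to grab uring_lock.
         */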
        seq_puts(m, "PollList:\n");
        for (i = 0; has_lock && i < (1U << ctx->cancel_table.hash_bits); i++) {
                struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i];
                struct io_kiocb *req;

                hlist_for_each_entry(req, &hb->list, hash_node)
                        seq_printf(m, " op=%d, task_works=%d\n", req->opcode,
                                   task_work_pending(req->task));
        }

        if (has_lock)
                mutex_unlock(&ctx->uring_lock);
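
        /*
         * CQEs that could not be posted because the CQ ring was full sit on
         * ctx->cq_overflow_list; walk it under the completion lock.
         */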
        seq_puts(m, "CqOverflowList:\n");
        spin_lock(&ctx->completion_lock);
        list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) {
                struct io_uring_cqe *cqe = &ocqe->cqe;

                seq_printf(m, " user_data=%llu, res=%d, flags=%x\n",
                           cqe->user_data, cqe->res, cqe->flags);
        }
        spin_unlock(&ctx->completion_lock);
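
        /*
         * NAPI busy-poll settings, useful when debugging a ring that has the
         * NAPI feature enabled. Only present on CONFIG_NET_RX_BUSY_POLL
         * kernels.
         */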
#ifdef CONFIG_NET_RX_BUSY_POLL
        if (ctx->napi_enabled) {
                seq_puts(m, "NAPI:\tenabled\n");
                seq_printf(m, "napi_busy_poll_dt:\t%llu\n", ctx->napi_busy_poll_dt);
                if (ctx->napi_prefer_busy_poll)
                        seq_puts(m, "napi_prefer_busy_poll:\ttrue\n");
                else
                        seq_puts(m, "napi_prefer_busy_poll:\tfalse\n");
        } else {
                seq_puts(m, "NAPI:\tdisabled\n");
        }
#endif
}
#endif