io_uring/rsrc: unify file and buffer resource tables

For files, there's nr_user_files/file_table/file_data, and buffers have
nr_user_bufs/user_bufs/buf_data. There's no reason why file_table and
file_data can't be the same thing, and ditto for the buffer side. That
gets rid of more io_ring_ctx state that's in two spots rather than just
being in one spot, as it should be. Put all the registered file data in
one location, and ditto on the buffer front.

This also avoids having both io_rsrc_data->nodes being an allocated
array, and ->user_bufs[] or ->file_table.nodes. There's no reason to
have this information duplicated. Keep it in one spot, io_rsrc_data,
along with how many resources are available.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Jens Axboe 2024-10-26 14:50:13 -06:00
parent f38f284764
commit 3597f2786b
15 changed files with 123 additions and 212 deletions

View file

@ -55,8 +55,13 @@ struct io_wq_work {
int cancel_seq; int cancel_seq;
}; };
struct io_rsrc_data {
unsigned int nr;
struct io_rsrc_node **nodes;
};
struct io_file_table { struct io_file_table {
struct io_rsrc_node **nodes; struct io_rsrc_data data;
unsigned long *bitmap; unsigned long *bitmap;
unsigned int alloc_hint; unsigned int alloc_hint;
}; };
@ -276,9 +281,7 @@ struct io_ring_ctx {
struct io_wq_work_list iopoll_list; struct io_wq_work_list iopoll_list;
struct io_file_table file_table; struct io_file_table file_table;
struct io_rsrc_node **user_bufs; struct io_rsrc_data buf_table;
unsigned nr_user_files;
unsigned nr_user_bufs;
struct io_submit_state submit_state; struct io_submit_state submit_state;
@ -366,10 +369,6 @@ struct io_ring_ctx {
struct wait_queue_head poll_wq; struct wait_queue_head poll_wq;
struct io_restriction restrictions; struct io_restriction restrictions;
/* slow path rsrc auxilary data, used by update/register */
struct io_rsrc_data *file_data;
struct io_rsrc_data *buf_data;
u32 pers_next; u32 pers_next;
struct xarray personalities; struct xarray personalities;

View file

@ -240,9 +240,9 @@ static int __io_sync_cancel(struct io_uring_task *tctx,
/* fixed must be grabbed every time since we drop the uring_lock */ /* fixed must be grabbed every time since we drop the uring_lock */
if ((cd->flags & IORING_ASYNC_CANCEL_FD) && if ((cd->flags & IORING_ASYNC_CANCEL_FD) &&
(cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) { (cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
if (unlikely(fd >= ctx->nr_user_files)) if (unlikely(fd >= ctx->file_table.data.nr))
return -EBADF; return -EBADF;
fd = array_index_nospec(fd, ctx->nr_user_files); fd = array_index_nospec(fd, ctx->file_table.data.nr);
cd->file = io_file_from_index(&ctx->file_table, fd); cd->file = io_file_from_index(&ctx->file_table, fd);
if (!cd->file) if (!cd->file)
return -EBADF; return -EBADF;

View file

@ -165,8 +165,8 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu); seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu);
seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time); seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time);
seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time); seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time);
seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files); seq_printf(m, "UserFiles:\t%u\n", ctx->file_table.data.nr);
for (i = 0; has_lock && i < ctx->nr_user_files; i++) { for (i = 0; has_lock && i < ctx->file_table.data.nr; i++) {
struct file *f = io_file_from_index(&ctx->file_table, i); struct file *f = io_file_from_index(&ctx->file_table, i);
if (f) if (f)
@ -174,9 +174,9 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
else else
seq_printf(m, "%5u: <none>\n", i); seq_printf(m, "%5u: <none>\n", i);
} }
seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs); seq_printf(m, "UserBufs:\t%u\n", ctx->buf_table.nr);
for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) { for (i = 0; has_lock && i < ctx->buf_table.nr; i++) {
struct io_mapped_ubuf *buf = ctx->user_bufs[i]->buf; struct io_mapped_ubuf *buf = ctx->buf_table.nodes[i]->buf;
seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, buf->len); seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, buf->len);
} }

View file

@ -38,25 +38,19 @@ static int io_file_bitmap_get(struct io_ring_ctx *ctx)
bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files) bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
{ {
table->nodes = kvmalloc_array(nr_files, sizeof(struct io_src_node *), if (io_rsrc_data_alloc(&table->data, nr_files))
GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (unlikely(!table->nodes))
return false; return false;
table->bitmap = bitmap_zalloc(nr_files, GFP_KERNEL_ACCOUNT); table->bitmap = bitmap_zalloc(nr_files, GFP_KERNEL_ACCOUNT);
if (unlikely(!table->bitmap)) { if (table->bitmap)
kvfree(table->nodes); return true;
return false; io_rsrc_data_free(&table->data);
} return false;
return true;
} }
void io_free_file_tables(struct io_file_table *table) void io_free_file_tables(struct io_file_table *table)
{ {
kvfree(table->nodes); io_rsrc_data_free(&table->data);
bitmap_free(table->bitmap); bitmap_free(table->bitmap);
table->nodes = NULL;
table->bitmap = NULL; table->bitmap = NULL;
} }
@ -68,22 +62,22 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
if (io_is_uring_fops(file)) if (io_is_uring_fops(file))
return -EBADF; return -EBADF;
if (!ctx->file_data) if (!ctx->file_table.data.nr)
return -ENXIO; return -ENXIO;
if (slot_index >= ctx->nr_user_files) if (slot_index >= ctx->file_table.data.nr)
return -EINVAL; return -EINVAL;
node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
if (!node) if (!node)
return -ENOMEM; return -ENOMEM;
slot_index = array_index_nospec(slot_index, ctx->nr_user_files); slot_index = array_index_nospec(slot_index, ctx->file_table.data.nr);
if (ctx->file_table.nodes[slot_index]) if (ctx->file_table.data.nodes[slot_index])
io_put_rsrc_node(ctx->file_table.nodes[slot_index]); io_put_rsrc_node(ctx->file_table.data.nodes[slot_index]);
else else
io_file_bitmap_set(&ctx->file_table, slot_index); io_file_bitmap_set(&ctx->file_table, slot_index);
ctx->file_table.nodes[slot_index] = node; ctx->file_table.data.nodes[slot_index] = node;
io_fixed_file_set(node, file); io_fixed_file_set(node, file);
return 0; return 0;
} }
@ -129,16 +123,16 @@ int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset) int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset)
{ {
if (unlikely(!ctx->file_data)) if (unlikely(!ctx->file_table.data.nr))
return -ENXIO; return -ENXIO;
if (offset >= ctx->nr_user_files) if (offset >= ctx->file_table.data.nr)
return -EINVAL; return -EINVAL;
offset = array_index_nospec(offset, ctx->nr_user_files); offset = array_index_nospec(offset, ctx->file_table.data.nr);
if (!ctx->file_table.nodes[offset]) if (!ctx->file_table.data.nodes[offset])
return -EBADF; return -EBADF;
io_put_rsrc_node(ctx->file_table.nodes[offset]); io_put_rsrc_node(ctx->file_table.data.nodes[offset]);
ctx->file_table.nodes[offset] = NULL; ctx->file_table.data.nodes[offset] = NULL;
io_file_bitmap_clear(&ctx->file_table, offset); io_file_bitmap_clear(&ctx->file_table, offset);
return 0; return 0;
} }
@ -153,7 +147,7 @@ int io_register_file_alloc_range(struct io_ring_ctx *ctx,
return -EFAULT; return -EFAULT;
if (check_add_overflow(range.off, range.len, &end)) if (check_add_overflow(range.off, range.len, &end))
return -EOVERFLOW; return -EOVERFLOW;
if (range.resv || end > ctx->nr_user_files) if (range.resv || end > ctx->file_table.data.nr)
return -EINVAL; return -EINVAL;
io_file_table_set_alloc_range(ctx, range.off, range.len); io_file_table_set_alloc_range(ctx, range.off, range.len);

View file

@ -52,7 +52,7 @@ static inline struct file *io_slot_file(struct io_rsrc_node *node)
static inline struct file *io_file_from_index(struct io_file_table *table, static inline struct file *io_file_from_index(struct io_file_table *table,
int index) int index)
{ {
struct io_rsrc_node *node = table->nodes[index]; struct io_rsrc_node *node = table->data.nodes[index];
if (node) if (node)
return io_slot_file(node); return io_slot_file(node);

View file

@ -1879,11 +1879,10 @@ inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
struct file *file = NULL; struct file *file = NULL;
io_ring_submit_lock(ctx, issue_flags); io_ring_submit_lock(ctx, issue_flags);
if (unlikely((unsigned int)fd >= ctx->file_table.data.nr))
if (unlikely((unsigned int)fd >= ctx->nr_user_files))
goto out; goto out;
fd = array_index_nospec(fd, ctx->nr_user_files); fd = array_index_nospec(fd, ctx->file_table.data.nr);
node = ctx->file_table.nodes[fd]; node = ctx->file_table.data.nodes[fd];
if (node) { if (node) {
io_req_assign_rsrc_node(req, node); io_req_assign_rsrc_node(req, node);
req->flags |= io_slot_flags(node); req->flags |= io_slot_flags(node);

View file

@ -180,8 +180,8 @@ static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_fl
int idx = msg->src_fd; int idx = msg->src_fd;
io_ring_submit_lock(ctx, issue_flags); io_ring_submit_lock(ctx, issue_flags);
if (likely(idx < ctx->nr_user_files)) { if (likely(idx < ctx->file_table.data.nr)) {
idx = array_index_nospec(idx, ctx->nr_user_files); idx = array_index_nospec(idx, ctx->file_table.data.nr);
file = io_file_from_index(&ctx->file_table, idx); file = io_file_from_index(&ctx->file_table, idx);
if (file) if (file)
get_file(file); get_file(file);

View file

@ -1347,9 +1347,9 @@ static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
ret = -EFAULT; ret = -EFAULT;
io_ring_submit_lock(ctx, issue_flags); io_ring_submit_lock(ctx, issue_flags);
if (sr->buf_index < ctx->nr_user_bufs) { if (sr->buf_index < ctx->buf_table.nr) {
idx = array_index_nospec(sr->buf_index, ctx->nr_user_bufs); idx = array_index_nospec(sr->buf_index, ctx->buf_table.nr);
node = ctx->user_bufs[idx]; node = ctx->buf_table.nodes[idx];
io_req_assign_rsrc_node(sr->notif, node); io_req_assign_rsrc_node(sr->notif, node);
ret = 0; ret = 0;
} }

View file

@ -66,9 +66,9 @@ int io_nop(struct io_kiocb *req, unsigned int issue_flags)
ret = -EFAULT; ret = -EFAULT;
io_ring_submit_lock(ctx, issue_flags); io_ring_submit_lock(ctx, issue_flags);
if (nop->buffer < ctx->nr_user_bufs) { if (nop->buffer < ctx->buf_table.nr) {
idx = array_index_nospec(nop->buffer, ctx->nr_user_bufs); idx = array_index_nospec(nop->buffer, ctx->buf_table.nr);
node = READ_ONCE(ctx->user_bufs[idx]); node = READ_ONCE(ctx->buf_table.nodes[idx]);
io_req_assign_rsrc_node(req, node); io_req_assign_rsrc_node(req, node);
ret = 0; ret = 0;
} }

View file

@ -937,7 +937,8 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
mutex_lock(&ctx->uring_lock); mutex_lock(&ctx->uring_lock);
ret = __io_uring_register(ctx, opcode, arg, nr_args); ret = __io_uring_register(ctx, opcode, arg, nr_args);
mutex_unlock(&ctx->uring_lock); mutex_unlock(&ctx->uring_lock);
trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, ret); trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr,
ctx->buf_table.nr, ret);
if (!use_registered_ring) if (!use_registered_ring)
fput(file); fput(file);
return ret; return ret;

View file

@ -142,39 +142,28 @@ struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type)
return node; return node;
} }
static void io_rsrc_data_free(struct io_rsrc_data *data) __cold void io_rsrc_data_free(struct io_rsrc_data *data)
{ {
int i; if (!data->nr)
return;
for (i = 0; i < data->nr; i++) { while (data->nr--) {
struct io_rsrc_node *node = data->nodes[i]; if (data->nodes[data->nr])
io_put_rsrc_node(data->nodes[data->nr]);
if (node)
io_put_rsrc_node(node);
} }
kvfree(data->nodes); kvfree(data->nodes);
kfree(data); data->nodes = NULL;
data->nr = 0;
} }
__cold static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, unsigned nr, __cold int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr)
struct io_rsrc_data **pdata)
{ {
struct io_rsrc_data *data;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
data->nodes = kvmalloc_array(nr, sizeof(struct io_rsrc_node *), data->nodes = kvmalloc_array(nr, sizeof(struct io_rsrc_node *),
GFP_KERNEL | __GFP_ZERO); GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!data->nodes) { if (data->nodes) {
kfree(data); data->nr = nr;
return -ENOMEM; return 0;
} }
return -ENOMEM;
data->nr = nr;
*pdata = data;
return 0;
} }
static int __io_sqe_files_update(struct io_ring_ctx *ctx, static int __io_sqe_files_update(struct io_ring_ctx *ctx,
@ -186,9 +175,9 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
int fd, i, err = 0; int fd, i, err = 0;
unsigned int done; unsigned int done;
if (!ctx->file_data) if (!ctx->file_table.data.nr)
return -ENXIO; return -ENXIO;
if (up->offset + nr_args > ctx->nr_user_files) if (up->offset + nr_args > ctx->file_table.data.nr)
return -EINVAL; return -EINVAL;
for (done = 0; done < nr_args; done++) { for (done = 0; done < nr_args; done++) {
@ -206,10 +195,10 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
if (fd == IORING_REGISTER_FILES_SKIP) if (fd == IORING_REGISTER_FILES_SKIP)
continue; continue;
i = array_index_nospec(up->offset + done, ctx->nr_user_files); i = array_index_nospec(up->offset + done, ctx->file_table.data.nr);
if (ctx->file_table.nodes[i]) { if (ctx->file_table.data.nodes[i]) {
io_put_rsrc_node(ctx->file_table.nodes[i]); io_put_rsrc_node(ctx->file_table.data.nodes[i]);
ctx->file_table.nodes[i] = NULL; ctx->file_table.data.nodes[i] = NULL;
io_file_bitmap_clear(&ctx->file_table, i); io_file_bitmap_clear(&ctx->file_table, i);
} }
if (fd != -1) { if (fd != -1) {
@ -234,7 +223,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
fput(file); fput(file);
break; break;
} }
ctx->file_table.nodes[i] = node; ctx->file_table.data.nodes[i] = node;
if (tag) if (tag)
node->tag = tag; node->tag = tag;
io_fixed_file_set(node, file); io_fixed_file_set(node, file);
@ -256,9 +245,9 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
__u32 done; __u32 done;
int i, err; int i, err;
if (!ctx->buf_data) if (!ctx->buf_table.nr)
return -ENXIO; return -ENXIO;
if (up->offset + nr_args > ctx->nr_user_bufs) if (up->offset + nr_args > ctx->buf_table.nr)
return -EINVAL; return -EINVAL;
for (done = 0; done < nr_args; done++) { for (done = 0; done < nr_args; done++) {
@ -282,16 +271,16 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
err = -EINVAL; err = -EINVAL;
break; break;
} }
i = array_index_nospec(up->offset + done, ctx->nr_user_bufs);
node = io_sqe_buffer_register(ctx, iov, &last_hpage); node = io_sqe_buffer_register(ctx, iov, &last_hpage);
if (IS_ERR(node)) { if (IS_ERR(node)) {
err = PTR_ERR(node); err = PTR_ERR(node);
break; break;
} }
if (ctx->user_bufs[i]) i = array_index_nospec(up->offset + done, ctx->buf_table.nr);
io_put_rsrc_node(ctx->user_bufs[i]); if (ctx->buf_table.nodes[i])
io_put_rsrc_node(ctx->buf_table.nodes[i]);
ctx->user_bufs[i] = node; ctx->buf_table.nodes[i] = node;
if (tag) if (tag)
node->tag = tag; node->tag = tag;
if (ctx->compat) if (ctx->compat)
@ -409,7 +398,7 @@ static int io_files_update_with_index_alloc(struct io_kiocb *req,
struct file *file; struct file *file;
int ret, fd; int ret, fd;
if (!req->ctx->file_data) if (!req->ctx->file_table.data.nr)
return -ENXIO; return -ENXIO;
for (done = 0; done < up->nr_args; done++) { for (done = 0; done < up->nr_args; done++) {
@ -494,35 +483,13 @@ void io_free_rsrc_node(struct io_rsrc_node *node)
kfree(node); kfree(node);
} }
static void __io_sqe_files_unregister(struct io_ring_ctx *ctx) int io_sqe_files_unregister(struct io_ring_ctx *ctx)
{ {
int i; if (!ctx->file_table.data.nr)
return -ENXIO;
lockdep_assert_held(&ctx->uring_lock);
for (i = 0; i < ctx->nr_user_files; i++) {
struct io_rsrc_node *node = ctx->file_table.nodes[i];
if (node) {
io_put_rsrc_node(node);
io_file_bitmap_clear(&ctx->file_table, i);
ctx->file_table.nodes[i] = NULL;
}
}
io_free_file_tables(&ctx->file_table); io_free_file_tables(&ctx->file_table);
io_file_table_set_alloc_range(ctx, 0, 0); io_file_table_set_alloc_range(ctx, 0, 0);
io_rsrc_data_free(ctx->file_data);
ctx->file_data = NULL;
ctx->nr_user_files = 0;
}
int io_sqe_files_unregister(struct io_ring_ctx *ctx)
{
if (!ctx->file_data)
return -ENXIO;
__io_sqe_files_unregister(ctx);
return 0; return 0;
} }
@ -534,7 +501,7 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
int fd, ret; int fd, ret;
unsigned i; unsigned i;
if (ctx->file_data) if (ctx->file_table.data.nr)
return -EBUSY; return -EBUSY;
if (!nr_args) if (!nr_args)
return -EINVAL; return -EINVAL;
@ -542,17 +509,10 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return -EMFILE; return -EMFILE;
if (nr_args > rlimit(RLIMIT_NOFILE)) if (nr_args > rlimit(RLIMIT_NOFILE))
return -EMFILE; return -EMFILE;
ret = io_rsrc_data_alloc(ctx, nr_args, &ctx->file_data); if (!io_alloc_file_tables(&ctx->file_table, nr_args))
if (ret)
return ret;
if (!io_alloc_file_tables(&ctx->file_table, nr_args)) {
io_rsrc_data_free(ctx->file_data);
ctx->file_data = NULL;
return -ENOMEM; return -ENOMEM;
}
for (i = 0; i < nr_args; i++, ctx->nr_user_files++) { for (i = 0; i < nr_args; i++) {
struct io_rsrc_node *node; struct io_rsrc_node *node;
u64 tag = 0; u64 tag = 0;
@ -589,44 +549,24 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
} }
if (tag) if (tag)
node->tag = tag; node->tag = tag;
ctx->file_table.nodes[i] = node; ctx->file_table.data.nodes[i] = node;
io_fixed_file_set(node, file); io_fixed_file_set(node, file);
io_file_bitmap_set(&ctx->file_table, i); io_file_bitmap_set(&ctx->file_table, i);
} }
/* default it to the whole table */ /* default it to the whole table */
io_file_table_set_alloc_range(ctx, 0, ctx->nr_user_files); io_file_table_set_alloc_range(ctx, 0, ctx->file_table.data.nr);
return 0; return 0;
fail: fail:
__io_sqe_files_unregister(ctx); io_sqe_files_unregister(ctx);
return ret; return ret;
} }
static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
{
unsigned int i;
lockdep_assert_held(&ctx->uring_lock);
for (i = 0; i < ctx->nr_user_bufs; i++) {
if (ctx->user_bufs[i]) {
io_put_rsrc_node(ctx->user_bufs[i]);
ctx->user_bufs[i] = NULL;
}
}
kvfree(ctx->user_bufs);
ctx->user_bufs = NULL;
io_rsrc_data_free(ctx->buf_data);
ctx->buf_data = NULL;
ctx->nr_user_bufs = 0;
}
int io_sqe_buffers_unregister(struct io_ring_ctx *ctx) int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
{ {
if (!ctx->buf_data) if (!ctx->buf_table.nr)
return -ENXIO; return -ENXIO;
io_rsrc_data_free(&ctx->buf_table);
__io_sqe_buffers_unregister(ctx);
return 0; return 0;
} }
@ -653,8 +593,8 @@ static bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages,
} }
/* check previously registered pages */ /* check previously registered pages */
for (i = 0; i < ctx->nr_user_bufs; i++) { for (i = 0; i < ctx->buf_table.nr; i++) {
struct io_rsrc_node *node = ctx->user_bufs[i]; struct io_rsrc_node *node = ctx->buf_table.nodes[i];
struct io_mapped_ubuf *imu = node->buf; struct io_mapped_ubuf *imu = node->buf;
for (j = 0; j < imu->nr_bvecs; j++) { for (j = 0; j < imu->nr_bvecs; j++) {
@ -805,6 +745,9 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
struct io_imu_folio_data data; struct io_imu_folio_data data;
bool coalesced; bool coalesced;
if (!iov->iov_base)
return rsrc_empty_node;
node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER); node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
if (!node) if (!node)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
@ -864,40 +807,29 @@ done:
return node; return node;
} }
static int io_buffers_map_alloc(struct io_ring_ctx *ctx, unsigned int nr_args)
{
ctx->user_bufs = kcalloc(nr_args, sizeof(*ctx->user_bufs), GFP_KERNEL);
return ctx->user_bufs ? 0 : -ENOMEM;
}
int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned int nr_args, u64 __user *tags) unsigned int nr_args, u64 __user *tags)
{ {
struct page *last_hpage = NULL; struct page *last_hpage = NULL;
struct io_rsrc_data *data; struct io_rsrc_data data;
struct iovec fast_iov, *iov = &fast_iov; struct iovec fast_iov, *iov = &fast_iov;
const struct iovec __user *uvec; const struct iovec __user *uvec;
int i, ret; int i, ret;
BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16)); BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16));
if (ctx->user_bufs) if (ctx->buf_table.nr)
return -EBUSY; return -EBUSY;
if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS) if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS)
return -EINVAL; return -EINVAL;
ret = io_rsrc_data_alloc(ctx, nr_args, &data); ret = io_rsrc_data_alloc(&data, nr_args);
if (ret) if (ret)
return ret; return ret;
ret = io_buffers_map_alloc(ctx, nr_args);
if (ret) {
io_rsrc_data_free(data);
return ret;
}
if (!arg) if (!arg)
memset(iov, 0, sizeof(*iov)); memset(iov, 0, sizeof(*iov));
for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) { for (i = 0; i < nr_args; i++) {
struct io_rsrc_node *node; struct io_rsrc_node *node;
u64 tag = 0; u64 tag = 0;
@ -935,14 +867,12 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
} }
if (tag) if (tag)
node->tag = tag; node->tag = tag;
ctx->user_bufs[i] = node; data.nodes[i] = node;
} }
WARN_ON_ONCE(ctx->buf_data); ctx->buf_table = data;
ctx->buf_data = data;
if (ret) if (ret)
__io_sqe_buffers_unregister(ctx); io_sqe_buffers_unregister(ctx);
return ret; return ret;
} }
@ -1009,8 +939,7 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx) static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx)
{ {
struct io_rsrc_node **user_bufs; struct io_rsrc_data data;
struct io_rsrc_data *data;
int i, ret, nbufs; int i, ret, nbufs;
/* /*
@ -1021,43 +950,37 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
mutex_lock(&src_ctx->uring_lock); mutex_lock(&src_ctx->uring_lock);
ret = -ENXIO; ret = -ENXIO;
nbufs = src_ctx->nr_user_bufs; nbufs = src_ctx->buf_table.nr;
if (!nbufs) if (!nbufs)
goto out_unlock; goto out_unlock;
ret = io_rsrc_data_alloc(ctx, nbufs, &data); ret = io_rsrc_data_alloc(&data, nbufs);
if (ret) if (ret)
goto out_unlock; goto out_unlock;
ret = -ENOMEM;
user_bufs = kvmalloc_array(nbufs, sizeof(struct io_rsrc_node *),
GFP_KERNEL | __GFP_ZERO);
if (!user_bufs)
goto out_free_data;
for (i = 0; i < nbufs; i++) { for (i = 0; i < nbufs; i++) {
struct io_rsrc_node *src_node = src_ctx->user_bufs[i]; struct io_rsrc_node *src_node = src_ctx->buf_table.nodes[i];
struct io_rsrc_node *dst_node; struct io_rsrc_node *dst_node;
if (src_node == rsrc_empty_node) { if (src_node == rsrc_empty_node) {
dst_node = rsrc_empty_node; dst_node = rsrc_empty_node;
} else { } else {
dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER); dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
if (!dst_node) if (!dst_node) {
ret = -ENOMEM;
goto out_put_free; goto out_put_free;
}
refcount_inc(&src_node->buf->refs); refcount_inc(&src_node->buf->refs);
dst_node->buf = src_node->buf; dst_node->buf = src_node->buf;
} }
user_bufs[i] = dst_node; data.nodes[i] = dst_node;
} }
/* Have a ref on the bufs now, drop src lock and re-grab our own lock */ /* Have a ref on the bufs now, drop src lock and re-grab our own lock */
mutex_unlock(&src_ctx->uring_lock); mutex_unlock(&src_ctx->uring_lock);
mutex_lock(&ctx->uring_lock); mutex_lock(&ctx->uring_lock);
if (!ctx->user_bufs) { if (!ctx->buf_table.nr) {
ctx->user_bufs = user_bufs; ctx->buf_table = data;
ctx->buf_data = data;
ctx->nr_user_bufs = nbufs;
return 0; return 0;
} }
@ -1068,12 +991,10 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
i = nbufs; i = nbufs;
out_put_free: out_put_free:
while (i--) { while (i--) {
io_buffer_unmap(src_ctx, user_bufs[i]); io_buffer_unmap(src_ctx, data.nodes[i]);
kfree(user_bufs[i]); kfree(data.nodes[i]);
} }
kvfree(user_bufs); io_rsrc_data_free(&data);
out_free_data:
io_rsrc_data_free(data);
out_unlock: out_unlock:
mutex_unlock(&src_ctx->uring_lock); mutex_unlock(&src_ctx->uring_lock);
mutex_lock(&ctx->uring_lock); mutex_lock(&ctx->uring_lock);
@ -1094,7 +1015,7 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
struct file *file; struct file *file;
int ret; int ret;
if (ctx->user_bufs || ctx->nr_user_bufs) if (ctx->buf_table.nr)
return -EBUSY; return -EBUSY;
if (copy_from_user(&buf, arg, sizeof(buf))) if (copy_from_user(&buf, arg, sizeof(buf)))
return -EFAULT; return -EFAULT;

View file

@ -13,11 +13,6 @@ enum {
IORING_RSRC_BUFFER = 1, IORING_RSRC_BUFFER = 1,
}; };
struct io_rsrc_data {
unsigned int nr;
struct io_rsrc_node **nodes;
};
struct io_rsrc_node { struct io_rsrc_node {
struct io_ring_ctx *ctx; struct io_ring_ctx *ctx;
int refs; int refs;
@ -50,6 +45,8 @@ struct io_imu_folio_data {
struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type); struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type);
void io_free_rsrc_node(struct io_rsrc_node *node); void io_free_rsrc_node(struct io_rsrc_node *node);
void io_rsrc_data_free(struct io_rsrc_data *data);
int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr);
int io_import_fixed(int ddir, struct iov_iter *iter, int io_import_fixed(int ddir, struct iov_iter *iter,
struct io_mapped_ubuf *imu, struct io_mapped_ubuf *imu,

View file

@ -339,10 +339,10 @@ static int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe
if (unlikely(ret)) if (unlikely(ret))
return ret; return ret;
if (unlikely(req->buf_index >= ctx->nr_user_bufs)) if (unlikely(req->buf_index >= ctx->buf_table.nr))
return -EFAULT; return -EFAULT;
index = array_index_nospec(req->buf_index, ctx->nr_user_bufs); index = array_index_nospec(req->buf_index, ctx->buf_table.nr);
node = ctx->user_bufs[index]; node = ctx->buf_table.nodes[index];
io_req_assign_rsrc_node(req, node); io_req_assign_rsrc_node(req, node);
io = req->async_data; io = req->async_data;

View file

@ -66,10 +66,10 @@ static struct file *io_splice_get_file(struct io_kiocb *req,
return io_file_get_normal(req, sp->splice_fd_in); return io_file_get_normal(req, sp->splice_fd_in);
io_ring_submit_lock(ctx, issue_flags); io_ring_submit_lock(ctx, issue_flags);
if (unlikely(sp->splice_fd_in >= ctx->nr_user_files)) if (unlikely(sp->splice_fd_in >= ctx->file_table.data.nr))
goto out; goto out;
sp->splice_fd_in = array_index_nospec(sp->splice_fd_in, ctx->nr_user_files); sp->splice_fd_in = array_index_nospec(sp->splice_fd_in, ctx->file_table.data.nr);
node = ctx->file_table.nodes[sp->splice_fd_in]; node = ctx->file_table.data.nodes[sp->splice_fd_in];
if (node) { if (node) {
node->refs++; node->refs++;
sp->rsrc_node = node; sp->rsrc_node = node;

View file

@ -212,15 +212,15 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
u16 index; u16 index;
index = READ_ONCE(sqe->buf_index); index = READ_ONCE(sqe->buf_index);
if (unlikely(index >= ctx->nr_user_bufs)) if (unlikely(index >= ctx->buf_table.nr))
return -EFAULT; return -EFAULT;
req->buf_index = array_index_nospec(index, ctx->nr_user_bufs); req->buf_index = array_index_nospec(index, ctx->buf_table.nr);
/* /*
* Pi node upfront, prior to io_uring_cmd_import_fixed() * Pi node upfront, prior to io_uring_cmd_import_fixed()
* being called. This prevents destruction of the mapped buffer * being called. This prevents destruction of the mapped buffer
* we'll need at actual import time. * we'll need at actual import time.
*/ */
io_req_assign_rsrc_node(req, ctx->user_bufs[req->buf_index]); io_req_assign_rsrc_node(req, ctx->buf_table.nodes[req->buf_index]);
} }
ioucmd->cmd_op = READ_ONCE(sqe->cmd_op); ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);