io_uring/zcrx: add support for multiple ifqs

Allow the user to register multiple ifqs / zcrx contexts. With that, a
single io_uring instance can receive from multiple interfaces /
interface queues.
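
A rough usage sketch of what this enables (not part of the patch:
everything except reg.zcrx_id, sqe->zcrx_ifq_idx, IORING_OP_RECV_ZC and
IORING_REGISTER_ZCRX_IFQ, including all setup and error handling, is an
assumption): register one ifq per interface queue, then let each recvzc
request pick one by id.

    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/io_uring.h>

    /* Hypothetical helper: register the zcrx ifq described by *reg and
     * return the id the kernel allocated for it. The caller is assumed
     * to have filled in the area/region descriptors beforehand. */
    static int register_zcrx(int ring_fd, struct io_uring_zcrx_ifq_reg *reg)
    {
        if (syscall(__NR_io_uring_register, ring_fd,
                    IORING_REGISTER_ZCRX_IFQ, reg, 1))
            return -1;
        return reg->zcrx_id;    /* filled in by the kernel, see below */
    }

    /* later, each zero-copy receive selects an ifq by that id: */
    sqe->opcode = IORING_OP_RECV_ZC;
    sqe->zcrx_ifq_idx = id;     /* previously had to be 0 */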

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/668b03bee03b5216564482edcfefbc2ee337dd30.1745141261.git.asml.silence@gmail.com
[axboe: fold in fix]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
commit 76f1cc98b2
parent 632b318672
Author: Pavel Begunkov <asml.silence@gmail.com>
Date:   2025-04-20 10:31:20 +01:00
Committer: Jens Axboe <axboe@kernel.dk>
4 changed files with 56 additions and 30 deletions

--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -40,8 +40,6 @@ enum io_uring_cmd_flags {
 	IO_URING_F_TASK_DEAD		= (1 << 13),
 };
 
-struct io_zcrx_ifq;
-
 struct io_wq_work_node {
 	struct io_wq_work_node *next;
 };
@@ -394,7 +392,8 @@ struct io_ring_ctx {
 		struct wait_queue_head		poll_wq;
 		struct io_restriction		restrictions;
 
-		struct io_zcrx_ifq		*ifq;
+		/* Stores zcrx object pointers of type struct io_zcrx_ifq */
+		struct xarray			zcrx_ctxs;
 
 		u32			pers_next;
 		struct xarray		personalities;
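
The single ctx->ifq pointer becomes an allocating XArray keyed by a
per-ring zcrx id. For reference, a condensed sketch of the standard
<linux/xarray.h> calls the rest of the patch leans on (not code from
this patch):

    struct xarray ctxs;
    struct io_zcrx_ifq *ifq, *entry;
    unsigned long index;
    u32 id;

    xa_init_flags(&ctxs, XA_FLAGS_ALLOC);       /* array hands out the ids */
    xa_alloc(&ctxs, &id, NULL, xa_limit_31b, GFP_KERNEL); /* reserve id, entry stays NULL */
    xa_store(&ctxs, id, ifq, GFP_KERNEL);       /* publish the pointer */
    ifq = xa_load(&ctxs, id);                   /* lookup by id */
    xa_for_each(&ctxs, index, entry)            /* iterate live entries */
        ;
    xa_erase(&ctxs, id);                        /* drop the entry */
    xa_destroy(&ctxs);                          /* free internal nodes */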

--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -359,6 +359,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->tctx_list);
 	ctx->submit_state.free_list.next = NULL;
 	INIT_HLIST_HEAD(&ctx->waitid_list);
+	xa_init_flags(&ctx->zcrx_ctxs, XA_FLAGS_ALLOC);
 #ifdef CONFIG_FUTEX
 	INIT_HLIST_HEAD(&ctx->futex_list);
 #endif
@@ -2889,7 +2890,7 @@ static __cold void io_ring_exit_work(struct work_struct *work)
 		io_cqring_overflow_kill(ctx);
 		mutex_unlock(&ctx->uring_lock);
 	}
-	if (ctx->ifq) {
+	if (!xa_empty(&ctx->zcrx_ctxs)) {
 		mutex_lock(&ctx->uring_lock);
 		io_shutdown_zcrx_ifqs(ctx);
 		mutex_unlock(&ctx->uring_lock);

--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1189,11 +1189,10 @@ int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return -EINVAL;
 
 	ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx);
-	if (ifq_idx != 0)
-		return -EINVAL;
-	zc->ifq = req->ctx->ifq;
+	zc->ifq = xa_load(&req->ctx->zcrx_ctxs, ifq_idx);
 	if (!zc->ifq)
 		return -EINVAL;
+
 	zc->len = READ_ONCE(sqe->len);
 	zc->flags = READ_ONCE(sqe->ioprio);
 	zc->msg_flags = READ_ONCE(sqe->msg_flags);
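
Note the semantic change here: instead of rejecting any non-zero
zcrx_ifq_idx, the index is now the lookup key, and xa_load() returning
NULL for an absent id preserves -EINVAL for bad indices. From userspace
that might look like the following (hypothetical liburing-style glue;
flow_queue() and the ids[] table are assumptions):

    /* two ifqs registered earlier, e.g. one per NIC rx queue */
    __u32 ids[2] = { reg_a.zcrx_id, reg_b.zcrx_id };

    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
    io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, conn_fd, NULL, 0, 0);
    sqe->zcrx_ifq_idx = ids[flow_queue(conn_fd)];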

--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -156,8 +156,10 @@ static void io_zcrx_get_niov_uref(struct net_iov *niov)
 
 static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
 				 struct io_uring_zcrx_ifq_reg *reg,
-				 struct io_uring_region_desc *rd)
+				 struct io_uring_region_desc *rd,
+				 u32 id)
 {
+	u64 mmap_offset;
 	size_t off, size;
 	void *ptr;
 	int ret;
@@ -167,7 +169,10 @@ static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
 	if (size > rd->size)
 		return -EINVAL;
 
-	ret = io_create_region(ifq->ctx, &ifq->region, rd, IORING_MAP_OFF_ZCRX_REGION);
+	mmap_offset = IORING_MAP_OFF_ZCRX_REGION;
+	mmap_offset += id << IORING_OFF_PBUF_SHIFT;
+
+	ret = io_create_region(ifq->ctx, &ifq->region, rd, mmap_offset);
 	if (ret < 0)
 		return ret;
 
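
Each ifq's ring region now gets a distinct mmap offset by folding the
zcrx id into the fixed IORING_MAP_OFF_ZCRX_REGION base, one
IORING_OFF_PBUF_SHIFT-sized slot per id. A sketch of the userspace
side, assuming a kernel-allocated region whose offset comes back in the
region descriptor after registration:

    /* rd.mmap_offset == IORING_MAP_OFF_ZCRX_REGION +
     *                   ((__u64)id << IORING_OFF_PBUF_SHIFT) */
    void *ring = mmap(NULL, rd.size, PROT_READ | PROT_WRITE,
                      MAP_SHARED, ring_fd, rd.mmap_offset);
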
@@ -179,9 +184,6 @@ static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
 
 static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
 {
-	if (WARN_ON_ONCE(ifq->ctx->ifq))
-		return;
-
 	io_free_region(ifq->ctx, &ifq->region);
 	ifq->rq_ring = NULL;
 	ifq->rqes = NULL;
@@ -343,11 +345,11 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
 struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ctx,
 					    unsigned int id)
 {
+	struct io_zcrx_ifq *ifq = xa_load(&ctx->zcrx_ctxs, id);
+
 	lockdep_assert_held(&ctx->mmap_lock);
 
-	if (id != 0 || !ctx->ifq)
-		return NULL;
-	return &ctx->ifq->region;
+	return ifq ? &ifq->region : NULL;
 }
 
 int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
@@ -359,6 +361,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	struct io_uring_region_desc rd;
 	struct io_zcrx_ifq *ifq;
 	int ret;
+	u32 id;
 
 	/*
 	 * 1. Interface queue allocation.
@@ -371,8 +374,6 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN &&
 	      ctx->flags & IORING_SETUP_CQE32))
 		return -EINVAL;
-	if (ctx->ifq)
-		return -EBUSY;
 	if (copy_from_user(&reg, arg, sizeof(reg)))
 		return -EFAULT;
 	if (copy_from_user(&rd, u64_to_user_ptr(reg.region_ptr), sizeof(rd)))
@@ -396,7 +397,14 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	if (!ifq)
 		return -ENOMEM;
 
-	ret = io_allocate_rbuf_ring(ifq, &reg, &rd);
+	scoped_guard(mutex, &ctx->mmap_lock) {
+		/* preallocate id */
+		ret = xa_alloc(&ctx->zcrx_ctxs, &id, NULL, xa_limit_31b, GFP_KERNEL);
+		if (ret)
+			goto ifq_free;
+	}
+
+	ret = io_allocate_rbuf_ring(ifq, &reg, &rd, id);
 	if (ret)
 		goto err;
 
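
The id is allocated before io_allocate_rbuf_ring() runs because it now
feeds the mmap offset, hence the two-phase pattern: xa_alloc() with a
NULL entry reserves the index without making anything visible to
lookups, and the pointer is only stored once setup has succeeded. In
sketch form:

    /* phase 1: reserve an index; xa_load(id) still returns NULL */
    ret = xa_alloc(&xa, &id, NULL, xa_limit_31b, GFP_KERNEL);
    /* ... construct the object using id ... */
    /* phase 2: publish; only now can lookups observe it */
    xa_store(&xa, id, obj, GFP_KERNEL);
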
@@ -428,6 +436,14 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	reg.offsets.rqes = sizeof(struct io_uring);
 	reg.offsets.head = offsetof(struct io_uring, head);
 	reg.offsets.tail = offsetof(struct io_uring, tail);
+	reg.zcrx_id = id;
+
+	scoped_guard(mutex, &ctx->mmap_lock) {
+		/* publish ifq */
+		ret = -ENOMEM;
+		if (xa_store(&ctx->zcrx_ctxs, id, ifq, GFP_KERNEL))
+			goto err;
+	}
 
 	if (copy_to_user(arg, &reg, sizeof(reg)) ||
 	    copy_to_user(u64_to_user_ptr(reg.region_ptr), &rd, sizeof(rd)) ||
@@ -435,26 +451,34 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 		ret = -EFAULT;
 		goto err;
 	}
-	scoped_guard(mutex, &ctx->mmap_lock)
-		ctx->ifq = ifq;
 	return 0;
 err:
+	scoped_guard(mutex, &ctx->mmap_lock)
+		xa_erase(&ctx->zcrx_ctxs, id);
+ifq_free:
 	io_zcrx_ifq_free(ifq);
 	return ret;
 }
 
 void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
 {
-	struct io_zcrx_ifq *ifq = ctx->ifq;
+	struct io_zcrx_ifq *ifq;
+	unsigned long id;
 
 	lockdep_assert_held(&ctx->uring_lock);
 
-	if (!ifq)
-		return;
+	while (1) {
+		scoped_guard(mutex, &ctx->mmap_lock) {
+			ifq = xa_find(&ctx->zcrx_ctxs, &id, ULONG_MAX, XA_PRESENT);
+			if (ifq)
+				xa_erase(&ctx->zcrx_ctxs, id);
+		}
+		if (!ifq)
+			break;
+		io_zcrx_ifq_free(ifq);
+	}
 
-	scoped_guard(mutex, &ctx->mmap_lock)
-		ctx->ifq = NULL;
-	io_zcrx_ifq_free(ifq);
+	xa_destroy(&ctx->zcrx_ctxs);
 }
 
 static struct net_iov *__io_zcrx_get_free_niov(struct io_zcrx_area *area)
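
Teardown pops one entry at a time: each ifq is detached from the xarray
under mmap_lock, then freed outside it, plausibly so the heavier
io_zcrx_ifq_free() path never runs with mmap_lock held. The general
shape of the pattern:

    for (;;) {
        /* detach one object under the lock... */
        mutex_lock(&lock);
        obj = xa_find(&xa, &id, ULONG_MAX, XA_PRESENT);
        if (obj)
            xa_erase(&xa, id);
        mutex_unlock(&lock);
        if (!obj)
            break;
        /* ...and free it outside the lock */
        free_obj(obj);
    }
    xa_destroy(&xa);
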
@@ -511,12 +535,15 @@ static void io_zcrx_scrub(struct io_zcrx_ifq *ifq)
 
 void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
 {
+	struct io_zcrx_ifq *ifq;
+	unsigned long index;
+
 	lockdep_assert_held(&ctx->uring_lock);
 
-	if (!ctx->ifq)
-		return;
-	io_zcrx_scrub(ctx->ifq);
-	io_close_queue(ctx->ifq);
+	xa_for_each(&ctx->zcrx_ctxs, index, ifq) {
+		io_zcrx_scrub(ifq);
+		io_close_queue(ifq);
+	}
 }
 
 static inline u32 io_zcrx_rqring_entries(struct io_zcrx_ifq *ifq)