io_uring/zcrx: dmabuf backed zerocopy receive
Add support for dmabuf backed zcrx areas. To use it, the user should pass
IORING_ZCRX_AREA_DMABUF in the struct io_uring_zcrx_area_reg flags field
and pass a dmabuf fd in the dmabuf_fd field.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/20bb1890e60a82ec945ab36370d1fd54be414ab6.1746097431.git.asml.silence@gmail.com
Link: https://lore.kernel.org/io-uring/6e37db97303212bbd8955f9501cf99b579f8aece.1746547722.git.asml.silence@gmail.com
[axboe: fold in fixup]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
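For illustration only, a minimal userspace sketch of how a dmabuf backed area
could be described under this interface. The helper name is hypothetical, and
it assumes uapi headers that already carry the definitions added below; how the
dmabuf fd is obtained and how the filled structure is handed to the zcrx ifq
registration are outside the scope of this patch.

/*
 * Hypothetical helper: fill struct io_uring_zcrx_area_reg for a dmabuf
 * backed area. For this area type, addr is reused as a byte offset into
 * the dmabuf; both addr and len must be page aligned.
 */
#include <string.h>
#include <linux/io_uring.h>

static void describe_dmabuf_area(struct io_uring_zcrx_area_reg *area,
				 int dmabuf_fd, __u64 off, __u64 len)
{
	memset(area, 0, sizeof(*area));
	area->addr = off;			/* offset into the dmabuf */
	area->len = len;			/* page aligned length */
	area->flags = IORING_ZCRX_AREA_DMABUF;
	area->dmabuf_fd = dmabuf_fd;		/* fd from the exporting driver */
	/* rq_area_token and reserved fields must be zero at registration time */
}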
parent 8a62804248
commit a5c98e9424

3 changed files with 160 additions and 18 deletions
include/uapi/linux/io_uring.h

@@ -990,12 +990,16 @@ struct io_uring_zcrx_offsets {
 	__u64	__resv[2];
 };
 
+enum io_uring_zcrx_area_flags {
+	IORING_ZCRX_AREA_DMABUF	= 1,
+};
+
 struct io_uring_zcrx_area_reg {
 	__u64	addr;
 	__u64	len;
 	__u64	rq_area_token;
 	__u32	flags;
-	__u32	__resv1;
+	__u32	dmabuf_fd;
 	__u64	__resv2[2];
 };

io_uring/zcrx.c

@@ -47,8 +47,135 @@ static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
 	return area->mem.pages[net_iov_idx(niov)];
 }
 
+static void io_release_dmabuf(struct io_zcrx_mem *mem)
+{
+	if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
+		return;
+
+	if (mem->sgt)
+		dma_buf_unmap_attachment_unlocked(mem->attach, mem->sgt,
+						  DMA_FROM_DEVICE);
+	if (mem->attach)
+		dma_buf_detach(mem->dmabuf, mem->attach);
+	if (mem->dmabuf)
+		dma_buf_put(mem->dmabuf);
+
+	mem->sgt = NULL;
+	mem->attach = NULL;
+	mem->dmabuf = NULL;
+}
+
+static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
+			    struct io_zcrx_mem *mem,
+			    struct io_uring_zcrx_area_reg *area_reg)
+{
+	unsigned long off = (unsigned long)area_reg->addr;
+	unsigned long len = (unsigned long)area_reg->len;
+	unsigned long total_size = 0;
+	struct scatterlist *sg;
+	int dmabuf_fd = area_reg->dmabuf_fd;
+	int i, ret;
+
+	if (WARN_ON_ONCE(!ifq->dev))
+		return -EFAULT;
+	if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
+		return -EINVAL;
+
+	mem->is_dmabuf = true;
+	mem->dmabuf = dma_buf_get(dmabuf_fd);
+	if (IS_ERR(mem->dmabuf)) {
+		ret = PTR_ERR(mem->dmabuf);
+		mem->dmabuf = NULL;
+		goto err;
+	}
+
+	mem->attach = dma_buf_attach(mem->dmabuf, ifq->dev);
+	if (IS_ERR(mem->attach)) {
+		ret = PTR_ERR(mem->attach);
+		mem->attach = NULL;
+		goto err;
+	}
+
+	mem->sgt = dma_buf_map_attachment_unlocked(mem->attach, DMA_FROM_DEVICE);
+	if (IS_ERR(mem->sgt)) {
+		ret = PTR_ERR(mem->sgt);
+		mem->sgt = NULL;
+		goto err;
+	}
+
+	for_each_sgtable_dma_sg(mem->sgt, sg, i)
+		total_size += sg_dma_len(sg);
+
+	if (total_size < off + len)
+		return -EINVAL;
+
+	mem->dmabuf_offset = off;
+	mem->size = len;
+	return 0;
+err:
+	io_release_dmabuf(mem);
+	return ret;
+}
+
+static int io_zcrx_map_area_dmabuf(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
+{
+	unsigned long off = area->mem.dmabuf_offset;
+	struct scatterlist *sg;
+	unsigned i, niov_idx = 0;
+
+	if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
+		return -EINVAL;
+
+	for_each_sgtable_dma_sg(area->mem.sgt, sg, i) {
+		dma_addr_t dma = sg_dma_address(sg);
+		unsigned long sg_len = sg_dma_len(sg);
+		unsigned long sg_off = min(sg_len, off);
+
+		off -= sg_off;
+		sg_len -= sg_off;
+		dma += sg_off;
+
+		while (sg_len && niov_idx < area->nia.num_niovs) {
+			struct net_iov *niov = &area->nia.niovs[niov_idx];
+
+			if (net_mp_niov_set_dma_addr(niov, dma))
+				return 0;
+			sg_len -= PAGE_SIZE;
+			dma += PAGE_SIZE;
+			niov_idx++;
+		}
+	}
+	return niov_idx;
+}
+
+static int io_import_umem(struct io_zcrx_ifq *ifq,
+			  struct io_zcrx_mem *mem,
+			  struct io_uring_zcrx_area_reg *area_reg)
+{
+	struct page **pages;
+	int nr_pages;
+
+	if (area_reg->dmabuf_fd)
+		return -EINVAL;
+	if (!area_reg->addr)
+		return -EFAULT;
+	pages = io_pin_pages((unsigned long)area_reg->addr, area_reg->len,
+			     &nr_pages);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
+
+	mem->pages = pages;
+	mem->nr_folios = nr_pages;
+	mem->size = area_reg->len;
+	return 0;
+}
+
 static void io_release_area_mem(struct io_zcrx_mem *mem)
 {
+	if (mem->is_dmabuf) {
+		io_release_dmabuf(mem);
+		return;
+	}
 	if (mem->pages) {
 		unpin_user_pages(mem->pages, mem->nr_folios);
 		kvfree(mem->pages);
@@ -59,27 +186,17 @@ static int io_import_area(struct io_zcrx_ifq *ifq,
 			  struct io_zcrx_mem *mem,
 			  struct io_uring_zcrx_area_reg *area_reg)
 {
-	struct page **pages;
-	int nr_pages;
 	int ret;
 
 	ret = io_validate_user_buf_range(area_reg->addr, area_reg->len);
 	if (ret)
 		return ret;
-	if (!area_reg->addr)
-		return -EFAULT;
 	if (area_reg->addr & ~PAGE_MASK || area_reg->len & ~PAGE_MASK)
 		return -EINVAL;
 
-	pages = io_pin_pages((unsigned long)area_reg->addr, area_reg->len,
-			     &nr_pages);
-	if (IS_ERR(pages))
-		return PTR_ERR(pages);
-
-	mem->pages = pages;
-	mem->nr_folios = nr_pages;
-	mem->size = area_reg->len;
-	return 0;
+	if (area_reg->flags & IORING_ZCRX_AREA_DMABUF)
+		return io_import_dmabuf(ifq, mem, area_reg);
+	return io_import_umem(ifq, mem, area_reg);
 }
 
 static void io_zcrx_unmap_umem(struct io_zcrx_ifq *ifq,
@@ -101,7 +218,10 @@ static void __io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
 {
 	int i;
 
-	io_zcrx_unmap_umem(ifq, area, nr_mapped);
+	if (area->mem.is_dmabuf)
+		io_release_dmabuf(&area->mem);
+	else
+		io_zcrx_unmap_umem(ifq, area, nr_mapped);
 
 	for (i = 0; i < area->nia.num_niovs; i++)
 		net_mp_niov_set_dma_addr(&area->nia.niovs[i], 0);
@@ -145,7 +265,11 @@ static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
 	if (area->is_mapped)
 		return 0;
 
-	nr = io_zcrx_map_area_umem(ifq, area);
+	if (area->mem.is_dmabuf)
+		nr = io_zcrx_map_area_dmabuf(ifq, area);
+	else
+		nr = io_zcrx_map_area_umem(ifq, area);
+
 	if (nr != area->nia.num_niovs) {
 		__io_zcrx_unmap_area(ifq, area, nr);
 		return -EINVAL;
@@ -251,6 +375,8 @@ static void io_zcrx_free_area(struct io_zcrx_area *area)
 	kfree(area);
 }
 
+#define IO_ZCRX_AREA_SUPPORTED_FLAGS	(IORING_ZCRX_AREA_DMABUF)
+
 static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
 			       struct io_zcrx_area **res,
 			       struct io_uring_zcrx_area_reg *area_reg)
@@ -259,9 +385,11 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
 	unsigned nr_iovs;
 	int i, ret;
 
-	if (area_reg->flags || area_reg->rq_area_token)
+	if (area_reg->flags & ~IO_ZCRX_AREA_SUPPORTED_FLAGS)
 		return -EINVAL;
-	if (area_reg->__resv1 || area_reg->__resv2[0] || area_reg->__resv2[1])
+	if (area_reg->rq_area_token)
+		return -EINVAL;
+	if (area_reg->__resv2[0] || area_reg->__resv2[1])
 		return -EINVAL;
 
 	ret = -ENOMEM;
@@ -819,6 +947,9 @@ static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
 	size_t copied = 0;
 	int ret = 0;
 
+	if (area->mem.is_dmabuf)
+		return -EFAULT;
+
 	while (len) {
 		size_t copy_size = min_t(size_t, PAGE_SIZE, len);
 		const int dst_off = 0;

io_uring/zcrx.h

@@ -3,15 +3,22 @@
 #define IOU_ZC_RX_H
 
 #include <linux/io_uring_types.h>
+#include <linux/dma-buf.h>
 #include <linux/socket.h>
 #include <net/page_pool/types.h>
 #include <net/net_trackers.h>
 
 struct io_zcrx_mem {
 	unsigned long size;
+	bool is_dmabuf;
 
 	struct page **pages;
 	unsigned long nr_folios;
+
+	struct dma_buf_attachment *attach;
+	struct dma_buf *dmabuf;
+	struct sg_table *sgt;
+	unsigned long dmabuf_offset;
 };
 
 struct io_zcrx_area {