mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00
net: devmem: Implement TX path
Augment dmabuf binding to be able to handle TX. In addition to all the RX binding, we also create tx_vec needed for the TX path. Provide an API for sendmsg to be able to send dmabufs bound to this device: - Provide a new dmabuf_tx_cmsg which includes the dmabuf to send from. - MSG_ZEROCOPY with SCM_DEVMEM_DMABUF cmsg indicates send from dma-buf. Devmem is uncopyable, so piggyback off the existing MSG_ZEROCOPY implementation, while disabling instances where MSG_ZEROCOPY falls back to copying. We additionally pipe the binding down to the new zerocopy_fill_skb_from_devmem which fills a TX skb with net_iov netmems instead of the traditional page netmems. We also special case skb_frag_dma_map to return the dma-address of these dmabuf net_iovs instead of attempting to map pages. The TX path may release the dmabuf in a context where we cannot wait. This happens when the user unbinds a TX dmabuf while there are still references to its netmems in the TX path. In that case, the netmems will be put_netmem'd from a context where we can't unmap the dmabuf. Resolve this by making __net_devmem_dmabuf_binding_free schedule_work'd. Based on work by Stanislav Fomichev <sdf@fomichev.me>. A lot of the meat of the implementation came from devmem TCP RFC v1[1], which included the TX path, but Stan did all the rebasing on top of netmem/net_iov. Cc: Stanislav Fomichev <sdf@fomichev.me> Signed-off-by: Kaiyuan Zhang <kaiyuanz@google.com> Signed-off-by: Mina Almasry <almasrymina@google.com> Acked-by: Stanislav Fomichev <sdf@fomichev.me> Link: https://patch.msgid.link/20250508004830.4100853-5-almasrymina@google.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
parent
8802087d20
commit
bd61848900
13 changed files with 340 additions and 60 deletions
|
@ -1707,13 +1707,16 @@ static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset)
|
|||
extern const struct ubuf_info_ops msg_zerocopy_ubuf_ops;
|
||||
|
||||
struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
|
||||
struct ubuf_info *uarg);
|
||||
struct ubuf_info *uarg, bool devmem);
|
||||
|
||||
void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref);
|
||||
|
||||
struct net_devmem_dmabuf_binding;
|
||||
|
||||
int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
|
||||
struct sk_buff *skb, struct iov_iter *from,
|
||||
size_t length);
|
||||
size_t length,
|
||||
struct net_devmem_dmabuf_binding *binding);
|
||||
|
||||
int zerocopy_fill_skb_from_iter(struct sk_buff *skb,
|
||||
struct iov_iter *from, size_t length);
|
||||
|
@ -1721,12 +1724,14 @@ int zerocopy_fill_skb_from_iter(struct sk_buff *skb,
|
|||
static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb,
|
||||
struct msghdr *msg, int len)
|
||||
{
|
||||
return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len);
|
||||
return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len,
|
||||
NULL);
|
||||
}
|
||||
|
||||
int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
|
||||
struct msghdr *msg, int len,
|
||||
struct ubuf_info *uarg);
|
||||
struct ubuf_info *uarg,
|
||||
struct net_devmem_dmabuf_binding *binding);
|
||||
|
||||
/* Internal */
|
||||
#define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB)))
|
||||
|
@ -3697,6 +3702,10 @@ static inline dma_addr_t __skb_frag_dma_map(struct device *dev,
|
|||
size_t offset, size_t size,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
if (skb_frag_is_net_iov(frag)) {
|
||||
return netmem_to_net_iov(frag->netmem)->dma_addr + offset +
|
||||
frag->offset;
|
||||
}
|
||||
return dma_map_page(dev, skb_frag_page(frag),
|
||||
skb_frag_off(frag) + offset, size, dir);
|
||||
}
|
||||
|
|
|
@ -1851,6 +1851,7 @@ struct sockcm_cookie {
|
|||
u32 tsflags;
|
||||
u32 ts_opt_id;
|
||||
u32 priority;
|
||||
u32 dmabuf_id;
|
||||
};
|
||||
|
||||
static inline void sockcm_init(struct sockcm_cookie *sockc,
|
||||
|
|
|
@ -810,7 +810,7 @@ static int io_zcrx_recv_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
|
|||
return io_zcrx_copy_frag(req, ifq, frag, off, len);
|
||||
|
||||
niov = netmem_to_net_iov(frag->netmem);
|
||||
if (niov->pp->mp_ops != &io_uring_pp_zc_ops ||
|
||||
if (!niov->pp || niov->pp->mp_ops != &io_uring_pp_zc_ops ||
|
||||
io_pp_to_ifq(niov->pp) != ifq)
|
||||
return -EFAULT;
|
||||
|
||||
|
|
|
@ -63,6 +63,8 @@
|
|||
#include <net/busy_poll.h>
|
||||
#include <crypto/hash.h>
|
||||
|
||||
#include "devmem.h"
|
||||
|
||||
/*
|
||||
* Is a socket 'connection oriented' ?
|
||||
*/
|
||||
|
@ -691,9 +693,49 @@ int zerocopy_fill_skb_from_iter(struct sk_buff *skb,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
zerocopy_fill_skb_from_devmem(struct sk_buff *skb, struct iov_iter *from,
|
||||
int length,
|
||||
struct net_devmem_dmabuf_binding *binding)
|
||||
{
|
||||
int i = skb_shinfo(skb)->nr_frags;
|
||||
size_t virt_addr, size, off;
|
||||
struct net_iov *niov;
|
||||
|
||||
/* Devmem filling works by taking an IOVEC from the user where the
|
||||
* iov_addrs are interpreted as an offset in bytes into the dma-buf to
|
||||
* send from. We do not support other iter types.
|
||||
*/
|
||||
if (iov_iter_type(from) != ITER_IOVEC)
|
||||
return -EFAULT;
|
||||
|
||||
while (length && iov_iter_count(from)) {
|
||||
if (i == MAX_SKB_FRAGS)
|
||||
return -EMSGSIZE;
|
||||
|
||||
virt_addr = (size_t)iter_iov_addr(from);
|
||||
niov = net_devmem_get_niov_at(binding, virt_addr, &off, &size);
|
||||
if (!niov)
|
||||
return -EFAULT;
|
||||
|
||||
size = min_t(size_t, size, length);
|
||||
size = min_t(size_t, size, iter_iov_len(from));
|
||||
|
||||
get_netmem(net_iov_to_netmem(niov));
|
||||
skb_add_rx_frag_netmem(skb, i, net_iov_to_netmem(niov), off,
|
||||
size, PAGE_SIZE);
|
||||
iov_iter_advance(from, size);
|
||||
length -= size;
|
||||
i++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
|
||||
struct sk_buff *skb, struct iov_iter *from,
|
||||
size_t length)
|
||||
size_t length,
|
||||
struct net_devmem_dmabuf_binding *binding)
|
||||
{
|
||||
unsigned long orig_size = skb->truesize;
|
||||
unsigned long truesize;
|
||||
|
@ -701,6 +743,8 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
|
|||
|
||||
if (msg && msg->msg_ubuf && msg->sg_from_iter)
|
||||
ret = msg->sg_from_iter(skb, from, length);
|
||||
else if (binding)
|
||||
ret = zerocopy_fill_skb_from_devmem(skb, from, length, binding);
|
||||
else
|
||||
ret = zerocopy_fill_skb_from_iter(skb, from, length);
|
||||
|
||||
|
@ -734,7 +778,7 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
|
|||
if (skb_copy_datagram_from_iter(skb, 0, from, copy))
|
||||
return -EFAULT;
|
||||
|
||||
return __zerocopy_sg_from_iter(NULL, NULL, skb, from, ~0U);
|
||||
return __zerocopy_sg_from_iter(NULL, NULL, skb, from, ~0U, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL(zerocopy_sg_from_iter);
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include <net/netdev_rx_queue.h>
|
||||
#include <net/page_pool/helpers.h>
|
||||
#include <net/page_pool/memory_provider.h>
|
||||
#include <net/sock.h>
|
||||
#include <trace/events/page_pool.h>
|
||||
|
||||
#include "devmem.h"
|
||||
|
@ -52,8 +53,10 @@ static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov)
|
|||
((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
|
||||
}
|
||||
|
||||
void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding)
|
||||
void __net_devmem_dmabuf_binding_free(struct work_struct *wq)
|
||||
{
|
||||
struct net_devmem_dmabuf_binding *binding = container_of(wq, typeof(*binding), unbind_w);
|
||||
|
||||
size_t size, avail;
|
||||
|
||||
gen_pool_for_each_chunk(binding->chunk_pool,
|
||||
|
@ -71,8 +74,10 @@ void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding)
|
|||
dma_buf_detach(binding->dmabuf, binding->attachment);
|
||||
dma_buf_put(binding->dmabuf);
|
||||
xa_destroy(&binding->bound_rxqs);
|
||||
kvfree(binding->tx_vec);
|
||||
kfree(binding);
|
||||
}
|
||||
EXPORT_SYMBOL(__net_devmem_dmabuf_binding_free);
|
||||
|
||||
struct net_iov *
|
||||
net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
|
||||
|
@ -117,6 +122,13 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
|
|||
unsigned long xa_idx;
|
||||
unsigned int rxq_idx;
|
||||
|
||||
xa_erase(&net_devmem_dmabuf_bindings, binding->id);
|
||||
|
||||
/* Ensure no tx net_devmem_lookup_dmabuf() are in flight after the
|
||||
* erase.
|
||||
*/
|
||||
synchronize_net();
|
||||
|
||||
if (binding->list.next)
|
||||
list_del(&binding->list);
|
||||
|
||||
|
@ -131,8 +143,6 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
|
|||
__net_mp_close_rxq(binding->dev, rxq_idx, &mp_params);
|
||||
}
|
||||
|
||||
xa_erase(&net_devmem_dmabuf_bindings, binding->id);
|
||||
|
||||
net_devmem_dmabuf_binding_put(binding);
|
||||
}
|
||||
|
||||
|
@ -166,8 +176,9 @@ err_close_rxq:
|
|||
}
|
||||
|
||||
struct net_devmem_dmabuf_binding *
|
||||
net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
|
||||
struct netlink_ext_ack *extack)
|
||||
net_devmem_bind_dmabuf(struct net_device *dev,
|
||||
enum dma_data_direction direction,
|
||||
unsigned int dmabuf_fd, struct netlink_ext_ack *extack)
|
||||
{
|
||||
struct net_devmem_dmabuf_binding *binding;
|
||||
static u32 id_alloc_next;
|
||||
|
@ -189,13 +200,6 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
|
|||
}
|
||||
|
||||
binding->dev = dev;
|
||||
|
||||
err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
|
||||
binding, xa_limit_32b, &id_alloc_next,
|
||||
GFP_KERNEL);
|
||||
if (err < 0)
|
||||
goto err_free_binding;
|
||||
|
||||
xa_init_flags(&binding->bound_rxqs, XA_FLAGS_ALLOC);
|
||||
|
||||
refcount_set(&binding->ref, 1);
|
||||
|
@ -206,26 +210,36 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
|
|||
if (IS_ERR(binding->attachment)) {
|
||||
err = PTR_ERR(binding->attachment);
|
||||
NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device");
|
||||
goto err_free_id;
|
||||
goto err_free_binding;
|
||||
}
|
||||
|
||||
binding->sgt = dma_buf_map_attachment_unlocked(binding->attachment,
|
||||
DMA_FROM_DEVICE);
|
||||
direction);
|
||||
if (IS_ERR(binding->sgt)) {
|
||||
err = PTR_ERR(binding->sgt);
|
||||
NL_SET_ERR_MSG(extack, "Failed to map dmabuf attachment");
|
||||
goto err_detach;
|
||||
}
|
||||
|
||||
if (direction == DMA_TO_DEVICE) {
|
||||
binding->tx_vec = kvmalloc_array(dmabuf->size / PAGE_SIZE,
|
||||
sizeof(struct net_iov *),
|
||||
GFP_KERNEL);
|
||||
if (!binding->tx_vec) {
|
||||
err = -ENOMEM;
|
||||
goto err_unmap;
|
||||
}
|
||||
}
|
||||
|
||||
/* For simplicity we expect to make PAGE_SIZE allocations, but the
|
||||
* binding can be much more flexible than that. We may be able to
|
||||
* allocate MTU sized chunks here. Leave that for future work...
|
||||
*/
|
||||
binding->chunk_pool =
|
||||
gen_pool_create(PAGE_SHIFT, dev_to_node(&dev->dev));
|
||||
binding->chunk_pool = gen_pool_create(PAGE_SHIFT,
|
||||
dev_to_node(&dev->dev));
|
||||
if (!binding->chunk_pool) {
|
||||
err = -ENOMEM;
|
||||
goto err_unmap;
|
||||
goto err_tx_vec;
|
||||
}
|
||||
|
||||
virtual = 0;
|
||||
|
@ -270,24 +284,32 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
|
|||
niov->owner = &owner->area;
|
||||
page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
|
||||
net_devmem_get_dma_addr(niov));
|
||||
if (direction == DMA_TO_DEVICE)
|
||||
binding->tx_vec[owner->area.base_virtual / PAGE_SIZE + i] = niov;
|
||||
}
|
||||
|
||||
virtual += len;
|
||||
}
|
||||
|
||||
err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
|
||||
binding, xa_limit_32b, &id_alloc_next,
|
||||
GFP_KERNEL);
|
||||
if (err < 0)
|
||||
goto err_free_chunks;
|
||||
|
||||
return binding;
|
||||
|
||||
err_free_chunks:
|
||||
gen_pool_for_each_chunk(binding->chunk_pool,
|
||||
net_devmem_dmabuf_free_chunk_owner, NULL);
|
||||
gen_pool_destroy(binding->chunk_pool);
|
||||
err_tx_vec:
|
||||
kvfree(binding->tx_vec);
|
||||
err_unmap:
|
||||
dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
|
||||
DMA_FROM_DEVICE);
|
||||
err_detach:
|
||||
dma_buf_detach(dmabuf, binding->attachment);
|
||||
err_free_id:
|
||||
xa_erase(&net_devmem_dmabuf_bindings, binding->id);
|
||||
err_free_binding:
|
||||
kfree(binding);
|
||||
err_put_dmabuf:
|
||||
|
@ -295,6 +317,21 @@ err_put_dmabuf:
|
|||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
struct net_devmem_dmabuf_binding *net_devmem_lookup_dmabuf(u32 id)
|
||||
{
|
||||
struct net_devmem_dmabuf_binding *binding;
|
||||
|
||||
rcu_read_lock();
|
||||
binding = xa_load(&net_devmem_dmabuf_bindings, id);
|
||||
if (binding) {
|
||||
if (!net_devmem_dmabuf_binding_get(binding))
|
||||
binding = NULL;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return binding;
|
||||
}
|
||||
|
||||
void net_devmem_get_net_iov(struct net_iov *niov)
|
||||
{
|
||||
net_devmem_dmabuf_binding_get(net_devmem_iov_binding(niov));
|
||||
|
@ -305,6 +342,49 @@ void net_devmem_put_net_iov(struct net_iov *niov)
|
|||
net_devmem_dmabuf_binding_put(net_devmem_iov_binding(niov));
|
||||
}
|
||||
|
||||
struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk,
|
||||
unsigned int dmabuf_id)
|
||||
{
|
||||
struct net_devmem_dmabuf_binding *binding;
|
||||
struct dst_entry *dst = __sk_dst_get(sk);
|
||||
int err = 0;
|
||||
|
||||
binding = net_devmem_lookup_dmabuf(dmabuf_id);
|
||||
if (!binding || !binding->tx_vec) {
|
||||
err = -EINVAL;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
/* The dma-addrs in this binding are only reachable to the corresponding
|
||||
* net_device.
|
||||
*/
|
||||
if (!dst || !dst->dev || dst->dev->ifindex != binding->dev->ifindex) {
|
||||
err = -ENODEV;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
return binding;
|
||||
|
||||
out_err:
|
||||
if (binding)
|
||||
net_devmem_dmabuf_binding_put(binding);
|
||||
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
struct net_iov *
|
||||
net_devmem_get_niov_at(struct net_devmem_dmabuf_binding *binding,
|
||||
size_t virt_addr, size_t *off, size_t *size)
|
||||
{
|
||||
if (virt_addr >= binding->dmabuf->size)
|
||||
return NULL;
|
||||
|
||||
*off = virt_addr % PAGE_SIZE;
|
||||
*size = PAGE_SIZE - *off;
|
||||
|
||||
return binding->tx_vec[virt_addr / PAGE_SIZE];
|
||||
}
|
||||
|
||||
/*** "Dmabuf devmem memory provider" ***/
|
||||
|
||||
int mp_dmabuf_devmem_init(struct page_pool *pool)
|
||||
|
|
|
@ -23,8 +23,9 @@ struct net_devmem_dmabuf_binding {
|
|||
|
||||
/* The user holds a ref (via the netlink API) for as long as they want
|
||||
* the binding to remain alive. Each page pool using this binding holds
|
||||
* a ref to keep the binding alive. Each allocated net_iov holds a
|
||||
* ref.
|
||||
* a ref to keep the binding alive. The page_pool does not release the
|
||||
* ref until all the net_iovs allocated from this binding are released
|
||||
* back to the page_pool.
|
||||
*
|
||||
* The binding undos itself and unmaps the underlying dmabuf once all
|
||||
* those refs are dropped and the binding is no longer desired or in
|
||||
|
@ -32,7 +33,10 @@ struct net_devmem_dmabuf_binding {
|
|||
*
|
||||
* net_devmem_get_net_iov() on dmabuf net_iovs will increment this
|
||||
* reference, making sure that the binding remains alive until all the
|
||||
* net_iovs are no longer used.
|
||||
* net_iovs are no longer used. net_iovs allocated from this binding
|
||||
* that are stuck in the TX path for any reason (such as awaiting
|
||||
* retransmits) hold a reference to the binding until the skb holding
|
||||
* them is freed.
|
||||
*/
|
||||
refcount_t ref;
|
||||
|
||||
|
@ -48,6 +52,14 @@ struct net_devmem_dmabuf_binding {
|
|||
* active.
|
||||
*/
|
||||
u32 id;
|
||||
|
||||
/* Array of net_iov pointers for this binding, sorted by virtual
|
||||
* address. This array is convenient to map the virtual addresses to
|
||||
* net_iovs in the TX path.
|
||||
*/
|
||||
struct net_iov **tx_vec;
|
||||
|
||||
struct work_struct unbind_w;
|
||||
};
|
||||
|
||||
#if defined(CONFIG_NET_DEVMEM)
|
||||
|
@ -64,14 +76,17 @@ struct dmabuf_genpool_chunk_owner {
|
|||
dma_addr_t base_dma_addr;
|
||||
};
|
||||
|
||||
void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding);
|
||||
void __net_devmem_dmabuf_binding_free(struct work_struct *wq);
|
||||
struct net_devmem_dmabuf_binding *
|
||||
net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
|
||||
struct netlink_ext_ack *extack);
|
||||
net_devmem_bind_dmabuf(struct net_device *dev,
|
||||
enum dma_data_direction direction,
|
||||
unsigned int dmabuf_fd, struct netlink_ext_ack *extack);
|
||||
struct net_devmem_dmabuf_binding *net_devmem_lookup_dmabuf(u32 id);
|
||||
void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding);
|
||||
int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
|
||||
struct net_devmem_dmabuf_binding *binding,
|
||||
struct netlink_ext_ack *extack);
|
||||
void net_devmem_bind_tx_release(struct sock *sk);
|
||||
|
||||
static inline struct dmabuf_genpool_chunk_owner *
|
||||
net_devmem_iov_to_chunk_owner(const struct net_iov *niov)
|
||||
|
@ -100,10 +115,10 @@ static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
|
|||
((unsigned long)net_iov_idx(niov) << PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static inline void
|
||||
static inline bool
|
||||
net_devmem_dmabuf_binding_get(struct net_devmem_dmabuf_binding *binding)
|
||||
{
|
||||
refcount_inc(&binding->ref);
|
||||
return refcount_inc_not_zero(&binding->ref);
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
@ -112,7 +127,8 @@ net_devmem_dmabuf_binding_put(struct net_devmem_dmabuf_binding *binding)
|
|||
if (!refcount_dec_and_test(&binding->ref))
|
||||
return;
|
||||
|
||||
__net_devmem_dmabuf_binding_free(binding);
|
||||
INIT_WORK(&binding->unbind_w, __net_devmem_dmabuf_binding_free);
|
||||
schedule_work(&binding->unbind_w);
|
||||
}
|
||||
|
||||
void net_devmem_get_net_iov(struct net_iov *niov);
|
||||
|
@ -123,6 +139,11 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding);
|
|||
void net_devmem_free_dmabuf(struct net_iov *ppiov);
|
||||
|
||||
bool net_is_devmem_iov(struct net_iov *niov);
|
||||
struct net_devmem_dmabuf_binding *
|
||||
net_devmem_get_binding(struct sock *sk, unsigned int dmabuf_id);
|
||||
struct net_iov *
|
||||
net_devmem_get_niov_at(struct net_devmem_dmabuf_binding *binding, size_t addr,
|
||||
size_t *off, size_t *size);
|
||||
|
||||
#else
|
||||
struct net_devmem_dmabuf_binding;
|
||||
|
@ -140,18 +161,23 @@ static inline void net_devmem_put_net_iov(struct net_iov *niov)
|
|||
{
|
||||
}
|
||||
|
||||
static inline void
|
||||
__net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding)
|
||||
static inline void __net_devmem_dmabuf_binding_free(struct work_struct *wq)
|
||||
{
|
||||
}
|
||||
|
||||
static inline struct net_devmem_dmabuf_binding *
|
||||
net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
|
||||
enum dma_data_direction direction,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
}
|
||||
|
||||
static inline struct net_devmem_dmabuf_binding *net_devmem_lookup_dmabuf(u32 id)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void
|
||||
net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
|
||||
{
|
||||
|
@ -190,6 +216,19 @@ static inline bool net_is_devmem_iov(struct net_iov *niov)
|
|||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline struct net_devmem_dmabuf_binding *
|
||||
net_devmem_get_binding(struct sock *sk, unsigned int dmabuf_id)
|
||||
{
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
}
|
||||
|
||||
static inline struct net_iov *
|
||||
net_devmem_get_niov_at(struct net_devmem_dmabuf_binding *binding, size_t addr,
|
||||
size_t *off, size_t *size)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _NET_DEVMEM_H */
|
||||
|
|
|
@ -907,7 +907,8 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
|
|||
goto err_unlock;
|
||||
}
|
||||
|
||||
binding = net_devmem_bind_dmabuf(netdev, dmabuf_fd, info->extack);
|
||||
binding = net_devmem_bind_dmabuf(netdev, DMA_FROM_DEVICE, dmabuf_fd,
|
||||
info->extack);
|
||||
if (IS_ERR(binding)) {
|
||||
err = PTR_ERR(binding);
|
||||
goto err_unlock;
|
||||
|
@ -968,10 +969,74 @@ err_genlmsg_free:
|
|||
return err;
|
||||
}
|
||||
|
||||
/* stub */
|
||||
int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info)
|
||||
{
|
||||
return 0;
|
||||
struct net_devmem_dmabuf_binding *binding;
|
||||
struct netdev_nl_sock *priv;
|
||||
struct net_device *netdev;
|
||||
u32 ifindex, dmabuf_fd;
|
||||
struct sk_buff *rsp;
|
||||
int err = 0;
|
||||
void *hdr;
|
||||
|
||||
if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
|
||||
GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD))
|
||||
return -EINVAL;
|
||||
|
||||
ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
|
||||
dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]);
|
||||
|
||||
priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk);
|
||||
if (IS_ERR(priv))
|
||||
return PTR_ERR(priv);
|
||||
|
||||
rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
||||
if (!rsp)
|
||||
return -ENOMEM;
|
||||
|
||||
hdr = genlmsg_iput(rsp, info);
|
||||
if (!hdr) {
|
||||
err = -EMSGSIZE;
|
||||
goto err_genlmsg_free;
|
||||
}
|
||||
|
||||
mutex_lock(&priv->lock);
|
||||
|
||||
netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
|
||||
if (!netdev) {
|
||||
err = -ENODEV;
|
||||
goto err_unlock_sock;
|
||||
}
|
||||
|
||||
if (!netif_device_present(netdev)) {
|
||||
err = -ENODEV;
|
||||
goto err_unlock_netdev;
|
||||
}
|
||||
|
||||
binding = net_devmem_bind_dmabuf(netdev, DMA_TO_DEVICE, dmabuf_fd,
|
||||
info->extack);
|
||||
if (IS_ERR(binding)) {
|
||||
err = PTR_ERR(binding);
|
||||
goto err_unlock_netdev;
|
||||
}
|
||||
|
||||
list_add(&binding->list, &priv->bindings);
|
||||
|
||||
nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id);
|
||||
genlmsg_end(rsp, hdr);
|
||||
|
||||
netdev_unlock(netdev);
|
||||
mutex_unlock(&priv->lock);
|
||||
|
||||
return genlmsg_reply(rsp, info);
|
||||
|
||||
err_unlock_netdev:
|
||||
netdev_unlock(netdev);
|
||||
err_unlock_sock:
|
||||
mutex_unlock(&priv->lock);
|
||||
err_genlmsg_free:
|
||||
nlmsg_free(rsp);
|
||||
return err;
|
||||
}
|
||||
|
||||
void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv)
|
||||
|
|
|
@ -1655,7 +1655,8 @@ void mm_unaccount_pinned_pages(struct mmpin *mmp)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
|
||||
|
||||
static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
|
||||
static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size,
|
||||
bool devmem)
|
||||
{
|
||||
struct ubuf_info_msgzc *uarg;
|
||||
struct sk_buff *skb;
|
||||
|
@ -1670,7 +1671,7 @@ static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
|
|||
uarg = (void *)skb->cb;
|
||||
uarg->mmp.user = NULL;
|
||||
|
||||
if (mm_account_pinned_pages(&uarg->mmp, size)) {
|
||||
if (likely(!devmem) && mm_account_pinned_pages(&uarg->mmp, size)) {
|
||||
kfree_skb(skb);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1693,7 +1694,7 @@ static inline struct sk_buff *skb_from_uarg(struct ubuf_info_msgzc *uarg)
|
|||
}
|
||||
|
||||
struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
|
||||
struct ubuf_info *uarg)
|
||||
struct ubuf_info *uarg, bool devmem)
|
||||
{
|
||||
if (uarg) {
|
||||
struct ubuf_info_msgzc *uarg_zc;
|
||||
|
@ -1723,7 +1724,8 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
|
|||
|
||||
next = (u32)atomic_read(&sk->sk_zckey);
|
||||
if ((u32)(uarg_zc->id + uarg_zc->len) == next) {
|
||||
if (mm_account_pinned_pages(&uarg_zc->mmp, size))
|
||||
if (likely(!devmem) &&
|
||||
mm_account_pinned_pages(&uarg_zc->mmp, size))
|
||||
return NULL;
|
||||
uarg_zc->len++;
|
||||
uarg_zc->bytelen = bytelen;
|
||||
|
@ -1738,7 +1740,7 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
|
|||
}
|
||||
|
||||
new_alloc:
|
||||
return msg_zerocopy_alloc(sk, size);
|
||||
return msg_zerocopy_alloc(sk, size, devmem);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(msg_zerocopy_realloc);
|
||||
|
||||
|
@ -1842,7 +1844,8 @@ EXPORT_SYMBOL_GPL(msg_zerocopy_ubuf_ops);
|
|||
|
||||
int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
|
||||
struct msghdr *msg, int len,
|
||||
struct ubuf_info *uarg)
|
||||
struct ubuf_info *uarg,
|
||||
struct net_devmem_dmabuf_binding *binding)
|
||||
{
|
||||
int err, orig_len = skb->len;
|
||||
|
||||
|
@ -1861,7 +1864,8 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
|
|||
return -EEXIST;
|
||||
}
|
||||
|
||||
err = __zerocopy_sg_from_iter(msg, sk, skb, &msg->msg_iter, len);
|
||||
err = __zerocopy_sg_from_iter(msg, sk, skb, &msg->msg_iter, len,
|
||||
binding);
|
||||
if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
|
||||
struct sock *save_sk = skb->sk;
|
||||
|
||||
|
|
|
@ -3018,6 +3018,11 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
|
|||
return -EPERM;
|
||||
sockc->priority = *(u32 *)CMSG_DATA(cmsg);
|
||||
break;
|
||||
case SCM_DEVMEM_DMABUF:
|
||||
if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
|
||||
return -EINVAL;
|
||||
sockc->dmabuf_id = *(u32 *)CMSG_DATA(cmsg);
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
|
|
@ -1014,7 +1014,8 @@ static int __ip_append_data(struct sock *sk,
|
|||
uarg = msg->msg_ubuf;
|
||||
}
|
||||
} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
|
||||
uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
|
||||
uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb),
|
||||
false);
|
||||
if (!uarg)
|
||||
return -ENOBUFS;
|
||||
extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
|
||||
|
|
|
@ -1059,6 +1059,7 @@ int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
|
|||
|
||||
int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
|
||||
{
|
||||
struct net_devmem_dmabuf_binding *binding = NULL;
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
struct ubuf_info *uarg = NULL;
|
||||
struct sk_buff *skb;
|
||||
|
@ -1066,11 +1067,23 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
|
|||
int flags, err, copied = 0;
|
||||
int mss_now = 0, size_goal, copied_syn = 0;
|
||||
int process_backlog = 0;
|
||||
bool sockc_valid = true;
|
||||
int zc = 0;
|
||||
long timeo;
|
||||
|
||||
flags = msg->msg_flags;
|
||||
|
||||
sockc = (struct sockcm_cookie){ .tsflags = READ_ONCE(sk->sk_tsflags) };
|
||||
if (msg->msg_controllen) {
|
||||
err = sock_cmsg_send(sk, msg, &sockc);
|
||||
if (unlikely(err))
|
||||
/* Don't return error until MSG_FASTOPEN has been
|
||||
* processed; that may succeed even if the cmsg is
|
||||
* invalid.
|
||||
*/
|
||||
sockc_valid = false;
|
||||
}
|
||||
|
||||
if ((flags & MSG_ZEROCOPY) && size) {
|
||||
if (msg->msg_ubuf) {
|
||||
uarg = msg->msg_ubuf;
|
||||
|
@ -1078,7 +1091,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
|
|||
zc = MSG_ZEROCOPY;
|
||||
} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
|
||||
skb = tcp_write_queue_tail(sk);
|
||||
uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
|
||||
uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb),
|
||||
sockc_valid && !!sockc.dmabuf_id);
|
||||
if (!uarg) {
|
||||
err = -ENOBUFS;
|
||||
goto out_err;
|
||||
|
@ -1087,12 +1101,27 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
|
|||
zc = MSG_ZEROCOPY;
|
||||
else
|
||||
uarg_to_msgzc(uarg)->zerocopy = 0;
|
||||
|
||||
if (sockc_valid && sockc.dmabuf_id) {
|
||||
binding = net_devmem_get_binding(sk, sockc.dmabuf_id);
|
||||
if (IS_ERR(binding)) {
|
||||
err = PTR_ERR(binding);
|
||||
binding = NULL;
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES) && size) {
|
||||
if (sk->sk_route_caps & NETIF_F_SG)
|
||||
zc = MSG_SPLICE_PAGES;
|
||||
}
|
||||
|
||||
if (sockc_valid && sockc.dmabuf_id &&
|
||||
(!(flags & MSG_ZEROCOPY) || !sock_flag(sk, SOCK_ZEROCOPY))) {
|
||||
err = -EINVAL;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
if (unlikely(flags & MSG_FASTOPEN ||
|
||||
inet_test_bit(DEFER_CONNECT, sk)) &&
|
||||
!tp->repair) {
|
||||
|
@ -1131,13 +1160,10 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
|
|||
/* 'common' sending to sendq */
|
||||
}
|
||||
|
||||
sockc = (struct sockcm_cookie) { .tsflags = READ_ONCE(sk->sk_tsflags)};
|
||||
if (msg->msg_controllen) {
|
||||
err = sock_cmsg_send(sk, msg, &sockc);
|
||||
if (unlikely(err)) {
|
||||
if (!sockc_valid) {
|
||||
if (!err)
|
||||
err = -EINVAL;
|
||||
goto out_err;
|
||||
}
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
/* This should be in poll */
|
||||
|
@ -1258,7 +1284,8 @@ new_segment:
|
|||
goto wait_for_space;
|
||||
}
|
||||
|
||||
err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
|
||||
err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg,
|
||||
binding);
|
||||
if (err == -EMSGSIZE || err == -EEXIST) {
|
||||
tcp_mark_push(tp, skb);
|
||||
goto new_segment;
|
||||
|
@ -1339,6 +1366,8 @@ out_nopush:
|
|||
/* msg->msg_ubuf is pinned by the caller so we don't take extra refs */
|
||||
if (uarg && !msg->msg_ubuf)
|
||||
net_zcopy_put(uarg);
|
||||
if (binding)
|
||||
net_devmem_dmabuf_binding_put(binding);
|
||||
return copied + copied_syn;
|
||||
|
||||
do_error:
|
||||
|
@ -1356,6 +1385,9 @@ out_err:
|
|||
sk->sk_write_space(sk);
|
||||
tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
|
||||
}
|
||||
if (binding)
|
||||
net_devmem_dmabuf_binding_put(binding);
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tcp_sendmsg_locked);
|
||||
|
|
|
@ -1524,7 +1524,8 @@ emsgsize:
|
|||
uarg = msg->msg_ubuf;
|
||||
}
|
||||
} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
|
||||
uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
|
||||
uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb),
|
||||
false);
|
||||
if (!uarg)
|
||||
return -ENOBUFS;
|
||||
extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
|
||||
|
|
|
@ -87,7 +87,7 @@ static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk,
|
|||
|
||||
uarg = msg_zerocopy_realloc(sk_vsock(vsk),
|
||||
iter->count,
|
||||
NULL);
|
||||
NULL, false);
|
||||
if (!uarg)
|
||||
return -1;
|
||||
|
||||
|
@ -107,8 +107,7 @@ static int virtio_transport_fill_skb(struct sk_buff *skb,
|
|||
{
|
||||
if (zcopy)
|
||||
return __zerocopy_sg_from_iter(info->msg, NULL, skb,
|
||||
&info->msg->msg_iter,
|
||||
len);
|
||||
&info->msg->msg_iter, len, NULL);
|
||||
|
||||
return memcpy_from_msg(skb_put(skb, len), info->msg, len);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue