Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2025-08-21 06:50:25 +00:00)
RDMA/rxe: Allow registering MRs for On-Demand Paging
Allow userspace to register an ODP-enabled MR, in which case the flag IB_ACCESS_ON_DEMAND is passed to rxe_reg_user_mr(). However, no RDMA operations are enabled yet; they will be supported by the two subsequent patches.

rxe_odp_do_pagefault() is called to initialize an ODP-enabled MR. It syncs the process address space from the CPU page table to the driver page table (dma_list/pfn_list in umem_odp) when called with the RXE_PAGEFAULT_SNAPSHOT flag. Additionally, it can be used to trigger a page fault when the pages being accessed are not present or do not have the proper read/write permissions, and possibly to prefetch pages in the future.

Link: https://patch.msgid.link/r/20241220100936.2193541-4-matsuda-daisuke@fujitsu.com
Signed-off-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
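For context only (this program is not part of the patch): a minimal libibverbs sketch of the userspace side described above. It checks the ODP capability that rxe now advertises and registers an MR with IBV_ACCESS_ON_DEMAND, which reaches rxe_reg_user_mr() with IB_ACCESS_ON_DEMAND set and takes the new rxe_odp_mr_init_user() path. The device selection, buffer size, and build command (gcc -o odp_reg odp_reg.c -libverbs) are assumptions; the verbs calls themselves are standard libibverbs API.

#include <infiniband/verbs.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        struct ibv_device **devs;
        struct ibv_device_attr_ex attr;
        struct ibv_context *ctx;
        struct ibv_pd *pd;
        struct ibv_mr *mr;
        const size_t len = 1 << 20;
        void *buf;

        devs = ibv_get_device_list(NULL);
        if (!devs || !devs[0])
                return 1;

        ctx = ibv_open_device(devs[0]); /* assumption: the first device is the rxe one */
        ibv_free_device_list(devs);
        if (!ctx)
                return 1;

        /* The IB_ODP_SUPPORT bit set in rxe_init_device_param() shows up here. */
        if (ibv_query_device_ex(ctx, NULL, &attr) ||
            !(attr.odp_caps.general_caps & IBV_ODP_SUPPORT)) {
                fprintf(stderr, "device does not advertise ODP\n");
                goto out_close;
        }

        pd = ibv_alloc_pd(ctx);
        if (!pd)
                goto out_close;

        buf = malloc(len);
        if (!buf)
                goto out_pd;

        /* IBV_ACCESS_ON_DEMAND becomes IB_ACCESS_ON_DEMAND in rxe_reg_user_mr(). */
        mr = ibv_reg_mr(pd, buf, len,
                        IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ |
                        IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_ON_DEMAND);
        if (!mr)
                perror("ibv_reg_mr(ON_DEMAND)");
        else
                ibv_dereg_mr(mr);

        free(buf);
out_pd:
        ibv_dealloc_pd(pd);
out_close:
        ibv_close_device(ctx);
        return 0;
}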
parent b601792392
commit d03fb5c659

6 changed files with 128 additions and 6 deletions
@@ -92,6 +92,13 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
 	dev_put(ndev);
 
 	rxe->max_ucontext = RXE_MAX_UCONTEXT;
+
+	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+		rxe->attr.kernel_cap_flags |= IBK_ON_DEMAND_PAGING;
+
+		/* IB_ODP_SUPPORT_IMPLICIT is not supported right now. */
+		rxe->attr.odp_caps.general_caps |= IB_ODP_SUPPORT;
+	}
 }
 
 /* initialize port attributes */
@@ -184,4 +184,16 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
 /* rxe_odp.c */
 extern const struct mmu_interval_notifier_ops rxe_mn_ops;
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
+			 u64 iova, int access_flags, struct rxe_mr *mr);
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+static inline int
+rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
+		     int access_flags, struct rxe_mr *mr)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
 #endif /* RXE_LOC_H */
@@ -323,7 +323,10 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
 		return err;
 	}
 
-	return rxe_mr_copy_xarray(mr, iova, addr, length, dir);
+	if (mr->umem->is_odp)
+		return -EOPNOTSUPP;
+	else
+		return rxe_mr_copy_xarray(mr, iova, addr, length, dir);
 }
 
 /* copy data in or out of a wqe, i.e. sg list
@@ -532,6 +535,10 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
 	struct page *page;
 	u64 *va;
 
+	/* ODP is not supported right now. WIP. */
+	if (mr->umem->is_odp)
+		return RESPST_ERR_UNSUPPORTED_OPCODE;
+
 	/* See IBA oA19-28 */
 	if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
 		rxe_dbg_mr(mr, "mr not in valid state\n");
@@ -36,3 +36,89 @@ static bool rxe_ib_invalidate_range(struct mmu_interval_notifier *mni,
 const struct mmu_interval_notifier_ops rxe_mn_ops = {
 	.invalidate = rxe_ib_invalidate_range,
 };
+
+#define RXE_PAGEFAULT_RDONLY BIT(1)
+#define RXE_PAGEFAULT_SNAPSHOT BIT(2)
+static int rxe_odp_do_pagefault_and_lock(struct rxe_mr *mr, u64 user_va, int bcnt, u32 flags)
+{
+	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+	bool fault = !(flags & RXE_PAGEFAULT_SNAPSHOT);
+	u64 access_mask;
+	int np;
+
+	access_mask = ODP_READ_ALLOWED_BIT;
+	if (umem_odp->umem.writable && !(flags & RXE_PAGEFAULT_RDONLY))
+		access_mask |= ODP_WRITE_ALLOWED_BIT;
+
+	/*
+	 * ib_umem_odp_map_dma_and_lock() locks umem_mutex on success.
+	 * Callers must release the lock later to let invalidation handler
+	 * do its work again.
+	 */
+	np = ib_umem_odp_map_dma_and_lock(umem_odp, user_va, bcnt,
+					  access_mask, fault);
+	return np;
+}
+
+static int rxe_odp_init_pages(struct rxe_mr *mr)
+{
+	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+	int ret;
+
+	ret = rxe_odp_do_pagefault_and_lock(mr, mr->umem->address,
+					    mr->umem->length,
+					    RXE_PAGEFAULT_SNAPSHOT);
+
+	if (ret >= 0)
+		mutex_unlock(&umem_odp->umem_mutex);
+
+	return ret >= 0 ? 0 : ret;
+}
+
+int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
+			 u64 iova, int access_flags, struct rxe_mr *mr)
+{
+	struct ib_umem_odp *umem_odp;
+	int err;
+
+	if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+		return -EOPNOTSUPP;
+
+	rxe_mr_init(access_flags, mr);
+
+	if (!start && length == U64_MAX) {
+		if (iova != 0)
+			return -EINVAL;
+		if (!(rxe->attr.odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+			return -EINVAL;
+
+		/* Never reach here, for implicit ODP is not implemented. */
+	}
+
+	umem_odp = ib_umem_odp_get(&rxe->ib_dev, start, length, access_flags,
+				   &rxe_mn_ops);
+	if (IS_ERR(umem_odp)) {
+		rxe_dbg_mr(mr, "Unable to create umem_odp err = %d\n",
+			   (int)PTR_ERR(umem_odp));
+		return PTR_ERR(umem_odp);
+	}
+
+	umem_odp->private = mr;
+
+	mr->umem = &umem_odp->umem;
+	mr->access = access_flags;
+	mr->ibmr.length = length;
+	mr->ibmr.iova = iova;
+	mr->page_offset = ib_umem_offset(&umem_odp->umem);
+
+	err = rxe_odp_init_pages(mr);
+	if (err) {
+		ib_umem_odp_release(umem_odp);
+		return err;
+	}
+
+	mr->state = RXE_MR_STATE_VALID;
+	mr->ibmr.type = IB_MR_TYPE_USER;
+
+	return err;
+}
@@ -649,6 +649,10 @@ static enum resp_states process_flush(struct rxe_qp *qp,
 	struct rxe_mr *mr = qp->resp.mr;
 	struct resp_res *res = qp->resp.res;
 
+	/* ODP is not supported right now. WIP. */
+	if (mr->umem->is_odp)
+		return RESPST_ERR_UNSUPPORTED_OPCODE;
+
 	/* oA19-14, oA19-15 */
 	if (res && res->replay)
 		return RESPST_ACKNOWLEDGE;
@@ -702,10 +706,13 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
 	if (!res->replay) {
 		u64 iova = qp->resp.va + qp->resp.offset;
 
-		err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
-					  atmeth_comp(pkt),
-					  atmeth_swap_add(pkt),
-					  &res->atomic.orig_val);
+		if (mr->umem->is_odp)
+			err = RESPST_ERR_UNSUPPORTED_OPCODE;
+		else
+			err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
+						  atmeth_comp(pkt),
+						  atmeth_swap_add(pkt),
+						  &res->atomic.orig_val);
 		if (err)
 			return err;
 
@@ -1298,7 +1298,10 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start,
 	mr->ibmr.pd = ibpd;
 	mr->ibmr.device = ibpd->device;
 
-	err = rxe_mr_init_user(rxe, start, length, access, mr);
+	if (access & IB_ACCESS_ON_DEMAND)
+		err = rxe_odp_mr_init_user(rxe, start, length, iova, access, mr);
+	else
+		err = rxe_mr_init_user(rxe, start, length, access, mr);
 	if (err) {
 		rxe_dbg_mr(mr, "reg_user_mr failed, err = %d\n", err);
 		goto err_cleanup;