RDMA/rxe: Allow registering MRs for On-Demand Paging
Allow userspace to register an ODP-enabled MR, in which case the IB_ACCESS_ON_DEMAND flag is passed to rxe_reg_user_mr(). No RDMA operations are enabled on such MRs yet; they will be supported in the subsequent two patches.

rxe_odp_do_pagefault() is called to initialize an ODP-enabled MR. When invoked with the RXE_PAGEFAULT_SNAPSHOT flag, it syncs the process address space from the CPU page table to the driver page table (dma_list/pfn_list in umem_odp). It can additionally be used to trigger a page fault when accessed pages are not present or lack the proper read/write permissions, and possibly to prefetch pages in the future.

Link: https://patch.msgid.link/r/20241220100936.2193541-4-matsuda-daisuke@fujitsu.com
Signed-off-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
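As a quick illustration of what this enables from userspace (not part of the patch; a minimal libibverbs sketch with an assumed device/PD setup), registering an ODP MR is a matter of passing IBV_ACCESS_ON_DEMAND to ibv_reg_mr(), which the kernel maps to IB_ACCESS_ON_DEMAND on the rxe_reg_user_mr() path touched below:

/* Hedged userspace sketch: assumes the first listed device is an rxe device
 * and that it advertises ODP support; error handling is abbreviated. */
#include <stdio.h>
#include <stdlib.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **list = ibv_get_device_list(NULL);
	struct ibv_context *ctx = list ? ibv_open_device(list[0]) : NULL;
	struct ibv_pd *pd = ctx ? ibv_alloc_pd(ctx) : NULL;
	size_t len = 1UL << 20;
	void *buf = malloc(len);

	if (!pd || !buf)
		return 1;

	/* Explicit ODP registration: pages are not pinned up front. */
	struct ibv_mr *mr = ibv_reg_mr(pd, buf, len,
				       IBV_ACCESS_LOCAL_WRITE |
				       IBV_ACCESS_REMOTE_READ |
				       IBV_ACCESS_REMOTE_WRITE |
				       IBV_ACCESS_ON_DEMAND);
	if (!mr)
		perror("ibv_reg_mr(ODP)");	/* fails if the device lacks ODP support */
	else
		ibv_dereg_mr(mr);

	ibv_dealloc_pd(pd);
	ibv_close_device(ctx);
	ibv_free_device_list(list);
	free(buf);
	return 0;
}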
parent b601792392
commit d03fb5c659
6 changed files with 128 additions and 6 deletions
drivers/infiniband/sw/rxe/rxe.c
@@ -92,6 +92,13 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
 	dev_put(ndev);
 
 	rxe->max_ucontext		= RXE_MAX_UCONTEXT;
+
+	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+		rxe->attr.kernel_cap_flags |= IBK_ON_DEMAND_PAGING;
+
+		/* IB_ODP_SUPPORT_IMPLICIT is not supported right now. */
+		rxe->attr.odp_caps.general_caps |= IB_ODP_SUPPORT;
+	}
 }
 
 /* initialize port attributes */
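The hunk above advertises ODP in the device attributes. As a hedged usage note (not part of the patch), an application could check the reported capability with ibv_query_device_ex() before attempting an ODP registration:

/* Sketch: "ctx" is an already-opened ibv_context for the rxe device. */
#include <infiniband/verbs.h>

static int device_supports_odp(struct ibv_context *ctx)
{
	struct ibv_device_attr_ex attr;

	if (ibv_query_device_ex(ctx, NULL, &attr))
		return 0;

	/* Mirrors rxe->attr.odp_caps.general_caps |= IB_ODP_SUPPORT above. */
	return !!(attr.odp_caps.general_caps & IBV_ODP_SUPPORT);
}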
drivers/infiniband/sw/rxe/rxe_loc.h
@@ -184,4 +184,16 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
 /* rxe_odp.c */
 extern const struct mmu_interval_notifier_ops rxe_mn_ops;
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
+			 u64 iova, int access_flags, struct rxe_mr *mr);
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+static inline int
+rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
+		     int access_flags, struct rxe_mr *mr)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
 #endif /* RXE_LOC_H */
drivers/infiniband/sw/rxe/rxe_mr.c
@@ -323,7 +323,10 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
 			return err;
 	}
 
-	return rxe_mr_copy_xarray(mr, iova, addr, length, dir);
+	if (mr->umem->is_odp)
+		return -EOPNOTSUPP;
+	else
+		return rxe_mr_copy_xarray(mr, iova, addr, length, dir);
 }
 
 /* copy data in or out of a wqe, i.e. sg list
@@ -532,6 +535,10 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
 	struct page *page;
 	u64 *va;
 
+	/* ODP is not supported right now. WIP. */
+	if (mr->umem->is_odp)
+		return RESPST_ERR_UNSUPPORTED_OPCODE;
+
 	/* See IBA oA19-28 */
 	if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
 		rxe_dbg_mr(mr, "mr not in valid state\n");
drivers/infiniband/sw/rxe/rxe_odp.c
@@ -36,3 +36,89 @@ static bool rxe_ib_invalidate_range(struct mmu_interval_notifier *mni,
 const struct mmu_interval_notifier_ops rxe_mn_ops = {
 	.invalidate = rxe_ib_invalidate_range,
 };
+
+#define RXE_PAGEFAULT_RDONLY BIT(1)
+#define RXE_PAGEFAULT_SNAPSHOT BIT(2)
+static int rxe_odp_do_pagefault_and_lock(struct rxe_mr *mr, u64 user_va, int bcnt, u32 flags)
+{
+	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+	bool fault = !(flags & RXE_PAGEFAULT_SNAPSHOT);
+	u64 access_mask;
+	int np;
+
+	access_mask = ODP_READ_ALLOWED_BIT;
+	if (umem_odp->umem.writable && !(flags & RXE_PAGEFAULT_RDONLY))
+		access_mask |= ODP_WRITE_ALLOWED_BIT;
+
+	/*
+	 * ib_umem_odp_map_dma_and_lock() locks umem_mutex on success.
+	 * Callers must release the lock later to let invalidation handler
+	 * do its work again.
+	 */
+	np = ib_umem_odp_map_dma_and_lock(umem_odp, user_va, bcnt,
+					  access_mask, fault);
+	return np;
+}
+
+static int rxe_odp_init_pages(struct rxe_mr *mr)
+{
+	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+	int ret;
+
+	ret = rxe_odp_do_pagefault_and_lock(mr, mr->umem->address,
+					    mr->umem->length,
+					    RXE_PAGEFAULT_SNAPSHOT);
+
+	if (ret >= 0)
+		mutex_unlock(&umem_odp->umem_mutex);
+
+	return ret >= 0 ? 0 : ret;
+}
+
+int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
+			 u64 iova, int access_flags, struct rxe_mr *mr)
+{
+	struct ib_umem_odp *umem_odp;
+	int err;
+
+	if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+		return -EOPNOTSUPP;
+
+	rxe_mr_init(access_flags, mr);
+
+	if (!start && length == U64_MAX) {
+		if (iova != 0)
+			return -EINVAL;
+		if (!(rxe->attr.odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+			return -EINVAL;
+
+		/* Never reach here, for implicit ODP is not implemented. */
+	}
+
+	umem_odp = ib_umem_odp_get(&rxe->ib_dev, start, length, access_flags,
+				   &rxe_mn_ops);
+	if (IS_ERR(umem_odp)) {
+		rxe_dbg_mr(mr, "Unable to create umem_odp err = %d\n",
+			   (int)PTR_ERR(umem_odp));
+		return PTR_ERR(umem_odp);
+	}
+
+	umem_odp->private = mr;
+
+	mr->umem = &umem_odp->umem;
+	mr->access = access_flags;
+	mr->ibmr.length = length;
+	mr->ibmr.iova = iova;
+	mr->page_offset = ib_umem_offset(&umem_odp->umem);
+
+	err = rxe_odp_init_pages(mr);
+	if (err) {
+		ib_umem_odp_release(umem_odp);
+		return err;
+	}
+
+	mr->state = RXE_MR_STATE_VALID;
+	mr->ibmr.type = IB_MR_TYPE_USER;
+
+	return err;
+}
drivers/infiniband/sw/rxe/rxe_resp.c
@@ -649,6 +649,10 @@ static enum resp_states process_flush(struct rxe_qp *qp,
 	struct rxe_mr *mr = qp->resp.mr;
 	struct resp_res *res = qp->resp.res;
 
+	/* ODP is not supported right now. WIP. */
+	if (mr->umem->is_odp)
+		return RESPST_ERR_UNSUPPORTED_OPCODE;
+
 	/* oA19-14, oA19-15 */
 	if (res && res->replay)
 		return RESPST_ACKNOWLEDGE;
@@ -702,10 +706,13 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
 	if (!res->replay) {
 		u64 iova = qp->resp.va + qp->resp.offset;
 
-		err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
-					  atmeth_comp(pkt),
-					  atmeth_swap_add(pkt),
-					  &res->atomic.orig_val);
+		if (mr->umem->is_odp)
+			err = RESPST_ERR_UNSUPPORTED_OPCODE;
+		else
+			err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
+						  atmeth_comp(pkt),
+						  atmeth_swap_add(pkt),
+						  &res->atomic.orig_val);
 		if (err)
 			return err;
 
drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1298,7 +1298,10 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start,
 	mr->ibmr.pd = ibpd;
 	mr->ibmr.device = ibpd->device;
 
-	err = rxe_mr_init_user(rxe, start, length, access, mr);
+	if (access & IB_ACCESS_ON_DEMAND)
+		err = rxe_odp_mr_init_user(rxe, start, length, iova, access, mr);
+	else
+		err = rxe_mr_init_user(rxe, start, length, access, mr);
 	if (err) {
 		rxe_dbg_mr(mr, "reg_user_mr failed, err = %d\n", err);
 		goto err_cleanup;