linux/drivers/infiniband/hw/mlx5/umr.h

/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */

#ifndef _MLX5_IB_UMR_H
#define _MLX5_IB_UMR_H

#include "mlx5_ib.h"

#define MLX5_MAX_UMR_SHIFT 16
#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)

#define MLX5_IB_UMR_OCTOWORD 16
#define MLX5_IB_UMR_XLT_ALIGNMENT 64

int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev);
void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev);

int mlx5r_umr_init(struct mlx5_ib_dev *dev);
void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev);

static inline bool mlx5r_umr_can_load_pas(struct mlx5_ib_dev *dev,
					  size_t length)
{
	/*
	 * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is
	 * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka
	 * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey
	 * can never be enabled without this capability. Simplify this weird
	 * quirky hardware by just saying it can't use PAS lists with UMR at
	 * all.
	 */
	if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
		return false;

	/*
	 * length is the size of the MR in bytes when mlx5_ib_update_xlt() is
	 * used.
	 */
	if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
	    length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE)
		return false;
	return true;
}
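
/*
 * Illustrative sketch (not part of this header): a registration path could
 * consult mlx5r_umr_can_load_pas() to decide whether an MR's page list may be
 * posted through UMR or whether it must fall back to a FW-created mkey. The
 * example_* helpers are hypothetical names used only for this sketch.
 *
 *	static struct mlx5_ib_mr *example_reg_mr(struct mlx5_ib_dev *dev,
 *						 struct ib_umem *umem,
 *						 u64 iova, int access_flags)
 *	{
 *		if (!mlx5r_umr_can_load_pas(dev, umem->length))
 *			return example_create_mkey_in_fw(dev, umem, iova,
 *							 access_flags);
 *		return example_reg_via_umr(dev, umem, iova, access_flags);
 *	}
 */
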
/*
* true if an existing MR can be reconfigured to new access_flags using UMR.
* Older HW cannot use UMR to update certain elements of the MKC. See
* get_umr_update_access_mask() and umr_check_mkey_mask()
*/
static inline bool mlx5r_umr_can_reconfig(struct mlx5_ib_dev *dev,
					  unsigned int current_access_flags,
					  unsigned int target_access_flags)
{
	unsigned int diffs = current_access_flags ^ target_access_flags;

	if ((diffs & IB_ACCESS_REMOTE_ATOMIC) &&
	    MLX5_CAP_GEN(dev->mdev, atomic) &&
	    MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
		return false;

	if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
	    MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
		return false;

	if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
	    (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) ||
	     MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_pci_enabled)) &&
	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
		return false;

	return true;
}
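
/*
 * Illustrative sketch (not part of this header): rereg-style code could use
 * mlx5r_umr_can_reconfig() to choose between a lightweight UMR update of the
 * access flags and a full re-registration. example_recreate_mr() is a
 * hypothetical fallback used only for this sketch.
 *
 *	static int example_update_access(struct mlx5_ib_dev *dev,
 *					 struct mlx5_ib_mr *mr,
 *					 int new_access_flags)
 *	{
 *		if (!mlx5r_umr_can_reconfig(dev, mr->access_flags,
 *					    new_access_flags))
 *			return example_recreate_mr(dev, mr, new_access_flags);
 *		return mlx5r_umr_rereg_pd_access(mr, mr->ibmr.pd,
 *						 new_access_flags);
 *	}
 */
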
static inline u64 mlx5r_umr_get_xlt_octo(u64 bytes)
{
	return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
	       MLX5_IB_UMR_OCTOWORD;
}
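
/*
 * Worked example of the conversion above: XLT data is padded to a 64-byte
 * boundary and then expressed in 16-byte octowords, so for instance
 * mlx5r_umr_get_xlt_octo(200) == ALIGN(200, 64) / 16 == 256 / 16 == 16.
 */
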
struct mlx5r_umr_context {
	struct ib_cqe cqe;
	enum ib_wc_status status;
	struct completion done;
};
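
/*
 * Illustrative sketch (not part of this header): mlx5r_umr_context ties a CQE
 * completion callback to a completion object so that whoever posts a UMR WQE
 * can sleep until it finishes and then check the work-completion status. The
 * callback below is a hypothetical rendering of that pattern, not necessarily
 * the driver's own code.
 *
 *	static void example_umr_done(struct ib_cq *cq, struct ib_wc *wc)
 *	{
 *		struct mlx5r_umr_context *ctx =
 *			container_of(wc->wr_cqe, struct mlx5r_umr_context, cqe);
 *
 *		ctx->status = wc->status;
 *		complete(&ctx->done);
 *	}
 *
 * A poster would set ctx.cqe.done = example_umr_done, call
 * init_completion(&ctx.done), post the UMR WQE with wr_cqe pointing at
 * &ctx.cqe, wait_for_completion(&ctx.done), and finally compare ctx.status
 * against IB_WC_SUCCESS.
 */
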
struct mlx5r_umr_wqe {
	struct mlx5_wqe_umr_ctrl_seg ctrl_seg;
	struct mlx5_mkey_seg mkey_seg;
	struct mlx5_wqe_data_seg data_seg;
};

int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr);
int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
			      int access_flags);
int mlx5r_umr_update_data_direct_ksm_pas_range(struct mlx5_ib_mr *mr,
					       unsigned int flags,
					       size_t start_block,
					       size_t nblocks);
int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr, unsigned int flags);
int mlx5r_umr_update_mr_pas_range(struct mlx5_ib_mr *mr, unsigned int flags,
				  size_t start_block, size_t nblocks);
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
			 int page_shift, int flags);
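
/*
 * Illustrative sketch (not part of this header): mlx5r_umr_update_xlt()
 * operates on a window of the translation table, so a caller can (re)load just
 * the npages entries starting at index idx. The wrapper below is hypothetical;
 * the flag names are the ones mentioned in the mlx5r_umr_can_load_pas()
 * comment above.
 *
 *	static int example_load_window(struct mlx5_ib_mr *mr, u64 idx,
 *				       int npages, int page_shift)
 *	{
 *		return mlx5r_umr_update_xlt(mr, idx, npages, page_shift,
 *					    MLX5_IB_UPD_XLT_ADDR |
 *					    MLX5_IB_UPD_XLT_ENABLE);
 *	}
 */
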
int mlx5r_umr_update_mr_page_shift(struct mlx5_ib_mr *mr,
				   unsigned int page_shift,
				   bool dd);
int mlx5r_umr_dmabuf_update_pgsz(struct mlx5_ib_mr *mr, u32 xlt_flags,
				 unsigned int page_shift);
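
/*
 * Illustrative sketch (not part of this header): the range-based helpers above
 * make it possible to change an mkey's page size without ever exposing a
 * partially updated translation table. A simplified, hypothetical ordering is:
 * zap a small leading block of entries, switch the mkey to the new page shift,
 * load the remaining entries at the new granularity, then reload the leading
 * block with correct translations. The zap_flag parameter is a placeholder,
 * and the bool argument to mlx5r_umr_update_mr_page_shift() is assumed here to
 * select the data-direct mkey.
 *
 *	static int example_switch_page_shift(struct mlx5_ib_mr *mr,
 *					     unsigned int new_shift,
 *					     size_t lead_blocks,
 *					     size_t total_blocks,
 *					     unsigned int zap_flag)
 *	{
 *		int err;
 *
 *		err = mlx5r_umr_update_mr_pas_range(mr, zap_flag, 0,
 *						    lead_blocks);
 *		if (err)
 *			return err;
 *		err = mlx5r_umr_update_mr_page_shift(mr, new_shift, false);
 *		if (err)
 *			return err;
 *		err = mlx5r_umr_update_mr_pas_range(mr, 0, lead_blocks,
 *						    total_blocks - lead_blocks);
 *		if (err)
 *			return err;
 *		return mlx5r_umr_update_mr_pas_range(mr, 0, 0, lead_blocks);
 *	}
 */
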
#endif /* _MLX5_IB_UMR_H */