mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00

Rename the shortterm-related identifiers to wait-related.

The usage of shortterm_users refcount is now beyond its name. It is also used
for references which live longer than an ioctl execution. E.g. vdev holds
idev's shortterm_users refcount on vdev allocation, releases it during idev's
pre_destroy(). Rename the refcount as wait_cnt, since it is always used to
sync the referencing & the destruction of the object by waiting for it to go
to zero.

List all changed identifiers:

  iommufd_object::shortterm_users -> iommufd_object::wait_cnt
  REMOVE_WAIT_SHORTTERM -> REMOVE_WAIT
  iommufd_object_dec_wait_shortterm() -> iommufd_object_dec_wait()
  zerod_shortterm -> zerod_wait_cnt

No functional change intended.

Link: https://patch.msgid.link/r/20250716070349.1807226-9-yilun.xu@linux.intel.com
Suggested-by: Kevin Tian <kevin.tian@intel.com>
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Xu Yilun <yilun.xu@linux.intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
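For context, the wait_cnt scheme described above is a wait-for-zero teardown: every holder of an object takes a count, and the destroyer blocks until that count returns to zero before tearing the object down. Below is a minimal, illustrative userspace C sketch of that pattern only; the names obj_get_wait_cnt(), obj_put_wait_cnt() and obj_destroy() are made up for this example, and the kernel itself implements this with refcount_t and a wake/wait mechanism rather than pthreads.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Toy object with a wait_cnt-style count: destruction waits for it to hit zero. */
struct obj {
        unsigned int wait_cnt;          /* outstanding references */
        pthread_mutex_t lock;
        pthread_cond_t zeroed;          /* signaled when wait_cnt drops to 0 */
};

static void obj_get_wait_cnt(struct obj *o)
{
        pthread_mutex_lock(&o->lock);
        o->wait_cnt++;
        pthread_mutex_unlock(&o->lock);
}

static void obj_put_wait_cnt(struct obj *o)
{
        pthread_mutex_lock(&o->lock);
        if (--o->wait_cnt == 0)
                pthread_cond_broadcast(&o->zeroed);
        pthread_mutex_unlock(&o->lock);
}

/* The destroyer blocks until every holder has dropped its count. */
static void obj_destroy(struct obj *o)
{
        pthread_mutex_lock(&o->lock);
        while (o->wait_cnt)
                pthread_cond_wait(&o->zeroed, &o->lock);
        pthread_mutex_unlock(&o->lock);
        free(o);
}

static void *holder(void *arg)
{
        struct obj *o = arg;

        sleep(1);                       /* hold the reference across some work */
        obj_put_wait_cnt(o);
        return NULL;
}

int main(void)
{
        struct obj *o = calloc(1, sizeof(*o));
        pthread_t t;

        if (!o)
                return 1;
        pthread_mutex_init(&o->lock, NULL);
        pthread_cond_init(&o->zeroed, NULL);

        obj_get_wait_cnt(o);            /* the holder's long-lived reference */
        pthread_create(&t, NULL, holder, o);

        obj_destroy(o);                 /* returns only after wait_cnt reached zero */
        pthread_join(t, NULL);
        printf("destroyed after wait_cnt reached zero\n");
        return 0;
}

This mirrors the relationship the commit message describes: the vdev's reference on the idev plays the role of the holder, and the idev's destruction path is the side that waits.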
430 lines
11 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
 */
#include "iommufd_private.h"

void iommufd_viommu_destroy(struct iommufd_object *obj)
{
        struct iommufd_viommu *viommu =
                container_of(obj, struct iommufd_viommu, obj);

        if (viommu->ops && viommu->ops->destroy)
                viommu->ops->destroy(viommu);
        refcount_dec(&viommu->hwpt->common.obj.users);
        xa_destroy(&viommu->vdevs);
}

int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
{
        struct iommu_viommu_alloc *cmd = ucmd->cmd;
        const struct iommu_user_data user_data = {
                .type = cmd->type,
                .uptr = u64_to_user_ptr(cmd->data_uptr),
                .len = cmd->data_len,
        };
        struct iommufd_hwpt_paging *hwpt_paging;
        struct iommufd_viommu *viommu;
        struct iommufd_device *idev;
        const struct iommu_ops *ops;
        size_t viommu_size;
        int rc;

        if (cmd->flags || cmd->type == IOMMU_VIOMMU_TYPE_DEFAULT)
                return -EOPNOTSUPP;

        idev = iommufd_get_device(ucmd, cmd->dev_id);
        if (IS_ERR(idev))
                return PTR_ERR(idev);

        ops = dev_iommu_ops(idev->dev);
        if (!ops->get_viommu_size || !ops->viommu_init) {
                rc = -EOPNOTSUPP;
                goto out_put_idev;
        }

        viommu_size = ops->get_viommu_size(idev->dev, cmd->type);
        if (!viommu_size) {
                rc = -EOPNOTSUPP;
                goto out_put_idev;
        }

        /*
         * It is a driver bug for providing a viommu_size smaller than the core
         * vIOMMU structure size
         */
        if (WARN_ON_ONCE(viommu_size < sizeof(*viommu))) {
                rc = -EOPNOTSUPP;
                goto out_put_idev;
        }

        hwpt_paging = iommufd_get_hwpt_paging(ucmd, cmd->hwpt_id);
        if (IS_ERR(hwpt_paging)) {
                rc = PTR_ERR(hwpt_paging);
                goto out_put_idev;
        }

        if (!hwpt_paging->nest_parent) {
                rc = -EINVAL;
                goto out_put_hwpt;
        }

        viommu = (struct iommufd_viommu *)_iommufd_object_alloc_ucmd(
                ucmd, viommu_size, IOMMUFD_OBJ_VIOMMU);
        if (IS_ERR(viommu)) {
                rc = PTR_ERR(viommu);
                goto out_put_hwpt;
        }

        xa_init(&viommu->vdevs);
        viommu->type = cmd->type;
        viommu->ictx = ucmd->ictx;
        viommu->hwpt = hwpt_paging;
        refcount_inc(&viommu->hwpt->common.obj.users);
        INIT_LIST_HEAD(&viommu->veventqs);
        init_rwsem(&viommu->veventqs_rwsem);
        /*
         * It is the most likely case that a physical IOMMU is unpluggable. A
         * pluggable IOMMU instance (if exists) is responsible for refcounting
         * on its own.
         */
        viommu->iommu_dev = __iommu_get_iommu_dev(idev->dev);

        rc = ops->viommu_init(viommu, hwpt_paging->common.domain,
                              user_data.len ? &user_data : NULL);
        if (rc)
                goto out_put_hwpt;

        /* It is a driver bug that viommu->ops isn't filled */
        if (WARN_ON_ONCE(!viommu->ops)) {
                rc = -EOPNOTSUPP;
                goto out_put_hwpt;
        }

        cmd->out_viommu_id = viommu->obj.id;
        rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));

out_put_hwpt:
        iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj);
out_put_idev:
        iommufd_put_object(ucmd->ictx, &idev->obj);
        return rc;
}

void iommufd_vdevice_abort(struct iommufd_object *obj)
{
        struct iommufd_vdevice *vdev =
                container_of(obj, struct iommufd_vdevice, obj);
        struct iommufd_viommu *viommu = vdev->viommu;
        struct iommufd_device *idev = vdev->idev;

        lockdep_assert_held(&idev->igroup->lock);

        if (vdev->destroy)
                vdev->destroy(vdev);
        /* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */
        xa_cmpxchg(&viommu->vdevs, vdev->virt_id, vdev, NULL, GFP_KERNEL);
        refcount_dec(&viommu->obj.users);
        idev->vdev = NULL;
}

void iommufd_vdevice_destroy(struct iommufd_object *obj)
{
        struct iommufd_vdevice *vdev =
                container_of(obj, struct iommufd_vdevice, obj);
        struct iommufd_device *idev = vdev->idev;
        struct iommufd_ctx *ictx = idev->ictx;

        mutex_lock(&idev->igroup->lock);
        iommufd_vdevice_abort(obj);
        mutex_unlock(&idev->igroup->lock);
        iommufd_put_object(ictx, &idev->obj);
}

int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
{
        struct iommu_vdevice_alloc *cmd = ucmd->cmd;
        struct iommufd_vdevice *vdev, *curr;
        size_t vdev_size = sizeof(*vdev);
        struct iommufd_viommu *viommu;
        struct iommufd_device *idev;
        u64 virt_id = cmd->virt_id;
        int rc = 0;

        /* virt_id indexes an xarray */
        if (virt_id > ULONG_MAX)
                return -EINVAL;

        viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
        if (IS_ERR(viommu))
                return PTR_ERR(viommu);

        idev = iommufd_get_device(ucmd, cmd->dev_id);
        if (IS_ERR(idev)) {
                rc = PTR_ERR(idev);
                goto out_put_viommu;
        }

        if (viommu->iommu_dev != __iommu_get_iommu_dev(idev->dev)) {
                rc = -EINVAL;
                goto out_put_idev;
        }

        mutex_lock(&idev->igroup->lock);
        if (idev->destroying) {
                rc = -ENOENT;
                goto out_unlock_igroup;
        }

        if (idev->vdev) {
                rc = -EEXIST;
                goto out_unlock_igroup;
        }

        if (viommu->ops && viommu->ops->vdevice_size) {
                /*
                 * It is a driver bug for:
                 * - ops->vdevice_size smaller than the core structure size
                 * - not implementing a pairing ops->vdevice_init op
                 */
                if (WARN_ON_ONCE(viommu->ops->vdevice_size < vdev_size ||
                                 !viommu->ops->vdevice_init)) {
                        rc = -EOPNOTSUPP;
                        goto out_put_idev;
                }
                vdev_size = viommu->ops->vdevice_size;
        }

        vdev = (struct iommufd_vdevice *)_iommufd_object_alloc(
                ucmd->ictx, vdev_size, IOMMUFD_OBJ_VDEVICE);
        if (IS_ERR(vdev)) {
                rc = PTR_ERR(vdev);
                goto out_unlock_igroup;
        }

        vdev->virt_id = virt_id;
        vdev->viommu = viommu;
        refcount_inc(&viommu->obj.users);
        /*
         * A wait_cnt reference is held on the idev so long as we have the
         * pointer. iommufd_device_pre_destroy() will revoke it before the
         * idev real destruction.
         */
        vdev->idev = idev;

        /*
         * iommufd_device_destroy() delays until idev->vdev is NULL before
         * freeing the idev, which only happens once the vdev is finished
         * destruction.
         */
        idev->vdev = vdev;

        curr = xa_cmpxchg(&viommu->vdevs, virt_id, NULL, vdev, GFP_KERNEL);
        if (curr) {
                rc = xa_err(curr) ?: -EEXIST;
                goto out_abort;
        }

        if (viommu->ops && viommu->ops->vdevice_init) {
                rc = viommu->ops->vdevice_init(vdev);
                if (rc)
                        goto out_abort;
        }

        cmd->out_vdevice_id = vdev->obj.id;
        rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
        if (rc)
                goto out_abort;
        iommufd_object_finalize(ucmd->ictx, &vdev->obj);
        goto out_unlock_igroup;

out_abort:
        iommufd_object_abort_and_destroy(ucmd->ictx, &vdev->obj);
out_unlock_igroup:
        mutex_unlock(&idev->igroup->lock);
out_put_idev:
        if (rc)
                iommufd_put_object(ucmd->ictx, &idev->obj);
out_put_viommu:
        iommufd_put_object(ucmd->ictx, &viommu->obj);
        return rc;
}

static void iommufd_hw_queue_destroy_access(struct iommufd_ctx *ictx,
                                            struct iommufd_access *access,
                                            u64 base_iova, size_t length)
{
        u64 aligned_iova = PAGE_ALIGN_DOWN(base_iova);
        u64 offset = base_iova - aligned_iova;

        iommufd_access_unpin_pages(access, aligned_iova,
                                   PAGE_ALIGN(length + offset));
        iommufd_access_detach_internal(access);
        iommufd_access_destroy_internal(ictx, access);
}

void iommufd_hw_queue_destroy(struct iommufd_object *obj)
{
        struct iommufd_hw_queue *hw_queue =
                container_of(obj, struct iommufd_hw_queue, obj);

        if (hw_queue->destroy)
                hw_queue->destroy(hw_queue);
        if (hw_queue->access)
                iommufd_hw_queue_destroy_access(hw_queue->viommu->ictx,
                                                hw_queue->access,
                                                hw_queue->base_addr,
                                                hw_queue->length);
        if (hw_queue->viommu)
                refcount_dec(&hw_queue->viommu->obj.users);
}

/*
 * When the HW accesses the guest queue via physical addresses, the underlying
 * physical pages of the guest queue must be contiguous. Also, for the security
 * concern that IOMMUFD_CMD_IOAS_UNMAP could potentially remove the mappings of
 * the guest queue from the nesting parent iopt while the HW is still accessing
 * the guest queue memory physically, such a HW queue must require an access to
 * pin the underlying pages and prevent that from happening.
 */
static struct iommufd_access *
iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd,
                            struct iommufd_viommu *viommu, phys_addr_t *base_pa)
{
        u64 aligned_iova = PAGE_ALIGN_DOWN(cmd->nesting_parent_iova);
        u64 offset = cmd->nesting_parent_iova - aligned_iova;
        struct iommufd_access *access;
        struct page **pages;
        size_t max_npages;
        size_t length;
        size_t i;
        int rc;

        /* max_npages = DIV_ROUND_UP(offset + cmd->length, PAGE_SIZE) */
        if (check_add_overflow(offset, cmd->length, &length))
                return ERR_PTR(-ERANGE);
        if (check_add_overflow(length, PAGE_SIZE - 1, &length))
                return ERR_PTR(-ERANGE);
        max_npages = length / PAGE_SIZE;
        /* length needs to be page aligned too */
        length = max_npages * PAGE_SIZE;

        /*
         * Use kvcalloc() to avoid memory fragmentation for a large page array.
         * Set __GFP_NOWARN to avoid syzkaller blowups
         */
        pages = kvcalloc(max_npages, sizeof(*pages), GFP_KERNEL | __GFP_NOWARN);
        if (!pages)
                return ERR_PTR(-ENOMEM);

        access = iommufd_access_create_internal(viommu->ictx);
        if (IS_ERR(access)) {
                rc = PTR_ERR(access);
                goto out_free;
        }

        rc = iommufd_access_attach_internal(access, viommu->hwpt->ioas);
        if (rc)
                goto out_destroy;

        rc = iommufd_access_pin_pages(access, aligned_iova, length, pages, 0);
        if (rc)
                goto out_detach;

        /* Validate if the underlying physical pages are contiguous */
        for (i = 1; i < max_npages; i++) {
                if (page_to_pfn(pages[i]) == page_to_pfn(pages[i - 1]) + 1)
                        continue;
                rc = -EFAULT;
                goto out_unpin;
        }

        *base_pa = (page_to_pfn(pages[0]) << PAGE_SHIFT) + offset;
        kvfree(pages);
        return access;

out_unpin:
        iommufd_access_unpin_pages(access, aligned_iova, length);
out_detach:
        iommufd_access_detach_internal(access);
out_destroy:
        iommufd_access_destroy_internal(viommu->ictx, access);
out_free:
        kvfree(pages);
        return ERR_PTR(rc);
}

int iommufd_hw_queue_alloc_ioctl(struct iommufd_ucmd *ucmd)
{
        struct iommu_hw_queue_alloc *cmd = ucmd->cmd;
        struct iommufd_hw_queue *hw_queue;
        struct iommufd_viommu *viommu;
        struct iommufd_access *access;
        size_t hw_queue_size;
        phys_addr_t base_pa;
        u64 last;
        int rc;

        if (cmd->flags || cmd->type == IOMMU_HW_QUEUE_TYPE_DEFAULT)
                return -EOPNOTSUPP;
        if (!cmd->length)
                return -EINVAL;
        if (check_add_overflow(cmd->nesting_parent_iova, cmd->length - 1,
                               &last))
                return -EOVERFLOW;

        viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
        if (IS_ERR(viommu))
                return PTR_ERR(viommu);

        if (!viommu->ops || !viommu->ops->get_hw_queue_size ||
            !viommu->ops->hw_queue_init_phys) {
                rc = -EOPNOTSUPP;
                goto out_put_viommu;
        }

        hw_queue_size = viommu->ops->get_hw_queue_size(viommu, cmd->type);
        if (!hw_queue_size) {
                rc = -EOPNOTSUPP;
                goto out_put_viommu;
        }

        /*
         * It is a driver bug for providing a hw_queue_size smaller than the
         * core HW queue structure size
         */
        if (WARN_ON_ONCE(hw_queue_size < sizeof(*hw_queue))) {
                rc = -EOPNOTSUPP;
                goto out_put_viommu;
        }

        hw_queue = (struct iommufd_hw_queue *)_iommufd_object_alloc_ucmd(
                ucmd, hw_queue_size, IOMMUFD_OBJ_HW_QUEUE);
        if (IS_ERR(hw_queue)) {
                rc = PTR_ERR(hw_queue);
                goto out_put_viommu;
        }

        access = iommufd_hw_queue_alloc_phys(cmd, viommu, &base_pa);
        if (IS_ERR(access)) {
                rc = PTR_ERR(access);
                goto out_put_viommu;
        }

        hw_queue->viommu = viommu;
        refcount_inc(&viommu->obj.users);
        hw_queue->access = access;
        hw_queue->type = cmd->type;
        hw_queue->length = cmd->length;
        hw_queue->base_addr = cmd->nesting_parent_iova;

        rc = viommu->ops->hw_queue_init_phys(hw_queue, cmd->index, base_pa);
        if (rc)
                goto out_put_viommu;

        cmd->out_hw_queue_id = hw_queue->obj.id;
        rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));

out_put_viommu:
        iommufd_put_object(ucmd->ictx, &viommu->obj);
        return rc;
}
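As a companion note to iommufd_viommu_alloc_ioctl() above: its checks spell out a contract for IOMMU drivers. ops->get_viommu_size() must report at least sizeof(struct iommufd_viommu) (returning 0 means the type is unsupported), and ops->viommu_init() must fill viommu->ops before returning, or the core WARNs and fails the ioctl. The sketch below is hypothetical driver-side code illustrating that contract only; "my_viommu", "my_viommu_ops" and IOMMU_VIOMMU_TYPE_MY_HW are invented names, and the callback signatures are paraphrased from the call sites in this file rather than copied from the headers.

/* Hypothetical driver structure embedding the core vIOMMU object. */
struct my_viommu {
        struct iommufd_viommu core;     /* core object; drv size must be >= this */
        u32 vmid;                       /* driver-private state */
};

static const struct iommufd_viommu_ops my_viommu_ops = {
        /* .destroy, .vdevice_size, .vdevice_init, ... as the driver needs */
};

/* Returning 0 makes iommufd_viommu_alloc_ioctl() fail with -EOPNOTSUPP. */
static size_t my_get_viommu_size(struct device *dev, enum iommu_viommu_type type)
{
        if (type != IOMMU_VIOMMU_TYPE_MY_HW)
                return 0;
        return sizeof(struct my_viommu);        /* >= sizeof(struct iommufd_viommu) */
}

static int my_viommu_init(struct iommufd_viommu *viommu,
                          struct iommu_domain *parent_domain,
                          const struct iommu_user_data *user_data)
{
        struct my_viommu *my = container_of(viommu, struct my_viommu, core);

        my->vmid = 0;                   /* program HW state from parent_domain here */
        /* Core code WARNs and returns -EOPNOTSUPP if this is left NULL. */
        viommu->ops = &my_viommu_ops;
        return 0;
}

These two callbacks would be wired into the driver's struct iommu_ops as .get_viommu_size and .viommu_init, which is what dev_iommu_ops(idev->dev) resolves in the allocation path above.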