linux/drivers/iommu/iommufd/eventq.c

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2024 Intel Corporation
*/
#define pr_fmt(fmt) "iommufd: " fmt
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/iommufd.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <uapi/linux/iommufd.h>
#include "../iommu-priv.h"
#include "iommufd_private.h"
/* IOMMUFD_OBJ_FAULT Functions */
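/*
 * Respond IOMMU_PAGE_RESP_INVALID to, and free, every pending fault group
 * owned by @handle: both groups still queued on the deliver list and groups
 * already read by userspace that are waiting in fault->response.
 */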
void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
struct iommufd_attach_handle *handle)
{
struct iommufd_fault *fault = hwpt->fault;
struct iopf_group *group, *next;
struct list_head free_list;
unsigned long index;
if (!fault || !handle)
return;
INIT_LIST_HEAD(&free_list);
mutex_lock(&fault->mutex);
spin_lock(&fault->common.lock);
list_for_each_entry_safe(group, next, &fault->common.deliver, node) {
if (group->attach_handle != &handle->handle)
continue;
list_move(&group->node, &free_list);
}
spin_unlock(&fault->common.lock);
list_for_each_entry_safe(group, next, &free_list, node) {
list_del(&group->node);
iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
iopf_free_group(group);
}
xa_for_each(&fault->response, index, group) {
if (group->attach_handle != &handle->handle)
continue;
xa_erase(&fault->response, index);
iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
iopf_free_group(group);
}
mutex_unlock(&fault->mutex);
}
void iommufd_fault_destroy(struct iommufd_object *obj)
{
struct iommufd_eventq *eventq =
container_of(obj, struct iommufd_eventq, obj);
struct iommufd_fault *fault = eventq_to_fault(eventq);
struct iopf_group *group, *next;
unsigned long index;
/*
* The iommufd object's reference count is zero at this point.
* We can be confident that no other threads are currently
* accessing this pointer. Therefore, acquiring the mutex here
* is unnecessary.
*/
list_for_each_entry_safe(group, next, &fault->common.deliver, node) {
list_del(&group->node);
iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
iopf_free_group(group);
}
xa_for_each(&fault->response, index, group) {
xa_erase(&fault->response, index);
iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
iopf_free_group(group);
}
xa_destroy(&fault->response);
mutex_destroy(&fault->mutex);
}
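/*
 * Translate a kernel struct iommu_fault into the uAPI struct
 * iommu_hwpt_pgfault record that read() hands to userspace.
 */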
static void iommufd_compose_fault_message(struct iommu_fault *fault,
struct iommu_hwpt_pgfault *hwpt_fault,
struct iommufd_device *idev,
u32 cookie)
{
hwpt_fault->flags = fault->prm.flags;
hwpt_fault->dev_id = idev->obj.id;
hwpt_fault->pasid = fault->prm.pasid;
hwpt_fault->grpid = fault->prm.grpid;
hwpt_fault->perm = fault->prm.perm;
hwpt_fault->addr = fault->prm.addr;
hwpt_fault->length = 0;
hwpt_fault->cookie = cookie;
}
/* Fetch the first node out of the fault->deliver list */
static struct iopf_group *
iommufd_fault_deliver_fetch(struct iommufd_fault *fault)
{
struct list_head *list = &fault->common.deliver;
struct iopf_group *group = NULL;
spin_lock(&fault->common.lock);
if (!list_empty(list)) {
group = list_first_entry(list, struct iopf_group, node);
list_del(&group->node);
}
spin_unlock(&fault->common.lock);
return group;
}
/* Restore a node back to the head of the fault->deliver list */
static void iommufd_fault_deliver_restore(struct iommufd_fault *fault,
struct iopf_group *group)
{
spin_lock(&fault->common.lock);
list_add(&group->node, &fault->common.deliver);
spin_unlock(&fault->common.lock);
}
static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
size_t count, loff_t *ppos)
{
size_t fault_size = sizeof(struct iommu_hwpt_pgfault);
struct iommufd_eventq *eventq = filep->private_data;
struct iommufd_fault *fault = eventq_to_fault(eventq);
struct iommu_hwpt_pgfault data = {}; /* zeroed: don't leak stack padding */
struct iommufd_device *idev;
struct iopf_group *group;
struct iopf_fault *iopf;
size_t done = 0;
int rc = 0;
if (*ppos || count % fault_size)
return -ESPIPE;
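/*
 * fault->mutex serializes read()/write() and protects fault->response.
 * The deliver list is protected by its own spinlock, so the iopf handler
 * is never blocked behind a sleeping copy_to_user() here.
 */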
mutex_lock(&fault->mutex);
while ((group = iommufd_fault_deliver_fetch(fault))) {
if (done >= count ||
group->fault_count * fault_size > count - done) {
iommufd_fault_deliver_restore(fault, group);
break;
}
rc = xa_alloc(&fault->response, &group->cookie, group,
xa_limit_32b, GFP_KERNEL);
if (rc) {
iommufd_fault_deliver_restore(fault, group);
break;
}
idev = to_iommufd_handle(group->attach_handle)->idev;
list_for_each_entry(iopf, &group->faults, list) {
iommufd_compose_fault_message(&iopf->fault,
&data, idev,
group->cookie);
if (copy_to_user(buf + done, &data, fault_size)) {
xa_erase(&fault->response, group->cookie);
iommufd_fault_deliver_restore(fault, group);
rc = -EFAULT;
break;
}
done += fault_size;
}
}
mutex_unlock(&fault->mutex);
return done == 0 ? rc : done;
}
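/*
 * Userspace writes an array of struct iommu_hwpt_page_response; each cookie
 * looks up the matching iopf_group in fault->response so it can be completed
 * and freed.
 */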
static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf,
size_t count, loff_t *ppos)
{
size_t response_size = sizeof(struct iommu_hwpt_page_response);
struct iommufd_eventq *eventq = filep->private_data;
struct iommufd_fault *fault = eventq_to_fault(eventq);
struct iommu_hwpt_page_response response;
struct iopf_group *group;
size_t done = 0;
int rc = 0;
if (*ppos || count % response_size)
return -ESPIPE;
mutex_lock(&fault->mutex);
while (count > done) {
rc = copy_from_user(&response, buf + done, response_size);
if (rc)
break;
static_assert((int)IOMMUFD_PAGE_RESP_SUCCESS ==
(int)IOMMU_PAGE_RESP_SUCCESS);
static_assert((int)IOMMUFD_PAGE_RESP_INVALID ==
(int)IOMMU_PAGE_RESP_INVALID);
if (response.code != IOMMUFD_PAGE_RESP_SUCCESS &&
response.code != IOMMUFD_PAGE_RESP_INVALID) {
rc = -EINVAL;
break;
}
group = xa_erase(&fault->response, response.cookie);
if (!group) {
rc = -EINVAL;
break;
}
iopf_group_response(group, response.code);
iopf_free_group(group);
done += response_size;
}
mutex_unlock(&fault->mutex);
return done == 0 ? rc : done;
}
/* IOMMUFD_OBJ_VEVENTQ Functions */
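/*
 * Drain and free every queued vEVENT; the lost_events_header is embedded in
 * the veventq and must not be freed. Caller holds veventqs_rwsem for write.
 */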
void iommufd_veventq_abort(struct iommufd_object *obj)
{
struct iommufd_eventq *eventq =
container_of(obj, struct iommufd_eventq, obj);
struct iommufd_veventq *veventq = eventq_to_veventq(eventq);
struct iommufd_viommu *viommu = veventq->viommu;
struct iommufd_vevent *cur, *next;
lockdep_assert_held_write(&viommu->veventqs_rwsem);
list_for_each_entry_safe(cur, next, &eventq->deliver, node) {
list_del(&cur->node);
if (cur != &veventq->lost_events_header)
kfree(cur);
}
refcount_dec(&viommu->obj.users);
list_del(&veventq->node);
}
void iommufd_veventq_destroy(struct iommufd_object *obj)
{
struct iommufd_veventq *veventq = eventq_to_veventq(
container_of(obj, struct iommufd_eventq, obj));
down_write(&veventq->viommu->veventqs_rwsem);
iommufd_veventq_abort(obj);
up_write(&veventq->viommu->veventqs_rwsem);
}
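/*
 * Fetch the first vEVENT out of the eventq->deliver list. The
 * lost_events_header is embedded in the veventq, so hand out a heap copy of
 * it for copy_to_user() instead of the embedded node itself.
 */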
static struct iommufd_vevent *
iommufd_veventq_deliver_fetch(struct iommufd_veventq *veventq)
{
struct iommufd_eventq *eventq = &veventq->common;
struct list_head *list = &eventq->deliver;
struct iommufd_vevent *vevent = NULL;
spin_lock(&eventq->lock);
if (!list_empty(list)) {
struct iommufd_vevent *next;
next = list_first_entry(list, struct iommufd_vevent, node);
/* Make a copy of the lost_events_header for copy_to_user */
if (next == &veventq->lost_events_header) {
vevent = kzalloc(sizeof(*vevent), GFP_ATOMIC);
if (!vevent)
goto out_unlock;
}
list_del(&next->node);
if (vevent)
memcpy(vevent, next, sizeof(*vevent));
else
vevent = next;
}
out_unlock:
spin_unlock(&eventq->lock);
return vevent;
}
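/* Restore a vEVENT back to the head of the eventq->deliver list */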
static void iommufd_veventq_deliver_restore(struct iommufd_veventq *veventq,
struct iommufd_vevent *vevent)
{
struct iommufd_eventq *eventq = &veventq->common;
struct list_head *list = &eventq->deliver;
spin_lock(&eventq->lock);
if (vevent_for_lost_events_header(vevent)) {
/* Remove the copy of the lost_events_header */
kfree(vevent);
vevent = NULL;
/* An empty list needs the lost_events_header back */
if (list_empty(list))
vevent = &veventq->lost_events_header;
}
if (vevent)
list_add(&vevent->node, list);
spin_unlock(&eventq->lock);
}
static ssize_t iommufd_veventq_fops_read(struct file *filep, char __user *buf,
size_t count, loff_t *ppos)
{
struct iommufd_eventq *eventq = filep->private_data;
struct iommufd_veventq *veventq = eventq_to_veventq(eventq);
struct iommufd_vevent_header *hdr;
struct iommufd_vevent *cur;
size_t done = 0;
int rc = 0;
if (*ppos)
return -ESPIPE;
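/*
 * Each vEVENT is returned as a struct iommufd_vevent_header followed by
 * data_len bytes of payload; a lost-events marker carries the header only.
 */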
while ((cur = iommufd_veventq_deliver_fetch(veventq))) {
/* Validate the remaining bytes against the header size */
if (done >= count || sizeof(*hdr) > count - done) {
iommufd_veventq_deliver_restore(veventq, cur);
break;
}
hdr = &cur->header;
/* For a normal vEVENT, validate against the full size (header + data) */
if (!vevent_for_lost_events_header(cur) &&
sizeof(*hdr) + cur->data_len > count - done) {
iommufd_veventq_deliver_restore(veventq, cur);
break;
}
if (copy_to_user(buf + done, hdr, sizeof(*hdr))) {
iommufd_veventq_deliver_restore(veventq, cur);
rc = -EFAULT;
break;
}
done += sizeof(*hdr);
if (cur->data_len &&
copy_to_user(buf + done, cur->event_data, cur->data_len)) {
iommufd_veventq_deliver_restore(veventq, cur);
rc = -EFAULT;
break;
}
spin_lock(&eventq->lock);
if (!vevent_for_lost_events_header(cur))
veventq->num_events--;
spin_unlock(&eventq->lock);
done += cur->data_len;
kfree(cur);
}
return done == 0 ? rc : done;
}
/* Common Event Queue Functions */
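/*
 * A fault queue is always writable (EPOLLOUT) since responses are accepted
 * at any time; both queue types are readable whenever the deliver list is
 * non-empty.
 */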
static __poll_t iommufd_eventq_fops_poll(struct file *filep,
struct poll_table_struct *wait)
{
struct iommufd_eventq *eventq = filep->private_data;
__poll_t pollflags = 0;
if (eventq->obj.type == IOMMUFD_OBJ_FAULT)
pollflags |= EPOLLOUT;
poll_wait(filep, &eventq->wait_queue, wait);
spin_lock(&eventq->lock);
if (!list_empty(&eventq->deliver))
pollflags |= EPOLLIN | EPOLLRDNORM;
spin_unlock(&eventq->lock);
return pollflags;
}
static int iommufd_eventq_fops_release(struct inode *inode, struct file *filep)
{
struct iommufd_eventq *eventq = filep->private_data;
refcount_dec(&eventq->obj.users);
iommufd_ctx_put(eventq->ictx);
return 0;
}
#define INIT_EVENTQ_FOPS(read_op, write_op) \
((const struct file_operations){ \
.owner = THIS_MODULE, \
.open = nonseekable_open, \
.read = read_op, \
.write = write_op, \
.poll = iommufd_eventq_fops_poll, \
.release = iommufd_eventq_fops_release, \
})
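/*
 * Set up the common eventq state and back it with an anonymous inode file.
 * Returns a new fd number on success; the caller must fd_install() it, or
 * release it with put_unused_fd() and fput() on an error path.
 */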
static int iommufd_eventq_init(struct iommufd_eventq *eventq, char *name,
struct iommufd_ctx *ictx,
const struct file_operations *fops)
{
struct file *filep;
int fdno;
spin_lock_init(&eventq->lock);
INIT_LIST_HEAD(&eventq->deliver);
init_waitqueue_head(&eventq->wait_queue);
filep = anon_inode_getfile(name, fops, eventq, O_RDWR);
if (IS_ERR(filep))
return PTR_ERR(filep);
eventq->ictx = ictx;
iommufd_ctx_get(eventq->ictx);
eventq->filep = filep;
refcount_inc(&eventq->obj.users);
fdno = get_unused_fd_flags(O_CLOEXEC);
if (fdno < 0)
fput(filep);
return fdno;
}
static const struct file_operations iommufd_fault_fops =
INIT_EVENTQ_FOPS(iommufd_fault_fops_read, iommufd_fault_fops_write);
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
{
struct iommu_fault_alloc *cmd = ucmd->cmd;
struct iommufd_fault *fault;
int fdno;
int rc;
if (cmd->flags)
return -EOPNOTSUPP;
fault = __iommufd_object_alloc_ucmd(ucmd, fault, IOMMUFD_OBJ_FAULT,
common.obj);
if (IS_ERR(fault))
return PTR_ERR(fault);
xa_init_flags(&fault->response, XA_FLAGS_ALLOC1);
mutex_init(&fault->mutex);
fdno = iommufd_eventq_init(&fault->common, "[iommufd-pgfault]",
ucmd->ictx, &iommufd_fault_fops);
if (fdno < 0)
return fdno;
cmd->out_fault_id = fault->common.obj.id;
cmd->out_fault_fd = fdno;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
if (rc)
goto out_put_fdno;
fd_install(fdno, fault->common.filep);
return 0;
out_put_fdno:
put_unused_fd(fdno);
fput(fault->common.filep);
return rc;
}
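/*
 * Queue an I/O page fault group for delivery to userspace. The caller is
 * the IOMMU driver's fault path, typically a shared IRQ handler, so only
 * the fault->common.lock spinlock is taken and delivery never blocks on
 * userspace.
 */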
int iommufd_fault_iopf_handler(struct iopf_group *group)
{
struct iommufd_hw_pagetable *hwpt;
struct iommufd_fault *fault;
hwpt = group->attach_handle->domain->iommufd_hwpt;
fault = hwpt->fault;
spin_lock(&fault->common.lock);
list_add_tail(&group->node, &fault->common.deliver);
spin_unlock(&fault->common.lock);
wake_up_interruptible(&fault->common.wait_queue);
return 0;
}
static const struct file_operations iommufd_veventq_fops =
INIT_EVENTQ_FOPS(iommufd_veventq_fops_read, NULL);
int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd)
{
struct iommu_veventq_alloc *cmd = ucmd->cmd;
struct iommufd_veventq *veventq;
struct iommufd_viommu *viommu;
int fdno;
int rc;
if (cmd->flags || cmd->__reserved ||
cmd->type == IOMMU_VEVENTQ_TYPE_DEFAULT)
return -EOPNOTSUPP;
if (!cmd->veventq_depth)
return -EINVAL;
viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
if (IS_ERR(viommu))
return PTR_ERR(viommu);
down_write(&viommu->veventqs_rwsem);
if (iommufd_viommu_find_veventq(viommu, cmd->type)) {
rc = -EEXIST;
goto out_unlock_veventqs;
}
veventq = __iommufd_object_alloc(ucmd->ictx, veventq,
IOMMUFD_OBJ_VEVENTQ, common.obj);
if (IS_ERR(veventq)) {
rc = PTR_ERR(veventq);
goto out_unlock_veventqs;
}
veventq->type = cmd->type;
veventq->viommu = viommu;
refcount_inc(&viommu->obj.users);
veventq->depth = cmd->veventq_depth;
list_add_tail(&veventq->node, &viommu->veventqs);
veventq->lost_events_header.header.flags =
IOMMU_VEVENTQ_FLAG_LOST_EVENTS;
fdno = iommufd_eventq_init(&veventq->common, "[iommufd-viommu-event]",
ucmd->ictx, &iommufd_veventq_fops);
if (fdno < 0) {
rc = fdno;
goto out_abort;
}
cmd->out_veventq_id = veventq->common.obj.id;
cmd->out_veventq_fd = fdno;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
if (rc)
goto out_put_fdno;
iommufd_object_finalize(ucmd->ictx, &veventq->common.obj);
fd_install(fdno, veventq->common.filep);
goto out_unlock_veventqs;
out_put_fdno:
put_unused_fd(fdno);
fput(veventq->common.filep);
out_abort:
iommufd_object_abort_and_destroy(ucmd->ictx, &veventq->common.obj);
out_unlock_veventqs:
up_write(&viommu->veventqs_rwsem);
iommufd_put_object(ucmd->ictx, &viommu->obj);
return rc;
}