linux/drivers/iommu/of_iommu.c
Robin Murphy bcb81ac6ae iommu: Get DT/ACPI parsing into the proper probe path
In hindsight, there were some crucial subtleties overlooked when moving
{of,acpi}_dma_configure() to driver probe time to allow waiting for
IOMMU drivers with -EPROBE_DEFER, and these have become an
ever-increasing source of problems. The IOMMU API has some fundamental
assumptions that iommu_probe_device() is called for every device added
to the system, in the order in which they are added. Calling it in a
random order or not at all dependent on driver binding leads to
malformed groups, a potential lack of isolation for devices with no
driver, and all manner of unexpected concurrency and race conditions.
We've attempted to mitigate the latter with point-fix bodges like
iommu_probe_device_lock, but it's a losing battle and the time has come
to bite the bullet and address the true source of the problem instead.

The crux of the matter is that the firmware parsing actually serves two
distinct purposes; one is identifying the IOMMU instance associated with
a device so we can check its availability, the second is actually
telling that instance about the relevant firmware-provided data for the
device. However the latter also depends on the former, and at the time
there was no good place to defer and retry that separately from the
availability check we also wanted for client driver probe.

Nowadays, though, we have a proper notion of multiple IOMMU instances in
the core API itself, and each one gets a chance to probe its own devices
upon registration, so we can finally make that work as intended for
DT/IORT/VIOT platforms too. All we need is for iommu_probe_device() to
be able to run the iommu_fwspec machinery currently buried deep in the
wrong end of {of,acpi}_dma_configure(). Luckily it turns out to be
surprisingly straightforward to bootstrap this transformation by pretty
much just calling the same path twice. At client driver probe time,
dev->driver is obviously set; conversely at device_add(), or a
subsequent bus_iommu_probe(), any device waiting for an IOMMU really
should *not* have a driver already, so we can use that as a condition to
disambiguate the two cases, and avoid recursing back into the IOMMU core
at the wrong times.

Obviously this isn't the nicest thing, but for now it gives us a
functional baseline to then unpick the layers in between without many
more awkward cross-subsystem patches. There are some minor side-effects
like dma_range_map potentially being created earlier, and some debug
prints being repeated, but these aren't significantly detrimental. Let's
make things work first, then deal with making them nice.

With the basic flow finally in the right order again, the next step is
probably turning the bus->dma_configure paths inside-out, since all we
really need from bus code is its notion of which device and input ID(s)
to parse the common firmware properties with...

Acked-by: Bjorn Helgaas <bhelgaas@google.com> # pci-driver.c
Acked-by: Rob Herring (Arm) <robh@kernel.org> # of/device.c
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/e3b191e6fd6ca9a1e84c5e5e40044faf97abb874.1740753261.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
2025-03-11 14:05:43 +01:00

272 lines
6.8 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* OF helpers for IOMMU
*
* Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved.
*/
#include <linux/export.h>
#include <linux/iommu.h>
#include <linux/limits.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_iommu.h>
#include <linux/of_pci.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/fsl/mc.h>
#include "iommu-priv.h"
static int of_iommu_xlate(struct device *dev,
struct of_phandle_args *iommu_spec)
{
const struct iommu_ops *ops;
int ret;
if (!of_device_is_available(iommu_spec->np))
return -ENODEV;
ret = iommu_fwspec_init(dev, of_fwnode_handle(iommu_spec->np));
if (ret)
return ret;
ops = iommu_ops_from_fwnode(&iommu_spec->np->fwnode);
if (!ops->of_xlate || !try_module_get(ops->owner))
return -ENODEV;
ret = ops->of_xlate(dev, iommu_spec);
module_put(ops->owner);
return ret;
}
static int of_iommu_configure_dev_id(struct device_node *master_np,
struct device *dev,
const u32 *id)
{
struct of_phandle_args iommu_spec = { .args_count = 1 };
int err;
err = of_map_id(master_np, *id, "iommu-map",
"iommu-map-mask", &iommu_spec.np,
iommu_spec.args);
if (err)
return err;
err = of_iommu_xlate(dev, &iommu_spec);
of_node_put(iommu_spec.np);
return err;
}
static int of_iommu_configure_dev(struct device_node *master_np,
struct device *dev)
{
struct of_phandle_args iommu_spec;
int err = -ENODEV, idx = 0;
while (!of_parse_phandle_with_args(master_np, "iommus",
"#iommu-cells",
idx, &iommu_spec)) {
err = of_iommu_xlate(dev, &iommu_spec);
of_node_put(iommu_spec.np);
idx++;
if (err)
break;
}
return err;
}
struct of_pci_iommu_alias_info {
struct device *dev;
struct device_node *np;
};
static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
{
struct of_pci_iommu_alias_info *info = data;
u32 input_id = alias;
return of_iommu_configure_dev_id(info->np, info->dev, &input_id);
}
static int of_iommu_configure_device(struct device_node *master_np,
struct device *dev, const u32 *id)
{
return (id) ? of_iommu_configure_dev_id(master_np, dev, id) :
of_iommu_configure_dev(master_np, dev);
}
static void of_pci_check_device_ats(struct device *dev, struct device_node *np)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
if (fwspec && of_property_read_bool(np, "ats-supported"))
fwspec->flags |= IOMMU_FWSPEC_PCI_RC_ATS;
}
/*
* Returns:
* 0 on success, an iommu was configured
* -ENODEV if the device does not have any IOMMU
* -EPROBEDEFER if probing should be tried again
* -errno fatal errors
*/
int of_iommu_configure(struct device *dev, struct device_node *master_np,
const u32 *id)
{
bool dev_iommu_present;
int err;
if (!master_np)
return -ENODEV;
/* Serialise to make dev->iommu stable under our potential fwspec */
mutex_lock(&iommu_probe_device_lock);
if (dev_iommu_fwspec_get(dev)) {
mutex_unlock(&iommu_probe_device_lock);
return 0;
}
dev_iommu_present = dev->iommu;
/*
* We don't currently walk up the tree looking for a parent IOMMU.
* See the `Notes:' section of
* Documentation/devicetree/bindings/iommu/iommu.txt
*/
if (dev_is_pci(dev)) {
struct of_pci_iommu_alias_info info = {
.dev = dev,
.np = master_np,
};
pci_request_acs();
err = pci_for_each_dma_alias(to_pci_dev(dev),
of_pci_iommu_init, &info);
of_pci_check_device_ats(dev, master_np);
} else {
err = of_iommu_configure_device(master_np, dev, id);
}
if (err && dev_iommu_present)
iommu_fwspec_free(dev);
else if (err && dev->iommu)
dev_iommu_free(dev);
mutex_unlock(&iommu_probe_device_lock);
/*
* If we're not on the iommu_probe_device() path (as indicated by the
* initial dev->iommu) then try to simulate it. This should no longer
* happen unless of_dma_configure() is being misused outside bus code.
*/
if (!err && dev->bus && !dev_iommu_present)
err = iommu_probe_device(dev);
if (err && err != -EPROBE_DEFER)
dev_dbg(dev, "Adding to IOMMU failed: %d\n", err);
return err;
}
static enum iommu_resv_type __maybe_unused
iommu_resv_region_get_type(struct device *dev,
struct resource *phys,
phys_addr_t start, size_t length)
{
phys_addr_t end = start + length - 1;
/*
* IOMMU regions without an associated physical region cannot be
* mapped and are simply reservations.
*/
if (phys->start >= phys->end)
return IOMMU_RESV_RESERVED;
/* may be IOMMU_RESV_DIRECT_RELAXABLE for certain cases */
if (start == phys->start && end == phys->end)
return IOMMU_RESV_DIRECT;
dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n", phys,
&start, &end);
return IOMMU_RESV_RESERVED;
}
/**
* of_iommu_get_resv_regions - reserved region driver helper for device tree
* @dev: device for which to get reserved regions
* @list: reserved region list
*
* IOMMU drivers can use this to implement their .get_resv_regions() callback
* for memory regions attached to a device tree node. See the reserved-memory
* device tree bindings on how to use these:
*
* Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
*/
void of_iommu_get_resv_regions(struct device *dev, struct list_head *list)
{
#if IS_ENABLED(CONFIG_OF_ADDRESS)
struct of_phandle_iterator it;
int err;
of_for_each_phandle(&it, err, dev->of_node, "memory-region", NULL, 0) {
const __be32 *maps, *end;
struct resource phys;
int size;
memset(&phys, 0, sizeof(phys));
/*
* The "reg" property is optional and can be omitted by reserved-memory regions
* that represent reservations in the IOVA space, which are regions that should
* not be mapped.
*/
if (of_property_present(it.node, "reg")) {
err = of_address_to_resource(it.node, 0, &phys);
if (err < 0) {
dev_err(dev, "failed to parse memory region %pOF: %d\n",
it.node, err);
continue;
}
}
maps = of_get_property(it.node, "iommu-addresses", &size);
if (!maps)
continue;
end = maps + size / sizeof(__be32);
while (maps < end) {
struct device_node *np;
u32 phandle;
phandle = be32_to_cpup(maps++);
np = of_find_node_by_phandle(phandle);
if (np == dev->of_node) {
int prot = IOMMU_READ | IOMMU_WRITE;
struct iommu_resv_region *region;
enum iommu_resv_type type;
phys_addr_t iova;
size_t length;
if (of_dma_is_coherent(dev->of_node))
prot |= IOMMU_CACHE;
maps = of_translate_dma_region(np, maps, &iova, &length);
if (length == 0) {
dev_warn(dev, "Cannot reserve IOVA region of 0 size\n");
continue;
}
type = iommu_resv_region_get_type(dev, &phys, iova, length);
region = iommu_alloc_resv_region(iova, length, prot, type,
GFP_KERNEL);
if (region)
list_add_tail(&region->list, list);
}
}
}
#endif
}
EXPORT_SYMBOL(of_iommu_get_resv_regions);