Pass the phys_addr_t down through the call chain from the top instead of
passing a pgd_t * KVA. This moves the __pa() into
domain_setup_first_level(), which is the first function in this call
chain to obtain the pgd from the IOMMU page table. The SVA flow is also
adjusted to get the PA of the mm->pgd. iommupt will move the __pa() into
iommupt code; it never shares the KVA of the page table with the driver.

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/1-v3-dbbe6f7e7ae3+124ffe-vtd_prep_jgg@nvidia.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20250714045028.958850-4-baolu.lu@linux.intel.com
Signed-off-by: Will Deacon <will@kernel.org>

// SPDX-License-Identifier: GPL-2.0
/*
 * intel-pasid.c - PASID idr, table and entry manipulation
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>

#include "iommu.h"
#include "pasid.h"
#include "../iommu-pages.h"

/*
 * Intel IOMMU system wide PASID name space:
 */
u32 intel_pasid_max_id = PASID_MAX;

/*
 * Per device pasid table management:
 */

/*
 * Allocate a pasid table for @dev. It should be called in a
 * single-thread context.
 */
int intel_pasid_alloc_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	u32 max_pasid = 0;
	int order, size;

	might_sleep();
	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return -ENODEV;
	if (WARN_ON(info->pasid_table))
		return -EEXIST;

	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
	if (!pasid_table)
		return -ENOMEM;

	if (info->pasid_supported)
		max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
				  intel_pasid_max_id);

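	/*
	 * Each pasid directory entry is 8 bytes and covers
	 * 2^PASID_PDE_SHIFT (64) PASIDs, so the directory needs
	 * max_pasid >> (PASID_PDE_SHIFT - 3) bytes. For example, the
	 * full 20-bit PASID space (max_pasid == 0x100000) needs a
	 * 128 KiB directory, i.e. order 5; the rounded-up allocation
	 * is then reflected back into max_pasid below.
	 */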
	size = max_pasid >> (PASID_PDE_SHIFT - 3);
	order = size ? get_order(size) : 0;
	dir = iommu_alloc_pages_node_sz(info->iommu->node, GFP_KERNEL,
					1 << (order + PAGE_SHIFT));
	if (!dir) {
		kfree(pasid_table);
		return -ENOMEM;
	}

	pasid_table->table = dir;
	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
	info->pasid_table = pasid_table;

	if (!ecap_coherent(info->iommu->ecap))
		clflush_cache_range(pasid_table->table, (1 << order) * PAGE_SIZE);

	return 0;
}

void intel_pasid_free_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *table;
	int i, max_pde;

	info = dev_iommu_priv_get(dev);
	if (!info || !dev_is_pci(dev) || !info->pasid_table)
		return;

	pasid_table = info->pasid_table;
	info->pasid_table = NULL;

	/* Free scalable mode PASID directory tables: */
	dir = pasid_table->table;
	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
	for (i = 0; i < max_pde; i++) {
		table = get_pasid_table_from_pde(&dir[i]);
		iommu_free_pages(table);
	}

	iommu_free_pages(pasid_table->table);
	kfree(pasid_table);
}

struct pasid_table *intel_pasid_get_table(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info)
		return NULL;

	return info->pasid_table;
}

static int intel_pasid_get_dev_max_id(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->pasid_table)
		return 0;

	return info->pasid_table->max_pasid;
}

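/*
 * The PASID table is a two-level structure: the upper PASID bits index
 * the directory, the low PASID_PDE_SHIFT bits index the leaf table. For
 * example, PASID 0x1234 resolves to directory entry 0x48 and entry 0x34
 * within the leaf table that directory entry points to.
 */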
static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
	info = dev_iommu_priv_get(dev);
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

retry:
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		u64 tmp;

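		/*
		 * GFP_ATOMIC, as this may be reached with iommu->lock
		 * held; the setup paths below call it under the
		 * spinlock.
		 */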
		entries = iommu_alloc_pages_node_sz(info->iommu->node,
						    GFP_ATOMIC, SZ_4K);
		if (!entries)
			return NULL;

		/*
		 * The pasid directory table entry won't be freed after
		 * allocation. No worry about the race with free and
		 * clear. However, this entry might be populated by others
		 * while we are preparing it. Use theirs with a retry.
		 */
		tmp = 0ULL;
		if (!try_cmpxchg64(&dir[dir_index].val, &tmp,
				   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
			iommu_free_pages(entries);
			goto retry;
		}
		if (!ecap_coherent(info->iommu->ecap)) {
			clflush_cache_range(entries, VTD_PAGE_SIZE);
			clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
		}
	}

	return &entries[index];
}

/*
 * Interfaces for PASID table entry manipulation:
 */
static void
intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pe;

	pe = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pe))
		return;

	if (fault_ignore && pasid_pte_is_present(pe))
		pasid_clear_entry_with_fpd(pe);
	else
		pasid_clear_entry(pe);
}

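/*
 * Submit a PASID-cache invalidation descriptor: PASID-selective within
 * the given domain, i.e. only the cached entry for this (DID, PASID)
 * pair is dropped.
 */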
static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
				    u16 did, u32 pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
		   QI_PC_PASID(pasid) | QI_PC_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(iommu, &desc, 1, 0);
}

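/*
 * An address of 0 with a size order of 64 - VTD_PAGE_SHIFT describes
 * the whole input address space, so both invalidations below are full
 * device-TLB flushes for the target function.
 */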
static void
devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
			       struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	u16 sid, qdep, pfsid;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->ats_enabled)
		return;

	if (pci_dev_is_disconnected(to_pci_dev(dev)))
		return;

	sid = PCI_DEVID(info->bus, info->devfn);
	qdep = info->ats_qdep;
	pfsid = info->pfsid;

	/*
	 * When PASID 0 is used, it indicates RID2PASID (a DMA request
	 * without PASID), so a devTLB flush without PASID should be used.
	 * For a non-zero PASID under SVA usage, the device could do DMA
	 * with multiple PASIDs, and it is more efficient to flush the
	 * devTLB specific to the PASID.
	 */
	if (pasid == IOMMU_NO_PASID)
		qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
	else
		qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}

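/*
 * Teardown order: clear the entry under the lock, then invalidate the
 * PASID cache, the IOTLB (PASID-based or domain-selective, depending on
 * the translation type), the device TLB, and finally drain any pending
 * page requests for the PASID.
 */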
void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
				 u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte)) {
		spin_unlock(&iommu->lock);
		return;
	}

	if (!pasid_pte_is_present(pte)) {
		if (!pasid_pte_is_fault_disabled(pte)) {
			WARN_ON(READ_ONCE(pte->val[0]) != 0);
			spin_unlock(&iommu->lock);
			return;
		}

		/*
		 * When a PASID is used for SVA by a device, it's possible
		 * that the pasid entry is non-present with the Fault
		 * Processing Disabled bit set. Clear the pasid entry and
		 * drain the PRQ for the PASID before return.
		 */
		pasid_clear_entry(pte);
		spin_unlock(&iommu->lock);
		intel_iommu_drain_pasid_prq(dev, pasid);

		return;
	}

	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	intel_pasid_clear_entry(dev, pasid, fault_ignore);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY)
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	else
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);
	if (!fault_ignore)
		intel_iommu_drain_pasid_prq(dev, pasid);
}

/*
 * Flush the caches for a newly set up pasid table entry. This must not
 * be used to flush updates to an already-present entry; see
 * intel_pasid_flush_present() for that.
 */
static void pasid_flush_caches(struct intel_iommu *iommu,
			       struct pasid_entry *pte,
			       u32 pasid, u16 did)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

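	/*
	 * With caching mode (CM=1, typical when the IOMMU is
	 * virtualized), hardware may cache non-present entries, so the
	 * non-present to present transition must be invalidated
	 * explicitly; otherwise flushing the write buffer is enough.
	 */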
	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	} else {
		iommu_flush_write_buffer(iommu);
	}
}

/*
 * This function is supposed to be used after the caller updates the
 * fields of a pasid table entry, except for the SSADE and P bits. It:
 * - Flushes the cacheline if needed
 * - Flushes the caches per Table 28 "Guidance to Software for
 *   Invalidations" of the VT-d spec 5.0.
 */
static void intel_pasid_flush_present(struct intel_iommu *iommu,
				      struct device *dev,
				      u32 pasid, u16 did,
				      struct pasid_entry *pte)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * Table 28 "Guidance to Software for Invalidations" of the VT-d
	 * spec 5.0 calls for:
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 * - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);
}

/*
 * Set up the scalable mode pasid table entry for first only
 * translation type.
 */
static void pasid_pte_config_first_level(struct intel_iommu *iommu,
					 struct pasid_entry *pte,
					 phys_addr_t fsptptr, u16 did,
					 int flags)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);

	/* Setup the first level page table pointer: */
	pasid_set_flptr(pte, fsptptr);

	if (flags & PASID_FLAG_FL5LP)
		pasid_set_flpm(pte, 1);

	if (flags & PASID_FLAG_PAGE_SNOOP)
		pasid_set_pgsnp(pte);

	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/* Setup Present and PASID Granular Transfer Type: */
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
	pasid_set_present(pte);
}

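/*
 * For instance, attaching a 4-level first-stage page table whose root
 * is at physical address fsptptr to PASID 10 of a device might look
 * like (values illustrative only):
 *
 *	ret = intel_pasid_setup_first_level(iommu, dev, fsptptr, 10,
 *					    domain_id_iommu(domain, iommu),
 *					    0);
 *
 * with PASID_FLAG_FL5LP passed in flags instead when the table uses
 * 5-level paging.
 */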
int intel_pasid_setup_first_level(struct intel_iommu *iommu, struct device *dev,
				  phys_addr_t fsptptr, u32 pasid, u16 did,
				  int flags)
{
	struct pasid_entry *pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
		pr_err("No 5-level paging support for first-level on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_first_level(iommu, pte, fsptptr, did, flags);

	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

int intel_pasid_replace_first_level(struct intel_iommu *iommu,
				    struct device *dev, phys_addr_t fsptptr,
				    u32 pasid, u16 did, u16 old_did,
				    int flags)
{
	struct pasid_entry *pte, new_pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
		pr_err("No 5-level paging support for first-level on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pasid_pte_config_first_level(iommu, &new_pte, fsptptr, did, flags);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}

/*
 * Set up the scalable mode pasid entry for second only translation type.
 */
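/*
 * The agaw argument selects the second-stage table height; agaw 2
 * programs a 4-level (48-bit) table and agaw 3 a 5-level (57-bit) one,
 * following the kernel's agaw-to-level convention (level = agaw + 2).
 */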
static void pasid_pte_config_second_level(struct intel_iommu *iommu,
					  struct pasid_entry *pte,
					  u64 pgd_val, int agaw, u16 did,
					  bool dirty_tracking)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_address_width(pte, agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	if (dirty_tracking)
		pasid_set_ssade(pte);

	pasid_set_present(pte);
}

int intel_pasid_setup_second_level(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pgd = domain->pgd;
	pgd_val = virt_to_phys(pgd);
	did = domain_id_iommu(domain, iommu);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_second_level(iommu, pte, pgd_val, domain->agaw,
				      did, domain->dirty_tracking);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

int intel_pasid_replace_second_level(struct intel_iommu *iommu,
				     struct dmar_domain *domain,
				     struct device *dev, u16 old_did,
				     u32 pasid)
{
	struct pasid_entry *pte, new_pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pgd = domain->pgd;
	pgd_val = virt_to_phys(pgd);
	did = domain_id_iommu(domain, iommu);

	pasid_pte_config_second_level(iommu, &new_pte, pgd_val,
				      domain->agaw, did,
				      domain->dirty_tracking);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}

/*
 * Set up dirty tracking on a second only or nested translation type.
 */
int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
				     struct device *dev, u32 pasid,
				     bool enabled)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	if (pgtt != PASID_ENTRY_PGTT_SL_ONLY &&
	    pgtt != PASID_ENTRY_PGTT_NESTED) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev,
			"Dirty tracking not supported on translation type %d\n",
			pgtt);
		return -EOPNOTSUPP;
	}

	if (pasid_get_ssade(pte) == enabled) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	if (enabled)
		pasid_set_ssade(pte);
	else
		pasid_clear_ssade(pte);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * From VT-d spec table 25 "Guidance to Software for Invalidations":
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 *   If (PGTT=SS or Nested)
	 *    - Domain-selective IOTLB invalidation
	 *   Else
	 *    - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);

	return 0;
}

/*
 * Set up the scalable mode pasid entry for passthrough translation type.
 */
static void pasid_pte_config_pass_through(struct intel_iommu *iommu,
					  struct pasid_entry *pte, u16 did)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	pasid_set_present(pte);
}

int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
				   struct device *dev, u32 pasid)
{
	u16 did = FLPT_DEFAULT_DID;
	struct pasid_entry *pte;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_pass_through(iommu, pte, did);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

int intel_pasid_replace_pass_through(struct intel_iommu *iommu,
				     struct device *dev, u16 old_did,
				     u32 pasid)
{
	struct pasid_entry *pte, new_pte;
	u16 did = FLPT_DEFAULT_DID;

	pasid_pte_config_pass_through(iommu, &new_pte, did);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}

/*
 * Set the page snoop control for a pasid entry which has been set up.
 */
void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
					  struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	u16 did;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte || !pasid_pte_is_present(pte))) {
		spin_unlock(&iommu->lock);
		return;
	}

	pasid_set_pgsnp(pte);
	did = pasid_get_domain_id(pte);
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, did, pte);
}

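/*
 * A nested entry combines a user/guest-managed first stage with the
 * parent domain's second stage: the first-stage pointer and flags come
 * from the user-provided struct iommu_hwpt_vtd_s1, while the
 * second-stage pointer, address width and dirty tracking come from
 * s2_domain. A minimal stage-1 configuration might look like (field
 * values are illustrative only):
 *
 *	struct iommu_hwpt_vtd_s1 s1_cfg = {
 *		.flags		= IOMMU_VTD_S1_SRE,
 *		.pgtbl_addr	= fsptptr,
 *		.addr_width	= ADDR_WIDTH_4LEVEL,
 *	};
 */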
static void pasid_pte_config_nested(struct intel_iommu *iommu,
				    struct pasid_entry *pte,
				    struct iommu_hwpt_vtd_s1 *s1_cfg,
				    struct dmar_domain *s2_domain,
				    u16 did)
{
	struct dma_pte *pgd = s2_domain->pgd;

	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);

	if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
		pasid_set_flpm(pte, 1);

	pasid_set_flptr(pte, s1_cfg->pgtbl_addr);

	if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
		pasid_set_sre(pte);
		if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
			pasid_set_wpe(pte);
	}

	if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
		pasid_set_eafe(pte);

	if (s2_domain->force_snooping)
		pasid_set_pgsnp(pte);

	pasid_set_slptr(pte, virt_to_phys(pgd));
	pasid_set_fault_enable(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, s2_domain->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	if (s2_domain->dirty_tracking)
		pasid_set_ssade(pte);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
	pasid_set_present(pte);
}

/**
 * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
 * @iommu: IOMMU which the device belongs to
 * @dev: Device to be set up for translation
 * @pasid: PASID to be programmed in the device PASID table
 * @domain: User stage-1 domain nested on a stage-2 domain
 *
 * This is used for nested translation. The input domain should be
 * nested type and nested on a parent with the 'is_nested_parent' flag
 * set.
 */
int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
			     u32 pasid, struct dmar_domain *domain)
{
	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
	struct dmar_domain *s2_domain = domain->s2_domain;
	u16 did = domain_id_iommu(domain, iommu);
	struct pasid_entry *pte;

	/* Address width should match the address width supported by hardware */
	switch (s1_cfg->addr_width) {
	case ADDR_WIDTH_4LEVEL:
		break;
	case ADDR_WIDTH_5LEVEL:
		if (!cap_fl5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}
		break;
	default:
		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
				    s1_cfg->addr_width);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
		pr_err_ratelimited("No supervisor request support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
		pr_err_ratelimited("No extended access flag support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}
	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_nested(iommu, pte, s1_cfg, s2_domain, did);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

int intel_pasid_replace_nested(struct intel_iommu *iommu,
			       struct device *dev, u32 pasid,
			       u16 old_did, struct dmar_domain *domain)
{
	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
	struct dmar_domain *s2_domain = domain->s2_domain;
	u16 did = domain_id_iommu(domain, iommu);
	struct pasid_entry *pte, new_pte;

	/* Address width should match the address width supported by hardware */
	switch (s1_cfg->addr_width) {
	case ADDR_WIDTH_4LEVEL:
		break;
	case ADDR_WIDTH_5LEVEL:
		if (!cap_fl5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}
		break;
	default:
		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
				    s1_cfg->addr_width);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
		pr_err_ratelimited("No supervisor request support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
		pr_err_ratelimited("No extended access flag support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	pasid_pte_config_nested(iommu, &new_pte, s1_cfg, s2_domain, did);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}

/*
 * Interfaces to setup or teardown a pasid table to the scalable-mode
 * context table entry:
 */

static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;
	u16 did;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, false);
	if (!context) {
		spin_unlock(&iommu->lock);
		return;
	}

	did = context_domain_id(context);
	context_clear_entry(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));
	spin_unlock(&iommu->lock);
	intel_context_flush_no_pasid(info, context, did);
}

static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev == &pdev->dev)
		device_pasid_table_teardown(dev, PCI_BUS_NUM(alias), alias & 0xff);

	return 0;
}

void intel_pasid_teardown_sm_context(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	if (!dev_is_pci(dev)) {
		device_pasid_table_teardown(dev, info->bus, info->devfn);
		return;
	}

	pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev);
}

/*
 * Get the PASID directory size for scalable mode context entry.
 * Value of X in the PDTS field of a scalable mode context entry
 * indicates PASID directory with 2^(X + 7) entries.
 */
static unsigned long context_get_sm_pds(struct pasid_table *table)
{
	unsigned long pds, max_pde;

	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
	pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
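	/*
	 * e.g. for the full 20-bit PASID space: max_pasid = 1 << 20,
	 * so max_pde = 1 << 14 and pds = 14; the returned PDTS value
	 * of 7 encodes 2^(7 + 7) = 16384 directory entries.
	 */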
	if (pds < 7)
		return 0;

	return pds - 7;
}

static int context_entry_set_pasid_table(struct context_entry *context,
					 struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct pasid_table *table = info->pasid_table;
	struct intel_iommu *iommu = info->iommu;
	unsigned long pds;

	context_clear_entry(context);

	pds = context_get_sm_pds(table);
	context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds);
	context_set_sm_rid2pasid(context, IOMMU_NO_PASID);

	if (info->ats_supported)
		context_set_sm_dte(context);
	if (info->pasid_supported)
		context_set_pasid(context);
	if (info->pri_supported)
		context_set_sm_pre(context);

	context_set_fault_enable(context);
	context_set_present(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));

	return 0;
}

static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, true);
	if (!context) {
		spin_unlock(&iommu->lock);
		return -ENOMEM;
	}

	if (context_present(context) && !context_copied(iommu, bus, devfn)) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	if (context_copied(iommu, bus, devfn)) {
		context_clear_entry(context);
		__iommu_flush_cache(iommu, context, sizeof(*context));

		/*
		 * For kdump cases, old valid entries may be cached due to
		 * the in-flight DMA and copied pgtable, but there is no
		 * unmapping behaviour for them, thus we need explicit cache
		 * flushes for all affected domain IDs and PASIDs used in
		 * the copied PASID table. Given that we have no idea about
		 * which domain IDs and PASIDs were used in the copied tables,
		 * upgrade them to global PASID and IOTLB cache invalidation.
		 */
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
		devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);

		/*
		 * At this point, the device is supposed to have finished
		 * reset at its driver probe stage, so no in-flight DMA
		 * will exist, and we don't need to worry anymore hereafter.
		 */
		clear_context_copied(iommu, bus, devfn);
	}

	context_entry_set_pasid_table(context, dev);
	spin_unlock(&iommu->lock);

	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entries we don't need to flush the caches. If it does
	 * cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
	}

	return 0;
}

static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev != &pdev->dev)
		return 0;

	return device_pasid_table_setup(dev, PCI_BUS_NUM(alias), alias & 0xff);
}

/*
 * Set the device's PASID table in its context table entry.
 *
 * The PASID table is set in the context entries of both the device
 * itself and its alias requester IDs for DMA.
 */
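/*
 * A conventional PCI device behind a PCIe-to-PCI bridge, for instance,
 * issues DMA tagged with the bridge's requester ID, so
 * pci_for_each_dma_alias() programs the PASID table at every alias RID
 * as well.
 */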
int intel_pasid_setup_sm_context(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	if (!dev_is_pci(dev))
		return device_pasid_table_setup(dev, info->bus, info->devfn);

	return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev);
}

/*
 * Global Device-TLB invalidation following changes in a context entry which
 * was present.
 */
static void __context_flush_dev_iotlb(struct device_domain_info *info)
{
	if (!info->ats_enabled)
		return;

	qi_flush_dev_iotlb(info->iommu, PCI_DEVID(info->bus, info->devfn),
			   info->pfsid, info->ats_qdep, 0, MAX_AGAW_PFN_WIDTH);

	/*
	 * There is no guarantee that the device DMA is stopped when it reaches
	 * here. Therefore, always attempt the extra device TLB invalidation
	 * quirk. The impact on performance is acceptable since this is not a
	 * performance-critical path.
	 */
	quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, IOMMU_NO_PASID,
				  info->ats_qdep);
}

/*
 * Cache invalidations after a change in a context table entry that was
 * present, according to Spec 6.5.3.3 (Guidance to Software for
 * Invalidations). This helper can only be used when the IOMMU is working
 * in legacy mode, or in scalable mode while all PASID table entries of
 * the device are non-present.
 */
void intel_context_flush_no_pasid(struct device_domain_info *info,
				  struct context_entry *context, u16 did)
{
	struct intel_iommu *iommu = info->iommu;

	/*
	 * Device-selective context-cache invalidation. The Domain-ID field
	 * of the Context-cache Invalidate Descriptor is ignored by hardware
	 * when operating in scalable mode. Therefore the @did value doesn't
	 * matter in scalable mode.
	 */
	iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info->devfn),
				   DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL);

	/*
	 * For legacy mode:
	 * - Domain-selective IOTLB invalidation
	 * - Global Device-TLB invalidation to all affected functions
	 */
	if (!sm_supported(iommu)) {
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
		__context_flush_dev_iotlb(info);

		return;
	}

	__context_flush_dev_iotlb(info);
}