summaryrefslogtreecommitdiffstats
path: root/drivers/iommu/intel
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 17:39:57 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 17:39:57 +0000
commitdc50eab76b709d68175a358d6e23a5a3890764d3 (patch)
treec754d0390db060af0213ff994f0ac310e4cfd6e9 /drivers/iommu/intel
parentAdding debian version 6.6.15-2. (diff)
downloadlinux-dc50eab76b709d68175a358d6e23a5a3890764d3.tar.xz
linux-dc50eab76b709d68175a358d6e23a5a3890764d3.zip
Merging upstream version 6.7.7.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--drivers/iommu/intel/Kconfig3
-rw-r--r--drivers/iommu/intel/Makefile2
-rw-r--r--drivers/iommu/intel/debugfs.c215
-rw-r--r--drivers/iommu/intel/iommu.c238
-rw-r--r--drivers/iommu/intel/iommu.h85
-rw-r--r--drivers/iommu/intel/nested.c129
-rw-r--r--drivers/iommu/intel/pasid.c222
-rw-r--r--drivers/iommu/intel/pasid.h5
8 files changed, 831 insertions, 68 deletions
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 2e56bd79f5..012cd2541a 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -11,10 +11,11 @@ config DMAR_DEBUG
config INTEL_IOMMU
bool "Support for Intel IOMMU using DMA Remapping Devices"
- depends on PCI_MSI && ACPI && (X86 || IA64)
+ depends on PCI_MSI && ACPI && X86
select DMA_OPS
select IOMMU_API
select IOMMU_IOVA
+ select IOMMUFD_DRIVER if IOMMUFD
select NEED_DMA_MAP_STATE
select DMAR_TABLE
select SWIOTLB
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index 7af3b8a4f2..5dabf081a7 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DMAR_TABLE) += dmar.o
-obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o
+obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o
obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
obj-$(CONFIG_DMAR_PERF) += perf.o
obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index 1f92528510..dee61e513b 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -111,6 +111,8 @@ static const struct iommu_regset iommu_regs_64[] = {
IOMMU_REGSET_ENTRY(VCRSP),
};
+static struct dentry *intel_iommu_debug;
+
static int iommu_regset_show(struct seq_file *m, void *unused)
{
struct dmar_drhd_unit *drhd;
@@ -311,9 +313,14 @@ static inline unsigned long level_to_directory_size(int level)
static inline void
dump_page_info(struct seq_file *m, unsigned long iova, u64 *path)
{
- seq_printf(m, "0x%013lx |\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\n",
- iova >> VTD_PAGE_SHIFT, path[5], path[4],
- path[3], path[2], path[1]);
+ seq_printf(m, "0x%013lx |\t0x%016llx\t0x%016llx\t0x%016llx",
+ iova >> VTD_PAGE_SHIFT, path[5], path[4], path[3]);
+ if (path[2]) {
+ seq_printf(m, "\t0x%016llx", path[2]);
+ if (path[1])
+ seq_printf(m, "\t0x%016llx", path[1]);
+ }
+ seq_putc(m, '\n');
}
static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde,
@@ -340,58 +347,140 @@ static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde,
}
}
-static int __show_device_domain_translation(struct device *dev, void *data)
+static int domain_translation_struct_show(struct seq_file *m,
+ struct device_domain_info *info,
+ ioasid_t pasid)
{
- struct dmar_domain *domain;
- struct seq_file *m = data;
- u64 path[6] = { 0 };
-
- domain = to_dmar_domain(iommu_get_domain_for_dev(dev));
- if (!domain)
- return 0;
+ bool scalable, found = false;
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ u16 devfn, bus, seg;
- seq_printf(m, "Device %s @0x%llx\n", dev_name(dev),
- (u64)virt_to_phys(domain->pgd));
- seq_puts(m, "IOVA_PFN\t\tPML5E\t\t\tPML4E\t\t\tPDPE\t\t\tPDE\t\t\tPTE\n");
+ bus = info->bus;
+ devfn = info->devfn;
+ seg = info->segment;
- pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
- seq_putc(m, '\n');
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ struct context_entry *context;
+ u64 pgd, path[6] = { 0 };
+ u32 sts, agaw;
- /* Don't iterate */
- return 1;
-}
+ if (seg != iommu->segment)
+ continue;
-static int show_device_domain_translation(struct device *dev, void *data)
-{
- struct iommu_group *group;
+ sts = dmar_readl(iommu->reg + DMAR_GSTS_REG);
+ if (!(sts & DMA_GSTS_TES)) {
+ seq_printf(m, "DMA Remapping is not enabled on %s\n",
+ iommu->name);
+ continue;
+ }
+ if (dmar_readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT)
+ scalable = true;
+ else
+ scalable = false;
- group = iommu_group_get(dev);
- if (group) {
/*
- * The group->mutex is held across the callback, which will
- * block calls to iommu_attach/detach_group/device. Hence,
+ * The iommu->lock is held across the callback, which will
+ * block calls to domain_attach/domain_detach. Hence,
* the domain of the device will not change during traversal.
*
- * All devices in an iommu group share a single domain, hence
- * we only dump the domain of the first device. Even though,
- * this code still possibly races with the iommu_unmap()
+ * Traversing page table possibly races with the iommu_unmap()
* interface. This could be solved by RCU-freeing the page
* table pages in the iommu_unmap() path.
*/
- iommu_group_for_each_dev(group, data,
- __show_device_domain_translation);
- iommu_group_put(group);
+ spin_lock(&iommu->lock);
+
+ context = iommu_context_addr(iommu, bus, devfn, 0);
+ if (!context || !context_present(context))
+ goto iommu_unlock;
+
+ if (scalable) { /* scalable mode */
+ struct pasid_entry *pasid_tbl, *pasid_tbl_entry;
+ struct pasid_dir_entry *dir_tbl, *dir_entry;
+ u16 dir_idx, tbl_idx, pgtt;
+ u64 pasid_dir_ptr;
+
+ pasid_dir_ptr = context->lo & VTD_PAGE_MASK;
+
+ /* Dump specified device domain mappings with PASID. */
+ dir_idx = pasid >> PASID_PDE_SHIFT;
+ tbl_idx = pasid & PASID_PTE_MASK;
+
+ dir_tbl = phys_to_virt(pasid_dir_ptr);
+ dir_entry = &dir_tbl[dir_idx];
+
+ pasid_tbl = get_pasid_table_from_pde(dir_entry);
+ if (!pasid_tbl)
+ goto iommu_unlock;
+
+ pasid_tbl_entry = &pasid_tbl[tbl_idx];
+ if (!pasid_pte_is_present(pasid_tbl_entry))
+ goto iommu_unlock;
+
+ /*
+ * According to PASID Granular Translation Type(PGTT),
+ * get the page table pointer.
+ */
+ pgtt = (u16)(pasid_tbl_entry->val[0] & GENMASK_ULL(8, 6)) >> 6;
+ agaw = (u8)(pasid_tbl_entry->val[0] & GENMASK_ULL(4, 2)) >> 2;
+
+ switch (pgtt) {
+ case PASID_ENTRY_PGTT_FL_ONLY:
+ pgd = pasid_tbl_entry->val[2];
+ break;
+ case PASID_ENTRY_PGTT_SL_ONLY:
+ case PASID_ENTRY_PGTT_NESTED:
+ pgd = pasid_tbl_entry->val[0];
+ break;
+ default:
+ goto iommu_unlock;
+ }
+ pgd &= VTD_PAGE_MASK;
+ } else { /* legacy mode */
+ pgd = context->lo & VTD_PAGE_MASK;
+ agaw = context->hi & 7;
+ }
+
+ seq_printf(m, "Device %04x:%02x:%02x.%x ",
+ iommu->segment, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+ if (scalable)
+ seq_printf(m, "with pasid %x @0x%llx\n", pasid, pgd);
+ else
+ seq_printf(m, "@0x%llx\n", pgd);
+
+ seq_printf(m, "%-17s\t%-18s\t%-18s\t%-18s\t%-18s\t%-s\n",
+ "IOVA_PFN", "PML5E", "PML4E", "PDPE", "PDE", "PTE");
+ pgtable_walk_level(m, phys_to_virt(pgd), agaw + 2, 0, path);
+
+ found = true;
+iommu_unlock:
+ spin_unlock(&iommu->lock);
+ if (found)
+ break;
}
+ rcu_read_unlock();
return 0;
}
-static int domain_translation_struct_show(struct seq_file *m, void *unused)
+static int dev_domain_translation_struct_show(struct seq_file *m, void *unused)
+{
+ struct device_domain_info *info = (struct device_domain_info *)m->private;
+
+ return domain_translation_struct_show(m, info, IOMMU_NO_PASID);
+}
+DEFINE_SHOW_ATTRIBUTE(dev_domain_translation_struct);
+
+static int pasid_domain_translation_struct_show(struct seq_file *m, void *unused)
{
- return bus_for_each_dev(&pci_bus_type, NULL, m,
- show_device_domain_translation);
+ struct dev_pasid_info *dev_pasid = (struct dev_pasid_info *)m->private;
+ struct device_domain_info *info = dev_iommu_priv_get(dev_pasid->dev);
+
+ return domain_translation_struct_show(m, info, dev_pasid->pasid);
}
-DEFINE_SHOW_ATTRIBUTE(domain_translation_struct);
+DEFINE_SHOW_ATTRIBUTE(pasid_domain_translation_struct);
static void invalidation_queue_entry_show(struct seq_file *m,
struct intel_iommu *iommu)
@@ -666,16 +755,12 @@ static const struct file_operations dmar_perf_latency_fops = {
void __init intel_iommu_debugfs_init(void)
{
- struct dentry *intel_iommu_debug = debugfs_create_dir("intel",
- iommu_debugfs_dir);
+ intel_iommu_debug = debugfs_create_dir("intel", iommu_debugfs_dir);
debugfs_create_file("iommu_regset", 0444, intel_iommu_debug, NULL,
&iommu_regset_fops);
debugfs_create_file("dmar_translation_struct", 0444, intel_iommu_debug,
NULL, &dmar_translation_struct_fops);
- debugfs_create_file("domain_translation_struct", 0444,
- intel_iommu_debug, NULL,
- &domain_translation_struct_fops);
debugfs_create_file("invalidation_queue", 0444, intel_iommu_debug,
NULL, &invalidation_queue_fops);
#ifdef CONFIG_IRQ_REMAP
@@ -685,3 +770,51 @@ void __init intel_iommu_debugfs_init(void)
debugfs_create_file("dmar_perf_latency", 0644, intel_iommu_debug,
NULL, &dmar_perf_latency_fops);
}
+
+/*
+ * Create a debugfs directory for each device, and then create a
+ * debugfs file in this directory for users to dump the page table
+ * of the default domain. e.g.
+ * /sys/kernel/debug/iommu/intel/0000:00:01.0/domain_translation_struct
+ */
+void intel_iommu_debugfs_create_dev(struct device_domain_info *info)
+{
+ info->debugfs_dentry = debugfs_create_dir(dev_name(info->dev), intel_iommu_debug);
+
+ debugfs_create_file("domain_translation_struct", 0444, info->debugfs_dentry,
+ info, &dev_domain_translation_struct_fops);
+}
+
+/* Remove the device debugfs directory. */
+void intel_iommu_debugfs_remove_dev(struct device_domain_info *info)
+{
+ debugfs_remove_recursive(info->debugfs_dentry);
+}
+
+/*
+ * Create a debugfs directory per pair of {device, pasid}, then create the
+ * corresponding debugfs file in this directory for users to dump its page
+ * table. e.g.
+ * /sys/kernel/debug/iommu/intel/0000:00:01.0/1/domain_translation_struct
+ *
+ * The debugfs only dumps the page tables whose mappings are created and
+ * destroyed by the iommu_map/unmap() interfaces. Check the mapping type
+ * of the domain before creating debugfs directory.
+ */
+void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev_pasid->dev);
+ char dir_name[10];
+
+ sprintf(dir_name, "%x", dev_pasid->pasid);
+ dev_pasid->debugfs_dentry = debugfs_create_dir(dir_name, info->debugfs_dentry);
+
+ debugfs_create_file("domain_translation_struct", 0444, dev_pasid->debugfs_dentry,
+ dev_pasid, &pasid_domain_translation_struct_fops);
+}
+
+/* Remove the device pasid debugfs directory. */
+void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid)
+{
+ debugfs_remove_recursive(dev_pasid->debugfs_dentry);
+}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 744e4e6b8d..a8366b1f4f 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -282,7 +282,6 @@ static LIST_HEAD(dmar_satc_units);
#define for_each_rmrr_units(rmrr) \
list_for_each_entry(rmrr, &dmar_rmrr_units, list)
-static void device_block_translation(struct device *dev);
static void intel_iommu_domain_free(struct iommu_domain *domain);
int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
@@ -300,6 +299,7 @@ static int iommu_skip_te_disable;
#define IDENTMAP_AZALIA 4
const struct iommu_ops intel_iommu_ops;
+static const struct iommu_dirty_ops intel_dirty_ops;
static bool translation_pre_enabled(struct intel_iommu *iommu)
{
@@ -540,8 +540,6 @@ static int domain_update_device_node(struct dmar_domain *domain)
return nid;
}
-static void domain_update_iotlb(struct dmar_domain *domain);
-
/* Return the super pagesize bitmap if supported. */
static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
{
@@ -560,7 +558,7 @@ static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
}
/* Some capabilities may be different across iommus */
-static void domain_update_iommu_cap(struct dmar_domain *domain)
+void domain_update_iommu_cap(struct dmar_domain *domain)
{
domain_update_iommu_coherency(domain);
domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
@@ -1362,7 +1360,7 @@ domain_lookup_dev_info(struct dmar_domain *domain,
return NULL;
}
-static void domain_update_iotlb(struct dmar_domain *domain)
+void domain_update_iotlb(struct dmar_domain *domain)
{
struct dev_pasid_info *dev_pasid;
struct device_domain_info *info;
@@ -1778,8 +1776,7 @@ static struct dmar_domain *alloc_domain(unsigned int type)
return domain;
}
-static int domain_attach_iommu(struct dmar_domain *domain,
- struct intel_iommu *iommu)
+int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
{
struct iommu_domain_info *info, *curr;
unsigned long ndomains;
@@ -1828,8 +1825,7 @@ err_unlock:
return ret;
}
-static void domain_detach_iommu(struct dmar_domain *domain,
- struct intel_iommu *iommu)
+void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
{
struct iommu_domain_info *info;
@@ -2196,6 +2192,11 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
return -EINVAL;
+ if (!(prot & DMA_PTE_WRITE) && domain->nested_parent) {
+ pr_err_ratelimited("Read-only mapping is disallowed on the domain which serves as the parent in a nested configuration, due to HW errata (ERRATA_772415_SPR17)\n");
+ return -EINVAL;
+ }
+
attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
attr |= DMA_FL_PTE_PRESENT;
if (domain->use_first_level) {
@@ -3961,7 +3962,7 @@ static void dmar_remove_one_dev_info(struct device *dev)
* all DMA requests without PASID from the device are blocked. If the page
* table has been set, clean up the data structures.
*/
-static void device_block_translation(struct device *dev)
+void device_block_translation(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
@@ -4016,9 +4017,9 @@ static int blocking_domain_attach_dev(struct iommu_domain *domain,
}
static struct iommu_domain blocking_domain = {
+ .type = IOMMU_DOMAIN_BLOCKED,
.ops = &(const struct iommu_domain_ops) {
.attach_dev = blocking_domain_attach_dev,
- .free = intel_iommu_domain_free
}
};
@@ -4028,8 +4029,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
struct iommu_domain *domain;
switch (type) {
- case IOMMU_DOMAIN_BLOCKED:
- return &blocking_domain;
case IOMMU_DOMAIN_DMA:
case IOMMU_DOMAIN_UNMANAGED:
dmar_domain = alloc_domain(type);
@@ -4061,14 +4060,72 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
return NULL;
}
+static struct iommu_domain *
+intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
+ struct iommu_domain *parent,
+ const struct iommu_user_data *user_data)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
+ bool nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
+ struct intel_iommu *iommu = info->iommu;
+ struct dmar_domain *dmar_domain;
+ struct iommu_domain *domain;
+
+ /* Must be NESTING domain */
+ if (parent) {
+ if (!nested_supported(iommu) || flags)
+ return ERR_PTR(-EOPNOTSUPP);
+ return intel_nested_domain_alloc(parent, user_data);
+ }
+
+ if (flags &
+ (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING)))
+ return ERR_PTR(-EOPNOTSUPP);
+ if (nested_parent && !nested_supported(iommu))
+ return ERR_PTR(-EOPNOTSUPP);
+ if (user_data || (dirty_tracking && !ssads_supported(iommu)))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ /*
+ * domain_alloc_user op needs to fully initialize a domain before
+ * return, so uses iommu_domain_alloc() here for simple.
+ */
+ domain = iommu_domain_alloc(dev->bus);
+ if (!domain)
+ return ERR_PTR(-ENOMEM);
+
+ dmar_domain = to_dmar_domain(domain);
+
+ if (nested_parent) {
+ dmar_domain->nested_parent = true;
+ INIT_LIST_HEAD(&dmar_domain->s1_domains);
+ spin_lock_init(&dmar_domain->s1_lock);
+ }
+
+ if (dirty_tracking) {
+ if (dmar_domain->use_first_level) {
+ iommu_domain_free(domain);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ domain->dirty_ops = &intel_dirty_ops;
+ }
+
+ return domain;
+}
+
static void intel_iommu_domain_free(struct iommu_domain *domain)
{
- if (domain != &si_domain->domain && domain != &blocking_domain)
- domain_exit(to_dmar_domain(domain));
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+
+ WARN_ON(dmar_domain->nested_parent &&
+ !list_empty(&dmar_domain->s1_domains));
+ if (domain != &si_domain->domain)
+ domain_exit(dmar_domain);
}
-static int prepare_domain_attach_device(struct iommu_domain *domain,
- struct device *dev)
+int prepare_domain_attach_device(struct iommu_domain *domain,
+ struct device *dev)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu;
@@ -4081,6 +4138,9 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
return -EINVAL;
+ if (domain->dirty_ops && !ssads_supported(iommu))
+ return -EINVAL;
+
/* check if this iommu agaw is sufficient for max mapped address */
addr_width = agaw_to_width(iommu->agaw);
if (addr_width > cap_mgaw(iommu->cap))
@@ -4336,6 +4396,8 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
return dmar_platform_optin();
case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
return ecap_sc_support(info->iommu->ecap);
+ case IOMMU_CAP_DIRTY_TRACKING:
+ return ssads_supported(info->iommu);
default:
return false;
}
@@ -4413,6 +4475,8 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
}
}
+ intel_iommu_debugfs_create_dev(info);
+
return &iommu->iommu;
}
@@ -4422,6 +4486,7 @@ static void intel_iommu_release_device(struct device *dev)
dmar_remove_one_dev_info(dev);
intel_pasid_free_table(dev);
+ intel_iommu_debugfs_remove_dev(info);
dev_iommu_priv_set(dev, NULL);
kfree(info);
set_dma_ops(dev, NULL);
@@ -4666,8 +4731,8 @@ static bool risky_device(struct pci_dev *pdev)
return false;
}
-static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
unsigned long pages = aligned_nrpages(iova, size);
@@ -4677,6 +4742,7 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
xa_for_each(&dmar_domain->iommu_array, i, info)
__mapping_notify_one(info->iommu, dmar_domain, pfn, pages);
+ return 0;
}
static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
@@ -4714,6 +4780,7 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
spin_unlock_irqrestore(&dmar_domain->lock, flags);
domain_detach_iommu(dmar_domain, iommu);
+ intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
kfree(dev_pasid);
out_tear_down:
intel_pasid_tear_down_entry(iommu, dev, pasid, false);
@@ -4733,6 +4800,9 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
return -EOPNOTSUPP;
+ if (domain->dirty_ops)
+ return -EINVAL;
+
if (context_copied(iommu, info->bus, info->devfn))
return -EBUSY;
@@ -4766,6 +4836,9 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
spin_unlock_irqrestore(&dmar_domain->lock, flags);
+ if (domain->type & __IOMMU_DOMAIN_PAGING)
+ intel_iommu_debugfs_create_dev_pasid(dev_pasid);
+
return 0;
out_detach_iommu:
domain_detach_iommu(dmar_domain, iommu);
@@ -4784,6 +4857,7 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type)
if (!vtd)
return ERR_PTR(-ENOMEM);
+ vtd->flags = IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17;
vtd->cap_reg = iommu->cap;
vtd->ecap_reg = iommu->ecap;
*length = sizeof(*vtd);
@@ -4791,10 +4865,136 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type)
return vtd;
}
+/*
+ * Set dirty tracking for the device list of a domain. The caller must
+ * hold the domain->lock when calling it.
+ */
+static int device_set_dirty_tracking(struct list_head *devices, bool enable)
+{
+ struct device_domain_info *info;
+ int ret = 0;
+
+ list_for_each_entry(info, devices, link) {
+ ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev,
+ IOMMU_NO_PASID, enable);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static int parent_domain_set_dirty_tracking(struct dmar_domain *domain,
+ bool enable)
+{
+ struct dmar_domain *s1_domain;
+ unsigned long flags;
+ int ret;
+
+ spin_lock(&domain->s1_lock);
+ list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
+ spin_lock_irqsave(&s1_domain->lock, flags);
+ ret = device_set_dirty_tracking(&s1_domain->devices, enable);
+ spin_unlock_irqrestore(&s1_domain->lock, flags);
+ if (ret)
+ goto err_unwind;
+ }
+ spin_unlock(&domain->s1_lock);
+ return 0;
+
+err_unwind:
+ list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
+ spin_lock_irqsave(&s1_domain->lock, flags);
+ device_set_dirty_tracking(&s1_domain->devices,
+ domain->dirty_tracking);
+ spin_unlock_irqrestore(&s1_domain->lock, flags);
+ }
+ spin_unlock(&domain->s1_lock);
+ return ret;
+}
+
+static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
+ bool enable)
+{
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ int ret;
+
+ spin_lock(&dmar_domain->lock);
+ if (dmar_domain->dirty_tracking == enable)
+ goto out_unlock;
+
+ ret = device_set_dirty_tracking(&dmar_domain->devices, enable);
+ if (ret)
+ goto err_unwind;
+
+ if (dmar_domain->nested_parent) {
+ ret = parent_domain_set_dirty_tracking(dmar_domain, enable);
+ if (ret)
+ goto err_unwind;
+ }
+
+ dmar_domain->dirty_tracking = enable;
+out_unlock:
+ spin_unlock(&dmar_domain->lock);
+
+ return 0;
+
+err_unwind:
+ device_set_dirty_tracking(&dmar_domain->devices,
+ dmar_domain->dirty_tracking);
+ spin_unlock(&dmar_domain->lock);
+ return ret;
+}
+
+static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ unsigned long flags,
+ struct iommu_dirty_bitmap *dirty)
+{
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ unsigned long end = iova + size - 1;
+ unsigned long pgsize;
+
+ /*
+ * IOMMUFD core calls into a dirty tracking disabled domain without an
+ * IOVA bitmap set in order to clean dirty bits in all PTEs that might
+ * have occurred when we stopped dirty tracking. This ensures that we
+ * never inherit dirtied bits from a previous cycle.
+ */
+ if (!dmar_domain->dirty_tracking && dirty->bitmap)
+ return -EINVAL;
+
+ do {
+ struct dma_pte *pte;
+ int lvl = 0;
+
+ pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &lvl,
+ GFP_ATOMIC);
+ pgsize = level_size(lvl) << VTD_PAGE_SHIFT;
+ if (!pte || !dma_pte_present(pte)) {
+ iova += pgsize;
+ continue;
+ }
+
+ if (dma_sl_pte_test_and_clear_dirty(pte, flags))
+ iommu_dirty_bitmap_record(dirty, iova, pgsize);
+ iova += pgsize;
+ } while (iova < end);
+
+ return 0;
+}
+
+static const struct iommu_dirty_ops intel_dirty_ops = {
+ .set_dirty_tracking = intel_iommu_set_dirty_tracking,
+ .read_and_clear_dirty = intel_iommu_read_and_clear_dirty,
+};
+
const struct iommu_ops intel_iommu_ops = {
+ .blocked_domain = &blocking_domain,
.capable = intel_iommu_capable,
.hw_info = intel_iommu_hw_info,
.domain_alloc = intel_iommu_domain_alloc,
+ .domain_alloc_user = intel_iommu_domain_alloc_user,
.probe_device = intel_iommu_probe_device,
.probe_finalize = intel_iommu_probe_finalize,
.release_device = intel_iommu_release_device,
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index e6a3e70656..efc00d2b45 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -25,6 +25,7 @@
#include <asm/cacheflush.h>
#include <asm/iommu.h>
+#include <uapi/linux/iommufd.h>
/*
* VT-d hardware uses 4KiB page size regardless of host page size.
@@ -48,6 +49,9 @@
#define DMA_FL_PTE_DIRTY BIT_ULL(6)
#define DMA_FL_PTE_XD BIT_ULL(63)
+#define DMA_SL_PTE_DIRTY_BIT 9
+#define DMA_SL_PTE_DIRTY BIT_ULL(DMA_SL_PTE_DIRTY_BIT)
+
#define ADDR_WIDTH_5LEVEL (57)
#define ADDR_WIDTH_4LEVEL (48)
@@ -539,6 +543,10 @@ enum {
#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap))
#define pasid_supported(iommu) (sm_supported(iommu) && \
ecap_pasid((iommu)->ecap))
+#define ssads_supported(iommu) (sm_supported(iommu) && \
+ ecap_slads((iommu)->ecap))
+#define nested_supported(iommu) (sm_supported(iommu) && \
+ ecap_nest((iommu)->ecap))
struct pasid_entry;
struct pasid_state_entry;
@@ -592,6 +600,8 @@ struct dmar_domain {
* otherwise, goes through the second
* level.
*/
+ u8 dirty_tracking:1; /* Dirty tracking is enabled */
+ u8 nested_parent:1; /* Has other domains nested on it */
u8 has_mappings:1; /* Has mappings configured through
* iommu_map() interface.
*/
@@ -600,15 +610,44 @@ struct dmar_domain {
struct list_head devices; /* all devices' list */
struct list_head dev_pasids; /* all attached pasids */
- struct dma_pte *pgd; /* virtual address */
- int gaw; /* max guest address width */
-
- /* adjusted guest address width, 0 is level 2 30-bit */
- int agaw;
int iommu_superpage;/* Level of superpages supported:
0 == 4KiB (no superpages), 1 == 2MiB,
2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
- u64 max_addr; /* maximum mapped address */
+ union {
+ /* DMA remapping domain */
+ struct {
+ /* virtual address */
+ struct dma_pte *pgd;
+ /* max guest address width */
+ int gaw;
+ /*
+ * adjusted guest address width:
+ * 0: level 2 30-bit
+ * 1: level 3 39-bit
+ * 2: level 4 48-bit
+ * 3: level 5 57-bit
+ */
+ int agaw;
+ /* maximum mapped address */
+ u64 max_addr;
+ /* Protect the s1_domains list */
+ spinlock_t s1_lock;
+ /* Track s1_domains nested on this domain */
+ struct list_head s1_domains;
+ };
+
+ /* Nested user domain */
+ struct {
+ /* parent page table which the user domain is nested on */
+ struct dmar_domain *s2_domain;
+ /* user page table pointer (in GPA) */
+ unsigned long s1_pgtbl;
+ /* page table attributes */
+ struct iommu_hwpt_vtd_s1 s1_cfg;
+ /* link to parent domain siblings */
+ struct list_head s2_link;
+ };
+ };
struct iommu_domain domain; /* generic domain data structure for
iommu core */
@@ -719,12 +758,18 @@ struct device_domain_info {
struct intel_iommu *iommu; /* IOMMU used by this device */
struct dmar_domain *domain; /* pointer to domain */
struct pasid_table *pasid_table; /* pasid table */
+#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
+ struct dentry *debugfs_dentry; /* pointer to device directory dentry */
+#endif
};
struct dev_pasid_info {
struct list_head link_domain; /* link to domain siblings */
struct device *dev;
ioasid_t pasid;
+#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
+ struct dentry *debugfs_dentry; /* pointer to pasid directory dentry */
+#endif
};
static inline void __iommu_flush_cache(
@@ -784,6 +829,16 @@ static inline bool dma_pte_present(struct dma_pte *pte)
return (pte->val & 3) != 0;
}
+static inline bool dma_sl_pte_test_and_clear_dirty(struct dma_pte *pte,
+ unsigned long flags)
+{
+ if (flags & IOMMU_DIRTY_NO_CLEAR)
+ return (pte->val & DMA_SL_PTE_DIRTY) != 0;
+
+ return test_and_clear_bit(DMA_SL_PTE_DIRTY_BIT,
+ (unsigned long *)&pte->val);
+}
+
static inline bool dma_pte_superpage(struct dma_pte *pte)
{
return (pte->val & DMA_PTE_LARGE_PAGE);
@@ -839,12 +894,22 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
*/
#define QI_OPT_WAIT_DRAIN BIT(0)
+void domain_update_iotlb(struct dmar_domain *domain);
+int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
+void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
+void device_block_translation(struct device *dev);
+int prepare_domain_attach_device(struct iommu_domain *domain,
+ struct device *dev);
+void domain_update_iommu_cap(struct dmar_domain *domain);
+
int dmar_ir_support(void);
void *alloc_pgtable_page(int node, gfp_t gfp);
void free_pgtable_page(void *vaddr);
void iommu_flush_write_buffer(struct intel_iommu *iommu);
struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
+struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
+ const struct iommu_user_data *user_data);
#ifdef CONFIG_INTEL_IOMMU_SVM
void intel_svm_check(struct intel_iommu *iommu);
@@ -886,8 +951,16 @@ static inline void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid
#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
void intel_iommu_debugfs_init(void);
+void intel_iommu_debugfs_create_dev(struct device_domain_info *info);
+void intel_iommu_debugfs_remove_dev(struct device_domain_info *info);
+void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid);
+void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid);
#else
static inline void intel_iommu_debugfs_init(void) {}
+static inline void intel_iommu_debugfs_create_dev(struct device_domain_info *info) {}
+static inline void intel_iommu_debugfs_remove_dev(struct device_domain_info *info) {}
+static inline void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid) {}
+static inline void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid) {}
#endif /* CONFIG_INTEL_IOMMU_DEBUGFS */
extern const struct attribute_group *intel_iommu_groups[];
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
new file mode 100644
index 0000000000..92e82b33ea
--- /dev/null
+++ b/drivers/iommu/intel/nested.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * nested.c - nested mode translation support
+ *
+ * Copyright (C) 2023 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ * Jacob Pan <jacob.jun.pan@linux.intel.com>
+ * Yi Liu <yi.l.liu@intel.com>
+ */
+
+#define pr_fmt(fmt) "DMAR: " fmt
+
+#include <linux/iommu.h>
+#include <linux/pci.h>
+#include <linux/pci-ats.h>
+
+#include "iommu.h"
+#include "pasid.h"
+
+static int intel_nested_attach_dev(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ struct intel_iommu *iommu = info->iommu;
+ unsigned long flags;
+ int ret = 0;
+
+ if (info->domain)
+ device_block_translation(dev);
+
+ if (iommu->agaw < dmar_domain->s2_domain->agaw) {
+ dev_err_ratelimited(dev, "Adjusted guest address width not compatible\n");
+ return -ENODEV;
+ }
+
+ /*
+ * Stage-1 domain cannot work alone, it is nested on a s2_domain.
+ * The s2_domain will be used in nested translation, hence needs
+ * to ensure the s2_domain is compatible with this IOMMU.
+ */
+ ret = prepare_domain_attach_device(&dmar_domain->s2_domain->domain, dev);
+ if (ret) {
+ dev_err_ratelimited(dev, "s2 domain is not compatible\n");
+ return ret;
+ }
+
+ ret = domain_attach_iommu(dmar_domain, iommu);
+ if (ret) {
+ dev_err_ratelimited(dev, "Failed to attach domain to iommu\n");
+ return ret;
+ }
+
+ ret = intel_pasid_setup_nested(iommu, dev,
+ IOMMU_NO_PASID, dmar_domain);
+ if (ret) {
+ domain_detach_iommu(dmar_domain, iommu);
+ dev_err_ratelimited(dev, "Failed to setup pasid entry\n");
+ return ret;
+ }
+
+ info->domain = dmar_domain;
+ spin_lock_irqsave(&dmar_domain->lock, flags);
+ list_add(&info->link, &dmar_domain->devices);
+ spin_unlock_irqrestore(&dmar_domain->lock, flags);
+
+ domain_update_iotlb(dmar_domain);
+
+ return 0;
+}
+
+static void intel_nested_domain_free(struct iommu_domain *domain)
+{
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ struct dmar_domain *s2_domain = dmar_domain->s2_domain;
+
+ spin_lock(&s2_domain->s1_lock);
+ list_del(&dmar_domain->s2_link);
+ spin_unlock(&s2_domain->s1_lock);
+ kfree(dmar_domain);
+}
+
+static const struct iommu_domain_ops intel_nested_domain_ops = {
+ .attach_dev = intel_nested_attach_dev,
+ .free = intel_nested_domain_free,
+};
+
+struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
+ const struct iommu_user_data *user_data)
+{
+ struct dmar_domain *s2_domain = to_dmar_domain(parent);
+ struct iommu_hwpt_vtd_s1 vtd;
+ struct dmar_domain *domain;
+ int ret;
+
+ /* Must be nested domain */
+ if (user_data->type != IOMMU_HWPT_DATA_VTD_S1)
+ return ERR_PTR(-EOPNOTSUPP);
+ if (parent->ops != intel_iommu_ops.default_domain_ops ||
+ !s2_domain->nested_parent)
+ return ERR_PTR(-EINVAL);
+
+ ret = iommu_copy_struct_from_user(&vtd, user_data,
+ IOMMU_HWPT_DATA_VTD_S1, __reserved);
+ if (ret)
+ return ERR_PTR(ret);
+
+ domain = kzalloc(sizeof(*domain), GFP_KERNEL_ACCOUNT);
+ if (!domain)
+ return ERR_PTR(-ENOMEM);
+
+ domain->use_first_level = true;
+ domain->s2_domain = s2_domain;
+ domain->s1_pgtbl = vtd.pgtbl_addr;
+ domain->s1_cfg = vtd;
+ domain->domain.ops = &intel_nested_domain_ops;
+ domain->domain.type = IOMMU_DOMAIN_NESTED;
+ INIT_LIST_HEAD(&domain->devices);
+ INIT_LIST_HEAD(&domain->dev_pasids);
+ spin_lock_init(&domain->lock);
+ xa_init(&domain->iommu_array);
+
+ spin_lock(&s2_domain->s1_lock);
+ list_add(&domain->s2_link, &s2_domain->s1_domains);
+ spin_unlock(&s2_domain->s1_lock);
+
+ return &domain->domain;
+}
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 8f92b92f3d..6e102cbbde 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -277,6 +277,11 @@ static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
WRITE_ONCE(*ptr, (old & ~mask) | bits);
}
+static inline u64 pasid_get_bits(u64 *ptr)
+{
+ return READ_ONCE(*ptr);
+}
+
/*
* Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
* PASID entry.
@@ -336,6 +341,45 @@ static inline void pasid_set_fault_enable(struct pasid_entry *pe)
}
/*
+ * Enable second level A/D bits by setting the SLADE (Second Level
+ * Access Dirty Enable) field (Bit 9) of a scalable mode PASID
+ * entry.
+ */
+static inline void pasid_set_ssade(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[0], 1 << 9, 1 << 9);
+}
+
+/*
+ * Disable second level A/D bits by clearing the SLADE (Second Level
+ * Access Dirty Enable) field (Bit 9) of a scalable mode PASID
+ * entry.
+ */
+static inline void pasid_clear_ssade(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[0], 1 << 9, 0);
+}
+
+/*
+ * Checks if second level A/D bits specifically the SLADE (Second Level
+ * Access Dirty Enable) field (Bit 9) of a scalable mode PASID
+ * entry is set.
+ */
+static inline bool pasid_get_ssade(struct pasid_entry *pe)
+{
+ return pasid_get_bits(&pe->val[0]) & (1 << 9);
+}
+
+/*
+ * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
+ * scalable mode PASID entry.
+ */
+static inline void pasid_set_sre(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[2], 1 << 0, 1);
+}
+
+/*
* Setup the WPE(Write Protect Enable) field (Bit 132) of a
* scalable mode PASID entry.
*/
@@ -402,6 +446,15 @@ pasid_set_flpm(struct pasid_entry *pe, u64 value)
pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2);
}
+/*
+ * Setup the Extended Access Flag Enable (EAFE) field (Bit 135)
+ * of a scalable mode PASID entry.
+ */
+static inline void pasid_set_eafe(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7);
+}
+
static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
u16 did, u32 pasid)
@@ -627,6 +680,8 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
pasid_set_fault_enable(pte);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+ if (domain->dirty_tracking)
+ pasid_set_ssade(pte);
pasid_set_present(pte);
spin_unlock(&iommu->lock);
@@ -637,6 +692,77 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
}
/*
+ * Set up dirty tracking on a second only or nested translation type.
+ */
+int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
+ struct device *dev, u32 pasid,
+ bool enabled)
+{
+ struct pasid_entry *pte;
+ u16 did, pgtt;
+
+ spin_lock(&iommu->lock);
+
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (!pte) {
+ spin_unlock(&iommu->lock);
+ dev_err_ratelimited(
+ dev, "Failed to get pasid entry of PASID %d\n", pasid);
+ return -ENODEV;
+ }
+
+ did = pasid_get_domain_id(pte);
+ pgtt = pasid_pte_get_pgtt(pte);
+ if (pgtt != PASID_ENTRY_PGTT_SL_ONLY &&
+ pgtt != PASID_ENTRY_PGTT_NESTED) {
+ spin_unlock(&iommu->lock);
+ dev_err_ratelimited(
+ dev,
+ "Dirty tracking not supported on translation type %d\n",
+ pgtt);
+ return -EOPNOTSUPP;
+ }
+
+ if (pasid_get_ssade(pte) == enabled) {
+ spin_unlock(&iommu->lock);
+ return 0;
+ }
+
+ if (enabled)
+ pasid_set_ssade(pte);
+ else
+ pasid_clear_ssade(pte);
+ spin_unlock(&iommu->lock);
+
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(pte, sizeof(*pte));
+
+ /*
+ * From VT-d spec table 25 "Guidance to Software for Invalidations":
+ *
+ * - PASID-selective-within-Domain PASID-cache invalidation
+ * If (PGTT=SS or Nested)
+ * - Domain-selective IOTLB invalidation
+ * Else
+ * - PASID-selective PASID-based IOTLB invalidation
+ * - If (pasid is RID_PASID)
+ * - Global Device-TLB invalidation to affected functions
+ * Else
+ * - PASID-based Device-TLB invalidation (with S=1 and
+ * Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
+ */
+ pasid_cache_invalidation_with_pasid(iommu, did, pasid);
+
+ iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+
+ /* Device IOTLB doesn't need to be flushed in caching mode. */
+ if (!cap_caching_mode(iommu->cap))
+ devtlb_invalidation_with_pasid(iommu, dev, pasid);
+
+ return 0;
+}
+
+/*
* Set up the scalable mode pasid entry for passthrough translation type.
*/
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
@@ -713,3 +839,99 @@ void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
if (!cap_caching_mode(iommu->cap))
devtlb_invalidation_with_pasid(iommu, dev, pasid);
}
+
+/**
+ * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
+ * @iommu: IOMMU which the device belong to
+ * @dev: Device to be set up for translation
+ * @pasid: PASID to be programmed in the device PASID table
+ * @domain: User stage-1 domain nested on a stage-2 domain
+ *
+ * This is used for nested translation. The input domain should be
+ * nested type and nested on a parent with 'is_nested_parent' flag
+ * set.
+ */
+int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
+ u32 pasid, struct dmar_domain *domain)
+{
+ struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
+ pgd_t *s1_gpgd = (pgd_t *)(uintptr_t)domain->s1_pgtbl;
+ struct dmar_domain *s2_domain = domain->s2_domain;
+ u16 did = domain_id_iommu(domain, iommu);
+ struct dma_pte *pgd = s2_domain->pgd;
+ struct pasid_entry *pte;
+
+ /* Address width should match the address width supported by hardware */
+ switch (s1_cfg->addr_width) {
+ case ADDR_WIDTH_4LEVEL:
+ break;
+ case ADDR_WIDTH_5LEVEL:
+ if (!cap_fl5lp_support(iommu->cap)) {
+ dev_err_ratelimited(dev,
+ "5-level paging not supported\n");
+ return -EINVAL;
+ }
+ break;
+ default:
+ dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
+ s1_cfg->addr_width);
+ return -EINVAL;
+ }
+
+ if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
+ pr_err_ratelimited("No supervisor request support on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+
+ if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
+ pr_err_ratelimited("No extended access flag support on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+
+ spin_lock(&iommu->lock);
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (!pte) {
+ spin_unlock(&iommu->lock);
+ return -ENODEV;
+ }
+ if (pasid_pte_is_present(pte)) {
+ spin_unlock(&iommu->lock);
+ return -EBUSY;
+ }
+
+ pasid_clear_entry(pte);
+
+ if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
+ pasid_set_flpm(pte, 1);
+
+ pasid_set_flptr(pte, (uintptr_t)s1_gpgd);
+
+ if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
+ pasid_set_sre(pte);
+ if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
+ pasid_set_wpe(pte);
+ }
+
+ if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
+ pasid_set_eafe(pte);
+
+ if (s2_domain->force_snooping)
+ pasid_set_pgsnp(pte);
+
+ pasid_set_slptr(pte, virt_to_phys(pgd));
+ pasid_set_fault_enable(pte);
+ pasid_set_domain_id(pte, did);
+ pasid_set_address_width(pte, s2_domain->agaw);
+ pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+ if (s2_domain->dirty_tracking)
+ pasid_set_ssade(pte);
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
+ pasid_set_present(pte);
+ spin_unlock(&iommu->lock);
+
+ pasid_flush_caches(iommu, pte, pasid, did);
+
+ return 0;
+}
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index 4e9e68c3c3..3568adca1f 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -106,9 +106,14 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev, u32 pasid);
+int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
+ struct device *dev, u32 pasid,
+ bool enabled);
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev, u32 pasid);
+int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
+ u32 pasid, struct dmar_domain *domain);
void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
struct device *dev, u32 pasid,
bool fault_ignore);