summaryrefslogtreecommitdiffstats
path: root/drivers/iommu
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 17:40:19 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 17:40:19 +0000
commit9f0fc191371843c4fc000a226b0a26b6c059aacd (patch)
tree35f8be3ef04506ac891ad001e8c41e535ae8d01d /drivers/iommu
parentReleasing progress-linux version 6.6.15-2~progress7.99u1. (diff)
downloadlinux-9f0fc191371843c4fc000a226b0a26b6c059aacd.tar.xz
linux-9f0fc191371843c4fc000a226b0a26b6c059aacd.zip
Merging upstream version 6.7.7.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/iommu')
-rw-r--r--drivers/iommu/Kconfig19
-rw-r--r--drivers/iommu/Makefile1
-rw-r--r--drivers/iommu/amd/Kconfig10
-rw-r--r--drivers/iommu/amd/Makefile1
-rw-r--r--drivers/iommu/amd/amd_iommu.h35
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h64
-rw-r--r--drivers/iommu/amd/init.c117
-rw-r--r--drivers/iommu/amd/io_pgtable.c68
-rw-r--r--drivers/iommu/amd/io_pgtable_v2.c8
-rw-r--r--drivers/iommu/amd/iommu.c724
-rw-r--r--drivers/iommu/amd/iommu_v2.c996
-rw-r--r--drivers/iommu/apple-dart.c138
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c82
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c251
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h17
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c2
-rw-r--r--drivers/iommu/arm/arm-smmu/qcom_iommu.c45
-rw-r--r--drivers/iommu/dma-iommu.c200
-rw-r--r--drivers/iommu/exynos-iommu.c83
-rw-r--r--drivers/iommu/fsl_pamu_domain.c41
-rw-r--r--drivers/iommu/intel/Kconfig3
-rw-r--r--drivers/iommu/intel/Makefile2
-rw-r--r--drivers/iommu/intel/debugfs.c215
-rw-r--r--drivers/iommu/intel/iommu.c238
-rw-r--r--drivers/iommu/intel/iommu.h85
-rw-r--r--drivers/iommu/intel/nested.c129
-rw-r--r--drivers/iommu/intel/pasid.c222
-rw-r--r--drivers/iommu/intel/pasid.h5
-rw-r--r--drivers/iommu/iommu-sva.c2
-rw-r--r--drivers/iommu/iommu.c498
-rw-r--r--drivers/iommu/iommufd/Makefile1
-rw-r--r--drivers/iommu/iommufd/device.c188
-rw-r--r--drivers/iommu/iommufd/hw_pagetable.c307
-rw-r--r--drivers/iommu/iommufd/io_pagetable.c172
-rw-r--r--drivers/iommu/iommufd/ioas.c14
-rw-r--r--drivers/iommu/iommufd/iommufd_private.h152
-rw-r--r--drivers/iommu/iommufd/iommufd_test.h39
-rw-r--r--drivers/iommu/iommufd/iova_bitmap.c474
-rw-r--r--drivers/iommu/iommufd/main.c163
-rw-r--r--drivers/iommu/iommufd/selftest.c347
-rw-r--r--drivers/iommu/iommufd/vfio_compat.c24
-rw-r--r--drivers/iommu/iova.c95
-rw-r--r--drivers/iommu/ipmmu-vmsa.c72
-rw-r--r--drivers/iommu/msm_iommu.c35
-rw-r--r--drivers/iommu/mtk_iommu.c35
-rw-r--r--drivers/iommu/mtk_iommu_v1.c28
-rw-r--r--drivers/iommu/omap-iommu.c69
-rw-r--r--drivers/iommu/omap-iommu.h2
-rw-r--r--drivers/iommu/rockchip-iommu.c59
-rw-r--r--drivers/iommu/s390-iommu.c424
-rw-r--r--drivers/iommu/sprd-iommu.c36
-rw-r--r--drivers/iommu/sun50i-iommu.c80
-rw-r--r--drivers/iommu/tegra-gart.c371
-rw-r--r--drivers/iommu/tegra-smmu.c58
-rw-r--r--drivers/iommu/virtio-iommu.c4
55 files changed, 4514 insertions, 3036 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 2b12b583ef..7673bb8294 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -7,6 +7,10 @@ config IOMMU_IOVA
config IOMMU_API
bool
+config IOMMUFD_DRIVER
+ bool
+ default n
+
menuconfig IOMMU_SUPPORT
bool "IOMMU Hardware Support"
depends on MMU
@@ -91,7 +95,7 @@ config IOMMU_DEBUGFS
choice
prompt "IOMMU default domain type"
depends on IOMMU_API
- default IOMMU_DEFAULT_DMA_LAZY if X86 || IA64
+ default IOMMU_DEFAULT_DMA_LAZY if X86 || S390
default IOMMU_DEFAULT_DMA_STRICT
help
Choose the type of IOMMU domain used to manage DMA API usage by
@@ -146,7 +150,7 @@ config OF_IOMMU
# IOMMU-agnostic DMA-mapping layer
config IOMMU_DMA
- def_bool ARM64 || IA64 || X86
+ def_bool ARM64 || X86 || S390
select DMA_OPS
select IOMMU_API
select IOMMU_IOVA
@@ -236,17 +240,6 @@ config SUN50I_IOMMU
help
Support for the IOMMU introduced in the Allwinner H6 SoCs.
-config TEGRA_IOMMU_GART
- bool "Tegra GART IOMMU Support"
- depends on ARCH_TEGRA_2x_SOC
- depends on TEGRA_MC
- select IOMMU_API
- help
- Enables support for remapping discontiguous physical memory
- shared with the operating system into contiguous I/O virtual
- space through the GART (Graphics Address Relocation Table)
- hardware included on Tegra SoCs.
-
config TEGRA_IOMMU_SMMU
bool "NVIDIA Tegra SMMU Support"
depends on ARCH_TEGRA
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 769e43d780..95ad9dbfbd 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -20,7 +20,6 @@ obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o
obj-$(CONFIG_SUN50I_IOMMU) += sun50i-iommu.o
-obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig
index 9b5fc3356b..443b2c13c3 100644
--- a/drivers/iommu/amd/Kconfig
+++ b/drivers/iommu/amd/Kconfig
@@ -10,6 +10,7 @@ config AMD_IOMMU
select IOMMU_API
select IOMMU_IOVA
select IOMMU_IO_PGTABLE
+ select IOMMUFD_DRIVER if IOMMUFD
depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE
help
With this option you can enable support for AMD IOMMU hardware in
@@ -22,15 +23,6 @@ config AMD_IOMMU
your BIOS for an option to enable it or if you have an IVRS ACPI
table.
-config AMD_IOMMU_V2
- tristate "AMD IOMMU Version 2 driver"
- depends on AMD_IOMMU
- select MMU_NOTIFIER
- help
- This option enables support for the AMD IOMMUv2 features of the IOMMU
- hardware. Select this option if you want to use devices that support
- the PCI PRI and PASID interface.
-
config AMD_IOMMU_DEBUGFS
bool "Enable AMD IOMMU internals in DebugFS"
depends on AMD_IOMMU && IOMMU_DEBUGFS
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index 773d8aa002..f454fbb156 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,4 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
-obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index e2857109e9..86be1edd50 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -38,9 +38,6 @@ extern int amd_iommu_guest_ir;
extern enum io_pgtable_fmt amd_iommu_pgtable;
extern int amd_iommu_gpt_level;
-/* IOMMUv2 specific functions */
-struct iommu_domain;
-
bool amd_iommu_v2_supported(void);
struct amd_iommu *get_amd_iommu(unsigned int idx);
u8 amd_iommu_pc_get_max_banks(unsigned int idx);
@@ -51,10 +48,10 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
u8 fxn, u64 *value);
-int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
-int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
-void amd_iommu_domain_direct_map(struct iommu_domain *dom);
-int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
+/* Device capabilities */
+int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev);
+void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev);
+
int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address);
void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
void amd_iommu_domain_update(struct protection_domain *domain);
@@ -87,9 +84,25 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev)
(pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
}
-static inline bool iommu_feature(struct amd_iommu *iommu, u64 mask)
+static inline bool check_feature(u64 mask)
+{
+ return (amd_iommu_efr & mask);
+}
+
+static inline bool check_feature2(u64 mask)
+{
+ return (amd_iommu_efr2 & mask);
+}
+
+static inline int check_feature_gpt_level(void)
+{
+ return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK);
+}
+
+static inline bool amd_iommu_gt_ppr_supported(void)
{
- return !!(iommu->features & mask);
+ return (check_feature(FEATURE_GT) &&
+ check_feature(FEATURE_PPR));
}
static inline u64 iommu_virt_to_phys(void *vaddr)
@@ -105,7 +118,6 @@ static inline void *iommu_phys_to_virt(unsigned long paddr)
static inline
void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
{
- atomic64_set(&domain->iop.pt_root, root);
domain->iop.root = (u64 *)(root & PAGE_MASK);
domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */
}
@@ -146,8 +158,5 @@ void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
u64 *root, int mode);
struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
-extern u64 amd_iommu_efr;
-extern u64 amd_iommu_efr2;
-
extern bool amd_iommu_snp_en;
#endif
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 7dc30c2b56..90b7d7950a 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -97,7 +97,9 @@
#define FEATURE_GATS_MASK (3ULL)
#define FEATURE_GAM_VAPIC BIT_ULL(21)
#define FEATURE_GIOSUP BIT_ULL(48)
+#define FEATURE_HASUP BIT_ULL(49)
#define FEATURE_EPHSUP BIT_ULL(50)
+#define FEATURE_HDSUP BIT_ULL(52)
#define FEATURE_SNP BIT_ULL(63)
#define FEATURE_PASID_SHIFT 32
@@ -212,6 +214,7 @@
/* macros and definitions for device table entries */
#define DEV_ENTRY_VALID 0x00
#define DEV_ENTRY_TRANSLATION 0x01
+#define DEV_ENTRY_HAD 0x07
#define DEV_ENTRY_PPR 0x34
#define DEV_ENTRY_IR 0x3d
#define DEV_ENTRY_IW 0x3e
@@ -371,9 +374,15 @@
(1ULL << (12 + (9 * (level))))
/*
+ * The IOPTE dirty bit
+ */
+#define IOMMU_PTE_HD_BIT (6)
+
+/*
* Bit value definition for I/O PTE fields
*/
#define IOMMU_PTE_PR BIT_ULL(0)
+#define IOMMU_PTE_HD BIT_ULL(IOMMU_PTE_HD_BIT)
#define IOMMU_PTE_U BIT_ULL(59)
#define IOMMU_PTE_FC BIT_ULL(60)
#define IOMMU_PTE_IR BIT_ULL(61)
@@ -384,6 +393,7 @@
*/
#define DTE_FLAG_V BIT_ULL(0)
#define DTE_FLAG_TV BIT_ULL(1)
+#define DTE_FLAG_HAD (3ULL << 7)
#define DTE_FLAG_GIOV BIT_ULL(54)
#define DTE_FLAG_GV BIT_ULL(55)
#define DTE_GLX_SHIFT (56)
@@ -413,6 +423,7 @@
#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR)
+#define IOMMU_PTE_DIRTY(pte) ((pte) & IOMMU_PTE_HD)
#define IOMMU_PTE_PAGE(pte) (iommu_phys_to_virt((pte) & IOMMU_PAGE_MASK))
#define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
@@ -451,6 +462,10 @@
#define PD_IOMMUV2_MASK BIT(3) /* domain has gcr3 table */
#define PD_GIOV_MASK BIT(4) /* domain enable GIOV support */
+/* Timeout stuff */
+#define LOOP_TIMEOUT 100000
+#define MMIO_STATUS_TIMEOUT 2000000
+
extern bool amd_iommu_dump;
#define DUMP_printk(format, arg...) \
do { \
@@ -505,19 +520,6 @@ extern struct kmem_cache *amd_iommu_irq_cache;
#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT)
#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
-/*
- * This struct is used to pass information about
- * incoming PPR faults around.
- */
-struct amd_iommu_fault {
- u64 address; /* IO virtual address of the fault*/
- u32 pasid; /* Address space identifier */
- u32 sbdf; /* Originating PCI device id */
- u16 tag; /* PPR tag */
- u16 flags; /* Fault flags */
-
-};
-
struct amd_iommu;
struct iommu_domain;
@@ -544,7 +546,6 @@ struct amd_io_pgtable {
struct io_pgtable iop;
int mode;
u64 *root;
- atomic64_t pt_root; /* pgtable root and pgtable mode */
u64 *pgd; /* v2 pgtable pgd pointer */
};
@@ -563,6 +564,7 @@ struct protection_domain {
int nid; /* Node ID */
u64 *gcr3_tbl; /* Guest CR3 table */
unsigned long flags; /* flags to find out type of domain */
+ bool dirty_tracking; /* dirty tracking is enabled in the domain */
unsigned dev_cnt; /* devices assigned to this domain */
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
};
@@ -676,9 +678,6 @@ struct amd_iommu {
/* Extended features 2 */
u64 features2;
- /* IOMMUv2 */
- bool is_iommu_v2;
-
/* PCI device id of the IOMMU device */
u16 devid;
@@ -799,6 +798,14 @@ struct devid_map {
bool cmd_line;
};
+#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */
+#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */
+#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */
+/* Device may request execution on memory pages */
+#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8
+/* Device may request super-user privileges */
+#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10
+
/*
* This struct contains device specific data for the IOMMU
*/
@@ -811,13 +818,15 @@ struct iommu_dev_data {
struct protection_domain *domain; /* Domain the device is bound to */
struct device *dev;
u16 devid; /* PCI Device ID */
- bool iommu_v2; /* Device can make use of IOMMUv2 */
- struct {
- bool enabled;
- int qdep;
- } ats; /* ATS state */
- bool pri_tlp; /* PASID TLB required for
+
+ u32 flags; /* Holds AMD_IOMMU_DEVICE_FLAG_<*> */
+ int ats_qdep;
+ u8 ats_enabled :1; /* ATS state */
+ u8 pri_enabled :1; /* PRI state */
+ u8 pasid_enabled:1; /* PASID state */
+ u8 pri_tlp :1; /* PASID TLB required for
PPR completions */
+ u8 ppr :1; /* Enable device PPR support */
bool use_vapic; /* Enable device to use vapic mode */
bool defer_attach;
@@ -884,16 +893,15 @@ extern unsigned amd_iommu_aperture_order;
/* allocation bitmap for domain ids */
extern unsigned long *amd_iommu_pd_alloc_bitmap;
-/* Smallest max PASID supported by any IOMMU in the system */
-extern u32 amd_iommu_max_pasid;
-
-extern bool amd_iommu_v2_present;
-
extern bool amd_iommu_force_isolation;
/* Max levels of glxval supported */
extern int amd_iommu_max_glx_val;
+/* Global EFR and EFR2 registers */
+extern u64 amd_iommu_efr;
+extern u64 amd_iommu_efr2;
+
/*
* This function flushes all internal caches of
* the IOMMU used by this driver.
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 45efb7e5d7..64bcf3df37 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -83,8 +83,6 @@
#define ACPI_DEVFLAG_LINT1 0x80
#define ACPI_DEVFLAG_ATSDIS 0x10000000
-#define LOOP_TIMEOUT 2000000
-
#define IVRS_GET_SBDF_ID(seg, bus, dev, fn) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \
| ((dev & 0x1f) << 3) | (fn & 0x7))
@@ -187,9 +185,6 @@ static int amd_iommus_present;
bool amd_iommu_np_cache __read_mostly;
bool amd_iommu_iotlb_sup __read_mostly = true;
-u32 amd_iommu_max_pasid __read_mostly = ~0;
-
-bool amd_iommu_v2_present __read_mostly;
static bool amd_iommu_pc_present __read_mostly;
bool amdr_ivrs_remap_support __read_mostly;
@@ -272,7 +267,7 @@ int amd_iommu_get_num_iommus(void)
* Iterate through all the IOMMUs to get common EFR
* masks among all IOMMUs and warn if found inconsistency.
*/
-static void get_global_efr(void)
+static __init void get_global_efr(void)
{
struct amd_iommu *iommu;
@@ -304,16 +299,6 @@ static void get_global_efr(void)
pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
}
-static bool check_feature_on_all_iommus(u64 mask)
-{
- return !!(amd_iommu_efr & mask);
-}
-
-static inline int check_feature_gpt_level(void)
-{
- return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK);
-}
-
/*
* For IVHD type 0x11/0x40, EFR is also available via IVHD.
* Default to IVHD EFR since it is available sooner
@@ -399,7 +384,7 @@ static void iommu_set_cwwb_range(struct amd_iommu *iommu)
u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
u64 entry = start & PM_ADDR_MASK;
- if (!check_feature_on_all_iommus(FEATURE_SNP))
+ if (!check_feature(FEATURE_SNP))
return;
/* Note:
@@ -869,7 +854,7 @@ static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
void *buf = (void *)__get_free_pages(gfp, order);
if (buf &&
- check_feature_on_all_iommus(FEATURE_SNP) &&
+ check_feature(FEATURE_SNP) &&
set_memory_4k((unsigned long)buf, (1 << order))) {
free_pages((unsigned long)buf, order);
buf = NULL;
@@ -985,14 +970,14 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu)
iommu_feature_enable(iommu, CONTROL_GAINT_EN);
iommu_feature_enable(iommu, CONTROL_GALOG_EN);
- for (i = 0; i < LOOP_TIMEOUT; ++i) {
+ for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
if (status & (MMIO_STATUS_GALOG_RUN_MASK))
break;
udelay(10);
}
- if (WARN_ON(i >= LOOP_TIMEOUT))
+ if (WARN_ON(i >= MMIO_STATUS_TIMEOUT))
return -EINVAL;
return 0;
@@ -1048,7 +1033,7 @@ static void iommu_enable_xt(struct amd_iommu *iommu)
static void iommu_enable_gt(struct amd_iommu *iommu)
{
- if (!iommu_feature(iommu, FEATURE_GT))
+ if (!check_feature(FEATURE_GT))
return;
iommu_feature_enable(iommu, CONTROL_GT_EN);
@@ -1987,7 +1972,7 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
u64 val;
struct pci_dev *pdev = iommu->dev;
- if (!iommu_feature(iommu, FEATURE_PC))
+ if (!check_feature(FEATURE_PC))
return;
amd_iommu_pc_present = true;
@@ -2014,8 +1999,7 @@ static ssize_t amd_iommu_show_features(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct amd_iommu *iommu = dev_to_amd_iommu(dev);
- return sysfs_emit(buf, "%llx:%llx\n", iommu->features2, iommu->features);
+ return sysfs_emit(buf, "%llx:%llx\n", amd_iommu_efr, amd_iommu_efr2);
}
static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
@@ -2051,9 +2035,9 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu)
features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2);
- if (!iommu->features) {
- iommu->features = features;
- iommu->features2 = features2;
+ if (!amd_iommu_efr) {
+ amd_iommu_efr = features;
+ amd_iommu_efr2 = features2;
return;
}
@@ -2061,12 +2045,12 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu)
* Sanity check and warn if EFR values from
* IVHD and MMIO conflict.
*/
- if (features != iommu->features ||
- features2 != iommu->features2) {
+ if (features != amd_iommu_efr ||
+ features2 != amd_iommu_efr2) {
pr_warn(FW_WARN
"EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n",
- features, iommu->features,
- features2, iommu->features2);
+ features, amd_iommu_efr,
+ features2, amd_iommu_efr2);
}
}
@@ -2092,20 +2076,17 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
late_iommu_features_init(iommu);
- if (iommu_feature(iommu, FEATURE_GT)) {
+ if (check_feature(FEATURE_GT)) {
int glxval;
- u32 max_pasid;
u64 pasmax;
- pasmax = iommu->features & FEATURE_PASID_MASK;
+ pasmax = amd_iommu_efr & FEATURE_PASID_MASK;
pasmax >>= FEATURE_PASID_SHIFT;
- max_pasid = (1 << (pasmax + 1)) - 1;
+ iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1;
- amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid);
+ BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK);
- BUG_ON(amd_iommu_max_pasid & ~PASID_MASK);
-
- glxval = iommu->features & FEATURE_GLXVAL_MASK;
+ glxval = amd_iommu_efr & FEATURE_GLXVAL_MASK;
glxval >>= FEATURE_GLXVAL_SHIFT;
if (amd_iommu_max_glx_val == -1)
@@ -2114,13 +2095,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
}
- if (iommu_feature(iommu, FEATURE_GT) &&
- iommu_feature(iommu, FEATURE_PPR)) {
- iommu->is_iommu_v2 = true;
- amd_iommu_v2_present = true;
- }
-
- if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
+ if (check_feature(FEATURE_PPR) && alloc_ppr_log(iommu))
return -ENOMEM;
if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
@@ -2132,13 +2107,10 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
init_iommu_perf_ctr(iommu);
if (amd_iommu_pgtable == AMD_IOMMU_V2) {
- if (!iommu_feature(iommu, FEATURE_GIOSUP) ||
- !iommu_feature(iommu, FEATURE_GT)) {
+ if (!check_feature(FEATURE_GIOSUP) ||
+ !check_feature(FEATURE_GT)) {
pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
amd_iommu_pgtable = AMD_IOMMU_V1;
- } else if (iommu_default_passthrough()) {
- pr_warn("V2 page table doesn't support passthrough mode. Fallback to v1.\n");
- amd_iommu_pgtable = AMD_IOMMU_V1;
}
}
@@ -2186,35 +2158,29 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
static void print_iommu_info(void)
{
+ int i;
static const char * const feat_str[] = {
"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
"IA", "GA", "HE", "PC"
};
- struct amd_iommu *iommu;
-
- for_each_iommu(iommu) {
- struct pci_dev *pdev = iommu->dev;
- int i;
- pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr);
+ if (amd_iommu_efr) {
+ pr_info("Extended features (%#llx, %#llx):", amd_iommu_efr, amd_iommu_efr2);
- if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
- pr_info("Extended features (%#llx, %#llx):", iommu->features, iommu->features2);
-
- for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
- if (iommu_feature(iommu, (1ULL << i)))
- pr_cont(" %s", feat_str[i]);
- }
+ for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
+ if (check_feature(1ULL << i))
+ pr_cont(" %s", feat_str[i]);
+ }
- if (iommu->features & FEATURE_GAM_VAPIC)
- pr_cont(" GA_vAPIC");
+ if (check_feature(FEATURE_GAM_VAPIC))
+ pr_cont(" GA_vAPIC");
- if (iommu->features & FEATURE_SNP)
- pr_cont(" SNP");
+ if (check_feature(FEATURE_SNP))
+ pr_cont(" SNP");
- pr_cont("\n");
- }
+ pr_cont("\n");
}
+
if (irq_remapping_enabled) {
pr_info("Interrupt remapping enabled\n");
if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
@@ -2900,19 +2866,19 @@ static void enable_iommus_vapic(void)
* Need to set and poll check the GALOGRun bit to zero before
* we can set/ modify GA Log registers safely.
*/
- for (i = 0; i < LOOP_TIMEOUT; ++i) {
+ for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
break;
udelay(10);
}
- if (WARN_ON(i >= LOOP_TIMEOUT))
+ if (WARN_ON(i >= MMIO_STATUS_TIMEOUT))
return;
}
if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
- !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) {
+ !check_feature(FEATURE_GAM_VAPIC)) {
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
return;
}
@@ -3698,9 +3664,8 @@ bool amd_iommu_v2_supported(void)
* (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without
* setting up IOMMUv1 page table.
*/
- return amd_iommu_v2_present && !amd_iommu_snp_en;
+ return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en;
}
-EXPORT_SYMBOL(amd_iommu_v2_supported);
struct amd_iommu *get_amd_iommu(unsigned int idx)
{
@@ -3824,7 +3789,7 @@ int amd_iommu_snp_enable(void)
return -EINVAL;
}
- amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP);
+ amd_iommu_snp_en = check_feature(FEATURE_SNP);
if (!amd_iommu_snp_en)
return -EINVAL;
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 2892aa1b4d..6c0621f6f5 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -486,6 +486,73 @@ static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned lo
return (__pte & ~offset_mask) | (iova & offset_mask);
}
+static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size,
+ unsigned long flags)
+{
+ bool test_only = flags & IOMMU_DIRTY_NO_CLEAR;
+ bool dirty = false;
+ int i, count;
+
+ /*
+ * 2.2.3.2 Host Dirty Support
+ * When a non-default page size is used , software must OR the
+ * Dirty bits in all of the replicated host PTEs used to map
+ * the page. The IOMMU does not guarantee the Dirty bits are
+ * set in all of the replicated PTEs. Any portion of the page
+ * may have been written even if the Dirty bit is set in only
+ * one of the replicated PTEs.
+ */
+ count = PAGE_SIZE_PTE_COUNT(size);
+ for (i = 0; i < count && test_only; i++) {
+ if (test_bit(IOMMU_PTE_HD_BIT, (unsigned long *)&ptep[i])) {
+ dirty = true;
+ break;
+ }
+ }
+
+ for (i = 0; i < count && !test_only; i++) {
+ if (test_and_clear_bit(IOMMU_PTE_HD_BIT,
+ (unsigned long *)&ptep[i])) {
+ dirty = true;
+ }
+ }
+
+ return dirty;
+}
+
+static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops,
+ unsigned long iova, size_t size,
+ unsigned long flags,
+ struct iommu_dirty_bitmap *dirty)
+{
+ struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+ unsigned long end = iova + size - 1;
+
+ do {
+ unsigned long pgsize = 0;
+ u64 *ptep, pte;
+
+ ptep = fetch_pte(pgtable, iova, &pgsize);
+ if (ptep)
+ pte = READ_ONCE(*ptep);
+ if (!ptep || !IOMMU_PTE_PRESENT(pte)) {
+ pgsize = pgsize ?: PTE_LEVEL_PAGE_SIZE(0);
+ iova += pgsize;
+ continue;
+ }
+
+ /*
+ * Mark the whole IOVA range as dirty even if only one of
+ * the replicated PTEs were marked dirty.
+ */
+ if (pte_test_and_clear_dirty(ptep, pgsize, flags))
+ iommu_dirty_bitmap_record(dirty, iova, pgsize);
+ iova += pgsize;
+ } while (iova < end);
+
+ return 0;
+}
+
/*
* ----------------------------------------------------
*/
@@ -527,6 +594,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
pgtable->iop.ops.map_pages = iommu_v1_map_pages;
pgtable->iop.ops.unmap_pages = iommu_v1_unmap_pages;
pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
+ pgtable->iop.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;
return &pgtable->iop;
}
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
index e9ef2e0a62..f818a7e254 100644
--- a/drivers/iommu/amd/io_pgtable_v2.c
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -363,10 +363,10 @@ static void v2_free_pgtable(struct io_pgtable *iop)
if (!(pdom->flags & PD_IOMMUV2_MASK))
return;
- /*
- * Make changes visible to IOMMUs. No need to clear gcr3 entry
- * as gcr3 table is already freed.
- */
+ /* Clear gcr3 entry */
+ amd_iommu_domain_clear_gcr3(&pdom->domain, 0);
+
+ /* Make changes visible to IOMMUs */
amd_iommu_domain_update(pdom);
/* Free page table */
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 95bd7c25ba..fcc987f5d4 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -37,6 +37,7 @@
#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/dma.h>
+#include <uapi/linux/iommufd.h>
#include "amd_iommu.h"
#include "../dma-iommu.h"
@@ -44,8 +45,6 @@
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
-#define LOOP_TIMEOUT 100000
-
/* IO virtual address start page frame number */
#define IOVA_START_PFN (1)
#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
@@ -65,8 +64,8 @@ LIST_HEAD(hpet_map);
LIST_HEAD(acpihid_map);
const struct iommu_ops amd_iommu_ops;
+const struct iommu_dirty_ops amd_dirty_ops;
-static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
int amd_iommu_max_glx_val = -1;
/*
@@ -79,7 +78,6 @@ struct iommu_cmd {
struct kmem_cache *amd_iommu_irq_cache;
static void detach_device(struct device *dev);
-static int domain_enable_v2(struct protection_domain *domain, int pasids);
/****************************************************************************
*
@@ -322,24 +320,141 @@ static struct iommu_group *acpihid_device_group(struct device *dev)
return entry->group;
}
-static bool pci_iommuv2_capable(struct pci_dev *pdev)
+static inline bool pdev_pasid_supported(struct iommu_dev_data *dev_data)
{
- static const int caps[] = {
- PCI_EXT_CAP_ID_PRI,
- PCI_EXT_CAP_ID_PASID,
- };
- int i, pos;
+ return (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP);
+}
- if (!pci_ats_supported(pdev))
- return false;
+static u32 pdev_get_caps(struct pci_dev *pdev)
+{
+ int features;
+ u32 flags = 0;
+
+ if (pci_ats_supported(pdev))
+ flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
- for (i = 0; i < 2; ++i) {
- pos = pci_find_ext_capability(pdev, caps[i]);
- if (pos == 0)
- return false;
+ if (pci_pri_supported(pdev))
+ flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP;
+
+ features = pci_pasid_features(pdev);
+ if (features >= 0) {
+ flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
+
+ if (features & PCI_PASID_CAP_EXEC)
+ flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP;
+
+ if (features & PCI_PASID_CAP_PRIV)
+ flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP;
}
- return true;
+ return flags;
+}
+
+static inline int pdev_enable_cap_ats(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+ int ret = -EINVAL;
+
+ if (dev_data->ats_enabled)
+ return 0;
+
+ if (amd_iommu_iotlb_sup &&
+ (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP)) {
+ ret = pci_enable_ats(pdev, PAGE_SHIFT);
+ if (!ret) {
+ dev_data->ats_enabled = 1;
+ dev_data->ats_qdep = pci_ats_queue_depth(pdev);
+ }
+ }
+
+ return ret;
+}
+
+static inline void pdev_disable_cap_ats(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+
+ if (dev_data->ats_enabled) {
+ pci_disable_ats(pdev);
+ dev_data->ats_enabled = 0;
+ }
+}
+
+int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+ int ret = -EINVAL;
+
+ if (dev_data->pri_enabled)
+ return 0;
+
+ if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) {
+ /*
+ * First reset the PRI state of the device.
+ * FIXME: Hardcode number of outstanding requests for now
+ */
+ if (!pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) {
+ dev_data->pri_enabled = 1;
+ dev_data->pri_tlp = pci_prg_resp_pasid_required(pdev);
+
+ ret = 0;
+ }
+ }
+
+ return ret;
+}
+
+void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+
+ if (dev_data->pri_enabled) {
+ pci_disable_pri(pdev);
+ dev_data->pri_enabled = 0;
+ }
+}
+
+static inline int pdev_enable_cap_pasid(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+ int ret = -EINVAL;
+
+ if (dev_data->pasid_enabled)
+ return 0;
+
+ if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) {
+ /* Only allow access to user-accessible pages */
+ ret = pci_enable_pasid(pdev, 0);
+ if (!ret)
+ dev_data->pasid_enabled = 1;
+ }
+
+ return ret;
+}
+
+static inline void pdev_disable_cap_pasid(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+
+ if (dev_data->pasid_enabled) {
+ pci_disable_pasid(pdev);
+ dev_data->pasid_enabled = 0;
+ }
+}
+
+static void pdev_enable_caps(struct pci_dev *pdev)
+{
+ pdev_enable_cap_ats(pdev);
+ pdev_enable_cap_pasid(pdev);
+ amd_iommu_pdev_enable_cap_pri(pdev);
+
+}
+
+static void pdev_disable_caps(struct pci_dev *pdev)
+{
+ pdev_disable_cap_ats(pdev);
+ pdev_disable_cap_pasid(pdev);
+ amd_iommu_pdev_disable_cap_pri(pdev);
}
/*
@@ -399,8 +514,8 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev)
* it'll be forced to go into translation mode.
*/
if ((iommu_default_passthrough() || !amd_iommu_force_isolation) &&
- dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) {
- dev_data->iommu_v2 = iommu->is_iommu_v2;
+ dev_is_pci(dev) && amd_iommu_gt_ppr_supported()) {
+ dev_data->flags = pdev_get_caps(to_pci_dev(dev));
}
dev_iommu_priv_set(dev, dev_data);
@@ -701,24 +816,6 @@ static void iommu_poll_events(struct amd_iommu *iommu)
writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
}
-static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw)
-{
- struct amd_iommu_fault fault;
-
- if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
- pr_err_ratelimited("Unknown PPR request received\n");
- return;
- }
-
- fault.address = raw[1];
- fault.pasid = PPR_PASID(raw[0]);
- fault.sbdf = PCI_SEG_DEVID_TO_SBDF(iommu->pci_seg->id, PPR_DEVID(raw[0]));
- fault.tag = PPR_TAG(raw[0]);
- fault.flags = PPR_FLAGS(raw[0]);
-
- atomic_notifier_call_chain(&ppr_notifier, 0, &fault);
-}
-
static void iommu_poll_ppr_log(struct amd_iommu *iommu)
{
u32 head, tail;
@@ -764,8 +861,7 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu)
head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
- /* Handle PPR entry */
- iommu_handle_ppr_entry(iommu, entry);
+ /* TODO: PPR Handler will be added when we add IOPF support */
/* Refresh ring-buffer information */
head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
@@ -1094,7 +1190,7 @@ static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, u32 pasid,
}
static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, u32 pasid,
- int status, int tag, bool gn)
+ int status, int tag, u8 gn)
{
memset(cmd, 0, sizeof(*cmd));
@@ -1298,7 +1394,7 @@ static void amd_iommu_flush_irt_all(struct amd_iommu *iommu)
void iommu_flush_all_caches(struct amd_iommu *iommu)
{
- if (iommu_feature(iommu, FEATURE_IA)) {
+ if (check_feature(FEATURE_IA)) {
amd_iommu_flush_all(iommu);
} else {
amd_iommu_flush_dte_all(iommu);
@@ -1317,7 +1413,7 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data,
struct iommu_cmd cmd;
int qdep;
- qdep = dev_data->ats.qdep;
+ qdep = dev_data->ats_qdep;
iommu = rlookup_amd_iommu(dev_data->dev);
if (!iommu)
return -EINVAL;
@@ -1368,7 +1464,7 @@ static int device_flush_dte(struct iommu_dev_data *dev_data)
return ret;
}
- if (dev_data->ats.enabled)
+ if (dev_data->ats_enabled)
ret = device_flush_iotlb(dev_data, 0, ~0UL);
return ret;
@@ -1401,7 +1497,7 @@ static void __domain_flush_pages(struct protection_domain *domain,
list_for_each_entry(dev_data, &domain->dev_list, list) {
- if (!dev_data->ats.enabled)
+ if (!dev_data->ats_enabled)
continue;
ret |= device_flush_iotlb(dev_data, address, size);
@@ -1577,6 +1673,42 @@ static void free_gcr3_table(struct protection_domain *domain)
free_page((unsigned long)domain->gcr3_tbl);
}
+/*
+ * Number of GCR3 table levels required. Level must be 4-Kbyte
+ * page and can contain up to 512 entries.
+ */
+static int get_gcr3_levels(int pasids)
+{
+ int levels;
+
+ if (pasids == -1)
+ return amd_iommu_max_glx_val;
+
+ levels = get_count_order(pasids);
+
+ return levels ? (DIV_ROUND_UP(levels, 9) - 1) : levels;
+}
+
+/* Note: This function expects iommu_domain->lock to be held prior calling the function. */
+static int setup_gcr3_table(struct protection_domain *domain, int pasids)
+{
+ int levels = get_gcr3_levels(pasids);
+
+ if (levels > amd_iommu_max_glx_val)
+ return -EINVAL;
+
+ domain->gcr3_tbl = alloc_pgtable_page(domain->nid, GFP_ATOMIC);
+ if (domain->gcr3_tbl == NULL)
+ return -ENOMEM;
+
+ domain->glx = levels;
+ domain->flags |= PD_IOMMUV2_MASK;
+
+ amd_iommu_domain_update(domain);
+
+ return 0;
+}
+
static void set_dte_entry(struct amd_iommu *iommu, u16 devid,
struct protection_domain *domain, bool ats, bool ppr)
{
@@ -1605,10 +1737,11 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid,
if (ats)
flags |= DTE_FLAG_IOTLB;
- if (ppr) {
- if (iommu_feature(iommu, FEATURE_EPHSUP))
- pte_root |= 1ULL << DEV_ENTRY_PPR;
- }
+ if (ppr)
+ pte_root |= 1ULL << DEV_ENTRY_PPR;
+
+ if (domain->dirty_tracking)
+ pte_root |= DTE_FLAG_HAD;
if (domain->flags & PD_IOMMUV2_MASK) {
u64 gcr3 = iommu_virt_to_phys(domain->gcr3_tbl);
@@ -1685,7 +1818,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
iommu = rlookup_amd_iommu(dev_data->dev);
if (!iommu)
return;
- ats = dev_data->ats.enabled;
+ ats = dev_data->ats_enabled;
/* Update data structures */
dev_data->domain = domain;
@@ -1701,7 +1834,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
/* Update device table */
set_dte_entry(iommu, dev_data->devid, domain,
- ats, dev_data->iommu_v2);
+ ats, dev_data->ppr);
clone_aliases(iommu, dev_data->dev);
device_flush_dte(dev_data);
@@ -1736,48 +1869,6 @@ static void do_detach(struct iommu_dev_data *dev_data)
domain->dev_cnt -= 1;
}
-static void pdev_iommuv2_disable(struct pci_dev *pdev)
-{
- pci_disable_ats(pdev);
- pci_disable_pri(pdev);
- pci_disable_pasid(pdev);
-}
-
-static int pdev_pri_ats_enable(struct pci_dev *pdev)
-{
- int ret;
-
- /* Only allow access to user-accessible pages */
- ret = pci_enable_pasid(pdev, 0);
- if (ret)
- return ret;
-
- /* First reset the PRI state of the device */
- ret = pci_reset_pri(pdev);
- if (ret)
- goto out_err_pasid;
-
- /* Enable PRI */
- /* FIXME: Hardcode number of outstanding requests for now */
- ret = pci_enable_pri(pdev, 32);
- if (ret)
- goto out_err_pasid;
-
- ret = pci_enable_ats(pdev, PAGE_SHIFT);
- if (ret)
- goto out_err_pri;
-
- return 0;
-
-out_err_pri:
- pci_disable_pri(pdev);
-
-out_err_pasid:
- pci_disable_pasid(pdev);
-
- return ret;
-}
-
/*
* If a device is not yet associated with a domain, this function makes the
* device visible in the domain
@@ -1786,9 +1877,8 @@ static int attach_device(struct device *dev,
struct protection_domain *domain)
{
struct iommu_dev_data *dev_data;
- struct pci_dev *pdev;
unsigned long flags;
- int ret;
+ int ret = 0;
spin_lock_irqsave(&domain->lock, flags);
@@ -1796,45 +1886,13 @@ static int attach_device(struct device *dev,
spin_lock(&dev_data->lock);
- ret = -EBUSY;
- if (dev_data->domain != NULL)
+ if (dev_data->domain != NULL) {
+ ret = -EBUSY;
goto out;
-
- if (!dev_is_pci(dev))
- goto skip_ats_check;
-
- pdev = to_pci_dev(dev);
- if (domain->flags & PD_IOMMUV2_MASK) {
- struct iommu_domain *def_domain = iommu_get_dma_domain(dev);
-
- ret = -EINVAL;
-
- /*
- * In case of using AMD_IOMMU_V1 page table mode and the device
- * is enabling for PPR/ATS support (using v2 table),
- * we need to make sure that the domain type is identity map.
- */
- if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
- def_domain->type != IOMMU_DOMAIN_IDENTITY) {
- goto out;
- }
-
- if (dev_data->iommu_v2) {
- if (pdev_pri_ats_enable(pdev) != 0)
- goto out;
-
- dev_data->ats.enabled = true;
- dev_data->ats.qdep = pci_ats_queue_depth(pdev);
- dev_data->pri_tlp = pci_prg_resp_pasid_required(pdev);
- }
- } else if (amd_iommu_iotlb_sup &&
- pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
- dev_data->ats.enabled = true;
- dev_data->ats.qdep = pci_ats_queue_depth(pdev);
}
-skip_ats_check:
- ret = 0;
+ if (dev_is_pci(dev))
+ pdev_enable_caps(to_pci_dev(dev));
do_attach(dev_data, domain);
@@ -1882,15 +1940,8 @@ static void detach_device(struct device *dev)
do_detach(dev_data);
- if (!dev_is_pci(dev))
- goto out;
-
- if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2)
- pdev_iommuv2_disable(to_pci_dev(dev));
- else if (dev_data->ats.enabled)
- pci_disable_ats(to_pci_dev(dev));
-
- dev_data->ats.enabled = false;
+ if (dev_is_pci(dev))
+ pdev_disable_caps(to_pci_dev(dev));
out:
spin_unlock(&dev_data->lock);
@@ -1980,7 +2031,7 @@ static void update_device_table(struct protection_domain *domain)
if (!iommu)
continue;
set_dte_entry(iommu, dev_data->devid, domain,
- dev_data->ats.enabled, dev_data->iommu_v2);
+ dev_data->ats_enabled, dev_data->ppr);
clone_aliases(iommu, dev_data->dev);
}
}
@@ -2014,9 +2065,11 @@ void amd_iommu_domain_update(struct protection_domain *domain)
static void cleanup_domain(struct protection_domain *domain)
{
struct iommu_dev_data *entry;
- unsigned long flags;
- spin_lock_irqsave(&domain->lock, flags);
+ lockdep_assert_held(&domain->lock);
+
+ if (!domain->dev_cnt)
+ return;
while (!list_empty(&domain->dev_list)) {
entry = list_first_entry(&domain->dev_list,
@@ -2024,8 +2077,7 @@ static void cleanup_domain(struct protection_domain *domain)
BUG_ON(!entry->domain);
do_detach(entry);
}
-
- spin_unlock_irqrestore(&domain->lock, flags);
+ WARN_ON(domain->dev_cnt != 0);
}
static void protection_domain_free(struct protection_domain *domain)
@@ -2036,6 +2088,12 @@ static void protection_domain_free(struct protection_domain *domain)
if (domain->iop.pgtbl_cfg.tlb)
free_io_pgtable_ops(&domain->iop.iop.ops);
+ if (domain->flags & PD_IOMMUV2_MASK)
+ free_gcr3_table(domain);
+
+ if (domain->iop.root)
+ free_page((unsigned long)domain->iop.root);
+
if (domain->id)
domain_id_free(domain->id);
@@ -2048,18 +2106,10 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode)
BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL);
- spin_lock_init(&domain->lock);
- domain->id = domain_id_alloc();
- if (!domain->id)
- return -ENOMEM;
- INIT_LIST_HEAD(&domain->dev_list);
-
if (mode != PAGE_MODE_NONE) {
pt_root = (void *)get_zeroed_page(GFP_KERNEL);
- if (!pt_root) {
- domain_id_free(domain->id);
+ if (!pt_root)
return -ENOMEM;
- }
}
amd_iommu_domain_set_pgtable(domain, pt_root, mode);
@@ -2069,20 +2119,12 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode)
static int protection_domain_init_v2(struct protection_domain *domain)
{
- spin_lock_init(&domain->lock);
- domain->id = domain_id_alloc();
- if (!domain->id)
- return -ENOMEM;
- INIT_LIST_HEAD(&domain->dev_list);
-
domain->flags |= PD_GIOV_MASK;
domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
- if (domain_enable_v2(domain, 1)) {
- domain_id_free(domain->id);
+ if (setup_gcr3_table(domain, 1))
return -ENOMEM;
- }
return 0;
}
@@ -2092,57 +2134,60 @@ static struct protection_domain *protection_domain_alloc(unsigned int type)
struct io_pgtable_ops *pgtbl_ops;
struct protection_domain *domain;
int pgtable;
- int mode = DEFAULT_PGTABLE_LEVEL;
int ret;
+ domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+ if (!domain)
+ return NULL;
+
+ domain->id = domain_id_alloc();
+ if (!domain->id)
+ goto out_err;
+
+ spin_lock_init(&domain->lock);
+ INIT_LIST_HEAD(&domain->dev_list);
+ domain->nid = NUMA_NO_NODE;
+
+ switch (type) {
+ /* No need to allocate io pgtable ops in passthrough mode */
+ case IOMMU_DOMAIN_IDENTITY:
+ return domain;
+ case IOMMU_DOMAIN_DMA:
+ pgtable = amd_iommu_pgtable;
+ break;
/*
- * Force IOMMU v1 page table when iommu=pt and
- * when allocating domain for pass-through devices.
+ * Force IOMMU v1 page table when allocating
+ * domain for pass-through devices.
*/
- if (type == IOMMU_DOMAIN_IDENTITY) {
- pgtable = AMD_IOMMU_V1;
- mode = PAGE_MODE_NONE;
- } else if (type == IOMMU_DOMAIN_UNMANAGED) {
+ case IOMMU_DOMAIN_UNMANAGED:
pgtable = AMD_IOMMU_V1;
- } else if (type == IOMMU_DOMAIN_DMA || type == IOMMU_DOMAIN_DMA_FQ) {
- pgtable = amd_iommu_pgtable;
- } else {
- return NULL;
+ break;
+ default:
+ goto out_err;
}
- domain = kzalloc(sizeof(*domain), GFP_KERNEL);
- if (!domain)
- return NULL;
-
switch (pgtable) {
case AMD_IOMMU_V1:
- ret = protection_domain_init_v1(domain, mode);
+ ret = protection_domain_init_v1(domain, DEFAULT_PGTABLE_LEVEL);
break;
case AMD_IOMMU_V2:
ret = protection_domain_init_v2(domain);
break;
default:
ret = -EINVAL;
+ break;
}
if (ret)
goto out_err;
- /* No need to allocate io pgtable ops in passthrough mode */
- if (type == IOMMU_DOMAIN_IDENTITY)
- return domain;
-
- domain->nid = NUMA_NO_NODE;
-
pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain);
- if (!pgtbl_ops) {
- domain_id_free(domain->id);
+ if (!pgtbl_ops)
goto out_err;
- }
return domain;
out_err:
- kfree(domain);
+ protection_domain_free(domain);
return NULL;
}
@@ -2155,44 +2200,94 @@ static inline u64 dma_max_address(void)
return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1);
}
-static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
+static bool amd_iommu_hd_support(struct amd_iommu *iommu)
+{
+ return iommu && (iommu->features & FEATURE_HDSUP);
+}
+
+static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
+ struct device *dev, u32 flags)
{
+ bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
struct protection_domain *domain;
+ struct amd_iommu *iommu = NULL;
+
+ if (dev) {
+ iommu = rlookup_amd_iommu(dev);
+ if (!iommu)
+ return ERR_PTR(-ENODEV);
+ }
/*
* Since DTE[Mode]=0 is prohibited on SNP-enabled system,
* default to use IOMMU_DOMAIN_DMA[_FQ].
*/
if (amd_iommu_snp_en && (type == IOMMU_DOMAIN_IDENTITY))
- return NULL;
+ return ERR_PTR(-EINVAL);
+
+ if (dirty_tracking && !amd_iommu_hd_support(iommu))
+ return ERR_PTR(-EOPNOTSUPP);
domain = protection_domain_alloc(type);
if (!domain)
- return NULL;
+ return ERR_PTR(-ENOMEM);
domain->domain.geometry.aperture_start = 0;
domain->domain.geometry.aperture_end = dma_max_address();
domain->domain.geometry.force_aperture = true;
+ if (iommu) {
+ domain->domain.type = type;
+ domain->domain.pgsize_bitmap = iommu->iommu.ops->pgsize_bitmap;
+ domain->domain.ops = iommu->iommu.ops->default_domain_ops;
+
+ if (dirty_tracking)
+ domain->domain.dirty_ops = &amd_dirty_ops;
+ }
+
return &domain->domain;
}
-static void amd_iommu_domain_free(struct iommu_domain *dom)
+static struct iommu_domain *amd_iommu_domain_alloc(unsigned int type)
{
- struct protection_domain *domain;
+ struct iommu_domain *domain;
- domain = to_pdomain(dom);
+ domain = do_iommu_domain_alloc(type, NULL, 0);
+ if (IS_ERR(domain))
+ return NULL;
+
+ return domain;
+}
+
+static struct iommu_domain *
+amd_iommu_domain_alloc_user(struct device *dev, u32 flags,
+ struct iommu_domain *parent,
+ const struct iommu_user_data *user_data)
+
+{
+ unsigned int type = IOMMU_DOMAIN_UNMANAGED;
- if (domain->dev_cnt > 0)
- cleanup_domain(domain);
+ if ((flags & ~IOMMU_HWPT_ALLOC_DIRTY_TRACKING) || parent || user_data)
+ return ERR_PTR(-EOPNOTSUPP);
- BUG_ON(domain->dev_cnt != 0);
+ return do_iommu_domain_alloc(type, dev, flags);
+}
+
+static void amd_iommu_domain_free(struct iommu_domain *dom)
+{
+ struct protection_domain *domain;
+ unsigned long flags;
if (!dom)
return;
- if (domain->flags & PD_IOMMUV2_MASK)
- free_gcr3_table(domain);
+ domain = to_pdomain(dom);
+
+ spin_lock_irqsave(&domain->lock, flags);
+
+ cleanup_domain(domain);
+
+ spin_unlock_irqrestore(&domain->lock, flags);
protection_domain_free(domain);
}
@@ -2214,6 +2309,13 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
dev_data->defer_attach = false;
+ /*
+ * Restrict to devices with compatible IOMMU hardware support
+ * when enforcement of dirty tracking is enabled.
+ */
+ if (dom->dirty_ops && !amd_iommu_hd_support(iommu))
+ return -EINVAL;
+
if (dev_data->domain)
detach_device(dev);
@@ -2233,14 +2335,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
return ret;
}
-static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
- unsigned long iova, size_t size)
+static int amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
+ unsigned long iova, size_t size)
{
struct protection_domain *domain = to_pdomain(dom);
struct io_pgtable_ops *ops = &domain->iop.iop.ops;
if (ops->map_pages)
domain_flush_np_cache(domain, iova, size);
+ return 0;
}
static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova,
@@ -2332,6 +2435,11 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
return true;
case IOMMU_CAP_DEFERRED_FLUSH:
return true;
+ case IOMMU_CAP_DIRTY_TRACKING: {
+ struct amd_iommu *iommu = rlookup_amd_iommu(dev);
+
+ return amd_iommu_hd_support(iommu);
+ }
default:
break;
}
@@ -2339,6 +2447,73 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
return false;
}
+static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
+ bool enable)
+{
+ struct protection_domain *pdomain = to_pdomain(domain);
+ struct dev_table_entry *dev_table;
+ struct iommu_dev_data *dev_data;
+ bool domain_flush = false;
+ struct amd_iommu *iommu;
+ unsigned long flags;
+ u64 pte_root;
+
+ spin_lock_irqsave(&pdomain->lock, flags);
+ if (!(pdomain->dirty_tracking ^ enable)) {
+ spin_unlock_irqrestore(&pdomain->lock, flags);
+ return 0;
+ }
+
+ list_for_each_entry(dev_data, &pdomain->dev_list, list) {
+ iommu = rlookup_amd_iommu(dev_data->dev);
+ if (!iommu)
+ continue;
+
+ dev_table = get_dev_table(iommu);
+ pte_root = dev_table[dev_data->devid].data[0];
+
+ pte_root = (enable ? pte_root | DTE_FLAG_HAD :
+ pte_root & ~DTE_FLAG_HAD);
+
+ /* Flush device DTE */
+ dev_table[dev_data->devid].data[0] = pte_root;
+ device_flush_dte(dev_data);
+ domain_flush = true;
+ }
+
+ /* Flush IOTLB to mark IOPTE dirty on the next translation(s) */
+ if (domain_flush) {
+ amd_iommu_domain_flush_tlb_pde(pdomain);
+ amd_iommu_domain_flush_complete(pdomain);
+ }
+ pdomain->dirty_tracking = enable;
+ spin_unlock_irqrestore(&pdomain->lock, flags);
+
+ return 0;
+}
+
+static int amd_iommu_read_and_clear_dirty(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ unsigned long flags,
+ struct iommu_dirty_bitmap *dirty)
+{
+ struct protection_domain *pdomain = to_pdomain(domain);
+ struct io_pgtable_ops *ops = &pdomain->iop.iop.ops;
+ unsigned long lflags;
+
+ if (!ops || !ops->read_and_clear_dirty)
+ return -EOPNOTSUPP;
+
+ spin_lock_irqsave(&pdomain->lock, lflags);
+ if (!pdomain->dirty_tracking && dirty->bitmap) {
+ spin_unlock_irqrestore(&pdomain->lock, lflags);
+ return -EINVAL;
+ }
+ spin_unlock_irqrestore(&pdomain->lock, lflags);
+
+ return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
+}
+
static void amd_iommu_get_resv_regions(struct device *dev,
struct list_head *head)
{
@@ -2406,7 +2581,6 @@ bool amd_iommu_is_attach_deferred(struct device *dev)
return dev_data->defer_attach;
}
-EXPORT_SYMBOL_GPL(amd_iommu_is_attach_deferred);
static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
@@ -2446,7 +2620,7 @@ static int amd_iommu_def_domain_type(struct device *dev)
* and require remapping.
* - SNP is enabled, because it prohibits DTE[Mode]=0.
*/
- if (dev_data->iommu_v2 &&
+ if (pdev_pasid_supported(dev_data) &&
!cc_platform_has(CC_ATTR_MEM_ENCRYPT) &&
!amd_iommu_snp_en) {
return IOMMU_DOMAIN_IDENTITY;
@@ -2461,9 +2635,15 @@ static bool amd_iommu_enforce_cache_coherency(struct iommu_domain *domain)
return true;
}
+const struct iommu_dirty_ops amd_dirty_ops = {
+ .set_dirty_tracking = amd_iommu_set_dirty_tracking,
+ .read_and_clear_dirty = amd_iommu_read_and_clear_dirty,
+};
+
const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.domain_alloc = amd_iommu_domain_alloc,
+ .domain_alloc_user = amd_iommu_domain_alloc_user,
.probe_device = amd_iommu_probe_device,
.release_device = amd_iommu_release_device,
.probe_finalize = amd_iommu_probe_finalize,
@@ -2485,93 +2665,6 @@ const struct iommu_ops amd_iommu_ops = {
}
};
-/*****************************************************************************
- *
- * The next functions do a basic initialization of IOMMU for pass through
- * mode
- *
- * In passthrough mode the IOMMU is initialized and enabled but not used for
- * DMA-API translation.
- *
- *****************************************************************************/
-
-/* IOMMUv2 specific functions */
-int amd_iommu_register_ppr_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_register(&ppr_notifier, nb);
-}
-EXPORT_SYMBOL(amd_iommu_register_ppr_notifier);
-
-int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_unregister(&ppr_notifier, nb);
-}
-EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
-
-void amd_iommu_domain_direct_map(struct iommu_domain *dom)
-{
- struct protection_domain *domain = to_pdomain(dom);
- unsigned long flags;
-
- spin_lock_irqsave(&domain->lock, flags);
-
- if (domain->iop.pgtbl_cfg.tlb)
- free_io_pgtable_ops(&domain->iop.iop.ops);
-
- spin_unlock_irqrestore(&domain->lock, flags);
-}
-EXPORT_SYMBOL(amd_iommu_domain_direct_map);
-
-/* Note: This function expects iommu_domain->lock to be held prior calling the function. */
-static int domain_enable_v2(struct protection_domain *domain, int pasids)
-{
- int levels;
-
- /* Number of GCR3 table levels required */
- for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9)
- levels += 1;
-
- if (levels > amd_iommu_max_glx_val)
- return -EINVAL;
-
- domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC);
- if (domain->gcr3_tbl == NULL)
- return -ENOMEM;
-
- domain->glx = levels;
- domain->flags |= PD_IOMMUV2_MASK;
-
- amd_iommu_domain_update(domain);
-
- return 0;
-}
-
-int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
-{
- struct protection_domain *pdom = to_pdomain(dom);
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&pdom->lock, flags);
-
- /*
- * Save us all sanity checks whether devices already in the
- * domain support IOMMUv2. Just force that the domain has no
- * devices attached when it is switched into IOMMUv2 mode.
- */
- ret = -EBUSY;
- if (pdom->dev_cnt > 0 || pdom->flags & PD_IOMMUV2_MASK)
- goto out;
-
- if (!pdom->gcr3_tbl)
- ret = domain_enable_v2(pdom, pasids);
-
-out:
- spin_unlock_irqrestore(&pdom->lock, flags);
- return ret;
-}
-EXPORT_SYMBOL(amd_iommu_domain_enable_v2);
-
static int __flush_pasid(struct protection_domain *domain, u32 pasid,
u64 address, bool size)
{
@@ -2609,10 +2702,10 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid,
There might be non-IOMMUv2 capable devices in an IOMMUv2
* domain.
*/
- if (!dev_data->ats.enabled)
+ if (!dev_data->ats_enabled)
continue;
- qdep = dev_data->ats.qdep;
+ qdep = dev_data->ats_qdep;
iommu = rlookup_amd_iommu(dev_data->dev);
if (!iommu)
continue;
@@ -2653,7 +2746,6 @@ int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
return ret;
}
-EXPORT_SYMBOL(amd_iommu_flush_page);
static int __amd_iommu_flush_tlb(struct protection_domain *domain, u32 pasid)
{
@@ -2673,7 +2765,6 @@ int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid)
return ret;
}
-EXPORT_SYMBOL(amd_iommu_flush_tlb);
static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc)
{
@@ -2753,7 +2844,6 @@ int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
return ret;
}
-EXPORT_SYMBOL(amd_iommu_domain_set_gcr3);
int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid)
{
@@ -2767,7 +2857,6 @@ int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid)
return ret;
}
-EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3);
int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
int status, int tag)
@@ -2786,49 +2875,6 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
return iommu_queue_command(iommu, &cmd);
}
-EXPORT_SYMBOL(amd_iommu_complete_ppr);
-
-int amd_iommu_device_info(struct pci_dev *pdev,
- struct amd_iommu_device_info *info)
-{
- int max_pasids;
- int pos;
-
- if (pdev == NULL || info == NULL)
- return -EINVAL;
-
- if (!amd_iommu_v2_supported())
- return -EINVAL;
-
- memset(info, 0, sizeof(*info));
-
- if (pci_ats_supported(pdev))
- info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
-
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
- if (pos)
- info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP;
-
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
- if (pos) {
- int features;
-
- max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1));
- max_pasids = min(max_pasids, (1 << 20));
-
- info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
- info->max_pasids = min(pci_max_pasids(pdev), max_pasids);
-
- features = pci_pasid_features(pdev);
- if (features & PCI_PASID_CAP_EXEC)
- info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP;
- if (features & PCI_PASID_CAP_PRIV)
- info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(amd_iommu_device_info);
#ifdef CONFIG_IRQ_REMAP
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
deleted file mode 100644
index 57c2fb1146..0000000000
--- a/drivers/iommu/amd/iommu_v2.c
+++ /dev/null
@@ -1,996 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2010-2012 Advanced Micro Devices, Inc.
- * Author: Joerg Roedel <jroedel@suse.de>
- */
-
-#define pr_fmt(fmt) "AMD-Vi: " fmt
-
-#include <linux/refcount.h>
-#include <linux/mmu_notifier.h>
-#include <linux/amd-iommu.h>
-#include <linux/mm_types.h>
-#include <linux/profile.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/sched/mm.h>
-#include <linux/wait.h>
-#include <linux/pci.h>
-#include <linux/gfp.h>
-#include <linux/cc_platform.h>
-
-#include "amd_iommu.h"
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Joerg Roedel <jroedel@suse.de>");
-
-#define PRI_QUEUE_SIZE 512
-
-struct pri_queue {
- atomic_t inflight;
- bool finish;
- int status;
-};
-
-struct pasid_state {
- struct list_head list; /* For global state-list */
- refcount_t count; /* Reference count */
- unsigned mmu_notifier_count; /* Counting nested mmu_notifier
- calls */
- struct mm_struct *mm; /* mm_struct for the faults */
- struct mmu_notifier mn; /* mmu_notifier handle */
- struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */
- struct device_state *device_state; /* Link to our device_state */
- u32 pasid; /* PASID index */
- bool invalid; /* Used during setup and
- teardown of the pasid */
- spinlock_t lock; /* Protect pri_queues and
- mmu_notifer_count */
- wait_queue_head_t wq; /* To wait for count == 0 */
-};
-
-struct device_state {
- struct list_head list;
- u32 sbdf;
- atomic_t count;
- struct pci_dev *pdev;
- struct pasid_state **states;
- struct iommu_domain *domain;
- int pasid_levels;
- int max_pasids;
- amd_iommu_invalid_ppr_cb inv_ppr_cb;
- amd_iommu_invalidate_ctx inv_ctx_cb;
- spinlock_t lock;
- wait_queue_head_t wq;
-};
-
-struct fault {
- struct work_struct work;
- struct device_state *dev_state;
- struct pasid_state *state;
- struct mm_struct *mm;
- u64 address;
- u32 pasid;
- u16 tag;
- u16 finish;
- u16 flags;
-};
-
-static LIST_HEAD(state_list);
-static DEFINE_SPINLOCK(state_lock);
-
-static struct workqueue_struct *iommu_wq;
-
-static void free_pasid_states(struct device_state *dev_state);
-
-static struct device_state *__get_device_state(u32 sbdf)
-{
- struct device_state *dev_state;
-
- list_for_each_entry(dev_state, &state_list, list) {
- if (dev_state->sbdf == sbdf)
- return dev_state;
- }
-
- return NULL;
-}
-
-static struct device_state *get_device_state(u32 sbdf)
-{
- struct device_state *dev_state;
- unsigned long flags;
-
- spin_lock_irqsave(&state_lock, flags);
- dev_state = __get_device_state(sbdf);
- if (dev_state != NULL)
- atomic_inc(&dev_state->count);
- spin_unlock_irqrestore(&state_lock, flags);
-
- return dev_state;
-}
-
-static void free_device_state(struct device_state *dev_state)
-{
- struct iommu_group *group;
-
- /* Get rid of any remaining pasid states */
- free_pasid_states(dev_state);
-
- /*
- * Wait until the last reference is dropped before freeing
- * the device state.
- */
- wait_event(dev_state->wq, !atomic_read(&dev_state->count));
-
- /*
- * First detach device from domain - No more PRI requests will arrive
- * from that device after it is unbound from the IOMMUv2 domain.
- */
- group = iommu_group_get(&dev_state->pdev->dev);
- if (WARN_ON(!group))
- return;
-
- iommu_detach_group(dev_state->domain, group);
-
- iommu_group_put(group);
-
- /* Everything is down now, free the IOMMUv2 domain */
- iommu_domain_free(dev_state->domain);
-
- /* Finally get rid of the device-state */
- kfree(dev_state);
-}
-
-static void put_device_state(struct device_state *dev_state)
-{
- if (atomic_dec_and_test(&dev_state->count))
- wake_up(&dev_state->wq);
-}
-
-/* Must be called under dev_state->lock */
-static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
- u32 pasid, bool alloc)
-{
- struct pasid_state **root, **ptr;
- int level, index;
-
- level = dev_state->pasid_levels;
- root = dev_state->states;
-
- while (true) {
-
- index = (pasid >> (9 * level)) & 0x1ff;
- ptr = &root[index];
-
- if (level == 0)
- break;
-
- if (*ptr == NULL) {
- if (!alloc)
- return NULL;
-
- *ptr = (void *)get_zeroed_page(GFP_ATOMIC);
- if (*ptr == NULL)
- return NULL;
- }
-
- root = (struct pasid_state **)*ptr;
- level -= 1;
- }
-
- return ptr;
-}
-
-static int set_pasid_state(struct device_state *dev_state,
- struct pasid_state *pasid_state,
- u32 pasid)
-{
- struct pasid_state **ptr;
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&dev_state->lock, flags);
- ptr = __get_pasid_state_ptr(dev_state, pasid, true);
-
- ret = -ENOMEM;
- if (ptr == NULL)
- goto out_unlock;
-
- ret = -ENOMEM;
- if (*ptr != NULL)
- goto out_unlock;
-
- *ptr = pasid_state;
-
- ret = 0;
-
-out_unlock:
- spin_unlock_irqrestore(&dev_state->lock, flags);
-
- return ret;
-}
-
-static void clear_pasid_state(struct device_state *dev_state, u32 pasid)
-{
- struct pasid_state **ptr;
- unsigned long flags;
-
- spin_lock_irqsave(&dev_state->lock, flags);
- ptr = __get_pasid_state_ptr(dev_state, pasid, true);
-
- if (ptr == NULL)
- goto out_unlock;
-
- *ptr = NULL;
-
-out_unlock:
- spin_unlock_irqrestore(&dev_state->lock, flags);
-}
-
-static struct pasid_state *get_pasid_state(struct device_state *dev_state,
- u32 pasid)
-{
- struct pasid_state **ptr, *ret = NULL;
- unsigned long flags;
-
- spin_lock_irqsave(&dev_state->lock, flags);
- ptr = __get_pasid_state_ptr(dev_state, pasid, false);
-
- if (ptr == NULL)
- goto out_unlock;
-
- ret = *ptr;
- if (ret)
- refcount_inc(&ret->count);
-
-out_unlock:
- spin_unlock_irqrestore(&dev_state->lock, flags);
-
- return ret;
-}
-
-static void free_pasid_state(struct pasid_state *pasid_state)
-{
- kfree(pasid_state);
-}
-
-static void put_pasid_state(struct pasid_state *pasid_state)
-{
- if (refcount_dec_and_test(&pasid_state->count))
- wake_up(&pasid_state->wq);
-}
-
-static void put_pasid_state_wait(struct pasid_state *pasid_state)
-{
- if (!refcount_dec_and_test(&pasid_state->count))
- wait_event(pasid_state->wq, !refcount_read(&pasid_state->count));
- free_pasid_state(pasid_state);
-}
-
-static void unbind_pasid(struct pasid_state *pasid_state)
-{
- struct iommu_domain *domain;
-
- domain = pasid_state->device_state->domain;
-
- /*
- * Mark pasid_state as invalid, no more faults will we added to the
- * work queue after this is visible everywhere.
- */
- pasid_state->invalid = true;
-
- /* Make sure this is visible */
- smp_wmb();
-
- /* After this the device/pasid can't access the mm anymore */
- amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid);
-
- /* Make sure no more pending faults are in the queue */
- flush_workqueue(iommu_wq);
-}
-
-static void free_pasid_states_level1(struct pasid_state **tbl)
-{
- int i;
-
- for (i = 0; i < 512; ++i) {
- if (tbl[i] == NULL)
- continue;
-
- free_page((unsigned long)tbl[i]);
- }
-}
-
-static void free_pasid_states_level2(struct pasid_state **tbl)
-{
- struct pasid_state **ptr;
- int i;
-
- for (i = 0; i < 512; ++i) {
- if (tbl[i] == NULL)
- continue;
-
- ptr = (struct pasid_state **)tbl[i];
- free_pasid_states_level1(ptr);
- }
-}
-
-static void free_pasid_states(struct device_state *dev_state)
-{
- struct pasid_state *pasid_state;
- int i;
-
- for (i = 0; i < dev_state->max_pasids; ++i) {
- pasid_state = get_pasid_state(dev_state, i);
- if (pasid_state == NULL)
- continue;
-
- put_pasid_state(pasid_state);
-
- /* Clear the pasid state so that the pasid can be re-used */
- clear_pasid_state(dev_state, pasid_state->pasid);
-
- /*
- * This will call the mn_release function and
- * unbind the PASID
- */
- mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
-
- put_pasid_state_wait(pasid_state); /* Reference taken in
- amd_iommu_bind_pasid */
-
- /* Drop reference taken in amd_iommu_bind_pasid */
- put_device_state(dev_state);
- }
-
- if (dev_state->pasid_levels == 2)
- free_pasid_states_level2(dev_state->states);
- else if (dev_state->pasid_levels == 1)
- free_pasid_states_level1(dev_state->states);
- else
- BUG_ON(dev_state->pasid_levels != 0);
-
- free_page((unsigned long)dev_state->states);
-}
-
-static struct pasid_state *mn_to_state(struct mmu_notifier *mn)
-{
- return container_of(mn, struct pasid_state, mn);
-}
-
-static void mn_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- struct pasid_state *pasid_state;
- struct device_state *dev_state;
-
- pasid_state = mn_to_state(mn);
- dev_state = pasid_state->device_state;
-
- if ((start ^ (end - 1)) < PAGE_SIZE)
- amd_iommu_flush_page(dev_state->domain, pasid_state->pasid,
- start);
- else
- amd_iommu_flush_tlb(dev_state->domain, pasid_state->pasid);
-}
-
-static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
-{
- struct pasid_state *pasid_state;
- struct device_state *dev_state;
- bool run_inv_ctx_cb;
-
- might_sleep();
-
- pasid_state = mn_to_state(mn);
- dev_state = pasid_state->device_state;
- run_inv_ctx_cb = !pasid_state->invalid;
-
- if (run_inv_ctx_cb && dev_state->inv_ctx_cb)
- dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid);
-
- unbind_pasid(pasid_state);
-}
-
-static const struct mmu_notifier_ops iommu_mn = {
- .release = mn_release,
- .arch_invalidate_secondary_tlbs = mn_arch_invalidate_secondary_tlbs,
-};
-
-static void set_pri_tag_status(struct pasid_state *pasid_state,
- u16 tag, int status)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&pasid_state->lock, flags);
- pasid_state->pri[tag].status = status;
- spin_unlock_irqrestore(&pasid_state->lock, flags);
-}
-
-static void finish_pri_tag(struct device_state *dev_state,
- struct pasid_state *pasid_state,
- u16 tag)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&pasid_state->lock, flags);
- if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) &&
- pasid_state->pri[tag].finish) {
- amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid,
- pasid_state->pri[tag].status, tag);
- pasid_state->pri[tag].finish = false;
- pasid_state->pri[tag].status = PPR_SUCCESS;
- }
- spin_unlock_irqrestore(&pasid_state->lock, flags);
-}
-
-static void handle_fault_error(struct fault *fault)
-{
- int status;
-
- if (!fault->dev_state->inv_ppr_cb) {
- set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
- return;
- }
-
- status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev,
- fault->pasid,
- fault->address,
- fault->flags);
- switch (status) {
- case AMD_IOMMU_INV_PRI_RSP_SUCCESS:
- set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS);
- break;
- case AMD_IOMMU_INV_PRI_RSP_INVALID:
- set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
- break;
- case AMD_IOMMU_INV_PRI_RSP_FAIL:
- set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE);
- break;
- default:
- BUG();
- }
-}
-
-static bool access_error(struct vm_area_struct *vma, struct fault *fault)
-{
- unsigned long requested = 0;
-
- if (fault->flags & PPR_FAULT_EXEC)
- requested |= VM_EXEC;
-
- if (fault->flags & PPR_FAULT_READ)
- requested |= VM_READ;
-
- if (fault->flags & PPR_FAULT_WRITE)
- requested |= VM_WRITE;
-
- return (requested & ~vma->vm_flags) != 0;
-}
-
-static void do_fault(struct work_struct *work)
-{
- struct fault *fault = container_of(work, struct fault, work);
- struct vm_area_struct *vma;
- vm_fault_t ret = VM_FAULT_ERROR;
- unsigned int flags = 0;
- struct mm_struct *mm;
- u64 address;
-
- mm = fault->state->mm;
- address = fault->address;
-
- if (fault->flags & PPR_FAULT_USER)
- flags |= FAULT_FLAG_USER;
- if (fault->flags & PPR_FAULT_WRITE)
- flags |= FAULT_FLAG_WRITE;
- flags |= FAULT_FLAG_REMOTE;
-
- mmap_read_lock(mm);
- vma = vma_lookup(mm, address);
- if (!vma)
- /* failed to get a vma in the right range */
- goto out;
-
- /* Check if we have the right permissions on the vma */
- if (access_error(vma, fault))
- goto out;
-
- ret = handle_mm_fault(vma, address, flags, NULL);
-out:
- mmap_read_unlock(mm);
-
- if (ret & VM_FAULT_ERROR)
- /* failed to service fault */
- handle_fault_error(fault);
-
- finish_pri_tag(fault->dev_state, fault->state, fault->tag);
-
- put_pasid_state(fault->state);
-
- kfree(fault);
-}
-
-static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
-{
- struct amd_iommu_fault *iommu_fault;
- struct pasid_state *pasid_state;
- struct device_state *dev_state;
- struct pci_dev *pdev = NULL;
- unsigned long flags;
- struct fault *fault;
- bool finish;
- u16 tag, devid, seg_id;
- int ret;
-
- iommu_fault = data;
- tag = iommu_fault->tag & 0x1ff;
- finish = (iommu_fault->tag >> 9) & 1;
-
- seg_id = PCI_SBDF_TO_SEGID(iommu_fault->sbdf);
- devid = PCI_SBDF_TO_DEVID(iommu_fault->sbdf);
- pdev = pci_get_domain_bus_and_slot(seg_id, PCI_BUS_NUM(devid),
- devid & 0xff);
- if (!pdev)
- return -ENODEV;
-
- ret = NOTIFY_DONE;
-
- /* In kdump kernel pci dev is not initialized yet -> send INVALID */
- if (amd_iommu_is_attach_deferred(&pdev->dev)) {
- amd_iommu_complete_ppr(pdev, iommu_fault->pasid,
- PPR_INVALID, tag);
- goto out;
- }
-
- dev_state = get_device_state(iommu_fault->sbdf);
- if (dev_state == NULL)
- goto out;
-
- pasid_state = get_pasid_state(dev_state, iommu_fault->pasid);
- if (pasid_state == NULL || pasid_state->invalid) {
- /* We know the device but not the PASID -> send INVALID */
- amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid,
- PPR_INVALID, tag);
- goto out_drop_state;
- }
-
- spin_lock_irqsave(&pasid_state->lock, flags);
- atomic_inc(&pasid_state->pri[tag].inflight);
- if (finish)
- pasid_state->pri[tag].finish = true;
- spin_unlock_irqrestore(&pasid_state->lock, flags);
-
- fault = kzalloc(sizeof(*fault), GFP_ATOMIC);
- if (fault == NULL) {
- /* We are OOM - send success and let the device re-fault */
- finish_pri_tag(dev_state, pasid_state, tag);
- goto out_drop_state;
- }
-
- fault->dev_state = dev_state;
- fault->address = iommu_fault->address;
- fault->state = pasid_state;
- fault->tag = tag;
- fault->finish = finish;
- fault->pasid = iommu_fault->pasid;
- fault->flags = iommu_fault->flags;
- INIT_WORK(&fault->work, do_fault);
-
- queue_work(iommu_wq, &fault->work);
-
- ret = NOTIFY_OK;
-
-out_drop_state:
-
- if (ret != NOTIFY_OK && pasid_state)
- put_pasid_state(pasid_state);
-
- put_device_state(dev_state);
-
-out:
- pci_dev_put(pdev);
- return ret;
-}
-
-static struct notifier_block ppr_nb = {
- .notifier_call = ppr_notifier,
-};
-
-int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid,
- struct task_struct *task)
-{
- struct pasid_state *pasid_state;
- struct device_state *dev_state;
- struct mm_struct *mm;
- u32 sbdf;
- int ret;
-
- might_sleep();
-
- if (!amd_iommu_v2_supported())
- return -ENODEV;
-
- sbdf = get_pci_sbdf_id(pdev);
- dev_state = get_device_state(sbdf);
-
- if (dev_state == NULL)
- return -EINVAL;
-
- ret = -EINVAL;
- if (pasid >= dev_state->max_pasids)
- goto out;
-
- ret = -ENOMEM;
- pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL);
- if (pasid_state == NULL)
- goto out;
-
-
- refcount_set(&pasid_state->count, 1);
- init_waitqueue_head(&pasid_state->wq);
- spin_lock_init(&pasid_state->lock);
-
- mm = get_task_mm(task);
- pasid_state->mm = mm;
- pasid_state->device_state = dev_state;
- pasid_state->pasid = pasid;
- pasid_state->invalid = true; /* Mark as valid only if we are
- done with setting up the pasid */
- pasid_state->mn.ops = &iommu_mn;
-
- if (pasid_state->mm == NULL)
- goto out_free;
-
- ret = mmu_notifier_register(&pasid_state->mn, mm);
- if (ret)
- goto out_free;
-
- ret = set_pasid_state(dev_state, pasid_state, pasid);
- if (ret)
- goto out_unregister;
-
- ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid,
- __pa(pasid_state->mm->pgd));
- if (ret)
- goto out_clear_state;
-
- /* Now we are ready to handle faults */
- pasid_state->invalid = false;
-
- /*
- * Drop the reference to the mm_struct here. We rely on the
- * mmu_notifier release call-back to inform us when the mm
- * is going away.
- */
- mmput(mm);
-
- return 0;
-
-out_clear_state:
- clear_pasid_state(dev_state, pasid);
-
-out_unregister:
- mmu_notifier_unregister(&pasid_state->mn, mm);
- mmput(mm);
-
-out_free:
- free_pasid_state(pasid_state);
-
-out:
- put_device_state(dev_state);
-
- return ret;
-}
-EXPORT_SYMBOL(amd_iommu_bind_pasid);
-
-void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid)
-{
- struct pasid_state *pasid_state;
- struct device_state *dev_state;
- u32 sbdf;
-
- might_sleep();
-
- if (!amd_iommu_v2_supported())
- return;
-
- sbdf = get_pci_sbdf_id(pdev);
- dev_state = get_device_state(sbdf);
- if (dev_state == NULL)
- return;
-
- if (pasid >= dev_state->max_pasids)
- goto out;
-
- pasid_state = get_pasid_state(dev_state, pasid);
- if (pasid_state == NULL)
- goto out;
- /*
- * Drop reference taken here. We are safe because we still hold
- * the reference taken in the amd_iommu_bind_pasid function.
- */
- put_pasid_state(pasid_state);
-
- /* Clear the pasid state so that the pasid can be re-used */
- clear_pasid_state(dev_state, pasid_state->pasid);
-
- /*
- * Call mmu_notifier_unregister to drop our reference
- * to pasid_state->mm
- */
- mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
-
- put_pasid_state_wait(pasid_state); /* Reference taken in
- amd_iommu_bind_pasid */
-out:
- /* Drop reference taken in this function */
- put_device_state(dev_state);
-
- /* Drop reference taken in amd_iommu_bind_pasid */
- put_device_state(dev_state);
-}
-EXPORT_SYMBOL(amd_iommu_unbind_pasid);
-
-int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
-{
- struct device_state *dev_state;
- struct iommu_group *group;
- unsigned long flags;
- int ret, tmp;
- u32 sbdf;
-
- might_sleep();
-
- /*
- * When memory encryption is active the device is likely not in a
- * direct-mapped domain. Forbid using IOMMUv2 functionality for now.
- */
- if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
- return -ENODEV;
-
- if (!amd_iommu_v2_supported())
- return -ENODEV;
-
- if (pasids <= 0 || pasids > (PASID_MASK + 1))
- return -EINVAL;
-
- sbdf = get_pci_sbdf_id(pdev);
-
- dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL);
- if (dev_state == NULL)
- return -ENOMEM;
-
- spin_lock_init(&dev_state->lock);
- init_waitqueue_head(&dev_state->wq);
- dev_state->pdev = pdev;
- dev_state->sbdf = sbdf;
-
- tmp = pasids;
- for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9)
- dev_state->pasid_levels += 1;
-
- atomic_set(&dev_state->count, 1);
- dev_state->max_pasids = pasids;
-
- ret = -ENOMEM;
- dev_state->states = (void *)get_zeroed_page(GFP_KERNEL);
- if (dev_state->states == NULL)
- goto out_free_dev_state;
-
- dev_state->domain = iommu_domain_alloc(&pci_bus_type);
- if (dev_state->domain == NULL)
- goto out_free_states;
-
- /* See iommu_is_default_domain() */
- dev_state->domain->type = IOMMU_DOMAIN_IDENTITY;
- amd_iommu_domain_direct_map(dev_state->domain);
-
- ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids);
- if (ret)
- goto out_free_domain;
-
- group = iommu_group_get(&pdev->dev);
- if (!group) {
- ret = -EINVAL;
- goto out_free_domain;
- }
-
- ret = iommu_attach_group(dev_state->domain, group);
- if (ret != 0)
- goto out_drop_group;
-
- iommu_group_put(group);
-
- spin_lock_irqsave(&state_lock, flags);
-
- if (__get_device_state(sbdf) != NULL) {
- spin_unlock_irqrestore(&state_lock, flags);
- ret = -EBUSY;
- goto out_free_domain;
- }
-
- list_add_tail(&dev_state->list, &state_list);
-
- spin_unlock_irqrestore(&state_lock, flags);
-
- return 0;
-
-out_drop_group:
- iommu_group_put(group);
-
-out_free_domain:
- iommu_domain_free(dev_state->domain);
-
-out_free_states:
- free_page((unsigned long)dev_state->states);
-
-out_free_dev_state:
- kfree(dev_state);
-
- return ret;
-}
-EXPORT_SYMBOL(amd_iommu_init_device);
-
-void amd_iommu_free_device(struct pci_dev *pdev)
-{
- struct device_state *dev_state;
- unsigned long flags;
- u32 sbdf;
-
- if (!amd_iommu_v2_supported())
- return;
-
- sbdf = get_pci_sbdf_id(pdev);
-
- spin_lock_irqsave(&state_lock, flags);
-
- dev_state = __get_device_state(sbdf);
- if (dev_state == NULL) {
- spin_unlock_irqrestore(&state_lock, flags);
- return;
- }
-
- list_del(&dev_state->list);
-
- spin_unlock_irqrestore(&state_lock, flags);
-
- put_device_state(dev_state);
- free_device_state(dev_state);
-}
-EXPORT_SYMBOL(amd_iommu_free_device);
-
-int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
- amd_iommu_invalid_ppr_cb cb)
-{
- struct device_state *dev_state;
- unsigned long flags;
- u32 sbdf;
- int ret;
-
- if (!amd_iommu_v2_supported())
- return -ENODEV;
-
- sbdf = get_pci_sbdf_id(pdev);
-
- spin_lock_irqsave(&state_lock, flags);
-
- ret = -EINVAL;
- dev_state = __get_device_state(sbdf);
- if (dev_state == NULL)
- goto out_unlock;
-
- dev_state->inv_ppr_cb = cb;
-
- ret = 0;
-
-out_unlock:
- spin_unlock_irqrestore(&state_lock, flags);
-
- return ret;
-}
-EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb);
-
-int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
- amd_iommu_invalidate_ctx cb)
-{
- struct device_state *dev_state;
- unsigned long flags;
- u32 sbdf;
- int ret;
-
- if (!amd_iommu_v2_supported())
- return -ENODEV;
-
- sbdf = get_pci_sbdf_id(pdev);
-
- spin_lock_irqsave(&state_lock, flags);
-
- ret = -EINVAL;
- dev_state = __get_device_state(sbdf);
- if (dev_state == NULL)
- goto out_unlock;
-
- dev_state->inv_ctx_cb = cb;
-
- ret = 0;
-
-out_unlock:
- spin_unlock_irqrestore(&state_lock, flags);
-
- return ret;
-}
-EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb);
-
-static int __init amd_iommu_v2_init(void)
-{
- int ret;
-
- if (!amd_iommu_v2_supported()) {
- pr_info("AMD IOMMUv2 functionality not available on this system - This is not a bug.\n");
- /*
- * Load anyway to provide the symbols to other modules
- * which may use AMD IOMMUv2 optionally.
- */
- return 0;
- }
-
- ret = -ENOMEM;
- iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0);
- if (iommu_wq == NULL)
- goto out;
-
- amd_iommu_register_ppr_notifier(&ppr_nb);
-
- pr_info("AMD IOMMUv2 loaded and initialized\n");
-
- return 0;
-
-out:
- return ret;
-}
-
-static void __exit amd_iommu_v2_exit(void)
-{
- struct device_state *dev_state, *next;
- unsigned long flags;
- LIST_HEAD(freelist);
-
- if (!amd_iommu_v2_supported())
- return;
-
- amd_iommu_unregister_ppr_notifier(&ppr_nb);
-
- flush_workqueue(iommu_wq);
-
- /*
- * The loop below might call flush_workqueue(), so call
- * destroy_workqueue() after it
- */
- spin_lock_irqsave(&state_lock, flags);
-
- list_for_each_entry_safe(dev_state, next, &state_list, list) {
- WARN_ON_ONCE(1);
-
- put_device_state(dev_state);
- list_del(&dev_state->list);
- list_add_tail(&dev_state->list, &freelist);
- }
-
- spin_unlock_irqrestore(&state_lock, flags);
-
- /*
- * Since free_device_state waits on the count to be zero,
- * we need to free dev_state outside the spinlock.
- */
- list_for_each_entry_safe(dev_state, next, &freelist, list) {
- list_del(&dev_state->list);
- free_device_state(dev_state);
- }
-
- destroy_workqueue(iommu_wq);
-}
-
-module_init(amd_iommu_v2_init);
-module_exit(amd_iommu_v2_exit);
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index 0b89275084..ee05f4824b 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -196,7 +196,6 @@ struct apple_dart_hw {
* @lock: lock for hardware operations involving this dart
* @pgsize: pagesize supported by this DART
* @supports_bypass: indicates if this DART supports bypass mode
- * @force_bypass: force bypass mode due to pagesize mismatch?
* @sid2group: maps stream ids to iommu_groups
* @iommu: iommu core device
*/
@@ -217,7 +216,6 @@ struct apple_dart {
u32 pgsize;
u32 num_streams;
u32 supports_bypass : 1;
- u32 force_bypass : 1;
struct iommu_group *sid2group[DART_MAX_STREAMS];
struct iommu_device iommu;
@@ -506,10 +504,11 @@ static void apple_dart_iotlb_sync(struct iommu_domain *domain,
apple_dart_domain_flush_tlb(to_dart_domain(domain));
}
-static void apple_dart_iotlb_sync_map(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static int apple_dart_iotlb_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
apple_dart_domain_flush_tlb(to_dart_domain(domain));
+ return 0;
}
static phys_addr_t apple_dart_iova_to_phys(struct iommu_domain *domain,
@@ -568,15 +567,17 @@ apple_dart_setup_translation(struct apple_dart_domain *domain,
stream_map->dart->hw->invalidate_tlb(stream_map);
}
-static int apple_dart_finalize_domain(struct iommu_domain *domain,
+static int apple_dart_finalize_domain(struct apple_dart_domain *dart_domain,
struct apple_dart_master_cfg *cfg)
{
- struct apple_dart_domain *dart_domain = to_dart_domain(domain);
struct apple_dart *dart = cfg->stream_maps[0].dart;
struct io_pgtable_cfg pgtbl_cfg;
int ret = 0;
int i, j;
+ if (dart->pgsize > PAGE_SIZE)
+ return -EINVAL;
+
mutex_lock(&dart_domain->init_lock);
if (dart_domain->finalized)
@@ -597,17 +598,18 @@ static int apple_dart_finalize_domain(struct iommu_domain *domain,
.iommu_dev = dart->dev,
};
- dart_domain->pgtbl_ops =
- alloc_io_pgtable_ops(dart->hw->fmt, &pgtbl_cfg, domain);
+ dart_domain->pgtbl_ops = alloc_io_pgtable_ops(dart->hw->fmt, &pgtbl_cfg,
+ &dart_domain->domain);
if (!dart_domain->pgtbl_ops) {
ret = -ENOMEM;
goto done;
}
- domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
- domain->geometry.aperture_start = 0;
- domain->geometry.aperture_end = (dma_addr_t)DMA_BIT_MASK(dart->ias);
- domain->geometry.force_aperture = true;
+ dart_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
+ dart_domain->domain.geometry.aperture_start = 0;
+ dart_domain->domain.geometry.aperture_end =
+ (dma_addr_t)DMA_BIT_MASK(dart->ias);
+ dart_domain->domain.geometry.force_aperture = true;
dart_domain->finalized = true;
@@ -651,47 +653,72 @@ static int apple_dart_domain_add_streams(struct apple_dart_domain *domain,
true);
}
-static int apple_dart_attach_dev(struct iommu_domain *domain,
- struct device *dev)
+static int apple_dart_attach_dev_paging(struct iommu_domain *domain,
+ struct device *dev)
{
int ret, i;
struct apple_dart_stream_map *stream_map;
struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
struct apple_dart_domain *dart_domain = to_dart_domain(domain);
- if (cfg->stream_maps[0].dart->force_bypass &&
- domain->type != IOMMU_DOMAIN_IDENTITY)
- return -EINVAL;
- if (!cfg->stream_maps[0].dart->supports_bypass &&
- domain->type == IOMMU_DOMAIN_IDENTITY)
- return -EINVAL;
+ ret = apple_dart_finalize_domain(dart_domain, cfg);
+ if (ret)
+ return ret;
- ret = apple_dart_finalize_domain(domain, cfg);
+ ret = apple_dart_domain_add_streams(dart_domain, cfg);
if (ret)
return ret;
- switch (domain->type) {
- default:
- ret = apple_dart_domain_add_streams(dart_domain, cfg);
- if (ret)
- return ret;
+ for_each_stream_map(i, cfg, stream_map)
+ apple_dart_setup_translation(dart_domain, stream_map);
+ return 0;
+}
- for_each_stream_map(i, cfg, stream_map)
- apple_dart_setup_translation(dart_domain, stream_map);
- break;
- case IOMMU_DOMAIN_BLOCKED:
- for_each_stream_map(i, cfg, stream_map)
- apple_dart_hw_disable_dma(stream_map);
- break;
- case IOMMU_DOMAIN_IDENTITY:
- for_each_stream_map(i, cfg, stream_map)
- apple_dart_hw_enable_bypass(stream_map);
- break;
- }
+static int apple_dart_attach_dev_identity(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+ struct apple_dart_stream_map *stream_map;
+ int i;
- return ret;
+ if (!cfg->stream_maps[0].dart->supports_bypass)
+ return -EINVAL;
+
+ for_each_stream_map(i, cfg, stream_map)
+ apple_dart_hw_enable_bypass(stream_map);
+ return 0;
}
+static const struct iommu_domain_ops apple_dart_identity_ops = {
+ .attach_dev = apple_dart_attach_dev_identity,
+};
+
+static struct iommu_domain apple_dart_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &apple_dart_identity_ops,
+};
+
+static int apple_dart_attach_dev_blocked(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+ struct apple_dart_stream_map *stream_map;
+ int i;
+
+ for_each_stream_map(i, cfg, stream_map)
+ apple_dart_hw_disable_dma(stream_map);
+ return 0;
+}
+
+static const struct iommu_domain_ops apple_dart_blocked_ops = {
+ .attach_dev = apple_dart_attach_dev_blocked,
+};
+
+static struct iommu_domain apple_dart_blocked_domain = {
+ .type = IOMMU_DOMAIN_BLOCKED,
+ .ops = &apple_dart_blocked_ops,
+};
+
static struct iommu_device *apple_dart_probe_device(struct device *dev)
{
struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
@@ -717,24 +744,26 @@ static void apple_dart_release_device(struct device *dev)
kfree(cfg);
}
-static struct iommu_domain *apple_dart_domain_alloc(unsigned int type)
+static struct iommu_domain *apple_dart_domain_alloc_paging(struct device *dev)
{
struct apple_dart_domain *dart_domain;
- if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED &&
- type != IOMMU_DOMAIN_IDENTITY && type != IOMMU_DOMAIN_BLOCKED)
- return NULL;
-
dart_domain = kzalloc(sizeof(*dart_domain), GFP_KERNEL);
if (!dart_domain)
return NULL;
mutex_init(&dart_domain->init_lock);
- /* no need to allocate pgtbl_ops or do any other finalization steps */
- if (type == IOMMU_DOMAIN_IDENTITY || type == IOMMU_DOMAIN_BLOCKED)
- dart_domain->finalized = true;
+ if (dev) {
+ struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+ int ret;
+ ret = apple_dart_finalize_domain(dart_domain, cfg);
+ if (ret) {
+ kfree(dart_domain);
+ return ERR_PTR(ret);
+ }
+ }
return &dart_domain->domain;
}
@@ -770,8 +799,6 @@ static int apple_dart_of_xlate(struct device *dev, struct of_phandle_args *args)
if (cfg_dart) {
if (cfg_dart->supports_bypass != dart->supports_bypass)
return -EINVAL;
- if (cfg_dart->force_bypass != dart->force_bypass)
- return -EINVAL;
if (cfg_dart->pgsize != dart->pgsize)
return -EINVAL;
}
@@ -913,7 +940,7 @@ static int apple_dart_def_domain_type(struct device *dev)
{
struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
- if (cfg->stream_maps[0].dart->force_bypass)
+ if (cfg->stream_maps[0].dart->pgsize > PAGE_SIZE)
return IOMMU_DOMAIN_IDENTITY;
if (!cfg->stream_maps[0].dart->supports_bypass)
return IOMMU_DOMAIN_DMA;
@@ -947,7 +974,9 @@ static void apple_dart_get_resv_regions(struct device *dev,
}
static const struct iommu_ops apple_dart_iommu_ops = {
- .domain_alloc = apple_dart_domain_alloc,
+ .identity_domain = &apple_dart_identity_domain,
+ .blocked_domain = &apple_dart_blocked_domain,
+ .domain_alloc_paging = apple_dart_domain_alloc_paging,
.probe_device = apple_dart_probe_device,
.release_device = apple_dart_release_device,
.device_group = apple_dart_device_group,
@@ -957,7 +986,7 @@ static const struct iommu_ops apple_dart_iommu_ops = {
.pgsize_bitmap = -1UL, /* Restricted during dart probe */
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
- .attach_dev = apple_dart_attach_dev,
+ .attach_dev = apple_dart_attach_dev_paging,
.map_pages = apple_dart_map_pages,
.unmap_pages = apple_dart_unmap_pages,
.flush_iotlb_all = apple_dart_flush_iotlb_all,
@@ -1111,8 +1140,6 @@ static int apple_dart_probe(struct platform_device *pdev)
goto err_clk_disable;
}
- dart->force_bypass = dart->pgsize > PAGE_SIZE;
-
ret = apple_dart_hw_reset(dart);
if (ret)
goto err_clk_disable;
@@ -1136,7 +1163,8 @@ static int apple_dart_probe(struct platform_device *pdev)
dev_info(
&pdev->dev,
"DART [pagesize %x, %d streams, bypass support: %d, bypass forced: %d] initialized\n",
- dart->pgsize, dart->num_streams, dart->supports_bypass, dart->force_bypass);
+ dart->pgsize, dart->num_streams, dart->supports_bypass,
+ dart->pgsize > PAGE_SIZE);
return 0;
err_sysfs_remove:
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 8a16cd3ef4..4a27fbdb2d 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -25,11 +25,9 @@ struct arm_smmu_mmu_notifier {
#define mn_to_smmu(mn) container_of(mn, struct arm_smmu_mmu_notifier, mn)
struct arm_smmu_bond {
- struct iommu_sva sva;
struct mm_struct *mm;
struct arm_smmu_mmu_notifier *smmu_mn;
struct list_head list;
- refcount_t refs;
};
#define sva_to_bond(handle) \
@@ -38,6 +36,25 @@ struct arm_smmu_bond {
static DEFINE_MUTEX(sva_lock);
/*
+ * Write the CD to the CD tables for all masters that this domain is attached
+ * to. Note that this is only used to update existing CD entries in the target
+ * CD table, for which it's assumed that arm_smmu_write_ctx_desc can't fail.
+ */
+static void arm_smmu_update_ctx_desc_devices(struct arm_smmu_domain *smmu_domain,
+ int ssid,
+ struct arm_smmu_ctx_desc *cd)
+{
+ struct arm_smmu_master *master;
+ unsigned long flags;
+
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_for_each_entry(master, &smmu_domain->devices, domain_head) {
+ arm_smmu_write_ctx_desc(master, ssid, cd);
+ }
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+}
+
+/*
* Check if the CPU ASID is available on the SMMU side. If a private context
* descriptor is using it, try to replace it.
*/
@@ -62,7 +79,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
return cd;
}
- smmu_domain = container_of(cd, struct arm_smmu_domain, s1_cfg.cd);
+ smmu_domain = container_of(cd, struct arm_smmu_domain, cd);
smmu = smmu_domain->smmu;
ret = xa_alloc(&arm_smmu_asid_xa, &new_asid, cd,
@@ -80,7 +97,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
* be some overlap between use of both ASIDs, until we invalidate the
* TLB.
*/
- arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, cd);
+ arm_smmu_update_ctx_desc_devices(smmu_domain, IOMMU_NO_PASID, cd);
/* Invalidate TLB entries previously associated with that context */
arm_smmu_tlb_inv_asid(smmu, asid);
@@ -229,7 +246,8 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
smmu_domain);
}
- arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, start, size);
+ arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), start,
+ size);
}
static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
@@ -247,10 +265,11 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
* DMA may still be running. Keep the cd valid to avoid C_BAD_CD events,
* but disable translation.
*/
- arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, &quiet_cd);
+ arm_smmu_update_ctx_desc_devices(smmu_domain, mm_get_enqcmd_pasid(mm),
+ &quiet_cd);
arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid);
- arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0);
+ arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), 0, 0);
smmu_mn->cleared = true;
mutex_unlock(&sva_lock);
@@ -304,16 +323,9 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
goto err_free_cd;
}
- ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, cd);
- if (ret)
- goto err_put_notifier;
-
list_add(&smmu_mn->list, &smmu_domain->mmu_notifiers);
return smmu_mn;
-err_put_notifier:
- /* Frees smmu_mn */
- mmu_notifier_put(&smmu_mn->mn);
err_free_cd:
arm_smmu_free_shared_cd(cd);
return ERR_PTR(ret);
@@ -329,7 +341,6 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
return;
list_del(&smmu_mn->list);
- arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, NULL);
/*
* If we went through clear(), we've already invalidated, and no
@@ -337,7 +348,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
*/
if (!smmu_mn->cleared) {
arm_smmu_tlb_inv_asid(smmu_domain->smmu, cd->asid);
- arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0);
+ arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), 0,
+ 0);
}
/* Frees smmu_mn */
@@ -345,8 +357,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
arm_smmu_free_shared_cd(cd);
}
-static struct iommu_sva *
-__arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
+static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid,
+ struct mm_struct *mm)
{
int ret;
struct arm_smmu_bond *bond;
@@ -355,23 +367,13 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
if (!master || !master->sva_enabled)
- return ERR_PTR(-ENODEV);
-
- /* If bind() was already called for this {dev, mm} pair, reuse it. */
- list_for_each_entry(bond, &master->bonds, list) {
- if (bond->mm == mm) {
- refcount_inc(&bond->refs);
- return &bond->sva;
- }
- }
+ return -ENODEV;
bond = kzalloc(sizeof(*bond), GFP_KERNEL);
if (!bond)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
bond->mm = mm;
- bond->sva.dev = dev;
- refcount_set(&bond->refs, 1);
bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm);
if (IS_ERR(bond->smmu_mn)) {
@@ -379,12 +381,18 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
goto err_free_bond;
}
+ ret = arm_smmu_write_ctx_desc(master, pasid, bond->smmu_mn->cd);
+ if (ret)
+ goto err_put_notifier;
+
list_add(&bond->list, &master->bonds);
- return &bond->sva;
+ return 0;
+err_put_notifier:
+ arm_smmu_mmu_notifier_put(bond->smmu_mn);
err_free_bond:
kfree(bond);
- return ERR_PTR(ret);
+ return ret;
}
bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
@@ -543,6 +551,9 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
mutex_lock(&sva_lock);
+
+ arm_smmu_write_ctx_desc(master, id, NULL);
+
list_for_each_entry(t, &master->bonds, list) {
if (t->mm == mm) {
bond = t;
@@ -550,7 +561,7 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
}
}
- if (!WARN_ON(!bond) && refcount_dec_and_test(&bond->refs)) {
+ if (!WARN_ON(!bond)) {
list_del(&bond->list);
arm_smmu_mmu_notifier_put(bond->smmu_mn);
kfree(bond);
@@ -562,13 +573,10 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t id)
{
int ret = 0;
- struct iommu_sva *handle;
struct mm_struct *mm = domain->mm;
mutex_lock(&sva_lock);
- handle = __arm_smmu_sva_bind(dev, mm);
- if (IS_ERR(handle))
- ret = PTR_ERR(handle);
+ ret = __arm_smmu_sva_bind(dev, id, mm);
mutex_unlock(&sva_lock);
return ret;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index bd0a596f98..7445454c2a 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -971,14 +971,12 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}
-static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
+static void arm_smmu_sync_cd(struct arm_smmu_master *master,
int ssid, bool leaf)
{
size_t i;
- unsigned long flags;
- struct arm_smmu_master *master;
struct arm_smmu_cmdq_batch cmds;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_cmdq_ent cmd = {
.opcode = CMDQ_OP_CFGI_CD,
.cfgi = {
@@ -988,15 +986,10 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
};
cmds.num = 0;
-
- spin_lock_irqsave(&smmu_domain->devices_lock, flags);
- list_for_each_entry(master, &smmu_domain->devices, domain_head) {
- for (i = 0; i < master->num_streams; i++) {
- cmd.cfgi.sid = master->streams[i].id;
- arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
- }
+ for (i = 0; i < master->num_streams; i++) {
+ cmd.cfgi.sid = master->streams[i].id;
+ arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
}
- spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
arm_smmu_cmdq_batch_submit(smmu, &cmds);
}
@@ -1026,34 +1019,33 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst,
WRITE_ONCE(*dst, cpu_to_le64(val));
}
-static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
- u32 ssid)
+static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
{
__le64 *l1ptr;
unsigned int idx;
struct arm_smmu_l1_ctx_desc *l1_desc;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
- return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
+ if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
+ return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS;
idx = ssid >> CTXDESC_SPLIT;
- l1_desc = &cdcfg->l1_desc[idx];
+ l1_desc = &cd_table->l1_desc[idx];
if (!l1_desc->l2ptr) {
if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
return NULL;
- l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
+ l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
/* An invalid L1CD can be cached */
- arm_smmu_sync_cd(smmu_domain, ssid, false);
+ arm_smmu_sync_cd(master, ssid, false);
}
idx = ssid & (CTXDESC_L2_ENTRIES - 1);
return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
}
-int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
+int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
struct arm_smmu_ctx_desc *cd)
{
/*
@@ -1070,11 +1062,12 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
u64 val;
bool cd_live;
__le64 *cdptr;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
+ if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
return -E2BIG;
- cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
+ cdptr = arm_smmu_get_cd_ptr(master, ssid);
if (!cdptr)
return -ENOMEM;
@@ -1098,11 +1091,11 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
cdptr[3] = cpu_to_le64(cd->mair);
/*
- * STE is live, and the SMMU might read dwords of this CD in any
+ * STE may be live, and the SMMU might read dwords of this CD in any
* order. Ensure that it observes valid values before reading
* V=1.
*/
- arm_smmu_sync_cd(smmu_domain, ssid, true);
+ arm_smmu_sync_cd(master, ssid, true);
val = cd->tcr |
#ifdef __BIG_ENDIAN
@@ -1114,7 +1107,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
CTXDESC_CD_0_V;
- if (smmu_domain->stall_enabled)
+ if (cd_table->stall_enabled)
val |= CTXDESC_CD_0_S;
}
@@ -1128,44 +1121,45 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
* without first making the structure invalid.
*/
WRITE_ONCE(cdptr[0], cpu_to_le64(val));
- arm_smmu_sync_cd(smmu_domain, ssid, true);
+ arm_smmu_sync_cd(master, ssid, true);
return 0;
}
-static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
+static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
{
int ret;
size_t l1size;
size_t max_contexts;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
- struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- max_contexts = 1 << cfg->s1cdmax;
+ cd_table->stall_enabled = master->stall_enabled;
+ cd_table->s1cdmax = master->ssid_bits;
+ max_contexts = 1 << cd_table->s1cdmax;
if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
max_contexts <= CTXDESC_L2_ENTRIES) {
- cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
- cdcfg->num_l1_ents = max_contexts;
+ cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
+ cd_table->num_l1_ents = max_contexts;
l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
} else {
- cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
- cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
+ cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
+ cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts,
CTXDESC_L2_ENTRIES);
- cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
- sizeof(*cdcfg->l1_desc),
+ cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents,
+ sizeof(*cd_table->l1_desc),
GFP_KERNEL);
- if (!cdcfg->l1_desc)
+ if (!cd_table->l1_desc)
return -ENOMEM;
- l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
+ l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
}
- cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
+ cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma,
GFP_KERNEL);
- if (!cdcfg->cdtab) {
+ if (!cd_table->cdtab) {
dev_warn(smmu->dev, "failed to allocate context descriptor\n");
ret = -ENOMEM;
goto err_free_l1;
@@ -1174,42 +1168,42 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
return 0;
err_free_l1:
- if (cdcfg->l1_desc) {
- devm_kfree(smmu->dev, cdcfg->l1_desc);
- cdcfg->l1_desc = NULL;
+ if (cd_table->l1_desc) {
+ devm_kfree(smmu->dev, cd_table->l1_desc);
+ cd_table->l1_desc = NULL;
}
return ret;
}
-static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
+static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
{
int i;
size_t size, l1size;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- if (cdcfg->l1_desc) {
+ if (cd_table->l1_desc) {
size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
- for (i = 0; i < cdcfg->num_l1_ents; i++) {
- if (!cdcfg->l1_desc[i].l2ptr)
+ for (i = 0; i < cd_table->num_l1_ents; i++) {
+ if (!cd_table->l1_desc[i].l2ptr)
continue;
dmam_free_coherent(smmu->dev, size,
- cdcfg->l1_desc[i].l2ptr,
- cdcfg->l1_desc[i].l2ptr_dma);
+ cd_table->l1_desc[i].l2ptr,
+ cd_table->l1_desc[i].l2ptr_dma);
}
- devm_kfree(smmu->dev, cdcfg->l1_desc);
- cdcfg->l1_desc = NULL;
+ devm_kfree(smmu->dev, cd_table->l1_desc);
+ cd_table->l1_desc = NULL;
- l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
+ l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
} else {
- l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
+ l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
}
- dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
- cdcfg->cdtab_dma = 0;
- cdcfg->cdtab = NULL;
+ dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma);
+ cd_table->cdtab_dma = 0;
+ cd_table->cdtab = NULL;
}
bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
@@ -1276,7 +1270,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
u64 val = le64_to_cpu(dst[0]);
bool ste_live = false;
struct arm_smmu_device *smmu = NULL;
- struct arm_smmu_s1_cfg *s1_cfg = NULL;
+ struct arm_smmu_ctx_desc_cfg *cd_table = NULL;
struct arm_smmu_s2_cfg *s2_cfg = NULL;
struct arm_smmu_domain *smmu_domain = NULL;
struct arm_smmu_cmdq_ent prefetch_cmd = {
@@ -1294,7 +1288,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
if (smmu_domain) {
switch (smmu_domain->stage) {
case ARM_SMMU_DOMAIN_S1:
- s1_cfg = &smmu_domain->s1_cfg;
+ cd_table = &master->cd_table;
break;
case ARM_SMMU_DOMAIN_S2:
case ARM_SMMU_DOMAIN_NESTED:
@@ -1325,7 +1319,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
val = STRTAB_STE_0_V;
/* Bypass/fault */
- if (!smmu_domain || !(s1_cfg || s2_cfg)) {
+ if (!smmu_domain || !(cd_table || s2_cfg)) {
if (!smmu_domain && disable_bypass)
val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
else
@@ -1344,7 +1338,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
return;
}
- if (s1_cfg) {
+ if (cd_table) {
u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
@@ -1360,10 +1354,10 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
!master->stall_enabled)
dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
- val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
+ val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
- FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
- FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
+ FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) |
+ FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt);
}
if (s2_cfg) {
@@ -1869,7 +1863,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
* careful, 007.
*/
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
- arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
+ arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
} else {
cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
@@ -1962,7 +1956,7 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
- cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
+ cmd.tlbi.asid = smmu_domain->cd.asid;
} else {
cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
@@ -2067,15 +2061,11 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
free_io_pgtable_ops(smmu_domain->pgtbl_ops);
- /* Free the CD and ASID, if we allocated them */
+ /* Free the ASID or VMID */
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
- struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
-
/* Prevent SVA from touching the CD while we're freeing it */
mutex_lock(&arm_smmu_asid_lock);
- if (cfg->cdcfg.cdtab)
- arm_smmu_free_cd_tables(smmu_domain);
- arm_smmu_free_asid(&cfg->cd);
+ arm_smmu_free_asid(&smmu_domain->cd);
mutex_unlock(&arm_smmu_asid_lock);
} else {
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
@@ -2087,66 +2077,43 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
}
static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
- struct arm_smmu_master *master,
struct io_pgtable_cfg *pgtbl_cfg)
{
int ret;
u32 asid;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
+ struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
- refcount_set(&cfg->cd.refs, 1);
+ refcount_set(&cd->refs, 1);
/* Prevent SVA from modifying the ASID until it is written to the CD */
mutex_lock(&arm_smmu_asid_lock);
- ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
+ ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd,
XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
if (ret)
goto out_unlock;
- cfg->s1cdmax = master->ssid_bits;
-
- smmu_domain->stall_enabled = master->stall_enabled;
-
- ret = arm_smmu_alloc_cd_tables(smmu_domain);
- if (ret)
- goto out_free_asid;
-
- cfg->cd.asid = (u16)asid;
- cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
- cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
+ cd->asid = (u16)asid;
+ cd->ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
+ cd->tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
- cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
-
- /*
- * Note that this will end up calling arm_smmu_sync_cd() before
- * the master has been added to the devices list for this domain.
- * This isn't an issue because the STE hasn't been installed yet.
- */
- ret = arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, &cfg->cd);
- if (ret)
- goto out_free_cd_tables;
+ cd->mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
mutex_unlock(&arm_smmu_asid_lock);
return 0;
-out_free_cd_tables:
- arm_smmu_free_cd_tables(smmu_domain);
-out_free_asid:
- arm_smmu_free_asid(&cfg->cd);
out_unlock:
mutex_unlock(&arm_smmu_asid_lock);
return ret;
}
static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
- struct arm_smmu_master *master,
struct io_pgtable_cfg *pgtbl_cfg)
{
int vmid;
@@ -2173,8 +2140,7 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
return 0;
}
-static int arm_smmu_domain_finalise(struct iommu_domain *domain,
- struct arm_smmu_master *master)
+static int arm_smmu_domain_finalise(struct iommu_domain *domain)
{
int ret;
unsigned long ias, oas;
@@ -2182,7 +2148,6 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain,
struct io_pgtable_cfg pgtbl_cfg;
struct io_pgtable_ops *pgtbl_ops;
int (*finalise_stage_fn)(struct arm_smmu_domain *,
- struct arm_smmu_master *,
struct io_pgtable_cfg *);
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
@@ -2234,7 +2199,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain,
domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
domain->geometry.force_aperture = true;
- ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
+ ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
if (ret < 0) {
free_io_pgtable_ops(pgtbl_ops);
return ret;
@@ -2403,6 +2368,14 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master)
master->domain = NULL;
master->ats_enabled = false;
arm_smmu_install_ste_for_dev(master);
+ /*
+ * Clearing the CD entry isn't strictly required to detach the domain
+ * since the table is uninstalled anyway, but it helps avoid confusion
+ * in the call to arm_smmu_write_ctx_desc on the next attach (which
+ * expects the entry to be empty).
+ */
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && master->cd_table.cdtab)
+ arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL);
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
@@ -2436,23 +2409,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
if (!smmu_domain->smmu) {
smmu_domain->smmu = smmu;
- ret = arm_smmu_domain_finalise(domain, master);
- if (ret) {
+ ret = arm_smmu_domain_finalise(domain);
+ if (ret)
smmu_domain->smmu = NULL;
- goto out_unlock;
- }
- } else if (smmu_domain->smmu != smmu) {
- ret = -EINVAL;
- goto out_unlock;
- } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
- master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
+ } else if (smmu_domain->smmu != smmu)
ret = -EINVAL;
- goto out_unlock;
- } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
- smmu_domain->stall_enabled != master->stall_enabled) {
- ret = -EINVAL;
- goto out_unlock;
- }
+
+ mutex_unlock(&smmu_domain->init_mutex);
+ if (ret)
+ return ret;
master->domain = smmu_domain;
@@ -2466,16 +2431,42 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
master->ats_enabled = arm_smmu_ats_supported(master);
- arm_smmu_install_ste_for_dev(master);
-
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_add(&master->domain_head, &smmu_domain->devices);
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+ if (!master->cd_table.cdtab) {
+ ret = arm_smmu_alloc_cd_tables(master);
+ if (ret) {
+ master->domain = NULL;
+ goto out_list_del;
+ }
+ }
+
+ /*
+ * Prevent SVA from concurrently modifying the CD or writing to
+ * the CD entry
+ */
+ mutex_lock(&arm_smmu_asid_lock);
+ ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd);
+ mutex_unlock(&arm_smmu_asid_lock);
+ if (ret) {
+ master->domain = NULL;
+ goto out_list_del;
+ }
+ }
+
+ arm_smmu_install_ste_for_dev(master);
+
arm_smmu_enable_ats(master);
+ return 0;
+
+out_list_del:
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_del(&master->domain_head);
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
-out_unlock:
- mutex_unlock(&smmu_domain->init_mutex);
return ret;
}
@@ -2720,6 +2711,8 @@ static void arm_smmu_release_device(struct device *dev)
arm_smmu_detach_dev(master);
arm_smmu_disable_pasid(master);
arm_smmu_remove_master(master);
+ if (master->cd_table.cdtab)
+ arm_smmu_free_cd_tables(master);
kfree(master);
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 9915850dd4..961205ba86 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -595,13 +595,11 @@ struct arm_smmu_ctx_desc_cfg {
dma_addr_t cdtab_dma;
struct arm_smmu_l1_ctx_desc *l1_desc;
unsigned int num_l1_ents;
-};
-
-struct arm_smmu_s1_cfg {
- struct arm_smmu_ctx_desc_cfg cdcfg;
- struct arm_smmu_ctx_desc cd;
u8 s1fmt;
+ /* log2 of the maximum number of CDs supported by this table */
u8 s1cdmax;
+ /* Whether CD entries in this table have the stall bit set. */
+ u8 stall_enabled:1;
};
struct arm_smmu_s2_cfg {
@@ -697,6 +695,8 @@ struct arm_smmu_master {
struct arm_smmu_domain *domain;
struct list_head domain_head;
struct arm_smmu_stream *streams;
+ /* Locked by the iommu core using the group mutex */
+ struct arm_smmu_ctx_desc_cfg cd_table;
unsigned int num_streams;
bool ats_enabled;
bool stall_enabled;
@@ -719,13 +719,12 @@ struct arm_smmu_domain {
struct mutex init_mutex; /* Protects smmu pointer */
struct io_pgtable_ops *pgtbl_ops;
- bool stall_enabled;
atomic_t nr_ats_masters;
enum arm_smmu_domain_stage stage;
union {
- struct arm_smmu_s1_cfg s1_cfg;
- struct arm_smmu_s2_cfg s2_cfg;
+ struct arm_smmu_ctx_desc cd;
+ struct arm_smmu_s2_cfg s2_cfg;
};
struct iommu_domain domain;
@@ -745,7 +744,7 @@ extern struct xarray arm_smmu_asid_xa;
extern struct mutex arm_smmu_asid_lock;
extern struct arm_smmu_ctx_desc quiet_cd;
-int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
+int arm_smmu_write_ctx_desc(struct arm_smmu_master *smmu_master, int ssid,
struct arm_smmu_ctx_desc *cd);
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 40503376d8..d326fa230b 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -252,6 +252,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
{ .compatible = "qcom,sc7280-mss-pil" },
{ .compatible = "qcom,sc8180x-mdss" },
{ .compatible = "qcom,sc8280xp-mdss" },
+ { .compatible = "qcom,sdm670-mdss" },
{ .compatible = "qcom,sdm845-mdss" },
{ .compatible = "qcom,sdm845-mss-pil" },
{ .compatible = "qcom,sm6350-mdss" },
@@ -533,6 +534,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
{ .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sm6375-smmu-v2", .data = &qcom_smmu_v2_data },
{ .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_500_impl0_data },
+ { .compatible = "qcom,sm7150-smmu-v2", .data = &qcom_smmu_v2_data },
{ .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_500_impl0_data },
diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
index 775a3cbaff..97b2122032 100644
--- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
@@ -332,12 +332,10 @@ out_unlock:
return ret;
}
-static struct iommu_domain *qcom_iommu_domain_alloc(unsigned type)
+static struct iommu_domain *qcom_iommu_domain_alloc_paging(struct device *dev)
{
struct qcom_iommu_domain *qcom_domain;
- if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
- return NULL;
/*
* Allocate the domain and initialise some of its data structures.
* We can't really do anything meaningful until we've added a
@@ -400,6 +398,44 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev
return 0;
}
+static int qcom_iommu_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
+{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ struct qcom_iommu_domain *qcom_domain;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct qcom_iommu_dev *qcom_iommu = to_iommu(dev);
+ unsigned int i;
+
+ if (domain == identity_domain || !domain)
+ return 0;
+
+ qcom_domain = to_qcom_iommu_domain(domain);
+ if (WARN_ON(!qcom_domain->iommu))
+ return -EINVAL;
+
+ pm_runtime_get_sync(qcom_iommu->dev);
+ for (i = 0; i < fwspec->num_ids; i++) {
+ struct qcom_iommu_ctx *ctx = to_ctx(qcom_domain, fwspec->ids[i]);
+
+ /* Disable the context bank: */
+ iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0);
+
+ ctx->domain = NULL;
+ }
+ pm_runtime_put_sync(qcom_iommu->dev);
+ return 0;
+}
+
+static struct iommu_domain_ops qcom_iommu_identity_ops = {
+ .attach_dev = qcom_iommu_identity_attach,
+};
+
+static struct iommu_domain qcom_iommu_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &qcom_iommu_identity_ops,
+};
+
static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
@@ -565,8 +601,9 @@ static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
}
static const struct iommu_ops qcom_iommu_ops = {
+ .identity_domain = &qcom_iommu_identity_domain,
.capable = qcom_iommu_capable,
- .domain_alloc = qcom_iommu_domain_alloc,
+ .domain_alloc_paging = qcom_iommu_domain_alloc_paging,
.probe_device = qcom_iommu_probe_device,
.device_group = generic_device_group,
.of_xlate = qcom_iommu_of_xlate,
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index e5d087bd6d..037fcf8264 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -44,14 +44,28 @@ enum iommu_dma_cookie_type {
IOMMU_DMA_MSI_COOKIE,
};
+enum iommu_dma_queue_type {
+ IOMMU_DMA_OPTS_PER_CPU_QUEUE,
+ IOMMU_DMA_OPTS_SINGLE_QUEUE,
+};
+
+struct iommu_dma_options {
+ enum iommu_dma_queue_type qt;
+ size_t fq_size;
+ unsigned int fq_timeout;
+};
+
struct iommu_dma_cookie {
enum iommu_dma_cookie_type type;
union {
/* Full allocator for IOMMU_DMA_IOVA_COOKIE */
struct {
struct iova_domain iovad;
-
- struct iova_fq __percpu *fq; /* Flush queue */
+ /* Flush queue */
+ union {
+ struct iova_fq *single_fq;
+ struct iova_fq __percpu *percpu_fq;
+ };
/* Number of TLB flushes that have been started */
atomic64_t fq_flush_start_cnt;
/* Number of TLB flushes that have been finished */
@@ -68,6 +82,8 @@ struct iommu_dma_cookie {
/* Domain for flush queue callback; NULL if flush queue not in use */
struct iommu_domain *fq_domain;
+ /* Options for dma-iommu use */
+ struct iommu_dma_options options;
struct mutex mutex;
};
@@ -85,10 +101,12 @@ static int __init iommu_dma_forcedac_setup(char *str)
early_param("iommu.forcedac", iommu_dma_forcedac_setup);
/* Number of entries per flush queue */
-#define IOVA_FQ_SIZE 256
+#define IOVA_DEFAULT_FQ_SIZE 256
+#define IOVA_SINGLE_FQ_SIZE 32768
/* Timeout (in ms) after which entries are flushed from the queue */
-#define IOVA_FQ_TIMEOUT 10
+#define IOVA_DEFAULT_FQ_TIMEOUT 10
+#define IOVA_SINGLE_FQ_TIMEOUT 1000
/* Flush queue entry for deferred flushing */
struct iova_fq_entry {
@@ -100,18 +118,19 @@ struct iova_fq_entry {
/* Per-CPU flush queue structure */
struct iova_fq {
- struct iova_fq_entry entries[IOVA_FQ_SIZE];
- unsigned int head, tail;
spinlock_t lock;
+ unsigned int head, tail;
+ unsigned int mod_mask;
+ struct iova_fq_entry entries[];
};
#define fq_ring_for_each(i, fq) \
- for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
+ for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) & (fq)->mod_mask)
static inline bool fq_full(struct iova_fq *fq)
{
assert_spin_locked(&fq->lock);
- return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
+ return (((fq->tail + 1) & fq->mod_mask) == fq->head);
}
static inline unsigned int fq_ring_add(struct iova_fq *fq)
@@ -120,12 +139,12 @@ static inline unsigned int fq_ring_add(struct iova_fq *fq)
assert_spin_locked(&fq->lock);
- fq->tail = (idx + 1) % IOVA_FQ_SIZE;
+ fq->tail = (idx + 1) & fq->mod_mask;
return idx;
}
-static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
+static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
{
u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt);
unsigned int idx;
@@ -142,10 +161,19 @@ static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
fq->entries[idx].iova_pfn,
fq->entries[idx].pages);
- fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
+ fq->head = (fq->head + 1) & fq->mod_mask;
}
}
+static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&fq->lock, flags);
+ fq_ring_free_locked(cookie, fq);
+ spin_unlock_irqrestore(&fq->lock, flags);
+}
+
static void fq_flush_iotlb(struct iommu_dma_cookie *cookie)
{
atomic64_inc(&cookie->fq_flush_start_cnt);
@@ -161,14 +189,11 @@ static void fq_flush_timeout(struct timer_list *t)
atomic_set(&cookie->fq_timer_on, 0);
fq_flush_iotlb(cookie);
- for_each_possible_cpu(cpu) {
- unsigned long flags;
- struct iova_fq *fq;
-
- fq = per_cpu_ptr(cookie->fq, cpu);
- spin_lock_irqsave(&fq->lock, flags);
- fq_ring_free(cookie, fq);
- spin_unlock_irqrestore(&fq->lock, flags);
+ if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) {
+ fq_ring_free(cookie, cookie->single_fq);
+ } else {
+ for_each_possible_cpu(cpu)
+ fq_ring_free(cookie, per_cpu_ptr(cookie->percpu_fq, cpu));
}
}
@@ -189,7 +214,11 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
*/
smp_mb();
- fq = raw_cpu_ptr(cookie->fq);
+ if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE)
+ fq = cookie->single_fq;
+ else
+ fq = raw_cpu_ptr(cookie->percpu_fq);
+
spin_lock_irqsave(&fq->lock, flags);
/*
@@ -197,11 +226,11 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
* flushed out on another CPU. This makes the fq_full() check below less
* likely to be true.
*/
- fq_ring_free(cookie, fq);
+ fq_ring_free_locked(cookie, fq);
if (fq_full(fq)) {
fq_flush_iotlb(cookie);
- fq_ring_free(cookie, fq);
+ fq_ring_free_locked(cookie, fq);
}
idx = fq_ring_add(fq);
@@ -217,34 +246,95 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
if (!atomic_read(&cookie->fq_timer_on) &&
!atomic_xchg(&cookie->fq_timer_on, 1))
mod_timer(&cookie->fq_timer,
- jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
+ jiffies + msecs_to_jiffies(cookie->options.fq_timeout));
}
-static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
+static void iommu_dma_free_fq_single(struct iova_fq *fq)
{
- int cpu, idx;
+ int idx;
- if (!cookie->fq)
- return;
+ fq_ring_for_each(idx, fq)
+ put_pages_list(&fq->entries[idx].freelist);
+ vfree(fq);
+}
+
+static void iommu_dma_free_fq_percpu(struct iova_fq __percpu *percpu_fq)
+{
+ int cpu, idx;
- del_timer_sync(&cookie->fq_timer);
/* The IOVAs will be torn down separately, so just free our queued pages */
for_each_possible_cpu(cpu) {
- struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu);
+ struct iova_fq *fq = per_cpu_ptr(percpu_fq, cpu);
fq_ring_for_each(idx, fq)
put_pages_list(&fq->entries[idx].freelist);
}
- free_percpu(cookie->fq);
+ free_percpu(percpu_fq);
+}
+
+static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
+{
+ if (!cookie->fq_domain)
+ return;
+
+ del_timer_sync(&cookie->fq_timer);
+ if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE)
+ iommu_dma_free_fq_single(cookie->single_fq);
+ else
+ iommu_dma_free_fq_percpu(cookie->percpu_fq);
+}
+
+static void iommu_dma_init_one_fq(struct iova_fq *fq, size_t fq_size)
+{
+ int i;
+
+ fq->head = 0;
+ fq->tail = 0;
+ fq->mod_mask = fq_size - 1;
+
+ spin_lock_init(&fq->lock);
+
+ for (i = 0; i < fq_size; i++)
+ INIT_LIST_HEAD(&fq->entries[i].freelist);
+}
+
+static int iommu_dma_init_fq_single(struct iommu_dma_cookie *cookie)
+{
+ size_t fq_size = cookie->options.fq_size;
+ struct iova_fq *queue;
+
+ queue = vmalloc(struct_size(queue, entries, fq_size));
+ if (!queue)
+ return -ENOMEM;
+ iommu_dma_init_one_fq(queue, fq_size);
+ cookie->single_fq = queue;
+
+ return 0;
+}
+
+static int iommu_dma_init_fq_percpu(struct iommu_dma_cookie *cookie)
+{
+ size_t fq_size = cookie->options.fq_size;
+ struct iova_fq __percpu *queue;
+ int cpu;
+
+ queue = __alloc_percpu(struct_size(queue, entries, fq_size),
+ __alignof__(*queue));
+ if (!queue)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu)
+ iommu_dma_init_one_fq(per_cpu_ptr(queue, cpu), fq_size);
+ cookie->percpu_fq = queue;
+ return 0;
}
/* sysfs updates are serialised by the mutex of the group owning @domain */
int iommu_dma_init_fq(struct iommu_domain *domain)
{
struct iommu_dma_cookie *cookie = domain->iova_cookie;
- struct iova_fq __percpu *queue;
- int i, cpu;
+ int rc;
if (cookie->fq_domain)
return 0;
@@ -252,26 +342,16 @@ int iommu_dma_init_fq(struct iommu_domain *domain)
atomic64_set(&cookie->fq_flush_start_cnt, 0);
atomic64_set(&cookie->fq_flush_finish_cnt, 0);
- queue = alloc_percpu(struct iova_fq);
- if (!queue) {
+ if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE)
+ rc = iommu_dma_init_fq_single(cookie);
+ else
+ rc = iommu_dma_init_fq_percpu(cookie);
+
+ if (rc) {
pr_warn("iova flush queue initialization failed\n");
return -ENOMEM;
}
- for_each_possible_cpu(cpu) {
- struct iova_fq *fq = per_cpu_ptr(queue, cpu);
-
- fq->head = 0;
- fq->tail = 0;
-
- spin_lock_init(&fq->lock);
-
- for (i = 0; i < IOVA_FQ_SIZE; i++)
- INIT_LIST_HEAD(&fq->entries[i].freelist);
- }
-
- cookie->fq = queue;
-
timer_setup(&cookie->fq_timer, fq_flush_timeout, 0);
atomic_set(&cookie->fq_timer_on, 0);
/*
@@ -556,6 +636,28 @@ static bool dev_use_sg_swiotlb(struct device *dev, struct scatterlist *sg,
}
/**
+ * iommu_dma_init_options - Initialize dma-iommu options
+ * @options: The options to be initialized
+ * @dev: Device the options are set for
+ *
+ * This allows tuning dma-iommu specific to device properties
+ */
+static void iommu_dma_init_options(struct iommu_dma_options *options,
+ struct device *dev)
+{
+ /* Shadowing IOTLB flushes do better with a single large queue */
+ if (dev->iommu->shadow_on_flush) {
+ options->qt = IOMMU_DMA_OPTS_SINGLE_QUEUE;
+ options->fq_timeout = IOVA_SINGLE_FQ_TIMEOUT;
+ options->fq_size = IOVA_SINGLE_FQ_SIZE;
+ } else {
+ options->qt = IOMMU_DMA_OPTS_PER_CPU_QUEUE;
+ options->fq_size = IOVA_DEFAULT_FQ_SIZE;
+ options->fq_timeout = IOVA_DEFAULT_FQ_TIMEOUT;
+ }
+}
+
+/**
* iommu_dma_init_domain - Initialise a DMA mapping domain
* @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
* @base: IOVA at which the mappable address space starts
@@ -615,6 +717,8 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
if (ret)
goto done_unlock;
+ iommu_dma_init_options(&cookie->options, dev);
+
/* If the FQ fails we can simply fall back to strict mode */
if (domain->type == IOMMU_DOMAIN_DMA_FQ &&
(!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain)))
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index c275fe71c4..2c6e9094f1 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -24,6 +24,7 @@
typedef u32 sysmmu_iova_t;
typedef u32 sysmmu_pte_t;
+static struct iommu_domain exynos_identity_domain;
/* We do not consider super section mapping (16MB) */
#define SECT_ORDER 20
@@ -829,7 +830,7 @@ static int __maybe_unused exynos_sysmmu_suspend(struct device *dev)
struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
mutex_lock(&owner->rpm_lock);
- if (data->domain) {
+ if (&data->domain->domain != &exynos_identity_domain) {
dev_dbg(data->sysmmu, "saving state\n");
__sysmmu_disable(data);
}
@@ -847,7 +848,7 @@ static int __maybe_unused exynos_sysmmu_resume(struct device *dev)
struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
mutex_lock(&owner->rpm_lock);
- if (data->domain) {
+ if (&data->domain->domain != &exynos_identity_domain) {
dev_dbg(data->sysmmu, "restoring state\n");
__sysmmu_enable(data);
}
@@ -886,7 +887,7 @@ static inline void exynos_iommu_set_pte(sysmmu_pte_t *ent, sysmmu_pte_t val)
DMA_TO_DEVICE);
}
-static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type)
+static struct iommu_domain *exynos_iommu_domain_alloc_paging(struct device *dev)
{
struct exynos_iommu_domain *domain;
dma_addr_t handle;
@@ -895,9 +896,6 @@ static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type)
/* Check if correct PTE offsets are initialized */
BUG_ON(PG_ENT_SHIFT < 0 || !dma_dev);
- if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return NULL;
@@ -980,17 +978,20 @@ static void exynos_iommu_domain_free(struct iommu_domain *iommu_domain)
kfree(domain);
}
-static void exynos_iommu_detach_device(struct iommu_domain *iommu_domain,
- struct device *dev)
+static int exynos_iommu_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
{
- struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
- phys_addr_t pagetable = virt_to_phys(domain->pgtable);
+ struct exynos_iommu_domain *domain;
+ phys_addr_t pagetable;
struct sysmmu_drvdata *data, *next;
unsigned long flags;
- if (!has_sysmmu(dev) || owner->domain != iommu_domain)
- return;
+ if (owner->domain == identity_domain)
+ return 0;
+
+ domain = to_exynos_domain(owner->domain);
+ pagetable = virt_to_phys(domain->pgtable);
mutex_lock(&owner->rpm_lock);
@@ -1009,15 +1010,25 @@ static void exynos_iommu_detach_device(struct iommu_domain *iommu_domain,
list_del_init(&data->domain_node);
spin_unlock(&data->lock);
}
- owner->domain = NULL;
+ owner->domain = identity_domain;
spin_unlock_irqrestore(&domain->lock, flags);
mutex_unlock(&owner->rpm_lock);
- dev_dbg(dev, "%s: Detached IOMMU with pgtable %pa\n", __func__,
- &pagetable);
+ dev_dbg(dev, "%s: Restored IOMMU to IDENTITY from pgtable %pa\n",
+ __func__, &pagetable);
+ return 0;
}
+static struct iommu_domain_ops exynos_identity_ops = {
+ .attach_dev = exynos_iommu_identity_attach,
+};
+
+static struct iommu_domain exynos_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &exynos_identity_ops,
+};
+
static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain,
struct device *dev)
{
@@ -1026,12 +1037,11 @@ static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain,
struct sysmmu_drvdata *data;
phys_addr_t pagetable = virt_to_phys(domain->pgtable);
unsigned long flags;
+ int err;
- if (!has_sysmmu(dev))
- return -ENODEV;
-
- if (owner->domain)
- exynos_iommu_detach_device(owner->domain, dev);
+ err = exynos_iommu_identity_attach(&exynos_identity_domain, dev);
+ if (err)
+ return err;
mutex_lock(&owner->rpm_lock);
@@ -1219,7 +1229,7 @@ static int lv2set_page(sysmmu_pte_t *pent, phys_addr_t paddr, size_t size,
*/
static int exynos_iommu_map(struct iommu_domain *iommu_domain,
unsigned long l_iova, phys_addr_t paddr, size_t size,
- int prot, gfp_t gfp)
+ size_t count, int prot, gfp_t gfp, size_t *mapped)
{
struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
sysmmu_pte_t *entry;
@@ -1253,6 +1263,8 @@ static int exynos_iommu_map(struct iommu_domain *iommu_domain,
if (ret)
pr_err("%s: Failed(%d) to map %#zx bytes @ %#x\n",
__func__, ret, size, iova);
+ else
+ *mapped = size;
spin_unlock_irqrestore(&domain->pgtablelock, flags);
@@ -1274,7 +1286,7 @@ static void exynos_iommu_tlb_invalidate_entry(struct exynos_iommu_domain *domain
}
static size_t exynos_iommu_unmap(struct iommu_domain *iommu_domain,
- unsigned long l_iova, size_t size,
+ unsigned long l_iova, size_t size, size_t count,
struct iommu_iotlb_gather *gather)
{
struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
@@ -1407,26 +1419,12 @@ static struct iommu_device *exynos_iommu_probe_device(struct device *dev)
return &data->iommu;
}
-static void exynos_iommu_set_platform_dma(struct device *dev)
-{
- struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
-
- if (owner->domain) {
- struct iommu_group *group = iommu_group_get(dev);
-
- if (group) {
- exynos_iommu_detach_device(owner->domain, dev);
- iommu_group_put(group);
- }
- }
-}
-
static void exynos_iommu_release_device(struct device *dev)
{
struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
struct sysmmu_drvdata *data;
- exynos_iommu_set_platform_dma(dev);
+ WARN_ON(exynos_iommu_identity_attach(&exynos_identity_domain, dev));
list_for_each_entry(data, &owner->controllers, owner_node)
device_link_del(data->link);
@@ -1457,6 +1455,7 @@ static int exynos_iommu_of_xlate(struct device *dev,
INIT_LIST_HEAD(&owner->controllers);
mutex_init(&owner->rpm_lock);
+ owner->domain = &exynos_identity_domain;
dev_iommu_priv_set(dev, owner);
}
@@ -1471,19 +1470,17 @@ static int exynos_iommu_of_xlate(struct device *dev,
}
static const struct iommu_ops exynos_iommu_ops = {
- .domain_alloc = exynos_iommu_domain_alloc,
+ .identity_domain = &exynos_identity_domain,
+ .domain_alloc_paging = exynos_iommu_domain_alloc_paging,
.device_group = generic_device_group,
-#ifdef CONFIG_ARM
- .set_platform_dma_ops = exynos_iommu_set_platform_dma,
-#endif
.probe_device = exynos_iommu_probe_device,
.release_device = exynos_iommu_release_device,
.pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE,
.of_xlate = exynos_iommu_of_xlate,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = exynos_iommu_attach_device,
- .map = exynos_iommu_map,
- .unmap = exynos_iommu_unmap,
+ .map_pages = exynos_iommu_map,
+ .unmap_pages = exynos_iommu_unmap,
.iova_to_phys = exynos_iommu_iova_to_phys,
.free = exynos_iommu_domain_free,
}
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index 4ac0e247ec..e9d2bff465 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -196,6 +196,13 @@ static struct iommu_domain *fsl_pamu_domain_alloc(unsigned type)
{
struct fsl_dma_domain *dma_domain;
+ /*
+ * FIXME: This isn't creating an unmanaged domain since the
+ * default_domain_ops do not have any map/unmap function it doesn't meet
+ * the requirements for __IOMMU_DOMAIN_PAGING. The only purpose seems to
+ * allow drivers/soc/fsl/qbman/qman_portal.c to do
+ * fsl_pamu_configure_l1_stash()
+ */
if (type != IOMMU_DOMAIN_UNMANAGED)
return NULL;
@@ -283,16 +290,34 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain,
return ret;
}
-static void fsl_pamu_set_platform_dma(struct device *dev)
+/*
+ * FIXME: fsl/pamu is completely broken in terms of how it works with the iommu
+ * API. Immediately after probe the HW is left in an IDENTITY translation and
+ * the driver provides a non-working UNMANAGED domain that it can switch over
+ * to. However it cannot switch back to an IDENTITY translation, instead it
+ * switches to what looks like BLOCKING.
+ */
+static int fsl_pamu_platform_attach(struct iommu_domain *platform_domain,
+ struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
+ struct fsl_dma_domain *dma_domain;
const u32 *prop;
int len;
struct pci_dev *pdev = NULL;
struct pci_controller *pci_ctl;
/*
+ * Hack to keep things working as they always have, only leaving an
+ * UNMANAGED domain makes it BLOCKING.
+ */
+ if (domain == platform_domain || !domain ||
+ domain->type != IOMMU_DOMAIN_UNMANAGED)
+ return 0;
+
+ dma_domain = to_fsl_dma_domain(domain);
+
+ /*
* Use LIODN of the PCI controller while detaching a
* PCI device.
*/
@@ -312,8 +337,18 @@ static void fsl_pamu_set_platform_dma(struct device *dev)
detach_device(dev, dma_domain);
else
pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node);
+ return 0;
}
+static struct iommu_domain_ops fsl_pamu_platform_ops = {
+ .attach_dev = fsl_pamu_platform_attach,
+};
+
+static struct iommu_domain fsl_pamu_platform_domain = {
+ .type = IOMMU_DOMAIN_PLATFORM,
+ .ops = &fsl_pamu_platform_ops,
+};
+
/* Set the domain stash attribute */
int fsl_pamu_configure_l1_stash(struct iommu_domain *domain, u32 cpu)
{
@@ -395,11 +430,11 @@ static struct iommu_device *fsl_pamu_probe_device(struct device *dev)
}
static const struct iommu_ops fsl_pamu_ops = {
+ .default_domain = &fsl_pamu_platform_domain,
.capable = fsl_pamu_capable,
.domain_alloc = fsl_pamu_domain_alloc,
.probe_device = fsl_pamu_probe_device,
.device_group = fsl_pamu_device_group,
- .set_platform_dma_ops = fsl_pamu_set_platform_dma,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = fsl_pamu_attach_device,
.iova_to_phys = fsl_pamu_iova_to_phys,
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 2e56bd79f5..012cd2541a 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -11,10 +11,11 @@ config DMAR_DEBUG
config INTEL_IOMMU
bool "Support for Intel IOMMU using DMA Remapping Devices"
- depends on PCI_MSI && ACPI && (X86 || IA64)
+ depends on PCI_MSI && ACPI && X86
select DMA_OPS
select IOMMU_API
select IOMMU_IOVA
+ select IOMMUFD_DRIVER if IOMMUFD
select NEED_DMA_MAP_STATE
select DMAR_TABLE
select SWIOTLB
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index 7af3b8a4f2..5dabf081a7 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DMAR_TABLE) += dmar.o
-obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o
+obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o
obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
obj-$(CONFIG_DMAR_PERF) += perf.o
obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index 1f92528510..dee61e513b 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -111,6 +111,8 @@ static const struct iommu_regset iommu_regs_64[] = {
IOMMU_REGSET_ENTRY(VCRSP),
};
+static struct dentry *intel_iommu_debug;
+
static int iommu_regset_show(struct seq_file *m, void *unused)
{
struct dmar_drhd_unit *drhd;
@@ -311,9 +313,14 @@ static inline unsigned long level_to_directory_size(int level)
static inline void
dump_page_info(struct seq_file *m, unsigned long iova, u64 *path)
{
- seq_printf(m, "0x%013lx |\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\n",
- iova >> VTD_PAGE_SHIFT, path[5], path[4],
- path[3], path[2], path[1]);
+ seq_printf(m, "0x%013lx |\t0x%016llx\t0x%016llx\t0x%016llx",
+ iova >> VTD_PAGE_SHIFT, path[5], path[4], path[3]);
+ if (path[2]) {
+ seq_printf(m, "\t0x%016llx", path[2]);
+ if (path[1])
+ seq_printf(m, "\t0x%016llx", path[1]);
+ }
+ seq_putc(m, '\n');
}
static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde,
@@ -340,58 +347,140 @@ static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde,
}
}
-static int __show_device_domain_translation(struct device *dev, void *data)
+static int domain_translation_struct_show(struct seq_file *m,
+ struct device_domain_info *info,
+ ioasid_t pasid)
{
- struct dmar_domain *domain;
- struct seq_file *m = data;
- u64 path[6] = { 0 };
-
- domain = to_dmar_domain(iommu_get_domain_for_dev(dev));
- if (!domain)
- return 0;
+ bool scalable, found = false;
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ u16 devfn, bus, seg;
- seq_printf(m, "Device %s @0x%llx\n", dev_name(dev),
- (u64)virt_to_phys(domain->pgd));
- seq_puts(m, "IOVA_PFN\t\tPML5E\t\t\tPML4E\t\t\tPDPE\t\t\tPDE\t\t\tPTE\n");
+ bus = info->bus;
+ devfn = info->devfn;
+ seg = info->segment;
- pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
- seq_putc(m, '\n');
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ struct context_entry *context;
+ u64 pgd, path[6] = { 0 };
+ u32 sts, agaw;
- /* Don't iterate */
- return 1;
-}
+ if (seg != iommu->segment)
+ continue;
-static int show_device_domain_translation(struct device *dev, void *data)
-{
- struct iommu_group *group;
+ sts = dmar_readl(iommu->reg + DMAR_GSTS_REG);
+ if (!(sts & DMA_GSTS_TES)) {
+ seq_printf(m, "DMA Remapping is not enabled on %s\n",
+ iommu->name);
+ continue;
+ }
+ if (dmar_readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT)
+ scalable = true;
+ else
+ scalable = false;
- group = iommu_group_get(dev);
- if (group) {
/*
- * The group->mutex is held across the callback, which will
- * block calls to iommu_attach/detach_group/device. Hence,
+ * The iommu->lock is held across the callback, which will
+ * block calls to domain_attach/domain_detach. Hence,
* the domain of the device will not change during traversal.
*
- * All devices in an iommu group share a single domain, hence
- * we only dump the domain of the first device. Even though,
- * this code still possibly races with the iommu_unmap()
+ * Traversing page table possibly races with the iommu_unmap()
* interface. This could be solved by RCU-freeing the page
* table pages in the iommu_unmap() path.
*/
- iommu_group_for_each_dev(group, data,
- __show_device_domain_translation);
- iommu_group_put(group);
+ spin_lock(&iommu->lock);
+
+ context = iommu_context_addr(iommu, bus, devfn, 0);
+ if (!context || !context_present(context))
+ goto iommu_unlock;
+
+ if (scalable) { /* scalable mode */
+ struct pasid_entry *pasid_tbl, *pasid_tbl_entry;
+ struct pasid_dir_entry *dir_tbl, *dir_entry;
+ u16 dir_idx, tbl_idx, pgtt;
+ u64 pasid_dir_ptr;
+
+ pasid_dir_ptr = context->lo & VTD_PAGE_MASK;
+
+ /* Dump specified device domain mappings with PASID. */
+ dir_idx = pasid >> PASID_PDE_SHIFT;
+ tbl_idx = pasid & PASID_PTE_MASK;
+
+ dir_tbl = phys_to_virt(pasid_dir_ptr);
+ dir_entry = &dir_tbl[dir_idx];
+
+ pasid_tbl = get_pasid_table_from_pde(dir_entry);
+ if (!pasid_tbl)
+ goto iommu_unlock;
+
+ pasid_tbl_entry = &pasid_tbl[tbl_idx];
+ if (!pasid_pte_is_present(pasid_tbl_entry))
+ goto iommu_unlock;
+
+ /*
+ * According to PASID Granular Translation Type(PGTT),
+ * get the page table pointer.
+ */
+ pgtt = (u16)(pasid_tbl_entry->val[0] & GENMASK_ULL(8, 6)) >> 6;
+ agaw = (u8)(pasid_tbl_entry->val[0] & GENMASK_ULL(4, 2)) >> 2;
+
+ switch (pgtt) {
+ case PASID_ENTRY_PGTT_FL_ONLY:
+ pgd = pasid_tbl_entry->val[2];
+ break;
+ case PASID_ENTRY_PGTT_SL_ONLY:
+ case PASID_ENTRY_PGTT_NESTED:
+ pgd = pasid_tbl_entry->val[0];
+ break;
+ default:
+ goto iommu_unlock;
+ }
+ pgd &= VTD_PAGE_MASK;
+ } else { /* legacy mode */
+ pgd = context->lo & VTD_PAGE_MASK;
+ agaw = context->hi & 7;
+ }
+
+ seq_printf(m, "Device %04x:%02x:%02x.%x ",
+ iommu->segment, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+ if (scalable)
+ seq_printf(m, "with pasid %x @0x%llx\n", pasid, pgd);
+ else
+ seq_printf(m, "@0x%llx\n", pgd);
+
+ seq_printf(m, "%-17s\t%-18s\t%-18s\t%-18s\t%-18s\t%-s\n",
+ "IOVA_PFN", "PML5E", "PML4E", "PDPE", "PDE", "PTE");
+ pgtable_walk_level(m, phys_to_virt(pgd), agaw + 2, 0, path);
+
+ found = true;
+iommu_unlock:
+ spin_unlock(&iommu->lock);
+ if (found)
+ break;
}
+ rcu_read_unlock();
return 0;
}
-static int domain_translation_struct_show(struct seq_file *m, void *unused)
+static int dev_domain_translation_struct_show(struct seq_file *m, void *unused)
+{
+ struct device_domain_info *info = (struct device_domain_info *)m->private;
+
+ return domain_translation_struct_show(m, info, IOMMU_NO_PASID);
+}
+DEFINE_SHOW_ATTRIBUTE(dev_domain_translation_struct);
+
+static int pasid_domain_translation_struct_show(struct seq_file *m, void *unused)
{
- return bus_for_each_dev(&pci_bus_type, NULL, m,
- show_device_domain_translation);
+ struct dev_pasid_info *dev_pasid = (struct dev_pasid_info *)m->private;
+ struct device_domain_info *info = dev_iommu_priv_get(dev_pasid->dev);
+
+ return domain_translation_struct_show(m, info, dev_pasid->pasid);
}
-DEFINE_SHOW_ATTRIBUTE(domain_translation_struct);
+DEFINE_SHOW_ATTRIBUTE(pasid_domain_translation_struct);
static void invalidation_queue_entry_show(struct seq_file *m,
struct intel_iommu *iommu)
@@ -666,16 +755,12 @@ static const struct file_operations dmar_perf_latency_fops = {
void __init intel_iommu_debugfs_init(void)
{
- struct dentry *intel_iommu_debug = debugfs_create_dir("intel",
- iommu_debugfs_dir);
+ intel_iommu_debug = debugfs_create_dir("intel", iommu_debugfs_dir);
debugfs_create_file("iommu_regset", 0444, intel_iommu_debug, NULL,
&iommu_regset_fops);
debugfs_create_file("dmar_translation_struct", 0444, intel_iommu_debug,
NULL, &dmar_translation_struct_fops);
- debugfs_create_file("domain_translation_struct", 0444,
- intel_iommu_debug, NULL,
- &domain_translation_struct_fops);
debugfs_create_file("invalidation_queue", 0444, intel_iommu_debug,
NULL, &invalidation_queue_fops);
#ifdef CONFIG_IRQ_REMAP
@@ -685,3 +770,51 @@ void __init intel_iommu_debugfs_init(void)
debugfs_create_file("dmar_perf_latency", 0644, intel_iommu_debug,
NULL, &dmar_perf_latency_fops);
}
+
+/*
+ * Create a debugfs directory for each device, and then create a
+ * debugfs file in this directory for users to dump the page table
+ * of the default domain. e.g.
+ * /sys/kernel/debug/iommu/intel/0000:00:01.0/domain_translation_struct
+ */
+void intel_iommu_debugfs_create_dev(struct device_domain_info *info)
+{
+ info->debugfs_dentry = debugfs_create_dir(dev_name(info->dev), intel_iommu_debug);
+
+ debugfs_create_file("domain_translation_struct", 0444, info->debugfs_dentry,
+ info, &dev_domain_translation_struct_fops);
+}
+
+/* Remove the device debugfs directory. */
+void intel_iommu_debugfs_remove_dev(struct device_domain_info *info)
+{
+ debugfs_remove_recursive(info->debugfs_dentry);
+}
+
+/*
+ * Create a debugfs directory per pair of {device, pasid}, then create the
+ * corresponding debugfs file in this directory for users to dump its page
+ * table. e.g.
+ * /sys/kernel/debug/iommu/intel/0000:00:01.0/1/domain_translation_struct
+ *
+ * The debugfs only dumps the page tables whose mappings are created and
+ * destroyed by the iommu_map/unmap() interfaces. Check the mapping type
+ * of the domain before creating debugfs directory.
+ */
+void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev_pasid->dev);
+ char dir_name[10];
+
+ sprintf(dir_name, "%x", dev_pasid->pasid);
+ dev_pasid->debugfs_dentry = debugfs_create_dir(dir_name, info->debugfs_dentry);
+
+ debugfs_create_file("domain_translation_struct", 0444, dev_pasid->debugfs_dentry,
+ dev_pasid, &pasid_domain_translation_struct_fops);
+}
+
+/* Remove the device pasid debugfs directory. */
+void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid)
+{
+ debugfs_remove_recursive(dev_pasid->debugfs_dentry);
+}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 744e4e6b8d..a8366b1f4f 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -282,7 +282,6 @@ static LIST_HEAD(dmar_satc_units);
#define for_each_rmrr_units(rmrr) \
list_for_each_entry(rmrr, &dmar_rmrr_units, list)
-static void device_block_translation(struct device *dev);
static void intel_iommu_domain_free(struct iommu_domain *domain);
int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
@@ -300,6 +299,7 @@ static int iommu_skip_te_disable;
#define IDENTMAP_AZALIA 4
const struct iommu_ops intel_iommu_ops;
+static const struct iommu_dirty_ops intel_dirty_ops;
static bool translation_pre_enabled(struct intel_iommu *iommu)
{
@@ -540,8 +540,6 @@ static int domain_update_device_node(struct dmar_domain *domain)
return nid;
}
-static void domain_update_iotlb(struct dmar_domain *domain);
-
/* Return the super pagesize bitmap if supported. */
static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
{
@@ -560,7 +558,7 @@ static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
}
/* Some capabilities may be different across iommus */
-static void domain_update_iommu_cap(struct dmar_domain *domain)
+void domain_update_iommu_cap(struct dmar_domain *domain)
{
domain_update_iommu_coherency(domain);
domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
@@ -1362,7 +1360,7 @@ domain_lookup_dev_info(struct dmar_domain *domain,
return NULL;
}
-static void domain_update_iotlb(struct dmar_domain *domain)
+void domain_update_iotlb(struct dmar_domain *domain)
{
struct dev_pasid_info *dev_pasid;
struct device_domain_info *info;
@@ -1778,8 +1776,7 @@ static struct dmar_domain *alloc_domain(unsigned int type)
return domain;
}
-static int domain_attach_iommu(struct dmar_domain *domain,
- struct intel_iommu *iommu)
+int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
{
struct iommu_domain_info *info, *curr;
unsigned long ndomains;
@@ -1828,8 +1825,7 @@ err_unlock:
return ret;
}
-static void domain_detach_iommu(struct dmar_domain *domain,
- struct intel_iommu *iommu)
+void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
{
struct iommu_domain_info *info;
@@ -2196,6 +2192,11 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
return -EINVAL;
+ if (!(prot & DMA_PTE_WRITE) && domain->nested_parent) {
+ pr_err_ratelimited("Read-only mapping is disallowed on the domain which serves as the parent in a nested configuration, due to HW errata (ERRATA_772415_SPR17)\n");
+ return -EINVAL;
+ }
+
attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
attr |= DMA_FL_PTE_PRESENT;
if (domain->use_first_level) {
@@ -3961,7 +3962,7 @@ static void dmar_remove_one_dev_info(struct device *dev)
* all DMA requests without PASID from the device are blocked. If the page
* table has been set, clean up the data structures.
*/
-static void device_block_translation(struct device *dev)
+void device_block_translation(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
@@ -4016,9 +4017,9 @@ static int blocking_domain_attach_dev(struct iommu_domain *domain,
}
static struct iommu_domain blocking_domain = {
+ .type = IOMMU_DOMAIN_BLOCKED,
.ops = &(const struct iommu_domain_ops) {
.attach_dev = blocking_domain_attach_dev,
- .free = intel_iommu_domain_free
}
};
@@ -4028,8 +4029,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
struct iommu_domain *domain;
switch (type) {
- case IOMMU_DOMAIN_BLOCKED:
- return &blocking_domain;
case IOMMU_DOMAIN_DMA:
case IOMMU_DOMAIN_UNMANAGED:
dmar_domain = alloc_domain(type);
@@ -4061,14 +4060,72 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
return NULL;
}
+static struct iommu_domain *
+intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
+ struct iommu_domain *parent,
+ const struct iommu_user_data *user_data)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
+ bool nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
+ struct intel_iommu *iommu = info->iommu;
+ struct dmar_domain *dmar_domain;
+ struct iommu_domain *domain;
+
+ /* Must be NESTING domain */
+ if (parent) {
+ if (!nested_supported(iommu) || flags)
+ return ERR_PTR(-EOPNOTSUPP);
+ return intel_nested_domain_alloc(parent, user_data);
+ }
+
+ if (flags &
+ (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING)))
+ return ERR_PTR(-EOPNOTSUPP);
+ if (nested_parent && !nested_supported(iommu))
+ return ERR_PTR(-EOPNOTSUPP);
+ if (user_data || (dirty_tracking && !ssads_supported(iommu)))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ /*
+ * domain_alloc_user op needs to fully initialize a domain before
+ * return, so uses iommu_domain_alloc() here for simple.
+ */
+ domain = iommu_domain_alloc(dev->bus);
+ if (!domain)
+ return ERR_PTR(-ENOMEM);
+
+ dmar_domain = to_dmar_domain(domain);
+
+ if (nested_parent) {
+ dmar_domain->nested_parent = true;
+ INIT_LIST_HEAD(&dmar_domain->s1_domains);
+ spin_lock_init(&dmar_domain->s1_lock);
+ }
+
+ if (dirty_tracking) {
+ if (dmar_domain->use_first_level) {
+ iommu_domain_free(domain);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ domain->dirty_ops = &intel_dirty_ops;
+ }
+
+ return domain;
+}
+
static void intel_iommu_domain_free(struct iommu_domain *domain)
{
- if (domain != &si_domain->domain && domain != &blocking_domain)
- domain_exit(to_dmar_domain(domain));
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+
+ WARN_ON(dmar_domain->nested_parent &&
+ !list_empty(&dmar_domain->s1_domains));
+ if (domain != &si_domain->domain)
+ domain_exit(dmar_domain);
}
-static int prepare_domain_attach_device(struct iommu_domain *domain,
- struct device *dev)
+int prepare_domain_attach_device(struct iommu_domain *domain,
+ struct device *dev)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu;
@@ -4081,6 +4138,9 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
return -EINVAL;
+ if (domain->dirty_ops && !ssads_supported(iommu))
+ return -EINVAL;
+
/* check if this iommu agaw is sufficient for max mapped address */
addr_width = agaw_to_width(iommu->agaw);
if (addr_width > cap_mgaw(iommu->cap))
@@ -4336,6 +4396,8 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
return dmar_platform_optin();
case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
return ecap_sc_support(info->iommu->ecap);
+ case IOMMU_CAP_DIRTY_TRACKING:
+ return ssads_supported(info->iommu);
default:
return false;
}
@@ -4413,6 +4475,8 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
}
}
+ intel_iommu_debugfs_create_dev(info);
+
return &iommu->iommu;
}
@@ -4422,6 +4486,7 @@ static void intel_iommu_release_device(struct device *dev)
dmar_remove_one_dev_info(dev);
intel_pasid_free_table(dev);
+ intel_iommu_debugfs_remove_dev(info);
dev_iommu_priv_set(dev, NULL);
kfree(info);
set_dma_ops(dev, NULL);
@@ -4666,8 +4731,8 @@ static bool risky_device(struct pci_dev *pdev)
return false;
}
-static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
unsigned long pages = aligned_nrpages(iova, size);
@@ -4677,6 +4742,7 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
xa_for_each(&dmar_domain->iommu_array, i, info)
__mapping_notify_one(info->iommu, dmar_domain, pfn, pages);
+ return 0;
}
static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
@@ -4714,6 +4780,7 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
spin_unlock_irqrestore(&dmar_domain->lock, flags);
domain_detach_iommu(dmar_domain, iommu);
+ intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
kfree(dev_pasid);
out_tear_down:
intel_pasid_tear_down_entry(iommu, dev, pasid, false);
@@ -4733,6 +4800,9 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
return -EOPNOTSUPP;
+ if (domain->dirty_ops)
+ return -EINVAL;
+
if (context_copied(iommu, info->bus, info->devfn))
return -EBUSY;
@@ -4766,6 +4836,9 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
spin_unlock_irqrestore(&dmar_domain->lock, flags);
+ if (domain->type & __IOMMU_DOMAIN_PAGING)
+ intel_iommu_debugfs_create_dev_pasid(dev_pasid);
+
return 0;
out_detach_iommu:
domain_detach_iommu(dmar_domain, iommu);
@@ -4784,6 +4857,7 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type)
if (!vtd)
return ERR_PTR(-ENOMEM);
+ vtd->flags = IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17;
vtd->cap_reg = iommu->cap;
vtd->ecap_reg = iommu->ecap;
*length = sizeof(*vtd);
@@ -4791,10 +4865,136 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type)
return vtd;
}
+/*
+ * Set dirty tracking for the device list of a domain. The caller must
+ * hold the domain->lock when calling it.
+ */
+static int device_set_dirty_tracking(struct list_head *devices, bool enable)
+{
+ struct device_domain_info *info;
+ int ret = 0;
+
+ list_for_each_entry(info, devices, link) {
+ ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev,
+ IOMMU_NO_PASID, enable);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static int parent_domain_set_dirty_tracking(struct dmar_domain *domain,
+ bool enable)
+{
+ struct dmar_domain *s1_domain;
+ unsigned long flags;
+ int ret;
+
+ spin_lock(&domain->s1_lock);
+ list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
+ spin_lock_irqsave(&s1_domain->lock, flags);
+ ret = device_set_dirty_tracking(&s1_domain->devices, enable);
+ spin_unlock_irqrestore(&s1_domain->lock, flags);
+ if (ret)
+ goto err_unwind;
+ }
+ spin_unlock(&domain->s1_lock);
+ return 0;
+
+err_unwind:
+ list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
+ spin_lock_irqsave(&s1_domain->lock, flags);
+ device_set_dirty_tracking(&s1_domain->devices,
+ domain->dirty_tracking);
+ spin_unlock_irqrestore(&s1_domain->lock, flags);
+ }
+ spin_unlock(&domain->s1_lock);
+ return ret;
+}
+
+static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
+ bool enable)
+{
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ int ret;
+
+ spin_lock(&dmar_domain->lock);
+ if (dmar_domain->dirty_tracking == enable)
+ goto out_unlock;
+
+ ret = device_set_dirty_tracking(&dmar_domain->devices, enable);
+ if (ret)
+ goto err_unwind;
+
+ if (dmar_domain->nested_parent) {
+ ret = parent_domain_set_dirty_tracking(dmar_domain, enable);
+ if (ret)
+ goto err_unwind;
+ }
+
+ dmar_domain->dirty_tracking = enable;
+out_unlock:
+ spin_unlock(&dmar_domain->lock);
+
+ return 0;
+
+err_unwind:
+ device_set_dirty_tracking(&dmar_domain->devices,
+ dmar_domain->dirty_tracking);
+ spin_unlock(&dmar_domain->lock);
+ return ret;
+}
+
+static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ unsigned long flags,
+ struct iommu_dirty_bitmap *dirty)
+{
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ unsigned long end = iova + size - 1;
+ unsigned long pgsize;
+
+ /*
+ * IOMMUFD core calls into a dirty tracking disabled domain without an
+ * IOVA bitmap set in order to clean dirty bits in all PTEs that might
+ * have occurred when we stopped dirty tracking. This ensures that we
+ * never inherit dirtied bits from a previous cycle.
+ */
+ if (!dmar_domain->dirty_tracking && dirty->bitmap)
+ return -EINVAL;
+
+ do {
+ struct dma_pte *pte;
+ int lvl = 0;
+
+ pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &lvl,
+ GFP_ATOMIC);
+ pgsize = level_size(lvl) << VTD_PAGE_SHIFT;
+ if (!pte || !dma_pte_present(pte)) {
+ iova += pgsize;
+ continue;
+ }
+
+ if (dma_sl_pte_test_and_clear_dirty(pte, flags))
+ iommu_dirty_bitmap_record(dirty, iova, pgsize);
+ iova += pgsize;
+ } while (iova < end);
+
+ return 0;
+}
+
+static const struct iommu_dirty_ops intel_dirty_ops = {
+ .set_dirty_tracking = intel_iommu_set_dirty_tracking,
+ .read_and_clear_dirty = intel_iommu_read_and_clear_dirty,
+};
+
const struct iommu_ops intel_iommu_ops = {
+ .blocked_domain = &blocking_domain,
.capable = intel_iommu_capable,
.hw_info = intel_iommu_hw_info,
.domain_alloc = intel_iommu_domain_alloc,
+ .domain_alloc_user = intel_iommu_domain_alloc_user,
.probe_device = intel_iommu_probe_device,
.probe_finalize = intel_iommu_probe_finalize,
.release_device = intel_iommu_release_device,
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index e6a3e70656..efc00d2b45 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -25,6 +25,7 @@
#include <asm/cacheflush.h>
#include <asm/iommu.h>
+#include <uapi/linux/iommufd.h>
/*
* VT-d hardware uses 4KiB page size regardless of host page size.
@@ -48,6 +49,9 @@
#define DMA_FL_PTE_DIRTY BIT_ULL(6)
#define DMA_FL_PTE_XD BIT_ULL(63)
+#define DMA_SL_PTE_DIRTY_BIT 9
+#define DMA_SL_PTE_DIRTY BIT_ULL(DMA_SL_PTE_DIRTY_BIT)
+
#define ADDR_WIDTH_5LEVEL (57)
#define ADDR_WIDTH_4LEVEL (48)
@@ -539,6 +543,10 @@ enum {
#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap))
#define pasid_supported(iommu) (sm_supported(iommu) && \
ecap_pasid((iommu)->ecap))
+#define ssads_supported(iommu) (sm_supported(iommu) && \
+ ecap_slads((iommu)->ecap))
+#define nested_supported(iommu) (sm_supported(iommu) && \
+ ecap_nest((iommu)->ecap))
struct pasid_entry;
struct pasid_state_entry;
@@ -592,6 +600,8 @@ struct dmar_domain {
* otherwise, goes through the second
* level.
*/
+ u8 dirty_tracking:1; /* Dirty tracking is enabled */
+ u8 nested_parent:1; /* Has other domains nested on it */
u8 has_mappings:1; /* Has mappings configured through
* iommu_map() interface.
*/
@@ -600,15 +610,44 @@ struct dmar_domain {
struct list_head devices; /* all devices' list */
struct list_head dev_pasids; /* all attached pasids */
- struct dma_pte *pgd; /* virtual address */
- int gaw; /* max guest address width */
-
- /* adjusted guest address width, 0 is level 2 30-bit */
- int agaw;
int iommu_superpage;/* Level of superpages supported:
0 == 4KiB (no superpages), 1 == 2MiB,
2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
- u64 max_addr; /* maximum mapped address */
+ union {
+ /* DMA remapping domain */
+ struct {
+ /* virtual address */
+ struct dma_pte *pgd;
+ /* max guest address width */
+ int gaw;
+ /*
+ * adjusted guest address width:
+ * 0: level 2 30-bit
+ * 1: level 3 39-bit
+ * 2: level 4 48-bit
+ * 3: level 5 57-bit
+ */
+ int agaw;
+ /* maximum mapped address */
+ u64 max_addr;
+ /* Protect the s1_domains list */
+ spinlock_t s1_lock;
+ /* Track s1_domains nested on this domain */
+ struct list_head s1_domains;
+ };
+
+ /* Nested user domain */
+ struct {
+ /* parent page table which the user domain is nested on */
+ struct dmar_domain *s2_domain;
+ /* user page table pointer (in GPA) */
+ unsigned long s1_pgtbl;
+ /* page table attributes */
+ struct iommu_hwpt_vtd_s1 s1_cfg;
+ /* link to parent domain siblings */
+ struct list_head s2_link;
+ };
+ };
struct iommu_domain domain; /* generic domain data structure for
iommu core */
@@ -719,12 +758,18 @@ struct device_domain_info {
struct intel_iommu *iommu; /* IOMMU used by this device */
struct dmar_domain *domain; /* pointer to domain */
struct pasid_table *pasid_table; /* pasid table */
+#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
+ struct dentry *debugfs_dentry; /* pointer to device directory dentry */
+#endif
};
struct dev_pasid_info {
struct list_head link_domain; /* link to domain siblings */
struct device *dev;
ioasid_t pasid;
+#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
+ struct dentry *debugfs_dentry; /* pointer to pasid directory dentry */
+#endif
};
static inline void __iommu_flush_cache(
@@ -784,6 +829,16 @@ static inline bool dma_pte_present(struct dma_pte *pte)
return (pte->val & 3) != 0;
}
+static inline bool dma_sl_pte_test_and_clear_dirty(struct dma_pte *pte,
+ unsigned long flags)
+{
+ if (flags & IOMMU_DIRTY_NO_CLEAR)
+ return (pte->val & DMA_SL_PTE_DIRTY) != 0;
+
+ return test_and_clear_bit(DMA_SL_PTE_DIRTY_BIT,
+ (unsigned long *)&pte->val);
+}
+
static inline bool dma_pte_superpage(struct dma_pte *pte)
{
return (pte->val & DMA_PTE_LARGE_PAGE);
@@ -839,12 +894,22 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
*/
#define QI_OPT_WAIT_DRAIN BIT(0)
+void domain_update_iotlb(struct dmar_domain *domain);
+int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
+void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
+void device_block_translation(struct device *dev);
+int prepare_domain_attach_device(struct iommu_domain *domain,
+ struct device *dev);
+void domain_update_iommu_cap(struct dmar_domain *domain);
+
int dmar_ir_support(void);
void *alloc_pgtable_page(int node, gfp_t gfp);
void free_pgtable_page(void *vaddr);
void iommu_flush_write_buffer(struct intel_iommu *iommu);
struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
+struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
+ const struct iommu_user_data *user_data);
#ifdef CONFIG_INTEL_IOMMU_SVM
void intel_svm_check(struct intel_iommu *iommu);
@@ -886,8 +951,16 @@ static inline void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid
#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
void intel_iommu_debugfs_init(void);
+void intel_iommu_debugfs_create_dev(struct device_domain_info *info);
+void intel_iommu_debugfs_remove_dev(struct device_domain_info *info);
+void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid);
+void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid);
#else
static inline void intel_iommu_debugfs_init(void) {}
+static inline void intel_iommu_debugfs_create_dev(struct device_domain_info *info) {}
+static inline void intel_iommu_debugfs_remove_dev(struct device_domain_info *info) {}
+static inline void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid) {}
+static inline void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid) {}
#endif /* CONFIG_INTEL_IOMMU_DEBUGFS */
extern const struct attribute_group *intel_iommu_groups[];
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
new file mode 100644
index 0000000000..92e82b33ea
--- /dev/null
+++ b/drivers/iommu/intel/nested.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * nested.c - nested mode translation support
+ *
+ * Copyright (C) 2023 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ * Jacob Pan <jacob.jun.pan@linux.intel.com>
+ * Yi Liu <yi.l.liu@intel.com>
+ */
+
+#define pr_fmt(fmt) "DMAR: " fmt
+
+#include <linux/iommu.h>
+#include <linux/pci.h>
+#include <linux/pci-ats.h>
+
+#include "iommu.h"
+#include "pasid.h"
+
+static int intel_nested_attach_dev(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ struct intel_iommu *iommu = info->iommu;
+ unsigned long flags;
+ int ret = 0;
+
+ if (info->domain)
+ device_block_translation(dev);
+
+ if (iommu->agaw < dmar_domain->s2_domain->agaw) {
+ dev_err_ratelimited(dev, "Adjusted guest address width not compatible\n");
+ return -ENODEV;
+ }
+
+ /*
+ * Stage-1 domain cannot work alone, it is nested on a s2_domain.
+ * The s2_domain will be used in nested translation, hence needs
+ * to ensure the s2_domain is compatible with this IOMMU.
+ */
+ ret = prepare_domain_attach_device(&dmar_domain->s2_domain->domain, dev);
+ if (ret) {
+ dev_err_ratelimited(dev, "s2 domain is not compatible\n");
+ return ret;
+ }
+
+ ret = domain_attach_iommu(dmar_domain, iommu);
+ if (ret) {
+ dev_err_ratelimited(dev, "Failed to attach domain to iommu\n");
+ return ret;
+ }
+
+ ret = intel_pasid_setup_nested(iommu, dev,
+ IOMMU_NO_PASID, dmar_domain);
+ if (ret) {
+ domain_detach_iommu(dmar_domain, iommu);
+ dev_err_ratelimited(dev, "Failed to setup pasid entry\n");
+ return ret;
+ }
+
+ info->domain = dmar_domain;
+ spin_lock_irqsave(&dmar_domain->lock, flags);
+ list_add(&info->link, &dmar_domain->devices);
+ spin_unlock_irqrestore(&dmar_domain->lock, flags);
+
+ domain_update_iotlb(dmar_domain);
+
+ return 0;
+}
+
+static void intel_nested_domain_free(struct iommu_domain *domain)
+{
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ struct dmar_domain *s2_domain = dmar_domain->s2_domain;
+
+ spin_lock(&s2_domain->s1_lock);
+ list_del(&dmar_domain->s2_link);
+ spin_unlock(&s2_domain->s1_lock);
+ kfree(dmar_domain);
+}
+
+static const struct iommu_domain_ops intel_nested_domain_ops = {
+ .attach_dev = intel_nested_attach_dev,
+ .free = intel_nested_domain_free,
+};
+
+struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
+ const struct iommu_user_data *user_data)
+{
+ struct dmar_domain *s2_domain = to_dmar_domain(parent);
+ struct iommu_hwpt_vtd_s1 vtd;
+ struct dmar_domain *domain;
+ int ret;
+
+ /* Must be nested domain */
+ if (user_data->type != IOMMU_HWPT_DATA_VTD_S1)
+ return ERR_PTR(-EOPNOTSUPP);
+ if (parent->ops != intel_iommu_ops.default_domain_ops ||
+ !s2_domain->nested_parent)
+ return ERR_PTR(-EINVAL);
+
+ ret = iommu_copy_struct_from_user(&vtd, user_data,
+ IOMMU_HWPT_DATA_VTD_S1, __reserved);
+ if (ret)
+ return ERR_PTR(ret);
+
+ domain = kzalloc(sizeof(*domain), GFP_KERNEL_ACCOUNT);
+ if (!domain)
+ return ERR_PTR(-ENOMEM);
+
+ domain->use_first_level = true;
+ domain->s2_domain = s2_domain;
+ domain->s1_pgtbl = vtd.pgtbl_addr;
+ domain->s1_cfg = vtd;
+ domain->domain.ops = &intel_nested_domain_ops;
+ domain->domain.type = IOMMU_DOMAIN_NESTED;
+ INIT_LIST_HEAD(&domain->devices);
+ INIT_LIST_HEAD(&domain->dev_pasids);
+ spin_lock_init(&domain->lock);
+ xa_init(&domain->iommu_array);
+
+ spin_lock(&s2_domain->s1_lock);
+ list_add(&domain->s2_link, &s2_domain->s1_domains);
+ spin_unlock(&s2_domain->s1_lock);
+
+ return &domain->domain;
+}
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 8f92b92f3d..6e102cbbde 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -277,6 +277,11 @@ static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
WRITE_ONCE(*ptr, (old & ~mask) | bits);
}
+static inline u64 pasid_get_bits(u64 *ptr)
+{
+ return READ_ONCE(*ptr);
+}
+
/*
* Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
* PASID entry.
@@ -336,6 +341,45 @@ static inline void pasid_set_fault_enable(struct pasid_entry *pe)
}
/*
+ * Enable second level A/D bits by setting the SLADE (Second Level
+ * Access Dirty Enable) field (Bit 9) of a scalable mode PASID
+ * entry.
+ */
+static inline void pasid_set_ssade(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[0], 1 << 9, 1 << 9);
+}
+
+/*
+ * Disable second level A/D bits by clearing the SLADE (Second Level
+ * Access Dirty Enable) field (Bit 9) of a scalable mode PASID
+ * entry.
+ */
+static inline void pasid_clear_ssade(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[0], 1 << 9, 0);
+}
+
+/*
+ * Checks if second level A/D bits specifically the SLADE (Second Level
+ * Access Dirty Enable) field (Bit 9) of a scalable mode PASID
+ * entry is set.
+ */
+static inline bool pasid_get_ssade(struct pasid_entry *pe)
+{
+ return pasid_get_bits(&pe->val[0]) & (1 << 9);
+}
+
+/*
+ * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
+ * scalable mode PASID entry.
+ */
+static inline void pasid_set_sre(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[2], 1 << 0, 1);
+}
+
+/*
* Setup the WPE(Write Protect Enable) field (Bit 132) of a
* scalable mode PASID entry.
*/
@@ -402,6 +446,15 @@ pasid_set_flpm(struct pasid_entry *pe, u64 value)
pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2);
}
+/*
+ * Setup the Extended Access Flag Enable (EAFE) field (Bit 135)
+ * of a scalable mode PASID entry.
+ */
+static inline void pasid_set_eafe(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7);
+}
+
static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
u16 did, u32 pasid)
@@ -627,6 +680,8 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
pasid_set_fault_enable(pte);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+ if (domain->dirty_tracking)
+ pasid_set_ssade(pte);
pasid_set_present(pte);
spin_unlock(&iommu->lock);
@@ -637,6 +692,77 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
}
/*
+ * Set up dirty tracking on a second only or nested translation type.
+ */
+int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
+ struct device *dev, u32 pasid,
+ bool enabled)
+{
+ struct pasid_entry *pte;
+ u16 did, pgtt;
+
+ spin_lock(&iommu->lock);
+
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (!pte) {
+ spin_unlock(&iommu->lock);
+ dev_err_ratelimited(
+ dev, "Failed to get pasid entry of PASID %d\n", pasid);
+ return -ENODEV;
+ }
+
+ did = pasid_get_domain_id(pte);
+ pgtt = pasid_pte_get_pgtt(pte);
+ if (pgtt != PASID_ENTRY_PGTT_SL_ONLY &&
+ pgtt != PASID_ENTRY_PGTT_NESTED) {
+ spin_unlock(&iommu->lock);
+ dev_err_ratelimited(
+ dev,
+ "Dirty tracking not supported on translation type %d\n",
+ pgtt);
+ return -EOPNOTSUPP;
+ }
+
+ if (pasid_get_ssade(pte) == enabled) {
+ spin_unlock(&iommu->lock);
+ return 0;
+ }
+
+ if (enabled)
+ pasid_set_ssade(pte);
+ else
+ pasid_clear_ssade(pte);
+ spin_unlock(&iommu->lock);
+
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(pte, sizeof(*pte));
+
+ /*
+ * From VT-d spec table 25 "Guidance to Software for Invalidations":
+ *
+ * - PASID-selective-within-Domain PASID-cache invalidation
+ * If (PGTT=SS or Nested)
+ * - Domain-selective IOTLB invalidation
+ * Else
+ * - PASID-selective PASID-based IOTLB invalidation
+ * - If (pasid is RID_PASID)
+ * - Global Device-TLB invalidation to affected functions
+ * Else
+ * - PASID-based Device-TLB invalidation (with S=1 and
+ * Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
+ */
+ pasid_cache_invalidation_with_pasid(iommu, did, pasid);
+
+ iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+
+ /* Device IOTLB doesn't need to be flushed in caching mode. */
+ if (!cap_caching_mode(iommu->cap))
+ devtlb_invalidation_with_pasid(iommu, dev, pasid);
+
+ return 0;
+}
+
+/*
* Set up the scalable mode pasid entry for passthrough translation type.
*/
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
@@ -713,3 +839,99 @@ void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
if (!cap_caching_mode(iommu->cap))
devtlb_invalidation_with_pasid(iommu, dev, pasid);
}
+
+/**
+ * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
+ * @iommu: IOMMU which the device belong to
+ * @dev: Device to be set up for translation
+ * @pasid: PASID to be programmed in the device PASID table
+ * @domain: User stage-1 domain nested on a stage-2 domain
+ *
+ * This is used for nested translation. The input domain should be
+ * nested type and nested on a parent with 'is_nested_parent' flag
+ * set.
+ */
+int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
+ u32 pasid, struct dmar_domain *domain)
+{
+ struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
+ pgd_t *s1_gpgd = (pgd_t *)(uintptr_t)domain->s1_pgtbl;
+ struct dmar_domain *s2_domain = domain->s2_domain;
+ u16 did = domain_id_iommu(domain, iommu);
+ struct dma_pte *pgd = s2_domain->pgd;
+ struct pasid_entry *pte;
+
+ /* Address width should match the address width supported by hardware */
+ switch (s1_cfg->addr_width) {
+ case ADDR_WIDTH_4LEVEL:
+ break;
+ case ADDR_WIDTH_5LEVEL:
+ if (!cap_fl5lp_support(iommu->cap)) {
+ dev_err_ratelimited(dev,
+ "5-level paging not supported\n");
+ return -EINVAL;
+ }
+ break;
+ default:
+ dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
+ s1_cfg->addr_width);
+ return -EINVAL;
+ }
+
+ if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
+ pr_err_ratelimited("No supervisor request support on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+
+ if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
+ pr_err_ratelimited("No extended access flag support on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+
+ spin_lock(&iommu->lock);
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (!pte) {
+ spin_unlock(&iommu->lock);
+ return -ENODEV;
+ }
+ if (pasid_pte_is_present(pte)) {
+ spin_unlock(&iommu->lock);
+ return -EBUSY;
+ }
+
+ pasid_clear_entry(pte);
+
+ if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
+ pasid_set_flpm(pte, 1);
+
+ pasid_set_flptr(pte, (uintptr_t)s1_gpgd);
+
+ if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
+ pasid_set_sre(pte);
+ if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
+ pasid_set_wpe(pte);
+ }
+
+ if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
+ pasid_set_eafe(pte);
+
+ if (s2_domain->force_snooping)
+ pasid_set_pgsnp(pte);
+
+ pasid_set_slptr(pte, virt_to_phys(pgd));
+ pasid_set_fault_enable(pte);
+ pasid_set_domain_id(pte, did);
+ pasid_set_address_width(pte, s2_domain->agaw);
+ pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+ if (s2_domain->dirty_tracking)
+ pasid_set_ssade(pte);
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
+ pasid_set_present(pte);
+ spin_unlock(&iommu->lock);
+
+ pasid_flush_caches(iommu, pte, pasid, did);
+
+ return 0;
+}
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index 4e9e68c3c3..3568adca1f 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -106,9 +106,14 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev, u32 pasid);
+int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
+ struct device *dev, u32 pasid,
+ bool enabled);
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev, u32 pasid);
+int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
+ u32 pasid, struct dmar_domain *domain);
void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
struct device *dev, u32 pasid,
bool fault_ignore);
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index b78671a8a9..4a2f569974 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -141,7 +141,7 @@ u32 iommu_sva_get_pasid(struct iommu_sva *handle)
{
struct iommu_domain *domain = handle->domain;
- return domain->mm->pasid;
+ return mm_get_enqcmd_pasid(domain->mm);
}
EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 3a67e63628..33e2a9b5d3 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -37,7 +37,6 @@
#include "iommu-priv.h"
#include "iommu-sva.h"
-#include "iommu-priv.h"
static struct kset *iommu_group_kset;
static DEFINE_IDA(iommu_group_ida);
@@ -96,8 +95,8 @@ static const char * const iommu_group_resv_type_string[] = {
static int iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data);
static void iommu_release_device(struct device *dev);
-static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
- unsigned type);
+static struct iommu_domain *
+__iommu_group_domain_alloc(struct iommu_group *group, unsigned int type);
static int __iommu_attach_device(struct iommu_domain *domain,
struct device *dev);
static int __iommu_attach_group(struct iommu_domain *domain,
@@ -184,6 +183,8 @@ static const char *iommu_domain_type_str(unsigned int t)
case IOMMU_DOMAIN_DMA:
case IOMMU_DOMAIN_DMA_FQ:
return "Translated";
+ case IOMMU_DOMAIN_PLATFORM:
+ return "Platform";
default:
return "Unknown";
}
@@ -290,6 +291,10 @@ void iommu_device_unregister(struct iommu_device *iommu)
spin_lock(&iommu_device_lock);
list_del(&iommu->list);
spin_unlock(&iommu_device_lock);
+
+ /* Pairs with the alloc in generic_single_device_group() */
+ iommu_group_put(iommu->singleton_group);
+ iommu->singleton_group = NULL;
}
EXPORT_SYMBOL_GPL(iommu_device_unregister);
@@ -404,6 +409,7 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops)
ret = PTR_ERR(iommu_dev);
goto err_module_put;
}
+ dev->iommu->iommu_dev = iommu_dev;
ret = iommu_device_link(iommu_dev, dev);
if (ret)
@@ -418,7 +424,6 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops)
}
dev->iommu_group = group;
- dev->iommu->iommu_dev = iommu_dev;
dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
if (ops->is_attach_deferred)
dev->iommu->attach_deferred = ops->is_attach_deferred(dev);
@@ -432,6 +437,7 @@ err_release:
err_module_put:
module_put(ops->owner);
err_free:
+ dev->iommu->iommu_dev = NULL;
dev_iommu_free(dev);
return ret;
}
@@ -1635,6 +1641,27 @@ struct iommu_group *generic_device_group(struct device *dev)
EXPORT_SYMBOL_GPL(generic_device_group);
/*
+ * Generic device_group call-back function. It just allocates one
+ * iommu-group per iommu driver instance shared by every device
+ * probed by that iommu driver.
+ */
+struct iommu_group *generic_single_device_group(struct device *dev)
+{
+ struct iommu_device *iommu = dev->iommu->iommu_dev;
+
+ if (!iommu->singleton_group) {
+ struct iommu_group *group;
+
+ group = iommu_group_alloc();
+ if (IS_ERR(group))
+ return group;
+ iommu->singleton_group = group;
+ }
+ return iommu_group_ref_get(iommu->singleton_group);
+}
+EXPORT_SYMBOL_GPL(generic_single_device_group);
+
+/*
* Use standard PCI bus topology, isolation features, and DMA alias quirks
* to find or create an IOMMU group for a device.
*/
@@ -1715,26 +1742,29 @@ struct iommu_group *fsl_mc_device_group(struct device *dev)
}
EXPORT_SYMBOL_GPL(fsl_mc_device_group);
-static int iommu_get_def_domain_type(struct device *dev)
-{
- const struct iommu_ops *ops = dev_iommu_ops(dev);
-
- if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted)
- return IOMMU_DOMAIN_DMA;
-
- if (ops->def_domain_type)
- return ops->def_domain_type(dev);
-
- return 0;
-}
-
static struct iommu_domain *
-__iommu_group_alloc_default_domain(const struct bus_type *bus,
- struct iommu_group *group, int req_type)
+__iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
{
if (group->default_domain && group->default_domain->type == req_type)
return group->default_domain;
- return __iommu_domain_alloc(bus, req_type);
+ return __iommu_group_domain_alloc(group, req_type);
+}
+
+/*
+ * Returns the iommu_ops for the devices in an iommu group.
+ *
+ * It is assumed that all devices in an iommu group are managed by a single
+ * IOMMU unit. Therefore, this returns the dev_iommu_ops of the first device
+ * in the group.
+ */
+static const struct iommu_ops *group_iommu_ops(struct iommu_group *group)
+{
+ struct group_device *device =
+ list_first_entry(&group->devices, struct group_device, list);
+
+ lockdep_assert_held(&group->mutex);
+
+ return dev_iommu_ops(device->dev);
}
/*
@@ -1744,27 +1774,36 @@ __iommu_group_alloc_default_domain(const struct bus_type *bus,
static struct iommu_domain *
iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
{
- const struct bus_type *bus =
- list_first_entry(&group->devices, struct group_device, list)
- ->dev->bus;
+ const struct iommu_ops *ops = group_iommu_ops(group);
struct iommu_domain *dom;
lockdep_assert_held(&group->mutex);
+ /*
+ * Allow legacy drivers to specify the domain that will be the default
+ * domain. This should always be either an IDENTITY/BLOCKED/PLATFORM
+ * domain. Do not use in new drivers.
+ */
+ if (ops->default_domain) {
+ if (req_type)
+ return ERR_PTR(-EINVAL);
+ return ops->default_domain;
+ }
+
if (req_type)
- return __iommu_group_alloc_default_domain(bus, group, req_type);
+ return __iommu_group_alloc_default_domain(group, req_type);
/* The driver gave no guidance on what type to use, try the default */
- dom = __iommu_group_alloc_default_domain(bus, group, iommu_def_domain_type);
- if (dom)
+ dom = __iommu_group_alloc_default_domain(group, iommu_def_domain_type);
+ if (!IS_ERR(dom))
return dom;
/* Otherwise IDENTITY and DMA_FQ defaults will try DMA */
if (iommu_def_domain_type == IOMMU_DOMAIN_DMA)
- return NULL;
- dom = __iommu_group_alloc_default_domain(bus, group, IOMMU_DOMAIN_DMA);
- if (!dom)
- return NULL;
+ return ERR_PTR(-EINVAL);
+ dom = __iommu_group_alloc_default_domain(group, IOMMU_DOMAIN_DMA);
+ if (IS_ERR(dom))
+ return dom;
pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
iommu_def_domain_type, group->name);
@@ -1808,40 +1847,109 @@ static int iommu_bus_notifier(struct notifier_block *nb,
return 0;
}
-/* A target_type of 0 will select the best domain type and cannot fail */
+/*
+ * Combine the driver's chosen def_domain_type across all the devices in a
+ * group. Drivers must give a consistent result.
+ */
+static int iommu_get_def_domain_type(struct iommu_group *group,
+ struct device *dev, int cur_type)
+{
+ const struct iommu_ops *ops = group_iommu_ops(group);
+ int type;
+
+ if (!ops->def_domain_type)
+ return cur_type;
+
+ type = ops->def_domain_type(dev);
+ if (!type || cur_type == type)
+ return cur_type;
+ if (!cur_type)
+ return type;
+
+ dev_err_ratelimited(
+ dev,
+ "IOMMU driver error, requesting conflicting def_domain_type, %s and %s, for devices in group %u.\n",
+ iommu_domain_type_str(cur_type), iommu_domain_type_str(type),
+ group->id);
+
+ /*
+ * Try to recover, drivers are allowed to force IDENITY or DMA, IDENTITY
+ * takes precedence.
+ */
+ if (type == IOMMU_DOMAIN_IDENTITY)
+ return type;
+ return cur_type;
+}
+
+/*
+ * A target_type of 0 will select the best domain type. 0 can be returned in
+ * this case meaning the global default should be used.
+ */
static int iommu_get_default_domain_type(struct iommu_group *group,
int target_type)
{
- int best_type = target_type;
+ struct device *untrusted = NULL;
struct group_device *gdev;
- struct device *last_dev;
+ int driver_type = 0;
lockdep_assert_held(&group->mutex);
+ /*
+ * ARM32 drivers supporting CONFIG_ARM_DMA_USE_IOMMU can declare an
+ * identity_domain and it will automatically become their default
+ * domain. Later on ARM_DMA_USE_IOMMU will install its UNMANAGED domain.
+ * Override the selection to IDENTITY.
+ */
+ if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) {
+ static_assert(!(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) &&
+ IS_ENABLED(CONFIG_IOMMU_DMA)));
+ driver_type = IOMMU_DOMAIN_IDENTITY;
+ }
+
for_each_group_device(group, gdev) {
- unsigned int type = iommu_get_def_domain_type(gdev->dev);
-
- if (best_type && type && best_type != type) {
- if (target_type) {
- dev_err_ratelimited(
- gdev->dev,
- "Device cannot be in %s domain\n",
- iommu_domain_type_str(target_type));
+ driver_type = iommu_get_def_domain_type(group, gdev->dev,
+ driver_type);
+
+ if (dev_is_pci(gdev->dev) && to_pci_dev(gdev->dev)->untrusted) {
+ /*
+ * No ARM32 using systems will set untrusted, it cannot
+ * work.
+ */
+ if (WARN_ON(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)))
return -1;
- }
+ untrusted = gdev->dev;
+ }
+ }
- dev_warn(
- gdev->dev,
- "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
- iommu_domain_type_str(type), dev_name(last_dev),
- iommu_domain_type_str(best_type));
- return 0;
+ /*
+ * If the common dma ops are not selected in kconfig then we cannot use
+ * IOMMU_DOMAIN_DMA at all. Force IDENTITY if nothing else has been
+ * selected.
+ */
+ if (!IS_ENABLED(CONFIG_IOMMU_DMA)) {
+ if (WARN_ON(driver_type == IOMMU_DOMAIN_DMA))
+ return -1;
+ if (!driver_type)
+ driver_type = IOMMU_DOMAIN_IDENTITY;
+ }
+
+ if (untrusted) {
+ if (driver_type && driver_type != IOMMU_DOMAIN_DMA) {
+ dev_err_ratelimited(
+ untrusted,
+ "Device is not trusted, but driver is overriding group %u to %s, refusing to probe.\n",
+ group->id, iommu_domain_type_str(driver_type));
+ return -1;
}
- if (!best_type)
- best_type = type;
- last_dev = gdev->dev;
+ driver_type = IOMMU_DOMAIN_DMA;
}
- return best_type;
+
+ if (target_type) {
+ if (driver_type && target_type != driver_type)
+ return -1;
+ return target_type;
+ }
+ return driver_type;
}
static void iommu_group_do_probe_finalize(struct device *dev)
@@ -1970,18 +2078,33 @@ void iommu_set_fault_handler(struct iommu_domain *domain,
}
EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
-static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
- unsigned type)
+static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops,
+ struct device *dev,
+ unsigned int type)
{
struct iommu_domain *domain;
unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS;
- if (bus == NULL || bus->iommu_ops == NULL)
- return NULL;
+ if (alloc_type == IOMMU_DOMAIN_IDENTITY && ops->identity_domain)
+ return ops->identity_domain;
+ else if (alloc_type == IOMMU_DOMAIN_BLOCKED && ops->blocked_domain)
+ return ops->blocked_domain;
+ else if (type & __IOMMU_DOMAIN_PAGING && ops->domain_alloc_paging)
+ domain = ops->domain_alloc_paging(dev);
+ else if (ops->domain_alloc)
+ domain = ops->domain_alloc(alloc_type);
+ else
+ return ERR_PTR(-EOPNOTSUPP);
- domain = bus->iommu_ops->domain_alloc(alloc_type);
+ /*
+ * Many domain_alloc ops now return ERR_PTR, make things easier for the
+ * driver by accepting ERR_PTR from all domain_alloc ops instead of
+ * having two rules.
+ */
+ if (IS_ERR(domain))
+ return domain;
if (!domain)
- return NULL;
+ return ERR_PTR(-ENOMEM);
domain->type = type;
/*
@@ -1989,21 +2112,44 @@ static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
* may override this later
*/
if (!domain->pgsize_bitmap)
- domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap;
+ domain->pgsize_bitmap = ops->pgsize_bitmap;
if (!domain->ops)
- domain->ops = bus->iommu_ops->default_domain_ops;
+ domain->ops = ops->default_domain_ops;
- if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) {
- iommu_domain_free(domain);
- domain = NULL;
+ if (iommu_is_dma_domain(domain)) {
+ int rc;
+
+ rc = iommu_get_dma_cookie(domain);
+ if (rc) {
+ iommu_domain_free(domain);
+ return ERR_PTR(rc);
+ }
}
return domain;
}
+static struct iommu_domain *
+__iommu_group_domain_alloc(struct iommu_group *group, unsigned int type)
+{
+ struct device *dev =
+ list_first_entry(&group->devices, struct group_device, list)
+ ->dev;
+
+ return __iommu_domain_alloc(group_iommu_ops(group), dev, type);
+}
+
struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus)
{
- return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED);
+ struct iommu_domain *domain;
+
+ if (bus == NULL || bus->iommu_ops == NULL)
+ return NULL;
+ domain = __iommu_domain_alloc(bus->iommu_ops, NULL,
+ IOMMU_DOMAIN_UNMANAGED);
+ if (IS_ERR(domain))
+ return NULL;
+ return domain;
}
EXPORT_SYMBOL_GPL(iommu_domain_alloc);
@@ -2012,7 +2158,8 @@ void iommu_domain_free(struct iommu_domain *domain)
if (domain->type == IOMMU_DOMAIN_SVA)
mmdrop(domain->mm);
iommu_put_dma_cookie(domain);
- domain->ops->free(domain);
+ if (domain->ops->free)
+ domain->ops->free(domain);
}
EXPORT_SYMBOL_GPL(iommu_domain_free);
@@ -2062,10 +2209,10 @@ static int __iommu_attach_device(struct iommu_domain *domain,
*/
int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
{
- struct iommu_group *group;
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
int ret;
- group = iommu_group_get(dev);
if (!group)
return -ENODEV;
@@ -2082,8 +2229,6 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
out_unlock:
mutex_unlock(&group->mutex);
- iommu_group_put(group);
-
return ret;
}
EXPORT_SYMBOL_GPL(iommu_attach_device);
@@ -2098,9 +2243,9 @@ int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
{
- struct iommu_group *group;
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
- group = iommu_group_get(dev);
if (!group)
return;
@@ -2112,24 +2257,18 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
out_unlock:
mutex_unlock(&group->mutex);
- iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(iommu_detach_device);
struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
{
- struct iommu_domain *domain;
- struct iommu_group *group;
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
- group = iommu_group_get(dev);
if (!group)
return NULL;
- domain = group->domain;
-
- iommu_group_put(group);
-
- return domain;
+ return group->domain;
}
EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);
@@ -2275,21 +2414,8 @@ static int __iommu_group_set_domain_internal(struct iommu_group *group,
if (group->domain == new_domain)
return 0;
- /*
- * New drivers should support default domains, so set_platform_dma()
- * op will never be called. Otherwise the NULL domain represents some
- * platform specific behavior.
- */
- if (!new_domain) {
- for_each_group_device(group, gdev) {
- const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);
-
- if (!WARN_ON(!ops->set_platform_dma_ops))
- ops->set_platform_dma_ops(gdev->dev);
- }
- group->domain = NULL;
- return 0;
- }
+ if (WARN_ON(!new_domain))
+ return -EINVAL;
/*
* Changing the domain is done by calling attach_dev() on the new
@@ -2325,19 +2451,15 @@ err_revert:
*/
last_gdev = gdev;
for_each_group_device(group, gdev) {
- const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);
-
/*
- * If set_platform_dma_ops is not present a NULL domain can
- * happen only for first probe, in which case we leave
- * group->domain as NULL and let release clean everything up.
+ * A NULL domain can happen only for first probe, in which case
+ * we leave group->domain as NULL and let release clean
+ * everything up.
*/
if (group->domain)
WARN_ON(__iommu_device_set_domain(
group, gdev->dev, group->domain,
IOMMU_SET_DOMAIN_MUST_SUCCEED));
- else if (ops->set_platform_dma_ops)
- ops->set_platform_dma_ops(gdev->dev);
if (gdev == last_gdev)
break;
}
@@ -2418,30 +2540,6 @@ out_set_count:
return pgsize;
}
-static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot,
- gfp_t gfp, size_t *mapped)
-{
- const struct iommu_domain_ops *ops = domain->ops;
- size_t pgsize, count;
- int ret;
-
- pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
-
- pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
- iova, &paddr, pgsize, count);
-
- if (ops->map_pages) {
- ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
- gfp, mapped);
- } else {
- ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
- *mapped = ret ? 0 : pgsize;
- }
-
- return ret;
-}
-
static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
@@ -2452,13 +2550,12 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t orig_paddr = paddr;
int ret = 0;
- if (unlikely(!(ops->map || ops->map_pages) ||
- domain->pgsize_bitmap == 0UL))
- return -ENODEV;
-
if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
return -EINVAL;
+ if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL))
+ return -ENODEV;
+
/* find out the minimum page size supported */
min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
@@ -2476,10 +2573,14 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
while (size) {
- size_t mapped = 0;
+ size_t pgsize, count, mapped = 0;
- ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp,
- &mapped);
+ pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
+
+ pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
+ iova, &paddr, pgsize, count);
+ ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
+ gfp, &mapped);
/*
* Some pages may have been mapped, even if an error occurred,
* so we should account for those so they can be unmapped.
@@ -2516,25 +2617,21 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,
return -EINVAL;
ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
- if (ret == 0 && ops->iotlb_sync_map)
- ops->iotlb_sync_map(domain, iova, size);
+ if (ret == 0 && ops->iotlb_sync_map) {
+ ret = ops->iotlb_sync_map(domain, iova, size);
+ if (ret)
+ goto out_err;
+ }
return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_map);
-static size_t __iommu_unmap_pages(struct iommu_domain *domain,
- unsigned long iova, size_t size,
- struct iommu_iotlb_gather *iotlb_gather)
-{
- const struct iommu_domain_ops *ops = domain->ops;
- size_t pgsize, count;
+out_err:
+ /* undo mappings already done */
+ iommu_unmap(domain, iova, size);
- pgsize = iommu_pgsize(domain, iova, iova, size, &count);
- return ops->unmap_pages ?
- ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) :
- ops->unmap(domain, iova, pgsize, iotlb_gather);
+ return ret;
}
+EXPORT_SYMBOL_GPL(iommu_map);
static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size,
@@ -2545,11 +2642,10 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long orig_iova = iova;
unsigned int min_pagesz;
- if (unlikely(!(ops->unmap || ops->unmap_pages) ||
- domain->pgsize_bitmap == 0UL))
+ if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
return 0;
- if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
+ if (WARN_ON(!ops->unmap_pages || domain->pgsize_bitmap == 0UL))
return 0;
/* find out the minimum page size supported */
@@ -2573,9 +2669,10 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
* or we hit an area that isn't mapped.
*/
while (unmapped < size) {
- unmapped_page = __iommu_unmap_pages(domain, iova,
- size - unmapped,
- iotlb_gather);
+ size_t pgsize, count;
+
+ pgsize = iommu_pgsize(domain, iova, iova, size - unmapped, &count);
+ unmapped_page = ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather);
if (!unmapped_page)
break;
@@ -2658,8 +2755,11 @@ next:
sg = sg_next(sg);
}
- if (ops->iotlb_sync_map)
- ops->iotlb_sync_map(domain, iova, mapped);
+ if (ops->iotlb_sync_map) {
+ ret = ops->iotlb_sync_map(domain, iova, mapped);
+ if (ret)
+ goto out_err;
+ }
return mapped;
out_err:
@@ -2957,21 +3057,9 @@ static int iommu_setup_default_domain(struct iommu_group *group,
if (req_type < 0)
return -EINVAL;
- /*
- * There are still some drivers which don't support default domains, so
- * we ignore the failure and leave group->default_domain NULL.
- *
- * We assume that the iommu driver starts up the device in
- * 'set_platform_dma_ops' mode if it does not support default domains.
- */
dom = iommu_group_alloc_default_domain(group, req_type);
- if (!dom) {
- /* Once in default_domain mode we never leave */
- if (group->default_domain)
- return -ENODEV;
- group->default_domain = NULL;
- return 0;
- }
+ if (IS_ERR(dom))
+ return PTR_ERR(dom);
if (group->default_domain == dom)
return 0;
@@ -3114,24 +3202,6 @@ out_unlock:
return ret ?: count;
}
-static bool iommu_is_default_domain(struct iommu_group *group)
-{
- if (group->domain == group->default_domain)
- return true;
-
- /*
- * If the default domain was set to identity and it is still an identity
- * domain then we consider this a pass. This happens because of
- * amd_iommu_init_device() replacing the default idenytity domain with an
- * identity domain that has a different configuration for AMDGPU.
- */
- if (group->default_domain &&
- group->default_domain->type == IOMMU_DOMAIN_IDENTITY &&
- group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY)
- return true;
- return false;
-}
-
/**
* iommu_device_use_default_domain() - Device driver wants to handle device
* DMA through the kernel DMA API.
@@ -3142,7 +3212,8 @@ static bool iommu_is_default_domain(struct iommu_group *group)
*/
int iommu_device_use_default_domain(struct device *dev)
{
- struct iommu_group *group = iommu_group_get(dev);
+ /* Caller is the driver core during the pre-probe path */
+ struct iommu_group *group = dev->iommu_group;
int ret = 0;
if (!group)
@@ -3150,7 +3221,7 @@ int iommu_device_use_default_domain(struct device *dev)
mutex_lock(&group->mutex);
if (group->owner_cnt) {
- if (group->owner || !iommu_is_default_domain(group) ||
+ if (group->domain != group->default_domain || group->owner ||
!xa_empty(&group->pasid_array)) {
ret = -EBUSY;
goto unlock_out;
@@ -3161,8 +3232,6 @@ int iommu_device_use_default_domain(struct device *dev)
unlock_out:
mutex_unlock(&group->mutex);
- iommu_group_put(group);
-
return ret;
}
@@ -3176,7 +3245,8 @@ unlock_out:
*/
void iommu_device_unuse_default_domain(struct device *dev)
{
- struct iommu_group *group = iommu_group_get(dev);
+ /* Caller is the driver core during the post-probe path */
+ struct iommu_group *group = dev->iommu_group;
if (!group)
return;
@@ -3186,29 +3256,27 @@ void iommu_device_unuse_default_domain(struct device *dev)
group->owner_cnt--;
mutex_unlock(&group->mutex);
- iommu_group_put(group);
}
static int __iommu_group_alloc_blocking_domain(struct iommu_group *group)
{
- struct group_device *dev =
- list_first_entry(&group->devices, struct group_device, list);
+ struct iommu_domain *domain;
if (group->blocking_domain)
return 0;
- group->blocking_domain =
- __iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED);
- if (!group->blocking_domain) {
+ domain = __iommu_group_domain_alloc(group, IOMMU_DOMAIN_BLOCKED);
+ if (IS_ERR(domain)) {
/*
* For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED
* create an empty domain instead.
*/
- group->blocking_domain = __iommu_domain_alloc(
- dev->dev->bus, IOMMU_DOMAIN_UNMANAGED);
- if (!group->blocking_domain)
- return -EINVAL;
+ domain = __iommu_group_domain_alloc(group,
+ IOMMU_DOMAIN_UNMANAGED);
+ if (IS_ERR(domain))
+ return PTR_ERR(domain);
}
+ group->blocking_domain = domain;
return 0;
}
@@ -3273,13 +3341,13 @@ EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner);
*/
int iommu_device_claim_dma_owner(struct device *dev, void *owner)
{
- struct iommu_group *group;
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
int ret = 0;
if (WARN_ON(!owner))
return -EINVAL;
- group = iommu_group_get(dev);
if (!group)
return -ENODEV;
@@ -3296,8 +3364,6 @@ int iommu_device_claim_dma_owner(struct device *dev, void *owner)
ret = __iommu_take_dma_ownership(group, owner);
unlock_out:
mutex_unlock(&group->mutex);
- iommu_group_put(group);
-
return ret;
}
EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner);
@@ -3335,7 +3401,8 @@ EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner);
*/
void iommu_device_release_dma_owner(struct device *dev)
{
- struct iommu_group *group = iommu_group_get(dev);
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
mutex_lock(&group->mutex);
if (group->owner_cnt > 1)
@@ -3343,7 +3410,6 @@ void iommu_device_release_dma_owner(struct device *dev)
else
__iommu_release_dma_ownership(group);
mutex_unlock(&group->mutex);
- iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner);
@@ -3404,14 +3470,14 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
{
- struct iommu_group *group;
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
void *curr;
int ret;
if (!domain->ops->set_dev_pasid)
return -EOPNOTSUPP;
- group = iommu_group_get(dev);
if (!group)
return -ENODEV;
@@ -3429,8 +3495,6 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
}
out_unlock:
mutex_unlock(&group->mutex);
- iommu_group_put(group);
-
return ret;
}
EXPORT_SYMBOL_GPL(iommu_attach_device_pasid);
@@ -3447,14 +3511,13 @@ EXPORT_SYMBOL_GPL(iommu_attach_device_pasid);
void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev,
ioasid_t pasid)
{
- struct iommu_group *group = iommu_group_get(dev);
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
mutex_lock(&group->mutex);
__iommu_remove_group_pasid(group, pasid);
WARN_ON(xa_erase(&group->pasid_array, pasid) != domain);
mutex_unlock(&group->mutex);
-
- iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(iommu_detach_device_pasid);
@@ -3476,10 +3539,10 @@ struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
ioasid_t pasid,
unsigned int type)
{
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
struct iommu_domain *domain;
- struct iommu_group *group;
- group = iommu_group_get(dev);
if (!group)
return NULL;
@@ -3488,7 +3551,6 @@ struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
if (type && domain && domain->type != type)
domain = ERR_PTR(-EBUSY);
xa_unlock(&group->pasid_array);
- iommu_group_put(group);
return domain;
}
diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index 8aeba81800..34b4461469 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -11,3 +11,4 @@ iommufd-y := \
iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o
obj-$(CONFIG_IOMMUFD) += iommufd.o
+obj-$(CONFIG_IOMMUFD_DRIVER) += iova_bitmap.o
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index ce78c36715..873630c111 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -293,7 +293,7 @@ u32 iommufd_device_to_id(struct iommufd_device *idev)
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD);
static int iommufd_group_setup_msi(struct iommufd_group *igroup,
- struct iommufd_hw_pagetable *hwpt)
+ struct iommufd_hwpt_paging *hwpt_paging)
{
phys_addr_t sw_msi_start = igroup->sw_msi_start;
int rc;
@@ -311,8 +311,9 @@ static int iommufd_group_setup_msi(struct iommufd_group *igroup,
* matches what the IRQ layer actually expects in a newly created
* domain.
*/
- if (sw_msi_start != PHYS_ADDR_MAX && !hwpt->msi_cookie) {
- rc = iommu_get_msi_cookie(hwpt->domain, sw_msi_start);
+ if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) {
+ rc = iommu_get_msi_cookie(hwpt_paging->common.domain,
+ sw_msi_start);
if (rc)
return rc;
@@ -320,7 +321,31 @@ static int iommufd_group_setup_msi(struct iommufd_group *igroup,
* iommu_get_msi_cookie() can only be called once per domain,
* it returns -EBUSY on later calls.
*/
- hwpt->msi_cookie = true;
+ hwpt_paging->msi_cookie = true;
+ }
+ return 0;
+}
+
+static int iommufd_hwpt_paging_attach(struct iommufd_hwpt_paging *hwpt_paging,
+ struct iommufd_device *idev)
+{
+ int rc;
+
+ lockdep_assert_held(&idev->igroup->lock);
+
+ rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt,
+ idev->dev,
+ &idev->igroup->sw_msi_start);
+ if (rc)
+ return rc;
+
+ if (list_empty(&idev->igroup->device_list)) {
+ rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging);
+ if (rc) {
+ iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt,
+ idev->dev);
+ return rc;
+ }
}
return 0;
}
@@ -337,18 +362,12 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
goto err_unlock;
}
- /* Try to upgrade the domain we have */
- if (idev->enforce_cache_coherency) {
- rc = iommufd_hw_pagetable_enforce_cc(hwpt);
+ if (hwpt_is_paging(hwpt)) {
+ rc = iommufd_hwpt_paging_attach(to_hwpt_paging(hwpt), idev);
if (rc)
goto err_unlock;
}
- rc = iopt_table_enforce_dev_resv_regions(&hwpt->ioas->iopt, idev->dev,
- &idev->igroup->sw_msi_start);
- if (rc)
- goto err_unlock;
-
/*
* Only attach to the group once for the first device that is in the
* group. All the other devices will follow this attachment. The user
@@ -357,10 +376,6 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
* attachment.
*/
if (list_empty(&idev->igroup->device_list)) {
- rc = iommufd_group_setup_msi(idev->igroup, hwpt);
- if (rc)
- goto err_unresv;
-
rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
if (rc)
goto err_unresv;
@@ -371,7 +386,9 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
mutex_unlock(&idev->igroup->lock);
return 0;
err_unresv:
- iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
+ if (hwpt_is_paging(hwpt))
+ iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt,
+ idev->dev);
err_unlock:
mutex_unlock(&idev->igroup->lock);
return rc;
@@ -388,7 +405,9 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev)
iommu_detach_group(hwpt->domain, idev->igroup->group);
idev->igroup->hwpt = NULL;
}
- iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
+ if (hwpt_is_paging(hwpt))
+ iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt,
+ idev->dev);
mutex_unlock(&idev->igroup->lock);
/* Caller must destroy hwpt */
@@ -407,14 +426,55 @@ iommufd_device_do_attach(struct iommufd_device *idev,
return NULL;
}
+static void
+iommufd_group_remove_reserved_iova(struct iommufd_group *igroup,
+ struct iommufd_hwpt_paging *hwpt_paging)
+{
+ struct iommufd_device *cur;
+
+ lockdep_assert_held(&igroup->lock);
+
+ list_for_each_entry(cur, &igroup->device_list, group_item)
+ iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev);
+}
+
+static int
+iommufd_group_do_replace_paging(struct iommufd_group *igroup,
+ struct iommufd_hwpt_paging *hwpt_paging)
+{
+ struct iommufd_hw_pagetable *old_hwpt = igroup->hwpt;
+ struct iommufd_device *cur;
+ int rc;
+
+ lockdep_assert_held(&igroup->lock);
+
+ if (!hwpt_is_paging(old_hwpt) ||
+ hwpt_paging->ioas != to_hwpt_paging(old_hwpt)->ioas) {
+ list_for_each_entry(cur, &igroup->device_list, group_item) {
+ rc = iopt_table_enforce_dev_resv_regions(
+ &hwpt_paging->ioas->iopt, cur->dev, NULL);
+ if (rc)
+ goto err_unresv;
+ }
+ }
+
+ rc = iommufd_group_setup_msi(igroup, hwpt_paging);
+ if (rc)
+ goto err_unresv;
+ return 0;
+
+err_unresv:
+ iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
+ return rc;
+}
+
static struct iommufd_hw_pagetable *
iommufd_device_do_replace(struct iommufd_device *idev,
struct iommufd_hw_pagetable *hwpt)
{
struct iommufd_group *igroup = idev->igroup;
struct iommufd_hw_pagetable *old_hwpt;
- unsigned int num_devices = 0;
- struct iommufd_device *cur;
+ unsigned int num_devices;
int rc;
mutex_lock(&idev->igroup->lock);
@@ -429,42 +489,27 @@ iommufd_device_do_replace(struct iommufd_device *idev,
return NULL;
}
- /* Try to upgrade the domain we have */
- list_for_each_entry(cur, &igroup->device_list, group_item) {
- num_devices++;
- if (cur->enforce_cache_coherency) {
- rc = iommufd_hw_pagetable_enforce_cc(hwpt);
- if (rc)
- goto err_unlock;
- }
- }
-
old_hwpt = igroup->hwpt;
- if (hwpt->ioas != old_hwpt->ioas) {
- list_for_each_entry(cur, &igroup->device_list, group_item) {
- rc = iopt_table_enforce_dev_resv_regions(
- &hwpt->ioas->iopt, cur->dev, NULL);
- if (rc)
- goto err_unresv;
- }
+ if (hwpt_is_paging(hwpt)) {
+ rc = iommufd_group_do_replace_paging(igroup,
+ to_hwpt_paging(hwpt));
+ if (rc)
+ goto err_unlock;
}
- rc = iommufd_group_setup_msi(idev->igroup, hwpt);
- if (rc)
- goto err_unresv;
-
rc = iommu_group_replace_domain(igroup->group, hwpt->domain);
if (rc)
goto err_unresv;
- if (hwpt->ioas != old_hwpt->ioas) {
- list_for_each_entry(cur, &igroup->device_list, group_item)
- iopt_remove_reserved_iova(&old_hwpt->ioas->iopt,
- cur->dev);
- }
+ if (hwpt_is_paging(old_hwpt) &&
+ (!hwpt_is_paging(hwpt) ||
+ to_hwpt_paging(hwpt)->ioas != to_hwpt_paging(old_hwpt)->ioas))
+ iommufd_group_remove_reserved_iova(igroup,
+ to_hwpt_paging(old_hwpt));
igroup->hwpt = hwpt;
+ num_devices = list_count_nodes(&igroup->device_list);
/*
* Move the refcounts held by the device_list to the new hwpt. Retain a
* refcount for this thread as the caller will free it.
@@ -478,8 +523,9 @@ iommufd_device_do_replace(struct iommufd_device *idev,
/* Caller must destroy old_hwpt */
return old_hwpt;
err_unresv:
- list_for_each_entry(cur, &igroup->device_list, group_item)
- iopt_remove_reserved_iova(&hwpt->ioas->iopt, cur->dev);
+ if (hwpt_is_paging(hwpt))
+ iommufd_group_remove_reserved_iova(igroup,
+ to_hwpt_paging(old_hwpt));
err_unlock:
mutex_unlock(&idev->igroup->lock);
return ERR_PTR(rc);
@@ -507,6 +553,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
*/
bool immediate_attach = do_attach == iommufd_device_do_attach;
struct iommufd_hw_pagetable *destroy_hwpt;
+ struct iommufd_hwpt_paging *hwpt_paging;
struct iommufd_hw_pagetable *hwpt;
/*
@@ -515,15 +562,16 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
* other.
*/
mutex_lock(&ioas->mutex);
- list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) {
- if (!hwpt->auto_domain)
+ list_for_each_entry(hwpt_paging, &ioas->hwpt_list, hwpt_item) {
+ if (!hwpt_paging->auto_domain)
continue;
+ hwpt = &hwpt_paging->common;
if (!iommufd_lock_obj(&hwpt->obj))
continue;
destroy_hwpt = (*do_attach)(idev, hwpt);
if (IS_ERR(destroy_hwpt)) {
- iommufd_put_object(&hwpt->obj);
+ iommufd_put_object(idev->ictx, &hwpt->obj);
/*
* -EINVAL means the domain is incompatible with the
* device. Other error codes should propagate to
@@ -535,16 +583,17 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
goto out_unlock;
}
*pt_id = hwpt->obj.id;
- iommufd_put_object(&hwpt->obj);
+ iommufd_put_object(idev->ictx, &hwpt->obj);
goto out_unlock;
}
- hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev,
- immediate_attach);
- if (IS_ERR(hwpt)) {
- destroy_hwpt = ERR_CAST(hwpt);
+ hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0,
+ immediate_attach, NULL);
+ if (IS_ERR(hwpt_paging)) {
+ destroy_hwpt = ERR_CAST(hwpt_paging);
goto out_unlock;
}
+ hwpt = &hwpt_paging->common;
if (!immediate_attach) {
destroy_hwpt = (*do_attach)(idev, hwpt);
@@ -554,7 +603,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
destroy_hwpt = NULL;
}
- hwpt->auto_domain = true;
+ hwpt_paging->auto_domain = true;
*pt_id = hwpt->obj.id;
iommufd_object_finalize(idev->ictx, &hwpt->obj);
@@ -579,7 +628,8 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
return PTR_ERR(pt_obj);
switch (pt_obj->type) {
- case IOMMUFD_OBJ_HW_PAGETABLE: {
+ case IOMMUFD_OBJ_HWPT_NESTED:
+ case IOMMUFD_OBJ_HWPT_PAGING: {
struct iommufd_hw_pagetable *hwpt =
container_of(pt_obj, struct iommufd_hw_pagetable, obj);
@@ -602,7 +652,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
destroy_hwpt = ERR_PTR(-EINVAL);
goto out_put_pt_obj;
}
- iommufd_put_object(pt_obj);
+ iommufd_put_object(idev->ictx, pt_obj);
/* This destruction has to be after we unlock everything */
if (destroy_hwpt)
@@ -610,15 +660,15 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
return 0;
out_put_pt_obj:
- iommufd_put_object(pt_obj);
+ iommufd_put_object(idev->ictx, pt_obj);
return PTR_ERR(destroy_hwpt);
}
/**
* iommufd_device_attach - Connect a device to an iommu_domain
* @idev: device to attach
- * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE
- * Output the IOMMUFD_OBJ_HW_PAGETABLE ID
+ * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
+ * Output the IOMMUFD_OBJ_HWPT_PAGING ID
*
* This connects the device to an iommu_domain, either automatically or manually
* selected. Once this completes the device could do DMA.
@@ -646,8 +696,8 @@ EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);
/**
* iommufd_device_replace - Change the device's iommu_domain
* @idev: device to change
- * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE
- * Output the IOMMUFD_OBJ_HW_PAGETABLE ID
+ * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
+ * Output the IOMMUFD_OBJ_HWPT_PAGING ID
*
* This is the same as::
*
@@ -742,7 +792,7 @@ static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id)
if (IS_ERR(ioas))
return PTR_ERR(ioas);
rc = iommufd_access_change_ioas(access, ioas);
- iommufd_put_object(&ioas->obj);
+ iommufd_put_object(access->ictx, &ioas->obj);
return rc;
}
@@ -891,7 +941,7 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
access->ops->unmap(access->data, iova, length);
- iommufd_put_object(&access->obj);
+ iommufd_put_object(access->ictx, &access->obj);
xa_lock(&ioas->iopt.access_list);
}
xa_unlock(&ioas->iopt.access_list);
@@ -1185,10 +1235,14 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
*/
cmd->data_len = data_len;
+ cmd->out_capabilities = 0;
+ if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
+ cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING;
+
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
out_free:
kfree(data);
out_put:
- iommufd_put_object(&idev->obj);
+ iommufd_put_object(ucmd->ictx, &idev->obj);
return rc;
}
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index cf2c1504e2..6f680959b2 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -5,62 +5,87 @@
#include <linux/iommu.h>
#include <uapi/linux/iommufd.h>
+#include "../iommu-priv.h"
#include "iommufd_private.h"
-void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
+void iommufd_hwpt_paging_destroy(struct iommufd_object *obj)
{
- struct iommufd_hw_pagetable *hwpt =
- container_of(obj, struct iommufd_hw_pagetable, obj);
+ struct iommufd_hwpt_paging *hwpt_paging =
+ container_of(obj, struct iommufd_hwpt_paging, common.obj);
- if (!list_empty(&hwpt->hwpt_item)) {
- mutex_lock(&hwpt->ioas->mutex);
- list_del(&hwpt->hwpt_item);
- mutex_unlock(&hwpt->ioas->mutex);
+ if (!list_empty(&hwpt_paging->hwpt_item)) {
+ mutex_lock(&hwpt_paging->ioas->mutex);
+ list_del(&hwpt_paging->hwpt_item);
+ mutex_unlock(&hwpt_paging->ioas->mutex);
- iopt_table_remove_domain(&hwpt->ioas->iopt, hwpt->domain);
+ iopt_table_remove_domain(&hwpt_paging->ioas->iopt,
+ hwpt_paging->common.domain);
}
- if (hwpt->domain)
- iommu_domain_free(hwpt->domain);
+ if (hwpt_paging->common.domain)
+ iommu_domain_free(hwpt_paging->common.domain);
- refcount_dec(&hwpt->ioas->obj.users);
+ refcount_dec(&hwpt_paging->ioas->obj.users);
}
-void iommufd_hw_pagetable_abort(struct iommufd_object *obj)
+void iommufd_hwpt_paging_abort(struct iommufd_object *obj)
{
- struct iommufd_hw_pagetable *hwpt =
- container_of(obj, struct iommufd_hw_pagetable, obj);
+ struct iommufd_hwpt_paging *hwpt_paging =
+ container_of(obj, struct iommufd_hwpt_paging, common.obj);
/* The ioas->mutex must be held until finalize is called. */
- lockdep_assert_held(&hwpt->ioas->mutex);
+ lockdep_assert_held(&hwpt_paging->ioas->mutex);
- if (!list_empty(&hwpt->hwpt_item)) {
- list_del_init(&hwpt->hwpt_item);
- iopt_table_remove_domain(&hwpt->ioas->iopt, hwpt->domain);
+ if (!list_empty(&hwpt_paging->hwpt_item)) {
+ list_del_init(&hwpt_paging->hwpt_item);
+ iopt_table_remove_domain(&hwpt_paging->ioas->iopt,
+ hwpt_paging->common.domain);
}
- iommufd_hw_pagetable_destroy(obj);
+ iommufd_hwpt_paging_destroy(obj);
}
-int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt)
+void iommufd_hwpt_nested_destroy(struct iommufd_object *obj)
{
- if (hwpt->enforce_cache_coherency)
+ struct iommufd_hwpt_nested *hwpt_nested =
+ container_of(obj, struct iommufd_hwpt_nested, common.obj);
+
+ if (hwpt_nested->common.domain)
+ iommu_domain_free(hwpt_nested->common.domain);
+
+ refcount_dec(&hwpt_nested->parent->common.obj.users);
+}
+
+void iommufd_hwpt_nested_abort(struct iommufd_object *obj)
+{
+ iommufd_hwpt_nested_destroy(obj);
+}
+
+static int
+iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging)
+{
+ struct iommu_domain *paging_domain = hwpt_paging->common.domain;
+
+ if (hwpt_paging->enforce_cache_coherency)
return 0;
- if (hwpt->domain->ops->enforce_cache_coherency)
- hwpt->enforce_cache_coherency =
- hwpt->domain->ops->enforce_cache_coherency(
- hwpt->domain);
- if (!hwpt->enforce_cache_coherency)
+ if (paging_domain->ops->enforce_cache_coherency)
+ hwpt_paging->enforce_cache_coherency =
+ paging_domain->ops->enforce_cache_coherency(
+ paging_domain);
+ if (!hwpt_paging->enforce_cache_coherency)
return -EINVAL;
return 0;
}
/**
- * iommufd_hw_pagetable_alloc() - Get an iommu_domain for a device
+ * iommufd_hwpt_paging_alloc() - Get a PAGING iommu_domain for a device
* @ictx: iommufd context
* @ioas: IOAS to associate the domain with
* @idev: Device to get an iommu_domain for
+ * @flags: Flags from userspace
* @immediate_attach: True if idev should be attached to the hwpt
+ * @user_data: The user provided driver specific data describing the domain to
+ * create
*
* Allocate a new iommu_domain and return it as a hw_pagetable. The HWPT
* will be linked to the given ioas and upon return the underlying iommu_domain
@@ -70,28 +95,52 @@ int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt)
* iommufd_object_abort_and_destroy() or iommufd_object_finalize() is called on
* the returned hwpt.
*/
-struct iommufd_hw_pagetable *
-iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
- struct iommufd_device *idev, bool immediate_attach)
+struct iommufd_hwpt_paging *
+iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
+ struct iommufd_device *idev, u32 flags,
+ bool immediate_attach,
+ const struct iommu_user_data *user_data)
{
+ const u32 valid_flags = IOMMU_HWPT_ALLOC_NEST_PARENT |
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
+ const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
+ struct iommufd_hwpt_paging *hwpt_paging;
struct iommufd_hw_pagetable *hwpt;
int rc;
lockdep_assert_held(&ioas->mutex);
- hwpt = iommufd_object_alloc(ictx, hwpt, IOMMUFD_OBJ_HW_PAGETABLE);
- if (IS_ERR(hwpt))
- return hwpt;
+ if ((flags || user_data) && !ops->domain_alloc_user)
+ return ERR_PTR(-EOPNOTSUPP);
+ if (flags & ~valid_flags)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ hwpt_paging = __iommufd_object_alloc(
+ ictx, hwpt_paging, IOMMUFD_OBJ_HWPT_PAGING, common.obj);
+ if (IS_ERR(hwpt_paging))
+ return ERR_CAST(hwpt_paging);
+ hwpt = &hwpt_paging->common;
- INIT_LIST_HEAD(&hwpt->hwpt_item);
+ INIT_LIST_HEAD(&hwpt_paging->hwpt_item);
/* Pairs with iommufd_hw_pagetable_destroy() */
refcount_inc(&ioas->obj.users);
- hwpt->ioas = ioas;
+ hwpt_paging->ioas = ioas;
+ hwpt_paging->nest_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
- hwpt->domain = iommu_domain_alloc(idev->dev->bus);
- if (!hwpt->domain) {
- rc = -ENOMEM;
- goto out_abort;
+ if (ops->domain_alloc_user) {
+ hwpt->domain = ops->domain_alloc_user(idev->dev, flags, NULL,
+ user_data);
+ if (IS_ERR(hwpt->domain)) {
+ rc = PTR_ERR(hwpt->domain);
+ hwpt->domain = NULL;
+ goto out_abort;
+ }
+ } else {
+ hwpt->domain = iommu_domain_alloc(idev->dev->bus);
+ if (!hwpt->domain) {
+ rc = -ENOMEM;
+ goto out_abort;
+ }
}
/*
@@ -100,9 +149,16 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
* doing any maps. It is an iommu driver bug to report
* IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail enforce_cache_coherency on
* a new domain.
+ *
+ * The cache coherency mode must be configured here and unchanged later.
+ * Note that a HWPT (non-CC) created for a device (non-CC) can be later
+ * reused by another device (either non-CC or CC). However, A HWPT (CC)
+ * created for a device (CC) cannot be reused by another device (non-CC)
+ * but only devices (CC). Instead user space in this case would need to
+ * allocate a separate HWPT (non-CC).
*/
if (idev->enforce_cache_coherency) {
- rc = iommufd_hw_pagetable_enforce_cc(hwpt);
+ rc = iommufd_hwpt_paging_enforce_cc(hwpt_paging);
if (WARN_ON(rc))
goto out_abort;
}
@@ -119,11 +175,11 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
goto out_abort;
}
- rc = iopt_table_add_domain(&hwpt->ioas->iopt, hwpt->domain);
+ rc = iopt_table_add_domain(&ioas->iopt, hwpt->domain);
if (rc)
goto out_detach;
- list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list);
- return hwpt;
+ list_add_tail(&hwpt_paging->hwpt_item, &ioas->hwpt_list);
+ return hwpt_paging;
out_detach:
if (immediate_attach)
@@ -133,32 +189,121 @@ out_abort:
return ERR_PTR(rc);
}
+/**
+ * iommufd_hwpt_nested_alloc() - Get a NESTED iommu_domain for a device
+ * @ictx: iommufd context
+ * @parent: Parent PAGING-type hwpt to associate the domain with
+ * @idev: Device to get an iommu_domain for
+ * @flags: Flags from userspace
+ * @user_data: user_data pointer. Must be valid
+ *
+ * Allocate a new iommu_domain (must be IOMMU_DOMAIN_NESTED) and return it as
+ * a NESTED hw_pagetable. The given parent PAGING-type hwpt must be capable of
+ * being a parent.
+ */
+static struct iommufd_hwpt_nested *
+iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
+ struct iommufd_hwpt_paging *parent,
+ struct iommufd_device *idev, u32 flags,
+ const struct iommu_user_data *user_data)
+{
+ const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
+ struct iommufd_hwpt_nested *hwpt_nested;
+ struct iommufd_hw_pagetable *hwpt;
+ int rc;
+
+ if (flags || !user_data->len || !ops->domain_alloc_user)
+ return ERR_PTR(-EOPNOTSUPP);
+ if (parent->auto_domain || !parent->nest_parent)
+ return ERR_PTR(-EINVAL);
+
+ hwpt_nested = __iommufd_object_alloc(
+ ictx, hwpt_nested, IOMMUFD_OBJ_HWPT_NESTED, common.obj);
+ if (IS_ERR(hwpt_nested))
+ return ERR_CAST(hwpt_nested);
+ hwpt = &hwpt_nested->common;
+
+ refcount_inc(&parent->common.obj.users);
+ hwpt_nested->parent = parent;
+
+ hwpt->domain = ops->domain_alloc_user(idev->dev, flags,
+ parent->common.domain, user_data);
+ if (IS_ERR(hwpt->domain)) {
+ rc = PTR_ERR(hwpt->domain);
+ hwpt->domain = NULL;
+ goto out_abort;
+ }
+
+ if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) {
+ rc = -EINVAL;
+ goto out_abort;
+ }
+ return hwpt_nested;
+
+out_abort:
+ iommufd_object_abort_and_destroy(ictx, &hwpt->obj);
+ return ERR_PTR(rc);
+}
+
int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
{
struct iommu_hwpt_alloc *cmd = ucmd->cmd;
+ const struct iommu_user_data user_data = {
+ .type = cmd->data_type,
+ .uptr = u64_to_user_ptr(cmd->data_uptr),
+ .len = cmd->data_len,
+ };
struct iommufd_hw_pagetable *hwpt;
+ struct iommufd_ioas *ioas = NULL;
+ struct iommufd_object *pt_obj;
struct iommufd_device *idev;
- struct iommufd_ioas *ioas;
int rc;
- if (cmd->flags || cmd->__reserved)
+ if (cmd->__reserved)
return -EOPNOTSUPP;
+ if ((cmd->data_type == IOMMU_HWPT_DATA_NONE && cmd->data_len) ||
+ (cmd->data_type != IOMMU_HWPT_DATA_NONE && !cmd->data_len))
+ return -EINVAL;
idev = iommufd_get_device(ucmd, cmd->dev_id);
if (IS_ERR(idev))
return PTR_ERR(idev);
- ioas = iommufd_get_ioas(ucmd->ictx, cmd->pt_id);
- if (IS_ERR(ioas)) {
- rc = PTR_ERR(ioas);
+ pt_obj = iommufd_get_object(ucmd->ictx, cmd->pt_id, IOMMUFD_OBJ_ANY);
+ if (IS_ERR(pt_obj)) {
+ rc = -EINVAL;
goto out_put_idev;
}
- mutex_lock(&ioas->mutex);
- hwpt = iommufd_hw_pagetable_alloc(ucmd->ictx, ioas, idev, false);
- if (IS_ERR(hwpt)) {
- rc = PTR_ERR(hwpt);
- goto out_unlock;
+ if (pt_obj->type == IOMMUFD_OBJ_IOAS) {
+ struct iommufd_hwpt_paging *hwpt_paging;
+
+ ioas = container_of(pt_obj, struct iommufd_ioas, obj);
+ mutex_lock(&ioas->mutex);
+ hwpt_paging = iommufd_hwpt_paging_alloc(
+ ucmd->ictx, ioas, idev, cmd->flags, false,
+ user_data.len ? &user_data : NULL);
+ if (IS_ERR(hwpt_paging)) {
+ rc = PTR_ERR(hwpt_paging);
+ goto out_unlock;
+ }
+ hwpt = &hwpt_paging->common;
+ } else if (pt_obj->type == IOMMUFD_OBJ_HWPT_PAGING) {
+ struct iommufd_hwpt_nested *hwpt_nested;
+
+ hwpt_nested = iommufd_hwpt_nested_alloc(
+ ucmd->ictx,
+ container_of(pt_obj, struct iommufd_hwpt_paging,
+ common.obj),
+ idev, cmd->flags, &user_data);
+ if (IS_ERR(hwpt_nested)) {
+ rc = PTR_ERR(hwpt_nested);
+ goto out_unlock;
+ }
+ hwpt = &hwpt_nested->common;
+ } else {
+ rc = -EINVAL;
+ goto out_put_pt;
}
cmd->out_hwpt_id = hwpt->obj.id;
@@ -171,9 +316,59 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
out_hwpt:
iommufd_object_abort_and_destroy(ucmd->ictx, &hwpt->obj);
out_unlock:
- mutex_unlock(&ioas->mutex);
- iommufd_put_object(&ioas->obj);
+ if (ioas)
+ mutex_unlock(&ioas->mutex);
+out_put_pt:
+ iommufd_put_object(ucmd->ictx, pt_obj);
out_put_idev:
- iommufd_put_object(&idev->obj);
+ iommufd_put_object(ucmd->ictx, &idev->obj);
+ return rc;
+}
+
+int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_hwpt_set_dirty_tracking *cmd = ucmd->cmd;
+ struct iommufd_hwpt_paging *hwpt_paging;
+ struct iommufd_ioas *ioas;
+ int rc = -EOPNOTSUPP;
+ bool enable;
+
+ if (cmd->flags & ~IOMMU_HWPT_DIRTY_TRACKING_ENABLE)
+ return rc;
+
+ hwpt_paging = iommufd_get_hwpt_paging(ucmd, cmd->hwpt_id);
+ if (IS_ERR(hwpt_paging))
+ return PTR_ERR(hwpt_paging);
+
+ ioas = hwpt_paging->ioas;
+ enable = cmd->flags & IOMMU_HWPT_DIRTY_TRACKING_ENABLE;
+
+ rc = iopt_set_dirty_tracking(&ioas->iopt, hwpt_paging->common.domain,
+ enable);
+
+ iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj);
+ return rc;
+}
+
+int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_hwpt_get_dirty_bitmap *cmd = ucmd->cmd;
+ struct iommufd_hwpt_paging *hwpt_paging;
+ struct iommufd_ioas *ioas;
+ int rc = -EOPNOTSUPP;
+
+ if ((cmd->flags & ~(IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR)) ||
+ cmd->__reserved)
+ return -EOPNOTSUPP;
+
+ hwpt_paging = iommufd_get_hwpt_paging(ucmd, cmd->hwpt_id);
+ if (IS_ERR(hwpt_paging))
+ return PTR_ERR(hwpt_paging);
+
+ ioas = hwpt_paging->ioas;
+ rc = iopt_read_and_clear_dirty_data(
+ &ioas->iopt, hwpt_paging->common.domain, cmd->flags, cmd);
+
+ iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj);
return rc;
}
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index 117a39ae2e..504ac1b01b 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -15,6 +15,7 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/errno.h>
+#include <uapi/linux/iommufd.h>
#include "io_pagetable.h"
#include "double_span.h"
@@ -424,6 +425,177 @@ int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
return 0;
}
+struct iova_bitmap_fn_arg {
+ unsigned long flags;
+ struct io_pagetable *iopt;
+ struct iommu_domain *domain;
+ struct iommu_dirty_bitmap *dirty;
+};
+
+static int __iommu_read_and_clear_dirty(struct iova_bitmap *bitmap,
+ unsigned long iova, size_t length,
+ void *opaque)
+{
+ struct iopt_area *area;
+ struct iopt_area_contig_iter iter;
+ struct iova_bitmap_fn_arg *arg = opaque;
+ struct iommu_domain *domain = arg->domain;
+ struct iommu_dirty_bitmap *dirty = arg->dirty;
+ const struct iommu_dirty_ops *ops = domain->dirty_ops;
+ unsigned long last_iova = iova + length - 1;
+ unsigned long flags = arg->flags;
+ int ret;
+
+ iopt_for_each_contig_area(&iter, area, arg->iopt, iova, last_iova) {
+ unsigned long last = min(last_iova, iopt_area_last_iova(area));
+
+ ret = ops->read_and_clear_dirty(domain, iter.cur_iova,
+ last - iter.cur_iova + 1, flags,
+ dirty);
+ if (ret)
+ return ret;
+ }
+
+ if (!iopt_area_contig_done(&iter))
+ return -EINVAL;
+ return 0;
+}
+
+static int
+iommu_read_and_clear_dirty(struct iommu_domain *domain,
+ struct io_pagetable *iopt, unsigned long flags,
+ struct iommu_hwpt_get_dirty_bitmap *bitmap)
+{
+ const struct iommu_dirty_ops *ops = domain->dirty_ops;
+ struct iommu_iotlb_gather gather;
+ struct iommu_dirty_bitmap dirty;
+ struct iova_bitmap_fn_arg arg;
+ struct iova_bitmap *iter;
+ int ret = 0;
+
+ if (!ops || !ops->read_and_clear_dirty)
+ return -EOPNOTSUPP;
+
+ iter = iova_bitmap_alloc(bitmap->iova, bitmap->length,
+ bitmap->page_size,
+ u64_to_user_ptr(bitmap->data));
+ if (IS_ERR(iter))
+ return -ENOMEM;
+
+ iommu_dirty_bitmap_init(&dirty, iter, &gather);
+
+ arg.flags = flags;
+ arg.iopt = iopt;
+ arg.domain = domain;
+ arg.dirty = &dirty;
+ iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty);
+
+ if (!(flags & IOMMU_DIRTY_NO_CLEAR))
+ iommu_iotlb_sync(domain, &gather);
+
+ iova_bitmap_free(iter);
+
+ return ret;
+}
+
+int iommufd_check_iova_range(struct io_pagetable *iopt,
+ struct iommu_hwpt_get_dirty_bitmap *bitmap)
+{
+ size_t iommu_pgsize = iopt->iova_alignment;
+ u64 last_iova;
+
+ if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova))
+ return -EOVERFLOW;
+
+ if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX)
+ return -EOVERFLOW;
+
+ if ((bitmap->iova & (iommu_pgsize - 1)) ||
+ ((last_iova + 1) & (iommu_pgsize - 1)))
+ return -EINVAL;
+
+ if (!bitmap->page_size)
+ return -EINVAL;
+
+ if ((bitmap->iova & (bitmap->page_size - 1)) ||
+ ((last_iova + 1) & (bitmap->page_size - 1)))
+ return -EINVAL;
+
+ return 0;
+}
+
+int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
+ struct iommu_domain *domain,
+ unsigned long flags,
+ struct iommu_hwpt_get_dirty_bitmap *bitmap)
+{
+ int ret;
+
+ ret = iommufd_check_iova_range(iopt, bitmap);
+ if (ret)
+ return ret;
+
+ down_read(&iopt->iova_rwsem);
+ ret = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap);
+ up_read(&iopt->iova_rwsem);
+
+ return ret;
+}
+
+static int iopt_clear_dirty_data(struct io_pagetable *iopt,
+ struct iommu_domain *domain)
+{
+ const struct iommu_dirty_ops *ops = domain->dirty_ops;
+ struct iommu_iotlb_gather gather;
+ struct iommu_dirty_bitmap dirty;
+ struct iopt_area *area;
+ int ret = 0;
+
+ lockdep_assert_held_read(&iopt->iova_rwsem);
+
+ iommu_dirty_bitmap_init(&dirty, NULL, &gather);
+
+ for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
+ area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
+ if (!area->pages)
+ continue;
+
+ ret = ops->read_and_clear_dirty(domain, iopt_area_iova(area),
+ iopt_area_length(area), 0,
+ &dirty);
+ if (ret)
+ break;
+ }
+
+ iommu_iotlb_sync(domain, &gather);
+ return ret;
+}
+
+int iopt_set_dirty_tracking(struct io_pagetable *iopt,
+ struct iommu_domain *domain, bool enable)
+{
+ const struct iommu_dirty_ops *ops = domain->dirty_ops;
+ int ret = 0;
+
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ down_read(&iopt->iova_rwsem);
+
+ /* Clear dirty bits from PTEs to ensure a clean snapshot */
+ if (enable) {
+ ret = iopt_clear_dirty_data(iopt, domain);
+ if (ret)
+ goto out_unlock;
+ }
+
+ ret = ops->set_dirty_tracking(domain, enable);
+
+out_unlock:
+ up_read(&iopt->iova_rwsem);
+ return ret;
+}
+
int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
unsigned long length, struct list_head *pages_list)
{
diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
index d5624577f7..7422482765 100644
--- a/drivers/iommu/iommufd/ioas.c
+++ b/drivers/iommu/iommufd/ioas.c
@@ -105,7 +105,7 @@ int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd)
rc = -EMSGSIZE;
out_put:
up_read(&ioas->iopt.iova_rwsem);
- iommufd_put_object(&ioas->obj);
+ iommufd_put_object(ucmd->ictx, &ioas->obj);
return rc;
}
@@ -175,7 +175,7 @@ out_free:
interval_tree_remove(node, &allowed_iova);
kfree(container_of(node, struct iopt_allowed, node));
}
- iommufd_put_object(&ioas->obj);
+ iommufd_put_object(ucmd->ictx, &ioas->obj);
return rc;
}
@@ -228,7 +228,7 @@ int iommufd_ioas_map(struct iommufd_ucmd *ucmd)
cmd->iova = iova;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
out_put:
- iommufd_put_object(&ioas->obj);
+ iommufd_put_object(ucmd->ictx, &ioas->obj);
return rc;
}
@@ -258,7 +258,7 @@ int iommufd_ioas_copy(struct iommufd_ucmd *ucmd)
return PTR_ERR(src_ioas);
rc = iopt_get_pages(&src_ioas->iopt, cmd->src_iova, cmd->length,
&pages_list);
- iommufd_put_object(&src_ioas->obj);
+ iommufd_put_object(ucmd->ictx, &src_ioas->obj);
if (rc)
return rc;
@@ -279,7 +279,7 @@ int iommufd_ioas_copy(struct iommufd_ucmd *ucmd)
cmd->dst_iova = iova;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
out_put_dst:
- iommufd_put_object(&dst_ioas->obj);
+ iommufd_put_object(ucmd->ictx, &dst_ioas->obj);
out_pages:
iopt_free_pages_list(&pages_list);
return rc;
@@ -315,7 +315,7 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd)
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
out_put:
- iommufd_put_object(&ioas->obj);
+ iommufd_put_object(ucmd->ictx, &ioas->obj);
return rc;
}
@@ -393,6 +393,6 @@ int iommufd_ioas_option(struct iommufd_ucmd *ucmd)
rc = -EOPNOTSUPP;
}
- iommufd_put_object(&ioas->obj);
+ iommufd_put_object(ucmd->ictx, &ioas->obj);
return rc;
}
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 2c58670011..abae041e25 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -8,6 +8,9 @@
#include <linux/xarray.h>
#include <linux/refcount.h>
#include <linux/uaccess.h>
+#include <linux/iommu.h>
+#include <linux/iova_bitmap.h>
+#include <uapi/linux/iommufd.h>
struct iommu_domain;
struct iommu_group;
@@ -18,6 +21,7 @@ struct iommufd_ctx {
struct file *file;
struct xarray objects;
struct xarray groups;
+ wait_queue_head_t destroy_wait;
u8 account_mode;
/* Compatibility with VFIO no iommu */
@@ -70,6 +74,13 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
unsigned long length, unsigned long *unmapped);
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
+int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
+ struct iommu_domain *domain,
+ unsigned long flags,
+ struct iommu_hwpt_get_dirty_bitmap *bitmap);
+int iopt_set_dirty_tracking(struct io_pagetable *iopt,
+ struct iommu_domain *domain, bool enable);
+
void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
unsigned long length);
int iopt_table_add_domain(struct io_pagetable *iopt,
@@ -113,7 +124,8 @@ enum iommufd_object_type {
IOMMUFD_OBJ_NONE,
IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
IOMMUFD_OBJ_DEVICE,
- IOMMUFD_OBJ_HW_PAGETABLE,
+ IOMMUFD_OBJ_HWPT_PAGING,
+ IOMMUFD_OBJ_HWPT_NESTED,
IOMMUFD_OBJ_IOAS,
IOMMUFD_OBJ_ACCESS,
#ifdef CONFIG_IOMMUFD_TEST
@@ -124,7 +136,7 @@ enum iommufd_object_type {
/* Base struct for all objects with a userspace ID handle. */
struct iommufd_object {
- struct rw_semaphore destroy_rwsem;
+ refcount_t shortterm_users;
refcount_t users;
enum iommufd_object_type type;
unsigned int id;
@@ -132,10 +144,15 @@ struct iommufd_object {
static inline bool iommufd_lock_obj(struct iommufd_object *obj)
{
- if (!down_read_trylock(&obj->destroy_rwsem))
+ if (!refcount_inc_not_zero(&obj->users))
return false;
- if (!refcount_inc_not_zero(&obj->users)) {
- up_read(&obj->destroy_rwsem);
+ if (!refcount_inc_not_zero(&obj->shortterm_users)) {
+ /*
+ * If the caller doesn't already have a ref on obj this must be
+ * called under the xa_lock. Otherwise the caller is holding a
+ * ref on users. Thus it cannot be one before this decrement.
+ */
+ refcount_dec(&obj->users);
return false;
}
return true;
@@ -143,10 +160,16 @@ static inline bool iommufd_lock_obj(struct iommufd_object *obj)
struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
enum iommufd_object_type type);
-static inline void iommufd_put_object(struct iommufd_object *obj)
+static inline void iommufd_put_object(struct iommufd_ctx *ictx,
+ struct iommufd_object *obj)
{
+ /*
+ * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees
+ * a spurious !0 users with a 0 shortterm_users.
+ */
refcount_dec(&obj->users);
- up_read(&obj->destroy_rwsem);
+ if (refcount_dec_and_test(&obj->shortterm_users))
+ wake_up_interruptible_all(&ictx->destroy_wait);
}
void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
@@ -154,24 +177,56 @@ void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
struct iommufd_object *obj);
void iommufd_object_finalize(struct iommufd_ctx *ictx,
struct iommufd_object *obj);
-void __iommufd_object_destroy_user(struct iommufd_ctx *ictx,
- struct iommufd_object *obj, bool allow_fail);
+
+enum {
+ REMOVE_WAIT_SHORTTERM = 1,
+};
+int iommufd_object_remove(struct iommufd_ctx *ictx,
+ struct iommufd_object *to_destroy, u32 id,
+ unsigned int flags);
+
+/*
+ * The caller holds a users refcount and wants to destroy the object. At this
+ * point the caller has no shortterm_users reference and at least the xarray
+ * will be holding one.
+ */
static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
struct iommufd_object *obj)
{
- __iommufd_object_destroy_user(ictx, obj, false);
+ int ret;
+
+ ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM);
+
+ /*
+ * If there is a bug and we couldn't destroy the object then we did put
+ * back the caller's users refcount and will eventually try to free it
+ * again during close.
+ */
+ WARN_ON(ret);
}
-static inline void iommufd_object_deref_user(struct iommufd_ctx *ictx,
- struct iommufd_object *obj)
+
+/*
+ * The HWPT allocated by autodomains is used in possibly many devices and
+ * is automatically destroyed when its refcount reaches zero.
+ *
+ * If userspace uses the HWPT manually, even for a short term, then it will
+ * disrupt this refcounting and the auto-free in the kernel will not work.
+ * Userspace that tries to use the automatically allocated HWPT must be careful
+ * to ensure that it is consistently destroyed, eg by not racing accesses
+ * and by not attaching an automatic HWPT to a device manually.
+ */
+static inline void
+iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
+ struct iommufd_object *obj)
{
- __iommufd_object_destroy_user(ictx, obj, true);
+ iommufd_object_remove(ictx, obj, obj->id, 0);
}
struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
size_t size,
enum iommufd_object_type type);
-#define iommufd_object_alloc(ictx, ptr, type) \
+#define __iommufd_object_alloc(ictx, ptr, type, obj) \
container_of(_iommufd_object_alloc( \
ictx, \
sizeof(*(ptr)) + BUILD_BUG_ON_ZERO( \
@@ -180,6 +235,9 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
type), \
typeof(*(ptr)), obj)
+#define iommufd_object_alloc(ictx, ptr, type) \
+ __iommufd_object_alloc(ictx, ptr, type, obj)
+
/*
* The IO Address Space (IOAS) pagetable is a virtual page table backed by the
* io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The
@@ -222,6 +280,8 @@ int iommufd_option_rlimit_mode(struct iommu_option *cmd,
struct iommufd_ctx *ictx);
int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
+int iommufd_check_iova_range(struct io_pagetable *iopt,
+ struct iommu_hwpt_get_dirty_bitmap *bitmap);
/*
* A HW pagetable is called an iommu_domain inside the kernel. This user object
@@ -231,35 +291,75 @@ int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
*/
struct iommufd_hw_pagetable {
struct iommufd_object obj;
- struct iommufd_ioas *ioas;
struct iommu_domain *domain;
+};
+
+struct iommufd_hwpt_paging {
+ struct iommufd_hw_pagetable common;
+ struct iommufd_ioas *ioas;
bool auto_domain : 1;
bool enforce_cache_coherency : 1;
bool msi_cookie : 1;
+ bool nest_parent : 1;
/* Head at iommufd_ioas::hwpt_list */
struct list_head hwpt_item;
};
-struct iommufd_hw_pagetable *
-iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
- struct iommufd_device *idev, bool immediate_attach);
-int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt);
+struct iommufd_hwpt_nested {
+ struct iommufd_hw_pagetable common;
+ struct iommufd_hwpt_paging *parent;
+};
+
+static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt)
+{
+ return hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING;
+}
+
+static inline struct iommufd_hwpt_paging *
+to_hwpt_paging(struct iommufd_hw_pagetable *hwpt)
+{
+ return container_of(hwpt, struct iommufd_hwpt_paging, common);
+}
+
+static inline struct iommufd_hwpt_paging *
+iommufd_get_hwpt_paging(struct iommufd_ucmd *ucmd, u32 id)
+{
+ return container_of(iommufd_get_object(ucmd->ictx, id,
+ IOMMUFD_OBJ_HWPT_PAGING),
+ struct iommufd_hwpt_paging, common.obj);
+}
+int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd);
+int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);
+
+struct iommufd_hwpt_paging *
+iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
+ struct iommufd_device *idev, u32 flags,
+ bool immediate_attach,
+ const struct iommu_user_data *user_data);
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
struct iommufd_device *idev);
struct iommufd_hw_pagetable *
iommufd_hw_pagetable_detach(struct iommufd_device *idev);
-void iommufd_hw_pagetable_destroy(struct iommufd_object *obj);
-void iommufd_hw_pagetable_abort(struct iommufd_object *obj);
+void iommufd_hwpt_paging_destroy(struct iommufd_object *obj);
+void iommufd_hwpt_paging_abort(struct iommufd_object *obj);
+void iommufd_hwpt_nested_destroy(struct iommufd_object *obj);
+void iommufd_hwpt_nested_abort(struct iommufd_object *obj);
int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd);
static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
struct iommufd_hw_pagetable *hwpt)
{
- lockdep_assert_not_held(&hwpt->ioas->mutex);
- if (hwpt->auto_domain)
- iommufd_object_deref_user(ictx, &hwpt->obj);
- else
- refcount_dec(&hwpt->obj.users);
+ if (hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING) {
+ struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt);
+
+ lockdep_assert_not_held(&hwpt_paging->ioas->mutex);
+
+ if (hwpt_paging->auto_domain) {
+ iommufd_object_put_and_try_destroy(ictx, &hwpt->obj);
+ return;
+ }
+ }
+ refcount_dec(&hwpt->obj.users);
}
struct iommufd_group {
diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h
index 3f3644375b..7910fbe196 100644
--- a/drivers/iommu/iommufd/iommufd_test.h
+++ b/drivers/iommu/iommufd/iommufd_test.h
@@ -19,6 +19,8 @@ enum {
IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT,
IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE,
IOMMU_TEST_OP_ACCESS_REPLACE_IOAS,
+ IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS,
+ IOMMU_TEST_OP_DIRTY,
};
enum {
@@ -40,6 +42,15 @@ enum {
MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES = 1 << 0,
};
+enum {
+ MOCK_FLAGS_DEVICE_NO_DIRTY = 1 << 0,
+};
+
+enum {
+ MOCK_NESTED_DOMAIN_IOTLB_ID_MAX = 3,
+ MOCK_NESTED_DOMAIN_IOTLB_NUM = 4,
+};
+
struct iommu_test_cmd {
__u32 size;
__u32 op;
@@ -57,6 +68,13 @@ struct iommu_test_cmd {
__u32 out_idev_id;
} mock_domain;
struct {
+ __u32 out_stdev_id;
+ __u32 out_hwpt_id;
+ __u32 out_idev_id;
+ /* Expand mock_domain to set mock device flags */
+ __u32 dev_flags;
+ } mock_domain_flags;
+ struct {
__u32 pt_id;
} mock_domain_replace;
struct {
@@ -95,6 +113,14 @@ struct iommu_test_cmd {
struct {
__u32 ioas_id;
} access_replace_ioas;
+ struct {
+ __u32 flags;
+ __aligned_u64 iova;
+ __aligned_u64 length;
+ __aligned_u64 page_size;
+ __aligned_u64 uptr;
+ __aligned_u64 out_nr_dirty;
+ } dirty;
};
__u32 last;
};
@@ -109,4 +135,17 @@ struct iommu_test_hw_info {
__u32 test_reg;
};
+/* Should not be equal to any defined value in enum iommu_hwpt_data_type */
+#define IOMMU_HWPT_DATA_SELFTEST 0xdead
+#define IOMMU_TEST_IOTLB_DEFAULT 0xbadbeef
+
+/**
+ * struct iommu_hwpt_selftest
+ *
+ * @iotlb: default mock iotlb value, IOMMU_TEST_IOTLB_DEFAULT
+ */
+struct iommu_hwpt_selftest {
+ __u32 iotlb;
+};
+
#endif
diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c
new file mode 100644
index 0000000000..db8c46bee1
--- /dev/null
+++ b/drivers/iommu/iommufd/iova_bitmap.c
@@ -0,0 +1,474 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022, Oracle and/or its affiliates.
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+#include <linux/iova_bitmap.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+
+#define BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE)
+
+/*
+ * struct iova_bitmap_map - A bitmap representing an IOVA range
+ *
+ * Main data structure for tracking mapped user pages of bitmap data.
+ *
+ * For example, for something recording dirty IOVAs, it will be provided a
+ * struct iova_bitmap structure, as a general structure for iterating the
+ * total IOVA range. The struct iova_bitmap_map, though, represents the
+ * subset of said IOVA space that is pinned by its parent structure (struct
+ * iova_bitmap).
+ *
+ * The user does not need to exact location of the bits in the bitmap.
+ * From user perspective the only API available is iova_bitmap_set() which
+ * records the IOVA *range* in the bitmap by setting the corresponding
+ * bits.
+ *
+ * The bitmap is an array of u64 whereas each bit represents an IOVA of
+ * range of (1 << pgshift). Thus formula for the bitmap data to be set is:
+ *
+ * data[(iova / page_size) / 64] & (1ULL << (iova % 64))
+ */
+struct iova_bitmap_map {
+ /* base IOVA representing bit 0 of the first page */
+ unsigned long iova;
+
+ /* page size order that each bit granules to */
+ unsigned long pgshift;
+
+ /* page offset of the first user page pinned */
+ unsigned long pgoff;
+
+ /* number of pages pinned */
+ unsigned long npages;
+
+ /* pinned pages representing the bitmap data */
+ struct page **pages;
+};
+
+/*
+ * struct iova_bitmap - The IOVA bitmap object
+ *
+ * Main data structure for iterating over the bitmap data.
+ *
+ * Abstracts the pinning work and iterates in IOVA ranges.
+ * It uses a windowing scheme and pins the bitmap in relatively
+ * big ranges e.g.
+ *
+ * The bitmap object uses one base page to store all the pinned pages
+ * pointers related to the bitmap. For sizeof(struct page*) == 8 it stores
+ * 512 struct page pointers which, if the base page size is 4K, it means
+ * 2M of bitmap data is pinned at a time. If the iova_bitmap page size is
+ * also 4K then the range window to iterate is 64G.
+ *
+ * For example iterating on a total IOVA range of 4G..128G, it will walk
+ * through this set of ranges:
+ *
+ * 4G - 68G-1 (64G)
+ * 68G - 128G-1 (64G)
+ *
+ * An example of the APIs on how to use/iterate over the IOVA bitmap:
+ *
+ * bitmap = iova_bitmap_alloc(iova, length, page_size, data);
+ * if (IS_ERR(bitmap))
+ * return PTR_ERR(bitmap);
+ *
+ * ret = iova_bitmap_for_each(bitmap, arg, dirty_reporter_fn);
+ *
+ * iova_bitmap_free(bitmap);
+ *
+ * Each iteration of the @dirty_reporter_fn is called with a unique @iova
+ * and @length argument, indicating the current range available through the
+ * iova_bitmap. The @dirty_reporter_fn uses iova_bitmap_set() to mark dirty
+ * areas (@iova_length) within that provided range, as following:
+ *
+ * iova_bitmap_set(bitmap, iova, iova_length);
+ *
+ * The internals of the object uses an index @mapped_base_index that indexes
+ * which u64 word of the bitmap is mapped, up to @mapped_total_index.
+ * Those keep being incremented until @mapped_total_index is reached while
+ * mapping up to PAGE_SIZE / sizeof(struct page*) maximum of pages.
+ *
+ * The IOVA bitmap is usually located on what tracks DMA mapped ranges or
+ * some form of IOVA range tracking that co-relates to the user passed
+ * bitmap.
+ */
+struct iova_bitmap {
+ /* IOVA range representing the currently mapped bitmap data */
+ struct iova_bitmap_map mapped;
+
+ /* userspace address of the bitmap */
+ u8 __user *bitmap;
+
+ /* u64 index that @mapped points to */
+ unsigned long mapped_base_index;
+
+ /* how many u64 can we walk in total */
+ unsigned long mapped_total_index;
+
+ /* base IOVA of the whole bitmap */
+ unsigned long iova;
+
+ /* length of the IOVA range for the whole bitmap */
+ size_t length;
+
+ /* length of the IOVA range set ahead the pinned pages */
+ unsigned long set_ahead_length;
+};
+
+/*
+ * Converts a relative IOVA to a bitmap index.
+ * This function provides the index into the u64 array (bitmap::bitmap)
+ * for a given IOVA offset.
+ * Relative IOVA means relative to the bitmap::mapped base IOVA
+ * (stored in mapped::iova). All computations in this file are done using
+ * relative IOVAs and thus avoid an extra subtraction against mapped::iova.
+ * The user API iova_bitmap_set() always uses a regular absolute IOVAs.
+ */
+static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap,
+ unsigned long iova)
+{
+ unsigned long pgsize = 1 << bitmap->mapped.pgshift;
+
+ return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize);
+}
+
+/*
+ * Converts a bitmap index to a *relative* IOVA.
+ */
+static unsigned long iova_bitmap_index_to_offset(struct iova_bitmap *bitmap,
+ unsigned long index)
+{
+ unsigned long pgshift = bitmap->mapped.pgshift;
+
+ return (index * BITS_PER_TYPE(*bitmap->bitmap)) << pgshift;
+}
+
+/*
+ * Returns the base IOVA of the mapped range.
+ */
+static unsigned long iova_bitmap_mapped_iova(struct iova_bitmap *bitmap)
+{
+ unsigned long skip = bitmap->mapped_base_index;
+
+ return bitmap->iova + iova_bitmap_index_to_offset(bitmap, skip);
+}
+
+/*
+ * Pins the bitmap user pages for the current range window.
+ * This is internal to IOVA bitmap and called when advancing the
+ * index (@mapped_base_index) or allocating the bitmap.
+ */
+static int iova_bitmap_get(struct iova_bitmap *bitmap)
+{
+ struct iova_bitmap_map *mapped = &bitmap->mapped;
+ unsigned long npages;
+ u8 __user *addr;
+ long ret;
+
+ /*
+ * @mapped_base_index is the index of the currently mapped u64 words
+ * that we have access. Anything before @mapped_base_index is not
+ * mapped. The range @mapped_base_index .. @mapped_total_index-1 is
+ * mapped but capped at a maximum number of pages.
+ */
+ npages = DIV_ROUND_UP((bitmap->mapped_total_index -
+ bitmap->mapped_base_index) *
+ sizeof(*bitmap->bitmap), PAGE_SIZE);
+
+ /*
+ * Bitmap address to be pinned is calculated via pointer arithmetic
+ * with bitmap u64 word index.
+ */
+ addr = bitmap->bitmap + bitmap->mapped_base_index;
+
+ /*
+ * We always cap at max number of 'struct page' a base page can fit.
+ * This is, for example, on x86 means 2M of bitmap data max.
+ */
+ npages = min(npages + !!offset_in_page(addr),
+ PAGE_SIZE / sizeof(struct page *));
+
+ ret = pin_user_pages_fast((unsigned long)addr, npages,
+ FOLL_WRITE, mapped->pages);
+ if (ret <= 0)
+ return -EFAULT;
+
+ mapped->npages = (unsigned long)ret;
+ /* Base IOVA where @pages point to i.e. bit 0 of the first page */
+ mapped->iova = iova_bitmap_mapped_iova(bitmap);
+
+ /*
+ * offset of the page where pinned pages bit 0 is located.
+ * This handles the case where the bitmap is not PAGE_SIZE
+ * aligned.
+ */
+ mapped->pgoff = offset_in_page(addr);
+ return 0;
+}
+
+/*
+ * Unpins the bitmap user pages and clears @npages
+ * (un)pinning is abstracted from API user and it's done when advancing
+ * the index or freeing the bitmap.
+ */
+static void iova_bitmap_put(struct iova_bitmap *bitmap)
+{
+ struct iova_bitmap_map *mapped = &bitmap->mapped;
+
+ if (mapped->npages) {
+ unpin_user_pages(mapped->pages, mapped->npages);
+ mapped->npages = 0;
+ }
+}
+
+/**
+ * iova_bitmap_alloc() - Allocates an IOVA bitmap object
+ * @iova: Start address of the IOVA range
+ * @length: Length of the IOVA range
+ * @page_size: Page size of the IOVA bitmap. It defines what each bit
+ * granularity represents
+ * @data: Userspace address of the bitmap
+ *
+ * Allocates an IOVA object and initializes all its fields including the
+ * first user pages of @data.
+ *
+ * Return: A pointer to a newly allocated struct iova_bitmap
+ * or ERR_PTR() on error.
+ */
+struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length,
+ unsigned long page_size, u64 __user *data)
+{
+ struct iova_bitmap_map *mapped;
+ struct iova_bitmap *bitmap;
+ int rc;
+
+ bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
+ if (!bitmap)
+ return ERR_PTR(-ENOMEM);
+
+ mapped = &bitmap->mapped;
+ mapped->pgshift = __ffs(page_size);
+ bitmap->bitmap = (u8 __user *)data;
+ bitmap->mapped_total_index =
+ iova_bitmap_offset_to_index(bitmap, length - 1) + 1;
+ bitmap->iova = iova;
+ bitmap->length = length;
+ mapped->iova = iova;
+ mapped->pages = (struct page **)__get_free_page(GFP_KERNEL);
+ if (!mapped->pages) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ rc = iova_bitmap_get(bitmap);
+ if (rc)
+ goto err;
+ return bitmap;
+
+err:
+ iova_bitmap_free(bitmap);
+ return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_NS_GPL(iova_bitmap_alloc, IOMMUFD);
+
+/**
+ * iova_bitmap_free() - Frees an IOVA bitmap object
+ * @bitmap: IOVA bitmap to free
+ *
+ * It unpins and releases pages array memory and clears any leftover
+ * state.
+ */
+void iova_bitmap_free(struct iova_bitmap *bitmap)
+{
+ struct iova_bitmap_map *mapped = &bitmap->mapped;
+
+ iova_bitmap_put(bitmap);
+
+ if (mapped->pages) {
+ free_page((unsigned long)mapped->pages);
+ mapped->pages = NULL;
+ }
+
+ kfree(bitmap);
+}
+EXPORT_SYMBOL_NS_GPL(iova_bitmap_free, IOMMUFD);
+
+/*
+ * Returns the remaining bitmap indexes from mapped_total_index to process for
+ * the currently pinned bitmap pages.
+ */
+static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap)
+{
+ unsigned long remaining, bytes;
+
+ bytes = (bitmap->mapped.npages << PAGE_SHIFT) - bitmap->mapped.pgoff;
+
+ remaining = bitmap->mapped_total_index - bitmap->mapped_base_index;
+ remaining = min_t(unsigned long, remaining,
+ DIV_ROUND_UP(bytes, sizeof(*bitmap->bitmap)));
+
+ return remaining;
+}
+
+/*
+ * Returns the length of the mapped IOVA range.
+ */
+static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap)
+{
+ unsigned long max_iova = bitmap->iova + bitmap->length - 1;
+ unsigned long iova = iova_bitmap_mapped_iova(bitmap);
+ unsigned long remaining;
+
+ /*
+ * iova_bitmap_mapped_remaining() returns a number of indexes which
+ * when converted to IOVA gives us a max length that the bitmap
+ * pinned data can cover. Afterwards, that is capped to
+ * only cover the IOVA range in @bitmap::iova .. @bitmap::length.
+ */
+ remaining = iova_bitmap_index_to_offset(bitmap,
+ iova_bitmap_mapped_remaining(bitmap));
+
+ if (iova + remaining - 1 > max_iova)
+ remaining -= ((iova + remaining - 1) - max_iova);
+
+ return remaining;
+}
+
+/*
+ * Returns true if there's not more data to iterate.
+ */
+static bool iova_bitmap_done(struct iova_bitmap *bitmap)
+{
+ return bitmap->mapped_base_index >= bitmap->mapped_total_index;
+}
+
+static int iova_bitmap_set_ahead(struct iova_bitmap *bitmap,
+ size_t set_ahead_length)
+{
+ int ret = 0;
+
+ while (set_ahead_length > 0 && !iova_bitmap_done(bitmap)) {
+ unsigned long length = iova_bitmap_mapped_length(bitmap);
+ unsigned long iova = iova_bitmap_mapped_iova(bitmap);
+
+ ret = iova_bitmap_get(bitmap);
+ if (ret)
+ break;
+
+ length = min(length, set_ahead_length);
+ iova_bitmap_set(bitmap, iova, length);
+
+ set_ahead_length -= length;
+ bitmap->mapped_base_index +=
+ iova_bitmap_offset_to_index(bitmap, length - 1) + 1;
+ iova_bitmap_put(bitmap);
+ }
+
+ bitmap->set_ahead_length = 0;
+ return ret;
+}
+
+/*
+ * Advances to the next range, releases the current pinned
+ * pages and pins the next set of bitmap pages.
+ * Returns 0 on success or otherwise errno.
+ */
+static int iova_bitmap_advance(struct iova_bitmap *bitmap)
+{
+ unsigned long iova = iova_bitmap_mapped_length(bitmap) - 1;
+ unsigned long count = iova_bitmap_offset_to_index(bitmap, iova) + 1;
+
+ bitmap->mapped_base_index += count;
+
+ iova_bitmap_put(bitmap);
+ if (iova_bitmap_done(bitmap))
+ return 0;
+
+ /* Iterate, set and skip any bits requested for next iteration */
+ if (bitmap->set_ahead_length) {
+ int ret;
+
+ ret = iova_bitmap_set_ahead(bitmap, bitmap->set_ahead_length);
+ if (ret)
+ return ret;
+ }
+
+ /* When advancing the index we pin the next set of bitmap pages */
+ return iova_bitmap_get(bitmap);
+}
+
+/**
+ * iova_bitmap_for_each() - Iterates over the bitmap
+ * @bitmap: IOVA bitmap to iterate
+ * @opaque: Additional argument to pass to the callback
+ * @fn: Function that gets called for each IOVA range
+ *
+ * Helper function to iterate over bitmap data representing a portion of IOVA
+ * space. It hides the complexity of iterating bitmaps and translating the
+ * mapped bitmap user pages into IOVA ranges to process.
+ *
+ * Return: 0 on success, and an error on failure either upon
+ * iteration or when the callback returns an error.
+ */
+int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque,
+ iova_bitmap_fn_t fn)
+{
+ int ret = 0;
+
+ for (; !iova_bitmap_done(bitmap) && !ret;
+ ret = iova_bitmap_advance(bitmap)) {
+ ret = fn(bitmap, iova_bitmap_mapped_iova(bitmap),
+ iova_bitmap_mapped_length(bitmap), opaque);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(iova_bitmap_for_each, IOMMUFD);
+
+/**
+ * iova_bitmap_set() - Records an IOVA range in bitmap
+ * @bitmap: IOVA bitmap
+ * @iova: IOVA to start
+ * @length: IOVA range length
+ *
+ * Set the bits corresponding to the range [iova .. iova+length-1] in
+ * the user bitmap.
+ *
+ */
+void iova_bitmap_set(struct iova_bitmap *bitmap,
+ unsigned long iova, size_t length)
+{
+ struct iova_bitmap_map *mapped = &bitmap->mapped;
+ unsigned long cur_bit = ((iova - mapped->iova) >>
+ mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
+ unsigned long last_bit = (((iova + length - 1) - mapped->iova) >>
+ mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
+ unsigned long last_page_idx = mapped->npages - 1;
+
+ do {
+ unsigned int page_idx = cur_bit / BITS_PER_PAGE;
+ unsigned int offset = cur_bit % BITS_PER_PAGE;
+ unsigned int nbits = min(BITS_PER_PAGE - offset,
+ last_bit - cur_bit + 1);
+ void *kaddr;
+
+ if (unlikely(page_idx > last_page_idx))
+ break;
+
+ kaddr = kmap_local_page(mapped->pages[page_idx]);
+ bitmap_set(kaddr, offset, nbits);
+ kunmap_local(kaddr);
+ cur_bit += nbits;
+ } while (cur_bit <= last_bit);
+
+ if (unlikely(cur_bit <= last_bit)) {
+ bitmap->set_ahead_length =
+ ((last_bit - cur_bit + 1) << bitmap->mapped.pgshift);
+ }
+}
+EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, IOMMUFD);
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index e71523cbd0..c9091e46d2 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -33,7 +33,6 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
size_t size,
enum iommufd_object_type type)
{
- static struct lock_class_key obj_keys[IOMMUFD_OBJ_MAX];
struct iommufd_object *obj;
int rc;
@@ -41,15 +40,8 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
if (!obj)
return ERR_PTR(-ENOMEM);
obj->type = type;
- /*
- * In most cases the destroy_rwsem is obtained with try so it doesn't
- * interact with lockdep, however on destroy we have to sleep. This
- * means if we have to destroy an object while holding a get on another
- * object it triggers lockdep. Using one locking class per object type
- * is a simple and reasonable way to avoid this.
- */
- __init_rwsem(&obj->destroy_rwsem, "iommufd_object::destroy_rwsem",
- &obj_keys[type]);
+ /* Starts out bias'd by 1 until it is removed from the xarray */
+ refcount_set(&obj->shortterm_users, 1);
refcount_set(&obj->users, 1);
/*
@@ -129,92 +121,113 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
return obj;
}
+static int iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx,
+ struct iommufd_object *to_destroy)
+{
+ if (refcount_dec_and_test(&to_destroy->shortterm_users))
+ return 0;
+
+ if (wait_event_timeout(ictx->destroy_wait,
+ refcount_read(&to_destroy->shortterm_users) ==
+ 0,
+ msecs_to_jiffies(10000)))
+ return 0;
+
+ pr_crit("Time out waiting for iommufd object to become free\n");
+ refcount_inc(&to_destroy->shortterm_users);
+ return -EBUSY;
+}
+
/*
* Remove the given object id from the xarray if the only reference to the
- * object is held by the xarray. The caller must call ops destroy().
+ * object is held by the xarray.
*/
-static struct iommufd_object *iommufd_object_remove(struct iommufd_ctx *ictx,
- u32 id, bool extra_put)
+int iommufd_object_remove(struct iommufd_ctx *ictx,
+ struct iommufd_object *to_destroy, u32 id,
+ unsigned int flags)
{
struct iommufd_object *obj;
XA_STATE(xas, &ictx->objects, id);
-
- xa_lock(&ictx->objects);
- obj = xas_load(&xas);
- if (xa_is_zero(obj) || !obj) {
- obj = ERR_PTR(-ENOENT);
- goto out_xa;
- }
+ bool zerod_shortterm = false;
+ int ret;
/*
- * If the caller is holding a ref on obj we put it here under the
- * spinlock.
+ * The purpose of the shortterm_users is to ensure deterministic
+ * destruction of objects used by external drivers and destroyed by this
+ * function. Any temporary increment of the refcount must increment
+ * shortterm_users, such as during ioctl execution.
*/
- if (extra_put)
+ if (flags & REMOVE_WAIT_SHORTTERM) {
+ ret = iommufd_object_dec_wait_shortterm(ictx, to_destroy);
+ if (ret) {
+ /*
+ * We have a bug. Put back the callers reference and
+ * defer cleaning this object until close.
+ */
+ refcount_dec(&to_destroy->users);
+ return ret;
+ }
+ zerod_shortterm = true;
+ }
+
+ xa_lock(&ictx->objects);
+ obj = xas_load(&xas);
+ if (to_destroy) {
+ /*
+ * If the caller is holding a ref on obj we put it here under
+ * the spinlock.
+ */
refcount_dec(&obj->users);
+ if (WARN_ON(obj != to_destroy)) {
+ ret = -ENOENT;
+ goto err_xa;
+ }
+ } else if (xa_is_zero(obj) || !obj) {
+ ret = -ENOENT;
+ goto err_xa;
+ }
+
if (!refcount_dec_if_one(&obj->users)) {
- obj = ERR_PTR(-EBUSY);
- goto out_xa;
+ ret = -EBUSY;
+ goto err_xa;
}
xas_store(&xas, NULL);
if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj))
ictx->vfio_ioas = NULL;
-
-out_xa:
xa_unlock(&ictx->objects);
- /* The returned object reference count is zero */
- return obj;
-}
-
-/*
- * The caller holds a users refcount and wants to destroy the object. Returns
- * true if the object was destroyed. In all cases the caller no longer has a
- * reference on obj.
- */
-void __iommufd_object_destroy_user(struct iommufd_ctx *ictx,
- struct iommufd_object *obj, bool allow_fail)
-{
- struct iommufd_object *ret;
-
- /*
- * The purpose of the destroy_rwsem is to ensure deterministic
- * destruction of objects used by external drivers and destroyed by this
- * function. Any temporary increment of the refcount must hold the read
- * side of this, such as during ioctl execution.
- */
- down_write(&obj->destroy_rwsem);
- ret = iommufd_object_remove(ictx, obj->id, true);
- up_write(&obj->destroy_rwsem);
-
- if (allow_fail && IS_ERR(ret))
- return;
-
/*
- * If there is a bug and we couldn't destroy the object then we did put
- * back the caller's refcount and will eventually try to free it again
- * during close.
+ * Since users is zero any positive users_shortterm must be racing
+ * iommufd_put_object(), or we have a bug.
*/
- if (WARN_ON(IS_ERR(ret)))
- return;
+ if (!zerod_shortterm) {
+ ret = iommufd_object_dec_wait_shortterm(ictx, obj);
+ if (WARN_ON(ret))
+ return ret;
+ }
iommufd_object_ops[obj->type].destroy(obj);
kfree(obj);
+ return 0;
+
+err_xa:
+ if (zerod_shortterm) {
+ /* Restore the xarray owned reference */
+ refcount_set(&obj->shortterm_users, 1);
+ }
+ xa_unlock(&ictx->objects);
+
+ /* The returned object reference count is zero */
+ return ret;
}
static int iommufd_destroy(struct iommufd_ucmd *ucmd)
{
struct iommu_destroy *cmd = ucmd->cmd;
- struct iommufd_object *obj;
- obj = iommufd_object_remove(ucmd->ictx, cmd->id, false);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
- iommufd_object_ops[obj->type].destroy(obj);
- kfree(obj);
- return 0;
+ return iommufd_object_remove(ucmd->ictx, NULL, cmd->id, 0);
}
static int iommufd_fops_open(struct inode *inode, struct file *filp)
@@ -238,6 +251,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
xa_init(&ictx->groups);
ictx->file = filp;
+ init_waitqueue_head(&ictx->destroy_wait);
filp->private_data = ictx;
return 0;
}
@@ -307,6 +321,8 @@ union ucmd_buffer {
struct iommu_destroy destroy;
struct iommu_hw_info info;
struct iommu_hwpt_alloc hwpt;
+ struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap;
+ struct iommu_hwpt_set_dirty_tracking set_dirty_tracking;
struct iommu_ioas_alloc alloc;
struct iommu_ioas_allow_iovas allow_iovas;
struct iommu_ioas_copy ioas_copy;
@@ -342,6 +358,10 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
__reserved),
IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc,
__reserved),
+ IOCTL_OP(IOMMU_HWPT_GET_DIRTY_BITMAP, iommufd_hwpt_get_dirty_bitmap,
+ struct iommu_hwpt_get_dirty_bitmap, data),
+ IOCTL_OP(IOMMU_HWPT_SET_DIRTY_TRACKING, iommufd_hwpt_set_dirty_tracking,
+ struct iommu_hwpt_set_dirty_tracking, __reserved),
IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl,
struct iommu_ioas_alloc, out_ioas_id),
IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas,
@@ -482,9 +502,13 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
[IOMMUFD_OBJ_IOAS] = {
.destroy = iommufd_ioas_destroy,
},
- [IOMMUFD_OBJ_HW_PAGETABLE] = {
- .destroy = iommufd_hw_pagetable_destroy,
- .abort = iommufd_hw_pagetable_abort,
+ [IOMMUFD_OBJ_HWPT_PAGING] = {
+ .destroy = iommufd_hwpt_paging_destroy,
+ .abort = iommufd_hwpt_paging_abort,
+ },
+ [IOMMUFD_OBJ_HWPT_NESTED] = {
+ .destroy = iommufd_hwpt_nested_destroy,
+ .abort = iommufd_hwpt_nested_abort,
},
#ifdef CONFIG_IOMMUFD_TEST
[IOMMUFD_OBJ_SELFTEST] = {
@@ -552,5 +576,6 @@ MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
#endif
MODULE_IMPORT_NS(IOMMUFD_INTERNAL);
+MODULE_IMPORT_NS(IOMMUFD);
MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices");
MODULE_LICENSE("GPL");
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index 56506d5753..022ef8f550 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -20,10 +20,13 @@
static DECLARE_FAULT_ATTR(fail_iommufd);
static struct dentry *dbgfs_root;
static struct platform_device *selftest_iommu_dev;
+static const struct iommu_ops mock_ops;
+static struct iommu_domain_ops domain_nested_ops;
size_t iommufd_test_memory_limit = 65536;
enum {
+ MOCK_DIRTY_TRACK = 1,
MOCK_IO_PAGE_SIZE = PAGE_SIZE / 2,
/*
@@ -36,6 +39,7 @@ enum {
_MOCK_PFN_START = MOCK_PFN_MASK + 1,
MOCK_PFN_START_IOVA = _MOCK_PFN_START,
MOCK_PFN_LAST_IOVA = _MOCK_PFN_START,
+ MOCK_PFN_DIRTY_IOVA = _MOCK_PFN_START << 1,
};
/*
@@ -82,20 +86,28 @@ void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
if (IS_ERR(ioas))
return;
*iova = iommufd_test_syz_conv_iova(&ioas->iopt, iova);
- iommufd_put_object(&ioas->obj);
+ iommufd_put_object(ucmd->ictx, &ioas->obj);
}
struct mock_iommu_domain {
+ unsigned long flags;
struct iommu_domain domain;
struct xarray pfns;
};
+struct mock_iommu_domain_nested {
+ struct iommu_domain domain;
+ struct mock_iommu_domain *parent;
+ u32 iotlb[MOCK_NESTED_DOMAIN_IOTLB_NUM];
+};
+
enum selftest_obj_type {
TYPE_IDEV,
};
struct mock_dev {
struct device dev;
+ unsigned long flags;
};
struct selftest_obj {
@@ -111,18 +123,18 @@ struct selftest_obj {
};
};
-static void mock_domain_blocking_free(struct iommu_domain *domain)
-{
-}
-
static int mock_domain_nop_attach(struct iommu_domain *domain,
struct device *dev)
{
+ struct mock_dev *mdev = container_of(dev, struct mock_dev, dev);
+
+ if (domain->dirty_ops && (mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY))
+ return -EINVAL;
+
return 0;
}
static const struct iommu_domain_ops mock_blocking_ops = {
- .free = mock_domain_blocking_free,
.attach_dev = mock_domain_nop_attach,
};
@@ -146,15 +158,70 @@ static void *mock_domain_hw_info(struct device *dev, u32 *length, u32 *type)
return info;
}
-static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type)
+static int mock_domain_set_dirty_tracking(struct iommu_domain *domain,
+ bool enable)
{
- struct mock_iommu_domain *mock;
+ struct mock_iommu_domain *mock =
+ container_of(domain, struct mock_iommu_domain, domain);
+ unsigned long flags = mock->flags;
- if (iommu_domain_type == IOMMU_DOMAIN_BLOCKED)
- return &mock_blocking_domain;
+ if (enable && !domain->dirty_ops)
+ return -EINVAL;
- if (iommu_domain_type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
+ /* No change? */
+ if (!(enable ^ !!(flags & MOCK_DIRTY_TRACK)))
+ return 0;
+
+ flags = (enable ? flags | MOCK_DIRTY_TRACK : flags & ~MOCK_DIRTY_TRACK);
+
+ mock->flags = flags;
+ return 0;
+}
+
+static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ unsigned long flags,
+ struct iommu_dirty_bitmap *dirty)
+{
+ struct mock_iommu_domain *mock =
+ container_of(domain, struct mock_iommu_domain, domain);
+ unsigned long i, max = size / MOCK_IO_PAGE_SIZE;
+ void *ent, *old;
+
+ if (!(mock->flags & MOCK_DIRTY_TRACK) && dirty->bitmap)
+ return -EINVAL;
+
+ for (i = 0; i < max; i++) {
+ unsigned long cur = iova + i * MOCK_IO_PAGE_SIZE;
+
+ ent = xa_load(&mock->pfns, cur / MOCK_IO_PAGE_SIZE);
+ if (ent && (xa_to_value(ent) & MOCK_PFN_DIRTY_IOVA)) {
+ /* Clear dirty */
+ if (!(flags & IOMMU_DIRTY_NO_CLEAR)) {
+ unsigned long val;
+
+ val = xa_to_value(ent) & ~MOCK_PFN_DIRTY_IOVA;
+ old = xa_store(&mock->pfns,
+ cur / MOCK_IO_PAGE_SIZE,
+ xa_mk_value(val), GFP_KERNEL);
+ WARN_ON_ONCE(ent != old);
+ }
+ iommu_dirty_bitmap_record(dirty, cur,
+ MOCK_IO_PAGE_SIZE);
+ }
+ }
+
+ return 0;
+}
+
+const struct iommu_dirty_ops dirty_ops = {
+ .set_dirty_tracking = mock_domain_set_dirty_tracking,
+ .read_and_clear_dirty = mock_domain_read_and_clear_dirty,
+};
+
+static struct iommu_domain *mock_domain_alloc_paging(struct device *dev)
+{
+ struct mock_iommu_domain *mock;
mock = kzalloc(sizeof(*mock), GFP_KERNEL);
if (!mock)
@@ -162,10 +229,78 @@ static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type)
mock->domain.geometry.aperture_start = MOCK_APERTURE_START;
mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST;
mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE;
+ mock->domain.ops = mock_ops.default_domain_ops;
+ mock->domain.type = IOMMU_DOMAIN_UNMANAGED;
xa_init(&mock->pfns);
return &mock->domain;
}
+static struct iommu_domain *
+__mock_domain_alloc_nested(struct mock_iommu_domain *mock_parent,
+ const struct iommu_hwpt_selftest *user_cfg)
+{
+ struct mock_iommu_domain_nested *mock_nested;
+ int i;
+
+ mock_nested = kzalloc(sizeof(*mock_nested), GFP_KERNEL);
+ if (!mock_nested)
+ return ERR_PTR(-ENOMEM);
+ mock_nested->parent = mock_parent;
+ mock_nested->domain.ops = &domain_nested_ops;
+ mock_nested->domain.type = IOMMU_DOMAIN_NESTED;
+ for (i = 0; i < MOCK_NESTED_DOMAIN_IOTLB_NUM; i++)
+ mock_nested->iotlb[i] = user_cfg->iotlb;
+ return &mock_nested->domain;
+}
+
+static struct iommu_domain *
+mock_domain_alloc_user(struct device *dev, u32 flags,
+ struct iommu_domain *parent,
+ const struct iommu_user_data *user_data)
+{
+ struct mock_iommu_domain *mock_parent;
+ struct iommu_hwpt_selftest user_cfg;
+ int rc;
+
+ /* must be mock_domain */
+ if (!parent) {
+ struct mock_dev *mdev = container_of(dev, struct mock_dev, dev);
+ bool has_dirty_flag = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
+ bool no_dirty_ops = mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY;
+ struct iommu_domain *domain;
+
+ if (flags & (~(IOMMU_HWPT_ALLOC_NEST_PARENT |
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING)))
+ return ERR_PTR(-EOPNOTSUPP);
+ if (user_data || (has_dirty_flag && no_dirty_ops))
+ return ERR_PTR(-EOPNOTSUPP);
+ domain = mock_domain_alloc_paging(NULL);
+ if (!domain)
+ return ERR_PTR(-ENOMEM);
+ if (has_dirty_flag)
+ container_of(domain, struct mock_iommu_domain, domain)
+ ->domain.dirty_ops = &dirty_ops;
+ return domain;
+ }
+
+ /* must be mock_domain_nested */
+ if (user_data->type != IOMMU_HWPT_DATA_SELFTEST || flags)
+ return ERR_PTR(-EOPNOTSUPP);
+ if (!parent || parent->ops != mock_ops.default_domain_ops)
+ return ERR_PTR(-EINVAL);
+
+ mock_parent = container_of(parent, struct mock_iommu_domain, domain);
+ if (!mock_parent)
+ return ERR_PTR(-EINVAL);
+
+ rc = iommu_copy_struct_from_user(&user_cfg, user_data,
+ IOMMU_HWPT_DATA_SELFTEST, iotlb);
+ if (rc)
+ return ERR_PTR(rc);
+
+ return __mock_domain_alloc_nested(mock_parent, &user_cfg);
+}
+
static void mock_domain_free(struct iommu_domain *domain)
{
struct mock_iommu_domain *mock =
@@ -243,7 +378,7 @@ static size_t mock_domain_unmap_pages(struct iommu_domain *domain,
for (cur = 0; cur != pgsize; cur += MOCK_IO_PAGE_SIZE) {
ent = xa_erase(&mock->pfns, iova / MOCK_IO_PAGE_SIZE);
- WARN_ON(!ent);
+
/*
* iommufd generates unmaps that must be a strict
* superset of the map's performend So every starting
@@ -253,13 +388,13 @@ static size_t mock_domain_unmap_pages(struct iommu_domain *domain,
* passed to map_pages
*/
if (first) {
- WARN_ON(!(xa_to_value(ent) &
- MOCK_PFN_START_IOVA));
+ WARN_ON(ent && !(xa_to_value(ent) &
+ MOCK_PFN_START_IOVA));
first = false;
}
if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize)
- WARN_ON(!(xa_to_value(ent) &
- MOCK_PFN_LAST_IOVA));
+ WARN_ON(ent && !(xa_to_value(ent) &
+ MOCK_PFN_LAST_IOVA));
iova += MOCK_IO_PAGE_SIZE;
ret += MOCK_IO_PAGE_SIZE;
@@ -283,15 +418,18 @@ static phys_addr_t mock_domain_iova_to_phys(struct iommu_domain *domain,
static bool mock_domain_capable(struct device *dev, enum iommu_cap cap)
{
- return cap == IOMMU_CAP_CACHE_COHERENCY;
-}
+ struct mock_dev *mdev = container_of(dev, struct mock_dev, dev);
-static void mock_domain_set_plaform_dma_ops(struct device *dev)
-{
- /*
- * mock doesn't setup default domains because we can't hook into the
- * normal probe path
- */
+ switch (cap) {
+ case IOMMU_CAP_CACHE_COHERENCY:
+ return true;
+ case IOMMU_CAP_DIRTY_TRACKING:
+ return !(mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY);
+ default:
+ break;
+ }
+
+ return false;
}
static struct iommu_device mock_iommu_device = {
@@ -303,12 +441,18 @@ static struct iommu_device *mock_probe_device(struct device *dev)
}
static const struct iommu_ops mock_ops = {
+ /*
+ * IOMMU_DOMAIN_BLOCKED cannot be returned from def_domain_type()
+ * because it is zero.
+ */
+ .default_domain = &mock_blocking_domain,
+ .blocked_domain = &mock_blocking_domain,
.owner = THIS_MODULE,
.pgsize_bitmap = MOCK_IO_PAGE_SIZE,
.hw_info = mock_domain_hw_info,
- .domain_alloc = mock_domain_alloc,
+ .domain_alloc_paging = mock_domain_alloc_paging,
+ .domain_alloc_user = mock_domain_alloc_user,
.capable = mock_domain_capable,
- .set_platform_dma_ops = mock_domain_set_plaform_dma_ops,
.device_group = generic_device_group,
.probe_device = mock_probe_device,
.default_domain_ops =
@@ -321,26 +465,67 @@ static const struct iommu_ops mock_ops = {
},
};
+static void mock_domain_free_nested(struct iommu_domain *domain)
+{
+ struct mock_iommu_domain_nested *mock_nested =
+ container_of(domain, struct mock_iommu_domain_nested, domain);
+
+ kfree(mock_nested);
+}
+
+static struct iommu_domain_ops domain_nested_ops = {
+ .free = mock_domain_free_nested,
+ .attach_dev = mock_domain_nop_attach,
+};
+
static inline struct iommufd_hw_pagetable *
-get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id,
- struct mock_iommu_domain **mock)
+__get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id, u32 hwpt_type)
{
- struct iommufd_hw_pagetable *hwpt;
struct iommufd_object *obj;
- obj = iommufd_get_object(ucmd->ictx, mockpt_id,
- IOMMUFD_OBJ_HW_PAGETABLE);
+ obj = iommufd_get_object(ucmd->ictx, mockpt_id, hwpt_type);
if (IS_ERR(obj))
return ERR_CAST(obj);
- hwpt = container_of(obj, struct iommufd_hw_pagetable, obj);
- if (hwpt->domain->ops != mock_ops.default_domain_ops) {
- iommufd_put_object(&hwpt->obj);
+ return container_of(obj, struct iommufd_hw_pagetable, obj);
+}
+
+static inline struct iommufd_hw_pagetable *
+get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id,
+ struct mock_iommu_domain **mock)
+{
+ struct iommufd_hw_pagetable *hwpt;
+
+ hwpt = __get_md_pagetable(ucmd, mockpt_id, IOMMUFD_OBJ_HWPT_PAGING);
+ if (IS_ERR(hwpt))
+ return hwpt;
+ if (hwpt->domain->type != IOMMU_DOMAIN_UNMANAGED ||
+ hwpt->domain->ops != mock_ops.default_domain_ops) {
+ iommufd_put_object(ucmd->ictx, &hwpt->obj);
return ERR_PTR(-EINVAL);
}
*mock = container_of(hwpt->domain, struct mock_iommu_domain, domain);
return hwpt;
}
+static inline struct iommufd_hw_pagetable *
+get_md_pagetable_nested(struct iommufd_ucmd *ucmd, u32 mockpt_id,
+ struct mock_iommu_domain_nested **mock_nested)
+{
+ struct iommufd_hw_pagetable *hwpt;
+
+ hwpt = __get_md_pagetable(ucmd, mockpt_id, IOMMUFD_OBJ_HWPT_NESTED);
+ if (IS_ERR(hwpt))
+ return hwpt;
+ if (hwpt->domain->type != IOMMU_DOMAIN_NESTED ||
+ hwpt->domain->ops != &domain_nested_ops) {
+ iommufd_put_object(ucmd->ictx, &hwpt->obj);
+ return ERR_PTR(-EINVAL);
+ }
+ *mock_nested = container_of(hwpt->domain,
+ struct mock_iommu_domain_nested, domain);
+ return hwpt;
+}
+
struct mock_bus_type {
struct bus_type bus;
struct notifier_block nb;
@@ -362,16 +547,20 @@ static void mock_dev_release(struct device *dev)
kfree(mdev);
}
-static struct mock_dev *mock_dev_create(void)
+static struct mock_dev *mock_dev_create(unsigned long dev_flags)
{
struct mock_dev *mdev;
int rc;
+ if (dev_flags & ~(MOCK_FLAGS_DEVICE_NO_DIRTY))
+ return ERR_PTR(-EINVAL);
+
mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
if (!mdev)
return ERR_PTR(-ENOMEM);
device_initialize(&mdev->dev);
+ mdev->flags = dev_flags;
mdev->dev.release = mock_dev_release;
mdev->dev.bus = &iommufd_mock_bus_type.bus;
@@ -407,6 +596,7 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd,
struct iommufd_device *idev;
struct selftest_obj *sobj;
u32 pt_id = cmd->id;
+ u32 dev_flags = 0;
u32 idev_id;
int rc;
@@ -417,7 +607,10 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd,
sobj->idev.ictx = ucmd->ictx;
sobj->type = TYPE_IDEV;
- sobj->idev.mock_dev = mock_dev_create();
+ if (cmd->op == IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS)
+ dev_flags = cmd->mock_domain_flags.dev_flags;
+
+ sobj->idev.mock_dev = mock_dev_create(dev_flags);
if (IS_ERR(sobj->idev.mock_dev)) {
rc = PTR_ERR(sobj->idev.mock_dev);
goto out_sobj;
@@ -488,7 +681,7 @@ static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd,
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
out_dev_obj:
- iommufd_put_object(dev_obj);
+ iommufd_put_object(ucmd->ictx, dev_obj);
return rc;
}
@@ -506,7 +699,7 @@ static int iommufd_test_add_reserved(struct iommufd_ucmd *ucmd,
down_write(&ioas->iopt.iova_rwsem);
rc = iopt_reserve_iova(&ioas->iopt, start, start + length - 1, NULL);
up_write(&ioas->iopt.iova_rwsem);
- iommufd_put_object(&ioas->obj);
+ iommufd_put_object(ucmd->ictx, &ioas->obj);
return rc;
}
@@ -561,7 +754,7 @@ static int iommufd_test_md_check_pa(struct iommufd_ucmd *ucmd,
rc = 0;
out_put:
- iommufd_put_object(&hwpt->obj);
+ iommufd_put_object(ucmd->ictx, &hwpt->obj);
return rc;
}
@@ -977,6 +1170,73 @@ static_assert((unsigned int)MOCK_ACCESS_RW_WRITE == IOMMUFD_ACCESS_RW_WRITE);
static_assert((unsigned int)MOCK_ACCESS_RW_SLOW_PATH ==
__IOMMUFD_ACCESS_RW_SLOW_PATH);
+static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id,
+ unsigned long iova, size_t length,
+ unsigned long page_size, void __user *uptr,
+ u32 flags)
+{
+ unsigned long bitmap_size, i, max;
+ struct iommu_test_cmd *cmd = ucmd->cmd;
+ struct iommufd_hw_pagetable *hwpt;
+ struct mock_iommu_domain *mock;
+ int rc, count = 0;
+ void *tmp;
+
+ if (!page_size || !length || iova % page_size || length % page_size ||
+ !uptr)
+ return -EINVAL;
+
+ hwpt = get_md_pagetable(ucmd, mockpt_id, &mock);
+ if (IS_ERR(hwpt))
+ return PTR_ERR(hwpt);
+
+ if (!(mock->flags & MOCK_DIRTY_TRACK)) {
+ rc = -EINVAL;
+ goto out_put;
+ }
+
+ max = length / page_size;
+ bitmap_size = max / BITS_PER_BYTE;
+
+ tmp = kvzalloc(bitmap_size, GFP_KERNEL_ACCOUNT);
+ if (!tmp) {
+ rc = -ENOMEM;
+ goto out_put;
+ }
+
+ if (copy_from_user(tmp, uptr, bitmap_size)) {
+ rc = -EFAULT;
+ goto out_free;
+ }
+
+ for (i = 0; i < max; i++) {
+ unsigned long cur = iova + i * page_size;
+ void *ent, *old;
+
+ if (!test_bit(i, (unsigned long *)tmp))
+ continue;
+
+ ent = xa_load(&mock->pfns, cur / page_size);
+ if (ent) {
+ unsigned long val;
+
+ val = xa_to_value(ent) | MOCK_PFN_DIRTY_IOVA;
+ old = xa_store(&mock->pfns, cur / page_size,
+ xa_mk_value(val), GFP_KERNEL);
+ WARN_ON_ONCE(ent != old);
+ count++;
+ }
+ }
+
+ cmd->dirty.out_nr_dirty = count;
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+out_free:
+ kvfree(tmp);
+out_put:
+ iommufd_put_object(ucmd->ictx, &hwpt->obj);
+ return rc;
+}
+
void iommufd_selftest_destroy(struct iommufd_object *obj)
{
struct selftest_obj *sobj = container_of(obj, struct selftest_obj, obj);
@@ -1000,6 +1260,7 @@ int iommufd_test(struct iommufd_ucmd *ucmd)
cmd->add_reserved.start,
cmd->add_reserved.length);
case IOMMU_TEST_OP_MOCK_DOMAIN:
+ case IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS:
return iommufd_test_mock_domain(ucmd, cmd);
case IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE:
return iommufd_test_mock_domain_replace(
@@ -1041,6 +1302,12 @@ int iommufd_test(struct iommufd_ucmd *ucmd)
return -EINVAL;
iommufd_test_memory_limit = cmd->memory_limit.limit;
return 0;
+ case IOMMU_TEST_OP_DIRTY:
+ return iommufd_test_dirty(ucmd, cmd->id, cmd->dirty.iova,
+ cmd->dirty.length,
+ cmd->dirty.page_size,
+ u64_to_user_ptr(cmd->dirty.uptr),
+ cmd->dirty.flags);
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c
index 6c810bf80f..a3ad5f0b6c 100644
--- a/drivers/iommu/iommufd/vfio_compat.c
+++ b/drivers/iommu/iommufd/vfio_compat.c
@@ -41,7 +41,7 @@ int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
if (IS_ERR(ioas))
return PTR_ERR(ioas);
*out_ioas_id = ioas->obj.id;
- iommufd_put_object(&ioas->obj);
+ iommufd_put_object(ictx, &ioas->obj);
return 0;
}
EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO);
@@ -98,7 +98,7 @@ int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx)
if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) {
ret = 0;
- iommufd_put_object(&ictx->vfio_ioas->obj);
+ iommufd_put_object(ictx, &ictx->vfio_ioas->obj);
goto out_abort;
}
ictx->vfio_ioas = ioas;
@@ -133,7 +133,7 @@ int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd)
if (IS_ERR(ioas))
return PTR_ERR(ioas);
cmd->ioas_id = ioas->obj.id;
- iommufd_put_object(&ioas->obj);
+ iommufd_put_object(ucmd->ictx, &ioas->obj);
return iommufd_ucmd_respond(ucmd, sizeof(*cmd));
case IOMMU_VFIO_IOAS_SET:
@@ -143,7 +143,7 @@ int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd)
xa_lock(&ucmd->ictx->objects);
ucmd->ictx->vfio_ioas = ioas;
xa_unlock(&ucmd->ictx->objects);
- iommufd_put_object(&ioas->obj);
+ iommufd_put_object(ucmd->ictx, &ioas->obj);
return 0;
case IOMMU_VFIO_IOAS_CLEAR:
@@ -190,7 +190,7 @@ static int iommufd_vfio_map_dma(struct iommufd_ctx *ictx, unsigned int cmd,
iova = map.iova;
rc = iopt_map_user_pages(ictx, &ioas->iopt, &iova, u64_to_user_ptr(map.vaddr),
map.size, iommu_prot, 0);
- iommufd_put_object(&ioas->obj);
+ iommufd_put_object(ictx, &ioas->obj);
return rc;
}
@@ -249,13 +249,13 @@ static int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd,
rc = -EFAULT;
err_put:
- iommufd_put_object(&ioas->obj);
+ iommufd_put_object(ictx, &ioas->obj);
return rc;
}
static int iommufd_vfio_cc_iommu(struct iommufd_ctx *ictx)
{
- struct iommufd_hw_pagetable *hwpt;
+ struct iommufd_hwpt_paging *hwpt_paging;
struct iommufd_ioas *ioas;
int rc = 1;
@@ -264,15 +264,15 @@ static int iommufd_vfio_cc_iommu(struct iommufd_ctx *ictx)
return PTR_ERR(ioas);
mutex_lock(&ioas->mutex);
- list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) {
- if (!hwpt->enforce_cache_coherency) {
+ list_for_each_entry(hwpt_paging, &ioas->hwpt_list, hwpt_item) {
+ if (!hwpt_paging->enforce_cache_coherency) {
rc = 0;
break;
}
}
mutex_unlock(&ioas->mutex);
- iommufd_put_object(&ioas->obj);
+ iommufd_put_object(ictx, &ioas->obj);
return rc;
}
@@ -349,7 +349,7 @@ static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type)
*/
if (type == VFIO_TYPE1_IOMMU)
rc = iopt_disable_large_pages(&ioas->iopt);
- iommufd_put_object(&ioas->obj);
+ iommufd_put_object(ictx, &ioas->obj);
return rc;
}
@@ -511,7 +511,7 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx,
out_put:
up_read(&ioas->iopt.iova_rwsem);
- iommufd_put_object(&ioas->obj);
+ iommufd_put_object(ictx, &ioas->obj);
return rc;
}
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 10b9646009..d30e453d0f 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -11,6 +11,7 @@
#include <linux/smp.h>
#include <linux/bitops.h>
#include <linux/cpu.h>
+#include <linux/workqueue.h>
/* The anchor node sits above the top of the usable address space */
#define IOVA_ANCHOR ~0UL
@@ -622,15 +623,21 @@ EXPORT_SYMBOL_GPL(reserve_iova);
/*
* As kmalloc's buffer size is fixed to power of 2, 127 is chosen to
* assure size of 'iova_magazine' to be 1024 bytes, so that no memory
- * will be wasted.
+ * will be wasted. Since only full magazines are inserted into the depot,
+ * we don't need to waste PFN capacity on a separate list head either.
*/
#define IOVA_MAG_SIZE 127
-#define MAX_GLOBAL_MAGS 32 /* magazines per bin */
+
+#define IOVA_DEPOT_DELAY msecs_to_jiffies(100)
struct iova_magazine {
- unsigned long size;
+ union {
+ unsigned long size;
+ struct iova_magazine *next;
+ };
unsigned long pfns[IOVA_MAG_SIZE];
};
+static_assert(!(sizeof(struct iova_magazine) & (sizeof(struct iova_magazine) - 1)));
struct iova_cpu_rcache {
spinlock_t lock;
@@ -640,9 +647,11 @@ struct iova_cpu_rcache {
struct iova_rcache {
spinlock_t lock;
- unsigned long depot_size;
- struct iova_magazine *depot[MAX_GLOBAL_MAGS];
+ unsigned int depot_size;
+ struct iova_magazine *depot;
struct iova_cpu_rcache __percpu *cpu_rcaches;
+ struct iova_domain *iovad;
+ struct delayed_work work;
};
static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
@@ -717,6 +726,41 @@ static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
mag->pfns[mag->size++] = pfn;
}
+static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache)
+{
+ struct iova_magazine *mag = rcache->depot;
+
+ rcache->depot = mag->next;
+ mag->size = IOVA_MAG_SIZE;
+ rcache->depot_size--;
+ return mag;
+}
+
+static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazine *mag)
+{
+ mag->next = rcache->depot;
+ rcache->depot = mag;
+ rcache->depot_size++;
+}
+
+static void iova_depot_work_func(struct work_struct *work)
+{
+ struct iova_rcache *rcache = container_of(work, typeof(*rcache), work.work);
+ struct iova_magazine *mag = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rcache->lock, flags);
+ if (rcache->depot_size > num_online_cpus())
+ mag = iova_depot_pop(rcache);
+ spin_unlock_irqrestore(&rcache->lock, flags);
+
+ if (mag) {
+ iova_magazine_free_pfns(mag, rcache->iovad);
+ iova_magazine_free(mag);
+ schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY);
+ }
+}
+
int iova_domain_init_rcaches(struct iova_domain *iovad)
{
unsigned int cpu;
@@ -734,7 +778,8 @@ int iova_domain_init_rcaches(struct iova_domain *iovad)
rcache = &iovad->rcaches[i];
spin_lock_init(&rcache->lock);
- rcache->depot_size = 0;
+ rcache->iovad = iovad;
+ INIT_DELAYED_WORK(&rcache->work, iova_depot_work_func);
rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
cache_line_size());
if (!rcache->cpu_rcaches) {
@@ -776,7 +821,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
struct iova_rcache *rcache,
unsigned long iova_pfn)
{
- struct iova_magazine *mag_to_free = NULL;
struct iova_cpu_rcache *cpu_rcache;
bool can_insert = false;
unsigned long flags;
@@ -794,13 +838,9 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
if (new_mag) {
spin_lock(&rcache->lock);
- if (rcache->depot_size < MAX_GLOBAL_MAGS) {
- rcache->depot[rcache->depot_size++] =
- cpu_rcache->loaded;
- } else {
- mag_to_free = cpu_rcache->loaded;
- }
+ iova_depot_push(rcache, cpu_rcache->loaded);
spin_unlock(&rcache->lock);
+ schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY);
cpu_rcache->loaded = new_mag;
can_insert = true;
@@ -812,11 +852,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
spin_unlock_irqrestore(&cpu_rcache->lock, flags);
- if (mag_to_free) {
- iova_magazine_free_pfns(mag_to_free, iovad);
- iova_magazine_free(mag_to_free);
- }
-
return can_insert;
}
@@ -854,9 +889,9 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
has_pfn = true;
} else {
spin_lock(&rcache->lock);
- if (rcache->depot_size > 0) {
+ if (rcache->depot) {
iova_magazine_free(cpu_rcache->loaded);
- cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
+ cpu_rcache->loaded = iova_depot_pop(rcache);
has_pfn = true;
}
spin_unlock(&rcache->lock);
@@ -895,9 +930,8 @@ static void free_iova_rcaches(struct iova_domain *iovad)
struct iova_rcache *rcache;
struct iova_cpu_rcache *cpu_rcache;
unsigned int cpu;
- int i, j;
- for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+ for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
rcache = &iovad->rcaches[i];
if (!rcache->cpu_rcaches)
break;
@@ -907,8 +941,9 @@ static void free_iova_rcaches(struct iova_domain *iovad)
iova_magazine_free(cpu_rcache->prev);
}
free_percpu(rcache->cpu_rcaches);
- for (j = 0; j < rcache->depot_size; ++j)
- iova_magazine_free(rcache->depot[j]);
+ cancel_delayed_work_sync(&rcache->work);
+ while (rcache->depot)
+ iova_magazine_free(iova_depot_pop(rcache));
}
kfree(iovad->rcaches);
@@ -942,16 +977,16 @@ static void free_global_cached_iovas(struct iova_domain *iovad)
{
struct iova_rcache *rcache;
unsigned long flags;
- int i, j;
- for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+ for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
rcache = &iovad->rcaches[i];
spin_lock_irqsave(&rcache->lock, flags);
- for (j = 0; j < rcache->depot_size; ++j) {
- iova_magazine_free_pfns(rcache->depot[j], iovad);
- iova_magazine_free(rcache->depot[j]);
+ while (rcache->depot) {
+ struct iova_magazine *mag = iova_depot_pop(rcache);
+
+ iova_magazine_free_pfns(mag, iovad);
+ iova_magazine_free(mag);
}
- rcache->depot_size = 0;
spin_unlock_irqrestore(&rcache->lock, flags);
}
}
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 65ff69477c..ace1fc4bd3 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -64,7 +64,6 @@ struct ipmmu_vmsa_device {
struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX];
s8 utlb_ctx[IPMMU_UTLB_MAX];
- struct iommu_group *group;
struct dma_iommu_mapping *mapping;
};
@@ -295,6 +294,18 @@ static void ipmmu_utlb_enable(struct ipmmu_vmsa_domain *domain,
mmu->utlb_ctx[utlb] = domain->context_id;
}
+/*
+ * Disable MMU translation for the microTLB.
+ */
+static void ipmmu_utlb_disable(struct ipmmu_vmsa_domain *domain,
+ unsigned int utlb)
+{
+ struct ipmmu_vmsa_device *mmu = domain->mmu;
+
+ ipmmu_imuctr_write(mmu, utlb, 0);
+ mmu->utlb_ctx[utlb] = IPMMU_CTX_INVALID;
+}
+
static void ipmmu_tlb_flush_all(void *cookie)
{
struct ipmmu_vmsa_domain *domain = cookie;
@@ -551,13 +562,10 @@ static irqreturn_t ipmmu_irq(int irq, void *dev)
* IOMMU Operations
*/
-static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
+static struct iommu_domain *ipmmu_domain_alloc_paging(struct device *dev)
{
struct ipmmu_vmsa_domain *domain;
- if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
- return NULL;
-
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return NULL;
@@ -627,6 +635,36 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
return 0;
}
+static int ipmmu_iommu_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
+{
+ struct iommu_domain *io_domain = iommu_get_domain_for_dev(dev);
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct ipmmu_vmsa_domain *domain;
+ unsigned int i;
+
+ if (io_domain == identity_domain || !io_domain)
+ return 0;
+
+ domain = to_vmsa_domain(io_domain);
+ for (i = 0; i < fwspec->num_ids; ++i)
+ ipmmu_utlb_disable(domain, fwspec->ids[i]);
+
+ /*
+ * TODO: Optimize by disabling the context when no device is attached.
+ */
+ return 0;
+}
+
+static struct iommu_domain_ops ipmmu_iommu_identity_ops = {
+ .attach_dev = ipmmu_iommu_identity_attach,
+};
+
+static struct iommu_domain ipmmu_iommu_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &ipmmu_iommu_identity_ops,
+};
+
static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
@@ -833,28 +871,18 @@ static void ipmmu_release_device(struct device *dev)
arm_iommu_release_mapping(mmu->mapping);
}
-static struct iommu_group *ipmmu_find_group(struct device *dev)
-{
- struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
- struct iommu_group *group;
-
- if (mmu->group)
- return iommu_group_ref_get(mmu->group);
-
- group = iommu_group_alloc();
- if (!IS_ERR(group))
- mmu->group = group;
-
- return group;
-}
-
static const struct iommu_ops ipmmu_ops = {
- .domain_alloc = ipmmu_domain_alloc,
+ .identity_domain = &ipmmu_iommu_identity_domain,
+ .domain_alloc_paging = ipmmu_domain_alloc_paging,
.probe_device = ipmmu_probe_device,
.release_device = ipmmu_release_device,
.probe_finalize = ipmmu_probe_finalize,
+ /*
+ * FIXME: The device grouping is a fixed property of the hardware's
+ * ability to isolate and control DMA, it should not depend on kconfig.
+ */
.device_group = IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA)
- ? generic_device_group : ipmmu_find_group,
+ ? generic_device_group : generic_single_device_group,
.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
.of_xlate = ipmmu_of_xlate,
.default_domain_ops = &(const struct iommu_domain_ops) {
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 79d89bad51..f86af9815d 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -302,13 +302,10 @@ static void __program_context(void __iomem *base, int ctx,
SET_M(base, ctx, 1);
}
-static struct iommu_domain *msm_iommu_domain_alloc(unsigned type)
+static struct iommu_domain *msm_iommu_domain_alloc_paging(struct device *dev)
{
struct msm_priv *priv;
- if (type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
goto fail_nomem;
@@ -443,15 +440,20 @@ fail:
return ret;
}
-static void msm_iommu_set_platform_dma(struct device *dev)
+static int msm_iommu_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct msm_priv *priv = to_msm_priv(domain);
+ struct msm_priv *priv;
unsigned long flags;
struct msm_iommu_dev *iommu;
struct msm_iommu_ctx_dev *master;
- int ret;
+ int ret = 0;
+
+ if (domain == identity_domain || !domain)
+ return 0;
+ priv = to_msm_priv(domain);
free_io_pgtable_ops(priv->iop);
spin_lock_irqsave(&msm_iommu_lock, flags);
@@ -468,8 +470,18 @@ static void msm_iommu_set_platform_dma(struct device *dev)
}
fail:
spin_unlock_irqrestore(&msm_iommu_lock, flags);
+ return ret;
}
+static struct iommu_domain_ops msm_iommu_identity_ops = {
+ .attach_dev = msm_iommu_identity_attach,
+};
+
+static struct iommu_domain msm_iommu_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &msm_iommu_identity_ops,
+};
+
static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t pa, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
@@ -486,12 +498,13 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova,
return ret;
}
-static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
- size_t size)
+static int msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
+ size_t size)
{
struct msm_priv *priv = to_msm_priv(domain);
__flush_iotlb_range(iova, size, SZ_4K, false, priv);
+ return 0;
}
static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
@@ -675,10 +688,10 @@ fail:
}
static struct iommu_ops msm_iommu_ops = {
- .domain_alloc = msm_iommu_domain_alloc,
+ .identity_domain = &msm_iommu_identity_domain,
+ .domain_alloc_paging = msm_iommu_domain_alloc_paging,
.probe_device = msm_iommu_probe_device,
.device_group = generic_device_group,
- .set_platform_dma_ops = msm_iommu_set_platform_dma,
.pgsize_bitmap = MSM_IOMMU_PGSIZES,
.of_xlate = qcom_iommu_of_xlate,
.default_domain_ops = &(const struct iommu_domain_ops) {
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index fab6c347ce..75279500a4 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -688,13 +688,10 @@ update_iova_region:
return 0;
}
-static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type)
+static struct iommu_domain *mtk_iommu_domain_alloc_paging(struct device *dev)
{
struct mtk_iommu_domain *dom;
- if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
dom = kzalloc(sizeof(*dom), GFP_KERNEL);
if (!dom)
return NULL;
@@ -776,6 +773,28 @@ err_unlock:
return ret;
}
+static int mtk_iommu_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
+{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
+
+ if (domain == identity_domain || !domain)
+ return 0;
+
+ mtk_iommu_config(data, dev, false, 0);
+ return 0;
+}
+
+static struct iommu_domain_ops mtk_iommu_identity_ops = {
+ .attach_dev = mtk_iommu_identity_attach,
+};
+
+static struct iommu_domain mtk_iommu_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &mtk_iommu_identity_ops,
+};
+
static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
@@ -817,12 +836,13 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
mtk_iommu_tlb_flush_range_sync(gather->start, length, dom->bank);
}
-static void mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
- size_t size)
+static int mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
+ size_t size)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
mtk_iommu_tlb_flush_range_sync(iova, size, dom->bank);
+ return 0;
}
static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -995,7 +1015,8 @@ static void mtk_iommu_get_resv_regions(struct device *dev,
}
static const struct iommu_ops mtk_iommu_ops = {
- .domain_alloc = mtk_iommu_domain_alloc,
+ .identity_domain = &mtk_iommu_identity_domain,
+ .domain_alloc_paging = mtk_iommu_domain_alloc_paging,
.probe_device = mtk_iommu_probe_device,
.release_device = mtk_iommu_release_device,
.device_group = mtk_iommu_device_group,
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 8a0a5e5d04..67e044c1a7 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -270,13 +270,10 @@ static int mtk_iommu_v1_domain_finalise(struct mtk_iommu_v1_data *data)
return 0;
}
-static struct iommu_domain *mtk_iommu_v1_domain_alloc(unsigned type)
+static struct iommu_domain *mtk_iommu_v1_domain_alloc_paging(struct device *dev)
{
struct mtk_iommu_v1_domain *dom;
- if (type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
dom = kzalloc(sizeof(*dom), GFP_KERNEL);
if (!dom)
return NULL;
@@ -319,13 +316,24 @@ static int mtk_iommu_v1_attach_device(struct iommu_domain *domain, struct device
return 0;
}
-static void mtk_iommu_v1_set_platform_dma(struct device *dev)
+static int mtk_iommu_v1_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
{
struct mtk_iommu_v1_data *data = dev_iommu_priv_get(dev);
mtk_iommu_v1_config(data, dev, false);
+ return 0;
}
+static struct iommu_domain_ops mtk_iommu_v1_identity_ops = {
+ .attach_dev = mtk_iommu_v1_identity_attach,
+};
+
+static struct iommu_domain mtk_iommu_v1_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &mtk_iommu_v1_identity_ops,
+};
+
static int mtk_iommu_v1_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
@@ -441,11 +449,6 @@ static int mtk_iommu_v1_create_mapping(struct device *dev, struct of_phandle_arg
return 0;
}
-static int mtk_iommu_v1_def_domain_type(struct device *dev)
-{
- return IOMMU_DOMAIN_UNMANAGED;
-}
-
static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
@@ -578,14 +581,13 @@ static int mtk_iommu_v1_hw_init(const struct mtk_iommu_v1_data *data)
}
static const struct iommu_ops mtk_iommu_v1_ops = {
- .domain_alloc = mtk_iommu_v1_domain_alloc,
+ .identity_domain = &mtk_iommu_v1_identity_domain,
+ .domain_alloc_paging = mtk_iommu_v1_domain_alloc_paging,
.probe_device = mtk_iommu_v1_probe_device,
.probe_finalize = mtk_iommu_v1_probe_finalize,
.release_device = mtk_iommu_v1_release_device,
- .def_domain_type = mtk_iommu_v1_def_domain_type,
.device_group = generic_device_group,
.pgsize_bitmap = MT2701_IOMMU_PAGE_SIZE,
- .set_platform_dma_ops = mtk_iommu_v1_set_platform_dma,
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = mtk_iommu_v1_attach_device,
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 537e402f9b..c66b070841 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1225,18 +1225,15 @@ static int omap_iommu_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, obj);
if (omap_iommu_can_register(pdev)) {
- obj->group = iommu_group_alloc();
- if (IS_ERR(obj->group))
- return PTR_ERR(obj->group);
-
err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL,
obj->name);
if (err)
- goto out_group;
+ return err;
err = iommu_device_register(&obj->iommu, &omap_iommu_ops, &pdev->dev);
if (err)
goto out_sysfs;
+ obj->has_iommu_driver = true;
}
pm_runtime_enable(obj->dev);
@@ -1252,8 +1249,6 @@ static int omap_iommu_probe(struct platform_device *pdev)
out_sysfs:
iommu_device_sysfs_remove(&obj->iommu);
-out_group:
- iommu_group_put(obj->group);
return err;
}
@@ -1261,10 +1256,7 @@ static void omap_iommu_remove(struct platform_device *pdev)
{
struct omap_iommu *obj = platform_get_drvdata(pdev);
- if (obj->group) {
- iommu_group_put(obj->group);
- obj->group = NULL;
-
+ if (obj->has_iommu_driver) {
iommu_device_sysfs_remove(&obj->iommu);
iommu_device_unregister(&obj->iommu);
}
@@ -1318,7 +1310,8 @@ static u32 iotlb_init_entry(struct iotlb_entry *e, u32 da, u32 pa, int pgsz)
}
static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
- phys_addr_t pa, size_t bytes, int prot, gfp_t gfp)
+ phys_addr_t pa, size_t bytes, size_t count,
+ int prot, gfp_t gfp, size_t *mapped)
{
struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
struct device *dev = omap_domain->dev;
@@ -1356,13 +1349,15 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
oiommu = iommu->iommu_dev;
iopgtable_clear_entry(oiommu, da);
}
+ } else {
+ *mapped = bytes;
}
return ret;
}
static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
- size_t size, struct iommu_iotlb_gather *gather)
+ size_t size, size_t count, struct iommu_iotlb_gather *gather)
{
struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
struct device *dev = omap_domain->dev;
@@ -1555,23 +1550,35 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain,
omap_domain->dev = NULL;
}
-static void omap_iommu_set_platform_dma(struct device *dev)
+static int omap_iommu_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
+ struct omap_iommu_domain *omap_domain;
+
+ if (domain == identity_domain || !domain)
+ return 0;
+ omap_domain = to_omap_domain(domain);
spin_lock(&omap_domain->lock);
_omap_iommu_detach_dev(omap_domain, dev);
spin_unlock(&omap_domain->lock);
+ return 0;
}
-static struct iommu_domain *omap_iommu_domain_alloc(unsigned type)
+static struct iommu_domain_ops omap_iommu_identity_ops = {
+ .attach_dev = omap_iommu_identity_attach,
+};
+
+static struct iommu_domain omap_iommu_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &omap_iommu_identity_ops,
+};
+
+static struct iommu_domain *omap_iommu_domain_alloc_paging(struct device *dev)
{
struct omap_iommu_domain *omap_domain;
- if (type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL);
if (!omap_domain)
return NULL;
@@ -1717,31 +1724,17 @@ static void omap_iommu_release_device(struct device *dev)
}
-static struct iommu_group *omap_iommu_device_group(struct device *dev)
-{
- struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev);
- struct iommu_group *group = ERR_PTR(-EINVAL);
-
- if (!arch_data)
- return ERR_PTR(-ENODEV);
-
- if (arch_data->iommu_dev)
- group = iommu_group_ref_get(arch_data->iommu_dev->group);
-
- return group;
-}
-
static const struct iommu_ops omap_iommu_ops = {
- .domain_alloc = omap_iommu_domain_alloc,
+ .identity_domain = &omap_iommu_identity_domain,
+ .domain_alloc_paging = omap_iommu_domain_alloc_paging,
.probe_device = omap_iommu_probe_device,
.release_device = omap_iommu_release_device,
- .device_group = omap_iommu_device_group,
- .set_platform_dma_ops = omap_iommu_set_platform_dma,
+ .device_group = generic_single_device_group,
.pgsize_bitmap = OMAP_IOMMU_PGSIZES,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = omap_iommu_attach_dev,
- .map = omap_iommu_map,
- .unmap = omap_iommu_unmap,
+ .map_pages = omap_iommu_map,
+ .unmap_pages = omap_iommu_unmap,
.iova_to_phys = omap_iommu_iova_to_phys,
.free = omap_iommu_domain_free,
}
diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h
index 18ee713ede..27697109ec 100644
--- a/drivers/iommu/omap-iommu.h
+++ b/drivers/iommu/omap-iommu.h
@@ -80,7 +80,7 @@ struct omap_iommu {
u32 id;
struct iommu_device iommu;
- struct iommu_group *group;
+ bool has_iommu_driver;
u8 pwrst;
};
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 8ff69fbf9f..2685861c0a 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -113,7 +113,6 @@ struct rk_iommu {
struct iommu_device iommu;
struct list_head node; /* entry in rk_iommu_domain.iommus */
struct iommu_domain *domain; /* domain to which iommu is attached */
- struct iommu_group *group;
};
struct rk_iommudata {
@@ -817,7 +816,8 @@ unwind:
}
static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+ phys_addr_t paddr, size_t size, size_t count,
+ int prot, gfp_t gfp, size_t *mapped)
{
struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
unsigned long flags;
@@ -850,12 +850,14 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
paddr, size, prot);
spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
+ if (!ret)
+ *mapped = size;
return ret;
}
static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
- size_t size, struct iommu_iotlb_gather *gather)
+ size_t size, size_t count, struct iommu_iotlb_gather *gather)
{
struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
unsigned long flags;
@@ -989,13 +991,8 @@ static int rk_iommu_identity_attach(struct iommu_domain *identity_domain,
return 0;
}
-static void rk_iommu_identity_free(struct iommu_domain *domain)
-{
-}
-
static struct iommu_domain_ops rk_identity_ops = {
.attach_dev = rk_iommu_identity_attach,
- .free = rk_iommu_identity_free,
};
static struct iommu_domain rk_identity_domain = {
@@ -1003,13 +1000,6 @@ static struct iommu_domain rk_identity_domain = {
.ops = &rk_identity_ops,
};
-#ifdef CONFIG_ARM
-static void rk_iommu_set_platform_dma(struct device *dev)
-{
- WARN_ON(rk_iommu_identity_attach(&rk_identity_domain, dev));
-}
-#endif
-
static int rk_iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
@@ -1055,16 +1045,10 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
return ret;
}
-static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
+static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev)
{
struct rk_iommu_domain *rk_domain;
- if (type == IOMMU_DOMAIN_IDENTITY)
- return &rk_identity_domain;
-
- if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
- return NULL;
-
if (!dma_dev)
return NULL;
@@ -1155,15 +1139,6 @@ static void rk_iommu_release_device(struct device *dev)
device_link_del(data->link);
}
-static struct iommu_group *rk_iommu_device_group(struct device *dev)
-{
- struct rk_iommu *iommu;
-
- iommu = rk_iommu_from_dev(dev);
-
- return iommu_group_ref_get(iommu->group);
-}
-
static int rk_iommu_of_xlate(struct device *dev,
struct of_phandle_args *args)
{
@@ -1186,19 +1161,17 @@ static int rk_iommu_of_xlate(struct device *dev,
}
static const struct iommu_ops rk_iommu_ops = {
- .domain_alloc = rk_iommu_domain_alloc,
+ .identity_domain = &rk_identity_domain,
+ .domain_alloc_paging = rk_iommu_domain_alloc_paging,
.probe_device = rk_iommu_probe_device,
.release_device = rk_iommu_release_device,
- .device_group = rk_iommu_device_group,
-#ifdef CONFIG_ARM
- .set_platform_dma_ops = rk_iommu_set_platform_dma,
-#endif
+ .device_group = generic_single_device_group,
.pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP,
.of_xlate = rk_iommu_of_xlate,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = rk_iommu_attach_device,
- .map = rk_iommu_map,
- .unmap = rk_iommu_unmap,
+ .map_pages = rk_iommu_map,
+ .unmap_pages = rk_iommu_unmap,
.iova_to_phys = rk_iommu_iova_to_phys,
.free = rk_iommu_domain_free,
}
@@ -1280,15 +1253,9 @@ static int rk_iommu_probe(struct platform_device *pdev)
if (err)
return err;
- iommu->group = iommu_group_alloc();
- if (IS_ERR(iommu->group)) {
- err = PTR_ERR(iommu->group);
- goto err_unprepare_clocks;
- }
-
err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
if (err)
- goto err_put_group;
+ goto err_unprepare_clocks;
err = iommu_device_register(&iommu->iommu, &rk_iommu_ops, dev);
if (err)
@@ -1325,8 +1292,6 @@ err_pm_disable:
pm_runtime_disable(dev);
err_remove_sysfs:
iommu_device_sysfs_remove(&iommu->iommu);
-err_put_group:
- iommu_group_put(iommu->group);
err_unprepare_clocks:
clk_bulk_unprepare(iommu->num_clocks, iommu->clocks);
return err;
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index fbf59a8db2..9a5196f523 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -14,16 +14,300 @@
#include <linux/rcupdate.h>
#include <asm/pci_dma.h>
+#include "dma-iommu.h"
+
static const struct iommu_ops s390_iommu_ops;
+static struct kmem_cache *dma_region_table_cache;
+static struct kmem_cache *dma_page_table_cache;
+
+static u64 s390_iommu_aperture;
+static u32 s390_iommu_aperture_factor = 1;
+
struct s390_domain {
struct iommu_domain domain;
struct list_head devices;
+ struct zpci_iommu_ctrs ctrs;
unsigned long *dma_table;
spinlock_t list_lock;
struct rcu_head rcu;
};
+static inline unsigned int calc_rtx(dma_addr_t ptr)
+{
+ return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
+}
+
+static inline unsigned int calc_sx(dma_addr_t ptr)
+{
+ return ((unsigned long)ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
+}
+
+static inline unsigned int calc_px(dma_addr_t ptr)
+{
+ return ((unsigned long)ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
+}
+
+static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
+{
+ *entry &= ZPCI_PTE_FLAG_MASK;
+ *entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
+}
+
+static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
+{
+ *entry &= ZPCI_RTE_FLAG_MASK;
+ *entry |= (sto & ZPCI_RTE_ADDR_MASK);
+ *entry |= ZPCI_TABLE_TYPE_RTX;
+}
+
+static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
+{
+ *entry &= ZPCI_STE_FLAG_MASK;
+ *entry |= (pto & ZPCI_STE_ADDR_MASK);
+ *entry |= ZPCI_TABLE_TYPE_SX;
+}
+
+static inline void validate_rt_entry(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_VALID_MASK;
+ *entry &= ~ZPCI_TABLE_OFFSET_MASK;
+ *entry |= ZPCI_TABLE_VALID;
+ *entry |= ZPCI_TABLE_LEN_RTX;
+}
+
+static inline void validate_st_entry(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_VALID_MASK;
+ *entry |= ZPCI_TABLE_VALID;
+}
+
+static inline void invalidate_pt_entry(unsigned long *entry)
+{
+ WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
+ *entry &= ~ZPCI_PTE_VALID_MASK;
+ *entry |= ZPCI_PTE_INVALID;
+}
+
+static inline void validate_pt_entry(unsigned long *entry)
+{
+ WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
+ *entry &= ~ZPCI_PTE_VALID_MASK;
+ *entry |= ZPCI_PTE_VALID;
+}
+
+static inline void entry_set_protected(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_PROT_MASK;
+ *entry |= ZPCI_TABLE_PROTECTED;
+}
+
+static inline void entry_clr_protected(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_PROT_MASK;
+ *entry |= ZPCI_TABLE_UNPROTECTED;
+}
+
+static inline int reg_entry_isvalid(unsigned long entry)
+{
+ return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
+}
+
+static inline int pt_entry_isvalid(unsigned long entry)
+{
+ return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
+}
+
+static inline unsigned long *get_rt_sto(unsigned long entry)
+{
+ if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
+ return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
+ else
+ return NULL;
+}
+
+static inline unsigned long *get_st_pto(unsigned long entry)
+{
+ if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
+ return phys_to_virt(entry & ZPCI_STE_ADDR_MASK);
+ else
+ return NULL;
+}
+
+static int __init dma_alloc_cpu_table_caches(void)
+{
+ dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
+ ZPCI_TABLE_SIZE,
+ ZPCI_TABLE_ALIGN,
+ 0, NULL);
+ if (!dma_region_table_cache)
+ return -ENOMEM;
+
+ dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
+ ZPCI_PT_SIZE,
+ ZPCI_PT_ALIGN,
+ 0, NULL);
+ if (!dma_page_table_cache) {
+ kmem_cache_destroy(dma_region_table_cache);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static unsigned long *dma_alloc_cpu_table(gfp_t gfp)
+{
+ unsigned long *table, *entry;
+
+ table = kmem_cache_alloc(dma_region_table_cache, gfp);
+ if (!table)
+ return NULL;
+
+ for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
+ *entry = ZPCI_TABLE_INVALID;
+ return table;
+}
+
+static void dma_free_cpu_table(void *table)
+{
+ kmem_cache_free(dma_region_table_cache, table);
+}
+
+static void dma_free_page_table(void *table)
+{
+ kmem_cache_free(dma_page_table_cache, table);
+}
+
+static void dma_free_seg_table(unsigned long entry)
+{
+ unsigned long *sto = get_rt_sto(entry);
+ int sx;
+
+ for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
+ if (reg_entry_isvalid(sto[sx]))
+ dma_free_page_table(get_st_pto(sto[sx]));
+
+ dma_free_cpu_table(sto);
+}
+
+static void dma_cleanup_tables(unsigned long *table)
+{
+ int rtx;
+
+ if (!table)
+ return;
+
+ for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
+ if (reg_entry_isvalid(table[rtx]))
+ dma_free_seg_table(table[rtx]);
+
+ dma_free_cpu_table(table);
+}
+
+static unsigned long *dma_alloc_page_table(gfp_t gfp)
+{
+ unsigned long *table, *entry;
+
+ table = kmem_cache_alloc(dma_page_table_cache, gfp);
+ if (!table)
+ return NULL;
+
+ for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
+ *entry = ZPCI_PTE_INVALID;
+ return table;
+}
+
+static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
+{
+ unsigned long old_rte, rte;
+ unsigned long *sto;
+
+ rte = READ_ONCE(*rtep);
+ if (reg_entry_isvalid(rte)) {
+ sto = get_rt_sto(rte);
+ } else {
+ sto = dma_alloc_cpu_table(gfp);
+ if (!sto)
+ return NULL;
+
+ set_rt_sto(&rte, virt_to_phys(sto));
+ validate_rt_entry(&rte);
+ entry_clr_protected(&rte);
+
+ old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
+ if (old_rte != ZPCI_TABLE_INVALID) {
+ /* Somone else was faster, use theirs */
+ dma_free_cpu_table(sto);
+ sto = get_rt_sto(old_rte);
+ }
+ }
+ return sto;
+}
+
+static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
+{
+ unsigned long old_ste, ste;
+ unsigned long *pto;
+
+ ste = READ_ONCE(*step);
+ if (reg_entry_isvalid(ste)) {
+ pto = get_st_pto(ste);
+ } else {
+ pto = dma_alloc_page_table(gfp);
+ if (!pto)
+ return NULL;
+ set_st_pto(&ste, virt_to_phys(pto));
+ validate_st_entry(&ste);
+ entry_clr_protected(&ste);
+
+ old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
+ if (old_ste != ZPCI_TABLE_INVALID) {
+ /* Somone else was faster, use theirs */
+ dma_free_page_table(pto);
+ pto = get_st_pto(old_ste);
+ }
+ }
+ return pto;
+}
+
+static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, gfp_t gfp)
+{
+ unsigned long *sto, *pto;
+ unsigned int rtx, sx, px;
+
+ rtx = calc_rtx(dma_addr);
+ sto = dma_get_seg_table_origin(&rto[rtx], gfp);
+ if (!sto)
+ return NULL;
+
+ sx = calc_sx(dma_addr);
+ pto = dma_get_page_table_origin(&sto[sx], gfp);
+ if (!pto)
+ return NULL;
+
+ px = calc_px(dma_addr);
+ return &pto[px];
+}
+
+static void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
+{
+ unsigned long pte;
+
+ pte = READ_ONCE(*ptep);
+ if (flags & ZPCI_PTE_INVALID) {
+ invalidate_pt_entry(&pte);
+ } else {
+ set_pt_pfaa(&pte, page_addr);
+ validate_pt_entry(&pte);
+ }
+
+ if (flags & ZPCI_TABLE_PROTECTED)
+ entry_set_protected(&pte);
+ else
+ entry_clr_protected(&pte);
+
+ xchg(ptep, pte);
+}
+
static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
{
return container_of(dom, struct s390_domain, domain);
@@ -31,21 +315,22 @@ static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
{
+ struct zpci_dev *zdev = to_zpci_dev(dev);
+
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
return true;
+ case IOMMU_CAP_DEFERRED_FLUSH:
+ return zdev->pft != PCI_FUNC_TYPE_ISM;
default:
return false;
}
}
-static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
+static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
{
struct s390_domain *s390_domain;
- if (domain_type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
if (!s390_domain)
return NULL;
@@ -84,14 +369,13 @@ static void s390_domain_free(struct iommu_domain *domain)
call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain);
}
-static void __s390_iommu_detach_device(struct zpci_dev *zdev)
+static void s390_iommu_detach_device(struct iommu_domain *domain,
+ struct device *dev)
{
- struct s390_domain *s390_domain = zdev->s390_domain;
+ struct s390_domain *s390_domain = to_s390_domain(domain);
+ struct zpci_dev *zdev = to_zpci_dev(dev);
unsigned long flags;
- if (!s390_domain)
- return;
-
spin_lock_irqsave(&s390_domain->list_lock, flags);
list_del_rcu(&zdev->iommu_list);
spin_unlock_irqrestore(&s390_domain->list_lock, flags);
@@ -118,9 +402,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
return -EINVAL;
if (zdev->s390_domain)
- __s390_iommu_detach_device(zdev);
- else if (zdev->dma_table)
- zpci_dma_exit_device(zdev);
+ s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
virt_to_phys(s390_domain->dma_table), &status);
@@ -130,7 +412,6 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
*/
if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
return -EIO;
- zdev->dma_table = s390_domain->dma_table;
zdev->dma_table = s390_domain->dma_table;
zdev->s390_domain = s390_domain;
@@ -142,14 +423,6 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
return 0;
}
-static void s390_iommu_set_platform_dma(struct device *dev)
-{
- struct zpci_dev *zdev = to_zpci_dev(dev);
-
- __s390_iommu_detach_device(zdev);
- zpci_dma_init_device(zdev);
-}
-
static void s390_iommu_get_resv_regions(struct device *dev,
struct list_head *list)
{
@@ -190,6 +463,9 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)
if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
+ if (zdev->tlb_refresh)
+ dev->iommu->shadow_on_flush = 1;
+
return &zdev->iommu_dev;
}
@@ -202,7 +478,13 @@ static void s390_iommu_release_device(struct device *dev)
* to the device, but keep it attached to other devices in the group.
*/
if (zdev)
- __s390_iommu_detach_device(zdev);
+ s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
+}
+
+static int zpci_refresh_all(struct zpci_dev *zdev)
+{
+ return zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
+ zdev->end_dma - zdev->start_dma + 1);
}
static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain)
@@ -212,8 +494,8 @@ static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain)
rcu_read_lock();
list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
- zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
- zdev->end_dma - zdev->start_dma + 1);
+ atomic64_inc(&s390_domain->ctrs.global_rpcits);
+ zpci_refresh_all(zdev);
}
rcu_read_unlock();
}
@@ -231,26 +513,40 @@ static void s390_iommu_iotlb_sync(struct iommu_domain *domain,
rcu_read_lock();
list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
+ atomic64_inc(&s390_domain->ctrs.sync_rpcits);
zpci_refresh_trans((u64)zdev->fh << 32, gather->start,
size);
}
rcu_read_unlock();
}
-static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static int s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
struct s390_domain *s390_domain = to_s390_domain(domain);
struct zpci_dev *zdev;
+ int ret = 0;
rcu_read_lock();
list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
if (!zdev->tlb_refresh)
continue;
- zpci_refresh_trans((u64)zdev->fh << 32,
- iova, size);
+ atomic64_inc(&s390_domain->ctrs.sync_map_rpcits);
+ ret = zpci_refresh_trans((u64)zdev->fh << 32,
+ iova, size);
+ /*
+ * let the hypervisor discover invalidated entries
+ * allowing it to free IOVAs and unpin pages
+ */
+ if (ret == -ENOMEM) {
+ ret = zpci_refresh_all(zdev);
+ if (ret)
+ break;
+ }
}
rcu_read_unlock();
+
+ return ret;
}
static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
@@ -330,16 +626,15 @@ static int s390_iommu_map_pages(struct iommu_domain *domain,
if (!IS_ALIGNED(iova | paddr, pgsize))
return -EINVAL;
- if (!(prot & IOMMU_READ))
- return -EINVAL;
-
if (!(prot & IOMMU_WRITE))
flags |= ZPCI_TABLE_PROTECTED;
rc = s390_iommu_validate_trans(s390_domain, paddr, iova,
- pgcount, flags, gfp);
- if (!rc)
+ pgcount, flags, gfp);
+ if (!rc) {
*mapped = size;
+ atomic64_add(pgcount, &s390_domain->ctrs.mapped_pages);
+ }
return rc;
}
@@ -395,12 +690,26 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
return 0;
iommu_iotlb_gather_add_range(gather, iova, size);
+ atomic64_add(pgcount, &s390_domain->ctrs.unmapped_pages);
return size;
}
+static void s390_iommu_probe_finalize(struct device *dev)
+{
+ iommu_setup_dma_ops(dev, 0, U64_MAX);
+}
+
+struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
+{
+ if (!zdev || !zdev->s390_domain)
+ return NULL;
+ return &zdev->s390_domain->ctrs;
+}
+
int zpci_init_iommu(struct zpci_dev *zdev)
{
+ u64 aperture_size;
int rc = 0;
rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL,
@@ -412,6 +721,12 @@ int zpci_init_iommu(struct zpci_dev *zdev)
if (rc)
goto out_sysfs;
+ zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
+ aperture_size = min3(s390_iommu_aperture,
+ ZPCI_TABLE_SIZE_RT - zdev->start_dma,
+ zdev->end_dma - zdev->start_dma + 1);
+ zdev->end_dma = zdev->start_dma + aperture_size - 1;
+
return 0;
out_sysfs:
@@ -427,13 +742,52 @@ void zpci_destroy_iommu(struct zpci_dev *zdev)
iommu_device_sysfs_remove(&zdev->iommu_dev);
}
+static int __init s390_iommu_setup(char *str)
+{
+ if (!strcmp(str, "strict")) {
+ pr_warn("s390_iommu=strict deprecated; use iommu.strict=1 instead\n");
+ iommu_set_dma_strict();
+ }
+ return 1;
+}
+
+__setup("s390_iommu=", s390_iommu_setup);
+
+static int __init s390_iommu_aperture_setup(char *str)
+{
+ if (kstrtou32(str, 10, &s390_iommu_aperture_factor))
+ s390_iommu_aperture_factor = 1;
+ return 1;
+}
+
+__setup("s390_iommu_aperture=", s390_iommu_aperture_setup);
+
+static int __init s390_iommu_init(void)
+{
+ int rc;
+
+ iommu_dma_forcedac = true;
+ s390_iommu_aperture = (u64)virt_to_phys(high_memory);
+ if (!s390_iommu_aperture_factor)
+ s390_iommu_aperture = ULONG_MAX;
+ else
+ s390_iommu_aperture *= s390_iommu_aperture_factor;
+
+ rc = dma_alloc_cpu_table_caches();
+ if (rc)
+ return rc;
+
+ return rc;
+}
+subsys_initcall(s390_iommu_init);
+
static const struct iommu_ops s390_iommu_ops = {
.capable = s390_iommu_capable,
- .domain_alloc = s390_domain_alloc,
+ .domain_alloc_paging = s390_domain_alloc_paging,
.probe_device = s390_iommu_probe_device,
+ .probe_finalize = s390_iommu_probe_finalize,
.release_device = s390_iommu_release_device,
.device_group = generic_device_group,
- .set_platform_dma_ops = s390_iommu_set_platform_dma,
.pgsize_bitmap = SZ_4K,
.get_resv_regions = s390_iommu_get_resv_regions,
.default_domain_ops = &(const struct iommu_domain_ops) {
diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
index 2fa9afebd4..2eb9fb4670 100644
--- a/drivers/iommu/sprd-iommu.c
+++ b/drivers/iommu/sprd-iommu.c
@@ -70,7 +70,6 @@ struct sprd_iommu_device {
void __iomem *base;
struct device *dev;
struct iommu_device iommu;
- struct iommu_group *group;
struct clk *eb;
};
@@ -134,13 +133,10 @@ sprd_iommu_pgt_size(struct iommu_domain *domain)
SPRD_IOMMU_PAGE_SHIFT) * sizeof(u32);
}
-static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type)
+static struct iommu_domain *sprd_iommu_domain_alloc_paging(struct device *dev)
{
struct sprd_iommu_domain *dom;
- if (domain_type != IOMMU_DOMAIN_DMA && domain_type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
dom = kzalloc(sizeof(*dom), GFP_KERNEL);
if (!dom)
return NULL;
@@ -345,8 +341,8 @@ static size_t sprd_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
return size;
}
-static void sprd_iommu_sync_map(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static int sprd_iommu_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
struct sprd_iommu_domain *dom = to_sprd_domain(domain);
unsigned int reg;
@@ -358,6 +354,7 @@ static void sprd_iommu_sync_map(struct iommu_domain *domain,
/* clear IOMMU TLB buffer after page table updated */
sprd_iommu_write(dom->sdev, reg, 0xffffffff);
+ return 0;
}
static void sprd_iommu_sync(struct iommu_domain *domain,
@@ -399,13 +396,6 @@ static struct iommu_device *sprd_iommu_probe_device(struct device *dev)
return &sdev->iommu;
}
-static struct iommu_group *sprd_iommu_device_group(struct device *dev)
-{
- struct sprd_iommu_device *sdev = dev_iommu_priv_get(dev);
-
- return iommu_group_ref_get(sdev->group);
-}
-
static int sprd_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
struct platform_device *pdev;
@@ -421,9 +411,9 @@ static int sprd_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
static const struct iommu_ops sprd_iommu_ops = {
- .domain_alloc = sprd_iommu_domain_alloc,
+ .domain_alloc_paging = sprd_iommu_domain_alloc_paging,
.probe_device = sprd_iommu_probe_device,
- .device_group = sprd_iommu_device_group,
+ .device_group = generic_single_device_group,
.of_xlate = sprd_iommu_of_xlate,
.pgsize_bitmap = SPRD_IOMMU_PAGE_SIZE,
.owner = THIS_MODULE,
@@ -496,16 +486,9 @@ static int sprd_iommu_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, sdev);
sdev->dev = dev;
- /* All the client devices are in the same iommu-group */
- sdev->group = iommu_group_alloc();
- if (IS_ERR(sdev->group)) {
- ret = PTR_ERR(sdev->group);
- goto free_page;
- }
-
ret = iommu_device_sysfs_add(&sdev->iommu, dev, NULL, dev_name(dev));
if (ret)
- goto put_group;
+ goto free_page;
ret = iommu_device_register(&sdev->iommu, &sprd_iommu_ops, dev);
if (ret)
@@ -530,8 +513,6 @@ unregister_iommu:
iommu_device_unregister(&sdev->iommu);
remove_sysfs:
iommu_device_sysfs_remove(&sdev->iommu);
-put_group:
- iommu_group_put(sdev->group);
free_page:
dma_free_coherent(sdev->dev, SPRD_IOMMU_PAGE_SIZE, sdev->prot_page_va, sdev->prot_page_pa);
return ret;
@@ -543,9 +524,6 @@ static void sprd_iommu_remove(struct platform_device *pdev)
dma_free_coherent(sdev->dev, SPRD_IOMMU_PAGE_SIZE, sdev->prot_page_va, sdev->prot_page_pa);
- iommu_group_put(sdev->group);
- sdev->group = NULL;
-
platform_set_drvdata(pdev, NULL);
iommu_device_sysfs_remove(&sdev->iommu);
iommu_device_unregister(&sdev->iommu);
diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index 74c5cb93e9..41484a5a39 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -107,7 +107,6 @@ struct sun50i_iommu {
struct clk *clk;
struct iommu_domain *domain;
- struct iommu_group *group;
struct kmem_cache *pt_pool;
};
@@ -402,8 +401,8 @@ static void sun50i_iommu_flush_iotlb_all(struct iommu_domain *domain)
spin_unlock_irqrestore(&iommu->iommu_lock, flags);
}
-static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static int sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
struct sun50i_iommu *iommu = sun50i_domain->iommu;
@@ -412,6 +411,8 @@ static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain,
spin_lock_irqsave(&iommu->iommu_lock, flags);
sun50i_iommu_zap_range(iommu, iova, size);
spin_unlock_irqrestore(&iommu->iommu_lock, flags);
+
+ return 0;
}
static void sun50i_iommu_iotlb_sync(struct iommu_domain *domain,
@@ -589,7 +590,8 @@ static u32 *sun50i_dte_get_page_table(struct sun50i_iommu_domain *sun50i_domain,
}
static int sun50i_iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+ phys_addr_t paddr, size_t size, size_t count,
+ int prot, gfp_t gfp, size_t *mapped)
{
struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
struct sun50i_iommu *iommu = sun50i_domain->iommu;
@@ -616,13 +618,14 @@ static int sun50i_iommu_map(struct iommu_domain *domain, unsigned long iova,
*pte_addr = sun50i_mk_pte(paddr, prot);
sun50i_table_flush(sun50i_domain, pte_addr, 1);
+ *mapped = size;
out:
return ret;
}
static size_t sun50i_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
- size_t size, struct iommu_iotlb_gather *gather)
+ size_t size, size_t count, struct iommu_iotlb_gather *gather)
{
struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
phys_addr_t pt_phys;
@@ -667,14 +670,11 @@ static phys_addr_t sun50i_iommu_iova_to_phys(struct iommu_domain *domain,
sun50i_iova_get_page_offset(iova);
}
-static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type)
+static struct iommu_domain *
+sun50i_iommu_domain_alloc_paging(struct device *dev)
{
struct sun50i_iommu_domain *sun50i_domain;
- if (type != IOMMU_DOMAIN_DMA &&
- type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
sun50i_domain = kzalloc(sizeof(*sun50i_domain), GFP_KERNEL);
if (!sun50i_domain)
return NULL;
@@ -757,21 +757,32 @@ static void sun50i_iommu_detach_domain(struct sun50i_iommu *iommu,
iommu->domain = NULL;
}
-static void sun50i_iommu_detach_device(struct iommu_domain *domain,
- struct device *dev)
+static int sun50i_iommu_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
{
- struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
struct sun50i_iommu *iommu = dev_iommu_priv_get(dev);
+ struct sun50i_iommu_domain *sun50i_domain;
dev_dbg(dev, "Detaching from IOMMU domain\n");
- if (iommu->domain != domain)
- return;
+ if (iommu->domain == identity_domain)
+ return 0;
+ sun50i_domain = to_sun50i_domain(iommu->domain);
if (refcount_dec_and_test(&sun50i_domain->refcnt))
sun50i_iommu_detach_domain(iommu, sun50i_domain);
+ return 0;
}
+static struct iommu_domain_ops sun50i_iommu_identity_ops = {
+ .attach_dev = sun50i_iommu_identity_attach,
+};
+
+static struct iommu_domain sun50i_iommu_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &sun50i_iommu_identity_ops,
+};
+
static int sun50i_iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
@@ -789,8 +800,7 @@ static int sun50i_iommu_attach_device(struct iommu_domain *domain,
if (iommu->domain == domain)
return 0;
- if (iommu->domain)
- sun50i_iommu_detach_device(iommu->domain, dev);
+ sun50i_iommu_identity_attach(&sun50i_iommu_identity_domain, dev);
sun50i_iommu_attach_domain(iommu, sun50i_domain);
@@ -808,13 +818,6 @@ static struct iommu_device *sun50i_iommu_probe_device(struct device *dev)
return &iommu->iommu;
}
-static struct iommu_group *sun50i_iommu_device_group(struct device *dev)
-{
- struct sun50i_iommu *iommu = sun50i_iommu_from_dev(dev);
-
- return iommu_group_ref_get(iommu->group);
-}
-
static int sun50i_iommu_of_xlate(struct device *dev,
struct of_phandle_args *args)
{
@@ -827,9 +830,10 @@ static int sun50i_iommu_of_xlate(struct device *dev,
}
static const struct iommu_ops sun50i_iommu_ops = {
+ .identity_domain = &sun50i_iommu_identity_domain,
.pgsize_bitmap = SZ_4K,
- .device_group = sun50i_iommu_device_group,
- .domain_alloc = sun50i_iommu_domain_alloc,
+ .device_group = generic_single_device_group,
+ .domain_alloc_paging = sun50i_iommu_domain_alloc_paging,
.of_xlate = sun50i_iommu_of_xlate,
.probe_device = sun50i_iommu_probe_device,
.default_domain_ops = &(const struct iommu_domain_ops) {
@@ -838,8 +842,8 @@ static const struct iommu_ops sun50i_iommu_ops = {
.iotlb_sync_map = sun50i_iommu_iotlb_sync_map,
.iotlb_sync = sun50i_iommu_iotlb_sync,
.iova_to_phys = sun50i_iommu_iova_to_phys,
- .map = sun50i_iommu_map,
- .unmap = sun50i_iommu_unmap,
+ .map_pages = sun50i_iommu_map,
+ .unmap_pages = sun50i_iommu_unmap,
.free = sun50i_iommu_domain_free,
}
};
@@ -985,6 +989,7 @@ static int sun50i_iommu_probe(struct platform_device *pdev)
if (!iommu)
return -ENOMEM;
spin_lock_init(&iommu->iommu_lock);
+ iommu->domain = &sun50i_iommu_identity_domain;
platform_set_drvdata(pdev, iommu);
iommu->dev = &pdev->dev;
@@ -995,42 +1000,36 @@ static int sun50i_iommu_probe(struct platform_device *pdev)
if (!iommu->pt_pool)
return -ENOMEM;
- iommu->group = iommu_group_alloc();
- if (IS_ERR(iommu->group)) {
- ret = PTR_ERR(iommu->group);
- goto err_free_cache;
- }
-
iommu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(iommu->base)) {
ret = PTR_ERR(iommu->base);
- goto err_free_group;
+ goto err_free_cache;
}
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
ret = irq;
- goto err_free_group;
+ goto err_free_cache;
}
iommu->clk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(iommu->clk)) {
dev_err(&pdev->dev, "Couldn't get our clock.\n");
ret = PTR_ERR(iommu->clk);
- goto err_free_group;
+ goto err_free_cache;
}
iommu->reset = devm_reset_control_get(&pdev->dev, NULL);
if (IS_ERR(iommu->reset)) {
dev_err(&pdev->dev, "Couldn't get our reset line.\n");
ret = PTR_ERR(iommu->reset);
- goto err_free_group;
+ goto err_free_cache;
}
ret = iommu_device_sysfs_add(&iommu->iommu, &pdev->dev,
NULL, dev_name(&pdev->dev));
if (ret)
- goto err_free_group;
+ goto err_free_cache;
ret = iommu_device_register(&iommu->iommu, &sun50i_iommu_ops, &pdev->dev);
if (ret)
@@ -1049,9 +1048,6 @@ err_unregister:
err_remove_sysfs:
iommu_device_sysfs_remove(&iommu->iommu);
-err_free_group:
- iommu_group_put(iommu->group);
-
err_free_cache:
kmem_cache_destroy(iommu->pt_pool);
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
deleted file mode 100644
index a482ff838b..0000000000
--- a/drivers/iommu/tegra-gart.c
+++ /dev/null
@@ -1,371 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * IOMMU API for Graphics Address Relocation Table on Tegra20
- *
- * Copyright (c) 2010-2012, NVIDIA CORPORATION. All rights reserved.
- *
- * Author: Hiroshi DOYU <hdoyu@nvidia.com>
- */
-
-#define dev_fmt(fmt) "gart: " fmt
-
-#include <linux/io.h>
-#include <linux/iommu.h>
-#include <linux/moduleparam.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/vmalloc.h>
-
-#include <soc/tegra/mc.h>
-
-#define GART_REG_BASE 0x24
-#define GART_CONFIG (0x24 - GART_REG_BASE)
-#define GART_ENTRY_ADDR (0x28 - GART_REG_BASE)
-#define GART_ENTRY_DATA (0x2c - GART_REG_BASE)
-
-#define GART_ENTRY_PHYS_ADDR_VALID BIT(31)
-
-#define GART_PAGE_SHIFT 12
-#define GART_PAGE_SIZE (1 << GART_PAGE_SHIFT)
-#define GART_PAGE_MASK GENMASK(30, GART_PAGE_SHIFT)
-
-/* bitmap of the page sizes currently supported */
-#define GART_IOMMU_PGSIZES (GART_PAGE_SIZE)
-
-struct gart_device {
- void __iomem *regs;
- u32 *savedata;
- unsigned long iovmm_base; /* offset to vmm_area start */
- unsigned long iovmm_end; /* offset to vmm_area end */
- spinlock_t pte_lock; /* for pagetable */
- spinlock_t dom_lock; /* for active domain */
- unsigned int active_devices; /* number of active devices */
- struct iommu_domain *active_domain; /* current active domain */
- struct iommu_device iommu; /* IOMMU Core handle */
- struct device *dev;
-};
-
-static struct gart_device *gart_handle; /* unique for a system */
-
-static bool gart_debug;
-
-/*
- * Any interaction between any block on PPSB and a block on APB or AHB
- * must have these read-back to ensure the APB/AHB bus transaction is
- * complete before initiating activity on the PPSB block.
- */
-#define FLUSH_GART_REGS(gart) readl_relaxed((gart)->regs + GART_CONFIG)
-
-#define for_each_gart_pte(gart, iova) \
- for (iova = gart->iovmm_base; \
- iova < gart->iovmm_end; \
- iova += GART_PAGE_SIZE)
-
-static inline void gart_set_pte(struct gart_device *gart,
- unsigned long iova, unsigned long pte)
-{
- writel_relaxed(iova, gart->regs + GART_ENTRY_ADDR);
- writel_relaxed(pte, gart->regs + GART_ENTRY_DATA);
-}
-
-static inline unsigned long gart_read_pte(struct gart_device *gart,
- unsigned long iova)
-{
- unsigned long pte;
-
- writel_relaxed(iova, gart->regs + GART_ENTRY_ADDR);
- pte = readl_relaxed(gart->regs + GART_ENTRY_DATA);
-
- return pte;
-}
-
-static void do_gart_setup(struct gart_device *gart, const u32 *data)
-{
- unsigned long iova;
-
- for_each_gart_pte(gart, iova)
- gart_set_pte(gart, iova, data ? *(data++) : 0);
-
- writel_relaxed(1, gart->regs + GART_CONFIG);
- FLUSH_GART_REGS(gart);
-}
-
-static inline bool gart_iova_range_invalid(struct gart_device *gart,
- unsigned long iova, size_t bytes)
-{
- return unlikely(iova < gart->iovmm_base || bytes != GART_PAGE_SIZE ||
- iova + bytes > gart->iovmm_end);
-}
-
-static inline bool gart_pte_valid(struct gart_device *gart, unsigned long iova)
-{
- return !!(gart_read_pte(gart, iova) & GART_ENTRY_PHYS_ADDR_VALID);
-}
-
-static int gart_iommu_attach_dev(struct iommu_domain *domain,
- struct device *dev)
-{
- struct gart_device *gart = gart_handle;
- int ret = 0;
-
- spin_lock(&gart->dom_lock);
-
- if (gart->active_domain && gart->active_domain != domain) {
- ret = -EINVAL;
- } else if (dev_iommu_priv_get(dev) != domain) {
- dev_iommu_priv_set(dev, domain);
- gart->active_domain = domain;
- gart->active_devices++;
- }
-
- spin_unlock(&gart->dom_lock);
-
- return ret;
-}
-
-static void gart_iommu_set_platform_dma(struct device *dev)
-{
- struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct gart_device *gart = gart_handle;
-
- spin_lock(&gart->dom_lock);
-
- if (dev_iommu_priv_get(dev) == domain) {
- dev_iommu_priv_set(dev, NULL);
-
- if (--gart->active_devices == 0)
- gart->active_domain = NULL;
- }
-
- spin_unlock(&gart->dom_lock);
-}
-
-static struct iommu_domain *gart_iommu_domain_alloc(unsigned type)
-{
- struct iommu_domain *domain;
-
- if (type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
- domain = kzalloc(sizeof(*domain), GFP_KERNEL);
- if (domain) {
- domain->geometry.aperture_start = gart_handle->iovmm_base;
- domain->geometry.aperture_end = gart_handle->iovmm_end - 1;
- domain->geometry.force_aperture = true;
- }
-
- return domain;
-}
-
-static void gart_iommu_domain_free(struct iommu_domain *domain)
-{
- WARN_ON(gart_handle->active_domain == domain);
- kfree(domain);
-}
-
-static inline int __gart_iommu_map(struct gart_device *gart, unsigned long iova,
- unsigned long pa)
-{
- if (unlikely(gart_debug && gart_pte_valid(gart, iova))) {
- dev_err(gart->dev, "Page entry is in-use\n");
- return -EINVAL;
- }
-
- gart_set_pte(gart, iova, GART_ENTRY_PHYS_ADDR_VALID | pa);
-
- return 0;
-}
-
-static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t pa, size_t bytes, int prot, gfp_t gfp)
-{
- struct gart_device *gart = gart_handle;
- int ret;
-
- if (gart_iova_range_invalid(gart, iova, bytes))
- return -EINVAL;
-
- spin_lock(&gart->pte_lock);
- ret = __gart_iommu_map(gart, iova, (unsigned long)pa);
- spin_unlock(&gart->pte_lock);
-
- return ret;
-}
-
-static inline int __gart_iommu_unmap(struct gart_device *gart,
- unsigned long iova)
-{
- if (unlikely(gart_debug && !gart_pte_valid(gart, iova))) {
- dev_err(gart->dev, "Page entry is invalid\n");
- return -EINVAL;
- }
-
- gart_set_pte(gart, iova, 0);
-
- return 0;
-}
-
-static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
- size_t bytes, struct iommu_iotlb_gather *gather)
-{
- struct gart_device *gart = gart_handle;
- int err;
-
- if (gart_iova_range_invalid(gart, iova, bytes))
- return 0;
-
- spin_lock(&gart->pte_lock);
- err = __gart_iommu_unmap(gart, iova);
- spin_unlock(&gart->pte_lock);
-
- return err ? 0 : bytes;
-}
-
-static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain,
- dma_addr_t iova)
-{
- struct gart_device *gart = gart_handle;
- unsigned long pte;
-
- if (gart_iova_range_invalid(gart, iova, GART_PAGE_SIZE))
- return -EINVAL;
-
- spin_lock(&gart->pte_lock);
- pte = gart_read_pte(gart, iova);
- spin_unlock(&gart->pte_lock);
-
- return pte & GART_PAGE_MASK;
-}
-
-static struct iommu_device *gart_iommu_probe_device(struct device *dev)
-{
- if (!dev_iommu_fwspec_get(dev))
- return ERR_PTR(-ENODEV);
-
- return &gart_handle->iommu;
-}
-
-static int gart_iommu_of_xlate(struct device *dev,
- struct of_phandle_args *args)
-{
- return 0;
-}
-
-static void gart_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
- size_t size)
-{
- FLUSH_GART_REGS(gart_handle);
-}
-
-static void gart_iommu_sync(struct iommu_domain *domain,
- struct iommu_iotlb_gather *gather)
-{
- size_t length = gather->end - gather->start + 1;
-
- gart_iommu_sync_map(domain, gather->start, length);
-}
-
-static const struct iommu_ops gart_iommu_ops = {
- .domain_alloc = gart_iommu_domain_alloc,
- .probe_device = gart_iommu_probe_device,
- .device_group = generic_device_group,
- .set_platform_dma_ops = gart_iommu_set_platform_dma,
- .pgsize_bitmap = GART_IOMMU_PGSIZES,
- .of_xlate = gart_iommu_of_xlate,
- .default_domain_ops = &(const struct iommu_domain_ops) {
- .attach_dev = gart_iommu_attach_dev,
- .map = gart_iommu_map,
- .unmap = gart_iommu_unmap,
- .iova_to_phys = gart_iommu_iova_to_phys,
- .iotlb_sync_map = gart_iommu_sync_map,
- .iotlb_sync = gart_iommu_sync,
- .free = gart_iommu_domain_free,
- }
-};
-
-int tegra_gart_suspend(struct gart_device *gart)
-{
- u32 *data = gart->savedata;
- unsigned long iova;
-
- /*
- * All GART users shall be suspended at this point. Disable
- * address translation to trap all GART accesses as invalid
- * memory accesses.
- */
- writel_relaxed(0, gart->regs + GART_CONFIG);
- FLUSH_GART_REGS(gart);
-
- for_each_gart_pte(gart, iova)
- *(data++) = gart_read_pte(gart, iova);
-
- return 0;
-}
-
-int tegra_gart_resume(struct gart_device *gart)
-{
- do_gart_setup(gart, gart->savedata);
-
- return 0;
-}
-
-struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc)
-{
- struct gart_device *gart;
- struct resource *res;
- int err;
-
- BUILD_BUG_ON(PAGE_SHIFT != GART_PAGE_SHIFT);
-
- /* the GART memory aperture is required */
- res = platform_get_resource(to_platform_device(dev), IORESOURCE_MEM, 1);
- if (!res) {
- dev_err(dev, "Memory aperture resource unavailable\n");
- return ERR_PTR(-ENXIO);
- }
-
- gart = kzalloc(sizeof(*gart), GFP_KERNEL);
- if (!gart)
- return ERR_PTR(-ENOMEM);
-
- gart_handle = gart;
-
- gart->dev = dev;
- gart->regs = mc->regs + GART_REG_BASE;
- gart->iovmm_base = res->start;
- gart->iovmm_end = res->end + 1;
- spin_lock_init(&gart->pte_lock);
- spin_lock_init(&gart->dom_lock);
-
- do_gart_setup(gart, NULL);
-
- err = iommu_device_sysfs_add(&gart->iommu, dev, NULL, "gart");
- if (err)
- goto free_gart;
-
- err = iommu_device_register(&gart->iommu, &gart_iommu_ops, dev);
- if (err)
- goto remove_sysfs;
-
- gart->savedata = vmalloc(resource_size(res) / GART_PAGE_SIZE *
- sizeof(u32));
- if (!gart->savedata) {
- err = -ENOMEM;
- goto unregister_iommu;
- }
-
- return gart;
-
-unregister_iommu:
- iommu_device_unregister(&gart->iommu);
-remove_sysfs:
- iommu_device_sysfs_remove(&gart->iommu);
-free_gart:
- kfree(gart);
-
- return ERR_PTR(err);
-}
-
-module_param(gart_debug, bool, 0644);
-MODULE_PARM_DESC(gart_debug, "Enable GART debugging");
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index e445f80d02..310871728a 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -272,13 +272,10 @@ static void tegra_smmu_free_asid(struct tegra_smmu *smmu, unsigned int id)
clear_bit(id, smmu->asids);
}
-static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type)
+static struct iommu_domain *tegra_smmu_domain_alloc_paging(struct device *dev)
{
struct tegra_smmu_as *as;
- if (type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
as = kzalloc(sizeof(*as), GFP_KERNEL);
if (!as)
return NULL;
@@ -511,23 +508,39 @@ disable:
return err;
}
-static void tegra_smmu_set_platform_dma(struct device *dev)
+static int tegra_smmu_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct tegra_smmu_as *as = to_smmu_as(domain);
- struct tegra_smmu *smmu = as->smmu;
+ struct tegra_smmu_as *as;
+ struct tegra_smmu *smmu;
unsigned int index;
if (!fwspec)
- return;
+ return -ENODEV;
+ if (domain == identity_domain || !domain)
+ return 0;
+
+ as = to_smmu_as(domain);
+ smmu = as->smmu;
for (index = 0; index < fwspec->num_ids; index++) {
tegra_smmu_disable(smmu, fwspec->ids[index], as->id);
tegra_smmu_as_unprepare(smmu, as);
}
+ return 0;
}
+static struct iommu_domain_ops tegra_smmu_identity_ops = {
+ .attach_dev = tegra_smmu_identity_attach,
+};
+
+static struct iommu_domain tegra_smmu_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &tegra_smmu_identity_ops,
+};
+
static void tegra_smmu_set_pde(struct tegra_smmu_as *as, unsigned long iova,
u32 value)
{
@@ -751,7 +764,8 @@ __tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
}
static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+ phys_addr_t paddr, size_t size, size_t count,
+ int prot, gfp_t gfp, size_t *mapped)
{
struct tegra_smmu_as *as = to_smmu_as(domain);
unsigned long flags;
@@ -761,11 +775,14 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
ret = __tegra_smmu_map(domain, iova, paddr, size, prot, gfp, &flags);
spin_unlock_irqrestore(&as->lock, flags);
+ if (!ret)
+ *mapped = size;
+
return ret;
}
static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
- size_t size, struct iommu_iotlb_gather *gather)
+ size_t size, size_t count, struct iommu_iotlb_gather *gather)
{
struct tegra_smmu_as *as = to_smmu_as(domain);
unsigned long flags;
@@ -962,17 +979,28 @@ static int tegra_smmu_of_xlate(struct device *dev,
return iommu_fwspec_add_ids(dev, &id, 1);
}
+static int tegra_smmu_def_domain_type(struct device *dev)
+{
+ /*
+ * FIXME: For now we want to run all translation in IDENTITY mode, due
+ * to some device quirks. Better would be to just quirk the troubled
+ * devices.
+ */
+ return IOMMU_DOMAIN_IDENTITY;
+}
+
static const struct iommu_ops tegra_smmu_ops = {
- .domain_alloc = tegra_smmu_domain_alloc,
+ .identity_domain = &tegra_smmu_identity_domain,
+ .def_domain_type = &tegra_smmu_def_domain_type,
+ .domain_alloc_paging = tegra_smmu_domain_alloc_paging,
.probe_device = tegra_smmu_probe_device,
.device_group = tegra_smmu_device_group,
- .set_platform_dma_ops = tegra_smmu_set_platform_dma,
.of_xlate = tegra_smmu_of_xlate,
.pgsize_bitmap = SZ_4K,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = tegra_smmu_attach_dev,
- .map = tegra_smmu_map,
- .unmap = tegra_smmu_unmap,
+ .map_pages = tegra_smmu_map,
+ .unmap_pages = tegra_smmu_unmap,
.iova_to_phys = tegra_smmu_iova_to_phys,
.free = tegra_smmu_domain_free,
}
@@ -1056,8 +1084,6 @@ DEFINE_SHOW_ATTRIBUTE(tegra_smmu_clients);
static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu)
{
smmu->debugfs = debugfs_create_dir("smmu", NULL);
- if (!smmu->debugfs)
- return;
debugfs_create_file("swgroups", S_IRUGO, smmu->debugfs, smmu,
&tegra_smmu_swgroups_fops);
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 17dcd826f5..379ebe03ef 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -85,7 +85,7 @@ struct viommu_request {
void *writeback;
unsigned int write_offset;
unsigned int len;
- char buf[];
+ char buf[] __counted_by(len);
};
#define VIOMMU_FAULT_RESV_MASK 0xffffff00
@@ -230,7 +230,7 @@ static int __viommu_add_req(struct viommu_dev *viommu, void *buf, size_t len,
if (write_offset <= 0)
return -EINVAL;
- req = kzalloc(sizeof(*req) + len, GFP_ATOMIC);
+ req = kzalloc(struct_size(req, buf, len), GFP_ATOMIC);
if (!req)
return -ENOMEM;