diff options
Diffstat (limited to 'drivers/iommu/amd')
-rw-r--r-- | drivers/iommu/amd/Kconfig | 3 | ||||
-rw-r--r-- | drivers/iommu/amd/Makefile | 2 | ||||
-rw-r--r-- | drivers/iommu/amd/amd_iommu.h | 99 | ||||
-rw-r--r-- | drivers/iommu/amd/amd_iommu_types.h | 67 | ||||
-rw-r--r-- | drivers/iommu/amd/init.c | 320 | ||||
-rw-r--r-- | drivers/iommu/amd/io_pgtable.c | 13 | ||||
-rw-r--r-- | drivers/iommu/amd/io_pgtable_v2.c | 39 | ||||
-rw-r--r-- | drivers/iommu/amd/iommu.c | 936 | ||||
-rw-r--r-- | drivers/iommu/amd/pasid.c | 198 | ||||
-rw-r--r-- | drivers/iommu/amd/ppr.c | 273 |
10 files changed, 1263 insertions, 687 deletions
diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 443b2c13c3..994063e558 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -7,9 +7,12 @@ config AMD_IOMMU select PCI_ATS select PCI_PRI select PCI_PASID + select MMU_NOTIFIER select IOMMU_API select IOMMU_IOVA select IOMMU_IO_PGTABLE + select IOMMU_SVA + select IOMMU_IOPF select IOMMUFD_DRIVER if IOMMUFD depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE help diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index f454fbb156..9de33b2d42 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o +obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o pasid.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 8b3601f285..2d5945c982 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -17,10 +17,16 @@ irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data); irqreturn_t amd_iommu_int_thread_galog(int irq, void *data); irqreturn_t amd_iommu_int_handler(int irq, void *data); void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid); +void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type, + u8 cntrl_intr, u8 cntrl_log, + u32 status_run_mask, u32 status_overflow_mask); void amd_iommu_restart_event_logging(struct amd_iommu *iommu); void amd_iommu_restart_ga_log(struct amd_iommu *iommu); void amd_iommu_restart_ppr_log(struct amd_iommu *iommu); void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid); +void iommu_feature_enable(struct amd_iommu *iommu, u8 bit); +void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, + gfp_t gfp, size_t size); #ifdef CONFIG_AMD_IOMMU_DEBUGFS void amd_iommu_debugfs_setup(struct amd_iommu *iommu); @@ -33,26 +39,47 @@ int amd_iommu_prepare(void); int amd_iommu_enable(void); void amd_iommu_disable(void); int amd_iommu_reenable(int mode); -int amd_iommu_enable_faulting(void); +int amd_iommu_enable_faulting(unsigned int cpu); extern int amd_iommu_guest_ir; extern enum io_pgtable_fmt amd_iommu_pgtable; extern int amd_iommu_gpt_level; -bool amd_iommu_v2_supported(void); -struct amd_iommu *get_amd_iommu(unsigned int idx); -u8 amd_iommu_pc_get_max_banks(unsigned int idx); -bool amd_iommu_pc_supported(void); -u8 amd_iommu_pc_get_max_counters(unsigned int idx); -int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, - u8 fxn, u64 *value); -int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, - u8 fxn, u64 *value); - -/* Device capabilities */ -int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev); -void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev); - -int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address); +/* Protection domain ops */ +struct protection_domain *protection_domain_alloc(unsigned int type); +void protection_domain_free(struct protection_domain *domain); +struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev, + struct mm_struct *mm); +void amd_iommu_domain_free(struct iommu_domain *dom); +int iommu_sva_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid); +void amd_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, + struct iommu_domain *domain); + +/* SVA/PASID */ +bool amd_iommu_pasid_supported(void); + +/* IOPF */ +int amd_iommu_iopf_init(struct amd_iommu *iommu); +void amd_iommu_iopf_uninit(struct amd_iommu *iommu); +void amd_iommu_page_response(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *resp); +int amd_iommu_iopf_add_device(struct amd_iommu *iommu, + struct iommu_dev_data *dev_data); +void amd_iommu_iopf_remove_device(struct amd_iommu *iommu, + struct iommu_dev_data *dev_data); + +/* GCR3 setup */ +int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data, + ioasid_t pasid, unsigned long gcr3); +int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid); + +/* PPR */ +int __init amd_iommu_alloc_ppr_log(struct amd_iommu *iommu); +void __init amd_iommu_free_ppr_log(struct amd_iommu *iommu); +void amd_iommu_enable_ppr_log(struct amd_iommu *iommu); +void amd_iommu_poll_ppr_log(struct amd_iommu *iommu); +int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag); + /* * This function flushes all internal caches of * the IOMMU used by this driver. @@ -60,13 +87,14 @@ int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address); void amd_iommu_flush_all_caches(struct amd_iommu *iommu); void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); void amd_iommu_domain_update(struct protection_domain *domain); +void amd_iommu_dev_update_dte(struct iommu_dev_data *dev_data, bool set); void amd_iommu_domain_flush_complete(struct protection_domain *domain); void amd_iommu_domain_flush_pages(struct protection_domain *domain, u64 address, size_t size); -int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid); -int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, - unsigned long cr3); -int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid); +void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data, + ioasid_t pasid, u64 address, size_t size); +void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data, + ioasid_t pasid); #ifdef CONFIG_IRQ_REMAP int amd_iommu_create_irq_domain(struct amd_iommu *iommu); @@ -77,13 +105,6 @@ static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu) } #endif -#define PPR_SUCCESS 0x0 -#define PPR_INVALID 0x1 -#define PPR_FAILURE 0xf - -int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, - int status, int tag); - static inline bool is_rd890_iommu(struct pci_dev *pdev) { return (pdev->vendor == PCI_VENDOR_ID_ATI) && @@ -108,7 +129,8 @@ static inline int check_feature_gpt_level(void) static inline bool amd_iommu_gt_ppr_supported(void) { return (check_feature(FEATURE_GT) && - check_feature(FEATURE_PPR)); + check_feature(FEATURE_PPR) && + check_feature(FEATURE_EPHSUP)); } static inline u64 iommu_virt_to_phys(void *vaddr) @@ -142,12 +164,24 @@ static inline int get_pci_sbdf_id(struct pci_dev *pdev) return PCI_SEG_DEVID_TO_SBDF(seg, devid); } -static inline void *alloc_pgtable_page(int nid, gfp_t gfp) +/* + * This must be called after device probe completes. During probe + * use rlookup_amd_iommu() get the iommu. + */ +static inline struct amd_iommu *get_amd_iommu_from_dev(struct device *dev) +{ + return iommu_get_iommu_dev(dev, struct amd_iommu, iommu); +} + +/* This must be called after device probe completes. */ +static inline struct amd_iommu *get_amd_iommu_from_dev_data(struct iommu_dev_data *dev_data) { - struct page *page; + return iommu_get_iommu_dev(dev_data->dev, struct amd_iommu, iommu); +} - page = alloc_pages_node(nid, gfp | __GFP_ZERO, 0); - return page ? page_address(page) : NULL; +static inline struct protection_domain *to_pdomain(struct iommu_domain *dom) +{ + return container_of(dom, struct protection_domain, domain); } bool translation_pre_enabled(struct amd_iommu *iommu); @@ -164,5 +198,4 @@ void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); struct dev_table_entry *get_dev_table(struct amd_iommu *iommu); -extern bool amd_iommu_snp_en; #endif diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 809d74faa1..2b76b5dedc 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -8,7 +8,9 @@ #ifndef _ASM_X86_AMD_IOMMU_TYPES_H #define _ASM_X86_AMD_IOMMU_TYPES_H +#include <linux/iommu.h> #include <linux/types.h> +#include <linux/mmu_notifier.h> #include <linux/mutex.h> #include <linux/msi.h> #include <linux/list.h> @@ -251,6 +253,14 @@ #define PPR_ENTRY_SIZE 16 #define PPR_LOG_SIZE (PPR_ENTRY_SIZE * PPR_LOG_ENTRIES) +/* PAGE_SERVICE_REQUEST PPR Log Buffer Entry flags */ +#define PPR_FLAG_EXEC 0x002 /* Execute permission requested */ +#define PPR_FLAG_READ 0x004 /* Read permission requested */ +#define PPR_FLAG_WRITE 0x020 /* Write permission requested */ +#define PPR_FLAG_US 0x040 /* 1: User, 0: Supervisor */ +#define PPR_FLAG_RVSD 0x080 /* Reserved bit not zero */ +#define PPR_FLAG_GN 0x100 /* GVA and PASID is valid */ + #define PPR_REQ_TYPE(x) (((x) >> 60) & 0xfULL) #define PPR_FLAGS(x) (((x) >> 48) & 0xfffULL) #define PPR_DEVID(x) ((x) & 0xffffULL) @@ -453,15 +463,6 @@ #define MAX_DOMAIN_ID 65536 -/* Protection domain flags */ -#define PD_DMA_OPS_MASK BIT(0) /* domain used for dma_ops */ -#define PD_DEFAULT_MASK BIT(1) /* domain is a default dma_ops - domain for an IOMMU */ -#define PD_PASSTHROUGH_MASK BIT(2) /* domain has no page - translation */ -#define PD_IOMMUV2_MASK BIT(3) /* domain has gcr3 table */ -#define PD_GIOV_MASK BIT(4) /* domain enable GIOV support */ - /* Timeout stuff */ #define LOOP_TIMEOUT 100000 #define MMIO_STATUS_TIMEOUT 2000000 @@ -512,14 +513,11 @@ extern struct kmem_cache *amd_iommu_irq_cache; list_for_each_entry((iommu), &amd_iommu_list, list) #define for_each_iommu_safe(iommu, next) \ list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list) - -#define APERTURE_RANGE_SHIFT 27 /* 128 MB */ -#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT) -#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT) -#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */ -#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) -#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) - +/* Making iterating over protection_domain->dev_data_list easier */ +#define for_each_pdom_dev_data(pdom_dev_data, pdom) \ + list_for_each_entry(pdom_dev_data, &pdom->dev_data_list, list) +#define for_each_pdom_dev_data_safe(pdom_dev_data, next, pdom) \ + list_for_each_entry_safe((pdom_dev_data), (next), &pdom->dev_data_list, list) struct amd_iommu; struct iommu_domain; @@ -541,6 +539,13 @@ struct amd_irte_ops; #define io_pgtable_cfg_to_data(x) \ container_of((x), struct amd_io_pgtable, pgtbl_cfg) +struct gcr3_tbl_info { + u64 *gcr3_tbl; /* Guest CR3 table */ + int glx; /* Number of levels for GCR3 table */ + u32 pasid_cnt; /* Track attached PASIDs */ + u16 domid; /* Per device domain ID */ +}; + struct amd_io_pgtable { struct io_pgtable_cfg pgtbl_cfg; struct io_pgtable iop; @@ -549,6 +554,21 @@ struct amd_io_pgtable { u64 *pgd; /* v2 pgtable pgd pointer */ }; +enum protection_domain_mode { + PD_MODE_V1 = 1, + PD_MODE_V2, +}; + +/* Track dev_data/PASID list for the protection domain */ +struct pdom_dev_data { + /* Points to attached device data */ + struct iommu_dev_data *dev_data; + /* PASID attached to the protection domain */ + ioasid_t pasid; + /* For protection_domain->dev_data_list */ + struct list_head list; +}; + /* * This structure contains generic data for IOMMU protection domains * independent of their use. @@ -560,13 +580,14 @@ struct protection_domain { struct amd_io_pgtable iop; spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ - int glx; /* Number of levels for GCR3 table */ int nid; /* Node ID */ - u64 *gcr3_tbl; /* Guest CR3 table */ - unsigned long flags; /* flags to find out type of domain */ + enum protection_domain_mode pd_mode; /* Track page table type */ bool dirty_tracking; /* dirty tracking is enabled in the domain */ unsigned dev_cnt; /* devices assigned to this domain */ unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ + + struct mmu_notifier mn; /* mmu notifier for the SVA domain */ + struct list_head dev_data_list; /* List of pdom_dev_data */ }; /* @@ -769,6 +790,10 @@ struct amd_iommu { /* DebugFS Info */ struct dentry *debugfs; #endif + + /* IOPF support */ + struct iopf_queue *iopf_queue; + unsigned char iopfq_name[32]; }; static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev) @@ -816,9 +841,11 @@ struct iommu_dev_data { struct list_head list; /* For domain->dev_list */ struct llist_node dev_data_list; /* For global dev_data_list */ struct protection_domain *domain; /* Domain the device is bound to */ + struct gcr3_tbl_info gcr3_info; /* Per-device GCR3 table */ struct device *dev; u16 devid; /* PCI Device ID */ + u32 max_pasids; /* Max supported PASIDs */ u32 flags; /* Holds AMD_IOMMU_DEVICE_FLAG_<*> */ int ats_qdep; u8 ats_enabled :1; /* ATS state */ diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 40979b0f52..c89d85b54a 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -30,11 +30,13 @@ #include <asm/io_apic.h> #include <asm/irq_remapping.h> #include <asm/set_memory.h> +#include <asm/sev.h> #include <linux/crash_dump.h> #include "amd_iommu.h" #include "../irq_remapping.h" +#include "../iommu-pages.h" /* * definitions for the ACPI scanning code @@ -418,7 +420,7 @@ static void iommu_set_device_table(struct amd_iommu *iommu) } /* Generic functions to enable/disable certain features of the IOMMU. */ -static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) +void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) { u64 ctrl; @@ -648,8 +650,8 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_ /* Allocate per PCI segment device table */ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32, - get_order(pci_seg->dev_table_size)); + pci_seg->dev_table = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, + get_order(pci_seg->dev_table_size)); if (!pci_seg->dev_table) return -ENOMEM; @@ -658,17 +660,16 @@ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg) { - free_pages((unsigned long)pci_seg->dev_table, - get_order(pci_seg->dev_table_size)); + iommu_free_pages(pci_seg->dev_table, + get_order(pci_seg->dev_table_size)); pci_seg->dev_table = NULL; } /* Allocate per PCI segment IOMMU rlookup table. */ static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->rlookup_table = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(pci_seg->rlookup_table_size)); + pci_seg->rlookup_table = iommu_alloc_pages(GFP_KERNEL, + get_order(pci_seg->rlookup_table_size)); if (pci_seg->rlookup_table == NULL) return -ENOMEM; @@ -677,16 +678,15 @@ static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg) { - free_pages((unsigned long)pci_seg->rlookup_table, - get_order(pci_seg->rlookup_table_size)); + iommu_free_pages(pci_seg->rlookup_table, + get_order(pci_seg->rlookup_table_size)); pci_seg->rlookup_table = NULL; } static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->irq_lookup_table = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(pci_seg->rlookup_table_size)); + pci_seg->irq_lookup_table = iommu_alloc_pages(GFP_KERNEL, + get_order(pci_seg->rlookup_table_size)); kmemleak_alloc(pci_seg->irq_lookup_table, pci_seg->rlookup_table_size, 1, GFP_KERNEL); if (pci_seg->irq_lookup_table == NULL) @@ -698,8 +698,8 @@ static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_se static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) { kmemleak_free(pci_seg->irq_lookup_table); - free_pages((unsigned long)pci_seg->irq_lookup_table, - get_order(pci_seg->rlookup_table_size)); + iommu_free_pages(pci_seg->irq_lookup_table, + get_order(pci_seg->rlookup_table_size)); pci_seg->irq_lookup_table = NULL; } @@ -707,8 +707,8 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) { int i; - pci_seg->alias_table = (void *)__get_free_pages(GFP_KERNEL, - get_order(pci_seg->alias_table_size)); + pci_seg->alias_table = iommu_alloc_pages(GFP_KERNEL, + get_order(pci_seg->alias_table_size)); if (!pci_seg->alias_table) return -ENOMEM; @@ -723,8 +723,8 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) { - free_pages((unsigned long)pci_seg->alias_table, - get_order(pci_seg->alias_table_size)); + iommu_free_pages(pci_seg->alias_table, + get_order(pci_seg->alias_table_size)); pci_seg->alias_table = NULL; } @@ -735,8 +735,8 @@ static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) */ static int __init alloc_command_buffer(struct amd_iommu *iommu) { - iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(CMD_BUFFER_SIZE)); + iommu->cmd_buf = iommu_alloc_pages(GFP_KERNEL, + get_order(CMD_BUFFER_SIZE)); return iommu->cmd_buf ? 0 : -ENOMEM; } @@ -745,9 +745,9 @@ static int __init alloc_command_buffer(struct amd_iommu *iommu) * Interrupt handler has processed all pending events and adjusted head * and tail pointer. Reset overflow mask and restart logging again. */ -static void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type, - u8 cntrl_intr, u8 cntrl_log, - u32 status_run_mask, u32 status_overflow_mask) +void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type, + u8 cntrl_intr, u8 cntrl_log, + u32 status_run_mask, u32 status_overflow_mask) { u32 status; @@ -789,17 +789,6 @@ void amd_iommu_restart_ga_log(struct amd_iommu *iommu) } /* - * This function restarts ppr logging in case the IOMMU experienced - * PPR log overflow. - */ -void amd_iommu_restart_ppr_log(struct amd_iommu *iommu) -{ - amd_iommu_restart_log(iommu, "PPR", CONTROL_PPRINT_EN, - CONTROL_PPRLOG_EN, MMIO_STATUS_PPR_RUN_MASK, - MMIO_STATUS_PPR_OVERFLOW_MASK); -} - -/* * This function resets the command buffer if the IOMMU stopped fetching * commands from it. */ @@ -844,19 +833,19 @@ static void iommu_disable_command_buffer(struct amd_iommu *iommu) static void __init free_command_buffer(struct amd_iommu *iommu) { - free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); + iommu_free_pages(iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); } -static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, - gfp_t gfp, size_t size) +void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp, + size_t size) { int order = get_order(size); - void *buf = (void *)__get_free_pages(gfp, order); + void *buf = iommu_alloc_pages(gfp, order); if (buf && check_feature(FEATURE_SNP) && set_memory_4k((unsigned long)buf, (1 << order))) { - free_pages((unsigned long)buf, order); + iommu_free_pages(buf, order); buf = NULL; } @@ -866,7 +855,7 @@ static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, /* allocates the memory where the IOMMU will log its events to */ static int __init alloc_event_buffer(struct amd_iommu *iommu) { - iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, + iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL, EVT_BUFFER_SIZE); return iommu->evt_buf ? 0 : -ENOMEM; @@ -900,50 +889,14 @@ static void iommu_disable_event_buffer(struct amd_iommu *iommu) static void __init free_event_buffer(struct amd_iommu *iommu) { - free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); -} - -/* allocates the memory where the IOMMU will log its events to */ -static int __init alloc_ppr_log(struct amd_iommu *iommu) -{ - iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, - PPR_LOG_SIZE); - - return iommu->ppr_log ? 0 : -ENOMEM; -} - -static void iommu_enable_ppr_log(struct amd_iommu *iommu) -{ - u64 entry; - - if (iommu->ppr_log == NULL) - return; - - iommu_feature_enable(iommu, CONTROL_PPR_EN); - - entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512; - - memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET, - &entry, sizeof(entry)); - - /* set head and tail to zero manually */ - writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); - writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); - - iommu_feature_enable(iommu, CONTROL_PPRLOG_EN); - iommu_feature_enable(iommu, CONTROL_PPRINT_EN); -} - -static void __init free_ppr_log(struct amd_iommu *iommu) -{ - free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE)); + iommu_free_pages(iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); } static void free_ga_log(struct amd_iommu *iommu) { #ifdef CONFIG_IRQ_REMAP - free_pages((unsigned long)iommu->ga_log, get_order(GA_LOG_SIZE)); - free_pages((unsigned long)iommu->ga_log_tail, get_order(8)); + iommu_free_pages(iommu->ga_log, get_order(GA_LOG_SIZE)); + iommu_free_pages(iommu->ga_log_tail, get_order(8)); #endif } @@ -988,13 +941,11 @@ static int iommu_init_ga_log(struct amd_iommu *iommu) if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) return 0; - iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(GA_LOG_SIZE)); + iommu->ga_log = iommu_alloc_pages(GFP_KERNEL, get_order(GA_LOG_SIZE)); if (!iommu->ga_log) goto err_out; - iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(8)); + iommu->ga_log_tail = iommu_alloc_pages(GFP_KERNEL, get_order(8)); if (!iommu->ga_log_tail) goto err_out; @@ -1007,7 +958,7 @@ err_out: static int __init alloc_cwwb_sem(struct amd_iommu *iommu) { - iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1); + iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL, 1); return iommu->cmd_sem ? 0 : -ENOMEM; } @@ -1015,7 +966,7 @@ static int __init alloc_cwwb_sem(struct amd_iommu *iommu) static void __init free_cwwb_sem(struct amd_iommu *iommu) { if (iommu->cmd_sem) - free_page((unsigned long)iommu->cmd_sem); + iommu_free_page((void *)iommu->cmd_sem); } static void iommu_enable_xt(struct amd_iommu *iommu) @@ -1080,7 +1031,6 @@ static bool __copy_device_table(struct amd_iommu *iommu) u32 lo, hi, devid, old_devtb_size; phys_addr_t old_devtb_phys; u16 dom_id, dte_v, irq_v; - gfp_t gfp_flag; u64 tmp; /* Each IOMMU use separate device table with the same size */ @@ -1114,9 +1064,8 @@ static bool __copy_device_table(struct amd_iommu *iommu) if (!old_devtb) return false; - gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32; - pci_seg->old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, - get_order(pci_seg->dev_table_size)); + pci_seg->old_dev_tbl_cpy = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, + get_order(pci_seg->dev_table_size)); if (pci_seg->old_dev_tbl_cpy == NULL) { pr_err("Failed to allocate memory for copying old device table!\n"); memunmap(old_devtb); @@ -1677,14 +1626,24 @@ static void __init free_pci_segments(void) } } +static void __init free_sysfs(struct amd_iommu *iommu) +{ + if (iommu->iommu.dev) { + iommu_device_unregister(&iommu->iommu); + iommu_device_sysfs_remove(&iommu->iommu); + } +} + static void __init free_iommu_one(struct amd_iommu *iommu) { + free_sysfs(iommu); free_cwwb_sem(iommu); free_command_buffer(iommu); free_event_buffer(iommu); - free_ppr_log(iommu); + amd_iommu_free_ppr_log(iommu); free_ga_log(iommu); iommu_unmap_mmio_space(iommu); + amd_iommu_iopf_uninit(iommu); } static void __init free_iommu_all(void) @@ -2096,9 +2055,11 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) amd_iommu_max_glx_val = glxval; else amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); + + iommu_enable_gt(iommu); } - if (check_feature(FEATURE_PPR) && alloc_ppr_log(iommu)) + if (check_feature(FEATURE_PPR) && amd_iommu_alloc_ppr_log(iommu)) return -ENOMEM; if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) { @@ -2154,6 +2115,16 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) if (ret) return ret; + /* + * Allocate per IOMMU IOPF queue here so that in attach device path, + * PRI capable device can be added to IOPF queue + */ + if (amd_iommu_gt_ppr_supported()) { + ret = amd_iommu_iopf_init(iommu); + if (ret) + return ret; + } + iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); return pci_enable_device(iommu->dev); @@ -2772,6 +2743,7 @@ static void early_enable_iommu(struct amd_iommu *iommu) iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); iommu_set_exclusion_range(iommu); + iommu_enable_gt(iommu); iommu_enable_ga(iommu); iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); @@ -2803,8 +2775,8 @@ static void early_enable_iommus(void) for_each_pci_segment(pci_seg) { if (pci_seg->old_dev_tbl_cpy != NULL) { - free_pages((unsigned long)pci_seg->old_dev_tbl_cpy, - get_order(pci_seg->dev_table_size)); + iommu_free_pages(pci_seg->old_dev_tbl_cpy, + get_order(pci_seg->dev_table_size)); pci_seg->old_dev_tbl_cpy = NULL; } } @@ -2817,8 +2789,8 @@ static void early_enable_iommus(void) pr_info("Copied DEV table from previous kernel.\n"); for_each_pci_segment(pci_seg) { - free_pages((unsigned long)pci_seg->dev_table, - get_order(pci_seg->dev_table_size)); + iommu_free_pages(pci_seg->dev_table, + get_order(pci_seg->dev_table_size)); pci_seg->dev_table = pci_seg->old_dev_tbl_cpy; } @@ -2837,14 +2809,15 @@ static void early_enable_iommus(void) } } -static void enable_iommus_v2(void) +static void enable_iommus_ppr(void) { struct amd_iommu *iommu; - for_each_iommu(iommu) { - iommu_enable_ppr_log(iommu); - iommu_enable_gt(iommu); - } + if (!amd_iommu_gt_ppr_supported()) + return; + + for_each_iommu(iommu) + amd_iommu_enable_ppr_log(iommu); } static void enable_iommus_vapic(void) @@ -3021,8 +2994,8 @@ static bool __init check_ioapic_information(void) static void __init free_dma_resources(void) { - free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, - get_order(MAX_DOMAIN_ID/8)); + iommu_free_pages(amd_iommu_pd_alloc_bitmap, + get_order(MAX_DOMAIN_ID / 8)); amd_iommu_pd_alloc_bitmap = NULL; free_unity_maps(); @@ -3094,9 +3067,8 @@ static int __init early_amd_iommu_init(void) /* Device table - directly used by all IOMMUs */ ret = -ENOMEM; - amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(MAX_DOMAIN_ID/8)); + amd_iommu_pd_alloc_bitmap = iommu_alloc_pages(GFP_KERNEL, + get_order(MAX_DOMAIN_ID / 8)); if (amd_iommu_pd_alloc_bitmap == NULL) goto out; @@ -3180,7 +3152,7 @@ static int amd_iommu_enable_interrupts(void) * PPR and GA log interrupt for all IOMMUs. */ enable_iommus_vapic(); - enable_iommus_v2(); + enable_iommus_ppr(); out: return ret; @@ -3224,6 +3196,39 @@ out: return true; } +static void iommu_snp_enable(void) +{ +#ifdef CONFIG_KVM_AMD_SEV + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + return; + /* + * The SNP support requires that IOMMU must be enabled, and is + * configured with V1 page table (DTE[Mode] = 0 is not supported). + */ + if (no_iommu || iommu_default_passthrough()) { + pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n"); + goto disable_snp; + } + + if (amd_iommu_pgtable != AMD_IOMMU_V1) { + pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n"); + goto disable_snp; + } + + amd_iommu_snp_en = check_feature(FEATURE_SNP); + if (!amd_iommu_snp_en) { + pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n"); + goto disable_snp; + } + + pr_info("IOMMU SNP support enabled.\n"); + return; + +disable_snp: + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); +#endif +} + /**************************************************************************** * * AMD IOMMU Initialization State Machine @@ -3259,6 +3264,7 @@ static int __init state_next(void) break; case IOMMU_ENABLED: register_syscore_ops(&amd_iommu_syscore_ops); + iommu_snp_enable(); ret = amd_iommu_init_pci(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; break; @@ -3357,7 +3363,7 @@ int amd_iommu_reenable(int mode) return 0; } -int __init amd_iommu_enable_faulting(void) +int amd_iommu_enable_faulting(unsigned int cpu) { /* We enable MSI later when PCI is initialized */ return 0; @@ -3655,7 +3661,7 @@ __setup("ivrs_ioapic", parse_ivrs_ioapic); __setup("ivrs_hpet", parse_ivrs_hpet); __setup("ivrs_acpihid", parse_ivrs_acpihid); -bool amd_iommu_v2_supported(void) +bool amd_iommu_pasid_supported(void) { /* CPU page table size should match IOMMU guest page table size */ if (cpu_feature_enabled(X86_FEATURE_LA57) && @@ -3697,13 +3703,11 @@ u8 amd_iommu_pc_get_max_banks(unsigned int idx) return 0; } -EXPORT_SYMBOL(amd_iommu_pc_get_max_banks); bool amd_iommu_pc_supported(void) { return amd_iommu_pc_present; } -EXPORT_SYMBOL(amd_iommu_pc_supported); u8 amd_iommu_pc_get_max_counters(unsigned int idx) { @@ -3714,7 +3718,6 @@ u8 amd_iommu_pc_get_max_counters(unsigned int idx) return 0; } -EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value, bool is_write) @@ -3770,40 +3773,85 @@ int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true); } -#ifdef CONFIG_AMD_MEM_ENCRYPT -int amd_iommu_snp_enable(void) +#ifdef CONFIG_KVM_AMD_SEV +static int iommu_page_make_shared(void *page) { - /* - * The SNP support requires that IOMMU must be enabled, and is - * not configured in the passthrough mode. - */ - if (no_iommu || iommu_default_passthrough()) { - pr_err("SNP: IOMMU is disabled or configured in passthrough mode, SNP cannot be supported"); - return -EINVAL; + unsigned long paddr, pfn; + + paddr = iommu_virt_to_phys(page); + /* Cbit maybe set in the paddr */ + pfn = __sme_clr(paddr) >> PAGE_SHIFT; + + if (!(pfn % PTRS_PER_PMD)) { + int ret, level; + bool assigned; + + ret = snp_lookup_rmpentry(pfn, &assigned, &level); + if (ret) { + pr_warn("IOMMU PFN %lx RMP lookup failed, ret %d\n", pfn, ret); + return ret; + } + + if (!assigned) { + pr_warn("IOMMU PFN %lx not assigned in RMP table\n", pfn); + return -EINVAL; + } + + if (level > PG_LEVEL_4K) { + ret = psmash(pfn); + if (!ret) + goto done; + + pr_warn("PSMASH failed for IOMMU PFN %lx huge RMP entry, ret: %d, level: %d\n", + pfn, ret, level); + return ret; + } } - /* - * Prevent enabling SNP after IOMMU_ENABLED state because this process - * affect how IOMMU driver sets up data structures and configures - * IOMMU hardware. - */ - if (init_state > IOMMU_ENABLED) { - pr_err("SNP: Too late to enable SNP for IOMMU.\n"); - return -EINVAL; +done: + return rmp_make_shared(pfn, PG_LEVEL_4K); +} + +static int iommu_make_shared(void *va, size_t size) +{ + void *page; + int ret; + + if (!va) + return 0; + + for (page = va; page < (va + size); page += PAGE_SIZE) { + ret = iommu_page_make_shared(page); + if (ret) + return ret; } - amd_iommu_snp_en = check_feature(FEATURE_SNP); + return 0; +} + +int amd_iommu_snp_disable(void) +{ + struct amd_iommu *iommu; + int ret; + if (!amd_iommu_snp_en) - return -EINVAL; + return 0; - pr_info("SNP enabled\n"); + for_each_iommu(iommu) { + ret = iommu_make_shared(iommu->evt_buf, EVT_BUFFER_SIZE); + if (ret) + return ret; - /* Enforce IOMMU v1 pagetable when SNP is enabled. */ - if (amd_iommu_pgtable != AMD_IOMMU_V1) { - pr_warn("Force to using AMD IOMMU v1 page table due to SNP\n"); - amd_iommu_pgtable = AMD_IOMMU_V1; + ret = iommu_make_shared(iommu->ppr_log, PPR_LOG_SIZE); + if (ret) + return ret; + + ret = iommu_make_shared((void *)iommu->cmd_sem, PAGE_SIZE); + if (ret) + return ret; } return 0; } +EXPORT_SYMBOL_GPL(amd_iommu_snp_disable); #endif diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 2a0d1e97e5..9d9a7fde59 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -22,6 +22,7 @@ #include "amd_iommu_types.h" #include "amd_iommu.h" +#include "../iommu-pages.h" static void v1_tlb_flush_all(void *cookie) { @@ -156,7 +157,7 @@ static bool increase_address_space(struct protection_domain *domain, bool ret = true; u64 *pte; - pte = alloc_pgtable_page(domain->nid, gfp); + pte = iommu_alloc_page_node(domain->nid, gfp); if (!pte) return false; @@ -187,7 +188,7 @@ static bool increase_address_space(struct protection_domain *domain, out: spin_unlock_irqrestore(&domain->lock, flags); - free_page((unsigned long)pte); + iommu_free_page(pte); return ret; } @@ -250,7 +251,7 @@ static u64 *alloc_pte(struct protection_domain *domain, if (!IOMMU_PTE_PRESENT(__pte) || pte_level == PAGE_MODE_NONE) { - page = alloc_pgtable_page(domain->nid, gfp); + page = iommu_alloc_page_node(domain->nid, gfp); if (!page) return NULL; @@ -259,7 +260,7 @@ static u64 *alloc_pte(struct protection_domain *domain, /* pte could have been changed somewhere. */ if (!try_cmpxchg64(pte, &__pte, __npte)) - free_page((unsigned long)page); + iommu_free_page(page); else if (IOMMU_PTE_PRESENT(__pte)) *updated = true; @@ -431,7 +432,7 @@ out: } /* Everything flushed out, free pages now */ - put_pages_list(&freelist); + iommu_put_pages_list(&freelist); return ret; } @@ -580,7 +581,7 @@ static void v1_free_pgtable(struct io_pgtable *iop) /* Make changes visible to IOMMUs */ amd_iommu_domain_update(dom); - put_pages_list(&freelist); + iommu_put_pages_list(&freelist); } static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index 6d69ba6074..78ac37c5cc 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -18,6 +18,7 @@ #include "amd_iommu_types.h" #include "amd_iommu.h" +#include "../iommu-pages.h" #define IOMMU_PAGE_PRESENT BIT_ULL(0) /* Is present */ #define IOMMU_PAGE_RW BIT_ULL(1) /* Writeable */ @@ -99,11 +100,6 @@ static inline int page_size_to_level(u64 pg_size) return PAGE_MODE_1_LEVEL; } -static inline void free_pgtable_page(u64 *pt) -{ - free_page((unsigned long)pt); -} - static void free_pgtable(u64 *pt, int level) { u64 *p; @@ -125,10 +121,10 @@ static void free_pgtable(u64 *pt, int level) if (level > 2) free_pgtable(p, level - 1); else - free_pgtable_page(p); + iommu_free_page(p); } - free_pgtable_page(pt); + iommu_free_page(pt); } /* Allocate page table */ @@ -156,14 +152,14 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova, } if (!IOMMU_PTE_PRESENT(__pte)) { - page = alloc_pgtable_page(nid, gfp); + page = iommu_alloc_page_node(nid, gfp); if (!page) return NULL; __npte = set_pgtable_attr(page); /* pte could have been changed somewhere. */ if (cmpxchg64(pte, __pte, __npte) != __pte) - free_pgtable_page(page); + iommu_free_page(page); else if (IOMMU_PTE_PRESENT(__pte)) *updated = true; @@ -185,7 +181,7 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova, if (pg_size == IOMMU_PAGE_SIZE_1G) free_pgtable(__pte, end_level - 1); else if (pg_size == IOMMU_PAGE_SIZE_2M) - free_pgtable_page(__pte); + iommu_free_page(__pte); } return pte; @@ -350,38 +346,26 @@ static const struct iommu_flush_ops v2_flush_ops = { static void v2_free_pgtable(struct io_pgtable *iop) { - struct protection_domain *pdom; struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); - pdom = container_of(pgtable, struct protection_domain, iop); - if (!(pdom->flags & PD_IOMMUV2_MASK)) + if (!pgtable || !pgtable->pgd) return; - /* Clear gcr3 entry */ - amd_iommu_domain_clear_gcr3(&pdom->domain, 0); - - /* Make changes visible to IOMMUs */ - amd_iommu_domain_update(pdom); - /* Free page table */ free_pgtable(pgtable->pgd, get_pgtable_level()); + pgtable->pgd = NULL; } static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) { struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); struct protection_domain *pdom = (struct protection_domain *)cookie; - int ret; int ias = IOMMU_IN_ADDR_BIT_SIZE; - pgtable->pgd = alloc_pgtable_page(pdom->nid, GFP_ATOMIC); + pgtable->pgd = iommu_alloc_page_node(pdom->nid, GFP_ATOMIC); if (!pgtable->pgd) return NULL; - ret = amd_iommu_domain_set_gcr3(&pdom->domain, 0, iommu_virt_to_phys(pgtable->pgd)); - if (ret) - goto err_free_pgd; - if (get_pgtable_level() == PAGE_MODE_5_LEVEL) ias = 57; @@ -395,11 +379,6 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo cfg->tlb = &v2_flush_ops; return &pgtable->iop; - -err_free_pgd: - free_pgtable_page(pgtable->pgd); - - return NULL; } struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = { diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index f945bf3253..b19e8c0f48 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -42,13 +42,10 @@ #include "amd_iommu.h" #include "../dma-iommu.h" #include "../irq_remapping.h" +#include "../iommu-pages.h" #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) -/* IO virtual address start page frame number */ -#define IOVA_START_PFN (1) -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) - /* Reserved IOVA ranges */ #define MSI_RANGE_START (0xfee00000) #define MSI_RANGE_END (0xfeefffff) @@ -79,6 +76,9 @@ struct kmem_cache *amd_iommu_irq_cache; static void detach_device(struct device *dev); +static void set_dte_entry(struct amd_iommu *iommu, + struct iommu_dev_data *dev_data); + /**************************************************************************** * * Helper functions @@ -87,7 +87,22 @@ static void detach_device(struct device *dev); static inline bool pdom_is_v2_pgtbl_mode(struct protection_domain *pdom) { - return (pdom && (pdom->flags & PD_IOMMUV2_MASK)); + return (pdom && (pdom->pd_mode == PD_MODE_V2)); +} + +static inline bool pdom_is_in_pt_mode(struct protection_domain *pdom) +{ + return (pdom->domain.type == IOMMU_DOMAIN_IDENTITY); +} + +/* + * We cannot support PASID w/ existing v1 page table in the same domain + * since it will be nested. However, existing domain w/ v2 page table + * or passthrough mode can be used for PASID. + */ +static inline bool pdom_is_sva_capable(struct protection_domain *pdom) +{ + return pdom_is_v2_pgtbl_mode(pdom) || pdom_is_in_pt_mode(pdom); } static inline int get_acpihid_device_id(struct device *dev, @@ -180,11 +195,6 @@ static struct amd_iommu *rlookup_amd_iommu(struct device *dev) return __rlookup_amd_iommu(seg, PCI_SBDF_TO_DEVID(devid)); } -static struct protection_domain *to_pdomain(struct iommu_domain *dom) -{ - return container_of(dom, struct protection_domain, domain); -} - static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid) { struct iommu_dev_data *dev_data; @@ -385,7 +395,7 @@ static inline void pdev_disable_cap_ats(struct pci_dev *pdev) } } -int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev) +static inline int pdev_enable_cap_pri(struct pci_dev *pdev) { struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); int ret = -EINVAL; @@ -393,6 +403,9 @@ int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev) if (dev_data->pri_enabled) return 0; + if (!dev_data->ats_enabled) + return 0; + if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) { /* * First reset the PRI state of the device. @@ -409,7 +422,7 @@ int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev) return ret; } -void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev) +static inline void pdev_disable_cap_pri(struct pci_dev *pdev) { struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); @@ -451,15 +464,14 @@ static void pdev_enable_caps(struct pci_dev *pdev) { pdev_enable_cap_ats(pdev); pdev_enable_cap_pasid(pdev); - amd_iommu_pdev_enable_cap_pri(pdev); - + pdev_enable_cap_pri(pdev); } static void pdev_disable_caps(struct pci_dev *pdev) { pdev_disable_cap_ats(pdev); pdev_disable_cap_pasid(pdev); - amd_iommu_pdev_disable_cap_pri(pdev); + pdev_disable_cap_pri(pdev); } /* @@ -819,59 +831,6 @@ static void iommu_poll_events(struct amd_iommu *iommu) writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); } -static void iommu_poll_ppr_log(struct amd_iommu *iommu) -{ - u32 head, tail; - - if (iommu->ppr_log == NULL) - return; - - head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); - tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); - - while (head != tail) { - volatile u64 *raw; - u64 entry[2]; - int i; - - raw = (u64 *)(iommu->ppr_log + head); - - /* - * Hardware bug: Interrupt may arrive before the entry is - * written to memory. If this happens we need to wait for the - * entry to arrive. - */ - for (i = 0; i < LOOP_TIMEOUT; ++i) { - if (PPR_REQ_TYPE(raw[0]) != 0) - break; - udelay(1); - } - - /* Avoid memcpy function-call overhead */ - entry[0] = raw[0]; - entry[1] = raw[1]; - - /* - * To detect the hardware errata 733 we need to clear the - * entry back to zero. This issue does not exist on SNP - * enabled system. Also this buffer is not writeable on - * SNP enabled system. - */ - if (!amd_iommu_snp_en) - raw[0] = raw[1] = 0UL; - - /* Update head pointer of hardware ring-buffer */ - head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; - writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); - - /* TODO: PPR Handler will be added when we add IOPF support */ - - /* Refresh ring-buffer information */ - head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); - tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); - } -} - #ifdef CONFIG_IRQ_REMAP static int (*iommu_ga_log_notifier)(u32); @@ -992,7 +951,7 @@ irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data) { amd_iommu_handle_irq(data, "PPR", MMIO_STATUS_PPR_INT_MASK, MMIO_STATUS_PPR_OVERFLOW_MASK, - iommu_poll_ppr_log, amd_iommu_restart_ppr_log); + amd_iommu_poll_ppr_log, amd_iommu_restart_ppr_log); return IRQ_HANDLED; } @@ -1388,14 +1347,9 @@ void amd_iommu_flush_all_caches(struct amd_iommu *iommu) static int device_flush_iotlb(struct iommu_dev_data *dev_data, u64 address, size_t size, ioasid_t pasid, bool gn) { - struct amd_iommu *iommu; + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); struct iommu_cmd cmd; - int qdep; - - qdep = dev_data->ats_qdep; - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) - return -EINVAL; + int qdep = dev_data->ats_qdep; build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, size, pasid, gn); @@ -1415,16 +1369,12 @@ static int device_flush_dte_alias(struct pci_dev *pdev, u16 alias, void *data) */ static int device_flush_dte(struct iommu_dev_data *dev_data) { - struct amd_iommu *iommu; + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); struct pci_dev *pdev = NULL; struct amd_iommu_pci_seg *pci_seg; u16 alias; int ret; - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) - return -EINVAL; - if (dev_is_pci(dev_data->dev)) pdev = to_pci_dev(dev_data->dev); @@ -1453,27 +1403,37 @@ static int device_flush_dte(struct iommu_dev_data *dev_data) return ret; } -/* - * TLB invalidation function which is called from the mapping functions. - * It invalidates a single PTE if the range to flush is within a single - * page. Otherwise it flushes the whole TLB of the IOMMU. - */ -static void __domain_flush_pages(struct protection_domain *domain, +static int domain_flush_pages_v2(struct protection_domain *pdom, u64 address, size_t size) { struct iommu_dev_data *dev_data; struct iommu_cmd cmd; - int ret = 0, i; - ioasid_t pasid = IOMMU_NO_PASID; - bool gn = false; + int ret = 0; - if (pdom_is_v2_pgtbl_mode(domain)) - gn = true; + list_for_each_entry(dev_data, &pdom->dev_list, list) { + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev); + u16 domid = dev_data->gcr3_info.domid; + + build_inv_iommu_pages(&cmd, address, size, + domid, IOMMU_NO_PASID, true); + + ret |= iommu_queue_command(iommu, &cmd); + } - build_inv_iommu_pages(&cmd, address, size, domain->id, pasid, gn); + return ret; +} + +static int domain_flush_pages_v1(struct protection_domain *pdom, + u64 address, size_t size) +{ + struct iommu_cmd cmd; + int ret = 0, i; + + build_inv_iommu_pages(&cmd, address, size, + pdom->id, IOMMU_NO_PASID, false); for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { - if (!domain->dev_iommu[i]) + if (!pdom->dev_iommu[i]) continue; /* @@ -1483,6 +1443,28 @@ static void __domain_flush_pages(struct protection_domain *domain, ret |= iommu_queue_command(amd_iommus[i], &cmd); } + return ret; +} + +/* + * TLB invalidation function which is called from the mapping functions. + * It flushes range of PTEs of the domain. + */ +static void __domain_flush_pages(struct protection_domain *domain, + u64 address, size_t size) +{ + struct iommu_dev_data *dev_data; + int ret = 0; + ioasid_t pasid = IOMMU_NO_PASID; + bool gn = false; + + if (pdom_is_v2_pgtbl_mode(domain)) { + gn = true; + ret = domain_flush_pages_v2(domain, address, size); + } else { + ret = domain_flush_pages_v1(domain, address, size); + } + list_for_each_entry(dev_data, &domain->dev_list, list) { if (!dev_data->ats_enabled) @@ -1551,6 +1533,29 @@ static void amd_iommu_domain_flush_all(struct protection_domain *domain) CMD_INV_IOMMU_ALL_PAGES_ADDRESS); } +void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data, + ioasid_t pasid, u64 address, size_t size) +{ + struct iommu_cmd cmd; + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev); + + build_inv_iommu_pages(&cmd, address, size, + dev_data->gcr3_info.domid, pasid, true); + iommu_queue_command(iommu, &cmd); + + if (dev_data->ats_enabled) + device_flush_iotlb(dev_data, address, size, pasid, true); + + iommu_completion_wait(iommu); +} + +void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data, + ioasid_t pasid) +{ + amd_iommu_dev_flush_pasid_pages(dev_data, 0, + CMD_INV_IOMMU_ALL_PAGES_ADDRESS, pasid); +} + void amd_iommu_domain_flush_complete(struct protection_domain *domain) { int i; @@ -1592,6 +1597,48 @@ static void domain_flush_devices(struct protection_domain *domain) device_flush_dte(dev_data); } +static void update_device_table(struct protection_domain *domain) +{ + struct iommu_dev_data *dev_data; + + list_for_each_entry(dev_data, &domain->dev_list, list) { + struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev); + + set_dte_entry(iommu, dev_data); + clone_aliases(iommu, dev_data->dev); + } +} + +void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) +{ + update_device_table(domain); + domain_flush_devices(domain); +} + +void amd_iommu_domain_update(struct protection_domain *domain) +{ + /* Update device table */ + amd_iommu_update_and_flush_device_table(domain); + + /* Flush domain TLB(s) and wait for completion */ + amd_iommu_domain_flush_all(domain); +} + +int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag) +{ + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + struct iommu_cmd cmd; + + dev_data = dev_iommu_priv_get(dev); + iommu = get_amd_iommu_from_dev(dev); + + build_complete_ppr(&cmd, dev_data->devid, pasid, status, + tag, dev_data->pri_tlp); + + return iommu_queue_command(iommu, &cmd); +} + /**************************************************************************** * * The next functions belong to the domain allocation. A domain is @@ -1604,26 +1651,29 @@ static void domain_flush_devices(struct protection_domain *domain) static u16 domain_id_alloc(void) { + unsigned long flags; int id; - spin_lock(&pd_bitmap_lock); + spin_lock_irqsave(&pd_bitmap_lock, flags); id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID); BUG_ON(id == 0); if (id > 0 && id < MAX_DOMAIN_ID) __set_bit(id, amd_iommu_pd_alloc_bitmap); else id = 0; - spin_unlock(&pd_bitmap_lock); + spin_unlock_irqrestore(&pd_bitmap_lock, flags); return id; } static void domain_id_free(int id) { - spin_lock(&pd_bitmap_lock); + unsigned long flags; + + spin_lock_irqsave(&pd_bitmap_lock, flags); if (id > 0 && id < MAX_DOMAIN_ID) __clear_bit(id, amd_iommu_pd_alloc_bitmap); - spin_unlock(&pd_bitmap_lock); + spin_unlock_irqrestore(&pd_bitmap_lock, flags); } static void free_gcr3_tbl_level1(u64 *tbl) @@ -1637,7 +1687,7 @@ static void free_gcr3_tbl_level1(u64 *tbl) ptr = iommu_phys_to_virt(tbl[i] & PAGE_MASK); - free_page((unsigned long)ptr); + iommu_free_page(ptr); } } @@ -1656,16 +1706,22 @@ static void free_gcr3_tbl_level2(u64 *tbl) } } -static void free_gcr3_table(struct protection_domain *domain) +static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info) { - if (domain->glx == 2) - free_gcr3_tbl_level2(domain->gcr3_tbl); - else if (domain->glx == 1) - free_gcr3_tbl_level1(domain->gcr3_tbl); + if (gcr3_info->glx == 2) + free_gcr3_tbl_level2(gcr3_info->gcr3_tbl); + else if (gcr3_info->glx == 1) + free_gcr3_tbl_level1(gcr3_info->gcr3_tbl); else - BUG_ON(domain->glx != 0); + WARN_ON_ONCE(gcr3_info->glx != 0); + + gcr3_info->glx = 0; + + /* Free per device domain ID */ + domain_id_free(gcr3_info->domid); - free_page((unsigned long)domain->gcr3_tbl); + iommu_free_page(gcr3_info->gcr3_tbl); + gcr3_info->gcr3_tbl = NULL; } /* @@ -1684,33 +1740,133 @@ static int get_gcr3_levels(int pasids) return levels ? (DIV_ROUND_UP(levels, 9) - 1) : levels; } -/* Note: This function expects iommu_domain->lock to be held prior calling the function. */ -static int setup_gcr3_table(struct protection_domain *domain, int pasids) +static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info, + struct amd_iommu *iommu, int pasids) { int levels = get_gcr3_levels(pasids); + int nid = iommu ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE; if (levels > amd_iommu_max_glx_val) return -EINVAL; - domain->gcr3_tbl = alloc_pgtable_page(domain->nid, GFP_ATOMIC); - if (domain->gcr3_tbl == NULL) + if (gcr3_info->gcr3_tbl) + return -EBUSY; + + /* Allocate per device domain ID */ + gcr3_info->domid = domain_id_alloc(); + + gcr3_info->gcr3_tbl = iommu_alloc_page_node(nid, GFP_ATOMIC); + if (gcr3_info->gcr3_tbl == NULL) { + domain_id_free(gcr3_info->domid); return -ENOMEM; + } + + gcr3_info->glx = levels; - domain->glx = levels; - domain->flags |= PD_IOMMUV2_MASK; + return 0; +} - amd_iommu_domain_update(domain); +static u64 *__get_gcr3_pte(struct gcr3_tbl_info *gcr3_info, + ioasid_t pasid, bool alloc) +{ + int index; + u64 *pte; + u64 *root = gcr3_info->gcr3_tbl; + int level = gcr3_info->glx; + + while (true) { + + index = (pasid >> (9 * level)) & 0x1ff; + pte = &root[index]; + + if (level == 0) + break; + + if (!(*pte & GCR3_VALID)) { + if (!alloc) + return NULL; + root = (void *)get_zeroed_page(GFP_ATOMIC); + if (root == NULL) + return NULL; + + *pte = iommu_virt_to_phys(root) | GCR3_VALID; + } + + root = iommu_phys_to_virt(*pte & PAGE_MASK); + + level -= 1; + } + + return pte; +} + +static int update_gcr3(struct iommu_dev_data *dev_data, + ioasid_t pasid, unsigned long gcr3, bool set) +{ + struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + u64 *pte; + + pte = __get_gcr3_pte(gcr3_info, pasid, true); + if (pte == NULL) + return -ENOMEM; + + if (set) + *pte = (gcr3 & PAGE_MASK) | GCR3_VALID; + else + *pte = 0; + + amd_iommu_dev_flush_pasid_all(dev_data, pasid); return 0; } -static void set_dte_entry(struct amd_iommu *iommu, u16 devid, - struct protection_domain *domain, bool ats, bool ppr) +int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid, + unsigned long gcr3) +{ + struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + int ret; + + iommu_group_mutex_assert(dev_data->dev); + + ret = update_gcr3(dev_data, pasid, gcr3, true); + if (ret) + return ret; + + gcr3_info->pasid_cnt++; + return ret; +} + +int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid) +{ + struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + int ret; + + iommu_group_mutex_assert(dev_data->dev); + + ret = update_gcr3(dev_data, pasid, 0, false); + if (ret) + return ret; + + gcr3_info->pasid_cnt--; + return ret; +} + +static void set_dte_entry(struct amd_iommu *iommu, + struct iommu_dev_data *dev_data) { u64 pte_root = 0; u64 flags = 0; u32 old_domid; + u16 devid = dev_data->devid; + u16 domid; + struct protection_domain *domain = dev_data->domain; struct dev_table_entry *dev_table = get_dev_table(iommu); + struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + + if (gcr3_info && gcr3_info->gcr3_tbl) + domid = dev_data->gcr3_info.domid; + else + domid = domain->id; if (domain->iop.mode != PAGE_MODE_NONE) pte_root = iommu_virt_to_phys(domain->iop.root); @@ -1724,23 +1880,23 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, * When SNP is enabled, Only set TV bit when IOMMU * page translation is in use. */ - if (!amd_iommu_snp_en || (domain->id != 0)) + if (!amd_iommu_snp_en || (domid != 0)) pte_root |= DTE_FLAG_TV; flags = dev_table[devid].data[1]; - if (ats) + if (dev_data->ats_enabled) flags |= DTE_FLAG_IOTLB; - if (ppr) + if (dev_data->ppr) pte_root |= 1ULL << DEV_ENTRY_PPR; if (domain->dirty_tracking) pte_root |= DTE_FLAG_HAD; - if (domain->flags & PD_IOMMUV2_MASK) { - u64 gcr3 = iommu_virt_to_phys(domain->gcr3_tbl); - u64 glx = domain->glx; + if (gcr3_info && gcr3_info->gcr3_tbl) { + u64 gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl); + u64 glx = gcr3_info->glx; u64 tmp; pte_root |= DTE_FLAG_GV; @@ -1768,12 +1924,13 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, ((u64)GUEST_PGTABLE_5_LEVEL << DTE_GPT_LEVEL_SHIFT); } - if (domain->flags & PD_GIOV_MASK) + /* GIOV is supported with V2 page table mode only */ + if (pdom_is_v2_pgtbl_mode(domain)) pte_root |= DTE_FLAG_GIOV; } flags &= ~DEV_DOMID_MASK; - flags |= domain->id; + flags |= domid; old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK; dev_table[devid].data[1] = flags; @@ -1804,16 +1961,78 @@ static void clear_dte_entry(struct amd_iommu *iommu, u16 devid) amd_iommu_apply_erratum_63(iommu, devid); } -static void do_attach(struct iommu_dev_data *dev_data, - struct protection_domain *domain) +/* Update and flush DTE for the given device */ +void amd_iommu_dev_update_dte(struct iommu_dev_data *dev_data, bool set) { - struct amd_iommu *iommu; - bool ats; + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev); - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) + if (set) + set_dte_entry(iommu, dev_data); + else + clear_dte_entry(iommu, dev_data->devid); + + clone_aliases(iommu, dev_data->dev); + device_flush_dte(dev_data); + iommu_completion_wait(iommu); +} + +/* + * If domain is SVA capable then initialize GCR3 table. Also if domain is + * in v2 page table mode then update GCR3[0]. + */ +static int init_gcr3_table(struct iommu_dev_data *dev_data, + struct protection_domain *pdom) +{ + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); + int max_pasids = dev_data->max_pasids; + int ret = 0; + + /* + * If domain is in pt mode then setup GCR3 table only if device + * is PASID capable + */ + if (pdom_is_in_pt_mode(pdom) && !pdev_pasid_supported(dev_data)) + return ret; + + /* + * By default, setup GCR3 table to support MAX PASIDs + * supported by the device/IOMMU. + */ + ret = setup_gcr3_table(&dev_data->gcr3_info, iommu, + max_pasids > 0 ? max_pasids : 1); + if (ret) + return ret; + + /* Setup GCR3[0] only if domain is setup with v2 page table mode */ + if (!pdom_is_v2_pgtbl_mode(pdom)) + return ret; + + ret = update_gcr3(dev_data, 0, iommu_virt_to_phys(pdom->iop.pgd), true); + if (ret) + free_gcr3_table(&dev_data->gcr3_info); + + return ret; +} + +static void destroy_gcr3_table(struct iommu_dev_data *dev_data, + struct protection_domain *pdom) +{ + struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + + if (pdom_is_v2_pgtbl_mode(pdom)) + update_gcr3(dev_data, 0, 0, false); + + if (gcr3_info->gcr3_tbl == NULL) return; - ats = dev_data->ats_enabled; + + free_gcr3_table(gcr3_info); +} + +static int do_attach(struct iommu_dev_data *dev_data, + struct protection_domain *domain) +{ + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); + int ret = 0; /* Update data structures */ dev_data->domain = domain; @@ -1827,34 +2046,34 @@ static void do_attach(struct iommu_dev_data *dev_data, domain->dev_iommu[iommu->index] += 1; domain->dev_cnt += 1; - /* Update device table */ - set_dte_entry(iommu, dev_data->devid, domain, - ats, dev_data->ppr); - clone_aliases(iommu, dev_data->dev); + /* Setup GCR3 table */ + if (pdom_is_sva_capable(domain)) { + ret = init_gcr3_table(dev_data, domain); + if (ret) + return ret; + } - device_flush_dte(dev_data); + return ret; } static void do_detach(struct iommu_dev_data *dev_data) { struct protection_domain *domain = dev_data->domain; - struct amd_iommu *iommu; + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) - return; + /* Clear DTE and flush the entry */ + amd_iommu_dev_update_dte(dev_data, false); + + /* Flush IOTLB and wait for the flushes to finish */ + amd_iommu_domain_flush_all(domain); + + /* Clear GCR3 table */ + if (pdom_is_sva_capable(domain)) + destroy_gcr3_table(dev_data, domain); /* Update data structures */ dev_data->domain = NULL; list_del(&dev_data->list); - clear_dte_entry(iommu, dev_data->devid); - clone_aliases(iommu, dev_data->dev); - - /* Flush the DTE entry */ - device_flush_dte(dev_data); - - /* Flush IOTLB and wait for the flushes to finish */ - amd_iommu_domain_flush_all(domain); /* decrease reference counters - needs to happen after the flushes */ domain->dev_iommu[iommu->index] -= 1; @@ -1883,10 +2102,7 @@ static int attach_device(struct device *dev, goto out; } - if (dev_is_pci(dev)) - pdev_enable_caps(to_pci_dev(dev)); - - do_attach(dev_data, domain); + ret = do_attach(dev_data, domain); out: spin_unlock(&dev_data->lock); @@ -1901,12 +2117,11 @@ out: */ static void detach_device(struct device *dev) { - struct protection_domain *domain; - struct iommu_dev_data *dev_data; + struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); + struct protection_domain *domain = dev_data->domain; + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); unsigned long flags; - - dev_data = dev_iommu_priv_get(dev); - domain = dev_data->domain; + bool ppr = dev_data->ppr; spin_lock_irqsave(&domain->lock, flags); @@ -1921,21 +2136,34 @@ static void detach_device(struct device *dev) if (WARN_ON(!dev_data->domain)) goto out; - do_detach(dev_data); + if (ppr) { + iopf_queue_flush_dev(dev); - if (dev_is_pci(dev)) - pdev_disable_caps(to_pci_dev(dev)); + /* Updated here so that it gets reflected in DTE */ + dev_data->ppr = false; + } + + do_detach(dev_data); out: spin_unlock(&dev_data->lock); spin_unlock_irqrestore(&domain->lock, flags); + + /* Remove IOPF handler */ + if (ppr) + amd_iommu_iopf_remove_device(iommu, dev_data); + + if (dev_is_pci(dev)) + pdev_disable_caps(to_pci_dev(dev)); + } static struct iommu_device *amd_iommu_probe_device(struct device *dev) { struct iommu_device *iommu_dev; struct amd_iommu *iommu; + struct iommu_dev_data *dev_data; int ret; if (!check_device(dev)) @@ -1954,8 +2182,7 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) ret = iommu_init_device(iommu, dev); if (ret) { - if (ret != -ENOTSUPP) - dev_err(dev, "Failed to initialize - trying to proceed anyway\n"); + dev_err(dev, "Failed to initialize - trying to proceed anyway\n"); iommu_dev = ERR_PTR(ret); iommu_ignore_device(iommu, dev); } else { @@ -1963,18 +2190,22 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) iommu_dev = &iommu->iommu; } + /* + * If IOMMU and device supports PASID then it will contain max + * supported PASIDs, else it will be zero. + */ + dev_data = dev_iommu_priv_get(dev); + if (amd_iommu_pasid_supported() && dev_is_pci(dev) && + pdev_pasid_supported(dev_data)) { + dev_data->max_pasids = min_t(u32, iommu->iommu.max_pasids, + pci_max_pasids(to_pci_dev(dev))); + } + iommu_completion_wait(iommu); return iommu_dev; } -static void amd_iommu_probe_finalize(struct device *dev) -{ - /* Domains are initialized for this device - have a look what we ended up with */ - set_dma_ops(dev, NULL); - iommu_setup_dma_ops(dev, 0, U64_MAX); -} - static void amd_iommu_release_device(struct device *dev) { struct amd_iommu *iommu; @@ -2000,42 +2231,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev) /***************************************************************************** * - * The next functions belong to the dma_ops mapping/unmapping code. - * - *****************************************************************************/ - -static void update_device_table(struct protection_domain *domain) -{ - struct iommu_dev_data *dev_data; - - list_for_each_entry(dev_data, &domain->dev_list, list) { - struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev); - - if (!iommu) - continue; - set_dte_entry(iommu, dev_data->devid, domain, - dev_data->ats_enabled, dev_data->ppr); - clone_aliases(iommu, dev_data->dev); - } -} - -void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) -{ - update_device_table(domain); - domain_flush_devices(domain); -} - -void amd_iommu_domain_update(struct protection_domain *domain) -{ - /* Update device table */ - amd_iommu_update_and_flush_device_table(domain); - - /* Flush domain TLB(s) and wait for completion */ - amd_iommu_domain_flush_all(domain); -} - -/***************************************************************************** - * * The following functions belong to the exported interface of AMD IOMMU * * This interface allows access to lower level functions of the IOMMU @@ -2062,7 +2257,7 @@ static void cleanup_domain(struct protection_domain *domain) WARN_ON(domain->dev_cnt != 0); } -static void protection_domain_free(struct protection_domain *domain) +void protection_domain_free(struct protection_domain *domain) { if (!domain) return; @@ -2070,11 +2265,8 @@ static void protection_domain_free(struct protection_domain *domain) if (domain->iop.pgtbl_cfg.tlb) free_io_pgtable_ops(&domain->iop.iop.ops); - if (domain->flags & PD_IOMMUV2_MASK) - free_gcr3_table(domain); - if (domain->iop.root) - free_page((unsigned long)domain->iop.root); + iommu_free_page(domain->iop.root); if (domain->id) domain_id_free(domain->id); @@ -2089,29 +2281,26 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode) BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL); if (mode != PAGE_MODE_NONE) { - pt_root = (void *)get_zeroed_page(GFP_KERNEL); + pt_root = iommu_alloc_page(GFP_KERNEL); if (!pt_root) return -ENOMEM; } + domain->pd_mode = PD_MODE_V1; amd_iommu_domain_set_pgtable(domain, pt_root, mode); return 0; } -static int protection_domain_init_v2(struct protection_domain *domain) +static int protection_domain_init_v2(struct protection_domain *pdom) { - domain->flags |= PD_GIOV_MASK; - - domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; - - if (setup_gcr3_table(domain, 1)) - return -ENOMEM; + pdom->pd_mode = PD_MODE_V2; + pdom->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; return 0; } -static struct protection_domain *protection_domain_alloc(unsigned int type) +struct protection_domain *protection_domain_alloc(unsigned int type) { struct io_pgtable_ops *pgtbl_ops; struct protection_domain *domain; @@ -2128,11 +2317,13 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) spin_lock_init(&domain->lock); INIT_LIST_HEAD(&domain->dev_list); + INIT_LIST_HEAD(&domain->dev_data_list); domain->nid = NUMA_NO_NODE; switch (type) { /* No need to allocate io pgtable ops in passthrough mode */ case IOMMU_DOMAIN_IDENTITY: + case IOMMU_DOMAIN_SVA: return domain; case IOMMU_DOMAIN_DMA: pgtable = amd_iommu_pgtable; @@ -2194,11 +2385,8 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, struct protection_domain *domain; struct amd_iommu *iommu = NULL; - if (dev) { - iommu = rlookup_amd_iommu(dev); - if (!iommu) - return ERR_PTR(-ENODEV); - } + if (dev) + iommu = get_amd_iommu_from_dev(dev); /* * Since DTE[Mode]=0 is prohibited on SNP-enabled system, @@ -2255,7 +2443,7 @@ amd_iommu_domain_alloc_user(struct device *dev, u32 flags, return do_iommu_domain_alloc(type, dev, flags); } -static void amd_iommu_domain_free(struct iommu_domain *dom) +void amd_iommu_domain_free(struct iommu_domain *dom) { struct protection_domain *domain; unsigned long flags; @@ -2279,7 +2467,8 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, { struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); struct protection_domain *domain = to_pdomain(dom); - struct amd_iommu *iommu = rlookup_amd_iommu(dev); + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev); + struct pci_dev *pdev; int ret; /* @@ -2312,7 +2501,23 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, } #endif - iommu_completion_wait(iommu); + pdev = dev_is_pci(dev_data->dev) ? to_pci_dev(dev_data->dev) : NULL; + if (pdev && pdom_is_sva_capable(domain)) { + pdev_enable_caps(pdev); + + /* + * Device can continue to function even if IOPF + * enablement failed. Hence in error path just + * disable device PRI support. + */ + if (amd_iommu_iopf_add_device(iommu, dev_data)) + pdev_disable_cap_pri(pdev); + } else if (pdev) { + pdev_enable_cap_ats(pdev); + } + + /* Update device table */ + amd_iommu_dev_update_dte(dev_data, true); return ret; } @@ -2337,7 +2542,7 @@ static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova, int prot = 0; int ret = -EINVAL; - if ((amd_iommu_pgtable == AMD_IOMMU_V1) && + if ((domain->pd_mode == PD_MODE_V1) && (domain->iop.mode == PAGE_MODE_NONE)) return -EINVAL; @@ -2383,7 +2588,7 @@ static size_t amd_iommu_unmap_pages(struct iommu_domain *dom, unsigned long iova struct io_pgtable_ops *ops = &domain->iop.iop.ops; size_t r; - if ((amd_iommu_pgtable == AMD_IOMMU_V1) && + if ((domain->pd_mode == PD_MODE_V1) && (domain->iop.mode == PAGE_MODE_NONE)) return 0; @@ -2418,7 +2623,7 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap) case IOMMU_CAP_DEFERRED_FLUSH: return true; case IOMMU_CAP_DIRTY_TRACKING: { - struct amd_iommu *iommu = rlookup_amd_iommu(dev); + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev); return amd_iommu_hd_support(iommu); } @@ -2447,9 +2652,7 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain, } list_for_each_entry(dev_data, &pdomain->dev_list, list) { - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) - continue; + iommu = get_amd_iommu_from_dev_data(dev_data); dev_table = get_dev_table(iommu); pte_root = dev_table[dev_data->devid].data[0]; @@ -2509,9 +2712,7 @@ static void amd_iommu_get_resv_regions(struct device *dev, return; devid = PCI_SBDF_TO_DEVID(sbdf); - iommu = rlookup_amd_iommu(dev); - if (!iommu) - return; + iommu = get_amd_iommu_from_dev(dev); pci_seg = iommu->pci_seg; list_for_each_entry(entry, &pci_seg->unity_map, list) { @@ -2624,18 +2825,54 @@ static const struct iommu_dirty_ops amd_dirty_ops = { .read_and_clear_dirty = amd_iommu_read_and_clear_dirty, }; +static int amd_iommu_dev_enable_feature(struct device *dev, + enum iommu_dev_features feat) +{ + int ret = 0; + + switch (feat) { + case IOMMU_DEV_FEAT_IOPF: + case IOMMU_DEV_FEAT_SVA: + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static int amd_iommu_dev_disable_feature(struct device *dev, + enum iommu_dev_features feat) +{ + int ret = 0; + + switch (feat) { + case IOMMU_DEV_FEAT_IOPF: + case IOMMU_DEV_FEAT_SVA: + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .domain_alloc = amd_iommu_domain_alloc, .domain_alloc_user = amd_iommu_domain_alloc_user, + .domain_alloc_sva = amd_iommu_domain_alloc_sva, .probe_device = amd_iommu_probe_device, .release_device = amd_iommu_release_device, - .probe_finalize = amd_iommu_probe_finalize, .device_group = amd_iommu_device_group, .get_resv_regions = amd_iommu_get_resv_regions, .is_attach_deferred = amd_iommu_is_attach_deferred, .pgsize_bitmap = AMD_IOMMU_PGSIZES, .def_domain_type = amd_iommu_def_domain_type, + .dev_enable_feat = amd_iommu_dev_enable_feature, + .dev_disable_feat = amd_iommu_dev_disable_feature, + .remove_dev_pasid = amd_iommu_remove_dev_pasid, + .page_response = amd_iommu_page_response, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = amd_iommu_attach_device, .map_pages = amd_iommu_map_pages, @@ -2649,216 +2886,6 @@ const struct iommu_ops amd_iommu_ops = { } }; -static int __flush_pasid(struct protection_domain *domain, u32 pasid, - u64 address, size_t size) -{ - struct iommu_dev_data *dev_data; - struct iommu_cmd cmd; - int i, ret; - - if (!(domain->flags & PD_IOMMUV2_MASK)) - return -EINVAL; - - build_inv_iommu_pages(&cmd, address, size, domain->id, pasid, true); - - /* - * IOMMU TLB needs to be flushed before Device TLB to - * prevent device TLB refill from IOMMU TLB - */ - for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { - if (domain->dev_iommu[i] == 0) - continue; - - ret = iommu_queue_command(amd_iommus[i], &cmd); - if (ret != 0) - goto out; - } - - /* Wait until IOMMU TLB flushes are complete */ - amd_iommu_domain_flush_complete(domain); - - /* Now flush device TLBs */ - list_for_each_entry(dev_data, &domain->dev_list, list) { - struct amd_iommu *iommu; - int qdep; - - /* - There might be non-IOMMUv2 capable devices in an IOMMUv2 - * domain. - */ - if (!dev_data->ats_enabled) - continue; - - qdep = dev_data->ats_qdep; - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) - continue; - build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, - address, size, pasid, true); - - ret = iommu_queue_command(iommu, &cmd); - if (ret != 0) - goto out; - } - - /* Wait until all device TLBs are flushed */ - amd_iommu_domain_flush_complete(domain); - - ret = 0; - -out: - - return ret; -} - -static int __amd_iommu_flush_page(struct protection_domain *domain, u32 pasid, - u64 address) -{ - return __flush_pasid(domain, pasid, address, PAGE_SIZE); -} - -int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, - u64 address) -{ - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - int ret; - - spin_lock_irqsave(&domain->lock, flags); - ret = __amd_iommu_flush_page(domain, pasid, address); - spin_unlock_irqrestore(&domain->lock, flags); - - return ret; -} - -static int __amd_iommu_flush_tlb(struct protection_domain *domain, u32 pasid) -{ - return __flush_pasid(domain, pasid, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS); -} - -int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid) -{ - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - int ret; - - spin_lock_irqsave(&domain->lock, flags); - ret = __amd_iommu_flush_tlb(domain, pasid); - spin_unlock_irqrestore(&domain->lock, flags); - - return ret; -} - -static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc) -{ - int index; - u64 *pte; - - while (true) { - - index = (pasid >> (9 * level)) & 0x1ff; - pte = &root[index]; - - if (level == 0) - break; - - if (!(*pte & GCR3_VALID)) { - if (!alloc) - return NULL; - - root = (void *)get_zeroed_page(GFP_ATOMIC); - if (root == NULL) - return NULL; - - *pte = iommu_virt_to_phys(root) | GCR3_VALID; - } - - root = iommu_phys_to_virt(*pte & PAGE_MASK); - - level -= 1; - } - - return pte; -} - -static int __set_gcr3(struct protection_domain *domain, u32 pasid, - unsigned long cr3) -{ - u64 *pte; - - if (domain->iop.mode != PAGE_MODE_NONE) - return -EINVAL; - - pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true); - if (pte == NULL) - return -ENOMEM; - - *pte = (cr3 & PAGE_MASK) | GCR3_VALID; - - return __amd_iommu_flush_tlb(domain, pasid); -} - -static int __clear_gcr3(struct protection_domain *domain, u32 pasid) -{ - u64 *pte; - - if (domain->iop.mode != PAGE_MODE_NONE) - return -EINVAL; - - pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false); - if (pte == NULL) - return 0; - - *pte = 0; - - return __amd_iommu_flush_tlb(domain, pasid); -} - -int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, - unsigned long cr3) -{ - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - int ret; - - spin_lock_irqsave(&domain->lock, flags); - ret = __set_gcr3(domain, pasid, cr3); - spin_unlock_irqrestore(&domain->lock, flags); - - return ret; -} - -int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid) -{ - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - int ret; - - spin_lock_irqsave(&domain->lock, flags); - ret = __clear_gcr3(domain, pasid); - spin_unlock_irqrestore(&domain->lock, flags); - - return ret; -} - -int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, - int status, int tag) -{ - struct iommu_dev_data *dev_data; - struct amd_iommu *iommu; - struct iommu_cmd cmd; - - dev_data = dev_iommu_priv_get(&pdev->dev); - iommu = rlookup_amd_iommu(&pdev->dev); - if (!iommu) - return -ENODEV; - - build_complete_ppr(&cmd, dev_data->devid, pasid, status, - tag, dev_data->pri_tlp); - - return iommu_queue_command(iommu, &cmd); -} - #ifdef CONFIG_IRQ_REMAP /***************************************************************************** @@ -3757,20 +3784,11 @@ static struct irq_chip amd_ir_chip = { }; static const struct msi_parent_ops amdvi_msi_parent_ops = { - .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | - MSI_FLAG_MULTI_PCI_MSI | - MSI_FLAG_PCI_IMS, + .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | MSI_FLAG_MULTI_PCI_MSI, .prefix = "IR-", .init_dev_msi_info = msi_parent_init_dev_msi_info, }; -static const struct msi_parent_ops virt_amdvi_msi_parent_ops = { - .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | - MSI_FLAG_MULTI_PCI_MSI, - .prefix = "vIR-", - .init_dev_msi_info = msi_parent_init_dev_msi_info, -}; - int amd_iommu_create_irq_domain(struct amd_iommu *iommu) { struct fwnode_handle *fn; @@ -3788,11 +3806,7 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu) irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_AMDVI); iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT | IRQ_DOMAIN_FLAG_ISOLATED_MSI; - - if (amd_iommu_np_cache) - iommu->ir_domain->msi_parent_ops = &virt_amdvi_msi_parent_ops; - else - iommu->ir_domain->msi_parent_ops = &amdvi_msi_parent_ops; + iommu->ir_domain->msi_parent_ops = &amdvi_msi_parent_ops; return 0; } diff --git a/drivers/iommu/amd/pasid.c b/drivers/iommu/amd/pasid.c new file mode 100644 index 0000000000..a68215f2b3 --- /dev/null +++ b/drivers/iommu/amd/pasid.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. + */ + +#define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) + +#include <linux/iommu.h> +#include <linux/mm_types.h> + +#include "amd_iommu.h" + +static inline bool is_pasid_enabled(struct iommu_dev_data *dev_data) +{ + if (dev_data->pasid_enabled && dev_data->max_pasids && + dev_data->gcr3_info.gcr3_tbl != NULL) + return true; + + return false; +} + +static inline bool is_pasid_valid(struct iommu_dev_data *dev_data, + ioasid_t pasid) +{ + if (pasid > 0 && pasid < dev_data->max_pasids) + return true; + + return false; +} + +static void remove_dev_pasid(struct pdom_dev_data *pdom_dev_data) +{ + /* Update GCR3 table and flush IOTLB */ + amd_iommu_clear_gcr3(pdom_dev_data->dev_data, pdom_dev_data->pasid); + + list_del(&pdom_dev_data->list); + kfree(pdom_dev_data); +} + +/* Clear PASID from device GCR3 table and remove pdom_dev_data from list */ +static void remove_pdom_dev_pasid(struct protection_domain *pdom, + struct device *dev, ioasid_t pasid) +{ + struct pdom_dev_data *pdom_dev_data; + struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); + + lockdep_assert_held(&pdom->lock); + + for_each_pdom_dev_data(pdom_dev_data, pdom) { + if (pdom_dev_data->dev_data == dev_data && + pdom_dev_data->pasid == pasid) { + remove_dev_pasid(pdom_dev_data); + break; + } + } +} + +static void sva_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct pdom_dev_data *pdom_dev_data; + struct protection_domain *sva_pdom; + unsigned long flags; + + sva_pdom = container_of(mn, struct protection_domain, mn); + + spin_lock_irqsave(&sva_pdom->lock, flags); + + for_each_pdom_dev_data(pdom_dev_data, sva_pdom) { + amd_iommu_dev_flush_pasid_pages(pdom_dev_data->dev_data, + pdom_dev_data->pasid, + start, end - start); + } + + spin_unlock_irqrestore(&sva_pdom->lock, flags); +} + +static void sva_mn_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct pdom_dev_data *pdom_dev_data, *next; + struct protection_domain *sva_pdom; + unsigned long flags; + + sva_pdom = container_of(mn, struct protection_domain, mn); + + spin_lock_irqsave(&sva_pdom->lock, flags); + + /* Assume dev_data_list contains same PASID with different devices */ + for_each_pdom_dev_data_safe(pdom_dev_data, next, sva_pdom) + remove_dev_pasid(pdom_dev_data); + + spin_unlock_irqrestore(&sva_pdom->lock, flags); +} + +static const struct mmu_notifier_ops sva_mn = { + .arch_invalidate_secondary_tlbs = sva_arch_invalidate_secondary_tlbs, + .release = sva_mn_release, +}; + +int iommu_sva_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ + struct pdom_dev_data *pdom_dev_data; + struct protection_domain *sva_pdom = to_pdomain(domain); + struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); + unsigned long flags; + int ret = -EINVAL; + + /* PASID zero is used for requests from the I/O device without PASID */ + if (!is_pasid_valid(dev_data, pasid)) + return ret; + + /* Make sure PASID is enabled */ + if (!is_pasid_enabled(dev_data)) + return ret; + + /* Add PASID to protection domain pasid list */ + pdom_dev_data = kzalloc(sizeof(*pdom_dev_data), GFP_KERNEL); + if (pdom_dev_data == NULL) + return ret; + + pdom_dev_data->pasid = pasid; + pdom_dev_data->dev_data = dev_data; + + spin_lock_irqsave(&sva_pdom->lock, flags); + + /* Setup GCR3 table */ + ret = amd_iommu_set_gcr3(dev_data, pasid, + iommu_virt_to_phys(domain->mm->pgd)); + if (ret) { + kfree(pdom_dev_data); + goto out_unlock; + } + + list_add(&pdom_dev_data->list, &sva_pdom->dev_data_list); + +out_unlock: + spin_unlock_irqrestore(&sva_pdom->lock, flags); + return ret; +} + +void amd_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, + struct iommu_domain *domain) +{ + struct protection_domain *sva_pdom; + unsigned long flags; + + if (!is_pasid_valid(dev_iommu_priv_get(dev), pasid)) + return; + + sva_pdom = to_pdomain(domain); + + spin_lock_irqsave(&sva_pdom->lock, flags); + + /* Remove PASID from dev_data_list */ + remove_pdom_dev_pasid(sva_pdom, dev, pasid); + + spin_unlock_irqrestore(&sva_pdom->lock, flags); +} + +static void iommu_sva_domain_free(struct iommu_domain *domain) +{ + struct protection_domain *sva_pdom = to_pdomain(domain); + + if (sva_pdom->mn.ops) + mmu_notifier_unregister(&sva_pdom->mn, domain->mm); + + amd_iommu_domain_free(domain); +} + +static const struct iommu_domain_ops amd_sva_domain_ops = { + .set_dev_pasid = iommu_sva_set_dev_pasid, + .free = iommu_sva_domain_free +}; + +struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev, + struct mm_struct *mm) +{ + struct protection_domain *pdom; + int ret; + + pdom = protection_domain_alloc(IOMMU_DOMAIN_SVA); + if (!pdom) + return ERR_PTR(-ENOMEM); + + pdom->domain.ops = &amd_sva_domain_ops; + pdom->mn.ops = &sva_mn; + + ret = mmu_notifier_register(&pdom->mn, mm); + if (ret) { + protection_domain_free(pdom); + return ERR_PTR(ret); + } + + return &pdom->domain; +} diff --git a/drivers/iommu/amd/ppr.c b/drivers/iommu/amd/ppr.c new file mode 100644 index 0000000000..7c67d69f0b --- /dev/null +++ b/drivers/iommu/amd/ppr.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 Advanced Micro Devices, Inc. + */ + +#define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) + +#include <linux/amd-iommu.h> +#include <linux/delay.h> +#include <linux/mmu_notifier.h> + +#include <asm/iommu.h> + +#include "amd_iommu.h" +#include "amd_iommu_types.h" + +#include "../iommu-pages.h" + +int __init amd_iommu_alloc_ppr_log(struct amd_iommu *iommu) +{ + iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, + PPR_LOG_SIZE); + return iommu->ppr_log ? 0 : -ENOMEM; +} + +void amd_iommu_enable_ppr_log(struct amd_iommu *iommu) +{ + u64 entry; + + if (iommu->ppr_log == NULL) + return; + + iommu_feature_enable(iommu, CONTROL_PPR_EN); + + entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512; + + memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET, + &entry, sizeof(entry)); + + /* set head and tail to zero manually */ + writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); + writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); + + iommu_feature_enable(iommu, CONTROL_PPRINT_EN); + iommu_feature_enable(iommu, CONTROL_PPRLOG_EN); +} + +void __init amd_iommu_free_ppr_log(struct amd_iommu *iommu) +{ + iommu_free_pages(iommu->ppr_log, get_order(PPR_LOG_SIZE)); +} + +/* + * This function restarts ppr logging in case the IOMMU experienced + * PPR log overflow. + */ +void amd_iommu_restart_ppr_log(struct amd_iommu *iommu) +{ + amd_iommu_restart_log(iommu, "PPR", CONTROL_PPRINT_EN, + CONTROL_PPRLOG_EN, MMIO_STATUS_PPR_RUN_MASK, + MMIO_STATUS_PPR_OVERFLOW_MASK); +} + +static inline u32 ppr_flag_to_fault_perm(u16 flag) +{ + int perm = 0; + + if (flag & PPR_FLAG_READ) + perm |= IOMMU_FAULT_PERM_READ; + if (flag & PPR_FLAG_WRITE) + perm |= IOMMU_FAULT_PERM_WRITE; + if (flag & PPR_FLAG_EXEC) + perm |= IOMMU_FAULT_PERM_EXEC; + if (!(flag & PPR_FLAG_US)) + perm |= IOMMU_FAULT_PERM_PRIV; + + return perm; +} + +static bool ppr_is_valid(struct amd_iommu *iommu, u64 *raw) +{ + struct device *dev = iommu->iommu.dev; + u16 devid = PPR_DEVID(raw[0]); + + if (!(PPR_FLAGS(raw[0]) & PPR_FLAG_GN)) { + dev_dbg(dev, "PPR logged [Request ignored due to GN=0 (device=%04x:%02x:%02x.%x " + "pasid=0x%05llx address=0x%llx flags=0x%04llx tag=0x%03llx]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + PPR_PASID(raw[0]), raw[1], PPR_FLAGS(raw[0]), PPR_TAG(raw[0])); + return false; + } + + if (PPR_FLAGS(raw[0]) & PPR_FLAG_RVSD) { + dev_dbg(dev, "PPR logged [Invalid request format (device=%04x:%02x:%02x.%x " + "pasid=0x%05llx address=0x%llx flags=0x%04llx tag=0x%03llx]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + PPR_PASID(raw[0]), raw[1], PPR_FLAGS(raw[0]), PPR_TAG(raw[0])); + return false; + } + + return true; +} + +static void iommu_call_iopf_notifier(struct amd_iommu *iommu, u64 *raw) +{ + struct iommu_dev_data *dev_data; + struct iopf_fault event; + struct pci_dev *pdev; + u16 devid = PPR_DEVID(raw[0]); + + if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) { + pr_info_ratelimited("Unknown PPR request received\n"); + return; + } + + pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, + PCI_BUS_NUM(devid), devid & 0xff); + if (!pdev) + return; + + if (!ppr_is_valid(iommu, raw)) + goto out; + + memset(&event, 0, sizeof(struct iopf_fault)); + + event.fault.type = IOMMU_FAULT_PAGE_REQ; + event.fault.prm.perm = ppr_flag_to_fault_perm(PPR_FLAGS(raw[0])); + event.fault.prm.addr = (u64)(raw[1] & PAGE_MASK); + event.fault.prm.pasid = PPR_PASID(raw[0]); + event.fault.prm.grpid = PPR_TAG(raw[0]) & 0x1FF; + + /* + * PASID zero is used for requests from the I/O device without + * a PASID + */ + dev_data = dev_iommu_priv_get(&pdev->dev); + if (event.fault.prm.pasid == 0 || + event.fault.prm.pasid >= dev_data->max_pasids) { + pr_info_ratelimited("Invalid PASID : 0x%x, device : 0x%x\n", + event.fault.prm.pasid, pdev->dev.id); + goto out; + } + + event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID; + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + if (PPR_TAG(raw[0]) & 0x200) + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; + + /* Submit event */ + iommu_report_device_fault(&pdev->dev, &event); + + return; + +out: + /* Nobody cared, abort */ + amd_iommu_complete_ppr(&pdev->dev, PPR_PASID(raw[0]), + IOMMU_PAGE_RESP_FAILURE, + PPR_TAG(raw[0]) & 0x1FF); +} + +void amd_iommu_poll_ppr_log(struct amd_iommu *iommu) +{ + u32 head, tail; + + if (iommu->ppr_log == NULL) + return; + + head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); + + while (head != tail) { + volatile u64 *raw; + u64 entry[2]; + int i; + + raw = (u64 *)(iommu->ppr_log + head); + + /* + * Hardware bug: Interrupt may arrive before the entry is + * written to memory. If this happens we need to wait for the + * entry to arrive. + */ + for (i = 0; i < LOOP_TIMEOUT; ++i) { + if (PPR_REQ_TYPE(raw[0]) != 0) + break; + udelay(1); + } + + /* Avoid memcpy function-call overhead */ + entry[0] = raw[0]; + entry[1] = raw[1]; + + /* + * To detect the hardware errata 733 we need to clear the + * entry back to zero. This issue does not exist on SNP + * enabled system. Also this buffer is not writeable on + * SNP enabled system. + */ + if (!amd_iommu_snp_en) + raw[0] = raw[1] = 0UL; + + /* Update head pointer of hardware ring-buffer */ + head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; + writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); + + /* Handle PPR entry */ + iommu_call_iopf_notifier(iommu, entry); + } +} + +/************************************************************** + * + * IOPF handling stuff + */ + +/* Setup per-IOMMU IOPF queue if not exist. */ +int amd_iommu_iopf_init(struct amd_iommu *iommu) +{ + int ret = 0; + + if (iommu->iopf_queue) + return ret; + + snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name), "amdvi-%#x", + PCI_SEG_DEVID_TO_SBDF(iommu->pci_seg->id, iommu->devid)); + + iommu->iopf_queue = iopf_queue_alloc(iommu->iopfq_name); + if (!iommu->iopf_queue) + ret = -ENOMEM; + + return ret; +} + +/* Destroy per-IOMMU IOPF queue if no longer needed. */ +void amd_iommu_iopf_uninit(struct amd_iommu *iommu) +{ + iopf_queue_free(iommu->iopf_queue); + iommu->iopf_queue = NULL; +} + +void amd_iommu_page_response(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *resp) +{ + amd_iommu_complete_ppr(dev, resp->pasid, resp->code, resp->grpid); +} + +int amd_iommu_iopf_add_device(struct amd_iommu *iommu, + struct iommu_dev_data *dev_data) +{ + int ret = 0; + + if (!dev_data->pri_enabled) + return ret; + + if (!iommu->iopf_queue) + return -EINVAL; + + ret = iopf_queue_add_device(iommu->iopf_queue, dev_data->dev); + if (ret) + return ret; + + dev_data->ppr = true; + return 0; +} + +/* Its assumed that caller has verified that device was added to iopf queue */ +void amd_iommu_iopf_remove_device(struct amd_iommu *iommu, + struct iommu_dev_data *dev_data) +{ + iopf_queue_remove_device(iommu->iopf_queue, dev_data->dev); + dev_data->ppr = false; +} |