diff options
Diffstat (limited to 'drivers/iommu/amd')
-rw-r--r-- | drivers/iommu/amd/amd_iommu.h | 42 | ||||
-rw-r--r-- | drivers/iommu/amd/amd_iommu_types.h | 34 | ||||
-rw-r--r-- | drivers/iommu/amd/init.c | 146 | ||||
-rw-r--r-- | drivers/iommu/amd/io_pgtable_v2.c | 21 | ||||
-rw-r--r-- | drivers/iommu/amd/iommu.c | 645 |
5 files changed, 454 insertions, 434 deletions
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 8b3601f285..f482aab420 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -39,20 +39,16 @@ extern enum io_pgtable_fmt amd_iommu_pgtable; extern int amd_iommu_gpt_level; bool amd_iommu_v2_supported(void); -struct amd_iommu *get_amd_iommu(unsigned int idx); -u8 amd_iommu_pc_get_max_banks(unsigned int idx); -bool amd_iommu_pc_supported(void); -u8 amd_iommu_pc_get_max_counters(unsigned int idx); -int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, - u8 fxn, u64 *value); -int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, - u8 fxn, u64 *value); /* Device capabilities */ int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev); void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev); -int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address); +/* GCR3 setup */ +int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data, + ioasid_t pasid, unsigned long gcr3); +int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid); + /* * This function flushes all internal caches of * the IOMMU used by this driver. @@ -63,10 +59,10 @@ void amd_iommu_domain_update(struct protection_domain *domain); void amd_iommu_domain_flush_complete(struct protection_domain *domain); void amd_iommu_domain_flush_pages(struct protection_domain *domain, u64 address, size_t size); -int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid); -int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, - unsigned long cr3); -int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid); +void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data, + ioasid_t pasid, u64 address, size_t size); +void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data, + ioasid_t pasid); #ifdef CONFIG_IRQ_REMAP int amd_iommu_create_irq_domain(struct amd_iommu *iommu); @@ -77,10 +73,6 @@ static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu) } #endif -#define PPR_SUCCESS 0x0 -#define PPR_INVALID 0x1 -#define PPR_FAILURE 0xf - int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, int status, int tag); @@ -150,6 +142,21 @@ static inline void *alloc_pgtable_page(int nid, gfp_t gfp) return page ? page_address(page) : NULL; } +/* + * This must be called after device probe completes. During probe + * use rlookup_amd_iommu() get the iommu. + */ +static inline struct amd_iommu *get_amd_iommu_from_dev(struct device *dev) +{ + return iommu_get_iommu_dev(dev, struct amd_iommu, iommu); +} + +/* This must be called after device probe completes. */ +static inline struct amd_iommu *get_amd_iommu_from_dev_data(struct iommu_dev_data *dev_data) +{ + return iommu_get_iommu_dev(dev_data->dev, struct amd_iommu, iommu); +} + bool translation_pre_enabled(struct amd_iommu *iommu); bool amd_iommu_is_attach_deferred(struct device *dev); int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line); @@ -164,5 +171,4 @@ void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); struct dev_table_entry *get_dev_table(struct amd_iommu *iommu); -extern bool amd_iommu_snp_en; #endif diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 809d74faa1..d1fed5fc21 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -453,15 +453,6 @@ #define MAX_DOMAIN_ID 65536 -/* Protection domain flags */ -#define PD_DMA_OPS_MASK BIT(0) /* domain used for dma_ops */ -#define PD_DEFAULT_MASK BIT(1) /* domain is a default dma_ops - domain for an IOMMU */ -#define PD_PASSTHROUGH_MASK BIT(2) /* domain has no page - translation */ -#define PD_IOMMUV2_MASK BIT(3) /* domain has gcr3 table */ -#define PD_GIOV_MASK BIT(4) /* domain enable GIOV support */ - /* Timeout stuff */ #define LOOP_TIMEOUT 100000 #define MMIO_STATUS_TIMEOUT 2000000 @@ -513,14 +504,6 @@ extern struct kmem_cache *amd_iommu_irq_cache; #define for_each_iommu_safe(iommu, next) \ list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list) -#define APERTURE_RANGE_SHIFT 27 /* 128 MB */ -#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT) -#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT) -#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */ -#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) -#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) - - struct amd_iommu; struct iommu_domain; struct irq_domain; @@ -541,6 +524,13 @@ struct amd_irte_ops; #define io_pgtable_cfg_to_data(x) \ container_of((x), struct amd_io_pgtable, pgtbl_cfg) +struct gcr3_tbl_info { + u64 *gcr3_tbl; /* Guest CR3 table */ + int glx; /* Number of levels for GCR3 table */ + u32 pasid_cnt; /* Track attached PASIDs */ + u16 domid; /* Per device domain ID */ +}; + struct amd_io_pgtable { struct io_pgtable_cfg pgtbl_cfg; struct io_pgtable iop; @@ -549,6 +539,11 @@ struct amd_io_pgtable { u64 *pgd; /* v2 pgtable pgd pointer */ }; +enum protection_domain_mode { + PD_MODE_V1 = 1, + PD_MODE_V2, +}; + /* * This structure contains generic data for IOMMU protection domains * independent of their use. @@ -560,10 +555,8 @@ struct protection_domain { struct amd_io_pgtable iop; spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ - int glx; /* Number of levels for GCR3 table */ int nid; /* Node ID */ - u64 *gcr3_tbl; /* Guest CR3 table */ - unsigned long flags; /* flags to find out type of domain */ + enum protection_domain_mode pd_mode; /* Track page table type */ bool dirty_tracking; /* dirty tracking is enabled in the domain */ unsigned dev_cnt; /* devices assigned to this domain */ unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ @@ -816,6 +809,7 @@ struct iommu_dev_data { struct list_head list; /* For domain->dev_list */ struct llist_node dev_data_list; /* For global dev_data_list */ struct protection_domain *domain; /* Domain the device is bound to */ + struct gcr3_tbl_info gcr3_info; /* Per-device GCR3 table */ struct device *dev; u16 devid; /* PCI Device ID */ diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 40979b0f52..e740dc54c4 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -30,6 +30,7 @@ #include <asm/io_apic.h> #include <asm/irq_remapping.h> #include <asm/set_memory.h> +#include <asm/sev.h> #include <linux/crash_dump.h> @@ -1677,8 +1678,17 @@ static void __init free_pci_segments(void) } } +static void __init free_sysfs(struct amd_iommu *iommu) +{ + if (iommu->iommu.dev) { + iommu_device_unregister(&iommu->iommu); + iommu_device_sysfs_remove(&iommu->iommu); + } +} + static void __init free_iommu_one(struct amd_iommu *iommu) { + free_sysfs(iommu); free_cwwb_sem(iommu); free_command_buffer(iommu); free_event_buffer(iommu); @@ -2096,6 +2106,8 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) amd_iommu_max_glx_val = glxval; else amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); + + iommu_enable_gt(iommu); } if (check_feature(FEATURE_PPR) && alloc_ppr_log(iommu)) @@ -2841,10 +2853,8 @@ static void enable_iommus_v2(void) { struct amd_iommu *iommu; - for_each_iommu(iommu) { + for_each_iommu(iommu) iommu_enable_ppr_log(iommu); - iommu_enable_gt(iommu); - } } static void enable_iommus_vapic(void) @@ -3224,6 +3234,39 @@ out: return true; } +static void iommu_snp_enable(void) +{ +#ifdef CONFIG_KVM_AMD_SEV + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + return; + /* + * The SNP support requires that IOMMU must be enabled, and is + * configured with V1 page table (DTE[Mode] = 0 is not supported). + */ + if (no_iommu || iommu_default_passthrough()) { + pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n"); + goto disable_snp; + } + + if (amd_iommu_pgtable != AMD_IOMMU_V1) { + pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n"); + goto disable_snp; + } + + amd_iommu_snp_en = check_feature(FEATURE_SNP); + if (!amd_iommu_snp_en) { + pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n"); + goto disable_snp; + } + + pr_info("IOMMU SNP support enabled.\n"); + return; + +disable_snp: + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); +#endif +} + /**************************************************************************** * * AMD IOMMU Initialization State Machine @@ -3259,6 +3302,7 @@ static int __init state_next(void) break; case IOMMU_ENABLED: register_syscore_ops(&amd_iommu_syscore_ops); + iommu_snp_enable(); ret = amd_iommu_init_pci(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; break; @@ -3697,13 +3741,11 @@ u8 amd_iommu_pc_get_max_banks(unsigned int idx) return 0; } -EXPORT_SYMBOL(amd_iommu_pc_get_max_banks); bool amd_iommu_pc_supported(void) { return amd_iommu_pc_present; } -EXPORT_SYMBOL(amd_iommu_pc_supported); u8 amd_iommu_pc_get_max_counters(unsigned int idx) { @@ -3714,7 +3756,6 @@ u8 amd_iommu_pc_get_max_counters(unsigned int idx) return 0; } -EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value, bool is_write) @@ -3770,40 +3811,85 @@ int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true); } -#ifdef CONFIG_AMD_MEM_ENCRYPT -int amd_iommu_snp_enable(void) +#ifdef CONFIG_KVM_AMD_SEV +static int iommu_page_make_shared(void *page) { - /* - * The SNP support requires that IOMMU must be enabled, and is - * not configured in the passthrough mode. - */ - if (no_iommu || iommu_default_passthrough()) { - pr_err("SNP: IOMMU is disabled or configured in passthrough mode, SNP cannot be supported"); - return -EINVAL; + unsigned long paddr, pfn; + + paddr = iommu_virt_to_phys(page); + /* Cbit maybe set in the paddr */ + pfn = __sme_clr(paddr) >> PAGE_SHIFT; + + if (!(pfn % PTRS_PER_PMD)) { + int ret, level; + bool assigned; + + ret = snp_lookup_rmpentry(pfn, &assigned, &level); + if (ret) { + pr_warn("IOMMU PFN %lx RMP lookup failed, ret %d\n", pfn, ret); + return ret; + } + + if (!assigned) { + pr_warn("IOMMU PFN %lx not assigned in RMP table\n", pfn); + return -EINVAL; + } + + if (level > PG_LEVEL_4K) { + ret = psmash(pfn); + if (!ret) + goto done; + + pr_warn("PSMASH failed for IOMMU PFN %lx huge RMP entry, ret: %d, level: %d\n", + pfn, ret, level); + return ret; + } } - /* - * Prevent enabling SNP after IOMMU_ENABLED state because this process - * affect how IOMMU driver sets up data structures and configures - * IOMMU hardware. - */ - if (init_state > IOMMU_ENABLED) { - pr_err("SNP: Too late to enable SNP for IOMMU.\n"); - return -EINVAL; +done: + return rmp_make_shared(pfn, PG_LEVEL_4K); +} + +static int iommu_make_shared(void *va, size_t size) +{ + void *page; + int ret; + + if (!va) + return 0; + + for (page = va; page < (va + size); page += PAGE_SIZE) { + ret = iommu_page_make_shared(page); + if (ret) + return ret; } - amd_iommu_snp_en = check_feature(FEATURE_SNP); + return 0; +} + +int amd_iommu_snp_disable(void) +{ + struct amd_iommu *iommu; + int ret; + if (!amd_iommu_snp_en) - return -EINVAL; + return 0; - pr_info("SNP enabled\n"); + for_each_iommu(iommu) { + ret = iommu_make_shared(iommu->evt_buf, EVT_BUFFER_SIZE); + if (ret) + return ret; - /* Enforce IOMMU v1 pagetable when SNP is enabled. */ - if (amd_iommu_pgtable != AMD_IOMMU_V1) { - pr_warn("Force to using AMD IOMMU v1 page table due to SNP\n"); - amd_iommu_pgtable = AMD_IOMMU_V1; + ret = iommu_make_shared(iommu->ppr_log, PPR_LOG_SIZE); + if (ret) + return ret; + + ret = iommu_make_shared((void *)iommu->cmd_sem, PAGE_SIZE); + if (ret) + return ret; } return 0; } +EXPORT_SYMBOL_GPL(amd_iommu_snp_disable); #endif diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index 6d69ba6074..93489d2db4 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -350,38 +350,26 @@ static const struct iommu_flush_ops v2_flush_ops = { static void v2_free_pgtable(struct io_pgtable *iop) { - struct protection_domain *pdom; struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); - pdom = container_of(pgtable, struct protection_domain, iop); - if (!(pdom->flags & PD_IOMMUV2_MASK)) + if (!pgtable || !pgtable->pgd) return; - /* Clear gcr3 entry */ - amd_iommu_domain_clear_gcr3(&pdom->domain, 0); - - /* Make changes visible to IOMMUs */ - amd_iommu_domain_update(pdom); - /* Free page table */ free_pgtable(pgtable->pgd, get_pgtable_level()); + pgtable->pgd = NULL; } static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) { struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); struct protection_domain *pdom = (struct protection_domain *)cookie; - int ret; int ias = IOMMU_IN_ADDR_BIT_SIZE; pgtable->pgd = alloc_pgtable_page(pdom->nid, GFP_ATOMIC); if (!pgtable->pgd) return NULL; - ret = amd_iommu_domain_set_gcr3(&pdom->domain, 0, iommu_virt_to_phys(pgtable->pgd)); - if (ret) - goto err_free_pgd; - if (get_pgtable_level() == PAGE_MODE_5_LEVEL) ias = 57; @@ -395,11 +383,6 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo cfg->tlb = &v2_flush_ops; return &pgtable->iop; - -err_free_pgd: - free_pgtable_page(pgtable->pgd); - - return NULL; } struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = { diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index f945bf3253..fb727f5b0b 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -45,10 +45,6 @@ #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) -/* IO virtual address start page frame number */ -#define IOVA_START_PFN (1) -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) - /* Reserved IOVA ranges */ #define MSI_RANGE_START (0xfee00000) #define MSI_RANGE_END (0xfeefffff) @@ -79,6 +75,9 @@ struct kmem_cache *amd_iommu_irq_cache; static void detach_device(struct device *dev); +static void set_dte_entry(struct amd_iommu *iommu, + struct iommu_dev_data *dev_data); + /**************************************************************************** * * Helper functions @@ -87,7 +86,7 @@ static void detach_device(struct device *dev); static inline bool pdom_is_v2_pgtbl_mode(struct protection_domain *pdom) { - return (pdom && (pdom->flags & PD_IOMMUV2_MASK)); + return (pdom && (pdom->pd_mode == PD_MODE_V2)); } static inline int get_acpihid_device_id(struct device *dev, @@ -1388,14 +1387,9 @@ void amd_iommu_flush_all_caches(struct amd_iommu *iommu) static int device_flush_iotlb(struct iommu_dev_data *dev_data, u64 address, size_t size, ioasid_t pasid, bool gn) { - struct amd_iommu *iommu; + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); struct iommu_cmd cmd; - int qdep; - - qdep = dev_data->ats_qdep; - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) - return -EINVAL; + int qdep = dev_data->ats_qdep; build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, size, pasid, gn); @@ -1415,16 +1409,12 @@ static int device_flush_dte_alias(struct pci_dev *pdev, u16 alias, void *data) */ static int device_flush_dte(struct iommu_dev_data *dev_data) { - struct amd_iommu *iommu; + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); struct pci_dev *pdev = NULL; struct amd_iommu_pci_seg *pci_seg; u16 alias; int ret; - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) - return -EINVAL; - if (dev_is_pci(dev_data->dev)) pdev = to_pci_dev(dev_data->dev); @@ -1453,27 +1443,37 @@ static int device_flush_dte(struct iommu_dev_data *dev_data) return ret; } -/* - * TLB invalidation function which is called from the mapping functions. - * It invalidates a single PTE if the range to flush is within a single - * page. Otherwise it flushes the whole TLB of the IOMMU. - */ -static void __domain_flush_pages(struct protection_domain *domain, +static int domain_flush_pages_v2(struct protection_domain *pdom, u64 address, size_t size) { struct iommu_dev_data *dev_data; struct iommu_cmd cmd; - int ret = 0, i; - ioasid_t pasid = IOMMU_NO_PASID; - bool gn = false; + int ret = 0; - if (pdom_is_v2_pgtbl_mode(domain)) - gn = true; + list_for_each_entry(dev_data, &pdom->dev_list, list) { + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev); + u16 domid = dev_data->gcr3_info.domid; + + build_inv_iommu_pages(&cmd, address, size, + domid, IOMMU_NO_PASID, true); + + ret |= iommu_queue_command(iommu, &cmd); + } + + return ret; +} - build_inv_iommu_pages(&cmd, address, size, domain->id, pasid, gn); +static int domain_flush_pages_v1(struct protection_domain *pdom, + u64 address, size_t size) +{ + struct iommu_cmd cmd; + int ret = 0, i; + + build_inv_iommu_pages(&cmd, address, size, + pdom->id, IOMMU_NO_PASID, false); for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { - if (!domain->dev_iommu[i]) + if (!pdom->dev_iommu[i]) continue; /* @@ -1483,6 +1483,28 @@ static void __domain_flush_pages(struct protection_domain *domain, ret |= iommu_queue_command(amd_iommus[i], &cmd); } + return ret; +} + +/* + * TLB invalidation function which is called from the mapping functions. + * It flushes range of PTEs of the domain. + */ +static void __domain_flush_pages(struct protection_domain *domain, + u64 address, size_t size) +{ + struct iommu_dev_data *dev_data; + int ret = 0; + ioasid_t pasid = IOMMU_NO_PASID; + bool gn = false; + + if (pdom_is_v2_pgtbl_mode(domain)) { + gn = true; + ret = domain_flush_pages_v2(domain, address, size); + } else { + ret = domain_flush_pages_v1(domain, address, size); + } + list_for_each_entry(dev_data, &domain->dev_list, list) { if (!dev_data->ats_enabled) @@ -1551,6 +1573,29 @@ static void amd_iommu_domain_flush_all(struct protection_domain *domain) CMD_INV_IOMMU_ALL_PAGES_ADDRESS); } +void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data, + ioasid_t pasid, u64 address, size_t size) +{ + struct iommu_cmd cmd; + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev); + + build_inv_iommu_pages(&cmd, address, size, + dev_data->gcr3_info.domid, pasid, true); + iommu_queue_command(iommu, &cmd); + + if (dev_data->ats_enabled) + device_flush_iotlb(dev_data, address, size, pasid, true); + + iommu_completion_wait(iommu); +} + +void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data, + ioasid_t pasid) +{ + amd_iommu_dev_flush_pasid_pages(dev_data, 0, + CMD_INV_IOMMU_ALL_PAGES_ADDRESS, pasid); +} + void amd_iommu_domain_flush_complete(struct protection_domain *domain) { int i; @@ -1592,6 +1637,49 @@ static void domain_flush_devices(struct protection_domain *domain) device_flush_dte(dev_data); } +static void update_device_table(struct protection_domain *domain) +{ + struct iommu_dev_data *dev_data; + + list_for_each_entry(dev_data, &domain->dev_list, list) { + struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev); + + set_dte_entry(iommu, dev_data); + clone_aliases(iommu, dev_data->dev); + } +} + +void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) +{ + update_device_table(domain); + domain_flush_devices(domain); +} + +void amd_iommu_domain_update(struct protection_domain *domain) +{ + /* Update device table */ + amd_iommu_update_and_flush_device_table(domain); + + /* Flush domain TLB(s) and wait for completion */ + amd_iommu_domain_flush_all(domain); +} + +int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, + int status, int tag) +{ + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + struct iommu_cmd cmd; + + dev_data = dev_iommu_priv_get(&pdev->dev); + iommu = get_amd_iommu_from_dev(&pdev->dev); + + build_complete_ppr(&cmd, dev_data->devid, pasid, status, + tag, dev_data->pri_tlp); + + return iommu_queue_command(iommu, &cmd); +} + /**************************************************************************** * * The next functions belong to the domain allocation. A domain is @@ -1604,26 +1692,29 @@ static void domain_flush_devices(struct protection_domain *domain) static u16 domain_id_alloc(void) { + unsigned long flags; int id; - spin_lock(&pd_bitmap_lock); + spin_lock_irqsave(&pd_bitmap_lock, flags); id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID); BUG_ON(id == 0); if (id > 0 && id < MAX_DOMAIN_ID) __set_bit(id, amd_iommu_pd_alloc_bitmap); else id = 0; - spin_unlock(&pd_bitmap_lock); + spin_unlock_irqrestore(&pd_bitmap_lock, flags); return id; } static void domain_id_free(int id) { - spin_lock(&pd_bitmap_lock); + unsigned long flags; + + spin_lock_irqsave(&pd_bitmap_lock, flags); if (id > 0 && id < MAX_DOMAIN_ID) __clear_bit(id, amd_iommu_pd_alloc_bitmap); - spin_unlock(&pd_bitmap_lock); + spin_unlock_irqrestore(&pd_bitmap_lock, flags); } static void free_gcr3_tbl_level1(u64 *tbl) @@ -1656,16 +1747,22 @@ static void free_gcr3_tbl_level2(u64 *tbl) } } -static void free_gcr3_table(struct protection_domain *domain) +static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info) { - if (domain->glx == 2) - free_gcr3_tbl_level2(domain->gcr3_tbl); - else if (domain->glx == 1) - free_gcr3_tbl_level1(domain->gcr3_tbl); + if (gcr3_info->glx == 2) + free_gcr3_tbl_level2(gcr3_info->gcr3_tbl); + else if (gcr3_info->glx == 1) + free_gcr3_tbl_level1(gcr3_info->gcr3_tbl); else - BUG_ON(domain->glx != 0); + WARN_ON_ONCE(gcr3_info->glx != 0); + + gcr3_info->glx = 0; - free_page((unsigned long)domain->gcr3_tbl); + /* Free per device domain ID */ + domain_id_free(gcr3_info->domid); + + free_page((unsigned long)gcr3_info->gcr3_tbl); + gcr3_info->gcr3_tbl = NULL; } /* @@ -1684,33 +1781,133 @@ static int get_gcr3_levels(int pasids) return levels ? (DIV_ROUND_UP(levels, 9) - 1) : levels; } -/* Note: This function expects iommu_domain->lock to be held prior calling the function. */ -static int setup_gcr3_table(struct protection_domain *domain, int pasids) +static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info, + struct amd_iommu *iommu, int pasids) { int levels = get_gcr3_levels(pasids); + int nid = iommu ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE; if (levels > amd_iommu_max_glx_val) return -EINVAL; - domain->gcr3_tbl = alloc_pgtable_page(domain->nid, GFP_ATOMIC); - if (domain->gcr3_tbl == NULL) + if (gcr3_info->gcr3_tbl) + return -EBUSY; + + /* Allocate per device domain ID */ + gcr3_info->domid = domain_id_alloc(); + + gcr3_info->gcr3_tbl = alloc_pgtable_page(nid, GFP_ATOMIC); + if (gcr3_info->gcr3_tbl == NULL) { + domain_id_free(gcr3_info->domid); return -ENOMEM; + } + + gcr3_info->glx = levels; + + return 0; +} + +static u64 *__get_gcr3_pte(struct gcr3_tbl_info *gcr3_info, + ioasid_t pasid, bool alloc) +{ + int index; + u64 *pte; + u64 *root = gcr3_info->gcr3_tbl; + int level = gcr3_info->glx; + + while (true) { + + index = (pasid >> (9 * level)) & 0x1ff; + pte = &root[index]; + + if (level == 0) + break; + + if (!(*pte & GCR3_VALID)) { + if (!alloc) + return NULL; + + root = (void *)get_zeroed_page(GFP_ATOMIC); + if (root == NULL) + return NULL; + + *pte = iommu_virt_to_phys(root) | GCR3_VALID; + } + + root = iommu_phys_to_virt(*pte & PAGE_MASK); + + level -= 1; + } + + return pte; +} + +static int update_gcr3(struct iommu_dev_data *dev_data, + ioasid_t pasid, unsigned long gcr3, bool set) +{ + struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + u64 *pte; - domain->glx = levels; - domain->flags |= PD_IOMMUV2_MASK; + pte = __get_gcr3_pte(gcr3_info, pasid, true); + if (pte == NULL) + return -ENOMEM; - amd_iommu_domain_update(domain); + if (set) + *pte = (gcr3 & PAGE_MASK) | GCR3_VALID; + else + *pte = 0; + amd_iommu_dev_flush_pasid_all(dev_data, pasid); return 0; } -static void set_dte_entry(struct amd_iommu *iommu, u16 devid, - struct protection_domain *domain, bool ats, bool ppr) +int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid, + unsigned long gcr3) +{ + struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + int ret; + + iommu_group_mutex_assert(dev_data->dev); + + ret = update_gcr3(dev_data, pasid, gcr3, true); + if (ret) + return ret; + + gcr3_info->pasid_cnt++; + return ret; +} + +int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid) +{ + struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + int ret; + + iommu_group_mutex_assert(dev_data->dev); + + ret = update_gcr3(dev_data, pasid, 0, false); + if (ret) + return ret; + + gcr3_info->pasid_cnt--; + return ret; +} + +static void set_dte_entry(struct amd_iommu *iommu, + struct iommu_dev_data *dev_data) { u64 pte_root = 0; u64 flags = 0; u32 old_domid; + u16 devid = dev_data->devid; + u16 domid; + struct protection_domain *domain = dev_data->domain; struct dev_table_entry *dev_table = get_dev_table(iommu); + struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + + if (gcr3_info && gcr3_info->gcr3_tbl) + domid = dev_data->gcr3_info.domid; + else + domid = domain->id; if (domain->iop.mode != PAGE_MODE_NONE) pte_root = iommu_virt_to_phys(domain->iop.root); @@ -1724,23 +1921,23 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, * When SNP is enabled, Only set TV bit when IOMMU * page translation is in use. */ - if (!amd_iommu_snp_en || (domain->id != 0)) + if (!amd_iommu_snp_en || (domid != 0)) pte_root |= DTE_FLAG_TV; flags = dev_table[devid].data[1]; - if (ats) + if (dev_data->ats_enabled) flags |= DTE_FLAG_IOTLB; - if (ppr) + if (dev_data->ppr) pte_root |= 1ULL << DEV_ENTRY_PPR; if (domain->dirty_tracking) pte_root |= DTE_FLAG_HAD; - if (domain->flags & PD_IOMMUV2_MASK) { - u64 gcr3 = iommu_virt_to_phys(domain->gcr3_tbl); - u64 glx = domain->glx; + if (gcr3_info && gcr3_info->gcr3_tbl) { + u64 gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl); + u64 glx = gcr3_info->glx; u64 tmp; pte_root |= DTE_FLAG_GV; @@ -1768,12 +1965,13 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, ((u64)GUEST_PGTABLE_5_LEVEL << DTE_GPT_LEVEL_SHIFT); } - if (domain->flags & PD_GIOV_MASK) + /* GIOV is supported with V2 page table mode only */ + if (pdom_is_v2_pgtbl_mode(domain)) pte_root |= DTE_FLAG_GIOV; } flags &= ~DEV_DOMID_MASK; - flags |= domain->id; + flags |= domid; old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK; dev_table[devid].data[1] = flags; @@ -1804,16 +2002,11 @@ static void clear_dte_entry(struct amd_iommu *iommu, u16 devid) amd_iommu_apply_erratum_63(iommu, devid); } -static void do_attach(struct iommu_dev_data *dev_data, - struct protection_domain *domain) +static int do_attach(struct iommu_dev_data *dev_data, + struct protection_domain *domain) { - struct amd_iommu *iommu; - bool ats; - - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) - return; - ats = dev_data->ats_enabled; + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); + int ret = 0; /* Update data structures */ dev_data->domain = domain; @@ -1827,22 +2020,40 @@ static void do_attach(struct iommu_dev_data *dev_data, domain->dev_iommu[iommu->index] += 1; domain->dev_cnt += 1; + /* Init GCR3 table and update device table */ + if (domain->pd_mode == PD_MODE_V2) { + /* By default, setup GCR3 table to support single PASID */ + ret = setup_gcr3_table(&dev_data->gcr3_info, iommu, 1); + if (ret) + return ret; + + ret = update_gcr3(dev_data, 0, + iommu_virt_to_phys(domain->iop.pgd), true); + if (ret) { + free_gcr3_table(&dev_data->gcr3_info); + return ret; + } + } + /* Update device table */ - set_dte_entry(iommu, dev_data->devid, domain, - ats, dev_data->ppr); + set_dte_entry(iommu, dev_data); clone_aliases(iommu, dev_data->dev); device_flush_dte(dev_data); + + return ret; } static void do_detach(struct iommu_dev_data *dev_data) { struct protection_domain *domain = dev_data->domain; - struct amd_iommu *iommu; + struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) - return; + /* Clear GCR3 table */ + if (domain->pd_mode == PD_MODE_V2) { + update_gcr3(dev_data, 0, 0, false); + free_gcr3_table(&dev_data->gcr3_info); + } /* Update data structures */ dev_data->domain = NULL; @@ -1886,7 +2097,7 @@ static int attach_device(struct device *dev, if (dev_is_pci(dev)) pdev_enable_caps(to_pci_dev(dev)); - do_attach(dev_data, domain); + ret = do_attach(dev_data, domain); out: spin_unlock(&dev_data->lock); @@ -1954,8 +2165,7 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) ret = iommu_init_device(iommu, dev); if (ret) { - if (ret != -ENOTSUPP) - dev_err(dev, "Failed to initialize - trying to proceed anyway\n"); + dev_err(dev, "Failed to initialize - trying to proceed anyway\n"); iommu_dev = ERR_PTR(ret); iommu_ignore_device(iommu, dev); } else { @@ -2000,42 +2210,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev) /***************************************************************************** * - * The next functions belong to the dma_ops mapping/unmapping code. - * - *****************************************************************************/ - -static void update_device_table(struct protection_domain *domain) -{ - struct iommu_dev_data *dev_data; - - list_for_each_entry(dev_data, &domain->dev_list, list) { - struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev); - - if (!iommu) - continue; - set_dte_entry(iommu, dev_data->devid, domain, - dev_data->ats_enabled, dev_data->ppr); - clone_aliases(iommu, dev_data->dev); - } -} - -void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) -{ - update_device_table(domain); - domain_flush_devices(domain); -} - -void amd_iommu_domain_update(struct protection_domain *domain) -{ - /* Update device table */ - amd_iommu_update_and_flush_device_table(domain); - - /* Flush domain TLB(s) and wait for completion */ - amd_iommu_domain_flush_all(domain); -} - -/***************************************************************************** - * * The following functions belong to the exported interface of AMD IOMMU * * This interface allows access to lower level functions of the IOMMU @@ -2070,9 +2244,6 @@ static void protection_domain_free(struct protection_domain *domain) if (domain->iop.pgtbl_cfg.tlb) free_io_pgtable_ops(&domain->iop.iop.ops); - if (domain->flags & PD_IOMMUV2_MASK) - free_gcr3_table(domain); - if (domain->iop.root) free_page((unsigned long)domain->iop.root); @@ -2094,19 +2265,16 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode) return -ENOMEM; } + domain->pd_mode = PD_MODE_V1; amd_iommu_domain_set_pgtable(domain, pt_root, mode); return 0; } -static int protection_domain_init_v2(struct protection_domain *domain) +static int protection_domain_init_v2(struct protection_domain *pdom) { - domain->flags |= PD_GIOV_MASK; - - domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; - - if (setup_gcr3_table(domain, 1)) - return -ENOMEM; + pdom->pd_mode = PD_MODE_V2; + pdom->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; return 0; } @@ -2194,11 +2362,8 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, struct protection_domain *domain; struct amd_iommu *iommu = NULL; - if (dev) { - iommu = rlookup_amd_iommu(dev); - if (!iommu) - return ERR_PTR(-ENODEV); - } + if (dev) + iommu = get_amd_iommu_from_dev(dev); /* * Since DTE[Mode]=0 is prohibited on SNP-enabled system, @@ -2279,7 +2444,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, { struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); struct protection_domain *domain = to_pdomain(dom); - struct amd_iommu *iommu = rlookup_amd_iommu(dev); + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev); int ret; /* @@ -2337,7 +2502,7 @@ static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova, int prot = 0; int ret = -EINVAL; - if ((amd_iommu_pgtable == AMD_IOMMU_V1) && + if ((domain->pd_mode == PD_MODE_V1) && (domain->iop.mode == PAGE_MODE_NONE)) return -EINVAL; @@ -2383,7 +2548,7 @@ static size_t amd_iommu_unmap_pages(struct iommu_domain *dom, unsigned long iova struct io_pgtable_ops *ops = &domain->iop.iop.ops; size_t r; - if ((amd_iommu_pgtable == AMD_IOMMU_V1) && + if ((domain->pd_mode == PD_MODE_V1) && (domain->iop.mode == PAGE_MODE_NONE)) return 0; @@ -2418,7 +2583,7 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap) case IOMMU_CAP_DEFERRED_FLUSH: return true; case IOMMU_CAP_DIRTY_TRACKING: { - struct amd_iommu *iommu = rlookup_amd_iommu(dev); + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev); return amd_iommu_hd_support(iommu); } @@ -2447,9 +2612,7 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain, } list_for_each_entry(dev_data, &pdomain->dev_list, list) { - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) - continue; + iommu = get_amd_iommu_from_dev_data(dev_data); dev_table = get_dev_table(iommu); pte_root = dev_table[dev_data->devid].data[0]; @@ -2509,9 +2672,7 @@ static void amd_iommu_get_resv_regions(struct device *dev, return; devid = PCI_SBDF_TO_DEVID(sbdf); - iommu = rlookup_amd_iommu(dev); - if (!iommu) - return; + iommu = get_amd_iommu_from_dev(dev); pci_seg = iommu->pci_seg; list_for_each_entry(entry, &pci_seg->unity_map, list) { @@ -2649,216 +2810,6 @@ const struct iommu_ops amd_iommu_ops = { } }; -static int __flush_pasid(struct protection_domain *domain, u32 pasid, - u64 address, size_t size) -{ - struct iommu_dev_data *dev_data; - struct iommu_cmd cmd; - int i, ret; - - if (!(domain->flags & PD_IOMMUV2_MASK)) - return -EINVAL; - - build_inv_iommu_pages(&cmd, address, size, domain->id, pasid, true); - - /* - * IOMMU TLB needs to be flushed before Device TLB to - * prevent device TLB refill from IOMMU TLB - */ - for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { - if (domain->dev_iommu[i] == 0) - continue; - - ret = iommu_queue_command(amd_iommus[i], &cmd); - if (ret != 0) - goto out; - } - - /* Wait until IOMMU TLB flushes are complete */ - amd_iommu_domain_flush_complete(domain); - - /* Now flush device TLBs */ - list_for_each_entry(dev_data, &domain->dev_list, list) { - struct amd_iommu *iommu; - int qdep; - - /* - There might be non-IOMMUv2 capable devices in an IOMMUv2 - * domain. - */ - if (!dev_data->ats_enabled) - continue; - - qdep = dev_data->ats_qdep; - iommu = rlookup_amd_iommu(dev_data->dev); - if (!iommu) - continue; - build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, - address, size, pasid, true); - - ret = iommu_queue_command(iommu, &cmd); - if (ret != 0) - goto out; - } - - /* Wait until all device TLBs are flushed */ - amd_iommu_domain_flush_complete(domain); - - ret = 0; - -out: - - return ret; -} - -static int __amd_iommu_flush_page(struct protection_domain *domain, u32 pasid, - u64 address) -{ - return __flush_pasid(domain, pasid, address, PAGE_SIZE); -} - -int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, - u64 address) -{ - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - int ret; - - spin_lock_irqsave(&domain->lock, flags); - ret = __amd_iommu_flush_page(domain, pasid, address); - spin_unlock_irqrestore(&domain->lock, flags); - - return ret; -} - -static int __amd_iommu_flush_tlb(struct protection_domain *domain, u32 pasid) -{ - return __flush_pasid(domain, pasid, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS); -} - -int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid) -{ - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - int ret; - - spin_lock_irqsave(&domain->lock, flags); - ret = __amd_iommu_flush_tlb(domain, pasid); - spin_unlock_irqrestore(&domain->lock, flags); - - return ret; -} - -static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc) -{ - int index; - u64 *pte; - - while (true) { - - index = (pasid >> (9 * level)) & 0x1ff; - pte = &root[index]; - - if (level == 0) - break; - - if (!(*pte & GCR3_VALID)) { - if (!alloc) - return NULL; - - root = (void *)get_zeroed_page(GFP_ATOMIC); - if (root == NULL) - return NULL; - - *pte = iommu_virt_to_phys(root) | GCR3_VALID; - } - - root = iommu_phys_to_virt(*pte & PAGE_MASK); - - level -= 1; - } - - return pte; -} - -static int __set_gcr3(struct protection_domain *domain, u32 pasid, - unsigned long cr3) -{ - u64 *pte; - - if (domain->iop.mode != PAGE_MODE_NONE) - return -EINVAL; - - pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true); - if (pte == NULL) - return -ENOMEM; - - *pte = (cr3 & PAGE_MASK) | GCR3_VALID; - - return __amd_iommu_flush_tlb(domain, pasid); -} - -static int __clear_gcr3(struct protection_domain *domain, u32 pasid) -{ - u64 *pte; - - if (domain->iop.mode != PAGE_MODE_NONE) - return -EINVAL; - - pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false); - if (pte == NULL) - return 0; - - *pte = 0; - - return __amd_iommu_flush_tlb(domain, pasid); -} - -int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, - unsigned long cr3) -{ - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - int ret; - - spin_lock_irqsave(&domain->lock, flags); - ret = __set_gcr3(domain, pasid, cr3); - spin_unlock_irqrestore(&domain->lock, flags); - - return ret; -} - -int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid) -{ - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - int ret; - - spin_lock_irqsave(&domain->lock, flags); - ret = __clear_gcr3(domain, pasid); - spin_unlock_irqrestore(&domain->lock, flags); - - return ret; -} - -int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, - int status, int tag) -{ - struct iommu_dev_data *dev_data; - struct amd_iommu *iommu; - struct iommu_cmd cmd; - - dev_data = dev_iommu_priv_get(&pdev->dev); - iommu = rlookup_amd_iommu(&pdev->dev); - if (!iommu) - return -ENODEV; - - build_complete_ppr(&cmd, dev_data->devid, pasid, status, - tag, dev_data->pri_tlp); - - return iommu_queue_command(iommu, &cmd); -} - #ifdef CONFIG_IRQ_REMAP /***************************************************************************** |