diff options
Diffstat (limited to 'drivers/iommu/intel')
-rw-r--r-- | drivers/iommu/intel/debugfs.c | 3 | ||||
-rw-r--r-- | drivers/iommu/intel/dmar.c | 26 | ||||
-rw-r--r-- | drivers/iommu/intel/iommu.c | 447 | ||||
-rw-r--r-- | drivers/iommu/intel/iommu.h | 189 | ||||
-rw-r--r-- | drivers/iommu/intel/irq_remapping.c | 2 | ||||
-rw-r--r-- | drivers/iommu/intel/nested.c | 88 | ||||
-rw-r--r-- | drivers/iommu/intel/pasid.c | 338 | ||||
-rw-r--r-- | drivers/iommu/intel/pasid.h | 224 | ||||
-rw-r--r-- | drivers/iommu/intel/perfmon.c | 2 | ||||
-rw-r--r-- | drivers/iommu/intel/svm.c | 53 |
10 files changed, 789 insertions, 583 deletions
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c index dee61e513b..86b506af7d 100644 --- a/drivers/iommu/intel/debugfs.c +++ b/drivers/iommu/intel/debugfs.c @@ -106,9 +106,6 @@ static const struct iommu_regset iommu_regs_64[] = { IOMMU_REGSET_ENTRY(MTRR_PHYSMASK8), IOMMU_REGSET_ENTRY(MTRR_PHYSBASE9), IOMMU_REGSET_ENTRY(MTRR_PHYSMASK9), - IOMMU_REGSET_ENTRY(VCCAP), - IOMMU_REGSET_ENTRY(VCMD), - IOMMU_REGSET_ENTRY(VCRSP), }; static struct dentry *intel_iommu_debug; diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 23cb80d62a..36d7427b12 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1095,7 +1095,9 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->agaw = agaw; iommu->msagaw = msagaw; iommu->segment = drhd->segment; - + iommu->device_rbtree = RB_ROOT; + spin_lock_init(&iommu->device_rbtree_lock); + mutex_init(&iommu->iopf_lock); iommu->node = NUMA_NO_NODE; ver = readl(iommu->reg + DMAR_VER_REG); @@ -1271,6 +1273,8 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) { u32 fault; int head, tail; + struct device *dev; + u64 iqe_err, ite_sid; struct q_inval *qi = iommu->qi; int shift = qi_shift(iommu); @@ -1315,6 +1319,13 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) tail = readl(iommu->reg + DMAR_IQT_REG); tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH; + /* + * SID field is valid only when the ITE field is Set in FSTS_REG + * see Intel VT-d spec r4.1, section 11.4.9.9 + */ + iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG); + ite_sid = DMAR_IQER_REG_ITESID(iqe_err); + writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG); pr_info("Invalidation Time-out Error (ITE) cleared\n"); @@ -1324,6 +1335,19 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) head = (head - 2 + QI_LENGTH) % QI_LENGTH; } while (head != tail); + /* + * If device was released or isn't present, no need to retry + * the ATS invalidate request anymore. + * + * 0 value of ite_sid means old VT-d device, no ite_sid value. + * see Intel VT-d spec r4.1, section 11.4.9.9 + */ + if (ite_sid) { + dev = device_rbtree_find(iommu, ite_sid); + if (!dev || !dev_is_pci(dev) || + !pci_device_is_present(to_pci_dev(dev))) + return -ETIMEDOUT; + } if (qi->desc_status[wait_index] == QI_ABORT) return -EAGAIN; } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index a8366b1f4f..d7e10f1311 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -46,9 +46,6 @@ #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57 -#define MAX_AGAW_WIDTH 64 -#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT) - #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1) #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1) @@ -63,74 +60,6 @@ #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) -/* page table handling */ -#define LEVEL_STRIDE (9) -#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1) - -static inline int agaw_to_level(int agaw) -{ - return agaw + 2; -} - -static inline int agaw_to_width(int agaw) -{ - return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH); -} - -static inline int width_to_agaw(int width) -{ - return DIV_ROUND_UP(width - 30, LEVEL_STRIDE); -} - -static inline unsigned int level_to_offset_bits(int level) -{ - return (level - 1) * LEVEL_STRIDE; -} - -static inline int pfn_level_offset(u64 pfn, int level) -{ - return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK; -} - -static inline u64 level_mask(int level) -{ - return -1ULL << level_to_offset_bits(level); -} - -static inline u64 level_size(int level) -{ - return 1ULL << level_to_offset_bits(level); -} - -static inline u64 align_to_level(u64 pfn, int level) -{ - return (pfn + level_size(level) - 1) & level_mask(level); -} - -static inline unsigned long lvl_to_nr_pages(unsigned int lvl) -{ - return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH); -} - -/* VT-d pages must always be _smaller_ than MM pages. Otherwise things - are never going to work. */ -static inline unsigned long mm_to_dma_pfn_start(unsigned long mm_pfn) -{ - return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT); -} -static inline unsigned long mm_to_dma_pfn_end(unsigned long mm_pfn) -{ - return ((mm_pfn + 1) << (PAGE_SHIFT - VTD_PAGE_SHIFT)) - 1; -} -static inline unsigned long page_to_dma_pfn(struct page *pg) -{ - return mm_to_dma_pfn_start(page_to_pfn(pg)); -} -static inline unsigned long virt_to_dma_pfn(void *p) -{ - return page_to_dma_pfn(virt_to_page(p)); -} - static void __init check_tylersburg_isoch(void); static int rwbf_quirk; @@ -168,76 +97,79 @@ static phys_addr_t root_entry_uctp(struct root_entry *re) return re->hi & VTD_PAGE_MASK; } -static inline void context_set_present(struct context_entry *context) +static int device_rid_cmp_key(const void *key, const struct rb_node *node) { - context->lo |= 1; -} + struct device_domain_info *info = + rb_entry(node, struct device_domain_info, node); + const u16 *rid_lhs = key; -static inline void context_set_fault_enable(struct context_entry *context) -{ - context->lo &= (((u64)-1) << 2) | 1; -} + if (*rid_lhs < PCI_DEVID(info->bus, info->devfn)) + return -1; -static inline void context_set_translation_type(struct context_entry *context, - unsigned long value) -{ - context->lo &= (((u64)-1) << 4) | 3; - context->lo |= (value & 3) << 2; -} + if (*rid_lhs > PCI_DEVID(info->bus, info->devfn)) + return 1; -static inline void context_set_address_root(struct context_entry *context, - unsigned long value) -{ - context->lo &= ~VTD_PAGE_MASK; - context->lo |= value & VTD_PAGE_MASK; + return 0; } -static inline void context_set_address_width(struct context_entry *context, - unsigned long value) +static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs) { - context->hi |= value & 7; -} + struct device_domain_info *info = + rb_entry(lhs, struct device_domain_info, node); + u16 key = PCI_DEVID(info->bus, info->devfn); -static inline void context_set_domain_id(struct context_entry *context, - unsigned long value) -{ - context->hi |= (value & ((1 << 16) - 1)) << 8; + return device_rid_cmp_key(&key, rhs); } -static inline void context_set_pasid(struct context_entry *context) +/* + * Looks up an IOMMU-probed device using its source ID. + * + * Returns the pointer to the device if there is a match. Otherwise, + * returns NULL. + * + * Note that this helper doesn't guarantee that the device won't be + * released by the iommu subsystem after being returned. The caller + * should use its own synchronization mechanism to avoid the device + * being released during its use if its possibly the case. + */ +struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid) { - context->lo |= CONTEXT_PASIDE; -} + struct device_domain_info *info = NULL; + struct rb_node *node; + unsigned long flags; -static inline int context_domain_id(struct context_entry *c) -{ - return((c->hi >> 8) & 0xffff); -} + spin_lock_irqsave(&iommu->device_rbtree_lock, flags); + node = rb_find(&rid, &iommu->device_rbtree, device_rid_cmp_key); + if (node) + info = rb_entry(node, struct device_domain_info, node); + spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); -static inline void context_clear_entry(struct context_entry *context) -{ - context->lo = 0; - context->hi = 0; + return info ? info->dev : NULL; } -static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) +static int device_rbtree_insert(struct intel_iommu *iommu, + struct device_domain_info *info) { - if (!iommu->copied_tables) - return false; + struct rb_node *curr; + unsigned long flags; - return test_bit(((long)bus << 8) | devfn, iommu->copied_tables); -} + spin_lock_irqsave(&iommu->device_rbtree_lock, flags); + curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp); + spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); + if (WARN_ON(curr)) + return -EEXIST; -static inline void -set_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) -{ - set_bit(((long)bus << 8) | devfn, iommu->copied_tables); + return 0; } -static inline void -clear_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) +static void device_rbtree_remove(struct device_domain_info *info) { - clear_bit(((long)bus << 8) | devfn, iommu->copied_tables); + struct intel_iommu *iommu = info->iommu; + unsigned long flags; + + spin_lock_irqsave(&iommu->device_rbtree_lock, flags); + rb_erase(&info->node, &iommu->device_rbtree); + spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); } /* @@ -383,13 +315,12 @@ void free_pgtable_page(void *vaddr) free_page((unsigned long)vaddr); } -static inline int domain_type_is_si(struct dmar_domain *domain) +static int domain_type_is_si(struct dmar_domain *domain) { return domain->domain.type == IOMMU_DOMAIN_IDENTITY; } -static inline int domain_pfn_supported(struct dmar_domain *domain, - unsigned long pfn) +static int domain_pfn_supported(struct dmar_domain *domain, unsigned long pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; @@ -451,7 +382,7 @@ int iommu_calculate_agaw(struct intel_iommu *iommu) return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH); } -static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu) +static bool iommu_paging_structure_coherency(struct intel_iommu *iommu) { return sm_supported(iommu) ? ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap); @@ -701,7 +632,7 @@ static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev) return false; } -struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) +static struct intel_iommu *device_lookup_iommu(struct device *dev, u8 *bus, u8 *devfn) { struct dmar_drhd_unit *drhd = NULL; struct pci_dev *pdev = NULL; @@ -1510,6 +1441,46 @@ static void domain_flush_pasid_iotlb(struct intel_iommu *iommu, spin_unlock_irqrestore(&domain->lock, flags); } +static void __iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, + unsigned long pfn, unsigned int pages, + int ih) +{ + unsigned int aligned_pages = __roundup_pow_of_two(pages); + unsigned long bitmask = aligned_pages - 1; + unsigned int mask = ilog2(aligned_pages); + u64 addr = (u64)pfn << VTD_PAGE_SHIFT; + + /* + * PSI masks the low order bits of the base address. If the + * address isn't aligned to the mask, then compute a mask value + * needed to ensure the target range is flushed. + */ + if (unlikely(bitmask & pfn)) { + unsigned long end_pfn = pfn + pages - 1, shared_bits; + + /* + * Since end_pfn <= pfn + bitmask, the only way bits + * higher than bitmask can differ in pfn and end_pfn is + * by carrying. This means after masking out bitmask, + * high bits starting with the first set bit in + * shared_bits are all equal in both pfn and end_pfn. + */ + shared_bits = ~(pfn ^ end_pfn) & ~bitmask; + mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG; + } + + /* + * Fallback to domain selective flush if no PSI support or + * the size is too big. + */ + if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap)) + iommu->flush.flush_iotlb(iommu, did, 0, 0, + DMA_TLB_DSI_FLUSH); + else + iommu->flush.flush_iotlb(iommu, did, addr | ih, mask, + DMA_TLB_PSI_FLUSH); +} + static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, struct dmar_domain *domain, unsigned long pfn, unsigned int pages, @@ -1526,42 +1497,10 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, if (ih) ih = 1 << 6; - if (domain->use_first_level) { + if (domain->use_first_level) domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih); - } else { - unsigned long bitmask = aligned_pages - 1; - - /* - * PSI masks the low order bits of the base address. If the - * address isn't aligned to the mask, then compute a mask value - * needed to ensure the target range is flushed. - */ - if (unlikely(bitmask & pfn)) { - unsigned long end_pfn = pfn + pages - 1, shared_bits; - - /* - * Since end_pfn <= pfn + bitmask, the only way bits - * higher than bitmask can differ in pfn and end_pfn is - * by carrying. This means after masking out bitmask, - * high bits starting with the first set bit in - * shared_bits are all equal in both pfn and end_pfn. - */ - shared_bits = ~(pfn ^ end_pfn) & ~bitmask; - mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG; - } - - /* - * Fallback to domain selective flush if no PSI support or - * the size is too big. - */ - if (!cap_pgsel_inv(iommu->cap) || - mask > cap_max_amask_val(iommu->cap)) - iommu->flush.flush_iotlb(iommu, did, 0, 0, - DMA_TLB_DSI_FLUSH); - else - iommu->flush.flush_iotlb(iommu, did, addr | ih, mask, - DMA_TLB_PSI_FLUSH); - } + else + __iommu_flush_iotlb_psi(iommu, did, pfn, pages, ih); /* * In caching mode, changes of pages from non-present to present require @@ -1572,9 +1511,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, } /* Notification for newly created mappings */ -static inline void __mapping_notify_one(struct intel_iommu *iommu, - struct dmar_domain *domain, - unsigned long pfn, unsigned int pages) +static void __mapping_notify_one(struct intel_iommu *iommu, struct dmar_domain *domain, + unsigned long pfn, unsigned int pages) { /* * It's a non-present to present mapping. Only flush if caching mode @@ -1586,6 +1524,46 @@ static inline void __mapping_notify_one(struct intel_iommu *iommu, iommu_flush_write_buffer(iommu); } +/* + * Flush the relevant caches in nested translation if the domain + * also serves as a parent + */ +static void parent_domain_flush(struct dmar_domain *domain, + unsigned long pfn, + unsigned long pages, int ih) +{ + struct dmar_domain *s1_domain; + + spin_lock(&domain->s1_lock); + list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { + struct device_domain_info *device_info; + struct iommu_domain_info *info; + unsigned long flags; + unsigned long i; + + xa_for_each(&s1_domain->iommu_array, i, info) + __iommu_flush_iotlb_psi(info->iommu, info->did, + pfn, pages, ih); + + if (!s1_domain->has_iotlb_device) + continue; + + spin_lock_irqsave(&s1_domain->lock, flags); + list_for_each_entry(device_info, &s1_domain->devices, link) + /* + * Address translation cache in device side caches the + * result of nested translation. There is no easy way + * to identify the exact set of nested translations + * affected by a change in S2. So just flush the entire + * device cache. + */ + __iommu_flush_dev_iotlb(device_info, 0, + MAX_AGAW_PFN_WIDTH); + spin_unlock_irqrestore(&s1_domain->lock, flags); + } + spin_unlock(&domain->s1_lock); +} + static void intel_flush_iotlb_all(struct iommu_domain *domain) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); @@ -1605,6 +1583,9 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain) if (!cap_caching_mode(iommu->cap)) iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH); } + + if (dmar_domain->nested_parent) + parent_domain_flush(dmar_domain, 0, -1, 0); } static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) @@ -1841,7 +1822,7 @@ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) spin_unlock(&iommu->lock); } -static inline int guestwidth_to_adjustwidth(int gaw) +static int guestwidth_to_adjustwidth(int gaw) { int agaw; int r = (gaw - 12) % 9; @@ -1875,7 +1856,7 @@ static void domain_exit(struct dmar_domain *domain) * Value of X in the PDTS field of a scalable mode context entry * indicates PASID directory with 2^(X + 7) entries. */ -static inline unsigned long context_get_sm_pds(struct pasid_table *table) +static unsigned long context_get_sm_pds(struct pasid_table *table) { unsigned long pds, max_pde; @@ -1887,38 +1868,6 @@ static inline unsigned long context_get_sm_pds(struct pasid_table *table) return pds - 7; } -/* - * Set the RID_PASID field of a scalable mode context entry. The - * IOMMU hardware will use the PASID value set in this field for - * DMA translations of DMA requests without PASID. - */ -static inline void -context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid) -{ - context->hi |= pasid & ((1 << 20) - 1); -} - -/* - * Set the DTE(Device-TLB Enable) field of a scalable mode context - * entry. - */ -static inline void context_set_sm_dte(struct context_entry *context) -{ - context->lo |= BIT_ULL(2); -} - -/* - * Set the PRE(Page Request Enable) field of a scalable mode context - * entry. - */ -static inline void context_set_sm_pre(struct context_entry *context) -{ - context->lo |= BIT_ULL(4); -} - -/* Convert value to context PASID directory size field coding. */ -#define context_pdts(pds) (((pds) & 0x7) << 9) - static int domain_context_mapping_one(struct dmar_domain *domain, struct intel_iommu *iommu, struct pasid_table *table, @@ -2079,14 +2028,11 @@ static int domain_context_mapping_cb(struct pci_dev *pdev, static int domain_context_mapping(struct dmar_domain *domain, struct device *dev) { + struct device_domain_info *info = dev_iommu_priv_get(dev); struct domain_context_mapping_data data; + struct intel_iommu *iommu = info->iommu; + u8 bus = info->bus, devfn = info->devfn; struct pasid_table *table; - struct intel_iommu *iommu; - u8 bus, devfn; - - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return -ENODEV; table = intel_pasid_get_table(dev); @@ -2103,18 +2049,15 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev) } /* Returns a number of VTD pages, but aligned to MM page size */ -static inline unsigned long aligned_nrpages(unsigned long host_addr, - size_t size) +static unsigned long aligned_nrpages(unsigned long host_addr, size_t size) { host_addr &= ~PAGE_MASK; return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; } /* Return largest possible superpage level for a given mapping */ -static inline int hardware_largepage_caps(struct dmar_domain *domain, - unsigned long iov_pfn, - unsigned long phy_pfn, - unsigned long pages) +static int hardware_largepage_caps(struct dmar_domain *domain, unsigned long iov_pfn, + unsigned long phy_pfn, unsigned long pages) { int support, level = 1; unsigned long pfnmerge; @@ -2166,6 +2109,9 @@ static void switch_to_super_page(struct dmar_domain *domain, iommu_flush_iotlb_psi(info->iommu, domain, start_pfn, lvl_pages, 0, 0); + if (domain->nested_parent) + parent_domain_flush(domain, start_pfn, + lvl_pages, 0); } pte++; @@ -2447,15 +2393,10 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); - struct intel_iommu *iommu; + struct intel_iommu *iommu = info->iommu; unsigned long flags; - u8 bus, devfn; int ret; - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return -ENODEV; - ret = domain_attach_iommu(domain, iommu); if (ret) return ret; @@ -2468,7 +2409,7 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { /* Setup the PASID entry for requests without PASID: */ if (hw_pass_through && domain_type_is_si(domain)) - ret = intel_pasid_setup_pass_through(iommu, domain, + ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); else if (domain->use_first_level) ret = domain_setup_first_level(iommu, domain, dev, @@ -3613,7 +3554,7 @@ void intel_iommu_shutdown(void) up_write(&dmar_global_lock); } -static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev) +static struct intel_iommu *dev_to_intel_iommu(struct device *dev) { struct iommu_device *iommu_dev = dev_to_iommu_device(dev); @@ -3692,7 +3633,7 @@ const struct attribute_group *intel_iommu_groups[] = { NULL, }; -static inline bool has_external_pci(void) +static bool has_external_pci(void) { struct pci_dev *pdev = NULL; @@ -3933,30 +3874,6 @@ static void domain_context_clear(struct device_domain_info *info) &domain_context_clear_one_cb, info); } -static void dmar_remove_one_dev_info(struct device *dev) -{ - struct device_domain_info *info = dev_iommu_priv_get(dev); - struct dmar_domain *domain = info->domain; - struct intel_iommu *iommu = info->iommu; - unsigned long flags; - - if (!dev_is_real_dma_subdevice(info->dev)) { - if (dev_is_pci(info->dev) && sm_supported(iommu)) - intel_pasid_tear_down_entry(iommu, info->dev, - IOMMU_NO_PASID, false); - - iommu_disable_pci_caps(info); - domain_context_clear(info); - } - - spin_lock_irqsave(&domain->lock, flags); - list_del(&info->link); - spin_unlock_irqrestore(&domain->lock, flags); - - domain_detach_iommu(domain, iommu); - info->domain = NULL; -} - /* * Clear the page table pointer in context or pasid table entries so that * all DMA requests without PASID from the device are blocked. If the page @@ -4127,14 +4044,11 @@ static void intel_iommu_domain_free(struct iommu_domain *domain) int prepare_domain_attach_device(struct iommu_domain *domain, struct device *dev) { + struct device_domain_info *info = dev_iommu_priv_get(dev); struct dmar_domain *dmar_domain = to_dmar_domain(domain); - struct intel_iommu *iommu; + struct intel_iommu *iommu = info->iommu; int addr_width; - iommu = device_to_iommu(dev, NULL, NULL); - if (!iommu) - return -ENODEV; - if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap)) return -EINVAL; @@ -4306,6 +4220,9 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain, start_pfn, nrpages, list_empty(&gather->freelist), 0); + if (dmar_domain->nested_parent) + parent_domain_flush(dmar_domain, start_pfn, nrpages, + list_empty(&gather->freelist)); put_pages_list(&gather->freelist); } @@ -4411,7 +4328,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) u8 bus, devfn; int ret; - iommu = device_to_iommu(dev, &bus, &devfn); + iommu = device_lookup_iommu(dev, &bus, &devfn); if (!iommu || !iommu->iommu.ops) return ERR_PTR(-ENODEV); @@ -4464,30 +4381,47 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) } dev_iommu_priv_set(dev, info); + if (pdev && pci_ats_supported(pdev)) { + ret = device_rbtree_insert(iommu, info); + if (ret) + goto free; + } if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { ret = intel_pasid_alloc_table(dev); if (ret) { dev_err(dev, "PASID table allocation failed\n"); - dev_iommu_priv_set(dev, NULL); - kfree(info); - return ERR_PTR(ret); + goto clear_rbtree; } } intel_iommu_debugfs_create_dev(info); return &iommu->iommu; +clear_rbtree: + device_rbtree_remove(info); +free: + kfree(info); + + return ERR_PTR(ret); } static void intel_iommu_release_device(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + + mutex_lock(&iommu->iopf_lock); + if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev))) + device_rbtree_remove(info); + mutex_unlock(&iommu->iopf_lock); + + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) && + !context_copied(iommu, info->bus, info->devfn)) + intel_pasid_teardown_sm_context(dev); - dmar_remove_one_dev_info(dev); intel_pasid_free_table(dev); intel_iommu_debugfs_remove_dev(info); - dev_iommu_priv_set(dev, NULL); kfree(info); set_dma_ops(dev, NULL); } @@ -4747,8 +4681,9 @@ static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain, static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) { - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); + struct device_domain_info *info = dev_iommu_priv_get(dev); struct dev_pasid_info *curr, *dev_pasid = NULL; + struct intel_iommu *iommu = info->iommu; struct dmar_domain *dmar_domain; struct iommu_domain *domain; unsigned long flags; @@ -4819,8 +4754,7 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, goto out_free; if (domain_type_is_si(dmar_domain)) - ret = intel_pasid_setup_pass_through(iommu, dmar_domain, - dev, pasid); + ret = intel_pasid_setup_pass_through(iommu, dev, pasid); else if (dmar_domain->use_first_level) ret = domain_setup_first_level(iommu, dmar_domain, dev, pasid); @@ -4991,6 +4925,7 @@ static const struct iommu_dirty_ops intel_dirty_ops = { const struct iommu_ops intel_iommu_ops = { .blocked_domain = &blocking_domain, + .release_domain = &blocking_domain, .capable = intel_iommu_capable, .hw_info = intel_iommu_hw_info, .domain_alloc = intel_iommu_domain_alloc, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index efc00d2b45..cd267ba64e 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -140,9 +140,6 @@ #define DMAR_ECEO_REG 0x408 #define DMAR_ECRSP_REG 0x410 #define DMAR_ECCAP_REG 0x430 -#define DMAR_VCCAP_REG 0xe30 /* Virtual command capability register */ -#define DMAR_VCMD_REG 0xe00 /* Virtual command register */ -#define DMAR_VCRSP_REG 0xe10 /* Virtual command response register */ #define DMAR_IQER_REG_IQEI(reg) FIELD_GET(GENMASK_ULL(3, 0), reg) #define DMAR_IQER_REG_ITESID(reg) FIELD_GET(GENMASK_ULL(47, 32), reg) @@ -722,9 +719,16 @@ struct intel_iommu { #endif struct iopf_queue *iopf_queue; unsigned char iopfq_name[16]; + /* Synchronization between fault report and iommu device release. */ + struct mutex iopf_lock; struct q_inval *qi; /* Queued invalidation info */ u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/ + /* rb tree for all probed devices */ + struct rb_root device_rbtree; + /* protect the device_rbtree */ + spinlock_t device_rbtree_lock; + #ifdef CONFIG_IRQ_REMAP struct ir_table *ir_table; /* Interrupt remapping info */ struct irq_domain *ir_domain; @@ -758,6 +762,8 @@ struct device_domain_info { struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ struct pasid_table *pasid_table; /* pasid table */ + /* device tracking node(lookup by PCI RID) */ + struct rb_node node; #ifdef CONFIG_INTEL_IOMMU_DEBUGFS struct dentry *debugfs_dentry; /* pointer to device directory dentry */ #endif @@ -860,6 +866,181 @@ static inline bool context_present(struct context_entry *context) return (context->lo & 1); } +#define LEVEL_STRIDE (9) +#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1) +#define MAX_AGAW_WIDTH (64) +#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT) + +static inline int agaw_to_level(int agaw) +{ + return agaw + 2; +} + +static inline int agaw_to_width(int agaw) +{ + return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH); +} + +static inline int width_to_agaw(int width) +{ + return DIV_ROUND_UP(width - 30, LEVEL_STRIDE); +} + +static inline unsigned int level_to_offset_bits(int level) +{ + return (level - 1) * LEVEL_STRIDE; +} + +static inline int pfn_level_offset(u64 pfn, int level) +{ + return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK; +} + +static inline u64 level_mask(int level) +{ + return -1ULL << level_to_offset_bits(level); +} + +static inline u64 level_size(int level) +{ + return 1ULL << level_to_offset_bits(level); +} + +static inline u64 align_to_level(u64 pfn, int level) +{ + return (pfn + level_size(level) - 1) & level_mask(level); +} + +static inline unsigned long lvl_to_nr_pages(unsigned int lvl) +{ + return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH); +} + +/* VT-d pages must always be _smaller_ than MM pages. Otherwise things + are never going to work. */ +static inline unsigned long mm_to_dma_pfn_start(unsigned long mm_pfn) +{ + return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT); +} +static inline unsigned long mm_to_dma_pfn_end(unsigned long mm_pfn) +{ + return ((mm_pfn + 1) << (PAGE_SHIFT - VTD_PAGE_SHIFT)) - 1; +} +static inline unsigned long page_to_dma_pfn(struct page *pg) +{ + return mm_to_dma_pfn_start(page_to_pfn(pg)); +} +static inline unsigned long virt_to_dma_pfn(void *p) +{ + return page_to_dma_pfn(virt_to_page(p)); +} + +static inline void context_set_present(struct context_entry *context) +{ + context->lo |= 1; +} + +static inline void context_set_fault_enable(struct context_entry *context) +{ + context->lo &= (((u64)-1) << 2) | 1; +} + +static inline void context_set_translation_type(struct context_entry *context, + unsigned long value) +{ + context->lo &= (((u64)-1) << 4) | 3; + context->lo |= (value & 3) << 2; +} + +static inline void context_set_address_root(struct context_entry *context, + unsigned long value) +{ + context->lo &= ~VTD_PAGE_MASK; + context->lo |= value & VTD_PAGE_MASK; +} + +static inline void context_set_address_width(struct context_entry *context, + unsigned long value) +{ + context->hi |= value & 7; +} + +static inline void context_set_domain_id(struct context_entry *context, + unsigned long value) +{ + context->hi |= (value & ((1 << 16) - 1)) << 8; +} + +static inline void context_set_pasid(struct context_entry *context) +{ + context->lo |= CONTEXT_PASIDE; +} + +static inline int context_domain_id(struct context_entry *c) +{ + return((c->hi >> 8) & 0xffff); +} + +static inline void context_clear_entry(struct context_entry *context) +{ + context->lo = 0; + context->hi = 0; +} + +#ifdef CONFIG_INTEL_IOMMU +static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) +{ + if (!iommu->copied_tables) + return false; + + return test_bit(((long)bus << 8) | devfn, iommu->copied_tables); +} + +static inline void +set_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) +{ + set_bit(((long)bus << 8) | devfn, iommu->copied_tables); +} + +static inline void +clear_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) +{ + clear_bit(((long)bus << 8) | devfn, iommu->copied_tables); +} +#endif /* CONFIG_INTEL_IOMMU */ + +/* + * Set the RID_PASID field of a scalable mode context entry. The + * IOMMU hardware will use the PASID value set in this field for + * DMA translations of DMA requests without PASID. + */ +static inline void +context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid) +{ + context->hi |= pasid & ((1 << 20) - 1); +} + +/* + * Set the DTE(Device-TLB Enable) field of a scalable mode context + * entry. + */ +static inline void context_set_sm_dte(struct context_entry *context) +{ + context->lo |= BIT_ULL(2); +} + +/* + * Set the PRE(Page Request Enable) field of a scalable mode context + * entry. + */ +static inline void context_set_sm_pre(struct context_entry *context) +{ + context->lo |= BIT_ULL(4); +} + +/* Convert value to context PASID directory size field coding. */ +#define context_pdts(pds) (((pds) & 0x7) << 9) + struct dmar_drhd_unit *dmar_find_matched_drhd_unit(struct pci_dev *dev); int dmar_enable_qi(struct intel_iommu *iommu); @@ -907,9 +1088,9 @@ int dmar_ir_support(void); void *alloc_pgtable_page(int node, gfp_t gfp); void free_pgtable_page(void *vaddr); void iommu_flush_write_buffer(struct intel_iommu *iommu); -struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, const struct iommu_user_data *user_data); +struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid); #ifdef CONFIG_INTEL_IOMMU_SVM void intel_svm_check(struct intel_iommu *iommu); diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 29b9e55dcf..566297bc87 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -1112,7 +1112,7 @@ static void prepare_irte(struct irte *irte, int vector, unsigned int dest) * irq migration in the presence of interrupt-remapping. */ irte->trigger_mode = 0; - irte->dlvry_mode = apic->delivery_mode; + irte->dlvry_mode = APIC_DELIVERY_MODE_FIXED; irte->vector = vector; irte->dest_id = IRTE_DEST(dest); irte->redir_hint = 1; diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index 92e82b33ea..a7d68f3d51 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -81,9 +81,97 @@ static void intel_nested_domain_free(struct iommu_domain *domain) kfree(dmar_domain); } +static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, + unsigned int mask) +{ + struct device_domain_info *info; + unsigned long flags; + u16 sid, qdep; + + spin_lock_irqsave(&domain->lock, flags); + list_for_each_entry(info, &domain->devices, link) { + if (!info->ats_enabled) + continue; + sid = info->bus << 8 | info->devfn; + qdep = info->ats_qdep; + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, + qdep, addr, mask); + quirk_extra_dev_tlb_flush(info, addr, mask, + IOMMU_NO_PASID, qdep); + } + spin_unlock_irqrestore(&domain->lock, flags); +} + +static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr, + u64 npages, bool ih) +{ + struct iommu_domain_info *info; + unsigned int mask; + unsigned long i; + + xa_for_each(&domain->iommu_array, i, info) + qi_flush_piotlb(info->iommu, + domain_id_iommu(domain, info->iommu), + IOMMU_NO_PASID, addr, npages, ih); + + if (!domain->has_iotlb_device) + return; + + if (npages == U64_MAX) + mask = 64 - VTD_PAGE_SHIFT; + else + mask = ilog2(__roundup_pow_of_two(npages)); + + nested_flush_dev_iotlb(domain, addr, mask); +} + +static int intel_nested_cache_invalidate_user(struct iommu_domain *domain, + struct iommu_user_data_array *array) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct iommu_hwpt_vtd_s1_invalidate inv_entry; + u32 index, processed = 0; + int ret = 0; + + if (array->type != IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) { + ret = -EINVAL; + goto out; + } + + for (index = 0; index < array->entry_num; index++) { + ret = iommu_copy_struct_from_user_array(&inv_entry, array, + IOMMU_HWPT_INVALIDATE_DATA_VTD_S1, + index, __reserved); + if (ret) + break; + + if ((inv_entry.flags & ~IOMMU_VTD_INV_FLAGS_LEAF) || + inv_entry.__reserved) { + ret = -EOPNOTSUPP; + break; + } + + if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) || + ((inv_entry.npages == U64_MAX) && inv_entry.addr)) { + ret = -EINVAL; + break; + } + + intel_nested_flush_cache(dmar_domain, inv_entry.addr, + inv_entry.npages, + inv_entry.flags & IOMMU_VTD_INV_FLAGS_LEAF); + processed++; + } + +out: + array->entry_num = processed; + return ret; +} + static const struct iommu_domain_ops intel_nested_domain_ops = { .attach_dev = intel_nested_attach_dev, .free = intel_nested_domain_free, + .cache_invalidate_user = intel_nested_cache_invalidate_user, }; struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index cc2490e5cf..a51e895d9a 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -26,63 +26,6 @@ */ u32 intel_pasid_max_id = PASID_MAX; -int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid) -{ - unsigned long flags; - u8 status_code; - int ret = 0; - u64 res; - - raw_spin_lock_irqsave(&iommu->register_lock, flags); - dmar_writeq(iommu->reg + DMAR_VCMD_REG, VCMD_CMD_ALLOC); - IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq, - !(res & VCMD_VRSP_IP), res); - raw_spin_unlock_irqrestore(&iommu->register_lock, flags); - - status_code = VCMD_VRSP_SC(res); - switch (status_code) { - case VCMD_VRSP_SC_SUCCESS: - *pasid = VCMD_VRSP_RESULT_PASID(res); - break; - case VCMD_VRSP_SC_NO_PASID_AVAIL: - pr_info("IOMMU: %s: No PASID available\n", iommu->name); - ret = -ENOSPC; - break; - default: - ret = -ENODEV; - pr_warn("IOMMU: %s: Unexpected error code %d\n", - iommu->name, status_code); - } - - return ret; -} - -void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid) -{ - unsigned long flags; - u8 status_code; - u64 res; - - raw_spin_lock_irqsave(&iommu->register_lock, flags); - dmar_writeq(iommu->reg + DMAR_VCMD_REG, - VCMD_CMD_OPERAND(pasid) | VCMD_CMD_FREE); - IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq, - !(res & VCMD_VRSP_IP), res); - raw_spin_unlock_irqrestore(&iommu->register_lock, flags); - - status_code = VCMD_VRSP_SC(res); - switch (status_code) { - case VCMD_VRSP_SC_SUCCESS: - break; - case VCMD_VRSP_SC_INVALID_PASID: - pr_info("IOMMU: %s: Invalid PASID\n", iommu->name); - break; - default: - pr_warn("IOMMU: %s: Unexpected error code %d\n", - iommu->name, status_code); - } -} - /* * Per device pasid table management: */ @@ -230,30 +173,6 @@ retry: /* * Interfaces for PASID table entry manipulation: */ -static inline void pasid_clear_entry(struct pasid_entry *pe) -{ - WRITE_ONCE(pe->val[0], 0); - WRITE_ONCE(pe->val[1], 0); - WRITE_ONCE(pe->val[2], 0); - WRITE_ONCE(pe->val[3], 0); - WRITE_ONCE(pe->val[4], 0); - WRITE_ONCE(pe->val[5], 0); - WRITE_ONCE(pe->val[6], 0); - WRITE_ONCE(pe->val[7], 0); -} - -static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe) -{ - WRITE_ONCE(pe->val[0], PASID_PTE_FPD); - WRITE_ONCE(pe->val[1], 0); - WRITE_ONCE(pe->val[2], 0); - WRITE_ONCE(pe->val[3], 0); - WRITE_ONCE(pe->val[4], 0); - WRITE_ONCE(pe->val[5], 0); - WRITE_ONCE(pe->val[6], 0); - WRITE_ONCE(pe->val[7], 0); -} - static void intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore) { @@ -269,192 +188,6 @@ intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore) pasid_clear_entry(pe); } -static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits) -{ - u64 old; - - old = READ_ONCE(*ptr); - WRITE_ONCE(*ptr, (old & ~mask) | bits); -} - -static inline u64 pasid_get_bits(u64 *ptr) -{ - return READ_ONCE(*ptr); -} - -/* - * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode - * PASID entry. - */ -static inline void -pasid_set_domain_id(struct pasid_entry *pe, u64 value) -{ - pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value); -} - -/* - * Get domain ID value of a scalable mode PASID entry. - */ -static inline u16 -pasid_get_domain_id(struct pasid_entry *pe) -{ - return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0)); -} - -/* - * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63) - * of a scalable mode PASID entry. - */ -static inline void -pasid_set_slptr(struct pasid_entry *pe, u64 value) -{ - pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value); -} - -/* - * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID - * entry. - */ -static inline void -pasid_set_address_width(struct pasid_entry *pe, u64 value) -{ - pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2); -} - -/* - * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8) - * of a scalable mode PASID entry. - */ -static inline void -pasid_set_translation_type(struct pasid_entry *pe, u64 value) -{ - pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6); -} - -/* - * Enable fault processing by clearing the FPD(Fault Processing - * Disable) field (Bit 1) of a scalable mode PASID entry. - */ -static inline void pasid_set_fault_enable(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[0], 1 << 1, 0); -} - -/* - * Enable second level A/D bits by setting the SLADE (Second Level - * Access Dirty Enable) field (Bit 9) of a scalable mode PASID - * entry. - */ -static inline void pasid_set_ssade(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[0], 1 << 9, 1 << 9); -} - -/* - * Disable second level A/D bits by clearing the SLADE (Second Level - * Access Dirty Enable) field (Bit 9) of a scalable mode PASID - * entry. - */ -static inline void pasid_clear_ssade(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[0], 1 << 9, 0); -} - -/* - * Checks if second level A/D bits specifically the SLADE (Second Level - * Access Dirty Enable) field (Bit 9) of a scalable mode PASID - * entry is set. - */ -static inline bool pasid_get_ssade(struct pasid_entry *pe) -{ - return pasid_get_bits(&pe->val[0]) & (1 << 9); -} - -/* - * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a - * scalable mode PASID entry. - */ -static inline void pasid_set_sre(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[2], 1 << 0, 1); -} - -/* - * Setup the WPE(Write Protect Enable) field (Bit 132) of a - * scalable mode PASID entry. - */ -static inline void pasid_set_wpe(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[2], 1 << 4, 1 << 4); -} - -/* - * Setup the P(Present) field (Bit 0) of a scalable mode PASID - * entry. - */ -static inline void pasid_set_present(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[0], 1 << 0, 1); -} - -/* - * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID - * entry. - */ -static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value) -{ - pasid_set_bits(&pe->val[1], 1 << 23, value << 23); -} - -/* - * Setup No Execute Enable bit (Bit 133) of a scalable mode PASID - * entry. It is required when XD bit of the first level page table - * entry is about to be set. - */ -static inline void pasid_set_nxe(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[2], 1 << 5, 1 << 5); -} - -/* - * Setup the Page Snoop (PGSNP) field (Bit 88) of a scalable mode - * PASID entry. - */ -static inline void -pasid_set_pgsnp(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[1], 1ULL << 24, 1ULL << 24); -} - -/* - * Setup the First Level Page table Pointer field (Bit 140~191) - * of a scalable mode PASID entry. - */ -static inline void -pasid_set_flptr(struct pasid_entry *pe, u64 value) -{ - pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value); -} - -/* - * Setup the First Level Paging Mode field (Bit 130~131) of a - * scalable mode PASID entry. - */ -static inline void -pasid_set_flpm(struct pasid_entry *pe, u64 value) -{ - pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2); -} - -/* - * Setup the Extended Access Flag Enable (EAFE) field (Bit 135) - * of a scalable mode PASID entry. - */ -static inline void pasid_set_eafe(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7); -} - static void pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid) @@ -616,9 +349,9 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, * Skip top levels of page tables for iommu which has less agaw * than default. Unnecessary for PT mode. */ -static inline int iommu_skip_agaw(struct dmar_domain *domain, - struct intel_iommu *iommu, - struct dma_pte **pgd) +static int iommu_skip_agaw(struct dmar_domain *domain, + struct intel_iommu *iommu, + struct dma_pte **pgd) { int agaw; @@ -769,7 +502,6 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, * Set up the scalable mode pasid entry for passthrough translation type. */ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, - struct dmar_domain *domain, struct device *dev, u32 pasid) { u16 did = FLPT_DEFAULT_DID; @@ -938,3 +670,67 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, return 0; } + +/* + * Interfaces to setup or teardown a pasid table to the scalable-mode + * context table entry: + */ + +static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct context_entry *context; + + spin_lock(&iommu->lock); + context = iommu_context_addr(iommu, bus, devfn, false); + if (!context) { + spin_unlock(&iommu->lock); + return; + } + + context_clear_entry(context); + __iommu_flush_cache(iommu, context, sizeof(*context)); + spin_unlock(&iommu->lock); + + /* + * Cache invalidation for changes to a scalable-mode context table + * entry. + * + * Section 6.5.3.3 of the VT-d spec: + * - Device-selective context-cache invalidation; + * - Domain-selective PASID-cache invalidation to affected domains + * (can be skipped if all PASID entries were not-present); + * - Domain-selective IOTLB invalidation to affected domains; + * - Global Device-TLB invalidation to affected functions. + * + * The iommu has been parked in the blocking state. All domains have + * been detached from the device or PASID. The PASID and IOTLB caches + * have been invalidated during the domain detach path. + */ + iommu->flush.flush_context(iommu, 0, PCI_DEVID(bus, devfn), + DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); + devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID); +} + +static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data) +{ + struct device *dev = data; + + if (dev == &pdev->dev) + device_pasid_table_teardown(dev, PCI_BUS_NUM(alias), alias & 0xff); + + return 0; +} + +void intel_pasid_teardown_sm_context(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + + if (!dev_is_pci(dev)) { + device_pasid_table_teardown(dev, info->bus, info->devfn); + return; + } + + pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev); +} diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 3568adca1f..42fda97fd8 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -22,16 +22,6 @@ #define is_pasid_enabled(entry) (((entry)->lo >> 3) & 0x1) #define get_pasid_dir_size(entry) (1 << ((((entry)->lo >> 9) & 0x7) + 7)) -/* Virtual command interface for enlightened pasid management. */ -#define VCMD_CMD_ALLOC 0x1 -#define VCMD_CMD_FREE 0x2 -#define VCMD_VRSP_IP 0x1 -#define VCMD_VRSP_SC(e) (((e) & 0xff) >> 1) -#define VCMD_VRSP_SC_SUCCESS 0 -#define VCMD_VRSP_SC_NO_PASID_AVAIL 16 -#define VCMD_VRSP_SC_INVALID_PASID 16 -#define VCMD_VRSP_RESULT_PASID(e) (((e) >> 16) & 0xfffff) -#define VCMD_CMD_OPERAND(e) ((e) << 16) /* * Domain ID reserved for pasid entries programmed for first-level * only and pass-through transfer modes. @@ -96,6 +86,216 @@ static inline u16 pasid_pte_get_pgtt(struct pasid_entry *pte) return (u16)((READ_ONCE(pte->val[0]) >> 6) & 0x7); } +static inline void pasid_clear_entry(struct pasid_entry *pe) +{ + WRITE_ONCE(pe->val[0], 0); + WRITE_ONCE(pe->val[1], 0); + WRITE_ONCE(pe->val[2], 0); + WRITE_ONCE(pe->val[3], 0); + WRITE_ONCE(pe->val[4], 0); + WRITE_ONCE(pe->val[5], 0); + WRITE_ONCE(pe->val[6], 0); + WRITE_ONCE(pe->val[7], 0); +} + +static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe) +{ + WRITE_ONCE(pe->val[0], PASID_PTE_FPD); + WRITE_ONCE(pe->val[1], 0); + WRITE_ONCE(pe->val[2], 0); + WRITE_ONCE(pe->val[3], 0); + WRITE_ONCE(pe->val[4], 0); + WRITE_ONCE(pe->val[5], 0); + WRITE_ONCE(pe->val[6], 0); + WRITE_ONCE(pe->val[7], 0); +} + +static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits) +{ + u64 old; + + old = READ_ONCE(*ptr); + WRITE_ONCE(*ptr, (old & ~mask) | bits); +} + +static inline u64 pasid_get_bits(u64 *ptr) +{ + return READ_ONCE(*ptr); +} + +/* + * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode + * PASID entry. + */ +static inline void +pasid_set_domain_id(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value); +} + +/* + * Get domain ID value of a scalable mode PASID entry. + */ +static inline u16 +pasid_get_domain_id(struct pasid_entry *pe) +{ + return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0)); +} + +/* + * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_slptr(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value); +} + +/* + * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID + * entry. + */ +static inline void +pasid_set_address_width(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2); +} + +/* + * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_translation_type(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6); +} + +/* + * Enable fault processing by clearing the FPD(Fault Processing + * Disable) field (Bit 1) of a scalable mode PASID entry. + */ +static inline void pasid_set_fault_enable(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 1, 0); +} + +/* + * Enable second level A/D bits by setting the SLADE (Second Level + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID + * entry. + */ +static inline void pasid_set_ssade(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 9, 1 << 9); +} + +/* + * Disable second level A/D bits by clearing the SLADE (Second Level + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID + * entry. + */ +static inline void pasid_clear_ssade(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 9, 0); +} + +/* + * Checks if second level A/D bits specifically the SLADE (Second Level + * Access Dirty Enable) field (Bit 9) of a scalable mode PASID + * entry is set. + */ +static inline bool pasid_get_ssade(struct pasid_entry *pe) +{ + return pasid_get_bits(&pe->val[0]) & (1 << 9); +} + +/* + * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a + * scalable mode PASID entry. + */ +static inline void pasid_set_sre(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 0, 1); +} + +/* + * Setup the WPE(Write Protect Enable) field (Bit 132) of a + * scalable mode PASID entry. + */ +static inline void pasid_set_wpe(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 4, 1 << 4); +} + +/* + * Setup the P(Present) field (Bit 0) of a scalable mode PASID + * entry. + */ +static inline void pasid_set_present(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 0, 1); +} + +/* + * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID + * entry. + */ +static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value) +{ + pasid_set_bits(&pe->val[1], 1 << 23, value << 23); +} + +/* + * Setup No Execute Enable bit (Bit 133) of a scalable mode PASID + * entry. It is required when XD bit of the first level page table + * entry is about to be set. + */ +static inline void pasid_set_nxe(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 5, 1 << 5); +} + +/* + * Setup the Page Snoop (PGSNP) field (Bit 88) of a scalable mode + * PASID entry. + */ +static inline void +pasid_set_pgsnp(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[1], 1ULL << 24, 1ULL << 24); +} + +/* + * Setup the First Level Page table Pointer field (Bit 140~191) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_flptr(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value); +} + +/* + * Setup the First Level Paging Mode field (Bit 130~131) of a + * scalable mode PASID entry. + */ +static inline void +pasid_set_flpm(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2); +} + +/* + * Setup the Extended Access Flag Enable (EAFE) field (Bit 135) + * of a scalable mode PASID entry. + */ +static inline void pasid_set_eafe(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7); +} + extern unsigned int intel_pasid_max_id; int intel_pasid_alloc_table(struct device *dev); void intel_pasid_free_table(struct device *dev); @@ -110,15 +310,13 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, struct device *dev, u32 pasid, bool enabled); int intel_pasid_setup_pass_through(struct intel_iommu *iommu, - struct dmar_domain *domain, struct device *dev, u32 pasid); int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, u32 pasid, struct dmar_domain *domain); void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, u32 pasid, bool fault_ignore); -int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid); -void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid); void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu, struct device *dev, u32 pasid); +void intel_pasid_teardown_sm_context(struct device *dev); #endif /* __INTEL_PASID_H */ diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c index cf43e798ec..44083d0185 100644 --- a/drivers/iommu/intel/perfmon.c +++ b/drivers/iommu/intel/perfmon.c @@ -438,7 +438,7 @@ static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu, iommu_pmu_set_filter(domain, event->attr.config1, IOMMU_PMU_FILTER_DOMAIN, idx, event->attr.config1); - iommu_pmu_set_filter(pasid, event->attr.config1, + iommu_pmu_set_filter(pasid, event->attr.config2, IOMMU_PMU_FILTER_PASID, idx, event->attr.config1); iommu_pmu_set_filter(ats, event->attr.config2, diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index ac12f76c12..4d269df008 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -67,7 +67,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) struct page *pages; int irq, ret; - pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); + pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); if (!pages) { pr_warn("IOMMU: %s: Failed to allocate page request queue\n", iommu->name); @@ -316,21 +316,22 @@ out: } static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, - struct mm_struct *mm) + struct iommu_domain *domain, ioasid_t pasid) { struct device_domain_info *info = dev_iommu_priv_get(dev); + struct mm_struct *mm = domain->mm; struct intel_svm_dev *sdev; struct intel_svm *svm; unsigned long sflags; int ret = 0; - svm = pasid_private_find(mm->pasid); + svm = pasid_private_find(pasid); if (!svm) { svm = kzalloc(sizeof(*svm), GFP_KERNEL); if (!svm) return -ENOMEM; - svm->pasid = mm->pasid; + svm->pasid = pasid; svm->mm = mm; INIT_LIST_HEAD_RCU(&svm->devs); @@ -368,7 +369,7 @@ static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, /* Setup the pasid table: */ sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; - ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid, + ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid, FLPT_DEFAULT_DID, sflags); if (ret) goto free_sdev; @@ -382,7 +383,7 @@ free_sdev: free_svm: if (list_empty(&svm->devs)) { mmu_notifier_unregister(&svm->notifier, mm); - pasid_private_remove(mm->pasid); + pasid_private_remove(pasid); kfree(svm); } @@ -392,14 +393,9 @@ free_svm: void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid) { struct intel_svm_dev *sdev; - struct intel_iommu *iommu; struct intel_svm *svm; struct mm_struct *mm; - iommu = device_to_iommu(dev, NULL, NULL); - if (!iommu) - return; - if (pasid_to_svm_sdev(dev, pasid, &svm, &sdev)) return; mm = svm->mm; @@ -654,7 +650,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) struct intel_iommu *iommu = d; struct page_req_dsc *req; int head, tail, handled; - struct pci_dev *pdev; + struct device *dev; u64 address; /* @@ -700,23 +696,24 @@ bad_req: if (unlikely(req->lpig && !req->rd_req && !req->wr_req)) goto prq_advance; - pdev = pci_get_domain_bus_and_slot(iommu->segment, - PCI_BUS_NUM(req->rid), - req->rid & 0xff); /* * If prq is to be handled outside iommu driver via receiver of * the fault notifiers, we skip the page response here. */ - if (!pdev) + mutex_lock(&iommu->iopf_lock); + dev = device_rbtree_find(iommu, req->rid); + if (!dev) { + mutex_unlock(&iommu->iopf_lock); goto bad_req; + } - if (intel_svm_prq_report(iommu, &pdev->dev, req)) + if (intel_svm_prq_report(iommu, dev, req)) handle_bad_prq_event(iommu, req, QI_RESP_INVALID); else - trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, + trace_prq_report(iommu, dev, req->qw_0, req->qw_1, req->priv_data[0], req->priv_data[1], iommu->prq_seq_number++); - pci_dev_put(pdev); + mutex_unlock(&iommu->iopf_lock); prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } @@ -750,25 +747,16 @@ int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg) { + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + u8 bus = info->bus, devfn = info->devfn; struct iommu_fault_page_request *prm; - struct intel_iommu *iommu; bool private_present; bool pasid_present; bool last_page; - u8 bus, devfn; int ret = 0; u16 sid; - if (!dev || !dev_is_pci(dev)) - return -ENODEV; - - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return -ENODEV; - - if (!msg || !evt) - return -EINVAL; - prm = &evt->fault.prm; sid = PCI_DEVID(bus, devfn); pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; @@ -822,9 +810,8 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; - struct mm_struct *mm = domain->mm; - return intel_svm_bind_mm(iommu, dev, mm); + return intel_svm_bind_mm(iommu, dev, domain, pasid); } static void intel_svm_domain_free(struct iommu_domain *domain) |