summaryrefslogtreecommitdiffstats
path: root/arch/x86/mm/pat
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 18:50:36 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 18:50:36 +0000
commit50ba0232fd5312410f1b65247e774244f89a628e (patch)
treefd8f2fc78e9e548af0ff9590276602ee6125be00 /arch/x86/mm/pat
parentReleasing progress-linux version 6.7.12-1~progress7.99u1. (diff)
downloadlinux-50ba0232fd5312410f1b65247e774244f89a628e.tar.xz
linux-50ba0232fd5312410f1b65247e774244f89a628e.zip
Merging upstream version 6.8.9.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/x86/mm/pat')
-rw-r--r--arch/x86/mm/pat/memtype.c51
-rw-r--r--arch/x86/mm/pat/set_memory.c34
2 files changed, 55 insertions, 30 deletions
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index de10800cd4..37d51cfd77 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -14,7 +14,7 @@
* memory ranges: uncached, write-combining, write-through, write-protected,
* and the most commonly used and default attribute: write-back caching.
*
- * PAT support supercedes and augments MTRR support in a compatible fashion: MTRR is
+ * PAT support supersedes and augments MTRR support in a compatible fashion: MTRR is
* a hardware interface to enumerate a limited number of physical memory ranges
* and set their caching attributes explicitly, programmed into the CPU via MSRs.
* Even modern CPUs have MTRRs enabled - but these are typically not touched
@@ -950,6 +950,38 @@ static void free_pfn_range(u64 paddr, unsigned long size)
memtype_free(paddr, paddr + size);
}
+static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
+ pgprot_t *pgprot)
+{
+ unsigned long prot;
+
+ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT));
+
+ /*
+ * We need the starting PFN and cachemode used for track_pfn_remap()
+ * that covered the whole VMA. For most mappings, we can obtain that
+ * information from the page tables. For COW mappings, we might now
+ * suddenly have anon folios mapped and follow_phys() will fail.
+ *
+ * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to
+ * detect the PFN. If we need the cachemode as well, we're out of luck
+ * for now and have to fail fork().
+ */
+ if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) {
+ if (pgprot)
+ *pgprot = __pgprot(prot);
+ return 0;
+ }
+ if (is_cow_mapping(vma->vm_flags)) {
+ if (pgprot)
+ return -EINVAL;
+ *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
+ return 0;
+ }
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+}
+
/*
* track_pfn_copy is called when vma that is covering the pfnmap gets
* copied through copy_page_range().
@@ -960,20 +992,13 @@ static void free_pfn_range(u64 paddr, unsigned long size)
int track_pfn_copy(struct vm_area_struct *vma)
{
resource_size_t paddr;
- unsigned long prot;
unsigned long vma_size = vma->vm_end - vma->vm_start;
pgprot_t pgprot;
if (vma->vm_flags & VM_PAT) {
- /*
- * reserve the whole chunk covered by vma. We need the
- * starting address and protection from pte.
- */
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, &pgprot))
return -EINVAL;
- }
- pgprot = __pgprot(prot);
+ /* reserve the whole chunk covered by vma. */
return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
}
@@ -1048,7 +1073,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size, bool mm_wr_locked)
{
resource_size_t paddr;
- unsigned long prot;
if (vma && !(vma->vm_flags & VM_PAT))
return;
@@ -1056,11 +1080,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
/* free the chunk starting from pfn or the whole chunk */
paddr = (resource_size_t)pfn << PAGE_SHIFT;
if (!paddr && !size) {
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, NULL))
return;
- }
-
size = vma->vm_end - vma->vm_start;
}
free_pfn_range(paddr, size);
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index bda9f12983..135bb594df 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -684,7 +684,7 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
return NULL;
*level = PG_LEVEL_1G;
- if (pud_large(*pud) || !pud_present(*pud))
+ if (pud_leaf(*pud) || !pud_present(*pud))
return (pte_t *)pud;
pmd = pmd_offset(pud, address);
@@ -743,7 +743,7 @@ pmd_t *lookup_pmd_address(unsigned long address)
return NULL;
pud = pud_offset(p4d, address);
- if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud))
+ if (pud_none(*pud) || pud_leaf(*pud) || !pud_present(*pud))
return NULL;
return pmd_offset(pud, address);
@@ -755,10 +755,14 @@ pmd_t *lookup_pmd_address(unsigned long address)
* areas on 32-bit NUMA systems. The percpu areas can
* end up in this kind of memory, for instance.
*
- * This could be optimized, but it is only intended to be
- * used at initialization time, and keeping it
- * unoptimized should increase the testing coverage for
- * the more obscure platforms.
+ * Note that as long as the PTEs are well-formed with correct PFNs, this
+ * works without checking the PRESENT bit in the leaf PTE. This is unlike
+ * the similar vmalloc_to_page() and derivatives. Callers may depend on
+ * this behavior.
+ *
+ * This could be optimized, but it is only used in paths that are not perf
+ * sensitive, and keeping it unoptimized should increase the testing coverage
+ * for the more obscure platforms.
*/
phys_addr_t slow_virt_to_phys(void *__virt_addr)
{
@@ -1274,7 +1278,7 @@ static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
*/
while (end - start >= PUD_SIZE) {
- if (pud_large(*pud))
+ if (pud_leaf(*pud))
pud_clear(pud);
else
unmap_pmd_range(pud, start, start + PUD_SIZE);
@@ -1621,7 +1625,7 @@ repeat:
/*
* We need to keep the pfn from the existing PTE,
- * after all we're only going to change it's attributes
+ * after all we're only going to change its attributes
* not the memory it points to
*/
new_pte = pfn_pte(pfn, new_prot);
@@ -2041,17 +2045,12 @@ int set_mce_nospec(unsigned long pfn)
return rc;
}
-static int set_memory_p(unsigned long *addr, int numpages)
-{
- return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0);
-}
-
/* Restore full speculative operation to the pfn. */
int clear_mce_nospec(unsigned long pfn)
{
unsigned long addr = (unsigned long) pfn_to_kaddr(pfn);
- return set_memory_p(&addr, 1);
+ return set_memory_p(addr, 1);
}
EXPORT_SYMBOL_GPL(clear_mce_nospec);
#endif /* CONFIG_X86_64 */
@@ -2104,6 +2103,11 @@ int set_memory_np_noalias(unsigned long addr, int numpages)
CPA_NO_CHECK_ALIAS, NULL);
}
+int set_memory_p(unsigned long addr, int numpages)
+{
+ return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
+}
+
int set_memory_4k(unsigned long addr, int numpages)
{
return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
@@ -2447,7 +2451,7 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
/*
* The typical sequence for unmapping is to find a pte through
* lookup_address_in_pgd() (ideally, it should never return NULL because
- * the address is already mapped) and change it's protections. As pfn is
+ * the address is already mapped) and change its protections. As pfn is
* the *target* of a mapping, it's not useful while unmapping.
*/
struct cpa_data cpa = {