summaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 10:15:33 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 10:15:33 +0000
commita097cf30108c0151cb2684afd962bd2678ff97d7 (patch)
treede1ac8afc7248eec4b2fb60496b8e9273452d9f7 /arch/x86/mm
parentAdding upstream version 6.1.82. (diff)
downloadlinux-a097cf30108c0151cb2684afd962bd2678ff97d7.tar.xz
linux-a097cf30108c0151cb2684afd962bd2678ff97d7.zip
Adding upstream version 6.1.85.upstream/6.1.85
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/fault.c9
-rw-r--r--arch/x86/mm/ident_map.c23
-rw-r--r--arch/x86/mm/maccess.c10
-rw-r--r--arch/x86/mm/mem_encrypt_amd.c18
-rw-r--r--arch/x86/mm/mem_encrypt_identity.c40
-rw-r--r--arch/x86/mm/pat/memtype.c49
6 files changed, 80 insertions, 69 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1dbbad731..f20636510 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -818,15 +818,6 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
show_opcodes(regs, loglvl);
}
-/*
- * The (legacy) vsyscall page is the long page in the kernel portion
- * of the address space that has user-accessible permissions.
- */
-static bool is_vsyscall_vaddr(unsigned long vaddr)
-{
- return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
-}
-
static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
unsigned long address, u32 pkey, int si_code)
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index f50cc210a..968d7005f 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -26,31 +26,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
for (; addr < end; addr = next) {
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
- bool use_gbpage;
next = (addr & PUD_MASK) + PUD_SIZE;
if (next > end)
next = end;
- /* if this is already a gbpage, this portion is already mapped */
- if (pud_large(*pud))
- continue;
-
- /* Is using a gbpage allowed? */
- use_gbpage = info->direct_gbpages;
-
- /* Don't use gbpage if it maps more than the requested region. */
- /* at the begining: */
- use_gbpage &= ((addr & ~PUD_MASK) == 0);
- /* ... or at the end: */
- use_gbpage &= ((next & ~PUD_MASK) == 0);
-
- /* Never overwrite existing mappings */
- use_gbpage &= !pud_present(*pud);
-
- if (use_gbpage) {
+ if (info->direct_gbpages) {
pud_t pudval;
+ if (pud_present(*pud))
+ continue;
+
+ addr &= PUD_MASK;
pudval = __pud((addr - info->offset) | info->page_flag);
set_pud(pud, pudval);
continue;
diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c
index 6993f026a..42115ac07 100644
--- a/arch/x86/mm/maccess.c
+++ b/arch/x86/mm/maccess.c
@@ -3,6 +3,8 @@
#include <linux/uaccess.h>
#include <linux/kernel.h>
+#include <asm/vsyscall.h>
+
#ifdef CONFIG_X86_64
bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
{
@@ -16,6 +18,14 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
return false;
/*
+ * Reading from the vsyscall page may cause an unhandled fault in
+ * certain cases. Though it is at an address above TASK_SIZE_MAX, it is
+ * usually considered as a user space address.
+ */
+ if (is_vsyscall_vaddr(vaddr))
+ return false;
+
+ /*
* Allow everything during early boot before 'x86_virt_bits'
* is initialized. Needed for instruction decoding in early
* exception handlers.
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 3e93af083..d4957eefe 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -513,6 +513,24 @@ void __init sme_early_init(void)
*/
if (sev_status & MSR_AMD64_SEV_ENABLED)
ia32_disable();
+
+ /*
+ * Override init functions that scan the ROM region in SEV-SNP guests,
+ * as this memory is not pre-validated and would thus cause a crash.
+ */
+ if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
+ x86_init.mpparse.find_smp_config = x86_init_noop;
+ x86_init.pci.init_irq = x86_init_noop;
+ x86_init.resources.probe_roms = x86_init_noop;
+
+ /*
+ * DMI setup behavior for SEV-SNP guests depends on
+ * efi_enabled(EFI_CONFIG_TABLES), which hasn't been
+ * parsed yet. snp_dmi_setup() will run after that
+ * parsing has happened.
+ */
+ x86_init.resources.dmi_setup = snp_dmi_setup;
+ }
}
void __init mem_encrypt_free_decrypted_mem(void)
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index d94ebd8ac..06ccbd36e 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -97,7 +97,6 @@ static char sme_workarea[2 * PMD_PAGE_SIZE] __section(".init.scratch");
static char sme_cmdline_arg[] __initdata = "mem_encrypt";
static char sme_cmdline_on[] __initdata = "on";
-static char sme_cmdline_off[] __initdata = "off";
static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
{
@@ -305,7 +304,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
* instrumentation or checking boot_cpu_data in the cc_platform_has()
* function.
*/
- if (!sme_get_me_mask() || sev_status & MSR_AMD64_SEV_ENABLED)
+ if (!sme_get_me_mask() ||
+ RIP_REL_REF(sev_status) & MSR_AMD64_SEV_ENABLED)
return;
/*
@@ -504,10 +504,9 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
void __init sme_enable(struct boot_params *bp)
{
- const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
+ const char *cmdline_ptr, *cmdline_arg, *cmdline_on;
unsigned int eax, ebx, ecx, edx;
unsigned long feature_mask;
- bool active_by_default;
unsigned long me_mask;
char buffer[16];
bool snp;
@@ -543,11 +542,11 @@ void __init sme_enable(struct boot_params *bp)
me_mask = 1UL << (ebx & 0x3f);
/* Check the SEV MSR whether SEV or SME is enabled */
- sev_status = __rdmsr(MSR_AMD64_SEV);
- feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
+ RIP_REL_REF(sev_status) = msr = __rdmsr(MSR_AMD64_SEV);
+ feature_mask = (msr & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
/* The SEV-SNP CC blob should never be present unless SEV-SNP is enabled. */
- if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ if (snp && !(msr & MSR_AMD64_SEV_SNP_ENABLED))
snp_abort();
/* Check if memory encryption is enabled */
@@ -573,7 +572,6 @@ void __init sme_enable(struct boot_params *bp)
return;
} else {
/* SEV state cannot be controlled by a command line option */
- sme_me_mask = me_mask;
goto out;
}
@@ -588,31 +586,17 @@ void __init sme_enable(struct boot_params *bp)
asm ("lea sme_cmdline_on(%%rip), %0"
: "=r" (cmdline_on)
: "p" (sme_cmdline_on));
- asm ("lea sme_cmdline_off(%%rip), %0"
- : "=r" (cmdline_off)
- : "p" (sme_cmdline_off));
-
- if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
- active_by_default = true;
- else
- active_by_default = false;
cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
((u64)bp->ext_cmd_line_ptr << 32));
- if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0)
+ if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0 ||
+ strncmp(buffer, cmdline_on, sizeof(buffer)))
return;
- if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
- sme_me_mask = me_mask;
- else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
- sme_me_mask = 0;
- else
- sme_me_mask = active_by_default ? me_mask : 0;
out:
- if (sme_me_mask) {
- physical_mask &= ~sme_me_mask;
- cc_set_vendor(CC_VENDOR_AMD);
- cc_set_mask(sme_me_mask);
- }
+ RIP_REL_REF(sme_me_mask) = me_mask;
+ physical_mask &= ~me_mask;
+ cc_vendor = CC_VENDOR_AMD;
+ cc_set_mask(me_mask);
}
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 66a209f7e..d6fe9093e 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -997,6 +997,38 @@ static void free_pfn_range(u64 paddr, unsigned long size)
memtype_free(paddr, paddr + size);
}
+static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
+ pgprot_t *pgprot)
+{
+ unsigned long prot;
+
+ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT));
+
+ /*
+ * We need the starting PFN and cachemode used for track_pfn_remap()
+ * that covered the whole VMA. For most mappings, we can obtain that
+ * information from the page tables. For COW mappings, we might now
+ * suddenly have anon folios mapped and follow_phys() will fail.
+ *
+ * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to
+ * detect the PFN. If we need the cachemode as well, we're out of luck
+ * for now and have to fail fork().
+ */
+ if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) {
+ if (pgprot)
+ *pgprot = __pgprot(prot);
+ return 0;
+ }
+ if (is_cow_mapping(vma->vm_flags)) {
+ if (pgprot)
+ return -EINVAL;
+ *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
+ return 0;
+ }
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+}
+
/*
* track_pfn_copy is called when vma that is covering the pfnmap gets
* copied through copy_page_range().
@@ -1007,20 +1039,13 @@ static void free_pfn_range(u64 paddr, unsigned long size)
int track_pfn_copy(struct vm_area_struct *vma)
{
resource_size_t paddr;
- unsigned long prot;
unsigned long vma_size = vma->vm_end - vma->vm_start;
pgprot_t pgprot;
if (vma->vm_flags & VM_PAT) {
- /*
- * reserve the whole chunk covered by vma. We need the
- * starting address and protection from pte.
- */
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, &pgprot))
return -EINVAL;
- }
- pgprot = __pgprot(prot);
+ /* reserve the whole chunk covered by vma. */
return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
}
@@ -1095,7 +1120,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size)
{
resource_size_t paddr;
- unsigned long prot;
if (vma && !(vma->vm_flags & VM_PAT))
return;
@@ -1103,11 +1127,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
/* free the chunk starting from pfn or the whole chunk */
paddr = (resource_size_t)pfn << PAGE_SHIFT;
if (!paddr && !size) {
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, NULL))
return;
- }
-
size = vma->vm_end - vma->vm_start;
}
free_pfn_range(paddr, size);