diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
commit | ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch) | |
tree | b2d64bc10158fdd5497876388cd68142ca374ed3 /arch/x86/platform/efi | |
parent | Initial commit. (diff) | |
download | linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip |
Adding upstream version 6.6.15. upstream/6.6.15
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/x86/platform/efi')
-rw-r--r-- | arch/x86/platform/efi/Makefile | 9 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi.c | 952 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_32.c | 154 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_64.c | 884 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_stub_32.S | 60 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_stub_64.S | 27 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_thunk_64.S | 98 | ||||
-rw-r--r-- | arch/x86/platform/efi/fake_mem.c | 197 | ||||
-rw-r--r-- | arch/x86/platform/efi/memmap.c | 239 | ||||
-rw-r--r-- | arch/x86/platform/efi/quirks.c | 781 | ||||
-rw-r--r-- | arch/x86/platform/efi/runtime-map.c | 194 |
11 files changed, 3595 insertions, 0 deletions
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile new file mode 100644 index 0000000000..543df9a137 --- /dev/null +++ b/arch/x86/platform/efi/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +KASAN_SANITIZE := n +GCOV_PROFILE := n + +obj-$(CONFIG_EFI) += memmap.o quirks.o efi.o efi_$(BITS).o \ + efi_stub_$(BITS).o +obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o +obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_mem.o +obj-$(CONFIG_EFI_RUNTIME_MAP) += runtime-map.o diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c new file mode 100644 index 0000000000..e9f99c56f3 --- /dev/null +++ b/arch/x86/platform/efi/efi.c @@ -0,0 +1,952 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common EFI (Extensible Firmware Interface) support functions + * Based on Extensible Firmware Interface Specification version 1.0 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999-2002 Hewlett-Packard Co. + * David Mosberger-Tang <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 2005-2008 Intel Co. + * Fenghua Yu <fenghua.yu@intel.com> + * Bibo Mao <bibo.mao@intel.com> + * Chandramouli Narayanan <mouli@linux.intel.com> + * Huang Ying <ying.huang@intel.com> + * Copyright (C) 2013 SuSE Labs + * Borislav Petkov <bp@suse.de> - runtime services VA mapping + * + * Copied from efi_32.c to eliminate the duplicated code between EFI + * 32/64 support code. --ying 2007-10-26 + * + * All EFI Runtime Services are not implemented yet as EFI only + * supports physical mode addressing on SoftSDV. This is to be fixed + * in a future version. --drummond 1999-07-20 + * + * Implemented EFI runtime services and virtual mode calls. --davidm + * + * Goutham Rao: <goutham.rao@intel.com> + * Skip non-WB memory and ignore empty memory ranges. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/efi.h> +#include <linux/efi-bgrt.h> +#include <linux/export.h> +#include <linux/memblock.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> +#include <linux/time.h> +#include <linux/io.h> +#include <linux/reboot.h> +#include <linux/bcd.h> + +#include <asm/setup.h> +#include <asm/efi.h> +#include <asm/e820/api.h> +#include <asm/time.h> +#include <asm/tlbflush.h> +#include <asm/x86_init.h> +#include <asm/uv/uv.h> + +static unsigned long efi_systab_phys __initdata; +static unsigned long prop_phys = EFI_INVALID_TABLE_ADDR; +static unsigned long uga_phys = EFI_INVALID_TABLE_ADDR; +static unsigned long efi_runtime, efi_nr_tables; + +unsigned long efi_fw_vendor, efi_config_table; + +static const efi_config_table_type_t arch_tables[] __initconst = { + {EFI_PROPERTIES_TABLE_GUID, &prop_phys, "PROP" }, + {UGA_IO_PROTOCOL_GUID, &uga_phys, "UGA" }, +#ifdef CONFIG_X86_UV + {UV_SYSTEM_TABLE_GUID, &uv_systab_phys, "UVsystab" }, +#endif + {}, +}; + +static const unsigned long * const efi_tables[] = { + &efi.acpi, + &efi.acpi20, + &efi.smbios, + &efi.smbios3, + &uga_phys, +#ifdef CONFIG_X86_UV + &uv_systab_phys, +#endif + &efi_fw_vendor, + &efi_runtime, + &efi_config_table, + &efi.esrt, + &prop_phys, + &efi_mem_attr_table, +#ifdef CONFIG_EFI_RCI2_TABLE + &rci2_table_phys, +#endif + &efi.tpm_log, + &efi.tpm_final_log, + &efi_rng_seed, +#ifdef CONFIG_LOAD_UEFI_KEYS + &efi.mokvar_table, +#endif +#ifdef CONFIG_EFI_COCO_SECRET + &efi.coco_secret, +#endif +#ifdef CONFIG_UNACCEPTED_MEMORY + &efi.unaccepted, +#endif +}; + +u64 efi_setup; /* efi setup_data physical address */ + +static int add_efi_memmap __initdata; +static int __init setup_add_efi_memmap(char *arg) +{ + add_efi_memmap = 1; + return 0; +} +early_param("add_efi_memmap", setup_add_efi_memmap); + +/* + * Tell the kernel about the EFI memory map. 
This might include + * more than the max 128 entries that can fit in the passed in e820 + * legacy (zeropage) memory map, but the kernel's e820 table can hold + * E820_MAX_ENTRIES. + */ + +static void __init do_add_efi_memmap(void) +{ + efi_memory_desc_t *md; + + if (!efi_enabled(EFI_MEMMAP)) + return; + + for_each_efi_memory_desc(md) { + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + int e820_type; + + switch (md->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + if (efi_soft_reserve_enabled() + && (md->attribute & EFI_MEMORY_SP)) + e820_type = E820_TYPE_SOFT_RESERVED; + else if (md->attribute & EFI_MEMORY_WB) + e820_type = E820_TYPE_RAM; + else + e820_type = E820_TYPE_RESERVED; + break; + case EFI_ACPI_RECLAIM_MEMORY: + e820_type = E820_TYPE_ACPI; + break; + case EFI_ACPI_MEMORY_NVS: + e820_type = E820_TYPE_NVS; + break; + case EFI_UNUSABLE_MEMORY: + e820_type = E820_TYPE_UNUSABLE; + break; + case EFI_PERSISTENT_MEMORY: + e820_type = E820_TYPE_PMEM; + break; + default: + /* + * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE + * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO + * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE + */ + e820_type = E820_TYPE_RESERVED; + break; + } + + e820__range_add(start, size, e820_type); + } + e820__update_table(e820_table); +} + +/* + * Given add_efi_memmap defaults to 0 and there is no alternative + * e820 mechanism for soft-reserved memory, import the full EFI memory + * map if soft reservations are present and enabled. Otherwise, the + * mechanism to disable the kernel's consideration of EFI_MEMORY_SP is + * the efi=nosoftreserve option. 
+ */ +static bool do_efi_soft_reserve(void) +{ + efi_memory_desc_t *md; + + if (!efi_enabled(EFI_MEMMAP)) + return false; + + if (!efi_soft_reserve_enabled()) + return false; + + for_each_efi_memory_desc(md) + if (md->type == EFI_CONVENTIONAL_MEMORY && + (md->attribute & EFI_MEMORY_SP)) + return true; + return false; +} + +int __init efi_memblock_x86_reserve_range(void) +{ + struct efi_info *e = &boot_params.efi_info; + struct efi_memory_map_data data; + phys_addr_t pmap; + int rv; + + if (efi_enabled(EFI_PARAVIRT)) + return 0; + + /* Can't handle firmware tables above 4GB on i386 */ + if (IS_ENABLED(CONFIG_X86_32) && e->efi_memmap_hi > 0) { + pr_err("Memory map is above 4GB, disabling EFI.\n"); + return -EINVAL; + } + pmap = (phys_addr_t)(e->efi_memmap | ((u64)e->efi_memmap_hi << 32)); + + data.phys_map = pmap; + data.size = e->efi_memmap_size; + data.desc_size = e->efi_memdesc_size; + data.desc_version = e->efi_memdesc_version; + + if (!efi_enabled(EFI_PARAVIRT)) { + rv = efi_memmap_init_early(&data); + if (rv) + return rv; + } + + if (add_efi_memmap || do_efi_soft_reserve()) + do_add_efi_memmap(); + + efi_fake_memmap_early(); + + WARN(efi.memmap.desc_version != 1, + "Unexpected EFI_MEMORY_DESCRIPTOR version %ld", + efi.memmap.desc_version); + + memblock_reserve(pmap, efi.memmap.nr_map * efi.memmap.desc_size); + set_bit(EFI_PRESERVE_BS_REGIONS, &efi.flags); + + return 0; +} + +#define OVERFLOW_ADDR_SHIFT (64 - EFI_PAGE_SHIFT) +#define OVERFLOW_ADDR_MASK (U64_MAX << OVERFLOW_ADDR_SHIFT) +#define U64_HIGH_BIT (~(U64_MAX >> 1)) + +static bool __init efi_memmap_entry_valid(const efi_memory_desc_t *md, int i) +{ + u64 end = (md->num_pages << EFI_PAGE_SHIFT) + md->phys_addr - 1; + u64 end_hi = 0; + char buf[64]; + + if (md->num_pages == 0) { + end = 0; + } else if (md->num_pages > EFI_PAGES_MAX || + EFI_PAGES_MAX - md->num_pages < + (md->phys_addr >> EFI_PAGE_SHIFT)) { + end_hi = (md->num_pages & OVERFLOW_ADDR_MASK) + >> OVERFLOW_ADDR_SHIFT; + + if ((md->phys_addr & 
U64_HIGH_BIT) && !(end & U64_HIGH_BIT)) + end_hi += 1; + } else { + return true; + } + + pr_warn_once(FW_BUG "Invalid EFI memory map entries:\n"); + + if (end_hi) { + pr_warn("mem%02u: %s range=[0x%016llx-0x%llx%016llx] (invalid)\n", + i, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, end_hi, end); + } else { + pr_warn("mem%02u: %s range=[0x%016llx-0x%016llx] (invalid)\n", + i, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, end); + } + return false; +} + +static void __init efi_clean_memmap(void) +{ + efi_memory_desc_t *out = efi.memmap.map; + const efi_memory_desc_t *in = out; + const efi_memory_desc_t *end = efi.memmap.map_end; + int i, n_removal; + + for (i = n_removal = 0; in < end; i++) { + if (efi_memmap_entry_valid(in, i)) { + if (out != in) + memcpy(out, in, efi.memmap.desc_size); + out = (void *)out + efi.memmap.desc_size; + } else { + n_removal++; + } + in = (void *)in + efi.memmap.desc_size; + } + + if (n_removal > 0) { + struct efi_memory_map_data data = { + .phys_map = efi.memmap.phys_map, + .desc_version = efi.memmap.desc_version, + .desc_size = efi.memmap.desc_size, + .size = efi.memmap.desc_size * (efi.memmap.nr_map - n_removal), + .flags = 0, + }; + + pr_warn("Removing %d invalid memory map entries.\n", n_removal); + efi_memmap_install(&data); + } +} + +/* + * Firmware can use EfiMemoryMappedIO to request that MMIO regions be + * mapped by the OS so they can be accessed by EFI runtime services, but + * should have no other significance to the OS (UEFI r2.10, sec 7.2). + * However, most bootloaders and EFI stubs convert EfiMemoryMappedIO + * regions to E820_TYPE_RESERVED entries, which prevent Linux from + * allocating space from them (see remove_e820_regions()). + * + * Some platforms use EfiMemoryMappedIO entries for PCI MMCONFIG space and + * PCI host bridge windows, which means Linux can't allocate BAR space for + * hot-added devices. 
+ * + * Remove large EfiMemoryMappedIO regions from the E820 map to avoid this + * problem. + * + * Retain small EfiMemoryMappedIO regions because on some platforms, these + * describe non-window space that's included in host bridge _CRS. If we + * assign that space to PCI devices, they don't work. + */ +static void __init efi_remove_e820_mmio(void) +{ + efi_memory_desc_t *md; + u64 size, start, end; + int i = 0; + + for_each_efi_memory_desc(md) { + if (md->type == EFI_MEMORY_MAPPED_IO) { + size = md->num_pages << EFI_PAGE_SHIFT; + start = md->phys_addr; + end = start + size - 1; + if (size >= 256*1024) { + pr_info("Remove mem%02u: MMIO range=[0x%08llx-0x%08llx] (%lluMB) from e820 map\n", + i, start, end, size >> 20); + e820__range_remove(start, size, + E820_TYPE_RESERVED, 1); + } else { + pr_info("Not removing mem%02u: MMIO range=[0x%08llx-0x%08llx] (%lluKB) from e820 map\n", + i, start, end, size >> 10); + } + } + i++; + } +} + +void __init efi_print_memmap(void) +{ + efi_memory_desc_t *md; + int i = 0; + + for_each_efi_memory_desc(md) { + char buf[64]; + + pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n", + i++, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, + md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1, + (md->num_pages >> (20 - EFI_PAGE_SHIFT))); + } +} + +static int __init efi_systab_init(unsigned long phys) +{ + int size = efi_enabled(EFI_64BIT) ? 
sizeof(efi_system_table_64_t) + : sizeof(efi_system_table_32_t); + const efi_table_hdr_t *hdr; + bool over4g = false; + void *p; + int ret; + + hdr = p = early_memremap_ro(phys, size); + if (p == NULL) { + pr_err("Couldn't map the system table!\n"); + return -ENOMEM; + } + + ret = efi_systab_check_header(hdr); + if (ret) { + early_memunmap(p, size); + return ret; + } + + if (efi_enabled(EFI_64BIT)) { + const efi_system_table_64_t *systab64 = p; + + efi_runtime = systab64->runtime; + over4g = systab64->runtime > U32_MAX; + + if (efi_setup) { + struct efi_setup_data *data; + + data = early_memremap_ro(efi_setup, sizeof(*data)); + if (!data) { + early_memunmap(p, size); + return -ENOMEM; + } + + efi_fw_vendor = (unsigned long)data->fw_vendor; + efi_config_table = (unsigned long)data->tables; + + over4g |= data->fw_vendor > U32_MAX || + data->tables > U32_MAX; + + early_memunmap(data, sizeof(*data)); + } else { + efi_fw_vendor = systab64->fw_vendor; + efi_config_table = systab64->tables; + + over4g |= systab64->fw_vendor > U32_MAX || + systab64->tables > U32_MAX; + } + efi_nr_tables = systab64->nr_tables; + } else { + const efi_system_table_32_t *systab32 = p; + + efi_fw_vendor = systab32->fw_vendor; + efi_runtime = systab32->runtime; + efi_config_table = systab32->tables; + efi_nr_tables = systab32->nr_tables; + } + + efi.runtime_version = hdr->revision; + + efi_systab_report_header(hdr, efi_fw_vendor); + early_memunmap(p, size); + + if (IS_ENABLED(CONFIG_X86_32) && over4g) { + pr_err("EFI data located above 4GB, disabling EFI.\n"); + return -EINVAL; + } + + return 0; +} + +static int __init efi_config_init(const efi_config_table_type_t *arch_tables) +{ + void *config_tables; + int sz, ret; + + if (efi_nr_tables == 0) + return 0; + + if (efi_enabled(EFI_64BIT)) + sz = sizeof(efi_config_table_64_t); + else + sz = sizeof(efi_config_table_32_t); + + /* + * Let's see what config tables the firmware passed to us. 
+ */ + config_tables = early_memremap(efi_config_table, efi_nr_tables * sz); + if (config_tables == NULL) { + pr_err("Could not map Configuration table!\n"); + return -ENOMEM; + } + + ret = efi_config_parse_tables(config_tables, efi_nr_tables, + arch_tables); + + early_memunmap(config_tables, efi_nr_tables * sz); + return ret; +} + +void __init efi_init(void) +{ + if (IS_ENABLED(CONFIG_X86_32) && + (boot_params.efi_info.efi_systab_hi || + boot_params.efi_info.efi_memmap_hi)) { + pr_info("Table located above 4GB, disabling EFI.\n"); + return; + } + + efi_systab_phys = boot_params.efi_info.efi_systab | + ((__u64)boot_params.efi_info.efi_systab_hi << 32); + + if (efi_systab_init(efi_systab_phys)) + return; + + if (efi_reuse_config(efi_config_table, efi_nr_tables)) + return; + + if (efi_config_init(arch_tables)) + return; + + /* + * Note: We currently don't support runtime services on an EFI + * that doesn't match the kernel 32/64-bit mode. + */ + + if (!efi_runtime_supported()) + pr_err("No EFI runtime due to 32/64-bit mismatch with kernel\n"); + + if (!efi_runtime_supported() || efi_runtime_disabled()) { + efi_memmap_unmap(); + return; + } + + /* Parse the EFI Properties table if it exists */ + if (prop_phys != EFI_INVALID_TABLE_ADDR) { + efi_properties_table_t *tbl; + + tbl = early_memremap_ro(prop_phys, sizeof(*tbl)); + if (tbl == NULL) { + pr_err("Could not map Properties table!\n"); + } else { + if (tbl->memory_protection_attribute & + EFI_PROPERTIES_RUNTIME_MEMORY_PROTECTION_NON_EXECUTABLE_PE_DATA) + set_bit(EFI_NX_PE_DATA, &efi.flags); + + early_memunmap(tbl, sizeof(*tbl)); + } + } + + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + efi_clean_memmap(); + + efi_remove_e820_mmio(); + + if (efi_enabled(EFI_DBG)) + efi_print_memmap(); +} + +/* Merge contiguous regions of the same type and attribute */ +static void __init efi_merge_regions(void) +{ + efi_memory_desc_t *md, *prev_md = NULL; + + for_each_efi_memory_desc(md) { + u64 prev_size; + + if (!prev_md) { + 
prev_md = md; + continue; + } + + if (prev_md->type != md->type || + prev_md->attribute != md->attribute) { + prev_md = md; + continue; + } + + prev_size = prev_md->num_pages << EFI_PAGE_SHIFT; + + if (md->phys_addr == (prev_md->phys_addr + prev_size)) { + prev_md->num_pages += md->num_pages; + md->type = EFI_RESERVED_TYPE; + md->attribute = 0; + continue; + } + prev_md = md; + } +} + +static void *realloc_pages(void *old_memmap, int old_shift) +{ + void *ret; + + ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1); + if (!ret) + goto out; + + /* + * A first-time allocation doesn't have anything to copy. + */ + if (!old_memmap) + return ret; + + memcpy(ret, old_memmap, PAGE_SIZE << old_shift); + +out: + free_pages((unsigned long)old_memmap, old_shift); + return ret; +} + +/* + * Iterate the EFI memory map in reverse order because the regions + * will be mapped top-down. The end result is the same as if we had + * mapped things forward, but doesn't require us to change the + * existing implementation of efi_map_region(). + */ +static inline void *efi_map_next_entry_reverse(void *entry) +{ + /* Initial call */ + if (!entry) + return efi.memmap.map_end - efi.memmap.desc_size; + + entry -= efi.memmap.desc_size; + if (entry < efi.memmap.map) + return NULL; + + return entry; +} + +/* + * efi_map_next_entry - Return the next EFI memory map descriptor + * @entry: Previous EFI memory map descriptor + * + * This is a helper function to iterate over the EFI memory map, which + * we do in different orders depending on the current configuration. + * + * To begin traversing the memory map @entry must be %NULL. + * + * Returns %NULL when we reach the end of the memory map. + */ +static void *efi_map_next_entry(void *entry) +{ + if (efi_enabled(EFI_64BIT)) { + /* + * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE + * config table feature requires us to map all entries + * in the same order as they appear in the EFI memory + * map. 
That is to say, entry N must have a lower + * virtual address than entry N+1. This is because the + * firmware toolchain leaves relative references in + * the code/data sections, which are split and become + * separate EFI memory regions. Mapping things + * out-of-order leads to the firmware accessing + * unmapped addresses. + * + * Since we need to map things this way whether or not + * the kernel actually makes use of + * EFI_PROPERTIES_TABLE, let's just switch to this + * scheme by default for 64-bit. + */ + return efi_map_next_entry_reverse(entry); + } + + /* Initial call */ + if (!entry) + return efi.memmap.map; + + entry += efi.memmap.desc_size; + if (entry >= efi.memmap.map_end) + return NULL; + + return entry; +} + +static bool should_map_region(efi_memory_desc_t *md) +{ + /* + * Runtime regions always require runtime mappings (obviously). + */ + if (md->attribute & EFI_MEMORY_RUNTIME) + return true; + + /* + * 32-bit EFI doesn't suffer from the bug that requires us to + * reserve boot services regions, and mixed mode support + * doesn't exist for 32-bit kernels. + */ + if (IS_ENABLED(CONFIG_X86_32)) + return false; + + /* + * EFI specific purpose memory may be reserved by default + * depending on kernel config and boot options. + */ + if (md->type == EFI_CONVENTIONAL_MEMORY && + efi_soft_reserve_enabled() && + (md->attribute & EFI_MEMORY_SP)) + return false; + + /* + * Map all of RAM so that we can access arguments in the 1:1 + * mapping when making EFI runtime calls. + */ + if (efi_is_mixed()) { + if (md->type == EFI_CONVENTIONAL_MEMORY || + md->type == EFI_LOADER_DATA || + md->type == EFI_LOADER_CODE) + return true; + } + + /* + * Map boot services regions as a workaround for buggy + * firmware that accesses them even when they shouldn't. + * + * See efi_{reserve,free}_boot_services(). 
+ */ + if (md->type == EFI_BOOT_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_DATA) + return true; + + return false; +} + +/* + * Map the efi memory ranges of the runtime services and update new_mmap with + * virtual addresses. + */ +static void * __init efi_map_regions(int *count, int *pg_shift) +{ + void *p, *new_memmap = NULL; + unsigned long left = 0; + unsigned long desc_size; + efi_memory_desc_t *md; + + desc_size = efi.memmap.desc_size; + + p = NULL; + while ((p = efi_map_next_entry(p))) { + md = p; + + if (!should_map_region(md)) + continue; + + efi_map_region(md); + + if (left < desc_size) { + new_memmap = realloc_pages(new_memmap, *pg_shift); + if (!new_memmap) + return NULL; + + left += PAGE_SIZE << *pg_shift; + (*pg_shift)++; + } + + memcpy(new_memmap + (*count * desc_size), md, desc_size); + + left -= desc_size; + (*count)++; + } + + return new_memmap; +} + +static void __init kexec_enter_virtual_mode(void) +{ +#ifdef CONFIG_KEXEC_CORE + efi_memory_desc_t *md; + unsigned int num_pages; + + /* + * We don't do virtual mode, since we don't do runtime services, on + * non-native EFI. + */ + if (efi_is_mixed()) { + efi_memmap_unmap(); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + + if (efi_alloc_page_tables()) { + pr_err("Failed to allocate EFI page tables\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + + /* + * Map efi regions which were passed via setup_data. The virt_addr is a + * fixed addr which was used in first kernel of a kexec boot. + */ + for_each_efi_memory_desc(md) + efi_map_region_fixed(md); /* FIXME: add error handling */ + + /* + * Unregister the early EFI memmap from efi_init() and install + * the new EFI memory map. 
+ */ + efi_memmap_unmap(); + + if (efi_memmap_init_late(efi.memmap.phys_map, + efi.memmap.desc_size * efi.memmap.nr_map)) { + pr_err("Failed to remap late EFI memory map\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + + num_pages = ALIGN(efi.memmap.nr_map * efi.memmap.desc_size, PAGE_SIZE); + num_pages >>= PAGE_SHIFT; + + if (efi_setup_page_tables(efi.memmap.phys_map, num_pages)) { + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + + efi_sync_low_kernel_mappings(); + efi_native_runtime_setup(); +#endif +} + +/* + * This function will switch the EFI runtime services to virtual mode. + * Essentially, we look through the EFI memmap and map every region that + * has the runtime attribute bit set in its memory descriptor into the + * efi_pgd page table. + * + * The new method does a pagetable switch in a preemption-safe manner + * so that we're in a different address space when calling a runtime + * function. For function arguments passing we do copy the PUDs of the + * kernel page table into efi_pgd prior to each call. + * + * Specially for kexec boot, efi runtime maps in previous kernel should + * be passed in via setup_data. In that case runtime ranges will be mapped + * to the same virtual addresses as the first kernel, see + * kexec_enter_virtual_mode(). + */ +static void __init __efi_enter_virtual_mode(void) +{ + int count = 0, pg_shift = 0; + void *new_memmap = NULL; + efi_status_t status; + unsigned long pa; + + if (efi_alloc_page_tables()) { + pr_err("Failed to allocate EFI page tables\n"); + goto err; + } + + efi_merge_regions(); + new_memmap = efi_map_regions(&count, &pg_shift); + if (!new_memmap) { + pr_err("Error reallocating memory, EFI runtime non-functional!\n"); + goto err; + } + + pa = __pa(new_memmap); + + /* + * Unregister the early EFI memmap from efi_init() and install + * the new EFI memory map that we are about to pass to the + * firmware via SetVirtualAddressMap(). 
+ */ + efi_memmap_unmap(); + + if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) { + pr_err("Failed to remap late EFI memory map\n"); + goto err; + } + + if (efi_enabled(EFI_DBG)) { + pr_info("EFI runtime memory map:\n"); + efi_print_memmap(); + } + + if (efi_setup_page_tables(pa, 1 << pg_shift)) + goto err; + + efi_sync_low_kernel_mappings(); + + status = efi_set_virtual_address_map(efi.memmap.desc_size * count, + efi.memmap.desc_size, + efi.memmap.desc_version, + (efi_memory_desc_t *)pa, + efi_systab_phys); + if (status != EFI_SUCCESS) { + pr_err("Unable to switch EFI into virtual mode (status=%lx)!\n", + status); + goto err; + } + + efi_check_for_embedded_firmwares(); + efi_free_boot_services(); + + if (!efi_is_mixed()) + efi_native_runtime_setup(); + else + efi_thunk_runtime_setup(); + + /* + * Apply more restrictive page table mapping attributes now that + * SVAM() has been called and the firmware has performed all + * necessary relocation fixups for the new virtual addresses. 
+ */ + efi_runtime_update_mappings(); + + /* clean DUMMY object */ + efi_delete_dummy_variable(); + return; + +err: + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); +} + +void __init efi_enter_virtual_mode(void) +{ + if (efi_enabled(EFI_PARAVIRT)) + return; + + efi.runtime = (efi_runtime_services_t *)efi_runtime; + + if (efi_setup) + kexec_enter_virtual_mode(); + else + __efi_enter_virtual_mode(); + + efi_dump_pagetable(); +} + +bool efi_is_table_address(unsigned long phys_addr) +{ + unsigned int i; + + if (phys_addr == EFI_INVALID_TABLE_ADDR) + return false; + + for (i = 0; i < ARRAY_SIZE(efi_tables); i++) + if (*(efi_tables[i]) == phys_addr) + return true; + + return false; +} + +char *efi_systab_show_arch(char *str) +{ + if (uga_phys != EFI_INVALID_TABLE_ADDR) + str += sprintf(str, "UGA=0x%lx\n", uga_phys); + return str; +} + +#define EFI_FIELD(var) efi_ ## var + +#define EFI_ATTR_SHOW(name) \ +static ssize_t name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "0x%lx\n", EFI_FIELD(name)); \ +} + +EFI_ATTR_SHOW(fw_vendor); +EFI_ATTR_SHOW(runtime); +EFI_ATTR_SHOW(config_table); + +struct kobj_attribute efi_attr_fw_vendor = __ATTR_RO(fw_vendor); +struct kobj_attribute efi_attr_runtime = __ATTR_RO(runtime); +struct kobj_attribute efi_attr_config_table = __ATTR_RO(config_table); + +umode_t efi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n) +{ + if (attr == &efi_attr_fw_vendor.attr) { + if (efi_enabled(EFI_PARAVIRT) || + efi_fw_vendor == EFI_INVALID_TABLE_ADDR) + return 0; + } else if (attr == &efi_attr_runtime.attr) { + if (efi_runtime == EFI_INVALID_TABLE_ADDR) + return 0; + } else if (attr == &efi_attr_config_table.attr) { + if (efi_config_table == EFI_INVALID_TABLE_ADDR) + return 0; + } + return attr->mode; +} diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c new file mode 100644 index 0000000000..b2cc7b4552 --- /dev/null +++ b/arch/x86/platform/efi/efi_32.c @@ 
-0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Extensible Firmware Interface + * + * Based on Extensible Firmware Interface Specification version 1.0 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999-2002 Hewlett-Packard Co. + * David Mosberger-Tang <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + * + * All EFI Runtime Services are not implemented yet as EFI only + * supports physical mode addressing on SoftSDV. This is to be fixed + * in a future version. --drummond 1999-07-20 + * + * Implemented EFI runtime services and virtual mode calls. --davidm + * + * Goutham Rao: <goutham.rao@intel.com> + * Skip non-WB memory and ignore empty memory ranges. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/ioport.h> +#include <linux/efi.h> +#include <linux/pgtable.h> + +#include <asm/io.h> +#include <asm/desc.h> +#include <asm/page.h> +#include <asm/set_memory.h> +#include <asm/tlbflush.h> +#include <asm/efi.h> + +void __init efi_map_region(efi_memory_desc_t *md) +{ + u64 start_pfn, end_pfn, end; + unsigned long size; + void *va; + + start_pfn = PFN_DOWN(md->phys_addr); + size = md->num_pages << PAGE_SHIFT; + end = md->phys_addr + size; + end_pfn = PFN_UP(end); + + if (pfn_range_is_mapped(start_pfn, end_pfn)) { + va = __va(md->phys_addr); + + if (!(md->attribute & EFI_MEMORY_WB)) + set_memory_uc((unsigned long)va, md->num_pages); + } else { + va = ioremap_cache(md->phys_addr, size); + } + + md->virt_addr = (unsigned long)va; + if (!va) + pr_err("ioremap of 0x%llX failed!\n", md->phys_addr); +} + +/* + * To make EFI call EFI runtime service in physical addressing mode we need + * prolog/epilog before/after the invocation to claim the EFI runtime service + * handler exclusively and to duplicate a memory mapping in low memory space, + * say 0 - 3G. 
+ */ + +int __init efi_alloc_page_tables(void) +{ + return 0; +} + +void efi_sync_low_kernel_mappings(void) {} + +void __init efi_dump_pagetable(void) +{ +#ifdef CONFIG_EFI_PGT_DUMP + ptdump_walk_pgd_level(NULL, &init_mm); +#endif +} + +int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) +{ + return 0; +} + +void __init efi_map_region_fixed(efi_memory_desc_t *md) {} +void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} + +efi_status_t efi_call_svam(efi_runtime_services_t * const *, + u32, u32, u32, void *, u32); + +efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map, + unsigned long systab_phys) +{ + const efi_system_table_t *systab = (efi_system_table_t *)systab_phys; + struct desc_ptr gdt_descr; + efi_status_t status; + unsigned long flags; + pgd_t *save_pgd; + + /* Current pgd is swapper_pg_dir, we'll restore it later: */ + save_pgd = swapper_pg_dir; + load_cr3(initial_page_table); + __flush_tlb_all(); + + gdt_descr.address = get_cpu_gdt_paddr(0); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + + /* Disable interrupts around EFI calls: */ + local_irq_save(flags); + status = efi_call_svam(&systab->runtime, + memory_map_size, descriptor_size, + descriptor_version, virtual_map, + __pa(&efi.runtime)); + local_irq_restore(flags); + + load_fixmap_gdt(0); + load_cr3(save_pgd); + __flush_tlb_all(); + + return status; +} + +void __init efi_runtime_update_mappings(void) +{ + if (__supported_pte_mask & _PAGE_NX) { + efi_memory_desc_t *md; + + /* Make EFI runtime service code area executable */ + for_each_efi_memory_desc(md) { + if (md->type != EFI_RUNTIME_SERVICES_CODE) + continue; + + set_memory_x(md->virt_addr, md->num_pages); + } + } +} + +void arch_efi_call_virt_setup(void) +{ + efi_fpu_begin(); + firmware_restrict_branch_speculation_start(); +} + +void arch_efi_call_virt_teardown(void) +{ + 
firmware_restrict_branch_speculation_end(); + efi_fpu_end(); +} diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c new file mode 100644 index 0000000000..91d31ac422 --- /dev/null +++ b/arch/x86/platform/efi/efi_64.c @@ -0,0 +1,884 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * x86_64 specific EFI support functions + * Based on Extensible Firmware Interface Specification version 1.0 + * + * Copyright (C) 2005-2008 Intel Co. + * Fenghua Yu <fenghua.yu@intel.com> + * Bibo Mao <bibo.mao@intel.com> + * Chandramouli Narayanan <mouli@linux.intel.com> + * Huang Ying <ying.huang@intel.com> + * + * Code to convert EFI to E820 map has been implemented in elilo bootloader + * based on a EFI patch by Edgar Hucek. Based on the E820 map, the page table + * is setup appropriately for EFI runtime code. + * - mouli 06/14/2007. + * + */ + +#define pr_fmt(fmt) "efi: " fmt + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/memblock.h> +#include <linux/ioport.h> +#include <linux/mc146818rtc.h> +#include <linux/efi.h> +#include <linux/export.h> +#include <linux/uaccess.h> +#include <linux/io.h> +#include <linux/reboot.h> +#include <linux/slab.h> +#include <linux/ucs2_string.h> +#include <linux/cc_platform.h> +#include <linux/sched/task.h> + +#include <asm/setup.h> +#include <asm/page.h> +#include <asm/e820/api.h> +#include <asm/tlbflush.h> +#include <asm/proto.h> +#include <asm/efi.h> +#include <asm/cacheflush.h> +#include <asm/fixmap.h> +#include <asm/realmode.h> +#include <asm/time.h> +#include <asm/pgalloc.h> +#include <asm/sev.h> + +/* + * We allocate runtime services regions top-down, starting from -4G, i.e. + * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G. 
+ */ +static u64 efi_va = EFI_VA_START; +static struct mm_struct *efi_prev_mm; + +/* + * We need our own copy of the higher levels of the page tables + * because we want to avoid inserting EFI region mappings (EFI_VA_END + * to EFI_VA_START) into the standard kernel page tables. Everything + * else can be shared, see efi_sync_low_kernel_mappings(). + * + * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the + * allocation. + */ +int __init efi_alloc_page_tables(void) +{ + pgd_t *pgd, *efi_pgd; + p4d_t *p4d; + pud_t *pud; + gfp_t gfp_mask; + + gfp_mask = GFP_KERNEL | __GFP_ZERO; + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); + if (!efi_pgd) + goto fail; + + pgd = efi_pgd + pgd_index(EFI_VA_END); + p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END); + if (!p4d) + goto free_pgd; + + pud = pud_alloc(&init_mm, p4d, EFI_VA_END); + if (!pud) + goto free_p4d; + + efi_mm.pgd = efi_pgd; + mm_init_cpumask(&efi_mm); + init_new_context(NULL, &efi_mm); + + return 0; + +free_p4d: + if (pgtable_l5_enabled()) + free_page((unsigned long)pgd_page_vaddr(*pgd)); +free_pgd: + free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); +fail: + return -ENOMEM; +} + +/* + * Add low kernel mappings for passing arguments to EFI functions. + */ +void efi_sync_low_kernel_mappings(void) +{ + unsigned num_entries; + pgd_t *pgd_k, *pgd_efi; + p4d_t *p4d_k, *p4d_efi; + pud_t *pud_k, *pud_efi; + pgd_t *efi_pgd = efi_mm.pgd; + + pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); + pgd_k = pgd_offset_k(PAGE_OFFSET); + + num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET); + memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); + + pgd_efi = efi_pgd + pgd_index(EFI_VA_END); + pgd_k = pgd_offset_k(EFI_VA_END); + p4d_efi = p4d_offset(pgd_efi, 0); + p4d_k = p4d_offset(pgd_k, 0); + + num_entries = p4d_index(EFI_VA_END); + memcpy(p4d_efi, p4d_k, sizeof(p4d_t) * num_entries); + + /* + * We share all the PUD entries apart from those that map the + * EFI regions. 
Copy around them. + */ + BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0); + BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0); + + p4d_efi = p4d_offset(pgd_efi, EFI_VA_END); + p4d_k = p4d_offset(pgd_k, EFI_VA_END); + pud_efi = pud_offset(p4d_efi, 0); + pud_k = pud_offset(p4d_k, 0); + + num_entries = pud_index(EFI_VA_END); + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + + pud_efi = pud_offset(p4d_efi, EFI_VA_START); + pud_k = pud_offset(p4d_k, EFI_VA_START); + + num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START); + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); +} + +/* + * Wrapper for slow_virt_to_phys() that handles NULL addresses. + */ +static inline phys_addr_t +virt_to_phys_or_null_size(void *va, unsigned long size) +{ + phys_addr_t pa; + + if (!va) + return 0; + + if (virt_addr_valid(va)) + return virt_to_phys(va); + + pa = slow_virt_to_phys(va); + + /* check if the object crosses a page boundary */ + if (WARN_ON((pa ^ (pa + size - 1)) & PAGE_MASK)) + return 0; + + return pa; +} + +#define virt_to_phys_or_null(addr) \ + virt_to_phys_or_null_size((addr), sizeof(*(addr))) + +int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) +{ + extern const u8 __efi64_thunk_ret_tramp[]; + unsigned long pfn, text, pf, rodata, tramp; + struct page *page; + unsigned npages; + pgd_t *pgd = efi_mm.pgd; + + /* + * It can happen that the physical address of new_memmap lands in memory + * which is not mapped in the EFI page table. Therefore we need to go + * and ident-map those pages containing the map before calling + * phys_efi_set_virtual_address_map(). 
+ */ + pfn = pa_memmap >> PAGE_SHIFT; + pf = _PAGE_NX | _PAGE_RW | _PAGE_ENC; + if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, pf)) { + pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); + return 1; + } + + /* + * Certain firmware versions are way too sentimental and still believe + * they are exclusive and unquestionable owners of the first physical page, + * even though they explicitly mark it as EFI_CONVENTIONAL_MEMORY + * (but then write-access it later during SetVirtualAddressMap()). + * + * Create a 1:1 mapping for this page, to avoid triple faults during early + * boot with such firmware. We are free to hand this page to the BIOS, + * as trim_bios_range() will reserve the first page and isolate it away + * from memory allocators anyway. + */ + if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, pf)) { + pr_err("Failed to create 1:1 mapping for the first page!\n"); + return 1; + } + + /* + * When SEV-ES is active, the GHCB as set by the kernel will be used + * by firmware. Create a 1:1 unencrypted mapping for each GHCB. + */ + if (sev_es_efi_map_ghcbs(pgd)) { + pr_err("Failed to create 1:1 mapping for the GHCBs!\n"); + return 1; + } + + /* + * When making calls to the firmware everything needs to be 1:1 + * mapped and addressable with 32-bit pointers. Map the kernel + * text and allocate a new stack because we can't rely on the + * stack pointer being < 4GB. 
+ */ + if (!efi_is_mixed()) + return 0; + + page = alloc_page(GFP_KERNEL|__GFP_DMA32); + if (!page) { + pr_err("Unable to allocate EFI runtime stack < 4GB\n"); + return 1; + } + + efi_mixed_mode_stack_pa = page_to_phys(page + 1); /* stack grows down */ + + npages = (_etext - _text) >> PAGE_SHIFT; + text = __pa(_text); + + if (kernel_unmap_pages_in_pgd(pgd, text, npages)) { + pr_err("Failed to unmap kernel text 1:1 mapping\n"); + return 1; + } + + npages = (__end_rodata - __start_rodata) >> PAGE_SHIFT; + rodata = __pa(__start_rodata); + pfn = rodata >> PAGE_SHIFT; + + pf = _PAGE_NX | _PAGE_ENC; + if (kernel_map_pages_in_pgd(pgd, pfn, rodata, npages, pf)) { + pr_err("Failed to map kernel rodata 1:1\n"); + return 1; + } + + tramp = __pa(__efi64_thunk_ret_tramp); + pfn = tramp >> PAGE_SHIFT; + + pf = _PAGE_ENC; + if (kernel_map_pages_in_pgd(pgd, pfn, tramp, 1, pf)) { + pr_err("Failed to map mixed mode return trampoline\n"); + return 1; + } + + return 0; +} + +static void __init __map_region(efi_memory_desc_t *md, u64 va) +{ + unsigned long flags = _PAGE_RW; + unsigned long pfn; + pgd_t *pgd = efi_mm.pgd; + + /* + * EFI_RUNTIME_SERVICES_CODE regions typically cover PE/COFF + * executable images in memory that consist of both R-X and + * RW- sections, so we cannot apply read-only or non-exec + * permissions just yet. However, modern EFI systems provide + * a memory attributes table that describes those sections + * with the appropriate restricted permissions, which are + * applied in efi_runtime_update_mappings() below. All other + * regions can be mapped non-executable at this point, with + * the exception of boot services code regions, but those will + * be unmapped again entirely in efi_free_boot_services(). 
+ */ + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_CODE) + flags |= _PAGE_NX; + + if (!(md->attribute & EFI_MEMORY_WB)) + flags |= _PAGE_PCD; + + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) && + md->type != EFI_MEMORY_MAPPED_IO) + flags |= _PAGE_ENC; + + pfn = md->phys_addr >> PAGE_SHIFT; + if (kernel_map_pages_in_pgd(pgd, pfn, va, md->num_pages, flags)) + pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, va); +} + +void __init efi_map_region(efi_memory_desc_t *md) +{ + unsigned long size = md->num_pages << PAGE_SHIFT; + u64 pa = md->phys_addr; + + /* + * Make sure the 1:1 mappings are present as a catch-all for b0rked + * firmware which doesn't update all internal pointers after switching + * to virtual mode and would otherwise crap on us. + */ + __map_region(md, md->phys_addr); + + /* + * Enforce the 1:1 mapping as the default virtual address when + * booting in EFI mixed mode, because even though we may be + * running a 64-bit kernel, the firmware may only be 32-bit. + */ + if (efi_is_mixed()) { + md->virt_addr = md->phys_addr; + return; + } + + efi_va -= size; + + /* Is PA 2M-aligned? */ + if (!(pa & (PMD_SIZE - 1))) { + efi_va &= PMD_MASK; + } else { + u64 pa_offset = pa & (PMD_SIZE - 1); + u64 prev_va = efi_va; + + /* get us the same offset within this 2M page */ + efi_va = (efi_va & PMD_MASK) + pa_offset; + + if (efi_va > prev_va) + efi_va -= PMD_SIZE; + } + + if (efi_va < EFI_VA_END) { + pr_warn(FW_WARN "VA address range overflow!\n"); + return; + } + + /* Do the VA map */ + __map_region(md, efi_va); + md->virt_addr = efi_va; +} + +/* + * kexec kernel will use efi_map_region_fixed to map efi runtime memory ranges. + * md->virt_addr is the original virtual address which had been mapped in kexec + * 1st kernel. 
+ */ +void __init efi_map_region_fixed(efi_memory_desc_t *md) +{ + __map_region(md, md->phys_addr); + __map_region(md, md->virt_addr); +} + +void __init parse_efi_setup(u64 phys_addr, u32 data_len) +{ + efi_setup = phys_addr + sizeof(struct setup_data); +} + +static int __init efi_update_mappings(efi_memory_desc_t *md, unsigned long pf) +{ + unsigned long pfn; + pgd_t *pgd = efi_mm.pgd; + int err1, err2; + + /* Update the 1:1 mapping */ + pfn = md->phys_addr >> PAGE_SHIFT; + err1 = kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, md->num_pages, pf); + if (err1) { + pr_err("Error while updating 1:1 mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, md->virt_addr); + } + + err2 = kernel_map_pages_in_pgd(pgd, pfn, md->virt_addr, md->num_pages, pf); + if (err2) { + pr_err("Error while updating VA mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, md->virt_addr); + } + + return err1 || err2; +} + +bool efi_disable_ibt_for_runtime __ro_after_init = true; + +static int __init efi_update_mem_attr(struct mm_struct *mm, efi_memory_desc_t *md, + bool has_ibt) +{ + unsigned long pf = 0; + + efi_disable_ibt_for_runtime |= !has_ibt; + + if (md->attribute & EFI_MEMORY_XP) + pf |= _PAGE_NX; + + if (!(md->attribute & EFI_MEMORY_RO)) + pf |= _PAGE_RW; + + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + pf |= _PAGE_ENC; + + return efi_update_mappings(md, pf); +} + +void __init efi_runtime_update_mappings(void) +{ + efi_memory_desc_t *md; + + /* + * Use the EFI Memory Attribute Table for mapping permissions if it + * exists, since it is intended to supersede EFI_PROPERTIES_TABLE. + */ + if (efi_enabled(EFI_MEM_ATTR)) { + efi_disable_ibt_for_runtime = false; + efi_memattr_apply_permissions(NULL, efi_update_mem_attr); + return; + } + + /* + * EFI_MEMORY_ATTRIBUTES_TABLE is intended to replace + * EFI_PROPERTIES_TABLE. So, use EFI_PROPERTIES_TABLE to update + * permissions only if EFI_MEMORY_ATTRIBUTES_TABLE is not + * published by the firmware. 
Even if we find a buggy implementation of + * EFI_MEMORY_ATTRIBUTES_TABLE, don't fall back to + * EFI_PROPERTIES_TABLE, because of the same reason. + */ + + if (!efi_enabled(EFI_NX_PE_DATA)) + return; + + for_each_efi_memory_desc(md) { + unsigned long pf = 0; + + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + + if (!(md->attribute & EFI_MEMORY_WB)) + pf |= _PAGE_PCD; + + if ((md->attribute & EFI_MEMORY_XP) || + (md->type == EFI_RUNTIME_SERVICES_DATA)) + pf |= _PAGE_NX; + + if (!(md->attribute & EFI_MEMORY_RO) && + (md->type != EFI_RUNTIME_SERVICES_CODE)) + pf |= _PAGE_RW; + + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + pf |= _PAGE_ENC; + + efi_update_mappings(md, pf); + } +} + +void __init efi_dump_pagetable(void) +{ +#ifdef CONFIG_EFI_PGT_DUMP + ptdump_walk_pgd_level(NULL, &efi_mm); +#endif +} + +/* + * Makes the calling thread switch to/from efi_mm context. Can be used + * in a kernel thread and user context. Preemption needs to remain disabled + * while the EFI-mm is borrowed. mmgrab()/mmdrop() is not used because the mm + * can not change under us. + * It should be ensured that there are no concurrent calls to this function. + */ +static void efi_enter_mm(void) +{ + efi_prev_mm = current->active_mm; + current->active_mm = &efi_mm; + switch_mm(efi_prev_mm, &efi_mm, NULL); +} + +static void efi_leave_mm(void) +{ + current->active_mm = efi_prev_mm; + switch_mm(&efi_mm, efi_prev_mm, NULL); +} + +void arch_efi_call_virt_setup(void) +{ + efi_sync_low_kernel_mappings(); + efi_fpu_begin(); + firmware_restrict_branch_speculation_start(); + efi_enter_mm(); +} + +void arch_efi_call_virt_teardown(void) +{ + efi_leave_mm(); + firmware_restrict_branch_speculation_end(); + efi_fpu_end(); +} + +static DEFINE_SPINLOCK(efi_runtime_lock); + +/* + * DS and ES contain user values. We need to save them. + * The 32-bit EFI code needs a valid DS, ES, and SS. There's no + * need to save the old SS: __KERNEL_DS is always acceptable. + */ +#define __efi_thunk(func, ...) 
\ +({ \ + unsigned short __ds, __es; \ + efi_status_t ____s; \ + \ + savesegment(ds, __ds); \ + savesegment(es, __es); \ + \ + loadsegment(ss, __KERNEL_DS); \ + loadsegment(ds, __KERNEL_DS); \ + loadsegment(es, __KERNEL_DS); \ + \ + ____s = efi64_thunk(efi.runtime->mixed_mode.func, __VA_ARGS__); \ + \ + loadsegment(ds, __ds); \ + loadsegment(es, __es); \ + \ + ____s ^= (____s & BIT(31)) | (____s & BIT_ULL(31)) << 32; \ + ____s; \ +}) + +/* + * Switch to the EFI page tables early so that we can access the 1:1 + * runtime services mappings which are not mapped in any other page + * tables. + * + * Also, disable interrupts because the IDT points to 64-bit handlers, + * which aren't going to function correctly when we switch to 32-bit. + */ +#define efi_thunk(func...) \ +({ \ + efi_status_t __s; \ + \ + arch_efi_call_virt_setup(); \ + \ + __s = __efi_thunk(func); \ + \ + arch_efi_call_virt_teardown(); \ + \ + __s; \ +}) + +static efi_status_t __init __no_sanitize_address +efi_thunk_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) +{ + efi_status_t status; + unsigned long flags; + + efi_sync_low_kernel_mappings(); + local_irq_save(flags); + + efi_enter_mm(); + + status = __efi_thunk(set_virtual_address_map, memory_map_size, + descriptor_size, descriptor_version, virtual_map); + + efi_leave_mm(); + local_irq_restore(flags); + + return status; +} + +static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc) +{ + return EFI_UNSUPPORTED; +} + +static efi_status_t efi_thunk_set_time(efi_time_t *tm) +{ + return EFI_UNSUPPORTED; +} + +static efi_status_t +efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, + efi_time_t *tm) +{ + return EFI_UNSUPPORTED; +} + +static efi_status_t +efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) +{ + return EFI_UNSUPPORTED; +} + +static unsigned long efi_name_size(efi_char16_t *name) +{ + return 
ucs2_strsize(name, EFI_VAR_NAME_LEN) + 1; +} + +static efi_status_t +efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 *attr, unsigned long *data_size, void *data) +{ + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); + efi_status_t status; + u32 phys_name, phys_vendor, phys_attr; + u32 phys_data_size, phys_data; + unsigned long flags; + + spin_lock_irqsave(&efi_runtime_lock, flags); + + *vnd = *vendor; + + phys_data_size = virt_to_phys_or_null(data_size); + phys_vendor = virt_to_phys_or_null(vnd); + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); + phys_attr = virt_to_phys_or_null(attr); + phys_data = virt_to_phys_or_null_size(data, *data_size); + + if (!phys_name || (data && !phys_data)) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(get_variable, phys_name, phys_vendor, + phys_attr, phys_data_size, phys_data); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + return status; +} + +static efi_status_t +efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, void *data) +{ + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); + u32 phys_name, phys_vendor, phys_data; + efi_status_t status; + unsigned long flags; + + spin_lock_irqsave(&efi_runtime_lock, flags); + + *vnd = *vendor; + + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); + phys_vendor = virt_to_phys_or_null(vnd); + phys_data = virt_to_phys_or_null_size(data, data_size); + + if (!phys_name || (data && !phys_data)) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(set_variable, phys_name, phys_vendor, + attr, data_size, phys_data); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + return status; +} + +static efi_status_t +efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, + void *data) +{ + u8 buf[24] __aligned(8); + efi_guid_t *vnd 
= PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); + u32 phys_name, phys_vendor, phys_data; + efi_status_t status; + unsigned long flags; + + if (!spin_trylock_irqsave(&efi_runtime_lock, flags)) + return EFI_NOT_READY; + + *vnd = *vendor; + + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); + phys_vendor = virt_to_phys_or_null(vnd); + phys_data = virt_to_phys_or_null_size(data, data_size); + + if (!phys_name || (data && !phys_data)) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(set_variable, phys_name, phys_vendor, + attr, data_size, phys_data); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + return status; +} + +static efi_status_t +efi_thunk_get_next_variable(unsigned long *name_size, + efi_char16_t *name, + efi_guid_t *vendor) +{ + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); + efi_status_t status; + u32 phys_name_size, phys_name, phys_vendor; + unsigned long flags; + + spin_lock_irqsave(&efi_runtime_lock, flags); + + *vnd = *vendor; + + phys_name_size = virt_to_phys_or_null(name_size); + phys_vendor = virt_to_phys_or_null(vnd); + phys_name = virt_to_phys_or_null_size(name, *name_size); + + if (!phys_name) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(get_next_variable, phys_name_size, + phys_name, phys_vendor); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + *vendor = *vnd; + return status; +} + +static efi_status_t +efi_thunk_get_next_high_mono_count(u32 *count) +{ + return EFI_UNSUPPORTED; +} + +static void +efi_thunk_reset_system(int reset_type, efi_status_t status, + unsigned long data_size, efi_char16_t *data) +{ + u32 phys_data; + unsigned long flags; + + spin_lock_irqsave(&efi_runtime_lock, flags); + + phys_data = virt_to_phys_or_null_size(data, data_size); + + efi_thunk(reset_system, reset_type, status, data_size, phys_data); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); +} + +static efi_status_t 
+efi_thunk_update_capsule(efi_capsule_header_t **capsules, + unsigned long count, unsigned long sg_list) +{ + /* + * To properly support this function we would need to repackage + * 'capsules' because the firmware doesn't understand 64-bit + * pointers. + */ + return EFI_UNSUPPORTED; +} + +static efi_status_t +efi_thunk_query_variable_info(u32 attr, u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) +{ + efi_status_t status; + u32 phys_storage, phys_remaining, phys_max; + unsigned long flags; + + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; + + spin_lock_irqsave(&efi_runtime_lock, flags); + + phys_storage = virt_to_phys_or_null(storage_space); + phys_remaining = virt_to_phys_or_null(remaining_space); + phys_max = virt_to_phys_or_null(max_variable_size); + + status = efi_thunk(query_variable_info, attr, phys_storage, + phys_remaining, phys_max); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + return status; +} + +static efi_status_t +efi_thunk_query_variable_info_nonblocking(u32 attr, u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) +{ + efi_status_t status; + u32 phys_storage, phys_remaining, phys_max; + unsigned long flags; + + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; + + if (!spin_trylock_irqsave(&efi_runtime_lock, flags)) + return EFI_NOT_READY; + + phys_storage = virt_to_phys_or_null(storage_space); + phys_remaining = virt_to_phys_or_null(remaining_space); + phys_max = virt_to_phys_or_null(max_variable_size); + + status = efi_thunk(query_variable_info, attr, phys_storage, + phys_remaining, phys_max); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + return status; +} + +static efi_status_t +efi_thunk_query_capsule_caps(efi_capsule_header_t **capsules, + unsigned long count, u64 *max_size, + int *reset_type) +{ + /* + * To properly support this function we would need to repackage + * 'capsules' because the firmware doesn't 
understand 64-bit + * pointers. + */ + return EFI_UNSUPPORTED; +} + +void __init efi_thunk_runtime_setup(void) +{ + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return; + + efi.get_time = efi_thunk_get_time; + efi.set_time = efi_thunk_set_time; + efi.get_wakeup_time = efi_thunk_get_wakeup_time; + efi.set_wakeup_time = efi_thunk_set_wakeup_time; + efi.get_variable = efi_thunk_get_variable; + efi.get_next_variable = efi_thunk_get_next_variable; + efi.set_variable = efi_thunk_set_variable; + efi.set_variable_nonblocking = efi_thunk_set_variable_nonblocking; + efi.get_next_high_mono_count = efi_thunk_get_next_high_mono_count; + efi.reset_system = efi_thunk_reset_system; + efi.query_variable_info = efi_thunk_query_variable_info; + efi.query_variable_info_nonblocking = efi_thunk_query_variable_info_nonblocking; + efi.update_capsule = efi_thunk_update_capsule; + efi.query_capsule_caps = efi_thunk_query_capsule_caps; +} + +efi_status_t __init __no_sanitize_address +efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map, + unsigned long systab_phys) +{ + const efi_system_table_t *systab = (efi_system_table_t *)systab_phys; + efi_status_t status; + unsigned long flags; + + if (efi_is_mixed()) + return efi_thunk_set_virtual_address_map(memory_map_size, + descriptor_size, + descriptor_version, + virtual_map); + efi_enter_mm(); + + efi_fpu_begin(); + + /* Disable interrupts around EFI calls: */ + local_irq_save(flags); + status = arch_efi_call_virt(efi.runtime, set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); + local_irq_restore(flags); + + efi_fpu_end(); + + /* grab the virtually remapped EFI runtime services table pointer */ + efi.runtime = READ_ONCE(systab->runtime); + + efi_leave_mm(); + + return status; +} diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S new file mode 100644 index 
0000000000..f3cfdb1c9a --- /dev/null +++ b/arch/x86/platform/efi/efi_stub_32.S @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * EFI call stub for IA32. + * + * This stub allows us to make EFI calls in physical mode with interrupts + * turned off. + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/asm-offsets.h> +#include <asm/page_types.h> + + __INIT +SYM_FUNC_START(efi_call_svam) + push %ebp + movl %esp, %ebp + push %ebx + + push 16(%esp) + push 16(%esp) + push %ecx + push %edx + movl %eax, %ebx // &systab_phys->runtime + + /* + * Switch to the flat mapped alias of this routine, by jumping to the + * address of label '1' after subtracting PAGE_OFFSET from it. + */ + movl $1f, %edx + subl $__PAGE_OFFSET, %edx + jmp *%edx +1: + + /* disable paging */ + movl %cr0, %edx + andl $0x7fffffff, %edx + movl %edx, %cr0 + + /* convert the stack pointer to a flat mapped address */ + subl $__PAGE_OFFSET, %esp + + /* call the EFI routine */ + movl (%eax), %eax + call *EFI_svam(%eax) + + /* grab the virtually remapped EFI runtime services table pointer */ + movl (%ebx), %ecx + movl 36(%esp), %edx // &efi.runtime + movl %ecx, (%edx) + + /* re-enable paging */ + movl %cr0, %edx + orl $0x80000000, %edx + movl %edx, %cr0 + + movl 16(%esp), %ebx + leave + RET +SYM_FUNC_END(efi_call_svam) diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S new file mode 100644 index 0000000000..2206b8bc47 --- /dev/null +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Function calling ABI conversion from Linux to EFI for x86_64 + * + * Copyright (C) 2007 Intel Corp + * Bibo Mao <bibo.mao@intel.com> + * Huang Ying <ying.huang@intel.com> + */ + +#include <linux/linkage.h> +#include <asm/nospec-branch.h> + +SYM_FUNC_START(__efi_call) + pushq %rbp + movq %rsp, %rbp + and $~0xf, %rsp + mov 16(%rbp), %rax + subq $48, %rsp + mov %r9, 32(%rsp) + mov %rax, 40(%rsp) + mov %r8, %r9 + 
mov %rcx, %r8 + mov %rsi, %rcx + CALL_NOSPEC rdi + leave + RET +SYM_FUNC_END(__efi_call) diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S new file mode 100644 index 0000000000..c4b1144f99 --- /dev/null +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2014 Intel Corporation; author Matt Fleming + * + * Support for invoking 32-bit EFI runtime services from a 64-bit + * kernel. + * + * The below thunking functions are only used after ExitBootServices() + * has been called. This simplifies things considerably as compared with + * the early EFI thunking because we can leave all the kernel state + * intact (GDT, IDT, etc) and simply invoke the 32-bit EFI runtime + * services from __KERNEL32_CS. This means we can continue to service + * interrupts across an EFI mixed mode call. + * + * We do however, need to handle the fact that we're running in a full + * 64-bit virtual address space. Things like the stack and instruction + * addresses need to be accessible by the 32-bit firmware, so we rely on + * using the identity mappings in the EFI page table to access the stack + * and kernel text (see efi_setup_page_tables()). + */ + +#include <linux/linkage.h> +#include <linux/objtool.h> +#include <asm/page_types.h> +#include <asm/segment.h> + + .text + .code64 +SYM_FUNC_START(__efi64_thunk) +STACK_FRAME_NON_STANDARD __efi64_thunk + push %rbp + push %rbx + + /* + * Switch to 1:1 mapped 32-bit stack pointer. + */ + movq %rsp, %rax + movq efi_mixed_mode_stack_pa(%rip), %rsp + push %rax + + /* + * Copy args passed via the stack + */ + subq $0x24, %rsp + movq 0x18(%rax), %rbp + movq 0x20(%rax), %rbx + movq 0x28(%rax), %rax + movl %ebp, 0x18(%rsp) + movl %ebx, 0x1c(%rsp) + movl %eax, 0x20(%rsp) + + /* + * Calculate the physical address of the kernel text. 
+ */ + movq $__START_KERNEL_map, %rax + subq phys_base(%rip), %rax + + leaq 1f(%rip), %rbp + leaq 2f(%rip), %rbx + subq %rax, %rbp + subq %rax, %rbx + + movl %ebx, 0x0(%rsp) /* return address */ + movl %esi, 0x4(%rsp) + movl %edx, 0x8(%rsp) + movl %ecx, 0xc(%rsp) + movl %r8d, 0x10(%rsp) + movl %r9d, 0x14(%rsp) + + /* Switch to 32-bit descriptor */ + pushq $__KERNEL32_CS + pushq %rdi /* EFI runtime service address */ + lretq + + // This return instruction is not needed for correctness, as it will + // never be reached. It only exists to make objtool happy, which will + // otherwise complain about unreachable instructions in the callers. + RET +SYM_FUNC_END(__efi64_thunk) + + .section ".rodata", "a", @progbits + .balign 16 +SYM_DATA_START(__efi64_thunk_ret_tramp) +1: movq 0x20(%rsp), %rsp + pop %rbx + pop %rbp + ret + int3 + + .code32 +2: pushl $__KERNEL_CS + pushl %ebp + lret +SYM_DATA_END(__efi64_thunk_ret_tramp) + + .bss + .balign 8 +SYM_DATA(efi_mixed_mode_stack_pa, .quad 0) diff --git a/arch/x86/platform/efi/fake_mem.c b/arch/x86/platform/efi/fake_mem.c new file mode 100644 index 0000000000..41d57cad3d --- /dev/null +++ b/arch/x86/platform/efi/fake_mem.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fake_mem.c + * + * Copyright (C) 2015 FUJITSU LIMITED + * Author: Taku Izumi <izumi.taku@jp.fujitsu.com> + * + * This code introduces new boot option named "efi_fake_mem" + * By specifying this parameter, you can add arbitrary attribute to + * specific memory range by updating original (firmware provided) EFI + * memmap. 
+ */ + +#include <linux/kernel.h> +#include <linux/efi.h> +#include <linux/init.h> +#include <linux/memblock.h> +#include <linux/types.h> +#include <linux/sort.h> +#include <asm/e820/api.h> +#include <asm/efi.h> + +#define EFI_MAX_FAKEMEM CONFIG_EFI_MAX_FAKE_MEM + +static struct efi_mem_range efi_fake_mems[EFI_MAX_FAKEMEM]; +static int nr_fake_mem; + +static int __init cmp_fake_mem(const void *x1, const void *x2) +{ + const struct efi_mem_range *m1 = x1; + const struct efi_mem_range *m2 = x2; + + if (m1->range.start < m2->range.start) + return -1; + if (m1->range.start > m2->range.start) + return 1; + return 0; +} + +static void __init efi_fake_range(struct efi_mem_range *efi_range) +{ + struct efi_memory_map_data data = { 0 }; + int new_nr_map = efi.memmap.nr_map; + efi_memory_desc_t *md; + void *new_memmap; + + /* count up the number of EFI memory descriptor */ + for_each_efi_memory_desc(md) + new_nr_map += efi_memmap_split_count(md, &efi_range->range); + + /* allocate memory for new EFI memmap */ + if (efi_memmap_alloc(new_nr_map, &data) != 0) + return; + + /* create new EFI memmap */ + new_memmap = early_memremap(data.phys_map, data.size); + if (!new_memmap) { + __efi_memmap_free(data.phys_map, data.size, data.flags); + return; + } + + efi_memmap_insert(&efi.memmap, new_memmap, efi_range); + + /* swap into new EFI memmap */ + early_memunmap(new_memmap, data.size); + + efi_memmap_install(&data); +} + +void __init efi_fake_memmap(void) +{ + int i; + + if (!efi_enabled(EFI_MEMMAP) || !nr_fake_mem) + return; + + for (i = 0; i < nr_fake_mem; i++) + efi_fake_range(&efi_fake_mems[i]); + + /* print new EFI memmap */ + efi_print_memmap(); +} + +static int __init setup_fake_mem(char *p) +{ + u64 start = 0, mem_size = 0, attribute = 0; + int i; + + if (!p) + return -EINVAL; + + while (*p != '\0') { + mem_size = memparse(p, &p); + if (*p == '@') + start = memparse(p+1, &p); + else + break; + + if (*p == ':') + attribute = simple_strtoull(p+1, &p, 0); + else + break; + + if 
(nr_fake_mem >= EFI_MAX_FAKEMEM) + break; + + efi_fake_mems[nr_fake_mem].range.start = start; + efi_fake_mems[nr_fake_mem].range.end = start + mem_size - 1; + efi_fake_mems[nr_fake_mem].attribute = attribute; + nr_fake_mem++; + + if (*p == ',') + p++; + } + + sort(efi_fake_mems, nr_fake_mem, sizeof(struct efi_mem_range), + cmp_fake_mem, NULL); + + for (i = 0; i < nr_fake_mem; i++) + pr_info("efi_fake_mem: add attr=0x%016llx to [mem 0x%016llx-0x%016llx]", + efi_fake_mems[i].attribute, efi_fake_mems[i].range.start, + efi_fake_mems[i].range.end); + + return *p == '\0' ? 0 : -EINVAL; +} + +early_param("efi_fake_mem", setup_fake_mem); + +void __init efi_fake_memmap_early(void) +{ + int i; + + /* + * The late efi_fake_mem() call can handle all requests if + * EFI_MEMORY_SP support is disabled. + */ + if (!efi_soft_reserve_enabled()) + return; + + if (!efi_enabled(EFI_MEMMAP) || !nr_fake_mem) + return; + + /* + * Given that efi_fake_memmap() needs to perform memblock + * allocations it needs to run after e820__memblock_setup(). + * However, if efi_fake_mem specifies EFI_MEMORY_SP for a given + * address range that potentially needs to mark the memory as + * reserved prior to e820__memblock_setup(). Update e820 + * directly if EFI_MEMORY_SP is specified for an + * EFI_CONVENTIONAL_MEMORY descriptor. 
+ */ + for (i = 0; i < nr_fake_mem; i++) { + struct efi_mem_range *mem = &efi_fake_mems[i]; + efi_memory_desc_t *md; + u64 m_start, m_end; + + if ((mem->attribute & EFI_MEMORY_SP) == 0) + continue; + + m_start = mem->range.start; + m_end = mem->range.end; + for_each_efi_memory_desc(md) { + u64 start, end, size; + + if (md->type != EFI_CONVENTIONAL_MEMORY) + continue; + + start = md->phys_addr; + end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1; + + if (m_start <= end && m_end >= start) + /* fake range overlaps descriptor */; + else + continue; + + /* + * Trim the boundary of the e820 update to the + * descriptor in case the fake range overlaps + * !EFI_CONVENTIONAL_MEMORY + */ + start = max(start, m_start); + end = min(end, m_end); + size = end - start + 1; + + if (end <= start) + continue; + + /* + * Ensure each efi_fake_mem instance results in + * a unique e820 resource + */ + e820__range_remove(start, size, E820_TYPE_RAM, 1); + e820__range_add(start, size, E820_TYPE_SOFT_RESERVED); + e820__update_table(e820_table); + } + } +} diff --git a/arch/x86/platform/efi/memmap.c b/arch/x86/platform/efi/memmap.c new file mode 100644 index 0000000000..4ef20b49eb --- /dev/null +++ b/arch/x86/platform/efi/memmap.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common EFI memory map functions. 
+ */ + +#define pr_fmt(fmt) "efi: " fmt + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/efi.h> +#include <linux/io.h> +#include <asm/early_ioremap.h> +#include <asm/efi.h> +#include <linux/memblock.h> +#include <linux/slab.h> + +static phys_addr_t __init __efi_memmap_alloc_early(unsigned long size) +{ + return memblock_phys_alloc(size, SMP_CACHE_BYTES); +} + +static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size) +{ + unsigned int order = get_order(size); + struct page *p = alloc_pages(GFP_KERNEL, order); + + if (!p) + return 0; + + return PFN_PHYS(page_to_pfn(p)); +} + +void __init __efi_memmap_free(u64 phys, unsigned long size, unsigned long flags) +{ + if (flags & EFI_MEMMAP_MEMBLOCK) { + if (slab_is_available()) + memblock_free_late(phys, size); + else + memblock_phys_free(phys, size); + } else if (flags & EFI_MEMMAP_SLAB) { + struct page *p = pfn_to_page(PHYS_PFN(phys)); + unsigned int order = get_order(size); + + free_pages((unsigned long) page_address(p), order); + } +} + +/** + * efi_memmap_alloc - Allocate memory for the EFI memory map + * @num_entries: Number of entries in the allocated map. + * @data: efi memmap installation parameters + * + * Depending on whether mm_init() has already been invoked or not, + * either memblock or "normal" page allocation is used. + * + * Returns zero on success, a negative error code on failure. 
+ */ +int __init efi_memmap_alloc(unsigned int num_entries, + struct efi_memory_map_data *data) +{ + /* Expect allocation parameters are zero initialized */ + WARN_ON(data->phys_map || data->size); + + data->size = num_entries * efi.memmap.desc_size; + data->desc_version = efi.memmap.desc_version; + data->desc_size = efi.memmap.desc_size; + data->flags &= ~(EFI_MEMMAP_SLAB | EFI_MEMMAP_MEMBLOCK); + data->flags |= efi.memmap.flags & EFI_MEMMAP_LATE; + + if (slab_is_available()) { + data->flags |= EFI_MEMMAP_SLAB; + data->phys_map = __efi_memmap_alloc_late(data->size); + } else { + data->flags |= EFI_MEMMAP_MEMBLOCK; + data->phys_map = __efi_memmap_alloc_early(data->size); + } + + if (!data->phys_map) + return -ENOMEM; + return 0; +} + +/** + * efi_memmap_install - Install a new EFI memory map in efi.memmap + * @data: efi memmap installation parameters + * + * Unlike efi_memmap_init_*(), this function does not allow the caller + * to switch from early to late mappings. It simply uses the existing + * mapping function and installs the new memmap. + * + * Returns zero on success, a negative error code on failure. + */ +int __init efi_memmap_install(struct efi_memory_map_data *data) +{ + efi_memmap_unmap(); + + if (efi_enabled(EFI_PARAVIRT)) + return 0; + + return __efi_memmap_init(data); +} + +/** + * efi_memmap_split_count - Count number of additional EFI memmap entries + * @md: EFI memory descriptor to split + * @range: Address range (start, end) to split around + * + * Returns the number of additional EFI memmap entries required to + * accommodate @range. 
+ */ +int __init efi_memmap_split_count(efi_memory_desc_t *md, struct range *range) +{ + u64 m_start, m_end; + u64 start, end; + int count = 0; + + start = md->phys_addr; + end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1; + + /* modifying range */ + m_start = range->start; + m_end = range->end; + + if (m_start <= start) { + /* split into 2 parts */ + if (start < m_end && m_end < end) + count++; + } + + if (start < m_start && m_start < end) { + /* split into 3 parts */ + if (m_end < end) + count += 2; + /* split into 2 parts */ + if (end <= m_end) + count++; + } + + return count; +} + +/** + * efi_memmap_insert - Insert a memory region in an EFI memmap + * @old_memmap: The existing EFI memory map structure + * @buf: Address of buffer to store new map + * @mem: Memory map entry to insert + * + * It is suggested that you call efi_memmap_split_count() first + * to see how large @buf needs to be. + */ +void __init efi_memmap_insert(struct efi_memory_map *old_memmap, void *buf, + struct efi_mem_range *mem) +{ + u64 m_start, m_end, m_attr; + efi_memory_desc_t *md; + u64 start, end; + void *old, *new; + + /* modifying range */ + m_start = mem->range.start; + m_end = mem->range.end; + m_attr = mem->attribute; + + /* + * The EFI memory map deals with regions in EFI_PAGE_SIZE + * units. Ensure that the region described by 'mem' is aligned + * correctly. 
+ */ + if (!IS_ALIGNED(m_start, EFI_PAGE_SIZE) || + !IS_ALIGNED(m_end + 1, EFI_PAGE_SIZE)) { + WARN_ON(1); + return; + } + + for (old = old_memmap->map, new = buf; + old < old_memmap->map_end; + old += old_memmap->desc_size, new += old_memmap->desc_size) { + + /* copy original EFI memory descriptor */ + memcpy(new, old, old_memmap->desc_size); + md = new; + start = md->phys_addr; + end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1; + + if (m_start <= start && end <= m_end) + md->attribute |= m_attr; + + if (m_start <= start && + (start < m_end && m_end < end)) { + /* first part */ + md->attribute |= m_attr; + md->num_pages = (m_end - md->phys_addr + 1) >> + EFI_PAGE_SHIFT; + /* latter part */ + new += old_memmap->desc_size; + memcpy(new, old, old_memmap->desc_size); + md = new; + md->phys_addr = m_end + 1; + md->num_pages = (end - md->phys_addr + 1) >> + EFI_PAGE_SHIFT; + } + + if ((start < m_start && m_start < end) && m_end < end) { + /* first part */ + md->num_pages = (m_start - md->phys_addr) >> + EFI_PAGE_SHIFT; + /* middle part */ + new += old_memmap->desc_size; + memcpy(new, old, old_memmap->desc_size); + md = new; + md->attribute |= m_attr; + md->phys_addr = m_start; + md->num_pages = (m_end - m_start + 1) >> + EFI_PAGE_SHIFT; + /* last part */ + new += old_memmap->desc_size; + memcpy(new, old, old_memmap->desc_size); + md = new; + md->phys_addr = m_end + 1; + md->num_pages = (end - m_end) >> + EFI_PAGE_SHIFT; + } + + if ((start < m_start && m_start < end) && + (end <= m_end)) { + /* first part */ + md->num_pages = (m_start - md->phys_addr) >> + EFI_PAGE_SHIFT; + /* latter part */ + new += old_memmap->desc_size; + memcpy(new, old, old_memmap->desc_size); + md = new; + md->phys_addr = m_start; + md->num_pages = (end - md->phys_addr + 1) >> + EFI_PAGE_SHIFT; + md->attribute |= m_attr; + } + } +} diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c new file mode 100644 index 0000000000..f0cc000327 --- /dev/null +++ 
b/arch/x86/platform/efi/quirks.c @@ -0,0 +1,781 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define pr_fmt(fmt) "efi: " fmt + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/time.h> +#include <linux/types.h> +#include <linux/efi.h> +#include <linux/slab.h> +#include <linux/memblock.h> +#include <linux/acpi.h> +#include <linux/dmi.h> + +#include <asm/e820/api.h> +#include <asm/efi.h> +#include <asm/uv/uv.h> +#include <asm/cpu_device_id.h> +#include <asm/realmode.h> +#include <asm/reboot.h> + +#define EFI_MIN_RESERVE 5120 + +#define EFI_DUMMY_GUID \ + EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9) + +#define QUARK_CSH_SIGNATURE 0x5f435348 /* _CSH */ +#define QUARK_SECURITY_HEADER_SIZE 0x400 + +/* + * Header prepended to the standard EFI capsule on Quark systems the are based + * on Intel firmware BSP. + * @csh_signature: Unique identifier to sanity check signed module + * presence ("_CSH"). + * @version: Current version of CSH used. Should be one for Quark A0. + * @modulesize: Size of the entire module including the module header + * and payload. + * @security_version_number_index: Index of SVN to use for validation of signed + * module. + * @security_version_number: Used to prevent against roll back of modules. + * @rsvd_module_id: Currently unused for Clanton (Quark). + * @rsvd_module_vendor: Vendor Identifier. For Intel products value is + * 0x00008086. + * @rsvd_date: BCD representation of build date as yyyymmdd, where + * yyyy=4 digit year, mm=1-12, dd=1-31. + * @headersize: Total length of the header including including any + * padding optionally added by the signing tool. + * @hash_algo: What Hash is used in the module signing. + * @cryp_algo: What Crypto is used in the module signing. + * @keysize: Total length of the key data including including any + * padding optionally added by the signing tool. 
+ * @signaturesize: Total length of the signature including including any + * padding optionally added by the signing tool. + * @rsvd_next_header: 32-bit pointer to the next Secure Boot Module in the + * chain, if there is a next header. + * @rsvd: Reserved, padding structure to required size. + * + * See also QuartSecurityHeader_t in + * Quark_EDKII_v1.2.1.1/QuarkPlatformPkg/Include/QuarkBootRom.h + * from https://downloadcenter.intel.com/download/23197/Intel-Quark-SoC-X1000-Board-Support-Package-BSP + */ +struct quark_security_header { + u32 csh_signature; + u32 version; + u32 modulesize; + u32 security_version_number_index; + u32 security_version_number; + u32 rsvd_module_id; + u32 rsvd_module_vendor; + u32 rsvd_date; + u32 headersize; + u32 hash_algo; + u32 cryp_algo; + u32 keysize; + u32 signaturesize; + u32 rsvd_next_header; + u32 rsvd[2]; +}; + +static const efi_char16_t efi_dummy_name[] = L"DUMMY"; + +static bool efi_no_storage_paranoia; + +/* + * Some firmware implementations refuse to boot if there's insufficient + * space in the variable store. The implementation of garbage collection + * in some FW versions causes stale (deleted) variables to take up space + * longer than intended and space is only freed once the store becomes + * almost completely full. + * + * Enabling this option disables the space checks in + * efi_query_variable_store() and forces garbage collection. + * + * Only enable this option if deleting EFI variables does not free up + * space in your variable store, e.g. if despite deleting variables + * you're unable to create new ones. 
+ */ +static int __init setup_storage_paranoia(char *arg) +{ + efi_no_storage_paranoia = true; + return 0; +} +early_param("efi_no_storage_paranoia", setup_storage_paranoia); + +/* + * Deleting the dummy variable which kicks off garbage collection +*/ +void efi_delete_dummy_variable(void) +{ + efi.set_variable_nonblocking((efi_char16_t *)efi_dummy_name, + &EFI_DUMMY_GUID, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, 0, NULL); +} + +u64 efivar_reserved_space(void) +{ + if (efi_no_storage_paranoia) + return 0; + return EFI_MIN_RESERVE; +} +EXPORT_SYMBOL_GPL(efivar_reserved_space); + +/* + * In the nonblocking case we do not attempt to perform garbage + * collection if we do not have enough free space. Rather, we do the + * bare minimum check and give up immediately if the available space + * is below EFI_MIN_RESERVE. + * + * This function is intended to be small and simple because it is + * invoked from crash handler paths. + */ +static efi_status_t +query_variable_store_nonblocking(u32 attributes, unsigned long size) +{ + efi_status_t status; + u64 storage_size, remaining_size, max_size; + + status = efi.query_variable_info_nonblocking(attributes, &storage_size, + &remaining_size, + &max_size); + if (status != EFI_SUCCESS) + return status; + + if (remaining_size - size < EFI_MIN_RESERVE) + return EFI_OUT_OF_RESOURCES; + + return EFI_SUCCESS; +} + +/* + * Some firmware implementations refuse to boot if there's insufficient space + * in the variable store. Ensure that we never use more than a safe limit. + * + * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable + * store. 
+ */ +efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, + bool nonblocking) +{ + efi_status_t status; + u64 storage_size, remaining_size, max_size; + + if (!(attributes & EFI_VARIABLE_NON_VOLATILE)) + return 0; + + if (nonblocking) + return query_variable_store_nonblocking(attributes, size); + + status = efi.query_variable_info(attributes, &storage_size, + &remaining_size, &max_size); + if (status != EFI_SUCCESS) + return status; + + /* + * We account for that by refusing the write if permitting it would + * reduce the available space to under 5KB. This figure was provided by + * Samsung, so should be safe. + */ + if ((remaining_size - size < EFI_MIN_RESERVE) && + !efi_no_storage_paranoia) { + + /* + * Triggering garbage collection may require that the firmware + * generate a real EFI_OUT_OF_RESOURCES error. We can force + * that by attempting to use more space than is available. + */ + unsigned long dummy_size = remaining_size + 1024; + void *dummy = kzalloc(dummy_size, GFP_KERNEL); + + if (!dummy) + return EFI_OUT_OF_RESOURCES; + + status = efi.set_variable((efi_char16_t *)efi_dummy_name, + &EFI_DUMMY_GUID, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, + dummy_size, dummy); + + if (status == EFI_SUCCESS) { + /* + * This should have failed, so if it didn't make sure + * that we delete it... 
+ */ + efi_delete_dummy_variable(); + } + + kfree(dummy); + + /* + * The runtime code may now have triggered a garbage collection + * run, so check the variable info again + */ + status = efi.query_variable_info(attributes, &storage_size, + &remaining_size, &max_size); + + if (status != EFI_SUCCESS) + return status; + + /* + * There still isn't enough room, so return an error + */ + if (remaining_size - size < EFI_MIN_RESERVE) + return EFI_OUT_OF_RESOURCES; + } + + return EFI_SUCCESS; +} +EXPORT_SYMBOL_GPL(efi_query_variable_store); + +/* + * The UEFI specification makes it clear that the operating system is + * free to do whatever it wants with boot services code after + * ExitBootServices() has been called. Ignoring this recommendation a + * significant bunch of EFI implementations continue calling into boot + * services code (SetVirtualAddressMap). In order to work around such + * buggy implementations we reserve boot services region during EFI + * init and make sure it stays executable. Then, after + * SetVirtualAddressMap(), it is discarded. + * + * However, some boot services regions contain data that is required + * by drivers, so we need to track which memory ranges can never be + * freed. This is done by tagging those regions with the + * EFI_MEMORY_RUNTIME attribute. + * + * Any driver that wants to mark a region as reserved must use + * efi_mem_reserve() which will insert a new EFI memory descriptor + * into efi.memmap (splitting existing regions if necessary) and tag + * it with EFI_MEMORY_RUNTIME. 
+ */ +void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) +{ + struct efi_memory_map_data data = { 0 }; + struct efi_mem_range mr; + efi_memory_desc_t md; + int num_entries; + void *new; + + if (efi_mem_desc_lookup(addr, &md) || + md.type != EFI_BOOT_SERVICES_DATA) { + pr_err("Failed to lookup EFI memory descriptor for %pa\n", &addr); + return; + } + + if (addr + size > md.phys_addr + (md.num_pages << EFI_PAGE_SHIFT)) { + pr_err("Region spans EFI memory descriptors, %pa\n", &addr); + return; + } + + size += addr % EFI_PAGE_SIZE; + size = round_up(size, EFI_PAGE_SIZE); + addr = round_down(addr, EFI_PAGE_SIZE); + + mr.range.start = addr; + mr.range.end = addr + size - 1; + mr.attribute = md.attribute | EFI_MEMORY_RUNTIME; + + num_entries = efi_memmap_split_count(&md, &mr.range); + num_entries += efi.memmap.nr_map; + + if (efi_memmap_alloc(num_entries, &data) != 0) { + pr_err("Could not allocate boot services memmap\n"); + return; + } + + new = early_memremap_prot(data.phys_map, data.size, + pgprot_val(pgprot_encrypted(FIXMAP_PAGE_NORMAL))); + if (!new) { + pr_err("Failed to map new boot services memmap\n"); + return; + } + + efi_memmap_insert(&efi.memmap, new, &mr); + early_memunmap(new, data.size); + + efi_memmap_install(&data); + e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); + e820__update_table(e820_table); +} + +/* + * Helper function for efi_reserve_boot_services() to figure out if we + * can free regions in efi_free_boot_services(). + * + * Use this function to ensure we do not free regions owned by somebody + * else. 
We must only reserve (and then free) regions: + * + * - Not within any part of the kernel + * - Not the BIOS reserved area (E820_TYPE_RESERVED, E820_TYPE_NVS, etc) + */ +static __init bool can_free_region(u64 start, u64 size) +{ + if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end)) + return false; + + if (!e820__mapped_all(start, start+size, E820_TYPE_RAM)) + return false; + + return true; +} + +void __init efi_reserve_boot_services(void) +{ + efi_memory_desc_t *md; + + if (!efi_enabled(EFI_MEMMAP)) + return; + + for_each_efi_memory_desc(md) { + u64 start = md->phys_addr; + u64 size = md->num_pages << EFI_PAGE_SHIFT; + bool already_reserved; + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) + continue; + + already_reserved = memblock_is_region_reserved(start, size); + + /* + * Because the following memblock_reserve() is paired + * with memblock_free_late() for this region in + * efi_free_boot_services(), we must be extremely + * careful not to reserve, and subsequently free, + * critical regions of memory (like the kernel image) or + * those regions that somebody else has already + * reserved. + * + * A good example of a critical region that must not be + * freed is page zero (first 4Kb of memory), which may + * contain boot services code/data but is marked + * E820_TYPE_RESERVED by trim_bios_range(). + */ + if (!already_reserved) { + memblock_reserve(start, size); + + /* + * If we are the first to reserve the region, no + * one else cares about it. We own it and can + * free it later. + */ + if (can_free_region(start, size)) + continue; + } + + /* + * We don't own the region. We must not free it. + * + * Setting this bit for a boot services region really + * doesn't make sense as far as the firmware is + * concerned, but it does provide us with a way to tag + * those regions that must not be paired with + * memblock_free_late(). 
+ */ + md->attribute |= EFI_MEMORY_RUNTIME; + } +} + +/* + * Apart from having VA mappings for EFI boot services code/data regions, + * (duplicate) 1:1 mappings were also created as a quirk for buggy firmware. So, + * unmap both 1:1 and VA mappings. + */ +static void __init efi_unmap_pages(efi_memory_desc_t *md) +{ + pgd_t *pgd = efi_mm.pgd; + u64 pa = md->phys_addr; + u64 va = md->virt_addr; + + /* + * EFI mixed mode has all RAM mapped to access arguments while making + * EFI runtime calls, hence don't unmap EFI boot services code/data + * regions. + */ + if (efi_is_mixed()) + return; + + if (kernel_unmap_pages_in_pgd(pgd, pa, md->num_pages)) + pr_err("Failed to unmap 1:1 mapping for 0x%llx\n", pa); + + if (kernel_unmap_pages_in_pgd(pgd, va, md->num_pages)) + pr_err("Failed to unmap VA mapping for 0x%llx\n", va); +} + +void __init efi_free_boot_services(void) +{ + struct efi_memory_map_data data = { 0 }; + efi_memory_desc_t *md; + int num_entries = 0; + void *new, *new_md; + + /* Keep all regions for /sys/kernel/debug/efi */ + if (efi_enabled(EFI_DBG)) + return; + + for_each_efi_memory_desc(md) { + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + size_t rm_size; + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) { + num_entries++; + continue; + } + + /* Do not free, someone else owns it: */ + if (md->attribute & EFI_MEMORY_RUNTIME) { + num_entries++; + continue; + } + + /* + * Before calling set_virtual_address_map(), EFI boot services + * code/data regions were mapped as a quirk for buggy firmware. + * Unmap them from efi_pgd before freeing them up. + */ + efi_unmap_pages(md); + + /* + * Nasty quirk: if all sub-1MB memory is used for boot + * services, we can get here without having allocated the + * real mode trampoline. It's too late to hand boot services + * memory back to the memblock allocator, so instead + * try to manually allocate the trampoline if needed. 
+ * + * I've seen this on a Dell XPS 13 9350 with firmware + * 1.4.4 with SGX enabled booting Linux via Fedora 24's + * grub2-efi on a hard disk. (And no, I don't know why + * this happened, but Linux should still try to boot rather + * panicking early.) + */ + rm_size = real_mode_size_needed(); + if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) { + set_real_mode_mem(start); + start += rm_size; + size -= rm_size; + } + + /* + * Don't free memory under 1M for two reasons: + * - BIOS might clobber it + * - Crash kernel needs it to be reserved + */ + if (start + size < SZ_1M) + continue; + if (start < SZ_1M) { + size -= (SZ_1M - start); + start = SZ_1M; + } + + memblock_free_late(start, size); + } + + if (!num_entries) + return; + + if (efi_memmap_alloc(num_entries, &data) != 0) { + pr_err("Failed to allocate new EFI memmap\n"); + return; + } + + new = memremap(data.phys_map, data.size, MEMREMAP_WB); + if (!new) { + pr_err("Failed to map new EFI memmap\n"); + return; + } + + /* + * Build a new EFI memmap that excludes any boot services + * regions that are not tagged EFI_MEMORY_RUNTIME, since those + * regions have now been freed. + */ + new_md = new; + for_each_efi_memory_desc(md) { + if (!(md->attribute & EFI_MEMORY_RUNTIME) && + (md->type == EFI_BOOT_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_DATA)) + continue; + + memcpy(new_md, md, efi.memmap.desc_size); + new_md += efi.memmap.desc_size; + } + + memunmap(new); + + if (efi_memmap_install(&data) != 0) { + pr_err("Could not install new EFI memmap\n"); + return; + } +} + +/* + * A number of config table entries get remapped to virtual addresses + * after entering EFI virtual mode. However, the kexec kernel requires + * their physical addresses therefore we pass them via setup_data and + * correct those entries to their respective physical addresses here. + * + * Currently only handles smbios which is necessary for some firmware + * implementation. 
+ */ +int __init efi_reuse_config(u64 tables, int nr_tables) +{ + int i, sz, ret = 0; + void *p, *tablep; + struct efi_setup_data *data; + + if (nr_tables == 0) + return 0; + + if (!efi_setup) + return 0; + + if (!efi_enabled(EFI_64BIT)) + return 0; + + data = early_memremap(efi_setup, sizeof(*data)); + if (!data) { + ret = -ENOMEM; + goto out; + } + + if (!data->smbios) + goto out_memremap; + + sz = sizeof(efi_config_table_64_t); + + p = tablep = early_memremap(tables, nr_tables * sz); + if (!p) { + pr_err("Could not map Configuration table!\n"); + ret = -ENOMEM; + goto out_memremap; + } + + for (i = 0; i < nr_tables; i++) { + efi_guid_t guid; + + guid = ((efi_config_table_64_t *)p)->guid; + + if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) + ((efi_config_table_64_t *)p)->table = data->smbios; + p += sz; + } + early_memunmap(tablep, nr_tables * sz); + +out_memremap: + early_memunmap(data, sizeof(*data)); +out: + return ret; +} + +void __init efi_apply_memmap_quirks(void) +{ + /* + * Once setup is done earlier, unmap the EFI memory map on mismatched + * firmware/kernel architectures since there is no support for runtime + * services. + */ + if (!efi_runtime_supported()) { + pr_info("Setup done, disabling due to 32/64-bit mismatch\n"); + efi_memmap_unmap(); + } +} + +/* + * For most modern platforms the preferred method of powering off is via + * ACPI. However, there are some that are known to require the use of + * EFI runtime services and for which ACPI does not work at all. + * + * Using EFI is a last resort, to be used only if no other option + * exists. 
+ */ +bool efi_reboot_required(void) +{ + if (!acpi_gbl_reduced_hardware) + return false; + + efi_reboot_quirk_mode = EFI_RESET_WARM; + return true; +} + +bool efi_poweroff_required(void) +{ + return acpi_gbl_reduced_hardware || acpi_no_s5; +} + +#ifdef CONFIG_EFI_CAPSULE_QUIRK_QUARK_CSH + +static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff, + size_t hdr_bytes) +{ + struct quark_security_header *csh = *pkbuff; + + /* Only process data block that is larger than the security header */ + if (hdr_bytes < sizeof(struct quark_security_header)) + return 0; + + if (csh->csh_signature != QUARK_CSH_SIGNATURE || + csh->headersize != QUARK_SECURITY_HEADER_SIZE) + return 1; + + /* Only process data block if EFI header is included */ + if (hdr_bytes < QUARK_SECURITY_HEADER_SIZE + + sizeof(efi_capsule_header_t)) + return 0; + + pr_debug("Quark security header detected\n"); + + if (csh->rsvd_next_header != 0) { + pr_err("multiple Quark security headers not supported\n"); + return -EINVAL; + } + + *pkbuff += csh->headersize; + cap_info->total_size = csh->headersize; + + /* + * Update the first page pointer to skip over the CSH header. + */ + cap_info->phys[0] += csh->headersize; + + /* + * cap_info->capsule should point at a virtual mapping of the entire + * capsule, starting at the capsule header. Our image has the Quark + * security header prepended, so we cannot rely on the default vmap() + * mapping created by the generic capsule code. + * Given that the Quark firmware does not appear to care about the + * virtual mapping, let's just point cap_info->capsule at our copy + * of the capsule header. 
+ */ + cap_info->capsule = &cap_info->header; + + return 1; +} + +static const struct x86_cpu_id efi_capsule_quirk_ids[] = { + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, + &qrk_capsule_setup_info), + { } +}; + +int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, + size_t hdr_bytes) +{ + int (*quirk_handler)(struct capsule_info *, void **, size_t); + const struct x86_cpu_id *id; + int ret; + + if (hdr_bytes < sizeof(efi_capsule_header_t)) + return 0; + + cap_info->total_size = 0; + + id = x86_match_cpu(efi_capsule_quirk_ids); + if (id) { + /* + * The quirk handler is supposed to return + * - a value > 0 if the setup should continue, after advancing + * kbuff as needed + * - 0 if not enough hdr_bytes are available yet + * - a negative error code otherwise + */ + quirk_handler = (typeof(quirk_handler))id->driver_data; + ret = quirk_handler(cap_info, &kbuff, hdr_bytes); + if (ret <= 0) + return ret; + } + + memcpy(&cap_info->header, kbuff, sizeof(cap_info->header)); + + cap_info->total_size += cap_info->header.imagesize; + + return __efi_capsule_setup_info(cap_info); +} + +#endif + +/* + * If any access by any efi runtime service causes a page fault, then, + * 1. If it's efi_reset_system(), reboot through BIOS. + * 2. If any other efi runtime service, then + * a. Return error status to the efi caller process. + * b. Disable EFI Runtime Services forever and + * c. Freeze efi_rts_wq and schedule new process. + * + * @return: Returns, if the page fault is not handled. This function + * will never return if the page fault is handled successfully. + */ +void efi_crash_gracefully_on_page_fault(unsigned long phys_addr) +{ + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + /* + * If we get an interrupt/NMI while processing an EFI runtime service + * then this is a regular OOPS, not an EFI failure. + */ + if (in_interrupt()) + return; + + /* + * Make sure that an efi runtime service caused the page fault. 
+ * READ_ONCE() because we might be OOPSing in a different thread, + * and we don't want to trip KTSAN while trying to OOPS. + */ + if (READ_ONCE(efi_rts_work.efi_rts_id) == EFI_NONE || + current_work() != &efi_rts_work.work) + return; + + /* + * Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so + * page faulting on these addresses isn't expected. + */ + if (phys_addr <= 0x0fff) + return; + + /* + * Print stack trace as it might be useful to know which EFI Runtime + * Service is buggy. + */ + WARN(1, FW_BUG "Page fault caused by firmware at PA: 0x%lx\n", + phys_addr); + + /* + * Buggy efi_reset_system() is handled differently from other EFI + * Runtime Services as it doesn't use efi_rts_wq. Although, + * native_machine_emergency_restart() says that machine_real_restart() + * could fail, it's better not to complicate this fault handler + * because this case occurs *very* rarely and hence could be improved + * on a need by basis. + */ + if (efi_rts_work.efi_rts_id == EFI_RESET_SYSTEM) { + pr_info("efi_reset_system() buggy! Reboot through BIOS\n"); + machine_real_restart(MRR_BIOS); + return; + } + + /* + * Before calling EFI Runtime Service, the kernel has switched the + * calling process to efi_mm. Hence, switch back to task_mm. + */ + arch_efi_call_virt_teardown(); + + /* Signal error status to the efi caller process */ + efi_rts_work.status = EFI_ABORTED; + complete(&efi_rts_work.efi_rts_comp); + + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + pr_info("Froze efi_rts_wq and disabled EFI Runtime Services\n"); + + /* + * Call schedule() in an infinite loop, so that any spurious wake ups + * will never run efi_rts_wq again. 
+ */ + for (;;) { + set_current_state(TASK_IDLE); + schedule(); + } +} diff --git a/arch/x86/platform/efi/runtime-map.c b/arch/x86/platform/efi/runtime-map.c new file mode 100644 index 0000000000..a6f02cef3c --- /dev/null +++ b/arch/x86/platform/efi/runtime-map.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2013 Red Hat, Inc., Dave Young <dyoung@redhat.com> + */ + +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/efi.h> +#include <linux/slab.h> + +#include <asm/efi.h> +#include <asm/setup.h> + +struct efi_runtime_map_entry { + efi_memory_desc_t md; + struct kobject kobj; /* kobject for each entry */ +}; + +static struct efi_runtime_map_entry **map_entries; + +struct map_attribute { + struct attribute attr; + ssize_t (*show)(struct efi_runtime_map_entry *entry, char *buf); +}; + +static inline struct map_attribute *to_map_attr(struct attribute *attr) +{ + return container_of(attr, struct map_attribute, attr); +} + +static ssize_t type_show(struct efi_runtime_map_entry *entry, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "0x%x\n", entry->md.type); +} + +#define EFI_RUNTIME_FIELD(var) entry->md.var + +#define EFI_RUNTIME_U64_ATTR_SHOW(name) \ +static ssize_t name##_show(struct efi_runtime_map_entry *entry, char *buf) \ +{ \ + return snprintf(buf, PAGE_SIZE, "0x%llx\n", EFI_RUNTIME_FIELD(name)); \ +} + +EFI_RUNTIME_U64_ATTR_SHOW(phys_addr); +EFI_RUNTIME_U64_ATTR_SHOW(virt_addr); +EFI_RUNTIME_U64_ATTR_SHOW(num_pages); +EFI_RUNTIME_U64_ATTR_SHOW(attribute); + +static inline struct efi_runtime_map_entry *to_map_entry(struct kobject *kobj) +{ + return container_of(kobj, struct efi_runtime_map_entry, kobj); +} + +static ssize_t map_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct efi_runtime_map_entry *entry = to_map_entry(kobj); + struct map_attribute *map_attr = to_map_attr(attr); + + return map_attr->show(entry, buf); +} + +static 
struct map_attribute map_type_attr = __ATTR_RO_MODE(type, 0400); +static struct map_attribute map_phys_addr_attr = __ATTR_RO_MODE(phys_addr, 0400); +static struct map_attribute map_virt_addr_attr = __ATTR_RO_MODE(virt_addr, 0400); +static struct map_attribute map_num_pages_attr = __ATTR_RO_MODE(num_pages, 0400); +static struct map_attribute map_attribute_attr = __ATTR_RO_MODE(attribute, 0400); + +/* + * These are default attributes that are added for every memmap entry. + */ +static struct attribute *def_attrs[] = { + &map_type_attr.attr, + &map_phys_addr_attr.attr, + &map_virt_addr_attr.attr, + &map_num_pages_attr.attr, + &map_attribute_attr.attr, + NULL +}; +ATTRIBUTE_GROUPS(def); + +static const struct sysfs_ops map_attr_ops = { + .show = map_attr_show, +}; + +static void map_release(struct kobject *kobj) +{ + struct efi_runtime_map_entry *entry; + + entry = to_map_entry(kobj); + kfree(entry); +} + +static const struct kobj_type __refconst map_ktype = { + .sysfs_ops = &map_attr_ops, + .default_groups = def_groups, + .release = map_release, +}; + +static struct kset *map_kset; + +static struct efi_runtime_map_entry * +add_sysfs_runtime_map_entry(struct kobject *kobj, int nr, + efi_memory_desc_t *md) +{ + int ret; + struct efi_runtime_map_entry *entry; + + if (!map_kset) { + map_kset = kset_create_and_add("runtime-map", NULL, kobj); + if (!map_kset) + return ERR_PTR(-ENOMEM); + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + kset_unregister(map_kset); + map_kset = NULL; + return ERR_PTR(-ENOMEM); + } + + memcpy(&entry->md, md, sizeof(efi_memory_desc_t)); + + kobject_init(&entry->kobj, &map_ktype); + entry->kobj.kset = map_kset; + ret = kobject_add(&entry->kobj, NULL, "%d", nr); + if (ret) { + kobject_put(&entry->kobj); + kset_unregister(map_kset); + map_kset = NULL; + return ERR_PTR(ret); + } + + return entry; +} + +int efi_get_runtime_map_size(void) +{ + return efi.memmap.nr_map * efi.memmap.desc_size; +} + +int 
efi_get_runtime_map_desc_size(void) +{ + return efi.memmap.desc_size; +} + +int efi_runtime_map_copy(void *buf, size_t bufsz) +{ + size_t sz = efi_get_runtime_map_size(); + + if (sz > bufsz) + sz = bufsz; + + memcpy(buf, efi.memmap.map, sz); + return 0; +} + +static int __init efi_runtime_map_init(void) +{ + int i, j, ret = 0; + struct efi_runtime_map_entry *entry; + efi_memory_desc_t *md; + + if (!efi_enabled(EFI_MEMMAP) || !efi_kobj) + return 0; + + map_entries = kcalloc(efi.memmap.nr_map, sizeof(entry), GFP_KERNEL); + if (!map_entries) { + ret = -ENOMEM; + goto out; + } + + i = 0; + for_each_efi_memory_desc(md) { + entry = add_sysfs_runtime_map_entry(efi_kobj, i, md); + if (IS_ERR(entry)) { + ret = PTR_ERR(entry); + goto out_add_entry; + } + *(map_entries + i++) = entry; + } + + return 0; +out_add_entry: + for (j = i - 1; j >= 0; j--) { + entry = *(map_entries + j); + kobject_put(&entry->kobj); + } +out: + return ret; +} +subsys_initcall_sync(efi_runtime_map_init); |