diff options
Diffstat (limited to 'arch/x86/platform/efi')
-rw-r--r-- | arch/x86/platform/efi/Makefile | 7 | ||||
-rw-r--r-- | arch/x86/platform/efi/early_printk.c | 236 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi.c | 1046 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_32.c | 97 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_64.c | 1039 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_stub_32.S | 124 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_stub_64.S | 58 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_thunk_64.S | 153 | ||||
-rw-r--r-- | arch/x86/platform/efi/quirks.c | 655 |
9 files changed, 3415 insertions, 0 deletions
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile new file mode 100644 index 000000000..e4dc3862d --- /dev/null +++ b/arch/x86/platform/efi/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y +OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y + +obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o +obj-$(CONFIG_EARLY_PRINTK_EFI) += early_printk.o +obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c new file mode 100644 index 000000000..c3e6be110 --- /dev/null +++ b/arch/x86/platform/efi/early_printk.c @@ -0,0 +1,236 @@ +/* + * Copyright (C) 2013 Intel Corporation; author Matt Fleming + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + */ + +#include <linux/console.h> +#include <linux/efi.h> +#include <linux/font.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <asm/setup.h> + +static const struct font_desc *font; +static u32 efi_x, efi_y; +static void *efi_fb; +static bool early_efi_keep; + +/* + * efi earlyprintk need use early_ioremap to map the framebuffer. + * But early_ioremap is not usable for earlyprintk=efi,keep, ioremap should + * be used instead. ioremap will be available after paging_init() which is + * earlier than initcall callbacks. Thus adding this early initcall function + * early_efi_map_fb to map the whole efi framebuffer. + */ +static __init int early_efi_map_fb(void) +{ + unsigned long base, size; + + if (!early_efi_keep) + return 0; + + base = boot_params.screen_info.lfb_base; + size = boot_params.screen_info.lfb_size; + efi_fb = ioremap(base, size); + + return efi_fb ? 0 : -ENOMEM; +} +early_initcall(early_efi_map_fb); + +/* + * early_efi_map maps efi framebuffer region [start, start + len -1] + * In case earlyprintk=efi,keep we have the whole framebuffer mapped already + * so just return the offset efi_fb + start. + */ +static __ref void *early_efi_map(unsigned long start, unsigned long len) +{ + unsigned long base; + + base = boot_params.screen_info.lfb_base; + + if (efi_fb) + return (efi_fb + start); + else + return early_ioremap(base + start, len); +} + +static __ref void early_efi_unmap(void *addr, unsigned long len) +{ + if (!efi_fb) + early_iounmap(addr, len); +} + +static void early_efi_clear_scanline(unsigned int y) +{ + unsigned long *dst; + u16 len; + + len = boot_params.screen_info.lfb_linelength; + dst = early_efi_map(y*len, len); + if (!dst) + return; + + memset(dst, 0, len); + early_efi_unmap(dst, len); +} + +static void early_efi_scroll_up(void) +{ + unsigned long *dst, *src; + u16 len; + u32 i, height; + + len = boot_params.screen_info.lfb_linelength; + height = boot_params.screen_info.lfb_height; + + for (i = 0; i < height - font->height; i++) { + dst = early_efi_map(i*len, len); + if (!dst) + return; + + src = early_efi_map((i + font->height) * len, len); + if (!src) { + early_efi_unmap(dst, len); + return; + } + + memmove(dst, src, len); + + early_efi_unmap(src, len); + early_efi_unmap(dst, len); + } +} + +static void early_efi_write_char(u32 *dst, unsigned char c, unsigned int h) +{ + const u32 color_black = 0x00000000; + const u32 color_white = 0x00ffffff; + const u8 *src; + u8 s8; + int m; + + src = font->data + c * font->height; + s8 = *(src + h); + + for (m = 0; m < 8; m++) { + if ((s8 >> (7 - m)) & 1) + *dst = color_white; + else + *dst = color_black; + dst++; + } +} + +static void +early_efi_write(struct console *con, const char *str, unsigned int num) +{ + struct screen_info *si; + unsigned int len; + const char *s; + void *dst; + + si = &boot_params.screen_info; + len = si->lfb_linelength; + + while (num) { + unsigned int linemax; + unsigned int h, count = 0; + + for (s = str; *s && *s != '\n'; s++) { + if (count == num) + break; + count++; + } + + linemax = (si->lfb_width - efi_x) / font->width; + if (count > linemax) + count = linemax; + + for (h = 0; h < font->height; h++) { + unsigned int n, x; + + dst = early_efi_map((efi_y + h) * len, len); + if (!dst) + return; + + s = str; + n = count; + x = efi_x; + + while (n-- > 0) { + early_efi_write_char(dst + x*4, *s, h); + x += font->width; + s++; + } + + early_efi_unmap(dst, len); + } + + num -= count; + efi_x += count * font->width; + str += count; + + if (num > 0 && *s == '\n') { + efi_x = 0; + efi_y += font->height; + str++; + num--; + } + + if (efi_x + font->width > si->lfb_width) { + efi_x = 0; + efi_y += font->height; + } + + if (efi_y + font->height > si->lfb_height) { + u32 i; + + efi_y -= font->height; + early_efi_scroll_up(); + + for (i = 0; i < font->height; i++) + early_efi_clear_scanline(efi_y + i); + } + } +} + +static __init int early_efi_setup(struct console *con, char *options) +{ + struct screen_info *si; + u16 xres, yres; + u32 i; + + si = &boot_params.screen_info; + xres = si->lfb_width; + yres = si->lfb_height; + + /* + * early_efi_write_char() implicitly assumes a framebuffer with + * 32-bits per pixel. + */ + if (si->lfb_depth != 32) + return -ENODEV; + + font = get_default_font(xres, yres, -1, -1); + if (!font) + return -ENODEV; + + efi_y = rounddown(yres, font->height) - font->height; + for (i = 0; i < (yres - efi_y) / font->height; i++) + early_efi_scroll_up(); + + /* early_console_register will unset CON_BOOT in case ,keep */ + if (!(con->flags & CON_BOOT)) + early_efi_keep = true; + return 0; +} + +struct console early_efi_console = { + .name = "earlyefi", + .write = early_efi_write, + .setup = early_efi_setup, + .flags = CON_PRINTBUFFER, + .index = -1, +}; diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c new file mode 100644 index 000000000..e7f19dec1 --- /dev/null +++ b/arch/x86/platform/efi/efi.c @@ -0,0 +1,1046 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common EFI (Extensible Firmware Interface) support functions + * Based on Extensible Firmware Interface Specification version 1.0 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999-2002 Hewlett-Packard Co. + * David Mosberger-Tang <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 2005-2008 Intel Co. + * Fenghua Yu <fenghua.yu@intel.com> + * Bibo Mao <bibo.mao@intel.com> + * Chandramouli Narayanan <mouli@linux.intel.com> + * Huang Ying <ying.huang@intel.com> + * Copyright (C) 2013 SuSE Labs + * Borislav Petkov <bp@suse.de> - runtime services VA mapping + * + * Copied from efi_32.c to eliminate the duplicated code between EFI + * 32/64 support code. --ying 2007-10-26 + * + * All EFI Runtime Services are not implemented yet as EFI only + * supports physical mode addressing on SoftSDV. This is to be fixed + * in a future version. --drummond 1999-07-20 + * + * Implemented EFI runtime services and virtual mode calls. --davidm + * + * Goutham Rao: <goutham.rao@intel.com> + * Skip non-WB memory and ignore empty memory ranges. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/efi.h> +#include <linux/efi-bgrt.h> +#include <linux/export.h> +#include <linux/bootmem.h> +#include <linux/slab.h> +#include <linux/memblock.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> +#include <linux/time.h> +#include <linux/io.h> +#include <linux/reboot.h> +#include <linux/bcd.h> + +#include <asm/setup.h> +#include <asm/efi.h> +#include <asm/e820/api.h> +#include <asm/time.h> +#include <asm/set_memory.h> +#include <asm/tlbflush.h> +#include <asm/x86_init.h> +#include <asm/uv/uv.h> + +static struct efi efi_phys __initdata; +static efi_system_table_t efi_systab __initdata; + +static efi_config_table_type_t arch_tables[] __initdata = { +#ifdef CONFIG_X86_UV + {UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab}, +#endif + {NULL_GUID, NULL, NULL}, +}; + +u64 efi_setup; /* efi setup_data physical address */ + +static int add_efi_memmap __initdata; +static int __init setup_add_efi_memmap(char *arg) +{ + add_efi_memmap = 1; + return 0; +} +early_param("add_efi_memmap", setup_add_efi_memmap); + +static efi_status_t __init phys_efi_set_virtual_address_map( + unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) +{ + efi_status_t status; + unsigned long flags; + pgd_t *save_pgd; + + save_pgd = efi_call_phys_prolog(); + + /* Disable interrupts around EFI calls: */ + local_irq_save(flags); + status = efi_call_phys(efi_phys.set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); + local_irq_restore(flags); + + efi_call_phys_epilog(save_pgd); + + return status; +} + +void __init efi_find_mirror(void) +{ + efi_memory_desc_t *md; + u64 mirror_size = 0, total_size = 0; + + for_each_efi_memory_desc(md) { + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + + total_size += size; + if (md->attribute & EFI_MEMORY_MORE_RELIABLE) { + memblock_mark_mirror(start, size); + mirror_size += size; + } + } + if (mirror_size) + pr_info("Memory: %lldM/%lldM mirrored memory\n", + mirror_size>>20, total_size>>20); +} + +/* + * Tell the kernel about the EFI memory map. This might include + * more than the max 128 entries that can fit in the e820 legacy + * (zeropage) memory map. + */ + +static void __init do_add_efi_memmap(void) +{ + efi_memory_desc_t *md; + + for_each_efi_memory_desc(md) { + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + int e820_type; + + switch (md->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + if (md->attribute & EFI_MEMORY_WB) + e820_type = E820_TYPE_RAM; + else + e820_type = E820_TYPE_RESERVED; + break; + case EFI_ACPI_RECLAIM_MEMORY: + e820_type = E820_TYPE_ACPI; + break; + case EFI_ACPI_MEMORY_NVS: + e820_type = E820_TYPE_NVS; + break; + case EFI_UNUSABLE_MEMORY: + e820_type = E820_TYPE_UNUSABLE; + break; + case EFI_PERSISTENT_MEMORY: + e820_type = E820_TYPE_PMEM; + break; + default: + /* + * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE + * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO + * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE + */ + e820_type = E820_TYPE_RESERVED; + break; + } + e820__range_add(start, size, e820_type); + } + e820__update_table(e820_table); +} + +int __init efi_memblock_x86_reserve_range(void) +{ + struct efi_info *e = &boot_params.efi_info; + struct efi_memory_map_data data; + phys_addr_t pmap; + int rv; + + if (efi_enabled(EFI_PARAVIRT)) + return 0; + +#ifdef CONFIG_X86_32 + /* Can't handle data above 4GB at this time */ + if (e->efi_memmap_hi) { + pr_err("Memory map is above 4GB, disabling EFI.\n"); + return -EINVAL; + } + pmap = e->efi_memmap; +#else + pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32)); +#endif + data.phys_map = pmap; + data.size = e->efi_memmap_size; + data.desc_size = e->efi_memdesc_size; + data.desc_version = e->efi_memdesc_version; + + rv = efi_memmap_init_early(&data); + if (rv) + return rv; + + if (add_efi_memmap) + do_add_efi_memmap(); + + WARN(efi.memmap.desc_version != 1, + "Unexpected EFI_MEMORY_DESCRIPTOR version %ld", + efi.memmap.desc_version); + + memblock_reserve(pmap, efi.memmap.nr_map * efi.memmap.desc_size); + + return 0; +} + +#define OVERFLOW_ADDR_SHIFT (64 - EFI_PAGE_SHIFT) +#define OVERFLOW_ADDR_MASK (U64_MAX << OVERFLOW_ADDR_SHIFT) +#define U64_HIGH_BIT (~(U64_MAX >> 1)) + +static bool __init efi_memmap_entry_valid(const efi_memory_desc_t *md, int i) +{ + u64 end = (md->num_pages << EFI_PAGE_SHIFT) + md->phys_addr - 1; + u64 end_hi = 0; + char buf[64]; + + if (md->num_pages == 0) { + end = 0; + } else if (md->num_pages > EFI_PAGES_MAX || + EFI_PAGES_MAX - md->num_pages < + (md->phys_addr >> EFI_PAGE_SHIFT)) { + end_hi = (md->num_pages & OVERFLOW_ADDR_MASK) + >> OVERFLOW_ADDR_SHIFT; + + if ((md->phys_addr & U64_HIGH_BIT) && !(end & U64_HIGH_BIT)) + end_hi += 1; + } else { + return true; + } + + pr_warn_once(FW_BUG "Invalid EFI memory map entries:\n"); + + if (end_hi) { + pr_warn("mem%02u: %s range=[0x%016llx-0x%llx%016llx] (invalid)\n", + i, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, end_hi, end); + } else { + pr_warn("mem%02u: %s range=[0x%016llx-0x%016llx] (invalid)\n", + i, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, end); + } + return false; +} + +static void __init efi_clean_memmap(void) +{ + efi_memory_desc_t *out = efi.memmap.map; + const efi_memory_desc_t *in = out; + const efi_memory_desc_t *end = efi.memmap.map_end; + int i, n_removal; + + for (i = n_removal = 0; in < end; i++) { + if (efi_memmap_entry_valid(in, i)) { + if (out != in) + memcpy(out, in, efi.memmap.desc_size); + out = (void *)out + efi.memmap.desc_size; + } else { + n_removal++; + } + in = (void *)in + efi.memmap.desc_size; + } + + if (n_removal > 0) { + u64 size = efi.memmap.nr_map - n_removal; + + pr_warn("Removing %d invalid memory map entries.\n", n_removal); + efi_memmap_install(efi.memmap.phys_map, size); + } +} + +void __init efi_print_memmap(void) +{ + efi_memory_desc_t *md; + int i = 0; + + for_each_efi_memory_desc(md) { + char buf[64]; + + pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n", + i++, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, + md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1, + (md->num_pages >> (20 - EFI_PAGE_SHIFT))); + } +} + +static int __init efi_systab_init(void *phys) +{ + if (efi_enabled(EFI_64BIT)) { + efi_system_table_64_t *systab64; + struct efi_setup_data *data = NULL; + u64 tmp = 0; + + if (efi_setup) { + data = early_memremap(efi_setup, sizeof(*data)); + if (!data) + return -ENOMEM; + } + systab64 = early_memremap((unsigned long)phys, + sizeof(*systab64)); + if (systab64 == NULL) { + pr_err("Couldn't map the system table!\n"); + if (data) + early_memunmap(data, sizeof(*data)); + return -ENOMEM; + } + + efi_systab.hdr = systab64->hdr; + efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor : + systab64->fw_vendor; + tmp |= data ? data->fw_vendor : systab64->fw_vendor; + efi_systab.fw_revision = systab64->fw_revision; + efi_systab.con_in_handle = systab64->con_in_handle; + tmp |= systab64->con_in_handle; + efi_systab.con_in = systab64->con_in; + tmp |= systab64->con_in; + efi_systab.con_out_handle = systab64->con_out_handle; + tmp |= systab64->con_out_handle; + efi_systab.con_out = systab64->con_out; + tmp |= systab64->con_out; + efi_systab.stderr_handle = systab64->stderr_handle; + tmp |= systab64->stderr_handle; + efi_systab.stderr = systab64->stderr; + tmp |= systab64->stderr; + efi_systab.runtime = data ? + (void *)(unsigned long)data->runtime : + (void *)(unsigned long)systab64->runtime; + tmp |= data ? data->runtime : systab64->runtime; + efi_systab.boottime = (void *)(unsigned long)systab64->boottime; + tmp |= systab64->boottime; + efi_systab.nr_tables = systab64->nr_tables; + efi_systab.tables = data ? (unsigned long)data->tables : + systab64->tables; + tmp |= data ? data->tables : systab64->tables; + + early_memunmap(systab64, sizeof(*systab64)); + if (data) + early_memunmap(data, sizeof(*data)); +#ifdef CONFIG_X86_32 + if (tmp >> 32) { + pr_err("EFI data located above 4GB, disabling EFI.\n"); + return -EINVAL; + } +#endif + } else { + efi_system_table_32_t *systab32; + + systab32 = early_memremap((unsigned long)phys, + sizeof(*systab32)); + if (systab32 == NULL) { + pr_err("Couldn't map the system table!\n"); + return -ENOMEM; + } + + efi_systab.hdr = systab32->hdr; + efi_systab.fw_vendor = systab32->fw_vendor; + efi_systab.fw_revision = systab32->fw_revision; + efi_systab.con_in_handle = systab32->con_in_handle; + efi_systab.con_in = systab32->con_in; + efi_systab.con_out_handle = systab32->con_out_handle; + efi_systab.con_out = systab32->con_out; + efi_systab.stderr_handle = systab32->stderr_handle; + efi_systab.stderr = systab32->stderr; + efi_systab.runtime = (void *)(unsigned long)systab32->runtime; + efi_systab.boottime = (void *)(unsigned long)systab32->boottime; + efi_systab.nr_tables = systab32->nr_tables; + efi_systab.tables = systab32->tables; + + early_memunmap(systab32, sizeof(*systab32)); + } + + efi.systab = &efi_systab; + + /* + * Verify the EFI Table + */ + if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) { + pr_err("System table signature incorrect!\n"); + return -EINVAL; + } + if ((efi.systab->hdr.revision >> 16) == 0) + pr_err("Warning: System table version %d.%02d, expected 1.00 or greater!\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff); + + return 0; +} + +static int __init efi_runtime_init32(void) +{ + efi_runtime_services_32_t *runtime; + + runtime = early_memremap((unsigned long)efi.systab->runtime, + sizeof(efi_runtime_services_32_t)); + if (!runtime) { + pr_err("Could not map the runtime service table!\n"); + return -ENOMEM; + } + + /* + * We will only need *early* access to the SetVirtualAddressMap + * EFI runtime service. All other runtime services will be called + * via the virtual mapping. + */ + efi_phys.set_virtual_address_map = + (efi_set_virtual_address_map_t *) + (unsigned long)runtime->set_virtual_address_map; + early_memunmap(runtime, sizeof(efi_runtime_services_32_t)); + + return 0; +} + +static int __init efi_runtime_init64(void) +{ + efi_runtime_services_64_t *runtime; + + runtime = early_memremap((unsigned long)efi.systab->runtime, + sizeof(efi_runtime_services_64_t)); + if (!runtime) { + pr_err("Could not map the runtime service table!\n"); + return -ENOMEM; + } + + /* + * We will only need *early* access to the SetVirtualAddressMap + * EFI runtime service. All other runtime services will be called + * via the virtual mapping. + */ + efi_phys.set_virtual_address_map = + (efi_set_virtual_address_map_t *) + (unsigned long)runtime->set_virtual_address_map; + early_memunmap(runtime, sizeof(efi_runtime_services_64_t)); + + return 0; +} + +static int __init efi_runtime_init(void) +{ + int rv; + + /* + * Check out the runtime services table. We need to map + * the runtime services table so that we can grab the physical + * address of several of the EFI runtime functions, needed to + * set the firmware into virtual mode. + * + * When EFI_PARAVIRT is in force then we could not map runtime + * service memory region because we do not have direct access to it. + * However, runtime services are available through proxy functions + * (e.g. in case of Xen dom0 EFI implementation they call special + * hypercall which executes relevant EFI functions) and that is why + * they are always enabled. + */ + + if (!efi_enabled(EFI_PARAVIRT)) { + if (efi_enabled(EFI_64BIT)) + rv = efi_runtime_init64(); + else + rv = efi_runtime_init32(); + + if (rv) + return rv; + } + + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + + return 0; +} + +void __init efi_init(void) +{ + efi_char16_t *c16; + char vendor[100] = "unknown"; + int i = 0; + +#ifdef CONFIG_X86_32 + if (boot_params.efi_info.efi_systab_hi || + boot_params.efi_info.efi_memmap_hi) { + pr_info("Table located above 4GB, disabling EFI.\n"); + return; + } + efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; +#else + efi_phys.systab = (efi_system_table_t *) + (boot_params.efi_info.efi_systab | + ((__u64)boot_params.efi_info.efi_systab_hi<<32)); +#endif + + if (efi_systab_init(efi_phys.systab)) + return; + + efi.config_table = (unsigned long)efi.systab->tables; + efi.fw_vendor = (unsigned long)efi.systab->fw_vendor; + efi.runtime = (unsigned long)efi.systab->runtime; + + /* + * Show what we know for posterity + */ + c16 = early_memremap_ro(efi.systab->fw_vendor, + sizeof(vendor) * sizeof(efi_char16_t)); + if (c16) { + for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i) + vendor[i] = c16[i]; + vendor[i] = '\0'; + early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); + } else { + pr_err("Could not map the firmware vendor!\n"); + } + + pr_info("EFI v%u.%.02u by %s\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff, vendor); + + if (efi_reuse_config(efi.systab->tables, efi.systab->nr_tables)) + return; + + if (efi_config_init(arch_tables)) + return; + + /* + * Note: We currently don't support runtime services on an EFI + * that doesn't match the kernel 32/64-bit mode. + */ + + if (!efi_runtime_supported()) + pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); + else { + if (efi_runtime_disabled() || efi_runtime_init()) { + efi_memmap_unmap(); + return; + } + } + + efi_clean_memmap(); + + if (efi_enabled(EFI_DBG)) + efi_print_memmap(); +} + +void __init efi_set_executable(efi_memory_desc_t *md, bool executable) +{ + u64 addr, npages; + + addr = md->virt_addr; + npages = md->num_pages; + + memrange_efi_to_native(&addr, &npages); + + if (executable) + set_memory_x(addr, npages); + else + set_memory_nx(addr, npages); +} + +void __init runtime_code_page_mkexec(void) +{ + efi_memory_desc_t *md; + + /* Make EFI runtime service code area executable */ + for_each_efi_memory_desc(md) { + if (md->type != EFI_RUNTIME_SERVICES_CODE) + continue; + + efi_set_executable(md, true); + } +} + +void __init efi_memory_uc(u64 addr, unsigned long size) +{ + unsigned long page_shift = 1UL << EFI_PAGE_SHIFT; + u64 npages; + + npages = round_up(size, page_shift) / page_shift; + memrange_efi_to_native(&addr, &npages); + set_memory_uc(addr, npages); +} + +void __init old_map_region(efi_memory_desc_t *md) +{ + u64 start_pfn, end_pfn, end; + unsigned long size; + void *va; + + start_pfn = PFN_DOWN(md->phys_addr); + size = md->num_pages << PAGE_SHIFT; + end = md->phys_addr + size; + end_pfn = PFN_UP(end); + + if (pfn_range_is_mapped(start_pfn, end_pfn)) { + va = __va(md->phys_addr); + + if (!(md->attribute & EFI_MEMORY_WB)) + efi_memory_uc((u64)(unsigned long)va, size); + } else + va = efi_ioremap(md->phys_addr, size, + md->type, md->attribute); + + md->virt_addr = (u64) (unsigned long) va; + if (!va) + pr_err("ioremap of 0x%llX failed!\n", + (unsigned long long)md->phys_addr); +} + +/* Merge contiguous regions of the same type and attribute */ +static void __init efi_merge_regions(void) +{ + efi_memory_desc_t *md, *prev_md = NULL; + + for_each_efi_memory_desc(md) { + u64 prev_size; + + if (!prev_md) { + prev_md = md; + continue; + } + + if (prev_md->type != md->type || + prev_md->attribute != md->attribute) { + prev_md = md; + continue; + } + + prev_size = prev_md->num_pages << EFI_PAGE_SHIFT; + + if (md->phys_addr == (prev_md->phys_addr + prev_size)) { + prev_md->num_pages += md->num_pages; + md->type = EFI_RESERVED_TYPE; + md->attribute = 0; + continue; + } + prev_md = md; + } +} + +static void __init get_systab_virt_addr(efi_memory_desc_t *md) +{ + unsigned long size; + u64 end, systab; + + size = md->num_pages << EFI_PAGE_SHIFT; + end = md->phys_addr + size; + systab = (u64)(unsigned long)efi_phys.systab; + if (md->phys_addr <= systab && systab < end) { + systab += md->virt_addr - md->phys_addr; + efi.systab = (efi_system_table_t *)(unsigned long)systab; + } +} + +static void *realloc_pages(void *old_memmap, int old_shift) +{ + void *ret; + + ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1); + if (!ret) + goto out; + + /* + * A first-time allocation doesn't have anything to copy. + */ + if (!old_memmap) + return ret; + + memcpy(ret, old_memmap, PAGE_SIZE << old_shift); + +out: + free_pages((unsigned long)old_memmap, old_shift); + return ret; +} + +/* + * Iterate the EFI memory map in reverse order because the regions + * will be mapped top-down. The end result is the same as if we had + * mapped things forward, but doesn't require us to change the + * existing implementation of efi_map_region(). + */ +static inline void *efi_map_next_entry_reverse(void *entry) +{ + /* Initial call */ + if (!entry) + return efi.memmap.map_end - efi.memmap.desc_size; + + entry -= efi.memmap.desc_size; + if (entry < efi.memmap.map) + return NULL; + + return entry; +} + +/* + * efi_map_next_entry - Return the next EFI memory map descriptor + * @entry: Previous EFI memory map descriptor + * + * This is a helper function to iterate over the EFI memory map, which + * we do in different orders depending on the current configuration. + * + * To begin traversing the memory map @entry must be %NULL. + * + * Returns %NULL when we reach the end of the memory map. + */ +static void *efi_map_next_entry(void *entry) +{ + if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) { + /* + * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE + * config table feature requires us to map all entries + * in the same order as they appear in the EFI memory + * map. That is to say, entry N must have a lower + * virtual address than entry N+1. This is because the + * firmware toolchain leaves relative references in + * the code/data sections, which are split and become + * separate EFI memory regions. Mapping things + * out-of-order leads to the firmware accessing + * unmapped addresses. + * + * Since we need to map things this way whether or not + * the kernel actually makes use of + * EFI_PROPERTIES_TABLE, let's just switch to this + * scheme by default for 64-bit. + */ + return efi_map_next_entry_reverse(entry); + } + + /* Initial call */ + if (!entry) + return efi.memmap.map; + + entry += efi.memmap.desc_size; + if (entry >= efi.memmap.map_end) + return NULL; + + return entry; +} + +static bool should_map_region(efi_memory_desc_t *md) +{ + /* + * Runtime regions always require runtime mappings (obviously). + */ + if (md->attribute & EFI_MEMORY_RUNTIME) + return true; + + /* + * 32-bit EFI doesn't suffer from the bug that requires us to + * reserve boot services regions, and mixed mode support + * doesn't exist for 32-bit kernels. + */ + if (IS_ENABLED(CONFIG_X86_32)) + return false; + + /* + * Map all of RAM so that we can access arguments in the 1:1 + * mapping when making EFI runtime calls. + */ + if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_is_native()) { + if (md->type == EFI_CONVENTIONAL_MEMORY || + md->type == EFI_LOADER_DATA || + md->type == EFI_LOADER_CODE) + return true; + } + + /* + * Map boot services regions as a workaround for buggy + * firmware that accesses them even when they shouldn't. + * + * See efi_{reserve,free}_boot_services(). + */ + if (md->type == EFI_BOOT_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_DATA) + return true; + + return false; +} + +/* + * Map the efi memory ranges of the runtime services and update new_mmap with + * virtual addresses. + */ +static void * __init efi_map_regions(int *count, int *pg_shift) +{ + void *p, *new_memmap = NULL; + unsigned long left = 0; + unsigned long desc_size; + efi_memory_desc_t *md; + + desc_size = efi.memmap.desc_size; + + p = NULL; + while ((p = efi_map_next_entry(p))) { + md = p; + + if (!should_map_region(md)) + continue; + + efi_map_region(md); + get_systab_virt_addr(md); + + if (left < desc_size) { + new_memmap = realloc_pages(new_memmap, *pg_shift); + if (!new_memmap) + return NULL; + + left += PAGE_SIZE << *pg_shift; + (*pg_shift)++; + } + + memcpy(new_memmap + (*count * desc_size), md, desc_size); + + left -= desc_size; + (*count)++; + } + + return new_memmap; +} + +static void __init kexec_enter_virtual_mode(void) +{ +#ifdef CONFIG_KEXEC_CORE + efi_memory_desc_t *md; + unsigned int num_pages; + + efi.systab = NULL; + + /* + * We don't do virtual mode, since we don't do runtime services, on + * non-native EFI. With efi=old_map, we don't do runtime services in + * kexec kernel because in the initial boot something else might + * have been mapped at these virtual addresses. + */ + if (!efi_is_native() || efi_enabled(EFI_OLD_MEMMAP)) { + efi_memmap_unmap(); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + + if (efi_alloc_page_tables()) { + pr_err("Failed to allocate EFI page tables\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + + /* + * Map efi regions which were passed via setup_data. The virt_addr is a + * fixed addr which was used in first kernel of a kexec boot. + */ + for_each_efi_memory_desc(md) { + efi_map_region_fixed(md); /* FIXME: add error handling */ + get_systab_virt_addr(md); + } + + /* + * Unregister the early EFI memmap from efi_init() and install + * the new EFI memory map. + */ + efi_memmap_unmap(); + + if (efi_memmap_init_late(efi.memmap.phys_map, + efi.memmap.desc_size * efi.memmap.nr_map)) { + pr_err("Failed to remap late EFI memory map\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + + BUG_ON(!efi.systab); + + num_pages = ALIGN(efi.memmap.nr_map * efi.memmap.desc_size, PAGE_SIZE); + num_pages >>= PAGE_SHIFT; + + if (efi_setup_page_tables(efi.memmap.phys_map, num_pages)) { + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + + efi_sync_low_kernel_mappings(); + + /* + * Now that EFI is in virtual mode, update the function + * pointers in the runtime service table to the new virtual addresses. + * + * Call EFI services through wrapper functions. + */ + efi.runtime_version = efi_systab.hdr.revision; + + efi_native_runtime_setup(); + + efi.set_virtual_address_map = NULL; + + if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) + runtime_code_page_mkexec(); +#endif +} + +/* + * This function will switch the EFI runtime services to virtual mode. + * Essentially, we look through the EFI memmap and map every region that + * has the runtime attribute bit set in its memory descriptor into the + * efi_pgd page table. + * + * The old method which used to update that memory descriptor with the + * virtual address obtained from ioremap() is still supported when the + * kernel is booted with efi=old_map on its command line. Same old + * method enabled the runtime services to be called without having to + * thunk back into physical mode for every invocation. + * + * The new method does a pagetable switch in a preemption-safe manner + * so that we're in a different address space when calling a runtime + * function. For function arguments passing we do copy the PUDs of the + * kernel page table into efi_pgd prior to each call. + * + * Specially for kexec boot, efi runtime maps in previous kernel should + * be passed in via setup_data. In that case runtime ranges will be mapped + * to the same virtual addresses as the first kernel, see + * kexec_enter_virtual_mode(). + */ +static void __init __efi_enter_virtual_mode(void) +{ + int count = 0, pg_shift = 0; + void *new_memmap = NULL; + efi_status_t status; + unsigned long pa; + + efi.systab = NULL; + + if (efi_alloc_page_tables()) { + pr_err("Failed to allocate EFI page tables\n"); + goto err; + } + + efi_merge_regions(); + new_memmap = efi_map_regions(&count, &pg_shift); + if (!new_memmap) { + pr_err("Error reallocating memory, EFI runtime non-functional!\n"); + goto err; + } + + pa = __pa(new_memmap); + + /* + * Unregister the early EFI memmap from efi_init() and install + * the new EFI memory map that we are about to pass to the + * firmware via SetVirtualAddressMap(). + */ + efi_memmap_unmap(); + + if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) { + pr_err("Failed to remap late EFI memory map\n"); + goto err; + } + + if (efi_enabled(EFI_DBG)) { + pr_info("EFI runtime memory map:\n"); + efi_print_memmap(); + } + + if (WARN_ON(!efi.systab)) + goto err; + + if (efi_setup_page_tables(pa, 1 << pg_shift)) + goto err; + + efi_sync_low_kernel_mappings(); + + if (efi_is_native()) { + status = phys_efi_set_virtual_address_map( + efi.memmap.desc_size * count, + efi.memmap.desc_size, + efi.memmap.desc_version, + (efi_memory_desc_t *)pa); + } else { + status = efi_thunk_set_virtual_address_map( + efi_phys.set_virtual_address_map, + efi.memmap.desc_size * count, + efi.memmap.desc_size, + efi.memmap.desc_version, + (efi_memory_desc_t *)pa); + } + + if (status != EFI_SUCCESS) { + pr_err("Unable to switch EFI into virtual mode (status=%lx)!\n", + status); + goto err; + } + + /* + * Now that EFI is in virtual mode, update the function + * pointers in the runtime service table to the new virtual addresses. + * + * Call EFI services through wrapper functions. + */ + efi.runtime_version = efi_systab.hdr.revision; + + if (efi_is_native()) + efi_native_runtime_setup(); + else + efi_thunk_runtime_setup(); + + efi.set_virtual_address_map = NULL; + + /* + * Apply more restrictive page table mapping attributes now that + * SVAM() has been called and the firmware has performed all + * necessary relocation fixups for the new virtual addresses. + */ + efi_runtime_update_mappings(); + + /* clean DUMMY object */ + efi_delete_dummy_variable(); + return; + +err: + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); +} + +void __init efi_enter_virtual_mode(void) +{ + if (efi_enabled(EFI_PARAVIRT)) + return; + + if (efi_setup) + kexec_enter_virtual_mode(); + else + __efi_enter_virtual_mode(); + + efi_dump_pagetable(); +} + +static int __init arch_parse_efi_cmdline(char *str) +{ + if (!str) { + pr_warn("need at least one option\n"); + return -EINVAL; + } + + if (parse_option_str(str, "old_map")) + set_bit(EFI_OLD_MEMMAP, &efi.flags); + + return 0; +} +early_param("efi", arch_parse_efi_cmdline); diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c new file mode 100644 index 000000000..995965712 --- /dev/null +++ b/arch/x86/platform/efi/efi_32.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Extensible Firmware Interface + * + * Based on Extensible Firmware Interface Specification version 1.0 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999-2002 Hewlett-Packard Co. + * David Mosberger-Tang <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + * + * All EFI Runtime Services are not implemented yet as EFI only + * supports physical mode addressing on SoftSDV. This is to be fixed + * in a future version. --drummond 1999-07-20 + * + * Implemented EFI runtime services and virtual mode calls. --davidm + * + * Goutham Rao: <goutham.rao@intel.com> + * Skip non-WB memory and ignore empty memory ranges. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/ioport.h> +#include <linux/efi.h> + +#include <asm/io.h> +#include <asm/desc.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include <asm/efi.h> + +/* + * To make EFI call EFI runtime service in physical addressing mode we need + * prolog/epilog before/after the invocation to claim the EFI runtime service + * handler exclusively and to duplicate a memory mapping in low memory space, + * say 0 - 3G. + */ + +int __init efi_alloc_page_tables(void) +{ + return 0; +} + +void efi_sync_low_kernel_mappings(void) {} + +void __init efi_dump_pagetable(void) +{ +#ifdef CONFIG_EFI_PGT_DUMP + ptdump_walk_pgd_level(NULL, swapper_pg_dir); +#endif +} + +int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) +{ + return 0; +} + +void __init efi_map_region(efi_memory_desc_t *md) +{ + old_map_region(md); +} + +void __init efi_map_region_fixed(efi_memory_desc_t *md) {} +void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} + +pgd_t * __init efi_call_phys_prolog(void) +{ + struct desc_ptr gdt_descr; + pgd_t *save_pgd; + + /* Current pgd is swapper_pg_dir, we'll restore it later: */ + save_pgd = swapper_pg_dir; + load_cr3(initial_page_table); + __flush_tlb_all(); + + gdt_descr.address = get_cpu_gdt_paddr(0); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + + return save_pgd; +} + +void __init efi_call_phys_epilog(pgd_t *save_pgd) +{ + load_fixmap_gdt(0); + load_cr3(save_pgd); + __flush_tlb_all(); +} + +void __init efi_runtime_update_mappings(void) +{ + if (__supported_pte_mask & _PAGE_NX) + runtime_code_page_mkexec(); +} diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c new file mode 100644 index 000000000..77d05b560 --- /dev/null +++ b/arch/x86/platform/efi/efi_64.c @@ -0,0 +1,1039 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * x86_64 specific EFI support functions + * Based on Extensible Firmware Interface Specification version 1.0 + * + * Copyright (C) 2005-2008 Intel Co. + * Fenghua Yu <fenghua.yu@intel.com> + * Bibo Mao <bibo.mao@intel.com> + * Chandramouli Narayanan <mouli@linux.intel.com> + * Huang Ying <ying.huang@intel.com> + * + * Code to convert EFI to E820 map has been implemented in elilo bootloader + * based on a EFI patch by Edgar Hucek. Based on the E820 map, the page table + * is setup appropriately for EFI runtime code. + * - mouli 06/14/2007. + * + */ + +#define pr_fmt(fmt) "efi: " fmt + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/bootmem.h> +#include <linux/ioport.h> +#include <linux/mc146818rtc.h> +#include <linux/efi.h> +#include <linux/export.h> +#include <linux/uaccess.h> +#include <linux/io.h> +#include <linux/reboot.h> +#include <linux/slab.h> +#include <linux/ucs2_string.h> +#include <linux/mem_encrypt.h> +#include <linux/sched/task.h> + +#include <asm/setup.h> +#include <asm/page.h> +#include <asm/e820/api.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include <asm/proto.h> +#include <asm/efi.h> +#include <asm/cacheflush.h> +#include <asm/fixmap.h> +#include <asm/realmode.h> +#include <asm/time.h> +#include <asm/pgalloc.h> + +/* + * We allocate runtime services regions top-down, starting from -4G, i.e. + * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G. + */ +static u64 efi_va = EFI_VA_START; + +struct efi_scratch efi_scratch; + +static void __init early_code_mapping_set_exec(int executable) +{ + efi_memory_desc_t *md; + + if (!(__supported_pte_mask & _PAGE_NX)) + return; + + /* Make EFI service code area executable */ + for_each_efi_memory_desc(md) { + if (md->type == EFI_RUNTIME_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_CODE) + efi_set_executable(md, executable); + } +} + +pgd_t * __init efi_call_phys_prolog(void) +{ + unsigned long vaddr, addr_pgd, addr_p4d, addr_pud; + pgd_t *save_pgd, *pgd_k, *pgd_efi; + p4d_t *p4d, *p4d_k, *p4d_efi; + pud_t *pud; + + int pgd; + int n_pgds, i, j; + + if (!efi_enabled(EFI_OLD_MEMMAP)) { + efi_switch_mm(&efi_mm); + return NULL; + } + + early_code_mapping_set_exec(1); + + n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); + save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL); + + /* + * Build 1:1 identity mapping for efi=old_map usage. Note that + * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while + * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical + * address X, the pud_index(X) != pud_index(__va(X)), we can only copy + * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping. + * This means here we can only reuse the PMD tables of the direct mapping. + */ + for (pgd = 0; pgd < n_pgds; pgd++) { + addr_pgd = (unsigned long)(pgd * PGDIR_SIZE); + vaddr = (unsigned long)__va(pgd * PGDIR_SIZE); + pgd_efi = pgd_offset_k(addr_pgd); + save_pgd[pgd] = *pgd_efi; + + p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd); + if (!p4d) { + pr_err("Failed to allocate p4d table!\n"); + goto out; + } + + for (i = 0; i < PTRS_PER_P4D; i++) { + addr_p4d = addr_pgd + i * P4D_SIZE; + p4d_efi = p4d + p4d_index(addr_p4d); + + pud = pud_alloc(&init_mm, p4d_efi, addr_p4d); + if (!pud) { + pr_err("Failed to allocate pud table!\n"); + goto out; + } + + for (j = 0; j < PTRS_PER_PUD; j++) { + addr_pud = addr_p4d + j * PUD_SIZE; + + if (addr_pud > (max_pfn << PAGE_SHIFT)) + break; + + vaddr = (unsigned long)__va(addr_pud); + + pgd_k = pgd_offset_k(vaddr); + p4d_k = p4d_offset(pgd_k, vaddr); + pud[j] = *pud_offset(p4d_k, vaddr); + } + } + pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX; + } + +out: + __flush_tlb_all(); + + return save_pgd; +} + +void __init efi_call_phys_epilog(pgd_t *save_pgd) +{ + /* + * After the lock is released, the original page table is restored. + */ + int pgd_idx, i; + int nr_pgds; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + + if (!efi_enabled(EFI_OLD_MEMMAP)) { + efi_switch_mm(efi_scratch.prev_mm); + return; + } + + nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); + + for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) { + pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); + set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); + + if (!pgd_present(*pgd)) + continue; + + for (i = 0; i < PTRS_PER_P4D; i++) { + p4d = p4d_offset(pgd, + pgd_idx * PGDIR_SIZE + i * P4D_SIZE); + + if (!p4d_present(*p4d)) + continue; + + pud = (pud_t *)p4d_page_vaddr(*p4d); + pud_free(&init_mm, pud); + } + + p4d = (p4d_t *)pgd_page_vaddr(*pgd); + p4d_free(&init_mm, p4d); + } + + kfree(save_pgd); + + __flush_tlb_all(); + early_code_mapping_set_exec(0); +} + +EXPORT_SYMBOL_GPL(efi_mm); + +/* + * We need our own copy of the higher levels of the page tables + * because we want to avoid inserting EFI region mappings (EFI_VA_END + * to EFI_VA_START) into the standard kernel page tables. Everything + * else can be shared, see efi_sync_low_kernel_mappings(). + * + * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the + * allocation. + */ +int __init efi_alloc_page_tables(void) +{ + pgd_t *pgd, *efi_pgd; + p4d_t *p4d; + pud_t *pud; + gfp_t gfp_mask; + + if (efi_enabled(EFI_OLD_MEMMAP)) + return 0; + + gfp_mask = GFP_KERNEL | __GFP_ZERO; + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); + if (!efi_pgd) + goto fail; + + pgd = efi_pgd + pgd_index(EFI_VA_END); + p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END); + if (!p4d) + goto free_pgd; + + pud = pud_alloc(&init_mm, p4d, EFI_VA_END); + if (!pud) + goto free_p4d; + + efi_mm.pgd = efi_pgd; + mm_init_cpumask(&efi_mm); + init_new_context(NULL, &efi_mm); + + return 0; + +free_p4d: + if (pgtable_l5_enabled()) + free_page((unsigned long)pgd_page_vaddr(*pgd)); +free_pgd: + free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); +fail: + return -ENOMEM; +} + +/* + * Add low kernel mappings for passing arguments to EFI functions. + */ +void efi_sync_low_kernel_mappings(void) +{ + unsigned num_entries; + pgd_t *pgd_k, *pgd_efi; + p4d_t *p4d_k, *p4d_efi; + pud_t *pud_k, *pud_efi; + pgd_t *efi_pgd = efi_mm.pgd; + + if (efi_enabled(EFI_OLD_MEMMAP)) + return; + + /* + * We can share all PGD entries apart from the one entry that + * covers the EFI runtime mapping space. + * + * Make sure the EFI runtime region mappings are guaranteed to + * only span a single PGD entry and that the entry also maps + * other important kernel regions. + */ + MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); + MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != + (EFI_VA_END & PGDIR_MASK)); + + pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); + pgd_k = pgd_offset_k(PAGE_OFFSET); + + num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET); + memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); + + /* + * As with PGDs, we share all P4D entries apart from the one entry + * that covers the EFI runtime mapping space. + */ + BUILD_BUG_ON(p4d_index(EFI_VA_END) != p4d_index(MODULES_END)); + BUILD_BUG_ON((EFI_VA_START & P4D_MASK) != (EFI_VA_END & P4D_MASK)); + + pgd_efi = efi_pgd + pgd_index(EFI_VA_END); + pgd_k = pgd_offset_k(EFI_VA_END); + p4d_efi = p4d_offset(pgd_efi, 0); + p4d_k = p4d_offset(pgd_k, 0); + + num_entries = p4d_index(EFI_VA_END); + memcpy(p4d_efi, p4d_k, sizeof(p4d_t) * num_entries); + + /* + * We share all the PUD entries apart from those that map the + * EFI regions. Copy around them. + */ + BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0); + BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0); + + p4d_efi = p4d_offset(pgd_efi, EFI_VA_END); + p4d_k = p4d_offset(pgd_k, EFI_VA_END); + pud_efi = pud_offset(p4d_efi, 0); + pud_k = pud_offset(p4d_k, 0); + + num_entries = pud_index(EFI_VA_END); + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + + pud_efi = pud_offset(p4d_efi, EFI_VA_START); + pud_k = pud_offset(p4d_k, EFI_VA_START); + + num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START); + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); +} + +/* + * Wrapper for slow_virt_to_phys() that handles NULL addresses. + */ +static inline phys_addr_t +virt_to_phys_or_null_size(void *va, unsigned long size) +{ + phys_addr_t pa; + + if (!va) + return 0; + + if (virt_addr_valid(va)) + return virt_to_phys(va); + + pa = slow_virt_to_phys(va); + + /* check if the object crosses a page boundary */ + if (WARN_ON((pa ^ (pa + size - 1)) & PAGE_MASK)) + return 0; + + return pa; +} + +#define virt_to_phys_or_null(addr) \ + virt_to_phys_or_null_size((addr), sizeof(*(addr))) + +int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) +{ + unsigned long pfn, text, pf; + struct page *page; + unsigned npages; + pgd_t *pgd = efi_mm.pgd; + + if (efi_enabled(EFI_OLD_MEMMAP)) + return 0; + + /* + * It can happen that the physical address of new_memmap lands in memory + * which is not mapped in the EFI page table. Therefore we need to go + * and ident-map those pages containing the map before calling + * phys_efi_set_virtual_address_map(). + */ + pfn = pa_memmap >> PAGE_SHIFT; + pf = _PAGE_NX | _PAGE_RW | _PAGE_ENC; + if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, pf)) { + pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); + return 1; + } + + /* + * Certain firmware versions are way too sentimential and still believe + * they are exclusive and unquestionable owners of the first physical page, + * even though they explicitly mark it as EFI_CONVENTIONAL_MEMORY + * (but then write-access it later during SetVirtualAddressMap()). + * + * Create a 1:1 mapping for this page, to avoid triple faults during early + * boot with such firmware. We are free to hand this page to the BIOS, + * as trim_bios_range() will reserve the first page and isolate it away + * from memory allocators anyway. + */ + pf = _PAGE_RW; + if (sev_active()) + pf |= _PAGE_ENC; + + if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, pf)) { + pr_err("Failed to create 1:1 mapping for the first page!\n"); + return 1; + } + + /* + * When making calls to the firmware everything needs to be 1:1 + * mapped and addressable with 32-bit pointers. Map the kernel + * text and allocate a new stack because we can't rely on the + * stack pointer being < 4GB. + */ + if (!IS_ENABLED(CONFIG_EFI_MIXED) || efi_is_native()) + return 0; + + page = alloc_page(GFP_KERNEL|__GFP_DMA32); + if (!page) { + pr_err("Unable to allocate EFI runtime stack < 4GB\n"); + return 1; + } + + efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */ + + npages = (_etext - _text) >> PAGE_SHIFT; + text = __pa(_text); + pfn = text >> PAGE_SHIFT; + + pf = _PAGE_RW | _PAGE_ENC; + if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) { + pr_err("Failed to map kernel text 1:1\n"); + return 1; + } + + return 0; +} + +static void __init __map_region(efi_memory_desc_t *md, u64 va) +{ + unsigned long flags = _PAGE_RW; + unsigned long pfn; + pgd_t *pgd = efi_mm.pgd; + + if (!(md->attribute & EFI_MEMORY_WB)) + flags |= _PAGE_PCD; + + if (sev_active() && md->type != EFI_MEMORY_MAPPED_IO) + flags |= _PAGE_ENC; + + pfn = md->phys_addr >> PAGE_SHIFT; + if (kernel_map_pages_in_pgd(pgd, pfn, va, md->num_pages, flags)) + pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, va); +} + +void __init efi_map_region(efi_memory_desc_t *md) +{ + unsigned long size = md->num_pages << PAGE_SHIFT; + u64 pa = md->phys_addr; + + if (efi_enabled(EFI_OLD_MEMMAP)) + return old_map_region(md); + + /* + * Make sure the 1:1 mappings are present as a catch-all for b0rked + * firmware which doesn't update all internal pointers after switching + * to virtual mode and would otherwise crap on us. + */ + __map_region(md, md->phys_addr); + + /* + * Enforce the 1:1 mapping as the default virtual address when + * booting in EFI mixed mode, because even though we may be + * running a 64-bit kernel, the firmware may only be 32-bit. + */ + if (!efi_is_native () && IS_ENABLED(CONFIG_EFI_MIXED)) { + md->virt_addr = md->phys_addr; + return; + } + + efi_va -= size; + + /* Is PA 2M-aligned? */ + if (!(pa & (PMD_SIZE - 1))) { + efi_va &= PMD_MASK; + } else { + u64 pa_offset = pa & (PMD_SIZE - 1); + u64 prev_va = efi_va; + + /* get us the same offset within this 2M page */ + efi_va = (efi_va & PMD_MASK) + pa_offset; + + if (efi_va > prev_va) + efi_va -= PMD_SIZE; + } + + if (efi_va < EFI_VA_END) { + pr_warn(FW_WARN "VA address range overflow!\n"); + return; + } + + /* Do the VA map */ + __map_region(md, efi_va); + md->virt_addr = efi_va; +} + +/* + * kexec kernel will use efi_map_region_fixed to map efi runtime memory ranges. + * md->virt_addr is the original virtual address which had been mapped in kexec + * 1st kernel. + */ +void __init efi_map_region_fixed(efi_memory_desc_t *md) +{ + __map_region(md, md->phys_addr); + __map_region(md, md->virt_addr); +} + +void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, + u32 type, u64 attribute) +{ + unsigned long last_map_pfn; + + if (type == EFI_MEMORY_MAPPED_IO) + return ioremap(phys_addr, size); + + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); + if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { + unsigned long top = last_map_pfn << PAGE_SHIFT; + efi_ioremap(top, size - (top - phys_addr), type, attribute); + } + + if (!(attribute & EFI_MEMORY_WB)) + efi_memory_uc((u64)(unsigned long)__va(phys_addr), size); + + return (void __iomem *)__va(phys_addr); +} + +void __init parse_efi_setup(u64 phys_addr, u32 data_len) +{ + efi_setup = phys_addr + sizeof(struct setup_data); +} + +static int __init efi_update_mappings(efi_memory_desc_t *md, unsigned long pf) +{ + unsigned long pfn; + pgd_t *pgd = efi_mm.pgd; + int err1, err2; + + /* Update the 1:1 mapping */ + pfn = md->phys_addr >> PAGE_SHIFT; + err1 = kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, md->num_pages, pf); + if (err1) { + pr_err("Error while updating 1:1 mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, md->virt_addr); + } + + err2 = kernel_map_pages_in_pgd(pgd, pfn, md->virt_addr, md->num_pages, pf); + if (err2) { + pr_err("Error while updating VA mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, md->virt_addr); + } + + return err1 || err2; +} + +static int __init efi_update_mem_attr(struct mm_struct *mm, efi_memory_desc_t *md) +{ + unsigned long pf = 0; + + if (md->attribute & EFI_MEMORY_XP) + pf |= _PAGE_NX; + + if (!(md->attribute & EFI_MEMORY_RO)) + pf |= _PAGE_RW; + + if (sev_active()) + pf |= _PAGE_ENC; + + return efi_update_mappings(md, pf); +} + +void __init efi_runtime_update_mappings(void) +{ + efi_memory_desc_t *md; + + if (efi_enabled(EFI_OLD_MEMMAP)) { + if (__supported_pte_mask & _PAGE_NX) + runtime_code_page_mkexec(); + return; + } + + /* + * Use the EFI Memory Attribute Table for mapping permissions if it + * exists, since it is intended to supersede EFI_PROPERTIES_TABLE. + */ + if (efi_enabled(EFI_MEM_ATTR)) { + efi_memattr_apply_permissions(NULL, efi_update_mem_attr); + return; + } + + /* + * EFI_MEMORY_ATTRIBUTES_TABLE is intended to replace + * EFI_PROPERTIES_TABLE. So, use EFI_PROPERTIES_TABLE to update + * permissions only if EFI_MEMORY_ATTRIBUTES_TABLE is not + * published by the firmware. Even if we find a buggy implementation of + * EFI_MEMORY_ATTRIBUTES_TABLE, don't fall back to + * EFI_PROPERTIES_TABLE, because of the same reason. + */ + + if (!efi_enabled(EFI_NX_PE_DATA)) + return; + + for_each_efi_memory_desc(md) { + unsigned long pf = 0; + + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + + if (!(md->attribute & EFI_MEMORY_WB)) + pf |= _PAGE_PCD; + + if ((md->attribute & EFI_MEMORY_XP) || + (md->type == EFI_RUNTIME_SERVICES_DATA)) + pf |= _PAGE_NX; + + if (!(md->attribute & EFI_MEMORY_RO) && + (md->type != EFI_RUNTIME_SERVICES_CODE)) + pf |= _PAGE_RW; + + if (sev_active()) + pf |= _PAGE_ENC; + + efi_update_mappings(md, pf); + } +} + +void __init efi_dump_pagetable(void) +{ +#ifdef CONFIG_EFI_PGT_DUMP + if (efi_enabled(EFI_OLD_MEMMAP)) + ptdump_walk_pgd_level(NULL, swapper_pg_dir); + else + ptdump_walk_pgd_level(NULL, efi_mm.pgd); +#endif +} + +/* + * Makes the calling thread switch to/from efi_mm context. Can be used + * for SetVirtualAddressMap() i.e. current->active_mm == init_mm as well + * as during efi runtime calls i.e current->active_mm == current_mm. + * We are not mm_dropping()/mm_grabbing() any mm, because we are not + * losing/creating any references. + */ +void efi_switch_mm(struct mm_struct *mm) +{ + task_lock(current); + efi_scratch.prev_mm = current->active_mm; + current->active_mm = mm; + switch_mm(efi_scratch.prev_mm, mm, NULL); + task_unlock(current); +} + +#ifdef CONFIG_EFI_MIXED +extern efi_status_t efi64_thunk(u32, ...); + +static DEFINE_SPINLOCK(efi_runtime_lock); + +#define runtime_service32(func) \ +({ \ + u32 table = (u32)(unsigned long)efi.systab; \ + u32 *rt, *___f; \ + \ + rt = (u32 *)(table + offsetof(efi_system_table_32_t, runtime)); \ + ___f = (u32 *)(*rt + offsetof(efi_runtime_services_32_t, func)); \ + *___f; \ +}) + +/* + * Switch to the EFI page tables early so that we can access the 1:1 + * runtime services mappings which are not mapped in any other page + * tables. This function must be called before runtime_service32(). + * + * Also, disable interrupts because the IDT points to 64-bit handlers, + * which aren't going to function correctly when we switch to 32-bit. + */ +#define efi_thunk(f, ...) \ +({ \ + efi_status_t __s; \ + u32 __func; \ + \ + arch_efi_call_virt_setup(); \ + \ + __func = runtime_service32(f); \ + __s = efi64_thunk(__func, __VA_ARGS__); \ + \ + arch_efi_call_virt_teardown(); \ + \ + __s; \ +}) + +efi_status_t efi_thunk_set_virtual_address_map( + void *phys_set_virtual_address_map, + unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) +{ + efi_status_t status; + unsigned long flags; + u32 func; + + efi_sync_low_kernel_mappings(); + local_irq_save(flags); + + efi_switch_mm(&efi_mm); + + func = (u32)(unsigned long)phys_set_virtual_address_map; + status = efi64_thunk(func, memory_map_size, descriptor_size, + descriptor_version, virtual_map); + + efi_switch_mm(efi_scratch.prev_mm); + local_irq_restore(flags); + + return status; +} + +static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc) +{ + efi_status_t status; + u32 phys_tm, phys_tc; + unsigned long flags; + + spin_lock(&rtc_lock); + spin_lock_irqsave(&efi_runtime_lock, flags); + + phys_tm = virt_to_phys_or_null(tm); + phys_tc = virt_to_phys_or_null(tc); + + status = efi_thunk(get_time, phys_tm, phys_tc); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&rtc_lock); + + return status; +} + +static efi_status_t efi_thunk_set_time(efi_time_t *tm) +{ + efi_status_t status; + u32 phys_tm; + unsigned long flags; + + spin_lock(&rtc_lock); + spin_lock_irqsave(&efi_runtime_lock, flags); + + phys_tm = virt_to_phys_or_null(tm); + + status = efi_thunk(set_time, phys_tm); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&rtc_lock); + + return status; +} + +static efi_status_t +efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, + efi_time_t *tm) +{ + efi_status_t status; + u32 phys_enabled, phys_pending, phys_tm; + unsigned long flags; + + spin_lock(&rtc_lock); + spin_lock_irqsave(&efi_runtime_lock, flags); + + phys_enabled = virt_to_phys_or_null(enabled); + phys_pending = virt_to_phys_or_null(pending); + phys_tm = virt_to_phys_or_null(tm); + + status = efi_thunk(get_wakeup_time, phys_enabled, + phys_pending, phys_tm); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&rtc_lock); + + return status; +} + +static efi_status_t +efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) +{ + efi_status_t status; + u32 phys_tm; + unsigned long flags; + + spin_lock(&rtc_lock); + spin_lock_irqsave(&efi_runtime_lock, flags); + + phys_tm = virt_to_phys_or_null(tm); + + status = efi_thunk(set_wakeup_time, enabled, phys_tm); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&rtc_lock); + + return status; +} + +static unsigned long efi_name_size(efi_char16_t *name) +{ + return ucs2_strsize(name, EFI_VAR_NAME_LEN) + 1; +} + +static efi_status_t +efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 *attr, unsigned long *data_size, void *data) +{ + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); + efi_status_t status; + u32 phys_name, phys_vendor, phys_attr; + u32 phys_data_size, phys_data; + unsigned long flags; + + spin_lock_irqsave(&efi_runtime_lock, flags); + + *vnd = *vendor; + + phys_data_size = virt_to_phys_or_null(data_size); + phys_vendor = virt_to_phys_or_null(vnd); + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); + phys_attr = virt_to_phys_or_null(attr); + phys_data = virt_to_phys_or_null_size(data, *data_size); + + if (!phys_name || (data && !phys_data)) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(get_variable, phys_name, phys_vendor, + phys_attr, phys_data_size, phys_data); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + return status; +} + +static efi_status_t +efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, void *data) +{ + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); + u32 phys_name, phys_vendor, phys_data; + efi_status_t status; + unsigned long flags; + + spin_lock_irqsave(&efi_runtime_lock, flags); + + *vnd = *vendor; + + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); + phys_vendor = virt_to_phys_or_null(vnd); + phys_data = virt_to_phys_or_null_size(data, data_size); + + if (!phys_name || (data && !phys_data)) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(set_variable, phys_name, phys_vendor, + attr, data_size, phys_data); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + return status; +} + +static efi_status_t +efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, + void *data) +{ + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); + u32 phys_name, phys_vendor, phys_data; + efi_status_t status; + unsigned long flags; + + if (!spin_trylock_irqsave(&efi_runtime_lock, flags)) + return EFI_NOT_READY; + + *vnd = *vendor; + + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); + phys_vendor = virt_to_phys_or_null(vnd); + phys_data = virt_to_phys_or_null_size(data, data_size); + + if (!phys_name || (data && !phys_data)) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(set_variable, phys_name, phys_vendor, + attr, data_size, phys_data); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + return status; +} + +static efi_status_t +efi_thunk_get_next_variable(unsigned long *name_size, + efi_char16_t *name, + efi_guid_t *vendor) +{ + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); + efi_status_t status; + u32 phys_name_size, phys_name, phys_vendor; + unsigned long flags; + + spin_lock_irqsave(&efi_runtime_lock, flags); + + *vnd = *vendor; + + phys_name_size = virt_to_phys_or_null(name_size); + phys_vendor = virt_to_phys_or_null(vnd); + phys_name = virt_to_phys_or_null_size(name, *name_size); + + if (!phys_name) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(get_next_variable, phys_name_size, + phys_name, phys_vendor); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + *vendor = *vnd; + return status; +} + +static efi_status_t +efi_thunk_get_next_high_mono_count(u32 *count) +{ + efi_status_t status; + u32 phys_count; + unsigned long flags; + + spin_lock_irqsave(&efi_runtime_lock, flags); + + phys_count = virt_to_phys_or_null(count); + status = efi_thunk(get_next_high_mono_count, phys_count); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + return status; +} + +static void +efi_thunk_reset_system(int reset_type, efi_status_t status, + unsigned long data_size, efi_char16_t *data) +{ + u32 phys_data; + unsigned long flags; + + spin_lock_irqsave(&efi_runtime_lock, flags); + + phys_data = virt_to_phys_or_null_size(data, data_size); + + efi_thunk(reset_system, reset_type, status, data_size, phys_data); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); +} + +static efi_status_t +efi_thunk_update_capsule(efi_capsule_header_t **capsules, + unsigned long count, unsigned long sg_list) +{ + /* + * To properly support this function we would need to repackage + * 'capsules' because the firmware doesn't understand 64-bit + * pointers. + */ + return EFI_UNSUPPORTED; +} + +static efi_status_t +efi_thunk_query_variable_info(u32 attr, u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) +{ + efi_status_t status; + u32 phys_storage, phys_remaining, phys_max; + unsigned long flags; + + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; + + spin_lock_irqsave(&efi_runtime_lock, flags); + + phys_storage = virt_to_phys_or_null(storage_space); + phys_remaining = virt_to_phys_or_null(remaining_space); + phys_max = virt_to_phys_or_null(max_variable_size); + + status = efi_thunk(query_variable_info, attr, phys_storage, + phys_remaining, phys_max); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + return status; +} + +static efi_status_t +efi_thunk_query_variable_info_nonblocking(u32 attr, u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) +{ + efi_status_t status; + u32 phys_storage, phys_remaining, phys_max; + unsigned long flags; + + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; + + if (!spin_trylock_irqsave(&efi_runtime_lock, flags)) + return EFI_NOT_READY; + + phys_storage = virt_to_phys_or_null(storage_space); + phys_remaining = virt_to_phys_or_null(remaining_space); + phys_max = virt_to_phys_or_null(max_variable_size); + + status = efi_thunk(query_variable_info, attr, phys_storage, + phys_remaining, phys_max); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + return status; +} + +static efi_status_t +efi_thunk_query_capsule_caps(efi_capsule_header_t **capsules, + unsigned long count, u64 *max_size, + int *reset_type) +{ + /* + * To properly support this function we would need to repackage + * 'capsules' because the firmware doesn't understand 64-bit + * pointers. + */ + return EFI_UNSUPPORTED; +} + +void efi_thunk_runtime_setup(void) +{ + efi.get_time = efi_thunk_get_time; + efi.set_time = efi_thunk_set_time; + efi.get_wakeup_time = efi_thunk_get_wakeup_time; + efi.set_wakeup_time = efi_thunk_set_wakeup_time; + efi.get_variable = efi_thunk_get_variable; + efi.get_next_variable = efi_thunk_get_next_variable; + efi.set_variable = efi_thunk_set_variable; + efi.set_variable_nonblocking = efi_thunk_set_variable_nonblocking; + efi.get_next_high_mono_count = efi_thunk_get_next_high_mono_count; + efi.reset_system = efi_thunk_reset_system; + efi.query_variable_info = efi_thunk_query_variable_info; + efi.query_variable_info_nonblocking = efi_thunk_query_variable_info_nonblocking; + efi.update_capsule = efi_thunk_update_capsule; + efi.query_capsule_caps = efi_thunk_query_capsule_caps; +} +#endif /* CONFIG_EFI_MIXED */ diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S new file mode 100644 index 000000000..ab2e91e76 --- /dev/null +++ b/arch/x86/platform/efi/efi_stub_32.S @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * EFI call stub for IA32. + * + * This stub allows us to make EFI calls in physical mode with interrupts + * turned off. + */ + +#include <linux/linkage.h> +#include <asm/page_types.h> + +/* + * efi_call_phys(void *, ...) is a function with variable parameters. + * All the callers of this function assure that all the parameters are 4-bytes. + */ + +/* + * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. + * So we'd better save all of them at the beginning of this function and restore + * at the end no matter how many we use, because we can not assure EFI runtime + * service functions will comply with gcc calling convention, too. + */ + +.text +ENTRY(efi_call_phys) + /* + * 0. The function can only be called in Linux kernel. So CS has been + * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found + * the values of these registers are the same. And, the corresponding + * GDT entries are identical. So I will do nothing about segment reg + * and GDT, but change GDT base register in prolog and epilog. + */ + + /* + * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET. + * But to make it smoothly switch from virtual mode to flat mode. + * The mapping of lower virtual memory has been created in prolog and + * epilog. + */ + movl $1f, %edx + subl $__PAGE_OFFSET, %edx + jmp *%edx +1: + + /* + * 2. Now on the top of stack is the return + * address in the caller of efi_call_phys(), then parameter 1, + * parameter 2, ..., param n. To make things easy, we save the return + * address of efi_call_phys in a global variable. + */ + popl %edx + movl %edx, saved_return_addr + /* get the function pointer into ECX*/ + popl %ecx + movl %ecx, efi_rt_function_ptr + movl $2f, %edx + subl $__PAGE_OFFSET, %edx + pushl %edx + + /* + * 3. Clear PG bit in %CR0. + */ + movl %cr0, %edx + andl $0x7fffffff, %edx + movl %edx, %cr0 + jmp 1f +1: + + /* + * 4. Adjust stack pointer. + */ + subl $__PAGE_OFFSET, %esp + + /* + * 5. Call the physical function. + */ + jmp *%ecx + +2: + /* + * 6. After EFI runtime service returns, control will return to + * following instruction. We'd better readjust stack pointer first. + */ + addl $__PAGE_OFFSET, %esp + + /* + * 7. Restore PG bit + */ + movl %cr0, %edx + orl $0x80000000, %edx + movl %edx, %cr0 + jmp 1f +1: + /* + * 8. Now restore the virtual mode from flat mode by + * adding EIP with PAGE_OFFSET. + */ + movl $1f, %edx + jmp *%edx +1: + + /* + * 9. Balance the stack. And because EAX contain the return value, + * we'd better not clobber it. + */ + leal efi_rt_function_ptr, %edx + movl (%edx), %ecx + pushl %ecx + + /* + * 10. Push the saved return address onto the stack and return. + */ + leal saved_return_addr, %edx + movl (%edx), %ecx + pushl %ecx + ret +ENDPROC(efi_call_phys) +.previous + +.data +saved_return_addr: + .long 0 +efi_rt_function_ptr: + .long 0 diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S new file mode 100644 index 000000000..74628ec78 --- /dev/null +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Function calling ABI conversion from Linux to EFI for x86_64 + * + * Copyright (C) 2007 Intel Corp + * Bibo Mao <bibo.mao@intel.com> + * Huang Ying <ying.huang@intel.com> + */ + +#include <linux/linkage.h> +#include <asm/segment.h> +#include <asm/msr.h> +#include <asm/processor-flags.h> +#include <asm/page_types.h> + +#define SAVE_XMM \ + mov %rsp, %rax; \ + subq $0x70, %rsp; \ + and $~0xf, %rsp; \ + mov %rax, (%rsp); \ + mov %cr0, %rax; \ + clts; \ + mov %rax, 0x8(%rsp); \ + movaps %xmm0, 0x60(%rsp); \ + movaps %xmm1, 0x50(%rsp); \ + movaps %xmm2, 0x40(%rsp); \ + movaps %xmm3, 0x30(%rsp); \ + movaps %xmm4, 0x20(%rsp); \ + movaps %xmm5, 0x10(%rsp) + +#define RESTORE_XMM \ + movaps 0x60(%rsp), %xmm0; \ + movaps 0x50(%rsp), %xmm1; \ + movaps 0x40(%rsp), %xmm2; \ + movaps 0x30(%rsp), %xmm3; \ + movaps 0x20(%rsp), %xmm4; \ + movaps 0x10(%rsp), %xmm5; \ + mov 0x8(%rsp), %rsi; \ + mov %rsi, %cr0; \ + mov (%rsp), %rsp + +ENTRY(efi_call) + pushq %rbp + movq %rsp, %rbp + SAVE_XMM + mov 16(%rbp), %rax + subq $48, %rsp + mov %r9, 32(%rsp) + mov %rax, 40(%rsp) + mov %r8, %r9 + mov %rcx, %r8 + mov %rsi, %rcx + call *%rdi + addq $48, %rsp + RESTORE_XMM + popq %rbp + ret +ENDPROC(efi_call) diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S new file mode 100644 index 000000000..46c58b087 --- /dev/null +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2014 Intel Corporation; author Matt Fleming + * + * Support for invoking 32-bit EFI runtime services from a 64-bit + * kernel. + * + * The below thunking functions are only used after ExitBootServices() + * has been called. This simplifies things considerably as compared with + * the early EFI thunking because we can leave all the kernel state + * intact (GDT, IDT, etc) and simply invoke the the 32-bit EFI runtime + * services from __KERNEL32_CS. This means we can continue to service + * interrupts across an EFI mixed mode call. + * + * We do however, need to handle the fact that we're running in a full + * 64-bit virtual address space. Things like the stack and instruction + * addresses need to be accessible by the 32-bit firmware, so we rely on + * using the identity mappings in the EFI page table to access the stack + * and kernel text (see efi_setup_page_tables()). + */ + +#include <linux/linkage.h> +#include <asm/page_types.h> +#include <asm/segment.h> + + .text + .code64 +ENTRY(efi64_thunk) + push %rbp + push %rbx + + /* + * Switch to 1:1 mapped 32-bit stack pointer. + */ + movq %rsp, efi_saved_sp(%rip) + movq efi_scratch(%rip), %rsp + + /* + * Calculate the physical address of the kernel text. + */ + movq $__START_KERNEL_map, %rax + subq phys_base(%rip), %rax + + /* + * Push some physical addresses onto the stack. This is easier + * to do now in a code64 section while the assembler can address + * 64-bit values. Note that all the addresses on the stack are + * 32-bit. + */ + subq $16, %rsp + leaq efi_exit32(%rip), %rbx + subq %rax, %rbx + movl %ebx, 8(%rsp) + + leaq __efi64_thunk(%rip), %rbx + subq %rax, %rbx + call *%rbx + + movq efi_saved_sp(%rip), %rsp + pop %rbx + pop %rbp + retq +ENDPROC(efi64_thunk) + +/* + * We run this function from the 1:1 mapping. + * + * This function must be invoked with a 1:1 mapped stack. + */ +ENTRY(__efi64_thunk) + movl %ds, %eax + push %rax + movl %es, %eax + push %rax + movl %ss, %eax + push %rax + + subq $32, %rsp + movl %esi, 0x0(%rsp) + movl %edx, 0x4(%rsp) + movl %ecx, 0x8(%rsp) + movq %r8, %rsi + movl %esi, 0xc(%rsp) + movq %r9, %rsi + movl %esi, 0x10(%rsp) + + leaq 1f(%rip), %rbx + movq %rbx, func_rt_ptr(%rip) + + /* Switch to 32-bit descriptor */ + pushq $__KERNEL32_CS + leaq efi_enter32(%rip), %rax + pushq %rax + lretq + +1: addq $32, %rsp + + pop %rbx + movl %ebx, %ss + pop %rbx + movl %ebx, %es + pop %rbx + movl %ebx, %ds + + /* + * Convert 32-bit status code into 64-bit. + */ + test %rax, %rax + jz 1f + movl %eax, %ecx + andl $0x0fffffff, %ecx + andl $0xf0000000, %eax + shl $32, %rax + or %rcx, %rax +1: + ret +ENDPROC(__efi64_thunk) + +ENTRY(efi_exit32) + movq func_rt_ptr(%rip), %rax + push %rax + mov %rdi, %rax + ret +ENDPROC(efi_exit32) + + .code32 +/* + * EFI service pointer must be in %edi. + * + * The stack should represent the 32-bit calling convention. + */ +ENTRY(efi_enter32) + movl $__KERNEL_DS, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + + call *%edi + + /* We must preserve return value */ + movl %eax, %edi + + movl 72(%esp), %eax + pushl $__KERNEL_CS + pushl %eax + + lret +ENDPROC(efi_enter32) + + .data + .balign 8 +func_rt_ptr: .quad 0 +efi_saved_sp: .quad 0 diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c new file mode 100644 index 000000000..006eb09e9 --- /dev/null +++ b/arch/x86/platform/efi/quirks.c @@ -0,0 +1,655 @@ +#define pr_fmt(fmt) "efi: " fmt + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/time.h> +#include <linux/types.h> +#include <linux/efi.h> +#include <linux/slab.h> +#include <linux/memblock.h> +#include <linux/bootmem.h> +#include <linux/acpi.h> +#include <linux/dmi.h> + +#include <asm/e820/api.h> +#include <asm/efi.h> +#include <asm/uv/uv.h> +#include <asm/cpu_device_id.h> + +#define EFI_MIN_RESERVE 5120 + +#define EFI_DUMMY_GUID \ + EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9) + +#define QUARK_CSH_SIGNATURE 0x5f435348 /* _CSH */ +#define QUARK_SECURITY_HEADER_SIZE 0x400 + +/* + * Header prepended to the standard EFI capsule on Quark systems the are based + * on Intel firmware BSP. + * @csh_signature: Unique identifier to sanity check signed module + * presence ("_CSH"). + * @version: Current version of CSH used. Should be one for Quark A0. + * @modulesize: Size of the entire module including the module header + * and payload. + * @security_version_number_index: Index of SVN to use for validation of signed + * module. + * @security_version_number: Used to prevent against roll back of modules. + * @rsvd_module_id: Currently unused for Clanton (Quark). + * @rsvd_module_vendor: Vendor Identifier. For Intel products value is + * 0x00008086. + * @rsvd_date: BCD representation of build date as yyyymmdd, where + * yyyy=4 digit year, mm=1-12, dd=1-31. + * @headersize: Total length of the header including including any + * padding optionally added by the signing tool. + * @hash_algo: What Hash is used in the module signing. + * @cryp_algo: What Crypto is used in the module signing. + * @keysize: Total length of the key data including including any + * padding optionally added by the signing tool. + * @signaturesize: Total length of the signature including including any + * padding optionally added by the signing tool. + * @rsvd_next_header: 32-bit pointer to the next Secure Boot Module in the + * chain, if there is a next header. + * @rsvd: Reserved, padding structure to required size. + * + * See also QuartSecurityHeader_t in + * Quark_EDKII_v1.2.1.1/QuarkPlatformPkg/Include/QuarkBootRom.h + * from https://downloadcenter.intel.com/download/23197/Intel-Quark-SoC-X1000-Board-Support-Package-BSP + */ +struct quark_security_header { + u32 csh_signature; + u32 version; + u32 modulesize; + u32 security_version_number_index; + u32 security_version_number; + u32 rsvd_module_id; + u32 rsvd_module_vendor; + u32 rsvd_date; + u32 headersize; + u32 hash_algo; + u32 cryp_algo; + u32 keysize; + u32 signaturesize; + u32 rsvd_next_header; + u32 rsvd[2]; +}; + +static const efi_char16_t efi_dummy_name[] = L"DUMMY"; + +static bool efi_no_storage_paranoia; + +/* + * Some firmware implementations refuse to boot if there's insufficient + * space in the variable store. The implementation of garbage collection + * in some FW versions causes stale (deleted) variables to take up space + * longer than intended and space is only freed once the store becomes + * almost completely full. + * + * Enabling this option disables the space checks in + * efi_query_variable_store() and forces garbage collection. + * + * Only enable this option if deleting EFI variables does not free up + * space in your variable store, e.g. if despite deleting variables + * you're unable to create new ones. + */ +static int __init setup_storage_paranoia(char *arg) +{ + efi_no_storage_paranoia = true; + return 0; +} +early_param("efi_no_storage_paranoia", setup_storage_paranoia); + +/* + * Deleting the dummy variable which kicks off garbage collection +*/ +void efi_delete_dummy_variable(void) +{ + efi.set_variable_nonblocking((efi_char16_t *)efi_dummy_name, + &EFI_DUMMY_GUID, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, 0, NULL); +} + +/* + * In the nonblocking case we do not attempt to perform garbage + * collection if we do not have enough free space. Rather, we do the + * bare minimum check and give up immediately if the available space + * is below EFI_MIN_RESERVE. + * + * This function is intended to be small and simple because it is + * invoked from crash handler paths. + */ +static efi_status_t +query_variable_store_nonblocking(u32 attributes, unsigned long size) +{ + efi_status_t status; + u64 storage_size, remaining_size, max_size; + + status = efi.query_variable_info_nonblocking(attributes, &storage_size, + &remaining_size, + &max_size); + if (status != EFI_SUCCESS) + return status; + + if (remaining_size - size < EFI_MIN_RESERVE) + return EFI_OUT_OF_RESOURCES; + + return EFI_SUCCESS; +} + +/* + * Some firmware implementations refuse to boot if there's insufficient space + * in the variable store. Ensure that we never use more than a safe limit. + * + * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable + * store. + */ +efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, + bool nonblocking) +{ + efi_status_t status; + u64 storage_size, remaining_size, max_size; + + if (!(attributes & EFI_VARIABLE_NON_VOLATILE)) + return 0; + + if (nonblocking) + return query_variable_store_nonblocking(attributes, size); + + status = efi.query_variable_info(attributes, &storage_size, + &remaining_size, &max_size); + if (status != EFI_SUCCESS) + return status; + + /* + * We account for that by refusing the write if permitting it would + * reduce the available space to under 5KB. This figure was provided by + * Samsung, so should be safe. + */ + if ((remaining_size - size < EFI_MIN_RESERVE) && + !efi_no_storage_paranoia) { + + /* + * Triggering garbage collection may require that the firmware + * generate a real EFI_OUT_OF_RESOURCES error. We can force + * that by attempting to use more space than is available. + */ + unsigned long dummy_size = remaining_size + 1024; + void *dummy = kzalloc(dummy_size, GFP_KERNEL); + + if (!dummy) + return EFI_OUT_OF_RESOURCES; + + status = efi.set_variable((efi_char16_t *)efi_dummy_name, + &EFI_DUMMY_GUID, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, + dummy_size, dummy); + + if (status == EFI_SUCCESS) { + /* + * This should have failed, so if it didn't make sure + * that we delete it... + */ + efi_delete_dummy_variable(); + } + + kfree(dummy); + + /* + * The runtime code may now have triggered a garbage collection + * run, so check the variable info again + */ + status = efi.query_variable_info(attributes, &storage_size, + &remaining_size, &max_size); + + if (status != EFI_SUCCESS) + return status; + + /* + * There still isn't enough room, so return an error + */ + if (remaining_size - size < EFI_MIN_RESERVE) + return EFI_OUT_OF_RESOURCES; + } + + return EFI_SUCCESS; +} +EXPORT_SYMBOL_GPL(efi_query_variable_store); + +/* + * The UEFI specification makes it clear that the operating system is + * free to do whatever it wants with boot services code after + * ExitBootServices() has been called. Ignoring this recommendation a + * significant bunch of EFI implementations continue calling into boot + * services code (SetVirtualAddressMap). In order to work around such + * buggy implementations we reserve boot services region during EFI + * init and make sure it stays executable. Then, after + * SetVirtualAddressMap(), it is discarded. + * + * However, some boot services regions contain data that is required + * by drivers, so we need to track which memory ranges can never be + * freed. This is done by tagging those regions with the + * EFI_MEMORY_RUNTIME attribute. + * + * Any driver that wants to mark a region as reserved must use + * efi_mem_reserve() which will insert a new EFI memory descriptor + * into efi.memmap (splitting existing regions if necessary) and tag + * it with EFI_MEMORY_RUNTIME. + */ +void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) +{ + phys_addr_t new_phys, new_size; + struct efi_mem_range mr; + efi_memory_desc_t md; + int num_entries; + void *new; + + if (efi_mem_desc_lookup(addr, &md) || + md.type != EFI_BOOT_SERVICES_DATA) { + pr_err("Failed to lookup EFI memory descriptor for %pa\n", &addr); + return; + } + + if (addr + size > md.phys_addr + (md.num_pages << EFI_PAGE_SHIFT)) { + pr_err("Region spans EFI memory descriptors, %pa\n", &addr); + return; + } + + size += addr % EFI_PAGE_SIZE; + size = round_up(size, EFI_PAGE_SIZE); + addr = round_down(addr, EFI_PAGE_SIZE); + + mr.range.start = addr; + mr.range.end = addr + size - 1; + mr.attribute = md.attribute | EFI_MEMORY_RUNTIME; + + num_entries = efi_memmap_split_count(&md, &mr.range); + num_entries += efi.memmap.nr_map; + + new_size = efi.memmap.desc_size * num_entries; + + new_phys = efi_memmap_alloc(num_entries); + if (!new_phys) { + pr_err("Could not allocate boot services memmap\n"); + return; + } + + new = early_memremap_prot(new_phys, new_size, + pgprot_val(pgprot_encrypted(FIXMAP_PAGE_NORMAL))); + if (!new) { + pr_err("Failed to map new boot services memmap\n"); + return; + } + + efi_memmap_insert(&efi.memmap, new, &mr); + early_memunmap(new, new_size); + + efi_memmap_install(new_phys, num_entries); + e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); + e820__update_table(e820_table); +} + +/* + * Helper function for efi_reserve_boot_services() to figure out if we + * can free regions in efi_free_boot_services(). + * + * Use this function to ensure we do not free regions owned by somebody + * else. We must only reserve (and then free) regions: + * + * - Not within any part of the kernel + * - Not the BIOS reserved area (E820_TYPE_RESERVED, E820_TYPE_NVS, etc) + */ +static bool can_free_region(u64 start, u64 size) +{ + if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end)) + return false; + + if (!e820__mapped_all(start, start+size, E820_TYPE_RAM)) + return false; + + return true; +} + +void __init efi_reserve_boot_services(void) +{ + efi_memory_desc_t *md; + + for_each_efi_memory_desc(md) { + u64 start = md->phys_addr; + u64 size = md->num_pages << EFI_PAGE_SHIFT; + bool already_reserved; + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) + continue; + + already_reserved = memblock_is_region_reserved(start, size); + + /* + * Because the following memblock_reserve() is paired + * with free_bootmem_late() for this region in + * efi_free_boot_services(), we must be extremely + * careful not to reserve, and subsequently free, + * critical regions of memory (like the kernel image) or + * those regions that somebody else has already + * reserved. + * + * A good example of a critical region that must not be + * freed is page zero (first 4Kb of memory), which may + * contain boot services code/data but is marked + * E820_TYPE_RESERVED by trim_bios_range(). + */ + if (!already_reserved) { + memblock_reserve(start, size); + + /* + * If we are the first to reserve the region, no + * one else cares about it. We own it and can + * free it later. + */ + if (can_free_region(start, size)) + continue; + } + + /* + * We don't own the region. We must not free it. + * + * Setting this bit for a boot services region really + * doesn't make sense as far as the firmware is + * concerned, but it does provide us with a way to tag + * those regions that must not be paired with + * free_bootmem_late(). + */ + md->attribute |= EFI_MEMORY_RUNTIME; + } +} + +void __init efi_free_boot_services(void) +{ + phys_addr_t new_phys, new_size; + efi_memory_desc_t *md; + int num_entries = 0; + void *new, *new_md; + + for_each_efi_memory_desc(md) { + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + size_t rm_size; + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) { + num_entries++; + continue; + } + + /* Do not free, someone else owns it: */ + if (md->attribute & EFI_MEMORY_RUNTIME) { + num_entries++; + continue; + } + + /* + * Nasty quirk: if all sub-1MB memory is used for boot + * services, we can get here without having allocated the + * real mode trampoline. It's too late to hand boot services + * memory back to the memblock allocator, so instead + * try to manually allocate the trampoline if needed. + * + * I've seen this on a Dell XPS 13 9350 with firmware + * 1.4.4 with SGX enabled booting Linux via Fedora 24's + * grub2-efi on a hard disk. (And no, I don't know why + * this happened, but Linux should still try to boot rather + * panicing early.) + */ + rm_size = real_mode_size_needed(); + if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) { + set_real_mode_mem(start, rm_size); + start += rm_size; + size -= rm_size; + } + + free_bootmem_late(start, size); + } + + if (!num_entries) + return; + + new_size = efi.memmap.desc_size * num_entries; + new_phys = efi_memmap_alloc(num_entries); + if (!new_phys) { + pr_err("Failed to allocate new EFI memmap\n"); + return; + } + + new = memremap(new_phys, new_size, MEMREMAP_WB); + if (!new) { + pr_err("Failed to map new EFI memmap\n"); + return; + } + + /* + * Build a new EFI memmap that excludes any boot services + * regions that are not tagged EFI_MEMORY_RUNTIME, since those + * regions have now been freed. + */ + new_md = new; + for_each_efi_memory_desc(md) { + if (!(md->attribute & EFI_MEMORY_RUNTIME) && + (md->type == EFI_BOOT_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_DATA)) + continue; + + memcpy(new_md, md, efi.memmap.desc_size); + new_md += efi.memmap.desc_size; + } + + memunmap(new); + + if (efi_memmap_install(new_phys, num_entries)) { + pr_err("Could not install new EFI memmap\n"); + return; + } +} + +/* + * A number of config table entries get remapped to virtual addresses + * after entering EFI virtual mode. However, the kexec kernel requires + * their physical addresses therefore we pass them via setup_data and + * correct those entries to their respective physical addresses here. + * + * Currently only handles smbios which is necessary for some firmware + * implementation. + */ +int __init efi_reuse_config(u64 tables, int nr_tables) +{ + int i, sz, ret = 0; + void *p, *tablep; + struct efi_setup_data *data; + + if (!efi_setup) + return 0; + + if (!efi_enabled(EFI_64BIT)) + return 0; + + data = early_memremap(efi_setup, sizeof(*data)); + if (!data) { + ret = -ENOMEM; + goto out; + } + + if (!data->smbios) + goto out_memremap; + + sz = sizeof(efi_config_table_64_t); + + p = tablep = early_memremap(tables, nr_tables * sz); + if (!p) { + pr_err("Could not map Configuration table!\n"); + ret = -ENOMEM; + goto out_memremap; + } + + for (i = 0; i < efi.systab->nr_tables; i++) { + efi_guid_t guid; + + guid = ((efi_config_table_64_t *)p)->guid; + + if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) + ((efi_config_table_64_t *)p)->table = data->smbios; + p += sz; + } + early_memunmap(tablep, nr_tables * sz); + +out_memremap: + early_memunmap(data, sizeof(*data)); +out: + return ret; +} + +static const struct dmi_system_id sgi_uv1_dmi[] = { + { NULL, "SGI UV1", + { DMI_MATCH(DMI_PRODUCT_NAME, "Stoutland Platform"), + DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), + DMI_MATCH(DMI_BIOS_VENDOR, "SGI.COM"), + } + }, + { } /* NULL entry stops DMI scanning */ +}; + +void __init efi_apply_memmap_quirks(void) +{ + /* + * Once setup is done earlier, unmap the EFI memory map on mismatched + * firmware/kernel architectures since there is no support for runtime + * services. + */ + if (!efi_runtime_supported()) { + pr_info("Setup done, disabling due to 32/64-bit mismatch\n"); + efi_memmap_unmap(); + } + + /* UV2+ BIOS has a fix for this issue. UV1 still needs the quirk. */ + if (dmi_check_system(sgi_uv1_dmi)) + set_bit(EFI_OLD_MEMMAP, &efi.flags); +} + +/* + * For most modern platforms the preferred method of powering off is via + * ACPI. However, there are some that are known to require the use of + * EFI runtime services and for which ACPI does not work at all. + * + * Using EFI is a last resort, to be used only if no other option + * exists. + */ +bool efi_reboot_required(void) +{ + if (!acpi_gbl_reduced_hardware) + return false; + + efi_reboot_quirk_mode = EFI_RESET_WARM; + return true; +} + +bool efi_poweroff_required(void) +{ + return acpi_gbl_reduced_hardware || acpi_no_s5; +} + +#ifdef CONFIG_EFI_CAPSULE_QUIRK_QUARK_CSH + +static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff, + size_t hdr_bytes) +{ + struct quark_security_header *csh = *pkbuff; + + /* Only process data block that is larger than the security header */ + if (hdr_bytes < sizeof(struct quark_security_header)) + return 0; + + if (csh->csh_signature != QUARK_CSH_SIGNATURE || + csh->headersize != QUARK_SECURITY_HEADER_SIZE) + return 1; + + /* Only process data block if EFI header is included */ + if (hdr_bytes < QUARK_SECURITY_HEADER_SIZE + + sizeof(efi_capsule_header_t)) + return 0; + + pr_debug("Quark security header detected\n"); + + if (csh->rsvd_next_header != 0) { + pr_err("multiple Quark security headers not supported\n"); + return -EINVAL; + } + + *pkbuff += csh->headersize; + cap_info->total_size = csh->headersize; + + /* + * Update the first page pointer to skip over the CSH header. + */ + cap_info->phys[0] += csh->headersize; + + /* + * cap_info->capsule should point at a virtual mapping of the entire + * capsule, starting at the capsule header. Our image has the Quark + * security header prepended, so we cannot rely on the default vmap() + * mapping created by the generic capsule code. + * Given that the Quark firmware does not appear to care about the + * virtual mapping, let's just point cap_info->capsule at our copy + * of the capsule header. + */ + cap_info->capsule = &cap_info->header; + + return 1; +} + +#define ICPU(family, model, quirk_handler) \ + { X86_VENDOR_INTEL, family, model, X86_FEATURE_ANY, \ + (unsigned long)&quirk_handler } + +static const struct x86_cpu_id efi_capsule_quirk_ids[] = { + ICPU(5, 9, qrk_capsule_setup_info), /* Intel Quark X1000 */ + { } +}; + +int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, + size_t hdr_bytes) +{ + int (*quirk_handler)(struct capsule_info *, void **, size_t); + const struct x86_cpu_id *id; + int ret; + + if (hdr_bytes < sizeof(efi_capsule_header_t)) + return 0; + + cap_info->total_size = 0; + + id = x86_match_cpu(efi_capsule_quirk_ids); + if (id) { + /* + * The quirk handler is supposed to return + * - a value > 0 if the setup should continue, after advancing + * kbuff as needed + * - 0 if not enough hdr_bytes are available yet + * - a negative error code otherwise + */ + quirk_handler = (typeof(quirk_handler))id->driver_data; + ret = quirk_handler(cap_info, &kbuff, hdr_bytes); + if (ret <= 0) + return ret; + } + + memcpy(&cap_info->header, kbuff, sizeof(cap_info->header)); + + cap_info->total_size += cap_info->header.imagesize; + + return __efi_capsule_setup_info(cap_info); +} + +#endif |