diff options
Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r-- | drivers/acpi/apei/Kconfig | 70 | ||||
-rw-r--r-- | drivers/acpi/apei/Makefile | 7 | ||||
-rw-r--r-- | drivers/acpi/apei/apei-base.c | 800 | ||||
-rw-r--r-- | drivers/acpi/apei/apei-internal.h | 133 | ||||
-rw-r--r-- | drivers/acpi/apei/bert.c | 174 | ||||
-rw-r--r-- | drivers/acpi/apei/einj.c | 809 | ||||
-rw-r--r-- | drivers/acpi/apei/erst-dbg.c | 231 | ||||
-rw-r--r-- | drivers/acpi/apei/erst.c | 1267 | ||||
-rw-r--r-- | drivers/acpi/apei/ghes.c | 1584 | ||||
-rw-r--r-- | drivers/acpi/apei/hest.c | 275 |
10 files changed, 5350 insertions, 0 deletions
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig new file mode 100644 index 0000000000..6b18f8bc7b --- /dev/null +++ b/drivers/acpi/apei/Kconfig @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: GPL-2.0 +config HAVE_ACPI_APEI + bool + +config HAVE_ACPI_APEI_NMI + bool + +config ACPI_APEI + bool "ACPI Platform Error Interface (APEI)" + select MISC_FILESYSTEMS + select PSTORE + select UEFI_CPER + depends on HAVE_ACPI_APEI + help + APEI allows to report errors (for example from the chipset) + to the operating system. This improves NMI handling + especially. In addition it supports error serialization and + error injection. + +config ACPI_APEI_GHES + bool "APEI Generic Hardware Error Source" + depends on ACPI_APEI + select ACPI_HED + select IRQ_WORK + select GENERIC_ALLOCATOR + help + Generic Hardware Error Source provides a way to report + platform hardware errors (such as that from chipset). It + works in so called "Firmware First" mode, that is, hardware + errors are reported to firmware firstly, then reported to + Linux by firmware. This way, some non-standard hardware + error registers or non-standard hardware link can be checked + by firmware to produce more valuable hardware error + information for Linux. + +config ACPI_APEI_PCIEAER + bool "APEI PCIe AER logging/recovering support" + depends on ACPI_APEI && PCIEAER + help + PCIe AER errors may be reported via APEI firmware first mode. + Turn on this option to enable the corresponding support. + +config ACPI_APEI_SEA + bool + depends on ARM64 && ACPI_APEI_GHES + default y + +config ACPI_APEI_MEMORY_FAILURE + bool "APEI memory error recovering support" + depends on ACPI_APEI && MEMORY_FAILURE + help + Memory errors may be reported via APEI firmware first mode. + Turn on this option to enable the memory recovering support. + +config ACPI_APEI_EINJ + tristate "APEI Error INJection (EINJ)" + depends on ACPI_APEI && DEBUG_FS + help + EINJ provides a hardware error injection mechanism, it is + mainly used for debugging and testing the other parts of + APEI and some other RAS features. + +config ACPI_APEI_ERST_DEBUG + tristate "APEI Error Record Serialization Table (ERST) Debug Support" + depends on ACPI_APEI + help + ERST is a way provided by APEI to save and retrieve hardware + error information to and from a persistent store. Enable this + if you want to debugging and testing the ERST kernel support + and firmware implementation. diff --git a/drivers/acpi/apei/Makefile b/drivers/acpi/apei/Makefile new file mode 100644 index 0000000000..4dfac21287 --- /dev/null +++ b/drivers/acpi/apei/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_ACPI_APEI) += apei.o +obj-$(CONFIG_ACPI_APEI_GHES) += ghes.o +obj-$(CONFIG_ACPI_APEI_EINJ) += einj.o +obj-$(CONFIG_ACPI_APEI_ERST_DEBUG) += erst-dbg.o + +apei-y := apei-base.o hest.o erst.o bert.o diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c new file mode 100644 index 0000000000..c7c26872f4 --- /dev/null +++ b/drivers/acpi/apei/apei-base.c @@ -0,0 +1,800 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * apei-base.c - ACPI Platform Error Interface (APEI) supporting + * infrastructure + * + * APEI allows to report errors (for example from the chipset) to + * the operating system. This improves NMI handling especially. In + * addition it supports error serialization and error injection. + * + * For more information about APEI, please refer to ACPI Specification + * version 4.0, chapter 17. + * + * This file has Common functions used by more than one APEI table, + * including framework of interpreter for ERST and EINJ; resource + * management for APEI registers. + * + * Copyright (C) 2009, Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/acpi.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/kref.h> +#include <linux/interrupt.h> +#include <linux/debugfs.h> +#include <acpi/apei.h> +#include <asm/unaligned.h> + +#include "apei-internal.h" + +#define APEI_PFX "APEI: " + +/* + * APEI ERST (Error Record Serialization Table) and EINJ (Error + * INJection) interpreter framework. + */ + +#define APEI_EXEC_PRESERVE_REGISTER 0x1 + +void apei_exec_ctx_init(struct apei_exec_context *ctx, + struct apei_exec_ins_type *ins_table, + u32 instructions, + struct acpi_whea_header *action_table, + u32 entries) +{ + ctx->ins_table = ins_table; + ctx->instructions = instructions; + ctx->action_table = action_table; + ctx->entries = entries; +} +EXPORT_SYMBOL_GPL(apei_exec_ctx_init); + +int __apei_exec_read_register(struct acpi_whea_header *entry, u64 *val) +{ + int rc; + + rc = apei_read(val, &entry->register_region); + if (rc) + return rc; + *val >>= entry->register_region.bit_offset; + *val &= entry->mask; + + return 0; +} + +int apei_exec_read_register(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + int rc; + u64 val = 0; + + rc = __apei_exec_read_register(entry, &val); + if (rc) + return rc; + ctx->value = val; + + return 0; +} +EXPORT_SYMBOL_GPL(apei_exec_read_register); + +int apei_exec_read_register_value(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + int rc; + + rc = apei_exec_read_register(ctx, entry); + if (rc) + return rc; + ctx->value = (ctx->value == entry->value); + + return 0; +} +EXPORT_SYMBOL_GPL(apei_exec_read_register_value); + +int __apei_exec_write_register(struct acpi_whea_header *entry, u64 val) +{ + int rc; + + val &= entry->mask; + val <<= entry->register_region.bit_offset; + if (entry->flags & APEI_EXEC_PRESERVE_REGISTER) { + u64 valr = 0; + rc = apei_read(&valr, &entry->register_region); + if (rc) + return rc; + valr &= ~(entry->mask << entry->register_region.bit_offset); + val |= valr; + } + rc = apei_write(val, &entry->register_region); + + return rc; +} + +int apei_exec_write_register(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + return __apei_exec_write_register(entry, ctx->value); +} +EXPORT_SYMBOL_GPL(apei_exec_write_register); + +int apei_exec_write_register_value(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + ctx->value = entry->value; + + return apei_exec_write_register(ctx, entry); +} +EXPORT_SYMBOL_GPL(apei_exec_write_register_value); + +int apei_exec_noop(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + return 0; +} +EXPORT_SYMBOL_GPL(apei_exec_noop); + +/* + * Interpret the specified action. Go through whole action table, + * execute all instructions belong to the action. + */ +int __apei_exec_run(struct apei_exec_context *ctx, u8 action, + bool optional) +{ + int rc = -ENOENT; + u32 i, ip; + struct acpi_whea_header *entry; + apei_exec_ins_func_t run; + + ctx->ip = 0; + + /* + * "ip" is the instruction pointer of current instruction, + * "ctx->ip" specifies the next instruction to executed, + * instruction "run" function may change the "ctx->ip" to + * implement "goto" semantics. + */ +rewind: + ip = 0; + for (i = 0; i < ctx->entries; i++) { + entry = &ctx->action_table[i]; + if (entry->action != action) + continue; + if (ip == ctx->ip) { + if (entry->instruction >= ctx->instructions || + !ctx->ins_table[entry->instruction].run) { + pr_warn(FW_WARN APEI_PFX + "Invalid action table, unknown instruction type: %d\n", + entry->instruction); + return -EINVAL; + } + run = ctx->ins_table[entry->instruction].run; + rc = run(ctx, entry); + if (rc < 0) + return rc; + else if (rc != APEI_EXEC_SET_IP) + ctx->ip++; + } + ip++; + if (ctx->ip < ip) + goto rewind; + } + + return !optional && rc < 0 ? rc : 0; +} +EXPORT_SYMBOL_GPL(__apei_exec_run); + +typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx, + struct acpi_whea_header *entry, + void *data); + +static int apei_exec_for_each_entry(struct apei_exec_context *ctx, + apei_exec_entry_func_t func, + void *data, + int *end) +{ + u8 ins; + int i, rc; + struct acpi_whea_header *entry; + struct apei_exec_ins_type *ins_table = ctx->ins_table; + + for (i = 0; i < ctx->entries; i++) { + entry = ctx->action_table + i; + ins = entry->instruction; + if (end) + *end = i; + if (ins >= ctx->instructions || !ins_table[ins].run) { + pr_warn(FW_WARN APEI_PFX + "Invalid action table, unknown instruction type: %d\n", + ins); + return -EINVAL; + } + rc = func(ctx, entry, data); + if (rc) + return rc; + } + + return 0; +} + +static int pre_map_gar_callback(struct apei_exec_context *ctx, + struct acpi_whea_header *entry, + void *data) +{ + u8 ins = entry->instruction; + + if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER) + return apei_map_generic_address(&entry->register_region); + + return 0; +} + +/* + * Pre-map all GARs in action table to make it possible to access them + * in NMI handler. + */ +int apei_exec_pre_map_gars(struct apei_exec_context *ctx) +{ + int rc, end; + + rc = apei_exec_for_each_entry(ctx, pre_map_gar_callback, + NULL, &end); + if (rc) { + struct apei_exec_context ctx_unmap; + memcpy(&ctx_unmap, ctx, sizeof(*ctx)); + ctx_unmap.entries = end; + apei_exec_post_unmap_gars(&ctx_unmap); + } + + return rc; +} +EXPORT_SYMBOL_GPL(apei_exec_pre_map_gars); + +static int post_unmap_gar_callback(struct apei_exec_context *ctx, + struct acpi_whea_header *entry, + void *data) +{ + u8 ins = entry->instruction; + + if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER) + apei_unmap_generic_address(&entry->register_region); + + return 0; +} + +/* Post-unmap all GAR in action table. */ +int apei_exec_post_unmap_gars(struct apei_exec_context *ctx) +{ + return apei_exec_for_each_entry(ctx, post_unmap_gar_callback, + NULL, NULL); +} +EXPORT_SYMBOL_GPL(apei_exec_post_unmap_gars); + +/* + * Resource management for GARs in APEI + */ +struct apei_res { + struct list_head list; + unsigned long start; + unsigned long end; +}; + +/* Collect all resources requested, to avoid conflict */ +static struct apei_resources apei_resources_all = { + .iomem = LIST_HEAD_INIT(apei_resources_all.iomem), + .ioport = LIST_HEAD_INIT(apei_resources_all.ioport), +}; + +static int apei_res_add(struct list_head *res_list, + unsigned long start, unsigned long size) +{ + struct apei_res *res, *resn, *res_ins = NULL; + unsigned long end = start + size; + + if (end <= start) + return 0; +repeat: + list_for_each_entry_safe(res, resn, res_list, list) { + if (res->start > end || res->end < start) + continue; + else if (end <= res->end && start >= res->start) { + kfree(res_ins); + return 0; + } + list_del(&res->list); + res->start = start = min(res->start, start); + res->end = end = max(res->end, end); + kfree(res_ins); + res_ins = res; + goto repeat; + } + + if (res_ins) + list_add(&res_ins->list, res_list); + else { + res_ins = kmalloc(sizeof(*res_ins), GFP_KERNEL); + if (!res_ins) + return -ENOMEM; + res_ins->start = start; + res_ins->end = end; + list_add(&res_ins->list, res_list); + } + + return 0; +} + +static int apei_res_sub(struct list_head *res_list1, + struct list_head *res_list2) +{ + struct apei_res *res1, *resn1, *res2, *res; + res1 = list_entry(res_list1->next, struct apei_res, list); + resn1 = list_entry(res1->list.next, struct apei_res, list); + while (&res1->list != res_list1) { + list_for_each_entry(res2, res_list2, list) { + if (res1->start >= res2->end || + res1->end <= res2->start) + continue; + else if (res1->end <= res2->end && + res1->start >= res2->start) { + list_del(&res1->list); + kfree(res1); + break; + } else if (res1->end > res2->end && + res1->start < res2->start) { + res = kmalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + res->start = res2->end; + res->end = res1->end; + res1->end = res2->start; + list_add(&res->list, &res1->list); + resn1 = res; + } else { + if (res1->start < res2->start) + res1->end = res2->start; + else + res1->start = res2->end; + } + } + res1 = resn1; + resn1 = list_entry(resn1->list.next, struct apei_res, list); + } + + return 0; +} + +static void apei_res_clean(struct list_head *res_list) +{ + struct apei_res *res, *resn; + + list_for_each_entry_safe(res, resn, res_list, list) { + list_del(&res->list); + kfree(res); + } +} + +void apei_resources_fini(struct apei_resources *resources) +{ + apei_res_clean(&resources->iomem); + apei_res_clean(&resources->ioport); +} +EXPORT_SYMBOL_GPL(apei_resources_fini); + +static int apei_resources_merge(struct apei_resources *resources1, + struct apei_resources *resources2) +{ + int rc; + struct apei_res *res; + + list_for_each_entry(res, &resources2->iomem, list) { + rc = apei_res_add(&resources1->iomem, res->start, + res->end - res->start); + if (rc) + return rc; + } + list_for_each_entry(res, &resources2->ioport, list) { + rc = apei_res_add(&resources1->ioport, res->start, + res->end - res->start); + if (rc) + return rc; + } + + return 0; +} + +int apei_resources_add(struct apei_resources *resources, + unsigned long start, unsigned long size, + bool iomem) +{ + if (iomem) + return apei_res_add(&resources->iomem, start, size); + else + return apei_res_add(&resources->ioport, start, size); +} +EXPORT_SYMBOL_GPL(apei_resources_add); + +/* + * EINJ has two groups of GARs (EINJ table entry and trigger table + * entry), so common resources are subtracted from the trigger table + * resources before the second requesting. + */ +int apei_resources_sub(struct apei_resources *resources1, + struct apei_resources *resources2) +{ + int rc; + + rc = apei_res_sub(&resources1->iomem, &resources2->iomem); + if (rc) + return rc; + return apei_res_sub(&resources1->ioport, &resources2->ioport); +} +EXPORT_SYMBOL_GPL(apei_resources_sub); + +static int apei_get_res_callback(__u64 start, __u64 size, void *data) +{ + struct apei_resources *resources = data; + return apei_res_add(&resources->iomem, start, size); +} + +static int apei_get_nvs_resources(struct apei_resources *resources) +{ + return acpi_nvs_for_each_region(apei_get_res_callback, resources); +} + +int (*arch_apei_filter_addr)(int (*func)(__u64 start, __u64 size, + void *data), void *data); +static int apei_get_arch_resources(struct apei_resources *resources) + +{ + return arch_apei_filter_addr(apei_get_res_callback, resources); +} + +/* + * IO memory/port resource management mechanism is used to check + * whether memory/port area used by GARs conflicts with normal memory + * or IO memory/port of devices. + */ +int apei_resources_request(struct apei_resources *resources, + const char *desc) +{ + struct apei_res *res, *res_bak = NULL; + struct resource *r; + struct apei_resources nvs_resources, arch_res; + int rc; + + rc = apei_resources_sub(resources, &apei_resources_all); + if (rc) + return rc; + + /* + * Some firmware uses ACPI NVS region, that has been marked as + * busy, so exclude it from APEI resources to avoid false + * conflict. + */ + apei_resources_init(&nvs_resources); + rc = apei_get_nvs_resources(&nvs_resources); + if (rc) + goto nvs_res_fini; + rc = apei_resources_sub(resources, &nvs_resources); + if (rc) + goto nvs_res_fini; + + if (arch_apei_filter_addr) { + apei_resources_init(&arch_res); + rc = apei_get_arch_resources(&arch_res); + if (rc) + goto arch_res_fini; + rc = apei_resources_sub(resources, &arch_res); + if (rc) + goto arch_res_fini; + } + + rc = -EINVAL; + list_for_each_entry(res, &resources->iomem, list) { + r = request_mem_region(res->start, res->end - res->start, + desc); + if (!r) { + pr_err(APEI_PFX + "Can not request [mem %#010llx-%#010llx] for %s registers\n", + (unsigned long long)res->start, + (unsigned long long)res->end - 1, desc); + res_bak = res; + goto err_unmap_iomem; + } + } + + list_for_each_entry(res, &resources->ioport, list) { + r = request_region(res->start, res->end - res->start, desc); + if (!r) { + pr_err(APEI_PFX + "Can not request [io %#06llx-%#06llx] for %s registers\n", + (unsigned long long)res->start, + (unsigned long long)res->end - 1, desc); + res_bak = res; + goto err_unmap_ioport; + } + } + + rc = apei_resources_merge(&apei_resources_all, resources); + if (rc) { + pr_err(APEI_PFX "Fail to merge resources!\n"); + goto err_unmap_ioport; + } + + goto arch_res_fini; + +err_unmap_ioport: + list_for_each_entry(res, &resources->ioport, list) { + if (res == res_bak) + break; + release_region(res->start, res->end - res->start); + } + res_bak = NULL; +err_unmap_iomem: + list_for_each_entry(res, &resources->iomem, list) { + if (res == res_bak) + break; + release_mem_region(res->start, res->end - res->start); + } +arch_res_fini: + if (arch_apei_filter_addr) + apei_resources_fini(&arch_res); +nvs_res_fini: + apei_resources_fini(&nvs_resources); + return rc; +} +EXPORT_SYMBOL_GPL(apei_resources_request); + +void apei_resources_release(struct apei_resources *resources) +{ + int rc; + struct apei_res *res; + + list_for_each_entry(res, &resources->iomem, list) + release_mem_region(res->start, res->end - res->start); + list_for_each_entry(res, &resources->ioport, list) + release_region(res->start, res->end - res->start); + + rc = apei_resources_sub(&apei_resources_all, resources); + if (rc) + pr_err(APEI_PFX "Fail to sub resources!\n"); +} +EXPORT_SYMBOL_GPL(apei_resources_release); + +static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr, + u32 *access_bit_width) +{ + u32 bit_width, bit_offset, access_size_code, space_id; + + bit_width = reg->bit_width; + bit_offset = reg->bit_offset; + access_size_code = reg->access_width; + space_id = reg->space_id; + *paddr = get_unaligned(®->address); + if (!*paddr) { + pr_warn(FW_BUG APEI_PFX + "Invalid physical address in GAR [0x%llx/%u/%u/%u/%u]\n", + *paddr, bit_width, bit_offset, access_size_code, + space_id); + return -EINVAL; + } + + if (access_size_code < 1 || access_size_code > 4) { + pr_warn(FW_BUG APEI_PFX + "Invalid access size code in GAR [0x%llx/%u/%u/%u/%u]\n", + *paddr, bit_width, bit_offset, access_size_code, + space_id); + return -EINVAL; + } + *access_bit_width = 1UL << (access_size_code + 2); + + /* Fixup common BIOS bug */ + if (bit_width == 32 && bit_offset == 0 && (*paddr & 0x03) == 0 && + *access_bit_width < 32) + *access_bit_width = 32; + else if (bit_width == 64 && bit_offset == 0 && (*paddr & 0x07) == 0 && + *access_bit_width < 64) + *access_bit_width = 64; + + if ((bit_width + bit_offset) > *access_bit_width) { + pr_warn(FW_BUG APEI_PFX + "Invalid bit width + offset in GAR [0x%llx/%u/%u/%u/%u]\n", + *paddr, bit_width, bit_offset, access_size_code, + space_id); + return -EINVAL; + } + + if (space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY && + space_id != ACPI_ADR_SPACE_SYSTEM_IO) { + pr_warn(FW_BUG APEI_PFX + "Invalid address space type in GAR [0x%llx/%u/%u/%u/%u]\n", + *paddr, bit_width, bit_offset, access_size_code, + space_id); + return -EINVAL; + } + + return 0; +} + +int apei_map_generic_address(struct acpi_generic_address *reg) +{ + int rc; + u32 access_bit_width; + u64 address; + + rc = apei_check_gar(reg, &address, &access_bit_width); + if (rc) + return rc; + + /* IO space doesn't need mapping */ + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) + return 0; + + if (!acpi_os_map_generic_address(reg)) + return -ENXIO; + + return 0; +} +EXPORT_SYMBOL_GPL(apei_map_generic_address); + +/* read GAR in interrupt (including NMI) or process context */ +int apei_read(u64 *val, struct acpi_generic_address *reg) +{ + int rc; + u32 access_bit_width; + u64 address; + acpi_status status; + + rc = apei_check_gar(reg, &address, &access_bit_width); + if (rc) + return rc; + + *val = 0; + switch(reg->space_id) { + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + status = acpi_os_read_memory((acpi_physical_address) address, + val, access_bit_width); + if (ACPI_FAILURE(status)) + return -EIO; + break; + case ACPI_ADR_SPACE_SYSTEM_IO: + status = acpi_os_read_port(address, (u32 *)val, + access_bit_width); + if (ACPI_FAILURE(status)) + return -EIO; + break; + default: + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(apei_read); + +/* write GAR in interrupt (including NMI) or process context */ +int apei_write(u64 val, struct acpi_generic_address *reg) +{ + int rc; + u32 access_bit_width; + u64 address; + acpi_status status; + + rc = apei_check_gar(reg, &address, &access_bit_width); + if (rc) + return rc; + + switch (reg->space_id) { + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + status = acpi_os_write_memory((acpi_physical_address) address, + val, access_bit_width); + if (ACPI_FAILURE(status)) + return -EIO; + break; + case ACPI_ADR_SPACE_SYSTEM_IO: + status = acpi_os_write_port(address, val, access_bit_width); + if (ACPI_FAILURE(status)) + return -EIO; + break; + default: + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(apei_write); + +static int collect_res_callback(struct apei_exec_context *ctx, + struct acpi_whea_header *entry, + void *data) +{ + struct apei_resources *resources = data; + struct acpi_generic_address *reg = &entry->register_region; + u8 ins = entry->instruction; + u32 access_bit_width; + u64 paddr; + int rc; + + if (!(ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER)) + return 0; + + rc = apei_check_gar(reg, &paddr, &access_bit_width); + if (rc) + return rc; + + switch (reg->space_id) { + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + return apei_res_add(&resources->iomem, paddr, + access_bit_width / 8); + case ACPI_ADR_SPACE_SYSTEM_IO: + return apei_res_add(&resources->ioport, paddr, + access_bit_width / 8); + default: + return -EINVAL; + } +} + +/* + * Same register may be used by multiple instructions in GARs, so + * resources are collected before requesting. + */ +int apei_exec_collect_resources(struct apei_exec_context *ctx, + struct apei_resources *resources) +{ + return apei_exec_for_each_entry(ctx, collect_res_callback, + resources, NULL); +} +EXPORT_SYMBOL_GPL(apei_exec_collect_resources); + +struct dentry *apei_get_debugfs_dir(void) +{ + static struct dentry *dapei; + + if (!dapei) + dapei = debugfs_create_dir("apei", NULL); + + return dapei; +} +EXPORT_SYMBOL_GPL(apei_get_debugfs_dir); + +int __weak arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, + void *data) +{ + return 1; +} +EXPORT_SYMBOL_GPL(arch_apei_enable_cmcff); + +void __weak arch_apei_report_mem_error(int sev, + struct cper_sec_mem_err *mem_err) +{ +} +EXPORT_SYMBOL_GPL(arch_apei_report_mem_error); + +int apei_osc_setup(void) +{ + static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c"; + acpi_handle handle; + u32 capbuf[3]; + struct acpi_osc_context context = { + .uuid_str = whea_uuid_str, + .rev = 1, + .cap.length = sizeof(capbuf), + .cap.pointer = capbuf, + }; + + capbuf[OSC_QUERY_DWORD] = OSC_QUERY_ENABLE; + capbuf[OSC_SUPPORT_DWORD] = 1; + capbuf[OSC_CONTROL_DWORD] = 0; + + if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)) + || ACPI_FAILURE(acpi_run_osc(handle, &context))) + return -EIO; + else { + kfree(context.ret.pointer); + return 0; + } +} +EXPORT_SYMBOL_GPL(apei_osc_setup); diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h new file mode 100644 index 0000000000..67c2c3b959 --- /dev/null +++ b/drivers/acpi/apei/apei-internal.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * apei-internal.h - ACPI Platform Error Interface internal + * definitions. + */ + +#ifndef APEI_INTERNAL_H +#define APEI_INTERNAL_H + +#include <linux/acpi.h> + +struct apei_exec_context; + +typedef int (*apei_exec_ins_func_t)(struct apei_exec_context *ctx, + struct acpi_whea_header *entry); + +#define APEI_EXEC_INS_ACCESS_REGISTER 0x0001 + +struct apei_exec_ins_type { + u32 flags; + apei_exec_ins_func_t run; +}; + +struct apei_exec_context { + u32 ip; + u64 value; + u64 var1; + u64 var2; + u64 src_base; + u64 dst_base; + struct apei_exec_ins_type *ins_table; + u32 instructions; + struct acpi_whea_header *action_table; + u32 entries; +}; + +void apei_exec_ctx_init(struct apei_exec_context *ctx, + struct apei_exec_ins_type *ins_table, + u32 instructions, + struct acpi_whea_header *action_table, + u32 entries); + +static inline void apei_exec_ctx_set_input(struct apei_exec_context *ctx, + u64 input) +{ + ctx->value = input; +} + +static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx) +{ + return ctx->value; +} + +int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool optional); + +static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action) +{ + return __apei_exec_run(ctx, action, 0); +} + +/* It is optional whether the firmware provides the action */ +static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action) +{ + return __apei_exec_run(ctx, action, 1); +} + +/* Common instruction implementation */ + +/* IP has been set in instruction function */ +#define APEI_EXEC_SET_IP 1 + +int apei_map_generic_address(struct acpi_generic_address *reg); + +static inline void apei_unmap_generic_address(struct acpi_generic_address *reg) +{ + acpi_os_unmap_generic_address(reg); +} + +int apei_read(u64 *val, struct acpi_generic_address *reg); +int apei_write(u64 val, struct acpi_generic_address *reg); + +int __apei_exec_read_register(struct acpi_whea_header *entry, u64 *val); +int __apei_exec_write_register(struct acpi_whea_header *entry, u64 val); +int apei_exec_read_register(struct apei_exec_context *ctx, + struct acpi_whea_header *entry); +int apei_exec_read_register_value(struct apei_exec_context *ctx, + struct acpi_whea_header *entry); +int apei_exec_write_register(struct apei_exec_context *ctx, + struct acpi_whea_header *entry); +int apei_exec_write_register_value(struct apei_exec_context *ctx, + struct acpi_whea_header *entry); +int apei_exec_noop(struct apei_exec_context *ctx, + struct acpi_whea_header *entry); +int apei_exec_pre_map_gars(struct apei_exec_context *ctx); +int apei_exec_post_unmap_gars(struct apei_exec_context *ctx); + +struct apei_resources { + struct list_head iomem; + struct list_head ioport; +}; + +static inline void apei_resources_init(struct apei_resources *resources) +{ + INIT_LIST_HEAD(&resources->iomem); + INIT_LIST_HEAD(&resources->ioport); +} + +void apei_resources_fini(struct apei_resources *resources); +int apei_resources_add(struct apei_resources *resources, + unsigned long start, unsigned long size, + bool iomem); +int apei_resources_sub(struct apei_resources *resources1, + struct apei_resources *resources2); +int apei_resources_request(struct apei_resources *resources, + const char *desc); +void apei_resources_release(struct apei_resources *resources); +int apei_exec_collect_resources(struct apei_exec_context *ctx, + struct apei_resources *resources); + +struct dentry; +struct dentry *apei_get_debugfs_dir(void); + +static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus) +{ + if (estatus->raw_data_length) + return estatus->raw_data_offset + \ + estatus->raw_data_length; + else + return sizeof(*estatus) + estatus->data_length; +} + +int apei_osc_setup(void); +#endif diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c new file mode 100644 index 0000000000..5427e49e64 --- /dev/null +++ b/drivers/acpi/apei/bert.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * APEI Boot Error Record Table (BERT) support + * + * Copyright 2011 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * Under normal circumstances, when a hardware error occurs, the error + * handler receives control and processes the error. This gives OSPM a + * chance to process the error condition, report it, and optionally attempt + * recovery. In some cases, the system is unable to process an error. + * For example, system firmware or a management controller may choose to + * reset the system or the system might experience an uncontrolled crash + * or reset.The boot error source is used to report unhandled errors that + * occurred in a previous boot. This mechanism is described in the BERT + * table. + * + * For more information about BERT, please refer to ACPI Specification + * version 4.0, section 17.3.1 + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/acpi.h> +#include <linux/cper.h> +#include <linux/io.h> + +#include "apei-internal.h" + +#undef pr_fmt +#define pr_fmt(fmt) "BERT: " fmt + +#define ACPI_BERT_PRINT_MAX_RECORDS 5 +#define ACPI_BERT_PRINT_MAX_LEN 1024 + +static int bert_disable __initdata; + +/* + * Print "all" the error records in the BERT table, but avoid huge spam to + * the console if the BIOS included oversize records, or too many records. + * Skipping some records here does not lose anything because the full + * data is available to user tools in: + * /sys/firmware/acpi/tables/data/BERT + */ +static void __init bert_print_all(struct acpi_bert_region *region, + unsigned int region_len) +{ + struct acpi_hest_generic_status *estatus = + (struct acpi_hest_generic_status *)region; + int remain = region_len; + int printed = 0, skipped = 0; + u32 estatus_len; + + while (remain >= sizeof(struct acpi_bert_region)) { + estatus_len = cper_estatus_len(estatus); + if (remain < estatus_len) { + pr_err(FW_BUG "Truncated status block (length: %u).\n", + estatus_len); + break; + } + + /* No more error records. */ + if (!estatus->block_status) + break; + + if (cper_estatus_check(estatus)) { + pr_err(FW_BUG "Invalid error record.\n"); + break; + } + + if (estatus_len < ACPI_BERT_PRINT_MAX_LEN && + printed < ACPI_BERT_PRINT_MAX_RECORDS) { + pr_info_once("Error records from previous boot:\n"); + cper_estatus_print(KERN_INFO HW_ERR, estatus); + printed++; + } else { + skipped++; + } + + /* + * Because the boot error source is "one-time polled" type, + * clear Block Status of current Generic Error Status Block, + * once it's printed. + */ + estatus->block_status = 0; + + estatus = (void *)estatus + estatus_len; + remain -= estatus_len; + } + + if (skipped) + pr_info(HW_ERR "Skipped %d error records\n", skipped); + + if (printed + skipped) + pr_info("Total records found: %d\n", printed + skipped); +} + +static int __init setup_bert_disable(char *str) +{ + bert_disable = 1; + + return 1; +} +__setup("bert_disable", setup_bert_disable); + +static int __init bert_check_table(struct acpi_table_bert *bert_tab) +{ + if (bert_tab->header.length < sizeof(struct acpi_table_bert) || + bert_tab->region_length < sizeof(struct acpi_bert_region)) + return -EINVAL; + + return 0; +} + +static int __init bert_init(void) +{ + struct apei_resources bert_resources; + struct acpi_bert_region *boot_error_region; + struct acpi_table_bert *bert_tab; + unsigned int region_len; + acpi_status status; + int rc = 0; + + if (acpi_disabled) + return 0; + + if (bert_disable) { + pr_info("Boot Error Record Table support is disabled.\n"); + return 0; + } + + status = acpi_get_table(ACPI_SIG_BERT, 0, (struct acpi_table_header **)&bert_tab); + if (status == AE_NOT_FOUND) + return 0; + + if (ACPI_FAILURE(status)) { + pr_err("get table failed, %s.\n", acpi_format_exception(status)); + return -EINVAL; + } + + rc = bert_check_table(bert_tab); + if (rc) { + pr_err(FW_BUG "table invalid.\n"); + goto out_put_bert_tab; + } + + region_len = bert_tab->region_length; + apei_resources_init(&bert_resources); + rc = apei_resources_add(&bert_resources, bert_tab->address, + region_len, true); + if (rc) + goto out_put_bert_tab; + rc = apei_resources_request(&bert_resources, "APEI BERT"); + if (rc) + goto out_fini; + boot_error_region = ioremap_cache(bert_tab->address, region_len); + if (boot_error_region) { + bert_print_all(boot_error_region, region_len); + iounmap(boot_error_region); + } else { + rc = -ENOMEM; + } + + apei_resources_release(&bert_resources); +out_fini: + apei_resources_fini(&bert_resources); +out_put_bert_tab: + acpi_put_table((struct acpi_table_header *)bert_tab); + + return rc; +} + +late_initcall(bert_init); diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c new file mode 100644 index 0000000000..013eb621dc --- /dev/null +++ b/drivers/acpi/apei/einj.c @@ -0,0 +1,809 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * APEI Error INJection support + * + * EINJ provides a hardware error injection mechanism, this is useful + * for debugging and testing of other APEI and RAS features. + * + * For more information about EINJ, please refer to ACPI Specification + * version 4.0, section 17.5. + * + * Copyright 2009-2010 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/nmi.h> +#include <linux/delay.h> +#include <linux/mm.h> +#include <asm/unaligned.h> + +#include "apei-internal.h" + +#undef pr_fmt +#define pr_fmt(fmt) "EINJ: " fmt + +#define SLEEP_UNIT_MIN 1000 /* 1ms */ +#define SLEEP_UNIT_MAX 5000 /* 5ms */ +/* Firmware should respond within 1 seconds */ +#define FIRMWARE_TIMEOUT (1 * USEC_PER_SEC) +#define ACPI5_VENDOR_BIT BIT(31) +#define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \ + ACPI_EINJ_MEMORY_UNCORRECTABLE | \ + ACPI_EINJ_MEMORY_FATAL) + +/* + * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action. + */ +static int acpi5; + +struct set_error_type_with_address { + u32 type; + u32 vendor_extension; + u32 flags; + u32 apicid; + u64 memory_address; + u64 memory_address_range; + u32 pcie_sbdf; +}; +enum { + SETWA_FLAGS_APICID = 1, + SETWA_FLAGS_MEM = 2, + SETWA_FLAGS_PCIE_SBDF = 4, +}; + +/* + * Vendor extensions for platform specific operations + */ +struct vendor_error_type_extension { + u32 length; + u32 pcie_sbdf; + u16 vendor_id; + u16 device_id; + u8 rev_id; + u8 reserved[3]; +}; + +static u32 notrigger; + +static u32 vendor_flags; +static struct debugfs_blob_wrapper vendor_blob; +static char vendor_dev[64]; + +/* + * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the + * EINJ table through an unpublished extension. Use with caution as + * most will ignore the parameter and make their own choice of address + * for error injection. This extension is used only if + * param_extension module parameter is specified. + */ +struct einj_parameter { + u64 type; + u64 reserved1; + u64 reserved2; + u64 param1; + u64 param2; +}; + +#define EINJ_OP_BUSY 0x1 +#define EINJ_STATUS_SUCCESS 0x0 +#define EINJ_STATUS_FAIL 0x1 +#define EINJ_STATUS_INVAL 0x2 + +#define EINJ_TAB_ENTRY(tab) \ + ((struct acpi_whea_header *)((char *)(tab) + \ + sizeof(struct acpi_table_einj))) + +static bool param_extension; +module_param(param_extension, bool, 0); + +static struct acpi_table_einj *einj_tab; + +static struct apei_resources einj_resources; + +static struct apei_exec_ins_type einj_ins_type[] = { + [ACPI_EINJ_READ_REGISTER] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = apei_exec_read_register, + }, + [ACPI_EINJ_READ_REGISTER_VALUE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = apei_exec_read_register_value, + }, + [ACPI_EINJ_WRITE_REGISTER] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = apei_exec_write_register, + }, + [ACPI_EINJ_WRITE_REGISTER_VALUE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = apei_exec_write_register_value, + }, + [ACPI_EINJ_NOOP] = { + .flags = 0, + .run = apei_exec_noop, + }, +}; + +/* + * Prevent EINJ interpreter to run simultaneously, because the + * corresponding firmware implementation may not work properly when + * invoked simultaneously. + */ +static DEFINE_MUTEX(einj_mutex); + +static void *einj_param; + +static void einj_exec_ctx_init(struct apei_exec_context *ctx) +{ + apei_exec_ctx_init(ctx, einj_ins_type, ARRAY_SIZE(einj_ins_type), + EINJ_TAB_ENTRY(einj_tab), einj_tab->entries); +} + +static int __einj_get_available_error_type(u32 *type) +{ + struct apei_exec_context ctx; + int rc; + + einj_exec_ctx_init(&ctx); + rc = apei_exec_run(&ctx, ACPI_EINJ_GET_ERROR_TYPE); + if (rc) + return rc; + *type = apei_exec_ctx_get_output(&ctx); + + return 0; +} + +/* Get error injection capabilities of the platform */ +static int einj_get_available_error_type(u32 *type) +{ + int rc; + + mutex_lock(&einj_mutex); + rc = __einj_get_available_error_type(type); + mutex_unlock(&einj_mutex); + + return rc; +} + +static int einj_timedout(u64 *t) +{ + if ((s64)*t < SLEEP_UNIT_MIN) { + pr_warn(FW_WARN "Firmware does not respond in time\n"); + return 1; + } + *t -= SLEEP_UNIT_MIN; + usleep_range(SLEEP_UNIT_MIN, SLEEP_UNIT_MAX); + + return 0; +} + +static void check_vendor_extension(u64 paddr, + struct set_error_type_with_address *v5param) +{ + int offset = v5param->vendor_extension; + struct vendor_error_type_extension *v; + u32 sbdf; + + if (!offset) + return; + v = acpi_os_map_iomem(paddr + offset, sizeof(*v)); + if (!v) + return; + sbdf = v->pcie_sbdf; + sprintf(vendor_dev, "%x:%x:%x.%x vendor_id=%x device_id=%x rev_id=%x\n", + sbdf >> 24, (sbdf >> 16) & 0xff, + (sbdf >> 11) & 0x1f, (sbdf >> 8) & 0x7, + v->vendor_id, v->device_id, v->rev_id); + acpi_os_unmap_iomem(v, sizeof(*v)); +} + +static void *einj_get_parameter_address(void) +{ + int i; + u64 pa_v4 = 0, pa_v5 = 0; + struct acpi_whea_header *entry; + + entry = EINJ_TAB_ENTRY(einj_tab); + for (i = 0; i < einj_tab->entries; i++) { + if (entry->action == ACPI_EINJ_SET_ERROR_TYPE && + entry->instruction == ACPI_EINJ_WRITE_REGISTER && + entry->register_region.space_id == + ACPI_ADR_SPACE_SYSTEM_MEMORY) + pa_v4 = get_unaligned(&entry->register_region.address); + if (entry->action == ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS && + entry->instruction == ACPI_EINJ_WRITE_REGISTER && + entry->register_region.space_id == + ACPI_ADR_SPACE_SYSTEM_MEMORY) + pa_v5 = get_unaligned(&entry->register_region.address); + entry++; + } + if (pa_v5) { + struct set_error_type_with_address *v5param; + + v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param)); + if (v5param) { + acpi5 = 1; + check_vendor_extension(pa_v5, v5param); + return v5param; + } + } + if (param_extension && pa_v4) { + struct einj_parameter *v4param; + + v4param = acpi_os_map_iomem(pa_v4, sizeof(*v4param)); + if (!v4param) + return NULL; + if (v4param->reserved1 || v4param->reserved2) { + acpi_os_unmap_iomem(v4param, sizeof(*v4param)); + return NULL; + } + return v4param; + } + + return NULL; +} + +/* do sanity check to trigger table */ +static int einj_check_trigger_header(struct acpi_einj_trigger *trigger_tab) +{ + if (trigger_tab->header_size != sizeof(struct acpi_einj_trigger)) + return -EINVAL; + if (trigger_tab->table_size > PAGE_SIZE || + trigger_tab->table_size < trigger_tab->header_size) + return -EINVAL; + if (trigger_tab->entry_count != + (trigger_tab->table_size - trigger_tab->header_size) / + sizeof(struct acpi_einj_entry)) + return -EINVAL; + + return 0; +} + +static struct acpi_generic_address *einj_get_trigger_parameter_region( + struct acpi_einj_trigger *trigger_tab, u64 param1, u64 param2) +{ + int i; + struct acpi_whea_header *entry; + + entry = (struct acpi_whea_header *) + ((char *)trigger_tab + sizeof(struct acpi_einj_trigger)); + for (i = 0; i < trigger_tab->entry_count; i++) { + if (entry->action == ACPI_EINJ_TRIGGER_ERROR && + entry->instruction <= ACPI_EINJ_WRITE_REGISTER_VALUE && + entry->register_region.space_id == + ACPI_ADR_SPACE_SYSTEM_MEMORY && + (entry->register_region.address & param2) == (param1 & param2)) + return &entry->register_region; + entry++; + } + + return NULL; +} +/* Execute instructions in trigger error action table */ +static int __einj_error_trigger(u64 trigger_paddr, u32 type, + u64 param1, u64 param2) +{ + struct acpi_einj_trigger *trigger_tab = NULL; + struct apei_exec_context trigger_ctx; + struct apei_resources trigger_resources; + struct acpi_whea_header *trigger_entry; + struct resource *r; + u32 table_size; + int rc = -EIO; + struct acpi_generic_address *trigger_param_region = NULL; + + r = request_mem_region(trigger_paddr, sizeof(*trigger_tab), + "APEI EINJ Trigger Table"); + if (!r) { + pr_err("Can not request [mem %#010llx-%#010llx] for Trigger table\n", + (unsigned long long)trigger_paddr, + (unsigned long long)trigger_paddr + + sizeof(*trigger_tab) - 1); + goto out; + } + trigger_tab = ioremap_cache(trigger_paddr, sizeof(*trigger_tab)); + if (!trigger_tab) { + pr_err("Failed to map trigger table!\n"); + goto out_rel_header; + } + rc = einj_check_trigger_header(trigger_tab); + if (rc) { + pr_warn(FW_BUG "Invalid trigger error action table.\n"); + goto out_rel_header; + } + + /* No action structures in the TRIGGER_ERROR table, nothing to do */ + if (!trigger_tab->entry_count) + goto out_rel_header; + + rc = -EIO; + table_size = trigger_tab->table_size; + r = request_mem_region(trigger_paddr + sizeof(*trigger_tab), + table_size - sizeof(*trigger_tab), + "APEI EINJ Trigger Table"); + if (!r) { + pr_err("Can not request [mem %#010llx-%#010llx] for Trigger Table Entry\n", + (unsigned long long)trigger_paddr + sizeof(*trigger_tab), + (unsigned long long)trigger_paddr + table_size - 1); + goto out_rel_header; + } + iounmap(trigger_tab); + trigger_tab = ioremap_cache(trigger_paddr, table_size); + if (!trigger_tab) { + pr_err("Failed to map trigger table!\n"); + goto out_rel_entry; + } + trigger_entry = (struct acpi_whea_header *) + ((char *)trigger_tab + sizeof(struct acpi_einj_trigger)); + apei_resources_init(&trigger_resources); + apei_exec_ctx_init(&trigger_ctx, einj_ins_type, + ARRAY_SIZE(einj_ins_type), + trigger_entry, trigger_tab->entry_count); + rc = apei_exec_collect_resources(&trigger_ctx, &trigger_resources); + if (rc) + goto out_fini; + rc = apei_resources_sub(&trigger_resources, &einj_resources); + if (rc) + goto out_fini; + /* + * Some firmware will access target address specified in + * param1 to trigger the error when injecting memory error. + * This will cause resource conflict with regular memory. So + * remove it from trigger table resources. + */ + if ((param_extension || acpi5) && (type & MEM_ERROR_MASK) && param2) { + struct apei_resources addr_resources; + + apei_resources_init(&addr_resources); + trigger_param_region = einj_get_trigger_parameter_region( + trigger_tab, param1, param2); + if (trigger_param_region) { + rc = apei_resources_add(&addr_resources, + trigger_param_region->address, + trigger_param_region->bit_width/8, true); + if (rc) + goto out_fini; + rc = apei_resources_sub(&trigger_resources, + &addr_resources); + } + apei_resources_fini(&addr_resources); + if (rc) + goto out_fini; + } + rc = apei_resources_request(&trigger_resources, "APEI EINJ Trigger"); + if (rc) + goto out_fini; + rc = apei_exec_pre_map_gars(&trigger_ctx); + if (rc) + goto out_release; + + rc = apei_exec_run(&trigger_ctx, ACPI_EINJ_TRIGGER_ERROR); + + apei_exec_post_unmap_gars(&trigger_ctx); +out_release: + apei_resources_release(&trigger_resources); +out_fini: + apei_resources_fini(&trigger_resources); +out_rel_entry: + release_mem_region(trigger_paddr + sizeof(*trigger_tab), + table_size - sizeof(*trigger_tab)); +out_rel_header: + release_mem_region(trigger_paddr, sizeof(*trigger_tab)); +out: + if (trigger_tab) + iounmap(trigger_tab); + + return rc; +} + +static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, + u64 param3, u64 param4) +{ + struct apei_exec_context ctx; + u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT; + int rc; + + einj_exec_ctx_init(&ctx); + + rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION); + if (rc) + return rc; + apei_exec_ctx_set_input(&ctx, type); + if (acpi5) { + struct set_error_type_with_address *v5param = einj_param; + + v5param->type = type; + if (type & ACPI5_VENDOR_BIT) { + switch (vendor_flags) { + case SETWA_FLAGS_APICID: + v5param->apicid = param1; + break; + case SETWA_FLAGS_MEM: + v5param->memory_address = param1; + v5param->memory_address_range = param2; + break; + case SETWA_FLAGS_PCIE_SBDF: + v5param->pcie_sbdf = param1; + break; + } + v5param->flags = vendor_flags; + } else if (flags) { + v5param->flags = flags; + v5param->memory_address = param1; + v5param->memory_address_range = param2; + v5param->apicid = param3; + v5param->pcie_sbdf = param4; + } else { + switch (type) { + case ACPI_EINJ_PROCESSOR_CORRECTABLE: + case ACPI_EINJ_PROCESSOR_UNCORRECTABLE: + case ACPI_EINJ_PROCESSOR_FATAL: + v5param->apicid = param1; + v5param->flags = SETWA_FLAGS_APICID; + break; + case ACPI_EINJ_MEMORY_CORRECTABLE: + case ACPI_EINJ_MEMORY_UNCORRECTABLE: + case ACPI_EINJ_MEMORY_FATAL: + v5param->memory_address = param1; + v5param->memory_address_range = param2; + v5param->flags = SETWA_FLAGS_MEM; + break; + case ACPI_EINJ_PCIX_CORRECTABLE: + case ACPI_EINJ_PCIX_UNCORRECTABLE: + case ACPI_EINJ_PCIX_FATAL: + v5param->pcie_sbdf = param1; + v5param->flags = SETWA_FLAGS_PCIE_SBDF; + break; + } + } + } else { + rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE); + if (rc) + return rc; + if (einj_param) { + struct einj_parameter *v4param = einj_param; + + v4param->param1 = param1; + v4param->param2 = param2; + } + } + rc = apei_exec_run(&ctx, ACPI_EINJ_EXECUTE_OPERATION); + if (rc) + return rc; + for (;;) { + rc = apei_exec_run(&ctx, ACPI_EINJ_CHECK_BUSY_STATUS); + if (rc) + return rc; + val = apei_exec_ctx_get_output(&ctx); + if (!(val & EINJ_OP_BUSY)) + break; + if (einj_timedout(&timeout)) + return -EIO; + } + rc = apei_exec_run(&ctx, ACPI_EINJ_GET_COMMAND_STATUS); + if (rc) + return rc; + val = apei_exec_ctx_get_output(&ctx); + if (val == EINJ_STATUS_FAIL) + return -EBUSY; + else if (val == EINJ_STATUS_INVAL) + return -EINVAL; + + /* + * The error is injected into the platform successfully, then it needs + * to trigger the error. + */ + rc = apei_exec_run(&ctx, ACPI_EINJ_GET_TRIGGER_TABLE); + if (rc) + return rc; + trigger_paddr = apei_exec_ctx_get_output(&ctx); + if (notrigger == 0) { + rc = __einj_error_trigger(trigger_paddr, type, param1, param2); + if (rc) + return rc; + } + rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION); + + return rc; +} + +/* Inject the specified hardware error */ +static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, + u64 param3, u64 param4) +{ + int rc; + u64 base_addr, size; + + /* If user manually set "flags", make sure it is legal */ + if (flags && (flags & + ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF))) + return -EINVAL; + + /* + * We need extra sanity checks for memory errors. + * Other types leap directly to injection. + */ + + /* ensure param1/param2 existed */ + if (!(param_extension || acpi5)) + goto inject; + + /* ensure injection is memory related */ + if (type & ACPI5_VENDOR_BIT) { + if (vendor_flags != SETWA_FLAGS_MEM) + goto inject; + } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) + goto inject; + + /* + * Disallow crazy address masks that give BIOS leeway to pick + * injection address almost anywhere. Insist on page or + * better granularity and that target address is normal RAM or + * NVDIMM. + */ + base_addr = param1 & param2; + size = ~param2 + 1; + + if (((param2 & PAGE_MASK) != PAGE_MASK) || + ((region_intersects(base_addr, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE) + != REGION_INTERSECTS) && + (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY) + != REGION_INTERSECTS) && + (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_SOFT_RESERVED) + != REGION_INTERSECTS) && + !arch_is_platform_page(base_addr))) + return -EINVAL; + + if (is_zero_pfn(base_addr >> PAGE_SHIFT)) + return -EADDRINUSE; + +inject: + mutex_lock(&einj_mutex); + rc = __einj_error_inject(type, flags, param1, param2, param3, param4); + mutex_unlock(&einj_mutex); + + return rc; +} + +static u32 error_type; +static u32 error_flags; +static u64 error_param1; +static u64 error_param2; +static u64 error_param3; +static u64 error_param4; +static struct dentry *einj_debug_dir; +static const char * const einj_error_type_string[] = { + "0x00000001\tProcessor Correctable\n", + "0x00000002\tProcessor Uncorrectable non-fatal\n", + "0x00000004\tProcessor Uncorrectable fatal\n", + "0x00000008\tMemory Correctable\n", + "0x00000010\tMemory Uncorrectable non-fatal\n", + "0x00000020\tMemory Uncorrectable fatal\n", + "0x00000040\tPCI Express Correctable\n", + "0x00000080\tPCI Express Uncorrectable non-fatal\n", + "0x00000100\tPCI Express Uncorrectable fatal\n", + "0x00000200\tPlatform Correctable\n", + "0x00000400\tPlatform Uncorrectable non-fatal\n", + "0x00000800\tPlatform Uncorrectable fatal\n", + "0x00001000\tCXL.cache Protocol Correctable\n", + "0x00002000\tCXL.cache Protocol Uncorrectable non-fatal\n", + "0x00004000\tCXL.cache Protocol Uncorrectable fatal\n", + "0x00008000\tCXL.mem Protocol Correctable\n", + "0x00010000\tCXL.mem Protocol Uncorrectable non-fatal\n", + "0x00020000\tCXL.mem Protocol Uncorrectable fatal\n", +}; + +static int available_error_type_show(struct seq_file *m, void *v) +{ + int rc; + u32 available_error_type = 0; + + rc = einj_get_available_error_type(&available_error_type); + if (rc) + return rc; + for (int pos = 0; pos < ARRAY_SIZE(einj_error_type_string); pos++) + if (available_error_type & BIT(pos)) + seq_puts(m, einj_error_type_string[pos]); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(available_error_type); + +static int error_type_get(void *data, u64 *val) +{ + *val = error_type; + + return 0; +} + +static int error_type_set(void *data, u64 val) +{ + int rc; + u32 available_error_type = 0; + u32 tval, vendor; + + /* Only low 32 bits for error type are valid */ + if (val & GENMASK_ULL(63, 32)) + return -EINVAL; + + /* + * Vendor defined types have 0x80000000 bit set, and + * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE + */ + vendor = val & ACPI5_VENDOR_BIT; + tval = val & 0x7fffffff; + + /* Only one error type can be specified */ + if (tval & (tval - 1)) + return -EINVAL; + if (!vendor) { + rc = einj_get_available_error_type(&available_error_type); + if (rc) + return rc; + if (!(val & available_error_type)) + return -EINVAL; + } + error_type = val; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(error_type_fops, error_type_get, error_type_set, + "0x%llx\n"); + +static int error_inject_set(void *data, u64 val) +{ + if (!error_type) + return -EINVAL; + + return einj_error_inject(error_type, error_flags, error_param1, error_param2, + error_param3, error_param4); +} + +DEFINE_DEBUGFS_ATTRIBUTE(error_inject_fops, NULL, error_inject_set, "%llu\n"); + +static int einj_check_table(struct acpi_table_einj *einj_tab) +{ + if ((einj_tab->header_length != + (sizeof(struct acpi_table_einj) - sizeof(einj_tab->header))) + && (einj_tab->header_length != sizeof(struct acpi_table_einj))) + return -EINVAL; + if (einj_tab->header.length < sizeof(struct acpi_table_einj)) + return -EINVAL; + if (einj_tab->entries != + (einj_tab->header.length - sizeof(struct acpi_table_einj)) / + sizeof(struct acpi_einj_entry)) + return -EINVAL; + + return 0; +} + +static int __init einj_init(void) +{ + int rc; + acpi_status status; + struct apei_exec_context ctx; + + if (acpi_disabled) { + pr_info("ACPI disabled.\n"); + return -ENODEV; + } + + status = acpi_get_table(ACPI_SIG_EINJ, 0, + (struct acpi_table_header **)&einj_tab); + if (status == AE_NOT_FOUND) { + pr_warn("EINJ table not found.\n"); + return -ENODEV; + } else if (ACPI_FAILURE(status)) { + pr_err("Failed to get EINJ table: %s\n", + acpi_format_exception(status)); + return -EINVAL; + } + + rc = einj_check_table(einj_tab); + if (rc) { + pr_warn(FW_BUG "Invalid EINJ table.\n"); + goto err_put_table; + } + + rc = -ENOMEM; + einj_debug_dir = debugfs_create_dir("einj", apei_get_debugfs_dir()); + + debugfs_create_file("available_error_type", S_IRUSR, einj_debug_dir, + NULL, &available_error_type_fops); + debugfs_create_file_unsafe("error_type", 0600, einj_debug_dir, + NULL, &error_type_fops); + debugfs_create_file_unsafe("error_inject", 0200, einj_debug_dir, + NULL, &error_inject_fops); + + apei_resources_init(&einj_resources); + einj_exec_ctx_init(&ctx); + rc = apei_exec_collect_resources(&ctx, &einj_resources); + if (rc) { + pr_err("Error collecting EINJ resources.\n"); + goto err_fini; + } + + rc = apei_resources_request(&einj_resources, "APEI EINJ"); + if (rc) { + pr_err("Error requesting memory/port resources.\n"); + goto err_fini; + } + + rc = apei_exec_pre_map_gars(&ctx); + if (rc) { + pr_err("Error pre-mapping GARs.\n"); + goto err_release; + } + + einj_param = einj_get_parameter_address(); + if ((param_extension || acpi5) && einj_param) { + debugfs_create_x32("flags", S_IRUSR | S_IWUSR, einj_debug_dir, + &error_flags); + debugfs_create_x64("param1", S_IRUSR | S_IWUSR, einj_debug_dir, + &error_param1); + debugfs_create_x64("param2", S_IRUSR | S_IWUSR, einj_debug_dir, + &error_param2); + debugfs_create_x64("param3", S_IRUSR | S_IWUSR, einj_debug_dir, + &error_param3); + debugfs_create_x64("param4", S_IRUSR | S_IWUSR, einj_debug_dir, + &error_param4); + debugfs_create_x32("notrigger", S_IRUSR | S_IWUSR, + einj_debug_dir, ¬rigger); + } + + if (vendor_dev[0]) { + vendor_blob.data = vendor_dev; + vendor_blob.size = strlen(vendor_dev); + debugfs_create_blob("vendor", S_IRUSR, einj_debug_dir, + &vendor_blob); + debugfs_create_x32("vendor_flags", S_IRUSR | S_IWUSR, + einj_debug_dir, &vendor_flags); + } + + pr_info("Error INJection is initialized.\n"); + + return 0; + +err_release: + apei_resources_release(&einj_resources); +err_fini: + apei_resources_fini(&einj_resources); + debugfs_remove_recursive(einj_debug_dir); +err_put_table: + acpi_put_table((struct acpi_table_header *)einj_tab); + + return rc; +} + +static void __exit einj_exit(void) +{ + struct apei_exec_context ctx; + + if (einj_param) { + acpi_size size = (acpi5) ? + sizeof(struct set_error_type_with_address) : + sizeof(struct einj_parameter); + + acpi_os_unmap_iomem(einj_param, size); + } + einj_exec_ctx_init(&ctx); + apei_exec_post_unmap_gars(&ctx); + apei_resources_release(&einj_resources); + apei_resources_fini(&einj_resources); + debugfs_remove_recursive(einj_debug_dir); + acpi_put_table((struct acpi_table_header *)einj_tab); +} + +module_init(einj_init); +module_exit(einj_exit); + +MODULE_AUTHOR("Huang Ying"); +MODULE_DESCRIPTION("APEI Error INJection support"); +MODULE_LICENSE("GPL"); diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c new file mode 100644 index 0000000000..8bc71cdc22 --- /dev/null +++ b/drivers/acpi/apei/erst-dbg.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * APEI Error Record Serialization Table debug support + * + * ERST is a way provided by APEI to save and retrieve hardware error + * information to and from a persistent store. This file provide the + * debugging/testing support for ERST kernel support and firmware + * implementation. + * + * Copyright 2010 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/uaccess.h> +#include <acpi/apei.h> +#include <linux/miscdevice.h> + +#include "apei-internal.h" + +#define ERST_DBG_PFX "ERST DBG: " + +#define ERST_DBG_RECORD_LEN_MAX 0x4000 + +static void *erst_dbg_buf; +static unsigned int erst_dbg_buf_len; + +/* Prevent erst_dbg_read/write from being invoked concurrently */ +static DEFINE_MUTEX(erst_dbg_mutex); + +static int erst_dbg_open(struct inode *inode, struct file *file) +{ + int rc, *pos; + + if (erst_disable) + return -ENODEV; + + pos = (int *)&file->private_data; + + rc = erst_get_record_id_begin(pos); + if (rc) + return rc; + + return nonseekable_open(inode, file); +} + +static int erst_dbg_release(struct inode *inode, struct file *file) +{ + erst_get_record_id_end(); + + return 0; +} + +static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg) +{ + int rc; + u64 record_id; + u32 record_count; + + switch (cmd) { + case APEI_ERST_CLEAR_RECORD: + rc = copy_from_user(&record_id, (void __user *)arg, + sizeof(record_id)); + if (rc) + return -EFAULT; + return erst_clear(record_id); + case APEI_ERST_GET_RECORD_COUNT: + rc = erst_get_record_count(); + if (rc < 0) + return rc; + record_count = rc; + rc = put_user(record_count, (u32 __user *)arg); + if (rc) + return rc; + return 0; + default: + return -ENOTTY; + } +} + +static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf, + size_t usize, loff_t *off) +{ + int rc, *pos; + ssize_t len = 0; + u64 id; + + if (*off) + return -EINVAL; + + if (mutex_lock_interruptible(&erst_dbg_mutex) != 0) + return -EINTR; + + pos = (int *)&filp->private_data; + +retry_next: + rc = erst_get_record_id_next(pos, &id); + if (rc) + goto out; + /* no more record */ + if (id == APEI_ERST_INVALID_RECORD_ID) { + /* + * If the persistent store is empty initially, the function + * 'erst_read' below will return "-ENOENT" value. This causes + * 'retry_next' label is entered again. The returned value + * should be zero indicating the read operation is EOF. + */ + len = 0; + + goto out; + } +retry: + rc = len = erst_read_record(id, erst_dbg_buf, erst_dbg_buf_len, + erst_dbg_buf_len, NULL); + /* The record may be cleared by others, try read next record */ + if (rc == -ENOENT) + goto retry_next; + if (rc < 0) + goto out; + if (len > ERST_DBG_RECORD_LEN_MAX) { + pr_warn(ERST_DBG_PFX + "Record (ID: 0x%llx) length is too long: %zd\n", id, len); + rc = -EIO; + goto out; + } + if (len > erst_dbg_buf_len) { + void *p; + rc = -ENOMEM; + p = kmalloc(len, GFP_KERNEL); + if (!p) + goto out; + kfree(erst_dbg_buf); + erst_dbg_buf = p; + erst_dbg_buf_len = len; + goto retry; + } + + rc = -EINVAL; + if (len > usize) + goto out; + + rc = -EFAULT; + if (copy_to_user(ubuf, erst_dbg_buf, len)) + goto out; + rc = 0; +out: + mutex_unlock(&erst_dbg_mutex); + return rc ? rc : len; +} + +static ssize_t erst_dbg_write(struct file *filp, const char __user *ubuf, + size_t usize, loff_t *off) +{ + int rc; + struct cper_record_header *rcd; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (usize > ERST_DBG_RECORD_LEN_MAX) { + pr_err(ERST_DBG_PFX "Too long record to be written\n"); + return -EINVAL; + } + + if (mutex_lock_interruptible(&erst_dbg_mutex)) + return -EINTR; + if (usize > erst_dbg_buf_len) { + void *p; + rc = -ENOMEM; + p = kmalloc(usize, GFP_KERNEL); + if (!p) + goto out; + kfree(erst_dbg_buf); + erst_dbg_buf = p; + erst_dbg_buf_len = usize; + } + rc = copy_from_user(erst_dbg_buf, ubuf, usize); + if (rc) { + rc = -EFAULT; + goto out; + } + rcd = erst_dbg_buf; + rc = -EINVAL; + if (rcd->record_length != usize) + goto out; + + rc = erst_write(erst_dbg_buf); + +out: + mutex_unlock(&erst_dbg_mutex); + return rc < 0 ? rc : usize; +} + +static const struct file_operations erst_dbg_ops = { + .owner = THIS_MODULE, + .open = erst_dbg_open, + .release = erst_dbg_release, + .read = erst_dbg_read, + .write = erst_dbg_write, + .unlocked_ioctl = erst_dbg_ioctl, + .llseek = no_llseek, +}; + +static struct miscdevice erst_dbg_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "erst_dbg", + .fops = &erst_dbg_ops, +}; + +static __init int erst_dbg_init(void) +{ + if (erst_disable) { + pr_info(ERST_DBG_PFX "ERST support is disabled.\n"); + return -ENODEV; + } + return misc_register(&erst_dbg_dev); +} + +static __exit void erst_dbg_exit(void) +{ + misc_deregister(&erst_dbg_dev); + kfree(erst_dbg_buf); +} + +module_init(erst_dbg_init); +module_exit(erst_dbg_exit); + +MODULE_AUTHOR("Huang Ying"); +MODULE_DESCRIPTION("APEI Error Record Serialization Table debug support"); +MODULE_LICENSE("GPL"); diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c new file mode 100644 index 0000000000..247989060e --- /dev/null +++ b/drivers/acpi/apei/erst.c @@ -0,0 +1,1267 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * APEI Error Record Serialization Table support + * + * ERST is a way provided by APEI to save and retrieve hardware error + * information to and from a persistent store. + * + * For more information about ERST, please refer to ACPI Specification + * version 4.0, section 17.4. + * + * Copyright 2010 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/io.h> +#include <linux/acpi.h> +#include <linux/uaccess.h> +#include <linux/cper.h> +#include <linux/nmi.h> +#include <linux/hardirq.h> +#include <linux/pstore.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> /* kvfree() */ +#include <acpi/apei.h> + +#include "apei-internal.h" + +#undef pr_fmt +#define pr_fmt(fmt) "ERST: " fmt + +/* ERST command status */ +#define ERST_STATUS_SUCCESS 0x0 +#define ERST_STATUS_NOT_ENOUGH_SPACE 0x1 +#define ERST_STATUS_HARDWARE_NOT_AVAILABLE 0x2 +#define ERST_STATUS_FAILED 0x3 +#define ERST_STATUS_RECORD_STORE_EMPTY 0x4 +#define ERST_STATUS_RECORD_NOT_FOUND 0x5 + +#define ERST_TAB_ENTRY(tab) \ + ((struct acpi_whea_header *)((char *)(tab) + \ + sizeof(struct acpi_table_erst))) + +#define SPIN_UNIT 100 /* 100ns */ +/* Firmware should respond within 1 milliseconds */ +#define FIRMWARE_TIMEOUT (1 * NSEC_PER_MSEC) +#define FIRMWARE_MAX_STALL 50 /* 50us */ + +int erst_disable; +EXPORT_SYMBOL_GPL(erst_disable); + +static struct acpi_table_erst *erst_tab; + +/* ERST Error Log Address Range attributes */ +#define ERST_RANGE_RESERVED 0x0001 +#define ERST_RANGE_NVRAM 0x0002 +#define ERST_RANGE_SLOW 0x0004 + +/* + * ERST Error Log Address Range, used as buffer for reading/writing + * error records. + */ +static struct erst_erange { + u64 base; + u64 size; + void __iomem *vaddr; + u32 attr; +} erst_erange; + +/* + * Prevent ERST interpreter to run simultaneously, because the + * corresponding firmware implementation may not work properly when + * invoked simultaneously. + * + * It is used to provide exclusive accessing for ERST Error Log + * Address Range too. + */ +static DEFINE_RAW_SPINLOCK(erst_lock); + +static inline int erst_errno(int command_status) +{ + switch (command_status) { + case ERST_STATUS_SUCCESS: + return 0; + case ERST_STATUS_HARDWARE_NOT_AVAILABLE: + return -ENODEV; + case ERST_STATUS_NOT_ENOUGH_SPACE: + return -ENOSPC; + case ERST_STATUS_RECORD_STORE_EMPTY: + case ERST_STATUS_RECORD_NOT_FOUND: + return -ENOENT; + default: + return -EINVAL; + } +} + +static int erst_timedout(u64 *t, u64 spin_unit) +{ + if ((s64)*t < spin_unit) { + pr_warn(FW_WARN "Firmware does not respond in time.\n"); + return 1; + } + *t -= spin_unit; + ndelay(spin_unit); + touch_nmi_watchdog(); + return 0; +} + +static int erst_exec_load_var1(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + return __apei_exec_read_register(entry, &ctx->var1); +} + +static int erst_exec_load_var2(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + return __apei_exec_read_register(entry, &ctx->var2); +} + +static int erst_exec_store_var1(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + return __apei_exec_write_register(entry, ctx->var1); +} + +static int erst_exec_add(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + ctx->var1 += ctx->var2; + return 0; +} + +static int erst_exec_subtract(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + ctx->var1 -= ctx->var2; + return 0; +} + +static int erst_exec_add_value(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + int rc; + u64 val; + + rc = __apei_exec_read_register(entry, &val); + if (rc) + return rc; + val += ctx->value; + rc = __apei_exec_write_register(entry, val); + return rc; +} + +static int erst_exec_subtract_value(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + int rc; + u64 val; + + rc = __apei_exec_read_register(entry, &val); + if (rc) + return rc; + val -= ctx->value; + rc = __apei_exec_write_register(entry, val); + return rc; +} + +static int erst_exec_stall(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + u64 stall_time; + + if (ctx->value > FIRMWARE_MAX_STALL) { + if (!in_nmi()) + pr_warn(FW_WARN + "Too long stall time for stall instruction: 0x%llx.\n", + ctx->value); + stall_time = FIRMWARE_MAX_STALL; + } else + stall_time = ctx->value; + udelay(stall_time); + return 0; +} + +static int erst_exec_stall_while_true(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + int rc; + u64 val; + u64 timeout = FIRMWARE_TIMEOUT; + u64 stall_time; + + if (ctx->var1 > FIRMWARE_MAX_STALL) { + if (!in_nmi()) + pr_warn(FW_WARN + "Too long stall time for stall while true instruction: 0x%llx.\n", + ctx->var1); + stall_time = FIRMWARE_MAX_STALL; + } else + stall_time = ctx->var1; + + for (;;) { + rc = __apei_exec_read_register(entry, &val); + if (rc) + return rc; + if (val != ctx->value) + break; + if (erst_timedout(&timeout, stall_time * NSEC_PER_USEC)) + return -EIO; + } + return 0; +} + +static int erst_exec_skip_next_instruction_if_true( + struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + int rc; + u64 val; + + rc = __apei_exec_read_register(entry, &val); + if (rc) + return rc; + if (val == ctx->value) { + ctx->ip += 2; + return APEI_EXEC_SET_IP; + } + + return 0; +} + +static int erst_exec_goto(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + ctx->ip = ctx->value; + return APEI_EXEC_SET_IP; +} + +static int erst_exec_set_src_address_base(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + return __apei_exec_read_register(entry, &ctx->src_base); +} + +static int erst_exec_set_dst_address_base(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + return __apei_exec_read_register(entry, &ctx->dst_base); +} + +static int erst_exec_move_data(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + int rc; + u64 offset; + void *src, *dst; + + /* ioremap does not work in interrupt context */ + if (in_interrupt()) { + pr_warn("MOVE_DATA can not be used in interrupt context.\n"); + return -EBUSY; + } + + rc = __apei_exec_read_register(entry, &offset); + if (rc) + return rc; + + src = ioremap(ctx->src_base + offset, ctx->var2); + if (!src) + return -ENOMEM; + dst = ioremap(ctx->dst_base + offset, ctx->var2); + if (!dst) { + iounmap(src); + return -ENOMEM; + } + + memmove(dst, src, ctx->var2); + + iounmap(src); + iounmap(dst); + + return 0; +} + +static struct apei_exec_ins_type erst_ins_type[] = { + [ACPI_ERST_READ_REGISTER] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = apei_exec_read_register, + }, + [ACPI_ERST_READ_REGISTER_VALUE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = apei_exec_read_register_value, + }, + [ACPI_ERST_WRITE_REGISTER] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = apei_exec_write_register, + }, + [ACPI_ERST_WRITE_REGISTER_VALUE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = apei_exec_write_register_value, + }, + [ACPI_ERST_NOOP] = { + .flags = 0, + .run = apei_exec_noop, + }, + [ACPI_ERST_LOAD_VAR1] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_load_var1, + }, + [ACPI_ERST_LOAD_VAR2] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_load_var2, + }, + [ACPI_ERST_STORE_VAR1] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_store_var1, + }, + [ACPI_ERST_ADD] = { + .flags = 0, + .run = erst_exec_add, + }, + [ACPI_ERST_SUBTRACT] = { + .flags = 0, + .run = erst_exec_subtract, + }, + [ACPI_ERST_ADD_VALUE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_add_value, + }, + [ACPI_ERST_SUBTRACT_VALUE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_subtract_value, + }, + [ACPI_ERST_STALL] = { + .flags = 0, + .run = erst_exec_stall, + }, + [ACPI_ERST_STALL_WHILE_TRUE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_stall_while_true, + }, + [ACPI_ERST_SKIP_NEXT_IF_TRUE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_skip_next_instruction_if_true, + }, + [ACPI_ERST_GOTO] = { + .flags = 0, + .run = erst_exec_goto, + }, + [ACPI_ERST_SET_SRC_ADDRESS_BASE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_set_src_address_base, + }, + [ACPI_ERST_SET_DST_ADDRESS_BASE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_set_dst_address_base, + }, + [ACPI_ERST_MOVE_DATA] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_move_data, + }, +}; + +static inline void erst_exec_ctx_init(struct apei_exec_context *ctx) +{ + apei_exec_ctx_init(ctx, erst_ins_type, ARRAY_SIZE(erst_ins_type), + ERST_TAB_ENTRY(erst_tab), erst_tab->entries); +} + +static int erst_get_erange(struct erst_erange *range) +{ + struct apei_exec_context ctx; + int rc; + + erst_exec_ctx_init(&ctx); + rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_RANGE); + if (rc) + return rc; + range->base = apei_exec_ctx_get_output(&ctx); + rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_LENGTH); + if (rc) + return rc; + range->size = apei_exec_ctx_get_output(&ctx); + rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_ATTRIBUTES); + if (rc) + return rc; + range->attr = apei_exec_ctx_get_output(&ctx); + + return 0; +} + +static ssize_t __erst_get_record_count(void) +{ + struct apei_exec_context ctx; + int rc; + + erst_exec_ctx_init(&ctx); + rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_COUNT); + if (rc) + return rc; + return apei_exec_ctx_get_output(&ctx); +} + +ssize_t erst_get_record_count(void) +{ + ssize_t count; + unsigned long flags; + + if (erst_disable) + return -ENODEV; + + raw_spin_lock_irqsave(&erst_lock, flags); + count = __erst_get_record_count(); + raw_spin_unlock_irqrestore(&erst_lock, flags); + + return count; +} +EXPORT_SYMBOL_GPL(erst_get_record_count); + +#define ERST_RECORD_ID_CACHE_SIZE_MIN 16 +#define ERST_RECORD_ID_CACHE_SIZE_MAX 1024 + +struct erst_record_id_cache { + struct mutex lock; + u64 *entries; + int len; + int size; + int refcount; +}; + +static struct erst_record_id_cache erst_record_id_cache = { + .lock = __MUTEX_INITIALIZER(erst_record_id_cache.lock), + .refcount = 0, +}; + +static int __erst_get_next_record_id(u64 *record_id) +{ + struct apei_exec_context ctx; + int rc; + + erst_exec_ctx_init(&ctx); + rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_ID); + if (rc) + return rc; + *record_id = apei_exec_ctx_get_output(&ctx); + + return 0; +} + +int erst_get_record_id_begin(int *pos) +{ + int rc; + + if (erst_disable) + return -ENODEV; + + rc = mutex_lock_interruptible(&erst_record_id_cache.lock); + if (rc) + return rc; + erst_record_id_cache.refcount++; + mutex_unlock(&erst_record_id_cache.lock); + + *pos = 0; + + return 0; +} +EXPORT_SYMBOL_GPL(erst_get_record_id_begin); + +/* erst_record_id_cache.lock must be held by caller */ +static int __erst_record_id_cache_add_one(void) +{ + u64 id, prev_id, first_id; + int i, rc; + u64 *entries; + unsigned long flags; + + id = prev_id = first_id = APEI_ERST_INVALID_RECORD_ID; +retry: + raw_spin_lock_irqsave(&erst_lock, flags); + rc = __erst_get_next_record_id(&id); + raw_spin_unlock_irqrestore(&erst_lock, flags); + if (rc == -ENOENT) + return 0; + if (rc) + return rc; + if (id == APEI_ERST_INVALID_RECORD_ID) + return 0; + /* can not skip current ID, or loop back to first ID */ + if (id == prev_id || id == first_id) + return 0; + if (first_id == APEI_ERST_INVALID_RECORD_ID) + first_id = id; + prev_id = id; + + entries = erst_record_id_cache.entries; + for (i = 0; i < erst_record_id_cache.len; i++) { + if (entries[i] == id) + break; + } + /* record id already in cache, try next */ + if (i < erst_record_id_cache.len) + goto retry; + if (erst_record_id_cache.len >= erst_record_id_cache.size) { + int new_size; + u64 *new_entries; + + new_size = erst_record_id_cache.size * 2; + new_size = clamp_val(new_size, ERST_RECORD_ID_CACHE_SIZE_MIN, + ERST_RECORD_ID_CACHE_SIZE_MAX); + if (new_size <= erst_record_id_cache.size) { + if (printk_ratelimit()) + pr_warn(FW_WARN "too many record IDs!\n"); + return 0; + } + new_entries = kvmalloc_array(new_size, sizeof(entries[0]), + GFP_KERNEL); + if (!new_entries) + return -ENOMEM; + memcpy(new_entries, entries, + erst_record_id_cache.len * sizeof(entries[0])); + kvfree(entries); + erst_record_id_cache.entries = entries = new_entries; + erst_record_id_cache.size = new_size; + } + entries[i] = id; + erst_record_id_cache.len++; + + return 1; +} + +/* + * Get the record ID of an existing error record on the persistent + * storage. If there is no error record on the persistent storage, the + * returned record_id is APEI_ERST_INVALID_RECORD_ID. + */ +int erst_get_record_id_next(int *pos, u64 *record_id) +{ + int rc = 0; + u64 *entries; + + if (erst_disable) + return -ENODEV; + + /* must be enclosed by erst_get_record_id_begin/end */ + BUG_ON(!erst_record_id_cache.refcount); + BUG_ON(*pos < 0 || *pos > erst_record_id_cache.len); + + mutex_lock(&erst_record_id_cache.lock); + entries = erst_record_id_cache.entries; + for (; *pos < erst_record_id_cache.len; (*pos)++) + if (entries[*pos] != APEI_ERST_INVALID_RECORD_ID) + break; + /* found next record id in cache */ + if (*pos < erst_record_id_cache.len) { + *record_id = entries[*pos]; + (*pos)++; + goto out_unlock; + } + + /* Try to add one more record ID to cache */ + rc = __erst_record_id_cache_add_one(); + if (rc < 0) + goto out_unlock; + /* successfully add one new ID */ + if (rc == 1) { + *record_id = erst_record_id_cache.entries[*pos]; + (*pos)++; + rc = 0; + } else { + *pos = -1; + *record_id = APEI_ERST_INVALID_RECORD_ID; + } +out_unlock: + mutex_unlock(&erst_record_id_cache.lock); + + return rc; +} +EXPORT_SYMBOL_GPL(erst_get_record_id_next); + +/* erst_record_id_cache.lock must be held by caller */ +static void __erst_record_id_cache_compact(void) +{ + int i, wpos = 0; + u64 *entries; + + if (erst_record_id_cache.refcount) + return; + + entries = erst_record_id_cache.entries; + for (i = 0; i < erst_record_id_cache.len; i++) { + if (entries[i] == APEI_ERST_INVALID_RECORD_ID) + continue; + if (wpos != i) + entries[wpos] = entries[i]; + wpos++; + } + erst_record_id_cache.len = wpos; +} + +void erst_get_record_id_end(void) +{ + /* + * erst_disable != 0 should be detected by invoker via the + * return value of erst_get_record_id_begin/next, so this + * function should not be called for erst_disable != 0. + */ + BUG_ON(erst_disable); + + mutex_lock(&erst_record_id_cache.lock); + erst_record_id_cache.refcount--; + BUG_ON(erst_record_id_cache.refcount < 0); + __erst_record_id_cache_compact(); + mutex_unlock(&erst_record_id_cache.lock); +} +EXPORT_SYMBOL_GPL(erst_get_record_id_end); + +static int __erst_write_to_storage(u64 offset) +{ + struct apei_exec_context ctx; + u64 timeout = FIRMWARE_TIMEOUT; + u64 val; + int rc; + + erst_exec_ctx_init(&ctx); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE); + if (rc) + return rc; + apei_exec_ctx_set_input(&ctx, offset); + rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET); + if (rc) + return rc; + rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION); + if (rc) + return rc; + for (;;) { + rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS); + if (rc) + return rc; + val = apei_exec_ctx_get_output(&ctx); + if (!val) + break; + if (erst_timedout(&timeout, SPIN_UNIT)) + return -EIO; + } + rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS); + if (rc) + return rc; + val = apei_exec_ctx_get_output(&ctx); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); + if (rc) + return rc; + + return erst_errno(val); +} + +static int __erst_read_from_storage(u64 record_id, u64 offset) +{ + struct apei_exec_context ctx; + u64 timeout = FIRMWARE_TIMEOUT; + u64 val; + int rc; + + erst_exec_ctx_init(&ctx); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ); + if (rc) + return rc; + apei_exec_ctx_set_input(&ctx, offset); + rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET); + if (rc) + return rc; + apei_exec_ctx_set_input(&ctx, record_id); + rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID); + if (rc) + return rc; + rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION); + if (rc) + return rc; + for (;;) { + rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS); + if (rc) + return rc; + val = apei_exec_ctx_get_output(&ctx); + if (!val) + break; + if (erst_timedout(&timeout, SPIN_UNIT)) + return -EIO; + } + rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS); + if (rc) + return rc; + val = apei_exec_ctx_get_output(&ctx); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); + if (rc) + return rc; + + return erst_errno(val); +} + +static int __erst_clear_from_storage(u64 record_id) +{ + struct apei_exec_context ctx; + u64 timeout = FIRMWARE_TIMEOUT; + u64 val; + int rc; + + erst_exec_ctx_init(&ctx); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR); + if (rc) + return rc; + apei_exec_ctx_set_input(&ctx, record_id); + rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID); + if (rc) + return rc; + rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION); + if (rc) + return rc; + for (;;) { + rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS); + if (rc) + return rc; + val = apei_exec_ctx_get_output(&ctx); + if (!val) + break; + if (erst_timedout(&timeout, SPIN_UNIT)) + return -EIO; + } + rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS); + if (rc) + return rc; + val = apei_exec_ctx_get_output(&ctx); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); + if (rc) + return rc; + + return erst_errno(val); +} + +/* NVRAM ERST Error Log Address Range is not supported yet */ +static void pr_unimpl_nvram(void) +{ + if (printk_ratelimit()) + pr_warn("NVRAM ERST Log Address Range not implemented yet.\n"); +} + +static int __erst_write_to_nvram(const struct cper_record_header *record) +{ + /* do not print message, because printk is not safe for NMI */ + return -ENOSYS; +} + +static int __erst_read_to_erange_from_nvram(u64 record_id, u64 *offset) +{ + pr_unimpl_nvram(); + return -ENOSYS; +} + +static int __erst_clear_from_nvram(u64 record_id) +{ + pr_unimpl_nvram(); + return -ENOSYS; +} + +int erst_write(const struct cper_record_header *record) +{ + int rc; + unsigned long flags; + struct cper_record_header *rcd_erange; + + if (erst_disable) + return -ENODEV; + + if (memcmp(record->signature, CPER_SIG_RECORD, CPER_SIG_SIZE)) + return -EINVAL; + + if (erst_erange.attr & ERST_RANGE_NVRAM) { + if (!raw_spin_trylock_irqsave(&erst_lock, flags)) + return -EBUSY; + rc = __erst_write_to_nvram(record); + raw_spin_unlock_irqrestore(&erst_lock, flags); + return rc; + } + + if (record->record_length > erst_erange.size) + return -EINVAL; + + if (!raw_spin_trylock_irqsave(&erst_lock, flags)) + return -EBUSY; + memcpy(erst_erange.vaddr, record, record->record_length); + rcd_erange = erst_erange.vaddr; + /* signature for serialization system */ + memcpy(&rcd_erange->persistence_information, "ER", 2); + + rc = __erst_write_to_storage(0); + raw_spin_unlock_irqrestore(&erst_lock, flags); + + return rc; +} +EXPORT_SYMBOL_GPL(erst_write); + +static int __erst_read_to_erange(u64 record_id, u64 *offset) +{ + int rc; + + if (erst_erange.attr & ERST_RANGE_NVRAM) + return __erst_read_to_erange_from_nvram( + record_id, offset); + + rc = __erst_read_from_storage(record_id, 0); + if (rc) + return rc; + *offset = 0; + + return 0; +} + +static ssize_t __erst_read(u64 record_id, struct cper_record_header *record, + size_t buflen) +{ + int rc; + u64 offset, len = 0; + struct cper_record_header *rcd_tmp; + + rc = __erst_read_to_erange(record_id, &offset); + if (rc) + return rc; + rcd_tmp = erst_erange.vaddr + offset; + len = rcd_tmp->record_length; + if (len <= buflen) + memcpy(record, rcd_tmp, len); + + return len; +} + +/* + * If return value > buflen, the buffer size is not big enough, + * else if return value < 0, something goes wrong, + * else everything is OK, and return value is record length + */ +ssize_t erst_read(u64 record_id, struct cper_record_header *record, + size_t buflen) +{ + ssize_t len; + unsigned long flags; + + if (erst_disable) + return -ENODEV; + + raw_spin_lock_irqsave(&erst_lock, flags); + len = __erst_read(record_id, record, buflen); + raw_spin_unlock_irqrestore(&erst_lock, flags); + return len; +} +EXPORT_SYMBOL_GPL(erst_read); + +static void erst_clear_cache(u64 record_id) +{ + int i; + u64 *entries; + + mutex_lock(&erst_record_id_cache.lock); + + entries = erst_record_id_cache.entries; + for (i = 0; i < erst_record_id_cache.len; i++) { + if (entries[i] == record_id) + entries[i] = APEI_ERST_INVALID_RECORD_ID; + } + __erst_record_id_cache_compact(); + + mutex_unlock(&erst_record_id_cache.lock); +} + +ssize_t erst_read_record(u64 record_id, struct cper_record_header *record, + size_t buflen, size_t recordlen, const guid_t *creatorid) +{ + ssize_t len; + + /* + * if creatorid is NULL, read any record for erst-dbg module + */ + if (creatorid == NULL) { + len = erst_read(record_id, record, buflen); + if (len == -ENOENT) + erst_clear_cache(record_id); + + return len; + } + + len = erst_read(record_id, record, buflen); + /* + * if erst_read return value is -ENOENT skip to next record_id, + * and clear the record_id cache. + */ + if (len == -ENOENT) { + erst_clear_cache(record_id); + goto out; + } + + if (len < 0) + goto out; + + /* + * if erst_read return value is less than record head length, + * consider it as -EIO, and clear the record_id cache. + */ + if (len < recordlen) { + len = -EIO; + erst_clear_cache(record_id); + goto out; + } + + /* + * if creatorid is not wanted, consider it as not found, + * for skipping to next record_id. + */ + if (!guid_equal(&record->creator_id, creatorid)) + len = -ENOENT; + +out: + return len; +} +EXPORT_SYMBOL_GPL(erst_read_record); + +int erst_clear(u64 record_id) +{ + int rc, i; + unsigned long flags; + u64 *entries; + + if (erst_disable) + return -ENODEV; + + rc = mutex_lock_interruptible(&erst_record_id_cache.lock); + if (rc) + return rc; + raw_spin_lock_irqsave(&erst_lock, flags); + if (erst_erange.attr & ERST_RANGE_NVRAM) + rc = __erst_clear_from_nvram(record_id); + else + rc = __erst_clear_from_storage(record_id); + raw_spin_unlock_irqrestore(&erst_lock, flags); + if (rc) + goto out; + entries = erst_record_id_cache.entries; + for (i = 0; i < erst_record_id_cache.len; i++) { + if (entries[i] == record_id) + entries[i] = APEI_ERST_INVALID_RECORD_ID; + } + __erst_record_id_cache_compact(); +out: + mutex_unlock(&erst_record_id_cache.lock); + return rc; +} +EXPORT_SYMBOL_GPL(erst_clear); + +static int __init setup_erst_disable(char *str) +{ + erst_disable = 1; + return 1; +} + +__setup("erst_disable", setup_erst_disable); + +static int erst_check_table(struct acpi_table_erst *erst_tab) +{ + if ((erst_tab->header_length != + (sizeof(struct acpi_table_erst) - sizeof(erst_tab->header))) + && (erst_tab->header_length != sizeof(struct acpi_table_erst))) + return -EINVAL; + if (erst_tab->header.length < sizeof(struct acpi_table_erst)) + return -EINVAL; + if (erst_tab->entries != + (erst_tab->header.length - sizeof(struct acpi_table_erst)) / + sizeof(struct acpi_erst_entry)) + return -EINVAL; + + return 0; +} + +static int erst_open_pstore(struct pstore_info *psi); +static int erst_close_pstore(struct pstore_info *psi); +static ssize_t erst_reader(struct pstore_record *record); +static int erst_writer(struct pstore_record *record); +static int erst_clearer(struct pstore_record *record); + +static struct pstore_info erst_info = { + .owner = THIS_MODULE, + .name = "erst", + .flags = PSTORE_FLAGS_DMESG, + .open = erst_open_pstore, + .close = erst_close_pstore, + .read = erst_reader, + .write = erst_writer, + .erase = erst_clearer +}; + +#define CPER_CREATOR_PSTORE \ + GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ + 0x64, 0x90, 0xb8, 0x9d) +#define CPER_SECTION_TYPE_DMESG \ + GUID_INIT(0xc197e04e, 0xd545, 0x4a70, 0x9c, 0x17, 0xa5, 0x54, \ + 0x94, 0x19, 0xeb, 0x12) +#define CPER_SECTION_TYPE_DMESG_Z \ + GUID_INIT(0x4f118707, 0x04dd, 0x4055, 0xb5, 0xdd, 0x95, 0x6d, \ + 0x34, 0xdd, 0xfa, 0xc6) +#define CPER_SECTION_TYPE_MCE \ + GUID_INIT(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ + 0x04, 0x4a, 0x38, 0xfc) + +struct cper_pstore_record { + struct cper_record_header hdr; + struct cper_section_descriptor sec_hdr; + char data[]; +} __packed; + +static int reader_pos; + +static int erst_open_pstore(struct pstore_info *psi) +{ + if (erst_disable) + return -ENODEV; + + return erst_get_record_id_begin(&reader_pos); +} + +static int erst_close_pstore(struct pstore_info *psi) +{ + erst_get_record_id_end(); + + return 0; +} + +static ssize_t erst_reader(struct pstore_record *record) +{ + int rc; + ssize_t len = 0; + u64 record_id; + struct cper_pstore_record *rcd; + size_t rcd_len = sizeof(*rcd) + erst_info.bufsize; + + if (erst_disable) + return -ENODEV; + + rcd = kmalloc(rcd_len, GFP_KERNEL); + if (!rcd) { + rc = -ENOMEM; + goto out; + } +skip: + rc = erst_get_record_id_next(&reader_pos, &record_id); + if (rc) + goto out; + + /* no more record */ + if (record_id == APEI_ERST_INVALID_RECORD_ID) { + rc = -EINVAL; + goto out; + } + + len = erst_read_record(record_id, &rcd->hdr, rcd_len, sizeof(*rcd), + &CPER_CREATOR_PSTORE); + /* The record may be cleared by others, try read next record */ + if (len == -ENOENT) + goto skip; + else if (len < 0) + goto out; + + record->buf = kmalloc(len, GFP_KERNEL); + if (record->buf == NULL) { + rc = -ENOMEM; + goto out; + } + memcpy(record->buf, rcd->data, len - sizeof(*rcd)); + record->id = record_id; + record->compressed = false; + record->ecc_notice_size = 0; + if (guid_equal(&rcd->sec_hdr.section_type, &CPER_SECTION_TYPE_DMESG_Z)) { + record->type = PSTORE_TYPE_DMESG; + record->compressed = true; + } else if (guid_equal(&rcd->sec_hdr.section_type, &CPER_SECTION_TYPE_DMESG)) + record->type = PSTORE_TYPE_DMESG; + else if (guid_equal(&rcd->sec_hdr.section_type, &CPER_SECTION_TYPE_MCE)) + record->type = PSTORE_TYPE_MCE; + else + record->type = PSTORE_TYPE_MAX; + + if (rcd->hdr.validation_bits & CPER_VALID_TIMESTAMP) + record->time.tv_sec = rcd->hdr.timestamp; + else + record->time.tv_sec = 0; + record->time.tv_nsec = 0; + +out: + kfree(rcd); + return (rc < 0) ? rc : (len - sizeof(*rcd)); +} + +static int erst_writer(struct pstore_record *record) +{ + struct cper_pstore_record *rcd = (struct cper_pstore_record *) + (erst_info.buf - sizeof(*rcd)); + int ret; + + memset(rcd, 0, sizeof(*rcd)); + memcpy(rcd->hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE); + rcd->hdr.revision = CPER_RECORD_REV; + rcd->hdr.signature_end = CPER_SIG_END; + rcd->hdr.section_count = 1; + rcd->hdr.error_severity = CPER_SEV_FATAL; + /* timestamp valid. platform_id, partition_id are invalid */ + rcd->hdr.validation_bits = CPER_VALID_TIMESTAMP; + rcd->hdr.timestamp = ktime_get_real_seconds(); + rcd->hdr.record_length = sizeof(*rcd) + record->size; + rcd->hdr.creator_id = CPER_CREATOR_PSTORE; + rcd->hdr.notification_type = CPER_NOTIFY_MCE; + rcd->hdr.record_id = cper_next_record_id(); + rcd->hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR; + + rcd->sec_hdr.section_offset = sizeof(*rcd); + rcd->sec_hdr.section_length = record->size; + rcd->sec_hdr.revision = CPER_SEC_REV; + /* fru_id and fru_text is invalid */ + rcd->sec_hdr.validation_bits = 0; + rcd->sec_hdr.flags = CPER_SEC_PRIMARY; + switch (record->type) { + case PSTORE_TYPE_DMESG: + if (record->compressed) + rcd->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG_Z; + else + rcd->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG; + break; + case PSTORE_TYPE_MCE: + rcd->sec_hdr.section_type = CPER_SECTION_TYPE_MCE; + break; + default: + return -EINVAL; + } + rcd->sec_hdr.section_severity = CPER_SEV_FATAL; + + ret = erst_write(&rcd->hdr); + record->id = rcd->hdr.record_id; + + return ret; +} + +static int erst_clearer(struct pstore_record *record) +{ + return erst_clear(record->id); +} + +static int __init erst_init(void) +{ + int rc = 0; + acpi_status status; + struct apei_exec_context ctx; + struct apei_resources erst_resources; + struct resource *r; + char *buf; + + if (acpi_disabled) + goto err; + + if (erst_disable) { + pr_info( + "Error Record Serialization Table (ERST) support is disabled.\n"); + goto err; + } + + status = acpi_get_table(ACPI_SIG_ERST, 0, + (struct acpi_table_header **)&erst_tab); + if (status == AE_NOT_FOUND) + goto err; + else if (ACPI_FAILURE(status)) { + const char *msg = acpi_format_exception(status); + pr_err("Failed to get table, %s\n", msg); + rc = -EINVAL; + goto err; + } + + rc = erst_check_table(erst_tab); + if (rc) { + pr_err(FW_BUG "ERST table is invalid.\n"); + goto err_put_erst_tab; + } + + apei_resources_init(&erst_resources); + erst_exec_ctx_init(&ctx); + rc = apei_exec_collect_resources(&ctx, &erst_resources); + if (rc) + goto err_fini; + rc = apei_resources_request(&erst_resources, "APEI ERST"); + if (rc) + goto err_fini; + rc = apei_exec_pre_map_gars(&ctx); + if (rc) + goto err_release; + rc = erst_get_erange(&erst_erange); + if (rc) { + if (rc == -ENODEV) + pr_info( + "The corresponding hardware device or firmware implementation " + "is not available.\n"); + else + pr_err("Failed to get Error Log Address Range.\n"); + goto err_unmap_reg; + } + + r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST"); + if (!r) { + pr_err("Can not request [mem %#010llx-%#010llx] for ERST.\n", + (unsigned long long)erst_erange.base, + (unsigned long long)erst_erange.base + erst_erange.size - 1); + rc = -EIO; + goto err_unmap_reg; + } + rc = -ENOMEM; + erst_erange.vaddr = ioremap_cache(erst_erange.base, + erst_erange.size); + if (!erst_erange.vaddr) + goto err_release_erange; + + pr_info( + "Error Record Serialization Table (ERST) support is initialized.\n"); + + buf = kmalloc(erst_erange.size, GFP_KERNEL); + if (buf) { + erst_info.buf = buf + sizeof(struct cper_pstore_record); + erst_info.bufsize = erst_erange.size - + sizeof(struct cper_pstore_record); + rc = pstore_register(&erst_info); + if (rc) { + if (rc != -EPERM) + pr_info( + "Could not register with persistent store.\n"); + erst_info.buf = NULL; + erst_info.bufsize = 0; + kfree(buf); + } + } else + pr_err( + "Failed to allocate %lld bytes for persistent store error log.\n", + erst_erange.size); + + /* Cleanup ERST Resources */ + apei_resources_fini(&erst_resources); + + return 0; + +err_release_erange: + release_mem_region(erst_erange.base, erst_erange.size); +err_unmap_reg: + apei_exec_post_unmap_gars(&ctx); +err_release: + apei_resources_release(&erst_resources); +err_fini: + apei_resources_fini(&erst_resources); +err_put_erst_tab: + acpi_put_table((struct acpi_table_header *)erst_tab); +err: + erst_disable = 1; + return rc; +} + +device_initcall(erst_init); diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c new file mode 100644 index 0000000000..63ad0541db --- /dev/null +++ b/drivers/acpi/apei/ghes.c @@ -0,0 +1,1584 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * APEI Generic Hardware Error Source support + * + * Generic Hardware Error Source provides a way to report platform + * hardware errors (such as that from chipset). It works in so called + * "Firmware First" mode, that is, hardware errors are reported to + * firmware firstly, then reported to Linux by firmware. This way, + * some non-standard hardware error registers or non-standard hardware + * link can be checked by firmware to produce more hardware error + * information for Linux. + * + * For more information about Generic Hardware Error Source, please + * refer to ACPI Specification version 4.0, section 17.3.2.6 + * + * Copyright 2010,2011 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + */ + +#include <linux/arm_sdei.h> +#include <linux/kernel.h> +#include <linux/moduleparam.h> +#include <linux/init.h> +#include <linux/acpi.h> +#include <linux/io.h> +#include <linux/interrupt.h> +#include <linux/timer.h> +#include <linux/cper.h> +#include <linux/platform_device.h> +#include <linux/mutex.h> +#include <linux/ratelimit.h> +#include <linux/vmalloc.h> +#include <linux/irq_work.h> +#include <linux/llist.h> +#include <linux/genalloc.h> +#include <linux/pci.h> +#include <linux/pfn.h> +#include <linux/aer.h> +#include <linux/nmi.h> +#include <linux/sched/clock.h> +#include <linux/uuid.h> +#include <linux/ras.h> +#include <linux/task_work.h> + +#include <acpi/actbl1.h> +#include <acpi/ghes.h> +#include <acpi/apei.h> +#include <asm/fixmap.h> +#include <asm/tlbflush.h> +#include <ras/ras_event.h> + +#include "apei-internal.h" + +#define GHES_PFX "GHES: " + +#define GHES_ESTATUS_MAX_SIZE 65536 +#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536 + +#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3 + +/* This is just an estimation for memory pool allocation */ +#define GHES_ESTATUS_CACHE_AVG_SIZE 512 + +#define GHES_ESTATUS_CACHES_SIZE 4 + +#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL +/* Prevent too many caches are allocated because of RCU */ +#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2) + +#define GHES_ESTATUS_CACHE_LEN(estatus_len) \ + (sizeof(struct ghes_estatus_cache) + (estatus_len)) +#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \ + ((struct acpi_hest_generic_status *) \ + ((struct ghes_estatus_cache *)(estatus_cache) + 1)) + +#define GHES_ESTATUS_NODE_LEN(estatus_len) \ + (sizeof(struct ghes_estatus_node) + (estatus_len)) +#define GHES_ESTATUS_FROM_NODE(estatus_node) \ + ((struct acpi_hest_generic_status *) \ + ((struct ghes_estatus_node *)(estatus_node) + 1)) + +#define GHES_VENDOR_ENTRY_LEN(gdata_len) \ + (sizeof(struct ghes_vendor_record_entry) + (gdata_len)) +#define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry) \ + ((struct acpi_hest_generic_data *) \ + ((struct ghes_vendor_record_entry *)(vendor_entry) + 1)) + +/* + * NMI-like notifications vary by architecture, before the compiler can prune + * unused static functions it needs a value for these enums. + */ +#ifndef CONFIG_ARM_SDE_INTERFACE +#define FIX_APEI_GHES_SDEI_NORMAL __end_of_fixed_addresses +#define FIX_APEI_GHES_SDEI_CRITICAL __end_of_fixed_addresses +#endif + +static ATOMIC_NOTIFIER_HEAD(ghes_report_chain); + +static inline bool is_hest_type_generic_v2(struct ghes *ghes) +{ + return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2; +} + +/* + * This driver isn't really modular, however for the time being, + * continuing to use module_param is the easiest way to remain + * compatible with existing boot arg use cases. + */ +bool ghes_disable; +module_param_named(disable, ghes_disable, bool, 0); + +/* + * "ghes.edac_force_enable" forcibly enables ghes_edac and skips the platform + * check. + */ +static bool ghes_edac_force_enable; +module_param_named(edac_force_enable, ghes_edac_force_enable, bool, 0); + +/* + * All error sources notified with HED (Hardware Error Device) share a + * single notifier callback, so they need to be linked and checked one + * by one. This holds true for NMI too. + * + * RCU is used for these lists, so ghes_list_mutex is only used for + * list changing, not for traversing. + */ +static LIST_HEAD(ghes_hed); +static DEFINE_MUTEX(ghes_list_mutex); + +/* + * A list of GHES devices which are given to the corresponding EDAC driver + * ghes_edac for further use. + */ +static LIST_HEAD(ghes_devs); +static DEFINE_MUTEX(ghes_devs_mutex); + +/* + * Because the memory area used to transfer hardware error information + * from BIOS to Linux can be determined only in NMI, IRQ or timer + * handler, but general ioremap can not be used in atomic context, so + * the fixmap is used instead. + * + * This spinlock is used to prevent the fixmap entry from being used + * simultaneously. + */ +static DEFINE_SPINLOCK(ghes_notify_lock_irq); + +struct ghes_vendor_record_entry { + struct work_struct work; + int error_severity; + char vendor_record[]; +}; + +static struct gen_pool *ghes_estatus_pool; + +static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; +static atomic_t ghes_estatus_cache_alloced; + +static int ghes_panic_timeout __read_mostly = 30; + +static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx) +{ + phys_addr_t paddr; + pgprot_t prot; + + paddr = PFN_PHYS(pfn); + prot = arch_apei_get_mem_attribute(paddr); + __set_fixmap(fixmap_idx, paddr, prot); + + return (void __iomem *) __fix_to_virt(fixmap_idx); +} + +static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx) +{ + int _idx = virt_to_fix((unsigned long)vaddr); + + WARN_ON_ONCE(fixmap_idx != _idx); + clear_fixmap(fixmap_idx); +} + +int ghes_estatus_pool_init(unsigned int num_ghes) +{ + unsigned long addr, len; + int rc; + + ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); + if (!ghes_estatus_pool) + return -ENOMEM; + + len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX; + len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE); + + addr = (unsigned long)vmalloc(PAGE_ALIGN(len)); + if (!addr) + goto err_pool_alloc; + + rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); + if (rc) + goto err_pool_add; + + return 0; + +err_pool_add: + vfree((void *)addr); + +err_pool_alloc: + gen_pool_destroy(ghes_estatus_pool); + + return -ENOMEM; +} + +/** + * ghes_estatus_pool_region_free - free previously allocated memory + * from the ghes_estatus_pool. + * @addr: address of memory to free. + * @size: size of memory to free. + * + * Returns none. + */ +void ghes_estatus_pool_region_free(unsigned long addr, u32 size) +{ + gen_pool_free(ghes_estatus_pool, addr, size); +} +EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free); + +static int map_gen_v2(struct ghes *ghes) +{ + return apei_map_generic_address(&ghes->generic_v2->read_ack_register); +} + +static void unmap_gen_v2(struct ghes *ghes) +{ + apei_unmap_generic_address(&ghes->generic_v2->read_ack_register); +} + +static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2) +{ + int rc; + u64 val = 0; + + rc = apei_read(&val, &gv2->read_ack_register); + if (rc) + return; + + val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset; + val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset; + + apei_write(val, &gv2->read_ack_register); +} + +static struct ghes *ghes_new(struct acpi_hest_generic *generic) +{ + struct ghes *ghes; + unsigned int error_block_length; + int rc; + + ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); + if (!ghes) + return ERR_PTR(-ENOMEM); + + ghes->generic = generic; + if (is_hest_type_generic_v2(ghes)) { + rc = map_gen_v2(ghes); + if (rc) + goto err_free; + } + + rc = apei_map_generic_address(&generic->error_status_address); + if (rc) + goto err_unmap_read_ack_addr; + error_block_length = generic->error_block_length; + if (error_block_length > GHES_ESTATUS_MAX_SIZE) { + pr_warn(FW_WARN GHES_PFX + "Error status block length is too long: %u for " + "generic hardware error source: %d.\n", + error_block_length, generic->header.source_id); + error_block_length = GHES_ESTATUS_MAX_SIZE; + } + ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); + if (!ghes->estatus) { + rc = -ENOMEM; + goto err_unmap_status_addr; + } + + return ghes; + +err_unmap_status_addr: + apei_unmap_generic_address(&generic->error_status_address); +err_unmap_read_ack_addr: + if (is_hest_type_generic_v2(ghes)) + unmap_gen_v2(ghes); +err_free: + kfree(ghes); + return ERR_PTR(rc); +} + +static void ghes_fini(struct ghes *ghes) +{ + kfree(ghes->estatus); + apei_unmap_generic_address(&ghes->generic->error_status_address); + if (is_hest_type_generic_v2(ghes)) + unmap_gen_v2(ghes); +} + +static inline int ghes_severity(int severity) +{ + switch (severity) { + case CPER_SEV_INFORMATIONAL: + return GHES_SEV_NO; + case CPER_SEV_CORRECTED: + return GHES_SEV_CORRECTED; + case CPER_SEV_RECOVERABLE: + return GHES_SEV_RECOVERABLE; + case CPER_SEV_FATAL: + return GHES_SEV_PANIC; + default: + /* Unknown, go panic */ + return GHES_SEV_PANIC; + } +} + +static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, + int from_phys, + enum fixed_addresses fixmap_idx) +{ + void __iomem *vaddr; + u64 offset; + u32 trunk; + + while (len > 0) { + offset = paddr - (paddr & PAGE_MASK); + vaddr = ghes_map(PHYS_PFN(paddr), fixmap_idx); + trunk = PAGE_SIZE - offset; + trunk = min(trunk, len); + if (from_phys) + memcpy_fromio(buffer, vaddr + offset, trunk); + else + memcpy_toio(vaddr + offset, buffer, trunk); + len -= trunk; + paddr += trunk; + buffer += trunk; + ghes_unmap(vaddr, fixmap_idx); + } +} + +/* Check the top-level record header has an appropriate size. */ +static int __ghes_check_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus) +{ + u32 len = cper_estatus_len(estatus); + + if (len < sizeof(*estatus)) { + pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n"); + return -EIO; + } + + if (len > ghes->generic->error_block_length) { + pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n"); + return -EIO; + } + + if (cper_estatus_check_header(estatus)) { + pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n"); + return -EIO; + } + + return 0; +} + +/* Read the CPER block, returning its address, and header in estatus. */ +static int __ghes_peek_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 *buf_paddr, enum fixed_addresses fixmap_idx) +{ + struct acpi_hest_generic *g = ghes->generic; + int rc; + + rc = apei_read(buf_paddr, &g->error_status_address); + if (rc) { + *buf_paddr = 0; + pr_warn_ratelimited(FW_WARN GHES_PFX +"Failed to read error status block address for hardware error source: %d.\n", + g->header.source_id); + return -EIO; + } + if (!*buf_paddr) + return -ENOENT; + + ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1, + fixmap_idx); + if (!estatus->block_status) { + *buf_paddr = 0; + return -ENOENT; + } + + return 0; +} + +static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus, + u64 buf_paddr, enum fixed_addresses fixmap_idx, + size_t buf_len) +{ + ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx); + if (cper_estatus_check(estatus)) { + pr_warn_ratelimited(FW_WARN GHES_PFX + "Failed to read error status block!\n"); + return -EIO; + } + + return 0; +} + +static int ghes_read_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 *buf_paddr, enum fixed_addresses fixmap_idx) +{ + int rc; + + rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx); + if (rc) + return rc; + + rc = __ghes_check_estatus(ghes, estatus); + if (rc) + return rc; + + return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx, + cper_estatus_len(estatus)); +} + +static void ghes_clear_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 buf_paddr, enum fixed_addresses fixmap_idx) +{ + estatus->block_status = 0; + + if (!buf_paddr) + return; + + ghes_copy_tofrom_phys(estatus, buf_paddr, + sizeof(estatus->block_status), 0, + fixmap_idx); + + /* + * GHESv2 type HEST entries introduce support for error acknowledgment, + * so only acknowledge the error if this support is present. + */ + if (is_hest_type_generic_v2(ghes)) + ghes_ack_error(ghes->generic_v2); +} + +/* + * Called as task_work before returning to user-space. + * Ensure any queued work has been done before we return to the context that + * triggered the notification. + */ +static void ghes_kick_task_work(struct callback_head *head) +{ + struct acpi_hest_generic_status *estatus; + struct ghes_estatus_node *estatus_node; + u32 node_len; + + estatus_node = container_of(head, struct ghes_estatus_node, task_work); + if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) + memory_failure_queue_kick(estatus_node->task_work_cpu); + + estatus = GHES_ESTATUS_FROM_NODE(estatus_node); + node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus)); + gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len); +} + +static bool ghes_do_memory_failure(u64 physical_addr, int flags) +{ + unsigned long pfn; + + if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) + return false; + + pfn = PHYS_PFN(physical_addr); + if (!pfn_valid(pfn) && !arch_is_platform_page(physical_addr)) { + pr_warn_ratelimited(FW_WARN GHES_PFX + "Invalid address in generic error data: %#llx\n", + physical_addr); + return false; + } + + memory_failure_queue(pfn, flags); + return true; +} + +static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, + int sev) +{ + int flags = -1; + int sec_sev = ghes_severity(gdata->error_severity); + struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); + + if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) + return false; + + /* iff following two events can be handled properly by now */ + if (sec_sev == GHES_SEV_CORRECTED && + (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED)) + flags = MF_SOFT_OFFLINE; + if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE) + flags = 0; + + if (flags != -1) + return ghes_do_memory_failure(mem_err->physical_addr, flags); + + return false; +} + +static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int sev) +{ + struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); + bool queued = false; + int sec_sev, i; + char *p; + + log_arm_hw_error(err); + + sec_sev = ghes_severity(gdata->error_severity); + if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE) + return false; + + p = (char *)(err + 1); + for (i = 0; i < err->err_info_num; i++) { + struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p; + bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR); + bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR); + const char *error_type = "unknown error"; + + /* + * The field (err_info->error_info & BIT(26)) is fixed to set to + * 1 in some old firmware of HiSilicon Kunpeng920. We assume that + * firmware won't mix corrected errors in an uncorrected section, + * and don't filter out 'corrected' error here. + */ + if (is_cache && has_pa) { + queued = ghes_do_memory_failure(err_info->physical_fault_addr, 0); + p += err_info->length; + continue; + } + + if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs)) + error_type = cper_proc_error_type_strs[err_info->type]; + + pr_warn_ratelimited(FW_WARN GHES_PFX + "Unhandled processor error type: %s\n", + error_type); + p += err_info->length; + } + + return queued; +} + +/* + * PCIe AER errors need to be sent to the AER driver for reporting and + * recovery. The GHES severities map to the following AER severities and + * require the following handling: + * + * GHES_SEV_CORRECTABLE -> AER_CORRECTABLE + * These need to be reported by the AER driver but no recovery is + * necessary. + * GHES_SEV_RECOVERABLE -> AER_NONFATAL + * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL + * These both need to be reported and recovered from by the AER driver. + * GHES_SEV_PANIC does not make it to this handling since the kernel must + * panic. + */ +static void ghes_handle_aer(struct acpi_hest_generic_data *gdata) +{ +#ifdef CONFIG_ACPI_APEI_PCIEAER + struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata); + + if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID && + pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) { + unsigned int devfn; + int aer_severity; + u8 *aer_info; + + devfn = PCI_DEVFN(pcie_err->device_id.device, + pcie_err->device_id.function); + aer_severity = cper_severity_to_aer(gdata->error_severity); + + /* + * If firmware reset the component to contain + * the error, we must reinitialize it before + * use, so treat it as a fatal AER error. + */ + if (gdata->flags & CPER_SEC_RESET) + aer_severity = AER_FATAL; + + aer_info = (void *)gen_pool_alloc(ghes_estatus_pool, + sizeof(struct aer_capability_regs)); + if (!aer_info) + return; + memcpy(aer_info, pcie_err->aer_info, sizeof(struct aer_capability_regs)); + + aer_recover_queue(pcie_err->device_id.segment, + pcie_err->device_id.bus, + devfn, aer_severity, + (struct aer_capability_regs *) + aer_info); + } +#endif +} + +static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list); + +int ghes_register_vendor_record_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&vendor_record_notify_list, nb); +} +EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier); + +void ghes_unregister_vendor_record_notifier(struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&vendor_record_notify_list, nb); +} +EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier); + +static void ghes_vendor_record_work_func(struct work_struct *work) +{ + struct ghes_vendor_record_entry *entry; + struct acpi_hest_generic_data *gdata; + u32 len; + + entry = container_of(work, struct ghes_vendor_record_entry, work); + gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry); + + blocking_notifier_call_chain(&vendor_record_notify_list, + entry->error_severity, gdata); + + len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata)); + gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len); +} + +static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, + int sev) +{ + struct acpi_hest_generic_data *copied_gdata; + struct ghes_vendor_record_entry *entry; + u32 len; + + len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata)); + entry = (void *)gen_pool_alloc(ghes_estatus_pool, len); + if (!entry) + return; + + copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry); + memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata)); + entry->error_severity = sev; + + INIT_WORK(&entry->work, ghes_vendor_record_work_func); + schedule_work(&entry->work); +} + +static bool ghes_do_proc(struct ghes *ghes, + const struct acpi_hest_generic_status *estatus) +{ + int sev, sec_sev; + struct acpi_hest_generic_data *gdata; + guid_t *sec_type; + const guid_t *fru_id = &guid_null; + char *fru_text = ""; + bool queued = false; + + sev = ghes_severity(estatus->error_severity); + apei_estatus_for_each_section(estatus, gdata) { + sec_type = (guid_t *)gdata->section_type; + sec_sev = ghes_severity(gdata->error_severity); + if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) + fru_id = (guid_t *)gdata->fru_id; + + if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) + fru_text = gdata->fru_text; + + if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { + struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); + + atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err); + + arch_apei_report_mem_error(sev, mem_err); + queued = ghes_handle_memory_failure(gdata, sev); + } + else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { + ghes_handle_aer(gdata); + } + else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { + queued = ghes_handle_arm_hw_error(gdata, sev); + } else { + void *err = acpi_hest_get_payload(gdata); + + ghes_defer_non_standard_event(gdata, sev); + log_non_standard_event(sec_type, fru_id, fru_text, + sec_sev, err, + gdata->error_data_length); + } + } + + return queued; +} + +static void __ghes_print_estatus(const char *pfx, + const struct acpi_hest_generic *generic, + const struct acpi_hest_generic_status *estatus) +{ + static atomic_t seqno; + unsigned int curr_seqno; + char pfx_seq[64]; + + if (pfx == NULL) { + if (ghes_severity(estatus->error_severity) <= + GHES_SEV_CORRECTED) + pfx = KERN_WARNING; + else + pfx = KERN_ERR; + } + curr_seqno = atomic_inc_return(&seqno); + snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno); + printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", + pfx_seq, generic->header.source_id); + cper_estatus_print(pfx_seq, estatus); +} + +static int ghes_print_estatus(const char *pfx, + const struct acpi_hest_generic *generic, + const struct acpi_hest_generic_status *estatus) +{ + /* Not more than 2 messages every 5 seconds */ + static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); + static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2); + struct ratelimit_state *ratelimit; + + if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) + ratelimit = &ratelimit_corrected; + else + ratelimit = &ratelimit_uncorrected; + if (__ratelimit(ratelimit)) { + __ghes_print_estatus(pfx, generic, estatus); + return 1; + } + return 0; +} + +/* + * GHES error status reporting throttle, to report more kinds of + * errors, instead of just most frequently occurred errors. + */ +static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus) +{ + u32 len; + int i, cached = 0; + unsigned long long now; + struct ghes_estatus_cache *cache; + struct acpi_hest_generic_status *cache_estatus; + + len = cper_estatus_len(estatus); + rcu_read_lock(); + for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { + cache = rcu_dereference(ghes_estatus_caches[i]); + if (cache == NULL) + continue; + if (len != cache->estatus_len) + continue; + cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); + if (memcmp(estatus, cache_estatus, len)) + continue; + atomic_inc(&cache->count); + now = sched_clock(); + if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC) + cached = 1; + break; + } + rcu_read_unlock(); + return cached; +} + +static struct ghes_estatus_cache *ghes_estatus_cache_alloc( + struct acpi_hest_generic *generic, + struct acpi_hest_generic_status *estatus) +{ + int alloced; + u32 len, cache_len; + struct ghes_estatus_cache *cache; + struct acpi_hest_generic_status *cache_estatus; + + alloced = atomic_add_return(1, &ghes_estatus_cache_alloced); + if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) { + atomic_dec(&ghes_estatus_cache_alloced); + return NULL; + } + len = cper_estatus_len(estatus); + cache_len = GHES_ESTATUS_CACHE_LEN(len); + cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len); + if (!cache) { + atomic_dec(&ghes_estatus_cache_alloced); + return NULL; + } + cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); + memcpy(cache_estatus, estatus, len); + cache->estatus_len = len; + atomic_set(&cache->count, 0); + cache->generic = generic; + cache->time_in = sched_clock(); + return cache; +} + +static void ghes_estatus_cache_rcu_free(struct rcu_head *head) +{ + struct ghes_estatus_cache *cache; + u32 len; + + cache = container_of(head, struct ghes_estatus_cache, rcu); + len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache)); + len = GHES_ESTATUS_CACHE_LEN(len); + gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len); + atomic_dec(&ghes_estatus_cache_alloced); +} + +static void +ghes_estatus_cache_add(struct acpi_hest_generic *generic, + struct acpi_hest_generic_status *estatus) +{ + unsigned long long now, duration, period, max_period = 0; + struct ghes_estatus_cache *cache, *new_cache; + struct ghes_estatus_cache __rcu *victim; + int i, slot = -1, count; + + new_cache = ghes_estatus_cache_alloc(generic, estatus); + if (!new_cache) + return; + + rcu_read_lock(); + now = sched_clock(); + for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { + cache = rcu_dereference(ghes_estatus_caches[i]); + if (cache == NULL) { + slot = i; + break; + } + duration = now - cache->time_in; + if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) { + slot = i; + break; + } + count = atomic_read(&cache->count); + period = duration; + do_div(period, (count + 1)); + if (period > max_period) { + max_period = period; + slot = i; + } + } + rcu_read_unlock(); + + if (slot != -1) { + /* + * Use release semantics to ensure that ghes_estatus_cached() + * running on another CPU will see the updated cache fields if + * it can see the new value of the pointer. + */ + victim = xchg_release(&ghes_estatus_caches[slot], + RCU_INITIALIZER(new_cache)); + + /* + * At this point, victim may point to a cached item different + * from the one based on which we selected the slot. Instead of + * going to the loop again to pick another slot, let's just + * drop the other item anyway: this may cause a false cache + * miss later on, but that won't cause any problems. + */ + if (victim) + call_rcu(&unrcu_pointer(victim)->rcu, + ghes_estatus_cache_rcu_free); + } +} + +static void __ghes_panic(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 buf_paddr, enum fixed_addresses fixmap_idx) +{ + __ghes_print_estatus(KERN_EMERG, ghes->generic, estatus); + + ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); + + /* reboot to log the error! */ + if (!panic_timeout) + panic_timeout = ghes_panic_timeout; + panic("Fatal hardware error!"); +} + +static int ghes_proc(struct ghes *ghes) +{ + struct acpi_hest_generic_status *estatus = ghes->estatus; + u64 buf_paddr; + int rc; + + rc = ghes_read_estatus(ghes, estatus, &buf_paddr, FIX_APEI_GHES_IRQ); + if (rc) + goto out; + + if (ghes_severity(estatus->error_severity) >= GHES_SEV_PANIC) + __ghes_panic(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); + + if (!ghes_estatus_cached(estatus)) { + if (ghes_print_estatus(NULL, ghes->generic, estatus)) + ghes_estatus_cache_add(ghes->generic, estatus); + } + ghes_do_proc(ghes, estatus); + +out: + ghes_clear_estatus(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); + + return rc; +} + +static void ghes_add_timer(struct ghes *ghes) +{ + struct acpi_hest_generic *g = ghes->generic; + unsigned long expire; + + if (!g->notify.poll_interval) { + pr_warn(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n", + g->header.source_id); + return; + } + expire = jiffies + msecs_to_jiffies(g->notify.poll_interval); + ghes->timer.expires = round_jiffies_relative(expire); + add_timer(&ghes->timer); +} + +static void ghes_poll_func(struct timer_list *t) +{ + struct ghes *ghes = from_timer(ghes, t, timer); + unsigned long flags; + + spin_lock_irqsave(&ghes_notify_lock_irq, flags); + ghes_proc(ghes); + spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); + if (!(ghes->flags & GHES_EXITING)) + ghes_add_timer(ghes); +} + +static irqreturn_t ghes_irq_func(int irq, void *data) +{ + struct ghes *ghes = data; + unsigned long flags; + int rc; + + spin_lock_irqsave(&ghes_notify_lock_irq, flags); + rc = ghes_proc(ghes); + spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); + if (rc) + return IRQ_NONE; + + return IRQ_HANDLED; +} + +static int ghes_notify_hed(struct notifier_block *this, unsigned long event, + void *data) +{ + struct ghes *ghes; + unsigned long flags; + int ret = NOTIFY_DONE; + + spin_lock_irqsave(&ghes_notify_lock_irq, flags); + rcu_read_lock(); + list_for_each_entry_rcu(ghes, &ghes_hed, list) { + if (!ghes_proc(ghes)) + ret = NOTIFY_OK; + } + rcu_read_unlock(); + spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); + + return ret; +} + +static struct notifier_block ghes_notifier_hed = { + .notifier_call = ghes_notify_hed, +}; + +/* + * Handlers for CPER records may not be NMI safe. For example, + * memory_failure_queue() takes spinlocks and calls schedule_work_on(). + * In any NMI-like handler, memory from ghes_estatus_pool is used to save + * estatus, and added to the ghes_estatus_llist. irq_work_queue() causes + * ghes_proc_in_irq() to run in IRQ context where each estatus in + * ghes_estatus_llist is processed. + * + * Memory from the ghes_estatus_pool is also used with the ghes_estatus_cache + * to suppress frequent messages. + */ +static struct llist_head ghes_estatus_llist; +static struct irq_work ghes_proc_irq_work; + +static void ghes_proc_in_irq(struct irq_work *irq_work) +{ + struct llist_node *llnode, *next; + struct ghes_estatus_node *estatus_node; + struct acpi_hest_generic *generic; + struct acpi_hest_generic_status *estatus; + bool task_work_pending; + u32 len, node_len; + int ret; + + llnode = llist_del_all(&ghes_estatus_llist); + /* + * Because the time order of estatus in list is reversed, + * revert it back to proper order. + */ + llnode = llist_reverse_order(llnode); + while (llnode) { + next = llnode->next; + estatus_node = llist_entry(llnode, struct ghes_estatus_node, + llnode); + estatus = GHES_ESTATUS_FROM_NODE(estatus_node); + len = cper_estatus_len(estatus); + node_len = GHES_ESTATUS_NODE_LEN(len); + task_work_pending = ghes_do_proc(estatus_node->ghes, estatus); + if (!ghes_estatus_cached(estatus)) { + generic = estatus_node->generic; + if (ghes_print_estatus(NULL, generic, estatus)) + ghes_estatus_cache_add(generic, estatus); + } + + if (task_work_pending && current->mm) { + estatus_node->task_work.func = ghes_kick_task_work; + estatus_node->task_work_cpu = smp_processor_id(); + ret = task_work_add(current, &estatus_node->task_work, + TWA_RESUME); + if (ret) + estatus_node->task_work.func = NULL; + } + + if (!estatus_node->task_work.func) + gen_pool_free(ghes_estatus_pool, + (unsigned long)estatus_node, node_len); + + llnode = next; + } +} + +static void ghes_print_queued_estatus(void) +{ + struct llist_node *llnode; + struct ghes_estatus_node *estatus_node; + struct acpi_hest_generic *generic; + struct acpi_hest_generic_status *estatus; + + llnode = llist_del_all(&ghes_estatus_llist); + /* + * Because the time order of estatus in list is reversed, + * revert it back to proper order. + */ + llnode = llist_reverse_order(llnode); + while (llnode) { + estatus_node = llist_entry(llnode, struct ghes_estatus_node, + llnode); + estatus = GHES_ESTATUS_FROM_NODE(estatus_node); + generic = estatus_node->generic; + ghes_print_estatus(NULL, generic, estatus); + llnode = llnode->next; + } +} + +static int ghes_in_nmi_queue_one_entry(struct ghes *ghes, + enum fixed_addresses fixmap_idx) +{ + struct acpi_hest_generic_status *estatus, tmp_header; + struct ghes_estatus_node *estatus_node; + u32 len, node_len; + u64 buf_paddr; + int sev, rc; + + if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG)) + return -EOPNOTSUPP; + + rc = __ghes_peek_estatus(ghes, &tmp_header, &buf_paddr, fixmap_idx); + if (rc) { + ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); + return rc; + } + + rc = __ghes_check_estatus(ghes, &tmp_header); + if (rc) { + ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); + return rc; + } + + len = cper_estatus_len(&tmp_header); + node_len = GHES_ESTATUS_NODE_LEN(len); + estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len); + if (!estatus_node) + return -ENOMEM; + + estatus_node->ghes = ghes; + estatus_node->generic = ghes->generic; + estatus_node->task_work.func = NULL; + estatus = GHES_ESTATUS_FROM_NODE(estatus_node); + + if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) { + ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); + rc = -ENOENT; + goto no_work; + } + + sev = ghes_severity(estatus->error_severity); + if (sev >= GHES_SEV_PANIC) { + ghes_print_queued_estatus(); + __ghes_panic(ghes, estatus, buf_paddr, fixmap_idx); + } + + ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); + + /* This error has been reported before, don't process it again. */ + if (ghes_estatus_cached(estatus)) + goto no_work; + + llist_add(&estatus_node->llnode, &ghes_estatus_llist); + + return rc; + +no_work: + gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, + node_len); + + return rc; +} + +static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list, + enum fixed_addresses fixmap_idx) +{ + int ret = -ENOENT; + struct ghes *ghes; + + rcu_read_lock(); + list_for_each_entry_rcu(ghes, rcu_list, list) { + if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) + ret = 0; + } + rcu_read_unlock(); + + if (IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && !ret) + irq_work_queue(&ghes_proc_irq_work); + + return ret; +} + +#ifdef CONFIG_ACPI_APEI_SEA +static LIST_HEAD(ghes_sea); + +/* + * Return 0 only if one of the SEA error sources successfully reported an error + * record sent from the firmware. + */ +int ghes_notify_sea(void) +{ + static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sea); + int rv; + + raw_spin_lock(&ghes_notify_lock_sea); + rv = ghes_in_nmi_spool_from_list(&ghes_sea, FIX_APEI_GHES_SEA); + raw_spin_unlock(&ghes_notify_lock_sea); + + return rv; +} + +static void ghes_sea_add(struct ghes *ghes) +{ + mutex_lock(&ghes_list_mutex); + list_add_rcu(&ghes->list, &ghes_sea); + mutex_unlock(&ghes_list_mutex); +} + +static void ghes_sea_remove(struct ghes *ghes) +{ + mutex_lock(&ghes_list_mutex); + list_del_rcu(&ghes->list); + mutex_unlock(&ghes_list_mutex); + synchronize_rcu(); +} +#else /* CONFIG_ACPI_APEI_SEA */ +static inline void ghes_sea_add(struct ghes *ghes) { } +static inline void ghes_sea_remove(struct ghes *ghes) { } +#endif /* CONFIG_ACPI_APEI_SEA */ + +#ifdef CONFIG_HAVE_ACPI_APEI_NMI +/* + * NMI may be triggered on any CPU, so ghes_in_nmi is used for + * having only one concurrent reader. + */ +static atomic_t ghes_in_nmi = ATOMIC_INIT(0); + +static LIST_HEAD(ghes_nmi); + +static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) +{ + static DEFINE_RAW_SPINLOCK(ghes_notify_lock_nmi); + int ret = NMI_DONE; + + if (!atomic_add_unless(&ghes_in_nmi, 1, 1)) + return ret; + + raw_spin_lock(&ghes_notify_lock_nmi); + if (!ghes_in_nmi_spool_from_list(&ghes_nmi, FIX_APEI_GHES_NMI)) + ret = NMI_HANDLED; + raw_spin_unlock(&ghes_notify_lock_nmi); + + atomic_dec(&ghes_in_nmi); + return ret; +} + +static void ghes_nmi_add(struct ghes *ghes) +{ + mutex_lock(&ghes_list_mutex); + if (list_empty(&ghes_nmi)) + register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes"); + list_add_rcu(&ghes->list, &ghes_nmi); + mutex_unlock(&ghes_list_mutex); +} + +static void ghes_nmi_remove(struct ghes *ghes) +{ + mutex_lock(&ghes_list_mutex); + list_del_rcu(&ghes->list); + if (list_empty(&ghes_nmi)) + unregister_nmi_handler(NMI_LOCAL, "ghes"); + mutex_unlock(&ghes_list_mutex); + /* + * To synchronize with NMI handler, ghes can only be + * freed after NMI handler finishes. + */ + synchronize_rcu(); +} +#else /* CONFIG_HAVE_ACPI_APEI_NMI */ +static inline void ghes_nmi_add(struct ghes *ghes) { } +static inline void ghes_nmi_remove(struct ghes *ghes) { } +#endif /* CONFIG_HAVE_ACPI_APEI_NMI */ + +static void ghes_nmi_init_cxt(void) +{ + init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); +} + +static int __ghes_sdei_callback(struct ghes *ghes, + enum fixed_addresses fixmap_idx) +{ + if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) { + irq_work_queue(&ghes_proc_irq_work); + + return 0; + } + + return -ENOENT; +} + +static int ghes_sdei_normal_callback(u32 event_num, struct pt_regs *regs, + void *arg) +{ + static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_normal); + struct ghes *ghes = arg; + int err; + + raw_spin_lock(&ghes_notify_lock_sdei_normal); + err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_NORMAL); + raw_spin_unlock(&ghes_notify_lock_sdei_normal); + + return err; +} + +static int ghes_sdei_critical_callback(u32 event_num, struct pt_regs *regs, + void *arg) +{ + static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_critical); + struct ghes *ghes = arg; + int err; + + raw_spin_lock(&ghes_notify_lock_sdei_critical); + err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_CRITICAL); + raw_spin_unlock(&ghes_notify_lock_sdei_critical); + + return err; +} + +static int apei_sdei_register_ghes(struct ghes *ghes) +{ + if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) + return -EOPNOTSUPP; + + return sdei_register_ghes(ghes, ghes_sdei_normal_callback, + ghes_sdei_critical_callback); +} + +static int apei_sdei_unregister_ghes(struct ghes *ghes) +{ + if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) + return -EOPNOTSUPP; + + return sdei_unregister_ghes(ghes); +} + +static int ghes_probe(struct platform_device *ghes_dev) +{ + struct acpi_hest_generic *generic; + struct ghes *ghes = NULL; + unsigned long flags; + + int rc = -EINVAL; + + generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; + if (!generic->enabled) + return -ENODEV; + + switch (generic->notify.type) { + case ACPI_HEST_NOTIFY_POLLED: + case ACPI_HEST_NOTIFY_EXTERNAL: + case ACPI_HEST_NOTIFY_SCI: + case ACPI_HEST_NOTIFY_GSIV: + case ACPI_HEST_NOTIFY_GPIO: + break; + + case ACPI_HEST_NOTIFY_SEA: + if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) { + pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n", + generic->header.source_id); + rc = -ENOTSUPP; + goto err; + } + break; + case ACPI_HEST_NOTIFY_NMI: + if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) { + pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n", + generic->header.source_id); + goto err; + } + break; + case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: + if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) { + pr_warn(GHES_PFX "Generic hardware error source: %d notified via SDE Interface is not supported!\n", + generic->header.source_id); + goto err; + } + break; + case ACPI_HEST_NOTIFY_LOCAL: + pr_warn(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", + generic->header.source_id); + goto err; + default: + pr_warn(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n", + generic->notify.type, generic->header.source_id); + goto err; + } + + rc = -EIO; + if (generic->error_block_length < + sizeof(struct acpi_hest_generic_status)) { + pr_warn(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n", + generic->error_block_length, generic->header.source_id); + goto err; + } + ghes = ghes_new(generic); + if (IS_ERR(ghes)) { + rc = PTR_ERR(ghes); + ghes = NULL; + goto err; + } + + switch (generic->notify.type) { + case ACPI_HEST_NOTIFY_POLLED: + timer_setup(&ghes->timer, ghes_poll_func, 0); + ghes_add_timer(ghes); + break; + case ACPI_HEST_NOTIFY_EXTERNAL: + /* External interrupt vector is GSI */ + rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq); + if (rc) { + pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n", + generic->header.source_id); + goto err; + } + rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED, + "GHES IRQ", ghes); + if (rc) { + pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n", + generic->header.source_id); + goto err; + } + break; + + case ACPI_HEST_NOTIFY_SCI: + case ACPI_HEST_NOTIFY_GSIV: + case ACPI_HEST_NOTIFY_GPIO: + mutex_lock(&ghes_list_mutex); + if (list_empty(&ghes_hed)) + register_acpi_hed_notifier(&ghes_notifier_hed); + list_add_rcu(&ghes->list, &ghes_hed); + mutex_unlock(&ghes_list_mutex); + break; + + case ACPI_HEST_NOTIFY_SEA: + ghes_sea_add(ghes); + break; + case ACPI_HEST_NOTIFY_NMI: + ghes_nmi_add(ghes); + break; + case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: + rc = apei_sdei_register_ghes(ghes); + if (rc) + goto err; + break; + default: + BUG(); + } + + platform_set_drvdata(ghes_dev, ghes); + + ghes->dev = &ghes_dev->dev; + + mutex_lock(&ghes_devs_mutex); + list_add_tail(&ghes->elist, &ghes_devs); + mutex_unlock(&ghes_devs_mutex); + + /* Handle any pending errors right away */ + spin_lock_irqsave(&ghes_notify_lock_irq, flags); + ghes_proc(ghes); + spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); + + return 0; + +err: + if (ghes) { + ghes_fini(ghes); + kfree(ghes); + } + return rc; +} + +static int ghes_remove(struct platform_device *ghes_dev) +{ + int rc; + struct ghes *ghes; + struct acpi_hest_generic *generic; + + ghes = platform_get_drvdata(ghes_dev); + generic = ghes->generic; + + ghes->flags |= GHES_EXITING; + switch (generic->notify.type) { + case ACPI_HEST_NOTIFY_POLLED: + timer_shutdown_sync(&ghes->timer); + break; + case ACPI_HEST_NOTIFY_EXTERNAL: + free_irq(ghes->irq, ghes); + break; + + case ACPI_HEST_NOTIFY_SCI: + case ACPI_HEST_NOTIFY_GSIV: + case ACPI_HEST_NOTIFY_GPIO: + mutex_lock(&ghes_list_mutex); + list_del_rcu(&ghes->list); + if (list_empty(&ghes_hed)) + unregister_acpi_hed_notifier(&ghes_notifier_hed); + mutex_unlock(&ghes_list_mutex); + synchronize_rcu(); + break; + + case ACPI_HEST_NOTIFY_SEA: + ghes_sea_remove(ghes); + break; + case ACPI_HEST_NOTIFY_NMI: + ghes_nmi_remove(ghes); + break; + case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: + rc = apei_sdei_unregister_ghes(ghes); + if (rc) + return rc; + break; + default: + BUG(); + break; + } + + ghes_fini(ghes); + + mutex_lock(&ghes_devs_mutex); + list_del(&ghes->elist); + mutex_unlock(&ghes_devs_mutex); + + kfree(ghes); + + return 0; +} + +static struct platform_driver ghes_platform_driver = { + .driver = { + .name = "GHES", + }, + .probe = ghes_probe, + .remove = ghes_remove, +}; + +void __init acpi_ghes_init(void) +{ + int rc; + + sdei_init(); + + if (acpi_disabled) + return; + + switch (hest_disable) { + case HEST_NOT_FOUND: + return; + case HEST_DISABLED: + pr_info(GHES_PFX "HEST is not enabled!\n"); + return; + default: + break; + } + + if (ghes_disable) { + pr_info(GHES_PFX "GHES is not enabled!\n"); + return; + } + + ghes_nmi_init_cxt(); + + rc = platform_driver_register(&ghes_platform_driver); + if (rc) + return; + + rc = apei_osc_setup(); + if (rc == 0 && osc_sb_apei_support_acked) + pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); + else if (rc == 0 && !osc_sb_apei_support_acked) + pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n"); + else if (rc && osc_sb_apei_support_acked) + pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); + else + pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); +} + +/* + * Known x86 systems that prefer GHES error reporting: + */ +static struct acpi_platform_list plat_list[] = { + {"HPE ", "Server ", 0, ACPI_SIG_FADT, all_versions}, + { } /* End */ +}; + +struct list_head *ghes_get_devices(void) +{ + int idx = -1; + + if (IS_ENABLED(CONFIG_X86)) { + idx = acpi_match_platform_list(plat_list); + if (idx < 0) { + if (!ghes_edac_force_enable) + return NULL; + + pr_warn_once("Force-loading ghes_edac on an unsupported platform. You're on your own!\n"); + } + } else if (list_empty(&ghes_devs)) { + return NULL; + } + + return &ghes_devs; +} +EXPORT_SYMBOL_GPL(ghes_get_devices); + +void ghes_register_report_chain(struct notifier_block *nb) +{ + atomic_notifier_chain_register(&ghes_report_chain, nb); +} +EXPORT_SYMBOL_GPL(ghes_register_report_chain); + +void ghes_unregister_report_chain(struct notifier_block *nb) +{ + atomic_notifier_chain_unregister(&ghes_report_chain, nb); +} +EXPORT_SYMBOL_GPL(ghes_unregister_report_chain); diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c new file mode 100644 index 0000000000..6aef1ee5e1 --- /dev/null +++ b/drivers/acpi/apei/hest.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * APEI Hardware Error Source Table support + * + * HEST describes error sources in detail; communicates operational + * parameters (i.e. severity levels, masking bits, and threshold + * values) to Linux as necessary. It also allows the BIOS to report + * non-standard error sources to Linux (for example, chipset-specific + * error registers). + * + * For more information about HEST, please refer to ACPI Specification + * version 4.0, section 17.3.2. + * + * Copyright 2009 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/acpi.h> +#include <linux/kdebug.h> +#include <linux/highmem.h> +#include <linux/io.h> +#include <linux/platform_device.h> +#include <acpi/apei.h> +#include <acpi/ghes.h> + +#include "apei-internal.h" + +#define HEST_PFX "HEST: " + +int hest_disable; +EXPORT_SYMBOL_GPL(hest_disable); + +/* HEST table parsing */ + +static struct acpi_table_hest *__read_mostly hest_tab; + +static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = { + [ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */ + [ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1, + [ACPI_HEST_TYPE_IA32_NMI] = sizeof(struct acpi_hest_ia_nmi), + [ACPI_HEST_TYPE_AER_ROOT_PORT] = sizeof(struct acpi_hest_aer_root), + [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer), + [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge), + [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic), + [ACPI_HEST_TYPE_GENERIC_ERROR_V2] = sizeof(struct acpi_hest_generic_v2), + [ACPI_HEST_TYPE_IA32_DEFERRED_CHECK] = -1, +}; + +static inline bool is_generic_error(struct acpi_hest_header *hest_hdr) +{ + return hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR || + hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2; +} + +static int hest_esrc_len(struct acpi_hest_header *hest_hdr) +{ + u16 hest_type = hest_hdr->type; + int len; + + if (hest_type >= ACPI_HEST_TYPE_RESERVED) + return 0; + + len = hest_esrc_len_tab[hest_type]; + + if (hest_type == ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) { + struct acpi_hest_ia_corrected *cmc; + cmc = (struct acpi_hest_ia_corrected *)hest_hdr; + len = sizeof(*cmc) + cmc->num_hardware_banks * + sizeof(struct acpi_hest_ia_error_bank); + } else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) { + struct acpi_hest_ia_machine_check *mc; + mc = (struct acpi_hest_ia_machine_check *)hest_hdr; + len = sizeof(*mc) + mc->num_hardware_banks * + sizeof(struct acpi_hest_ia_error_bank); + } else if (hest_type == ACPI_HEST_TYPE_IA32_DEFERRED_CHECK) { + struct acpi_hest_ia_deferred_check *mc; + mc = (struct acpi_hest_ia_deferred_check *)hest_hdr; + len = sizeof(*mc) + mc->num_hardware_banks * + sizeof(struct acpi_hest_ia_error_bank); + } + BUG_ON(len == -1); + + return len; +}; + +typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data); + +static int apei_hest_parse(apei_hest_func_t func, void *data) +{ + struct acpi_hest_header *hest_hdr; + int i, rc, len; + + if (hest_disable || !hest_tab) + return -EINVAL; + + hest_hdr = (struct acpi_hest_header *)(hest_tab + 1); + for (i = 0; i < hest_tab->error_source_count; i++) { + len = hest_esrc_len(hest_hdr); + if (!len) { + pr_warn(FW_WARN HEST_PFX + "Unknown or unused hardware error source " + "type: %d for hardware error source: %d.\n", + hest_hdr->type, hest_hdr->source_id); + return -EINVAL; + } + if ((void *)hest_hdr + len > + (void *)hest_tab + hest_tab->header.length) { + pr_warn(FW_BUG HEST_PFX + "Table contents overflow for hardware error source: %d.\n", + hest_hdr->source_id); + return -EINVAL; + } + + rc = func(hest_hdr, data); + if (rc) + return rc; + + hest_hdr = (void *)hest_hdr + len; + } + + return 0; +} + +/* + * Check if firmware advertises firmware first mode. We need FF bit to be set + * along with a set of MC banks which work in FF mode. + */ +static int __init hest_parse_cmc(struct acpi_hest_header *hest_hdr, void *data) +{ + if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) + return 0; + + if (!acpi_disable_cmcff) + return !arch_apei_enable_cmcff(hest_hdr, data); + + return 0; +} + +struct ghes_arr { + struct platform_device **ghes_devs; + unsigned int count; +}; + +static int __init hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void *data) +{ + int *count = data; + + if (is_generic_error(hest_hdr)) + (*count)++; + return 0; +} + +static int __init hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data) +{ + struct platform_device *ghes_dev; + struct ghes_arr *ghes_arr = data; + int rc, i; + + if (!is_generic_error(hest_hdr)) + return 0; + + if (!((struct acpi_hest_generic *)hest_hdr)->enabled) + return 0; + for (i = 0; i < ghes_arr->count; i++) { + struct acpi_hest_header *hdr; + ghes_dev = ghes_arr->ghes_devs[i]; + hdr = *(struct acpi_hest_header **)ghes_dev->dev.platform_data; + if (hdr->source_id == hest_hdr->source_id) { + pr_warn(FW_WARN HEST_PFX "Duplicated hardware error source ID: %d.\n", + hdr->source_id); + return -EIO; + } + } + ghes_dev = platform_device_alloc("GHES", hest_hdr->source_id); + if (!ghes_dev) + return -ENOMEM; + + rc = platform_device_add_data(ghes_dev, &hest_hdr, sizeof(void *)); + if (rc) + goto err; + + rc = platform_device_add(ghes_dev); + if (rc) + goto err; + ghes_arr->ghes_devs[ghes_arr->count++] = ghes_dev; + + return 0; +err: + platform_device_put(ghes_dev); + return rc; +} + +static int __init hest_ghes_dev_register(unsigned int ghes_count) +{ + int rc, i; + struct ghes_arr ghes_arr; + + ghes_arr.count = 0; + ghes_arr.ghes_devs = kmalloc_array(ghes_count, sizeof(void *), + GFP_KERNEL); + if (!ghes_arr.ghes_devs) + return -ENOMEM; + + rc = apei_hest_parse(hest_parse_ghes, &ghes_arr); + if (rc) + goto err; + + rc = ghes_estatus_pool_init(ghes_count); + if (rc) + goto err; + +out: + kfree(ghes_arr.ghes_devs); + return rc; +err: + for (i = 0; i < ghes_arr.count; i++) + platform_device_unregister(ghes_arr.ghes_devs[i]); + goto out; +} + +static int __init setup_hest_disable(char *str) +{ + hest_disable = HEST_DISABLED; + return 1; +} + +__setup("hest_disable", setup_hest_disable); + +void __init acpi_hest_init(void) +{ + acpi_status status; + int rc; + unsigned int ghes_count = 0; + + if (hest_disable) { + pr_info(HEST_PFX "Table parsing disabled.\n"); + return; + } + + status = acpi_get_table(ACPI_SIG_HEST, 0, + (struct acpi_table_header **)&hest_tab); + if (status == AE_NOT_FOUND) { + hest_disable = HEST_NOT_FOUND; + return; + } else if (ACPI_FAILURE(status)) { + const char *msg = acpi_format_exception(status); + pr_err(HEST_PFX "Failed to get table, %s\n", msg); + hest_disable = HEST_DISABLED; + return; + } + + rc = apei_hest_parse(hest_parse_cmc, NULL); + if (rc) + goto err; + + if (!ghes_disable) { + rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); + if (rc) + goto err; + + if (ghes_count) + rc = hest_ghes_dev_register(ghes_count); + if (rc) + goto err; + } + + pr_info(HEST_PFX "Table parsing has been initialized.\n"); + return; +err: + hest_disable = HEST_DISABLED; + acpi_put_table((struct acpi_table_header *)hest_tab); +} |