summaryrefslogtreecommitdiffstats
path: root/drivers/acpi/apei
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 10:05:51 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 10:05:51 +0000
commit5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch)
treea94efe259b9009378be6d90eb30d2b019d95c194 /drivers/acpi/apei
parentInitial commit. (diff)
downloadlinux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.tar.xz
linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.zip
Adding upstream version 5.10.209.upstream/5.10.209upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r--drivers/acpi/apei/Kconfig70
-rw-r--r--drivers/acpi/apei/Makefile7
-rw-r--r--drivers/acpi/apei/apei-base.c803
-rw-r--r--drivers/acpi/apei/apei-internal.h139
-rw-r--r--drivers/acpi/apei/bert.c170
-rw-r--r--drivers/acpi/apei/einj.c795
-rw-r--r--drivers/acpi/apei/erst-dbg.c230
-rw-r--r--drivers/acpi/apei/erst.c1206
-rw-r--r--drivers/acpi/apei/ghes.c1499
-rw-r--r--drivers/acpi/apei/hest.c270
10 files changed, 5189 insertions, 0 deletions
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
new file mode 100644
index 000000000..6b18f8bc7
--- /dev/null
+++ b/drivers/acpi/apei/Kconfig
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: GPL-2.0
+config HAVE_ACPI_APEI
+ bool
+
+config HAVE_ACPI_APEI_NMI
+ bool
+
+config ACPI_APEI
+ bool "ACPI Platform Error Interface (APEI)"
+ select MISC_FILESYSTEMS
+ select PSTORE
+ select UEFI_CPER
+ depends on HAVE_ACPI_APEI
+ help
+ APEI allows to report errors (for example from the chipset)
+ to the operating system. This improves NMI handling
+ especially. In addition it supports error serialization and
+ error injection.
+
+config ACPI_APEI_GHES
+ bool "APEI Generic Hardware Error Source"
+ depends on ACPI_APEI
+ select ACPI_HED
+ select IRQ_WORK
+ select GENERIC_ALLOCATOR
+ help
+ Generic Hardware Error Source provides a way to report
+ platform hardware errors (such as that from chipset). It
+ works in so called "Firmware First" mode, that is, hardware
+ errors are reported to firmware firstly, then reported to
+ Linux by firmware. This way, some non-standard hardware
+ error registers or non-standard hardware link can be checked
+ by firmware to produce more valuable hardware error
+ information for Linux.
+
+config ACPI_APEI_PCIEAER
+ bool "APEI PCIe AER logging/recovering support"
+ depends on ACPI_APEI && PCIEAER
+ help
+ PCIe AER errors may be reported via APEI firmware first mode.
+ Turn on this option to enable the corresponding support.
+
+config ACPI_APEI_SEA
+ bool
+ depends on ARM64 && ACPI_APEI_GHES
+ default y
+
+config ACPI_APEI_MEMORY_FAILURE
+ bool "APEI memory error recovering support"
+ depends on ACPI_APEI && MEMORY_FAILURE
+ help
+ Memory errors may be reported via APEI firmware first mode.
+ Turn on this option to enable the memory recovering support.
+
+config ACPI_APEI_EINJ
+ tristate "APEI Error INJection (EINJ)"
+ depends on ACPI_APEI && DEBUG_FS
+ help
+ EINJ provides a hardware error injection mechanism, it is
+ mainly used for debugging and testing the other parts of
+ APEI and some other RAS features.
+
+config ACPI_APEI_ERST_DEBUG
+ tristate "APEI Error Record Serialization Table (ERST) Debug Support"
+ depends on ACPI_APEI
+ help
+ ERST is a way provided by APEI to save and retrieve hardware
+ error information to and from a persistent store. Enable this
+ if you want to debugging and testing the ERST kernel support
+ and firmware implementation.
diff --git a/drivers/acpi/apei/Makefile b/drivers/acpi/apei/Makefile
new file mode 100644
index 000000000..4dfac2128
--- /dev/null
+++ b/drivers/acpi/apei/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ACPI_APEI) += apei.o
+obj-$(CONFIG_ACPI_APEI_GHES) += ghes.o
+obj-$(CONFIG_ACPI_APEI_EINJ) += einj.o
+obj-$(CONFIG_ACPI_APEI_ERST_DEBUG) += erst-dbg.o
+
+apei-y := apei-base.o hest.o erst.o bert.o
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
new file mode 100644
index 000000000..3294cc8dc
--- /dev/null
+++ b/drivers/acpi/apei/apei-base.c
@@ -0,0 +1,803 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * apei-base.c - ACPI Platform Error Interface (APEI) supporting
+ * infrastructure
+ *
+ * APEI allows to report errors (for example from the chipset) to the
+ * the operating system. This improves NMI handling especially. In
+ * addition it supports error serialization and error injection.
+ *
+ * For more information about APEI, please refer to ACPI Specification
+ * version 4.0, chapter 17.
+ *
+ * This file has Common functions used by more than one APEI table,
+ * including framework of interpreter for ERST and EINJ; resource
+ * management for APEI registers.
+ *
+ * Copyright (C) 2009, Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/kref.h>
+#include <linux/rculist.h>
+#include <linux/interrupt.h>
+#include <linux/debugfs.h>
+#include <asm/unaligned.h>
+
+#include "apei-internal.h"
+
+#define APEI_PFX "APEI: "
+
+/*
+ * APEI ERST (Error Record Serialization Table) and EINJ (Error
+ * INJection) interpreter framework.
+ */
+
+#define APEI_EXEC_PRESERVE_REGISTER 0x1
+
+void apei_exec_ctx_init(struct apei_exec_context *ctx,
+ struct apei_exec_ins_type *ins_table,
+ u32 instructions,
+ struct acpi_whea_header *action_table,
+ u32 entries)
+{
+ ctx->ins_table = ins_table;
+ ctx->instructions = instructions;
+ ctx->action_table = action_table;
+ ctx->entries = entries;
+}
+EXPORT_SYMBOL_GPL(apei_exec_ctx_init);
+
+int __apei_exec_read_register(struct acpi_whea_header *entry, u64 *val)
+{
+ int rc;
+
+ rc = apei_read(val, &entry->register_region);
+ if (rc)
+ return rc;
+ *val >>= entry->register_region.bit_offset;
+ *val &= entry->mask;
+
+ return 0;
+}
+
+int apei_exec_read_register(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ int rc;
+ u64 val = 0;
+
+ rc = __apei_exec_read_register(entry, &val);
+ if (rc)
+ return rc;
+ ctx->value = val;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(apei_exec_read_register);
+
+int apei_exec_read_register_value(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ int rc;
+
+ rc = apei_exec_read_register(ctx, entry);
+ if (rc)
+ return rc;
+ ctx->value = (ctx->value == entry->value);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(apei_exec_read_register_value);
+
+int __apei_exec_write_register(struct acpi_whea_header *entry, u64 val)
+{
+ int rc;
+
+ val &= entry->mask;
+ val <<= entry->register_region.bit_offset;
+ if (entry->flags & APEI_EXEC_PRESERVE_REGISTER) {
+ u64 valr = 0;
+ rc = apei_read(&valr, &entry->register_region);
+ if (rc)
+ return rc;
+ valr &= ~(entry->mask << entry->register_region.bit_offset);
+ val |= valr;
+ }
+ rc = apei_write(val, &entry->register_region);
+
+ return rc;
+}
+
+int apei_exec_write_register(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ return __apei_exec_write_register(entry, ctx->value);
+}
+EXPORT_SYMBOL_GPL(apei_exec_write_register);
+
+int apei_exec_write_register_value(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ int rc;
+
+ ctx->value = entry->value;
+ rc = apei_exec_write_register(ctx, entry);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(apei_exec_write_register_value);
+
+int apei_exec_noop(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ return 0;
+}
+EXPORT_SYMBOL_GPL(apei_exec_noop);
+
+/*
+ * Interpret the specified action. Go through whole action table,
+ * execute all instructions belong to the action.
+ */
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action,
+ bool optional)
+{
+ int rc = -ENOENT;
+ u32 i, ip;
+ struct acpi_whea_header *entry;
+ apei_exec_ins_func_t run;
+
+ ctx->ip = 0;
+
+ /*
+ * "ip" is the instruction pointer of current instruction,
+ * "ctx->ip" specifies the next instruction to executed,
+ * instruction "run" function may change the "ctx->ip" to
+ * implement "goto" semantics.
+ */
+rewind:
+ ip = 0;
+ for (i = 0; i < ctx->entries; i++) {
+ entry = &ctx->action_table[i];
+ if (entry->action != action)
+ continue;
+ if (ip == ctx->ip) {
+ if (entry->instruction >= ctx->instructions ||
+ !ctx->ins_table[entry->instruction].run) {
+ pr_warn(FW_WARN APEI_PFX
+ "Invalid action table, unknown instruction type: %d\n",
+ entry->instruction);
+ return -EINVAL;
+ }
+ run = ctx->ins_table[entry->instruction].run;
+ rc = run(ctx, entry);
+ if (rc < 0)
+ return rc;
+ else if (rc != APEI_EXEC_SET_IP)
+ ctx->ip++;
+ }
+ ip++;
+ if (ctx->ip < ip)
+ goto rewind;
+ }
+
+ return !optional && rc < 0 ? rc : 0;
+}
+EXPORT_SYMBOL_GPL(__apei_exec_run);
+
+typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry,
+ void *data);
+
+static int apei_exec_for_each_entry(struct apei_exec_context *ctx,
+ apei_exec_entry_func_t func,
+ void *data,
+ int *end)
+{
+ u8 ins;
+ int i, rc;
+ struct acpi_whea_header *entry;
+ struct apei_exec_ins_type *ins_table = ctx->ins_table;
+
+ for (i = 0; i < ctx->entries; i++) {
+ entry = ctx->action_table + i;
+ ins = entry->instruction;
+ if (end)
+ *end = i;
+ if (ins >= ctx->instructions || !ins_table[ins].run) {
+ pr_warn(FW_WARN APEI_PFX
+ "Invalid action table, unknown instruction type: %d\n",
+ ins);
+ return -EINVAL;
+ }
+ rc = func(ctx, entry, data);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+
+static int pre_map_gar_callback(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry,
+ void *data)
+{
+ u8 ins = entry->instruction;
+
+ if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER)
+ return apei_map_generic_address(&entry->register_region);
+
+ return 0;
+}
+
+/*
+ * Pre-map all GARs in action table to make it possible to access them
+ * in NMI handler.
+ */
+int apei_exec_pre_map_gars(struct apei_exec_context *ctx)
+{
+ int rc, end;
+
+ rc = apei_exec_for_each_entry(ctx, pre_map_gar_callback,
+ NULL, &end);
+ if (rc) {
+ struct apei_exec_context ctx_unmap;
+ memcpy(&ctx_unmap, ctx, sizeof(*ctx));
+ ctx_unmap.entries = end;
+ apei_exec_post_unmap_gars(&ctx_unmap);
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(apei_exec_pre_map_gars);
+
+static int post_unmap_gar_callback(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry,
+ void *data)
+{
+ u8 ins = entry->instruction;
+
+ if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER)
+ apei_unmap_generic_address(&entry->register_region);
+
+ return 0;
+}
+
+/* Post-unmap all GAR in action table. */
+int apei_exec_post_unmap_gars(struct apei_exec_context *ctx)
+{
+ return apei_exec_for_each_entry(ctx, post_unmap_gar_callback,
+ NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(apei_exec_post_unmap_gars);
+
+/*
+ * Resource management for GARs in APEI
+ */
+struct apei_res {
+ struct list_head list;
+ unsigned long start;
+ unsigned long end;
+};
+
+/* Collect all resources requested, to avoid conflict */
+struct apei_resources apei_resources_all = {
+ .iomem = LIST_HEAD_INIT(apei_resources_all.iomem),
+ .ioport = LIST_HEAD_INIT(apei_resources_all.ioport),
+};
+
+static int apei_res_add(struct list_head *res_list,
+ unsigned long start, unsigned long size)
+{
+ struct apei_res *res, *resn, *res_ins = NULL;
+ unsigned long end = start + size;
+
+ if (end <= start)
+ return 0;
+repeat:
+ list_for_each_entry_safe(res, resn, res_list, list) {
+ if (res->start > end || res->end < start)
+ continue;
+ else if (end <= res->end && start >= res->start) {
+ kfree(res_ins);
+ return 0;
+ }
+ list_del(&res->list);
+ res->start = start = min(res->start, start);
+ res->end = end = max(res->end, end);
+ kfree(res_ins);
+ res_ins = res;
+ goto repeat;
+ }
+
+ if (res_ins)
+ list_add(&res_ins->list, res_list);
+ else {
+ res_ins = kmalloc(sizeof(*res), GFP_KERNEL);
+ if (!res_ins)
+ return -ENOMEM;
+ res_ins->start = start;
+ res_ins->end = end;
+ list_add(&res_ins->list, res_list);
+ }
+
+ return 0;
+}
+
+static int apei_res_sub(struct list_head *res_list1,
+ struct list_head *res_list2)
+{
+ struct apei_res *res1, *resn1, *res2, *res;
+ res1 = list_entry(res_list1->next, struct apei_res, list);
+ resn1 = list_entry(res1->list.next, struct apei_res, list);
+ while (&res1->list != res_list1) {
+ list_for_each_entry(res2, res_list2, list) {
+ if (res1->start >= res2->end ||
+ res1->end <= res2->start)
+ continue;
+ else if (res1->end <= res2->end &&
+ res1->start >= res2->start) {
+ list_del(&res1->list);
+ kfree(res1);
+ break;
+ } else if (res1->end > res2->end &&
+ res1->start < res2->start) {
+ res = kmalloc(sizeof(*res), GFP_KERNEL);
+ if (!res)
+ return -ENOMEM;
+ res->start = res2->end;
+ res->end = res1->end;
+ res1->end = res2->start;
+ list_add(&res->list, &res1->list);
+ resn1 = res;
+ } else {
+ if (res1->start < res2->start)
+ res1->end = res2->start;
+ else
+ res1->start = res2->end;
+ }
+ }
+ res1 = resn1;
+ resn1 = list_entry(resn1->list.next, struct apei_res, list);
+ }
+
+ return 0;
+}
+
+static void apei_res_clean(struct list_head *res_list)
+{
+ struct apei_res *res, *resn;
+
+ list_for_each_entry_safe(res, resn, res_list, list) {
+ list_del(&res->list);
+ kfree(res);
+ }
+}
+
+void apei_resources_fini(struct apei_resources *resources)
+{
+ apei_res_clean(&resources->iomem);
+ apei_res_clean(&resources->ioport);
+}
+EXPORT_SYMBOL_GPL(apei_resources_fini);
+
+static int apei_resources_merge(struct apei_resources *resources1,
+ struct apei_resources *resources2)
+{
+ int rc;
+ struct apei_res *res;
+
+ list_for_each_entry(res, &resources2->iomem, list) {
+ rc = apei_res_add(&resources1->iomem, res->start,
+ res->end - res->start);
+ if (rc)
+ return rc;
+ }
+ list_for_each_entry(res, &resources2->ioport, list) {
+ rc = apei_res_add(&resources1->ioport, res->start,
+ res->end - res->start);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+
+int apei_resources_add(struct apei_resources *resources,
+ unsigned long start, unsigned long size,
+ bool iomem)
+{
+ if (iomem)
+ return apei_res_add(&resources->iomem, start, size);
+ else
+ return apei_res_add(&resources->ioport, start, size);
+}
+EXPORT_SYMBOL_GPL(apei_resources_add);
+
+/*
+ * EINJ has two groups of GARs (EINJ table entry and trigger table
+ * entry), so common resources are subtracted from the trigger table
+ * resources before the second requesting.
+ */
+int apei_resources_sub(struct apei_resources *resources1,
+ struct apei_resources *resources2)
+{
+ int rc;
+
+ rc = apei_res_sub(&resources1->iomem, &resources2->iomem);
+ if (rc)
+ return rc;
+ return apei_res_sub(&resources1->ioport, &resources2->ioport);
+}
+EXPORT_SYMBOL_GPL(apei_resources_sub);
+
+static int apei_get_res_callback(__u64 start, __u64 size, void *data)
+{
+ struct apei_resources *resources = data;
+ return apei_res_add(&resources->iomem, start, size);
+}
+
+static int apei_get_nvs_resources(struct apei_resources *resources)
+{
+ return acpi_nvs_for_each_region(apei_get_res_callback, resources);
+}
+
+int (*arch_apei_filter_addr)(int (*func)(__u64 start, __u64 size,
+ void *data), void *data);
+static int apei_get_arch_resources(struct apei_resources *resources)
+
+{
+ return arch_apei_filter_addr(apei_get_res_callback, resources);
+}
+
+/*
+ * IO memory/port resource management mechanism is used to check
+ * whether memory/port area used by GARs conflicts with normal memory
+ * or IO memory/port of devices.
+ */
+int apei_resources_request(struct apei_resources *resources,
+ const char *desc)
+{
+ struct apei_res *res, *res_bak = NULL;
+ struct resource *r;
+ struct apei_resources nvs_resources, arch_res;
+ int rc;
+
+ rc = apei_resources_sub(resources, &apei_resources_all);
+ if (rc)
+ return rc;
+
+ /*
+ * Some firmware uses ACPI NVS region, that has been marked as
+ * busy, so exclude it from APEI resources to avoid false
+ * conflict.
+ */
+ apei_resources_init(&nvs_resources);
+ rc = apei_get_nvs_resources(&nvs_resources);
+ if (rc)
+ goto nvs_res_fini;
+ rc = apei_resources_sub(resources, &nvs_resources);
+ if (rc)
+ goto nvs_res_fini;
+
+ if (arch_apei_filter_addr) {
+ apei_resources_init(&arch_res);
+ rc = apei_get_arch_resources(&arch_res);
+ if (rc)
+ goto arch_res_fini;
+ rc = apei_resources_sub(resources, &arch_res);
+ if (rc)
+ goto arch_res_fini;
+ }
+
+ rc = -EINVAL;
+ list_for_each_entry(res, &resources->iomem, list) {
+ r = request_mem_region(res->start, res->end - res->start,
+ desc);
+ if (!r) {
+ pr_err(APEI_PFX
+ "Can not request [mem %#010llx-%#010llx] for %s registers\n",
+ (unsigned long long)res->start,
+ (unsigned long long)res->end - 1, desc);
+ res_bak = res;
+ goto err_unmap_iomem;
+ }
+ }
+
+ list_for_each_entry(res, &resources->ioport, list) {
+ r = request_region(res->start, res->end - res->start, desc);
+ if (!r) {
+ pr_err(APEI_PFX
+ "Can not request [io %#06llx-%#06llx] for %s registers\n",
+ (unsigned long long)res->start,
+ (unsigned long long)res->end - 1, desc);
+ res_bak = res;
+ goto err_unmap_ioport;
+ }
+ }
+
+ rc = apei_resources_merge(&apei_resources_all, resources);
+ if (rc) {
+ pr_err(APEI_PFX "Fail to merge resources!\n");
+ goto err_unmap_ioport;
+ }
+
+ goto arch_res_fini;
+
+err_unmap_ioport:
+ list_for_each_entry(res, &resources->ioport, list) {
+ if (res == res_bak)
+ break;
+ release_region(res->start, res->end - res->start);
+ }
+ res_bak = NULL;
+err_unmap_iomem:
+ list_for_each_entry(res, &resources->iomem, list) {
+ if (res == res_bak)
+ break;
+ release_mem_region(res->start, res->end - res->start);
+ }
+arch_res_fini:
+ if (arch_apei_filter_addr)
+ apei_resources_fini(&arch_res);
+nvs_res_fini:
+ apei_resources_fini(&nvs_resources);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(apei_resources_request);
+
+void apei_resources_release(struct apei_resources *resources)
+{
+ int rc;
+ struct apei_res *res;
+
+ list_for_each_entry(res, &resources->iomem, list)
+ release_mem_region(res->start, res->end - res->start);
+ list_for_each_entry(res, &resources->ioport, list)
+ release_region(res->start, res->end - res->start);
+
+ rc = apei_resources_sub(&apei_resources_all, resources);
+ if (rc)
+ pr_err(APEI_PFX "Fail to sub resources!\n");
+}
+EXPORT_SYMBOL_GPL(apei_resources_release);
+
+static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr,
+ u32 *access_bit_width)
+{
+ u32 bit_width, bit_offset, access_size_code, space_id;
+
+ bit_width = reg->bit_width;
+ bit_offset = reg->bit_offset;
+ access_size_code = reg->access_width;
+ space_id = reg->space_id;
+ *paddr = get_unaligned(&reg->address);
+ if (!*paddr) {
+ pr_warn(FW_BUG APEI_PFX
+ "Invalid physical address in GAR [0x%llx/%u/%u/%u/%u]\n",
+ *paddr, bit_width, bit_offset, access_size_code,
+ space_id);
+ return -EINVAL;
+ }
+
+ if (access_size_code < 1 || access_size_code > 4) {
+ pr_warn(FW_BUG APEI_PFX
+ "Invalid access size code in GAR [0x%llx/%u/%u/%u/%u]\n",
+ *paddr, bit_width, bit_offset, access_size_code,
+ space_id);
+ return -EINVAL;
+ }
+ *access_bit_width = 1UL << (access_size_code + 2);
+
+ /* Fixup common BIOS bug */
+ if (bit_width == 32 && bit_offset == 0 && (*paddr & 0x03) == 0 &&
+ *access_bit_width < 32)
+ *access_bit_width = 32;
+ else if (bit_width == 64 && bit_offset == 0 && (*paddr & 0x07) == 0 &&
+ *access_bit_width < 64)
+ *access_bit_width = 64;
+
+ if ((bit_width + bit_offset) > *access_bit_width) {
+ pr_warn(FW_BUG APEI_PFX
+ "Invalid bit width + offset in GAR [0x%llx/%u/%u/%u/%u]\n",
+ *paddr, bit_width, bit_offset, access_size_code,
+ space_id);
+ return -EINVAL;
+ }
+
+ if (space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY &&
+ space_id != ACPI_ADR_SPACE_SYSTEM_IO) {
+ pr_warn(FW_BUG APEI_PFX
+ "Invalid address space type in GAR [0x%llx/%u/%u/%u/%u]\n",
+ *paddr, bit_width, bit_offset, access_size_code,
+ space_id);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int apei_map_generic_address(struct acpi_generic_address *reg)
+{
+ int rc;
+ u32 access_bit_width;
+ u64 address;
+
+ rc = apei_check_gar(reg, &address, &access_bit_width);
+ if (rc)
+ return rc;
+
+ /* IO space doesn't need mapping */
+ if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO)
+ return 0;
+
+ if (!acpi_os_map_generic_address(reg))
+ return -ENXIO;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(apei_map_generic_address);
+
+/* read GAR in interrupt (including NMI) or process context */
+int apei_read(u64 *val, struct acpi_generic_address *reg)
+{
+ int rc;
+ u32 access_bit_width;
+ u64 address;
+ acpi_status status;
+
+ rc = apei_check_gar(reg, &address, &access_bit_width);
+ if (rc)
+ return rc;
+
+ *val = 0;
+ switch(reg->space_id) {
+ case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+ status = acpi_os_read_memory((acpi_physical_address) address,
+ val, access_bit_width);
+ if (ACPI_FAILURE(status))
+ return -EIO;
+ break;
+ case ACPI_ADR_SPACE_SYSTEM_IO:
+ status = acpi_os_read_port(address, (u32 *)val,
+ access_bit_width);
+ if (ACPI_FAILURE(status))
+ return -EIO;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(apei_read);
+
+/* write GAR in interrupt (including NMI) or process context */
+int apei_write(u64 val, struct acpi_generic_address *reg)
+{
+ int rc;
+ u32 access_bit_width;
+ u64 address;
+ acpi_status status;
+
+ rc = apei_check_gar(reg, &address, &access_bit_width);
+ if (rc)
+ return rc;
+
+ switch (reg->space_id) {
+ case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+ status = acpi_os_write_memory((acpi_physical_address) address,
+ val, access_bit_width);
+ if (ACPI_FAILURE(status))
+ return -EIO;
+ break;
+ case ACPI_ADR_SPACE_SYSTEM_IO:
+ status = acpi_os_write_port(address, val, access_bit_width);
+ if (ACPI_FAILURE(status))
+ return -EIO;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(apei_write);
+
+static int collect_res_callback(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry,
+ void *data)
+{
+ struct apei_resources *resources = data;
+ struct acpi_generic_address *reg = &entry->register_region;
+ u8 ins = entry->instruction;
+ u32 access_bit_width;
+ u64 paddr;
+ int rc;
+
+ if (!(ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER))
+ return 0;
+
+ rc = apei_check_gar(reg, &paddr, &access_bit_width);
+ if (rc)
+ return rc;
+
+ switch (reg->space_id) {
+ case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+ return apei_res_add(&resources->iomem, paddr,
+ access_bit_width / 8);
+ case ACPI_ADR_SPACE_SYSTEM_IO:
+ return apei_res_add(&resources->ioport, paddr,
+ access_bit_width / 8);
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Same register may be used by multiple instructions in GARs, so
+ * resources are collected before requesting.
+ */
+int apei_exec_collect_resources(struct apei_exec_context *ctx,
+ struct apei_resources *resources)
+{
+ return apei_exec_for_each_entry(ctx, collect_res_callback,
+ resources, NULL);
+}
+EXPORT_SYMBOL_GPL(apei_exec_collect_resources);
+
+struct dentry *apei_get_debugfs_dir(void)
+{
+ static struct dentry *dapei;
+
+ if (!dapei)
+ dapei = debugfs_create_dir("apei", NULL);
+
+ return dapei;
+}
+EXPORT_SYMBOL_GPL(apei_get_debugfs_dir);
+
+int __weak arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr,
+ void *data)
+{
+ return 1;
+}
+EXPORT_SYMBOL_GPL(arch_apei_enable_cmcff);
+
+void __weak arch_apei_report_mem_error(int sev,
+ struct cper_sec_mem_err *mem_err)
+{
+}
+EXPORT_SYMBOL_GPL(arch_apei_report_mem_error);
+
+int apei_osc_setup(void)
+{
+ static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c";
+ acpi_handle handle;
+ u32 capbuf[3];
+ struct acpi_osc_context context = {
+ .uuid_str = whea_uuid_str,
+ .rev = 1,
+ .cap.length = sizeof(capbuf),
+ .cap.pointer = capbuf,
+ };
+
+ capbuf[OSC_QUERY_DWORD] = OSC_QUERY_ENABLE;
+ capbuf[OSC_SUPPORT_DWORD] = 1;
+ capbuf[OSC_CONTROL_DWORD] = 0;
+
+ if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))
+ || ACPI_FAILURE(acpi_run_osc(handle, &context)))
+ return -EIO;
+ else {
+ kfree(context.ret.pointer);
+ return 0;
+ }
+}
+EXPORT_SYMBOL_GPL(apei_osc_setup);
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
new file mode 100644
index 000000000..1d6ef9654
--- /dev/null
+++ b/drivers/acpi/apei/apei-internal.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * apei-internal.h - ACPI Platform Error Interface internal
+ * definitions.
+ */
+
+#ifndef APEI_INTERNAL_H
+#define APEI_INTERNAL_H
+
+#include <linux/cper.h>
+#include <linux/acpi.h>
+
+struct apei_exec_context;
+
+typedef int (*apei_exec_ins_func_t)(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry);
+
+#define APEI_EXEC_INS_ACCESS_REGISTER 0x0001
+
+struct apei_exec_ins_type {
+ u32 flags;
+ apei_exec_ins_func_t run;
+};
+
+struct apei_exec_context {
+ u32 ip;
+ u64 value;
+ u64 var1;
+ u64 var2;
+ u64 src_base;
+ u64 dst_base;
+ struct apei_exec_ins_type *ins_table;
+ u32 instructions;
+ struct acpi_whea_header *action_table;
+ u32 entries;
+};
+
+void apei_exec_ctx_init(struct apei_exec_context *ctx,
+ struct apei_exec_ins_type *ins_table,
+ u32 instructions,
+ struct acpi_whea_header *action_table,
+ u32 entries);
+
+static inline void apei_exec_ctx_set_input(struct apei_exec_context *ctx,
+ u64 input)
+{
+ ctx->value = input;
+}
+
+static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx)
+{
+ return ctx->value;
+}
+
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool optional);
+
+static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action)
+{
+ return __apei_exec_run(ctx, action, 0);
+}
+
+/* It is optional whether the firmware provides the action */
+static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action)
+{
+ return __apei_exec_run(ctx, action, 1);
+}
+
+/* Common instruction implementation */
+
+/* IP has been set in instruction function */
+#define APEI_EXEC_SET_IP 1
+
+int apei_map_generic_address(struct acpi_generic_address *reg);
+
+static inline void apei_unmap_generic_address(struct acpi_generic_address *reg)
+{
+ acpi_os_unmap_generic_address(reg);
+}
+
+int apei_read(u64 *val, struct acpi_generic_address *reg);
+int apei_write(u64 val, struct acpi_generic_address *reg);
+
+int __apei_exec_read_register(struct acpi_whea_header *entry, u64 *val);
+int __apei_exec_write_register(struct acpi_whea_header *entry, u64 val);
+int apei_exec_read_register(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry);
+int apei_exec_read_register_value(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry);
+int apei_exec_write_register(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry);
+int apei_exec_write_register_value(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry);
+int apei_exec_noop(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry);
+int apei_exec_pre_map_gars(struct apei_exec_context *ctx);
+int apei_exec_post_unmap_gars(struct apei_exec_context *ctx);
+
+struct apei_resources {
+ struct list_head iomem;
+ struct list_head ioport;
+};
+
+static inline void apei_resources_init(struct apei_resources *resources)
+{
+ INIT_LIST_HEAD(&resources->iomem);
+ INIT_LIST_HEAD(&resources->ioport);
+}
+
+void apei_resources_fini(struct apei_resources *resources);
+int apei_resources_add(struct apei_resources *resources,
+ unsigned long start, unsigned long size,
+ bool iomem);
+int apei_resources_sub(struct apei_resources *resources1,
+ struct apei_resources *resources2);
+int apei_resources_request(struct apei_resources *resources,
+ const char *desc);
+void apei_resources_release(struct apei_resources *resources);
+int apei_exec_collect_resources(struct apei_exec_context *ctx,
+ struct apei_resources *resources);
+
+struct dentry;
+struct dentry *apei_get_debugfs_dir(void);
+
+static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus)
+{
+ if (estatus->raw_data_length)
+ return estatus->raw_data_offset + \
+ estatus->raw_data_length;
+ else
+ return sizeof(*estatus) + estatus->data_length;
+}
+
+void cper_estatus_print(const char *pfx,
+ const struct acpi_hest_generic_status *estatus);
+int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus);
+int cper_estatus_check(const struct acpi_hest_generic_status *estatus);
+
+int apei_osc_setup(void);
+#endif
diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c
new file mode 100644
index 000000000..45973aa6e
--- /dev/null
+++ b/drivers/acpi/apei/bert.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * APEI Boot Error Record Table (BERT) support
+ *
+ * Copyright 2011 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * Under normal circumstances, when a hardware error occurs, the error
+ * handler receives control and processes the error. This gives OSPM a
+ * chance to process the error condition, report it, and optionally attempt
+ * recovery. In some cases, the system is unable to process an error.
+ * For example, system firmware or a management controller may choose to
+ * reset the system or the system might experience an uncontrolled crash
+ * or reset.The boot error source is used to report unhandled errors that
+ * occurred in a previous boot. This mechanism is described in the BERT
+ * table.
+ *
+ * For more information about BERT, please refer to ACPI Specification
+ * version 4.0, section 17.3.1
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/io.h>
+
+#include "apei-internal.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "BERT: " fmt
+
+#define ACPI_BERT_PRINT_MAX_RECORDS 5
+#define ACPI_BERT_PRINT_MAX_LEN 1024
+
+static int bert_disable;
+
+/*
+ * Print "all" the error records in the BERT table, but avoid huge spam to
+ * the console if the BIOS included oversize records, or too many records.
+ * Skipping some records here does not lose anything because the full
+ * data is available to user tools in:
+ * /sys/firmware/acpi/tables/data/BERT
+ */
+static void __init bert_print_all(struct acpi_bert_region *region,
+ unsigned int region_len)
+{
+ struct acpi_hest_generic_status *estatus =
+ (struct acpi_hest_generic_status *)region;
+ int remain = region_len;
+ int printed = 0, skipped = 0;
+ u32 estatus_len;
+
+ while (remain >= sizeof(struct acpi_bert_region)) {
+ estatus_len = cper_estatus_len(estatus);
+ if (remain < estatus_len) {
+ pr_err(FW_BUG "Truncated status block (length: %u).\n",
+ estatus_len);
+ break;
+ }
+
+ /* No more error records. */
+ if (!estatus->block_status)
+ break;
+
+ if (cper_estatus_check(estatus)) {
+ pr_err(FW_BUG "Invalid error record.\n");
+ break;
+ }
+
+ if (estatus_len < ACPI_BERT_PRINT_MAX_LEN &&
+ printed < ACPI_BERT_PRINT_MAX_RECORDS) {
+ pr_info_once("Error records from previous boot:\n");
+ cper_estatus_print(KERN_INFO HW_ERR, estatus);
+ printed++;
+ } else {
+ skipped++;
+ }
+
+ /*
+ * Because the boot error source is "one-time polled" type,
+ * clear Block Status of current Generic Error Status Block,
+ * once it's printed.
+ */
+ estatus->block_status = 0;
+
+ estatus = (void *)estatus + estatus_len;
+ remain -= estatus_len;
+ }
+
+ if (skipped)
+ pr_info(HW_ERR "Skipped %d error records\n", skipped);
+}
+
+static int __init setup_bert_disable(char *str)
+{
+ bert_disable = 1;
+
+ return 1;
+}
+__setup("bert_disable", setup_bert_disable);
+
+static int __init bert_check_table(struct acpi_table_bert *bert_tab)
+{
+ if (bert_tab->header.length < sizeof(struct acpi_table_bert) ||
+ bert_tab->region_length < sizeof(struct acpi_bert_region))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int __init bert_init(void)
+{
+ struct apei_resources bert_resources;
+ struct acpi_bert_region *boot_error_region;
+ struct acpi_table_bert *bert_tab;
+ unsigned int region_len;
+ acpi_status status;
+ int rc = 0;
+
+ if (acpi_disabled)
+ return 0;
+
+ if (bert_disable) {
+ pr_info("Boot Error Record Table support is disabled.\n");
+ return 0;
+ }
+
+ status = acpi_get_table(ACPI_SIG_BERT, 0, (struct acpi_table_header **)&bert_tab);
+ if (status == AE_NOT_FOUND)
+ return 0;
+
+ if (ACPI_FAILURE(status)) {
+ pr_err("get table failed, %s.\n", acpi_format_exception(status));
+ return -EINVAL;
+ }
+
+ rc = bert_check_table(bert_tab);
+ if (rc) {
+ pr_err(FW_BUG "table invalid.\n");
+ goto out_put_bert_tab;
+ }
+
+ region_len = bert_tab->region_length;
+ apei_resources_init(&bert_resources);
+ rc = apei_resources_add(&bert_resources, bert_tab->address,
+ region_len, true);
+ if (rc)
+ goto out_put_bert_tab;
+ rc = apei_resources_request(&bert_resources, "APEI BERT");
+ if (rc)
+ goto out_fini;
+ boot_error_region = ioremap_cache(bert_tab->address, region_len);
+ if (boot_error_region) {
+ bert_print_all(boot_error_region, region_len);
+ iounmap(boot_error_region);
+ } else {
+ rc = -ENOMEM;
+ }
+
+ apei_resources_release(&bert_resources);
+out_fini:
+ apei_resources_fini(&bert_resources);
+out_put_bert_tab:
+ acpi_put_table((struct acpi_table_header *)bert_tab);
+
+ return rc;
+}
+
+late_initcall(bert_init);
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
new file mode 100644
index 000000000..c281d5b33
--- /dev/null
+++ b/drivers/acpi/apei/einj.c
@@ -0,0 +1,795 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * APEI Error INJection support
+ *
+ * EINJ provides a hardware error injection mechanism, this is useful
+ * for debugging and testing of other APEI and RAS features.
+ *
+ * For more information about EINJ, please refer to ACPI Specification
+ * version 4.0, section 17.5.
+ *
+ * Copyright 2009-2010 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/nmi.h>
+#include <linux/delay.h>
+#include <linux/mm.h>
+#include <asm/unaligned.h>
+
+#include "apei-internal.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "EINJ: " fmt
+
+#define SPIN_UNIT 100 /* 100ns */
+/* Firmware should respond within 1 milliseconds */
+#define FIRMWARE_TIMEOUT (1 * NSEC_PER_MSEC)
+#define ACPI5_VENDOR_BIT BIT(31)
+#define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \
+ ACPI_EINJ_MEMORY_UNCORRECTABLE | \
+ ACPI_EINJ_MEMORY_FATAL)
+
+/*
+ * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action.
+ */
+static int acpi5;
+
+struct set_error_type_with_address {
+ u32 type;
+ u32 vendor_extension;
+ u32 flags;
+ u32 apicid;
+ u64 memory_address;
+ u64 memory_address_range;
+ u32 pcie_sbdf;
+};
+enum {
+ SETWA_FLAGS_APICID = 1,
+ SETWA_FLAGS_MEM = 2,
+ SETWA_FLAGS_PCIE_SBDF = 4,
+};
+
+/*
+ * Vendor extensions for platform specific operations
+ */
+struct vendor_error_type_extension {
+ u32 length;
+ u32 pcie_sbdf;
+ u16 vendor_id;
+ u16 device_id;
+ u8 rev_id;
+ u8 reserved[3];
+};
+
+static u32 notrigger;
+
+static u32 vendor_flags;
+static struct debugfs_blob_wrapper vendor_blob;
+static char vendor_dev[64];
+
+/*
+ * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
+ * EINJ table through an unpublished extension. Use with caution as
+ * most will ignore the parameter and make their own choice of address
+ * for error injection. This extension is used only if
+ * param_extension module parameter is specified.
+ */
+struct einj_parameter {
+ u64 type;
+ u64 reserved1;
+ u64 reserved2;
+ u64 param1;
+ u64 param2;
+};
+
+#define EINJ_OP_BUSY 0x1
+#define EINJ_STATUS_SUCCESS 0x0
+#define EINJ_STATUS_FAIL 0x1
+#define EINJ_STATUS_INVAL 0x2
+
+#define EINJ_TAB_ENTRY(tab) \
+ ((struct acpi_whea_header *)((char *)(tab) + \
+ sizeof(struct acpi_table_einj)))
+
+static bool param_extension;
+module_param(param_extension, bool, 0);
+
+static struct acpi_table_einj *einj_tab;
+
+static struct apei_resources einj_resources;
+
+static struct apei_exec_ins_type einj_ins_type[] = {
+ [ACPI_EINJ_READ_REGISTER] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = apei_exec_read_register,
+ },
+ [ACPI_EINJ_READ_REGISTER_VALUE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = apei_exec_read_register_value,
+ },
+ [ACPI_EINJ_WRITE_REGISTER] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = apei_exec_write_register,
+ },
+ [ACPI_EINJ_WRITE_REGISTER_VALUE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = apei_exec_write_register_value,
+ },
+ [ACPI_EINJ_NOOP] = {
+ .flags = 0,
+ .run = apei_exec_noop,
+ },
+};
+
+/*
+ * Prevent EINJ interpreter to run simultaneously, because the
+ * corresponding firmware implementation may not work properly when
+ * invoked simultaneously.
+ */
+static DEFINE_MUTEX(einj_mutex);
+
+static void *einj_param;
+
+static void einj_exec_ctx_init(struct apei_exec_context *ctx)
+{
+ apei_exec_ctx_init(ctx, einj_ins_type, ARRAY_SIZE(einj_ins_type),
+ EINJ_TAB_ENTRY(einj_tab), einj_tab->entries);
+}
+
+static int __einj_get_available_error_type(u32 *type)
+{
+ struct apei_exec_context ctx;
+ int rc;
+
+ einj_exec_ctx_init(&ctx);
+ rc = apei_exec_run(&ctx, ACPI_EINJ_GET_ERROR_TYPE);
+ if (rc)
+ return rc;
+ *type = apei_exec_ctx_get_output(&ctx);
+
+ return 0;
+}
+
+/* Get error injection capabilities of the platform */
+static int einj_get_available_error_type(u32 *type)
+{
+ int rc;
+
+ mutex_lock(&einj_mutex);
+ rc = __einj_get_available_error_type(type);
+ mutex_unlock(&einj_mutex);
+
+ return rc;
+}
+
+static int einj_timedout(u64 *t)
+{
+ if ((s64)*t < SPIN_UNIT) {
+ pr_warn(FW_WARN "Firmware does not respond in time\n");
+ return 1;
+ }
+ *t -= SPIN_UNIT;
+ ndelay(SPIN_UNIT);
+ touch_nmi_watchdog();
+ return 0;
+}
+
+static void check_vendor_extension(u64 paddr,
+ struct set_error_type_with_address *v5param)
+{
+ int offset = v5param->vendor_extension;
+ struct vendor_error_type_extension *v;
+ u32 sbdf;
+
+ if (!offset)
+ return;
+ v = acpi_os_map_iomem(paddr + offset, sizeof(*v));
+ if (!v)
+ return;
+ sbdf = v->pcie_sbdf;
+ sprintf(vendor_dev, "%x:%x:%x.%x vendor_id=%x device_id=%x rev_id=%x\n",
+ sbdf >> 24, (sbdf >> 16) & 0xff,
+ (sbdf >> 11) & 0x1f, (sbdf >> 8) & 0x7,
+ v->vendor_id, v->device_id, v->rev_id);
+ acpi_os_unmap_iomem(v, sizeof(*v));
+}
+
+static void *einj_get_parameter_address(void)
+{
+ int i;
+ u64 pa_v4 = 0, pa_v5 = 0;
+ struct acpi_whea_header *entry;
+
+ entry = EINJ_TAB_ENTRY(einj_tab);
+ for (i = 0; i < einj_tab->entries; i++) {
+ if (entry->action == ACPI_EINJ_SET_ERROR_TYPE &&
+ entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
+ entry->register_region.space_id ==
+ ACPI_ADR_SPACE_SYSTEM_MEMORY)
+ pa_v4 = get_unaligned(&entry->register_region.address);
+ if (entry->action == ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS &&
+ entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
+ entry->register_region.space_id ==
+ ACPI_ADR_SPACE_SYSTEM_MEMORY)
+ pa_v5 = get_unaligned(&entry->register_region.address);
+ entry++;
+ }
+ if (pa_v5) {
+ struct set_error_type_with_address *v5param;
+
+ v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param));
+ if (v5param) {
+ acpi5 = 1;
+ check_vendor_extension(pa_v5, v5param);
+ return v5param;
+ }
+ }
+ if (param_extension && pa_v4) {
+ struct einj_parameter *v4param;
+
+ v4param = acpi_os_map_iomem(pa_v4, sizeof(*v4param));
+ if (!v4param)
+ return NULL;
+ if (v4param->reserved1 || v4param->reserved2) {
+ acpi_os_unmap_iomem(v4param, sizeof(*v4param));
+ return NULL;
+ }
+ return v4param;
+ }
+
+ return NULL;
+}
+
+/* do sanity check to trigger table */
+static int einj_check_trigger_header(struct acpi_einj_trigger *trigger_tab)
+{
+ if (trigger_tab->header_size != sizeof(struct acpi_einj_trigger))
+ return -EINVAL;
+ if (trigger_tab->table_size > PAGE_SIZE ||
+ trigger_tab->table_size < trigger_tab->header_size)
+ return -EINVAL;
+ if (trigger_tab->entry_count !=
+ (trigger_tab->table_size - trigger_tab->header_size) /
+ sizeof(struct acpi_einj_entry))
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct acpi_generic_address *einj_get_trigger_parameter_region(
+ struct acpi_einj_trigger *trigger_tab, u64 param1, u64 param2)
+{
+ int i;
+ struct acpi_whea_header *entry;
+
+ entry = (struct acpi_whea_header *)
+ ((char *)trigger_tab + sizeof(struct acpi_einj_trigger));
+ for (i = 0; i < trigger_tab->entry_count; i++) {
+ if (entry->action == ACPI_EINJ_TRIGGER_ERROR &&
+ entry->instruction <= ACPI_EINJ_WRITE_REGISTER_VALUE &&
+ entry->register_region.space_id ==
+ ACPI_ADR_SPACE_SYSTEM_MEMORY &&
+ (entry->register_region.address & param2) == (param1 & param2))
+ return &entry->register_region;
+ entry++;
+ }
+
+ return NULL;
+}
+/* Execute instructions in trigger error action table */
+static int __einj_error_trigger(u64 trigger_paddr, u32 type,
+ u64 param1, u64 param2)
+{
+ struct acpi_einj_trigger *trigger_tab = NULL;
+ struct apei_exec_context trigger_ctx;
+ struct apei_resources trigger_resources;
+ struct acpi_whea_header *trigger_entry;
+ struct resource *r;
+ u32 table_size;
+ int rc = -EIO;
+ struct acpi_generic_address *trigger_param_region = NULL;
+
+ r = request_mem_region(trigger_paddr, sizeof(*trigger_tab),
+ "APEI EINJ Trigger Table");
+ if (!r) {
+ pr_err("Can not request [mem %#010llx-%#010llx] for Trigger table\n",
+ (unsigned long long)trigger_paddr,
+ (unsigned long long)trigger_paddr +
+ sizeof(*trigger_tab) - 1);
+ goto out;
+ }
+ trigger_tab = ioremap_cache(trigger_paddr, sizeof(*trigger_tab));
+ if (!trigger_tab) {
+ pr_err("Failed to map trigger table!\n");
+ goto out_rel_header;
+ }
+ rc = einj_check_trigger_header(trigger_tab);
+ if (rc) {
+ pr_warn(FW_BUG "Invalid trigger error action table.\n");
+ goto out_rel_header;
+ }
+
+ /* No action structures in the TRIGGER_ERROR table, nothing to do */
+ if (!trigger_tab->entry_count)
+ goto out_rel_header;
+
+ rc = -EIO;
+ table_size = trigger_tab->table_size;
+ r = request_mem_region(trigger_paddr + sizeof(*trigger_tab),
+ table_size - sizeof(*trigger_tab),
+ "APEI EINJ Trigger Table");
+ if (!r) {
+ pr_err("Can not request [mem %#010llx-%#010llx] for Trigger Table Entry\n",
+ (unsigned long long)trigger_paddr + sizeof(*trigger_tab),
+ (unsigned long long)trigger_paddr + table_size - 1);
+ goto out_rel_header;
+ }
+ iounmap(trigger_tab);
+ trigger_tab = ioremap_cache(trigger_paddr, table_size);
+ if (!trigger_tab) {
+ pr_err("Failed to map trigger table!\n");
+ goto out_rel_entry;
+ }
+ trigger_entry = (struct acpi_whea_header *)
+ ((char *)trigger_tab + sizeof(struct acpi_einj_trigger));
+ apei_resources_init(&trigger_resources);
+ apei_exec_ctx_init(&trigger_ctx, einj_ins_type,
+ ARRAY_SIZE(einj_ins_type),
+ trigger_entry, trigger_tab->entry_count);
+ rc = apei_exec_collect_resources(&trigger_ctx, &trigger_resources);
+ if (rc)
+ goto out_fini;
+ rc = apei_resources_sub(&trigger_resources, &einj_resources);
+ if (rc)
+ goto out_fini;
+ /*
+ * Some firmware will access target address specified in
+ * param1 to trigger the error when injecting memory error.
+ * This will cause resource conflict with regular memory. So
+ * remove it from trigger table resources.
+ */
+ if ((param_extension || acpi5) && (type & MEM_ERROR_MASK) && param2) {
+ struct apei_resources addr_resources;
+ apei_resources_init(&addr_resources);
+ trigger_param_region = einj_get_trigger_parameter_region(
+ trigger_tab, param1, param2);
+ if (trigger_param_region) {
+ rc = apei_resources_add(&addr_resources,
+ trigger_param_region->address,
+ trigger_param_region->bit_width/8, true);
+ if (rc)
+ goto out_fini;
+ rc = apei_resources_sub(&trigger_resources,
+ &addr_resources);
+ }
+ apei_resources_fini(&addr_resources);
+ if (rc)
+ goto out_fini;
+ }
+ rc = apei_resources_request(&trigger_resources, "APEI EINJ Trigger");
+ if (rc)
+ goto out_fini;
+ rc = apei_exec_pre_map_gars(&trigger_ctx);
+ if (rc)
+ goto out_release;
+
+ rc = apei_exec_run(&trigger_ctx, ACPI_EINJ_TRIGGER_ERROR);
+
+ apei_exec_post_unmap_gars(&trigger_ctx);
+out_release:
+ apei_resources_release(&trigger_resources);
+out_fini:
+ apei_resources_fini(&trigger_resources);
+out_rel_entry:
+ release_mem_region(trigger_paddr + sizeof(*trigger_tab),
+ table_size - sizeof(*trigger_tab));
+out_rel_header:
+ release_mem_region(trigger_paddr, sizeof(*trigger_tab));
+out:
+ if (trigger_tab)
+ iounmap(trigger_tab);
+
+ return rc;
+}
+
+static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+ u64 param3, u64 param4)
+{
+ struct apei_exec_context ctx;
+ u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT;
+ int rc;
+
+ einj_exec_ctx_init(&ctx);
+
+ rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION);
+ if (rc)
+ return rc;
+ apei_exec_ctx_set_input(&ctx, type);
+ if (acpi5) {
+ struct set_error_type_with_address *v5param = einj_param;
+
+ v5param->type = type;
+ if (type & ACPI5_VENDOR_BIT) {
+ switch (vendor_flags) {
+ case SETWA_FLAGS_APICID:
+ v5param->apicid = param1;
+ break;
+ case SETWA_FLAGS_MEM:
+ v5param->memory_address = param1;
+ v5param->memory_address_range = param2;
+ break;
+ case SETWA_FLAGS_PCIE_SBDF:
+ v5param->pcie_sbdf = param1;
+ break;
+ }
+ v5param->flags = vendor_flags;
+ } else if (flags) {
+ v5param->flags = flags;
+ v5param->memory_address = param1;
+ v5param->memory_address_range = param2;
+ v5param->apicid = param3;
+ v5param->pcie_sbdf = param4;
+ } else {
+ switch (type) {
+ case ACPI_EINJ_PROCESSOR_CORRECTABLE:
+ case ACPI_EINJ_PROCESSOR_UNCORRECTABLE:
+ case ACPI_EINJ_PROCESSOR_FATAL:
+ v5param->apicid = param1;
+ v5param->flags = SETWA_FLAGS_APICID;
+ break;
+ case ACPI_EINJ_MEMORY_CORRECTABLE:
+ case ACPI_EINJ_MEMORY_UNCORRECTABLE:
+ case ACPI_EINJ_MEMORY_FATAL:
+ v5param->memory_address = param1;
+ v5param->memory_address_range = param2;
+ v5param->flags = SETWA_FLAGS_MEM;
+ break;
+ case ACPI_EINJ_PCIX_CORRECTABLE:
+ case ACPI_EINJ_PCIX_UNCORRECTABLE:
+ case ACPI_EINJ_PCIX_FATAL:
+ v5param->pcie_sbdf = param1;
+ v5param->flags = SETWA_FLAGS_PCIE_SBDF;
+ break;
+ }
+ }
+ } else {
+ rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE);
+ if (rc)
+ return rc;
+ if (einj_param) {
+ struct einj_parameter *v4param = einj_param;
+ v4param->param1 = param1;
+ v4param->param2 = param2;
+ }
+ }
+ rc = apei_exec_run(&ctx, ACPI_EINJ_EXECUTE_OPERATION);
+ if (rc)
+ return rc;
+ for (;;) {
+ rc = apei_exec_run(&ctx, ACPI_EINJ_CHECK_BUSY_STATUS);
+ if (rc)
+ return rc;
+ val = apei_exec_ctx_get_output(&ctx);
+ if (!(val & EINJ_OP_BUSY))
+ break;
+ if (einj_timedout(&timeout))
+ return -EIO;
+ }
+ rc = apei_exec_run(&ctx, ACPI_EINJ_GET_COMMAND_STATUS);
+ if (rc)
+ return rc;
+ val = apei_exec_ctx_get_output(&ctx);
+ if (val != EINJ_STATUS_SUCCESS)
+ return -EBUSY;
+
+ rc = apei_exec_run(&ctx, ACPI_EINJ_GET_TRIGGER_TABLE);
+ if (rc)
+ return rc;
+ trigger_paddr = apei_exec_ctx_get_output(&ctx);
+ if (notrigger == 0) {
+ rc = __einj_error_trigger(trigger_paddr, type, param1, param2);
+ if (rc)
+ return rc;
+ }
+ rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION);
+
+ return rc;
+}
+
+/* Inject the specified hardware error */
+static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+ u64 param3, u64 param4)
+{
+ int rc;
+ u64 base_addr, size;
+
+ /* If user manually set "flags", make sure it is legal */
+ if (flags && (flags &
+ ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
+ return -EINVAL;
+
+ /*
+ * We need extra sanity checks for memory errors.
+ * Other types leap directly to injection.
+ */
+
+ /* ensure param1/param2 existed */
+ if (!(param_extension || acpi5))
+ goto inject;
+
+ /* ensure injection is memory related */
+ if (type & ACPI5_VENDOR_BIT) {
+ if (vendor_flags != SETWA_FLAGS_MEM)
+ goto inject;
+ } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM))
+ goto inject;
+
+ /*
+ * Disallow crazy address masks that give BIOS leeway to pick
+ * injection address almost anywhere. Insist on page or
+ * better granularity and that target address is normal RAM or
+ * NVDIMM.
+ */
+ base_addr = param1 & param2;
+ size = ~param2 + 1;
+
+ if (((param2 & PAGE_MASK) != PAGE_MASK) ||
+ ((region_intersects(base_addr, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE)
+ != REGION_INTERSECTS) &&
+ (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY)
+ != REGION_INTERSECTS) &&
+ (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_SOFT_RESERVED)
+ != REGION_INTERSECTS)))
+ return -EINVAL;
+
+inject:
+ mutex_lock(&einj_mutex);
+ rc = __einj_error_inject(type, flags, param1, param2, param3, param4);
+ mutex_unlock(&einj_mutex);
+
+ return rc;
+}
+
+static u32 error_type;
+static u32 error_flags;
+static u64 error_param1;
+static u64 error_param2;
+static u64 error_param3;
+static u64 error_param4;
+static struct dentry *einj_debug_dir;
+
+static int available_error_type_show(struct seq_file *m, void *v)
+{
+ int rc;
+ u32 available_error_type = 0;
+
+ rc = einj_get_available_error_type(&available_error_type);
+ if (rc)
+ return rc;
+ if (available_error_type & 0x0001)
+ seq_printf(m, "0x00000001\tProcessor Correctable\n");
+ if (available_error_type & 0x0002)
+ seq_printf(m, "0x00000002\tProcessor Uncorrectable non-fatal\n");
+ if (available_error_type & 0x0004)
+ seq_printf(m, "0x00000004\tProcessor Uncorrectable fatal\n");
+ if (available_error_type & 0x0008)
+ seq_printf(m, "0x00000008\tMemory Correctable\n");
+ if (available_error_type & 0x0010)
+ seq_printf(m, "0x00000010\tMemory Uncorrectable non-fatal\n");
+ if (available_error_type & 0x0020)
+ seq_printf(m, "0x00000020\tMemory Uncorrectable fatal\n");
+ if (available_error_type & 0x0040)
+ seq_printf(m, "0x00000040\tPCI Express Correctable\n");
+ if (available_error_type & 0x0080)
+ seq_printf(m, "0x00000080\tPCI Express Uncorrectable non-fatal\n");
+ if (available_error_type & 0x0100)
+ seq_printf(m, "0x00000100\tPCI Express Uncorrectable fatal\n");
+ if (available_error_type & 0x0200)
+ seq_printf(m, "0x00000200\tPlatform Correctable\n");
+ if (available_error_type & 0x0400)
+ seq_printf(m, "0x00000400\tPlatform Uncorrectable non-fatal\n");
+ if (available_error_type & 0x0800)
+ seq_printf(m, "0x00000800\tPlatform Uncorrectable fatal\n");
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(available_error_type);
+
+static int error_type_get(void *data, u64 *val)
+{
+ *val = error_type;
+
+ return 0;
+}
+
+static int error_type_set(void *data, u64 val)
+{
+ int rc;
+ u32 available_error_type = 0;
+ u32 tval, vendor;
+
+ /*
+ * Vendor defined types have 0x80000000 bit set, and
+ * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE
+ */
+ vendor = val & ACPI5_VENDOR_BIT;
+ tval = val & 0x7fffffff;
+
+ /* Only one error type can be specified */
+ if (tval & (tval - 1))
+ return -EINVAL;
+ if (!vendor) {
+ rc = einj_get_available_error_type(&available_error_type);
+ if (rc)
+ return rc;
+ if (!(val & available_error_type))
+ return -EINVAL;
+ }
+ error_type = val;
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(error_type_fops, error_type_get, error_type_set,
+ "0x%llx\n");
+
+static int error_inject_set(void *data, u64 val)
+{
+ if (!error_type)
+ return -EINVAL;
+
+ return einj_error_inject(error_type, error_flags, error_param1, error_param2,
+ error_param3, error_param4);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(error_inject_fops, NULL, error_inject_set, "%llu\n");
+
+static int einj_check_table(struct acpi_table_einj *einj_tab)
+{
+ if ((einj_tab->header_length !=
+ (sizeof(struct acpi_table_einj) - sizeof(einj_tab->header)))
+ && (einj_tab->header_length != sizeof(struct acpi_table_einj)))
+ return -EINVAL;
+ if (einj_tab->header.length < sizeof(struct acpi_table_einj))
+ return -EINVAL;
+ if (einj_tab->entries !=
+ (einj_tab->header.length - sizeof(struct acpi_table_einj)) /
+ sizeof(struct acpi_einj_entry))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int __init einj_init(void)
+{
+ int rc;
+ acpi_status status;
+ struct apei_exec_context ctx;
+
+ if (acpi_disabled) {
+ pr_warn("ACPI disabled.\n");
+ return -ENODEV;
+ }
+
+ status = acpi_get_table(ACPI_SIG_EINJ, 0,
+ (struct acpi_table_header **)&einj_tab);
+ if (status == AE_NOT_FOUND) {
+ pr_warn("EINJ table not found.\n");
+ return -ENODEV;
+ }
+ else if (ACPI_FAILURE(status)) {
+ pr_err("Failed to get EINJ table: %s\n",
+ acpi_format_exception(status));
+ return -EINVAL;
+ }
+
+ rc = einj_check_table(einj_tab);
+ if (rc) {
+ pr_warn(FW_BUG "Invalid EINJ table.\n");
+ goto err_put_table;
+ }
+
+ rc = -ENOMEM;
+ einj_debug_dir = debugfs_create_dir("einj", apei_get_debugfs_dir());
+
+ debugfs_create_file("available_error_type", S_IRUSR, einj_debug_dir,
+ NULL, &available_error_type_fops);
+ debugfs_create_file_unsafe("error_type", 0600, einj_debug_dir,
+ NULL, &error_type_fops);
+ debugfs_create_file_unsafe("error_inject", 0200, einj_debug_dir,
+ NULL, &error_inject_fops);
+
+ apei_resources_init(&einj_resources);
+ einj_exec_ctx_init(&ctx);
+ rc = apei_exec_collect_resources(&ctx, &einj_resources);
+ if (rc) {
+ pr_err("Error collecting EINJ resources.\n");
+ goto err_fini;
+ }
+
+ rc = apei_resources_request(&einj_resources, "APEI EINJ");
+ if (rc) {
+ pr_err("Error requesting memory/port resources.\n");
+ goto err_fini;
+ }
+
+ rc = apei_exec_pre_map_gars(&ctx);
+ if (rc) {
+ pr_err("Error pre-mapping GARs.\n");
+ goto err_release;
+ }
+
+ rc = -ENOMEM;
+ einj_param = einj_get_parameter_address();
+ if ((param_extension || acpi5) && einj_param) {
+ debugfs_create_x32("flags", S_IRUSR | S_IWUSR, einj_debug_dir,
+ &error_flags);
+ debugfs_create_x64("param1", S_IRUSR | S_IWUSR, einj_debug_dir,
+ &error_param1);
+ debugfs_create_x64("param2", S_IRUSR | S_IWUSR, einj_debug_dir,
+ &error_param2);
+ debugfs_create_x64("param3", S_IRUSR | S_IWUSR, einj_debug_dir,
+ &error_param3);
+ debugfs_create_x64("param4", S_IRUSR | S_IWUSR, einj_debug_dir,
+ &error_param4);
+ debugfs_create_x32("notrigger", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &notrigger);
+ }
+
+ if (vendor_dev[0]) {
+ vendor_blob.data = vendor_dev;
+ vendor_blob.size = strlen(vendor_dev);
+ debugfs_create_blob("vendor", S_IRUSR, einj_debug_dir,
+ &vendor_blob);
+ debugfs_create_x32("vendor_flags", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &vendor_flags);
+ }
+
+ pr_info("Error INJection is initialized.\n");
+
+ return 0;
+
+err_release:
+ apei_resources_release(&einj_resources);
+err_fini:
+ apei_resources_fini(&einj_resources);
+ debugfs_remove_recursive(einj_debug_dir);
+err_put_table:
+ acpi_put_table((struct acpi_table_header *)einj_tab);
+
+ return rc;
+}
+
+static void __exit einj_exit(void)
+{
+ struct apei_exec_context ctx;
+
+ if (einj_param) {
+ acpi_size size = (acpi5) ?
+ sizeof(struct set_error_type_with_address) :
+ sizeof(struct einj_parameter);
+
+ acpi_os_unmap_iomem(einj_param, size);
+ }
+ einj_exec_ctx_init(&ctx);
+ apei_exec_post_unmap_gars(&ctx);
+ apei_resources_release(&einj_resources);
+ apei_resources_fini(&einj_resources);
+ debugfs_remove_recursive(einj_debug_dir);
+ acpi_put_table((struct acpi_table_header *)einj_tab);
+}
+
+module_init(einj_init);
+module_exit(einj_exit);
+
+MODULE_AUTHOR("Huang Ying");
+MODULE_DESCRIPTION("APEI Error INJection support");
+MODULE_LICENSE("GPL");
diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
new file mode 100644
index 000000000..c740f0faa
--- /dev/null
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * APEI Error Record Serialization Table debug support
+ *
+ * ERST is a way provided by APEI to save and retrieve hardware error
+ * information to and from a persistent store. This file provide the
+ * debugging/testing support for ERST kernel support and firmware
+ * implementation.
+ *
+ * Copyright 2010 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <acpi/apei.h>
+#include <linux/miscdevice.h>
+
+#include "apei-internal.h"
+
+#define ERST_DBG_PFX "ERST DBG: "
+
+#define ERST_DBG_RECORD_LEN_MAX 0x4000
+
+static void *erst_dbg_buf;
+static unsigned int erst_dbg_buf_len;
+
+/* Prevent erst_dbg_read/write from being invoked concurrently */
+static DEFINE_MUTEX(erst_dbg_mutex);
+
+static int erst_dbg_open(struct inode *inode, struct file *file)
+{
+ int rc, *pos;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ pos = (int *)&file->private_data;
+
+ rc = erst_get_record_id_begin(pos);
+ if (rc)
+ return rc;
+
+ return nonseekable_open(inode, file);
+}
+
+static int erst_dbg_release(struct inode *inode, struct file *file)
+{
+ erst_get_record_id_end();
+
+ return 0;
+}
+
+static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+ int rc;
+ u64 record_id;
+ u32 record_count;
+
+ switch (cmd) {
+ case APEI_ERST_CLEAR_RECORD:
+ rc = copy_from_user(&record_id, (void __user *)arg,
+ sizeof(record_id));
+ if (rc)
+ return -EFAULT;
+ return erst_clear(record_id);
+ case APEI_ERST_GET_RECORD_COUNT:
+ rc = erst_get_record_count();
+ if (rc < 0)
+ return rc;
+ record_count = rc;
+ rc = put_user(record_count, (u32 __user *)arg);
+ if (rc)
+ return rc;
+ return 0;
+ default:
+ return -ENOTTY;
+ }
+}
+
+static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf,
+ size_t usize, loff_t *off)
+{
+ int rc, *pos;
+ ssize_t len = 0;
+ u64 id;
+
+ if (*off)
+ return -EINVAL;
+
+ if (mutex_lock_interruptible(&erst_dbg_mutex) != 0)
+ return -EINTR;
+
+ pos = (int *)&filp->private_data;
+
+retry_next:
+ rc = erst_get_record_id_next(pos, &id);
+ if (rc)
+ goto out;
+ /* no more record */
+ if (id == APEI_ERST_INVALID_RECORD_ID) {
+ /*
+ * If the persistent store is empty initially, the function
+ * 'erst_read' below will return "-ENOENT" value. This causes
+ * 'retry_next' label is entered again. The returned value
+ * should be zero indicating the read operation is EOF.
+ */
+ len = 0;
+
+ goto out;
+ }
+retry:
+ rc = len = erst_read(id, erst_dbg_buf, erst_dbg_buf_len);
+ /* The record may be cleared by others, try read next record */
+ if (rc == -ENOENT)
+ goto retry_next;
+ if (rc < 0)
+ goto out;
+ if (len > ERST_DBG_RECORD_LEN_MAX) {
+ pr_warn(ERST_DBG_PFX
+ "Record (ID: 0x%llx) length is too long: %zd\n", id, len);
+ rc = -EIO;
+ goto out;
+ }
+ if (len > erst_dbg_buf_len) {
+ void *p;
+ rc = -ENOMEM;
+ p = kmalloc(len, GFP_KERNEL);
+ if (!p)
+ goto out;
+ kfree(erst_dbg_buf);
+ erst_dbg_buf = p;
+ erst_dbg_buf_len = len;
+ goto retry;
+ }
+
+ rc = -EINVAL;
+ if (len > usize)
+ goto out;
+
+ rc = -EFAULT;
+ if (copy_to_user(ubuf, erst_dbg_buf, len))
+ goto out;
+ rc = 0;
+out:
+ mutex_unlock(&erst_dbg_mutex);
+ return rc ? rc : len;
+}
+
+static ssize_t erst_dbg_write(struct file *filp, const char __user *ubuf,
+ size_t usize, loff_t *off)
+{
+ int rc;
+ struct cper_record_header *rcd;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (usize > ERST_DBG_RECORD_LEN_MAX) {
+ pr_err(ERST_DBG_PFX "Too long record to be written\n");
+ return -EINVAL;
+ }
+
+ if (mutex_lock_interruptible(&erst_dbg_mutex))
+ return -EINTR;
+ if (usize > erst_dbg_buf_len) {
+ void *p;
+ rc = -ENOMEM;
+ p = kmalloc(usize, GFP_KERNEL);
+ if (!p)
+ goto out;
+ kfree(erst_dbg_buf);
+ erst_dbg_buf = p;
+ erst_dbg_buf_len = usize;
+ }
+ rc = copy_from_user(erst_dbg_buf, ubuf, usize);
+ if (rc) {
+ rc = -EFAULT;
+ goto out;
+ }
+ rcd = erst_dbg_buf;
+ rc = -EINVAL;
+ if (rcd->record_length != usize)
+ goto out;
+
+ rc = erst_write(erst_dbg_buf);
+
+out:
+ mutex_unlock(&erst_dbg_mutex);
+ return rc < 0 ? rc : usize;
+}
+
+static const struct file_operations erst_dbg_ops = {
+ .owner = THIS_MODULE,
+ .open = erst_dbg_open,
+ .release = erst_dbg_release,
+ .read = erst_dbg_read,
+ .write = erst_dbg_write,
+ .unlocked_ioctl = erst_dbg_ioctl,
+ .llseek = no_llseek,
+};
+
+static struct miscdevice erst_dbg_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "erst_dbg",
+ .fops = &erst_dbg_ops,
+};
+
+static __init int erst_dbg_init(void)
+{
+ if (erst_disable) {
+ pr_info(ERST_DBG_PFX "ERST support is disabled.\n");
+ return -ENODEV;
+ }
+ return misc_register(&erst_dbg_dev);
+}
+
+static __exit void erst_dbg_exit(void)
+{
+ misc_deregister(&erst_dbg_dev);
+ kfree(erst_dbg_buf);
+}
+
+module_init(erst_dbg_init);
+module_exit(erst_dbg_exit);
+
+MODULE_AUTHOR("Huang Ying");
+MODULE_DESCRIPTION("APEI Error Record Serialization Table debug support");
+MODULE_LICENSE("GPL");
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
new file mode 100644
index 000000000..83efb52a3
--- /dev/null
+++ b/drivers/acpi/apei/erst.c
@@ -0,0 +1,1206 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * APEI Error Record Serialization Table support
+ *
+ * ERST is a way provided by APEI to save and retrieve hardware error
+ * information to and from a persistent store.
+ *
+ * For more information about ERST, please refer to ACPI Specification
+ * version 4.0, section 17.4.
+ *
+ * Copyright 2010 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/acpi.h>
+#include <linux/uaccess.h>
+#include <linux/cper.h>
+#include <linux/nmi.h>
+#include <linux/hardirq.h>
+#include <linux/pstore.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h> /* kvfree() */
+#include <acpi/apei.h>
+
+#include "apei-internal.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "ERST: " fmt
+
+/* ERST command status */
+#define ERST_STATUS_SUCCESS 0x0
+#define ERST_STATUS_NOT_ENOUGH_SPACE 0x1
+#define ERST_STATUS_HARDWARE_NOT_AVAILABLE 0x2
+#define ERST_STATUS_FAILED 0x3
+#define ERST_STATUS_RECORD_STORE_EMPTY 0x4
+#define ERST_STATUS_RECORD_NOT_FOUND 0x5
+
+#define ERST_TAB_ENTRY(tab) \
+ ((struct acpi_whea_header *)((char *)(tab) + \
+ sizeof(struct acpi_table_erst)))
+
+#define SPIN_UNIT 100 /* 100ns */
+/* Firmware should respond within 1 milliseconds */
+#define FIRMWARE_TIMEOUT (1 * NSEC_PER_MSEC)
+#define FIRMWARE_MAX_STALL 50 /* 50us */
+
+int erst_disable;
+EXPORT_SYMBOL_GPL(erst_disable);
+
+static struct acpi_table_erst *erst_tab;
+
+/* ERST Error Log Address Range atrributes */
+#define ERST_RANGE_RESERVED 0x0001
+#define ERST_RANGE_NVRAM 0x0002
+#define ERST_RANGE_SLOW 0x0004
+
+/*
+ * ERST Error Log Address Range, used as buffer for reading/writing
+ * error records.
+ */
+static struct erst_erange {
+ u64 base;
+ u64 size;
+ void __iomem *vaddr;
+ u32 attr;
+} erst_erange;
+
+/*
+ * Prevent ERST interpreter to run simultaneously, because the
+ * corresponding firmware implementation may not work properly when
+ * invoked simultaneously.
+ *
+ * It is used to provide exclusive accessing for ERST Error Log
+ * Address Range too.
+ */
+static DEFINE_RAW_SPINLOCK(erst_lock);
+
+static inline int erst_errno(int command_status)
+{
+ switch (command_status) {
+ case ERST_STATUS_SUCCESS:
+ return 0;
+ case ERST_STATUS_HARDWARE_NOT_AVAILABLE:
+ return -ENODEV;
+ case ERST_STATUS_NOT_ENOUGH_SPACE:
+ return -ENOSPC;
+ case ERST_STATUS_RECORD_STORE_EMPTY:
+ case ERST_STATUS_RECORD_NOT_FOUND:
+ return -ENOENT;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int erst_timedout(u64 *t, u64 spin_unit)
+{
+ if ((s64)*t < spin_unit) {
+ pr_warn(FW_WARN "Firmware does not respond in time.\n");
+ return 1;
+ }
+ *t -= spin_unit;
+ ndelay(spin_unit);
+ touch_nmi_watchdog();
+ return 0;
+}
+
+static int erst_exec_load_var1(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ return __apei_exec_read_register(entry, &ctx->var1);
+}
+
+static int erst_exec_load_var2(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ return __apei_exec_read_register(entry, &ctx->var2);
+}
+
+static int erst_exec_store_var1(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ return __apei_exec_write_register(entry, ctx->var1);
+}
+
+static int erst_exec_add(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ ctx->var1 += ctx->var2;
+ return 0;
+}
+
+static int erst_exec_subtract(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ ctx->var1 -= ctx->var2;
+ return 0;
+}
+
+static int erst_exec_add_value(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ int rc;
+ u64 val;
+
+ rc = __apei_exec_read_register(entry, &val);
+ if (rc)
+ return rc;
+ val += ctx->value;
+ rc = __apei_exec_write_register(entry, val);
+ return rc;
+}
+
+static int erst_exec_subtract_value(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ int rc;
+ u64 val;
+
+ rc = __apei_exec_read_register(entry, &val);
+ if (rc)
+ return rc;
+ val -= ctx->value;
+ rc = __apei_exec_write_register(entry, val);
+ return rc;
+}
+
+static int erst_exec_stall(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ u64 stall_time;
+
+ if (ctx->value > FIRMWARE_MAX_STALL) {
+ if (!in_nmi())
+ pr_warn(FW_WARN
+ "Too long stall time for stall instruction: 0x%llx.\n",
+ ctx->value);
+ stall_time = FIRMWARE_MAX_STALL;
+ } else
+ stall_time = ctx->value;
+ udelay(stall_time);
+ return 0;
+}
+
+static int erst_exec_stall_while_true(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ int rc;
+ u64 val;
+ u64 timeout = FIRMWARE_TIMEOUT;
+ u64 stall_time;
+
+ if (ctx->var1 > FIRMWARE_MAX_STALL) {
+ if (!in_nmi())
+ pr_warn(FW_WARN
+ "Too long stall time for stall while true instruction: 0x%llx.\n",
+ ctx->var1);
+ stall_time = FIRMWARE_MAX_STALL;
+ } else
+ stall_time = ctx->var1;
+
+ for (;;) {
+ rc = __apei_exec_read_register(entry, &val);
+ if (rc)
+ return rc;
+ if (val != ctx->value)
+ break;
+ if (erst_timedout(&timeout, stall_time * NSEC_PER_USEC))
+ return -EIO;
+ }
+ return 0;
+}
+
+static int erst_exec_skip_next_instruction_if_true(
+ struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ int rc;
+ u64 val;
+
+ rc = __apei_exec_read_register(entry, &val);
+ if (rc)
+ return rc;
+ if (val == ctx->value) {
+ ctx->ip += 2;
+ return APEI_EXEC_SET_IP;
+ }
+
+ return 0;
+}
+
+static int erst_exec_goto(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ ctx->ip = ctx->value;
+ return APEI_EXEC_SET_IP;
+}
+
+static int erst_exec_set_src_address_base(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ return __apei_exec_read_register(entry, &ctx->src_base);
+}
+
+static int erst_exec_set_dst_address_base(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ return __apei_exec_read_register(entry, &ctx->dst_base);
+}
+
+static int erst_exec_move_data(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ int rc;
+ u64 offset;
+ void *src, *dst;
+
+ /* ioremap does not work in interrupt context */
+ if (in_interrupt()) {
+ pr_warn("MOVE_DATA can not be used in interrupt context.\n");
+ return -EBUSY;
+ }
+
+ rc = __apei_exec_read_register(entry, &offset);
+ if (rc)
+ return rc;
+
+ src = ioremap(ctx->src_base + offset, ctx->var2);
+ if (!src)
+ return -ENOMEM;
+ dst = ioremap(ctx->dst_base + offset, ctx->var2);
+ if (!dst) {
+ iounmap(src);
+ return -ENOMEM;
+ }
+
+ memmove(dst, src, ctx->var2);
+
+ iounmap(src);
+ iounmap(dst);
+
+ return 0;
+}
+
+static struct apei_exec_ins_type erst_ins_type[] = {
+ [ACPI_ERST_READ_REGISTER] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = apei_exec_read_register,
+ },
+ [ACPI_ERST_READ_REGISTER_VALUE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = apei_exec_read_register_value,
+ },
+ [ACPI_ERST_WRITE_REGISTER] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = apei_exec_write_register,
+ },
+ [ACPI_ERST_WRITE_REGISTER_VALUE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = apei_exec_write_register_value,
+ },
+ [ACPI_ERST_NOOP] = {
+ .flags = 0,
+ .run = apei_exec_noop,
+ },
+ [ACPI_ERST_LOAD_VAR1] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_load_var1,
+ },
+ [ACPI_ERST_LOAD_VAR2] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_load_var2,
+ },
+ [ACPI_ERST_STORE_VAR1] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_store_var1,
+ },
+ [ACPI_ERST_ADD] = {
+ .flags = 0,
+ .run = erst_exec_add,
+ },
+ [ACPI_ERST_SUBTRACT] = {
+ .flags = 0,
+ .run = erst_exec_subtract,
+ },
+ [ACPI_ERST_ADD_VALUE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_add_value,
+ },
+ [ACPI_ERST_SUBTRACT_VALUE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_subtract_value,
+ },
+ [ACPI_ERST_STALL] = {
+ .flags = 0,
+ .run = erst_exec_stall,
+ },
+ [ACPI_ERST_STALL_WHILE_TRUE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_stall_while_true,
+ },
+ [ACPI_ERST_SKIP_NEXT_IF_TRUE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_skip_next_instruction_if_true,
+ },
+ [ACPI_ERST_GOTO] = {
+ .flags = 0,
+ .run = erst_exec_goto,
+ },
+ [ACPI_ERST_SET_SRC_ADDRESS_BASE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_set_src_address_base,
+ },
+ [ACPI_ERST_SET_DST_ADDRESS_BASE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_set_dst_address_base,
+ },
+ [ACPI_ERST_MOVE_DATA] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_move_data,
+ },
+};
+
+static inline void erst_exec_ctx_init(struct apei_exec_context *ctx)
+{
+ apei_exec_ctx_init(ctx, erst_ins_type, ARRAY_SIZE(erst_ins_type),
+ ERST_TAB_ENTRY(erst_tab), erst_tab->entries);
+}
+
+static int erst_get_erange(struct erst_erange *range)
+{
+ struct apei_exec_context ctx;
+ int rc;
+
+ erst_exec_ctx_init(&ctx);
+ rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_RANGE);
+ if (rc)
+ return rc;
+ range->base = apei_exec_ctx_get_output(&ctx);
+ rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_LENGTH);
+ if (rc)
+ return rc;
+ range->size = apei_exec_ctx_get_output(&ctx);
+ rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_ATTRIBUTES);
+ if (rc)
+ return rc;
+ range->attr = apei_exec_ctx_get_output(&ctx);
+
+ return 0;
+}
+
+static ssize_t __erst_get_record_count(void)
+{
+ struct apei_exec_context ctx;
+ int rc;
+
+ erst_exec_ctx_init(&ctx);
+ rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_COUNT);
+ if (rc)
+ return rc;
+ return apei_exec_ctx_get_output(&ctx);
+}
+
+ssize_t erst_get_record_count(void)
+{
+ ssize_t count;
+ unsigned long flags;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ raw_spin_lock_irqsave(&erst_lock, flags);
+ count = __erst_get_record_count();
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
+
+ return count;
+}
+EXPORT_SYMBOL_GPL(erst_get_record_count);
+
+#define ERST_RECORD_ID_CACHE_SIZE_MIN 16
+#define ERST_RECORD_ID_CACHE_SIZE_MAX 1024
+
+struct erst_record_id_cache {
+ struct mutex lock;
+ u64 *entries;
+ int len;
+ int size;
+ int refcount;
+};
+
+static struct erst_record_id_cache erst_record_id_cache = {
+ .lock = __MUTEX_INITIALIZER(erst_record_id_cache.lock),
+ .refcount = 0,
+};
+
+static int __erst_get_next_record_id(u64 *record_id)
+{
+ struct apei_exec_context ctx;
+ int rc;
+
+ erst_exec_ctx_init(&ctx);
+ rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_ID);
+ if (rc)
+ return rc;
+ *record_id = apei_exec_ctx_get_output(&ctx);
+
+ return 0;
+}
+
+int erst_get_record_id_begin(int *pos)
+{
+ int rc;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
+ if (rc)
+ return rc;
+ erst_record_id_cache.refcount++;
+ mutex_unlock(&erst_record_id_cache.lock);
+
+ *pos = 0;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(erst_get_record_id_begin);
+
+/* erst_record_id_cache.lock must be held by caller */
+static int __erst_record_id_cache_add_one(void)
+{
+ u64 id, prev_id, first_id;
+ int i, rc;
+ u64 *entries;
+ unsigned long flags;
+
+ id = prev_id = first_id = APEI_ERST_INVALID_RECORD_ID;
+retry:
+ raw_spin_lock_irqsave(&erst_lock, flags);
+ rc = __erst_get_next_record_id(&id);
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
+ if (rc == -ENOENT)
+ return 0;
+ if (rc)
+ return rc;
+ if (id == APEI_ERST_INVALID_RECORD_ID)
+ return 0;
+ /* can not skip current ID, or loop back to first ID */
+ if (id == prev_id || id == first_id)
+ return 0;
+ if (first_id == APEI_ERST_INVALID_RECORD_ID)
+ first_id = id;
+ prev_id = id;
+
+ entries = erst_record_id_cache.entries;
+ for (i = 0; i < erst_record_id_cache.len; i++) {
+ if (entries[i] == id)
+ break;
+ }
+ /* record id already in cache, try next */
+ if (i < erst_record_id_cache.len)
+ goto retry;
+ if (erst_record_id_cache.len >= erst_record_id_cache.size) {
+ int new_size;
+ u64 *new_entries;
+
+ new_size = erst_record_id_cache.size * 2;
+ new_size = clamp_val(new_size, ERST_RECORD_ID_CACHE_SIZE_MIN,
+ ERST_RECORD_ID_CACHE_SIZE_MAX);
+ if (new_size <= erst_record_id_cache.size) {
+ if (printk_ratelimit())
+ pr_warn(FW_WARN "too many record IDs!\n");
+ return 0;
+ }
+ new_entries = kvmalloc_array(new_size, sizeof(entries[0]),
+ GFP_KERNEL);
+ if (!new_entries)
+ return -ENOMEM;
+ memcpy(new_entries, entries,
+ erst_record_id_cache.len * sizeof(entries[0]));
+ kvfree(entries);
+ erst_record_id_cache.entries = entries = new_entries;
+ erst_record_id_cache.size = new_size;
+ }
+ entries[i] = id;
+ erst_record_id_cache.len++;
+
+ return 1;
+}
+
+/*
+ * Get the record ID of an existing error record on the persistent
+ * storage. If there is no error record on the persistent storage, the
+ * returned record_id is APEI_ERST_INVALID_RECORD_ID.
+ */
+int erst_get_record_id_next(int *pos, u64 *record_id)
+{
+ int rc = 0;
+ u64 *entries;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ /* must be enclosed by erst_get_record_id_begin/end */
+ BUG_ON(!erst_record_id_cache.refcount);
+ BUG_ON(*pos < 0 || *pos > erst_record_id_cache.len);
+
+ mutex_lock(&erst_record_id_cache.lock);
+ entries = erst_record_id_cache.entries;
+ for (; *pos < erst_record_id_cache.len; (*pos)++)
+ if (entries[*pos] != APEI_ERST_INVALID_RECORD_ID)
+ break;
+ /* found next record id in cache */
+ if (*pos < erst_record_id_cache.len) {
+ *record_id = entries[*pos];
+ (*pos)++;
+ goto out_unlock;
+ }
+
+ /* Try to add one more record ID to cache */
+ rc = __erst_record_id_cache_add_one();
+ if (rc < 0)
+ goto out_unlock;
+ /* successfully add one new ID */
+ if (rc == 1) {
+ *record_id = erst_record_id_cache.entries[*pos];
+ (*pos)++;
+ rc = 0;
+ } else {
+ *pos = -1;
+ *record_id = APEI_ERST_INVALID_RECORD_ID;
+ }
+out_unlock:
+ mutex_unlock(&erst_record_id_cache.lock);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(erst_get_record_id_next);
+
+/* erst_record_id_cache.lock must be held by caller */
+static void __erst_record_id_cache_compact(void)
+{
+ int i, wpos = 0;
+ u64 *entries;
+
+ if (erst_record_id_cache.refcount)
+ return;
+
+ entries = erst_record_id_cache.entries;
+ for (i = 0; i < erst_record_id_cache.len; i++) {
+ if (entries[i] == APEI_ERST_INVALID_RECORD_ID)
+ continue;
+ if (wpos != i)
+ entries[wpos] = entries[i];
+ wpos++;
+ }
+ erst_record_id_cache.len = wpos;
+}
+
+void erst_get_record_id_end(void)
+{
+ /*
+ * erst_disable != 0 should be detected by invoker via the
+ * return value of erst_get_record_id_begin/next, so this
+ * function should not be called for erst_disable != 0.
+ */
+ BUG_ON(erst_disable);
+
+ mutex_lock(&erst_record_id_cache.lock);
+ erst_record_id_cache.refcount--;
+ BUG_ON(erst_record_id_cache.refcount < 0);
+ __erst_record_id_cache_compact();
+ mutex_unlock(&erst_record_id_cache.lock);
+}
+EXPORT_SYMBOL_GPL(erst_get_record_id_end);
+
+static int __erst_write_to_storage(u64 offset)
+{
+ struct apei_exec_context ctx;
+ u64 timeout = FIRMWARE_TIMEOUT;
+ u64 val;
+ int rc;
+
+ erst_exec_ctx_init(&ctx);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE);
+ if (rc)
+ return rc;
+ apei_exec_ctx_set_input(&ctx, offset);
+ rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET);
+ if (rc)
+ return rc;
+ rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
+ if (rc)
+ return rc;
+ for (;;) {
+ rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
+ if (rc)
+ return rc;
+ val = apei_exec_ctx_get_output(&ctx);
+ if (!val)
+ break;
+ if (erst_timedout(&timeout, SPIN_UNIT))
+ return -EIO;
+ }
+ rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
+ if (rc)
+ return rc;
+ val = apei_exec_ctx_get_output(&ctx);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
+ if (rc)
+ return rc;
+
+ return erst_errno(val);
+}
+
+static int __erst_read_from_storage(u64 record_id, u64 offset)
+{
+ struct apei_exec_context ctx;
+ u64 timeout = FIRMWARE_TIMEOUT;
+ u64 val;
+ int rc;
+
+ erst_exec_ctx_init(&ctx);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ);
+ if (rc)
+ return rc;
+ apei_exec_ctx_set_input(&ctx, offset);
+ rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET);
+ if (rc)
+ return rc;
+ apei_exec_ctx_set_input(&ctx, record_id);
+ rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID);
+ if (rc)
+ return rc;
+ rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
+ if (rc)
+ return rc;
+ for (;;) {
+ rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
+ if (rc)
+ return rc;
+ val = apei_exec_ctx_get_output(&ctx);
+ if (!val)
+ break;
+ if (erst_timedout(&timeout, SPIN_UNIT))
+ return -EIO;
+ };
+ rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
+ if (rc)
+ return rc;
+ val = apei_exec_ctx_get_output(&ctx);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
+ if (rc)
+ return rc;
+
+ return erst_errno(val);
+}
+
+static int __erst_clear_from_storage(u64 record_id)
+{
+ struct apei_exec_context ctx;
+ u64 timeout = FIRMWARE_TIMEOUT;
+ u64 val;
+ int rc;
+
+ erst_exec_ctx_init(&ctx);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR);
+ if (rc)
+ return rc;
+ apei_exec_ctx_set_input(&ctx, record_id);
+ rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID);
+ if (rc)
+ return rc;
+ rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
+ if (rc)
+ return rc;
+ for (;;) {
+ rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
+ if (rc)
+ return rc;
+ val = apei_exec_ctx_get_output(&ctx);
+ if (!val)
+ break;
+ if (erst_timedout(&timeout, SPIN_UNIT))
+ return -EIO;
+ }
+ rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
+ if (rc)
+ return rc;
+ val = apei_exec_ctx_get_output(&ctx);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
+ if (rc)
+ return rc;
+
+ return erst_errno(val);
+}
+
+/* NVRAM ERST Error Log Address Range is not supported yet */
+static void pr_unimpl_nvram(void)
+{
+ if (printk_ratelimit())
+ pr_warn("NVRAM ERST Log Address Range not implemented yet.\n");
+}
+
+static int __erst_write_to_nvram(const struct cper_record_header *record)
+{
+ /* do not print message, because printk is not safe for NMI */
+ return -ENOSYS;
+}
+
+static int __erst_read_to_erange_from_nvram(u64 record_id, u64 *offset)
+{
+ pr_unimpl_nvram();
+ return -ENOSYS;
+}
+
+static int __erst_clear_from_nvram(u64 record_id)
+{
+ pr_unimpl_nvram();
+ return -ENOSYS;
+}
+
+int erst_write(const struct cper_record_header *record)
+{
+ int rc;
+ unsigned long flags;
+ struct cper_record_header *rcd_erange;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ if (memcmp(record->signature, CPER_SIG_RECORD, CPER_SIG_SIZE))
+ return -EINVAL;
+
+ if (erst_erange.attr & ERST_RANGE_NVRAM) {
+ if (!raw_spin_trylock_irqsave(&erst_lock, flags))
+ return -EBUSY;
+ rc = __erst_write_to_nvram(record);
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
+ return rc;
+ }
+
+ if (record->record_length > erst_erange.size)
+ return -EINVAL;
+
+ if (!raw_spin_trylock_irqsave(&erst_lock, flags))
+ return -EBUSY;
+ memcpy(erst_erange.vaddr, record, record->record_length);
+ rcd_erange = erst_erange.vaddr;
+ /* signature for serialization system */
+ memcpy(&rcd_erange->persistence_information, "ER", 2);
+
+ rc = __erst_write_to_storage(0);
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(erst_write);
+
+static int __erst_read_to_erange(u64 record_id, u64 *offset)
+{
+ int rc;
+
+ if (erst_erange.attr & ERST_RANGE_NVRAM)
+ return __erst_read_to_erange_from_nvram(
+ record_id, offset);
+
+ rc = __erst_read_from_storage(record_id, 0);
+ if (rc)
+ return rc;
+ *offset = 0;
+
+ return 0;
+}
+
+static ssize_t __erst_read(u64 record_id, struct cper_record_header *record,
+ size_t buflen)
+{
+ int rc;
+ u64 offset, len = 0;
+ struct cper_record_header *rcd_tmp;
+
+ rc = __erst_read_to_erange(record_id, &offset);
+ if (rc)
+ return rc;
+ rcd_tmp = erst_erange.vaddr + offset;
+ len = rcd_tmp->record_length;
+ if (len <= buflen)
+ memcpy(record, rcd_tmp, len);
+
+ return len;
+}
+
+/*
+ * If return value > buflen, the buffer size is not big enough,
+ * else if return value < 0, something goes wrong,
+ * else everything is OK, and return value is record length
+ */
+ssize_t erst_read(u64 record_id, struct cper_record_header *record,
+ size_t buflen)
+{
+ ssize_t len;
+ unsigned long flags;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ raw_spin_lock_irqsave(&erst_lock, flags);
+ len = __erst_read(record_id, record, buflen);
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
+ return len;
+}
+EXPORT_SYMBOL_GPL(erst_read);
+
+int erst_clear(u64 record_id)
+{
+ int rc, i;
+ unsigned long flags;
+ u64 *entries;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
+ if (rc)
+ return rc;
+ raw_spin_lock_irqsave(&erst_lock, flags);
+ if (erst_erange.attr & ERST_RANGE_NVRAM)
+ rc = __erst_clear_from_nvram(record_id);
+ else
+ rc = __erst_clear_from_storage(record_id);
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
+ if (rc)
+ goto out;
+ entries = erst_record_id_cache.entries;
+ for (i = 0; i < erst_record_id_cache.len; i++) {
+ if (entries[i] == record_id)
+ entries[i] = APEI_ERST_INVALID_RECORD_ID;
+ }
+ __erst_record_id_cache_compact();
+out:
+ mutex_unlock(&erst_record_id_cache.lock);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(erst_clear);
+
+static int __init setup_erst_disable(char *str)
+{
+ erst_disable = 1;
+ return 1;
+}
+
+__setup("erst_disable", setup_erst_disable);
+
+static int erst_check_table(struct acpi_table_erst *erst_tab)
+{
+ if ((erst_tab->header_length !=
+ (sizeof(struct acpi_table_erst) - sizeof(erst_tab->header)))
+ && (erst_tab->header_length != sizeof(struct acpi_table_erst)))
+ return -EINVAL;
+ if (erst_tab->header.length < sizeof(struct acpi_table_erst))
+ return -EINVAL;
+ if (erst_tab->entries !=
+ (erst_tab->header.length - sizeof(struct acpi_table_erst)) /
+ sizeof(struct acpi_erst_entry))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int erst_open_pstore(struct pstore_info *psi);
+static int erst_close_pstore(struct pstore_info *psi);
+static ssize_t erst_reader(struct pstore_record *record);
+static int erst_writer(struct pstore_record *record);
+static int erst_clearer(struct pstore_record *record);
+
+static struct pstore_info erst_info = {
+ .owner = THIS_MODULE,
+ .name = "erst",
+ .flags = PSTORE_FLAGS_DMESG,
+ .open = erst_open_pstore,
+ .close = erst_close_pstore,
+ .read = erst_reader,
+ .write = erst_writer,
+ .erase = erst_clearer
+};
+
+#define CPER_CREATOR_PSTORE \
+ GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
+ 0x64, 0x90, 0xb8, 0x9d)
+#define CPER_SECTION_TYPE_DMESG \
+ GUID_INIT(0xc197e04e, 0xd545, 0x4a70, 0x9c, 0x17, 0xa5, 0x54, \
+ 0x94, 0x19, 0xeb, 0x12)
+#define CPER_SECTION_TYPE_DMESG_Z \
+ GUID_INIT(0x4f118707, 0x04dd, 0x4055, 0xb5, 0xdd, 0x95, 0x6d, \
+ 0x34, 0xdd, 0xfa, 0xc6)
+#define CPER_SECTION_TYPE_MCE \
+ GUID_INIT(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
+ 0x04, 0x4a, 0x38, 0xfc)
+
+struct cper_pstore_record {
+ struct cper_record_header hdr;
+ struct cper_section_descriptor sec_hdr;
+ char data[];
+} __packed;
+
+static int reader_pos;
+
+static int erst_open_pstore(struct pstore_info *psi)
+{
+ int rc;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ rc = erst_get_record_id_begin(&reader_pos);
+
+ return rc;
+}
+
+static int erst_close_pstore(struct pstore_info *psi)
+{
+ erst_get_record_id_end();
+
+ return 0;
+}
+
+static ssize_t erst_reader(struct pstore_record *record)
+{
+ int rc;
+ ssize_t len = 0;
+ u64 record_id;
+ struct cper_pstore_record *rcd;
+ size_t rcd_len = sizeof(*rcd) + erst_info.bufsize;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ rcd = kmalloc(rcd_len, GFP_KERNEL);
+ if (!rcd) {
+ rc = -ENOMEM;
+ goto out;
+ }
+skip:
+ rc = erst_get_record_id_next(&reader_pos, &record_id);
+ if (rc)
+ goto out;
+
+ /* no more record */
+ if (record_id == APEI_ERST_INVALID_RECORD_ID) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ len = erst_read(record_id, &rcd->hdr, rcd_len);
+ /* The record may be cleared by others, try read next record */
+ if (len == -ENOENT)
+ goto skip;
+ else if (len < 0 || len < sizeof(*rcd)) {
+ rc = -EIO;
+ goto out;
+ }
+ if (!guid_equal(&rcd->hdr.creator_id, &CPER_CREATOR_PSTORE))
+ goto skip;
+
+ record->buf = kmalloc(len, GFP_KERNEL);
+ if (record->buf == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ memcpy(record->buf, rcd->data, len - sizeof(*rcd));
+ record->id = record_id;
+ record->compressed = false;
+ record->ecc_notice_size = 0;
+ if (guid_equal(&rcd->sec_hdr.section_type, &CPER_SECTION_TYPE_DMESG_Z)) {
+ record->type = PSTORE_TYPE_DMESG;
+ record->compressed = true;
+ } else if (guid_equal(&rcd->sec_hdr.section_type, &CPER_SECTION_TYPE_DMESG))
+ record->type = PSTORE_TYPE_DMESG;
+ else if (guid_equal(&rcd->sec_hdr.section_type, &CPER_SECTION_TYPE_MCE))
+ record->type = PSTORE_TYPE_MCE;
+ else
+ record->type = PSTORE_TYPE_MAX;
+
+ if (rcd->hdr.validation_bits & CPER_VALID_TIMESTAMP)
+ record->time.tv_sec = rcd->hdr.timestamp;
+ else
+ record->time.tv_sec = 0;
+ record->time.tv_nsec = 0;
+
+out:
+ kfree(rcd);
+ return (rc < 0) ? rc : (len - sizeof(*rcd));
+}
+
+static int erst_writer(struct pstore_record *record)
+{
+ struct cper_pstore_record *rcd = (struct cper_pstore_record *)
+ (erst_info.buf - sizeof(*rcd));
+ int ret;
+
+ memset(rcd, 0, sizeof(*rcd));
+ memcpy(rcd->hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
+ rcd->hdr.revision = CPER_RECORD_REV;
+ rcd->hdr.signature_end = CPER_SIG_END;
+ rcd->hdr.section_count = 1;
+ rcd->hdr.error_severity = CPER_SEV_FATAL;
+ /* timestamp valid. platform_id, partition_id are invalid */
+ rcd->hdr.validation_bits = CPER_VALID_TIMESTAMP;
+ rcd->hdr.timestamp = ktime_get_real_seconds();
+ rcd->hdr.record_length = sizeof(*rcd) + record->size;
+ rcd->hdr.creator_id = CPER_CREATOR_PSTORE;
+ rcd->hdr.notification_type = CPER_NOTIFY_MCE;
+ rcd->hdr.record_id = cper_next_record_id();
+ rcd->hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
+
+ rcd->sec_hdr.section_offset = sizeof(*rcd);
+ rcd->sec_hdr.section_length = record->size;
+ rcd->sec_hdr.revision = CPER_SEC_REV;
+ /* fru_id and fru_text is invalid */
+ rcd->sec_hdr.validation_bits = 0;
+ rcd->sec_hdr.flags = CPER_SEC_PRIMARY;
+ switch (record->type) {
+ case PSTORE_TYPE_DMESG:
+ if (record->compressed)
+ rcd->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG_Z;
+ else
+ rcd->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG;
+ break;
+ case PSTORE_TYPE_MCE:
+ rcd->sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
+ break;
+ default:
+ return -EINVAL;
+ }
+ rcd->sec_hdr.section_severity = CPER_SEV_FATAL;
+
+ ret = erst_write(&rcd->hdr);
+ record->id = rcd->hdr.record_id;
+
+ return ret;
+}
+
+static int erst_clearer(struct pstore_record *record)
+{
+ return erst_clear(record->id);
+}
+
+static int __init erst_init(void)
+{
+ int rc = 0;
+ acpi_status status;
+ struct apei_exec_context ctx;
+ struct apei_resources erst_resources;
+ struct resource *r;
+ char *buf;
+
+ if (acpi_disabled)
+ goto err;
+
+ if (erst_disable) {
+ pr_info(
+ "Error Record Serialization Table (ERST) support is disabled.\n");
+ goto err;
+ }
+
+ status = acpi_get_table(ACPI_SIG_ERST, 0,
+ (struct acpi_table_header **)&erst_tab);
+ if (status == AE_NOT_FOUND)
+ goto err;
+ else if (ACPI_FAILURE(status)) {
+ const char *msg = acpi_format_exception(status);
+ pr_err("Failed to get table, %s\n", msg);
+ rc = -EINVAL;
+ goto err;
+ }
+
+ rc = erst_check_table(erst_tab);
+ if (rc) {
+ pr_err(FW_BUG "ERST table is invalid.\n");
+ goto err_put_erst_tab;
+ }
+
+ apei_resources_init(&erst_resources);
+ erst_exec_ctx_init(&ctx);
+ rc = apei_exec_collect_resources(&ctx, &erst_resources);
+ if (rc)
+ goto err_fini;
+ rc = apei_resources_request(&erst_resources, "APEI ERST");
+ if (rc)
+ goto err_fini;
+ rc = apei_exec_pre_map_gars(&ctx);
+ if (rc)
+ goto err_release;
+ rc = erst_get_erange(&erst_erange);
+ if (rc) {
+ if (rc == -ENODEV)
+ pr_info(
+ "The corresponding hardware device or firmware implementation "
+ "is not available.\n");
+ else
+ pr_err("Failed to get Error Log Address Range.\n");
+ goto err_unmap_reg;
+ }
+
+ r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST");
+ if (!r) {
+ pr_err("Can not request [mem %#010llx-%#010llx] for ERST.\n",
+ (unsigned long long)erst_erange.base,
+ (unsigned long long)erst_erange.base + erst_erange.size - 1);
+ rc = -EIO;
+ goto err_unmap_reg;
+ }
+ rc = -ENOMEM;
+ erst_erange.vaddr = ioremap_cache(erst_erange.base,
+ erst_erange.size);
+ if (!erst_erange.vaddr)
+ goto err_release_erange;
+
+ pr_info(
+ "Error Record Serialization Table (ERST) support is initialized.\n");
+
+ buf = kmalloc(erst_erange.size, GFP_KERNEL);
+ if (buf) {
+ erst_info.buf = buf + sizeof(struct cper_pstore_record);
+ erst_info.bufsize = erst_erange.size -
+ sizeof(struct cper_pstore_record);
+ rc = pstore_register(&erst_info);
+ if (rc) {
+ if (rc != -EPERM)
+ pr_info(
+ "Could not register with persistent store.\n");
+ erst_info.buf = NULL;
+ erst_info.bufsize = 0;
+ kfree(buf);
+ }
+ } else
+ pr_err(
+ "Failed to allocate %lld bytes for persistent store error log.\n",
+ erst_erange.size);
+
+ /* Cleanup ERST Resources */
+ apei_resources_fini(&erst_resources);
+
+ return 0;
+
+err_release_erange:
+ release_mem_region(erst_erange.base, erst_erange.size);
+err_unmap_reg:
+ apei_exec_post_unmap_gars(&ctx);
+err_release:
+ apei_resources_release(&erst_resources);
+err_fini:
+ apei_resources_fini(&erst_resources);
+err_put_erst_tab:
+ acpi_put_table((struct acpi_table_header *)erst_tab);
+err:
+ erst_disable = 1;
+ return rc;
+}
+
+device_initcall(erst_init);
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
new file mode 100644
index 000000000..8678e1621
--- /dev/null
+++ b/drivers/acpi/apei/ghes.c
@@ -0,0 +1,1499 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * APEI Generic Hardware Error Source support
+ *
+ * Generic Hardware Error Source provides a way to report platform
+ * hardware errors (such as that from chipset). It works in so called
+ * "Firmware First" mode, that is, hardware errors are reported to
+ * firmware firstly, then reported to Linux by firmware. This way,
+ * some non-standard hardware error registers or non-standard hardware
+ * link can be checked by firmware to produce more hardware error
+ * information for Linux.
+ *
+ * For more information about Generic Hardware Error Source, please
+ * refer to ACPI Specification version 4.0, section 17.3.2.6
+ *
+ * Copyright 2010,2011 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/arm_sdei.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/timer.h>
+#include <linux/cper.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/ratelimit.h>
+#include <linux/vmalloc.h>
+#include <linux/irq_work.h>
+#include <linux/llist.h>
+#include <linux/genalloc.h>
+#include <linux/pci.h>
+#include <linux/pfn.h>
+#include <linux/aer.h>
+#include <linux/nmi.h>
+#include <linux/sched/clock.h>
+#include <linux/uuid.h>
+#include <linux/ras.h>
+#include <linux/task_work.h>
+
+#include <acpi/actbl1.h>
+#include <acpi/ghes.h>
+#include <acpi/apei.h>
+#include <asm/fixmap.h>
+#include <asm/tlbflush.h>
+#include <ras/ras_event.h>
+
+#include "apei-internal.h"
+
+#define GHES_PFX "GHES: "
+
+#define GHES_ESTATUS_MAX_SIZE 65536
+#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536
+
+#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3
+
+/* This is just an estimation for memory pool allocation */
+#define GHES_ESTATUS_CACHE_AVG_SIZE 512
+
+#define GHES_ESTATUS_CACHES_SIZE 4
+
+#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL
+/* Prevent too many caches are allocated because of RCU */
+#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2)
+
+#define GHES_ESTATUS_CACHE_LEN(estatus_len) \
+ (sizeof(struct ghes_estatus_cache) + (estatus_len))
+#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \
+ ((struct acpi_hest_generic_status *) \
+ ((struct ghes_estatus_cache *)(estatus_cache) + 1))
+
+#define GHES_ESTATUS_NODE_LEN(estatus_len) \
+ (sizeof(struct ghes_estatus_node) + (estatus_len))
+#define GHES_ESTATUS_FROM_NODE(estatus_node) \
+ ((struct acpi_hest_generic_status *) \
+ ((struct ghes_estatus_node *)(estatus_node) + 1))
+
+#define GHES_VENDOR_ENTRY_LEN(gdata_len) \
+ (sizeof(struct ghes_vendor_record_entry) + (gdata_len))
+#define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry) \
+ ((struct acpi_hest_generic_data *) \
+ ((struct ghes_vendor_record_entry *)(vendor_entry) + 1))
+
+/*
+ * NMI-like notifications vary by architecture, before the compiler can prune
+ * unused static functions it needs a value for these enums.
+ */
+#ifndef CONFIG_ARM_SDE_INTERFACE
+#define FIX_APEI_GHES_SDEI_NORMAL __end_of_fixed_addresses
+#define FIX_APEI_GHES_SDEI_CRITICAL __end_of_fixed_addresses
+#endif
+
+static inline bool is_hest_type_generic_v2(struct ghes *ghes)
+{
+ return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
+}
+
+/*
+ * This driver isn't really modular, however for the time being,
+ * continuing to use module_param is the easiest way to remain
+ * compatible with existing boot arg use cases.
+ */
+bool ghes_disable;
+module_param_named(disable, ghes_disable, bool, 0);
+
+/*
+ * All error sources notified with HED (Hardware Error Device) share a
+ * single notifier callback, so they need to be linked and checked one
+ * by one. This holds true for NMI too.
+ *
+ * RCU is used for these lists, so ghes_list_mutex is only used for
+ * list changing, not for traversing.
+ */
+static LIST_HEAD(ghes_hed);
+static DEFINE_MUTEX(ghes_list_mutex);
+
+/*
+ * Because the memory area used to transfer hardware error information
+ * from BIOS to Linux can be determined only in NMI, IRQ or timer
+ * handler, but general ioremap can not be used in atomic context, so
+ * the fixmap is used instead.
+ *
+ * This spinlock is used to prevent the fixmap entry from being used
+ * simultaneously.
+ */
+static DEFINE_SPINLOCK(ghes_notify_lock_irq);
+
+struct ghes_vendor_record_entry {
+ struct work_struct work;
+ int error_severity;
+ char vendor_record[];
+};
+
+static struct gen_pool *ghes_estatus_pool;
+static unsigned long ghes_estatus_pool_size_request;
+
+static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
+static atomic_t ghes_estatus_cache_alloced;
+
+static int ghes_panic_timeout __read_mostly = 30;
+
+static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx)
+{
+ phys_addr_t paddr;
+ pgprot_t prot;
+
+ paddr = PFN_PHYS(pfn);
+ prot = arch_apei_get_mem_attribute(paddr);
+ __set_fixmap(fixmap_idx, paddr, prot);
+
+ return (void __iomem *) __fix_to_virt(fixmap_idx);
+}
+
+static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx)
+{
+ int _idx = virt_to_fix((unsigned long)vaddr);
+
+ WARN_ON_ONCE(fixmap_idx != _idx);
+ clear_fixmap(fixmap_idx);
+}
+
+int ghes_estatus_pool_init(unsigned int num_ghes)
+{
+ unsigned long addr, len;
+ int rc;
+
+ ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
+ if (!ghes_estatus_pool)
+ return -ENOMEM;
+
+ len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX;
+ len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE);
+
+ ghes_estatus_pool_size_request = PAGE_ALIGN(len);
+ addr = (unsigned long)vmalloc(PAGE_ALIGN(len));
+ if (!addr)
+ goto err_pool_alloc;
+
+ rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
+ if (rc)
+ goto err_pool_add;
+
+ return 0;
+
+err_pool_add:
+ vfree((void *)addr);
+
+err_pool_alloc:
+ gen_pool_destroy(ghes_estatus_pool);
+
+ return -ENOMEM;
+}
+
+static int map_gen_v2(struct ghes *ghes)
+{
+ return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
+}
+
+static void unmap_gen_v2(struct ghes *ghes)
+{
+ apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
+}
+
+static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
+{
+ int rc;
+ u64 val = 0;
+
+ rc = apei_read(&val, &gv2->read_ack_register);
+ if (rc)
+ return;
+
+ val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
+ val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset;
+
+ apei_write(val, &gv2->read_ack_register);
+}
+
+static struct ghes *ghes_new(struct acpi_hest_generic *generic)
+{
+ struct ghes *ghes;
+ unsigned int error_block_length;
+ int rc;
+
+ ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
+ if (!ghes)
+ return ERR_PTR(-ENOMEM);
+
+ ghes->generic = generic;
+ if (is_hest_type_generic_v2(ghes)) {
+ rc = map_gen_v2(ghes);
+ if (rc)
+ goto err_free;
+ }
+
+ rc = apei_map_generic_address(&generic->error_status_address);
+ if (rc)
+ goto err_unmap_read_ack_addr;
+ error_block_length = generic->error_block_length;
+ if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
+ pr_warn(FW_WARN GHES_PFX
+ "Error status block length is too long: %u for "
+ "generic hardware error source: %d.\n",
+ error_block_length, generic->header.source_id);
+ error_block_length = GHES_ESTATUS_MAX_SIZE;
+ }
+ ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
+ if (!ghes->estatus) {
+ rc = -ENOMEM;
+ goto err_unmap_status_addr;
+ }
+
+ return ghes;
+
+err_unmap_status_addr:
+ apei_unmap_generic_address(&generic->error_status_address);
+err_unmap_read_ack_addr:
+ if (is_hest_type_generic_v2(ghes))
+ unmap_gen_v2(ghes);
+err_free:
+ kfree(ghes);
+ return ERR_PTR(rc);
+}
+
+static void ghes_fini(struct ghes *ghes)
+{
+ kfree(ghes->estatus);
+ apei_unmap_generic_address(&ghes->generic->error_status_address);
+ if (is_hest_type_generic_v2(ghes))
+ unmap_gen_v2(ghes);
+}
+
+static inline int ghes_severity(int severity)
+{
+ switch (severity) {
+ case CPER_SEV_INFORMATIONAL:
+ return GHES_SEV_NO;
+ case CPER_SEV_CORRECTED:
+ return GHES_SEV_CORRECTED;
+ case CPER_SEV_RECOVERABLE:
+ return GHES_SEV_RECOVERABLE;
+ case CPER_SEV_FATAL:
+ return GHES_SEV_PANIC;
+ default:
+ /* Unknown, go panic */
+ return GHES_SEV_PANIC;
+ }
+}
+
+static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
+ int from_phys,
+ enum fixed_addresses fixmap_idx)
+{
+ void __iomem *vaddr;
+ u64 offset;
+ u32 trunk;
+
+ while (len > 0) {
+ offset = paddr - (paddr & PAGE_MASK);
+ vaddr = ghes_map(PHYS_PFN(paddr), fixmap_idx);
+ trunk = PAGE_SIZE - offset;
+ trunk = min(trunk, len);
+ if (from_phys)
+ memcpy_fromio(buffer, vaddr + offset, trunk);
+ else
+ memcpy_toio(vaddr + offset, buffer, trunk);
+ len -= trunk;
+ paddr += trunk;
+ buffer += trunk;
+ ghes_unmap(vaddr, fixmap_idx);
+ }
+}
+
+/* Check the top-level record header has an appropriate size. */
+static int __ghes_check_estatus(struct ghes *ghes,
+ struct acpi_hest_generic_status *estatus)
+{
+ u32 len = cper_estatus_len(estatus);
+
+ if (len < sizeof(*estatus)) {
+ pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n");
+ return -EIO;
+ }
+
+ if (len > ghes->generic->error_block_length) {
+ pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n");
+ return -EIO;
+ }
+
+ if (cper_estatus_check_header(estatus)) {
+ pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/* Read the CPER block, returning its address, and header in estatus. */
+static int __ghes_peek_estatus(struct ghes *ghes,
+ struct acpi_hest_generic_status *estatus,
+ u64 *buf_paddr, enum fixed_addresses fixmap_idx)
+{
+ struct acpi_hest_generic *g = ghes->generic;
+ int rc;
+
+ rc = apei_read(buf_paddr, &g->error_status_address);
+ if (rc) {
+ *buf_paddr = 0;
+ pr_warn_ratelimited(FW_WARN GHES_PFX
+"Failed to read error status block address for hardware error source: %d.\n",
+ g->header.source_id);
+ return -EIO;
+ }
+ if (!*buf_paddr)
+ return -ENOENT;
+
+ ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1,
+ fixmap_idx);
+ if (!estatus->block_status) {
+ *buf_paddr = 0;
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
+ u64 buf_paddr, enum fixed_addresses fixmap_idx,
+ size_t buf_len)
+{
+ ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx);
+ if (cper_estatus_check(estatus)) {
+ pr_warn_ratelimited(FW_WARN GHES_PFX
+ "Failed to read error status block!\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int ghes_read_estatus(struct ghes *ghes,
+ struct acpi_hest_generic_status *estatus,
+ u64 *buf_paddr, enum fixed_addresses fixmap_idx)
+{
+ int rc;
+
+ rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx);
+ if (rc)
+ return rc;
+
+ rc = __ghes_check_estatus(ghes, estatus);
+ if (rc)
+ return rc;
+
+ return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx,
+ cper_estatus_len(estatus));
+}
+
+static void ghes_clear_estatus(struct ghes *ghes,
+ struct acpi_hest_generic_status *estatus,
+ u64 buf_paddr, enum fixed_addresses fixmap_idx)
+{
+ estatus->block_status = 0;
+
+ if (!buf_paddr)
+ return;
+
+ ghes_copy_tofrom_phys(estatus, buf_paddr,
+ sizeof(estatus->block_status), 0,
+ fixmap_idx);
+
+ /*
+ * GHESv2 type HEST entries introduce support for error acknowledgment,
+ * so only acknowledge the error if this support is present.
+ */
+ if (is_hest_type_generic_v2(ghes))
+ ghes_ack_error(ghes->generic_v2);
+}
+
+/*
+ * Called as task_work before returning to user-space.
+ * Ensure any queued work has been done before we return to the context that
+ * triggered the notification.
+ */
+static void ghes_kick_task_work(struct callback_head *head)
+{
+ struct acpi_hest_generic_status *estatus;
+ struct ghes_estatus_node *estatus_node;
+ u32 node_len;
+
+ estatus_node = container_of(head, struct ghes_estatus_node, task_work);
+ if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
+ memory_failure_queue_kick(estatus_node->task_work_cpu);
+
+ estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+ node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus));
+ gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len);
+}
+
+static bool ghes_do_memory_failure(u64 physical_addr, int flags)
+{
+ unsigned long pfn;
+
+ if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
+ return false;
+
+ pfn = PHYS_PFN(physical_addr);
+ if (!pfn_valid(pfn)) {
+ pr_warn_ratelimited(FW_WARN GHES_PFX
+ "Invalid address in generic error data: %#llx\n",
+ physical_addr);
+ return false;
+ }
+
+ memory_failure_queue(pfn, flags);
+ return true;
+}
+
+static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
+ int sev)
+{
+ int flags = -1;
+ int sec_sev = ghes_severity(gdata->error_severity);
+ struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
+
+ if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
+ return false;
+
+ /* iff following two events can be handled properly by now */
+ if (sec_sev == GHES_SEV_CORRECTED &&
+ (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
+ flags = MF_SOFT_OFFLINE;
+ if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
+ flags = 0;
+
+ if (flags != -1)
+ return ghes_do_memory_failure(mem_err->physical_addr, flags);
+
+ return false;
+}
+
+static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int sev)
+{
+ struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
+ bool queued = false;
+ int sec_sev, i;
+ char *p;
+
+ log_arm_hw_error(err);
+
+ sec_sev = ghes_severity(gdata->error_severity);
+ if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
+ return false;
+
+ p = (char *)(err + 1);
+ for (i = 0; i < err->err_info_num; i++) {
+ struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
+ bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR);
+ bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
+ const char *error_type = "unknown error";
+
+ /*
+ * The field (err_info->error_info & BIT(26)) is fixed to set to
+ * 1 in some old firmware of HiSilicon Kunpeng920. We assume that
+ * firmware won't mix corrected errors in an uncorrected section,
+ * and don't filter out 'corrected' error here.
+ */
+ if (is_cache && has_pa) {
+ queued = ghes_do_memory_failure(err_info->physical_fault_addr, 0);
+ p += err_info->length;
+ continue;
+ }
+
+ if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs))
+ error_type = cper_proc_error_type_strs[err_info->type];
+
+ pr_warn_ratelimited(FW_WARN GHES_PFX
+ "Unhandled processor error type: %s\n",
+ error_type);
+ p += err_info->length;
+ }
+
+ return queued;
+}
+
+/*
+ * PCIe AER errors need to be sent to the AER driver for reporting and
+ * recovery. The GHES severities map to the following AER severities and
+ * require the following handling:
+ *
+ * GHES_SEV_CORRECTABLE -> AER_CORRECTABLE
+ * These need to be reported by the AER driver but no recovery is
+ * necessary.
+ * GHES_SEV_RECOVERABLE -> AER_NONFATAL
+ * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL
+ * These both need to be reported and recovered from by the AER driver.
+ * GHES_SEV_PANIC does not make it to this handling since the kernel must
+ * panic.
+ */
+static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
+{
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+ struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
+
+ if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
+ pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
+ unsigned int devfn;
+ int aer_severity;
+
+ devfn = PCI_DEVFN(pcie_err->device_id.device,
+ pcie_err->device_id.function);
+ aer_severity = cper_severity_to_aer(gdata->error_severity);
+
+ /*
+ * If firmware reset the component to contain
+ * the error, we must reinitialize it before
+ * use, so treat it as a fatal AER error.
+ */
+ if (gdata->flags & CPER_SEC_RESET)
+ aer_severity = AER_FATAL;
+
+ aer_recover_queue(pcie_err->device_id.segment,
+ pcie_err->device_id.bus,
+ devfn, aer_severity,
+ (struct aer_capability_regs *)
+ pcie_err->aer_info);
+ }
+#endif
+}
+
+static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list);
+
+int ghes_register_vendor_record_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&vendor_record_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier);
+
+void ghes_unregister_vendor_record_notifier(struct notifier_block *nb)
+{
+ blocking_notifier_chain_unregister(&vendor_record_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier);
+
+static void ghes_vendor_record_work_func(struct work_struct *work)
+{
+ struct ghes_vendor_record_entry *entry;
+ struct acpi_hest_generic_data *gdata;
+ u32 len;
+
+ entry = container_of(work, struct ghes_vendor_record_entry, work);
+ gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
+
+ blocking_notifier_call_chain(&vendor_record_notify_list,
+ entry->error_severity, gdata);
+
+ len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
+ gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len);
+}
+
+static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
+ int sev)
+{
+ struct acpi_hest_generic_data *copied_gdata;
+ struct ghes_vendor_record_entry *entry;
+ u32 len;
+
+ len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
+ entry = (void *)gen_pool_alloc(ghes_estatus_pool, len);
+ if (!entry)
+ return;
+
+ copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
+ memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata));
+ entry->error_severity = sev;
+
+ INIT_WORK(&entry->work, ghes_vendor_record_work_func);
+ schedule_work(&entry->work);
+}
+
+static bool ghes_do_proc(struct ghes *ghes,
+ const struct acpi_hest_generic_status *estatus)
+{
+ int sev, sec_sev;
+ struct acpi_hest_generic_data *gdata;
+ guid_t *sec_type;
+ const guid_t *fru_id = &guid_null;
+ char *fru_text = "";
+ bool queued = false;
+
+ sev = ghes_severity(estatus->error_severity);
+ apei_estatus_for_each_section(estatus, gdata) {
+ sec_type = (guid_t *)gdata->section_type;
+ sec_sev = ghes_severity(gdata->error_severity);
+ if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+ fru_id = (guid_t *)gdata->fru_id;
+
+ if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+ fru_text = gdata->fru_text;
+
+ if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
+ struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
+
+ ghes_edac_report_mem_error(sev, mem_err);
+
+ arch_apei_report_mem_error(sev, mem_err);
+ queued = ghes_handle_memory_failure(gdata, sev);
+ }
+ else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
+ ghes_handle_aer(gdata);
+ }
+ else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
+ queued = ghes_handle_arm_hw_error(gdata, sev);
+ } else {
+ void *err = acpi_hest_get_payload(gdata);
+
+ ghes_defer_non_standard_event(gdata, sev);
+ log_non_standard_event(sec_type, fru_id, fru_text,
+ sec_sev, err,
+ gdata->error_data_length);
+ }
+ }
+
+ return queued;
+}
+
+static void __ghes_print_estatus(const char *pfx,
+ const struct acpi_hest_generic *generic,
+ const struct acpi_hest_generic_status *estatus)
+{
+ static atomic_t seqno;
+ unsigned int curr_seqno;
+ char pfx_seq[64];
+
+ if (pfx == NULL) {
+ if (ghes_severity(estatus->error_severity) <=
+ GHES_SEV_CORRECTED)
+ pfx = KERN_WARNING;
+ else
+ pfx = KERN_ERR;
+ }
+ curr_seqno = atomic_inc_return(&seqno);
+ snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
+ printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
+ pfx_seq, generic->header.source_id);
+ cper_estatus_print(pfx_seq, estatus);
+}
+
+static int ghes_print_estatus(const char *pfx,
+ const struct acpi_hest_generic *generic,
+ const struct acpi_hest_generic_status *estatus)
+{
+ /* Not more than 2 messages every 5 seconds */
+ static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
+ static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
+ struct ratelimit_state *ratelimit;
+
+ if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
+ ratelimit = &ratelimit_corrected;
+ else
+ ratelimit = &ratelimit_uncorrected;
+ if (__ratelimit(ratelimit)) {
+ __ghes_print_estatus(pfx, generic, estatus);
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * GHES error status reporting throttle, to report more kinds of
+ * errors, instead of just most frequently occurred errors.
+ */
+static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
+{
+ u32 len;
+ int i, cached = 0;
+ unsigned long long now;
+ struct ghes_estatus_cache *cache;
+ struct acpi_hest_generic_status *cache_estatus;
+
+ len = cper_estatus_len(estatus);
+ rcu_read_lock();
+ for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+ cache = rcu_dereference(ghes_estatus_caches[i]);
+ if (cache == NULL)
+ continue;
+ if (len != cache->estatus_len)
+ continue;
+ cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+ if (memcmp(estatus, cache_estatus, len))
+ continue;
+ atomic_inc(&cache->count);
+ now = sched_clock();
+ if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
+ cached = 1;
+ break;
+ }
+ rcu_read_unlock();
+ return cached;
+}
+
+static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
+ struct acpi_hest_generic *generic,
+ struct acpi_hest_generic_status *estatus)
+{
+ int alloced;
+ u32 len, cache_len;
+ struct ghes_estatus_cache *cache;
+ struct acpi_hest_generic_status *cache_estatus;
+
+ alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
+ if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
+ atomic_dec(&ghes_estatus_cache_alloced);
+ return NULL;
+ }
+ len = cper_estatus_len(estatus);
+ cache_len = GHES_ESTATUS_CACHE_LEN(len);
+ cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
+ if (!cache) {
+ atomic_dec(&ghes_estatus_cache_alloced);
+ return NULL;
+ }
+ cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+ memcpy(cache_estatus, estatus, len);
+ cache->estatus_len = len;
+ atomic_set(&cache->count, 0);
+ cache->generic = generic;
+ cache->time_in = sched_clock();
+ return cache;
+}
+
+static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
+{
+ u32 len;
+
+ len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
+ len = GHES_ESTATUS_CACHE_LEN(len);
+ gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
+ atomic_dec(&ghes_estatus_cache_alloced);
+}
+
+static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
+{
+ struct ghes_estatus_cache *cache;
+
+ cache = container_of(head, struct ghes_estatus_cache, rcu);
+ ghes_estatus_cache_free(cache);
+}
+
+static void ghes_estatus_cache_add(
+ struct acpi_hest_generic *generic,
+ struct acpi_hest_generic_status *estatus)
+{
+ int i, slot = -1, count;
+ unsigned long long now, duration, period, max_period = 0;
+ struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;
+
+ new_cache = ghes_estatus_cache_alloc(generic, estatus);
+ if (new_cache == NULL)
+ return;
+ rcu_read_lock();
+ now = sched_clock();
+ for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+ cache = rcu_dereference(ghes_estatus_caches[i]);
+ if (cache == NULL) {
+ slot = i;
+ slot_cache = NULL;
+ break;
+ }
+ duration = now - cache->time_in;
+ if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
+ slot = i;
+ slot_cache = cache;
+ break;
+ }
+ count = atomic_read(&cache->count);
+ period = duration;
+ do_div(period, (count + 1));
+ if (period > max_period) {
+ max_period = period;
+ slot = i;
+ slot_cache = cache;
+ }
+ }
+ /* new_cache must be put into array after its contents are written */
+ smp_wmb();
+ if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
+ slot_cache, new_cache) == slot_cache) {
+ if (slot_cache)
+ call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
+ } else
+ ghes_estatus_cache_free(new_cache);
+ rcu_read_unlock();
+}
+
+static void __ghes_panic(struct ghes *ghes,
+ struct acpi_hest_generic_status *estatus,
+ u64 buf_paddr, enum fixed_addresses fixmap_idx)
+{
+ __ghes_print_estatus(KERN_EMERG, ghes->generic, estatus);
+
+ ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx);
+
+ /* reboot to log the error! */
+ if (!panic_timeout)
+ panic_timeout = ghes_panic_timeout;
+ panic("Fatal hardware error!");
+}
+
+static int ghes_proc(struct ghes *ghes)
+{
+ struct acpi_hest_generic_status *estatus = ghes->estatus;
+ u64 buf_paddr;
+ int rc;
+
+ rc = ghes_read_estatus(ghes, estatus, &buf_paddr, FIX_APEI_GHES_IRQ);
+ if (rc)
+ goto out;
+
+ if (ghes_severity(estatus->error_severity) >= GHES_SEV_PANIC)
+ __ghes_panic(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ);
+
+ if (!ghes_estatus_cached(estatus)) {
+ if (ghes_print_estatus(NULL, ghes->generic, estatus))
+ ghes_estatus_cache_add(ghes->generic, estatus);
+ }
+ ghes_do_proc(ghes, estatus);
+
+out:
+ ghes_clear_estatus(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ);
+
+ return rc;
+}
+
+static void ghes_add_timer(struct ghes *ghes)
+{
+ struct acpi_hest_generic *g = ghes->generic;
+ unsigned long expire;
+
+ if (!g->notify.poll_interval) {
+ pr_warn(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
+ g->header.source_id);
+ return;
+ }
+ expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
+ ghes->timer.expires = round_jiffies_relative(expire);
+ add_timer(&ghes->timer);
+}
+
+static void ghes_poll_func(struct timer_list *t)
+{
+ struct ghes *ghes = from_timer(ghes, t, timer);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ghes_notify_lock_irq, flags);
+ ghes_proc(ghes);
+ spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
+ if (!(ghes->flags & GHES_EXITING))
+ ghes_add_timer(ghes);
+}
+
+static irqreturn_t ghes_irq_func(int irq, void *data)
+{
+ struct ghes *ghes = data;
+ unsigned long flags;
+ int rc;
+
+ spin_lock_irqsave(&ghes_notify_lock_irq, flags);
+ rc = ghes_proc(ghes);
+ spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
+ if (rc)
+ return IRQ_NONE;
+
+ return IRQ_HANDLED;
+}
+
+static int ghes_notify_hed(struct notifier_block *this, unsigned long event,
+ void *data)
+{
+ struct ghes *ghes;
+ unsigned long flags;
+ int ret = NOTIFY_DONE;
+
+ spin_lock_irqsave(&ghes_notify_lock_irq, flags);
+ rcu_read_lock();
+ list_for_each_entry_rcu(ghes, &ghes_hed, list) {
+ if (!ghes_proc(ghes))
+ ret = NOTIFY_OK;
+ }
+ rcu_read_unlock();
+ spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
+
+ return ret;
+}
+
+static struct notifier_block ghes_notifier_hed = {
+ .notifier_call = ghes_notify_hed,
+};
+
+/*
+ * Handlers for CPER records may not be NMI safe. For example,
+ * memory_failure_queue() takes spinlocks and calls schedule_work_on().
+ * In any NMI-like handler, memory from ghes_estatus_pool is used to save
+ * estatus, and added to the ghes_estatus_llist. irq_work_queue() causes
+ * ghes_proc_in_irq() to run in IRQ context where each estatus in
+ * ghes_estatus_llist is processed.
+ *
+ * Memory from the ghes_estatus_pool is also used with the ghes_estatus_cache
+ * to suppress frequent messages.
+ */
+static struct llist_head ghes_estatus_llist;
+static struct irq_work ghes_proc_irq_work;
+
+static void ghes_proc_in_irq(struct irq_work *irq_work)
+{
+ struct llist_node *llnode, *next;
+ struct ghes_estatus_node *estatus_node;
+ struct acpi_hest_generic *generic;
+ struct acpi_hest_generic_status *estatus;
+ bool task_work_pending;
+ u32 len, node_len;
+ int ret;
+
+ llnode = llist_del_all(&ghes_estatus_llist);
+ /*
+ * Because the time order of estatus in list is reversed,
+ * revert it back to proper order.
+ */
+ llnode = llist_reverse_order(llnode);
+ while (llnode) {
+ next = llnode->next;
+ estatus_node = llist_entry(llnode, struct ghes_estatus_node,
+ llnode);
+ estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+ len = cper_estatus_len(estatus);
+ node_len = GHES_ESTATUS_NODE_LEN(len);
+ task_work_pending = ghes_do_proc(estatus_node->ghes, estatus);
+ if (!ghes_estatus_cached(estatus)) {
+ generic = estatus_node->generic;
+ if (ghes_print_estatus(NULL, generic, estatus))
+ ghes_estatus_cache_add(generic, estatus);
+ }
+
+ if (task_work_pending && current->mm) {
+ estatus_node->task_work.func = ghes_kick_task_work;
+ estatus_node->task_work_cpu = smp_processor_id();
+ ret = task_work_add(current, &estatus_node->task_work,
+ TWA_RESUME);
+ if (ret)
+ estatus_node->task_work.func = NULL;
+ }
+
+ if (!estatus_node->task_work.func)
+ gen_pool_free(ghes_estatus_pool,
+ (unsigned long)estatus_node, node_len);
+
+ llnode = next;
+ }
+}
+
+static void ghes_print_queued_estatus(void)
+{
+ struct llist_node *llnode;
+ struct ghes_estatus_node *estatus_node;
+ struct acpi_hest_generic *generic;
+ struct acpi_hest_generic_status *estatus;
+
+ llnode = llist_del_all(&ghes_estatus_llist);
+ /*
+ * Because the time order of estatus in list is reversed,
+ * revert it back to proper order.
+ */
+ llnode = llist_reverse_order(llnode);
+ while (llnode) {
+ estatus_node = llist_entry(llnode, struct ghes_estatus_node,
+ llnode);
+ estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+ generic = estatus_node->generic;
+ ghes_print_estatus(NULL, generic, estatus);
+ llnode = llnode->next;
+ }
+}
+
+static int ghes_in_nmi_queue_one_entry(struct ghes *ghes,
+ enum fixed_addresses fixmap_idx)
+{
+ struct acpi_hest_generic_status *estatus, tmp_header;
+ struct ghes_estatus_node *estatus_node;
+ u32 len, node_len;
+ u64 buf_paddr;
+ int sev, rc;
+
+ if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG))
+ return -EOPNOTSUPP;
+
+ rc = __ghes_peek_estatus(ghes, &tmp_header, &buf_paddr, fixmap_idx);
+ if (rc) {
+ ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
+ return rc;
+ }
+
+ rc = __ghes_check_estatus(ghes, &tmp_header);
+ if (rc) {
+ ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
+ return rc;
+ }
+
+ len = cper_estatus_len(&tmp_header);
+ node_len = GHES_ESTATUS_NODE_LEN(len);
+ estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len);
+ if (!estatus_node)
+ return -ENOMEM;
+
+ estatus_node->ghes = ghes;
+ estatus_node->generic = ghes->generic;
+ estatus_node->task_work.func = NULL;
+ estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+
+ if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) {
+ ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx);
+ rc = -ENOENT;
+ goto no_work;
+ }
+
+ sev = ghes_severity(estatus->error_severity);
+ if (sev >= GHES_SEV_PANIC) {
+ ghes_print_queued_estatus();
+ __ghes_panic(ghes, estatus, buf_paddr, fixmap_idx);
+ }
+
+ ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
+
+ /* This error has been reported before, don't process it again. */
+ if (ghes_estatus_cached(estatus))
+ goto no_work;
+
+ llist_add(&estatus_node->llnode, &ghes_estatus_llist);
+
+ return rc;
+
+no_work:
+ gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
+ node_len);
+
+ return rc;
+}
+
+static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list,
+ enum fixed_addresses fixmap_idx)
+{
+ int ret = -ENOENT;
+ struct ghes *ghes;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ghes, rcu_list, list) {
+ if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx))
+ ret = 0;
+ }
+ rcu_read_unlock();
+
+ if (IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && !ret)
+ irq_work_queue(&ghes_proc_irq_work);
+
+ return ret;
+}
+
+#ifdef CONFIG_ACPI_APEI_SEA
+static LIST_HEAD(ghes_sea);
+
+/*
+ * Return 0 only if one of the SEA error sources successfully reported an error
+ * record sent from the firmware.
+ */
+int ghes_notify_sea(void)
+{
+ static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sea);
+ int rv;
+
+ raw_spin_lock(&ghes_notify_lock_sea);
+ rv = ghes_in_nmi_spool_from_list(&ghes_sea, FIX_APEI_GHES_SEA);
+ raw_spin_unlock(&ghes_notify_lock_sea);
+
+ return rv;
+}
+
+static void ghes_sea_add(struct ghes *ghes)
+{
+ mutex_lock(&ghes_list_mutex);
+ list_add_rcu(&ghes->list, &ghes_sea);
+ mutex_unlock(&ghes_list_mutex);
+}
+
+static void ghes_sea_remove(struct ghes *ghes)
+{
+ mutex_lock(&ghes_list_mutex);
+ list_del_rcu(&ghes->list);
+ mutex_unlock(&ghes_list_mutex);
+ synchronize_rcu();
+}
+#else /* CONFIG_ACPI_APEI_SEA */
+static inline void ghes_sea_add(struct ghes *ghes) { }
+static inline void ghes_sea_remove(struct ghes *ghes) { }
+#endif /* CONFIG_ACPI_APEI_SEA */
+
+#ifdef CONFIG_HAVE_ACPI_APEI_NMI
+/*
+ * NMI may be triggered on any CPU, so ghes_in_nmi is used for
+ * having only one concurrent reader.
+ */
+static atomic_t ghes_in_nmi = ATOMIC_INIT(0);
+
+static LIST_HEAD(ghes_nmi);
+
+static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
+{
+ static DEFINE_RAW_SPINLOCK(ghes_notify_lock_nmi);
+ int ret = NMI_DONE;
+
+ if (!atomic_add_unless(&ghes_in_nmi, 1, 1))
+ return ret;
+
+ raw_spin_lock(&ghes_notify_lock_nmi);
+ if (!ghes_in_nmi_spool_from_list(&ghes_nmi, FIX_APEI_GHES_NMI))
+ ret = NMI_HANDLED;
+ raw_spin_unlock(&ghes_notify_lock_nmi);
+
+ atomic_dec(&ghes_in_nmi);
+ return ret;
+}
+
+static void ghes_nmi_add(struct ghes *ghes)
+{
+ mutex_lock(&ghes_list_mutex);
+ if (list_empty(&ghes_nmi))
+ register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
+ list_add_rcu(&ghes->list, &ghes_nmi);
+ mutex_unlock(&ghes_list_mutex);
+}
+
+static void ghes_nmi_remove(struct ghes *ghes)
+{
+ mutex_lock(&ghes_list_mutex);
+ list_del_rcu(&ghes->list);
+ if (list_empty(&ghes_nmi))
+ unregister_nmi_handler(NMI_LOCAL, "ghes");
+ mutex_unlock(&ghes_list_mutex);
+ /*
+ * To synchronize with NMI handler, ghes can only be
+ * freed after NMI handler finishes.
+ */
+ synchronize_rcu();
+}
+#else /* CONFIG_HAVE_ACPI_APEI_NMI */
+static inline void ghes_nmi_add(struct ghes *ghes) { }
+static inline void ghes_nmi_remove(struct ghes *ghes) { }
+#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
+
+static void ghes_nmi_init_cxt(void)
+{
+ init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
+}
+
+static int __ghes_sdei_callback(struct ghes *ghes,
+ enum fixed_addresses fixmap_idx)
+{
+ if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) {
+ irq_work_queue(&ghes_proc_irq_work);
+
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int ghes_sdei_normal_callback(u32 event_num, struct pt_regs *regs,
+ void *arg)
+{
+ static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_normal);
+ struct ghes *ghes = arg;
+ int err;
+
+ raw_spin_lock(&ghes_notify_lock_sdei_normal);
+ err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_NORMAL);
+ raw_spin_unlock(&ghes_notify_lock_sdei_normal);
+
+ return err;
+}
+
+static int ghes_sdei_critical_callback(u32 event_num, struct pt_regs *regs,
+ void *arg)
+{
+ static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_critical);
+ struct ghes *ghes = arg;
+ int err;
+
+ raw_spin_lock(&ghes_notify_lock_sdei_critical);
+ err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_CRITICAL);
+ raw_spin_unlock(&ghes_notify_lock_sdei_critical);
+
+ return err;
+}
+
+static int apei_sdei_register_ghes(struct ghes *ghes)
+{
+ if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
+ return -EOPNOTSUPP;
+
+ return sdei_register_ghes(ghes, ghes_sdei_normal_callback,
+ ghes_sdei_critical_callback);
+}
+
+static int apei_sdei_unregister_ghes(struct ghes *ghes)
+{
+ if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
+ return -EOPNOTSUPP;
+
+ return sdei_unregister_ghes(ghes);
+}
+
+static int ghes_probe(struct platform_device *ghes_dev)
+{
+ struct acpi_hest_generic *generic;
+ struct ghes *ghes = NULL;
+ unsigned long flags;
+
+ int rc = -EINVAL;
+
+ generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
+ if (!generic->enabled)
+ return -ENODEV;
+
+ switch (generic->notify.type) {
+ case ACPI_HEST_NOTIFY_POLLED:
+ case ACPI_HEST_NOTIFY_EXTERNAL:
+ case ACPI_HEST_NOTIFY_SCI:
+ case ACPI_HEST_NOTIFY_GSIV:
+ case ACPI_HEST_NOTIFY_GPIO:
+ break;
+
+ case ACPI_HEST_NOTIFY_SEA:
+ if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
+ pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n",
+ generic->header.source_id);
+ rc = -ENOTSUPP;
+ goto err;
+ }
+ break;
+ case ACPI_HEST_NOTIFY_NMI:
+ if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
+ pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
+ generic->header.source_id);
+ goto err;
+ }
+ break;
+ case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
+ if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) {
+ pr_warn(GHES_PFX "Generic hardware error source: %d notified via SDE Interface is not supported!\n",
+ generic->header.source_id);
+ goto err;
+ }
+ break;
+ case ACPI_HEST_NOTIFY_LOCAL:
+ pr_warn(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
+ generic->header.source_id);
+ goto err;
+ default:
+ pr_warn(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
+ generic->notify.type, generic->header.source_id);
+ goto err;
+ }
+
+ rc = -EIO;
+ if (generic->error_block_length <
+ sizeof(struct acpi_hest_generic_status)) {
+ pr_warn(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
+ generic->error_block_length, generic->header.source_id);
+ goto err;
+ }
+ ghes = ghes_new(generic);
+ if (IS_ERR(ghes)) {
+ rc = PTR_ERR(ghes);
+ ghes = NULL;
+ goto err;
+ }
+
+ switch (generic->notify.type) {
+ case ACPI_HEST_NOTIFY_POLLED:
+ timer_setup(&ghes->timer, ghes_poll_func, 0);
+ ghes_add_timer(ghes);
+ break;
+ case ACPI_HEST_NOTIFY_EXTERNAL:
+ /* External interrupt vector is GSI */
+ rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq);
+ if (rc) {
+ pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
+ generic->header.source_id);
+ goto err;
+ }
+ rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED,
+ "GHES IRQ", ghes);
+ if (rc) {
+ pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
+ generic->header.source_id);
+ goto err;
+ }
+ break;
+
+ case ACPI_HEST_NOTIFY_SCI:
+ case ACPI_HEST_NOTIFY_GSIV:
+ case ACPI_HEST_NOTIFY_GPIO:
+ mutex_lock(&ghes_list_mutex);
+ if (list_empty(&ghes_hed))
+ register_acpi_hed_notifier(&ghes_notifier_hed);
+ list_add_rcu(&ghes->list, &ghes_hed);
+ mutex_unlock(&ghes_list_mutex);
+ break;
+
+ case ACPI_HEST_NOTIFY_SEA:
+ ghes_sea_add(ghes);
+ break;
+ case ACPI_HEST_NOTIFY_NMI:
+ ghes_nmi_add(ghes);
+ break;
+ case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
+ rc = apei_sdei_register_ghes(ghes);
+ if (rc)
+ goto err;
+ break;
+ default:
+ BUG();
+ }
+
+ platform_set_drvdata(ghes_dev, ghes);
+
+ ghes_edac_register(ghes, &ghes_dev->dev);
+
+ /* Handle any pending errors right away */
+ spin_lock_irqsave(&ghes_notify_lock_irq, flags);
+ ghes_proc(ghes);
+ spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
+
+ return 0;
+
+err:
+ if (ghes) {
+ ghes_fini(ghes);
+ kfree(ghes);
+ }
+ return rc;
+}
+
+static int ghes_remove(struct platform_device *ghes_dev)
+{
+ int rc;
+ struct ghes *ghes;
+ struct acpi_hest_generic *generic;
+
+ ghes = platform_get_drvdata(ghes_dev);
+ generic = ghes->generic;
+
+ ghes->flags |= GHES_EXITING;
+ switch (generic->notify.type) {
+ case ACPI_HEST_NOTIFY_POLLED:
+ del_timer_sync(&ghes->timer);
+ break;
+ case ACPI_HEST_NOTIFY_EXTERNAL:
+ free_irq(ghes->irq, ghes);
+ break;
+
+ case ACPI_HEST_NOTIFY_SCI:
+ case ACPI_HEST_NOTIFY_GSIV:
+ case ACPI_HEST_NOTIFY_GPIO:
+ mutex_lock(&ghes_list_mutex);
+ list_del_rcu(&ghes->list);
+ if (list_empty(&ghes_hed))
+ unregister_acpi_hed_notifier(&ghes_notifier_hed);
+ mutex_unlock(&ghes_list_mutex);
+ synchronize_rcu();
+ break;
+
+ case ACPI_HEST_NOTIFY_SEA:
+ ghes_sea_remove(ghes);
+ break;
+ case ACPI_HEST_NOTIFY_NMI:
+ ghes_nmi_remove(ghes);
+ break;
+ case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
+ rc = apei_sdei_unregister_ghes(ghes);
+ if (rc)
+ return rc;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ ghes_fini(ghes);
+
+ ghes_edac_unregister(ghes);
+
+ kfree(ghes);
+
+ platform_set_drvdata(ghes_dev, NULL);
+
+ return 0;
+}
+
+static struct platform_driver ghes_platform_driver = {
+ .driver = {
+ .name = "GHES",
+ },
+ .probe = ghes_probe,
+ .remove = ghes_remove,
+};
+
+void __init ghes_init(void)
+{
+ int rc;
+
+ sdei_init();
+
+ if (acpi_disabled)
+ return;
+
+ switch (hest_disable) {
+ case HEST_NOT_FOUND:
+ return;
+ case HEST_DISABLED:
+ pr_info(GHES_PFX "HEST is not enabled!\n");
+ return;
+ default:
+ break;
+ }
+
+ if (ghes_disable) {
+ pr_info(GHES_PFX "GHES is not enabled!\n");
+ return;
+ }
+
+ ghes_nmi_init_cxt();
+
+ rc = platform_driver_register(&ghes_platform_driver);
+ if (rc)
+ return;
+
+ rc = apei_osc_setup();
+ if (rc == 0 && osc_sb_apei_support_acked)
+ pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
+ else if (rc == 0 && !osc_sb_apei_support_acked)
+ pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
+ else if (rc && osc_sb_apei_support_acked)
+ pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
+ else
+ pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
+}
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
new file mode 100644
index 000000000..7bf48c277
--- /dev/null
+++ b/drivers/acpi/apei/hest.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * APEI Hardware Error Souce Table support
+ *
+ * HEST describes error sources in detail; communicates operational
+ * parameters (i.e. severity levels, masking bits, and threshold
+ * values) to Linux as necessary. It also allows the BIOS to report
+ * non-standard error sources to Linux (for example, chipset-specific
+ * error registers).
+ *
+ * For more information about HEST, please refer to ACPI Specification
+ * version 4.0, section 17.3.2.
+ *
+ * Copyright 2009 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/kdebug.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <acpi/apei.h>
+#include <acpi/ghes.h>
+
+#include "apei-internal.h"
+
+#define HEST_PFX "HEST: "
+
+int hest_disable;
+EXPORT_SYMBOL_GPL(hest_disable);
+
+/* HEST table parsing */
+
+static struct acpi_table_hest *__read_mostly hest_tab;
+
+static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
+ [ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */
+ [ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1,
+ [ACPI_HEST_TYPE_IA32_NMI] = sizeof(struct acpi_hest_ia_nmi),
+ [ACPI_HEST_TYPE_AER_ROOT_PORT] = sizeof(struct acpi_hest_aer_root),
+ [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer),
+ [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge),
+ [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic),
+ [ACPI_HEST_TYPE_GENERIC_ERROR_V2] = sizeof(struct acpi_hest_generic_v2),
+ [ACPI_HEST_TYPE_IA32_DEFERRED_CHECK] = -1,
+};
+
+static int hest_esrc_len(struct acpi_hest_header *hest_hdr)
+{
+ u16 hest_type = hest_hdr->type;
+ int len;
+
+ if (hest_type >= ACPI_HEST_TYPE_RESERVED)
+ return 0;
+
+ len = hest_esrc_len_tab[hest_type];
+
+ if (hest_type == ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) {
+ struct acpi_hest_ia_corrected *cmc;
+ cmc = (struct acpi_hest_ia_corrected *)hest_hdr;
+ len = sizeof(*cmc) + cmc->num_hardware_banks *
+ sizeof(struct acpi_hest_ia_error_bank);
+ } else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) {
+ struct acpi_hest_ia_machine_check *mc;
+ mc = (struct acpi_hest_ia_machine_check *)hest_hdr;
+ len = sizeof(*mc) + mc->num_hardware_banks *
+ sizeof(struct acpi_hest_ia_error_bank);
+ } else if (hest_type == ACPI_HEST_TYPE_IA32_DEFERRED_CHECK) {
+ struct acpi_hest_ia_deferred_check *mc;
+ mc = (struct acpi_hest_ia_deferred_check *)hest_hdr;
+ len = sizeof(*mc) + mc->num_hardware_banks *
+ sizeof(struct acpi_hest_ia_error_bank);
+ }
+ BUG_ON(len == -1);
+
+ return len;
+};
+
+int apei_hest_parse(apei_hest_func_t func, void *data)
+{
+ struct acpi_hest_header *hest_hdr;
+ int i, rc, len;
+
+ if (hest_disable || !hest_tab)
+ return -EINVAL;
+
+ hest_hdr = (struct acpi_hest_header *)(hest_tab + 1);
+ for (i = 0; i < hest_tab->error_source_count; i++) {
+ len = hest_esrc_len(hest_hdr);
+ if (!len) {
+ pr_warn(FW_WARN HEST_PFX
+ "Unknown or unused hardware error source "
+ "type: %d for hardware error source: %d.\n",
+ hest_hdr->type, hest_hdr->source_id);
+ return -EINVAL;
+ }
+ if ((void *)hest_hdr + len >
+ (void *)hest_tab + hest_tab->header.length) {
+ pr_warn(FW_BUG HEST_PFX
+ "Table contents overflow for hardware error source: %d.\n",
+ hest_hdr->source_id);
+ return -EINVAL;
+ }
+
+ rc = func(hest_hdr, data);
+ if (rc)
+ return rc;
+
+ hest_hdr = (void *)hest_hdr + len;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(apei_hest_parse);
+
+/*
+ * Check if firmware advertises firmware first mode. We need FF bit to be set
+ * along with a set of MC banks which work in FF mode.
+ */
+static int __init hest_parse_cmc(struct acpi_hest_header *hest_hdr, void *data)
+{
+ if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK)
+ return 0;
+
+ if (!acpi_disable_cmcff)
+ return !arch_apei_enable_cmcff(hest_hdr, data);
+
+ return 0;
+}
+
+struct ghes_arr {
+ struct platform_device **ghes_devs;
+ unsigned int count;
+};
+
+static int __init hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void *data)
+{
+ int *count = data;
+
+ if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR ||
+ hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2)
+ (*count)++;
+ return 0;
+}
+
+static int __init hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data)
+{
+ struct platform_device *ghes_dev;
+ struct ghes_arr *ghes_arr = data;
+ int rc, i;
+
+ if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR &&
+ hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2)
+ return 0;
+
+ if (!((struct acpi_hest_generic *)hest_hdr)->enabled)
+ return 0;
+ for (i = 0; i < ghes_arr->count; i++) {
+ struct acpi_hest_header *hdr;
+ ghes_dev = ghes_arr->ghes_devs[i];
+ hdr = *(struct acpi_hest_header **)ghes_dev->dev.platform_data;
+ if (hdr->source_id == hest_hdr->source_id) {
+ pr_warn(FW_WARN HEST_PFX "Duplicated hardware error source ID: %d.\n",
+ hdr->source_id);
+ return -EIO;
+ }
+ }
+ ghes_dev = platform_device_alloc("GHES", hest_hdr->source_id);
+ if (!ghes_dev)
+ return -ENOMEM;
+
+ rc = platform_device_add_data(ghes_dev, &hest_hdr, sizeof(void *));
+ if (rc)
+ goto err;
+
+ rc = platform_device_add(ghes_dev);
+ if (rc)
+ goto err;
+ ghes_arr->ghes_devs[ghes_arr->count++] = ghes_dev;
+
+ return 0;
+err:
+ platform_device_put(ghes_dev);
+ return rc;
+}
+
+static int __init hest_ghes_dev_register(unsigned int ghes_count)
+{
+ int rc, i;
+ struct ghes_arr ghes_arr;
+
+ ghes_arr.count = 0;
+ ghes_arr.ghes_devs = kmalloc_array(ghes_count, sizeof(void *),
+ GFP_KERNEL);
+ if (!ghes_arr.ghes_devs)
+ return -ENOMEM;
+
+ rc = apei_hest_parse(hest_parse_ghes, &ghes_arr);
+ if (rc)
+ goto err;
+
+ rc = ghes_estatus_pool_init(ghes_count);
+ if (rc)
+ goto err;
+
+out:
+ kfree(ghes_arr.ghes_devs);
+ return rc;
+err:
+ for (i = 0; i < ghes_arr.count; i++)
+ platform_device_unregister(ghes_arr.ghes_devs[i]);
+ goto out;
+}
+
+static int __init setup_hest_disable(char *str)
+{
+ hest_disable = HEST_DISABLED;
+ return 1;
+}
+
+__setup("hest_disable", setup_hest_disable);
+
+void __init acpi_hest_init(void)
+{
+ acpi_status status;
+ int rc;
+ unsigned int ghes_count = 0;
+
+ if (hest_disable) {
+ pr_info(HEST_PFX "Table parsing disabled.\n");
+ return;
+ }
+
+ status = acpi_get_table(ACPI_SIG_HEST, 0,
+ (struct acpi_table_header **)&hest_tab);
+ if (status == AE_NOT_FOUND) {
+ hest_disable = HEST_NOT_FOUND;
+ return;
+ } else if (ACPI_FAILURE(status)) {
+ const char *msg = acpi_format_exception(status);
+ pr_err(HEST_PFX "Failed to get table, %s\n", msg);
+ hest_disable = HEST_DISABLED;
+ return;
+ }
+
+ rc = apei_hest_parse(hest_parse_cmc, NULL);
+ if (rc)
+ goto err;
+
+ if (!ghes_disable) {
+ rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
+ if (rc)
+ goto err;
+
+ if (ghes_count)
+ rc = hest_ghes_dev_register(ghes_count);
+ if (rc)
+ goto err;
+ }
+
+ pr_info(HEST_PFX "Table parsing has been initialized.\n");
+ return;
+err:
+ hest_disable = HEST_DISABLED;
+ acpi_put_table((struct acpi_table_header *)hest_tab);
+}