10 files changed, 5189 insertions, 0 deletions
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
new file mode 100644
index 000000000..6b18f8bc7
--- /dev/null
+++ b/drivers/acpi/apei/Kconfig
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: GPL-2.0
+config HAVE_ACPI_APEI
+	bool
+
+config HAVE_ACPI_APEI_NMI
+	bool
+
+config ACPI_APEI
+	bool "ACPI Platform Error Interface (APEI)"
+	select MISC_FILESYSTEMS
+	select PSTORE
+	select UEFI_CPER
+	depends on HAVE_ACPI_APEI
+	help
+	  APEI allows to report errors (for example from the chipset)
+	  to the operating system. This improves NMI handling
+	  especially. In addition it supports error serialization and
+	  error injection.
+
+config ACPI_APEI_GHES
+	bool "APEI Generic Hardware Error Source"
+	depends on ACPI_APEI
+	select ACPI_HED
+	select IRQ_WORK
+	select GENERIC_ALLOCATOR
+	help
+	  Generic Hardware Error Source provides a way to report
+	  platform hardware errors (such as that from chipset). It
+	  works in so called "Firmware First" mode, that is, hardware
+	  errors are reported to firmware firstly, then reported to
+	  Linux by firmware. This way, some non-standard hardware
+	  error registers or non-standard hardware link can be checked
+	  by firmware to produce more valuable hardware error
+	  information for Linux.
+
+config ACPI_APEI_PCIEAER
+	bool "APEI PCIe AER logging/recovering support"
+	depends on ACPI_APEI && PCIEAER
+	help
+	  PCIe AER errors may be reported via APEI firmware first mode.
+	  Turn on this option to enable the corresponding support.
+
+config ACPI_APEI_SEA
+	bool
+	depends on ARM64 && ACPI_APEI_GHES
+	default y
+
+config ACPI_APEI_MEMORY_FAILURE
+	bool "APEI memory error recovering support"
+	depends on ACPI_APEI && MEMORY_FAILURE
+	help
+	  Memory errors may be reported via APEI firmware first mode.
+	  Turn on this option to enable the memory recovering support.
+
+config ACPI_APEI_EINJ
+	tristate "APEI Error INJection (EINJ)"
+	depends on ACPI_APEI && DEBUG_FS
+	help
+	  EINJ provides a hardware error injection mechanism, it is
+	  mainly used for debugging and testing the other parts of
+	  APEI and some other RAS features.
+
+config ACPI_APEI_ERST_DEBUG
+	tristate "APEI Error Record Serialization Table (ERST) Debug Support"
+	depends on ACPI_APEI
+	help
+	  ERST is a way provided by APEI to save and retrieve hardware
+	  error information to and from a persistent store. Enable this
+	  if you want to debugging and testing the ERST kernel support
+	  and firmware implementation.
diff --git a/drivers/acpi/apei/Makefile b/drivers/acpi/apei/Makefile
new file mode 100644
index 000000000..4dfac2128
--- /dev/null
+++ b/drivers/acpi/apei/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ACPI_APEI)		+= apei.o
+obj-$(CONFIG_ACPI_APEI_GHES)	+= ghes.o
+obj-$(CONFIG_ACPI_APEI_EINJ)	+= einj.o
+obj-$(CONFIG_ACPI_APEI_ERST_DEBUG) += erst-dbg.o
+
+apei-y := apei-base.o hest.o erst.o bert.o
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
new file mode 100644
index 000000000..3294cc8dc
--- /dev/null
+++ b/drivers/acpi/apei/apei-base.c
@@ -0,0 +1,803 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * apei-base.c - ACPI Platform Error Interface (APEI) supporting
+ * infrastructure
+ *
+ * APEI allows to report errors (for example from the chipset) to the
+ * the operating system. This improves NMI handling especially. In
+ * addition it supports error serialization and error injection.
+ *
+ * For more information about APEI, please refer to ACPI Specification
+ * version 4.0, chapter 17.
+ *
+ * This file has Common functions used by more than one APEI table,
+ * including framework of interpreter for ERST and EINJ; resource
+ * management for APEI registers.
+ *
+ * Copyright (C) 2009, Intel Corp.
+ *	Author: Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/kref.h>
+#include <linux/rculist.h>
+#include <linux/interrupt.h>
+#include <linux/debugfs.h>
+#include <asm/unaligned.h>
+
+#include "apei-internal.h"
+
+#define APEI_PFX "APEI: "
+
+/*
+ * APEI ERST (Error Record Serialization Table) and EINJ (Error
+ * INJection) interpreter framework.
+ */
+
+#define APEI_EXEC_PRESERVE_REGISTER	0x1
+
+void apei_exec_ctx_init(struct apei_exec_context *ctx,
+			struct apei_exec_ins_type *ins_table,
+			u32 instructions,
+			struct acpi_whea_header *action_table,
+			u32 entries)
+{
+	ctx->ins_table = ins_table;
+	ctx->instructions = instructions;
+	ctx->action_table = action_table;
+	ctx->entries = entries;
+}
+EXPORT_SYMBOL_GPL(apei_exec_ctx_init);
+
+int __apei_exec_read_register(struct acpi_whea_header *entry, u64 *val)
+{
+	int rc;
+
+	rc = apei_read(val, &entry->register_region);
+	if (rc)
+		return rc;
+	*val >>= entry->register_region.bit_offset;
+	*val &= entry->mask;
+
+	return 0;
+}
+
+int apei_exec_read_register(struct apei_exec_context *ctx,
+			    struct acpi_whea_header *entry)
+{
+	int rc;
+	u64 val = 0;
+
+	rc = __apei_exec_read_register(entry, &val);
+	if (rc)
+		return rc;
+	ctx->value = val;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(apei_exec_read_register);
+
+int apei_exec_read_register_value(struct apei_exec_context *ctx,
+				  struct acpi_whea_header *entry)
+{
+	int rc;
+
+	rc = apei_exec_read_register(ctx, entry);
+	if (rc)
+		return rc;
+	ctx->value = (ctx->value == entry->value);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(apei_exec_read_register_value);
+
+int __apei_exec_write_register(struct acpi_whea_header *entry, u64 val)
+{
+	int rc;
+
+	val &= entry->mask;
+	val <<= entry->register_region.bit_offset;
+	if (entry->flags & APEI_EXEC_PRESERVE_REGISTER) {
+		u64 valr = 0;
+		rc = apei_read(&valr, &entry->register_region);
+		if (rc)
+			return rc;
+		valr &= ~(entry->mask << entry->register_region.bit_offset);
+		val |= valr;
+	}
+	rc = apei_write(val, &entry->register_region);
+
+	return rc;
+}
+
+int apei_exec_write_register(struct apei_exec_context *ctx,
+			     struct acpi_whea_header *entry)
+{
+	return __apei_exec_write_register(entry, ctx->value);
+}
+EXPORT_SYMBOL_GPL(apei_exec_write_register);
+
+int apei_exec_write_register_value(struct apei_exec_context *ctx,
+				   struct acpi_whea_header *entry)
+{
+	int rc;
+
+	ctx->value = entry->value;
+	rc = apei_exec_write_register(ctx, entry);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(apei_exec_write_register_value);
+
+int apei_exec_noop(struct apei_exec_context *ctx,
+		   struct acpi_whea_header *entry)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(apei_exec_noop);
+
+/*
+ * Interpret the specified action. Go through whole action table,
+ * execute all instructions belong to the action.
+ */
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action,
+		    bool optional)
+{
+	int rc = -ENOENT;
+	u32 i, ip;
+	struct acpi_whea_header *entry;
+	apei_exec_ins_func_t run;
+
+	ctx->ip = 0;
+
+	/*
+	 * "ip" is the instruction pointer of current instruction,
+	 * "ctx->ip" specifies the next instruction to executed,
+	 * instruction "run" function may change the "ctx->ip" to
+	 * implement "goto" semantics.
+	 */
+rewind:
+	ip = 0;
+	for (i = 0; i < ctx->entries; i++) {
+		entry = &ctx->action_table[i];
+		if (entry->action != action)
+			continue;
+		if (ip == ctx->ip) {
+			if (entry->instruction >= ctx->instructions ||
+			    !ctx->ins_table[entry->instruction].run) {
+				pr_warn(FW_WARN APEI_PFX
+					"Invalid action table, unknown instruction type: %d\n",
+					entry->instruction);
+				return -EINVAL;
+			}
+			run = ctx->ins_table[entry->instruction].run;
+			rc = run(ctx, entry);
+			if (rc < 0)
+				return rc;
+			else if (rc != APEI_EXEC_SET_IP)
+				ctx->ip++;
+		}
+		ip++;
+		if (ctx->ip < ip)
+			goto rewind;
+	}
+
+	return !optional && rc < 0 ? rc : 0;
+}
+EXPORT_SYMBOL_GPL(__apei_exec_run);
+
+typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx,
+				      struct acpi_whea_header *entry,
+				      void *data);
+
+static int apei_exec_for_each_entry(struct apei_exec_context *ctx,
+				    apei_exec_entry_func_t func,
+				    void *data,
+				    int *end)
+{
+	u8 ins;
+	int i, rc;
+	struct acpi_whea_header *entry;
+	struct apei_exec_ins_type *ins_table = ctx->ins_table;
+
+	for (i = 0; i < ctx->entries; i++) {
+		entry = ctx->action_table + i;
+		ins = entry->instruction;
+		if (end)
+			*end = i;
+		if (ins >= ctx->instructions || !ins_table[ins].run) {
+			pr_warn(FW_WARN APEI_PFX
+				"Invalid action table, unknown instruction type: %d\n",
+				ins);
+			return -EINVAL;
+		}
+		rc = func(ctx, entry, data);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+static int pre_map_gar_callback(struct apei_exec_context *ctx,
+				struct acpi_whea_header *entry,
+				void *data)
+{
+	u8 ins = entry->instruction;
+
+	if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER)
+		return apei_map_generic_address(&entry->register_region);
+
+	return 0;
+}
+
+/*
+ * Pre-map all GARs in action table to make it possible to access them
+ * in NMI handler.
+ */
+int apei_exec_pre_map_gars(struct apei_exec_context *ctx)
+{
+	int rc, end;
+
+	rc = apei_exec_for_each_entry(ctx, pre_map_gar_callback,
+				      NULL, &end);
+	if (rc) {
+		struct apei_exec_context ctx_unmap;
+		memcpy(&ctx_unmap, ctx, sizeof(*ctx));
+		ctx_unmap.entries = end;
+		apei_exec_post_unmap_gars(&ctx_unmap);
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(apei_exec_pre_map_gars);
+
+static int post_unmap_gar_callback(struct apei_exec_context *ctx,
+				   struct acpi_whea_header *entry,
+				   void *data)
+{
+	u8 ins = entry->instruction;
+
+	if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER)
+		apei_unmap_generic_address(&entry->register_region);
+
+	return 0;
+}
+
+/* Post-unmap all GAR in action table. */
+int apei_exec_post_unmap_gars(struct apei_exec_context *ctx)
+{
+	return apei_exec_for_each_entry(ctx, post_unmap_gar_callback,
+					NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(apei_exec_post_unmap_gars);
+
+/*
+ * Resource management for GARs in APEI
+ */
+struct apei_res {
+	struct list_head list;
+	unsigned long start;
+	unsigned long end;
+};
+
+/* Collect all resources requested, to avoid conflict */
+struct apei_resources apei_resources_all = {
+	.iomem = LIST_HEAD_INIT(apei_resources_all.iomem),
+	.ioport = LIST_HEAD_INIT(apei_resources_all.ioport),
+};
+
+static int apei_res_add(struct list_head *res_list,
+			unsigned long start, unsigned long size)
+{
+	struct apei_res *res, *resn, *res_ins = NULL;
+	unsigned long end = start + size;
+
+	if (end <= start)
+		return 0;
+repeat:
+	list_for_each_entry_safe(res, resn, res_list, list) {
+		if (res->start > end || res->end < start)
+			continue;
+		else if (end <= res->end && start >= res->start) {
+			kfree(res_ins);
+			return 0;
+		}
+		list_del(&res->list);
+		res->start = start = min(res->start, start);
+		res->end = end = max(res->end, end);
+		kfree(res_ins);
+		res_ins = res;
+		goto repeat;
+	}
+
+	if (res_ins)
+		list_add(&res_ins->list, res_list);
+	else {
+		res_ins = kmalloc(sizeof(*res), GFP_KERNEL);
+		if (!res_ins)
+			return -ENOMEM;
+		res_ins->start = start;
+		res_ins->end = end;
+		list_add(&res_ins->list, res_list);
+	}
+
+	return 0;
+}
+
+static int apei_res_sub(struct list_head *res_list1,
+			struct list_head *res_list2)
+{
+	struct apei_res *res1, *resn1, *res2, *res;
+	res1 = list_entry(res_list1->next, struct apei_res, list);
+	resn1 = list_entry(res1->list.next, struct apei_res, list);
+	while (&res1->list != res_list1) {
+		list_for_each_entry(res2, res_list2, list) {
+			if (res1->start >= res2->end ||
+			    res1->end <= res2->start)
+				continue;
+			else if (res1->end <= res2->end &&
+				 res1->start >= res2->start) {
+				list_del(&res1->list);
+				kfree(res1);
+				break;
+			} else if (res1->end > res2->end &&
+				   res1->start < res2->start) {
+				res = kmalloc(sizeof(*res), GFP_KERNEL);
+				if (!res)
+					return -ENOMEM;
+				res->start = res2->end;
+				res->end = res1->end;
+				res1->end = res2->start;
+				list_add(&res->list, &res1->list);
+				resn1 = res;
+			} else {
+				if (res1->start < res2->start)
+					res1->end = res2->start;
+				else
+					res1->start = res2->end;
+			}
+		}
+		res1 = resn1;
+		resn1 = list_entry(resn1->list.next, struct apei_res, list);
+	}
+
+	return 0;
+}
+
+static void apei_res_clean(struct list_head *res_list)
+{
+	struct apei_res *res, *resn;
+
+	list_for_each_entry_safe(res, resn, res_list, list) {
+		list_del(&res->list);
+		kfree(res);
+	}
+}
+
+void apei_resources_fini(struct apei_resources *resources)
+{
+	apei_res_clean(&resources->iomem);
+	apei_res_clean(&resources->ioport);
+}
+EXPORT_SYMBOL_GPL(apei_resources_fini);
+
+static int apei_resources_merge(struct apei_resources *resources1,
+				struct apei_resources *resources2)
+{
+	int rc;
+	struct apei_res *res;
+
+	list_for_each_entry(res, &resources2->iomem, list) {
+		rc = apei_res_add(&resources1->iomem, res->start,
+				  res->end - res->start);
+		if (rc)
+			return rc;
+	}
+	list_for_each_entry(res, &resources2->ioport, list) {
+		rc = apei_res_add(&resources1->ioport, res->start,
+				  res->end - res->start);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+int apei_resources_add(struct apei_resources *resources,
+		       unsigned long start, unsigned long size,
+		       bool iomem)
+{
+	if (iomem)
+		return apei_res_add(&resources->iomem, start, size);
+	else
+		return apei_res_add(&resources->ioport, start, size);
+}
+EXPORT_SYMBOL_GPL(apei_resources_add);
+
+/*
+ * EINJ has two groups of GARs (EINJ table entry and trigger table
+ * entry), so common resources are subtracted from the trigger table
+ * resources before the second requesting.
+ */
+int apei_resources_sub(struct apei_resources *resources1,
+		       struct apei_resources *resources2)
+{
+	int rc;
+
+	rc = apei_res_sub(&resources1->iomem, &resources2->iomem);
+	if (rc)
+		return rc;
+	return apei_res_sub(&resources1->ioport, &resources2->ioport);
+}
+EXPORT_SYMBOL_GPL(apei_resources_sub);
+
+static int apei_get_res_callback(__u64 start, __u64 size, void *data)
+{
+	struct apei_resources *resources = data;
+	return apei_res_add(&resources->iomem, start, size);
+}
+
+static int apei_get_nvs_resources(struct apei_resources *resources)
+{
+	return acpi_nvs_for_each_region(apei_get_res_callback, resources);
+}
+
+int (*arch_apei_filter_addr)(int (*func)(__u64 start, __u64 size,
+				     void *data), void *data);
+static int apei_get_arch_resources(struct apei_resources *resources)
+
+{
+	return arch_apei_filter_addr(apei_get_res_callback, resources);
+}
+
+/*
+ * IO memory/port resource management mechanism is used to check
+ * whether memory/port area used by GARs conflicts with normal memory
+ * or IO memory/port of devices.
+ */
+int apei_resources_request(struct apei_resources *resources,
+			   const char *desc)
+{
+	struct apei_res *res, *res_bak = NULL;
+	struct resource *r;
+	struct apei_resources nvs_resources, arch_res;
+	int rc;
+
+	rc = apei_resources_sub(resources, &apei_resources_all);
+	if (rc)
+		return rc;
+
+	/*
+	 * Some firmware uses ACPI NVS region, that has been marked as
+	 * busy, so exclude it from APEI resources to avoid false
+	 * conflict.
+	 */
+	apei_resources_init(&nvs_resources);
+	rc = apei_get_nvs_resources(&nvs_resources);
+	if (rc)
+		goto nvs_res_fini;
+	rc = apei_resources_sub(resources, &nvs_resources);
+	if (rc)
+		goto nvs_res_fini;
+
+	if (arch_apei_filter_addr) {
+		apei_resources_init(&arch_res);
+		rc = apei_get_arch_resources(&arch_res);
+		if (rc)
+			goto arch_res_fini;
+		rc = apei_resources_sub(resources, &arch_res);
+		if (rc)
+			goto arch_res_fini;
+	}
+
+	rc = -EINVAL;
+	list_for_each_entry(res, &resources->iomem, list) {
+		r = request_mem_region(res->start, res->end - res->start,
+				       desc);
+		if (!r) {
+			pr_err(APEI_PFX
+		"Can not request [mem %#010llx-%#010llx] for %s registers\n",
+			       (unsigned long long)res->start,
+			       (unsigned long long)res->end - 1, desc);
+			res_bak = res;
+			goto err_unmap_iomem;
+		}
+	}
+
+	list_for_each_entry(res, &resources->ioport, list) {
+		r = request_region(res->start, res->end - res->start, desc);
+		if (!r) {
+			pr_err(APEI_PFX
+		"Can not request [io  %#06llx-%#06llx] for %s registers\n",
+			       (unsigned long long)res->start,
+			       (unsigned long long)res->end - 1, desc);
+			res_bak = res;
+			goto err_unmap_ioport;
+		}
+	}
+
+	rc = apei_resources_merge(&apei_resources_all, resources);
+	if (rc) {
+		pr_err(APEI_PFX "Fail to merge resources!\n");
+		goto err_unmap_ioport;
+	}
+
+	goto arch_res_fini;
+
+err_unmap_ioport:
+	list_for_each_entry(res, &resources->ioport, list) {
+		if (res == res_bak)
+			break;
+		release_region(res->start, res->end - res->start);
+	}
+	res_bak = NULL;
+err_unmap_iomem:
+	list_for_each_entry(res, &resources->iomem, list) {
+		if (res == res_bak)
+			break;
+		release_mem_region(res->start, res->end - res->start);
+	}
+arch_res_fini:
+	if (arch_apei_filter_addr)
+		apei_resources_fini(&arch_res);
+nvs_res_fini:
+	apei_resources_fini(&nvs_resources);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(apei_resources_request);
+
+void apei_resources_release(struct apei_resources *resources)
+{
+	int rc;
+	struct apei_res *res;
+
+	list_for_each_entry(res, &resources->iomem, list)
+		release_mem_region(res->start, res->end - res->start);
+	list_for_each_entry(res, &resources->ioport, list)
+		release_region(res->start, res->end - res->start);
+
+	rc = apei_resources_sub(&apei_resources_all, resources);
+	if (rc)
+		pr_err(APEI_PFX "Fail to sub resources!\n");
+}
+EXPORT_SYMBOL_GPL(apei_resources_release);
+
+static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr,
+				u32 *access_bit_width)
+{
+	u32 bit_width, bit_offset, access_size_code, space_id;
+
+	bit_width = reg->bit_width;
+	bit_offset = reg->bit_offset;
+	access_size_code = reg->access_width;
+	space_id = reg->space_id;
+	*paddr = get_unaligned(&reg->address);
+	if (!*paddr) {
+		pr_warn(FW_BUG APEI_PFX
+			"Invalid physical address in GAR [0x%llx/%u/%u/%u/%u]\n",
+			*paddr, bit_width, bit_offset, access_size_code,
+			space_id);
+		return -EINVAL;
+	}
+
+	if (access_size_code < 1 || access_size_code > 4) {
+		pr_warn(FW_BUG APEI_PFX
+			"Invalid access size code in GAR [0x%llx/%u/%u/%u/%u]\n",
+			*paddr, bit_width, bit_offset, access_size_code,
+			space_id);
+		return -EINVAL;
+	}
+	*access_bit_width = 1UL << (access_size_code + 2);
+
+	/* Fixup common BIOS bug */
+	if (bit_width == 32 && bit_offset == 0 && (*paddr & 0x03) == 0 &&
+	    *access_bit_width < 32)
+		*access_bit_width = 32;
+	else if (bit_width == 64 && bit_offset == 0 && (*paddr & 0x07) == 0 &&
+	    *access_bit_width < 64)
+		*access_bit_width = 64;
+
+	if ((bit_width + bit_offset) > *access_bit_width) {
+		pr_warn(FW_BUG APEI_PFX
+			"Invalid bit width + offset in GAR [0x%llx/%u/%u/%u/%u]\n",
+			*paddr, bit_width, bit_offset, access_size_code,
+			space_id);
+		return -EINVAL;
+	}
+
+	if (space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY &&
+	    space_id != ACPI_ADR_SPACE_SYSTEM_IO) {
+		pr_warn(FW_BUG APEI_PFX
+			"Invalid address space type in GAR [0x%llx/%u/%u/%u/%u]\n",
+			*paddr, bit_width, bit_offset, access_size_code,
+			space_id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int apei_map_generic_address(struct acpi_generic_address *reg)
+{
+	int rc;
+	u32 access_bit_width;
+	u64 address;
+
+	rc = apei_check_gar(reg, &address, &access_bit_width);
+	if (rc)
+		return rc;
+
+	/* IO space doesn't need mapping */
+	if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO)
+		return 0;
+
+	if (!acpi_os_map_generic_address(reg))
+		return -ENXIO;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(apei_map_generic_address);
+
+/* read GAR in interrupt (including NMI) or process context */
+int apei_read(u64 *val, struct acpi_generic_address *reg)
+{
+	int rc;
+	u32 access_bit_width;
+	u64 address;
+	acpi_status status;
+
+	rc = apei_check_gar(reg, &address, &access_bit_width);
+	if (rc)
+		return rc;
+
+	*val = 0;
+	switch(reg->space_id) {
+	case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+		status = acpi_os_read_memory((acpi_physical_address) address,
+					       val, access_bit_width);
+		if (ACPI_FAILURE(status))
+			return -EIO;
+		break;
+	case ACPI_ADR_SPACE_SYSTEM_IO:
+		status = acpi_os_read_port(address, (u32 *)val,
+					   access_bit_width);
+		if (ACPI_FAILURE(status))
+			return -EIO;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(apei_read);
+
+/* write GAR in interrupt (including NMI) or process context */
+int apei_write(u64 val, struct acpi_generic_address *reg)
+{
+	int rc;
+	u32 access_bit_width;
+	u64 address;
+	acpi_status status;
+
+	rc = apei_check_gar(reg, &address, &access_bit_width);
+	if (rc)
+		return rc;
+
+	switch (reg->space_id) {
+	case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+		status = acpi_os_write_memory((acpi_physical_address) address,
+						val, access_bit_width);
+		if (ACPI_FAILURE(status))
+			return -EIO;
+		break;
+	case ACPI_ADR_SPACE_SYSTEM_IO:
+		status = acpi_os_write_port(address, val, access_bit_width);
+		if (ACPI_FAILURE(status))
+			return -EIO;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(apei_write);
+
+static int collect_res_callback(struct apei_exec_context *ctx,
+				struct acpi_whea_header *entry,
+				void *data)
+{
+	struct apei_resources *resources = data;
+	struct acpi_generic_address *reg = &entry->register_region;
+	u8 ins = entry->instruction;
+	u32 access_bit_width;
+	u64 paddr;
+	int rc;
+
+	if (!(ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER))
+		return 0;
+
+	rc = apei_check_gar(reg, &paddr, &access_bit_width);
+	if (rc)
+		return rc;
+
+	switch (reg->space_id) {
+	case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+		return apei_res_add(&resources->iomem, paddr,
+				    access_bit_width / 8);
+	case ACPI_ADR_SPACE_SYSTEM_IO:
+		return apei_res_add(&resources->ioport, paddr,
+				    access_bit_width / 8);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Same register may be used by multiple instructions in GARs, so
+ * resources are collected before requesting.
+ */
+int apei_exec_collect_resources(struct apei_exec_context *ctx,
+				struct apei_resources *resources)
+{
+	return apei_exec_for_each_entry(ctx, collect_res_callback,
+					resources, NULL);
+}
+EXPORT_SYMBOL_GPL(apei_exec_collect_resources);
+
+struct dentry *apei_get_debugfs_dir(void)
+{
+	static struct dentry *dapei;
+
+	if (!dapei)
+		dapei = debugfs_create_dir("apei", NULL);
+
+	return dapei;
+}
+EXPORT_SYMBOL_GPL(apei_get_debugfs_dir);
+
+int __weak arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr,
+				  void *data)
+{
+	return 1;
+}
+EXPORT_SYMBOL_GPL(arch_apei_enable_cmcff);
+
+void __weak arch_apei_report_mem_error(int sev,
+				       struct cper_sec_mem_err *mem_err)
+{
+}
+EXPORT_SYMBOL_GPL(arch_apei_report_mem_error);
+
+int apei_osc_setup(void)
+{
+	static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c";
+	acpi_handle handle;
+	u32 capbuf[3];
+	struct acpi_osc_context context = {
+		.uuid_str	= whea_uuid_str,
+		.rev		= 1,
+		.cap.length	= sizeof(capbuf),
+		.cap.pointer	= capbuf,
+	};
+
+	capbuf[OSC_QUERY_DWORD] = OSC_QUERY_ENABLE;
+	capbuf[OSC_SUPPORT_DWORD] = 1;
+	capbuf[OSC_CONTROL_DWORD] = 0;
+
+	if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))
+	    || ACPI_FAILURE(acpi_run_osc(handle, &context)))
+		return -EIO;
+	else {
+		kfree(context.ret.pointer);
+		return 0;
+	}
+}
+EXPORT_SYMBOL_GPL(apei_osc_setup);
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
new file mode 100644
index 000000000..1d6ef9654
--- /dev/null
+++ b/drivers/acpi/apei/apei-internal.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * apei-internal.h - ACPI Platform Error Interface internal
+ * definitions.
+ */
+
+#ifndef APEI_INTERNAL_H
+#define APEI_INTERNAL_H
+
+#include <linux/cper.h>
+#include <linux/acpi.h>
+
+struct apei_exec_context;
+
+typedef int (*apei_exec_ins_func_t)(struct apei_exec_context *ctx,
+				    struct acpi_whea_header *entry);
+
+#define APEI_EXEC_INS_ACCESS_REGISTER	0x0001
+
+struct apei_exec_ins_type {
+	u32 flags;
+	apei_exec_ins_func_t run;
+};
+
+struct apei_exec_context {
+	u32 ip;
+	u64 value;
+	u64 var1;
+	u64 var2;
+	u64 src_base;
+	u64 dst_base;
+	struct apei_exec_ins_type *ins_table;
+	u32 instructions;
+	struct acpi_whea_header *action_table;
+	u32 entries;
+};
+
+void apei_exec_ctx_init(struct apei_exec_context *ctx,
+			struct apei_exec_ins_type *ins_table,
+			u32 instructions,
+			struct acpi_whea_header *action_table,
+			u32 entries);
+
+static inline void apei_exec_ctx_set_input(struct apei_exec_context *ctx,
+					   u64 input)
+{
+	ctx->value = input;
+}
+
+static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx)
+{
+	return ctx->value;
+}
+
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool optional);
+
+static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action)
+{
+	return __apei_exec_run(ctx, action, 0);
+}
+
+/* It is optional whether the firmware provides the action */
+static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action)
+{
+	return __apei_exec_run(ctx, action, 1);
+}
+
+/* Common instruction implementation */
+
+/* IP has been set in instruction function */
+#define APEI_EXEC_SET_IP	1
+
+int apei_map_generic_address(struct acpi_generic_address *reg);
+
+static inline void apei_unmap_generic_address(struct acpi_generic_address *reg)
+{
+	acpi_os_unmap_generic_address(reg);
+}
+
+int apei_read(u64 *val, struct acpi_generic_address *reg);
+int apei_write(u64 val, struct acpi_generic_address *reg);
+
+int __apei_exec_read_register(struct acpi_whea_header *entry, u64 *val);
+int __apei_exec_write_register(struct acpi_whea_header *entry, u64 val);
+int apei_exec_read_register(struct apei_exec_context *ctx,
+			    struct acpi_whea_header *entry);
+int apei_exec_read_register_value(struct apei_exec_context *ctx,
+				  struct acpi_whea_header *entry);
+int apei_exec_write_register(struct apei_exec_context *ctx,
+			     struct acpi_whea_header *entry);
+int apei_exec_write_register_value(struct apei_exec_context *ctx,
+				   struct acpi_whea_header *entry);
+int apei_exec_noop(struct apei_exec_context *ctx,
+		   struct acpi_whea_header *entry);
+int apei_exec_pre_map_gars(struct apei_exec_context *ctx);
+int apei_exec_post_unmap_gars(struct apei_exec_context *ctx);
+
+struct apei_resources {
+	struct list_head iomem;
+	struct list_head ioport;
+};
+
+static inline void apei_resources_init(struct apei_resources *resources)
+{
+	INIT_LIST_HEAD(&resources->iomem);
+	INIT_LIST_HEAD(&resources->ioport);
+}
+
+void apei_resources_fini(struct apei_resources *resources);
+int apei_resources_add(struct apei_resources *resources,
+		       unsigned long start, unsigned long size,
+		       bool iomem);
+int apei_resources_sub(struct apei_resources *resources1,
+		       struct apei_resources *resources2);
+int apei_resources_request(struct apei_resources *resources,
+			   const char *desc);
+void apei_resources_release(struct apei_resources *resources);
+int apei_exec_collect_resources(struct apei_exec_context *ctx,
+				struct apei_resources *resources);
+
+struct dentry;
+struct dentry *apei_get_debugfs_dir(void);
+
+static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus)
+{
+	if (estatus->raw_data_length)
+		return estatus->raw_data_offset + \
+			estatus->raw_data_length;
+	else
+		return sizeof(*estatus) + estatus->data_length;
+}
+
+void cper_estatus_print(const char *pfx,
+			const struct acpi_hest_generic_status *estatus);
+int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus);
+int cper_estatus_check(const struct acpi_hest_generic_status *estatus);
+
+int apei_osc_setup(void);
+#endif
diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c
new file mode 100644
index 000000000..45973aa6e
--- /dev/null
+++ b/drivers/acpi/apei/bert.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * APEI Boot Error Record Table (BERT) support
+ *
+ * Copyright 2011 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *
+ * Under normal circumstances, when a hardware error occurs, the error
+ * handler receives control and processes the error. This gives OSPM a
+ * chance to process the error condition, report it, and optionally attempt
+ * recovery. In some cases, the system is unable to process an error.
+ * For example, system firmware or a management controller may choose to
+ * reset the system or the system might experience an uncontrolled crash
+ * or reset.The boot error source is used to report unhandled errors that
+ * occurred in a previous boot. This mechanism is described in the BERT
+ * table.
+ *
+ * For more information about BERT, please refer to ACPI Specification
+ * version 4.0, section 17.3.1
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/io.h>
+
+#include "apei-internal.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "BERT: " fmt
+
+#define ACPI_BERT_PRINT_MAX_RECORDS 5
+#define ACPI_BERT_PRINT_MAX_LEN 1024
+
+static int bert_disable;
+
+/*
+ * Print "all" the error records in the BERT table, but avoid huge spam to
+ * the console if the BIOS included oversize records, or too many records.
+ * Skipping some records here does not lose anything because the full
+ * data is available to user tools in:
+ *	/sys/firmware/acpi/tables/data/BERT
+ */
+static void __init bert_print_all(struct acpi_bert_region *region,
+				  unsigned int region_len)
+{
+	struct acpi_hest_generic_status *estatus =
+		(struct acpi_hest_generic_status *)region;
+	int remain = region_len;
+	int printed = 0, skipped = 0;
+	u32 estatus_len;
+
+	while (remain >= sizeof(struct acpi_bert_region)) {
+		estatus_len = cper_estatus_len(estatus);
+		if (remain < estatus_len) {
+			pr_err(FW_BUG "Truncated status block (length: %u).\n",
+			       estatus_len);
+			break;
+		}
+
+		/* No more error records. */
+		if (!estatus->block_status)
+			break;
+
+		if (cper_estatus_check(estatus)) {
+			pr_err(FW_BUG "Invalid error record.\n");
+			break;
+		}
+
+		if (estatus_len < ACPI_BERT_PRINT_MAX_LEN &&
+		    printed < ACPI_BERT_PRINT_MAX_RECORDS) {
+			pr_info_once("Error records from previous boot:\n");
+			cper_estatus_print(KERN_INFO HW_ERR, estatus);
+			printed++;
+		} else {
+			skipped++;
+		}
+
+		/*
+		 * Because the boot error source is "one-time polled" type,
+		 * clear Block Status of current Generic Error Status Block,
+		 * once it's printed.
+		 */
+		estatus->block_status = 0;
+
+		estatus = (void *)estatus + estatus_len;
+		remain -= estatus_len;
+	}
+
+	if (skipped)
+		pr_info(HW_ERR "Skipped %d error records\n", skipped);
+}
+
+static int __init setup_bert_disable(char *str)
+{
+	bert_disable = 1;
+
+	return 1;
+}
+__setup("bert_disable", setup_bert_disable);
+
+static int __init bert_check_table(struct acpi_table_bert *bert_tab)
+{
+	if (bert_tab->header.length < sizeof(struct acpi_table_bert) ||
+	    bert_tab->region_length < sizeof(struct acpi_bert_region))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int __init bert_init(void)
+{
+	struct apei_resources bert_resources;
+	struct acpi_bert_region *boot_error_region;
+	struct acpi_table_bert *bert_tab;
+	unsigned int region_len;
+	acpi_status status;
+	int rc = 0;
+
+	if (acpi_disabled)
+		return 0;
+
+	if (bert_disable) {
+		pr_info("Boot Error Record Table support is disabled.\n");
+		return 0;
+	}
+
+	status = acpi_get_table(ACPI_SIG_BERT, 0, (struct acpi_table_header **)&bert_tab);
+	if (status == AE_NOT_FOUND)
+		return 0;
+
+	if (ACPI_FAILURE(status)) {
+		pr_err("get table failed, %s.\n", acpi_format_exception(status));
+		return -EINVAL;
+	}
+
+	rc = bert_check_table(bert_tab);
+	if (rc) {
+		pr_err(FW_BUG "table invalid.\n");
+		goto out_put_bert_tab;
+	}
+
+	region_len = bert_tab->region_length;
+	apei_resources_init(&bert_resources);
+	rc = apei_resources_add(&bert_resources, bert_tab->address,
+				region_len, true);
+	if (rc)
+		goto out_put_bert_tab;
+	rc = apei_resources_request(&bert_resources, "APEI BERT");
+	if (rc)
+		goto out_fini;
+	boot_error_region = ioremap_cache(bert_tab->address, region_len);
+	if (boot_error_region) {
+		bert_print_all(boot_error_region, region_len);
+		iounmap(boot_error_region);
+	} else {
+		rc = -ENOMEM;
+	}
+
+	apei_resources_release(&bert_resources);
+out_fini:
+	apei_resources_fini(&bert_resources);
+out_put_bert_tab:
+	acpi_put_table((struct acpi_table_header *)bert_tab);
+
+	return rc;
+}
+
+late_initcall(bert_init);
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
new file mode 100644
index 000000000..c281d5b33
--- /dev/null
+++ b/drivers/acpi/apei/einj.c
@@ -0,0 +1,795 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * APEI Error INJection support
+ *
+ * EINJ provides a hardware error injection mechanism, this is useful
+ * for debugging and testing of other APEI and RAS features.
+ *
+ * For more information about EINJ, please refer to ACPI Specification
+ * version 4.0, section 17.5.
+ *
+ * Copyright 2009-2010 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/nmi.h>
+#include <linux/delay.h>
+#include <linux/mm.h>
+#include <asm/unaligned.h>
+
+#include "apei-internal.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "EINJ: " fmt
+
+#define SPIN_UNIT		100			/* 100ns */
+/* Firmware should respond within 1 milliseconds */
+#define FIRMWARE_TIMEOUT	(1 * NSEC_PER_MSEC)
+#define ACPI5_VENDOR_BIT	BIT(31)
+#define MEM_ERROR_MASK		(ACPI_EINJ_MEMORY_CORRECTABLE | \
+				ACPI_EINJ_MEMORY_UNCORRECTABLE | \
+				ACPI_EINJ_MEMORY_FATAL)
+
+/*
+ * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action.
+ */
+static int acpi5;
+
+struct set_error_type_with_address {
+	u32	type;
+	u32	vendor_extension;
+	u32	flags;
+	u32	apicid;
+	u64	memory_address;
+	u64	memory_address_range;
+	u32	pcie_sbdf;
+};
+enum {
+	SETWA_FLAGS_APICID = 1,
+	SETWA_FLAGS_MEM = 2,
+	SETWA_FLAGS_PCIE_SBDF = 4,
+};
+
+/*
+ * Vendor extensions for platform specific operations
+ */
+struct vendor_error_type_extension {
+	u32	length;
+	u32	pcie_sbdf;
+	u16	vendor_id;
+	u16	device_id;
+	u8	rev_id;
+	u8	reserved[3];
+};
+
+static u32 notrigger;
+
+static u32 vendor_flags;
+static struct debugfs_blob_wrapper vendor_blob;
+static char vendor_dev[64];
+
+/*
+ * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
+ * EINJ table through an unpublished extension. Use with caution as
+ * most will ignore the parameter and make their own choice of address
+ * for error injection.  This extension is used only if
+ * param_extension module parameter is specified.
+ */
+struct einj_parameter {
+	u64 type;
+	u64 reserved1;
+	u64 reserved2;
+	u64 param1;
+	u64 param2;
+};
+
+#define EINJ_OP_BUSY			0x1
+#define EINJ_STATUS_SUCCESS		0x0
+#define EINJ_STATUS_FAIL		0x1
+#define EINJ_STATUS_INVAL		0x2
+
+#define EINJ_TAB_ENTRY(tab)						\
+	((struct acpi_whea_header *)((char *)(tab) +			\
+				    sizeof(struct acpi_table_einj)))
+
+static bool param_extension;
+module_param(param_extension, bool, 0);
+
+static struct acpi_table_einj *einj_tab;
+
+static struct apei_resources einj_resources;
+
+static struct apei_exec_ins_type einj_ins_type[] = {
+	[ACPI_EINJ_READ_REGISTER] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run   = apei_exec_read_register,
+	},
+	[ACPI_EINJ_READ_REGISTER_VALUE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run   = apei_exec_read_register_value,
+	},
+	[ACPI_EINJ_WRITE_REGISTER] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run   = apei_exec_write_register,
+	},
+	[ACPI_EINJ_WRITE_REGISTER_VALUE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run   = apei_exec_write_register_value,
+	},
+	[ACPI_EINJ_NOOP] = {
+		.flags = 0,
+		.run   = apei_exec_noop,
+	},
+};
+
+/*
+ * Prevent EINJ interpreter to run simultaneously, because the
+ * corresponding firmware implementation may not work properly when
+ * invoked simultaneously.
+ */
+static DEFINE_MUTEX(einj_mutex);
+
+static void *einj_param;
+
+static void einj_exec_ctx_init(struct apei_exec_context *ctx)
+{
+	apei_exec_ctx_init(ctx, einj_ins_type, ARRAY_SIZE(einj_ins_type),
+			   EINJ_TAB_ENTRY(einj_tab), einj_tab->entries);
+}
+
+static int __einj_get_available_error_type(u32 *type)
+{
+	struct apei_exec_context ctx;
+	int rc;
+
+	einj_exec_ctx_init(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_EINJ_GET_ERROR_TYPE);
+	if (rc)
+		return rc;
+	*type = apei_exec_ctx_get_output(&ctx);
+
+	return 0;
+}
+
+/* Get error injection capabilities of the platform */
+static int einj_get_available_error_type(u32 *type)
+{
+	int rc;
+
+	mutex_lock(&einj_mutex);
+	rc = __einj_get_available_error_type(type);
+	mutex_unlock(&einj_mutex);
+
+	return rc;
+}
+
+static int einj_timedout(u64 *t)
+{
+	if ((s64)*t < SPIN_UNIT) {
+		pr_warn(FW_WARN "Firmware does not respond in time\n");
+		return 1;
+	}
+	*t -= SPIN_UNIT;
+	ndelay(SPIN_UNIT);
+	touch_nmi_watchdog();
+	return 0;
+}
+
+static void check_vendor_extension(u64 paddr,
+				   struct set_error_type_with_address *v5param)
+{
+	int	offset = v5param->vendor_extension;
+	struct	vendor_error_type_extension *v;
+	u32	sbdf;
+
+	if (!offset)
+		return;
+	v = acpi_os_map_iomem(paddr + offset, sizeof(*v));
+	if (!v)
+		return;
+	sbdf = v->pcie_sbdf;
+	sprintf(vendor_dev, "%x:%x:%x.%x vendor_id=%x device_id=%x rev_id=%x\n",
+		sbdf >> 24, (sbdf >> 16) & 0xff,
+		(sbdf >> 11) & 0x1f, (sbdf >> 8) & 0x7,
+		 v->vendor_id, v->device_id, v->rev_id);
+	acpi_os_unmap_iomem(v, sizeof(*v));
+}
+
+static void *einj_get_parameter_address(void)
+{
+	int i;
+	u64 pa_v4 = 0, pa_v5 = 0;
+	struct acpi_whea_header *entry;
+
+	entry = EINJ_TAB_ENTRY(einj_tab);
+	for (i = 0; i < einj_tab->entries; i++) {
+		if (entry->action == ACPI_EINJ_SET_ERROR_TYPE &&
+		    entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
+		    entry->register_region.space_id ==
+		    ACPI_ADR_SPACE_SYSTEM_MEMORY)
+			pa_v4 = get_unaligned(&entry->register_region.address);
+		if (entry->action == ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS &&
+		    entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
+		    entry->register_region.space_id ==
+		    ACPI_ADR_SPACE_SYSTEM_MEMORY)
+			pa_v5 = get_unaligned(&entry->register_region.address);
+		entry++;
+	}
+	if (pa_v5) {
+		struct set_error_type_with_address *v5param;
+
+		v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param));
+		if (v5param) {
+			acpi5 = 1;
+			check_vendor_extension(pa_v5, v5param);
+			return v5param;
+		}
+	}
+	if (param_extension && pa_v4) {
+		struct einj_parameter *v4param;
+
+		v4param = acpi_os_map_iomem(pa_v4, sizeof(*v4param));
+		if (!v4param)
+			return NULL;
+		if (v4param->reserved1 || v4param->reserved2) {
+			acpi_os_unmap_iomem(v4param, sizeof(*v4param));
+			return NULL;
+		}
+		return v4param;
+	}
+
+	return NULL;
+}
+
+/* do sanity check to trigger table */
+static int einj_check_trigger_header(struct acpi_einj_trigger *trigger_tab)
+{
+	if (trigger_tab->header_size != sizeof(struct acpi_einj_trigger))
+		return -EINVAL;
+	if (trigger_tab->table_size > PAGE_SIZE ||
+	    trigger_tab->table_size < trigger_tab->header_size)
+		return -EINVAL;
+	if (trigger_tab->entry_count !=
+	    (trigger_tab->table_size - trigger_tab->header_size) /
+	    sizeof(struct acpi_einj_entry))
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct acpi_generic_address *einj_get_trigger_parameter_region(
+	struct acpi_einj_trigger *trigger_tab, u64 param1, u64 param2)
+{
+	int i;
+	struct acpi_whea_header *entry;
+
+	entry = (struct acpi_whea_header *)
+		((char *)trigger_tab + sizeof(struct acpi_einj_trigger));
+	for (i = 0; i < trigger_tab->entry_count; i++) {
+		if (entry->action == ACPI_EINJ_TRIGGER_ERROR &&
+		entry->instruction <= ACPI_EINJ_WRITE_REGISTER_VALUE &&
+		entry->register_region.space_id ==
+			ACPI_ADR_SPACE_SYSTEM_MEMORY &&
+		(entry->register_region.address & param2) == (param1 & param2))
+			return &entry->register_region;
+		entry++;
+	}
+
+	return NULL;
+}
+/* Execute instructions in trigger error action table */
+static int __einj_error_trigger(u64 trigger_paddr, u32 type,
+				u64 param1, u64 param2)
+{
+	struct acpi_einj_trigger *trigger_tab = NULL;
+	struct apei_exec_context trigger_ctx;
+	struct apei_resources trigger_resources;
+	struct acpi_whea_header *trigger_entry;
+	struct resource *r;
+	u32 table_size;
+	int rc = -EIO;
+	struct acpi_generic_address *trigger_param_region = NULL;
+
+	r = request_mem_region(trigger_paddr, sizeof(*trigger_tab),
+			       "APEI EINJ Trigger Table");
+	if (!r) {
+		pr_err("Can not request [mem %#010llx-%#010llx] for Trigger table\n",
+		       (unsigned long long)trigger_paddr,
+		       (unsigned long long)trigger_paddr +
+			    sizeof(*trigger_tab) - 1);
+		goto out;
+	}
+	trigger_tab = ioremap_cache(trigger_paddr, sizeof(*trigger_tab));
+	if (!trigger_tab) {
+		pr_err("Failed to map trigger table!\n");
+		goto out_rel_header;
+	}
+	rc = einj_check_trigger_header(trigger_tab);
+	if (rc) {
+		pr_warn(FW_BUG "Invalid trigger error action table.\n");
+		goto out_rel_header;
+	}
+
+	/* No action structures in the TRIGGER_ERROR table, nothing to do */
+	if (!trigger_tab->entry_count)
+		goto out_rel_header;
+
+	rc = -EIO;
+	table_size = trigger_tab->table_size;
+	r = request_mem_region(trigger_paddr + sizeof(*trigger_tab),
+			       table_size - sizeof(*trigger_tab),
+			       "APEI EINJ Trigger Table");
+	if (!r) {
+		pr_err("Can not request [mem %#010llx-%#010llx] for Trigger Table Entry\n",
+		       (unsigned long long)trigger_paddr + sizeof(*trigger_tab),
+		       (unsigned long long)trigger_paddr + table_size - 1);
+		goto out_rel_header;
+	}
+	iounmap(trigger_tab);
+	trigger_tab = ioremap_cache(trigger_paddr, table_size);
+	if (!trigger_tab) {
+		pr_err("Failed to map trigger table!\n");
+		goto out_rel_entry;
+	}
+	trigger_entry = (struct acpi_whea_header *)
+		((char *)trigger_tab + sizeof(struct acpi_einj_trigger));
+	apei_resources_init(&trigger_resources);
+	apei_exec_ctx_init(&trigger_ctx, einj_ins_type,
+			   ARRAY_SIZE(einj_ins_type),
+			   trigger_entry, trigger_tab->entry_count);
+	rc = apei_exec_collect_resources(&trigger_ctx, &trigger_resources);
+	if (rc)
+		goto out_fini;
+	rc = apei_resources_sub(&trigger_resources, &einj_resources);
+	if (rc)
+		goto out_fini;
+	/*
+	 * Some firmware will access target address specified in
+	 * param1 to trigger the error when injecting memory error.
+	 * This will cause resource conflict with regular memory.  So
+	 * remove it from trigger table resources.
+	 */
+	if ((param_extension || acpi5) && (type & MEM_ERROR_MASK) && param2) {
+		struct apei_resources addr_resources;
+		apei_resources_init(&addr_resources);
+		trigger_param_region = einj_get_trigger_parameter_region(
+			trigger_tab, param1, param2);
+		if (trigger_param_region) {
+			rc = apei_resources_add(&addr_resources,
+				trigger_param_region->address,
+				trigger_param_region->bit_width/8, true);
+			if (rc)
+				goto out_fini;
+			rc = apei_resources_sub(&trigger_resources,
+					&addr_resources);
+		}
+		apei_resources_fini(&addr_resources);
+		if (rc)
+			goto out_fini;
+	}
+	rc = apei_resources_request(&trigger_resources, "APEI EINJ Trigger");
+	if (rc)
+		goto out_fini;
+	rc = apei_exec_pre_map_gars(&trigger_ctx);
+	if (rc)
+		goto out_release;
+
+	rc = apei_exec_run(&trigger_ctx, ACPI_EINJ_TRIGGER_ERROR);
+
+	apei_exec_post_unmap_gars(&trigger_ctx);
+out_release:
+	apei_resources_release(&trigger_resources);
+out_fini:
+	apei_resources_fini(&trigger_resources);
+out_rel_entry:
+	release_mem_region(trigger_paddr + sizeof(*trigger_tab),
+			   table_size - sizeof(*trigger_tab));
+out_rel_header:
+	release_mem_region(trigger_paddr, sizeof(*trigger_tab));
+out:
+	if (trigger_tab)
+		iounmap(trigger_tab);
+
+	return rc;
+}
+
+static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+			       u64 param3, u64 param4)
+{
+	struct apei_exec_context ctx;
+	u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT;
+	int rc;
+
+	einj_exec_ctx_init(&ctx);
+
+	rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION);
+	if (rc)
+		return rc;
+	apei_exec_ctx_set_input(&ctx, type);
+	if (acpi5) {
+		struct set_error_type_with_address *v5param = einj_param;
+
+		v5param->type = type;
+		if (type & ACPI5_VENDOR_BIT) {
+			switch (vendor_flags) {
+			case SETWA_FLAGS_APICID:
+				v5param->apicid = param1;
+				break;
+			case SETWA_FLAGS_MEM:
+				v5param->memory_address = param1;
+				v5param->memory_address_range = param2;
+				break;
+			case SETWA_FLAGS_PCIE_SBDF:
+				v5param->pcie_sbdf = param1;
+				break;
+			}
+			v5param->flags = vendor_flags;
+		} else if (flags) {
+				v5param->flags = flags;
+				v5param->memory_address = param1;
+				v5param->memory_address_range = param2;
+				v5param->apicid = param3;
+				v5param->pcie_sbdf = param4;
+		} else {
+			switch (type) {
+			case ACPI_EINJ_PROCESSOR_CORRECTABLE:
+			case ACPI_EINJ_PROCESSOR_UNCORRECTABLE:
+			case ACPI_EINJ_PROCESSOR_FATAL:
+				v5param->apicid = param1;
+				v5param->flags = SETWA_FLAGS_APICID;
+				break;
+			case ACPI_EINJ_MEMORY_CORRECTABLE:
+			case ACPI_EINJ_MEMORY_UNCORRECTABLE:
+			case ACPI_EINJ_MEMORY_FATAL:
+				v5param->memory_address = param1;
+				v5param->memory_address_range = param2;
+				v5param->flags = SETWA_FLAGS_MEM;
+				break;
+			case ACPI_EINJ_PCIX_CORRECTABLE:
+			case ACPI_EINJ_PCIX_UNCORRECTABLE:
+			case ACPI_EINJ_PCIX_FATAL:
+				v5param->pcie_sbdf = param1;
+				v5param->flags = SETWA_FLAGS_PCIE_SBDF;
+				break;
+			}
+		}
+	} else {
+		rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE);
+		if (rc)
+			return rc;
+		if (einj_param) {
+			struct einj_parameter *v4param = einj_param;
+			v4param->param1 = param1;
+			v4param->param2 = param2;
+		}
+	}
+	rc = apei_exec_run(&ctx, ACPI_EINJ_EXECUTE_OPERATION);
+	if (rc)
+		return rc;
+	for (;;) {
+		rc = apei_exec_run(&ctx, ACPI_EINJ_CHECK_BUSY_STATUS);
+		if (rc)
+			return rc;
+		val = apei_exec_ctx_get_output(&ctx);
+		if (!(val & EINJ_OP_BUSY))
+			break;
+		if (einj_timedout(&timeout))
+			return -EIO;
+	}
+	rc = apei_exec_run(&ctx, ACPI_EINJ_GET_COMMAND_STATUS);
+	if (rc)
+		return rc;
+	val = apei_exec_ctx_get_output(&ctx);
+	if (val != EINJ_STATUS_SUCCESS)
+		return -EBUSY;
+
+	rc = apei_exec_run(&ctx, ACPI_EINJ_GET_TRIGGER_TABLE);
+	if (rc)
+		return rc;
+	trigger_paddr = apei_exec_ctx_get_output(&ctx);
+	if (notrigger == 0) {
+		rc = __einj_error_trigger(trigger_paddr, type, param1, param2);
+		if (rc)
+			return rc;
+	}
+	rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION);
+
+	return rc;
+}
+
+/* Inject the specified hardware error */
+static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+			     u64 param3, u64 param4)
+{
+	int rc;
+	u64 base_addr, size;
+
+	/* If user manually set "flags", make sure it is legal */
+	if (flags && (flags &
+		~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
+		return -EINVAL;
+
+	/*
+	 * We need extra sanity checks for memory errors.
+	 * Other types leap directly to injection.
+	 */
+
+	/* ensure param1/param2 existed */
+	if (!(param_extension || acpi5))
+		goto inject;
+
+	/* ensure injection is memory related */
+	if (type & ACPI5_VENDOR_BIT) {
+		if (vendor_flags != SETWA_FLAGS_MEM)
+			goto inject;
+	} else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM))
+		goto inject;
+
+	/*
+	 * Disallow crazy address masks that give BIOS leeway to pick
+	 * injection address almost anywhere. Insist on page or
+	 * better granularity and that target address is normal RAM or
+	 * NVDIMM.
+	 */
+	base_addr = param1 & param2;
+	size = ~param2 + 1;
+
+	if (((param2 & PAGE_MASK) != PAGE_MASK) ||
+	    ((region_intersects(base_addr, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE)
+				!= REGION_INTERSECTS) &&
+	     (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY)
+				!= REGION_INTERSECTS) &&
+	     (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_SOFT_RESERVED)
+				!= REGION_INTERSECTS)))
+		return -EINVAL;
+
+inject:
+	mutex_lock(&einj_mutex);
+	rc = __einj_error_inject(type, flags, param1, param2, param3, param4);
+	mutex_unlock(&einj_mutex);
+
+	return rc;
+}
+
+static u32 error_type;
+static u32 error_flags;
+static u64 error_param1;
+static u64 error_param2;
+static u64 error_param3;
+static u64 error_param4;
+static struct dentry *einj_debug_dir;
+
+static int available_error_type_show(struct seq_file *m, void *v)
+{
+	int rc;
+	u32 available_error_type = 0;
+
+	rc = einj_get_available_error_type(&available_error_type);
+	if (rc)
+		return rc;
+	if (available_error_type & 0x0001)
+		seq_printf(m, "0x00000001\tProcessor Correctable\n");
+	if (available_error_type & 0x0002)
+		seq_printf(m, "0x00000002\tProcessor Uncorrectable non-fatal\n");
+	if (available_error_type & 0x0004)
+		seq_printf(m, "0x00000004\tProcessor Uncorrectable fatal\n");
+	if (available_error_type & 0x0008)
+		seq_printf(m, "0x00000008\tMemory Correctable\n");
+	if (available_error_type & 0x0010)
+		seq_printf(m, "0x00000010\tMemory Uncorrectable non-fatal\n");
+	if (available_error_type & 0x0020)
+		seq_printf(m, "0x00000020\tMemory Uncorrectable fatal\n");
+	if (available_error_type & 0x0040)
+		seq_printf(m, "0x00000040\tPCI Express Correctable\n");
+	if (available_error_type & 0x0080)
+		seq_printf(m, "0x00000080\tPCI Express Uncorrectable non-fatal\n");
+	if (available_error_type & 0x0100)
+		seq_printf(m, "0x00000100\tPCI Express Uncorrectable fatal\n");
+	if (available_error_type & 0x0200)
+		seq_printf(m, "0x00000200\tPlatform Correctable\n");
+	if (available_error_type & 0x0400)
+		seq_printf(m, "0x00000400\tPlatform Uncorrectable non-fatal\n");
+	if (available_error_type & 0x0800)
+		seq_printf(m, "0x00000800\tPlatform Uncorrectable fatal\n");
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(available_error_type);
+
+static int error_type_get(void *data, u64 *val)
+{
+	*val = error_type;
+
+	return 0;
+}
+
+static int error_type_set(void *data, u64 val)
+{
+	int rc;
+	u32 available_error_type = 0;
+	u32 tval, vendor;
+
+	/*
+	 * Vendor defined types have 0x80000000 bit set, and
+	 * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE
+	 */
+	vendor = val & ACPI5_VENDOR_BIT;
+	tval = val & 0x7fffffff;
+
+	/* Only one error type can be specified */
+	if (tval & (tval - 1))
+		return -EINVAL;
+	if (!vendor) {
+		rc = einj_get_available_error_type(&available_error_type);
+		if (rc)
+			return rc;
+		if (!(val & available_error_type))
+			return -EINVAL;
+	}
+	error_type = val;
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(error_type_fops, error_type_get, error_type_set,
+			 "0x%llx\n");
+
+static int error_inject_set(void *data, u64 val)
+{
+	if (!error_type)
+		return -EINVAL;
+
+	return einj_error_inject(error_type, error_flags, error_param1, error_param2,
+		error_param3, error_param4);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(error_inject_fops, NULL, error_inject_set, "%llu\n");
+
+static int einj_check_table(struct acpi_table_einj *einj_tab)
+{
+	if ((einj_tab->header_length !=
+	     (sizeof(struct acpi_table_einj) - sizeof(einj_tab->header)))
+	    && (einj_tab->header_length != sizeof(struct acpi_table_einj)))
+		return -EINVAL;
+	if (einj_tab->header.length < sizeof(struct acpi_table_einj))
+		return -EINVAL;
+	if (einj_tab->entries !=
+	    (einj_tab->header.length - sizeof(struct acpi_table_einj)) /
+	    sizeof(struct acpi_einj_entry))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int __init einj_init(void)
+{
+	int rc;
+	acpi_status status;
+	struct apei_exec_context ctx;
+
+	if (acpi_disabled) {
+		pr_warn("ACPI disabled.\n");
+		return -ENODEV;
+	}
+
+	status = acpi_get_table(ACPI_SIG_EINJ, 0,
+				(struct acpi_table_header **)&einj_tab);
+	if (status == AE_NOT_FOUND) {
+		pr_warn("EINJ table not found.\n");
+		return -ENODEV;
+	}
+	else if (ACPI_FAILURE(status)) {
+		pr_err("Failed to get EINJ table: %s\n",
+				acpi_format_exception(status));
+		return -EINVAL;
+	}
+
+	rc = einj_check_table(einj_tab);
+	if (rc) {
+		pr_warn(FW_BUG "Invalid EINJ table.\n");
+		goto err_put_table;
+	}
+
+	rc = -ENOMEM;
+	einj_debug_dir = debugfs_create_dir("einj", apei_get_debugfs_dir());
+
+	debugfs_create_file("available_error_type", S_IRUSR, einj_debug_dir,
+			    NULL, &available_error_type_fops);
+	debugfs_create_file_unsafe("error_type", 0600, einj_debug_dir,
+				   NULL, &error_type_fops);
+	debugfs_create_file_unsafe("error_inject", 0200, einj_debug_dir,
+				   NULL, &error_inject_fops);
+
+	apei_resources_init(&einj_resources);
+	einj_exec_ctx_init(&ctx);
+	rc = apei_exec_collect_resources(&ctx, &einj_resources);
+	if (rc) {
+		pr_err("Error collecting EINJ resources.\n");
+		goto err_fini;
+	}
+
+	rc = apei_resources_request(&einj_resources, "APEI EINJ");
+	if (rc) {
+		pr_err("Error requesting memory/port resources.\n");
+		goto err_fini;
+	}
+
+	rc = apei_exec_pre_map_gars(&ctx);
+	if (rc) {
+		pr_err("Error pre-mapping GARs.\n");
+		goto err_release;
+	}
+
+	rc = -ENOMEM;
+	einj_param = einj_get_parameter_address();
+	if ((param_extension || acpi5) && einj_param) {
+		debugfs_create_x32("flags", S_IRUSR | S_IWUSR, einj_debug_dir,
+				   &error_flags);
+		debugfs_create_x64("param1", S_IRUSR | S_IWUSR, einj_debug_dir,
+				   &error_param1);
+		debugfs_create_x64("param2", S_IRUSR | S_IWUSR, einj_debug_dir,
+				   &error_param2);
+		debugfs_create_x64("param3", S_IRUSR | S_IWUSR, einj_debug_dir,
+				   &error_param3);
+		debugfs_create_x64("param4", S_IRUSR | S_IWUSR, einj_debug_dir,
+				   &error_param4);
+		debugfs_create_x32("notrigger", S_IRUSR | S_IWUSR,
+				   einj_debug_dir, &notrigger);
+	}
+
+	if (vendor_dev[0]) {
+		vendor_blob.data = vendor_dev;
+		vendor_blob.size = strlen(vendor_dev);
+		debugfs_create_blob("vendor", S_IRUSR, einj_debug_dir,
+				    &vendor_blob);
+		debugfs_create_x32("vendor_flags", S_IRUSR | S_IWUSR,
+				   einj_debug_dir, &vendor_flags);
+	}
+
+	pr_info("Error INJection is initialized.\n");
+
+	return 0;
+
+err_release:
+	apei_resources_release(&einj_resources);
+err_fini:
+	apei_resources_fini(&einj_resources);
+	debugfs_remove_recursive(einj_debug_dir);
+err_put_table:
+	acpi_put_table((struct acpi_table_header *)einj_tab);
+
+	return rc;
+}
+
+static void __exit einj_exit(void)
+{
+	struct apei_exec_context ctx;
+
+	if (einj_param) {
+		acpi_size size = (acpi5) ?
+			sizeof(struct set_error_type_with_address) :
+			sizeof(struct einj_parameter);
+
+		acpi_os_unmap_iomem(einj_param, size);
+	}
+	einj_exec_ctx_init(&ctx);
+	apei_exec_post_unmap_gars(&ctx);
+	apei_resources_release(&einj_resources);
+	apei_resources_fini(&einj_resources);
+	debugfs_remove_recursive(einj_debug_dir);
+	acpi_put_table((struct acpi_table_header *)einj_tab);
+}
+
+module_init(einj_init);
+module_exit(einj_exit);
+
+MODULE_AUTHOR("Huang Ying");
+MODULE_DESCRIPTION("APEI Error INJection support");
+MODULE_LICENSE("GPL");
diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
new file mode 100644
index 000000000..c740f0faa
--- /dev/null
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * APEI Error Record Serialization Table debug support
+ *
+ * ERST is a way provided by APEI to save and retrieve hardware error
+ * information to and from a persistent store. This file provide the
+ * debugging/testing support for ERST kernel support and firmware
+ * implementation.
+ *
+ * Copyright 2010 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <acpi/apei.h>
+#include <linux/miscdevice.h>
+
+#include "apei-internal.h"
+
+#define ERST_DBG_PFX			"ERST DBG: "
+
+#define ERST_DBG_RECORD_LEN_MAX		0x4000
+
+static void *erst_dbg_buf;
+static unsigned int erst_dbg_buf_len;
+
+/* Prevent erst_dbg_read/write from being invoked concurrently */
+static DEFINE_MUTEX(erst_dbg_mutex);
+
+static int erst_dbg_open(struct inode *inode, struct file *file)
+{
+	int rc, *pos;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	pos = (int *)&file->private_data;
+
+	rc = erst_get_record_id_begin(pos);
+	if (rc)
+		return rc;
+
+	return nonseekable_open(inode, file);
+}
+
+static int erst_dbg_release(struct inode *inode, struct file *file)
+{
+	erst_get_record_id_end();
+
+	return 0;
+}
+
+static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+	int rc;
+	u64 record_id;
+	u32 record_count;
+
+	switch (cmd) {
+	case APEI_ERST_CLEAR_RECORD:
+		rc = copy_from_user(&record_id, (void __user *)arg,
+				    sizeof(record_id));
+		if (rc)
+			return -EFAULT;
+		return erst_clear(record_id);
+	case APEI_ERST_GET_RECORD_COUNT:
+		rc = erst_get_record_count();
+		if (rc < 0)
+			return rc;
+		record_count = rc;
+		rc = put_user(record_count, (u32 __user *)arg);
+		if (rc)
+			return rc;
+		return 0;
+	default:
+		return -ENOTTY;
+	}
+}
+
+static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf,
+			     size_t usize, loff_t *off)
+{
+	int rc, *pos;
+	ssize_t len = 0;
+	u64 id;
+
+	if (*off)
+		return -EINVAL;
+
+	if (mutex_lock_interruptible(&erst_dbg_mutex) != 0)
+		return -EINTR;
+
+	pos = (int *)&filp->private_data;
+
+retry_next:
+	rc = erst_get_record_id_next(pos, &id);
+	if (rc)
+		goto out;
+	/* no more record */
+	if (id == APEI_ERST_INVALID_RECORD_ID) {
+		/*
+		 * If the persistent store is empty initially, the function
+		 * 'erst_read' below will return "-ENOENT" value. This causes
+		 * 'retry_next' label is entered again. The returned value
+		 * should be zero indicating the read operation is EOF.
+		 */
+		len = 0;
+
+		goto out;
+	}
+retry:
+	rc = len = erst_read(id, erst_dbg_buf, erst_dbg_buf_len);
+	/* The record may be cleared by others, try read next record */
+	if (rc == -ENOENT)
+		goto retry_next;
+	if (rc < 0)
+		goto out;
+	if (len > ERST_DBG_RECORD_LEN_MAX) {
+		pr_warn(ERST_DBG_PFX
+			"Record (ID: 0x%llx) length is too long: %zd\n", id, len);
+		rc = -EIO;
+		goto out;
+	}
+	if (len > erst_dbg_buf_len) {
+		void *p;
+		rc = -ENOMEM;
+		p = kmalloc(len, GFP_KERNEL);
+		if (!p)
+			goto out;
+		kfree(erst_dbg_buf);
+		erst_dbg_buf = p;
+		erst_dbg_buf_len = len;
+		goto retry;
+	}
+
+	rc = -EINVAL;
+	if (len > usize)
+		goto out;
+
+	rc = -EFAULT;
+	if (copy_to_user(ubuf, erst_dbg_buf, len))
+		goto out;
+	rc = 0;
+out:
+	mutex_unlock(&erst_dbg_mutex);
+	return rc ? rc : len;
+}
+
+static ssize_t erst_dbg_write(struct file *filp, const char __user *ubuf,
+			      size_t usize, loff_t *off)
+{
+	int rc;
+	struct cper_record_header *rcd;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (usize > ERST_DBG_RECORD_LEN_MAX) {
+		pr_err(ERST_DBG_PFX "Too long record to be written\n");
+		return -EINVAL;
+	}
+
+	if (mutex_lock_interruptible(&erst_dbg_mutex))
+		return -EINTR;
+	if (usize > erst_dbg_buf_len) {
+		void *p;
+		rc = -ENOMEM;
+		p = kmalloc(usize, GFP_KERNEL);
+		if (!p)
+			goto out;
+		kfree(erst_dbg_buf);
+		erst_dbg_buf = p;
+		erst_dbg_buf_len = usize;
+	}
+	rc = copy_from_user(erst_dbg_buf, ubuf, usize);
+	if (rc) {
+		rc = -EFAULT;
+		goto out;
+	}
+	rcd = erst_dbg_buf;
+	rc = -EINVAL;
+	if (rcd->record_length != usize)
+		goto out;
+
+	rc = erst_write(erst_dbg_buf);
+
+out:
+	mutex_unlock(&erst_dbg_mutex);
+	return rc < 0 ? rc : usize;
+}
+
+static const struct file_operations erst_dbg_ops = {
+	.owner		= THIS_MODULE,
+	.open		= erst_dbg_open,
+	.release	= erst_dbg_release,
+	.read		= erst_dbg_read,
+	.write		= erst_dbg_write,
+	.unlocked_ioctl	= erst_dbg_ioctl,
+	.llseek		= no_llseek,
+};
+
+static struct miscdevice erst_dbg_dev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "erst_dbg",
+	.fops	= &erst_dbg_ops,
+};
+
+static __init int erst_dbg_init(void)
+{
+	if (erst_disable) {
+		pr_info(ERST_DBG_PFX "ERST support is disabled.\n");
+		return -ENODEV;
+	}
+	return misc_register(&erst_dbg_dev);
+}
+
+static __exit void erst_dbg_exit(void)
+{
+	misc_deregister(&erst_dbg_dev);
+	kfree(erst_dbg_buf);
+}
+
+module_init(erst_dbg_init);
+module_exit(erst_dbg_exit);
+
+MODULE_AUTHOR("Huang Ying");
+MODULE_DESCRIPTION("APEI Error Record Serialization Table debug support");
+MODULE_LICENSE("GPL");
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
new file mode 100644
index 000000000..83efb52a3
--- /dev/null
+++ b/drivers/acpi/apei/erst.c
@@ -0,0 +1,1206 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * APEI Error Record Serialization Table support
+ *
+ * ERST is a way provided by APEI to save and retrieve hardware error
+ * information to and from a persistent store.
+ *
+ * For more information about ERST, please refer to ACPI Specification
+ * version 4.0, section 17.4.
+ *
+ * Copyright 2010 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/acpi.h>
+#include <linux/uaccess.h>
+#include <linux/cper.h>
+#include <linux/nmi.h>
+#include <linux/hardirq.h>
+#include <linux/pstore.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h> /* kvfree() */
+#include <acpi/apei.h>
+
+#include "apei-internal.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "ERST: " fmt
+
+/* ERST command status */
+#define ERST_STATUS_SUCCESS			0x0
+#define ERST_STATUS_NOT_ENOUGH_SPACE		0x1
+#define ERST_STATUS_HARDWARE_NOT_AVAILABLE	0x2
+#define ERST_STATUS_FAILED			0x3
+#define ERST_STATUS_RECORD_STORE_EMPTY		0x4
+#define ERST_STATUS_RECORD_NOT_FOUND		0x5
+
+#define ERST_TAB_ENTRY(tab)						\
+	((struct acpi_whea_header *)((char *)(tab) +			\
+				     sizeof(struct acpi_table_erst)))
+
+#define SPIN_UNIT		100			/* 100ns */
+/* Firmware should respond within 1 milliseconds */
+#define FIRMWARE_TIMEOUT	(1 * NSEC_PER_MSEC)
+#define FIRMWARE_MAX_STALL	50			/* 50us */
+
+int erst_disable;
+EXPORT_SYMBOL_GPL(erst_disable);
+
+static struct acpi_table_erst *erst_tab;
+
+/* ERST Error Log Address Range atrributes */
+#define ERST_RANGE_RESERVED	0x0001
+#define ERST_RANGE_NVRAM	0x0002
+#define ERST_RANGE_SLOW		0x0004
+
+/*
+ * ERST Error Log Address Range, used as buffer for reading/writing
+ * error records.
+ */
+static struct erst_erange {
+	u64 base;
+	u64 size;
+	void __iomem *vaddr;
+	u32 attr;
+} erst_erange;
+
+/*
+ * Prevent ERST interpreter to run simultaneously, because the
+ * corresponding firmware implementation may not work properly when
+ * invoked simultaneously.
+ *
+ * It is used to provide exclusive accessing for ERST Error Log
+ * Address Range too.
+ */
+static DEFINE_RAW_SPINLOCK(erst_lock);
+
+static inline int erst_errno(int command_status)
+{
+	switch (command_status) {
+	case ERST_STATUS_SUCCESS:
+		return 0;
+	case ERST_STATUS_HARDWARE_NOT_AVAILABLE:
+		return -ENODEV;
+	case ERST_STATUS_NOT_ENOUGH_SPACE:
+		return -ENOSPC;
+	case ERST_STATUS_RECORD_STORE_EMPTY:
+	case ERST_STATUS_RECORD_NOT_FOUND:
+		return -ENOENT;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int erst_timedout(u64 *t, u64 spin_unit)
+{
+	if ((s64)*t < spin_unit) {
+		pr_warn(FW_WARN "Firmware does not respond in time.\n");
+		return 1;
+	}
+	*t -= spin_unit;
+	ndelay(spin_unit);
+	touch_nmi_watchdog();
+	return 0;
+}
+
+static int erst_exec_load_var1(struct apei_exec_context *ctx,
+			       struct acpi_whea_header *entry)
+{
+	return __apei_exec_read_register(entry, &ctx->var1);
+}
+
+static int erst_exec_load_var2(struct apei_exec_context *ctx,
+			       struct acpi_whea_header *entry)
+{
+	return __apei_exec_read_register(entry, &ctx->var2);
+}
+
+static int erst_exec_store_var1(struct apei_exec_context *ctx,
+				struct acpi_whea_header *entry)
+{
+	return __apei_exec_write_register(entry, ctx->var1);
+}
+
+static int erst_exec_add(struct apei_exec_context *ctx,
+			 struct acpi_whea_header *entry)
+{
+	ctx->var1 += ctx->var2;
+	return 0;
+}
+
+static int erst_exec_subtract(struct apei_exec_context *ctx,
+			      struct acpi_whea_header *entry)
+{
+	ctx->var1 -= ctx->var2;
+	return 0;
+}
+
+static int erst_exec_add_value(struct apei_exec_context *ctx,
+			       struct acpi_whea_header *entry)
+{
+	int rc;
+	u64 val;
+
+	rc = __apei_exec_read_register(entry, &val);
+	if (rc)
+		return rc;
+	val += ctx->value;
+	rc = __apei_exec_write_register(entry, val);
+	return rc;
+}
+
+static int erst_exec_subtract_value(struct apei_exec_context *ctx,
+				    struct acpi_whea_header *entry)
+{
+	int rc;
+	u64 val;
+
+	rc = __apei_exec_read_register(entry, &val);
+	if (rc)
+		return rc;
+	val -= ctx->value;
+	rc = __apei_exec_write_register(entry, val);
+	return rc;
+}
+
+static int erst_exec_stall(struct apei_exec_context *ctx,
+			   struct acpi_whea_header *entry)
+{
+	u64 stall_time;
+
+	if (ctx->value > FIRMWARE_MAX_STALL) {
+		if (!in_nmi())
+			pr_warn(FW_WARN
+			"Too long stall time for stall instruction: 0x%llx.\n",
+				   ctx->value);
+		stall_time = FIRMWARE_MAX_STALL;
+	} else
+		stall_time = ctx->value;
+	udelay(stall_time);
+	return 0;
+}
+
+static int erst_exec_stall_while_true(struct apei_exec_context *ctx,
+				      struct acpi_whea_header *entry)
+{
+	int rc;
+	u64 val;
+	u64 timeout = FIRMWARE_TIMEOUT;
+	u64 stall_time;
+
+	if (ctx->var1 > FIRMWARE_MAX_STALL) {
+		if (!in_nmi())
+			pr_warn(FW_WARN
+		"Too long stall time for stall while true instruction: 0x%llx.\n",
+				   ctx->var1);
+		stall_time = FIRMWARE_MAX_STALL;
+	} else
+		stall_time = ctx->var1;
+
+	for (;;) {
+		rc = __apei_exec_read_register(entry, &val);
+		if (rc)
+			return rc;
+		if (val != ctx->value)
+			break;
+		if (erst_timedout(&timeout, stall_time * NSEC_PER_USEC))
+			return -EIO;
+	}
+	return 0;
+}
+
+static int erst_exec_skip_next_instruction_if_true(
+	struct apei_exec_context *ctx,
+	struct acpi_whea_header *entry)
+{
+	int rc;
+	u64 val;
+
+	rc = __apei_exec_read_register(entry, &val);
+	if (rc)
+		return rc;
+	if (val == ctx->value) {
+		ctx->ip += 2;
+		return APEI_EXEC_SET_IP;
+	}
+
+	return 0;
+}
+
+static int erst_exec_goto(struct apei_exec_context *ctx,
+			  struct acpi_whea_header *entry)
+{
+	ctx->ip = ctx->value;
+	return APEI_EXEC_SET_IP;
+}
+
+static int erst_exec_set_src_address_base(struct apei_exec_context *ctx,
+					  struct acpi_whea_header *entry)
+{
+	return __apei_exec_read_register(entry, &ctx->src_base);
+}
+
+static int erst_exec_set_dst_address_base(struct apei_exec_context *ctx,
+					  struct acpi_whea_header *entry)
+{
+	return __apei_exec_read_register(entry, &ctx->dst_base);
+}
+
+static int erst_exec_move_data(struct apei_exec_context *ctx,
+			       struct acpi_whea_header *entry)
+{
+	int rc;
+	u64 offset;
+	void *src, *dst;
+
+	/* ioremap does not work in interrupt context */
+	if (in_interrupt()) {
+		pr_warn("MOVE_DATA can not be used in interrupt context.\n");
+		return -EBUSY;
+	}
+
+	rc = __apei_exec_read_register(entry, &offset);
+	if (rc)
+		return rc;
+
+	src = ioremap(ctx->src_base + offset, ctx->var2);
+	if (!src)
+		return -ENOMEM;
+	dst = ioremap(ctx->dst_base + offset, ctx->var2);
+	if (!dst) {
+		iounmap(src);
+		return -ENOMEM;
+	}
+
+	memmove(dst, src, ctx->var2);
+
+	iounmap(src);
+	iounmap(dst);
+
+	return 0;
+}
+
+static struct apei_exec_ins_type erst_ins_type[] = {
+	[ACPI_ERST_READ_REGISTER] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = apei_exec_read_register,
+	},
+	[ACPI_ERST_READ_REGISTER_VALUE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = apei_exec_read_register_value,
+	},
+	[ACPI_ERST_WRITE_REGISTER] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = apei_exec_write_register,
+	},
+	[ACPI_ERST_WRITE_REGISTER_VALUE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = apei_exec_write_register_value,
+	},
+	[ACPI_ERST_NOOP] = {
+		.flags = 0,
+		.run = apei_exec_noop,
+	},
+	[ACPI_ERST_LOAD_VAR1] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_load_var1,
+	},
+	[ACPI_ERST_LOAD_VAR2] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_load_var2,
+	},
+	[ACPI_ERST_STORE_VAR1] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_store_var1,
+	},
+	[ACPI_ERST_ADD] = {
+		.flags = 0,
+		.run = erst_exec_add,
+	},
+	[ACPI_ERST_SUBTRACT] = {
+		.flags = 0,
+		.run = erst_exec_subtract,
+	},
+	[ACPI_ERST_ADD_VALUE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_add_value,
+	},
+	[ACPI_ERST_SUBTRACT_VALUE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_subtract_value,
+	},
+	[ACPI_ERST_STALL] = {
+		.flags = 0,
+		.run = erst_exec_stall,
+	},
+	[ACPI_ERST_STALL_WHILE_TRUE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_stall_while_true,
+	},
+	[ACPI_ERST_SKIP_NEXT_IF_TRUE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_skip_next_instruction_if_true,
+	},
+	[ACPI_ERST_GOTO] = {
+		.flags = 0,
+		.run = erst_exec_goto,
+	},
+	[ACPI_ERST_SET_SRC_ADDRESS_BASE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_set_src_address_base,
+	},
+	[ACPI_ERST_SET_DST_ADDRESS_BASE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_set_dst_address_base,
+	},
+	[ACPI_ERST_MOVE_DATA] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_move_data,
+	},
+};
+
+static inline void erst_exec_ctx_init(struct apei_exec_context *ctx)
+{
+	apei_exec_ctx_init(ctx, erst_ins_type, ARRAY_SIZE(erst_ins_type),
+			   ERST_TAB_ENTRY(erst_tab), erst_tab->entries);
+}
+
+static int erst_get_erange(struct erst_erange *range)
+{
+	struct apei_exec_context ctx;
+	int rc;
+
+	erst_exec_ctx_init(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_RANGE);
+	if (rc)
+		return rc;
+	range->base = apei_exec_ctx_get_output(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_LENGTH);
+	if (rc)
+		return rc;
+	range->size = apei_exec_ctx_get_output(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_ATTRIBUTES);
+	if (rc)
+		return rc;
+	range->attr = apei_exec_ctx_get_output(&ctx);
+
+	return 0;
+}
+
+static ssize_t __erst_get_record_count(void)
+{
+	struct apei_exec_context ctx;
+	int rc;
+
+	erst_exec_ctx_init(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_COUNT);
+	if (rc)
+		return rc;
+	return apei_exec_ctx_get_output(&ctx);
+}
+
+ssize_t erst_get_record_count(void)
+{
+	ssize_t count;
+	unsigned long flags;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	raw_spin_lock_irqsave(&erst_lock, flags);
+	count = __erst_get_record_count();
+	raw_spin_unlock_irqrestore(&erst_lock, flags);
+
+	return count;
+}
+EXPORT_SYMBOL_GPL(erst_get_record_count);
+
+#define ERST_RECORD_ID_CACHE_SIZE_MIN	16
+#define ERST_RECORD_ID_CACHE_SIZE_MAX	1024
+
+struct erst_record_id_cache {
+	struct mutex lock;
+	u64 *entries;
+	int len;
+	int size;
+	int refcount;
+};
+
+static struct erst_record_id_cache erst_record_id_cache = {
+	.lock = __MUTEX_INITIALIZER(erst_record_id_cache.lock),
+	.refcount = 0,
+};
+
+static int __erst_get_next_record_id(u64 *record_id)
+{
+	struct apei_exec_context ctx;
+	int rc;
+
+	erst_exec_ctx_init(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_ID);
+	if (rc)
+		return rc;
+	*record_id = apei_exec_ctx_get_output(&ctx);
+
+	return 0;
+}
+
+int erst_get_record_id_begin(int *pos)
+{
+	int rc;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
+	if (rc)
+		return rc;
+	erst_record_id_cache.refcount++;
+	mutex_unlock(&erst_record_id_cache.lock);
+
+	*pos = 0;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(erst_get_record_id_begin);
+
+/* erst_record_id_cache.lock must be held by caller */
+static int __erst_record_id_cache_add_one(void)
+{
+	u64 id, prev_id, first_id;
+	int i, rc;
+	u64 *entries;
+	unsigned long flags;
+
+	id = prev_id = first_id = APEI_ERST_INVALID_RECORD_ID;
+retry:
+	raw_spin_lock_irqsave(&erst_lock, flags);
+	rc = __erst_get_next_record_id(&id);
+	raw_spin_unlock_irqrestore(&erst_lock, flags);
+	if (rc == -ENOENT)
+		return 0;
+	if (rc)
+		return rc;
+	if (id == APEI_ERST_INVALID_RECORD_ID)
+		return 0;
+	/* can not skip current ID, or loop back to first ID */
+	if (id == prev_id || id == first_id)
+		return 0;
+	if (first_id == APEI_ERST_INVALID_RECORD_ID)
+		first_id = id;
+	prev_id = id;
+
+	entries = erst_record_id_cache.entries;
+	for (i = 0; i < erst_record_id_cache.len; i++) {
+		if (entries[i] == id)
+			break;
+	}
+	/* record id already in cache, try next */
+	if (i < erst_record_id_cache.len)
+		goto retry;
+	if (erst_record_id_cache.len >= erst_record_id_cache.size) {
+		int new_size;
+		u64 *new_entries;
+
+		new_size = erst_record_id_cache.size * 2;
+		new_size = clamp_val(new_size, ERST_RECORD_ID_CACHE_SIZE_MIN,
+				     ERST_RECORD_ID_CACHE_SIZE_MAX);
+		if (new_size <= erst_record_id_cache.size) {
+			if (printk_ratelimit())
+				pr_warn(FW_WARN "too many record IDs!\n");
+			return 0;
+		}
+		new_entries = kvmalloc_array(new_size, sizeof(entries[0]),
+					     GFP_KERNEL);
+		if (!new_entries)
+			return -ENOMEM;
+		memcpy(new_entries, entries,
+		       erst_record_id_cache.len * sizeof(entries[0]));
+		kvfree(entries);
+		erst_record_id_cache.entries = entries = new_entries;
+		erst_record_id_cache.size = new_size;
+	}
+	entries[i] = id;
+	erst_record_id_cache.len++;
+
+	return 1;
+}
+
+/*
+ * Get the record ID of an existing error record on the persistent
+ * storage. If there is no error record on the persistent storage, the
+ * returned record_id is APEI_ERST_INVALID_RECORD_ID.
+ */
+int erst_get_record_id_next(int *pos, u64 *record_id)
+{
+	int rc = 0;
+	u64 *entries;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	/* must be enclosed by erst_get_record_id_begin/end */
+	BUG_ON(!erst_record_id_cache.refcount);
+	BUG_ON(*pos < 0 || *pos > erst_record_id_cache.len);
+
+	mutex_lock(&erst_record_id_cache.lock);
+	entries = erst_record_id_cache.entries;
+	for (; *pos < erst_record_id_cache.len; (*pos)++)
+		if (entries[*pos] != APEI_ERST_INVALID_RECORD_ID)
+			break;
+	/* found next record id in cache */
+	if (*pos < erst_record_id_cache.len) {
+		*record_id = entries[*pos];
+		(*pos)++;
+		goto out_unlock;
+	}
+
+	/* Try to add one more record ID to cache */
+	rc = __erst_record_id_cache_add_one();
+	if (rc < 0)
+		goto out_unlock;
+	/* successfully add one new ID */
+	if (rc == 1) {
+		*record_id = erst_record_id_cache.entries[*pos];
+		(*pos)++;
+		rc = 0;
+	} else {
+		*pos = -1;
+		*record_id = APEI_ERST_INVALID_RECORD_ID;
+	}
+out_unlock:
+	mutex_unlock(&erst_record_id_cache.lock);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(erst_get_record_id_next);
+
+/* erst_record_id_cache.lock must be held by caller */
+static void __erst_record_id_cache_compact(void)
+{
+	int i, wpos = 0;
+	u64 *entries;
+
+	if (erst_record_id_cache.refcount)
+		return;
+
+	entries = erst_record_id_cache.entries;
+	for (i = 0; i < erst_record_id_cache.len; i++) {
+		if (entries[i] == APEI_ERST_INVALID_RECORD_ID)
+			continue;
+		if (wpos != i)
+			entries[wpos] = entries[i];
+		wpos++;
+	}
+	erst_record_id_cache.len = wpos;
+}
+
+void erst_get_record_id_end(void)
+{
+	/*
+	 * erst_disable != 0 should be detected by invoker via the
+	 * return value of erst_get_record_id_begin/next, so this
+	 * function should not be called for erst_disable != 0.
+	 */
+	BUG_ON(erst_disable);
+
+	mutex_lock(&erst_record_id_cache.lock);
+	erst_record_id_cache.refcount--;
+	BUG_ON(erst_record_id_cache.refcount < 0);
+	__erst_record_id_cache_compact();
+	mutex_unlock(&erst_record_id_cache.lock);
+}
+EXPORT_SYMBOL_GPL(erst_get_record_id_end);
+
+static int __erst_write_to_storage(u64 offset)
+{
+	struct apei_exec_context ctx;
+	u64 timeout = FIRMWARE_TIMEOUT;
+	u64 val;
+	int rc;
+
+	erst_exec_ctx_init(&ctx);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE);
+	if (rc)
+		return rc;
+	apei_exec_ctx_set_input(&ctx, offset);
+	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET);
+	if (rc)
+		return rc;
+	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
+	if (rc)
+		return rc;
+	for (;;) {
+		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
+		if (rc)
+			return rc;
+		val = apei_exec_ctx_get_output(&ctx);
+		if (!val)
+			break;
+		if (erst_timedout(&timeout, SPIN_UNIT))
+			return -EIO;
+	}
+	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
+	if (rc)
+		return rc;
+	val = apei_exec_ctx_get_output(&ctx);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
+	if (rc)
+		return rc;
+
+	return erst_errno(val);
+}
+
+static int __erst_read_from_storage(u64 record_id, u64 offset)
+{
+	struct apei_exec_context ctx;
+	u64 timeout = FIRMWARE_TIMEOUT;
+	u64 val;
+	int rc;
+
+	erst_exec_ctx_init(&ctx);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ);
+	if (rc)
+		return rc;
+	apei_exec_ctx_set_input(&ctx, offset);
+	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET);
+	if (rc)
+		return rc;
+	apei_exec_ctx_set_input(&ctx, record_id);
+	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID);
+	if (rc)
+		return rc;
+	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
+	if (rc)
+		return rc;
+	for (;;) {
+		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
+		if (rc)
+			return rc;
+		val = apei_exec_ctx_get_output(&ctx);
+		if (!val)
+			break;
+		if (erst_timedout(&timeout, SPIN_UNIT))
+			return -EIO;
+	};
+	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
+	if (rc)
+		return rc;
+	val = apei_exec_ctx_get_output(&ctx);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
+	if (rc)
+		return rc;
+
+	return erst_errno(val);
+}
+
+static int __erst_clear_from_storage(u64 record_id)
+{
+	struct apei_exec_context ctx;
+	u64 timeout = FIRMWARE_TIMEOUT;
+	u64 val;
+	int rc;
+
+	erst_exec_ctx_init(&ctx);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR);
+	if (rc)
+		return rc;
+	apei_exec_ctx_set_input(&ctx, record_id);
+	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID);
+	if (rc)
+		return rc;
+	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
+	if (rc)
+		return rc;
+	for (;;) {
+		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
+		if (rc)
+			return rc;
+		val = apei_exec_ctx_get_output(&ctx);
+		if (!val)
+			break;
+		if (erst_timedout(&timeout, SPIN_UNIT))
+			return -EIO;
+	}
+	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
+	if (rc)
+		return rc;
+	val = apei_exec_ctx_get_output(&ctx);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
+	if (rc)
+		return rc;
+
+	return erst_errno(val);
+}
+
+/* NVRAM ERST Error Log Address Range is not supported yet */
+static void pr_unimpl_nvram(void)
+{
+	if (printk_ratelimit())
+		pr_warn("NVRAM ERST Log Address Range not implemented yet.\n");
+}
+
+static int __erst_write_to_nvram(const struct cper_record_header *record)
+{
+	/* do not print message, because printk is not safe for NMI */
+	return -ENOSYS;
+}
+
+static int __erst_read_to_erange_from_nvram(u64 record_id, u64 *offset)
+{
+	pr_unimpl_nvram();
+	return -ENOSYS;
+}
+
+static int __erst_clear_from_nvram(u64 record_id)
+{
+	pr_unimpl_nvram();
+	return -ENOSYS;
+}
+
+int erst_write(const struct cper_record_header *record)
+{
+	int rc;
+	unsigned long flags;
+	struct cper_record_header *rcd_erange;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	if (memcmp(record->signature, CPER_SIG_RECORD, CPER_SIG_SIZE))
+		return -EINVAL;
+
+	if (erst_erange.attr & ERST_RANGE_NVRAM) {
+		if (!raw_spin_trylock_irqsave(&erst_lock, flags))
+			return -EBUSY;
+		rc = __erst_write_to_nvram(record);
+		raw_spin_unlock_irqrestore(&erst_lock, flags);
+		return rc;
+	}
+
+	if (record->record_length > erst_erange.size)
+		return -EINVAL;
+
+	if (!raw_spin_trylock_irqsave(&erst_lock, flags))
+		return -EBUSY;
+	memcpy(erst_erange.vaddr, record, record->record_length);
+	rcd_erange = erst_erange.vaddr;
+	/* signature for serialization system */
+	memcpy(&rcd_erange->persistence_information, "ER", 2);
+
+	rc = __erst_write_to_storage(0);
+	raw_spin_unlock_irqrestore(&erst_lock, flags);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(erst_write);
+
+static int __erst_read_to_erange(u64 record_id, u64 *offset)
+{
+	int rc;
+
+	if (erst_erange.attr & ERST_RANGE_NVRAM)
+		return __erst_read_to_erange_from_nvram(
+			record_id, offset);
+
+	rc = __erst_read_from_storage(record_id, 0);
+	if (rc)
+		return rc;
+	*offset = 0;
+
+	return 0;
+}
+
+static ssize_t __erst_read(u64 record_id, struct cper_record_header *record,
+			   size_t buflen)
+{
+	int rc;
+	u64 offset, len = 0;
+	struct cper_record_header *rcd_tmp;
+
+	rc = __erst_read_to_erange(record_id, &offset);
+	if (rc)
+		return rc;
+	rcd_tmp = erst_erange.vaddr + offset;
+	len = rcd_tmp->record_length;
+	if (len <= buflen)
+		memcpy(record, rcd_tmp, len);
+
+	return len;
+}
+
+/*
+ * If return value > buflen, the buffer size is not big enough,
+ * else if return value < 0, something goes wrong,
+ * else everything is OK, and return value is record length
+ */
+ssize_t erst_read(u64 record_id, struct cper_record_header *record,
+		  size_t buflen)
+{
+	ssize_t len;
+	unsigned long flags;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	raw_spin_lock_irqsave(&erst_lock, flags);
+	len = __erst_read(record_id, record, buflen);
+	raw_spin_unlock_irqrestore(&erst_lock, flags);
+	return len;
+}
+EXPORT_SYMBOL_GPL(erst_read);
+
+int erst_clear(u64 record_id)
+{
+	int rc, i;
+	unsigned long flags;
+	u64 *entries;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
+	if (rc)
+		return rc;
+	raw_spin_lock_irqsave(&erst_lock, flags);
+	if (erst_erange.attr & ERST_RANGE_NVRAM)
+		rc = __erst_clear_from_nvram(record_id);
+	else
+		rc = __erst_clear_from_storage(record_id);
+	raw_spin_unlock_irqrestore(&erst_lock, flags);
+	if (rc)
+		goto out;
+	entries = erst_record_id_cache.entries;
+	for (i = 0; i < erst_record_id_cache.len; i++) {
+		if (entries[i] == record_id)
+			entries[i] = APEI_ERST_INVALID_RECORD_ID;
+	}
+	__erst_record_id_cache_compact();
+out:
+	mutex_unlock(&erst_record_id_cache.lock);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(erst_clear);
+
+static int __init setup_erst_disable(char *str)
+{
+	erst_disable = 1;
+	return 1;
+}
+
+__setup("erst_disable", setup_erst_disable);
+
+static int erst_check_table(struct acpi_table_erst *erst_tab)
+{
+	if ((erst_tab->header_length !=
+	     (sizeof(struct acpi_table_erst) - sizeof(erst_tab->header)))
+	    && (erst_tab->header_length != sizeof(struct acpi_table_erst)))
+		return -EINVAL;
+	if (erst_tab->header.length < sizeof(struct acpi_table_erst))
+		return -EINVAL;
+	if (erst_tab->entries !=
+	    (erst_tab->header.length - sizeof(struct acpi_table_erst)) /
+	    sizeof(struct acpi_erst_entry))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int erst_open_pstore(struct pstore_info *psi);
+static int erst_close_pstore(struct pstore_info *psi);
+static ssize_t erst_reader(struct pstore_record *record);
+static int erst_writer(struct pstore_record *record);
+static int erst_clearer(struct pstore_record *record);
+
+static struct pstore_info erst_info = {
+	.owner		= THIS_MODULE,
+	.name		= "erst",
+	.flags		= PSTORE_FLAGS_DMESG,
+	.open		= erst_open_pstore,
+	.close		= erst_close_pstore,
+	.read		= erst_reader,
+	.write		= erst_writer,
+	.erase		= erst_clearer
+};
+
+#define CPER_CREATOR_PSTORE						\
+	GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c,	\
+		  0x64, 0x90, 0xb8, 0x9d)
+#define CPER_SECTION_TYPE_DMESG						\
+	GUID_INIT(0xc197e04e, 0xd545, 0x4a70, 0x9c, 0x17, 0xa5, 0x54,	\
+		  0x94, 0x19, 0xeb, 0x12)
+#define CPER_SECTION_TYPE_DMESG_Z					\
+	GUID_INIT(0x4f118707, 0x04dd, 0x4055, 0xb5, 0xdd, 0x95, 0x6d,	\
+		  0x34, 0xdd, 0xfa, 0xc6)
+#define CPER_SECTION_TYPE_MCE						\
+	GUID_INIT(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96,	\
+		  0x04, 0x4a, 0x38, 0xfc)
+
+struct cper_pstore_record {
+	struct cper_record_header hdr;
+	struct cper_section_descriptor sec_hdr;
+	char data[];
+} __packed;
+
+static int reader_pos;
+
+static int erst_open_pstore(struct pstore_info *psi)
+{
+	int rc;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	rc = erst_get_record_id_begin(&reader_pos);
+
+	return rc;
+}
+
+static int erst_close_pstore(struct pstore_info *psi)
+{
+	erst_get_record_id_end();
+
+	return 0;
+}
+
+static ssize_t erst_reader(struct pstore_record *record)
+{
+	int rc;
+	ssize_t len = 0;
+	u64 record_id;
+	struct cper_pstore_record *rcd;
+	size_t rcd_len = sizeof(*rcd) + erst_info.bufsize;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	rcd = kmalloc(rcd_len, GFP_KERNEL);
+	if (!rcd) {
+		rc = -ENOMEM;
+		goto out;
+	}
+skip:
+	rc = erst_get_record_id_next(&reader_pos, &record_id);
+	if (rc)
+		goto out;
+
+	/* no more record */
+	if (record_id == APEI_ERST_INVALID_RECORD_ID) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	len = erst_read(record_id, &rcd->hdr, rcd_len);
+	/* The record may be cleared by others, try read next record */
+	if (len == -ENOENT)
+		goto skip;
+	else if (len < 0 || len < sizeof(*rcd)) {
+		rc = -EIO;
+		goto out;
+	}
+	if (!guid_equal(&rcd->hdr.creator_id, &CPER_CREATOR_PSTORE))
+		goto skip;
+
+	record->buf = kmalloc(len, GFP_KERNEL);
+	if (record->buf == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	memcpy(record->buf, rcd->data, len - sizeof(*rcd));
+	record->id = record_id;
+	record->compressed = false;
+	record->ecc_notice_size = 0;
+	if (guid_equal(&rcd->sec_hdr.section_type, &CPER_SECTION_TYPE_DMESG_Z)) {
+		record->type = PSTORE_TYPE_DMESG;
+		record->compressed = true;
+	} else if (guid_equal(&rcd->sec_hdr.section_type, &CPER_SECTION_TYPE_DMESG))
+		record->type = PSTORE_TYPE_DMESG;
+	else if (guid_equal(&rcd->sec_hdr.section_type, &CPER_SECTION_TYPE_MCE))
+		record->type = PSTORE_TYPE_MCE;
+	else
+		record->type = PSTORE_TYPE_MAX;
+
+	if (rcd->hdr.validation_bits & CPER_VALID_TIMESTAMP)
+		record->time.tv_sec = rcd->hdr.timestamp;
+	else
+		record->time.tv_sec = 0;
+	record->time.tv_nsec = 0;
+
+out:
+	kfree(rcd);
+	return (rc < 0) ? rc : (len - sizeof(*rcd));
+}
+
+static int erst_writer(struct pstore_record *record)
+{
+	struct cper_pstore_record *rcd = (struct cper_pstore_record *)
+					(erst_info.buf - sizeof(*rcd));
+	int ret;
+
+	memset(rcd, 0, sizeof(*rcd));
+	memcpy(rcd->hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
+	rcd->hdr.revision = CPER_RECORD_REV;
+	rcd->hdr.signature_end = CPER_SIG_END;
+	rcd->hdr.section_count = 1;
+	rcd->hdr.error_severity = CPER_SEV_FATAL;
+	/* timestamp valid. platform_id, partition_id are invalid */
+	rcd->hdr.validation_bits = CPER_VALID_TIMESTAMP;
+	rcd->hdr.timestamp = ktime_get_real_seconds();
+	rcd->hdr.record_length = sizeof(*rcd) + record->size;
+	rcd->hdr.creator_id = CPER_CREATOR_PSTORE;
+	rcd->hdr.notification_type = CPER_NOTIFY_MCE;
+	rcd->hdr.record_id = cper_next_record_id();
+	rcd->hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
+
+	rcd->sec_hdr.section_offset = sizeof(*rcd);
+	rcd->sec_hdr.section_length = record->size;
+	rcd->sec_hdr.revision = CPER_SEC_REV;
+	/* fru_id and fru_text is invalid */
+	rcd->sec_hdr.validation_bits = 0;
+	rcd->sec_hdr.flags = CPER_SEC_PRIMARY;
+	switch (record->type) {
+	case PSTORE_TYPE_DMESG:
+		if (record->compressed)
+			rcd->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG_Z;
+		else
+			rcd->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG;
+		break;
+	case PSTORE_TYPE_MCE:
+		rcd->sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
+		break;
+	default:
+		return -EINVAL;
+	}
+	rcd->sec_hdr.section_severity = CPER_SEV_FATAL;
+
+	ret = erst_write(&rcd->hdr);
+	record->id = rcd->hdr.record_id;
+
+	return ret;
+}
+
+static int erst_clearer(struct pstore_record *record)
+{
+	return erst_clear(record->id);
+}
+
+static int __init erst_init(void)
+{
+	int rc = 0;
+	acpi_status status;
+	struct apei_exec_context ctx;
+	struct apei_resources erst_resources;
+	struct resource *r;
+	char *buf;
+
+	if (acpi_disabled)
+		goto err;
+
+	if (erst_disable) {
+		pr_info(
+	"Error Record Serialization Table (ERST) support is disabled.\n");
+		goto err;
+	}
+
+	status = acpi_get_table(ACPI_SIG_ERST, 0,
+				(struct acpi_table_header **)&erst_tab);
+	if (status == AE_NOT_FOUND)
+		goto err;
+	else if (ACPI_FAILURE(status)) {
+		const char *msg = acpi_format_exception(status);
+		pr_err("Failed to get table, %s\n", msg);
+		rc = -EINVAL;
+		goto err;
+	}
+
+	rc = erst_check_table(erst_tab);
+	if (rc) {
+		pr_err(FW_BUG "ERST table is invalid.\n");
+		goto err_put_erst_tab;
+	}
+
+	apei_resources_init(&erst_resources);
+	erst_exec_ctx_init(&ctx);
+	rc = apei_exec_collect_resources(&ctx, &erst_resources);
+	if (rc)
+		goto err_fini;
+	rc = apei_resources_request(&erst_resources, "APEI ERST");
+	if (rc)
+		goto err_fini;
+	rc = apei_exec_pre_map_gars(&ctx);
+	if (rc)
+		goto err_release;
+	rc = erst_get_erange(&erst_erange);
+	if (rc) {
+		if (rc == -ENODEV)
+			pr_info(
+	"The corresponding hardware device or firmware implementation "
+	"is not available.\n");
+		else
+			pr_err("Failed to get Error Log Address Range.\n");
+		goto err_unmap_reg;
+	}
+
+	r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST");
+	if (!r) {
+		pr_err("Can not request [mem %#010llx-%#010llx] for ERST.\n",
+		       (unsigned long long)erst_erange.base,
+		       (unsigned long long)erst_erange.base + erst_erange.size - 1);
+		rc = -EIO;
+		goto err_unmap_reg;
+	}
+	rc = -ENOMEM;
+	erst_erange.vaddr = ioremap_cache(erst_erange.base,
+					  erst_erange.size);
+	if (!erst_erange.vaddr)
+		goto err_release_erange;
+
+	pr_info(
+	"Error Record Serialization Table (ERST) support is initialized.\n");
+
+	buf = kmalloc(erst_erange.size, GFP_KERNEL);
+	if (buf) {
+		erst_info.buf = buf + sizeof(struct cper_pstore_record);
+		erst_info.bufsize = erst_erange.size -
+				    sizeof(struct cper_pstore_record);
+		rc = pstore_register(&erst_info);
+		if (rc) {
+			if (rc != -EPERM)
+				pr_info(
+				"Could not register with persistent store.\n");
+			erst_info.buf = NULL;
+			erst_info.bufsize = 0;
+			kfree(buf);
+		}
+	} else
+		pr_err(
+		"Failed to allocate %lld bytes for persistent store error log.\n",
+		erst_erange.size);
+
+	/* Cleanup ERST Resources */
+	apei_resources_fini(&erst_resources);
+
+	return 0;
+
+err_release_erange:
+	release_mem_region(erst_erange.base, erst_erange.size);
+err_unmap_reg:
+	apei_exec_post_unmap_gars(&ctx);
+err_release:
+	apei_resources_release(&erst_resources);
+err_fini:
+	apei_resources_fini(&erst_resources);
+err_put_erst_tab:
+	acpi_put_table((struct acpi_table_header *)erst_tab);
+err:
+	erst_disable = 1;
+	return rc;
+}
+
+device_initcall(erst_init);
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
new file mode 100644
index 000000000..8678e1621
--- /dev/null
+++ b/drivers/acpi/apei/ghes.c
@@ -0,0 +1,1499 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * APEI Generic Hardware Error Source support
+ *
+ * Generic Hardware Error Source provides a way to report platform
+ * hardware errors (such as that from chipset). It works in so called
+ * "Firmware First" mode, that is, hardware errors are reported to
+ * firmware firstly, then reported to Linux by firmware. This way,
+ * some non-standard hardware error registers or non-standard hardware
+ * link can be checked by firmware to produce more hardware error
+ * information for Linux.
+ *
+ * For more information about Generic Hardware Error Source, please
+ * refer to ACPI Specification version 4.0, section 17.3.2.6
+ *
+ * Copyright 2010,2011 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/arm_sdei.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/timer.h>
+#include <linux/cper.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/ratelimit.h>
+#include <linux/vmalloc.h>
+#include <linux/irq_work.h>
+#include <linux/llist.h>
+#include <linux/genalloc.h>
+#include <linux/pci.h>
+#include <linux/pfn.h>
+#include <linux/aer.h>
+#include <linux/nmi.h>
+#include <linux/sched/clock.h>
+#include <linux/uuid.h>
+#include <linux/ras.h>
+#include <linux/task_work.h>
+
+#include <acpi/actbl1.h>
+#include <acpi/ghes.h>
+#include <acpi/apei.h>
+#include <asm/fixmap.h>
+#include <asm/tlbflush.h>
+#include <ras/ras_event.h>
+
+#include "apei-internal.h"
+
+#define GHES_PFX	"GHES: "
+
+#define GHES_ESTATUS_MAX_SIZE		65536
+#define GHES_ESOURCE_PREALLOC_MAX_SIZE	65536
+
+#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3
+
+/* This is just an estimation for memory pool allocation */
+#define GHES_ESTATUS_CACHE_AVG_SIZE	512
+
+#define GHES_ESTATUS_CACHES_SIZE	4
+
+#define GHES_ESTATUS_IN_CACHE_MAX_NSEC	10000000000ULL
+/* Prevent too many caches are allocated because of RCU */
+#define GHES_ESTATUS_CACHE_ALLOCED_MAX	(GHES_ESTATUS_CACHES_SIZE * 3 / 2)
+
+#define GHES_ESTATUS_CACHE_LEN(estatus_len)			\
+	(sizeof(struct ghes_estatus_cache) + (estatus_len))
+#define GHES_ESTATUS_FROM_CACHE(estatus_cache)			\
+	((struct acpi_hest_generic_status *)				\
+	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))
+
+#define GHES_ESTATUS_NODE_LEN(estatus_len)			\
+	(sizeof(struct ghes_estatus_node) + (estatus_len))
+#define GHES_ESTATUS_FROM_NODE(estatus_node)			\
+	((struct acpi_hest_generic_status *)				\
+	 ((struct ghes_estatus_node *)(estatus_node) + 1))
+
+#define GHES_VENDOR_ENTRY_LEN(gdata_len)                               \
+	(sizeof(struct ghes_vendor_record_entry) + (gdata_len))
+#define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry)                     \
+	((struct acpi_hest_generic_data *)                              \
+	((struct ghes_vendor_record_entry *)(vendor_entry) + 1))
+
+/*
+ *  NMI-like notifications vary by architecture, before the compiler can prune
+ *  unused static functions it needs a value for these enums.
+ */
+#ifndef CONFIG_ARM_SDE_INTERFACE
+#define FIX_APEI_GHES_SDEI_NORMAL	__end_of_fixed_addresses
+#define FIX_APEI_GHES_SDEI_CRITICAL	__end_of_fixed_addresses
+#endif
+
+static inline bool is_hest_type_generic_v2(struct ghes *ghes)
+{
+	return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
+}
+
+/*
+ * This driver isn't really modular, however for the time being,
+ * continuing to use module_param is the easiest way to remain
+ * compatible with existing boot arg use cases.
+ */
+bool ghes_disable;
+module_param_named(disable, ghes_disable, bool, 0);
+
+/*
+ * All error sources notified with HED (Hardware Error Device) share a
+ * single notifier callback, so they need to be linked and checked one
+ * by one. This holds true for NMI too.
+ *
+ * RCU is used for these lists, so ghes_list_mutex is only used for
+ * list changing, not for traversing.
+ */
+static LIST_HEAD(ghes_hed);
+static DEFINE_MUTEX(ghes_list_mutex);
+
+/*
+ * Because the memory area used to transfer hardware error information
+ * from BIOS to Linux can be determined only in NMI, IRQ or timer
+ * handler, but general ioremap can not be used in atomic context, so
+ * the fixmap is used instead.
+ *
+ * This spinlock is used to prevent the fixmap entry from being used
+ * simultaneously.
+ */
+static DEFINE_SPINLOCK(ghes_notify_lock_irq);
+
+struct ghes_vendor_record_entry {
+	struct work_struct work;
+	int error_severity;
+	char vendor_record[];
+};
+
+static struct gen_pool *ghes_estatus_pool;
+static unsigned long ghes_estatus_pool_size_request;
+
+static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
+static atomic_t ghes_estatus_cache_alloced;
+
+static int ghes_panic_timeout __read_mostly = 30;
+
+static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx)
+{
+	phys_addr_t paddr;
+	pgprot_t prot;
+
+	paddr = PFN_PHYS(pfn);
+	prot = arch_apei_get_mem_attribute(paddr);
+	__set_fixmap(fixmap_idx, paddr, prot);
+
+	return (void __iomem *) __fix_to_virt(fixmap_idx);
+}
+
+static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx)
+{
+	int _idx = virt_to_fix((unsigned long)vaddr);
+
+	WARN_ON_ONCE(fixmap_idx != _idx);
+	clear_fixmap(fixmap_idx);
+}
+
+int ghes_estatus_pool_init(unsigned int num_ghes)
+{
+	unsigned long addr, len;
+	int rc;
+
+	ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
+	if (!ghes_estatus_pool)
+		return -ENOMEM;
+
+	len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX;
+	len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE);
+
+	ghes_estatus_pool_size_request = PAGE_ALIGN(len);
+	addr = (unsigned long)vmalloc(PAGE_ALIGN(len));
+	if (!addr)
+		goto err_pool_alloc;
+
+	rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
+	if (rc)
+		goto err_pool_add;
+
+	return 0;
+
+err_pool_add:
+	vfree((void *)addr);
+
+err_pool_alloc:
+	gen_pool_destroy(ghes_estatus_pool);
+
+	return -ENOMEM;
+}
+
+static int map_gen_v2(struct ghes *ghes)
+{
+	return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
+}
+
+static void unmap_gen_v2(struct ghes *ghes)
+{
+	apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
+}
+
+static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
+{
+	int rc;
+	u64 val = 0;
+
+	rc = apei_read(&val, &gv2->read_ack_register);
+	if (rc)
+		return;
+
+	val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
+	val |= gv2->read_ack_write    << gv2->read_ack_register.bit_offset;
+
+	apei_write(val, &gv2->read_ack_register);
+}
+
+static struct ghes *ghes_new(struct acpi_hest_generic *generic)
+{
+	struct ghes *ghes;
+	unsigned int error_block_length;
+	int rc;
+
+	ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
+	if (!ghes)
+		return ERR_PTR(-ENOMEM);
+
+	ghes->generic = generic;
+	if (is_hest_type_generic_v2(ghes)) {
+		rc = map_gen_v2(ghes);
+		if (rc)
+			goto err_free;
+	}
+
+	rc = apei_map_generic_address(&generic->error_status_address);
+	if (rc)
+		goto err_unmap_read_ack_addr;
+	error_block_length = generic->error_block_length;
+	if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
+		pr_warn(FW_WARN GHES_PFX
+			"Error status block length is too long: %u for "
+			"generic hardware error source: %d.\n",
+			error_block_length, generic->header.source_id);
+		error_block_length = GHES_ESTATUS_MAX_SIZE;
+	}
+	ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
+	if (!ghes->estatus) {
+		rc = -ENOMEM;
+		goto err_unmap_status_addr;
+	}
+
+	return ghes;
+
+err_unmap_status_addr:
+	apei_unmap_generic_address(&generic->error_status_address);
+err_unmap_read_ack_addr:
+	if (is_hest_type_generic_v2(ghes))
+		unmap_gen_v2(ghes);
+err_free:
+	kfree(ghes);
+	return ERR_PTR(rc);
+}
+
+static void ghes_fini(struct ghes *ghes)
+{
+	kfree(ghes->estatus);
+	apei_unmap_generic_address(&ghes->generic->error_status_address);
+	if (is_hest_type_generic_v2(ghes))
+		unmap_gen_v2(ghes);
+}
+
+static inline int ghes_severity(int severity)
+{
+	switch (severity) {
+	case CPER_SEV_INFORMATIONAL:
+		return GHES_SEV_NO;
+	case CPER_SEV_CORRECTED:
+		return GHES_SEV_CORRECTED;
+	case CPER_SEV_RECOVERABLE:
+		return GHES_SEV_RECOVERABLE;
+	case CPER_SEV_FATAL:
+		return GHES_SEV_PANIC;
+	default:
+		/* Unknown, go panic */
+		return GHES_SEV_PANIC;
+	}
+}
+
+static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
+				  int from_phys,
+				  enum fixed_addresses fixmap_idx)
+{
+	void __iomem *vaddr;
+	u64 offset;
+	u32 trunk;
+
+	while (len > 0) {
+		offset = paddr - (paddr & PAGE_MASK);
+		vaddr = ghes_map(PHYS_PFN(paddr), fixmap_idx);
+		trunk = PAGE_SIZE - offset;
+		trunk = min(trunk, len);
+		if (from_phys)
+			memcpy_fromio(buffer, vaddr + offset, trunk);
+		else
+			memcpy_toio(vaddr + offset, buffer, trunk);
+		len -= trunk;
+		paddr += trunk;
+		buffer += trunk;
+		ghes_unmap(vaddr, fixmap_idx);
+	}
+}
+
+/* Check the top-level record header has an appropriate size. */
+static int __ghes_check_estatus(struct ghes *ghes,
+				struct acpi_hest_generic_status *estatus)
+{
+	u32 len = cper_estatus_len(estatus);
+
+	if (len < sizeof(*estatus)) {
+		pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n");
+		return -EIO;
+	}
+
+	if (len > ghes->generic->error_block_length) {
+		pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n");
+		return -EIO;
+	}
+
+	if (cper_estatus_check_header(estatus)) {
+		pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/* Read the CPER block, returning its address, and header in estatus. */
+static int __ghes_peek_estatus(struct ghes *ghes,
+			       struct acpi_hest_generic_status *estatus,
+			       u64 *buf_paddr, enum fixed_addresses fixmap_idx)
+{
+	struct acpi_hest_generic *g = ghes->generic;
+	int rc;
+
+	rc = apei_read(buf_paddr, &g->error_status_address);
+	if (rc) {
+		*buf_paddr = 0;
+		pr_warn_ratelimited(FW_WARN GHES_PFX
+"Failed to read error status block address for hardware error source: %d.\n",
+				   g->header.source_id);
+		return -EIO;
+	}
+	if (!*buf_paddr)
+		return -ENOENT;
+
+	ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1,
+			      fixmap_idx);
+	if (!estatus->block_status) {
+		*buf_paddr = 0;
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
+			       u64 buf_paddr, enum fixed_addresses fixmap_idx,
+			       size_t buf_len)
+{
+	ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx);
+	if (cper_estatus_check(estatus)) {
+		pr_warn_ratelimited(FW_WARN GHES_PFX
+				    "Failed to read error status block!\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int ghes_read_estatus(struct ghes *ghes,
+			     struct acpi_hest_generic_status *estatus,
+			     u64 *buf_paddr, enum fixed_addresses fixmap_idx)
+{
+	int rc;
+
+	rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx);
+	if (rc)
+		return rc;
+
+	rc = __ghes_check_estatus(ghes, estatus);
+	if (rc)
+		return rc;
+
+	return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx,
+				   cper_estatus_len(estatus));
+}
+
+static void ghes_clear_estatus(struct ghes *ghes,
+			       struct acpi_hest_generic_status *estatus,
+			       u64 buf_paddr, enum fixed_addresses fixmap_idx)
+{
+	estatus->block_status = 0;
+
+	if (!buf_paddr)
+		return;
+
+	ghes_copy_tofrom_phys(estatus, buf_paddr,
+			      sizeof(estatus->block_status), 0,
+			      fixmap_idx);
+
+	/*
+	 * GHESv2 type HEST entries introduce support for error acknowledgment,
+	 * so only acknowledge the error if this support is present.
+	 */
+	if (is_hest_type_generic_v2(ghes))
+		ghes_ack_error(ghes->generic_v2);
+}
+
+/*
+ * Called as task_work before returning to user-space.
+ * Ensure any queued work has been done before we return to the context that
+ * triggered the notification.
+ */
+static void ghes_kick_task_work(struct callback_head *head)
+{
+	struct acpi_hest_generic_status *estatus;
+	struct ghes_estatus_node *estatus_node;
+	u32 node_len;
+
+	estatus_node = container_of(head, struct ghes_estatus_node, task_work);
+	if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
+		memory_failure_queue_kick(estatus_node->task_work_cpu);
+
+	estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+	node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus));
+	gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len);
+}
+
+static bool ghes_do_memory_failure(u64 physical_addr, int flags)
+{
+	unsigned long pfn;
+
+	if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
+		return false;
+
+	pfn = PHYS_PFN(physical_addr);
+	if (!pfn_valid(pfn)) {
+		pr_warn_ratelimited(FW_WARN GHES_PFX
+		"Invalid address in generic error data: %#llx\n",
+		physical_addr);
+		return false;
+	}
+
+	memory_failure_queue(pfn, flags);
+	return true;
+}
+
+static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
+				       int sev)
+{
+	int flags = -1;
+	int sec_sev = ghes_severity(gdata->error_severity);
+	struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
+
+	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
+		return false;
+
+	/* iff following two events can be handled properly by now */
+	if (sec_sev == GHES_SEV_CORRECTED &&
+	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
+		flags = MF_SOFT_OFFLINE;
+	if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
+		flags = 0;
+
+	if (flags != -1)
+		return ghes_do_memory_failure(mem_err->physical_addr, flags);
+
+	return false;
+}
+
+static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int sev)
+{
+	struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
+	bool queued = false;
+	int sec_sev, i;
+	char *p;
+
+	log_arm_hw_error(err);
+
+	sec_sev = ghes_severity(gdata->error_severity);
+	if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
+		return false;
+
+	p = (char *)(err + 1);
+	for (i = 0; i < err->err_info_num; i++) {
+		struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
+		bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR);
+		bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
+		const char *error_type = "unknown error";
+
+		/*
+		 * The field (err_info->error_info & BIT(26)) is fixed to set to
+		 * 1 in some old firmware of HiSilicon Kunpeng920. We assume that
+		 * firmware won't mix corrected errors in an uncorrected section,
+		 * and don't filter out 'corrected' error here.
+		 */
+		if (is_cache && has_pa) {
+			queued = ghes_do_memory_failure(err_info->physical_fault_addr, 0);
+			p += err_info->length;
+			continue;
+		}
+
+		if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs))
+			error_type = cper_proc_error_type_strs[err_info->type];
+
+		pr_warn_ratelimited(FW_WARN GHES_PFX
+				    "Unhandled processor error type: %s\n",
+				    error_type);
+		p += err_info->length;
+	}
+
+	return queued;
+}
+
+/*
+ * PCIe AER errors need to be sent to the AER driver for reporting and
+ * recovery. The GHES severities map to the following AER severities and
+ * require the following handling:
+ *
+ * GHES_SEV_CORRECTABLE -> AER_CORRECTABLE
+ *     These need to be reported by the AER driver but no recovery is
+ *     necessary.
+ * GHES_SEV_RECOVERABLE -> AER_NONFATAL
+ * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL
+ *     These both need to be reported and recovered from by the AER driver.
+ * GHES_SEV_PANIC does not make it to this handling since the kernel must
+ *     panic.
+ */
+static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
+{
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+	struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
+
+	if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
+	    pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
+		unsigned int devfn;
+		int aer_severity;
+
+		devfn = PCI_DEVFN(pcie_err->device_id.device,
+				  pcie_err->device_id.function);
+		aer_severity = cper_severity_to_aer(gdata->error_severity);
+
+		/*
+		 * If firmware reset the component to contain
+		 * the error, we must reinitialize it before
+		 * use, so treat it as a fatal AER error.
+		 */
+		if (gdata->flags & CPER_SEC_RESET)
+			aer_severity = AER_FATAL;
+
+		aer_recover_queue(pcie_err->device_id.segment,
+				  pcie_err->device_id.bus,
+				  devfn, aer_severity,
+				  (struct aer_capability_regs *)
+				  pcie_err->aer_info);
+	}
+#endif
+}
+
+static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list);
+
+int ghes_register_vendor_record_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&vendor_record_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier);
+
+void ghes_unregister_vendor_record_notifier(struct notifier_block *nb)
+{
+	blocking_notifier_chain_unregister(&vendor_record_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier);
+
+static void ghes_vendor_record_work_func(struct work_struct *work)
+{
+	struct ghes_vendor_record_entry *entry;
+	struct acpi_hest_generic_data *gdata;
+	u32 len;
+
+	entry = container_of(work, struct ghes_vendor_record_entry, work);
+	gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
+
+	blocking_notifier_call_chain(&vendor_record_notify_list,
+				     entry->error_severity, gdata);
+
+	len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
+	gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len);
+}
+
+static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
+					  int sev)
+{
+	struct acpi_hest_generic_data *copied_gdata;
+	struct ghes_vendor_record_entry *entry;
+	u32 len;
+
+	len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
+	entry = (void *)gen_pool_alloc(ghes_estatus_pool, len);
+	if (!entry)
+		return;
+
+	copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
+	memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata));
+	entry->error_severity = sev;
+
+	INIT_WORK(&entry->work, ghes_vendor_record_work_func);
+	schedule_work(&entry->work);
+}
+
+static bool ghes_do_proc(struct ghes *ghes,
+			 const struct acpi_hest_generic_status *estatus)
+{
+	int sev, sec_sev;
+	struct acpi_hest_generic_data *gdata;
+	guid_t *sec_type;
+	const guid_t *fru_id = &guid_null;
+	char *fru_text = "";
+	bool queued = false;
+
+	sev = ghes_severity(estatus->error_severity);
+	apei_estatus_for_each_section(estatus, gdata) {
+		sec_type = (guid_t *)gdata->section_type;
+		sec_sev = ghes_severity(gdata->error_severity);
+		if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+			fru_id = (guid_t *)gdata->fru_id;
+
+		if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+			fru_text = gdata->fru_text;
+
+		if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
+			struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
+
+			ghes_edac_report_mem_error(sev, mem_err);
+
+			arch_apei_report_mem_error(sev, mem_err);
+			queued = ghes_handle_memory_failure(gdata, sev);
+		}
+		else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
+			ghes_handle_aer(gdata);
+		}
+		else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
+			queued = ghes_handle_arm_hw_error(gdata, sev);
+		} else {
+			void *err = acpi_hest_get_payload(gdata);
+
+			ghes_defer_non_standard_event(gdata, sev);
+			log_non_standard_event(sec_type, fru_id, fru_text,
+					       sec_sev, err,
+					       gdata->error_data_length);
+		}
+	}
+
+	return queued;
+}
+
+static void __ghes_print_estatus(const char *pfx,
+				 const struct acpi_hest_generic *generic,
+				 const struct acpi_hest_generic_status *estatus)
+{
+	static atomic_t seqno;
+	unsigned int curr_seqno;
+	char pfx_seq[64];
+
+	if (pfx == NULL) {
+		if (ghes_severity(estatus->error_severity) <=
+		    GHES_SEV_CORRECTED)
+			pfx = KERN_WARNING;
+		else
+			pfx = KERN_ERR;
+	}
+	curr_seqno = atomic_inc_return(&seqno);
+	snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
+	printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
+	       pfx_seq, generic->header.source_id);
+	cper_estatus_print(pfx_seq, estatus);
+}
+
+static int ghes_print_estatus(const char *pfx,
+			      const struct acpi_hest_generic *generic,
+			      const struct acpi_hest_generic_status *estatus)
+{
+	/* Not more than 2 messages every 5 seconds */
+	static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
+	static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
+	struct ratelimit_state *ratelimit;
+
+	if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
+		ratelimit = &ratelimit_corrected;
+	else
+		ratelimit = &ratelimit_uncorrected;
+	if (__ratelimit(ratelimit)) {
+		__ghes_print_estatus(pfx, generic, estatus);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * GHES error status reporting throttle, to report more kinds of
+ * errors, instead of just most frequently occurred errors.
+ */
+static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
+{
+	u32 len;
+	int i, cached = 0;
+	unsigned long long now;
+	struct ghes_estatus_cache *cache;
+	struct acpi_hest_generic_status *cache_estatus;
+
+	len = cper_estatus_len(estatus);
+	rcu_read_lock();
+	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+		cache = rcu_dereference(ghes_estatus_caches[i]);
+		if (cache == NULL)
+			continue;
+		if (len != cache->estatus_len)
+			continue;
+		cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+		if (memcmp(estatus, cache_estatus, len))
+			continue;
+		atomic_inc(&cache->count);
+		now = sched_clock();
+		if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
+			cached = 1;
+		break;
+	}
+	rcu_read_unlock();
+	return cached;
+}
+
+static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
+	struct acpi_hest_generic *generic,
+	struct acpi_hest_generic_status *estatus)
+{
+	int alloced;
+	u32 len, cache_len;
+	struct ghes_estatus_cache *cache;
+	struct acpi_hest_generic_status *cache_estatus;
+
+	alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
+	if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
+		atomic_dec(&ghes_estatus_cache_alloced);
+		return NULL;
+	}
+	len = cper_estatus_len(estatus);
+	cache_len = GHES_ESTATUS_CACHE_LEN(len);
+	cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
+	if (!cache) {
+		atomic_dec(&ghes_estatus_cache_alloced);
+		return NULL;
+	}
+	cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+	memcpy(cache_estatus, estatus, len);
+	cache->estatus_len = len;
+	atomic_set(&cache->count, 0);
+	cache->generic = generic;
+	cache->time_in = sched_clock();
+	return cache;
+}
+
+static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
+{
+	u32 len;
+
+	len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
+	len = GHES_ESTATUS_CACHE_LEN(len);
+	gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
+	atomic_dec(&ghes_estatus_cache_alloced);
+}
+
+static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
+{
+	struct ghes_estatus_cache *cache;
+
+	cache = container_of(head, struct ghes_estatus_cache, rcu);
+	ghes_estatus_cache_free(cache);
+}
+
+static void ghes_estatus_cache_add(
+	struct acpi_hest_generic *generic,
+	struct acpi_hest_generic_status *estatus)
+{
+	int i, slot = -1, count;
+	unsigned long long now, duration, period, max_period = 0;
+	struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;
+
+	new_cache = ghes_estatus_cache_alloc(generic, estatus);
+	if (new_cache == NULL)
+		return;
+	rcu_read_lock();
+	now = sched_clock();
+	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+		cache = rcu_dereference(ghes_estatus_caches[i]);
+		if (cache == NULL) {
+			slot = i;
+			slot_cache = NULL;
+			break;
+		}
+		duration = now - cache->time_in;
+		if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
+			slot = i;
+			slot_cache = cache;
+			break;
+		}
+		count = atomic_read(&cache->count);
+		period = duration;
+		do_div(period, (count + 1));
+		if (period > max_period) {
+			max_period = period;
+			slot = i;
+			slot_cache = cache;
+		}
+	}
+	/* new_cache must be put into array after its contents are written */
+	smp_wmb();
+	if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
+				  slot_cache, new_cache) == slot_cache) {
+		if (slot_cache)
+			call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
+	} else
+		ghes_estatus_cache_free(new_cache);
+	rcu_read_unlock();
+}
+
+static void __ghes_panic(struct ghes *ghes,
+			 struct acpi_hest_generic_status *estatus,
+			 u64 buf_paddr, enum fixed_addresses fixmap_idx)
+{
+	__ghes_print_estatus(KERN_EMERG, ghes->generic, estatus);
+
+	ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx);
+
+	/* reboot to log the error! */
+	if (!panic_timeout)
+		panic_timeout = ghes_panic_timeout;
+	panic("Fatal hardware error!");
+}
+
+static int ghes_proc(struct ghes *ghes)
+{
+	struct acpi_hest_generic_status *estatus = ghes->estatus;
+	u64 buf_paddr;
+	int rc;
+
+	rc = ghes_read_estatus(ghes, estatus, &buf_paddr, FIX_APEI_GHES_IRQ);
+	if (rc)
+		goto out;
+
+	if (ghes_severity(estatus->error_severity) >= GHES_SEV_PANIC)
+		__ghes_panic(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ);
+
+	if (!ghes_estatus_cached(estatus)) {
+		if (ghes_print_estatus(NULL, ghes->generic, estatus))
+			ghes_estatus_cache_add(ghes->generic, estatus);
+	}
+	ghes_do_proc(ghes, estatus);
+
+out:
+	ghes_clear_estatus(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ);
+
+	return rc;
+}
+
+static void ghes_add_timer(struct ghes *ghes)
+{
+	struct acpi_hest_generic *g = ghes->generic;
+	unsigned long expire;
+
+	if (!g->notify.poll_interval) {
+		pr_warn(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
+			g->header.source_id);
+		return;
+	}
+	expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
+	ghes->timer.expires = round_jiffies_relative(expire);
+	add_timer(&ghes->timer);
+}
+
+static void ghes_poll_func(struct timer_list *t)
+{
+	struct ghes *ghes = from_timer(ghes, t, timer);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
+	ghes_proc(ghes);
+	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
+	if (!(ghes->flags & GHES_EXITING))
+		ghes_add_timer(ghes);
+}
+
+static irqreturn_t ghes_irq_func(int irq, void *data)
+{
+	struct ghes *ghes = data;
+	unsigned long flags;
+	int rc;
+
+	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
+	rc = ghes_proc(ghes);
+	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
+	if (rc)
+		return IRQ_NONE;
+
+	return IRQ_HANDLED;
+}
+
+static int ghes_notify_hed(struct notifier_block *this, unsigned long event,
+			   void *data)
+{
+	struct ghes *ghes;
+	unsigned long flags;
+	int ret = NOTIFY_DONE;
+
+	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
+	rcu_read_lock();
+	list_for_each_entry_rcu(ghes, &ghes_hed, list) {
+		if (!ghes_proc(ghes))
+			ret = NOTIFY_OK;
+	}
+	rcu_read_unlock();
+	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
+
+	return ret;
+}
+
+static struct notifier_block ghes_notifier_hed = {
+	.notifier_call = ghes_notify_hed,
+};
+
+/*
+ * Handlers for CPER records may not be NMI safe. For example,
+ * memory_failure_queue() takes spinlocks and calls schedule_work_on().
+ * In any NMI-like handler, memory from ghes_estatus_pool is used to save
+ * estatus, and added to the ghes_estatus_llist. irq_work_queue() causes
+ * ghes_proc_in_irq() to run in IRQ context where each estatus in
+ * ghes_estatus_llist is processed.
+ *
+ * Memory from the ghes_estatus_pool is also used with the ghes_estatus_cache
+ * to suppress frequent messages.
+ */
+static struct llist_head ghes_estatus_llist;
+static struct irq_work ghes_proc_irq_work;
+
+static void ghes_proc_in_irq(struct irq_work *irq_work)
+{
+	struct llist_node *llnode, *next;
+	struct ghes_estatus_node *estatus_node;
+	struct acpi_hest_generic *generic;
+	struct acpi_hest_generic_status *estatus;
+	bool task_work_pending;
+	u32 len, node_len;
+	int ret;
+
+	llnode = llist_del_all(&ghes_estatus_llist);
+	/*
+	 * Because the time order of estatus in list is reversed,
+	 * revert it back to proper order.
+	 */
+	llnode = llist_reverse_order(llnode);
+	while (llnode) {
+		next = llnode->next;
+		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
+					   llnode);
+		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+		len = cper_estatus_len(estatus);
+		node_len = GHES_ESTATUS_NODE_LEN(len);
+		task_work_pending = ghes_do_proc(estatus_node->ghes, estatus);
+		if (!ghes_estatus_cached(estatus)) {
+			generic = estatus_node->generic;
+			if (ghes_print_estatus(NULL, generic, estatus))
+				ghes_estatus_cache_add(generic, estatus);
+		}
+
+		if (task_work_pending && current->mm) {
+			estatus_node->task_work.func = ghes_kick_task_work;
+			estatus_node->task_work_cpu = smp_processor_id();
+			ret = task_work_add(current, &estatus_node->task_work,
+					    TWA_RESUME);
+			if (ret)
+				estatus_node->task_work.func = NULL;
+		}
+
+		if (!estatus_node->task_work.func)
+			gen_pool_free(ghes_estatus_pool,
+				      (unsigned long)estatus_node, node_len);
+
+		llnode = next;
+	}
+}
+
+static void ghes_print_queued_estatus(void)
+{
+	struct llist_node *llnode;
+	struct ghes_estatus_node *estatus_node;
+	struct acpi_hest_generic *generic;
+	struct acpi_hest_generic_status *estatus;
+
+	llnode = llist_del_all(&ghes_estatus_llist);
+	/*
+	 * Because the time order of estatus in list is reversed,
+	 * revert it back to proper order.
+	 */
+	llnode = llist_reverse_order(llnode);
+	while (llnode) {
+		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
+					   llnode);
+		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+		generic = estatus_node->generic;
+		ghes_print_estatus(NULL, generic, estatus);
+		llnode = llnode->next;
+	}
+}
+
+static int ghes_in_nmi_queue_one_entry(struct ghes *ghes,
+				       enum fixed_addresses fixmap_idx)
+{
+	struct acpi_hest_generic_status *estatus, tmp_header;
+	struct ghes_estatus_node *estatus_node;
+	u32 len, node_len;
+	u64 buf_paddr;
+	int sev, rc;
+
+	if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG))
+		return -EOPNOTSUPP;
+
+	rc = __ghes_peek_estatus(ghes, &tmp_header, &buf_paddr, fixmap_idx);
+	if (rc) {
+		ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
+		return rc;
+	}
+
+	rc = __ghes_check_estatus(ghes, &tmp_header);
+	if (rc) {
+		ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
+		return rc;
+	}
+
+	len = cper_estatus_len(&tmp_header);
+	node_len = GHES_ESTATUS_NODE_LEN(len);
+	estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len);
+	if (!estatus_node)
+		return -ENOMEM;
+
+	estatus_node->ghes = ghes;
+	estatus_node->generic = ghes->generic;
+	estatus_node->task_work.func = NULL;
+	estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+
+	if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) {
+		ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx);
+		rc = -ENOENT;
+		goto no_work;
+	}
+
+	sev = ghes_severity(estatus->error_severity);
+	if (sev >= GHES_SEV_PANIC) {
+		ghes_print_queued_estatus();
+		__ghes_panic(ghes, estatus, buf_paddr, fixmap_idx);
+	}
+
+	ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
+
+	/* This error has been reported before, don't process it again. */
+	if (ghes_estatus_cached(estatus))
+		goto no_work;
+
+	llist_add(&estatus_node->llnode, &ghes_estatus_llist);
+
+	return rc;
+
+no_work:
+	gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
+		      node_len);
+
+	return rc;
+}
+
+static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list,
+				       enum fixed_addresses fixmap_idx)
+{
+	int ret = -ENOENT;
+	struct ghes *ghes;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ghes, rcu_list, list) {
+		if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx))
+			ret = 0;
+	}
+	rcu_read_unlock();
+
+	if (IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && !ret)
+		irq_work_queue(&ghes_proc_irq_work);
+
+	return ret;
+}
+
+#ifdef CONFIG_ACPI_APEI_SEA
+static LIST_HEAD(ghes_sea);
+
+/*
+ * Return 0 only if one of the SEA error sources successfully reported an error
+ * record sent from the firmware.
+ */
+int ghes_notify_sea(void)
+{
+	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sea);
+	int rv;
+
+	raw_spin_lock(&ghes_notify_lock_sea);
+	rv = ghes_in_nmi_spool_from_list(&ghes_sea, FIX_APEI_GHES_SEA);
+	raw_spin_unlock(&ghes_notify_lock_sea);
+
+	return rv;
+}
+
+static void ghes_sea_add(struct ghes *ghes)
+{
+	mutex_lock(&ghes_list_mutex);
+	list_add_rcu(&ghes->list, &ghes_sea);
+	mutex_unlock(&ghes_list_mutex);
+}
+
+static void ghes_sea_remove(struct ghes *ghes)
+{
+	mutex_lock(&ghes_list_mutex);
+	list_del_rcu(&ghes->list);
+	mutex_unlock(&ghes_list_mutex);
+	synchronize_rcu();
+}
+#else /* CONFIG_ACPI_APEI_SEA */
+static inline void ghes_sea_add(struct ghes *ghes) { }
+static inline void ghes_sea_remove(struct ghes *ghes) { }
+#endif /* CONFIG_ACPI_APEI_SEA */
+
+#ifdef CONFIG_HAVE_ACPI_APEI_NMI
+/*
+ * NMI may be triggered on any CPU, so ghes_in_nmi is used for
+ * having only one concurrent reader.
+ */
+static atomic_t ghes_in_nmi = ATOMIC_INIT(0);
+
+static LIST_HEAD(ghes_nmi);
+
+static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
+{
+	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_nmi);
+	int ret = NMI_DONE;
+
+	if (!atomic_add_unless(&ghes_in_nmi, 1, 1))
+		return ret;
+
+	raw_spin_lock(&ghes_notify_lock_nmi);
+	if (!ghes_in_nmi_spool_from_list(&ghes_nmi, FIX_APEI_GHES_NMI))
+		ret = NMI_HANDLED;
+	raw_spin_unlock(&ghes_notify_lock_nmi);
+
+	atomic_dec(&ghes_in_nmi);
+	return ret;
+}
+
+static void ghes_nmi_add(struct ghes *ghes)
+{
+	mutex_lock(&ghes_list_mutex);
+	if (list_empty(&ghes_nmi))
+		register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
+	list_add_rcu(&ghes->list, &ghes_nmi);
+	mutex_unlock(&ghes_list_mutex);
+}
+
+static void ghes_nmi_remove(struct ghes *ghes)
+{
+	mutex_lock(&ghes_list_mutex);
+	list_del_rcu(&ghes->list);
+	if (list_empty(&ghes_nmi))
+		unregister_nmi_handler(NMI_LOCAL, "ghes");
+	mutex_unlock(&ghes_list_mutex);
+	/*
+	 * To synchronize with NMI handler, ghes can only be
+	 * freed after NMI handler finishes.
+	 */
+	synchronize_rcu();
+}
+#else /* CONFIG_HAVE_ACPI_APEI_NMI */
+static inline void ghes_nmi_add(struct ghes *ghes) { }
+static inline void ghes_nmi_remove(struct ghes *ghes) { }
+#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
+
+static void ghes_nmi_init_cxt(void)
+{
+	init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
+}
+
+static int __ghes_sdei_callback(struct ghes *ghes,
+				enum fixed_addresses fixmap_idx)
+{
+	if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) {
+		irq_work_queue(&ghes_proc_irq_work);
+
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+static int ghes_sdei_normal_callback(u32 event_num, struct pt_regs *regs,
+				      void *arg)
+{
+	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_normal);
+	struct ghes *ghes = arg;
+	int err;
+
+	raw_spin_lock(&ghes_notify_lock_sdei_normal);
+	err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_NORMAL);
+	raw_spin_unlock(&ghes_notify_lock_sdei_normal);
+
+	return err;
+}
+
+static int ghes_sdei_critical_callback(u32 event_num, struct pt_regs *regs,
+				       void *arg)
+{
+	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_critical);
+	struct ghes *ghes = arg;
+	int err;
+
+	raw_spin_lock(&ghes_notify_lock_sdei_critical);
+	err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_CRITICAL);
+	raw_spin_unlock(&ghes_notify_lock_sdei_critical);
+
+	return err;
+}
+
+static int apei_sdei_register_ghes(struct ghes *ghes)
+{
+	if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
+		return -EOPNOTSUPP;
+
+	return sdei_register_ghes(ghes, ghes_sdei_normal_callback,
+				 ghes_sdei_critical_callback);
+}
+
+static int apei_sdei_unregister_ghes(struct ghes *ghes)
+{
+	if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
+		return -EOPNOTSUPP;
+
+	return sdei_unregister_ghes(ghes);
+}
+
+static int ghes_probe(struct platform_device *ghes_dev)
+{
+	struct acpi_hest_generic *generic;
+	struct ghes *ghes = NULL;
+	unsigned long flags;
+
+	int rc = -EINVAL;
+
+	generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
+	if (!generic->enabled)
+		return -ENODEV;
+
+	switch (generic->notify.type) {
+	case ACPI_HEST_NOTIFY_POLLED:
+	case ACPI_HEST_NOTIFY_EXTERNAL:
+	case ACPI_HEST_NOTIFY_SCI:
+	case ACPI_HEST_NOTIFY_GSIV:
+	case ACPI_HEST_NOTIFY_GPIO:
+		break;
+
+	case ACPI_HEST_NOTIFY_SEA:
+		if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
+			pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n",
+				generic->header.source_id);
+			rc = -ENOTSUPP;
+			goto err;
+		}
+		break;
+	case ACPI_HEST_NOTIFY_NMI:
+		if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
+			pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
+				generic->header.source_id);
+			goto err;
+		}
+		break;
+	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
+		if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) {
+			pr_warn(GHES_PFX "Generic hardware error source: %d notified via SDE Interface is not supported!\n",
+				generic->header.source_id);
+			goto err;
+		}
+		break;
+	case ACPI_HEST_NOTIFY_LOCAL:
+		pr_warn(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
+			generic->header.source_id);
+		goto err;
+	default:
+		pr_warn(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
+			generic->notify.type, generic->header.source_id);
+		goto err;
+	}
+
+	rc = -EIO;
+	if (generic->error_block_length <
+	    sizeof(struct acpi_hest_generic_status)) {
+		pr_warn(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
+			generic->error_block_length, generic->header.source_id);
+		goto err;
+	}
+	ghes = ghes_new(generic);
+	if (IS_ERR(ghes)) {
+		rc = PTR_ERR(ghes);
+		ghes = NULL;
+		goto err;
+	}
+
+	switch (generic->notify.type) {
+	case ACPI_HEST_NOTIFY_POLLED:
+		timer_setup(&ghes->timer, ghes_poll_func, 0);
+		ghes_add_timer(ghes);
+		break;
+	case ACPI_HEST_NOTIFY_EXTERNAL:
+		/* External interrupt vector is GSI */
+		rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq);
+		if (rc) {
+			pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
+			       generic->header.source_id);
+			goto err;
+		}
+		rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED,
+				 "GHES IRQ", ghes);
+		if (rc) {
+			pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
+			       generic->header.source_id);
+			goto err;
+		}
+		break;
+
+	case ACPI_HEST_NOTIFY_SCI:
+	case ACPI_HEST_NOTIFY_GSIV:
+	case ACPI_HEST_NOTIFY_GPIO:
+		mutex_lock(&ghes_list_mutex);
+		if (list_empty(&ghes_hed))
+			register_acpi_hed_notifier(&ghes_notifier_hed);
+		list_add_rcu(&ghes->list, &ghes_hed);
+		mutex_unlock(&ghes_list_mutex);
+		break;
+
+	case ACPI_HEST_NOTIFY_SEA:
+		ghes_sea_add(ghes);
+		break;
+	case ACPI_HEST_NOTIFY_NMI:
+		ghes_nmi_add(ghes);
+		break;
+	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
+		rc = apei_sdei_register_ghes(ghes);
+		if (rc)
+			goto err;
+		break;
+	default:
+		BUG();
+	}
+
+	platform_set_drvdata(ghes_dev, ghes);
+
+	ghes_edac_register(ghes, &ghes_dev->dev);
+
+	/* Handle any pending errors right away */
+	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
+	ghes_proc(ghes);
+	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
+
+	return 0;
+
+err:
+	if (ghes) {
+		ghes_fini(ghes);
+		kfree(ghes);
+	}
+	return rc;
+}
+
+static int ghes_remove(struct platform_device *ghes_dev)
+{
+	int rc;
+	struct ghes *ghes;
+	struct acpi_hest_generic *generic;
+
+	ghes = platform_get_drvdata(ghes_dev);
+	generic = ghes->generic;
+
+	ghes->flags |= GHES_EXITING;
+	switch (generic->notify.type) {
+	case ACPI_HEST_NOTIFY_POLLED:
+		del_timer_sync(&ghes->timer);
+		break;
+	case ACPI_HEST_NOTIFY_EXTERNAL:
+		free_irq(ghes->irq, ghes);
+		break;
+
+	case ACPI_HEST_NOTIFY_SCI:
+	case ACPI_HEST_NOTIFY_GSIV:
+	case ACPI_HEST_NOTIFY_GPIO:
+		mutex_lock(&ghes_list_mutex);
+		list_del_rcu(&ghes->list);
+		if (list_empty(&ghes_hed))
+			unregister_acpi_hed_notifier(&ghes_notifier_hed);
+		mutex_unlock(&ghes_list_mutex);
+		synchronize_rcu();
+		break;
+
+	case ACPI_HEST_NOTIFY_SEA:
+		ghes_sea_remove(ghes);
+		break;
+	case ACPI_HEST_NOTIFY_NMI:
+		ghes_nmi_remove(ghes);
+		break;
+	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
+		rc = apei_sdei_unregister_ghes(ghes);
+		if (rc)
+			return rc;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	ghes_fini(ghes);
+
+	ghes_edac_unregister(ghes);
+
+	kfree(ghes);
+
+	platform_set_drvdata(ghes_dev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver ghes_platform_driver = {
+	.driver		= {
+		.name	= "GHES",
+	},
+	.probe		= ghes_probe,
+	.remove		= ghes_remove,
+};
+
+void __init ghes_init(void)
+{
+	int rc;
+
+	sdei_init();
+
+	if (acpi_disabled)
+		return;
+
+	switch (hest_disable) {
+	case HEST_NOT_FOUND:
+		return;
+	case HEST_DISABLED:
+		pr_info(GHES_PFX "HEST is not enabled!\n");
+		return;
+	default:
+		break;
+	}
+
+	if (ghes_disable) {
+		pr_info(GHES_PFX "GHES is not enabled!\n");
+		return;
+	}
+
+	ghes_nmi_init_cxt();
+
+	rc = platform_driver_register(&ghes_platform_driver);
+	if (rc)
+		return;
+
+	rc = apei_osc_setup();
+	if (rc == 0 && osc_sb_apei_support_acked)
+		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
+	else if (rc == 0 && !osc_sb_apei_support_acked)
+		pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
+	else if (rc && osc_sb_apei_support_acked)
+		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
+	else
+		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
+}
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
new file mode 100644
index 000000000..7bf48c277
--- /dev/null
+++ b/drivers/acpi/apei/hest.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * APEI Hardware Error Souce Table support
+ *
+ * HEST describes error sources in detail; communicates operational
+ * parameters (i.e. severity levels, masking bits, and threshold
+ * values) to Linux as necessary. It also allows the BIOS to report
+ * non-standard error sources to Linux (for example, chipset-specific
+ * error registers).
+ *
+ * For more information about HEST, please refer to ACPI Specification
+ * version 4.0, section 17.3.2.
+ *
+ * Copyright 2009 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/kdebug.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <acpi/apei.h>
+#include <acpi/ghes.h>
+
+#include "apei-internal.h"
+
+#define HEST_PFX "HEST: "
+
+int hest_disable;
+EXPORT_SYMBOL_GPL(hest_disable);
+
+/* HEST table parsing */
+
+static struct acpi_table_hest *__read_mostly hest_tab;
+
+static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
+	[ACPI_HEST_TYPE_IA32_CHECK] = -1,	/* need further calculation */
+	[ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1,
+	[ACPI_HEST_TYPE_IA32_NMI] = sizeof(struct acpi_hest_ia_nmi),
+	[ACPI_HEST_TYPE_AER_ROOT_PORT] = sizeof(struct acpi_hest_aer_root),
+	[ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer),
+	[ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge),
+	[ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic),
+	[ACPI_HEST_TYPE_GENERIC_ERROR_V2] = sizeof(struct acpi_hest_generic_v2),
+	[ACPI_HEST_TYPE_IA32_DEFERRED_CHECK] = -1,
+};
+
+static int hest_esrc_len(struct acpi_hest_header *hest_hdr)
+{
+	u16 hest_type = hest_hdr->type;
+	int len;
+
+	if (hest_type >= ACPI_HEST_TYPE_RESERVED)
+		return 0;
+
+	len = hest_esrc_len_tab[hest_type];
+
+	if (hest_type == ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) {
+		struct acpi_hest_ia_corrected *cmc;
+		cmc = (struct acpi_hest_ia_corrected *)hest_hdr;
+		len = sizeof(*cmc) + cmc->num_hardware_banks *
+			sizeof(struct acpi_hest_ia_error_bank);
+	} else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) {
+		struct acpi_hest_ia_machine_check *mc;
+		mc = (struct acpi_hest_ia_machine_check *)hest_hdr;
+		len = sizeof(*mc) + mc->num_hardware_banks *
+			sizeof(struct acpi_hest_ia_error_bank);
+	} else if (hest_type == ACPI_HEST_TYPE_IA32_DEFERRED_CHECK) {
+		struct acpi_hest_ia_deferred_check *mc;
+		mc = (struct acpi_hest_ia_deferred_check *)hest_hdr;
+		len = sizeof(*mc) + mc->num_hardware_banks *
+			sizeof(struct acpi_hest_ia_error_bank);
+	}
+	BUG_ON(len == -1);
+
+	return len;
+};
+
+int apei_hest_parse(apei_hest_func_t func, void *data)
+{
+	struct acpi_hest_header *hest_hdr;
+	int i, rc, len;
+
+	if (hest_disable || !hest_tab)
+		return -EINVAL;
+
+	hest_hdr = (struct acpi_hest_header *)(hest_tab + 1);
+	for (i = 0; i < hest_tab->error_source_count; i++) {
+		len = hest_esrc_len(hest_hdr);
+		if (!len) {
+			pr_warn(FW_WARN HEST_PFX
+				"Unknown or unused hardware error source "
+				"type: %d for hardware error source: %d.\n",
+				hest_hdr->type, hest_hdr->source_id);
+			return -EINVAL;
+		}
+		if ((void *)hest_hdr + len >
+		    (void *)hest_tab + hest_tab->header.length) {
+			pr_warn(FW_BUG HEST_PFX
+		"Table contents overflow for hardware error source: %d.\n",
+				hest_hdr->source_id);
+			return -EINVAL;
+		}
+
+		rc = func(hest_hdr, data);
+		if (rc)
+			return rc;
+
+		hest_hdr = (void *)hest_hdr + len;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(apei_hest_parse);
+
+/*
+ * Check if firmware advertises firmware first mode. We need FF bit to be set
+ * along with a set of MC banks which work in FF mode.
+ */
+static int __init hest_parse_cmc(struct acpi_hest_header *hest_hdr, void *data)
+{
+	if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK)
+		return 0;
+
+	if (!acpi_disable_cmcff)
+		return !arch_apei_enable_cmcff(hest_hdr, data);
+
+	return 0;
+}
+
+struct ghes_arr {
+	struct platform_device **ghes_devs;
+	unsigned int count;
+};
+
+static int __init hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void *data)
+{
+	int *count = data;
+
+	if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR ||
+	    hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2)
+		(*count)++;
+	return 0;
+}
+
+static int __init hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data)
+{
+	struct platform_device *ghes_dev;
+	struct ghes_arr *ghes_arr = data;
+	int rc, i;
+
+	if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR &&
+	    hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2)
+		return 0;
+
+	if (!((struct acpi_hest_generic *)hest_hdr)->enabled)
+		return 0;
+	for (i = 0; i < ghes_arr->count; i++) {
+		struct acpi_hest_header *hdr;
+		ghes_dev = ghes_arr->ghes_devs[i];
+		hdr = *(struct acpi_hest_header **)ghes_dev->dev.platform_data;
+		if (hdr->source_id == hest_hdr->source_id) {
+			pr_warn(FW_WARN HEST_PFX "Duplicated hardware error source ID: %d.\n",
+				hdr->source_id);
+			return -EIO;
+		}
+	}
+	ghes_dev = platform_device_alloc("GHES", hest_hdr->source_id);
+	if (!ghes_dev)
+		return -ENOMEM;
+
+	rc = platform_device_add_data(ghes_dev, &hest_hdr, sizeof(void *));
+	if (rc)
+		goto err;
+
+	rc = platform_device_add(ghes_dev);
+	if (rc)
+		goto err;
+	ghes_arr->ghes_devs[ghes_arr->count++] = ghes_dev;
+
+	return 0;
+err:
+	platform_device_put(ghes_dev);
+	return rc;
+}
+
+static int __init hest_ghes_dev_register(unsigned int ghes_count)
+{
+	int rc, i;
+	struct ghes_arr ghes_arr;
+
+	ghes_arr.count = 0;
+	ghes_arr.ghes_devs = kmalloc_array(ghes_count, sizeof(void *),
+					   GFP_KERNEL);
+	if (!ghes_arr.ghes_devs)
+		return -ENOMEM;
+
+	rc = apei_hest_parse(hest_parse_ghes, &ghes_arr);
+	if (rc)
+		goto err;
+
+	rc = ghes_estatus_pool_init(ghes_count);
+	if (rc)
+		goto err;
+
+out:
+	kfree(ghes_arr.ghes_devs);
+	return rc;
+err:
+	for (i = 0; i < ghes_arr.count; i++)
+		platform_device_unregister(ghes_arr.ghes_devs[i]);
+	goto out;
+}
+
+static int __init setup_hest_disable(char *str)
+{
+	hest_disable = HEST_DISABLED;
+	return 1;
+}
+
+__setup("hest_disable", setup_hest_disable);
+
+void __init acpi_hest_init(void)
+{
+	acpi_status status;
+	int rc;
+	unsigned int ghes_count = 0;
+
+	if (hest_disable) {
+		pr_info(HEST_PFX "Table parsing disabled.\n");
+		return;
+	}
+
+	status = acpi_get_table(ACPI_SIG_HEST, 0,
+				(struct acpi_table_header **)&hest_tab);
+	if (status == AE_NOT_FOUND) {
+		hest_disable = HEST_NOT_FOUND;
+		return;
+	} else if (ACPI_FAILURE(status)) {
+		const char *msg = acpi_format_exception(status);
+		pr_err(HEST_PFX "Failed to get table, %s\n", msg);
+		hest_disable = HEST_DISABLED;
+		return;
+	}
+
+	rc = apei_hest_parse(hest_parse_cmc, NULL);
+	if (rc)
+		goto err;
+
+	if (!ghes_disable) {
+		rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
+		if (rc)
+			goto err;
+
+		if (ghes_count)
+			rc = hest_ghes_dev_register(ghes_count);
+		if (rc)
+			goto err;
+	}
+
+	pr_info(HEST_PFX "Table parsing has been initialized.\n");
+	return;
+err:
+	hest_disable = HEST_DISABLED;
+	acpi_put_table((struct acpi_table_header *)hest_tab);
+}