summaryrefslogtreecommitdiffstats
path: root/drivers/acpi/apei
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r--drivers/acpi/apei/Kconfig13
-rw-r--r--drivers/acpi/apei/Makefile2
-rw-r--r--drivers/acpi/apei/apei-internal.h18
-rw-r--r--drivers/acpi/apei/einj-core.c (renamed from drivers/acpi/apei/einj.c)122
-rw-r--r--drivers/acpi/apei/einj-cxl.c113
-rw-r--r--drivers/acpi/apei/ghes.c17
-rw-r--r--drivers/acpi/apei/hest.c51
7 files changed, 309 insertions, 27 deletions
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index 6b18f8bc7..3cfe7e747 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -60,6 +60,19 @@ config ACPI_APEI_EINJ
mainly used for debugging and testing the other parts of
APEI and some other RAS features.
+config ACPI_APEI_EINJ_CXL
+ bool "CXL Error INJection Support"
+ default ACPI_APEI_EINJ
+ depends on ACPI_APEI_EINJ
+ depends on CXL_BUS && CXL_BUS <= ACPI_APEI_EINJ
+ help
+ Support for CXL protocol Error INJection through debugfs/cxl.
+ Availability and which errors are supported is dependent on
+ the host platform. Look to ACPI v6.5 section 18.6.4 and kernel
+ EINJ documentation for more information.
+
+ If unsure say 'n'
+
config ACPI_APEI_ERST_DEBUG
tristate "APEI Error Record Serialization Table (ERST) Debug Support"
depends on ACPI_APEI
diff --git a/drivers/acpi/apei/Makefile b/drivers/acpi/apei/Makefile
index 4dfac2128..2c474e647 100644
--- a/drivers/acpi/apei/Makefile
+++ b/drivers/acpi/apei/Makefile
@@ -2,6 +2,8 @@
obj-$(CONFIG_ACPI_APEI) += apei.o
obj-$(CONFIG_ACPI_APEI_GHES) += ghes.o
obj-$(CONFIG_ACPI_APEI_EINJ) += einj.o
+einj-y := einj-core.o
+einj-$(CONFIG_ACPI_APEI_EINJ_CXL) += einj-cxl.o
obj-$(CONFIG_ACPI_APEI_ERST_DEBUG) += erst-dbg.o
apei-y := apei-base.o hest.o erst.o bert.o
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index 67c2c3b95..cd2766c69 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -130,4 +130,22 @@ static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus)
}
int apei_osc_setup(void);
+
+int einj_get_available_error_type(u32 *type);
+int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3,
+ u64 param4);
+int einj_cxl_rch_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+ u64 param3, u64 param4);
+bool einj_is_cxl_error_type(u64 type);
+int einj_validate_error_type(u64 type);
+
+#ifndef ACPI_EINJ_CXL_CACHE_CORRECTABLE
+#define ACPI_EINJ_CXL_CACHE_CORRECTABLE BIT(12)
+#define ACPI_EINJ_CXL_CACHE_UNCORRECTABLE BIT(13)
+#define ACPI_EINJ_CXL_CACHE_FATAL BIT(14)
+#define ACPI_EINJ_CXL_MEM_CORRECTABLE BIT(15)
+#define ACPI_EINJ_CXL_MEM_UNCORRECTABLE BIT(16)
+#define ACPI_EINJ_CXL_MEM_FATAL BIT(17)
+#endif
+
#endif
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj-core.c
index 89fb9331c..bb9f8475c 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj-core.c
@@ -21,6 +21,7 @@
#include <linux/nmi.h>
#include <linux/delay.h>
#include <linux/mm.h>
+#include <linux/platform_device.h>
#include <asm/unaligned.h>
#include "apei-internal.h"
@@ -36,6 +37,12 @@
#define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \
ACPI_EINJ_MEMORY_UNCORRECTABLE | \
ACPI_EINJ_MEMORY_FATAL)
+#define CXL_ERROR_MASK (ACPI_EINJ_CXL_CACHE_CORRECTABLE | \
+ ACPI_EINJ_CXL_CACHE_UNCORRECTABLE | \
+ ACPI_EINJ_CXL_CACHE_FATAL | \
+ ACPI_EINJ_CXL_MEM_CORRECTABLE | \
+ ACPI_EINJ_CXL_MEM_UNCORRECTABLE | \
+ ACPI_EINJ_CXL_MEM_FATAL)
/*
* ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action.
@@ -137,6 +144,11 @@ static struct apei_exec_ins_type einj_ins_type[] = {
*/
static DEFINE_MUTEX(einj_mutex);
+/*
+ * Exported APIs use this flag to exit early if einj_probe() failed.
+ */
+bool einj_initialized __ro_after_init;
+
static void *einj_param;
static void einj_exec_ctx_init(struct apei_exec_context *ctx)
@@ -160,7 +172,7 @@ static int __einj_get_available_error_type(u32 *type)
}
/* Get error injection capabilities of the platform */
-static int einj_get_available_error_type(u32 *type)
+int einj_get_available_error_type(u32 *type)
{
int rc;
@@ -530,8 +542,8 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
}
/* Inject the specified hardware error */
-static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
- u64 param3, u64 param4)
+int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3,
+ u64 param4)
{
int rc;
u64 base_addr, size;
@@ -554,8 +566,17 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
if (type & ACPI5_VENDOR_BIT) {
if (vendor_flags != SETWA_FLAGS_MEM)
goto inject;
- } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM))
+ } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) {
goto inject;
+ }
+
+ /*
+ * Injections targeting a CXL 1.0/1.1 port have to be injected
+ * via the einj_cxl_rch_error_inject() path as that does the proper
+ * validation of the given RCRB base (MMIO) address.
+ */
+ if (einj_is_cxl_error_type(type) && (flags & SETWA_FLAGS_MEM))
+ return -EINVAL;
/*
* Disallow crazy address masks that give BIOS leeway to pick
@@ -587,6 +608,21 @@ inject:
return rc;
}
+int einj_cxl_rch_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+ u64 param3, u64 param4)
+{
+ int rc;
+
+ if (!(einj_is_cxl_error_type(type) && (flags & SETWA_FLAGS_MEM)))
+ return -EINVAL;
+
+ mutex_lock(&einj_mutex);
+ rc = __einj_error_inject(type, flags, param1, param2, param3, param4);
+ mutex_unlock(&einj_mutex);
+
+ return rc;
+}
+
static u32 error_type;
static u32 error_flags;
static u64 error_param1;
@@ -607,12 +643,6 @@ static struct { u32 mask; const char *str; } const einj_error_type_string[] = {
{ BIT(9), "Platform Correctable" },
{ BIT(10), "Platform Uncorrectable non-fatal" },
{ BIT(11), "Platform Uncorrectable fatal"},
- { BIT(12), "CXL.cache Protocol Correctable" },
- { BIT(13), "CXL.cache Protocol Uncorrectable non-fatal" },
- { BIT(14), "CXL.cache Protocol Uncorrectable fatal" },
- { BIT(15), "CXL.mem Protocol Correctable" },
- { BIT(16), "CXL.mem Protocol Uncorrectable non-fatal" },
- { BIT(17), "CXL.mem Protocol Uncorrectable fatal" },
{ BIT(31), "Vendor Defined Error Types" },
};
@@ -641,22 +671,26 @@ static int error_type_get(void *data, u64 *val)
return 0;
}
-static int error_type_set(void *data, u64 val)
+bool einj_is_cxl_error_type(u64 type)
{
+ return (type & CXL_ERROR_MASK) && (!(type & ACPI5_VENDOR_BIT));
+}
+
+int einj_validate_error_type(u64 type)
+{
+ u32 tval, vendor, available_error_type = 0;
int rc;
- u32 available_error_type = 0;
- u32 tval, vendor;
/* Only low 32 bits for error type are valid */
- if (val & GENMASK_ULL(63, 32))
+ if (type & GENMASK_ULL(63, 32))
return -EINVAL;
/*
* Vendor defined types have 0x80000000 bit set, and
* are not enumerated by ACPI_EINJ_GET_ERROR_TYPE
*/
- vendor = val & ACPI5_VENDOR_BIT;
- tval = val & 0x7fffffff;
+ vendor = type & ACPI5_VENDOR_BIT;
+ tval = type & GENMASK(30, 0);
/* Only one error type can be specified */
if (tval & (tval - 1))
@@ -665,9 +699,21 @@ static int error_type_set(void *data, u64 val)
rc = einj_get_available_error_type(&available_error_type);
if (rc)
return rc;
- if (!(val & available_error_type))
+ if (!(type & available_error_type))
return -EINVAL;
}
+
+ return 0;
+}
+
+static int error_type_set(void *data, u64 val)
+{
+ int rc;
+
+ rc = einj_validate_error_type(val);
+ if (rc)
+ return rc;
+
error_type = val;
return 0;
@@ -703,21 +749,21 @@ static int einj_check_table(struct acpi_table_einj *einj_tab)
return 0;
}
-static int __init einj_init(void)
+static int __init einj_probe(struct platform_device *pdev)
{
int rc;
acpi_status status;
struct apei_exec_context ctx;
if (acpi_disabled) {
- pr_info("ACPI disabled.\n");
+ pr_debug("ACPI disabled.\n");
return -ENODEV;
}
status = acpi_get_table(ACPI_SIG_EINJ, 0,
(struct acpi_table_header **)&einj_tab);
if (status == AE_NOT_FOUND) {
- pr_warn("EINJ table not found.\n");
+ pr_debug("EINJ table not found.\n");
return -ENODEV;
} else if (ACPI_FAILURE(status)) {
pr_err("Failed to get EINJ table: %s\n",
@@ -805,7 +851,7 @@ err_put_table:
return rc;
}
-static void __exit einj_exit(void)
+static void einj_remove(struct platform_device *pdev)
{
struct apei_exec_context ctx;
@@ -826,6 +872,40 @@ static void __exit einj_exit(void)
acpi_put_table((struct acpi_table_header *)einj_tab);
}
+static struct platform_device *einj_dev;
+static struct platform_driver einj_driver = {
+ .remove_new = einj_remove,
+ .driver = {
+ .name = "acpi-einj",
+ },
+};
+
+static int __init einj_init(void)
+{
+ struct platform_device_info einj_dev_info = {
+ .name = "acpi-einj",
+ .id = -1,
+ };
+ int rc;
+
+ einj_dev = platform_device_register_full(&einj_dev_info);
+ if (IS_ERR(einj_dev))
+ return PTR_ERR(einj_dev);
+
+ rc = platform_driver_probe(&einj_driver, einj_probe);
+ einj_initialized = rc == 0;
+
+ return 0;
+}
+
+static void __exit einj_exit(void)
+{
+ if (einj_initialized)
+ platform_driver_unregister(&einj_driver);
+
+ platform_device_unregister(einj_dev);
+}
+
module_init(einj_init);
module_exit(einj_exit);
diff --git a/drivers/acpi/apei/einj-cxl.c b/drivers/acpi/apei/einj-cxl.c
new file mode 100644
index 000000000..8b8be0c90
--- /dev/null
+++ b/drivers/acpi/apei/einj-cxl.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CXL Error INJection support. Used by CXL core to inject
+ * protocol errors into CXL ports.
+ *
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
+ *
+ * Author: Ben Cheatham <benjamin.cheatham@amd.com>
+ */
+#include <linux/einj-cxl.h>
+#include <linux/seq_file.h>
+#include <linux/pci.h>
+
+#include "apei-internal.h"
+
+/* Defined in einj-core.c */
+extern bool einj_initialized;
+
+static struct { u32 mask; const char *str; } const einj_cxl_error_type_string[] = {
+ { ACPI_EINJ_CXL_CACHE_CORRECTABLE, "CXL.cache Protocol Correctable" },
+ { ACPI_EINJ_CXL_CACHE_UNCORRECTABLE, "CXL.cache Protocol Uncorrectable non-fatal" },
+ { ACPI_EINJ_CXL_CACHE_FATAL, "CXL.cache Protocol Uncorrectable fatal" },
+ { ACPI_EINJ_CXL_MEM_CORRECTABLE, "CXL.mem Protocol Correctable" },
+ { ACPI_EINJ_CXL_MEM_UNCORRECTABLE, "CXL.mem Protocol Uncorrectable non-fatal" },
+ { ACPI_EINJ_CXL_MEM_FATAL, "CXL.mem Protocol Uncorrectable fatal" },
+};
+
+int einj_cxl_available_error_type_show(struct seq_file *m, void *v)
+{
+ int cxl_err, rc;
+ u32 available_error_type = 0;
+
+ rc = einj_get_available_error_type(&available_error_type);
+ if (rc)
+ return rc;
+
+ for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) {
+ cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos;
+
+ if (available_error_type & cxl_err)
+ seq_printf(m, "0x%08x\t%s\n",
+ einj_cxl_error_type_string[pos].mask,
+ einj_cxl_error_type_string[pos].str);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(einj_cxl_available_error_type_show, CXL);
+
+static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf)
+{
+ struct pci_bus *pbus;
+ struct pci_host_bridge *bridge;
+ u64 seg = 0, bus;
+
+ pbus = dport_dev->bus;
+ bridge = pci_find_host_bridge(pbus);
+
+ if (!bridge)
+ return -ENODEV;
+
+ if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET)
+ seg = bridge->domain_nr;
+
+ bus = pbus->number;
+ *sbdf = (seg << 24) | (bus << 16) | dport_dev->devfn;
+
+ return 0;
+}
+
+int einj_cxl_inject_rch_error(u64 rcrb, u64 type)
+{
+ int rc;
+
+ /* Only CXL error types can be specified */
+ if (!einj_is_cxl_error_type(type))
+ return -EINVAL;
+
+ rc = einj_validate_error_type(type);
+ if (rc)
+ return rc;
+
+ return einj_cxl_rch_error_inject(type, 0x2, rcrb, GENMASK_ULL(63, 0),
+ 0, 0);
+}
+EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_rch_error, CXL);
+
+int einj_cxl_inject_error(struct pci_dev *dport, u64 type)
+{
+ u64 param4 = 0;
+ int rc;
+
+ /* Only CXL error types can be specified */
+ if (!einj_is_cxl_error_type(type))
+ return -EINVAL;
+
+ rc = einj_validate_error_type(type);
+ if (rc)
+ return rc;
+
+ rc = cxl_dport_get_sbdf(dport, &param4);
+ if (rc)
+ return rc;
+
+ return einj_error_inject(type, 0x4, 0, 0, 0, param4);
+}
+EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_error, CXL);
+
+bool einj_cxl_is_initialized(void)
+{
+ return einj_initialized;
+}
+EXPORT_SYMBOL_NS_GPL(einj_cxl_is_initialized, CXL);
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index ab2a82cb1..512067cac 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -1455,7 +1455,7 @@ err:
return rc;
}
-static int ghes_remove(struct platform_device *ghes_dev)
+static void ghes_remove(struct platform_device *ghes_dev)
{
int rc;
struct ghes *ghes;
@@ -1492,8 +1492,15 @@ static int ghes_remove(struct platform_device *ghes_dev)
break;
case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
rc = apei_sdei_unregister_ghes(ghes);
- if (rc)
- return rc;
+ if (rc) {
+ /*
+ * Returning early results in a resource leak, but we're
+ * only here if stopping the hardware failed.
+ */
+ dev_err(&ghes_dev->dev, "Failed to unregister ghes (%pe)\n",
+ ERR_PTR(rc));
+ return;
+ }
break;
default:
BUG();
@@ -1507,8 +1514,6 @@ static int ghes_remove(struct platform_device *ghes_dev)
mutex_unlock(&ghes_devs_mutex);
kfree(ghes);
-
- return 0;
}
static struct platform_driver ghes_platform_driver = {
@@ -1516,7 +1521,7 @@ static struct platform_driver ghes_platform_driver = {
.name = "GHES",
},
.probe = ghes_probe,
- .remove = ghes_remove,
+ .remove_new = ghes_remove,
};
void __init acpi_ghes_init(void)
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 6aef1ee5e..20d757687 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -37,6 +37,20 @@ EXPORT_SYMBOL_GPL(hest_disable);
static struct acpi_table_hest *__read_mostly hest_tab;
+/*
+ * Since GHES_ASSIST is not supported, skip initialization of GHES_ASSIST
+ * structures for MCA.
+ * During HEST parsing, detected MCA error sources are cached from early
+ * table entries so that the Flags and Source Id fields from these cached
+ * values are then referred to in later table entries to determine if the
+ * encountered GHES_ASSIST structure should be initialized.
+ */
+static struct {
+ struct acpi_hest_ia_corrected *cmc;
+ struct acpi_hest_ia_machine_check *mc;
+ struct acpi_hest_ia_deferred_check *dmc;
+} mces;
+
static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
[ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */
[ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1,
@@ -70,22 +84,54 @@ static int hest_esrc_len(struct acpi_hest_header *hest_hdr)
cmc = (struct acpi_hest_ia_corrected *)hest_hdr;
len = sizeof(*cmc) + cmc->num_hardware_banks *
sizeof(struct acpi_hest_ia_error_bank);
+ mces.cmc = cmc;
} else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) {
struct acpi_hest_ia_machine_check *mc;
mc = (struct acpi_hest_ia_machine_check *)hest_hdr;
len = sizeof(*mc) + mc->num_hardware_banks *
sizeof(struct acpi_hest_ia_error_bank);
+ mces.mc = mc;
} else if (hest_type == ACPI_HEST_TYPE_IA32_DEFERRED_CHECK) {
struct acpi_hest_ia_deferred_check *mc;
mc = (struct acpi_hest_ia_deferred_check *)hest_hdr;
len = sizeof(*mc) + mc->num_hardware_banks *
sizeof(struct acpi_hest_ia_error_bank);
+ mces.dmc = mc;
}
BUG_ON(len == -1);
return len;
};
+/*
+ * GHES and GHESv2 structures share the same format, starting from
+ * Source Id and ending in Error Status Block Length (inclusive).
+ */
+static bool is_ghes_assist_struct(struct acpi_hest_header *hest_hdr)
+{
+ struct acpi_hest_generic *ghes;
+ u16 related_source_id;
+
+ if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR &&
+ hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2)
+ return false;
+
+ ghes = (struct acpi_hest_generic *)hest_hdr;
+ related_source_id = ghes->related_source_id;
+
+ if (mces.cmc && mces.cmc->flags & ACPI_HEST_GHES_ASSIST &&
+ related_source_id == mces.cmc->header.source_id)
+ return true;
+ if (mces.mc && mces.mc->flags & ACPI_HEST_GHES_ASSIST &&
+ related_source_id == mces.mc->header.source_id)
+ return true;
+ if (mces.dmc && mces.dmc->flags & ACPI_HEST_GHES_ASSIST &&
+ related_source_id == mces.dmc->header.source_id)
+ return true;
+
+ return false;
+}
+
typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data);
static int apei_hest_parse(apei_hest_func_t func, void *data)
@@ -114,6 +160,11 @@ static int apei_hest_parse(apei_hest_func_t func, void *data)
return -EINVAL;
}
+ if (is_ghes_assist_struct(hest_hdr)) {
+ hest_hdr = (void *)hest_hdr + len;
+ continue;
+ }
+
rc = func(hest_hdr, data);
if (rc)
return rc;