diff options
Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r-- | drivers/acpi/apei/Kconfig | 13 | ||||
-rw-r--r-- | drivers/acpi/apei/Makefile | 2 | ||||
-rw-r--r-- | drivers/acpi/apei/apei-internal.h | 18 | ||||
-rw-r--r-- | drivers/acpi/apei/einj-core.c (renamed from drivers/acpi/apei/einj.c) | 122 | ||||
-rw-r--r-- | drivers/acpi/apei/einj-cxl.c | 113 | ||||
-rw-r--r-- | drivers/acpi/apei/ghes.c | 17 | ||||
-rw-r--r-- | drivers/acpi/apei/hest.c | 51 |
7 files changed, 309 insertions, 27 deletions
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index 6b18f8bc7b..3cfe7e7475 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -60,6 +60,19 @@ config ACPI_APEI_EINJ mainly used for debugging and testing the other parts of APEI and some other RAS features. +config ACPI_APEI_EINJ_CXL + bool "CXL Error INJection Support" + default ACPI_APEI_EINJ + depends on ACPI_APEI_EINJ + depends on CXL_BUS && CXL_BUS <= ACPI_APEI_EINJ + help + Support for CXL protocol Error INJection through debugfs/cxl. + Availability and which errors are supported is dependent on + the host platform. Look to ACPI v6.5 section 18.6.4 and kernel + EINJ documentation for more information. + + If unsure say 'n' + config ACPI_APEI_ERST_DEBUG tristate "APEI Error Record Serialization Table (ERST) Debug Support" depends on ACPI_APEI diff --git a/drivers/acpi/apei/Makefile b/drivers/acpi/apei/Makefile index 4dfac21287..2c474e6477 100644 --- a/drivers/acpi/apei/Makefile +++ b/drivers/acpi/apei/Makefile @@ -2,6 +2,8 @@ obj-$(CONFIG_ACPI_APEI) += apei.o obj-$(CONFIG_ACPI_APEI_GHES) += ghes.o obj-$(CONFIG_ACPI_APEI_EINJ) += einj.o +einj-y := einj-core.o +einj-$(CONFIG_ACPI_APEI_EINJ_CXL) += einj-cxl.o obj-$(CONFIG_ACPI_APEI_ERST_DEBUG) += erst-dbg.o apei-y := apei-base.o hest.o erst.o bert.o diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h index 67c2c3b959..cd2766c69d 100644 --- a/drivers/acpi/apei/apei-internal.h +++ b/drivers/acpi/apei/apei-internal.h @@ -130,4 +130,22 @@ static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus) } int apei_osc_setup(void); + +int einj_get_available_error_type(u32 *type); +int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3, + u64 param4); +int einj_cxl_rch_error_inject(u32 type, u32 flags, u64 param1, u64 param2, + u64 param3, u64 param4); +bool einj_is_cxl_error_type(u64 type); +int einj_validate_error_type(u64 type); + +#ifndef ACPI_EINJ_CXL_CACHE_CORRECTABLE +#define ACPI_EINJ_CXL_CACHE_CORRECTABLE BIT(12) +#define ACPI_EINJ_CXL_CACHE_UNCORRECTABLE BIT(13) +#define ACPI_EINJ_CXL_CACHE_FATAL BIT(14) +#define ACPI_EINJ_CXL_MEM_CORRECTABLE BIT(15) +#define ACPI_EINJ_CXL_MEM_UNCORRECTABLE BIT(16) +#define ACPI_EINJ_CXL_MEM_FATAL BIT(17) +#endif + #endif diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj-core.c index 89fb9331c6..01faca3a23 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj-core.c @@ -21,6 +21,7 @@ #include <linux/nmi.h> #include <linux/delay.h> #include <linux/mm.h> +#include <linux/platform_device.h> #include <asm/unaligned.h> #include "apei-internal.h" @@ -36,6 +37,12 @@ #define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \ ACPI_EINJ_MEMORY_UNCORRECTABLE | \ ACPI_EINJ_MEMORY_FATAL) +#define CXL_ERROR_MASK (ACPI_EINJ_CXL_CACHE_CORRECTABLE | \ + ACPI_EINJ_CXL_CACHE_UNCORRECTABLE | \ + ACPI_EINJ_CXL_CACHE_FATAL | \ + ACPI_EINJ_CXL_MEM_CORRECTABLE | \ + ACPI_EINJ_CXL_MEM_UNCORRECTABLE | \ + ACPI_EINJ_CXL_MEM_FATAL) /* * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action. @@ -137,6 +144,11 @@ static struct apei_exec_ins_type einj_ins_type[] = { */ static DEFINE_MUTEX(einj_mutex); +/* + * Exported APIs use this flag to exit early if einj_probe() failed. + */ +bool einj_initialized __ro_after_init; + static void *einj_param; static void einj_exec_ctx_init(struct apei_exec_context *ctx) @@ -160,7 +172,7 @@ static int __einj_get_available_error_type(u32 *type) } /* Get error injection capabilities of the platform */ -static int einj_get_available_error_type(u32 *type) +int einj_get_available_error_type(u32 *type) { int rc; @@ -530,8 +542,8 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, } /* Inject the specified hardware error */ -static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, - u64 param3, u64 param4) +int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3, + u64 param4) { int rc; u64 base_addr, size; @@ -554,8 +566,17 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, if (type & ACPI5_VENDOR_BIT) { if (vendor_flags != SETWA_FLAGS_MEM) goto inject; - } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) + } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) { goto inject; + } + + /* + * Injections targeting a CXL 1.0/1.1 port have to be injected + * via the einj_cxl_rch_error_inject() path as that does the proper + * validation of the given RCRB base (MMIO) address. + */ + if (einj_is_cxl_error_type(type) && (flags & SETWA_FLAGS_MEM)) + return -EINVAL; /* * Disallow crazy address masks that give BIOS leeway to pick @@ -587,6 +608,21 @@ inject: return rc; } +int einj_cxl_rch_error_inject(u32 type, u32 flags, u64 param1, u64 param2, + u64 param3, u64 param4) +{ + int rc; + + if (!(einj_is_cxl_error_type(type) && (flags & SETWA_FLAGS_MEM))) + return -EINVAL; + + mutex_lock(&einj_mutex); + rc = __einj_error_inject(type, flags, param1, param2, param3, param4); + mutex_unlock(&einj_mutex); + + return rc; +} + static u32 error_type; static u32 error_flags; static u64 error_param1; @@ -607,12 +643,6 @@ static struct { u32 mask; const char *str; } const einj_error_type_string[] = { { BIT(9), "Platform Correctable" }, { BIT(10), "Platform Uncorrectable non-fatal" }, { BIT(11), "Platform Uncorrectable fatal"}, - { BIT(12), "CXL.cache Protocol Correctable" }, - { BIT(13), "CXL.cache Protocol Uncorrectable non-fatal" }, - { BIT(14), "CXL.cache Protocol Uncorrectable fatal" }, - { BIT(15), "CXL.mem Protocol Correctable" }, - { BIT(16), "CXL.mem Protocol Uncorrectable non-fatal" }, - { BIT(17), "CXL.mem Protocol Uncorrectable fatal" }, { BIT(31), "Vendor Defined Error Types" }, }; @@ -641,22 +671,26 @@ static int error_type_get(void *data, u64 *val) return 0; } -static int error_type_set(void *data, u64 val) +bool einj_is_cxl_error_type(u64 type) { + return (type & CXL_ERROR_MASK) && (!(type & ACPI5_VENDOR_BIT)); +} + +int einj_validate_error_type(u64 type) +{ + u32 tval, vendor, available_error_type = 0; int rc; - u32 available_error_type = 0; - u32 tval, vendor; /* Only low 32 bits for error type are valid */ - if (val & GENMASK_ULL(63, 32)) + if (type & GENMASK_ULL(63, 32)) return -EINVAL; /* * Vendor defined types have 0x80000000 bit set, and * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE */ - vendor = val & ACPI5_VENDOR_BIT; - tval = val & 0x7fffffff; + vendor = type & ACPI5_VENDOR_BIT; + tval = type & GENMASK(30, 0); /* Only one error type can be specified */ if (tval & (tval - 1)) @@ -665,9 +699,21 @@ static int error_type_set(void *data, u64 val) rc = einj_get_available_error_type(&available_error_type); if (rc) return rc; - if (!(val & available_error_type)) + if (!(type & available_error_type)) return -EINVAL; } + + return 0; +} + +static int error_type_set(void *data, u64 val) +{ + int rc; + + rc = einj_validate_error_type(val); + if (rc) + return rc; + error_type = val; return 0; @@ -703,21 +749,21 @@ static int einj_check_table(struct acpi_table_einj *einj_tab) return 0; } -static int __init einj_init(void) +static int __init einj_probe(struct platform_device *pdev) { int rc; acpi_status status; struct apei_exec_context ctx; if (acpi_disabled) { - pr_info("ACPI disabled.\n"); + pr_debug("ACPI disabled.\n"); return -ENODEV; } status = acpi_get_table(ACPI_SIG_EINJ, 0, (struct acpi_table_header **)&einj_tab); if (status == AE_NOT_FOUND) { - pr_warn("EINJ table not found.\n"); + pr_debug("EINJ table not found.\n"); return -ENODEV; } else if (ACPI_FAILURE(status)) { pr_err("Failed to get EINJ table: %s\n", @@ -805,7 +851,7 @@ err_put_table: return rc; } -static void __exit einj_exit(void) +static void einj_remove(struct platform_device *pdev) { struct apei_exec_context ctx; @@ -826,6 +872,40 @@ static void __exit einj_exit(void) acpi_put_table((struct acpi_table_header *)einj_tab); } +static struct platform_device *einj_dev; +static struct platform_driver einj_driver = { + .remove_new = einj_remove, + .driver = { + .name = "acpi-einj", + }, +}; + +static int __init einj_init(void) +{ + struct platform_device_info einj_dev_info = { + .name = "acpi-einj", + .id = -1, + }; + int rc; + + einj_dev = platform_device_register_full(&einj_dev_info); + if (IS_ERR(einj_dev)) + return PTR_ERR(einj_dev); + + rc = platform_driver_probe(&einj_driver, einj_probe); + einj_initialized = rc == 0; + + return 0; +} + +static void __exit einj_exit(void) +{ + if (einj_initialized) + platform_driver_unregister(&einj_driver); + + platform_device_del(einj_dev); +} + module_init(einj_init); module_exit(einj_exit); diff --git a/drivers/acpi/apei/einj-cxl.c b/drivers/acpi/apei/einj-cxl.c new file mode 100644 index 0000000000..8b8be0c907 --- /dev/null +++ b/drivers/acpi/apei/einj-cxl.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * CXL Error INJection support. Used by CXL core to inject + * protocol errors into CXL ports. + * + * Copyright (C) 2023 Advanced Micro Devices, Inc. + * + * Author: Ben Cheatham <benjamin.cheatham@amd.com> + */ +#include <linux/einj-cxl.h> +#include <linux/seq_file.h> +#include <linux/pci.h> + +#include "apei-internal.h" + +/* Defined in einj-core.c */ +extern bool einj_initialized; + +static struct { u32 mask; const char *str; } const einj_cxl_error_type_string[] = { + { ACPI_EINJ_CXL_CACHE_CORRECTABLE, "CXL.cache Protocol Correctable" }, + { ACPI_EINJ_CXL_CACHE_UNCORRECTABLE, "CXL.cache Protocol Uncorrectable non-fatal" }, + { ACPI_EINJ_CXL_CACHE_FATAL, "CXL.cache Protocol Uncorrectable fatal" }, + { ACPI_EINJ_CXL_MEM_CORRECTABLE, "CXL.mem Protocol Correctable" }, + { ACPI_EINJ_CXL_MEM_UNCORRECTABLE, "CXL.mem Protocol Uncorrectable non-fatal" }, + { ACPI_EINJ_CXL_MEM_FATAL, "CXL.mem Protocol Uncorrectable fatal" }, +}; + +int einj_cxl_available_error_type_show(struct seq_file *m, void *v) +{ + int cxl_err, rc; + u32 available_error_type = 0; + + rc = einj_get_available_error_type(&available_error_type); + if (rc) + return rc; + + for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) { + cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos; + + if (available_error_type & cxl_err) + seq_printf(m, "0x%08x\t%s\n", + einj_cxl_error_type_string[pos].mask, + einj_cxl_error_type_string[pos].str); + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(einj_cxl_available_error_type_show, CXL); + +static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf) +{ + struct pci_bus *pbus; + struct pci_host_bridge *bridge; + u64 seg = 0, bus; + + pbus = dport_dev->bus; + bridge = pci_find_host_bridge(pbus); + + if (!bridge) + return -ENODEV; + + if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET) + seg = bridge->domain_nr; + + bus = pbus->number; + *sbdf = (seg << 24) | (bus << 16) | dport_dev->devfn; + + return 0; +} + +int einj_cxl_inject_rch_error(u64 rcrb, u64 type) +{ + int rc; + + /* Only CXL error types can be specified */ + if (!einj_is_cxl_error_type(type)) + return -EINVAL; + + rc = einj_validate_error_type(type); + if (rc) + return rc; + + return einj_cxl_rch_error_inject(type, 0x2, rcrb, GENMASK_ULL(63, 0), + 0, 0); +} +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_rch_error, CXL); + +int einj_cxl_inject_error(struct pci_dev *dport, u64 type) +{ + u64 param4 = 0; + int rc; + + /* Only CXL error types can be specified */ + if (!einj_is_cxl_error_type(type)) + return -EINVAL; + + rc = einj_validate_error_type(type); + if (rc) + return rc; + + rc = cxl_dport_get_sbdf(dport, ¶m4); + if (rc) + return rc; + + return einj_error_inject(type, 0x4, 0, 0, 0, param4); +} +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_error, CXL); + +bool einj_cxl_is_initialized(void) +{ + return einj_initialized; +} +EXPORT_SYMBOL_NS_GPL(einj_cxl_is_initialized, CXL); diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index ab2a82cb1b..512067cac1 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -1455,7 +1455,7 @@ err: return rc; } -static int ghes_remove(struct platform_device *ghes_dev) +static void ghes_remove(struct platform_device *ghes_dev) { int rc; struct ghes *ghes; @@ -1492,8 +1492,15 @@ static int ghes_remove(struct platform_device *ghes_dev) break; case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: rc = apei_sdei_unregister_ghes(ghes); - if (rc) - return rc; + if (rc) { + /* + * Returning early results in a resource leak, but we're + * only here if stopping the hardware failed. + */ + dev_err(&ghes_dev->dev, "Failed to unregister ghes (%pe)\n", + ERR_PTR(rc)); + return; + } break; default: BUG(); @@ -1507,8 +1514,6 @@ static int ghes_remove(struct platform_device *ghes_dev) mutex_unlock(&ghes_devs_mutex); kfree(ghes); - - return 0; } static struct platform_driver ghes_platform_driver = { @@ -1516,7 +1521,7 @@ static struct platform_driver ghes_platform_driver = { .name = "GHES", }, .probe = ghes_probe, - .remove = ghes_remove, + .remove_new = ghes_remove, }; void __init acpi_ghes_init(void) diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 6aef1ee5e1..20d757687e 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -37,6 +37,20 @@ EXPORT_SYMBOL_GPL(hest_disable); static struct acpi_table_hest *__read_mostly hest_tab; +/* + * Since GHES_ASSIST is not supported, skip initialization of GHES_ASSIST + * structures for MCA. + * During HEST parsing, detected MCA error sources are cached from early + * table entries so that the Flags and Source Id fields from these cached + * values are then referred to in later table entries to determine if the + * encountered GHES_ASSIST structure should be initialized. + */ +static struct { + struct acpi_hest_ia_corrected *cmc; + struct acpi_hest_ia_machine_check *mc; + struct acpi_hest_ia_deferred_check *dmc; +} mces; + static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = { [ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */ [ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1, @@ -70,22 +84,54 @@ static int hest_esrc_len(struct acpi_hest_header *hest_hdr) cmc = (struct acpi_hest_ia_corrected *)hest_hdr; len = sizeof(*cmc) + cmc->num_hardware_banks * sizeof(struct acpi_hest_ia_error_bank); + mces.cmc = cmc; } else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) { struct acpi_hest_ia_machine_check *mc; mc = (struct acpi_hest_ia_machine_check *)hest_hdr; len = sizeof(*mc) + mc->num_hardware_banks * sizeof(struct acpi_hest_ia_error_bank); + mces.mc = mc; } else if (hest_type == ACPI_HEST_TYPE_IA32_DEFERRED_CHECK) { struct acpi_hest_ia_deferred_check *mc; mc = (struct acpi_hest_ia_deferred_check *)hest_hdr; len = sizeof(*mc) + mc->num_hardware_banks * sizeof(struct acpi_hest_ia_error_bank); + mces.dmc = mc; } BUG_ON(len == -1); return len; }; +/* + * GHES and GHESv2 structures share the same format, starting from + * Source Id and ending in Error Status Block Length (inclusive). + */ +static bool is_ghes_assist_struct(struct acpi_hest_header *hest_hdr) +{ + struct acpi_hest_generic *ghes; + u16 related_source_id; + + if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR && + hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2) + return false; + + ghes = (struct acpi_hest_generic *)hest_hdr; + related_source_id = ghes->related_source_id; + + if (mces.cmc && mces.cmc->flags & ACPI_HEST_GHES_ASSIST && + related_source_id == mces.cmc->header.source_id) + return true; + if (mces.mc && mces.mc->flags & ACPI_HEST_GHES_ASSIST && + related_source_id == mces.mc->header.source_id) + return true; + if (mces.dmc && mces.dmc->flags & ACPI_HEST_GHES_ASSIST && + related_source_id == mces.dmc->header.source_id) + return true; + + return false; +} + typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data); static int apei_hest_parse(apei_hest_func_t func, void *data) @@ -114,6 +160,11 @@ static int apei_hest_parse(apei_hest_func_t func, void *data) return -EINVAL; } + if (is_ghes_assist_struct(hest_hdr)) { + hest_hdr = (void *)hest_hdr + len; + continue; + } + rc = func(hest_hdr, data); if (rc) return rc; |