diff options
Diffstat (limited to 'drivers/acpi/nfit/mce.c')
-rw-r--r-- | drivers/acpi/nfit/mce.c | 100 |
1 files changed, 100 insertions, 0 deletions
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c new file mode 100644 index 000000000..d48a388b7 --- /dev/null +++ b/drivers/acpi/nfit/mce.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * NFIT - Machine Check Handler + * + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. + */ +#include <linux/notifier.h> +#include <linux/acpi.h> +#include <linux/nd.h> +#include <asm/mce.h> +#include "nfit.h" + +static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct mce *mce = (struct mce *)data; + struct acpi_nfit_desc *acpi_desc; + struct nfit_spa *nfit_spa; + + /* We only care about uncorrectable memory errors */ + if (!mce_is_memory_error(mce) || mce_is_correctable(mce)) + return NOTIFY_DONE; + + /* Verify the address reported in the MCE is valid. */ + if (!mce_usable_address(mce)) + return NOTIFY_DONE; + + /* + * mce->addr contains the physical addr accessed that caused the + * machine check. We need to walk through the list of NFITs, and see + * if any of them matches that address, and only then start a scrub. + */ + mutex_lock(&acpi_desc_lock); + list_for_each_entry(acpi_desc, &acpi_descs, list) { + unsigned int align = 1UL << MCI_MISC_ADDR_LSB(mce->misc); + struct device *dev = acpi_desc->dev; + int found_match = 0; + + mutex_lock(&acpi_desc->init_mutex); + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { + struct acpi_nfit_system_address *spa = nfit_spa->spa; + + if (nfit_spa_type(spa) != NFIT_SPA_PM) + continue; + /* find the spa that covers the mce addr */ + if (spa->address > mce->addr) + continue; + if ((spa->address + spa->length - 1) < mce->addr) + continue; + found_match = 1; + dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n", + spa->range_index, spa->address, spa->length); + /* + * We can break at the first match because we're going + * to rescan all the SPA ranges. There shouldn't be any + * aliasing anyway. + */ + break; + } + mutex_unlock(&acpi_desc->init_mutex); + + if (!found_match) + continue; + + /* If this fails due to an -ENOMEM, there is little we can do */ + nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus, + ALIGN_DOWN(mce->addr, align), align); + nvdimm_region_notify(nfit_spa->nd_region, + NVDIMM_REVALIDATE_POISON); + + if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) { + /* + * We can ignore an -EBUSY here because if an ARS is + * already in progress, just let that be the last + * authoritative one + */ + acpi_nfit_ars_rescan(acpi_desc, 0); + } + mce->kflags |= MCE_HANDLED_NFIT; + break; + } + + mutex_unlock(&acpi_desc_lock); + return NOTIFY_DONE; +} + +static struct notifier_block nfit_mce_dec = { + .notifier_call = nfit_handle_mce, + .priority = MCE_PRIO_NFIT, +}; + +void nfit_mce_register(void) +{ + mce_register_decode_chain(&nfit_mce_dec); +} + +void nfit_mce_unregister(void) +{ + mce_unregister_decode_chain(&nfit_mce_dec); +} |