summary | refs | log | tree | commit | diff | stats
path: root/drivers/pci/pcie/aer.c
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 17:40:19 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 17:40:19 +0000
commit: 9f0fc191371843c4fc000a226b0a26b6c059aacd (patch)
tree: 35f8be3ef04506ac891ad001e8c41e535ae8d01d /drivers/pci/pcie/aer.c
parent: Releasing progress-linux version 6.6.15-2~progress7.99u1. (diff)
download: linux-9f0fc191371843c4fc000a226b0a26b6c059aacd.tar.xz
linux-9f0fc191371843c4fc000a226b0a26b6c059aacd.zip
Merging upstream version 6.7.7.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/pci/pcie/aer.c')
-rw-r--r-- drivers/pci/pcie/aer.c 213
1 files changed, 187 insertions, 26 deletions
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 40d84cb0c..38e334677 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -740,7 +740,7 @@ static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
u8 bus = info->id >> 8;
u8 devfn = info->id & 0xff;
- pci_info(dev, "%s%s error received: %04x:%02x:%02x.%d\n",
+ pci_info(dev, "%s%s error message received from %04x:%02x:%02x.%d\n",
info->multi_error_valid ? "Multiple " : "",
aer_error_severity_string[info->severity],
pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn),
@@ -760,9 +760,10 @@ int cper_severity_to_aer(int cper_severity)
}
}
EXPORT_SYMBOL_GPL(cper_severity_to_aer);
+#endif
-void cper_print_aer(struct pci_dev *dev, int aer_severity,
- struct aer_capability_regs *aer)
+void pci_print_aer(struct pci_dev *dev, int aer_severity,
+ struct aer_capability_regs *aer)
{
int layer, agent, tlp_header_valid = 0;
u32 status, mask;
@@ -801,7 +802,7 @@ void cper_print_aer(struct pci_dev *dev, int aer_severity,
trace_aer_event(dev_name(&dev->dev), (status & ~mask),
aer_severity, tlp_header_valid, &aer->header_log);
}
-#endif
+EXPORT_SYMBOL_NS_GPL(pci_print_aer, CXL);
/**
* add_error_device - list device to be handled
@@ -928,20 +929,164 @@ static bool find_source_device(struct pci_dev *parent,
pci_walk_bus(parent->subordinate, find_device_iter, e_info);
if (!e_info->error_dev_num) {
- pci_info(parent, "can't find device of ID%04x\n", e_info->id);
+ u8 bus = e_info->id >> 8;
+ u8 devfn = e_info->id & 0xff;
+
+ pci_info(parent, "found no error details for %04x:%02x:%02x.%d\n",
+ pci_domain_nr(parent->bus), bus, PCI_SLOT(devfn),
+ PCI_FUNC(devfn));
return false;
}
return true;
}
+#ifdef CONFIG_PCIEAER_CXL
+
+/**
+ * pci_aer_unmask_internal_errors - unmask internal errors
+ * @dev: pointer to the pci_dev data structure
+ *
+ * Unmasks internal errors in the Uncorrectable and Correctable Error
+ * Mask registers.
+ *
+ * Note: AER must be enabled and supported by the device which must be
+ * checked in advance, e.g. with pcie_aer_is_native().
+ */
+static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
+{
+ int aer = dev->aer_cap;
+ u32 mask;
+
+ pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask);
+ mask &= ~PCI_ERR_UNC_INTN;
+ pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, mask);
+
+ pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask);
+ mask &= ~PCI_ERR_COR_INTERNAL;
+ pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask);
+}
+
+static bool is_cxl_mem_dev(struct pci_dev *dev)
+{
+ /*
+ * The capability, status, and control fields in Device 0,
+ * Function 0 DVSEC control the CXL functionality of the
+ * entire device (CXL 3.0, 8.1.3).
+ */
+ if (dev->devfn != PCI_DEVFN(0, 0))
+ return false;
+
+ /*
+ * CXL Memory Devices must have the 502h class code set (CXL
+ * 3.0, 8.1.12.1).
+ */
+ if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL)
+ return false;
+
+ return true;
+}
+
+static bool cxl_error_is_native(struct pci_dev *dev)
+{
+ struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
+
+ return (pcie_ports_native || host->native_aer);
+}
+
+static bool is_internal_error(struct aer_err_info *info)
+{
+ if (info->severity == AER_CORRECTABLE)
+ return info->status & PCI_ERR_COR_INTERNAL;
+
+ return info->status & PCI_ERR_UNC_INTN;
+}
+
+static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
+{
+ struct aer_err_info *info = (struct aer_err_info *)data;
+ const struct pci_error_handlers *err_handler;
+
+ if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev))
+ return 0;
+
+ /* protect dev->driver */
+ device_lock(&dev->dev);
+
+ err_handler = dev->driver ? dev->driver->err_handler : NULL;
+ if (!err_handler)
+ goto out;
+
+ if (info->severity == AER_CORRECTABLE) {
+ if (err_handler->cor_error_detected)
+ err_handler->cor_error_detected(dev);
+ } else if (err_handler->error_detected) {
+ if (info->severity == AER_NONFATAL)
+ err_handler->error_detected(dev, pci_channel_io_normal);
+ else if (info->severity == AER_FATAL)
+ err_handler->error_detected(dev, pci_channel_io_frozen);
+ }
+out:
+ device_unlock(&dev->dev);
+ return 0;
+}
+
+static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info)
+{
+ /*
+ * Internal errors of an RCEC indicate an AER error in an
+ * RCH's downstream port. Check and handle them in the CXL.mem
+ * device driver.
+ */
+ if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC &&
+ is_internal_error(info))
+ pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
+}
+
+static int handles_cxl_error_iter(struct pci_dev *dev, void *data)
+{
+ bool *handles_cxl = data;
+
+ if (!*handles_cxl)
+ *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev);
+
+ /* Non-zero terminates iteration */
+ return *handles_cxl;
+}
+
+static bool handles_cxl_errors(struct pci_dev *rcec)
+{
+ bool handles_cxl = false;
+
+ if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC &&
+ pcie_aer_is_native(rcec))
+ pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl);
+
+ return handles_cxl;
+}
+
+static void cxl_rch_enable_rcec(struct pci_dev *rcec)
+{
+	if (!handles_cxl_errors(rcec))
+		return;
+
+	pci_aer_unmask_internal_errors(rcec);
+	pci_info(rcec, "CXL: Internal errors unmasked\n");
+}
+
+#else
+static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { }
+static inline void cxl_rch_handle_error(struct pci_dev *dev,
+ struct aer_err_info *info) { }
+#endif
+
/**
- * handle_error_source - handle logging error into an event log
+ * pci_aer_handle_error - handle logging error into an event log
* @dev: pointer to pci_dev data structure of error source device
* @info: comprehensive error information
*
* Invoked when an error being detected by Root Port.
*/
-static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
+static void pci_aer_handle_error(struct pci_dev *dev, struct aer_err_info *info)
{
int aer = dev->aer_cap;
@@ -965,6 +1110,12 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset);
else if (info->severity == AER_FATAL)
pcie_do_recovery(dev, pci_channel_io_frozen, aer_root_reset);
+}
+
+static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
+{
+ cxl_rch_handle_error(dev, info);
+ pci_aer_handle_error(dev, info);
pci_dev_put(dev);
}
@@ -997,7 +1148,7 @@ static void aer_recover_work_func(struct work_struct *work)
PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
continue;
}
- cper_print_aer(pdev, entry.severity, entry.regs);
+ pci_print_aer(pdev, entry.severity, entry.regs);
/*
* Memory for aer_capability_regs(entry.regs) is being allocated from the
* ghes_estatus_pool to protect it from overwriting when multiple sections
@@ -1224,6 +1375,28 @@ static irqreturn_t aer_irq(int irq, void *context)
return IRQ_WAKE_THREAD;
}
+static void aer_enable_irq(struct pci_dev *pdev)
+{
+ int aer = pdev->aer_cap;
+ u32 reg32;
+
+ /* Enable Root Port's interrupt in response to error messages */
+ pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+ reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
+ pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+}
+
+static void aer_disable_irq(struct pci_dev *pdev)
+{
+ int aer = pdev->aer_cap;
+ u32 reg32;
+
+ /* Disable Root's interrupt in response to error messages */
+ pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+ reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
+ pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+}
+
/**
* aer_enable_rootport - enable Root Port's interrupts when receiving messages
* @rpc: pointer to a Root Port data structure
@@ -1253,10 +1426,7 @@ static void aer_enable_rootport(struct aer_rpc *rpc)
pci_read_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, &reg32);
pci_write_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, reg32);
- /* Enable Root Port's interrupt in response to error messages */
- pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
- reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
- pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+ aer_enable_irq(pdev);
}
/**
@@ -1271,10 +1441,7 @@ static void aer_disable_rootport(struct aer_rpc *rpc)
int aer = pdev->aer_cap;
u32 reg32;
- /* Disable Root's interrupt in response to error messages */
- pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
- reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
- pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+ aer_disable_irq(pdev);
/* Clear Root's error status reg */
pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, &reg32);
@@ -1332,6 +1499,7 @@ static int aer_probe(struct pcie_device *dev)
return status;
}
+ cxl_rch_enable_rcec(port);
aer_enable_rootport(rpc);
pci_info(port, "enabled with IRQ %d\n", dev->irq);
return 0;
@@ -1369,12 +1537,8 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
*/
aer = root ? root->aer_cap : 0;
- if ((host->native_aer || pcie_ports_native) && aer) {
- /* Disable Root's interrupt in response to error messages */
- pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
- reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
- pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
- }
+ if ((host->native_aer || pcie_ports_native) && aer)
+ aer_disable_irq(root);
if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
rc = pcie_reset_flr(dev, PCI_RESET_DO_RESET);
@@ -1393,10 +1557,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32);
pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);
- /* Enable Root Port's interrupt in response to error messages */
- pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
- reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
- pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
+ aer_enable_irq(root);
}
return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;