diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 01:02:30 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 01:02:30 +0000 |
commit | 76cb841cb886eef6b3bee341a2266c76578724ad (patch) | |
tree | f5892e5ba6cc11949952a6ce4ecbe6d516d6ce58 /drivers/pci/pcie | |
parent | Initial commit. (diff) | |
download | linux-76cb841cb886eef6b3bee341a2266c76578724ad.tar.xz linux-76cb841cb886eef6b3bee341a2266c76578724ad.zip |
Adding upstream version 4.19.249.upstream/4.19.249upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/pci/pcie')
-rw-r--r-- | drivers/pci/pcie/Kconfig | 149 | ||||
-rw-r--r-- | drivers/pci/pcie/Makefile | 14 | ||||
-rw-r--r-- | drivers/pci/pcie/aer.c | 1579 | ||||
-rw-r--r-- | drivers/pci/pcie/aer_inject.c | 551 | ||||
-rw-r--r-- | drivers/pci/pcie/aspm.c | 1313 | ||||
-rw-r--r-- | drivers/pci/pcie/dpc.c | 313 | ||||
-rw-r--r-- | drivers/pci/pcie/err.c | 346 | ||||
-rw-r--r-- | drivers/pci/pcie/pme.c | 461 | ||||
-rw-r--r-- | drivers/pci/pcie/portdrv.h | 157 | ||||
-rw-r--r-- | drivers/pci/pcie/portdrv_core.c | 578 | ||||
-rw-r--r-- | drivers/pci/pcie/portdrv_pci.c | 247 | ||||
-rw-r--r-- | drivers/pci/pcie/ptm.c | 146 |
12 files changed, 5854 insertions, 0 deletions
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig new file mode 100644 index 000000000..0a1e9d379 --- /dev/null +++ b/drivers/pci/pcie/Kconfig @@ -0,0 +1,149 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# PCI Express Port Bus Configuration +# +config PCIEPORTBUS + bool "PCI Express Port Bus support" + depends on PCI + help + This automatically enables PCI Express Port Bus support. Users can + choose Native Hot-Plug support, Advanced Error Reporting support, + Power Management Event support and Virtual Channel support to run + on PCI Express Ports (Root or Switch). + +# +# Include service Kconfig here +# +config HOTPLUG_PCI_PCIE + bool "PCI Express Hotplug driver" + depends on HOTPLUG_PCI && PCIEPORTBUS + help + Say Y here if you have a motherboard that supports PCI Express Native + Hotplug + + When in doubt, say N. + +config PCIEAER + bool "PCI Express Advanced Error Reporting support" + depends on PCIEPORTBUS + select RAS + default y + help + This enables PCI Express Root Port Advanced Error Reporting + (AER) driver support. Error reporting messages sent to Root + Port will be handled by PCI Express AER driver. + +config PCIEAER_INJECT + tristate "PCI Express error injection support" + depends on PCIEAER + default n + help + This enables PCI Express Root Port Advanced Error Reporting + (AER) software error injector. + + Debugging AER code is quite difficult because it is hard + to trigger various real hardware errors. Software-based + error injection can fake almost all kinds of errors with the + help of a user space helper tool aer-inject, which can be + gotten from: + http://www.kernel.org/pub/linux/utils/pci/aer-inject/ + +# +# PCI Express ECRC +# +config PCIE_ECRC + bool "PCI Express ECRC settings control" + depends on PCIEAER + help + Used to override firmware/bios settings for PCI Express ECRC + (transaction layer end-to-end CRC checking). + + When in doubt, say N. + +# +# PCI Express ASPM +# +config PCIEASPM + bool "PCI Express ASPM control" if EXPERT + depends on PCI && PCIEPORTBUS + default y + help + This enables OS control over PCI Express ASPM (Active State + Power Management) and Clock Power Management. ASPM supports + state L0/L0s/L1. + + ASPM is initially set up by the firmware. With this option enabled, + Linux can modify this state in order to disable ASPM on known-bad + hardware or configurations and enable it when known-safe. + + ASPM can be disabled or enabled at runtime via + /sys/module/pcie_aspm/parameters/policy + + When in doubt, say Y. + +config PCIEASPM_DEBUG + bool "Debug PCI Express ASPM" + depends on PCIEASPM + default n + help + This enables PCI Express ASPM debug support. It will add per-device + interface to control ASPM. + +choice + prompt "Default ASPM policy" + default PCIEASPM_DEFAULT + depends on PCIEASPM + +config PCIEASPM_DEFAULT + bool "BIOS default" + depends on PCIEASPM + help + Use the BIOS defaults for PCI Express ASPM. + +config PCIEASPM_POWERSAVE + bool "Powersave" + depends on PCIEASPM + help + Enable PCI Express ASPM L0s and L1 where possible, even if the + BIOS did not. + +config PCIEASPM_POWER_SUPERSAVE + bool "Power Supersave" + depends on PCIEASPM + help + Same as PCIEASPM_POWERSAVE, except it also enables L1 substates where + possible. This would result in higher power savings while staying in L1 + where the components support it. + +config PCIEASPM_PERFORMANCE + bool "Performance" + depends on PCIEASPM + help + Disable PCI Express ASPM L0s and L1, even if the BIOS enabled them. +endchoice + +config PCIE_PME + def_bool y + depends on PCIEPORTBUS && PM + +config PCIE_DPC + bool "PCI Express Downstream Port Containment support" + depends on PCIEPORTBUS && PCIEAER + default n + help + This enables PCI Express Downstream Port Containment (DPC) + driver support. DPC events from Root and Downstream ports + will be handled by the DPC driver. If your system doesn't + have this capability or you do not want to use this feature, + it is safe to answer N. + +config PCIE_PTM + bool "PCI Express Precision Time Measurement support" + default n + depends on PCIEPORTBUS + help + This enables PCI Express Precision Time Measurement (PTM) + support. + + This is only useful if you have devices that support PTM, but it + is safe to enable even if you don't. diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile new file mode 100644 index 000000000..ab514083d --- /dev/null +++ b/drivers/pci/pcie/Makefile @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for PCI Express features and port driver + +pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o + +obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o + +obj-$(CONFIG_PCIEASPM) += aspm.o +obj-$(CONFIG_PCIEAER) += aer.o +obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o +obj-$(CONFIG_PCIE_PME) += pme.o +obj-$(CONFIG_PCIE_DPC) += dpc.o +obj-$(CONFIG_PCIE_PTM) += ptm.o diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c new file mode 100644 index 000000000..1563e2260 --- /dev/null +++ b/drivers/pci/pcie/aer.c @@ -0,0 +1,1579 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Implement the AER root port service driver. The driver registers an IRQ + * handler. When a root port triggers an AER interrupt, the IRQ handler + * collects root port status and schedules work. + * + * Copyright (C) 2006 Intel Corp. + * Tom Long Nguyen (tom.l.nguyen@intel.com) + * Zhang Yanmin (yanmin.zhang@intel.com) + * + * (C) Copyright 2009 Hewlett-Packard Development Company, L.P. + * Andrew Patterson <andrew.patterson@hp.com> + */ + +#include <linux/cper.h> +#include <linux/pci.h> +#include <linux/pci-acpi.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/pm.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/kfifo.h> +#include <linux/slab.h> +#include <acpi/apei.h> +#include <ras/ras_event.h> + +#include "../pci.h" +#include "portdrv.h" + +#define AER_ERROR_SOURCES_MAX 100 + +#define AER_MAX_TYPEOF_COR_ERRS 16 /* as per PCI_ERR_COR_STATUS */ +#define AER_MAX_TYPEOF_UNCOR_ERRS 26 /* as per PCI_ERR_UNCOR_STATUS*/ + +struct aer_err_source { + unsigned int status; + unsigned int id; +}; + +struct aer_rpc { + struct pci_dev *rpd; /* Root Port device */ + struct work_struct dpc_handler; + struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX]; + struct aer_err_info e_info; + unsigned short prod_idx; /* Error Producer Index */ + unsigned short cons_idx; /* Error Consumer Index */ + int isr; + spinlock_t e_lock; /* + * Lock access to Error Status/ID Regs + * and error producer/consumer index + */ + struct mutex rpc_mutex; /* + * only one thread could do + * recovery on the same + * root port hierarchy + */ +}; + +/* AER stats for the device */ +struct aer_stats { + + /* + * Fields for all AER capable devices. They indicate the errors + * "as seen by this device". Note that this may mean that if an + * end point is causing problems, the AER counters may increment + * at its link partner (e.g. root port) because the errors will be + * "seen" by the link partner and not the the problematic end point + * itself (which may report all counters as 0 as it never saw any + * problems). + */ + /* Counters for different type of correctable errors */ + u64 dev_cor_errs[AER_MAX_TYPEOF_COR_ERRS]; + /* Counters for different type of fatal uncorrectable errors */ + u64 dev_fatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS]; + /* Counters for different type of nonfatal uncorrectable errors */ + u64 dev_nonfatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS]; + /* Total number of ERR_COR sent by this device */ + u64 dev_total_cor_errs; + /* Total number of ERR_FATAL sent by this device */ + u64 dev_total_fatal_errs; + /* Total number of ERR_NONFATAL sent by this device */ + u64 dev_total_nonfatal_errs; + + /* + * Fields for Root ports & root complex event collectors only, these + * indicate the total number of ERR_COR, ERR_FATAL, and ERR_NONFATAL + * messages received by the root port / event collector, INCLUDING the + * ones that are generated internally (by the rootport itself) + */ + u64 rootport_total_cor_errs; + u64 rootport_total_fatal_errs; + u64 rootport_total_nonfatal_errs; +}; + +#define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ + PCI_ERR_UNC_ECRC| \ + PCI_ERR_UNC_UNSUP| \ + PCI_ERR_UNC_COMP_ABORT| \ + PCI_ERR_UNC_UNX_COMP| \ + PCI_ERR_UNC_MALF_TLP) + +#define SYSTEM_ERROR_INTR_ON_MESG_MASK (PCI_EXP_RTCTL_SECEE| \ + PCI_EXP_RTCTL_SENFEE| \ + PCI_EXP_RTCTL_SEFEE) +#define ROOT_PORT_INTR_ON_MESG_MASK (PCI_ERR_ROOT_CMD_COR_EN| \ + PCI_ERR_ROOT_CMD_NONFATAL_EN| \ + PCI_ERR_ROOT_CMD_FATAL_EN) +#define ERR_COR_ID(d) (d & 0xffff) +#define ERR_UNCOR_ID(d) (d >> 16) + +static int pcie_aer_disable; + +void pci_no_aer(void) +{ + pcie_aer_disable = 1; +} + +bool pci_aer_available(void) +{ + return !pcie_aer_disable && pci_msi_enabled(); +} + +#ifdef CONFIG_PCIE_ECRC + +#define ECRC_POLICY_DEFAULT 0 /* ECRC set by BIOS */ +#define ECRC_POLICY_OFF 1 /* ECRC off for performance */ +#define ECRC_POLICY_ON 2 /* ECRC on for data integrity */ + +static int ecrc_policy = ECRC_POLICY_DEFAULT; + +static const char *ecrc_policy_str[] = { + [ECRC_POLICY_DEFAULT] = "bios", + [ECRC_POLICY_OFF] = "off", + [ECRC_POLICY_ON] = "on" +}; + +/** + * enable_ercr_checking - enable PCIe ECRC checking for a device + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + */ +static int enable_ecrc_checking(struct pci_dev *dev) +{ + int pos; + u32 reg32; + + if (!pci_is_pcie(dev)) + return -ENODEV; + + pos = dev->aer_cap; + if (!pos) + return -ENODEV; + + pci_read_config_dword(dev, pos + PCI_ERR_CAP, ®32); + if (reg32 & PCI_ERR_CAP_ECRC_GENC) + reg32 |= PCI_ERR_CAP_ECRC_GENE; + if (reg32 & PCI_ERR_CAP_ECRC_CHKC) + reg32 |= PCI_ERR_CAP_ECRC_CHKE; + pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32); + + return 0; +} + +/** + * disable_ercr_checking - disables PCIe ECRC checking for a device + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + */ +static int disable_ecrc_checking(struct pci_dev *dev) +{ + int pos; + u32 reg32; + + if (!pci_is_pcie(dev)) + return -ENODEV; + + pos = dev->aer_cap; + if (!pos) + return -ENODEV; + + pci_read_config_dword(dev, pos + PCI_ERR_CAP, ®32); + reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); + pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32); + + return 0; +} + +/** + * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based on global policy + * @dev: the PCI device + */ +void pcie_set_ecrc_checking(struct pci_dev *dev) +{ + switch (ecrc_policy) { + case ECRC_POLICY_DEFAULT: + return; + case ECRC_POLICY_OFF: + disable_ecrc_checking(dev); + break; + case ECRC_POLICY_ON: + enable_ecrc_checking(dev); + break; + default: + return; + } +} + +/** + * pcie_ecrc_get_policy - parse kernel command-line ecrc option + */ +void pcie_ecrc_get_policy(char *str) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ecrc_policy_str); i++) + if (!strncmp(str, ecrc_policy_str[i], + strlen(ecrc_policy_str[i]))) + break; + if (i >= ARRAY_SIZE(ecrc_policy_str)) + return; + + ecrc_policy = i; +} +#endif /* CONFIG_PCIE_ECRC */ + +#ifdef CONFIG_ACPI_APEI +static inline int hest_match_pci(struct acpi_hest_aer_common *p, + struct pci_dev *pci) +{ + return ACPI_HEST_SEGMENT(p->bus) == pci_domain_nr(pci->bus) && + ACPI_HEST_BUS(p->bus) == pci->bus->number && + p->device == PCI_SLOT(pci->devfn) && + p->function == PCI_FUNC(pci->devfn); +} + +static inline bool hest_match_type(struct acpi_hest_header *hest_hdr, + struct pci_dev *dev) +{ + u16 hest_type = hest_hdr->type; + u8 pcie_type = pci_pcie_type(dev); + + if ((hest_type == ACPI_HEST_TYPE_AER_ROOT_PORT && + pcie_type == PCI_EXP_TYPE_ROOT_PORT) || + (hest_type == ACPI_HEST_TYPE_AER_ENDPOINT && + pcie_type == PCI_EXP_TYPE_ENDPOINT) || + (hest_type == ACPI_HEST_TYPE_AER_BRIDGE && + (dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)) + return true; + return false; +} + +struct aer_hest_parse_info { + struct pci_dev *pci_dev; + int firmware_first; +}; + +static int hest_source_is_pcie_aer(struct acpi_hest_header *hest_hdr) +{ + if (hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT || + hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT || + hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE) + return 1; + return 0; +} + +static int aer_hest_parse(struct acpi_hest_header *hest_hdr, void *data) +{ + struct aer_hest_parse_info *info = data; + struct acpi_hest_aer_common *p; + int ff; + + if (!hest_source_is_pcie_aer(hest_hdr)) + return 0; + + p = (struct acpi_hest_aer_common *)(hest_hdr + 1); + ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST); + + /* + * If no specific device is supplied, determine whether + * FIRMWARE_FIRST is set for *any* PCIe device. + */ + if (!info->pci_dev) { + info->firmware_first |= ff; + return 0; + } + + /* Otherwise, check the specific device */ + if (p->flags & ACPI_HEST_GLOBAL) { + if (hest_match_type(hest_hdr, info->pci_dev)) + info->firmware_first = ff; + } else + if (hest_match_pci(p, info->pci_dev)) + info->firmware_first = ff; + + return 0; +} + +static void aer_set_firmware_first(struct pci_dev *pci_dev) +{ + int rc; + struct aer_hest_parse_info info = { + .pci_dev = pci_dev, + .firmware_first = 0, + }; + + rc = apei_hest_parse(aer_hest_parse, &info); + + if (rc) + pci_dev->__aer_firmware_first = 0; + else + pci_dev->__aer_firmware_first = info.firmware_first; + pci_dev->__aer_firmware_first_valid = 1; +} + +int pcie_aer_get_firmware_first(struct pci_dev *dev) +{ + if (!pci_is_pcie(dev)) + return 0; + + if (pcie_ports_native) + return 0; + + if (!dev->__aer_firmware_first_valid) + aer_set_firmware_first(dev); + return dev->__aer_firmware_first; +} + +static bool aer_firmware_first; + +/** + * aer_acpi_firmware_first - Check if APEI should control AER. + */ +bool aer_acpi_firmware_first(void) +{ + static bool parsed = false; + struct aer_hest_parse_info info = { + .pci_dev = NULL, /* Check all PCIe devices */ + .firmware_first = 0, + }; + + if (pcie_ports_native) + return false; + + if (!parsed) { + apei_hest_parse(aer_hest_parse, &info); + aer_firmware_first = info.firmware_first; + parsed = true; + } + return aer_firmware_first; +} +#endif + +#define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \ + PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE) + +int pci_enable_pcie_error_reporting(struct pci_dev *dev) +{ + if (pcie_aer_get_firmware_first(dev)) + return -EIO; + + if (!dev->aer_cap) + return -EIO; + + return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS); +} +EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting); + +int pci_disable_pcie_error_reporting(struct pci_dev *dev) +{ + if (pcie_aer_get_firmware_first(dev)) + return -EIO; + + return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, + PCI_EXP_AER_FLAGS); +} +EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting); + +void pci_aer_clear_device_status(struct pci_dev *dev) +{ + u16 sta; + + pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta); + pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta); +} + +int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) +{ + int pos; + u32 status, sev; + + pos = dev->aer_cap; + if (!pos) + return -EIO; + + if (pcie_aer_get_firmware_first(dev)) + return -EIO; + + /* Clear status bits for ERR_NONFATAL errors only */ + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev); + status &= ~sev; + if (status) + pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); + + return 0; +} +EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); + +void pci_aer_clear_fatal_status(struct pci_dev *dev) +{ + int pos; + u32 status, sev; + + pos = dev->aer_cap; + if (!pos) + return; + + if (pcie_aer_get_firmware_first(dev)) + return; + + /* Clear status bits for ERR_FATAL errors only */ + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev); + status &= sev; + if (status) + pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); +} + +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) +{ + int pos; + u32 status; + int port_type; + + if (!pci_is_pcie(dev)) + return -ENODEV; + + pos = dev->aer_cap; + if (!pos) + return -EIO; + + if (pcie_aer_get_firmware_first(dev)) + return -EIO; + + port_type = pci_pcie_type(dev); + if (port_type == PCI_EXP_TYPE_ROOT_PORT) { + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status); + pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, status); + } + + pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); + pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status); + + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); + pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); + + return 0; +} + +void pci_aer_init(struct pci_dev *dev) +{ + dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + + if (dev->aer_cap) + dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL); + + pci_cleanup_aer_error_status_regs(dev); +} + +void pci_aer_exit(struct pci_dev *dev) +{ + kfree(dev->aer_stats); + dev->aer_stats = NULL; +} + +#define AER_AGENT_RECEIVER 0 +#define AER_AGENT_REQUESTER 1 +#define AER_AGENT_COMPLETER 2 +#define AER_AGENT_TRANSMITTER 3 + +#define AER_AGENT_REQUESTER_MASK(t) ((t == AER_CORRECTABLE) ? \ + 0 : (PCI_ERR_UNC_COMP_TIME|PCI_ERR_UNC_UNSUP)) +#define AER_AGENT_COMPLETER_MASK(t) ((t == AER_CORRECTABLE) ? \ + 0 : PCI_ERR_UNC_COMP_ABORT) +#define AER_AGENT_TRANSMITTER_MASK(t) ((t == AER_CORRECTABLE) ? \ + (PCI_ERR_COR_REP_ROLL|PCI_ERR_COR_REP_TIMER) : 0) + +#define AER_GET_AGENT(t, e) \ + ((e & AER_AGENT_COMPLETER_MASK(t)) ? AER_AGENT_COMPLETER : \ + (e & AER_AGENT_REQUESTER_MASK(t)) ? AER_AGENT_REQUESTER : \ + (e & AER_AGENT_TRANSMITTER_MASK(t)) ? AER_AGENT_TRANSMITTER : \ + AER_AGENT_RECEIVER) + +#define AER_PHYSICAL_LAYER_ERROR 0 +#define AER_DATA_LINK_LAYER_ERROR 1 +#define AER_TRANSACTION_LAYER_ERROR 2 + +#define AER_PHYSICAL_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ? \ + PCI_ERR_COR_RCVR : 0) +#define AER_DATA_LINK_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ? \ + (PCI_ERR_COR_BAD_TLP| \ + PCI_ERR_COR_BAD_DLLP| \ + PCI_ERR_COR_REP_ROLL| \ + PCI_ERR_COR_REP_TIMER) : PCI_ERR_UNC_DLP) + +#define AER_GET_LAYER_ERROR(t, e) \ + ((e & AER_PHYSICAL_LAYER_ERROR_MASK(t)) ? AER_PHYSICAL_LAYER_ERROR : \ + (e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \ + AER_TRANSACTION_LAYER_ERROR) + +/* + * AER error strings + */ +static const char *aer_error_severity_string[] = { + "Uncorrected (Non-Fatal)", + "Uncorrected (Fatal)", + "Corrected" +}; + +static const char *aer_error_layer[] = { + "Physical Layer", + "Data Link Layer", + "Transaction Layer" +}; + +static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = { + "RxErr", /* Bit Position 0 */ + NULL, + NULL, + NULL, + NULL, + NULL, + "BadTLP", /* Bit Position 6 */ + "BadDLLP", /* Bit Position 7 */ + "Rollover", /* Bit Position 8 */ + NULL, + NULL, + NULL, + "Timeout", /* Bit Position 12 */ + "NonFatalErr", /* Bit Position 13 */ + "CorrIntErr", /* Bit Position 14 */ + "HeaderOF", /* Bit Position 15 */ +}; + +static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = { + "Undefined", /* Bit Position 0 */ + NULL, + NULL, + NULL, + "DLP", /* Bit Position 4 */ + "SDES", /* Bit Position 5 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "TLP", /* Bit Position 12 */ + "FCP", /* Bit Position 13 */ + "CmpltTO", /* Bit Position 14 */ + "CmpltAbrt", /* Bit Position 15 */ + "UnxCmplt", /* Bit Position 16 */ + "RxOF", /* Bit Position 17 */ + "MalfTLP", /* Bit Position 18 */ + "ECRC", /* Bit Position 19 */ + "UnsupReq", /* Bit Position 20 */ + "ACSViol", /* Bit Position 21 */ + "UncorrIntErr", /* Bit Position 22 */ + "BlockedTLP", /* Bit Position 23 */ + "AtomicOpBlocked", /* Bit Position 24 */ + "TLPBlockedErr", /* Bit Position 25 */ +}; + +static const char *aer_agent_string[] = { + "Receiver ID", + "Requester ID", + "Completer ID", + "Transmitter ID" +}; + +#define aer_stats_dev_attr(name, stats_array, strings_array, \ + total_string, total_field) \ + static ssize_t \ + name##_show(struct device *dev, struct device_attribute *attr, \ + char *buf) \ +{ \ + unsigned int i; \ + char *str = buf; \ + struct pci_dev *pdev = to_pci_dev(dev); \ + u64 *stats = pdev->aer_stats->stats_array; \ + \ + for (i = 0; i < ARRAY_SIZE(strings_array); i++) { \ + if (strings_array[i]) \ + str += sprintf(str, "%s %llu\n", \ + strings_array[i], stats[i]); \ + else if (stats[i]) \ + str += sprintf(str, #stats_array "_bit[%d] %llu\n",\ + i, stats[i]); \ + } \ + str += sprintf(str, "TOTAL_%s %llu\n", total_string, \ + pdev->aer_stats->total_field); \ + return str-buf; \ +} \ +static DEVICE_ATTR_RO(name) + +aer_stats_dev_attr(aer_dev_correctable, dev_cor_errs, + aer_correctable_error_string, "ERR_COR", + dev_total_cor_errs); +aer_stats_dev_attr(aer_dev_fatal, dev_fatal_errs, + aer_uncorrectable_error_string, "ERR_FATAL", + dev_total_fatal_errs); +aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs, + aer_uncorrectable_error_string, "ERR_NONFATAL", + dev_total_nonfatal_errs); + +#define aer_stats_rootport_attr(name, field) \ + static ssize_t \ + name##_show(struct device *dev, struct device_attribute *attr, \ + char *buf) \ +{ \ + struct pci_dev *pdev = to_pci_dev(dev); \ + return sprintf(buf, "%llu\n", pdev->aer_stats->field); \ +} \ +static DEVICE_ATTR_RO(name) + +aer_stats_rootport_attr(aer_rootport_total_err_cor, + rootport_total_cor_errs); +aer_stats_rootport_attr(aer_rootport_total_err_fatal, + rootport_total_fatal_errs); +aer_stats_rootport_attr(aer_rootport_total_err_nonfatal, + rootport_total_nonfatal_errs); + +static struct attribute *aer_stats_attrs[] __ro_after_init = { + &dev_attr_aer_dev_correctable.attr, + &dev_attr_aer_dev_fatal.attr, + &dev_attr_aer_dev_nonfatal.attr, + &dev_attr_aer_rootport_total_err_cor.attr, + &dev_attr_aer_rootport_total_err_fatal.attr, + &dev_attr_aer_rootport_total_err_nonfatal.attr, + NULL +}; + +static umode_t aer_stats_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct pci_dev *pdev = to_pci_dev(dev); + + if (!pdev->aer_stats) + return 0; + + if ((a == &dev_attr_aer_rootport_total_err_cor.attr || + a == &dev_attr_aer_rootport_total_err_fatal.attr || + a == &dev_attr_aer_rootport_total_err_nonfatal.attr) && + pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) + return 0; + + return a->mode; +} + +const struct attribute_group aer_stats_attr_group = { + .attrs = aer_stats_attrs, + .is_visible = aer_stats_attrs_are_visible, +}; + +static void pci_dev_aer_stats_incr(struct pci_dev *pdev, + struct aer_err_info *info) +{ + int status, i, max = -1; + u64 *counter = NULL; + struct aer_stats *aer_stats = pdev->aer_stats; + + if (!aer_stats) + return; + + switch (info->severity) { + case AER_CORRECTABLE: + aer_stats->dev_total_cor_errs++; + counter = &aer_stats->dev_cor_errs[0]; + max = AER_MAX_TYPEOF_COR_ERRS; + break; + case AER_NONFATAL: + aer_stats->dev_total_nonfatal_errs++; + counter = &aer_stats->dev_nonfatal_errs[0]; + max = AER_MAX_TYPEOF_UNCOR_ERRS; + break; + case AER_FATAL: + aer_stats->dev_total_fatal_errs++; + counter = &aer_stats->dev_fatal_errs[0]; + max = AER_MAX_TYPEOF_UNCOR_ERRS; + break; + } + + status = (info->status & ~info->mask); + for (i = 0; i < max; i++) + if (status & (1 << i)) + counter[i]++; +} + +static void pci_rootport_aer_stats_incr(struct pci_dev *pdev, + struct aer_err_source *e_src) +{ + struct aer_stats *aer_stats = pdev->aer_stats; + + if (!aer_stats) + return; + + if (e_src->status & PCI_ERR_ROOT_COR_RCV) + aer_stats->rootport_total_cor_errs++; + + if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { + if (e_src->status & PCI_ERR_ROOT_FATAL_RCV) + aer_stats->rootport_total_fatal_errs++; + else + aer_stats->rootport_total_nonfatal_errs++; + } +} + +static void __print_tlp_header(struct pci_dev *dev, + struct aer_header_log_regs *t) +{ + pci_err(dev, " TLP Header: %08x %08x %08x %08x\n", + t->dw0, t->dw1, t->dw2, t->dw3); +} + +static void __aer_print_error(struct pci_dev *dev, + struct aer_err_info *info) +{ + int i, status; + const char *errmsg = NULL; + status = (info->status & ~info->mask); + + for (i = 0; i < 32; i++) { + if (!(status & (1 << i))) + continue; + + if (info->severity == AER_CORRECTABLE) + errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ? + aer_correctable_error_string[i] : NULL; + else + errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ? + aer_uncorrectable_error_string[i] : NULL; + + if (errmsg) + pci_err(dev, " [%2d] %-22s%s\n", i, errmsg, + info->first_error == i ? " (First)" : ""); + else + pci_err(dev, " [%2d] Unknown Error Bit%s\n", + i, info->first_error == i ? " (First)" : ""); + } + pci_dev_aer_stats_incr(dev, info); +} + +void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) +{ + int layer, agent; + int id = ((dev->bus->number << 8) | dev->devfn); + + if (!info->status) { + pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", + aer_error_severity_string[info->severity]); + goto out; + } + + layer = AER_GET_LAYER_ERROR(info->severity, info->status); + agent = AER_GET_AGENT(info->severity, info->status); + + pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", + aer_error_severity_string[info->severity], + aer_error_layer[layer], aer_agent_string[agent]); + + pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", + dev->vendor, dev->device, + info->status, info->mask); + + __aer_print_error(dev, info); + + if (info->tlp_header_valid) + __print_tlp_header(dev, &info->tlp); + +out: + if (info->id && info->error_dev_num > 1 && info->id == id) + pci_err(dev, " Error of this Agent is reported first\n"); + + trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask), + info->severity, info->tlp_header_valid, &info->tlp); +} + +static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) +{ + u8 bus = info->id >> 8; + u8 devfn = info->id & 0xff; + + pci_info(dev, "AER: %s%s error received: %04x:%02x:%02x.%d\n", + info->multi_error_valid ? "Multiple " : "", + aer_error_severity_string[info->severity], + pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); +} + +#ifdef CONFIG_ACPI_APEI_PCIEAER +int cper_severity_to_aer(int cper_severity) +{ + switch (cper_severity) { + case CPER_SEV_RECOVERABLE: + return AER_NONFATAL; + case CPER_SEV_FATAL: + return AER_FATAL; + default: + return AER_CORRECTABLE; + } +} +EXPORT_SYMBOL_GPL(cper_severity_to_aer); + +void cper_print_aer(struct pci_dev *dev, int aer_severity, + struct aer_capability_regs *aer) +{ + int layer, agent, tlp_header_valid = 0; + u32 status, mask; + struct aer_err_info info; + + if (aer_severity == AER_CORRECTABLE) { + status = aer->cor_status; + mask = aer->cor_mask; + } else { + status = aer->uncor_status; + mask = aer->uncor_mask; + tlp_header_valid = status & AER_LOG_TLP_MASKS; + } + + layer = AER_GET_LAYER_ERROR(aer_severity, status); + agent = AER_GET_AGENT(aer_severity, status); + + memset(&info, 0, sizeof(info)); + info.severity = aer_severity; + info.status = status; + info.mask = mask; + info.first_error = PCI_ERR_CAP_FEP(aer->cap_control); + + pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); + __aer_print_error(dev, &info); + pci_err(dev, "aer_layer=%s, aer_agent=%s\n", + aer_error_layer[layer], aer_agent_string[agent]); + + if (aer_severity != AER_CORRECTABLE) + pci_err(dev, "aer_uncor_severity: 0x%08x\n", + aer->uncor_severity); + + if (tlp_header_valid) + __print_tlp_header(dev, &aer->header_log); + + trace_aer_event(dev_name(&dev->dev), (status & ~mask), + aer_severity, tlp_header_valid, &aer->header_log); +} +#endif + +/** + * add_error_device - list device to be handled + * @e_info: pointer to error info + * @dev: pointer to pci_dev to be added + */ +static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev) +{ + if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) { + e_info->dev[e_info->error_dev_num] = pci_dev_get(dev); + e_info->error_dev_num++; + return 0; + } + return -ENOSPC; +} + +/** + * is_error_source - check whether the device is source of reported error + * @dev: pointer to pci_dev to be checked + * @e_info: pointer to reported error info + */ +static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) +{ + int pos; + u32 status, mask; + u16 reg16; + + /* + * When bus id is equal to 0, it might be a bad id + * reported by root port. + */ + if ((PCI_BUS_NUM(e_info->id) != 0) && + !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) { + /* Device ID match? */ + if (e_info->id == ((dev->bus->number << 8) | dev->devfn)) + return true; + + /* Continue id comparing if there is no multiple error */ + if (!e_info->multi_error_valid) + return false; + } + + /* + * When either + * 1) bus id is equal to 0. Some ports might lose the bus + * id of error source id; + * 2) bus flag PCI_BUS_FLAGS_NO_AERSID is set + * 3) There are multiple errors and prior ID comparing fails; + * We check AER status registers to find possible reporter. + */ + if (atomic_read(&dev->enable_cnt) == 0) + return false; + + /* Check if AER is enabled */ + pcie_capability_read_word(dev, PCI_EXP_DEVCTL, ®16); + if (!(reg16 & PCI_EXP_AER_FLAGS)) + return false; + + pos = dev->aer_cap; + if (!pos) + return false; + + /* Check if error is recorded */ + if (e_info->severity == AER_CORRECTABLE) { + pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); + pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &mask); + } else { + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask); + } + if (status & ~mask) + return true; + + return false; +} + +static int find_device_iter(struct pci_dev *dev, void *data) +{ + struct aer_err_info *e_info = (struct aer_err_info *)data; + + if (is_error_source(dev, e_info)) { + /* List this device */ + if (add_error_device(e_info, dev)) { + /* We cannot handle more... Stop iteration */ + /* TODO: Should print error message here? */ + return 1; + } + + /* If there is only a single error, stop iteration */ + if (!e_info->multi_error_valid) + return 1; + } + return 0; +} + +/** + * find_source_device - search through device hierarchy for source device + * @parent: pointer to Root Port pci_dev data structure + * @e_info: including detailed error information such like id + * + * Return true if found. + * + * Invoked by DPC when error is detected at the Root Port. + * Caller of this function must set id, severity, and multi_error_valid of + * struct aer_err_info pointed by @e_info properly. This function must fill + * e_info->error_dev_num and e_info->dev[], based on the given information. + */ +static bool find_source_device(struct pci_dev *parent, + struct aer_err_info *e_info) +{ + struct pci_dev *dev = parent; + int result; + + /* Must reset in this function */ + e_info->error_dev_num = 0; + + /* Is Root Port an agent that sends error message? */ + result = find_device_iter(dev, e_info); + if (result) + return true; + + pci_walk_bus(parent->subordinate, find_device_iter, e_info); + + if (!e_info->error_dev_num) { + pci_printk(KERN_DEBUG, parent, "can't find device of ID%04x\n", + e_info->id); + return false; + } + return true; +} + +/** + * handle_error_source - handle logging error into an event log + * @dev: pointer to pci_dev data structure of error source device + * @info: comprehensive error information + * + * Invoked when an error being detected by Root Port. + */ +static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) +{ + int pos; + + if (info->severity == AER_CORRECTABLE) { + /* + * Correctable error does not need software intervention. + * No need to go through error recovery process. + */ + pos = dev->aer_cap; + if (pos) + pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, + info->status); + pci_aer_clear_device_status(dev); + } else if (info->severity == AER_NONFATAL) + pcie_do_nonfatal_recovery(dev); + else if (info->severity == AER_FATAL) + pcie_do_fatal_recovery(dev, PCIE_PORT_SERVICE_AER); + pci_dev_put(dev); +} + +#ifdef CONFIG_ACPI_APEI_PCIEAER + +#define AER_RECOVER_RING_ORDER 4 +#define AER_RECOVER_RING_SIZE (1 << AER_RECOVER_RING_ORDER) + +struct aer_recover_entry { + u8 bus; + u8 devfn; + u16 domain; + int severity; + struct aer_capability_regs *regs; +}; + +static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry, + AER_RECOVER_RING_SIZE); + +static void aer_recover_work_func(struct work_struct *work) +{ + struct aer_recover_entry entry; + struct pci_dev *pdev; + + while (kfifo_get(&aer_recover_ring, &entry)) { + pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus, + entry.devfn); + if (!pdev) { + pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n", + entry.domain, entry.bus, + PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); + continue; + } + cper_print_aer(pdev, entry.severity, entry.regs); + if (entry.severity == AER_NONFATAL) + pcie_do_nonfatal_recovery(pdev); + else if (entry.severity == AER_FATAL) + pcie_do_fatal_recovery(pdev, PCIE_PORT_SERVICE_AER); + pci_dev_put(pdev); + } +} + +/* + * Mutual exclusion for writers of aer_recover_ring, reader side don't + * need lock, because there is only one reader and lock is not needed + * between reader and writer. + */ +static DEFINE_SPINLOCK(aer_recover_ring_lock); +static DECLARE_WORK(aer_recover_work, aer_recover_work_func); + +void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, + int severity, struct aer_capability_regs *aer_regs) +{ + unsigned long flags; + struct aer_recover_entry entry = { + .bus = bus, + .devfn = devfn, + .domain = domain, + .severity = severity, + .regs = aer_regs, + }; + + spin_lock_irqsave(&aer_recover_ring_lock, flags); + if (kfifo_put(&aer_recover_ring, entry)) + schedule_work(&aer_recover_work); + else + pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n", + domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + spin_unlock_irqrestore(&aer_recover_ring_lock, flags); +} +EXPORT_SYMBOL_GPL(aer_recover_queue); +#endif + +/** + * aer_get_device_error_info - read error status from dev and store it to info + * @dev: pointer to the device expected to have a error record + * @info: pointer to structure to store the error record + * + * Return 1 on success, 0 on error. + * + * Note that @info is reused among all error devices. Clear fields properly. + */ +int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) +{ + int pos, temp; + + /* Must reset in this function */ + info->status = 0; + info->tlp_header_valid = 0; + + pos = dev->aer_cap; + + /* The device might not support AER */ + if (!pos) + return 0; + + if (info->severity == AER_CORRECTABLE) { + pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, + &info->status); + pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, + &info->mask); + if (!(info->status & ~info->mask)) + return 0; + } else if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM || + info->severity == AER_NONFATAL) { + + /* Link is still healthy for IO reads */ + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, + &info->status); + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, + &info->mask); + if (!(info->status & ~info->mask)) + return 0; + + /* Get First Error Pointer */ + pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp); + info->first_error = PCI_ERR_CAP_FEP(temp); + + if (info->status & AER_LOG_TLP_MASKS) { + info->tlp_header_valid = 1; + pci_read_config_dword(dev, + pos + PCI_ERR_HEADER_LOG, &info->tlp.dw0); + pci_read_config_dword(dev, + pos + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1); + pci_read_config_dword(dev, + pos + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2); + pci_read_config_dword(dev, + pos + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3); + } + } + + return 1; +} + +static inline void aer_process_err_devices(struct aer_err_info *e_info) +{ + int i; + + /* Report all before handle them, not to lost records by reset etc. */ + for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { + if (aer_get_device_error_info(e_info->dev[i], e_info)) + aer_print_error(e_info->dev[i], e_info); + } + for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { + if (aer_get_device_error_info(e_info->dev[i], e_info)) + handle_error_source(e_info->dev[i], e_info); + } +} + +/** + * aer_isr_one_error - consume an error detected by root port + * @rpc: pointer to the root port which holds an error + * @e_src: pointer to an error source + */ +static void aer_isr_one_error(struct aer_rpc *rpc, + struct aer_err_source *e_src) +{ + struct pci_dev *pdev = rpc->rpd; + struct aer_err_info *e_info = &rpc->e_info; + + pci_rootport_aer_stats_incr(pdev, e_src); + + /* + * There is a possibility that both correctable error and + * uncorrectable error being logged. Report correctable error first. + */ + if (e_src->status & PCI_ERR_ROOT_COR_RCV) { + e_info->id = ERR_COR_ID(e_src->id); + e_info->severity = AER_CORRECTABLE; + + if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV) + e_info->multi_error_valid = 1; + else + e_info->multi_error_valid = 0; + aer_print_port_info(pdev, e_info); + + if (find_source_device(pdev, e_info)) + aer_process_err_devices(e_info); + } + + if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { + e_info->id = ERR_UNCOR_ID(e_src->id); + + if (e_src->status & PCI_ERR_ROOT_FATAL_RCV) + e_info->severity = AER_FATAL; + else + e_info->severity = AER_NONFATAL; + + if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV) + e_info->multi_error_valid = 1; + else + e_info->multi_error_valid = 0; + + aer_print_port_info(pdev, e_info); + + if (find_source_device(pdev, e_info)) + aer_process_err_devices(e_info); + } +} + +/** + * get_e_source - retrieve an error source + * @rpc: pointer to the root port which holds an error + * @e_src: pointer to store retrieved error source + * + * Return 1 if an error source is retrieved, otherwise 0. + * + * Invoked by DPC handler to consume an error. + */ +static int get_e_source(struct aer_rpc *rpc, struct aer_err_source *e_src) +{ + unsigned long flags; + + /* Lock access to Root error producer/consumer index */ + spin_lock_irqsave(&rpc->e_lock, flags); + if (rpc->prod_idx == rpc->cons_idx) { + spin_unlock_irqrestore(&rpc->e_lock, flags); + return 0; + } + + *e_src = rpc->e_sources[rpc->cons_idx]; + rpc->cons_idx++; + if (rpc->cons_idx == AER_ERROR_SOURCES_MAX) + rpc->cons_idx = 0; + spin_unlock_irqrestore(&rpc->e_lock, flags); + + return 1; +} + +/** + * aer_isr - consume errors detected by root port + * @work: definition of this work item + * + * Invoked, as DPC, when root port records new detected error + */ +static void aer_isr(struct work_struct *work) +{ + struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler); + struct aer_err_source uninitialized_var(e_src); + + mutex_lock(&rpc->rpc_mutex); + while (get_e_source(rpc, &e_src)) + aer_isr_one_error(rpc, &e_src); + mutex_unlock(&rpc->rpc_mutex); +} + +/** + * aer_irq - Root Port's ISR + * @irq: IRQ assigned to Root Port + * @context: pointer to Root Port data structure + * + * Invoked when Root Port detects AER messages. + */ +irqreturn_t aer_irq(int irq, void *context) +{ + unsigned int status, id; + struct pcie_device *pdev = (struct pcie_device *)context; + struct aer_rpc *rpc = get_service_data(pdev); + int next_prod_idx; + unsigned long flags; + int pos; + + pos = pdev->port->aer_cap; + /* + * Must lock access to Root Error Status Reg, Root Error ID Reg, + * and Root error producer/consumer index + */ + spin_lock_irqsave(&rpc->e_lock, flags); + + /* Read error status */ + pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, &status); + if (!(status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV))) { + spin_unlock_irqrestore(&rpc->e_lock, flags); + return IRQ_NONE; + } + + /* Read error source and clear error status */ + pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_ERR_SRC, &id); + pci_write_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, status); + + /* Store error source for later DPC handler */ + next_prod_idx = rpc->prod_idx + 1; + if (next_prod_idx == AER_ERROR_SOURCES_MAX) + next_prod_idx = 0; + if (next_prod_idx == rpc->cons_idx) { + /* + * Error Storm Condition - possibly the same error occurred. + * Drop the error. + */ + spin_unlock_irqrestore(&rpc->e_lock, flags); + return IRQ_HANDLED; + } + rpc->e_sources[rpc->prod_idx].status = status; + rpc->e_sources[rpc->prod_idx].id = id; + rpc->prod_idx = next_prod_idx; + spin_unlock_irqrestore(&rpc->e_lock, flags); + + /* Invoke DPC handler */ + schedule_work(&rpc->dpc_handler); + + return IRQ_HANDLED; +} +EXPORT_SYMBOL_GPL(aer_irq); + +static int set_device_error_reporting(struct pci_dev *dev, void *data) +{ + bool enable = *((bool *)data); + int type = pci_pcie_type(dev); + + if ((type == PCI_EXP_TYPE_ROOT_PORT) || + (type == PCI_EXP_TYPE_UPSTREAM) || + (type == PCI_EXP_TYPE_DOWNSTREAM)) { + if (enable) + pci_enable_pcie_error_reporting(dev); + else + pci_disable_pcie_error_reporting(dev); + } + + if (enable) + pcie_set_ecrc_checking(dev); + + return 0; +} + +/** + * set_downstream_devices_error_reporting - enable/disable the error reporting bits on the root port and its downstream ports. + * @dev: pointer to root port's pci_dev data structure + * @enable: true = enable error reporting, false = disable error reporting. + */ +static void set_downstream_devices_error_reporting(struct pci_dev *dev, + bool enable) +{ + set_device_error_reporting(dev, &enable); + + if (!dev->subordinate) + return; + pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable); +} + +/** + * aer_enable_rootport - enable Root Port's interrupts when receiving messages + * @rpc: pointer to a Root Port data structure + * + * Invoked when PCIe bus loads AER service driver. + */ +static void aer_enable_rootport(struct aer_rpc *rpc) +{ + struct pci_dev *pdev = rpc->rpd; + int aer_pos; + u16 reg16; + u32 reg32; + + /* Clear PCIe Capability's Device Status */ + pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, ®16); + pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, reg16); + + /* Disable system error generation in response to error messages */ + pcie_capability_clear_word(pdev, PCI_EXP_RTCTL, + SYSTEM_ERROR_INTR_ON_MESG_MASK); + + aer_pos = pdev->aer_cap; + /* Clear error status */ + pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32); + pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, ®32); + pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32); + pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, ®32); + pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32); + + /* + * Enable error reporting for the root port device and downstream port + * devices. + */ + set_downstream_devices_error_reporting(pdev, true); + + /* Enable Root Port's interrupt in response to error messages */ + pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, ®32); + reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, reg32); +} + +/** + * aer_disable_rootport - disable Root Port's interrupts when receiving messages + * @rpc: pointer to a Root Port data structure + * + * Invoked when PCIe bus unloads AER service driver. + */ +static void aer_disable_rootport(struct aer_rpc *rpc) +{ + struct pci_dev *pdev = rpc->rpd; + u32 reg32; + int pos; + + /* + * Disable error reporting for the root port device and downstream port + * devices. + */ + set_downstream_devices_error_reporting(pdev, false); + + pos = pdev->aer_cap; + /* Disable Root's interrupt in response to error messages */ + pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, ®32); + reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, reg32); + + /* Clear Root's error status reg */ + pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32); +} + +/** + * aer_alloc_rpc - allocate Root Port data structure + * @dev: pointer to the pcie_dev data structure + * + * Invoked when Root Port's AER service is loaded. + */ +static struct aer_rpc *aer_alloc_rpc(struct pcie_device *dev) +{ + struct aer_rpc *rpc; + + rpc = kzalloc(sizeof(struct aer_rpc), GFP_KERNEL); + if (!rpc) + return NULL; + + /* Initialize Root lock access, e_lock, to Root Error Status Reg */ + spin_lock_init(&rpc->e_lock); + + rpc->rpd = dev->port; + INIT_WORK(&rpc->dpc_handler, aer_isr); + mutex_init(&rpc->rpc_mutex); + + /* Use PCIe bus function to store rpc into PCIe device */ + set_service_data(dev, rpc); + + return rpc; +} + +/** + * aer_remove - clean up resources + * @dev: pointer to the pcie_dev data structure + * + * Invoked when PCI Express bus unloads or AER probe fails. + */ +static void aer_remove(struct pcie_device *dev) +{ + struct aer_rpc *rpc = get_service_data(dev); + + if (rpc) { + /* If register interrupt service, it must be free. */ + if (rpc->isr) + free_irq(dev->irq, dev); + + flush_work(&rpc->dpc_handler); + aer_disable_rootport(rpc); + kfree(rpc); + set_service_data(dev, NULL); + } +} + +/** + * aer_probe - initialize resources + * @dev: pointer to the pcie_dev data structure + * + * Invoked when PCI Express bus loads AER service driver. + */ +static int aer_probe(struct pcie_device *dev) +{ + int status; + struct aer_rpc *rpc; + struct device *device = &dev->port->dev; + + /* Alloc rpc data structure */ + rpc = aer_alloc_rpc(dev); + if (!rpc) { + dev_printk(KERN_DEBUG, device, "alloc AER rpc failed\n"); + aer_remove(dev); + return -ENOMEM; + } + + /* Request IRQ ISR */ + status = request_irq(dev->irq, aer_irq, IRQF_SHARED, "aerdrv", dev); + if (status) { + dev_printk(KERN_DEBUG, device, "request AER IRQ %d failed\n", + dev->irq); + aer_remove(dev); + return status; + } + + rpc->isr = 1; + + aer_enable_rootport(rpc); + dev_info(device, "AER enabled with IRQ %d\n", dev->irq); + return 0; +} + +/** + * aer_root_reset - reset link on Root Port + * @dev: pointer to Root Port's pci_dev data structure + * + * Invoked by Port Bus driver when performing link reset at Root Port. + */ +static pci_ers_result_t aer_root_reset(struct pci_dev *dev) +{ + u32 reg32; + int pos; + int rc; + + pos = dev->aer_cap; + + /* Disable Root's interrupt in response to error messages */ + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, ®32); + reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32); + + rc = pci_bus_error_reset(dev); + pci_printk(KERN_DEBUG, dev, "Root Port link has been reset\n"); + + /* Clear Root Error Status */ + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, reg32); + + /* Enable Root Port's interrupt in response to error messages */ + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, ®32); + reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32); + + return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; +} + +/** + * aer_error_resume - clean up corresponding error status bits + * @dev: pointer to Root Port's pci_dev data structure + * + * Invoked by Port Bus driver during nonfatal recovery. + */ +static void aer_error_resume(struct pci_dev *dev) +{ + pci_aer_clear_device_status(dev); + pci_cleanup_aer_uncorrect_error_status(dev); +} + +static struct pcie_port_service_driver aerdriver = { + .name = "aer", + .port_type = PCI_EXP_TYPE_ROOT_PORT, + .service = PCIE_PORT_SERVICE_AER, + + .probe = aer_probe, + .remove = aer_remove, + .error_resume = aer_error_resume, + .reset_link = aer_root_reset, +}; + +/** + * aer_service_init - register AER root service driver + * + * Invoked when AER root service driver is loaded. + */ +int __init pcie_aer_init(void) +{ + if (!pci_aer_available() || aer_acpi_firmware_first()) + return -ENXIO; + return pcie_port_service_register(&aerdriver); +} diff --git a/drivers/pci/pcie/aer_inject.c b/drivers/pci/pcie/aer_inject.c new file mode 100644 index 000000000..0eb24346c --- /dev/null +++ b/drivers/pci/pcie/aer_inject.c @@ -0,0 +1,551 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCIe AER software error injection support. + * + * Debuging PCIe AER code is quite difficult because it is hard to + * trigger various real hardware errors. Software based error + * injection can fake almost all kinds of errors with the help of a + * user space helper tool aer-inject, which can be gotten from: + * http://www.kernel.org/pub/linux/utils/pci/aer-inject/ + * + * Copyright 2009 Intel Corporation. + * Huang Ying <ying.huang@intel.com> + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/miscdevice.h> +#include <linux/pci.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/uaccess.h> +#include <linux/stddef.h> +#include <linux/device.h> + +#include "portdrv.h" + +/* Override the existing corrected and uncorrected error masks */ +static bool aer_mask_override; +module_param(aer_mask_override, bool, 0); + +struct aer_error_inj { + u8 bus; + u8 dev; + u8 fn; + u32 uncor_status; + u32 cor_status; + u32 header_log0; + u32 header_log1; + u32 header_log2; + u32 header_log3; + u32 domain; +}; + +struct aer_error { + struct list_head list; + u32 domain; + unsigned int bus; + unsigned int devfn; + int pos_cap_err; + + u32 uncor_status; + u32 cor_status; + u32 header_log0; + u32 header_log1; + u32 header_log2; + u32 header_log3; + u32 root_status; + u32 source_id; +}; + +struct pci_bus_ops { + struct list_head list; + struct pci_bus *bus; + struct pci_ops *ops; +}; + +static LIST_HEAD(einjected); + +static LIST_HEAD(pci_bus_ops_list); + +/* Protect einjected and pci_bus_ops_list */ +static DEFINE_SPINLOCK(inject_lock); + +static void aer_error_init(struct aer_error *err, u32 domain, + unsigned int bus, unsigned int devfn, + int pos_cap_err) +{ + INIT_LIST_HEAD(&err->list); + err->domain = domain; + err->bus = bus; + err->devfn = devfn; + err->pos_cap_err = pos_cap_err; +} + +/* inject_lock must be held before calling */ +static struct aer_error *__find_aer_error(u32 domain, unsigned int bus, + unsigned int devfn) +{ + struct aer_error *err; + + list_for_each_entry(err, &einjected, list) { + if (domain == err->domain && + bus == err->bus && + devfn == err->devfn) + return err; + } + return NULL; +} + +/* inject_lock must be held before calling */ +static struct aer_error *__find_aer_error_by_dev(struct pci_dev *dev) +{ + int domain = pci_domain_nr(dev->bus); + if (domain < 0) + return NULL; + return __find_aer_error(domain, dev->bus->number, dev->devfn); +} + +/* inject_lock must be held before calling */ +static struct pci_ops *__find_pci_bus_ops(struct pci_bus *bus) +{ + struct pci_bus_ops *bus_ops; + + list_for_each_entry(bus_ops, &pci_bus_ops_list, list) { + if (bus_ops->bus == bus) + return bus_ops->ops; + } + return NULL; +} + +static struct pci_bus_ops *pci_bus_ops_pop(void) +{ + unsigned long flags; + struct pci_bus_ops *bus_ops; + + spin_lock_irqsave(&inject_lock, flags); + bus_ops = list_first_entry_or_null(&pci_bus_ops_list, + struct pci_bus_ops, list); + if (bus_ops) + list_del(&bus_ops->list); + spin_unlock_irqrestore(&inject_lock, flags); + return bus_ops; +} + +static u32 *find_pci_config_dword(struct aer_error *err, int where, + int *prw1cs) +{ + int rw1cs = 0; + u32 *target = NULL; + + if (err->pos_cap_err == -1) + return NULL; + + switch (where - err->pos_cap_err) { + case PCI_ERR_UNCOR_STATUS: + target = &err->uncor_status; + rw1cs = 1; + break; + case PCI_ERR_COR_STATUS: + target = &err->cor_status; + rw1cs = 1; + break; + case PCI_ERR_HEADER_LOG: + target = &err->header_log0; + break; + case PCI_ERR_HEADER_LOG+4: + target = &err->header_log1; + break; + case PCI_ERR_HEADER_LOG+8: + target = &err->header_log2; + break; + case PCI_ERR_HEADER_LOG+12: + target = &err->header_log3; + break; + case PCI_ERR_ROOT_STATUS: + target = &err->root_status; + rw1cs = 1; + break; + case PCI_ERR_ROOT_ERR_SRC: + target = &err->source_id; + break; + } + if (prw1cs) + *prw1cs = rw1cs; + return target; +} + +static int aer_inj_read_config(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + u32 *sim; + struct aer_error *err; + unsigned long flags; + struct pci_ops *ops; + struct pci_ops *my_ops; + int domain; + int rv; + + spin_lock_irqsave(&inject_lock, flags); + if (size != sizeof(u32)) + goto out; + domain = pci_domain_nr(bus); + if (domain < 0) + goto out; + err = __find_aer_error(domain, bus->number, devfn); + if (!err) + goto out; + + sim = find_pci_config_dword(err, where, NULL); + if (sim) { + *val = *sim; + spin_unlock_irqrestore(&inject_lock, flags); + return 0; + } +out: + ops = __find_pci_bus_ops(bus); + /* + * pci_lock must already be held, so we can directly + * manipulate bus->ops. Many config access functions, + * including pci_generic_config_read() require the original + * bus->ops be installed to function, so temporarily put them + * back. + */ + my_ops = bus->ops; + bus->ops = ops; + rv = ops->read(bus, devfn, where, size, val); + bus->ops = my_ops; + spin_unlock_irqrestore(&inject_lock, flags); + return rv; +} + +static int aer_inj_write_config(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + u32 *sim; + struct aer_error *err; + unsigned long flags; + int rw1cs; + struct pci_ops *ops; + struct pci_ops *my_ops; + int domain; + int rv; + + spin_lock_irqsave(&inject_lock, flags); + if (size != sizeof(u32)) + goto out; + domain = pci_domain_nr(bus); + if (domain < 0) + goto out; + err = __find_aer_error(domain, bus->number, devfn); + if (!err) + goto out; + + sim = find_pci_config_dword(err, where, &rw1cs); + if (sim) { + if (rw1cs) + *sim ^= val; + else + *sim = val; + spin_unlock_irqrestore(&inject_lock, flags); + return 0; + } +out: + ops = __find_pci_bus_ops(bus); + /* + * pci_lock must already be held, so we can directly + * manipulate bus->ops. Many config access functions, + * including pci_generic_config_write() require the original + * bus->ops be installed to function, so temporarily put them + * back. + */ + my_ops = bus->ops; + bus->ops = ops; + rv = ops->write(bus, devfn, where, size, val); + bus->ops = my_ops; + spin_unlock_irqrestore(&inject_lock, flags); + return rv; +} + +static struct pci_ops aer_inj_pci_ops = { + .read = aer_inj_read_config, + .write = aer_inj_write_config, +}; + +static void pci_bus_ops_init(struct pci_bus_ops *bus_ops, + struct pci_bus *bus, + struct pci_ops *ops) +{ + INIT_LIST_HEAD(&bus_ops->list); + bus_ops->bus = bus; + bus_ops->ops = ops; +} + +static int pci_bus_set_aer_ops(struct pci_bus *bus) +{ + struct pci_ops *ops; + struct pci_bus_ops *bus_ops; + unsigned long flags; + + bus_ops = kmalloc(sizeof(*bus_ops), GFP_KERNEL); + if (!bus_ops) + return -ENOMEM; + ops = pci_bus_set_ops(bus, &aer_inj_pci_ops); + spin_lock_irqsave(&inject_lock, flags); + if (ops == &aer_inj_pci_ops) + goto out; + pci_bus_ops_init(bus_ops, bus, ops); + list_add(&bus_ops->list, &pci_bus_ops_list); + bus_ops = NULL; +out: + spin_unlock_irqrestore(&inject_lock, flags); + kfree(bus_ops); + return 0; +} + +static int find_aer_device_iter(struct device *device, void *data) +{ + struct pcie_device **result = data; + struct pcie_device *pcie_dev; + + if (device->bus == &pcie_port_bus_type) { + pcie_dev = to_pcie_device(device); + if (pcie_dev->service & PCIE_PORT_SERVICE_AER) { + *result = pcie_dev; + return 1; + } + } + return 0; +} + +static int find_aer_device(struct pci_dev *dev, struct pcie_device **result) +{ + return device_for_each_child(&dev->dev, result, find_aer_device_iter); +} + +static int aer_inject(struct aer_error_inj *einj) +{ + struct aer_error *err, *rperr; + struct aer_error *err_alloc = NULL, *rperr_alloc = NULL; + struct pci_dev *dev, *rpdev; + struct pcie_device *edev; + unsigned long flags; + unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn); + int pos_cap_err, rp_pos_cap_err; + u32 sever, cor_mask, uncor_mask, cor_mask_orig = 0, uncor_mask_orig = 0; + int ret = 0; + + dev = pci_get_domain_bus_and_slot(einj->domain, einj->bus, devfn); + if (!dev) + return -ENODEV; + rpdev = pcie_find_root_port(dev); + if (!rpdev) { + pci_err(dev, "aer_inject: Root port not found\n"); + ret = -ENODEV; + goto out_put; + } + + pos_cap_err = dev->aer_cap; + if (!pos_cap_err) { + pci_err(dev, "aer_inject: Device doesn't support AER\n"); + ret = -EPROTONOSUPPORT; + goto out_put; + } + pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_SEVER, &sever); + pci_read_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, &cor_mask); + pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, + &uncor_mask); + + rp_pos_cap_err = rpdev->aer_cap; + if (!rp_pos_cap_err) { + pci_err(rpdev, "aer_inject: Root port doesn't support AER\n"); + ret = -EPROTONOSUPPORT; + goto out_put; + } + + err_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL); + if (!err_alloc) { + ret = -ENOMEM; + goto out_put; + } + rperr_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL); + if (!rperr_alloc) { + ret = -ENOMEM; + goto out_put; + } + + if (aer_mask_override) { + cor_mask_orig = cor_mask; + cor_mask &= !(einj->cor_status); + pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, + cor_mask); + + uncor_mask_orig = uncor_mask; + uncor_mask &= !(einj->uncor_status); + pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, + uncor_mask); + } + + spin_lock_irqsave(&inject_lock, flags); + + err = __find_aer_error_by_dev(dev); + if (!err) { + err = err_alloc; + err_alloc = NULL; + aer_error_init(err, einj->domain, einj->bus, devfn, + pos_cap_err); + list_add(&err->list, &einjected); + } + err->uncor_status |= einj->uncor_status; + err->cor_status |= einj->cor_status; + err->header_log0 = einj->header_log0; + err->header_log1 = einj->header_log1; + err->header_log2 = einj->header_log2; + err->header_log3 = einj->header_log3; + + if (!aer_mask_override && einj->cor_status && + !(einj->cor_status & ~cor_mask)) { + ret = -EINVAL; + pci_warn(dev, "aer_inject: The correctable error(s) is masked by device\n"); + spin_unlock_irqrestore(&inject_lock, flags); + goto out_put; + } + if (!aer_mask_override && einj->uncor_status && + !(einj->uncor_status & ~uncor_mask)) { + ret = -EINVAL; + pci_warn(dev, "aer_inject: The uncorrectable error(s) is masked by device\n"); + spin_unlock_irqrestore(&inject_lock, flags); + goto out_put; + } + + rperr = __find_aer_error_by_dev(rpdev); + if (!rperr) { + rperr = rperr_alloc; + rperr_alloc = NULL; + aer_error_init(rperr, pci_domain_nr(rpdev->bus), + rpdev->bus->number, rpdev->devfn, + rp_pos_cap_err); + list_add(&rperr->list, &einjected); + } + if (einj->cor_status) { + if (rperr->root_status & PCI_ERR_ROOT_COR_RCV) + rperr->root_status |= PCI_ERR_ROOT_MULTI_COR_RCV; + else + rperr->root_status |= PCI_ERR_ROOT_COR_RCV; + rperr->source_id &= 0xffff0000; + rperr->source_id |= (einj->bus << 8) | devfn; + } + if (einj->uncor_status) { + if (rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV) + rperr->root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV; + if (sever & einj->uncor_status) { + rperr->root_status |= PCI_ERR_ROOT_FATAL_RCV; + if (!(rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV)) + rperr->root_status |= PCI_ERR_ROOT_FIRST_FATAL; + } else + rperr->root_status |= PCI_ERR_ROOT_NONFATAL_RCV; + rperr->root_status |= PCI_ERR_ROOT_UNCOR_RCV; + rperr->source_id &= 0x0000ffff; + rperr->source_id |= ((einj->bus << 8) | devfn) << 16; + } + spin_unlock_irqrestore(&inject_lock, flags); + + if (aer_mask_override) { + pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, + cor_mask_orig); + pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, + uncor_mask_orig); + } + + ret = pci_bus_set_aer_ops(dev->bus); + if (ret) + goto out_put; + ret = pci_bus_set_aer_ops(rpdev->bus); + if (ret) + goto out_put; + + if (find_aer_device(rpdev, &edev)) { + if (!get_service_data(edev)) { + dev_warn(&edev->device, + "aer_inject: AER service is not initialized\n"); + ret = -EPROTONOSUPPORT; + goto out_put; + } + dev_info(&edev->device, + "aer_inject: Injecting errors %08x/%08x into device %s\n", + einj->cor_status, einj->uncor_status, pci_name(dev)); + aer_irq(-1, edev); + } else { + pci_err(rpdev, "aer_inject: AER device not found\n"); + ret = -ENODEV; + } +out_put: + kfree(err_alloc); + kfree(rperr_alloc); + pci_dev_put(dev); + return ret; +} + +static ssize_t aer_inject_write(struct file *filp, const char __user *ubuf, + size_t usize, loff_t *off) +{ + struct aer_error_inj einj; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (usize < offsetof(struct aer_error_inj, domain) || + usize > sizeof(einj)) + return -EINVAL; + + memset(&einj, 0, sizeof(einj)); + if (copy_from_user(&einj, ubuf, usize)) + return -EFAULT; + + ret = aer_inject(&einj); + return ret ? ret : usize; +} + +static const struct file_operations aer_inject_fops = { + .write = aer_inject_write, + .owner = THIS_MODULE, + .llseek = noop_llseek, +}; + +static struct miscdevice aer_inject_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = "aer_inject", + .fops = &aer_inject_fops, +}; + +static int __init aer_inject_init(void) +{ + return misc_register(&aer_inject_device); +} + +static void __exit aer_inject_exit(void) +{ + struct aer_error *err, *err_next; + unsigned long flags; + struct pci_bus_ops *bus_ops; + + misc_deregister(&aer_inject_device); + + while ((bus_ops = pci_bus_ops_pop())) { + pci_bus_set_ops(bus_ops->bus, bus_ops->ops); + kfree(bus_ops); + } + + spin_lock_irqsave(&inject_lock, flags); + list_for_each_entry_safe(err, err_next, &einjected, list) { + list_del(&err->list); + kfree(err); + } + spin_unlock_irqrestore(&inject_lock, flags); +} + +module_init(aer_inject_init); +module_exit(aer_inject_exit); + +MODULE_DESCRIPTION("PCIe AER software error injector"); +MODULE_LICENSE("GPL"); diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c new file mode 100644 index 000000000..279f9f019 --- /dev/null +++ b/drivers/pci/pcie/aspm.c @@ -0,0 +1,1313 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Enable PCIe link L0s/L1 state and Clock Power Management + * + * Copyright (C) 2007 Intel + * Copyright (C) Zhang Yanmin (yanmin.zhang@intel.com) + * Copyright (C) Shaohua Li (shaohua.li@intel.com) + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/pci.h> +#include <linux/pci_regs.h> +#include <linux/errno.h> +#include <linux/pm.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/jiffies.h> +#include <linux/delay.h> +#include <linux/pci-aspm.h> +#include "../pci.h" + +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "pcie_aspm." + +/* Note: those are not register definitions */ +#define ASPM_STATE_L0S_UP (1) /* Upstream direction L0s state */ +#define ASPM_STATE_L0S_DW (2) /* Downstream direction L0s state */ +#define ASPM_STATE_L1 (4) /* L1 state */ +#define ASPM_STATE_L1_1 (8) /* ASPM L1.1 state */ +#define ASPM_STATE_L1_2 (0x10) /* ASPM L1.2 state */ +#define ASPM_STATE_L1_1_PCIPM (0x20) /* PCI PM L1.1 state */ +#define ASPM_STATE_L1_2_PCIPM (0x40) /* PCI PM L1.2 state */ +#define ASPM_STATE_L1_SS_PCIPM (ASPM_STATE_L1_1_PCIPM | ASPM_STATE_L1_2_PCIPM) +#define ASPM_STATE_L1_2_MASK (ASPM_STATE_L1_2 | ASPM_STATE_L1_2_PCIPM) +#define ASPM_STATE_L1SS (ASPM_STATE_L1_1 | ASPM_STATE_L1_1_PCIPM |\ + ASPM_STATE_L1_2_MASK) +#define ASPM_STATE_L0S (ASPM_STATE_L0S_UP | ASPM_STATE_L0S_DW) +#define ASPM_STATE_ALL (ASPM_STATE_L0S | ASPM_STATE_L1 | \ + ASPM_STATE_L1SS) + +struct aspm_latency { + u32 l0s; /* L0s latency (nsec) */ + u32 l1; /* L1 latency (nsec) */ +}; + +struct pcie_link_state { + struct pci_dev *pdev; /* Upstream component of the Link */ + struct pci_dev *downstream; /* Downstream component, function 0 */ + struct pcie_link_state *root; /* pointer to the root port link */ + struct pcie_link_state *parent; /* pointer to the parent Link state */ + struct list_head sibling; /* node in link_list */ + struct list_head children; /* list of child link states */ + struct list_head link; /* node in parent's children list */ + + /* ASPM state */ + u32 aspm_support:7; /* Supported ASPM state */ + u32 aspm_enabled:7; /* Enabled ASPM state */ + u32 aspm_capable:7; /* Capable ASPM state with latency */ + u32 aspm_default:7; /* Default ASPM state by BIOS */ + u32 aspm_disable:7; /* Disabled ASPM state */ + + /* Clock PM state */ + u32 clkpm_capable:1; /* Clock PM capable? */ + u32 clkpm_enabled:1; /* Current Clock PM state */ + u32 clkpm_default:1; /* Default Clock PM state by BIOS */ + u32 clkpm_disable:1; /* Clock PM disabled */ + + /* Exit latencies */ + struct aspm_latency latency_up; /* Upstream direction exit latency */ + struct aspm_latency latency_dw; /* Downstream direction exit latency */ + /* + * Endpoint acceptable latencies. A pcie downstream port only + * has one slot under it, so at most there are 8 functions. + */ + struct aspm_latency acceptable[8]; + + /* L1 PM Substate info */ + struct { + u32 up_cap_ptr; /* L1SS cap ptr in upstream dev */ + u32 dw_cap_ptr; /* L1SS cap ptr in downstream dev */ + u32 ctl1; /* value to be programmed in ctl1 */ + u32 ctl2; /* value to be programmed in ctl2 */ + } l1ss; +}; + +static int aspm_disabled, aspm_force; +static bool aspm_support_enabled = true; +static DEFINE_MUTEX(aspm_lock); +static LIST_HEAD(link_list); + +#define POLICY_DEFAULT 0 /* BIOS default setting */ +#define POLICY_PERFORMANCE 1 /* high performance */ +#define POLICY_POWERSAVE 2 /* high power saving */ +#define POLICY_POWER_SUPERSAVE 3 /* possibly even more power saving */ + +#ifdef CONFIG_PCIEASPM_PERFORMANCE +static int aspm_policy = POLICY_PERFORMANCE; +#elif defined CONFIG_PCIEASPM_POWERSAVE +static int aspm_policy = POLICY_POWERSAVE; +#elif defined CONFIG_PCIEASPM_POWER_SUPERSAVE +static int aspm_policy = POLICY_POWER_SUPERSAVE; +#else +static int aspm_policy; +#endif + +static const char *policy_str[] = { + [POLICY_DEFAULT] = "default", + [POLICY_PERFORMANCE] = "performance", + [POLICY_POWERSAVE] = "powersave", + [POLICY_POWER_SUPERSAVE] = "powersupersave" +}; + +#define LINK_RETRAIN_TIMEOUT HZ + +static int policy_to_aspm_state(struct pcie_link_state *link) +{ + switch (aspm_policy) { + case POLICY_PERFORMANCE: + /* Disable ASPM and Clock PM */ + return 0; + case POLICY_POWERSAVE: + /* Enable ASPM L0s/L1 */ + return (ASPM_STATE_L0S | ASPM_STATE_L1); + case POLICY_POWER_SUPERSAVE: + /* Enable Everything */ + return ASPM_STATE_ALL; + case POLICY_DEFAULT: + return link->aspm_default; + } + return 0; +} + +static int policy_to_clkpm_state(struct pcie_link_state *link) +{ + switch (aspm_policy) { + case POLICY_PERFORMANCE: + /* Disable ASPM and Clock PM */ + return 0; + case POLICY_POWERSAVE: + case POLICY_POWER_SUPERSAVE: + /* Enable Clock PM */ + return 1; + case POLICY_DEFAULT: + return link->clkpm_default; + } + return 0; +} + +static void pcie_set_clkpm_nocheck(struct pcie_link_state *link, int enable) +{ + struct pci_dev *child; + struct pci_bus *linkbus = link->pdev->subordinate; + u32 val = enable ? PCI_EXP_LNKCTL_CLKREQ_EN : 0; + + list_for_each_entry(child, &linkbus->devices, bus_list) + pcie_capability_clear_and_set_word(child, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_CLKREQ_EN, + val); + link->clkpm_enabled = !!enable; +} + +static void pcie_set_clkpm(struct pcie_link_state *link, int enable) +{ + /* + * Don't enable Clock PM if the link is not Clock PM capable + * or Clock PM is disabled + */ + if (!link->clkpm_capable || link->clkpm_disable) + enable = 0; + /* Need nothing if the specified equals to current state */ + if (link->clkpm_enabled == enable) + return; + pcie_set_clkpm_nocheck(link, enable); +} + +static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist) +{ + int capable = 1, enabled = 1; + u32 reg32; + u16 reg16; + struct pci_dev *child; + struct pci_bus *linkbus = link->pdev->subordinate; + + /* All functions should have the same cap and state, take the worst */ + list_for_each_entry(child, &linkbus->devices, bus_list) { + pcie_capability_read_dword(child, PCI_EXP_LNKCAP, ®32); + if (!(reg32 & PCI_EXP_LNKCAP_CLKPM)) { + capable = 0; + enabled = 0; + break; + } + pcie_capability_read_word(child, PCI_EXP_LNKCTL, ®16); + if (!(reg16 & PCI_EXP_LNKCTL_CLKREQ_EN)) + enabled = 0; + } + link->clkpm_enabled = enabled; + link->clkpm_default = enabled; + link->clkpm_capable = capable; + link->clkpm_disable = blacklist ? 1 : 0; +} + +static bool pcie_retrain_link(struct pcie_link_state *link) +{ + struct pci_dev *parent = link->pdev; + unsigned long start_jiffies; + u16 reg16; + + pcie_capability_read_word(parent, PCI_EXP_LNKCTL, ®16); + reg16 |= PCI_EXP_LNKCTL_RL; + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16); + if (parent->clear_retrain_link) { + /* + * Due to an erratum in some devices the Retrain Link bit + * needs to be cleared again manually to allow the link + * training to succeed. + */ + reg16 &= ~PCI_EXP_LNKCTL_RL; + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16); + } + + /* Wait for link training end. Break out after waiting for timeout */ + start_jiffies = jiffies; + for (;;) { + pcie_capability_read_word(parent, PCI_EXP_LNKSTA, ®16); + if (!(reg16 & PCI_EXP_LNKSTA_LT)) + break; + if (time_after(jiffies, start_jiffies + LINK_RETRAIN_TIMEOUT)) + break; + msleep(1); + } + return !(reg16 & PCI_EXP_LNKSTA_LT); +} + +/* + * pcie_aspm_configure_common_clock: check if the 2 ends of a link + * could use common clock. If they are, configure them to use the + * common clock. That will reduce the ASPM state exit latency. + */ +static void pcie_aspm_configure_common_clock(struct pcie_link_state *link) +{ + int same_clock = 1; + u16 reg16, parent_reg, child_reg[8]; + struct pci_dev *child, *parent = link->pdev; + struct pci_bus *linkbus = parent->subordinate; + /* + * All functions of a slot should have the same Slot Clock + * Configuration, so just check one function + */ + child = list_entry(linkbus->devices.next, struct pci_dev, bus_list); + BUG_ON(!pci_is_pcie(child)); + + /* Check downstream component if bit Slot Clock Configuration is 1 */ + pcie_capability_read_word(child, PCI_EXP_LNKSTA, ®16); + if (!(reg16 & PCI_EXP_LNKSTA_SLC)) + same_clock = 0; + + /* Check upstream component if bit Slot Clock Configuration is 1 */ + pcie_capability_read_word(parent, PCI_EXP_LNKSTA, ®16); + if (!(reg16 & PCI_EXP_LNKSTA_SLC)) + same_clock = 0; + + /* Port might be already in common clock mode */ + pcie_capability_read_word(parent, PCI_EXP_LNKCTL, ®16); + if (same_clock && (reg16 & PCI_EXP_LNKCTL_CCC)) { + bool consistent = true; + + list_for_each_entry(child, &linkbus->devices, bus_list) { + pcie_capability_read_word(child, PCI_EXP_LNKCTL, + ®16); + if (!(reg16 & PCI_EXP_LNKCTL_CCC)) { + consistent = false; + break; + } + } + if (consistent) + return; + pci_warn(parent, "ASPM: current common clock configuration is broken, reconfiguring\n"); + } + + /* Configure downstream component, all functions */ + list_for_each_entry(child, &linkbus->devices, bus_list) { + pcie_capability_read_word(child, PCI_EXP_LNKCTL, ®16); + child_reg[PCI_FUNC(child->devfn)] = reg16; + if (same_clock) + reg16 |= PCI_EXP_LNKCTL_CCC; + else + reg16 &= ~PCI_EXP_LNKCTL_CCC; + pcie_capability_write_word(child, PCI_EXP_LNKCTL, reg16); + } + + /* Configure upstream component */ + pcie_capability_read_word(parent, PCI_EXP_LNKCTL, ®16); + parent_reg = reg16; + if (same_clock) + reg16 |= PCI_EXP_LNKCTL_CCC; + else + reg16 &= ~PCI_EXP_LNKCTL_CCC; + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16); + + if (pcie_retrain_link(link)) + return; + + /* Training failed. Restore common clock configurations */ + pci_err(parent, "ASPM: Could not configure common clock\n"); + list_for_each_entry(child, &linkbus->devices, bus_list) + pcie_capability_write_word(child, PCI_EXP_LNKCTL, + child_reg[PCI_FUNC(child->devfn)]); + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, parent_reg); +} + +/* Convert L0s latency encoding to ns */ +static u32 calc_l0s_latency(u32 encoding) +{ + if (encoding == 0x7) + return (5 * 1000); /* > 4us */ + return (64 << encoding); +} + +/* Convert L0s acceptable latency encoding to ns */ +static u32 calc_l0s_acceptable(u32 encoding) +{ + if (encoding == 0x7) + return -1U; + return (64 << encoding); +} + +/* Convert L1 latency encoding to ns */ +static u32 calc_l1_latency(u32 encoding) +{ + if (encoding == 0x7) + return (65 * 1000); /* > 64us */ + return (1000 << encoding); +} + +/* Convert L1 acceptable latency encoding to ns */ +static u32 calc_l1_acceptable(u32 encoding) +{ + if (encoding == 0x7) + return -1U; + return (1000 << encoding); +} + +/* Convert L1SS T_pwr encoding to usec */ +static u32 calc_l1ss_pwron(struct pci_dev *pdev, u32 scale, u32 val) +{ + switch (scale) { + case 0: + return val * 2; + case 1: + return val * 10; + case 2: + return val * 100; + } + pci_err(pdev, "%s: Invalid T_PwrOn scale: %u\n", __func__, scale); + return 0; +} + +static void encode_l12_threshold(u32 threshold_us, u32 *scale, u32 *value) +{ + u32 threshold_ns = threshold_us * 1000; + + /* See PCIe r3.1, sec 7.33.3 and sec 6.18 */ + if (threshold_ns < 32) { + *scale = 0; + *value = threshold_ns; + } else if (threshold_ns < 1024) { + *scale = 1; + *value = threshold_ns >> 5; + } else if (threshold_ns < 32768) { + *scale = 2; + *value = threshold_ns >> 10; + } else if (threshold_ns < 1048576) { + *scale = 3; + *value = threshold_ns >> 15; + } else if (threshold_ns < 33554432) { + *scale = 4; + *value = threshold_ns >> 20; + } else { + *scale = 5; + *value = threshold_ns >> 25; + } +} + +struct aspm_register_info { + u32 support:2; + u32 enabled:2; + u32 latency_encoding_l0s; + u32 latency_encoding_l1; + + /* L1 substates */ + u32 l1ss_cap_ptr; + u32 l1ss_cap; + u32 l1ss_ctl1; + u32 l1ss_ctl2; +}; + +static void pcie_get_aspm_reg(struct pci_dev *pdev, + struct aspm_register_info *info) +{ + u16 reg16; + u32 reg32; + + pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, ®32); + info->support = (reg32 & PCI_EXP_LNKCAP_ASPMS) >> 10; + info->latency_encoding_l0s = (reg32 & PCI_EXP_LNKCAP_L0SEL) >> 12; + info->latency_encoding_l1 = (reg32 & PCI_EXP_LNKCAP_L1EL) >> 15; + pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, ®16); + info->enabled = reg16 & PCI_EXP_LNKCTL_ASPMC; + + /* Read L1 PM substate capabilities */ + info->l1ss_cap = info->l1ss_ctl1 = info->l1ss_ctl2 = 0; + info->l1ss_cap_ptr = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_L1SS); + if (!info->l1ss_cap_ptr) + return; + pci_read_config_dword(pdev, info->l1ss_cap_ptr + PCI_L1SS_CAP, + &info->l1ss_cap); + if (!(info->l1ss_cap & PCI_L1SS_CAP_L1_PM_SS)) { + info->l1ss_cap = 0; + return; + } + + /* + * If we don't have LTR for the entire path from the Root Complex + * to this device, we can't use ASPM L1.2 because it relies on the + * LTR_L1.2_THRESHOLD. See PCIe r4.0, secs 5.5.4, 6.18. + */ + if (!pdev->ltr_path) + info->l1ss_cap &= ~PCI_L1SS_CAP_ASPM_L1_2; + + pci_read_config_dword(pdev, info->l1ss_cap_ptr + PCI_L1SS_CTL1, + &info->l1ss_ctl1); + pci_read_config_dword(pdev, info->l1ss_cap_ptr + PCI_L1SS_CTL2, + &info->l1ss_ctl2); +} + +static void pcie_aspm_check_latency(struct pci_dev *endpoint) +{ + u32 latency, l1_switch_latency = 0; + struct aspm_latency *acceptable; + struct pcie_link_state *link; + + /* Device not in D0 doesn't need latency check */ + if ((endpoint->current_state != PCI_D0) && + (endpoint->current_state != PCI_UNKNOWN)) + return; + + link = endpoint->bus->self->link_state; + acceptable = &link->acceptable[PCI_FUNC(endpoint->devfn)]; + + while (link) { + /* Check upstream direction L0s latency */ + if ((link->aspm_capable & ASPM_STATE_L0S_UP) && + (link->latency_up.l0s > acceptable->l0s)) + link->aspm_capable &= ~ASPM_STATE_L0S_UP; + + /* Check downstream direction L0s latency */ + if ((link->aspm_capable & ASPM_STATE_L0S_DW) && + (link->latency_dw.l0s > acceptable->l0s)) + link->aspm_capable &= ~ASPM_STATE_L0S_DW; + /* + * Check L1 latency. + * Every switch on the path to root complex need 1 + * more microsecond for L1. Spec doesn't mention L0s. + * + * The exit latencies for L1 substates are not advertised + * by a device. Since the spec also doesn't mention a way + * to determine max latencies introduced by enabling L1 + * substates on the components, it is not clear how to do + * a L1 substate exit latency check. We assume that the + * L1 exit latencies advertised by a device include L1 + * substate latencies (and hence do not do any check). + */ + latency = max_t(u32, link->latency_up.l1, link->latency_dw.l1); + if ((link->aspm_capable & ASPM_STATE_L1) && + (latency + l1_switch_latency > acceptable->l1)) + link->aspm_capable &= ~ASPM_STATE_L1; + l1_switch_latency += 1000; + + link = link->parent; + } +} + +/* + * The L1 PM substate capability is only implemented in function 0 in a + * multi function device. + */ +static struct pci_dev *pci_function_0(struct pci_bus *linkbus) +{ + struct pci_dev *child; + + list_for_each_entry(child, &linkbus->devices, bus_list) + if (PCI_FUNC(child->devfn) == 0) + return child; + return NULL; +} + +/* Calculate L1.2 PM substate timing parameters */ +static void aspm_calc_l1ss_info(struct pcie_link_state *link, + struct aspm_register_info *upreg, + struct aspm_register_info *dwreg) +{ + u32 val1, val2, scale1, scale2; + u32 t_common_mode, t_power_on, l1_2_threshold, scale, value; + + link->l1ss.up_cap_ptr = upreg->l1ss_cap_ptr; + link->l1ss.dw_cap_ptr = dwreg->l1ss_cap_ptr; + link->l1ss.ctl1 = link->l1ss.ctl2 = 0; + + if (!(link->aspm_support & ASPM_STATE_L1_2_MASK)) + return; + + /* Choose the greater of the two Port Common_Mode_Restore_Times */ + val1 = (upreg->l1ss_cap & PCI_L1SS_CAP_CM_RESTORE_TIME) >> 8; + val2 = (dwreg->l1ss_cap & PCI_L1SS_CAP_CM_RESTORE_TIME) >> 8; + t_common_mode = max(val1, val2); + + /* Choose the greater of the two Port T_POWER_ON times */ + val1 = (upreg->l1ss_cap & PCI_L1SS_CAP_P_PWR_ON_VALUE) >> 19; + scale1 = (upreg->l1ss_cap & PCI_L1SS_CAP_P_PWR_ON_SCALE) >> 16; + val2 = (dwreg->l1ss_cap & PCI_L1SS_CAP_P_PWR_ON_VALUE) >> 19; + scale2 = (dwreg->l1ss_cap & PCI_L1SS_CAP_P_PWR_ON_SCALE) >> 16; + + if (calc_l1ss_pwron(link->pdev, scale1, val1) > + calc_l1ss_pwron(link->downstream, scale2, val2)) { + link->l1ss.ctl2 |= scale1 | (val1 << 3); + t_power_on = calc_l1ss_pwron(link->pdev, scale1, val1); + } else { + link->l1ss.ctl2 |= scale2 | (val2 << 3); + t_power_on = calc_l1ss_pwron(link->downstream, scale2, val2); + } + + /* + * Set LTR_L1.2_THRESHOLD to the time required to transition the + * Link from L0 to L1.2 and back to L0 so we enter L1.2 only if + * downstream devices report (via LTR) that they can tolerate at + * least that much latency. + * + * Based on PCIe r3.1, sec 5.5.3.3.1, Figures 5-16 and 5-17, and + * Table 5-11. T(POWER_OFF) is at most 2us and T(L1.2) is at + * least 4us. + */ + l1_2_threshold = 2 + 4 + t_common_mode + t_power_on; + encode_l12_threshold(l1_2_threshold, &scale, &value); + link->l1ss.ctl1 |= t_common_mode << 8 | scale << 29 | value << 16; +} + +static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) +{ + struct pci_dev *child = link->downstream, *parent = link->pdev; + struct pci_bus *linkbus = parent->subordinate; + struct aspm_register_info upreg, dwreg; + + if (blacklist) { + /* Set enabled/disable so that we will disable ASPM later */ + link->aspm_enabled = ASPM_STATE_ALL; + link->aspm_disable = ASPM_STATE_ALL; + return; + } + + /* Get upstream/downstream components' register state */ + pcie_get_aspm_reg(parent, &upreg); + pcie_get_aspm_reg(child, &dwreg); + + /* + * If ASPM not supported, don't mess with the clocks and link, + * bail out now. + */ + if (!(upreg.support & dwreg.support)) + return; + + /* Configure common clock before checking latencies */ + pcie_aspm_configure_common_clock(link); + + /* + * Re-read upstream/downstream components' register state + * after clock configuration + */ + pcie_get_aspm_reg(parent, &upreg); + pcie_get_aspm_reg(child, &dwreg); + + /* + * Setup L0s state + * + * Note that we must not enable L0s in either direction on a + * given link unless components on both sides of the link each + * support L0s. + */ + if (dwreg.support & upreg.support & PCIE_LINK_STATE_L0S) + link->aspm_support |= ASPM_STATE_L0S; + if (dwreg.enabled & PCIE_LINK_STATE_L0S) + link->aspm_enabled |= ASPM_STATE_L0S_UP; + if (upreg.enabled & PCIE_LINK_STATE_L0S) + link->aspm_enabled |= ASPM_STATE_L0S_DW; + link->latency_up.l0s = calc_l0s_latency(upreg.latency_encoding_l0s); + link->latency_dw.l0s = calc_l0s_latency(dwreg.latency_encoding_l0s); + + /* Setup L1 state */ + if (upreg.support & dwreg.support & PCIE_LINK_STATE_L1) + link->aspm_support |= ASPM_STATE_L1; + if (upreg.enabled & dwreg.enabled & PCIE_LINK_STATE_L1) + link->aspm_enabled |= ASPM_STATE_L1; + link->latency_up.l1 = calc_l1_latency(upreg.latency_encoding_l1); + link->latency_dw.l1 = calc_l1_latency(dwreg.latency_encoding_l1); + + /* Setup L1 substate */ + if (upreg.l1ss_cap & dwreg.l1ss_cap & PCI_L1SS_CAP_ASPM_L1_1) + link->aspm_support |= ASPM_STATE_L1_1; + if (upreg.l1ss_cap & dwreg.l1ss_cap & PCI_L1SS_CAP_ASPM_L1_2) + link->aspm_support |= ASPM_STATE_L1_2; + if (upreg.l1ss_cap & dwreg.l1ss_cap & PCI_L1SS_CAP_PCIPM_L1_1) + link->aspm_support |= ASPM_STATE_L1_1_PCIPM; + if (upreg.l1ss_cap & dwreg.l1ss_cap & PCI_L1SS_CAP_PCIPM_L1_2) + link->aspm_support |= ASPM_STATE_L1_2_PCIPM; + + if (upreg.l1ss_ctl1 & dwreg.l1ss_ctl1 & PCI_L1SS_CTL1_ASPM_L1_1) + link->aspm_enabled |= ASPM_STATE_L1_1; + if (upreg.l1ss_ctl1 & dwreg.l1ss_ctl1 & PCI_L1SS_CTL1_ASPM_L1_2) + link->aspm_enabled |= ASPM_STATE_L1_2; + if (upreg.l1ss_ctl1 & dwreg.l1ss_ctl1 & PCI_L1SS_CTL1_PCIPM_L1_1) + link->aspm_enabled |= ASPM_STATE_L1_1_PCIPM; + if (upreg.l1ss_ctl1 & dwreg.l1ss_ctl1 & PCI_L1SS_CTL1_PCIPM_L1_2) + link->aspm_enabled |= ASPM_STATE_L1_2_PCIPM; + + if (link->aspm_support & ASPM_STATE_L1SS) + aspm_calc_l1ss_info(link, &upreg, &dwreg); + + /* Save default state */ + link->aspm_default = link->aspm_enabled; + + /* Setup initial capable state. Will be updated later */ + link->aspm_capable = link->aspm_support; + + /* Get and check endpoint acceptable latencies */ + list_for_each_entry(child, &linkbus->devices, bus_list) { + u32 reg32, encoding; + struct aspm_latency *acceptable = + &link->acceptable[PCI_FUNC(child->devfn)]; + + if (pci_pcie_type(child) != PCI_EXP_TYPE_ENDPOINT && + pci_pcie_type(child) != PCI_EXP_TYPE_LEG_END) + continue; + + pcie_capability_read_dword(child, PCI_EXP_DEVCAP, ®32); + /* Calculate endpoint L0s acceptable latency */ + encoding = (reg32 & PCI_EXP_DEVCAP_L0S) >> 6; + acceptable->l0s = calc_l0s_acceptable(encoding); + /* Calculate endpoint L1 acceptable latency */ + encoding = (reg32 & PCI_EXP_DEVCAP_L1) >> 9; + acceptable->l1 = calc_l1_acceptable(encoding); + + pcie_aspm_check_latency(child); + } +} + +static void pci_clear_and_set_dword(struct pci_dev *pdev, int pos, + u32 clear, u32 set) +{ + u32 val; + + pci_read_config_dword(pdev, pos, &val); + val &= ~clear; + val |= set; + pci_write_config_dword(pdev, pos, val); +} + +/* Configure the ASPM L1 substates */ +static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) +{ + u32 val, enable_req; + struct pci_dev *child = link->downstream, *parent = link->pdev; + u32 up_cap_ptr = link->l1ss.up_cap_ptr; + u32 dw_cap_ptr = link->l1ss.dw_cap_ptr; + + enable_req = (link->aspm_enabled ^ state) & state; + + /* + * Here are the rules specified in the PCIe spec for enabling L1SS: + * - When enabling L1.x, enable bit at parent first, then at child + * - When disabling L1.x, disable bit at child first, then at parent + * - When enabling ASPM L1.x, need to disable L1 + * (at child followed by parent). + * - The ASPM/PCIPM L1.2 must be disabled while programming timing + * parameters + * + * To keep it simple, disable all L1SS bits first, and later enable + * what is needed. + */ + + /* Disable all L1 substates */ + pci_clear_and_set_dword(child, dw_cap_ptr + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1SS_MASK, 0); + pci_clear_and_set_dword(parent, up_cap_ptr + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1SS_MASK, 0); + /* + * If needed, disable L1, and it gets enabled later + * in pcie_config_aspm_link(). + */ + if (enable_req & (ASPM_STATE_L1_1 | ASPM_STATE_L1_2)) { + pcie_capability_clear_and_set_word(child, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPM_L1, 0); + pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPM_L1, 0); + } + + if (enable_req & ASPM_STATE_L1_2_MASK) { + + /* Program T_POWER_ON times in both ports */ + pci_write_config_dword(parent, up_cap_ptr + PCI_L1SS_CTL2, + link->l1ss.ctl2); + pci_write_config_dword(child, dw_cap_ptr + PCI_L1SS_CTL2, + link->l1ss.ctl2); + + /* Program Common_Mode_Restore_Time in upstream device */ + pci_clear_and_set_dword(parent, up_cap_ptr + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_CM_RESTORE_TIME, + link->l1ss.ctl1); + + /* Program LTR_L1.2_THRESHOLD time in both ports */ + pci_clear_and_set_dword(parent, up_cap_ptr + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_LTR_L12_TH_VALUE | + PCI_L1SS_CTL1_LTR_L12_TH_SCALE, + link->l1ss.ctl1); + pci_clear_and_set_dword(child, dw_cap_ptr + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_LTR_L12_TH_VALUE | + PCI_L1SS_CTL1_LTR_L12_TH_SCALE, + link->l1ss.ctl1); + } + + val = 0; + if (state & ASPM_STATE_L1_1) + val |= PCI_L1SS_CTL1_ASPM_L1_1; + if (state & ASPM_STATE_L1_2) + val |= PCI_L1SS_CTL1_ASPM_L1_2; + if (state & ASPM_STATE_L1_1_PCIPM) + val |= PCI_L1SS_CTL1_PCIPM_L1_1; + if (state & ASPM_STATE_L1_2_PCIPM) + val |= PCI_L1SS_CTL1_PCIPM_L1_2; + + /* Enable what we need to enable */ + pci_clear_and_set_dword(parent, up_cap_ptr + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1SS_MASK, val); + pci_clear_and_set_dword(child, dw_cap_ptr + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1SS_MASK, val); +} + +static void pcie_config_aspm_dev(struct pci_dev *pdev, u32 val) +{ + pcie_capability_clear_and_set_word(pdev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, val); +} + +static void pcie_config_aspm_link(struct pcie_link_state *link, u32 state) +{ + u32 upstream = 0, dwstream = 0; + struct pci_dev *child = link->downstream, *parent = link->pdev; + struct pci_bus *linkbus = parent->subordinate; + + /* Enable only the states that were not explicitly disabled */ + state &= (link->aspm_capable & ~link->aspm_disable); + + /* Can't enable any substates if L1 is not enabled */ + if (!(state & ASPM_STATE_L1)) + state &= ~ASPM_STATE_L1SS; + + /* Spec says both ports must be in D0 before enabling PCI PM substates*/ + if (parent->current_state != PCI_D0 || child->current_state != PCI_D0) { + state &= ~ASPM_STATE_L1_SS_PCIPM; + state |= (link->aspm_enabled & ASPM_STATE_L1_SS_PCIPM); + } + + /* Nothing to do if the link is already in the requested state */ + if (link->aspm_enabled == state) + return; + /* Convert ASPM state to upstream/downstream ASPM register state */ + if (state & ASPM_STATE_L0S_UP) + dwstream |= PCI_EXP_LNKCTL_ASPM_L0S; + if (state & ASPM_STATE_L0S_DW) + upstream |= PCI_EXP_LNKCTL_ASPM_L0S; + if (state & ASPM_STATE_L1) { + upstream |= PCI_EXP_LNKCTL_ASPM_L1; + dwstream |= PCI_EXP_LNKCTL_ASPM_L1; + } + + if (link->aspm_capable & ASPM_STATE_L1SS) + pcie_config_aspm_l1ss(link, state); + + /* + * Spec 2.0 suggests all functions should be configured the + * same setting for ASPM. Enabling ASPM L1 should be done in + * upstream component first and then downstream, and vice + * versa for disabling ASPM L1. Spec doesn't mention L0S. + */ + if (state & ASPM_STATE_L1) + pcie_config_aspm_dev(parent, upstream); + list_for_each_entry(child, &linkbus->devices, bus_list) + pcie_config_aspm_dev(child, dwstream); + if (!(state & ASPM_STATE_L1)) + pcie_config_aspm_dev(parent, upstream); + + link->aspm_enabled = state; +} + +static void pcie_config_aspm_path(struct pcie_link_state *link) +{ + while (link) { + pcie_config_aspm_link(link, policy_to_aspm_state(link)); + link = link->parent; + } +} + +static void free_link_state(struct pcie_link_state *link) +{ + link->pdev->link_state = NULL; + kfree(link); +} + +static int pcie_aspm_sanity_check(struct pci_dev *pdev) +{ + struct pci_dev *child; + u32 reg32; + + /* + * Some functions in a slot might not all be PCIe functions, + * very strange. Disable ASPM for the whole slot + */ + list_for_each_entry(child, &pdev->subordinate->devices, bus_list) { + if (!pci_is_pcie(child)) + return -EINVAL; + + /* + * If ASPM is disabled then we're not going to change + * the BIOS state. It's safe to continue even if it's a + * pre-1.1 device + */ + + if (aspm_disabled) + continue; + + /* + * Disable ASPM for pre-1.1 PCIe device, we follow MS to use + * RBER bit to determine if a function is 1.1 version device + */ + pcie_capability_read_dword(child, PCI_EXP_DEVCAP, ®32); + if (!(reg32 & PCI_EXP_DEVCAP_RBER) && !aspm_force) { + pci_info(child, "disabling ASPM on pre-1.1 PCIe device. You can enable it with 'pcie_aspm=force'\n"); + return -EINVAL; + } + } + return 0; +} + +static struct pcie_link_state *alloc_pcie_link_state(struct pci_dev *pdev) +{ + struct pcie_link_state *link; + + link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) + return NULL; + + INIT_LIST_HEAD(&link->sibling); + INIT_LIST_HEAD(&link->children); + INIT_LIST_HEAD(&link->link); + link->pdev = pdev; + link->downstream = pci_function_0(pdev->subordinate); + + /* + * Root Ports and PCI/PCI-X to PCIe Bridges are roots of PCIe + * hierarchies. Note that some PCIe host implementations omit + * the root ports entirely, in which case a downstream port on + * a switch may become the root of the link state chain for all + * its subordinate endpoints. + */ + if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(pdev) == PCI_EXP_TYPE_PCIE_BRIDGE || + !pdev->bus->parent->self) { + link->root = link; + } else { + struct pcie_link_state *parent; + + parent = pdev->bus->parent->self->link_state; + if (!parent) { + kfree(link); + return NULL; + } + + link->parent = parent; + link->root = link->parent->root; + list_add(&link->link, &parent->children); + } + + list_add(&link->sibling, &link_list); + pdev->link_state = link; + return link; +} + +/* + * pcie_aspm_init_link_state: Initiate PCI express link state. + * It is called after the pcie and its children devices are scanned. + * @pdev: the root port or switch downstream port + */ +void pcie_aspm_init_link_state(struct pci_dev *pdev) +{ + struct pcie_link_state *link; + int blacklist = !!pcie_aspm_sanity_check(pdev); + + if (!aspm_support_enabled) + return; + + if (pdev->link_state) + return; + + /* + * We allocate pcie_link_state for the component on the upstream + * end of a Link, so there's nothing to do unless this device has a + * Link on its secondary side. + */ + if (!pdev->has_secondary_link) + return; + + /* VIA has a strange chipset, root port is under a bridge */ + if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT && + pdev->bus->self) + return; + + down_read(&pci_bus_sem); + if (list_empty(&pdev->subordinate->devices)) + goto out; + + mutex_lock(&aspm_lock); + link = alloc_pcie_link_state(pdev); + if (!link) + goto unlock; + /* + * Setup initial ASPM state. Note that we need to configure + * upstream links also because capable state of them can be + * update through pcie_aspm_cap_init(). + */ + pcie_aspm_cap_init(link, blacklist); + + /* Setup initial Clock PM state */ + pcie_clkpm_cap_init(link, blacklist); + + /* + * At this stage drivers haven't had an opportunity to change the + * link policy setting. Enabling ASPM on broken hardware can cripple + * it even before the driver has had a chance to disable ASPM, so + * default to a safe level right now. If we're enabling ASPM beyond + * the BIOS's expectation, we'll do so once pci_enable_device() is + * called. + */ + if (aspm_policy != POLICY_POWERSAVE && + aspm_policy != POLICY_POWER_SUPERSAVE) { + pcie_config_aspm_path(link); + pcie_set_clkpm(link, policy_to_clkpm_state(link)); + } + +unlock: + mutex_unlock(&aspm_lock); +out: + up_read(&pci_bus_sem); +} + +/* Recheck latencies and update aspm_capable for links under the root */ +static void pcie_update_aspm_capable(struct pcie_link_state *root) +{ + struct pcie_link_state *link; + BUG_ON(root->parent); + list_for_each_entry(link, &link_list, sibling) { + if (link->root != root) + continue; + link->aspm_capable = link->aspm_support; + } + list_for_each_entry(link, &link_list, sibling) { + struct pci_dev *child; + struct pci_bus *linkbus = link->pdev->subordinate; + if (link->root != root) + continue; + list_for_each_entry(child, &linkbus->devices, bus_list) { + if ((pci_pcie_type(child) != PCI_EXP_TYPE_ENDPOINT) && + (pci_pcie_type(child) != PCI_EXP_TYPE_LEG_END)) + continue; + pcie_aspm_check_latency(child); + } + } +} + +/* @pdev: the endpoint device */ +void pcie_aspm_exit_link_state(struct pci_dev *pdev) +{ + struct pci_dev *parent = pdev->bus->self; + struct pcie_link_state *link, *root, *parent_link; + + if (!parent || !parent->link_state) + return; + + down_read(&pci_bus_sem); + mutex_lock(&aspm_lock); + /* + * All PCIe functions are in one slot, remove one function will remove + * the whole slot, so just wait until we are the last function left. + */ + if (!list_empty(&parent->subordinate->devices)) + goto out; + + link = parent->link_state; + root = link->root; + parent_link = link->parent; + + /* All functions are removed, so just disable ASPM for the link */ + pcie_config_aspm_link(link, 0); + list_del(&link->sibling); + list_del(&link->link); + /* Clock PM is for endpoint device */ + free_link_state(link); + + /* Recheck latencies and configure upstream links */ + if (parent_link) { + pcie_update_aspm_capable(root); + pcie_config_aspm_path(parent_link); + } +out: + mutex_unlock(&aspm_lock); + up_read(&pci_bus_sem); +} + +/* @pdev: the root port or switch downstream port */ +void pcie_aspm_pm_state_change(struct pci_dev *pdev) +{ + struct pcie_link_state *link = pdev->link_state; + + if (aspm_disabled || !link) + return; + /* + * Devices changed PM state, we should recheck if latency + * meets all functions' requirement + */ + down_read(&pci_bus_sem); + mutex_lock(&aspm_lock); + pcie_update_aspm_capable(link->root); + pcie_config_aspm_path(link); + mutex_unlock(&aspm_lock); + up_read(&pci_bus_sem); +} + +void pcie_aspm_powersave_config_link(struct pci_dev *pdev) +{ + struct pcie_link_state *link = pdev->link_state; + + if (aspm_disabled || !link) + return; + + if (aspm_policy != POLICY_POWERSAVE && + aspm_policy != POLICY_POWER_SUPERSAVE) + return; + + down_read(&pci_bus_sem); + mutex_lock(&aspm_lock); + pcie_config_aspm_path(link); + pcie_set_clkpm(link, policy_to_clkpm_state(link)); + mutex_unlock(&aspm_lock); + up_read(&pci_bus_sem); +} + +static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) +{ + struct pci_dev *parent = pdev->bus->self; + struct pcie_link_state *link; + + if (!pci_is_pcie(pdev)) + return; + + if (pdev->has_secondary_link) + parent = pdev; + if (!parent || !parent->link_state) + return; + + /* + * A driver requested that ASPM be disabled on this device, but + * if we don't have permission to manage ASPM (e.g., on ACPI + * systems we have to observe the FADT ACPI_FADT_NO_ASPM bit and + * the _OSC method), we can't honor that request. Windows has + * a similar mechanism using "PciASPMOptOut", which is also + * ignored in this situation. + */ + if (aspm_disabled) { + pci_warn(pdev, "can't disable ASPM; OS doesn't have ASPM control\n"); + return; + } + + if (sem) + down_read(&pci_bus_sem); + mutex_lock(&aspm_lock); + link = parent->link_state; + if (state & PCIE_LINK_STATE_L0S) + link->aspm_disable |= ASPM_STATE_L0S; + if (state & PCIE_LINK_STATE_L1) + link->aspm_disable |= ASPM_STATE_L1; + pcie_config_aspm_link(link, policy_to_aspm_state(link)); + + if (state & PCIE_LINK_STATE_CLKPM) + link->clkpm_disable = 1; + pcie_set_clkpm(link, policy_to_clkpm_state(link)); + mutex_unlock(&aspm_lock); + if (sem) + up_read(&pci_bus_sem); +} + +void pci_disable_link_state_locked(struct pci_dev *pdev, int state) +{ + __pci_disable_link_state(pdev, state, false); +} +EXPORT_SYMBOL(pci_disable_link_state_locked); + +/** + * pci_disable_link_state - Disable device's link state, so the link will + * never enter specific states. Note that if the BIOS didn't grant ASPM + * control to the OS, this does nothing because we can't touch the LNKCTL + * register. + * + * @pdev: PCI device + * @state: ASPM link state to disable + */ +void pci_disable_link_state(struct pci_dev *pdev, int state) +{ + __pci_disable_link_state(pdev, state, true); +} +EXPORT_SYMBOL(pci_disable_link_state); + +static int pcie_aspm_set_policy(const char *val, + const struct kernel_param *kp) +{ + int i; + struct pcie_link_state *link; + + if (aspm_disabled) + return -EPERM; + i = sysfs_match_string(policy_str, val); + if (i < 0) + return i; + if (i == aspm_policy) + return 0; + + down_read(&pci_bus_sem); + mutex_lock(&aspm_lock); + aspm_policy = i; + list_for_each_entry(link, &link_list, sibling) { + pcie_config_aspm_link(link, policy_to_aspm_state(link)); + pcie_set_clkpm(link, policy_to_clkpm_state(link)); + } + mutex_unlock(&aspm_lock); + up_read(&pci_bus_sem); + return 0; +} + +static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp) +{ + int i, cnt = 0; + for (i = 0; i < ARRAY_SIZE(policy_str); i++) + if (i == aspm_policy) + cnt += sprintf(buffer + cnt, "[%s] ", policy_str[i]); + else + cnt += sprintf(buffer + cnt, "%s ", policy_str[i]); + cnt += sprintf(buffer + cnt, "\n"); + return cnt; +} + +module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy, + NULL, 0644); + +#ifdef CONFIG_PCIEASPM_DEBUG +static ssize_t link_state_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct pci_dev *pci_device = to_pci_dev(dev); + struct pcie_link_state *link_state = pci_device->link_state; + + return sprintf(buf, "%d\n", link_state->aspm_enabled); +} + +static ssize_t link_state_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t n) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pcie_link_state *link, *root = pdev->link_state->root; + u32 state; + + if (aspm_disabled) + return -EPERM; + + if (kstrtouint(buf, 10, &state)) + return -EINVAL; + if ((state & ~ASPM_STATE_ALL) != 0) + return -EINVAL; + + down_read(&pci_bus_sem); + mutex_lock(&aspm_lock); + list_for_each_entry(link, &link_list, sibling) { + if (link->root != root) + continue; + pcie_config_aspm_link(link, state); + } + mutex_unlock(&aspm_lock); + up_read(&pci_bus_sem); + return n; +} + +static ssize_t clk_ctl_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct pci_dev *pci_device = to_pci_dev(dev); + struct pcie_link_state *link_state = pci_device->link_state; + + return sprintf(buf, "%d\n", link_state->clkpm_enabled); +} + +static ssize_t clk_ctl_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t n) +{ + struct pci_dev *pdev = to_pci_dev(dev); + bool state; + + if (strtobool(buf, &state)) + return -EINVAL; + + down_read(&pci_bus_sem); + mutex_lock(&aspm_lock); + pcie_set_clkpm_nocheck(pdev->link_state, state); + mutex_unlock(&aspm_lock); + up_read(&pci_bus_sem); + + return n; +} + +static DEVICE_ATTR_RW(link_state); +static DEVICE_ATTR_RW(clk_ctl); + +static char power_group[] = "power"; +void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev) +{ + struct pcie_link_state *link_state = pdev->link_state; + + if (!link_state) + return; + + if (link_state->aspm_support) + sysfs_add_file_to_group(&pdev->dev.kobj, + &dev_attr_link_state.attr, power_group); + if (link_state->clkpm_capable) + sysfs_add_file_to_group(&pdev->dev.kobj, + &dev_attr_clk_ctl.attr, power_group); +} + +void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev) +{ + struct pcie_link_state *link_state = pdev->link_state; + + if (!link_state) + return; + + if (link_state->aspm_support) + sysfs_remove_file_from_group(&pdev->dev.kobj, + &dev_attr_link_state.attr, power_group); + if (link_state->clkpm_capable) + sysfs_remove_file_from_group(&pdev->dev.kobj, + &dev_attr_clk_ctl.attr, power_group); +} +#endif + +static int __init pcie_aspm_disable(char *str) +{ + if (!strcmp(str, "off")) { + aspm_policy = POLICY_DEFAULT; + aspm_disabled = 1; + aspm_support_enabled = false; + printk(KERN_INFO "PCIe ASPM is disabled\n"); + } else if (!strcmp(str, "force")) { + aspm_force = 1; + printk(KERN_INFO "PCIe ASPM is forcibly enabled\n"); + } + return 1; +} + +__setup("pcie_aspm=", pcie_aspm_disable); + +void pcie_no_aspm(void) +{ + /* + * Disabling ASPM is intended to prevent the kernel from modifying + * existing hardware state, not to clear existing state. To that end: + * (a) set policy to POLICY_DEFAULT in order to avoid changing state + * (b) prevent userspace from changing policy + */ + if (!aspm_force) { + aspm_policy = POLICY_DEFAULT; + aspm_disabled = 1; + } +} + +bool pcie_aspm_support_enabled(void) +{ + return aspm_support_enabled; +} +EXPORT_SYMBOL(pcie_aspm_support_enabled); diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c new file mode 100644 index 000000000..118b5bcae --- /dev/null +++ b/drivers/pci/pcie/dpc.c @@ -0,0 +1,313 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCI Express Downstream Port Containment services driver + * Author: Keith Busch <keith.busch@intel.com> + * + * Copyright (C) 2016 Intel Corp. + */ + +#include <linux/aer.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/pci.h> + +#include "portdrv.h" +#include "../pci.h" + +struct dpc_dev { + struct pcie_device *dev; + u16 cap_pos; + bool rp_extensions; + u8 rp_log_size; +}; + +static const char * const rp_pio_error_string[] = { + "Configuration Request received UR Completion", /* Bit Position 0 */ + "Configuration Request received CA Completion", /* Bit Position 1 */ + "Configuration Request Completion Timeout", /* Bit Position 2 */ + NULL, + NULL, + NULL, + NULL, + NULL, + "I/O Request received UR Completion", /* Bit Position 8 */ + "I/O Request received CA Completion", /* Bit Position 9 */ + "I/O Request Completion Timeout", /* Bit Position 10 */ + NULL, + NULL, + NULL, + NULL, + NULL, + "Memory Request received UR Completion", /* Bit Position 16 */ + "Memory Request received CA Completion", /* Bit Position 17 */ + "Memory Request Completion Timeout", /* Bit Position 18 */ +}; + +static int dpc_wait_rp_inactive(struct dpc_dev *dpc) +{ + unsigned long timeout = jiffies + HZ; + struct pci_dev *pdev = dpc->dev->port; + struct device *dev = &dpc->dev->device; + u16 cap = dpc->cap_pos, status; + + pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); + while (status & PCI_EXP_DPC_RP_BUSY && + !time_after(jiffies, timeout)) { + msleep(10); + pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); + } + if (status & PCI_EXP_DPC_RP_BUSY) { + dev_warn(dev, "DPC root port still busy\n"); + return -EBUSY; + } + return 0; +} + +static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) +{ + struct dpc_dev *dpc; + struct pcie_device *pciedev; + struct device *devdpc; + + u16 cap; + + /* + * DPC disables the Link automatically in hardware, so it has + * already been reset by the time we get here. + */ + devdpc = pcie_port_find_device(pdev, PCIE_PORT_SERVICE_DPC); + pciedev = to_pcie_device(devdpc); + dpc = get_service_data(pciedev); + cap = dpc->cap_pos; + + /* + * Wait until the Link is inactive, then clear DPC Trigger Status + * to allow the Port to leave DPC. + */ + pcie_wait_for_link(pdev, false); + + if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc)) + return PCI_ERS_RESULT_DISCONNECT; + + pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS, + PCI_EXP_DPC_STATUS_TRIGGER); + + return PCI_ERS_RESULT_RECOVERED; +} + + +static void dpc_process_rp_pio_error(struct dpc_dev *dpc) +{ + struct device *dev = &dpc->dev->device; + struct pci_dev *pdev = dpc->dev->port; + u16 cap = dpc->cap_pos, dpc_status, first_error; + u32 status, mask, sev, syserr, exc, dw0, dw1, dw2, dw3, log, prefix; + int i; + + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, &status); + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_MASK, &mask); + dev_err(dev, "rp_pio_status: %#010x, rp_pio_mask: %#010x\n", + status, mask); + + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_SEVERITY, &sev); + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_SYSERROR, &syserr); + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_EXCEPTION, &exc); + dev_err(dev, "RP PIO severity=%#010x, syserror=%#010x, exception=%#010x\n", + sev, syserr, exc); + + /* Get First Error Pointer */ + pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &dpc_status); + first_error = (dpc_status & 0x1f00) >> 8; + + for (i = 0; i < ARRAY_SIZE(rp_pio_error_string); i++) { + if ((status & ~mask) & (1 << i)) + dev_err(dev, "[%2d] %s%s\n", i, rp_pio_error_string[i], + first_error == i ? " (First)" : ""); + } + + if (dpc->rp_log_size < 4) + goto clear_status; + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG, + &dw0); + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 4, + &dw1); + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 8, + &dw2); + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 12, + &dw3); + dev_err(dev, "TLP Header: %#010x %#010x %#010x %#010x\n", + dw0, dw1, dw2, dw3); + + if (dpc->rp_log_size < 5) + goto clear_status; + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG, &log); + dev_err(dev, "RP PIO ImpSpec Log %#010x\n", log); + + for (i = 0; i < dpc->rp_log_size - 5; i++) { + pci_read_config_dword(pdev, + cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG, &prefix); + dev_err(dev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix); + } + clear_status: + pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, status); +} + +static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev, + struct aer_err_info *info) +{ + int pos = dev->aer_cap; + u32 status, mask, sev; + + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask); + status &= ~mask; + if (!status) + return 0; + + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev); + status &= sev; + if (status) + info->severity = AER_FATAL; + else + info->severity = AER_NONFATAL; + + return 1; +} + +static irqreturn_t dpc_handler(int irq, void *context) +{ + struct aer_err_info info; + struct dpc_dev *dpc = context; + struct pci_dev *pdev = dpc->dev->port; + struct device *dev = &dpc->dev->device; + u16 cap = dpc->cap_pos, status, source, reason, ext_reason; + + pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); + pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source); + + dev_info(dev, "DPC containment event, status:%#06x source:%#06x\n", + status, source); + + reason = (status & PCI_EXP_DPC_STATUS_TRIGGER_RSN) >> 1; + ext_reason = (status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT) >> 5; + dev_warn(dev, "DPC %s detected, remove downstream devices\n", + (reason == 0) ? "unmasked uncorrectable error" : + (reason == 1) ? "ERR_NONFATAL" : + (reason == 2) ? "ERR_FATAL" : + (ext_reason == 0) ? "RP PIO error" : + (ext_reason == 1) ? "software trigger" : + "reserved error"); + + /* show RP PIO error detail information */ + if (dpc->rp_extensions && reason == 3 && ext_reason == 0) + dpc_process_rp_pio_error(dpc); + else if (reason == 0 && + dpc_get_aer_uncorrect_severity(pdev, &info) && + aer_get_device_error_info(pdev, &info)) { + aer_print_error(pdev, &info); + pci_cleanup_aer_uncorrect_error_status(pdev); + pci_aer_clear_fatal_status(pdev); + } + + /* We configure DPC so it only triggers on ERR_FATAL */ + pcie_do_fatal_recovery(pdev, PCIE_PORT_SERVICE_DPC); + + return IRQ_HANDLED; +} + +static irqreturn_t dpc_irq(int irq, void *context) +{ + struct dpc_dev *dpc = (struct dpc_dev *)context; + struct pci_dev *pdev = dpc->dev->port; + u16 cap = dpc->cap_pos, status; + + pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); + + if (!(status & PCI_EXP_DPC_STATUS_INTERRUPT) || status == (u16)(~0)) + return IRQ_NONE; + + pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS, + PCI_EXP_DPC_STATUS_INTERRUPT); + if (status & PCI_EXP_DPC_STATUS_TRIGGER) + return IRQ_WAKE_THREAD; + return IRQ_HANDLED; +} + +#define FLAG(x, y) (((x) & (y)) ? '+' : '-') +static int dpc_probe(struct pcie_device *dev) +{ + struct dpc_dev *dpc; + struct pci_dev *pdev = dev->port; + struct device *device = &dev->device; + int status; + u16 ctl, cap; + + if (pcie_aer_get_firmware_first(pdev)) + return -ENOTSUPP; + + dpc = devm_kzalloc(device, sizeof(*dpc), GFP_KERNEL); + if (!dpc) + return -ENOMEM; + + dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC); + dpc->dev = dev; + set_service_data(dev, dpc); + + status = devm_request_threaded_irq(device, dev->irq, dpc_irq, + dpc_handler, IRQF_SHARED, + "pcie-dpc", dpc); + if (status) { + dev_warn(device, "request IRQ%d failed: %d\n", dev->irq, + status); + return status; + } + + pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap); + pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl); + + dpc->rp_extensions = (cap & PCI_EXP_DPC_CAP_RP_EXT); + if (dpc->rp_extensions) { + dpc->rp_log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8; + if (dpc->rp_log_size < 4 || dpc->rp_log_size > 9) { + dev_err(device, "RP PIO log size %u is invalid\n", + dpc->rp_log_size); + dpc->rp_log_size = 0; + } + } + + ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN; + pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); + + dev_info(device, "DPC error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n", + cap & PCI_EXP_DPC_IRQ, FLAG(cap, PCI_EXP_DPC_CAP_RP_EXT), + FLAG(cap, PCI_EXP_DPC_CAP_POISONED_TLP), + FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), dpc->rp_log_size, + FLAG(cap, PCI_EXP_DPC_CAP_DL_ACTIVE)); + return status; +} + +static void dpc_remove(struct pcie_device *dev) +{ + struct dpc_dev *dpc = get_service_data(dev); + struct pci_dev *pdev = dev->port; + u16 ctl; + + pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl); + ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN); + pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); +} + +static struct pcie_port_service_driver dpcdriver = { + .name = "dpc", + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_DPC, + .probe = dpc_probe, + .remove = dpc_remove, + .reset_link = dpc_reset_link, +}; + +int __init pcie_dpc_init(void) +{ + return pcie_port_service_register(&dpcdriver); +} diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c new file mode 100644 index 000000000..2c3b5bd59 --- /dev/null +++ b/drivers/pci/pcie/err.c @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file implements the error recovery as a core part of PCIe error + * reporting. When a PCIe error is delivered, an error message will be + * collected and printed to console, then, an error recovery procedure + * will be executed by following the PCI error recovery rules. + * + * Copyright (C) 2006 Intel Corp. + * Tom Long Nguyen (tom.l.nguyen@intel.com) + * Zhang Yanmin (yanmin.zhang@intel.com) + */ + +#include <linux/pci.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/aer.h> +#include "portdrv.h" +#include "../pci.h" + +struct aer_broadcast_data { + enum pci_channel_state state; + enum pci_ers_result result; +}; + +static pci_ers_result_t merge_result(enum pci_ers_result orig, + enum pci_ers_result new) +{ + if (new == PCI_ERS_RESULT_NO_AER_DRIVER) + return PCI_ERS_RESULT_NO_AER_DRIVER; + + if (new == PCI_ERS_RESULT_NONE) + return orig; + + switch (orig) { + case PCI_ERS_RESULT_CAN_RECOVER: + case PCI_ERS_RESULT_RECOVERED: + orig = new; + break; + case PCI_ERS_RESULT_DISCONNECT: + if (new == PCI_ERS_RESULT_NEED_RESET) + orig = PCI_ERS_RESULT_NEED_RESET; + break; + default: + break; + } + + return orig; +} + +static int report_error_detected(struct pci_dev *dev, void *data) +{ + pci_ers_result_t vote; + const struct pci_error_handlers *err_handler; + struct aer_broadcast_data *result_data; + + result_data = (struct aer_broadcast_data *) data; + + device_lock(&dev->dev); + dev->error_state = result_data->state; + + if (!dev->driver || + !dev->driver->err_handler || + !dev->driver->err_handler->error_detected) { + /* + * If any device in the subtree does not have an error_detected + * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent + * error callbacks of "any" device in the subtree, and will + * exit in the disconnected error state. + */ + if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) + vote = PCI_ERS_RESULT_NO_AER_DRIVER; + else + vote = PCI_ERS_RESULT_NONE; + } else { + err_handler = dev->driver->err_handler; + vote = err_handler->error_detected(dev, result_data->state); + pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); + } + + result_data->result = merge_result(result_data->result, vote); + device_unlock(&dev->dev); + return 0; +} + +static int report_mmio_enabled(struct pci_dev *dev, void *data) +{ + pci_ers_result_t vote; + const struct pci_error_handlers *err_handler; + struct aer_broadcast_data *result_data; + + result_data = (struct aer_broadcast_data *) data; + + device_lock(&dev->dev); + if (!dev->driver || + !dev->driver->err_handler || + !dev->driver->err_handler->mmio_enabled) + goto out; + + err_handler = dev->driver->err_handler; + vote = err_handler->mmio_enabled(dev); + result_data->result = merge_result(result_data->result, vote); +out: + device_unlock(&dev->dev); + return 0; +} + +static int report_slot_reset(struct pci_dev *dev, void *data) +{ + pci_ers_result_t vote; + const struct pci_error_handlers *err_handler; + struct aer_broadcast_data *result_data; + + result_data = (struct aer_broadcast_data *) data; + + device_lock(&dev->dev); + if (!dev->driver || + !dev->driver->err_handler || + !dev->driver->err_handler->slot_reset) + goto out; + + err_handler = dev->driver->err_handler; + vote = err_handler->slot_reset(dev); + result_data->result = merge_result(result_data->result, vote); +out: + device_unlock(&dev->dev); + return 0; +} + +static int report_resume(struct pci_dev *dev, void *data) +{ + const struct pci_error_handlers *err_handler; + + device_lock(&dev->dev); + dev->error_state = pci_channel_io_normal; + + if (!dev->driver || + !dev->driver->err_handler || + !dev->driver->err_handler->resume) + goto out; + + err_handler = dev->driver->err_handler; + err_handler->resume(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); +out: + device_unlock(&dev->dev); + return 0; +} + +/** + * default_reset_link - default reset function + * @dev: pointer to pci_dev data structure + * + * Invoked when performing link reset on a Downstream Port or a + * Root Port with no aer driver. + */ +static pci_ers_result_t default_reset_link(struct pci_dev *dev) +{ + int rc; + + rc = pci_bus_error_reset(dev); + pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n"); + return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; +} + +static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) +{ + pci_ers_result_t status; + struct pcie_port_service_driver *driver = NULL; + + driver = pcie_port_find_service(dev, service); + if (driver && driver->reset_link) { + status = driver->reset_link(dev); + } else if (dev->has_secondary_link) { + status = default_reset_link(dev); + } else { + pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n", + pci_name(dev)); + return PCI_ERS_RESULT_DISCONNECT; + } + + if (status != PCI_ERS_RESULT_RECOVERED) { + pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n", + pci_name(dev)); + return PCI_ERS_RESULT_DISCONNECT; + } + + return status; +} + +/** + * broadcast_error_message - handle message broadcast to downstream drivers + * @dev: pointer to from where in a hierarchy message is broadcasted down + * @state: error state + * @error_mesg: message to print + * @cb: callback to be broadcasted + * + * Invoked during error recovery process. Once being invoked, the content + * of error severity will be broadcasted to all downstream drivers in a + * hierarchy in question. + */ +static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, + enum pci_channel_state state, + char *error_mesg, + int (*cb)(struct pci_dev *, void *)) +{ + struct aer_broadcast_data result_data; + + pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg); + result_data.state = state; + if (cb == report_error_detected) + result_data.result = PCI_ERS_RESULT_CAN_RECOVER; + else + result_data.result = PCI_ERS_RESULT_RECOVERED; + + pci_walk_bus(dev->subordinate, cb, &result_data); + return result_data.result; +} + +/** + * pcie_do_fatal_recovery - handle fatal error recovery process + * @dev: pointer to a pci_dev data structure of agent detecting an error + * + * Invoked when an error is fatal. Once being invoked, removes the devices + * beneath this AER agent, followed by reset link e.g. secondary bus reset + * followed by re-enumeration of devices. + */ +void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service) +{ + struct pci_dev *udev; + struct pci_bus *parent; + struct pci_dev *pdev, *temp; + pci_ers_result_t result; + + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) + udev = dev; + else + udev = dev->bus->self; + + parent = udev->subordinate; + pci_lock_rescan_remove(); + pci_dev_get(dev); + list_for_each_entry_safe_reverse(pdev, temp, &parent->devices, + bus_list) { + pci_dev_get(pdev); + pci_dev_set_disconnected(pdev, NULL); + if (pci_has_subordinate(pdev)) + pci_walk_bus(pdev->subordinate, + pci_dev_set_disconnected, NULL); + pci_stop_and_remove_bus_device(pdev); + pci_dev_put(pdev); + } + + result = reset_link(udev, service); + + if ((service == PCIE_PORT_SERVICE_AER) && + (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) { + /* + * If the error is reported by a bridge, we think this error + * is related to the downstream link of the bridge, so we + * do error recovery on all subordinates of the bridge instead + * of the bridge and clear the error status of the bridge. + */ + pci_aer_clear_fatal_status(dev); + pci_aer_clear_device_status(dev); + } + + if (result == PCI_ERS_RESULT_RECOVERED) { + if (pcie_wait_for_link(udev, true)) + pci_rescan_bus(udev->bus); + pci_info(dev, "Device recovery from fatal error successful\n"); + } else { + pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); + pci_info(dev, "Device recovery from fatal error failed\n"); + } + + pci_dev_put(dev); + pci_unlock_rescan_remove(); +} + +/** + * pcie_do_nonfatal_recovery - handle nonfatal error recovery process + * @dev: pointer to a pci_dev data structure of agent detecting an error + * + * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast + * error detected message to all downstream drivers within a hierarchy in + * question and return the returned code. + */ +void pcie_do_nonfatal_recovery(struct pci_dev *dev) +{ + pci_ers_result_t status; + enum pci_channel_state state; + + state = pci_channel_io_normal; + + /* + * Error recovery runs on all subordinates of the first downstream port. + * If the downstream port detected the error, it is cleared at the end. + */ + if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)) + dev = dev->bus->self; + + status = broadcast_error_message(dev, + state, + "error_detected", + report_error_detected); + + if (status == PCI_ERS_RESULT_CAN_RECOVER) + status = broadcast_error_message(dev, + state, + "mmio_enabled", + report_mmio_enabled); + + if (status == PCI_ERS_RESULT_NEED_RESET) { + /* + * TODO: Should call platform-specific + * functions to reset slot before calling + * drivers' slot_reset callbacks? + */ + status = broadcast_error_message(dev, + state, + "slot_reset", + report_slot_reset); + } + + if (status != PCI_ERS_RESULT_RECOVERED) + goto failed; + + broadcast_error_message(dev, + state, + "resume", + report_resume); + + pci_aer_clear_device_status(dev); + pci_cleanup_aer_uncorrect_error_status(dev); + pci_info(dev, "AER: Device recovery successful\n"); + return; + +failed: + pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); + + /* TODO: Should kernel panic here? */ + pci_info(dev, "AER: Device recovery failed\n"); +} diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c new file mode 100644 index 000000000..54d593d10 --- /dev/null +++ b/drivers/pci/pcie/pme.c @@ -0,0 +1,461 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCIe Native PME support + * + * Copyright (C) 2007 - 2009 Intel Corp + * Copyright (C) 2007 - 2009 Shaohua Li <shaohua.li@intel.com> + * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. + */ + +#include <linux/pci.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/device.h> +#include <linux/pm_runtime.h> + +#include "../pci.h" +#include "portdrv.h" + +/* + * If this switch is set, MSI will not be used for PCIe PME signaling. This + * causes the PCIe port driver to use INTx interrupts only, but it turns out + * that using MSI for PCIe PME signaling doesn't play well with PCIe PME-based + * wake-up from system sleep states. + */ +bool pcie_pme_msi_disabled; + +static int __init pcie_pme_setup(char *str) +{ + if (!strncmp(str, "nomsi", 5)) + pcie_pme_msi_disabled = true; + + return 1; +} +__setup("pcie_pme=", pcie_pme_setup); + +struct pcie_pme_service_data { + spinlock_t lock; + struct pcie_device *srv; + struct work_struct work; + bool noirq; /* If set, keep the PME interrupt disabled. */ +}; + +/** + * pcie_pme_interrupt_enable - Enable/disable PCIe PME interrupt generation. + * @dev: PCIe root port or event collector. + * @enable: Enable or disable the interrupt. + */ +void pcie_pme_interrupt_enable(struct pci_dev *dev, bool enable) +{ + if (enable) + pcie_capability_set_word(dev, PCI_EXP_RTCTL, + PCI_EXP_RTCTL_PMEIE); + else + pcie_capability_clear_word(dev, PCI_EXP_RTCTL, + PCI_EXP_RTCTL_PMEIE); +} + +/** + * pcie_pme_walk_bus - Scan a PCI bus for devices asserting PME#. + * @bus: PCI bus to scan. + * + * Scan given PCI bus and all buses under it for devices asserting PME#. + */ +static bool pcie_pme_walk_bus(struct pci_bus *bus) +{ + struct pci_dev *dev; + bool ret = false; + + list_for_each_entry(dev, &bus->devices, bus_list) { + /* Skip PCIe devices in case we started from a root port. */ + if (!pci_is_pcie(dev) && pci_check_pme_status(dev)) { + if (dev->pme_poll) + dev->pme_poll = false; + + pci_wakeup_event(dev); + pm_request_resume(&dev->dev); + ret = true; + } + + if (dev->subordinate && pcie_pme_walk_bus(dev->subordinate)) + ret = true; + } + + return ret; +} + +/** + * pcie_pme_from_pci_bridge - Check if PCIe-PCI bridge generated a PME. + * @bus: Secondary bus of the bridge. + * @devfn: Device/function number to check. + * + * PME from PCI devices under a PCIe-PCI bridge may be converted to an in-band + * PCIe PME message. In such that case the bridge should use the Requester ID + * of device/function number 0 on its secondary bus. + */ +static bool pcie_pme_from_pci_bridge(struct pci_bus *bus, u8 devfn) +{ + struct pci_dev *dev; + bool found = false; + + if (devfn) + return false; + + dev = pci_dev_get(bus->self); + if (!dev) + return false; + + if (pci_is_pcie(dev) && pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) { + down_read(&pci_bus_sem); + if (pcie_pme_walk_bus(bus)) + found = true; + up_read(&pci_bus_sem); + } + + pci_dev_put(dev); + return found; +} + +/** + * pcie_pme_handle_request - Find device that generated PME and handle it. + * @port: Root port or event collector that generated the PME interrupt. + * @req_id: PCIe Requester ID of the device that generated the PME. + */ +static void pcie_pme_handle_request(struct pci_dev *port, u16 req_id) +{ + u8 busnr = req_id >> 8, devfn = req_id & 0xff; + struct pci_bus *bus; + struct pci_dev *dev; + bool found = false; + + /* First, check if the PME is from the root port itself. */ + if (port->devfn == devfn && port->bus->number == busnr) { + if (port->pme_poll) + port->pme_poll = false; + + if (pci_check_pme_status(port)) { + pm_request_resume(&port->dev); + found = true; + } else { + /* + * Apparently, the root port generated the PME on behalf + * of a non-PCIe device downstream. If this is done by + * a root port, the Requester ID field in its status + * register may contain either the root port's, or the + * source device's information (PCI Express Base + * Specification, Rev. 2.0, Section 6.1.9). + */ + down_read(&pci_bus_sem); + found = pcie_pme_walk_bus(port->subordinate); + up_read(&pci_bus_sem); + } + goto out; + } + + /* Second, find the bus the source device is on. */ + bus = pci_find_bus(pci_domain_nr(port->bus), busnr); + if (!bus) + goto out; + + /* Next, check if the PME is from a PCIe-PCI bridge. */ + found = pcie_pme_from_pci_bridge(bus, devfn); + if (found) + goto out; + + /* Finally, try to find the PME source on the bus. */ + down_read(&pci_bus_sem); + list_for_each_entry(dev, &bus->devices, bus_list) { + pci_dev_get(dev); + if (dev->devfn == devfn) { + found = true; + break; + } + pci_dev_put(dev); + } + up_read(&pci_bus_sem); + + if (found) { + /* The device is there, but we have to check its PME status. */ + found = pci_check_pme_status(dev); + if (found) { + if (dev->pme_poll) + dev->pme_poll = false; + + pci_wakeup_event(dev); + pm_request_resume(&dev->dev); + } + pci_dev_put(dev); + } else if (devfn) { + /* + * The device is not there, but we can still try to recover by + * assuming that the PME was reported by a PCIe-PCI bridge that + * used devfn different from zero. + */ + pci_dbg(port, "PME interrupt generated for non-existent device %02x:%02x.%d\n", + busnr, PCI_SLOT(devfn), PCI_FUNC(devfn)); + found = pcie_pme_from_pci_bridge(bus, 0); + } + + out: + if (!found) + pci_dbg(port, "Spurious native PME interrupt!\n"); +} + +/** + * pcie_pme_work_fn - Work handler for PCIe PME interrupt. + * @work: Work structure giving access to service data. + */ +static void pcie_pme_work_fn(struct work_struct *work) +{ + struct pcie_pme_service_data *data = + container_of(work, struct pcie_pme_service_data, work); + struct pci_dev *port = data->srv->port; + u32 rtsta; + + spin_lock_irq(&data->lock); + + for (;;) { + if (data->noirq) + break; + + pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta); + if (rtsta == (u32) ~0) + break; + + if (rtsta & PCI_EXP_RTSTA_PME) { + /* + * Clear PME status of the port. If there are other + * pending PMEs, the status will be set again. + */ + pcie_clear_root_pme_status(port); + + spin_unlock_irq(&data->lock); + pcie_pme_handle_request(port, rtsta & 0xffff); + spin_lock_irq(&data->lock); + + continue; + } + + /* No need to loop if there are no more PMEs pending. */ + if (!(rtsta & PCI_EXP_RTSTA_PENDING)) + break; + + spin_unlock_irq(&data->lock); + cpu_relax(); + spin_lock_irq(&data->lock); + } + + if (!data->noirq) + pcie_pme_interrupt_enable(port, true); + + spin_unlock_irq(&data->lock); +} + +/** + * pcie_pme_irq - Interrupt handler for PCIe root port PME interrupt. + * @irq: Interrupt vector. + * @context: Interrupt context pointer. + */ +static irqreturn_t pcie_pme_irq(int irq, void *context) +{ + struct pci_dev *port; + struct pcie_pme_service_data *data; + u32 rtsta; + unsigned long flags; + + port = ((struct pcie_device *)context)->port; + data = get_service_data((struct pcie_device *)context); + + spin_lock_irqsave(&data->lock, flags); + pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta); + + if (rtsta == (u32) ~0 || !(rtsta & PCI_EXP_RTSTA_PME)) { + spin_unlock_irqrestore(&data->lock, flags); + return IRQ_NONE; + } + + pcie_pme_interrupt_enable(port, false); + spin_unlock_irqrestore(&data->lock, flags); + + /* We don't use pm_wq, because it's freezable. */ + schedule_work(&data->work); + + return IRQ_HANDLED; +} + +/** + * pcie_pme_can_wakeup - Set the wakeup capability flag. + * @dev: PCI device to handle. + * @ign: Ignored. + */ +static int pcie_pme_can_wakeup(struct pci_dev *dev, void *ign) +{ + device_set_wakeup_capable(&dev->dev, true); + return 0; +} + +/** + * pcie_pme_mark_devices - Set the wakeup flag for devices below a port. + * @port: PCIe root port or event collector to handle. + * + * For each device below given root port, including the port itself (or for each + * root complex integrated endpoint if @port is a root complex event collector) + * set the flag indicating that it can signal run-time wake-up events. + */ +static void pcie_pme_mark_devices(struct pci_dev *port) +{ + pcie_pme_can_wakeup(port, NULL); + if (port->subordinate) + pci_walk_bus(port->subordinate, pcie_pme_can_wakeup, NULL); +} + +/** + * pcie_pme_probe - Initialize PCIe PME service for given root port. + * @srv: PCIe service to initialize. + */ +static int pcie_pme_probe(struct pcie_device *srv) +{ + struct pci_dev *port; + struct pcie_pme_service_data *data; + int ret; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + spin_lock_init(&data->lock); + INIT_WORK(&data->work, pcie_pme_work_fn); + data->srv = srv; + set_service_data(srv, data); + + port = srv->port; + pcie_pme_interrupt_enable(port, false); + pcie_clear_root_pme_status(port); + + ret = request_irq(srv->irq, pcie_pme_irq, IRQF_SHARED, "PCIe PME", srv); + if (ret) { + kfree(data); + return ret; + } + + pci_info(port, "Signaling PME with IRQ %d\n", srv->irq); + + pcie_pme_mark_devices(port); + pcie_pme_interrupt_enable(port, true); + return 0; +} + +static bool pcie_pme_check_wakeup(struct pci_bus *bus) +{ + struct pci_dev *dev; + + if (!bus) + return false; + + list_for_each_entry(dev, &bus->devices, bus_list) + if (device_may_wakeup(&dev->dev) + || pcie_pme_check_wakeup(dev->subordinate)) + return true; + + return false; +} + +static void pcie_pme_disable_interrupt(struct pci_dev *port, + struct pcie_pme_service_data *data) +{ + spin_lock_irq(&data->lock); + pcie_pme_interrupt_enable(port, false); + pcie_clear_root_pme_status(port); + data->noirq = true; + spin_unlock_irq(&data->lock); +} + +/** + * pcie_pme_suspend - Suspend PCIe PME service device. + * @srv: PCIe service device to suspend. + */ +static int pcie_pme_suspend(struct pcie_device *srv) +{ + struct pcie_pme_service_data *data = get_service_data(srv); + struct pci_dev *port = srv->port; + bool wakeup; + int ret; + + if (device_may_wakeup(&port->dev)) { + wakeup = true; + } else { + down_read(&pci_bus_sem); + wakeup = pcie_pme_check_wakeup(port->subordinate); + up_read(&pci_bus_sem); + } + if (wakeup) { + ret = enable_irq_wake(srv->irq); + if (!ret) + return 0; + } + + pcie_pme_disable_interrupt(port, data); + + synchronize_irq(srv->irq); + + return 0; +} + +/** + * pcie_pme_resume - Resume PCIe PME service device. + * @srv - PCIe service device to resume. + */ +static int pcie_pme_resume(struct pcie_device *srv) +{ + struct pcie_pme_service_data *data = get_service_data(srv); + + spin_lock_irq(&data->lock); + if (data->noirq) { + struct pci_dev *port = srv->port; + + pcie_clear_root_pme_status(port); + pcie_pme_interrupt_enable(port, true); + data->noirq = false; + } else { + disable_irq_wake(srv->irq); + } + spin_unlock_irq(&data->lock); + + return 0; +} + +/** + * pcie_pme_remove - Prepare PCIe PME service device for removal. + * @srv - PCIe service device to remove. + */ +static void pcie_pme_remove(struct pcie_device *srv) +{ + struct pcie_pme_service_data *data = get_service_data(srv); + + pcie_pme_disable_interrupt(srv->port, data); + free_irq(srv->irq, srv); + cancel_work_sync(&data->work); + kfree(data); +} + +static struct pcie_port_service_driver pcie_pme_driver = { + .name = "pcie_pme", + .port_type = PCI_EXP_TYPE_ROOT_PORT, + .service = PCIE_PORT_SERVICE_PME, + + .probe = pcie_pme_probe, + .suspend = pcie_pme_suspend, + .resume = pcie_pme_resume, + .remove = pcie_pme_remove, +}; + +/** + * pcie_pme_service_init - Register the PCIe PME service driver. + */ +int __init pcie_pme_init(void) +{ + return pcie_port_service_register(&pcie_pme_driver); +} diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h new file mode 100644 index 000000000..2498b2d34 --- /dev/null +++ b/drivers/pci/pcie/portdrv.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Purpose: PCI Express Port Bus Driver's Internal Data Structures + * + * Copyright (C) 2004 Intel + * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) + */ + +#ifndef _PORTDRV_H_ +#define _PORTDRV_H_ + +#include <linux/compiler.h> + +/* Service Type */ +#define PCIE_PORT_SERVICE_PME_SHIFT 0 /* Power Management Event */ +#define PCIE_PORT_SERVICE_PME (1 << PCIE_PORT_SERVICE_PME_SHIFT) +#define PCIE_PORT_SERVICE_AER_SHIFT 1 /* Advanced Error Reporting */ +#define PCIE_PORT_SERVICE_AER (1 << PCIE_PORT_SERVICE_AER_SHIFT) +#define PCIE_PORT_SERVICE_HP_SHIFT 2 /* Native Hotplug */ +#define PCIE_PORT_SERVICE_HP (1 << PCIE_PORT_SERVICE_HP_SHIFT) +#define PCIE_PORT_SERVICE_DPC_SHIFT 3 /* Downstream Port Containment */ +#define PCIE_PORT_SERVICE_DPC (1 << PCIE_PORT_SERVICE_DPC_SHIFT) + +#define PCIE_PORT_DEVICE_MAXSERVICES 4 + +#ifdef CONFIG_PCIEAER +int pcie_aer_init(void); +#else +static inline int pcie_aer_init(void) { return 0; } +#endif + +#ifdef CONFIG_HOTPLUG_PCI_PCIE +int pcie_hp_init(void); +#else +static inline int pcie_hp_init(void) { return 0; } +#endif + +#ifdef CONFIG_PCIE_PME +int pcie_pme_init(void); +#else +static inline int pcie_pme_init(void) { return 0; } +#endif + +#ifdef CONFIG_PCIE_DPC +int pcie_dpc_init(void); +#else +static inline int pcie_dpc_init(void) { return 0; } +#endif + +/* Port Type */ +#define PCIE_ANY_PORT (~0) + +struct pcie_device { + int irq; /* Service IRQ/MSI/MSI-X Vector */ + struct pci_dev *port; /* Root/Upstream/Downstream Port */ + u32 service; /* Port service this device represents */ + void *priv_data; /* Service Private Data */ + struct device device; /* Generic Device Interface */ +}; +#define to_pcie_device(d) container_of(d, struct pcie_device, device) + +static inline void set_service_data(struct pcie_device *dev, void *data) +{ + dev->priv_data = data; +} + +static inline void *get_service_data(struct pcie_device *dev) +{ + return dev->priv_data; +} + +struct pcie_port_service_driver { + const char *name; + int (*probe) (struct pcie_device *dev); + void (*remove) (struct pcie_device *dev); + int (*suspend) (struct pcie_device *dev); + int (*resume_noirq) (struct pcie_device *dev); + int (*resume) (struct pcie_device *dev); + + /* Device driver may resume normal operations */ + void (*error_resume)(struct pci_dev *dev); + + /* Link Reset Capability - AER service driver specific */ + pci_ers_result_t (*reset_link) (struct pci_dev *dev); + + int port_type; /* Type of the port this driver can handle */ + u32 service; /* Port service this device represents */ + + struct device_driver driver; +}; +#define to_service_driver(d) \ + container_of(d, struct pcie_port_service_driver, driver) + +int pcie_port_service_register(struct pcie_port_service_driver *new); +void pcie_port_service_unregister(struct pcie_port_service_driver *new); + +/* + * The PCIe Capability Interrupt Message Number (PCIe r3.1, sec 7.8.2) must + * be one of the first 32 MSI-X entries. Per PCI r3.0, sec 6.8.3.1, MSI + * supports a maximum of 32 vectors per function. + */ +#define PCIE_PORT_MAX_MSI_ENTRIES 32 + +#define get_descriptor_id(type, service) (((type - 4) << 8) | service) + +extern struct bus_type pcie_port_bus_type; +int pcie_port_device_register(struct pci_dev *dev); +#ifdef CONFIG_PM +int pcie_port_device_suspend(struct device *dev); +int pcie_port_device_resume_noirq(struct device *dev); +int pcie_port_device_resume(struct device *dev); +#endif +void pcie_port_device_remove(struct pci_dev *dev); +int __must_check pcie_port_bus_register(void); +void pcie_port_bus_unregister(void); + +struct pci_dev; + +#ifdef CONFIG_PCIE_PME +extern bool pcie_pme_msi_disabled; + +static inline void pcie_pme_disable_msi(void) +{ + pcie_pme_msi_disabled = true; +} + +static inline bool pcie_pme_no_msi(void) +{ + return pcie_pme_msi_disabled; +} + +void pcie_pme_interrupt_enable(struct pci_dev *dev, bool enable); +#else /* !CONFIG_PCIE_PME */ +static inline void pcie_pme_disable_msi(void) {} +static inline bool pcie_pme_no_msi(void) { return false; } +static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {} +#endif /* !CONFIG_PCIE_PME */ + +#ifdef CONFIG_ACPI_APEI +int pcie_aer_get_firmware_first(struct pci_dev *pci_dev); +#else +static inline int pcie_aer_get_firmware_first(struct pci_dev *pci_dev) +{ + if (pci_dev->__aer_firmware_first_valid) + return pci_dev->__aer_firmware_first; + return 0; +} +#endif + +#ifdef CONFIG_PCIEAER +irqreturn_t aer_irq(int irq, void *context); +#endif + +struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, + u32 service); +struct device *pcie_port_find_device(struct pci_dev *dev, u32 service); +#endif /* _PORTDRV_H_ */ diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c new file mode 100644 index 000000000..7c37d8152 --- /dev/null +++ b/drivers/pci/pcie/portdrv_core.c @@ -0,0 +1,578 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Purpose: PCI Express Port Bus Driver's Core Functions + * + * Copyright (C) 2004 Intel + * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) + */ + +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/pm.h> +#include <linux/pm_runtime.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/aer.h> + +#include "../pci.h" +#include "portdrv.h" + +struct portdrv_service_data { + struct pcie_port_service_driver *drv; + struct device *dev; + u32 service; +}; + +/** + * release_pcie_device - free PCI Express port service device structure + * @dev: Port service device to release + * + * Invoked automatically when device is being removed in response to + * device_unregister(dev). Release all resources being claimed. + */ +static void release_pcie_device(struct device *dev) +{ + kfree(to_pcie_device(dev)); +} + +/* + * Fill in *pme, *aer, *dpc with the relevant Interrupt Message Numbers if + * services are enabled in "mask". Return the number of MSI/MSI-X vectors + * required to accommodate the largest Message Number. + */ +static int pcie_message_numbers(struct pci_dev *dev, int mask, + u32 *pme, u32 *aer, u32 *dpc) +{ + u32 nvec = 0, pos; + u16 reg16; + + /* + * The Interrupt Message Number indicates which vector is used, i.e., + * the MSI-X table entry or the MSI offset between the base Message + * Data and the generated interrupt message. See PCIe r3.1, sec + * 7.8.2, 7.10.10, 7.31.2. + */ + + if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP)) { + pcie_capability_read_word(dev, PCI_EXP_FLAGS, ®16); + *pme = (reg16 & PCI_EXP_FLAGS_IRQ) >> 9; + nvec = *pme + 1; + } + +#ifdef CONFIG_PCIEAER + if (mask & PCIE_PORT_SERVICE_AER) { + u32 reg32; + + pos = dev->aer_cap; + if (pos) { + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, + ®32); + *aer = (reg32 & PCI_ERR_ROOT_AER_IRQ) >> 27; + nvec = max(nvec, *aer + 1); + } + } +#endif + + if (mask & PCIE_PORT_SERVICE_DPC) { + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DPC); + if (pos) { + pci_read_config_word(dev, pos + PCI_EXP_DPC_CAP, + ®16); + *dpc = reg16 & PCI_EXP_DPC_IRQ; + nvec = max(nvec, *dpc + 1); + } + } + + return nvec; +} + +/** + * pcie_port_enable_irq_vec - try to set up MSI-X or MSI as interrupt mode + * for given port + * @dev: PCI Express port to handle + * @irqs: Array of interrupt vectors to populate + * @mask: Bitmask of port capabilities returned by get_port_device_capability() + * + * Return value: 0 on success, error code on failure + */ +static int pcie_port_enable_irq_vec(struct pci_dev *dev, int *irqs, int mask) +{ + int nr_entries, nvec; + u32 pme = 0, aer = 0, dpc = 0; + + /* Allocate the maximum possible number of MSI/MSI-X vectors */ + nr_entries = pci_alloc_irq_vectors(dev, 1, PCIE_PORT_MAX_MSI_ENTRIES, + PCI_IRQ_MSIX | PCI_IRQ_MSI); + if (nr_entries < 0) + return nr_entries; + + /* See how many and which Interrupt Message Numbers we actually use */ + nvec = pcie_message_numbers(dev, mask, &pme, &aer, &dpc); + if (nvec > nr_entries) { + pci_free_irq_vectors(dev); + return -EIO; + } + + /* + * If we allocated more than we need, free them and reallocate fewer. + * + * Reallocating may change the specific vectors we get, so + * pci_irq_vector() must be done *after* the reallocation. + * + * If we're using MSI, hardware is *allowed* to change the Interrupt + * Message Numbers when we free and reallocate the vectors, but we + * assume it won't because we allocate enough vectors for the + * biggest Message Number we found. + */ + if (nvec != nr_entries) { + pci_free_irq_vectors(dev); + + nr_entries = pci_alloc_irq_vectors(dev, nvec, nvec, + PCI_IRQ_MSIX | PCI_IRQ_MSI); + if (nr_entries < 0) + return nr_entries; + } + + /* PME and hotplug share an MSI/MSI-X vector */ + if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP)) { + irqs[PCIE_PORT_SERVICE_PME_SHIFT] = pci_irq_vector(dev, pme); + irqs[PCIE_PORT_SERVICE_HP_SHIFT] = pci_irq_vector(dev, pme); + } + + if (mask & PCIE_PORT_SERVICE_AER) + irqs[PCIE_PORT_SERVICE_AER_SHIFT] = pci_irq_vector(dev, aer); + + if (mask & PCIE_PORT_SERVICE_DPC) + irqs[PCIE_PORT_SERVICE_DPC_SHIFT] = pci_irq_vector(dev, dpc); + + return 0; +} + +/** + * pcie_init_service_irqs - initialize irqs for PCI Express port services + * @dev: PCI Express port to handle + * @irqs: Array of irqs to populate + * @mask: Bitmask of port capabilities returned by get_port_device_capability() + * + * Return value: Interrupt mode associated with the port + */ +static int pcie_init_service_irqs(struct pci_dev *dev, int *irqs, int mask) +{ + int ret, i; + + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) + irqs[i] = -1; + + /* + * If we support PME but can't use MSI/MSI-X for it, we have to + * fall back to INTx or other interrupts, e.g., a system shared + * interrupt. + */ + if ((mask & PCIE_PORT_SERVICE_PME) && pcie_pme_no_msi()) + goto legacy_irq; + + /* Try to use MSI-X or MSI if supported */ + if (pcie_port_enable_irq_vec(dev, irqs, mask) == 0) + return 0; + +legacy_irq: + /* fall back to legacy IRQ */ + ret = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_LEGACY); + if (ret < 0) + return -ENODEV; + + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) + irqs[i] = pci_irq_vector(dev, 0); + + return 0; +} + +/** + * get_port_device_capability - discover capabilities of a PCI Express port + * @dev: PCI Express port to examine + * + * The capabilities are read from the port's PCI Express configuration registers + * as described in PCI Express Base Specification 1.0a sections 7.8.2, 7.8.9 and + * 7.9 - 7.11. + * + * Return value: Bitmask of discovered port capabilities + */ +static int get_port_device_capability(struct pci_dev *dev) +{ + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); + int services = 0; + + if (dev->is_hotplug_bridge && + (pcie_ports_native || host->native_pcie_hotplug)) { + services |= PCIE_PORT_SERVICE_HP; + + /* + * Disable hot-plug interrupts in case they have been enabled + * by the BIOS and the hot-plug service driver is not loaded. + */ + pcie_capability_clear_word(dev, PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_CCIE | PCI_EXP_SLTCTL_HPIE); + } + +#ifdef CONFIG_PCIEAER + if (dev->aer_cap && pci_aer_available() && + (pcie_ports_native || host->native_aer)) { + services |= PCIE_PORT_SERVICE_AER; + + /* + * Disable AER on this port in case it's been enabled by the + * BIOS (the AER service driver will enable it when necessary). + */ + pci_disable_pcie_error_reporting(dev); + } +#endif + + /* + * Root ports are capable of generating PME too. Root Complex + * Event Collectors can also generate PMEs, but we don't handle + * those yet. + */ + if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT && + (pcie_ports_native || host->native_pme)) { + services |= PCIE_PORT_SERVICE_PME; + + /* + * Disable PME interrupt on this port in case it's been enabled + * by the BIOS (the PME service driver will enable it when + * necessary). + */ + pcie_pme_interrupt_enable(dev, false); + } + + if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DPC) && + pci_aer_available() && services & PCIE_PORT_SERVICE_AER) + services |= PCIE_PORT_SERVICE_DPC; + + return services; +} + +/** + * pcie_device_init - allocate and initialize PCI Express port service device + * @pdev: PCI Express port to associate the service device with + * @service: Type of service to associate with the service device + * @irq: Interrupt vector to associate with the service device + */ +static int pcie_device_init(struct pci_dev *pdev, int service, int irq) +{ + int retval; + struct pcie_device *pcie; + struct device *device; + + pcie = kzalloc(sizeof(*pcie), GFP_KERNEL); + if (!pcie) + return -ENOMEM; + pcie->port = pdev; + pcie->irq = irq; + pcie->service = service; + + /* Initialize generic device interface */ + device = &pcie->device; + device->bus = &pcie_port_bus_type; + device->release = release_pcie_device; /* callback to free pcie dev */ + dev_set_name(device, "%s:pcie%03x", + pci_name(pdev), + get_descriptor_id(pci_pcie_type(pdev), service)); + device->parent = &pdev->dev; + device_enable_async_suspend(device); + + retval = device_register(device); + if (retval) { + put_device(device); + return retval; + } + + pm_runtime_no_callbacks(device); + + return 0; +} + +/** + * pcie_port_device_register - register PCI Express port + * @dev: PCI Express port to register + * + * Allocate the port extension structure and register services associated with + * the port. + */ +int pcie_port_device_register(struct pci_dev *dev) +{ + int status, capabilities, i, nr_service; + int irqs[PCIE_PORT_DEVICE_MAXSERVICES]; + + /* Enable PCI Express port device */ + status = pci_enable_device(dev); + if (status) + return status; + + /* Get and check PCI Express port services */ + capabilities = get_port_device_capability(dev); + if (!capabilities) + return 0; + + pci_set_master(dev); + /* + * Initialize service irqs. Don't use service devices that + * require interrupts if there is no way to generate them. + * However, some drivers may have a polling mode (e.g. pciehp_poll_mode) + * that can be used in the absence of irqs. Allow them to determine + * if that is to be used. + */ + status = pcie_init_service_irqs(dev, irqs, capabilities); + if (status) { + capabilities &= PCIE_PORT_SERVICE_HP; + if (!capabilities) + goto error_disable; + } + + /* Allocate child services if any */ + status = -ENODEV; + nr_service = 0; + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { + int service = 1 << i; + if (!(capabilities & service)) + continue; + if (!pcie_device_init(dev, service, irqs[i])) + nr_service++; + } + if (!nr_service) + goto error_cleanup_irqs; + + return 0; + +error_cleanup_irqs: + pci_free_irq_vectors(dev); +error_disable: + pci_disable_device(dev); + return status; +} + +#ifdef CONFIG_PM +typedef int (*pcie_pm_callback_t)(struct pcie_device *); + +static int pm_iter(struct device *dev, void *data) +{ + struct pcie_port_service_driver *service_driver; + size_t offset = *(size_t *)data; + pcie_pm_callback_t cb; + + if ((dev->bus == &pcie_port_bus_type) && dev->driver) { + service_driver = to_service_driver(dev->driver); + cb = *(pcie_pm_callback_t *)((void *)service_driver + offset); + if (cb) + return cb(to_pcie_device(dev)); + } + return 0; +} + +/** + * pcie_port_device_suspend - suspend port services associated with a PCIe port + * @dev: PCI Express port to handle + */ +int pcie_port_device_suspend(struct device *dev) +{ + size_t off = offsetof(struct pcie_port_service_driver, suspend); + return device_for_each_child(dev, &off, pm_iter); +} + +int pcie_port_device_resume_noirq(struct device *dev) +{ + size_t off = offsetof(struct pcie_port_service_driver, resume_noirq); + return device_for_each_child(dev, &off, pm_iter); +} + +/** + * pcie_port_device_resume - resume port services associated with a PCIe port + * @dev: PCI Express port to handle + */ +int pcie_port_device_resume(struct device *dev) +{ + size_t off = offsetof(struct pcie_port_service_driver, resume); + return device_for_each_child(dev, &off, pm_iter); +} +#endif /* PM */ + +static int remove_iter(struct device *dev, void *data) +{ + if (dev->bus == &pcie_port_bus_type) + device_unregister(dev); + return 0; +} + +static int find_service_iter(struct device *device, void *data) +{ + struct pcie_port_service_driver *service_driver; + struct portdrv_service_data *pdrvs; + u32 service; + + pdrvs = (struct portdrv_service_data *) data; + service = pdrvs->service; + + if (device->bus == &pcie_port_bus_type && device->driver) { + service_driver = to_service_driver(device->driver); + if (service_driver->service == service) { + pdrvs->drv = service_driver; + pdrvs->dev = device; + return 1; + } + } + + return 0; +} + +/** + * pcie_port_find_service - find the service driver + * @dev: PCI Express port the service is associated with + * @service: Service to find + * + * Find PCI Express port service driver associated with given service + */ +struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, + u32 service) +{ + struct pcie_port_service_driver *drv; + struct portdrv_service_data pdrvs; + + pdrvs.drv = NULL; + pdrvs.service = service; + device_for_each_child(&dev->dev, &pdrvs, find_service_iter); + + drv = pdrvs.drv; + return drv; +} + +/** + * pcie_port_find_device - find the struct device + * @dev: PCI Express port the service is associated with + * @service: For the service to find + * + * Find the struct device associated with given service on a pci_dev + */ +struct device *pcie_port_find_device(struct pci_dev *dev, + u32 service) +{ + struct device *device; + struct portdrv_service_data pdrvs; + + pdrvs.dev = NULL; + pdrvs.service = service; + device_for_each_child(&dev->dev, &pdrvs, find_service_iter); + + device = pdrvs.dev; + return device; +} + +/** + * pcie_port_device_remove - unregister PCI Express port service devices + * @dev: PCI Express port the service devices to unregister are associated with + * + * Remove PCI Express port service devices associated with given port and + * disable MSI-X or MSI for the port. + */ +void pcie_port_device_remove(struct pci_dev *dev) +{ + device_for_each_child(&dev->dev, NULL, remove_iter); + pci_free_irq_vectors(dev); + pci_disable_device(dev); +} + +/** + * pcie_port_probe_service - probe driver for given PCI Express port service + * @dev: PCI Express port service device to probe against + * + * If PCI Express port service driver is registered with + * pcie_port_service_register(), this function will be called by the driver core + * whenever match is found between the driver and a port service device. + */ +static int pcie_port_probe_service(struct device *dev) +{ + struct pcie_device *pciedev; + struct pcie_port_service_driver *driver; + int status; + + if (!dev || !dev->driver) + return -ENODEV; + + driver = to_service_driver(dev->driver); + if (!driver || !driver->probe) + return -ENODEV; + + pciedev = to_pcie_device(dev); + status = driver->probe(pciedev); + if (status) + return status; + + get_device(dev); + return 0; +} + +/** + * pcie_port_remove_service - detach driver from given PCI Express port service + * @dev: PCI Express port service device to handle + * + * If PCI Express port service driver is registered with + * pcie_port_service_register(), this function will be called by the driver core + * when device_unregister() is called for the port service device associated + * with the driver. + */ +static int pcie_port_remove_service(struct device *dev) +{ + struct pcie_device *pciedev; + struct pcie_port_service_driver *driver; + + if (!dev || !dev->driver) + return 0; + + pciedev = to_pcie_device(dev); + driver = to_service_driver(dev->driver); + if (driver && driver->remove) { + driver->remove(pciedev); + put_device(dev); + } + return 0; +} + +/** + * pcie_port_shutdown_service - shut down given PCI Express port service + * @dev: PCI Express port service device to handle + * + * If PCI Express port service driver is registered with + * pcie_port_service_register(), this function will be called by the driver core + * when device_shutdown() is called for the port service device associated + * with the driver. + */ +static void pcie_port_shutdown_service(struct device *dev) {} + +/** + * pcie_port_service_register - register PCI Express port service driver + * @new: PCI Express port service driver to register + */ +int pcie_port_service_register(struct pcie_port_service_driver *new) +{ + if (pcie_ports_disabled) + return -ENODEV; + + new->driver.name = new->name; + new->driver.bus = &pcie_port_bus_type; + new->driver.probe = pcie_port_probe_service; + new->driver.remove = pcie_port_remove_service; + new->driver.shutdown = pcie_port_shutdown_service; + + return driver_register(&new->driver); +} +EXPORT_SYMBOL(pcie_port_service_register); + +/** + * pcie_port_service_unregister - unregister PCI Express port service driver + * @drv: PCI Express port service driver to unregister + */ +void pcie_port_service_unregister(struct pcie_port_service_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL(pcie_port_service_unregister); diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c new file mode 100644 index 000000000..23a5a0c2c --- /dev/null +++ b/drivers/pci/pcie/portdrv_pci.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Purpose: PCI Express Port Bus Driver + * Author: Tom Nguyen <tom.l.nguyen@intel.com> + * + * Copyright (C) 2004 Intel + * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) + */ + +#include <linux/pci.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/pm.h> +#include <linux/pm_runtime.h> +#include <linux/init.h> +#include <linux/aer.h> +#include <linux/dmi.h> + +#include "../pci.h" +#include "portdrv.h" + +/* If this switch is set, PCIe port native services should not be enabled. */ +bool pcie_ports_disabled; + +/* + * If the user specified "pcie_ports=native", use the PCIe services regardless + * of whether the platform has given us permission. On ACPI systems, this + * means we ignore _OSC. + */ +bool pcie_ports_native; + +static int __init pcie_port_setup(char *str) +{ + if (!strncmp(str, "compat", 6)) + pcie_ports_disabled = true; + else if (!strncmp(str, "native", 6)) + pcie_ports_native = true; + + return 1; +} +__setup("pcie_ports=", pcie_port_setup); + +/* global data */ + +#ifdef CONFIG_PM +static int pcie_port_runtime_suspend(struct device *dev) +{ + return to_pci_dev(dev)->bridge_d3 ? 0 : -EBUSY; +} + +static int pcie_port_runtime_resume(struct device *dev) +{ + return 0; +} + +static int pcie_port_runtime_idle(struct device *dev) +{ + /* + * Assume the PCI core has set bridge_d3 whenever it thinks the port + * should be good to go to D3. Everything else, including moving + * the port to D3, is handled by the PCI core. + */ + return to_pci_dev(dev)->bridge_d3 ? 0 : -EBUSY; +} + +static const struct dev_pm_ops pcie_portdrv_pm_ops = { + .suspend = pcie_port_device_suspend, + .resume_noirq = pcie_port_device_resume_noirq, + .resume = pcie_port_device_resume, + .freeze = pcie_port_device_suspend, + .thaw = pcie_port_device_resume, + .poweroff = pcie_port_device_suspend, + .restore_noirq = pcie_port_device_resume_noirq, + .restore = pcie_port_device_resume, + .runtime_suspend = pcie_port_runtime_suspend, + .runtime_resume = pcie_port_runtime_resume, + .runtime_idle = pcie_port_runtime_idle, +}; + +#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops) + +#else /* !PM */ + +#define PCIE_PORTDRV_PM_OPS NULL +#endif /* !PM */ + +/* + * pcie_portdrv_probe - Probe PCI-Express port devices + * @dev: PCI-Express port device being probed + * + * If detected invokes the pcie_port_device_register() method for + * this port device. + * + */ +static int pcie_portdrv_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + int status; + + if (!pci_is_pcie(dev) || + ((pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) && + (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM) && + (pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM))) + return -ENODEV; + + status = pcie_port_device_register(dev); + if (status) + return status; + + pci_save_state(dev); + + dev_pm_set_driver_flags(&dev->dev, DPM_FLAG_SMART_SUSPEND | + DPM_FLAG_LEAVE_SUSPENDED); + + if (pci_bridge_d3_possible(dev)) { + /* + * Keep the port resumed 100ms to make sure things like + * config space accesses from userspace (lspci) will not + * cause the port to repeatedly suspend and resume. + */ + pm_runtime_set_autosuspend_delay(&dev->dev, 100); + pm_runtime_use_autosuspend(&dev->dev); + pm_runtime_mark_last_busy(&dev->dev); + pm_runtime_put_autosuspend(&dev->dev); + pm_runtime_allow(&dev->dev); + } + + return 0; +} + +static void pcie_portdrv_remove(struct pci_dev *dev) +{ + if (pci_bridge_d3_possible(dev)) { + pm_runtime_forbid(&dev->dev); + pm_runtime_get_noresume(&dev->dev); + pm_runtime_dont_use_autosuspend(&dev->dev); + } + + pcie_port_device_remove(dev); +} + +static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev, + enum pci_channel_state error) +{ + /* Root Port has no impact. Always recovers. */ + return PCI_ERS_RESULT_CAN_RECOVER; +} + +static pci_ers_result_t pcie_portdrv_mmio_enabled(struct pci_dev *dev) +{ + return PCI_ERS_RESULT_RECOVERED; +} + +static int resume_iter(struct device *device, void *data) +{ + struct pcie_device *pcie_device; + struct pcie_port_service_driver *driver; + + if (device->bus == &pcie_port_bus_type && device->driver) { + driver = to_service_driver(device->driver); + if (driver && driver->error_resume) { + pcie_device = to_pcie_device(device); + + /* Forward error message to service drivers */ + driver->error_resume(pcie_device->port); + } + } + + return 0; +} + +static void pcie_portdrv_err_resume(struct pci_dev *dev) +{ + device_for_each_child(&dev->dev, NULL, resume_iter); +} + +/* + * LINUX Device Driver Model + */ +static const struct pci_device_id port_pci_ids[] = { { + /* handle any PCI-Express port */ + PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x00), ~0), + }, { /* end: all zeroes */ } +}; + +static const struct pci_error_handlers pcie_portdrv_err_handler = { + .error_detected = pcie_portdrv_error_detected, + .mmio_enabled = pcie_portdrv_mmio_enabled, + .resume = pcie_portdrv_err_resume, +}; + +static struct pci_driver pcie_portdriver = { + .name = "pcieport", + .id_table = &port_pci_ids[0], + + .probe = pcie_portdrv_probe, + .remove = pcie_portdrv_remove, + .shutdown = pcie_portdrv_remove, + + .err_handler = &pcie_portdrv_err_handler, + + .driver.pm = PCIE_PORTDRV_PM_OPS, +}; + +static int __init dmi_pcie_pme_disable_msi(const struct dmi_system_id *d) +{ + pr_notice("%s detected: will not use MSI for PCIe PME signaling\n", + d->ident); + pcie_pme_disable_msi(); + return 0; +} + +static const struct dmi_system_id pcie_portdrv_dmi_table[] __initconst = { + /* + * Boxes that should not use MSI for PCIe PME signaling. + */ + { + .callback = dmi_pcie_pme_disable_msi, + .ident = "MSI Wind U-100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, + "MICRO-STAR INTERNATIONAL CO., LTD"), + DMI_MATCH(DMI_PRODUCT_NAME, "U-100"), + }, + }, + {} +}; + +static void __init pcie_init_services(void) +{ + pcie_aer_init(); + pcie_pme_init(); + pcie_dpc_init(); + pcie_hp_init(); +} + +static int __init pcie_portdrv_init(void) +{ + if (pcie_ports_disabled) + return -EACCES; + + pcie_init_services(); + dmi_check_system(pcie_portdrv_dmi_table); + + return pci_register_driver(&pcie_portdriver); +} +device_initcall(pcie_portdrv_init); diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c new file mode 100644 index 000000000..357a454ca --- /dev/null +++ b/drivers/pci/pcie/ptm.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCI Express Precision Time Measurement + * Copyright (c) 2016, Intel Corporation. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include "../pci.h" + +static void pci_ptm_info(struct pci_dev *dev) +{ + char clock_desc[8]; + + switch (dev->ptm_granularity) { + case 0: + snprintf(clock_desc, sizeof(clock_desc), "unknown"); + break; + case 255: + snprintf(clock_desc, sizeof(clock_desc), ">254ns"); + break; + default: + snprintf(clock_desc, sizeof(clock_desc), "%uns", + dev->ptm_granularity); + break; + } + pci_info(dev, "PTM enabled%s, %s granularity\n", + dev->ptm_root ? " (root)" : "", clock_desc); +} + +void pci_ptm_init(struct pci_dev *dev) +{ + int pos; + u32 cap, ctrl; + u8 local_clock; + struct pci_dev *ups; + + if (!pci_is_pcie(dev)) + return; + + /* + * Enable PTM only on interior devices (root ports, switch ports, + * etc.) on the assumption that it causes no link traffic until an + * endpoint enables it. + */ + if ((pci_pcie_type(dev) == PCI_EXP_TYPE_ENDPOINT || + pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)) + return; + + /* + * Switch Downstream Ports are not permitted to have a PTM + * capability; their PTM behavior is controlled by the Upstream + * Port (PCIe r5.0, sec 7.9.16). + */ + ups = pci_upstream_bridge(dev); + if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM && + ups && ups->ptm_enabled) { + dev->ptm_granularity = ups->ptm_granularity; + dev->ptm_enabled = 1; + return; + } + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PTM); + if (!pos) + return; + + pci_read_config_dword(dev, pos + PCI_PTM_CAP, &cap); + local_clock = (cap & PCI_PTM_GRANULARITY_MASK) >> 8; + + /* + * There's no point in enabling PTM unless it's enabled in the + * upstream device or this device can be a PTM Root itself. Per + * the spec recommendation (PCIe r3.1, sec 7.32.3), select the + * furthest upstream Time Source as the PTM Root. + */ + if (ups && ups->ptm_enabled) { + ctrl = PCI_PTM_CTRL_ENABLE; + if (ups->ptm_granularity == 0) + dev->ptm_granularity = 0; + else if (ups->ptm_granularity > local_clock) + dev->ptm_granularity = ups->ptm_granularity; + } else { + if (cap & PCI_PTM_CAP_ROOT) { + ctrl = PCI_PTM_CTRL_ENABLE | PCI_PTM_CTRL_ROOT; + dev->ptm_root = 1; + dev->ptm_granularity = local_clock; + } else + return; + } + + ctrl |= dev->ptm_granularity << 8; + pci_write_config_dword(dev, pos + PCI_PTM_CTRL, ctrl); + dev->ptm_enabled = 1; + + pci_ptm_info(dev); +} + +int pci_enable_ptm(struct pci_dev *dev, u8 *granularity) +{ + int pos; + u32 cap, ctrl; + struct pci_dev *ups; + + if (!pci_is_pcie(dev)) + return -EINVAL; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PTM); + if (!pos) + return -EINVAL; + + pci_read_config_dword(dev, pos + PCI_PTM_CAP, &cap); + if (!(cap & PCI_PTM_CAP_REQ)) + return -EINVAL; + + /* + * For a PCIe Endpoint, PTM is only useful if the endpoint can + * issue PTM requests to upstream devices that have PTM enabled. + * + * For Root Complex Integrated Endpoints, there is no upstream + * device, so there must be some implementation-specific way to + * associate the endpoint with a time source. + */ + if (pci_pcie_type(dev) == PCI_EXP_TYPE_ENDPOINT) { + ups = pci_upstream_bridge(dev); + if (!ups || !ups->ptm_enabled) + return -EINVAL; + + dev->ptm_granularity = ups->ptm_granularity; + } else if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) { + dev->ptm_granularity = 0; + } else + return -EINVAL; + + ctrl = PCI_PTM_CTRL_ENABLE; + ctrl |= dev->ptm_granularity << 8; + pci_write_config_dword(dev, pos + PCI_PTM_CTRL, ctrl); + dev->ptm_enabled = 1; + + pci_ptm_info(dev); + + if (granularity) + *granularity = dev->ptm_granularity; + return 0; +} +EXPORT_SYMBOL(pci_enable_ptm); |