From ace9429bb58fd418f0c81d4c2835699bddf6bde6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 11 Apr 2024 10:27:49 +0200 Subject: Adding upstream version 6.6.15. Signed-off-by: Daniel Baumann --- drivers/pci/pcie/Kconfig | 148 +++++ drivers/pci/pcie/Makefile | 15 + drivers/pci/pcie/aer.c | 1424 ++++++++++++++++++++++++++++++++++++++++ drivers/pci/pcie/aer_inject.c | 548 ++++++++++++++++ drivers/pci/pcie/aspm.c | 1440 +++++++++++++++++++++++++++++++++++++++++ drivers/pci/pcie/dpc.c | 408 ++++++++++++ drivers/pci/pcie/edr.c | 249 +++++++ drivers/pci/pcie/err.c | 264 ++++++++ drivers/pci/pcie/pme.c | 471 ++++++++++++++ drivers/pci/pcie/portdrv.c | 843 ++++++++++++++++++++++++ drivers/pci/pcie/portdrv.h | 124 ++++ drivers/pci/pcie/ptm.c | 253 ++++++++ drivers/pci/pcie/rcec.c | 190 ++++++ 13 files changed, 6377 insertions(+) create mode 100644 drivers/pci/pcie/Kconfig create mode 100644 drivers/pci/pcie/Makefile create mode 100644 drivers/pci/pcie/aer.c create mode 100644 drivers/pci/pcie/aer_inject.c create mode 100644 drivers/pci/pcie/aspm.c create mode 100644 drivers/pci/pcie/dpc.c create mode 100644 drivers/pci/pcie/edr.c create mode 100644 drivers/pci/pcie/err.c create mode 100644 drivers/pci/pcie/pme.c create mode 100644 drivers/pci/pcie/portdrv.c create mode 100644 drivers/pci/pcie/portdrv.h create mode 100644 drivers/pci/pcie/ptm.c create mode 100644 drivers/pci/pcie/rcec.c (limited to 'drivers/pci/pcie') diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig new file mode 100644 index 0000000000..228652a59f --- /dev/null +++ b/drivers/pci/pcie/Kconfig @@ -0,0 +1,148 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# PCI Express Port Bus Configuration +# +config PCIEPORTBUS + bool "PCI Express Port Bus support" + default y if USB4 + help + This enables PCI Express Port Bus support. Users can then enable + support for Native Hot-Plug, Advanced Error Reporting, Power + Management Events, and Downstream Port Containment. + +# +# Include service Kconfig here +# +config HOTPLUG_PCI_PCIE + bool "PCI Express Hotplug driver" + depends on HOTPLUG_PCI && PCIEPORTBUS + default y if USB4 + help + Say Y here if you have a motherboard that supports PCIe native + hotplug. + + Thunderbolt/USB4 PCIe tunneling depends on native PCIe hotplug. + + When in doubt, say N. + +config PCIEAER + bool "PCI Express Advanced Error Reporting support" + depends on PCIEPORTBUS + select RAS + help + This enables PCI Express Root Port Advanced Error Reporting + (AER) driver support. Error reporting messages sent to Root + Port will be handled by PCI Express AER driver. + +config PCIEAER_INJECT + tristate "PCI Express error injection support" + depends on PCIEAER + select GENERIC_IRQ_INJECTION + help + This enables PCI Express Root Port Advanced Error Reporting + (AER) software error injector. + + Debugging AER code is quite difficult because it is hard + to trigger various real hardware errors. Software-based + error injection can fake almost all kinds of errors with the + help of a user space helper tool aer-inject, which can be + gotten from: + https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/ + +# +# PCI Express ECRC +# +config PCIE_ECRC + bool "PCI Express ECRC settings control" + depends on PCIEAER + help + Used to override firmware/bios settings for PCI Express ECRC + (transaction layer end-to-end CRC checking). + + When in doubt, say N. + +# +# PCI Express ASPM +# +config PCIEASPM + bool "PCI Express ASPM control" if EXPERT + default y + help + This enables OS control over PCI Express ASPM (Active State + Power Management) and Clock Power Management. ASPM supports + state L0/L0s/L1. + + ASPM is initially set up by the firmware. With this option enabled, + Linux can modify this state in order to disable ASPM on known-bad + hardware or configurations and enable it when known-safe. + + ASPM can be disabled or enabled at runtime via + /sys/module/pcie_aspm/parameters/policy + + When in doubt, say Y. + +choice + prompt "Default ASPM policy" + default PCIEASPM_DEFAULT + depends on PCIEASPM + +config PCIEASPM_DEFAULT + bool "BIOS default" + depends on PCIEASPM + help + Use the BIOS defaults for PCI Express ASPM. + +config PCIEASPM_POWERSAVE + bool "Powersave" + depends on PCIEASPM + help + Enable PCI Express ASPM L0s and L1 where possible, even if the + BIOS did not. + +config PCIEASPM_POWER_SUPERSAVE + bool "Power Supersave" + depends on PCIEASPM + help + Same as PCIEASPM_POWERSAVE, except it also enables L1 substates where + possible. This would result in higher power savings while staying in L1 + where the components support it. + +config PCIEASPM_PERFORMANCE + bool "Performance" + depends on PCIEASPM + help + Disable PCI Express ASPM L0s and L1, even if the BIOS enabled them. +endchoice + +config PCIE_PME + def_bool y + depends on PCIEPORTBUS && PM + +config PCIE_DPC + bool "PCI Express Downstream Port Containment support" + depends on PCIEPORTBUS && PCIEAER + help + This enables PCI Express Downstream Port Containment (DPC) + driver support. DPC events from Root and Downstream ports + will be handled by the DPC driver. If your system doesn't + have this capability or you do not want to use this feature, + it is safe to answer N. + +config PCIE_PTM + bool "PCI Express Precision Time Measurement support" + help + This enables PCI Express Precision Time Measurement (PTM) + support. + + This is only useful if you have devices that support PTM, but it + is safe to enable even if you don't. + +config PCIE_EDR + bool "PCI Express Error Disconnect Recover support" + depends on PCIE_DPC && ACPI + help + This option adds Error Disconnect Recover support as specified + in the Downstream Port Containment Related Enhancements ECN to + the PCI Firmware Specification r3.2. Enable this if you want to + support hybrid DPC model which uses both firmware and OS to + implement DPC. diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile new file mode 100644 index 0000000000..8de4ed5f98 --- /dev/null +++ b/drivers/pci/pcie/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for PCI Express features and port driver + +pcieportdrv-y := portdrv.o rcec.o + +obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o + +obj-$(CONFIG_PCIEASPM) += aspm.o +obj-$(CONFIG_PCIEAER) += aer.o err.o +obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o +obj-$(CONFIG_PCIE_PME) += pme.o +obj-$(CONFIG_PCIE_DPC) += dpc.o +obj-$(CONFIG_PCIE_PTM) += ptm.o +obj-$(CONFIG_PCIE_EDR) += edr.o diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c new file mode 100644 index 0000000000..40d84cb0c6 --- /dev/null +++ b/drivers/pci/pcie/aer.c @@ -0,0 +1,1424 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Implement the AER root port service driver. The driver registers an IRQ + * handler. When a root port triggers an AER interrupt, the IRQ handler + * collects root port status and schedules work. + * + * Copyright (C) 2006 Intel Corp. + * Tom Long Nguyen (tom.l.nguyen@intel.com) + * Zhang Yanmin (yanmin.zhang@intel.com) + * + * (C) Copyright 2009 Hewlett-Packard Development Company, L.P. + * Andrew Patterson + */ + +#define pr_fmt(fmt) "AER: " fmt +#define dev_fmt pr_fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../pci.h" +#include "portdrv.h" + +#define AER_ERROR_SOURCES_MAX 128 + +#define AER_MAX_TYPEOF_COR_ERRS 16 /* as per PCI_ERR_COR_STATUS */ +#define AER_MAX_TYPEOF_UNCOR_ERRS 27 /* as per PCI_ERR_UNCOR_STATUS*/ + +struct aer_err_source { + unsigned int status; + unsigned int id; +}; + +struct aer_rpc { + struct pci_dev *rpd; /* Root Port device */ + DECLARE_KFIFO(aer_fifo, struct aer_err_source, AER_ERROR_SOURCES_MAX); +}; + +/* AER stats for the device */ +struct aer_stats { + + /* + * Fields for all AER capable devices. They indicate the errors + * "as seen by this device". Note that this may mean that if an + * end point is causing problems, the AER counters may increment + * at its link partner (e.g. root port) because the errors will be + * "seen" by the link partner and not the problematic end point + * itself (which may report all counters as 0 as it never saw any + * problems). + */ + /* Counters for different type of correctable errors */ + u64 dev_cor_errs[AER_MAX_TYPEOF_COR_ERRS]; + /* Counters for different type of fatal uncorrectable errors */ + u64 dev_fatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS]; + /* Counters for different type of nonfatal uncorrectable errors */ + u64 dev_nonfatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS]; + /* Total number of ERR_COR sent by this device */ + u64 dev_total_cor_errs; + /* Total number of ERR_FATAL sent by this device */ + u64 dev_total_fatal_errs; + /* Total number of ERR_NONFATAL sent by this device */ + u64 dev_total_nonfatal_errs; + + /* + * Fields for Root ports & root complex event collectors only, these + * indicate the total number of ERR_COR, ERR_FATAL, and ERR_NONFATAL + * messages received by the root port / event collector, INCLUDING the + * ones that are generated internally (by the rootport itself) + */ + u64 rootport_total_cor_errs; + u64 rootport_total_fatal_errs; + u64 rootport_total_nonfatal_errs; +}; + +#define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ + PCI_ERR_UNC_ECRC| \ + PCI_ERR_UNC_UNSUP| \ + PCI_ERR_UNC_COMP_ABORT| \ + PCI_ERR_UNC_UNX_COMP| \ + PCI_ERR_UNC_MALF_TLP) + +#define SYSTEM_ERROR_INTR_ON_MESG_MASK (PCI_EXP_RTCTL_SECEE| \ + PCI_EXP_RTCTL_SENFEE| \ + PCI_EXP_RTCTL_SEFEE) +#define ROOT_PORT_INTR_ON_MESG_MASK (PCI_ERR_ROOT_CMD_COR_EN| \ + PCI_ERR_ROOT_CMD_NONFATAL_EN| \ + PCI_ERR_ROOT_CMD_FATAL_EN) +#define ERR_COR_ID(d) (d & 0xffff) +#define ERR_UNCOR_ID(d) (d >> 16) + +#define AER_ERR_STATUS_MASK (PCI_ERR_ROOT_UNCOR_RCV | \ + PCI_ERR_ROOT_COR_RCV | \ + PCI_ERR_ROOT_MULTI_COR_RCV | \ + PCI_ERR_ROOT_MULTI_UNCOR_RCV) + +static int pcie_aer_disable; +static pci_ers_result_t aer_root_reset(struct pci_dev *dev); + +void pci_no_aer(void) +{ + pcie_aer_disable = 1; +} + +bool pci_aer_available(void) +{ + return !pcie_aer_disable && pci_msi_enabled(); +} + +#ifdef CONFIG_PCIE_ECRC + +#define ECRC_POLICY_DEFAULT 0 /* ECRC set by BIOS */ +#define ECRC_POLICY_OFF 1 /* ECRC off for performance */ +#define ECRC_POLICY_ON 2 /* ECRC on for data integrity */ + +static int ecrc_policy = ECRC_POLICY_DEFAULT; + +static const char * const ecrc_policy_str[] = { + [ECRC_POLICY_DEFAULT] = "bios", + [ECRC_POLICY_OFF] = "off", + [ECRC_POLICY_ON] = "on" +}; + +/** + * enable_ecrc_checking - enable PCIe ECRC checking for a device + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + */ +static int enable_ecrc_checking(struct pci_dev *dev) +{ + int aer = dev->aer_cap; + u32 reg32; + + if (!aer) + return -ENODEV; + + pci_read_config_dword(dev, aer + PCI_ERR_CAP, ®32); + if (reg32 & PCI_ERR_CAP_ECRC_GENC) + reg32 |= PCI_ERR_CAP_ECRC_GENE; + if (reg32 & PCI_ERR_CAP_ECRC_CHKC) + reg32 |= PCI_ERR_CAP_ECRC_CHKE; + pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32); + + return 0; +} + +/** + * disable_ecrc_checking - disables PCIe ECRC checking for a device + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + */ +static int disable_ecrc_checking(struct pci_dev *dev) +{ + int aer = dev->aer_cap; + u32 reg32; + + if (!aer) + return -ENODEV; + + pci_read_config_dword(dev, aer + PCI_ERR_CAP, ®32); + reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); + pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32); + + return 0; +} + +/** + * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based on global policy + * @dev: the PCI device + */ +void pcie_set_ecrc_checking(struct pci_dev *dev) +{ + if (!pcie_aer_is_native(dev)) + return; + + switch (ecrc_policy) { + case ECRC_POLICY_DEFAULT: + return; + case ECRC_POLICY_OFF: + disable_ecrc_checking(dev); + break; + case ECRC_POLICY_ON: + enable_ecrc_checking(dev); + break; + default: + return; + } +} + +/** + * pcie_ecrc_get_policy - parse kernel command-line ecrc option + * @str: ECRC policy from kernel command line to use + */ +void pcie_ecrc_get_policy(char *str) +{ + int i; + + i = match_string(ecrc_policy_str, ARRAY_SIZE(ecrc_policy_str), str); + if (i < 0) + return; + + ecrc_policy = i; +} +#endif /* CONFIG_PCIE_ECRC */ + +#define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \ + PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE) + +int pcie_aer_is_native(struct pci_dev *dev) +{ + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); + + if (!dev->aer_cap) + return 0; + + return pcie_ports_native || host->native_aer; +} +EXPORT_SYMBOL_NS_GPL(pcie_aer_is_native, CXL); + +static int pci_enable_pcie_error_reporting(struct pci_dev *dev) +{ + int rc; + + if (!pcie_aer_is_native(dev)) + return -EIO; + + rc = pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS); + return pcibios_err_to_errno(rc); +} + +int pci_aer_clear_nonfatal_status(struct pci_dev *dev) +{ + int aer = dev->aer_cap; + u32 status, sev; + + if (!pcie_aer_is_native(dev)) + return -EIO; + + /* Clear status bits for ERR_NONFATAL errors only */ + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status); + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev); + status &= ~sev; + if (status) + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status); + + return 0; +} +EXPORT_SYMBOL_GPL(pci_aer_clear_nonfatal_status); + +void pci_aer_clear_fatal_status(struct pci_dev *dev) +{ + int aer = dev->aer_cap; + u32 status, sev; + + if (!pcie_aer_is_native(dev)) + return; + + /* Clear status bits for ERR_FATAL errors only */ + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status); + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev); + status &= sev; + if (status) + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status); +} + +/** + * pci_aer_raw_clear_status - Clear AER error registers. + * @dev: the PCI device + * + * Clearing AER error status registers unconditionally, regardless of + * whether they're owned by firmware or the OS. + * + * Returns 0 on success, or negative on failure. + */ +int pci_aer_raw_clear_status(struct pci_dev *dev) +{ + int aer = dev->aer_cap; + u32 status; + int port_type; + + if (!aer) + return -EIO; + + port_type = pci_pcie_type(dev); + if (port_type == PCI_EXP_TYPE_ROOT_PORT || + port_type == PCI_EXP_TYPE_RC_EC) { + pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &status); + pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, status); + } + + pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status); + pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, status); + + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status); + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status); + + return 0; +} + +int pci_aer_clear_status(struct pci_dev *dev) +{ + if (!pcie_aer_is_native(dev)) + return -EIO; + + return pci_aer_raw_clear_status(dev); +} + +void pci_save_aer_state(struct pci_dev *dev) +{ + int aer = dev->aer_cap; + struct pci_cap_saved_state *save_state; + u32 *cap; + + if (!aer) + return; + + save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR); + if (!save_state) + return; + + cap = &save_state->cap.data[0]; + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, cap++); + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, cap++); + pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, cap++); + pci_read_config_dword(dev, aer + PCI_ERR_CAP, cap++); + if (pcie_cap_has_rtctl(dev)) + pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, cap++); +} + +void pci_restore_aer_state(struct pci_dev *dev) +{ + int aer = dev->aer_cap; + struct pci_cap_saved_state *save_state; + u32 *cap; + + if (!aer) + return; + + save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR); + if (!save_state) + return; + + cap = &save_state->cap.data[0]; + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, *cap++); + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, *cap++); + pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, *cap++); + pci_write_config_dword(dev, aer + PCI_ERR_CAP, *cap++); + if (pcie_cap_has_rtctl(dev)) + pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, *cap++); +} + +void pci_aer_init(struct pci_dev *dev) +{ + int n; + + dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + if (!dev->aer_cap) + return; + + dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL); + + /* + * We save/restore PCI_ERR_UNCOR_MASK, PCI_ERR_UNCOR_SEVER, + * PCI_ERR_COR_MASK, and PCI_ERR_CAP. Root and Root Complex Event + * Collectors also implement PCI_ERR_ROOT_COMMAND (PCIe r5.0, sec + * 7.8.4). + */ + n = pcie_cap_has_rtctl(dev) ? 5 : 4; + pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_ERR, sizeof(u32) * n); + + pci_aer_clear_status(dev); + + if (pci_aer_available()) + pci_enable_pcie_error_reporting(dev); + + pcie_set_ecrc_checking(dev); +} + +void pci_aer_exit(struct pci_dev *dev) +{ + kfree(dev->aer_stats); + dev->aer_stats = NULL; +} + +#define AER_AGENT_RECEIVER 0 +#define AER_AGENT_REQUESTER 1 +#define AER_AGENT_COMPLETER 2 +#define AER_AGENT_TRANSMITTER 3 + +#define AER_AGENT_REQUESTER_MASK(t) ((t == AER_CORRECTABLE) ? \ + 0 : (PCI_ERR_UNC_COMP_TIME|PCI_ERR_UNC_UNSUP)) +#define AER_AGENT_COMPLETER_MASK(t) ((t == AER_CORRECTABLE) ? \ + 0 : PCI_ERR_UNC_COMP_ABORT) +#define AER_AGENT_TRANSMITTER_MASK(t) ((t == AER_CORRECTABLE) ? \ + (PCI_ERR_COR_REP_ROLL|PCI_ERR_COR_REP_TIMER) : 0) + +#define AER_GET_AGENT(t, e) \ + ((e & AER_AGENT_COMPLETER_MASK(t)) ? AER_AGENT_COMPLETER : \ + (e & AER_AGENT_REQUESTER_MASK(t)) ? AER_AGENT_REQUESTER : \ + (e & AER_AGENT_TRANSMITTER_MASK(t)) ? AER_AGENT_TRANSMITTER : \ + AER_AGENT_RECEIVER) + +#define AER_PHYSICAL_LAYER_ERROR 0 +#define AER_DATA_LINK_LAYER_ERROR 1 +#define AER_TRANSACTION_LAYER_ERROR 2 + +#define AER_PHYSICAL_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ? \ + PCI_ERR_COR_RCVR : 0) +#define AER_DATA_LINK_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ? \ + (PCI_ERR_COR_BAD_TLP| \ + PCI_ERR_COR_BAD_DLLP| \ + PCI_ERR_COR_REP_ROLL| \ + PCI_ERR_COR_REP_TIMER) : PCI_ERR_UNC_DLP) + +#define AER_GET_LAYER_ERROR(t, e) \ + ((e & AER_PHYSICAL_LAYER_ERROR_MASK(t)) ? AER_PHYSICAL_LAYER_ERROR : \ + (e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \ + AER_TRANSACTION_LAYER_ERROR) + +/* + * AER error strings + */ +static const char *aer_error_severity_string[] = { + "Uncorrected (Non-Fatal)", + "Uncorrected (Fatal)", + "Corrected" +}; + +static const char *aer_error_layer[] = { + "Physical Layer", + "Data Link Layer", + "Transaction Layer" +}; + +static const char *aer_correctable_error_string[] = { + "RxErr", /* Bit Position 0 */ + NULL, + NULL, + NULL, + NULL, + NULL, + "BadTLP", /* Bit Position 6 */ + "BadDLLP", /* Bit Position 7 */ + "Rollover", /* Bit Position 8 */ + NULL, + NULL, + NULL, + "Timeout", /* Bit Position 12 */ + "NonFatalErr", /* Bit Position 13 */ + "CorrIntErr", /* Bit Position 14 */ + "HeaderOF", /* Bit Position 15 */ + NULL, /* Bit Position 16 */ + NULL, /* Bit Position 17 */ + NULL, /* Bit Position 18 */ + NULL, /* Bit Position 19 */ + NULL, /* Bit Position 20 */ + NULL, /* Bit Position 21 */ + NULL, /* Bit Position 22 */ + NULL, /* Bit Position 23 */ + NULL, /* Bit Position 24 */ + NULL, /* Bit Position 25 */ + NULL, /* Bit Position 26 */ + NULL, /* Bit Position 27 */ + NULL, /* Bit Position 28 */ + NULL, /* Bit Position 29 */ + NULL, /* Bit Position 30 */ + NULL, /* Bit Position 31 */ +}; + +static const char *aer_uncorrectable_error_string[] = { + "Undefined", /* Bit Position 0 */ + NULL, + NULL, + NULL, + "DLP", /* Bit Position 4 */ + "SDES", /* Bit Position 5 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "TLP", /* Bit Position 12 */ + "FCP", /* Bit Position 13 */ + "CmpltTO", /* Bit Position 14 */ + "CmpltAbrt", /* Bit Position 15 */ + "UnxCmplt", /* Bit Position 16 */ + "RxOF", /* Bit Position 17 */ + "MalfTLP", /* Bit Position 18 */ + "ECRC", /* Bit Position 19 */ + "UnsupReq", /* Bit Position 20 */ + "ACSViol", /* Bit Position 21 */ + "UncorrIntErr", /* Bit Position 22 */ + "BlockedTLP", /* Bit Position 23 */ + "AtomicOpBlocked", /* Bit Position 24 */ + "TLPBlockedErr", /* Bit Position 25 */ + "PoisonTLPBlocked", /* Bit Position 26 */ + NULL, /* Bit Position 27 */ + NULL, /* Bit Position 28 */ + NULL, /* Bit Position 29 */ + NULL, /* Bit Position 30 */ + NULL, /* Bit Position 31 */ +}; + +static const char *aer_agent_string[] = { + "Receiver ID", + "Requester ID", + "Completer ID", + "Transmitter ID" +}; + +#define aer_stats_dev_attr(name, stats_array, strings_array, \ + total_string, total_field) \ + static ssize_t \ + name##_show(struct device *dev, struct device_attribute *attr, \ + char *buf) \ +{ \ + unsigned int i; \ + struct pci_dev *pdev = to_pci_dev(dev); \ + u64 *stats = pdev->aer_stats->stats_array; \ + size_t len = 0; \ + \ + for (i = 0; i < ARRAY_SIZE(pdev->aer_stats->stats_array); i++) {\ + if (strings_array[i]) \ + len += sysfs_emit_at(buf, len, "%s %llu\n", \ + strings_array[i], \ + stats[i]); \ + else if (stats[i]) \ + len += sysfs_emit_at(buf, len, \ + #stats_array "_bit[%d] %llu\n",\ + i, stats[i]); \ + } \ + len += sysfs_emit_at(buf, len, "TOTAL_%s %llu\n", total_string, \ + pdev->aer_stats->total_field); \ + return len; \ +} \ +static DEVICE_ATTR_RO(name) + +aer_stats_dev_attr(aer_dev_correctable, dev_cor_errs, + aer_correctable_error_string, "ERR_COR", + dev_total_cor_errs); +aer_stats_dev_attr(aer_dev_fatal, dev_fatal_errs, + aer_uncorrectable_error_string, "ERR_FATAL", + dev_total_fatal_errs); +aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs, + aer_uncorrectable_error_string, "ERR_NONFATAL", + dev_total_nonfatal_errs); + +#define aer_stats_rootport_attr(name, field) \ + static ssize_t \ + name##_show(struct device *dev, struct device_attribute *attr, \ + char *buf) \ +{ \ + struct pci_dev *pdev = to_pci_dev(dev); \ + return sysfs_emit(buf, "%llu\n", pdev->aer_stats->field); \ +} \ +static DEVICE_ATTR_RO(name) + +aer_stats_rootport_attr(aer_rootport_total_err_cor, + rootport_total_cor_errs); +aer_stats_rootport_attr(aer_rootport_total_err_fatal, + rootport_total_fatal_errs); +aer_stats_rootport_attr(aer_rootport_total_err_nonfatal, + rootport_total_nonfatal_errs); + +static struct attribute *aer_stats_attrs[] __ro_after_init = { + &dev_attr_aer_dev_correctable.attr, + &dev_attr_aer_dev_fatal.attr, + &dev_attr_aer_dev_nonfatal.attr, + &dev_attr_aer_rootport_total_err_cor.attr, + &dev_attr_aer_rootport_total_err_fatal.attr, + &dev_attr_aer_rootport_total_err_nonfatal.attr, + NULL +}; + +static umode_t aer_stats_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct pci_dev *pdev = to_pci_dev(dev); + + if (!pdev->aer_stats) + return 0; + + if ((a == &dev_attr_aer_rootport_total_err_cor.attr || + a == &dev_attr_aer_rootport_total_err_fatal.attr || + a == &dev_attr_aer_rootport_total_err_nonfatal.attr) && + ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) && + (pci_pcie_type(pdev) != PCI_EXP_TYPE_RC_EC))) + return 0; + + return a->mode; +} + +const struct attribute_group aer_stats_attr_group = { + .attrs = aer_stats_attrs, + .is_visible = aer_stats_attrs_are_visible, +}; + +static void pci_dev_aer_stats_incr(struct pci_dev *pdev, + struct aer_err_info *info) +{ + unsigned long status = info->status & ~info->mask; + int i, max = -1; + u64 *counter = NULL; + struct aer_stats *aer_stats = pdev->aer_stats; + + if (!aer_stats) + return; + + switch (info->severity) { + case AER_CORRECTABLE: + aer_stats->dev_total_cor_errs++; + counter = &aer_stats->dev_cor_errs[0]; + max = AER_MAX_TYPEOF_COR_ERRS; + break; + case AER_NONFATAL: + aer_stats->dev_total_nonfatal_errs++; + counter = &aer_stats->dev_nonfatal_errs[0]; + max = AER_MAX_TYPEOF_UNCOR_ERRS; + break; + case AER_FATAL: + aer_stats->dev_total_fatal_errs++; + counter = &aer_stats->dev_fatal_errs[0]; + max = AER_MAX_TYPEOF_UNCOR_ERRS; + break; + } + + for_each_set_bit(i, &status, max) + counter[i]++; +} + +static void pci_rootport_aer_stats_incr(struct pci_dev *pdev, + struct aer_err_source *e_src) +{ + struct aer_stats *aer_stats = pdev->aer_stats; + + if (!aer_stats) + return; + + if (e_src->status & PCI_ERR_ROOT_COR_RCV) + aer_stats->rootport_total_cor_errs++; + + if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { + if (e_src->status & PCI_ERR_ROOT_FATAL_RCV) + aer_stats->rootport_total_fatal_errs++; + else + aer_stats->rootport_total_nonfatal_errs++; + } +} + +static void __print_tlp_header(struct pci_dev *dev, + struct aer_header_log_regs *t) +{ + pci_err(dev, " TLP Header: %08x %08x %08x %08x\n", + t->dw0, t->dw1, t->dw2, t->dw3); +} + +static void __aer_print_error(struct pci_dev *dev, + struct aer_err_info *info) +{ + const char **strings; + unsigned long status = info->status & ~info->mask; + const char *level, *errmsg; + int i; + + if (info->severity == AER_CORRECTABLE) { + strings = aer_correctable_error_string; + level = KERN_WARNING; + } else { + strings = aer_uncorrectable_error_string; + level = KERN_ERR; + } + + for_each_set_bit(i, &status, 32) { + errmsg = strings[i]; + if (!errmsg) + errmsg = "Unknown Error Bit"; + + pci_printk(level, dev, " [%2d] %-22s%s\n", i, errmsg, + info->first_error == i ? " (First)" : ""); + } + pci_dev_aer_stats_incr(dev, info); +} + +void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) +{ + int layer, agent; + int id = pci_dev_id(dev); + const char *level; + + if (!info->status) { + pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", + aer_error_severity_string[info->severity]); + goto out; + } + + layer = AER_GET_LAYER_ERROR(info->severity, info->status); + agent = AER_GET_AGENT(info->severity, info->status); + + level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR; + + pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", + aer_error_severity_string[info->severity], + aer_error_layer[layer], aer_agent_string[agent]); + + pci_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", + dev->vendor, dev->device, info->status, info->mask); + + __aer_print_error(dev, info); + + if (info->tlp_header_valid) + __print_tlp_header(dev, &info->tlp); + +out: + if (info->id && info->error_dev_num > 1 && info->id == id) + pci_err(dev, " Error of this Agent is reported first\n"); + + trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask), + info->severity, info->tlp_header_valid, &info->tlp); +} + +static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) +{ + u8 bus = info->id >> 8; + u8 devfn = info->id & 0xff; + + pci_info(dev, "%s%s error received: %04x:%02x:%02x.%d\n", + info->multi_error_valid ? "Multiple " : "", + aer_error_severity_string[info->severity], + pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn), + PCI_FUNC(devfn)); +} + +#ifdef CONFIG_ACPI_APEI_PCIEAER +int cper_severity_to_aer(int cper_severity) +{ + switch (cper_severity) { + case CPER_SEV_RECOVERABLE: + return AER_NONFATAL; + case CPER_SEV_FATAL: + return AER_FATAL; + default: + return AER_CORRECTABLE; + } +} +EXPORT_SYMBOL_GPL(cper_severity_to_aer); + +void cper_print_aer(struct pci_dev *dev, int aer_severity, + struct aer_capability_regs *aer) +{ + int layer, agent, tlp_header_valid = 0; + u32 status, mask; + struct aer_err_info info; + + if (aer_severity == AER_CORRECTABLE) { + status = aer->cor_status; + mask = aer->cor_mask; + } else { + status = aer->uncor_status; + mask = aer->uncor_mask; + tlp_header_valid = status & AER_LOG_TLP_MASKS; + } + + layer = AER_GET_LAYER_ERROR(aer_severity, status); + agent = AER_GET_AGENT(aer_severity, status); + + memset(&info, 0, sizeof(info)); + info.severity = aer_severity; + info.status = status; + info.mask = mask; + info.first_error = PCI_ERR_CAP_FEP(aer->cap_control); + + pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); + __aer_print_error(dev, &info); + pci_err(dev, "aer_layer=%s, aer_agent=%s\n", + aer_error_layer[layer], aer_agent_string[agent]); + + if (aer_severity != AER_CORRECTABLE) + pci_err(dev, "aer_uncor_severity: 0x%08x\n", + aer->uncor_severity); + + if (tlp_header_valid) + __print_tlp_header(dev, &aer->header_log); + + trace_aer_event(dev_name(&dev->dev), (status & ~mask), + aer_severity, tlp_header_valid, &aer->header_log); +} +#endif + +/** + * add_error_device - list device to be handled + * @e_info: pointer to error info + * @dev: pointer to pci_dev to be added + */ +static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev) +{ + if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) { + e_info->dev[e_info->error_dev_num] = pci_dev_get(dev); + e_info->error_dev_num++; + return 0; + } + return -ENOSPC; +} + +/** + * is_error_source - check whether the device is source of reported error + * @dev: pointer to pci_dev to be checked + * @e_info: pointer to reported error info + */ +static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) +{ + int aer = dev->aer_cap; + u32 status, mask; + u16 reg16; + + /* + * When bus id is equal to 0, it might be a bad id + * reported by root port. + */ + if ((PCI_BUS_NUM(e_info->id) != 0) && + !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) { + /* Device ID match? */ + if (e_info->id == pci_dev_id(dev)) + return true; + + /* Continue id comparing if there is no multiple error */ + if (!e_info->multi_error_valid) + return false; + } + + /* + * When either + * 1) bus id is equal to 0. Some ports might lose the bus + * id of error source id; + * 2) bus flag PCI_BUS_FLAGS_NO_AERSID is set + * 3) There are multiple errors and prior ID comparing fails; + * We check AER status registers to find possible reporter. + */ + if (atomic_read(&dev->enable_cnt) == 0) + return false; + + /* Check if AER is enabled */ + pcie_capability_read_word(dev, PCI_EXP_DEVCTL, ®16); + if (!(reg16 & PCI_EXP_AER_FLAGS)) + return false; + + if (!aer) + return false; + + /* Check if error is recorded */ + if (e_info->severity == AER_CORRECTABLE) { + pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status); + pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask); + } else { + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status); + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask); + } + if (status & ~mask) + return true; + + return false; +} + +static int find_device_iter(struct pci_dev *dev, void *data) +{ + struct aer_err_info *e_info = (struct aer_err_info *)data; + + if (is_error_source(dev, e_info)) { + /* List this device */ + if (add_error_device(e_info, dev)) { + /* We cannot handle more... Stop iteration */ + /* TODO: Should print error message here? */ + return 1; + } + + /* If there is only a single error, stop iteration */ + if (!e_info->multi_error_valid) + return 1; + } + return 0; +} + +/** + * find_source_device - search through device hierarchy for source device + * @parent: pointer to Root Port pci_dev data structure + * @e_info: including detailed error information such like id + * + * Return true if found. + * + * Invoked by DPC when error is detected at the Root Port. + * Caller of this function must set id, severity, and multi_error_valid of + * struct aer_err_info pointed by @e_info properly. This function must fill + * e_info->error_dev_num and e_info->dev[], based on the given information. + */ +static bool find_source_device(struct pci_dev *parent, + struct aer_err_info *e_info) +{ + struct pci_dev *dev = parent; + int result; + + /* Must reset in this function */ + e_info->error_dev_num = 0; + + /* Is Root Port an agent that sends error message? */ + result = find_device_iter(dev, e_info); + if (result) + return true; + + if (pci_pcie_type(parent) == PCI_EXP_TYPE_RC_EC) + pcie_walk_rcec(parent, find_device_iter, e_info); + else + pci_walk_bus(parent->subordinate, find_device_iter, e_info); + + if (!e_info->error_dev_num) { + pci_info(parent, "can't find device of ID%04x\n", e_info->id); + return false; + } + return true; +} + +/** + * handle_error_source - handle logging error into an event log + * @dev: pointer to pci_dev data structure of error source device + * @info: comprehensive error information + * + * Invoked when an error being detected by Root Port. + */ +static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) +{ + int aer = dev->aer_cap; + + if (info->severity == AER_CORRECTABLE) { + /* + * Correctable error does not need software intervention. + * No need to go through error recovery process. + */ + if (aer) + pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, + info->status); + if (pcie_aer_is_native(dev)) { + struct pci_driver *pdrv = dev->driver; + + if (pdrv && pdrv->err_handler && + pdrv->err_handler->cor_error_detected) + pdrv->err_handler->cor_error_detected(dev); + pcie_clear_device_status(dev); + } + } else if (info->severity == AER_NONFATAL) + pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset); + else if (info->severity == AER_FATAL) + pcie_do_recovery(dev, pci_channel_io_frozen, aer_root_reset); + pci_dev_put(dev); +} + +#ifdef CONFIG_ACPI_APEI_PCIEAER + +#define AER_RECOVER_RING_SIZE 16 + +struct aer_recover_entry { + u8 bus; + u8 devfn; + u16 domain; + int severity; + struct aer_capability_regs *regs; +}; + +static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry, + AER_RECOVER_RING_SIZE); + +static void aer_recover_work_func(struct work_struct *work) +{ + struct aer_recover_entry entry; + struct pci_dev *pdev; + + while (kfifo_get(&aer_recover_ring, &entry)) { + pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus, + entry.devfn); + if (!pdev) { + pr_err("no pci_dev for %04x:%02x:%02x.%x\n", + entry.domain, entry.bus, + PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); + continue; + } + cper_print_aer(pdev, entry.severity, entry.regs); + /* + * Memory for aer_capability_regs(entry.regs) is being allocated from the + * ghes_estatus_pool to protect it from overwriting when multiple sections + * are present in the error status. Thus free the same after processing + * the data. + */ + ghes_estatus_pool_region_free((unsigned long)entry.regs, + sizeof(struct aer_capability_regs)); + + if (entry.severity == AER_NONFATAL) + pcie_do_recovery(pdev, pci_channel_io_normal, + aer_root_reset); + else if (entry.severity == AER_FATAL) + pcie_do_recovery(pdev, pci_channel_io_frozen, + aer_root_reset); + pci_dev_put(pdev); + } +} + +/* + * Mutual exclusion for writers of aer_recover_ring, reader side don't + * need lock, because there is only one reader and lock is not needed + * between reader and writer. + */ +static DEFINE_SPINLOCK(aer_recover_ring_lock); +static DECLARE_WORK(aer_recover_work, aer_recover_work_func); + +void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, + int severity, struct aer_capability_regs *aer_regs) +{ + struct aer_recover_entry entry = { + .bus = bus, + .devfn = devfn, + .domain = domain, + .severity = severity, + .regs = aer_regs, + }; + + if (kfifo_in_spinlocked(&aer_recover_ring, &entry, 1, + &aer_recover_ring_lock)) + schedule_work(&aer_recover_work); + else + pr_err("buffer overflow in recovery for %04x:%02x:%02x.%x\n", + domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); +} +EXPORT_SYMBOL_GPL(aer_recover_queue); +#endif + +/** + * aer_get_device_error_info - read error status from dev and store it to info + * @dev: pointer to the device expected to have a error record + * @info: pointer to structure to store the error record + * + * Return 1 on success, 0 on error. + * + * Note that @info is reused among all error devices. Clear fields properly. + */ +int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) +{ + int type = pci_pcie_type(dev); + int aer = dev->aer_cap; + int temp; + + /* Must reset in this function */ + info->status = 0; + info->tlp_header_valid = 0; + + /* The device might not support AER */ + if (!aer) + return 0; + + if (info->severity == AER_CORRECTABLE) { + pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, + &info->status); + pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, + &info->mask); + if (!(info->status & ~info->mask)) + return 0; + } else if (type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_RC_EC || + type == PCI_EXP_TYPE_DOWNSTREAM || + info->severity == AER_NONFATAL) { + + /* Link is still healthy for IO reads */ + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, + &info->status); + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, + &info->mask); + if (!(info->status & ~info->mask)) + return 0; + + /* Get First Error Pointer */ + pci_read_config_dword(dev, aer + PCI_ERR_CAP, &temp); + info->first_error = PCI_ERR_CAP_FEP(temp); + + if (info->status & AER_LOG_TLP_MASKS) { + info->tlp_header_valid = 1; + pci_read_config_dword(dev, + aer + PCI_ERR_HEADER_LOG, &info->tlp.dw0); + pci_read_config_dword(dev, + aer + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1); + pci_read_config_dword(dev, + aer + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2); + pci_read_config_dword(dev, + aer + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3); + } + } + + return 1; +} + +static inline void aer_process_err_devices(struct aer_err_info *e_info) +{ + int i; + + /* Report all before handle them, not to lost records by reset etc. */ + for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { + if (aer_get_device_error_info(e_info->dev[i], e_info)) + aer_print_error(e_info->dev[i], e_info); + } + for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { + if (aer_get_device_error_info(e_info->dev[i], e_info)) + handle_error_source(e_info->dev[i], e_info); + } +} + +/** + * aer_isr_one_error - consume an error detected by root port + * @rpc: pointer to the root port which holds an error + * @e_src: pointer to an error source + */ +static void aer_isr_one_error(struct aer_rpc *rpc, + struct aer_err_source *e_src) +{ + struct pci_dev *pdev = rpc->rpd; + struct aer_err_info e_info; + + pci_rootport_aer_stats_incr(pdev, e_src); + + /* + * There is a possibility that both correctable error and + * uncorrectable error being logged. Report correctable error first. + */ + if (e_src->status & PCI_ERR_ROOT_COR_RCV) { + e_info.id = ERR_COR_ID(e_src->id); + e_info.severity = AER_CORRECTABLE; + + if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV) + e_info.multi_error_valid = 1; + else + e_info.multi_error_valid = 0; + aer_print_port_info(pdev, &e_info); + + if (find_source_device(pdev, &e_info)) + aer_process_err_devices(&e_info); + } + + if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { + e_info.id = ERR_UNCOR_ID(e_src->id); + + if (e_src->status & PCI_ERR_ROOT_FATAL_RCV) + e_info.severity = AER_FATAL; + else + e_info.severity = AER_NONFATAL; + + if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV) + e_info.multi_error_valid = 1; + else + e_info.multi_error_valid = 0; + + aer_print_port_info(pdev, &e_info); + + if (find_source_device(pdev, &e_info)) + aer_process_err_devices(&e_info); + } +} + +/** + * aer_isr - consume errors detected by root port + * @irq: IRQ assigned to Root Port + * @context: pointer to Root Port data structure + * + * Invoked, as DPC, when root port records new detected error + */ +static irqreturn_t aer_isr(int irq, void *context) +{ + struct pcie_device *dev = (struct pcie_device *)context; + struct aer_rpc *rpc = get_service_data(dev); + struct aer_err_source e_src; + + if (kfifo_is_empty(&rpc->aer_fifo)) + return IRQ_NONE; + + while (kfifo_get(&rpc->aer_fifo, &e_src)) + aer_isr_one_error(rpc, &e_src); + return IRQ_HANDLED; +} + +/** + * aer_irq - Root Port's ISR + * @irq: IRQ assigned to Root Port + * @context: pointer to Root Port data structure + * + * Invoked when Root Port detects AER messages. + */ +static irqreturn_t aer_irq(int irq, void *context) +{ + struct pcie_device *pdev = (struct pcie_device *)context; + struct aer_rpc *rpc = get_service_data(pdev); + struct pci_dev *rp = rpc->rpd; + int aer = rp->aer_cap; + struct aer_err_source e_src = {}; + + pci_read_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, &e_src.status); + if (!(e_src.status & AER_ERR_STATUS_MASK)) + return IRQ_NONE; + + pci_read_config_dword(rp, aer + PCI_ERR_ROOT_ERR_SRC, &e_src.id); + pci_write_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, e_src.status); + + if (!kfifo_put(&rpc->aer_fifo, e_src)) + return IRQ_HANDLED; + + return IRQ_WAKE_THREAD; +} + +/** + * aer_enable_rootport - enable Root Port's interrupts when receiving messages + * @rpc: pointer to a Root Port data structure + * + * Invoked when PCIe bus loads AER service driver. + */ +static void aer_enable_rootport(struct aer_rpc *rpc) +{ + struct pci_dev *pdev = rpc->rpd; + int aer = pdev->aer_cap; + u16 reg16; + u32 reg32; + + /* Clear PCIe Capability's Device Status */ + pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, ®16); + pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, reg16); + + /* Disable system error generation in response to error messages */ + pcie_capability_clear_word(pdev, PCI_EXP_RTCTL, + SYSTEM_ERROR_INTR_ON_MESG_MASK); + + /* Clear error status */ + pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32); + pci_read_config_dword(pdev, aer + PCI_ERR_COR_STATUS, ®32); + pci_write_config_dword(pdev, aer + PCI_ERR_COR_STATUS, reg32); + pci_read_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, ®32); + pci_write_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, reg32); + + /* Enable Root Port's interrupt in response to error messages */ + pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, ®32); + reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32); +} + +/** + * aer_disable_rootport - disable Root Port's interrupts when receiving messages + * @rpc: pointer to a Root Port data structure + * + * Invoked when PCIe bus unloads AER service driver. + */ +static void aer_disable_rootport(struct aer_rpc *rpc) +{ + struct pci_dev *pdev = rpc->rpd; + int aer = pdev->aer_cap; + u32 reg32; + + /* Disable Root's interrupt in response to error messages */ + pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, ®32); + reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32); + + /* Clear Root's error status reg */ + pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32); +} + +/** + * aer_remove - clean up resources + * @dev: pointer to the pcie_dev data structure + * + * Invoked when PCI Express bus unloads or AER probe fails. + */ +static void aer_remove(struct pcie_device *dev) +{ + struct aer_rpc *rpc = get_service_data(dev); + + aer_disable_rootport(rpc); +} + +/** + * aer_probe - initialize resources + * @dev: pointer to the pcie_dev data structure + * + * Invoked when PCI Express bus loads AER service driver. + */ +static int aer_probe(struct pcie_device *dev) +{ + int status; + struct aer_rpc *rpc; + struct device *device = &dev->device; + struct pci_dev *port = dev->port; + + BUILD_BUG_ON(ARRAY_SIZE(aer_correctable_error_string) < + AER_MAX_TYPEOF_COR_ERRS); + BUILD_BUG_ON(ARRAY_SIZE(aer_uncorrectable_error_string) < + AER_MAX_TYPEOF_UNCOR_ERRS); + + /* Limit to Root Ports or Root Complex Event Collectors */ + if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) && + (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT)) + return -ENODEV; + + rpc = devm_kzalloc(device, sizeof(struct aer_rpc), GFP_KERNEL); + if (!rpc) + return -ENOMEM; + + rpc->rpd = port; + INIT_KFIFO(rpc->aer_fifo); + set_service_data(dev, rpc); + + status = devm_request_threaded_irq(device, dev->irq, aer_irq, aer_isr, + IRQF_SHARED, "aerdrv", dev); + if (status) { + pci_err(port, "request AER IRQ %d failed\n", dev->irq); + return status; + } + + aer_enable_rootport(rpc); + pci_info(port, "enabled with IRQ %d\n", dev->irq); + return 0; +} + +/** + * aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP + * @dev: pointer to Root Port, RCEC, or RCiEP + * + * Invoked by Port Bus driver when performing reset. + */ +static pci_ers_result_t aer_root_reset(struct pci_dev *dev) +{ + int type = pci_pcie_type(dev); + struct pci_dev *root; + int aer; + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); + u32 reg32; + int rc; + + /* + * Only Root Ports and RCECs have AER Root Command and Root Status + * registers. If "dev" is an RCiEP, the relevant registers are in + * the RCEC. + */ + if (type == PCI_EXP_TYPE_RC_END) + root = dev->rcec; + else + root = pcie_find_root_port(dev); + + /* + * If the platform retained control of AER, an RCiEP may not have + * an RCEC visible to us, so dev->rcec ("root") may be NULL. In + * that case, firmware is responsible for these registers. + */ + aer = root ? root->aer_cap : 0; + + if ((host->native_aer || pcie_ports_native) && aer) { + /* Disable Root's interrupt in response to error messages */ + pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, ®32); + reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32); + } + + if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) { + rc = pcie_reset_flr(dev, PCI_RESET_DO_RESET); + if (!rc) + pci_info(dev, "has been reset\n"); + else + pci_info(dev, "not reset (no FLR support: %d)\n", rc); + } else { + rc = pci_bus_error_reset(dev); + pci_info(dev, "%s Port link has been reset (%d)\n", + pci_is_root_bus(dev->bus) ? "Root" : "Downstream", rc); + } + + if ((host->native_aer || pcie_ports_native) && aer) { + /* Clear Root Error Status */ + pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32); + + /* Enable Root Port's interrupt in response to error messages */ + pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, ®32); + reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32); + } + + return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; +} + +static struct pcie_port_service_driver aerdriver = { + .name = "aer", + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_AER, + + .probe = aer_probe, + .remove = aer_remove, +}; + +/** + * pcie_aer_init - register AER root service driver + * + * Invoked when AER root service driver is loaded. + */ +int __init pcie_aer_init(void) +{ + if (!pci_aer_available()) + return -ENXIO; + return pcie_port_service_register(&aerdriver); +} diff --git a/drivers/pci/pcie/aer_inject.c b/drivers/pci/pcie/aer_inject.c new file mode 100644 index 0000000000..2dab275d25 --- /dev/null +++ b/drivers/pci/pcie/aer_inject.c @@ -0,0 +1,548 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCIe AER software error injection support. + * + * Debugging PCIe AER code is quite difficult because it is hard to + * trigger various real hardware errors. Software based error + * injection can fake almost all kinds of errors with the help of a + * user space helper tool aer-inject, which can be gotten from: + * https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/ + * + * Copyright 2009 Intel Corporation. + * Huang Ying + */ + +#define dev_fmt(fmt) "aer_inject: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "portdrv.h" + +/* Override the existing corrected and uncorrected error masks */ +static bool aer_mask_override; +module_param(aer_mask_override, bool, 0); + +struct aer_error_inj { + u8 bus; + u8 dev; + u8 fn; + u32 uncor_status; + u32 cor_status; + u32 header_log0; + u32 header_log1; + u32 header_log2; + u32 header_log3; + u32 domain; +}; + +struct aer_error { + struct list_head list; + u32 domain; + unsigned int bus; + unsigned int devfn; + int pos_cap_err; + + u32 uncor_status; + u32 cor_status; + u32 header_log0; + u32 header_log1; + u32 header_log2; + u32 header_log3; + u32 root_status; + u32 source_id; +}; + +struct pci_bus_ops { + struct list_head list; + struct pci_bus *bus; + struct pci_ops *ops; +}; + +static LIST_HEAD(einjected); + +static LIST_HEAD(pci_bus_ops_list); + +/* Protect einjected and pci_bus_ops_list */ +static DEFINE_SPINLOCK(inject_lock); + +static void aer_error_init(struct aer_error *err, u32 domain, + unsigned int bus, unsigned int devfn, + int pos_cap_err) +{ + INIT_LIST_HEAD(&err->list); + err->domain = domain; + err->bus = bus; + err->devfn = devfn; + err->pos_cap_err = pos_cap_err; +} + +/* inject_lock must be held before calling */ +static struct aer_error *__find_aer_error(u32 domain, unsigned int bus, + unsigned int devfn) +{ + struct aer_error *err; + + list_for_each_entry(err, &einjected, list) { + if (domain == err->domain && + bus == err->bus && + devfn == err->devfn) + return err; + } + return NULL; +} + +/* inject_lock must be held before calling */ +static struct aer_error *__find_aer_error_by_dev(struct pci_dev *dev) +{ + int domain = pci_domain_nr(dev->bus); + if (domain < 0) + return NULL; + return __find_aer_error(domain, dev->bus->number, dev->devfn); +} + +/* inject_lock must be held before calling */ +static struct pci_ops *__find_pci_bus_ops(struct pci_bus *bus) +{ + struct pci_bus_ops *bus_ops; + + list_for_each_entry(bus_ops, &pci_bus_ops_list, list) { + if (bus_ops->bus == bus) + return bus_ops->ops; + } + return NULL; +} + +static struct pci_bus_ops *pci_bus_ops_pop(void) +{ + unsigned long flags; + struct pci_bus_ops *bus_ops; + + spin_lock_irqsave(&inject_lock, flags); + bus_ops = list_first_entry_or_null(&pci_bus_ops_list, + struct pci_bus_ops, list); + if (bus_ops) + list_del(&bus_ops->list); + spin_unlock_irqrestore(&inject_lock, flags); + return bus_ops; +} + +static u32 *find_pci_config_dword(struct aer_error *err, int where, + int *prw1cs) +{ + int rw1cs = 0; + u32 *target = NULL; + + if (err->pos_cap_err == -1) + return NULL; + + switch (where - err->pos_cap_err) { + case PCI_ERR_UNCOR_STATUS: + target = &err->uncor_status; + rw1cs = 1; + break; + case PCI_ERR_COR_STATUS: + target = &err->cor_status; + rw1cs = 1; + break; + case PCI_ERR_HEADER_LOG: + target = &err->header_log0; + break; + case PCI_ERR_HEADER_LOG+4: + target = &err->header_log1; + break; + case PCI_ERR_HEADER_LOG+8: + target = &err->header_log2; + break; + case PCI_ERR_HEADER_LOG+12: + target = &err->header_log3; + break; + case PCI_ERR_ROOT_STATUS: + target = &err->root_status; + rw1cs = 1; + break; + case PCI_ERR_ROOT_ERR_SRC: + target = &err->source_id; + break; + } + if (prw1cs) + *prw1cs = rw1cs; + return target; +} + +static int aer_inj_read(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *val) +{ + struct pci_ops *ops, *my_ops; + int rv; + + ops = __find_pci_bus_ops(bus); + if (!ops) + return -1; + + my_ops = bus->ops; + bus->ops = ops; + rv = ops->read(bus, devfn, where, size, val); + bus->ops = my_ops; + + return rv; +} + +static int aer_inj_write(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 val) +{ + struct pci_ops *ops, *my_ops; + int rv; + + ops = __find_pci_bus_ops(bus); + if (!ops) + return -1; + + my_ops = bus->ops; + bus->ops = ops; + rv = ops->write(bus, devfn, where, size, val); + bus->ops = my_ops; + + return rv; +} + +static int aer_inj_read_config(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + u32 *sim; + struct aer_error *err; + unsigned long flags; + int domain; + int rv; + + spin_lock_irqsave(&inject_lock, flags); + if (size != sizeof(u32)) + goto out; + domain = pci_domain_nr(bus); + if (domain < 0) + goto out; + err = __find_aer_error(domain, bus->number, devfn); + if (!err) + goto out; + + sim = find_pci_config_dword(err, where, NULL); + if (sim) { + *val = *sim; + spin_unlock_irqrestore(&inject_lock, flags); + return 0; + } +out: + rv = aer_inj_read(bus, devfn, where, size, val); + spin_unlock_irqrestore(&inject_lock, flags); + return rv; +} + +static int aer_inj_write_config(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + u32 *sim; + struct aer_error *err; + unsigned long flags; + int rw1cs; + int domain; + int rv; + + spin_lock_irqsave(&inject_lock, flags); + if (size != sizeof(u32)) + goto out; + domain = pci_domain_nr(bus); + if (domain < 0) + goto out; + err = __find_aer_error(domain, bus->number, devfn); + if (!err) + goto out; + + sim = find_pci_config_dword(err, where, &rw1cs); + if (sim) { + if (rw1cs) + *sim ^= val; + else + *sim = val; + spin_unlock_irqrestore(&inject_lock, flags); + return 0; + } +out: + rv = aer_inj_write(bus, devfn, where, size, val); + spin_unlock_irqrestore(&inject_lock, flags); + return rv; +} + +static struct pci_ops aer_inj_pci_ops = { + .read = aer_inj_read_config, + .write = aer_inj_write_config, +}; + +static void pci_bus_ops_init(struct pci_bus_ops *bus_ops, + struct pci_bus *bus, + struct pci_ops *ops) +{ + INIT_LIST_HEAD(&bus_ops->list); + bus_ops->bus = bus; + bus_ops->ops = ops; +} + +static int pci_bus_set_aer_ops(struct pci_bus *bus) +{ + struct pci_ops *ops; + struct pci_bus_ops *bus_ops; + unsigned long flags; + + bus_ops = kmalloc(sizeof(*bus_ops), GFP_KERNEL); + if (!bus_ops) + return -ENOMEM; + ops = pci_bus_set_ops(bus, &aer_inj_pci_ops); + spin_lock_irqsave(&inject_lock, flags); + if (ops == &aer_inj_pci_ops) + goto out; + pci_bus_ops_init(bus_ops, bus, ops); + list_add(&bus_ops->list, &pci_bus_ops_list); + bus_ops = NULL; +out: + spin_unlock_irqrestore(&inject_lock, flags); + kfree(bus_ops); + return 0; +} + +static int aer_inject(struct aer_error_inj *einj) +{ + struct aer_error *err, *rperr; + struct aer_error *err_alloc = NULL, *rperr_alloc = NULL; + struct pci_dev *dev, *rpdev; + struct pcie_device *edev; + struct device *device; + unsigned long flags; + unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn); + int pos_cap_err, rp_pos_cap_err; + u32 sever, cor_mask, uncor_mask, cor_mask_orig = 0, uncor_mask_orig = 0; + int ret = 0; + + dev = pci_get_domain_bus_and_slot(einj->domain, einj->bus, devfn); + if (!dev) + return -ENODEV; + rpdev = pcie_find_root_port(dev); + /* If Root Port not found, try to find an RCEC */ + if (!rpdev) + rpdev = dev->rcec; + if (!rpdev) { + pci_err(dev, "Neither Root Port nor RCEC found\n"); + ret = -ENODEV; + goto out_put; + } + + pos_cap_err = dev->aer_cap; + if (!pos_cap_err) { + pci_err(dev, "Device doesn't support AER\n"); + ret = -EPROTONOSUPPORT; + goto out_put; + } + pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_SEVER, &sever); + pci_read_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, &cor_mask); + pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, + &uncor_mask); + + rp_pos_cap_err = rpdev->aer_cap; + if (!rp_pos_cap_err) { + pci_err(rpdev, "Root port doesn't support AER\n"); + ret = -EPROTONOSUPPORT; + goto out_put; + } + + err_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL); + if (!err_alloc) { + ret = -ENOMEM; + goto out_put; + } + rperr_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL); + if (!rperr_alloc) { + ret = -ENOMEM; + goto out_put; + } + + if (aer_mask_override) { + cor_mask_orig = cor_mask; + cor_mask &= !(einj->cor_status); + pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, + cor_mask); + + uncor_mask_orig = uncor_mask; + uncor_mask &= !(einj->uncor_status); + pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, + uncor_mask); + } + + spin_lock_irqsave(&inject_lock, flags); + + err = __find_aer_error_by_dev(dev); + if (!err) { + err = err_alloc; + err_alloc = NULL; + aer_error_init(err, einj->domain, einj->bus, devfn, + pos_cap_err); + list_add(&err->list, &einjected); + } + err->uncor_status |= einj->uncor_status; + err->cor_status |= einj->cor_status; + err->header_log0 = einj->header_log0; + err->header_log1 = einj->header_log1; + err->header_log2 = einj->header_log2; + err->header_log3 = einj->header_log3; + + if (!aer_mask_override && einj->cor_status && + !(einj->cor_status & ~cor_mask)) { + ret = -EINVAL; + pci_warn(dev, "The correctable error(s) is masked by device\n"); + spin_unlock_irqrestore(&inject_lock, flags); + goto out_put; + } + if (!aer_mask_override && einj->uncor_status && + !(einj->uncor_status & ~uncor_mask)) { + ret = -EINVAL; + pci_warn(dev, "The uncorrectable error(s) is masked by device\n"); + spin_unlock_irqrestore(&inject_lock, flags); + goto out_put; + } + + rperr = __find_aer_error_by_dev(rpdev); + if (!rperr) { + rperr = rperr_alloc; + rperr_alloc = NULL; + aer_error_init(rperr, pci_domain_nr(rpdev->bus), + rpdev->bus->number, rpdev->devfn, + rp_pos_cap_err); + list_add(&rperr->list, &einjected); + } + if (einj->cor_status) { + if (rperr->root_status & PCI_ERR_ROOT_COR_RCV) + rperr->root_status |= PCI_ERR_ROOT_MULTI_COR_RCV; + else + rperr->root_status |= PCI_ERR_ROOT_COR_RCV; + rperr->source_id &= 0xffff0000; + rperr->source_id |= (einj->bus << 8) | devfn; + } + if (einj->uncor_status) { + if (rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV) + rperr->root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV; + if (sever & einj->uncor_status) { + rperr->root_status |= PCI_ERR_ROOT_FATAL_RCV; + if (!(rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV)) + rperr->root_status |= PCI_ERR_ROOT_FIRST_FATAL; + } else + rperr->root_status |= PCI_ERR_ROOT_NONFATAL_RCV; + rperr->root_status |= PCI_ERR_ROOT_UNCOR_RCV; + rperr->source_id &= 0x0000ffff; + rperr->source_id |= ((einj->bus << 8) | devfn) << 16; + } + spin_unlock_irqrestore(&inject_lock, flags); + + if (aer_mask_override) { + pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, + cor_mask_orig); + pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, + uncor_mask_orig); + } + + ret = pci_bus_set_aer_ops(dev->bus); + if (ret) + goto out_put; + ret = pci_bus_set_aer_ops(rpdev->bus); + if (ret) + goto out_put; + + device = pcie_port_find_device(rpdev, PCIE_PORT_SERVICE_AER); + if (device) { + edev = to_pcie_device(device); + if (!get_service_data(edev)) { + pci_warn(edev->port, "AER service is not initialized\n"); + ret = -EPROTONOSUPPORT; + goto out_put; + } + pci_info(edev->port, "Injecting errors %08x/%08x into device %s\n", + einj->cor_status, einj->uncor_status, pci_name(dev)); + ret = irq_inject_interrupt(edev->irq); + } else { + pci_err(rpdev, "AER device not found\n"); + ret = -ENODEV; + } +out_put: + kfree(err_alloc); + kfree(rperr_alloc); + pci_dev_put(dev); + return ret; +} + +static ssize_t aer_inject_write(struct file *filp, const char __user *ubuf, + size_t usize, loff_t *off) +{ + struct aer_error_inj einj; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (usize < offsetof(struct aer_error_inj, domain) || + usize > sizeof(einj)) + return -EINVAL; + + memset(&einj, 0, sizeof(einj)); + if (copy_from_user(&einj, ubuf, usize)) + return -EFAULT; + + ret = aer_inject(&einj); + return ret ? ret : usize; +} + +static const struct file_operations aer_inject_fops = { + .write = aer_inject_write, + .owner = THIS_MODULE, + .llseek = noop_llseek, +}; + +static struct miscdevice aer_inject_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = "aer_inject", + .fops = &aer_inject_fops, +}; + +static int __init aer_inject_init(void) +{ + return misc_register(&aer_inject_device); +} + +static void __exit aer_inject_exit(void) +{ + struct aer_error *err, *err_next; + unsigned long flags; + struct pci_bus_ops *bus_ops; + + misc_deregister(&aer_inject_device); + + while ((bus_ops = pci_bus_ops_pop())) { + pci_bus_set_ops(bus_ops->bus, bus_ops->ops); + kfree(bus_ops); + } + + spin_lock_irqsave(&inject_lock, flags); + list_for_each_entry_safe(err, err_next, &einjected, list) { + list_del(&err->list); + kfree(err); + } + spin_unlock_irqrestore(&inject_lock, flags); +} + +module_init(aer_inject_init); +module_exit(aer_inject_exit); + +MODULE_DESCRIPTION("PCIe AER software error injector"); +MODULE_LICENSE("GPL"); diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c new file mode 100644 index 0000000000..7e3b342215 --- /dev/null +++ b/drivers/pci/pcie/aspm.c @@ -0,0 +1,1440 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Enable PCIe link L0s/L1 state and Clock Power Management + * + * Copyright (C) 2007 Intel + * Copyright (C) Zhang Yanmin (yanmin.zhang@intel.com) + * Copyright (C) Shaohua Li (shaohua.li@intel.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../pci.h" + +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "pcie_aspm." + +/* Note: those are not register definitions */ +#define ASPM_STATE_L0S_UP (1) /* Upstream direction L0s state */ +#define ASPM_STATE_L0S_DW (2) /* Downstream direction L0s state */ +#define ASPM_STATE_L1 (4) /* L1 state */ +#define ASPM_STATE_L1_1 (8) /* ASPM L1.1 state */ +#define ASPM_STATE_L1_2 (0x10) /* ASPM L1.2 state */ +#define ASPM_STATE_L1_1_PCIPM (0x20) /* PCI PM L1.1 state */ +#define ASPM_STATE_L1_2_PCIPM (0x40) /* PCI PM L1.2 state */ +#define ASPM_STATE_L1_SS_PCIPM (ASPM_STATE_L1_1_PCIPM | ASPM_STATE_L1_2_PCIPM) +#define ASPM_STATE_L1_2_MASK (ASPM_STATE_L1_2 | ASPM_STATE_L1_2_PCIPM) +#define ASPM_STATE_L1SS (ASPM_STATE_L1_1 | ASPM_STATE_L1_1_PCIPM |\ + ASPM_STATE_L1_2_MASK) +#define ASPM_STATE_L0S (ASPM_STATE_L0S_UP | ASPM_STATE_L0S_DW) +#define ASPM_STATE_ALL (ASPM_STATE_L0S | ASPM_STATE_L1 | \ + ASPM_STATE_L1SS) + +struct pcie_link_state { + struct pci_dev *pdev; /* Upstream component of the Link */ + struct pci_dev *downstream; /* Downstream component, function 0 */ + struct pcie_link_state *root; /* pointer to the root port link */ + struct pcie_link_state *parent; /* pointer to the parent Link state */ + struct list_head sibling; /* node in link_list */ + + /* ASPM state */ + u32 aspm_support:7; /* Supported ASPM state */ + u32 aspm_enabled:7; /* Enabled ASPM state */ + u32 aspm_capable:7; /* Capable ASPM state with latency */ + u32 aspm_default:7; /* Default ASPM state by BIOS */ + u32 aspm_disable:7; /* Disabled ASPM state */ + + /* Clock PM state */ + u32 clkpm_capable:1; /* Clock PM capable? */ + u32 clkpm_enabled:1; /* Current Clock PM state */ + u32 clkpm_default:1; /* Default Clock PM state by BIOS */ + u32 clkpm_disable:1; /* Clock PM disabled */ +}; + +static int aspm_disabled, aspm_force; +static bool aspm_support_enabled = true; +static DEFINE_MUTEX(aspm_lock); +static LIST_HEAD(link_list); + +#define POLICY_DEFAULT 0 /* BIOS default setting */ +#define POLICY_PERFORMANCE 1 /* high performance */ +#define POLICY_POWERSAVE 2 /* high power saving */ +#define POLICY_POWER_SUPERSAVE 3 /* possibly even more power saving */ + +#ifdef CONFIG_PCIEASPM_PERFORMANCE +static int aspm_policy = POLICY_PERFORMANCE; +#elif defined CONFIG_PCIEASPM_POWERSAVE +static int aspm_policy = POLICY_POWERSAVE; +#elif defined CONFIG_PCIEASPM_POWER_SUPERSAVE +static int aspm_policy = POLICY_POWER_SUPERSAVE; +#else +static int aspm_policy; +#endif + +static const char *policy_str[] = { + [POLICY_DEFAULT] = "default", + [POLICY_PERFORMANCE] = "performance", + [POLICY_POWERSAVE] = "powersave", + [POLICY_POWER_SUPERSAVE] = "powersupersave" +}; + +/* + * The L1 PM substate capability is only implemented in function 0 in a + * multi function device. + */ +static struct pci_dev *pci_function_0(struct pci_bus *linkbus) +{ + struct pci_dev *child; + + list_for_each_entry(child, &linkbus->devices, bus_list) + if (PCI_FUNC(child->devfn) == 0) + return child; + return NULL; +} + +static int policy_to_aspm_state(struct pcie_link_state *link) +{ + switch (aspm_policy) { + case POLICY_PERFORMANCE: + /* Disable ASPM and Clock PM */ + return 0; + case POLICY_POWERSAVE: + /* Enable ASPM L0s/L1 */ + return (ASPM_STATE_L0S | ASPM_STATE_L1); + case POLICY_POWER_SUPERSAVE: + /* Enable Everything */ + return ASPM_STATE_ALL; + case POLICY_DEFAULT: + return link->aspm_default; + } + return 0; +} + +static int policy_to_clkpm_state(struct pcie_link_state *link) +{ + switch (aspm_policy) { + case POLICY_PERFORMANCE: + /* Disable ASPM and Clock PM */ + return 0; + case POLICY_POWERSAVE: + case POLICY_POWER_SUPERSAVE: + /* Enable Clock PM */ + return 1; + case POLICY_DEFAULT: + return link->clkpm_default; + } + return 0; +} + +static void pcie_set_clkpm_nocheck(struct pcie_link_state *link, int enable) +{ + struct pci_dev *child; + struct pci_bus *linkbus = link->pdev->subordinate; + u32 val = enable ? PCI_EXP_LNKCTL_CLKREQ_EN : 0; + + list_for_each_entry(child, &linkbus->devices, bus_list) + pcie_capability_clear_and_set_word(child, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_CLKREQ_EN, + val); + link->clkpm_enabled = !!enable; +} + +static void pcie_set_clkpm(struct pcie_link_state *link, int enable) +{ + /* + * Don't enable Clock PM if the link is not Clock PM capable + * or Clock PM is disabled + */ + if (!link->clkpm_capable || link->clkpm_disable) + enable = 0; + /* Need nothing if the specified equals to current state */ + if (link->clkpm_enabled == enable) + return; + pcie_set_clkpm_nocheck(link, enable); +} + +static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist) +{ + int capable = 1, enabled = 1; + u32 reg32; + u16 reg16; + struct pci_dev *child; + struct pci_bus *linkbus = link->pdev->subordinate; + + /* All functions should have the same cap and state, take the worst */ + list_for_each_entry(child, &linkbus->devices, bus_list) { + pcie_capability_read_dword(child, PCI_EXP_LNKCAP, ®32); + if (!(reg32 & PCI_EXP_LNKCAP_CLKPM)) { + capable = 0; + enabled = 0; + break; + } + pcie_capability_read_word(child, PCI_EXP_LNKCTL, ®16); + if (!(reg16 & PCI_EXP_LNKCTL_CLKREQ_EN)) + enabled = 0; + } + link->clkpm_enabled = enabled; + link->clkpm_default = enabled; + link->clkpm_capable = capable; + link->clkpm_disable = blacklist ? 1 : 0; +} + +/* + * pcie_aspm_configure_common_clock: check if the 2 ends of a link + * could use common clock. If they are, configure them to use the + * common clock. That will reduce the ASPM state exit latency. + */ +static void pcie_aspm_configure_common_clock(struct pcie_link_state *link) +{ + int same_clock = 1; + u16 reg16, ccc, parent_old_ccc, child_old_ccc[8]; + struct pci_dev *child, *parent = link->pdev; + struct pci_bus *linkbus = parent->subordinate; + /* + * All functions of a slot should have the same Slot Clock + * Configuration, so just check one function + */ + child = list_entry(linkbus->devices.next, struct pci_dev, bus_list); + BUG_ON(!pci_is_pcie(child)); + + /* Check downstream component if bit Slot Clock Configuration is 1 */ + pcie_capability_read_word(child, PCI_EXP_LNKSTA, ®16); + if (!(reg16 & PCI_EXP_LNKSTA_SLC)) + same_clock = 0; + + /* Check upstream component if bit Slot Clock Configuration is 1 */ + pcie_capability_read_word(parent, PCI_EXP_LNKSTA, ®16); + if (!(reg16 & PCI_EXP_LNKSTA_SLC)) + same_clock = 0; + + /* Port might be already in common clock mode */ + pcie_capability_read_word(parent, PCI_EXP_LNKCTL, ®16); + parent_old_ccc = reg16 & PCI_EXP_LNKCTL_CCC; + if (same_clock && (reg16 & PCI_EXP_LNKCTL_CCC)) { + bool consistent = true; + + list_for_each_entry(child, &linkbus->devices, bus_list) { + pcie_capability_read_word(child, PCI_EXP_LNKCTL, + ®16); + if (!(reg16 & PCI_EXP_LNKCTL_CCC)) { + consistent = false; + break; + } + } + if (consistent) + return; + pci_info(parent, "ASPM: current common clock configuration is inconsistent, reconfiguring\n"); + } + + ccc = same_clock ? PCI_EXP_LNKCTL_CCC : 0; + /* Configure downstream component, all functions */ + list_for_each_entry(child, &linkbus->devices, bus_list) { + pcie_capability_read_word(child, PCI_EXP_LNKCTL, ®16); + child_old_ccc[PCI_FUNC(child->devfn)] = reg16 & PCI_EXP_LNKCTL_CCC; + pcie_capability_clear_and_set_word(child, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_CCC, ccc); + } + + /* Configure upstream component */ + pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_CCC, ccc); + + if (pcie_retrain_link(link->pdev, true)) { + + /* Training failed. Restore common clock configurations */ + pci_err(parent, "ASPM: Could not configure common clock\n"); + list_for_each_entry(child, &linkbus->devices, bus_list) + pcie_capability_clear_and_set_word(child, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_CCC, + child_old_ccc[PCI_FUNC(child->devfn)]); + pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_CCC, parent_old_ccc); + } +} + +/* Convert L0s latency encoding to ns */ +static u32 calc_l0s_latency(u32 lnkcap) +{ + u32 encoding = (lnkcap & PCI_EXP_LNKCAP_L0SEL) >> 12; + + if (encoding == 0x7) + return (5 * 1000); /* > 4us */ + return (64 << encoding); +} + +/* Convert L0s acceptable latency encoding to ns */ +static u32 calc_l0s_acceptable(u32 encoding) +{ + if (encoding == 0x7) + return -1U; + return (64 << encoding); +} + +/* Convert L1 latency encoding to ns */ +static u32 calc_l1_latency(u32 lnkcap) +{ + u32 encoding = (lnkcap & PCI_EXP_LNKCAP_L1EL) >> 15; + + if (encoding == 0x7) + return (65 * 1000); /* > 64us */ + return (1000 << encoding); +} + +/* Convert L1 acceptable latency encoding to ns */ +static u32 calc_l1_acceptable(u32 encoding) +{ + if (encoding == 0x7) + return -1U; + return (1000 << encoding); +} + +/* Convert L1SS T_pwr encoding to usec */ +static u32 calc_l12_pwron(struct pci_dev *pdev, u32 scale, u32 val) +{ + switch (scale) { + case 0: + return val * 2; + case 1: + return val * 10; + case 2: + return val * 100; + } + pci_err(pdev, "%s: Invalid T_PwrOn scale: %u\n", __func__, scale); + return 0; +} + +/* + * Encode an LTR_L1.2_THRESHOLD value for the L1 PM Substates Control 1 + * register. Ports enter L1.2 when the most recent LTR value is greater + * than or equal to LTR_L1.2_THRESHOLD, so we round up to make sure we + * don't enter L1.2 too aggressively. + * + * See PCIe r6.0, sec 5.5.1, 6.18, 7.8.3.3. + */ +static void encode_l12_threshold(u32 threshold_us, u32 *scale, u32 *value) +{ + u64 threshold_ns = (u64) threshold_us * 1000; + + /* + * LTR_L1.2_THRESHOLD_Value ("value") is a 10-bit field with max + * value of 0x3ff. + */ + if (threshold_ns <= 0x3ff * 1) { + *scale = 0; /* Value times 1ns */ + *value = threshold_ns; + } else if (threshold_ns <= 0x3ff * 32) { + *scale = 1; /* Value times 32ns */ + *value = roundup(threshold_ns, 32) / 32; + } else if (threshold_ns <= 0x3ff * 1024) { + *scale = 2; /* Value times 1024ns */ + *value = roundup(threshold_ns, 1024) / 1024; + } else if (threshold_ns <= 0x3ff * 32768) { + *scale = 3; /* Value times 32768ns */ + *value = roundup(threshold_ns, 32768) / 32768; + } else if (threshold_ns <= 0x3ff * 1048576) { + *scale = 4; /* Value times 1048576ns */ + *value = roundup(threshold_ns, 1048576) / 1048576; + } else if (threshold_ns <= 0x3ff * (u64) 33554432) { + *scale = 5; /* Value times 33554432ns */ + *value = roundup(threshold_ns, 33554432) / 33554432; + } else { + *scale = 5; + *value = 0x3ff; /* Max representable value */ + } +} + +static void pcie_aspm_check_latency(struct pci_dev *endpoint) +{ + u32 latency, encoding, lnkcap_up, lnkcap_dw; + u32 l1_switch_latency = 0, latency_up_l0s; + u32 latency_up_l1, latency_dw_l0s, latency_dw_l1; + u32 acceptable_l0s, acceptable_l1; + struct pcie_link_state *link; + + /* Device not in D0 doesn't need latency check */ + if ((endpoint->current_state != PCI_D0) && + (endpoint->current_state != PCI_UNKNOWN)) + return; + + link = endpoint->bus->self->link_state; + + /* Calculate endpoint L0s acceptable latency */ + encoding = (endpoint->devcap & PCI_EXP_DEVCAP_L0S) >> 6; + acceptable_l0s = calc_l0s_acceptable(encoding); + + /* Calculate endpoint L1 acceptable latency */ + encoding = (endpoint->devcap & PCI_EXP_DEVCAP_L1) >> 9; + acceptable_l1 = calc_l1_acceptable(encoding); + + while (link) { + struct pci_dev *dev = pci_function_0(link->pdev->subordinate); + + /* Read direction exit latencies */ + pcie_capability_read_dword(link->pdev, PCI_EXP_LNKCAP, + &lnkcap_up); + pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, + &lnkcap_dw); + latency_up_l0s = calc_l0s_latency(lnkcap_up); + latency_up_l1 = calc_l1_latency(lnkcap_up); + latency_dw_l0s = calc_l0s_latency(lnkcap_dw); + latency_dw_l1 = calc_l1_latency(lnkcap_dw); + + /* Check upstream direction L0s latency */ + if ((link->aspm_capable & ASPM_STATE_L0S_UP) && + (latency_up_l0s > acceptable_l0s)) + link->aspm_capable &= ~ASPM_STATE_L0S_UP; + + /* Check downstream direction L0s latency */ + if ((link->aspm_capable & ASPM_STATE_L0S_DW) && + (latency_dw_l0s > acceptable_l0s)) + link->aspm_capable &= ~ASPM_STATE_L0S_DW; + /* + * Check L1 latency. + * Every switch on the path to root complex need 1 + * more microsecond for L1. Spec doesn't mention L0s. + * + * The exit latencies for L1 substates are not advertised + * by a device. Since the spec also doesn't mention a way + * to determine max latencies introduced by enabling L1 + * substates on the components, it is not clear how to do + * a L1 substate exit latency check. We assume that the + * L1 exit latencies advertised by a device include L1 + * substate latencies (and hence do not do any check). + */ + latency = max_t(u32, latency_up_l1, latency_dw_l1); + if ((link->aspm_capable & ASPM_STATE_L1) && + (latency + l1_switch_latency > acceptable_l1)) + link->aspm_capable &= ~ASPM_STATE_L1; + l1_switch_latency += 1000; + + link = link->parent; + } +} + +static void pci_clear_and_set_dword(struct pci_dev *pdev, int pos, + u32 clear, u32 set) +{ + u32 val; + + pci_read_config_dword(pdev, pos, &val); + val &= ~clear; + val |= set; + pci_write_config_dword(pdev, pos, val); +} + +/* Calculate L1.2 PM substate timing parameters */ +static void aspm_calc_l12_info(struct pcie_link_state *link, + u32 parent_l1ss_cap, u32 child_l1ss_cap) +{ + struct pci_dev *child = link->downstream, *parent = link->pdev; + u32 val1, val2, scale1, scale2; + u32 t_common_mode, t_power_on, l1_2_threshold, scale, value; + u32 ctl1 = 0, ctl2 = 0; + u32 pctl1, pctl2, cctl1, cctl2; + u32 pl1_2_enables, cl1_2_enables; + + /* Choose the greater of the two Port Common_Mode_Restore_Times */ + val1 = (parent_l1ss_cap & PCI_L1SS_CAP_CM_RESTORE_TIME) >> 8; + val2 = (child_l1ss_cap & PCI_L1SS_CAP_CM_RESTORE_TIME) >> 8; + t_common_mode = max(val1, val2); + + /* Choose the greater of the two Port T_POWER_ON times */ + val1 = (parent_l1ss_cap & PCI_L1SS_CAP_P_PWR_ON_VALUE) >> 19; + scale1 = (parent_l1ss_cap & PCI_L1SS_CAP_P_PWR_ON_SCALE) >> 16; + val2 = (child_l1ss_cap & PCI_L1SS_CAP_P_PWR_ON_VALUE) >> 19; + scale2 = (child_l1ss_cap & PCI_L1SS_CAP_P_PWR_ON_SCALE) >> 16; + + if (calc_l12_pwron(parent, scale1, val1) > + calc_l12_pwron(child, scale2, val2)) { + ctl2 |= scale1 | (val1 << 3); + t_power_on = calc_l12_pwron(parent, scale1, val1); + } else { + ctl2 |= scale2 | (val2 << 3); + t_power_on = calc_l12_pwron(child, scale2, val2); + } + + /* + * Set LTR_L1.2_THRESHOLD to the time required to transition the + * Link from L0 to L1.2 and back to L0 so we enter L1.2 only if + * downstream devices report (via LTR) that they can tolerate at + * least that much latency. + * + * Based on PCIe r3.1, sec 5.5.3.3.1, Figures 5-16 and 5-17, and + * Table 5-11. T(POWER_OFF) is at most 2us and T(L1.2) is at + * least 4us. + */ + l1_2_threshold = 2 + 4 + t_common_mode + t_power_on; + encode_l12_threshold(l1_2_threshold, &scale, &value); + ctl1 |= t_common_mode << 8 | scale << 29 | value << 16; + + /* Some broken devices only support dword access to L1 SS */ + pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, &pctl1); + pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, &pctl2); + pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL1, &cctl1); + pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL2, &cctl2); + + if (ctl1 == pctl1 && ctl1 == cctl1 && + ctl2 == pctl2 && ctl2 == cctl2) + return; + + /* Disable L1.2 while updating. See PCIe r5.0, sec 5.5.4, 7.8.3.3 */ + pl1_2_enables = pctl1 & PCI_L1SS_CTL1_L1_2_MASK; + cl1_2_enables = cctl1 & PCI_L1SS_CTL1_L1_2_MASK; + + if (pl1_2_enables || cl1_2_enables) { + pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1_2_MASK, 0); + pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1_2_MASK, 0); + } + + /* Program T_POWER_ON times in both ports */ + pci_write_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, ctl2); + pci_write_config_dword(child, child->l1ss + PCI_L1SS_CTL2, ctl2); + + /* Program Common_Mode_Restore_Time in upstream device */ + pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_CM_RESTORE_TIME, ctl1); + + /* Program LTR_L1.2_THRESHOLD time in both ports */ + pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_LTR_L12_TH_VALUE | + PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1); + pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_LTR_L12_TH_VALUE | + PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1); + + if (pl1_2_enables || cl1_2_enables) { + pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, 0, + pl1_2_enables); + pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, 0, + cl1_2_enables); + } +} + +static void aspm_l1ss_init(struct pcie_link_state *link) +{ + struct pci_dev *child = link->downstream, *parent = link->pdev; + u32 parent_l1ss_cap, child_l1ss_cap; + u32 parent_l1ss_ctl1 = 0, child_l1ss_ctl1 = 0; + + if (!parent->l1ss || !child->l1ss) + return; + + /* Setup L1 substate */ + pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CAP, + &parent_l1ss_cap); + pci_read_config_dword(child, child->l1ss + PCI_L1SS_CAP, + &child_l1ss_cap); + + if (!(parent_l1ss_cap & PCI_L1SS_CAP_L1_PM_SS)) + parent_l1ss_cap = 0; + if (!(child_l1ss_cap & PCI_L1SS_CAP_L1_PM_SS)) + child_l1ss_cap = 0; + + /* + * If we don't have LTR for the entire path from the Root Complex + * to this device, we can't use ASPM L1.2 because it relies on the + * LTR_L1.2_THRESHOLD. See PCIe r4.0, secs 5.5.4, 6.18. + */ + if (!child->ltr_path) + child_l1ss_cap &= ~PCI_L1SS_CAP_ASPM_L1_2; + + if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_ASPM_L1_1) + link->aspm_support |= ASPM_STATE_L1_1; + if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_ASPM_L1_2) + link->aspm_support |= ASPM_STATE_L1_2; + if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_PCIPM_L1_1) + link->aspm_support |= ASPM_STATE_L1_1_PCIPM; + if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_PCIPM_L1_2) + link->aspm_support |= ASPM_STATE_L1_2_PCIPM; + + if (parent_l1ss_cap) + pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + &parent_l1ss_ctl1); + if (child_l1ss_cap) + pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL1, + &child_l1ss_ctl1); + + if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_ASPM_L1_1) + link->aspm_enabled |= ASPM_STATE_L1_1; + if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_ASPM_L1_2) + link->aspm_enabled |= ASPM_STATE_L1_2; + if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_PCIPM_L1_1) + link->aspm_enabled |= ASPM_STATE_L1_1_PCIPM; + if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_PCIPM_L1_2) + link->aspm_enabled |= ASPM_STATE_L1_2_PCIPM; + + if (link->aspm_support & ASPM_STATE_L1_2_MASK) + aspm_calc_l12_info(link, parent_l1ss_cap, child_l1ss_cap); +} + +static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) +{ + struct pci_dev *child = link->downstream, *parent = link->pdev; + u32 parent_lnkcap, child_lnkcap; + u16 parent_lnkctl, child_lnkctl; + struct pci_bus *linkbus = parent->subordinate; + + if (blacklist) { + /* Set enabled/disable so that we will disable ASPM later */ + link->aspm_enabled = ASPM_STATE_ALL; + link->aspm_disable = ASPM_STATE_ALL; + return; + } + + /* + * If ASPM not supported, don't mess with the clocks and link, + * bail out now. + */ + pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &parent_lnkcap); + pcie_capability_read_dword(child, PCI_EXP_LNKCAP, &child_lnkcap); + if (!(parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPMS)) + return; + + /* Configure common clock before checking latencies */ + pcie_aspm_configure_common_clock(link); + + /* + * Re-read upstream/downstream components' register state after + * clock configuration. L0s & L1 exit latencies in the otherwise + * read-only Link Capabilities may change depending on common clock + * configuration (PCIe r5.0, sec 7.5.3.6). + */ + pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &parent_lnkcap); + pcie_capability_read_dword(child, PCI_EXP_LNKCAP, &child_lnkcap); + pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &parent_lnkctl); + pcie_capability_read_word(child, PCI_EXP_LNKCTL, &child_lnkctl); + + /* + * Setup L0s state + * + * Note that we must not enable L0s in either direction on a + * given link unless components on both sides of the link each + * support L0s. + */ + if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L0S) + link->aspm_support |= ASPM_STATE_L0S; + + if (child_lnkctl & PCI_EXP_LNKCTL_ASPM_L0S) + link->aspm_enabled |= ASPM_STATE_L0S_UP; + if (parent_lnkctl & PCI_EXP_LNKCTL_ASPM_L0S) + link->aspm_enabled |= ASPM_STATE_L0S_DW; + + /* Setup L1 state */ + if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L1) + link->aspm_support |= ASPM_STATE_L1; + + if (parent_lnkctl & child_lnkctl & PCI_EXP_LNKCTL_ASPM_L1) + link->aspm_enabled |= ASPM_STATE_L1; + + aspm_l1ss_init(link); + + /* Save default state */ + link->aspm_default = link->aspm_enabled; + + /* Setup initial capable state. Will be updated later */ + link->aspm_capable = link->aspm_support; + + /* Get and check endpoint acceptable latencies */ + list_for_each_entry(child, &linkbus->devices, bus_list) { + if (pci_pcie_type(child) != PCI_EXP_TYPE_ENDPOINT && + pci_pcie_type(child) != PCI_EXP_TYPE_LEG_END) + continue; + + pcie_aspm_check_latency(child); + } +} + +/* Configure the ASPM L1 substates */ +static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) +{ + u32 val, enable_req; + struct pci_dev *child = link->downstream, *parent = link->pdev; + + enable_req = (link->aspm_enabled ^ state) & state; + + /* + * Here are the rules specified in the PCIe spec for enabling L1SS: + * - When enabling L1.x, enable bit at parent first, then at child + * - When disabling L1.x, disable bit at child first, then at parent + * - When enabling ASPM L1.x, need to disable L1 + * (at child followed by parent). + * - The ASPM/PCIPM L1.2 must be disabled while programming timing + * parameters + * + * To keep it simple, disable all L1SS bits first, and later enable + * what is needed. + */ + + /* Disable all L1 substates */ + pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1SS_MASK, 0); + pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1SS_MASK, 0); + /* + * If needed, disable L1, and it gets enabled later + * in pcie_config_aspm_link(). + */ + if (enable_req & (ASPM_STATE_L1_1 | ASPM_STATE_L1_2)) { + pcie_capability_clear_and_set_word(child, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPM_L1, 0); + pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPM_L1, 0); + } + + val = 0; + if (state & ASPM_STATE_L1_1) + val |= PCI_L1SS_CTL1_ASPM_L1_1; + if (state & ASPM_STATE_L1_2) + val |= PCI_L1SS_CTL1_ASPM_L1_2; + if (state & ASPM_STATE_L1_1_PCIPM) + val |= PCI_L1SS_CTL1_PCIPM_L1_1; + if (state & ASPM_STATE_L1_2_PCIPM) + val |= PCI_L1SS_CTL1_PCIPM_L1_2; + + /* Enable what we need to enable */ + pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1SS_MASK, val); + pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1SS_MASK, val); +} + +static void pcie_config_aspm_dev(struct pci_dev *pdev, u32 val) +{ + pcie_capability_clear_and_set_word(pdev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, val); +} + +static void pcie_config_aspm_link(struct pcie_link_state *link, u32 state) +{ + u32 upstream = 0, dwstream = 0; + struct pci_dev *child = link->downstream, *parent = link->pdev; + struct pci_bus *linkbus = parent->subordinate; + + /* Enable only the states that were not explicitly disabled */ + state &= (link->aspm_capable & ~link->aspm_disable); + + /* Can't enable any substates if L1 is not enabled */ + if (!(state & ASPM_STATE_L1)) + state &= ~ASPM_STATE_L1SS; + + /* Spec says both ports must be in D0 before enabling PCI PM substates*/ + if (parent->current_state != PCI_D0 || child->current_state != PCI_D0) { + state &= ~ASPM_STATE_L1_SS_PCIPM; + state |= (link->aspm_enabled & ASPM_STATE_L1_SS_PCIPM); + } + + /* Nothing to do if the link is already in the requested state */ + if (link->aspm_enabled == state) + return; + /* Convert ASPM state to upstream/downstream ASPM register state */ + if (state & ASPM_STATE_L0S_UP) + dwstream |= PCI_EXP_LNKCTL_ASPM_L0S; + if (state & ASPM_STATE_L0S_DW) + upstream |= PCI_EXP_LNKCTL_ASPM_L0S; + if (state & ASPM_STATE_L1) { + upstream |= PCI_EXP_LNKCTL_ASPM_L1; + dwstream |= PCI_EXP_LNKCTL_ASPM_L1; + } + + if (link->aspm_capable & ASPM_STATE_L1SS) + pcie_config_aspm_l1ss(link, state); + + /* + * Spec 2.0 suggests all functions should be configured the + * same setting for ASPM. Enabling ASPM L1 should be done in + * upstream component first and then downstream, and vice + * versa for disabling ASPM L1. Spec doesn't mention L0S. + */ + if (state & ASPM_STATE_L1) + pcie_config_aspm_dev(parent, upstream); + list_for_each_entry(child, &linkbus->devices, bus_list) + pcie_config_aspm_dev(child, dwstream); + if (!(state & ASPM_STATE_L1)) + pcie_config_aspm_dev(parent, upstream); + + link->aspm_enabled = state; +} + +static void pcie_config_aspm_path(struct pcie_link_state *link) +{ + while (link) { + pcie_config_aspm_link(link, policy_to_aspm_state(link)); + link = link->parent; + } +} + +static void free_link_state(struct pcie_link_state *link) +{ + link->pdev->link_state = NULL; + kfree(link); +} + +static int pcie_aspm_sanity_check(struct pci_dev *pdev) +{ + struct pci_dev *child; + u32 reg32; + + /* + * Some functions in a slot might not all be PCIe functions, + * very strange. Disable ASPM for the whole slot + */ + list_for_each_entry(child, &pdev->subordinate->devices, bus_list) { + if (!pci_is_pcie(child)) + return -EINVAL; + + /* + * If ASPM is disabled then we're not going to change + * the BIOS state. It's safe to continue even if it's a + * pre-1.1 device + */ + + if (aspm_disabled) + continue; + + /* + * Disable ASPM for pre-1.1 PCIe device, we follow MS to use + * RBER bit to determine if a function is 1.1 version device + */ + pcie_capability_read_dword(child, PCI_EXP_DEVCAP, ®32); + if (!(reg32 & PCI_EXP_DEVCAP_RBER) && !aspm_force) { + pci_info(child, "disabling ASPM on pre-1.1 PCIe device. You can enable it with 'pcie_aspm=force'\n"); + return -EINVAL; + } + } + return 0; +} + +static struct pcie_link_state *alloc_pcie_link_state(struct pci_dev *pdev) +{ + struct pcie_link_state *link; + + link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) + return NULL; + + INIT_LIST_HEAD(&link->sibling); + link->pdev = pdev; + link->downstream = pci_function_0(pdev->subordinate); + + /* + * Root Ports and PCI/PCI-X to PCIe Bridges are roots of PCIe + * hierarchies. Note that some PCIe host implementations omit + * the root ports entirely, in which case a downstream port on + * a switch may become the root of the link state chain for all + * its subordinate endpoints. + */ + if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(pdev) == PCI_EXP_TYPE_PCIE_BRIDGE || + !pdev->bus->parent->self) { + link->root = link; + } else { + struct pcie_link_state *parent; + + parent = pdev->bus->parent->self->link_state; + if (!parent) { + kfree(link); + return NULL; + } + + link->parent = parent; + link->root = link->parent->root; + } + + list_add(&link->sibling, &link_list); + pdev->link_state = link; + return link; +} + +static void pcie_aspm_update_sysfs_visibility(struct pci_dev *pdev) +{ + struct pci_dev *child; + + list_for_each_entry(child, &pdev->subordinate->devices, bus_list) + sysfs_update_group(&child->dev.kobj, &aspm_ctrl_attr_group); +} + +/* + * pcie_aspm_init_link_state: Initiate PCI express link state. + * It is called after the pcie and its children devices are scanned. + * @pdev: the root port or switch downstream port + */ +void pcie_aspm_init_link_state(struct pci_dev *pdev) +{ + struct pcie_link_state *link; + int blacklist = !!pcie_aspm_sanity_check(pdev); + + if (!aspm_support_enabled) + return; + + if (pdev->link_state) + return; + + /* + * We allocate pcie_link_state for the component on the upstream + * end of a Link, so there's nothing to do unless this device is + * downstream port. + */ + if (!pcie_downstream_port(pdev)) + return; + + /* VIA has a strange chipset, root port is under a bridge */ + if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT && + pdev->bus->self) + return; + + down_read(&pci_bus_sem); + if (list_empty(&pdev->subordinate->devices)) + goto out; + + mutex_lock(&aspm_lock); + link = alloc_pcie_link_state(pdev); + if (!link) + goto unlock; + /* + * Setup initial ASPM state. Note that we need to configure + * upstream links also because capable state of them can be + * update through pcie_aspm_cap_init(). + */ + pcie_aspm_cap_init(link, blacklist); + + /* Setup initial Clock PM state */ + pcie_clkpm_cap_init(link, blacklist); + + /* + * At this stage drivers haven't had an opportunity to change the + * link policy setting. Enabling ASPM on broken hardware can cripple + * it even before the driver has had a chance to disable ASPM, so + * default to a safe level right now. If we're enabling ASPM beyond + * the BIOS's expectation, we'll do so once pci_enable_device() is + * called. + */ + if (aspm_policy != POLICY_POWERSAVE && + aspm_policy != POLICY_POWER_SUPERSAVE) { + pcie_config_aspm_path(link); + pcie_set_clkpm(link, policy_to_clkpm_state(link)); + } + + pcie_aspm_update_sysfs_visibility(pdev); + +unlock: + mutex_unlock(&aspm_lock); +out: + up_read(&pci_bus_sem); +} + +/* Recheck latencies and update aspm_capable for links under the root */ +static void pcie_update_aspm_capable(struct pcie_link_state *root) +{ + struct pcie_link_state *link; + BUG_ON(root->parent); + list_for_each_entry(link, &link_list, sibling) { + if (link->root != root) + continue; + link->aspm_capable = link->aspm_support; + } + list_for_each_entry(link, &link_list, sibling) { + struct pci_dev *child; + struct pci_bus *linkbus = link->pdev->subordinate; + if (link->root != root) + continue; + list_for_each_entry(child, &linkbus->devices, bus_list) { + if ((pci_pcie_type(child) != PCI_EXP_TYPE_ENDPOINT) && + (pci_pcie_type(child) != PCI_EXP_TYPE_LEG_END)) + continue; + pcie_aspm_check_latency(child); + } + } +} + +/* @pdev: the endpoint device */ +void pcie_aspm_exit_link_state(struct pci_dev *pdev) +{ + struct pci_dev *parent = pdev->bus->self; + struct pcie_link_state *link, *root, *parent_link; + + if (!parent || !parent->link_state) + return; + + down_read(&pci_bus_sem); + mutex_lock(&aspm_lock); + + link = parent->link_state; + root = link->root; + parent_link = link->parent; + + /* + * link->downstream is a pointer to the pci_dev of function 0. If + * we remove that function, the pci_dev is about to be deallocated, + * so we can't use link->downstream again. Free the link state to + * avoid this. + * + * If we're removing a non-0 function, it's possible we could + * retain the link state, but PCIe r6.0, sec 7.5.3.7, recommends + * programming the same ASPM Control value for all functions of + * multi-function devices, so disable ASPM for all of them. + */ + pcie_config_aspm_link(link, 0); + list_del(&link->sibling); + free_link_state(link); + + /* Recheck latencies and configure upstream links */ + if (parent_link) { + pcie_update_aspm_capable(root); + pcie_config_aspm_path(parent_link); + } + + mutex_unlock(&aspm_lock); + up_read(&pci_bus_sem); +} + +/* @pdev: the root port or switch downstream port */ +void pcie_aspm_pm_state_change(struct pci_dev *pdev) +{ + struct pcie_link_state *link = pdev->link_state; + + if (aspm_disabled || !link) + return; + /* + * Devices changed PM state, we should recheck if latency + * meets all functions' requirement + */ + down_read(&pci_bus_sem); + mutex_lock(&aspm_lock); + pcie_update_aspm_capable(link->root); + pcie_config_aspm_path(link); + mutex_unlock(&aspm_lock); + up_read(&pci_bus_sem); +} + +void pcie_aspm_powersave_config_link(struct pci_dev *pdev) +{ + struct pcie_link_state *link = pdev->link_state; + + if (aspm_disabled || !link) + return; + + if (aspm_policy != POLICY_POWERSAVE && + aspm_policy != POLICY_POWER_SUPERSAVE) + return; + + down_read(&pci_bus_sem); + mutex_lock(&aspm_lock); + pcie_config_aspm_path(link); + pcie_set_clkpm(link, policy_to_clkpm_state(link)); + mutex_unlock(&aspm_lock); + up_read(&pci_bus_sem); +} + +static struct pcie_link_state *pcie_aspm_get_link(struct pci_dev *pdev) +{ + struct pci_dev *bridge; + + if (!pci_is_pcie(pdev)) + return NULL; + + bridge = pci_upstream_bridge(pdev); + if (!bridge || !pci_is_pcie(bridge)) + return NULL; + + return bridge->link_state; +} + +static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) +{ + struct pcie_link_state *link = pcie_aspm_get_link(pdev); + + if (!link) + return -EINVAL; + /* + * A driver requested that ASPM be disabled on this device, but + * if we don't have permission to manage ASPM (e.g., on ACPI + * systems we have to observe the FADT ACPI_FADT_NO_ASPM bit and + * the _OSC method), we can't honor that request. Windows has + * a similar mechanism using "PciASPMOptOut", which is also + * ignored in this situation. + */ + if (aspm_disabled) { + pci_warn(pdev, "can't disable ASPM; OS doesn't have ASPM control\n"); + return -EPERM; + } + + if (sem) + down_read(&pci_bus_sem); + mutex_lock(&aspm_lock); + if (state & PCIE_LINK_STATE_L0S) + link->aspm_disable |= ASPM_STATE_L0S; + if (state & PCIE_LINK_STATE_L1) + /* L1 PM substates require L1 */ + link->aspm_disable |= ASPM_STATE_L1 | ASPM_STATE_L1SS; + if (state & PCIE_LINK_STATE_L1_1) + link->aspm_disable |= ASPM_STATE_L1_1; + if (state & PCIE_LINK_STATE_L1_2) + link->aspm_disable |= ASPM_STATE_L1_2; + if (state & PCIE_LINK_STATE_L1_1_PCIPM) + link->aspm_disable |= ASPM_STATE_L1_1_PCIPM; + if (state & PCIE_LINK_STATE_L1_2_PCIPM) + link->aspm_disable |= ASPM_STATE_L1_2_PCIPM; + pcie_config_aspm_link(link, policy_to_aspm_state(link)); + + if (state & PCIE_LINK_STATE_CLKPM) + link->clkpm_disable = 1; + pcie_set_clkpm(link, policy_to_clkpm_state(link)); + mutex_unlock(&aspm_lock); + if (sem) + up_read(&pci_bus_sem); + + return 0; +} + +int pci_disable_link_state_locked(struct pci_dev *pdev, int state) +{ + return __pci_disable_link_state(pdev, state, false); +} +EXPORT_SYMBOL(pci_disable_link_state_locked); + +/** + * pci_disable_link_state - Disable device's link state, so the link will + * never enter specific states. Note that if the BIOS didn't grant ASPM + * control to the OS, this does nothing because we can't touch the LNKCTL + * register. Returns 0 or a negative errno. + * + * @pdev: PCI device + * @state: ASPM link state to disable + */ +int pci_disable_link_state(struct pci_dev *pdev, int state) +{ + return __pci_disable_link_state(pdev, state, true); +} +EXPORT_SYMBOL(pci_disable_link_state); + +static int __pci_enable_link_state(struct pci_dev *pdev, int state, bool locked) +{ + struct pcie_link_state *link = pcie_aspm_get_link(pdev); + + if (!link) + return -EINVAL; + /* + * A driver requested that ASPM be enabled on this device, but + * if we don't have permission to manage ASPM (e.g., on ACPI + * systems we have to observe the FADT ACPI_FADT_NO_ASPM bit and + * the _OSC method), we can't honor that request. + */ + if (aspm_disabled) { + pci_warn(pdev, "can't override BIOS ASPM; OS doesn't have ASPM control\n"); + return -EPERM; + } + + if (!locked) + down_read(&pci_bus_sem); + mutex_lock(&aspm_lock); + link->aspm_default = 0; + if (state & PCIE_LINK_STATE_L0S) + link->aspm_default |= ASPM_STATE_L0S; + if (state & PCIE_LINK_STATE_L1) + link->aspm_default |= ASPM_STATE_L1; + /* L1 PM substates require L1 */ + if (state & PCIE_LINK_STATE_L1_1) + link->aspm_default |= ASPM_STATE_L1_1 | ASPM_STATE_L1; + if (state & PCIE_LINK_STATE_L1_2) + link->aspm_default |= ASPM_STATE_L1_2 | ASPM_STATE_L1; + if (state & PCIE_LINK_STATE_L1_1_PCIPM) + link->aspm_default |= ASPM_STATE_L1_1_PCIPM | ASPM_STATE_L1; + if (state & PCIE_LINK_STATE_L1_2_PCIPM) + link->aspm_default |= ASPM_STATE_L1_2_PCIPM | ASPM_STATE_L1; + pcie_config_aspm_link(link, policy_to_aspm_state(link)); + + link->clkpm_default = (state & PCIE_LINK_STATE_CLKPM) ? 1 : 0; + pcie_set_clkpm(link, policy_to_clkpm_state(link)); + mutex_unlock(&aspm_lock); + if (!locked) + up_read(&pci_bus_sem); + + return 0; +} + +/** + * pci_enable_link_state - Clear and set the default device link state so that + * the link may be allowed to enter the specified states. Note that if the + * BIOS didn't grant ASPM control to the OS, this does nothing because we can't + * touch the LNKCTL register. Also note that this does not enable states + * disabled by pci_disable_link_state(). Return 0 or a negative errno. + * + * @pdev: PCI device + * @state: Mask of ASPM link states to enable + */ +int pci_enable_link_state(struct pci_dev *pdev, int state) +{ + return __pci_enable_link_state(pdev, state, false); +} +EXPORT_SYMBOL(pci_enable_link_state); + +/** + * pci_enable_link_state_locked - Clear and set the default device link state + * so that the link may be allowed to enter the specified states. Note that if + * the BIOS didn't grant ASPM control to the OS, this does nothing because we + * can't touch the LNKCTL register. Also note that this does not enable states + * disabled by pci_disable_link_state(). Return 0 or a negative errno. + * + * @pdev: PCI device + * @state: Mask of ASPM link states to enable + * + * Context: Caller holds pci_bus_sem read lock. + */ +int pci_enable_link_state_locked(struct pci_dev *pdev, int state) +{ + lockdep_assert_held_read(&pci_bus_sem); + + return __pci_enable_link_state(pdev, state, true); +} +EXPORT_SYMBOL(pci_enable_link_state_locked); + +static int pcie_aspm_set_policy(const char *val, + const struct kernel_param *kp) +{ + int i; + struct pcie_link_state *link; + + if (aspm_disabled) + return -EPERM; + i = sysfs_match_string(policy_str, val); + if (i < 0) + return i; + if (i == aspm_policy) + return 0; + + down_read(&pci_bus_sem); + mutex_lock(&aspm_lock); + aspm_policy = i; + list_for_each_entry(link, &link_list, sibling) { + pcie_config_aspm_link(link, policy_to_aspm_state(link)); + pcie_set_clkpm(link, policy_to_clkpm_state(link)); + } + mutex_unlock(&aspm_lock); + up_read(&pci_bus_sem); + return 0; +} + +static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp) +{ + int i, cnt = 0; + for (i = 0; i < ARRAY_SIZE(policy_str); i++) + if (i == aspm_policy) + cnt += sprintf(buffer + cnt, "[%s] ", policy_str[i]); + else + cnt += sprintf(buffer + cnt, "%s ", policy_str[i]); + cnt += sprintf(buffer + cnt, "\n"); + return cnt; +} + +module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy, + NULL, 0644); + +/** + * pcie_aspm_enabled - Check if PCIe ASPM has been enabled for a device. + * @pdev: Target device. + * + * Relies on the upstream bridge's link_state being valid. The link_state + * is deallocated only when the last child of the bridge (i.e., @pdev or a + * sibling) is removed, and the caller should be holding a reference to + * @pdev, so this should be safe. + */ +bool pcie_aspm_enabled(struct pci_dev *pdev) +{ + struct pcie_link_state *link = pcie_aspm_get_link(pdev); + + if (!link) + return false; + + return link->aspm_enabled; +} +EXPORT_SYMBOL_GPL(pcie_aspm_enabled); + +static ssize_t aspm_attr_show_common(struct device *dev, + struct device_attribute *attr, + char *buf, u8 state) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pcie_link_state *link = pcie_aspm_get_link(pdev); + + return sysfs_emit(buf, "%d\n", (link->aspm_enabled & state) ? 1 : 0); +} + +static ssize_t aspm_attr_store_common(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len, u8 state) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pcie_link_state *link = pcie_aspm_get_link(pdev); + bool state_enable; + + if (kstrtobool(buf, &state_enable) < 0) + return -EINVAL; + + down_read(&pci_bus_sem); + mutex_lock(&aspm_lock); + + if (state_enable) { + link->aspm_disable &= ~state; + /* need to enable L1 for substates */ + if (state & ASPM_STATE_L1SS) + link->aspm_disable &= ~ASPM_STATE_L1; + } else { + link->aspm_disable |= state; + if (state & ASPM_STATE_L1) + link->aspm_disable |= ASPM_STATE_L1SS; + } + + pcie_config_aspm_link(link, policy_to_aspm_state(link)); + + mutex_unlock(&aspm_lock); + up_read(&pci_bus_sem); + + return len; +} + +#define ASPM_ATTR(_f, _s) \ +static ssize_t _f##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ return aspm_attr_show_common(dev, attr, buf, ASPM_STATE_##_s); } \ + \ +static ssize_t _f##_store(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ return aspm_attr_store_common(dev, attr, buf, len, ASPM_STATE_##_s); } + +ASPM_ATTR(l0s_aspm, L0S) +ASPM_ATTR(l1_aspm, L1) +ASPM_ATTR(l1_1_aspm, L1_1) +ASPM_ATTR(l1_2_aspm, L1_2) +ASPM_ATTR(l1_1_pcipm, L1_1_PCIPM) +ASPM_ATTR(l1_2_pcipm, L1_2_PCIPM) + +static ssize_t clkpm_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pcie_link_state *link = pcie_aspm_get_link(pdev); + + return sysfs_emit(buf, "%d\n", link->clkpm_enabled); +} + +static ssize_t clkpm_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pcie_link_state *link = pcie_aspm_get_link(pdev); + bool state_enable; + + if (kstrtobool(buf, &state_enable) < 0) + return -EINVAL; + + down_read(&pci_bus_sem); + mutex_lock(&aspm_lock); + + link->clkpm_disable = !state_enable; + pcie_set_clkpm(link, policy_to_clkpm_state(link)); + + mutex_unlock(&aspm_lock); + up_read(&pci_bus_sem); + + return len; +} + +static DEVICE_ATTR_RW(clkpm); +static DEVICE_ATTR_RW(l0s_aspm); +static DEVICE_ATTR_RW(l1_aspm); +static DEVICE_ATTR_RW(l1_1_aspm); +static DEVICE_ATTR_RW(l1_2_aspm); +static DEVICE_ATTR_RW(l1_1_pcipm); +static DEVICE_ATTR_RW(l1_2_pcipm); + +static struct attribute *aspm_ctrl_attrs[] = { + &dev_attr_clkpm.attr, + &dev_attr_l0s_aspm.attr, + &dev_attr_l1_aspm.attr, + &dev_attr_l1_1_aspm.attr, + &dev_attr_l1_2_aspm.attr, + &dev_attr_l1_1_pcipm.attr, + &dev_attr_l1_2_pcipm.attr, + NULL +}; + +static umode_t aspm_ctrl_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct pci_dev *pdev = to_pci_dev(dev); + struct pcie_link_state *link = pcie_aspm_get_link(pdev); + static const u8 aspm_state_map[] = { + ASPM_STATE_L0S, + ASPM_STATE_L1, + ASPM_STATE_L1_1, + ASPM_STATE_L1_2, + ASPM_STATE_L1_1_PCIPM, + ASPM_STATE_L1_2_PCIPM, + }; + + if (aspm_disabled || !link) + return 0; + + if (n == 0) + return link->clkpm_capable ? a->mode : 0; + + return link->aspm_capable & aspm_state_map[n - 1] ? a->mode : 0; +} + +const struct attribute_group aspm_ctrl_attr_group = { + .name = "link", + .attrs = aspm_ctrl_attrs, + .is_visible = aspm_ctrl_attrs_are_visible, +}; + +static int __init pcie_aspm_disable(char *str) +{ + if (!strcmp(str, "off")) { + aspm_policy = POLICY_DEFAULT; + aspm_disabled = 1; + aspm_support_enabled = false; + printk(KERN_INFO "PCIe ASPM is disabled\n"); + } else if (!strcmp(str, "force")) { + aspm_force = 1; + printk(KERN_INFO "PCIe ASPM is forcibly enabled\n"); + } + return 1; +} + +__setup("pcie_aspm=", pcie_aspm_disable); + +void pcie_no_aspm(void) +{ + /* + * Disabling ASPM is intended to prevent the kernel from modifying + * existing hardware state, not to clear existing state. To that end: + * (a) set policy to POLICY_DEFAULT in order to avoid changing state + * (b) prevent userspace from changing policy + */ + if (!aspm_force) { + aspm_policy = POLICY_DEFAULT; + aspm_disabled = 1; + } +} + +bool pcie_aspm_support_enabled(void) +{ + return aspm_support_enabled; +} diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c new file mode 100644 index 0000000000..3ceed8e3de --- /dev/null +++ b/drivers/pci/pcie/dpc.c @@ -0,0 +1,408 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCI Express Downstream Port Containment services driver + * Author: Keith Busch + * + * Copyright (C) 2016 Intel Corp. + */ + +#define dev_fmt(fmt) "DPC: " fmt + +#include +#include +#include +#include +#include + +#include "portdrv.h" +#include "../pci.h" + +static const char * const rp_pio_error_string[] = { + "Configuration Request received UR Completion", /* Bit Position 0 */ + "Configuration Request received CA Completion", /* Bit Position 1 */ + "Configuration Request Completion Timeout", /* Bit Position 2 */ + NULL, + NULL, + NULL, + NULL, + NULL, + "I/O Request received UR Completion", /* Bit Position 8 */ + "I/O Request received CA Completion", /* Bit Position 9 */ + "I/O Request Completion Timeout", /* Bit Position 10 */ + NULL, + NULL, + NULL, + NULL, + NULL, + "Memory Request received UR Completion", /* Bit Position 16 */ + "Memory Request received CA Completion", /* Bit Position 17 */ + "Memory Request Completion Timeout", /* Bit Position 18 */ +}; + +void pci_save_dpc_state(struct pci_dev *dev) +{ + struct pci_cap_saved_state *save_state; + u16 *cap; + + if (!pci_is_pcie(dev)) + return; + + save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC); + if (!save_state) + return; + + cap = (u16 *)&save_state->cap.data[0]; + pci_read_config_word(dev, dev->dpc_cap + PCI_EXP_DPC_CTL, cap); +} + +void pci_restore_dpc_state(struct pci_dev *dev) +{ + struct pci_cap_saved_state *save_state; + u16 *cap; + + if (!pci_is_pcie(dev)) + return; + + save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC); + if (!save_state) + return; + + cap = (u16 *)&save_state->cap.data[0]; + pci_write_config_word(dev, dev->dpc_cap + PCI_EXP_DPC_CTL, *cap); +} + +static DECLARE_WAIT_QUEUE_HEAD(dpc_completed_waitqueue); + +#ifdef CONFIG_HOTPLUG_PCI_PCIE +static bool dpc_completed(struct pci_dev *pdev) +{ + u16 status; + + pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_STATUS, &status); + if ((!PCI_POSSIBLE_ERROR(status)) && (status & PCI_EXP_DPC_STATUS_TRIGGER)) + return false; + + if (test_bit(PCI_DPC_RECOVERING, &pdev->priv_flags)) + return false; + + return true; +} + +/** + * pci_dpc_recovered - whether DPC triggered and has recovered successfully + * @pdev: PCI device + * + * Return true if DPC was triggered for @pdev and has recovered successfully. + * Wait for recovery if it hasn't completed yet. Called from the PCIe hotplug + * driver to recognize and ignore Link Down/Up events caused by DPC. + */ +bool pci_dpc_recovered(struct pci_dev *pdev) +{ + struct pci_host_bridge *host; + + if (!pdev->dpc_cap) + return false; + + /* + * Synchronization between hotplug and DPC is not supported + * if DPC is owned by firmware and EDR is not enabled. + */ + host = pci_find_host_bridge(pdev->bus); + if (!host->native_dpc && !IS_ENABLED(CONFIG_PCIE_EDR)) + return false; + + /* + * Need a timeout in case DPC never completes due to failure of + * dpc_wait_rp_inactive(). The spec doesn't mandate a time limit, + * but reports indicate that DPC completes within 4 seconds. + */ + wait_event_timeout(dpc_completed_waitqueue, dpc_completed(pdev), + msecs_to_jiffies(4000)); + + return test_and_clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags); +} +#endif /* CONFIG_HOTPLUG_PCI_PCIE */ + +static int dpc_wait_rp_inactive(struct pci_dev *pdev) +{ + unsigned long timeout = jiffies + HZ; + u16 cap = pdev->dpc_cap, status; + + pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); + while (status & PCI_EXP_DPC_RP_BUSY && + !time_after(jiffies, timeout)) { + msleep(10); + pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); + } + if (status & PCI_EXP_DPC_RP_BUSY) { + pci_warn(pdev, "root port still busy\n"); + return -EBUSY; + } + return 0; +} + +pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) +{ + pci_ers_result_t ret; + u16 cap; + + set_bit(PCI_DPC_RECOVERING, &pdev->priv_flags); + + /* + * DPC disables the Link automatically in hardware, so it has + * already been reset by the time we get here. + */ + cap = pdev->dpc_cap; + + /* + * Wait until the Link is inactive, then clear DPC Trigger Status + * to allow the Port to leave DPC. + */ + if (!pcie_wait_for_link(pdev, false)) + pci_info(pdev, "Data Link Layer Link Active not cleared in 1000 msec\n"); + + if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev)) { + clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags); + ret = PCI_ERS_RESULT_DISCONNECT; + goto out; + } + + pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS, + PCI_EXP_DPC_STATUS_TRIGGER); + + if (pci_bridge_wait_for_secondary_bus(pdev, "DPC")) { + clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags); + ret = PCI_ERS_RESULT_DISCONNECT; + } else { + set_bit(PCI_DPC_RECOVERED, &pdev->priv_flags); + ret = PCI_ERS_RESULT_RECOVERED; + } +out: + clear_bit(PCI_DPC_RECOVERING, &pdev->priv_flags); + wake_up_all(&dpc_completed_waitqueue); + return ret; +} + +static void dpc_process_rp_pio_error(struct pci_dev *pdev) +{ + u16 cap = pdev->dpc_cap, dpc_status, first_error; + u32 status, mask, sev, syserr, exc, dw0, dw1, dw2, dw3, log, prefix; + int i; + + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, &status); + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_MASK, &mask); + pci_err(pdev, "rp_pio_status: %#010x, rp_pio_mask: %#010x\n", + status, mask); + + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_SEVERITY, &sev); + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_SYSERROR, &syserr); + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_EXCEPTION, &exc); + pci_err(pdev, "RP PIO severity=%#010x, syserror=%#010x, exception=%#010x\n", + sev, syserr, exc); + + /* Get First Error Pointer */ + pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &dpc_status); + first_error = (dpc_status & 0x1f00) >> 8; + + for (i = 0; i < ARRAY_SIZE(rp_pio_error_string); i++) { + if ((status & ~mask) & (1 << i)) + pci_err(pdev, "[%2d] %s%s\n", i, rp_pio_error_string[i], + first_error == i ? " (First)" : ""); + } + + if (pdev->dpc_rp_log_size < 4) + goto clear_status; + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG, + &dw0); + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 4, + &dw1); + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 8, + &dw2); + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 12, + &dw3); + pci_err(pdev, "TLP Header: %#010x %#010x %#010x %#010x\n", + dw0, dw1, dw2, dw3); + + if (pdev->dpc_rp_log_size < 5) + goto clear_status; + pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG, &log); + pci_err(pdev, "RP PIO ImpSpec Log %#010x\n", log); + + for (i = 0; i < pdev->dpc_rp_log_size - 5; i++) { + pci_read_config_dword(pdev, + cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG, &prefix); + pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix); + } + clear_status: + pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, status); +} + +static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev, + struct aer_err_info *info) +{ + int pos = dev->aer_cap; + u32 status, mask, sev; + + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask); + status &= ~mask; + if (!status) + return 0; + + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev); + status &= sev; + if (status) + info->severity = AER_FATAL; + else + info->severity = AER_NONFATAL; + + return 1; +} + +void dpc_process_error(struct pci_dev *pdev) +{ + u16 cap = pdev->dpc_cap, status, source, reason, ext_reason; + struct aer_err_info info; + + pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); + pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source); + + pci_info(pdev, "containment event, status:%#06x source:%#06x\n", + status, source); + + reason = (status & PCI_EXP_DPC_STATUS_TRIGGER_RSN) >> 1; + ext_reason = (status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT) >> 5; + pci_warn(pdev, "%s detected\n", + (reason == 0) ? "unmasked uncorrectable error" : + (reason == 1) ? "ERR_NONFATAL" : + (reason == 2) ? "ERR_FATAL" : + (ext_reason == 0) ? "RP PIO error" : + (ext_reason == 1) ? "software trigger" : + "reserved error"); + + /* show RP PIO error detail information */ + if (pdev->dpc_rp_extensions && reason == 3 && ext_reason == 0) + dpc_process_rp_pio_error(pdev); + else if (reason == 0 && + dpc_get_aer_uncorrect_severity(pdev, &info) && + aer_get_device_error_info(pdev, &info)) { + aer_print_error(pdev, &info); + pci_aer_clear_nonfatal_status(pdev); + pci_aer_clear_fatal_status(pdev); + } +} + +static irqreturn_t dpc_handler(int irq, void *context) +{ + struct pci_dev *pdev = context; + + dpc_process_error(pdev); + + /* We configure DPC so it only triggers on ERR_FATAL */ + pcie_do_recovery(pdev, pci_channel_io_frozen, dpc_reset_link); + + return IRQ_HANDLED; +} + +static irqreturn_t dpc_irq(int irq, void *context) +{ + struct pci_dev *pdev = context; + u16 cap = pdev->dpc_cap, status; + + pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); + + if (!(status & PCI_EXP_DPC_STATUS_INTERRUPT) || PCI_POSSIBLE_ERROR(status)) + return IRQ_NONE; + + pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS, + PCI_EXP_DPC_STATUS_INTERRUPT); + if (status & PCI_EXP_DPC_STATUS_TRIGGER) + return IRQ_WAKE_THREAD; + return IRQ_HANDLED; +} + +void pci_dpc_init(struct pci_dev *pdev) +{ + u16 cap; + + pdev->dpc_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC); + if (!pdev->dpc_cap) + return; + + pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CAP, &cap); + if (!(cap & PCI_EXP_DPC_CAP_RP_EXT)) + return; + + pdev->dpc_rp_extensions = true; + + /* Quirks may set dpc_rp_log_size if device or firmware is buggy */ + if (!pdev->dpc_rp_log_size) { + pdev->dpc_rp_log_size = + (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8; + if (pdev->dpc_rp_log_size < 4 || pdev->dpc_rp_log_size > 9) { + pci_err(pdev, "RP PIO log size %u is invalid\n", + pdev->dpc_rp_log_size); + pdev->dpc_rp_log_size = 0; + } + } +} + +#define FLAG(x, y) (((x) & (y)) ? '+' : '-') +static int dpc_probe(struct pcie_device *dev) +{ + struct pci_dev *pdev = dev->port; + struct device *device = &dev->device; + int status; + u16 ctl, cap; + + if (!pcie_aer_is_native(pdev) && !pcie_ports_dpc_native) + return -ENOTSUPP; + + status = devm_request_threaded_irq(device, dev->irq, dpc_irq, + dpc_handler, IRQF_SHARED, + "pcie-dpc", pdev); + if (status) { + pci_warn(pdev, "request IRQ%d failed: %d\n", dev->irq, + status); + return status; + } + + pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CAP, &cap); + pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl); + + ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN; + pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl); + pci_info(pdev, "enabled with IRQ %d\n", dev->irq); + + pci_info(pdev, "error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n", + cap & PCI_EXP_DPC_IRQ, FLAG(cap, PCI_EXP_DPC_CAP_RP_EXT), + FLAG(cap, PCI_EXP_DPC_CAP_POISONED_TLP), + FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), pdev->dpc_rp_log_size, + FLAG(cap, PCI_EXP_DPC_CAP_DL_ACTIVE)); + + pci_add_ext_cap_save_buffer(pdev, PCI_EXT_CAP_ID_DPC, sizeof(u16)); + return status; +} + +static void dpc_remove(struct pcie_device *dev) +{ + struct pci_dev *pdev = dev->port; + u16 ctl; + + pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl); + ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN); + pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl); +} + +static struct pcie_port_service_driver dpcdriver = { + .name = "dpc", + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_DPC, + .probe = dpc_probe, + .remove = dpc_remove, +}; + +int __init pcie_dpc_init(void) +{ + return pcie_port_service_register(&dpcdriver); +} diff --git a/drivers/pci/pcie/edr.c b/drivers/pci/pcie/edr.c new file mode 100644 index 0000000000..5f4914d313 --- /dev/null +++ b/drivers/pci/pcie/edr.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCI Error Disconnect Recover support + * Author: Kuppuswamy Sathyanarayanan + * + * Copyright (C) 2020 Intel Corp. + */ + +#define dev_fmt(fmt) "EDR: " fmt + +#include +#include + +#include "portdrv.h" +#include "../pci.h" + +#define EDR_PORT_DPC_ENABLE_DSM 0x0C +#define EDR_PORT_LOCATE_DSM 0x0D +#define EDR_OST_SUCCESS 0x80 +#define EDR_OST_FAILED 0x81 + +/* + * _DSM wrapper function to enable/disable DPC + * @pdev : PCI device structure + * + * returns 0 on success or errno on failure. + */ +static int acpi_enable_dpc(struct pci_dev *pdev) +{ + struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); + union acpi_object *obj, argv4, req; + int status = 0; + + /* + * Behavior when calling unsupported _DSM functions is undefined, + * so check whether EDR_PORT_DPC_ENABLE_DSM is supported. + */ + if (!acpi_check_dsm(adev->handle, &pci_acpi_dsm_guid, 5, + 1ULL << EDR_PORT_DPC_ENABLE_DSM)) + return 0; + + req.type = ACPI_TYPE_INTEGER; + req.integer.value = 1; + + argv4.type = ACPI_TYPE_PACKAGE; + argv4.package.count = 1; + argv4.package.elements = &req; + + /* + * Per Downstream Port Containment Related Enhancements ECN to PCI + * Firmware Specification r3.2, sec 4.6.12, EDR_PORT_DPC_ENABLE_DSM is + * optional. Return success if it's not implemented. + */ + obj = acpi_evaluate_dsm(adev->handle, &pci_acpi_dsm_guid, 5, + EDR_PORT_DPC_ENABLE_DSM, &argv4); + if (!obj) + return 0; + + if (obj->type != ACPI_TYPE_INTEGER) { + pci_err(pdev, FW_BUG "Enable DPC _DSM returned non integer\n"); + status = -EIO; + } + + if (obj->integer.value != 1) { + pci_err(pdev, "Enable DPC _DSM failed to enable DPC\n"); + status = -EIO; + } + + ACPI_FREE(obj); + + return status; +} + +/* + * _DSM wrapper function to locate DPC port + * @pdev : Device which received EDR event + * + * Returns pci_dev or NULL. Caller is responsible for dropping a reference + * on the returned pci_dev with pci_dev_put(). + */ +static struct pci_dev *acpi_dpc_port_get(struct pci_dev *pdev) +{ + struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); + union acpi_object *obj; + u16 port; + + /* + * Behavior when calling unsupported _DSM functions is undefined, + * so check whether EDR_PORT_DPC_ENABLE_DSM is supported. + */ + if (!acpi_check_dsm(adev->handle, &pci_acpi_dsm_guid, 5, + 1ULL << EDR_PORT_LOCATE_DSM)) + return pci_dev_get(pdev); + + obj = acpi_evaluate_dsm(adev->handle, &pci_acpi_dsm_guid, 5, + EDR_PORT_LOCATE_DSM, NULL); + if (!obj) + return pci_dev_get(pdev); + + if (obj->type != ACPI_TYPE_INTEGER) { + ACPI_FREE(obj); + pci_err(pdev, FW_BUG "Locate Port _DSM returned non integer\n"); + return NULL; + } + + /* + * Firmware returns DPC port BDF details in following format: + * 15:8 = bus + * 7:3 = device + * 2:0 = function + */ + port = obj->integer.value; + + ACPI_FREE(obj); + + return pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus), + PCI_BUS_NUM(port), port & 0xff); +} + +/* + * _OST wrapper function to let firmware know the status of EDR event + * @pdev : Device used to send _OST + * @edev : Device which experienced EDR event + * @status : Status of EDR event + */ +static int acpi_send_edr_status(struct pci_dev *pdev, struct pci_dev *edev, + u16 status) +{ + struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); + u32 ost_status; + + pci_dbg(pdev, "Status for %s: %#x\n", pci_name(edev), status); + + ost_status = PCI_DEVID(edev->bus->number, edev->devfn) << 16; + ost_status |= status; + + status = acpi_evaluate_ost(adev->handle, ACPI_NOTIFY_DISCONNECT_RECOVER, + ost_status, NULL); + if (ACPI_FAILURE(status)) + return -EINVAL; + + return 0; +} + +static void edr_handle_event(acpi_handle handle, u32 event, void *data) +{ + struct pci_dev *pdev = data, *edev; + pci_ers_result_t estate = PCI_ERS_RESULT_DISCONNECT; + u16 status; + + if (event != ACPI_NOTIFY_DISCONNECT_RECOVER) + return; + + /* + * pdev is a Root Port or Downstream Port that is still present and + * has triggered a containment event, e.g., DPC, so its child + * devices have been disconnected (ACPI r6.5, sec 5.6.6). + */ + pci_info(pdev, "EDR event received\n"); + + /* + * Locate the port that experienced the containment event. pdev + * may be that port or a parent of it (PCI Firmware r3.3, sec + * 4.6.13). + */ + edev = acpi_dpc_port_get(pdev); + if (!edev) { + pci_err(pdev, "Firmware failed to locate DPC port\n"); + return; + } + + pci_dbg(pdev, "Reported EDR dev: %s\n", pci_name(edev)); + + /* If port does not support DPC, just send the OST */ + if (!edev->dpc_cap) { + pci_err(edev, FW_BUG "This device doesn't support DPC\n"); + goto send_ost; + } + + /* Check if there is a valid DPC trigger */ + pci_read_config_word(edev, edev->dpc_cap + PCI_EXP_DPC_STATUS, &status); + if (!(status & PCI_EXP_DPC_STATUS_TRIGGER)) { + pci_err(edev, "Invalid DPC trigger %#010x\n", status); + goto send_ost; + } + + dpc_process_error(edev); + pci_aer_raw_clear_status(edev); + + /* + * Irrespective of whether the DPC event is triggered by ERR_FATAL + * or ERR_NONFATAL, since the link is already down, use the FATAL + * error recovery path for both cases. + */ + estate = pcie_do_recovery(edev, pci_channel_io_frozen, dpc_reset_link); + +send_ost: + + /* + * If recovery is successful, send _OST(0xF, BDF << 16 | 0x80) + * to firmware. If not successful, send _OST(0xF, BDF << 16 | 0x81). + */ + if (estate == PCI_ERS_RESULT_RECOVERED) { + pci_dbg(edev, "DPC port successfully recovered\n"); + pcie_clear_device_status(edev); + acpi_send_edr_status(pdev, edev, EDR_OST_SUCCESS); + } else { + pci_dbg(edev, "DPC port recovery failed\n"); + acpi_send_edr_status(pdev, edev, EDR_OST_FAILED); + } + + pci_dev_put(edev); +} + +void pci_acpi_add_edr_notifier(struct pci_dev *pdev) +{ + struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); + acpi_status status; + + if (!adev) { + pci_dbg(pdev, "No valid ACPI node, skipping EDR init\n"); + return; + } + + status = acpi_install_notify_handler(adev->handle, ACPI_SYSTEM_NOTIFY, + edr_handle_event, pdev); + if (ACPI_FAILURE(status)) { + pci_err(pdev, "Failed to install notify handler\n"); + return; + } + + if (acpi_enable_dpc(pdev)) + acpi_remove_notify_handler(adev->handle, ACPI_SYSTEM_NOTIFY, + edr_handle_event); + else + pci_dbg(pdev, "Notify handler installed\n"); +} + +void pci_acpi_remove_edr_notifier(struct pci_dev *pdev) +{ + struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); + + if (!adev) + return; + + acpi_remove_notify_handler(adev->handle, ACPI_SYSTEM_NOTIFY, + edr_handle_event); + pci_dbg(pdev, "Notify handler removed\n"); +} diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c new file mode 100644 index 0000000000..59c90d04a6 --- /dev/null +++ b/drivers/pci/pcie/err.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file implements the error recovery as a core part of PCIe error + * reporting. When a PCIe error is delivered, an error message will be + * collected and printed to console, then, an error recovery procedure + * will be executed by following the PCI error recovery rules. + * + * Copyright (C) 2006 Intel Corp. + * Tom Long Nguyen (tom.l.nguyen@intel.com) + * Zhang Yanmin (yanmin.zhang@intel.com) + */ + +#define dev_fmt(fmt) "AER: " fmt + +#include +#include +#include +#include +#include +#include "portdrv.h" +#include "../pci.h" + +static pci_ers_result_t merge_result(enum pci_ers_result orig, + enum pci_ers_result new) +{ + if (new == PCI_ERS_RESULT_NO_AER_DRIVER) + return PCI_ERS_RESULT_NO_AER_DRIVER; + + if (new == PCI_ERS_RESULT_NONE) + return orig; + + switch (orig) { + case PCI_ERS_RESULT_CAN_RECOVER: + case PCI_ERS_RESULT_RECOVERED: + orig = new; + break; + case PCI_ERS_RESULT_DISCONNECT: + if (new == PCI_ERS_RESULT_NEED_RESET) + orig = PCI_ERS_RESULT_NEED_RESET; + break; + default: + break; + } + + return orig; +} + +static int report_error_detected(struct pci_dev *dev, + pci_channel_state_t state, + enum pci_ers_result *result) +{ + struct pci_driver *pdrv; + pci_ers_result_t vote; + const struct pci_error_handlers *err_handler; + + device_lock(&dev->dev); + pdrv = dev->driver; + if (pci_dev_is_disconnected(dev)) { + vote = PCI_ERS_RESULT_DISCONNECT; + } else if (!pci_dev_set_io_state(dev, state)) { + pci_info(dev, "can't recover (state transition %u -> %u invalid)\n", + dev->error_state, state); + vote = PCI_ERS_RESULT_NONE; + } else if (!pdrv || !pdrv->err_handler || + !pdrv->err_handler->error_detected) { + /* + * If any device in the subtree does not have an error_detected + * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent + * error callbacks of "any" device in the subtree, and will + * exit in the disconnected error state. + */ + if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { + vote = PCI_ERS_RESULT_NO_AER_DRIVER; + pci_info(dev, "can't recover (no error_detected callback)\n"); + } else { + vote = PCI_ERS_RESULT_NONE; + } + } else { + err_handler = pdrv->err_handler; + vote = err_handler->error_detected(dev, state); + } + pci_uevent_ers(dev, vote); + *result = merge_result(*result, vote); + device_unlock(&dev->dev); + return 0; +} + +static int report_frozen_detected(struct pci_dev *dev, void *data) +{ + return report_error_detected(dev, pci_channel_io_frozen, data); +} + +static int report_normal_detected(struct pci_dev *dev, void *data) +{ + return report_error_detected(dev, pci_channel_io_normal, data); +} + +static int report_mmio_enabled(struct pci_dev *dev, void *data) +{ + struct pci_driver *pdrv; + pci_ers_result_t vote, *result = data; + const struct pci_error_handlers *err_handler; + + device_lock(&dev->dev); + pdrv = dev->driver; + if (!pdrv || + !pdrv->err_handler || + !pdrv->err_handler->mmio_enabled) + goto out; + + err_handler = pdrv->err_handler; + vote = err_handler->mmio_enabled(dev); + *result = merge_result(*result, vote); +out: + device_unlock(&dev->dev); + return 0; +} + +static int report_slot_reset(struct pci_dev *dev, void *data) +{ + struct pci_driver *pdrv; + pci_ers_result_t vote, *result = data; + const struct pci_error_handlers *err_handler; + + device_lock(&dev->dev); + pdrv = dev->driver; + if (!pdrv || + !pdrv->err_handler || + !pdrv->err_handler->slot_reset) + goto out; + + err_handler = pdrv->err_handler; + vote = err_handler->slot_reset(dev); + *result = merge_result(*result, vote); +out: + device_unlock(&dev->dev); + return 0; +} + +static int report_resume(struct pci_dev *dev, void *data) +{ + struct pci_driver *pdrv; + const struct pci_error_handlers *err_handler; + + device_lock(&dev->dev); + pdrv = dev->driver; + if (!pci_dev_set_io_state(dev, pci_channel_io_normal) || + !pdrv || + !pdrv->err_handler || + !pdrv->err_handler->resume) + goto out; + + err_handler = pdrv->err_handler; + err_handler->resume(dev); +out: + pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); + device_unlock(&dev->dev); + return 0; +} + +/** + * pci_walk_bridge - walk bridges potentially AER affected + * @bridge: bridge which may be a Port, an RCEC, or an RCiEP + * @cb: callback to be called for each device found + * @userdata: arbitrary pointer to be passed to callback + * + * If the device provided is a bridge, walk the subordinate bus, including + * any bridged devices on buses under this bus. Call the provided callback + * on each device found. + * + * If the device provided has no subordinate bus, e.g., an RCEC or RCiEP, + * call the callback on the device itself. + */ +static void pci_walk_bridge(struct pci_dev *bridge, + int (*cb)(struct pci_dev *, void *), + void *userdata) +{ + if (bridge->subordinate) + pci_walk_bus(bridge->subordinate, cb, userdata); + else + cb(bridge, userdata); +} + +pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, + pci_channel_state_t state, + pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev)) +{ + int type = pci_pcie_type(dev); + struct pci_dev *bridge; + pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); + + /* + * If the error was detected by a Root Port, Downstream Port, RCEC, + * or RCiEP, recovery runs on the device itself. For Ports, that + * also includes any subordinate devices. + * + * If it was detected by another device (Endpoint, etc), recovery + * runs on the device and anything else under the same Port, i.e., + * everything under "bridge". + */ + if (type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_DOWNSTREAM || + type == PCI_EXP_TYPE_RC_EC || + type == PCI_EXP_TYPE_RC_END) + bridge = dev; + else + bridge = pci_upstream_bridge(dev); + + pci_dbg(bridge, "broadcast error_detected message\n"); + if (state == pci_channel_io_frozen) { + pci_walk_bridge(bridge, report_frozen_detected, &status); + if (reset_subordinates(bridge) != PCI_ERS_RESULT_RECOVERED) { + pci_warn(bridge, "subordinate device reset failed\n"); + goto failed; + } + } else { + pci_walk_bridge(bridge, report_normal_detected, &status); + } + + if (status == PCI_ERS_RESULT_CAN_RECOVER) { + status = PCI_ERS_RESULT_RECOVERED; + pci_dbg(bridge, "broadcast mmio_enabled message\n"); + pci_walk_bridge(bridge, report_mmio_enabled, &status); + } + + if (status == PCI_ERS_RESULT_NEED_RESET) { + /* + * TODO: Should call platform-specific + * functions to reset slot before calling + * drivers' slot_reset callbacks? + */ + status = PCI_ERS_RESULT_RECOVERED; + pci_dbg(bridge, "broadcast slot_reset message\n"); + pci_walk_bridge(bridge, report_slot_reset, &status); + } + + if (status != PCI_ERS_RESULT_RECOVERED) + goto failed; + + pci_dbg(bridge, "broadcast resume message\n"); + pci_walk_bridge(bridge, report_resume, &status); + + /* + * If we have native control of AER, clear error status in the device + * that detected the error. If the platform retained control of AER, + * it is responsible for clearing this status. In that case, the + * signaling device may not even be visible to the OS. + */ + if (host->native_aer || pcie_ports_native) { + pcie_clear_device_status(dev); + pci_aer_clear_nonfatal_status(dev); + } + pci_info(bridge, "device recovery successful\n"); + return status; + +failed: + pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT); + + /* TODO: Should kernel panic here? */ + pci_info(bridge, "device recovery failed\n"); + + return status; +} diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c new file mode 100644 index 0000000000..ef8ce436ea --- /dev/null +++ b/drivers/pci/pcie/pme.c @@ -0,0 +1,471 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCIe Native PME support + * + * Copyright (C) 2007 - 2009 Intel Corp + * Copyright (C) 2007 - 2009 Shaohua Li + * Copyright (C) 2009 Rafael J. Wysocki , Novell Inc. + */ + +#define dev_fmt(fmt) "PME: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../pci.h" +#include "portdrv.h" + +/* + * If this switch is set, MSI will not be used for PCIe PME signaling. This + * causes the PCIe port driver to use INTx interrupts only, but it turns out + * that using MSI for PCIe PME signaling doesn't play well with PCIe PME-based + * wake-up from system sleep states. + */ +bool pcie_pme_msi_disabled; + +static int __init pcie_pme_setup(char *str) +{ + if (!strncmp(str, "nomsi", 5)) + pcie_pme_msi_disabled = true; + + return 1; +} +__setup("pcie_pme=", pcie_pme_setup); + +struct pcie_pme_service_data { + spinlock_t lock; + struct pcie_device *srv; + struct work_struct work; + bool noirq; /* If set, keep the PME interrupt disabled. */ +}; + +/** + * pcie_pme_interrupt_enable - Enable/disable PCIe PME interrupt generation. + * @dev: PCIe root port or event collector. + * @enable: Enable or disable the interrupt. + */ +void pcie_pme_interrupt_enable(struct pci_dev *dev, bool enable) +{ + if (enable) + pcie_capability_set_word(dev, PCI_EXP_RTCTL, + PCI_EXP_RTCTL_PMEIE); + else + pcie_capability_clear_word(dev, PCI_EXP_RTCTL, + PCI_EXP_RTCTL_PMEIE); +} + +/** + * pcie_pme_walk_bus - Scan a PCI bus for devices asserting PME#. + * @bus: PCI bus to scan. + * + * Scan given PCI bus and all buses under it for devices asserting PME#. + */ +static bool pcie_pme_walk_bus(struct pci_bus *bus) +{ + struct pci_dev *dev; + bool ret = false; + + list_for_each_entry(dev, &bus->devices, bus_list) { + /* Skip PCIe devices in case we started from a root port. */ + if (!pci_is_pcie(dev) && pci_check_pme_status(dev)) { + if (dev->pme_poll) + dev->pme_poll = false; + + pci_wakeup_event(dev); + pm_request_resume(&dev->dev); + ret = true; + } + + if (dev->subordinate && pcie_pme_walk_bus(dev->subordinate)) + ret = true; + } + + return ret; +} + +/** + * pcie_pme_from_pci_bridge - Check if PCIe-PCI bridge generated a PME. + * @bus: Secondary bus of the bridge. + * @devfn: Device/function number to check. + * + * PME from PCI devices under a PCIe-PCI bridge may be converted to an in-band + * PCIe PME message. In such that case the bridge should use the Requester ID + * of device/function number 0 on its secondary bus. + */ +static bool pcie_pme_from_pci_bridge(struct pci_bus *bus, u8 devfn) +{ + struct pci_dev *dev; + bool found = false; + + if (devfn) + return false; + + dev = pci_dev_get(bus->self); + if (!dev) + return false; + + if (pci_is_pcie(dev) && pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) { + down_read(&pci_bus_sem); + if (pcie_pme_walk_bus(bus)) + found = true; + up_read(&pci_bus_sem); + } + + pci_dev_put(dev); + return found; +} + +/** + * pcie_pme_handle_request - Find device that generated PME and handle it. + * @port: Root port or event collector that generated the PME interrupt. + * @req_id: PCIe Requester ID of the device that generated the PME. + */ +static void pcie_pme_handle_request(struct pci_dev *port, u16 req_id) +{ + u8 busnr = req_id >> 8, devfn = req_id & 0xff; + struct pci_bus *bus; + struct pci_dev *dev; + bool found = false; + + /* First, check if the PME is from the root port itself. */ + if (port->devfn == devfn && port->bus->number == busnr) { + if (port->pme_poll) + port->pme_poll = false; + + if (pci_check_pme_status(port)) { + pm_request_resume(&port->dev); + found = true; + } else { + /* + * Apparently, the root port generated the PME on behalf + * of a non-PCIe device downstream. If this is done by + * a root port, the Requester ID field in its status + * register may contain either the root port's, or the + * source device's information (PCI Express Base + * Specification, Rev. 2.0, Section 6.1.9). + */ + down_read(&pci_bus_sem); + found = pcie_pme_walk_bus(port->subordinate); + up_read(&pci_bus_sem); + } + goto out; + } + + /* Second, find the bus the source device is on. */ + bus = pci_find_bus(pci_domain_nr(port->bus), busnr); + if (!bus) + goto out; + + /* Next, check if the PME is from a PCIe-PCI bridge. */ + found = pcie_pme_from_pci_bridge(bus, devfn); + if (found) + goto out; + + /* Finally, try to find the PME source on the bus. */ + down_read(&pci_bus_sem); + list_for_each_entry(dev, &bus->devices, bus_list) { + pci_dev_get(dev); + if (dev->devfn == devfn) { + found = true; + break; + } + pci_dev_put(dev); + } + up_read(&pci_bus_sem); + + if (found) { + /* The device is there, but we have to check its PME status. */ + found = pci_check_pme_status(dev); + if (found) { + if (dev->pme_poll) + dev->pme_poll = false; + + pci_wakeup_event(dev); + pm_request_resume(&dev->dev); + } + pci_dev_put(dev); + } else if (devfn) { + /* + * The device is not there, but we can still try to recover by + * assuming that the PME was reported by a PCIe-PCI bridge that + * used devfn different from zero. + */ + pci_info(port, "interrupt generated for non-existent device %02x:%02x.%d\n", + busnr, PCI_SLOT(devfn), PCI_FUNC(devfn)); + found = pcie_pme_from_pci_bridge(bus, 0); + } + + out: + if (!found) + pci_info(port, "Spurious native interrupt!\n"); +} + +/** + * pcie_pme_work_fn - Work handler for PCIe PME interrupt. + * @work: Work structure giving access to service data. + */ +static void pcie_pme_work_fn(struct work_struct *work) +{ + struct pcie_pme_service_data *data = + container_of(work, struct pcie_pme_service_data, work); + struct pci_dev *port = data->srv->port; + u32 rtsta; + + spin_lock_irq(&data->lock); + + for (;;) { + if (data->noirq) + break; + + pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta); + if (PCI_POSSIBLE_ERROR(rtsta)) + break; + + if (rtsta & PCI_EXP_RTSTA_PME) { + /* + * Clear PME status of the port. If there are other + * pending PMEs, the status will be set again. + */ + pcie_clear_root_pme_status(port); + + spin_unlock_irq(&data->lock); + pcie_pme_handle_request(port, rtsta & 0xffff); + spin_lock_irq(&data->lock); + + continue; + } + + /* No need to loop if there are no more PMEs pending. */ + if (!(rtsta & PCI_EXP_RTSTA_PENDING)) + break; + + spin_unlock_irq(&data->lock); + cpu_relax(); + spin_lock_irq(&data->lock); + } + + if (!data->noirq) + pcie_pme_interrupt_enable(port, true); + + spin_unlock_irq(&data->lock); +} + +/** + * pcie_pme_irq - Interrupt handler for PCIe root port PME interrupt. + * @irq: Interrupt vector. + * @context: Interrupt context pointer. + */ +static irqreturn_t pcie_pme_irq(int irq, void *context) +{ + struct pci_dev *port; + struct pcie_pme_service_data *data; + u32 rtsta; + unsigned long flags; + + port = ((struct pcie_device *)context)->port; + data = get_service_data((struct pcie_device *)context); + + spin_lock_irqsave(&data->lock, flags); + pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta); + + if (PCI_POSSIBLE_ERROR(rtsta) || !(rtsta & PCI_EXP_RTSTA_PME)) { + spin_unlock_irqrestore(&data->lock, flags); + return IRQ_NONE; + } + + pcie_pme_interrupt_enable(port, false); + spin_unlock_irqrestore(&data->lock, flags); + + /* We don't use pm_wq, because it's freezable. */ + schedule_work(&data->work); + + return IRQ_HANDLED; +} + +/** + * pcie_pme_can_wakeup - Set the wakeup capability flag. + * @dev: PCI device to handle. + * @ign: Ignored. + */ +static int pcie_pme_can_wakeup(struct pci_dev *dev, void *ign) +{ + device_set_wakeup_capable(&dev->dev, true); + return 0; +} + +/** + * pcie_pme_mark_devices - Set the wakeup flag for devices below a port. + * @port: PCIe root port or event collector to handle. + * + * For each device below given root port, including the port itself (or for each + * root complex integrated endpoint if @port is a root complex event collector) + * set the flag indicating that it can signal run-time wake-up events. + */ +static void pcie_pme_mark_devices(struct pci_dev *port) +{ + pcie_pme_can_wakeup(port, NULL); + + if (pci_pcie_type(port) == PCI_EXP_TYPE_RC_EC) + pcie_walk_rcec(port, pcie_pme_can_wakeup, NULL); + else if (port->subordinate) + pci_walk_bus(port->subordinate, pcie_pme_can_wakeup, NULL); +} + +/** + * pcie_pme_probe - Initialize PCIe PME service for given root port. + * @srv: PCIe service to initialize. + */ +static int pcie_pme_probe(struct pcie_device *srv) +{ + struct pci_dev *port = srv->port; + struct pcie_pme_service_data *data; + int type = pci_pcie_type(port); + int ret; + + /* Limit to Root Ports or Root Complex Event Collectors */ + if (type != PCI_EXP_TYPE_RC_EC && + type != PCI_EXP_TYPE_ROOT_PORT) + return -ENODEV; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + spin_lock_init(&data->lock); + INIT_WORK(&data->work, pcie_pme_work_fn); + data->srv = srv; + set_service_data(srv, data); + + pcie_pme_interrupt_enable(port, false); + pcie_clear_root_pme_status(port); + + ret = request_irq(srv->irq, pcie_pme_irq, IRQF_SHARED, "PCIe PME", srv); + if (ret) { + kfree(data); + return ret; + } + + pci_info(port, "Signaling with IRQ %d\n", srv->irq); + + pcie_pme_mark_devices(port); + pcie_pme_interrupt_enable(port, true); + return 0; +} + +static bool pcie_pme_check_wakeup(struct pci_bus *bus) +{ + struct pci_dev *dev; + + if (!bus) + return false; + + list_for_each_entry(dev, &bus->devices, bus_list) + if (device_may_wakeup(&dev->dev) + || pcie_pme_check_wakeup(dev->subordinate)) + return true; + + return false; +} + +static void pcie_pme_disable_interrupt(struct pci_dev *port, + struct pcie_pme_service_data *data) +{ + spin_lock_irq(&data->lock); + pcie_pme_interrupt_enable(port, false); + pcie_clear_root_pme_status(port); + data->noirq = true; + spin_unlock_irq(&data->lock); +} + +/** + * pcie_pme_suspend - Suspend PCIe PME service device. + * @srv: PCIe service device to suspend. + */ +static int pcie_pme_suspend(struct pcie_device *srv) +{ + struct pcie_pme_service_data *data = get_service_data(srv); + struct pci_dev *port = srv->port; + bool wakeup; + int ret; + + if (device_may_wakeup(&port->dev)) { + wakeup = true; + } else { + down_read(&pci_bus_sem); + wakeup = pcie_pme_check_wakeup(port->subordinate); + up_read(&pci_bus_sem); + } + if (wakeup) { + ret = enable_irq_wake(srv->irq); + if (!ret) + return 0; + } + + pcie_pme_disable_interrupt(port, data); + + synchronize_irq(srv->irq); + + return 0; +} + +/** + * pcie_pme_resume - Resume PCIe PME service device. + * @srv: PCIe service device to resume. + */ +static int pcie_pme_resume(struct pcie_device *srv) +{ + struct pcie_pme_service_data *data = get_service_data(srv); + + spin_lock_irq(&data->lock); + if (data->noirq) { + struct pci_dev *port = srv->port; + + pcie_clear_root_pme_status(port); + pcie_pme_interrupt_enable(port, true); + data->noirq = false; + } else { + disable_irq_wake(srv->irq); + } + spin_unlock_irq(&data->lock); + + return 0; +} + +/** + * pcie_pme_remove - Prepare PCIe PME service device for removal. + * @srv: PCIe service device to remove. + */ +static void pcie_pme_remove(struct pcie_device *srv) +{ + struct pcie_pme_service_data *data = get_service_data(srv); + + pcie_pme_disable_interrupt(srv->port, data); + free_irq(srv->irq, srv); + cancel_work_sync(&data->work); + kfree(data); +} + +static struct pcie_port_service_driver pcie_pme_driver = { + .name = "pcie_pme", + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_PME, + + .probe = pcie_pme_probe, + .suspend = pcie_pme_suspend, + .resume = pcie_pme_resume, + .remove = pcie_pme_remove, +}; + +/** + * pcie_pme_init - Register the PCIe PME service driver. + */ +int __init pcie_pme_init(void) +{ + return pcie_port_service_register(&pcie_pme_driver); +} diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c new file mode 100644 index 0000000000..46fad0d813 --- /dev/null +++ b/drivers/pci/pcie/portdrv.c @@ -0,0 +1,843 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Purpose: PCI Express Port Bus Driver + * + * Copyright (C) 2004 Intel + * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../pci.h" +#include "portdrv.h" + +/* + * The PCIe Capability Interrupt Message Number (PCIe r3.1, sec 7.8.2) must + * be one of the first 32 MSI-X entries. Per PCI r3.0, sec 6.8.3.1, MSI + * supports a maximum of 32 vectors per function. + */ +#define PCIE_PORT_MAX_MSI_ENTRIES 32 + +#define get_descriptor_id(type, service) (((type - 4) << 8) | service) + +struct portdrv_service_data { + struct pcie_port_service_driver *drv; + struct device *dev; + u32 service; +}; + +/** + * release_pcie_device - free PCI Express port service device structure + * @dev: Port service device to release + * + * Invoked automatically when device is being removed in response to + * device_unregister(dev). Release all resources being claimed. + */ +static void release_pcie_device(struct device *dev) +{ + kfree(to_pcie_device(dev)); +} + +/* + * Fill in *pme, *aer, *dpc with the relevant Interrupt Message Numbers if + * services are enabled in "mask". Return the number of MSI/MSI-X vectors + * required to accommodate the largest Message Number. + */ +static int pcie_message_numbers(struct pci_dev *dev, int mask, + u32 *pme, u32 *aer, u32 *dpc) +{ + u32 nvec = 0, pos; + u16 reg16; + + /* + * The Interrupt Message Number indicates which vector is used, i.e., + * the MSI-X table entry or the MSI offset between the base Message + * Data and the generated interrupt message. See PCIe r3.1, sec + * 7.8.2, 7.10.10, 7.31.2. + */ + + if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP | + PCIE_PORT_SERVICE_BWNOTIF)) { + pcie_capability_read_word(dev, PCI_EXP_FLAGS, ®16); + *pme = (reg16 & PCI_EXP_FLAGS_IRQ) >> 9; + nvec = *pme + 1; + } + +#ifdef CONFIG_PCIEAER + if (mask & PCIE_PORT_SERVICE_AER) { + u32 reg32; + + pos = dev->aer_cap; + if (pos) { + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, + ®32); + *aer = (reg32 & PCI_ERR_ROOT_AER_IRQ) >> 27; + nvec = max(nvec, *aer + 1); + } + } +#endif + + if (mask & PCIE_PORT_SERVICE_DPC) { + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DPC); + if (pos) { + pci_read_config_word(dev, pos + PCI_EXP_DPC_CAP, + ®16); + *dpc = reg16 & PCI_EXP_DPC_IRQ; + nvec = max(nvec, *dpc + 1); + } + } + + return nvec; +} + +/** + * pcie_port_enable_irq_vec - try to set up MSI-X or MSI as interrupt mode + * for given port + * @dev: PCI Express port to handle + * @irqs: Array of interrupt vectors to populate + * @mask: Bitmask of port capabilities returned by get_port_device_capability() + * + * Return value: 0 on success, error code on failure + */ +static int pcie_port_enable_irq_vec(struct pci_dev *dev, int *irqs, int mask) +{ + int nr_entries, nvec, pcie_irq; + u32 pme = 0, aer = 0, dpc = 0; + + /* Allocate the maximum possible number of MSI/MSI-X vectors */ + nr_entries = pci_alloc_irq_vectors(dev, 1, PCIE_PORT_MAX_MSI_ENTRIES, + PCI_IRQ_MSIX | PCI_IRQ_MSI); + if (nr_entries < 0) + return nr_entries; + + /* See how many and which Interrupt Message Numbers we actually use */ + nvec = pcie_message_numbers(dev, mask, &pme, &aer, &dpc); + if (nvec > nr_entries) { + pci_free_irq_vectors(dev); + return -EIO; + } + + /* + * If we allocated more than we need, free them and reallocate fewer. + * + * Reallocating may change the specific vectors we get, so + * pci_irq_vector() must be done *after* the reallocation. + * + * If we're using MSI, hardware is *allowed* to change the Interrupt + * Message Numbers when we free and reallocate the vectors, but we + * assume it won't because we allocate enough vectors for the + * biggest Message Number we found. + */ + if (nvec != nr_entries) { + pci_free_irq_vectors(dev); + + nr_entries = pci_alloc_irq_vectors(dev, nvec, nvec, + PCI_IRQ_MSIX | PCI_IRQ_MSI); + if (nr_entries < 0) + return nr_entries; + } + + /* PME, hotplug and bandwidth notification share an MSI/MSI-X vector */ + if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP | + PCIE_PORT_SERVICE_BWNOTIF)) { + pcie_irq = pci_irq_vector(dev, pme); + irqs[PCIE_PORT_SERVICE_PME_SHIFT] = pcie_irq; + irqs[PCIE_PORT_SERVICE_HP_SHIFT] = pcie_irq; + irqs[PCIE_PORT_SERVICE_BWNOTIF_SHIFT] = pcie_irq; + } + + if (mask & PCIE_PORT_SERVICE_AER) + irqs[PCIE_PORT_SERVICE_AER_SHIFT] = pci_irq_vector(dev, aer); + + if (mask & PCIE_PORT_SERVICE_DPC) + irqs[PCIE_PORT_SERVICE_DPC_SHIFT] = pci_irq_vector(dev, dpc); + + return 0; +} + +/** + * pcie_init_service_irqs - initialize irqs for PCI Express port services + * @dev: PCI Express port to handle + * @irqs: Array of irqs to populate + * @mask: Bitmask of port capabilities returned by get_port_device_capability() + * + * Return value: Interrupt mode associated with the port + */ +static int pcie_init_service_irqs(struct pci_dev *dev, int *irqs, int mask) +{ + int ret, i; + + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) + irqs[i] = -1; + + /* + * If we support PME but can't use MSI/MSI-X for it, we have to + * fall back to INTx or other interrupts, e.g., a system shared + * interrupt. + */ + if ((mask & PCIE_PORT_SERVICE_PME) && pcie_pme_no_msi()) + goto legacy_irq; + + /* Try to use MSI-X or MSI if supported */ + if (pcie_port_enable_irq_vec(dev, irqs, mask) == 0) + return 0; + +legacy_irq: + /* fall back to legacy IRQ */ + ret = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_LEGACY); + if (ret < 0) + return -ENODEV; + + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) + irqs[i] = pci_irq_vector(dev, 0); + + return 0; +} + +/** + * get_port_device_capability - discover capabilities of a PCI Express port + * @dev: PCI Express port to examine + * + * The capabilities are read from the port's PCI Express configuration registers + * as described in PCI Express Base Specification 1.0a sections 7.8.2, 7.8.9 and + * 7.9 - 7.11. + * + * Return value: Bitmask of discovered port capabilities + */ +static int get_port_device_capability(struct pci_dev *dev) +{ + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); + int services = 0; + + if (dev->is_hotplug_bridge && + (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) && + (pcie_ports_native || host->native_pcie_hotplug)) { + services |= PCIE_PORT_SERVICE_HP; + + /* + * Disable hot-plug interrupts in case they have been enabled + * by the BIOS and the hot-plug service driver is not loaded. + */ + pcie_capability_clear_word(dev, PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_CCIE | PCI_EXP_SLTCTL_HPIE); + } + +#ifdef CONFIG_PCIEAER + if ((pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC) && + dev->aer_cap && pci_aer_available() && + (pcie_ports_native || host->native_aer)) + services |= PCIE_PORT_SERVICE_AER; +#endif + + /* Root Ports and Root Complex Event Collectors may generate PMEs */ + if ((pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC) && + (pcie_ports_native || host->native_pme)) { + services |= PCIE_PORT_SERVICE_PME; + + /* + * Disable PME interrupt on this port in case it's been enabled + * by the BIOS (the PME service driver will enable it when + * necessary). + */ + pcie_pme_interrupt_enable(dev, false); + } + + /* + * With dpc-native, allow Linux to use DPC even if it doesn't have + * permission to use AER. + */ + if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DPC) && + pci_aer_available() && + (pcie_ports_dpc_native || (services & PCIE_PORT_SERVICE_AER))) + services |= PCIE_PORT_SERVICE_DPC; + + if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM || + pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) { + u32 linkcap; + + pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &linkcap); + if (linkcap & PCI_EXP_LNKCAP_LBNC) + services |= PCIE_PORT_SERVICE_BWNOTIF; + } + + return services; +} + +/** + * pcie_device_init - allocate and initialize PCI Express port service device + * @pdev: PCI Express port to associate the service device with + * @service: Type of service to associate with the service device + * @irq: Interrupt vector to associate with the service device + */ +static int pcie_device_init(struct pci_dev *pdev, int service, int irq) +{ + int retval; + struct pcie_device *pcie; + struct device *device; + + pcie = kzalloc(sizeof(*pcie), GFP_KERNEL); + if (!pcie) + return -ENOMEM; + pcie->port = pdev; + pcie->irq = irq; + pcie->service = service; + + /* Initialize generic device interface */ + device = &pcie->device; + device->bus = &pcie_port_bus_type; + device->release = release_pcie_device; /* callback to free pcie dev */ + dev_set_name(device, "%s:pcie%03x", + pci_name(pdev), + get_descriptor_id(pci_pcie_type(pdev), service)); + device->parent = &pdev->dev; + device_enable_async_suspend(device); + + retval = device_register(device); + if (retval) { + put_device(device); + return retval; + } + + pm_runtime_no_callbacks(device); + + return 0; +} + +/** + * pcie_port_device_register - register PCI Express port + * @dev: PCI Express port to register + * + * Allocate the port extension structure and register services associated with + * the port. + */ +static int pcie_port_device_register(struct pci_dev *dev) +{ + int status, capabilities, i, nr_service; + int irqs[PCIE_PORT_DEVICE_MAXSERVICES]; + + /* Enable PCI Express port device */ + status = pci_enable_device(dev); + if (status) + return status; + + /* Get and check PCI Express port services */ + capabilities = get_port_device_capability(dev); + if (!capabilities) + return 0; + + pci_set_master(dev); + /* + * Initialize service irqs. Don't use service devices that + * require interrupts if there is no way to generate them. + * However, some drivers may have a polling mode (e.g. pciehp_poll_mode) + * that can be used in the absence of irqs. Allow them to determine + * if that is to be used. + */ + status = pcie_init_service_irqs(dev, irqs, capabilities); + if (status) { + capabilities &= PCIE_PORT_SERVICE_HP; + if (!capabilities) + goto error_disable; + } + + /* Allocate child services if any */ + status = -ENODEV; + nr_service = 0; + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { + int service = 1 << i; + if (!(capabilities & service)) + continue; + if (!pcie_device_init(dev, service, irqs[i])) + nr_service++; + } + if (!nr_service) + goto error_cleanup_irqs; + + return 0; + +error_cleanup_irqs: + pci_free_irq_vectors(dev); +error_disable: + pci_disable_device(dev); + return status; +} + +typedef int (*pcie_callback_t)(struct pcie_device *); + +static int pcie_port_device_iter(struct device *dev, void *data) +{ + struct pcie_port_service_driver *service_driver; + size_t offset = *(size_t *)data; + pcie_callback_t cb; + + if ((dev->bus == &pcie_port_bus_type) && dev->driver) { + service_driver = to_service_driver(dev->driver); + cb = *(pcie_callback_t *)((void *)service_driver + offset); + if (cb) + return cb(to_pcie_device(dev)); + } + return 0; +} + +#ifdef CONFIG_PM +/** + * pcie_port_device_suspend - suspend port services associated with a PCIe port + * @dev: PCI Express port to handle + */ +static int pcie_port_device_suspend(struct device *dev) +{ + size_t off = offsetof(struct pcie_port_service_driver, suspend); + return device_for_each_child(dev, &off, pcie_port_device_iter); +} + +static int pcie_port_device_resume_noirq(struct device *dev) +{ + size_t off = offsetof(struct pcie_port_service_driver, resume_noirq); + return device_for_each_child(dev, &off, pcie_port_device_iter); +} + +/** + * pcie_port_device_resume - resume port services associated with a PCIe port + * @dev: PCI Express port to handle + */ +static int pcie_port_device_resume(struct device *dev) +{ + size_t off = offsetof(struct pcie_port_service_driver, resume); + return device_for_each_child(dev, &off, pcie_port_device_iter); +} + +/** + * pcie_port_device_runtime_suspend - runtime suspend port services + * @dev: PCI Express port to handle + */ +static int pcie_port_device_runtime_suspend(struct device *dev) +{ + size_t off = offsetof(struct pcie_port_service_driver, runtime_suspend); + return device_for_each_child(dev, &off, pcie_port_device_iter); +} + +/** + * pcie_port_device_runtime_resume - runtime resume port services + * @dev: PCI Express port to handle + */ +static int pcie_port_device_runtime_resume(struct device *dev) +{ + size_t off = offsetof(struct pcie_port_service_driver, runtime_resume); + return device_for_each_child(dev, &off, pcie_port_device_iter); +} +#endif /* PM */ + +static int remove_iter(struct device *dev, void *data) +{ + if (dev->bus == &pcie_port_bus_type) + device_unregister(dev); + return 0; +} + +static int find_service_iter(struct device *device, void *data) +{ + struct pcie_port_service_driver *service_driver; + struct portdrv_service_data *pdrvs; + u32 service; + + pdrvs = (struct portdrv_service_data *) data; + service = pdrvs->service; + + if (device->bus == &pcie_port_bus_type && device->driver) { + service_driver = to_service_driver(device->driver); + if (service_driver->service == service) { + pdrvs->drv = service_driver; + pdrvs->dev = device; + return 1; + } + } + + return 0; +} + +/** + * pcie_port_find_device - find the struct device + * @dev: PCI Express port the service is associated with + * @service: For the service to find + * + * Find the struct device associated with given service on a pci_dev + */ +struct device *pcie_port_find_device(struct pci_dev *dev, + u32 service) +{ + struct device *device; + struct portdrv_service_data pdrvs; + + pdrvs.dev = NULL; + pdrvs.service = service; + device_for_each_child(&dev->dev, &pdrvs, find_service_iter); + + device = pdrvs.dev; + return device; +} +EXPORT_SYMBOL_GPL(pcie_port_find_device); + +/** + * pcie_port_device_remove - unregister PCI Express port service devices + * @dev: PCI Express port the service devices to unregister are associated with + * + * Remove PCI Express port service devices associated with given port and + * disable MSI-X or MSI for the port. + */ +static void pcie_port_device_remove(struct pci_dev *dev) +{ + device_for_each_child(&dev->dev, NULL, remove_iter); + pci_free_irq_vectors(dev); +} + +/** + * pcie_port_probe_service - probe driver for given PCI Express port service + * @dev: PCI Express port service device to probe against + * + * If PCI Express port service driver is registered with + * pcie_port_service_register(), this function will be called by the driver core + * whenever match is found between the driver and a port service device. + */ +static int pcie_port_probe_service(struct device *dev) +{ + struct pcie_device *pciedev; + struct pcie_port_service_driver *driver; + int status; + + if (!dev || !dev->driver) + return -ENODEV; + + driver = to_service_driver(dev->driver); + if (!driver || !driver->probe) + return -ENODEV; + + pciedev = to_pcie_device(dev); + status = driver->probe(pciedev); + if (status) + return status; + + get_device(dev); + return 0; +} + +/** + * pcie_port_remove_service - detach driver from given PCI Express port service + * @dev: PCI Express port service device to handle + * + * If PCI Express port service driver is registered with + * pcie_port_service_register(), this function will be called by the driver core + * when device_unregister() is called for the port service device associated + * with the driver. + */ +static int pcie_port_remove_service(struct device *dev) +{ + struct pcie_device *pciedev; + struct pcie_port_service_driver *driver; + + if (!dev || !dev->driver) + return 0; + + pciedev = to_pcie_device(dev); + driver = to_service_driver(dev->driver); + if (driver && driver->remove) { + driver->remove(pciedev); + put_device(dev); + } + return 0; +} + +/** + * pcie_port_shutdown_service - shut down given PCI Express port service + * @dev: PCI Express port service device to handle + * + * If PCI Express port service driver is registered with + * pcie_port_service_register(), this function will be called by the driver core + * when device_shutdown() is called for the port service device associated + * with the driver. + */ +static void pcie_port_shutdown_service(struct device *dev) {} + +/** + * pcie_port_service_register - register PCI Express port service driver + * @new: PCI Express port service driver to register + */ +int pcie_port_service_register(struct pcie_port_service_driver *new) +{ + if (pcie_ports_disabled) + return -ENODEV; + + new->driver.name = new->name; + new->driver.bus = &pcie_port_bus_type; + new->driver.probe = pcie_port_probe_service; + new->driver.remove = pcie_port_remove_service; + new->driver.shutdown = pcie_port_shutdown_service; + + return driver_register(&new->driver); +} + +/** + * pcie_port_service_unregister - unregister PCI Express port service driver + * @drv: PCI Express port service driver to unregister + */ +void pcie_port_service_unregister(struct pcie_port_service_driver *drv) +{ + driver_unregister(&drv->driver); +} + +/* If this switch is set, PCIe port native services should not be enabled. */ +bool pcie_ports_disabled; + +/* + * If the user specified "pcie_ports=native", use the PCIe services regardless + * of whether the platform has given us permission. On ACPI systems, this + * means we ignore _OSC. + */ +bool pcie_ports_native; + +/* + * If the user specified "pcie_ports=dpc-native", use the Linux DPC PCIe + * service even if the platform hasn't given us permission. + */ +bool pcie_ports_dpc_native; + +static int __init pcie_port_setup(char *str) +{ + if (!strncmp(str, "compat", 6)) + pcie_ports_disabled = true; + else if (!strncmp(str, "native", 6)) + pcie_ports_native = true; + else if (!strncmp(str, "dpc-native", 10)) + pcie_ports_dpc_native = true; + + return 1; +} +__setup("pcie_ports=", pcie_port_setup); + +/* global data */ + +#ifdef CONFIG_PM +static int pcie_port_runtime_suspend(struct device *dev) +{ + if (!to_pci_dev(dev)->bridge_d3) + return -EBUSY; + + return pcie_port_device_runtime_suspend(dev); +} + +static int pcie_port_runtime_idle(struct device *dev) +{ + /* + * Assume the PCI core has set bridge_d3 whenever it thinks the port + * should be good to go to D3. Everything else, including moving + * the port to D3, is handled by the PCI core. + */ + return to_pci_dev(dev)->bridge_d3 ? 0 : -EBUSY; +} + +static const struct dev_pm_ops pcie_portdrv_pm_ops = { + .suspend = pcie_port_device_suspend, + .resume_noirq = pcie_port_device_resume_noirq, + .resume = pcie_port_device_resume, + .freeze = pcie_port_device_suspend, + .thaw = pcie_port_device_resume, + .poweroff = pcie_port_device_suspend, + .restore_noirq = pcie_port_device_resume_noirq, + .restore = pcie_port_device_resume, + .runtime_suspend = pcie_port_runtime_suspend, + .runtime_resume = pcie_port_device_runtime_resume, + .runtime_idle = pcie_port_runtime_idle, +}; + +#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops) + +#else /* !PM */ + +#define PCIE_PORTDRV_PM_OPS NULL +#endif /* !PM */ + +/* + * pcie_portdrv_probe - Probe PCI-Express port devices + * @dev: PCI-Express port device being probed + * + * If detected invokes the pcie_port_device_register() method for + * this port device. + * + */ +static int pcie_portdrv_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + int type = pci_pcie_type(dev); + int status; + + if (!pci_is_pcie(dev) || + ((type != PCI_EXP_TYPE_ROOT_PORT) && + (type != PCI_EXP_TYPE_UPSTREAM) && + (type != PCI_EXP_TYPE_DOWNSTREAM) && + (type != PCI_EXP_TYPE_RC_EC))) + return -ENODEV; + + if (type == PCI_EXP_TYPE_RC_EC) + pcie_link_rcec(dev); + + status = pcie_port_device_register(dev); + if (status) + return status; + + pci_save_state(dev); + + dev_pm_set_driver_flags(&dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE | + DPM_FLAG_SMART_SUSPEND); + + if (pci_bridge_d3_possible(dev)) { + /* + * Keep the port resumed 100ms to make sure things like + * config space accesses from userspace (lspci) will not + * cause the port to repeatedly suspend and resume. + */ + pm_runtime_set_autosuspend_delay(&dev->dev, 100); + pm_runtime_use_autosuspend(&dev->dev); + pm_runtime_mark_last_busy(&dev->dev); + pm_runtime_put_autosuspend(&dev->dev); + pm_runtime_allow(&dev->dev); + } + + return 0; +} + +static void pcie_portdrv_remove(struct pci_dev *dev) +{ + if (pci_bridge_d3_possible(dev)) { + pm_runtime_forbid(&dev->dev); + pm_runtime_get_noresume(&dev->dev); + pm_runtime_dont_use_autosuspend(&dev->dev); + } + + pcie_port_device_remove(dev); + + pci_disable_device(dev); +} + +static void pcie_portdrv_shutdown(struct pci_dev *dev) +{ + if (pci_bridge_d3_possible(dev)) { + pm_runtime_forbid(&dev->dev); + pm_runtime_get_noresume(&dev->dev); + pm_runtime_dont_use_autosuspend(&dev->dev); + } + + pcie_port_device_remove(dev); +} + +static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev, + pci_channel_state_t error) +{ + if (error == pci_channel_io_frozen) + return PCI_ERS_RESULT_NEED_RESET; + return PCI_ERS_RESULT_CAN_RECOVER; +} + +static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev) +{ + size_t off = offsetof(struct pcie_port_service_driver, slot_reset); + device_for_each_child(&dev->dev, &off, pcie_port_device_iter); + + pci_restore_state(dev); + pci_save_state(dev); + return PCI_ERS_RESULT_RECOVERED; +} + +static pci_ers_result_t pcie_portdrv_mmio_enabled(struct pci_dev *dev) +{ + return PCI_ERS_RESULT_RECOVERED; +} + +/* + * LINUX Device Driver Model + */ +static const struct pci_device_id port_pci_ids[] = { + /* handle any PCI-Express port */ + { PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_NORMAL, ~0) }, + /* subtractive decode PCI-to-PCI bridge, class type is 060401h */ + { PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE, ~0) }, + /* handle any Root Complex Event Collector */ + { PCI_DEVICE_CLASS(((PCI_CLASS_SYSTEM_RCEC << 8) | 0x00), ~0) }, + { }, +}; + +static const struct pci_error_handlers pcie_portdrv_err_handler = { + .error_detected = pcie_portdrv_error_detected, + .slot_reset = pcie_portdrv_slot_reset, + .mmio_enabled = pcie_portdrv_mmio_enabled, +}; + +static struct pci_driver pcie_portdriver = { + .name = "pcieport", + .id_table = &port_pci_ids[0], + + .probe = pcie_portdrv_probe, + .remove = pcie_portdrv_remove, + .shutdown = pcie_portdrv_shutdown, + + .err_handler = &pcie_portdrv_err_handler, + + .driver_managed_dma = true, + + .driver.pm = PCIE_PORTDRV_PM_OPS, +}; + +static int __init dmi_pcie_pme_disable_msi(const struct dmi_system_id *d) +{ + pr_notice("%s detected: will not use MSI for PCIe PME signaling\n", + d->ident); + pcie_pme_disable_msi(); + return 0; +} + +static const struct dmi_system_id pcie_portdrv_dmi_table[] __initconst = { + /* + * Boxes that should not use MSI for PCIe PME signaling. + */ + { + .callback = dmi_pcie_pme_disable_msi, + .ident = "MSI Wind U-100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, + "MICRO-STAR INTERNATIONAL CO., LTD"), + DMI_MATCH(DMI_PRODUCT_NAME, "U-100"), + }, + }, + {} +}; + +static void __init pcie_init_services(void) +{ + pcie_aer_init(); + pcie_pme_init(); + pcie_dpc_init(); + pcie_hp_init(); +} + +static int __init pcie_portdrv_init(void) +{ + if (pcie_ports_disabled) + return -EACCES; + + pcie_init_services(); + dmi_check_system(pcie_portdrv_dmi_table); + + return pci_register_driver(&pcie_portdriver); +} +device_initcall(pcie_portdrv_init); diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h new file mode 100644 index 0000000000..1f3803bde7 --- /dev/null +++ b/drivers/pci/pcie/portdrv.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Purpose: PCI Express Port Bus Driver's Internal Data Structures + * + * Copyright (C) 2004 Intel + * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) + */ + +#ifndef _PORTDRV_H_ +#define _PORTDRV_H_ + +#include + +/* Service Type */ +#define PCIE_PORT_SERVICE_PME_SHIFT 0 /* Power Management Event */ +#define PCIE_PORT_SERVICE_PME (1 << PCIE_PORT_SERVICE_PME_SHIFT) +#define PCIE_PORT_SERVICE_AER_SHIFT 1 /* Advanced Error Reporting */ +#define PCIE_PORT_SERVICE_AER (1 << PCIE_PORT_SERVICE_AER_SHIFT) +#define PCIE_PORT_SERVICE_HP_SHIFT 2 /* Native Hotplug */ +#define PCIE_PORT_SERVICE_HP (1 << PCIE_PORT_SERVICE_HP_SHIFT) +#define PCIE_PORT_SERVICE_DPC_SHIFT 3 /* Downstream Port Containment */ +#define PCIE_PORT_SERVICE_DPC (1 << PCIE_PORT_SERVICE_DPC_SHIFT) +#define PCIE_PORT_SERVICE_BWNOTIF_SHIFT 4 /* Bandwidth notification */ +#define PCIE_PORT_SERVICE_BWNOTIF (1 << PCIE_PORT_SERVICE_BWNOTIF_SHIFT) + +#define PCIE_PORT_DEVICE_MAXSERVICES 5 + +extern bool pcie_ports_dpc_native; + +#ifdef CONFIG_PCIEAER +int pcie_aer_init(void); +#else +static inline int pcie_aer_init(void) { return 0; } +#endif + +#ifdef CONFIG_HOTPLUG_PCI_PCIE +int pcie_hp_init(void); +#else +static inline int pcie_hp_init(void) { return 0; } +#endif + +#ifdef CONFIG_PCIE_PME +int pcie_pme_init(void); +#else +static inline int pcie_pme_init(void) { return 0; } +#endif + +#ifdef CONFIG_PCIE_DPC +int pcie_dpc_init(void); +#else +static inline int pcie_dpc_init(void) { return 0; } +#endif + +/* Port Type */ +#define PCIE_ANY_PORT (~0) + +struct pcie_device { + int irq; /* Service IRQ/MSI/MSI-X Vector */ + struct pci_dev *port; /* Root/Upstream/Downstream Port */ + u32 service; /* Port service this device represents */ + void *priv_data; /* Service Private Data */ + struct device device; /* Generic Device Interface */ +}; +#define to_pcie_device(d) container_of(d, struct pcie_device, device) + +static inline void set_service_data(struct pcie_device *dev, void *data) +{ + dev->priv_data = data; +} + +static inline void *get_service_data(struct pcie_device *dev) +{ + return dev->priv_data; +} + +struct pcie_port_service_driver { + const char *name; + int (*probe)(struct pcie_device *dev); + void (*remove)(struct pcie_device *dev); + int (*suspend)(struct pcie_device *dev); + int (*resume_noirq)(struct pcie_device *dev); + int (*resume)(struct pcie_device *dev); + int (*runtime_suspend)(struct pcie_device *dev); + int (*runtime_resume)(struct pcie_device *dev); + + int (*slot_reset)(struct pcie_device *dev); + + int port_type; /* Type of the port this driver can handle */ + u32 service; /* Port service this device represents */ + + struct device_driver driver; +}; +#define to_service_driver(d) \ + container_of(d, struct pcie_port_service_driver, driver) + +int pcie_port_service_register(struct pcie_port_service_driver *new); +void pcie_port_service_unregister(struct pcie_port_service_driver *new); + +extern struct bus_type pcie_port_bus_type; + +struct pci_dev; + +#ifdef CONFIG_PCIE_PME +extern bool pcie_pme_msi_disabled; + +static inline void pcie_pme_disable_msi(void) +{ + pcie_pme_msi_disabled = true; +} + +static inline bool pcie_pme_no_msi(void) +{ + return pcie_pme_msi_disabled; +} + +void pcie_pme_interrupt_enable(struct pci_dev *dev, bool enable); +#else /* !CONFIG_PCIE_PME */ +static inline void pcie_pme_disable_msi(void) {} +static inline bool pcie_pme_no_msi(void) { return false; } +static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {} +#endif /* !CONFIG_PCIE_PME */ + +struct device *pcie_port_find_device(struct pci_dev *dev, u32 service); +#endif /* _PORTDRV_H_ */ diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c new file mode 100644 index 0000000000..b4e5f55346 --- /dev/null +++ b/drivers/pci/pcie/ptm.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCI Express Precision Time Measurement + * Copyright (c) 2016, Intel Corporation. + */ + +#include +#include +#include +#include "../pci.h" + +/* + * If the next upstream device supports PTM, return it; otherwise return + * NULL. PTM Messages are local, so both link partners must support it. + */ +static struct pci_dev *pci_upstream_ptm(struct pci_dev *dev) +{ + struct pci_dev *ups = pci_upstream_bridge(dev); + + /* + * Switch Downstream Ports are not permitted to have a PTM + * capability; their PTM behavior is controlled by the Upstream + * Port (PCIe r5.0, sec 7.9.16), so if the upstream bridge is a + * Switch Downstream Port, look up one more level. + */ + if (ups && pci_pcie_type(ups) == PCI_EXP_TYPE_DOWNSTREAM) + ups = pci_upstream_bridge(ups); + + if (ups && ups->ptm_cap) + return ups; + + return NULL; +} + +/* + * Find the PTM Capability (if present) and extract the information we need + * to use it. + */ +void pci_ptm_init(struct pci_dev *dev) +{ + u16 ptm; + u32 cap; + struct pci_dev *ups; + + if (!pci_is_pcie(dev)) + return; + + ptm = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PTM); + if (!ptm) + return; + + dev->ptm_cap = ptm; + pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_PTM, sizeof(u32)); + + pci_read_config_dword(dev, ptm + PCI_PTM_CAP, &cap); + dev->ptm_granularity = (cap & PCI_PTM_GRANULARITY_MASK) >> 8; + + /* + * Per the spec recommendation (PCIe r6.0, sec 7.9.15.3), select the + * furthest upstream Time Source as the PTM Root. For Endpoints, + * "the Effective Granularity is the maximum Local Clock Granularity + * reported by the PTM Root and all intervening PTM Time Sources." + */ + ups = pci_upstream_ptm(dev); + if (ups) { + if (ups->ptm_granularity == 0) + dev->ptm_granularity = 0; + else if (ups->ptm_granularity > dev->ptm_granularity) + dev->ptm_granularity = ups->ptm_granularity; + } else if (cap & PCI_PTM_CAP_ROOT) { + dev->ptm_root = 1; + } else if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) { + + /* + * Per sec 7.9.15.3, this should be the Local Clock + * Granularity of the associated Time Source. But it + * doesn't say how to find that Time Source. + */ + dev->ptm_granularity = 0; + } + + if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(dev) == PCI_EXP_TYPE_UPSTREAM) + pci_enable_ptm(dev, NULL); +} + +void pci_save_ptm_state(struct pci_dev *dev) +{ + u16 ptm = dev->ptm_cap; + struct pci_cap_saved_state *save_state; + u32 *cap; + + if (!ptm) + return; + + save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_PTM); + if (!save_state) + return; + + cap = (u32 *)&save_state->cap.data[0]; + pci_read_config_dword(dev, ptm + PCI_PTM_CTRL, cap); +} + +void pci_restore_ptm_state(struct pci_dev *dev) +{ + u16 ptm = dev->ptm_cap; + struct pci_cap_saved_state *save_state; + u32 *cap; + + if (!ptm) + return; + + save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_PTM); + if (!save_state) + return; + + cap = (u32 *)&save_state->cap.data[0]; + pci_write_config_dword(dev, ptm + PCI_PTM_CTRL, *cap); +} + +/* Enable PTM in the Control register if possible */ +static int __pci_enable_ptm(struct pci_dev *dev) +{ + u16 ptm = dev->ptm_cap; + struct pci_dev *ups; + u32 ctrl; + + if (!ptm) + return -EINVAL; + + /* + * A device uses local PTM Messages to request time information + * from a PTM Root that's farther upstream. Every device along the + * path must support PTM and have it enabled so it can handle the + * messages. Therefore, if this device is not a PTM Root, the + * upstream link partner must have PTM enabled before we can enable + * PTM. + */ + if (!dev->ptm_root) { + ups = pci_upstream_ptm(dev); + if (!ups || !ups->ptm_enabled) + return -EINVAL; + } + + pci_read_config_dword(dev, ptm + PCI_PTM_CTRL, &ctrl); + + ctrl |= PCI_PTM_CTRL_ENABLE; + ctrl &= ~PCI_PTM_GRANULARITY_MASK; + ctrl |= dev->ptm_granularity << 8; + if (dev->ptm_root) + ctrl |= PCI_PTM_CTRL_ROOT; + + pci_write_config_dword(dev, ptm + PCI_PTM_CTRL, ctrl); + return 0; +} + +/** + * pci_enable_ptm() - Enable Precision Time Measurement + * @dev: PCI device + * @granularity: pointer to return granularity + * + * Enable Precision Time Measurement for @dev. If successful and + * @granularity is non-NULL, return the Effective Granularity. + * + * Return: zero if successful, or -EINVAL if @dev lacks a PTM Capability or + * is not a PTM Root and lacks an upstream path of PTM-enabled devices. + */ +int pci_enable_ptm(struct pci_dev *dev, u8 *granularity) +{ + int rc; + char clock_desc[8]; + + rc = __pci_enable_ptm(dev); + if (rc) + return rc; + + dev->ptm_enabled = 1; + + if (granularity) + *granularity = dev->ptm_granularity; + + switch (dev->ptm_granularity) { + case 0: + snprintf(clock_desc, sizeof(clock_desc), "unknown"); + break; + case 255: + snprintf(clock_desc, sizeof(clock_desc), ">254ns"); + break; + default: + snprintf(clock_desc, sizeof(clock_desc), "%uns", + dev->ptm_granularity); + break; + } + pci_info(dev, "PTM enabled%s, %s granularity\n", + dev->ptm_root ? " (root)" : "", clock_desc); + + return 0; +} +EXPORT_SYMBOL(pci_enable_ptm); + +static void __pci_disable_ptm(struct pci_dev *dev) +{ + u16 ptm = dev->ptm_cap; + u32 ctrl; + + if (!ptm) + return; + + pci_read_config_dword(dev, ptm + PCI_PTM_CTRL, &ctrl); + ctrl &= ~(PCI_PTM_CTRL_ENABLE | PCI_PTM_CTRL_ROOT); + pci_write_config_dword(dev, ptm + PCI_PTM_CTRL, ctrl); +} + +/** + * pci_disable_ptm() - Disable Precision Time Measurement + * @dev: PCI device + * + * Disable Precision Time Measurement for @dev. + */ +void pci_disable_ptm(struct pci_dev *dev) +{ + if (dev->ptm_enabled) { + __pci_disable_ptm(dev); + dev->ptm_enabled = 0; + } +} +EXPORT_SYMBOL(pci_disable_ptm); + +/* + * Disable PTM, but preserve dev->ptm_enabled so we silently re-enable it on + * resume if necessary. + */ +void pci_suspend_ptm(struct pci_dev *dev) +{ + if (dev->ptm_enabled) + __pci_disable_ptm(dev); +} + +/* If PTM was enabled before suspend, re-enable it when resuming */ +void pci_resume_ptm(struct pci_dev *dev) +{ + if (dev->ptm_enabled) + __pci_enable_ptm(dev); +} + +bool pcie_ptm_enabled(struct pci_dev *dev) +{ + if (!dev) + return false; + + return dev->ptm_enabled; +} +EXPORT_SYMBOL(pcie_ptm_enabled); diff --git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c new file mode 100644 index 0000000000..d0bcd141ac --- /dev/null +++ b/drivers/pci/pcie/rcec.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Root Complex Event Collector Support + * + * Authors: + * Sean V Kelley + * Qiuxu Zhuo + * + * Copyright (C) 2020 Intel Corp. + */ + +#include +#include +#include + +#include "../pci.h" + +struct walk_rcec_data { + struct pci_dev *rcec; + int (*user_callback)(struct pci_dev *dev, void *data); + void *user_data; +}; + +static bool rcec_assoc_rciep(struct pci_dev *rcec, struct pci_dev *rciep) +{ + unsigned long bitmap = rcec->rcec_ea->bitmap; + unsigned int devn; + + /* An RCiEP found on a different bus in range */ + if (rcec->bus->number != rciep->bus->number) + return true; + + /* Same bus, so check bitmap */ + for_each_set_bit(devn, &bitmap, 32) + if (devn == PCI_SLOT(rciep->devfn)) + return true; + + return false; +} + +static int link_rcec_helper(struct pci_dev *dev, void *data) +{ + struct walk_rcec_data *rcec_data = data; + struct pci_dev *rcec = rcec_data->rcec; + + if ((pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) && + rcec_assoc_rciep(rcec, dev)) { + dev->rcec = rcec; + pci_dbg(dev, "PME & error events signaled via %s\n", + pci_name(rcec)); + } + + return 0; +} + +static int walk_rcec_helper(struct pci_dev *dev, void *data) +{ + struct walk_rcec_data *rcec_data = data; + struct pci_dev *rcec = rcec_data->rcec; + + if ((pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) && + rcec_assoc_rciep(rcec, dev)) + rcec_data->user_callback(dev, rcec_data->user_data); + + return 0; +} + +static void walk_rcec(int (*cb)(struct pci_dev *dev, void *data), + void *userdata) +{ + struct walk_rcec_data *rcec_data = userdata; + struct pci_dev *rcec = rcec_data->rcec; + u8 nextbusn, lastbusn; + struct pci_bus *bus; + unsigned int bnr; + + if (!rcec->rcec_ea) + return; + + /* Walk own bus for bitmap based association */ + pci_walk_bus(rcec->bus, cb, rcec_data); + + nextbusn = rcec->rcec_ea->nextbusn; + lastbusn = rcec->rcec_ea->lastbusn; + + /* All RCiEP devices are on the same bus as the RCEC */ + if (nextbusn == 0xff && lastbusn == 0x00) + return; + + for (bnr = nextbusn; bnr <= lastbusn; bnr++) { + /* No association indicated (PCIe 5.0-1, 7.9.10.3) */ + if (bnr == rcec->bus->number) + continue; + + bus = pci_find_bus(pci_domain_nr(rcec->bus), bnr); + if (!bus) + continue; + + /* Find RCiEP devices on the given bus ranges */ + pci_walk_bus(bus, cb, rcec_data); + } +} + +/** + * pcie_link_rcec - Link RCiEP devices associated with RCEC. + * @rcec: RCEC whose RCiEP devices should be linked. + * + * Link the given RCEC to each RCiEP device found. + */ +void pcie_link_rcec(struct pci_dev *rcec) +{ + struct walk_rcec_data rcec_data; + + if (!rcec->rcec_ea) + return; + + rcec_data.rcec = rcec; + rcec_data.user_callback = NULL; + rcec_data.user_data = NULL; + + walk_rcec(link_rcec_helper, &rcec_data); +} + +/** + * pcie_walk_rcec - Walk RCiEP devices associating with RCEC and call callback. + * @rcec: RCEC whose RCiEP devices should be walked + * @cb: Callback to be called for each RCiEP device found + * @userdata: Arbitrary pointer to be passed to callback + * + * Walk the given RCEC. Call the callback on each RCiEP found. + * + * If @cb returns anything other than 0, break out. + */ +void pcie_walk_rcec(struct pci_dev *rcec, int (*cb)(struct pci_dev *, void *), + void *userdata) +{ + struct walk_rcec_data rcec_data; + + if (!rcec->rcec_ea) + return; + + rcec_data.rcec = rcec; + rcec_data.user_callback = cb; + rcec_data.user_data = userdata; + + walk_rcec(walk_rcec_helper, &rcec_data); +} + +void pci_rcec_init(struct pci_dev *dev) +{ + struct rcec_ea *rcec_ea; + u32 rcec, hdr, busn; + u8 ver; + + /* Only for Root Complex Event Collectors */ + if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) + return; + + rcec = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_RCEC); + if (!rcec) + return; + + rcec_ea = kzalloc(sizeof(*rcec_ea), GFP_KERNEL); + if (!rcec_ea) + return; + + pci_read_config_dword(dev, rcec + PCI_RCEC_RCIEP_BITMAP, + &rcec_ea->bitmap); + + /* Check whether RCEC BUSN register is present */ + pci_read_config_dword(dev, rcec, &hdr); + ver = PCI_EXT_CAP_VER(hdr); + if (ver >= PCI_RCEC_BUSN_REG_VER) { + pci_read_config_dword(dev, rcec + PCI_RCEC_BUSN, &busn); + rcec_ea->nextbusn = PCI_RCEC_BUSN_NEXT(busn); + rcec_ea->lastbusn = PCI_RCEC_BUSN_LAST(busn); + } else { + /* Avoid later ver check by setting nextbusn */ + rcec_ea->nextbusn = 0xff; + rcec_ea->lastbusn = 0x00; + } + + dev->rcec_ea = rcec_ea; +} + +void pci_rcec_exit(struct pci_dev *dev) +{ + kfree(dev->rcec_ea); + dev->rcec_ea = NULL; +} -- cgit v1.2.3