Adding upstream version 6.6.15.upstream/6.6.15

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
commit: ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree: b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/pci
parent: Initial commit. (diff)
download: linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
216 files changed, 145143 insertions, 0 deletions
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
new file mode 100644
index 000000000..e9ae66cc4
--- /dev/null
+++ b/drivers/pci/Kconfig
@@ -0,0 +1,295 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# PCI configuration
+#
+
+# select this to offer the PCI prompt
+config HAVE_PCI
+	bool
+
+# select this to unconditionally force on PCI support
+config FORCE_PCI
+	bool
+	select HAVE_PCI
+	select PCI
+
+menuconfig PCI
+	bool "PCI support"
+	depends on HAVE_PCI
+	help
+	  This option enables support for the PCI local bus, including
+	  support for PCI-X and the foundations for PCI Express support.
+	  Say 'Y' here unless you know what you are doing.
+
+if PCI
+
+config PCI_DOMAINS
+	bool
+	depends on PCI
+
+config PCI_DOMAINS_GENERIC
+	bool
+	select PCI_DOMAINS
+
+config PCI_SYSCALL
+	bool
+
+source "drivers/pci/pcie/Kconfig"
+
+config PCI_MSI
+	bool "Message Signaled Interrupts (MSI and MSI-X)"
+	select GENERIC_MSI_IRQ
+	help
+	   This allows device drivers to enable MSI (Message Signaled
+	   Interrupts).  Message Signaled Interrupts enable a device to
+	   generate an interrupt using an inbound Memory Write on its
+	   PCI bus instead of asserting a device IRQ pin.
+
+	   Use of PCI MSI interrupts can be disabled at kernel boot time
+	   by using the 'pci=nomsi' option.  This disables MSI for the
+	   entire system.
+
+	   If you don't know what to do here, say Y.
+
+config PCI_MSI_ARCH_FALLBACKS
+	bool
+
+config PCI_QUIRKS
+	default y
+	bool "Enable PCI quirk workarounds" if EXPERT
+	help
+	  This enables workarounds for various PCI chipset bugs/quirks.
+	  Disable this only if your target machine is unaffected by PCI
+	  quirks.
+
+config PCI_DEBUG
+	bool "PCI Debugging"
+	depends on DEBUG_KERNEL
+	help
+	  Say Y here if you want the PCI core to produce a bunch of debug
+	  messages to the system log.  Select this if you are having a
+	  problem with PCI support and want to see more of what is going on.
+
+	  When in doubt, say N.
+
+config PCI_REALLOC_ENABLE_AUTO
+	bool "Enable PCI resource re-allocation detection"
+	depends on PCI_IOV
+	help
+	  Say Y here if you want the PCI core to detect if PCI resource
+	  re-allocation needs to be enabled. You can always use pci=realloc=on
+	  or pci=realloc=off to override it.  It will automatically
+	  re-allocate PCI resources if SR-IOV BARs have not been allocated by
+	  the BIOS.
+
+	  When in doubt, say N.
+
+config PCI_STUB
+	tristate "PCI Stub driver"
+	help
+	  Say Y or M here if you want be able to reserve a PCI device
+	  when it is going to be assigned to a guest operating system.
+
+	  When in doubt, say N.
+
+config PCI_PF_STUB
+	tristate "PCI PF Stub driver"
+	depends on PCI_IOV
+	help
+	  Say Y or M here if you want to enable support for devices that
+	  require SR-IOV support, while at the same time the PF (Physical
+	  Function) itself is not providing any actual services on the
+	  host itself such as storage or networking.
+
+	  When in doubt, say N.
+
+config XEN_PCIDEV_FRONTEND
+	tristate "Xen PCI Frontend"
+	depends on XEN_PV
+	select PCI_XEN
+	select XEN_XENBUS_FRONTEND
+	default y
+	help
+	  The PCI device frontend driver allows the kernel to import arbitrary
+	  PCI devices from a PCI backend to support PCI driver domains.
+
+config PCI_ATS
+	bool
+
+config PCI_DOE
+	bool
+
+config PCI_ECAM
+	bool
+
+config PCI_LOCKLESS_CONFIG
+	bool
+
+config PCI_BRIDGE_EMUL
+	bool
+
+config PCI_IOV
+	bool "PCI IOV support"
+	select PCI_ATS
+	help
+	  I/O Virtualization is a PCI feature supported by some devices
+	  which allows them to create virtual devices which share their
+	  physical resources.
+
+	  If unsure, say N.
+
+config PCI_PRI
+	bool "PCI PRI support"
+	select PCI_ATS
+	help
+	  PRI is the PCI Page Request Interface. It allows PCI devices that are
+	  behind an IOMMU to recover from page faults.
+
+	  If unsure, say N.
+
+config PCI_PASID
+	bool "PCI PASID support"
+	select PCI_ATS
+	help
+	  Process Address Space Identifiers (PASIDs) can be used by PCI devices
+	  to access more than one IO address space at the same time. To make
+	  use of this feature an IOMMU is required which also supports PASIDs.
+	  Select this option if you have such an IOMMU and want to compile the
+	  driver for it into your kernel.
+
+	  If unsure, say N.
+
+config PCI_P2PDMA
+	bool "PCI peer-to-peer transfer support"
+	depends on ZONE_DEVICE
+	#
+	# The need for the scatterlist DMA bus address flag means PCI P2PDMA
+	# requires 64bit
+	#
+	depends on 64BIT
+	select GENERIC_ALLOCATOR
+	select NEED_SG_DMA_FLAGS
+	help
+	  Enableѕ drivers to do PCI peer-to-peer transactions to and from
+	  BARs that are exposed in other devices that are the part of
+	  the hierarchy where peer-to-peer DMA is guaranteed by the PCI
+	  specification to work (ie. anything below a single PCI bridge).
+
+	  Many PCIe root complexes do not support P2P transactions and
+	  it's hard to tell which support it at all, so at this time,
+	  P2P DMA transactions must be between devices behind the same root
+	  port.
+
+	  If unsure, say N.
+
+config PCI_LABEL
+	def_bool y if (DMI || ACPI)
+	select NLS
+
+config PCI_HYPERV
+	tristate "Hyper-V PCI Frontend"
+	depends on ((X86 && X86_64) || ARM64) && HYPERV && PCI_MSI && SYSFS
+	select PCI_HYPERV_INTERFACE
+	help
+	  The PCI device frontend driver allows the kernel to import arbitrary
+	  PCI devices from a PCI backend to support PCI driver domains.
+
+config PCI_DYNAMIC_OF_NODES
+	bool "Create Device tree nodes for PCI devices"
+	depends on OF_IRQ
+	select OF_DYNAMIC
+	help
+	  This option enables support for generating device tree nodes for some
+	  PCI devices. Thus, the driver of this kind can load and overlay
+	  flattened device tree for its downstream devices.
+
+	  Once this option is selected, the device tree nodes will be generated
+	  for all PCI bridges.
+
+choice
+	prompt "PCI Express hierarchy optimization setting"
+	default PCIE_BUS_DEFAULT
+	depends on PCI && EXPERT
+	help
+	  MPS (Max Payload Size) and MRRS (Max Read Request Size) are PCIe
+	  device parameters that affect performance and the ability to
+	  support hotplug and peer-to-peer DMA.
+
+	  The following choices set the MPS and MRRS optimization strategy
+	  at compile-time.  The choices are the same as those offered for
+	  the kernel command-line parameter 'pci', i.e.,
+	  'pci=pcie_bus_tune_off', 'pci=pcie_bus_safe',
+	  'pci=pcie_bus_perf', and 'pci=pcie_bus_peer2peer'.
+
+	  This is a compile-time setting and can be overridden by the above
+	  command-line parameters.  If unsure, choose PCIE_BUS_DEFAULT.
+
+config PCIE_BUS_TUNE_OFF
+	bool "Tune Off"
+	depends on PCI
+	help
+	  Use the BIOS defaults; don't touch MPS at all.  This is the same
+	  as booting with 'pci=pcie_bus_tune_off'.
+
+config PCIE_BUS_DEFAULT
+	bool "Default"
+	depends on PCI
+	help
+	  Default choice; ensure that the MPS matches upstream bridge.
+
+config PCIE_BUS_SAFE
+	bool "Safe"
+	depends on PCI
+	help
+	  Use largest MPS that boot-time devices support.  If you have a
+	  closed system with no possibility of adding new devices, this
+	  will use the largest MPS that's supported by all devices.  This
+	  is the same as booting with 'pci=pcie_bus_safe'.
+
+config PCIE_BUS_PERFORMANCE
+	bool "Performance"
+	depends on PCI
+	help
+	  Use MPS and MRRS for best performance.  Ensure that a given
+	  device's MPS is no larger than its parent MPS, which allows us to
+	  keep all switches/bridges to the max MPS supported by their
+	  parent.  This is the same as booting with 'pci=pcie_bus_perf'.
+
+config PCIE_BUS_PEER2PEER
+	bool "Peer2peer"
+	depends on PCI
+	help
+	  Set MPS = 128 for all devices.  MPS configuration effected by the
+	  other options could cause the MPS on one root port to be
+	  different than that of the MPS on another, which may cause
+	  hot-added devices or peer-to-peer DMA to fail.  Set MPS to the
+	  smallest possible value (128B) system-wide to avoid these issues.
+	  This is the same as booting with 'pci=pcie_bus_peer2peer'.
+
+endchoice
+
+config VGA_ARB
+	bool "VGA Arbitration" if EXPERT
+	default y
+	depends on (PCI && !S390)
+	help
+	  Some "legacy" VGA devices implemented on PCI typically have the same
+	  hard-decoded addresses as they did on ISA. When multiple PCI devices
+	  are accessed at same time they need some kind of coordination. Please
+	  see Documentation/gpu/vgaarbiter.rst for more details. Select this to
+	  enable VGA arbiter.
+
+config VGA_ARB_MAX_GPUS
+	int "Maximum number of GPUs"
+	default 16
+	depends on VGA_ARB
+	help
+	  Reserves space in the kernel to maintain resource locking for
+	  multiple GPUS.  The overhead for each GPU is very small.
+
+source "drivers/pci/hotplug/Kconfig"
+source "drivers/pci/controller/Kconfig"
+source "drivers/pci/endpoint/Kconfig"
+source "drivers/pci/switch/Kconfig"
+
+endif
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
new file mode 100644
index 000000000..cc8b4e01e
--- /dev/null
+++ b/drivers/pci/Makefile
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the PCI bus specific drivers.
+
+obj-$(CONFIG_PCI)		+= access.o bus.o probe.o host-bridge.o \
+				   remove.o pci.o pci-driver.o search.o \
+				   pci-sysfs.o rom.o setup-res.o irq.o vpd.o \
+				   setup-bus.o vc.o mmap.o setup-irq.o
+
+obj-$(CONFIG_PCI)		+= msi/
+obj-$(CONFIG_PCI)		+= pcie/
+
+ifdef CONFIG_PCI
+obj-$(CONFIG_PROC_FS)		+= proc.o
+obj-$(CONFIG_SYSFS)		+= slot.o
+obj-$(CONFIG_ACPI)		+= pci-acpi.o
+endif
+
+obj-$(CONFIG_OF)		+= of.o
+obj-$(CONFIG_PCI_QUIRKS)	+= quirks.o
+obj-$(CONFIG_HOTPLUG_PCI)	+= hotplug/
+obj-$(CONFIG_PCI_ATS)		+= ats.o
+obj-$(CONFIG_PCI_IOV)		+= iov.o
+obj-$(CONFIG_PCI_BRIDGE_EMUL)	+= pci-bridge-emul.o
+obj-$(CONFIG_PCI_LABEL)		+= pci-label.o
+obj-$(CONFIG_X86_INTEL_MID)	+= pci-mid.o
+obj-$(CONFIG_PCI_SYSCALL)	+= syscall.o
+obj-$(CONFIG_PCI_STUB)		+= pci-stub.o
+obj-$(CONFIG_PCI_PF_STUB)	+= pci-pf-stub.o
+obj-$(CONFIG_PCI_ECAM)		+= ecam.o
+obj-$(CONFIG_PCI_P2PDMA)	+= p2pdma.o
+obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
+obj-$(CONFIG_VGA_ARB)		+= vgaarb.o
+obj-$(CONFIG_PCI_DOE)		+= doe.o
+obj-$(CONFIG_PCI_DYNAMIC_OF_NODES) += of_property.o
+
+# Endpoint library must be initialized before its users
+obj-$(CONFIG_PCI_ENDPOINT)	+= endpoint/
+
+obj-y				+= controller/
+obj-y				+= switch/
+
+subdir-ccflags-$(CONFIG_PCI_DEBUG) := -DDEBUG
diff --git a/drivers/pci/access.c b/drivers/pci/access.c
new file mode 100644
index 000000000..6554a2e89
--- /dev/null
+++ b/drivers/pci/access.c
@@ -0,0 +1,600 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/wait.h>
+
+#include "pci.h"
+
+/*
+ * This interrupt-safe spinlock protects all accesses to PCI
+ * configuration space.
+ */
+
+DEFINE_RAW_SPINLOCK(pci_lock);
+
+/*
+ * Wrappers for all PCI configuration access functions.  They just check
+ * alignment, do locking and call the low-level functions pointed to
+ * by pci_dev->ops.
+ */
+
+#define PCI_byte_BAD 0
+#define PCI_word_BAD (pos & 1)
+#define PCI_dword_BAD (pos & 3)
+
+#ifdef CONFIG_PCI_LOCKLESS_CONFIG
+# define pci_lock_config(f)	do { (void)(f); } while (0)
+# define pci_unlock_config(f)	do { (void)(f); } while (0)
+#else
+# define pci_lock_config(f)	raw_spin_lock_irqsave(&pci_lock, f)
+# define pci_unlock_config(f)	raw_spin_unlock_irqrestore(&pci_lock, f)
+#endif
+
+#define PCI_OP_READ(size, type, len) \
+int noinline pci_bus_read_config_##size \
+	(struct pci_bus *bus, unsigned int devfn, int pos, type *value)	\
+{									\
+	int res;							\
+	unsigned long flags;						\
+	u32 data = 0;							\
+	if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER;	\
+	pci_lock_config(flags);						\
+	res = bus->ops->read(bus, devfn, pos, len, &data);		\
+	if (res)							\
+		PCI_SET_ERROR_RESPONSE(value);				\
+	else								\
+		*value = (type)data;					\
+	pci_unlock_config(flags);					\
+	return res;							\
+}
+
+#define PCI_OP_WRITE(size, type, len) \
+int noinline pci_bus_write_config_##size \
+	(struct pci_bus *bus, unsigned int devfn, int pos, type value)	\
+{									\
+	int res;							\
+	unsigned long flags;						\
+	if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER;	\
+	pci_lock_config(flags);						\
+	res = bus->ops->write(bus, devfn, pos, len, value);		\
+	pci_unlock_config(flags);					\
+	return res;							\
+}
+
+PCI_OP_READ(byte, u8, 1)
+PCI_OP_READ(word, u16, 2)
+PCI_OP_READ(dword, u32, 4)
+PCI_OP_WRITE(byte, u8, 1)
+PCI_OP_WRITE(word, u16, 2)
+PCI_OP_WRITE(dword, u32, 4)
+
+EXPORT_SYMBOL(pci_bus_read_config_byte);
+EXPORT_SYMBOL(pci_bus_read_config_word);
+EXPORT_SYMBOL(pci_bus_read_config_dword);
+EXPORT_SYMBOL(pci_bus_write_config_byte);
+EXPORT_SYMBOL(pci_bus_write_config_word);
+EXPORT_SYMBOL(pci_bus_write_config_dword);
+
+int pci_generic_config_read(struct pci_bus *bus, unsigned int devfn,
+			    int where, int size, u32 *val)
+{
+	void __iomem *addr;
+
+	addr = bus->ops->map_bus(bus, devfn, where);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (size == 1)
+		*val = readb(addr);
+	else if (size == 2)
+		*val = readw(addr);
+	else
+		*val = readl(addr);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+EXPORT_SYMBOL_GPL(pci_generic_config_read);
+
+int pci_generic_config_write(struct pci_bus *bus, unsigned int devfn,
+			     int where, int size, u32 val)
+{
+	void __iomem *addr;
+
+	addr = bus->ops->map_bus(bus, devfn, where);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (size == 1)
+		writeb(val, addr);
+	else if (size == 2)
+		writew(val, addr);
+	else
+		writel(val, addr);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+EXPORT_SYMBOL_GPL(pci_generic_config_write);
+
+int pci_generic_config_read32(struct pci_bus *bus, unsigned int devfn,
+			      int where, int size, u32 *val)
+{
+	void __iomem *addr;
+
+	addr = bus->ops->map_bus(bus, devfn, where & ~0x3);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	*val = readl(addr);
+
+	if (size <= 2)
+		*val = (*val >> (8 * (where & 3))) & ((1 << (size * 8)) - 1);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+EXPORT_SYMBOL_GPL(pci_generic_config_read32);
+
+int pci_generic_config_write32(struct pci_bus *bus, unsigned int devfn,
+			       int where, int size, u32 val)
+{
+	void __iomem *addr;
+	u32 mask, tmp;
+
+	addr = bus->ops->map_bus(bus, devfn, where & ~0x3);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (size == 4) {
+		writel(val, addr);
+		return PCIBIOS_SUCCESSFUL;
+	}
+
+	/*
+	 * In general, hardware that supports only 32-bit writes on PCI is
+	 * not spec-compliant.  For example, software may perform a 16-bit
+	 * write.  If the hardware only supports 32-bit accesses, we must
+	 * do a 32-bit read, merge in the 16 bits we intend to write,
+	 * followed by a 32-bit write.  If the 16 bits we *don't* intend to
+	 * write happen to have any RW1C (write-one-to-clear) bits set, we
+	 * just inadvertently cleared something we shouldn't have.
+	 */
+	if (!bus->unsafe_warn) {
+		dev_warn(&bus->dev, "%d-byte config write to %04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n",
+			 size, pci_domain_nr(bus), bus->number,
+			 PCI_SLOT(devfn), PCI_FUNC(devfn), where);
+		bus->unsafe_warn = 1;
+	}
+
+	mask = ~(((1 << (size * 8)) - 1) << ((where & 0x3) * 8));
+	tmp = readl(addr) & mask;
+	tmp |= val << ((where & 0x3) * 8);
+	writel(tmp, addr);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+EXPORT_SYMBOL_GPL(pci_generic_config_write32);
+
+/**
+ * pci_bus_set_ops - Set raw operations of pci bus
+ * @bus:	pci bus struct
+ * @ops:	new raw operations
+ *
+ * Return previous raw operations
+ */
+struct pci_ops *pci_bus_set_ops(struct pci_bus *bus, struct pci_ops *ops)
+{
+	struct pci_ops *old_ops;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&pci_lock, flags);
+	old_ops = bus->ops;
+	bus->ops = ops;
+	raw_spin_unlock_irqrestore(&pci_lock, flags);
+	return old_ops;
+}
+EXPORT_SYMBOL(pci_bus_set_ops);
+
+/*
+ * The following routines are to prevent the user from accessing PCI config
+ * space when it's unsafe to do so.  Some devices require this during BIST and
+ * we're required to prevent it during D-state transitions.
+ *
+ * We have a bit per device to indicate it's blocked and a global wait queue
+ * for callers to sleep on until devices are unblocked.
+ */
+static DECLARE_WAIT_QUEUE_HEAD(pci_cfg_wait);
+
+static noinline void pci_wait_cfg(struct pci_dev *dev)
+	__must_hold(&pci_lock)
+{
+	do {
+		raw_spin_unlock_irq(&pci_lock);
+		wait_event(pci_cfg_wait, !dev->block_cfg_access);
+		raw_spin_lock_irq(&pci_lock);
+	} while (dev->block_cfg_access);
+}
+
+/* Returns 0 on success, negative values indicate error. */
+#define PCI_USER_READ_CONFIG(size, type)					\
+int pci_user_read_config_##size						\
+	(struct pci_dev *dev, int pos, type *val)			\
+{									\
+	int ret = PCIBIOS_SUCCESSFUL;					\
+	u32 data = -1;							\
+	if (PCI_##size##_BAD)						\
+		return -EINVAL;						\
+	raw_spin_lock_irq(&pci_lock);				\
+	if (unlikely(dev->block_cfg_access))				\
+		pci_wait_cfg(dev);					\
+	ret = dev->bus->ops->read(dev->bus, dev->devfn,			\
+					pos, sizeof(type), &data);	\
+	raw_spin_unlock_irq(&pci_lock);				\
+	if (ret)							\
+		PCI_SET_ERROR_RESPONSE(val);				\
+	else								\
+		*val = (type)data;					\
+	return pcibios_err_to_errno(ret);				\
+}									\
+EXPORT_SYMBOL_GPL(pci_user_read_config_##size);
+
+/* Returns 0 on success, negative values indicate error. */
+#define PCI_USER_WRITE_CONFIG(size, type)				\
+int pci_user_write_config_##size					\
+	(struct pci_dev *dev, int pos, type val)			\
+{									\
+	int ret = PCIBIOS_SUCCESSFUL;					\
+	if (PCI_##size##_BAD)						\
+		return -EINVAL;						\
+	raw_spin_lock_irq(&pci_lock);				\
+	if (unlikely(dev->block_cfg_access))				\
+		pci_wait_cfg(dev);					\
+	ret = dev->bus->ops->write(dev->bus, dev->devfn,		\
+					pos, sizeof(type), val);	\
+	raw_spin_unlock_irq(&pci_lock);				\
+	return pcibios_err_to_errno(ret);				\
+}									\
+EXPORT_SYMBOL_GPL(pci_user_write_config_##size);
+
+PCI_USER_READ_CONFIG(byte, u8)
+PCI_USER_READ_CONFIG(word, u16)
+PCI_USER_READ_CONFIG(dword, u32)
+PCI_USER_WRITE_CONFIG(byte, u8)
+PCI_USER_WRITE_CONFIG(word, u16)
+PCI_USER_WRITE_CONFIG(dword, u32)
+
+/**
+ * pci_cfg_access_lock - Lock PCI config reads/writes
+ * @dev:	pci device struct
+ *
+ * When access is locked, any userspace reads or writes to config
+ * space and concurrent lock requests will sleep until access is
+ * allowed via pci_cfg_access_unlock() again.
+ */
+void pci_cfg_access_lock(struct pci_dev *dev)
+{
+	might_sleep();
+
+	raw_spin_lock_irq(&pci_lock);
+	if (dev->block_cfg_access)
+		pci_wait_cfg(dev);
+	dev->block_cfg_access = 1;
+	raw_spin_unlock_irq(&pci_lock);
+}
+EXPORT_SYMBOL_GPL(pci_cfg_access_lock);
+
+/**
+ * pci_cfg_access_trylock - try to lock PCI config reads/writes
+ * @dev:	pci device struct
+ *
+ * Same as pci_cfg_access_lock, but will return 0 if access is
+ * already locked, 1 otherwise. This function can be used from
+ * atomic contexts.
+ */
+bool pci_cfg_access_trylock(struct pci_dev *dev)
+{
+	unsigned long flags;
+	bool locked = true;
+
+	raw_spin_lock_irqsave(&pci_lock, flags);
+	if (dev->block_cfg_access)
+		locked = false;
+	else
+		dev->block_cfg_access = 1;
+	raw_spin_unlock_irqrestore(&pci_lock, flags);
+
+	return locked;
+}
+EXPORT_SYMBOL_GPL(pci_cfg_access_trylock);
+
+/**
+ * pci_cfg_access_unlock - Unlock PCI config reads/writes
+ * @dev:	pci device struct
+ *
+ * This function allows PCI config accesses to resume.
+ */
+void pci_cfg_access_unlock(struct pci_dev *dev)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&pci_lock, flags);
+
+	/*
+	 * This indicates a problem in the caller, but we don't need
+	 * to kill them, unlike a double-block above.
+	 */
+	WARN_ON(!dev->block_cfg_access);
+
+	dev->block_cfg_access = 0;
+	raw_spin_unlock_irqrestore(&pci_lock, flags);
+
+	wake_up_all(&pci_cfg_wait);
+}
+EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
+
+static inline int pcie_cap_version(const struct pci_dev *dev)
+{
+	return pcie_caps_reg(dev) & PCI_EXP_FLAGS_VERS;
+}
+
+bool pcie_cap_has_lnkctl(const struct pci_dev *dev)
+{
+	int type = pci_pcie_type(dev);
+
+	return type == PCI_EXP_TYPE_ENDPOINT ||
+	       type == PCI_EXP_TYPE_LEG_END ||
+	       type == PCI_EXP_TYPE_ROOT_PORT ||
+	       type == PCI_EXP_TYPE_UPSTREAM ||
+	       type == PCI_EXP_TYPE_DOWNSTREAM ||
+	       type == PCI_EXP_TYPE_PCI_BRIDGE ||
+	       type == PCI_EXP_TYPE_PCIE_BRIDGE;
+}
+
+bool pcie_cap_has_lnkctl2(const struct pci_dev *dev)
+{
+	return pcie_cap_has_lnkctl(dev) && pcie_cap_version(dev) > 1;
+}
+
+static inline bool pcie_cap_has_sltctl(const struct pci_dev *dev)
+{
+	return pcie_downstream_port(dev) &&
+	       pcie_caps_reg(dev) & PCI_EXP_FLAGS_SLOT;
+}
+
+bool pcie_cap_has_rtctl(const struct pci_dev *dev)
+{
+	int type = pci_pcie_type(dev);
+
+	return type == PCI_EXP_TYPE_ROOT_PORT ||
+	       type == PCI_EXP_TYPE_RC_EC;
+}
+
+static bool pcie_capability_reg_implemented(struct pci_dev *dev, int pos)
+{
+	if (!pci_is_pcie(dev))
+		return false;
+
+	switch (pos) {
+	case PCI_EXP_FLAGS:
+		return true;
+	case PCI_EXP_DEVCAP:
+	case PCI_EXP_DEVCTL:
+	case PCI_EXP_DEVSTA:
+		return true;
+	case PCI_EXP_LNKCAP:
+	case PCI_EXP_LNKCTL:
+	case PCI_EXP_LNKSTA:
+		return pcie_cap_has_lnkctl(dev);
+	case PCI_EXP_SLTCAP:
+	case PCI_EXP_SLTCTL:
+	case PCI_EXP_SLTSTA:
+		return pcie_cap_has_sltctl(dev);
+	case PCI_EXP_RTCTL:
+	case PCI_EXP_RTCAP:
+	case PCI_EXP_RTSTA:
+		return pcie_cap_has_rtctl(dev);
+	case PCI_EXP_DEVCAP2:
+	case PCI_EXP_DEVCTL2:
+		return pcie_cap_version(dev) > 1;
+	case PCI_EXP_LNKCAP2:
+	case PCI_EXP_LNKCTL2:
+	case PCI_EXP_LNKSTA2:
+		return pcie_cap_has_lnkctl2(dev);
+	default:
+		return false;
+	}
+}
+
+/*
+ * Note that these accessor functions are only for the "PCI Express
+ * Capability" (see PCIe spec r3.0, sec 7.8).  They do not apply to the
+ * other "PCI Express Extended Capabilities" (AER, VC, ACS, MFVC, etc.)
+ */
+int pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val)
+{
+	int ret;
+
+	*val = 0;
+	if (pos & 1)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	if (pcie_capability_reg_implemented(dev, pos)) {
+		ret = pci_read_config_word(dev, pci_pcie_cap(dev) + pos, val);
+		/*
+		 * Reset *val to 0 if pci_read_config_word() fails; it may
+		 * have been written as 0xFFFF (PCI_ERROR_RESPONSE) if the
+		 * config read failed on PCI.
+		 */
+		if (ret)
+			*val = 0;
+		return ret;
+	}
+
+	/*
+	 * For Functions that do not implement the Slot Capabilities,
+	 * Slot Status, and Slot Control registers, these spaces must
+	 * be hardwired to 0b, with the exception of the Presence Detect
+	 * State bit in the Slot Status register of Downstream Ports,
+	 * which must be hardwired to 1b.  (PCIe Base Spec 3.0, sec 7.8)
+	 */
+	if (pci_is_pcie(dev) && pcie_downstream_port(dev) &&
+	    pos == PCI_EXP_SLTSTA)
+		*val = PCI_EXP_SLTSTA_PDS;
+
+	return 0;
+}
+EXPORT_SYMBOL(pcie_capability_read_word);
+
+int pcie_capability_read_dword(struct pci_dev *dev, int pos, u32 *val)
+{
+	int ret;
+
+	*val = 0;
+	if (pos & 3)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	if (pcie_capability_reg_implemented(dev, pos)) {
+		ret = pci_read_config_dword(dev, pci_pcie_cap(dev) + pos, val);
+		/*
+		 * Reset *val to 0 if pci_read_config_dword() fails; it may
+		 * have been written as 0xFFFFFFFF (PCI_ERROR_RESPONSE) if
+		 * the config read failed on PCI.
+		 */
+		if (ret)
+			*val = 0;
+		return ret;
+	}
+
+	if (pci_is_pcie(dev) && pcie_downstream_port(dev) &&
+	    pos == PCI_EXP_SLTSTA)
+		*val = PCI_EXP_SLTSTA_PDS;
+
+	return 0;
+}
+EXPORT_SYMBOL(pcie_capability_read_dword);
+
+int pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val)
+{
+	if (pos & 1)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	if (!pcie_capability_reg_implemented(dev, pos))
+		return 0;
+
+	return pci_write_config_word(dev, pci_pcie_cap(dev) + pos, val);
+}
+EXPORT_SYMBOL(pcie_capability_write_word);
+
+int pcie_capability_write_dword(struct pci_dev *dev, int pos, u32 val)
+{
+	if (pos & 3)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	if (!pcie_capability_reg_implemented(dev, pos))
+		return 0;
+
+	return pci_write_config_dword(dev, pci_pcie_cap(dev) + pos, val);
+}
+EXPORT_SYMBOL(pcie_capability_write_dword);
+
+int pcie_capability_clear_and_set_word_unlocked(struct pci_dev *dev, int pos,
+						u16 clear, u16 set)
+{
+	int ret;
+	u16 val;
+
+	ret = pcie_capability_read_word(dev, pos, &val);
+	if (ret)
+		return ret;
+
+	val &= ~clear;
+	val |= set;
+	return pcie_capability_write_word(dev, pos, val);
+}
+EXPORT_SYMBOL(pcie_capability_clear_and_set_word_unlocked);
+
+int pcie_capability_clear_and_set_word_locked(struct pci_dev *dev, int pos,
+					      u16 clear, u16 set)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&dev->pcie_cap_lock, flags);
+	ret = pcie_capability_clear_and_set_word_unlocked(dev, pos, clear, set);
+	spin_unlock_irqrestore(&dev->pcie_cap_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(pcie_capability_clear_and_set_word_locked);
+
+int pcie_capability_clear_and_set_dword(struct pci_dev *dev, int pos,
+					u32 clear, u32 set)
+{
+	int ret;
+	u32 val;
+
+	ret = pcie_capability_read_dword(dev, pos, &val);
+	if (ret)
+		return ret;
+
+	val &= ~clear;
+	val |= set;
+	return pcie_capability_write_dword(dev, pos, val);
+}
+EXPORT_SYMBOL(pcie_capability_clear_and_set_dword);
+
+int pci_read_config_byte(const struct pci_dev *dev, int where, u8 *val)
+{
+	if (pci_dev_is_disconnected(dev)) {
+		PCI_SET_ERROR_RESPONSE(val);
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+	return pci_bus_read_config_byte(dev->bus, dev->devfn, where, val);
+}
+EXPORT_SYMBOL(pci_read_config_byte);
+
+int pci_read_config_word(const struct pci_dev *dev, int where, u16 *val)
+{
+	if (pci_dev_is_disconnected(dev)) {
+		PCI_SET_ERROR_RESPONSE(val);
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+	return pci_bus_read_config_word(dev->bus, dev->devfn, where, val);
+}
+EXPORT_SYMBOL(pci_read_config_word);
+
+int pci_read_config_dword(const struct pci_dev *dev, int where,
+					u32 *val)
+{
+	if (pci_dev_is_disconnected(dev)) {
+		PCI_SET_ERROR_RESPONSE(val);
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+	return pci_bus_read_config_dword(dev->bus, dev->devfn, where, val);
+}
+EXPORT_SYMBOL(pci_read_config_dword);
+
+int pci_write_config_byte(const struct pci_dev *dev, int where, u8 val)
+{
+	if (pci_dev_is_disconnected(dev))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	return pci_bus_write_config_byte(dev->bus, dev->devfn, where, val);
+}
+EXPORT_SYMBOL(pci_write_config_byte);
+
+int pci_write_config_word(const struct pci_dev *dev, int where, u16 val)
+{
+	if (pci_dev_is_disconnected(dev))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	return pci_bus_write_config_word(dev->bus, dev->devfn, where, val);
+}
+EXPORT_SYMBOL(pci_write_config_word);
+
+int pci_write_config_dword(const struct pci_dev *dev, int where,
+					 u32 val)
+{
+	if (pci_dev_is_disconnected(dev))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	return pci_bus_write_config_dword(dev->bus, dev->devfn, where, val);
+}
+EXPORT_SYMBOL(pci_write_config_dword);
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
new file mode 100644
index 000000000..f9cc2e10b
--- /dev/null
+++ b/drivers/pci/ats.c
@@ -0,0 +1,511 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Express I/O Virtualization (IOV) support
+ *   Address Translation Service 1.0
+ *   Page Request Interface added by Joerg Roedel <joerg.roedel@amd.com>
+ *   PASID support added by Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com>
+ * Copyright (C) 2011 Advanced Micro Devices,
+ */
+
+#include <linux/export.h>
+#include <linux/pci-ats.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+
+#include "pci.h"
+
+void pci_ats_init(struct pci_dev *dev)
+{
+	int pos;
+
+	if (pci_ats_disabled())
+		return;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS);
+	if (!pos)
+		return;
+
+	dev->ats_cap = pos;
+}
+
+/**
+ * pci_ats_supported - check if the device can use ATS
+ * @dev: the PCI device
+ *
+ * Returns true if the device supports ATS and is allowed to use it, false
+ * otherwise.
+ */
+bool pci_ats_supported(struct pci_dev *dev)
+{
+	if (!dev->ats_cap)
+		return false;
+
+	return (dev->untrusted == 0);
+}
+EXPORT_SYMBOL_GPL(pci_ats_supported);
+
+/**
+ * pci_enable_ats - enable the ATS capability
+ * @dev: the PCI device
+ * @ps: the IOMMU page shift
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+int pci_enable_ats(struct pci_dev *dev, int ps)
+{
+	u16 ctrl;
+	struct pci_dev *pdev;
+
+	if (!pci_ats_supported(dev))
+		return -EINVAL;
+
+	if (WARN_ON(dev->ats_enabled))
+		return -EBUSY;
+
+	if (ps < PCI_ATS_MIN_STU)
+		return -EINVAL;
+
+	/*
+	 * Note that enabling ATS on a VF fails unless it's already enabled
+	 * with the same STU on the PF.
+	 */
+	ctrl = PCI_ATS_CTRL_ENABLE;
+	if (dev->is_virtfn) {
+		pdev = pci_physfn(dev);
+		if (pdev->ats_stu != ps)
+			return -EINVAL;
+	} else {
+		dev->ats_stu = ps;
+		ctrl |= PCI_ATS_CTRL_STU(dev->ats_stu - PCI_ATS_MIN_STU);
+	}
+	pci_write_config_word(dev, dev->ats_cap + PCI_ATS_CTRL, ctrl);
+
+	dev->ats_enabled = 1;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_enable_ats);
+
+/**
+ * pci_disable_ats - disable the ATS capability
+ * @dev: the PCI device
+ */
+void pci_disable_ats(struct pci_dev *dev)
+{
+	u16 ctrl;
+
+	if (WARN_ON(!dev->ats_enabled))
+		return;
+
+	pci_read_config_word(dev, dev->ats_cap + PCI_ATS_CTRL, &ctrl);
+	ctrl &= ~PCI_ATS_CTRL_ENABLE;
+	pci_write_config_word(dev, dev->ats_cap + PCI_ATS_CTRL, ctrl);
+
+	dev->ats_enabled = 0;
+}
+EXPORT_SYMBOL_GPL(pci_disable_ats);
+
+void pci_restore_ats_state(struct pci_dev *dev)
+{
+	u16 ctrl;
+
+	if (!dev->ats_enabled)
+		return;
+
+	ctrl = PCI_ATS_CTRL_ENABLE;
+	if (!dev->is_virtfn)
+		ctrl |= PCI_ATS_CTRL_STU(dev->ats_stu - PCI_ATS_MIN_STU);
+	pci_write_config_word(dev, dev->ats_cap + PCI_ATS_CTRL, ctrl);
+}
+
+/**
+ * pci_ats_queue_depth - query the ATS Invalidate Queue Depth
+ * @dev: the PCI device
+ *
+ * Returns the queue depth on success, or negative on failure.
+ *
+ * The ATS spec uses 0 in the Invalidate Queue Depth field to
+ * indicate that the function can accept 32 Invalidate Request.
+ * But here we use the `real' values (i.e. 1~32) for the Queue
+ * Depth; and 0 indicates the function shares the Queue with
+ * other functions (doesn't exclusively own a Queue).
+ */
+int pci_ats_queue_depth(struct pci_dev *dev)
+{
+	u16 cap;
+
+	if (!dev->ats_cap)
+		return -EINVAL;
+
+	if (dev->is_virtfn)
+		return 0;
+
+	pci_read_config_word(dev, dev->ats_cap + PCI_ATS_CAP, &cap);
+	return PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) : PCI_ATS_MAX_QDEP;
+}
+
+/**
+ * pci_ats_page_aligned - Return Page Aligned Request bit status.
+ * @pdev: the PCI device
+ *
+ * Returns 1, if the Untranslated Addresses generated by the device
+ * are always aligned or 0 otherwise.
+ *
+ * Per PCIe spec r4.0, sec 10.5.1.2, if the Page Aligned Request bit
+ * is set, it indicates the Untranslated Addresses generated by the
+ * device are always aligned to a 4096 byte boundary.
+ */
+int pci_ats_page_aligned(struct pci_dev *pdev)
+{
+	u16 cap;
+
+	if (!pdev->ats_cap)
+		return 0;
+
+	pci_read_config_word(pdev, pdev->ats_cap + PCI_ATS_CAP, &cap);
+
+	if (cap & PCI_ATS_CAP_PAGE_ALIGNED)
+		return 1;
+
+	return 0;
+}
+
+#ifdef CONFIG_PCI_PRI
+void pci_pri_init(struct pci_dev *pdev)
+{
+	u16 status;
+
+	pdev->pri_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
+
+	if (!pdev->pri_cap)
+		return;
+
+	pci_read_config_word(pdev, pdev->pri_cap + PCI_PRI_STATUS, &status);
+	if (status & PCI_PRI_STATUS_PASID)
+		pdev->pasid_required = 1;
+}
+
+/**
+ * pci_enable_pri - Enable PRI capability
+ * @pdev: PCI device structure
+ * @reqs: outstanding requests
+ *
+ * Returns 0 on success, negative value on error
+ */
+int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
+{
+	u16 control, status;
+	u32 max_requests;
+	int pri = pdev->pri_cap;
+
+	/*
+	 * VFs must not implement the PRI Capability.  If their PF
+	 * implements PRI, it is shared by the VFs, so if the PF PRI is
+	 * enabled, it is also enabled for the VF.
+	 */
+	if (pdev->is_virtfn) {
+		if (pci_physfn(pdev)->pri_enabled)
+			return 0;
+		return -EINVAL;
+	}
+
+	if (WARN_ON(pdev->pri_enabled))
+		return -EBUSY;
+
+	if (!pri)
+		return -EINVAL;
+
+	pci_read_config_word(pdev, pri + PCI_PRI_STATUS, &status);
+	if (!(status & PCI_PRI_STATUS_STOPPED))
+		return -EBUSY;
+
+	pci_read_config_dword(pdev, pri + PCI_PRI_MAX_REQ, &max_requests);
+	reqs = min(max_requests, reqs);
+	pdev->pri_reqs_alloc = reqs;
+	pci_write_config_dword(pdev, pri + PCI_PRI_ALLOC_REQ, reqs);
+
+	control = PCI_PRI_CTRL_ENABLE;
+	pci_write_config_word(pdev, pri + PCI_PRI_CTRL, control);
+
+	pdev->pri_enabled = 1;
+
+	return 0;
+}
+
+/**
+ * pci_disable_pri - Disable PRI capability
+ * @pdev: PCI device structure
+ *
+ * Only clears the enabled-bit, regardless of its former value
+ */
+void pci_disable_pri(struct pci_dev *pdev)
+{
+	u16 control;
+	int pri = pdev->pri_cap;
+
+	/* VFs share the PF PRI */
+	if (pdev->is_virtfn)
+		return;
+
+	if (WARN_ON(!pdev->pri_enabled))
+		return;
+
+	if (!pri)
+		return;
+
+	pci_read_config_word(pdev, pri + PCI_PRI_CTRL, &control);
+	control &= ~PCI_PRI_CTRL_ENABLE;
+	pci_write_config_word(pdev, pri + PCI_PRI_CTRL, control);
+
+	pdev->pri_enabled = 0;
+}
+EXPORT_SYMBOL_GPL(pci_disable_pri);
+
+/**
+ * pci_restore_pri_state - Restore PRI
+ * @pdev: PCI device structure
+ */
+void pci_restore_pri_state(struct pci_dev *pdev)
+{
+	u16 control = PCI_PRI_CTRL_ENABLE;
+	u32 reqs = pdev->pri_reqs_alloc;
+	int pri = pdev->pri_cap;
+
+	if (pdev->is_virtfn)
+		return;
+
+	if (!pdev->pri_enabled)
+		return;
+
+	if (!pri)
+		return;
+
+	pci_write_config_dword(pdev, pri + PCI_PRI_ALLOC_REQ, reqs);
+	pci_write_config_word(pdev, pri + PCI_PRI_CTRL, control);
+}
+
+/**
+ * pci_reset_pri - Resets device's PRI state
+ * @pdev: PCI device structure
+ *
+ * The PRI capability must be disabled before this function is called.
+ * Returns 0 on success, negative value on error.
+ */
+int pci_reset_pri(struct pci_dev *pdev)
+{
+	u16 control;
+	int pri = pdev->pri_cap;
+
+	if (pdev->is_virtfn)
+		return 0;
+
+	if (WARN_ON(pdev->pri_enabled))
+		return -EBUSY;
+
+	if (!pri)
+		return -EINVAL;
+
+	control = PCI_PRI_CTRL_RESET;
+	pci_write_config_word(pdev, pri + PCI_PRI_CTRL, control);
+
+	return 0;
+}
+
+/**
+ * pci_prg_resp_pasid_required - Return PRG Response PASID Required bit
+ *				 status.
+ * @pdev: PCI device structure
+ *
+ * Returns 1 if PASID is required in PRG Response Message, 0 otherwise.
+ */
+int pci_prg_resp_pasid_required(struct pci_dev *pdev)
+{
+	if (pdev->is_virtfn)
+		pdev = pci_physfn(pdev);
+
+	return pdev->pasid_required;
+}
+
+/**
+ * pci_pri_supported - Check if PRI is supported.
+ * @pdev: PCI device structure
+ *
+ * Returns true if PRI capability is present, false otherwise.
+ */
+bool pci_pri_supported(struct pci_dev *pdev)
+{
+	/* VFs share the PF PRI */
+	if (pci_physfn(pdev)->pri_cap)
+		return true;
+	return false;
+}
+EXPORT_SYMBOL_GPL(pci_pri_supported);
+#endif /* CONFIG_PCI_PRI */
+
+#ifdef CONFIG_PCI_PASID
+void pci_pasid_init(struct pci_dev *pdev)
+{
+	pdev->pasid_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
+}
+
+/**
+ * pci_enable_pasid - Enable the PASID capability
+ * @pdev: PCI device structure
+ * @features: Features to enable
+ *
+ * Returns 0 on success, negative value on error. This function checks
+ * whether the features are actually supported by the device and returns
+ * an error if not.
+ */
+int pci_enable_pasid(struct pci_dev *pdev, int features)
+{
+	u16 control, supported;
+	int pasid = pdev->pasid_cap;
+
+	/*
+	 * VFs must not implement the PASID Capability, but if a PF
+	 * supports PASID, its VFs share the PF PASID configuration.
+	 */
+	if (pdev->is_virtfn) {
+		if (pci_physfn(pdev)->pasid_enabled)
+			return 0;
+		return -EINVAL;
+	}
+
+	if (WARN_ON(pdev->pasid_enabled))
+		return -EBUSY;
+
+	if (!pdev->eetlp_prefix_path && !pdev->pasid_no_tlp)
+		return -EINVAL;
+
+	if (!pasid)
+		return -EINVAL;
+
+	if (!pci_acs_path_enabled(pdev, NULL, PCI_ACS_RR | PCI_ACS_UF))
+		return -EINVAL;
+
+	pci_read_config_word(pdev, pasid + PCI_PASID_CAP, &supported);
+	supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
+
+	/* User wants to enable anything unsupported? */
+	if ((supported & features) != features)
+		return -EINVAL;
+
+	control = PCI_PASID_CTRL_ENABLE | features;
+	pdev->pasid_features = features;
+
+	pci_write_config_word(pdev, pasid + PCI_PASID_CTRL, control);
+
+	pdev->pasid_enabled = 1;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_enable_pasid);
+
+/**
+ * pci_disable_pasid - Disable the PASID capability
+ * @pdev: PCI device structure
+ */
+void pci_disable_pasid(struct pci_dev *pdev)
+{
+	u16 control = 0;
+	int pasid = pdev->pasid_cap;
+
+	/* VFs share the PF PASID configuration */
+	if (pdev->is_virtfn)
+		return;
+
+	if (WARN_ON(!pdev->pasid_enabled))
+		return;
+
+	if (!pasid)
+		return;
+
+	pci_write_config_word(pdev, pasid + PCI_PASID_CTRL, control);
+
+	pdev->pasid_enabled = 0;
+}
+EXPORT_SYMBOL_GPL(pci_disable_pasid);
+
+/**
+ * pci_restore_pasid_state - Restore PASID capabilities
+ * @pdev: PCI device structure
+ */
+void pci_restore_pasid_state(struct pci_dev *pdev)
+{
+	u16 control;
+	int pasid = pdev->pasid_cap;
+
+	if (pdev->is_virtfn)
+		return;
+
+	if (!pdev->pasid_enabled)
+		return;
+
+	if (!pasid)
+		return;
+
+	control = PCI_PASID_CTRL_ENABLE | pdev->pasid_features;
+	pci_write_config_word(pdev, pasid + PCI_PASID_CTRL, control);
+}
+
+/**
+ * pci_pasid_features - Check which PASID features are supported
+ * @pdev: PCI device structure
+ *
+ * Returns a negative value when no PASI capability is present.
+ * Otherwise is returns a bitmask with supported features. Current
+ * features reported are:
+ * PCI_PASID_CAP_EXEC - Execute permission supported
+ * PCI_PASID_CAP_PRIV - Privileged mode supported
+ */
+int pci_pasid_features(struct pci_dev *pdev)
+{
+	u16 supported;
+	int pasid;
+
+	if (pdev->is_virtfn)
+		pdev = pci_physfn(pdev);
+
+	pasid = pdev->pasid_cap;
+	if (!pasid)
+		return -EINVAL;
+
+	pci_read_config_word(pdev, pasid + PCI_PASID_CAP, &supported);
+
+	supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
+
+	return supported;
+}
+EXPORT_SYMBOL_GPL(pci_pasid_features);
+
+#define PASID_NUMBER_SHIFT	8
+#define PASID_NUMBER_MASK	(0x1f << PASID_NUMBER_SHIFT)
+/**
+ * pci_max_pasids - Get maximum number of PASIDs supported by device
+ * @pdev: PCI device structure
+ *
+ * Returns negative value when PASID capability is not present.
+ * Otherwise it returns the number of supported PASIDs.
+ */
+int pci_max_pasids(struct pci_dev *pdev)
+{
+	u16 supported;
+	int pasid;
+
+	if (pdev->is_virtfn)
+		pdev = pci_physfn(pdev);
+
+	pasid = pdev->pasid_cap;
+	if (!pasid)
+		return -EINVAL;
+
+	pci_read_config_word(pdev, pasid + PCI_PASID_CAP, &supported);
+
+	supported = (supported & PASID_NUMBER_MASK) >> PASID_NUMBER_SHIFT;
+
+	return (1 << supported);
+}
+EXPORT_SYMBOL_GPL(pci_max_pasids);
+#endif /* CONFIG_PCI_PASID */
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
new file mode 100644
index 000000000..9c2137dae
--- /dev/null
+++ b/drivers/pci/bus.c
@@ -0,0 +1,449 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * From setup-res.c, by:
+ *	Dave Rusling (david.rusling@reo.mts.dec.com)
+ *	David Mosberger (davidm@cs.arizona.edu)
+ *	David Miller (davem@redhat.com)
+ *	Ivan Kokshaysky (ink@jurassic.park.msu.ru)
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/of.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>
+
+#include "pci.h"
+
+void pci_add_resource_offset(struct list_head *resources, struct resource *res,
+			     resource_size_t offset)
+{
+	struct resource_entry *entry;
+
+	entry = resource_list_create_entry(res, 0);
+	if (!entry) {
+		pr_err("PCI: can't add host bridge window %pR\n", res);
+		return;
+	}
+
+	entry->offset = offset;
+	resource_list_add_tail(entry, resources);
+}
+EXPORT_SYMBOL(pci_add_resource_offset);
+
+void pci_add_resource(struct list_head *resources, struct resource *res)
+{
+	pci_add_resource_offset(resources, res, 0);
+}
+EXPORT_SYMBOL(pci_add_resource);
+
+void pci_free_resource_list(struct list_head *resources)
+{
+	resource_list_free(resources);
+}
+EXPORT_SYMBOL(pci_free_resource_list);
+
+void pci_bus_add_resource(struct pci_bus *bus, struct resource *res,
+			  unsigned int flags)
+{
+	struct pci_bus_resource *bus_res;
+
+	bus_res = kzalloc(sizeof(struct pci_bus_resource), GFP_KERNEL);
+	if (!bus_res) {
+		dev_err(&bus->dev, "can't add %pR resource\n", res);
+		return;
+	}
+
+	bus_res->res = res;
+	bus_res->flags = flags;
+	list_add_tail(&bus_res->list, &bus->resources);
+}
+
+struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n)
+{
+	struct pci_bus_resource *bus_res;
+
+	if (n < PCI_BRIDGE_RESOURCE_NUM)
+		return bus->resource[n];
+
+	n -= PCI_BRIDGE_RESOURCE_NUM;
+	list_for_each_entry(bus_res, &bus->resources, list) {
+		if (n-- == 0)
+			return bus_res->res;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(pci_bus_resource_n);
+
+void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res)
+{
+	struct pci_bus_resource *bus_res, *tmp;
+	int i;
+
+	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
+		if (bus->resource[i] == res) {
+			bus->resource[i] = NULL;
+			return;
+		}
+	}
+
+	list_for_each_entry_safe(bus_res, tmp, &bus->resources, list) {
+		if (bus_res->res == res) {
+			list_del(&bus_res->list);
+			kfree(bus_res);
+			return;
+		}
+	}
+}
+
+void pci_bus_remove_resources(struct pci_bus *bus)
+{
+	int i;
+	struct pci_bus_resource *bus_res, *tmp;
+
+	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++)
+		bus->resource[i] = NULL;
+
+	list_for_each_entry_safe(bus_res, tmp, &bus->resources, list) {
+		list_del(&bus_res->list);
+		kfree(bus_res);
+	}
+}
+
+int devm_request_pci_bus_resources(struct device *dev,
+				   struct list_head *resources)
+{
+	struct resource_entry *win;
+	struct resource *parent, *res;
+	int err;
+
+	resource_list_for_each_entry(win, resources) {
+		res = win->res;
+		switch (resource_type(res)) {
+		case IORESOURCE_IO:
+			parent = &ioport_resource;
+			break;
+		case IORESOURCE_MEM:
+			parent = &iomem_resource;
+			break;
+		default:
+			continue;
+		}
+
+		err = devm_request_resource(dev, parent, res);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devm_request_pci_bus_resources);
+
+static struct pci_bus_region pci_32_bit = {0, 0xffffffffULL};
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+static struct pci_bus_region pci_64_bit = {0,
+				(pci_bus_addr_t) 0xffffffffffffffffULL};
+static struct pci_bus_region pci_high = {(pci_bus_addr_t) 0x100000000ULL,
+				(pci_bus_addr_t) 0xffffffffffffffffULL};
+#endif
+
+/*
+ * @res contains CPU addresses.  Clip it so the corresponding bus addresses
+ * on @bus are entirely within @region.  This is used to control the bus
+ * addresses of resources we allocate, e.g., we may need a resource that
+ * can be mapped by a 32-bit BAR.
+ */
+static void pci_clip_resource_to_region(struct pci_bus *bus,
+					struct resource *res,
+					struct pci_bus_region *region)
+{
+	struct pci_bus_region r;
+
+	pcibios_resource_to_bus(bus, &r, res);
+	if (r.start < region->start)
+		r.start = region->start;
+	if (r.end > region->end)
+		r.end = region->end;
+
+	if (r.end < r.start)
+		res->end = res->start - 1;
+	else
+		pcibios_bus_to_resource(bus, res, &r);
+}
+
+static int pci_bus_alloc_from_region(struct pci_bus *bus, struct resource *res,
+		resource_size_t size, resource_size_t align,
+		resource_size_t min, unsigned long type_mask,
+		resource_size_t (*alignf)(void *,
+					  const struct resource *,
+					  resource_size_t,
+					  resource_size_t),
+		void *alignf_data,
+		struct pci_bus_region *region)
+{
+	struct resource *r, avail;
+	resource_size_t max;
+	int ret;
+
+	type_mask |= IORESOURCE_TYPE_BITS;
+
+	pci_bus_for_each_resource(bus, r) {
+		resource_size_t min_used = min;
+
+		if (!r)
+			continue;
+
+		/* type_mask must match */
+		if ((res->flags ^ r->flags) & type_mask)
+			continue;
+
+		/* We cannot allocate a non-prefetching resource
+		   from a pre-fetching area */
+		if ((r->flags & IORESOURCE_PREFETCH) &&
+		    !(res->flags & IORESOURCE_PREFETCH))
+			continue;
+
+		avail = *r;
+		pci_clip_resource_to_region(bus, &avail, region);
+
+		/*
+		 * "min" is typically PCIBIOS_MIN_IO or PCIBIOS_MIN_MEM to
+		 * protect badly documented motherboard resources, but if
+		 * this is an already-configured bridge window, its start
+		 * overrides "min".
+		 */
+		if (avail.start)
+			min_used = avail.start;
+
+		max = avail.end;
+
+		/* Don't bother if available space isn't large enough */
+		if (size > max - min_used + 1)
+			continue;
+
+		/* Ok, try it out.. */
+		ret = allocate_resource(r, res, size, min_used, max,
+					align, alignf, alignf_data);
+		if (ret == 0)
+			return 0;
+	}
+	return -ENOMEM;
+}
+
+/**
+ * pci_bus_alloc_resource - allocate a resource from a parent bus
+ * @bus: PCI bus
+ * @res: resource to allocate
+ * @size: size of resource to allocate
+ * @align: alignment of resource to allocate
+ * @min: minimum /proc/iomem address to allocate
+ * @type_mask: IORESOURCE_* type flags
+ * @alignf: resource alignment function
+ * @alignf_data: data argument for resource alignment function
+ *
+ * Given the PCI bus a device resides on, the size, minimum address,
+ * alignment and type, try to find an acceptable resource allocation
+ * for a specific device resource.
+ */
+int pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res,
+		resource_size_t size, resource_size_t align,
+		resource_size_t min, unsigned long type_mask,
+		resource_size_t (*alignf)(void *,
+					  const struct resource *,
+					  resource_size_t,
+					  resource_size_t),
+		void *alignf_data)
+{
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	int rc;
+
+	if (res->flags & IORESOURCE_MEM_64) {
+		rc = pci_bus_alloc_from_region(bus, res, size, align, min,
+					       type_mask, alignf, alignf_data,
+					       &pci_high);
+		if (rc == 0)
+			return 0;
+
+		return pci_bus_alloc_from_region(bus, res, size, align, min,
+						 type_mask, alignf, alignf_data,
+						 &pci_64_bit);
+	}
+#endif
+
+	return pci_bus_alloc_from_region(bus, res, size, align, min,
+					 type_mask, alignf, alignf_data,
+					 &pci_32_bit);
+}
+EXPORT_SYMBOL(pci_bus_alloc_resource);
+
+/*
+ * The @idx resource of @dev should be a PCI-PCI bridge window.  If this
+ * resource fits inside a window of an upstream bridge, do nothing.  If it
+ * overlaps an upstream window but extends outside it, clip the resource so
+ * it fits completely inside.
+ */
+bool pci_bus_clip_resource(struct pci_dev *dev, int idx)
+{
+	struct pci_bus *bus = dev->bus;
+	struct resource *res = &dev->resource[idx];
+	struct resource orig_res = *res;
+	struct resource *r;
+
+	pci_bus_for_each_resource(bus, r) {
+		resource_size_t start, end;
+
+		if (!r)
+			continue;
+
+		if (resource_type(res) != resource_type(r))
+			continue;
+
+		start = max(r->start, res->start);
+		end = min(r->end, res->end);
+
+		if (start > end)
+			continue;	/* no overlap */
+
+		if (res->start == start && res->end == end)
+			return false;	/* no change */
+
+		res->start = start;
+		res->end = end;
+		res->flags &= ~IORESOURCE_UNSET;
+		orig_res.flags &= ~IORESOURCE_UNSET;
+		pci_info(dev, "%pR clipped to %pR\n", &orig_res, res);
+
+		return true;
+	}
+
+	return false;
+}
+
+void __weak pcibios_resource_survey_bus(struct pci_bus *bus) { }
+
+void __weak pcibios_bus_add_device(struct pci_dev *pdev) { }
+
+/**
+ * pci_bus_add_device - start driver for a single device
+ * @dev: device to add
+ *
+ * This adds add sysfs entries and start device drivers
+ */
+void pci_bus_add_device(struct pci_dev *dev)
+{
+	struct device_node *dn = dev->dev.of_node;
+	int retval;
+
+	/*
+	 * Can not put in pci_device_add yet because resources
+	 * are not assigned yet for some devices.
+	 */
+	pcibios_bus_add_device(dev);
+	pci_fixup_device(pci_fixup_final, dev);
+	if (pci_is_bridge(dev))
+		of_pci_make_dev_node(dev);
+	pci_create_sysfs_dev_files(dev);
+	pci_proc_attach_device(dev);
+	pci_bridge_d3_update(dev);
+
+	dev->match_driver = !dn || of_device_is_available(dn);
+	retval = device_attach(&dev->dev);
+	if (retval < 0 && retval != -EPROBE_DEFER)
+		pci_warn(dev, "device attach failed (%d)\n", retval);
+
+	pci_dev_assign_added(dev, true);
+}
+EXPORT_SYMBOL_GPL(pci_bus_add_device);
+
+/**
+ * pci_bus_add_devices - start driver for PCI devices
+ * @bus: bus to check for new devices
+ *
+ * Start driver for PCI devices and add some sysfs entries.
+ */
+void pci_bus_add_devices(const struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+	struct pci_bus *child;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		/* Skip already-added devices */
+		if (pci_dev_is_added(dev))
+			continue;
+		pci_bus_add_device(dev);
+	}
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		/* Skip if device attach failed */
+		if (!pci_dev_is_added(dev))
+			continue;
+		child = dev->subordinate;
+		if (child)
+			pci_bus_add_devices(child);
+	}
+}
+EXPORT_SYMBOL(pci_bus_add_devices);
+
+/** pci_walk_bus - walk devices on/under bus, calling callback.
+ *  @top      bus whose devices should be walked
+ *  @cb       callback to be called for each device found
+ *  @userdata arbitrary pointer to be passed to callback.
+ *
+ *  Walk the given bus, including any bridged devices
+ *  on buses under this bus.  Call the provided callback
+ *  on each device found.
+ *
+ *  We check the return of @cb each time. If it returns anything
+ *  other than 0, we break out.
+ *
+ */
+void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
+		  void *userdata)
+{
+	struct pci_dev *dev;
+	struct pci_bus *bus;
+	struct list_head *next;
+	int retval;
+
+	bus = top;
+	down_read(&pci_bus_sem);
+	next = top->devices.next;
+	for (;;) {
+		if (next == &bus->devices) {
+			/* end of this bus, go up or finish */
+			if (bus == top)
+				break;
+			next = bus->self->bus_list.next;
+			bus = bus->self->bus;
+			continue;
+		}
+		dev = list_entry(next, struct pci_dev, bus_list);
+		if (dev->subordinate) {
+			/* this is a pci-pci bridge, do its devices next */
+			next = dev->subordinate->devices.next;
+			bus = dev->subordinate;
+		} else
+			next = dev->bus_list.next;
+
+		retval = cb(dev, userdata);
+		if (retval)
+			break;
+	}
+	up_read(&pci_bus_sem);
+}
+EXPORT_SYMBOL_GPL(pci_walk_bus);
+
+struct pci_bus *pci_bus_get(struct pci_bus *bus)
+{
+	if (bus)
+		get_device(&bus->dev);
+	return bus;
+}
+
+void pci_bus_put(struct pci_bus *bus)
+{
+	if (bus)
+		put_device(&bus->dev);
+}
diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
new file mode 100644
index 000000000..c0c3f2824
--- /dev/null
+++ b/drivers/pci/controller/Kconfig
@@ -0,0 +1,348 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "PCI controller drivers"
+	depends on PCI
+
+config PCI_AARDVARK
+	tristate "Aardvark PCIe controller"
+	depends on (ARCH_MVEBU && ARM64) || COMPILE_TEST
+	depends on OF
+	depends on PCI_MSI
+	select PCI_BRIDGE_EMUL
+	help
+	 Add support for Aardvark 64bit PCIe Host Controller. This
+	 controller is part of the South Bridge of the Marvel Armada
+	 3700 SoC.
+
+config PCIE_ALTERA
+	tristate "Altera PCIe controller"
+	depends on ARM || NIOS2 || ARM64 || COMPILE_TEST
+	help
+	  Say Y here if you want to enable PCIe controller support on Altera
+	  FPGA.
+
+config PCIE_ALTERA_MSI
+	tristate "Altera PCIe MSI feature"
+	depends on PCIE_ALTERA
+	depends on PCI_MSI
+	help
+	  Say Y here if you want PCIe MSI support for the Altera FPGA.
+	  This MSI driver supports Altera MSI to GIC controller IP.
+
+config PCIE_APPLE_MSI_DOORBELL_ADDR
+	hex
+	default 0xfffff000
+	depends on PCIE_APPLE
+
+config PCIE_APPLE
+	tristate "Apple PCIe controller"
+	depends on ARCH_APPLE || COMPILE_TEST
+	depends on OF
+	depends on PCI_MSI
+	select PCI_HOST_COMMON
+	help
+	  Say Y here if you want to enable PCIe controller support on Apple
+	  system-on-chips, like the Apple M1. This is required for the USB
+	  type-A ports, Ethernet, Wi-Fi, and Bluetooth.
+
+	  If unsure, say Y if you have an Apple Silicon system.
+
+config PCI_VERSATILE
+	bool "ARM Versatile PB PCI controller"
+	depends on ARCH_VERSATILE || COMPILE_TEST
+
+config PCIE_BRCMSTB
+	tristate "Broadcom Brcmstb PCIe controller"
+	depends on ARCH_BRCMSTB || ARCH_BCM2835 || ARCH_BCMBCA || \
+		   BMIPS_GENERIC || COMPILE_TEST
+	depends on OF
+	depends on PCI_MSI
+	default ARCH_BRCMSTB || BMIPS_GENERIC
+	help
+	  Say Y here to enable PCIe host controller support for
+	  Broadcom STB based SoCs, like the Raspberry Pi 4.
+
+config PCIE_IPROC
+	tristate
+	help
+	  This enables the iProc PCIe core controller support for Broadcom's
+	  iProc family of SoCs. An appropriate bus interface driver needs
+	  to be enabled to select this.
+
+config PCIE_IPROC_PLATFORM
+	tristate "Broadcom iProc PCIe platform bus driver"
+	depends on ARCH_BCM_IPROC || (ARM && COMPILE_TEST)
+	depends on OF
+	select PCIE_IPROC
+	default ARCH_BCM_IPROC
+	help
+	  Say Y here if you want to use the Broadcom iProc PCIe controller
+	  through the generic platform bus interface
+
+config PCIE_IPROC_BCMA
+	tristate "Broadcom iProc BCMA PCIe controller"
+	depends on ARM && (ARCH_BCM_IPROC || COMPILE_TEST)
+	select PCIE_IPROC
+	select BCMA
+	default ARCH_BCM_5301X
+	help
+	  Say Y here if you want to use the Broadcom iProc PCIe controller
+	  through the BCMA bus interface
+
+config PCIE_IPROC_MSI
+	bool "Broadcom iProc PCIe MSI support"
+	depends on PCIE_IPROC_PLATFORM || PCIE_IPROC_BCMA
+	depends on PCI_MSI
+	default ARCH_BCM_IPROC
+	help
+	  Say Y here if you want to enable MSI support for Broadcom's iProc
+	  PCIe controller
+
+config PCI_HOST_THUNDER_PEM
+	bool "Cavium Thunder PCIe controller to off-chip devices"
+	depends on ARM64 || COMPILE_TEST
+	depends on OF || (ACPI && PCI_QUIRKS)
+	select PCI_HOST_COMMON
+	help
+	  Say Y here if you want PCIe support for CN88XX Cavium Thunder SoCs.
+
+config PCI_HOST_THUNDER_ECAM
+	bool "Cavium Thunder ECAM controller to on-chip devices on pass-1.x silicon"
+	depends on ARM64 || COMPILE_TEST
+	depends on OF || (ACPI && PCI_QUIRKS)
+	select PCI_HOST_COMMON
+	help
+	  Say Y here if you want ECAM support for CN88XX-Pass-1.x Cavium Thunder SoCs.
+
+config PCI_FTPCI100
+	bool "Faraday Technology FTPCI100 PCI controller"
+	depends on OF
+	default ARCH_GEMINI
+
+config PCI_HOST_COMMON
+	tristate
+	select PCI_ECAM
+
+config PCI_HOST_GENERIC
+	tristate "Generic PCI host controller"
+	depends on OF
+	select PCI_HOST_COMMON
+	select IRQ_DOMAIN
+	help
+	  Say Y here if you want to support a simple generic PCI host
+	  controller, such as the one emulated by kvmtool.
+
+config PCIE_HISI_ERR
+	depends on ACPI_APEI_GHES && (ARM64 || COMPILE_TEST)
+	bool "HiSilicon HIP PCIe controller error handling driver"
+	help
+	  Say Y here if you want error handling support
+	  for the PCIe controller's errors on HiSilicon HIP SoCs
+
+config PCI_IXP4XX
+	bool "Intel IXP4xx PCI controller"
+	depends on ARM && OF
+	depends on ARCH_IXP4XX || COMPILE_TEST
+	default ARCH_IXP4XX
+	help
+	  Say Y here if you want support for the PCI host controller found
+	  in the Intel IXP4xx XScale-based network processor SoC.
+
+config VMD
+	depends on PCI_MSI && X86_64 && !UML
+	tristate "Intel Volume Management Device Driver"
+	help
+	  Adds support for the Intel Volume Management Device (VMD). VMD is a
+	  secondary PCI host bridge that allows PCI Express root ports,
+	  and devices attached to them, to be removed from the default
+	  PCI domain and placed within the VMD domain. This provides
+	  more bus resources than are otherwise possible with a
+	  single domain. If you know your system provides one of these and
+	  has devices attached to it, say Y; if you are not sure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called vmd.
+
+config PCI_LOONGSON
+	bool "LOONGSON PCIe controller"
+	depends on MACH_LOONGSON64 || COMPILE_TEST
+	depends on OF || ACPI
+	depends on PCI_QUIRKS
+	default MACH_LOONGSON64
+	help
+	  Say Y here if you want to enable PCI controller support on
+	  Loongson systems.
+
+config PCI_MVEBU
+	tristate "Marvell EBU PCIe controller"
+	depends on ARCH_MVEBU || ARCH_DOVE || COMPILE_TEST
+	depends on MVEBU_MBUS
+	depends on ARM
+	depends on OF
+	select PCI_BRIDGE_EMUL
+	help
+	 Add support for Marvell EBU PCIe controller. This PCIe controller
+	 is used on 32-bit Marvell ARM SoCs: Dove, Kirkwood, Armada 370,
+	 Armada XP, Armada 375, Armada 38x and Armada 39x.
+
+config PCIE_MEDIATEK
+	tristate "MediaTek PCIe controller"
+	depends on ARCH_AIROHA || ARCH_MEDIATEK || COMPILE_TEST
+	depends on OF
+	depends on PCI_MSI
+	help
+	  Say Y here if you want to enable PCIe controller support on
+	  MediaTek SoCs.
+
+config PCIE_MEDIATEK_GEN3
+	tristate "MediaTek Gen3 PCIe controller"
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	depends on PCI_MSI
+	help
+	  Adds support for PCIe Gen3 MAC controller for MediaTek SoCs.
+	  This PCIe controller is compatible with Gen3, Gen2 and Gen1 speed,
+	  and support up to 256 MSI interrupt numbers for
+	  multi-function devices.
+
+	  Say Y here if you want to enable Gen3 PCIe controller support on
+	  MediaTek SoCs.
+
+config PCIE_MT7621
+	tristate "MediaTek MT7621 PCIe controller"
+	depends on SOC_MT7621 || COMPILE_TEST
+	select PHY_MT7621_PCI
+	default SOC_MT7621
+	help
+	  This selects a driver for the MediaTek MT7621 PCIe Controller.
+
+config PCIE_MICROCHIP_HOST
+	tristate "Microchip AXI PCIe controller"
+	depends on PCI_MSI && OF
+	select PCI_HOST_COMMON
+	help
+	  Say Y here if you want kernel to support the Microchip AXI PCIe
+	  Host Bridge driver.
+
+config PCI_HYPERV_INTERFACE
+	tristate "Microsoft Hyper-V PCI Interface"
+	depends on ((X86 && X86_64) || ARM64) && HYPERV && PCI_MSI
+	help
+	  The Hyper-V PCI Interface is a helper driver that allows other
+	  drivers to have a common interface with the Hyper-V PCI frontend
+	  driver.
+
+config PCI_TEGRA
+	bool "NVIDIA Tegra PCIe controller"
+	depends on ARCH_TEGRA || COMPILE_TEST
+	depends on PCI_MSI
+	help
+	  Say Y here if you want support for the PCIe host controller found
+	  on NVIDIA Tegra SoCs.
+
+config PCIE_RCAR_HOST
+	bool "Renesas R-Car PCIe controller (host mode)"
+	depends on ARCH_RENESAS || COMPILE_TEST
+	depends on PCI_MSI
+	help
+	  Say Y here if you want PCIe controller support on R-Car SoCs in host
+	  mode.
+
+config PCIE_RCAR_EP
+	bool "Renesas R-Car PCIe controller (endpoint mode)"
+	depends on ARCH_RENESAS || COMPILE_TEST
+	depends on PCI_ENDPOINT
+	help
+	  Say Y here if you want PCIe controller support on R-Car SoCs in
+	  endpoint mode.
+
+config PCI_RCAR_GEN2
+	bool "Renesas R-Car Gen2 Internal PCI controller"
+	depends on ARCH_RENESAS || COMPILE_TEST
+	depends on ARM
+	help
+	  Say Y here if you want internal PCI support on R-Car Gen2 SoC.
+	  There are 3 internal PCI controllers available with a single
+	  built-in EHCI/OHCI host controller present on each one.
+
+config PCIE_ROCKCHIP
+	bool
+	depends on PCI
+
+config PCIE_ROCKCHIP_HOST
+	tristate "Rockchip PCIe controller (host mode)"
+	depends on ARCH_ROCKCHIP || COMPILE_TEST
+	depends on OF
+	depends on PCI_MSI
+	select MFD_SYSCON
+	select PCIE_ROCKCHIP
+	help
+	  Say Y here if you want internal PCI support on Rockchip SoC.
+	  There is 1 internal PCIe port available to support GEN2 with
+	  4 slots.
+
+config PCIE_ROCKCHIP_EP
+	bool "Rockchip PCIe controller (endpoint mode)"
+	depends on ARCH_ROCKCHIP || COMPILE_TEST
+	depends on OF
+	depends on PCI_ENDPOINT
+	select MFD_SYSCON
+	select PCIE_ROCKCHIP
+	help
+	  Say Y here if you want to support Rockchip PCIe controller in
+	  endpoint mode on Rockchip SoC. There is 1 internal PCIe port
+	  available to support GEN2 with 4 slots.
+
+config PCI_V3_SEMI
+	bool "V3 Semiconductor PCI controller"
+	depends on OF
+	depends on ARM || COMPILE_TEST
+	default ARCH_INTEGRATOR_AP
+
+config PCI_XGENE
+	bool "X-Gene PCIe controller"
+	depends on ARM64 || COMPILE_TEST
+	depends on OF || (ACPI && PCI_QUIRKS)
+	help
+	  Say Y here if you want internal PCI support on APM X-Gene SoC.
+	  There are 5 internal PCIe ports available. Each port is GEN3 capable
+	  and have varied lanes from x1 to x8.
+
+config PCI_XGENE_MSI
+	bool "X-Gene v1 PCIe MSI feature"
+	depends on PCI_XGENE
+	depends on PCI_MSI
+	default y
+	help
+	  Say Y here if you want PCIe MSI support for the APM X-Gene v1 SoC.
+	  This MSI driver supports 5 PCIe ports on the APM X-Gene v1 SoC.
+
+config PCIE_XILINX
+	bool "Xilinx AXI PCIe controller"
+	depends on OF
+	depends on PCI_MSI
+	help
+	  Say 'Y' here if you want kernel to support the Xilinx AXI PCIe
+	  Host Bridge driver.
+
+config PCIE_XILINX_NWL
+	bool "Xilinx NWL PCIe controller"
+	depends on ARCH_ZYNQMP || COMPILE_TEST
+	depends on PCI_MSI
+	help
+	 Say 'Y' here if you want kernel support for Xilinx
+	 NWL PCIe controller. The controller can act as Root Port
+	 or End Point. The current option selection will only
+	 support root port enabling.
+
+config PCIE_XILINX_CPM
+	bool "Xilinx Versal CPM PCI controller"
+	depends on ARCH_ZYNQMP || COMPILE_TEST
+	select PCI_HOST_COMMON
+	help
+	  Say 'Y' here if you want kernel support for the
+	  Xilinx Versal CPM host bridge.
+
+source "drivers/pci/controller/cadence/Kconfig"
+source "drivers/pci/controller/dwc/Kconfig"
+source "drivers/pci/controller/mobiveil/Kconfig"
+endmenu
diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile
new file mode 100644
index 000000000..37c8663de
--- /dev/null
+++ b/drivers/pci/controller/Makefile
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_PCIE_CADENCE) += cadence/
+obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o
+obj-$(CONFIG_PCI_IXP4XX) += pci-ixp4xx.o
+obj-$(CONFIG_PCI_HYPERV) += pci-hyperv.o
+obj-$(CONFIG_PCI_HYPERV_INTERFACE) += pci-hyperv-intf.o
+obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
+obj-$(CONFIG_PCI_AARDVARK) += pci-aardvark.o
+obj-$(CONFIG_PCI_TEGRA) += pci-tegra.o
+obj-$(CONFIG_PCI_RCAR_GEN2) += pci-rcar-gen2.o
+obj-$(CONFIG_PCIE_RCAR_HOST) += pcie-rcar.o pcie-rcar-host.o
+obj-$(CONFIG_PCIE_RCAR_EP) += pcie-rcar.o pcie-rcar-ep.o
+obj-$(CONFIG_PCI_HOST_COMMON) += pci-host-common.o
+obj-$(CONFIG_PCI_HOST_GENERIC) += pci-host-generic.o
+obj-$(CONFIG_PCI_HOST_THUNDER_ECAM) += pci-thunder-ecam.o
+obj-$(CONFIG_PCI_HOST_THUNDER_PEM) += pci-thunder-pem.o
+obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
+obj-$(CONFIG_PCIE_XILINX_NWL) += pcie-xilinx-nwl.o
+obj-$(CONFIG_PCIE_XILINX_CPM) += pcie-xilinx-cpm.o
+obj-$(CONFIG_PCI_V3_SEMI) += pci-v3-semi.o
+obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
+obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
+obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
+obj-$(CONFIG_PCIE_IPROC) += pcie-iproc.o
+obj-$(CONFIG_PCIE_IPROC_MSI) += pcie-iproc-msi.o
+obj-$(CONFIG_PCIE_IPROC_PLATFORM) += pcie-iproc-platform.o
+obj-$(CONFIG_PCIE_IPROC_BCMA) += pcie-iproc-bcma.o
+obj-$(CONFIG_PCIE_ALTERA) += pcie-altera.o
+obj-$(CONFIG_PCIE_ALTERA_MSI) += pcie-altera-msi.o
+obj-$(CONFIG_PCIE_ROCKCHIP) += pcie-rockchip.o
+obj-$(CONFIG_PCIE_ROCKCHIP_EP) += pcie-rockchip-ep.o
+obj-$(CONFIG_PCIE_ROCKCHIP_HOST) += pcie-rockchip-host.o
+obj-$(CONFIG_PCIE_MEDIATEK) += pcie-mediatek.o
+obj-$(CONFIG_PCIE_MEDIATEK_GEN3) += pcie-mediatek-gen3.o
+obj-$(CONFIG_PCIE_MICROCHIP_HOST) += pcie-microchip-host.o
+obj-$(CONFIG_VMD) += vmd.o
+obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb.o
+obj-$(CONFIG_PCI_LOONGSON) += pci-loongson.o
+obj-$(CONFIG_PCIE_HISI_ERR) += pcie-hisi-error.o
+obj-$(CONFIG_PCIE_APPLE) += pcie-apple.o
+obj-$(CONFIG_PCIE_MT7621) += pcie-mt7621.o
+
+# pcie-hisi.o quirks are needed even without CONFIG_PCIE_DW
+obj-y				+= dwc/
+obj-y				+= mobiveil/
+
+
+# The following drivers are for devices that use the generic ACPI
+# pci_root.c driver but don't support standard ECAM config access.
+# They contain MCFG quirks to replace the generic ECAM accessors with
+# device-specific ones that are shared with the DT driver.
+
+# The ACPI driver is generic and should not require driver-specific
+# config options to be enabled, so we always build these drivers on
+# ARM64 and use internal ifdefs to only build the pieces we need
+# depending on whether ACPI, the DT driver, or both are enabled.
+
+ifdef CONFIG_ACPI
+ifdef CONFIG_PCI_QUIRKS
+obj-$(CONFIG_ARM64) += pci-thunder-ecam.o
+obj-$(CONFIG_ARM64) += pci-thunder-pem.o
+obj-$(CONFIG_ARM64) += pci-xgene.o
+endif
+endif
diff --git a/drivers/pci/controller/cadence/Kconfig b/drivers/pci/controller/cadence/Kconfig
new file mode 100644
index 000000000..291d12711
--- /dev/null
+++ b/drivers/pci/controller/cadence/Kconfig
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Cadence-based PCIe controllers"
+	depends on PCI
+
+config PCIE_CADENCE
+	bool
+
+config PCIE_CADENCE_HOST
+	bool
+	depends on OF
+	select IRQ_DOMAIN
+	select PCIE_CADENCE
+
+config PCIE_CADENCE_EP
+	bool
+	depends on OF
+	depends on PCI_ENDPOINT
+	select PCIE_CADENCE
+
+config PCIE_CADENCE_PLAT
+	bool
+
+config PCIE_CADENCE_PLAT_HOST
+	bool "Cadence platform PCIe controller (host mode)"
+	depends on OF
+	select PCIE_CADENCE_HOST
+	select PCIE_CADENCE_PLAT
+	help
+	  Say Y here if you want to support the Cadence PCIe platform controller in
+	  host mode. This PCIe controller may be embedded into many different
+	  vendors SoCs.
+
+config PCIE_CADENCE_PLAT_EP
+	bool "Cadence platform PCIe controller (endpoint mode)"
+	depends on OF
+	depends on PCI_ENDPOINT
+	select PCIE_CADENCE_EP
+	select PCIE_CADENCE_PLAT
+	help
+	  Say Y here if you want to support the Cadence PCIe  platform controller in
+	  endpoint mode. This PCIe controller may be embedded into many
+	  different vendors SoCs.
+
+config PCI_J721E
+	bool
+
+config PCI_J721E_HOST
+	bool "TI J721E PCIe controller (host mode)"
+	depends on OF
+	select PCIE_CADENCE_HOST
+	select PCI_J721E
+	help
+	  Say Y here if you want to support the TI J721E PCIe platform
+	  controller in host mode. TI J721E PCIe controller uses Cadence PCIe
+	  core.
+
+config PCI_J721E_EP
+	bool "TI J721E PCIe controller (endpoint mode)"
+	depends on OF
+	depends on PCI_ENDPOINT
+	select PCIE_CADENCE_EP
+	select PCI_J721E
+	help
+	  Say Y here if you want to support the TI J721E PCIe platform
+	  controller in endpoint mode. TI J721E PCIe controller uses Cadence PCIe
+	  core.
+endmenu
diff --git a/drivers/pci/controller/cadence/Makefile b/drivers/pci/controller/cadence/Makefile
new file mode 100644
index 000000000..9bac5fb2f
--- /dev/null
+++ b/drivers/pci/controller/cadence/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_PCIE_CADENCE) += pcie-cadence.o
+obj-$(CONFIG_PCIE_CADENCE_HOST) += pcie-cadence-host.o
+obj-$(CONFIG_PCIE_CADENCE_EP) += pcie-cadence-ep.o
+obj-$(CONFIG_PCIE_CADENCE_PLAT) += pcie-cadence-plat.o
+obj-$(CONFIG_PCI_J721E) += pci-j721e.o
diff --git a/drivers/pci/controller/cadence/pci-j721e.c b/drivers/pci/controller/cadence/pci-j721e.c
new file mode 100644
index 000000000..2c87e7728
--- /dev/null
+++ b/drivers/pci/controller/cadence/pci-j721e.c
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * pci-j721e - PCIe controller driver for TI's J721E SoCs
+ *
+ * Copyright (C) 2020 Texas Instruments Incorporated - http://www.ti.com
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/io.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+
+#include "../../pci.h"
+#include "pcie-cadence.h"
+
+#define ENABLE_REG_SYS_2	0x108
+#define STATUS_REG_SYS_2	0x508
+#define STATUS_CLR_REG_SYS_2	0x708
+#define LINK_DOWN		BIT(1)
+#define J7200_LINK_DOWN		BIT(10)
+
+#define J721E_PCIE_USER_CMD_STATUS	0x4
+#define LINK_TRAINING_ENABLE		BIT(0)
+
+#define J721E_PCIE_USER_LINKSTATUS	0x14
+#define LINK_STATUS			GENMASK(1, 0)
+
+enum link_status {
+	NO_RECEIVERS_DETECTED,
+	LINK_TRAINING_IN_PROGRESS,
+	LINK_UP_DL_IN_PROGRESS,
+	LINK_UP_DL_COMPLETED,
+};
+
+#define J721E_MODE_RC			BIT(7)
+#define LANE_COUNT_MASK			BIT(8)
+#define LANE_COUNT(n)			((n) << 8)
+
+#define GENERATION_SEL_MASK		GENMASK(1, 0)
+
+#define MAX_LANES			2
+
+struct j721e_pcie {
+	struct cdns_pcie	*cdns_pcie;
+	struct clk		*refclk;
+	u32			mode;
+	u32			num_lanes;
+	void __iomem		*user_cfg_base;
+	void __iomem		*intd_cfg_base;
+	u32			linkdown_irq_regfield;
+};
+
+enum j721e_pcie_mode {
+	PCI_MODE_RC,
+	PCI_MODE_EP,
+};
+
+struct j721e_pcie_data {
+	enum j721e_pcie_mode	mode;
+	unsigned int		quirk_retrain_flag:1;
+	unsigned int		quirk_detect_quiet_flag:1;
+	unsigned int		quirk_disable_flr:1;
+	u32			linkdown_irq_regfield;
+	unsigned int		byte_access_allowed:1;
+};
+
+static inline u32 j721e_pcie_user_readl(struct j721e_pcie *pcie, u32 offset)
+{
+	return readl(pcie->user_cfg_base + offset);
+}
+
+static inline void j721e_pcie_user_writel(struct j721e_pcie *pcie, u32 offset,
+					  u32 value)
+{
+	writel(value, pcie->user_cfg_base + offset);
+}
+
+static inline u32 j721e_pcie_intd_readl(struct j721e_pcie *pcie, u32 offset)
+{
+	return readl(pcie->intd_cfg_base + offset);
+}
+
+static inline void j721e_pcie_intd_writel(struct j721e_pcie *pcie, u32 offset,
+					  u32 value)
+{
+	writel(value, pcie->intd_cfg_base + offset);
+}
+
+static irqreturn_t j721e_pcie_link_irq_handler(int irq, void *priv)
+{
+	struct j721e_pcie *pcie = priv;
+	struct device *dev = pcie->cdns_pcie->dev;
+	u32 reg;
+
+	reg = j721e_pcie_intd_readl(pcie, STATUS_REG_SYS_2);
+	if (!(reg & pcie->linkdown_irq_regfield))
+		return IRQ_NONE;
+
+	dev_err(dev, "LINK DOWN!\n");
+
+	j721e_pcie_intd_writel(pcie, STATUS_CLR_REG_SYS_2, pcie->linkdown_irq_regfield);
+	return IRQ_HANDLED;
+}
+
+static void j721e_pcie_config_link_irq(struct j721e_pcie *pcie)
+{
+	u32 reg;
+
+	reg = j721e_pcie_intd_readl(pcie, ENABLE_REG_SYS_2);
+	reg |= pcie->linkdown_irq_regfield;
+	j721e_pcie_intd_writel(pcie, ENABLE_REG_SYS_2, reg);
+}
+
+static int j721e_pcie_start_link(struct cdns_pcie *cdns_pcie)
+{
+	struct j721e_pcie *pcie = dev_get_drvdata(cdns_pcie->dev);
+	u32 reg;
+
+	reg = j721e_pcie_user_readl(pcie, J721E_PCIE_USER_CMD_STATUS);
+	reg |= LINK_TRAINING_ENABLE;
+	j721e_pcie_user_writel(pcie, J721E_PCIE_USER_CMD_STATUS, reg);
+
+	return 0;
+}
+
+static void j721e_pcie_stop_link(struct cdns_pcie *cdns_pcie)
+{
+	struct j721e_pcie *pcie = dev_get_drvdata(cdns_pcie->dev);
+	u32 reg;
+
+	reg = j721e_pcie_user_readl(pcie, J721E_PCIE_USER_CMD_STATUS);
+	reg &= ~LINK_TRAINING_ENABLE;
+	j721e_pcie_user_writel(pcie, J721E_PCIE_USER_CMD_STATUS, reg);
+}
+
+static bool j721e_pcie_link_up(struct cdns_pcie *cdns_pcie)
+{
+	struct j721e_pcie *pcie = dev_get_drvdata(cdns_pcie->dev);
+	u32 reg;
+
+	reg = j721e_pcie_user_readl(pcie, J721E_PCIE_USER_LINKSTATUS);
+	reg &= LINK_STATUS;
+	if (reg == LINK_UP_DL_COMPLETED)
+		return true;
+
+	return false;
+}
+
+static const struct cdns_pcie_ops j721e_pcie_ops = {
+	.start_link = j721e_pcie_start_link,
+	.stop_link = j721e_pcie_stop_link,
+	.link_up = j721e_pcie_link_up,
+};
+
+static int j721e_pcie_set_mode(struct j721e_pcie *pcie, struct regmap *syscon,
+			       unsigned int offset)
+{
+	struct device *dev = pcie->cdns_pcie->dev;
+	u32 mask = J721E_MODE_RC;
+	u32 mode = pcie->mode;
+	u32 val = 0;
+	int ret = 0;
+
+	if (mode == PCI_MODE_RC)
+		val = J721E_MODE_RC;
+
+	ret = regmap_update_bits(syscon, offset, mask, val);
+	if (ret)
+		dev_err(dev, "failed to set pcie mode\n");
+
+	return ret;
+}
+
+static int j721e_pcie_set_link_speed(struct j721e_pcie *pcie,
+				     struct regmap *syscon, unsigned int offset)
+{
+	struct device *dev = pcie->cdns_pcie->dev;
+	struct device_node *np = dev->of_node;
+	int link_speed;
+	u32 val = 0;
+	int ret;
+
+	link_speed = of_pci_get_max_link_speed(np);
+	if (link_speed < 2)
+		link_speed = 2;
+
+	val = link_speed - 1;
+	ret = regmap_update_bits(syscon, offset, GENERATION_SEL_MASK, val);
+	if (ret)
+		dev_err(dev, "failed to set link speed\n");
+
+	return ret;
+}
+
+static int j721e_pcie_set_lane_count(struct j721e_pcie *pcie,
+				     struct regmap *syscon, unsigned int offset)
+{
+	struct device *dev = pcie->cdns_pcie->dev;
+	u32 lanes = pcie->num_lanes;
+	u32 val = 0;
+	int ret;
+
+	val = LANE_COUNT(lanes - 1);
+	ret = regmap_update_bits(syscon, offset, LANE_COUNT_MASK, val);
+	if (ret)
+		dev_err(dev, "failed to set link count\n");
+
+	return ret;
+}
+
+static int j721e_pcie_ctrl_init(struct j721e_pcie *pcie)
+{
+	struct device *dev = pcie->cdns_pcie->dev;
+	struct device_node *node = dev->of_node;
+	struct of_phandle_args args;
+	unsigned int offset = 0;
+	struct regmap *syscon;
+	int ret;
+
+	syscon = syscon_regmap_lookup_by_phandle(node, "ti,syscon-pcie-ctrl");
+	if (IS_ERR(syscon)) {
+		dev_err(dev, "Unable to get ti,syscon-pcie-ctrl regmap\n");
+		return PTR_ERR(syscon);
+	}
+
+	/* Do not error out to maintain old DT compatibility */
+	ret = of_parse_phandle_with_fixed_args(node, "ti,syscon-pcie-ctrl", 1,
+					       0, &args);
+	if (!ret)
+		offset = args.args[0];
+
+	ret = j721e_pcie_set_mode(pcie, syscon, offset);
+	if (ret < 0) {
+		dev_err(dev, "Failed to set pci mode\n");
+		return ret;
+	}
+
+	ret = j721e_pcie_set_link_speed(pcie, syscon, offset);
+	if (ret < 0) {
+		dev_err(dev, "Failed to set link speed\n");
+		return ret;
+	}
+
+	ret = j721e_pcie_set_lane_count(pcie, syscon, offset);
+	if (ret < 0) {
+		dev_err(dev, "Failed to set num-lanes\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int cdns_ti_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
+				    int where, int size, u32 *value)
+{
+	if (pci_is_root_bus(bus))
+		return pci_generic_config_read32(bus, devfn, where, size,
+						 value);
+
+	return pci_generic_config_read(bus, devfn, where, size, value);
+}
+
+static int cdns_ti_pcie_config_write(struct pci_bus *bus, unsigned int devfn,
+				     int where, int size, u32 value)
+{
+	if (pci_is_root_bus(bus))
+		return pci_generic_config_write32(bus, devfn, where, size,
+						  value);
+
+	return pci_generic_config_write(bus, devfn, where, size, value);
+}
+
+static struct pci_ops cdns_ti_pcie_host_ops = {
+	.map_bus	= cdns_pci_map_bus,
+	.read		= cdns_ti_pcie_config_read,
+	.write		= cdns_ti_pcie_config_write,
+};
+
+static const struct j721e_pcie_data j721e_pcie_rc_data = {
+	.mode = PCI_MODE_RC,
+	.quirk_retrain_flag = true,
+	.byte_access_allowed = false,
+	.linkdown_irq_regfield = LINK_DOWN,
+};
+
+static const struct j721e_pcie_data j721e_pcie_ep_data = {
+	.mode = PCI_MODE_EP,
+	.linkdown_irq_regfield = LINK_DOWN,
+};
+
+static const struct j721e_pcie_data j7200_pcie_rc_data = {
+	.mode = PCI_MODE_RC,
+	.quirk_detect_quiet_flag = true,
+	.linkdown_irq_regfield = J7200_LINK_DOWN,
+	.byte_access_allowed = true,
+};
+
+static const struct j721e_pcie_data j7200_pcie_ep_data = {
+	.mode = PCI_MODE_EP,
+	.quirk_detect_quiet_flag = true,
+	.quirk_disable_flr = true,
+};
+
+static const struct j721e_pcie_data am64_pcie_rc_data = {
+	.mode = PCI_MODE_RC,
+	.linkdown_irq_regfield = J7200_LINK_DOWN,
+	.byte_access_allowed = true,
+};
+
+static const struct j721e_pcie_data am64_pcie_ep_data = {
+	.mode = PCI_MODE_EP,
+	.linkdown_irq_regfield = J7200_LINK_DOWN,
+};
+
+static const struct of_device_id of_j721e_pcie_match[] = {
+	{
+		.compatible = "ti,j721e-pcie-host",
+		.data = &j721e_pcie_rc_data,
+	},
+	{
+		.compatible = "ti,j721e-pcie-ep",
+		.data = &j721e_pcie_ep_data,
+	},
+	{
+		.compatible = "ti,j7200-pcie-host",
+		.data = &j7200_pcie_rc_data,
+	},
+	{
+		.compatible = "ti,j7200-pcie-ep",
+		.data = &j7200_pcie_ep_data,
+	},
+	{
+		.compatible = "ti,am64-pcie-host",
+		.data = &am64_pcie_rc_data,
+	},
+	{
+		.compatible = "ti,am64-pcie-ep",
+		.data = &am64_pcie_ep_data,
+	},
+	{},
+};
+
+static int j721e_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *node = dev->of_node;
+	struct pci_host_bridge *bridge;
+	const struct j721e_pcie_data *data;
+	struct cdns_pcie *cdns_pcie;
+	struct j721e_pcie *pcie;
+	struct cdns_pcie_rc *rc = NULL;
+	struct cdns_pcie_ep *ep = NULL;
+	struct gpio_desc *gpiod;
+	void __iomem *base;
+	struct clk *clk;
+	u32 num_lanes;
+	u32 mode;
+	int ret;
+	int irq;
+
+	data = of_device_get_match_data(dev);
+	if (!data)
+		return -EINVAL;
+
+	mode = (u32)data->mode;
+
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	switch (mode) {
+	case PCI_MODE_RC:
+		if (!IS_ENABLED(CONFIG_PCIE_CADENCE_HOST))
+			return -ENODEV;
+
+		bridge = devm_pci_alloc_host_bridge(dev, sizeof(*rc));
+		if (!bridge)
+			return -ENOMEM;
+
+		if (!data->byte_access_allowed)
+			bridge->ops = &cdns_ti_pcie_host_ops;
+		rc = pci_host_bridge_priv(bridge);
+		rc->quirk_retrain_flag = data->quirk_retrain_flag;
+		rc->quirk_detect_quiet_flag = data->quirk_detect_quiet_flag;
+
+		cdns_pcie = &rc->pcie;
+		cdns_pcie->dev = dev;
+		cdns_pcie->ops = &j721e_pcie_ops;
+		pcie->cdns_pcie = cdns_pcie;
+		break;
+	case PCI_MODE_EP:
+		if (!IS_ENABLED(CONFIG_PCIE_CADENCE_EP))
+			return -ENODEV;
+
+		ep = devm_kzalloc(dev, sizeof(*ep), GFP_KERNEL);
+		if (!ep)
+			return -ENOMEM;
+
+		ep->quirk_detect_quiet_flag = data->quirk_detect_quiet_flag;
+		ep->quirk_disable_flr = data->quirk_disable_flr;
+
+		cdns_pcie = &ep->pcie;
+		cdns_pcie->dev = dev;
+		cdns_pcie->ops = &j721e_pcie_ops;
+		pcie->cdns_pcie = cdns_pcie;
+		break;
+	default:
+		dev_err(dev, "INVALID device type %d\n", mode);
+		return 0;
+	}
+
+	pcie->mode = mode;
+	pcie->linkdown_irq_regfield = data->linkdown_irq_regfield;
+
+	base = devm_platform_ioremap_resource_byname(pdev, "intd_cfg");
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+	pcie->intd_cfg_base = base;
+
+	base = devm_platform_ioremap_resource_byname(pdev, "user_cfg");
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+	pcie->user_cfg_base = base;
+
+	ret = of_property_read_u32(node, "num-lanes", &num_lanes);
+	if (ret || num_lanes > MAX_LANES)
+		num_lanes = 1;
+	pcie->num_lanes = num_lanes;
+
+	if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)))
+		return -EINVAL;
+
+	irq = platform_get_irq_byname(pdev, "link_state");
+	if (irq < 0)
+		return irq;
+
+	dev_set_drvdata(dev, pcie);
+	pm_runtime_enable(dev);
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		dev_err(dev, "pm_runtime_get_sync failed\n");
+		goto err_get_sync;
+	}
+
+	ret = j721e_pcie_ctrl_init(pcie);
+	if (ret < 0) {
+		dev_err(dev, "pm_runtime_get_sync failed\n");
+		goto err_get_sync;
+	}
+
+	ret = devm_request_irq(dev, irq, j721e_pcie_link_irq_handler, 0,
+			       "j721e-pcie-link-down-irq", pcie);
+	if (ret < 0) {
+		dev_err(dev, "failed to request link state IRQ %d\n", irq);
+		goto err_get_sync;
+	}
+
+	j721e_pcie_config_link_irq(pcie);
+
+	switch (mode) {
+	case PCI_MODE_RC:
+		gpiod = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
+		if (IS_ERR(gpiod)) {
+			ret = PTR_ERR(gpiod);
+			if (ret != -EPROBE_DEFER)
+				dev_err(dev, "Failed to get reset GPIO\n");
+			goto err_get_sync;
+		}
+
+		ret = cdns_pcie_init_phy(dev, cdns_pcie);
+		if (ret) {
+			dev_err(dev, "Failed to init phy\n");
+			goto err_get_sync;
+		}
+
+		clk = devm_clk_get_optional(dev, "pcie_refclk");
+		if (IS_ERR(clk)) {
+			ret = PTR_ERR(clk);
+			dev_err(dev, "failed to get pcie_refclk\n");
+			goto err_pcie_setup;
+		}
+
+		ret = clk_prepare_enable(clk);
+		if (ret) {
+			dev_err(dev, "failed to enable pcie_refclk\n");
+			goto err_pcie_setup;
+		}
+		pcie->refclk = clk;
+
+		/*
+		 * "Power Sequencing and Reset Signal Timings" table in
+		 * PCI EXPRESS CARD ELECTROMECHANICAL SPECIFICATION, REV. 3.0
+		 * indicates PERST# should be deasserted after minimum of 100us
+		 * once REFCLK is stable. The REFCLK to the connector in RC
+		 * mode is selected while enabling the PHY. So deassert PERST#
+		 * after 100 us.
+		 */
+		if (gpiod) {
+			usleep_range(100, 200);
+			gpiod_set_value_cansleep(gpiod, 1);
+		}
+
+		ret = cdns_pcie_host_setup(rc);
+		if (ret < 0) {
+			clk_disable_unprepare(pcie->refclk);
+			goto err_pcie_setup;
+		}
+
+		break;
+	case PCI_MODE_EP:
+		ret = cdns_pcie_init_phy(dev, cdns_pcie);
+		if (ret) {
+			dev_err(dev, "Failed to init phy\n");
+			goto err_get_sync;
+		}
+
+		ret = cdns_pcie_ep_setup(ep);
+		if (ret < 0)
+			goto err_pcie_setup;
+
+		break;
+	}
+
+	return 0;
+
+err_pcie_setup:
+	cdns_pcie_disable_phy(cdns_pcie);
+
+err_get_sync:
+	pm_runtime_put(dev);
+	pm_runtime_disable(dev);
+
+	return ret;
+}
+
+static void j721e_pcie_remove(struct platform_device *pdev)
+{
+	struct j721e_pcie *pcie = platform_get_drvdata(pdev);
+	struct cdns_pcie *cdns_pcie = pcie->cdns_pcie;
+	struct device *dev = &pdev->dev;
+
+	clk_disable_unprepare(pcie->refclk);
+	cdns_pcie_disable_phy(cdns_pcie);
+	pm_runtime_put(dev);
+	pm_runtime_disable(dev);
+}
+
+static struct platform_driver j721e_pcie_driver = {
+	.probe  = j721e_pcie_probe,
+	.remove_new = j721e_pcie_remove,
+	.driver = {
+		.name	= "j721e-pcie",
+		.of_match_table = of_j721e_pcie_match,
+		.suppress_bind_attrs = true,
+	},
+};
+builtin_platform_driver(j721e_pcie_driver);
diff --git a/drivers/pci/controller/cadence/pcie-cadence-ep.c b/drivers/pci/controller/cadence/pcie-cadence-ep.c
new file mode 100644
index 000000000..b8b655d40
--- /dev/null
+++ b/drivers/pci/controller/cadence/pcie-cadence-ep.c
@@ -0,0 +1,743 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Cadence
+// Cadence PCIe endpoint controller driver.
+// Author: Cyrille Pitchen <cyrille.pitchen@free-electrons.com>
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/pci-epc.h>
+#include <linux/platform_device.h>
+#include <linux/sizes.h>
+
+#include "pcie-cadence.h"
+
+#define CDNS_PCIE_EP_MIN_APERTURE		128	/* 128 bytes */
+#define CDNS_PCIE_EP_IRQ_PCI_ADDR_NONE		0x1
+#define CDNS_PCIE_EP_IRQ_PCI_ADDR_LEGACY	0x3
+
+static u8 cdns_pcie_get_fn_from_vfn(struct cdns_pcie *pcie, u8 fn, u8 vfn)
+{
+	u32 cap = CDNS_PCIE_EP_FUNC_SRIOV_CAP_OFFSET;
+	u32 first_vf_offset, stride;
+
+	if (vfn == 0)
+		return fn;
+
+	first_vf_offset = cdns_pcie_ep_fn_readw(pcie, fn, cap + PCI_SRIOV_VF_OFFSET);
+	stride = cdns_pcie_ep_fn_readw(pcie, fn, cap +  PCI_SRIOV_VF_STRIDE);
+	fn = fn + first_vf_offset + ((vfn - 1) * stride);
+
+	return fn;
+}
+
+static int cdns_pcie_ep_write_header(struct pci_epc *epc, u8 fn, u8 vfn,
+				     struct pci_epf_header *hdr)
+{
+	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
+	u32 cap = CDNS_PCIE_EP_FUNC_SRIOV_CAP_OFFSET;
+	struct cdns_pcie *pcie = &ep->pcie;
+	u32 reg;
+
+	if (vfn > 1) {
+		dev_err(&epc->dev, "Only Virtual Function #1 has deviceID\n");
+		return -EINVAL;
+	} else if (vfn == 1) {
+		reg = cap + PCI_SRIOV_VF_DID;
+		cdns_pcie_ep_fn_writew(pcie, fn, reg, hdr->deviceid);
+		return 0;
+	}
+
+	cdns_pcie_ep_fn_writew(pcie, fn, PCI_DEVICE_ID, hdr->deviceid);
+	cdns_pcie_ep_fn_writeb(pcie, fn, PCI_REVISION_ID, hdr->revid);
+	cdns_pcie_ep_fn_writeb(pcie, fn, PCI_CLASS_PROG, hdr->progif_code);
+	cdns_pcie_ep_fn_writew(pcie, fn, PCI_CLASS_DEVICE,
+			       hdr->subclass_code | hdr->baseclass_code << 8);
+	cdns_pcie_ep_fn_writeb(pcie, fn, PCI_CACHE_LINE_SIZE,
+			       hdr->cache_line_size);
+	cdns_pcie_ep_fn_writew(pcie, fn, PCI_SUBSYSTEM_ID, hdr->subsys_id);
+	cdns_pcie_ep_fn_writeb(pcie, fn, PCI_INTERRUPT_PIN, hdr->interrupt_pin);
+
+	/*
+	 * Vendor ID can only be modified from function 0, all other functions
+	 * use the same vendor ID as function 0.
+	 */
+	if (fn == 0) {
+		/* Update the vendor IDs. */
+		u32 id = CDNS_PCIE_LM_ID_VENDOR(hdr->vendorid) |
+			 CDNS_PCIE_LM_ID_SUBSYS(hdr->subsys_vendor_id);
+
+		cdns_pcie_writel(pcie, CDNS_PCIE_LM_ID, id);
+	}
+
+	return 0;
+}
+
+static int cdns_pcie_ep_set_bar(struct pci_epc *epc, u8 fn, u8 vfn,
+				struct pci_epf_bar *epf_bar)
+{
+	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
+	struct cdns_pcie_epf *epf = &ep->epf[fn];
+	struct cdns_pcie *pcie = &ep->pcie;
+	dma_addr_t bar_phys = epf_bar->phys_addr;
+	enum pci_barno bar = epf_bar->barno;
+	int flags = epf_bar->flags;
+	u32 addr0, addr1, reg, cfg, b, aperture, ctrl;
+	u64 sz;
+
+	/* BAR size is 2^(aperture + 7) */
+	sz = max_t(size_t, epf_bar->size, CDNS_PCIE_EP_MIN_APERTURE);
+	/*
+	 * roundup_pow_of_two() returns an unsigned long, which is not suited
+	 * for 64bit values.
+	 */
+	sz = 1ULL << fls64(sz - 1);
+	aperture = ilog2(sz) - 7; /* 128B -> 0, 256B -> 1, 512B -> 2, ... */
+
+	if ((flags & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) {
+		ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_IO_32BITS;
+	} else {
+		bool is_prefetch = !!(flags & PCI_BASE_ADDRESS_MEM_PREFETCH);
+		bool is_64bits = sz > SZ_2G;
+
+		if (is_64bits && (bar & 1))
+			return -EINVAL;
+
+		if (is_64bits && !(flags & PCI_BASE_ADDRESS_MEM_TYPE_64))
+			epf_bar->flags |= PCI_BASE_ADDRESS_MEM_TYPE_64;
+
+		if (is_64bits && is_prefetch)
+			ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_PREFETCH_MEM_64BITS;
+		else if (is_prefetch)
+			ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_PREFETCH_MEM_32BITS;
+		else if (is_64bits)
+			ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_MEM_64BITS;
+		else
+			ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_MEM_32BITS;
+	}
+
+	addr0 = lower_32_bits(bar_phys);
+	addr1 = upper_32_bits(bar_phys);
+
+	if (vfn == 1)
+		reg = CDNS_PCIE_LM_EP_VFUNC_BAR_CFG(bar, fn);
+	else
+		reg = CDNS_PCIE_LM_EP_FUNC_BAR_CFG(bar, fn);
+	b = (bar < BAR_4) ? bar : bar - BAR_4;
+
+	if (vfn == 0 || vfn == 1) {
+		cfg = cdns_pcie_readl(pcie, reg);
+		cfg &= ~(CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE_MASK(b) |
+			 CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL_MASK(b));
+		cfg |= (CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE(b, aperture) |
+			CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL(b, ctrl));
+		cdns_pcie_writel(pcie, reg, cfg);
+	}
+
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_IB_EP_FUNC_BAR_ADDR0(fn, bar),
+			 addr0);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_IB_EP_FUNC_BAR_ADDR1(fn, bar),
+			 addr1);
+
+	if (vfn > 0)
+		epf = &epf->epf[vfn - 1];
+	epf->epf_bar[bar] = epf_bar;
+
+	return 0;
+}
+
+static void cdns_pcie_ep_clear_bar(struct pci_epc *epc, u8 fn, u8 vfn,
+				   struct pci_epf_bar *epf_bar)
+{
+	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
+	struct cdns_pcie_epf *epf = &ep->epf[fn];
+	struct cdns_pcie *pcie = &ep->pcie;
+	enum pci_barno bar = epf_bar->barno;
+	u32 reg, cfg, b, ctrl;
+
+	if (vfn == 1)
+		reg = CDNS_PCIE_LM_EP_VFUNC_BAR_CFG(bar, fn);
+	else
+		reg = CDNS_PCIE_LM_EP_FUNC_BAR_CFG(bar, fn);
+	b = (bar < BAR_4) ? bar : bar - BAR_4;
+
+	if (vfn == 0 || vfn == 1) {
+		ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_DISABLED;
+		cfg = cdns_pcie_readl(pcie, reg);
+		cfg &= ~(CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE_MASK(b) |
+			 CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL_MASK(b));
+		cfg |= CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL(b, ctrl);
+		cdns_pcie_writel(pcie, reg, cfg);
+	}
+
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_IB_EP_FUNC_BAR_ADDR0(fn, bar), 0);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_IB_EP_FUNC_BAR_ADDR1(fn, bar), 0);
+
+	if (vfn > 0)
+		epf = &epf->epf[vfn - 1];
+	epf->epf_bar[bar] = NULL;
+}
+
+static int cdns_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, u8 vfn,
+				 phys_addr_t addr, u64 pci_addr, size_t size)
+{
+	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
+	struct cdns_pcie *pcie = &ep->pcie;
+	u32 r;
+
+	r = find_first_zero_bit(&ep->ob_region_map, BITS_PER_LONG);
+	if (r >= ep->max_regions - 1) {
+		dev_err(&epc->dev, "no free outbound region\n");
+		return -EINVAL;
+	}
+
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
+	cdns_pcie_set_outbound_region(pcie, 0, fn, r, false, addr, pci_addr, size);
+
+	set_bit(r, &ep->ob_region_map);
+	ep->ob_addr[r] = addr;
+
+	return 0;
+}
+
+static void cdns_pcie_ep_unmap_addr(struct pci_epc *epc, u8 fn, u8 vfn,
+				    phys_addr_t addr)
+{
+	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
+	struct cdns_pcie *pcie = &ep->pcie;
+	u32 r;
+
+	for (r = 0; r < ep->max_regions - 1; r++)
+		if (ep->ob_addr[r] == addr)
+			break;
+
+	if (r == ep->max_regions - 1)
+		return;
+
+	cdns_pcie_reset_outbound_region(pcie, r);
+
+	ep->ob_addr[r] = 0;
+	clear_bit(r, &ep->ob_region_map);
+}
+
+static int cdns_pcie_ep_set_msi(struct pci_epc *epc, u8 fn, u8 vfn, u8 mmc)
+{
+	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
+	struct cdns_pcie *pcie = &ep->pcie;
+	u32 cap = CDNS_PCIE_EP_FUNC_MSI_CAP_OFFSET;
+	u16 flags;
+
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
+
+	/*
+	 * Set the Multiple Message Capable bitfield into the Message Control
+	 * register.
+	 */
+	flags = cdns_pcie_ep_fn_readw(pcie, fn, cap + PCI_MSI_FLAGS);
+	flags = (flags & ~PCI_MSI_FLAGS_QMASK) | (mmc << 1);
+	flags |= PCI_MSI_FLAGS_64BIT;
+	flags &= ~PCI_MSI_FLAGS_MASKBIT;
+	cdns_pcie_ep_fn_writew(pcie, fn, cap + PCI_MSI_FLAGS, flags);
+
+	return 0;
+}
+
+static int cdns_pcie_ep_get_msi(struct pci_epc *epc, u8 fn, u8 vfn)
+{
+	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
+	struct cdns_pcie *pcie = &ep->pcie;
+	u32 cap = CDNS_PCIE_EP_FUNC_MSI_CAP_OFFSET;
+	u16 flags, mme;
+
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
+
+	/* Validate that the MSI feature is actually enabled. */
+	flags = cdns_pcie_ep_fn_readw(pcie, fn, cap + PCI_MSI_FLAGS);
+	if (!(flags & PCI_MSI_FLAGS_ENABLE))
+		return -EINVAL;
+
+	/*
+	 * Get the Multiple Message Enable bitfield from the Message Control
+	 * register.
+	 */
+	mme = (flags & PCI_MSI_FLAGS_QSIZE) >> 4;
+
+	return mme;
+}
+
+static int cdns_pcie_ep_get_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
+{
+	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
+	struct cdns_pcie *pcie = &ep->pcie;
+	u32 cap = CDNS_PCIE_EP_FUNC_MSIX_CAP_OFFSET;
+	u32 val, reg;
+
+	func_no = cdns_pcie_get_fn_from_vfn(pcie, func_no, vfunc_no);
+
+	reg = cap + PCI_MSIX_FLAGS;
+	val = cdns_pcie_ep_fn_readw(pcie, func_no, reg);
+	if (!(val & PCI_MSIX_FLAGS_ENABLE))
+		return -EINVAL;
+
+	val &= PCI_MSIX_FLAGS_QSIZE;
+
+	return val;
+}
+
+static int cdns_pcie_ep_set_msix(struct pci_epc *epc, u8 fn, u8 vfn,
+				 u16 interrupts, enum pci_barno bir,
+				 u32 offset)
+{
+	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
+	struct cdns_pcie *pcie = &ep->pcie;
+	u32 cap = CDNS_PCIE_EP_FUNC_MSIX_CAP_OFFSET;
+	u32 val, reg;
+
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
+
+	reg = cap + PCI_MSIX_FLAGS;
+	val = cdns_pcie_ep_fn_readw(pcie, fn, reg);
+	val &= ~PCI_MSIX_FLAGS_QSIZE;
+	val |= interrupts;
+	cdns_pcie_ep_fn_writew(pcie, fn, reg, val);
+
+	/* Set MSIX BAR and offset */
+	reg = cap + PCI_MSIX_TABLE;
+	val = offset | bir;
+	cdns_pcie_ep_fn_writel(pcie, fn, reg, val);
+
+	/* Set PBA BAR and offset.  BAR must match MSIX BAR */
+	reg = cap + PCI_MSIX_PBA;
+	val = (offset + (interrupts * PCI_MSIX_ENTRY_SIZE)) | bir;
+	cdns_pcie_ep_fn_writel(pcie, fn, reg, val);
+
+	return 0;
+}
+
+static void cdns_pcie_ep_assert_intx(struct cdns_pcie_ep *ep, u8 fn, u8 intx,
+				     bool is_asserted)
+{
+	struct cdns_pcie *pcie = &ep->pcie;
+	unsigned long flags;
+	u32 offset;
+	u16 status;
+	u8 msg_code;
+
+	intx &= 3;
+
+	/* Set the outbound region if needed. */
+	if (unlikely(ep->irq_pci_addr != CDNS_PCIE_EP_IRQ_PCI_ADDR_LEGACY ||
+		     ep->irq_pci_fn != fn)) {
+		/* First region was reserved for IRQ writes. */
+		cdns_pcie_set_outbound_region_for_normal_msg(pcie, 0, fn, 0,
+							     ep->irq_phys_addr);
+		ep->irq_pci_addr = CDNS_PCIE_EP_IRQ_PCI_ADDR_LEGACY;
+		ep->irq_pci_fn = fn;
+	}
+
+	if (is_asserted) {
+		ep->irq_pending |= BIT(intx);
+		msg_code = MSG_CODE_ASSERT_INTA + intx;
+	} else {
+		ep->irq_pending &= ~BIT(intx);
+		msg_code = MSG_CODE_DEASSERT_INTA + intx;
+	}
+
+	spin_lock_irqsave(&ep->lock, flags);
+	status = cdns_pcie_ep_fn_readw(pcie, fn, PCI_STATUS);
+	if (((status & PCI_STATUS_INTERRUPT) != 0) ^ (ep->irq_pending != 0)) {
+		status ^= PCI_STATUS_INTERRUPT;
+		cdns_pcie_ep_fn_writew(pcie, fn, PCI_STATUS, status);
+	}
+	spin_unlock_irqrestore(&ep->lock, flags);
+
+	offset = CDNS_PCIE_NORMAL_MSG_ROUTING(MSG_ROUTING_LOCAL) |
+		 CDNS_PCIE_NORMAL_MSG_CODE(msg_code) |
+		 CDNS_PCIE_MSG_NO_DATA;
+	writel(0, ep->irq_cpu_addr + offset);
+}
+
+static int cdns_pcie_ep_send_legacy_irq(struct cdns_pcie_ep *ep, u8 fn, u8 vfn,
+					u8 intx)
+{
+	u16 cmd;
+
+	cmd = cdns_pcie_ep_fn_readw(&ep->pcie, fn, PCI_COMMAND);
+	if (cmd & PCI_COMMAND_INTX_DISABLE)
+		return -EINVAL;
+
+	cdns_pcie_ep_assert_intx(ep, fn, intx, true);
+	/*
+	 * The mdelay() value was taken from dra7xx_pcie_raise_legacy_irq()
+	 */
+	mdelay(1);
+	cdns_pcie_ep_assert_intx(ep, fn, intx, false);
+	return 0;
+}
+
+static int cdns_pcie_ep_send_msi_irq(struct cdns_pcie_ep *ep, u8 fn, u8 vfn,
+				     u8 interrupt_num)
+{
+	struct cdns_pcie *pcie = &ep->pcie;
+	u32 cap = CDNS_PCIE_EP_FUNC_MSI_CAP_OFFSET;
+	u16 flags, mme, data, data_mask;
+	u8 msi_count;
+	u64 pci_addr, pci_addr_mask = 0xff;
+
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
+
+	/* Check whether the MSI feature has been enabled by the PCI host. */
+	flags = cdns_pcie_ep_fn_readw(pcie, fn, cap + PCI_MSI_FLAGS);
+	if (!(flags & PCI_MSI_FLAGS_ENABLE))
+		return -EINVAL;
+
+	/* Get the number of enabled MSIs */
+	mme = (flags & PCI_MSI_FLAGS_QSIZE) >> 4;
+	msi_count = 1 << mme;
+	if (!interrupt_num || interrupt_num > msi_count)
+		return -EINVAL;
+
+	/* Compute the data value to be written. */
+	data_mask = msi_count - 1;
+	data = cdns_pcie_ep_fn_readw(pcie, fn, cap + PCI_MSI_DATA_64);
+	data = (data & ~data_mask) | ((interrupt_num - 1) & data_mask);
+
+	/* Get the PCI address where to write the data into. */
+	pci_addr = cdns_pcie_ep_fn_readl(pcie, fn, cap + PCI_MSI_ADDRESS_HI);
+	pci_addr <<= 32;
+	pci_addr |= cdns_pcie_ep_fn_readl(pcie, fn, cap + PCI_MSI_ADDRESS_LO);
+	pci_addr &= GENMASK_ULL(63, 2);
+
+	/* Set the outbound region if needed. */
+	if (unlikely(ep->irq_pci_addr != (pci_addr & ~pci_addr_mask) ||
+		     ep->irq_pci_fn != fn)) {
+		/* First region was reserved for IRQ writes. */
+		cdns_pcie_set_outbound_region(pcie, 0, fn, 0,
+					      false,
+					      ep->irq_phys_addr,
+					      pci_addr & ~pci_addr_mask,
+					      pci_addr_mask + 1);
+		ep->irq_pci_addr = (pci_addr & ~pci_addr_mask);
+		ep->irq_pci_fn = fn;
+	}
+	writel(data, ep->irq_cpu_addr + (pci_addr & pci_addr_mask));
+
+	return 0;
+}
+
+static int cdns_pcie_ep_map_msi_irq(struct pci_epc *epc, u8 fn, u8 vfn,
+				    phys_addr_t addr, u8 interrupt_num,
+				    u32 entry_size, u32 *msi_data,
+				    u32 *msi_addr_offset)
+{
+	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
+	u32 cap = CDNS_PCIE_EP_FUNC_MSI_CAP_OFFSET;
+	struct cdns_pcie *pcie = &ep->pcie;
+	u64 pci_addr, pci_addr_mask = 0xff;
+	u16 flags, mme, data, data_mask;
+	u8 msi_count;
+	int ret;
+	int i;
+
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
+
+	/* Check whether the MSI feature has been enabled by the PCI host. */
+	flags = cdns_pcie_ep_fn_readw(pcie, fn, cap + PCI_MSI_FLAGS);
+	if (!(flags & PCI_MSI_FLAGS_ENABLE))
+		return -EINVAL;
+
+	/* Get the number of enabled MSIs */
+	mme = (flags & PCI_MSI_FLAGS_QSIZE) >> 4;
+	msi_count = 1 << mme;
+	if (!interrupt_num || interrupt_num > msi_count)
+		return -EINVAL;
+
+	/* Compute the data value to be written. */
+	data_mask = msi_count - 1;
+	data = cdns_pcie_ep_fn_readw(pcie, fn, cap + PCI_MSI_DATA_64);
+	data = data & ~data_mask;
+
+	/* Get the PCI address where to write the data into. */
+	pci_addr = cdns_pcie_ep_fn_readl(pcie, fn, cap + PCI_MSI_ADDRESS_HI);
+	pci_addr <<= 32;
+	pci_addr |= cdns_pcie_ep_fn_readl(pcie, fn, cap + PCI_MSI_ADDRESS_LO);
+	pci_addr &= GENMASK_ULL(63, 2);
+
+	for (i = 0; i < interrupt_num; i++) {
+		ret = cdns_pcie_ep_map_addr(epc, fn, vfn, addr,
+					    pci_addr & ~pci_addr_mask,
+					    entry_size);
+		if (ret)
+			return ret;
+		addr = addr + entry_size;
+	}
+
+	*msi_data = data;
+	*msi_addr_offset = pci_addr & pci_addr_mask;
+
+	return 0;
+}
+
+static int cdns_pcie_ep_send_msix_irq(struct cdns_pcie_ep *ep, u8 fn, u8 vfn,
+				      u16 interrupt_num)
+{
+	u32 cap = CDNS_PCIE_EP_FUNC_MSIX_CAP_OFFSET;
+	u32 tbl_offset, msg_data, reg;
+	struct cdns_pcie *pcie = &ep->pcie;
+	struct pci_epf_msix_tbl *msix_tbl;
+	struct cdns_pcie_epf *epf;
+	u64 pci_addr_mask = 0xff;
+	u64 msg_addr;
+	u16 flags;
+	u8 bir;
+
+	epf = &ep->epf[fn];
+	if (vfn > 0)
+		epf = &epf->epf[vfn - 1];
+
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
+
+	/* Check whether the MSI-X feature has been enabled by the PCI host. */
+	flags = cdns_pcie_ep_fn_readw(pcie, fn, cap + PCI_MSIX_FLAGS);
+	if (!(flags & PCI_MSIX_FLAGS_ENABLE))
+		return -EINVAL;
+
+	reg = cap + PCI_MSIX_TABLE;
+	tbl_offset = cdns_pcie_ep_fn_readl(pcie, fn, reg);
+	bir = tbl_offset & PCI_MSIX_TABLE_BIR;
+	tbl_offset &= PCI_MSIX_TABLE_OFFSET;
+
+	msix_tbl = epf->epf_bar[bir]->addr + tbl_offset;
+	msg_addr = msix_tbl[(interrupt_num - 1)].msg_addr;
+	msg_data = msix_tbl[(interrupt_num - 1)].msg_data;
+
+	/* Set the outbound region if needed. */
+	if (ep->irq_pci_addr != (msg_addr & ~pci_addr_mask) ||
+	    ep->irq_pci_fn != fn) {
+		/* First region was reserved for IRQ writes. */
+		cdns_pcie_set_outbound_region(pcie, 0, fn, 0,
+					      false,
+					      ep->irq_phys_addr,
+					      msg_addr & ~pci_addr_mask,
+					      pci_addr_mask + 1);
+		ep->irq_pci_addr = (msg_addr & ~pci_addr_mask);
+		ep->irq_pci_fn = fn;
+	}
+	writel(msg_data, ep->irq_cpu_addr + (msg_addr & pci_addr_mask));
+
+	return 0;
+}
+
+static int cdns_pcie_ep_raise_irq(struct pci_epc *epc, u8 fn, u8 vfn,
+				  enum pci_epc_irq_type type,
+				  u16 interrupt_num)
+{
+	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
+	struct cdns_pcie *pcie = &ep->pcie;
+	struct device *dev = pcie->dev;
+
+	switch (type) {
+	case PCI_EPC_IRQ_LEGACY:
+		if (vfn > 0) {
+			dev_err(dev, "Cannot raise legacy interrupts for VF\n");
+			return -EINVAL;
+		}
+		return cdns_pcie_ep_send_legacy_irq(ep, fn, vfn, 0);
+
+	case PCI_EPC_IRQ_MSI:
+		return cdns_pcie_ep_send_msi_irq(ep, fn, vfn, interrupt_num);
+
+	case PCI_EPC_IRQ_MSIX:
+		return cdns_pcie_ep_send_msix_irq(ep, fn, vfn, interrupt_num);
+
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+static int cdns_pcie_ep_start(struct pci_epc *epc)
+{
+	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
+	struct cdns_pcie *pcie = &ep->pcie;
+	struct device *dev = pcie->dev;
+	int max_epfs = sizeof(epc->function_num_map) * 8;
+	int ret, value, epf;
+
+	/*
+	 * BIT(0) is hardwired to 1, hence function 0 is always enabled
+	 * and can't be disabled anyway.
+	 */
+	cdns_pcie_writel(pcie, CDNS_PCIE_LM_EP_FUNC_CFG, epc->function_num_map);
+
+	if (ep->quirk_disable_flr) {
+		for (epf = 0; epf < max_epfs; epf++) {
+			if (!(epc->function_num_map & BIT(epf)))
+				continue;
+
+			value = cdns_pcie_ep_fn_readl(pcie, epf,
+					CDNS_PCIE_EP_FUNC_DEV_CAP_OFFSET +
+					PCI_EXP_DEVCAP);
+			value &= ~PCI_EXP_DEVCAP_FLR;
+			cdns_pcie_ep_fn_writel(pcie, epf,
+					CDNS_PCIE_EP_FUNC_DEV_CAP_OFFSET +
+					PCI_EXP_DEVCAP, value);
+		}
+	}
+
+	ret = cdns_pcie_start_link(pcie);
+	if (ret) {
+		dev_err(dev, "Failed to start link\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct pci_epc_features cdns_pcie_epc_vf_features = {
+	.linkup_notifier = false,
+	.msi_capable = true,
+	.msix_capable = true,
+	.align = 65536,
+};
+
+static const struct pci_epc_features cdns_pcie_epc_features = {
+	.linkup_notifier = false,
+	.msi_capable = true,
+	.msix_capable = true,
+	.align = 256,
+};
+
+static const struct pci_epc_features*
+cdns_pcie_ep_get_features(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
+{
+	if (!vfunc_no)
+		return &cdns_pcie_epc_features;
+
+	return &cdns_pcie_epc_vf_features;
+}
+
+static const struct pci_epc_ops cdns_pcie_epc_ops = {
+	.write_header	= cdns_pcie_ep_write_header,
+	.set_bar	= cdns_pcie_ep_set_bar,
+	.clear_bar	= cdns_pcie_ep_clear_bar,
+	.map_addr	= cdns_pcie_ep_map_addr,
+	.unmap_addr	= cdns_pcie_ep_unmap_addr,
+	.set_msi	= cdns_pcie_ep_set_msi,
+	.get_msi	= cdns_pcie_ep_get_msi,
+	.set_msix	= cdns_pcie_ep_set_msix,
+	.get_msix	= cdns_pcie_ep_get_msix,
+	.raise_irq	= cdns_pcie_ep_raise_irq,
+	.map_msi_irq	= cdns_pcie_ep_map_msi_irq,
+	.start		= cdns_pcie_ep_start,
+	.get_features	= cdns_pcie_ep_get_features,
+};
+
+
+int cdns_pcie_ep_setup(struct cdns_pcie_ep *ep)
+{
+	struct device *dev = ep->pcie.dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct device_node *np = dev->of_node;
+	struct cdns_pcie *pcie = &ep->pcie;
+	struct cdns_pcie_epf *epf;
+	struct resource *res;
+	struct pci_epc *epc;
+	int ret;
+	int i;
+
+	pcie->is_rc = false;
+
+	pcie->reg_base = devm_platform_ioremap_resource_byname(pdev, "reg");
+	if (IS_ERR(pcie->reg_base)) {
+		dev_err(dev, "missing \"reg\"\n");
+		return PTR_ERR(pcie->reg_base);
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "mem");
+	if (!res) {
+		dev_err(dev, "missing \"mem\"\n");
+		return -EINVAL;
+	}
+	pcie->mem_res = res;
+
+	ep->max_regions = CDNS_PCIE_MAX_OB;
+	of_property_read_u32(np, "cdns,max-outbound-regions", &ep->max_regions);
+
+	ep->ob_addr = devm_kcalloc(dev,
+				   ep->max_regions, sizeof(*ep->ob_addr),
+				   GFP_KERNEL);
+	if (!ep->ob_addr)
+		return -ENOMEM;
+
+	/* Disable all but function 0 (anyway BIT(0) is hardwired to 1). */
+	cdns_pcie_writel(pcie, CDNS_PCIE_LM_EP_FUNC_CFG, BIT(0));
+
+	epc = devm_pci_epc_create(dev, &cdns_pcie_epc_ops);
+	if (IS_ERR(epc)) {
+		dev_err(dev, "failed to create epc device\n");
+		return PTR_ERR(epc);
+	}
+
+	epc_set_drvdata(epc, ep);
+
+	if (of_property_read_u8(np, "max-functions", &epc->max_functions) < 0)
+		epc->max_functions = 1;
+
+	ep->epf = devm_kcalloc(dev, epc->max_functions, sizeof(*ep->epf),
+			       GFP_KERNEL);
+	if (!ep->epf)
+		return -ENOMEM;
+
+	epc->max_vfs = devm_kcalloc(dev, epc->max_functions,
+				    sizeof(*epc->max_vfs), GFP_KERNEL);
+	if (!epc->max_vfs)
+		return -ENOMEM;
+
+	ret = of_property_read_u8_array(np, "max-virtual-functions",
+					epc->max_vfs, epc->max_functions);
+	if (ret == 0) {
+		for (i = 0; i < epc->max_functions; i++) {
+			epf = &ep->epf[i];
+			if (epc->max_vfs[i] == 0)
+				continue;
+			epf->epf = devm_kcalloc(dev, epc->max_vfs[i],
+						sizeof(*ep->epf), GFP_KERNEL);
+			if (!epf->epf)
+				return -ENOMEM;
+		}
+	}
+
+	ret = pci_epc_mem_init(epc, pcie->mem_res->start,
+			       resource_size(pcie->mem_res), PAGE_SIZE);
+	if (ret < 0) {
+		dev_err(dev, "failed to initialize the memory space\n");
+		return ret;
+	}
+
+	ep->irq_cpu_addr = pci_epc_mem_alloc_addr(epc, &ep->irq_phys_addr,
+						  SZ_128K);
+	if (!ep->irq_cpu_addr) {
+		dev_err(dev, "failed to reserve memory space for MSI\n");
+		ret = -ENOMEM;
+		goto free_epc_mem;
+	}
+	ep->irq_pci_addr = CDNS_PCIE_EP_IRQ_PCI_ADDR_NONE;
+	/* Reserve region 0 for IRQs */
+	set_bit(0, &ep->ob_region_map);
+
+	if (ep->quirk_detect_quiet_flag)
+		cdns_pcie_detect_quiet_min_delay_set(&ep->pcie);
+
+	spin_lock_init(&ep->lock);
+
+	return 0;
+
+ free_epc_mem:
+	pci_epc_mem_exit(epc);
+
+	return ret;
+}
diff --git a/drivers/pci/controller/cadence/pcie-cadence-host.c b/drivers/pci/controller/cadence/pcie-cadence-host.c
new file mode 100644
index 000000000..5b14f7ee3
--- /dev/null
+++ b/drivers/pci/controller/cadence/pcie-cadence-host.c
@@ -0,0 +1,571 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Cadence
+// Cadence PCIe host controller driver.
+// Author: Cyrille Pitchen <cyrille.pitchen@free-electrons.com>
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/list_sort.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/platform_device.h>
+
+#include "pcie-cadence.h"
+
+#define LINK_RETRAIN_TIMEOUT HZ
+
+static u64 bar_max_size[] = {
+	[RP_BAR0] = _ULL(128 * SZ_2G),
+	[RP_BAR1] = SZ_2G,
+	[RP_NO_BAR] = _BITULL(63),
+};
+
+static u8 bar_aperture_mask[] = {
+	[RP_BAR0] = 0x1F,
+	[RP_BAR1] = 0xF,
+};
+
+void __iomem *cdns_pci_map_bus(struct pci_bus *bus, unsigned int devfn,
+			       int where)
+{
+	struct pci_host_bridge *bridge = pci_find_host_bridge(bus);
+	struct cdns_pcie_rc *rc = pci_host_bridge_priv(bridge);
+	struct cdns_pcie *pcie = &rc->pcie;
+	unsigned int busn = bus->number;
+	u32 addr0, desc0;
+
+	if (pci_is_root_bus(bus)) {
+		/*
+		 * Only the root port (devfn == 0) is connected to this bus.
+		 * All other PCI devices are behind some bridge hence on another
+		 * bus.
+		 */
+		if (devfn)
+			return NULL;
+
+		return pcie->reg_base + (where & 0xfff);
+	}
+	/* Check that the link is up */
+	if (!(cdns_pcie_readl(pcie, CDNS_PCIE_LM_BASE) & 0x1))
+		return NULL;
+	/* Clear AXI link-down status */
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_LINKDOWN, 0x0);
+
+	/* Update Output registers for AXI region 0. */
+	addr0 = CDNS_PCIE_AT_OB_REGION_PCI_ADDR0_NBITS(12) |
+		CDNS_PCIE_AT_OB_REGION_PCI_ADDR0_DEVFN(devfn) |
+		CDNS_PCIE_AT_OB_REGION_PCI_ADDR0_BUS(busn);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_PCI_ADDR0(0), addr0);
+
+	/* Configuration Type 0 or Type 1 access. */
+	desc0 = CDNS_PCIE_AT_OB_REGION_DESC0_HARDCODED_RID |
+		CDNS_PCIE_AT_OB_REGION_DESC0_DEVFN(0);
+	/*
+	 * The bus number was already set once for all in desc1 by
+	 * cdns_pcie_host_init_address_translation().
+	 */
+	if (busn == bridge->busnr + 1)
+		desc0 |= CDNS_PCIE_AT_OB_REGION_DESC0_TYPE_CONF_TYPE0;
+	else
+		desc0 |= CDNS_PCIE_AT_OB_REGION_DESC0_TYPE_CONF_TYPE1;
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_DESC0(0), desc0);
+
+	return rc->cfg_base + (where & 0xfff);
+}
+
+static struct pci_ops cdns_pcie_host_ops = {
+	.map_bus	= cdns_pci_map_bus,
+	.read		= pci_generic_config_read,
+	.write		= pci_generic_config_write,
+};
+
+static int cdns_pcie_host_training_complete(struct cdns_pcie *pcie)
+{
+	u32 pcie_cap_off = CDNS_PCIE_RP_CAP_OFFSET;
+	unsigned long end_jiffies;
+	u16 lnk_stat;
+
+	/* Wait for link training to complete. Exit after timeout. */
+	end_jiffies = jiffies + LINK_RETRAIN_TIMEOUT;
+	do {
+		lnk_stat = cdns_pcie_rp_readw(pcie, pcie_cap_off + PCI_EXP_LNKSTA);
+		if (!(lnk_stat & PCI_EXP_LNKSTA_LT))
+			break;
+		usleep_range(0, 1000);
+	} while (time_before(jiffies, end_jiffies));
+
+	if (!(lnk_stat & PCI_EXP_LNKSTA_LT))
+		return 0;
+
+	return -ETIMEDOUT;
+}
+
+static int cdns_pcie_host_wait_for_link(struct cdns_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	int retries;
+
+	/* Check if the link is up or not */
+	for (retries = 0; retries < LINK_WAIT_MAX_RETRIES; retries++) {
+		if (cdns_pcie_link_up(pcie)) {
+			dev_info(dev, "Link up\n");
+			return 0;
+		}
+		usleep_range(LINK_WAIT_USLEEP_MIN, LINK_WAIT_USLEEP_MAX);
+	}
+
+	return -ETIMEDOUT;
+}
+
+static int cdns_pcie_retrain(struct cdns_pcie *pcie)
+{
+	u32 lnk_cap_sls, pcie_cap_off = CDNS_PCIE_RP_CAP_OFFSET;
+	u16 lnk_stat, lnk_ctl;
+	int ret = 0;
+
+	/*
+	 * Set retrain bit if current speed is 2.5 GB/s,
+	 * but the PCIe root port support is > 2.5 GB/s.
+	 */
+
+	lnk_cap_sls = cdns_pcie_readl(pcie, (CDNS_PCIE_RP_BASE + pcie_cap_off +
+					     PCI_EXP_LNKCAP));
+	if ((lnk_cap_sls & PCI_EXP_LNKCAP_SLS) <= PCI_EXP_LNKCAP_SLS_2_5GB)
+		return ret;
+
+	lnk_stat = cdns_pcie_rp_readw(pcie, pcie_cap_off + PCI_EXP_LNKSTA);
+	if ((lnk_stat & PCI_EXP_LNKSTA_CLS) == PCI_EXP_LNKSTA_CLS_2_5GB) {
+		lnk_ctl = cdns_pcie_rp_readw(pcie,
+					     pcie_cap_off + PCI_EXP_LNKCTL);
+		lnk_ctl |= PCI_EXP_LNKCTL_RL;
+		cdns_pcie_rp_writew(pcie, pcie_cap_off + PCI_EXP_LNKCTL,
+				    lnk_ctl);
+
+		ret = cdns_pcie_host_training_complete(pcie);
+		if (ret)
+			return ret;
+
+		ret = cdns_pcie_host_wait_for_link(pcie);
+	}
+	return ret;
+}
+
+static void cdns_pcie_host_enable_ptm_response(struct cdns_pcie *pcie)
+{
+	u32 val;
+
+	val = cdns_pcie_readl(pcie, CDNS_PCIE_LM_PTM_CTRL);
+	cdns_pcie_writel(pcie, CDNS_PCIE_LM_PTM_CTRL, val | CDNS_PCIE_LM_TPM_CTRL_PTMRSEN);
+}
+
+static int cdns_pcie_host_start_link(struct cdns_pcie_rc *rc)
+{
+	struct cdns_pcie *pcie = &rc->pcie;
+	int ret;
+
+	ret = cdns_pcie_host_wait_for_link(pcie);
+
+	/*
+	 * Retrain link for Gen2 training defect
+	 * if quirk flag is set.
+	 */
+	if (!ret && rc->quirk_retrain_flag)
+		ret = cdns_pcie_retrain(pcie);
+
+	return ret;
+}
+
+static int cdns_pcie_host_init_root_port(struct cdns_pcie_rc *rc)
+{
+	struct cdns_pcie *pcie = &rc->pcie;
+	u32 value, ctrl;
+	u32 id;
+
+	/*
+	 * Set the root complex BAR configuration register:
+	 * - disable both BAR0 and BAR1.
+	 * - enable Prefetchable Memory Base and Limit registers in type 1
+	 *   config space (64 bits).
+	 * - enable IO Base and Limit registers in type 1 config
+	 *   space (32 bits).
+	 */
+	ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_DISABLED;
+	value = CDNS_PCIE_LM_RC_BAR_CFG_BAR0_CTRL(ctrl) |
+		CDNS_PCIE_LM_RC_BAR_CFG_BAR1_CTRL(ctrl) |
+		CDNS_PCIE_LM_RC_BAR_CFG_PREFETCH_MEM_ENABLE |
+		CDNS_PCIE_LM_RC_BAR_CFG_PREFETCH_MEM_64BITS |
+		CDNS_PCIE_LM_RC_BAR_CFG_IO_ENABLE |
+		CDNS_PCIE_LM_RC_BAR_CFG_IO_32BITS;
+	cdns_pcie_writel(pcie, CDNS_PCIE_LM_RC_BAR_CFG, value);
+
+	/* Set root port configuration space */
+	if (rc->vendor_id != 0xffff) {
+		id = CDNS_PCIE_LM_ID_VENDOR(rc->vendor_id) |
+			CDNS_PCIE_LM_ID_SUBSYS(rc->vendor_id);
+		cdns_pcie_writel(pcie, CDNS_PCIE_LM_ID, id);
+	}
+
+	if (rc->device_id != 0xffff)
+		cdns_pcie_rp_writew(pcie, PCI_DEVICE_ID, rc->device_id);
+
+	cdns_pcie_rp_writeb(pcie, PCI_CLASS_REVISION, 0);
+	cdns_pcie_rp_writeb(pcie, PCI_CLASS_PROG, 0);
+	cdns_pcie_rp_writew(pcie, PCI_CLASS_DEVICE, PCI_CLASS_BRIDGE_PCI);
+
+	return 0;
+}
+
+static int cdns_pcie_host_bar_ib_config(struct cdns_pcie_rc *rc,
+					enum cdns_pcie_rp_bar bar,
+					u64 cpu_addr, u64 size,
+					unsigned long flags)
+{
+	struct cdns_pcie *pcie = &rc->pcie;
+	u32 addr0, addr1, aperture, value;
+
+	if (!rc->avail_ib_bar[bar])
+		return -EBUSY;
+
+	rc->avail_ib_bar[bar] = false;
+
+	aperture = ilog2(size);
+	addr0 = CDNS_PCIE_AT_IB_RP_BAR_ADDR0_NBITS(aperture) |
+		(lower_32_bits(cpu_addr) & GENMASK(31, 8));
+	addr1 = upper_32_bits(cpu_addr);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_IB_RP_BAR_ADDR0(bar), addr0);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_IB_RP_BAR_ADDR1(bar), addr1);
+
+	if (bar == RP_NO_BAR)
+		return 0;
+
+	value = cdns_pcie_readl(pcie, CDNS_PCIE_LM_RC_BAR_CFG);
+	value &= ~(LM_RC_BAR_CFG_CTRL_MEM_64BITS(bar) |
+		   LM_RC_BAR_CFG_CTRL_PREF_MEM_64BITS(bar) |
+		   LM_RC_BAR_CFG_CTRL_MEM_32BITS(bar) |
+		   LM_RC_BAR_CFG_CTRL_PREF_MEM_32BITS(bar) |
+		   LM_RC_BAR_CFG_APERTURE(bar, bar_aperture_mask[bar] + 2));
+	if (size + cpu_addr >= SZ_4G) {
+		if (!(flags & IORESOURCE_PREFETCH))
+			value |= LM_RC_BAR_CFG_CTRL_MEM_64BITS(bar);
+		value |= LM_RC_BAR_CFG_CTRL_PREF_MEM_64BITS(bar);
+	} else {
+		if (!(flags & IORESOURCE_PREFETCH))
+			value |= LM_RC_BAR_CFG_CTRL_MEM_32BITS(bar);
+		value |= LM_RC_BAR_CFG_CTRL_PREF_MEM_32BITS(bar);
+	}
+
+	value |= LM_RC_BAR_CFG_APERTURE(bar, aperture);
+	cdns_pcie_writel(pcie, CDNS_PCIE_LM_RC_BAR_CFG, value);
+
+	return 0;
+}
+
+static enum cdns_pcie_rp_bar
+cdns_pcie_host_find_min_bar(struct cdns_pcie_rc *rc, u64 size)
+{
+	enum cdns_pcie_rp_bar bar, sel_bar;
+
+	sel_bar = RP_BAR_UNDEFINED;
+	for (bar = RP_BAR0; bar <= RP_NO_BAR; bar++) {
+		if (!rc->avail_ib_bar[bar])
+			continue;
+
+		if (size <= bar_max_size[bar]) {
+			if (sel_bar == RP_BAR_UNDEFINED) {
+				sel_bar = bar;
+				continue;
+			}
+
+			if (bar_max_size[bar] < bar_max_size[sel_bar])
+				sel_bar = bar;
+		}
+	}
+
+	return sel_bar;
+}
+
+static enum cdns_pcie_rp_bar
+cdns_pcie_host_find_max_bar(struct cdns_pcie_rc *rc, u64 size)
+{
+	enum cdns_pcie_rp_bar bar, sel_bar;
+
+	sel_bar = RP_BAR_UNDEFINED;
+	for (bar = RP_BAR0; bar <= RP_NO_BAR; bar++) {
+		if (!rc->avail_ib_bar[bar])
+			continue;
+
+		if (size >= bar_max_size[bar]) {
+			if (sel_bar == RP_BAR_UNDEFINED) {
+				sel_bar = bar;
+				continue;
+			}
+
+			if (bar_max_size[bar] > bar_max_size[sel_bar])
+				sel_bar = bar;
+		}
+	}
+
+	return sel_bar;
+}
+
+static int cdns_pcie_host_bar_config(struct cdns_pcie_rc *rc,
+				     struct resource_entry *entry)
+{
+	u64 cpu_addr, pci_addr, size, winsize;
+	struct cdns_pcie *pcie = &rc->pcie;
+	struct device *dev = pcie->dev;
+	enum cdns_pcie_rp_bar bar;
+	unsigned long flags;
+	int ret;
+
+	cpu_addr = entry->res->start;
+	pci_addr = entry->res->start - entry->offset;
+	flags = entry->res->flags;
+	size = resource_size(entry->res);
+
+	if (entry->offset) {
+		dev_err(dev, "PCI addr: %llx must be equal to CPU addr: %llx\n",
+			pci_addr, cpu_addr);
+		return -EINVAL;
+	}
+
+	while (size > 0) {
+		/*
+		 * Try to find a minimum BAR whose size is greater than
+		 * or equal to the remaining resource_entry size. This will
+		 * fail if the size of each of the available BARs is less than
+		 * the remaining resource_entry size.
+		 * If a minimum BAR is found, IB ATU will be configured and
+		 * exited.
+		 */
+		bar = cdns_pcie_host_find_min_bar(rc, size);
+		if (bar != RP_BAR_UNDEFINED) {
+			ret = cdns_pcie_host_bar_ib_config(rc, bar, cpu_addr,
+							   size, flags);
+			if (ret)
+				dev_err(dev, "IB BAR: %d config failed\n", bar);
+			return ret;
+		}
+
+		/*
+		 * If the control reaches here, it would mean the remaining
+		 * resource_entry size cannot be fitted in a single BAR. So we
+		 * find a maximum BAR whose size is less than or equal to the
+		 * remaining resource_entry size and split the resource entry
+		 * so that part of resource entry is fitted inside the maximum
+		 * BAR. The remaining size would be fitted during the next
+		 * iteration of the loop.
+		 * If a maximum BAR is not found, there is no way we can fit
+		 * this resource_entry, so we error out.
+		 */
+		bar = cdns_pcie_host_find_max_bar(rc, size);
+		if (bar == RP_BAR_UNDEFINED) {
+			dev_err(dev, "No free BAR to map cpu_addr %llx\n",
+				cpu_addr);
+			return -EINVAL;
+		}
+
+		winsize = bar_max_size[bar];
+		ret = cdns_pcie_host_bar_ib_config(rc, bar, cpu_addr, winsize,
+						   flags);
+		if (ret) {
+			dev_err(dev, "IB BAR: %d config failed\n", bar);
+			return ret;
+		}
+
+		size -= winsize;
+		cpu_addr += winsize;
+	}
+
+	return 0;
+}
+
+static int cdns_pcie_host_dma_ranges_cmp(void *priv, const struct list_head *a,
+					 const struct list_head *b)
+{
+	struct resource_entry *entry1, *entry2;
+
+        entry1 = container_of(a, struct resource_entry, node);
+        entry2 = container_of(b, struct resource_entry, node);
+
+        return resource_size(entry2->res) - resource_size(entry1->res);
+}
+
+static int cdns_pcie_host_map_dma_ranges(struct cdns_pcie_rc *rc)
+{
+	struct cdns_pcie *pcie = &rc->pcie;
+	struct device *dev = pcie->dev;
+	struct device_node *np = dev->of_node;
+	struct pci_host_bridge *bridge;
+	struct resource_entry *entry;
+	u32 no_bar_nbits = 32;
+	int err;
+
+	bridge = pci_host_bridge_from_priv(rc);
+	if (!bridge)
+		return -ENOMEM;
+
+	if (list_empty(&bridge->dma_ranges)) {
+		of_property_read_u32(np, "cdns,no-bar-match-nbits",
+				     &no_bar_nbits);
+		err = cdns_pcie_host_bar_ib_config(rc, RP_NO_BAR, 0x0,
+						   (u64)1 << no_bar_nbits, 0);
+		if (err)
+			dev_err(dev, "IB BAR: %d config failed\n", RP_NO_BAR);
+		return err;
+	}
+
+	list_sort(NULL, &bridge->dma_ranges, cdns_pcie_host_dma_ranges_cmp);
+
+	resource_list_for_each_entry(entry, &bridge->dma_ranges) {
+		err = cdns_pcie_host_bar_config(rc, entry);
+		if (err) {
+			dev_err(dev, "Fail to configure IB using dma-ranges\n");
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int cdns_pcie_host_init_address_translation(struct cdns_pcie_rc *rc)
+{
+	struct cdns_pcie *pcie = &rc->pcie;
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(rc);
+	struct resource *cfg_res = rc->cfg_res;
+	struct resource_entry *entry;
+	u64 cpu_addr = cfg_res->start;
+	u32 addr0, addr1, desc1;
+	int r, busnr = 0;
+
+	entry = resource_list_first_type(&bridge->windows, IORESOURCE_BUS);
+	if (entry)
+		busnr = entry->res->start;
+
+	/*
+	 * Reserve region 0 for PCI configure space accesses:
+	 * OB_REGION_PCI_ADDR0 and OB_REGION_DESC0 are updated dynamically by
+	 * cdns_pci_map_bus(), other region registers are set here once for all.
+	 */
+	addr1 = 0; /* Should be programmed to zero. */
+	desc1 = CDNS_PCIE_AT_OB_REGION_DESC1_BUS(busnr);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_PCI_ADDR1(0), addr1);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_DESC1(0), desc1);
+
+	if (pcie->ops->cpu_addr_fixup)
+		cpu_addr = pcie->ops->cpu_addr_fixup(pcie, cpu_addr);
+
+	addr0 = CDNS_PCIE_AT_OB_REGION_CPU_ADDR0_NBITS(12) |
+		(lower_32_bits(cpu_addr) & GENMASK(31, 8));
+	addr1 = upper_32_bits(cpu_addr);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_CPU_ADDR0(0), addr0);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_CPU_ADDR1(0), addr1);
+
+	r = 1;
+	resource_list_for_each_entry(entry, &bridge->windows) {
+		struct resource *res = entry->res;
+		u64 pci_addr = res->start - entry->offset;
+
+		if (resource_type(res) == IORESOURCE_IO)
+			cdns_pcie_set_outbound_region(pcie, busnr, 0, r,
+						      true,
+						      pci_pio_to_address(res->start),
+						      pci_addr,
+						      resource_size(res));
+		else
+			cdns_pcie_set_outbound_region(pcie, busnr, 0, r,
+						      false,
+						      res->start,
+						      pci_addr,
+						      resource_size(res));
+
+		r++;
+	}
+
+	return cdns_pcie_host_map_dma_ranges(rc);
+}
+
+static int cdns_pcie_host_init(struct device *dev,
+			       struct cdns_pcie_rc *rc)
+{
+	int err;
+
+	err = cdns_pcie_host_init_root_port(rc);
+	if (err)
+		return err;
+
+	return cdns_pcie_host_init_address_translation(rc);
+}
+
+int cdns_pcie_host_setup(struct cdns_pcie_rc *rc)
+{
+	struct device *dev = rc->pcie.dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct device_node *np = dev->of_node;
+	struct pci_host_bridge *bridge;
+	enum cdns_pcie_rp_bar bar;
+	struct cdns_pcie *pcie;
+	struct resource *res;
+	int ret;
+
+	bridge = pci_host_bridge_from_priv(rc);
+	if (!bridge)
+		return -ENOMEM;
+
+	pcie = &rc->pcie;
+	pcie->is_rc = true;
+
+	rc->vendor_id = 0xffff;
+	of_property_read_u32(np, "vendor-id", &rc->vendor_id);
+
+	rc->device_id = 0xffff;
+	of_property_read_u32(np, "device-id", &rc->device_id);
+
+	pcie->reg_base = devm_platform_ioremap_resource_byname(pdev, "reg");
+	if (IS_ERR(pcie->reg_base)) {
+		dev_err(dev, "missing \"reg\"\n");
+		return PTR_ERR(pcie->reg_base);
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cfg");
+	rc->cfg_base = devm_pci_remap_cfg_resource(dev, res);
+	if (IS_ERR(rc->cfg_base))
+		return PTR_ERR(rc->cfg_base);
+	rc->cfg_res = res;
+
+	if (rc->quirk_detect_quiet_flag)
+		cdns_pcie_detect_quiet_min_delay_set(&rc->pcie);
+
+	cdns_pcie_host_enable_ptm_response(pcie);
+
+	ret = cdns_pcie_start_link(pcie);
+	if (ret) {
+		dev_err(dev, "Failed to start link\n");
+		return ret;
+	}
+
+	ret = cdns_pcie_host_start_link(rc);
+	if (ret)
+		dev_dbg(dev, "PCIe link never came up\n");
+
+	for (bar = RP_BAR0; bar <= RP_NO_BAR; bar++)
+		rc->avail_ib_bar[bar] = true;
+
+	ret = cdns_pcie_host_init(dev, rc);
+	if (ret)
+		return ret;
+
+	if (!bridge->ops)
+		bridge->ops = &cdns_pcie_host_ops;
+
+	ret = pci_host_probe(bridge);
+	if (ret < 0)
+		goto err_init;
+
+	return 0;
+
+ err_init:
+	pm_runtime_put_sync(dev);
+
+	return ret;
+}
diff --git a/drivers/pci/controller/cadence/pcie-cadence-plat.c b/drivers/pci/controller/cadence/pcie-cadence-plat.c
new file mode 100644
index 000000000..371ffc1f0
--- /dev/null
+++ b/drivers/pci/controller/cadence/pcie-cadence-plat.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Cadence PCIe platform  driver.
+ *
+ * Copyright (c) 2019, Cadence Design Systems
+ * Author: Tom Joseph <tjoseph@cadence.com>
+ */
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_pci.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include "pcie-cadence.h"
+
+#define CDNS_PLAT_CPU_TO_BUS_ADDR	0x0FFFFFFF
+
+/**
+ * struct cdns_plat_pcie - private data for this PCIe platform driver
+ * @pcie: Cadence PCIe controller
+ * @is_rc: Set to 1 indicates the PCIe controller mode is Root Complex,
+ *         if 0 it is in Endpoint mode.
+ */
+struct cdns_plat_pcie {
+	struct cdns_pcie        *pcie;
+	bool is_rc;
+};
+
+struct cdns_plat_pcie_of_data {
+	bool is_rc;
+};
+
+static const struct of_device_id cdns_plat_pcie_of_match[];
+
+static u64 cdns_plat_cpu_addr_fixup(struct cdns_pcie *pcie, u64 cpu_addr)
+{
+	return cpu_addr & CDNS_PLAT_CPU_TO_BUS_ADDR;
+}
+
+static const struct cdns_pcie_ops cdns_plat_ops = {
+	.cpu_addr_fixup = cdns_plat_cpu_addr_fixup,
+};
+
+static int cdns_plat_pcie_probe(struct platform_device *pdev)
+{
+	const struct cdns_plat_pcie_of_data *data;
+	struct cdns_plat_pcie *cdns_plat_pcie;
+	struct device *dev = &pdev->dev;
+	struct pci_host_bridge *bridge;
+	struct cdns_pcie_ep *ep;
+	struct cdns_pcie_rc *rc;
+	int phy_count;
+	bool is_rc;
+	int ret;
+
+	data = of_device_get_match_data(dev);
+	if (!data)
+		return -EINVAL;
+
+	is_rc = data->is_rc;
+
+	pr_debug(" Started %s with is_rc: %d\n", __func__, is_rc);
+	cdns_plat_pcie = devm_kzalloc(dev, sizeof(*cdns_plat_pcie), GFP_KERNEL);
+	if (!cdns_plat_pcie)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, cdns_plat_pcie);
+	if (is_rc) {
+		if (!IS_ENABLED(CONFIG_PCIE_CADENCE_PLAT_HOST))
+			return -ENODEV;
+
+		bridge = devm_pci_alloc_host_bridge(dev, sizeof(*rc));
+		if (!bridge)
+			return -ENOMEM;
+
+		rc = pci_host_bridge_priv(bridge);
+		rc->pcie.dev = dev;
+		rc->pcie.ops = &cdns_plat_ops;
+		cdns_plat_pcie->pcie = &rc->pcie;
+		cdns_plat_pcie->is_rc = is_rc;
+
+		ret = cdns_pcie_init_phy(dev, cdns_plat_pcie->pcie);
+		if (ret) {
+			dev_err(dev, "failed to init phy\n");
+			return ret;
+		}
+		pm_runtime_enable(dev);
+		ret = pm_runtime_get_sync(dev);
+		if (ret < 0) {
+			dev_err(dev, "pm_runtime_get_sync() failed\n");
+			goto err_get_sync;
+		}
+
+		ret = cdns_pcie_host_setup(rc);
+		if (ret)
+			goto err_init;
+	} else {
+		if (!IS_ENABLED(CONFIG_PCIE_CADENCE_PLAT_EP))
+			return -ENODEV;
+
+		ep = devm_kzalloc(dev, sizeof(*ep), GFP_KERNEL);
+		if (!ep)
+			return -ENOMEM;
+
+		ep->pcie.dev = dev;
+		ep->pcie.ops = &cdns_plat_ops;
+		cdns_plat_pcie->pcie = &ep->pcie;
+		cdns_plat_pcie->is_rc = is_rc;
+
+		ret = cdns_pcie_init_phy(dev, cdns_plat_pcie->pcie);
+		if (ret) {
+			dev_err(dev, "failed to init phy\n");
+			return ret;
+		}
+
+		pm_runtime_enable(dev);
+		ret = pm_runtime_get_sync(dev);
+		if (ret < 0) {
+			dev_err(dev, "pm_runtime_get_sync() failed\n");
+			goto err_get_sync;
+		}
+
+		ret = cdns_pcie_ep_setup(ep);
+		if (ret)
+			goto err_init;
+	}
+
+	return 0;
+
+ err_init:
+ err_get_sync:
+	pm_runtime_put_sync(dev);
+	pm_runtime_disable(dev);
+	cdns_pcie_disable_phy(cdns_plat_pcie->pcie);
+	phy_count = cdns_plat_pcie->pcie->phy_count;
+	while (phy_count--)
+		device_link_del(cdns_plat_pcie->pcie->link[phy_count]);
+
+	return 0;
+}
+
+static void cdns_plat_pcie_shutdown(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct cdns_pcie *pcie = dev_get_drvdata(dev);
+	int ret;
+
+	ret = pm_runtime_put_sync(dev);
+	if (ret < 0)
+		dev_dbg(dev, "pm_runtime_put_sync failed\n");
+
+	pm_runtime_disable(dev);
+
+	cdns_pcie_disable_phy(pcie);
+}
+
+static const struct cdns_plat_pcie_of_data cdns_plat_pcie_host_of_data = {
+	.is_rc = true,
+};
+
+static const struct cdns_plat_pcie_of_data cdns_plat_pcie_ep_of_data = {
+	.is_rc = false,
+};
+
+static const struct of_device_id cdns_plat_pcie_of_match[] = {
+	{
+		.compatible = "cdns,cdns-pcie-host",
+		.data = &cdns_plat_pcie_host_of_data,
+	},
+	{
+		.compatible = "cdns,cdns-pcie-ep",
+		.data = &cdns_plat_pcie_ep_of_data,
+	},
+	{},
+};
+
+static struct platform_driver cdns_plat_pcie_driver = {
+	.driver = {
+		.name = "cdns-pcie",
+		.of_match_table = cdns_plat_pcie_of_match,
+		.pm	= &cdns_pcie_pm_ops,
+	},
+	.probe = cdns_plat_pcie_probe,
+	.shutdown = cdns_plat_pcie_shutdown,
+};
+builtin_platform_driver(cdns_plat_pcie_driver);
diff --git a/drivers/pci/controller/cadence/pcie-cadence.c b/drivers/pci/controller/cadence/pcie-cadence.c
new file mode 100644
index 000000000..4251fac5e
--- /dev/null
+++ b/drivers/pci/controller/cadence/pcie-cadence.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Cadence
+// Cadence PCIe controller driver.
+// Author: Cyrille Pitchen <cyrille.pitchen@free-electrons.com>
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+
+#include "pcie-cadence.h"
+
+void cdns_pcie_detect_quiet_min_delay_set(struct cdns_pcie *pcie)
+{
+	u32 delay = 0x3;
+	u32 ltssm_control_cap;
+
+	/*
+	 * Set the LTSSM Detect Quiet state min. delay to 2ms.
+	 */
+	ltssm_control_cap = cdns_pcie_readl(pcie, CDNS_PCIE_LTSSM_CONTROL_CAP);
+	ltssm_control_cap = ((ltssm_control_cap &
+			    ~CDNS_PCIE_DETECT_QUIET_MIN_DELAY_MASK) |
+			    CDNS_PCIE_DETECT_QUIET_MIN_DELAY(delay));
+
+	cdns_pcie_writel(pcie, CDNS_PCIE_LTSSM_CONTROL_CAP, ltssm_control_cap);
+}
+
+void cdns_pcie_set_outbound_region(struct cdns_pcie *pcie, u8 busnr, u8 fn,
+				   u32 r, bool is_io,
+				   u64 cpu_addr, u64 pci_addr, size_t size)
+{
+	/*
+	 * roundup_pow_of_two() returns an unsigned long, which is not suited
+	 * for 64bit values.
+	 */
+	u64 sz = 1ULL << fls64(size - 1);
+	int nbits = ilog2(sz);
+	u32 addr0, addr1, desc0, desc1;
+
+	if (nbits < 8)
+		nbits = 8;
+
+	/* Set the PCI address */
+	addr0 = CDNS_PCIE_AT_OB_REGION_PCI_ADDR0_NBITS(nbits) |
+		(lower_32_bits(pci_addr) & GENMASK(31, 8));
+	addr1 = upper_32_bits(pci_addr);
+
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_PCI_ADDR0(r), addr0);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_PCI_ADDR1(r), addr1);
+
+	/* Set the PCIe header descriptor */
+	if (is_io)
+		desc0 = CDNS_PCIE_AT_OB_REGION_DESC0_TYPE_IO;
+	else
+		desc0 = CDNS_PCIE_AT_OB_REGION_DESC0_TYPE_MEM;
+	desc1 = 0;
+
+	/*
+	 * Whatever Bit [23] is set or not inside DESC0 register of the outbound
+	 * PCIe descriptor, the PCI function number must be set into
+	 * Bits [26:24] of DESC0 anyway.
+	 *
+	 * In Root Complex mode, the function number is always 0 but in Endpoint
+	 * mode, the PCIe controller may support more than one function. This
+	 * function number needs to be set properly into the outbound PCIe
+	 * descriptor.
+	 *
+	 * Besides, setting Bit [23] is mandatory when in Root Complex mode:
+	 * then the driver must provide the bus, resp. device, number in
+	 * Bits [7:0] of DESC1, resp. Bits[31:27] of DESC0. Like the function
+	 * number, the device number is always 0 in Root Complex mode.
+	 *
+	 * However when in Endpoint mode, we can clear Bit [23] of DESC0, hence
+	 * the PCIe controller will use the captured values for the bus and
+	 * device numbers.
+	 */
+	if (pcie->is_rc) {
+		/* The device and function numbers are always 0. */
+		desc0 |= CDNS_PCIE_AT_OB_REGION_DESC0_HARDCODED_RID |
+			 CDNS_PCIE_AT_OB_REGION_DESC0_DEVFN(0);
+		desc1 |= CDNS_PCIE_AT_OB_REGION_DESC1_BUS(busnr);
+	} else {
+		/*
+		 * Use captured values for bus and device numbers but still
+		 * need to set the function number.
+		 */
+		desc0 |= CDNS_PCIE_AT_OB_REGION_DESC0_DEVFN(fn);
+	}
+
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_DESC0(r), desc0);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_DESC1(r), desc1);
+
+	/* Set the CPU address */
+	if (pcie->ops->cpu_addr_fixup)
+		cpu_addr = pcie->ops->cpu_addr_fixup(pcie, cpu_addr);
+
+	addr0 = CDNS_PCIE_AT_OB_REGION_CPU_ADDR0_NBITS(nbits) |
+		(lower_32_bits(cpu_addr) & GENMASK(31, 8));
+	addr1 = upper_32_bits(cpu_addr);
+
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_CPU_ADDR0(r), addr0);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_CPU_ADDR1(r), addr1);
+}
+
+void cdns_pcie_set_outbound_region_for_normal_msg(struct cdns_pcie *pcie,
+						  u8 busnr, u8 fn,
+						  u32 r, u64 cpu_addr)
+{
+	u32 addr0, addr1, desc0, desc1;
+
+	desc0 = CDNS_PCIE_AT_OB_REGION_DESC0_TYPE_NORMAL_MSG;
+	desc1 = 0;
+
+	/* See cdns_pcie_set_outbound_region() comments above. */
+	if (pcie->is_rc) {
+		desc0 |= CDNS_PCIE_AT_OB_REGION_DESC0_HARDCODED_RID |
+			 CDNS_PCIE_AT_OB_REGION_DESC0_DEVFN(0);
+		desc1 |= CDNS_PCIE_AT_OB_REGION_DESC1_BUS(busnr);
+	} else {
+		desc0 |= CDNS_PCIE_AT_OB_REGION_DESC0_DEVFN(fn);
+	}
+
+	/* Set the CPU address */
+	if (pcie->ops->cpu_addr_fixup)
+		cpu_addr = pcie->ops->cpu_addr_fixup(pcie, cpu_addr);
+
+	addr0 = CDNS_PCIE_AT_OB_REGION_CPU_ADDR0_NBITS(17) |
+		(lower_32_bits(cpu_addr) & GENMASK(31, 8));
+	addr1 = upper_32_bits(cpu_addr);
+
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_PCI_ADDR0(r), 0);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_PCI_ADDR1(r), 0);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_DESC0(r), desc0);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_DESC1(r), desc1);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_CPU_ADDR0(r), addr0);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_CPU_ADDR1(r), addr1);
+}
+
+void cdns_pcie_reset_outbound_region(struct cdns_pcie *pcie, u32 r)
+{
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_PCI_ADDR0(r), 0);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_PCI_ADDR1(r), 0);
+
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_DESC0(r), 0);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_DESC1(r), 0);
+
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_CPU_ADDR0(r), 0);
+	cdns_pcie_writel(pcie, CDNS_PCIE_AT_OB_REGION_CPU_ADDR1(r), 0);
+}
+
+void cdns_pcie_disable_phy(struct cdns_pcie *pcie)
+{
+	int i = pcie->phy_count;
+
+	while (i--) {
+		phy_power_off(pcie->phy[i]);
+		phy_exit(pcie->phy[i]);
+	}
+}
+
+int cdns_pcie_enable_phy(struct cdns_pcie *pcie)
+{
+	int ret;
+	int i;
+
+	for (i = 0; i < pcie->phy_count; i++) {
+		ret = phy_init(pcie->phy[i]);
+		if (ret < 0)
+			goto err_phy;
+
+		ret = phy_power_on(pcie->phy[i]);
+		if (ret < 0) {
+			phy_exit(pcie->phy[i]);
+			goto err_phy;
+		}
+	}
+
+	return 0;
+
+err_phy:
+	while (--i >= 0) {
+		phy_power_off(pcie->phy[i]);
+		phy_exit(pcie->phy[i]);
+	}
+
+	return ret;
+}
+
+int cdns_pcie_init_phy(struct device *dev, struct cdns_pcie *pcie)
+{
+	struct device_node *np = dev->of_node;
+	int phy_count;
+	struct phy **phy;
+	struct device_link **link;
+	int i;
+	int ret;
+	const char *name;
+
+	phy_count = of_property_count_strings(np, "phy-names");
+	if (phy_count < 1) {
+		dev_err(dev, "no phy-names.  PHY will not be initialized\n");
+		pcie->phy_count = 0;
+		return 0;
+	}
+
+	phy = devm_kcalloc(dev, phy_count, sizeof(*phy), GFP_KERNEL);
+	if (!phy)
+		return -ENOMEM;
+
+	link = devm_kcalloc(dev, phy_count, sizeof(*link), GFP_KERNEL);
+	if (!link)
+		return -ENOMEM;
+
+	for (i = 0; i < phy_count; i++) {
+		of_property_read_string_index(np, "phy-names", i, &name);
+		phy[i] = devm_phy_get(dev, name);
+		if (IS_ERR(phy[i])) {
+			ret = PTR_ERR(phy[i]);
+			goto err_phy;
+		}
+		link[i] = device_link_add(dev, &phy[i]->dev, DL_FLAG_STATELESS);
+		if (!link[i]) {
+			devm_phy_put(dev, phy[i]);
+			ret = -EINVAL;
+			goto err_phy;
+		}
+	}
+
+	pcie->phy_count = phy_count;
+	pcie->phy = phy;
+	pcie->link = link;
+
+	ret =  cdns_pcie_enable_phy(pcie);
+	if (ret)
+		goto err_phy;
+
+	return 0;
+
+err_phy:
+	while (--i >= 0) {
+		device_link_del(link[i]);
+		devm_phy_put(dev, phy[i]);
+	}
+
+	return ret;
+}
+
+static int cdns_pcie_suspend_noirq(struct device *dev)
+{
+	struct cdns_pcie *pcie = dev_get_drvdata(dev);
+
+	cdns_pcie_disable_phy(pcie);
+
+	return 0;
+}
+
+static int cdns_pcie_resume_noirq(struct device *dev)
+{
+	struct cdns_pcie *pcie = dev_get_drvdata(dev);
+	int ret;
+
+	ret = cdns_pcie_enable_phy(pcie);
+	if (ret) {
+		dev_err(dev, "failed to enable phy\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+const struct dev_pm_ops cdns_pcie_pm_ops = {
+	NOIRQ_SYSTEM_SLEEP_PM_OPS(cdns_pcie_suspend_noirq,
+				  cdns_pcie_resume_noirq)
+};
diff --git a/drivers/pci/controller/cadence/pcie-cadence.h b/drivers/pci/controller/cadence/pcie-cadence.h
new file mode 100644
index 000000000..373cb50fc
--- /dev/null
+++ b/drivers/pci/controller/cadence/pcie-cadence.h
@@ -0,0 +1,559 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2017 Cadence
+// Cadence PCIe controller driver.
+// Author: Cyrille Pitchen <cyrille.pitchen@free-electrons.com>
+
+#ifndef _PCIE_CADENCE_H
+#define _PCIE_CADENCE_H
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/pci-epf.h>
+#include <linux/phy/phy.h>
+
+/* Parameters for the waiting for link up routine */
+#define LINK_WAIT_MAX_RETRIES	10
+#define LINK_WAIT_USLEEP_MIN	90000
+#define LINK_WAIT_USLEEP_MAX	100000
+
+/*
+ * Local Management Registers
+ */
+#define CDNS_PCIE_LM_BASE	0x00100000
+
+/* Vendor ID Register */
+#define CDNS_PCIE_LM_ID		(CDNS_PCIE_LM_BASE + 0x0044)
+#define  CDNS_PCIE_LM_ID_VENDOR_MASK	GENMASK(15, 0)
+#define  CDNS_PCIE_LM_ID_VENDOR_SHIFT	0
+#define  CDNS_PCIE_LM_ID_VENDOR(vid) \
+	(((vid) << CDNS_PCIE_LM_ID_VENDOR_SHIFT) & CDNS_PCIE_LM_ID_VENDOR_MASK)
+#define  CDNS_PCIE_LM_ID_SUBSYS_MASK	GENMASK(31, 16)
+#define  CDNS_PCIE_LM_ID_SUBSYS_SHIFT	16
+#define  CDNS_PCIE_LM_ID_SUBSYS(sub) \
+	(((sub) << CDNS_PCIE_LM_ID_SUBSYS_SHIFT) & CDNS_PCIE_LM_ID_SUBSYS_MASK)
+
+/* Root Port Requester ID Register */
+#define CDNS_PCIE_LM_RP_RID	(CDNS_PCIE_LM_BASE + 0x0228)
+#define  CDNS_PCIE_LM_RP_RID_MASK	GENMASK(15, 0)
+#define  CDNS_PCIE_LM_RP_RID_SHIFT	0
+#define  CDNS_PCIE_LM_RP_RID_(rid) \
+	(((rid) << CDNS_PCIE_LM_RP_RID_SHIFT) & CDNS_PCIE_LM_RP_RID_MASK)
+
+/* Endpoint Bus and Device Number Register */
+#define CDNS_PCIE_LM_EP_ID	(CDNS_PCIE_LM_BASE + 0x022c)
+#define  CDNS_PCIE_LM_EP_ID_DEV_MASK	GENMASK(4, 0)
+#define  CDNS_PCIE_LM_EP_ID_DEV_SHIFT	0
+#define  CDNS_PCIE_LM_EP_ID_BUS_MASK	GENMASK(15, 8)
+#define  CDNS_PCIE_LM_EP_ID_BUS_SHIFT	8
+
+/* Endpoint Function f BAR b Configuration Registers */
+#define CDNS_PCIE_LM_EP_FUNC_BAR_CFG(bar, fn) \
+	(((bar) < BAR_4) ? CDNS_PCIE_LM_EP_FUNC_BAR_CFG0(fn) : CDNS_PCIE_LM_EP_FUNC_BAR_CFG1(fn))
+#define CDNS_PCIE_LM_EP_FUNC_BAR_CFG0(fn) \
+	(CDNS_PCIE_LM_BASE + 0x0240 + (fn) * 0x0008)
+#define CDNS_PCIE_LM_EP_FUNC_BAR_CFG1(fn) \
+	(CDNS_PCIE_LM_BASE + 0x0244 + (fn) * 0x0008)
+#define CDNS_PCIE_LM_EP_VFUNC_BAR_CFG(bar, fn) \
+	(((bar) < BAR_4) ? CDNS_PCIE_LM_EP_VFUNC_BAR_CFG0(fn) : CDNS_PCIE_LM_EP_VFUNC_BAR_CFG1(fn))
+#define CDNS_PCIE_LM_EP_VFUNC_BAR_CFG0(fn) \
+	(CDNS_PCIE_LM_BASE + 0x0280 + (fn) * 0x0008)
+#define CDNS_PCIE_LM_EP_VFUNC_BAR_CFG1(fn) \
+	(CDNS_PCIE_LM_BASE + 0x0284 + (fn) * 0x0008)
+#define  CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE_MASK(b) \
+	(GENMASK(4, 0) << ((b) * 8))
+#define  CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE(b, a) \
+	(((a) << ((b) * 8)) & CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE_MASK(b))
+#define  CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL_MASK(b) \
+	(GENMASK(7, 5) << ((b) * 8))
+#define  CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL(b, c) \
+	(((c) << ((b) * 8 + 5)) & CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL_MASK(b))
+
+/* Endpoint Function Configuration Register */
+#define CDNS_PCIE_LM_EP_FUNC_CFG	(CDNS_PCIE_LM_BASE + 0x02c0)
+
+/* Root Complex BAR Configuration Register */
+#define CDNS_PCIE_LM_RC_BAR_CFG	(CDNS_PCIE_LM_BASE + 0x0300)
+#define  CDNS_PCIE_LM_RC_BAR_CFG_BAR0_APERTURE_MASK	GENMASK(5, 0)
+#define  CDNS_PCIE_LM_RC_BAR_CFG_BAR0_APERTURE(a) \
+	(((a) << 0) & CDNS_PCIE_LM_RC_BAR_CFG_BAR0_APERTURE_MASK)
+#define  CDNS_PCIE_LM_RC_BAR_CFG_BAR0_CTRL_MASK		GENMASK(8, 6)
+#define  CDNS_PCIE_LM_RC_BAR_CFG_BAR0_CTRL(c) \
+	(((c) << 6) & CDNS_PCIE_LM_RC_BAR_CFG_BAR0_CTRL_MASK)
+#define  CDNS_PCIE_LM_RC_BAR_CFG_BAR1_APERTURE_MASK	GENMASK(13, 9)
+#define  CDNS_PCIE_LM_RC_BAR_CFG_BAR1_APERTURE(a) \
+	(((a) << 9) & CDNS_PCIE_LM_RC_BAR_CFG_BAR1_APERTURE_MASK)
+#define  CDNS_PCIE_LM_RC_BAR_CFG_BAR1_CTRL_MASK		GENMASK(16, 14)
+#define  CDNS_PCIE_LM_RC_BAR_CFG_BAR1_CTRL(c) \
+	(((c) << 14) & CDNS_PCIE_LM_RC_BAR_CFG_BAR1_CTRL_MASK)
+#define  CDNS_PCIE_LM_RC_BAR_CFG_PREFETCH_MEM_ENABLE	BIT(17)
+#define  CDNS_PCIE_LM_RC_BAR_CFG_PREFETCH_MEM_32BITS	0
+#define  CDNS_PCIE_LM_RC_BAR_CFG_PREFETCH_MEM_64BITS	BIT(18)
+#define  CDNS_PCIE_LM_RC_BAR_CFG_IO_ENABLE		BIT(19)
+#define  CDNS_PCIE_LM_RC_BAR_CFG_IO_16BITS		0
+#define  CDNS_PCIE_LM_RC_BAR_CFG_IO_32BITS		BIT(20)
+#define  CDNS_PCIE_LM_RC_BAR_CFG_CHECK_ENABLE		BIT(31)
+
+/* BAR control values applicable to both Endpoint Function and Root Complex */
+#define  CDNS_PCIE_LM_BAR_CFG_CTRL_DISABLED		0x0
+#define  CDNS_PCIE_LM_BAR_CFG_CTRL_IO_32BITS		0x1
+#define  CDNS_PCIE_LM_BAR_CFG_CTRL_MEM_32BITS		0x4
+#define  CDNS_PCIE_LM_BAR_CFG_CTRL_PREFETCH_MEM_32BITS	0x5
+#define  CDNS_PCIE_LM_BAR_CFG_CTRL_MEM_64BITS		0x6
+#define  CDNS_PCIE_LM_BAR_CFG_CTRL_PREFETCH_MEM_64BITS	0x7
+
+#define LM_RC_BAR_CFG_CTRL_DISABLED(bar)		\
+		(CDNS_PCIE_LM_BAR_CFG_CTRL_DISABLED << (((bar) * 8) + 6))
+#define LM_RC_BAR_CFG_CTRL_IO_32BITS(bar)		\
+		(CDNS_PCIE_LM_BAR_CFG_CTRL_IO_32BITS << (((bar) * 8) + 6))
+#define LM_RC_BAR_CFG_CTRL_MEM_32BITS(bar)		\
+		(CDNS_PCIE_LM_BAR_CFG_CTRL_MEM_32BITS << (((bar) * 8) + 6))
+#define LM_RC_BAR_CFG_CTRL_PREF_MEM_32BITS(bar)	\
+	(CDNS_PCIE_LM_BAR_CFG_CTRL_PREFETCH_MEM_32BITS << (((bar) * 8) + 6))
+#define LM_RC_BAR_CFG_CTRL_MEM_64BITS(bar)		\
+		(CDNS_PCIE_LM_BAR_CFG_CTRL_MEM_64BITS << (((bar) * 8) + 6))
+#define LM_RC_BAR_CFG_CTRL_PREF_MEM_64BITS(bar)	\
+	(CDNS_PCIE_LM_BAR_CFG_CTRL_PREFETCH_MEM_64BITS << (((bar) * 8) + 6))
+#define LM_RC_BAR_CFG_APERTURE(bar, aperture)		\
+					(((aperture) - 2) << ((bar) * 8))
+
+/* PTM Control Register */
+#define CDNS_PCIE_LM_PTM_CTRL 	(CDNS_PCIE_LM_BASE + 0x0da8)
+#define CDNS_PCIE_LM_TPM_CTRL_PTMRSEN 	BIT(17)
+
+/*
+ * Endpoint Function Registers (PCI configuration space for endpoint functions)
+ */
+#define CDNS_PCIE_EP_FUNC_BASE(fn)	(((fn) << 12) & GENMASK(19, 12))
+
+#define CDNS_PCIE_EP_FUNC_MSI_CAP_OFFSET	0x90
+#define CDNS_PCIE_EP_FUNC_MSIX_CAP_OFFSET	0xb0
+#define CDNS_PCIE_EP_FUNC_DEV_CAP_OFFSET	0xc0
+#define CDNS_PCIE_EP_FUNC_SRIOV_CAP_OFFSET	0x200
+
+/*
+ * Root Port Registers (PCI configuration space for the root port function)
+ */
+#define CDNS_PCIE_RP_BASE	0x00200000
+#define CDNS_PCIE_RP_CAP_OFFSET 0xc0
+
+/*
+ * Address Translation Registers
+ */
+#define CDNS_PCIE_AT_BASE	0x00400000
+
+/* Region r Outbound AXI to PCIe Address Translation Register 0 */
+#define CDNS_PCIE_AT_OB_REGION_PCI_ADDR0(r) \
+	(CDNS_PCIE_AT_BASE + 0x0000 + ((r) & 0x1f) * 0x0020)
+#define  CDNS_PCIE_AT_OB_REGION_PCI_ADDR0_NBITS_MASK	GENMASK(5, 0)
+#define  CDNS_PCIE_AT_OB_REGION_PCI_ADDR0_NBITS(nbits) \
+	(((nbits) - 1) & CDNS_PCIE_AT_OB_REGION_PCI_ADDR0_NBITS_MASK)
+#define  CDNS_PCIE_AT_OB_REGION_PCI_ADDR0_DEVFN_MASK	GENMASK(19, 12)
+#define  CDNS_PCIE_AT_OB_REGION_PCI_ADDR0_DEVFN(devfn) \
+	(((devfn) << 12) & CDNS_PCIE_AT_OB_REGION_PCI_ADDR0_DEVFN_MASK)
+#define  CDNS_PCIE_AT_OB_REGION_PCI_ADDR0_BUS_MASK	GENMASK(27, 20)
+#define  CDNS_PCIE_AT_OB_REGION_PCI_ADDR0_BUS(bus) \
+	(((bus) << 20) & CDNS_PCIE_AT_OB_REGION_PCI_ADDR0_BUS_MASK)
+
+/* Region r Outbound AXI to PCIe Address Translation Register 1 */
+#define CDNS_PCIE_AT_OB_REGION_PCI_ADDR1(r) \
+	(CDNS_PCIE_AT_BASE + 0x0004 + ((r) & 0x1f) * 0x0020)
+
+/* Region r Outbound PCIe Descriptor Register 0 */
+#define CDNS_PCIE_AT_OB_REGION_DESC0(r) \
+	(CDNS_PCIE_AT_BASE + 0x0008 + ((r) & 0x1f) * 0x0020)
+#define  CDNS_PCIE_AT_OB_REGION_DESC0_TYPE_MASK		GENMASK(3, 0)
+#define  CDNS_PCIE_AT_OB_REGION_DESC0_TYPE_MEM		0x2
+#define  CDNS_PCIE_AT_OB_REGION_DESC0_TYPE_IO		0x6
+#define  CDNS_PCIE_AT_OB_REGION_DESC0_TYPE_CONF_TYPE0	0xa
+#define  CDNS_PCIE_AT_OB_REGION_DESC0_TYPE_CONF_TYPE1	0xb
+#define  CDNS_PCIE_AT_OB_REGION_DESC0_TYPE_NORMAL_MSG	0xc
+#define  CDNS_PCIE_AT_OB_REGION_DESC0_TYPE_VENDOR_MSG	0xd
+/* Bit 23 MUST be set in RC mode. */
+#define  CDNS_PCIE_AT_OB_REGION_DESC0_HARDCODED_RID	BIT(23)
+#define  CDNS_PCIE_AT_OB_REGION_DESC0_DEVFN_MASK	GENMASK(31, 24)
+#define  CDNS_PCIE_AT_OB_REGION_DESC0_DEVFN(devfn) \
+	(((devfn) << 24) & CDNS_PCIE_AT_OB_REGION_DESC0_DEVFN_MASK)
+
+/* Region r Outbound PCIe Descriptor Register 1 */
+#define CDNS_PCIE_AT_OB_REGION_DESC1(r)	\
+	(CDNS_PCIE_AT_BASE + 0x000c + ((r) & 0x1f) * 0x0020)
+#define  CDNS_PCIE_AT_OB_REGION_DESC1_BUS_MASK	GENMASK(7, 0)
+#define  CDNS_PCIE_AT_OB_REGION_DESC1_BUS(bus) \
+	((bus) & CDNS_PCIE_AT_OB_REGION_DESC1_BUS_MASK)
+
+/* Region r AXI Region Base Address Register 0 */
+#define CDNS_PCIE_AT_OB_REGION_CPU_ADDR0(r) \
+	(CDNS_PCIE_AT_BASE + 0x0018 + ((r) & 0x1f) * 0x0020)
+#define  CDNS_PCIE_AT_OB_REGION_CPU_ADDR0_NBITS_MASK	GENMASK(5, 0)
+#define  CDNS_PCIE_AT_OB_REGION_CPU_ADDR0_NBITS(nbits) \
+	(((nbits) - 1) & CDNS_PCIE_AT_OB_REGION_CPU_ADDR0_NBITS_MASK)
+
+/* Region r AXI Region Base Address Register 1 */
+#define CDNS_PCIE_AT_OB_REGION_CPU_ADDR1(r) \
+	(CDNS_PCIE_AT_BASE + 0x001c + ((r) & 0x1f) * 0x0020)
+
+/* Root Port BAR Inbound PCIe to AXI Address Translation Register */
+#define CDNS_PCIE_AT_IB_RP_BAR_ADDR0(bar) \
+	(CDNS_PCIE_AT_BASE + 0x0800 + (bar) * 0x0008)
+#define  CDNS_PCIE_AT_IB_RP_BAR_ADDR0_NBITS_MASK	GENMASK(5, 0)
+#define  CDNS_PCIE_AT_IB_RP_BAR_ADDR0_NBITS(nbits) \
+	(((nbits) - 1) & CDNS_PCIE_AT_IB_RP_BAR_ADDR0_NBITS_MASK)
+#define CDNS_PCIE_AT_IB_RP_BAR_ADDR1(bar) \
+	(CDNS_PCIE_AT_BASE + 0x0804 + (bar) * 0x0008)
+
+/* AXI link down register */
+#define CDNS_PCIE_AT_LINKDOWN (CDNS_PCIE_AT_BASE + 0x0824)
+
+/* LTSSM Capabilities register */
+#define CDNS_PCIE_LTSSM_CONTROL_CAP             (CDNS_PCIE_LM_BASE + 0x0054)
+#define  CDNS_PCIE_DETECT_QUIET_MIN_DELAY_MASK  GENMASK(2, 1)
+#define  CDNS_PCIE_DETECT_QUIET_MIN_DELAY_SHIFT 1
+#define  CDNS_PCIE_DETECT_QUIET_MIN_DELAY(delay) \
+	 (((delay) << CDNS_PCIE_DETECT_QUIET_MIN_DELAY_SHIFT) & \
+	 CDNS_PCIE_DETECT_QUIET_MIN_DELAY_MASK)
+
+enum cdns_pcie_rp_bar {
+	RP_BAR_UNDEFINED = -1,
+	RP_BAR0,
+	RP_BAR1,
+	RP_NO_BAR
+};
+
+#define CDNS_PCIE_RP_MAX_IB	0x3
+#define CDNS_PCIE_MAX_OB	32
+
+struct cdns_pcie_rp_ib_bar {
+	u64 size;
+	bool free;
+};
+
+/* Endpoint Function BAR Inbound PCIe to AXI Address Translation Register */
+#define CDNS_PCIE_AT_IB_EP_FUNC_BAR_ADDR0(fn, bar) \
+	(CDNS_PCIE_AT_BASE + 0x0840 + (fn) * 0x0040 + (bar) * 0x0008)
+#define CDNS_PCIE_AT_IB_EP_FUNC_BAR_ADDR1(fn, bar) \
+	(CDNS_PCIE_AT_BASE + 0x0844 + (fn) * 0x0040 + (bar) * 0x0008)
+
+/* Normal/Vendor specific message access: offset inside some outbound region */
+#define CDNS_PCIE_NORMAL_MSG_ROUTING_MASK	GENMASK(7, 5)
+#define CDNS_PCIE_NORMAL_MSG_ROUTING(route) \
+	(((route) << 5) & CDNS_PCIE_NORMAL_MSG_ROUTING_MASK)
+#define CDNS_PCIE_NORMAL_MSG_CODE_MASK		GENMASK(15, 8)
+#define CDNS_PCIE_NORMAL_MSG_CODE(code) \
+	(((code) << 8) & CDNS_PCIE_NORMAL_MSG_CODE_MASK)
+#define CDNS_PCIE_MSG_NO_DATA			BIT(16)
+
+struct cdns_pcie;
+
+enum cdns_pcie_msg_code {
+	MSG_CODE_ASSERT_INTA	= 0x20,
+	MSG_CODE_ASSERT_INTB	= 0x21,
+	MSG_CODE_ASSERT_INTC	= 0x22,
+	MSG_CODE_ASSERT_INTD	= 0x23,
+	MSG_CODE_DEASSERT_INTA	= 0x24,
+	MSG_CODE_DEASSERT_INTB	= 0x25,
+	MSG_CODE_DEASSERT_INTC	= 0x26,
+	MSG_CODE_DEASSERT_INTD	= 0x27,
+};
+
+enum cdns_pcie_msg_routing {
+	/* Route to Root Complex */
+	MSG_ROUTING_TO_RC,
+
+	/* Use Address Routing */
+	MSG_ROUTING_BY_ADDR,
+
+	/* Use ID Routing */
+	MSG_ROUTING_BY_ID,
+
+	/* Route as Broadcast Message from Root Complex */
+	MSG_ROUTING_BCAST,
+
+	/* Local message; terminate at receiver (INTx messages) */
+	MSG_ROUTING_LOCAL,
+
+	/* Gather & route to Root Complex (PME_TO_Ack message) */
+	MSG_ROUTING_GATHER,
+};
+
+struct cdns_pcie_ops {
+	int	(*start_link)(struct cdns_pcie *pcie);
+	void	(*stop_link)(struct cdns_pcie *pcie);
+	bool	(*link_up)(struct cdns_pcie *pcie);
+	u64     (*cpu_addr_fixup)(struct cdns_pcie *pcie, u64 cpu_addr);
+};
+
+/**
+ * struct cdns_pcie - private data for Cadence PCIe controller drivers
+ * @reg_base: IO mapped register base
+ * @mem_res: start/end offsets in the physical system memory to map PCI accesses
+ * @dev: PCIe controller
+ * @is_rc: tell whether the PCIe controller mode is Root Complex or Endpoint.
+ * @phy_count: number of supported PHY devices
+ * @phy: list of pointers to specific PHY control blocks
+ * @link: list of pointers to corresponding device link representations
+ * @ops: Platform-specific ops to control various inputs from Cadence PCIe
+ *       wrapper
+ */
+struct cdns_pcie {
+	void __iomem		*reg_base;
+	struct resource		*mem_res;
+	struct device		*dev;
+	bool			is_rc;
+	int			phy_count;
+	struct phy		**phy;
+	struct device_link	**link;
+	const struct cdns_pcie_ops *ops;
+};
+
+/**
+ * struct cdns_pcie_rc - private data for this PCIe Root Complex driver
+ * @pcie: Cadence PCIe controller
+ * @dev: pointer to PCIe device
+ * @cfg_res: start/end offsets in the physical system memory to map PCI
+ *           configuration space accesses
+ * @cfg_base: IO mapped window to access the PCI configuration space of a
+ *            single function at a time
+ * @vendor_id: PCI vendor ID
+ * @device_id: PCI device ID
+ * @avail_ib_bar: Status of RP_BAR0, RP_BAR1 and RP_NO_BAR if it's free or
+ *                available
+ * @quirk_retrain_flag: Retrain link as quirk for PCIe Gen2
+ * @quirk_detect_quiet_flag: LTSSM Detect Quiet min delay set as quirk
+ */
+struct cdns_pcie_rc {
+	struct cdns_pcie	pcie;
+	struct resource		*cfg_res;
+	void __iomem		*cfg_base;
+	u32			vendor_id;
+	u32			device_id;
+	bool			avail_ib_bar[CDNS_PCIE_RP_MAX_IB];
+	unsigned int		quirk_retrain_flag:1;
+	unsigned int		quirk_detect_quiet_flag:1;
+};
+
+/**
+ * struct cdns_pcie_epf - Structure to hold info about endpoint function
+ * @epf: Info about virtual functions attached to the physical function
+ * @epf_bar: reference to the pci_epf_bar for the six Base Address Registers
+ */
+struct cdns_pcie_epf {
+	struct cdns_pcie_epf *epf;
+	struct pci_epf_bar *epf_bar[PCI_STD_NUM_BARS];
+};
+
+/**
+ * struct cdns_pcie_ep - private data for this PCIe endpoint controller driver
+ * @pcie: Cadence PCIe controller
+ * @max_regions: maximum number of regions supported by hardware
+ * @ob_region_map: bitmask of mapped outbound regions
+ * @ob_addr: base addresses in the AXI bus where the outbound regions start
+ * @irq_phys_addr: base address on the AXI bus where the MSI/legacy IRQ
+ *		   dedicated outbound regions is mapped.
+ * @irq_cpu_addr: base address in the CPU space where a write access triggers
+ *		  the sending of a memory write (MSI) / normal message (legacy
+ *		  IRQ) TLP through the PCIe bus.
+ * @irq_pci_addr: used to save the current mapping of the MSI/legacy IRQ
+ *		  dedicated outbound region.
+ * @irq_pci_fn: the latest PCI function that has updated the mapping of
+ *		the MSI/legacy IRQ dedicated outbound region.
+ * @irq_pending: bitmask of asserted legacy IRQs.
+ * @lock: spin lock to disable interrupts while modifying PCIe controller
+ *        registers fields (RMW) accessible by both remote RC and EP to
+ *        minimize time between read and write
+ * @epf: Structure to hold info about endpoint function
+ * @quirk_detect_quiet_flag: LTSSM Detect Quiet min delay set as quirk
+ * @quirk_disable_flr: Disable FLR (Function Level Reset) quirk flag
+ */
+struct cdns_pcie_ep {
+	struct cdns_pcie	pcie;
+	u32			max_regions;
+	unsigned long		ob_region_map;
+	phys_addr_t		*ob_addr;
+	phys_addr_t		irq_phys_addr;
+	void __iomem		*irq_cpu_addr;
+	u64			irq_pci_addr;
+	u8			irq_pci_fn;
+	u8			irq_pending;
+	/* protect writing to PCI_STATUS while raising legacy interrupts */
+	spinlock_t		lock;
+	struct cdns_pcie_epf	*epf;
+	unsigned int		quirk_detect_quiet_flag:1;
+	unsigned int		quirk_disable_flr:1;
+};
+
+
+/* Register access */
+static inline void cdns_pcie_writel(struct cdns_pcie *pcie, u32 reg, u32 value)
+{
+	writel(value, pcie->reg_base + reg);
+}
+
+static inline u32 cdns_pcie_readl(struct cdns_pcie *pcie, u32 reg)
+{
+	return readl(pcie->reg_base + reg);
+}
+
+static inline u32 cdns_pcie_read_sz(void __iomem *addr, int size)
+{
+	void __iomem *aligned_addr = PTR_ALIGN_DOWN(addr, 0x4);
+	unsigned int offset = (unsigned long)addr & 0x3;
+	u32 val = readl(aligned_addr);
+
+	if (!IS_ALIGNED((uintptr_t)addr, size)) {
+		pr_warn("Address %p and size %d are not aligned\n", addr, size);
+		return 0;
+	}
+
+	if (size > 2)
+		return val;
+
+	return (val >> (8 * offset)) & ((1 << (size * 8)) - 1);
+}
+
+static inline void cdns_pcie_write_sz(void __iomem *addr, int size, u32 value)
+{
+	void __iomem *aligned_addr = PTR_ALIGN_DOWN(addr, 0x4);
+	unsigned int offset = (unsigned long)addr & 0x3;
+	u32 mask;
+	u32 val;
+
+	if (!IS_ALIGNED((uintptr_t)addr, size)) {
+		pr_warn("Address %p and size %d are not aligned\n", addr, size);
+		return;
+	}
+
+	if (size > 2) {
+		writel(value, addr);
+		return;
+	}
+
+	mask = ~(((1 << (size * 8)) - 1) << (offset * 8));
+	val = readl(aligned_addr) & mask;
+	val |= value << (offset * 8);
+	writel(val, aligned_addr);
+}
+
+/* Root Port register access */
+static inline void cdns_pcie_rp_writeb(struct cdns_pcie *pcie,
+				       u32 reg, u8 value)
+{
+	void __iomem *addr = pcie->reg_base + CDNS_PCIE_RP_BASE + reg;
+
+	cdns_pcie_write_sz(addr, 0x1, value);
+}
+
+static inline void cdns_pcie_rp_writew(struct cdns_pcie *pcie,
+				       u32 reg, u16 value)
+{
+	void __iomem *addr = pcie->reg_base + CDNS_PCIE_RP_BASE + reg;
+
+	cdns_pcie_write_sz(addr, 0x2, value);
+}
+
+static inline u16 cdns_pcie_rp_readw(struct cdns_pcie *pcie, u32 reg)
+{
+	void __iomem *addr = pcie->reg_base + CDNS_PCIE_RP_BASE + reg;
+
+	return cdns_pcie_read_sz(addr, 0x2);
+}
+
+/* Endpoint Function register access */
+static inline void cdns_pcie_ep_fn_writeb(struct cdns_pcie *pcie, u8 fn,
+					  u32 reg, u8 value)
+{
+	void __iomem *addr = pcie->reg_base + CDNS_PCIE_EP_FUNC_BASE(fn) + reg;
+
+	cdns_pcie_write_sz(addr, 0x1, value);
+}
+
+static inline void cdns_pcie_ep_fn_writew(struct cdns_pcie *pcie, u8 fn,
+					  u32 reg, u16 value)
+{
+	void __iomem *addr = pcie->reg_base + CDNS_PCIE_EP_FUNC_BASE(fn) + reg;
+
+	cdns_pcie_write_sz(addr, 0x2, value);
+}
+
+static inline void cdns_pcie_ep_fn_writel(struct cdns_pcie *pcie, u8 fn,
+					  u32 reg, u32 value)
+{
+	writel(value, pcie->reg_base + CDNS_PCIE_EP_FUNC_BASE(fn) + reg);
+}
+
+static inline u16 cdns_pcie_ep_fn_readw(struct cdns_pcie *pcie, u8 fn, u32 reg)
+{
+	void __iomem *addr = pcie->reg_base + CDNS_PCIE_EP_FUNC_BASE(fn) + reg;
+
+	return cdns_pcie_read_sz(addr, 0x2);
+}
+
+static inline u32 cdns_pcie_ep_fn_readl(struct cdns_pcie *pcie, u8 fn, u32 reg)
+{
+	return readl(pcie->reg_base + CDNS_PCIE_EP_FUNC_BASE(fn) + reg);
+}
+
+static inline int cdns_pcie_start_link(struct cdns_pcie *pcie)
+{
+	if (pcie->ops->start_link)
+		return pcie->ops->start_link(pcie);
+
+	return 0;
+}
+
+static inline void cdns_pcie_stop_link(struct cdns_pcie *pcie)
+{
+	if (pcie->ops->stop_link)
+		pcie->ops->stop_link(pcie);
+}
+
+static inline bool cdns_pcie_link_up(struct cdns_pcie *pcie)
+{
+	if (pcie->ops->link_up)
+		return pcie->ops->link_up(pcie);
+
+	return true;
+}
+
+#ifdef CONFIG_PCIE_CADENCE_HOST
+int cdns_pcie_host_setup(struct cdns_pcie_rc *rc);
+void __iomem *cdns_pci_map_bus(struct pci_bus *bus, unsigned int devfn,
+			       int where);
+#else
+static inline int cdns_pcie_host_setup(struct cdns_pcie_rc *rc)
+{
+	return 0;
+}
+
+static inline void __iomem *cdns_pci_map_bus(struct pci_bus *bus, unsigned int devfn,
+					     int where)
+{
+	return NULL;
+}
+#endif
+
+#ifdef CONFIG_PCIE_CADENCE_EP
+int cdns_pcie_ep_setup(struct cdns_pcie_ep *ep);
+#else
+static inline int cdns_pcie_ep_setup(struct cdns_pcie_ep *ep)
+{
+	return 0;
+}
+#endif
+
+void cdns_pcie_detect_quiet_min_delay_set(struct cdns_pcie *pcie);
+
+void cdns_pcie_set_outbound_region(struct cdns_pcie *pcie, u8 busnr, u8 fn,
+				   u32 r, bool is_io,
+				   u64 cpu_addr, u64 pci_addr, size_t size);
+
+void cdns_pcie_set_outbound_region_for_normal_msg(struct cdns_pcie *pcie,
+						  u8 busnr, u8 fn,
+						  u32 r, u64 cpu_addr);
+
+void cdns_pcie_reset_outbound_region(struct cdns_pcie *pcie, u32 r);
+void cdns_pcie_disable_phy(struct cdns_pcie *pcie);
+int cdns_pcie_enable_phy(struct cdns_pcie *pcie);
+int cdns_pcie_init_phy(struct device *dev, struct cdns_pcie *pcie);
+extern const struct dev_pm_ops cdns_pcie_pm_ops;
+
+#endif /* _PCIE_CADENCE_H */
diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
new file mode 100644
index 000000000..ab96da43e
--- /dev/null
+++ b/drivers/pci/controller/dwc/Kconfig
@@ -0,0 +1,418 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "DesignWare-based PCIe controllers"
+	depends on PCI
+
+config PCIE_DW
+	bool
+
+config PCIE_DW_HOST
+	bool
+	select PCIE_DW
+
+config PCIE_DW_EP
+	bool
+	select PCIE_DW
+
+config PCIE_AL
+	bool "Amazon Annapurna Labs PCIe controller"
+	depends on OF && (ARM64 || COMPILE_TEST)
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	select PCI_ECAM
+	help
+	  Say Y here to enable support of the Amazon's Annapurna Labs PCIe
+	  controller IP on Amazon SoCs. The PCIe controller uses the DesignWare
+	  core plus Annapurna Labs proprietary hardware wrappers. This is
+	  required only for DT-based platforms. ACPI platforms with the
+	  Annapurna Labs PCIe controller don't need to enable this.
+
+config PCI_MESON
+	tristate "Amlogic Meson PCIe controller"
+	default m if ARCH_MESON
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	help
+	  Say Y here if you want to enable PCI controller support on Amlogic
+	  SoCs. The PCI controller on Amlogic is based on DesignWare hardware
+	  and therefore the driver re-uses the DesignWare core functions to
+	  implement the driver.
+
+config PCIE_ARTPEC6
+	bool
+
+config PCIE_ARTPEC6_HOST
+	bool "Axis ARTPEC-6 PCIe controller (host mode)"
+	depends on MACH_ARTPEC6 || COMPILE_TEST
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	select PCIE_ARTPEC6
+	help
+	  Enables support for the PCIe controller in the ARTPEC-6 SoC to work in
+	  host mode. This uses the DesignWare core.
+
+config PCIE_ARTPEC6_EP
+	bool "Axis ARTPEC-6 PCIe controller (endpoint mode)"
+	depends on MACH_ARTPEC6 || COMPILE_TEST
+	depends on PCI_ENDPOINT
+	select PCIE_DW_EP
+	select PCIE_ARTPEC6
+	help
+	  Enables support for the PCIe controller in the ARTPEC-6 SoC to work in
+	  endpoint mode. This uses the DesignWare core.
+
+config PCIE_BT1
+	tristate "Baikal-T1 PCIe controller"
+	depends on MIPS_BAIKAL_T1 || COMPILE_TEST
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	help
+	  Enables support for the PCIe controller in the Baikal-T1 SoC to work
+	  in host mode. It's based on the Synopsys DWC PCIe v4.60a IP-core.
+
+config PCI_IMX6
+	bool
+
+config PCI_IMX6_HOST
+	bool "Freescale i.MX6/7/8 PCIe controller (host mode)"
+	depends on ARCH_MXC || COMPILE_TEST
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	select PCI_IMX6
+	help
+	  Enables support for the PCIe controller in the i.MX SoCs to
+	  work in Root Complex mode. The PCI controller on i.MX is based
+	  on DesignWare hardware and therefore the driver re-uses the
+	  DesignWare core functions to implement the driver.
+
+config PCI_IMX6_EP
+	bool "Freescale i.MX6/7/8 PCIe controller (endpoint mode)"
+	depends on ARCH_MXC || COMPILE_TEST
+	depends on PCI_ENDPOINT
+	select PCIE_DW_EP
+	select PCI_IMX6
+	help
+	  Enables support for the PCIe controller in the i.MX SoCs to
+	  work in endpoint mode. The PCI controller on i.MX is based
+	  on DesignWare hardware and therefore the driver re-uses the
+	  DesignWare core functions to implement the driver.
+
+config PCI_LAYERSCAPE
+	bool "Freescale Layerscape PCIe controller (host mode)"
+	depends on OF && (ARM || ARCH_LAYERSCAPE || COMPILE_TEST)
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	select MFD_SYSCON
+	help
+	  Say Y here if you want to enable PCIe controller support on Layerscape
+	  SoCs to work in Host mode.
+	  This controller can work either as EP or RC. The RCW[HOST_AGT_PEX]
+	  determines which PCIe controller works in EP mode and which PCIe
+	  controller works in RC mode.
+
+config PCI_LAYERSCAPE_EP
+	bool "Freescale Layerscape PCIe controller (endpoint mode)"
+	depends on OF && (ARM || ARCH_LAYERSCAPE || COMPILE_TEST)
+	depends on PCI_ENDPOINT
+	select PCIE_DW_EP
+	help
+	  Say Y here if you want to enable PCIe controller support on Layerscape
+	  SoCs to work in Endpoint mode.
+	  This controller can work either as EP or RC. The RCW[HOST_AGT_PEX]
+	  determines which PCIe controller works in EP mode and which PCIe
+	  controller works in RC mode.
+
+config PCI_HISI
+	depends on OF && (ARM64 || COMPILE_TEST)
+	bool "HiSilicon Hip05 and Hip06 SoCs PCIe controller"
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	select PCI_HOST_COMMON
+	help
+	  Say Y here if you want PCIe controller support on HiSilicon
+	  Hip05 and Hip06 SoCs
+
+config PCIE_KIRIN
+	depends on OF && (ARM64 || COMPILE_TEST)
+	tristate "HiSilicon Kirin PCIe controller"
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	select REGMAP_MMIO
+	help
+	  Say Y here if you want PCIe controller support
+	  on HiSilicon Kirin series SoCs.
+
+config PCIE_HISI_STB
+	bool "HiSilicon STB PCIe controller"
+	depends on ARCH_HISI || COMPILE_TEST
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	help
+	  Say Y here if you want PCIe controller support on HiSilicon STB SoCs
+
+config PCIE_INTEL_GW
+	bool "Intel Gateway PCIe controller "
+	depends on OF && (X86 || COMPILE_TEST)
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	help
+	  Say 'Y' here to enable PCIe Host controller support on Intel
+	  Gateway SoCs.
+	  The PCIe controller uses the DesignWare core plus Intel-specific
+	  hardware wrappers.
+
+config PCIE_KEEMBAY
+	bool
+
+config PCIE_KEEMBAY_HOST
+	bool "Intel Keem Bay PCIe controller (host mode)"
+	depends on ARCH_KEEMBAY || COMPILE_TEST
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	select PCIE_KEEMBAY
+	help
+	  Say 'Y' here to enable support for the PCIe controller in Keem Bay
+	  to work in host mode.
+	  The PCIe controller is based on DesignWare Hardware and uses
+	  DesignWare core functions.
+
+config PCIE_KEEMBAY_EP
+	bool "Intel Keem Bay PCIe controller (endpoint mode)"
+	depends on ARCH_KEEMBAY || COMPILE_TEST
+	depends on PCI_MSI
+	depends on PCI_ENDPOINT
+	select PCIE_DW_EP
+	select PCIE_KEEMBAY
+	help
+	  Say 'Y' here to enable support for the PCIe controller in Keem Bay
+	  to work in endpoint mode.
+	  The PCIe controller is based on DesignWare Hardware and uses
+	  DesignWare core functions.
+
+config PCIE_ARMADA_8K
+	bool "Marvell Armada-8K PCIe controller"
+	depends on ARCH_MVEBU || COMPILE_TEST
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	help
+	  Say Y here if you want to enable PCIe controller support on
+	  Armada-8K SoCs. The PCIe controller on Armada-8K is based on
+	  DesignWare hardware and therefore the driver re-uses the
+	  DesignWare core functions to implement the driver.
+
+config PCIE_TEGRA194
+	tristate
+
+config PCIE_TEGRA194_HOST
+	tristate "NVIDIA Tegra194 (and later) PCIe controller (host mode)"
+	depends on ARCH_TEGRA_194_SOC || COMPILE_TEST
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	select PHY_TEGRA194_P2U
+	select PCIE_TEGRA194
+	help
+	  Enables support for the PCIe controller in the NVIDIA Tegra194 SoC to
+	  work in host mode. There are two instances of PCIe controllers in
+	  Tegra194. This controller can work either as EP or RC. In order to
+	  enable host-specific features PCIE_TEGRA194_HOST must be selected and
+	  in order to enable device-specific features PCIE_TEGRA194_EP must be
+	  selected. This uses the DesignWare core.
+
+config PCIE_TEGRA194_EP
+	tristate "NVIDIA Tegra194 (and later) PCIe controller (endpoint mode)"
+	depends on ARCH_TEGRA_194_SOC || COMPILE_TEST
+	depends on PCI_ENDPOINT
+	select PCIE_DW_EP
+	select PHY_TEGRA194_P2U
+	select PCIE_TEGRA194
+	help
+	  Enables support for the PCIe controller in the NVIDIA Tegra194 SoC to
+	  work in endpoint mode. There are two instances of PCIe controllers in
+	  Tegra194. This controller can work either as EP or RC. In order to
+	  enable host-specific features PCIE_TEGRA194_HOST must be selected and
+	  in order to enable device-specific features PCIE_TEGRA194_EP must be
+	  selected. This uses the DesignWare core.
+
+config PCIE_DW_PLAT
+	bool
+
+config PCIE_DW_PLAT_HOST
+	bool "Platform bus based DesignWare PCIe controller (host mode)"
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	select PCIE_DW_PLAT
+	help
+	  Enables support for the PCIe controller in the Designware IP to
+	  work in host mode. There are two instances of PCIe controller in
+	  Designware IP.
+	  This controller can work either as EP or RC. In order to enable
+	  host-specific features PCIE_DW_PLAT_HOST must be selected and in
+	  order to enable device-specific features PCI_DW_PLAT_EP must be
+	  selected.
+
+config PCIE_DW_PLAT_EP
+	bool "Platform bus based DesignWare PCIe controller (endpoint mode)"
+	depends on PCI && PCI_MSI
+	depends on PCI_ENDPOINT
+	select PCIE_DW_EP
+	select PCIE_DW_PLAT
+	help
+	  Enables support for the PCIe controller in the Designware IP to
+	  work in endpoint mode. There are two instances of PCIe controller
+	  in Designware IP.
+	  This controller can work either as EP or RC. In order to enable
+	  host-specific features PCIE_DW_PLAT_HOST must be selected and in
+	  order to enable device-specific features PCI_DW_PLAT_EP must be
+	  selected.
+
+config PCIE_QCOM
+	bool "Qualcomm PCIe controller (host mode)"
+	depends on OF && (ARCH_QCOM || COMPILE_TEST)
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	select CRC8
+	help
+	  Say Y here to enable PCIe controller support on Qualcomm SoCs. The
+	  PCIe controller uses the DesignWare core plus Qualcomm-specific
+	  hardware wrappers.
+
+config PCIE_QCOM_EP
+	tristate "Qualcomm PCIe controller (endpoint mode)"
+	depends on OF && (ARCH_QCOM || COMPILE_TEST)
+	depends on PCI_ENDPOINT
+	select PCIE_DW_EP
+	help
+	  Say Y here to enable support for the PCIe controllers on Qualcomm SoCs
+	  to work in endpoint mode. The PCIe controller uses the DesignWare core
+	  plus Qualcomm-specific hardware wrappers.
+
+config PCIE_ROCKCHIP_DW_HOST
+	bool "Rockchip DesignWare PCIe controller"
+	select PCIE_DW
+	select PCIE_DW_HOST
+	depends on PCI_MSI
+	depends on ARCH_ROCKCHIP || COMPILE_TEST
+	depends on OF
+	help
+	  Enables support for the DesignWare PCIe controller in the
+	  Rockchip SoC except RK3399.
+
+config PCI_EXYNOS
+	tristate "Samsung Exynos PCIe controller"
+	depends on ARCH_EXYNOS || COMPILE_TEST
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	help
+	  Enables support for the PCIe controller in the Samsung Exynos SoCs
+	  to work in host mode. The PCI controller is based on the DesignWare
+	  hardware and therefore the driver re-uses the DesignWare core
+	  functions to implement the driver.
+
+config PCIE_FU740
+	bool "SiFive FU740 PCIe controller"
+	depends on PCI_MSI
+	depends on SOC_SIFIVE || COMPILE_TEST
+	select PCIE_DW_HOST
+	help
+	  Say Y here if you want PCIe controller support for the SiFive
+	  FU740.
+
+config PCIE_UNIPHIER
+	bool "Socionext UniPhier PCIe controller (host mode)"
+	depends on ARCH_UNIPHIER || COMPILE_TEST
+	depends on OF && HAS_IOMEM
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	help
+	  Say Y here if you want PCIe host controller support on UniPhier SoCs.
+	  This driver supports LD20 and PXs3 SoCs.
+
+config PCIE_UNIPHIER_EP
+	bool "Socionext UniPhier PCIe controller (endpoint mode)"
+	depends on ARCH_UNIPHIER || COMPILE_TEST
+	depends on OF && HAS_IOMEM
+	depends on PCI_ENDPOINT
+	select PCIE_DW_EP
+	help
+	  Say Y here if you want PCIe endpoint controller support on
+	  UniPhier SoCs. This driver supports Pro5 SoC.
+
+config PCIE_SPEAR13XX
+	bool "STMicroelectronics SPEAr PCIe controller"
+	depends on ARCH_SPEAR13XX || COMPILE_TEST
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	help
+	  Say Y here if you want PCIe support on SPEAr13XX SoCs.
+
+config PCI_DRA7XX
+	tristate
+
+config PCI_DRA7XX_HOST
+	tristate "TI DRA7xx PCIe controller (host mode)"
+	depends on SOC_DRA7XX || COMPILE_TEST
+	depends on OF && HAS_IOMEM && TI_PIPE3
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	select PCI_DRA7XX
+	default y if SOC_DRA7XX
+	help
+	  Enables support for the PCIe controller in the DRA7xx SoC to work in
+	  host mode. There are two instances of PCIe controller in DRA7xx.
+	  This controller can work either as EP or RC. In order to enable
+	  host-specific features PCI_DRA7XX_HOST must be selected and in order
+	  to enable device-specific features PCI_DRA7XX_EP must be selected.
+	  This uses the DesignWare core.
+
+config PCI_DRA7XX_EP
+	tristate "TI DRA7xx PCIe controller (endpoint mode)"
+	depends on SOC_DRA7XX || COMPILE_TEST
+	depends on OF && HAS_IOMEM && TI_PIPE3
+	depends on PCI_ENDPOINT
+	select PCIE_DW_EP
+	select PCI_DRA7XX
+	help
+	  Enables support for the PCIe controller in the DRA7xx SoC to work in
+	  endpoint mode. There are two instances of PCIe controller in DRA7xx.
+	  This controller can work either as EP or RC. In order to enable
+	  host-specific features PCI_DRA7XX_HOST must be selected and in order
+	  to enable device-specific features PCI_DRA7XX_EP must be selected.
+	  This uses the DesignWare core.
+
+config PCI_KEYSTONE
+	bool
+
+config PCI_KEYSTONE_HOST
+	bool "TI Keystone PCIe controller (host mode)"
+	depends on ARCH_KEYSTONE || ARCH_K3 || COMPILE_TEST
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	select PCI_KEYSTONE
+	help
+	  Enables support for the PCIe controller in the Keystone SoC to
+	  work in host mode. The PCI controller on Keystone is based on
+	  DesignWare hardware and therefore the driver re-uses the
+	  DesignWare core functions to implement the driver.
+
+config PCI_KEYSTONE_EP
+	bool "TI Keystone PCIe controller (endpoint mode)"
+	depends on ARCH_KEYSTONE || ARCH_K3 || COMPILE_TEST
+	depends on PCI_ENDPOINT
+	select PCIE_DW_EP
+	select PCI_KEYSTONE
+	help
+	  Enables support for the PCIe controller in the Keystone SoC to
+	  work in endpoint mode. The PCI controller on Keystone is based
+	  on DesignWare hardware and therefore the driver re-uses the
+	  DesignWare core functions to implement the driver.
+
+config PCIE_VISCONTI_HOST
+	bool "Toshiba Visconti PCIe controller"
+	depends on ARCH_VISCONTI || COMPILE_TEST
+	depends on PCI_MSI
+	select PCIE_DW_HOST
+	help
+	  Say Y here if you want PCIe controller support on Toshiba Visconti SoC.
+	  This driver supports TMPV7708 SoC.
+
+endmenu
diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
new file mode 100644
index 000000000..bf5c31187
--- /dev/null
+++ b/drivers/pci/controller/dwc/Makefile
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_PCIE_DW) += pcie-designware.o
+obj-$(CONFIG_PCIE_DW_HOST) += pcie-designware-host.o
+obj-$(CONFIG_PCIE_DW_EP) += pcie-designware-ep.o
+obj-$(CONFIG_PCIE_DW_PLAT) += pcie-designware-plat.o
+obj-$(CONFIG_PCIE_BT1) += pcie-bt1.o
+obj-$(CONFIG_PCI_DRA7XX) += pci-dra7xx.o
+obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o
+obj-$(CONFIG_PCIE_FU740) += pcie-fu740.o
+obj-$(CONFIG_PCI_IMX6) += pci-imx6.o
+obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o
+obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone.o
+obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
+obj-$(CONFIG_PCI_LAYERSCAPE_EP) += pci-layerscape-ep.o
+obj-$(CONFIG_PCIE_QCOM) += pcie-qcom.o
+obj-$(CONFIG_PCIE_QCOM_EP) += pcie-qcom-ep.o
+obj-$(CONFIG_PCIE_ARMADA_8K) += pcie-armada8k.o
+obj-$(CONFIG_PCIE_ARTPEC6) += pcie-artpec6.o
+obj-$(CONFIG_PCIE_ROCKCHIP_DW_HOST) += pcie-dw-rockchip.o
+obj-$(CONFIG_PCIE_INTEL_GW) += pcie-intel-gw.o
+obj-$(CONFIG_PCIE_KEEMBAY) += pcie-keembay.o
+obj-$(CONFIG_PCIE_KIRIN) += pcie-kirin.o
+obj-$(CONFIG_PCIE_HISI_STB) += pcie-histb.o
+obj-$(CONFIG_PCI_MESON) += pci-meson.o
+obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
+obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
+obj-$(CONFIG_PCIE_UNIPHIER_EP) += pcie-uniphier-ep.o
+obj-$(CONFIG_PCIE_VISCONTI_HOST) += pcie-visconti.o
+
+# The following drivers are for devices that use the generic ACPI
+# pci_root.c driver but don't support standard ECAM config access.
+# They contain MCFG quirks to replace the generic ECAM accessors with
+# device-specific ones that are shared with the DT driver.
+
+# The ACPI driver is generic and should not require driver-specific
+# config options to be enabled, so we always build these drivers on
+# ARM64 and use internal ifdefs to only build the pieces we need
+# depending on whether ACPI, the DT driver, or both are enabled.
+
+obj-$(CONFIG_PCIE_AL) += pcie-al.o
+obj-$(CONFIG_PCI_HISI) += pcie-hisi.o
+
+ifdef CONFIG_ACPI
+ifdef CONFIG_PCI_QUIRKS
+obj-$(CONFIG_ARM64) += pcie-al.o
+obj-$(CONFIG_ARM64) += pcie-hisi.o
+obj-$(CONFIG_ARM64) += pcie-tegra194-acpi.o
+endif
+endif
diff --git a/drivers/pci/controller/dwc/pci-dra7xx.c b/drivers/pci/controller/dwc/pci-dra7xx.c
new file mode 100644
index 000000000..b445ffe95
--- /dev/null
+++ b/drivers/pci/controller/dwc/pci-dra7xx.c
@@ -0,0 +1,960 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * pcie-dra7xx - PCIe controller driver for TI DRA7xx SoCs
+ *
+ * Copyright (C) 2013-2014 Texas Instruments Incorporated - https://www.ti.com
+ *
+ * Authors: Kishon Vijay Abraham I <kishon@ti.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/resource.h>
+#include <linux/types.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
+#include <linux/gpio/consumer.h>
+
+#include "../../pci.h"
+#include "pcie-designware.h"
+
+/* PCIe controller wrapper DRA7XX configuration registers */
+
+#define	PCIECTRL_DRA7XX_CONF_IRQSTATUS_MAIN		0x0024
+#define	PCIECTRL_DRA7XX_CONF_IRQENABLE_SET_MAIN		0x0028
+#define	ERR_SYS						BIT(0)
+#define	ERR_FATAL					BIT(1)
+#define	ERR_NONFATAL					BIT(2)
+#define	ERR_COR						BIT(3)
+#define	ERR_AXI						BIT(4)
+#define	ERR_ECRC					BIT(5)
+#define	PME_TURN_OFF					BIT(8)
+#define	PME_TO_ACK					BIT(9)
+#define	PM_PME						BIT(10)
+#define	LINK_REQ_RST					BIT(11)
+#define	LINK_UP_EVT					BIT(12)
+#define	CFG_BME_EVT					BIT(13)
+#define	CFG_MSE_EVT					BIT(14)
+#define	INTERRUPTS (ERR_SYS | ERR_FATAL | ERR_NONFATAL | ERR_COR | ERR_AXI | \
+			ERR_ECRC | PME_TURN_OFF | PME_TO_ACK | PM_PME | \
+			LINK_REQ_RST | LINK_UP_EVT | CFG_BME_EVT | CFG_MSE_EVT)
+
+#define	PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI		0x0034
+#define	PCIECTRL_DRA7XX_CONF_IRQENABLE_SET_MSI		0x0038
+#define	INTA						BIT(0)
+#define	INTB						BIT(1)
+#define	INTC						BIT(2)
+#define	INTD						BIT(3)
+#define	MSI						BIT(4)
+#define	LEG_EP_INTERRUPTS (INTA | INTB | INTC | INTD)
+
+#define	PCIECTRL_TI_CONF_DEVICE_TYPE			0x0100
+#define	DEVICE_TYPE_EP					0x0
+#define	DEVICE_TYPE_LEG_EP				0x1
+#define	DEVICE_TYPE_RC					0x4
+
+#define	PCIECTRL_DRA7XX_CONF_DEVICE_CMD			0x0104
+#define	LTSSM_EN					0x1
+
+#define	PCIECTRL_DRA7XX_CONF_PHY_CS			0x010C
+#define	LINK_UP						BIT(16)
+#define	DRA7XX_CPU_TO_BUS_ADDR				0x0FFFFFFF
+
+#define	PCIECTRL_TI_CONF_INTX_ASSERT			0x0124
+#define	PCIECTRL_TI_CONF_INTX_DEASSERT			0x0128
+
+#define	PCIECTRL_TI_CONF_MSI_XMT			0x012c
+#define MSI_REQ_GRANT					BIT(0)
+#define MSI_VECTOR_SHIFT				7
+
+#define PCIE_1LANE_2LANE_SELECTION			BIT(13)
+#define PCIE_B1C0_MODE_SEL				BIT(2)
+#define PCIE_B0_B1_TSYNCEN				BIT(0)
+
+struct dra7xx_pcie {
+	struct dw_pcie		*pci;
+	void __iomem		*base;		/* DT ti_conf */
+	int			phy_count;	/* DT phy-names count */
+	struct phy		**phy;
+	struct irq_domain	*irq_domain;
+	struct clk              *clk;
+	enum dw_pcie_device_mode mode;
+};
+
+struct dra7xx_pcie_of_data {
+	enum dw_pcie_device_mode mode;
+	u32 b1co_mode_sel_mask;
+};
+
+#define to_dra7xx_pcie(x)	dev_get_drvdata((x)->dev)
+
+static inline u32 dra7xx_pcie_readl(struct dra7xx_pcie *pcie, u32 offset)
+{
+	return readl(pcie->base + offset);
+}
+
+static inline void dra7xx_pcie_writel(struct dra7xx_pcie *pcie, u32 offset,
+				      u32 value)
+{
+	writel(value, pcie->base + offset);
+}
+
+static u64 dra7xx_pcie_cpu_addr_fixup(struct dw_pcie *pci, u64 pci_addr)
+{
+	return pci_addr & DRA7XX_CPU_TO_BUS_ADDR;
+}
+
+static int dra7xx_pcie_link_up(struct dw_pcie *pci)
+{
+	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci);
+	u32 reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_PHY_CS);
+
+	return !!(reg & LINK_UP);
+}
+
+static void dra7xx_pcie_stop_link(struct dw_pcie *pci)
+{
+	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci);
+	u32 reg;
+
+	reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_DEVICE_CMD);
+	reg &= ~LTSSM_EN;
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_DEVICE_CMD, reg);
+}
+
+static int dra7xx_pcie_establish_link(struct dw_pcie *pci)
+{
+	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci);
+	struct device *dev = pci->dev;
+	u32 reg;
+
+	if (dw_pcie_link_up(pci)) {
+		dev_err(dev, "link is already up\n");
+		return 0;
+	}
+
+	reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_DEVICE_CMD);
+	reg |= LTSSM_EN;
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_DEVICE_CMD, reg);
+
+	return 0;
+}
+
+static void dra7xx_pcie_enable_msi_interrupts(struct dra7xx_pcie *dra7xx)
+{
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI,
+			   LEG_EP_INTERRUPTS | MSI);
+
+	dra7xx_pcie_writel(dra7xx,
+			   PCIECTRL_DRA7XX_CONF_IRQENABLE_SET_MSI,
+			   MSI | LEG_EP_INTERRUPTS);
+}
+
+static void dra7xx_pcie_enable_wrapper_interrupts(struct dra7xx_pcie *dra7xx)
+{
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MAIN,
+			   INTERRUPTS);
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQENABLE_SET_MAIN,
+			   INTERRUPTS);
+}
+
+static void dra7xx_pcie_enable_interrupts(struct dra7xx_pcie *dra7xx)
+{
+	dra7xx_pcie_enable_wrapper_interrupts(dra7xx);
+	dra7xx_pcie_enable_msi_interrupts(dra7xx);
+}
+
+static int dra7xx_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci);
+
+	dra7xx_pcie_enable_interrupts(dra7xx);
+
+	return 0;
+}
+
+static int dra7xx_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
+				irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &dummy_irq_chip, handle_simple_irq);
+	irq_set_chip_data(irq, domain->host_data);
+
+	return 0;
+}
+
+static const struct irq_domain_ops intx_domain_ops = {
+	.map = dra7xx_pcie_intx_map,
+	.xlate = pci_irqd_intx_xlate,
+};
+
+static int dra7xx_pcie_handle_msi(struct dw_pcie_rp *pp, int index)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	unsigned long val;
+	int pos;
+
+	val = dw_pcie_readl_dbi(pci, PCIE_MSI_INTR0_STATUS +
+				   (index * MSI_REG_CTRL_BLOCK_SIZE));
+	if (!val)
+		return 0;
+
+	pos = find_first_bit(&val, MAX_MSI_IRQS_PER_CTRL);
+	while (pos != MAX_MSI_IRQS_PER_CTRL) {
+		generic_handle_domain_irq(pp->irq_domain,
+					  (index * MAX_MSI_IRQS_PER_CTRL) + pos);
+		pos++;
+		pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL, pos);
+	}
+
+	return 1;
+}
+
+static void dra7xx_pcie_handle_msi_irq(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	int ret, i, count, num_ctrls;
+
+	num_ctrls = pp->num_vectors / MAX_MSI_IRQS_PER_CTRL;
+
+	/**
+	 * Need to make sure all MSI status bits read 0 before exiting.
+	 * Else, new MSI IRQs are not registered by the wrapper. Have an
+	 * upperbound for the loop and exit the IRQ in case of IRQ flood
+	 * to avoid locking up system in interrupt context.
+	 */
+	count = 0;
+	do {
+		ret = 0;
+
+		for (i = 0; i < num_ctrls; i++)
+			ret |= dra7xx_pcie_handle_msi(pp, i);
+		count++;
+	} while (ret && count <= 1000);
+
+	if (count > 1000)
+		dev_warn_ratelimited(pci->dev,
+				     "Too many MSI IRQs to handle\n");
+}
+
+static void dra7xx_pcie_msi_irq_handler(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct dra7xx_pcie *dra7xx;
+	struct dw_pcie_rp *pp;
+	struct dw_pcie *pci;
+	unsigned long reg;
+	u32 bit;
+
+	chained_irq_enter(chip, desc);
+
+	pp = irq_desc_get_handler_data(desc);
+	pci = to_dw_pcie_from_pp(pp);
+	dra7xx = to_dra7xx_pcie(pci);
+
+	reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI);
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI, reg);
+
+	switch (reg) {
+	case MSI:
+		dra7xx_pcie_handle_msi_irq(pp);
+		break;
+	case INTA:
+	case INTB:
+	case INTC:
+	case INTD:
+		for_each_set_bit(bit, &reg, PCI_NUM_INTX)
+			generic_handle_domain_irq(dra7xx->irq_domain, bit);
+		break;
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static irqreturn_t dra7xx_pcie_irq_handler(int irq, void *arg)
+{
+	struct dra7xx_pcie *dra7xx = arg;
+	struct dw_pcie *pci = dra7xx->pci;
+	struct device *dev = pci->dev;
+	struct dw_pcie_ep *ep = &pci->ep;
+	u32 reg;
+
+	reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MAIN);
+
+	if (reg & ERR_SYS)
+		dev_dbg(dev, "System Error\n");
+
+	if (reg & ERR_FATAL)
+		dev_dbg(dev, "Fatal Error\n");
+
+	if (reg & ERR_NONFATAL)
+		dev_dbg(dev, "Non Fatal Error\n");
+
+	if (reg & ERR_COR)
+		dev_dbg(dev, "Correctable Error\n");
+
+	if (reg & ERR_AXI)
+		dev_dbg(dev, "AXI tag lookup fatal Error\n");
+
+	if (reg & ERR_ECRC)
+		dev_dbg(dev, "ECRC Error\n");
+
+	if (reg & PME_TURN_OFF)
+		dev_dbg(dev,
+			"Power Management Event Turn-Off message received\n");
+
+	if (reg & PME_TO_ACK)
+		dev_dbg(dev,
+			"Power Management Turn-Off Ack message received\n");
+
+	if (reg & PM_PME)
+		dev_dbg(dev, "PM Power Management Event message received\n");
+
+	if (reg & LINK_REQ_RST)
+		dev_dbg(dev, "Link Request Reset\n");
+
+	if (reg & LINK_UP_EVT) {
+		if (dra7xx->mode == DW_PCIE_EP_TYPE)
+			dw_pcie_ep_linkup(ep);
+		dev_dbg(dev, "Link-up state change\n");
+	}
+
+	if (reg & CFG_BME_EVT)
+		dev_dbg(dev, "CFG 'Bus Master Enable' change\n");
+
+	if (reg & CFG_MSE_EVT)
+		dev_dbg(dev, "CFG 'Memory Space Enable' change\n");
+
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MAIN, reg);
+
+	return IRQ_HANDLED;
+}
+
+static int dra7xx_pcie_init_irq_domain(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct device *dev = pci->dev;
+	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci);
+	struct device_node *node = dev->of_node;
+	struct device_node *pcie_intc_node =  of_get_next_child(node, NULL);
+
+	if (!pcie_intc_node) {
+		dev_err(dev, "No PCIe Intc node found\n");
+		return -ENODEV;
+	}
+
+	irq_set_chained_handler_and_data(pp->irq, dra7xx_pcie_msi_irq_handler,
+					 pp);
+	dra7xx->irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
+						   &intx_domain_ops, pp);
+	of_node_put(pcie_intc_node);
+	if (!dra7xx->irq_domain) {
+		dev_err(dev, "Failed to get a INTx IRQ domain\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static const struct dw_pcie_host_ops dra7xx_pcie_host_ops = {
+	.host_init = dra7xx_pcie_host_init,
+};
+
+static void dra7xx_pcie_ep_init(struct dw_pcie_ep *ep)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci);
+	enum pci_barno bar;
+
+	for (bar = 0; bar < PCI_STD_NUM_BARS; bar++)
+		dw_pcie_ep_reset_bar(pci, bar);
+
+	dra7xx_pcie_enable_wrapper_interrupts(dra7xx);
+}
+
+static void dra7xx_pcie_raise_legacy_irq(struct dra7xx_pcie *dra7xx)
+{
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_TI_CONF_INTX_ASSERT, 0x1);
+	mdelay(1);
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_TI_CONF_INTX_DEASSERT, 0x1);
+}
+
+static void dra7xx_pcie_raise_msi_irq(struct dra7xx_pcie *dra7xx,
+				      u8 interrupt_num)
+{
+	u32 reg;
+
+	reg = (interrupt_num - 1) << MSI_VECTOR_SHIFT;
+	reg |= MSI_REQ_GRANT;
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_TI_CONF_MSI_XMT, reg);
+}
+
+static int dra7xx_pcie_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
+				 enum pci_epc_irq_type type, u16 interrupt_num)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci);
+
+	switch (type) {
+	case PCI_EPC_IRQ_LEGACY:
+		dra7xx_pcie_raise_legacy_irq(dra7xx);
+		break;
+	case PCI_EPC_IRQ_MSI:
+		dra7xx_pcie_raise_msi_irq(dra7xx, interrupt_num);
+		break;
+	default:
+		dev_err(pci->dev, "UNKNOWN IRQ type\n");
+	}
+
+	return 0;
+}
+
+static const struct pci_epc_features dra7xx_pcie_epc_features = {
+	.linkup_notifier = true,
+	.msi_capable = true,
+	.msix_capable = false,
+};
+
+static const struct pci_epc_features*
+dra7xx_pcie_get_features(struct dw_pcie_ep *ep)
+{
+	return &dra7xx_pcie_epc_features;
+}
+
+static const struct dw_pcie_ep_ops pcie_ep_ops = {
+	.ep_init = dra7xx_pcie_ep_init,
+	.raise_irq = dra7xx_pcie_raise_irq,
+	.get_features = dra7xx_pcie_get_features,
+};
+
+static int dra7xx_add_pcie_ep(struct dra7xx_pcie *dra7xx,
+			      struct platform_device *pdev)
+{
+	int ret;
+	struct dw_pcie_ep *ep;
+	struct device *dev = &pdev->dev;
+	struct dw_pcie *pci = dra7xx->pci;
+
+	ep = &pci->ep;
+	ep->ops = &pcie_ep_ops;
+
+	pci->dbi_base = devm_platform_ioremap_resource_byname(pdev, "ep_dbics");
+	if (IS_ERR(pci->dbi_base))
+		return PTR_ERR(pci->dbi_base);
+
+	pci->dbi_base2 =
+		devm_platform_ioremap_resource_byname(pdev, "ep_dbics2");
+	if (IS_ERR(pci->dbi_base2))
+		return PTR_ERR(pci->dbi_base2);
+
+	ret = dw_pcie_ep_init(ep);
+	if (ret) {
+		dev_err(dev, "failed to initialize endpoint\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int dra7xx_add_pcie_port(struct dra7xx_pcie *dra7xx,
+				struct platform_device *pdev)
+{
+	int ret;
+	struct dw_pcie *pci = dra7xx->pci;
+	struct dw_pcie_rp *pp = &pci->pp;
+	struct device *dev = pci->dev;
+
+	pp->irq = platform_get_irq(pdev, 1);
+	if (pp->irq < 0)
+		return pp->irq;
+
+	/* MSI IRQ is muxed */
+	pp->msi_irq[0] = -ENODEV;
+
+	ret = dra7xx_pcie_init_irq_domain(pp);
+	if (ret < 0)
+		return ret;
+
+	pci->dbi_base = devm_platform_ioremap_resource_byname(pdev, "rc_dbics");
+	if (IS_ERR(pci->dbi_base))
+		return PTR_ERR(pci->dbi_base);
+
+	pp->ops = &dra7xx_pcie_host_ops;
+
+	ret = dw_pcie_host_init(pp);
+	if (ret) {
+		dev_err(dev, "failed to initialize host\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+	.cpu_addr_fixup = dra7xx_pcie_cpu_addr_fixup,
+	.start_link = dra7xx_pcie_establish_link,
+	.stop_link = dra7xx_pcie_stop_link,
+	.link_up = dra7xx_pcie_link_up,
+};
+
+static void dra7xx_pcie_disable_phy(struct dra7xx_pcie *dra7xx)
+{
+	int phy_count = dra7xx->phy_count;
+
+	while (phy_count--) {
+		phy_power_off(dra7xx->phy[phy_count]);
+		phy_exit(dra7xx->phy[phy_count]);
+	}
+}
+
+static int dra7xx_pcie_enable_phy(struct dra7xx_pcie *dra7xx)
+{
+	int phy_count = dra7xx->phy_count;
+	int ret;
+	int i;
+
+	for (i = 0; i < phy_count; i++) {
+		ret = phy_set_mode(dra7xx->phy[i], PHY_MODE_PCIE);
+		if (ret < 0)
+			goto err_phy;
+
+		ret = phy_init(dra7xx->phy[i]);
+		if (ret < 0)
+			goto err_phy;
+
+		ret = phy_power_on(dra7xx->phy[i]);
+		if (ret < 0) {
+			phy_exit(dra7xx->phy[i]);
+			goto err_phy;
+		}
+	}
+
+	return 0;
+
+err_phy:
+	while (--i >= 0) {
+		phy_power_off(dra7xx->phy[i]);
+		phy_exit(dra7xx->phy[i]);
+	}
+
+	return ret;
+}
+
+static const struct dra7xx_pcie_of_data dra7xx_pcie_rc_of_data = {
+	.mode = DW_PCIE_RC_TYPE,
+};
+
+static const struct dra7xx_pcie_of_data dra7xx_pcie_ep_of_data = {
+	.mode = DW_PCIE_EP_TYPE,
+};
+
+static const struct dra7xx_pcie_of_data dra746_pcie_rc_of_data = {
+	.b1co_mode_sel_mask = BIT(2),
+	.mode = DW_PCIE_RC_TYPE,
+};
+
+static const struct dra7xx_pcie_of_data dra726_pcie_rc_of_data = {
+	.b1co_mode_sel_mask = GENMASK(3, 2),
+	.mode = DW_PCIE_RC_TYPE,
+};
+
+static const struct dra7xx_pcie_of_data dra746_pcie_ep_of_data = {
+	.b1co_mode_sel_mask = BIT(2),
+	.mode = DW_PCIE_EP_TYPE,
+};
+
+static const struct dra7xx_pcie_of_data dra726_pcie_ep_of_data = {
+	.b1co_mode_sel_mask = GENMASK(3, 2),
+	.mode = DW_PCIE_EP_TYPE,
+};
+
+static const struct of_device_id of_dra7xx_pcie_match[] = {
+	{
+		.compatible = "ti,dra7-pcie",
+		.data = &dra7xx_pcie_rc_of_data,
+	},
+	{
+		.compatible = "ti,dra7-pcie-ep",
+		.data = &dra7xx_pcie_ep_of_data,
+	},
+	{
+		.compatible = "ti,dra746-pcie-rc",
+		.data = &dra746_pcie_rc_of_data,
+	},
+	{
+		.compatible = "ti,dra726-pcie-rc",
+		.data = &dra726_pcie_rc_of_data,
+	},
+	{
+		.compatible = "ti,dra746-pcie-ep",
+		.data = &dra746_pcie_ep_of_data,
+	},
+	{
+		.compatible = "ti,dra726-pcie-ep",
+		.data = &dra726_pcie_ep_of_data,
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, of_dra7xx_pcie_match);
+
+/*
+ * dra7xx_pcie_unaligned_memaccess: workaround for AM572x/AM571x Errata i870
+ * @dra7xx: the dra7xx device where the workaround should be applied
+ *
+ * Access to the PCIe slave port that are not 32-bit aligned will result
+ * in incorrect mapping to TLP Address and Byte enable fields. Therefore,
+ * byte and half-word accesses are not possible to byte offset 0x1, 0x2, or
+ * 0x3.
+ *
+ * To avoid this issue set PCIE_SS1_AXI2OCP_LEGACY_MODE_ENABLE to 1.
+ */
+static int dra7xx_pcie_unaligned_memaccess(struct device *dev)
+{
+	int ret;
+	struct device_node *np = dev->of_node;
+	struct of_phandle_args args;
+	struct regmap *regmap;
+
+	regmap = syscon_regmap_lookup_by_phandle(np,
+						 "ti,syscon-unaligned-access");
+	if (IS_ERR(regmap)) {
+		dev_dbg(dev, "can't get ti,syscon-unaligned-access\n");
+		return -EINVAL;
+	}
+
+	ret = of_parse_phandle_with_fixed_args(np, "ti,syscon-unaligned-access",
+					       2, 0, &args);
+	if (ret) {
+		dev_err(dev, "failed to parse ti,syscon-unaligned-access\n");
+		return ret;
+	}
+
+	ret = regmap_update_bits(regmap, args.args[0], args.args[1],
+				 args.args[1]);
+	if (ret)
+		dev_err(dev, "failed to enable unaligned access\n");
+
+	of_node_put(args.np);
+
+	return ret;
+}
+
+static int dra7xx_pcie_configure_two_lane(struct device *dev,
+					  u32 b1co_mode_sel_mask)
+{
+	struct device_node *np = dev->of_node;
+	struct regmap *pcie_syscon;
+	unsigned int pcie_reg;
+	u32 mask;
+	u32 val;
+
+	pcie_syscon = syscon_regmap_lookup_by_phandle(np, "ti,syscon-lane-sel");
+	if (IS_ERR(pcie_syscon)) {
+		dev_err(dev, "unable to get ti,syscon-lane-sel\n");
+		return -EINVAL;
+	}
+
+	if (of_property_read_u32_index(np, "ti,syscon-lane-sel", 1,
+				       &pcie_reg)) {
+		dev_err(dev, "couldn't get lane selection reg offset\n");
+		return -EINVAL;
+	}
+
+	mask = b1co_mode_sel_mask | PCIE_B0_B1_TSYNCEN;
+	val = PCIE_B1C0_MODE_SEL | PCIE_B0_B1_TSYNCEN;
+	regmap_update_bits(pcie_syscon, pcie_reg, mask, val);
+
+	return 0;
+}
+
+static int dra7xx_pcie_probe(struct platform_device *pdev)
+{
+	u32 reg;
+	int ret;
+	int irq;
+	int i;
+	int phy_count;
+	struct phy **phy;
+	struct device_link **link;
+	void __iomem *base;
+	struct dw_pcie *pci;
+	struct dra7xx_pcie *dra7xx;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	char name[10];
+	struct gpio_desc *reset;
+	const struct dra7xx_pcie_of_data *data;
+	enum dw_pcie_device_mode mode;
+	u32 b1co_mode_sel_mask;
+
+	data = of_device_get_match_data(dev);
+	if (!data)
+		return -EINVAL;
+
+	mode = (enum dw_pcie_device_mode)data->mode;
+	b1co_mode_sel_mask = data->b1co_mode_sel_mask;
+
+	dra7xx = devm_kzalloc(dev, sizeof(*dra7xx), GFP_KERNEL);
+	if (!dra7xx)
+		return -ENOMEM;
+
+	pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	if (!pci)
+		return -ENOMEM;
+
+	pci->dev = dev;
+	pci->ops = &dw_pcie_ops;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	base = devm_platform_ioremap_resource_byname(pdev, "ti_conf");
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	phy_count = of_property_count_strings(np, "phy-names");
+	if (phy_count < 0) {
+		dev_err(dev, "unable to find the strings\n");
+		return phy_count;
+	}
+
+	phy = devm_kcalloc(dev, phy_count, sizeof(*phy), GFP_KERNEL);
+	if (!phy)
+		return -ENOMEM;
+
+	link = devm_kcalloc(dev, phy_count, sizeof(*link), GFP_KERNEL);
+	if (!link)
+		return -ENOMEM;
+
+	dra7xx->clk = devm_clk_get_optional(dev, NULL);
+	if (IS_ERR(dra7xx->clk))
+		return dev_err_probe(dev, PTR_ERR(dra7xx->clk),
+				     "clock request failed");
+
+	ret = clk_prepare_enable(dra7xx->clk);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < phy_count; i++) {
+		snprintf(name, sizeof(name), "pcie-phy%d", i);
+		phy[i] = devm_phy_get(dev, name);
+		if (IS_ERR(phy[i]))
+			return PTR_ERR(phy[i]);
+
+		link[i] = device_link_add(dev, &phy[i]->dev, DL_FLAG_STATELESS);
+		if (!link[i]) {
+			ret = -EINVAL;
+			goto err_link;
+		}
+	}
+
+	dra7xx->base = base;
+	dra7xx->phy = phy;
+	dra7xx->pci = pci;
+	dra7xx->phy_count = phy_count;
+
+	if (phy_count == 2) {
+		ret = dra7xx_pcie_configure_two_lane(dev, b1co_mode_sel_mask);
+		if (ret < 0)
+			dra7xx->phy_count = 1; /* Fallback to x1 lane mode */
+	}
+
+	ret = dra7xx_pcie_enable_phy(dra7xx);
+	if (ret) {
+		dev_err(dev, "failed to enable phy\n");
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, dra7xx);
+
+	pm_runtime_enable(dev);
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		dev_err(dev, "pm_runtime_get_sync failed\n");
+		goto err_get_sync;
+	}
+
+	reset = devm_gpiod_get_optional(dev, NULL, GPIOD_OUT_HIGH);
+	if (IS_ERR(reset)) {
+		ret = PTR_ERR(reset);
+		dev_err(&pdev->dev, "gpio request failed, ret %d\n", ret);
+		goto err_gpio;
+	}
+
+	reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_DEVICE_CMD);
+	reg &= ~LTSSM_EN;
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_DEVICE_CMD, reg);
+
+	switch (mode) {
+	case DW_PCIE_RC_TYPE:
+		if (!IS_ENABLED(CONFIG_PCI_DRA7XX_HOST)) {
+			ret = -ENODEV;
+			goto err_gpio;
+		}
+
+		dra7xx_pcie_writel(dra7xx, PCIECTRL_TI_CONF_DEVICE_TYPE,
+				   DEVICE_TYPE_RC);
+
+		ret = dra7xx_pcie_unaligned_memaccess(dev);
+		if (ret)
+			dev_err(dev, "WA for Errata i870 not applied\n");
+
+		ret = dra7xx_add_pcie_port(dra7xx, pdev);
+		if (ret < 0)
+			goto err_gpio;
+		break;
+	case DW_PCIE_EP_TYPE:
+		if (!IS_ENABLED(CONFIG_PCI_DRA7XX_EP)) {
+			ret = -ENODEV;
+			goto err_gpio;
+		}
+
+		dra7xx_pcie_writel(dra7xx, PCIECTRL_TI_CONF_DEVICE_TYPE,
+				   DEVICE_TYPE_EP);
+
+		ret = dra7xx_pcie_unaligned_memaccess(dev);
+		if (ret)
+			goto err_gpio;
+
+		ret = dra7xx_add_pcie_ep(dra7xx, pdev);
+		if (ret < 0)
+			goto err_gpio;
+		break;
+	default:
+		dev_err(dev, "INVALID device type %d\n", mode);
+	}
+	dra7xx->mode = mode;
+
+	ret = devm_request_threaded_irq(dev, irq, NULL, dra7xx_pcie_irq_handler,
+			       IRQF_SHARED, "dra7xx-pcie-main", dra7xx);
+	if (ret) {
+		dev_err(dev, "failed to request irq\n");
+		goto err_gpio;
+	}
+
+	return 0;
+
+err_gpio:
+err_get_sync:
+	pm_runtime_put(dev);
+	pm_runtime_disable(dev);
+	dra7xx_pcie_disable_phy(dra7xx);
+
+err_link:
+	while (--i >= 0)
+		device_link_del(link[i]);
+
+	return ret;
+}
+
+static int dra7xx_pcie_suspend(struct device *dev)
+{
+	struct dra7xx_pcie *dra7xx = dev_get_drvdata(dev);
+	struct dw_pcie *pci = dra7xx->pci;
+	u32 val;
+
+	if (dra7xx->mode != DW_PCIE_RC_TYPE)
+		return 0;
+
+	/* clear MSE */
+	val = dw_pcie_readl_dbi(pci, PCI_COMMAND);
+	val &= ~PCI_COMMAND_MEMORY;
+	dw_pcie_writel_dbi(pci, PCI_COMMAND, val);
+
+	return 0;
+}
+
+static int dra7xx_pcie_resume(struct device *dev)
+{
+	struct dra7xx_pcie *dra7xx = dev_get_drvdata(dev);
+	struct dw_pcie *pci = dra7xx->pci;
+	u32 val;
+
+	if (dra7xx->mode != DW_PCIE_RC_TYPE)
+		return 0;
+
+	/* set MSE */
+	val = dw_pcie_readl_dbi(pci, PCI_COMMAND);
+	val |= PCI_COMMAND_MEMORY;
+	dw_pcie_writel_dbi(pci, PCI_COMMAND, val);
+
+	return 0;
+}
+
+static int dra7xx_pcie_suspend_noirq(struct device *dev)
+{
+	struct dra7xx_pcie *dra7xx = dev_get_drvdata(dev);
+
+	dra7xx_pcie_disable_phy(dra7xx);
+
+	return 0;
+}
+
+static int dra7xx_pcie_resume_noirq(struct device *dev)
+{
+	struct dra7xx_pcie *dra7xx = dev_get_drvdata(dev);
+	int ret;
+
+	ret = dra7xx_pcie_enable_phy(dra7xx);
+	if (ret) {
+		dev_err(dev, "failed to enable phy\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void dra7xx_pcie_shutdown(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct dra7xx_pcie *dra7xx = dev_get_drvdata(dev);
+	int ret;
+
+	dra7xx_pcie_stop_link(dra7xx->pci);
+
+	ret = pm_runtime_put_sync(dev);
+	if (ret < 0)
+		dev_dbg(dev, "pm_runtime_put_sync failed\n");
+
+	pm_runtime_disable(dev);
+	dra7xx_pcie_disable_phy(dra7xx);
+
+	clk_disable_unprepare(dra7xx->clk);
+}
+
+static const struct dev_pm_ops dra7xx_pcie_pm_ops = {
+	SYSTEM_SLEEP_PM_OPS(dra7xx_pcie_suspend, dra7xx_pcie_resume)
+	NOIRQ_SYSTEM_SLEEP_PM_OPS(dra7xx_pcie_suspend_noirq,
+				  dra7xx_pcie_resume_noirq)
+};
+
+static struct platform_driver dra7xx_pcie_driver = {
+	.probe = dra7xx_pcie_probe,
+	.driver = {
+		.name	= "dra7-pcie",
+		.of_match_table = of_dra7xx_pcie_match,
+		.suppress_bind_attrs = true,
+		.pm	= &dra7xx_pcie_pm_ops,
+	},
+	.shutdown = dra7xx_pcie_shutdown,
+};
+module_platform_driver(dra7xx_pcie_driver);
+
+MODULE_AUTHOR("Kishon Vijay Abraham I <kishon@ti.com>");
+MODULE_DESCRIPTION("PCIe controller driver for TI DRA7xx SoCs");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/dwc/pci-exynos.c b/drivers/pci/controller/dwc/pci-exynos.c
new file mode 100644
index 000000000..c6bede346
--- /dev/null
+++ b/drivers/pci/controller/dwc/pci-exynos.c
@@ -0,0 +1,443 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for Samsung Exynos SoCs
+ *
+ * Copyright (C) 2013-2020 Samsung Electronics Co., Ltd.
+ *		https://www.samsung.com
+ *
+ * Author: Jingoo Han <jg1.han@samsung.com>
+ *	   Jaehoon Chung <jh80.chung@samsung.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/phy/phy.h>
+#include <linux/regulator/consumer.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+
+#include "pcie-designware.h"
+
+#define to_exynos_pcie(x)	dev_get_drvdata((x)->dev)
+
+/* PCIe ELBI registers */
+#define PCIE_IRQ_PULSE			0x000
+#define IRQ_INTA_ASSERT			BIT(0)
+#define IRQ_INTB_ASSERT			BIT(2)
+#define IRQ_INTC_ASSERT			BIT(4)
+#define IRQ_INTD_ASSERT			BIT(6)
+#define PCIE_IRQ_LEVEL			0x004
+#define PCIE_IRQ_SPECIAL		0x008
+#define PCIE_IRQ_EN_PULSE		0x00c
+#define PCIE_IRQ_EN_LEVEL		0x010
+#define PCIE_IRQ_EN_SPECIAL		0x014
+#define PCIE_SW_WAKE			0x018
+#define PCIE_BUS_EN			BIT(1)
+#define PCIE_CORE_RESET			0x01c
+#define PCIE_CORE_RESET_ENABLE		BIT(0)
+#define PCIE_STICKY_RESET		0x020
+#define PCIE_NONSTICKY_RESET		0x024
+#define PCIE_APP_INIT_RESET		0x028
+#define PCIE_APP_LTSSM_ENABLE		0x02c
+#define PCIE_ELBI_RDLH_LINKUP		0x074
+#define PCIE_ELBI_XMLH_LINKUP		BIT(4)
+#define PCIE_ELBI_LTSSM_ENABLE		0x1
+#define PCIE_ELBI_SLV_AWMISC		0x11c
+#define PCIE_ELBI_SLV_ARMISC		0x120
+#define PCIE_ELBI_SLV_DBI_ENABLE	BIT(21)
+
+struct exynos_pcie {
+	struct dw_pcie			pci;
+	void __iomem			*elbi_base;
+	struct clk			*clk;
+	struct clk			*bus_clk;
+	struct phy			*phy;
+	struct regulator_bulk_data	supplies[2];
+};
+
+static int exynos_pcie_init_clk_resources(struct exynos_pcie *ep)
+{
+	struct device *dev = ep->pci.dev;
+	int ret;
+
+	ret = clk_prepare_enable(ep->clk);
+	if (ret) {
+		dev_err(dev, "cannot enable pcie rc clock");
+		return ret;
+	}
+
+	ret = clk_prepare_enable(ep->bus_clk);
+	if (ret) {
+		dev_err(dev, "cannot enable pcie bus clock");
+		goto err_bus_clk;
+	}
+
+	return 0;
+
+err_bus_clk:
+	clk_disable_unprepare(ep->clk);
+
+	return ret;
+}
+
+static void exynos_pcie_deinit_clk_resources(struct exynos_pcie *ep)
+{
+	clk_disable_unprepare(ep->bus_clk);
+	clk_disable_unprepare(ep->clk);
+}
+
+static void exynos_pcie_writel(void __iomem *base, u32 val, u32 reg)
+{
+	writel(val, base + reg);
+}
+
+static u32 exynos_pcie_readl(void __iomem *base, u32 reg)
+{
+	return readl(base + reg);
+}
+
+static void exynos_pcie_sideband_dbi_w_mode(struct exynos_pcie *ep, bool on)
+{
+	u32 val;
+
+	val = exynos_pcie_readl(ep->elbi_base, PCIE_ELBI_SLV_AWMISC);
+	if (on)
+		val |= PCIE_ELBI_SLV_DBI_ENABLE;
+	else
+		val &= ~PCIE_ELBI_SLV_DBI_ENABLE;
+	exynos_pcie_writel(ep->elbi_base, val, PCIE_ELBI_SLV_AWMISC);
+}
+
+static void exynos_pcie_sideband_dbi_r_mode(struct exynos_pcie *ep, bool on)
+{
+	u32 val;
+
+	val = exynos_pcie_readl(ep->elbi_base, PCIE_ELBI_SLV_ARMISC);
+	if (on)
+		val |= PCIE_ELBI_SLV_DBI_ENABLE;
+	else
+		val &= ~PCIE_ELBI_SLV_DBI_ENABLE;
+	exynos_pcie_writel(ep->elbi_base, val, PCIE_ELBI_SLV_ARMISC);
+}
+
+static void exynos_pcie_assert_core_reset(struct exynos_pcie *ep)
+{
+	u32 val;
+
+	val = exynos_pcie_readl(ep->elbi_base, PCIE_CORE_RESET);
+	val &= ~PCIE_CORE_RESET_ENABLE;
+	exynos_pcie_writel(ep->elbi_base, val, PCIE_CORE_RESET);
+	exynos_pcie_writel(ep->elbi_base, 0, PCIE_STICKY_RESET);
+	exynos_pcie_writel(ep->elbi_base, 0, PCIE_NONSTICKY_RESET);
+}
+
+static void exynos_pcie_deassert_core_reset(struct exynos_pcie *ep)
+{
+	u32 val;
+
+	val = exynos_pcie_readl(ep->elbi_base, PCIE_CORE_RESET);
+	val |= PCIE_CORE_RESET_ENABLE;
+
+	exynos_pcie_writel(ep->elbi_base, val, PCIE_CORE_RESET);
+	exynos_pcie_writel(ep->elbi_base, 1, PCIE_STICKY_RESET);
+	exynos_pcie_writel(ep->elbi_base, 1, PCIE_NONSTICKY_RESET);
+	exynos_pcie_writel(ep->elbi_base, 1, PCIE_APP_INIT_RESET);
+	exynos_pcie_writel(ep->elbi_base, 0, PCIE_APP_INIT_RESET);
+}
+
+static int exynos_pcie_start_link(struct dw_pcie *pci)
+{
+	struct exynos_pcie *ep = to_exynos_pcie(pci);
+	u32 val;
+
+	val = exynos_pcie_readl(ep->elbi_base, PCIE_SW_WAKE);
+	val &= ~PCIE_BUS_EN;
+	exynos_pcie_writel(ep->elbi_base, val, PCIE_SW_WAKE);
+
+	/* assert LTSSM enable */
+	exynos_pcie_writel(ep->elbi_base, PCIE_ELBI_LTSSM_ENABLE,
+			  PCIE_APP_LTSSM_ENABLE);
+	return 0;
+}
+
+static void exynos_pcie_clear_irq_pulse(struct exynos_pcie *ep)
+{
+	u32 val = exynos_pcie_readl(ep->elbi_base, PCIE_IRQ_PULSE);
+
+	exynos_pcie_writel(ep->elbi_base, val, PCIE_IRQ_PULSE);
+}
+
+static irqreturn_t exynos_pcie_irq_handler(int irq, void *arg)
+{
+	struct exynos_pcie *ep = arg;
+
+	exynos_pcie_clear_irq_pulse(ep);
+	return IRQ_HANDLED;
+}
+
+static void exynos_pcie_enable_irq_pulse(struct exynos_pcie *ep)
+{
+	u32 val = IRQ_INTA_ASSERT | IRQ_INTB_ASSERT |
+		  IRQ_INTC_ASSERT | IRQ_INTD_ASSERT;
+
+	exynos_pcie_writel(ep->elbi_base, val, PCIE_IRQ_EN_PULSE);
+	exynos_pcie_writel(ep->elbi_base, 0, PCIE_IRQ_EN_LEVEL);
+	exynos_pcie_writel(ep->elbi_base, 0, PCIE_IRQ_EN_SPECIAL);
+}
+
+static u32 exynos_pcie_read_dbi(struct dw_pcie *pci, void __iomem *base,
+				u32 reg, size_t size)
+{
+	struct exynos_pcie *ep = to_exynos_pcie(pci);
+	u32 val;
+
+	exynos_pcie_sideband_dbi_r_mode(ep, true);
+	dw_pcie_read(base + reg, size, &val);
+	exynos_pcie_sideband_dbi_r_mode(ep, false);
+	return val;
+}
+
+static void exynos_pcie_write_dbi(struct dw_pcie *pci, void __iomem *base,
+				  u32 reg, size_t size, u32 val)
+{
+	struct exynos_pcie *ep = to_exynos_pcie(pci);
+
+	exynos_pcie_sideband_dbi_w_mode(ep, true);
+	dw_pcie_write(base + reg, size, val);
+	exynos_pcie_sideband_dbi_w_mode(ep, false);
+}
+
+static int exynos_pcie_rd_own_conf(struct pci_bus *bus, unsigned int devfn,
+				   int where, int size, u32 *val)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(bus->sysdata);
+
+	if (PCI_SLOT(devfn))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	*val = dw_pcie_read_dbi(pci, where, size);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int exynos_pcie_wr_own_conf(struct pci_bus *bus, unsigned int devfn,
+				   int where, int size, u32 val)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(bus->sysdata);
+
+	if (PCI_SLOT(devfn))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	dw_pcie_write_dbi(pci, where, size, val);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops exynos_pci_ops = {
+	.read = exynos_pcie_rd_own_conf,
+	.write = exynos_pcie_wr_own_conf,
+};
+
+static int exynos_pcie_link_up(struct dw_pcie *pci)
+{
+	struct exynos_pcie *ep = to_exynos_pcie(pci);
+	u32 val = exynos_pcie_readl(ep->elbi_base, PCIE_ELBI_RDLH_LINKUP);
+
+	return (val & PCIE_ELBI_XMLH_LINKUP);
+}
+
+static int exynos_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct exynos_pcie *ep = to_exynos_pcie(pci);
+
+	pp->bridge->ops = &exynos_pci_ops;
+
+	exynos_pcie_assert_core_reset(ep);
+
+	phy_init(ep->phy);
+	phy_power_on(ep->phy);
+
+	exynos_pcie_deassert_core_reset(ep);
+	exynos_pcie_enable_irq_pulse(ep);
+
+	return 0;
+}
+
+static const struct dw_pcie_host_ops exynos_pcie_host_ops = {
+	.host_init = exynos_pcie_host_init,
+};
+
+static int exynos_add_pcie_port(struct exynos_pcie *ep,
+				       struct platform_device *pdev)
+{
+	struct dw_pcie *pci = &ep->pci;
+	struct dw_pcie_rp *pp = &pci->pp;
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	pp->irq = platform_get_irq(pdev, 0);
+	if (pp->irq < 0)
+		return pp->irq;
+
+	ret = devm_request_irq(dev, pp->irq, exynos_pcie_irq_handler,
+			       IRQF_SHARED, "exynos-pcie", ep);
+	if (ret) {
+		dev_err(dev, "failed to request irq\n");
+		return ret;
+	}
+
+	pp->ops = &exynos_pcie_host_ops;
+	pp->msi_irq[0] = -ENODEV;
+
+	ret = dw_pcie_host_init(pp);
+	if (ret) {
+		dev_err(dev, "failed to initialize host\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+	.read_dbi = exynos_pcie_read_dbi,
+	.write_dbi = exynos_pcie_write_dbi,
+	.link_up = exynos_pcie_link_up,
+	.start_link = exynos_pcie_start_link,
+};
+
+static int exynos_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct exynos_pcie *ep;
+	struct device_node *np = dev->of_node;
+	int ret;
+
+	ep = devm_kzalloc(dev, sizeof(*ep), GFP_KERNEL);
+	if (!ep)
+		return -ENOMEM;
+
+	ep->pci.dev = dev;
+	ep->pci.ops = &dw_pcie_ops;
+
+	ep->phy = devm_of_phy_get(dev, np, NULL);
+	if (IS_ERR(ep->phy))
+		return PTR_ERR(ep->phy);
+
+	/* External Local Bus interface (ELBI) registers */
+	ep->elbi_base = devm_platform_ioremap_resource_byname(pdev, "elbi");
+	if (IS_ERR(ep->elbi_base))
+		return PTR_ERR(ep->elbi_base);
+
+	ep->clk = devm_clk_get(dev, "pcie");
+	if (IS_ERR(ep->clk)) {
+		dev_err(dev, "Failed to get pcie rc clock\n");
+		return PTR_ERR(ep->clk);
+	}
+
+	ep->bus_clk = devm_clk_get(dev, "pcie_bus");
+	if (IS_ERR(ep->bus_clk)) {
+		dev_err(dev, "Failed to get pcie bus clock\n");
+		return PTR_ERR(ep->bus_clk);
+	}
+
+	ep->supplies[0].supply = "vdd18";
+	ep->supplies[1].supply = "vdd10";
+	ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ep->supplies),
+				      ep->supplies);
+	if (ret)
+		return ret;
+
+	ret = exynos_pcie_init_clk_resources(ep);
+	if (ret)
+		return ret;
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(ep->supplies), ep->supplies);
+	if (ret)
+		return ret;
+
+	platform_set_drvdata(pdev, ep);
+
+	ret = exynos_add_pcie_port(ep, pdev);
+	if (ret < 0)
+		goto fail_probe;
+
+	return 0;
+
+fail_probe:
+	phy_exit(ep->phy);
+	exynos_pcie_deinit_clk_resources(ep);
+	regulator_bulk_disable(ARRAY_SIZE(ep->supplies), ep->supplies);
+
+	return ret;
+}
+
+static int exynos_pcie_remove(struct platform_device *pdev)
+{
+	struct exynos_pcie *ep = platform_get_drvdata(pdev);
+
+	dw_pcie_host_deinit(&ep->pci.pp);
+	exynos_pcie_assert_core_reset(ep);
+	phy_power_off(ep->phy);
+	phy_exit(ep->phy);
+	exynos_pcie_deinit_clk_resources(ep);
+	regulator_bulk_disable(ARRAY_SIZE(ep->supplies), ep->supplies);
+
+	return 0;
+}
+
+static int exynos_pcie_suspend_noirq(struct device *dev)
+{
+	struct exynos_pcie *ep = dev_get_drvdata(dev);
+
+	exynos_pcie_assert_core_reset(ep);
+	phy_power_off(ep->phy);
+	phy_exit(ep->phy);
+	regulator_bulk_disable(ARRAY_SIZE(ep->supplies), ep->supplies);
+
+	return 0;
+}
+
+static int exynos_pcie_resume_noirq(struct device *dev)
+{
+	struct exynos_pcie *ep = dev_get_drvdata(dev);
+	struct dw_pcie *pci = &ep->pci;
+	struct dw_pcie_rp *pp = &pci->pp;
+	int ret;
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(ep->supplies), ep->supplies);
+	if (ret)
+		return ret;
+
+	/* exynos_pcie_host_init controls ep->phy */
+	exynos_pcie_host_init(pp);
+	dw_pcie_setup_rc(pp);
+	exynos_pcie_start_link(pci);
+	return dw_pcie_wait_for_link(pci);
+}
+
+static const struct dev_pm_ops exynos_pcie_pm_ops = {
+	NOIRQ_SYSTEM_SLEEP_PM_OPS(exynos_pcie_suspend_noirq,
+				  exynos_pcie_resume_noirq)
+};
+
+static const struct of_device_id exynos_pcie_of_match[] = {
+	{ .compatible = "samsung,exynos5433-pcie", },
+	{ },
+};
+
+static struct platform_driver exynos_pcie_driver = {
+	.probe		= exynos_pcie_probe,
+	.remove		= exynos_pcie_remove,
+	.driver = {
+		.name	= "exynos-pcie",
+		.of_match_table = exynos_pcie_of_match,
+		.pm		= &exynos_pcie_pm_ops,
+	},
+};
+module_platform_driver(exynos_pcie_driver);
+MODULE_LICENSE("GPL v2");
+MODULE_DEVICE_TABLE(of, exynos_pcie_of_match);
diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
new file mode 100644
index 000000000..74703362a
--- /dev/null
+++ b/drivers/pci/controller/dwc/pci-imx6.c
@@ -0,0 +1,1612 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for Freescale i.MX6 SoCs
+ *
+ * Copyright (C) 2013 Kosagi
+ *		https://www.kosagi.com
+ *
+ * Author: Sean Cross <xobs@kosagi.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
+#include <linux/mfd/syscon/imx7-iomuxc-gpr.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/of_address.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <linux/resource.h>
+#include <linux/signal.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/reset.h>
+#include <linux/phy/phy.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
+
+#include "pcie-designware.h"
+
+#define IMX8MQ_GPR_PCIE_REF_USE_PAD		BIT(9)
+#define IMX8MQ_GPR_PCIE_CLK_REQ_OVERRIDE_EN	BIT(10)
+#define IMX8MQ_GPR_PCIE_CLK_REQ_OVERRIDE	BIT(11)
+#define IMX8MQ_GPR_PCIE_VREG_BYPASS		BIT(12)
+#define IMX8MQ_GPR12_PCIE2_CTRL_DEVICE_TYPE	GENMASK(11, 8)
+#define IMX8MQ_PCIE2_BASE_ADDR			0x33c00000
+
+#define to_imx6_pcie(x)	dev_get_drvdata((x)->dev)
+
+enum imx6_pcie_variants {
+	IMX6Q,
+	IMX6SX,
+	IMX6QP,
+	IMX7D,
+	IMX8MQ,
+	IMX8MM,
+	IMX8MP,
+	IMX8MQ_EP,
+	IMX8MM_EP,
+	IMX8MP_EP,
+};
+
+#define IMX6_PCIE_FLAG_IMX6_PHY			BIT(0)
+#define IMX6_PCIE_FLAG_IMX6_SPEED_CHANGE	BIT(1)
+#define IMX6_PCIE_FLAG_SUPPORTS_SUSPEND		BIT(2)
+
+struct imx6_pcie_drvdata {
+	enum imx6_pcie_variants variant;
+	enum dw_pcie_device_mode mode;
+	u32 flags;
+	int dbi_length;
+	const char *gpr;
+};
+
+struct imx6_pcie {
+	struct dw_pcie		*pci;
+	int			reset_gpio;
+	bool			gpio_active_high;
+	bool			link_is_up;
+	struct clk		*pcie_bus;
+	struct clk		*pcie_phy;
+	struct clk		*pcie_inbound_axi;
+	struct clk		*pcie;
+	struct clk		*pcie_aux;
+	struct regmap		*iomuxc_gpr;
+	u16			msi_ctrl;
+	u32			controller_id;
+	struct reset_control	*pciephy_reset;
+	struct reset_control	*apps_reset;
+	struct reset_control	*turnoff_reset;
+	u32			tx_deemph_gen1;
+	u32			tx_deemph_gen2_3p5db;
+	u32			tx_deemph_gen2_6db;
+	u32			tx_swing_full;
+	u32			tx_swing_low;
+	struct regulator	*vpcie;
+	struct regulator	*vph;
+	void __iomem		*phy_base;
+
+	/* power domain for pcie */
+	struct device		*pd_pcie;
+	/* power domain for pcie phy */
+	struct device		*pd_pcie_phy;
+	struct phy		*phy;
+	const struct imx6_pcie_drvdata *drvdata;
+};
+
+/* Parameters for the waiting for PCIe PHY PLL to lock on i.MX7 */
+#define PHY_PLL_LOCK_WAIT_USLEEP_MAX	200
+#define PHY_PLL_LOCK_WAIT_TIMEOUT	(2000 * PHY_PLL_LOCK_WAIT_USLEEP_MAX)
+
+/* PCIe Port Logic registers (memory-mapped) */
+#define PL_OFFSET 0x700
+
+#define PCIE_PHY_CTRL (PL_OFFSET + 0x114)
+#define PCIE_PHY_CTRL_DATA(x)		FIELD_PREP(GENMASK(15, 0), (x))
+#define PCIE_PHY_CTRL_CAP_ADR		BIT(16)
+#define PCIE_PHY_CTRL_CAP_DAT		BIT(17)
+#define PCIE_PHY_CTRL_WR		BIT(18)
+#define PCIE_PHY_CTRL_RD		BIT(19)
+
+#define PCIE_PHY_STAT (PL_OFFSET + 0x110)
+#define PCIE_PHY_STAT_ACK		BIT(16)
+
+/* PHY registers (not memory-mapped) */
+#define PCIE_PHY_ATEOVRD			0x10
+#define  PCIE_PHY_ATEOVRD_EN			BIT(2)
+#define  PCIE_PHY_ATEOVRD_REF_CLKDIV_SHIFT	0
+#define  PCIE_PHY_ATEOVRD_REF_CLKDIV_MASK	0x1
+
+#define PCIE_PHY_MPLL_OVRD_IN_LO		0x11
+#define  PCIE_PHY_MPLL_MULTIPLIER_SHIFT		2
+#define  PCIE_PHY_MPLL_MULTIPLIER_MASK		0x7f
+#define  PCIE_PHY_MPLL_MULTIPLIER_OVRD		BIT(9)
+
+#define PCIE_PHY_RX_ASIC_OUT 0x100D
+#define PCIE_PHY_RX_ASIC_OUT_VALID	(1 << 0)
+
+/* iMX7 PCIe PHY registers */
+#define PCIE_PHY_CMN_REG4		0x14
+/* These are probably the bits that *aren't* DCC_FB_EN */
+#define PCIE_PHY_CMN_REG4_DCC_FB_EN	0x29
+
+#define PCIE_PHY_CMN_REG15	        0x54
+#define PCIE_PHY_CMN_REG15_DLY_4	BIT(2)
+#define PCIE_PHY_CMN_REG15_PLL_PD	BIT(5)
+#define PCIE_PHY_CMN_REG15_OVRD_PLL_PD	BIT(7)
+
+#define PCIE_PHY_CMN_REG24		0x90
+#define PCIE_PHY_CMN_REG24_RX_EQ	BIT(6)
+#define PCIE_PHY_CMN_REG24_RX_EQ_SEL	BIT(3)
+
+#define PCIE_PHY_CMN_REG26		0x98
+#define PCIE_PHY_CMN_REG26_ATT_MODE	0xBC
+
+#define PHY_RX_OVRD_IN_LO 0x1005
+#define PHY_RX_OVRD_IN_LO_RX_DATA_EN		BIT(5)
+#define PHY_RX_OVRD_IN_LO_RX_PLL_EN		BIT(3)
+
+static unsigned int imx6_pcie_grp_offset(const struct imx6_pcie *imx6_pcie)
+{
+	WARN_ON(imx6_pcie->drvdata->variant != IMX8MQ &&
+		imx6_pcie->drvdata->variant != IMX8MQ_EP &&
+		imx6_pcie->drvdata->variant != IMX8MM &&
+		imx6_pcie->drvdata->variant != IMX8MM_EP &&
+		imx6_pcie->drvdata->variant != IMX8MP &&
+		imx6_pcie->drvdata->variant != IMX8MP_EP);
+	return imx6_pcie->controller_id == 1 ? IOMUXC_GPR16 : IOMUXC_GPR14;
+}
+
+static void imx6_pcie_configure_type(struct imx6_pcie *imx6_pcie)
+{
+	unsigned int mask, val, mode;
+
+	if (imx6_pcie->drvdata->mode == DW_PCIE_EP_TYPE)
+		mode = PCI_EXP_TYPE_ENDPOINT;
+	else
+		mode = PCI_EXP_TYPE_ROOT_PORT;
+
+	switch (imx6_pcie->drvdata->variant) {
+	case IMX8MQ:
+	case IMX8MQ_EP:
+		if (imx6_pcie->controller_id == 1) {
+			mask = IMX8MQ_GPR12_PCIE2_CTRL_DEVICE_TYPE;
+			val  = FIELD_PREP(IMX8MQ_GPR12_PCIE2_CTRL_DEVICE_TYPE,
+					  mode);
+		} else {
+			mask = IMX6Q_GPR12_DEVICE_TYPE;
+			val  = FIELD_PREP(IMX6Q_GPR12_DEVICE_TYPE, mode);
+		}
+		break;
+	default:
+		mask = IMX6Q_GPR12_DEVICE_TYPE;
+		val  = FIELD_PREP(IMX6Q_GPR12_DEVICE_TYPE, mode);
+		break;
+	}
+
+	regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, mask, val);
+}
+
+static int pcie_phy_poll_ack(struct imx6_pcie *imx6_pcie, bool exp_val)
+{
+	struct dw_pcie *pci = imx6_pcie->pci;
+	bool val;
+	u32 max_iterations = 10;
+	u32 wait_counter = 0;
+
+	do {
+		val = dw_pcie_readl_dbi(pci, PCIE_PHY_STAT) &
+			PCIE_PHY_STAT_ACK;
+		wait_counter++;
+
+		if (val == exp_val)
+			return 0;
+
+		udelay(1);
+	} while (wait_counter < max_iterations);
+
+	return -ETIMEDOUT;
+}
+
+static int pcie_phy_wait_ack(struct imx6_pcie *imx6_pcie, int addr)
+{
+	struct dw_pcie *pci = imx6_pcie->pci;
+	u32 val;
+	int ret;
+
+	val = PCIE_PHY_CTRL_DATA(addr);
+	dw_pcie_writel_dbi(pci, PCIE_PHY_CTRL, val);
+
+	val |= PCIE_PHY_CTRL_CAP_ADR;
+	dw_pcie_writel_dbi(pci, PCIE_PHY_CTRL, val);
+
+	ret = pcie_phy_poll_ack(imx6_pcie, true);
+	if (ret)
+		return ret;
+
+	val = PCIE_PHY_CTRL_DATA(addr);
+	dw_pcie_writel_dbi(pci, PCIE_PHY_CTRL, val);
+
+	return pcie_phy_poll_ack(imx6_pcie, false);
+}
+
+/* Read from the 16-bit PCIe PHY control registers (not memory-mapped) */
+static int pcie_phy_read(struct imx6_pcie *imx6_pcie, int addr, u16 *data)
+{
+	struct dw_pcie *pci = imx6_pcie->pci;
+	u32 phy_ctl;
+	int ret;
+
+	ret = pcie_phy_wait_ack(imx6_pcie, addr);
+	if (ret)
+		return ret;
+
+	/* assert Read signal */
+	phy_ctl = PCIE_PHY_CTRL_RD;
+	dw_pcie_writel_dbi(pci, PCIE_PHY_CTRL, phy_ctl);
+
+	ret = pcie_phy_poll_ack(imx6_pcie, true);
+	if (ret)
+		return ret;
+
+	*data = dw_pcie_readl_dbi(pci, PCIE_PHY_STAT);
+
+	/* deassert Read signal */
+	dw_pcie_writel_dbi(pci, PCIE_PHY_CTRL, 0x00);
+
+	return pcie_phy_poll_ack(imx6_pcie, false);
+}
+
+static int pcie_phy_write(struct imx6_pcie *imx6_pcie, int addr, u16 data)
+{
+	struct dw_pcie *pci = imx6_pcie->pci;
+	u32 var;
+	int ret;
+
+	/* write addr */
+	/* cap addr */
+	ret = pcie_phy_wait_ack(imx6_pcie, addr);
+	if (ret)
+		return ret;
+
+	var = PCIE_PHY_CTRL_DATA(data);
+	dw_pcie_writel_dbi(pci, PCIE_PHY_CTRL, var);
+
+	/* capture data */
+	var |= PCIE_PHY_CTRL_CAP_DAT;
+	dw_pcie_writel_dbi(pci, PCIE_PHY_CTRL, var);
+
+	ret = pcie_phy_poll_ack(imx6_pcie, true);
+	if (ret)
+		return ret;
+
+	/* deassert cap data */
+	var = PCIE_PHY_CTRL_DATA(data);
+	dw_pcie_writel_dbi(pci, PCIE_PHY_CTRL, var);
+
+	/* wait for ack de-assertion */
+	ret = pcie_phy_poll_ack(imx6_pcie, false);
+	if (ret)
+		return ret;
+
+	/* assert wr signal */
+	var = PCIE_PHY_CTRL_WR;
+	dw_pcie_writel_dbi(pci, PCIE_PHY_CTRL, var);
+
+	/* wait for ack */
+	ret = pcie_phy_poll_ack(imx6_pcie, true);
+	if (ret)
+		return ret;
+
+	/* deassert wr signal */
+	var = PCIE_PHY_CTRL_DATA(data);
+	dw_pcie_writel_dbi(pci, PCIE_PHY_CTRL, var);
+
+	/* wait for ack de-assertion */
+	ret = pcie_phy_poll_ack(imx6_pcie, false);
+	if (ret)
+		return ret;
+
+	dw_pcie_writel_dbi(pci, PCIE_PHY_CTRL, 0x0);
+
+	return 0;
+}
+
+static void imx6_pcie_init_phy(struct imx6_pcie *imx6_pcie)
+{
+	switch (imx6_pcie->drvdata->variant) {
+	case IMX8MM:
+	case IMX8MM_EP:
+	case IMX8MP:
+	case IMX8MP_EP:
+		/*
+		 * The PHY initialization had been done in the PHY
+		 * driver, break here directly.
+		 */
+		break;
+	case IMX8MQ:
+	case IMX8MQ_EP:
+		/*
+		 * TODO: Currently this code assumes external
+		 * oscillator is being used
+		 */
+		regmap_update_bits(imx6_pcie->iomuxc_gpr,
+				   imx6_pcie_grp_offset(imx6_pcie),
+				   IMX8MQ_GPR_PCIE_REF_USE_PAD,
+				   IMX8MQ_GPR_PCIE_REF_USE_PAD);
+		/*
+		 * Regarding the datasheet, the PCIE_VPH is suggested
+		 * to be 1.8V. If the PCIE_VPH is supplied by 3.3V, the
+		 * VREG_BYPASS should be cleared to zero.
+		 */
+		if (imx6_pcie->vph &&
+		    regulator_get_voltage(imx6_pcie->vph) > 3000000)
+			regmap_update_bits(imx6_pcie->iomuxc_gpr,
+					   imx6_pcie_grp_offset(imx6_pcie),
+					   IMX8MQ_GPR_PCIE_VREG_BYPASS,
+					   0);
+		break;
+	case IMX7D:
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
+				   IMX7D_GPR12_PCIE_PHY_REFCLK_SEL, 0);
+		break;
+	case IMX6SX:
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
+				   IMX6SX_GPR12_PCIE_RX_EQ_MASK,
+				   IMX6SX_GPR12_PCIE_RX_EQ_2);
+		fallthrough;
+	default:
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
+				   IMX6Q_GPR12_PCIE_CTL_2, 0 << 10);
+
+		/* configure constant input signal to the pcie ctrl and phy */
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
+				   IMX6Q_GPR12_LOS_LEVEL, 9 << 4);
+
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR8,
+				   IMX6Q_GPR8_TX_DEEMPH_GEN1,
+				   imx6_pcie->tx_deemph_gen1 << 0);
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR8,
+				   IMX6Q_GPR8_TX_DEEMPH_GEN2_3P5DB,
+				   imx6_pcie->tx_deemph_gen2_3p5db << 6);
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR8,
+				   IMX6Q_GPR8_TX_DEEMPH_GEN2_6DB,
+				   imx6_pcie->tx_deemph_gen2_6db << 12);
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR8,
+				   IMX6Q_GPR8_TX_SWING_FULL,
+				   imx6_pcie->tx_swing_full << 18);
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR8,
+				   IMX6Q_GPR8_TX_SWING_LOW,
+				   imx6_pcie->tx_swing_low << 25);
+		break;
+	}
+
+	imx6_pcie_configure_type(imx6_pcie);
+}
+
+static void imx7d_pcie_wait_for_phy_pll_lock(struct imx6_pcie *imx6_pcie)
+{
+	u32 val;
+	struct device *dev = imx6_pcie->pci->dev;
+
+	if (regmap_read_poll_timeout(imx6_pcie->iomuxc_gpr,
+				     IOMUXC_GPR22, val,
+				     val & IMX7D_GPR22_PCIE_PHY_PLL_LOCKED,
+				     PHY_PLL_LOCK_WAIT_USLEEP_MAX,
+				     PHY_PLL_LOCK_WAIT_TIMEOUT))
+		dev_err(dev, "PCIe PLL lock timeout\n");
+}
+
+static int imx6_setup_phy_mpll(struct imx6_pcie *imx6_pcie)
+{
+	unsigned long phy_rate = clk_get_rate(imx6_pcie->pcie_phy);
+	int mult, div;
+	u16 val;
+
+	if (!(imx6_pcie->drvdata->flags & IMX6_PCIE_FLAG_IMX6_PHY))
+		return 0;
+
+	switch (phy_rate) {
+	case 125000000:
+		/*
+		 * The default settings of the MPLL are for a 125MHz input
+		 * clock, so no need to reconfigure anything in that case.
+		 */
+		return 0;
+	case 100000000:
+		mult = 25;
+		div = 0;
+		break;
+	case 200000000:
+		mult = 25;
+		div = 1;
+		break;
+	default:
+		dev_err(imx6_pcie->pci->dev,
+			"Unsupported PHY reference clock rate %lu\n", phy_rate);
+		return -EINVAL;
+	}
+
+	pcie_phy_read(imx6_pcie, PCIE_PHY_MPLL_OVRD_IN_LO, &val);
+	val &= ~(PCIE_PHY_MPLL_MULTIPLIER_MASK <<
+		 PCIE_PHY_MPLL_MULTIPLIER_SHIFT);
+	val |= mult << PCIE_PHY_MPLL_MULTIPLIER_SHIFT;
+	val |= PCIE_PHY_MPLL_MULTIPLIER_OVRD;
+	pcie_phy_write(imx6_pcie, PCIE_PHY_MPLL_OVRD_IN_LO, val);
+
+	pcie_phy_read(imx6_pcie, PCIE_PHY_ATEOVRD, &val);
+	val &= ~(PCIE_PHY_ATEOVRD_REF_CLKDIV_MASK <<
+		 PCIE_PHY_ATEOVRD_REF_CLKDIV_SHIFT);
+	val |= div << PCIE_PHY_ATEOVRD_REF_CLKDIV_SHIFT;
+	val |= PCIE_PHY_ATEOVRD_EN;
+	pcie_phy_write(imx6_pcie, PCIE_PHY_ATEOVRD, val);
+
+	return 0;
+}
+
+static void imx6_pcie_reset_phy(struct imx6_pcie *imx6_pcie)
+{
+	u16 tmp;
+
+	if (!(imx6_pcie->drvdata->flags & IMX6_PCIE_FLAG_IMX6_PHY))
+		return;
+
+	pcie_phy_read(imx6_pcie, PHY_RX_OVRD_IN_LO, &tmp);
+	tmp |= (PHY_RX_OVRD_IN_LO_RX_DATA_EN |
+		PHY_RX_OVRD_IN_LO_RX_PLL_EN);
+	pcie_phy_write(imx6_pcie, PHY_RX_OVRD_IN_LO, tmp);
+
+	usleep_range(2000, 3000);
+
+	pcie_phy_read(imx6_pcie, PHY_RX_OVRD_IN_LO, &tmp);
+	tmp &= ~(PHY_RX_OVRD_IN_LO_RX_DATA_EN |
+		  PHY_RX_OVRD_IN_LO_RX_PLL_EN);
+	pcie_phy_write(imx6_pcie, PHY_RX_OVRD_IN_LO, tmp);
+}
+
+#ifdef CONFIG_ARM
+/*  Added for PCI abort handling */
+static int imx6q_pcie_abort_handler(unsigned long addr,
+		unsigned int fsr, struct pt_regs *regs)
+{
+	unsigned long pc = instruction_pointer(regs);
+	unsigned long instr = *(unsigned long *)pc;
+	int reg = (instr >> 12) & 15;
+
+	/*
+	 * If the instruction being executed was a read,
+	 * make it look like it read all-ones.
+	 */
+	if ((instr & 0x0c100000) == 0x04100000) {
+		unsigned long val;
+
+		if (instr & 0x00400000)
+			val = 255;
+		else
+			val = -1;
+
+		regs->uregs[reg] = val;
+		regs->ARM_pc += 4;
+		return 0;
+	}
+
+	if ((instr & 0x0e100090) == 0x00100090) {
+		regs->uregs[reg] = -1;
+		regs->ARM_pc += 4;
+		return 0;
+	}
+
+	return 1;
+}
+#endif
+
+static int imx6_pcie_attach_pd(struct device *dev)
+{
+	struct imx6_pcie *imx6_pcie = dev_get_drvdata(dev);
+	struct device_link *link;
+
+	/* Do nothing when in a single power domain */
+	if (dev->pm_domain)
+		return 0;
+
+	imx6_pcie->pd_pcie = dev_pm_domain_attach_by_name(dev, "pcie");
+	if (IS_ERR(imx6_pcie->pd_pcie))
+		return PTR_ERR(imx6_pcie->pd_pcie);
+	/* Do nothing when power domain missing */
+	if (!imx6_pcie->pd_pcie)
+		return 0;
+	link = device_link_add(dev, imx6_pcie->pd_pcie,
+			DL_FLAG_STATELESS |
+			DL_FLAG_PM_RUNTIME |
+			DL_FLAG_RPM_ACTIVE);
+	if (!link) {
+		dev_err(dev, "Failed to add device_link to pcie pd.\n");
+		return -EINVAL;
+	}
+
+	imx6_pcie->pd_pcie_phy = dev_pm_domain_attach_by_name(dev, "pcie_phy");
+	if (IS_ERR(imx6_pcie->pd_pcie_phy))
+		return PTR_ERR(imx6_pcie->pd_pcie_phy);
+
+	link = device_link_add(dev, imx6_pcie->pd_pcie_phy,
+			DL_FLAG_STATELESS |
+			DL_FLAG_PM_RUNTIME |
+			DL_FLAG_RPM_ACTIVE);
+	if (!link) {
+		dev_err(dev, "Failed to add device_link to pcie_phy pd.\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int imx6_pcie_enable_ref_clk(struct imx6_pcie *imx6_pcie)
+{
+	struct dw_pcie *pci = imx6_pcie->pci;
+	struct device *dev = pci->dev;
+	unsigned int offset;
+	int ret = 0;
+
+	switch (imx6_pcie->drvdata->variant) {
+	case IMX6SX:
+		ret = clk_prepare_enable(imx6_pcie->pcie_inbound_axi);
+		if (ret) {
+			dev_err(dev, "unable to enable pcie_axi clock\n");
+			break;
+		}
+
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
+				   IMX6SX_GPR12_PCIE_TEST_POWERDOWN, 0);
+		break;
+	case IMX6QP:
+	case IMX6Q:
+		/* power up core phy and enable ref clock */
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+				   IMX6Q_GPR1_PCIE_TEST_PD, 0 << 18);
+		/*
+		 * the async reset input need ref clock to sync internally,
+		 * when the ref clock comes after reset, internal synced
+		 * reset time is too short, cannot meet the requirement.
+		 * add one ~10us delay here.
+		 */
+		usleep_range(10, 100);
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+				   IMX6Q_GPR1_PCIE_REF_CLK_EN, 1 << 16);
+		break;
+	case IMX7D:
+		break;
+	case IMX8MM:
+	case IMX8MM_EP:
+	case IMX8MQ:
+	case IMX8MQ_EP:
+	case IMX8MP:
+	case IMX8MP_EP:
+		ret = clk_prepare_enable(imx6_pcie->pcie_aux);
+		if (ret) {
+			dev_err(dev, "unable to enable pcie_aux clock\n");
+			break;
+		}
+
+		offset = imx6_pcie_grp_offset(imx6_pcie);
+		/*
+		 * Set the over ride low and enabled
+		 * make sure that REF_CLK is turned on.
+		 */
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, offset,
+				   IMX8MQ_GPR_PCIE_CLK_REQ_OVERRIDE,
+				   0);
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, offset,
+				   IMX8MQ_GPR_PCIE_CLK_REQ_OVERRIDE_EN,
+				   IMX8MQ_GPR_PCIE_CLK_REQ_OVERRIDE_EN);
+		break;
+	}
+
+	return ret;
+}
+
+static void imx6_pcie_disable_ref_clk(struct imx6_pcie *imx6_pcie)
+{
+	switch (imx6_pcie->drvdata->variant) {
+	case IMX6SX:
+		clk_disable_unprepare(imx6_pcie->pcie_inbound_axi);
+		break;
+	case IMX6QP:
+	case IMX6Q:
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+				IMX6Q_GPR1_PCIE_REF_CLK_EN, 0);
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+				IMX6Q_GPR1_PCIE_TEST_PD,
+				IMX6Q_GPR1_PCIE_TEST_PD);
+		break;
+	case IMX7D:
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
+				   IMX7D_GPR12_PCIE_PHY_REFCLK_SEL,
+				   IMX7D_GPR12_PCIE_PHY_REFCLK_SEL);
+		break;
+	case IMX8MM:
+	case IMX8MM_EP:
+	case IMX8MQ:
+	case IMX8MQ_EP:
+	case IMX8MP:
+	case IMX8MP_EP:
+		clk_disable_unprepare(imx6_pcie->pcie_aux);
+		break;
+	default:
+		break;
+	}
+}
+
+static int imx6_pcie_clk_enable(struct imx6_pcie *imx6_pcie)
+{
+	struct dw_pcie *pci = imx6_pcie->pci;
+	struct device *dev = pci->dev;
+	int ret;
+
+	ret = clk_prepare_enable(imx6_pcie->pcie_phy);
+	if (ret) {
+		dev_err(dev, "unable to enable pcie_phy clock\n");
+		return ret;
+	}
+
+	ret = clk_prepare_enable(imx6_pcie->pcie_bus);
+	if (ret) {
+		dev_err(dev, "unable to enable pcie_bus clock\n");
+		goto err_pcie_bus;
+	}
+
+	ret = clk_prepare_enable(imx6_pcie->pcie);
+	if (ret) {
+		dev_err(dev, "unable to enable pcie clock\n");
+		goto err_pcie;
+	}
+
+	ret = imx6_pcie_enable_ref_clk(imx6_pcie);
+	if (ret) {
+		dev_err(dev, "unable to enable pcie ref clock\n");
+		goto err_ref_clk;
+	}
+
+	/* allow the clocks to stabilize */
+	usleep_range(200, 500);
+	return 0;
+
+err_ref_clk:
+	clk_disable_unprepare(imx6_pcie->pcie);
+err_pcie:
+	clk_disable_unprepare(imx6_pcie->pcie_bus);
+err_pcie_bus:
+	clk_disable_unprepare(imx6_pcie->pcie_phy);
+
+	return ret;
+}
+
+static void imx6_pcie_clk_disable(struct imx6_pcie *imx6_pcie)
+{
+	imx6_pcie_disable_ref_clk(imx6_pcie);
+	clk_disable_unprepare(imx6_pcie->pcie);
+	clk_disable_unprepare(imx6_pcie->pcie_bus);
+	clk_disable_unprepare(imx6_pcie->pcie_phy);
+}
+
+static void imx6_pcie_assert_core_reset(struct imx6_pcie *imx6_pcie)
+{
+	switch (imx6_pcie->drvdata->variant) {
+	case IMX7D:
+	case IMX8MQ:
+	case IMX8MQ_EP:
+		reset_control_assert(imx6_pcie->pciephy_reset);
+		fallthrough;
+	case IMX8MM:
+	case IMX8MM_EP:
+	case IMX8MP:
+	case IMX8MP_EP:
+		reset_control_assert(imx6_pcie->apps_reset);
+		break;
+	case IMX6SX:
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
+				   IMX6SX_GPR12_PCIE_TEST_POWERDOWN,
+				   IMX6SX_GPR12_PCIE_TEST_POWERDOWN);
+		/* Force PCIe PHY reset */
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR5,
+				   IMX6SX_GPR5_PCIE_BTNRST_RESET,
+				   IMX6SX_GPR5_PCIE_BTNRST_RESET);
+		break;
+	case IMX6QP:
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+				   IMX6Q_GPR1_PCIE_SW_RST,
+				   IMX6Q_GPR1_PCIE_SW_RST);
+		break;
+	case IMX6Q:
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+				   IMX6Q_GPR1_PCIE_TEST_PD, 1 << 18);
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+				   IMX6Q_GPR1_PCIE_REF_CLK_EN, 0 << 16);
+		break;
+	}
+
+	/* Some boards don't have PCIe reset GPIO. */
+	if (gpio_is_valid(imx6_pcie->reset_gpio))
+		gpio_set_value_cansleep(imx6_pcie->reset_gpio,
+					imx6_pcie->gpio_active_high);
+}
+
+static int imx6_pcie_deassert_core_reset(struct imx6_pcie *imx6_pcie)
+{
+	struct dw_pcie *pci = imx6_pcie->pci;
+	struct device *dev = pci->dev;
+
+	switch (imx6_pcie->drvdata->variant) {
+	case IMX8MQ:
+	case IMX8MQ_EP:
+		reset_control_deassert(imx6_pcie->pciephy_reset);
+		break;
+	case IMX7D:
+		reset_control_deassert(imx6_pcie->pciephy_reset);
+
+		/* Workaround for ERR010728, failure of PCI-e PLL VCO to
+		 * oscillate, especially when cold.  This turns off "Duty-cycle
+		 * Corrector" and other mysterious undocumented things.
+		 */
+		if (likely(imx6_pcie->phy_base)) {
+			/* De-assert DCC_FB_EN */
+			writel(PCIE_PHY_CMN_REG4_DCC_FB_EN,
+			       imx6_pcie->phy_base + PCIE_PHY_CMN_REG4);
+			/* Assert RX_EQS and RX_EQS_SEL */
+			writel(PCIE_PHY_CMN_REG24_RX_EQ_SEL
+				| PCIE_PHY_CMN_REG24_RX_EQ,
+			       imx6_pcie->phy_base + PCIE_PHY_CMN_REG24);
+			/* Assert ATT_MODE */
+			writel(PCIE_PHY_CMN_REG26_ATT_MODE,
+			       imx6_pcie->phy_base + PCIE_PHY_CMN_REG26);
+		} else {
+			dev_warn(dev, "Unable to apply ERR010728 workaround. DT missing fsl,imx7d-pcie-phy phandle ?\n");
+		}
+
+		imx7d_pcie_wait_for_phy_pll_lock(imx6_pcie);
+		break;
+	case IMX6SX:
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR5,
+				   IMX6SX_GPR5_PCIE_BTNRST_RESET, 0);
+		break;
+	case IMX6QP:
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR1,
+				   IMX6Q_GPR1_PCIE_SW_RST, 0);
+
+		usleep_range(200, 500);
+		break;
+	case IMX6Q:		/* Nothing to do */
+	case IMX8MM:
+	case IMX8MM_EP:
+	case IMX8MP:
+	case IMX8MP_EP:
+		break;
+	}
+
+	/* Some boards don't have PCIe reset GPIO. */
+	if (gpio_is_valid(imx6_pcie->reset_gpio)) {
+		msleep(100);
+		gpio_set_value_cansleep(imx6_pcie->reset_gpio,
+					!imx6_pcie->gpio_active_high);
+		/* Wait for 100ms after PERST# deassertion (PCIe r5.0, 6.6.1) */
+		msleep(100);
+	}
+
+	return 0;
+}
+
+static int imx6_pcie_wait_for_speed_change(struct imx6_pcie *imx6_pcie)
+{
+	struct dw_pcie *pci = imx6_pcie->pci;
+	struct device *dev = pci->dev;
+	u32 tmp;
+	unsigned int retries;
+
+	for (retries = 0; retries < 200; retries++) {
+		tmp = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+		/* Test if the speed change finished. */
+		if (!(tmp & PORT_LOGIC_SPEED_CHANGE))
+			return 0;
+		usleep_range(100, 1000);
+	}
+
+	dev_err(dev, "Speed change timeout\n");
+	return -ETIMEDOUT;
+}
+
+static void imx6_pcie_ltssm_enable(struct device *dev)
+{
+	struct imx6_pcie *imx6_pcie = dev_get_drvdata(dev);
+
+	switch (imx6_pcie->drvdata->variant) {
+	case IMX6Q:
+	case IMX6SX:
+	case IMX6QP:
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
+				   IMX6Q_GPR12_PCIE_CTL_2,
+				   IMX6Q_GPR12_PCIE_CTL_2);
+		break;
+	case IMX7D:
+	case IMX8MQ:
+	case IMX8MQ_EP:
+	case IMX8MM:
+	case IMX8MM_EP:
+	case IMX8MP:
+	case IMX8MP_EP:
+		reset_control_deassert(imx6_pcie->apps_reset);
+		break;
+	}
+}
+
+static void imx6_pcie_ltssm_disable(struct device *dev)
+{
+	struct imx6_pcie *imx6_pcie = dev_get_drvdata(dev);
+
+	switch (imx6_pcie->drvdata->variant) {
+	case IMX6Q:
+	case IMX6SX:
+	case IMX6QP:
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
+				   IMX6Q_GPR12_PCIE_CTL_2, 0);
+		break;
+	case IMX7D:
+	case IMX8MQ:
+	case IMX8MQ_EP:
+	case IMX8MM:
+	case IMX8MM_EP:
+	case IMX8MP:
+	case IMX8MP_EP:
+		reset_control_assert(imx6_pcie->apps_reset);
+		break;
+	}
+}
+
+static int imx6_pcie_start_link(struct dw_pcie *pci)
+{
+	struct imx6_pcie *imx6_pcie = to_imx6_pcie(pci);
+	struct device *dev = pci->dev;
+	u8 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	u32 tmp;
+	int ret;
+
+	/*
+	 * Force Gen1 operation when starting the link.  In case the link is
+	 * started in Gen2 mode, there is a possibility the devices on the
+	 * bus will not be detected at all.  This happens with PCIe switches.
+	 */
+	dw_pcie_dbi_ro_wr_en(pci);
+	tmp = dw_pcie_readl_dbi(pci, offset + PCI_EXP_LNKCAP);
+	tmp &= ~PCI_EXP_LNKCAP_SLS;
+	tmp |= PCI_EXP_LNKCAP_SLS_2_5GB;
+	dw_pcie_writel_dbi(pci, offset + PCI_EXP_LNKCAP, tmp);
+	dw_pcie_dbi_ro_wr_dis(pci);
+
+	/* Start LTSSM. */
+	imx6_pcie_ltssm_enable(dev);
+
+	ret = dw_pcie_wait_for_link(pci);
+	if (ret)
+		goto err_reset_phy;
+
+	if (pci->link_gen > 1) {
+		/* Allow faster modes after the link is up */
+		dw_pcie_dbi_ro_wr_en(pci);
+		tmp = dw_pcie_readl_dbi(pci, offset + PCI_EXP_LNKCAP);
+		tmp &= ~PCI_EXP_LNKCAP_SLS;
+		tmp |= pci->link_gen;
+		dw_pcie_writel_dbi(pci, offset + PCI_EXP_LNKCAP, tmp);
+
+		/*
+		 * Start Directed Speed Change so the best possible
+		 * speed both link partners support can be negotiated.
+		 */
+		tmp = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+		tmp |= PORT_LOGIC_SPEED_CHANGE;
+		dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, tmp);
+		dw_pcie_dbi_ro_wr_dis(pci);
+
+		if (imx6_pcie->drvdata->flags &
+		    IMX6_PCIE_FLAG_IMX6_SPEED_CHANGE) {
+			/*
+			 * On i.MX7, DIRECT_SPEED_CHANGE behaves differently
+			 * from i.MX6 family when no link speed transition
+			 * occurs and we go Gen1 -> yep, Gen1. The difference
+			 * is that, in such case, it will not be cleared by HW
+			 * which will cause the following code to report false
+			 * failure.
+			 */
+
+			ret = imx6_pcie_wait_for_speed_change(imx6_pcie);
+			if (ret) {
+				dev_err(dev, "Failed to bring link up!\n");
+				goto err_reset_phy;
+			}
+		}
+
+		/* Make sure link training is finished as well! */
+		ret = dw_pcie_wait_for_link(pci);
+		if (ret)
+			goto err_reset_phy;
+	} else {
+		dev_info(dev, "Link: Only Gen1 is enabled\n");
+	}
+
+	imx6_pcie->link_is_up = true;
+	tmp = dw_pcie_readw_dbi(pci, offset + PCI_EXP_LNKSTA);
+	dev_info(dev, "Link up, Gen%i\n", tmp & PCI_EXP_LNKSTA_CLS);
+	return 0;
+
+err_reset_phy:
+	imx6_pcie->link_is_up = false;
+	dev_dbg(dev, "PHY DEBUG_R0=0x%08x DEBUG_R1=0x%08x\n",
+		dw_pcie_readl_dbi(pci, PCIE_PORT_DEBUG0),
+		dw_pcie_readl_dbi(pci, PCIE_PORT_DEBUG1));
+	imx6_pcie_reset_phy(imx6_pcie);
+	return 0;
+}
+
+static void imx6_pcie_stop_link(struct dw_pcie *pci)
+{
+	struct device *dev = pci->dev;
+
+	/* Turn off PCIe LTSSM */
+	imx6_pcie_ltssm_disable(dev);
+}
+
+static int imx6_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct device *dev = pci->dev;
+	struct imx6_pcie *imx6_pcie = to_imx6_pcie(pci);
+	int ret;
+
+	if (imx6_pcie->vpcie) {
+		ret = regulator_enable(imx6_pcie->vpcie);
+		if (ret) {
+			dev_err(dev, "failed to enable vpcie regulator: %d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	imx6_pcie_assert_core_reset(imx6_pcie);
+	imx6_pcie_init_phy(imx6_pcie);
+
+	ret = imx6_pcie_clk_enable(imx6_pcie);
+	if (ret) {
+		dev_err(dev, "unable to enable pcie clocks: %d\n", ret);
+		goto err_reg_disable;
+	}
+
+	if (imx6_pcie->phy) {
+		ret = phy_init(imx6_pcie->phy);
+		if (ret) {
+			dev_err(dev, "pcie PHY power up failed\n");
+			goto err_clk_disable;
+		}
+	}
+
+	if (imx6_pcie->phy) {
+		ret = phy_power_on(imx6_pcie->phy);
+		if (ret) {
+			dev_err(dev, "waiting for PHY ready timeout!\n");
+			goto err_phy_off;
+		}
+	}
+
+	ret = imx6_pcie_deassert_core_reset(imx6_pcie);
+	if (ret < 0) {
+		dev_err(dev, "pcie deassert core reset failed: %d\n", ret);
+		goto err_phy_off;
+	}
+
+	imx6_setup_phy_mpll(imx6_pcie);
+
+	return 0;
+
+err_phy_off:
+	if (imx6_pcie->phy)
+		phy_exit(imx6_pcie->phy);
+err_clk_disable:
+	imx6_pcie_clk_disable(imx6_pcie);
+err_reg_disable:
+	if (imx6_pcie->vpcie)
+		regulator_disable(imx6_pcie->vpcie);
+	return ret;
+}
+
+static void imx6_pcie_host_exit(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct imx6_pcie *imx6_pcie = to_imx6_pcie(pci);
+
+	if (imx6_pcie->phy) {
+		if (phy_power_off(imx6_pcie->phy))
+			dev_err(pci->dev, "unable to power off PHY\n");
+		phy_exit(imx6_pcie->phy);
+	}
+	imx6_pcie_clk_disable(imx6_pcie);
+
+	if (imx6_pcie->vpcie)
+		regulator_disable(imx6_pcie->vpcie);
+}
+
+static const struct dw_pcie_host_ops imx6_pcie_host_ops = {
+	.host_init = imx6_pcie_host_init,
+	.host_deinit = imx6_pcie_host_exit,
+};
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+	.start_link = imx6_pcie_start_link,
+	.stop_link = imx6_pcie_stop_link,
+};
+
+static void imx6_pcie_ep_init(struct dw_pcie_ep *ep)
+{
+	enum pci_barno bar;
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+	for (bar = BAR_0; bar <= BAR_5; bar++)
+		dw_pcie_ep_reset_bar(pci, bar);
+}
+
+static int imx6_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
+				  enum pci_epc_irq_type type,
+				  u16 interrupt_num)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+	switch (type) {
+	case PCI_EPC_IRQ_LEGACY:
+		return dw_pcie_ep_raise_legacy_irq(ep, func_no);
+	case PCI_EPC_IRQ_MSI:
+		return dw_pcie_ep_raise_msi_irq(ep, func_no, interrupt_num);
+	case PCI_EPC_IRQ_MSIX:
+		return dw_pcie_ep_raise_msix_irq(ep, func_no, interrupt_num);
+	default:
+		dev_err(pci->dev, "UNKNOWN IRQ type\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct pci_epc_features imx8m_pcie_epc_features = {
+	.linkup_notifier = false,
+	.msi_capable = true,
+	.msix_capable = false,
+	.reserved_bar = 1 << BAR_1 | 1 << BAR_3,
+	.align = SZ_64K,
+};
+
+static const struct pci_epc_features*
+imx6_pcie_ep_get_features(struct dw_pcie_ep *ep)
+{
+	return &imx8m_pcie_epc_features;
+}
+
+static const struct dw_pcie_ep_ops pcie_ep_ops = {
+	.ep_init = imx6_pcie_ep_init,
+	.raise_irq = imx6_pcie_ep_raise_irq,
+	.get_features = imx6_pcie_ep_get_features,
+};
+
+static int imx6_add_pcie_ep(struct imx6_pcie *imx6_pcie,
+			   struct platform_device *pdev)
+{
+	int ret;
+	unsigned int pcie_dbi2_offset;
+	struct dw_pcie_ep *ep;
+	struct resource *res;
+	struct dw_pcie *pci = imx6_pcie->pci;
+	struct dw_pcie_rp *pp = &pci->pp;
+	struct device *dev = pci->dev;
+
+	imx6_pcie_host_init(pp);
+	ep = &pci->ep;
+	ep->ops = &pcie_ep_ops;
+
+	switch (imx6_pcie->drvdata->variant) {
+	case IMX8MQ_EP:
+	case IMX8MM_EP:
+	case IMX8MP_EP:
+		pcie_dbi2_offset = SZ_1M;
+		break;
+	default:
+		pcie_dbi2_offset = SZ_4K;
+		break;
+	}
+	pci->dbi_base2 = pci->dbi_base + pcie_dbi2_offset;
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "addr_space");
+	if (!res)
+		return -EINVAL;
+
+	ep->phys_base = res->start;
+	ep->addr_size = resource_size(res);
+	ep->page_size = SZ_64K;
+
+	ret = dw_pcie_ep_init(ep);
+	if (ret) {
+		dev_err(dev, "failed to initialize endpoint\n");
+		return ret;
+	}
+	/* Start LTSSM. */
+	imx6_pcie_ltssm_enable(dev);
+
+	return 0;
+}
+
+static void imx6_pcie_pm_turnoff(struct imx6_pcie *imx6_pcie)
+{
+	struct device *dev = imx6_pcie->pci->dev;
+
+	/* Some variants have a turnoff reset in DT */
+	if (imx6_pcie->turnoff_reset) {
+		reset_control_assert(imx6_pcie->turnoff_reset);
+		reset_control_deassert(imx6_pcie->turnoff_reset);
+		goto pm_turnoff_sleep;
+	}
+
+	/* Others poke directly at IOMUXC registers */
+	switch (imx6_pcie->drvdata->variant) {
+	case IMX6SX:
+	case IMX6QP:
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
+				IMX6SX_GPR12_PCIE_PM_TURN_OFF,
+				IMX6SX_GPR12_PCIE_PM_TURN_OFF);
+		regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12,
+				IMX6SX_GPR12_PCIE_PM_TURN_OFF, 0);
+		break;
+	default:
+		dev_err(dev, "PME_Turn_Off not implemented\n");
+		return;
+	}
+
+	/*
+	 * Components with an upstream port must respond to
+	 * PME_Turn_Off with PME_TO_Ack but we can't check.
+	 *
+	 * The standard recommends a 1-10ms timeout after which to
+	 * proceed anyway as if acks were received.
+	 */
+pm_turnoff_sleep:
+	usleep_range(1000, 10000);
+}
+
+static void imx6_pcie_msi_save_restore(struct imx6_pcie *imx6_pcie, bool save)
+{
+	u8 offset;
+	u16 val;
+	struct dw_pcie *pci = imx6_pcie->pci;
+
+	if (pci_msi_enabled()) {
+		offset = dw_pcie_find_capability(pci, PCI_CAP_ID_MSI);
+		if (save) {
+			val = dw_pcie_readw_dbi(pci, offset + PCI_MSI_FLAGS);
+			imx6_pcie->msi_ctrl = val;
+		} else {
+			dw_pcie_dbi_ro_wr_en(pci);
+			val = imx6_pcie->msi_ctrl;
+			dw_pcie_writew_dbi(pci, offset + PCI_MSI_FLAGS, val);
+			dw_pcie_dbi_ro_wr_dis(pci);
+		}
+	}
+}
+
+static int imx6_pcie_suspend_noirq(struct device *dev)
+{
+	struct imx6_pcie *imx6_pcie = dev_get_drvdata(dev);
+	struct dw_pcie_rp *pp = &imx6_pcie->pci->pp;
+
+	if (!(imx6_pcie->drvdata->flags & IMX6_PCIE_FLAG_SUPPORTS_SUSPEND))
+		return 0;
+
+	imx6_pcie_msi_save_restore(imx6_pcie, true);
+	imx6_pcie_pm_turnoff(imx6_pcie);
+	imx6_pcie_stop_link(imx6_pcie->pci);
+	imx6_pcie_host_exit(pp);
+
+	return 0;
+}
+
+static int imx6_pcie_resume_noirq(struct device *dev)
+{
+	int ret;
+	struct imx6_pcie *imx6_pcie = dev_get_drvdata(dev);
+	struct dw_pcie_rp *pp = &imx6_pcie->pci->pp;
+
+	if (!(imx6_pcie->drvdata->flags & IMX6_PCIE_FLAG_SUPPORTS_SUSPEND))
+		return 0;
+
+	ret = imx6_pcie_host_init(pp);
+	if (ret)
+		return ret;
+	imx6_pcie_msi_save_restore(imx6_pcie, false);
+	dw_pcie_setup_rc(pp);
+
+	if (imx6_pcie->link_is_up)
+		imx6_pcie_start_link(imx6_pcie->pci);
+
+	return 0;
+}
+
+static const struct dev_pm_ops imx6_pcie_pm_ops = {
+	NOIRQ_SYSTEM_SLEEP_PM_OPS(imx6_pcie_suspend_noirq,
+				  imx6_pcie_resume_noirq)
+};
+
+static int imx6_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct dw_pcie *pci;
+	struct imx6_pcie *imx6_pcie;
+	struct device_node *np;
+	struct resource *dbi_base;
+	struct device_node *node = dev->of_node;
+	int ret;
+	u16 val;
+
+	imx6_pcie = devm_kzalloc(dev, sizeof(*imx6_pcie), GFP_KERNEL);
+	if (!imx6_pcie)
+		return -ENOMEM;
+
+	pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	if (!pci)
+		return -ENOMEM;
+
+	pci->dev = dev;
+	pci->ops = &dw_pcie_ops;
+	pci->pp.ops = &imx6_pcie_host_ops;
+
+	imx6_pcie->pci = pci;
+	imx6_pcie->drvdata = of_device_get_match_data(dev);
+
+	/* Find the PHY if one is defined, only imx7d uses it */
+	np = of_parse_phandle(node, "fsl,imx7d-pcie-phy", 0);
+	if (np) {
+		struct resource res;
+
+		ret = of_address_to_resource(np, 0, &res);
+		if (ret) {
+			dev_err(dev, "Unable to map PCIe PHY\n");
+			return ret;
+		}
+		imx6_pcie->phy_base = devm_ioremap_resource(dev, &res);
+		if (IS_ERR(imx6_pcie->phy_base))
+			return PTR_ERR(imx6_pcie->phy_base);
+	}
+
+	pci->dbi_base = devm_platform_get_and_ioremap_resource(pdev, 0, &dbi_base);
+	if (IS_ERR(pci->dbi_base))
+		return PTR_ERR(pci->dbi_base);
+
+	/* Fetch GPIOs */
+	imx6_pcie->reset_gpio = of_get_named_gpio(node, "reset-gpio", 0);
+	imx6_pcie->gpio_active_high = of_property_read_bool(node,
+						"reset-gpio-active-high");
+	if (gpio_is_valid(imx6_pcie->reset_gpio)) {
+		ret = devm_gpio_request_one(dev, imx6_pcie->reset_gpio,
+				imx6_pcie->gpio_active_high ?
+					GPIOF_OUT_INIT_HIGH :
+					GPIOF_OUT_INIT_LOW,
+				"PCIe reset");
+		if (ret) {
+			dev_err(dev, "unable to get reset gpio\n");
+			return ret;
+		}
+	} else if (imx6_pcie->reset_gpio == -EPROBE_DEFER) {
+		return imx6_pcie->reset_gpio;
+	}
+
+	/* Fetch clocks */
+	imx6_pcie->pcie_bus = devm_clk_get(dev, "pcie_bus");
+	if (IS_ERR(imx6_pcie->pcie_bus))
+		return dev_err_probe(dev, PTR_ERR(imx6_pcie->pcie_bus),
+				     "pcie_bus clock source missing or invalid\n");
+
+	imx6_pcie->pcie = devm_clk_get(dev, "pcie");
+	if (IS_ERR(imx6_pcie->pcie))
+		return dev_err_probe(dev, PTR_ERR(imx6_pcie->pcie),
+				     "pcie clock source missing or invalid\n");
+
+	switch (imx6_pcie->drvdata->variant) {
+	case IMX6SX:
+		imx6_pcie->pcie_inbound_axi = devm_clk_get(dev,
+							   "pcie_inbound_axi");
+		if (IS_ERR(imx6_pcie->pcie_inbound_axi))
+			return dev_err_probe(dev, PTR_ERR(imx6_pcie->pcie_inbound_axi),
+					     "pcie_inbound_axi clock missing or invalid\n");
+		break;
+	case IMX8MQ:
+	case IMX8MQ_EP:
+		imx6_pcie->pcie_aux = devm_clk_get(dev, "pcie_aux");
+		if (IS_ERR(imx6_pcie->pcie_aux))
+			return dev_err_probe(dev, PTR_ERR(imx6_pcie->pcie_aux),
+					     "pcie_aux clock source missing or invalid\n");
+		fallthrough;
+	case IMX7D:
+		if (dbi_base->start == IMX8MQ_PCIE2_BASE_ADDR)
+			imx6_pcie->controller_id = 1;
+
+		imx6_pcie->pciephy_reset = devm_reset_control_get_exclusive(dev,
+									    "pciephy");
+		if (IS_ERR(imx6_pcie->pciephy_reset)) {
+			dev_err(dev, "Failed to get PCIEPHY reset control\n");
+			return PTR_ERR(imx6_pcie->pciephy_reset);
+		}
+
+		imx6_pcie->apps_reset = devm_reset_control_get_exclusive(dev,
+									 "apps");
+		if (IS_ERR(imx6_pcie->apps_reset)) {
+			dev_err(dev, "Failed to get PCIE APPS reset control\n");
+			return PTR_ERR(imx6_pcie->apps_reset);
+		}
+		break;
+	case IMX8MM:
+	case IMX8MM_EP:
+	case IMX8MP:
+	case IMX8MP_EP:
+		imx6_pcie->pcie_aux = devm_clk_get(dev, "pcie_aux");
+		if (IS_ERR(imx6_pcie->pcie_aux))
+			return dev_err_probe(dev, PTR_ERR(imx6_pcie->pcie_aux),
+					     "pcie_aux clock source missing or invalid\n");
+		imx6_pcie->apps_reset = devm_reset_control_get_exclusive(dev,
+									 "apps");
+		if (IS_ERR(imx6_pcie->apps_reset))
+			return dev_err_probe(dev, PTR_ERR(imx6_pcie->apps_reset),
+					     "failed to get pcie apps reset control\n");
+
+		imx6_pcie->phy = devm_phy_get(dev, "pcie-phy");
+		if (IS_ERR(imx6_pcie->phy))
+			return dev_err_probe(dev, PTR_ERR(imx6_pcie->phy),
+					     "failed to get pcie phy\n");
+
+		break;
+	default:
+		break;
+	}
+	/* Don't fetch the pcie_phy clock, if it has abstract PHY driver */
+	if (imx6_pcie->phy == NULL) {
+		imx6_pcie->pcie_phy = devm_clk_get(dev, "pcie_phy");
+		if (IS_ERR(imx6_pcie->pcie_phy))
+			return dev_err_probe(dev, PTR_ERR(imx6_pcie->pcie_phy),
+					     "pcie_phy clock source missing or invalid\n");
+	}
+
+
+	/* Grab turnoff reset */
+	imx6_pcie->turnoff_reset = devm_reset_control_get_optional_exclusive(dev, "turnoff");
+	if (IS_ERR(imx6_pcie->turnoff_reset)) {
+		dev_err(dev, "Failed to get TURNOFF reset control\n");
+		return PTR_ERR(imx6_pcie->turnoff_reset);
+	}
+
+	/* Grab GPR config register range */
+	imx6_pcie->iomuxc_gpr =
+		 syscon_regmap_lookup_by_compatible(imx6_pcie->drvdata->gpr);
+	if (IS_ERR(imx6_pcie->iomuxc_gpr)) {
+		dev_err(dev, "unable to find iomuxc registers\n");
+		return PTR_ERR(imx6_pcie->iomuxc_gpr);
+	}
+
+	/* Grab PCIe PHY Tx Settings */
+	if (of_property_read_u32(node, "fsl,tx-deemph-gen1",
+				 &imx6_pcie->tx_deemph_gen1))
+		imx6_pcie->tx_deemph_gen1 = 0;
+
+	if (of_property_read_u32(node, "fsl,tx-deemph-gen2-3p5db",
+				 &imx6_pcie->tx_deemph_gen2_3p5db))
+		imx6_pcie->tx_deemph_gen2_3p5db = 0;
+
+	if (of_property_read_u32(node, "fsl,tx-deemph-gen2-6db",
+				 &imx6_pcie->tx_deemph_gen2_6db))
+		imx6_pcie->tx_deemph_gen2_6db = 20;
+
+	if (of_property_read_u32(node, "fsl,tx-swing-full",
+				 &imx6_pcie->tx_swing_full))
+		imx6_pcie->tx_swing_full = 127;
+
+	if (of_property_read_u32(node, "fsl,tx-swing-low",
+				 &imx6_pcie->tx_swing_low))
+		imx6_pcie->tx_swing_low = 127;
+
+	/* Limit link speed */
+	pci->link_gen = 1;
+	of_property_read_u32(node, "fsl,max-link-speed", &pci->link_gen);
+
+	imx6_pcie->vpcie = devm_regulator_get_optional(&pdev->dev, "vpcie");
+	if (IS_ERR(imx6_pcie->vpcie)) {
+		if (PTR_ERR(imx6_pcie->vpcie) != -ENODEV)
+			return PTR_ERR(imx6_pcie->vpcie);
+		imx6_pcie->vpcie = NULL;
+	}
+
+	imx6_pcie->vph = devm_regulator_get_optional(&pdev->dev, "vph");
+	if (IS_ERR(imx6_pcie->vph)) {
+		if (PTR_ERR(imx6_pcie->vph) != -ENODEV)
+			return PTR_ERR(imx6_pcie->vph);
+		imx6_pcie->vph = NULL;
+	}
+
+	platform_set_drvdata(pdev, imx6_pcie);
+
+	ret = imx6_pcie_attach_pd(dev);
+	if (ret)
+		return ret;
+
+	if (imx6_pcie->drvdata->mode == DW_PCIE_EP_TYPE) {
+		ret = imx6_add_pcie_ep(imx6_pcie, pdev);
+		if (ret < 0)
+			return ret;
+	} else {
+		ret = dw_pcie_host_init(&pci->pp);
+		if (ret < 0)
+			return ret;
+
+		if (pci_msi_enabled()) {
+			u8 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_MSI);
+
+			val = dw_pcie_readw_dbi(pci, offset + PCI_MSI_FLAGS);
+			val |= PCI_MSI_FLAGS_ENABLE;
+			dw_pcie_writew_dbi(pci, offset + PCI_MSI_FLAGS, val);
+		}
+	}
+
+	return 0;
+}
+
+static void imx6_pcie_shutdown(struct platform_device *pdev)
+{
+	struct imx6_pcie *imx6_pcie = platform_get_drvdata(pdev);
+
+	/* bring down link, so bootloader gets clean state in case of reboot */
+	imx6_pcie_assert_core_reset(imx6_pcie);
+}
+
+static const struct imx6_pcie_drvdata drvdata[] = {
+	[IMX6Q] = {
+		.variant = IMX6Q,
+		.flags = IMX6_PCIE_FLAG_IMX6_PHY |
+			 IMX6_PCIE_FLAG_IMX6_SPEED_CHANGE,
+		.dbi_length = 0x200,
+		.gpr = "fsl,imx6q-iomuxc-gpr",
+	},
+	[IMX6SX] = {
+		.variant = IMX6SX,
+		.flags = IMX6_PCIE_FLAG_IMX6_PHY |
+			 IMX6_PCIE_FLAG_IMX6_SPEED_CHANGE |
+			 IMX6_PCIE_FLAG_SUPPORTS_SUSPEND,
+		.gpr = "fsl,imx6q-iomuxc-gpr",
+	},
+	[IMX6QP] = {
+		.variant = IMX6QP,
+		.flags = IMX6_PCIE_FLAG_IMX6_PHY |
+			 IMX6_PCIE_FLAG_IMX6_SPEED_CHANGE |
+			 IMX6_PCIE_FLAG_SUPPORTS_SUSPEND,
+		.dbi_length = 0x200,
+		.gpr = "fsl,imx6q-iomuxc-gpr",
+	},
+	[IMX7D] = {
+		.variant = IMX7D,
+		.flags = IMX6_PCIE_FLAG_SUPPORTS_SUSPEND,
+		.gpr = "fsl,imx7d-iomuxc-gpr",
+	},
+	[IMX8MQ] = {
+		.variant = IMX8MQ,
+		.gpr = "fsl,imx8mq-iomuxc-gpr",
+	},
+	[IMX8MM] = {
+		.variant = IMX8MM,
+		.flags = IMX6_PCIE_FLAG_SUPPORTS_SUSPEND,
+		.gpr = "fsl,imx8mm-iomuxc-gpr",
+	},
+	[IMX8MP] = {
+		.variant = IMX8MP,
+		.flags = IMX6_PCIE_FLAG_SUPPORTS_SUSPEND,
+		.gpr = "fsl,imx8mp-iomuxc-gpr",
+	},
+	[IMX8MQ_EP] = {
+		.variant = IMX8MQ_EP,
+		.mode = DW_PCIE_EP_TYPE,
+		.gpr = "fsl,imx8mq-iomuxc-gpr",
+	},
+	[IMX8MM_EP] = {
+		.variant = IMX8MM_EP,
+		.mode = DW_PCIE_EP_TYPE,
+		.gpr = "fsl,imx8mm-iomuxc-gpr",
+	},
+	[IMX8MP_EP] = {
+		.variant = IMX8MP_EP,
+		.mode = DW_PCIE_EP_TYPE,
+		.gpr = "fsl,imx8mp-iomuxc-gpr",
+	},
+};
+
+static const struct of_device_id imx6_pcie_of_match[] = {
+	{ .compatible = "fsl,imx6q-pcie",  .data = &drvdata[IMX6Q],  },
+	{ .compatible = "fsl,imx6sx-pcie", .data = &drvdata[IMX6SX], },
+	{ .compatible = "fsl,imx6qp-pcie", .data = &drvdata[IMX6QP], },
+	{ .compatible = "fsl,imx7d-pcie",  .data = &drvdata[IMX7D],  },
+	{ .compatible = "fsl,imx8mq-pcie", .data = &drvdata[IMX8MQ], },
+	{ .compatible = "fsl,imx8mm-pcie", .data = &drvdata[IMX8MM], },
+	{ .compatible = "fsl,imx8mp-pcie", .data = &drvdata[IMX8MP], },
+	{ .compatible = "fsl,imx8mq-pcie-ep", .data = &drvdata[IMX8MQ_EP], },
+	{ .compatible = "fsl,imx8mm-pcie-ep", .data = &drvdata[IMX8MM_EP], },
+	{ .compatible = "fsl,imx8mp-pcie-ep", .data = &drvdata[IMX8MP_EP], },
+	{},
+};
+
+static struct platform_driver imx6_pcie_driver = {
+	.driver = {
+		.name	= "imx6q-pcie",
+		.of_match_table = imx6_pcie_of_match,
+		.suppress_bind_attrs = true,
+		.pm = &imx6_pcie_pm_ops,
+		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
+	},
+	.probe    = imx6_pcie_probe,
+	.shutdown = imx6_pcie_shutdown,
+};
+
+static void imx6_pcie_quirk(struct pci_dev *dev)
+{
+	struct pci_bus *bus = dev->bus;
+	struct dw_pcie_rp *pp = bus->sysdata;
+
+	/* Bus parent is the PCI bridge, its parent is this platform driver */
+	if (!bus->dev.parent || !bus->dev.parent->parent)
+		return;
+
+	/* Make sure we only quirk devices associated with this driver */
+	if (bus->dev.parent->parent->driver != &imx6_pcie_driver.driver)
+		return;
+
+	if (pci_is_root_bus(bus)) {
+		struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+		struct imx6_pcie *imx6_pcie = to_imx6_pcie(pci);
+
+		/*
+		 * Limit config length to avoid the kernel reading beyond
+		 * the register set and causing an abort on i.MX 6Quad
+		 */
+		if (imx6_pcie->drvdata->dbi_length) {
+			dev->cfg_size = imx6_pcie->drvdata->dbi_length;
+			dev_info(&dev->dev, "Limiting cfg_size to %d\n",
+					dev->cfg_size);
+		}
+	}
+}
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_SYNOPSYS, 0xabcd,
+			PCI_CLASS_BRIDGE_PCI, 8, imx6_pcie_quirk);
+
+static int __init imx6_pcie_init(void)
+{
+#ifdef CONFIG_ARM
+	struct device_node *np;
+
+	np = of_find_matching_node(NULL, imx6_pcie_of_match);
+	if (!np)
+		return -ENODEV;
+	of_node_put(np);
+
+	/*
+	 * Since probe() can be deferred we need to make sure that
+	 * hook_fault_code is not called after __init memory is freed
+	 * by kernel and since imx6q_pcie_abort_handler() is a no-op,
+	 * we can install the handler here without risking it
+	 * accessing some uninitialized driver state.
+	 */
+	hook_fault_code(8, imx6q_pcie_abort_handler, SIGBUS, 0,
+			"external abort on non-linefetch");
+#endif
+
+	return platform_driver_register(&imx6_pcie_driver);
+}
+device_initcall(imx6_pcie_init);
diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c
new file mode 100644
index 000000000..cf3836561
--- /dev/null
+++ b/drivers/pci/controller/dwc/pci-keystone.c
@@ -0,0 +1,1338 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for Texas Instruments Keystone SoCs
+ *
+ * Copyright (C) 2013-2014 Texas Instruments., Ltd.
+ *		https://www.ti.com
+ *
+ * Author: Murali Karicheri <m-karicheri2@ti.com>
+ * Implementation based on pci-exynos.c and pcie-designware.c
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/mfd/syscon.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/resource.h>
+#include <linux/signal.h>
+
+#include "../../pci.h"
+#include "pcie-designware.h"
+
+#define PCIE_VENDORID_MASK	0xffff
+#define PCIE_DEVICEID_SHIFT	16
+
+/* Application registers */
+#define CMD_STATUS			0x004
+#define LTSSM_EN_VAL		        BIT(0)
+#define OB_XLAT_EN_VAL		        BIT(1)
+#define DBI_CS2				BIT(5)
+
+#define CFG_SETUP			0x008
+#define CFG_BUS(x)			(((x) & 0xff) << 16)
+#define CFG_DEVICE(x)			(((x) & 0x1f) << 8)
+#define CFG_FUNC(x)			((x) & 0x7)
+#define CFG_TYPE1			BIT(24)
+
+#define OB_SIZE				0x030
+#define OB_OFFSET_INDEX(n)		(0x200 + (8 * (n)))
+#define OB_OFFSET_HI(n)			(0x204 + (8 * (n)))
+#define OB_ENABLEN			BIT(0)
+#define OB_WIN_SIZE			8	/* 8MB */
+
+#define PCIE_LEGACY_IRQ_ENABLE_SET(n)	(0x188 + (0x10 * ((n) - 1)))
+#define PCIE_LEGACY_IRQ_ENABLE_CLR(n)	(0x18c + (0x10 * ((n) - 1)))
+#define PCIE_EP_IRQ_SET			0x64
+#define PCIE_EP_IRQ_CLR			0x68
+#define INT_ENABLE			BIT(0)
+
+/* IRQ register defines */
+#define IRQ_EOI				0x050
+
+#define MSI_IRQ				0x054
+#define MSI_IRQ_STATUS(n)		(0x104 + ((n) << 4))
+#define MSI_IRQ_ENABLE_SET(n)		(0x108 + ((n) << 4))
+#define MSI_IRQ_ENABLE_CLR(n)		(0x10c + ((n) << 4))
+#define MSI_IRQ_OFFSET			4
+
+#define IRQ_STATUS(n)			(0x184 + ((n) << 4))
+#define IRQ_ENABLE_SET(n)		(0x188 + ((n) << 4))
+#define INTx_EN				BIT(0)
+
+#define ERR_IRQ_STATUS			0x1c4
+#define ERR_IRQ_ENABLE_SET		0x1c8
+#define ERR_AER				BIT(5)	/* ECRC error */
+#define AM6_ERR_AER			BIT(4)	/* AM6 ECRC error */
+#define ERR_AXI				BIT(4)	/* AXI tag lookup fatal error */
+#define ERR_CORR			BIT(3)	/* Correctable error */
+#define ERR_NONFATAL			BIT(2)	/* Non-fatal error */
+#define ERR_FATAL			BIT(1)	/* Fatal error */
+#define ERR_SYS				BIT(0)	/* System error */
+#define ERR_IRQ_ALL			(ERR_AER | ERR_AXI | ERR_CORR | \
+					 ERR_NONFATAL | ERR_FATAL | ERR_SYS)
+
+/* PCIE controller device IDs */
+#define PCIE_RC_K2HK			0xb008
+#define PCIE_RC_K2E			0xb009
+#define PCIE_RC_K2L			0xb00a
+#define PCIE_RC_K2G			0xb00b
+
+#define KS_PCIE_DEV_TYPE_MASK		(0x3 << 1)
+#define KS_PCIE_DEV_TYPE(mode)		((mode) << 1)
+
+#define EP				0x0
+#define LEG_EP				0x1
+#define RC				0x2
+
+#define KS_PCIE_SYSCLOCKOUTEN		BIT(0)
+
+#define AM654_PCIE_DEV_TYPE_MASK	0x3
+#define AM654_WIN_SIZE			SZ_64K
+
+#define APP_ADDR_SPACE_0		(16 * SZ_1K)
+
+#define to_keystone_pcie(x)		dev_get_drvdata((x)->dev)
+
+struct ks_pcie_of_data {
+	enum dw_pcie_device_mode mode;
+	const struct dw_pcie_host_ops *host_ops;
+	const struct dw_pcie_ep_ops *ep_ops;
+	u32 version;
+};
+
+struct keystone_pcie {
+	struct dw_pcie		*pci;
+	/* PCI Device ID */
+	u32			device_id;
+	int			legacy_host_irqs[PCI_NUM_INTX];
+	struct			device_node *legacy_intc_np;
+
+	int			msi_host_irq;
+	int			num_lanes;
+	u32			num_viewport;
+	struct phy		**phy;
+	struct device_link	**link;
+	struct			device_node *msi_intc_np;
+	struct irq_domain	*legacy_irq_domain;
+	struct device_node	*np;
+
+	/* Application register space */
+	void __iomem		*va_app_base;	/* DT 1st resource */
+	struct resource		app;
+	bool			is_am6;
+};
+
+static u32 ks_pcie_app_readl(struct keystone_pcie *ks_pcie, u32 offset)
+{
+	return readl(ks_pcie->va_app_base + offset);
+}
+
+static void ks_pcie_app_writel(struct keystone_pcie *ks_pcie, u32 offset,
+			       u32 val)
+{
+	writel(val, ks_pcie->va_app_base + offset);
+}
+
+static void ks_pcie_msi_irq_ack(struct irq_data *data)
+{
+	struct dw_pcie_rp *pp  = irq_data_get_irq_chip_data(data);
+	struct keystone_pcie *ks_pcie;
+	u32 irq = data->hwirq;
+	struct dw_pcie *pci;
+	u32 reg_offset;
+	u32 bit_pos;
+
+	pci = to_dw_pcie_from_pp(pp);
+	ks_pcie = to_keystone_pcie(pci);
+
+	reg_offset = irq % 8;
+	bit_pos = irq >> 3;
+
+	ks_pcie_app_writel(ks_pcie, MSI_IRQ_STATUS(reg_offset),
+			   BIT(bit_pos));
+	ks_pcie_app_writel(ks_pcie, IRQ_EOI, reg_offset + MSI_IRQ_OFFSET);
+}
+
+static void ks_pcie_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(data);
+	struct keystone_pcie *ks_pcie;
+	struct dw_pcie *pci;
+	u64 msi_target;
+
+	pci = to_dw_pcie_from_pp(pp);
+	ks_pcie = to_keystone_pcie(pci);
+
+	msi_target = ks_pcie->app.start + MSI_IRQ;
+	msg->address_lo = lower_32_bits(msi_target);
+	msg->address_hi = upper_32_bits(msi_target);
+	msg->data = data->hwirq;
+
+	dev_dbg(pci->dev, "msi#%d address_hi %#x address_lo %#x\n",
+		(int)data->hwirq, msg->address_hi, msg->address_lo);
+}
+
+static int ks_pcie_msi_set_affinity(struct irq_data *irq_data,
+				    const struct cpumask *mask, bool force)
+{
+	return -EINVAL;
+}
+
+static void ks_pcie_msi_mask(struct irq_data *data)
+{
+	struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(data);
+	struct keystone_pcie *ks_pcie;
+	u32 irq = data->hwirq;
+	struct dw_pcie *pci;
+	unsigned long flags;
+	u32 reg_offset;
+	u32 bit_pos;
+
+	raw_spin_lock_irqsave(&pp->lock, flags);
+
+	pci = to_dw_pcie_from_pp(pp);
+	ks_pcie = to_keystone_pcie(pci);
+
+	reg_offset = irq % 8;
+	bit_pos = irq >> 3;
+
+	ks_pcie_app_writel(ks_pcie, MSI_IRQ_ENABLE_CLR(reg_offset),
+			   BIT(bit_pos));
+
+	raw_spin_unlock_irqrestore(&pp->lock, flags);
+}
+
+static void ks_pcie_msi_unmask(struct irq_data *data)
+{
+	struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(data);
+	struct keystone_pcie *ks_pcie;
+	u32 irq = data->hwirq;
+	struct dw_pcie *pci;
+	unsigned long flags;
+	u32 reg_offset;
+	u32 bit_pos;
+
+	raw_spin_lock_irqsave(&pp->lock, flags);
+
+	pci = to_dw_pcie_from_pp(pp);
+	ks_pcie = to_keystone_pcie(pci);
+
+	reg_offset = irq % 8;
+	bit_pos = irq >> 3;
+
+	ks_pcie_app_writel(ks_pcie, MSI_IRQ_ENABLE_SET(reg_offset),
+			   BIT(bit_pos));
+
+	raw_spin_unlock_irqrestore(&pp->lock, flags);
+}
+
+static struct irq_chip ks_pcie_msi_irq_chip = {
+	.name = "KEYSTONE-PCI-MSI",
+	.irq_ack = ks_pcie_msi_irq_ack,
+	.irq_compose_msi_msg = ks_pcie_compose_msi_msg,
+	.irq_set_affinity = ks_pcie_msi_set_affinity,
+	.irq_mask = ks_pcie_msi_mask,
+	.irq_unmask = ks_pcie_msi_unmask,
+};
+
+static int ks_pcie_msi_host_init(struct dw_pcie_rp *pp)
+{
+	pp->msi_irq_chip = &ks_pcie_msi_irq_chip;
+	return dw_pcie_allocate_domains(pp);
+}
+
+static void ks_pcie_handle_legacy_irq(struct keystone_pcie *ks_pcie,
+				      int offset)
+{
+	struct dw_pcie *pci = ks_pcie->pci;
+	struct device *dev = pci->dev;
+	u32 pending;
+
+	pending = ks_pcie_app_readl(ks_pcie, IRQ_STATUS(offset));
+
+	if (BIT(0) & pending) {
+		dev_dbg(dev, ": irq: irq_offset %d", offset);
+		generic_handle_domain_irq(ks_pcie->legacy_irq_domain, offset);
+	}
+
+	/* EOI the INTx interrupt */
+	ks_pcie_app_writel(ks_pcie, IRQ_EOI, offset);
+}
+
+static void ks_pcie_enable_error_irq(struct keystone_pcie *ks_pcie)
+{
+	ks_pcie_app_writel(ks_pcie, ERR_IRQ_ENABLE_SET, ERR_IRQ_ALL);
+}
+
+static irqreturn_t ks_pcie_handle_error_irq(struct keystone_pcie *ks_pcie)
+{
+	u32 reg;
+	struct device *dev = ks_pcie->pci->dev;
+
+	reg = ks_pcie_app_readl(ks_pcie, ERR_IRQ_STATUS);
+	if (!reg)
+		return IRQ_NONE;
+
+	if (reg & ERR_SYS)
+		dev_err(dev, "System Error\n");
+
+	if (reg & ERR_FATAL)
+		dev_err(dev, "Fatal Error\n");
+
+	if (reg & ERR_NONFATAL)
+		dev_dbg(dev, "Non Fatal Error\n");
+
+	if (reg & ERR_CORR)
+		dev_dbg(dev, "Correctable Error\n");
+
+	if (!ks_pcie->is_am6 && (reg & ERR_AXI))
+		dev_err(dev, "AXI tag lookup fatal Error\n");
+
+	if (reg & ERR_AER || (ks_pcie->is_am6 && (reg & AM6_ERR_AER)))
+		dev_err(dev, "ECRC Error\n");
+
+	ks_pcie_app_writel(ks_pcie, ERR_IRQ_STATUS, reg);
+
+	return IRQ_HANDLED;
+}
+
+static void ks_pcie_ack_legacy_irq(struct irq_data *d)
+{
+}
+
+static void ks_pcie_mask_legacy_irq(struct irq_data *d)
+{
+}
+
+static void ks_pcie_unmask_legacy_irq(struct irq_data *d)
+{
+}
+
+static struct irq_chip ks_pcie_legacy_irq_chip = {
+	.name = "Keystone-PCI-Legacy-IRQ",
+	.irq_ack = ks_pcie_ack_legacy_irq,
+	.irq_mask = ks_pcie_mask_legacy_irq,
+	.irq_unmask = ks_pcie_unmask_legacy_irq,
+};
+
+static int ks_pcie_init_legacy_irq_map(struct irq_domain *d,
+				       unsigned int irq,
+				       irq_hw_number_t hw_irq)
+{
+	irq_set_chip_and_handler(irq, &ks_pcie_legacy_irq_chip,
+				 handle_level_irq);
+	irq_set_chip_data(irq, d->host_data);
+
+	return 0;
+}
+
+static const struct irq_domain_ops ks_pcie_legacy_irq_domain_ops = {
+	.map = ks_pcie_init_legacy_irq_map,
+	.xlate = irq_domain_xlate_onetwocell,
+};
+
+/**
+ * ks_pcie_set_dbi_mode() - Set DBI mode to access overlaid BAR mask registers
+ * @ks_pcie: A pointer to the keystone_pcie structure which holds the KeyStone
+ *	     PCIe host controller driver information.
+ *
+ * Since modification of dbi_cs2 involves different clock domain, read the
+ * status back to ensure the transition is complete.
+ */
+static void ks_pcie_set_dbi_mode(struct keystone_pcie *ks_pcie)
+{
+	u32 val;
+
+	val = ks_pcie_app_readl(ks_pcie, CMD_STATUS);
+	val |= DBI_CS2;
+	ks_pcie_app_writel(ks_pcie, CMD_STATUS, val);
+
+	do {
+		val = ks_pcie_app_readl(ks_pcie, CMD_STATUS);
+	} while (!(val & DBI_CS2));
+}
+
+/**
+ * ks_pcie_clear_dbi_mode() - Disable DBI mode
+ * @ks_pcie: A pointer to the keystone_pcie structure which holds the KeyStone
+ *	     PCIe host controller driver information.
+ *
+ * Since modification of dbi_cs2 involves different clock domain, read the
+ * status back to ensure the transition is complete.
+ */
+static void ks_pcie_clear_dbi_mode(struct keystone_pcie *ks_pcie)
+{
+	u32 val;
+
+	val = ks_pcie_app_readl(ks_pcie, CMD_STATUS);
+	val &= ~DBI_CS2;
+	ks_pcie_app_writel(ks_pcie, CMD_STATUS, val);
+
+	do {
+		val = ks_pcie_app_readl(ks_pcie, CMD_STATUS);
+	} while (val & DBI_CS2);
+}
+
+static void ks_pcie_setup_rc_app_regs(struct keystone_pcie *ks_pcie)
+{
+	u32 val;
+	u32 num_viewport = ks_pcie->num_viewport;
+	struct dw_pcie *pci = ks_pcie->pci;
+	struct dw_pcie_rp *pp = &pci->pp;
+	u64 start, end;
+	struct resource *mem;
+	int i;
+
+	mem = resource_list_first_type(&pp->bridge->windows, IORESOURCE_MEM)->res;
+	start = mem->start;
+	end = mem->end;
+
+	/* Disable BARs for inbound access */
+	ks_pcie_set_dbi_mode(ks_pcie);
+	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, 0);
+	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_1, 0);
+	ks_pcie_clear_dbi_mode(ks_pcie);
+
+	if (ks_pcie->is_am6)
+		return;
+
+	val = ilog2(OB_WIN_SIZE);
+	ks_pcie_app_writel(ks_pcie, OB_SIZE, val);
+
+	/* Using Direct 1:1 mapping of RC <-> PCI memory space */
+	for (i = 0; i < num_viewport && (start < end); i++) {
+		ks_pcie_app_writel(ks_pcie, OB_OFFSET_INDEX(i),
+				   lower_32_bits(start) | OB_ENABLEN);
+		ks_pcie_app_writel(ks_pcie, OB_OFFSET_HI(i),
+				   upper_32_bits(start));
+		start += OB_WIN_SIZE * SZ_1M;
+	}
+
+	val = ks_pcie_app_readl(ks_pcie, CMD_STATUS);
+	val |= OB_XLAT_EN_VAL;
+	ks_pcie_app_writel(ks_pcie, CMD_STATUS, val);
+}
+
+static void __iomem *ks_pcie_other_map_bus(struct pci_bus *bus,
+					   unsigned int devfn, int where)
+{
+	struct dw_pcie_rp *pp = bus->sysdata;
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct keystone_pcie *ks_pcie = to_keystone_pcie(pci);
+	u32 reg;
+
+	reg = CFG_BUS(bus->number) | CFG_DEVICE(PCI_SLOT(devfn)) |
+		CFG_FUNC(PCI_FUNC(devfn));
+	if (!pci_is_root_bus(bus->parent))
+		reg |= CFG_TYPE1;
+	ks_pcie_app_writel(ks_pcie, CFG_SETUP, reg);
+
+	return pp->va_cfg0_base + where;
+}
+
+static struct pci_ops ks_child_pcie_ops = {
+	.map_bus = ks_pcie_other_map_bus,
+	.read = pci_generic_config_read,
+	.write = pci_generic_config_write,
+};
+
+/**
+ * ks_pcie_v3_65_add_bus() - keystone add_bus post initialization
+ * @bus: A pointer to the PCI bus structure.
+ *
+ * This sets BAR0 to enable inbound access for MSI_IRQ register
+ */
+static int ks_pcie_v3_65_add_bus(struct pci_bus *bus)
+{
+	struct dw_pcie_rp *pp = bus->sysdata;
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct keystone_pcie *ks_pcie = to_keystone_pcie(pci);
+
+	if (!pci_is_root_bus(bus))
+		return 0;
+
+	/* Configure and set up BAR0 */
+	ks_pcie_set_dbi_mode(ks_pcie);
+
+	/* Enable BAR0 */
+	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, 1);
+	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, SZ_4K - 1);
+
+	ks_pcie_clear_dbi_mode(ks_pcie);
+
+	 /*
+	  * For BAR0, just setting bus address for inbound writes (MSI) should
+	  * be sufficient.  Use physical address to avoid any conflicts.
+	  */
+	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, ks_pcie->app.start);
+
+	return 0;
+}
+
+static struct pci_ops ks_pcie_ops = {
+	.map_bus = dw_pcie_own_conf_map_bus,
+	.read = pci_generic_config_read,
+	.write = pci_generic_config_write,
+	.add_bus = ks_pcie_v3_65_add_bus,
+};
+
+/**
+ * ks_pcie_link_up() - Check if link up
+ * @pci: A pointer to the dw_pcie structure which holds the DesignWare PCIe host
+ *	 controller driver information.
+ */
+static int ks_pcie_link_up(struct dw_pcie *pci)
+{
+	u32 val;
+
+	val = dw_pcie_readl_dbi(pci, PCIE_PORT_DEBUG0);
+	val &= PORT_LOGIC_LTSSM_STATE_MASK;
+	return (val == PORT_LOGIC_LTSSM_STATE_L0);
+}
+
+static void ks_pcie_stop_link(struct dw_pcie *pci)
+{
+	struct keystone_pcie *ks_pcie = to_keystone_pcie(pci);
+	u32 val;
+
+	/* Disable Link training */
+	val = ks_pcie_app_readl(ks_pcie, CMD_STATUS);
+	val &= ~LTSSM_EN_VAL;
+	ks_pcie_app_writel(ks_pcie, CMD_STATUS, val);
+}
+
+static int ks_pcie_start_link(struct dw_pcie *pci)
+{
+	struct keystone_pcie *ks_pcie = to_keystone_pcie(pci);
+	u32 val;
+
+	/* Initiate Link Training */
+	val = ks_pcie_app_readl(ks_pcie, CMD_STATUS);
+	ks_pcie_app_writel(ks_pcie, CMD_STATUS, LTSSM_EN_VAL | val);
+
+	return 0;
+}
+
+static void ks_pcie_quirk(struct pci_dev *dev)
+{
+	struct pci_bus *bus = dev->bus;
+	struct pci_dev *bridge;
+	static const struct pci_device_id rc_pci_devids[] = {
+		{ PCI_DEVICE(PCI_VENDOR_ID_TI, PCIE_RC_K2HK),
+		 .class = PCI_CLASS_BRIDGE_PCI_NORMAL, .class_mask = ~0, },
+		{ PCI_DEVICE(PCI_VENDOR_ID_TI, PCIE_RC_K2E),
+		 .class = PCI_CLASS_BRIDGE_PCI_NORMAL, .class_mask = ~0, },
+		{ PCI_DEVICE(PCI_VENDOR_ID_TI, PCIE_RC_K2L),
+		 .class = PCI_CLASS_BRIDGE_PCI_NORMAL, .class_mask = ~0, },
+		{ PCI_DEVICE(PCI_VENDOR_ID_TI, PCIE_RC_K2G),
+		 .class = PCI_CLASS_BRIDGE_PCI_NORMAL, .class_mask = ~0, },
+		{ 0, },
+	};
+
+	if (pci_is_root_bus(bus))
+		bridge = dev;
+
+	/* look for the host bridge */
+	while (!pci_is_root_bus(bus)) {
+		bridge = bus->self;
+		bus = bus->parent;
+	}
+
+	if (!bridge)
+		return;
+
+	/*
+	 * Keystone PCI controller has a h/w limitation of
+	 * 256 bytes maximum read request size.  It can't handle
+	 * anything higher than this.  So force this limit on
+	 * all downstream devices.
+	 */
+	if (pci_match_id(rc_pci_devids, bridge)) {
+		if (pcie_get_readrq(dev) > 256) {
+			dev_info(&dev->dev, "limiting MRRS to 256\n");
+			pcie_set_readrq(dev, 256);
+		}
+	}
+}
+DECLARE_PCI_FIXUP_ENABLE(PCI_ANY_ID, PCI_ANY_ID, ks_pcie_quirk);
+
+static void ks_pcie_msi_irq_handler(struct irq_desc *desc)
+{
+	unsigned int irq = desc->irq_data.hwirq;
+	struct keystone_pcie *ks_pcie = irq_desc_get_handler_data(desc);
+	u32 offset = irq - ks_pcie->msi_host_irq;
+	struct dw_pcie *pci = ks_pcie->pci;
+	struct dw_pcie_rp *pp = &pci->pp;
+	struct device *dev = pci->dev;
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	u32 vector, reg, pos;
+
+	dev_dbg(dev, "%s, irq %d\n", __func__, irq);
+
+	/*
+	 * The chained irq handler installation would have replaced normal
+	 * interrupt driver handler so we need to take care of mask/unmask and
+	 * ack operation.
+	 */
+	chained_irq_enter(chip, desc);
+
+	reg = ks_pcie_app_readl(ks_pcie, MSI_IRQ_STATUS(offset));
+	/*
+	 * MSI0 status bit 0-3 shows vectors 0, 8, 16, 24, MSI1 status bit
+	 * shows 1, 9, 17, 25 and so forth
+	 */
+	for (pos = 0; pos < 4; pos++) {
+		if (!(reg & BIT(pos)))
+			continue;
+
+		vector = offset + (pos << 3);
+		dev_dbg(dev, "irq: bit %d, vector %d\n", pos, vector);
+		generic_handle_domain_irq(pp->irq_domain, vector);
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+/**
+ * ks_pcie_legacy_irq_handler() - Handle legacy interrupt
+ * @desc: Pointer to irq descriptor
+ *
+ * Traverse through pending legacy interrupts and invoke handler for each. Also
+ * takes care of interrupt controller level mask/ack operation.
+ */
+static void ks_pcie_legacy_irq_handler(struct irq_desc *desc)
+{
+	unsigned int irq = irq_desc_get_irq(desc);
+	struct keystone_pcie *ks_pcie = irq_desc_get_handler_data(desc);
+	struct dw_pcie *pci = ks_pcie->pci;
+	struct device *dev = pci->dev;
+	u32 irq_offset = irq - ks_pcie->legacy_host_irqs[0];
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+
+	dev_dbg(dev, ": Handling legacy irq %d\n", irq);
+
+	/*
+	 * The chained irq handler installation would have replaced normal
+	 * interrupt driver handler so we need to take care of mask/unmask and
+	 * ack operation.
+	 */
+	chained_irq_enter(chip, desc);
+	ks_pcie_handle_legacy_irq(ks_pcie, irq_offset);
+	chained_irq_exit(chip, desc);
+}
+
+static int ks_pcie_config_msi_irq(struct keystone_pcie *ks_pcie)
+{
+	struct device *dev = ks_pcie->pci->dev;
+	struct device_node *np = ks_pcie->np;
+	struct device_node *intc_np;
+	struct irq_data *irq_data;
+	int irq_count, irq, ret, i;
+
+	if (!IS_ENABLED(CONFIG_PCI_MSI))
+		return 0;
+
+	intc_np = of_get_child_by_name(np, "msi-interrupt-controller");
+	if (!intc_np) {
+		if (ks_pcie->is_am6)
+			return 0;
+		dev_warn(dev, "msi-interrupt-controller node is absent\n");
+		return -EINVAL;
+	}
+
+	irq_count = of_irq_count(intc_np);
+	if (!irq_count) {
+		dev_err(dev, "No IRQ entries in msi-interrupt-controller\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	for (i = 0; i < irq_count; i++) {
+		irq = irq_of_parse_and_map(intc_np, i);
+		if (!irq) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		if (!ks_pcie->msi_host_irq) {
+			irq_data = irq_get_irq_data(irq);
+			if (!irq_data) {
+				ret = -EINVAL;
+				goto err;
+			}
+			ks_pcie->msi_host_irq = irq_data->hwirq;
+		}
+
+		irq_set_chained_handler_and_data(irq, ks_pcie_msi_irq_handler,
+						 ks_pcie);
+	}
+
+	of_node_put(intc_np);
+	return 0;
+
+err:
+	of_node_put(intc_np);
+	return ret;
+}
+
+static int ks_pcie_config_legacy_irq(struct keystone_pcie *ks_pcie)
+{
+	struct device *dev = ks_pcie->pci->dev;
+	struct irq_domain *legacy_irq_domain;
+	struct device_node *np = ks_pcie->np;
+	struct device_node *intc_np;
+	int irq_count, irq, ret = 0, i;
+
+	intc_np = of_get_child_by_name(np, "legacy-interrupt-controller");
+	if (!intc_np) {
+		/*
+		 * Since legacy interrupts are modeled as edge-interrupts in
+		 * AM6, keep it disabled for now.
+		 */
+		if (ks_pcie->is_am6)
+			return 0;
+		dev_warn(dev, "legacy-interrupt-controller node is absent\n");
+		return -EINVAL;
+	}
+
+	irq_count = of_irq_count(intc_np);
+	if (!irq_count) {
+		dev_err(dev, "No IRQ entries in legacy-interrupt-controller\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	for (i = 0; i < irq_count; i++) {
+		irq = irq_of_parse_and_map(intc_np, i);
+		if (!irq) {
+			ret = -EINVAL;
+			goto err;
+		}
+		ks_pcie->legacy_host_irqs[i] = irq;
+
+		irq_set_chained_handler_and_data(irq,
+						 ks_pcie_legacy_irq_handler,
+						 ks_pcie);
+	}
+
+	legacy_irq_domain =
+		irq_domain_add_linear(intc_np, PCI_NUM_INTX,
+				      &ks_pcie_legacy_irq_domain_ops, NULL);
+	if (!legacy_irq_domain) {
+		dev_err(dev, "Failed to add irq domain for legacy irqs\n");
+		ret = -EINVAL;
+		goto err;
+	}
+	ks_pcie->legacy_irq_domain = legacy_irq_domain;
+
+	for (i = 0; i < PCI_NUM_INTX; i++)
+		ks_pcie_app_writel(ks_pcie, IRQ_ENABLE_SET(i), INTx_EN);
+
+err:
+	of_node_put(intc_np);
+	return ret;
+}
+
+#ifdef CONFIG_ARM
+/*
+ * When a PCI device does not exist during config cycles, keystone host
+ * gets a bus error instead of returning 0xffffffff (PCI_ERROR_RESPONSE).
+ * This handler always returns 0 for this kind of fault.
+ */
+static int ks_pcie_fault(unsigned long addr, unsigned int fsr,
+			 struct pt_regs *regs)
+{
+	unsigned long instr = *(unsigned long *) instruction_pointer(regs);
+
+	if ((instr & 0x0e100090) == 0x00100090) {
+		int reg = (instr >> 12) & 15;
+
+		regs->uregs[reg] = -1;
+		regs->ARM_pc += 4;
+	}
+
+	return 0;
+}
+#endif
+
+static int __init ks_pcie_init_id(struct keystone_pcie *ks_pcie)
+{
+	int ret;
+	unsigned int id;
+	struct regmap *devctrl_regs;
+	struct dw_pcie *pci = ks_pcie->pci;
+	struct device *dev = pci->dev;
+	struct device_node *np = dev->of_node;
+	struct of_phandle_args args;
+	unsigned int offset = 0;
+
+	devctrl_regs = syscon_regmap_lookup_by_phandle(np, "ti,syscon-pcie-id");
+	if (IS_ERR(devctrl_regs))
+		return PTR_ERR(devctrl_regs);
+
+	/* Do not error out to maintain old DT compatibility */
+	ret = of_parse_phandle_with_fixed_args(np, "ti,syscon-pcie-id", 1, 0, &args);
+	if (!ret)
+		offset = args.args[0];
+
+	ret = regmap_read(devctrl_regs, offset, &id);
+	if (ret)
+		return ret;
+
+	dw_pcie_dbi_ro_wr_en(pci);
+	dw_pcie_writew_dbi(pci, PCI_VENDOR_ID, id & PCIE_VENDORID_MASK);
+	dw_pcie_writew_dbi(pci, PCI_DEVICE_ID, id >> PCIE_DEVICEID_SHIFT);
+	dw_pcie_dbi_ro_wr_dis(pci);
+
+	return 0;
+}
+
+static int __init ks_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct keystone_pcie *ks_pcie = to_keystone_pcie(pci);
+	int ret;
+
+	pp->bridge->ops = &ks_pcie_ops;
+	if (!ks_pcie->is_am6)
+		pp->bridge->child_ops = &ks_child_pcie_ops;
+
+	ret = ks_pcie_config_legacy_irq(ks_pcie);
+	if (ret)
+		return ret;
+
+	ret = ks_pcie_config_msi_irq(ks_pcie);
+	if (ret)
+		return ret;
+
+	ks_pcie_stop_link(pci);
+	ks_pcie_setup_rc_app_regs(ks_pcie);
+	writew(PCI_IO_RANGE_TYPE_32 | (PCI_IO_RANGE_TYPE_32 << 8),
+			pci->dbi_base + PCI_IO_BASE);
+
+	ret = ks_pcie_init_id(ks_pcie);
+	if (ret < 0)
+		return ret;
+
+#ifdef CONFIG_ARM
+	/*
+	 * PCIe access errors that result into OCP errors are caught by ARM as
+	 * "External aborts"
+	 */
+	hook_fault_code(17, ks_pcie_fault, SIGBUS, 0,
+			"Asynchronous external abort");
+#endif
+
+	return 0;
+}
+
+static const struct dw_pcie_host_ops ks_pcie_host_ops = {
+	.host_init = ks_pcie_host_init,
+	.msi_host_init = ks_pcie_msi_host_init,
+};
+
+static const struct dw_pcie_host_ops ks_pcie_am654_host_ops = {
+	.host_init = ks_pcie_host_init,
+};
+
+static irqreturn_t ks_pcie_err_irq_handler(int irq, void *priv)
+{
+	struct keystone_pcie *ks_pcie = priv;
+
+	return ks_pcie_handle_error_irq(ks_pcie);
+}
+
+static void ks_pcie_am654_write_dbi2(struct dw_pcie *pci, void __iomem *base,
+				     u32 reg, size_t size, u32 val)
+{
+	struct keystone_pcie *ks_pcie = to_keystone_pcie(pci);
+
+	ks_pcie_set_dbi_mode(ks_pcie);
+	dw_pcie_write(base + reg, size, val);
+	ks_pcie_clear_dbi_mode(ks_pcie);
+}
+
+static const struct dw_pcie_ops ks_pcie_dw_pcie_ops = {
+	.start_link = ks_pcie_start_link,
+	.stop_link = ks_pcie_stop_link,
+	.link_up = ks_pcie_link_up,
+	.write_dbi2 = ks_pcie_am654_write_dbi2,
+};
+
+static void ks_pcie_am654_ep_init(struct dw_pcie_ep *ep)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	int flags;
+
+	ep->page_size = AM654_WIN_SIZE;
+	flags = PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_32;
+	dw_pcie_writel_dbi2(pci, PCI_BASE_ADDRESS_0, APP_ADDR_SPACE_0 - 1);
+	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, flags);
+}
+
+static void ks_pcie_am654_raise_legacy_irq(struct keystone_pcie *ks_pcie)
+{
+	struct dw_pcie *pci = ks_pcie->pci;
+	u8 int_pin;
+
+	int_pin = dw_pcie_readb_dbi(pci, PCI_INTERRUPT_PIN);
+	if (int_pin == 0 || int_pin > 4)
+		return;
+
+	ks_pcie_app_writel(ks_pcie, PCIE_LEGACY_IRQ_ENABLE_SET(int_pin),
+			   INT_ENABLE);
+	ks_pcie_app_writel(ks_pcie, PCIE_EP_IRQ_SET, INT_ENABLE);
+	mdelay(1);
+	ks_pcie_app_writel(ks_pcie, PCIE_EP_IRQ_CLR, INT_ENABLE);
+	ks_pcie_app_writel(ks_pcie, PCIE_LEGACY_IRQ_ENABLE_CLR(int_pin),
+			   INT_ENABLE);
+}
+
+static int ks_pcie_am654_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
+				   enum pci_epc_irq_type type,
+				   u16 interrupt_num)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct keystone_pcie *ks_pcie = to_keystone_pcie(pci);
+
+	switch (type) {
+	case PCI_EPC_IRQ_LEGACY:
+		ks_pcie_am654_raise_legacy_irq(ks_pcie);
+		break;
+	case PCI_EPC_IRQ_MSI:
+		dw_pcie_ep_raise_msi_irq(ep, func_no, interrupt_num);
+		break;
+	case PCI_EPC_IRQ_MSIX:
+		dw_pcie_ep_raise_msix_irq(ep, func_no, interrupt_num);
+		break;
+	default:
+		dev_err(pci->dev, "UNKNOWN IRQ type\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct pci_epc_features ks_pcie_am654_epc_features = {
+	.linkup_notifier = false,
+	.msi_capable = true,
+	.msix_capable = true,
+	.reserved_bar = 1 << BAR_0 | 1 << BAR_1,
+	.bar_fixed_64bit = 1 << BAR_0,
+	.bar_fixed_size[2] = SZ_1M,
+	.bar_fixed_size[3] = SZ_64K,
+	.bar_fixed_size[4] = 256,
+	.bar_fixed_size[5] = SZ_1M,
+	.align = SZ_1M,
+};
+
+static const struct pci_epc_features*
+ks_pcie_am654_get_features(struct dw_pcie_ep *ep)
+{
+	return &ks_pcie_am654_epc_features;
+}
+
+static const struct dw_pcie_ep_ops ks_pcie_am654_ep_ops = {
+	.ep_init = ks_pcie_am654_ep_init,
+	.raise_irq = ks_pcie_am654_raise_irq,
+	.get_features = &ks_pcie_am654_get_features,
+};
+
+static void ks_pcie_disable_phy(struct keystone_pcie *ks_pcie)
+{
+	int num_lanes = ks_pcie->num_lanes;
+
+	while (num_lanes--) {
+		phy_power_off(ks_pcie->phy[num_lanes]);
+		phy_exit(ks_pcie->phy[num_lanes]);
+	}
+}
+
+static int ks_pcie_enable_phy(struct keystone_pcie *ks_pcie)
+{
+	int i;
+	int ret;
+	int num_lanes = ks_pcie->num_lanes;
+
+	for (i = 0; i < num_lanes; i++) {
+		ret = phy_reset(ks_pcie->phy[i]);
+		if (ret < 0)
+			goto err_phy;
+
+		ret = phy_init(ks_pcie->phy[i]);
+		if (ret < 0)
+			goto err_phy;
+
+		ret = phy_power_on(ks_pcie->phy[i]);
+		if (ret < 0) {
+			phy_exit(ks_pcie->phy[i]);
+			goto err_phy;
+		}
+	}
+
+	return 0;
+
+err_phy:
+	while (--i >= 0) {
+		phy_power_off(ks_pcie->phy[i]);
+		phy_exit(ks_pcie->phy[i]);
+	}
+
+	return ret;
+}
+
+static int ks_pcie_set_mode(struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+	struct of_phandle_args args;
+	unsigned int offset = 0;
+	struct regmap *syscon;
+	u32 val;
+	u32 mask;
+	int ret = 0;
+
+	syscon = syscon_regmap_lookup_by_phandle(np, "ti,syscon-pcie-mode");
+	if (IS_ERR(syscon))
+		return 0;
+
+	/* Do not error out to maintain old DT compatibility */
+	ret = of_parse_phandle_with_fixed_args(np, "ti,syscon-pcie-mode", 1, 0, &args);
+	if (!ret)
+		offset = args.args[0];
+
+	mask = KS_PCIE_DEV_TYPE_MASK | KS_PCIE_SYSCLOCKOUTEN;
+	val = KS_PCIE_DEV_TYPE(RC) | KS_PCIE_SYSCLOCKOUTEN;
+
+	ret = regmap_update_bits(syscon, offset, mask, val);
+	if (ret) {
+		dev_err(dev, "failed to set pcie mode\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int ks_pcie_am654_set_mode(struct device *dev,
+				  enum dw_pcie_device_mode mode)
+{
+	struct device_node *np = dev->of_node;
+	struct of_phandle_args args;
+	unsigned int offset = 0;
+	struct regmap *syscon;
+	u32 val;
+	u32 mask;
+	int ret = 0;
+
+	syscon = syscon_regmap_lookup_by_phandle(np, "ti,syscon-pcie-mode");
+	if (IS_ERR(syscon))
+		return 0;
+
+	/* Do not error out to maintain old DT compatibility */
+	ret = of_parse_phandle_with_fixed_args(np, "ti,syscon-pcie-mode", 1, 0, &args);
+	if (!ret)
+		offset = args.args[0];
+
+	mask = AM654_PCIE_DEV_TYPE_MASK;
+
+	switch (mode) {
+	case DW_PCIE_RC_TYPE:
+		val = RC;
+		break;
+	case DW_PCIE_EP_TYPE:
+		val = EP;
+		break;
+	default:
+		dev_err(dev, "INVALID device type %d\n", mode);
+		return -EINVAL;
+	}
+
+	ret = regmap_update_bits(syscon, offset, mask, val);
+	if (ret) {
+		dev_err(dev, "failed to set pcie mode\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct ks_pcie_of_data ks_pcie_rc_of_data = {
+	.host_ops = &ks_pcie_host_ops,
+	.version = DW_PCIE_VER_365A,
+};
+
+static const struct ks_pcie_of_data ks_pcie_am654_rc_of_data = {
+	.host_ops = &ks_pcie_am654_host_ops,
+	.mode = DW_PCIE_RC_TYPE,
+	.version = DW_PCIE_VER_490A,
+};
+
+static const struct ks_pcie_of_data ks_pcie_am654_ep_of_data = {
+	.ep_ops = &ks_pcie_am654_ep_ops,
+	.mode = DW_PCIE_EP_TYPE,
+	.version = DW_PCIE_VER_490A,
+};
+
+static const struct of_device_id ks_pcie_of_match[] = {
+	{
+		.type = "pci",
+		.data = &ks_pcie_rc_of_data,
+		.compatible = "ti,keystone-pcie",
+	},
+	{
+		.data = &ks_pcie_am654_rc_of_data,
+		.compatible = "ti,am654-pcie-rc",
+	},
+	{
+		.data = &ks_pcie_am654_ep_of_data,
+		.compatible = "ti,am654-pcie-ep",
+	},
+	{ },
+};
+
+static int ks_pcie_probe(struct platform_device *pdev)
+{
+	const struct dw_pcie_host_ops *host_ops;
+	const struct dw_pcie_ep_ops *ep_ops;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	const struct ks_pcie_of_data *data;
+	enum dw_pcie_device_mode mode;
+	struct dw_pcie *pci;
+	struct keystone_pcie *ks_pcie;
+	struct device_link **link;
+	struct gpio_desc *gpiod;
+	struct resource *res;
+	void __iomem *base;
+	u32 num_viewport;
+	struct phy **phy;
+	u32 num_lanes;
+	char name[10];
+	u32 version;
+	int ret;
+	int irq;
+	int i;
+
+	data = of_device_get_match_data(dev);
+	if (!data)
+		return -EINVAL;
+
+	version = data->version;
+	host_ops = data->host_ops;
+	ep_ops = data->ep_ops;
+	mode = data->mode;
+
+	ks_pcie = devm_kzalloc(dev, sizeof(*ks_pcie), GFP_KERNEL);
+	if (!ks_pcie)
+		return -ENOMEM;
+
+	pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	if (!pci)
+		return -ENOMEM;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "app");
+	ks_pcie->va_app_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(ks_pcie->va_app_base))
+		return PTR_ERR(ks_pcie->va_app_base);
+
+	ks_pcie->app = *res;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbics");
+	base = devm_pci_remap_cfg_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	if (of_device_is_compatible(np, "ti,am654-pcie-rc"))
+		ks_pcie->is_am6 = true;
+
+	pci->dbi_base = base;
+	pci->dbi_base2 = base;
+	pci->dev = dev;
+	pci->ops = &ks_pcie_dw_pcie_ops;
+	pci->version = version;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	ret = request_irq(irq, ks_pcie_err_irq_handler, IRQF_SHARED,
+			  "ks-pcie-error-irq", ks_pcie);
+	if (ret < 0) {
+		dev_err(dev, "failed to request error IRQ %d\n",
+			irq);
+		return ret;
+	}
+
+	ret = of_property_read_u32(np, "num-lanes", &num_lanes);
+	if (ret)
+		num_lanes = 1;
+
+	phy = devm_kzalloc(dev, sizeof(*phy) * num_lanes, GFP_KERNEL);
+	if (!phy)
+		return -ENOMEM;
+
+	link = devm_kzalloc(dev, sizeof(*link) * num_lanes, GFP_KERNEL);
+	if (!link)
+		return -ENOMEM;
+
+	for (i = 0; i < num_lanes; i++) {
+		snprintf(name, sizeof(name), "pcie-phy%d", i);
+		phy[i] = devm_phy_optional_get(dev, name);
+		if (IS_ERR(phy[i])) {
+			ret = PTR_ERR(phy[i]);
+			goto err_link;
+		}
+
+		if (!phy[i])
+			continue;
+
+		link[i] = device_link_add(dev, &phy[i]->dev, DL_FLAG_STATELESS);
+		if (!link[i]) {
+			ret = -EINVAL;
+			goto err_link;
+		}
+	}
+
+	ks_pcie->np = np;
+	ks_pcie->pci = pci;
+	ks_pcie->link = link;
+	ks_pcie->num_lanes = num_lanes;
+	ks_pcie->phy = phy;
+
+	gpiod = devm_gpiod_get_optional(dev, "reset",
+					GPIOD_OUT_LOW);
+	if (IS_ERR(gpiod)) {
+		ret = PTR_ERR(gpiod);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get reset GPIO\n");
+		goto err_link;
+	}
+
+	/* Obtain references to the PHYs */
+	for (i = 0; i < num_lanes; i++)
+		phy_pm_runtime_get_sync(ks_pcie->phy[i]);
+
+	ret = ks_pcie_enable_phy(ks_pcie);
+
+	/* Release references to the PHYs */
+	for (i = 0; i < num_lanes; i++)
+		phy_pm_runtime_put_sync(ks_pcie->phy[i]);
+
+	if (ret) {
+		dev_err(dev, "failed to enable phy\n");
+		goto err_link;
+	}
+
+	platform_set_drvdata(pdev, ks_pcie);
+	pm_runtime_enable(dev);
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		dev_err(dev, "pm_runtime_get_sync failed\n");
+		goto err_get_sync;
+	}
+
+	if (dw_pcie_ver_is_ge(pci, 480A))
+		ret = ks_pcie_am654_set_mode(dev, mode);
+	else
+		ret = ks_pcie_set_mode(dev);
+	if (ret < 0)
+		goto err_get_sync;
+
+	switch (mode) {
+	case DW_PCIE_RC_TYPE:
+		if (!IS_ENABLED(CONFIG_PCI_KEYSTONE_HOST)) {
+			ret = -ENODEV;
+			goto err_get_sync;
+		}
+
+		ret = of_property_read_u32(np, "num-viewport", &num_viewport);
+		if (ret < 0) {
+			dev_err(dev, "unable to read *num-viewport* property\n");
+			goto err_get_sync;
+		}
+
+		/*
+		 * "Power Sequencing and Reset Signal Timings" table in
+		 * PCI EXPRESS CARD ELECTROMECHANICAL SPECIFICATION, REV. 2.0
+		 * indicates PERST# should be deasserted after minimum of 100us
+		 * once REFCLK is stable. The REFCLK to the connector in RC
+		 * mode is selected while enabling the PHY. So deassert PERST#
+		 * after 100 us.
+		 */
+		if (gpiod) {
+			usleep_range(100, 200);
+			gpiod_set_value_cansleep(gpiod, 1);
+		}
+
+		ks_pcie->num_viewport = num_viewport;
+		pci->pp.ops = host_ops;
+		ret = dw_pcie_host_init(&pci->pp);
+		if (ret < 0)
+			goto err_get_sync;
+		break;
+	case DW_PCIE_EP_TYPE:
+		if (!IS_ENABLED(CONFIG_PCI_KEYSTONE_EP)) {
+			ret = -ENODEV;
+			goto err_get_sync;
+		}
+
+		pci->ep.ops = ep_ops;
+		ret = dw_pcie_ep_init(&pci->ep);
+		if (ret < 0)
+			goto err_get_sync;
+		break;
+	default:
+		dev_err(dev, "INVALID device type %d\n", mode);
+	}
+
+	ks_pcie_enable_error_irq(ks_pcie);
+
+	return 0;
+
+err_get_sync:
+	pm_runtime_put(dev);
+	pm_runtime_disable(dev);
+	ks_pcie_disable_phy(ks_pcie);
+
+err_link:
+	while (--i >= 0 && link[i])
+		device_link_del(link[i]);
+
+	return ret;
+}
+
+static int ks_pcie_remove(struct platform_device *pdev)
+{
+	struct keystone_pcie *ks_pcie = platform_get_drvdata(pdev);
+	struct device_link **link = ks_pcie->link;
+	int num_lanes = ks_pcie->num_lanes;
+	struct device *dev = &pdev->dev;
+
+	pm_runtime_put(dev);
+	pm_runtime_disable(dev);
+	ks_pcie_disable_phy(ks_pcie);
+	while (num_lanes--)
+		device_link_del(link[num_lanes]);
+
+	return 0;
+}
+
+static struct platform_driver ks_pcie_driver = {
+	.probe  = ks_pcie_probe,
+	.remove = ks_pcie_remove,
+	.driver = {
+		.name	= "keystone-pcie",
+		.of_match_table = ks_pcie_of_match,
+	},
+};
+builtin_platform_driver(ks_pcie_driver);
diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c b/drivers/pci/controller/dwc/pci-layerscape-ep.c
new file mode 100644
index 000000000..b1faf41a2
--- /dev/null
+++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe controller EP driver for Freescale Layerscape SoCs
+ *
+ * Copyright (C) 2018 NXP Semiconductor.
+ *
+ * Author: Xiaowei Bao <xiaowei.bao@nxp.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+
+#include "pcie-designware.h"
+
+#define PEX_PF0_CONFIG			0xC0014
+#define PEX_PF0_CFG_READY		BIT(0)
+
+/* PEX PFa PCIE PME and message interrupt registers*/
+#define PEX_PF0_PME_MES_DR		0xC0020
+#define PEX_PF0_PME_MES_DR_LUD		BIT(7)
+#define PEX_PF0_PME_MES_DR_LDD		BIT(9)
+#define PEX_PF0_PME_MES_DR_HRD		BIT(10)
+
+#define PEX_PF0_PME_MES_IER		0xC0028
+#define PEX_PF0_PME_MES_IER_LUDIE	BIT(7)
+#define PEX_PF0_PME_MES_IER_LDDIE	BIT(9)
+#define PEX_PF0_PME_MES_IER_HRDIE	BIT(10)
+
+#define to_ls_pcie_ep(x)	dev_get_drvdata((x)->dev)
+
+struct ls_pcie_ep_drvdata {
+	u32				func_offset;
+	const struct dw_pcie_ep_ops	*ops;
+	const struct dw_pcie_ops	*dw_pcie_ops;
+};
+
+struct ls_pcie_ep {
+	struct dw_pcie			*pci;
+	struct pci_epc_features		*ls_epc;
+	const struct ls_pcie_ep_drvdata *drvdata;
+	int				irq;
+	u32				lnkcap;
+	bool				big_endian;
+};
+
+static u32 ls_lut_readl(struct ls_pcie_ep *pcie, u32 offset)
+{
+	struct dw_pcie *pci = pcie->pci;
+
+	if (pcie->big_endian)
+		return ioread32be(pci->dbi_base + offset);
+	else
+		return ioread32(pci->dbi_base + offset);
+}
+
+static void ls_lut_writel(struct ls_pcie_ep *pcie, u32 offset, u32 value)
+{
+	struct dw_pcie *pci = pcie->pci;
+
+	if (pcie->big_endian)
+		iowrite32be(value, pci->dbi_base + offset);
+	else
+		iowrite32(value, pci->dbi_base + offset);
+}
+
+static irqreturn_t ls_pcie_ep_event_handler(int irq, void *dev_id)
+{
+	struct ls_pcie_ep *pcie = dev_id;
+	struct dw_pcie *pci = pcie->pci;
+	u32 val, cfg;
+	u8 offset;
+
+	val = ls_lut_readl(pcie, PEX_PF0_PME_MES_DR);
+	ls_lut_writel(pcie, PEX_PF0_PME_MES_DR, val);
+
+	if (!val)
+		return IRQ_NONE;
+
+	if (val & PEX_PF0_PME_MES_DR_LUD) {
+
+		offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+
+		/*
+		 * The values of the Maximum Link Width and Supported Link
+		 * Speed from the Link Capabilities Register will be lost
+		 * during link down or hot reset. Restore initial value
+		 * that configured by the Reset Configuration Word (RCW).
+		 */
+		dw_pcie_dbi_ro_wr_en(pci);
+		dw_pcie_writel_dbi(pci, offset + PCI_EXP_LNKCAP, pcie->lnkcap);
+		dw_pcie_dbi_ro_wr_dis(pci);
+
+		cfg = ls_lut_readl(pcie, PEX_PF0_CONFIG);
+		cfg |= PEX_PF0_CFG_READY;
+		ls_lut_writel(pcie, PEX_PF0_CONFIG, cfg);
+		dw_pcie_ep_linkup(&pci->ep);
+
+		dev_dbg(pci->dev, "Link up\n");
+	} else if (val & PEX_PF0_PME_MES_DR_LDD) {
+		dev_dbg(pci->dev, "Link down\n");
+		pci_epc_linkdown(pci->ep.epc);
+	} else if (val & PEX_PF0_PME_MES_DR_HRD) {
+		dev_dbg(pci->dev, "Hot reset\n");
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int ls_pcie_ep_interrupt_init(struct ls_pcie_ep *pcie,
+				     struct platform_device *pdev)
+{
+	u32 val;
+	int ret;
+
+	pcie->irq = platform_get_irq_byname(pdev, "pme");
+	if (pcie->irq < 0)
+		return pcie->irq;
+
+	ret = devm_request_irq(&pdev->dev, pcie->irq, ls_pcie_ep_event_handler,
+			       IRQF_SHARED, pdev->name, pcie);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't register PCIe IRQ\n");
+		return ret;
+	}
+
+	/* Enable interrupts */
+	val = ls_lut_readl(pcie, PEX_PF0_PME_MES_IER);
+	val |=  PEX_PF0_PME_MES_IER_LDDIE | PEX_PF0_PME_MES_IER_HRDIE |
+		PEX_PF0_PME_MES_IER_LUDIE;
+	ls_lut_writel(pcie, PEX_PF0_PME_MES_IER, val);
+
+	return 0;
+}
+
+static const struct pci_epc_features*
+ls_pcie_ep_get_features(struct dw_pcie_ep *ep)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct ls_pcie_ep *pcie = to_ls_pcie_ep(pci);
+
+	return pcie->ls_epc;
+}
+
+static void ls_pcie_ep_init(struct dw_pcie_ep *ep)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct ls_pcie_ep *pcie = to_ls_pcie_ep(pci);
+	struct dw_pcie_ep_func *ep_func;
+	enum pci_barno bar;
+
+	ep_func = dw_pcie_ep_get_func_from_ep(ep, 0);
+	if (!ep_func)
+		return;
+
+	for (bar = 0; bar < PCI_STD_NUM_BARS; bar++)
+		dw_pcie_ep_reset_bar(pci, bar);
+
+	pcie->ls_epc->msi_capable = ep_func->msi_cap ? true : false;
+	pcie->ls_epc->msix_capable = ep_func->msix_cap ? true : false;
+}
+
+static int ls_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
+				enum pci_epc_irq_type type, u16 interrupt_num)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+	switch (type) {
+	case PCI_EPC_IRQ_LEGACY:
+		return dw_pcie_ep_raise_legacy_irq(ep, func_no);
+	case PCI_EPC_IRQ_MSI:
+		return dw_pcie_ep_raise_msi_irq(ep, func_no, interrupt_num);
+	case PCI_EPC_IRQ_MSIX:
+		return dw_pcie_ep_raise_msix_irq_doorbell(ep, func_no,
+							  interrupt_num);
+	default:
+		dev_err(pci->dev, "UNKNOWN IRQ type\n");
+		return -EINVAL;
+	}
+}
+
+static unsigned int ls_pcie_ep_func_conf_select(struct dw_pcie_ep *ep,
+						u8 func_no)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct ls_pcie_ep *pcie = to_ls_pcie_ep(pci);
+
+	WARN_ON(func_no && !pcie->drvdata->func_offset);
+	return pcie->drvdata->func_offset * func_no;
+}
+
+static const struct dw_pcie_ep_ops ls_pcie_ep_ops = {
+	.ep_init = ls_pcie_ep_init,
+	.raise_irq = ls_pcie_ep_raise_irq,
+	.get_features = ls_pcie_ep_get_features,
+	.func_conf_select = ls_pcie_ep_func_conf_select,
+};
+
+static const struct ls_pcie_ep_drvdata ls1_ep_drvdata = {
+	.ops = &ls_pcie_ep_ops,
+};
+
+static const struct ls_pcie_ep_drvdata ls2_ep_drvdata = {
+	.func_offset = 0x20000,
+	.ops = &ls_pcie_ep_ops,
+};
+
+static const struct ls_pcie_ep_drvdata lx2_ep_drvdata = {
+	.func_offset = 0x8000,
+	.ops = &ls_pcie_ep_ops,
+};
+
+static const struct of_device_id ls_pcie_ep_of_match[] = {
+	{ .compatible = "fsl,ls1028a-pcie-ep", .data = &ls1_ep_drvdata },
+	{ .compatible = "fsl,ls1046a-pcie-ep", .data = &ls1_ep_drvdata },
+	{ .compatible = "fsl,ls1088a-pcie-ep", .data = &ls2_ep_drvdata },
+	{ .compatible = "fsl,ls2088a-pcie-ep", .data = &ls2_ep_drvdata },
+	{ .compatible = "fsl,lx2160ar2-pcie-ep", .data = &lx2_ep_drvdata },
+	{ },
+};
+
+static int __init ls_pcie_ep_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct dw_pcie *pci;
+	struct ls_pcie_ep *pcie;
+	struct pci_epc_features *ls_epc;
+	struct resource *dbi_base;
+	u8 offset;
+	int ret;
+
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	if (!pci)
+		return -ENOMEM;
+
+	ls_epc = devm_kzalloc(dev, sizeof(*ls_epc), GFP_KERNEL);
+	if (!ls_epc)
+		return -ENOMEM;
+
+	pcie->drvdata = of_device_get_match_data(dev);
+
+	pci->dev = dev;
+	pci->ops = pcie->drvdata->dw_pcie_ops;
+
+	ls_epc->bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4);
+	ls_epc->linkup_notifier = true;
+
+	pcie->pci = pci;
+	pcie->ls_epc = ls_epc;
+
+	dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
+	pci->dbi_base = devm_pci_remap_cfg_resource(dev, dbi_base);
+	if (IS_ERR(pci->dbi_base))
+		return PTR_ERR(pci->dbi_base);
+
+	pci->ep.ops = &ls_pcie_ep_ops;
+
+	pcie->big_endian = of_property_read_bool(dev->of_node, "big-endian");
+
+	platform_set_drvdata(pdev, pcie);
+
+	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	pcie->lnkcap = dw_pcie_readl_dbi(pci, offset + PCI_EXP_LNKCAP);
+
+	ret = dw_pcie_ep_init(&pci->ep);
+	if (ret)
+		return ret;
+
+	return ls_pcie_ep_interrupt_init(pcie, pdev);
+}
+
+static struct platform_driver ls_pcie_ep_driver = {
+	.driver = {
+		.name = "layerscape-pcie-ep",
+		.of_match_table = ls_pcie_ep_of_match,
+		.suppress_bind_attrs = true,
+	},
+};
+builtin_platform_driver_probe(ls_pcie_ep_driver, ls_pcie_ep_probe);
diff --git a/drivers/pci/controller/dwc/pci-layerscape.c b/drivers/pci/controller/dwc/pci-layerscape.c
new file mode 100644
index 000000000..b931d5976
--- /dev/null
+++ b/drivers/pci/controller/dwc/pci-layerscape.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for Freescale Layerscape SoCs
+ *
+ * Copyright (C) 2014 Freescale Semiconductor.
+ * Copyright 2021 NXP
+ *
+ * Author: Minghuan Lian <Minghuan.Lian@freescale.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/iopoll.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
+
+#include "../../pci.h"
+#include "pcie-designware.h"
+
+/* PEX Internal Configuration Registers */
+#define PCIE_STRFMR1		0x71c /* Symbol Timer & Filter Mask Register1 */
+#define PCIE_ABSERR		0x8d0 /* Bridge Slave Error Response Register */
+#define PCIE_ABSERR_SETTING	0x9401 /* Forward error of non-posted request */
+
+/* PF Message Command Register */
+#define LS_PCIE_PF_MCR		0x2c
+#define PF_MCR_PTOMR		BIT(0)
+#define PF_MCR_EXL2S		BIT(1)
+
+#define PCIE_IATU_NUM		6
+
+struct ls_pcie_drvdata {
+	const u32 pf_off;
+	bool pm_support;
+};
+
+struct ls_pcie {
+	struct dw_pcie *pci;
+	const struct ls_pcie_drvdata *drvdata;
+	void __iomem *pf_base;
+	bool big_endian;
+};
+
+#define ls_pcie_pf_readl_addr(addr)	ls_pcie_pf_readl(pcie, addr)
+#define to_ls_pcie(x)	dev_get_drvdata((x)->dev)
+
+static bool ls_pcie_is_bridge(struct ls_pcie *pcie)
+{
+	struct dw_pcie *pci = pcie->pci;
+	u32 header_type;
+
+	header_type = ioread8(pci->dbi_base + PCI_HEADER_TYPE);
+	header_type &= 0x7f;
+
+	return header_type == PCI_HEADER_TYPE_BRIDGE;
+}
+
+/* Clear multi-function bit */
+static void ls_pcie_clear_multifunction(struct ls_pcie *pcie)
+{
+	struct dw_pcie *pci = pcie->pci;
+
+	iowrite8(PCI_HEADER_TYPE_BRIDGE, pci->dbi_base + PCI_HEADER_TYPE);
+}
+
+/* Drop MSG TLP except for Vendor MSG */
+static void ls_pcie_drop_msg_tlp(struct ls_pcie *pcie)
+{
+	u32 val;
+	struct dw_pcie *pci = pcie->pci;
+
+	val = ioread32(pci->dbi_base + PCIE_STRFMR1);
+	val &= 0xDFFFFFFF;
+	iowrite32(val, pci->dbi_base + PCIE_STRFMR1);
+}
+
+/* Forward error response of outbound non-posted requests */
+static void ls_pcie_fix_error_response(struct ls_pcie *pcie)
+{
+	struct dw_pcie *pci = pcie->pci;
+
+	iowrite32(PCIE_ABSERR_SETTING, pci->dbi_base + PCIE_ABSERR);
+}
+
+static u32 ls_pcie_pf_readl(struct ls_pcie *pcie, u32 off)
+{
+	if (pcie->big_endian)
+		return ioread32be(pcie->pf_base + off);
+
+	return ioread32(pcie->pf_base + off);
+}
+
+static void ls_pcie_pf_writel(struct ls_pcie *pcie, u32 off, u32 val)
+{
+	if (pcie->big_endian)
+		iowrite32be(val, pcie->pf_base + off);
+	else
+		iowrite32(val, pcie->pf_base + off);
+}
+
+static void ls_pcie_send_turnoff_msg(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct ls_pcie *pcie = to_ls_pcie(pci);
+	u32 val;
+	int ret;
+
+	val = ls_pcie_pf_readl(pcie, LS_PCIE_PF_MCR);
+	val |= PF_MCR_PTOMR;
+	ls_pcie_pf_writel(pcie, LS_PCIE_PF_MCR, val);
+
+	ret = readx_poll_timeout(ls_pcie_pf_readl_addr, LS_PCIE_PF_MCR,
+				 val, !(val & PF_MCR_PTOMR),
+				 PCIE_PME_TO_L2_TIMEOUT_US/10,
+				 PCIE_PME_TO_L2_TIMEOUT_US);
+	if (ret)
+		dev_err(pcie->pci->dev, "PME_Turn_off timeout\n");
+}
+
+static void ls_pcie_exit_from_l2(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct ls_pcie *pcie = to_ls_pcie(pci);
+	u32 val;
+	int ret;
+
+	/*
+	 * Set PF_MCR_EXL2S bit in LS_PCIE_PF_MCR register for the link
+	 * to exit L2 state.
+	 */
+	val = ls_pcie_pf_readl(pcie, LS_PCIE_PF_MCR);
+	val |= PF_MCR_EXL2S;
+	ls_pcie_pf_writel(pcie, LS_PCIE_PF_MCR, val);
+
+	/*
+	 * L2 exit timeout of 10ms is not defined in the specifications,
+	 * it was chosen based on empirical observations.
+	 */
+	ret = readx_poll_timeout(ls_pcie_pf_readl_addr, LS_PCIE_PF_MCR,
+				 val, !(val & PF_MCR_EXL2S),
+				 1000,
+				 10000);
+	if (ret)
+		dev_err(pcie->pci->dev, "L2 exit timeout\n");
+}
+
+static int ls_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct ls_pcie *pcie = to_ls_pcie(pci);
+
+	ls_pcie_fix_error_response(pcie);
+
+	dw_pcie_dbi_ro_wr_en(pci);
+	ls_pcie_clear_multifunction(pcie);
+	dw_pcie_dbi_ro_wr_dis(pci);
+
+	ls_pcie_drop_msg_tlp(pcie);
+
+	return 0;
+}
+
+static const struct dw_pcie_host_ops ls_pcie_host_ops = {
+	.host_init = ls_pcie_host_init,
+	.pme_turn_off = ls_pcie_send_turnoff_msg,
+};
+
+static const struct ls_pcie_drvdata ls1021a_drvdata = {
+	.pm_support = false,
+};
+
+static const struct ls_pcie_drvdata layerscape_drvdata = {
+	.pf_off = 0xc0000,
+	.pm_support = true,
+};
+
+static const struct of_device_id ls_pcie_of_match[] = {
+	{ .compatible = "fsl,ls1012a-pcie", .data = &layerscape_drvdata },
+	{ .compatible = "fsl,ls1021a-pcie", .data = &ls1021a_drvdata },
+	{ .compatible = "fsl,ls1028a-pcie", .data = &layerscape_drvdata },
+	{ .compatible = "fsl,ls1043a-pcie", .data = &ls1021a_drvdata },
+	{ .compatible = "fsl,ls1046a-pcie", .data = &layerscape_drvdata },
+	{ .compatible = "fsl,ls2080a-pcie", .data = &layerscape_drvdata },
+	{ .compatible = "fsl,ls2085a-pcie", .data = &layerscape_drvdata },
+	{ .compatible = "fsl,ls2088a-pcie", .data = &layerscape_drvdata },
+	{ .compatible = "fsl,ls1088a-pcie", .data = &layerscape_drvdata },
+	{ },
+};
+
+static int ls_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct dw_pcie *pci;
+	struct ls_pcie *pcie;
+	struct resource *dbi_base;
+
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	if (!pci)
+		return -ENOMEM;
+
+	pcie->drvdata = of_device_get_match_data(dev);
+
+	pci->dev = dev;
+	pci->pp.ops = &ls_pcie_host_ops;
+
+	pcie->pci = pci;
+
+	dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
+	pci->dbi_base = devm_pci_remap_cfg_resource(dev, dbi_base);
+	if (IS_ERR(pci->dbi_base))
+		return PTR_ERR(pci->dbi_base);
+
+	pcie->big_endian = of_property_read_bool(dev->of_node, "big-endian");
+
+	pcie->pf_base = pci->dbi_base + pcie->drvdata->pf_off;
+
+	if (!ls_pcie_is_bridge(pcie))
+		return -ENODEV;
+
+	platform_set_drvdata(pdev, pcie);
+
+	return dw_pcie_host_init(&pci->pp);
+}
+
+static int ls_pcie_suspend_noirq(struct device *dev)
+{
+	struct ls_pcie *pcie = dev_get_drvdata(dev);
+
+	if (!pcie->drvdata->pm_support)
+		return 0;
+
+	return dw_pcie_suspend_noirq(pcie->pci);
+}
+
+static int ls_pcie_resume_noirq(struct device *dev)
+{
+	struct ls_pcie *pcie = dev_get_drvdata(dev);
+
+	if (!pcie->drvdata->pm_support)
+		return 0;
+
+	ls_pcie_exit_from_l2(&pcie->pci->pp);
+
+	return dw_pcie_resume_noirq(pcie->pci);
+}
+
+static const struct dev_pm_ops ls_pcie_pm_ops = {
+	NOIRQ_SYSTEM_SLEEP_PM_OPS(ls_pcie_suspend_noirq, ls_pcie_resume_noirq)
+};
+
+static struct platform_driver ls_pcie_driver = {
+	.probe = ls_pcie_probe,
+	.driver = {
+		.name = "layerscape-pcie",
+		.of_match_table = ls_pcie_of_match,
+		.suppress_bind_attrs = true,
+		.pm = &ls_pcie_pm_ops,
+	},
+};
+builtin_platform_driver(ls_pcie_driver);
diff --git a/drivers/pci/controller/dwc/pci-meson.c b/drivers/pci/controller/dwc/pci-meson.c
new file mode 100644
index 000000000..407558f5d
--- /dev/null
+++ b/drivers/pci/controller/dwc/pci-meson.c
@@ -0,0 +1,497 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for Amlogic MESON SoCs
+ *
+ * Copyright (c) 2018 Amlogic, inc.
+ * Author: Yue Wang <yue.wang@amlogic.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/of_gpio.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/resource.h>
+#include <linux/types.h>
+#include <linux/phy/phy.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+
+#include "pcie-designware.h"
+
+#define to_meson_pcie(x) dev_get_drvdata((x)->dev)
+
+#define PCIE_CAP_MAX_PAYLOAD_SIZE(x)	((x) << 5)
+#define PCIE_CAP_MAX_READ_REQ_SIZE(x)	((x) << 12)
+
+/* PCIe specific config registers */
+#define PCIE_CFG0			0x0
+#define APP_LTSSM_ENABLE		BIT(7)
+
+#define PCIE_CFG_STATUS12		0x30
+#define IS_SMLH_LINK_UP(x)		((x) & (1 << 6))
+#define IS_RDLH_LINK_UP(x)		((x) & (1 << 16))
+#define IS_LTSSM_UP(x)			((((x) >> 10) & 0x1f) == 0x11)
+
+#define PCIE_CFG_STATUS17		0x44
+#define PM_CURRENT_STATE(x)		(((x) >> 7) & 0x1)
+
+#define WAIT_LINKUP_TIMEOUT		4000
+#define PORT_CLK_RATE			100000000UL
+#define MAX_PAYLOAD_SIZE		256
+#define MAX_READ_REQ_SIZE		256
+#define PCIE_RESET_DELAY		500
+#define PCIE_SHARED_RESET		1
+#define PCIE_NORMAL_RESET		0
+
+enum pcie_data_rate {
+	PCIE_GEN1,
+	PCIE_GEN2,
+	PCIE_GEN3,
+	PCIE_GEN4
+};
+
+struct meson_pcie_clk_res {
+	struct clk *clk;
+	struct clk *port_clk;
+	struct clk *general_clk;
+};
+
+struct meson_pcie_rc_reset {
+	struct reset_control *port;
+	struct reset_control *apb;
+};
+
+struct meson_pcie {
+	struct dw_pcie pci;
+	void __iomem *cfg_base;
+	struct meson_pcie_clk_res clk_res;
+	struct meson_pcie_rc_reset mrst;
+	struct gpio_desc *reset_gpio;
+	struct phy *phy;
+};
+
+static struct reset_control *meson_pcie_get_reset(struct meson_pcie *mp,
+						  const char *id,
+						  u32 reset_type)
+{
+	struct device *dev = mp->pci.dev;
+	struct reset_control *reset;
+
+	if (reset_type == PCIE_SHARED_RESET)
+		reset = devm_reset_control_get_shared(dev, id);
+	else
+		reset = devm_reset_control_get(dev, id);
+
+	return reset;
+}
+
+static int meson_pcie_get_resets(struct meson_pcie *mp)
+{
+	struct meson_pcie_rc_reset *mrst = &mp->mrst;
+
+	mrst->port = meson_pcie_get_reset(mp, "port", PCIE_NORMAL_RESET);
+	if (IS_ERR(mrst->port))
+		return PTR_ERR(mrst->port);
+	reset_control_deassert(mrst->port);
+
+	mrst->apb = meson_pcie_get_reset(mp, "apb", PCIE_SHARED_RESET);
+	if (IS_ERR(mrst->apb))
+		return PTR_ERR(mrst->apb);
+	reset_control_deassert(mrst->apb);
+
+	return 0;
+}
+
+static int meson_pcie_get_mems(struct platform_device *pdev,
+			       struct meson_pcie *mp)
+{
+	struct dw_pcie *pci = &mp->pci;
+
+	pci->dbi_base = devm_platform_ioremap_resource_byname(pdev, "elbi");
+	if (IS_ERR(pci->dbi_base))
+		return PTR_ERR(pci->dbi_base);
+
+	mp->cfg_base = devm_platform_ioremap_resource_byname(pdev, "cfg");
+	if (IS_ERR(mp->cfg_base))
+		return PTR_ERR(mp->cfg_base);
+
+	return 0;
+}
+
+static int meson_pcie_power_on(struct meson_pcie *mp)
+{
+	int ret = 0;
+
+	ret = phy_init(mp->phy);
+	if (ret)
+		return ret;
+
+	ret = phy_power_on(mp->phy);
+	if (ret) {
+		phy_exit(mp->phy);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void meson_pcie_power_off(struct meson_pcie *mp)
+{
+	phy_power_off(mp->phy);
+	phy_exit(mp->phy);
+}
+
+static int meson_pcie_reset(struct meson_pcie *mp)
+{
+	struct meson_pcie_rc_reset *mrst = &mp->mrst;
+	int ret = 0;
+
+	ret = phy_reset(mp->phy);
+	if (ret)
+		return ret;
+
+	reset_control_assert(mrst->port);
+	reset_control_assert(mrst->apb);
+	udelay(PCIE_RESET_DELAY);
+	reset_control_deassert(mrst->port);
+	reset_control_deassert(mrst->apb);
+	udelay(PCIE_RESET_DELAY);
+
+	return 0;
+}
+
+static inline void meson_pcie_disable_clock(void *data)
+{
+	struct clk *clk = data;
+
+	clk_disable_unprepare(clk);
+}
+
+static inline struct clk *meson_pcie_probe_clock(struct device *dev,
+						 const char *id, u64 rate)
+{
+	struct clk *clk;
+	int ret;
+
+	clk = devm_clk_get(dev, id);
+	if (IS_ERR(clk))
+		return clk;
+
+	if (rate) {
+		ret = clk_set_rate(clk, rate);
+		if (ret) {
+			dev_err(dev, "set clk rate failed, ret = %d\n", ret);
+			return ERR_PTR(ret);
+		}
+	}
+
+	ret = clk_prepare_enable(clk);
+	if (ret) {
+		dev_err(dev, "couldn't enable clk\n");
+		return ERR_PTR(ret);
+	}
+
+	devm_add_action_or_reset(dev, meson_pcie_disable_clock, clk);
+
+	return clk;
+}
+
+static int meson_pcie_probe_clocks(struct meson_pcie *mp)
+{
+	struct device *dev = mp->pci.dev;
+	struct meson_pcie_clk_res *res = &mp->clk_res;
+
+	res->port_clk = meson_pcie_probe_clock(dev, "port", PORT_CLK_RATE);
+	if (IS_ERR(res->port_clk))
+		return PTR_ERR(res->port_clk);
+
+	res->general_clk = meson_pcie_probe_clock(dev, "general", 0);
+	if (IS_ERR(res->general_clk))
+		return PTR_ERR(res->general_clk);
+
+	res->clk = meson_pcie_probe_clock(dev, "pclk", 0);
+	if (IS_ERR(res->clk))
+		return PTR_ERR(res->clk);
+
+	return 0;
+}
+
+static inline u32 meson_cfg_readl(struct meson_pcie *mp, u32 reg)
+{
+	return readl(mp->cfg_base + reg);
+}
+
+static inline void meson_cfg_writel(struct meson_pcie *mp, u32 val, u32 reg)
+{
+	writel(val, mp->cfg_base + reg);
+}
+
+static void meson_pcie_assert_reset(struct meson_pcie *mp)
+{
+	gpiod_set_value_cansleep(mp->reset_gpio, 1);
+	udelay(500);
+	gpiod_set_value_cansleep(mp->reset_gpio, 0);
+}
+
+static void meson_pcie_ltssm_enable(struct meson_pcie *mp)
+{
+	u32 val;
+
+	val = meson_cfg_readl(mp, PCIE_CFG0);
+	val |= APP_LTSSM_ENABLE;
+	meson_cfg_writel(mp, val, PCIE_CFG0);
+}
+
+static int meson_size_to_payload(struct meson_pcie *mp, int size)
+{
+	struct device *dev = mp->pci.dev;
+
+	/*
+	 * dwc supports 2^(val+7) payload size, which val is 0~5 default to 1.
+	 * So if input size is not 2^order alignment or less than 2^7 or bigger
+	 * than 2^12, just set to default size 2^(1+7).
+	 */
+	if (!is_power_of_2(size) || size < 128 || size > 4096) {
+		dev_warn(dev, "payload size %d, set to default 256\n", size);
+		return 1;
+	}
+
+	return fls(size) - 8;
+}
+
+static void meson_set_max_payload(struct meson_pcie *mp, int size)
+{
+	struct dw_pcie *pci = &mp->pci;
+	u32 val;
+	u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	int max_payload_size = meson_size_to_payload(mp, size);
+
+	val = dw_pcie_readl_dbi(pci, offset + PCI_EXP_DEVCTL);
+	val &= ~PCI_EXP_DEVCTL_PAYLOAD;
+	dw_pcie_writel_dbi(pci, offset + PCI_EXP_DEVCTL, val);
+
+	val = dw_pcie_readl_dbi(pci, offset + PCI_EXP_DEVCTL);
+	val |= PCIE_CAP_MAX_PAYLOAD_SIZE(max_payload_size);
+	dw_pcie_writel_dbi(pci, offset + PCI_EXP_DEVCTL, val);
+}
+
+static void meson_set_max_rd_req_size(struct meson_pcie *mp, int size)
+{
+	struct dw_pcie *pci = &mp->pci;
+	u32 val;
+	u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	int max_rd_req_size = meson_size_to_payload(mp, size);
+
+	val = dw_pcie_readl_dbi(pci, offset + PCI_EXP_DEVCTL);
+	val &= ~PCI_EXP_DEVCTL_READRQ;
+	dw_pcie_writel_dbi(pci, offset + PCI_EXP_DEVCTL, val);
+
+	val = dw_pcie_readl_dbi(pci, offset + PCI_EXP_DEVCTL);
+	val |= PCIE_CAP_MAX_READ_REQ_SIZE(max_rd_req_size);
+	dw_pcie_writel_dbi(pci, offset + PCI_EXP_DEVCTL, val);
+}
+
+static int meson_pcie_start_link(struct dw_pcie *pci)
+{
+	struct meson_pcie *mp = to_meson_pcie(pci);
+
+	meson_pcie_ltssm_enable(mp);
+	meson_pcie_assert_reset(mp);
+
+	return 0;
+}
+
+static int meson_pcie_rd_own_conf(struct pci_bus *bus, u32 devfn,
+				  int where, int size, u32 *val)
+{
+	int ret;
+
+	ret = pci_generic_config_read(bus, devfn, where, size, val);
+	if (ret != PCIBIOS_SUCCESSFUL)
+		return ret;
+
+	/*
+	 * There is a bug in the MESON AXG PCIe controller whereby software
+	 * cannot program the PCI_CLASS_DEVICE register, so we must fabricate
+	 * the return value in the config accessors.
+	 */
+	if ((where & ~3) == PCI_CLASS_REVISION) {
+		if (size <= 2)
+			*val = (*val & ((1 << (size * 8)) - 1)) << (8 * (where & 3));
+		*val &= ~0xffffff00;
+		*val |= PCI_CLASS_BRIDGE_PCI_NORMAL << 8;
+		if (size <= 2)
+			*val = (*val >> (8 * (where & 3))) & ((1 << (size * 8)) - 1);
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops meson_pci_ops = {
+	.map_bus = dw_pcie_own_conf_map_bus,
+	.read = meson_pcie_rd_own_conf,
+	.write = pci_generic_config_write,
+};
+
+static int meson_pcie_link_up(struct dw_pcie *pci)
+{
+	struct meson_pcie *mp = to_meson_pcie(pci);
+	struct device *dev = pci->dev;
+	u32 speed_okay = 0;
+	u32 cnt = 0;
+	u32 state12, state17, smlh_up, ltssm_up, rdlh_up;
+
+	do {
+		state12 = meson_cfg_readl(mp, PCIE_CFG_STATUS12);
+		state17 = meson_cfg_readl(mp, PCIE_CFG_STATUS17);
+		smlh_up = IS_SMLH_LINK_UP(state12);
+		rdlh_up = IS_RDLH_LINK_UP(state12);
+		ltssm_up = IS_LTSSM_UP(state12);
+
+		if (PM_CURRENT_STATE(state17) < PCIE_GEN3)
+			speed_okay = 1;
+
+		if (smlh_up)
+			dev_dbg(dev, "smlh_link_up is on\n");
+		if (rdlh_up)
+			dev_dbg(dev, "rdlh_link_up is on\n");
+		if (ltssm_up)
+			dev_dbg(dev, "ltssm_up is on\n");
+		if (speed_okay)
+			dev_dbg(dev, "speed_okay\n");
+
+		if (smlh_up && rdlh_up && ltssm_up && speed_okay)
+			return 1;
+
+		cnt++;
+
+		udelay(10);
+	} while (cnt < WAIT_LINKUP_TIMEOUT);
+
+	dev_err(dev, "error: wait linkup timeout\n");
+	return 0;
+}
+
+static int meson_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct meson_pcie *mp = to_meson_pcie(pci);
+
+	pp->bridge->ops = &meson_pci_ops;
+
+	meson_set_max_payload(mp, MAX_PAYLOAD_SIZE);
+	meson_set_max_rd_req_size(mp, MAX_READ_REQ_SIZE);
+
+	return 0;
+}
+
+static const struct dw_pcie_host_ops meson_pcie_host_ops = {
+	.host_init = meson_pcie_host_init,
+};
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+	.link_up = meson_pcie_link_up,
+	.start_link = meson_pcie_start_link,
+};
+
+static int meson_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct dw_pcie *pci;
+	struct meson_pcie *mp;
+	int ret;
+
+	mp = devm_kzalloc(dev, sizeof(*mp), GFP_KERNEL);
+	if (!mp)
+		return -ENOMEM;
+
+	pci = &mp->pci;
+	pci->dev = dev;
+	pci->ops = &dw_pcie_ops;
+	pci->pp.ops = &meson_pcie_host_ops;
+	pci->num_lanes = 1;
+
+	mp->phy = devm_phy_get(dev, "pcie");
+	if (IS_ERR(mp->phy)) {
+		dev_err(dev, "get phy failed, %ld\n", PTR_ERR(mp->phy));
+		return PTR_ERR(mp->phy);
+	}
+
+	mp->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(mp->reset_gpio)) {
+		dev_err(dev, "get reset gpio failed\n");
+		return PTR_ERR(mp->reset_gpio);
+	}
+
+	ret = meson_pcie_get_resets(mp);
+	if (ret) {
+		dev_err(dev, "get reset resource failed, %d\n", ret);
+		return ret;
+	}
+
+	ret = meson_pcie_get_mems(pdev, mp);
+	if (ret) {
+		dev_err(dev, "get memory resource failed, %d\n", ret);
+		return ret;
+	}
+
+	ret = meson_pcie_power_on(mp);
+	if (ret) {
+		dev_err(dev, "phy power on failed, %d\n", ret);
+		return ret;
+	}
+
+	ret = meson_pcie_reset(mp);
+	if (ret) {
+		dev_err(dev, "reset failed, %d\n", ret);
+		goto err_phy;
+	}
+
+	ret = meson_pcie_probe_clocks(mp);
+	if (ret) {
+		dev_err(dev, "init clock resources failed, %d\n", ret);
+		goto err_phy;
+	}
+
+	platform_set_drvdata(pdev, mp);
+
+	ret = dw_pcie_host_init(&pci->pp);
+	if (ret < 0) {
+		dev_err(dev, "Add PCIe port failed, %d\n", ret);
+		goto err_phy;
+	}
+
+	return 0;
+
+err_phy:
+	meson_pcie_power_off(mp);
+	return ret;
+}
+
+static const struct of_device_id meson_pcie_of_match[] = {
+	{
+		.compatible = "amlogic,axg-pcie",
+	},
+	{
+		.compatible = "amlogic,g12a-pcie",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, meson_pcie_of_match);
+
+static struct platform_driver meson_pcie_driver = {
+	.probe = meson_pcie_probe,
+	.driver = {
+		.name = "meson-pcie",
+		.of_match_table = meson_pcie_of_match,
+	},
+};
+
+module_platform_driver(meson_pcie_driver);
+
+MODULE_AUTHOR("Yue Wang <yue.wang@amlogic.com>");
+MODULE_DESCRIPTION("Amlogic PCIe Controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/dwc/pcie-al.c b/drivers/pci/controller/dwc/pcie-al.c
new file mode 100644
index 000000000..b8cb77c9c
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-al.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for Amazon's Annapurna Labs IP (used in chips
+ * such as Graviton and Alpine)
+ *
+ * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Author: Jonathan Chocron <jonnyc@amazon.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/pci-ecam.h>
+#include <linux/pci-acpi.h>
+#include "../../pci.h"
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
+
+struct al_pcie_acpi  {
+	void __iomem *dbi_base;
+};
+
+static void __iomem *al_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
+				     int where)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	struct al_pcie_acpi *pcie = cfg->priv;
+	void __iomem *dbi_base = pcie->dbi_base;
+
+	if (bus->number == cfg->busr.start) {
+		/*
+		 * The DW PCIe core doesn't filter out transactions to other
+		 * devices/functions on the root bus num, so we do this here.
+		 */
+		if (PCI_SLOT(devfn) > 0)
+			return NULL;
+		else
+			return dbi_base + where;
+	}
+
+	return pci_ecam_map_bus(bus, devfn, where);
+}
+
+static int al_pcie_init(struct pci_config_window *cfg)
+{
+	struct device *dev = cfg->parent;
+	struct acpi_device *adev = to_acpi_device(dev);
+	struct acpi_pci_root *root = acpi_driver_data(adev);
+	struct al_pcie_acpi *al_pcie;
+	struct resource *res;
+	int ret;
+
+	al_pcie = devm_kzalloc(dev, sizeof(*al_pcie), GFP_KERNEL);
+	if (!al_pcie)
+		return -ENOMEM;
+
+	res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
+	if (!res)
+		return -ENOMEM;
+
+	ret = acpi_get_rc_resources(dev, "AMZN0001", root->segment, res);
+	if (ret) {
+		dev_err(dev, "can't get rc dbi base address for SEG %d\n",
+			root->segment);
+		return ret;
+	}
+
+	dev_dbg(dev, "Root port dbi res: %pR\n", res);
+
+	al_pcie->dbi_base = devm_pci_remap_cfg_resource(dev, res);
+	if (IS_ERR(al_pcie->dbi_base))
+		return PTR_ERR(al_pcie->dbi_base);
+
+	cfg->priv = al_pcie;
+
+	return 0;
+}
+
+const struct pci_ecam_ops al_pcie_ops = {
+	.init         =  al_pcie_init,
+	.pci_ops      = {
+		.map_bus    = al_pcie_map_bus,
+		.read       = pci_generic_config_read,
+		.write      = pci_generic_config_write,
+	}
+};
+
+#endif /* defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS) */
+
+#ifdef CONFIG_PCIE_AL
+
+#include <linux/of_pci.h>
+#include "pcie-designware.h"
+
+#define AL_PCIE_REV_ID_2	2
+#define AL_PCIE_REV_ID_3	3
+#define AL_PCIE_REV_ID_4	4
+
+#define AXI_BASE_OFFSET		0x0
+
+#define DEVICE_ID_OFFSET	0x16c
+
+#define DEVICE_REV_ID			0x0
+#define DEVICE_REV_ID_DEV_ID_MASK	GENMASK(31, 16)
+
+#define DEVICE_REV_ID_DEV_ID_X4		0
+#define DEVICE_REV_ID_DEV_ID_X8		2
+#define DEVICE_REV_ID_DEV_ID_X16	4
+
+#define OB_CTRL_REV1_2_OFFSET	0x0040
+#define OB_CTRL_REV3_5_OFFSET	0x0030
+
+#define CFG_TARGET_BUS			0x0
+#define CFG_TARGET_BUS_MASK_MASK	GENMASK(7, 0)
+#define CFG_TARGET_BUS_BUSNUM_MASK	GENMASK(15, 8)
+
+#define CFG_CONTROL			0x4
+#define CFG_CONTROL_SUBBUS_MASK		GENMASK(15, 8)
+#define CFG_CONTROL_SEC_BUS_MASK	GENMASK(23, 16)
+
+struct al_pcie_reg_offsets {
+	unsigned int ob_ctrl;
+};
+
+struct al_pcie_target_bus_cfg {
+	u8 reg_val;
+	u8 reg_mask;
+	u8 ecam_mask;
+};
+
+struct al_pcie {
+	struct dw_pcie *pci;
+	void __iomem *controller_base; /* base of PCIe unit (not DW core) */
+	struct device *dev;
+	resource_size_t ecam_size;
+	unsigned int controller_rev_id;
+	struct al_pcie_reg_offsets reg_offsets;
+	struct al_pcie_target_bus_cfg target_bus_cfg;
+};
+
+#define to_al_pcie(x)		dev_get_drvdata((x)->dev)
+
+static inline u32 al_pcie_controller_readl(struct al_pcie *pcie, u32 offset)
+{
+	return readl_relaxed(pcie->controller_base + offset);
+}
+
+static inline void al_pcie_controller_writel(struct al_pcie *pcie, u32 offset,
+					     u32 val)
+{
+	writel_relaxed(val, pcie->controller_base + offset);
+}
+
+static int al_pcie_rev_id_get(struct al_pcie *pcie, unsigned int *rev_id)
+{
+	u32 dev_rev_id_val;
+	u32 dev_id_val;
+
+	dev_rev_id_val = al_pcie_controller_readl(pcie, AXI_BASE_OFFSET +
+						  DEVICE_ID_OFFSET +
+						  DEVICE_REV_ID);
+	dev_id_val = FIELD_GET(DEVICE_REV_ID_DEV_ID_MASK, dev_rev_id_val);
+
+	switch (dev_id_val) {
+	case DEVICE_REV_ID_DEV_ID_X4:
+		*rev_id = AL_PCIE_REV_ID_2;
+		break;
+	case DEVICE_REV_ID_DEV_ID_X8:
+		*rev_id = AL_PCIE_REV_ID_3;
+		break;
+	case DEVICE_REV_ID_DEV_ID_X16:
+		*rev_id = AL_PCIE_REV_ID_4;
+		break;
+	default:
+		dev_err(pcie->dev, "Unsupported dev_id_val (0x%x)\n",
+			dev_id_val);
+		return -EINVAL;
+	}
+
+	dev_dbg(pcie->dev, "dev_id_val: 0x%x\n", dev_id_val);
+
+	return 0;
+}
+
+static int al_pcie_reg_offsets_set(struct al_pcie *pcie)
+{
+	switch (pcie->controller_rev_id) {
+	case AL_PCIE_REV_ID_2:
+		pcie->reg_offsets.ob_ctrl = OB_CTRL_REV1_2_OFFSET;
+		break;
+	case AL_PCIE_REV_ID_3:
+	case AL_PCIE_REV_ID_4:
+		pcie->reg_offsets.ob_ctrl = OB_CTRL_REV3_5_OFFSET;
+		break;
+	default:
+		dev_err(pcie->dev, "Unsupported controller rev_id: 0x%x\n",
+			pcie->controller_rev_id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static inline void al_pcie_target_bus_set(struct al_pcie *pcie,
+					  u8 target_bus,
+					  u8 mask_target_bus)
+{
+	u32 reg;
+
+	reg = FIELD_PREP(CFG_TARGET_BUS_MASK_MASK, mask_target_bus) |
+	      FIELD_PREP(CFG_TARGET_BUS_BUSNUM_MASK, target_bus);
+
+	al_pcie_controller_writel(pcie, AXI_BASE_OFFSET +
+				  pcie->reg_offsets.ob_ctrl + CFG_TARGET_BUS,
+				  reg);
+}
+
+static void __iomem *al_pcie_conf_addr_map_bus(struct pci_bus *bus,
+					       unsigned int devfn, int where)
+{
+	struct dw_pcie_rp *pp = bus->sysdata;
+	struct al_pcie *pcie = to_al_pcie(to_dw_pcie_from_pp(pp));
+	unsigned int busnr = bus->number;
+	struct al_pcie_target_bus_cfg *target_bus_cfg = &pcie->target_bus_cfg;
+	unsigned int busnr_ecam = busnr & target_bus_cfg->ecam_mask;
+	unsigned int busnr_reg = busnr & target_bus_cfg->reg_mask;
+
+	if (busnr_reg != target_bus_cfg->reg_val) {
+		dev_dbg(pcie->pci->dev, "Changing target bus busnum val from 0x%x to 0x%x\n",
+			target_bus_cfg->reg_val, busnr_reg);
+		target_bus_cfg->reg_val = busnr_reg;
+		al_pcie_target_bus_set(pcie,
+				       target_bus_cfg->reg_val,
+				       target_bus_cfg->reg_mask);
+	}
+
+	return pp->va_cfg0_base + PCIE_ECAM_OFFSET(busnr_ecam, devfn, where);
+}
+
+static struct pci_ops al_child_pci_ops = {
+	.map_bus = al_pcie_conf_addr_map_bus,
+	.read = pci_generic_config_read,
+	.write = pci_generic_config_write,
+};
+
+static void al_pcie_config_prepare(struct al_pcie *pcie)
+{
+	struct al_pcie_target_bus_cfg *target_bus_cfg;
+	struct dw_pcie_rp *pp = &pcie->pci->pp;
+	unsigned int ecam_bus_mask;
+	u32 cfg_control_offset;
+	u8 subordinate_bus;
+	u8 secondary_bus;
+	u32 cfg_control;
+	u32 reg;
+	struct resource *bus = resource_list_first_type(&pp->bridge->windows, IORESOURCE_BUS)->res;
+
+	target_bus_cfg = &pcie->target_bus_cfg;
+
+	ecam_bus_mask = (pcie->ecam_size >> PCIE_ECAM_BUS_SHIFT) - 1;
+	if (ecam_bus_mask > 255) {
+		dev_warn(pcie->dev, "ECAM window size is larger than 256MB. Cutting off at 256\n");
+		ecam_bus_mask = 255;
+	}
+
+	/* This portion is taken from the transaction address */
+	target_bus_cfg->ecam_mask = ecam_bus_mask;
+	/* This portion is taken from the cfg_target_bus reg */
+	target_bus_cfg->reg_mask = ~target_bus_cfg->ecam_mask;
+	target_bus_cfg->reg_val = bus->start & target_bus_cfg->reg_mask;
+
+	al_pcie_target_bus_set(pcie, target_bus_cfg->reg_val,
+			       target_bus_cfg->reg_mask);
+
+	secondary_bus = bus->start + 1;
+	subordinate_bus = bus->end;
+
+	/* Set the valid values of secondary and subordinate buses */
+	cfg_control_offset = AXI_BASE_OFFSET + pcie->reg_offsets.ob_ctrl +
+			     CFG_CONTROL;
+
+	cfg_control = al_pcie_controller_readl(pcie, cfg_control_offset);
+
+	reg = cfg_control &
+	      ~(CFG_CONTROL_SEC_BUS_MASK | CFG_CONTROL_SUBBUS_MASK);
+
+	reg |= FIELD_PREP(CFG_CONTROL_SUBBUS_MASK, subordinate_bus) |
+	       FIELD_PREP(CFG_CONTROL_SEC_BUS_MASK, secondary_bus);
+
+	al_pcie_controller_writel(pcie, cfg_control_offset, reg);
+}
+
+static int al_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct al_pcie *pcie = to_al_pcie(pci);
+	int rc;
+
+	pp->bridge->child_ops = &al_child_pci_ops;
+
+	rc = al_pcie_rev_id_get(pcie, &pcie->controller_rev_id);
+	if (rc)
+		return rc;
+
+	rc = al_pcie_reg_offsets_set(pcie);
+	if (rc)
+		return rc;
+
+	al_pcie_config_prepare(pcie);
+
+	return 0;
+}
+
+static const struct dw_pcie_host_ops al_pcie_host_ops = {
+	.host_init = al_pcie_host_init,
+};
+
+static int al_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *controller_res;
+	struct resource *ecam_res;
+	struct al_pcie *al_pcie;
+	struct dw_pcie *pci;
+
+	al_pcie = devm_kzalloc(dev, sizeof(*al_pcie), GFP_KERNEL);
+	if (!al_pcie)
+		return -ENOMEM;
+
+	pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	if (!pci)
+		return -ENOMEM;
+
+	pci->dev = dev;
+	pci->pp.ops = &al_pcie_host_ops;
+
+	al_pcie->pci = pci;
+	al_pcie->dev = dev;
+
+	ecam_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "config");
+	if (!ecam_res) {
+		dev_err(dev, "couldn't find 'config' reg in DT\n");
+		return -ENOENT;
+	}
+	al_pcie->ecam_size = resource_size(ecam_res);
+
+	controller_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						      "controller");
+	al_pcie->controller_base = devm_ioremap_resource(dev, controller_res);
+	if (IS_ERR(al_pcie->controller_base)) {
+		dev_err(dev, "couldn't remap controller base %pR\n",
+			controller_res);
+		return PTR_ERR(al_pcie->controller_base);
+	}
+
+	dev_dbg(dev, "From DT: controller_base: %pR\n", controller_res);
+
+	platform_set_drvdata(pdev, al_pcie);
+
+	return dw_pcie_host_init(&pci->pp);
+}
+
+static const struct of_device_id al_pcie_of_match[] = {
+	{ .compatible = "amazon,al-alpine-v2-pcie",
+	},
+	{ .compatible = "amazon,al-alpine-v3-pcie",
+	},
+	{},
+};
+
+static struct platform_driver al_pcie_driver = {
+	.driver = {
+		.name	= "al-pcie",
+		.of_match_table = al_pcie_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = al_pcie_probe,
+};
+builtin_platform_driver(al_pcie_driver);
+
+#endif /* CONFIG_PCIE_AL*/
diff --git a/drivers/pci/controller/dwc/pcie-armada8k.c b/drivers/pci/controller/dwc/pcie-armada8k.c
new file mode 100644
index 000000000..5c999e15c
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-armada8k.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for Marvell Armada-8K SoCs
+ *
+ * Armada-8K PCIe Glue Layer Source Code
+ *
+ * Copyright (C) 2016 Marvell Technology Group Ltd.
+ *
+ * Author: Yehuda Yitshak <yehuday@marvell.com>
+ * Author: Shadi Ammouri <shadi@marvell.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/of_pci.h>
+
+#include "pcie-designware.h"
+
+#define ARMADA8K_PCIE_MAX_LANES PCIE_LNK_X4
+
+struct armada8k_pcie {
+	struct dw_pcie *pci;
+	struct clk *clk;
+	struct clk *clk_reg;
+	struct phy *phy[ARMADA8K_PCIE_MAX_LANES];
+	unsigned int phy_count;
+};
+
+#define PCIE_VENDOR_REGS_OFFSET		0x8000
+
+#define PCIE_GLOBAL_CONTROL_REG		(PCIE_VENDOR_REGS_OFFSET + 0x0)
+#define PCIE_APP_LTSSM_EN		BIT(2)
+#define PCIE_DEVICE_TYPE_SHIFT		4
+#define PCIE_DEVICE_TYPE_MASK		0xF
+#define PCIE_DEVICE_TYPE_RC		0x4 /* Root complex */
+
+#define PCIE_GLOBAL_STATUS_REG		(PCIE_VENDOR_REGS_OFFSET + 0x8)
+#define PCIE_GLB_STS_RDLH_LINK_UP	BIT(1)
+#define PCIE_GLB_STS_PHY_LINK_UP	BIT(9)
+
+#define PCIE_GLOBAL_INT_CAUSE1_REG	(PCIE_VENDOR_REGS_OFFSET + 0x1C)
+#define PCIE_GLOBAL_INT_MASK1_REG	(PCIE_VENDOR_REGS_OFFSET + 0x20)
+#define PCIE_INT_A_ASSERT_MASK		BIT(9)
+#define PCIE_INT_B_ASSERT_MASK		BIT(10)
+#define PCIE_INT_C_ASSERT_MASK		BIT(11)
+#define PCIE_INT_D_ASSERT_MASK		BIT(12)
+
+#define PCIE_ARCACHE_TRC_REG		(PCIE_VENDOR_REGS_OFFSET + 0x50)
+#define PCIE_AWCACHE_TRC_REG		(PCIE_VENDOR_REGS_OFFSET + 0x54)
+#define PCIE_ARUSER_REG			(PCIE_VENDOR_REGS_OFFSET + 0x5C)
+#define PCIE_AWUSER_REG			(PCIE_VENDOR_REGS_OFFSET + 0x60)
+/*
+ * AR/AW Cache defaults: Normal memory, Write-Back, Read / Write
+ * allocate
+ */
+#define ARCACHE_DEFAULT_VALUE		0x3511
+#define AWCACHE_DEFAULT_VALUE		0x5311
+
+#define DOMAIN_OUTER_SHAREABLE		0x2
+#define AX_USER_DOMAIN_MASK		0x3
+#define AX_USER_DOMAIN_SHIFT		4
+
+#define to_armada8k_pcie(x)	dev_get_drvdata((x)->dev)
+
+static void armada8k_pcie_disable_phys(struct armada8k_pcie *pcie)
+{
+	int i;
+
+	for (i = 0; i < ARMADA8K_PCIE_MAX_LANES; i++) {
+		phy_power_off(pcie->phy[i]);
+		phy_exit(pcie->phy[i]);
+	}
+}
+
+static int armada8k_pcie_enable_phys(struct armada8k_pcie *pcie)
+{
+	int ret;
+	int i;
+
+	for (i = 0; i < ARMADA8K_PCIE_MAX_LANES; i++) {
+		ret = phy_init(pcie->phy[i]);
+		if (ret)
+			return ret;
+
+		ret = phy_set_mode_ext(pcie->phy[i], PHY_MODE_PCIE,
+				       pcie->phy_count);
+		if (ret) {
+			phy_exit(pcie->phy[i]);
+			return ret;
+		}
+
+		ret = phy_power_on(pcie->phy[i]);
+		if (ret) {
+			phy_exit(pcie->phy[i]);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int armada8k_pcie_setup_phys(struct armada8k_pcie *pcie)
+{
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	struct device_node *node = dev->of_node;
+	int ret = 0;
+	int i;
+
+	for (i = 0; i < ARMADA8K_PCIE_MAX_LANES; i++) {
+		pcie->phy[i] = devm_of_phy_get_by_index(dev, node, i);
+		if (IS_ERR(pcie->phy[i])) {
+			if (PTR_ERR(pcie->phy[i]) != -ENODEV)
+				return PTR_ERR(pcie->phy[i]);
+
+			pcie->phy[i] = NULL;
+			continue;
+		}
+
+		pcie->phy_count++;
+	}
+
+	/* Old bindings miss the PHY handle, so just warn if there is no PHY */
+	if (!pcie->phy_count)
+		dev_warn(dev, "No available PHY\n");
+
+	ret = armada8k_pcie_enable_phys(pcie);
+	if (ret)
+		dev_err(dev, "Failed to initialize PHY(s) (%d)\n", ret);
+
+	return ret;
+}
+
+static int armada8k_pcie_link_up(struct dw_pcie *pci)
+{
+	u32 reg;
+	u32 mask = PCIE_GLB_STS_RDLH_LINK_UP | PCIE_GLB_STS_PHY_LINK_UP;
+
+	reg = dw_pcie_readl_dbi(pci, PCIE_GLOBAL_STATUS_REG);
+
+	if ((reg & mask) == mask)
+		return 1;
+
+	dev_dbg(pci->dev, "No link detected (Global-Status: 0x%08x).\n", reg);
+	return 0;
+}
+
+static int armada8k_pcie_start_link(struct dw_pcie *pci)
+{
+	u32 reg;
+
+	/* Start LTSSM */
+	reg = dw_pcie_readl_dbi(pci, PCIE_GLOBAL_CONTROL_REG);
+	reg |= PCIE_APP_LTSSM_EN;
+	dw_pcie_writel_dbi(pci, PCIE_GLOBAL_CONTROL_REG, reg);
+
+	return 0;
+}
+
+static int armada8k_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	u32 reg;
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+
+	if (!dw_pcie_link_up(pci)) {
+		/* Disable LTSSM state machine to enable configuration */
+		reg = dw_pcie_readl_dbi(pci, PCIE_GLOBAL_CONTROL_REG);
+		reg &= ~(PCIE_APP_LTSSM_EN);
+		dw_pcie_writel_dbi(pci, PCIE_GLOBAL_CONTROL_REG, reg);
+	}
+
+	/* Set the device to root complex mode */
+	reg = dw_pcie_readl_dbi(pci, PCIE_GLOBAL_CONTROL_REG);
+	reg &= ~(PCIE_DEVICE_TYPE_MASK << PCIE_DEVICE_TYPE_SHIFT);
+	reg |= PCIE_DEVICE_TYPE_RC << PCIE_DEVICE_TYPE_SHIFT;
+	dw_pcie_writel_dbi(pci, PCIE_GLOBAL_CONTROL_REG, reg);
+
+	/* Set the PCIe master AxCache attributes */
+	dw_pcie_writel_dbi(pci, PCIE_ARCACHE_TRC_REG, ARCACHE_DEFAULT_VALUE);
+	dw_pcie_writel_dbi(pci, PCIE_AWCACHE_TRC_REG, AWCACHE_DEFAULT_VALUE);
+
+	/* Set the PCIe master AxDomain attributes */
+	reg = dw_pcie_readl_dbi(pci, PCIE_ARUSER_REG);
+	reg &= ~(AX_USER_DOMAIN_MASK << AX_USER_DOMAIN_SHIFT);
+	reg |= DOMAIN_OUTER_SHAREABLE << AX_USER_DOMAIN_SHIFT;
+	dw_pcie_writel_dbi(pci, PCIE_ARUSER_REG, reg);
+
+	reg = dw_pcie_readl_dbi(pci, PCIE_AWUSER_REG);
+	reg &= ~(AX_USER_DOMAIN_MASK << AX_USER_DOMAIN_SHIFT);
+	reg |= DOMAIN_OUTER_SHAREABLE << AX_USER_DOMAIN_SHIFT;
+	dw_pcie_writel_dbi(pci, PCIE_AWUSER_REG, reg);
+
+	/* Enable INT A-D interrupts */
+	reg = dw_pcie_readl_dbi(pci, PCIE_GLOBAL_INT_MASK1_REG);
+	reg |= PCIE_INT_A_ASSERT_MASK | PCIE_INT_B_ASSERT_MASK |
+	       PCIE_INT_C_ASSERT_MASK | PCIE_INT_D_ASSERT_MASK;
+	dw_pcie_writel_dbi(pci, PCIE_GLOBAL_INT_MASK1_REG, reg);
+
+	return 0;
+}
+
+static irqreturn_t armada8k_pcie_irq_handler(int irq, void *arg)
+{
+	struct armada8k_pcie *pcie = arg;
+	struct dw_pcie *pci = pcie->pci;
+	u32 val;
+
+	/*
+	 * Interrupts are directly handled by the device driver of the
+	 * PCI device. However, they are also latched into the PCIe
+	 * controller, so we simply discard them.
+	 */
+	val = dw_pcie_readl_dbi(pci, PCIE_GLOBAL_INT_CAUSE1_REG);
+	dw_pcie_writel_dbi(pci, PCIE_GLOBAL_INT_CAUSE1_REG, val);
+
+	return IRQ_HANDLED;
+}
+
+static const struct dw_pcie_host_ops armada8k_pcie_host_ops = {
+	.host_init = armada8k_pcie_host_init,
+};
+
+static int armada8k_add_pcie_port(struct armada8k_pcie *pcie,
+				  struct platform_device *pdev)
+{
+	struct dw_pcie *pci = pcie->pci;
+	struct dw_pcie_rp *pp = &pci->pp;
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	pp->ops = &armada8k_pcie_host_ops;
+
+	pp->irq = platform_get_irq(pdev, 0);
+	if (pp->irq < 0)
+		return pp->irq;
+
+	ret = devm_request_irq(dev, pp->irq, armada8k_pcie_irq_handler,
+			       IRQF_SHARED, "armada8k-pcie", pcie);
+	if (ret) {
+		dev_err(dev, "failed to request irq %d\n", pp->irq);
+		return ret;
+	}
+
+	ret = dw_pcie_host_init(pp);
+	if (ret) {
+		dev_err(dev, "failed to initialize host: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+	.link_up = armada8k_pcie_link_up,
+	.start_link = armada8k_pcie_start_link,
+};
+
+static int armada8k_pcie_probe(struct platform_device *pdev)
+{
+	struct dw_pcie *pci;
+	struct armada8k_pcie *pcie;
+	struct device *dev = &pdev->dev;
+	struct resource *base;
+	int ret;
+
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	if (!pci)
+		return -ENOMEM;
+
+	pci->dev = dev;
+	pci->ops = &dw_pcie_ops;
+
+	pcie->pci = pci;
+
+	pcie->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(pcie->clk))
+		return PTR_ERR(pcie->clk);
+
+	ret = clk_prepare_enable(pcie->clk);
+	if (ret)
+		return ret;
+
+	pcie->clk_reg = devm_clk_get(dev, "reg");
+	if (pcie->clk_reg == ERR_PTR(-EPROBE_DEFER)) {
+		ret = -EPROBE_DEFER;
+		goto fail;
+	}
+	if (!IS_ERR(pcie->clk_reg)) {
+		ret = clk_prepare_enable(pcie->clk_reg);
+		if (ret)
+			goto fail_clkreg;
+	}
+
+	/* Get the dw-pcie unit configuration/control registers base. */
+	base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ctrl");
+	pci->dbi_base = devm_pci_remap_cfg_resource(dev, base);
+	if (IS_ERR(pci->dbi_base)) {
+		ret = PTR_ERR(pci->dbi_base);
+		goto fail_clkreg;
+	}
+
+	ret = armada8k_pcie_setup_phys(pcie);
+	if (ret)
+		goto fail_clkreg;
+
+	platform_set_drvdata(pdev, pcie);
+
+	ret = armada8k_add_pcie_port(pcie, pdev);
+	if (ret)
+		goto disable_phy;
+
+	return 0;
+
+disable_phy:
+	armada8k_pcie_disable_phys(pcie);
+fail_clkreg:
+	clk_disable_unprepare(pcie->clk_reg);
+fail:
+	clk_disable_unprepare(pcie->clk);
+
+	return ret;
+}
+
+static const struct of_device_id armada8k_pcie_of_match[] = {
+	{ .compatible = "marvell,armada8k-pcie", },
+	{},
+};
+
+static struct platform_driver armada8k_pcie_driver = {
+	.probe		= armada8k_pcie_probe,
+	.driver = {
+		.name	= "armada8k-pcie",
+		.of_match_table = armada8k_pcie_of_match,
+		.suppress_bind_attrs = true,
+	},
+};
+builtin_platform_driver(armada8k_pcie_driver);
diff --git a/drivers/pci/controller/dwc/pcie-artpec6.c b/drivers/pci/controller/dwc/pcie-artpec6.c
new file mode 100644
index 000000000..9b572a2b2
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-artpec6.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for Axis ARTPEC-6 SoC
+ *
+ * Author: Niklas Cassel <niklas.cassel@axis.com>
+ *
+ * Based on work done by Phil Edworthy <phil@edworthys.org>
+ */
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/signal.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
+
+#include "pcie-designware.h"
+
+#define to_artpec6_pcie(x)	dev_get_drvdata((x)->dev)
+
+enum artpec_pcie_variants {
+	ARTPEC6,
+	ARTPEC7,
+};
+
+struct artpec6_pcie {
+	struct dw_pcie		*pci;
+	struct regmap		*regmap;	/* DT axis,syscon-pcie */
+	void __iomem		*phy_base;	/* DT phy */
+	enum artpec_pcie_variants variant;
+	enum dw_pcie_device_mode mode;
+};
+
+struct artpec_pcie_of_data {
+	enum artpec_pcie_variants variant;
+	enum dw_pcie_device_mode mode;
+};
+
+static const struct of_device_id artpec6_pcie_of_match[];
+
+/* ARTPEC-6 specific registers */
+#define PCIECFG				0x18
+#define  PCIECFG_DBG_OEN		BIT(24)
+#define  PCIECFG_CORE_RESET_REQ		BIT(21)
+#define  PCIECFG_LTSSM_ENABLE		BIT(20)
+#define  PCIECFG_DEVICE_TYPE_MASK	GENMASK(19, 16)
+#define  PCIECFG_CLKREQ_B		BIT(11)
+#define  PCIECFG_REFCLK_ENABLE		BIT(10)
+#define  PCIECFG_PLL_ENABLE		BIT(9)
+#define  PCIECFG_PCLK_ENABLE		BIT(8)
+#define  PCIECFG_RISRCREN		BIT(4)
+#define  PCIECFG_MODE_TX_DRV_EN		BIT(3)
+#define  PCIECFG_CISRREN		BIT(2)
+#define  PCIECFG_MACRO_ENABLE		BIT(0)
+/* ARTPEC-7 specific fields */
+#define  PCIECFG_REFCLKSEL		BIT(23)
+#define  PCIECFG_NOC_RESET		BIT(3)
+
+#define PCIESTAT			0x1c
+/* ARTPEC-7 specific fields */
+#define  PCIESTAT_EXTREFCLK		BIT(3)
+
+#define NOCCFG				0x40
+#define  NOCCFG_ENABLE_CLK_PCIE		BIT(4)
+#define  NOCCFG_POWER_PCIE_IDLEACK	BIT(3)
+#define  NOCCFG_POWER_PCIE_IDLE		BIT(2)
+#define  NOCCFG_POWER_PCIE_IDLEREQ	BIT(1)
+
+#define PHY_STATUS			0x118
+#define  PHY_COSPLLLOCK			BIT(0)
+
+#define PHY_TX_ASIC_OUT			0x4040
+#define  PHY_TX_ASIC_OUT_TX_ACK		BIT(0)
+
+#define PHY_RX_ASIC_OUT			0x405c
+#define  PHY_RX_ASIC_OUT_ACK		BIT(0)
+
+static u32 artpec6_pcie_readl(struct artpec6_pcie *artpec6_pcie, u32 offset)
+{
+	u32 val;
+
+	regmap_read(artpec6_pcie->regmap, offset, &val);
+	return val;
+}
+
+static void artpec6_pcie_writel(struct artpec6_pcie *artpec6_pcie, u32 offset, u32 val)
+{
+	regmap_write(artpec6_pcie->regmap, offset, val);
+}
+
+static u64 artpec6_pcie_cpu_addr_fixup(struct dw_pcie *pci, u64 pci_addr)
+{
+	struct artpec6_pcie *artpec6_pcie = to_artpec6_pcie(pci);
+	struct dw_pcie_rp *pp = &pci->pp;
+	struct dw_pcie_ep *ep = &pci->ep;
+
+	switch (artpec6_pcie->mode) {
+	case DW_PCIE_RC_TYPE:
+		return pci_addr - pp->cfg0_base;
+	case DW_PCIE_EP_TYPE:
+		return pci_addr - ep->phys_base;
+	default:
+		dev_err(pci->dev, "UNKNOWN device type\n");
+	}
+	return pci_addr;
+}
+
+static int artpec6_pcie_establish_link(struct dw_pcie *pci)
+{
+	struct artpec6_pcie *artpec6_pcie = to_artpec6_pcie(pci);
+	u32 val;
+
+	val = artpec6_pcie_readl(artpec6_pcie, PCIECFG);
+	val |= PCIECFG_LTSSM_ENABLE;
+	artpec6_pcie_writel(artpec6_pcie, PCIECFG, val);
+
+	return 0;
+}
+
+static void artpec6_pcie_stop_link(struct dw_pcie *pci)
+{
+	struct artpec6_pcie *artpec6_pcie = to_artpec6_pcie(pci);
+	u32 val;
+
+	val = artpec6_pcie_readl(artpec6_pcie, PCIECFG);
+	val &= ~PCIECFG_LTSSM_ENABLE;
+	artpec6_pcie_writel(artpec6_pcie, PCIECFG, val);
+}
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+	.cpu_addr_fixup = artpec6_pcie_cpu_addr_fixup,
+	.start_link = artpec6_pcie_establish_link,
+	.stop_link = artpec6_pcie_stop_link,
+};
+
+static void artpec6_pcie_wait_for_phy_a6(struct artpec6_pcie *artpec6_pcie)
+{
+	struct dw_pcie *pci = artpec6_pcie->pci;
+	struct device *dev = pci->dev;
+	u32 val;
+	unsigned int retries;
+
+	retries = 50;
+	do {
+		usleep_range(1000, 2000);
+		val = artpec6_pcie_readl(artpec6_pcie, NOCCFG);
+		retries--;
+	} while (retries &&
+		(val & (NOCCFG_POWER_PCIE_IDLEACK | NOCCFG_POWER_PCIE_IDLE)));
+	if (!retries)
+		dev_err(dev, "PCIe clock manager did not leave idle state\n");
+
+	retries = 50;
+	do {
+		usleep_range(1000, 2000);
+		val = readl(artpec6_pcie->phy_base + PHY_STATUS);
+		retries--;
+	} while (retries && !(val & PHY_COSPLLLOCK));
+	if (!retries)
+		dev_err(dev, "PHY PLL did not lock\n");
+}
+
+static void artpec6_pcie_wait_for_phy_a7(struct artpec6_pcie *artpec6_pcie)
+{
+	struct dw_pcie *pci = artpec6_pcie->pci;
+	struct device *dev = pci->dev;
+	u32 val;
+	u16 phy_status_tx, phy_status_rx;
+	unsigned int retries;
+
+	retries = 50;
+	do {
+		usleep_range(1000, 2000);
+		val = artpec6_pcie_readl(artpec6_pcie, NOCCFG);
+		retries--;
+	} while (retries &&
+		(val & (NOCCFG_POWER_PCIE_IDLEACK | NOCCFG_POWER_PCIE_IDLE)));
+	if (!retries)
+		dev_err(dev, "PCIe clock manager did not leave idle state\n");
+
+	retries = 50;
+	do {
+		usleep_range(1000, 2000);
+		phy_status_tx = readw(artpec6_pcie->phy_base + PHY_TX_ASIC_OUT);
+		phy_status_rx = readw(artpec6_pcie->phy_base + PHY_RX_ASIC_OUT);
+		retries--;
+	} while (retries && ((phy_status_tx & PHY_TX_ASIC_OUT_TX_ACK) ||
+				(phy_status_rx & PHY_RX_ASIC_OUT_ACK)));
+	if (!retries)
+		dev_err(dev, "PHY did not enter Pn state\n");
+}
+
+static void artpec6_pcie_wait_for_phy(struct artpec6_pcie *artpec6_pcie)
+{
+	switch (artpec6_pcie->variant) {
+	case ARTPEC6:
+		artpec6_pcie_wait_for_phy_a6(artpec6_pcie);
+		break;
+	case ARTPEC7:
+		artpec6_pcie_wait_for_phy_a7(artpec6_pcie);
+		break;
+	}
+}
+
+static void artpec6_pcie_init_phy_a6(struct artpec6_pcie *artpec6_pcie)
+{
+	u32 val;
+
+	val = artpec6_pcie_readl(artpec6_pcie, PCIECFG);
+	val |=  PCIECFG_RISRCREN |	/* Receiver term. 50 Ohm */
+		PCIECFG_MODE_TX_DRV_EN |
+		PCIECFG_CISRREN |	/* Reference clock term. 100 Ohm */
+		PCIECFG_MACRO_ENABLE;
+	val |= PCIECFG_REFCLK_ENABLE;
+	val &= ~PCIECFG_DBG_OEN;
+	val &= ~PCIECFG_CLKREQ_B;
+	artpec6_pcie_writel(artpec6_pcie, PCIECFG, val);
+	usleep_range(5000, 6000);
+
+	val = artpec6_pcie_readl(artpec6_pcie, NOCCFG);
+	val |= NOCCFG_ENABLE_CLK_PCIE;
+	artpec6_pcie_writel(artpec6_pcie, NOCCFG, val);
+	usleep_range(20, 30);
+
+	val = artpec6_pcie_readl(artpec6_pcie, PCIECFG);
+	val |= PCIECFG_PCLK_ENABLE | PCIECFG_PLL_ENABLE;
+	artpec6_pcie_writel(artpec6_pcie, PCIECFG, val);
+	usleep_range(6000, 7000);
+
+	val = artpec6_pcie_readl(artpec6_pcie, NOCCFG);
+	val &= ~NOCCFG_POWER_PCIE_IDLEREQ;
+	artpec6_pcie_writel(artpec6_pcie, NOCCFG, val);
+}
+
+static void artpec6_pcie_init_phy_a7(struct artpec6_pcie *artpec6_pcie)
+{
+	struct dw_pcie *pci = artpec6_pcie->pci;
+	u32 val;
+	bool extrefclk;
+
+	/* Check if external reference clock is connected */
+	val = artpec6_pcie_readl(artpec6_pcie, PCIESTAT);
+	extrefclk = !!(val & PCIESTAT_EXTREFCLK);
+	dev_dbg(pci->dev, "Using reference clock: %s\n",
+		extrefclk ? "external" : "internal");
+
+	val = artpec6_pcie_readl(artpec6_pcie, PCIECFG);
+	val |=  PCIECFG_RISRCREN |	/* Receiver term. 50 Ohm */
+		PCIECFG_PCLK_ENABLE;
+	if (extrefclk)
+		val |= PCIECFG_REFCLKSEL;
+	else
+		val &= ~PCIECFG_REFCLKSEL;
+	artpec6_pcie_writel(artpec6_pcie, PCIECFG, val);
+	usleep_range(10, 20);
+
+	val = artpec6_pcie_readl(artpec6_pcie, NOCCFG);
+	val |= NOCCFG_ENABLE_CLK_PCIE;
+	artpec6_pcie_writel(artpec6_pcie, NOCCFG, val);
+	usleep_range(20, 30);
+
+	val = artpec6_pcie_readl(artpec6_pcie, NOCCFG);
+	val &= ~NOCCFG_POWER_PCIE_IDLEREQ;
+	artpec6_pcie_writel(artpec6_pcie, NOCCFG, val);
+}
+
+static void artpec6_pcie_init_phy(struct artpec6_pcie *artpec6_pcie)
+{
+	switch (artpec6_pcie->variant) {
+	case ARTPEC6:
+		artpec6_pcie_init_phy_a6(artpec6_pcie);
+		break;
+	case ARTPEC7:
+		artpec6_pcie_init_phy_a7(artpec6_pcie);
+		break;
+	}
+}
+
+static void artpec6_pcie_assert_core_reset(struct artpec6_pcie *artpec6_pcie)
+{
+	u32 val;
+
+	val = artpec6_pcie_readl(artpec6_pcie, PCIECFG);
+	switch (artpec6_pcie->variant) {
+	case ARTPEC6:
+		val |= PCIECFG_CORE_RESET_REQ;
+		break;
+	case ARTPEC7:
+		val &= ~PCIECFG_NOC_RESET;
+		break;
+	}
+	artpec6_pcie_writel(artpec6_pcie, PCIECFG, val);
+}
+
+static void artpec6_pcie_deassert_core_reset(struct artpec6_pcie *artpec6_pcie)
+{
+	u32 val;
+
+	val = artpec6_pcie_readl(artpec6_pcie, PCIECFG);
+	switch (artpec6_pcie->variant) {
+	case ARTPEC6:
+		val &= ~PCIECFG_CORE_RESET_REQ;
+		break;
+	case ARTPEC7:
+		val |= PCIECFG_NOC_RESET;
+		break;
+	}
+	artpec6_pcie_writel(artpec6_pcie, PCIECFG, val);
+	usleep_range(100, 200);
+}
+
+static int artpec6_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct artpec6_pcie *artpec6_pcie = to_artpec6_pcie(pci);
+
+	if (artpec6_pcie->variant == ARTPEC7) {
+		pci->n_fts[0] = 180;
+		pci->n_fts[1] = 180;
+	}
+	artpec6_pcie_assert_core_reset(artpec6_pcie);
+	artpec6_pcie_init_phy(artpec6_pcie);
+	artpec6_pcie_deassert_core_reset(artpec6_pcie);
+	artpec6_pcie_wait_for_phy(artpec6_pcie);
+
+	return 0;
+}
+
+static const struct dw_pcie_host_ops artpec6_pcie_host_ops = {
+	.host_init = artpec6_pcie_host_init,
+};
+
+static void artpec6_pcie_ep_init(struct dw_pcie_ep *ep)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct artpec6_pcie *artpec6_pcie = to_artpec6_pcie(pci);
+	enum pci_barno bar;
+
+	artpec6_pcie_assert_core_reset(artpec6_pcie);
+	artpec6_pcie_init_phy(artpec6_pcie);
+	artpec6_pcie_deassert_core_reset(artpec6_pcie);
+	artpec6_pcie_wait_for_phy(artpec6_pcie);
+
+	for (bar = 0; bar < PCI_STD_NUM_BARS; bar++)
+		dw_pcie_ep_reset_bar(pci, bar);
+}
+
+static int artpec6_pcie_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
+				  enum pci_epc_irq_type type, u16 interrupt_num)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+	switch (type) {
+	case PCI_EPC_IRQ_LEGACY:
+		dev_err(pci->dev, "EP cannot trigger legacy IRQs\n");
+		return -EINVAL;
+	case PCI_EPC_IRQ_MSI:
+		return dw_pcie_ep_raise_msi_irq(ep, func_no, interrupt_num);
+	default:
+		dev_err(pci->dev, "UNKNOWN IRQ type\n");
+	}
+
+	return 0;
+}
+
+static const struct dw_pcie_ep_ops pcie_ep_ops = {
+	.ep_init = artpec6_pcie_ep_init,
+	.raise_irq = artpec6_pcie_raise_irq,
+};
+
+static int artpec6_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct dw_pcie *pci;
+	struct artpec6_pcie *artpec6_pcie;
+	int ret;
+	const struct artpec_pcie_of_data *data;
+	enum artpec_pcie_variants variant;
+	enum dw_pcie_device_mode mode;
+	u32 val;
+
+	data = of_device_get_match_data(dev);
+	if (!data)
+		return -EINVAL;
+
+	variant = (enum artpec_pcie_variants)data->variant;
+	mode = (enum dw_pcie_device_mode)data->mode;
+
+	artpec6_pcie = devm_kzalloc(dev, sizeof(*artpec6_pcie), GFP_KERNEL);
+	if (!artpec6_pcie)
+		return -ENOMEM;
+
+	pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	if (!pci)
+		return -ENOMEM;
+
+	pci->dev = dev;
+	pci->ops = &dw_pcie_ops;
+
+	artpec6_pcie->pci = pci;
+	artpec6_pcie->variant = variant;
+	artpec6_pcie->mode = mode;
+
+	artpec6_pcie->phy_base =
+		devm_platform_ioremap_resource_byname(pdev, "phy");
+	if (IS_ERR(artpec6_pcie->phy_base))
+		return PTR_ERR(artpec6_pcie->phy_base);
+
+	artpec6_pcie->regmap =
+		syscon_regmap_lookup_by_phandle(dev->of_node,
+						"axis,syscon-pcie");
+	if (IS_ERR(artpec6_pcie->regmap))
+		return PTR_ERR(artpec6_pcie->regmap);
+
+	platform_set_drvdata(pdev, artpec6_pcie);
+
+	switch (artpec6_pcie->mode) {
+	case DW_PCIE_RC_TYPE:
+		if (!IS_ENABLED(CONFIG_PCIE_ARTPEC6_HOST))
+			return -ENODEV;
+
+		pci->pp.ops = &artpec6_pcie_host_ops;
+
+		ret = dw_pcie_host_init(&pci->pp);
+		if (ret < 0)
+			return ret;
+		break;
+	case DW_PCIE_EP_TYPE:
+		if (!IS_ENABLED(CONFIG_PCIE_ARTPEC6_EP))
+			return -ENODEV;
+
+		val = artpec6_pcie_readl(artpec6_pcie, PCIECFG);
+		val &= ~PCIECFG_DEVICE_TYPE_MASK;
+		artpec6_pcie_writel(artpec6_pcie, PCIECFG, val);
+
+		pci->ep.ops = &pcie_ep_ops;
+
+		return dw_pcie_ep_init(&pci->ep);
+	default:
+		dev_err(dev, "INVALID device type %d\n", artpec6_pcie->mode);
+	}
+
+	return 0;
+}
+
+static const struct artpec_pcie_of_data artpec6_pcie_rc_of_data = {
+	.variant = ARTPEC6,
+	.mode = DW_PCIE_RC_TYPE,
+};
+
+static const struct artpec_pcie_of_data artpec6_pcie_ep_of_data = {
+	.variant = ARTPEC6,
+	.mode = DW_PCIE_EP_TYPE,
+};
+
+static const struct artpec_pcie_of_data artpec7_pcie_rc_of_data = {
+	.variant = ARTPEC7,
+	.mode = DW_PCIE_RC_TYPE,
+};
+
+static const struct artpec_pcie_of_data artpec7_pcie_ep_of_data = {
+	.variant = ARTPEC7,
+	.mode = DW_PCIE_EP_TYPE,
+};
+
+static const struct of_device_id artpec6_pcie_of_match[] = {
+	{
+		.compatible = "axis,artpec6-pcie",
+		.data = &artpec6_pcie_rc_of_data,
+	},
+	{
+		.compatible = "axis,artpec6-pcie-ep",
+		.data = &artpec6_pcie_ep_of_data,
+	},
+	{
+		.compatible = "axis,artpec7-pcie",
+		.data = &artpec7_pcie_rc_of_data,
+	},
+	{
+		.compatible = "axis,artpec7-pcie-ep",
+		.data = &artpec7_pcie_ep_of_data,
+	},
+	{},
+};
+
+static struct platform_driver artpec6_pcie_driver = {
+	.probe = artpec6_pcie_probe,
+	.driver = {
+		.name	= "artpec6-pcie",
+		.of_match_table = artpec6_pcie_of_match,
+		.suppress_bind_attrs = true,
+	},
+};
+builtin_platform_driver(artpec6_pcie_driver);
diff --git a/drivers/pci/controller/dwc/pcie-bt1.c b/drivers/pci/controller/dwc/pcie-bt1.c
new file mode 100644
index 000000000..17e696797
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-bt1.c
@@ -0,0 +1,645 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 BAIKAL ELECTRONICS, JSC
+ *
+ * Authors:
+ *   Vadim Vlasov <Vadim.Vlasov@baikalelectronics.ru>
+ *   Serge Semin <Sergey.Semin@baikalelectronics.ru>
+ *
+ * Baikal-T1 PCIe controller driver
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/types.h>
+
+#include "pcie-designware.h"
+
+/* Baikal-T1 System CCU control registers */
+#define BT1_CCU_PCIE_CLKC			0x140
+#define BT1_CCU_PCIE_REQ_PCS_CLK		BIT(16)
+#define BT1_CCU_PCIE_REQ_MAC_CLK		BIT(17)
+#define BT1_CCU_PCIE_REQ_PIPE_CLK		BIT(18)
+
+#define BT1_CCU_PCIE_RSTC			0x144
+#define BT1_CCU_PCIE_REQ_LINK_RST		BIT(13)
+#define BT1_CCU_PCIE_REQ_SMLH_RST		BIT(14)
+#define BT1_CCU_PCIE_REQ_PHY_RST		BIT(16)
+#define BT1_CCU_PCIE_REQ_CORE_RST		BIT(24)
+#define BT1_CCU_PCIE_REQ_STICKY_RST		BIT(26)
+#define BT1_CCU_PCIE_REQ_NSTICKY_RST		BIT(27)
+
+#define BT1_CCU_PCIE_PMSC			0x148
+#define BT1_CCU_PCIE_LTSSM_STATE_MASK		GENMASK(5, 0)
+#define BT1_CCU_PCIE_LTSSM_DET_QUIET		0x00
+#define BT1_CCU_PCIE_LTSSM_DET_ACT		0x01
+#define BT1_CCU_PCIE_LTSSM_POLL_ACT		0x02
+#define BT1_CCU_PCIE_LTSSM_POLL_COMP		0x03
+#define BT1_CCU_PCIE_LTSSM_POLL_CONF		0x04
+#define BT1_CCU_PCIE_LTSSM_PRE_DET_QUIET	0x05
+#define BT1_CCU_PCIE_LTSSM_DET_WAIT		0x06
+#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_START	0x07
+#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_ACEPT	0x08
+#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_WAIT	0x09
+#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_ACEPT	0x0a
+#define BT1_CCU_PCIE_LTSSM_CFG_COMPLETE		0x0b
+#define BT1_CCU_PCIE_LTSSM_CFG_IDLE		0x0c
+#define BT1_CCU_PCIE_LTSSM_RCVR_LOCK		0x0d
+#define BT1_CCU_PCIE_LTSSM_RCVR_SPEED		0x0e
+#define BT1_CCU_PCIE_LTSSM_RCVR_RCVRCFG		0x0f
+#define BT1_CCU_PCIE_LTSSM_RCVR_IDLE		0x10
+#define BT1_CCU_PCIE_LTSSM_L0			0x11
+#define BT1_CCU_PCIE_LTSSM_L0S			0x12
+#define BT1_CCU_PCIE_LTSSM_L123_SEND_IDLE	0x13
+#define BT1_CCU_PCIE_LTSSM_L1_IDLE		0x14
+#define BT1_CCU_PCIE_LTSSM_L2_IDLE		0x15
+#define BT1_CCU_PCIE_LTSSM_L2_WAKE		0x16
+#define BT1_CCU_PCIE_LTSSM_DIS_ENTRY		0x17
+#define BT1_CCU_PCIE_LTSSM_DIS_IDLE		0x18
+#define BT1_CCU_PCIE_LTSSM_DISABLE		0x19
+#define BT1_CCU_PCIE_LTSSM_LPBK_ENTRY		0x1a
+#define BT1_CCU_PCIE_LTSSM_LPBK_ACTIVE		0x1b
+#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT		0x1c
+#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT_TOUT	0x1d
+#define BT1_CCU_PCIE_LTSSM_HOT_RST_ENTRY	0x1e
+#define BT1_CCU_PCIE_LTSSM_HOT_RST		0x1f
+#define BT1_CCU_PCIE_LTSSM_RCVR_EQ0		0x20
+#define BT1_CCU_PCIE_LTSSM_RCVR_EQ1		0x21
+#define BT1_CCU_PCIE_LTSSM_RCVR_EQ2		0x22
+#define BT1_CCU_PCIE_LTSSM_RCVR_EQ3		0x23
+#define BT1_CCU_PCIE_SMLH_LINKUP		BIT(6)
+#define BT1_CCU_PCIE_RDLH_LINKUP		BIT(7)
+#define BT1_CCU_PCIE_PM_LINKSTATE_L0S		BIT(8)
+#define BT1_CCU_PCIE_PM_LINKSTATE_L1		BIT(9)
+#define BT1_CCU_PCIE_PM_LINKSTATE_L2		BIT(10)
+#define BT1_CCU_PCIE_L1_PENDING			BIT(12)
+#define BT1_CCU_PCIE_REQ_EXIT_L1		BIT(14)
+#define BT1_CCU_PCIE_LTSSM_RCVR_EQ		BIT(15)
+#define BT1_CCU_PCIE_PM_DSTAT_MASK		GENMASK(18, 16)
+#define BT1_CCU_PCIE_PM_PME_EN			BIT(20)
+#define BT1_CCU_PCIE_PM_PME_STATUS		BIT(21)
+#define BT1_CCU_PCIE_AUX_PM_EN			BIT(22)
+#define BT1_CCU_PCIE_AUX_PWR_DET		BIT(23)
+#define BT1_CCU_PCIE_WAKE_DET			BIT(24)
+#define BT1_CCU_PCIE_TURNOFF_REQ		BIT(30)
+#define BT1_CCU_PCIE_TURNOFF_ACK		BIT(31)
+
+#define BT1_CCU_PCIE_GENC			0x14c
+#define BT1_CCU_PCIE_LTSSM_EN			BIT(1)
+#define BT1_CCU_PCIE_DBI2_MODE			BIT(2)
+#define BT1_CCU_PCIE_MGMT_EN			BIT(3)
+#define BT1_CCU_PCIE_RXLANE_FLIP_EN		BIT(16)
+#define BT1_CCU_PCIE_TXLANE_FLIP_EN		BIT(17)
+#define BT1_CCU_PCIE_SLV_XFER_PEND		BIT(24)
+#define BT1_CCU_PCIE_RCV_XFER_PEND		BIT(25)
+#define BT1_CCU_PCIE_DBI_XFER_PEND		BIT(26)
+#define BT1_CCU_PCIE_DMA_XFER_PEND		BIT(27)
+
+#define BT1_CCU_PCIE_LTSSM_LINKUP(_pmsc) \
+({ \
+	int __state = FIELD_GET(BT1_CCU_PCIE_LTSSM_STATE_MASK, _pmsc); \
+	__state >= BT1_CCU_PCIE_LTSSM_L0 && __state <= BT1_CCU_PCIE_LTSSM_L2_WAKE; \
+})
+
+/* Baikal-T1 PCIe specific control registers */
+#define BT1_PCIE_AXI2MGM_LANENUM		0xd04
+#define BT1_PCIE_AXI2MGM_LANESEL_MASK		GENMASK(3, 0)
+
+#define BT1_PCIE_AXI2MGM_ADDRCTL		0xd08
+#define BT1_PCIE_AXI2MGM_PHYREG_ADDR_MASK	GENMASK(20, 0)
+#define BT1_PCIE_AXI2MGM_READ_FLAG		BIT(29)
+#define BT1_PCIE_AXI2MGM_DONE			BIT(30)
+#define BT1_PCIE_AXI2MGM_BUSY			BIT(31)
+
+#define BT1_PCIE_AXI2MGM_WRITEDATA		0xd0c
+#define BT1_PCIE_AXI2MGM_WDATA			GENMASK(15, 0)
+
+#define BT1_PCIE_AXI2MGM_READDATA		0xd10
+#define BT1_PCIE_AXI2MGM_RDATA			GENMASK(15, 0)
+
+/* Generic Baikal-T1 PCIe interface resources */
+#define BT1_PCIE_NUM_APP_CLKS			ARRAY_SIZE(bt1_pcie_app_clks)
+#define BT1_PCIE_NUM_CORE_CLKS			ARRAY_SIZE(bt1_pcie_core_clks)
+#define BT1_PCIE_NUM_APP_RSTS			ARRAY_SIZE(bt1_pcie_app_rsts)
+#define BT1_PCIE_NUM_CORE_RSTS			ARRAY_SIZE(bt1_pcie_core_rsts)
+
+/* PCIe bus setup delays and timeouts */
+#define BT1_PCIE_RST_DELAY_MS			100
+#define BT1_PCIE_RUN_DELAY_US			100
+#define BT1_PCIE_REQ_DELAY_US			1
+#define BT1_PCIE_REQ_TIMEOUT_US			1000
+#define BT1_PCIE_LNK_DELAY_US			1000
+#define BT1_PCIE_LNK_TIMEOUT_US			1000000
+
+static const enum dw_pcie_app_clk bt1_pcie_app_clks[] = {
+	DW_PCIE_DBI_CLK, DW_PCIE_MSTR_CLK, DW_PCIE_SLV_CLK,
+};
+
+static const enum dw_pcie_core_clk bt1_pcie_core_clks[] = {
+	DW_PCIE_REF_CLK,
+};
+
+static const enum dw_pcie_app_rst bt1_pcie_app_rsts[] = {
+	DW_PCIE_MSTR_RST, DW_PCIE_SLV_RST,
+};
+
+static const enum dw_pcie_core_rst bt1_pcie_core_rsts[] = {
+	DW_PCIE_NON_STICKY_RST, DW_PCIE_STICKY_RST, DW_PCIE_CORE_RST,
+	DW_PCIE_PIPE_RST, DW_PCIE_PHY_RST, DW_PCIE_HOT_RST, DW_PCIE_PWR_RST,
+};
+
+struct bt1_pcie {
+	struct dw_pcie dw;
+	struct platform_device *pdev;
+	struct regmap *sys_regs;
+};
+#define to_bt1_pcie(_dw) container_of(_dw, struct bt1_pcie, dw)
+
+/*
+ * Baikal-T1 MMIO space must be read/written by the dword-aligned
+ * instructions. Note the methods are optimized to have the dword operations
+ * performed with minimum overhead as the most frequently used ones.
+ */
+static int bt1_pcie_read_mmio(void __iomem *addr, int size, u32 *val)
+{
+	unsigned int ofs = (uintptr_t)addr & 0x3;
+
+	if (!IS_ALIGNED((uintptr_t)addr, size))
+		return -EINVAL;
+
+	*val = readl(addr - ofs) >> ofs * BITS_PER_BYTE;
+	if (size == 4) {
+		return 0;
+	} else if (size == 2) {
+		*val &= 0xffff;
+		return 0;
+	} else if (size == 1) {
+		*val &= 0xff;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int bt1_pcie_write_mmio(void __iomem *addr, int size, u32 val)
+{
+	unsigned int ofs = (uintptr_t)addr & 0x3;
+	u32 tmp, mask;
+
+	if (!IS_ALIGNED((uintptr_t)addr, size))
+		return -EINVAL;
+
+	if (size == 4) {
+		writel(val, addr);
+		return 0;
+	} else if (size == 2 || size == 1) {
+		mask = GENMASK(size * BITS_PER_BYTE - 1, 0);
+		tmp = readl(addr - ofs) & ~(mask << ofs * BITS_PER_BYTE);
+		tmp |= (val & mask) << ofs * BITS_PER_BYTE;
+		writel(tmp, addr - ofs);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static u32 bt1_pcie_read_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg,
+			     size_t size)
+{
+	int ret;
+	u32 val;
+
+	ret = bt1_pcie_read_mmio(base + reg, size, &val);
+	if (ret) {
+		dev_err(pci->dev, "Read DBI address failed\n");
+		return ~0U;
+	}
+
+	return val;
+}
+
+static void bt1_pcie_write_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg,
+			       size_t size, u32 val)
+{
+	int ret;
+
+	ret = bt1_pcie_write_mmio(base + reg, size, val);
+	if (ret)
+		dev_err(pci->dev, "Write DBI address failed\n");
+}
+
+static void bt1_pcie_write_dbi2(struct dw_pcie *pci, void __iomem *base, u32 reg,
+				size_t size, u32 val)
+{
+	struct bt1_pcie *btpci = to_bt1_pcie(pci);
+	int ret;
+
+	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
+			   BT1_CCU_PCIE_DBI2_MODE, BT1_CCU_PCIE_DBI2_MODE);
+
+	ret = bt1_pcie_write_mmio(base + reg, size, val);
+	if (ret)
+		dev_err(pci->dev, "Write DBI2 address failed\n");
+
+	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
+			   BT1_CCU_PCIE_DBI2_MODE, 0);
+}
+
+static int bt1_pcie_start_link(struct dw_pcie *pci)
+{
+	struct bt1_pcie *btpci = to_bt1_pcie(pci);
+	u32 val;
+	int ret;
+
+	/*
+	 * Enable LTSSM and make sure it was able to establish both PHY and
+	 * data links. This procedure shall work fine to reach 2.5 GT/s speed.
+	 */
+	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
+			   BT1_CCU_PCIE_LTSSM_EN, BT1_CCU_PCIE_LTSSM_EN);
+
+	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
+				       (val & BT1_CCU_PCIE_SMLH_LINKUP),
+				       BT1_PCIE_LNK_DELAY_US, BT1_PCIE_LNK_TIMEOUT_US);
+	if (ret) {
+		dev_err(pci->dev, "LTSSM failed to set PHY link up\n");
+		return ret;
+	}
+
+	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
+				       (val & BT1_CCU_PCIE_RDLH_LINKUP),
+				       BT1_PCIE_LNK_DELAY_US, BT1_PCIE_LNK_TIMEOUT_US);
+	if (ret) {
+		dev_err(pci->dev, "LTSSM failed to set data link up\n");
+		return ret;
+	}
+
+	/*
+	 * Activate direct speed change after the link is established in an
+	 * attempt to reach a higher bus performance (up to Gen.3 - 8.0 GT/s).
+	 * This is required at least to get 8.0 GT/s speed.
+	 */
+	val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+	val |= PORT_LOGIC_SPEED_CHANGE;
+	dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
+
+	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
+				       BT1_CCU_PCIE_LTSSM_LINKUP(val),
+				       BT1_PCIE_LNK_DELAY_US, BT1_PCIE_LNK_TIMEOUT_US);
+	if (ret)
+		dev_err(pci->dev, "LTSSM failed to get into L0 state\n");
+
+	return ret;
+}
+
+static void bt1_pcie_stop_link(struct dw_pcie *pci)
+{
+	struct bt1_pcie *btpci = to_bt1_pcie(pci);
+
+	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
+			   BT1_CCU_PCIE_LTSSM_EN, 0);
+}
+
+static const struct dw_pcie_ops bt1_pcie_ops = {
+	.read_dbi = bt1_pcie_read_dbi,
+	.write_dbi = bt1_pcie_write_dbi,
+	.write_dbi2 = bt1_pcie_write_dbi2,
+	.start_link = bt1_pcie_start_link,
+	.stop_link = bt1_pcie_stop_link,
+};
+
+static struct pci_ops bt1_pci_ops = {
+	.map_bus = dw_pcie_own_conf_map_bus,
+	.read = pci_generic_config_read32,
+	.write = pci_generic_config_write32,
+};
+
+static int bt1_pcie_get_resources(struct bt1_pcie *btpci)
+{
+	struct device *dev = btpci->dw.dev;
+	int i;
+
+	/* DBI access is supposed to be performed by the dword-aligned IOs */
+	btpci->dw.pp.bridge->ops = &bt1_pci_ops;
+
+	/* These CSRs are in MMIO so we won't check the regmap-methods status */
+	btpci->sys_regs =
+		syscon_regmap_lookup_by_phandle(dev->of_node, "baikal,bt1-syscon");
+	if (IS_ERR(btpci->sys_regs))
+		return dev_err_probe(dev, PTR_ERR(btpci->sys_regs),
+				     "Failed to get syscon\n");
+
+	/* Make sure all the required resources have been specified */
+	for (i = 0; i < BT1_PCIE_NUM_APP_CLKS; i++) {
+		if (!btpci->dw.app_clks[bt1_pcie_app_clks[i]].clk) {
+			dev_err(dev, "App clocks set is incomplete\n");
+			return -ENOENT;
+		}
+	}
+
+	for (i = 0; i < BT1_PCIE_NUM_CORE_CLKS; i++) {
+		if (!btpci->dw.core_clks[bt1_pcie_core_clks[i]].clk) {
+			dev_err(dev, "Core clocks set is incomplete\n");
+			return -ENOENT;
+		}
+	}
+
+	for (i = 0; i < BT1_PCIE_NUM_APP_RSTS; i++) {
+		if (!btpci->dw.app_rsts[bt1_pcie_app_rsts[i]].rstc) {
+			dev_err(dev, "App resets set is incomplete\n");
+			return -ENOENT;
+		}
+	}
+
+	for (i = 0; i < BT1_PCIE_NUM_CORE_RSTS; i++) {
+		if (!btpci->dw.core_rsts[bt1_pcie_core_rsts[i]].rstc) {
+			dev_err(dev, "Core resets set is incomplete\n");
+			return -ENOENT;
+		}
+	}
+
+	return 0;
+}
+
+static void bt1_pcie_full_stop_bus(struct bt1_pcie *btpci, bool init)
+{
+	struct device *dev = btpci->dw.dev;
+	struct dw_pcie *pci = &btpci->dw;
+	int ret;
+
+	/* Disable LTSSM for sure */
+	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
+			   BT1_CCU_PCIE_LTSSM_EN, 0);
+
+	/*
+	 * Application reset controls are trigger-based so assert the core
+	 * resets only.
+	 */
+	ret = reset_control_bulk_assert(DW_PCIE_NUM_CORE_RSTS, pci->core_rsts);
+	if (ret)
+		dev_err(dev, "Failed to assert core resets\n");
+
+	/*
+	 * Clocks are disabled by default at least in accordance with the clk
+	 * enable counter value on init stage.
+	 */
+	if (!init) {
+		clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
+
+		clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
+	}
+
+	/* The peripheral devices are unavailable anyway so reset them too */
+	gpiod_set_value_cansleep(pci->pe_rst, 1);
+
+	/* Make sure all the resets are settled */
+	msleep(BT1_PCIE_RST_DELAY_MS);
+}
+
+/*
+ * Implements the cold reset procedure in accordance with the reference manual
+ * and available PM signals.
+ */
+static int bt1_pcie_cold_start_bus(struct bt1_pcie *btpci)
+{
+	struct device *dev = btpci->dw.dev;
+	struct dw_pcie *pci = &btpci->dw;
+	u32 val;
+	int ret;
+
+	/* First get out of the Power/Hot reset state */
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PWR_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert PHY reset\n");
+		return ret;
+	}
+
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_HOT_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert hot reset\n");
+		goto err_assert_pwr_rst;
+	}
+
+	/* Wait for the PM-core to stop requesting the PHY reset */
+	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val,
+				       !(val & BT1_CCU_PCIE_REQ_PHY_RST),
+				       BT1_PCIE_REQ_DELAY_US, BT1_PCIE_REQ_TIMEOUT_US);
+	if (ret) {
+		dev_err(dev, "Timed out waiting for PM to stop PHY resetting\n");
+		goto err_assert_hot_rst;
+	}
+
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PHY_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert PHY reset\n");
+		goto err_assert_hot_rst;
+	}
+
+	/* Clocks can be now enabled, but the ref one is crucial at this stage */
+	ret = clk_bulk_prepare_enable(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
+	if (ret) {
+		dev_err(dev, "Failed to enable app clocks\n");
+		goto err_assert_phy_rst;
+	}
+
+	ret = clk_bulk_prepare_enable(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
+	if (ret) {
+		dev_err(dev, "Failed to enable ref clocks\n");
+		goto err_disable_app_clk;
+	}
+
+	/* Wait for the PM to stop requesting the controller core reset */
+	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val,
+				       !(val & BT1_CCU_PCIE_REQ_CORE_RST),
+				       BT1_PCIE_REQ_DELAY_US, BT1_PCIE_REQ_TIMEOUT_US);
+	if (ret) {
+		dev_err(dev, "Timed out waiting for PM to stop core resetting\n");
+		goto err_disable_core_clk;
+	}
+
+	/* PCS-PIPE interface and controller core can be now activated */
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert PIPE reset\n");
+		goto err_disable_core_clk;
+	}
+
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_CORE_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert core reset\n");
+		goto err_assert_pipe_rst;
+	}
+
+	/* It's recommended to reset the core and application logic together */
+	ret = reset_control_bulk_reset(DW_PCIE_NUM_APP_RSTS, pci->app_rsts);
+	if (ret) {
+		dev_err(dev, "Failed to reset app domain\n");
+		goto err_assert_core_rst;
+	}
+
+	/* Sticky/Non-sticky CSR flags can be now unreset too */
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert sticky reset\n");
+		goto err_assert_core_rst;
+	}
+
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_NON_STICKY_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert non-sticky reset\n");
+		goto err_assert_sticky_rst;
+	}
+
+	/* Activate the PCIe bus peripheral devices */
+	gpiod_set_value_cansleep(pci->pe_rst, 0);
+
+	/* Make sure the state is settled (LTSSM is still disabled though) */
+	usleep_range(BT1_PCIE_RUN_DELAY_US, BT1_PCIE_RUN_DELAY_US + 100);
+
+	return 0;
+
+err_assert_sticky_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc);
+
+err_assert_core_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_CORE_RST].rstc);
+
+err_assert_pipe_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc);
+
+err_disable_core_clk:
+	clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
+
+err_disable_app_clk:
+	clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
+
+err_assert_phy_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_PHY_RST].rstc);
+
+err_assert_hot_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_HOT_RST].rstc);
+
+err_assert_pwr_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_PWR_RST].rstc);
+
+	return ret;
+}
+
+static int bt1_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct bt1_pcie *btpci = to_bt1_pcie(pci);
+	int ret;
+
+	ret = bt1_pcie_get_resources(btpci);
+	if (ret)
+		return ret;
+
+	bt1_pcie_full_stop_bus(btpci, true);
+
+	return bt1_pcie_cold_start_bus(btpci);
+}
+
+static void bt1_pcie_host_deinit(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct bt1_pcie *btpci = to_bt1_pcie(pci);
+
+	bt1_pcie_full_stop_bus(btpci, false);
+}
+
+static const struct dw_pcie_host_ops bt1_pcie_host_ops = {
+	.host_init = bt1_pcie_host_init,
+	.host_deinit = bt1_pcie_host_deinit,
+};
+
+static struct bt1_pcie *bt1_pcie_create_data(struct platform_device *pdev)
+{
+	struct bt1_pcie *btpci;
+
+	btpci = devm_kzalloc(&pdev->dev, sizeof(*btpci), GFP_KERNEL);
+	if (!btpci)
+		return ERR_PTR(-ENOMEM);
+
+	btpci->pdev = pdev;
+
+	platform_set_drvdata(pdev, btpci);
+
+	return btpci;
+}
+
+static int bt1_pcie_add_port(struct bt1_pcie *btpci)
+{
+	struct device *dev = &btpci->pdev->dev;
+	int ret;
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	if (ret)
+		return ret;
+
+	btpci->dw.version = DW_PCIE_VER_460A;
+	btpci->dw.dev = dev;
+	btpci->dw.ops = &bt1_pcie_ops;
+
+	btpci->dw.pp.num_vectors = MAX_MSI_IRQS;
+	btpci->dw.pp.ops = &bt1_pcie_host_ops;
+
+	dw_pcie_cap_set(&btpci->dw, REQ_RES);
+
+	ret = dw_pcie_host_init(&btpci->dw.pp);
+
+	return dev_err_probe(dev, ret, "Failed to initialize DWC PCIe host\n");
+}
+
+static void bt1_pcie_del_port(struct bt1_pcie *btpci)
+{
+	dw_pcie_host_deinit(&btpci->dw.pp);
+}
+
+static int bt1_pcie_probe(struct platform_device *pdev)
+{
+	struct bt1_pcie *btpci;
+
+	btpci = bt1_pcie_create_data(pdev);
+	if (IS_ERR(btpci))
+		return PTR_ERR(btpci);
+
+	return bt1_pcie_add_port(btpci);
+}
+
+static void bt1_pcie_remove(struct platform_device *pdev)
+{
+	struct bt1_pcie *btpci = platform_get_drvdata(pdev);
+
+	bt1_pcie_del_port(btpci);
+}
+
+static const struct of_device_id bt1_pcie_of_match[] = {
+	{ .compatible = "baikal,bt1-pcie" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, bt1_pcie_of_match);
+
+static struct platform_driver bt1_pcie_driver = {
+	.probe = bt1_pcie_probe,
+	.remove_new = bt1_pcie_remove,
+	.driver = {
+		.name	= "bt1-pcie",
+		.of_match_table = bt1_pcie_of_match,
+	},
+};
+module_platform_driver(bt1_pcie_driver);
+
+MODULE_AUTHOR("Serge Semin <Sergey.Semin@baikalelectronics.ru>");
+MODULE_DESCRIPTION("Baikal-T1 PCIe driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
new file mode 100644
index 000000000..8d79dd0e1
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -0,0 +1,820 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Synopsys DesignWare PCIe Endpoint controller driver
+ *
+ * Copyright (C) 2017 Texas Instruments
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ */
+
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include "pcie-designware.h"
+#include <linux/pci-epc.h>
+#include <linux/pci-epf.h>
+
+void dw_pcie_ep_linkup(struct dw_pcie_ep *ep)
+{
+	struct pci_epc *epc = ep->epc;
+
+	pci_epc_linkup(epc);
+}
+EXPORT_SYMBOL_GPL(dw_pcie_ep_linkup);
+
+void dw_pcie_ep_init_notify(struct dw_pcie_ep *ep)
+{
+	struct pci_epc *epc = ep->epc;
+
+	pci_epc_init_notify(epc);
+}
+EXPORT_SYMBOL_GPL(dw_pcie_ep_init_notify);
+
+struct dw_pcie_ep_func *
+dw_pcie_ep_get_func_from_ep(struct dw_pcie_ep *ep, u8 func_no)
+{
+	struct dw_pcie_ep_func *ep_func;
+
+	list_for_each_entry(ep_func, &ep->func_list, list) {
+		if (ep_func->func_no == func_no)
+			return ep_func;
+	}
+
+	return NULL;
+}
+
+static unsigned int dw_pcie_ep_func_select(struct dw_pcie_ep *ep, u8 func_no)
+{
+	unsigned int func_offset = 0;
+
+	if (ep->ops->func_conf_select)
+		func_offset = ep->ops->func_conf_select(ep, func_no);
+
+	return func_offset;
+}
+
+static void __dw_pcie_ep_reset_bar(struct dw_pcie *pci, u8 func_no,
+				   enum pci_barno bar, int flags)
+{
+	u32 reg;
+	unsigned int func_offset = 0;
+	struct dw_pcie_ep *ep = &pci->ep;
+
+	func_offset = dw_pcie_ep_func_select(ep, func_no);
+
+	reg = func_offset + PCI_BASE_ADDRESS_0 + (4 * bar);
+	dw_pcie_dbi_ro_wr_en(pci);
+	dw_pcie_writel_dbi2(pci, reg, 0x0);
+	dw_pcie_writel_dbi(pci, reg, 0x0);
+	if (flags & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+		dw_pcie_writel_dbi2(pci, reg + 4, 0x0);
+		dw_pcie_writel_dbi(pci, reg + 4, 0x0);
+	}
+	dw_pcie_dbi_ro_wr_dis(pci);
+}
+
+void dw_pcie_ep_reset_bar(struct dw_pcie *pci, enum pci_barno bar)
+{
+	u8 func_no, funcs;
+
+	funcs = pci->ep.epc->max_functions;
+
+	for (func_no = 0; func_no < funcs; func_no++)
+		__dw_pcie_ep_reset_bar(pci, func_no, bar, 0);
+}
+EXPORT_SYMBOL_GPL(dw_pcie_ep_reset_bar);
+
+static u8 __dw_pcie_ep_find_next_cap(struct dw_pcie_ep *ep, u8 func_no,
+		u8 cap_ptr, u8 cap)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	unsigned int func_offset = 0;
+	u8 cap_id, next_cap_ptr;
+	u16 reg;
+
+	if (!cap_ptr)
+		return 0;
+
+	func_offset = dw_pcie_ep_func_select(ep, func_no);
+
+	reg = dw_pcie_readw_dbi(pci, func_offset + cap_ptr);
+	cap_id = (reg & 0x00ff);
+
+	if (cap_id > PCI_CAP_ID_MAX)
+		return 0;
+
+	if (cap_id == cap)
+		return cap_ptr;
+
+	next_cap_ptr = (reg & 0xff00) >> 8;
+	return __dw_pcie_ep_find_next_cap(ep, func_no, next_cap_ptr, cap);
+}
+
+static u8 dw_pcie_ep_find_capability(struct dw_pcie_ep *ep, u8 func_no, u8 cap)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	unsigned int func_offset = 0;
+	u8 next_cap_ptr;
+	u16 reg;
+
+	func_offset = dw_pcie_ep_func_select(ep, func_no);
+
+	reg = dw_pcie_readw_dbi(pci, func_offset + PCI_CAPABILITY_LIST);
+	next_cap_ptr = (reg & 0x00ff);
+
+	return __dw_pcie_ep_find_next_cap(ep, func_no, next_cap_ptr, cap);
+}
+
+static int dw_pcie_ep_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+				   struct pci_epf_header *hdr)
+{
+	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	unsigned int func_offset = 0;
+
+	func_offset = dw_pcie_ep_func_select(ep, func_no);
+
+	dw_pcie_dbi_ro_wr_en(pci);
+	dw_pcie_writew_dbi(pci, func_offset + PCI_VENDOR_ID, hdr->vendorid);
+	dw_pcie_writew_dbi(pci, func_offset + PCI_DEVICE_ID, hdr->deviceid);
+	dw_pcie_writeb_dbi(pci, func_offset + PCI_REVISION_ID, hdr->revid);
+	dw_pcie_writeb_dbi(pci, func_offset + PCI_CLASS_PROG, hdr->progif_code);
+	dw_pcie_writew_dbi(pci, func_offset + PCI_CLASS_DEVICE,
+			   hdr->subclass_code | hdr->baseclass_code << 8);
+	dw_pcie_writeb_dbi(pci, func_offset + PCI_CACHE_LINE_SIZE,
+			   hdr->cache_line_size);
+	dw_pcie_writew_dbi(pci, func_offset + PCI_SUBSYSTEM_VENDOR_ID,
+			   hdr->subsys_vendor_id);
+	dw_pcie_writew_dbi(pci, func_offset + PCI_SUBSYSTEM_ID, hdr->subsys_id);
+	dw_pcie_writeb_dbi(pci, func_offset + PCI_INTERRUPT_PIN,
+			   hdr->interrupt_pin);
+	dw_pcie_dbi_ro_wr_dis(pci);
+
+	return 0;
+}
+
+static int dw_pcie_ep_inbound_atu(struct dw_pcie_ep *ep, u8 func_no, int type,
+				  dma_addr_t cpu_addr, enum pci_barno bar)
+{
+	int ret;
+	u32 free_win;
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+	if (!ep->bar_to_atu[bar])
+		free_win = find_first_zero_bit(ep->ib_window_map, pci->num_ib_windows);
+	else
+		free_win = ep->bar_to_atu[bar];
+
+	if (free_win >= pci->num_ib_windows) {
+		dev_err(pci->dev, "No free inbound window\n");
+		return -EINVAL;
+	}
+
+	ret = dw_pcie_prog_ep_inbound_atu(pci, func_no, free_win, type,
+					  cpu_addr, bar);
+	if (ret < 0) {
+		dev_err(pci->dev, "Failed to program IB window\n");
+		return ret;
+	}
+
+	ep->bar_to_atu[bar] = free_win;
+	set_bit(free_win, ep->ib_window_map);
+
+	return 0;
+}
+
+static int dw_pcie_ep_outbound_atu(struct dw_pcie_ep *ep, u8 func_no,
+				   phys_addr_t phys_addr,
+				   u64 pci_addr, size_t size)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	u32 free_win;
+	int ret;
+
+	free_win = find_first_zero_bit(ep->ob_window_map, pci->num_ob_windows);
+	if (free_win >= pci->num_ob_windows) {
+		dev_err(pci->dev, "No free outbound window\n");
+		return -EINVAL;
+	}
+
+	ret = dw_pcie_prog_ep_outbound_atu(pci, func_no, free_win, PCIE_ATU_TYPE_MEM,
+					   phys_addr, pci_addr, size);
+	if (ret)
+		return ret;
+
+	set_bit(free_win, ep->ob_window_map);
+	ep->outbound_addr[free_win] = phys_addr;
+
+	return 0;
+}
+
+static void dw_pcie_ep_clear_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+				 struct pci_epf_bar *epf_bar)
+{
+	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	enum pci_barno bar = epf_bar->barno;
+	u32 atu_index = ep->bar_to_atu[bar];
+
+	__dw_pcie_ep_reset_bar(pci, func_no, bar, epf_bar->flags);
+
+	dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_IB, atu_index);
+	clear_bit(atu_index, ep->ib_window_map);
+	ep->epf_bar[bar] = NULL;
+	ep->bar_to_atu[bar] = 0;
+}
+
+static int dw_pcie_ep_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+			      struct pci_epf_bar *epf_bar)
+{
+	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	enum pci_barno bar = epf_bar->barno;
+	size_t size = epf_bar->size;
+	int flags = epf_bar->flags;
+	unsigned int func_offset = 0;
+	int ret, type;
+	u32 reg;
+
+	func_offset = dw_pcie_ep_func_select(ep, func_no);
+
+	reg = PCI_BASE_ADDRESS_0 + (4 * bar) + func_offset;
+
+	if (!(flags & PCI_BASE_ADDRESS_SPACE))
+		type = PCIE_ATU_TYPE_MEM;
+	else
+		type = PCIE_ATU_TYPE_IO;
+
+	ret = dw_pcie_ep_inbound_atu(ep, func_no, type, epf_bar->phys_addr, bar);
+	if (ret)
+		return ret;
+
+	if (ep->epf_bar[bar])
+		return 0;
+
+	dw_pcie_dbi_ro_wr_en(pci);
+
+	dw_pcie_writel_dbi2(pci, reg, lower_32_bits(size - 1));
+	dw_pcie_writel_dbi(pci, reg, flags);
+
+	if (flags & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+		dw_pcie_writel_dbi2(pci, reg + 4, upper_32_bits(size - 1));
+		dw_pcie_writel_dbi(pci, reg + 4, 0);
+	}
+
+	ep->epf_bar[bar] = epf_bar;
+	dw_pcie_dbi_ro_wr_dis(pci);
+
+	return 0;
+}
+
+static int dw_pcie_find_index(struct dw_pcie_ep *ep, phys_addr_t addr,
+			      u32 *atu_index)
+{
+	u32 index;
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+	for (index = 0; index < pci->num_ob_windows; index++) {
+		if (ep->outbound_addr[index] != addr)
+			continue;
+		*atu_index = index;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static void dw_pcie_ep_unmap_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+				  phys_addr_t addr)
+{
+	int ret;
+	u32 atu_index;
+	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+	ret = dw_pcie_find_index(ep, addr, &atu_index);
+	if (ret < 0)
+		return;
+
+	dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_OB, atu_index);
+	clear_bit(atu_index, ep->ob_window_map);
+}
+
+static int dw_pcie_ep_map_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+			       phys_addr_t addr, u64 pci_addr, size_t size)
+{
+	int ret;
+	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+	ret = dw_pcie_ep_outbound_atu(ep, func_no, addr, pci_addr, size);
+	if (ret) {
+		dev_err(pci->dev, "Failed to enable address\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int dw_pcie_ep_get_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
+{
+	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	u32 val, reg;
+	unsigned int func_offset = 0;
+	struct dw_pcie_ep_func *ep_func;
+
+	ep_func = dw_pcie_ep_get_func_from_ep(ep, func_no);
+	if (!ep_func || !ep_func->msi_cap)
+		return -EINVAL;
+
+	func_offset = dw_pcie_ep_func_select(ep, func_no);
+
+	reg = ep_func->msi_cap + func_offset + PCI_MSI_FLAGS;
+	val = dw_pcie_readw_dbi(pci, reg);
+	if (!(val & PCI_MSI_FLAGS_ENABLE))
+		return -EINVAL;
+
+	val = (val & PCI_MSI_FLAGS_QSIZE) >> 4;
+
+	return val;
+}
+
+static int dw_pcie_ep_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+			      u8 interrupts)
+{
+	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	u32 val, reg;
+	unsigned int func_offset = 0;
+	struct dw_pcie_ep_func *ep_func;
+
+	ep_func = dw_pcie_ep_get_func_from_ep(ep, func_no);
+	if (!ep_func || !ep_func->msi_cap)
+		return -EINVAL;
+
+	func_offset = dw_pcie_ep_func_select(ep, func_no);
+
+	reg = ep_func->msi_cap + func_offset + PCI_MSI_FLAGS;
+	val = dw_pcie_readw_dbi(pci, reg);
+	val &= ~PCI_MSI_FLAGS_QMASK;
+	val |= (interrupts << 1) & PCI_MSI_FLAGS_QMASK;
+	dw_pcie_dbi_ro_wr_en(pci);
+	dw_pcie_writew_dbi(pci, reg, val);
+	dw_pcie_dbi_ro_wr_dis(pci);
+
+	return 0;
+}
+
+static int dw_pcie_ep_get_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
+{
+	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	u32 val, reg;
+	unsigned int func_offset = 0;
+	struct dw_pcie_ep_func *ep_func;
+
+	ep_func = dw_pcie_ep_get_func_from_ep(ep, func_no);
+	if (!ep_func || !ep_func->msix_cap)
+		return -EINVAL;
+
+	func_offset = dw_pcie_ep_func_select(ep, func_no);
+
+	reg = ep_func->msix_cap + func_offset + PCI_MSIX_FLAGS;
+	val = dw_pcie_readw_dbi(pci, reg);
+	if (!(val & PCI_MSIX_FLAGS_ENABLE))
+		return -EINVAL;
+
+	val &= PCI_MSIX_FLAGS_QSIZE;
+
+	return val;
+}
+
+static int dw_pcie_ep_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+			       u16 interrupts, enum pci_barno bir, u32 offset)
+{
+	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	u32 val, reg;
+	unsigned int func_offset = 0;
+	struct dw_pcie_ep_func *ep_func;
+
+	ep_func = dw_pcie_ep_get_func_from_ep(ep, func_no);
+	if (!ep_func || !ep_func->msix_cap)
+		return -EINVAL;
+
+	dw_pcie_dbi_ro_wr_en(pci);
+
+	func_offset = dw_pcie_ep_func_select(ep, func_no);
+
+	reg = ep_func->msix_cap + func_offset + PCI_MSIX_FLAGS;
+	val = dw_pcie_readw_dbi(pci, reg);
+	val &= ~PCI_MSIX_FLAGS_QSIZE;
+	val |= interrupts;
+	dw_pcie_writew_dbi(pci, reg, val);
+
+	reg = ep_func->msix_cap + func_offset + PCI_MSIX_TABLE;
+	val = offset | bir;
+	dw_pcie_writel_dbi(pci, reg, val);
+
+	reg = ep_func->msix_cap + func_offset + PCI_MSIX_PBA;
+	val = (offset + (interrupts * PCI_MSIX_ENTRY_SIZE)) | bir;
+	dw_pcie_writel_dbi(pci, reg, val);
+
+	dw_pcie_dbi_ro_wr_dis(pci);
+
+	return 0;
+}
+
+static int dw_pcie_ep_raise_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+				enum pci_epc_irq_type type, u16 interrupt_num)
+{
+	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
+
+	if (!ep->ops->raise_irq)
+		return -EINVAL;
+
+	return ep->ops->raise_irq(ep, func_no, type, interrupt_num);
+}
+
+static void dw_pcie_ep_stop(struct pci_epc *epc)
+{
+	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+	dw_pcie_stop_link(pci);
+}
+
+static int dw_pcie_ep_start(struct pci_epc *epc)
+{
+	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+	return dw_pcie_start_link(pci);
+}
+
+static const struct pci_epc_features*
+dw_pcie_ep_get_features(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
+{
+	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
+
+	if (!ep->ops->get_features)
+		return NULL;
+
+	return ep->ops->get_features(ep);
+}
+
+static const struct pci_epc_ops epc_ops = {
+	.write_header		= dw_pcie_ep_write_header,
+	.set_bar		= dw_pcie_ep_set_bar,
+	.clear_bar		= dw_pcie_ep_clear_bar,
+	.map_addr		= dw_pcie_ep_map_addr,
+	.unmap_addr		= dw_pcie_ep_unmap_addr,
+	.set_msi		= dw_pcie_ep_set_msi,
+	.get_msi		= dw_pcie_ep_get_msi,
+	.set_msix		= dw_pcie_ep_set_msix,
+	.get_msix		= dw_pcie_ep_get_msix,
+	.raise_irq		= dw_pcie_ep_raise_irq,
+	.start			= dw_pcie_ep_start,
+	.stop			= dw_pcie_ep_stop,
+	.get_features		= dw_pcie_ep_get_features,
+};
+
+int dw_pcie_ep_raise_legacy_irq(struct dw_pcie_ep *ep, u8 func_no)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct device *dev = pci->dev;
+
+	dev_err(dev, "EP cannot trigger legacy IRQs\n");
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(dw_pcie_ep_raise_legacy_irq);
+
+int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no,
+			     u8 interrupt_num)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct dw_pcie_ep_func *ep_func;
+	struct pci_epc *epc = ep->epc;
+	unsigned int aligned_offset;
+	unsigned int func_offset = 0;
+	u16 msg_ctrl, msg_data;
+	u32 msg_addr_lower, msg_addr_upper, reg;
+	u64 msg_addr;
+	bool has_upper;
+	int ret;
+
+	ep_func = dw_pcie_ep_get_func_from_ep(ep, func_no);
+	if (!ep_func || !ep_func->msi_cap)
+		return -EINVAL;
+
+	func_offset = dw_pcie_ep_func_select(ep, func_no);
+
+	/* Raise MSI per the PCI Local Bus Specification Revision 3.0, 6.8.1. */
+	reg = ep_func->msi_cap + func_offset + PCI_MSI_FLAGS;
+	msg_ctrl = dw_pcie_readw_dbi(pci, reg);
+	has_upper = !!(msg_ctrl & PCI_MSI_FLAGS_64BIT);
+	reg = ep_func->msi_cap + func_offset + PCI_MSI_ADDRESS_LO;
+	msg_addr_lower = dw_pcie_readl_dbi(pci, reg);
+	if (has_upper) {
+		reg = ep_func->msi_cap + func_offset + PCI_MSI_ADDRESS_HI;
+		msg_addr_upper = dw_pcie_readl_dbi(pci, reg);
+		reg = ep_func->msi_cap + func_offset + PCI_MSI_DATA_64;
+		msg_data = dw_pcie_readw_dbi(pci, reg);
+	} else {
+		msg_addr_upper = 0;
+		reg = ep_func->msi_cap + func_offset + PCI_MSI_DATA_32;
+		msg_data = dw_pcie_readw_dbi(pci, reg);
+	}
+	aligned_offset = msg_addr_lower & (epc->mem->window.page_size - 1);
+	msg_addr = ((u64)msg_addr_upper) << 32 |
+			(msg_addr_lower & ~aligned_offset);
+	ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr,
+				  epc->mem->window.page_size);
+	if (ret)
+		return ret;
+
+	writel(msg_data | (interrupt_num - 1), ep->msi_mem + aligned_offset);
+
+	dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dw_pcie_ep_raise_msi_irq);
+
+int dw_pcie_ep_raise_msix_irq_doorbell(struct dw_pcie_ep *ep, u8 func_no,
+				       u16 interrupt_num)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct dw_pcie_ep_func *ep_func;
+	u32 msg_data;
+
+	ep_func = dw_pcie_ep_get_func_from_ep(ep, func_no);
+	if (!ep_func || !ep_func->msix_cap)
+		return -EINVAL;
+
+	msg_data = (func_no << PCIE_MSIX_DOORBELL_PF_SHIFT) |
+		   (interrupt_num - 1);
+
+	dw_pcie_writel_dbi(pci, PCIE_MSIX_DOORBELL, msg_data);
+
+	return 0;
+}
+
+int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no,
+			      u16 interrupt_num)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct dw_pcie_ep_func *ep_func;
+	struct pci_epf_msix_tbl *msix_tbl;
+	struct pci_epc *epc = ep->epc;
+	unsigned int func_offset = 0;
+	u32 reg, msg_data, vec_ctrl;
+	unsigned int aligned_offset;
+	u32 tbl_offset;
+	u64 msg_addr;
+	int ret;
+	u8 bir;
+
+	ep_func = dw_pcie_ep_get_func_from_ep(ep, func_no);
+	if (!ep_func || !ep_func->msix_cap)
+		return -EINVAL;
+
+	func_offset = dw_pcie_ep_func_select(ep, func_no);
+
+	reg = ep_func->msix_cap + func_offset + PCI_MSIX_TABLE;
+	tbl_offset = dw_pcie_readl_dbi(pci, reg);
+	bir = (tbl_offset & PCI_MSIX_TABLE_BIR);
+	tbl_offset &= PCI_MSIX_TABLE_OFFSET;
+
+	msix_tbl = ep->epf_bar[bir]->addr + tbl_offset;
+	msg_addr = msix_tbl[(interrupt_num - 1)].msg_addr;
+	msg_data = msix_tbl[(interrupt_num - 1)].msg_data;
+	vec_ctrl = msix_tbl[(interrupt_num - 1)].vector_ctrl;
+
+	if (vec_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT) {
+		dev_dbg(pci->dev, "MSI-X entry ctrl set\n");
+		return -EPERM;
+	}
+
+	aligned_offset = msg_addr & (epc->mem->window.page_size - 1);
+	msg_addr &= ~aligned_offset;
+	ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr,
+				  epc->mem->window.page_size);
+	if (ret)
+		return ret;
+
+	writel(msg_data, ep->msi_mem + aligned_offset);
+
+	dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys);
+
+	return 0;
+}
+
+void dw_pcie_ep_exit(struct dw_pcie_ep *ep)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct pci_epc *epc = ep->epc;
+
+	dw_pcie_edma_remove(pci);
+
+	pci_epc_mem_free_addr(epc, ep->msi_mem_phys, ep->msi_mem,
+			      epc->mem->window.page_size);
+
+	pci_epc_mem_exit(epc);
+}
+
+static unsigned int dw_pcie_ep_find_ext_capability(struct dw_pcie *pci, int cap)
+{
+	u32 header;
+	int pos = PCI_CFG_SPACE_SIZE;
+
+	while (pos) {
+		header = dw_pcie_readl_dbi(pci, pos);
+		if (PCI_EXT_CAP_ID(header) == cap)
+			return pos;
+
+		pos = PCI_EXT_CAP_NEXT(header);
+		if (!pos)
+			break;
+	}
+
+	return 0;
+}
+
+int dw_pcie_ep_init_complete(struct dw_pcie_ep *ep)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	unsigned int offset, ptm_cap_base;
+	unsigned int nbars;
+	u8 hdr_type;
+	u32 reg;
+	int i;
+
+	hdr_type = dw_pcie_readb_dbi(pci, PCI_HEADER_TYPE) &
+		   PCI_HEADER_TYPE_MASK;
+	if (hdr_type != PCI_HEADER_TYPE_NORMAL) {
+		dev_err(pci->dev,
+			"PCIe controller is not set to EP mode (hdr_type:0x%x)!\n",
+			hdr_type);
+		return -EIO;
+	}
+
+	offset = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_REBAR);
+	ptm_cap_base = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_PTM);
+
+	dw_pcie_dbi_ro_wr_en(pci);
+
+	if (offset) {
+		reg = dw_pcie_readl_dbi(pci, offset + PCI_REBAR_CTRL);
+		nbars = (reg & PCI_REBAR_CTRL_NBAR_MASK) >>
+			PCI_REBAR_CTRL_NBAR_SHIFT;
+
+		for (i = 0; i < nbars; i++, offset += PCI_REBAR_CTRL)
+			dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, 0x0);
+	}
+
+	/*
+	 * PTM responder capability can be disabled only after disabling
+	 * PTM root capability.
+	 */
+	if (ptm_cap_base) {
+		dw_pcie_dbi_ro_wr_en(pci);
+		reg = dw_pcie_readl_dbi(pci, ptm_cap_base + PCI_PTM_CAP);
+		reg &= ~PCI_PTM_CAP_ROOT;
+		dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, reg);
+
+		reg = dw_pcie_readl_dbi(pci, ptm_cap_base + PCI_PTM_CAP);
+		reg &= ~(PCI_PTM_CAP_RES | PCI_PTM_GRANULARITY_MASK);
+		dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, reg);
+		dw_pcie_dbi_ro_wr_dis(pci);
+	}
+
+	dw_pcie_setup(pci);
+	dw_pcie_dbi_ro_wr_dis(pci);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dw_pcie_ep_init_complete);
+
+int dw_pcie_ep_init(struct dw_pcie_ep *ep)
+{
+	int ret;
+	void *addr;
+	u8 func_no;
+	struct resource *res;
+	struct pci_epc *epc;
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct device *dev = pci->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct device_node *np = dev->of_node;
+	const struct pci_epc_features *epc_features;
+	struct dw_pcie_ep_func *ep_func;
+
+	INIT_LIST_HEAD(&ep->func_list);
+
+	ret = dw_pcie_get_resources(pci);
+	if (ret)
+		return ret;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "addr_space");
+	if (!res)
+		return -EINVAL;
+
+	ep->phys_base = res->start;
+	ep->addr_size = resource_size(res);
+
+	dw_pcie_version_detect(pci);
+
+	dw_pcie_iatu_detect(pci);
+
+	ep->ib_window_map = devm_bitmap_zalloc(dev, pci->num_ib_windows,
+					       GFP_KERNEL);
+	if (!ep->ib_window_map)
+		return -ENOMEM;
+
+	ep->ob_window_map = devm_bitmap_zalloc(dev, pci->num_ob_windows,
+					       GFP_KERNEL);
+	if (!ep->ob_window_map)
+		return -ENOMEM;
+
+	addr = devm_kcalloc(dev, pci->num_ob_windows, sizeof(phys_addr_t),
+			    GFP_KERNEL);
+	if (!addr)
+		return -ENOMEM;
+	ep->outbound_addr = addr;
+
+	epc = devm_pci_epc_create(dev, &epc_ops);
+	if (IS_ERR(epc)) {
+		dev_err(dev, "Failed to create epc device\n");
+		return PTR_ERR(epc);
+	}
+
+	ep->epc = epc;
+	epc_set_drvdata(epc, ep);
+
+	ret = of_property_read_u8(np, "max-functions", &epc->max_functions);
+	if (ret < 0)
+		epc->max_functions = 1;
+
+	for (func_no = 0; func_no < epc->max_functions; func_no++) {
+		ep_func = devm_kzalloc(dev, sizeof(*ep_func), GFP_KERNEL);
+		if (!ep_func)
+			return -ENOMEM;
+
+		ep_func->func_no = func_no;
+		ep_func->msi_cap = dw_pcie_ep_find_capability(ep, func_no,
+							      PCI_CAP_ID_MSI);
+		ep_func->msix_cap = dw_pcie_ep_find_capability(ep, func_no,
+							       PCI_CAP_ID_MSIX);
+
+		list_add_tail(&ep_func->list, &ep->func_list);
+	}
+
+	if (ep->ops->ep_init)
+		ep->ops->ep_init(ep);
+
+	ret = pci_epc_mem_init(epc, ep->phys_base, ep->addr_size,
+			       ep->page_size);
+	if (ret < 0) {
+		dev_err(dev, "Failed to initialize address space\n");
+		return ret;
+	}
+
+	ep->msi_mem = pci_epc_mem_alloc_addr(epc, &ep->msi_mem_phys,
+					     epc->mem->window.page_size);
+	if (!ep->msi_mem) {
+		ret = -ENOMEM;
+		dev_err(dev, "Failed to reserve memory for MSI/MSI-X\n");
+		goto err_exit_epc_mem;
+	}
+
+	ret = dw_pcie_edma_detect(pci);
+	if (ret)
+		goto err_free_epc_mem;
+
+	if (ep->ops->get_features) {
+		epc_features = ep->ops->get_features(ep);
+		if (epc_features->core_init_notifier)
+			return 0;
+	}
+
+	ret = dw_pcie_ep_init_complete(ep);
+	if (ret)
+		goto err_remove_edma;
+
+	return 0;
+
+err_remove_edma:
+	dw_pcie_edma_remove(pci);
+
+err_free_epc_mem:
+	pci_epc_mem_free_addr(epc, ep->msi_mem_phys, ep->msi_mem,
+			      epc->mem->window.page_size);
+
+err_exit_epc_mem:
+	pci_epc_mem_exit(epc);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dw_pcie_ep_init);
diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
new file mode 100644
index 000000000..a7170fd0e
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-designware-host.c
@@ -0,0 +1,880 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Synopsys DesignWare PCIe host controller driver
+ *
+ * Copyright (C) 2013 Samsung Electronics Co., Ltd.
+ *		https://www.samsung.com
+ *
+ * Author: Jingoo Han <jg1.han@samsung.com>
+ */
+
+#include <linux/iopoll.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/pci_regs.h>
+#include <linux/platform_device.h>
+
+#include "../../pci.h"
+#include "pcie-designware.h"
+
+static struct pci_ops dw_pcie_ops;
+static struct pci_ops dw_child_pcie_ops;
+
+static void dw_msi_ack_irq(struct irq_data *d)
+{
+	irq_chip_ack_parent(d);
+}
+
+static void dw_msi_mask_irq(struct irq_data *d)
+{
+	pci_msi_mask_irq(d);
+	irq_chip_mask_parent(d);
+}
+
+static void dw_msi_unmask_irq(struct irq_data *d)
+{
+	pci_msi_unmask_irq(d);
+	irq_chip_unmask_parent(d);
+}
+
+static struct irq_chip dw_pcie_msi_irq_chip = {
+	.name = "PCI-MSI",
+	.irq_ack = dw_msi_ack_irq,
+	.irq_mask = dw_msi_mask_irq,
+	.irq_unmask = dw_msi_unmask_irq,
+};
+
+static struct msi_domain_info dw_pcie_msi_domain_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		   MSI_FLAG_PCI_MSIX | MSI_FLAG_MULTI_PCI_MSI),
+	.chip	= &dw_pcie_msi_irq_chip,
+};
+
+/* MSI int handler */
+irqreturn_t dw_handle_msi_irq(struct dw_pcie_rp *pp)
+{
+	int i, pos;
+	unsigned long val;
+	u32 status, num_ctrls;
+	irqreturn_t ret = IRQ_NONE;
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+
+	num_ctrls = pp->num_vectors / MAX_MSI_IRQS_PER_CTRL;
+
+	for (i = 0; i < num_ctrls; i++) {
+		status = dw_pcie_readl_dbi(pci, PCIE_MSI_INTR0_STATUS +
+					   (i * MSI_REG_CTRL_BLOCK_SIZE));
+		if (!status)
+			continue;
+
+		ret = IRQ_HANDLED;
+		val = status;
+		pos = 0;
+		while ((pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL,
+					    pos)) != MAX_MSI_IRQS_PER_CTRL) {
+			generic_handle_domain_irq(pp->irq_domain,
+						  (i * MAX_MSI_IRQS_PER_CTRL) +
+						  pos);
+			pos++;
+		}
+	}
+
+	return ret;
+}
+
+/* Chained MSI interrupt service routine */
+static void dw_chained_msi_isr(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct dw_pcie_rp *pp;
+
+	chained_irq_enter(chip, desc);
+
+	pp = irq_desc_get_handler_data(desc);
+	dw_handle_msi_irq(pp);
+
+	chained_irq_exit(chip, desc);
+}
+
+static void dw_pci_setup_msi_msg(struct irq_data *d, struct msi_msg *msg)
+{
+	struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(d);
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	u64 msi_target;
+
+	msi_target = (u64)pp->msi_data;
+
+	msg->address_lo = lower_32_bits(msi_target);
+	msg->address_hi = upper_32_bits(msi_target);
+
+	msg->data = d->hwirq;
+
+	dev_dbg(pci->dev, "msi#%d address_hi %#x address_lo %#x\n",
+		(int)d->hwirq, msg->address_hi, msg->address_lo);
+}
+
+static int dw_pci_msi_set_affinity(struct irq_data *d,
+				   const struct cpumask *mask, bool force)
+{
+	return -EINVAL;
+}
+
+static void dw_pci_bottom_mask(struct irq_data *d)
+{
+	struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(d);
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	unsigned int res, bit, ctrl;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&pp->lock, flags);
+
+	ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
+	res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
+	bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
+
+	pp->irq_mask[ctrl] |= BIT(bit);
+	dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK + res, pp->irq_mask[ctrl]);
+
+	raw_spin_unlock_irqrestore(&pp->lock, flags);
+}
+
+static void dw_pci_bottom_unmask(struct irq_data *d)
+{
+	struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(d);
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	unsigned int res, bit, ctrl;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&pp->lock, flags);
+
+	ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
+	res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
+	bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
+
+	pp->irq_mask[ctrl] &= ~BIT(bit);
+	dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK + res, pp->irq_mask[ctrl]);
+
+	raw_spin_unlock_irqrestore(&pp->lock, flags);
+}
+
+static void dw_pci_bottom_ack(struct irq_data *d)
+{
+	struct dw_pcie_rp *pp  = irq_data_get_irq_chip_data(d);
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	unsigned int res, bit, ctrl;
+
+	ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
+	res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
+	bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
+
+	dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_STATUS + res, BIT(bit));
+}
+
+static struct irq_chip dw_pci_msi_bottom_irq_chip = {
+	.name = "DWPCI-MSI",
+	.irq_ack = dw_pci_bottom_ack,
+	.irq_compose_msi_msg = dw_pci_setup_msi_msg,
+	.irq_set_affinity = dw_pci_msi_set_affinity,
+	.irq_mask = dw_pci_bottom_mask,
+	.irq_unmask = dw_pci_bottom_unmask,
+};
+
+static int dw_pcie_irq_domain_alloc(struct irq_domain *domain,
+				    unsigned int virq, unsigned int nr_irqs,
+				    void *args)
+{
+	struct dw_pcie_rp *pp = domain->host_data;
+	unsigned long flags;
+	u32 i;
+	int bit;
+
+	raw_spin_lock_irqsave(&pp->lock, flags);
+
+	bit = bitmap_find_free_region(pp->msi_irq_in_use, pp->num_vectors,
+				      order_base_2(nr_irqs));
+
+	raw_spin_unlock_irqrestore(&pp->lock, flags);
+
+	if (bit < 0)
+		return -ENOSPC;
+
+	for (i = 0; i < nr_irqs; i++)
+		irq_domain_set_info(domain, virq + i, bit + i,
+				    pp->msi_irq_chip,
+				    pp, handle_edge_irq,
+				    NULL, NULL);
+
+	return 0;
+}
+
+static void dw_pcie_irq_domain_free(struct irq_domain *domain,
+				    unsigned int virq, unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct dw_pcie_rp *pp = domain->host_data;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&pp->lock, flags);
+
+	bitmap_release_region(pp->msi_irq_in_use, d->hwirq,
+			      order_base_2(nr_irqs));
+
+	raw_spin_unlock_irqrestore(&pp->lock, flags);
+}
+
+static const struct irq_domain_ops dw_pcie_msi_domain_ops = {
+	.alloc	= dw_pcie_irq_domain_alloc,
+	.free	= dw_pcie_irq_domain_free,
+};
+
+int dw_pcie_allocate_domains(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct fwnode_handle *fwnode = of_node_to_fwnode(pci->dev->of_node);
+
+	pp->irq_domain = irq_domain_create_linear(fwnode, pp->num_vectors,
+					       &dw_pcie_msi_domain_ops, pp);
+	if (!pp->irq_domain) {
+		dev_err(pci->dev, "Failed to create IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	irq_domain_update_bus_token(pp->irq_domain, DOMAIN_BUS_NEXUS);
+
+	pp->msi_domain = pci_msi_create_irq_domain(fwnode,
+						   &dw_pcie_msi_domain_info,
+						   pp->irq_domain);
+	if (!pp->msi_domain) {
+		dev_err(pci->dev, "Failed to create MSI domain\n");
+		irq_domain_remove(pp->irq_domain);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void dw_pcie_free_msi(struct dw_pcie_rp *pp)
+{
+	u32 ctrl;
+
+	for (ctrl = 0; ctrl < MAX_MSI_CTRLS; ctrl++) {
+		if (pp->msi_irq[ctrl] > 0)
+			irq_set_chained_handler_and_data(pp->msi_irq[ctrl],
+							 NULL, NULL);
+	}
+
+	irq_domain_remove(pp->msi_domain);
+	irq_domain_remove(pp->irq_domain);
+}
+
+static void dw_pcie_msi_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	u64 msi_target = (u64)pp->msi_data;
+
+	if (!pci_msi_enabled() || !pp->has_msi_ctrl)
+		return;
+
+	/* Program the msi_data */
+	dw_pcie_writel_dbi(pci, PCIE_MSI_ADDR_LO, lower_32_bits(msi_target));
+	dw_pcie_writel_dbi(pci, PCIE_MSI_ADDR_HI, upper_32_bits(msi_target));
+}
+
+static int dw_pcie_parse_split_msi_irq(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct device *dev = pci->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	u32 ctrl, max_vectors;
+	int irq;
+
+	/* Parse any "msiX" IRQs described in the devicetree */
+	for (ctrl = 0; ctrl < MAX_MSI_CTRLS; ctrl++) {
+		char msi_name[] = "msiX";
+
+		msi_name[3] = '0' + ctrl;
+		irq = platform_get_irq_byname_optional(pdev, msi_name);
+		if (irq == -ENXIO)
+			break;
+		if (irq < 0)
+			return dev_err_probe(dev, irq,
+					     "Failed to parse MSI IRQ '%s'\n",
+					     msi_name);
+
+		pp->msi_irq[ctrl] = irq;
+	}
+
+	/* If no "msiX" IRQs, caller should fallback to "msi" IRQ */
+	if (ctrl == 0)
+		return -ENXIO;
+
+	max_vectors = ctrl * MAX_MSI_IRQS_PER_CTRL;
+	if (pp->num_vectors > max_vectors) {
+		dev_warn(dev, "Exceeding number of MSI vectors, limiting to %u\n",
+			 max_vectors);
+		pp->num_vectors = max_vectors;
+	}
+	if (!pp->num_vectors)
+		pp->num_vectors = max_vectors;
+
+	return 0;
+}
+
+static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct device *dev = pci->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	u64 *msi_vaddr;
+	int ret;
+	u32 ctrl, num_ctrls;
+
+	for (ctrl = 0; ctrl < MAX_MSI_CTRLS; ctrl++)
+		pp->irq_mask[ctrl] = ~0;
+
+	if (!pp->msi_irq[0]) {
+		ret = dw_pcie_parse_split_msi_irq(pp);
+		if (ret < 0 && ret != -ENXIO)
+			return ret;
+	}
+
+	if (!pp->num_vectors)
+		pp->num_vectors = MSI_DEF_NUM_VECTORS;
+	num_ctrls = pp->num_vectors / MAX_MSI_IRQS_PER_CTRL;
+
+	if (!pp->msi_irq[0]) {
+		pp->msi_irq[0] = platform_get_irq_byname_optional(pdev, "msi");
+		if (pp->msi_irq[0] < 0) {
+			pp->msi_irq[0] = platform_get_irq(pdev, 0);
+			if (pp->msi_irq[0] < 0)
+				return pp->msi_irq[0];
+		}
+	}
+
+	dev_dbg(dev, "Using %d MSI vectors\n", pp->num_vectors);
+
+	pp->msi_irq_chip = &dw_pci_msi_bottom_irq_chip;
+
+	ret = dw_pcie_allocate_domains(pp);
+	if (ret)
+		return ret;
+
+	for (ctrl = 0; ctrl < num_ctrls; ctrl++) {
+		if (pp->msi_irq[ctrl] > 0)
+			irq_set_chained_handler_and_data(pp->msi_irq[ctrl],
+						    dw_chained_msi_isr, pp);
+	}
+
+	/*
+	 * Even though the iMSI-RX Module supports 64-bit addresses some
+	 * peripheral PCIe devices may lack 64-bit message support. In
+	 * order not to miss MSI TLPs from those devices the MSI target
+	 * address has to be within the lowest 4GB.
+	 *
+	 * Note until there is a better alternative found the reservation is
+	 * done by allocating from the artificially limited DMA-coherent
+	 * memory.
+	 */
+	ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
+	if (ret)
+		dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices with only 32-bit MSI support may not work properly\n");
+
+	msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
+					GFP_KERNEL);
+	if (!msi_vaddr) {
+		dev_err(dev, "Failed to alloc and map MSI data\n");
+		dw_pcie_free_msi(pp);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+int dw_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct device *dev = pci->dev;
+	struct device_node *np = dev->of_node;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct resource_entry *win;
+	struct pci_host_bridge *bridge;
+	struct resource *res;
+	int ret;
+
+	raw_spin_lock_init(&pp->lock);
+
+	ret = dw_pcie_get_resources(pci);
+	if (ret)
+		return ret;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "config");
+	if (res) {
+		pp->cfg0_size = resource_size(res);
+		pp->cfg0_base = res->start;
+
+		pp->va_cfg0_base = devm_pci_remap_cfg_resource(dev, res);
+		if (IS_ERR(pp->va_cfg0_base))
+			return PTR_ERR(pp->va_cfg0_base);
+	} else {
+		dev_err(dev, "Missing *config* reg space\n");
+		return -ENODEV;
+	}
+
+	bridge = devm_pci_alloc_host_bridge(dev, 0);
+	if (!bridge)
+		return -ENOMEM;
+
+	pp->bridge = bridge;
+
+	/* Get the I/O range from DT */
+	win = resource_list_first_type(&bridge->windows, IORESOURCE_IO);
+	if (win) {
+		pp->io_size = resource_size(win->res);
+		pp->io_bus_addr = win->res->start - win->offset;
+		pp->io_base = pci_pio_to_address(win->res->start);
+	}
+
+	/* Set default bus ops */
+	bridge->ops = &dw_pcie_ops;
+	bridge->child_ops = &dw_child_pcie_ops;
+
+	if (pp->ops->host_init) {
+		ret = pp->ops->host_init(pp);
+		if (ret)
+			return ret;
+	}
+
+	if (pci_msi_enabled()) {
+		pp->has_msi_ctrl = !(pp->ops->msi_host_init ||
+				     of_property_read_bool(np, "msi-parent") ||
+				     of_property_read_bool(np, "msi-map"));
+
+		/*
+		 * For the has_msi_ctrl case the default assignment is handled
+		 * in the dw_pcie_msi_host_init().
+		 */
+		if (!pp->has_msi_ctrl && !pp->num_vectors) {
+			pp->num_vectors = MSI_DEF_NUM_VECTORS;
+		} else if (pp->num_vectors > MAX_MSI_IRQS) {
+			dev_err(dev, "Invalid number of vectors\n");
+			ret = -EINVAL;
+			goto err_deinit_host;
+		}
+
+		if (pp->ops->msi_host_init) {
+			ret = pp->ops->msi_host_init(pp);
+			if (ret < 0)
+				goto err_deinit_host;
+		} else if (pp->has_msi_ctrl) {
+			ret = dw_pcie_msi_host_init(pp);
+			if (ret < 0)
+				goto err_deinit_host;
+		}
+	}
+
+	dw_pcie_version_detect(pci);
+
+	dw_pcie_iatu_detect(pci);
+
+	ret = dw_pcie_edma_detect(pci);
+	if (ret)
+		goto err_free_msi;
+
+	ret = dw_pcie_setup_rc(pp);
+	if (ret)
+		goto err_remove_edma;
+
+	if (!dw_pcie_link_up(pci)) {
+		ret = dw_pcie_start_link(pci);
+		if (ret)
+			goto err_remove_edma;
+	}
+
+	/* Ignore errors, the link may come up later */
+	dw_pcie_wait_for_link(pci);
+
+	bridge->sysdata = pp;
+
+	ret = pci_host_probe(bridge);
+	if (ret)
+		goto err_stop_link;
+
+	return 0;
+
+err_stop_link:
+	dw_pcie_stop_link(pci);
+
+err_remove_edma:
+	dw_pcie_edma_remove(pci);
+
+err_free_msi:
+	if (pp->has_msi_ctrl)
+		dw_pcie_free_msi(pp);
+
+err_deinit_host:
+	if (pp->ops->host_deinit)
+		pp->ops->host_deinit(pp);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dw_pcie_host_init);
+
+void dw_pcie_host_deinit(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+
+	pci_stop_root_bus(pp->bridge->bus);
+	pci_remove_root_bus(pp->bridge->bus);
+
+	dw_pcie_stop_link(pci);
+
+	dw_pcie_edma_remove(pci);
+
+	if (pp->has_msi_ctrl)
+		dw_pcie_free_msi(pp);
+
+	if (pp->ops->host_deinit)
+		pp->ops->host_deinit(pp);
+}
+EXPORT_SYMBOL_GPL(dw_pcie_host_deinit);
+
+static void __iomem *dw_pcie_other_conf_map_bus(struct pci_bus *bus,
+						unsigned int devfn, int where)
+{
+	struct dw_pcie_rp *pp = bus->sysdata;
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	int type, ret;
+	u32 busdev;
+
+	/*
+	 * Checking whether the link is up here is a last line of defense
+	 * against platforms that forward errors on the system bus as
+	 * SError upon PCI configuration transactions issued when the link
+	 * is down. This check is racy by definition and does not stop
+	 * the system from triggering an SError if the link goes down
+	 * after this check is performed.
+	 */
+	if (!dw_pcie_link_up(pci))
+		return NULL;
+
+	busdev = PCIE_ATU_BUS(bus->number) | PCIE_ATU_DEV(PCI_SLOT(devfn)) |
+		 PCIE_ATU_FUNC(PCI_FUNC(devfn));
+
+	if (pci_is_root_bus(bus->parent))
+		type = PCIE_ATU_TYPE_CFG0;
+	else
+		type = PCIE_ATU_TYPE_CFG1;
+
+	ret = dw_pcie_prog_outbound_atu(pci, 0, type, pp->cfg0_base, busdev,
+					pp->cfg0_size);
+	if (ret)
+		return NULL;
+
+	return pp->va_cfg0_base + where;
+}
+
+static int dw_pcie_rd_other_conf(struct pci_bus *bus, unsigned int devfn,
+				 int where, int size, u32 *val)
+{
+	struct dw_pcie_rp *pp = bus->sysdata;
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	int ret;
+
+	ret = pci_generic_config_read(bus, devfn, where, size, val);
+	if (ret != PCIBIOS_SUCCESSFUL)
+		return ret;
+
+	if (pp->cfg0_io_shared) {
+		ret = dw_pcie_prog_outbound_atu(pci, 0, PCIE_ATU_TYPE_IO,
+						pp->io_base, pp->io_bus_addr,
+						pp->io_size);
+		if (ret)
+			return PCIBIOS_SET_FAILED;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int dw_pcie_wr_other_conf(struct pci_bus *bus, unsigned int devfn,
+				 int where, int size, u32 val)
+{
+	struct dw_pcie_rp *pp = bus->sysdata;
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	int ret;
+
+	ret = pci_generic_config_write(bus, devfn, where, size, val);
+	if (ret != PCIBIOS_SUCCESSFUL)
+		return ret;
+
+	if (pp->cfg0_io_shared) {
+		ret = dw_pcie_prog_outbound_atu(pci, 0, PCIE_ATU_TYPE_IO,
+						pp->io_base, pp->io_bus_addr,
+						pp->io_size);
+		if (ret)
+			return PCIBIOS_SET_FAILED;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops dw_child_pcie_ops = {
+	.map_bus = dw_pcie_other_conf_map_bus,
+	.read = dw_pcie_rd_other_conf,
+	.write = dw_pcie_wr_other_conf,
+};
+
+void __iomem *dw_pcie_own_conf_map_bus(struct pci_bus *bus, unsigned int devfn, int where)
+{
+	struct dw_pcie_rp *pp = bus->sysdata;
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+
+	if (PCI_SLOT(devfn) > 0)
+		return NULL;
+
+	return pci->dbi_base + where;
+}
+EXPORT_SYMBOL_GPL(dw_pcie_own_conf_map_bus);
+
+static struct pci_ops dw_pcie_ops = {
+	.map_bus = dw_pcie_own_conf_map_bus,
+	.read = pci_generic_config_read,
+	.write = pci_generic_config_write,
+};
+
+static int dw_pcie_iatu_setup(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct resource_entry *entry;
+	int i, ret;
+
+	/* Note the very first outbound ATU is used for CFG IOs */
+	if (!pci->num_ob_windows) {
+		dev_err(pci->dev, "No outbound iATU found\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Ensure all out/inbound windows are disabled before proceeding with
+	 * the MEM/IO (dma-)ranges setups.
+	 */
+	for (i = 0; i < pci->num_ob_windows; i++)
+		dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_OB, i);
+
+	for (i = 0; i < pci->num_ib_windows; i++)
+		dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_IB, i);
+
+	i = 0;
+	resource_list_for_each_entry(entry, &pp->bridge->windows) {
+		if (resource_type(entry->res) != IORESOURCE_MEM)
+			continue;
+
+		if (pci->num_ob_windows <= ++i)
+			break;
+
+		ret = dw_pcie_prog_outbound_atu(pci, i, PCIE_ATU_TYPE_MEM,
+						entry->res->start,
+						entry->res->start - entry->offset,
+						resource_size(entry->res));
+		if (ret) {
+			dev_err(pci->dev, "Failed to set MEM range %pr\n",
+				entry->res);
+			return ret;
+		}
+	}
+
+	if (pp->io_size) {
+		if (pci->num_ob_windows > ++i) {
+			ret = dw_pcie_prog_outbound_atu(pci, i, PCIE_ATU_TYPE_IO,
+							pp->io_base,
+							pp->io_bus_addr,
+							pp->io_size);
+			if (ret) {
+				dev_err(pci->dev, "Failed to set IO range %pr\n",
+					entry->res);
+				return ret;
+			}
+		} else {
+			pp->cfg0_io_shared = true;
+		}
+	}
+
+	if (pci->num_ob_windows <= i)
+		dev_warn(pci->dev, "Ranges exceed outbound iATU size (%d)\n",
+			 pci->num_ob_windows);
+
+	i = 0;
+	resource_list_for_each_entry(entry, &pp->bridge->dma_ranges) {
+		if (resource_type(entry->res) != IORESOURCE_MEM)
+			continue;
+
+		if (pci->num_ib_windows <= i)
+			break;
+
+		ret = dw_pcie_prog_inbound_atu(pci, i++, PCIE_ATU_TYPE_MEM,
+					       entry->res->start,
+					       entry->res->start - entry->offset,
+					       resource_size(entry->res));
+		if (ret) {
+			dev_err(pci->dev, "Failed to set DMA range %pr\n",
+				entry->res);
+			return ret;
+		}
+	}
+
+	if (pci->num_ib_windows <= i)
+		dev_warn(pci->dev, "Dma-ranges exceed inbound iATU size (%u)\n",
+			 pci->num_ib_windows);
+
+	return 0;
+}
+
+int dw_pcie_setup_rc(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	u32 val, ctrl, num_ctrls;
+	int ret;
+
+	/*
+	 * Enable DBI read-only registers for writing/updating configuration.
+	 * Write permission gets disabled towards the end of this function.
+	 */
+	dw_pcie_dbi_ro_wr_en(pci);
+
+	dw_pcie_setup(pci);
+
+	if (pp->has_msi_ctrl) {
+		num_ctrls = pp->num_vectors / MAX_MSI_IRQS_PER_CTRL;
+
+		/* Initialize IRQ Status array */
+		for (ctrl = 0; ctrl < num_ctrls; ctrl++) {
+			dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK +
+					    (ctrl * MSI_REG_CTRL_BLOCK_SIZE),
+					    pp->irq_mask[ctrl]);
+			dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_ENABLE +
+					    (ctrl * MSI_REG_CTRL_BLOCK_SIZE),
+					    ~0);
+		}
+	}
+
+	dw_pcie_msi_init(pp);
+
+	/* Setup RC BARs */
+	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, 0x00000004);
+	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_1, 0x00000000);
+
+	/* Setup interrupt pins */
+	val = dw_pcie_readl_dbi(pci, PCI_INTERRUPT_LINE);
+	val &= 0xffff00ff;
+	val |= 0x00000100;
+	dw_pcie_writel_dbi(pci, PCI_INTERRUPT_LINE, val);
+
+	/* Setup bus numbers */
+	val = dw_pcie_readl_dbi(pci, PCI_PRIMARY_BUS);
+	val &= 0xff000000;
+	val |= 0x00ff0100;
+	dw_pcie_writel_dbi(pci, PCI_PRIMARY_BUS, val);
+
+	/* Setup command register */
+	val = dw_pcie_readl_dbi(pci, PCI_COMMAND);
+	val &= 0xffff0000;
+	val |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
+		PCI_COMMAND_MASTER | PCI_COMMAND_SERR;
+	dw_pcie_writel_dbi(pci, PCI_COMMAND, val);
+
+	/*
+	 * If the platform provides its own child bus config accesses, it means
+	 * the platform uses its own address translation component rather than
+	 * ATU, so we should not program the ATU here.
+	 */
+	if (pp->bridge->child_ops == &dw_child_pcie_ops) {
+		ret = dw_pcie_iatu_setup(pp);
+		if (ret)
+			return ret;
+	}
+
+	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, 0);
+
+	/* Program correct class for RC */
+	dw_pcie_writew_dbi(pci, PCI_CLASS_DEVICE, PCI_CLASS_BRIDGE_PCI);
+
+	val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+	val |= PORT_LOGIC_SPEED_CHANGE;
+	dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
+
+	dw_pcie_dbi_ro_wr_dis(pci);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dw_pcie_setup_rc);
+
+int dw_pcie_suspend_noirq(struct dw_pcie *pci)
+{
+	u8 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	u32 val;
+	int ret;
+
+	/*
+	 * If L1SS is supported, then do not put the link into L2 as some
+	 * devices such as NVMe expect low resume latency.
+	 */
+	if (dw_pcie_readw_dbi(pci, offset + PCI_EXP_LNKCTL) & PCI_EXP_LNKCTL_ASPM_L1)
+		return 0;
+
+	if (dw_pcie_get_ltssm(pci) <= DW_PCIE_LTSSM_DETECT_ACT)
+		return 0;
+
+	if (!pci->pp.ops->pme_turn_off)
+		return 0;
+
+	pci->pp.ops->pme_turn_off(&pci->pp);
+
+	ret = read_poll_timeout(dw_pcie_get_ltssm, val, val == DW_PCIE_LTSSM_L2_IDLE,
+				PCIE_PME_TO_L2_TIMEOUT_US/10,
+				PCIE_PME_TO_L2_TIMEOUT_US, false, pci);
+	if (ret) {
+		dev_err(pci->dev, "Timeout waiting for L2 entry! LTSSM: 0x%x\n", val);
+		return ret;
+	}
+
+	if (pci->pp.ops->host_deinit)
+		pci->pp.ops->host_deinit(&pci->pp);
+
+	pci->suspended = true;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dw_pcie_suspend_noirq);
+
+int dw_pcie_resume_noirq(struct dw_pcie *pci)
+{
+	int ret;
+
+	if (!pci->suspended)
+		return 0;
+
+	pci->suspended = false;
+
+	if (pci->pp.ops->host_init) {
+		ret = pci->pp.ops->host_init(&pci->pp);
+		if (ret) {
+			dev_err(pci->dev, "Host init failed: %d\n", ret);
+			return ret;
+		}
+	}
+
+	dw_pcie_setup_rc(&pci->pp);
+
+	ret = dw_pcie_start_link(pci);
+	if (ret)
+		return ret;
+
+	ret = dw_pcie_wait_for_link(pci);
+	if (ret)
+		return ret;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dw_pcie_resume_noirq);
diff --git a/drivers/pci/controller/dwc/pcie-designware-plat.c b/drivers/pci/controller/dwc/pcie-designware-plat.c
new file mode 100644
index 000000000..b625841e9
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-designware-plat.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe RC driver for Synopsys DesignWare Core
+ *
+ * Copyright (C) 2015-2016 Synopsys, Inc. (www.synopsys.com)
+ *
+ * Authors: Joao Pinto <Joao.Pinto@synopsys.com>
+ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/types.h>
+
+#include "pcie-designware.h"
+
+struct dw_plat_pcie {
+	struct dw_pcie			*pci;
+	enum dw_pcie_device_mode	mode;
+};
+
+struct dw_plat_pcie_of_data {
+	enum dw_pcie_device_mode	mode;
+};
+
+static const struct dw_pcie_host_ops dw_plat_pcie_host_ops = {
+};
+
+static void dw_plat_pcie_ep_init(struct dw_pcie_ep *ep)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	enum pci_barno bar;
+
+	for (bar = 0; bar < PCI_STD_NUM_BARS; bar++)
+		dw_pcie_ep_reset_bar(pci, bar);
+}
+
+static int dw_plat_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
+				     enum pci_epc_irq_type type,
+				     u16 interrupt_num)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+	switch (type) {
+	case PCI_EPC_IRQ_LEGACY:
+		return dw_pcie_ep_raise_legacy_irq(ep, func_no);
+	case PCI_EPC_IRQ_MSI:
+		return dw_pcie_ep_raise_msi_irq(ep, func_no, interrupt_num);
+	case PCI_EPC_IRQ_MSIX:
+		return dw_pcie_ep_raise_msix_irq(ep, func_no, interrupt_num);
+	default:
+		dev_err(pci->dev, "UNKNOWN IRQ type\n");
+	}
+
+	return 0;
+}
+
+static const struct pci_epc_features dw_plat_pcie_epc_features = {
+	.linkup_notifier = false,
+	.msi_capable = true,
+	.msix_capable = true,
+};
+
+static const struct pci_epc_features*
+dw_plat_pcie_get_features(struct dw_pcie_ep *ep)
+{
+	return &dw_plat_pcie_epc_features;
+}
+
+static const struct dw_pcie_ep_ops pcie_ep_ops = {
+	.ep_init = dw_plat_pcie_ep_init,
+	.raise_irq = dw_plat_pcie_ep_raise_irq,
+	.get_features = dw_plat_pcie_get_features,
+};
+
+static int dw_plat_add_pcie_port(struct dw_plat_pcie *dw_plat_pcie,
+				 struct platform_device *pdev)
+{
+	struct dw_pcie *pci = dw_plat_pcie->pci;
+	struct dw_pcie_rp *pp = &pci->pp;
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	pp->irq = platform_get_irq(pdev, 1);
+	if (pp->irq < 0)
+		return pp->irq;
+
+	pp->num_vectors = MAX_MSI_IRQS;
+	pp->ops = &dw_plat_pcie_host_ops;
+
+	ret = dw_pcie_host_init(pp);
+	if (ret) {
+		dev_err(dev, "Failed to initialize host\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int dw_plat_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct dw_plat_pcie *dw_plat_pcie;
+	struct dw_pcie *pci;
+	int ret;
+	const struct dw_plat_pcie_of_data *data;
+	enum dw_pcie_device_mode mode;
+
+	data = of_device_get_match_data(dev);
+	if (!data)
+		return -EINVAL;
+
+	mode = (enum dw_pcie_device_mode)data->mode;
+
+	dw_plat_pcie = devm_kzalloc(dev, sizeof(*dw_plat_pcie), GFP_KERNEL);
+	if (!dw_plat_pcie)
+		return -ENOMEM;
+
+	pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	if (!pci)
+		return -ENOMEM;
+
+	pci->dev = dev;
+
+	dw_plat_pcie->pci = pci;
+	dw_plat_pcie->mode = mode;
+
+	platform_set_drvdata(pdev, dw_plat_pcie);
+
+	switch (dw_plat_pcie->mode) {
+	case DW_PCIE_RC_TYPE:
+		if (!IS_ENABLED(CONFIG_PCIE_DW_PLAT_HOST))
+			return -ENODEV;
+
+		ret = dw_plat_add_pcie_port(dw_plat_pcie, pdev);
+		break;
+	case DW_PCIE_EP_TYPE:
+		if (!IS_ENABLED(CONFIG_PCIE_DW_PLAT_EP))
+			return -ENODEV;
+
+		pci->ep.ops = &pcie_ep_ops;
+		ret = dw_pcie_ep_init(&pci->ep);
+		break;
+	default:
+		dev_err(dev, "INVALID device type %d\n", dw_plat_pcie->mode);
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static const struct dw_plat_pcie_of_data dw_plat_pcie_rc_of_data = {
+	.mode = DW_PCIE_RC_TYPE,
+};
+
+static const struct dw_plat_pcie_of_data dw_plat_pcie_ep_of_data = {
+	.mode = DW_PCIE_EP_TYPE,
+};
+
+static const struct of_device_id dw_plat_pcie_of_match[] = {
+	{
+		.compatible = "snps,dw-pcie",
+		.data = &dw_plat_pcie_rc_of_data,
+	},
+	{
+		.compatible = "snps,dw-pcie-ep",
+		.data = &dw_plat_pcie_ep_of_data,
+	},
+	{},
+};
+
+static struct platform_driver dw_plat_pcie_driver = {
+	.driver = {
+		.name	= "dw-pcie",
+		.of_match_table = dw_plat_pcie_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = dw_plat_pcie_probe,
+};
+builtin_platform_driver(dw_plat_pcie_driver);
diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
new file mode 100644
index 000000000..2b60d20df
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-designware.c
@@ -0,0 +1,1064 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Synopsys DesignWare PCIe host controller driver
+ *
+ * Copyright (C) 2013 Samsung Electronics Co., Ltd.
+ *		https://www.samsung.com
+ *
+ * Author: Jingoo Han <jg1.han@samsung.com>
+ */
+
+#include <linux/align.h>
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dma/edma.h>
+#include <linux/gpio/consumer.h>
+#include <linux/ioport.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+#include "../../pci.h"
+#include "pcie-designware.h"
+
+static const char * const dw_pcie_app_clks[DW_PCIE_NUM_APP_CLKS] = {
+	[DW_PCIE_DBI_CLK] = "dbi",
+	[DW_PCIE_MSTR_CLK] = "mstr",
+	[DW_PCIE_SLV_CLK] = "slv",
+};
+
+static const char * const dw_pcie_core_clks[DW_PCIE_NUM_CORE_CLKS] = {
+	[DW_PCIE_PIPE_CLK] = "pipe",
+	[DW_PCIE_CORE_CLK] = "core",
+	[DW_PCIE_AUX_CLK] = "aux",
+	[DW_PCIE_REF_CLK] = "ref",
+};
+
+static const char * const dw_pcie_app_rsts[DW_PCIE_NUM_APP_RSTS] = {
+	[DW_PCIE_DBI_RST] = "dbi",
+	[DW_PCIE_MSTR_RST] = "mstr",
+	[DW_PCIE_SLV_RST] = "slv",
+};
+
+static const char * const dw_pcie_core_rsts[DW_PCIE_NUM_CORE_RSTS] = {
+	[DW_PCIE_NON_STICKY_RST] = "non-sticky",
+	[DW_PCIE_STICKY_RST] = "sticky",
+	[DW_PCIE_CORE_RST] = "core",
+	[DW_PCIE_PIPE_RST] = "pipe",
+	[DW_PCIE_PHY_RST] = "phy",
+	[DW_PCIE_HOT_RST] = "hot",
+	[DW_PCIE_PWR_RST] = "pwr",
+};
+
+static int dw_pcie_get_clocks(struct dw_pcie *pci)
+{
+	int i, ret;
+
+	for (i = 0; i < DW_PCIE_NUM_APP_CLKS; i++)
+		pci->app_clks[i].id = dw_pcie_app_clks[i];
+
+	for (i = 0; i < DW_PCIE_NUM_CORE_CLKS; i++)
+		pci->core_clks[i].id = dw_pcie_core_clks[i];
+
+	ret = devm_clk_bulk_get_optional(pci->dev, DW_PCIE_NUM_APP_CLKS,
+					 pci->app_clks);
+	if (ret)
+		return ret;
+
+	return devm_clk_bulk_get_optional(pci->dev, DW_PCIE_NUM_CORE_CLKS,
+					  pci->core_clks);
+}
+
+static int dw_pcie_get_resets(struct dw_pcie *pci)
+{
+	int i, ret;
+
+	for (i = 0; i < DW_PCIE_NUM_APP_RSTS; i++)
+		pci->app_rsts[i].id = dw_pcie_app_rsts[i];
+
+	for (i = 0; i < DW_PCIE_NUM_CORE_RSTS; i++)
+		pci->core_rsts[i].id = dw_pcie_core_rsts[i];
+
+	ret = devm_reset_control_bulk_get_optional_shared(pci->dev,
+							  DW_PCIE_NUM_APP_RSTS,
+							  pci->app_rsts);
+	if (ret)
+		return ret;
+
+	ret = devm_reset_control_bulk_get_optional_exclusive(pci->dev,
+							     DW_PCIE_NUM_CORE_RSTS,
+							     pci->core_rsts);
+	if (ret)
+		return ret;
+
+	pci->pe_rst = devm_gpiod_get_optional(pci->dev, "reset", GPIOD_OUT_HIGH);
+	if (IS_ERR(pci->pe_rst))
+		return PTR_ERR(pci->pe_rst);
+
+	return 0;
+}
+
+int dw_pcie_get_resources(struct dw_pcie *pci)
+{
+	struct platform_device *pdev = to_platform_device(pci->dev);
+	struct device_node *np = dev_of_node(pci->dev);
+	struct resource *res;
+	int ret;
+
+	if (!pci->dbi_base) {
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
+		pci->dbi_base = devm_pci_remap_cfg_resource(pci->dev, res);
+		if (IS_ERR(pci->dbi_base))
+			return PTR_ERR(pci->dbi_base);
+	}
+
+	/* DBI2 is mainly useful for the endpoint controller */
+	if (!pci->dbi_base2) {
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi2");
+		if (res) {
+			pci->dbi_base2 = devm_pci_remap_cfg_resource(pci->dev, res);
+			if (IS_ERR(pci->dbi_base2))
+				return PTR_ERR(pci->dbi_base2);
+		} else {
+			pci->dbi_base2 = pci->dbi_base + SZ_4K;
+		}
+	}
+
+	/* For non-unrolled iATU/eDMA platforms this range will be ignored */
+	if (!pci->atu_base) {
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "atu");
+		if (res) {
+			pci->atu_size = resource_size(res);
+			pci->atu_base = devm_ioremap_resource(pci->dev, res);
+			if (IS_ERR(pci->atu_base))
+				return PTR_ERR(pci->atu_base);
+		} else {
+			pci->atu_base = pci->dbi_base + DEFAULT_DBI_ATU_OFFSET;
+		}
+	}
+
+	/* Set a default value suitable for at most 8 in and 8 out windows */
+	if (!pci->atu_size)
+		pci->atu_size = SZ_4K;
+
+	/* eDMA region can be mapped to a custom base address */
+	if (!pci->edma.reg_base) {
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dma");
+		if (res) {
+			pci->edma.reg_base = devm_ioremap_resource(pci->dev, res);
+			if (IS_ERR(pci->edma.reg_base))
+				return PTR_ERR(pci->edma.reg_base);
+		} else if (pci->atu_size >= 2 * DEFAULT_DBI_DMA_OFFSET) {
+			pci->edma.reg_base = pci->atu_base + DEFAULT_DBI_DMA_OFFSET;
+		}
+	}
+
+	/* LLDD is supposed to manually switch the clocks and resets state */
+	if (dw_pcie_cap_is(pci, REQ_RES)) {
+		ret = dw_pcie_get_clocks(pci);
+		if (ret)
+			return ret;
+
+		ret = dw_pcie_get_resets(pci);
+		if (ret)
+			return ret;
+	}
+
+	if (pci->link_gen < 1)
+		pci->link_gen = of_pci_get_max_link_speed(np);
+
+	of_property_read_u32(np, "num-lanes", &pci->num_lanes);
+
+	if (of_property_read_bool(np, "snps,enable-cdm-check"))
+		dw_pcie_cap_set(pci, CDM_CHECK);
+
+	return 0;
+}
+
+void dw_pcie_version_detect(struct dw_pcie *pci)
+{
+	u32 ver;
+
+	/* The content of the CSR is zero on DWC PCIe older than v4.70a */
+	ver = dw_pcie_readl_dbi(pci, PCIE_VERSION_NUMBER);
+	if (!ver)
+		return;
+
+	if (pci->version && pci->version != ver)
+		dev_warn(pci->dev, "Versions don't match (%08x != %08x)\n",
+			 pci->version, ver);
+	else
+		pci->version = ver;
+
+	ver = dw_pcie_readl_dbi(pci, PCIE_VERSION_TYPE);
+
+	if (pci->type && pci->type != ver)
+		dev_warn(pci->dev, "Types don't match (%08x != %08x)\n",
+			 pci->type, ver);
+	else
+		pci->type = ver;
+}
+
+/*
+ * These interfaces resemble the pci_find_*capability() interfaces, but these
+ * are for configuring host controllers, which are bridges *to* PCI devices but
+ * are not PCI devices themselves.
+ */
+static u8 __dw_pcie_find_next_cap(struct dw_pcie *pci, u8 cap_ptr,
+				  u8 cap)
+{
+	u8 cap_id, next_cap_ptr;
+	u16 reg;
+
+	if (!cap_ptr)
+		return 0;
+
+	reg = dw_pcie_readw_dbi(pci, cap_ptr);
+	cap_id = (reg & 0x00ff);
+
+	if (cap_id > PCI_CAP_ID_MAX)
+		return 0;
+
+	if (cap_id == cap)
+		return cap_ptr;
+
+	next_cap_ptr = (reg & 0xff00) >> 8;
+	return __dw_pcie_find_next_cap(pci, next_cap_ptr, cap);
+}
+
+u8 dw_pcie_find_capability(struct dw_pcie *pci, u8 cap)
+{
+	u8 next_cap_ptr;
+	u16 reg;
+
+	reg = dw_pcie_readw_dbi(pci, PCI_CAPABILITY_LIST);
+	next_cap_ptr = (reg & 0x00ff);
+
+	return __dw_pcie_find_next_cap(pci, next_cap_ptr, cap);
+}
+EXPORT_SYMBOL_GPL(dw_pcie_find_capability);
+
+static u16 dw_pcie_find_next_ext_capability(struct dw_pcie *pci, u16 start,
+					    u8 cap)
+{
+	u32 header;
+	int ttl;
+	int pos = PCI_CFG_SPACE_SIZE;
+
+	/* minimum 8 bytes per capability */
+	ttl = (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8;
+
+	if (start)
+		pos = start;
+
+	header = dw_pcie_readl_dbi(pci, pos);
+	/*
+	 * If we have no capabilities, this is indicated by cap ID,
+	 * cap version and next pointer all being 0.
+	 */
+	if (header == 0)
+		return 0;
+
+	while (ttl-- > 0) {
+		if (PCI_EXT_CAP_ID(header) == cap && pos != start)
+			return pos;
+
+		pos = PCI_EXT_CAP_NEXT(header);
+		if (pos < PCI_CFG_SPACE_SIZE)
+			break;
+
+		header = dw_pcie_readl_dbi(pci, pos);
+	}
+
+	return 0;
+}
+
+u16 dw_pcie_find_ext_capability(struct dw_pcie *pci, u8 cap)
+{
+	return dw_pcie_find_next_ext_capability(pci, 0, cap);
+}
+EXPORT_SYMBOL_GPL(dw_pcie_find_ext_capability);
+
+int dw_pcie_read(void __iomem *addr, int size, u32 *val)
+{
+	if (!IS_ALIGNED((uintptr_t)addr, size)) {
+		*val = 0;
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	}
+
+	if (size == 4) {
+		*val = readl(addr);
+	} else if (size == 2) {
+		*val = readw(addr);
+	} else if (size == 1) {
+		*val = readb(addr);
+	} else {
+		*val = 0;
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+EXPORT_SYMBOL_GPL(dw_pcie_read);
+
+int dw_pcie_write(void __iomem *addr, int size, u32 val)
+{
+	if (!IS_ALIGNED((uintptr_t)addr, size))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	if (size == 4)
+		writel(val, addr);
+	else if (size == 2)
+		writew(val, addr);
+	else if (size == 1)
+		writeb(val, addr);
+	else
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+EXPORT_SYMBOL_GPL(dw_pcie_write);
+
+u32 dw_pcie_read_dbi(struct dw_pcie *pci, u32 reg, size_t size)
+{
+	int ret;
+	u32 val;
+
+	if (pci->ops && pci->ops->read_dbi)
+		return pci->ops->read_dbi(pci, pci->dbi_base, reg, size);
+
+	ret = dw_pcie_read(pci->dbi_base + reg, size, &val);
+	if (ret)
+		dev_err(pci->dev, "Read DBI address failed\n");
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(dw_pcie_read_dbi);
+
+void dw_pcie_write_dbi(struct dw_pcie *pci, u32 reg, size_t size, u32 val)
+{
+	int ret;
+
+	if (pci->ops && pci->ops->write_dbi) {
+		pci->ops->write_dbi(pci, pci->dbi_base, reg, size, val);
+		return;
+	}
+
+	ret = dw_pcie_write(pci->dbi_base + reg, size, val);
+	if (ret)
+		dev_err(pci->dev, "Write DBI address failed\n");
+}
+EXPORT_SYMBOL_GPL(dw_pcie_write_dbi);
+
+void dw_pcie_write_dbi2(struct dw_pcie *pci, u32 reg, size_t size, u32 val)
+{
+	int ret;
+
+	if (pci->ops && pci->ops->write_dbi2) {
+		pci->ops->write_dbi2(pci, pci->dbi_base2, reg, size, val);
+		return;
+	}
+
+	ret = dw_pcie_write(pci->dbi_base2 + reg, size, val);
+	if (ret)
+		dev_err(pci->dev, "write DBI address failed\n");
+}
+
+static inline void __iomem *dw_pcie_select_atu(struct dw_pcie *pci, u32 dir,
+					       u32 index)
+{
+	if (dw_pcie_cap_is(pci, IATU_UNROLL))
+		return pci->atu_base + PCIE_ATU_UNROLL_BASE(dir, index);
+
+	dw_pcie_writel_dbi(pci, PCIE_ATU_VIEWPORT, dir | index);
+	return pci->atu_base;
+}
+
+static u32 dw_pcie_readl_atu(struct dw_pcie *pci, u32 dir, u32 index, u32 reg)
+{
+	void __iomem *base;
+	int ret;
+	u32 val;
+
+	base = dw_pcie_select_atu(pci, dir, index);
+
+	if (pci->ops && pci->ops->read_dbi)
+		return pci->ops->read_dbi(pci, base, reg, 4);
+
+	ret = dw_pcie_read(base + reg, 4, &val);
+	if (ret)
+		dev_err(pci->dev, "Read ATU address failed\n");
+
+	return val;
+}
+
+static void dw_pcie_writel_atu(struct dw_pcie *pci, u32 dir, u32 index,
+			       u32 reg, u32 val)
+{
+	void __iomem *base;
+	int ret;
+
+	base = dw_pcie_select_atu(pci, dir, index);
+
+	if (pci->ops && pci->ops->write_dbi) {
+		pci->ops->write_dbi(pci, base, reg, 4, val);
+		return;
+	}
+
+	ret = dw_pcie_write(base + reg, 4, val);
+	if (ret)
+		dev_err(pci->dev, "Write ATU address failed\n");
+}
+
+static inline u32 dw_pcie_readl_atu_ob(struct dw_pcie *pci, u32 index, u32 reg)
+{
+	return dw_pcie_readl_atu(pci, PCIE_ATU_REGION_DIR_OB, index, reg);
+}
+
+static inline void dw_pcie_writel_atu_ob(struct dw_pcie *pci, u32 index, u32 reg,
+					 u32 val)
+{
+	dw_pcie_writel_atu(pci, PCIE_ATU_REGION_DIR_OB, index, reg, val);
+}
+
+static inline u32 dw_pcie_enable_ecrc(u32 val)
+{
+	/*
+	 * DesignWare core version 4.90A has a design issue where the 'TD'
+	 * bit in the Control register-1 of the ATU outbound region acts
+	 * like an override for the ECRC setting, i.e., the presence of TLP
+	 * Digest (ECRC) in the outgoing TLPs is solely determined by this
+	 * bit. This is contrary to the PCIe spec which says that the
+	 * enablement of the ECRC is solely determined by the AER
+	 * registers.
+	 *
+	 * Because of this, even when the ECRC is enabled through AER
+	 * registers, the transactions going through ATU won't have TLP
+	 * Digest as there is no way the PCI core AER code could program
+	 * the TD bit which is specific to the DesignWare core.
+	 *
+	 * The best way to handle this scenario is to program the TD bit
+	 * always. It affects only the traffic from root port to downstream
+	 * devices.
+	 *
+	 * At this point,
+	 * When ECRC is enabled in AER registers, everything works normally
+	 * When ECRC is NOT enabled in AER registers, then,
+	 * on Root Port:- TLP Digest (DWord size) gets appended to each packet
+	 *                even through it is not required. Since downstream
+	 *                TLPs are mostly for configuration accesses and BAR
+	 *                accesses, they are not in critical path and won't
+	 *                have much negative effect on the performance.
+	 * on End Point:- TLP Digest is received for some/all the packets coming
+	 *                from the root port. TLP Digest is ignored because,
+	 *                as per the PCIe Spec r5.0 v1.0 section 2.2.3
+	 *                "TLP Digest Rules", when an endpoint receives TLP
+	 *                Digest when its ECRC check functionality is disabled
+	 *                in AER registers, received TLP Digest is just ignored.
+	 * Since there is no issue or error reported either side, best way to
+	 * handle the scenario is to program TD bit by default.
+	 */
+
+	return val | PCIE_ATU_TD;
+}
+
+static int __dw_pcie_prog_outbound_atu(struct dw_pcie *pci, u8 func_no,
+				       int index, int type, u64 cpu_addr,
+				       u64 pci_addr, u64 size)
+{
+	u32 retries, val;
+	u64 limit_addr;
+
+	if (pci->ops && pci->ops->cpu_addr_fixup)
+		cpu_addr = pci->ops->cpu_addr_fixup(pci, cpu_addr);
+
+	limit_addr = cpu_addr + size - 1;
+
+	if ((limit_addr & ~pci->region_limit) != (cpu_addr & ~pci->region_limit) ||
+	    !IS_ALIGNED(cpu_addr, pci->region_align) ||
+	    !IS_ALIGNED(pci_addr, pci->region_align) || !size) {
+		return -EINVAL;
+	}
+
+	dw_pcie_writel_atu_ob(pci, index, PCIE_ATU_LOWER_BASE,
+			      lower_32_bits(cpu_addr));
+	dw_pcie_writel_atu_ob(pci, index, PCIE_ATU_UPPER_BASE,
+			      upper_32_bits(cpu_addr));
+
+	dw_pcie_writel_atu_ob(pci, index, PCIE_ATU_LIMIT,
+			      lower_32_bits(limit_addr));
+	if (dw_pcie_ver_is_ge(pci, 460A))
+		dw_pcie_writel_atu_ob(pci, index, PCIE_ATU_UPPER_LIMIT,
+				      upper_32_bits(limit_addr));
+
+	dw_pcie_writel_atu_ob(pci, index, PCIE_ATU_LOWER_TARGET,
+			      lower_32_bits(pci_addr));
+	dw_pcie_writel_atu_ob(pci, index, PCIE_ATU_UPPER_TARGET,
+			      upper_32_bits(pci_addr));
+
+	val = type | PCIE_ATU_FUNC_NUM(func_no);
+	if (upper_32_bits(limit_addr) > upper_32_bits(cpu_addr) &&
+	    dw_pcie_ver_is_ge(pci, 460A))
+		val |= PCIE_ATU_INCREASE_REGION_SIZE;
+	if (dw_pcie_ver_is(pci, 490A))
+		val = dw_pcie_enable_ecrc(val);
+	dw_pcie_writel_atu_ob(pci, index, PCIE_ATU_REGION_CTRL1, val);
+
+	dw_pcie_writel_atu_ob(pci, index, PCIE_ATU_REGION_CTRL2, PCIE_ATU_ENABLE);
+
+	/*
+	 * Make sure ATU enable takes effect before any subsequent config
+	 * and I/O accesses.
+	 */
+	for (retries = 0; retries < LINK_WAIT_MAX_IATU_RETRIES; retries++) {
+		val = dw_pcie_readl_atu_ob(pci, index, PCIE_ATU_REGION_CTRL2);
+		if (val & PCIE_ATU_ENABLE)
+			return 0;
+
+		mdelay(LINK_WAIT_IATU);
+	}
+
+	dev_err(pci->dev, "Outbound iATU is not being enabled\n");
+
+	return -ETIMEDOUT;
+}
+
+int dw_pcie_prog_outbound_atu(struct dw_pcie *pci, int index, int type,
+			      u64 cpu_addr, u64 pci_addr, u64 size)
+{
+	return __dw_pcie_prog_outbound_atu(pci, 0, index, type,
+					   cpu_addr, pci_addr, size);
+}
+
+int dw_pcie_prog_ep_outbound_atu(struct dw_pcie *pci, u8 func_no, int index,
+				 int type, u64 cpu_addr, u64 pci_addr,
+				 u64 size)
+{
+	return __dw_pcie_prog_outbound_atu(pci, func_no, index, type,
+					   cpu_addr, pci_addr, size);
+}
+
+static inline u32 dw_pcie_readl_atu_ib(struct dw_pcie *pci, u32 index, u32 reg)
+{
+	return dw_pcie_readl_atu(pci, PCIE_ATU_REGION_DIR_IB, index, reg);
+}
+
+static inline void dw_pcie_writel_atu_ib(struct dw_pcie *pci, u32 index, u32 reg,
+					 u32 val)
+{
+	dw_pcie_writel_atu(pci, PCIE_ATU_REGION_DIR_IB, index, reg, val);
+}
+
+int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, int index, int type,
+			     u64 cpu_addr, u64 pci_addr, u64 size)
+{
+	u64 limit_addr = pci_addr + size - 1;
+	u32 retries, val;
+
+	if ((limit_addr & ~pci->region_limit) != (pci_addr & ~pci->region_limit) ||
+	    !IS_ALIGNED(cpu_addr, pci->region_align) ||
+	    !IS_ALIGNED(pci_addr, pci->region_align) || !size) {
+		return -EINVAL;
+	}
+
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_LOWER_BASE,
+			      lower_32_bits(pci_addr));
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_UPPER_BASE,
+			      upper_32_bits(pci_addr));
+
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_LIMIT,
+			      lower_32_bits(limit_addr));
+	if (dw_pcie_ver_is_ge(pci, 460A))
+		dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_UPPER_LIMIT,
+				      upper_32_bits(limit_addr));
+
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_LOWER_TARGET,
+			      lower_32_bits(cpu_addr));
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_UPPER_TARGET,
+			      upper_32_bits(cpu_addr));
+
+	val = type;
+	if (upper_32_bits(limit_addr) > upper_32_bits(pci_addr) &&
+	    dw_pcie_ver_is_ge(pci, 460A))
+		val |= PCIE_ATU_INCREASE_REGION_SIZE;
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_REGION_CTRL1, val);
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_REGION_CTRL2, PCIE_ATU_ENABLE);
+
+	/*
+	 * Make sure ATU enable takes effect before any subsequent config
+	 * and I/O accesses.
+	 */
+	for (retries = 0; retries < LINK_WAIT_MAX_IATU_RETRIES; retries++) {
+		val = dw_pcie_readl_atu_ib(pci, index, PCIE_ATU_REGION_CTRL2);
+		if (val & PCIE_ATU_ENABLE)
+			return 0;
+
+		mdelay(LINK_WAIT_IATU);
+	}
+
+	dev_err(pci->dev, "Inbound iATU is not being enabled\n");
+
+	return -ETIMEDOUT;
+}
+
+int dw_pcie_prog_ep_inbound_atu(struct dw_pcie *pci, u8 func_no, int index,
+				int type, u64 cpu_addr, u8 bar)
+{
+	u32 retries, val;
+
+	if (!IS_ALIGNED(cpu_addr, pci->region_align))
+		return -EINVAL;
+
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_LOWER_TARGET,
+			      lower_32_bits(cpu_addr));
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_UPPER_TARGET,
+			      upper_32_bits(cpu_addr));
+
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_REGION_CTRL1, type |
+			      PCIE_ATU_FUNC_NUM(func_no));
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_REGION_CTRL2,
+			      PCIE_ATU_ENABLE | PCIE_ATU_FUNC_NUM_MATCH_EN |
+			      PCIE_ATU_BAR_MODE_ENABLE | (bar << 8));
+
+	/*
+	 * Make sure ATU enable takes effect before any subsequent config
+	 * and I/O accesses.
+	 */
+	for (retries = 0; retries < LINK_WAIT_MAX_IATU_RETRIES; retries++) {
+		val = dw_pcie_readl_atu_ib(pci, index, PCIE_ATU_REGION_CTRL2);
+		if (val & PCIE_ATU_ENABLE)
+			return 0;
+
+		mdelay(LINK_WAIT_IATU);
+	}
+
+	dev_err(pci->dev, "Inbound iATU is not being enabled\n");
+
+	return -ETIMEDOUT;
+}
+
+void dw_pcie_disable_atu(struct dw_pcie *pci, u32 dir, int index)
+{
+	dw_pcie_writel_atu(pci, dir, index, PCIE_ATU_REGION_CTRL2, 0);
+}
+
+int dw_pcie_wait_for_link(struct dw_pcie *pci)
+{
+	u32 offset, val;
+	int retries;
+
+	/* Check if the link is up or not */
+	for (retries = 0; retries < LINK_WAIT_MAX_RETRIES; retries++) {
+		if (dw_pcie_link_up(pci))
+			break;
+
+		usleep_range(LINK_WAIT_USLEEP_MIN, LINK_WAIT_USLEEP_MAX);
+	}
+
+	if (retries >= LINK_WAIT_MAX_RETRIES) {
+		dev_info(pci->dev, "Phy link never came up\n");
+		return -ETIMEDOUT;
+	}
+
+	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	val = dw_pcie_readw_dbi(pci, offset + PCI_EXP_LNKSTA);
+
+	dev_info(pci->dev, "PCIe Gen.%u x%u link up\n",
+		 FIELD_GET(PCI_EXP_LNKSTA_CLS, val),
+		 FIELD_GET(PCI_EXP_LNKSTA_NLW, val));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dw_pcie_wait_for_link);
+
+int dw_pcie_link_up(struct dw_pcie *pci)
+{
+	u32 val;
+
+	if (pci->ops && pci->ops->link_up)
+		return pci->ops->link_up(pci);
+
+	val = dw_pcie_readl_dbi(pci, PCIE_PORT_DEBUG1);
+	return ((val & PCIE_PORT_DEBUG1_LINK_UP) &&
+		(!(val & PCIE_PORT_DEBUG1_LINK_IN_TRAINING)));
+}
+EXPORT_SYMBOL_GPL(dw_pcie_link_up);
+
+void dw_pcie_upconfig_setup(struct dw_pcie *pci)
+{
+	u32 val;
+
+	val = dw_pcie_readl_dbi(pci, PCIE_PORT_MULTI_LANE_CTRL);
+	val |= PORT_MLTI_UPCFG_SUPPORT;
+	dw_pcie_writel_dbi(pci, PCIE_PORT_MULTI_LANE_CTRL, val);
+}
+EXPORT_SYMBOL_GPL(dw_pcie_upconfig_setup);
+
+static void dw_pcie_link_set_max_speed(struct dw_pcie *pci, u32 link_gen)
+{
+	u32 cap, ctrl2, link_speed;
+	u8 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+
+	cap = dw_pcie_readl_dbi(pci, offset + PCI_EXP_LNKCAP);
+	ctrl2 = dw_pcie_readl_dbi(pci, offset + PCI_EXP_LNKCTL2);
+	ctrl2 &= ~PCI_EXP_LNKCTL2_TLS;
+
+	switch (pcie_link_speed[link_gen]) {
+	case PCIE_SPEED_2_5GT:
+		link_speed = PCI_EXP_LNKCTL2_TLS_2_5GT;
+		break;
+	case PCIE_SPEED_5_0GT:
+		link_speed = PCI_EXP_LNKCTL2_TLS_5_0GT;
+		break;
+	case PCIE_SPEED_8_0GT:
+		link_speed = PCI_EXP_LNKCTL2_TLS_8_0GT;
+		break;
+	case PCIE_SPEED_16_0GT:
+		link_speed = PCI_EXP_LNKCTL2_TLS_16_0GT;
+		break;
+	default:
+		/* Use hardware capability */
+		link_speed = FIELD_GET(PCI_EXP_LNKCAP_SLS, cap);
+		ctrl2 &= ~PCI_EXP_LNKCTL2_HASD;
+		break;
+	}
+
+	dw_pcie_writel_dbi(pci, offset + PCI_EXP_LNKCTL2, ctrl2 | link_speed);
+
+	cap &= ~((u32)PCI_EXP_LNKCAP_SLS);
+	dw_pcie_writel_dbi(pci, offset + PCI_EXP_LNKCAP, cap | link_speed);
+
+}
+
+static void dw_pcie_link_set_max_link_width(struct dw_pcie *pci, u32 num_lanes)
+{
+	u32 lnkcap, lwsc, plc;
+	u8 cap;
+
+	if (!num_lanes)
+		return;
+
+	/* Set the number of lanes */
+	plc = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL);
+	plc &= ~PORT_LINK_FAST_LINK_MODE;
+	plc &= ~PORT_LINK_MODE_MASK;
+
+	/* Set link width speed control register */
+	lwsc = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+	lwsc &= ~PORT_LOGIC_LINK_WIDTH_MASK;
+	switch (num_lanes) {
+	case 1:
+		plc |= PORT_LINK_MODE_1_LANES;
+		lwsc |= PORT_LOGIC_LINK_WIDTH_1_LANES;
+		break;
+	case 2:
+		plc |= PORT_LINK_MODE_2_LANES;
+		lwsc |= PORT_LOGIC_LINK_WIDTH_2_LANES;
+		break;
+	case 4:
+		plc |= PORT_LINK_MODE_4_LANES;
+		lwsc |= PORT_LOGIC_LINK_WIDTH_4_LANES;
+		break;
+	case 8:
+		plc |= PORT_LINK_MODE_8_LANES;
+		lwsc |= PORT_LOGIC_LINK_WIDTH_8_LANES;
+		break;
+	default:
+		dev_err(pci->dev, "num-lanes %u: invalid value\n", num_lanes);
+		return;
+	}
+	dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, plc);
+	dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, lwsc);
+
+	cap = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	lnkcap = dw_pcie_readl_dbi(pci, cap + PCI_EXP_LNKCAP);
+	lnkcap &= ~PCI_EXP_LNKCAP_MLW;
+	lnkcap |= FIELD_PREP(PCI_EXP_LNKCAP_MLW, num_lanes);
+	dw_pcie_writel_dbi(pci, cap + PCI_EXP_LNKCAP, lnkcap);
+}
+
+void dw_pcie_iatu_detect(struct dw_pcie *pci)
+{
+	int max_region, ob, ib;
+	u32 val, min, dir;
+	u64 max;
+
+	val = dw_pcie_readl_dbi(pci, PCIE_ATU_VIEWPORT);
+	if (val == 0xFFFFFFFF) {
+		dw_pcie_cap_set(pci, IATU_UNROLL);
+
+		max_region = min((int)pci->atu_size / 512, 256);
+	} else {
+		pci->atu_base = pci->dbi_base + PCIE_ATU_VIEWPORT_BASE;
+		pci->atu_size = PCIE_ATU_VIEWPORT_SIZE;
+
+		dw_pcie_writel_dbi(pci, PCIE_ATU_VIEWPORT, 0xFF);
+		max_region = dw_pcie_readl_dbi(pci, PCIE_ATU_VIEWPORT) + 1;
+	}
+
+	for (ob = 0; ob < max_region; ob++) {
+		dw_pcie_writel_atu_ob(pci, ob, PCIE_ATU_LOWER_TARGET, 0x11110000);
+		val = dw_pcie_readl_atu_ob(pci, ob, PCIE_ATU_LOWER_TARGET);
+		if (val != 0x11110000)
+			break;
+	}
+
+	for (ib = 0; ib < max_region; ib++) {
+		dw_pcie_writel_atu_ib(pci, ib, PCIE_ATU_LOWER_TARGET, 0x11110000);
+		val = dw_pcie_readl_atu_ib(pci, ib, PCIE_ATU_LOWER_TARGET);
+		if (val != 0x11110000)
+			break;
+	}
+
+	if (ob) {
+		dir = PCIE_ATU_REGION_DIR_OB;
+	} else if (ib) {
+		dir = PCIE_ATU_REGION_DIR_IB;
+	} else {
+		dev_err(pci->dev, "No iATU regions found\n");
+		return;
+	}
+
+	dw_pcie_writel_atu(pci, dir, 0, PCIE_ATU_LIMIT, 0x0);
+	min = dw_pcie_readl_atu(pci, dir, 0, PCIE_ATU_LIMIT);
+
+	if (dw_pcie_ver_is_ge(pci, 460A)) {
+		dw_pcie_writel_atu(pci, dir, 0, PCIE_ATU_UPPER_LIMIT, 0xFFFFFFFF);
+		max = dw_pcie_readl_atu(pci, dir, 0, PCIE_ATU_UPPER_LIMIT);
+	} else {
+		max = 0;
+	}
+
+	pci->num_ob_windows = ob;
+	pci->num_ib_windows = ib;
+	pci->region_align = 1 << fls(min);
+	pci->region_limit = (max << 32) | (SZ_4G - 1);
+
+	dev_info(pci->dev, "iATU: unroll %s, %u ob, %u ib, align %uK, limit %lluG\n",
+		 dw_pcie_cap_is(pci, IATU_UNROLL) ? "T" : "F",
+		 pci->num_ob_windows, pci->num_ib_windows,
+		 pci->region_align / SZ_1K, (pci->region_limit + 1) / SZ_1G);
+}
+
+static u32 dw_pcie_readl_dma(struct dw_pcie *pci, u32 reg)
+{
+	u32 val = 0;
+	int ret;
+
+	if (pci->ops && pci->ops->read_dbi)
+		return pci->ops->read_dbi(pci, pci->edma.reg_base, reg, 4);
+
+	ret = dw_pcie_read(pci->edma.reg_base + reg, 4, &val);
+	if (ret)
+		dev_err(pci->dev, "Read DMA address failed\n");
+
+	return val;
+}
+
+static int dw_pcie_edma_irq_vector(struct device *dev, unsigned int nr)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	char name[6];
+	int ret;
+
+	if (nr >= EDMA_MAX_WR_CH + EDMA_MAX_RD_CH)
+		return -EINVAL;
+
+	ret = platform_get_irq_byname_optional(pdev, "dma");
+	if (ret > 0)
+		return ret;
+
+	snprintf(name, sizeof(name), "dma%u", nr);
+
+	return platform_get_irq_byname_optional(pdev, name);
+}
+
+static struct dw_edma_plat_ops dw_pcie_edma_ops = {
+	.irq_vector = dw_pcie_edma_irq_vector,
+};
+
+static int dw_pcie_edma_find_chip(struct dw_pcie *pci)
+{
+	u32 val;
+
+	/*
+	 * Indirect eDMA CSRs access has been completely removed since v5.40a
+	 * thus no space is now reserved for the eDMA channels viewport and
+	 * former DMA CTRL register is no longer fixed to FFs.
+	 */
+	if (dw_pcie_ver_is_ge(pci, 540A))
+		val = 0xFFFFFFFF;
+	else
+		val = dw_pcie_readl_dbi(pci, PCIE_DMA_VIEWPORT_BASE + PCIE_DMA_CTRL);
+
+	if (val == 0xFFFFFFFF && pci->edma.reg_base) {
+		pci->edma.mf = EDMA_MF_EDMA_UNROLL;
+
+		val = dw_pcie_readl_dma(pci, PCIE_DMA_CTRL);
+	} else if (val != 0xFFFFFFFF) {
+		pci->edma.mf = EDMA_MF_EDMA_LEGACY;
+
+		pci->edma.reg_base = pci->dbi_base + PCIE_DMA_VIEWPORT_BASE;
+	} else {
+		return -ENODEV;
+	}
+
+	pci->edma.dev = pci->dev;
+
+	if (!pci->edma.ops)
+		pci->edma.ops = &dw_pcie_edma_ops;
+
+	pci->edma.flags |= DW_EDMA_CHIP_LOCAL;
+
+	pci->edma.ll_wr_cnt = FIELD_GET(PCIE_DMA_NUM_WR_CHAN, val);
+	pci->edma.ll_rd_cnt = FIELD_GET(PCIE_DMA_NUM_RD_CHAN, val);
+
+	/* Sanity check the channels count if the mapping was incorrect */
+	if (!pci->edma.ll_wr_cnt || pci->edma.ll_wr_cnt > EDMA_MAX_WR_CH ||
+	    !pci->edma.ll_rd_cnt || pci->edma.ll_rd_cnt > EDMA_MAX_RD_CH)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int dw_pcie_edma_irq_verify(struct dw_pcie *pci)
+{
+	struct platform_device *pdev = to_platform_device(pci->dev);
+	u16 ch_cnt = pci->edma.ll_wr_cnt + pci->edma.ll_rd_cnt;
+	char name[6];
+	int ret;
+
+	if (pci->edma.nr_irqs == 1)
+		return 0;
+	else if (pci->edma.nr_irqs > 1)
+		return pci->edma.nr_irqs != ch_cnt ? -EINVAL : 0;
+
+	ret = platform_get_irq_byname_optional(pdev, "dma");
+	if (ret > 0) {
+		pci->edma.nr_irqs = 1;
+		return 0;
+	}
+
+	for (; pci->edma.nr_irqs < ch_cnt; pci->edma.nr_irqs++) {
+		snprintf(name, sizeof(name), "dma%d", pci->edma.nr_irqs);
+
+		ret = platform_get_irq_byname_optional(pdev, name);
+		if (ret <= 0)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int dw_pcie_edma_ll_alloc(struct dw_pcie *pci)
+{
+	struct dw_edma_region *ll;
+	dma_addr_t paddr;
+	int i;
+
+	for (i = 0; i < pci->edma.ll_wr_cnt; i++) {
+		ll = &pci->edma.ll_region_wr[i];
+		ll->sz = DMA_LLP_MEM_SIZE;
+		ll->vaddr.mem = dmam_alloc_coherent(pci->dev, ll->sz,
+						    &paddr, GFP_KERNEL);
+		if (!ll->vaddr.mem)
+			return -ENOMEM;
+
+		ll->paddr = paddr;
+	}
+
+	for (i = 0; i < pci->edma.ll_rd_cnt; i++) {
+		ll = &pci->edma.ll_region_rd[i];
+		ll->sz = DMA_LLP_MEM_SIZE;
+		ll->vaddr.mem = dmam_alloc_coherent(pci->dev, ll->sz,
+						    &paddr, GFP_KERNEL);
+		if (!ll->vaddr.mem)
+			return -ENOMEM;
+
+		ll->paddr = paddr;
+	}
+
+	return 0;
+}
+
+int dw_pcie_edma_detect(struct dw_pcie *pci)
+{
+	int ret;
+
+	/* Don't fail if no eDMA was found (for the backward compatibility) */
+	ret = dw_pcie_edma_find_chip(pci);
+	if (ret)
+		return 0;
+
+	/* Don't fail on the IRQs verification (for the backward compatibility) */
+	ret = dw_pcie_edma_irq_verify(pci);
+	if (ret) {
+		dev_err(pci->dev, "Invalid eDMA IRQs found\n");
+		return 0;
+	}
+
+	ret = dw_pcie_edma_ll_alloc(pci);
+	if (ret) {
+		dev_err(pci->dev, "Couldn't allocate LLP memory\n");
+		return ret;
+	}
+
+	/* Don't fail if the DW eDMA driver can't find the device */
+	ret = dw_edma_probe(&pci->edma);
+	if (ret && ret != -ENODEV) {
+		dev_err(pci->dev, "Couldn't register eDMA device\n");
+		return ret;
+	}
+
+	dev_info(pci->dev, "eDMA: unroll %s, %hu wr, %hu rd\n",
+		 pci->edma.mf == EDMA_MF_EDMA_UNROLL ? "T" : "F",
+		 pci->edma.ll_wr_cnt, pci->edma.ll_rd_cnt);
+
+	return 0;
+}
+
+void dw_pcie_edma_remove(struct dw_pcie *pci)
+{
+	dw_edma_remove(&pci->edma);
+}
+
+void dw_pcie_setup(struct dw_pcie *pci)
+{
+	u32 val;
+
+	if (pci->link_gen > 0)
+		dw_pcie_link_set_max_speed(pci, pci->link_gen);
+
+	/* Configure Gen1 N_FTS */
+	if (pci->n_fts[0]) {
+		val = dw_pcie_readl_dbi(pci, PCIE_PORT_AFR);
+		val &= ~(PORT_AFR_N_FTS_MASK | PORT_AFR_CC_N_FTS_MASK);
+		val |= PORT_AFR_N_FTS(pci->n_fts[0]);
+		val |= PORT_AFR_CC_N_FTS(pci->n_fts[0]);
+		dw_pcie_writel_dbi(pci, PCIE_PORT_AFR, val);
+	}
+
+	/* Configure Gen2+ N_FTS */
+	if (pci->n_fts[1]) {
+		val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+		val &= ~PORT_LOGIC_N_FTS_MASK;
+		val |= pci->n_fts[1];
+		dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
+	}
+
+	if (dw_pcie_cap_is(pci, CDM_CHECK)) {
+		val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS);
+		val |= PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS |
+		       PCIE_PL_CHK_REG_CHK_REG_START;
+		dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val);
+	}
+
+	val = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL);
+	val &= ~PORT_LINK_FAST_LINK_MODE;
+	val |= PORT_LINK_DLL_LINK_EN;
+	dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val);
+
+	dw_pcie_link_set_max_link_width(pci, pci->num_lanes);
+}
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
new file mode 100644
index 000000000..ef0b2efa9
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -0,0 +1,645 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Synopsys DesignWare PCIe host controller driver
+ *
+ * Copyright (C) 2013 Samsung Electronics Co., Ltd.
+ *		https://www.samsung.com
+ *
+ * Author: Jingoo Han <jg1.han@samsung.com>
+ */
+
+#ifndef _PCIE_DESIGNWARE_H
+#define _PCIE_DESIGNWARE_H
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma/edma.h>
+#include <linux/gpio/consumer.h>
+#include <linux/irq.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
+#include <linux/reset.h>
+
+#include <linux/pci-epc.h>
+#include <linux/pci-epf.h>
+
+/* DWC PCIe IP-core versions (native support since v4.70a) */
+#define DW_PCIE_VER_365A		0x3336352a
+#define DW_PCIE_VER_460A		0x3436302a
+#define DW_PCIE_VER_470A		0x3437302a
+#define DW_PCIE_VER_480A		0x3438302a
+#define DW_PCIE_VER_490A		0x3439302a
+#define DW_PCIE_VER_520A		0x3532302a
+#define DW_PCIE_VER_540A		0x3534302a
+
+#define __dw_pcie_ver_cmp(_pci, _ver, _op) \
+	((_pci)->version _op DW_PCIE_VER_ ## _ver)
+
+#define dw_pcie_ver_is(_pci, _ver) __dw_pcie_ver_cmp(_pci, _ver, ==)
+
+#define dw_pcie_ver_is_ge(_pci, _ver) __dw_pcie_ver_cmp(_pci, _ver, >=)
+
+#define dw_pcie_ver_type_is(_pci, _ver, _type) \
+	(__dw_pcie_ver_cmp(_pci, _ver, ==) && \
+	 __dw_pcie_ver_cmp(_pci, TYPE_ ## _type, ==))
+
+#define dw_pcie_ver_type_is_ge(_pci, _ver, _type) \
+	(__dw_pcie_ver_cmp(_pci, _ver, ==) && \
+	 __dw_pcie_ver_cmp(_pci, TYPE_ ## _type, >=))
+
+/* DWC PCIe controller capabilities */
+#define DW_PCIE_CAP_REQ_RES		0
+#define DW_PCIE_CAP_IATU_UNROLL		1
+#define DW_PCIE_CAP_CDM_CHECK		2
+
+#define dw_pcie_cap_is(_pci, _cap) \
+	test_bit(DW_PCIE_CAP_ ## _cap, &(_pci)->caps)
+
+#define dw_pcie_cap_set(_pci, _cap) \
+	set_bit(DW_PCIE_CAP_ ## _cap, &(_pci)->caps)
+
+/* Parameters for the waiting for link up routine */
+#define LINK_WAIT_MAX_RETRIES		10
+#define LINK_WAIT_USLEEP_MIN		90000
+#define LINK_WAIT_USLEEP_MAX		100000
+
+/* Parameters for the waiting for iATU enabled routine */
+#define LINK_WAIT_MAX_IATU_RETRIES	5
+#define LINK_WAIT_IATU			9
+
+/* Synopsys-specific PCIe configuration registers */
+#define PCIE_PORT_AFR			0x70C
+#define PORT_AFR_N_FTS_MASK		GENMASK(15, 8)
+#define PORT_AFR_N_FTS(n)		FIELD_PREP(PORT_AFR_N_FTS_MASK, n)
+#define PORT_AFR_CC_N_FTS_MASK		GENMASK(23, 16)
+#define PORT_AFR_CC_N_FTS(n)		FIELD_PREP(PORT_AFR_CC_N_FTS_MASK, n)
+#define PORT_AFR_ENTER_ASPM		BIT(30)
+#define PORT_AFR_L0S_ENTRANCE_LAT_SHIFT	24
+#define PORT_AFR_L0S_ENTRANCE_LAT_MASK	GENMASK(26, 24)
+#define PORT_AFR_L1_ENTRANCE_LAT_SHIFT	27
+#define PORT_AFR_L1_ENTRANCE_LAT_MASK	GENMASK(29, 27)
+
+#define PCIE_PORT_LINK_CONTROL		0x710
+#define PORT_LINK_DLL_LINK_EN		BIT(5)
+#define PORT_LINK_FAST_LINK_MODE	BIT(7)
+#define PORT_LINK_MODE_MASK		GENMASK(21, 16)
+#define PORT_LINK_MODE(n)		FIELD_PREP(PORT_LINK_MODE_MASK, n)
+#define PORT_LINK_MODE_1_LANES		PORT_LINK_MODE(0x1)
+#define PORT_LINK_MODE_2_LANES		PORT_LINK_MODE(0x3)
+#define PORT_LINK_MODE_4_LANES		PORT_LINK_MODE(0x7)
+#define PORT_LINK_MODE_8_LANES		PORT_LINK_MODE(0xf)
+
+#define PCIE_PORT_DEBUG0		0x728
+#define PORT_LOGIC_LTSSM_STATE_MASK	0x1f
+#define PORT_LOGIC_LTSSM_STATE_L0	0x11
+#define PCIE_PORT_DEBUG1		0x72C
+#define PCIE_PORT_DEBUG1_LINK_UP		BIT(4)
+#define PCIE_PORT_DEBUG1_LINK_IN_TRAINING	BIT(29)
+
+#define PCIE_LINK_WIDTH_SPEED_CONTROL	0x80C
+#define PORT_LOGIC_N_FTS_MASK		GENMASK(7, 0)
+#define PORT_LOGIC_SPEED_CHANGE		BIT(17)
+#define PORT_LOGIC_LINK_WIDTH_MASK	GENMASK(12, 8)
+#define PORT_LOGIC_LINK_WIDTH(n)	FIELD_PREP(PORT_LOGIC_LINK_WIDTH_MASK, n)
+#define PORT_LOGIC_LINK_WIDTH_1_LANES	PORT_LOGIC_LINK_WIDTH(0x1)
+#define PORT_LOGIC_LINK_WIDTH_2_LANES	PORT_LOGIC_LINK_WIDTH(0x2)
+#define PORT_LOGIC_LINK_WIDTH_4_LANES	PORT_LOGIC_LINK_WIDTH(0x4)
+#define PORT_LOGIC_LINK_WIDTH_8_LANES	PORT_LOGIC_LINK_WIDTH(0x8)
+
+#define PCIE_MSI_ADDR_LO		0x820
+#define PCIE_MSI_ADDR_HI		0x824
+#define PCIE_MSI_INTR0_ENABLE		0x828
+#define PCIE_MSI_INTR0_MASK		0x82C
+#define PCIE_MSI_INTR0_STATUS		0x830
+
+#define GEN3_RELATED_OFF			0x890
+#define GEN3_RELATED_OFF_GEN3_ZRXDC_NONCOMPL	BIT(0)
+#define GEN3_RELATED_OFF_RXEQ_RGRDLESS_RXTS	BIT(13)
+#define GEN3_RELATED_OFF_GEN3_EQ_DISABLE	BIT(16)
+#define GEN3_RELATED_OFF_RATE_SHADOW_SEL_SHIFT	24
+#define GEN3_RELATED_OFF_RATE_SHADOW_SEL_MASK	GENMASK(25, 24)
+
+#define PCIE_PORT_MULTI_LANE_CTRL	0x8C0
+#define PORT_MLTI_UPCFG_SUPPORT		BIT(7)
+
+#define PCIE_VERSION_NUMBER		0x8F8
+#define PCIE_VERSION_TYPE		0x8FC
+
+/*
+ * iATU inbound and outbound windows CSRs. Before the IP-core v4.80a each
+ * iATU region CSRs had been indirectly accessible by means of the dedicated
+ * viewport selector. The iATU/eDMA CSRs space was re-designed in DWC PCIe
+ * v4.80a in a way so the viewport was unrolled into the directly accessible
+ * iATU/eDMA CSRs space.
+ */
+#define PCIE_ATU_VIEWPORT		0x900
+#define PCIE_ATU_REGION_DIR_IB		BIT(31)
+#define PCIE_ATU_REGION_DIR_OB		0
+#define PCIE_ATU_VIEWPORT_BASE		0x904
+#define PCIE_ATU_UNROLL_BASE(dir, index) \
+	(((index) << 9) | ((dir == PCIE_ATU_REGION_DIR_IB) ? BIT(8) : 0))
+#define PCIE_ATU_VIEWPORT_SIZE		0x2C
+#define PCIE_ATU_REGION_CTRL1		0x000
+#define PCIE_ATU_INCREASE_REGION_SIZE	BIT(13)
+#define PCIE_ATU_TYPE_MEM		0x0
+#define PCIE_ATU_TYPE_IO		0x2
+#define PCIE_ATU_TYPE_CFG0		0x4
+#define PCIE_ATU_TYPE_CFG1		0x5
+#define PCIE_ATU_TD			BIT(8)
+#define PCIE_ATU_FUNC_NUM(pf)           ((pf) << 20)
+#define PCIE_ATU_REGION_CTRL2		0x004
+#define PCIE_ATU_ENABLE			BIT(31)
+#define PCIE_ATU_BAR_MODE_ENABLE	BIT(30)
+#define PCIE_ATU_FUNC_NUM_MATCH_EN      BIT(19)
+#define PCIE_ATU_LOWER_BASE		0x008
+#define PCIE_ATU_UPPER_BASE		0x00C
+#define PCIE_ATU_LIMIT			0x010
+#define PCIE_ATU_LOWER_TARGET		0x014
+#define PCIE_ATU_BUS(x)			FIELD_PREP(GENMASK(31, 24), x)
+#define PCIE_ATU_DEV(x)			FIELD_PREP(GENMASK(23, 19), x)
+#define PCIE_ATU_FUNC(x)		FIELD_PREP(GENMASK(18, 16), x)
+#define PCIE_ATU_UPPER_TARGET		0x018
+#define PCIE_ATU_UPPER_LIMIT		0x020
+
+#define PCIE_MISC_CONTROL_1_OFF		0x8BC
+#define PCIE_DBI_RO_WR_EN		BIT(0)
+
+#define PCIE_MSIX_DOORBELL		0x948
+#define PCIE_MSIX_DOORBELL_PF_SHIFT	24
+
+/*
+ * eDMA CSRs. DW PCIe IP-core v4.70a and older had the eDMA registers accessible
+ * over the Port Logic registers space. Afterwards the unrolled mapping was
+ * introduced so eDMA and iATU could be accessed via a dedicated registers
+ * space.
+ */
+#define PCIE_DMA_VIEWPORT_BASE		0x970
+#define PCIE_DMA_UNROLL_BASE		0x80000
+#define PCIE_DMA_CTRL			0x008
+#define PCIE_DMA_NUM_WR_CHAN		GENMASK(3, 0)
+#define PCIE_DMA_NUM_RD_CHAN		GENMASK(19, 16)
+
+#define PCIE_PL_CHK_REG_CONTROL_STATUS			0xB20
+#define PCIE_PL_CHK_REG_CHK_REG_START			BIT(0)
+#define PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS		BIT(1)
+#define PCIE_PL_CHK_REG_CHK_REG_COMPARISON_ERROR	BIT(16)
+#define PCIE_PL_CHK_REG_CHK_REG_LOGIC_ERROR		BIT(17)
+#define PCIE_PL_CHK_REG_CHK_REG_COMPLETE		BIT(18)
+
+#define PCIE_PL_CHK_REG_ERR_ADDR			0xB28
+
+/*
+ * iATU Unroll-specific register definitions
+ * From 4.80 core version the address translation will be made by unroll
+ */
+#define PCIE_ATU_UNR_REGION_CTRL1	0x00
+#define PCIE_ATU_UNR_REGION_CTRL2	0x04
+#define PCIE_ATU_UNR_LOWER_BASE		0x08
+#define PCIE_ATU_UNR_UPPER_BASE		0x0C
+#define PCIE_ATU_UNR_LOWER_LIMIT	0x10
+#define PCIE_ATU_UNR_LOWER_TARGET	0x14
+#define PCIE_ATU_UNR_UPPER_TARGET	0x18
+#define PCIE_ATU_UNR_UPPER_LIMIT	0x20
+
+/*
+ * RAS-DES register definitions
+ */
+#define PCIE_RAS_DES_EVENT_COUNTER_CONTROL	0x8
+#define EVENT_COUNTER_ALL_CLEAR		0x3
+#define EVENT_COUNTER_ENABLE_ALL	0x7
+#define EVENT_COUNTER_ENABLE_SHIFT	2
+#define EVENT_COUNTER_EVENT_SEL_MASK	GENMASK(7, 0)
+#define EVENT_COUNTER_EVENT_SEL_SHIFT	16
+#define EVENT_COUNTER_EVENT_Tx_L0S	0x2
+#define EVENT_COUNTER_EVENT_Rx_L0S	0x3
+#define EVENT_COUNTER_EVENT_L1		0x5
+#define EVENT_COUNTER_EVENT_L1_1	0x7
+#define EVENT_COUNTER_EVENT_L1_2	0x8
+#define EVENT_COUNTER_GROUP_SEL_SHIFT	24
+#define EVENT_COUNTER_GROUP_5		0x5
+
+#define PCIE_RAS_DES_EVENT_COUNTER_DATA		0xc
+
+/*
+ * The default address offset between dbi_base and atu_base. Root controller
+ * drivers are not required to initialize atu_base if the offset matches this
+ * default; the driver core automatically derives atu_base from dbi_base using
+ * this offset, if atu_base not set.
+ */
+#define DEFAULT_DBI_ATU_OFFSET (0x3 << 20)
+#define DEFAULT_DBI_DMA_OFFSET PCIE_DMA_UNROLL_BASE
+
+#define MAX_MSI_IRQS			256
+#define MAX_MSI_IRQS_PER_CTRL		32
+#define MAX_MSI_CTRLS			(MAX_MSI_IRQS / MAX_MSI_IRQS_PER_CTRL)
+#define MSI_REG_CTRL_BLOCK_SIZE		12
+#define MSI_DEF_NUM_VECTORS		32
+
+/* Maximum number of inbound/outbound iATUs */
+#define MAX_IATU_IN			256
+#define MAX_IATU_OUT			256
+
+/* Default eDMA LLP memory size */
+#define DMA_LLP_MEM_SIZE		PAGE_SIZE
+
+struct dw_pcie;
+struct dw_pcie_rp;
+struct dw_pcie_ep;
+
+enum dw_pcie_device_mode {
+	DW_PCIE_UNKNOWN_TYPE,
+	DW_PCIE_EP_TYPE,
+	DW_PCIE_LEG_EP_TYPE,
+	DW_PCIE_RC_TYPE,
+};
+
+enum dw_pcie_app_clk {
+	DW_PCIE_DBI_CLK,
+	DW_PCIE_MSTR_CLK,
+	DW_PCIE_SLV_CLK,
+	DW_PCIE_NUM_APP_CLKS
+};
+
+enum dw_pcie_core_clk {
+	DW_PCIE_PIPE_CLK,
+	DW_PCIE_CORE_CLK,
+	DW_PCIE_AUX_CLK,
+	DW_PCIE_REF_CLK,
+	DW_PCIE_NUM_CORE_CLKS
+};
+
+enum dw_pcie_app_rst {
+	DW_PCIE_DBI_RST,
+	DW_PCIE_MSTR_RST,
+	DW_PCIE_SLV_RST,
+	DW_PCIE_NUM_APP_RSTS
+};
+
+enum dw_pcie_core_rst {
+	DW_PCIE_NON_STICKY_RST,
+	DW_PCIE_STICKY_RST,
+	DW_PCIE_CORE_RST,
+	DW_PCIE_PIPE_RST,
+	DW_PCIE_PHY_RST,
+	DW_PCIE_HOT_RST,
+	DW_PCIE_PWR_RST,
+	DW_PCIE_NUM_CORE_RSTS
+};
+
+enum dw_pcie_ltssm {
+	/* Need to align with PCIE_PORT_DEBUG0 bits 0:5 */
+	DW_PCIE_LTSSM_DETECT_QUIET = 0x0,
+	DW_PCIE_LTSSM_DETECT_ACT = 0x1,
+	DW_PCIE_LTSSM_L0 = 0x11,
+	DW_PCIE_LTSSM_L2_IDLE = 0x15,
+
+	DW_PCIE_LTSSM_UNKNOWN = 0xFFFFFFFF,
+};
+
+struct dw_pcie_host_ops {
+	int (*host_init)(struct dw_pcie_rp *pp);
+	void (*host_deinit)(struct dw_pcie_rp *pp);
+	int (*msi_host_init)(struct dw_pcie_rp *pp);
+	void (*pme_turn_off)(struct dw_pcie_rp *pp);
+};
+
+struct dw_pcie_rp {
+	bool			has_msi_ctrl:1;
+	bool			cfg0_io_shared:1;
+	u64			cfg0_base;
+	void __iomem		*va_cfg0_base;
+	u32			cfg0_size;
+	resource_size_t		io_base;
+	phys_addr_t		io_bus_addr;
+	u32			io_size;
+	int			irq;
+	const struct dw_pcie_host_ops *ops;
+	int			msi_irq[MAX_MSI_CTRLS];
+	struct irq_domain	*irq_domain;
+	struct irq_domain	*msi_domain;
+	dma_addr_t		msi_data;
+	struct irq_chip		*msi_irq_chip;
+	u32			num_vectors;
+	u32			irq_mask[MAX_MSI_CTRLS];
+	struct pci_host_bridge  *bridge;
+	raw_spinlock_t		lock;
+	DECLARE_BITMAP(msi_irq_in_use, MAX_MSI_IRQS);
+};
+
+struct dw_pcie_ep_ops {
+	void	(*ep_init)(struct dw_pcie_ep *ep);
+	int	(*raise_irq)(struct dw_pcie_ep *ep, u8 func_no,
+			     enum pci_epc_irq_type type, u16 interrupt_num);
+	const struct pci_epc_features* (*get_features)(struct dw_pcie_ep *ep);
+	/*
+	 * Provide a method to implement the different func config space
+	 * access for different platform, if different func have different
+	 * offset, return the offset of func. if use write a register way
+	 * return a 0, and implement code in callback function of platform
+	 * driver.
+	 */
+	unsigned int (*func_conf_select)(struct dw_pcie_ep *ep, u8 func_no);
+};
+
+struct dw_pcie_ep_func {
+	struct list_head	list;
+	u8			func_no;
+	u8			msi_cap;	/* MSI capability offset */
+	u8			msix_cap;	/* MSI-X capability offset */
+};
+
+struct dw_pcie_ep {
+	struct pci_epc		*epc;
+	struct list_head	func_list;
+	const struct dw_pcie_ep_ops *ops;
+	phys_addr_t		phys_base;
+	size_t			addr_size;
+	size_t			page_size;
+	u8			bar_to_atu[PCI_STD_NUM_BARS];
+	phys_addr_t		*outbound_addr;
+	unsigned long		*ib_window_map;
+	unsigned long		*ob_window_map;
+	void __iomem		*msi_mem;
+	phys_addr_t		msi_mem_phys;
+	struct pci_epf_bar	*epf_bar[PCI_STD_NUM_BARS];
+};
+
+struct dw_pcie_ops {
+	u64	(*cpu_addr_fixup)(struct dw_pcie *pcie, u64 cpu_addr);
+	u32	(*read_dbi)(struct dw_pcie *pcie, void __iomem *base, u32 reg,
+			    size_t size);
+	void	(*write_dbi)(struct dw_pcie *pcie, void __iomem *base, u32 reg,
+			     size_t size, u32 val);
+	void    (*write_dbi2)(struct dw_pcie *pcie, void __iomem *base, u32 reg,
+			      size_t size, u32 val);
+	int	(*link_up)(struct dw_pcie *pcie);
+	enum dw_pcie_ltssm (*get_ltssm)(struct dw_pcie *pcie);
+	int	(*start_link)(struct dw_pcie *pcie);
+	void	(*stop_link)(struct dw_pcie *pcie);
+};
+
+struct dw_pcie {
+	struct device		*dev;
+	void __iomem		*dbi_base;
+	void __iomem		*dbi_base2;
+	void __iomem		*atu_base;
+	size_t			atu_size;
+	u32			num_ib_windows;
+	u32			num_ob_windows;
+	u32			region_align;
+	u64			region_limit;
+	struct dw_pcie_rp	pp;
+	struct dw_pcie_ep	ep;
+	const struct dw_pcie_ops *ops;
+	u32			version;
+	u32			type;
+	unsigned long		caps;
+	int			num_lanes;
+	int			link_gen;
+	u8			n_fts[2];
+	struct dw_edma_chip	edma;
+	struct clk_bulk_data	app_clks[DW_PCIE_NUM_APP_CLKS];
+	struct clk_bulk_data	core_clks[DW_PCIE_NUM_CORE_CLKS];
+	struct reset_control_bulk_data	app_rsts[DW_PCIE_NUM_APP_RSTS];
+	struct reset_control_bulk_data	core_rsts[DW_PCIE_NUM_CORE_RSTS];
+	struct gpio_desc		*pe_rst;
+	bool			suspended;
+};
+
+#define to_dw_pcie_from_pp(port) container_of((port), struct dw_pcie, pp)
+
+#define to_dw_pcie_from_ep(endpoint)   \
+		container_of((endpoint), struct dw_pcie, ep)
+
+int dw_pcie_get_resources(struct dw_pcie *pci);
+
+void dw_pcie_version_detect(struct dw_pcie *pci);
+
+u8 dw_pcie_find_capability(struct dw_pcie *pci, u8 cap);
+u16 dw_pcie_find_ext_capability(struct dw_pcie *pci, u8 cap);
+
+int dw_pcie_read(void __iomem *addr, int size, u32 *val);
+int dw_pcie_write(void __iomem *addr, int size, u32 val);
+
+u32 dw_pcie_read_dbi(struct dw_pcie *pci, u32 reg, size_t size);
+void dw_pcie_write_dbi(struct dw_pcie *pci, u32 reg, size_t size, u32 val);
+void dw_pcie_write_dbi2(struct dw_pcie *pci, u32 reg, size_t size, u32 val);
+int dw_pcie_link_up(struct dw_pcie *pci);
+void dw_pcie_upconfig_setup(struct dw_pcie *pci);
+int dw_pcie_wait_for_link(struct dw_pcie *pci);
+int dw_pcie_prog_outbound_atu(struct dw_pcie *pci, int index, int type,
+			      u64 cpu_addr, u64 pci_addr, u64 size);
+int dw_pcie_prog_ep_outbound_atu(struct dw_pcie *pci, u8 func_no, int index,
+				 int type, u64 cpu_addr, u64 pci_addr, u64 size);
+int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, int index, int type,
+			     u64 cpu_addr, u64 pci_addr, u64 size);
+int dw_pcie_prog_ep_inbound_atu(struct dw_pcie *pci, u8 func_no, int index,
+				int type, u64 cpu_addr, u8 bar);
+void dw_pcie_disable_atu(struct dw_pcie *pci, u32 dir, int index);
+void dw_pcie_setup(struct dw_pcie *pci);
+void dw_pcie_iatu_detect(struct dw_pcie *pci);
+int dw_pcie_edma_detect(struct dw_pcie *pci);
+void dw_pcie_edma_remove(struct dw_pcie *pci);
+
+int dw_pcie_suspend_noirq(struct dw_pcie *pci);
+int dw_pcie_resume_noirq(struct dw_pcie *pci);
+
+static inline void dw_pcie_writel_dbi(struct dw_pcie *pci, u32 reg, u32 val)
+{
+	dw_pcie_write_dbi(pci, reg, 0x4, val);
+}
+
+static inline u32 dw_pcie_readl_dbi(struct dw_pcie *pci, u32 reg)
+{
+	return dw_pcie_read_dbi(pci, reg, 0x4);
+}
+
+static inline void dw_pcie_writew_dbi(struct dw_pcie *pci, u32 reg, u16 val)
+{
+	dw_pcie_write_dbi(pci, reg, 0x2, val);
+}
+
+static inline u16 dw_pcie_readw_dbi(struct dw_pcie *pci, u32 reg)
+{
+	return dw_pcie_read_dbi(pci, reg, 0x2);
+}
+
+static inline void dw_pcie_writeb_dbi(struct dw_pcie *pci, u32 reg, u8 val)
+{
+	dw_pcie_write_dbi(pci, reg, 0x1, val);
+}
+
+static inline u8 dw_pcie_readb_dbi(struct dw_pcie *pci, u32 reg)
+{
+	return dw_pcie_read_dbi(pci, reg, 0x1);
+}
+
+static inline void dw_pcie_writel_dbi2(struct dw_pcie *pci, u32 reg, u32 val)
+{
+	dw_pcie_write_dbi2(pci, reg, 0x4, val);
+}
+
+static inline void dw_pcie_dbi_ro_wr_en(struct dw_pcie *pci)
+{
+	u32 reg;
+	u32 val;
+
+	reg = PCIE_MISC_CONTROL_1_OFF;
+	val = dw_pcie_readl_dbi(pci, reg);
+	val |= PCIE_DBI_RO_WR_EN;
+	dw_pcie_writel_dbi(pci, reg, val);
+}
+
+static inline void dw_pcie_dbi_ro_wr_dis(struct dw_pcie *pci)
+{
+	u32 reg;
+	u32 val;
+
+	reg = PCIE_MISC_CONTROL_1_OFF;
+	val = dw_pcie_readl_dbi(pci, reg);
+	val &= ~PCIE_DBI_RO_WR_EN;
+	dw_pcie_writel_dbi(pci, reg, val);
+}
+
+static inline int dw_pcie_start_link(struct dw_pcie *pci)
+{
+	if (pci->ops && pci->ops->start_link)
+		return pci->ops->start_link(pci);
+
+	return 0;
+}
+
+static inline void dw_pcie_stop_link(struct dw_pcie *pci)
+{
+	if (pci->ops && pci->ops->stop_link)
+		pci->ops->stop_link(pci);
+}
+
+static inline enum dw_pcie_ltssm dw_pcie_get_ltssm(struct dw_pcie *pci)
+{
+	u32 val;
+
+	if (pci->ops && pci->ops->get_ltssm)
+		return pci->ops->get_ltssm(pci);
+
+	val = dw_pcie_readl_dbi(pci, PCIE_PORT_DEBUG0);
+
+	return (enum dw_pcie_ltssm)FIELD_GET(PORT_LOGIC_LTSSM_STATE_MASK, val);
+}
+
+#ifdef CONFIG_PCIE_DW_HOST
+irqreturn_t dw_handle_msi_irq(struct dw_pcie_rp *pp);
+int dw_pcie_setup_rc(struct dw_pcie_rp *pp);
+int dw_pcie_host_init(struct dw_pcie_rp *pp);
+void dw_pcie_host_deinit(struct dw_pcie_rp *pp);
+int dw_pcie_allocate_domains(struct dw_pcie_rp *pp);
+void __iomem *dw_pcie_own_conf_map_bus(struct pci_bus *bus, unsigned int devfn,
+				       int where);
+#else
+static inline irqreturn_t dw_handle_msi_irq(struct dw_pcie_rp *pp)
+{
+	return IRQ_NONE;
+}
+
+static inline int dw_pcie_setup_rc(struct dw_pcie_rp *pp)
+{
+	return 0;
+}
+
+static inline int dw_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	return 0;
+}
+
+static inline void dw_pcie_host_deinit(struct dw_pcie_rp *pp)
+{
+}
+
+static inline int dw_pcie_allocate_domains(struct dw_pcie_rp *pp)
+{
+	return 0;
+}
+static inline void __iomem *dw_pcie_own_conf_map_bus(struct pci_bus *bus,
+						     unsigned int devfn,
+						     int where)
+{
+	return NULL;
+}
+#endif
+
+#ifdef CONFIG_PCIE_DW_EP
+void dw_pcie_ep_linkup(struct dw_pcie_ep *ep);
+int dw_pcie_ep_init(struct dw_pcie_ep *ep);
+int dw_pcie_ep_init_complete(struct dw_pcie_ep *ep);
+void dw_pcie_ep_init_notify(struct dw_pcie_ep *ep);
+void dw_pcie_ep_exit(struct dw_pcie_ep *ep);
+int dw_pcie_ep_raise_legacy_irq(struct dw_pcie_ep *ep, u8 func_no);
+int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no,
+			     u8 interrupt_num);
+int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no,
+			     u16 interrupt_num);
+int dw_pcie_ep_raise_msix_irq_doorbell(struct dw_pcie_ep *ep, u8 func_no,
+				       u16 interrupt_num);
+void dw_pcie_ep_reset_bar(struct dw_pcie *pci, enum pci_barno bar);
+struct dw_pcie_ep_func *
+dw_pcie_ep_get_func_from_ep(struct dw_pcie_ep *ep, u8 func_no);
+#else
+static inline void dw_pcie_ep_linkup(struct dw_pcie_ep *ep)
+{
+}
+
+static inline int dw_pcie_ep_init(struct dw_pcie_ep *ep)
+{
+	return 0;
+}
+
+static inline int dw_pcie_ep_init_complete(struct dw_pcie_ep *ep)
+{
+	return 0;
+}
+
+static inline void dw_pcie_ep_init_notify(struct dw_pcie_ep *ep)
+{
+}
+
+static inline void dw_pcie_ep_exit(struct dw_pcie_ep *ep)
+{
+}
+
+static inline int dw_pcie_ep_raise_legacy_irq(struct dw_pcie_ep *ep, u8 func_no)
+{
+	return 0;
+}
+
+static inline int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no,
+					   u8 interrupt_num)
+{
+	return 0;
+}
+
+static inline int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no,
+					   u16 interrupt_num)
+{
+	return 0;
+}
+
+static inline int dw_pcie_ep_raise_msix_irq_doorbell(struct dw_pcie_ep *ep,
+						     u8 func_no,
+						     u16 interrupt_num)
+{
+	return 0;
+}
+
+static inline void dw_pcie_ep_reset_bar(struct dw_pcie *pci, enum pci_barno bar)
+{
+}
+
+static inline struct dw_pcie_ep_func *
+dw_pcie_ep_get_func_from_ep(struct dw_pcie_ep *ep, u8 func_no)
+{
+	return NULL;
+}
+#endif
+#endif /* _PCIE_DESIGNWARE_H */
diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
new file mode 100644
index 000000000..2fe42c700
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for Rockchip SoCs.
+ *
+ * Copyright (C) 2021 Rockchip Electronics Co., Ltd.
+ *		http://www.rock-chips.com
+ *
+ * Author: Simon Xue <xxm@rock-chips.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/gpio/consumer.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+
+#include "pcie-designware.h"
+
+/*
+ * The upper 16 bits of PCIE_CLIENT_CONFIG are a write
+ * mask for the lower 16 bits.
+ */
+#define HIWORD_UPDATE(mask, val) (((mask) << 16) | (val))
+#define HIWORD_UPDATE_BIT(val)	HIWORD_UPDATE(val, val)
+#define HIWORD_DISABLE_BIT(val)	HIWORD_UPDATE(val, ~val)
+
+#define to_rockchip_pcie(x) dev_get_drvdata((x)->dev)
+
+#define PCIE_CLIENT_RC_MODE		HIWORD_UPDATE_BIT(0x40)
+#define PCIE_CLIENT_ENABLE_LTSSM	HIWORD_UPDATE_BIT(0xc)
+#define PCIE_SMLH_LINKUP		BIT(16)
+#define PCIE_RDLH_LINKUP		BIT(17)
+#define PCIE_LINKUP			(PCIE_SMLH_LINKUP | PCIE_RDLH_LINKUP)
+#define PCIE_L0S_ENTRY			0x11
+#define PCIE_CLIENT_GENERAL_CONTROL	0x0
+#define PCIE_CLIENT_INTR_STATUS_LEGACY	0x8
+#define PCIE_CLIENT_INTR_MASK_LEGACY	0x1c
+#define PCIE_CLIENT_GENERAL_DEBUG	0x104
+#define PCIE_CLIENT_HOT_RESET_CTRL	0x180
+#define PCIE_CLIENT_LTSSM_STATUS	0x300
+#define PCIE_LTSSM_ENABLE_ENHANCE	BIT(4)
+#define PCIE_LTSSM_STATUS_MASK		GENMASK(5, 0)
+
+struct rockchip_pcie {
+	struct dw_pcie			pci;
+	void __iomem			*apb_base;
+	struct phy			*phy;
+	struct clk_bulk_data		*clks;
+	unsigned int			clk_cnt;
+	struct reset_control		*rst;
+	struct gpio_desc		*rst_gpio;
+	struct regulator                *vpcie3v3;
+	struct irq_domain		*irq_domain;
+};
+
+static int rockchip_pcie_readl_apb(struct rockchip_pcie *rockchip,
+					     u32 reg)
+{
+	return readl_relaxed(rockchip->apb_base + reg);
+}
+
+static void rockchip_pcie_writel_apb(struct rockchip_pcie *rockchip,
+						u32 val, u32 reg)
+{
+	writel_relaxed(val, rockchip->apb_base + reg);
+}
+
+static void rockchip_pcie_legacy_int_handler(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct rockchip_pcie *rockchip = irq_desc_get_handler_data(desc);
+	unsigned long reg, hwirq;
+
+	chained_irq_enter(chip, desc);
+
+	reg = rockchip_pcie_readl_apb(rockchip, PCIE_CLIENT_INTR_STATUS_LEGACY);
+
+	for_each_set_bit(hwirq, &reg, 4)
+		generic_handle_domain_irq(rockchip->irq_domain, hwirq);
+
+	chained_irq_exit(chip, desc);
+}
+
+static void rockchip_intx_mask(struct irq_data *data)
+{
+	rockchip_pcie_writel_apb(irq_data_get_irq_chip_data(data),
+				 HIWORD_UPDATE_BIT(BIT(data->hwirq)),
+				 PCIE_CLIENT_INTR_MASK_LEGACY);
+};
+
+static void rockchip_intx_unmask(struct irq_data *data)
+{
+	rockchip_pcie_writel_apb(irq_data_get_irq_chip_data(data),
+				 HIWORD_DISABLE_BIT(BIT(data->hwirq)),
+				 PCIE_CLIENT_INTR_MASK_LEGACY);
+};
+
+static struct irq_chip rockchip_intx_irq_chip = {
+	.name			= "INTx",
+	.irq_mask		= rockchip_intx_mask,
+	.irq_unmask		= rockchip_intx_unmask,
+	.flags			= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND,
+};
+
+static int rockchip_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
+				  irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &rockchip_intx_irq_chip, handle_level_irq);
+	irq_set_chip_data(irq, domain->host_data);
+
+	return 0;
+}
+
+static const struct irq_domain_ops intx_domain_ops = {
+	.map = rockchip_pcie_intx_map,
+};
+
+static int rockchip_pcie_init_irq_domain(struct rockchip_pcie *rockchip)
+{
+	struct device *dev = rockchip->pci.dev;
+	struct device_node *intc;
+
+	intc = of_get_child_by_name(dev->of_node, "legacy-interrupt-controller");
+	if (!intc) {
+		dev_err(dev, "missing child interrupt-controller node\n");
+		return -EINVAL;
+	}
+
+	rockchip->irq_domain = irq_domain_add_linear(intc, PCI_NUM_INTX,
+						    &intx_domain_ops, rockchip);
+	of_node_put(intc);
+	if (!rockchip->irq_domain) {
+		dev_err(dev, "failed to get a INTx IRQ domain\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void rockchip_pcie_enable_ltssm(struct rockchip_pcie *rockchip)
+{
+	rockchip_pcie_writel_apb(rockchip, PCIE_CLIENT_ENABLE_LTSSM,
+				 PCIE_CLIENT_GENERAL_CONTROL);
+}
+
+static int rockchip_pcie_link_up(struct dw_pcie *pci)
+{
+	struct rockchip_pcie *rockchip = to_rockchip_pcie(pci);
+	u32 val = rockchip_pcie_readl_apb(rockchip, PCIE_CLIENT_LTSSM_STATUS);
+
+	if ((val & PCIE_LINKUP) == PCIE_LINKUP &&
+	    (val & PCIE_LTSSM_STATUS_MASK) == PCIE_L0S_ENTRY)
+		return 1;
+
+	return 0;
+}
+
+static int rockchip_pcie_start_link(struct dw_pcie *pci)
+{
+	struct rockchip_pcie *rockchip = to_rockchip_pcie(pci);
+
+	/* Reset device */
+	gpiod_set_value_cansleep(rockchip->rst_gpio, 0);
+
+	rockchip_pcie_enable_ltssm(rockchip);
+
+	/*
+	 * PCIe requires the refclk to be stable for 100µs prior to releasing
+	 * PERST. See table 2-4 in section 2.6.2 AC Specifications of the PCI
+	 * Express Card Electromechanical Specification, 1.1. However, we don't
+	 * know if the refclk is coming from RC's PHY or external OSC. If it's
+	 * from RC, so enabling LTSSM is the just right place to release #PERST.
+	 * We need more extra time as before, rather than setting just
+	 * 100us as we don't know how long should the device need to reset.
+	 */
+	msleep(100);
+	gpiod_set_value_cansleep(rockchip->rst_gpio, 1);
+
+	return 0;
+}
+
+static int rockchip_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct rockchip_pcie *rockchip = to_rockchip_pcie(pci);
+	struct device *dev = rockchip->pci.dev;
+	u32 val = HIWORD_UPDATE_BIT(PCIE_LTSSM_ENABLE_ENHANCE);
+	int irq, ret;
+
+	irq = of_irq_get_byname(dev->of_node, "legacy");
+	if (irq < 0)
+		return irq;
+
+	ret = rockchip_pcie_init_irq_domain(rockchip);
+	if (ret < 0)
+		dev_err(dev, "failed to init irq domain\n");
+
+	irq_set_chained_handler_and_data(irq, rockchip_pcie_legacy_int_handler,
+					 rockchip);
+
+	/* LTSSM enable control mode */
+	rockchip_pcie_writel_apb(rockchip, val, PCIE_CLIENT_HOT_RESET_CTRL);
+
+	rockchip_pcie_writel_apb(rockchip, PCIE_CLIENT_RC_MODE,
+				 PCIE_CLIENT_GENERAL_CONTROL);
+
+	return 0;
+}
+
+static const struct dw_pcie_host_ops rockchip_pcie_host_ops = {
+	.host_init = rockchip_pcie_host_init,
+};
+
+static int rockchip_pcie_clk_init(struct rockchip_pcie *rockchip)
+{
+	struct device *dev = rockchip->pci.dev;
+	int ret;
+
+	ret = devm_clk_bulk_get_all(dev, &rockchip->clks);
+	if (ret < 0)
+		return ret;
+
+	rockchip->clk_cnt = ret;
+
+	return clk_bulk_prepare_enable(rockchip->clk_cnt, rockchip->clks);
+}
+
+static int rockchip_pcie_resource_get(struct platform_device *pdev,
+				      struct rockchip_pcie *rockchip)
+{
+	rockchip->apb_base = devm_platform_ioremap_resource_byname(pdev, "apb");
+	if (IS_ERR(rockchip->apb_base))
+		return PTR_ERR(rockchip->apb_base);
+
+	rockchip->rst_gpio = devm_gpiod_get_optional(&pdev->dev, "reset",
+						     GPIOD_OUT_HIGH);
+	if (IS_ERR(rockchip->rst_gpio))
+		return PTR_ERR(rockchip->rst_gpio);
+
+	rockchip->rst = devm_reset_control_array_get_exclusive(&pdev->dev);
+	if (IS_ERR(rockchip->rst))
+		return dev_err_probe(&pdev->dev, PTR_ERR(rockchip->rst),
+				     "failed to get reset lines\n");
+
+	return 0;
+}
+
+static int rockchip_pcie_phy_init(struct rockchip_pcie *rockchip)
+{
+	struct device *dev = rockchip->pci.dev;
+	int ret;
+
+	rockchip->phy = devm_phy_get(dev, "pcie-phy");
+	if (IS_ERR(rockchip->phy))
+		return dev_err_probe(dev, PTR_ERR(rockchip->phy),
+				     "missing PHY\n");
+
+	ret = phy_init(rockchip->phy);
+	if (ret < 0)
+		return ret;
+
+	ret = phy_power_on(rockchip->phy);
+	if (ret)
+		phy_exit(rockchip->phy);
+
+	return ret;
+}
+
+static void rockchip_pcie_phy_deinit(struct rockchip_pcie *rockchip)
+{
+	phy_exit(rockchip->phy);
+	phy_power_off(rockchip->phy);
+}
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+	.link_up = rockchip_pcie_link_up,
+	.start_link = rockchip_pcie_start_link,
+};
+
+static int rockchip_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct rockchip_pcie *rockchip;
+	struct dw_pcie_rp *pp;
+	int ret;
+
+	rockchip = devm_kzalloc(dev, sizeof(*rockchip), GFP_KERNEL);
+	if (!rockchip)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, rockchip);
+
+	rockchip->pci.dev = dev;
+	rockchip->pci.ops = &dw_pcie_ops;
+
+	pp = &rockchip->pci.pp;
+	pp->ops = &rockchip_pcie_host_ops;
+
+	ret = rockchip_pcie_resource_get(pdev, rockchip);
+	if (ret)
+		return ret;
+
+	ret = reset_control_assert(rockchip->rst);
+	if (ret)
+		return ret;
+
+	/* DON'T MOVE ME: must be enable before PHY init */
+	rockchip->vpcie3v3 = devm_regulator_get_optional(dev, "vpcie3v3");
+	if (IS_ERR(rockchip->vpcie3v3)) {
+		if (PTR_ERR(rockchip->vpcie3v3) != -ENODEV)
+			return dev_err_probe(dev, PTR_ERR(rockchip->vpcie3v3),
+					"failed to get vpcie3v3 regulator\n");
+		rockchip->vpcie3v3 = NULL;
+	} else {
+		ret = regulator_enable(rockchip->vpcie3v3);
+		if (ret) {
+			dev_err(dev, "failed to enable vpcie3v3 regulator\n");
+			return ret;
+		}
+	}
+
+	ret = rockchip_pcie_phy_init(rockchip);
+	if (ret)
+		goto disable_regulator;
+
+	ret = reset_control_deassert(rockchip->rst);
+	if (ret)
+		goto deinit_phy;
+
+	ret = rockchip_pcie_clk_init(rockchip);
+	if (ret)
+		goto deinit_phy;
+
+	ret = dw_pcie_host_init(pp);
+	if (!ret)
+		return 0;
+
+	clk_bulk_disable_unprepare(rockchip->clk_cnt, rockchip->clks);
+deinit_phy:
+	rockchip_pcie_phy_deinit(rockchip);
+disable_regulator:
+	if (rockchip->vpcie3v3)
+		regulator_disable(rockchip->vpcie3v3);
+
+	return ret;
+}
+
+static const struct of_device_id rockchip_pcie_of_match[] = {
+	{ .compatible = "rockchip,rk3568-pcie", },
+	{},
+};
+
+static struct platform_driver rockchip_pcie_driver = {
+	.driver = {
+		.name	= "rockchip-dw-pcie",
+		.of_match_table = rockchip_pcie_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = rockchip_pcie_probe,
+};
+builtin_platform_driver(rockchip_pcie_driver);
diff --git a/drivers/pci/controller/dwc/pcie-fu740.c b/drivers/pci/controller/dwc/pcie-fu740.c
new file mode 100644
index 000000000..1e9b44b8b
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-fu740.c
@@ -0,0 +1,357 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * FU740 DesignWare PCIe Controller integration
+ * Copyright (C) 2019-2021 SiFive, Inc.
+ * Paul Walmsley
+ * Greentime Hu
+ *
+ * Based in part on the i.MX6 PCIe host controller shim which is:
+ *
+ * Copyright (C) 2013 Kosagi
+ *		https://www.kosagi.com
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/reset.h>
+
+#include "pcie-designware.h"
+
+#define to_fu740_pcie(x)	dev_get_drvdata((x)->dev)
+
+struct fu740_pcie {
+	struct dw_pcie pci;
+	void __iomem *mgmt_base;
+	struct gpio_desc *reset;
+	struct gpio_desc *pwren;
+	struct clk *pcie_aux;
+	struct reset_control *rst;
+};
+
+#define SIFIVE_DEVICESRESETREG		0x28
+
+#define PCIEX8MGMT_PERST_N		0x0
+#define PCIEX8MGMT_APP_LTSSM_ENABLE	0x10
+#define PCIEX8MGMT_APP_HOLD_PHY_RST	0x18
+#define PCIEX8MGMT_DEVICE_TYPE		0x708
+#define PCIEX8MGMT_PHY0_CR_PARA_ADDR	0x860
+#define PCIEX8MGMT_PHY0_CR_PARA_RD_EN	0x870
+#define PCIEX8MGMT_PHY0_CR_PARA_RD_DATA	0x878
+#define PCIEX8MGMT_PHY0_CR_PARA_SEL	0x880
+#define PCIEX8MGMT_PHY0_CR_PARA_WR_DATA	0x888
+#define PCIEX8MGMT_PHY0_CR_PARA_WR_EN	0x890
+#define PCIEX8MGMT_PHY0_CR_PARA_ACK	0x898
+#define PCIEX8MGMT_PHY1_CR_PARA_ADDR	0x8a0
+#define PCIEX8MGMT_PHY1_CR_PARA_RD_EN	0x8b0
+#define PCIEX8MGMT_PHY1_CR_PARA_RD_DATA	0x8b8
+#define PCIEX8MGMT_PHY1_CR_PARA_SEL	0x8c0
+#define PCIEX8MGMT_PHY1_CR_PARA_WR_DATA	0x8c8
+#define PCIEX8MGMT_PHY1_CR_PARA_WR_EN	0x8d0
+#define PCIEX8MGMT_PHY1_CR_PARA_ACK	0x8d8
+
+#define PCIEX8MGMT_PHY_CDR_TRACK_EN	BIT(0)
+#define PCIEX8MGMT_PHY_LOS_THRSHLD	BIT(5)
+#define PCIEX8MGMT_PHY_TERM_EN		BIT(9)
+#define PCIEX8MGMT_PHY_TERM_ACDC	BIT(10)
+#define PCIEX8MGMT_PHY_EN		BIT(11)
+#define PCIEX8MGMT_PHY_INIT_VAL		(PCIEX8MGMT_PHY_CDR_TRACK_EN|\
+					 PCIEX8MGMT_PHY_LOS_THRSHLD|\
+					 PCIEX8MGMT_PHY_TERM_EN|\
+					 PCIEX8MGMT_PHY_TERM_ACDC|\
+					 PCIEX8MGMT_PHY_EN)
+
+#define PCIEX8MGMT_PHY_LANEN_DIG_ASIC_RX_OVRD_IN_3	0x1008
+#define PCIEX8MGMT_PHY_LANE_OFF		0x100
+#define PCIEX8MGMT_PHY_LANE0_BASE	(PCIEX8MGMT_PHY_LANEN_DIG_ASIC_RX_OVRD_IN_3 + 0x100 * 0)
+#define PCIEX8MGMT_PHY_LANE1_BASE	(PCIEX8MGMT_PHY_LANEN_DIG_ASIC_RX_OVRD_IN_3 + 0x100 * 1)
+#define PCIEX8MGMT_PHY_LANE2_BASE	(PCIEX8MGMT_PHY_LANEN_DIG_ASIC_RX_OVRD_IN_3 + 0x100 * 2)
+#define PCIEX8MGMT_PHY_LANE3_BASE	(PCIEX8MGMT_PHY_LANEN_DIG_ASIC_RX_OVRD_IN_3 + 0x100 * 3)
+
+static void fu740_pcie_assert_reset(struct fu740_pcie *afp)
+{
+	/* Assert PERST_N GPIO */
+	gpiod_set_value_cansleep(afp->reset, 0);
+	/* Assert controller PERST_N */
+	writel_relaxed(0x0, afp->mgmt_base + PCIEX8MGMT_PERST_N);
+}
+
+static void fu740_pcie_deassert_reset(struct fu740_pcie *afp)
+{
+	/* Deassert controller PERST_N */
+	writel_relaxed(0x1, afp->mgmt_base + PCIEX8MGMT_PERST_N);
+	/* Deassert PERST_N GPIO */
+	gpiod_set_value_cansleep(afp->reset, 1);
+}
+
+static void fu740_pcie_power_on(struct fu740_pcie *afp)
+{
+	gpiod_set_value_cansleep(afp->pwren, 1);
+	/*
+	 * Ensure that PERST has been asserted for at least 100 ms.
+	 * Section 2.2 of PCI Express Card Electromechanical Specification
+	 * Revision 3.0
+	 */
+	msleep(100);
+}
+
+static void fu740_pcie_drive_reset(struct fu740_pcie *afp)
+{
+	fu740_pcie_assert_reset(afp);
+	fu740_pcie_power_on(afp);
+	fu740_pcie_deassert_reset(afp);
+}
+
+static void fu740_phyregwrite(const uint8_t phy, const uint16_t addr,
+			      const uint16_t wrdata, struct fu740_pcie *afp)
+{
+	struct device *dev = afp->pci.dev;
+	void __iomem *phy_cr_para_addr;
+	void __iomem *phy_cr_para_wr_data;
+	void __iomem *phy_cr_para_wr_en;
+	void __iomem *phy_cr_para_ack;
+	int ret, val;
+
+	/* Setup */
+	if (phy) {
+		phy_cr_para_addr = afp->mgmt_base + PCIEX8MGMT_PHY1_CR_PARA_ADDR;
+		phy_cr_para_wr_data = afp->mgmt_base + PCIEX8MGMT_PHY1_CR_PARA_WR_DATA;
+		phy_cr_para_wr_en = afp->mgmt_base + PCIEX8MGMT_PHY1_CR_PARA_WR_EN;
+		phy_cr_para_ack = afp->mgmt_base + PCIEX8MGMT_PHY1_CR_PARA_ACK;
+	} else {
+		phy_cr_para_addr = afp->mgmt_base + PCIEX8MGMT_PHY0_CR_PARA_ADDR;
+		phy_cr_para_wr_data = afp->mgmt_base + PCIEX8MGMT_PHY0_CR_PARA_WR_DATA;
+		phy_cr_para_wr_en = afp->mgmt_base + PCIEX8MGMT_PHY0_CR_PARA_WR_EN;
+		phy_cr_para_ack = afp->mgmt_base + PCIEX8MGMT_PHY0_CR_PARA_ACK;
+	}
+
+	writel_relaxed(addr, phy_cr_para_addr);
+	writel_relaxed(wrdata, phy_cr_para_wr_data);
+	writel_relaxed(1, phy_cr_para_wr_en);
+
+	/* Wait for wait_idle */
+	ret = readl_poll_timeout(phy_cr_para_ack, val, val, 10, 5000);
+	if (ret)
+		dev_warn(dev, "Wait for wait_idle state failed!\n");
+
+	/* Clear */
+	writel_relaxed(0, phy_cr_para_wr_en);
+
+	/* Wait for ~wait_idle */
+	ret = readl_poll_timeout(phy_cr_para_ack, val, !val, 10, 5000);
+	if (ret)
+		dev_warn(dev, "Wait for !wait_idle state failed!\n");
+}
+
+static void fu740_pcie_init_phy(struct fu740_pcie *afp)
+{
+	/* Enable phy cr_para_sel interfaces */
+	writel_relaxed(0x1, afp->mgmt_base + PCIEX8MGMT_PHY0_CR_PARA_SEL);
+	writel_relaxed(0x1, afp->mgmt_base + PCIEX8MGMT_PHY1_CR_PARA_SEL);
+
+	/*
+	 * Wait 10 cr_para cycles to guarantee that the registers are ready
+	 * to be edited.
+	 */
+	ndelay(10);
+
+	/* Set PHY AC termination mode */
+	fu740_phyregwrite(0, PCIEX8MGMT_PHY_LANE0_BASE, PCIEX8MGMT_PHY_INIT_VAL, afp);
+	fu740_phyregwrite(0, PCIEX8MGMT_PHY_LANE1_BASE, PCIEX8MGMT_PHY_INIT_VAL, afp);
+	fu740_phyregwrite(0, PCIEX8MGMT_PHY_LANE2_BASE, PCIEX8MGMT_PHY_INIT_VAL, afp);
+	fu740_phyregwrite(0, PCIEX8MGMT_PHY_LANE3_BASE, PCIEX8MGMT_PHY_INIT_VAL, afp);
+	fu740_phyregwrite(1, PCIEX8MGMT_PHY_LANE0_BASE, PCIEX8MGMT_PHY_INIT_VAL, afp);
+	fu740_phyregwrite(1, PCIEX8MGMT_PHY_LANE1_BASE, PCIEX8MGMT_PHY_INIT_VAL, afp);
+	fu740_phyregwrite(1, PCIEX8MGMT_PHY_LANE2_BASE, PCIEX8MGMT_PHY_INIT_VAL, afp);
+	fu740_phyregwrite(1, PCIEX8MGMT_PHY_LANE3_BASE, PCIEX8MGMT_PHY_INIT_VAL, afp);
+}
+
+static int fu740_pcie_start_link(struct dw_pcie *pci)
+{
+	struct device *dev = pci->dev;
+	struct fu740_pcie *afp = dev_get_drvdata(dev);
+	u8 cap_exp = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	int ret;
+	u32 orig, tmp;
+
+	/*
+	 * Force 2.5GT/s when starting the link, due to some devices not
+	 * probing at higher speeds. This happens with the PCIe switch
+	 * on the Unmatched board when U-Boot has not initialised the PCIe.
+	 * The fix in U-Boot is to force 2.5GT/s, which then gets cleared
+	 * by the soft reset done by this driver.
+	 */
+	dev_dbg(dev, "cap_exp at %x\n", cap_exp);
+	dw_pcie_dbi_ro_wr_en(pci);
+
+	tmp = dw_pcie_readl_dbi(pci, cap_exp + PCI_EXP_LNKCAP);
+	orig = tmp & PCI_EXP_LNKCAP_SLS;
+	tmp &= ~PCI_EXP_LNKCAP_SLS;
+	tmp |= PCI_EXP_LNKCAP_SLS_2_5GB;
+	dw_pcie_writel_dbi(pci, cap_exp + PCI_EXP_LNKCAP, tmp);
+
+	/* Enable LTSSM */
+	writel_relaxed(0x1, afp->mgmt_base + PCIEX8MGMT_APP_LTSSM_ENABLE);
+
+	ret = dw_pcie_wait_for_link(pci);
+	if (ret) {
+		dev_err(dev, "error: link did not start\n");
+		goto err;
+	}
+
+	tmp = dw_pcie_readl_dbi(pci, cap_exp + PCI_EXP_LNKCAP);
+	if ((tmp & PCI_EXP_LNKCAP_SLS) != orig) {
+		dev_dbg(dev, "changing speed back to original\n");
+
+		tmp &= ~PCI_EXP_LNKCAP_SLS;
+		tmp |= orig;
+		dw_pcie_writel_dbi(pci, cap_exp + PCI_EXP_LNKCAP, tmp);
+
+		tmp = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+		tmp |= PORT_LOGIC_SPEED_CHANGE;
+		dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, tmp);
+
+		ret = dw_pcie_wait_for_link(pci);
+		if (ret) {
+			dev_err(dev, "error: link did not start at new speed\n");
+			goto err;
+		}
+	}
+
+	ret = 0;
+err:
+	WARN_ON(ret);	/* we assume that errors will be very rare */
+	dw_pcie_dbi_ro_wr_dis(pci);
+	return ret;
+}
+
+static int fu740_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct fu740_pcie *afp = to_fu740_pcie(pci);
+	struct device *dev = pci->dev;
+	int ret;
+
+	/* Power on reset */
+	fu740_pcie_drive_reset(afp);
+
+	/* Enable pcieauxclk */
+	ret = clk_prepare_enable(afp->pcie_aux);
+	if (ret) {
+		dev_err(dev, "unable to enable pcie_aux clock\n");
+		return ret;
+	}
+
+	/*
+	 * Assert hold_phy_rst (hold the controller LTSSM in reset after
+	 * power_up_rst_n for register programming with cr_para)
+	 */
+	writel_relaxed(0x1, afp->mgmt_base + PCIEX8MGMT_APP_HOLD_PHY_RST);
+
+	/* Deassert power_up_rst_n */
+	ret = reset_control_deassert(afp->rst);
+	if (ret) {
+		dev_err(dev, "unable to deassert pcie_power_up_rst_n\n");
+		return ret;
+	}
+
+	fu740_pcie_init_phy(afp);
+
+	/* Disable pcieauxclk */
+	clk_disable_unprepare(afp->pcie_aux);
+	/* Clear hold_phy_rst */
+	writel_relaxed(0x0, afp->mgmt_base + PCIEX8MGMT_APP_HOLD_PHY_RST);
+	/* Enable pcieauxclk */
+	clk_prepare_enable(afp->pcie_aux);
+	/* Set RC mode */
+	writel_relaxed(0x4, afp->mgmt_base + PCIEX8MGMT_DEVICE_TYPE);
+
+	return 0;
+}
+
+static const struct dw_pcie_host_ops fu740_pcie_host_ops = {
+	.host_init = fu740_pcie_host_init,
+};
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+	.start_link = fu740_pcie_start_link,
+};
+
+static int fu740_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct dw_pcie *pci;
+	struct fu740_pcie *afp;
+
+	afp = devm_kzalloc(dev, sizeof(*afp), GFP_KERNEL);
+	if (!afp)
+		return -ENOMEM;
+	pci = &afp->pci;
+	pci->dev = dev;
+	pci->ops = &dw_pcie_ops;
+	pci->pp.ops = &fu740_pcie_host_ops;
+	pci->pp.num_vectors = MAX_MSI_IRQS;
+
+	/* SiFive specific region: mgmt */
+	afp->mgmt_base = devm_platform_ioremap_resource_byname(pdev, "mgmt");
+	if (IS_ERR(afp->mgmt_base))
+		return PTR_ERR(afp->mgmt_base);
+
+	/* Fetch GPIOs */
+	afp->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(afp->reset))
+		return dev_err_probe(dev, PTR_ERR(afp->reset), "unable to get reset-gpios\n");
+
+	afp->pwren = devm_gpiod_get_optional(dev, "pwren", GPIOD_OUT_LOW);
+	if (IS_ERR(afp->pwren))
+		return dev_err_probe(dev, PTR_ERR(afp->pwren), "unable to get pwren-gpios\n");
+
+	/* Fetch clocks */
+	afp->pcie_aux = devm_clk_get(dev, "pcie_aux");
+	if (IS_ERR(afp->pcie_aux))
+		return dev_err_probe(dev, PTR_ERR(afp->pcie_aux),
+					     "pcie_aux clock source missing or invalid\n");
+
+	/* Fetch reset */
+	afp->rst = devm_reset_control_get_exclusive(dev, NULL);
+	if (IS_ERR(afp->rst))
+		return dev_err_probe(dev, PTR_ERR(afp->rst), "unable to get reset\n");
+
+	platform_set_drvdata(pdev, afp);
+
+	return dw_pcie_host_init(&pci->pp);
+}
+
+static void fu740_pcie_shutdown(struct platform_device *pdev)
+{
+	struct fu740_pcie *afp = platform_get_drvdata(pdev);
+
+	/* Bring down link, so bootloader gets clean state in case of reboot */
+	fu740_pcie_assert_reset(afp);
+}
+
+static const struct of_device_id fu740_pcie_of_match[] = {
+	{ .compatible = "sifive,fu740-pcie", },
+	{},
+};
+
+static struct platform_driver fu740_pcie_driver = {
+	.driver = {
+		   .name = "fu740-pcie",
+		   .of_match_table = fu740_pcie_of_match,
+		   .suppress_bind_attrs = true,
+	},
+	.probe = fu740_pcie_probe,
+	.shutdown = fu740_pcie_shutdown,
+};
+
+builtin_platform_driver(fu740_pcie_driver);
diff --git a/drivers/pci/controller/dwc/pcie-hisi.c b/drivers/pci/controller/dwc/pcie-hisi.c
new file mode 100644
index 000000000..8904b5b85
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-hisi.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for HiSilicon SoCs
+ *
+ * Copyright (C) 2015 HiSilicon Co., Ltd. http://www.hisilicon.com
+ *
+ * Authors: Zhou Wang <wangzhou1@hisilicon.com>
+ *          Dacai Zhu <zhudacai@hisilicon.com>
+ *          Gabriele Paoloni <gabriele.paoloni@huawei.com>
+ */
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/pci-ecam.h>
+#include "../../pci.h"
+
+#if defined(CONFIG_PCI_HISI) || (defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS))
+
+struct hisi_pcie {
+	void __iomem	*reg_base;
+};
+
+static int hisi_pcie_rd_conf(struct pci_bus *bus, u32 devfn, int where,
+			     int size, u32 *val)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	int dev = PCI_SLOT(devfn);
+
+	if (bus->number == cfg->busr.start) {
+		/* access only one slot on each root port */
+		if (dev > 0)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+		else
+			return pci_generic_config_read32(bus, devfn, where,
+							 size, val);
+	}
+
+	return pci_generic_config_read(bus, devfn, where, size, val);
+}
+
+static int hisi_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
+			     int where, int size, u32 val)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	int dev = PCI_SLOT(devfn);
+
+	if (bus->number == cfg->busr.start) {
+		/* access only one slot on each root port */
+		if (dev > 0)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+		else
+			return pci_generic_config_write32(bus, devfn, where,
+							  size, val);
+	}
+
+	return pci_generic_config_write(bus, devfn, where, size, val);
+}
+
+static void __iomem *hisi_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
+				       int where)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	struct hisi_pcie *pcie = cfg->priv;
+
+	if (bus->number == cfg->busr.start)
+		return pcie->reg_base + where;
+	else
+		return pci_ecam_map_bus(bus, devfn, where);
+}
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
+
+static int hisi_pcie_init(struct pci_config_window *cfg)
+{
+	struct device *dev = cfg->parent;
+	struct hisi_pcie *pcie;
+	struct acpi_device *adev = to_acpi_device(dev);
+	struct acpi_pci_root *root = acpi_driver_data(adev);
+	struct resource *res;
+	int ret;
+
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	/*
+	 * Retrieve RC base and size from a HISI0081 device with _UID
+	 * matching our segment.
+	 */
+	res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
+	if (!res)
+		return -ENOMEM;
+
+	ret = acpi_get_rc_resources(dev, "HISI0081", root->segment, res);
+	if (ret) {
+		dev_err(dev, "can't get rc base address\n");
+		return -ENOMEM;
+	}
+
+	pcie->reg_base = devm_pci_remap_cfgspace(dev, res->start, resource_size(res));
+	if (!pcie->reg_base)
+		return -ENOMEM;
+
+	cfg->priv = pcie;
+	return 0;
+}
+
+const struct pci_ecam_ops hisi_pcie_ops = {
+	.init         =  hisi_pcie_init,
+	.pci_ops      = {
+		.map_bus    = hisi_pcie_map_bus,
+		.read       = hisi_pcie_rd_conf,
+		.write      = hisi_pcie_wr_conf,
+	}
+};
+
+#endif
+
+#ifdef CONFIG_PCI_HISI
+
+static int hisi_pcie_platform_init(struct pci_config_window *cfg)
+{
+	struct device *dev = cfg->parent;
+	struct hisi_pcie *pcie;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct resource *res;
+
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!res) {
+		dev_err(dev, "missing \"reg[1]\"property\n");
+		return -EINVAL;
+	}
+
+	pcie->reg_base = devm_pci_remap_cfgspace(dev, res->start, resource_size(res));
+	if (!pcie->reg_base)
+		return -ENOMEM;
+
+	cfg->priv = pcie;
+	return 0;
+}
+
+static const struct pci_ecam_ops hisi_pcie_platform_ops = {
+	.init         =  hisi_pcie_platform_init,
+	.pci_ops      = {
+		.map_bus    = hisi_pcie_map_bus,
+		.read       = hisi_pcie_rd_conf,
+		.write      = hisi_pcie_wr_conf,
+	}
+};
+
+static const struct of_device_id hisi_pcie_almost_ecam_of_match[] = {
+	{
+		.compatible =  "hisilicon,hip06-pcie-ecam",
+		.data	    =  &hisi_pcie_platform_ops,
+	},
+	{
+		.compatible =  "hisilicon,hip07-pcie-ecam",
+		.data       =  &hisi_pcie_platform_ops,
+	},
+	{},
+};
+
+static struct platform_driver hisi_pcie_almost_ecam_driver = {
+	.probe  = pci_host_common_probe,
+	.driver = {
+		   .name = "hisi-pcie-almost-ecam",
+		   .of_match_table = hisi_pcie_almost_ecam_of_match,
+		   .suppress_bind_attrs = true,
+	},
+};
+builtin_platform_driver(hisi_pcie_almost_ecam_driver);
+
+#endif
+#endif
diff --git a/drivers/pci/controller/dwc/pcie-histb.c b/drivers/pci/controller/dwc/pcie-histb.c
new file mode 100644
index 000000000..fd484cc7c
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-histb.c
@@ -0,0 +1,450 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for HiSilicon STB SoCs
+ *
+ * Copyright (C) 2016-2017 HiSilicon Co., Ltd. http://www.hisilicon.com
+ *
+ * Authors: Ruqiang Ju <juruqiang@hisilicon.com>
+ *          Jianguo Sun <sunjianguo1@huawei.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/reset.h>
+
+#include "pcie-designware.h"
+
+#define to_histb_pcie(x)	dev_get_drvdata((x)->dev)
+
+#define PCIE_SYS_CTRL0			0x0000
+#define PCIE_SYS_CTRL1			0x0004
+#define PCIE_SYS_CTRL7			0x001C
+#define PCIE_SYS_CTRL13			0x0034
+#define PCIE_SYS_CTRL15			0x003C
+#define PCIE_SYS_CTRL16			0x0040
+#define PCIE_SYS_CTRL17			0x0044
+
+#define PCIE_SYS_STAT0			0x0100
+#define PCIE_SYS_STAT4			0x0110
+
+#define PCIE_RDLH_LINK_UP		BIT(5)
+#define PCIE_XMLH_LINK_UP		BIT(15)
+#define PCIE_ELBI_SLV_DBI_ENABLE	BIT(21)
+#define PCIE_APP_LTSSM_ENABLE		BIT(11)
+
+#define PCIE_DEVICE_TYPE_MASK		GENMASK(31, 28)
+#define PCIE_WM_EP			0
+#define PCIE_WM_LEGACY			BIT(1)
+#define PCIE_WM_RC			BIT(30)
+
+#define PCIE_LTSSM_STATE_MASK		GENMASK(5, 0)
+#define PCIE_LTSSM_STATE_ACTIVE		0x11
+
+struct histb_pcie {
+	struct dw_pcie *pci;
+	struct clk *aux_clk;
+	struct clk *pipe_clk;
+	struct clk *sys_clk;
+	struct clk *bus_clk;
+	struct phy *phy;
+	struct reset_control *soft_reset;
+	struct reset_control *sys_reset;
+	struct reset_control *bus_reset;
+	void __iomem *ctrl;
+	struct gpio_desc *reset_gpio;
+	struct regulator *vpcie;
+};
+
+static u32 histb_pcie_readl(struct histb_pcie *histb_pcie, u32 reg)
+{
+	return readl(histb_pcie->ctrl + reg);
+}
+
+static void histb_pcie_writel(struct histb_pcie *histb_pcie, u32 reg, u32 val)
+{
+	writel(val, histb_pcie->ctrl + reg);
+}
+
+static void histb_pcie_dbi_w_mode(struct dw_pcie_rp *pp, bool enable)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct histb_pcie *hipcie = to_histb_pcie(pci);
+	u32 val;
+
+	val = histb_pcie_readl(hipcie, PCIE_SYS_CTRL0);
+	if (enable)
+		val |= PCIE_ELBI_SLV_DBI_ENABLE;
+	else
+		val &= ~PCIE_ELBI_SLV_DBI_ENABLE;
+	histb_pcie_writel(hipcie, PCIE_SYS_CTRL0, val);
+}
+
+static void histb_pcie_dbi_r_mode(struct dw_pcie_rp *pp, bool enable)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct histb_pcie *hipcie = to_histb_pcie(pci);
+	u32 val;
+
+	val = histb_pcie_readl(hipcie, PCIE_SYS_CTRL1);
+	if (enable)
+		val |= PCIE_ELBI_SLV_DBI_ENABLE;
+	else
+		val &= ~PCIE_ELBI_SLV_DBI_ENABLE;
+	histb_pcie_writel(hipcie, PCIE_SYS_CTRL1, val);
+}
+
+static u32 histb_pcie_read_dbi(struct dw_pcie *pci, void __iomem *base,
+			       u32 reg, size_t size)
+{
+	u32 val;
+
+	histb_pcie_dbi_r_mode(&pci->pp, true);
+	dw_pcie_read(base + reg, size, &val);
+	histb_pcie_dbi_r_mode(&pci->pp, false);
+
+	return val;
+}
+
+static void histb_pcie_write_dbi(struct dw_pcie *pci, void __iomem *base,
+				 u32 reg, size_t size, u32 val)
+{
+	histb_pcie_dbi_w_mode(&pci->pp, true);
+	dw_pcie_write(base + reg, size, val);
+	histb_pcie_dbi_w_mode(&pci->pp, false);
+}
+
+static int histb_pcie_rd_own_conf(struct pci_bus *bus, unsigned int devfn,
+				  int where, int size, u32 *val)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(bus->sysdata);
+
+	if (PCI_SLOT(devfn))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	*val = dw_pcie_read_dbi(pci, where, size);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int histb_pcie_wr_own_conf(struct pci_bus *bus, unsigned int devfn,
+				  int where, int size, u32 val)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(bus->sysdata);
+
+	if (PCI_SLOT(devfn))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	dw_pcie_write_dbi(pci, where, size, val);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops histb_pci_ops = {
+	.read = histb_pcie_rd_own_conf,
+	.write = histb_pcie_wr_own_conf,
+};
+
+static int histb_pcie_link_up(struct dw_pcie *pci)
+{
+	struct histb_pcie *hipcie = to_histb_pcie(pci);
+	u32 regval;
+	u32 status;
+
+	regval = histb_pcie_readl(hipcie, PCIE_SYS_STAT0);
+	status = histb_pcie_readl(hipcie, PCIE_SYS_STAT4);
+	status &= PCIE_LTSSM_STATE_MASK;
+	if ((regval & PCIE_XMLH_LINK_UP) && (regval & PCIE_RDLH_LINK_UP) &&
+	    (status == PCIE_LTSSM_STATE_ACTIVE))
+		return 1;
+
+	return 0;
+}
+
+static int histb_pcie_start_link(struct dw_pcie *pci)
+{
+	struct histb_pcie *hipcie = to_histb_pcie(pci);
+	u32 regval;
+
+	/* assert LTSSM enable */
+	regval = histb_pcie_readl(hipcie, PCIE_SYS_CTRL7);
+	regval |= PCIE_APP_LTSSM_ENABLE;
+	histb_pcie_writel(hipcie, PCIE_SYS_CTRL7, regval);
+
+	return 0;
+}
+
+static int histb_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct histb_pcie *hipcie = to_histb_pcie(pci);
+	u32 regval;
+
+	pp->bridge->ops = &histb_pci_ops;
+
+	/* PCIe RC work mode */
+	regval = histb_pcie_readl(hipcie, PCIE_SYS_CTRL0);
+	regval &= ~PCIE_DEVICE_TYPE_MASK;
+	regval |= PCIE_WM_RC;
+	histb_pcie_writel(hipcie, PCIE_SYS_CTRL0, regval);
+
+	return 0;
+}
+
+static const struct dw_pcie_host_ops histb_pcie_host_ops = {
+	.host_init = histb_pcie_host_init,
+};
+
+static void histb_pcie_host_disable(struct histb_pcie *hipcie)
+{
+	reset_control_assert(hipcie->soft_reset);
+	reset_control_assert(hipcie->sys_reset);
+	reset_control_assert(hipcie->bus_reset);
+
+	clk_disable_unprepare(hipcie->aux_clk);
+	clk_disable_unprepare(hipcie->pipe_clk);
+	clk_disable_unprepare(hipcie->sys_clk);
+	clk_disable_unprepare(hipcie->bus_clk);
+
+	if (hipcie->reset_gpio)
+		gpiod_set_value_cansleep(hipcie->reset_gpio, 1);
+
+	if (hipcie->vpcie)
+		regulator_disable(hipcie->vpcie);
+}
+
+static int histb_pcie_host_enable(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct histb_pcie *hipcie = to_histb_pcie(pci);
+	struct device *dev = pci->dev;
+	int ret;
+
+	/* power on PCIe device if have */
+	if (hipcie->vpcie) {
+		ret = regulator_enable(hipcie->vpcie);
+		if (ret) {
+			dev_err(dev, "failed to enable regulator: %d\n", ret);
+			return ret;
+		}
+	}
+
+	if (hipcie->reset_gpio)
+		gpiod_set_value_cansleep(hipcie->reset_gpio, 0);
+
+	ret = clk_prepare_enable(hipcie->bus_clk);
+	if (ret) {
+		dev_err(dev, "cannot prepare/enable bus clk\n");
+		goto err_bus_clk;
+	}
+
+	ret = clk_prepare_enable(hipcie->sys_clk);
+	if (ret) {
+		dev_err(dev, "cannot prepare/enable sys clk\n");
+		goto err_sys_clk;
+	}
+
+	ret = clk_prepare_enable(hipcie->pipe_clk);
+	if (ret) {
+		dev_err(dev, "cannot prepare/enable pipe clk\n");
+		goto err_pipe_clk;
+	}
+
+	ret = clk_prepare_enable(hipcie->aux_clk);
+	if (ret) {
+		dev_err(dev, "cannot prepare/enable aux clk\n");
+		goto err_aux_clk;
+	}
+
+	reset_control_assert(hipcie->soft_reset);
+	reset_control_deassert(hipcie->soft_reset);
+
+	reset_control_assert(hipcie->sys_reset);
+	reset_control_deassert(hipcie->sys_reset);
+
+	reset_control_assert(hipcie->bus_reset);
+	reset_control_deassert(hipcie->bus_reset);
+
+	return 0;
+
+err_aux_clk:
+	clk_disable_unprepare(hipcie->pipe_clk);
+err_pipe_clk:
+	clk_disable_unprepare(hipcie->sys_clk);
+err_sys_clk:
+	clk_disable_unprepare(hipcie->bus_clk);
+err_bus_clk:
+	if (hipcie->vpcie)
+		regulator_disable(hipcie->vpcie);
+
+	return ret;
+}
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+	.read_dbi = histb_pcie_read_dbi,
+	.write_dbi = histb_pcie_write_dbi,
+	.link_up = histb_pcie_link_up,
+	.start_link = histb_pcie_start_link,
+};
+
+static int histb_pcie_probe(struct platform_device *pdev)
+{
+	struct histb_pcie *hipcie;
+	struct dw_pcie *pci;
+	struct dw_pcie_rp *pp;
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	hipcie = devm_kzalloc(dev, sizeof(*hipcie), GFP_KERNEL);
+	if (!hipcie)
+		return -ENOMEM;
+
+	pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	if (!pci)
+		return -ENOMEM;
+
+	hipcie->pci = pci;
+	pp = &pci->pp;
+	pci->dev = dev;
+	pci->ops = &dw_pcie_ops;
+
+	hipcie->ctrl = devm_platform_ioremap_resource_byname(pdev, "control");
+	if (IS_ERR(hipcie->ctrl)) {
+		dev_err(dev, "cannot get control reg base\n");
+		return PTR_ERR(hipcie->ctrl);
+	}
+
+	pci->dbi_base = devm_platform_ioremap_resource_byname(pdev, "rc-dbi");
+	if (IS_ERR(pci->dbi_base)) {
+		dev_err(dev, "cannot get rc-dbi base\n");
+		return PTR_ERR(pci->dbi_base);
+	}
+
+	hipcie->vpcie = devm_regulator_get_optional(dev, "vpcie");
+	if (IS_ERR(hipcie->vpcie)) {
+		if (PTR_ERR(hipcie->vpcie) != -ENODEV)
+			return PTR_ERR(hipcie->vpcie);
+		hipcie->vpcie = NULL;
+	}
+
+	hipcie->reset_gpio = devm_gpiod_get_optional(dev, "reset",
+						     GPIOD_OUT_HIGH);
+	ret = PTR_ERR_OR_ZERO(hipcie->reset_gpio);
+	if (ret) {
+		dev_err(dev, "unable to request reset gpio: %d\n", ret);
+		return ret;
+	}
+
+	ret = gpiod_set_consumer_name(hipcie->reset_gpio,
+				      "PCIe device power control");
+	if (ret) {
+		dev_err(dev, "unable to set reset gpio name: %d\n", ret);
+		return ret;
+	}
+
+	hipcie->aux_clk = devm_clk_get(dev, "aux");
+	if (IS_ERR(hipcie->aux_clk)) {
+		dev_err(dev, "Failed to get PCIe aux clk\n");
+		return PTR_ERR(hipcie->aux_clk);
+	}
+
+	hipcie->pipe_clk = devm_clk_get(dev, "pipe");
+	if (IS_ERR(hipcie->pipe_clk)) {
+		dev_err(dev, "Failed to get PCIe pipe clk\n");
+		return PTR_ERR(hipcie->pipe_clk);
+	}
+
+	hipcie->sys_clk = devm_clk_get(dev, "sys");
+	if (IS_ERR(hipcie->sys_clk)) {
+		dev_err(dev, "Failed to get PCIEe sys clk\n");
+		return PTR_ERR(hipcie->sys_clk);
+	}
+
+	hipcie->bus_clk = devm_clk_get(dev, "bus");
+	if (IS_ERR(hipcie->bus_clk)) {
+		dev_err(dev, "Failed to get PCIe bus clk\n");
+		return PTR_ERR(hipcie->bus_clk);
+	}
+
+	hipcie->soft_reset = devm_reset_control_get(dev, "soft");
+	if (IS_ERR(hipcie->soft_reset)) {
+		dev_err(dev, "couldn't get soft reset\n");
+		return PTR_ERR(hipcie->soft_reset);
+	}
+
+	hipcie->sys_reset = devm_reset_control_get(dev, "sys");
+	if (IS_ERR(hipcie->sys_reset)) {
+		dev_err(dev, "couldn't get sys reset\n");
+		return PTR_ERR(hipcie->sys_reset);
+	}
+
+	hipcie->bus_reset = devm_reset_control_get(dev, "bus");
+	if (IS_ERR(hipcie->bus_reset)) {
+		dev_err(dev, "couldn't get bus reset\n");
+		return PTR_ERR(hipcie->bus_reset);
+	}
+
+	hipcie->phy = devm_phy_get(dev, "phy");
+	if (IS_ERR(hipcie->phy)) {
+		dev_info(dev, "no pcie-phy found\n");
+		hipcie->phy = NULL;
+		/* fall through here!
+		 * if no pcie-phy found, phy init
+		 * should be done under boot!
+		 */
+	} else {
+		phy_init(hipcie->phy);
+	}
+
+	pp->ops = &histb_pcie_host_ops;
+
+	platform_set_drvdata(pdev, hipcie);
+
+	ret = histb_pcie_host_enable(pp);
+	if (ret) {
+		dev_err(dev, "failed to enable host\n");
+		return ret;
+	}
+
+	ret = dw_pcie_host_init(pp);
+	if (ret) {
+		dev_err(dev, "failed to initialize host\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void histb_pcie_remove(struct platform_device *pdev)
+{
+	struct histb_pcie *hipcie = platform_get_drvdata(pdev);
+
+	histb_pcie_host_disable(hipcie);
+
+	if (hipcie->phy)
+		phy_exit(hipcie->phy);
+}
+
+static const struct of_device_id histb_pcie_of_match[] = {
+	{ .compatible = "hisilicon,hi3798cv200-pcie", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, histb_pcie_of_match);
+
+static struct platform_driver histb_pcie_platform_driver = {
+	.probe	= histb_pcie_probe,
+	.remove_new = histb_pcie_remove,
+	.driver = {
+		.name = "histb-pcie",
+		.of_match_table = histb_pcie_of_match,
+	},
+};
+module_platform_driver(histb_pcie_platform_driver);
+
+MODULE_DESCRIPTION("HiSilicon STB PCIe host controller driver");
diff --git a/drivers/pci/controller/dwc/pcie-intel-gw.c b/drivers/pci/controller/dwc/pcie-intel-gw.c
new file mode 100644
index 000000000..c9c93524e
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-intel-gw.c
@@ -0,0 +1,453 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for Intel Gateway SoCs
+ *
+ * Copyright (c) 2019 Intel Corporation.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/gpio/consumer.h>
+#include <linux/iopoll.h>
+#include <linux/mod_devicetable.h>
+#include <linux/pci_regs.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/reset.h>
+
+#include "../../pci.h"
+#include "pcie-designware.h"
+
+#define PORT_AFR_N_FTS_GEN12_DFT	(SZ_128 - 1)
+#define PORT_AFR_N_FTS_GEN3		180
+#define PORT_AFR_N_FTS_GEN4		196
+
+/* PCIe Application logic Registers */
+#define PCIE_APP_CCR			0x10
+#define PCIE_APP_CCR_LTSSM_ENABLE	BIT(0)
+
+#define PCIE_APP_MSG_CR			0x30
+#define PCIE_APP_MSG_XMT_PM_TURNOFF	BIT(0)
+
+#define PCIE_APP_PMC			0x44
+#define PCIE_APP_PMC_IN_L2		BIT(20)
+
+#define PCIE_APP_IRNEN			0xF4
+#define PCIE_APP_IRNCR			0xF8
+#define PCIE_APP_IRN_AER_REPORT		BIT(0)
+#define PCIE_APP_IRN_PME		BIT(2)
+#define PCIE_APP_IRN_RX_VDM_MSG		BIT(4)
+#define PCIE_APP_IRN_PM_TO_ACK		BIT(9)
+#define PCIE_APP_IRN_LINK_AUTO_BW_STAT	BIT(11)
+#define PCIE_APP_IRN_BW_MGT		BIT(12)
+#define PCIE_APP_IRN_INTA		BIT(13)
+#define PCIE_APP_IRN_INTB		BIT(14)
+#define PCIE_APP_IRN_INTC		BIT(15)
+#define PCIE_APP_IRN_INTD		BIT(16)
+#define PCIE_APP_IRN_MSG_LTR		BIT(18)
+#define PCIE_APP_IRN_SYS_ERR_RC		BIT(29)
+#define PCIE_APP_INTX_OFST		12
+
+#define PCIE_APP_IRN_INT \
+	(PCIE_APP_IRN_AER_REPORT | PCIE_APP_IRN_PME | \
+	PCIE_APP_IRN_RX_VDM_MSG | PCIE_APP_IRN_SYS_ERR_RC | \
+	PCIE_APP_IRN_PM_TO_ACK | PCIE_APP_IRN_MSG_LTR | \
+	PCIE_APP_IRN_BW_MGT | PCIE_APP_IRN_LINK_AUTO_BW_STAT | \
+	PCIE_APP_IRN_INTA | PCIE_APP_IRN_INTB | \
+	PCIE_APP_IRN_INTC | PCIE_APP_IRN_INTD)
+
+#define BUS_IATU_OFFSET			SZ_256M
+#define RESET_INTERVAL_MS		100
+
+struct intel_pcie {
+	struct dw_pcie		pci;
+	void __iomem		*app_base;
+	struct gpio_desc	*reset_gpio;
+	u32			rst_intrvl;
+	struct clk		*core_clk;
+	struct reset_control	*core_rst;
+	struct phy		*phy;
+};
+
+static void pcie_update_bits(void __iomem *base, u32 ofs, u32 mask, u32 val)
+{
+	u32 old;
+
+	old = readl(base + ofs);
+	val = (old & ~mask) | (val & mask);
+
+	if (val != old)
+		writel(val, base + ofs);
+}
+
+static inline void pcie_app_wr(struct intel_pcie *pcie, u32 ofs, u32 val)
+{
+	writel(val, pcie->app_base + ofs);
+}
+
+static void pcie_app_wr_mask(struct intel_pcie *pcie, u32 ofs,
+			     u32 mask, u32 val)
+{
+	pcie_update_bits(pcie->app_base, ofs, mask, val);
+}
+
+static inline u32 pcie_rc_cfg_rd(struct intel_pcie *pcie, u32 ofs)
+{
+	return dw_pcie_readl_dbi(&pcie->pci, ofs);
+}
+
+static inline void pcie_rc_cfg_wr(struct intel_pcie *pcie, u32 ofs, u32 val)
+{
+	dw_pcie_writel_dbi(&pcie->pci, ofs, val);
+}
+
+static void pcie_rc_cfg_wr_mask(struct intel_pcie *pcie, u32 ofs,
+				u32 mask, u32 val)
+{
+	pcie_update_bits(pcie->pci.dbi_base, ofs, mask, val);
+}
+
+static void intel_pcie_ltssm_enable(struct intel_pcie *pcie)
+{
+	pcie_app_wr_mask(pcie, PCIE_APP_CCR, PCIE_APP_CCR_LTSSM_ENABLE,
+			 PCIE_APP_CCR_LTSSM_ENABLE);
+}
+
+static void intel_pcie_ltssm_disable(struct intel_pcie *pcie)
+{
+	pcie_app_wr_mask(pcie, PCIE_APP_CCR, PCIE_APP_CCR_LTSSM_ENABLE, 0);
+}
+
+static void intel_pcie_link_setup(struct intel_pcie *pcie)
+{
+	u32 val;
+	u8 offset = dw_pcie_find_capability(&pcie->pci, PCI_CAP_ID_EXP);
+
+	val = pcie_rc_cfg_rd(pcie, offset + PCI_EXP_LNKCTL);
+
+	val &= ~(PCI_EXP_LNKCTL_LD | PCI_EXP_LNKCTL_ASPMC);
+	pcie_rc_cfg_wr(pcie, offset + PCI_EXP_LNKCTL, val);
+}
+
+static void intel_pcie_init_n_fts(struct dw_pcie *pci)
+{
+	switch (pci->link_gen) {
+	case 3:
+		pci->n_fts[1] = PORT_AFR_N_FTS_GEN3;
+		break;
+	case 4:
+		pci->n_fts[1] = PORT_AFR_N_FTS_GEN4;
+		break;
+	default:
+		pci->n_fts[1] = PORT_AFR_N_FTS_GEN12_DFT;
+		break;
+	}
+	pci->n_fts[0] = PORT_AFR_N_FTS_GEN12_DFT;
+}
+
+static int intel_pcie_ep_rst_init(struct intel_pcie *pcie)
+{
+	struct device *dev = pcie->pci.dev;
+	int ret;
+
+	pcie->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(pcie->reset_gpio)) {
+		ret = PTR_ERR(pcie->reset_gpio);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to request PCIe GPIO: %d\n", ret);
+		return ret;
+	}
+
+	/* Make initial reset last for 100us */
+	usleep_range(100, 200);
+
+	return 0;
+}
+
+static void intel_pcie_core_rst_assert(struct intel_pcie *pcie)
+{
+	reset_control_assert(pcie->core_rst);
+}
+
+static void intel_pcie_core_rst_deassert(struct intel_pcie *pcie)
+{
+	/*
+	 * One micro-second delay to make sure the reset pulse
+	 * wide enough so that core reset is clean.
+	 */
+	udelay(1);
+	reset_control_deassert(pcie->core_rst);
+
+	/*
+	 * Some SoC core reset also reset PHY, more delay needed
+	 * to make sure the reset process is done.
+	 */
+	usleep_range(1000, 2000);
+}
+
+static void intel_pcie_device_rst_assert(struct intel_pcie *pcie)
+{
+	gpiod_set_value_cansleep(pcie->reset_gpio, 1);
+}
+
+static void intel_pcie_device_rst_deassert(struct intel_pcie *pcie)
+{
+	msleep(pcie->rst_intrvl);
+	gpiod_set_value_cansleep(pcie->reset_gpio, 0);
+}
+
+static void intel_pcie_core_irq_disable(struct intel_pcie *pcie)
+{
+	pcie_app_wr(pcie, PCIE_APP_IRNEN, 0);
+	pcie_app_wr(pcie, PCIE_APP_IRNCR, PCIE_APP_IRN_INT);
+}
+
+static int intel_pcie_get_resources(struct platform_device *pdev)
+{
+	struct intel_pcie *pcie = platform_get_drvdata(pdev);
+	struct dw_pcie *pci = &pcie->pci;
+	struct device *dev = pci->dev;
+	int ret;
+
+	pcie->core_clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(pcie->core_clk)) {
+		ret = PTR_ERR(pcie->core_clk);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get clks: %d\n", ret);
+		return ret;
+	}
+
+	pcie->core_rst = devm_reset_control_get(dev, NULL);
+	if (IS_ERR(pcie->core_rst)) {
+		ret = PTR_ERR(pcie->core_rst);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get resets: %d\n", ret);
+		return ret;
+	}
+
+	ret = device_property_read_u32(dev, "reset-assert-ms",
+				       &pcie->rst_intrvl);
+	if (ret)
+		pcie->rst_intrvl = RESET_INTERVAL_MS;
+
+	pcie->app_base = devm_platform_ioremap_resource_byname(pdev, "app");
+	if (IS_ERR(pcie->app_base))
+		return PTR_ERR(pcie->app_base);
+
+	pcie->phy = devm_phy_get(dev, "pcie");
+	if (IS_ERR(pcie->phy)) {
+		ret = PTR_ERR(pcie->phy);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Couldn't get pcie-phy: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int intel_pcie_wait_l2(struct intel_pcie *pcie)
+{
+	u32 value;
+	int ret;
+	struct dw_pcie *pci = &pcie->pci;
+
+	if (pci->link_gen < 3)
+		return 0;
+
+	/* Send PME_TURN_OFF message */
+	pcie_app_wr_mask(pcie, PCIE_APP_MSG_CR, PCIE_APP_MSG_XMT_PM_TURNOFF,
+			 PCIE_APP_MSG_XMT_PM_TURNOFF);
+
+	/* Read PMC status and wait for falling into L2 link state */
+	ret = readl_poll_timeout(pcie->app_base + PCIE_APP_PMC, value,
+				 value & PCIE_APP_PMC_IN_L2, 20,
+				 jiffies_to_usecs(5 * HZ));
+	if (ret)
+		dev_err(pcie->pci.dev, "PCIe link enter L2 timeout!\n");
+
+	return ret;
+}
+
+static void intel_pcie_turn_off(struct intel_pcie *pcie)
+{
+	if (dw_pcie_link_up(&pcie->pci))
+		intel_pcie_wait_l2(pcie);
+
+	/* Put endpoint device in reset state */
+	intel_pcie_device_rst_assert(pcie);
+	pcie_rc_cfg_wr_mask(pcie, PCI_COMMAND, PCI_COMMAND_MEMORY, 0);
+}
+
+static int intel_pcie_host_setup(struct intel_pcie *pcie)
+{
+	int ret;
+	struct dw_pcie *pci = &pcie->pci;
+
+	intel_pcie_core_rst_assert(pcie);
+	intel_pcie_device_rst_assert(pcie);
+
+	ret = phy_init(pcie->phy);
+	if (ret)
+		return ret;
+
+	intel_pcie_core_rst_deassert(pcie);
+
+	ret = clk_prepare_enable(pcie->core_clk);
+	if (ret) {
+		dev_err(pcie->pci.dev, "Core clock enable failed: %d\n", ret);
+		goto clk_err;
+	}
+
+	pci->atu_base = pci->dbi_base + 0xC0000;
+
+	intel_pcie_ltssm_disable(pcie);
+	intel_pcie_link_setup(pcie);
+	intel_pcie_init_n_fts(pci);
+
+	ret = dw_pcie_setup_rc(&pci->pp);
+	if (ret)
+		goto app_init_err;
+
+	dw_pcie_upconfig_setup(pci);
+
+	intel_pcie_device_rst_deassert(pcie);
+	intel_pcie_ltssm_enable(pcie);
+
+	ret = dw_pcie_wait_for_link(pci);
+	if (ret)
+		goto app_init_err;
+
+	/* Enable integrated interrupts */
+	pcie_app_wr_mask(pcie, PCIE_APP_IRNEN, PCIE_APP_IRN_INT,
+			 PCIE_APP_IRN_INT);
+
+	return 0;
+
+app_init_err:
+	clk_disable_unprepare(pcie->core_clk);
+clk_err:
+	intel_pcie_core_rst_assert(pcie);
+	phy_exit(pcie->phy);
+
+	return ret;
+}
+
+static void __intel_pcie_remove(struct intel_pcie *pcie)
+{
+	intel_pcie_core_irq_disable(pcie);
+	intel_pcie_turn_off(pcie);
+	clk_disable_unprepare(pcie->core_clk);
+	intel_pcie_core_rst_assert(pcie);
+	phy_exit(pcie->phy);
+}
+
+static void intel_pcie_remove(struct platform_device *pdev)
+{
+	struct intel_pcie *pcie = platform_get_drvdata(pdev);
+	struct dw_pcie_rp *pp = &pcie->pci.pp;
+
+	dw_pcie_host_deinit(pp);
+	__intel_pcie_remove(pcie);
+}
+
+static int intel_pcie_suspend_noirq(struct device *dev)
+{
+	struct intel_pcie *pcie = dev_get_drvdata(dev);
+	int ret;
+
+	intel_pcie_core_irq_disable(pcie);
+	ret = intel_pcie_wait_l2(pcie);
+	if (ret)
+		return ret;
+
+	phy_exit(pcie->phy);
+	clk_disable_unprepare(pcie->core_clk);
+	return ret;
+}
+
+static int intel_pcie_resume_noirq(struct device *dev)
+{
+	struct intel_pcie *pcie = dev_get_drvdata(dev);
+
+	return intel_pcie_host_setup(pcie);
+}
+
+static int intel_pcie_rc_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct intel_pcie *pcie = dev_get_drvdata(pci->dev);
+
+	return intel_pcie_host_setup(pcie);
+}
+
+static u64 intel_pcie_cpu_addr(struct dw_pcie *pcie, u64 cpu_addr)
+{
+	return cpu_addr + BUS_IATU_OFFSET;
+}
+
+static const struct dw_pcie_ops intel_pcie_ops = {
+	.cpu_addr_fixup = intel_pcie_cpu_addr,
+};
+
+static const struct dw_pcie_host_ops intel_pcie_dw_ops = {
+	.host_init =		intel_pcie_rc_init,
+};
+
+static int intel_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct intel_pcie *pcie;
+	struct dw_pcie_rp *pp;
+	struct dw_pcie *pci;
+	int ret;
+
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, pcie);
+	pci = &pcie->pci;
+	pci->dev = dev;
+	pp = &pci->pp;
+
+	ret = intel_pcie_get_resources(pdev);
+	if (ret)
+		return ret;
+
+	ret = intel_pcie_ep_rst_init(pcie);
+	if (ret)
+		return ret;
+
+	pci->ops = &intel_pcie_ops;
+	pp->ops = &intel_pcie_dw_ops;
+
+	ret = dw_pcie_host_init(pp);
+	if (ret) {
+		dev_err(dev, "Cannot initialize host\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops intel_pcie_pm_ops = {
+	NOIRQ_SYSTEM_SLEEP_PM_OPS(intel_pcie_suspend_noirq,
+				  intel_pcie_resume_noirq)
+};
+
+static const struct of_device_id of_intel_pcie_match[] = {
+	{ .compatible = "intel,lgm-pcie" },
+	{}
+};
+
+static struct platform_driver intel_pcie_driver = {
+	.probe = intel_pcie_probe,
+	.remove_new = intel_pcie_remove,
+	.driver = {
+		.name = "intel-gw-pcie",
+		.of_match_table = of_intel_pcie_match,
+		.pm = &intel_pcie_pm_ops,
+	},
+};
+builtin_platform_driver(intel_pcie_driver);
diff --git a/drivers/pci/controller/dwc/pcie-keembay.c b/drivers/pci/controller/dwc/pcie-keembay.c
new file mode 100644
index 000000000..289bff99d
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-keembay.c
@@ -0,0 +1,465 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PCIe controller driver for Intel Keem Bay
+ * Copyright (C) 2020 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
+#include <linux/init.h>
+#include <linux/iopoll.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+
+#include "pcie-designware.h"
+
+/* PCIE_REGS_APB_SLV Registers */
+#define PCIE_REGS_PCIE_CFG		0x0004
+#define  PCIE_DEVICE_TYPE		BIT(8)
+#define  PCIE_RSTN			BIT(0)
+#define PCIE_REGS_PCIE_APP_CNTRL	0x0008
+#define  APP_LTSSM_ENABLE		BIT(0)
+#define PCIE_REGS_INTERRUPT_ENABLE	0x0028
+#define  MSI_CTRL_INT_EN		BIT(8)
+#define  EDMA_INT_EN			GENMASK(7, 0)
+#define PCIE_REGS_INTERRUPT_STATUS	0x002c
+#define  MSI_CTRL_INT			BIT(8)
+#define PCIE_REGS_PCIE_SII_PM_STATE	0x00b0
+#define  SMLH_LINK_UP			BIT(19)
+#define  RDLH_LINK_UP			BIT(8)
+#define  PCIE_REGS_PCIE_SII_LINK_UP	(SMLH_LINK_UP | RDLH_LINK_UP)
+#define PCIE_REGS_PCIE_PHY_CNTL		0x0164
+#define  PHY0_SRAM_BYPASS		BIT(8)
+#define PCIE_REGS_PCIE_PHY_STAT		0x0168
+#define  PHY0_MPLLA_STATE		BIT(1)
+#define PCIE_REGS_LJPLL_STA		0x016c
+#define  LJPLL_LOCK			BIT(0)
+#define PCIE_REGS_LJPLL_CNTRL_0		0x0170
+#define  LJPLL_EN			BIT(29)
+#define  LJPLL_FOUT_EN			GENMASK(24, 21)
+#define PCIE_REGS_LJPLL_CNTRL_2		0x0178
+#define  LJPLL_REF_DIV			GENMASK(17, 12)
+#define  LJPLL_FB_DIV			GENMASK(11, 0)
+#define PCIE_REGS_LJPLL_CNTRL_3		0x017c
+#define  LJPLL_POST_DIV3A		GENMASK(24, 22)
+#define  LJPLL_POST_DIV2A		GENMASK(18, 16)
+
+#define PERST_DELAY_US		1000
+#define AUX_CLK_RATE_HZ		24000000
+
+struct keembay_pcie {
+	struct dw_pcie		pci;
+	void __iomem		*apb_base;
+	enum dw_pcie_device_mode mode;
+
+	struct clk		*clk_master;
+	struct clk		*clk_aux;
+	struct gpio_desc	*reset;
+};
+
+struct keembay_pcie_of_data {
+	enum dw_pcie_device_mode mode;
+};
+
+static void keembay_ep_reset_assert(struct keembay_pcie *pcie)
+{
+	gpiod_set_value_cansleep(pcie->reset, 1);
+	usleep_range(PERST_DELAY_US, PERST_DELAY_US + 500);
+}
+
+static void keembay_ep_reset_deassert(struct keembay_pcie *pcie)
+{
+	/*
+	 * Ensure that PERST# is asserted for a minimum of 100ms.
+	 *
+	 * For more details, refer to PCI Express Card Electromechanical
+	 * Specification Revision 1.1, Table-2.4.
+	 */
+	msleep(100);
+
+	gpiod_set_value_cansleep(pcie->reset, 0);
+	usleep_range(PERST_DELAY_US, PERST_DELAY_US + 500);
+}
+
+static void keembay_pcie_ltssm_set(struct keembay_pcie *pcie, bool enable)
+{
+	u32 val;
+
+	val = readl(pcie->apb_base + PCIE_REGS_PCIE_APP_CNTRL);
+	if (enable)
+		val |= APP_LTSSM_ENABLE;
+	else
+		val &= ~APP_LTSSM_ENABLE;
+	writel(val, pcie->apb_base + PCIE_REGS_PCIE_APP_CNTRL);
+}
+
+static int keembay_pcie_link_up(struct dw_pcie *pci)
+{
+	struct keembay_pcie *pcie = dev_get_drvdata(pci->dev);
+	u32 val;
+
+	val = readl(pcie->apb_base + PCIE_REGS_PCIE_SII_PM_STATE);
+
+	return (val & PCIE_REGS_PCIE_SII_LINK_UP) == PCIE_REGS_PCIE_SII_LINK_UP;
+}
+
+static int keembay_pcie_start_link(struct dw_pcie *pci)
+{
+	struct keembay_pcie *pcie = dev_get_drvdata(pci->dev);
+	u32 val;
+	int ret;
+
+	if (pcie->mode == DW_PCIE_EP_TYPE)
+		return 0;
+
+	keembay_pcie_ltssm_set(pcie, false);
+
+	ret = readl_poll_timeout(pcie->apb_base + PCIE_REGS_PCIE_PHY_STAT,
+				 val, val & PHY0_MPLLA_STATE, 20,
+				 500 * USEC_PER_MSEC);
+	if (ret) {
+		dev_err(pci->dev, "MPLLA is not locked\n");
+		return ret;
+	}
+
+	keembay_pcie_ltssm_set(pcie, true);
+
+	return 0;
+}
+
+static void keembay_pcie_stop_link(struct dw_pcie *pci)
+{
+	struct keembay_pcie *pcie = dev_get_drvdata(pci->dev);
+
+	keembay_pcie_ltssm_set(pcie, false);
+}
+
+static const struct dw_pcie_ops keembay_pcie_ops = {
+	.link_up	= keembay_pcie_link_up,
+	.start_link	= keembay_pcie_start_link,
+	.stop_link	= keembay_pcie_stop_link,
+};
+
+static inline void keembay_pcie_disable_clock(void *data)
+{
+	struct clk *clk = data;
+
+	clk_disable_unprepare(clk);
+}
+
+static inline struct clk *keembay_pcie_probe_clock(struct device *dev,
+						   const char *id, u64 rate)
+{
+	struct clk *clk;
+	int ret;
+
+	clk = devm_clk_get(dev, id);
+	if (IS_ERR(clk))
+		return clk;
+
+	if (rate) {
+		ret = clk_set_rate(clk, rate);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
+	ret = clk_prepare_enable(clk);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ret = devm_add_action_or_reset(dev, keembay_pcie_disable_clock, clk);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return clk;
+}
+
+static int keembay_pcie_probe_clocks(struct keembay_pcie *pcie)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	struct device *dev = pci->dev;
+
+	pcie->clk_master = keembay_pcie_probe_clock(dev, "master", 0);
+	if (IS_ERR(pcie->clk_master))
+		return dev_err_probe(dev, PTR_ERR(pcie->clk_master),
+				     "Failed to enable master clock");
+
+	pcie->clk_aux = keembay_pcie_probe_clock(dev, "aux", AUX_CLK_RATE_HZ);
+	if (IS_ERR(pcie->clk_aux))
+		return dev_err_probe(dev, PTR_ERR(pcie->clk_aux),
+				     "Failed to enable auxiliary clock");
+
+	return 0;
+}
+
+/*
+ * Initialize the internal PCIe PLL in Host mode.
+ * See the following sections in Keem Bay data book,
+ * (1) 6.4.6.1 PCIe Subsystem Example Initialization,
+ * (2) 6.8 PCIe Low Jitter PLL for Ref Clk Generation.
+ */
+static int keembay_pcie_pll_init(struct keembay_pcie *pcie)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	u32 val;
+	int ret;
+
+	val = FIELD_PREP(LJPLL_REF_DIV, 0) | FIELD_PREP(LJPLL_FB_DIV, 0x32);
+	writel(val, pcie->apb_base + PCIE_REGS_LJPLL_CNTRL_2);
+
+	val = FIELD_PREP(LJPLL_POST_DIV3A, 0x2) |
+		FIELD_PREP(LJPLL_POST_DIV2A, 0x2);
+	writel(val, pcie->apb_base + PCIE_REGS_LJPLL_CNTRL_3);
+
+	val = FIELD_PREP(LJPLL_EN, 0x1) | FIELD_PREP(LJPLL_FOUT_EN, 0xc);
+	writel(val, pcie->apb_base + PCIE_REGS_LJPLL_CNTRL_0);
+
+	ret = readl_poll_timeout(pcie->apb_base + PCIE_REGS_LJPLL_STA,
+				 val, val & LJPLL_LOCK, 20,
+				 500 * USEC_PER_MSEC);
+	if (ret)
+		dev_err(pci->dev, "Low jitter PLL is not locked\n");
+
+	return ret;
+}
+
+static void keembay_pcie_msi_irq_handler(struct irq_desc *desc)
+{
+	struct keembay_pcie *pcie = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	u32 val, mask, status;
+	struct dw_pcie_rp *pp;
+
+	/*
+	 * Keem Bay PCIe Controller provides an additional IP logic on top of
+	 * standard DWC IP to clear MSI IRQ by writing '1' to the respective
+	 * bit of the status register.
+	 *
+	 * So, a chained irq handler is defined to handle this additional
+	 * IP logic.
+	 */
+
+	chained_irq_enter(chip, desc);
+
+	pp = &pcie->pci.pp;
+	val = readl(pcie->apb_base + PCIE_REGS_INTERRUPT_STATUS);
+	mask = readl(pcie->apb_base + PCIE_REGS_INTERRUPT_ENABLE);
+
+	status = val & mask;
+
+	if (status & MSI_CTRL_INT) {
+		dw_handle_msi_irq(pp);
+		writel(status, pcie->apb_base + PCIE_REGS_INTERRUPT_STATUS);
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static int keembay_pcie_setup_msi_irq(struct keembay_pcie *pcie)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	struct device *dev = pci->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	int irq;
+
+	irq = platform_get_irq_byname(pdev, "pcie");
+	if (irq < 0)
+		return irq;
+
+	irq_set_chained_handler_and_data(irq, keembay_pcie_msi_irq_handler,
+					 pcie);
+
+	return 0;
+}
+
+static void keembay_pcie_ep_init(struct dw_pcie_ep *ep)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct keembay_pcie *pcie = dev_get_drvdata(pci->dev);
+
+	writel(EDMA_INT_EN, pcie->apb_base + PCIE_REGS_INTERRUPT_ENABLE);
+}
+
+static int keembay_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
+				     enum pci_epc_irq_type type,
+				     u16 interrupt_num)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+	switch (type) {
+	case PCI_EPC_IRQ_LEGACY:
+		/* Legacy interrupts are not supported in Keem Bay */
+		dev_err(pci->dev, "Legacy IRQ is not supported\n");
+		return -EINVAL;
+	case PCI_EPC_IRQ_MSI:
+		return dw_pcie_ep_raise_msi_irq(ep, func_no, interrupt_num);
+	case PCI_EPC_IRQ_MSIX:
+		return dw_pcie_ep_raise_msix_irq(ep, func_no, interrupt_num);
+	default:
+		dev_err(pci->dev, "Unknown IRQ type %d\n", type);
+		return -EINVAL;
+	}
+}
+
+static const struct pci_epc_features keembay_pcie_epc_features = {
+	.linkup_notifier	= false,
+	.msi_capable		= true,
+	.msix_capable		= true,
+	.reserved_bar		= BIT(BAR_1) | BIT(BAR_3) | BIT(BAR_5),
+	.bar_fixed_64bit	= BIT(BAR_0) | BIT(BAR_2) | BIT(BAR_4),
+	.align			= SZ_16K,
+};
+
+static const struct pci_epc_features *
+keembay_pcie_get_features(struct dw_pcie_ep *ep)
+{
+	return &keembay_pcie_epc_features;
+}
+
+static const struct dw_pcie_ep_ops keembay_pcie_ep_ops = {
+	.ep_init	= keembay_pcie_ep_init,
+	.raise_irq	= keembay_pcie_ep_raise_irq,
+	.get_features	= keembay_pcie_get_features,
+};
+
+static const struct dw_pcie_host_ops keembay_pcie_host_ops = {
+};
+
+static int keembay_pcie_add_pcie_port(struct keembay_pcie *pcie,
+				      struct platform_device *pdev)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	struct dw_pcie_rp *pp = &pci->pp;
+	struct device *dev = &pdev->dev;
+	u32 val;
+	int ret;
+
+	pp->ops = &keembay_pcie_host_ops;
+	pp->msi_irq[0] = -ENODEV;
+
+	ret = keembay_pcie_setup_msi_irq(pcie);
+	if (ret)
+		return ret;
+
+	pcie->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
+	if (IS_ERR(pcie->reset))
+		return PTR_ERR(pcie->reset);
+
+	ret = keembay_pcie_probe_clocks(pcie);
+	if (ret)
+		return ret;
+
+	val = readl(pcie->apb_base + PCIE_REGS_PCIE_PHY_CNTL);
+	val |= PHY0_SRAM_BYPASS;
+	writel(val, pcie->apb_base + PCIE_REGS_PCIE_PHY_CNTL);
+
+	writel(PCIE_DEVICE_TYPE, pcie->apb_base + PCIE_REGS_PCIE_CFG);
+
+	ret = keembay_pcie_pll_init(pcie);
+	if (ret)
+		return ret;
+
+	val = readl(pcie->apb_base + PCIE_REGS_PCIE_CFG);
+	writel(val | PCIE_RSTN, pcie->apb_base + PCIE_REGS_PCIE_CFG);
+	keembay_ep_reset_deassert(pcie);
+
+	ret = dw_pcie_host_init(pp);
+	if (ret) {
+		keembay_ep_reset_assert(pcie);
+		dev_err(dev, "Failed to initialize host: %d\n", ret);
+		return ret;
+	}
+
+	val = readl(pcie->apb_base + PCIE_REGS_INTERRUPT_ENABLE);
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		val |= MSI_CTRL_INT_EN;
+	writel(val, pcie->apb_base + PCIE_REGS_INTERRUPT_ENABLE);
+
+	return 0;
+}
+
+static int keembay_pcie_probe(struct platform_device *pdev)
+{
+	const struct keembay_pcie_of_data *data;
+	struct device *dev = &pdev->dev;
+	struct keembay_pcie *pcie;
+	struct dw_pcie *pci;
+	enum dw_pcie_device_mode mode;
+
+	data = device_get_match_data(dev);
+	if (!data)
+		return -ENODEV;
+
+	mode = (enum dw_pcie_device_mode)data->mode;
+
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	pci = &pcie->pci;
+	pci->dev = dev;
+	pci->ops = &keembay_pcie_ops;
+
+	pcie->mode = mode;
+
+	pcie->apb_base = devm_platform_ioremap_resource_byname(pdev, "apb");
+	if (IS_ERR(pcie->apb_base))
+		return PTR_ERR(pcie->apb_base);
+
+	platform_set_drvdata(pdev, pcie);
+
+	switch (pcie->mode) {
+	case DW_PCIE_RC_TYPE:
+		if (!IS_ENABLED(CONFIG_PCIE_KEEMBAY_HOST))
+			return -ENODEV;
+
+		return keembay_pcie_add_pcie_port(pcie, pdev);
+	case DW_PCIE_EP_TYPE:
+		if (!IS_ENABLED(CONFIG_PCIE_KEEMBAY_EP))
+			return -ENODEV;
+
+		pci->ep.ops = &keembay_pcie_ep_ops;
+		return dw_pcie_ep_init(&pci->ep);
+	default:
+		dev_err(dev, "Invalid device type %d\n", pcie->mode);
+		return -ENODEV;
+	}
+}
+
+static const struct keembay_pcie_of_data keembay_pcie_rc_of_data = {
+	.mode = DW_PCIE_RC_TYPE,
+};
+
+static const struct keembay_pcie_of_data keembay_pcie_ep_of_data = {
+	.mode = DW_PCIE_EP_TYPE,
+};
+
+static const struct of_device_id keembay_pcie_of_match[] = {
+	{
+		.compatible = "intel,keembay-pcie",
+		.data = &keembay_pcie_rc_of_data,
+	},
+	{
+		.compatible = "intel,keembay-pcie-ep",
+		.data = &keembay_pcie_ep_of_data,
+	},
+	{}
+};
+
+static struct platform_driver keembay_pcie_driver = {
+	.driver = {
+		.name = "keembay-pcie",
+		.of_match_table = keembay_pcie_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe  = keembay_pcie_probe,
+};
+builtin_platform_driver(keembay_pcie_driver);
diff --git a/drivers/pci/controller/dwc/pcie-kirin.c b/drivers/pci/controller/dwc/pcie-kirin.c
new file mode 100644
index 000000000..2ee146767
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-kirin.c
@@ -0,0 +1,833 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for Kirin Phone SoCs
+ *
+ * Copyright (C) 2017 HiSilicon Electronics Co., Ltd.
+ *		https://www.huawei.com
+ *
+ * Author: Xiaowei Song <songxiaowei@huawei.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/compiler.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/of_pci.h>
+#include <linux/phy/phy.h>
+#include <linux/pci.h>
+#include <linux/pci_regs.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/resource.h>
+#include <linux/types.h>
+#include "pcie-designware.h"
+
+#define to_kirin_pcie(x) dev_get_drvdata((x)->dev)
+
+/* PCIe ELBI registers */
+#define SOC_PCIECTRL_CTRL0_ADDR		0x000
+#define SOC_PCIECTRL_CTRL1_ADDR		0x004
+#define PCIE_ELBI_SLV_DBI_ENABLE	(0x1 << 21)
+
+/* info located in APB */
+#define PCIE_APP_LTSSM_ENABLE	0x01c
+#define PCIE_APB_PHY_STATUS0	0x400
+#define PCIE_LINKUP_ENABLE	(0x8020)
+#define PCIE_LTSSM_ENABLE_BIT	(0x1 << 11)
+
+/* info located in sysctrl */
+#define SCTRL_PCIE_CMOS_OFFSET	0x60
+#define SCTRL_PCIE_CMOS_BIT	0x10
+#define SCTRL_PCIE_ISO_OFFSET	0x44
+#define SCTRL_PCIE_ISO_BIT	0x30
+#define SCTRL_PCIE_HPCLK_OFFSET	0x190
+#define SCTRL_PCIE_HPCLK_BIT	0x184000
+#define SCTRL_PCIE_OE_OFFSET	0x14a
+#define PCIE_DEBOUNCE_PARAM	0xF0F400
+#define PCIE_OE_BYPASS		(0x3 << 28)
+
+/*
+ * Max number of connected PCI slots at an external PCI bridge
+ *
+ * This is used on HiKey 970, which has a PEX 8606 bridge with 4 connected
+ * lanes (lane 0 upstream, and the other three lanes, one connected to an
+ * in-board Ethernet adapter and the other two connected to M.2 and mini
+ * PCI slots.
+ *
+ * Each slot has a different clock source and uses a separate PERST# pin.
+ */
+#define MAX_PCI_SLOTS		3
+
+enum pcie_kirin_phy_type {
+	PCIE_KIRIN_INTERNAL_PHY,
+	PCIE_KIRIN_EXTERNAL_PHY
+};
+
+struct kirin_pcie {
+	enum pcie_kirin_phy_type	type;
+
+	struct dw_pcie	*pci;
+	struct regmap   *apb;
+	struct phy	*phy;
+	void		*phy_priv;	/* only for PCIE_KIRIN_INTERNAL_PHY */
+
+	/* DWC PERST# */
+	int		gpio_id_dwc_perst;
+
+	/* Per-slot PERST# */
+	int		num_slots;
+	int		gpio_id_reset[MAX_PCI_SLOTS];
+	const char	*reset_names[MAX_PCI_SLOTS];
+
+	/* Per-slot clkreq */
+	int		n_gpio_clkreq;
+	int		gpio_id_clkreq[MAX_PCI_SLOTS];
+	const char	*clkreq_names[MAX_PCI_SLOTS];
+};
+
+/*
+ * Kirin 960 PHY. Can't be split into a PHY driver without changing the
+ * DT schema.
+ */
+
+#define REF_CLK_FREQ			100000000
+
+/* PHY info located in APB */
+#define PCIE_APB_PHY_CTRL0	0x0
+#define PCIE_APB_PHY_CTRL1	0x4
+#define PCIE_APB_PHY_STATUS0   0x400
+#define PIPE_CLK_STABLE		BIT(19)
+#define PHY_REF_PAD_BIT		BIT(8)
+#define PHY_PWR_DOWN_BIT	BIT(22)
+#define PHY_RST_ACK_BIT		BIT(16)
+
+/* peri_crg ctrl */
+#define CRGCTRL_PCIE_ASSERT_OFFSET	0x88
+#define CRGCTRL_PCIE_ASSERT_BIT		0x8c000000
+
+/* Time for delay */
+#define REF_2_PERST_MIN		21000
+#define REF_2_PERST_MAX		25000
+#define PERST_2_ACCESS_MIN	10000
+#define PERST_2_ACCESS_MAX	12000
+#define PIPE_CLK_WAIT_MIN	550
+#define PIPE_CLK_WAIT_MAX	600
+#define TIME_CMOS_MIN		100
+#define TIME_CMOS_MAX		105
+#define TIME_PHY_PD_MIN		10
+#define TIME_PHY_PD_MAX		11
+
+struct hi3660_pcie_phy {
+	struct device	*dev;
+	void __iomem	*base;
+	struct regmap	*crgctrl;
+	struct regmap	*sysctrl;
+	struct clk	*apb_sys_clk;
+	struct clk	*apb_phy_clk;
+	struct clk	*phy_ref_clk;
+	struct clk	*aclk;
+	struct clk	*aux_clk;
+};
+
+/* Registers in PCIePHY */
+static inline void kirin_apb_phy_writel(struct hi3660_pcie_phy *hi3660_pcie_phy,
+					u32 val, u32 reg)
+{
+	writel(val, hi3660_pcie_phy->base + reg);
+}
+
+static inline u32 kirin_apb_phy_readl(struct hi3660_pcie_phy *hi3660_pcie_phy,
+				      u32 reg)
+{
+	return readl(hi3660_pcie_phy->base + reg);
+}
+
+static int hi3660_pcie_phy_get_clk(struct hi3660_pcie_phy *phy)
+{
+	struct device *dev = phy->dev;
+
+	phy->phy_ref_clk = devm_clk_get(dev, "pcie_phy_ref");
+	if (IS_ERR(phy->phy_ref_clk))
+		return PTR_ERR(phy->phy_ref_clk);
+
+	phy->aux_clk = devm_clk_get(dev, "pcie_aux");
+	if (IS_ERR(phy->aux_clk))
+		return PTR_ERR(phy->aux_clk);
+
+	phy->apb_phy_clk = devm_clk_get(dev, "pcie_apb_phy");
+	if (IS_ERR(phy->apb_phy_clk))
+		return PTR_ERR(phy->apb_phy_clk);
+
+	phy->apb_sys_clk = devm_clk_get(dev, "pcie_apb_sys");
+	if (IS_ERR(phy->apb_sys_clk))
+		return PTR_ERR(phy->apb_sys_clk);
+
+	phy->aclk = devm_clk_get(dev, "pcie_aclk");
+	if (IS_ERR(phy->aclk))
+		return PTR_ERR(phy->aclk);
+
+	return 0;
+}
+
+static int hi3660_pcie_phy_get_resource(struct hi3660_pcie_phy *phy)
+{
+	struct device *dev = phy->dev;
+	struct platform_device *pdev;
+
+	/* registers */
+	pdev = container_of(dev, struct platform_device, dev);
+
+	phy->base = devm_platform_ioremap_resource_byname(pdev, "phy");
+	if (IS_ERR(phy->base))
+		return PTR_ERR(phy->base);
+
+	phy->crgctrl = syscon_regmap_lookup_by_compatible("hisilicon,hi3660-crgctrl");
+	if (IS_ERR(phy->crgctrl))
+		return PTR_ERR(phy->crgctrl);
+
+	phy->sysctrl = syscon_regmap_lookup_by_compatible("hisilicon,hi3660-sctrl");
+	if (IS_ERR(phy->sysctrl))
+		return PTR_ERR(phy->sysctrl);
+
+	return 0;
+}
+
+static int hi3660_pcie_phy_start(struct hi3660_pcie_phy *phy)
+{
+	struct device *dev = phy->dev;
+	u32 reg_val;
+
+	reg_val = kirin_apb_phy_readl(phy, PCIE_APB_PHY_CTRL1);
+	reg_val &= ~PHY_REF_PAD_BIT;
+	kirin_apb_phy_writel(phy, reg_val, PCIE_APB_PHY_CTRL1);
+
+	reg_val = kirin_apb_phy_readl(phy, PCIE_APB_PHY_CTRL0);
+	reg_val &= ~PHY_PWR_DOWN_BIT;
+	kirin_apb_phy_writel(phy, reg_val, PCIE_APB_PHY_CTRL0);
+	usleep_range(TIME_PHY_PD_MIN, TIME_PHY_PD_MAX);
+
+	reg_val = kirin_apb_phy_readl(phy, PCIE_APB_PHY_CTRL1);
+	reg_val &= ~PHY_RST_ACK_BIT;
+	kirin_apb_phy_writel(phy, reg_val, PCIE_APB_PHY_CTRL1);
+
+	usleep_range(PIPE_CLK_WAIT_MIN, PIPE_CLK_WAIT_MAX);
+	reg_val = kirin_apb_phy_readl(phy, PCIE_APB_PHY_STATUS0);
+	if (reg_val & PIPE_CLK_STABLE) {
+		dev_err(dev, "PIPE clk is not stable\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void hi3660_pcie_phy_oe_enable(struct hi3660_pcie_phy *phy)
+{
+	u32 val;
+
+	regmap_read(phy->sysctrl, SCTRL_PCIE_OE_OFFSET, &val);
+	val |= PCIE_DEBOUNCE_PARAM;
+	val &= ~PCIE_OE_BYPASS;
+	regmap_write(phy->sysctrl, SCTRL_PCIE_OE_OFFSET, val);
+}
+
+static int hi3660_pcie_phy_clk_ctrl(struct hi3660_pcie_phy *phy, bool enable)
+{
+	int ret = 0;
+
+	if (!enable)
+		goto close_clk;
+
+	ret = clk_set_rate(phy->phy_ref_clk, REF_CLK_FREQ);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(phy->phy_ref_clk);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(phy->apb_sys_clk);
+	if (ret)
+		goto apb_sys_fail;
+
+	ret = clk_prepare_enable(phy->apb_phy_clk);
+	if (ret)
+		goto apb_phy_fail;
+
+	ret = clk_prepare_enable(phy->aclk);
+	if (ret)
+		goto aclk_fail;
+
+	ret = clk_prepare_enable(phy->aux_clk);
+	if (ret)
+		goto aux_clk_fail;
+
+	return 0;
+
+close_clk:
+	clk_disable_unprepare(phy->aux_clk);
+aux_clk_fail:
+	clk_disable_unprepare(phy->aclk);
+aclk_fail:
+	clk_disable_unprepare(phy->apb_phy_clk);
+apb_phy_fail:
+	clk_disable_unprepare(phy->apb_sys_clk);
+apb_sys_fail:
+	clk_disable_unprepare(phy->phy_ref_clk);
+
+	return ret;
+}
+
+static int hi3660_pcie_phy_power_on(struct kirin_pcie *pcie)
+{
+	struct hi3660_pcie_phy *phy = pcie->phy_priv;
+	int ret;
+
+	/* Power supply for Host */
+	regmap_write(phy->sysctrl,
+		     SCTRL_PCIE_CMOS_OFFSET, SCTRL_PCIE_CMOS_BIT);
+	usleep_range(TIME_CMOS_MIN, TIME_CMOS_MAX);
+
+	hi3660_pcie_phy_oe_enable(phy);
+
+	ret = hi3660_pcie_phy_clk_ctrl(phy, true);
+	if (ret)
+		return ret;
+
+	/* ISO disable, PCIeCtrl, PHY assert and clk gate clear */
+	regmap_write(phy->sysctrl,
+		     SCTRL_PCIE_ISO_OFFSET, SCTRL_PCIE_ISO_BIT);
+	regmap_write(phy->crgctrl,
+		     CRGCTRL_PCIE_ASSERT_OFFSET, CRGCTRL_PCIE_ASSERT_BIT);
+	regmap_write(phy->sysctrl,
+		     SCTRL_PCIE_HPCLK_OFFSET, SCTRL_PCIE_HPCLK_BIT);
+
+	ret = hi3660_pcie_phy_start(phy);
+	if (ret)
+		goto disable_clks;
+
+	return 0;
+
+disable_clks:
+	hi3660_pcie_phy_clk_ctrl(phy, false);
+	return ret;
+}
+
+static int hi3660_pcie_phy_init(struct platform_device *pdev,
+				struct kirin_pcie *pcie)
+{
+	struct device *dev = &pdev->dev;
+	struct hi3660_pcie_phy *phy;
+	int ret;
+
+	phy = devm_kzalloc(dev, sizeof(*phy), GFP_KERNEL);
+	if (!phy)
+		return -ENOMEM;
+
+	pcie->phy_priv = phy;
+	phy->dev = dev;
+
+	ret = hi3660_pcie_phy_get_clk(phy);
+	if (ret)
+		return ret;
+
+	return hi3660_pcie_phy_get_resource(phy);
+}
+
+static int hi3660_pcie_phy_power_off(struct kirin_pcie *pcie)
+{
+	struct hi3660_pcie_phy *phy = pcie->phy_priv;
+
+	/* Drop power supply for Host */
+	regmap_write(phy->sysctrl, SCTRL_PCIE_CMOS_OFFSET, 0x00);
+
+	hi3660_pcie_phy_clk_ctrl(phy, false);
+
+	return 0;
+}
+
+/*
+ * The non-PHY part starts here
+ */
+
+static const struct regmap_config pcie_kirin_regmap_conf = {
+	.name = "kirin_pcie_apb",
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_stride = 4,
+};
+
+static int kirin_pcie_get_gpio_enable(struct kirin_pcie *pcie,
+				      struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	char name[32];
+	int ret, i;
+
+	/* This is an optional property */
+	ret = gpiod_count(dev, "hisilicon,clken");
+	if (ret < 0)
+		return 0;
+
+	if (ret > MAX_PCI_SLOTS) {
+		dev_err(dev, "Too many GPIO clock requests!\n");
+		return -EINVAL;
+	}
+
+	pcie->n_gpio_clkreq = ret;
+
+	for (i = 0; i < pcie->n_gpio_clkreq; i++) {
+		pcie->gpio_id_clkreq[i] = of_get_named_gpio(dev->of_node,
+						    "hisilicon,clken-gpios", i);
+		if (pcie->gpio_id_clkreq[i] < 0)
+			return pcie->gpio_id_clkreq[i];
+
+		sprintf(name, "pcie_clkreq_%d", i);
+		pcie->clkreq_names[i] = devm_kstrdup_const(dev, name,
+							    GFP_KERNEL);
+		if (!pcie->clkreq_names[i])
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int kirin_pcie_parse_port(struct kirin_pcie *pcie,
+				 struct platform_device *pdev,
+				 struct device_node *node)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *parent, *child;
+	int ret, slot, i;
+	char name[32];
+
+	for_each_available_child_of_node(node, parent) {
+		for_each_available_child_of_node(parent, child) {
+			i = pcie->num_slots;
+
+			pcie->gpio_id_reset[i] = of_get_named_gpio(child,
+							"reset-gpios", 0);
+			if (pcie->gpio_id_reset[i] < 0)
+				continue;
+
+			pcie->num_slots++;
+			if (pcie->num_slots > MAX_PCI_SLOTS) {
+				dev_err(dev, "Too many PCI slots!\n");
+				ret = -EINVAL;
+				goto put_node;
+			}
+
+			ret = of_pci_get_devfn(child);
+			if (ret < 0) {
+				dev_err(dev, "failed to parse devfn: %d\n", ret);
+				goto put_node;
+			}
+
+			slot = PCI_SLOT(ret);
+
+			sprintf(name, "pcie_perst_%d", slot);
+			pcie->reset_names[i] = devm_kstrdup_const(dev, name,
+								GFP_KERNEL);
+			if (!pcie->reset_names[i]) {
+				ret = -ENOMEM;
+				goto put_node;
+			}
+		}
+	}
+
+	return 0;
+
+put_node:
+	of_node_put(child);
+	of_node_put(parent);
+	return ret;
+}
+
+static long kirin_pcie_get_resource(struct kirin_pcie *kirin_pcie,
+				    struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *child, *node = dev->of_node;
+	void __iomem *apb_base;
+	int ret;
+
+	apb_base = devm_platform_ioremap_resource_byname(pdev, "apb");
+	if (IS_ERR(apb_base))
+		return PTR_ERR(apb_base);
+
+	kirin_pcie->apb = devm_regmap_init_mmio(dev, apb_base,
+						&pcie_kirin_regmap_conf);
+	if (IS_ERR(kirin_pcie->apb))
+		return PTR_ERR(kirin_pcie->apb);
+
+	/* pcie internal PERST# gpio */
+	kirin_pcie->gpio_id_dwc_perst = of_get_named_gpio(dev->of_node,
+							  "reset-gpios", 0);
+	if (kirin_pcie->gpio_id_dwc_perst == -EPROBE_DEFER) {
+		return -EPROBE_DEFER;
+	} else if (!gpio_is_valid(kirin_pcie->gpio_id_dwc_perst)) {
+		dev_err(dev, "unable to get a valid gpio pin\n");
+		return -ENODEV;
+	}
+
+	ret = kirin_pcie_get_gpio_enable(kirin_pcie, pdev);
+	if (ret)
+		return ret;
+
+	/* Parse OF children */
+	for_each_available_child_of_node(node, child) {
+		ret = kirin_pcie_parse_port(kirin_pcie, pdev, child);
+		if (ret)
+			goto put_node;
+	}
+
+	return 0;
+
+put_node:
+	of_node_put(child);
+	return ret;
+}
+
+static void kirin_pcie_sideband_dbi_w_mode(struct kirin_pcie *kirin_pcie,
+					   bool on)
+{
+	u32 val;
+
+	regmap_read(kirin_pcie->apb, SOC_PCIECTRL_CTRL0_ADDR, &val);
+	if (on)
+		val = val | PCIE_ELBI_SLV_DBI_ENABLE;
+	else
+		val = val & ~PCIE_ELBI_SLV_DBI_ENABLE;
+
+	regmap_write(kirin_pcie->apb, SOC_PCIECTRL_CTRL0_ADDR, val);
+}
+
+static void kirin_pcie_sideband_dbi_r_mode(struct kirin_pcie *kirin_pcie,
+					   bool on)
+{
+	u32 val;
+
+	regmap_read(kirin_pcie->apb, SOC_PCIECTRL_CTRL1_ADDR, &val);
+	if (on)
+		val = val | PCIE_ELBI_SLV_DBI_ENABLE;
+	else
+		val = val & ~PCIE_ELBI_SLV_DBI_ENABLE;
+
+	regmap_write(kirin_pcie->apb, SOC_PCIECTRL_CTRL1_ADDR, val);
+}
+
+static int kirin_pcie_rd_own_conf(struct pci_bus *bus, unsigned int devfn,
+				  int where, int size, u32 *val)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(bus->sysdata);
+
+	if (PCI_SLOT(devfn))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	*val = dw_pcie_read_dbi(pci, where, size);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int kirin_pcie_wr_own_conf(struct pci_bus *bus, unsigned int devfn,
+				  int where, int size, u32 val)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(bus->sysdata);
+
+	if (PCI_SLOT(devfn))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	dw_pcie_write_dbi(pci, where, size, val);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int kirin_pcie_add_bus(struct pci_bus *bus)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(bus->sysdata);
+	struct kirin_pcie *kirin_pcie = to_kirin_pcie(pci);
+	int i, ret;
+
+	if (!kirin_pcie->num_slots)
+		return 0;
+
+	/* Send PERST# to each slot */
+	for (i = 0; i < kirin_pcie->num_slots; i++) {
+		ret = gpio_direction_output(kirin_pcie->gpio_id_reset[i], 1);
+		if (ret) {
+			dev_err(pci->dev, "PERST# %s error: %d\n",
+				kirin_pcie->reset_names[i], ret);
+		}
+	}
+	usleep_range(PERST_2_ACCESS_MIN, PERST_2_ACCESS_MAX);
+
+	return 0;
+}
+
+static struct pci_ops kirin_pci_ops = {
+	.read = kirin_pcie_rd_own_conf,
+	.write = kirin_pcie_wr_own_conf,
+	.add_bus = kirin_pcie_add_bus,
+};
+
+static u32 kirin_pcie_read_dbi(struct dw_pcie *pci, void __iomem *base,
+			       u32 reg, size_t size)
+{
+	struct kirin_pcie *kirin_pcie = to_kirin_pcie(pci);
+	u32 ret;
+
+	kirin_pcie_sideband_dbi_r_mode(kirin_pcie, true);
+	dw_pcie_read(base + reg, size, &ret);
+	kirin_pcie_sideband_dbi_r_mode(kirin_pcie, false);
+
+	return ret;
+}
+
+static void kirin_pcie_write_dbi(struct dw_pcie *pci, void __iomem *base,
+				 u32 reg, size_t size, u32 val)
+{
+	struct kirin_pcie *kirin_pcie = to_kirin_pcie(pci);
+
+	kirin_pcie_sideband_dbi_w_mode(kirin_pcie, true);
+	dw_pcie_write(base + reg, size, val);
+	kirin_pcie_sideband_dbi_w_mode(kirin_pcie, false);
+}
+
+static int kirin_pcie_link_up(struct dw_pcie *pci)
+{
+	struct kirin_pcie *kirin_pcie = to_kirin_pcie(pci);
+	u32 val;
+
+	regmap_read(kirin_pcie->apb, PCIE_APB_PHY_STATUS0, &val);
+	if ((val & PCIE_LINKUP_ENABLE) == PCIE_LINKUP_ENABLE)
+		return 1;
+
+	return 0;
+}
+
+static int kirin_pcie_start_link(struct dw_pcie *pci)
+{
+	struct kirin_pcie *kirin_pcie = to_kirin_pcie(pci);
+
+	/* assert LTSSM enable */
+	regmap_write(kirin_pcie->apb, PCIE_APP_LTSSM_ENABLE,
+		     PCIE_LTSSM_ENABLE_BIT);
+
+	return 0;
+}
+
+static int kirin_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	pp->bridge->ops = &kirin_pci_ops;
+
+	return 0;
+}
+
+static int kirin_pcie_gpio_request(struct kirin_pcie *kirin_pcie,
+				   struct device *dev)
+{
+	int ret, i;
+
+	for (i = 0; i < kirin_pcie->num_slots; i++) {
+		if (!gpio_is_valid(kirin_pcie->gpio_id_reset[i])) {
+			dev_err(dev, "unable to get a valid %s gpio\n",
+				kirin_pcie->reset_names[i]);
+			return -ENODEV;
+		}
+
+		ret = devm_gpio_request(dev, kirin_pcie->gpio_id_reset[i],
+					kirin_pcie->reset_names[i]);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < kirin_pcie->n_gpio_clkreq; i++) {
+		if (!gpio_is_valid(kirin_pcie->gpio_id_clkreq[i])) {
+			dev_err(dev, "unable to get a valid %s gpio\n",
+				kirin_pcie->clkreq_names[i]);
+			return -ENODEV;
+		}
+
+		ret = devm_gpio_request(dev, kirin_pcie->gpio_id_clkreq[i],
+					kirin_pcie->clkreq_names[i]);
+		if (ret)
+			return ret;
+
+		ret = gpio_direction_output(kirin_pcie->gpio_id_clkreq[i], 0);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static const struct dw_pcie_ops kirin_dw_pcie_ops = {
+	.read_dbi = kirin_pcie_read_dbi,
+	.write_dbi = kirin_pcie_write_dbi,
+	.link_up = kirin_pcie_link_up,
+	.start_link = kirin_pcie_start_link,
+};
+
+static const struct dw_pcie_host_ops kirin_pcie_host_ops = {
+	.host_init = kirin_pcie_host_init,
+};
+
+static int kirin_pcie_power_off(struct kirin_pcie *kirin_pcie)
+{
+	int i;
+
+	if (kirin_pcie->type == PCIE_KIRIN_INTERNAL_PHY)
+		return hi3660_pcie_phy_power_off(kirin_pcie);
+
+	for (i = 0; i < kirin_pcie->n_gpio_clkreq; i++)
+		gpio_direction_output(kirin_pcie->gpio_id_clkreq[i], 1);
+
+	phy_power_off(kirin_pcie->phy);
+	phy_exit(kirin_pcie->phy);
+
+	return 0;
+}
+
+static int kirin_pcie_power_on(struct platform_device *pdev,
+			       struct kirin_pcie *kirin_pcie)
+{
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	if (kirin_pcie->type == PCIE_KIRIN_INTERNAL_PHY) {
+		ret = hi3660_pcie_phy_init(pdev, kirin_pcie);
+		if (ret)
+			return ret;
+
+		ret = hi3660_pcie_phy_power_on(kirin_pcie);
+		if (ret)
+			return ret;
+	} else {
+		kirin_pcie->phy = devm_of_phy_get(dev, dev->of_node, NULL);
+		if (IS_ERR(kirin_pcie->phy))
+			return PTR_ERR(kirin_pcie->phy);
+
+		ret = kirin_pcie_gpio_request(kirin_pcie, dev);
+		if (ret)
+			return ret;
+
+		ret = phy_init(kirin_pcie->phy);
+		if (ret)
+			goto err;
+
+		ret = phy_power_on(kirin_pcie->phy);
+		if (ret)
+			goto err;
+	}
+
+	/* perst assert Endpoint */
+	usleep_range(REF_2_PERST_MIN, REF_2_PERST_MAX);
+
+	if (!gpio_request(kirin_pcie->gpio_id_dwc_perst, "pcie_perst_bridge")) {
+		ret = gpio_direction_output(kirin_pcie->gpio_id_dwc_perst, 1);
+		if (ret)
+			goto err;
+	}
+
+	usleep_range(PERST_2_ACCESS_MIN, PERST_2_ACCESS_MAX);
+
+	return 0;
+err:
+	kirin_pcie_power_off(kirin_pcie);
+
+	return ret;
+}
+
+static int kirin_pcie_remove(struct platform_device *pdev)
+{
+	struct kirin_pcie *kirin_pcie = platform_get_drvdata(pdev);
+
+	dw_pcie_host_deinit(&kirin_pcie->pci->pp);
+
+	kirin_pcie_power_off(kirin_pcie);
+
+	return 0;
+}
+
+struct kirin_pcie_data {
+	enum pcie_kirin_phy_type	phy_type;
+};
+
+static const struct kirin_pcie_data kirin_960_data = {
+	.phy_type = PCIE_KIRIN_INTERNAL_PHY,
+};
+
+static const struct kirin_pcie_data kirin_970_data = {
+	.phy_type = PCIE_KIRIN_EXTERNAL_PHY,
+};
+
+static const struct of_device_id kirin_pcie_match[] = {
+	{ .compatible = "hisilicon,kirin960-pcie", .data = &kirin_960_data },
+	{ .compatible = "hisilicon,kirin970-pcie", .data = &kirin_970_data },
+	{},
+};
+
+static int kirin_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	const struct kirin_pcie_data *data;
+	struct kirin_pcie *kirin_pcie;
+	struct dw_pcie *pci;
+	int ret;
+
+	if (!dev->of_node) {
+		dev_err(dev, "NULL node\n");
+		return -EINVAL;
+	}
+
+	data = of_device_get_match_data(dev);
+	if (!data) {
+		dev_err(dev, "OF data missing\n");
+		return -EINVAL;
+	}
+
+	kirin_pcie = devm_kzalloc(dev, sizeof(struct kirin_pcie), GFP_KERNEL);
+	if (!kirin_pcie)
+		return -ENOMEM;
+
+	pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	if (!pci)
+		return -ENOMEM;
+
+	pci->dev = dev;
+	pci->ops = &kirin_dw_pcie_ops;
+	pci->pp.ops = &kirin_pcie_host_ops;
+	kirin_pcie->pci = pci;
+	kirin_pcie->type = data->phy_type;
+
+	ret = kirin_pcie_get_resource(kirin_pcie, pdev);
+	if (ret)
+		return ret;
+
+	platform_set_drvdata(pdev, kirin_pcie);
+
+	ret = kirin_pcie_power_on(pdev, kirin_pcie);
+	if (ret)
+		return ret;
+
+	return dw_pcie_host_init(&pci->pp);
+}
+
+static struct platform_driver kirin_pcie_driver = {
+	.probe			= kirin_pcie_probe,
+	.remove	        	= kirin_pcie_remove,
+	.driver			= {
+		.name			= "kirin-pcie",
+		.of_match_table		= kirin_pcie_match,
+		.suppress_bind_attrs	= true,
+	},
+};
+module_platform_driver(kirin_pcie_driver);
+
+MODULE_DEVICE_TABLE(of, kirin_pcie_match);
+MODULE_DESCRIPTION("PCIe host controller driver for Kirin Phone SoCs");
+MODULE_AUTHOR("Xiaowei Song <songxiaowei@huawei.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c b/drivers/pci/controller/dwc/pcie-qcom-ep.c
new file mode 100644
index 000000000..9b62ee699
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c
@@ -0,0 +1,916 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Qualcomm PCIe Endpoint controller driver
+ *
+ * Copyright (c) 2020, The Linux Foundation. All rights reserved.
+ * Author: Siddartha Mohanadoss <smohanad@codeaurora.org
+ *
+ * Copyright (c) 2021, Linaro Ltd.
+ * Author: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org
+ */
+
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interconnect.h>
+#include <linux/mfd/syscon.h>
+#include <linux/phy/pcie.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/module.h>
+
+#include "pcie-designware.h"
+
+/* PARF registers */
+#define PARF_SYS_CTRL				0x00
+#define PARF_DB_CTRL				0x10
+#define PARF_PM_CTRL				0x20
+#define PARF_MHI_CLOCK_RESET_CTRL		0x174
+#define PARF_MHI_BASE_ADDR_LOWER		0x178
+#define PARF_MHI_BASE_ADDR_UPPER		0x17c
+#define PARF_DEBUG_INT_EN			0x190
+#define PARF_AXI_MSTR_RD_HALT_NO_WRITES		0x1a4
+#define PARF_AXI_MSTR_WR_ADDR_HALT		0x1a8
+#define PARF_Q2A_FLUSH				0x1ac
+#define PARF_LTSSM				0x1b0
+#define PARF_CFG_BITS				0x210
+#define PARF_INT_ALL_STATUS			0x224
+#define PARF_INT_ALL_CLEAR			0x228
+#define PARF_INT_ALL_MASK			0x22c
+#define PARF_SLV_ADDR_MSB_CTRL			0x2c0
+#define PARF_DBI_BASE_ADDR			0x350
+#define PARF_DBI_BASE_ADDR_HI			0x354
+#define PARF_SLV_ADDR_SPACE_SIZE		0x358
+#define PARF_SLV_ADDR_SPACE_SIZE_HI		0x35c
+#define PARF_ATU_BASE_ADDR			0x634
+#define PARF_ATU_BASE_ADDR_HI			0x638
+#define PARF_SRIS_MODE				0x644
+#define PARF_DEBUG_CNT_PM_LINKST_IN_L2		0xc04
+#define PARF_DEBUG_CNT_PM_LINKST_IN_L1		0xc0c
+#define PARF_DEBUG_CNT_PM_LINKST_IN_L0S		0xc10
+#define PARF_DEBUG_CNT_AUX_CLK_IN_L1SUB_L1	0xc84
+#define PARF_DEBUG_CNT_AUX_CLK_IN_L1SUB_L2	0xc88
+#define PARF_DEVICE_TYPE			0x1000
+#define PARF_BDF_TO_SID_CFG			0x2c00
+
+/* PARF_INT_ALL_{STATUS/CLEAR/MASK} register fields */
+#define PARF_INT_ALL_LINK_DOWN			BIT(1)
+#define PARF_INT_ALL_BME			BIT(2)
+#define PARF_INT_ALL_PM_TURNOFF			BIT(3)
+#define PARF_INT_ALL_DEBUG			BIT(4)
+#define PARF_INT_ALL_LTR			BIT(5)
+#define PARF_INT_ALL_MHI_Q6			BIT(6)
+#define PARF_INT_ALL_MHI_A7			BIT(7)
+#define PARF_INT_ALL_DSTATE_CHANGE		BIT(8)
+#define PARF_INT_ALL_L1SUB_TIMEOUT		BIT(9)
+#define PARF_INT_ALL_MMIO_WRITE			BIT(10)
+#define PARF_INT_ALL_CFG_WRITE			BIT(11)
+#define PARF_INT_ALL_BRIDGE_FLUSH_N		BIT(12)
+#define PARF_INT_ALL_LINK_UP			BIT(13)
+#define PARF_INT_ALL_AER_LEGACY			BIT(14)
+#define PARF_INT_ALL_PLS_ERR			BIT(15)
+#define PARF_INT_ALL_PME_LEGACY			BIT(16)
+#define PARF_INT_ALL_PLS_PME			BIT(17)
+#define PARF_INT_ALL_EDMA			BIT(22)
+
+/* PARF_BDF_TO_SID_CFG register fields */
+#define PARF_BDF_TO_SID_BYPASS			BIT(0)
+
+/* PARF_DEBUG_INT_EN register fields */
+#define PARF_DEBUG_INT_PM_DSTATE_CHANGE		BIT(1)
+#define PARF_DEBUG_INT_CFG_BUS_MASTER_EN	BIT(2)
+#define PARF_DEBUG_INT_RADM_PM_TURNOFF		BIT(3)
+
+/* PARF_DEVICE_TYPE register fields */
+#define PARF_DEVICE_TYPE_EP			0x0
+
+/* PARF_PM_CTRL register fields */
+#define PARF_PM_CTRL_REQ_EXIT_L1		BIT(1)
+#define PARF_PM_CTRL_READY_ENTR_L23		BIT(2)
+#define PARF_PM_CTRL_REQ_NOT_ENTR_L1		BIT(5)
+
+/* PARF_MHI_CLOCK_RESET_CTRL fields */
+#define PARF_MSTR_AXI_CLK_EN			BIT(1)
+
+/* PARF_AXI_MSTR_RD_HALT_NO_WRITES register fields */
+#define PARF_AXI_MSTR_RD_HALT_NO_WRITE_EN	BIT(0)
+
+/* PARF_AXI_MSTR_WR_ADDR_HALT register fields */
+#define PARF_AXI_MSTR_WR_ADDR_HALT_EN		BIT(31)
+
+/* PARF_Q2A_FLUSH register fields */
+#define PARF_Q2A_FLUSH_EN			BIT(16)
+
+/* PARF_SYS_CTRL register fields */
+#define PARF_SYS_CTRL_AUX_PWR_DET		BIT(4)
+#define PARF_SYS_CTRL_CORE_CLK_CGC_DIS		BIT(6)
+#define PARF_SYS_CTRL_MSTR_ACLK_CGC_DIS		BIT(10)
+#define PARF_SYS_CTRL_SLV_DBI_WAKE_DISABLE	BIT(11)
+
+/* PARF_DB_CTRL register fields */
+#define PARF_DB_CTRL_INSR_DBNCR_BLOCK		BIT(0)
+#define PARF_DB_CTRL_RMVL_DBNCR_BLOCK		BIT(1)
+#define PARF_DB_CTRL_DBI_WKP_BLOCK		BIT(4)
+#define PARF_DB_CTRL_SLV_WKP_BLOCK		BIT(5)
+#define PARF_DB_CTRL_MST_WKP_BLOCK		BIT(6)
+
+/* PARF_CFG_BITS register fields */
+#define PARF_CFG_BITS_REQ_EXIT_L1SS_MSI_LTR_EN	BIT(1)
+
+/* ELBI registers */
+#define ELBI_SYS_STTS				0x08
+#define ELBI_CS2_ENABLE				0xa4
+
+/* DBI registers */
+#define DBI_CON_STATUS				0x44
+
+/* DBI register fields */
+#define DBI_CON_STATUS_POWER_STATE_MASK		GENMASK(1, 0)
+
+#define XMLH_LINK_UP				0x400
+#define CORE_RESET_TIME_US_MIN			1000
+#define CORE_RESET_TIME_US_MAX			1005
+#define WAKE_DELAY_US				2000 /* 2 ms */
+
+#define PCIE_GEN1_BW_MBPS			250
+#define PCIE_GEN2_BW_MBPS			500
+#define PCIE_GEN3_BW_MBPS			985
+#define PCIE_GEN4_BW_MBPS			1969
+
+#define to_pcie_ep(x)				dev_get_drvdata((x)->dev)
+
+enum qcom_pcie_ep_link_status {
+	QCOM_PCIE_EP_LINK_DISABLED,
+	QCOM_PCIE_EP_LINK_ENABLED,
+	QCOM_PCIE_EP_LINK_UP,
+	QCOM_PCIE_EP_LINK_DOWN,
+};
+
+/**
+ * struct qcom_pcie_ep - Qualcomm PCIe Endpoint Controller
+ * @pci: Designware PCIe controller struct
+ * @parf: Qualcomm PCIe specific PARF register base
+ * @elbi: Designware PCIe specific ELBI register base
+ * @mmio: MMIO register base
+ * @perst_map: PERST regmap
+ * @mmio_res: MMIO region resource
+ * @core_reset: PCIe Endpoint core reset
+ * @reset: PERST# GPIO
+ * @wake: WAKE# GPIO
+ * @phy: PHY controller block
+ * @debugfs: PCIe Endpoint Debugfs directory
+ * @icc_mem: Handle to an interconnect path between PCIe and MEM
+ * @clks: PCIe clocks
+ * @num_clks: PCIe clocks count
+ * @perst_en: Flag for PERST enable
+ * @perst_sep_en: Flag for PERST separation enable
+ * @link_status: PCIe Link status
+ * @global_irq: Qualcomm PCIe specific Global IRQ
+ * @perst_irq: PERST# IRQ
+ */
+struct qcom_pcie_ep {
+	struct dw_pcie pci;
+
+	void __iomem *parf;
+	void __iomem *elbi;
+	void __iomem *mmio;
+	struct regmap *perst_map;
+	struct resource *mmio_res;
+
+	struct reset_control *core_reset;
+	struct gpio_desc *reset;
+	struct gpio_desc *wake;
+	struct phy *phy;
+	struct dentry *debugfs;
+
+	struct icc_path *icc_mem;
+
+	struct clk_bulk_data *clks;
+	int num_clks;
+
+	u32 perst_en;
+	u32 perst_sep_en;
+
+	enum qcom_pcie_ep_link_status link_status;
+	int global_irq;
+	int perst_irq;
+};
+
+static int qcom_pcie_ep_core_reset(struct qcom_pcie_ep *pcie_ep)
+{
+	struct dw_pcie *pci = &pcie_ep->pci;
+	struct device *dev = pci->dev;
+	int ret;
+
+	ret = reset_control_assert(pcie_ep->core_reset);
+	if (ret) {
+		dev_err(dev, "Cannot assert core reset\n");
+		return ret;
+	}
+
+	usleep_range(CORE_RESET_TIME_US_MIN, CORE_RESET_TIME_US_MAX);
+
+	ret = reset_control_deassert(pcie_ep->core_reset);
+	if (ret) {
+		dev_err(dev, "Cannot de-assert core reset\n");
+		return ret;
+	}
+
+	usleep_range(CORE_RESET_TIME_US_MIN, CORE_RESET_TIME_US_MAX);
+
+	return 0;
+}
+
+/*
+ * Delatch PERST_EN and PERST_SEPARATION_ENABLE with TCSR to avoid
+ * device reset during host reboot and hibernation. The driver is
+ * expected to handle this situation.
+ */
+static void qcom_pcie_ep_configure_tcsr(struct qcom_pcie_ep *pcie_ep)
+{
+	if (pcie_ep->perst_map) {
+		regmap_write(pcie_ep->perst_map, pcie_ep->perst_en, 0);
+		regmap_write(pcie_ep->perst_map, pcie_ep->perst_sep_en, 0);
+	}
+}
+
+static int qcom_pcie_dw_link_up(struct dw_pcie *pci)
+{
+	struct qcom_pcie_ep *pcie_ep = to_pcie_ep(pci);
+	u32 reg;
+
+	reg = readl_relaxed(pcie_ep->elbi + ELBI_SYS_STTS);
+
+	return reg & XMLH_LINK_UP;
+}
+
+static int qcom_pcie_dw_start_link(struct dw_pcie *pci)
+{
+	struct qcom_pcie_ep *pcie_ep = to_pcie_ep(pci);
+
+	enable_irq(pcie_ep->perst_irq);
+
+	return 0;
+}
+
+static void qcom_pcie_dw_stop_link(struct dw_pcie *pci)
+{
+	struct qcom_pcie_ep *pcie_ep = to_pcie_ep(pci);
+
+	disable_irq(pcie_ep->perst_irq);
+}
+
+static void qcom_pcie_dw_write_dbi2(struct dw_pcie *pci, void __iomem *base,
+				    u32 reg, size_t size, u32 val)
+{
+	struct qcom_pcie_ep *pcie_ep = to_pcie_ep(pci);
+	int ret;
+
+	writel(1, pcie_ep->elbi + ELBI_CS2_ENABLE);
+
+	ret = dw_pcie_write(pci->dbi_base2 + reg, size, val);
+	if (ret)
+		dev_err(pci->dev, "Failed to write DBI2 register (0x%x): %d\n", reg, ret);
+
+	writel(0, pcie_ep->elbi + ELBI_CS2_ENABLE);
+}
+
+static void qcom_pcie_ep_icc_update(struct qcom_pcie_ep *pcie_ep)
+{
+	struct dw_pcie *pci = &pcie_ep->pci;
+	u32 offset, status, bw;
+	int speed, width;
+	int ret;
+
+	if (!pcie_ep->icc_mem)
+		return;
+
+	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	status = readw(pci->dbi_base + offset + PCI_EXP_LNKSTA);
+
+	speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, status);
+	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, status);
+
+	switch (speed) {
+	case 1:
+		bw = MBps_to_icc(PCIE_GEN1_BW_MBPS);
+		break;
+	case 2:
+		bw = MBps_to_icc(PCIE_GEN2_BW_MBPS);
+		break;
+	case 3:
+		bw = MBps_to_icc(PCIE_GEN3_BW_MBPS);
+		break;
+	default:
+		dev_warn(pci->dev, "using default GEN4 bandwidth\n");
+		fallthrough;
+	case 4:
+		bw = MBps_to_icc(PCIE_GEN4_BW_MBPS);
+		break;
+	}
+
+	ret = icc_set_bw(pcie_ep->icc_mem, 0, width * bw);
+	if (ret)
+		dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n",
+			ret);
+}
+
+static int qcom_pcie_enable_resources(struct qcom_pcie_ep *pcie_ep)
+{
+	struct dw_pcie *pci = &pcie_ep->pci;
+	int ret;
+
+	ret = clk_bulk_prepare_enable(pcie_ep->num_clks, pcie_ep->clks);
+	if (ret)
+		return ret;
+
+	ret = qcom_pcie_ep_core_reset(pcie_ep);
+	if (ret)
+		goto err_disable_clk;
+
+	ret = phy_init(pcie_ep->phy);
+	if (ret)
+		goto err_disable_clk;
+
+	ret = phy_set_mode_ext(pcie_ep->phy, PHY_MODE_PCIE, PHY_MODE_PCIE_EP);
+	if (ret)
+		goto err_phy_exit;
+
+	ret = phy_power_on(pcie_ep->phy);
+	if (ret)
+		goto err_phy_exit;
+
+	/*
+	 * Some Qualcomm platforms require interconnect bandwidth constraints
+	 * to be set before enabling interconnect clocks.
+	 *
+	 * Set an initial peak bandwidth corresponding to single-lane Gen 1
+	 * for the pcie-mem path.
+	 */
+	ret = icc_set_bw(pcie_ep->icc_mem, 0, MBps_to_icc(PCIE_GEN1_BW_MBPS));
+	if (ret) {
+		dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n",
+			ret);
+		goto err_phy_off;
+	}
+
+	return 0;
+
+err_phy_off:
+	phy_power_off(pcie_ep->phy);
+err_phy_exit:
+	phy_exit(pcie_ep->phy);
+err_disable_clk:
+	clk_bulk_disable_unprepare(pcie_ep->num_clks, pcie_ep->clks);
+
+	return ret;
+}
+
+static void qcom_pcie_disable_resources(struct qcom_pcie_ep *pcie_ep)
+{
+	icc_set_bw(pcie_ep->icc_mem, 0, 0);
+	phy_power_off(pcie_ep->phy);
+	phy_exit(pcie_ep->phy);
+	clk_bulk_disable_unprepare(pcie_ep->num_clks, pcie_ep->clks);
+}
+
+static int qcom_pcie_perst_deassert(struct dw_pcie *pci)
+{
+	struct qcom_pcie_ep *pcie_ep = to_pcie_ep(pci);
+	struct device *dev = pci->dev;
+	u32 val, offset;
+	int ret;
+
+	ret = qcom_pcie_enable_resources(pcie_ep);
+	if (ret) {
+		dev_err(dev, "Failed to enable resources: %d\n", ret);
+		return ret;
+	}
+
+	/* Assert WAKE# to RC to indicate device is ready */
+	gpiod_set_value_cansleep(pcie_ep->wake, 1);
+	usleep_range(WAKE_DELAY_US, WAKE_DELAY_US + 500);
+	gpiod_set_value_cansleep(pcie_ep->wake, 0);
+
+	qcom_pcie_ep_configure_tcsr(pcie_ep);
+
+	/* Disable BDF to SID mapping */
+	val = readl_relaxed(pcie_ep->parf + PARF_BDF_TO_SID_CFG);
+	val |= PARF_BDF_TO_SID_BYPASS;
+	writel_relaxed(val, pcie_ep->parf + PARF_BDF_TO_SID_CFG);
+
+	/* Enable debug IRQ */
+	val = readl_relaxed(pcie_ep->parf + PARF_DEBUG_INT_EN);
+	val |= PARF_DEBUG_INT_RADM_PM_TURNOFF |
+	       PARF_DEBUG_INT_CFG_BUS_MASTER_EN |
+	       PARF_DEBUG_INT_PM_DSTATE_CHANGE;
+	writel_relaxed(val, pcie_ep->parf + PARF_DEBUG_INT_EN);
+
+	/* Configure PCIe to endpoint mode */
+	writel_relaxed(PARF_DEVICE_TYPE_EP, pcie_ep->parf + PARF_DEVICE_TYPE);
+
+	/* Allow entering L1 state */
+	val = readl_relaxed(pcie_ep->parf + PARF_PM_CTRL);
+	val &= ~PARF_PM_CTRL_REQ_NOT_ENTR_L1;
+	writel_relaxed(val, pcie_ep->parf + PARF_PM_CTRL);
+
+	/* Read halts write */
+	val = readl_relaxed(pcie_ep->parf + PARF_AXI_MSTR_RD_HALT_NO_WRITES);
+	val &= ~PARF_AXI_MSTR_RD_HALT_NO_WRITE_EN;
+	writel_relaxed(val, pcie_ep->parf + PARF_AXI_MSTR_RD_HALT_NO_WRITES);
+
+	/* Write after write halt */
+	val = readl_relaxed(pcie_ep->parf + PARF_AXI_MSTR_WR_ADDR_HALT);
+	val |= PARF_AXI_MSTR_WR_ADDR_HALT_EN;
+	writel_relaxed(val, pcie_ep->parf + PARF_AXI_MSTR_WR_ADDR_HALT);
+
+	/* Q2A flush disable */
+	val = readl_relaxed(pcie_ep->parf + PARF_Q2A_FLUSH);
+	val &= ~PARF_Q2A_FLUSH_EN;
+	writel_relaxed(val, pcie_ep->parf + PARF_Q2A_FLUSH);
+
+	/*
+	 * Disable Master AXI clock during idle.  Do not allow DBI access
+	 * to take the core out of L1.  Disable core clock gating that
+	 * gates PIPE clock from propagating to core clock.  Report to the
+	 * host that Vaux is present.
+	 */
+	val = readl_relaxed(pcie_ep->parf + PARF_SYS_CTRL);
+	val &= ~PARF_SYS_CTRL_MSTR_ACLK_CGC_DIS;
+	val |= PARF_SYS_CTRL_SLV_DBI_WAKE_DISABLE |
+	       PARF_SYS_CTRL_CORE_CLK_CGC_DIS |
+	       PARF_SYS_CTRL_AUX_PWR_DET;
+	writel_relaxed(val, pcie_ep->parf + PARF_SYS_CTRL);
+
+	/* Disable the debouncers */
+	val = readl_relaxed(pcie_ep->parf + PARF_DB_CTRL);
+	val |= PARF_DB_CTRL_INSR_DBNCR_BLOCK | PARF_DB_CTRL_RMVL_DBNCR_BLOCK |
+	       PARF_DB_CTRL_DBI_WKP_BLOCK | PARF_DB_CTRL_SLV_WKP_BLOCK |
+	       PARF_DB_CTRL_MST_WKP_BLOCK;
+	writel_relaxed(val, pcie_ep->parf + PARF_DB_CTRL);
+
+	/* Request to exit from L1SS for MSI and LTR MSG */
+	val = readl_relaxed(pcie_ep->parf + PARF_CFG_BITS);
+	val |= PARF_CFG_BITS_REQ_EXIT_L1SS_MSI_LTR_EN;
+	writel_relaxed(val, pcie_ep->parf + PARF_CFG_BITS);
+
+	dw_pcie_dbi_ro_wr_en(pci);
+
+	/* Set the L0s Exit Latency to 2us-4us = 0x6 */
+	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	val = dw_pcie_readl_dbi(pci, offset + PCI_EXP_LNKCAP);
+	val &= ~PCI_EXP_LNKCAP_L0SEL;
+	val |= FIELD_PREP(PCI_EXP_LNKCAP_L0SEL, 0x6);
+	dw_pcie_writel_dbi(pci, offset + PCI_EXP_LNKCAP, val);
+
+	/* Set the L1 Exit Latency to be 32us-64 us = 0x6 */
+	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	val = dw_pcie_readl_dbi(pci, offset + PCI_EXP_LNKCAP);
+	val &= ~PCI_EXP_LNKCAP_L1EL;
+	val |= FIELD_PREP(PCI_EXP_LNKCAP_L1EL, 0x6);
+	dw_pcie_writel_dbi(pci, offset + PCI_EXP_LNKCAP, val);
+
+	dw_pcie_dbi_ro_wr_dis(pci);
+
+	writel_relaxed(0, pcie_ep->parf + PARF_INT_ALL_MASK);
+	val = PARF_INT_ALL_LINK_DOWN | PARF_INT_ALL_BME |
+	      PARF_INT_ALL_PM_TURNOFF | PARF_INT_ALL_DSTATE_CHANGE |
+	      PARF_INT_ALL_LINK_UP | PARF_INT_ALL_EDMA;
+	writel_relaxed(val, pcie_ep->parf + PARF_INT_ALL_MASK);
+
+	ret = dw_pcie_ep_init_complete(&pcie_ep->pci.ep);
+	if (ret) {
+		dev_err(dev, "Failed to complete initialization: %d\n", ret);
+		goto err_disable_resources;
+	}
+
+	/*
+	 * The physical address of the MMIO region which is exposed as the BAR
+	 * should be written to MHI BASE registers.
+	 */
+	writel_relaxed(pcie_ep->mmio_res->start,
+		       pcie_ep->parf + PARF_MHI_BASE_ADDR_LOWER);
+	writel_relaxed(0, pcie_ep->parf + PARF_MHI_BASE_ADDR_UPPER);
+
+	/* Gate Master AXI clock to MHI bus during L1SS */
+	val = readl_relaxed(pcie_ep->parf + PARF_MHI_CLOCK_RESET_CTRL);
+	val &= ~PARF_MSTR_AXI_CLK_EN;
+	writel_relaxed(val, pcie_ep->parf + PARF_MHI_CLOCK_RESET_CTRL);
+
+	dw_pcie_ep_init_notify(&pcie_ep->pci.ep);
+
+	/* Enable LTSSM */
+	val = readl_relaxed(pcie_ep->parf + PARF_LTSSM);
+	val |= BIT(8);
+	writel_relaxed(val, pcie_ep->parf + PARF_LTSSM);
+
+	return 0;
+
+err_disable_resources:
+	qcom_pcie_disable_resources(pcie_ep);
+
+	return ret;
+}
+
+static void qcom_pcie_perst_assert(struct dw_pcie *pci)
+{
+	struct qcom_pcie_ep *pcie_ep = to_pcie_ep(pci);
+	struct device *dev = pci->dev;
+
+	if (pcie_ep->link_status == QCOM_PCIE_EP_LINK_DISABLED) {
+		dev_dbg(dev, "Link is already disabled\n");
+		return;
+	}
+
+	qcom_pcie_disable_resources(pcie_ep);
+	pcie_ep->link_status = QCOM_PCIE_EP_LINK_DISABLED;
+}
+
+/* Common DWC controller ops */
+static const struct dw_pcie_ops pci_ops = {
+	.link_up = qcom_pcie_dw_link_up,
+	.start_link = qcom_pcie_dw_start_link,
+	.stop_link = qcom_pcie_dw_stop_link,
+	.write_dbi2 = qcom_pcie_dw_write_dbi2,
+};
+
+static int qcom_pcie_ep_get_io_resources(struct platform_device *pdev,
+					 struct qcom_pcie_ep *pcie_ep)
+{
+	struct device *dev = &pdev->dev;
+	struct dw_pcie *pci = &pcie_ep->pci;
+	struct device_node *syscon;
+	struct resource *res;
+	int ret;
+
+	pcie_ep->parf = devm_platform_ioremap_resource_byname(pdev, "parf");
+	if (IS_ERR(pcie_ep->parf))
+		return PTR_ERR(pcie_ep->parf);
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
+	pci->dbi_base = devm_pci_remap_cfg_resource(dev, res);
+	if (IS_ERR(pci->dbi_base))
+		return PTR_ERR(pci->dbi_base);
+	pci->dbi_base2 = pci->dbi_base;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "elbi");
+	pcie_ep->elbi = devm_pci_remap_cfg_resource(dev, res);
+	if (IS_ERR(pcie_ep->elbi))
+		return PTR_ERR(pcie_ep->elbi);
+
+	pcie_ep->mmio_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+							 "mmio");
+	if (!pcie_ep->mmio_res) {
+		dev_err(dev, "Failed to get mmio resource\n");
+		return -EINVAL;
+	}
+
+	pcie_ep->mmio = devm_pci_remap_cfg_resource(dev, pcie_ep->mmio_res);
+	if (IS_ERR(pcie_ep->mmio))
+		return PTR_ERR(pcie_ep->mmio);
+
+	syscon = of_parse_phandle(dev->of_node, "qcom,perst-regs", 0);
+	if (!syscon) {
+		dev_dbg(dev, "PERST separation not available\n");
+		return 0;
+	}
+
+	pcie_ep->perst_map = syscon_node_to_regmap(syscon);
+	of_node_put(syscon);
+	if (IS_ERR(pcie_ep->perst_map))
+		return PTR_ERR(pcie_ep->perst_map);
+
+	ret = of_property_read_u32_index(dev->of_node, "qcom,perst-regs",
+					 1, &pcie_ep->perst_en);
+	if (ret < 0) {
+		dev_err(dev, "No Perst Enable offset in syscon\n");
+		return ret;
+	}
+
+	ret = of_property_read_u32_index(dev->of_node, "qcom,perst-regs",
+					 2, &pcie_ep->perst_sep_en);
+	if (ret < 0) {
+		dev_err(dev, "No Perst Separation Enable offset in syscon\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int qcom_pcie_ep_get_resources(struct platform_device *pdev,
+				      struct qcom_pcie_ep *pcie_ep)
+{
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	ret = qcom_pcie_ep_get_io_resources(pdev, pcie_ep);
+	if (ret) {
+		dev_err(dev, "Failed to get io resources %d\n", ret);
+		return ret;
+	}
+
+	pcie_ep->num_clks = devm_clk_bulk_get_all(dev, &pcie_ep->clks);
+	if (pcie_ep->num_clks < 0) {
+		dev_err(dev, "Failed to get clocks\n");
+		return pcie_ep->num_clks;
+	}
+
+	pcie_ep->core_reset = devm_reset_control_get_exclusive(dev, "core");
+	if (IS_ERR(pcie_ep->core_reset))
+		return PTR_ERR(pcie_ep->core_reset);
+
+	pcie_ep->reset = devm_gpiod_get(dev, "reset", GPIOD_IN);
+	if (IS_ERR(pcie_ep->reset))
+		return PTR_ERR(pcie_ep->reset);
+
+	pcie_ep->wake = devm_gpiod_get_optional(dev, "wake", GPIOD_OUT_LOW);
+	if (IS_ERR(pcie_ep->wake))
+		return PTR_ERR(pcie_ep->wake);
+
+	pcie_ep->phy = devm_phy_optional_get(dev, "pciephy");
+	if (IS_ERR(pcie_ep->phy))
+		ret = PTR_ERR(pcie_ep->phy);
+
+	pcie_ep->icc_mem = devm_of_icc_get(dev, "pcie-mem");
+	if (IS_ERR(pcie_ep->icc_mem))
+		ret = PTR_ERR(pcie_ep->icc_mem);
+
+	return ret;
+}
+
+/* TODO: Notify clients about PCIe state change */
+static irqreturn_t qcom_pcie_ep_global_irq_thread(int irq, void *data)
+{
+	struct qcom_pcie_ep *pcie_ep = data;
+	struct dw_pcie *pci = &pcie_ep->pci;
+	struct device *dev = pci->dev;
+	u32 status = readl_relaxed(pcie_ep->parf + PARF_INT_ALL_STATUS);
+	u32 mask = readl_relaxed(pcie_ep->parf + PARF_INT_ALL_MASK);
+	u32 dstate, val;
+
+	writel_relaxed(status, pcie_ep->parf + PARF_INT_ALL_CLEAR);
+	status &= mask;
+
+	if (FIELD_GET(PARF_INT_ALL_LINK_DOWN, status)) {
+		dev_dbg(dev, "Received Linkdown event\n");
+		pcie_ep->link_status = QCOM_PCIE_EP_LINK_DOWN;
+		pci_epc_linkdown(pci->ep.epc);
+	} else if (FIELD_GET(PARF_INT_ALL_BME, status)) {
+		dev_dbg(dev, "Received BME event. Link is enabled!\n");
+		pcie_ep->link_status = QCOM_PCIE_EP_LINK_ENABLED;
+		qcom_pcie_ep_icc_update(pcie_ep);
+		pci_epc_bme_notify(pci->ep.epc);
+	} else if (FIELD_GET(PARF_INT_ALL_PM_TURNOFF, status)) {
+		dev_dbg(dev, "Received PM Turn-off event! Entering L23\n");
+		val = readl_relaxed(pcie_ep->parf + PARF_PM_CTRL);
+		val |= PARF_PM_CTRL_READY_ENTR_L23;
+		writel_relaxed(val, pcie_ep->parf + PARF_PM_CTRL);
+	} else if (FIELD_GET(PARF_INT_ALL_DSTATE_CHANGE, status)) {
+		dstate = dw_pcie_readl_dbi(pci, DBI_CON_STATUS) &
+					   DBI_CON_STATUS_POWER_STATE_MASK;
+		dev_dbg(dev, "Received D%d state event\n", dstate);
+		if (dstate == 3) {
+			val = readl_relaxed(pcie_ep->parf + PARF_PM_CTRL);
+			val |= PARF_PM_CTRL_REQ_EXIT_L1;
+			writel_relaxed(val, pcie_ep->parf + PARF_PM_CTRL);
+		}
+	} else if (FIELD_GET(PARF_INT_ALL_LINK_UP, status)) {
+		dev_dbg(dev, "Received Linkup event. Enumeration complete!\n");
+		dw_pcie_ep_linkup(&pci->ep);
+		pcie_ep->link_status = QCOM_PCIE_EP_LINK_UP;
+	} else {
+		dev_err(dev, "Received unknown event: %d\n", status);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t qcom_pcie_ep_perst_irq_thread(int irq, void *data)
+{
+	struct qcom_pcie_ep *pcie_ep = data;
+	struct dw_pcie *pci = &pcie_ep->pci;
+	struct device *dev = pci->dev;
+	u32 perst;
+
+	perst = gpiod_get_value(pcie_ep->reset);
+	if (perst) {
+		dev_dbg(dev, "PERST asserted by host. Shutting down the PCIe link!\n");
+		qcom_pcie_perst_assert(pci);
+	} else {
+		dev_dbg(dev, "PERST de-asserted by host. Starting link training!\n");
+		qcom_pcie_perst_deassert(pci);
+	}
+
+	irq_set_irq_type(gpiod_to_irq(pcie_ep->reset),
+			 (perst ? IRQF_TRIGGER_HIGH : IRQF_TRIGGER_LOW));
+
+	return IRQ_HANDLED;
+}
+
+static int qcom_pcie_ep_enable_irq_resources(struct platform_device *pdev,
+					     struct qcom_pcie_ep *pcie_ep)
+{
+	int ret;
+
+	pcie_ep->global_irq = platform_get_irq_byname(pdev, "global");
+	if (pcie_ep->global_irq < 0)
+		return pcie_ep->global_irq;
+
+	ret = devm_request_threaded_irq(&pdev->dev, pcie_ep->global_irq, NULL,
+					qcom_pcie_ep_global_irq_thread,
+					IRQF_ONESHOT,
+					"global_irq", pcie_ep);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to request Global IRQ\n");
+		return ret;
+	}
+
+	pcie_ep->perst_irq = gpiod_to_irq(pcie_ep->reset);
+	irq_set_status_flags(pcie_ep->perst_irq, IRQ_NOAUTOEN);
+	ret = devm_request_threaded_irq(&pdev->dev, pcie_ep->perst_irq, NULL,
+					qcom_pcie_ep_perst_irq_thread,
+					IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+					"perst_irq", pcie_ep);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to request PERST IRQ\n");
+		disable_irq(pcie_ep->global_irq);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int qcom_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
+				  enum pci_epc_irq_type type, u16 interrupt_num)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+	switch (type) {
+	case PCI_EPC_IRQ_LEGACY:
+		return dw_pcie_ep_raise_legacy_irq(ep, func_no);
+	case PCI_EPC_IRQ_MSI:
+		return dw_pcie_ep_raise_msi_irq(ep, func_no, interrupt_num);
+	default:
+		dev_err(pci->dev, "Unknown IRQ type\n");
+		return -EINVAL;
+	}
+}
+
+static int qcom_pcie_ep_link_transition_count(struct seq_file *s, void *data)
+{
+	struct qcom_pcie_ep *pcie_ep = (struct qcom_pcie_ep *)
+				     dev_get_drvdata(s->private);
+
+	seq_printf(s, "L0s transition count: %u\n",
+		   readl_relaxed(pcie_ep->mmio + PARF_DEBUG_CNT_PM_LINKST_IN_L0S));
+
+	seq_printf(s, "L1 transition count: %u\n",
+		   readl_relaxed(pcie_ep->mmio + PARF_DEBUG_CNT_PM_LINKST_IN_L1));
+
+	seq_printf(s, "L1.1 transition count: %u\n",
+		   readl_relaxed(pcie_ep->mmio + PARF_DEBUG_CNT_AUX_CLK_IN_L1SUB_L1));
+
+	seq_printf(s, "L1.2 transition count: %u\n",
+		   readl_relaxed(pcie_ep->mmio + PARF_DEBUG_CNT_AUX_CLK_IN_L1SUB_L2));
+
+	seq_printf(s, "L2 transition count: %u\n",
+		   readl_relaxed(pcie_ep->mmio + PARF_DEBUG_CNT_PM_LINKST_IN_L2));
+
+	return 0;
+}
+
+static void qcom_pcie_ep_init_debugfs(struct qcom_pcie_ep *pcie_ep)
+{
+	struct dw_pcie *pci = &pcie_ep->pci;
+
+	debugfs_create_devm_seqfile(pci->dev, "link_transition_count", pcie_ep->debugfs,
+				    qcom_pcie_ep_link_transition_count);
+}
+
+static const struct pci_epc_features qcom_pcie_epc_features = {
+	.linkup_notifier = true,
+	.core_init_notifier = true,
+	.msi_capable = true,
+	.msix_capable = false,
+	.align = SZ_4K,
+};
+
+static const struct pci_epc_features *
+qcom_pcie_epc_get_features(struct dw_pcie_ep *pci_ep)
+{
+	return &qcom_pcie_epc_features;
+}
+
+static void qcom_pcie_ep_init(struct dw_pcie_ep *ep)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	enum pci_barno bar;
+
+	for (bar = BAR_0; bar <= BAR_5; bar++)
+		dw_pcie_ep_reset_bar(pci, bar);
+}
+
+static const struct dw_pcie_ep_ops pci_ep_ops = {
+	.ep_init = qcom_pcie_ep_init,
+	.raise_irq = qcom_pcie_ep_raise_irq,
+	.get_features = qcom_pcie_epc_get_features,
+};
+
+static int qcom_pcie_ep_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct qcom_pcie_ep *pcie_ep;
+	char *name;
+	int ret;
+
+	pcie_ep = devm_kzalloc(dev, sizeof(*pcie_ep), GFP_KERNEL);
+	if (!pcie_ep)
+		return -ENOMEM;
+
+	pcie_ep->pci.dev = dev;
+	pcie_ep->pci.ops = &pci_ops;
+	pcie_ep->pci.ep.ops = &pci_ep_ops;
+	pcie_ep->pci.edma.nr_irqs = 1;
+	platform_set_drvdata(pdev, pcie_ep);
+
+	ret = qcom_pcie_ep_get_resources(pdev, pcie_ep);
+	if (ret)
+		return ret;
+
+	ret = qcom_pcie_enable_resources(pcie_ep);
+	if (ret) {
+		dev_err(dev, "Failed to enable resources: %d\n", ret);
+		return ret;
+	}
+
+	ret = dw_pcie_ep_init(&pcie_ep->pci.ep);
+	if (ret) {
+		dev_err(dev, "Failed to initialize endpoint: %d\n", ret);
+		goto err_disable_resources;
+	}
+
+	ret = qcom_pcie_ep_enable_irq_resources(pdev, pcie_ep);
+	if (ret)
+		goto err_disable_resources;
+
+	name = devm_kasprintf(dev, GFP_KERNEL, "%pOFP", dev->of_node);
+	if (!name) {
+		ret = -ENOMEM;
+		goto err_disable_irqs;
+	}
+
+	pcie_ep->debugfs = debugfs_create_dir(name, NULL);
+	qcom_pcie_ep_init_debugfs(pcie_ep);
+
+	return 0;
+
+err_disable_irqs:
+	disable_irq(pcie_ep->global_irq);
+	disable_irq(pcie_ep->perst_irq);
+
+err_disable_resources:
+	qcom_pcie_disable_resources(pcie_ep);
+
+	return ret;
+}
+
+static void qcom_pcie_ep_remove(struct platform_device *pdev)
+{
+	struct qcom_pcie_ep *pcie_ep = platform_get_drvdata(pdev);
+
+	disable_irq(pcie_ep->global_irq);
+	disable_irq(pcie_ep->perst_irq);
+
+	debugfs_remove_recursive(pcie_ep->debugfs);
+
+	if (pcie_ep->link_status == QCOM_PCIE_EP_LINK_DISABLED)
+		return;
+
+	qcom_pcie_disable_resources(pcie_ep);
+}
+
+static const struct of_device_id qcom_pcie_ep_match[] = {
+	{ .compatible = "qcom,sdx55-pcie-ep", },
+	{ .compatible = "qcom,sm8450-pcie-ep", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, qcom_pcie_ep_match);
+
+static struct platform_driver qcom_pcie_ep_driver = {
+	.probe	= qcom_pcie_ep_probe,
+	.remove_new = qcom_pcie_ep_remove,
+	.driver	= {
+		.name = "qcom-pcie-ep",
+		.of_match_table	= qcom_pcie_ep_match,
+	},
+};
+builtin_platform_driver(qcom_pcie_ep_driver);
+
+MODULE_AUTHOR("Siddartha Mohanadoss <smohanad@codeaurora.org>");
+MODULE_AUTHOR("Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>");
+MODULE_DESCRIPTION("Qualcomm PCIe Endpoint controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
new file mode 100644
index 000000000..64420ecc2
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -0,0 +1,1655 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Qualcomm PCIe root complex driver
+ *
+ * Copyright (c) 2014-2015, The Linux Foundation. All rights reserved.
+ * Copyright 2015 Linaro Limited.
+ *
+ * Author: Stanimir Varbanov <svarbanov@mm-sol.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/crc8.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interconnect.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/platform_device.h>
+#include <linux/phy/pcie.h>
+#include <linux/phy/phy.h>
+#include <linux/regulator/consumer.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "../../pci.h"
+#include "pcie-designware.h"
+
+/* PARF registers */
+#define PARF_SYS_CTRL				0x00
+#define PARF_PM_CTRL				0x20
+#define PARF_PCS_DEEMPH				0x34
+#define PARF_PCS_SWING				0x38
+#define PARF_PHY_CTRL				0x40
+#define PARF_PHY_REFCLK				0x4c
+#define PARF_CONFIG_BITS			0x50
+#define PARF_DBI_BASE_ADDR			0x168
+#define PARF_MHI_CLOCK_RESET_CTRL		0x174
+#define PARF_AXI_MSTR_WR_ADDR_HALT		0x178
+#define PARF_AXI_MSTR_WR_ADDR_HALT_V2		0x1a8
+#define PARF_Q2A_FLUSH				0x1ac
+#define PARF_LTSSM				0x1b0
+#define PARF_SID_OFFSET				0x234
+#define PARF_BDF_TRANSLATE_CFG			0x24c
+#define PARF_SLV_ADDR_SPACE_SIZE		0x358
+#define PARF_DEVICE_TYPE			0x1000
+#define PARF_BDF_TO_SID_TABLE_N			0x2000
+
+/* ELBI registers */
+#define ELBI_SYS_CTRL				0x04
+
+/* DBI registers */
+#define AXI_MSTR_RESP_COMP_CTRL0		0x818
+#define AXI_MSTR_RESP_COMP_CTRL1		0x81c
+
+/* MHI registers */
+#define PARF_DEBUG_CNT_PM_LINKST_IN_L2		0xc04
+#define PARF_DEBUG_CNT_PM_LINKST_IN_L1		0xc0c
+#define PARF_DEBUG_CNT_PM_LINKST_IN_L0S		0xc10
+#define PARF_DEBUG_CNT_AUX_CLK_IN_L1SUB_L1	0xc84
+#define PARF_DEBUG_CNT_AUX_CLK_IN_L1SUB_L2	0xc88
+
+/* PARF_SYS_CTRL register fields */
+#define MAC_PHY_POWERDOWN_IN_P2_D_MUX_EN	BIT(29)
+#define MST_WAKEUP_EN				BIT(13)
+#define SLV_WAKEUP_EN				BIT(12)
+#define MSTR_ACLK_CGC_DIS			BIT(10)
+#define SLV_ACLK_CGC_DIS			BIT(9)
+#define CORE_CLK_CGC_DIS			BIT(6)
+#define AUX_PWR_DET				BIT(4)
+#define L23_CLK_RMV_DIS				BIT(2)
+#define L1_CLK_RMV_DIS				BIT(1)
+
+/* PARF_PM_CTRL register fields */
+#define REQ_NOT_ENTR_L1				BIT(5)
+
+/* PARF_PCS_DEEMPH register fields */
+#define PCS_DEEMPH_TX_DEEMPH_GEN1(x)		FIELD_PREP(GENMASK(21, 16), x)
+#define PCS_DEEMPH_TX_DEEMPH_GEN2_3_5DB(x)	FIELD_PREP(GENMASK(13, 8), x)
+#define PCS_DEEMPH_TX_DEEMPH_GEN2_6DB(x)	FIELD_PREP(GENMASK(5, 0), x)
+
+/* PARF_PCS_SWING register fields */
+#define PCS_SWING_TX_SWING_FULL(x)		FIELD_PREP(GENMASK(14, 8), x)
+#define PCS_SWING_TX_SWING_LOW(x)		FIELD_PREP(GENMASK(6, 0), x)
+
+/* PARF_PHY_CTRL register fields */
+#define PHY_CTRL_PHY_TX0_TERM_OFFSET_MASK	GENMASK(20, 16)
+#define PHY_CTRL_PHY_TX0_TERM_OFFSET(x)		FIELD_PREP(PHY_CTRL_PHY_TX0_TERM_OFFSET_MASK, x)
+#define PHY_TEST_PWR_DOWN			BIT(0)
+
+/* PARF_PHY_REFCLK register fields */
+#define PHY_REFCLK_SSP_EN			BIT(16)
+#define PHY_REFCLK_USE_PAD			BIT(12)
+
+/* PARF_CONFIG_BITS register fields */
+#define PHY_RX0_EQ(x)				FIELD_PREP(GENMASK(26, 24), x)
+
+/* PARF_SLV_ADDR_SPACE_SIZE register value */
+#define SLV_ADDR_SPACE_SZ			0x10000000
+
+/* PARF_MHI_CLOCK_RESET_CTRL register fields */
+#define AHB_CLK_EN				BIT(0)
+#define MSTR_AXI_CLK_EN				BIT(1)
+#define BYPASS					BIT(4)
+
+/* PARF_AXI_MSTR_WR_ADDR_HALT register fields */
+#define EN					BIT(31)
+
+/* PARF_LTSSM register fields */
+#define LTSSM_EN				BIT(8)
+
+/* PARF_DEVICE_TYPE register fields */
+#define DEVICE_TYPE_RC				0x4
+
+/* ELBI_SYS_CTRL register fields */
+#define ELBI_SYS_CTRL_LT_ENABLE			BIT(0)
+
+/* AXI_MSTR_RESP_COMP_CTRL0 register fields */
+#define CFG_REMOTE_RD_REQ_BRIDGE_SIZE_2K	0x4
+#define CFG_REMOTE_RD_REQ_BRIDGE_SIZE_4K	0x5
+
+/* AXI_MSTR_RESP_COMP_CTRL1 register fields */
+#define CFG_BRIDGE_SB_INIT			BIT(0)
+
+/* PCI_EXP_SLTCAP register fields */
+#define PCIE_CAP_SLOT_POWER_LIMIT_VAL		FIELD_PREP(PCI_EXP_SLTCAP_SPLV, 250)
+#define PCIE_CAP_SLOT_POWER_LIMIT_SCALE		FIELD_PREP(PCI_EXP_SLTCAP_SPLS, 1)
+#define PCIE_CAP_SLOT_VAL			(PCI_EXP_SLTCAP_ABP | \
+						PCI_EXP_SLTCAP_PCP | \
+						PCI_EXP_SLTCAP_MRLSP | \
+						PCI_EXP_SLTCAP_AIP | \
+						PCI_EXP_SLTCAP_PIP | \
+						PCI_EXP_SLTCAP_HPS | \
+						PCI_EXP_SLTCAP_EIP | \
+						PCIE_CAP_SLOT_POWER_LIMIT_VAL | \
+						PCIE_CAP_SLOT_POWER_LIMIT_SCALE)
+
+#define PERST_DELAY_US				1000
+
+#define QCOM_PCIE_CRC8_POLYNOMIAL		(BIT(2) | BIT(1) | BIT(0))
+
+#define QCOM_PCIE_1_0_0_MAX_CLOCKS		4
+struct qcom_pcie_resources_1_0_0 {
+	struct clk_bulk_data clks[QCOM_PCIE_1_0_0_MAX_CLOCKS];
+	struct reset_control *core;
+	struct regulator *vdda;
+};
+
+#define QCOM_PCIE_2_1_0_MAX_CLOCKS		5
+#define QCOM_PCIE_2_1_0_MAX_RESETS		6
+#define QCOM_PCIE_2_1_0_MAX_SUPPLY		3
+struct qcom_pcie_resources_2_1_0 {
+	struct clk_bulk_data clks[QCOM_PCIE_2_1_0_MAX_CLOCKS];
+	struct reset_control_bulk_data resets[QCOM_PCIE_2_1_0_MAX_RESETS];
+	int num_resets;
+	struct regulator_bulk_data supplies[QCOM_PCIE_2_1_0_MAX_SUPPLY];
+};
+
+#define QCOM_PCIE_2_3_2_MAX_CLOCKS		4
+#define QCOM_PCIE_2_3_2_MAX_SUPPLY		2
+struct qcom_pcie_resources_2_3_2 {
+	struct clk_bulk_data clks[QCOM_PCIE_2_3_2_MAX_CLOCKS];
+	struct regulator_bulk_data supplies[QCOM_PCIE_2_3_2_MAX_SUPPLY];
+};
+
+#define QCOM_PCIE_2_3_3_MAX_CLOCKS		5
+#define QCOM_PCIE_2_3_3_MAX_RESETS		7
+struct qcom_pcie_resources_2_3_3 {
+	struct clk_bulk_data clks[QCOM_PCIE_2_3_3_MAX_CLOCKS];
+	struct reset_control_bulk_data rst[QCOM_PCIE_2_3_3_MAX_RESETS];
+};
+
+#define QCOM_PCIE_2_4_0_MAX_CLOCKS		4
+#define QCOM_PCIE_2_4_0_MAX_RESETS		12
+struct qcom_pcie_resources_2_4_0 {
+	struct clk_bulk_data clks[QCOM_PCIE_2_4_0_MAX_CLOCKS];
+	int num_clks;
+	struct reset_control_bulk_data resets[QCOM_PCIE_2_4_0_MAX_RESETS];
+	int num_resets;
+};
+
+#define QCOM_PCIE_2_7_0_MAX_CLOCKS		15
+#define QCOM_PCIE_2_7_0_MAX_SUPPLIES		2
+struct qcom_pcie_resources_2_7_0 {
+	struct clk_bulk_data clks[QCOM_PCIE_2_7_0_MAX_CLOCKS];
+	int num_clks;
+	struct regulator_bulk_data supplies[QCOM_PCIE_2_7_0_MAX_SUPPLIES];
+	struct reset_control *rst;
+};
+
+#define QCOM_PCIE_2_9_0_MAX_CLOCKS		5
+struct qcom_pcie_resources_2_9_0 {
+	struct clk_bulk_data clks[QCOM_PCIE_2_9_0_MAX_CLOCKS];
+	struct reset_control *rst;
+};
+
+union qcom_pcie_resources {
+	struct qcom_pcie_resources_1_0_0 v1_0_0;
+	struct qcom_pcie_resources_2_1_0 v2_1_0;
+	struct qcom_pcie_resources_2_3_2 v2_3_2;
+	struct qcom_pcie_resources_2_3_3 v2_3_3;
+	struct qcom_pcie_resources_2_4_0 v2_4_0;
+	struct qcom_pcie_resources_2_7_0 v2_7_0;
+	struct qcom_pcie_resources_2_9_0 v2_9_0;
+};
+
+struct qcom_pcie;
+
+struct qcom_pcie_ops {
+	int (*get_resources)(struct qcom_pcie *pcie);
+	int (*init)(struct qcom_pcie *pcie);
+	int (*post_init)(struct qcom_pcie *pcie);
+	void (*deinit)(struct qcom_pcie *pcie);
+	void (*ltssm_enable)(struct qcom_pcie *pcie);
+	int (*config_sid)(struct qcom_pcie *pcie);
+};
+
+struct qcom_pcie_cfg {
+	const struct qcom_pcie_ops *ops;
+};
+
+struct qcom_pcie {
+	struct dw_pcie *pci;
+	void __iomem *parf;			/* DT parf */
+	void __iomem *elbi;			/* DT elbi */
+	void __iomem *mhi;
+	union qcom_pcie_resources res;
+	struct phy *phy;
+	struct gpio_desc *reset;
+	struct icc_path *icc_mem;
+	const struct qcom_pcie_cfg *cfg;
+	struct dentry *debugfs;
+	bool suspended;
+};
+
+#define to_qcom_pcie(x)		dev_get_drvdata((x)->dev)
+
+static void qcom_ep_reset_assert(struct qcom_pcie *pcie)
+{
+	gpiod_set_value_cansleep(pcie->reset, 1);
+	usleep_range(PERST_DELAY_US, PERST_DELAY_US + 500);
+}
+
+static void qcom_ep_reset_deassert(struct qcom_pcie *pcie)
+{
+	/* Ensure that PERST has been asserted for at least 100 ms */
+	msleep(100);
+	gpiod_set_value_cansleep(pcie->reset, 0);
+	usleep_range(PERST_DELAY_US, PERST_DELAY_US + 500);
+}
+
+static int qcom_pcie_start_link(struct dw_pcie *pci)
+{
+	struct qcom_pcie *pcie = to_qcom_pcie(pci);
+
+	/* Enable Link Training state machine */
+	if (pcie->cfg->ops->ltssm_enable)
+		pcie->cfg->ops->ltssm_enable(pcie);
+
+	return 0;
+}
+
+static void qcom_pcie_clear_hpc(struct dw_pcie *pci)
+{
+	u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	u32 val;
+
+	dw_pcie_dbi_ro_wr_en(pci);
+
+	val = readl(pci->dbi_base + offset + PCI_EXP_SLTCAP);
+	val &= ~PCI_EXP_SLTCAP_HPC;
+	writel(val, pci->dbi_base + offset + PCI_EXP_SLTCAP);
+
+	dw_pcie_dbi_ro_wr_dis(pci);
+}
+
+static void qcom_pcie_2_1_0_ltssm_enable(struct qcom_pcie *pcie)
+{
+	u32 val;
+
+	/* enable link training */
+	val = readl(pcie->elbi + ELBI_SYS_CTRL);
+	val |= ELBI_SYS_CTRL_LT_ENABLE;
+	writel(val, pcie->elbi + ELBI_SYS_CTRL);
+}
+
+static int qcom_pcie_get_resources_2_1_0(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_1_0 *res = &pcie->res.v2_1_0;
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	bool is_apq = of_device_is_compatible(dev->of_node, "qcom,pcie-apq8064");
+	int ret;
+
+	res->supplies[0].supply = "vdda";
+	res->supplies[1].supply = "vdda_phy";
+	res->supplies[2].supply = "vdda_refclk";
+	ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(res->supplies),
+				      res->supplies);
+	if (ret)
+		return ret;
+
+	res->clks[0].id = "iface";
+	res->clks[1].id = "core";
+	res->clks[2].id = "phy";
+	res->clks[3].id = "aux";
+	res->clks[4].id = "ref";
+
+	/* iface, core, phy are required */
+	ret = devm_clk_bulk_get(dev, 3, res->clks);
+	if (ret < 0)
+		return ret;
+
+	/* aux, ref are optional */
+	ret = devm_clk_bulk_get_optional(dev, 2, res->clks + 3);
+	if (ret < 0)
+		return ret;
+
+	res->resets[0].id = "pci";
+	res->resets[1].id = "axi";
+	res->resets[2].id = "ahb";
+	res->resets[3].id = "por";
+	res->resets[4].id = "phy";
+	res->resets[5].id = "ext";
+
+	/* ext is optional on APQ8016 */
+	res->num_resets = is_apq ? 5 : 6;
+	ret = devm_reset_control_bulk_get_exclusive(dev, res->num_resets, res->resets);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static void qcom_pcie_deinit_2_1_0(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_1_0 *res = &pcie->res.v2_1_0;
+
+	clk_bulk_disable_unprepare(ARRAY_SIZE(res->clks), res->clks);
+	reset_control_bulk_assert(res->num_resets, res->resets);
+
+	writel(1, pcie->parf + PARF_PHY_CTRL);
+
+	regulator_bulk_disable(ARRAY_SIZE(res->supplies), res->supplies);
+}
+
+static int qcom_pcie_init_2_1_0(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_1_0 *res = &pcie->res.v2_1_0;
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	int ret;
+
+	/* reset the PCIe interface as uboot can leave it undefined state */
+	ret = reset_control_bulk_assert(res->num_resets, res->resets);
+	if (ret < 0) {
+		dev_err(dev, "cannot assert resets\n");
+		return ret;
+	}
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(res->supplies), res->supplies);
+	if (ret < 0) {
+		dev_err(dev, "cannot enable regulators\n");
+		return ret;
+	}
+
+	ret = reset_control_bulk_deassert(res->num_resets, res->resets);
+	if (ret < 0) {
+		dev_err(dev, "cannot deassert resets\n");
+		regulator_bulk_disable(ARRAY_SIZE(res->supplies), res->supplies);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int qcom_pcie_post_init_2_1_0(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_1_0 *res = &pcie->res.v2_1_0;
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	struct device_node *node = dev->of_node;
+	u32 val;
+	int ret;
+
+	/* enable PCIe clocks and resets */
+	val = readl(pcie->parf + PARF_PHY_CTRL);
+	val &= ~PHY_TEST_PWR_DOWN;
+	writel(val, pcie->parf + PARF_PHY_CTRL);
+
+	ret = clk_bulk_prepare_enable(ARRAY_SIZE(res->clks), res->clks);
+	if (ret)
+		return ret;
+
+	if (of_device_is_compatible(node, "qcom,pcie-ipq8064") ||
+	    of_device_is_compatible(node, "qcom,pcie-ipq8064-v2")) {
+		writel(PCS_DEEMPH_TX_DEEMPH_GEN1(24) |
+			       PCS_DEEMPH_TX_DEEMPH_GEN2_3_5DB(24) |
+			       PCS_DEEMPH_TX_DEEMPH_GEN2_6DB(34),
+		       pcie->parf + PARF_PCS_DEEMPH);
+		writel(PCS_SWING_TX_SWING_FULL(120) |
+			       PCS_SWING_TX_SWING_LOW(120),
+		       pcie->parf + PARF_PCS_SWING);
+		writel(PHY_RX0_EQ(4), pcie->parf + PARF_CONFIG_BITS);
+	}
+
+	if (of_device_is_compatible(node, "qcom,pcie-ipq8064")) {
+		/* set TX termination offset */
+		val = readl(pcie->parf + PARF_PHY_CTRL);
+		val &= ~PHY_CTRL_PHY_TX0_TERM_OFFSET_MASK;
+		val |= PHY_CTRL_PHY_TX0_TERM_OFFSET(7);
+		writel(val, pcie->parf + PARF_PHY_CTRL);
+	}
+
+	/* enable external reference clock */
+	val = readl(pcie->parf + PARF_PHY_REFCLK);
+	/* USE_PAD is required only for ipq806x */
+	if (!of_device_is_compatible(node, "qcom,pcie-apq8064"))
+		val &= ~PHY_REFCLK_USE_PAD;
+	val |= PHY_REFCLK_SSP_EN;
+	writel(val, pcie->parf + PARF_PHY_REFCLK);
+
+	/* wait for clock acquisition */
+	usleep_range(1000, 1500);
+
+	/* Set the Max TLP size to 2K, instead of using default of 4K */
+	writel(CFG_REMOTE_RD_REQ_BRIDGE_SIZE_2K,
+	       pci->dbi_base + AXI_MSTR_RESP_COMP_CTRL0);
+	writel(CFG_BRIDGE_SB_INIT,
+	       pci->dbi_base + AXI_MSTR_RESP_COMP_CTRL1);
+
+	qcom_pcie_clear_hpc(pcie->pci);
+
+	return 0;
+}
+
+static int qcom_pcie_get_resources_1_0_0(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_1_0_0 *res = &pcie->res.v1_0_0;
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	int ret;
+
+	res->vdda = devm_regulator_get(dev, "vdda");
+	if (IS_ERR(res->vdda))
+		return PTR_ERR(res->vdda);
+
+	res->clks[0].id = "iface";
+	res->clks[1].id = "aux";
+	res->clks[2].id = "master_bus";
+	res->clks[3].id = "slave_bus";
+
+	ret = devm_clk_bulk_get(dev, ARRAY_SIZE(res->clks), res->clks);
+	if (ret < 0)
+		return ret;
+
+	res->core = devm_reset_control_get_exclusive(dev, "core");
+	return PTR_ERR_OR_ZERO(res->core);
+}
+
+static void qcom_pcie_deinit_1_0_0(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_1_0_0 *res = &pcie->res.v1_0_0;
+
+	reset_control_assert(res->core);
+	clk_bulk_disable_unprepare(ARRAY_SIZE(res->clks), res->clks);
+	regulator_disable(res->vdda);
+}
+
+static int qcom_pcie_init_1_0_0(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_1_0_0 *res = &pcie->res.v1_0_0;
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	int ret;
+
+	ret = reset_control_deassert(res->core);
+	if (ret) {
+		dev_err(dev, "cannot deassert core reset\n");
+		return ret;
+	}
+
+	ret = clk_bulk_prepare_enable(ARRAY_SIZE(res->clks), res->clks);
+	if (ret) {
+		dev_err(dev, "cannot prepare/enable clocks\n");
+		goto err_assert_reset;
+	}
+
+	ret = regulator_enable(res->vdda);
+	if (ret) {
+		dev_err(dev, "cannot enable vdda regulator\n");
+		goto err_disable_clks;
+	}
+
+	return 0;
+
+err_disable_clks:
+	clk_bulk_disable_unprepare(ARRAY_SIZE(res->clks), res->clks);
+err_assert_reset:
+	reset_control_assert(res->core);
+
+	return ret;
+}
+
+static int qcom_pcie_post_init_1_0_0(struct qcom_pcie *pcie)
+{
+	/* change DBI base address */
+	writel(0, pcie->parf + PARF_DBI_BASE_ADDR);
+
+	if (IS_ENABLED(CONFIG_PCI_MSI)) {
+		u32 val = readl(pcie->parf + PARF_AXI_MSTR_WR_ADDR_HALT);
+
+		val |= EN;
+		writel(val, pcie->parf + PARF_AXI_MSTR_WR_ADDR_HALT);
+	}
+
+	qcom_pcie_clear_hpc(pcie->pci);
+
+	return 0;
+}
+
+static void qcom_pcie_2_3_2_ltssm_enable(struct qcom_pcie *pcie)
+{
+	u32 val;
+
+	/* enable link training */
+	val = readl(pcie->parf + PARF_LTSSM);
+	val |= LTSSM_EN;
+	writel(val, pcie->parf + PARF_LTSSM);
+}
+
+static int qcom_pcie_get_resources_2_3_2(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_3_2 *res = &pcie->res.v2_3_2;
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	int ret;
+
+	res->supplies[0].supply = "vdda";
+	res->supplies[1].supply = "vddpe-3v3";
+	ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(res->supplies),
+				      res->supplies);
+	if (ret)
+		return ret;
+
+	res->clks[0].id = "aux";
+	res->clks[1].id = "cfg";
+	res->clks[2].id = "bus_master";
+	res->clks[3].id = "bus_slave";
+
+	ret = devm_clk_bulk_get(dev, ARRAY_SIZE(res->clks), res->clks);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static void qcom_pcie_deinit_2_3_2(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_3_2 *res = &pcie->res.v2_3_2;
+
+	clk_bulk_disable_unprepare(ARRAY_SIZE(res->clks), res->clks);
+	regulator_bulk_disable(ARRAY_SIZE(res->supplies), res->supplies);
+}
+
+static int qcom_pcie_init_2_3_2(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_3_2 *res = &pcie->res.v2_3_2;
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	int ret;
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(res->supplies), res->supplies);
+	if (ret < 0) {
+		dev_err(dev, "cannot enable regulators\n");
+		return ret;
+	}
+
+	ret = clk_bulk_prepare_enable(ARRAY_SIZE(res->clks), res->clks);
+	if (ret) {
+		dev_err(dev, "cannot prepare/enable clocks\n");
+		regulator_bulk_disable(ARRAY_SIZE(res->supplies), res->supplies);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int qcom_pcie_post_init_2_3_2(struct qcom_pcie *pcie)
+{
+	u32 val;
+
+	/* enable PCIe clocks and resets */
+	val = readl(pcie->parf + PARF_PHY_CTRL);
+	val &= ~PHY_TEST_PWR_DOWN;
+	writel(val, pcie->parf + PARF_PHY_CTRL);
+
+	/* change DBI base address */
+	writel(0, pcie->parf + PARF_DBI_BASE_ADDR);
+
+	/* MAC PHY_POWERDOWN MUX DISABLE  */
+	val = readl(pcie->parf + PARF_SYS_CTRL);
+	val &= ~MAC_PHY_POWERDOWN_IN_P2_D_MUX_EN;
+	writel(val, pcie->parf + PARF_SYS_CTRL);
+
+	val = readl(pcie->parf + PARF_MHI_CLOCK_RESET_CTRL);
+	val |= BYPASS;
+	writel(val, pcie->parf + PARF_MHI_CLOCK_RESET_CTRL);
+
+	val = readl(pcie->parf + PARF_AXI_MSTR_WR_ADDR_HALT_V2);
+	val |= EN;
+	writel(val, pcie->parf + PARF_AXI_MSTR_WR_ADDR_HALT_V2);
+
+	qcom_pcie_clear_hpc(pcie->pci);
+
+	return 0;
+}
+
+static int qcom_pcie_get_resources_2_4_0(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_4_0 *res = &pcie->res.v2_4_0;
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	bool is_ipq = of_device_is_compatible(dev->of_node, "qcom,pcie-ipq4019");
+	int ret;
+
+	res->clks[0].id = "aux";
+	res->clks[1].id = "master_bus";
+	res->clks[2].id = "slave_bus";
+	res->clks[3].id = "iface";
+
+	/* qcom,pcie-ipq4019 is defined without "iface" */
+	res->num_clks = is_ipq ? 3 : 4;
+
+	ret = devm_clk_bulk_get(dev, res->num_clks, res->clks);
+	if (ret < 0)
+		return ret;
+
+	res->resets[0].id = "axi_m";
+	res->resets[1].id = "axi_s";
+	res->resets[2].id = "axi_m_sticky";
+	res->resets[3].id = "pipe_sticky";
+	res->resets[4].id = "pwr";
+	res->resets[5].id = "ahb";
+	res->resets[6].id = "pipe";
+	res->resets[7].id = "axi_m_vmid";
+	res->resets[8].id = "axi_s_xpu";
+	res->resets[9].id = "parf";
+	res->resets[10].id = "phy";
+	res->resets[11].id = "phy_ahb";
+
+	res->num_resets = is_ipq ? 12 : 6;
+
+	ret = devm_reset_control_bulk_get_exclusive(dev, res->num_resets, res->resets);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static void qcom_pcie_deinit_2_4_0(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_4_0 *res = &pcie->res.v2_4_0;
+
+	reset_control_bulk_assert(res->num_resets, res->resets);
+	clk_bulk_disable_unprepare(res->num_clks, res->clks);
+}
+
+static int qcom_pcie_init_2_4_0(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_4_0 *res = &pcie->res.v2_4_0;
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	int ret;
+
+	ret = reset_control_bulk_assert(res->num_resets, res->resets);
+	if (ret < 0) {
+		dev_err(dev, "cannot assert resets\n");
+		return ret;
+	}
+
+	usleep_range(10000, 12000);
+
+	ret = reset_control_bulk_deassert(res->num_resets, res->resets);
+	if (ret < 0) {
+		dev_err(dev, "cannot deassert resets\n");
+		return ret;
+	}
+
+	usleep_range(10000, 12000);
+
+	ret = clk_bulk_prepare_enable(res->num_clks, res->clks);
+	if (ret) {
+		reset_control_bulk_assert(res->num_resets, res->resets);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int qcom_pcie_get_resources_2_3_3(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_3_3 *res = &pcie->res.v2_3_3;
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	int ret;
+
+	res->clks[0].id = "iface";
+	res->clks[1].id = "axi_m";
+	res->clks[2].id = "axi_s";
+	res->clks[3].id = "ahb";
+	res->clks[4].id = "aux";
+
+	ret = devm_clk_bulk_get(dev, ARRAY_SIZE(res->clks), res->clks);
+	if (ret < 0)
+		return ret;
+
+	res->rst[0].id = "axi_m";
+	res->rst[1].id = "axi_s";
+	res->rst[2].id = "pipe";
+	res->rst[3].id = "axi_m_sticky";
+	res->rst[4].id = "sticky";
+	res->rst[5].id = "ahb";
+	res->rst[6].id = "sleep";
+
+	ret = devm_reset_control_bulk_get_exclusive(dev, ARRAY_SIZE(res->rst), res->rst);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static void qcom_pcie_deinit_2_3_3(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_3_3 *res = &pcie->res.v2_3_3;
+
+	clk_bulk_disable_unprepare(ARRAY_SIZE(res->clks), res->clks);
+}
+
+static int qcom_pcie_init_2_3_3(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_3_3 *res = &pcie->res.v2_3_3;
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	int ret;
+
+	ret = reset_control_bulk_assert(ARRAY_SIZE(res->rst), res->rst);
+	if (ret < 0) {
+		dev_err(dev, "cannot assert resets\n");
+		return ret;
+	}
+
+	usleep_range(2000, 2500);
+
+	ret = reset_control_bulk_deassert(ARRAY_SIZE(res->rst), res->rst);
+	if (ret < 0) {
+		dev_err(dev, "cannot deassert resets\n");
+		return ret;
+	}
+
+	/*
+	 * Don't have a way to see if the reset has completed.
+	 * Wait for some time.
+	 */
+	usleep_range(2000, 2500);
+
+	ret = clk_bulk_prepare_enable(ARRAY_SIZE(res->clks), res->clks);
+	if (ret) {
+		dev_err(dev, "cannot prepare/enable clocks\n");
+		goto err_assert_resets;
+	}
+
+	return 0;
+
+err_assert_resets:
+	/*
+	 * Not checking for failure, will anyway return
+	 * the original failure in 'ret'.
+	 */
+	reset_control_bulk_assert(ARRAY_SIZE(res->rst), res->rst);
+
+	return ret;
+}
+
+static int qcom_pcie_post_init_2_3_3(struct qcom_pcie *pcie)
+{
+	struct dw_pcie *pci = pcie->pci;
+	u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	u32 val;
+
+	writel(SLV_ADDR_SPACE_SZ, pcie->parf + PARF_SLV_ADDR_SPACE_SIZE);
+
+	val = readl(pcie->parf + PARF_PHY_CTRL);
+	val &= ~PHY_TEST_PWR_DOWN;
+	writel(val, pcie->parf + PARF_PHY_CTRL);
+
+	writel(0, pcie->parf + PARF_DBI_BASE_ADDR);
+
+	writel(MST_WAKEUP_EN | SLV_WAKEUP_EN | MSTR_ACLK_CGC_DIS
+		| SLV_ACLK_CGC_DIS | CORE_CLK_CGC_DIS |
+		AUX_PWR_DET | L23_CLK_RMV_DIS | L1_CLK_RMV_DIS,
+		pcie->parf + PARF_SYS_CTRL);
+	writel(0, pcie->parf + PARF_Q2A_FLUSH);
+
+	writel(PCI_COMMAND_MASTER, pci->dbi_base + PCI_COMMAND);
+
+	dw_pcie_dbi_ro_wr_en(pci);
+
+	writel(PCIE_CAP_SLOT_VAL, pci->dbi_base + offset + PCI_EXP_SLTCAP);
+
+	val = readl(pci->dbi_base + offset + PCI_EXP_LNKCAP);
+	val &= ~PCI_EXP_LNKCAP_ASPMS;
+	writel(val, pci->dbi_base + offset + PCI_EXP_LNKCAP);
+
+	writel(PCI_EXP_DEVCTL2_COMP_TMOUT_DIS, pci->dbi_base + offset +
+		PCI_EXP_DEVCTL2);
+
+	dw_pcie_dbi_ro_wr_dis(pci);
+
+	return 0;
+}
+
+static int qcom_pcie_get_resources_2_7_0(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_7_0 *res = &pcie->res.v2_7_0;
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	unsigned int num_clks, num_opt_clks;
+	unsigned int idx;
+	int ret;
+
+	res->rst = devm_reset_control_array_get_exclusive(dev);
+	if (IS_ERR(res->rst))
+		return PTR_ERR(res->rst);
+
+	res->supplies[0].supply = "vdda";
+	res->supplies[1].supply = "vddpe-3v3";
+	ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(res->supplies),
+				      res->supplies);
+	if (ret)
+		return ret;
+
+	idx = 0;
+	res->clks[idx++].id = "aux";
+	res->clks[idx++].id = "cfg";
+	res->clks[idx++].id = "bus_master";
+	res->clks[idx++].id = "bus_slave";
+	res->clks[idx++].id = "slave_q2a";
+
+	num_clks = idx;
+
+	ret = devm_clk_bulk_get(dev, num_clks, res->clks);
+	if (ret < 0)
+		return ret;
+
+	res->clks[idx++].id = "tbu";
+	res->clks[idx++].id = "ddrss_sf_tbu";
+	res->clks[idx++].id = "aggre0";
+	res->clks[idx++].id = "aggre1";
+	res->clks[idx++].id = "noc_aggr";
+	res->clks[idx++].id = "noc_aggr_4";
+	res->clks[idx++].id = "noc_aggr_south_sf";
+	res->clks[idx++].id = "cnoc_qx";
+	res->clks[idx++].id = "sleep";
+	res->clks[idx++].id = "cnoc_sf_axi";
+
+	num_opt_clks = idx - num_clks;
+	res->num_clks = idx;
+
+	ret = devm_clk_bulk_get_optional(dev, num_opt_clks, res->clks + num_clks);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_7_0 *res = &pcie->res.v2_7_0;
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	u32 val;
+	int ret;
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(res->supplies), res->supplies);
+	if (ret < 0) {
+		dev_err(dev, "cannot enable regulators\n");
+		return ret;
+	}
+
+	ret = clk_bulk_prepare_enable(res->num_clks, res->clks);
+	if (ret < 0)
+		goto err_disable_regulators;
+
+	ret = reset_control_assert(res->rst);
+	if (ret) {
+		dev_err(dev, "reset assert failed (%d)\n", ret);
+		goto err_disable_clocks;
+	}
+
+	usleep_range(1000, 1500);
+
+	ret = reset_control_deassert(res->rst);
+	if (ret) {
+		dev_err(dev, "reset deassert failed (%d)\n", ret);
+		goto err_disable_clocks;
+	}
+
+	/* Wait for reset to complete, required on SM8450 */
+	usleep_range(1000, 1500);
+
+	/* configure PCIe to RC mode */
+	writel(DEVICE_TYPE_RC, pcie->parf + PARF_DEVICE_TYPE);
+
+	/* enable PCIe clocks and resets */
+	val = readl(pcie->parf + PARF_PHY_CTRL);
+	val &= ~PHY_TEST_PWR_DOWN;
+	writel(val, pcie->parf + PARF_PHY_CTRL);
+
+	/* change DBI base address */
+	writel(0, pcie->parf + PARF_DBI_BASE_ADDR);
+
+	/* MAC PHY_POWERDOWN MUX DISABLE  */
+	val = readl(pcie->parf + PARF_SYS_CTRL);
+	val &= ~MAC_PHY_POWERDOWN_IN_P2_D_MUX_EN;
+	writel(val, pcie->parf + PARF_SYS_CTRL);
+
+	val = readl(pcie->parf + PARF_MHI_CLOCK_RESET_CTRL);
+	val |= BYPASS;
+	writel(val, pcie->parf + PARF_MHI_CLOCK_RESET_CTRL);
+
+	/* Enable L1 and L1SS */
+	val = readl(pcie->parf + PARF_PM_CTRL);
+	val &= ~REQ_NOT_ENTR_L1;
+	writel(val, pcie->parf + PARF_PM_CTRL);
+
+	val = readl(pcie->parf + PARF_AXI_MSTR_WR_ADDR_HALT_V2);
+	val |= EN;
+	writel(val, pcie->parf + PARF_AXI_MSTR_WR_ADDR_HALT_V2);
+
+	return 0;
+err_disable_clocks:
+	clk_bulk_disable_unprepare(res->num_clks, res->clks);
+err_disable_regulators:
+	regulator_bulk_disable(ARRAY_SIZE(res->supplies), res->supplies);
+
+	return ret;
+}
+
+static int qcom_pcie_post_init_2_7_0(struct qcom_pcie *pcie)
+{
+	qcom_pcie_clear_hpc(pcie->pci);
+
+	return 0;
+}
+
+static void qcom_pcie_deinit_2_7_0(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_7_0 *res = &pcie->res.v2_7_0;
+
+	clk_bulk_disable_unprepare(res->num_clks, res->clks);
+
+	regulator_bulk_disable(ARRAY_SIZE(res->supplies), res->supplies);
+}
+
+static int qcom_pcie_config_sid_1_9_0(struct qcom_pcie *pcie)
+{
+	/* iommu map structure */
+	struct {
+		u32 bdf;
+		u32 phandle;
+		u32 smmu_sid;
+		u32 smmu_sid_len;
+	} *map;
+	void __iomem *bdf_to_sid_base = pcie->parf + PARF_BDF_TO_SID_TABLE_N;
+	struct device *dev = pcie->pci->dev;
+	u8 qcom_pcie_crc8_table[CRC8_TABLE_SIZE];
+	int i, nr_map, size = 0;
+	u32 smmu_sid_base;
+
+	of_get_property(dev->of_node, "iommu-map", &size);
+	if (!size)
+		return 0;
+
+	map = kzalloc(size, GFP_KERNEL);
+	if (!map)
+		return -ENOMEM;
+
+	of_property_read_u32_array(dev->of_node, "iommu-map", (u32 *)map,
+				   size / sizeof(u32));
+
+	nr_map = size / (sizeof(*map));
+
+	crc8_populate_msb(qcom_pcie_crc8_table, QCOM_PCIE_CRC8_POLYNOMIAL);
+
+	/* Registers need to be zero out first */
+	memset_io(bdf_to_sid_base, 0, CRC8_TABLE_SIZE * sizeof(u32));
+
+	/* Extract the SMMU SID base from the first entry of iommu-map */
+	smmu_sid_base = map[0].smmu_sid;
+
+	/* Look for an available entry to hold the mapping */
+	for (i = 0; i < nr_map; i++) {
+		__be16 bdf_be = cpu_to_be16(map[i].bdf);
+		u32 val;
+		u8 hash;
+
+		hash = crc8(qcom_pcie_crc8_table, (u8 *)&bdf_be, sizeof(bdf_be), 0);
+
+		val = readl(bdf_to_sid_base + hash * sizeof(u32));
+
+		/* If the register is already populated, look for next available entry */
+		while (val) {
+			u8 current_hash = hash++;
+			u8 next_mask = 0xff;
+
+			/* If NEXT field is NULL then update it with next hash */
+			if (!(val & next_mask)) {
+				val |= (u32)hash;
+				writel(val, bdf_to_sid_base + current_hash * sizeof(u32));
+			}
+
+			val = readl(bdf_to_sid_base + hash * sizeof(u32));
+		}
+
+		/* BDF [31:16] | SID [15:8] | NEXT [7:0] */
+		val = map[i].bdf << 16 | (map[i].smmu_sid - smmu_sid_base) << 8 | 0;
+		writel(val, bdf_to_sid_base + hash * sizeof(u32));
+	}
+
+	kfree(map);
+
+	return 0;
+}
+
+static int qcom_pcie_get_resources_2_9_0(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_9_0 *res = &pcie->res.v2_9_0;
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	int ret;
+
+	res->clks[0].id = "iface";
+	res->clks[1].id = "axi_m";
+	res->clks[2].id = "axi_s";
+	res->clks[3].id = "axi_bridge";
+	res->clks[4].id = "rchng";
+
+	ret = devm_clk_bulk_get(dev, ARRAY_SIZE(res->clks), res->clks);
+	if (ret < 0)
+		return ret;
+
+	res->rst = devm_reset_control_array_get_exclusive(dev);
+	if (IS_ERR(res->rst))
+		return PTR_ERR(res->rst);
+
+	return 0;
+}
+
+static void qcom_pcie_deinit_2_9_0(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_9_0 *res = &pcie->res.v2_9_0;
+
+	clk_bulk_disable_unprepare(ARRAY_SIZE(res->clks), res->clks);
+}
+
+static int qcom_pcie_init_2_9_0(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_9_0 *res = &pcie->res.v2_9_0;
+	struct device *dev = pcie->pci->dev;
+	int ret;
+
+	ret = reset_control_assert(res->rst);
+	if (ret) {
+		dev_err(dev, "reset assert failed (%d)\n", ret);
+		return ret;
+	}
+
+	/*
+	 * Delay periods before and after reset deassert are working values
+	 * from downstream Codeaurora kernel
+	 */
+	usleep_range(2000, 2500);
+
+	ret = reset_control_deassert(res->rst);
+	if (ret) {
+		dev_err(dev, "reset deassert failed (%d)\n", ret);
+		return ret;
+	}
+
+	usleep_range(2000, 2500);
+
+	return clk_bulk_prepare_enable(ARRAY_SIZE(res->clks), res->clks);
+}
+
+static int qcom_pcie_post_init_2_9_0(struct qcom_pcie *pcie)
+{
+	struct dw_pcie *pci = pcie->pci;
+	u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	u32 val;
+	int i;
+
+	writel(SLV_ADDR_SPACE_SZ,
+		pcie->parf + PARF_SLV_ADDR_SPACE_SIZE);
+
+	val = readl(pcie->parf + PARF_PHY_CTRL);
+	val &= ~PHY_TEST_PWR_DOWN;
+	writel(val, pcie->parf + PARF_PHY_CTRL);
+
+	writel(0, pcie->parf + PARF_DBI_BASE_ADDR);
+
+	writel(DEVICE_TYPE_RC, pcie->parf + PARF_DEVICE_TYPE);
+	writel(BYPASS | MSTR_AXI_CLK_EN | AHB_CLK_EN,
+		pcie->parf + PARF_MHI_CLOCK_RESET_CTRL);
+	writel(GEN3_RELATED_OFF_RXEQ_RGRDLESS_RXTS |
+		GEN3_RELATED_OFF_GEN3_ZRXDC_NONCOMPL,
+		pci->dbi_base + GEN3_RELATED_OFF);
+
+	writel(MST_WAKEUP_EN | SLV_WAKEUP_EN | MSTR_ACLK_CGC_DIS |
+		SLV_ACLK_CGC_DIS | CORE_CLK_CGC_DIS |
+		AUX_PWR_DET | L23_CLK_RMV_DIS | L1_CLK_RMV_DIS,
+		pcie->parf + PARF_SYS_CTRL);
+
+	writel(0, pcie->parf + PARF_Q2A_FLUSH);
+
+	dw_pcie_dbi_ro_wr_en(pci);
+
+	writel(PCIE_CAP_SLOT_VAL, pci->dbi_base + offset + PCI_EXP_SLTCAP);
+
+	val = readl(pci->dbi_base + offset + PCI_EXP_LNKCAP);
+	val &= ~PCI_EXP_LNKCAP_ASPMS;
+	writel(val, pci->dbi_base + offset + PCI_EXP_LNKCAP);
+
+	writel(PCI_EXP_DEVCTL2_COMP_TMOUT_DIS, pci->dbi_base + offset +
+			PCI_EXP_DEVCTL2);
+
+	dw_pcie_dbi_ro_wr_dis(pci);
+
+	for (i = 0; i < 256; i++)
+		writel(0, pcie->parf + PARF_BDF_TO_SID_TABLE_N + (4 * i));
+
+	return 0;
+}
+
+static int qcom_pcie_link_up(struct dw_pcie *pci)
+{
+	u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	u16 val = readw(pci->dbi_base + offset + PCI_EXP_LNKSTA);
+
+	return !!(val & PCI_EXP_LNKSTA_DLLLA);
+}
+
+static int qcom_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct qcom_pcie *pcie = to_qcom_pcie(pci);
+	int ret;
+
+	qcom_ep_reset_assert(pcie);
+
+	ret = pcie->cfg->ops->init(pcie);
+	if (ret)
+		return ret;
+
+	ret = phy_set_mode_ext(pcie->phy, PHY_MODE_PCIE, PHY_MODE_PCIE_RC);
+	if (ret)
+		goto err_deinit;
+
+	ret = phy_power_on(pcie->phy);
+	if (ret)
+		goto err_deinit;
+
+	if (pcie->cfg->ops->post_init) {
+		ret = pcie->cfg->ops->post_init(pcie);
+		if (ret)
+			goto err_disable_phy;
+	}
+
+	qcom_ep_reset_deassert(pcie);
+
+	if (pcie->cfg->ops->config_sid) {
+		ret = pcie->cfg->ops->config_sid(pcie);
+		if (ret)
+			goto err_assert_reset;
+	}
+
+	return 0;
+
+err_assert_reset:
+	qcom_ep_reset_assert(pcie);
+err_disable_phy:
+	phy_power_off(pcie->phy);
+err_deinit:
+	pcie->cfg->ops->deinit(pcie);
+
+	return ret;
+}
+
+static void qcom_pcie_host_deinit(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct qcom_pcie *pcie = to_qcom_pcie(pci);
+
+	qcom_ep_reset_assert(pcie);
+	phy_power_off(pcie->phy);
+	pcie->cfg->ops->deinit(pcie);
+}
+
+static const struct dw_pcie_host_ops qcom_pcie_dw_ops = {
+	.host_init	= qcom_pcie_host_init,
+	.host_deinit	= qcom_pcie_host_deinit,
+};
+
+/* Qcom IP rev.: 2.1.0	Synopsys IP rev.: 4.01a */
+static const struct qcom_pcie_ops ops_2_1_0 = {
+	.get_resources = qcom_pcie_get_resources_2_1_0,
+	.init = qcom_pcie_init_2_1_0,
+	.post_init = qcom_pcie_post_init_2_1_0,
+	.deinit = qcom_pcie_deinit_2_1_0,
+	.ltssm_enable = qcom_pcie_2_1_0_ltssm_enable,
+};
+
+/* Qcom IP rev.: 1.0.0	Synopsys IP rev.: 4.11a */
+static const struct qcom_pcie_ops ops_1_0_0 = {
+	.get_resources = qcom_pcie_get_resources_1_0_0,
+	.init = qcom_pcie_init_1_0_0,
+	.post_init = qcom_pcie_post_init_1_0_0,
+	.deinit = qcom_pcie_deinit_1_0_0,
+	.ltssm_enable = qcom_pcie_2_1_0_ltssm_enable,
+};
+
+/* Qcom IP rev.: 2.3.2	Synopsys IP rev.: 4.21a */
+static const struct qcom_pcie_ops ops_2_3_2 = {
+	.get_resources = qcom_pcie_get_resources_2_3_2,
+	.init = qcom_pcie_init_2_3_2,
+	.post_init = qcom_pcie_post_init_2_3_2,
+	.deinit = qcom_pcie_deinit_2_3_2,
+	.ltssm_enable = qcom_pcie_2_3_2_ltssm_enable,
+};
+
+/* Qcom IP rev.: 2.4.0	Synopsys IP rev.: 4.20a */
+static const struct qcom_pcie_ops ops_2_4_0 = {
+	.get_resources = qcom_pcie_get_resources_2_4_0,
+	.init = qcom_pcie_init_2_4_0,
+	.post_init = qcom_pcie_post_init_2_3_2,
+	.deinit = qcom_pcie_deinit_2_4_0,
+	.ltssm_enable = qcom_pcie_2_3_2_ltssm_enable,
+};
+
+/* Qcom IP rev.: 2.3.3	Synopsys IP rev.: 4.30a */
+static const struct qcom_pcie_ops ops_2_3_3 = {
+	.get_resources = qcom_pcie_get_resources_2_3_3,
+	.init = qcom_pcie_init_2_3_3,
+	.post_init = qcom_pcie_post_init_2_3_3,
+	.deinit = qcom_pcie_deinit_2_3_3,
+	.ltssm_enable = qcom_pcie_2_3_2_ltssm_enable,
+};
+
+/* Qcom IP rev.: 2.7.0	Synopsys IP rev.: 4.30a */
+static const struct qcom_pcie_ops ops_2_7_0 = {
+	.get_resources = qcom_pcie_get_resources_2_7_0,
+	.init = qcom_pcie_init_2_7_0,
+	.post_init = qcom_pcie_post_init_2_7_0,
+	.deinit = qcom_pcie_deinit_2_7_0,
+	.ltssm_enable = qcom_pcie_2_3_2_ltssm_enable,
+};
+
+/* Qcom IP rev.: 1.9.0 */
+static const struct qcom_pcie_ops ops_1_9_0 = {
+	.get_resources = qcom_pcie_get_resources_2_7_0,
+	.init = qcom_pcie_init_2_7_0,
+	.post_init = qcom_pcie_post_init_2_7_0,
+	.deinit = qcom_pcie_deinit_2_7_0,
+	.ltssm_enable = qcom_pcie_2_3_2_ltssm_enable,
+	.config_sid = qcom_pcie_config_sid_1_9_0,
+};
+
+/* Qcom IP rev.: 2.9.0  Synopsys IP rev.: 5.00a */
+static const struct qcom_pcie_ops ops_2_9_0 = {
+	.get_resources = qcom_pcie_get_resources_2_9_0,
+	.init = qcom_pcie_init_2_9_0,
+	.post_init = qcom_pcie_post_init_2_9_0,
+	.deinit = qcom_pcie_deinit_2_9_0,
+	.ltssm_enable = qcom_pcie_2_3_2_ltssm_enable,
+};
+
+static const struct qcom_pcie_cfg cfg_1_0_0 = {
+	.ops = &ops_1_0_0,
+};
+
+static const struct qcom_pcie_cfg cfg_1_9_0 = {
+	.ops = &ops_1_9_0,
+};
+
+static const struct qcom_pcie_cfg cfg_2_1_0 = {
+	.ops = &ops_2_1_0,
+};
+
+static const struct qcom_pcie_cfg cfg_2_3_2 = {
+	.ops = &ops_2_3_2,
+};
+
+static const struct qcom_pcie_cfg cfg_2_3_3 = {
+	.ops = &ops_2_3_3,
+};
+
+static const struct qcom_pcie_cfg cfg_2_4_0 = {
+	.ops = &ops_2_4_0,
+};
+
+static const struct qcom_pcie_cfg cfg_2_7_0 = {
+	.ops = &ops_2_7_0,
+};
+
+static const struct qcom_pcie_cfg cfg_2_9_0 = {
+	.ops = &ops_2_9_0,
+};
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+	.link_up = qcom_pcie_link_up,
+	.start_link = qcom_pcie_start_link,
+};
+
+static int qcom_pcie_icc_init(struct qcom_pcie *pcie)
+{
+	struct dw_pcie *pci = pcie->pci;
+	int ret;
+
+	pcie->icc_mem = devm_of_icc_get(pci->dev, "pcie-mem");
+	if (IS_ERR(pcie->icc_mem))
+		return PTR_ERR(pcie->icc_mem);
+
+	/*
+	 * Some Qualcomm platforms require interconnect bandwidth constraints
+	 * to be set before enabling interconnect clocks.
+	 *
+	 * Set an initial peak bandwidth corresponding to single-lane Gen 1
+	 * for the pcie-mem path.
+	 */
+	ret = icc_set_bw(pcie->icc_mem, 0, MBps_to_icc(250));
+	if (ret) {
+		dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n",
+			ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
+{
+	struct dw_pcie *pci = pcie->pci;
+	u32 offset, status, bw;
+	int speed, width;
+	int ret;
+
+	if (!pcie->icc_mem)
+		return;
+
+	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	status = readw(pci->dbi_base + offset + PCI_EXP_LNKSTA);
+
+	/* Only update constraints if link is up. */
+	if (!(status & PCI_EXP_LNKSTA_DLLLA))
+		return;
+
+	speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, status);
+	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, status);
+
+	switch (speed) {
+	case 1:
+		bw = MBps_to_icc(250);
+		break;
+	case 2:
+		bw = MBps_to_icc(500);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		fallthrough;
+	case 3:
+		bw = MBps_to_icc(985);
+		break;
+	}
+
+	ret = icc_set_bw(pcie->icc_mem, 0, width * bw);
+	if (ret) {
+		dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n",
+			ret);
+	}
+}
+
+static int qcom_pcie_link_transition_count(struct seq_file *s, void *data)
+{
+	struct qcom_pcie *pcie = (struct qcom_pcie *)dev_get_drvdata(s->private);
+
+	seq_printf(s, "L0s transition count: %u\n",
+		   readl_relaxed(pcie->mhi + PARF_DEBUG_CNT_PM_LINKST_IN_L0S));
+
+	seq_printf(s, "L1 transition count: %u\n",
+		   readl_relaxed(pcie->mhi + PARF_DEBUG_CNT_PM_LINKST_IN_L1));
+
+	seq_printf(s, "L1.1 transition count: %u\n",
+		   readl_relaxed(pcie->mhi + PARF_DEBUG_CNT_AUX_CLK_IN_L1SUB_L1));
+
+	seq_printf(s, "L1.2 transition count: %u\n",
+		   readl_relaxed(pcie->mhi + PARF_DEBUG_CNT_AUX_CLK_IN_L1SUB_L2));
+
+	seq_printf(s, "L2 transition count: %u\n",
+		   readl_relaxed(pcie->mhi + PARF_DEBUG_CNT_PM_LINKST_IN_L2));
+
+	return 0;
+}
+
+static void qcom_pcie_init_debugfs(struct qcom_pcie *pcie)
+{
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	char *name;
+
+	name = devm_kasprintf(dev, GFP_KERNEL, "%pOFP", dev->of_node);
+	if (!name)
+		return;
+
+	pcie->debugfs = debugfs_create_dir(name, NULL);
+	debugfs_create_devm_seqfile(dev, "link_transition_count", pcie->debugfs,
+				    qcom_pcie_link_transition_count);
+}
+
+static int qcom_pcie_probe(struct platform_device *pdev)
+{
+	const struct qcom_pcie_cfg *pcie_cfg;
+	struct device *dev = &pdev->dev;
+	struct qcom_pcie *pcie;
+	struct dw_pcie_rp *pp;
+	struct resource *res;
+	struct dw_pcie *pci;
+	int ret;
+
+	pcie_cfg = of_device_get_match_data(dev);
+	if (!pcie_cfg || !pcie_cfg->ops) {
+		dev_err(dev, "Invalid platform data\n");
+		return -EINVAL;
+	}
+
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	if (!pci)
+		return -ENOMEM;
+
+	pm_runtime_enable(dev);
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0)
+		goto err_pm_runtime_put;
+
+	pci->dev = dev;
+	pci->ops = &dw_pcie_ops;
+	pp = &pci->pp;
+
+	pcie->pci = pci;
+
+	pcie->cfg = pcie_cfg;
+
+	pcie->reset = devm_gpiod_get_optional(dev, "perst", GPIOD_OUT_HIGH);
+	if (IS_ERR(pcie->reset)) {
+		ret = PTR_ERR(pcie->reset);
+		goto err_pm_runtime_put;
+	}
+
+	pcie->parf = devm_platform_ioremap_resource_byname(pdev, "parf");
+	if (IS_ERR(pcie->parf)) {
+		ret = PTR_ERR(pcie->parf);
+		goto err_pm_runtime_put;
+	}
+
+	pcie->elbi = devm_platform_ioremap_resource_byname(pdev, "elbi");
+	if (IS_ERR(pcie->elbi)) {
+		ret = PTR_ERR(pcie->elbi);
+		goto err_pm_runtime_put;
+	}
+
+	/* MHI region is optional */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "mhi");
+	if (res) {
+		pcie->mhi = devm_ioremap_resource(dev, res);
+		if (IS_ERR(pcie->mhi)) {
+			ret = PTR_ERR(pcie->mhi);
+			goto err_pm_runtime_put;
+		}
+	}
+
+	pcie->phy = devm_phy_optional_get(dev, "pciephy");
+	if (IS_ERR(pcie->phy)) {
+		ret = PTR_ERR(pcie->phy);
+		goto err_pm_runtime_put;
+	}
+
+	ret = qcom_pcie_icc_init(pcie);
+	if (ret)
+		goto err_pm_runtime_put;
+
+	ret = pcie->cfg->ops->get_resources(pcie);
+	if (ret)
+		goto err_pm_runtime_put;
+
+	pp->ops = &qcom_pcie_dw_ops;
+
+	ret = phy_init(pcie->phy);
+	if (ret)
+		goto err_pm_runtime_put;
+
+	platform_set_drvdata(pdev, pcie);
+
+	ret = dw_pcie_host_init(pp);
+	if (ret) {
+		dev_err(dev, "cannot initialize host\n");
+		goto err_phy_exit;
+	}
+
+	qcom_pcie_icc_update(pcie);
+
+	if (pcie->mhi)
+		qcom_pcie_init_debugfs(pcie);
+
+	return 0;
+
+err_phy_exit:
+	phy_exit(pcie->phy);
+err_pm_runtime_put:
+	pm_runtime_put(dev);
+	pm_runtime_disable(dev);
+
+	return ret;
+}
+
+static int qcom_pcie_suspend_noirq(struct device *dev)
+{
+	struct qcom_pcie *pcie = dev_get_drvdata(dev);
+	int ret;
+
+	/*
+	 * Set minimum bandwidth required to keep data path functional during
+	 * suspend.
+	 */
+	ret = icc_set_bw(pcie->icc_mem, 0, kBps_to_icc(1));
+	if (ret) {
+		dev_err(dev, "Failed to set interconnect bandwidth: %d\n", ret);
+		return ret;
+	}
+
+	/*
+	 * Turn OFF the resources only for controllers without active PCIe
+	 * devices. For controllers with active devices, the resources are kept
+	 * ON and the link is expected to be in L0/L1 (sub)states.
+	 *
+	 * Turning OFF the resources for controllers with active PCIe devices
+	 * will trigger access violation during the end of the suspend cycle,
+	 * as kernel tries to access the PCIe devices config space for masking
+	 * MSIs.
+	 *
+	 * Also, it is not desirable to put the link into L2/L3 state as that
+	 * implies VDD supply will be removed and the devices may go into
+	 * powerdown state. This will affect the lifetime of the storage devices
+	 * like NVMe.
+	 */
+	if (!dw_pcie_link_up(pcie->pci)) {
+		qcom_pcie_host_deinit(&pcie->pci->pp);
+		pcie->suspended = true;
+	}
+
+	return 0;
+}
+
+static int qcom_pcie_resume_noirq(struct device *dev)
+{
+	struct qcom_pcie *pcie = dev_get_drvdata(dev);
+	int ret;
+
+	if (pcie->suspended) {
+		ret = qcom_pcie_host_init(&pcie->pci->pp);
+		if (ret)
+			return ret;
+
+		pcie->suspended = false;
+	}
+
+	qcom_pcie_icc_update(pcie);
+
+	return 0;
+}
+
+static const struct of_device_id qcom_pcie_match[] = {
+	{ .compatible = "qcom,pcie-apq8064", .data = &cfg_2_1_0 },
+	{ .compatible = "qcom,pcie-apq8084", .data = &cfg_1_0_0 },
+	{ .compatible = "qcom,pcie-ipq4019", .data = &cfg_2_4_0 },
+	{ .compatible = "qcom,pcie-ipq6018", .data = &cfg_2_9_0 },
+	{ .compatible = "qcom,pcie-ipq8064", .data = &cfg_2_1_0 },
+	{ .compatible = "qcom,pcie-ipq8064-v2", .data = &cfg_2_1_0 },
+	{ .compatible = "qcom,pcie-ipq8074", .data = &cfg_2_3_3 },
+	{ .compatible = "qcom,pcie-ipq8074-gen3", .data = &cfg_2_9_0 },
+	{ .compatible = "qcom,pcie-msm8996", .data = &cfg_2_3_2 },
+	{ .compatible = "qcom,pcie-qcs404", .data = &cfg_2_4_0 },
+	{ .compatible = "qcom,pcie-sa8540p", .data = &cfg_1_9_0 },
+	{ .compatible = "qcom,pcie-sa8775p", .data = &cfg_1_9_0},
+	{ .compatible = "qcom,pcie-sc7280", .data = &cfg_1_9_0 },
+	{ .compatible = "qcom,pcie-sc8180x", .data = &cfg_1_9_0 },
+	{ .compatible = "qcom,pcie-sc8280xp", .data = &cfg_1_9_0 },
+	{ .compatible = "qcom,pcie-sdm845", .data = &cfg_2_7_0 },
+	{ .compatible = "qcom,pcie-sdx55", .data = &cfg_1_9_0 },
+	{ .compatible = "qcom,pcie-sm8150", .data = &cfg_1_9_0 },
+	{ .compatible = "qcom,pcie-sm8250", .data = &cfg_1_9_0 },
+	{ .compatible = "qcom,pcie-sm8350", .data = &cfg_1_9_0 },
+	{ .compatible = "qcom,pcie-sm8450-pcie0", .data = &cfg_1_9_0 },
+	{ .compatible = "qcom,pcie-sm8450-pcie1", .data = &cfg_1_9_0 },
+	{ .compatible = "qcom,pcie-sm8550", .data = &cfg_1_9_0 },
+	{ }
+};
+
+static void qcom_fixup_class(struct pci_dev *dev)
+{
+	dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0101, qcom_fixup_class);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0104, qcom_fixup_class);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0106, qcom_fixup_class);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0107, qcom_fixup_class);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0302, qcom_fixup_class);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x1000, qcom_fixup_class);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x1001, qcom_fixup_class);
+
+static const struct dev_pm_ops qcom_pcie_pm_ops = {
+	NOIRQ_SYSTEM_SLEEP_PM_OPS(qcom_pcie_suspend_noirq, qcom_pcie_resume_noirq)
+};
+
+static struct platform_driver qcom_pcie_driver = {
+	.probe = qcom_pcie_probe,
+	.driver = {
+		.name = "qcom-pcie",
+		.suppress_bind_attrs = true,
+		.of_match_table = qcom_pcie_match,
+		.pm = &qcom_pcie_pm_ops,
+		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
+	},
+};
+builtin_platform_driver(qcom_pcie_driver);
diff --git a/drivers/pci/controller/dwc/pcie-spear13xx.c b/drivers/pci/controller/dwc/pcie-spear13xx.c
new file mode 100644
index 000000000..99d47ae80
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-spear13xx.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for ST Microelectronics SPEAr13xx SoCs
+ *
+ * SPEAr13xx PCIe Glue Layer Source Code
+ *
+ * Copyright (C) 2010-2014 ST Microelectronics
+ * Pratyush Anand <pratyush.anand@gmail.com>
+ * Mohit Kumar <mohit.kumar.dhaka@gmail.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+
+#include "pcie-designware.h"
+
+struct spear13xx_pcie {
+	struct dw_pcie		*pci;
+	void __iomem		*app_base;
+	struct phy		*phy;
+	struct clk		*clk;
+};
+
+struct pcie_app_reg {
+	u32	app_ctrl_0;		/* cr0 */
+	u32	app_ctrl_1;		/* cr1 */
+	u32	app_status_0;		/* cr2 */
+	u32	app_status_1;		/* cr3 */
+	u32	msg_status;		/* cr4 */
+	u32	msg_payload;		/* cr5 */
+	u32	int_sts;		/* cr6 */
+	u32	int_clr;		/* cr7 */
+	u32	int_mask;		/* cr8 */
+	u32	mst_bmisc;		/* cr9 */
+	u32	phy_ctrl;		/* cr10 */
+	u32	phy_status;		/* cr11 */
+	u32	cxpl_debug_info_0;	/* cr12 */
+	u32	cxpl_debug_info_1;	/* cr13 */
+	u32	ven_msg_ctrl_0;		/* cr14 */
+	u32	ven_msg_ctrl_1;		/* cr15 */
+	u32	ven_msg_data_0;		/* cr16 */
+	u32	ven_msg_data_1;		/* cr17 */
+	u32	ven_msi_0;		/* cr18 */
+	u32	ven_msi_1;		/* cr19 */
+	u32	mst_rmisc;		/* cr20 */
+};
+
+/* CR0 ID */
+#define APP_LTSSM_ENABLE_ID			3
+#define DEVICE_TYPE_RC				(4 << 25)
+#define MISCTRL_EN_ID				30
+#define REG_TRANSLATION_ENABLE			31
+
+/* CR3 ID */
+#define XMLH_LINK_UP				(1 << 6)
+
+/* CR6 */
+#define MSI_CTRL_INT				(1 << 26)
+
+#define to_spear13xx_pcie(x)	dev_get_drvdata((x)->dev)
+
+static int spear13xx_pcie_start_link(struct dw_pcie *pci)
+{
+	struct spear13xx_pcie *spear13xx_pcie = to_spear13xx_pcie(pci);
+	struct pcie_app_reg __iomem *app_reg = spear13xx_pcie->app_base;
+
+	/* enable ltssm */
+	writel(DEVICE_TYPE_RC | (1 << MISCTRL_EN_ID)
+			| (1 << APP_LTSSM_ENABLE_ID)
+			| ((u32)1 << REG_TRANSLATION_ENABLE),
+			&app_reg->app_ctrl_0);
+
+	return 0;
+}
+
+static irqreturn_t spear13xx_pcie_irq_handler(int irq, void *arg)
+{
+	struct spear13xx_pcie *spear13xx_pcie = arg;
+	struct pcie_app_reg __iomem *app_reg = spear13xx_pcie->app_base;
+	struct dw_pcie *pci = spear13xx_pcie->pci;
+	struct dw_pcie_rp *pp = &pci->pp;
+	unsigned int status;
+
+	status = readl(&app_reg->int_sts);
+
+	if (status & MSI_CTRL_INT) {
+		BUG_ON(!IS_ENABLED(CONFIG_PCI_MSI));
+		dw_handle_msi_irq(pp);
+	}
+
+	writel(status, &app_reg->int_clr);
+
+	return IRQ_HANDLED;
+}
+
+static void spear13xx_pcie_enable_interrupts(struct spear13xx_pcie *spear13xx_pcie)
+{
+	struct pcie_app_reg __iomem *app_reg = spear13xx_pcie->app_base;
+
+	/* Enable MSI interrupt */
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		writel(readl(&app_reg->int_mask) |
+				MSI_CTRL_INT, &app_reg->int_mask);
+}
+
+static int spear13xx_pcie_link_up(struct dw_pcie *pci)
+{
+	struct spear13xx_pcie *spear13xx_pcie = to_spear13xx_pcie(pci);
+	struct pcie_app_reg __iomem *app_reg = spear13xx_pcie->app_base;
+
+	if (readl(&app_reg->app_status_1) & XMLH_LINK_UP)
+		return 1;
+
+	return 0;
+}
+
+static int spear13xx_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct spear13xx_pcie *spear13xx_pcie = to_spear13xx_pcie(pci);
+	u32 exp_cap_off = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	u32 val;
+
+	spear13xx_pcie->app_base = pci->dbi_base + 0x2000;
+
+	/*
+	 * this controller support only 128 bytes read size, however its
+	 * default value in capability register is 512 bytes. So force
+	 * it to 128 here.
+	 */
+	val = dw_pcie_readw_dbi(pci, exp_cap_off + PCI_EXP_DEVCTL);
+	val &= ~PCI_EXP_DEVCTL_READRQ;
+	dw_pcie_writew_dbi(pci, exp_cap_off + PCI_EXP_DEVCTL, val);
+
+	dw_pcie_writew_dbi(pci, PCI_VENDOR_ID, 0x104A);
+	dw_pcie_writew_dbi(pci, PCI_DEVICE_ID, 0xCD80);
+
+	spear13xx_pcie_enable_interrupts(spear13xx_pcie);
+
+	return 0;
+}
+
+static const struct dw_pcie_host_ops spear13xx_pcie_host_ops = {
+	.host_init = spear13xx_pcie_host_init,
+};
+
+static int spear13xx_add_pcie_port(struct spear13xx_pcie *spear13xx_pcie,
+				   struct platform_device *pdev)
+{
+	struct dw_pcie *pci = spear13xx_pcie->pci;
+	struct dw_pcie_rp *pp = &pci->pp;
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	pp->irq = platform_get_irq(pdev, 0);
+	if (pp->irq < 0)
+		return pp->irq;
+
+	ret = devm_request_irq(dev, pp->irq, spear13xx_pcie_irq_handler,
+			       IRQF_SHARED | IRQF_NO_THREAD,
+			       "spear1340-pcie", spear13xx_pcie);
+	if (ret) {
+		dev_err(dev, "failed to request irq %d\n", pp->irq);
+		return ret;
+	}
+
+	pp->ops = &spear13xx_pcie_host_ops;
+	pp->msi_irq[0] = -ENODEV;
+
+	ret = dw_pcie_host_init(pp);
+	if (ret) {
+		dev_err(dev, "failed to initialize host\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+	.link_up = spear13xx_pcie_link_up,
+	.start_link = spear13xx_pcie_start_link,
+};
+
+static int spear13xx_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct dw_pcie *pci;
+	struct spear13xx_pcie *spear13xx_pcie;
+	struct device_node *np = dev->of_node;
+	int ret;
+
+	spear13xx_pcie = devm_kzalloc(dev, sizeof(*spear13xx_pcie), GFP_KERNEL);
+	if (!spear13xx_pcie)
+		return -ENOMEM;
+
+	pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	if (!pci)
+		return -ENOMEM;
+
+	pci->dev = dev;
+	pci->ops = &dw_pcie_ops;
+
+	spear13xx_pcie->pci = pci;
+
+	spear13xx_pcie->phy = devm_phy_get(dev, "pcie-phy");
+	if (IS_ERR(spear13xx_pcie->phy)) {
+		ret = PTR_ERR(spear13xx_pcie->phy);
+		if (ret == -EPROBE_DEFER)
+			dev_info(dev, "probe deferred\n");
+		else
+			dev_err(dev, "couldn't get pcie-phy\n");
+		return ret;
+	}
+
+	phy_init(spear13xx_pcie->phy);
+
+	spear13xx_pcie->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(spear13xx_pcie->clk)) {
+		dev_err(dev, "couldn't get clk for pcie\n");
+		return PTR_ERR(spear13xx_pcie->clk);
+	}
+	ret = clk_prepare_enable(spear13xx_pcie->clk);
+	if (ret) {
+		dev_err(dev, "couldn't enable clk for pcie\n");
+		return ret;
+	}
+
+	if (of_property_read_bool(np, "st,pcie-is-gen1"))
+		pci->link_gen = 1;
+
+	platform_set_drvdata(pdev, spear13xx_pcie);
+
+	ret = spear13xx_add_pcie_port(spear13xx_pcie, pdev);
+	if (ret < 0)
+		goto fail_clk;
+
+	return 0;
+
+fail_clk:
+	clk_disable_unprepare(spear13xx_pcie->clk);
+
+	return ret;
+}
+
+static const struct of_device_id spear13xx_pcie_of_match[] = {
+	{ .compatible = "st,spear1340-pcie", },
+	{},
+};
+
+static struct platform_driver spear13xx_pcie_driver = {
+	.probe		= spear13xx_pcie_probe,
+	.driver = {
+		.name	= "spear-pcie",
+		.of_match_table = spear13xx_pcie_of_match,
+		.suppress_bind_attrs = true,
+	},
+};
+
+builtin_platform_driver(spear13xx_pcie_driver);
diff --git a/drivers/pci/controller/dwc/pcie-tegra194-acpi.c b/drivers/pci/controller/dwc/pcie-tegra194-acpi.c
new file mode 100644
index 000000000..55f61914a
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-tegra194-acpi.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * ACPI quirks for Tegra194 PCIe host controller
+ *
+ * Copyright (C) 2021 NVIDIA Corporation.
+ *
+ * Author: Vidya Sagar <vidyas@nvidia.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/pci-ecam.h>
+
+#include "pcie-designware.h"
+
+struct tegra194_pcie_ecam  {
+	void __iomem *config_base;
+	void __iomem *iatu_base;
+	void __iomem *dbi_base;
+};
+
+static int tegra194_acpi_init(struct pci_config_window *cfg)
+{
+	struct device *dev = cfg->parent;
+	struct tegra194_pcie_ecam *pcie_ecam;
+
+	pcie_ecam = devm_kzalloc(dev, sizeof(*pcie_ecam), GFP_KERNEL);
+	if (!pcie_ecam)
+		return -ENOMEM;
+
+	pcie_ecam->config_base = cfg->win;
+	pcie_ecam->iatu_base = cfg->win + SZ_256K;
+	pcie_ecam->dbi_base = cfg->win + SZ_512K;
+	cfg->priv = pcie_ecam;
+
+	return 0;
+}
+
+static void atu_reg_write(struct tegra194_pcie_ecam *pcie_ecam, int index,
+			  u32 val, u32 reg)
+{
+	u32 offset = PCIE_ATU_UNROLL_BASE(PCIE_ATU_REGION_DIR_OB, index) +
+		     PCIE_ATU_VIEWPORT_BASE;
+
+	writel(val, pcie_ecam->iatu_base + offset + reg);
+}
+
+static void program_outbound_atu(struct tegra194_pcie_ecam *pcie_ecam,
+				 int index, int type, u64 cpu_addr,
+				 u64 pci_addr, u64 size)
+{
+	atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr),
+		      PCIE_ATU_LOWER_BASE);
+	atu_reg_write(pcie_ecam, index, upper_32_bits(cpu_addr),
+		      PCIE_ATU_UPPER_BASE);
+	atu_reg_write(pcie_ecam, index, lower_32_bits(pci_addr),
+		      PCIE_ATU_LOWER_TARGET);
+	atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr + size - 1),
+		      PCIE_ATU_LIMIT);
+	atu_reg_write(pcie_ecam, index, upper_32_bits(pci_addr),
+		      PCIE_ATU_UPPER_TARGET);
+	atu_reg_write(pcie_ecam, index, type, PCIE_ATU_REGION_CTRL1);
+	atu_reg_write(pcie_ecam, index, PCIE_ATU_ENABLE, PCIE_ATU_REGION_CTRL2);
+}
+
+static void __iomem *tegra194_map_bus(struct pci_bus *bus,
+				      unsigned int devfn, int where)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	struct tegra194_pcie_ecam *pcie_ecam = cfg->priv;
+	u32 busdev;
+	int type;
+
+	if (bus->number < cfg->busr.start || bus->number > cfg->busr.end)
+		return NULL;
+
+	if (bus->number == cfg->busr.start) {
+		if (PCI_SLOT(devfn) == 0)
+			return pcie_ecam->dbi_base + where;
+		else
+			return NULL;
+	}
+
+	busdev = PCIE_ATU_BUS(bus->number) | PCIE_ATU_DEV(PCI_SLOT(devfn)) |
+		 PCIE_ATU_FUNC(PCI_FUNC(devfn));
+
+	if (bus->parent->number == cfg->busr.start) {
+		if (PCI_SLOT(devfn) == 0)
+			type = PCIE_ATU_TYPE_CFG0;
+		else
+			return NULL;
+	} else {
+		type = PCIE_ATU_TYPE_CFG1;
+	}
+
+	program_outbound_atu(pcie_ecam, 0, type, cfg->res.start, busdev,
+			     SZ_256K);
+
+	return pcie_ecam->config_base + where;
+}
+
+const struct pci_ecam_ops tegra194_pcie_ops = {
+	.init		= tegra194_acpi_init,
+	.pci_ops	= {
+		.map_bus	= tegra194_map_bus,
+		.read		= pci_generic_config_read,
+		.write		= pci_generic_config_write,
+	}
+};
diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c
new file mode 100644
index 000000000..248cd9347
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-tegra194.c
@@ -0,0 +1,2514 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCIe host controller driver for the following SoCs
+ * Tegra194
+ * Tegra234
+ *
+ * Copyright (C) 2019-2022 NVIDIA Corporation.
+ *
+ * Author: Vidya Sagar <vidyas@nvidia.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interconnect.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/random.h>
+#include <linux/reset.h>
+#include <linux/resource.h>
+#include <linux/types.h>
+#include "pcie-designware.h"
+#include <soc/tegra/bpmp.h>
+#include <soc/tegra/bpmp-abi.h>
+#include "../../pci.h"
+
+#define TEGRA194_DWC_IP_VER			0x490A
+#define TEGRA234_DWC_IP_VER			0x562A
+
+#define APPL_PINMUX				0x0
+#define APPL_PINMUX_PEX_RST			BIT(0)
+#define APPL_PINMUX_CLKREQ_OVERRIDE_EN		BIT(2)
+#define APPL_PINMUX_CLKREQ_OVERRIDE		BIT(3)
+#define APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE_EN	BIT(4)
+#define APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE	BIT(5)
+
+#define APPL_CTRL				0x4
+#define APPL_CTRL_SYS_PRE_DET_STATE		BIT(6)
+#define APPL_CTRL_LTSSM_EN			BIT(7)
+#define APPL_CTRL_HW_HOT_RST_EN			BIT(20)
+#define APPL_CTRL_HW_HOT_RST_MODE_MASK		GENMASK(1, 0)
+#define APPL_CTRL_HW_HOT_RST_MODE_SHIFT		22
+#define APPL_CTRL_HW_HOT_RST_MODE_IMDT_RST	0x1
+#define APPL_CTRL_HW_HOT_RST_MODE_IMDT_RST_LTSSM_EN	0x2
+
+#define APPL_INTR_EN_L0_0			0x8
+#define APPL_INTR_EN_L0_0_LINK_STATE_INT_EN	BIT(0)
+#define APPL_INTR_EN_L0_0_MSI_RCV_INT_EN	BIT(4)
+#define APPL_INTR_EN_L0_0_INT_INT_EN		BIT(8)
+#define APPL_INTR_EN_L0_0_PCI_CMD_EN_INT_EN	BIT(15)
+#define APPL_INTR_EN_L0_0_CDM_REG_CHK_INT_EN	BIT(19)
+#define APPL_INTR_EN_L0_0_SYS_INTR_EN		BIT(30)
+#define APPL_INTR_EN_L0_0_SYS_MSI_INTR_EN	BIT(31)
+
+#define APPL_INTR_STATUS_L0			0xC
+#define APPL_INTR_STATUS_L0_LINK_STATE_INT	BIT(0)
+#define APPL_INTR_STATUS_L0_INT_INT		BIT(8)
+#define APPL_INTR_STATUS_L0_PCI_CMD_EN_INT	BIT(15)
+#define APPL_INTR_STATUS_L0_PEX_RST_INT		BIT(16)
+#define APPL_INTR_STATUS_L0_CDM_REG_CHK_INT	BIT(18)
+
+#define APPL_INTR_EN_L1_0_0				0x1C
+#define APPL_INTR_EN_L1_0_0_LINK_REQ_RST_NOT_INT_EN	BIT(1)
+#define APPL_INTR_EN_L1_0_0_RDLH_LINK_UP_INT_EN		BIT(3)
+#define APPL_INTR_EN_L1_0_0_HOT_RESET_DONE_INT_EN	BIT(30)
+
+#define APPL_INTR_STATUS_L1_0_0				0x20
+#define APPL_INTR_STATUS_L1_0_0_LINK_REQ_RST_NOT_CHGED	BIT(1)
+#define APPL_INTR_STATUS_L1_0_0_RDLH_LINK_UP_CHGED	BIT(3)
+#define APPL_INTR_STATUS_L1_0_0_HOT_RESET_DONE		BIT(30)
+
+#define APPL_INTR_STATUS_L1_1			0x2C
+#define APPL_INTR_STATUS_L1_2			0x30
+#define APPL_INTR_STATUS_L1_3			0x34
+#define APPL_INTR_STATUS_L1_6			0x3C
+#define APPL_INTR_STATUS_L1_7			0x40
+#define APPL_INTR_STATUS_L1_15_CFG_BME_CHGED	BIT(1)
+
+#define APPL_INTR_EN_L1_8_0			0x44
+#define APPL_INTR_EN_L1_8_BW_MGT_INT_EN		BIT(2)
+#define APPL_INTR_EN_L1_8_AUTO_BW_INT_EN	BIT(3)
+#define APPL_INTR_EN_L1_8_INTX_EN		BIT(11)
+#define APPL_INTR_EN_L1_8_AER_INT_EN		BIT(15)
+
+#define APPL_INTR_STATUS_L1_8_0			0x4C
+#define APPL_INTR_STATUS_L1_8_0_EDMA_INT_MASK	GENMASK(11, 6)
+#define APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS	BIT(2)
+#define APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS	BIT(3)
+
+#define APPL_INTR_STATUS_L1_9			0x54
+#define APPL_INTR_STATUS_L1_10			0x58
+#define APPL_INTR_STATUS_L1_11			0x64
+#define APPL_INTR_STATUS_L1_13			0x74
+#define APPL_INTR_STATUS_L1_14			0x78
+#define APPL_INTR_STATUS_L1_15			0x7C
+#define APPL_INTR_STATUS_L1_17			0x88
+
+#define APPL_INTR_EN_L1_18				0x90
+#define APPL_INTR_EN_L1_18_CDM_REG_CHK_CMPLT		BIT(2)
+#define APPL_INTR_EN_L1_18_CDM_REG_CHK_CMP_ERR		BIT(1)
+#define APPL_INTR_EN_L1_18_CDM_REG_CHK_LOGIC_ERR	BIT(0)
+
+#define APPL_INTR_STATUS_L1_18				0x94
+#define APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMPLT	BIT(2)
+#define APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMP_ERR	BIT(1)
+#define APPL_INTR_STATUS_L1_18_CDM_REG_CHK_LOGIC_ERR	BIT(0)
+
+#define APPL_MSI_CTRL_1				0xAC
+
+#define APPL_MSI_CTRL_2				0xB0
+
+#define APPL_LEGACY_INTX			0xB8
+
+#define APPL_LTR_MSG_1				0xC4
+#define LTR_MSG_REQ				BIT(15)
+#define LTR_MST_NO_SNOOP_SHIFT			16
+
+#define APPL_LTR_MSG_2				0xC8
+#define APPL_LTR_MSG_2_LTR_MSG_REQ_STATE	BIT(3)
+
+#define APPL_LINK_STATUS			0xCC
+#define APPL_LINK_STATUS_RDLH_LINK_UP		BIT(0)
+
+#define APPL_DEBUG				0xD0
+#define APPL_DEBUG_PM_LINKST_IN_L2_LAT		BIT(21)
+#define APPL_DEBUG_PM_LINKST_IN_L0		0x11
+#define APPL_DEBUG_LTSSM_STATE_MASK		GENMASK(8, 3)
+#define APPL_DEBUG_LTSSM_STATE_SHIFT		3
+#define LTSSM_STATE_PRE_DETECT			5
+
+#define APPL_RADM_STATUS			0xE4
+#define APPL_PM_XMT_TURNOFF_STATE		BIT(0)
+
+#define APPL_DM_TYPE				0x100
+#define APPL_DM_TYPE_MASK			GENMASK(3, 0)
+#define APPL_DM_TYPE_RP				0x4
+#define APPL_DM_TYPE_EP				0x0
+
+#define APPL_CFG_BASE_ADDR			0x104
+#define APPL_CFG_BASE_ADDR_MASK			GENMASK(31, 12)
+
+#define APPL_CFG_IATU_DMA_BASE_ADDR		0x108
+#define APPL_CFG_IATU_DMA_BASE_ADDR_MASK	GENMASK(31, 18)
+
+#define APPL_CFG_MISC				0x110
+#define APPL_CFG_MISC_SLV_EP_MODE		BIT(14)
+#define APPL_CFG_MISC_ARCACHE_MASK		GENMASK(13, 10)
+#define APPL_CFG_MISC_ARCACHE_SHIFT		10
+#define APPL_CFG_MISC_ARCACHE_VAL		3
+
+#define APPL_CFG_SLCG_OVERRIDE			0x114
+#define APPL_CFG_SLCG_OVERRIDE_SLCG_EN_MASTER	BIT(0)
+
+#define APPL_CAR_RESET_OVRD				0x12C
+#define APPL_CAR_RESET_OVRD_CYA_OVERRIDE_CORE_RST_N	BIT(0)
+
+#define IO_BASE_IO_DECODE				BIT(0)
+#define IO_BASE_IO_DECODE_BIT8				BIT(8)
+
+#define CFG_PREF_MEM_LIMIT_BASE_MEM_DECODE		BIT(0)
+#define CFG_PREF_MEM_LIMIT_BASE_MEM_LIMIT_DECODE	BIT(16)
+
+#define CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF	0x718
+#define CFG_TIMER_CTRL_ACK_NAK_SHIFT	(19)
+
+#define N_FTS_VAL					52
+#define FTS_VAL						52
+
+#define GEN3_EQ_CONTROL_OFF			0x8a8
+#define GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_SHIFT	8
+#define GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_MASK	GENMASK(23, 8)
+#define GEN3_EQ_CONTROL_OFF_FB_MODE_MASK	GENMASK(3, 0)
+
+#define PORT_LOGIC_AMBA_ERROR_RESPONSE_DEFAULT	0x8D0
+#define AMBA_ERROR_RESPONSE_CRS_SHIFT		3
+#define AMBA_ERROR_RESPONSE_CRS_MASK		GENMASK(1, 0)
+#define AMBA_ERROR_RESPONSE_CRS_OKAY		0
+#define AMBA_ERROR_RESPONSE_CRS_OKAY_FFFFFFFF	1
+#define AMBA_ERROR_RESPONSE_CRS_OKAY_FFFF0001	2
+
+#define MSIX_ADDR_MATCH_LOW_OFF			0x940
+#define MSIX_ADDR_MATCH_LOW_OFF_EN		BIT(0)
+#define MSIX_ADDR_MATCH_LOW_OFF_MASK		GENMASK(31, 2)
+
+#define MSIX_ADDR_MATCH_HIGH_OFF		0x944
+#define MSIX_ADDR_MATCH_HIGH_OFF_MASK		GENMASK(31, 0)
+
+#define PORT_LOGIC_MSIX_DOORBELL			0x948
+
+#define CAP_SPCIE_CAP_OFF			0x154
+#define CAP_SPCIE_CAP_OFF_DSP_TX_PRESET0_MASK	GENMASK(3, 0)
+#define CAP_SPCIE_CAP_OFF_USP_TX_PRESET0_MASK	GENMASK(11, 8)
+#define CAP_SPCIE_CAP_OFF_USP_TX_PRESET0_SHIFT	8
+
+#define PME_ACK_TIMEOUT 10000
+
+#define LTSSM_TIMEOUT 50000	/* 50ms */
+
+#define GEN3_GEN4_EQ_PRESET_INIT	5
+
+#define GEN1_CORE_CLK_FREQ	62500000
+#define GEN2_CORE_CLK_FREQ	125000000
+#define GEN3_CORE_CLK_FREQ	250000000
+#define GEN4_CORE_CLK_FREQ	500000000
+
+#define LTR_MSG_TIMEOUT		(100 * 1000)
+
+#define PERST_DEBOUNCE_TIME	(5 * 1000)
+
+#define EP_STATE_DISABLED	0
+#define EP_STATE_ENABLED	1
+
+static const unsigned int pcie_gen_freq[] = {
+	GEN1_CORE_CLK_FREQ,	/* PCI_EXP_LNKSTA_CLS == 0; undefined */
+	GEN1_CORE_CLK_FREQ,
+	GEN2_CORE_CLK_FREQ,
+	GEN3_CORE_CLK_FREQ,
+	GEN4_CORE_CLK_FREQ
+};
+
+struct tegra_pcie_dw_of_data {
+	u32 version;
+	enum dw_pcie_device_mode mode;
+	bool has_msix_doorbell_access_fix;
+	bool has_sbr_reset_fix;
+	bool has_l1ss_exit_fix;
+	bool has_ltr_req_fix;
+	u32 cdm_chk_int_en_bit;
+	u32 gen4_preset_vec;
+	u8 n_fts[2];
+};
+
+struct tegra_pcie_dw {
+	struct device *dev;
+	struct resource *appl_res;
+	struct resource *dbi_res;
+	struct resource *atu_dma_res;
+	void __iomem *appl_base;
+	struct clk *core_clk;
+	struct reset_control *core_apb_rst;
+	struct reset_control *core_rst;
+	struct dw_pcie pci;
+	struct tegra_bpmp *bpmp;
+
+	struct tegra_pcie_dw_of_data *of_data;
+
+	bool supports_clkreq;
+	bool enable_cdm_check;
+	bool enable_srns;
+	bool link_state;
+	bool update_fc_fixup;
+	bool enable_ext_refclk;
+	u8 init_link_width;
+	u32 msi_ctrl_int;
+	u32 num_lanes;
+	u32 cid;
+	u32 cfg_link_cap_l1sub;
+	u32 ras_des_cap;
+	u32 pcie_cap_base;
+	u32 aspm_cmrt;
+	u32 aspm_pwr_on_t;
+	u32 aspm_l0s_enter_lat;
+
+	struct regulator *pex_ctl_supply;
+	struct regulator *slot_ctl_3v3;
+	struct regulator *slot_ctl_12v;
+
+	unsigned int phy_count;
+	struct phy **phys;
+
+	struct dentry *debugfs;
+
+	/* Endpoint mode specific */
+	struct gpio_desc *pex_rst_gpiod;
+	struct gpio_desc *pex_refclk_sel_gpiod;
+	unsigned int pex_rst_irq;
+	int ep_state;
+	long link_status;
+	struct icc_path *icc_path;
+};
+
+static inline struct tegra_pcie_dw *to_tegra_pcie(struct dw_pcie *pci)
+{
+	return container_of(pci, struct tegra_pcie_dw, pci);
+}
+
+static inline void appl_writel(struct tegra_pcie_dw *pcie, const u32 value,
+			       const u32 reg)
+{
+	writel_relaxed(value, pcie->appl_base + reg);
+}
+
+static inline u32 appl_readl(struct tegra_pcie_dw *pcie, const u32 reg)
+{
+	return readl_relaxed(pcie->appl_base + reg);
+}
+
+struct tegra_pcie_soc {
+	enum dw_pcie_device_mode mode;
+};
+
+static void tegra_pcie_icc_set(struct tegra_pcie_dw *pcie)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	u32 val, speed, width;
+
+	val = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKSTA);
+
+	speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, val);
+	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, val);
+
+	val = width * (PCIE_SPEED2MBS_ENC(pcie_link_speed[speed]) / BITS_PER_BYTE);
+
+	if (icc_set_bw(pcie->icc_path, MBps_to_icc(val), 0))
+		dev_err(pcie->dev, "can't set bw[%u]\n", val);
+
+	if (speed >= ARRAY_SIZE(pcie_gen_freq))
+		speed = 0;
+
+	clk_set_rate(pcie->core_clk, pcie_gen_freq[speed]);
+}
+
+static void apply_bad_link_workaround(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+	u32 current_link_width;
+	u16 val;
+
+	/*
+	 * NOTE:- Since this scenario is uncommon and link as such is not
+	 * stable anyway, not waiting to confirm if link is really
+	 * transitioning to Gen-2 speed
+	 */
+	val = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKSTA);
+	if (val & PCI_EXP_LNKSTA_LBMS) {
+		current_link_width = FIELD_GET(PCI_EXP_LNKSTA_NLW, val);
+		if (pcie->init_link_width > current_link_width) {
+			dev_warn(pci->dev, "PCIe link is bad, width reduced\n");
+			val = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base +
+						PCI_EXP_LNKCTL2);
+			val &= ~PCI_EXP_LNKCTL2_TLS;
+			val |= PCI_EXP_LNKCTL2_TLS_2_5GT;
+			dw_pcie_writew_dbi(pci, pcie->pcie_cap_base +
+					   PCI_EXP_LNKCTL2, val);
+
+			val = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base +
+						PCI_EXP_LNKCTL);
+			val |= PCI_EXP_LNKCTL_RL;
+			dw_pcie_writew_dbi(pci, pcie->pcie_cap_base +
+					   PCI_EXP_LNKCTL, val);
+		}
+	}
+}
+
+static irqreturn_t tegra_pcie_rp_irq_handler(int irq, void *arg)
+{
+	struct tegra_pcie_dw *pcie = arg;
+	struct dw_pcie *pci = &pcie->pci;
+	struct dw_pcie_rp *pp = &pci->pp;
+	u32 val, status_l0, status_l1;
+	u16 val_w;
+
+	status_l0 = appl_readl(pcie, APPL_INTR_STATUS_L0);
+	if (status_l0 & APPL_INTR_STATUS_L0_LINK_STATE_INT) {
+		status_l1 = appl_readl(pcie, APPL_INTR_STATUS_L1_0_0);
+		appl_writel(pcie, status_l1, APPL_INTR_STATUS_L1_0_0);
+		if (!pcie->of_data->has_sbr_reset_fix &&
+		    status_l1 & APPL_INTR_STATUS_L1_0_0_LINK_REQ_RST_NOT_CHGED) {
+			/* SBR & Surprise Link Down WAR */
+			val = appl_readl(pcie, APPL_CAR_RESET_OVRD);
+			val &= ~APPL_CAR_RESET_OVRD_CYA_OVERRIDE_CORE_RST_N;
+			appl_writel(pcie, val, APPL_CAR_RESET_OVRD);
+			udelay(1);
+			val = appl_readl(pcie, APPL_CAR_RESET_OVRD);
+			val |= APPL_CAR_RESET_OVRD_CYA_OVERRIDE_CORE_RST_N;
+			appl_writel(pcie, val, APPL_CAR_RESET_OVRD);
+
+			val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+			val |= PORT_LOGIC_SPEED_CHANGE;
+			dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
+		}
+	}
+
+	if (status_l0 & APPL_INTR_STATUS_L0_INT_INT) {
+		status_l1 = appl_readl(pcie, APPL_INTR_STATUS_L1_8_0);
+		if (status_l1 & APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS) {
+			appl_writel(pcie,
+				    APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS,
+				    APPL_INTR_STATUS_L1_8_0);
+			apply_bad_link_workaround(pp);
+		}
+		if (status_l1 & APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS) {
+			val_w = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base +
+						  PCI_EXP_LNKSTA);
+			val_w |= PCI_EXP_LNKSTA_LBMS;
+			dw_pcie_writew_dbi(pci, pcie->pcie_cap_base +
+					   PCI_EXP_LNKSTA, val_w);
+
+			appl_writel(pcie,
+				    APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS,
+				    APPL_INTR_STATUS_L1_8_0);
+
+			val_w = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base +
+						  PCI_EXP_LNKSTA);
+			dev_dbg(pci->dev, "Link Speed : Gen-%u\n", val_w &
+				PCI_EXP_LNKSTA_CLS);
+		}
+	}
+
+	if (status_l0 & APPL_INTR_STATUS_L0_CDM_REG_CHK_INT) {
+		status_l1 = appl_readl(pcie, APPL_INTR_STATUS_L1_18);
+		val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS);
+		if (status_l1 & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMPLT) {
+			dev_info(pci->dev, "CDM check complete\n");
+			val |= PCIE_PL_CHK_REG_CHK_REG_COMPLETE;
+		}
+		if (status_l1 & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMP_ERR) {
+			dev_err(pci->dev, "CDM comparison mismatch\n");
+			val |= PCIE_PL_CHK_REG_CHK_REG_COMPARISON_ERROR;
+		}
+		if (status_l1 & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_LOGIC_ERR) {
+			dev_err(pci->dev, "CDM Logic error\n");
+			val |= PCIE_PL_CHK_REG_CHK_REG_LOGIC_ERROR;
+		}
+		dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val);
+		val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_ERR_ADDR);
+		dev_err(pci->dev, "CDM Error Address Offset = 0x%08X\n", val);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void pex_ep_event_hot_rst_done(struct tegra_pcie_dw *pcie)
+{
+	u32 val;
+
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L0);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_0_0);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_1);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_2);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_3);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_6);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_7);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_8_0);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_9);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_10);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_11);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_13);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_14);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_15);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_17);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_MSI_CTRL_2);
+
+	val = appl_readl(pcie, APPL_CTRL);
+	val |= APPL_CTRL_LTSSM_EN;
+	appl_writel(pcie, val, APPL_CTRL);
+}
+
+static irqreturn_t tegra_pcie_ep_irq_thread(int irq, void *arg)
+{
+	struct tegra_pcie_dw *pcie = arg;
+	struct dw_pcie_ep *ep = &pcie->pci.ep;
+	struct dw_pcie *pci = &pcie->pci;
+	u32 val;
+
+	if (test_and_clear_bit(0, &pcie->link_status))
+		dw_pcie_ep_linkup(ep);
+
+	tegra_pcie_icc_set(pcie);
+
+	if (pcie->of_data->has_ltr_req_fix)
+		return IRQ_HANDLED;
+
+	/* If EP doesn't advertise L1SS, just return */
+	val = dw_pcie_readl_dbi(pci, pcie->cfg_link_cap_l1sub);
+	if (!(val & (PCI_L1SS_CAP_ASPM_L1_1 | PCI_L1SS_CAP_ASPM_L1_2)))
+		return IRQ_HANDLED;
+
+	/* Check if BME is set to '1' */
+	val = dw_pcie_readl_dbi(pci, PCI_COMMAND);
+	if (val & PCI_COMMAND_MASTER) {
+		ktime_t timeout;
+
+		/* 110us for both snoop and no-snoop */
+		val = 110 | (2 << PCI_LTR_SCALE_SHIFT) | LTR_MSG_REQ;
+		val |= (val << LTR_MST_NO_SNOOP_SHIFT);
+		appl_writel(pcie, val, APPL_LTR_MSG_1);
+
+		/* Send LTR upstream */
+		val = appl_readl(pcie, APPL_LTR_MSG_2);
+		val |= APPL_LTR_MSG_2_LTR_MSG_REQ_STATE;
+		appl_writel(pcie, val, APPL_LTR_MSG_2);
+
+		timeout = ktime_add_us(ktime_get(), LTR_MSG_TIMEOUT);
+		for (;;) {
+			val = appl_readl(pcie, APPL_LTR_MSG_2);
+			if (!(val & APPL_LTR_MSG_2_LTR_MSG_REQ_STATE))
+				break;
+			if (ktime_after(ktime_get(), timeout))
+				break;
+			usleep_range(1000, 1100);
+		}
+		if (val & APPL_LTR_MSG_2_LTR_MSG_REQ_STATE)
+			dev_err(pcie->dev, "Failed to send LTR message\n");
+	}
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t tegra_pcie_ep_hard_irq(int irq, void *arg)
+{
+	struct tegra_pcie_dw *pcie = arg;
+	int spurious = 1;
+	u32 status_l0, status_l1, link_status;
+
+	status_l0 = appl_readl(pcie, APPL_INTR_STATUS_L0);
+	if (status_l0 & APPL_INTR_STATUS_L0_LINK_STATE_INT) {
+		status_l1 = appl_readl(pcie, APPL_INTR_STATUS_L1_0_0);
+		appl_writel(pcie, status_l1, APPL_INTR_STATUS_L1_0_0);
+
+		if (status_l1 & APPL_INTR_STATUS_L1_0_0_HOT_RESET_DONE)
+			pex_ep_event_hot_rst_done(pcie);
+
+		if (status_l1 & APPL_INTR_STATUS_L1_0_0_RDLH_LINK_UP_CHGED) {
+			link_status = appl_readl(pcie, APPL_LINK_STATUS);
+			if (link_status & APPL_LINK_STATUS_RDLH_LINK_UP) {
+				dev_dbg(pcie->dev, "Link is up with Host\n");
+				set_bit(0, &pcie->link_status);
+				return IRQ_WAKE_THREAD;
+			}
+		}
+
+		spurious = 0;
+	}
+
+	if (status_l0 & APPL_INTR_STATUS_L0_PCI_CMD_EN_INT) {
+		status_l1 = appl_readl(pcie, APPL_INTR_STATUS_L1_15);
+		appl_writel(pcie, status_l1, APPL_INTR_STATUS_L1_15);
+
+		if (status_l1 & APPL_INTR_STATUS_L1_15_CFG_BME_CHGED)
+			return IRQ_WAKE_THREAD;
+
+		spurious = 0;
+	}
+
+	if (spurious) {
+		dev_warn(pcie->dev, "Random interrupt (STATUS = 0x%08X)\n",
+			 status_l0);
+		appl_writel(pcie, status_l0, APPL_INTR_STATUS_L0);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int tegra_pcie_dw_rd_own_conf(struct pci_bus *bus, u32 devfn, int where,
+				     int size, u32 *val)
+{
+	struct dw_pcie_rp *pp = bus->sysdata;
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+
+	/*
+	 * This is an endpoint mode specific register happen to appear even
+	 * when controller is operating in root port mode and system hangs
+	 * when it is accessed with link being in ASPM-L1 state.
+	 * So skip accessing it altogether
+	 */
+	if (!pcie->of_data->has_msix_doorbell_access_fix &&
+	    !PCI_SLOT(devfn) && where == PORT_LOGIC_MSIX_DOORBELL) {
+		*val = 0x00000000;
+		return PCIBIOS_SUCCESSFUL;
+	}
+
+	return pci_generic_config_read(bus, devfn, where, size, val);
+}
+
+static int tegra_pcie_dw_wr_own_conf(struct pci_bus *bus, u32 devfn, int where,
+				     int size, u32 val)
+{
+	struct dw_pcie_rp *pp = bus->sysdata;
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+
+	/*
+	 * This is an endpoint mode specific register happen to appear even
+	 * when controller is operating in root port mode and system hangs
+	 * when it is accessed with link being in ASPM-L1 state.
+	 * So skip accessing it altogether
+	 */
+	if (!pcie->of_data->has_msix_doorbell_access_fix &&
+	    !PCI_SLOT(devfn) && where == PORT_LOGIC_MSIX_DOORBELL)
+		return PCIBIOS_SUCCESSFUL;
+
+	return pci_generic_config_write(bus, devfn, where, size, val);
+}
+
+static struct pci_ops tegra_pci_ops = {
+	.map_bus = dw_pcie_own_conf_map_bus,
+	.read = tegra_pcie_dw_rd_own_conf,
+	.write = tegra_pcie_dw_wr_own_conf,
+};
+
+#if defined(CONFIG_PCIEASPM)
+static void disable_aspm_l11(struct tegra_pcie_dw *pcie)
+{
+	u32 val;
+
+	val = dw_pcie_readl_dbi(&pcie->pci, pcie->cfg_link_cap_l1sub);
+	val &= ~PCI_L1SS_CAP_ASPM_L1_1;
+	dw_pcie_writel_dbi(&pcie->pci, pcie->cfg_link_cap_l1sub, val);
+}
+
+static void disable_aspm_l12(struct tegra_pcie_dw *pcie)
+{
+	u32 val;
+
+	val = dw_pcie_readl_dbi(&pcie->pci, pcie->cfg_link_cap_l1sub);
+	val &= ~PCI_L1SS_CAP_ASPM_L1_2;
+	dw_pcie_writel_dbi(&pcie->pci, pcie->cfg_link_cap_l1sub, val);
+}
+
+static inline u32 event_counter_prog(struct tegra_pcie_dw *pcie, u32 event)
+{
+	u32 val;
+
+	val = dw_pcie_readl_dbi(&pcie->pci, pcie->ras_des_cap +
+				PCIE_RAS_DES_EVENT_COUNTER_CONTROL);
+	val &= ~(EVENT_COUNTER_EVENT_SEL_MASK << EVENT_COUNTER_EVENT_SEL_SHIFT);
+	val |= EVENT_COUNTER_GROUP_5 << EVENT_COUNTER_GROUP_SEL_SHIFT;
+	val |= event << EVENT_COUNTER_EVENT_SEL_SHIFT;
+	val |= EVENT_COUNTER_ENABLE_ALL << EVENT_COUNTER_ENABLE_SHIFT;
+	dw_pcie_writel_dbi(&pcie->pci, pcie->ras_des_cap +
+			   PCIE_RAS_DES_EVENT_COUNTER_CONTROL, val);
+	val = dw_pcie_readl_dbi(&pcie->pci, pcie->ras_des_cap +
+				PCIE_RAS_DES_EVENT_COUNTER_DATA);
+
+	return val;
+}
+
+static int aspm_state_cnt(struct seq_file *s, void *data)
+{
+	struct tegra_pcie_dw *pcie = (struct tegra_pcie_dw *)
+				     dev_get_drvdata(s->private);
+	u32 val;
+
+	seq_printf(s, "Tx L0s entry count : %u\n",
+		   event_counter_prog(pcie, EVENT_COUNTER_EVENT_Tx_L0S));
+
+	seq_printf(s, "Rx L0s entry count : %u\n",
+		   event_counter_prog(pcie, EVENT_COUNTER_EVENT_Rx_L0S));
+
+	seq_printf(s, "Link L1 entry count : %u\n",
+		   event_counter_prog(pcie, EVENT_COUNTER_EVENT_L1));
+
+	seq_printf(s, "Link L1.1 entry count : %u\n",
+		   event_counter_prog(pcie, EVENT_COUNTER_EVENT_L1_1));
+
+	seq_printf(s, "Link L1.2 entry count : %u\n",
+		   event_counter_prog(pcie, EVENT_COUNTER_EVENT_L1_2));
+
+	/* Clear all counters */
+	dw_pcie_writel_dbi(&pcie->pci, pcie->ras_des_cap +
+			   PCIE_RAS_DES_EVENT_COUNTER_CONTROL,
+			   EVENT_COUNTER_ALL_CLEAR);
+
+	/* Re-enable counting */
+	val = EVENT_COUNTER_ENABLE_ALL << EVENT_COUNTER_ENABLE_SHIFT;
+	val |= EVENT_COUNTER_GROUP_5 << EVENT_COUNTER_GROUP_SEL_SHIFT;
+	dw_pcie_writel_dbi(&pcie->pci, pcie->ras_des_cap +
+			   PCIE_RAS_DES_EVENT_COUNTER_CONTROL, val);
+
+	return 0;
+}
+
+static void init_host_aspm(struct tegra_pcie_dw *pcie)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	u32 val;
+
+	val = dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_L1SS);
+	pcie->cfg_link_cap_l1sub = val + PCI_L1SS_CAP;
+
+	pcie->ras_des_cap = dw_pcie_find_ext_capability(&pcie->pci,
+							PCI_EXT_CAP_ID_VNDR);
+
+	/* Enable ASPM counters */
+	val = EVENT_COUNTER_ENABLE_ALL << EVENT_COUNTER_ENABLE_SHIFT;
+	val |= EVENT_COUNTER_GROUP_5 << EVENT_COUNTER_GROUP_SEL_SHIFT;
+	dw_pcie_writel_dbi(pci, pcie->ras_des_cap +
+			   PCIE_RAS_DES_EVENT_COUNTER_CONTROL, val);
+
+	/* Program T_cmrt and T_pwr_on values */
+	val = dw_pcie_readl_dbi(pci, pcie->cfg_link_cap_l1sub);
+	val &= ~(PCI_L1SS_CAP_CM_RESTORE_TIME | PCI_L1SS_CAP_P_PWR_ON_VALUE);
+	val |= (pcie->aspm_cmrt << 8);
+	val |= (pcie->aspm_pwr_on_t << 19);
+	dw_pcie_writel_dbi(pci, pcie->cfg_link_cap_l1sub, val);
+
+	/* Program L0s and L1 entrance latencies */
+	val = dw_pcie_readl_dbi(pci, PCIE_PORT_AFR);
+	val &= ~PORT_AFR_L0S_ENTRANCE_LAT_MASK;
+	val |= (pcie->aspm_l0s_enter_lat << PORT_AFR_L0S_ENTRANCE_LAT_SHIFT);
+	val |= PORT_AFR_ENTER_ASPM;
+	dw_pcie_writel_dbi(pci, PCIE_PORT_AFR, val);
+}
+
+static void init_debugfs(struct tegra_pcie_dw *pcie)
+{
+	debugfs_create_devm_seqfile(pcie->dev, "aspm_state_cnt", pcie->debugfs,
+				    aspm_state_cnt);
+}
+#else
+static inline void disable_aspm_l12(struct tegra_pcie_dw *pcie) { return; }
+static inline void disable_aspm_l11(struct tegra_pcie_dw *pcie) { return; }
+static inline void init_host_aspm(struct tegra_pcie_dw *pcie) { return; }
+static inline void init_debugfs(struct tegra_pcie_dw *pcie) { return; }
+#endif
+
+static void tegra_pcie_enable_system_interrupts(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+	u32 val;
+	u16 val_w;
+
+	val = appl_readl(pcie, APPL_INTR_EN_L0_0);
+	val |= APPL_INTR_EN_L0_0_LINK_STATE_INT_EN;
+	appl_writel(pcie, val, APPL_INTR_EN_L0_0);
+
+	if (!pcie->of_data->has_sbr_reset_fix) {
+		val = appl_readl(pcie, APPL_INTR_EN_L1_0_0);
+		val |= APPL_INTR_EN_L1_0_0_LINK_REQ_RST_NOT_INT_EN;
+		appl_writel(pcie, val, APPL_INTR_EN_L1_0_0);
+	}
+
+	if (pcie->enable_cdm_check) {
+		val = appl_readl(pcie, APPL_INTR_EN_L0_0);
+		val |= pcie->of_data->cdm_chk_int_en_bit;
+		appl_writel(pcie, val, APPL_INTR_EN_L0_0);
+
+		val = appl_readl(pcie, APPL_INTR_EN_L1_18);
+		val |= APPL_INTR_EN_L1_18_CDM_REG_CHK_CMP_ERR;
+		val |= APPL_INTR_EN_L1_18_CDM_REG_CHK_LOGIC_ERR;
+		appl_writel(pcie, val, APPL_INTR_EN_L1_18);
+	}
+
+	val_w = dw_pcie_readw_dbi(&pcie->pci, pcie->pcie_cap_base +
+				  PCI_EXP_LNKSTA);
+	pcie->init_link_width = FIELD_GET(PCI_EXP_LNKSTA_NLW, val_w);
+
+	val_w = dw_pcie_readw_dbi(&pcie->pci, pcie->pcie_cap_base +
+				  PCI_EXP_LNKCTL);
+	val_w |= PCI_EXP_LNKCTL_LBMIE;
+	dw_pcie_writew_dbi(&pcie->pci, pcie->pcie_cap_base + PCI_EXP_LNKCTL,
+			   val_w);
+}
+
+static void tegra_pcie_enable_legacy_interrupts(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+	u32 val;
+
+	/* Enable legacy interrupt generation */
+	val = appl_readl(pcie, APPL_INTR_EN_L0_0);
+	val |= APPL_INTR_EN_L0_0_SYS_INTR_EN;
+	val |= APPL_INTR_EN_L0_0_INT_INT_EN;
+	appl_writel(pcie, val, APPL_INTR_EN_L0_0);
+
+	val = appl_readl(pcie, APPL_INTR_EN_L1_8_0);
+	val |= APPL_INTR_EN_L1_8_INTX_EN;
+	val |= APPL_INTR_EN_L1_8_AUTO_BW_INT_EN;
+	val |= APPL_INTR_EN_L1_8_BW_MGT_INT_EN;
+	if (IS_ENABLED(CONFIG_PCIEAER))
+		val |= APPL_INTR_EN_L1_8_AER_INT_EN;
+	appl_writel(pcie, val, APPL_INTR_EN_L1_8_0);
+}
+
+static void tegra_pcie_enable_msi_interrupts(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+	u32 val;
+
+	/* Enable MSI interrupt generation */
+	val = appl_readl(pcie, APPL_INTR_EN_L0_0);
+	val |= APPL_INTR_EN_L0_0_SYS_MSI_INTR_EN;
+	val |= APPL_INTR_EN_L0_0_MSI_RCV_INT_EN;
+	appl_writel(pcie, val, APPL_INTR_EN_L0_0);
+}
+
+static void tegra_pcie_enable_interrupts(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+
+	/* Clear interrupt statuses before enabling interrupts */
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L0);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_0_0);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_1);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_2);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_3);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_6);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_7);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_8_0);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_9);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_10);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_11);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_13);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_14);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_15);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_17);
+
+	tegra_pcie_enable_system_interrupts(pp);
+	tegra_pcie_enable_legacy_interrupts(pp);
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		tegra_pcie_enable_msi_interrupts(pp);
+}
+
+static void config_gen3_gen4_eq_presets(struct tegra_pcie_dw *pcie)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	u32 val, offset, i;
+
+	/* Program init preset */
+	for (i = 0; i < pcie->num_lanes; i++) {
+		val = dw_pcie_readw_dbi(pci, CAP_SPCIE_CAP_OFF + (i * 2));
+		val &= ~CAP_SPCIE_CAP_OFF_DSP_TX_PRESET0_MASK;
+		val |= GEN3_GEN4_EQ_PRESET_INIT;
+		val &= ~CAP_SPCIE_CAP_OFF_USP_TX_PRESET0_MASK;
+		val |= (GEN3_GEN4_EQ_PRESET_INIT <<
+			   CAP_SPCIE_CAP_OFF_USP_TX_PRESET0_SHIFT);
+		dw_pcie_writew_dbi(pci, CAP_SPCIE_CAP_OFF + (i * 2), val);
+
+		offset = dw_pcie_find_ext_capability(pci,
+						     PCI_EXT_CAP_ID_PL_16GT) +
+				PCI_PL_16GT_LE_CTRL;
+		val = dw_pcie_readb_dbi(pci, offset + i);
+		val &= ~PCI_PL_16GT_LE_CTRL_DSP_TX_PRESET_MASK;
+		val |= GEN3_GEN4_EQ_PRESET_INIT;
+		val &= ~PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK;
+		val |= (GEN3_GEN4_EQ_PRESET_INIT <<
+			PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT);
+		dw_pcie_writeb_dbi(pci, offset + i, val);
+	}
+
+	val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
+	val &= ~GEN3_RELATED_OFF_RATE_SHADOW_SEL_MASK;
+	dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
+
+	val = dw_pcie_readl_dbi(pci, GEN3_EQ_CONTROL_OFF);
+	val &= ~GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_MASK;
+	val |= (0x3ff << GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_SHIFT);
+	val &= ~GEN3_EQ_CONTROL_OFF_FB_MODE_MASK;
+	dw_pcie_writel_dbi(pci, GEN3_EQ_CONTROL_OFF, val);
+
+	val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
+	val &= ~GEN3_RELATED_OFF_RATE_SHADOW_SEL_MASK;
+	val |= (0x1 << GEN3_RELATED_OFF_RATE_SHADOW_SEL_SHIFT);
+	dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
+
+	val = dw_pcie_readl_dbi(pci, GEN3_EQ_CONTROL_OFF);
+	val &= ~GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_MASK;
+	val |= (pcie->of_data->gen4_preset_vec <<
+		GEN3_EQ_CONTROL_OFF_PSET_REQ_VEC_SHIFT);
+	val &= ~GEN3_EQ_CONTROL_OFF_FB_MODE_MASK;
+	dw_pcie_writel_dbi(pci, GEN3_EQ_CONTROL_OFF, val);
+
+	val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
+	val &= ~GEN3_RELATED_OFF_RATE_SHADOW_SEL_MASK;
+	dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
+}
+
+static int tegra_pcie_dw_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+	u32 val;
+	u16 val_16;
+
+	pp->bridge->ops = &tegra_pci_ops;
+
+	if (!pcie->pcie_cap_base)
+		pcie->pcie_cap_base = dw_pcie_find_capability(&pcie->pci,
+							      PCI_CAP_ID_EXP);
+
+	val = dw_pcie_readl_dbi(pci, PCI_IO_BASE);
+	val &= ~(IO_BASE_IO_DECODE | IO_BASE_IO_DECODE_BIT8);
+	dw_pcie_writel_dbi(pci, PCI_IO_BASE, val);
+
+	val = dw_pcie_readl_dbi(pci, PCI_PREF_MEMORY_BASE);
+	val |= CFG_PREF_MEM_LIMIT_BASE_MEM_DECODE;
+	val |= CFG_PREF_MEM_LIMIT_BASE_MEM_LIMIT_DECODE;
+	dw_pcie_writel_dbi(pci, PCI_PREF_MEMORY_BASE, val);
+
+	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, 0);
+
+	/* Enable as 0xFFFF0001 response for CRS */
+	val = dw_pcie_readl_dbi(pci, PORT_LOGIC_AMBA_ERROR_RESPONSE_DEFAULT);
+	val &= ~(AMBA_ERROR_RESPONSE_CRS_MASK << AMBA_ERROR_RESPONSE_CRS_SHIFT);
+	val |= (AMBA_ERROR_RESPONSE_CRS_OKAY_FFFF0001 <<
+		AMBA_ERROR_RESPONSE_CRS_SHIFT);
+	dw_pcie_writel_dbi(pci, PORT_LOGIC_AMBA_ERROR_RESPONSE_DEFAULT, val);
+
+	/* Configure Max lane width from DT */
+	val = dw_pcie_readl_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKCAP);
+	val &= ~PCI_EXP_LNKCAP_MLW;
+	val |= FIELD_PREP(PCI_EXP_LNKCAP_MLW, pcie->num_lanes);
+	dw_pcie_writel_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKCAP, val);
+
+	/* Clear Slot Clock Configuration bit if SRNS configuration */
+	if (pcie->enable_srns) {
+		val_16 = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base +
+					   PCI_EXP_LNKSTA);
+		val_16 &= ~PCI_EXP_LNKSTA_SLC;
+		dw_pcie_writew_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKSTA,
+				   val_16);
+	}
+
+	config_gen3_gen4_eq_presets(pcie);
+
+	init_host_aspm(pcie);
+
+	/* Disable ASPM-L1SS advertisement if there is no CLKREQ routing */
+	if (!pcie->supports_clkreq) {
+		disable_aspm_l11(pcie);
+		disable_aspm_l12(pcie);
+	}
+
+	if (!pcie->of_data->has_l1ss_exit_fix) {
+		val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
+		val &= ~GEN3_RELATED_OFF_GEN3_ZRXDC_NONCOMPL;
+		dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
+	}
+
+	if (pcie->update_fc_fixup) {
+		val = dw_pcie_readl_dbi(pci, CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF);
+		val |= 0x1 << CFG_TIMER_CTRL_ACK_NAK_SHIFT;
+		dw_pcie_writel_dbi(pci, CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF, val);
+	}
+
+	clk_set_rate(pcie->core_clk, GEN4_CORE_CLK_FREQ);
+
+	return 0;
+}
+
+static int tegra_pcie_dw_start_link(struct dw_pcie *pci)
+{
+	struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+	struct dw_pcie_rp *pp = &pci->pp;
+	u32 val, offset, tmp;
+	bool retry = true;
+
+	if (pcie->of_data->mode == DW_PCIE_EP_TYPE) {
+		enable_irq(pcie->pex_rst_irq);
+		return 0;
+	}
+
+retry_link:
+	/* Assert RST */
+	val = appl_readl(pcie, APPL_PINMUX);
+	val &= ~APPL_PINMUX_PEX_RST;
+	appl_writel(pcie, val, APPL_PINMUX);
+
+	usleep_range(100, 200);
+
+	/* Enable LTSSM */
+	val = appl_readl(pcie, APPL_CTRL);
+	val |= APPL_CTRL_LTSSM_EN;
+	appl_writel(pcie, val, APPL_CTRL);
+
+	/* De-assert RST */
+	val = appl_readl(pcie, APPL_PINMUX);
+	val |= APPL_PINMUX_PEX_RST;
+	appl_writel(pcie, val, APPL_PINMUX);
+
+	msleep(100);
+
+	if (dw_pcie_wait_for_link(pci)) {
+		if (!retry)
+			return 0;
+		/*
+		 * There are some endpoints which can't get the link up if
+		 * root port has Data Link Feature (DLF) enabled.
+		 * Refer Spec rev 4.0 ver 1.0 sec 3.4.2 & 7.7.4 for more info
+		 * on Scaled Flow Control and DLF.
+		 * So, need to confirm that is indeed the case here and attempt
+		 * link up once again with DLF disabled.
+		 */
+		val = appl_readl(pcie, APPL_DEBUG);
+		val &= APPL_DEBUG_LTSSM_STATE_MASK;
+		val >>= APPL_DEBUG_LTSSM_STATE_SHIFT;
+		tmp = appl_readl(pcie, APPL_LINK_STATUS);
+		tmp &= APPL_LINK_STATUS_RDLH_LINK_UP;
+		if (!(val == 0x11 && !tmp)) {
+			/* Link is down for all good reasons */
+			return 0;
+		}
+
+		dev_info(pci->dev, "Link is down in DLL");
+		dev_info(pci->dev, "Trying again with DLFE disabled\n");
+		/* Disable LTSSM */
+		val = appl_readl(pcie, APPL_CTRL);
+		val &= ~APPL_CTRL_LTSSM_EN;
+		appl_writel(pcie, val, APPL_CTRL);
+
+		reset_control_assert(pcie->core_rst);
+		reset_control_deassert(pcie->core_rst);
+
+		offset = dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_DLF);
+		val = dw_pcie_readl_dbi(pci, offset + PCI_DLF_CAP);
+		val &= ~PCI_DLF_EXCHANGE_ENABLE;
+		dw_pcie_writel_dbi(pci, offset + PCI_DLF_CAP, val);
+
+		tegra_pcie_dw_host_init(pp);
+		dw_pcie_setup_rc(pp);
+
+		retry = false;
+		goto retry_link;
+	}
+
+	tegra_pcie_icc_set(pcie);
+
+	tegra_pcie_enable_interrupts(pp);
+
+	return 0;
+}
+
+static int tegra_pcie_dw_link_up(struct dw_pcie *pci)
+{
+	struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+	u32 val = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKSTA);
+
+	return !!(val & PCI_EXP_LNKSTA_DLLLA);
+}
+
+static void tegra_pcie_dw_stop_link(struct dw_pcie *pci)
+{
+	struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+
+	disable_irq(pcie->pex_rst_irq);
+}
+
+static const struct dw_pcie_ops tegra_dw_pcie_ops = {
+	.link_up = tegra_pcie_dw_link_up,
+	.start_link = tegra_pcie_dw_start_link,
+	.stop_link = tegra_pcie_dw_stop_link,
+};
+
+static const struct dw_pcie_host_ops tegra_pcie_dw_host_ops = {
+	.host_init = tegra_pcie_dw_host_init,
+};
+
+static void tegra_pcie_disable_phy(struct tegra_pcie_dw *pcie)
+{
+	unsigned int phy_count = pcie->phy_count;
+
+	while (phy_count--) {
+		phy_power_off(pcie->phys[phy_count]);
+		phy_exit(pcie->phys[phy_count]);
+	}
+}
+
+static int tegra_pcie_enable_phy(struct tegra_pcie_dw *pcie)
+{
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < pcie->phy_count; i++) {
+		ret = phy_init(pcie->phys[i]);
+		if (ret < 0)
+			goto phy_power_off;
+
+		ret = phy_power_on(pcie->phys[i]);
+		if (ret < 0)
+			goto phy_exit;
+	}
+
+	return 0;
+
+phy_power_off:
+	while (i--) {
+		phy_power_off(pcie->phys[i]);
+phy_exit:
+		phy_exit(pcie->phys[i]);
+	}
+
+	return ret;
+}
+
+static int tegra_pcie_dw_parse_dt(struct tegra_pcie_dw *pcie)
+{
+	struct platform_device *pdev = to_platform_device(pcie->dev);
+	struct device_node *np = pcie->dev->of_node;
+	int ret;
+
+	pcie->dbi_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
+	if (!pcie->dbi_res) {
+		dev_err(pcie->dev, "Failed to find \"dbi\" region\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(np, "nvidia,aspm-cmrt-us", &pcie->aspm_cmrt);
+	if (ret < 0) {
+		dev_info(pcie->dev, "Failed to read ASPM T_cmrt: %d\n", ret);
+		return ret;
+	}
+
+	ret = of_property_read_u32(np, "nvidia,aspm-pwr-on-t-us",
+				   &pcie->aspm_pwr_on_t);
+	if (ret < 0)
+		dev_info(pcie->dev, "Failed to read ASPM Power On time: %d\n",
+			 ret);
+
+	ret = of_property_read_u32(np, "nvidia,aspm-l0s-entrance-latency-us",
+				   &pcie->aspm_l0s_enter_lat);
+	if (ret < 0)
+		dev_info(pcie->dev,
+			 "Failed to read ASPM L0s Entrance latency: %d\n", ret);
+
+	ret = of_property_read_u32(np, "num-lanes", &pcie->num_lanes);
+	if (ret < 0) {
+		dev_err(pcie->dev, "Failed to read num-lanes: %d\n", ret);
+		return ret;
+	}
+
+	ret = of_property_read_u32_index(np, "nvidia,bpmp", 1, &pcie->cid);
+	if (ret) {
+		dev_err(pcie->dev, "Failed to read Controller-ID: %d\n", ret);
+		return ret;
+	}
+
+	ret = of_property_count_strings(np, "phy-names");
+	if (ret < 0) {
+		dev_err(pcie->dev, "Failed to find PHY entries: %d\n",
+			ret);
+		return ret;
+	}
+	pcie->phy_count = ret;
+
+	if (of_property_read_bool(np, "nvidia,update-fc-fixup"))
+		pcie->update_fc_fixup = true;
+
+	/* RP using an external REFCLK is supported only in Tegra234 */
+	if (pcie->of_data->version == TEGRA194_DWC_IP_VER) {
+		if (pcie->of_data->mode == DW_PCIE_EP_TYPE)
+			pcie->enable_ext_refclk = true;
+	} else {
+		pcie->enable_ext_refclk =
+			of_property_read_bool(pcie->dev->of_node,
+					      "nvidia,enable-ext-refclk");
+	}
+
+	pcie->supports_clkreq =
+		of_property_read_bool(pcie->dev->of_node, "supports-clkreq");
+
+	pcie->enable_cdm_check =
+		of_property_read_bool(np, "snps,enable-cdm-check");
+
+	if (pcie->of_data->version == TEGRA234_DWC_IP_VER)
+		pcie->enable_srns =
+			of_property_read_bool(np, "nvidia,enable-srns");
+
+	if (pcie->of_data->mode == DW_PCIE_RC_TYPE)
+		return 0;
+
+	/* Endpoint mode specific DT entries */
+	pcie->pex_rst_gpiod = devm_gpiod_get(pcie->dev, "reset", GPIOD_IN);
+	if (IS_ERR(pcie->pex_rst_gpiod)) {
+		int err = PTR_ERR(pcie->pex_rst_gpiod);
+		const char *level = KERN_ERR;
+
+		if (err == -EPROBE_DEFER)
+			level = KERN_DEBUG;
+
+		dev_printk(level, pcie->dev,
+			   dev_fmt("Failed to get PERST GPIO: %d\n"),
+			   err);
+		return err;
+	}
+
+	pcie->pex_refclk_sel_gpiod = devm_gpiod_get(pcie->dev,
+						    "nvidia,refclk-select",
+						    GPIOD_OUT_HIGH);
+	if (IS_ERR(pcie->pex_refclk_sel_gpiod)) {
+		int err = PTR_ERR(pcie->pex_refclk_sel_gpiod);
+		const char *level = KERN_ERR;
+
+		if (err == -EPROBE_DEFER)
+			level = KERN_DEBUG;
+
+		dev_printk(level, pcie->dev,
+			   dev_fmt("Failed to get REFCLK select GPIOs: %d\n"),
+			   err);
+		pcie->pex_refclk_sel_gpiod = NULL;
+	}
+
+	return 0;
+}
+
+static int tegra_pcie_bpmp_set_ctrl_state(struct tegra_pcie_dw *pcie,
+					  bool enable)
+{
+	struct mrq_uphy_response resp;
+	struct tegra_bpmp_message msg;
+	struct mrq_uphy_request req;
+
+	/*
+	 * Controller-5 doesn't need to have its state set by BPMP-FW in
+	 * Tegra194
+	 */
+	if (pcie->of_data->version == TEGRA194_DWC_IP_VER && pcie->cid == 5)
+		return 0;
+
+	memset(&req, 0, sizeof(req));
+	memset(&resp, 0, sizeof(resp));
+
+	req.cmd = CMD_UPHY_PCIE_CONTROLLER_STATE;
+	req.controller_state.pcie_controller = pcie->cid;
+	req.controller_state.enable = enable;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.mrq = MRQ_UPHY;
+	msg.tx.data = &req;
+	msg.tx.size = sizeof(req);
+	msg.rx.data = &resp;
+	msg.rx.size = sizeof(resp);
+
+	return tegra_bpmp_transfer(pcie->bpmp, &msg);
+}
+
+static int tegra_pcie_bpmp_set_pll_state(struct tegra_pcie_dw *pcie,
+					 bool enable)
+{
+	struct mrq_uphy_response resp;
+	struct tegra_bpmp_message msg;
+	struct mrq_uphy_request req;
+
+	memset(&req, 0, sizeof(req));
+	memset(&resp, 0, sizeof(resp));
+
+	if (enable) {
+		req.cmd = CMD_UPHY_PCIE_EP_CONTROLLER_PLL_INIT;
+		req.ep_ctrlr_pll_init.ep_controller = pcie->cid;
+	} else {
+		req.cmd = CMD_UPHY_PCIE_EP_CONTROLLER_PLL_OFF;
+		req.ep_ctrlr_pll_off.ep_controller = pcie->cid;
+	}
+
+	memset(&msg, 0, sizeof(msg));
+	msg.mrq = MRQ_UPHY;
+	msg.tx.data = &req;
+	msg.tx.size = sizeof(req);
+	msg.rx.data = &resp;
+	msg.rx.size = sizeof(resp);
+
+	return tegra_bpmp_transfer(pcie->bpmp, &msg);
+}
+
+static void tegra_pcie_downstream_dev_to_D0(struct tegra_pcie_dw *pcie)
+{
+	struct dw_pcie_rp *pp = &pcie->pci.pp;
+	struct pci_bus *child, *root_bus = NULL;
+	struct pci_dev *pdev;
+
+	/*
+	 * link doesn't go into L2 state with some of the endpoints with Tegra
+	 * if they are not in D0 state. So, need to make sure that immediate
+	 * downstream devices are in D0 state before sending PME_TurnOff to put
+	 * link into L2 state.
+	 * This is as per PCI Express Base r4.0 v1.0 September 27-2017,
+	 * 5.2 Link State Power Management (Page #428).
+	 */
+
+	list_for_each_entry(child, &pp->bridge->bus->children, node) {
+		/* Bring downstream devices to D0 if they are not already in */
+		if (child->parent == pp->bridge->bus) {
+			root_bus = child;
+			break;
+		}
+	}
+
+	if (!root_bus) {
+		dev_err(pcie->dev, "Failed to find downstream devices\n");
+		return;
+	}
+
+	list_for_each_entry(pdev, &root_bus->devices, bus_list) {
+		if (PCI_SLOT(pdev->devfn) == 0) {
+			if (pci_set_power_state(pdev, PCI_D0))
+				dev_err(pcie->dev,
+					"Failed to transition %s to D0 state\n",
+					dev_name(&pdev->dev));
+		}
+	}
+}
+
+static int tegra_pcie_get_slot_regulators(struct tegra_pcie_dw *pcie)
+{
+	pcie->slot_ctl_3v3 = devm_regulator_get_optional(pcie->dev, "vpcie3v3");
+	if (IS_ERR(pcie->slot_ctl_3v3)) {
+		if (PTR_ERR(pcie->slot_ctl_3v3) != -ENODEV)
+			return PTR_ERR(pcie->slot_ctl_3v3);
+
+		pcie->slot_ctl_3v3 = NULL;
+	}
+
+	pcie->slot_ctl_12v = devm_regulator_get_optional(pcie->dev, "vpcie12v");
+	if (IS_ERR(pcie->slot_ctl_12v)) {
+		if (PTR_ERR(pcie->slot_ctl_12v) != -ENODEV)
+			return PTR_ERR(pcie->slot_ctl_12v);
+
+		pcie->slot_ctl_12v = NULL;
+	}
+
+	return 0;
+}
+
+static int tegra_pcie_enable_slot_regulators(struct tegra_pcie_dw *pcie)
+{
+	int ret;
+
+	if (pcie->slot_ctl_3v3) {
+		ret = regulator_enable(pcie->slot_ctl_3v3);
+		if (ret < 0) {
+			dev_err(pcie->dev,
+				"Failed to enable 3.3V slot supply: %d\n", ret);
+			return ret;
+		}
+	}
+
+	if (pcie->slot_ctl_12v) {
+		ret = regulator_enable(pcie->slot_ctl_12v);
+		if (ret < 0) {
+			dev_err(pcie->dev,
+				"Failed to enable 12V slot supply: %d\n", ret);
+			goto fail_12v_enable;
+		}
+	}
+
+	/*
+	 * According to PCI Express Card Electromechanical Specification
+	 * Revision 1.1, Table-2.4, T_PVPERL (Power stable to PERST# inactive)
+	 * should be a minimum of 100ms.
+	 */
+	if (pcie->slot_ctl_3v3 || pcie->slot_ctl_12v)
+		msleep(100);
+
+	return 0;
+
+fail_12v_enable:
+	if (pcie->slot_ctl_3v3)
+		regulator_disable(pcie->slot_ctl_3v3);
+	return ret;
+}
+
+static void tegra_pcie_disable_slot_regulators(struct tegra_pcie_dw *pcie)
+{
+	if (pcie->slot_ctl_12v)
+		regulator_disable(pcie->slot_ctl_12v);
+	if (pcie->slot_ctl_3v3)
+		regulator_disable(pcie->slot_ctl_3v3);
+}
+
+static int tegra_pcie_config_controller(struct tegra_pcie_dw *pcie,
+					bool en_hw_hot_rst)
+{
+	int ret;
+	u32 val;
+
+	ret = tegra_pcie_bpmp_set_ctrl_state(pcie, true);
+	if (ret) {
+		dev_err(pcie->dev,
+			"Failed to enable controller %u: %d\n", pcie->cid, ret);
+		return ret;
+	}
+
+	if (pcie->enable_ext_refclk) {
+		ret = tegra_pcie_bpmp_set_pll_state(pcie, true);
+		if (ret) {
+			dev_err(pcie->dev, "Failed to init UPHY: %d\n", ret);
+			goto fail_pll_init;
+		}
+	}
+
+	ret = tegra_pcie_enable_slot_regulators(pcie);
+	if (ret < 0)
+		goto fail_slot_reg_en;
+
+	ret = regulator_enable(pcie->pex_ctl_supply);
+	if (ret < 0) {
+		dev_err(pcie->dev, "Failed to enable regulator: %d\n", ret);
+		goto fail_reg_en;
+	}
+
+	ret = clk_prepare_enable(pcie->core_clk);
+	if (ret) {
+		dev_err(pcie->dev, "Failed to enable core clock: %d\n", ret);
+		goto fail_core_clk;
+	}
+
+	ret = reset_control_deassert(pcie->core_apb_rst);
+	if (ret) {
+		dev_err(pcie->dev, "Failed to deassert core APB reset: %d\n",
+			ret);
+		goto fail_core_apb_rst;
+	}
+
+	if (en_hw_hot_rst || pcie->of_data->has_sbr_reset_fix) {
+		/* Enable HW_HOT_RST mode */
+		val = appl_readl(pcie, APPL_CTRL);
+		val &= ~(APPL_CTRL_HW_HOT_RST_MODE_MASK <<
+			 APPL_CTRL_HW_HOT_RST_MODE_SHIFT);
+		val |= (APPL_CTRL_HW_HOT_RST_MODE_IMDT_RST_LTSSM_EN <<
+			APPL_CTRL_HW_HOT_RST_MODE_SHIFT);
+		val |= APPL_CTRL_HW_HOT_RST_EN;
+		appl_writel(pcie, val, APPL_CTRL);
+	}
+
+	ret = tegra_pcie_enable_phy(pcie);
+	if (ret) {
+		dev_err(pcie->dev, "Failed to enable PHY: %d\n", ret);
+		goto fail_phy;
+	}
+
+	/* Update CFG base address */
+	appl_writel(pcie, pcie->dbi_res->start & APPL_CFG_BASE_ADDR_MASK,
+		    APPL_CFG_BASE_ADDR);
+
+	/* Configure this core for RP mode operation */
+	appl_writel(pcie, APPL_DM_TYPE_RP, APPL_DM_TYPE);
+
+	appl_writel(pcie, 0x0, APPL_CFG_SLCG_OVERRIDE);
+
+	val = appl_readl(pcie, APPL_CTRL);
+	appl_writel(pcie, val | APPL_CTRL_SYS_PRE_DET_STATE, APPL_CTRL);
+
+	val = appl_readl(pcie, APPL_CFG_MISC);
+	val |= (APPL_CFG_MISC_ARCACHE_VAL << APPL_CFG_MISC_ARCACHE_SHIFT);
+	appl_writel(pcie, val, APPL_CFG_MISC);
+
+	if (pcie->enable_srns || pcie->enable_ext_refclk) {
+		/*
+		 * When Tegra PCIe RP is using external clock, it cannot supply
+		 * same clock to its downstream hierarchy. Hence, gate PCIe RP
+		 * REFCLK out pads when RP & EP are using separate clocks or RP
+		 * is using an external REFCLK.
+		 */
+		val = appl_readl(pcie, APPL_PINMUX);
+		val |= APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE_EN;
+		val &= ~APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE;
+		appl_writel(pcie, val, APPL_PINMUX);
+	}
+
+	if (!pcie->supports_clkreq) {
+		val = appl_readl(pcie, APPL_PINMUX);
+		val |= APPL_PINMUX_CLKREQ_OVERRIDE_EN;
+		val &= ~APPL_PINMUX_CLKREQ_OVERRIDE;
+		appl_writel(pcie, val, APPL_PINMUX);
+	}
+
+	/* Update iATU_DMA base address */
+	appl_writel(pcie,
+		    pcie->atu_dma_res->start & APPL_CFG_IATU_DMA_BASE_ADDR_MASK,
+		    APPL_CFG_IATU_DMA_BASE_ADDR);
+
+	reset_control_deassert(pcie->core_rst);
+
+	return ret;
+
+fail_phy:
+	reset_control_assert(pcie->core_apb_rst);
+fail_core_apb_rst:
+	clk_disable_unprepare(pcie->core_clk);
+fail_core_clk:
+	regulator_disable(pcie->pex_ctl_supply);
+fail_reg_en:
+	tegra_pcie_disable_slot_regulators(pcie);
+fail_slot_reg_en:
+	if (pcie->enable_ext_refclk)
+		tegra_pcie_bpmp_set_pll_state(pcie, false);
+fail_pll_init:
+	tegra_pcie_bpmp_set_ctrl_state(pcie, false);
+
+	return ret;
+}
+
+static void tegra_pcie_unconfig_controller(struct tegra_pcie_dw *pcie)
+{
+	int ret;
+
+	ret = reset_control_assert(pcie->core_rst);
+	if (ret)
+		dev_err(pcie->dev, "Failed to assert \"core\" reset: %d\n", ret);
+
+	tegra_pcie_disable_phy(pcie);
+
+	ret = reset_control_assert(pcie->core_apb_rst);
+	if (ret)
+		dev_err(pcie->dev, "Failed to assert APB reset: %d\n", ret);
+
+	clk_disable_unprepare(pcie->core_clk);
+
+	ret = regulator_disable(pcie->pex_ctl_supply);
+	if (ret)
+		dev_err(pcie->dev, "Failed to disable regulator: %d\n", ret);
+
+	tegra_pcie_disable_slot_regulators(pcie);
+
+	if (pcie->enable_ext_refclk) {
+		ret = tegra_pcie_bpmp_set_pll_state(pcie, false);
+		if (ret)
+			dev_err(pcie->dev, "Failed to deinit UPHY: %d\n", ret);
+	}
+
+	ret = tegra_pcie_bpmp_set_ctrl_state(pcie, false);
+	if (ret)
+		dev_err(pcie->dev, "Failed to disable controller %d: %d\n",
+			pcie->cid, ret);
+}
+
+static int tegra_pcie_init_controller(struct tegra_pcie_dw *pcie)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	struct dw_pcie_rp *pp = &pci->pp;
+	int ret;
+
+	ret = tegra_pcie_config_controller(pcie, false);
+	if (ret < 0)
+		return ret;
+
+	pp->ops = &tegra_pcie_dw_host_ops;
+
+	ret = dw_pcie_host_init(pp);
+	if (ret < 0) {
+		dev_err(pcie->dev, "Failed to add PCIe port: %d\n", ret);
+		goto fail_host_init;
+	}
+
+	return 0;
+
+fail_host_init:
+	tegra_pcie_unconfig_controller(pcie);
+	return ret;
+}
+
+static int tegra_pcie_try_link_l2(struct tegra_pcie_dw *pcie)
+{
+	u32 val;
+
+	if (!tegra_pcie_dw_link_up(&pcie->pci))
+		return 0;
+
+	val = appl_readl(pcie, APPL_RADM_STATUS);
+	val |= APPL_PM_XMT_TURNOFF_STATE;
+	appl_writel(pcie, val, APPL_RADM_STATUS);
+
+	return readl_poll_timeout_atomic(pcie->appl_base + APPL_DEBUG, val,
+				 val & APPL_DEBUG_PM_LINKST_IN_L2_LAT,
+				 1, PME_ACK_TIMEOUT);
+}
+
+static void tegra_pcie_dw_pme_turnoff(struct tegra_pcie_dw *pcie)
+{
+	u32 data;
+	int err;
+
+	if (!tegra_pcie_dw_link_up(&pcie->pci)) {
+		dev_dbg(pcie->dev, "PCIe link is not up...!\n");
+		return;
+	}
+
+	/*
+	 * PCIe controller exits from L2 only if reset is applied, so
+	 * controller doesn't handle interrupts. But in cases where
+	 * L2 entry fails, PERST# is asserted which can trigger surprise
+	 * link down AER. However this function call happens in
+	 * suspend_noirq(), so AER interrupt will not be processed.
+	 * Disable all interrupts to avoid such a scenario.
+	 */
+	appl_writel(pcie, 0x0, APPL_INTR_EN_L0_0);
+
+	if (tegra_pcie_try_link_l2(pcie)) {
+		dev_info(pcie->dev, "Link didn't transition to L2 state\n");
+		/*
+		 * TX lane clock freq will reset to Gen1 only if link is in L2
+		 * or detect state.
+		 * So apply pex_rst to end point to force RP to go into detect
+		 * state
+		 */
+		data = appl_readl(pcie, APPL_PINMUX);
+		data &= ~APPL_PINMUX_PEX_RST;
+		appl_writel(pcie, data, APPL_PINMUX);
+
+		/*
+		 * Some cards do not go to detect state even after de-asserting
+		 * PERST#. So, de-assert LTSSM to bring link to detect state.
+		 */
+		data = readl(pcie->appl_base + APPL_CTRL);
+		data &= ~APPL_CTRL_LTSSM_EN;
+		writel(data, pcie->appl_base + APPL_CTRL);
+
+		err = readl_poll_timeout_atomic(pcie->appl_base + APPL_DEBUG,
+						data,
+						((data &
+						APPL_DEBUG_LTSSM_STATE_MASK) >>
+						APPL_DEBUG_LTSSM_STATE_SHIFT) ==
+						LTSSM_STATE_PRE_DETECT,
+						1, LTSSM_TIMEOUT);
+		if (err)
+			dev_info(pcie->dev, "Link didn't go to detect state\n");
+	}
+	/*
+	 * DBI registers may not be accessible after this as PLL-E would be
+	 * down depending on how CLKREQ is pulled by end point
+	 */
+	data = appl_readl(pcie, APPL_PINMUX);
+	data |= (APPL_PINMUX_CLKREQ_OVERRIDE_EN | APPL_PINMUX_CLKREQ_OVERRIDE);
+	/* Cut REFCLK to slot */
+	data |= APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE_EN;
+	data &= ~APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE;
+	appl_writel(pcie, data, APPL_PINMUX);
+}
+
+static void tegra_pcie_deinit_controller(struct tegra_pcie_dw *pcie)
+{
+	tegra_pcie_downstream_dev_to_D0(pcie);
+	dw_pcie_host_deinit(&pcie->pci.pp);
+	tegra_pcie_dw_pme_turnoff(pcie);
+	tegra_pcie_unconfig_controller(pcie);
+}
+
+static int tegra_pcie_config_rp(struct tegra_pcie_dw *pcie)
+{
+	struct device *dev = pcie->dev;
+	char *name;
+	int ret;
+
+	pm_runtime_enable(dev);
+
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		dev_err(dev, "Failed to get runtime sync for PCIe dev: %d\n",
+			ret);
+		goto fail_pm_get_sync;
+	}
+
+	ret = pinctrl_pm_select_default_state(dev);
+	if (ret < 0) {
+		dev_err(dev, "Failed to configure sideband pins: %d\n", ret);
+		goto fail_pm_get_sync;
+	}
+
+	ret = tegra_pcie_init_controller(pcie);
+	if (ret < 0) {
+		dev_err(dev, "Failed to initialize controller: %d\n", ret);
+		goto fail_pm_get_sync;
+	}
+
+	pcie->link_state = tegra_pcie_dw_link_up(&pcie->pci);
+	if (!pcie->link_state) {
+		ret = -ENOMEDIUM;
+		goto fail_host_init;
+	}
+
+	name = devm_kasprintf(dev, GFP_KERNEL, "%pOFP", dev->of_node);
+	if (!name) {
+		ret = -ENOMEM;
+		goto fail_host_init;
+	}
+
+	pcie->debugfs = debugfs_create_dir(name, NULL);
+	init_debugfs(pcie);
+
+	return ret;
+
+fail_host_init:
+	tegra_pcie_deinit_controller(pcie);
+fail_pm_get_sync:
+	pm_runtime_put_sync(dev);
+	pm_runtime_disable(dev);
+	return ret;
+}
+
+static void pex_ep_event_pex_rst_assert(struct tegra_pcie_dw *pcie)
+{
+	u32 val;
+	int ret;
+
+	if (pcie->ep_state == EP_STATE_DISABLED)
+		return;
+
+	/* Disable LTSSM */
+	val = appl_readl(pcie, APPL_CTRL);
+	val &= ~APPL_CTRL_LTSSM_EN;
+	appl_writel(pcie, val, APPL_CTRL);
+
+	ret = readl_poll_timeout(pcie->appl_base + APPL_DEBUG, val,
+				 ((val & APPL_DEBUG_LTSSM_STATE_MASK) >>
+				 APPL_DEBUG_LTSSM_STATE_SHIFT) ==
+				 LTSSM_STATE_PRE_DETECT,
+				 1, LTSSM_TIMEOUT);
+	if (ret)
+		dev_err(pcie->dev, "Failed to go Detect state: %d\n", ret);
+
+	reset_control_assert(pcie->core_rst);
+
+	tegra_pcie_disable_phy(pcie);
+
+	reset_control_assert(pcie->core_apb_rst);
+
+	clk_disable_unprepare(pcie->core_clk);
+
+	pm_runtime_put_sync(pcie->dev);
+
+	if (pcie->enable_ext_refclk) {
+		ret = tegra_pcie_bpmp_set_pll_state(pcie, false);
+		if (ret)
+			dev_err(pcie->dev, "Failed to turn off UPHY: %d\n",
+				ret);
+	}
+
+	ret = tegra_pcie_bpmp_set_pll_state(pcie, false);
+	if (ret)
+		dev_err(pcie->dev, "Failed to turn off UPHY: %d\n", ret);
+
+	pcie->ep_state = EP_STATE_DISABLED;
+	dev_dbg(pcie->dev, "Uninitialization of endpoint is completed\n");
+}
+
+static void pex_ep_event_pex_rst_deassert(struct tegra_pcie_dw *pcie)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	struct dw_pcie_ep *ep = &pci->ep;
+	struct device *dev = pcie->dev;
+	u32 val;
+	int ret;
+	u16 val_16;
+
+	if (pcie->ep_state == EP_STATE_ENABLED)
+		return;
+
+	ret = pm_runtime_resume_and_get(dev);
+	if (ret < 0) {
+		dev_err(dev, "Failed to get runtime sync for PCIe dev: %d\n",
+			ret);
+		return;
+	}
+
+	ret = tegra_pcie_bpmp_set_ctrl_state(pcie, true);
+	if (ret) {
+		dev_err(pcie->dev, "Failed to enable controller %u: %d\n",
+			pcie->cid, ret);
+		goto fail_set_ctrl_state;
+	}
+
+	if (pcie->enable_ext_refclk) {
+		ret = tegra_pcie_bpmp_set_pll_state(pcie, true);
+		if (ret) {
+			dev_err(dev, "Failed to init UPHY for PCIe EP: %d\n",
+				ret);
+			goto fail_pll_init;
+		}
+	}
+
+	ret = clk_prepare_enable(pcie->core_clk);
+	if (ret) {
+		dev_err(dev, "Failed to enable core clock: %d\n", ret);
+		goto fail_core_clk_enable;
+	}
+
+	ret = reset_control_deassert(pcie->core_apb_rst);
+	if (ret) {
+		dev_err(dev, "Failed to deassert core APB reset: %d\n", ret);
+		goto fail_core_apb_rst;
+	}
+
+	ret = tegra_pcie_enable_phy(pcie);
+	if (ret) {
+		dev_err(dev, "Failed to enable PHY: %d\n", ret);
+		goto fail_phy;
+	}
+
+	/* Clear any stale interrupt statuses */
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L0);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_0_0);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_1);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_2);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_3);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_6);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_7);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_8_0);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_9);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_10);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_11);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_13);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_14);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_15);
+	appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_17);
+
+	/* configure this core for EP mode operation */
+	val = appl_readl(pcie, APPL_DM_TYPE);
+	val &= ~APPL_DM_TYPE_MASK;
+	val |= APPL_DM_TYPE_EP;
+	appl_writel(pcie, val, APPL_DM_TYPE);
+
+	appl_writel(pcie, 0x0, APPL_CFG_SLCG_OVERRIDE);
+
+	val = appl_readl(pcie, APPL_CTRL);
+	val |= APPL_CTRL_SYS_PRE_DET_STATE;
+	val |= APPL_CTRL_HW_HOT_RST_EN;
+	appl_writel(pcie, val, APPL_CTRL);
+
+	val = appl_readl(pcie, APPL_CFG_MISC);
+	val |= APPL_CFG_MISC_SLV_EP_MODE;
+	val |= (APPL_CFG_MISC_ARCACHE_VAL << APPL_CFG_MISC_ARCACHE_SHIFT);
+	appl_writel(pcie, val, APPL_CFG_MISC);
+
+	val = appl_readl(pcie, APPL_PINMUX);
+	val |= APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE_EN;
+	val |= APPL_PINMUX_CLK_OUTPUT_IN_OVERRIDE;
+	appl_writel(pcie, val, APPL_PINMUX);
+
+	appl_writel(pcie, pcie->dbi_res->start & APPL_CFG_BASE_ADDR_MASK,
+		    APPL_CFG_BASE_ADDR);
+
+	appl_writel(pcie, pcie->atu_dma_res->start &
+		    APPL_CFG_IATU_DMA_BASE_ADDR_MASK,
+		    APPL_CFG_IATU_DMA_BASE_ADDR);
+
+	val = appl_readl(pcie, APPL_INTR_EN_L0_0);
+	val |= APPL_INTR_EN_L0_0_SYS_INTR_EN;
+	val |= APPL_INTR_EN_L0_0_LINK_STATE_INT_EN;
+	val |= APPL_INTR_EN_L0_0_PCI_CMD_EN_INT_EN;
+	appl_writel(pcie, val, APPL_INTR_EN_L0_0);
+
+	val = appl_readl(pcie, APPL_INTR_EN_L1_0_0);
+	val |= APPL_INTR_EN_L1_0_0_HOT_RESET_DONE_INT_EN;
+	val |= APPL_INTR_EN_L1_0_0_RDLH_LINK_UP_INT_EN;
+	appl_writel(pcie, val, APPL_INTR_EN_L1_0_0);
+
+	reset_control_deassert(pcie->core_rst);
+
+	if (pcie->update_fc_fixup) {
+		val = dw_pcie_readl_dbi(pci, CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF);
+		val |= 0x1 << CFG_TIMER_CTRL_ACK_NAK_SHIFT;
+		dw_pcie_writel_dbi(pci, CFG_TIMER_CTRL_MAX_FUNC_NUM_OFF, val);
+	}
+
+	config_gen3_gen4_eq_presets(pcie);
+
+	init_host_aspm(pcie);
+
+	/* Disable ASPM-L1SS advertisement if there is no CLKREQ routing */
+	if (!pcie->supports_clkreq) {
+		disable_aspm_l11(pcie);
+		disable_aspm_l12(pcie);
+	}
+
+	if (!pcie->of_data->has_l1ss_exit_fix) {
+		val = dw_pcie_readl_dbi(pci, GEN3_RELATED_OFF);
+		val &= ~GEN3_RELATED_OFF_GEN3_ZRXDC_NONCOMPL;
+		dw_pcie_writel_dbi(pci, GEN3_RELATED_OFF, val);
+	}
+
+	pcie->pcie_cap_base = dw_pcie_find_capability(&pcie->pci,
+						      PCI_CAP_ID_EXP);
+
+	/* Clear Slot Clock Configuration bit if SRNS configuration */
+	if (pcie->enable_srns) {
+		val_16 = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base +
+					   PCI_EXP_LNKSTA);
+		val_16 &= ~PCI_EXP_LNKSTA_SLC;
+		dw_pcie_writew_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKSTA,
+				   val_16);
+	}
+
+	clk_set_rate(pcie->core_clk, GEN4_CORE_CLK_FREQ);
+
+	val = (ep->msi_mem_phys & MSIX_ADDR_MATCH_LOW_OFF_MASK);
+	val |= MSIX_ADDR_MATCH_LOW_OFF_EN;
+	dw_pcie_writel_dbi(pci, MSIX_ADDR_MATCH_LOW_OFF, val);
+	val = (upper_32_bits(ep->msi_mem_phys) & MSIX_ADDR_MATCH_HIGH_OFF_MASK);
+	dw_pcie_writel_dbi(pci, MSIX_ADDR_MATCH_HIGH_OFF, val);
+
+	ret = dw_pcie_ep_init_complete(ep);
+	if (ret) {
+		dev_err(dev, "Failed to complete initialization: %d\n", ret);
+		goto fail_init_complete;
+	}
+
+	dw_pcie_ep_init_notify(ep);
+
+	/* Program the private control to allow sending LTR upstream */
+	if (pcie->of_data->has_ltr_req_fix) {
+		val = appl_readl(pcie, APPL_LTR_MSG_2);
+		val |= APPL_LTR_MSG_2_LTR_MSG_REQ_STATE;
+		appl_writel(pcie, val, APPL_LTR_MSG_2);
+	}
+
+	/* Enable LTSSM */
+	val = appl_readl(pcie, APPL_CTRL);
+	val |= APPL_CTRL_LTSSM_EN;
+	appl_writel(pcie, val, APPL_CTRL);
+
+	pcie->ep_state = EP_STATE_ENABLED;
+	dev_dbg(dev, "Initialization of endpoint is completed\n");
+
+	return;
+
+fail_init_complete:
+	reset_control_assert(pcie->core_rst);
+	tegra_pcie_disable_phy(pcie);
+fail_phy:
+	reset_control_assert(pcie->core_apb_rst);
+fail_core_apb_rst:
+	clk_disable_unprepare(pcie->core_clk);
+fail_core_clk_enable:
+	tegra_pcie_bpmp_set_pll_state(pcie, false);
+fail_pll_init:
+	tegra_pcie_bpmp_set_ctrl_state(pcie, false);
+fail_set_ctrl_state:
+	pm_runtime_put_sync(dev);
+}
+
+static irqreturn_t tegra_pcie_ep_pex_rst_irq(int irq, void *arg)
+{
+	struct tegra_pcie_dw *pcie = arg;
+
+	if (gpiod_get_value(pcie->pex_rst_gpiod))
+		pex_ep_event_pex_rst_assert(pcie);
+	else
+		pex_ep_event_pex_rst_deassert(pcie);
+
+	return IRQ_HANDLED;
+}
+
+static int tegra_pcie_ep_raise_legacy_irq(struct tegra_pcie_dw *pcie, u16 irq)
+{
+	/* Tegra194 supports only INTA */
+	if (irq > 1)
+		return -EINVAL;
+
+	appl_writel(pcie, 1, APPL_LEGACY_INTX);
+	usleep_range(1000, 2000);
+	appl_writel(pcie, 0, APPL_LEGACY_INTX);
+	return 0;
+}
+
+static int tegra_pcie_ep_raise_msi_irq(struct tegra_pcie_dw *pcie, u16 irq)
+{
+	if (unlikely(irq > 31))
+		return -EINVAL;
+
+	appl_writel(pcie, BIT(irq), APPL_MSI_CTRL_1);
+
+	return 0;
+}
+
+static int tegra_pcie_ep_raise_msix_irq(struct tegra_pcie_dw *pcie, u16 irq)
+{
+	struct dw_pcie_ep *ep = &pcie->pci.ep;
+
+	writel(irq, ep->msi_mem);
+
+	return 0;
+}
+
+static int tegra_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
+				   enum pci_epc_irq_type type,
+				   u16 interrupt_num)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct tegra_pcie_dw *pcie = to_tegra_pcie(pci);
+
+	switch (type) {
+	case PCI_EPC_IRQ_LEGACY:
+		return tegra_pcie_ep_raise_legacy_irq(pcie, interrupt_num);
+
+	case PCI_EPC_IRQ_MSI:
+		return tegra_pcie_ep_raise_msi_irq(pcie, interrupt_num);
+
+	case PCI_EPC_IRQ_MSIX:
+		return tegra_pcie_ep_raise_msix_irq(pcie, interrupt_num);
+
+	default:
+		dev_err(pci->dev, "Unknown IRQ type\n");
+		return -EPERM;
+	}
+
+	return 0;
+}
+
+static const struct pci_epc_features tegra_pcie_epc_features = {
+	.linkup_notifier = true,
+	.core_init_notifier = true,
+	.msi_capable = false,
+	.msix_capable = false,
+	.reserved_bar = 1 << BAR_2 | 1 << BAR_3 | 1 << BAR_4 | 1 << BAR_5,
+	.bar_fixed_64bit = 1 << BAR_0,
+	.bar_fixed_size[0] = SZ_1M,
+};
+
+static const struct pci_epc_features*
+tegra_pcie_ep_get_features(struct dw_pcie_ep *ep)
+{
+	return &tegra_pcie_epc_features;
+}
+
+static const struct dw_pcie_ep_ops pcie_ep_ops = {
+	.raise_irq = tegra_pcie_ep_raise_irq,
+	.get_features = tegra_pcie_ep_get_features,
+};
+
+static int tegra_pcie_config_ep(struct tegra_pcie_dw *pcie,
+				struct platform_device *pdev)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	struct device *dev = pcie->dev;
+	struct dw_pcie_ep *ep;
+	char *name;
+	int ret;
+
+	ep = &pci->ep;
+	ep->ops = &pcie_ep_ops;
+
+	ep->page_size = SZ_64K;
+
+	ret = gpiod_set_debounce(pcie->pex_rst_gpiod, PERST_DEBOUNCE_TIME);
+	if (ret < 0) {
+		dev_err(dev, "Failed to set PERST GPIO debounce time: %d\n",
+			ret);
+		return ret;
+	}
+
+	ret = gpiod_to_irq(pcie->pex_rst_gpiod);
+	if (ret < 0) {
+		dev_err(dev, "Failed to get IRQ for PERST GPIO: %d\n", ret);
+		return ret;
+	}
+	pcie->pex_rst_irq = (unsigned int)ret;
+
+	name = devm_kasprintf(dev, GFP_KERNEL, "tegra_pcie_%u_pex_rst_irq",
+			      pcie->cid);
+	if (!name) {
+		dev_err(dev, "Failed to create PERST IRQ string\n");
+		return -ENOMEM;
+	}
+
+	irq_set_status_flags(pcie->pex_rst_irq, IRQ_NOAUTOEN);
+
+	pcie->ep_state = EP_STATE_DISABLED;
+
+	ret = devm_request_threaded_irq(dev, pcie->pex_rst_irq, NULL,
+					tegra_pcie_ep_pex_rst_irq,
+					IRQF_TRIGGER_RISING |
+					IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+					name, (void *)pcie);
+	if (ret < 0) {
+		dev_err(dev, "Failed to request IRQ for PERST: %d\n", ret);
+		return ret;
+	}
+
+	pm_runtime_enable(dev);
+
+	ret = dw_pcie_ep_init(ep);
+	if (ret) {
+		dev_err(dev, "Failed to initialize DWC Endpoint subsystem: %d\n",
+			ret);
+		pm_runtime_disable(dev);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int tegra_pcie_dw_probe(struct platform_device *pdev)
+{
+	const struct tegra_pcie_dw_of_data *data;
+	struct device *dev = &pdev->dev;
+	struct resource *atu_dma_res;
+	struct tegra_pcie_dw *pcie;
+	struct dw_pcie_rp *pp;
+	struct dw_pcie *pci;
+	struct phy **phys;
+	char *name;
+	int ret;
+	u32 i;
+
+	data = of_device_get_match_data(dev);
+
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	pci = &pcie->pci;
+	pci->dev = &pdev->dev;
+	pci->ops = &tegra_dw_pcie_ops;
+	pcie->dev = &pdev->dev;
+	pcie->of_data = (struct tegra_pcie_dw_of_data *)data;
+	pci->n_fts[0] = pcie->of_data->n_fts[0];
+	pci->n_fts[1] = pcie->of_data->n_fts[1];
+	pp = &pci->pp;
+	pp->num_vectors = MAX_MSI_IRQS;
+
+	ret = tegra_pcie_dw_parse_dt(pcie);
+	if (ret < 0) {
+		const char *level = KERN_ERR;
+
+		if (ret == -EPROBE_DEFER)
+			level = KERN_DEBUG;
+
+		dev_printk(level, dev,
+			   dev_fmt("Failed to parse device tree: %d\n"),
+			   ret);
+		return ret;
+	}
+
+	ret = tegra_pcie_get_slot_regulators(pcie);
+	if (ret < 0) {
+		const char *level = KERN_ERR;
+
+		if (ret == -EPROBE_DEFER)
+			level = KERN_DEBUG;
+
+		dev_printk(level, dev,
+			   dev_fmt("Failed to get slot regulators: %d\n"),
+			   ret);
+		return ret;
+	}
+
+	if (pcie->pex_refclk_sel_gpiod)
+		gpiod_set_value(pcie->pex_refclk_sel_gpiod, 1);
+
+	pcie->pex_ctl_supply = devm_regulator_get(dev, "vddio-pex-ctl");
+	if (IS_ERR(pcie->pex_ctl_supply)) {
+		ret = PTR_ERR(pcie->pex_ctl_supply);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get regulator: %ld\n",
+				PTR_ERR(pcie->pex_ctl_supply));
+		return ret;
+	}
+
+	pcie->core_clk = devm_clk_get(dev, "core");
+	if (IS_ERR(pcie->core_clk)) {
+		dev_err(dev, "Failed to get core clock: %ld\n",
+			PTR_ERR(pcie->core_clk));
+		return PTR_ERR(pcie->core_clk);
+	}
+
+	pcie->appl_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						      "appl");
+	if (!pcie->appl_res) {
+		dev_err(dev, "Failed to find \"appl\" region\n");
+		return -ENODEV;
+	}
+
+	pcie->appl_base = devm_ioremap_resource(dev, pcie->appl_res);
+	if (IS_ERR(pcie->appl_base))
+		return PTR_ERR(pcie->appl_base);
+
+	pcie->core_apb_rst = devm_reset_control_get(dev, "apb");
+	if (IS_ERR(pcie->core_apb_rst)) {
+		dev_err(dev, "Failed to get APB reset: %ld\n",
+			PTR_ERR(pcie->core_apb_rst));
+		return PTR_ERR(pcie->core_apb_rst);
+	}
+
+	phys = devm_kcalloc(dev, pcie->phy_count, sizeof(*phys), GFP_KERNEL);
+	if (!phys)
+		return -ENOMEM;
+
+	for (i = 0; i < pcie->phy_count; i++) {
+		name = kasprintf(GFP_KERNEL, "p2u-%u", i);
+		if (!name) {
+			dev_err(dev, "Failed to create P2U string\n");
+			return -ENOMEM;
+		}
+		phys[i] = devm_phy_get(dev, name);
+		kfree(name);
+		if (IS_ERR(phys[i])) {
+			ret = PTR_ERR(phys[i]);
+			if (ret != -EPROBE_DEFER)
+				dev_err(dev, "Failed to get PHY: %d\n", ret);
+			return ret;
+		}
+	}
+
+	pcie->phys = phys;
+
+	atu_dma_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						   "atu_dma");
+	if (!atu_dma_res) {
+		dev_err(dev, "Failed to find \"atu_dma\" region\n");
+		return -ENODEV;
+	}
+	pcie->atu_dma_res = atu_dma_res;
+
+	pci->atu_size = resource_size(atu_dma_res);
+	pci->atu_base = devm_ioremap_resource(dev, atu_dma_res);
+	if (IS_ERR(pci->atu_base))
+		return PTR_ERR(pci->atu_base);
+
+	pcie->core_rst = devm_reset_control_get(dev, "core");
+	if (IS_ERR(pcie->core_rst)) {
+		dev_err(dev, "Failed to get core reset: %ld\n",
+			PTR_ERR(pcie->core_rst));
+		return PTR_ERR(pcie->core_rst);
+	}
+
+	pp->irq = platform_get_irq_byname(pdev, "intr");
+	if (pp->irq < 0)
+		return pp->irq;
+
+	pcie->bpmp = tegra_bpmp_get(dev);
+	if (IS_ERR(pcie->bpmp))
+		return PTR_ERR(pcie->bpmp);
+
+	platform_set_drvdata(pdev, pcie);
+
+	pcie->icc_path = devm_of_icc_get(&pdev->dev, "write");
+	ret = PTR_ERR_OR_ZERO(pcie->icc_path);
+	if (ret) {
+		tegra_bpmp_put(pcie->bpmp);
+		dev_err_probe(&pdev->dev, ret, "failed to get write interconnect\n");
+		return ret;
+	}
+
+	switch (pcie->of_data->mode) {
+	case DW_PCIE_RC_TYPE:
+		ret = devm_request_irq(dev, pp->irq, tegra_pcie_rp_irq_handler,
+				       IRQF_SHARED, "tegra-pcie-intr", pcie);
+		if (ret) {
+			dev_err(dev, "Failed to request IRQ %d: %d\n", pp->irq,
+				ret);
+			goto fail;
+		}
+
+		ret = tegra_pcie_config_rp(pcie);
+		if (ret && ret != -ENOMEDIUM)
+			goto fail;
+		else
+			return 0;
+		break;
+
+	case DW_PCIE_EP_TYPE:
+		ret = devm_request_threaded_irq(dev, pp->irq,
+						tegra_pcie_ep_hard_irq,
+						tegra_pcie_ep_irq_thread,
+						IRQF_SHARED | IRQF_ONESHOT,
+						"tegra-pcie-ep-intr", pcie);
+		if (ret) {
+			dev_err(dev, "Failed to request IRQ %d: %d\n", pp->irq,
+				ret);
+			goto fail;
+		}
+
+		ret = tegra_pcie_config_ep(pcie, pdev);
+		if (ret < 0)
+			goto fail;
+		break;
+
+	default:
+		dev_err(dev, "Invalid PCIe device type %d\n",
+			pcie->of_data->mode);
+	}
+
+fail:
+	tegra_bpmp_put(pcie->bpmp);
+	return ret;
+}
+
+static void tegra_pcie_dw_remove(struct platform_device *pdev)
+{
+	struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);
+
+	if (pcie->of_data->mode == DW_PCIE_RC_TYPE) {
+		if (!pcie->link_state)
+			return;
+
+		debugfs_remove_recursive(pcie->debugfs);
+		tegra_pcie_deinit_controller(pcie);
+		pm_runtime_put_sync(pcie->dev);
+	} else {
+		disable_irq(pcie->pex_rst_irq);
+		pex_ep_event_pex_rst_assert(pcie);
+	}
+
+	pm_runtime_disable(pcie->dev);
+	tegra_bpmp_put(pcie->bpmp);
+	if (pcie->pex_refclk_sel_gpiod)
+		gpiod_set_value(pcie->pex_refclk_sel_gpiod, 0);
+}
+
+static int tegra_pcie_dw_suspend_late(struct device *dev)
+{
+	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+	u32 val;
+
+	if (pcie->of_data->mode == DW_PCIE_EP_TYPE) {
+		dev_err(dev, "Failed to Suspend as Tegra PCIe is in EP mode\n");
+		return -EPERM;
+	}
+
+	if (!pcie->link_state)
+		return 0;
+
+	/* Enable HW_HOT_RST mode */
+	if (!pcie->of_data->has_sbr_reset_fix) {
+		val = appl_readl(pcie, APPL_CTRL);
+		val &= ~(APPL_CTRL_HW_HOT_RST_MODE_MASK <<
+			 APPL_CTRL_HW_HOT_RST_MODE_SHIFT);
+		val |= APPL_CTRL_HW_HOT_RST_EN;
+		appl_writel(pcie, val, APPL_CTRL);
+	}
+
+	return 0;
+}
+
+static int tegra_pcie_dw_suspend_noirq(struct device *dev)
+{
+	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+
+	if (!pcie->link_state)
+		return 0;
+
+	tegra_pcie_downstream_dev_to_D0(pcie);
+	tegra_pcie_dw_pme_turnoff(pcie);
+	tegra_pcie_unconfig_controller(pcie);
+
+	return 0;
+}
+
+static int tegra_pcie_dw_resume_noirq(struct device *dev)
+{
+	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+	int ret;
+
+	if (!pcie->link_state)
+		return 0;
+
+	ret = tegra_pcie_config_controller(pcie, true);
+	if (ret < 0)
+		return ret;
+
+	ret = tegra_pcie_dw_host_init(&pcie->pci.pp);
+	if (ret < 0) {
+		dev_err(dev, "Failed to init host: %d\n", ret);
+		goto fail_host_init;
+	}
+
+	dw_pcie_setup_rc(&pcie->pci.pp);
+
+	ret = tegra_pcie_dw_start_link(&pcie->pci);
+	if (ret < 0)
+		goto fail_host_init;
+
+	return 0;
+
+fail_host_init:
+	tegra_pcie_unconfig_controller(pcie);
+	return ret;
+}
+
+static int tegra_pcie_dw_resume_early(struct device *dev)
+{
+	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
+	u32 val;
+
+	if (pcie->of_data->mode == DW_PCIE_EP_TYPE) {
+		dev_err(dev, "Suspend is not supported in EP mode");
+		return -ENOTSUPP;
+	}
+
+	if (!pcie->link_state)
+		return 0;
+
+	/* Disable HW_HOT_RST mode */
+	if (!pcie->of_data->has_sbr_reset_fix) {
+		val = appl_readl(pcie, APPL_CTRL);
+		val &= ~(APPL_CTRL_HW_HOT_RST_MODE_MASK <<
+			 APPL_CTRL_HW_HOT_RST_MODE_SHIFT);
+		val |= APPL_CTRL_HW_HOT_RST_MODE_IMDT_RST <<
+		       APPL_CTRL_HW_HOT_RST_MODE_SHIFT;
+		val &= ~APPL_CTRL_HW_HOT_RST_EN;
+		appl_writel(pcie, val, APPL_CTRL);
+	}
+
+	return 0;
+}
+
+static void tegra_pcie_dw_shutdown(struct platform_device *pdev)
+{
+	struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);
+
+	if (pcie->of_data->mode == DW_PCIE_RC_TYPE) {
+		if (!pcie->link_state)
+			return;
+
+		debugfs_remove_recursive(pcie->debugfs);
+		tegra_pcie_downstream_dev_to_D0(pcie);
+
+		disable_irq(pcie->pci.pp.irq);
+		if (IS_ENABLED(CONFIG_PCI_MSI))
+			disable_irq(pcie->pci.pp.msi_irq[0]);
+
+		tegra_pcie_dw_pme_turnoff(pcie);
+		tegra_pcie_unconfig_controller(pcie);
+		pm_runtime_put_sync(pcie->dev);
+	} else {
+		disable_irq(pcie->pex_rst_irq);
+		pex_ep_event_pex_rst_assert(pcie);
+	}
+}
+
+static const struct tegra_pcie_dw_of_data tegra194_pcie_dw_rc_of_data = {
+	.version = TEGRA194_DWC_IP_VER,
+	.mode = DW_PCIE_RC_TYPE,
+	.cdm_chk_int_en_bit = BIT(19),
+	/* Gen4 - 5, 6, 8 and 9 presets enabled */
+	.gen4_preset_vec = 0x360,
+	.n_fts = { 52, 52 },
+};
+
+static const struct tegra_pcie_dw_of_data tegra194_pcie_dw_ep_of_data = {
+	.version = TEGRA194_DWC_IP_VER,
+	.mode = DW_PCIE_EP_TYPE,
+	.cdm_chk_int_en_bit = BIT(19),
+	/* Gen4 - 5, 6, 8 and 9 presets enabled */
+	.gen4_preset_vec = 0x360,
+	.n_fts = { 52, 52 },
+};
+
+static const struct tegra_pcie_dw_of_data tegra234_pcie_dw_rc_of_data = {
+	.version = TEGRA234_DWC_IP_VER,
+	.mode = DW_PCIE_RC_TYPE,
+	.has_msix_doorbell_access_fix = true,
+	.has_sbr_reset_fix = true,
+	.has_l1ss_exit_fix = true,
+	.cdm_chk_int_en_bit = BIT(18),
+	/* Gen4 - 6, 8 and 9 presets enabled */
+	.gen4_preset_vec = 0x340,
+	.n_fts = { 52, 80 },
+};
+
+static const struct tegra_pcie_dw_of_data tegra234_pcie_dw_ep_of_data = {
+	.version = TEGRA234_DWC_IP_VER,
+	.mode = DW_PCIE_EP_TYPE,
+	.has_l1ss_exit_fix = true,
+	.has_ltr_req_fix = true,
+	.cdm_chk_int_en_bit = BIT(18),
+	/* Gen4 - 6, 8 and 9 presets enabled */
+	.gen4_preset_vec = 0x340,
+	.n_fts = { 52, 80 },
+};
+
+static const struct of_device_id tegra_pcie_dw_of_match[] = {
+	{
+		.compatible = "nvidia,tegra194-pcie",
+		.data = &tegra194_pcie_dw_rc_of_data,
+	},
+	{
+		.compatible = "nvidia,tegra194-pcie-ep",
+		.data = &tegra194_pcie_dw_ep_of_data,
+	},
+	{
+		.compatible = "nvidia,tegra234-pcie",
+		.data = &tegra234_pcie_dw_rc_of_data,
+	},
+	{
+		.compatible = "nvidia,tegra234-pcie-ep",
+		.data = &tegra234_pcie_dw_ep_of_data,
+	},
+	{}
+};
+
+static const struct dev_pm_ops tegra_pcie_dw_pm_ops = {
+	.suspend_late = tegra_pcie_dw_suspend_late,
+	.suspend_noirq = tegra_pcie_dw_suspend_noirq,
+	.resume_noirq = tegra_pcie_dw_resume_noirq,
+	.resume_early = tegra_pcie_dw_resume_early,
+};
+
+static struct platform_driver tegra_pcie_dw_driver = {
+	.probe = tegra_pcie_dw_probe,
+	.remove_new = tegra_pcie_dw_remove,
+	.shutdown = tegra_pcie_dw_shutdown,
+	.driver = {
+		.name	= "tegra194-pcie",
+		.pm = &tegra_pcie_dw_pm_ops,
+		.of_match_table = tegra_pcie_dw_of_match,
+	},
+};
+module_platform_driver(tegra_pcie_dw_driver);
+
+MODULE_DEVICE_TABLE(of, tegra_pcie_dw_of_match);
+
+MODULE_AUTHOR("Vidya Sagar <vidyas@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA PCIe host controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/dwc/pcie-uniphier-ep.c b/drivers/pci/controller/dwc/pcie-uniphier-ep.c
new file mode 100644
index 000000000..cba3c88fc
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-uniphier-ep.c
@@ -0,0 +1,453 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe endpoint controller driver for UniPhier SoCs
+ * Copyright 2018 Socionext Inc.
+ * Author: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/iopoll.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+
+#include "pcie-designware.h"
+
+/* Link Glue registers */
+#define PCL_RSTCTRL0			0x0010
+#define PCL_RSTCTRL_AXI_REG		BIT(3)
+#define PCL_RSTCTRL_AXI_SLAVE		BIT(2)
+#define PCL_RSTCTRL_AXI_MASTER		BIT(1)
+#define PCL_RSTCTRL_PIPE3		BIT(0)
+
+#define PCL_RSTCTRL1			0x0020
+#define PCL_RSTCTRL_PERST		BIT(0)
+
+#define PCL_RSTCTRL2			0x0024
+#define PCL_RSTCTRL_PHY_RESET		BIT(0)
+
+#define PCL_PINCTRL0			0x002c
+#define PCL_PERST_PLDN_REGEN		BIT(12)
+#define PCL_PERST_NOE_REGEN		BIT(11)
+#define PCL_PERST_OUT_REGEN		BIT(8)
+#define PCL_PERST_PLDN_REGVAL		BIT(4)
+#define PCL_PERST_NOE_REGVAL		BIT(3)
+#define PCL_PERST_OUT_REGVAL		BIT(0)
+
+#define PCL_PIPEMON			0x0044
+#define PCL_PCLK_ALIVE			BIT(15)
+
+#define PCL_MODE			0x8000
+#define PCL_MODE_REGEN			BIT(8)
+#define PCL_MODE_REGVAL			BIT(0)
+
+#define PCL_APP_CLK_CTRL		0x8004
+#define PCL_APP_CLK_REQ			BIT(0)
+
+#define PCL_APP_READY_CTRL		0x8008
+#define PCL_APP_LTSSM_ENABLE		BIT(0)
+
+#define PCL_APP_MSI0			0x8040
+#define PCL_APP_VEN_MSI_TC_MASK		GENMASK(10, 8)
+#define PCL_APP_VEN_MSI_VECTOR_MASK	GENMASK(4, 0)
+
+#define PCL_APP_MSI1			0x8044
+#define PCL_APP_MSI_REQ			BIT(0)
+
+#define PCL_APP_INTX			0x8074
+#define PCL_APP_INTX_SYS_INT		BIT(0)
+
+#define PCL_APP_PM0			0x8078
+#define PCL_SYS_AUX_PWR_DET		BIT(8)
+
+/* assertion time of INTx in usec */
+#define PCL_INTX_WIDTH_USEC		30
+
+struct uniphier_pcie_ep_priv {
+	void __iomem *base;
+	struct dw_pcie pci;
+	struct clk *clk, *clk_gio;
+	struct reset_control *rst, *rst_gio;
+	struct phy *phy;
+	const struct uniphier_pcie_ep_soc_data *data;
+};
+
+struct uniphier_pcie_ep_soc_data {
+	bool has_gio;
+	void (*init)(struct uniphier_pcie_ep_priv *priv);
+	int (*wait)(struct uniphier_pcie_ep_priv *priv);
+	const struct pci_epc_features features;
+};
+
+#define to_uniphier_pcie(x)	dev_get_drvdata((x)->dev)
+
+static void uniphier_pcie_ltssm_enable(struct uniphier_pcie_ep_priv *priv,
+				       bool enable)
+{
+	u32 val;
+
+	val = readl(priv->base + PCL_APP_READY_CTRL);
+	if (enable)
+		val |= PCL_APP_LTSSM_ENABLE;
+	else
+		val &= ~PCL_APP_LTSSM_ENABLE;
+	writel(val, priv->base + PCL_APP_READY_CTRL);
+}
+
+static void uniphier_pcie_phy_reset(struct uniphier_pcie_ep_priv *priv,
+				    bool assert)
+{
+	u32 val;
+
+	val = readl(priv->base + PCL_RSTCTRL2);
+	if (assert)
+		val |= PCL_RSTCTRL_PHY_RESET;
+	else
+		val &= ~PCL_RSTCTRL_PHY_RESET;
+	writel(val, priv->base + PCL_RSTCTRL2);
+}
+
+static void uniphier_pcie_pro5_init_ep(struct uniphier_pcie_ep_priv *priv)
+{
+	u32 val;
+
+	/* set EP mode */
+	val = readl(priv->base + PCL_MODE);
+	val |= PCL_MODE_REGEN | PCL_MODE_REGVAL;
+	writel(val, priv->base + PCL_MODE);
+
+	/* clock request */
+	val = readl(priv->base + PCL_APP_CLK_CTRL);
+	val &= ~PCL_APP_CLK_REQ;
+	writel(val, priv->base + PCL_APP_CLK_CTRL);
+
+	/* deassert PIPE3 and AXI reset */
+	val = readl(priv->base + PCL_RSTCTRL0);
+	val |= PCL_RSTCTRL_AXI_REG | PCL_RSTCTRL_AXI_SLAVE
+		| PCL_RSTCTRL_AXI_MASTER | PCL_RSTCTRL_PIPE3;
+	writel(val, priv->base + PCL_RSTCTRL0);
+
+	uniphier_pcie_ltssm_enable(priv, false);
+
+	msleep(100);
+}
+
+static void uniphier_pcie_nx1_init_ep(struct uniphier_pcie_ep_priv *priv)
+{
+	u32 val;
+
+	/* set EP mode */
+	val = readl(priv->base + PCL_MODE);
+	val |= PCL_MODE_REGEN | PCL_MODE_REGVAL;
+	writel(val, priv->base + PCL_MODE);
+
+	/* use auxiliary power detection */
+	val = readl(priv->base + PCL_APP_PM0);
+	val |= PCL_SYS_AUX_PWR_DET;
+	writel(val, priv->base + PCL_APP_PM0);
+
+	/* assert PERST# */
+	val = readl(priv->base + PCL_PINCTRL0);
+	val &= ~(PCL_PERST_NOE_REGVAL | PCL_PERST_OUT_REGVAL
+		 | PCL_PERST_PLDN_REGVAL);
+	val |= PCL_PERST_NOE_REGEN | PCL_PERST_OUT_REGEN
+		| PCL_PERST_PLDN_REGEN;
+	writel(val, priv->base + PCL_PINCTRL0);
+
+	uniphier_pcie_ltssm_enable(priv, false);
+
+	usleep_range(100000, 200000);
+
+	/* deassert PERST# */
+	val = readl(priv->base + PCL_PINCTRL0);
+	val |= PCL_PERST_OUT_REGVAL | PCL_PERST_OUT_REGEN;
+	writel(val, priv->base + PCL_PINCTRL0);
+}
+
+static int uniphier_pcie_nx1_wait_ep(struct uniphier_pcie_ep_priv *priv)
+{
+	u32 status;
+	int ret;
+
+	/* wait PIPE clock */
+	ret = readl_poll_timeout(priv->base + PCL_PIPEMON, status,
+				 status & PCL_PCLK_ALIVE, 100000, 1000000);
+	if (ret) {
+		dev_err(priv->pci.dev,
+			"Failed to initialize controller in EP mode\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int uniphier_pcie_start_link(struct dw_pcie *pci)
+{
+	struct uniphier_pcie_ep_priv *priv = to_uniphier_pcie(pci);
+
+	uniphier_pcie_ltssm_enable(priv, true);
+
+	return 0;
+}
+
+static void uniphier_pcie_stop_link(struct dw_pcie *pci)
+{
+	struct uniphier_pcie_ep_priv *priv = to_uniphier_pcie(pci);
+
+	uniphier_pcie_ltssm_enable(priv, false);
+}
+
+static void uniphier_pcie_ep_init(struct dw_pcie_ep *ep)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	enum pci_barno bar;
+
+	for (bar = BAR_0; bar <= BAR_5; bar++)
+		dw_pcie_ep_reset_bar(pci, bar);
+}
+
+static int uniphier_pcie_ep_raise_legacy_irq(struct dw_pcie_ep *ep)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct uniphier_pcie_ep_priv *priv = to_uniphier_pcie(pci);
+	u32 val;
+
+	/*
+	 * This makes pulse signal to send INTx to the RC, so this should
+	 * be cleared as soon as possible. This sequence is covered with
+	 * mutex in pci_epc_raise_irq().
+	 */
+	/* assert INTx */
+	val = readl(priv->base + PCL_APP_INTX);
+	val |= PCL_APP_INTX_SYS_INT;
+	writel(val, priv->base + PCL_APP_INTX);
+
+	udelay(PCL_INTX_WIDTH_USEC);
+
+	/* deassert INTx */
+	val &= ~PCL_APP_INTX_SYS_INT;
+	writel(val, priv->base + PCL_APP_INTX);
+
+	return 0;
+}
+
+static int uniphier_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep,
+					  u8 func_no, u16 interrupt_num)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct uniphier_pcie_ep_priv *priv = to_uniphier_pcie(pci);
+	u32 val;
+
+	val = FIELD_PREP(PCL_APP_VEN_MSI_TC_MASK, func_no)
+		| FIELD_PREP(PCL_APP_VEN_MSI_VECTOR_MASK, interrupt_num - 1);
+	writel(val, priv->base + PCL_APP_MSI0);
+
+	val = readl(priv->base + PCL_APP_MSI1);
+	val |= PCL_APP_MSI_REQ;
+	writel(val, priv->base + PCL_APP_MSI1);
+
+	return 0;
+}
+
+static int uniphier_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
+				      enum pci_epc_irq_type type,
+				      u16 interrupt_num)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+	switch (type) {
+	case PCI_EPC_IRQ_LEGACY:
+		return uniphier_pcie_ep_raise_legacy_irq(ep);
+	case PCI_EPC_IRQ_MSI:
+		return uniphier_pcie_ep_raise_msi_irq(ep, func_no,
+						      interrupt_num);
+	default:
+		dev_err(pci->dev, "UNKNOWN IRQ type (%d)\n", type);
+	}
+
+	return 0;
+}
+
+static const struct pci_epc_features*
+uniphier_pcie_get_features(struct dw_pcie_ep *ep)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct uniphier_pcie_ep_priv *priv = to_uniphier_pcie(pci);
+
+	return &priv->data->features;
+}
+
+static const struct dw_pcie_ep_ops uniphier_pcie_ep_ops = {
+	.ep_init = uniphier_pcie_ep_init,
+	.raise_irq = uniphier_pcie_ep_raise_irq,
+	.get_features = uniphier_pcie_get_features,
+};
+
+static int uniphier_pcie_ep_enable(struct uniphier_pcie_ep_priv *priv)
+{
+	int ret;
+
+	ret = clk_prepare_enable(priv->clk);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(priv->clk_gio);
+	if (ret)
+		goto out_clk_disable;
+
+	ret = reset_control_deassert(priv->rst);
+	if (ret)
+		goto out_clk_gio_disable;
+
+	ret = reset_control_deassert(priv->rst_gio);
+	if (ret)
+		goto out_rst_assert;
+
+	if (priv->data->init)
+		priv->data->init(priv);
+
+	uniphier_pcie_phy_reset(priv, true);
+
+	ret = phy_init(priv->phy);
+	if (ret)
+		goto out_rst_gio_assert;
+
+	uniphier_pcie_phy_reset(priv, false);
+
+	if (priv->data->wait) {
+		ret = priv->data->wait(priv);
+		if (ret)
+			goto out_phy_exit;
+	}
+
+	return 0;
+
+out_phy_exit:
+	phy_exit(priv->phy);
+out_rst_gio_assert:
+	reset_control_assert(priv->rst_gio);
+out_rst_assert:
+	reset_control_assert(priv->rst);
+out_clk_gio_disable:
+	clk_disable_unprepare(priv->clk_gio);
+out_clk_disable:
+	clk_disable_unprepare(priv->clk);
+
+	return ret;
+}
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+	.start_link = uniphier_pcie_start_link,
+	.stop_link = uniphier_pcie_stop_link,
+};
+
+static int uniphier_pcie_ep_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct uniphier_pcie_ep_priv *priv;
+	int ret;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->data = of_device_get_match_data(dev);
+	if (WARN_ON(!priv->data))
+		return -EINVAL;
+
+	priv->pci.dev = dev;
+	priv->pci.ops = &dw_pcie_ops;
+
+	priv->base = devm_platform_ioremap_resource_byname(pdev, "link");
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	if (priv->data->has_gio) {
+		priv->clk_gio = devm_clk_get(dev, "gio");
+		if (IS_ERR(priv->clk_gio))
+			return PTR_ERR(priv->clk_gio);
+
+		priv->rst_gio = devm_reset_control_get_shared(dev, "gio");
+		if (IS_ERR(priv->rst_gio))
+			return PTR_ERR(priv->rst_gio);
+	}
+
+	priv->clk = devm_clk_get(dev, "link");
+	if (IS_ERR(priv->clk))
+		return PTR_ERR(priv->clk);
+
+	priv->rst = devm_reset_control_get_shared(dev, "link");
+	if (IS_ERR(priv->rst))
+		return PTR_ERR(priv->rst);
+
+	priv->phy = devm_phy_optional_get(dev, "pcie-phy");
+	if (IS_ERR(priv->phy)) {
+		ret = PTR_ERR(priv->phy);
+		dev_err(dev, "Failed to get phy (%d)\n", ret);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, priv);
+
+	ret = uniphier_pcie_ep_enable(priv);
+	if (ret)
+		return ret;
+
+	priv->pci.ep.ops = &uniphier_pcie_ep_ops;
+	return dw_pcie_ep_init(&priv->pci.ep);
+}
+
+static const struct uniphier_pcie_ep_soc_data uniphier_pro5_data = {
+	.has_gio = true,
+	.init = uniphier_pcie_pro5_init_ep,
+	.wait = NULL,
+	.features = {
+		.linkup_notifier = false,
+		.msi_capable = true,
+		.msix_capable = false,
+		.align = 1 << 16,
+		.bar_fixed_64bit = BIT(BAR_0) | BIT(BAR_2) | BIT(BAR_4),
+		.reserved_bar =  BIT(BAR_4),
+	},
+};
+
+static const struct uniphier_pcie_ep_soc_data uniphier_nx1_data = {
+	.has_gio = false,
+	.init = uniphier_pcie_nx1_init_ep,
+	.wait = uniphier_pcie_nx1_wait_ep,
+	.features = {
+		.linkup_notifier = false,
+		.msi_capable = true,
+		.msix_capable = false,
+		.align = 1 << 12,
+		.bar_fixed_64bit = BIT(BAR_0) | BIT(BAR_2) | BIT(BAR_4),
+	},
+};
+
+static const struct of_device_id uniphier_pcie_ep_match[] = {
+	{
+		.compatible = "socionext,uniphier-pro5-pcie-ep",
+		.data = &uniphier_pro5_data,
+	},
+	{
+		.compatible = "socionext,uniphier-nx1-pcie-ep",
+		.data = &uniphier_nx1_data,
+	},
+	{ /* sentinel */ },
+};
+
+static struct platform_driver uniphier_pcie_ep_driver = {
+	.probe  = uniphier_pcie_ep_probe,
+	.driver = {
+		.name = "uniphier-pcie-ep",
+		.of_match_table = uniphier_pcie_ep_match,
+		.suppress_bind_attrs = true,
+	},
+};
+builtin_platform_driver(uniphier_pcie_ep_driver);
diff --git a/drivers/pci/controller/dwc/pcie-uniphier.c b/drivers/pci/controller/dwc/pcie-uniphier.c
new file mode 100644
index 000000000..48c3eba81
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-uniphier.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for UniPhier SoCs
+ * Copyright 2018 Socionext Inc.
+ * Author: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_irq.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+
+#include "pcie-designware.h"
+
+#define PCL_PINCTRL0			0x002c
+#define PCL_PERST_PLDN_REGEN		BIT(12)
+#define PCL_PERST_NOE_REGEN		BIT(11)
+#define PCL_PERST_OUT_REGEN		BIT(8)
+#define PCL_PERST_PLDN_REGVAL		BIT(4)
+#define PCL_PERST_NOE_REGVAL		BIT(3)
+#define PCL_PERST_OUT_REGVAL		BIT(0)
+
+#define PCL_PIPEMON			0x0044
+#define PCL_PCLK_ALIVE			BIT(15)
+
+#define PCL_MODE			0x8000
+#define PCL_MODE_REGEN			BIT(8)
+#define PCL_MODE_REGVAL			BIT(0)
+
+#define PCL_APP_READY_CTRL		0x8008
+#define PCL_APP_LTSSM_ENABLE		BIT(0)
+
+#define PCL_APP_PM0			0x8078
+#define PCL_SYS_AUX_PWR_DET		BIT(8)
+
+#define PCL_RCV_INT			0x8108
+#define PCL_RCV_INT_ALL_ENABLE		GENMASK(20, 17)
+#define PCL_CFG_BW_MGT_STATUS		BIT(4)
+#define PCL_CFG_LINK_AUTO_BW_STATUS	BIT(3)
+#define PCL_CFG_AER_RC_ERR_MSI_STATUS	BIT(2)
+#define PCL_CFG_PME_MSI_STATUS		BIT(1)
+
+#define PCL_RCV_INTX			0x810c
+#define PCL_RCV_INTX_ALL_ENABLE		GENMASK(19, 16)
+#define PCL_RCV_INTX_ALL_MASK		GENMASK(11, 8)
+#define PCL_RCV_INTX_MASK_SHIFT		8
+#define PCL_RCV_INTX_ALL_STATUS		GENMASK(3, 0)
+#define PCL_RCV_INTX_STATUS_SHIFT	0
+
+#define PCL_STATUS_LINK			0x8140
+#define PCL_RDLH_LINK_UP		BIT(1)
+#define PCL_XMLH_LINK_UP		BIT(0)
+
+struct uniphier_pcie {
+	struct dw_pcie pci;
+	void __iomem *base;
+	struct clk *clk;
+	struct reset_control *rst;
+	struct phy *phy;
+	struct irq_domain *legacy_irq_domain;
+};
+
+#define to_uniphier_pcie(x)	dev_get_drvdata((x)->dev)
+
+static void uniphier_pcie_ltssm_enable(struct uniphier_pcie *pcie,
+				       bool enable)
+{
+	u32 val;
+
+	val = readl(pcie->base + PCL_APP_READY_CTRL);
+	if (enable)
+		val |= PCL_APP_LTSSM_ENABLE;
+	else
+		val &= ~PCL_APP_LTSSM_ENABLE;
+	writel(val, pcie->base + PCL_APP_READY_CTRL);
+}
+
+static void uniphier_pcie_init_rc(struct uniphier_pcie *pcie)
+{
+	u32 val;
+
+	/* set RC MODE */
+	val = readl(pcie->base + PCL_MODE);
+	val |= PCL_MODE_REGEN;
+	val &= ~PCL_MODE_REGVAL;
+	writel(val, pcie->base + PCL_MODE);
+
+	/* use auxiliary power detection */
+	val = readl(pcie->base + PCL_APP_PM0);
+	val |= PCL_SYS_AUX_PWR_DET;
+	writel(val, pcie->base + PCL_APP_PM0);
+
+	/* assert PERST# */
+	val = readl(pcie->base + PCL_PINCTRL0);
+	val &= ~(PCL_PERST_NOE_REGVAL | PCL_PERST_OUT_REGVAL
+		 | PCL_PERST_PLDN_REGVAL);
+	val |= PCL_PERST_NOE_REGEN | PCL_PERST_OUT_REGEN
+		| PCL_PERST_PLDN_REGEN;
+	writel(val, pcie->base + PCL_PINCTRL0);
+
+	uniphier_pcie_ltssm_enable(pcie, false);
+
+	usleep_range(100000, 200000);
+
+	/* deassert PERST# */
+	val = readl(pcie->base + PCL_PINCTRL0);
+	val |= PCL_PERST_OUT_REGVAL | PCL_PERST_OUT_REGEN;
+	writel(val, pcie->base + PCL_PINCTRL0);
+}
+
+static int uniphier_pcie_wait_rc(struct uniphier_pcie *pcie)
+{
+	u32 status;
+	int ret;
+
+	/* wait PIPE clock */
+	ret = readl_poll_timeout(pcie->base + PCL_PIPEMON, status,
+				 status & PCL_PCLK_ALIVE, 100000, 1000000);
+	if (ret) {
+		dev_err(pcie->pci.dev,
+			"Failed to initialize controller in RC mode\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int uniphier_pcie_link_up(struct dw_pcie *pci)
+{
+	struct uniphier_pcie *pcie = to_uniphier_pcie(pci);
+	u32 val, mask;
+
+	val = readl(pcie->base + PCL_STATUS_LINK);
+	mask = PCL_RDLH_LINK_UP | PCL_XMLH_LINK_UP;
+
+	return (val & mask) == mask;
+}
+
+static int uniphier_pcie_start_link(struct dw_pcie *pci)
+{
+	struct uniphier_pcie *pcie = to_uniphier_pcie(pci);
+
+	uniphier_pcie_ltssm_enable(pcie, true);
+
+	return 0;
+}
+
+static void uniphier_pcie_stop_link(struct dw_pcie *pci)
+{
+	struct uniphier_pcie *pcie = to_uniphier_pcie(pci);
+
+	uniphier_pcie_ltssm_enable(pcie, false);
+}
+
+static void uniphier_pcie_irq_enable(struct uniphier_pcie *pcie)
+{
+	writel(PCL_RCV_INT_ALL_ENABLE, pcie->base + PCL_RCV_INT);
+	writel(PCL_RCV_INTX_ALL_ENABLE, pcie->base + PCL_RCV_INTX);
+}
+
+
+static void uniphier_pcie_irq_mask(struct irq_data *d)
+{
+	struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(d);
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct uniphier_pcie *pcie = to_uniphier_pcie(pci);
+	unsigned long flags;
+	u32 val;
+
+	raw_spin_lock_irqsave(&pp->lock, flags);
+
+	val = readl(pcie->base + PCL_RCV_INTX);
+	val |= BIT(irqd_to_hwirq(d) + PCL_RCV_INTX_MASK_SHIFT);
+	writel(val, pcie->base + PCL_RCV_INTX);
+
+	raw_spin_unlock_irqrestore(&pp->lock, flags);
+}
+
+static void uniphier_pcie_irq_unmask(struct irq_data *d)
+{
+	struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(d);
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct uniphier_pcie *pcie = to_uniphier_pcie(pci);
+	unsigned long flags;
+	u32 val;
+
+	raw_spin_lock_irqsave(&pp->lock, flags);
+
+	val = readl(pcie->base + PCL_RCV_INTX);
+	val &= ~BIT(irqd_to_hwirq(d) + PCL_RCV_INTX_MASK_SHIFT);
+	writel(val, pcie->base + PCL_RCV_INTX);
+
+	raw_spin_unlock_irqrestore(&pp->lock, flags);
+}
+
+static struct irq_chip uniphier_pcie_irq_chip = {
+	.name = "PCI",
+	.irq_mask = uniphier_pcie_irq_mask,
+	.irq_unmask = uniphier_pcie_irq_unmask,
+};
+
+static int uniphier_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
+				  irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &uniphier_pcie_irq_chip,
+				 handle_level_irq);
+	irq_set_chip_data(irq, domain->host_data);
+
+	return 0;
+}
+
+static const struct irq_domain_ops uniphier_intx_domain_ops = {
+	.map = uniphier_pcie_intx_map,
+};
+
+static void uniphier_pcie_irq_handler(struct irq_desc *desc)
+{
+	struct dw_pcie_rp *pp = irq_desc_get_handler_data(desc);
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct uniphier_pcie *pcie = to_uniphier_pcie(pci);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned long reg;
+	u32 val, bit;
+
+	/* INT for debug */
+	val = readl(pcie->base + PCL_RCV_INT);
+
+	if (val & PCL_CFG_BW_MGT_STATUS)
+		dev_dbg(pci->dev, "Link Bandwidth Management Event\n");
+	if (val & PCL_CFG_LINK_AUTO_BW_STATUS)
+		dev_dbg(pci->dev, "Link Autonomous Bandwidth Event\n");
+	if (val & PCL_CFG_AER_RC_ERR_MSI_STATUS)
+		dev_dbg(pci->dev, "Root Error\n");
+	if (val & PCL_CFG_PME_MSI_STATUS)
+		dev_dbg(pci->dev, "PME Interrupt\n");
+
+	writel(val, pcie->base + PCL_RCV_INT);
+
+	/* INTx */
+	chained_irq_enter(chip, desc);
+
+	val = readl(pcie->base + PCL_RCV_INTX);
+	reg = FIELD_GET(PCL_RCV_INTX_ALL_STATUS, val);
+
+	for_each_set_bit(bit, &reg, PCI_NUM_INTX)
+		generic_handle_domain_irq(pcie->legacy_irq_domain, bit);
+
+	chained_irq_exit(chip, desc);
+}
+
+static int uniphier_pcie_config_legacy_irq(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct uniphier_pcie *pcie = to_uniphier_pcie(pci);
+	struct device_node *np = pci->dev->of_node;
+	struct device_node *np_intc;
+	int ret = 0;
+
+	np_intc = of_get_child_by_name(np, "legacy-interrupt-controller");
+	if (!np_intc) {
+		dev_err(pci->dev, "Failed to get legacy-interrupt-controller node\n");
+		return -EINVAL;
+	}
+
+	pp->irq = irq_of_parse_and_map(np_intc, 0);
+	if (!pp->irq) {
+		dev_err(pci->dev, "Failed to get an IRQ entry in legacy-interrupt-controller\n");
+		ret = -EINVAL;
+		goto out_put_node;
+	}
+
+	pcie->legacy_irq_domain = irq_domain_add_linear(np_intc, PCI_NUM_INTX,
+						&uniphier_intx_domain_ops, pp);
+	if (!pcie->legacy_irq_domain) {
+		dev_err(pci->dev, "Failed to get INTx domain\n");
+		ret = -ENODEV;
+		goto out_put_node;
+	}
+
+	irq_set_chained_handler_and_data(pp->irq, uniphier_pcie_irq_handler,
+					 pp);
+
+out_put_node:
+	of_node_put(np_intc);
+	return ret;
+}
+
+static int uniphier_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct uniphier_pcie *pcie = to_uniphier_pcie(pci);
+	int ret;
+
+	ret = uniphier_pcie_config_legacy_irq(pp);
+	if (ret)
+		return ret;
+
+	uniphier_pcie_irq_enable(pcie);
+
+	return 0;
+}
+
+static const struct dw_pcie_host_ops uniphier_pcie_host_ops = {
+	.host_init = uniphier_pcie_host_init,
+};
+
+static int uniphier_pcie_host_enable(struct uniphier_pcie *pcie)
+{
+	int ret;
+
+	ret = clk_prepare_enable(pcie->clk);
+	if (ret)
+		return ret;
+
+	ret = reset_control_deassert(pcie->rst);
+	if (ret)
+		goto out_clk_disable;
+
+	uniphier_pcie_init_rc(pcie);
+
+	ret = phy_init(pcie->phy);
+	if (ret)
+		goto out_rst_assert;
+
+	ret = uniphier_pcie_wait_rc(pcie);
+	if (ret)
+		goto out_phy_exit;
+
+	return 0;
+
+out_phy_exit:
+	phy_exit(pcie->phy);
+out_rst_assert:
+	reset_control_assert(pcie->rst);
+out_clk_disable:
+	clk_disable_unprepare(pcie->clk);
+
+	return ret;
+}
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+	.start_link = uniphier_pcie_start_link,
+	.stop_link = uniphier_pcie_stop_link,
+	.link_up = uniphier_pcie_link_up,
+};
+
+static int uniphier_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct uniphier_pcie *pcie;
+	int ret;
+
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	pcie->pci.dev = dev;
+	pcie->pci.ops = &dw_pcie_ops;
+
+	pcie->base = devm_platform_ioremap_resource_byname(pdev, "link");
+	if (IS_ERR(pcie->base))
+		return PTR_ERR(pcie->base);
+
+	pcie->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(pcie->clk))
+		return PTR_ERR(pcie->clk);
+
+	pcie->rst = devm_reset_control_get_shared(dev, NULL);
+	if (IS_ERR(pcie->rst))
+		return PTR_ERR(pcie->rst);
+
+	pcie->phy = devm_phy_optional_get(dev, "pcie-phy");
+	if (IS_ERR(pcie->phy))
+		return PTR_ERR(pcie->phy);
+
+	platform_set_drvdata(pdev, pcie);
+
+	ret = uniphier_pcie_host_enable(pcie);
+	if (ret)
+		return ret;
+
+	pcie->pci.pp.ops = &uniphier_pcie_host_ops;
+
+	return dw_pcie_host_init(&pcie->pci.pp);
+}
+
+static const struct of_device_id uniphier_pcie_match[] = {
+	{ .compatible = "socionext,uniphier-pcie", },
+	{ /* sentinel */ },
+};
+
+static struct platform_driver uniphier_pcie_driver = {
+	.probe  = uniphier_pcie_probe,
+	.driver = {
+		.name = "uniphier-pcie",
+		.of_match_table = uniphier_pcie_match,
+	},
+};
+builtin_platform_driver(uniphier_pcie_driver);
diff --git a/drivers/pci/controller/dwc/pcie-visconti.c b/drivers/pci/controller/dwc/pcie-visconti.c
new file mode 100644
index 000000000..71026fefa
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-visconti.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DWC PCIe RC driver for Toshiba Visconti ARM SoC
+ *
+ * Copyright (C) 2021 Toshiba Electronic Device & Storage Corporation
+ * Copyright (C) 2021 TOSHIBA CORPORATION
+ *
+ * Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp>
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/types.h>
+
+#include "pcie-designware.h"
+#include "../../pci.h"
+
+struct visconti_pcie {
+	struct dw_pcie pci;
+	void __iomem *ulreg_base;
+	void __iomem *smu_base;
+	void __iomem *mpu_base;
+	struct clk *refclk;
+	struct clk *coreclk;
+	struct clk *auxclk;
+};
+
+#define PCIE_UL_REG_S_PCIE_MODE		0x00F4
+#define  PCIE_UL_REG_S_PCIE_MODE_EP	0x00
+#define  PCIE_UL_REG_S_PCIE_MODE_RC	0x04
+
+#define PCIE_UL_REG_S_PERSTN_CTRL	0x00F8
+#define  PCIE_UL_IOM_PCIE_PERSTN_I_EN	BIT(3)
+#define  PCIE_UL_DIRECT_PERSTN_EN	BIT(2)
+#define  PCIE_UL_PERSTN_OUT		BIT(1)
+#define  PCIE_UL_DIRECT_PERSTN		BIT(0)
+#define  PCIE_UL_REG_S_PERSTN_CTRL_INIT	(PCIE_UL_IOM_PCIE_PERSTN_I_EN | \
+					 PCIE_UL_DIRECT_PERSTN_EN | \
+					 PCIE_UL_DIRECT_PERSTN)
+
+#define PCIE_UL_REG_S_PHY_INIT_02	0x0104
+#define  PCIE_UL_PHY0_SRAM_EXT_LD_DONE	BIT(0)
+
+#define PCIE_UL_REG_S_PHY_INIT_03	0x0108
+#define  PCIE_UL_PHY0_SRAM_INIT_DONE	BIT(0)
+
+#define PCIE_UL_REG_S_INT_EVENT_MASK1	0x0138
+#define  PCIE_UL_CFG_PME_INT		BIT(0)
+#define  PCIE_UL_CFG_LINK_EQ_REQ_INT	BIT(1)
+#define  PCIE_UL_EDMA_INT0		BIT(2)
+#define  PCIE_UL_EDMA_INT1		BIT(3)
+#define  PCIE_UL_EDMA_INT2		BIT(4)
+#define  PCIE_UL_EDMA_INT3		BIT(5)
+#define  PCIE_UL_S_INT_EVENT_MASK1_ALL  (PCIE_UL_CFG_PME_INT | \
+					 PCIE_UL_CFG_LINK_EQ_REQ_INT | \
+					 PCIE_UL_EDMA_INT0 | \
+					 PCIE_UL_EDMA_INT1 | \
+					 PCIE_UL_EDMA_INT2 | \
+					 PCIE_UL_EDMA_INT3)
+
+#define PCIE_UL_REG_S_SB_MON		0x0198
+#define PCIE_UL_REG_S_SIG_MON		0x019C
+#define  PCIE_UL_CORE_RST_N_MON		BIT(0)
+
+#define PCIE_UL_REG_V_SII_DBG_00	0x0844
+#define PCIE_UL_REG_V_SII_GEN_CTRL_01	0x0860
+#define  PCIE_UL_APP_LTSSM_ENABLE	BIT(0)
+
+#define PCIE_UL_REG_V_PHY_ST_00		0x0864
+#define  PCIE_UL_SMLH_LINK_UP		BIT(0)
+
+#define PCIE_UL_REG_V_PHY_ST_02		0x0868
+#define  PCIE_UL_S_DETECT_ACT		0x01
+#define  PCIE_UL_S_L0			0x11
+
+#define PISMU_CKON_PCIE			0x0038
+#define  PISMU_CKON_PCIE_AUX_CLK	BIT(1)
+#define  PISMU_CKON_PCIE_MSTR_ACLK	BIT(0)
+
+#define PISMU_RSOFF_PCIE		0x0538
+#define  PISMU_RSOFF_PCIE_ULREG_RST_N	BIT(1)
+#define  PISMU_RSOFF_PCIE_PWR_UP_RST_N	BIT(0)
+
+#define PCIE_MPU_REG_MP_EN		0x0
+#define  MPU_MP_EN_DISABLE		BIT(0)
+
+/* Access registers in PCIe ulreg */
+static void visconti_ulreg_writel(struct visconti_pcie *pcie, u32 val, u32 reg)
+{
+	writel_relaxed(val, pcie->ulreg_base + reg);
+}
+
+static u32 visconti_ulreg_readl(struct visconti_pcie *pcie, u32 reg)
+{
+	return readl_relaxed(pcie->ulreg_base + reg);
+}
+
+/* Access registers in PCIe smu */
+static void visconti_smu_writel(struct visconti_pcie *pcie, u32 val, u32 reg)
+{
+	writel_relaxed(val, pcie->smu_base + reg);
+}
+
+/* Access registers in PCIe mpu */
+static void visconti_mpu_writel(struct visconti_pcie *pcie, u32 val, u32 reg)
+{
+	writel_relaxed(val, pcie->mpu_base + reg);
+}
+
+static u32 visconti_mpu_readl(struct visconti_pcie *pcie, u32 reg)
+{
+	return readl_relaxed(pcie->mpu_base + reg);
+}
+
+static int visconti_pcie_link_up(struct dw_pcie *pci)
+{
+	struct visconti_pcie *pcie = dev_get_drvdata(pci->dev);
+	void __iomem *addr = pcie->ulreg_base;
+	u32 val = readl_relaxed(addr + PCIE_UL_REG_V_PHY_ST_02);
+
+	return !!(val & PCIE_UL_S_L0);
+}
+
+static int visconti_pcie_start_link(struct dw_pcie *pci)
+{
+	struct visconti_pcie *pcie = dev_get_drvdata(pci->dev);
+	void __iomem *addr = pcie->ulreg_base;
+	u32 val;
+	int ret;
+
+	visconti_ulreg_writel(pcie, PCIE_UL_APP_LTSSM_ENABLE,
+			      PCIE_UL_REG_V_SII_GEN_CTRL_01);
+
+	ret = readl_relaxed_poll_timeout(addr + PCIE_UL_REG_V_PHY_ST_02,
+					 val, (val & PCIE_UL_S_L0),
+					 90000, 100000);
+	if (ret)
+		return ret;
+
+	visconti_ulreg_writel(pcie, PCIE_UL_S_INT_EVENT_MASK1_ALL,
+			      PCIE_UL_REG_S_INT_EVENT_MASK1);
+
+	if (dw_pcie_link_up(pci)) {
+		val = visconti_mpu_readl(pcie, PCIE_MPU_REG_MP_EN);
+		visconti_mpu_writel(pcie, val & ~MPU_MP_EN_DISABLE,
+				    PCIE_MPU_REG_MP_EN);
+	}
+
+	return 0;
+}
+
+static void visconti_pcie_stop_link(struct dw_pcie *pci)
+{
+	struct visconti_pcie *pcie = dev_get_drvdata(pci->dev);
+	u32 val;
+
+	val = visconti_ulreg_readl(pcie, PCIE_UL_REG_V_SII_GEN_CTRL_01);
+	val &= ~PCIE_UL_APP_LTSSM_ENABLE;
+	visconti_ulreg_writel(pcie, val, PCIE_UL_REG_V_SII_GEN_CTRL_01);
+
+	val = visconti_mpu_readl(pcie, PCIE_MPU_REG_MP_EN);
+	visconti_mpu_writel(pcie, val | MPU_MP_EN_DISABLE, PCIE_MPU_REG_MP_EN);
+}
+
+/*
+ * In this SoC specification, the CPU bus outputs the offset value from
+ * 0x40000000 to the PCIe bus, so 0x40000000 is subtracted from the CPU
+ * bus address. This 0x40000000 is also based on io_base from DT.
+ */
+static u64 visconti_pcie_cpu_addr_fixup(struct dw_pcie *pci, u64 cpu_addr)
+{
+	struct dw_pcie_rp *pp = &pci->pp;
+
+	return cpu_addr & ~pp->io_base;
+}
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+	.cpu_addr_fixup = visconti_pcie_cpu_addr_fixup,
+	.link_up = visconti_pcie_link_up,
+	.start_link = visconti_pcie_start_link,
+	.stop_link = visconti_pcie_stop_link,
+};
+
+static int visconti_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct visconti_pcie *pcie = dev_get_drvdata(pci->dev);
+	void __iomem *addr;
+	int err;
+	u32 val;
+
+	visconti_smu_writel(pcie,
+			    PISMU_CKON_PCIE_AUX_CLK | PISMU_CKON_PCIE_MSTR_ACLK,
+			    PISMU_CKON_PCIE);
+	ndelay(250);
+
+	visconti_smu_writel(pcie, PISMU_RSOFF_PCIE_ULREG_RST_N,
+			    PISMU_RSOFF_PCIE);
+	visconti_ulreg_writel(pcie, PCIE_UL_REG_S_PCIE_MODE_RC,
+			      PCIE_UL_REG_S_PCIE_MODE);
+
+	val = PCIE_UL_REG_S_PERSTN_CTRL_INIT;
+	visconti_ulreg_writel(pcie, val, PCIE_UL_REG_S_PERSTN_CTRL);
+	udelay(100);
+
+	val |= PCIE_UL_PERSTN_OUT;
+	visconti_ulreg_writel(pcie, val, PCIE_UL_REG_S_PERSTN_CTRL);
+	udelay(100);
+
+	visconti_smu_writel(pcie, PISMU_RSOFF_PCIE_PWR_UP_RST_N,
+			    PISMU_RSOFF_PCIE);
+
+	addr = pcie->ulreg_base + PCIE_UL_REG_S_PHY_INIT_03;
+	err = readl_relaxed_poll_timeout(addr, val,
+					 (val & PCIE_UL_PHY0_SRAM_INIT_DONE),
+					 100, 1000);
+	if (err)
+		return err;
+
+	visconti_ulreg_writel(pcie, PCIE_UL_PHY0_SRAM_EXT_LD_DONE,
+			      PCIE_UL_REG_S_PHY_INIT_02);
+
+	addr = pcie->ulreg_base + PCIE_UL_REG_S_SIG_MON;
+	return readl_relaxed_poll_timeout(addr, val,
+					  (val & PCIE_UL_CORE_RST_N_MON), 100,
+					  1000);
+}
+
+static const struct dw_pcie_host_ops visconti_pcie_host_ops = {
+	.host_init = visconti_pcie_host_init,
+};
+
+static int visconti_get_resources(struct platform_device *pdev,
+				  struct visconti_pcie *pcie)
+{
+	struct device *dev = &pdev->dev;
+
+	pcie->ulreg_base = devm_platform_ioremap_resource_byname(pdev, "ulreg");
+	if (IS_ERR(pcie->ulreg_base))
+		return PTR_ERR(pcie->ulreg_base);
+
+	pcie->smu_base = devm_platform_ioremap_resource_byname(pdev, "smu");
+	if (IS_ERR(pcie->smu_base))
+		return PTR_ERR(pcie->smu_base);
+
+	pcie->mpu_base = devm_platform_ioremap_resource_byname(pdev, "mpu");
+	if (IS_ERR(pcie->mpu_base))
+		return PTR_ERR(pcie->mpu_base);
+
+	pcie->refclk = devm_clk_get(dev, "ref");
+	if (IS_ERR(pcie->refclk))
+		return dev_err_probe(dev, PTR_ERR(pcie->refclk),
+				     "Failed to get ref clock\n");
+
+	pcie->coreclk = devm_clk_get(dev, "core");
+	if (IS_ERR(pcie->coreclk))
+		return dev_err_probe(dev, PTR_ERR(pcie->coreclk),
+				     "Failed to get core clock\n");
+
+	pcie->auxclk = devm_clk_get(dev, "aux");
+	if (IS_ERR(pcie->auxclk))
+		return dev_err_probe(dev, PTR_ERR(pcie->auxclk),
+				     "Failed to get aux clock\n");
+
+	return 0;
+}
+
+static int visconti_add_pcie_port(struct visconti_pcie *pcie,
+				  struct platform_device *pdev)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	struct dw_pcie_rp *pp = &pci->pp;
+
+	pp->irq = platform_get_irq_byname(pdev, "intr");
+	if (pp->irq < 0)
+		return pp->irq;
+
+	pp->ops = &visconti_pcie_host_ops;
+
+	return dw_pcie_host_init(pp);
+}
+
+static int visconti_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct visconti_pcie *pcie;
+	struct dw_pcie *pci;
+	int ret;
+
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	pci = &pcie->pci;
+	pci->dev = dev;
+	pci->ops = &dw_pcie_ops;
+
+	ret = visconti_get_resources(pdev, pcie);
+	if (ret)
+		return ret;
+
+	platform_set_drvdata(pdev, pcie);
+
+	return visconti_add_pcie_port(pcie, pdev);
+}
+
+static const struct of_device_id visconti_pcie_match[] = {
+	{ .compatible = "toshiba,visconti-pcie" },
+	{},
+};
+
+static struct platform_driver visconti_pcie_driver = {
+	.probe = visconti_pcie_probe,
+	.driver = {
+		.name = "visconti-pcie",
+		.of_match_table = visconti_pcie_match,
+		.suppress_bind_attrs = true,
+	},
+};
+builtin_platform_driver(visconti_pcie_driver);
diff --git a/drivers/pci/controller/mobiveil/Kconfig b/drivers/pci/controller/mobiveil/Kconfig
new file mode 100644
index 000000000..58ce034f7
--- /dev/null
+++ b/drivers/pci/controller/mobiveil/Kconfig
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Mobiveil-based PCIe controllers"
+	depends on PCI
+
+config PCIE_MOBIVEIL
+	bool
+
+config PCIE_MOBIVEIL_HOST
+	bool
+	depends on PCI_MSI
+	select PCIE_MOBIVEIL
+
+config PCIE_LAYERSCAPE_GEN4
+	bool "Freescale Layerscape Gen4 PCIe controller"
+	depends on ARCH_LAYERSCAPE || COMPILE_TEST
+	depends on PCI_MSI
+	select PCIE_MOBIVEIL_HOST
+	help
+	  Say Y here if you want PCIe Gen4 controller support on
+	  Layerscape SoCs.
+
+config PCIE_MOBIVEIL_PLAT
+	bool "Mobiveil AXI PCIe controller"
+	depends on ARCH_ZYNQMP || COMPILE_TEST
+	depends on OF
+	depends on PCI_MSI
+	select PCIE_MOBIVEIL_HOST
+	help
+	  Say Y here if you want to enable support for the Mobiveil AXI PCIe
+	  Soft IP. It has up to 8 outbound and inbound windows
+	  for address translation and it is a PCIe Gen4 IP.
+
+endmenu
diff --git a/drivers/pci/controller/mobiveil/Makefile b/drivers/pci/controller/mobiveil/Makefile
new file mode 100644
index 000000000..99d879de3
--- /dev/null
+++ b/drivers/pci/controller/mobiveil/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_PCIE_MOBIVEIL) += pcie-mobiveil.o
+obj-$(CONFIG_PCIE_MOBIVEIL_HOST) += pcie-mobiveil-host.o
+obj-$(CONFIG_PCIE_MOBIVEIL_PLAT) += pcie-mobiveil-plat.o
+obj-$(CONFIG_PCIE_LAYERSCAPE_GEN4) += pcie-layerscape-gen4.o
diff --git a/drivers/pci/controller/mobiveil/pcie-layerscape-gen4.c b/drivers/pci/controller/mobiveil/pcie-layerscape-gen4.c
new file mode 100644
index 000000000..d7b7350f0
--- /dev/null
+++ b/drivers/pci/controller/mobiveil/pcie-layerscape-gen4.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe Gen4 host controller driver for NXP Layerscape SoCs
+ *
+ * Copyright 2019-2020 NXP
+ *
+ * Author: Zhiqiang Hou <Zhiqiang.Hou@nxp.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
+
+#include "pcie-mobiveil.h"
+
+/* LUT and PF control registers */
+#define PCIE_LUT_OFF			0x80000
+#define PCIE_PF_OFF			0xc0000
+#define PCIE_PF_INT_STAT		0x18
+#define PF_INT_STAT_PABRST		BIT(31)
+
+#define PCIE_PF_DBG			0x7fc
+#define PF_DBG_LTSSM_MASK		0x3f
+#define PF_DBG_LTSSM_L0			0x2d /* L0 state */
+#define PF_DBG_WE			BIT(31)
+#define PF_DBG_PABR			BIT(27)
+
+#define to_ls_g4_pcie(x)		platform_get_drvdata((x)->pdev)
+
+struct ls_g4_pcie {
+	struct mobiveil_pcie pci;
+	struct delayed_work dwork;
+	int irq;
+};
+
+static inline u32 ls_g4_pcie_pf_readl(struct ls_g4_pcie *pcie, u32 off)
+{
+	return ioread32(pcie->pci.csr_axi_slave_base + PCIE_PF_OFF + off);
+}
+
+static inline void ls_g4_pcie_pf_writel(struct ls_g4_pcie *pcie,
+					u32 off, u32 val)
+{
+	iowrite32(val, pcie->pci.csr_axi_slave_base + PCIE_PF_OFF + off);
+}
+
+static int ls_g4_pcie_link_up(struct mobiveil_pcie *pci)
+{
+	struct ls_g4_pcie *pcie = to_ls_g4_pcie(pci);
+	u32 state;
+
+	state = ls_g4_pcie_pf_readl(pcie, PCIE_PF_DBG);
+	state =	state & PF_DBG_LTSSM_MASK;
+
+	if (state == PF_DBG_LTSSM_L0)
+		return 1;
+
+	return 0;
+}
+
+static void ls_g4_pcie_disable_interrupt(struct ls_g4_pcie *pcie)
+{
+	struct mobiveil_pcie *mv_pci = &pcie->pci;
+
+	mobiveil_csr_writel(mv_pci, 0, PAB_INTP_AMBA_MISC_ENB);
+}
+
+static void ls_g4_pcie_enable_interrupt(struct ls_g4_pcie *pcie)
+{
+	struct mobiveil_pcie *mv_pci = &pcie->pci;
+	u32 val;
+
+	/* Clear the interrupt status */
+	mobiveil_csr_writel(mv_pci, 0xffffffff, PAB_INTP_AMBA_MISC_STAT);
+
+	val = PAB_INTP_INTX_MASK | PAB_INTP_MSI | PAB_INTP_RESET |
+	      PAB_INTP_PCIE_UE | PAB_INTP_IE_PMREDI | PAB_INTP_IE_EC;
+	mobiveil_csr_writel(mv_pci, val, PAB_INTP_AMBA_MISC_ENB);
+}
+
+static int ls_g4_pcie_reinit_hw(struct ls_g4_pcie *pcie)
+{
+	struct mobiveil_pcie *mv_pci = &pcie->pci;
+	struct device *dev = &mv_pci->pdev->dev;
+	u32 val, act_stat;
+	int to = 100;
+
+	/* Poll for pab_csb_reset to set and PAB activity to clear */
+	do {
+		usleep_range(10, 15);
+		val = ls_g4_pcie_pf_readl(pcie, PCIE_PF_INT_STAT);
+		act_stat = mobiveil_csr_readl(mv_pci, PAB_ACTIVITY_STAT);
+	} while (((val & PF_INT_STAT_PABRST) == 0 || act_stat) && to--);
+	if (to < 0) {
+		dev_err(dev, "Poll PABRST&PABACT timeout\n");
+		return -EIO;
+	}
+
+	/* clear PEX_RESET bit in PEX_PF0_DBG register */
+	val = ls_g4_pcie_pf_readl(pcie, PCIE_PF_DBG);
+	val |= PF_DBG_WE;
+	ls_g4_pcie_pf_writel(pcie, PCIE_PF_DBG, val);
+
+	val = ls_g4_pcie_pf_readl(pcie, PCIE_PF_DBG);
+	val |= PF_DBG_PABR;
+	ls_g4_pcie_pf_writel(pcie, PCIE_PF_DBG, val);
+
+	val = ls_g4_pcie_pf_readl(pcie, PCIE_PF_DBG);
+	val &= ~PF_DBG_WE;
+	ls_g4_pcie_pf_writel(pcie, PCIE_PF_DBG, val);
+
+	mobiveil_host_init(mv_pci, true);
+
+	to = 100;
+	while (!ls_g4_pcie_link_up(mv_pci) && to--)
+		usleep_range(200, 250);
+	if (to < 0) {
+		dev_err(dev, "PCIe link training timeout\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static irqreturn_t ls_g4_pcie_isr(int irq, void *dev_id)
+{
+	struct ls_g4_pcie *pcie = (struct ls_g4_pcie *)dev_id;
+	struct mobiveil_pcie *mv_pci = &pcie->pci;
+	u32 val;
+
+	val = mobiveil_csr_readl(mv_pci, PAB_INTP_AMBA_MISC_STAT);
+	if (!val)
+		return IRQ_NONE;
+
+	if (val & PAB_INTP_RESET) {
+		ls_g4_pcie_disable_interrupt(pcie);
+		schedule_delayed_work(&pcie->dwork, msecs_to_jiffies(1));
+	}
+
+	mobiveil_csr_writel(mv_pci, val, PAB_INTP_AMBA_MISC_STAT);
+
+	return IRQ_HANDLED;
+}
+
+static int ls_g4_pcie_interrupt_init(struct mobiveil_pcie *mv_pci)
+{
+	struct ls_g4_pcie *pcie = to_ls_g4_pcie(mv_pci);
+	struct platform_device *pdev = mv_pci->pdev;
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	pcie->irq = platform_get_irq_byname(pdev, "intr");
+	if (pcie->irq < 0)
+		return pcie->irq;
+
+	ret = devm_request_irq(dev, pcie->irq, ls_g4_pcie_isr,
+			       IRQF_SHARED, pdev->name, pcie);
+	if (ret) {
+		dev_err(dev, "Can't register PCIe IRQ, errno = %d\n", ret);
+		return  ret;
+	}
+
+	return 0;
+}
+
+static void ls_g4_pcie_reset(struct work_struct *work)
+{
+	struct delayed_work *dwork = container_of(work, struct delayed_work,
+						  work);
+	struct ls_g4_pcie *pcie = container_of(dwork, struct ls_g4_pcie, dwork);
+	struct mobiveil_pcie *mv_pci = &pcie->pci;
+	u16 ctrl;
+
+	ctrl = mobiveil_csr_readw(mv_pci, PCI_BRIDGE_CONTROL);
+	ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+	mobiveil_csr_writew(mv_pci, ctrl, PCI_BRIDGE_CONTROL);
+
+	if (!ls_g4_pcie_reinit_hw(pcie))
+		return;
+
+	ls_g4_pcie_enable_interrupt(pcie);
+}
+
+static struct mobiveil_rp_ops ls_g4_pcie_rp_ops = {
+	.interrupt_init = ls_g4_pcie_interrupt_init,
+};
+
+static const struct mobiveil_pab_ops ls_g4_pcie_pab_ops = {
+	.link_up = ls_g4_pcie_link_up,
+};
+
+static int __init ls_g4_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct pci_host_bridge *bridge;
+	struct mobiveil_pcie *mv_pci;
+	struct ls_g4_pcie *pcie;
+	struct device_node *np = dev->of_node;
+	int ret;
+
+	if (!of_parse_phandle(np, "msi-parent", 0)) {
+		dev_err(dev, "Failed to find msi-parent\n");
+		return -EINVAL;
+	}
+
+	bridge = devm_pci_alloc_host_bridge(dev, sizeof(*pcie));
+	if (!bridge)
+		return -ENOMEM;
+
+	pcie = pci_host_bridge_priv(bridge);
+	mv_pci = &pcie->pci;
+
+	mv_pci->pdev = pdev;
+	mv_pci->ops = &ls_g4_pcie_pab_ops;
+	mv_pci->rp.ops = &ls_g4_pcie_rp_ops;
+	mv_pci->rp.bridge = bridge;
+
+	platform_set_drvdata(pdev, pcie);
+
+	INIT_DELAYED_WORK(&pcie->dwork, ls_g4_pcie_reset);
+
+	ret = mobiveil_pcie_host_probe(mv_pci);
+	if (ret) {
+		dev_err(dev, "Fail to probe\n");
+		return  ret;
+	}
+
+	ls_g4_pcie_enable_interrupt(pcie);
+
+	return 0;
+}
+
+static const struct of_device_id ls_g4_pcie_of_match[] = {
+	{ .compatible = "fsl,lx2160a-pcie", },
+	{ },
+};
+
+static struct platform_driver ls_g4_pcie_driver = {
+	.driver = {
+		.name = "layerscape-pcie-gen4",
+		.of_match_table = ls_g4_pcie_of_match,
+		.suppress_bind_attrs = true,
+	},
+};
+
+builtin_platform_driver_probe(ls_g4_pcie_driver, ls_g4_pcie_probe);
diff --git a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c
new file mode 100644
index 000000000..45b97a4b1
--- /dev/null
+++ b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c
@@ -0,0 +1,590 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for Mobiveil PCIe Host controller
+ *
+ * Copyright (c) 2018 Mobiveil Inc.
+ * Copyright 2019-2020 NXP
+ *
+ * Author: Subrahmanya Lingappa <l.subrahmanya@mobiveil.co.in>
+ *	   Hou Zhiqiang <Zhiqiang.Hou@nxp.com>
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "pcie-mobiveil.h"
+
+static bool mobiveil_pcie_valid_device(struct pci_bus *bus, unsigned int devfn)
+{
+	/* Only one device down on each root port */
+	if (pci_is_root_bus(bus) && (devfn > 0))
+		return false;
+
+	/*
+	 * Do not read more than one device on the bus directly
+	 * attached to RC
+	 */
+	if ((bus->primary == to_pci_host_bridge(bus->bridge)->busnr) && (PCI_SLOT(devfn) > 0))
+		return false;
+
+	return true;
+}
+
+/*
+ * mobiveil_pcie_map_bus - routine to get the configuration base of either
+ * root port or endpoint
+ */
+static void __iomem *mobiveil_pcie_map_bus(struct pci_bus *bus,
+					   unsigned int devfn, int where)
+{
+	struct mobiveil_pcie *pcie = bus->sysdata;
+	struct mobiveil_root_port *rp = &pcie->rp;
+	u32 value;
+
+	if (!mobiveil_pcie_valid_device(bus, devfn))
+		return NULL;
+
+	/* RC config access */
+	if (pci_is_root_bus(bus))
+		return pcie->csr_axi_slave_base + where;
+
+	/*
+	 * EP config access (in Config/APIO space)
+	 * Program PEX Address base (31..16 bits) with appropriate value
+	 * (BDF) in PAB_AXI_AMAP_PEX_WIN_L0 Register.
+	 * Relies on pci_lock serialization
+	 */
+	value = bus->number << PAB_BUS_SHIFT |
+		PCI_SLOT(devfn) << PAB_DEVICE_SHIFT |
+		PCI_FUNC(devfn) << PAB_FUNCTION_SHIFT;
+
+	mobiveil_csr_writel(pcie, value, PAB_AXI_AMAP_PEX_WIN_L(WIN_NUM_0));
+
+	return rp->config_axi_slave_base + where;
+}
+
+static struct pci_ops mobiveil_pcie_ops = {
+	.map_bus = mobiveil_pcie_map_bus,
+	.read = pci_generic_config_read,
+	.write = pci_generic_config_write,
+};
+
+static void mobiveil_pcie_isr(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct mobiveil_pcie *pcie = irq_desc_get_handler_data(desc);
+	struct device *dev = &pcie->pdev->dev;
+	struct mobiveil_root_port *rp = &pcie->rp;
+	struct mobiveil_msi *msi = &rp->msi;
+	u32 msi_data, msi_addr_lo, msi_addr_hi;
+	u32 intr_status, msi_status;
+	unsigned long shifted_status;
+	u32 bit, val, mask;
+
+	/*
+	 * The core provides a single interrupt for both INTx/MSI messages.
+	 * So we'll read both INTx and MSI status
+	 */
+
+	chained_irq_enter(chip, desc);
+
+	/* read INTx status */
+	val = mobiveil_csr_readl(pcie, PAB_INTP_AMBA_MISC_STAT);
+	mask = mobiveil_csr_readl(pcie, PAB_INTP_AMBA_MISC_ENB);
+	intr_status = val & mask;
+
+	/* Handle INTx */
+	if (intr_status & PAB_INTP_INTX_MASK) {
+		shifted_status = mobiveil_csr_readl(pcie,
+						    PAB_INTP_AMBA_MISC_STAT);
+		shifted_status &= PAB_INTP_INTX_MASK;
+		shifted_status >>= PAB_INTX_START;
+		do {
+			for_each_set_bit(bit, &shifted_status, PCI_NUM_INTX) {
+				int ret;
+				ret = generic_handle_domain_irq(rp->intx_domain,
+								bit + 1);
+				if (ret)
+					dev_err_ratelimited(dev, "unexpected IRQ, INT%d\n",
+							    bit);
+
+				/* clear interrupt handled */
+				mobiveil_csr_writel(pcie,
+						    1 << (PAB_INTX_START + bit),
+						    PAB_INTP_AMBA_MISC_STAT);
+			}
+
+			shifted_status = mobiveil_csr_readl(pcie,
+							    PAB_INTP_AMBA_MISC_STAT);
+			shifted_status &= PAB_INTP_INTX_MASK;
+			shifted_status >>= PAB_INTX_START;
+		} while (shifted_status != 0);
+	}
+
+	/* read extra MSI status register */
+	msi_status = readl_relaxed(pcie->apb_csr_base + MSI_STATUS_OFFSET);
+
+	/* handle MSI interrupts */
+	while (msi_status & 1) {
+		msi_data = readl_relaxed(pcie->apb_csr_base + MSI_DATA_OFFSET);
+
+		/*
+		 * MSI_STATUS_OFFSET register gets updated to zero
+		 * once we pop not only the MSI data but also address
+		 * from MSI hardware FIFO. So keeping these following
+		 * two dummy reads.
+		 */
+		msi_addr_lo = readl_relaxed(pcie->apb_csr_base +
+					    MSI_ADDR_L_OFFSET);
+		msi_addr_hi = readl_relaxed(pcie->apb_csr_base +
+					    MSI_ADDR_H_OFFSET);
+		dev_dbg(dev, "MSI registers, data: %08x, addr: %08x:%08x\n",
+			msi_data, msi_addr_hi, msi_addr_lo);
+
+		generic_handle_domain_irq(msi->dev_domain, msi_data);
+
+		msi_status = readl_relaxed(pcie->apb_csr_base +
+					   MSI_STATUS_OFFSET);
+	}
+
+	/* Clear the interrupt status */
+	mobiveil_csr_writel(pcie, intr_status, PAB_INTP_AMBA_MISC_STAT);
+	chained_irq_exit(chip, desc);
+}
+
+static int mobiveil_pcie_parse_dt(struct mobiveil_pcie *pcie)
+{
+	struct device *dev = &pcie->pdev->dev;
+	struct platform_device *pdev = pcie->pdev;
+	struct device_node *node = dev->of_node;
+	struct mobiveil_root_port *rp = &pcie->rp;
+	struct resource *res;
+
+	/* map config resource */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					   "config_axi_slave");
+	rp->config_axi_slave_base = devm_pci_remap_cfg_resource(dev, res);
+	if (IS_ERR(rp->config_axi_slave_base))
+		return PTR_ERR(rp->config_axi_slave_base);
+	rp->ob_io_res = res;
+
+	/* map csr resource */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					   "csr_axi_slave");
+	pcie->csr_axi_slave_base = devm_pci_remap_cfg_resource(dev, res);
+	if (IS_ERR(pcie->csr_axi_slave_base))
+		return PTR_ERR(pcie->csr_axi_slave_base);
+	pcie->pcie_reg_base = res->start;
+
+	/* read the number of windows requested */
+	if (of_property_read_u32(node, "apio-wins", &pcie->apio_wins))
+		pcie->apio_wins = MAX_PIO_WINDOWS;
+
+	if (of_property_read_u32(node, "ppio-wins", &pcie->ppio_wins))
+		pcie->ppio_wins = MAX_PIO_WINDOWS;
+
+	return 0;
+}
+
+static void mobiveil_pcie_enable_msi(struct mobiveil_pcie *pcie)
+{
+	phys_addr_t msg_addr = pcie->pcie_reg_base;
+	struct mobiveil_msi *msi = &pcie->rp.msi;
+
+	msi->num_of_vectors = PCI_NUM_MSI;
+	msi->msi_pages_phys = (phys_addr_t)msg_addr;
+
+	writel_relaxed(lower_32_bits(msg_addr),
+		       pcie->apb_csr_base + MSI_BASE_LO_OFFSET);
+	writel_relaxed(upper_32_bits(msg_addr),
+		       pcie->apb_csr_base + MSI_BASE_HI_OFFSET);
+	writel_relaxed(4096, pcie->apb_csr_base + MSI_SIZE_OFFSET);
+	writel_relaxed(1, pcie->apb_csr_base + MSI_ENABLE_OFFSET);
+}
+
+int mobiveil_host_init(struct mobiveil_pcie *pcie, bool reinit)
+{
+	struct mobiveil_root_port *rp = &pcie->rp;
+	struct pci_host_bridge *bridge = rp->bridge;
+	u32 value, pab_ctrl, type;
+	struct resource_entry *win;
+
+	pcie->ib_wins_configured = 0;
+	pcie->ob_wins_configured = 0;
+
+	if (!reinit) {
+		/* setup bus numbers */
+		value = mobiveil_csr_readl(pcie, PCI_PRIMARY_BUS);
+		value &= 0xff000000;
+		value |= 0x00ff0100;
+		mobiveil_csr_writel(pcie, value, PCI_PRIMARY_BUS);
+	}
+
+	/*
+	 * program Bus Master Enable Bit in Command Register in PAB Config
+	 * Space
+	 */
+	value = mobiveil_csr_readl(pcie, PCI_COMMAND);
+	value |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+	mobiveil_csr_writel(pcie, value, PCI_COMMAND);
+
+	/*
+	 * program PIO Enable Bit to 1 (and PEX PIO Enable to 1) in PAB_CTRL
+	 * register
+	 */
+	pab_ctrl = mobiveil_csr_readl(pcie, PAB_CTRL);
+	pab_ctrl |= (1 << AMBA_PIO_ENABLE_SHIFT) | (1 << PEX_PIO_ENABLE_SHIFT);
+	mobiveil_csr_writel(pcie, pab_ctrl, PAB_CTRL);
+
+	/*
+	 * program PIO Enable Bit to 1 and Config Window Enable Bit to 1 in
+	 * PAB_AXI_PIO_CTRL Register
+	 */
+	value = mobiveil_csr_readl(pcie, PAB_AXI_PIO_CTRL);
+	value |= APIO_EN_MASK;
+	mobiveil_csr_writel(pcie, value, PAB_AXI_PIO_CTRL);
+
+	/* Enable PCIe PIO master */
+	value = mobiveil_csr_readl(pcie, PAB_PEX_PIO_CTRL);
+	value |= 1 << PIO_ENABLE_SHIFT;
+	mobiveil_csr_writel(pcie, value, PAB_PEX_PIO_CTRL);
+
+	/*
+	 * we'll program one outbound window for config reads and
+	 * another default inbound window for all the upstream traffic
+	 * rest of the outbound windows will be configured according to
+	 * the "ranges" field defined in device tree
+	 */
+
+	/* config outbound translation window */
+	program_ob_windows(pcie, WIN_NUM_0, rp->ob_io_res->start, 0,
+			   CFG_WINDOW_TYPE, resource_size(rp->ob_io_res));
+
+	/* memory inbound translation window */
+	program_ib_windows(pcie, WIN_NUM_0, 0, 0, MEM_WINDOW_TYPE, IB_WIN_SIZE);
+
+	/* Get the I/O and memory ranges from DT */
+	resource_list_for_each_entry(win, &bridge->windows) {
+		if (resource_type(win->res) == IORESOURCE_MEM)
+			type = MEM_WINDOW_TYPE;
+		else if (resource_type(win->res) == IORESOURCE_IO)
+			type = IO_WINDOW_TYPE;
+		else
+			continue;
+
+		/* configure outbound translation window */
+		program_ob_windows(pcie, pcie->ob_wins_configured,
+				   win->res->start,
+				   win->res->start - win->offset,
+				   type, resource_size(win->res));
+	}
+
+	/* fixup for PCIe class register */
+	value = mobiveil_csr_readl(pcie, PAB_INTP_AXI_PIO_CLASS);
+	value &= 0xff;
+	value |= PCI_CLASS_BRIDGE_PCI_NORMAL << 8;
+	mobiveil_csr_writel(pcie, value, PAB_INTP_AXI_PIO_CLASS);
+
+	return 0;
+}
+
+static void mobiveil_mask_intx_irq(struct irq_data *data)
+{
+	struct mobiveil_pcie *pcie = irq_data_get_irq_chip_data(data);
+	struct mobiveil_root_port *rp;
+	unsigned long flags;
+	u32 mask, shifted_val;
+
+	rp = &pcie->rp;
+	mask = 1 << ((data->hwirq + PAB_INTX_START) - 1);
+	raw_spin_lock_irqsave(&rp->intx_mask_lock, flags);
+	shifted_val = mobiveil_csr_readl(pcie, PAB_INTP_AMBA_MISC_ENB);
+	shifted_val &= ~mask;
+	mobiveil_csr_writel(pcie, shifted_val, PAB_INTP_AMBA_MISC_ENB);
+	raw_spin_unlock_irqrestore(&rp->intx_mask_lock, flags);
+}
+
+static void mobiveil_unmask_intx_irq(struct irq_data *data)
+{
+	struct mobiveil_pcie *pcie = irq_data_get_irq_chip_data(data);
+	struct mobiveil_root_port *rp;
+	unsigned long flags;
+	u32 shifted_val, mask;
+
+	rp = &pcie->rp;
+	mask = 1 << ((data->hwirq + PAB_INTX_START) - 1);
+	raw_spin_lock_irqsave(&rp->intx_mask_lock, flags);
+	shifted_val = mobiveil_csr_readl(pcie, PAB_INTP_AMBA_MISC_ENB);
+	shifted_val |= mask;
+	mobiveil_csr_writel(pcie, shifted_val, PAB_INTP_AMBA_MISC_ENB);
+	raw_spin_unlock_irqrestore(&rp->intx_mask_lock, flags);
+}
+
+static struct irq_chip intx_irq_chip = {
+	.name = "mobiveil_pcie:intx",
+	.irq_enable = mobiveil_unmask_intx_irq,
+	.irq_disable = mobiveil_mask_intx_irq,
+	.irq_mask = mobiveil_mask_intx_irq,
+	.irq_unmask = mobiveil_unmask_intx_irq,
+};
+
+/* routine to setup the INTx related data */
+static int mobiveil_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
+				  irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &intx_irq_chip, handle_level_irq);
+	irq_set_chip_data(irq, domain->host_data);
+
+	return 0;
+}
+
+/* INTx domain operations structure */
+static const struct irq_domain_ops intx_domain_ops = {
+	.map = mobiveil_pcie_intx_map,
+};
+
+static struct irq_chip mobiveil_msi_irq_chip = {
+	.name = "Mobiveil PCIe MSI",
+	.irq_mask = pci_msi_mask_irq,
+	.irq_unmask = pci_msi_unmask_irq,
+};
+
+static struct msi_domain_info mobiveil_msi_domain_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		   MSI_FLAG_PCI_MSIX),
+	.chip	= &mobiveil_msi_irq_chip,
+};
+
+static void mobiveil_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct mobiveil_pcie *pcie = irq_data_get_irq_chip_data(data);
+	phys_addr_t addr = pcie->pcie_reg_base + (data->hwirq * sizeof(int));
+
+	msg->address_lo = lower_32_bits(addr);
+	msg->address_hi = upper_32_bits(addr);
+	msg->data = data->hwirq;
+
+	dev_dbg(&pcie->pdev->dev, "msi#%d address_hi %#x address_lo %#x\n",
+		(int)data->hwirq, msg->address_hi, msg->address_lo);
+}
+
+static int mobiveil_msi_set_affinity(struct irq_data *irq_data,
+				     const struct cpumask *mask, bool force)
+{
+	return -EINVAL;
+}
+
+static struct irq_chip mobiveil_msi_bottom_irq_chip = {
+	.name			= "Mobiveil MSI",
+	.irq_compose_msi_msg	= mobiveil_compose_msi_msg,
+	.irq_set_affinity	= mobiveil_msi_set_affinity,
+};
+
+static int mobiveil_irq_msi_domain_alloc(struct irq_domain *domain,
+					 unsigned int virq,
+					 unsigned int nr_irqs, void *args)
+{
+	struct mobiveil_pcie *pcie = domain->host_data;
+	struct mobiveil_msi *msi = &pcie->rp.msi;
+	unsigned long bit;
+
+	WARN_ON(nr_irqs != 1);
+	mutex_lock(&msi->lock);
+
+	bit = find_first_zero_bit(msi->msi_irq_in_use, msi->num_of_vectors);
+	if (bit >= msi->num_of_vectors) {
+		mutex_unlock(&msi->lock);
+		return -ENOSPC;
+	}
+
+	set_bit(bit, msi->msi_irq_in_use);
+
+	mutex_unlock(&msi->lock);
+
+	irq_domain_set_info(domain, virq, bit, &mobiveil_msi_bottom_irq_chip,
+			    domain->host_data, handle_level_irq, NULL, NULL);
+	return 0;
+}
+
+static void mobiveil_irq_msi_domain_free(struct irq_domain *domain,
+					 unsigned int virq,
+					 unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct mobiveil_pcie *pcie = irq_data_get_irq_chip_data(d);
+	struct mobiveil_msi *msi = &pcie->rp.msi;
+
+	mutex_lock(&msi->lock);
+
+	if (!test_bit(d->hwirq, msi->msi_irq_in_use))
+		dev_err(&pcie->pdev->dev, "trying to free unused MSI#%lu\n",
+			d->hwirq);
+	else
+		__clear_bit(d->hwirq, msi->msi_irq_in_use);
+
+	mutex_unlock(&msi->lock);
+}
+static const struct irq_domain_ops msi_domain_ops = {
+	.alloc	= mobiveil_irq_msi_domain_alloc,
+	.free	= mobiveil_irq_msi_domain_free,
+};
+
+static int mobiveil_allocate_msi_domains(struct mobiveil_pcie *pcie)
+{
+	struct device *dev = &pcie->pdev->dev;
+	struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node);
+	struct mobiveil_msi *msi = &pcie->rp.msi;
+
+	mutex_init(&msi->lock);
+	msi->dev_domain = irq_domain_add_linear(NULL, msi->num_of_vectors,
+						&msi_domain_ops, pcie);
+	if (!msi->dev_domain) {
+		dev_err(dev, "failed to create IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	msi->msi_domain = pci_msi_create_irq_domain(fwnode,
+						    &mobiveil_msi_domain_info,
+						    msi->dev_domain);
+	if (!msi->msi_domain) {
+		dev_err(dev, "failed to create MSI domain\n");
+		irq_domain_remove(msi->dev_domain);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int mobiveil_pcie_init_irq_domain(struct mobiveil_pcie *pcie)
+{
+	struct device *dev = &pcie->pdev->dev;
+	struct device_node *node = dev->of_node;
+	struct mobiveil_root_port *rp = &pcie->rp;
+
+	/* setup INTx */
+	rp->intx_domain = irq_domain_add_linear(node, PCI_NUM_INTX,
+						&intx_domain_ops, pcie);
+
+	if (!rp->intx_domain) {
+		dev_err(dev, "Failed to get a INTx IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	raw_spin_lock_init(&rp->intx_mask_lock);
+
+	/* setup MSI */
+	return mobiveil_allocate_msi_domains(pcie);
+}
+
+static int mobiveil_pcie_integrated_interrupt_init(struct mobiveil_pcie *pcie)
+{
+	struct platform_device *pdev = pcie->pdev;
+	struct device *dev = &pdev->dev;
+	struct mobiveil_root_port *rp = &pcie->rp;
+	struct resource *res;
+	int ret;
+
+	/* map MSI config resource */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "apb_csr");
+	pcie->apb_csr_base = devm_pci_remap_cfg_resource(dev, res);
+	if (IS_ERR(pcie->apb_csr_base))
+		return PTR_ERR(pcie->apb_csr_base);
+
+	/* setup MSI hardware registers */
+	mobiveil_pcie_enable_msi(pcie);
+
+	rp->irq = platform_get_irq(pdev, 0);
+	if (rp->irq < 0)
+		return rp->irq;
+
+	/* initialize the IRQ domains */
+	ret = mobiveil_pcie_init_irq_domain(pcie);
+	if (ret) {
+		dev_err(dev, "Failed creating IRQ Domain\n");
+		return ret;
+	}
+
+	irq_set_chained_handler_and_data(rp->irq, mobiveil_pcie_isr, pcie);
+
+	/* Enable interrupts */
+	mobiveil_csr_writel(pcie, (PAB_INTP_INTX_MASK | PAB_INTP_MSI_MASK),
+			    PAB_INTP_AMBA_MISC_ENB);
+
+
+	return 0;
+}
+
+static int mobiveil_pcie_interrupt_init(struct mobiveil_pcie *pcie)
+{
+	struct mobiveil_root_port *rp = &pcie->rp;
+
+	if (rp->ops->interrupt_init)
+		return rp->ops->interrupt_init(pcie);
+
+	return mobiveil_pcie_integrated_interrupt_init(pcie);
+}
+
+static bool mobiveil_pcie_is_bridge(struct mobiveil_pcie *pcie)
+{
+	u32 header_type;
+
+	header_type = mobiveil_csr_readb(pcie, PCI_HEADER_TYPE);
+	header_type &= 0x7f;
+
+	return header_type == PCI_HEADER_TYPE_BRIDGE;
+}
+
+int mobiveil_pcie_host_probe(struct mobiveil_pcie *pcie)
+{
+	struct mobiveil_root_port *rp = &pcie->rp;
+	struct pci_host_bridge *bridge = rp->bridge;
+	struct device *dev = &pcie->pdev->dev;
+	int ret;
+
+	ret = mobiveil_pcie_parse_dt(pcie);
+	if (ret) {
+		dev_err(dev, "Parsing DT failed, ret: %x\n", ret);
+		return ret;
+	}
+
+	if (!mobiveil_pcie_is_bridge(pcie))
+		return -ENODEV;
+
+	/*
+	 * configure all inbound and outbound windows and prepare the RC for
+	 * config access
+	 */
+	ret = mobiveil_host_init(pcie, false);
+	if (ret) {
+		dev_err(dev, "Failed to initialize host\n");
+		return ret;
+	}
+
+	ret = mobiveil_pcie_interrupt_init(pcie);
+	if (ret) {
+		dev_err(dev, "Interrupt init failed\n");
+		return ret;
+	}
+
+	/* Initialize bridge */
+	bridge->sysdata = pcie;
+	bridge->ops = &mobiveil_pcie_ops;
+
+	ret = mobiveil_bringup_link(pcie);
+	if (ret) {
+		dev_info(dev, "link bring-up failed\n");
+		return ret;
+	}
+
+	return pci_host_probe(bridge);
+}
diff --git a/drivers/pci/controller/mobiveil/pcie-mobiveil-plat.c b/drivers/pci/controller/mobiveil/pcie-mobiveil-plat.c
new file mode 100644
index 000000000..c5bb87ff6
--- /dev/null
+++ b/drivers/pci/controller/mobiveil/pcie-mobiveil-plat.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for Mobiveil PCIe Host controller
+ *
+ * Copyright (c) 2018 Mobiveil Inc.
+ * Copyright 2019 NXP
+ *
+ * Author: Subrahmanya Lingappa <l.subrahmanya@mobiveil.co.in>
+ *	   Hou Zhiqiang <Zhiqiang.Hou@nxp.com>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "pcie-mobiveil.h"
+
+static int mobiveil_pcie_probe(struct platform_device *pdev)
+{
+	struct mobiveil_pcie *pcie;
+	struct pci_host_bridge *bridge;
+	struct device *dev = &pdev->dev;
+
+	/* allocate the PCIe port */
+	bridge = devm_pci_alloc_host_bridge(dev, sizeof(*pcie));
+	if (!bridge)
+		return -ENOMEM;
+
+	pcie = pci_host_bridge_priv(bridge);
+	pcie->rp.bridge = bridge;
+
+	pcie->pdev = pdev;
+
+	return mobiveil_pcie_host_probe(pcie);
+}
+
+static const struct of_device_id mobiveil_pcie_of_match[] = {
+	{.compatible = "mbvl,gpex40-pcie",},
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, mobiveil_pcie_of_match);
+
+static struct platform_driver mobiveil_pcie_driver = {
+	.probe = mobiveil_pcie_probe,
+	.driver = {
+		.name = "mobiveil-pcie",
+		.of_match_table = mobiveil_pcie_of_match,
+		.suppress_bind_attrs = true,
+	},
+};
+
+builtin_platform_driver(mobiveil_pcie_driver);
+
+MODULE_DESCRIPTION("Mobiveil PCIe host controller driver");
+MODULE_AUTHOR("Subrahmanya Lingappa <l.subrahmanya@mobiveil.co.in>");
diff --git a/drivers/pci/controller/mobiveil/pcie-mobiveil.c b/drivers/pci/controller/mobiveil/pcie-mobiveil.c
new file mode 100644
index 000000000..62ecbaeb0
--- /dev/null
+++ b/drivers/pci/controller/mobiveil/pcie-mobiveil.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for Mobiveil PCIe Host controller
+ *
+ * Copyright (c) 2018 Mobiveil Inc.
+ * Copyright 2019 NXP
+ *
+ * Author: Subrahmanya Lingappa <l.subrahmanya@mobiveil.co.in>
+ *	   Hou Zhiqiang <Zhiqiang.Hou@nxp.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+
+#include "pcie-mobiveil.h"
+
+/*
+ * mobiveil_pcie_sel_page - routine to access paged register
+ *
+ * Registers whose address greater than PAGED_ADDR_BNDRY (0xc00) are paged,
+ * for this scheme to work extracted higher 6 bits of the offset will be
+ * written to pg_sel field of PAB_CTRL register and rest of the lower 10
+ * bits enabled with PAGED_ADDR_BNDRY are used as offset of the register.
+ */
+static void mobiveil_pcie_sel_page(struct mobiveil_pcie *pcie, u8 pg_idx)
+{
+	u32 val;
+
+	val = readl(pcie->csr_axi_slave_base + PAB_CTRL);
+	val &= ~(PAGE_SEL_MASK << PAGE_SEL_SHIFT);
+	val |= (pg_idx & PAGE_SEL_MASK) << PAGE_SEL_SHIFT;
+
+	writel(val, pcie->csr_axi_slave_base + PAB_CTRL);
+}
+
+static void __iomem *mobiveil_pcie_comp_addr(struct mobiveil_pcie *pcie,
+					     u32 off)
+{
+	if (off < PAGED_ADDR_BNDRY) {
+		/* For directly accessed registers, clear the pg_sel field */
+		mobiveil_pcie_sel_page(pcie, 0);
+		return pcie->csr_axi_slave_base + off;
+	}
+
+	mobiveil_pcie_sel_page(pcie, OFFSET_TO_PAGE_IDX(off));
+	return pcie->csr_axi_slave_base + OFFSET_TO_PAGE_ADDR(off);
+}
+
+static int mobiveil_pcie_read(void __iomem *addr, int size, u32 *val)
+{
+	if ((uintptr_t)addr & (size - 1)) {
+		*val = 0;
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	}
+
+	switch (size) {
+	case 4:
+		*val = readl(addr);
+		break;
+	case 2:
+		*val = readw(addr);
+		break;
+	case 1:
+		*val = readb(addr);
+		break;
+	default:
+		*val = 0;
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int mobiveil_pcie_write(void __iomem *addr, int size, u32 val)
+{
+	if ((uintptr_t)addr & (size - 1))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	switch (size) {
+	case 4:
+		writel(val, addr);
+		break;
+	case 2:
+		writew(val, addr);
+		break;
+	case 1:
+		writeb(val, addr);
+		break;
+	default:
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+u32 mobiveil_csr_read(struct mobiveil_pcie *pcie, u32 off, size_t size)
+{
+	void __iomem *addr;
+	u32 val;
+	int ret;
+
+	addr = mobiveil_pcie_comp_addr(pcie, off);
+
+	ret = mobiveil_pcie_read(addr, size, &val);
+	if (ret)
+		dev_err(&pcie->pdev->dev, "read CSR address failed\n");
+
+	return val;
+}
+
+void mobiveil_csr_write(struct mobiveil_pcie *pcie, u32 val, u32 off,
+			       size_t size)
+{
+	void __iomem *addr;
+	int ret;
+
+	addr = mobiveil_pcie_comp_addr(pcie, off);
+
+	ret = mobiveil_pcie_write(addr, size, val);
+	if (ret)
+		dev_err(&pcie->pdev->dev, "write CSR address failed\n");
+}
+
+bool mobiveil_pcie_link_up(struct mobiveil_pcie *pcie)
+{
+	if (pcie->ops->link_up)
+		return pcie->ops->link_up(pcie);
+
+	return (mobiveil_csr_readl(pcie, LTSSM_STATUS) &
+		LTSSM_STATUS_L0_MASK) == LTSSM_STATUS_L0;
+}
+
+void program_ib_windows(struct mobiveil_pcie *pcie, int win_num,
+			u64 cpu_addr, u64 pci_addr, u32 type, u64 size)
+{
+	u32 value;
+	u64 size64 = ~(size - 1);
+
+	if (win_num >= pcie->ppio_wins) {
+		dev_err(&pcie->pdev->dev,
+			"ERROR: max inbound windows reached !\n");
+		return;
+	}
+
+	value = mobiveil_csr_readl(pcie, PAB_PEX_AMAP_CTRL(win_num));
+	value &= ~(AMAP_CTRL_TYPE_MASK << AMAP_CTRL_TYPE_SHIFT | WIN_SIZE_MASK);
+	value |= type << AMAP_CTRL_TYPE_SHIFT | 1 << AMAP_CTRL_EN_SHIFT |
+		 (lower_32_bits(size64) & WIN_SIZE_MASK);
+	mobiveil_csr_writel(pcie, value, PAB_PEX_AMAP_CTRL(win_num));
+
+	mobiveil_csr_writel(pcie, upper_32_bits(size64),
+			    PAB_EXT_PEX_AMAP_SIZEN(win_num));
+
+	mobiveil_csr_writel(pcie, lower_32_bits(cpu_addr),
+			    PAB_PEX_AMAP_AXI_WIN(win_num));
+	mobiveil_csr_writel(pcie, upper_32_bits(cpu_addr),
+			    PAB_EXT_PEX_AMAP_AXI_WIN(win_num));
+
+	mobiveil_csr_writel(pcie, lower_32_bits(pci_addr),
+			    PAB_PEX_AMAP_PEX_WIN_L(win_num));
+	mobiveil_csr_writel(pcie, upper_32_bits(pci_addr),
+			    PAB_PEX_AMAP_PEX_WIN_H(win_num));
+
+	pcie->ib_wins_configured++;
+}
+
+/*
+ * routine to program the outbound windows
+ */
+void program_ob_windows(struct mobiveil_pcie *pcie, int win_num,
+			u64 cpu_addr, u64 pci_addr, u32 type, u64 size)
+{
+	u32 value;
+	u64 size64 = ~(size - 1);
+
+	if (win_num >= pcie->apio_wins) {
+		dev_err(&pcie->pdev->dev,
+			"ERROR: max outbound windows reached !\n");
+		return;
+	}
+
+	/*
+	 * program Enable Bit to 1, Type Bit to (00) base 2, AXI Window Size Bit
+	 * to 4 KB in PAB_AXI_AMAP_CTRL register
+	 */
+	value = mobiveil_csr_readl(pcie, PAB_AXI_AMAP_CTRL(win_num));
+	value &= ~(WIN_TYPE_MASK << WIN_TYPE_SHIFT | WIN_SIZE_MASK);
+	value |= 1 << WIN_ENABLE_SHIFT | type << WIN_TYPE_SHIFT |
+		 (lower_32_bits(size64) & WIN_SIZE_MASK);
+	mobiveil_csr_writel(pcie, value, PAB_AXI_AMAP_CTRL(win_num));
+
+	mobiveil_csr_writel(pcie, upper_32_bits(size64),
+			    PAB_EXT_AXI_AMAP_SIZE(win_num));
+
+	/*
+	 * program AXI window base with appropriate value in
+	 * PAB_AXI_AMAP_AXI_WIN0 register
+	 */
+	mobiveil_csr_writel(pcie,
+			    lower_32_bits(cpu_addr) & (~AXI_WINDOW_ALIGN_MASK),
+			    PAB_AXI_AMAP_AXI_WIN(win_num));
+	mobiveil_csr_writel(pcie, upper_32_bits(cpu_addr),
+			    PAB_EXT_AXI_AMAP_AXI_WIN(win_num));
+
+	mobiveil_csr_writel(pcie, lower_32_bits(pci_addr),
+			    PAB_AXI_AMAP_PEX_WIN_L(win_num));
+	mobiveil_csr_writel(pcie, upper_32_bits(pci_addr),
+			    PAB_AXI_AMAP_PEX_WIN_H(win_num));
+
+	pcie->ob_wins_configured++;
+}
+
+int mobiveil_bringup_link(struct mobiveil_pcie *pcie)
+{
+	int retries;
+
+	/* check if the link is up or not */
+	for (retries = 0; retries < LINK_WAIT_MAX_RETRIES; retries++) {
+		if (mobiveil_pcie_link_up(pcie))
+			return 0;
+
+		usleep_range(LINK_WAIT_MIN, LINK_WAIT_MAX);
+	}
+
+	dev_err(&pcie->pdev->dev, "link never came up\n");
+
+	return -ETIMEDOUT;
+}
diff --git a/drivers/pci/controller/mobiveil/pcie-mobiveil.h b/drivers/pci/controller/mobiveil/pcie-mobiveil.h
new file mode 100644
index 000000000..6082b8afb
--- /dev/null
+++ b/drivers/pci/controller/mobiveil/pcie-mobiveil.h
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PCIe host controller driver for Mobiveil PCIe Host controller
+ *
+ * Copyright (c) 2018 Mobiveil Inc.
+ * Copyright 2019 NXP
+ *
+ * Author: Subrahmanya Lingappa <l.subrahmanya@mobiveil.co.in>
+ *	   Hou Zhiqiang <Zhiqiang.Hou@nxp.com>
+ */
+
+#ifndef _PCIE_MOBIVEIL_H
+#define _PCIE_MOBIVEIL_H
+
+#include <linux/pci.h>
+#include <linux/irq.h>
+#include <linux/msi.h>
+#include "../../pci.h"
+
+/* register offsets and bit positions */
+
+/*
+ * translation tables are grouped into windows, each window registers are
+ * grouped into blocks of 4 or 16 registers each
+ */
+#define PAB_REG_BLOCK_SIZE		16
+#define PAB_EXT_REG_BLOCK_SIZE		4
+
+#define PAB_REG_ADDR(offset, win)	\
+	(offset + (win * PAB_REG_BLOCK_SIZE))
+#define PAB_EXT_REG_ADDR(offset, win)	\
+	(offset + (win * PAB_EXT_REG_BLOCK_SIZE))
+
+#define LTSSM_STATUS			0x0404
+#define  LTSSM_STATUS_L0_MASK		0x3f
+#define  LTSSM_STATUS_L0		0x2d
+
+#define PAB_CTRL			0x0808
+#define  AMBA_PIO_ENABLE_SHIFT		0
+#define  PEX_PIO_ENABLE_SHIFT		1
+#define  PAGE_SEL_SHIFT			13
+#define  PAGE_SEL_MASK			0x3f
+#define  PAGE_LO_MASK			0x3ff
+#define  PAGE_SEL_OFFSET_SHIFT		10
+
+#define PAB_ACTIVITY_STAT		0x81c
+
+#define PAB_AXI_PIO_CTRL		0x0840
+#define  APIO_EN_MASK			0xf
+
+#define PAB_PEX_PIO_CTRL		0x08c0
+#define  PIO_ENABLE_SHIFT		0
+
+#define PAB_INTP_AMBA_MISC_ENB		0x0b0c
+#define PAB_INTP_AMBA_MISC_STAT		0x0b1c
+#define  PAB_INTP_RESET			BIT(1)
+#define  PAB_INTP_MSI			BIT(3)
+#define  PAB_INTP_INTA			BIT(5)
+#define  PAB_INTP_INTB			BIT(6)
+#define  PAB_INTP_INTC			BIT(7)
+#define  PAB_INTP_INTD			BIT(8)
+#define  PAB_INTP_PCIE_UE		BIT(9)
+#define  PAB_INTP_IE_PMREDI		BIT(29)
+#define  PAB_INTP_IE_EC			BIT(30)
+#define  PAB_INTP_MSI_MASK		PAB_INTP_MSI
+#define  PAB_INTP_INTX_MASK		(PAB_INTP_INTA | PAB_INTP_INTB |\
+					PAB_INTP_INTC | PAB_INTP_INTD)
+
+#define PAB_AXI_AMAP_CTRL(win)		PAB_REG_ADDR(0x0ba0, win)
+#define  WIN_ENABLE_SHIFT		0
+#define  WIN_TYPE_SHIFT			1
+#define  WIN_TYPE_MASK			0x3
+#define  WIN_SIZE_MASK			0xfffffc00
+
+#define PAB_EXT_AXI_AMAP_SIZE(win)	PAB_EXT_REG_ADDR(0xbaf0, win)
+
+#define PAB_EXT_AXI_AMAP_AXI_WIN(win)	PAB_EXT_REG_ADDR(0x80a0, win)
+#define PAB_AXI_AMAP_AXI_WIN(win)	PAB_REG_ADDR(0x0ba4, win)
+#define  AXI_WINDOW_ALIGN_MASK		3
+
+#define PAB_AXI_AMAP_PEX_WIN_L(win)	PAB_REG_ADDR(0x0ba8, win)
+#define  PAB_BUS_SHIFT			24
+#define  PAB_DEVICE_SHIFT		19
+#define  PAB_FUNCTION_SHIFT		16
+
+#define PAB_AXI_AMAP_PEX_WIN_H(win)	PAB_REG_ADDR(0x0bac, win)
+#define PAB_INTP_AXI_PIO_CLASS		0x474
+
+#define PAB_PEX_AMAP_CTRL(win)		PAB_REG_ADDR(0x4ba0, win)
+#define  AMAP_CTRL_EN_SHIFT		0
+#define  AMAP_CTRL_TYPE_SHIFT		1
+#define  AMAP_CTRL_TYPE_MASK		3
+
+#define PAB_EXT_PEX_AMAP_SIZEN(win)	PAB_EXT_REG_ADDR(0xbef0, win)
+#define PAB_EXT_PEX_AMAP_AXI_WIN(win)	PAB_EXT_REG_ADDR(0xb4a0, win)
+#define PAB_PEX_AMAP_AXI_WIN(win)	PAB_REG_ADDR(0x4ba4, win)
+#define PAB_PEX_AMAP_PEX_WIN_L(win)	PAB_REG_ADDR(0x4ba8, win)
+#define PAB_PEX_AMAP_PEX_WIN_H(win)	PAB_REG_ADDR(0x4bac, win)
+
+/* starting offset of INTX bits in status register */
+#define PAB_INTX_START			5
+
+/* supported number of MSI interrupts */
+#define PCI_NUM_MSI			16
+
+/* MSI registers */
+#define MSI_BASE_LO_OFFSET		0x04
+#define MSI_BASE_HI_OFFSET		0x08
+#define MSI_SIZE_OFFSET			0x0c
+#define MSI_ENABLE_OFFSET		0x14
+#define MSI_STATUS_OFFSET		0x18
+#define MSI_DATA_OFFSET			0x20
+#define MSI_ADDR_L_OFFSET		0x24
+#define MSI_ADDR_H_OFFSET		0x28
+
+/* outbound and inbound window definitions */
+#define WIN_NUM_0			0
+#define WIN_NUM_1			1
+#define CFG_WINDOW_TYPE			0
+#define IO_WINDOW_TYPE			1
+#define MEM_WINDOW_TYPE			2
+#define IB_WIN_SIZE			((u64)256 * 1024 * 1024 * 1024)
+#define MAX_PIO_WINDOWS			8
+
+/* Parameters for the waiting for link up routine */
+#define LINK_WAIT_MAX_RETRIES		10
+#define LINK_WAIT_MIN			90000
+#define LINK_WAIT_MAX			100000
+
+#define PAGED_ADDR_BNDRY		0xc00
+#define OFFSET_TO_PAGE_ADDR(off)	\
+	((off & PAGE_LO_MASK) | PAGED_ADDR_BNDRY)
+#define OFFSET_TO_PAGE_IDX(off)		\
+	((off >> PAGE_SEL_OFFSET_SHIFT) & PAGE_SEL_MASK)
+
+struct mobiveil_msi {			/* MSI information */
+	struct mutex lock;		/* protect bitmap variable */
+	struct irq_domain *msi_domain;
+	struct irq_domain *dev_domain;
+	phys_addr_t msi_pages_phys;
+	int num_of_vectors;
+	DECLARE_BITMAP(msi_irq_in_use, PCI_NUM_MSI);
+};
+
+struct mobiveil_pcie;
+
+struct mobiveil_rp_ops {
+	int (*interrupt_init)(struct mobiveil_pcie *pcie);
+};
+
+struct mobiveil_root_port {
+	void __iomem *config_axi_slave_base;	/* endpoint config base */
+	struct resource *ob_io_res;
+	struct mobiveil_rp_ops *ops;
+	int irq;
+	raw_spinlock_t intx_mask_lock;
+	struct irq_domain *intx_domain;
+	struct mobiveil_msi msi;
+	struct pci_host_bridge *bridge;
+};
+
+struct mobiveil_pab_ops {
+	int (*link_up)(struct mobiveil_pcie *pcie);
+};
+
+struct mobiveil_pcie {
+	struct platform_device *pdev;
+	void __iomem *csr_axi_slave_base;	/* root port config base */
+	void __iomem *apb_csr_base;	/* MSI register base */
+	phys_addr_t pcie_reg_base;	/* Physical PCIe Controller Base */
+	int apio_wins;
+	int ppio_wins;
+	int ob_wins_configured;		/* configured outbound windows */
+	int ib_wins_configured;		/* configured inbound windows */
+	const struct mobiveil_pab_ops *ops;
+	struct mobiveil_root_port rp;
+};
+
+int mobiveil_pcie_host_probe(struct mobiveil_pcie *pcie);
+int mobiveil_host_init(struct mobiveil_pcie *pcie, bool reinit);
+bool mobiveil_pcie_link_up(struct mobiveil_pcie *pcie);
+int mobiveil_bringup_link(struct mobiveil_pcie *pcie);
+void program_ob_windows(struct mobiveil_pcie *pcie, int win_num, u64 cpu_addr,
+			u64 pci_addr, u32 type, u64 size);
+void program_ib_windows(struct mobiveil_pcie *pcie, int win_num, u64 cpu_addr,
+			u64 pci_addr, u32 type, u64 size);
+u32 mobiveil_csr_read(struct mobiveil_pcie *pcie, u32 off, size_t size);
+void mobiveil_csr_write(struct mobiveil_pcie *pcie, u32 val, u32 off,
+			size_t size);
+
+static inline u32 mobiveil_csr_readl(struct mobiveil_pcie *pcie, u32 off)
+{
+	return mobiveil_csr_read(pcie, off, 0x4);
+}
+
+static inline u16 mobiveil_csr_readw(struct mobiveil_pcie *pcie, u32 off)
+{
+	return mobiveil_csr_read(pcie, off, 0x2);
+}
+
+static inline u8 mobiveil_csr_readb(struct mobiveil_pcie *pcie, u32 off)
+{
+	return mobiveil_csr_read(pcie, off, 0x1);
+}
+
+
+static inline void mobiveil_csr_writel(struct mobiveil_pcie *pcie, u32 val,
+				       u32 off)
+{
+	mobiveil_csr_write(pcie, val, off, 0x4);
+}
+
+static inline void mobiveil_csr_writew(struct mobiveil_pcie *pcie, u16 val,
+				       u32 off)
+{
+	mobiveil_csr_write(pcie, val, off, 0x2);
+}
+
+static inline void mobiveil_csr_writeb(struct mobiveil_pcie *pcie, u8 val,
+				       u32 off)
+{
+	mobiveil_csr_write(pcie, val, off, 0x1);
+}
+
+#endif /* _PCIE_MOBIVEIL_H */
diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
new file mode 100644
index 000000000..71ecd7ddc
--- /dev/null
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -0,0 +1,2011 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for the Aardvark PCIe controller, used on Marvell Armada
+ * 3700.
+ *
+ * Copyright (C) 2016 Marvell
+ *
+ * Author: Hezi Shahmoon <hezi.shahmoon@marvell.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci-ecam.h>
+#include <linux/init.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/of_gpio.h>
+#include <linux/of_pci.h>
+
+#include "../pci.h"
+#include "../pci-bridge-emul.h"
+
+/* PCIe core registers */
+#define PCIE_CORE_DEV_ID_REG					0x0
+#define PCIE_CORE_CMD_STATUS_REG				0x4
+#define PCIE_CORE_DEV_REV_REG					0x8
+#define PCIE_CORE_SSDEV_ID_REG					0x2c
+#define PCIE_CORE_PCIEXP_CAP					0xc0
+#define PCIE_CORE_PCIERR_CAP					0x100
+#define PCIE_CORE_ERR_CAPCTL_REG				0x118
+#define     PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX			BIT(5)
+#define     PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX_EN			BIT(6)
+#define     PCIE_CORE_ERR_CAPCTL_ECRC_CHCK			BIT(7)
+#define     PCIE_CORE_ERR_CAPCTL_ECRC_CHCK_RCV			BIT(8)
+/* PIO registers base address and register offsets */
+#define PIO_BASE_ADDR				0x4000
+#define PIO_CTRL				(PIO_BASE_ADDR + 0x0)
+#define   PIO_CTRL_TYPE_MASK			GENMASK(3, 0)
+#define   PIO_CTRL_ADDR_WIN_DISABLE		BIT(24)
+#define PIO_STAT				(PIO_BASE_ADDR + 0x4)
+#define   PIO_COMPLETION_STATUS_SHIFT		7
+#define   PIO_COMPLETION_STATUS_MASK		GENMASK(9, 7)
+#define   PIO_COMPLETION_STATUS_OK		0
+#define   PIO_COMPLETION_STATUS_UR		1
+#define   PIO_COMPLETION_STATUS_CRS		2
+#define   PIO_COMPLETION_STATUS_CA		4
+#define   PIO_NON_POSTED_REQ			BIT(10)
+#define   PIO_ERR_STATUS			BIT(11)
+#define PIO_ADDR_LS				(PIO_BASE_ADDR + 0x8)
+#define PIO_ADDR_MS				(PIO_BASE_ADDR + 0xc)
+#define PIO_WR_DATA				(PIO_BASE_ADDR + 0x10)
+#define PIO_WR_DATA_STRB			(PIO_BASE_ADDR + 0x14)
+#define PIO_RD_DATA				(PIO_BASE_ADDR + 0x18)
+#define PIO_START				(PIO_BASE_ADDR + 0x1c)
+#define PIO_ISR					(PIO_BASE_ADDR + 0x20)
+#define PIO_ISRM				(PIO_BASE_ADDR + 0x24)
+
+/* Aardvark Control registers */
+#define CONTROL_BASE_ADDR			0x4800
+#define PCIE_CORE_CTRL0_REG			(CONTROL_BASE_ADDR + 0x0)
+#define     PCIE_GEN_SEL_MSK			0x3
+#define     PCIE_GEN_SEL_SHIFT			0x0
+#define     SPEED_GEN_1				0
+#define     SPEED_GEN_2				1
+#define     SPEED_GEN_3				2
+#define     IS_RC_MSK				1
+#define     IS_RC_SHIFT				2
+#define     LANE_CNT_MSK			0x18
+#define     LANE_CNT_SHIFT			0x3
+#define     LANE_COUNT_1			(0 << LANE_CNT_SHIFT)
+#define     LANE_COUNT_2			(1 << LANE_CNT_SHIFT)
+#define     LANE_COUNT_4			(2 << LANE_CNT_SHIFT)
+#define     LANE_COUNT_8			(3 << LANE_CNT_SHIFT)
+#define     LINK_TRAINING_EN			BIT(6)
+#define     LEGACY_INTA				BIT(28)
+#define     LEGACY_INTB				BIT(29)
+#define     LEGACY_INTC				BIT(30)
+#define     LEGACY_INTD				BIT(31)
+#define PCIE_CORE_CTRL1_REG			(CONTROL_BASE_ADDR + 0x4)
+#define     HOT_RESET_GEN			BIT(0)
+#define PCIE_CORE_CTRL2_REG			(CONTROL_BASE_ADDR + 0x8)
+#define     PCIE_CORE_CTRL2_RESERVED		0x7
+#define     PCIE_CORE_CTRL2_TD_ENABLE		BIT(4)
+#define     PCIE_CORE_CTRL2_STRICT_ORDER_ENABLE	BIT(5)
+#define     PCIE_CORE_CTRL2_OB_WIN_ENABLE	BIT(6)
+#define     PCIE_CORE_CTRL2_MSI_ENABLE		BIT(10)
+#define PCIE_CORE_REF_CLK_REG			(CONTROL_BASE_ADDR + 0x14)
+#define     PCIE_CORE_REF_CLK_TX_ENABLE		BIT(1)
+#define     PCIE_CORE_REF_CLK_RX_ENABLE		BIT(2)
+#define PCIE_MSG_LOG_REG			(CONTROL_BASE_ADDR + 0x30)
+#define PCIE_ISR0_REG				(CONTROL_BASE_ADDR + 0x40)
+#define PCIE_MSG_PM_PME_MASK			BIT(7)
+#define PCIE_ISR0_MASK_REG			(CONTROL_BASE_ADDR + 0x44)
+#define     PCIE_ISR0_MSI_INT_PENDING		BIT(24)
+#define     PCIE_ISR0_CORR_ERR			BIT(11)
+#define     PCIE_ISR0_NFAT_ERR			BIT(12)
+#define     PCIE_ISR0_FAT_ERR			BIT(13)
+#define     PCIE_ISR0_ERR_MASK			GENMASK(13, 11)
+#define     PCIE_ISR0_INTX_ASSERT(val)		BIT(16 + (val))
+#define     PCIE_ISR0_INTX_DEASSERT(val)	BIT(20 + (val))
+#define     PCIE_ISR0_ALL_MASK			GENMASK(31, 0)
+#define PCIE_ISR1_REG				(CONTROL_BASE_ADDR + 0x48)
+#define PCIE_ISR1_MASK_REG			(CONTROL_BASE_ADDR + 0x4C)
+#define     PCIE_ISR1_POWER_STATE_CHANGE	BIT(4)
+#define     PCIE_ISR1_FLUSH			BIT(5)
+#define     PCIE_ISR1_INTX_ASSERT(val)		BIT(8 + (val))
+#define     PCIE_ISR1_ALL_MASK			GENMASK(31, 0)
+#define PCIE_MSI_ADDR_LOW_REG			(CONTROL_BASE_ADDR + 0x50)
+#define PCIE_MSI_ADDR_HIGH_REG			(CONTROL_BASE_ADDR + 0x54)
+#define PCIE_MSI_STATUS_REG			(CONTROL_BASE_ADDR + 0x58)
+#define PCIE_MSI_MASK_REG			(CONTROL_BASE_ADDR + 0x5C)
+#define     PCIE_MSI_ALL_MASK			GENMASK(31, 0)
+#define PCIE_MSI_PAYLOAD_REG			(CONTROL_BASE_ADDR + 0x9C)
+#define     PCIE_MSI_DATA_MASK			GENMASK(15, 0)
+
+/* PCIe window configuration */
+#define OB_WIN_BASE_ADDR			0x4c00
+#define OB_WIN_BLOCK_SIZE			0x20
+#define OB_WIN_COUNT				8
+#define OB_WIN_REG_ADDR(win, offset)		(OB_WIN_BASE_ADDR + \
+						 OB_WIN_BLOCK_SIZE * (win) + \
+						 (offset))
+#define OB_WIN_MATCH_LS(win)			OB_WIN_REG_ADDR(win, 0x00)
+#define     OB_WIN_ENABLE			BIT(0)
+#define OB_WIN_MATCH_MS(win)			OB_WIN_REG_ADDR(win, 0x04)
+#define OB_WIN_REMAP_LS(win)			OB_WIN_REG_ADDR(win, 0x08)
+#define OB_WIN_REMAP_MS(win)			OB_WIN_REG_ADDR(win, 0x0c)
+#define OB_WIN_MASK_LS(win)			OB_WIN_REG_ADDR(win, 0x10)
+#define OB_WIN_MASK_MS(win)			OB_WIN_REG_ADDR(win, 0x14)
+#define OB_WIN_ACTIONS(win)			OB_WIN_REG_ADDR(win, 0x18)
+#define OB_WIN_DEFAULT_ACTIONS			(OB_WIN_ACTIONS(OB_WIN_COUNT-1) + 0x4)
+#define     OB_WIN_FUNC_NUM_MASK		GENMASK(31, 24)
+#define     OB_WIN_FUNC_NUM_SHIFT		24
+#define     OB_WIN_FUNC_NUM_ENABLE		BIT(23)
+#define     OB_WIN_BUS_NUM_BITS_MASK		GENMASK(22, 20)
+#define     OB_WIN_BUS_NUM_BITS_SHIFT		20
+#define     OB_WIN_MSG_CODE_ENABLE		BIT(22)
+#define     OB_WIN_MSG_CODE_MASK		GENMASK(21, 14)
+#define     OB_WIN_MSG_CODE_SHIFT		14
+#define     OB_WIN_MSG_PAYLOAD_LEN		BIT(12)
+#define     OB_WIN_ATTR_ENABLE			BIT(11)
+#define     OB_WIN_ATTR_TC_MASK			GENMASK(10, 8)
+#define     OB_WIN_ATTR_TC_SHIFT		8
+#define     OB_WIN_ATTR_RELAXED			BIT(7)
+#define     OB_WIN_ATTR_NOSNOOP			BIT(6)
+#define     OB_WIN_ATTR_POISON			BIT(5)
+#define     OB_WIN_ATTR_IDO			BIT(4)
+#define     OB_WIN_TYPE_MASK			GENMASK(3, 0)
+#define     OB_WIN_TYPE_SHIFT			0
+#define     OB_WIN_TYPE_MEM			0x0
+#define     OB_WIN_TYPE_IO			0x4
+#define     OB_WIN_TYPE_CONFIG_TYPE0		0x8
+#define     OB_WIN_TYPE_CONFIG_TYPE1		0x9
+#define     OB_WIN_TYPE_MSG			0xc
+
+/* LMI registers base address and register offsets */
+#define LMI_BASE_ADDR				0x6000
+#define CFG_REG					(LMI_BASE_ADDR + 0x0)
+#define     LTSSM_SHIFT				24
+#define     LTSSM_MASK				0x3f
+#define     RC_BAR_CONFIG			0x300
+
+/* LTSSM values in CFG_REG */
+enum {
+	LTSSM_DETECT_QUIET			= 0x0,
+	LTSSM_DETECT_ACTIVE			= 0x1,
+	LTSSM_POLLING_ACTIVE			= 0x2,
+	LTSSM_POLLING_COMPLIANCE		= 0x3,
+	LTSSM_POLLING_CONFIGURATION		= 0x4,
+	LTSSM_CONFIG_LINKWIDTH_START		= 0x5,
+	LTSSM_CONFIG_LINKWIDTH_ACCEPT		= 0x6,
+	LTSSM_CONFIG_LANENUM_ACCEPT		= 0x7,
+	LTSSM_CONFIG_LANENUM_WAIT		= 0x8,
+	LTSSM_CONFIG_COMPLETE			= 0x9,
+	LTSSM_CONFIG_IDLE			= 0xa,
+	LTSSM_RECOVERY_RCVR_LOCK		= 0xb,
+	LTSSM_RECOVERY_SPEED			= 0xc,
+	LTSSM_RECOVERY_RCVR_CFG			= 0xd,
+	LTSSM_RECOVERY_IDLE			= 0xe,
+	LTSSM_L0				= 0x10,
+	LTSSM_RX_L0S_ENTRY			= 0x11,
+	LTSSM_RX_L0S_IDLE			= 0x12,
+	LTSSM_RX_L0S_FTS			= 0x13,
+	LTSSM_TX_L0S_ENTRY			= 0x14,
+	LTSSM_TX_L0S_IDLE			= 0x15,
+	LTSSM_TX_L0S_FTS			= 0x16,
+	LTSSM_L1_ENTRY				= 0x17,
+	LTSSM_L1_IDLE				= 0x18,
+	LTSSM_L2_IDLE				= 0x19,
+	LTSSM_L2_TRANSMIT_WAKE			= 0x1a,
+	LTSSM_DISABLED				= 0x20,
+	LTSSM_LOOPBACK_ENTRY_MASTER		= 0x21,
+	LTSSM_LOOPBACK_ACTIVE_MASTER		= 0x22,
+	LTSSM_LOOPBACK_EXIT_MASTER		= 0x23,
+	LTSSM_LOOPBACK_ENTRY_SLAVE		= 0x24,
+	LTSSM_LOOPBACK_ACTIVE_SLAVE		= 0x25,
+	LTSSM_LOOPBACK_EXIT_SLAVE		= 0x26,
+	LTSSM_HOT_RESET				= 0x27,
+	LTSSM_RECOVERY_EQUALIZATION_PHASE0	= 0x28,
+	LTSSM_RECOVERY_EQUALIZATION_PHASE1	= 0x29,
+	LTSSM_RECOVERY_EQUALIZATION_PHASE2	= 0x2a,
+	LTSSM_RECOVERY_EQUALIZATION_PHASE3	= 0x2b,
+};
+
+#define VENDOR_ID_REG				(LMI_BASE_ADDR + 0x44)
+
+/* PCIe core controller registers */
+#define CTRL_CORE_BASE_ADDR			0x18000
+#define CTRL_CONFIG_REG				(CTRL_CORE_BASE_ADDR + 0x0)
+#define     CTRL_MODE_SHIFT			0x0
+#define     CTRL_MODE_MASK			0x1
+#define     PCIE_CORE_MODE_DIRECT		0x0
+#define     PCIE_CORE_MODE_COMMAND		0x1
+
+/* PCIe Central Interrupts Registers */
+#define CENTRAL_INT_BASE_ADDR			0x1b000
+#define HOST_CTRL_INT_STATUS_REG		(CENTRAL_INT_BASE_ADDR + 0x0)
+#define HOST_CTRL_INT_MASK_REG			(CENTRAL_INT_BASE_ADDR + 0x4)
+#define     PCIE_IRQ_CMDQ_INT			BIT(0)
+#define     PCIE_IRQ_MSI_STATUS_INT		BIT(1)
+#define     PCIE_IRQ_CMD_SENT_DONE		BIT(3)
+#define     PCIE_IRQ_DMA_INT			BIT(4)
+#define     PCIE_IRQ_IB_DXFERDONE		BIT(5)
+#define     PCIE_IRQ_OB_DXFERDONE		BIT(6)
+#define     PCIE_IRQ_OB_RXFERDONE		BIT(7)
+#define     PCIE_IRQ_COMPQ_INT			BIT(12)
+#define     PCIE_IRQ_DIR_RD_DDR_DET		BIT(13)
+#define     PCIE_IRQ_DIR_WR_DDR_DET		BIT(14)
+#define     PCIE_IRQ_CORE_INT			BIT(16)
+#define     PCIE_IRQ_CORE_INT_PIO		BIT(17)
+#define     PCIE_IRQ_DPMU_INT			BIT(18)
+#define     PCIE_IRQ_PCIE_MIS_INT		BIT(19)
+#define     PCIE_IRQ_MSI_INT1_DET		BIT(20)
+#define     PCIE_IRQ_MSI_INT2_DET		BIT(21)
+#define     PCIE_IRQ_RC_DBELL_DET		BIT(22)
+#define     PCIE_IRQ_EP_STATUS			BIT(23)
+#define     PCIE_IRQ_ALL_MASK			GENMASK(31, 0)
+#define     PCIE_IRQ_ENABLE_INTS_MASK		PCIE_IRQ_CORE_INT
+
+/* Transaction types */
+#define PCIE_CONFIG_RD_TYPE0			0x8
+#define PCIE_CONFIG_RD_TYPE1			0x9
+#define PCIE_CONFIG_WR_TYPE0			0xa
+#define PCIE_CONFIG_WR_TYPE1			0xb
+
+#define PIO_RETRY_CNT			750000 /* 1.5 s */
+#define PIO_RETRY_DELAY			2 /* 2 us*/
+
+#define LINK_WAIT_MAX_RETRIES		10
+#define LINK_WAIT_USLEEP_MIN		90000
+#define LINK_WAIT_USLEEP_MAX		100000
+#define RETRAIN_WAIT_MAX_RETRIES	10
+#define RETRAIN_WAIT_USLEEP_US		2000
+
+#define MSI_IRQ_NUM			32
+
+#define CFG_RD_CRS_VAL			0xffff0001
+
+struct advk_pcie {
+	struct platform_device *pdev;
+	void __iomem *base;
+	struct {
+		phys_addr_t match;
+		phys_addr_t remap;
+		phys_addr_t mask;
+		u32 actions;
+	} wins[OB_WIN_COUNT];
+	u8 wins_count;
+	struct irq_domain *rp_irq_domain;
+	struct irq_domain *irq_domain;
+	struct irq_chip irq_chip;
+	raw_spinlock_t irq_lock;
+	struct irq_domain *msi_domain;
+	struct irq_domain *msi_inner_domain;
+	raw_spinlock_t msi_irq_lock;
+	DECLARE_BITMAP(msi_used, MSI_IRQ_NUM);
+	struct mutex msi_used_lock;
+	int link_gen;
+	struct pci_bridge_emul bridge;
+	struct gpio_desc *reset_gpio;
+	struct phy *phy;
+};
+
+static inline void advk_writel(struct advk_pcie *pcie, u32 val, u64 reg)
+{
+	writel(val, pcie->base + reg);
+}
+
+static inline u32 advk_readl(struct advk_pcie *pcie, u64 reg)
+{
+	return readl(pcie->base + reg);
+}
+
+static u8 advk_pcie_ltssm_state(struct advk_pcie *pcie)
+{
+	u32 val;
+	u8 ltssm_state;
+
+	val = advk_readl(pcie, CFG_REG);
+	ltssm_state = (val >> LTSSM_SHIFT) & LTSSM_MASK;
+	return ltssm_state;
+}
+
+static inline bool advk_pcie_link_up(struct advk_pcie *pcie)
+{
+	/* check if LTSSM is in normal operation - some L* state */
+	u8 ltssm_state = advk_pcie_ltssm_state(pcie);
+	return ltssm_state >= LTSSM_L0 && ltssm_state < LTSSM_DISABLED;
+}
+
+static inline bool advk_pcie_link_active(struct advk_pcie *pcie)
+{
+	/*
+	 * According to PCIe Base specification 3.0, Table 4-14: Link
+	 * Status Mapped to the LTSSM, and 4.2.6.3.6 Configuration.Idle
+	 * is Link Up mapped to LTSSM Configuration.Idle, Recovery, L0,
+	 * L0s, L1 and L2 states. And according to 3.2.1. Data Link
+	 * Control and Management State Machine Rules is DL Up status
+	 * reported in DL Active state.
+	 */
+	u8 ltssm_state = advk_pcie_ltssm_state(pcie);
+	return ltssm_state >= LTSSM_CONFIG_IDLE && ltssm_state < LTSSM_DISABLED;
+}
+
+static inline bool advk_pcie_link_training(struct advk_pcie *pcie)
+{
+	/*
+	 * According to PCIe Base specification 3.0, Table 4-14: Link
+	 * Status Mapped to the LTSSM is Link Training mapped to LTSSM
+	 * Configuration and Recovery states.
+	 */
+	u8 ltssm_state = advk_pcie_ltssm_state(pcie);
+	return ((ltssm_state >= LTSSM_CONFIG_LINKWIDTH_START &&
+		 ltssm_state < LTSSM_L0) ||
+		(ltssm_state >= LTSSM_RECOVERY_EQUALIZATION_PHASE0 &&
+		 ltssm_state <= LTSSM_RECOVERY_EQUALIZATION_PHASE3));
+}
+
+static int advk_pcie_wait_for_link(struct advk_pcie *pcie)
+{
+	int retries;
+
+	/* check if the link is up or not */
+	for (retries = 0; retries < LINK_WAIT_MAX_RETRIES; retries++) {
+		if (advk_pcie_link_up(pcie))
+			return 0;
+
+		usleep_range(LINK_WAIT_USLEEP_MIN, LINK_WAIT_USLEEP_MAX);
+	}
+
+	return -ETIMEDOUT;
+}
+
+static void advk_pcie_wait_for_retrain(struct advk_pcie *pcie)
+{
+	size_t retries;
+
+	for (retries = 0; retries < RETRAIN_WAIT_MAX_RETRIES; ++retries) {
+		if (advk_pcie_link_training(pcie))
+			break;
+		udelay(RETRAIN_WAIT_USLEEP_US);
+	}
+}
+
+static void advk_pcie_issue_perst(struct advk_pcie *pcie)
+{
+	if (!pcie->reset_gpio)
+		return;
+
+	/* 10ms delay is needed for some cards */
+	dev_info(&pcie->pdev->dev, "issuing PERST via reset GPIO for 10ms\n");
+	gpiod_set_value_cansleep(pcie->reset_gpio, 1);
+	usleep_range(10000, 11000);
+	gpiod_set_value_cansleep(pcie->reset_gpio, 0);
+}
+
+static void advk_pcie_train_link(struct advk_pcie *pcie)
+{
+	struct device *dev = &pcie->pdev->dev;
+	u32 reg;
+	int ret;
+
+	/*
+	 * Setup PCIe rev / gen compliance based on device tree property
+	 * 'max-link-speed' which also forces maximal link speed.
+	 */
+	reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG);
+	reg &= ~PCIE_GEN_SEL_MSK;
+	if (pcie->link_gen == 3)
+		reg |= SPEED_GEN_3;
+	else if (pcie->link_gen == 2)
+		reg |= SPEED_GEN_2;
+	else
+		reg |= SPEED_GEN_1;
+	advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG);
+
+	/*
+	 * Set maximal link speed value also into PCIe Link Control 2 register.
+	 * Armada 3700 Functional Specification says that default value is based
+	 * on SPEED_GEN but tests showed that default value is always 8.0 GT/s.
+	 */
+	reg = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKCTL2);
+	reg &= ~PCI_EXP_LNKCTL2_TLS;
+	if (pcie->link_gen == 3)
+		reg |= PCI_EXP_LNKCTL2_TLS_8_0GT;
+	else if (pcie->link_gen == 2)
+		reg |= PCI_EXP_LNKCTL2_TLS_5_0GT;
+	else
+		reg |= PCI_EXP_LNKCTL2_TLS_2_5GT;
+	advk_writel(pcie, reg, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKCTL2);
+
+	/* Enable link training after selecting PCIe generation */
+	reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG);
+	reg |= LINK_TRAINING_EN;
+	advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG);
+
+	/*
+	 * Reset PCIe card via PERST# signal. Some cards are not detected
+	 * during link training when they are in some non-initial state.
+	 */
+	advk_pcie_issue_perst(pcie);
+
+	/*
+	 * PERST# signal could have been asserted by pinctrl subsystem before
+	 * probe() callback has been called or issued explicitly by reset gpio
+	 * function advk_pcie_issue_perst(), making the endpoint going into
+	 * fundamental reset. As required by PCI Express spec (PCI Express
+	 * Base Specification, REV. 4.0 PCI Express, February 19 2014, 6.6.1
+	 * Conventional Reset) a delay for at least 100ms after such a reset
+	 * before sending a Configuration Request to the device is needed.
+	 * So wait until PCIe link is up. Function advk_pcie_wait_for_link()
+	 * waits for link at least 900ms.
+	 */
+	ret = advk_pcie_wait_for_link(pcie);
+	if (ret < 0)
+		dev_err(dev, "link never came up\n");
+	else
+		dev_info(dev, "link up\n");
+}
+
+/*
+ * Set PCIe address window register which could be used for memory
+ * mapping.
+ */
+static void advk_pcie_set_ob_win(struct advk_pcie *pcie, u8 win_num,
+				 phys_addr_t match, phys_addr_t remap,
+				 phys_addr_t mask, u32 actions)
+{
+	advk_writel(pcie, OB_WIN_ENABLE |
+			  lower_32_bits(match), OB_WIN_MATCH_LS(win_num));
+	advk_writel(pcie, upper_32_bits(match), OB_WIN_MATCH_MS(win_num));
+	advk_writel(pcie, lower_32_bits(remap), OB_WIN_REMAP_LS(win_num));
+	advk_writel(pcie, upper_32_bits(remap), OB_WIN_REMAP_MS(win_num));
+	advk_writel(pcie, lower_32_bits(mask), OB_WIN_MASK_LS(win_num));
+	advk_writel(pcie, upper_32_bits(mask), OB_WIN_MASK_MS(win_num));
+	advk_writel(pcie, actions, OB_WIN_ACTIONS(win_num));
+}
+
+static void advk_pcie_disable_ob_win(struct advk_pcie *pcie, u8 win_num)
+{
+	advk_writel(pcie, 0, OB_WIN_MATCH_LS(win_num));
+	advk_writel(pcie, 0, OB_WIN_MATCH_MS(win_num));
+	advk_writel(pcie, 0, OB_WIN_REMAP_LS(win_num));
+	advk_writel(pcie, 0, OB_WIN_REMAP_MS(win_num));
+	advk_writel(pcie, 0, OB_WIN_MASK_LS(win_num));
+	advk_writel(pcie, 0, OB_WIN_MASK_MS(win_num));
+	advk_writel(pcie, 0, OB_WIN_ACTIONS(win_num));
+}
+
+static void advk_pcie_setup_hw(struct advk_pcie *pcie)
+{
+	phys_addr_t msi_addr;
+	u32 reg;
+	int i;
+
+	/*
+	 * Configure PCIe Reference clock. Direction is from the PCIe
+	 * controller to the endpoint card, so enable transmitting of
+	 * Reference clock differential signal off-chip and disable
+	 * receiving off-chip differential signal.
+	 */
+	reg = advk_readl(pcie, PCIE_CORE_REF_CLK_REG);
+	reg |= PCIE_CORE_REF_CLK_TX_ENABLE;
+	reg &= ~PCIE_CORE_REF_CLK_RX_ENABLE;
+	advk_writel(pcie, reg, PCIE_CORE_REF_CLK_REG);
+
+	/* Set to Direct mode */
+	reg = advk_readl(pcie, CTRL_CONFIG_REG);
+	reg &= ~(CTRL_MODE_MASK << CTRL_MODE_SHIFT);
+	reg |= ((PCIE_CORE_MODE_DIRECT & CTRL_MODE_MASK) << CTRL_MODE_SHIFT);
+	advk_writel(pcie, reg, CTRL_CONFIG_REG);
+
+	/* Set PCI global control register to RC mode */
+	reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG);
+	reg |= (IS_RC_MSK << IS_RC_SHIFT);
+	advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG);
+
+	/*
+	 * Replace incorrect PCI vendor id value 0x1b4b by correct value 0x11ab.
+	 * VENDOR_ID_REG contains vendor id in low 16 bits and subsystem vendor
+	 * id in high 16 bits. Updating this register changes readback value of
+	 * read-only vendor id bits in PCIE_CORE_DEV_ID_REG register. Workaround
+	 * for erratum 4.1: "The value of device and vendor ID is incorrect".
+	 */
+	reg = (PCI_VENDOR_ID_MARVELL << 16) | PCI_VENDOR_ID_MARVELL;
+	advk_writel(pcie, reg, VENDOR_ID_REG);
+
+	/*
+	 * Change Class Code of PCI Bridge device to PCI Bridge (0x600400),
+	 * because the default value is Mass storage controller (0x010400).
+	 *
+	 * Note that this Aardvark PCI Bridge does not have compliant Type 1
+	 * Configuration Space and it even cannot be accessed via Aardvark's
+	 * PCI config space access method. Something like config space is
+	 * available in internal Aardvark registers starting at offset 0x0
+	 * and is reported as Type 0. In range 0x10 - 0x34 it has totally
+	 * different registers.
+	 *
+	 * Therefore driver uses emulation of PCI Bridge which emulates
+	 * access to configuration space via internal Aardvark registers or
+	 * emulated configuration buffer.
+	 */
+	reg = advk_readl(pcie, PCIE_CORE_DEV_REV_REG);
+	reg &= ~0xffffff00;
+	reg |= PCI_CLASS_BRIDGE_PCI_NORMAL << 8;
+	advk_writel(pcie, reg, PCIE_CORE_DEV_REV_REG);
+
+	/* Disable Root Bridge I/O space, memory space and bus mastering */
+	reg = advk_readl(pcie, PCIE_CORE_CMD_STATUS_REG);
+	reg &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+	advk_writel(pcie, reg, PCIE_CORE_CMD_STATUS_REG);
+
+	/* Set Advanced Error Capabilities and Control PF0 register */
+	reg = PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX |
+		PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX_EN |
+		PCIE_CORE_ERR_CAPCTL_ECRC_CHCK |
+		PCIE_CORE_ERR_CAPCTL_ECRC_CHCK_RCV;
+	advk_writel(pcie, reg, PCIE_CORE_ERR_CAPCTL_REG);
+
+	/* Set PCIe Device Control register */
+	reg = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + PCI_EXP_DEVCTL);
+	reg &= ~PCI_EXP_DEVCTL_RELAX_EN;
+	reg &= ~PCI_EXP_DEVCTL_NOSNOOP_EN;
+	reg &= ~PCI_EXP_DEVCTL_PAYLOAD;
+	reg &= ~PCI_EXP_DEVCTL_READRQ;
+	reg |= PCI_EXP_DEVCTL_PAYLOAD_512B;
+	reg |= PCI_EXP_DEVCTL_READRQ_512B;
+	advk_writel(pcie, reg, PCIE_CORE_PCIEXP_CAP + PCI_EXP_DEVCTL);
+
+	/* Program PCIe Control 2 to disable strict ordering */
+	reg = PCIE_CORE_CTRL2_RESERVED |
+		PCIE_CORE_CTRL2_TD_ENABLE;
+	advk_writel(pcie, reg, PCIE_CORE_CTRL2_REG);
+
+	/* Set lane X1 */
+	reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG);
+	reg &= ~LANE_CNT_MSK;
+	reg |= LANE_COUNT_1;
+	advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG);
+
+	/* Set MSI address */
+	msi_addr = virt_to_phys(pcie);
+	advk_writel(pcie, lower_32_bits(msi_addr), PCIE_MSI_ADDR_LOW_REG);
+	advk_writel(pcie, upper_32_bits(msi_addr), PCIE_MSI_ADDR_HIGH_REG);
+
+	/* Enable MSI */
+	reg = advk_readl(pcie, PCIE_CORE_CTRL2_REG);
+	reg |= PCIE_CORE_CTRL2_MSI_ENABLE;
+	advk_writel(pcie, reg, PCIE_CORE_CTRL2_REG);
+
+	/* Clear all interrupts */
+	advk_writel(pcie, PCIE_MSI_ALL_MASK, PCIE_MSI_STATUS_REG);
+	advk_writel(pcie, PCIE_ISR0_ALL_MASK, PCIE_ISR0_REG);
+	advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_REG);
+	advk_writel(pcie, PCIE_IRQ_ALL_MASK, HOST_CTRL_INT_STATUS_REG);
+
+	/* Disable All ISR0/1 and MSI Sources */
+	advk_writel(pcie, PCIE_ISR0_ALL_MASK, PCIE_ISR0_MASK_REG);
+	advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_MASK_REG);
+	advk_writel(pcie, PCIE_MSI_ALL_MASK, PCIE_MSI_MASK_REG);
+
+	/* Unmask summary MSI interrupt */
+	reg = advk_readl(pcie, PCIE_ISR0_MASK_REG);
+	reg &= ~PCIE_ISR0_MSI_INT_PENDING;
+	advk_writel(pcie, reg, PCIE_ISR0_MASK_REG);
+
+	/* Unmask PME interrupt for processing of PME requester */
+	reg = advk_readl(pcie, PCIE_ISR0_MASK_REG);
+	reg &= ~PCIE_MSG_PM_PME_MASK;
+	advk_writel(pcie, reg, PCIE_ISR0_MASK_REG);
+
+	/* Enable summary interrupt for GIC SPI source */
+	reg = PCIE_IRQ_ALL_MASK & (~PCIE_IRQ_ENABLE_INTS_MASK);
+	advk_writel(pcie, reg, HOST_CTRL_INT_MASK_REG);
+
+	/*
+	 * Enable AXI address window location generation:
+	 * When it is enabled, the default outbound window
+	 * configurations (Default User Field: 0xD0074CFC)
+	 * are used to transparent address translation for
+	 * the outbound transactions. Thus, PCIe address
+	 * windows are not required for transparent memory
+	 * access when default outbound window configuration
+	 * is set for memory access.
+	 */
+	reg = advk_readl(pcie, PCIE_CORE_CTRL2_REG);
+	reg |= PCIE_CORE_CTRL2_OB_WIN_ENABLE;
+	advk_writel(pcie, reg, PCIE_CORE_CTRL2_REG);
+
+	/*
+	 * Set memory access in Default User Field so it
+	 * is not required to configure PCIe address for
+	 * transparent memory access.
+	 */
+	advk_writel(pcie, OB_WIN_TYPE_MEM, OB_WIN_DEFAULT_ACTIONS);
+
+	/*
+	 * Bypass the address window mapping for PIO:
+	 * Since PIO access already contains all required
+	 * info over AXI interface by PIO registers, the
+	 * address window is not required.
+	 */
+	reg = advk_readl(pcie, PIO_CTRL);
+	reg |= PIO_CTRL_ADDR_WIN_DISABLE;
+	advk_writel(pcie, reg, PIO_CTRL);
+
+	/*
+	 * Configure PCIe address windows for non-memory or
+	 * non-transparent access as by default PCIe uses
+	 * transparent memory access.
+	 */
+	for (i = 0; i < pcie->wins_count; i++)
+		advk_pcie_set_ob_win(pcie, i,
+				     pcie->wins[i].match, pcie->wins[i].remap,
+				     pcie->wins[i].mask, pcie->wins[i].actions);
+
+	/* Disable remaining PCIe outbound windows */
+	for (i = pcie->wins_count; i < OB_WIN_COUNT; i++)
+		advk_pcie_disable_ob_win(pcie, i);
+
+	advk_pcie_train_link(pcie);
+}
+
+static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u32 *val)
+{
+	struct device *dev = &pcie->pdev->dev;
+	u32 reg;
+	unsigned int status;
+	char *strcomp_status, *str_posted;
+	int ret;
+
+	reg = advk_readl(pcie, PIO_STAT);
+	status = (reg & PIO_COMPLETION_STATUS_MASK) >>
+		PIO_COMPLETION_STATUS_SHIFT;
+
+	/*
+	 * According to HW spec, the PIO status check sequence as below:
+	 * 1) even if COMPLETION_STATUS(bit9:7) indicates successful,
+	 *    it still needs to check Error Status(bit11), only when this bit
+	 *    indicates no error happen, the operation is successful.
+	 * 2) value Unsupported Request(1) of COMPLETION_STATUS(bit9:7) only
+	 *    means a PIO write error, and for PIO read it is successful with
+	 *    a read value of 0xFFFFFFFF.
+	 * 3) value Completion Retry Status(CRS) of COMPLETION_STATUS(bit9:7)
+	 *    only means a PIO write error, and for PIO read it is successful
+	 *    with a read value of 0xFFFF0001.
+	 * 4) value Completer Abort (CA) of COMPLETION_STATUS(bit9:7) means
+	 *    error for both PIO read and PIO write operation.
+	 * 5) other errors are indicated as 'unknown'.
+	 */
+	switch (status) {
+	case PIO_COMPLETION_STATUS_OK:
+		if (reg & PIO_ERR_STATUS) {
+			strcomp_status = "COMP_ERR";
+			ret = -EFAULT;
+			break;
+		}
+		/* Get the read result */
+		if (val)
+			*val = advk_readl(pcie, PIO_RD_DATA);
+		/* No error */
+		strcomp_status = NULL;
+		ret = 0;
+		break;
+	case PIO_COMPLETION_STATUS_UR:
+		strcomp_status = "UR";
+		ret = -EOPNOTSUPP;
+		break;
+	case PIO_COMPLETION_STATUS_CRS:
+		if (allow_crs && val) {
+			/* PCIe r4.0, sec 2.3.2, says:
+			 * If CRS Software Visibility is enabled:
+			 * For a Configuration Read Request that includes both
+			 * bytes of the Vendor ID field of a device Function's
+			 * Configuration Space Header, the Root Complex must
+			 * complete the Request to the host by returning a
+			 * read-data value of 0001h for the Vendor ID field and
+			 * all '1's for any additional bytes included in the
+			 * request.
+			 *
+			 * So CRS in this case is not an error status.
+			 */
+			*val = CFG_RD_CRS_VAL;
+			strcomp_status = NULL;
+			ret = 0;
+			break;
+		}
+		/* PCIe r4.0, sec 2.3.2, says:
+		 * If CRS Software Visibility is not enabled, the Root Complex
+		 * must re-issue the Configuration Request as a new Request.
+		 * If CRS Software Visibility is enabled: For a Configuration
+		 * Write Request or for any other Configuration Read Request,
+		 * the Root Complex must re-issue the Configuration Request as
+		 * a new Request.
+		 * A Root Complex implementation may choose to limit the number
+		 * of Configuration Request/CRS Completion Status loops before
+		 * determining that something is wrong with the target of the
+		 * Request and taking appropriate action, e.g., complete the
+		 * Request to the host as a failed transaction.
+		 *
+		 * So return -EAGAIN and caller (pci-aardvark.c driver) will
+		 * re-issue request again up to the PIO_RETRY_CNT retries.
+		 */
+		strcomp_status = "CRS";
+		ret = -EAGAIN;
+		break;
+	case PIO_COMPLETION_STATUS_CA:
+		strcomp_status = "CA";
+		ret = -ECANCELED;
+		break;
+	default:
+		strcomp_status = "Unknown";
+		ret = -EINVAL;
+		break;
+	}
+
+	if (!strcomp_status)
+		return ret;
+
+	if (reg & PIO_NON_POSTED_REQ)
+		str_posted = "Non-posted";
+	else
+		str_posted = "Posted";
+
+	dev_dbg(dev, "%s PIO Response Status: %s, %#x @ %#x\n",
+		str_posted, strcomp_status, reg, advk_readl(pcie, PIO_ADDR_LS));
+
+	return ret;
+}
+
+static int advk_pcie_wait_pio(struct advk_pcie *pcie)
+{
+	struct device *dev = &pcie->pdev->dev;
+	int i;
+
+	for (i = 1; i <= PIO_RETRY_CNT; i++) {
+		u32 start, isr;
+
+		start = advk_readl(pcie, PIO_START);
+		isr = advk_readl(pcie, PIO_ISR);
+		if (!start && isr)
+			return i;
+		udelay(PIO_RETRY_DELAY);
+	}
+
+	dev_err(dev, "PIO read/write transfer time out\n");
+	return -ETIMEDOUT;
+}
+
+static pci_bridge_emul_read_status_t
+advk_pci_bridge_emul_base_conf_read(struct pci_bridge_emul *bridge,
+				    int reg, u32 *value)
+{
+	struct advk_pcie *pcie = bridge->data;
+
+	switch (reg) {
+	case PCI_COMMAND:
+		*value = advk_readl(pcie, PCIE_CORE_CMD_STATUS_REG);
+		return PCI_BRIDGE_EMUL_HANDLED;
+
+	case PCI_INTERRUPT_LINE: {
+		/*
+		 * From the whole 32bit register we support reading from HW only
+		 * two bits: PCI_BRIDGE_CTL_BUS_RESET and PCI_BRIDGE_CTL_SERR.
+		 * Other bits are retrieved only from emulated config buffer.
+		 */
+		__le32 *cfgspace = (__le32 *)&bridge->conf;
+		u32 val = le32_to_cpu(cfgspace[PCI_INTERRUPT_LINE / 4]);
+		if (advk_readl(pcie, PCIE_ISR0_MASK_REG) & PCIE_ISR0_ERR_MASK)
+			val &= ~(PCI_BRIDGE_CTL_SERR << 16);
+		else
+			val |= PCI_BRIDGE_CTL_SERR << 16;
+		if (advk_readl(pcie, PCIE_CORE_CTRL1_REG) & HOT_RESET_GEN)
+			val |= PCI_BRIDGE_CTL_BUS_RESET << 16;
+		else
+			val &= ~(PCI_BRIDGE_CTL_BUS_RESET << 16);
+		*value = val;
+		return PCI_BRIDGE_EMUL_HANDLED;
+	}
+
+	default:
+		return PCI_BRIDGE_EMUL_NOT_HANDLED;
+	}
+}
+
+static void
+advk_pci_bridge_emul_base_conf_write(struct pci_bridge_emul *bridge,
+				     int reg, u32 old, u32 new, u32 mask)
+{
+	struct advk_pcie *pcie = bridge->data;
+
+	switch (reg) {
+	case PCI_COMMAND:
+		advk_writel(pcie, new, PCIE_CORE_CMD_STATUS_REG);
+		break;
+
+	case PCI_INTERRUPT_LINE:
+		/*
+		 * According to Figure 6-3: Pseudo Logic Diagram for Error
+		 * Message Controls in PCIe base specification, SERR# Enable bit
+		 * in Bridge Control register enable receiving of ERR_* messages
+		 */
+		if (mask & (PCI_BRIDGE_CTL_SERR << 16)) {
+			u32 val = advk_readl(pcie, PCIE_ISR0_MASK_REG);
+			if (new & (PCI_BRIDGE_CTL_SERR << 16))
+				val &= ~PCIE_ISR0_ERR_MASK;
+			else
+				val |= PCIE_ISR0_ERR_MASK;
+			advk_writel(pcie, val, PCIE_ISR0_MASK_REG);
+		}
+		if (mask & (PCI_BRIDGE_CTL_BUS_RESET << 16)) {
+			u32 val = advk_readl(pcie, PCIE_CORE_CTRL1_REG);
+			if (new & (PCI_BRIDGE_CTL_BUS_RESET << 16))
+				val |= HOT_RESET_GEN;
+			else
+				val &= ~HOT_RESET_GEN;
+			advk_writel(pcie, val, PCIE_CORE_CTRL1_REG);
+		}
+		break;
+
+	default:
+		break;
+	}
+}
+
+static pci_bridge_emul_read_status_t
+advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge,
+				    int reg, u32 *value)
+{
+	struct advk_pcie *pcie = bridge->data;
+
+
+	switch (reg) {
+	/*
+	 * PCI_EXP_SLTCAP, PCI_EXP_SLTCTL, PCI_EXP_RTCTL and PCI_EXP_RTSTA are
+	 * also supported, but do not need to be handled here, because their
+	 * values are stored in emulated config space buffer, and we read them
+	 * from there when needed.
+	 */
+
+	case PCI_EXP_LNKCAP: {
+		u32 val = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg);
+		/*
+		 * PCI_EXP_LNKCAP_DLLLARC bit is hardwired in aardvark HW to 0.
+		 * But support for PCI_EXP_LNKSTA_DLLLA is emulated via ltssm
+		 * state so explicitly enable PCI_EXP_LNKCAP_DLLLARC flag.
+		 */
+		val |= PCI_EXP_LNKCAP_DLLLARC;
+		*value = val;
+		return PCI_BRIDGE_EMUL_HANDLED;
+	}
+
+	case PCI_EXP_LNKCTL: {
+		/* u32 contains both PCI_EXP_LNKCTL and PCI_EXP_LNKSTA */
+		u32 val = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg) &
+			~(PCI_EXP_LNKSTA_LT << 16);
+		if (advk_pcie_link_training(pcie))
+			val |= (PCI_EXP_LNKSTA_LT << 16);
+		if (advk_pcie_link_active(pcie))
+			val |= (PCI_EXP_LNKSTA_DLLLA << 16);
+		*value = val;
+		return PCI_BRIDGE_EMUL_HANDLED;
+	}
+
+	case PCI_EXP_DEVCAP:
+	case PCI_EXP_DEVCTL:
+	case PCI_EXP_DEVCAP2:
+	case PCI_EXP_DEVCTL2:
+	case PCI_EXP_LNKCAP2:
+	case PCI_EXP_LNKCTL2:
+		*value = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg);
+		return PCI_BRIDGE_EMUL_HANDLED;
+
+	default:
+		return PCI_BRIDGE_EMUL_NOT_HANDLED;
+	}
+
+}
+
+static void
+advk_pci_bridge_emul_pcie_conf_write(struct pci_bridge_emul *bridge,
+				     int reg, u32 old, u32 new, u32 mask)
+{
+	struct advk_pcie *pcie = bridge->data;
+
+	switch (reg) {
+	case PCI_EXP_LNKCTL:
+		advk_writel(pcie, new, PCIE_CORE_PCIEXP_CAP + reg);
+		if (new & PCI_EXP_LNKCTL_RL)
+			advk_pcie_wait_for_retrain(pcie);
+		break;
+
+	case PCI_EXP_RTCTL: {
+		u16 rootctl = le16_to_cpu(bridge->pcie_conf.rootctl);
+		/* Only emulation of PMEIE and CRSSVE bits is provided */
+		rootctl &= PCI_EXP_RTCTL_PMEIE | PCI_EXP_RTCTL_CRSSVE;
+		bridge->pcie_conf.rootctl = cpu_to_le16(rootctl);
+		break;
+	}
+
+	/*
+	 * PCI_EXP_RTSTA is also supported, but does not need to be handled
+	 * here, because its value is stored in emulated config space buffer,
+	 * and we write it there when needed.
+	 */
+
+	case PCI_EXP_DEVCTL:
+	case PCI_EXP_DEVCTL2:
+	case PCI_EXP_LNKCTL2:
+		advk_writel(pcie, new, PCIE_CORE_PCIEXP_CAP + reg);
+		break;
+
+	default:
+		break;
+	}
+}
+
+static pci_bridge_emul_read_status_t
+advk_pci_bridge_emul_ext_conf_read(struct pci_bridge_emul *bridge,
+				   int reg, u32 *value)
+{
+	struct advk_pcie *pcie = bridge->data;
+
+	switch (reg) {
+	case 0:
+		*value = advk_readl(pcie, PCIE_CORE_PCIERR_CAP + reg);
+
+		/*
+		 * PCI_EXT_CAP_NEXT bits are set to offset 0x150, but Armada
+		 * 3700 Functional Specification does not document registers
+		 * at those addresses.
+		 *
+		 * Thus we clear PCI_EXT_CAP_NEXT bits to make Advanced Error
+		 * Reporting Capability header the last Extended Capability.
+		 * If we obtain documentation for those registers in the
+		 * future, this can be changed.
+		 */
+		*value &= 0x000fffff;
+		return PCI_BRIDGE_EMUL_HANDLED;
+
+	case PCI_ERR_UNCOR_STATUS:
+	case PCI_ERR_UNCOR_MASK:
+	case PCI_ERR_UNCOR_SEVER:
+	case PCI_ERR_COR_STATUS:
+	case PCI_ERR_COR_MASK:
+	case PCI_ERR_CAP:
+	case PCI_ERR_HEADER_LOG + 0:
+	case PCI_ERR_HEADER_LOG + 4:
+	case PCI_ERR_HEADER_LOG + 8:
+	case PCI_ERR_HEADER_LOG + 12:
+	case PCI_ERR_ROOT_COMMAND:
+	case PCI_ERR_ROOT_STATUS:
+	case PCI_ERR_ROOT_ERR_SRC:
+		*value = advk_readl(pcie, PCIE_CORE_PCIERR_CAP + reg);
+		return PCI_BRIDGE_EMUL_HANDLED;
+
+	default:
+		return PCI_BRIDGE_EMUL_NOT_HANDLED;
+	}
+}
+
+static void
+advk_pci_bridge_emul_ext_conf_write(struct pci_bridge_emul *bridge,
+				    int reg, u32 old, u32 new, u32 mask)
+{
+	struct advk_pcie *pcie = bridge->data;
+
+	switch (reg) {
+	/* These are W1C registers, so clear other bits */
+	case PCI_ERR_UNCOR_STATUS:
+	case PCI_ERR_COR_STATUS:
+	case PCI_ERR_ROOT_STATUS:
+		new &= mask;
+		fallthrough;
+
+	case PCI_ERR_UNCOR_MASK:
+	case PCI_ERR_UNCOR_SEVER:
+	case PCI_ERR_COR_MASK:
+	case PCI_ERR_CAP:
+	case PCI_ERR_HEADER_LOG + 0:
+	case PCI_ERR_HEADER_LOG + 4:
+	case PCI_ERR_HEADER_LOG + 8:
+	case PCI_ERR_HEADER_LOG + 12:
+	case PCI_ERR_ROOT_COMMAND:
+	case PCI_ERR_ROOT_ERR_SRC:
+		advk_writel(pcie, new, PCIE_CORE_PCIERR_CAP + reg);
+		break;
+
+	default:
+		break;
+	}
+}
+
+static const struct pci_bridge_emul_ops advk_pci_bridge_emul_ops = {
+	.read_base = advk_pci_bridge_emul_base_conf_read,
+	.write_base = advk_pci_bridge_emul_base_conf_write,
+	.read_pcie = advk_pci_bridge_emul_pcie_conf_read,
+	.write_pcie = advk_pci_bridge_emul_pcie_conf_write,
+	.read_ext = advk_pci_bridge_emul_ext_conf_read,
+	.write_ext = advk_pci_bridge_emul_ext_conf_write,
+};
+
+/*
+ * Initialize the configuration space of the PCI-to-PCI bridge
+ * associated with the given PCIe interface.
+ */
+static int advk_sw_pci_bridge_init(struct advk_pcie *pcie)
+{
+	struct pci_bridge_emul *bridge = &pcie->bridge;
+
+	bridge->conf.vendor =
+		cpu_to_le16(advk_readl(pcie, PCIE_CORE_DEV_ID_REG) & 0xffff);
+	bridge->conf.device =
+		cpu_to_le16(advk_readl(pcie, PCIE_CORE_DEV_ID_REG) >> 16);
+	bridge->conf.class_revision =
+		cpu_to_le32(advk_readl(pcie, PCIE_CORE_DEV_REV_REG) & 0xff);
+
+	/* Support 32 bits I/O addressing */
+	bridge->conf.iobase = PCI_IO_RANGE_TYPE_32;
+	bridge->conf.iolimit = PCI_IO_RANGE_TYPE_32;
+
+	/* Support 64 bits memory pref */
+	bridge->conf.pref_mem_base = cpu_to_le16(PCI_PREF_RANGE_TYPE_64);
+	bridge->conf.pref_mem_limit = cpu_to_le16(PCI_PREF_RANGE_TYPE_64);
+
+	/* Support interrupt A for MSI feature */
+	bridge->conf.intpin = PCI_INTERRUPT_INTA;
+
+	/*
+	 * Aardvark HW provides PCIe Capability structure in version 2 and
+	 * indicate slot support, which is emulated.
+	 */
+	bridge->pcie_conf.cap = cpu_to_le16(2 | PCI_EXP_FLAGS_SLOT);
+
+	/*
+	 * Set Presence Detect State bit permanently since there is no support
+	 * for unplugging the card nor detecting whether it is plugged. (If a
+	 * platform exists in the future that supports it, via a GPIO for
+	 * example, it should be implemented via this bit.)
+	 *
+	 * Set physical slot number to 1 since there is only one port and zero
+	 * value is reserved for ports within the same silicon as Root Port
+	 * which is not our case.
+	 */
+	bridge->pcie_conf.slotcap = cpu_to_le32(FIELD_PREP(PCI_EXP_SLTCAP_PSN,
+							   1));
+	bridge->pcie_conf.slotsta = cpu_to_le16(PCI_EXP_SLTSTA_PDS);
+
+	/* Indicates supports for Completion Retry Status */
+	bridge->pcie_conf.rootcap = cpu_to_le16(PCI_EXP_RTCAP_CRSVIS);
+
+	bridge->subsystem_vendor_id = advk_readl(pcie, PCIE_CORE_SSDEV_ID_REG) & 0xffff;
+	bridge->subsystem_id = advk_readl(pcie, PCIE_CORE_SSDEV_ID_REG) >> 16;
+	bridge->has_pcie = true;
+	bridge->pcie_start = PCIE_CORE_PCIEXP_CAP;
+	bridge->data = pcie;
+	bridge->ops = &advk_pci_bridge_emul_ops;
+
+	return pci_bridge_emul_init(bridge, 0);
+}
+
+static bool advk_pcie_valid_device(struct advk_pcie *pcie, struct pci_bus *bus,
+				  int devfn)
+{
+	if (pci_is_root_bus(bus) && PCI_SLOT(devfn) != 0)
+		return false;
+
+	/*
+	 * If the link goes down after we check for link-up, we have a problem:
+	 * if a PIO request is executed while link-down, the whole controller
+	 * gets stuck in a non-functional state, and even after link comes up
+	 * again, PIO requests won't work anymore, and a reset of the whole PCIe
+	 * controller is needed. Therefore we need to prevent sending PIO
+	 * requests while the link is down.
+	 */
+	if (!pci_is_root_bus(bus) && !advk_pcie_link_up(pcie))
+		return false;
+
+	return true;
+}
+
+static bool advk_pcie_pio_is_running(struct advk_pcie *pcie)
+{
+	struct device *dev = &pcie->pdev->dev;
+
+	/*
+	 * Trying to start a new PIO transfer when previous has not completed
+	 * cause External Abort on CPU which results in kernel panic:
+	 *
+	 *     SError Interrupt on CPU0, code 0xbf000002 -- SError
+	 *     Kernel panic - not syncing: Asynchronous SError Interrupt
+	 *
+	 * Functions advk_pcie_rd_conf() and advk_pcie_wr_conf() are protected
+	 * by raw_spin_lock_irqsave() at pci_lock_config() level to prevent
+	 * concurrent calls at the same time. But because PIO transfer may take
+	 * about 1.5s when link is down or card is disconnected, it means that
+	 * advk_pcie_wait_pio() does not always have to wait for completion.
+	 *
+	 * Some versions of ARM Trusted Firmware handles this External Abort at
+	 * EL3 level and mask it to prevent kernel panic. Relevant TF-A commit:
+	 * https://git.trustedfirmware.org/TF-A/trusted-firmware-a.git/commit/?id=3c7dcdac5c50
+	 */
+	if (advk_readl(pcie, PIO_START)) {
+		dev_err(dev, "Previous PIO read/write transfer is still running\n");
+		return true;
+	}
+
+	return false;
+}
+
+static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
+			     int where, int size, u32 *val)
+{
+	struct advk_pcie *pcie = bus->sysdata;
+	int retry_count;
+	bool allow_crs;
+	u32 reg;
+	int ret;
+
+	if (!advk_pcie_valid_device(pcie, bus, devfn))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (pci_is_root_bus(bus))
+		return pci_bridge_emul_conf_read(&pcie->bridge, where,
+						 size, val);
+
+	/*
+	 * Completion Retry Status is possible to return only when reading all
+	 * 4 bytes from PCI_VENDOR_ID and PCI_DEVICE_ID registers at once and
+	 * CRSSVE flag on Root Bridge is enabled.
+	 */
+	allow_crs = (where == PCI_VENDOR_ID) && (size == 4) &&
+		    (le16_to_cpu(pcie->bridge.pcie_conf.rootctl) &
+		     PCI_EXP_RTCTL_CRSSVE);
+
+	if (advk_pcie_pio_is_running(pcie))
+		goto try_crs;
+
+	/* Program the control register */
+	reg = advk_readl(pcie, PIO_CTRL);
+	reg &= ~PIO_CTRL_TYPE_MASK;
+	if (pci_is_root_bus(bus->parent))
+		reg |= PCIE_CONFIG_RD_TYPE0;
+	else
+		reg |= PCIE_CONFIG_RD_TYPE1;
+	advk_writel(pcie, reg, PIO_CTRL);
+
+	/* Program the address registers */
+	reg = ALIGN_DOWN(PCIE_ECAM_OFFSET(bus->number, devfn, where), 4);
+	advk_writel(pcie, reg, PIO_ADDR_LS);
+	advk_writel(pcie, 0, PIO_ADDR_MS);
+
+	/* Program the data strobe */
+	advk_writel(pcie, 0xf, PIO_WR_DATA_STRB);
+
+	retry_count = 0;
+	do {
+		/* Clear PIO DONE ISR and start the transfer */
+		advk_writel(pcie, 1, PIO_ISR);
+		advk_writel(pcie, 1, PIO_START);
+
+		ret = advk_pcie_wait_pio(pcie);
+		if (ret < 0)
+			goto try_crs;
+
+		retry_count += ret;
+
+		/* Check PIO status and get the read result */
+		ret = advk_pcie_check_pio_status(pcie, allow_crs, val);
+	} while (ret == -EAGAIN && retry_count < PIO_RETRY_CNT);
+
+	if (ret < 0)
+		goto fail;
+
+	if (size == 1)
+		*val = (*val >> (8 * (where & 3))) & 0xff;
+	else if (size == 2)
+		*val = (*val >> (8 * (where & 3))) & 0xffff;
+
+	return PCIBIOS_SUCCESSFUL;
+
+try_crs:
+	/*
+	 * If it is possible, return Completion Retry Status so that caller
+	 * tries to issue the request again instead of failing.
+	 */
+	if (allow_crs) {
+		*val = CFG_RD_CRS_VAL;
+		return PCIBIOS_SUCCESSFUL;
+	}
+
+fail:
+	*val = 0xffffffff;
+	return PCIBIOS_SET_FAILED;
+}
+
+static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
+				int where, int size, u32 val)
+{
+	struct advk_pcie *pcie = bus->sysdata;
+	u32 reg;
+	u32 data_strobe = 0x0;
+	int retry_count;
+	int offset;
+	int ret;
+
+	if (!advk_pcie_valid_device(pcie, bus, devfn))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (pci_is_root_bus(bus))
+		return pci_bridge_emul_conf_write(&pcie->bridge, where,
+						  size, val);
+
+	if (where % size)
+		return PCIBIOS_SET_FAILED;
+
+	if (advk_pcie_pio_is_running(pcie))
+		return PCIBIOS_SET_FAILED;
+
+	/* Program the control register */
+	reg = advk_readl(pcie, PIO_CTRL);
+	reg &= ~PIO_CTRL_TYPE_MASK;
+	if (pci_is_root_bus(bus->parent))
+		reg |= PCIE_CONFIG_WR_TYPE0;
+	else
+		reg |= PCIE_CONFIG_WR_TYPE1;
+	advk_writel(pcie, reg, PIO_CTRL);
+
+	/* Program the address registers */
+	reg = ALIGN_DOWN(PCIE_ECAM_OFFSET(bus->number, devfn, where), 4);
+	advk_writel(pcie, reg, PIO_ADDR_LS);
+	advk_writel(pcie, 0, PIO_ADDR_MS);
+
+	/* Calculate the write strobe */
+	offset      = where & 0x3;
+	reg         = val << (8 * offset);
+	data_strobe = GENMASK(size - 1, 0) << offset;
+
+	/* Program the data register */
+	advk_writel(pcie, reg, PIO_WR_DATA);
+
+	/* Program the data strobe */
+	advk_writel(pcie, data_strobe, PIO_WR_DATA_STRB);
+
+	retry_count = 0;
+	do {
+		/* Clear PIO DONE ISR and start the transfer */
+		advk_writel(pcie, 1, PIO_ISR);
+		advk_writel(pcie, 1, PIO_START);
+
+		ret = advk_pcie_wait_pio(pcie);
+		if (ret < 0)
+			return PCIBIOS_SET_FAILED;
+
+		retry_count += ret;
+
+		ret = advk_pcie_check_pio_status(pcie, false, NULL);
+	} while (ret == -EAGAIN && retry_count < PIO_RETRY_CNT);
+
+	return ret < 0 ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops advk_pcie_ops = {
+	.read = advk_pcie_rd_conf,
+	.write = advk_pcie_wr_conf,
+};
+
+static void advk_msi_irq_compose_msi_msg(struct irq_data *data,
+					 struct msi_msg *msg)
+{
+	struct advk_pcie *pcie = irq_data_get_irq_chip_data(data);
+	phys_addr_t msi_addr = virt_to_phys(pcie);
+
+	msg->address_lo = lower_32_bits(msi_addr);
+	msg->address_hi = upper_32_bits(msi_addr);
+	msg->data = data->hwirq;
+}
+
+static int advk_msi_set_affinity(struct irq_data *irq_data,
+				 const struct cpumask *mask, bool force)
+{
+	return -EINVAL;
+}
+
+static void advk_msi_irq_mask(struct irq_data *d)
+{
+	struct advk_pcie *pcie = d->domain->host_data;
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 mask;
+
+	raw_spin_lock_irqsave(&pcie->msi_irq_lock, flags);
+	mask = advk_readl(pcie, PCIE_MSI_MASK_REG);
+	mask |= BIT(hwirq);
+	advk_writel(pcie, mask, PCIE_MSI_MASK_REG);
+	raw_spin_unlock_irqrestore(&pcie->msi_irq_lock, flags);
+}
+
+static void advk_msi_irq_unmask(struct irq_data *d)
+{
+	struct advk_pcie *pcie = d->domain->host_data;
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 mask;
+
+	raw_spin_lock_irqsave(&pcie->msi_irq_lock, flags);
+	mask = advk_readl(pcie, PCIE_MSI_MASK_REG);
+	mask &= ~BIT(hwirq);
+	advk_writel(pcie, mask, PCIE_MSI_MASK_REG);
+	raw_spin_unlock_irqrestore(&pcie->msi_irq_lock, flags);
+}
+
+static void advk_msi_top_irq_mask(struct irq_data *d)
+{
+	pci_msi_mask_irq(d);
+	irq_chip_mask_parent(d);
+}
+
+static void advk_msi_top_irq_unmask(struct irq_data *d)
+{
+	pci_msi_unmask_irq(d);
+	irq_chip_unmask_parent(d);
+}
+
+static struct irq_chip advk_msi_bottom_irq_chip = {
+	.name			= "MSI",
+	.irq_compose_msi_msg	= advk_msi_irq_compose_msi_msg,
+	.irq_set_affinity	= advk_msi_set_affinity,
+	.irq_mask		= advk_msi_irq_mask,
+	.irq_unmask		= advk_msi_irq_unmask,
+};
+
+static int advk_msi_irq_domain_alloc(struct irq_domain *domain,
+				     unsigned int virq,
+				     unsigned int nr_irqs, void *args)
+{
+	struct advk_pcie *pcie = domain->host_data;
+	int hwirq, i;
+
+	mutex_lock(&pcie->msi_used_lock);
+	hwirq = bitmap_find_free_region(pcie->msi_used, MSI_IRQ_NUM,
+					order_base_2(nr_irqs));
+	mutex_unlock(&pcie->msi_used_lock);
+	if (hwirq < 0)
+		return -ENOSPC;
+
+	for (i = 0; i < nr_irqs; i++)
+		irq_domain_set_info(domain, virq + i, hwirq + i,
+				    &advk_msi_bottom_irq_chip,
+				    domain->host_data, handle_simple_irq,
+				    NULL, NULL);
+
+	return 0;
+}
+
+static void advk_msi_irq_domain_free(struct irq_domain *domain,
+				     unsigned int virq, unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct advk_pcie *pcie = domain->host_data;
+
+	mutex_lock(&pcie->msi_used_lock);
+	bitmap_release_region(pcie->msi_used, d->hwirq, order_base_2(nr_irqs));
+	mutex_unlock(&pcie->msi_used_lock);
+}
+
+static const struct irq_domain_ops advk_msi_domain_ops = {
+	.alloc = advk_msi_irq_domain_alloc,
+	.free = advk_msi_irq_domain_free,
+};
+
+static void advk_pcie_irq_mask(struct irq_data *d)
+{
+	struct advk_pcie *pcie = d->domain->host_data;
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 mask;
+
+	raw_spin_lock_irqsave(&pcie->irq_lock, flags);
+	mask = advk_readl(pcie, PCIE_ISR1_MASK_REG);
+	mask |= PCIE_ISR1_INTX_ASSERT(hwirq);
+	advk_writel(pcie, mask, PCIE_ISR1_MASK_REG);
+	raw_spin_unlock_irqrestore(&pcie->irq_lock, flags);
+}
+
+static void advk_pcie_irq_unmask(struct irq_data *d)
+{
+	struct advk_pcie *pcie = d->domain->host_data;
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 mask;
+
+	raw_spin_lock_irqsave(&pcie->irq_lock, flags);
+	mask = advk_readl(pcie, PCIE_ISR1_MASK_REG);
+	mask &= ~PCIE_ISR1_INTX_ASSERT(hwirq);
+	advk_writel(pcie, mask, PCIE_ISR1_MASK_REG);
+	raw_spin_unlock_irqrestore(&pcie->irq_lock, flags);
+}
+
+static int advk_pcie_irq_map(struct irq_domain *h,
+			     unsigned int virq, irq_hw_number_t hwirq)
+{
+	struct advk_pcie *pcie = h->host_data;
+
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_chip_and_handler(virq, &pcie->irq_chip,
+				 handle_level_irq);
+	irq_set_chip_data(virq, pcie);
+
+	return 0;
+}
+
+static const struct irq_domain_ops advk_pcie_irq_domain_ops = {
+	.map = advk_pcie_irq_map,
+	.xlate = irq_domain_xlate_onecell,
+};
+
+static struct irq_chip advk_msi_irq_chip = {
+	.name		= "advk-MSI",
+	.irq_mask	= advk_msi_top_irq_mask,
+	.irq_unmask	= advk_msi_top_irq_unmask,
+};
+
+static struct msi_domain_info advk_msi_domain_info = {
+	.flags	= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		  MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX,
+	.chip	= &advk_msi_irq_chip,
+};
+
+static int advk_pcie_init_msi_irq_domain(struct advk_pcie *pcie)
+{
+	struct device *dev = &pcie->pdev->dev;
+
+	raw_spin_lock_init(&pcie->msi_irq_lock);
+	mutex_init(&pcie->msi_used_lock);
+
+	pcie->msi_inner_domain =
+		irq_domain_add_linear(NULL, MSI_IRQ_NUM,
+				      &advk_msi_domain_ops, pcie);
+	if (!pcie->msi_inner_domain)
+		return -ENOMEM;
+
+	pcie->msi_domain =
+		pci_msi_create_irq_domain(dev_fwnode(dev),
+					  &advk_msi_domain_info,
+					  pcie->msi_inner_domain);
+	if (!pcie->msi_domain) {
+		irq_domain_remove(pcie->msi_inner_domain);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void advk_pcie_remove_msi_irq_domain(struct advk_pcie *pcie)
+{
+	irq_domain_remove(pcie->msi_domain);
+	irq_domain_remove(pcie->msi_inner_domain);
+}
+
+static int advk_pcie_init_irq_domain(struct advk_pcie *pcie)
+{
+	struct device *dev = &pcie->pdev->dev;
+	struct device_node *node = dev->of_node;
+	struct device_node *pcie_intc_node;
+	struct irq_chip *irq_chip;
+	int ret = 0;
+
+	raw_spin_lock_init(&pcie->irq_lock);
+
+	pcie_intc_node =  of_get_next_child(node, NULL);
+	if (!pcie_intc_node) {
+		dev_err(dev, "No PCIe Intc node found\n");
+		return -ENODEV;
+	}
+
+	irq_chip = &pcie->irq_chip;
+
+	irq_chip->name = devm_kasprintf(dev, GFP_KERNEL, "%s-irq",
+					dev_name(dev));
+	if (!irq_chip->name) {
+		ret = -ENOMEM;
+		goto out_put_node;
+	}
+
+	irq_chip->irq_mask = advk_pcie_irq_mask;
+	irq_chip->irq_unmask = advk_pcie_irq_unmask;
+
+	pcie->irq_domain =
+		irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
+				      &advk_pcie_irq_domain_ops, pcie);
+	if (!pcie->irq_domain) {
+		dev_err(dev, "Failed to get a INTx IRQ domain\n");
+		ret = -ENOMEM;
+		goto out_put_node;
+	}
+
+out_put_node:
+	of_node_put(pcie_intc_node);
+	return ret;
+}
+
+static void advk_pcie_remove_irq_domain(struct advk_pcie *pcie)
+{
+	irq_domain_remove(pcie->irq_domain);
+}
+
+static struct irq_chip advk_rp_irq_chip = {
+	.name = "advk-RP",
+};
+
+static int advk_pcie_rp_irq_map(struct irq_domain *h,
+				unsigned int virq, irq_hw_number_t hwirq)
+{
+	struct advk_pcie *pcie = h->host_data;
+
+	irq_set_chip_and_handler(virq, &advk_rp_irq_chip, handle_simple_irq);
+	irq_set_chip_data(virq, pcie);
+
+	return 0;
+}
+
+static const struct irq_domain_ops advk_pcie_rp_irq_domain_ops = {
+	.map = advk_pcie_rp_irq_map,
+	.xlate = irq_domain_xlate_onecell,
+};
+
+static int advk_pcie_init_rp_irq_domain(struct advk_pcie *pcie)
+{
+	pcie->rp_irq_domain = irq_domain_add_linear(NULL, 1,
+						    &advk_pcie_rp_irq_domain_ops,
+						    pcie);
+	if (!pcie->rp_irq_domain) {
+		dev_err(&pcie->pdev->dev, "Failed to add Root Port IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void advk_pcie_remove_rp_irq_domain(struct advk_pcie *pcie)
+{
+	irq_domain_remove(pcie->rp_irq_domain);
+}
+
+static void advk_pcie_handle_pme(struct advk_pcie *pcie)
+{
+	u32 requester = advk_readl(pcie, PCIE_MSG_LOG_REG) >> 16;
+
+	advk_writel(pcie, PCIE_MSG_PM_PME_MASK, PCIE_ISR0_REG);
+
+	/*
+	 * PCIE_MSG_LOG_REG contains the last inbound message, so store
+	 * the requester ID only when PME was not asserted yet.
+	 * Also do not trigger PME interrupt when PME is still asserted.
+	 */
+	if (!(le32_to_cpu(pcie->bridge.pcie_conf.rootsta) & PCI_EXP_RTSTA_PME)) {
+		pcie->bridge.pcie_conf.rootsta = cpu_to_le32(requester | PCI_EXP_RTSTA_PME);
+
+		/*
+		 * Trigger PME interrupt only if PMEIE bit in Root Control is set.
+		 * Aardvark HW returns zero for PCI_EXP_FLAGS_IRQ, so use PCIe interrupt 0.
+		 */
+		if (!(le16_to_cpu(pcie->bridge.pcie_conf.rootctl) & PCI_EXP_RTCTL_PMEIE))
+			return;
+
+		if (generic_handle_domain_irq(pcie->rp_irq_domain, 0) == -EINVAL)
+			dev_err_ratelimited(&pcie->pdev->dev, "unhandled PME IRQ\n");
+	}
+}
+
+static void advk_pcie_handle_msi(struct advk_pcie *pcie)
+{
+	u32 msi_val, msi_mask, msi_status, msi_idx;
+
+	msi_mask = advk_readl(pcie, PCIE_MSI_MASK_REG);
+	msi_val = advk_readl(pcie, PCIE_MSI_STATUS_REG);
+	msi_status = msi_val & ((~msi_mask) & PCIE_MSI_ALL_MASK);
+
+	for (msi_idx = 0; msi_idx < MSI_IRQ_NUM; msi_idx++) {
+		if (!(BIT(msi_idx) & msi_status))
+			continue;
+
+		advk_writel(pcie, BIT(msi_idx), PCIE_MSI_STATUS_REG);
+		if (generic_handle_domain_irq(pcie->msi_inner_domain, msi_idx) == -EINVAL)
+			dev_err_ratelimited(&pcie->pdev->dev, "unexpected MSI 0x%02x\n", msi_idx);
+	}
+
+	advk_writel(pcie, PCIE_ISR0_MSI_INT_PENDING,
+		    PCIE_ISR0_REG);
+}
+
+static void advk_pcie_handle_int(struct advk_pcie *pcie)
+{
+	u32 isr0_val, isr0_mask, isr0_status;
+	u32 isr1_val, isr1_mask, isr1_status;
+	int i;
+
+	isr0_val = advk_readl(pcie, PCIE_ISR0_REG);
+	isr0_mask = advk_readl(pcie, PCIE_ISR0_MASK_REG);
+	isr0_status = isr0_val & ((~isr0_mask) & PCIE_ISR0_ALL_MASK);
+
+	isr1_val = advk_readl(pcie, PCIE_ISR1_REG);
+	isr1_mask = advk_readl(pcie, PCIE_ISR1_MASK_REG);
+	isr1_status = isr1_val & ((~isr1_mask) & PCIE_ISR1_ALL_MASK);
+
+	/* Process PME interrupt as the first one to do not miss PME requester id */
+	if (isr0_status & PCIE_MSG_PM_PME_MASK)
+		advk_pcie_handle_pme(pcie);
+
+	/* Process ERR interrupt */
+	if (isr0_status & PCIE_ISR0_ERR_MASK) {
+		advk_writel(pcie, PCIE_ISR0_ERR_MASK, PCIE_ISR0_REG);
+
+		/*
+		 * Aardvark HW returns zero for PCI_ERR_ROOT_AER_IRQ, so use
+		 * PCIe interrupt 0
+		 */
+		if (generic_handle_domain_irq(pcie->rp_irq_domain, 0) == -EINVAL)
+			dev_err_ratelimited(&pcie->pdev->dev, "unhandled ERR IRQ\n");
+	}
+
+	/* Process MSI interrupts */
+	if (isr0_status & PCIE_ISR0_MSI_INT_PENDING)
+		advk_pcie_handle_msi(pcie);
+
+	/* Process legacy interrupts */
+	for (i = 0; i < PCI_NUM_INTX; i++) {
+		if (!(isr1_status & PCIE_ISR1_INTX_ASSERT(i)))
+			continue;
+
+		advk_writel(pcie, PCIE_ISR1_INTX_ASSERT(i),
+			    PCIE_ISR1_REG);
+
+		if (generic_handle_domain_irq(pcie->irq_domain, i) == -EINVAL)
+			dev_err_ratelimited(&pcie->pdev->dev, "unexpected INT%c IRQ\n",
+					    (char)i + 'A');
+	}
+}
+
+static irqreturn_t advk_pcie_irq_handler(int irq, void *arg)
+{
+	struct advk_pcie *pcie = arg;
+	u32 status;
+
+	status = advk_readl(pcie, HOST_CTRL_INT_STATUS_REG);
+	if (!(status & PCIE_IRQ_CORE_INT))
+		return IRQ_NONE;
+
+	advk_pcie_handle_int(pcie);
+
+	/* Clear interrupt */
+	advk_writel(pcie, PCIE_IRQ_CORE_INT, HOST_CTRL_INT_STATUS_REG);
+
+	return IRQ_HANDLED;
+}
+
+static int advk_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	struct advk_pcie *pcie = dev->bus->sysdata;
+
+	/*
+	 * Emulated root bridge has its own emulated irq chip and irq domain.
+	 * Argument pin is the INTx pin (1=INTA, 2=INTB, 3=INTC, 4=INTD) and
+	 * hwirq for irq_create_mapping() is indexed from zero.
+	 */
+	if (pci_is_root_bus(dev->bus))
+		return irq_create_mapping(pcie->rp_irq_domain, pin - 1);
+	else
+		return of_irq_parse_and_map_pci(dev, slot, pin);
+}
+
+static void advk_pcie_disable_phy(struct advk_pcie *pcie)
+{
+	phy_power_off(pcie->phy);
+	phy_exit(pcie->phy);
+}
+
+static int advk_pcie_enable_phy(struct advk_pcie *pcie)
+{
+	int ret;
+
+	if (!pcie->phy)
+		return 0;
+
+	ret = phy_init(pcie->phy);
+	if (ret)
+		return ret;
+
+	ret = phy_set_mode(pcie->phy, PHY_MODE_PCIE);
+	if (ret) {
+		phy_exit(pcie->phy);
+		return ret;
+	}
+
+	ret = phy_power_on(pcie->phy);
+	if (ret) {
+		phy_exit(pcie->phy);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int advk_pcie_setup_phy(struct advk_pcie *pcie)
+{
+	struct device *dev = &pcie->pdev->dev;
+	struct device_node *node = dev->of_node;
+	int ret = 0;
+
+	pcie->phy = devm_of_phy_get(dev, node, NULL);
+	if (IS_ERR(pcie->phy) && (PTR_ERR(pcie->phy) == -EPROBE_DEFER))
+		return PTR_ERR(pcie->phy);
+
+	/* Old bindings miss the PHY handle */
+	if (IS_ERR(pcie->phy)) {
+		dev_warn(dev, "PHY unavailable (%ld)\n", PTR_ERR(pcie->phy));
+		pcie->phy = NULL;
+		return 0;
+	}
+
+	ret = advk_pcie_enable_phy(pcie);
+	if (ret)
+		dev_err(dev, "Failed to initialize PHY (%d)\n", ret);
+
+	return ret;
+}
+
+static int advk_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct advk_pcie *pcie;
+	struct pci_host_bridge *bridge;
+	struct resource_entry *entry;
+	int ret, irq;
+
+	bridge = devm_pci_alloc_host_bridge(dev, sizeof(struct advk_pcie));
+	if (!bridge)
+		return -ENOMEM;
+
+	pcie = pci_host_bridge_priv(bridge);
+	pcie->pdev = pdev;
+	platform_set_drvdata(pdev, pcie);
+
+	resource_list_for_each_entry(entry, &bridge->windows) {
+		resource_size_t start = entry->res->start;
+		resource_size_t size = resource_size(entry->res);
+		unsigned long type = resource_type(entry->res);
+		u64 win_size;
+
+		/*
+		 * Aardvark hardware allows to configure also PCIe window
+		 * for config type 0 and type 1 mapping, but driver uses
+		 * only PIO for issuing configuration transfers which does
+		 * not use PCIe window configuration.
+		 */
+		if (type != IORESOURCE_MEM && type != IORESOURCE_IO)
+			continue;
+
+		/*
+		 * Skip transparent memory resources. Default outbound access
+		 * configuration is set to transparent memory access so it
+		 * does not need window configuration.
+		 */
+		if (type == IORESOURCE_MEM && entry->offset == 0)
+			continue;
+
+		/*
+		 * The n-th PCIe window is configured by tuple (match, remap, mask)
+		 * and an access to address A uses this window if A matches the
+		 * match with given mask.
+		 * So every PCIe window size must be a power of two and every start
+		 * address must be aligned to window size. Minimal size is 64 KiB
+		 * because lower 16 bits of mask must be zero. Remapped address
+		 * may have set only bits from the mask.
+		 */
+		while (pcie->wins_count < OB_WIN_COUNT && size > 0) {
+			/* Calculate the largest aligned window size */
+			win_size = (1ULL << (fls64(size)-1)) |
+				   (start ? (1ULL << __ffs64(start)) : 0);
+			win_size = 1ULL << __ffs64(win_size);
+			if (win_size < 0x10000)
+				break;
+
+			dev_dbg(dev,
+				"Configuring PCIe window %d: [0x%llx-0x%llx] as %lu\n",
+				pcie->wins_count, (unsigned long long)start,
+				(unsigned long long)start + win_size, type);
+
+			if (type == IORESOURCE_IO) {
+				pcie->wins[pcie->wins_count].actions = OB_WIN_TYPE_IO;
+				pcie->wins[pcie->wins_count].match = pci_pio_to_address(start);
+			} else {
+				pcie->wins[pcie->wins_count].actions = OB_WIN_TYPE_MEM;
+				pcie->wins[pcie->wins_count].match = start;
+			}
+			pcie->wins[pcie->wins_count].remap = start - entry->offset;
+			pcie->wins[pcie->wins_count].mask = ~(win_size - 1);
+
+			if (pcie->wins[pcie->wins_count].remap & (win_size - 1))
+				break;
+
+			start += win_size;
+			size -= win_size;
+			pcie->wins_count++;
+		}
+
+		if (size > 0) {
+			dev_err(&pcie->pdev->dev,
+				"Invalid PCIe region [0x%llx-0x%llx]\n",
+				(unsigned long long)entry->res->start,
+				(unsigned long long)entry->res->end + 1);
+			return -EINVAL;
+		}
+	}
+
+	pcie->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(pcie->base))
+		return PTR_ERR(pcie->base);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	ret = devm_request_irq(dev, irq, advk_pcie_irq_handler,
+			       IRQF_SHARED | IRQF_NO_THREAD, "advk-pcie",
+			       pcie);
+	if (ret) {
+		dev_err(dev, "Failed to register interrupt\n");
+		return ret;
+	}
+
+	pcie->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
+	ret = PTR_ERR_OR_ZERO(pcie->reset_gpio);
+	if (ret) {
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get reset-gpio: %i\n", ret);
+		return ret;
+	}
+
+	ret = gpiod_set_consumer_name(pcie->reset_gpio, "pcie1-reset");
+	if (ret) {
+		dev_err(dev, "Failed to set reset gpio name: %d\n", ret);
+		return ret;
+	}
+
+	ret = of_pci_get_max_link_speed(dev->of_node);
+	if (ret <= 0 || ret > 3)
+		pcie->link_gen = 3;
+	else
+		pcie->link_gen = ret;
+
+	ret = advk_pcie_setup_phy(pcie);
+	if (ret)
+		return ret;
+
+	advk_pcie_setup_hw(pcie);
+
+	ret = advk_sw_pci_bridge_init(pcie);
+	if (ret) {
+		dev_err(dev, "Failed to register emulated root PCI bridge\n");
+		return ret;
+	}
+
+	ret = advk_pcie_init_irq_domain(pcie);
+	if (ret) {
+		dev_err(dev, "Failed to initialize irq\n");
+		return ret;
+	}
+
+	ret = advk_pcie_init_msi_irq_domain(pcie);
+	if (ret) {
+		dev_err(dev, "Failed to initialize irq\n");
+		advk_pcie_remove_irq_domain(pcie);
+		return ret;
+	}
+
+	ret = advk_pcie_init_rp_irq_domain(pcie);
+	if (ret) {
+		dev_err(dev, "Failed to initialize irq\n");
+		advk_pcie_remove_msi_irq_domain(pcie);
+		advk_pcie_remove_irq_domain(pcie);
+		return ret;
+	}
+
+	bridge->sysdata = pcie;
+	bridge->ops = &advk_pcie_ops;
+	bridge->map_irq = advk_pcie_map_irq;
+
+	ret = pci_host_probe(bridge);
+	if (ret < 0) {
+		advk_pcie_remove_rp_irq_domain(pcie);
+		advk_pcie_remove_msi_irq_domain(pcie);
+		advk_pcie_remove_irq_domain(pcie);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void advk_pcie_remove(struct platform_device *pdev)
+{
+	struct advk_pcie *pcie = platform_get_drvdata(pdev);
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(pcie);
+	u32 val;
+	int i;
+
+	/* Remove PCI bus with all devices */
+	pci_lock_rescan_remove();
+	pci_stop_root_bus(bridge->bus);
+	pci_remove_root_bus(bridge->bus);
+	pci_unlock_rescan_remove();
+
+	/* Disable Root Bridge I/O space, memory space and bus mastering */
+	val = advk_readl(pcie, PCIE_CORE_CMD_STATUS_REG);
+	val &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+	advk_writel(pcie, val, PCIE_CORE_CMD_STATUS_REG);
+
+	/* Disable MSI */
+	val = advk_readl(pcie, PCIE_CORE_CTRL2_REG);
+	val &= ~PCIE_CORE_CTRL2_MSI_ENABLE;
+	advk_writel(pcie, val, PCIE_CORE_CTRL2_REG);
+
+	/* Clear MSI address */
+	advk_writel(pcie, 0, PCIE_MSI_ADDR_LOW_REG);
+	advk_writel(pcie, 0, PCIE_MSI_ADDR_HIGH_REG);
+
+	/* Mask all interrupts */
+	advk_writel(pcie, PCIE_MSI_ALL_MASK, PCIE_MSI_MASK_REG);
+	advk_writel(pcie, PCIE_ISR0_ALL_MASK, PCIE_ISR0_MASK_REG);
+	advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_MASK_REG);
+	advk_writel(pcie, PCIE_IRQ_ALL_MASK, HOST_CTRL_INT_MASK_REG);
+
+	/* Clear all interrupts */
+	advk_writel(pcie, PCIE_MSI_ALL_MASK, PCIE_MSI_STATUS_REG);
+	advk_writel(pcie, PCIE_ISR0_ALL_MASK, PCIE_ISR0_REG);
+	advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_REG);
+	advk_writel(pcie, PCIE_IRQ_ALL_MASK, HOST_CTRL_INT_STATUS_REG);
+
+	/* Remove IRQ domains */
+	advk_pcie_remove_rp_irq_domain(pcie);
+	advk_pcie_remove_msi_irq_domain(pcie);
+	advk_pcie_remove_irq_domain(pcie);
+
+	/* Free config space for emulated root bridge */
+	pci_bridge_emul_cleanup(&pcie->bridge);
+
+	/* Assert PERST# signal which prepares PCIe card for power down */
+	if (pcie->reset_gpio)
+		gpiod_set_value_cansleep(pcie->reset_gpio, 1);
+
+	/* Disable link training */
+	val = advk_readl(pcie, PCIE_CORE_CTRL0_REG);
+	val &= ~LINK_TRAINING_EN;
+	advk_writel(pcie, val, PCIE_CORE_CTRL0_REG);
+
+	/* Disable outbound address windows mapping */
+	for (i = 0; i < OB_WIN_COUNT; i++)
+		advk_pcie_disable_ob_win(pcie, i);
+
+	/* Disable phy */
+	advk_pcie_disable_phy(pcie);
+}
+
+static const struct of_device_id advk_pcie_of_match_table[] = {
+	{ .compatible = "marvell,armada-3700-pcie", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, advk_pcie_of_match_table);
+
+static struct platform_driver advk_pcie_driver = {
+	.driver = {
+		.name = "advk-pcie",
+		.of_match_table = advk_pcie_of_match_table,
+	},
+	.probe = advk_pcie_probe,
+	.remove_new = advk_pcie_remove,
+};
+module_platform_driver(advk_pcie_driver);
+
+MODULE_DESCRIPTION("Aardvark PCIe controller");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pci-ftpci100.c b/drivers/pci/controller/pci-ftpci100.c
new file mode 100644
index 000000000..ffdeed25e
--- /dev/null
+++ b/drivers/pci/controller/pci-ftpci100.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for Faraday Technology FTPC100 PCI Controller
+ *
+ * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org>
+ *
+ * Based on the out-of-tree OpenWRT patch for Cortina Gemini:
+ * Copyright (C) 2009 Janos Laube <janos.dev@gmail.com>
+ * Copyright (C) 2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ * Based on SL2312 PCI controller code
+ * Storlink (C) 2003
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/irqdomain.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/bitops.h>
+#include <linux/irq.h>
+#include <linux/clk.h>
+
+#include "../pci.h"
+
+/*
+ * Special configuration registers directly in the first few words
+ * in I/O space.
+ */
+#define FTPCI_IOSIZE	0x00
+#define FTPCI_PROT	0x04 /* AHB protection */
+#define FTPCI_CTRL	0x08 /* PCI control signal */
+#define FTPCI_SOFTRST	0x10 /* Soft reset counter and response error enable */
+#define FTPCI_CONFIG	0x28 /* PCI configuration command register */
+#define FTPCI_DATA	0x2C
+
+#define FARADAY_PCI_STATUS_CMD		0x04 /* Status and command */
+#define FARADAY_PCI_PMC			0x40 /* Power management control */
+#define FARADAY_PCI_PMCSR		0x44 /* Power management status */
+#define FARADAY_PCI_CTRL1		0x48 /* Control register 1 */
+#define FARADAY_PCI_CTRL2		0x4C /* Control register 2 */
+#define FARADAY_PCI_MEM1_BASE_SIZE	0x50 /* Memory base and size #1 */
+#define FARADAY_PCI_MEM2_BASE_SIZE	0x54 /* Memory base and size #2 */
+#define FARADAY_PCI_MEM3_BASE_SIZE	0x58 /* Memory base and size #3 */
+
+#define PCI_STATUS_66MHZ_CAPABLE	BIT(21)
+
+/* Bits 31..28 gives INTD..INTA status */
+#define PCI_CTRL2_INTSTS_SHIFT		28
+#define PCI_CTRL2_INTMASK_CMDERR	BIT(27)
+#define PCI_CTRL2_INTMASK_PARERR	BIT(26)
+/* Bits 25..22 masks INTD..INTA */
+#define PCI_CTRL2_INTMASK_SHIFT		22
+#define PCI_CTRL2_INTMASK_MABRT_RX	BIT(21)
+#define PCI_CTRL2_INTMASK_TABRT_RX	BIT(20)
+#define PCI_CTRL2_INTMASK_TABRT_TX	BIT(19)
+#define PCI_CTRL2_INTMASK_RETRY4	BIT(18)
+#define PCI_CTRL2_INTMASK_SERR_RX	BIT(17)
+#define PCI_CTRL2_INTMASK_PERR_RX	BIT(16)
+/* Bit 15 reserved */
+#define PCI_CTRL2_MSTPRI_REQ6		BIT(14)
+#define PCI_CTRL2_MSTPRI_REQ5		BIT(13)
+#define PCI_CTRL2_MSTPRI_REQ4		BIT(12)
+#define PCI_CTRL2_MSTPRI_REQ3		BIT(11)
+#define PCI_CTRL2_MSTPRI_REQ2		BIT(10)
+#define PCI_CTRL2_MSTPRI_REQ1		BIT(9)
+#define PCI_CTRL2_MSTPRI_REQ0		BIT(8)
+/* Bits 7..4 reserved */
+/* Bits 3..0 TRDYW */
+
+/*
+ * Memory configs:
+ * Bit 31..20 defines the PCI side memory base
+ * Bit 19..16 (4 bits) defines the size per below
+ */
+#define FARADAY_PCI_MEMBASE_MASK	0xfff00000
+#define FARADAY_PCI_MEMSIZE_1MB		0x0
+#define FARADAY_PCI_MEMSIZE_2MB		0x1
+#define FARADAY_PCI_MEMSIZE_4MB		0x2
+#define FARADAY_PCI_MEMSIZE_8MB		0x3
+#define FARADAY_PCI_MEMSIZE_16MB	0x4
+#define FARADAY_PCI_MEMSIZE_32MB	0x5
+#define FARADAY_PCI_MEMSIZE_64MB	0x6
+#define FARADAY_PCI_MEMSIZE_128MB	0x7
+#define FARADAY_PCI_MEMSIZE_256MB	0x8
+#define FARADAY_PCI_MEMSIZE_512MB	0x9
+#define FARADAY_PCI_MEMSIZE_1GB		0xa
+#define FARADAY_PCI_MEMSIZE_2GB		0xb
+#define FARADAY_PCI_MEMSIZE_SHIFT	16
+
+/*
+ * The DMA base is set to 0x0 for all memory segments, it reflects the
+ * fact that the memory of the host system starts at 0x0.
+ */
+#define FARADAY_PCI_DMA_MEM1_BASE	0x00000000
+#define FARADAY_PCI_DMA_MEM2_BASE	0x00000000
+#define FARADAY_PCI_DMA_MEM3_BASE	0x00000000
+
+/**
+ * struct faraday_pci_variant - encodes IP block differences
+ * @cascaded_irq: this host has cascaded IRQs from an interrupt controller
+ *	embedded in the host bridge.
+ */
+struct faraday_pci_variant {
+	bool cascaded_irq;
+};
+
+struct faraday_pci {
+	struct device *dev;
+	void __iomem *base;
+	struct irq_domain *irqdomain;
+	struct pci_bus *bus;
+	struct clk *bus_clk;
+};
+
+static int faraday_res_to_memcfg(resource_size_t mem_base,
+				 resource_size_t mem_size, u32 *val)
+{
+	u32 outval;
+
+	switch (mem_size) {
+	case SZ_1M:
+		outval = FARADAY_PCI_MEMSIZE_1MB;
+		break;
+	case SZ_2M:
+		outval = FARADAY_PCI_MEMSIZE_2MB;
+		break;
+	case SZ_4M:
+		outval = FARADAY_PCI_MEMSIZE_4MB;
+		break;
+	case SZ_8M:
+		outval = FARADAY_PCI_MEMSIZE_8MB;
+		break;
+	case SZ_16M:
+		outval = FARADAY_PCI_MEMSIZE_16MB;
+		break;
+	case SZ_32M:
+		outval = FARADAY_PCI_MEMSIZE_32MB;
+		break;
+	case SZ_64M:
+		outval = FARADAY_PCI_MEMSIZE_64MB;
+		break;
+	case SZ_128M:
+		outval = FARADAY_PCI_MEMSIZE_128MB;
+		break;
+	case SZ_256M:
+		outval = FARADAY_PCI_MEMSIZE_256MB;
+		break;
+	case SZ_512M:
+		outval = FARADAY_PCI_MEMSIZE_512MB;
+		break;
+	case SZ_1G:
+		outval = FARADAY_PCI_MEMSIZE_1GB;
+		break;
+	case SZ_2G:
+		outval = FARADAY_PCI_MEMSIZE_2GB;
+		break;
+	default:
+		return -EINVAL;
+	}
+	outval <<= FARADAY_PCI_MEMSIZE_SHIFT;
+
+	/* This is probably not good */
+	if (mem_base & ~(FARADAY_PCI_MEMBASE_MASK))
+		pr_warn("truncated PCI memory base\n");
+	/* Translate to bridge side address space */
+	outval |= (mem_base & FARADAY_PCI_MEMBASE_MASK);
+	pr_debug("Translated pci base @%pap, size %pap to config %08x\n",
+		 &mem_base, &mem_size, outval);
+
+	*val = outval;
+	return 0;
+}
+
+static int faraday_raw_pci_read_config(struct faraday_pci *p, int bus_number,
+				       unsigned int fn, int config, int size,
+				       u32 *value)
+{
+	writel(PCI_CONF1_ADDRESS(bus_number, PCI_SLOT(fn),
+				 PCI_FUNC(fn), config),
+			p->base + FTPCI_CONFIG);
+
+	*value = readl(p->base + FTPCI_DATA);
+
+	if (size == 1)
+		*value = (*value >> (8 * (config & 3))) & 0xFF;
+	else if (size == 2)
+		*value = (*value >> (8 * (config & 3))) & 0xFFFF;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int faraday_pci_read_config(struct pci_bus *bus, unsigned int fn,
+				   int config, int size, u32 *value)
+{
+	struct faraday_pci *p = bus->sysdata;
+
+	dev_dbg(&bus->dev,
+		"[read]  slt: %.2d, fnc: %d, cnf: 0x%.2X, val (%d bytes): 0x%.8X\n",
+		PCI_SLOT(fn), PCI_FUNC(fn), config, size, *value);
+
+	return faraday_raw_pci_read_config(p, bus->number, fn, config, size, value);
+}
+
+static int faraday_raw_pci_write_config(struct faraday_pci *p, int bus_number,
+					 unsigned int fn, int config, int size,
+					 u32 value)
+{
+	int ret = PCIBIOS_SUCCESSFUL;
+
+	writel(PCI_CONF1_ADDRESS(bus_number, PCI_SLOT(fn),
+				 PCI_FUNC(fn), config),
+			p->base + FTPCI_CONFIG);
+
+	switch (size) {
+	case 4:
+		writel(value, p->base + FTPCI_DATA);
+		break;
+	case 2:
+		writew(value, p->base + FTPCI_DATA + (config & 3));
+		break;
+	case 1:
+		writeb(value, p->base + FTPCI_DATA + (config & 3));
+		break;
+	default:
+		ret = PCIBIOS_BAD_REGISTER_NUMBER;
+	}
+
+	return ret;
+}
+
+static int faraday_pci_write_config(struct pci_bus *bus, unsigned int fn,
+				    int config, int size, u32 value)
+{
+	struct faraday_pci *p = bus->sysdata;
+
+	dev_dbg(&bus->dev,
+		"[write] slt: %.2d, fnc: %d, cnf: 0x%.2X, val (%d bytes): 0x%.8X\n",
+		PCI_SLOT(fn), PCI_FUNC(fn), config, size, value);
+
+	return faraday_raw_pci_write_config(p, bus->number, fn, config, size,
+					    value);
+}
+
+static struct pci_ops faraday_pci_ops = {
+	.read	= faraday_pci_read_config,
+	.write	= faraday_pci_write_config,
+};
+
+static void faraday_pci_ack_irq(struct irq_data *d)
+{
+	struct faraday_pci *p = irq_data_get_irq_chip_data(d);
+	unsigned int reg;
+
+	faraday_raw_pci_read_config(p, 0, 0, FARADAY_PCI_CTRL2, 4, &reg);
+	reg &= ~(0xF << PCI_CTRL2_INTSTS_SHIFT);
+	reg |= BIT(irqd_to_hwirq(d) + PCI_CTRL2_INTSTS_SHIFT);
+	faraday_raw_pci_write_config(p, 0, 0, FARADAY_PCI_CTRL2, 4, reg);
+}
+
+static void faraday_pci_mask_irq(struct irq_data *d)
+{
+	struct faraday_pci *p = irq_data_get_irq_chip_data(d);
+	unsigned int reg;
+
+	faraday_raw_pci_read_config(p, 0, 0, FARADAY_PCI_CTRL2, 4, &reg);
+	reg &= ~((0xF << PCI_CTRL2_INTSTS_SHIFT)
+		 | BIT(irqd_to_hwirq(d) + PCI_CTRL2_INTMASK_SHIFT));
+	faraday_raw_pci_write_config(p, 0, 0, FARADAY_PCI_CTRL2, 4, reg);
+}
+
+static void faraday_pci_unmask_irq(struct irq_data *d)
+{
+	struct faraday_pci *p = irq_data_get_irq_chip_data(d);
+	unsigned int reg;
+
+	faraday_raw_pci_read_config(p, 0, 0, FARADAY_PCI_CTRL2, 4, &reg);
+	reg &= ~(0xF << PCI_CTRL2_INTSTS_SHIFT);
+	reg |= BIT(irqd_to_hwirq(d) + PCI_CTRL2_INTMASK_SHIFT);
+	faraday_raw_pci_write_config(p, 0, 0, FARADAY_PCI_CTRL2, 4, reg);
+}
+
+static void faraday_pci_irq_handler(struct irq_desc *desc)
+{
+	struct faraday_pci *p = irq_desc_get_handler_data(desc);
+	struct irq_chip *irqchip = irq_desc_get_chip(desc);
+	unsigned int irq_stat, reg, i;
+
+	faraday_raw_pci_read_config(p, 0, 0, FARADAY_PCI_CTRL2, 4, &reg);
+	irq_stat = reg >> PCI_CTRL2_INTSTS_SHIFT;
+
+	chained_irq_enter(irqchip, desc);
+
+	for (i = 0; i < 4; i++) {
+		if ((irq_stat & BIT(i)) == 0)
+			continue;
+		generic_handle_domain_irq(p->irqdomain, i);
+	}
+
+	chained_irq_exit(irqchip, desc);
+}
+
+static struct irq_chip faraday_pci_irq_chip = {
+	.name = "PCI",
+	.irq_ack = faraday_pci_ack_irq,
+	.irq_mask = faraday_pci_mask_irq,
+	.irq_unmask = faraday_pci_unmask_irq,
+};
+
+static int faraday_pci_irq_map(struct irq_domain *domain, unsigned int irq,
+			       irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &faraday_pci_irq_chip, handle_level_irq);
+	irq_set_chip_data(irq, domain->host_data);
+
+	return 0;
+}
+
+static const struct irq_domain_ops faraday_pci_irqdomain_ops = {
+	.map = faraday_pci_irq_map,
+};
+
+static int faraday_pci_setup_cascaded_irq(struct faraday_pci *p)
+{
+	struct device_node *intc = of_get_next_child(p->dev->of_node, NULL);
+	int irq;
+	int i;
+
+	if (!intc) {
+		dev_err(p->dev, "missing child interrupt-controller node\n");
+		return -EINVAL;
+	}
+
+	/* All PCI IRQs cascade off this one */
+	irq = of_irq_get(intc, 0);
+	if (irq <= 0) {
+		dev_err(p->dev, "failed to get parent IRQ\n");
+		of_node_put(intc);
+		return irq ?: -EINVAL;
+	}
+
+	p->irqdomain = irq_domain_add_linear(intc, PCI_NUM_INTX,
+					     &faraday_pci_irqdomain_ops, p);
+	of_node_put(intc);
+	if (!p->irqdomain) {
+		dev_err(p->dev, "failed to create Gemini PCI IRQ domain\n");
+		return -EINVAL;
+	}
+
+	irq_set_chained_handler_and_data(irq, faraday_pci_irq_handler, p);
+
+	for (i = 0; i < 4; i++)
+		irq_create_mapping(p->irqdomain, i);
+
+	return 0;
+}
+
+static int faraday_pci_parse_map_dma_ranges(struct faraday_pci *p)
+{
+	struct device *dev = p->dev;
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(p);
+	struct resource_entry *entry;
+	u32 confreg[3] = {
+		FARADAY_PCI_MEM1_BASE_SIZE,
+		FARADAY_PCI_MEM2_BASE_SIZE,
+		FARADAY_PCI_MEM3_BASE_SIZE,
+	};
+	int i = 0;
+	u32 val;
+
+	resource_list_for_each_entry(entry, &bridge->dma_ranges) {
+		u64 pci_addr = entry->res->start - entry->offset;
+		u64 end = entry->res->end - entry->offset;
+		int ret;
+
+		ret = faraday_res_to_memcfg(pci_addr,
+					    resource_size(entry->res), &val);
+		if (ret) {
+			dev_err(dev,
+				"DMA range %d: illegal MEM resource size\n", i);
+			return -EINVAL;
+		}
+
+		dev_info(dev, "DMA MEM%d BASE: 0x%016llx -> 0x%016llx config %08x\n",
+			 i + 1, pci_addr, end, val);
+		if (i <= 2) {
+			faraday_raw_pci_write_config(p, 0, 0, confreg[i],
+						     4, val);
+		} else {
+			dev_err(dev, "ignore extraneous dma-range %d\n", i);
+			break;
+		}
+
+		i++;
+	}
+
+	return 0;
+}
+
+static int faraday_pci_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	const struct faraday_pci_variant *variant =
+		of_device_get_match_data(dev);
+	struct resource_entry *win;
+	struct faraday_pci *p;
+	struct resource *io;
+	struct pci_host_bridge *host;
+	struct clk *clk;
+	unsigned char max_bus_speed = PCI_SPEED_33MHz;
+	unsigned char cur_bus_speed = PCI_SPEED_33MHz;
+	int ret;
+	u32 val;
+
+	host = devm_pci_alloc_host_bridge(dev, sizeof(*p));
+	if (!host)
+		return -ENOMEM;
+
+	host->ops = &faraday_pci_ops;
+	p = pci_host_bridge_priv(host);
+	host->sysdata = p;
+	p->dev = dev;
+
+	/* Retrieve and enable optional clocks */
+	clk = devm_clk_get_enabled(dev, "PCLK");
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+	p->bus_clk = devm_clk_get_enabled(dev, "PCICLK");
+	if (IS_ERR(p->bus_clk))
+		return PTR_ERR(p->bus_clk);
+
+	p->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(p->base))
+		return PTR_ERR(p->base);
+
+	win = resource_list_first_type(&host->windows, IORESOURCE_IO);
+	if (win) {
+		io = win->res;
+		if (!faraday_res_to_memcfg(io->start - win->offset,
+					   resource_size(io), &val)) {
+			/* setup I/O space size */
+			writel(val, p->base + FTPCI_IOSIZE);
+		} else {
+			dev_err(dev, "illegal IO mem size\n");
+			return -EINVAL;
+		}
+	}
+
+	/* Setup hostbridge */
+	val = readl(p->base + FTPCI_CTRL);
+	val |= PCI_COMMAND_IO;
+	val |= PCI_COMMAND_MEMORY;
+	val |= PCI_COMMAND_MASTER;
+	writel(val, p->base + FTPCI_CTRL);
+	/* Mask and clear all interrupts */
+	faraday_raw_pci_write_config(p, 0, 0, FARADAY_PCI_CTRL2 + 2, 2, 0xF000);
+	if (variant->cascaded_irq) {
+		ret = faraday_pci_setup_cascaded_irq(p);
+		if (ret) {
+			dev_err(dev, "failed to setup cascaded IRQ\n");
+			return ret;
+		}
+	}
+
+	/* Check bus clock if we can gear up to 66 MHz */
+	if (!IS_ERR(p->bus_clk)) {
+		unsigned long rate;
+		u32 val;
+
+		faraday_raw_pci_read_config(p, 0, 0,
+					    FARADAY_PCI_STATUS_CMD, 4, &val);
+		rate = clk_get_rate(p->bus_clk);
+
+		if ((rate == 33000000) && (val & PCI_STATUS_66MHZ_CAPABLE)) {
+			dev_info(dev, "33MHz bus is 66MHz capable\n");
+			max_bus_speed = PCI_SPEED_66MHz;
+			ret = clk_set_rate(p->bus_clk, 66000000);
+			if (ret)
+				dev_err(dev, "failed to set bus clock\n");
+		} else {
+			dev_info(dev, "33MHz only bus\n");
+			max_bus_speed = PCI_SPEED_33MHz;
+		}
+
+		/* Bumping the clock may fail so read back the rate */
+		rate = clk_get_rate(p->bus_clk);
+		if (rate == 33000000)
+			cur_bus_speed = PCI_SPEED_33MHz;
+		if (rate == 66000000)
+			cur_bus_speed = PCI_SPEED_66MHz;
+	}
+
+	ret = faraday_pci_parse_map_dma_ranges(p);
+	if (ret)
+		return ret;
+
+	ret = pci_scan_root_bus_bridge(host);
+	if (ret) {
+		dev_err(dev, "failed to scan host: %d\n", ret);
+		return ret;
+	}
+	p->bus = host->bus;
+	p->bus->max_bus_speed = max_bus_speed;
+	p->bus->cur_bus_speed = cur_bus_speed;
+
+	pci_bus_assign_resources(p->bus);
+	pci_bus_add_devices(p->bus);
+
+	return 0;
+}
+
+/*
+ * We encode bridge variants here, we have at least two so it doesn't
+ * hurt to have infrastructure to encompass future variants as well.
+ */
+static const struct faraday_pci_variant faraday_regular = {
+	.cascaded_irq = true,
+};
+
+static const struct faraday_pci_variant faraday_dual = {
+	.cascaded_irq = false,
+};
+
+static const struct of_device_id faraday_pci_of_match[] = {
+	{
+		.compatible = "faraday,ftpci100",
+		.data = &faraday_regular,
+	},
+	{
+		.compatible = "faraday,ftpci100-dual",
+		.data = &faraday_dual,
+	},
+	{},
+};
+
+static struct platform_driver faraday_pci_driver = {
+	.driver = {
+		.name = "ftpci100",
+		.of_match_table = faraday_pci_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe  = faraday_pci_probe,
+};
+builtin_platform_driver(faraday_pci_driver);
diff --git a/drivers/pci/controller/pci-host-common.c b/drivers/pci/controller/pci-host-common.c
new file mode 100644
index 000000000..6be3266cd
--- /dev/null
+++ b/drivers/pci/controller/pci-host-common.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic PCI host driver common code
+ *
+ * Copyright (C) 2014 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/pci-ecam.h>
+#include <linux/platform_device.h>
+
+static void gen_pci_unmap_cfg(void *ptr)
+{
+	pci_ecam_free((struct pci_config_window *)ptr);
+}
+
+static struct pci_config_window *gen_pci_init(struct device *dev,
+		struct pci_host_bridge *bridge, const struct pci_ecam_ops *ops)
+{
+	int err;
+	struct resource cfgres;
+	struct resource_entry *bus;
+	struct pci_config_window *cfg;
+
+	err = of_address_to_resource(dev->of_node, 0, &cfgres);
+	if (err) {
+		dev_err(dev, "missing \"reg\" property\n");
+		return ERR_PTR(err);
+	}
+
+	bus = resource_list_first_type(&bridge->windows, IORESOURCE_BUS);
+	if (!bus)
+		return ERR_PTR(-ENODEV);
+
+	cfg = pci_ecam_create(dev, &cfgres, bus->res, ops);
+	if (IS_ERR(cfg))
+		return cfg;
+
+	err = devm_add_action_or_reset(dev, gen_pci_unmap_cfg, cfg);
+	if (err)
+		return ERR_PTR(err);
+
+	return cfg;
+}
+
+int pci_host_common_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct pci_host_bridge *bridge;
+	struct pci_config_window *cfg;
+	const struct pci_ecam_ops *ops;
+
+	ops = of_device_get_match_data(&pdev->dev);
+	if (!ops)
+		return -ENODEV;
+
+	bridge = devm_pci_alloc_host_bridge(dev, 0);
+	if (!bridge)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, bridge);
+
+	of_pci_check_probe_only();
+
+	/* Parse and map our Configuration Space windows */
+	cfg = gen_pci_init(dev, bridge, ops);
+	if (IS_ERR(cfg))
+		return PTR_ERR(cfg);
+
+	/* Do not reassign resources if probe only */
+	if (!pci_has_flag(PCI_PROBE_ONLY))
+		pci_add_flags(PCI_REASSIGN_ALL_BUS);
+
+	bridge->sysdata = cfg;
+	bridge->ops = (struct pci_ops *)&ops->pci_ops;
+	bridge->msi_domain = true;
+
+	return pci_host_probe(bridge);
+}
+EXPORT_SYMBOL_GPL(pci_host_common_probe);
+
+int pci_host_common_remove(struct platform_device *pdev)
+{
+	struct pci_host_bridge *bridge = platform_get_drvdata(pdev);
+
+	pci_lock_rescan_remove();
+	pci_stop_root_bus(bridge->bus);
+	pci_remove_root_bus(bridge->bus);
+	pci_unlock_rescan_remove();
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_host_common_remove);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pci-host-generic.c b/drivers/pci/controller/pci-host-generic.c
new file mode 100644
index 000000000..63865aeb6
--- /dev/null
+++ b/drivers/pci/controller/pci-host-generic.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple, generic PCI host controller driver targeting firmware-initialised
+ * systems and virtual machines (e.g. the PCI emulation provided by kvmtool).
+ *
+ * Copyright (C) 2014 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci-ecam.h>
+#include <linux/platform_device.h>
+
+static const struct pci_ecam_ops gen_pci_cfg_cam_bus_ops = {
+	.bus_shift	= 16,
+	.pci_ops	= {
+		.map_bus	= pci_ecam_map_bus,
+		.read		= pci_generic_config_read,
+		.write		= pci_generic_config_write,
+	}
+};
+
+static bool pci_dw_valid_device(struct pci_bus *bus, unsigned int devfn)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+
+	/*
+	 * The Synopsys DesignWare PCIe controller in ECAM mode will not filter
+	 * type 0 config TLPs sent to devices 1 and up on its downstream port,
+	 * resulting in devices appearing multiple times on bus 0 unless we
+	 * filter out those accesses here.
+	 */
+	if (bus->number == cfg->busr.start && PCI_SLOT(devfn) > 0)
+		return false;
+
+	return true;
+}
+
+static void __iomem *pci_dw_ecam_map_bus(struct pci_bus *bus,
+					 unsigned int devfn, int where)
+{
+	if (!pci_dw_valid_device(bus, devfn))
+		return NULL;
+
+	return pci_ecam_map_bus(bus, devfn, where);
+}
+
+static const struct pci_ecam_ops pci_dw_ecam_bus_ops = {
+	.pci_ops	= {
+		.map_bus	= pci_dw_ecam_map_bus,
+		.read		= pci_generic_config_read,
+		.write		= pci_generic_config_write,
+	}
+};
+
+static const struct of_device_id gen_pci_of_match[] = {
+	{ .compatible = "pci-host-cam-generic",
+	  .data = &gen_pci_cfg_cam_bus_ops },
+
+	{ .compatible = "pci-host-ecam-generic",
+	  .data = &pci_generic_ecam_ops },
+
+	{ .compatible = "marvell,armada8k-pcie-ecam",
+	  .data = &pci_dw_ecam_bus_ops },
+
+	{ .compatible = "socionext,synquacer-pcie-ecam",
+	  .data = &pci_dw_ecam_bus_ops },
+
+	{ .compatible = "snps,dw-pcie-ecam",
+	  .data = &pci_dw_ecam_bus_ops },
+
+	{ },
+};
+MODULE_DEVICE_TABLE(of, gen_pci_of_match);
+
+static struct platform_driver gen_pci_driver = {
+	.driver = {
+		.name = "pci-host-generic",
+		.of_match_table = gen_pci_of_match,
+	},
+	.probe = pci_host_common_probe,
+	.remove = pci_host_common_remove,
+};
+module_platform_driver(gen_pci_driver);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pci-hyperv-intf.c b/drivers/pci/controller/pci-hyperv-intf.c
new file mode 100644
index 000000000..cc96be450
--- /dev/null
+++ b/drivers/pci/controller/pci-hyperv-intf.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) Microsoft Corporation.
+ *
+ * Author:
+ *   Haiyang Zhang <haiyangz@microsoft.com>
+ *
+ * This small module is a helper driver allows other drivers to
+ * have a common interface with the Hyper-V PCI frontend driver.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/hyperv.h>
+
+struct hyperv_pci_block_ops hvpci_block_ops;
+EXPORT_SYMBOL_GPL(hvpci_block_ops);
+
+int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
+			unsigned int block_id, unsigned int *bytes_returned)
+{
+	if (!hvpci_block_ops.read_block)
+		return -EOPNOTSUPP;
+
+	return hvpci_block_ops.read_block(dev, buf, buf_len, block_id,
+					  bytes_returned);
+}
+EXPORT_SYMBOL_GPL(hyperv_read_cfg_blk);
+
+int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
+			 unsigned int block_id)
+{
+	if (!hvpci_block_ops.write_block)
+		return -EOPNOTSUPP;
+
+	return hvpci_block_ops.write_block(dev, buf, len, block_id);
+}
+EXPORT_SYMBOL_GPL(hyperv_write_cfg_blk);
+
+int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
+				void (*block_invalidate)(void *context,
+							 u64 block_mask))
+{
+	if (!hvpci_block_ops.reg_blk_invalidate)
+		return -EOPNOTSUPP;
+
+	return hvpci_block_ops.reg_blk_invalidate(dev, context,
+						  block_invalidate);
+}
+EXPORT_SYMBOL_GPL(hyperv_reg_block_invalidate);
+
+static void __exit exit_hv_pci_intf(void)
+{
+}
+
+static int __init init_hv_pci_intf(void)
+{
+	return 0;
+}
+
+module_init(init_hv_pci_intf);
+module_exit(exit_hv_pci_intf);
+
+MODULE_DESCRIPTION("Hyper-V PCI Interface");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
new file mode 100644
index 000000000..bed3cefda
--- /dev/null
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -0,0 +1,4121 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) Microsoft Corporation.
+ *
+ * Author:
+ *   Jake Oshins <jakeo@microsoft.com>
+ *
+ * This driver acts as a paravirtual front-end for PCI Express root buses.
+ * When a PCI Express function (either an entire device or an SR-IOV
+ * Virtual Function) is being passed through to the VM, this driver exposes
+ * a new bus to the guest VM.  This is modeled as a root PCI bus because
+ * no bridges are being exposed to the VM.  In fact, with a "Generation 2"
+ * VM within Hyper-V, there may seem to be no PCI bus at all in the VM
+ * until a device as been exposed using this driver.
+ *
+ * Each root PCI bus has its own PCI domain, which is called "Segment" in
+ * the PCI Firmware Specifications.  Thus while each device passed through
+ * to the VM using this front-end will appear at "device 0", the domain will
+ * be unique.  Typically, each bus will have one PCI function on it, though
+ * this driver does support more than one.
+ *
+ * In order to map the interrupts from the device through to the guest VM,
+ * this driver also implements an IRQ Domain, which handles interrupts (either
+ * MSI or MSI-X) associated with the functions on the bus.  As interrupts are
+ * set up, torn down, or reaffined, this driver communicates with the
+ * underlying hypervisor to adjust the mappings in the I/O MMU so that each
+ * interrupt will be delivered to the correct virtual processor at the right
+ * vector.  This driver does not support level-triggered (line-based)
+ * interrupts, and will report that the Interrupt Line register in the
+ * function's configuration space is zero.
+ *
+ * The rest of this driver mostly maps PCI concepts onto underlying Hyper-V
+ * facilities.  For instance, the configuration space of a function exposed
+ * by Hyper-V is mapped into a single page of memory space, and the
+ * read and write handlers for config space must be aware of this mechanism.
+ * Similarly, device setup and teardown involves messages sent to and from
+ * the PCI back-end driver in Hyper-V.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci-ecam.h>
+#include <linux/delay.h>
+#include <linux/semaphore.h>
+#include <linux/irq.h>
+#include <linux/msi.h>
+#include <linux/hyperv.h>
+#include <linux/refcount.h>
+#include <linux/irqdomain.h>
+#include <linux/acpi.h>
+#include <asm/mshyperv.h>
+
+/*
+ * Protocol versions. The low word is the minor version, the high word the
+ * major version.
+ */
+
+#define PCI_MAKE_VERSION(major, minor) ((u32)(((major) << 16) | (minor)))
+#define PCI_MAJOR_VERSION(version) ((u32)(version) >> 16)
+#define PCI_MINOR_VERSION(version) ((u32)(version) & 0xff)
+
+enum pci_protocol_version_t {
+	PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1),	/* Win10 */
+	PCI_PROTOCOL_VERSION_1_2 = PCI_MAKE_VERSION(1, 2),	/* RS1 */
+	PCI_PROTOCOL_VERSION_1_3 = PCI_MAKE_VERSION(1, 3),	/* Vibranium */
+	PCI_PROTOCOL_VERSION_1_4 = PCI_MAKE_VERSION(1, 4),	/* WS2022 */
+};
+
+#define CPU_AFFINITY_ALL	-1ULL
+
+/*
+ * Supported protocol versions in the order of probing - highest go
+ * first.
+ */
+static enum pci_protocol_version_t pci_protocol_versions[] = {
+	PCI_PROTOCOL_VERSION_1_4,
+	PCI_PROTOCOL_VERSION_1_3,
+	PCI_PROTOCOL_VERSION_1_2,
+	PCI_PROTOCOL_VERSION_1_1,
+};
+
+#define PCI_CONFIG_MMIO_LENGTH	0x2000
+#define CFG_PAGE_OFFSET 0x1000
+#define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)
+
+#define MAX_SUPPORTED_MSI_MESSAGES 0x400
+
+#define STATUS_REVISION_MISMATCH 0xC0000059
+
+/* space for 32bit serial number as string */
+#define SLOT_NAME_SIZE 11
+
+/*
+ * Size of requestor for VMbus; the value is based on the observation
+ * that having more than one request outstanding is 'rare', and so 64
+ * should be generous in ensuring that we don't ever run out.
+ */
+#define HV_PCI_RQSTOR_SIZE 64
+
+/*
+ * Message Types
+ */
+
+enum pci_message_type {
+	/*
+	 * Version 1.1
+	 */
+	PCI_MESSAGE_BASE                = 0x42490000,
+	PCI_BUS_RELATIONS               = PCI_MESSAGE_BASE + 0,
+	PCI_QUERY_BUS_RELATIONS         = PCI_MESSAGE_BASE + 1,
+	PCI_POWER_STATE_CHANGE          = PCI_MESSAGE_BASE + 4,
+	PCI_QUERY_RESOURCE_REQUIREMENTS = PCI_MESSAGE_BASE + 5,
+	PCI_QUERY_RESOURCE_RESOURCES    = PCI_MESSAGE_BASE + 6,
+	PCI_BUS_D0ENTRY                 = PCI_MESSAGE_BASE + 7,
+	PCI_BUS_D0EXIT                  = PCI_MESSAGE_BASE + 8,
+	PCI_READ_BLOCK                  = PCI_MESSAGE_BASE + 9,
+	PCI_WRITE_BLOCK                 = PCI_MESSAGE_BASE + 0xA,
+	PCI_EJECT                       = PCI_MESSAGE_BASE + 0xB,
+	PCI_QUERY_STOP                  = PCI_MESSAGE_BASE + 0xC,
+	PCI_REENABLE                    = PCI_MESSAGE_BASE + 0xD,
+	PCI_QUERY_STOP_FAILED           = PCI_MESSAGE_BASE + 0xE,
+	PCI_EJECTION_COMPLETE           = PCI_MESSAGE_BASE + 0xF,
+	PCI_RESOURCES_ASSIGNED          = PCI_MESSAGE_BASE + 0x10,
+	PCI_RESOURCES_RELEASED          = PCI_MESSAGE_BASE + 0x11,
+	PCI_INVALIDATE_BLOCK            = PCI_MESSAGE_BASE + 0x12,
+	PCI_QUERY_PROTOCOL_VERSION      = PCI_MESSAGE_BASE + 0x13,
+	PCI_CREATE_INTERRUPT_MESSAGE    = PCI_MESSAGE_BASE + 0x14,
+	PCI_DELETE_INTERRUPT_MESSAGE    = PCI_MESSAGE_BASE + 0x15,
+	PCI_RESOURCES_ASSIGNED2		= PCI_MESSAGE_BASE + 0x16,
+	PCI_CREATE_INTERRUPT_MESSAGE2	= PCI_MESSAGE_BASE + 0x17,
+	PCI_DELETE_INTERRUPT_MESSAGE2	= PCI_MESSAGE_BASE + 0x18, /* unused */
+	PCI_BUS_RELATIONS2		= PCI_MESSAGE_BASE + 0x19,
+	PCI_RESOURCES_ASSIGNED3         = PCI_MESSAGE_BASE + 0x1A,
+	PCI_CREATE_INTERRUPT_MESSAGE3   = PCI_MESSAGE_BASE + 0x1B,
+	PCI_MESSAGE_MAXIMUM
+};
+
+/*
+ * Structures defining the virtual PCI Express protocol.
+ */
+
+union pci_version {
+	struct {
+		u16 minor_version;
+		u16 major_version;
+	} parts;
+	u32 version;
+} __packed;
+
+/*
+ * Function numbers are 8-bits wide on Express, as interpreted through ARI,
+ * which is all this driver does.  This representation is the one used in
+ * Windows, which is what is expected when sending this back and forth with
+ * the Hyper-V parent partition.
+ */
+union win_slot_encoding {
+	struct {
+		u32	dev:5;
+		u32	func:3;
+		u32	reserved:24;
+	} bits;
+	u32 slot;
+} __packed;
+
+/*
+ * Pretty much as defined in the PCI Specifications.
+ */
+struct pci_function_description {
+	u16	v_id;	/* vendor ID */
+	u16	d_id;	/* device ID */
+	u8	rev;
+	u8	prog_intf;
+	u8	subclass;
+	u8	base_class;
+	u32	subsystem_id;
+	union win_slot_encoding win_slot;
+	u32	ser;	/* serial number */
+} __packed;
+
+enum pci_device_description_flags {
+	HV_PCI_DEVICE_FLAG_NONE			= 0x0,
+	HV_PCI_DEVICE_FLAG_NUMA_AFFINITY	= 0x1,
+};
+
+struct pci_function_description2 {
+	u16	v_id;	/* vendor ID */
+	u16	d_id;	/* device ID */
+	u8	rev;
+	u8	prog_intf;
+	u8	subclass;
+	u8	base_class;
+	u32	subsystem_id;
+	union	win_slot_encoding win_slot;
+	u32	ser;	/* serial number */
+	u32	flags;
+	u16	virtual_numa_node;
+	u16	reserved;
+} __packed;
+
+/**
+ * struct hv_msi_desc
+ * @vector:		IDT entry
+ * @delivery_mode:	As defined in Intel's Programmer's
+ *			Reference Manual, Volume 3, Chapter 8.
+ * @vector_count:	Number of contiguous entries in the
+ *			Interrupt Descriptor Table that are
+ *			occupied by this Message-Signaled
+ *			Interrupt. For "MSI", as first defined
+ *			in PCI 2.2, this can be between 1 and
+ *			32. For "MSI-X," as first defined in PCI
+ *			3.0, this must be 1, as each MSI-X table
+ *			entry would have its own descriptor.
+ * @reserved:		Empty space
+ * @cpu_mask:		All the target virtual processors.
+ */
+struct hv_msi_desc {
+	u8	vector;
+	u8	delivery_mode;
+	u16	vector_count;
+	u32	reserved;
+	u64	cpu_mask;
+} __packed;
+
+/**
+ * struct hv_msi_desc2 - 1.2 version of hv_msi_desc
+ * @vector:		IDT entry
+ * @delivery_mode:	As defined in Intel's Programmer's
+ *			Reference Manual, Volume 3, Chapter 8.
+ * @vector_count:	Number of contiguous entries in the
+ *			Interrupt Descriptor Table that are
+ *			occupied by this Message-Signaled
+ *			Interrupt. For "MSI", as first defined
+ *			in PCI 2.2, this can be between 1 and
+ *			32. For "MSI-X," as first defined in PCI
+ *			3.0, this must be 1, as each MSI-X table
+ *			entry would have its own descriptor.
+ * @processor_count:	number of bits enabled in array.
+ * @processor_array:	All the target virtual processors.
+ */
+struct hv_msi_desc2 {
+	u8	vector;
+	u8	delivery_mode;
+	u16	vector_count;
+	u16	processor_count;
+	u16	processor_array[32];
+} __packed;
+
+/*
+ * struct hv_msi_desc3 - 1.3 version of hv_msi_desc
+ *	Everything is the same as in 'hv_msi_desc2' except that the size of the
+ *	'vector' field is larger to support bigger vector values. For ex: LPI
+ *	vectors on ARM.
+ */
+struct hv_msi_desc3 {
+	u32	vector;
+	u8	delivery_mode;
+	u8	reserved;
+	u16	vector_count;
+	u16	processor_count;
+	u16	processor_array[32];
+} __packed;
+
+/**
+ * struct tran_int_desc
+ * @reserved:		unused, padding
+ * @vector_count:	same as in hv_msi_desc
+ * @data:		This is the "data payload" value that is
+ *			written by the device when it generates
+ *			a message-signaled interrupt, either MSI
+ *			or MSI-X.
+ * @address:		This is the address to which the data
+ *			payload is written on interrupt
+ *			generation.
+ */
+struct tran_int_desc {
+	u16	reserved;
+	u16	vector_count;
+	u32	data;
+	u64	address;
+} __packed;
+
+/*
+ * A generic message format for virtual PCI.
+ * Specific message formats are defined later in the file.
+ */
+
+struct pci_message {
+	u32 type;
+} __packed;
+
+struct pci_child_message {
+	struct pci_message message_type;
+	union win_slot_encoding wslot;
+} __packed;
+
+struct pci_incoming_message {
+	struct vmpacket_descriptor hdr;
+	struct pci_message message_type;
+} __packed;
+
+struct pci_response {
+	struct vmpacket_descriptor hdr;
+	s32 status;			/* negative values are failures */
+} __packed;
+
+struct pci_packet {
+	void (*completion_func)(void *context, struct pci_response *resp,
+				int resp_packet_size);
+	void *compl_ctxt;
+
+	struct pci_message message[];
+};
+
+/*
+ * Specific message types supporting the PCI protocol.
+ */
+
+/*
+ * Version negotiation message. Sent from the guest to the host.
+ * The guest is free to try different versions until the host
+ * accepts the version.
+ *
+ * pci_version: The protocol version requested.
+ * is_last_attempt: If TRUE, this is the last version guest will request.
+ * reservedz: Reserved field, set to zero.
+ */
+
+struct pci_version_request {
+	struct pci_message message_type;
+	u32 protocol_version;
+} __packed;
+
+/*
+ * Bus D0 Entry.  This is sent from the guest to the host when the virtual
+ * bus (PCI Express port) is ready for action.
+ */
+
+struct pci_bus_d0_entry {
+	struct pci_message message_type;
+	u32 reserved;
+	u64 mmio_base;
+} __packed;
+
+struct pci_bus_relations {
+	struct pci_incoming_message incoming;
+	u32 device_count;
+	struct pci_function_description func[];
+} __packed;
+
+struct pci_bus_relations2 {
+	struct pci_incoming_message incoming;
+	u32 device_count;
+	struct pci_function_description2 func[];
+} __packed;
+
+struct pci_q_res_req_response {
+	struct vmpacket_descriptor hdr;
+	s32 status;			/* negative values are failures */
+	u32 probed_bar[PCI_STD_NUM_BARS];
+} __packed;
+
+struct pci_set_power {
+	struct pci_message message_type;
+	union win_slot_encoding wslot;
+	u32 power_state;		/* In Windows terms */
+	u32 reserved;
+} __packed;
+
+struct pci_set_power_response {
+	struct vmpacket_descriptor hdr;
+	s32 status;			/* negative values are failures */
+	union win_slot_encoding wslot;
+	u32 resultant_state;		/* In Windows terms */
+	u32 reserved;
+} __packed;
+
+struct pci_resources_assigned {
+	struct pci_message message_type;
+	union win_slot_encoding wslot;
+	u8 memory_range[0x14][6];	/* not used here */
+	u32 msi_descriptors;
+	u32 reserved[4];
+} __packed;
+
+struct pci_resources_assigned2 {
+	struct pci_message message_type;
+	union win_slot_encoding wslot;
+	u8 memory_range[0x14][6];	/* not used here */
+	u32 msi_descriptor_count;
+	u8 reserved[70];
+} __packed;
+
+struct pci_create_interrupt {
+	struct pci_message message_type;
+	union win_slot_encoding wslot;
+	struct hv_msi_desc int_desc;
+} __packed;
+
+struct pci_create_int_response {
+	struct pci_response response;
+	u32 reserved;
+	struct tran_int_desc int_desc;
+} __packed;
+
+struct pci_create_interrupt2 {
+	struct pci_message message_type;
+	union win_slot_encoding wslot;
+	struct hv_msi_desc2 int_desc;
+} __packed;
+
+struct pci_create_interrupt3 {
+	struct pci_message message_type;
+	union win_slot_encoding wslot;
+	struct hv_msi_desc3 int_desc;
+} __packed;
+
+struct pci_delete_interrupt {
+	struct pci_message message_type;
+	union win_slot_encoding wslot;
+	struct tran_int_desc int_desc;
+} __packed;
+
+/*
+ * Note: the VM must pass a valid block id, wslot and bytes_requested.
+ */
+struct pci_read_block {
+	struct pci_message message_type;
+	u32 block_id;
+	union win_slot_encoding wslot;
+	u32 bytes_requested;
+} __packed;
+
+struct pci_read_block_response {
+	struct vmpacket_descriptor hdr;
+	u32 status;
+	u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
+} __packed;
+
+/*
+ * Note: the VM must pass a valid block id, wslot and byte_count.
+ */
+struct pci_write_block {
+	struct pci_message message_type;
+	u32 block_id;
+	union win_slot_encoding wslot;
+	u32 byte_count;
+	u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
+} __packed;
+
+struct pci_dev_inval_block {
+	struct pci_incoming_message incoming;
+	union win_slot_encoding wslot;
+	u64 block_mask;
+} __packed;
+
+struct pci_dev_incoming {
+	struct pci_incoming_message incoming;
+	union win_slot_encoding wslot;
+} __packed;
+
+struct pci_eject_response {
+	struct pci_message message_type;
+	union win_slot_encoding wslot;
+	u32 status;
+} __packed;
+
+static int pci_ring_size = (4 * PAGE_SIZE);
+
+/*
+ * Driver specific state.
+ */
+
+enum hv_pcibus_state {
+	hv_pcibus_init = 0,
+	hv_pcibus_probed,
+	hv_pcibus_installed,
+	hv_pcibus_removing,
+	hv_pcibus_maximum
+};
+
+struct hv_pcibus_device {
+#ifdef CONFIG_X86
+	struct pci_sysdata sysdata;
+#elif defined(CONFIG_ARM64)
+	struct pci_config_window sysdata;
+#endif
+	struct pci_host_bridge *bridge;
+	struct fwnode_handle *fwnode;
+	/* Protocol version negotiated with the host */
+	enum pci_protocol_version_t protocol_version;
+
+	struct mutex state_lock;
+	enum hv_pcibus_state state;
+
+	struct hv_device *hdev;
+	resource_size_t low_mmio_space;
+	resource_size_t high_mmio_space;
+	struct resource *mem_config;
+	struct resource *low_mmio_res;
+	struct resource *high_mmio_res;
+	struct completion *survey_event;
+	struct pci_bus *pci_bus;
+	spinlock_t config_lock;	/* Avoid two threads writing index page */
+	spinlock_t device_list_lock;	/* Protect lists below */
+	void __iomem *cfg_addr;
+
+	struct list_head children;
+	struct list_head dr_list;
+
+	struct msi_domain_info msi_info;
+	struct irq_domain *irq_domain;
+
+	struct workqueue_struct *wq;
+
+	/* Highest slot of child device with resources allocated */
+	int wslot_res_allocated;
+	bool use_calls; /* Use hypercalls to access mmio cfg space */
+};
+
+/*
+ * Tracks "Device Relations" messages from the host, which must be both
+ * processed in order and deferred so that they don't run in the context
+ * of the incoming packet callback.
+ */
+struct hv_dr_work {
+	struct work_struct wrk;
+	struct hv_pcibus_device *bus;
+};
+
+struct hv_pcidev_description {
+	u16	v_id;	/* vendor ID */
+	u16	d_id;	/* device ID */
+	u8	rev;
+	u8	prog_intf;
+	u8	subclass;
+	u8	base_class;
+	u32	subsystem_id;
+	union	win_slot_encoding win_slot;
+	u32	ser;	/* serial number */
+	u32	flags;
+	u16	virtual_numa_node;
+};
+
+struct hv_dr_state {
+	struct list_head list_entry;
+	u32 device_count;
+	struct hv_pcidev_description func[];
+};
+
+struct hv_pci_dev {
+	/* List protected by pci_rescan_remove_lock */
+	struct list_head list_entry;
+	refcount_t refs;
+	struct pci_slot *pci_slot;
+	struct hv_pcidev_description desc;
+	bool reported_missing;
+	struct hv_pcibus_device *hbus;
+	struct work_struct wrk;
+
+	void (*block_invalidate)(void *context, u64 block_mask);
+	void *invalidate_context;
+
+	/*
+	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
+	 * read it back, for each of the BAR offsets within config space.
+	 */
+	u32 probed_bar[PCI_STD_NUM_BARS];
+};
+
+struct hv_pci_compl {
+	struct completion host_event;
+	s32 completion_status;
+};
+
+static void hv_pci_onchannelcallback(void *context);
+
+#ifdef CONFIG_X86
+#define DELIVERY_MODE	APIC_DELIVERY_MODE_FIXED
+#define FLOW_HANDLER	handle_edge_irq
+#define FLOW_NAME	"edge"
+
+static int hv_pci_irqchip_init(void)
+{
+	return 0;
+}
+
+static struct irq_domain *hv_pci_get_root_domain(void)
+{
+	return x86_vector_domain;
+}
+
+static unsigned int hv_msi_get_int_vector(struct irq_data *data)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+
+	return cfg->vector;
+}
+
+#define hv_msi_prepare		pci_msi_prepare
+
+/**
+ * hv_arch_irq_unmask() - "Unmask" the IRQ by setting its current
+ * affinity.
+ * @data:	Describes the IRQ
+ *
+ * Build new a destination for the MSI and make a hypercall to
+ * update the Interrupt Redirection Table. "Device Logical ID"
+ * is built out of this PCI bus's instance GUID and the function
+ * number of the device.
+ */
+static void hv_arch_irq_unmask(struct irq_data *data)
+{
+	struct msi_desc *msi_desc = irq_data_get_msi_desc(data);
+	struct hv_retarget_device_interrupt *params;
+	struct tran_int_desc *int_desc;
+	struct hv_pcibus_device *hbus;
+	const struct cpumask *dest;
+	cpumask_var_t tmp;
+	struct pci_bus *pbus;
+	struct pci_dev *pdev;
+	unsigned long flags;
+	u32 var_size = 0;
+	int cpu, nr_bank;
+	u64 res;
+
+	dest = irq_data_get_effective_affinity_mask(data);
+	pdev = msi_desc_to_pci_dev(msi_desc);
+	pbus = pdev->bus;
+	hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
+	int_desc = data->chip_data;
+	if (!int_desc) {
+		dev_warn(&hbus->hdev->device, "%s() can not unmask irq %u\n",
+			 __func__, data->irq);
+		return;
+	}
+
+	local_irq_save(flags);
+
+	params = *this_cpu_ptr(hyperv_pcpu_input_arg);
+	memset(params, 0, sizeof(*params));
+	params->partition_id = HV_PARTITION_ID_SELF;
+	params->int_entry.source = HV_INTERRUPT_SOURCE_MSI;
+	params->int_entry.msi_entry.address.as_uint32 = int_desc->address & 0xffffffff;
+	params->int_entry.msi_entry.data.as_uint32 = int_desc->data;
+	params->device_id = (hbus->hdev->dev_instance.b[5] << 24) |
+			   (hbus->hdev->dev_instance.b[4] << 16) |
+			   (hbus->hdev->dev_instance.b[7] << 8) |
+			   (hbus->hdev->dev_instance.b[6] & 0xf8) |
+			   PCI_FUNC(pdev->devfn);
+	params->int_target.vector = hv_msi_get_int_vector(data);
+
+	/*
+	 * Honoring apic->delivery_mode set to APIC_DELIVERY_MODE_FIXED by
+	 * setting the HV_DEVICE_INTERRUPT_TARGET_MULTICAST flag results in a
+	 * spurious interrupt storm. Not doing so does not seem to have a
+	 * negative effect (yet?).
+	 */
+
+	if (hbus->protocol_version >= PCI_PROTOCOL_VERSION_1_2) {
+		/*
+		 * PCI_PROTOCOL_VERSION_1_2 supports the VP_SET version of the
+		 * HVCALL_RETARGET_INTERRUPT hypercall, which also coincides
+		 * with >64 VP support.
+		 * ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED
+		 * is not sufficient for this hypercall.
+		 */
+		params->int_target.flags |=
+			HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;
+
+		if (!alloc_cpumask_var(&tmp, GFP_ATOMIC)) {
+			res = 1;
+			goto out;
+		}
+
+		cpumask_and(tmp, dest, cpu_online_mask);
+		nr_bank = cpumask_to_vpset(&params->int_target.vp_set, tmp);
+		free_cpumask_var(tmp);
+
+		if (nr_bank <= 0) {
+			res = 1;
+			goto out;
+		}
+
+		/*
+		 * var-sized hypercall, var-size starts after vp_mask (thus
+		 * vp_set.format does not count, but vp_set.valid_bank_mask
+		 * does).
+		 */
+		var_size = 1 + nr_bank;
+	} else {
+		for_each_cpu_and(cpu, dest, cpu_online_mask) {
+			params->int_target.vp_mask |=
+				(1ULL << hv_cpu_number_to_vp_number(cpu));
+		}
+	}
+
+	res = hv_do_hypercall(HVCALL_RETARGET_INTERRUPT | (var_size << 17),
+			      params, NULL);
+
+out:
+	local_irq_restore(flags);
+
+	/*
+	 * During hibernation, when a CPU is offlined, the kernel tries
+	 * to move the interrupt to the remaining CPUs that haven't
+	 * been offlined yet. In this case, the below hv_do_hypercall()
+	 * always fails since the vmbus channel has been closed:
+	 * refer to cpu_disable_common() -> fixup_irqs() ->
+	 * irq_migrate_all_off_this_cpu() -> migrate_one_irq().
+	 *
+	 * Suppress the error message for hibernation because the failure
+	 * during hibernation does not matter (at this time all the devices
+	 * have been frozen). Note: the correct affinity info is still updated
+	 * into the irqdata data structure in migrate_one_irq() ->
+	 * irq_do_set_affinity(), so later when the VM resumes,
+	 * hv_pci_restore_msi_state() is able to correctly restore the
+	 * interrupt with the correct affinity.
+	 */
+	if (!hv_result_success(res) && hbus->state != hv_pcibus_removing)
+		dev_err(&hbus->hdev->device,
+			"%s() failed: %#llx", __func__, res);
+}
+#elif defined(CONFIG_ARM64)
+/*
+ * SPI vectors to use for vPCI; arch SPIs range is [32, 1019], but leaving a bit
+ * of room at the start to allow for SPIs to be specified through ACPI and
+ * starting with a power of two to satisfy power of 2 multi-MSI requirement.
+ */
+#define HV_PCI_MSI_SPI_START	64
+#define HV_PCI_MSI_SPI_NR	(1020 - HV_PCI_MSI_SPI_START)
+#define DELIVERY_MODE		0
+#define FLOW_HANDLER		NULL
+#define FLOW_NAME		NULL
+#define hv_msi_prepare		NULL
+
+struct hv_pci_chip_data {
+	DECLARE_BITMAP(spi_map, HV_PCI_MSI_SPI_NR);
+	struct mutex	map_lock;
+};
+
+/* Hyper-V vPCI MSI GIC IRQ domain */
+static struct irq_domain *hv_msi_gic_irq_domain;
+
+/* Hyper-V PCI MSI IRQ chip */
+static struct irq_chip hv_arm64_msi_irq_chip = {
+	.name = "MSI",
+	.irq_set_affinity = irq_chip_set_affinity_parent,
+	.irq_eoi = irq_chip_eoi_parent,
+	.irq_mask = irq_chip_mask_parent,
+	.irq_unmask = irq_chip_unmask_parent
+};
+
+static unsigned int hv_msi_get_int_vector(struct irq_data *irqd)
+{
+	return irqd->parent_data->hwirq;
+}
+
+/*
+ * @nr_bm_irqs:		Indicates the number of IRQs that were allocated from
+ *			the bitmap.
+ * @nr_dom_irqs:	Indicates the number of IRQs that were allocated from
+ *			the parent domain.
+ */
+static void hv_pci_vec_irq_free(struct irq_domain *domain,
+				unsigned int virq,
+				unsigned int nr_bm_irqs,
+				unsigned int nr_dom_irqs)
+{
+	struct hv_pci_chip_data *chip_data = domain->host_data;
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	int first = d->hwirq - HV_PCI_MSI_SPI_START;
+	int i;
+
+	mutex_lock(&chip_data->map_lock);
+	bitmap_release_region(chip_data->spi_map,
+			      first,
+			      get_count_order(nr_bm_irqs));
+	mutex_unlock(&chip_data->map_lock);
+	for (i = 0; i < nr_dom_irqs; i++) {
+		if (i)
+			d = irq_domain_get_irq_data(domain, virq + i);
+		irq_domain_reset_irq_data(d);
+	}
+
+	irq_domain_free_irqs_parent(domain, virq, nr_dom_irqs);
+}
+
+static void hv_pci_vec_irq_domain_free(struct irq_domain *domain,
+				       unsigned int virq,
+				       unsigned int nr_irqs)
+{
+	hv_pci_vec_irq_free(domain, virq, nr_irqs, nr_irqs);
+}
+
+static int hv_pci_vec_alloc_device_irq(struct irq_domain *domain,
+				       unsigned int nr_irqs,
+				       irq_hw_number_t *hwirq)
+{
+	struct hv_pci_chip_data *chip_data = domain->host_data;
+	int index;
+
+	/* Find and allocate region from the SPI bitmap */
+	mutex_lock(&chip_data->map_lock);
+	index = bitmap_find_free_region(chip_data->spi_map,
+					HV_PCI_MSI_SPI_NR,
+					get_count_order(nr_irqs));
+	mutex_unlock(&chip_data->map_lock);
+	if (index < 0)
+		return -ENOSPC;
+
+	*hwirq = index + HV_PCI_MSI_SPI_START;
+
+	return 0;
+}
+
+static int hv_pci_vec_irq_gic_domain_alloc(struct irq_domain *domain,
+					   unsigned int virq,
+					   irq_hw_number_t hwirq)
+{
+	struct irq_fwspec fwspec;
+	struct irq_data *d;
+	int ret;
+
+	fwspec.fwnode = domain->parent->fwnode;
+	fwspec.param_count = 2;
+	fwspec.param[0] = hwirq;
+	fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
+
+	ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
+	if (ret)
+		return ret;
+
+	/*
+	 * Since the interrupt specifier is not coming from ACPI or DT, the
+	 * trigger type will need to be set explicitly. Otherwise, it will be
+	 * set to whatever is in the GIC configuration.
+	 */
+	d = irq_domain_get_irq_data(domain->parent, virq);
+
+	return d->chip->irq_set_type(d, IRQ_TYPE_EDGE_RISING);
+}
+
+static int hv_pci_vec_irq_domain_alloc(struct irq_domain *domain,
+				       unsigned int virq, unsigned int nr_irqs,
+				       void *args)
+{
+	irq_hw_number_t hwirq;
+	unsigned int i;
+	int ret;
+
+	ret = hv_pci_vec_alloc_device_irq(domain, nr_irqs, &hwirq);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < nr_irqs; i++) {
+		ret = hv_pci_vec_irq_gic_domain_alloc(domain, virq + i,
+						      hwirq + i);
+		if (ret) {
+			hv_pci_vec_irq_free(domain, virq, nr_irqs, i);
+			return ret;
+		}
+
+		irq_domain_set_hwirq_and_chip(domain, virq + i,
+					      hwirq + i,
+					      &hv_arm64_msi_irq_chip,
+					      domain->host_data);
+		pr_debug("pID:%d vID:%u\n", (int)(hwirq + i), virq + i);
+	}
+
+	return 0;
+}
+
+/*
+ * Pick the first cpu as the irq affinity that can be temporarily used for
+ * composing MSI from the hypervisor. GIC will eventually set the right
+ * affinity for the irq and the 'unmask' will retarget the interrupt to that
+ * cpu.
+ */
+static int hv_pci_vec_irq_domain_activate(struct irq_domain *domain,
+					  struct irq_data *irqd, bool reserve)
+{
+	int cpu = cpumask_first(cpu_present_mask);
+
+	irq_data_update_effective_affinity(irqd, cpumask_of(cpu));
+
+	return 0;
+}
+
+static const struct irq_domain_ops hv_pci_domain_ops = {
+	.alloc	= hv_pci_vec_irq_domain_alloc,
+	.free	= hv_pci_vec_irq_domain_free,
+	.activate = hv_pci_vec_irq_domain_activate,
+};
+
+static int hv_pci_irqchip_init(void)
+{
+	static struct hv_pci_chip_data *chip_data;
+	struct fwnode_handle *fn = NULL;
+	int ret = -ENOMEM;
+
+	chip_data = kzalloc(sizeof(*chip_data), GFP_KERNEL);
+	if (!chip_data)
+		return ret;
+
+	mutex_init(&chip_data->map_lock);
+	fn = irq_domain_alloc_named_fwnode("hv_vpci_arm64");
+	if (!fn)
+		goto free_chip;
+
+	/*
+	 * IRQ domain once enabled, should not be removed since there is no
+	 * way to ensure that all the corresponding devices are also gone and
+	 * no interrupts will be generated.
+	 */
+	hv_msi_gic_irq_domain = acpi_irq_create_hierarchy(0, HV_PCI_MSI_SPI_NR,
+							  fn, &hv_pci_domain_ops,
+							  chip_data);
+
+	if (!hv_msi_gic_irq_domain) {
+		pr_err("Failed to create Hyper-V arm64 vPCI MSI IRQ domain\n");
+		goto free_chip;
+	}
+
+	return 0;
+
+free_chip:
+	kfree(chip_data);
+	if (fn)
+		irq_domain_free_fwnode(fn);
+
+	return ret;
+}
+
+static struct irq_domain *hv_pci_get_root_domain(void)
+{
+	return hv_msi_gic_irq_domain;
+}
+
+/*
+ * SPIs are used for interrupts of PCI devices and SPIs is managed via GICD
+ * registers which Hyper-V already supports, so no hypercall needed.
+ */
+static void hv_arch_irq_unmask(struct irq_data *data) { }
+#endif /* CONFIG_ARM64 */
+
+/**
+ * hv_pci_generic_compl() - Invoked for a completion packet
+ * @context:		Set up by the sender of the packet.
+ * @resp:		The response packet
+ * @resp_packet_size:	Size in bytes of the packet
+ *
+ * This function is used to trigger an event and report status
+ * for any message for which the completion packet contains a
+ * status and nothing else.
+ */
+static void hv_pci_generic_compl(void *context, struct pci_response *resp,
+				 int resp_packet_size)
+{
+	struct hv_pci_compl *comp_pkt = context;
+
+	comp_pkt->completion_status = resp->status;
+	complete(&comp_pkt->host_event);
+}
+
+static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus,
+						u32 wslot);
+
+static void get_pcichild(struct hv_pci_dev *hpdev)
+{
+	refcount_inc(&hpdev->refs);
+}
+
+static void put_pcichild(struct hv_pci_dev *hpdev)
+{
+	if (refcount_dec_and_test(&hpdev->refs))
+		kfree(hpdev);
+}
+
+/*
+ * There is no good way to get notified from vmbus_onoffer_rescind(),
+ * so let's use polling here, since this is not a hot path.
+ */
+static int wait_for_response(struct hv_device *hdev,
+			     struct completion *comp)
+{
+	while (true) {
+		if (hdev->channel->rescind) {
+			dev_warn_once(&hdev->device, "The device is gone.\n");
+			return -ENODEV;
+		}
+
+		if (wait_for_completion_timeout(comp, HZ / 10))
+			break;
+	}
+
+	return 0;
+}
+
+/**
+ * devfn_to_wslot() - Convert from Linux PCI slot to Windows
+ * @devfn:	The Linux representation of PCI slot
+ *
+ * Windows uses a slightly different representation of PCI slot.
+ *
+ * Return: The Windows representation
+ */
+static u32 devfn_to_wslot(int devfn)
+{
+	union win_slot_encoding wslot;
+
+	wslot.slot = 0;
+	wslot.bits.dev = PCI_SLOT(devfn);
+	wslot.bits.func = PCI_FUNC(devfn);
+
+	return wslot.slot;
+}
+
+/**
+ * wslot_to_devfn() - Convert from Windows PCI slot to Linux
+ * @wslot:	The Windows representation of PCI slot
+ *
+ * Windows uses a slightly different representation of PCI slot.
+ *
+ * Return: The Linux representation
+ */
+static int wslot_to_devfn(u32 wslot)
+{
+	union win_slot_encoding slot_no;
+
+	slot_no.slot = wslot;
+	return PCI_DEVFN(slot_no.bits.dev, slot_no.bits.func);
+}
+
+static void hv_pci_read_mmio(struct device *dev, phys_addr_t gpa, int size, u32 *val)
+{
+	struct hv_mmio_read_input *in;
+	struct hv_mmio_read_output *out;
+	u64 ret;
+
+	/*
+	 * Must be called with interrupts disabled so it is safe
+	 * to use the per-cpu input argument page.  Use it for
+	 * both input and output.
+	 */
+	in = *this_cpu_ptr(hyperv_pcpu_input_arg);
+	out = *this_cpu_ptr(hyperv_pcpu_input_arg) + sizeof(*in);
+	in->gpa = gpa;
+	in->size = size;
+
+	ret = hv_do_hypercall(HVCALL_MMIO_READ, in, out);
+	if (hv_result_success(ret)) {
+		switch (size) {
+		case 1:
+			*val = *(u8 *)(out->data);
+			break;
+		case 2:
+			*val = *(u16 *)(out->data);
+			break;
+		default:
+			*val = *(u32 *)(out->data);
+			break;
+		}
+	} else
+		dev_err(dev, "MMIO read hypercall error %llx addr %llx size %d\n",
+				ret, gpa, size);
+}
+
+static void hv_pci_write_mmio(struct device *dev, phys_addr_t gpa, int size, u32 val)
+{
+	struct hv_mmio_write_input *in;
+	u64 ret;
+
+	/*
+	 * Must be called with interrupts disabled so it is safe
+	 * to use the per-cpu input argument memory.
+	 */
+	in = *this_cpu_ptr(hyperv_pcpu_input_arg);
+	in->gpa = gpa;
+	in->size = size;
+	switch (size) {
+	case 1:
+		*(u8 *)(in->data) = val;
+		break;
+	case 2:
+		*(u16 *)(in->data) = val;
+		break;
+	default:
+		*(u32 *)(in->data) = val;
+		break;
+	}
+
+	ret = hv_do_hypercall(HVCALL_MMIO_WRITE, in, NULL);
+	if (!hv_result_success(ret))
+		dev_err(dev, "MMIO write hypercall error %llx addr %llx size %d\n",
+				ret, gpa, size);
+}
+
+/*
+ * PCI Configuration Space for these root PCI buses is implemented as a pair
+ * of pages in memory-mapped I/O space.  Writing to the first page chooses
+ * the PCI function being written or read.  Once the first page has been
+ * written to, the following page maps in the entire configuration space of
+ * the function.
+ */
+
+/**
+ * _hv_pcifront_read_config() - Internal PCI config read
+ * @hpdev:	The PCI driver's representation of the device
+ * @where:	Offset within config space
+ * @size:	Size of the transfer
+ * @val:	Pointer to the buffer receiving the data
+ */
+static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where,
+				     int size, u32 *val)
+{
+	struct hv_pcibus_device *hbus = hpdev->hbus;
+	struct device *dev = &hbus->hdev->device;
+	int offset = where + CFG_PAGE_OFFSET;
+	unsigned long flags;
+
+	/*
+	 * If the attempt is to read the IDs or the ROM BAR, simulate that.
+	 */
+	if (where + size <= PCI_COMMAND) {
+		memcpy(val, ((u8 *)&hpdev->desc.v_id) + where, size);
+	} else if (where >= PCI_CLASS_REVISION && where + size <=
+		   PCI_CACHE_LINE_SIZE) {
+		memcpy(val, ((u8 *)&hpdev->desc.rev) + where -
+		       PCI_CLASS_REVISION, size);
+	} else if (where >= PCI_SUBSYSTEM_VENDOR_ID && where + size <=
+		   PCI_ROM_ADDRESS) {
+		memcpy(val, (u8 *)&hpdev->desc.subsystem_id + where -
+		       PCI_SUBSYSTEM_VENDOR_ID, size);
+	} else if (where >= PCI_ROM_ADDRESS && where + size <=
+		   PCI_CAPABILITY_LIST) {
+		/* ROM BARs are unimplemented */
+		*val = 0;
+	} else if (where >= PCI_INTERRUPT_LINE && where + size <=
+		   PCI_INTERRUPT_PIN) {
+		/*
+		 * Interrupt Line and Interrupt PIN are hard-wired to zero
+		 * because this front-end only supports message-signaled
+		 * interrupts.
+		 */
+		*val = 0;
+	} else if (where + size <= CFG_PAGE_SIZE) {
+
+		spin_lock_irqsave(&hbus->config_lock, flags);
+		if (hbus->use_calls) {
+			phys_addr_t addr = hbus->mem_config->start + offset;
+
+			hv_pci_write_mmio(dev, hbus->mem_config->start, 4,
+						hpdev->desc.win_slot.slot);
+			hv_pci_read_mmio(dev, addr, size, val);
+		} else {
+			void __iomem *addr = hbus->cfg_addr + offset;
+
+			/* Choose the function to be read. (See comment above) */
+			writel(hpdev->desc.win_slot.slot, hbus->cfg_addr);
+			/* Make sure the function was chosen before reading. */
+			mb();
+			/* Read from that function's config space. */
+			switch (size) {
+			case 1:
+				*val = readb(addr);
+				break;
+			case 2:
+				*val = readw(addr);
+				break;
+			default:
+				*val = readl(addr);
+				break;
+			}
+			/*
+			 * Make sure the read was done before we release the
+			 * spinlock allowing consecutive reads/writes.
+			 */
+			mb();
+		}
+		spin_unlock_irqrestore(&hbus->config_lock, flags);
+	} else {
+		dev_err(dev, "Attempt to read beyond a function's config space.\n");
+	}
+}
+
+static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev)
+{
+	struct hv_pcibus_device *hbus = hpdev->hbus;
+	struct device *dev = &hbus->hdev->device;
+	u32 val;
+	u16 ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&hbus->config_lock, flags);
+
+	if (hbus->use_calls) {
+		phys_addr_t addr = hbus->mem_config->start +
+					 CFG_PAGE_OFFSET + PCI_VENDOR_ID;
+
+		hv_pci_write_mmio(dev, hbus->mem_config->start, 4,
+					hpdev->desc.win_slot.slot);
+		hv_pci_read_mmio(dev, addr, 2, &val);
+		ret = val;  /* Truncates to 16 bits */
+	} else {
+		void __iomem *addr = hbus->cfg_addr + CFG_PAGE_OFFSET +
+					     PCI_VENDOR_ID;
+		/* Choose the function to be read. (See comment above) */
+		writel(hpdev->desc.win_slot.slot, hbus->cfg_addr);
+		/* Make sure the function was chosen before we start reading. */
+		mb();
+		/* Read from that function's config space. */
+		ret = readw(addr);
+		/*
+		 * mb() is not required here, because the
+		 * spin_unlock_irqrestore() is a barrier.
+		 */
+	}
+
+	spin_unlock_irqrestore(&hbus->config_lock, flags);
+
+	return ret;
+}
+
+/**
+ * _hv_pcifront_write_config() - Internal PCI config write
+ * @hpdev:	The PCI driver's representation of the device
+ * @where:	Offset within config space
+ * @size:	Size of the transfer
+ * @val:	The data being transferred
+ */
+static void _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where,
+				      int size, u32 val)
+{
+	struct hv_pcibus_device *hbus = hpdev->hbus;
+	struct device *dev = &hbus->hdev->device;
+	int offset = where + CFG_PAGE_OFFSET;
+	unsigned long flags;
+
+	if (where >= PCI_SUBSYSTEM_VENDOR_ID &&
+	    where + size <= PCI_CAPABILITY_LIST) {
+		/* SSIDs and ROM BARs are read-only */
+	} else if (where >= PCI_COMMAND && where + size <= CFG_PAGE_SIZE) {
+		spin_lock_irqsave(&hbus->config_lock, flags);
+
+		if (hbus->use_calls) {
+			phys_addr_t addr = hbus->mem_config->start + offset;
+
+			hv_pci_write_mmio(dev, hbus->mem_config->start, 4,
+						hpdev->desc.win_slot.slot);
+			hv_pci_write_mmio(dev, addr, size, val);
+		} else {
+			void __iomem *addr = hbus->cfg_addr + offset;
+
+			/* Choose the function to write. (See comment above) */
+			writel(hpdev->desc.win_slot.slot, hbus->cfg_addr);
+			/* Make sure the function was chosen before writing. */
+			wmb();
+			/* Write to that function's config space. */
+			switch (size) {
+			case 1:
+				writeb(val, addr);
+				break;
+			case 2:
+				writew(val, addr);
+				break;
+			default:
+				writel(val, addr);
+				break;
+			}
+			/*
+			 * Make sure the write was done before we release the
+			 * spinlock allowing consecutive reads/writes.
+			 */
+			mb();
+		}
+		spin_unlock_irqrestore(&hbus->config_lock, flags);
+	} else {
+		dev_err(dev, "Attempt to write beyond a function's config space.\n");
+	}
+}
+
+/**
+ * hv_pcifront_read_config() - Read configuration space
+ * @bus: PCI Bus structure
+ * @devfn: Device/function
+ * @where: Offset from base
+ * @size: Byte/word/dword
+ * @val: Value to be read
+ *
+ * Return: PCIBIOS_SUCCESSFUL on success
+ *	   PCIBIOS_DEVICE_NOT_FOUND on failure
+ */
+static int hv_pcifront_read_config(struct pci_bus *bus, unsigned int devfn,
+				   int where, int size, u32 *val)
+{
+	struct hv_pcibus_device *hbus =
+		container_of(bus->sysdata, struct hv_pcibus_device, sysdata);
+	struct hv_pci_dev *hpdev;
+
+	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(devfn));
+	if (!hpdev)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	_hv_pcifront_read_config(hpdev, where, size, val);
+
+	put_pcichild(hpdev);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/**
+ * hv_pcifront_write_config() - Write configuration space
+ * @bus: PCI Bus structure
+ * @devfn: Device/function
+ * @where: Offset from base
+ * @size: Byte/word/dword
+ * @val: Value to be written to device
+ *
+ * Return: PCIBIOS_SUCCESSFUL on success
+ *	   PCIBIOS_DEVICE_NOT_FOUND on failure
+ */
+static int hv_pcifront_write_config(struct pci_bus *bus, unsigned int devfn,
+				    int where, int size, u32 val)
+{
+	struct hv_pcibus_device *hbus =
+	    container_of(bus->sysdata, struct hv_pcibus_device, sysdata);
+	struct hv_pci_dev *hpdev;
+
+	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(devfn));
+	if (!hpdev)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	_hv_pcifront_write_config(hpdev, where, size, val);
+
+	put_pcichild(hpdev);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/* PCIe operations */
+static struct pci_ops hv_pcifront_ops = {
+	.read  = hv_pcifront_read_config,
+	.write = hv_pcifront_write_config,
+};
+
+/*
+ * Paravirtual backchannel
+ *
+ * Hyper-V SR-IOV provides a backchannel mechanism in software for
+ * communication between a VF driver and a PF driver.  These
+ * "configuration blocks" are similar in concept to PCI configuration space,
+ * but instead of doing reads and writes in 32-bit chunks through a very slow
+ * path, packets of up to 128 bytes can be sent or received asynchronously.
+ *
+ * Nearly every SR-IOV device contains just such a communications channel in
+ * hardware, so using this one in software is usually optional.  Using the
+ * software channel, however, allows driver implementers to leverage software
+ * tools that fuzz the communications channel looking for vulnerabilities.
+ *
+ * The usage model for these packets puts the responsibility for reading or
+ * writing on the VF driver.  The VF driver sends a read or a write packet,
+ * indicating which "block" is being referred to by number.
+ *
+ * If the PF driver wishes to initiate communication, it can "invalidate" one or
+ * more of the first 64 blocks.  This invalidation is delivered via a callback
+ * supplied by the VF driver by this driver.
+ *
+ * No protocol is implied, except that supplied by the PF and VF drivers.
+ */
+
+struct hv_read_config_compl {
+	struct hv_pci_compl comp_pkt;
+	void *buf;
+	unsigned int len;
+	unsigned int bytes_returned;
+};
+
+/**
+ * hv_pci_read_config_compl() - Invoked when a response packet
+ * for a read config block operation arrives.
+ * @context:		Identifies the read config operation
+ * @resp:		The response packet itself
+ * @resp_packet_size:	Size in bytes of the response packet
+ */
+static void hv_pci_read_config_compl(void *context, struct pci_response *resp,
+				     int resp_packet_size)
+{
+	struct hv_read_config_compl *comp = context;
+	struct pci_read_block_response *read_resp =
+		(struct pci_read_block_response *)resp;
+	unsigned int data_len, hdr_len;
+
+	hdr_len = offsetof(struct pci_read_block_response, bytes);
+	if (resp_packet_size < hdr_len) {
+		comp->comp_pkt.completion_status = -1;
+		goto out;
+	}
+
+	data_len = resp_packet_size - hdr_len;
+	if (data_len > 0 && read_resp->status == 0) {
+		comp->bytes_returned = min(comp->len, data_len);
+		memcpy(comp->buf, read_resp->bytes, comp->bytes_returned);
+	} else {
+		comp->bytes_returned = 0;
+	}
+
+	comp->comp_pkt.completion_status = read_resp->status;
+out:
+	complete(&comp->comp_pkt.host_event);
+}
+
+/**
+ * hv_read_config_block() - Sends a read config block request to
+ * the back-end driver running in the Hyper-V parent partition.
+ * @pdev:		The PCI driver's representation for this device.
+ * @buf:		Buffer into which the config block will be copied.
+ * @len:		Size in bytes of buf.
+ * @block_id:		Identifies the config block which has been requested.
+ * @bytes_returned:	Size which came back from the back-end driver.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int hv_read_config_block(struct pci_dev *pdev, void *buf,
+				unsigned int len, unsigned int block_id,
+				unsigned int *bytes_returned)
+{
+	struct hv_pcibus_device *hbus =
+		container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+			     sysdata);
+	struct {
+		struct pci_packet pkt;
+		char buf[sizeof(struct pci_read_block)];
+	} pkt;
+	struct hv_read_config_compl comp_pkt;
+	struct pci_read_block *read_blk;
+	int ret;
+
+	if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
+		return -EINVAL;
+
+	init_completion(&comp_pkt.comp_pkt.host_event);
+	comp_pkt.buf = buf;
+	comp_pkt.len = len;
+
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.pkt.completion_func = hv_pci_read_config_compl;
+	pkt.pkt.compl_ctxt = &comp_pkt;
+	read_blk = (struct pci_read_block *)&pkt.pkt.message;
+	read_blk->message_type.type = PCI_READ_BLOCK;
+	read_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
+	read_blk->block_id = block_id;
+	read_blk->bytes_requested = len;
+
+	ret = vmbus_sendpacket(hbus->hdev->channel, read_blk,
+			       sizeof(*read_blk), (unsigned long)&pkt.pkt,
+			       VM_PKT_DATA_INBAND,
+			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret)
+		return ret;
+
+	ret = wait_for_response(hbus->hdev, &comp_pkt.comp_pkt.host_event);
+	if (ret)
+		return ret;
+
+	if (comp_pkt.comp_pkt.completion_status != 0 ||
+	    comp_pkt.bytes_returned == 0) {
+		dev_err(&hbus->hdev->device,
+			"Read Config Block failed: 0x%x, bytes_returned=%d\n",
+			comp_pkt.comp_pkt.completion_status,
+			comp_pkt.bytes_returned);
+		return -EIO;
+	}
+
+	*bytes_returned = comp_pkt.bytes_returned;
+	return 0;
+}
+
+/**
+ * hv_pci_write_config_compl() - Invoked when a response packet for a write
+ * config block operation arrives.
+ * @context:		Identifies the write config operation
+ * @resp:		The response packet itself
+ * @resp_packet_size:	Size in bytes of the response packet
+ */
+static void hv_pci_write_config_compl(void *context, struct pci_response *resp,
+				      int resp_packet_size)
+{
+	struct hv_pci_compl *comp_pkt = context;
+
+	comp_pkt->completion_status = resp->status;
+	complete(&comp_pkt->host_event);
+}
+
+/**
+ * hv_write_config_block() - Sends a write config block request to the
+ * back-end driver running in the Hyper-V parent partition.
+ * @pdev:		The PCI driver's representation for this device.
+ * @buf:		Buffer from which the config block will	be copied.
+ * @len:		Size in bytes of buf.
+ * @block_id:		Identifies the config block which is being written.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int hv_write_config_block(struct pci_dev *pdev, void *buf,
+				unsigned int len, unsigned int block_id)
+{
+	struct hv_pcibus_device *hbus =
+		container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+			     sysdata);
+	struct {
+		struct pci_packet pkt;
+		char buf[sizeof(struct pci_write_block)];
+		u32 reserved;
+	} pkt;
+	struct hv_pci_compl comp_pkt;
+	struct pci_write_block *write_blk;
+	u32 pkt_size;
+	int ret;
+
+	if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
+		return -EINVAL;
+
+	init_completion(&comp_pkt.host_event);
+
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.pkt.completion_func = hv_pci_write_config_compl;
+	pkt.pkt.compl_ctxt = &comp_pkt;
+	write_blk = (struct pci_write_block *)&pkt.pkt.message;
+	write_blk->message_type.type = PCI_WRITE_BLOCK;
+	write_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
+	write_blk->block_id = block_id;
+	write_blk->byte_count = len;
+	memcpy(write_blk->bytes, buf, len);
+	pkt_size = offsetof(struct pci_write_block, bytes) + len;
+	/*
+	 * This quirk is required on some hosts shipped around 2018, because
+	 * these hosts don't check the pkt_size correctly (new hosts have been
+	 * fixed since early 2019). The quirk is also safe on very old hosts
+	 * and new hosts, because, on them, what really matters is the length
+	 * specified in write_blk->byte_count.
+	 */
+	pkt_size += sizeof(pkt.reserved);
+
+	ret = vmbus_sendpacket(hbus->hdev->channel, write_blk, pkt_size,
+			       (unsigned long)&pkt.pkt, VM_PKT_DATA_INBAND,
+			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret)
+		return ret;
+
+	ret = wait_for_response(hbus->hdev, &comp_pkt.host_event);
+	if (ret)
+		return ret;
+
+	if (comp_pkt.completion_status != 0) {
+		dev_err(&hbus->hdev->device,
+			"Write Config Block failed: 0x%x\n",
+			comp_pkt.completion_status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * hv_register_block_invalidate() - Invoked when a config block invalidation
+ * arrives from the back-end driver.
+ * @pdev:		The PCI driver's representation for this device.
+ * @context:		Identifies the device.
+ * @block_invalidate:	Identifies all of the blocks being invalidated.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
+					void (*block_invalidate)(void *context,
+								 u64 block_mask))
+{
+	struct hv_pcibus_device *hbus =
+		container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+			     sysdata);
+	struct hv_pci_dev *hpdev;
+
+	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
+	if (!hpdev)
+		return -ENODEV;
+
+	hpdev->block_invalidate = block_invalidate;
+	hpdev->invalidate_context = context;
+
+	put_pcichild(hpdev);
+	return 0;
+
+}
+
+/* Interrupt management hooks */
+static void hv_int_desc_free(struct hv_pci_dev *hpdev,
+			     struct tran_int_desc *int_desc)
+{
+	struct pci_delete_interrupt *int_pkt;
+	struct {
+		struct pci_packet pkt;
+		u8 buffer[sizeof(struct pci_delete_interrupt)];
+	} ctxt;
+
+	if (!int_desc->vector_count) {
+		kfree(int_desc);
+		return;
+	}
+	memset(&ctxt, 0, sizeof(ctxt));
+	int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message;
+	int_pkt->message_type.type =
+		PCI_DELETE_INTERRUPT_MESSAGE;
+	int_pkt->wslot.slot = hpdev->desc.win_slot.slot;
+	int_pkt->int_desc = *int_desc;
+	vmbus_sendpacket(hpdev->hbus->hdev->channel, int_pkt, sizeof(*int_pkt),
+			 0, VM_PKT_DATA_INBAND, 0);
+	kfree(int_desc);
+}
+
+/**
+ * hv_msi_free() - Free the MSI.
+ * @domain:	The interrupt domain pointer
+ * @info:	Extra MSI-related context
+ * @irq:	Identifies the IRQ.
+ *
+ * The Hyper-V parent partition and hypervisor are tracking the
+ * messages that are in use, keeping the interrupt redirection
+ * table up to date.  This callback sends a message that frees
+ * the IRT entry and related tracking nonsense.
+ */
+static void hv_msi_free(struct irq_domain *domain, struct msi_domain_info *info,
+			unsigned int irq)
+{
+	struct hv_pcibus_device *hbus;
+	struct hv_pci_dev *hpdev;
+	struct pci_dev *pdev;
+	struct tran_int_desc *int_desc;
+	struct irq_data *irq_data = irq_domain_get_irq_data(domain, irq);
+	struct msi_desc *msi = irq_data_get_msi_desc(irq_data);
+
+	pdev = msi_desc_to_pci_dev(msi);
+	hbus = info->data;
+	int_desc = irq_data_get_irq_chip_data(irq_data);
+	if (!int_desc)
+		return;
+
+	irq_data->chip_data = NULL;
+	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
+	if (!hpdev) {
+		kfree(int_desc);
+		return;
+	}
+
+	hv_int_desc_free(hpdev, int_desc);
+	put_pcichild(hpdev);
+}
+
+static void hv_irq_mask(struct irq_data *data)
+{
+	pci_msi_mask_irq(data);
+	if (data->parent_data->chip->irq_mask)
+		irq_chip_mask_parent(data);
+}
+
+static void hv_irq_unmask(struct irq_data *data)
+{
+	hv_arch_irq_unmask(data);
+
+	if (data->parent_data->chip->irq_unmask)
+		irq_chip_unmask_parent(data);
+	pci_msi_unmask_irq(data);
+}
+
+struct compose_comp_ctxt {
+	struct hv_pci_compl comp_pkt;
+	struct tran_int_desc int_desc;
+};
+
+static void hv_pci_compose_compl(void *context, struct pci_response *resp,
+				 int resp_packet_size)
+{
+	struct compose_comp_ctxt *comp_pkt = context;
+	struct pci_create_int_response *int_resp =
+		(struct pci_create_int_response *)resp;
+
+	if (resp_packet_size < sizeof(*int_resp)) {
+		comp_pkt->comp_pkt.completion_status = -1;
+		goto out;
+	}
+	comp_pkt->comp_pkt.completion_status = resp->status;
+	comp_pkt->int_desc = int_resp->int_desc;
+out:
+	complete(&comp_pkt->comp_pkt.host_event);
+}
+
+static u32 hv_compose_msi_req_v1(
+	struct pci_create_interrupt *int_pkt,
+	u32 slot, u8 vector, u16 vector_count)
+{
+	int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
+	int_pkt->wslot.slot = slot;
+	int_pkt->int_desc.vector = vector;
+	int_pkt->int_desc.vector_count = vector_count;
+	int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
+
+	/*
+	 * Create MSI w/ dummy vCPU set, overwritten by subsequent retarget in
+	 * hv_irq_unmask().
+	 */
+	int_pkt->int_desc.cpu_mask = CPU_AFFINITY_ALL;
+
+	return sizeof(*int_pkt);
+}
+
+/*
+ * The vCPU selected by hv_compose_multi_msi_req_get_cpu() and
+ * hv_compose_msi_req_get_cpu() is a "dummy" vCPU because the final vCPU to be
+ * interrupted is specified later in hv_irq_unmask() and communicated to Hyper-V
+ * via the HVCALL_RETARGET_INTERRUPT hypercall. But the choice of dummy vCPU is
+ * not irrelevant because Hyper-V chooses the physical CPU to handle the
+ * interrupts based on the vCPU specified in message sent to the vPCI VSP in
+ * hv_compose_msi_msg(). Hyper-V's choice of pCPU is not visible to the guest,
+ * but assigning too many vPCI device interrupts to the same pCPU can cause a
+ * performance bottleneck. So we spread out the dummy vCPUs to influence Hyper-V
+ * to spread out the pCPUs that it selects.
+ *
+ * For the single-MSI and MSI-X cases, it's OK for hv_compose_msi_req_get_cpu()
+ * to always return the same dummy vCPU, because a second call to
+ * hv_compose_msi_msg() contains the "real" vCPU, causing Hyper-V to choose a
+ * new pCPU for the interrupt. But for the multi-MSI case, the second call to
+ * hv_compose_msi_msg() exits without sending a message to the vPCI VSP, so the
+ * original dummy vCPU is used. This dummy vCPU must be round-robin'ed so that
+ * the pCPUs are spread out. All interrupts for a multi-MSI device end up using
+ * the same pCPU, even though the vCPUs will be spread out by later calls
+ * to hv_irq_unmask(), but that is the best we can do now.
+ *
+ * With Hyper-V in Nov 2022, the HVCALL_RETARGET_INTERRUPT hypercall does *not*
+ * cause Hyper-V to reselect the pCPU based on the specified vCPU. Such an
+ * enhancement is planned for a future version. With that enhancement, the
+ * dummy vCPU selection won't matter, and interrupts for the same multi-MSI
+ * device will be spread across multiple pCPUs.
+ */
+
+/*
+ * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
+ * by subsequent retarget in hv_irq_unmask().
+ */
+static int hv_compose_msi_req_get_cpu(const struct cpumask *affinity)
+{
+	return cpumask_first_and(affinity, cpu_online_mask);
+}
+
+/*
+ * Make sure the dummy vCPU values for multi-MSI don't all point to vCPU0.
+ */
+static int hv_compose_multi_msi_req_get_cpu(void)
+{
+	static DEFINE_SPINLOCK(multi_msi_cpu_lock);
+
+	/* -1 means starting with CPU 0 */
+	static int cpu_next = -1;
+
+	unsigned long flags;
+	int cpu;
+
+	spin_lock_irqsave(&multi_msi_cpu_lock, flags);
+
+	cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask, nr_cpu_ids,
+				     false);
+	cpu = cpu_next;
+
+	spin_unlock_irqrestore(&multi_msi_cpu_lock, flags);
+
+	return cpu;
+}
+
+static u32 hv_compose_msi_req_v2(
+	struct pci_create_interrupt2 *int_pkt, int cpu,
+	u32 slot, u8 vector, u16 vector_count)
+{
+	int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2;
+	int_pkt->wslot.slot = slot;
+	int_pkt->int_desc.vector = vector;
+	int_pkt->int_desc.vector_count = vector_count;
+	int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
+	int_pkt->int_desc.processor_array[0] =
+		hv_cpu_number_to_vp_number(cpu);
+	int_pkt->int_desc.processor_count = 1;
+
+	return sizeof(*int_pkt);
+}
+
+static u32 hv_compose_msi_req_v3(
+	struct pci_create_interrupt3 *int_pkt, int cpu,
+	u32 slot, u32 vector, u16 vector_count)
+{
+	int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE3;
+	int_pkt->wslot.slot = slot;
+	int_pkt->int_desc.vector = vector;
+	int_pkt->int_desc.reserved = 0;
+	int_pkt->int_desc.vector_count = vector_count;
+	int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
+	int_pkt->int_desc.processor_array[0] =
+		hv_cpu_number_to_vp_number(cpu);
+	int_pkt->int_desc.processor_count = 1;
+
+	return sizeof(*int_pkt);
+}
+
+/**
+ * hv_compose_msi_msg() - Supplies a valid MSI address/data
+ * @data:	Everything about this MSI
+ * @msg:	Buffer that is filled in by this function
+ *
+ * This function unpacks the IRQ looking for target CPU set, IDT
+ * vector and mode and sends a message to the parent partition
+ * asking for a mapping for that tuple in this partition.  The
+ * response supplies a data value and address to which that data
+ * should be written to trigger that interrupt.
+ */
+static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct hv_pcibus_device *hbus;
+	struct vmbus_channel *channel;
+	struct hv_pci_dev *hpdev;
+	struct pci_bus *pbus;
+	struct pci_dev *pdev;
+	const struct cpumask *dest;
+	struct compose_comp_ctxt comp;
+	struct tran_int_desc *int_desc;
+	struct msi_desc *msi_desc;
+	/*
+	 * vector_count should be u16: see hv_msi_desc, hv_msi_desc2
+	 * and hv_msi_desc3. vector must be u32: see hv_msi_desc3.
+	 */
+	u16 vector_count;
+	u32 vector;
+	struct {
+		struct pci_packet pci_pkt;
+		union {
+			struct pci_create_interrupt v1;
+			struct pci_create_interrupt2 v2;
+			struct pci_create_interrupt3 v3;
+		} int_pkts;
+	} __packed ctxt;
+	bool multi_msi;
+	u64 trans_id;
+	u32 size;
+	int ret;
+	int cpu;
+
+	msi_desc  = irq_data_get_msi_desc(data);
+	multi_msi = !msi_desc->pci.msi_attrib.is_msix &&
+		    msi_desc->nvec_used > 1;
+
+	/* Reuse the previous allocation */
+	if (data->chip_data && multi_msi) {
+		int_desc = data->chip_data;
+		msg->address_hi = int_desc->address >> 32;
+		msg->address_lo = int_desc->address & 0xffffffff;
+		msg->data = int_desc->data;
+		return;
+	}
+
+	pdev = msi_desc_to_pci_dev(msi_desc);
+	dest = irq_data_get_effective_affinity_mask(data);
+	pbus = pdev->bus;
+	hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
+	channel = hbus->hdev->channel;
+	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
+	if (!hpdev)
+		goto return_null_message;
+
+	/* Free any previous message that might have already been composed. */
+	if (data->chip_data && !multi_msi) {
+		int_desc = data->chip_data;
+		data->chip_data = NULL;
+		hv_int_desc_free(hpdev, int_desc);
+	}
+
+	int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC);
+	if (!int_desc)
+		goto drop_reference;
+
+	if (multi_msi) {
+		/*
+		 * If this is not the first MSI of Multi MSI, we already have
+		 * a mapping.  Can exit early.
+		 */
+		if (msi_desc->irq != data->irq) {
+			data->chip_data = int_desc;
+			int_desc->address = msi_desc->msg.address_lo |
+					    (u64)msi_desc->msg.address_hi << 32;
+			int_desc->data = msi_desc->msg.data +
+					 (data->irq - msi_desc->irq);
+			msg->address_hi = msi_desc->msg.address_hi;
+			msg->address_lo = msi_desc->msg.address_lo;
+			msg->data = int_desc->data;
+			put_pcichild(hpdev);
+			return;
+		}
+		/*
+		 * The vector we select here is a dummy value.  The correct
+		 * value gets sent to the hypervisor in unmask().  This needs
+		 * to be aligned with the count, and also not zero.  Multi-msi
+		 * is powers of 2 up to 32, so 32 will always work here.
+		 */
+		vector = 32;
+		vector_count = msi_desc->nvec_used;
+		cpu = hv_compose_multi_msi_req_get_cpu();
+	} else {
+		vector = hv_msi_get_int_vector(data);
+		vector_count = 1;
+		cpu = hv_compose_msi_req_get_cpu(dest);
+	}
+
+	/*
+	 * hv_compose_msi_req_v1 and v2 are for x86 only, meaning 'vector'
+	 * can't exceed u8. Cast 'vector' down to u8 for v1/v2 explicitly
+	 * for better readability.
+	 */
+	memset(&ctxt, 0, sizeof(ctxt));
+	init_completion(&comp.comp_pkt.host_event);
+	ctxt.pci_pkt.completion_func = hv_pci_compose_compl;
+	ctxt.pci_pkt.compl_ctxt = &comp;
+
+	switch (hbus->protocol_version) {
+	case PCI_PROTOCOL_VERSION_1_1:
+		size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
+					hpdev->desc.win_slot.slot,
+					(u8)vector,
+					vector_count);
+		break;
+
+	case PCI_PROTOCOL_VERSION_1_2:
+	case PCI_PROTOCOL_VERSION_1_3:
+		size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2,
+					cpu,
+					hpdev->desc.win_slot.slot,
+					(u8)vector,
+					vector_count);
+		break;
+
+	case PCI_PROTOCOL_VERSION_1_4:
+		size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3,
+					cpu,
+					hpdev->desc.win_slot.slot,
+					vector,
+					vector_count);
+		break;
+
+	default:
+		/* As we only negotiate protocol versions known to this driver,
+		 * this path should never hit. However, this is it not a hot
+		 * path so we print a message to aid future updates.
+		 */
+		dev_err(&hbus->hdev->device,
+			"Unexpected vPCI protocol, update driver.");
+		goto free_int_desc;
+	}
+
+	ret = vmbus_sendpacket_getid(hpdev->hbus->hdev->channel, &ctxt.int_pkts,
+				     size, (unsigned long)&ctxt.pci_pkt,
+				     &trans_id, VM_PKT_DATA_INBAND,
+				     VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret) {
+		dev_err(&hbus->hdev->device,
+			"Sending request for interrupt failed: 0x%x",
+			comp.comp_pkt.completion_status);
+		goto free_int_desc;
+	}
+
+	/*
+	 * Prevents hv_pci_onchannelcallback() from running concurrently
+	 * in the tasklet.
+	 */
+	tasklet_disable_in_atomic(&channel->callback_event);
+
+	/*
+	 * Since this function is called with IRQ locks held, can't
+	 * do normal wait for completion; instead poll.
+	 */
+	while (!try_wait_for_completion(&comp.comp_pkt.host_event)) {
+		unsigned long flags;
+
+		/* 0xFFFF means an invalid PCI VENDOR ID. */
+		if (hv_pcifront_get_vendor_id(hpdev) == 0xFFFF) {
+			dev_err_once(&hbus->hdev->device,
+				     "the device has gone\n");
+			goto enable_tasklet;
+		}
+
+		/*
+		 * Make sure that the ring buffer data structure doesn't get
+		 * freed while we dereference the ring buffer pointer.  Test
+		 * for the channel's onchannel_callback being NULL within a
+		 * sched_lock critical section.  See also the inline comments
+		 * in vmbus_reset_channel_cb().
+		 */
+		spin_lock_irqsave(&channel->sched_lock, flags);
+		if (unlikely(channel->onchannel_callback == NULL)) {
+			spin_unlock_irqrestore(&channel->sched_lock, flags);
+			goto enable_tasklet;
+		}
+		hv_pci_onchannelcallback(hbus);
+		spin_unlock_irqrestore(&channel->sched_lock, flags);
+
+		udelay(100);
+	}
+
+	tasklet_enable(&channel->callback_event);
+
+	if (comp.comp_pkt.completion_status < 0) {
+		dev_err(&hbus->hdev->device,
+			"Request for interrupt failed: 0x%x",
+			comp.comp_pkt.completion_status);
+		goto free_int_desc;
+	}
+
+	/*
+	 * Record the assignment so that this can be unwound later. Using
+	 * irq_set_chip_data() here would be appropriate, but the lock it takes
+	 * is already held.
+	 */
+	*int_desc = comp.int_desc;
+	data->chip_data = int_desc;
+
+	/* Pass up the result. */
+	msg->address_hi = comp.int_desc.address >> 32;
+	msg->address_lo = comp.int_desc.address & 0xffffffff;
+	msg->data = comp.int_desc.data;
+
+	put_pcichild(hpdev);
+	return;
+
+enable_tasklet:
+	tasklet_enable(&channel->callback_event);
+	/*
+	 * The completion packet on the stack becomes invalid after 'return';
+	 * remove the ID from the VMbus requestor if the identifier is still
+	 * mapped to/associated with the packet.  (The identifier could have
+	 * been 're-used', i.e., already removed and (re-)mapped.)
+	 *
+	 * Cf. hv_pci_onchannelcallback().
+	 */
+	vmbus_request_addr_match(channel, trans_id, (unsigned long)&ctxt.pci_pkt);
+free_int_desc:
+	kfree(int_desc);
+drop_reference:
+	put_pcichild(hpdev);
+return_null_message:
+	msg->address_hi = 0;
+	msg->address_lo = 0;
+	msg->data = 0;
+}
+
+/* HW Interrupt Chip Descriptor */
+static struct irq_chip hv_msi_irq_chip = {
+	.name			= "Hyper-V PCIe MSI",
+	.irq_compose_msi_msg	= hv_compose_msi_msg,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+#ifdef CONFIG_X86
+	.irq_ack		= irq_chip_ack_parent,
+#elif defined(CONFIG_ARM64)
+	.irq_eoi		= irq_chip_eoi_parent,
+#endif
+	.irq_mask		= hv_irq_mask,
+	.irq_unmask		= hv_irq_unmask,
+};
+
+static struct msi_domain_ops hv_msi_ops = {
+	.msi_prepare	= hv_msi_prepare,
+	.msi_free	= hv_msi_free,
+};
+
+/**
+ * hv_pcie_init_irq_domain() - Initialize IRQ domain
+ * @hbus:	The root PCI bus
+ *
+ * This function creates an IRQ domain which will be used for
+ * interrupts from devices that have been passed through.  These
+ * devices only support MSI and MSI-X, not line-based interrupts
+ * or simulations of line-based interrupts through PCIe's
+ * fabric-layer messages.  Because interrupts are remapped, we
+ * can support multi-message MSI here.
+ *
+ * Return: '0' on success and error value on failure
+ */
+static int hv_pcie_init_irq_domain(struct hv_pcibus_device *hbus)
+{
+	hbus->msi_info.chip = &hv_msi_irq_chip;
+	hbus->msi_info.ops = &hv_msi_ops;
+	hbus->msi_info.flags = (MSI_FLAG_USE_DEF_DOM_OPS |
+		MSI_FLAG_USE_DEF_CHIP_OPS | MSI_FLAG_MULTI_PCI_MSI |
+		MSI_FLAG_PCI_MSIX);
+	hbus->msi_info.handler = FLOW_HANDLER;
+	hbus->msi_info.handler_name = FLOW_NAME;
+	hbus->msi_info.data = hbus;
+	hbus->irq_domain = pci_msi_create_irq_domain(hbus->fwnode,
+						     &hbus->msi_info,
+						     hv_pci_get_root_domain());
+	if (!hbus->irq_domain) {
+		dev_err(&hbus->hdev->device,
+			"Failed to build an MSI IRQ domain\n");
+		return -ENODEV;
+	}
+
+	dev_set_msi_domain(&hbus->bridge->dev, hbus->irq_domain);
+
+	return 0;
+}
+
+/**
+ * get_bar_size() - Get the address space consumed by a BAR
+ * @bar_val:	Value that a BAR returned after -1 was written
+ *              to it.
+ *
+ * This function returns the size of the BAR, rounded up to 1
+ * page.  It has to be rounded up because the hypervisor's page
+ * table entry that maps the BAR into the VM can't specify an
+ * offset within a page.  The invariant is that the hypervisor
+ * must place any BARs of smaller than page length at the
+ * beginning of a page.
+ *
+ * Return:	Size in bytes of the consumed MMIO space.
+ */
+static u64 get_bar_size(u64 bar_val)
+{
+	return round_up((1 + ~(bar_val & PCI_BASE_ADDRESS_MEM_MASK)),
+			PAGE_SIZE);
+}
+
+/**
+ * survey_child_resources() - Total all MMIO requirements
+ * @hbus:	Root PCI bus, as understood by this driver
+ */
+static void survey_child_resources(struct hv_pcibus_device *hbus)
+{
+	struct hv_pci_dev *hpdev;
+	resource_size_t bar_size = 0;
+	unsigned long flags;
+	struct completion *event;
+	u64 bar_val;
+	int i;
+
+	/* If nobody is waiting on the answer, don't compute it. */
+	event = xchg(&hbus->survey_event, NULL);
+	if (!event)
+		return;
+
+	/* If the answer has already been computed, go with it. */
+	if (hbus->low_mmio_space || hbus->high_mmio_space) {
+		complete(event);
+		return;
+	}
+
+	spin_lock_irqsave(&hbus->device_list_lock, flags);
+
+	/*
+	 * Due to an interesting quirk of the PCI spec, all memory regions
+	 * for a child device are a power of 2 in size and aligned in memory,
+	 * so it's sufficient to just add them up without tracking alignment.
+	 */
+	list_for_each_entry(hpdev, &hbus->children, list_entry) {
+		for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+			if (hpdev->probed_bar[i] & PCI_BASE_ADDRESS_SPACE_IO)
+				dev_err(&hbus->hdev->device,
+					"There's an I/O BAR in this list!\n");
+
+			if (hpdev->probed_bar[i] != 0) {
+				/*
+				 * A probed BAR has all the upper bits set that
+				 * can be changed.
+				 */
+
+				bar_val = hpdev->probed_bar[i];
+				if (bar_val & PCI_BASE_ADDRESS_MEM_TYPE_64)
+					bar_val |=
+					((u64)hpdev->probed_bar[++i] << 32);
+				else
+					bar_val |= 0xffffffff00000000ULL;
+
+				bar_size = get_bar_size(bar_val);
+
+				if (bar_val & PCI_BASE_ADDRESS_MEM_TYPE_64)
+					hbus->high_mmio_space += bar_size;
+				else
+					hbus->low_mmio_space += bar_size;
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
+	complete(event);
+}
+
+/**
+ * prepopulate_bars() - Fill in BARs with defaults
+ * @hbus:	Root PCI bus, as understood by this driver
+ *
+ * The core PCI driver code seems much, much happier if the BARs
+ * for a device have values upon first scan. So fill them in.
+ * The algorithm below works down from large sizes to small,
+ * attempting to pack the assignments optimally. The assumption,
+ * enforced in other parts of the code, is that the beginning of
+ * the memory-mapped I/O space will be aligned on the largest
+ * BAR size.
+ */
+static void prepopulate_bars(struct hv_pcibus_device *hbus)
+{
+	resource_size_t high_size = 0;
+	resource_size_t low_size = 0;
+	resource_size_t high_base = 0;
+	resource_size_t low_base = 0;
+	resource_size_t bar_size;
+	struct hv_pci_dev *hpdev;
+	unsigned long flags;
+	u64 bar_val;
+	u32 command;
+	bool high;
+	int i;
+
+	if (hbus->low_mmio_space) {
+		low_size = 1ULL << (63 - __builtin_clzll(hbus->low_mmio_space));
+		low_base = hbus->low_mmio_res->start;
+	}
+
+	if (hbus->high_mmio_space) {
+		high_size = 1ULL <<
+			(63 - __builtin_clzll(hbus->high_mmio_space));
+		high_base = hbus->high_mmio_res->start;
+	}
+
+	spin_lock_irqsave(&hbus->device_list_lock, flags);
+
+	/*
+	 * Clear the memory enable bit, in case it's already set. This occurs
+	 * in the suspend path of hibernation, where the device is suspended,
+	 * resumed and suspended again: see hibernation_snapshot() and
+	 * hibernation_platform_enter().
+	 *
+	 * If the memory enable bit is already set, Hyper-V silently ignores
+	 * the below BAR updates, and the related PCI device driver can not
+	 * work, because reading from the device register(s) always returns
+	 * 0xFFFFFFFF (PCI_ERROR_RESPONSE).
+	 */
+	list_for_each_entry(hpdev, &hbus->children, list_entry) {
+		_hv_pcifront_read_config(hpdev, PCI_COMMAND, 2, &command);
+		command &= ~PCI_COMMAND_MEMORY;
+		_hv_pcifront_write_config(hpdev, PCI_COMMAND, 2, command);
+	}
+
+	/* Pick addresses for the BARs. */
+	do {
+		list_for_each_entry(hpdev, &hbus->children, list_entry) {
+			for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+				bar_val = hpdev->probed_bar[i];
+				if (bar_val == 0)
+					continue;
+				high = bar_val & PCI_BASE_ADDRESS_MEM_TYPE_64;
+				if (high) {
+					bar_val |=
+						((u64)hpdev->probed_bar[i + 1]
+						 << 32);
+				} else {
+					bar_val |= 0xffffffffULL << 32;
+				}
+				bar_size = get_bar_size(bar_val);
+				if (high) {
+					if (high_size != bar_size) {
+						i++;
+						continue;
+					}
+					_hv_pcifront_write_config(hpdev,
+						PCI_BASE_ADDRESS_0 + (4 * i),
+						4,
+						(u32)(high_base & 0xffffff00));
+					i++;
+					_hv_pcifront_write_config(hpdev,
+						PCI_BASE_ADDRESS_0 + (4 * i),
+						4, (u32)(high_base >> 32));
+					high_base += bar_size;
+				} else {
+					if (low_size != bar_size)
+						continue;
+					_hv_pcifront_write_config(hpdev,
+						PCI_BASE_ADDRESS_0 + (4 * i),
+						4,
+						(u32)(low_base & 0xffffff00));
+					low_base += bar_size;
+				}
+			}
+			if (high_size <= 1 && low_size <= 1) {
+				/*
+				 * No need to set the PCI_COMMAND_MEMORY bit as
+				 * the core PCI driver doesn't require the bit
+				 * to be pre-set. Actually here we intentionally
+				 * keep the bit off so that the PCI BAR probing
+				 * in the core PCI driver doesn't cause Hyper-V
+				 * to unnecessarily unmap/map the virtual BARs
+				 * from/to the physical BARs multiple times.
+				 * This reduces the VM boot time significantly
+				 * if the BAR sizes are huge.
+				 */
+				break;
+			}
+		}
+
+		high_size >>= 1;
+		low_size >>= 1;
+	}  while (high_size || low_size);
+
+	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
+}
+
+/*
+ * Assign entries in sysfs pci slot directory.
+ *
+ * Note that this function does not need to lock the children list
+ * because it is called from pci_devices_present_work which
+ * is serialized with hv_eject_device_work because they are on the
+ * same ordered workqueue. Therefore hbus->children list will not change
+ * even when pci_create_slot sleeps.
+ */
+static void hv_pci_assign_slots(struct hv_pcibus_device *hbus)
+{
+	struct hv_pci_dev *hpdev;
+	char name[SLOT_NAME_SIZE];
+	int slot_nr;
+
+	list_for_each_entry(hpdev, &hbus->children, list_entry) {
+		if (hpdev->pci_slot)
+			continue;
+
+		slot_nr = PCI_SLOT(wslot_to_devfn(hpdev->desc.win_slot.slot));
+		snprintf(name, SLOT_NAME_SIZE, "%u", hpdev->desc.ser);
+		hpdev->pci_slot = pci_create_slot(hbus->bridge->bus, slot_nr,
+					  name, NULL);
+		if (IS_ERR(hpdev->pci_slot)) {
+			pr_warn("pci_create slot %s failed\n", name);
+			hpdev->pci_slot = NULL;
+		}
+	}
+}
+
+/*
+ * Remove entries in sysfs pci slot directory.
+ */
+static void hv_pci_remove_slots(struct hv_pcibus_device *hbus)
+{
+	struct hv_pci_dev *hpdev;
+
+	list_for_each_entry(hpdev, &hbus->children, list_entry) {
+		if (!hpdev->pci_slot)
+			continue;
+		pci_destroy_slot(hpdev->pci_slot);
+		hpdev->pci_slot = NULL;
+	}
+}
+
+/*
+ * Set NUMA node for the devices on the bus
+ */
+static void hv_pci_assign_numa_node(struct hv_pcibus_device *hbus)
+{
+	struct pci_dev *dev;
+	struct pci_bus *bus = hbus->bridge->bus;
+	struct hv_pci_dev *hv_dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		hv_dev = get_pcichild_wslot(hbus, devfn_to_wslot(dev->devfn));
+		if (!hv_dev)
+			continue;
+
+		if (hv_dev->desc.flags & HV_PCI_DEVICE_FLAG_NUMA_AFFINITY &&
+		    hv_dev->desc.virtual_numa_node < num_possible_nodes())
+			/*
+			 * The kernel may boot with some NUMA nodes offline
+			 * (e.g. in a KDUMP kernel) or with NUMA disabled via
+			 * "numa=off". In those cases, adjust the host provided
+			 * NUMA node to a valid NUMA node used by the kernel.
+			 */
+			set_dev_node(&dev->dev,
+				     numa_map_to_online_node(
+					     hv_dev->desc.virtual_numa_node));
+
+		put_pcichild(hv_dev);
+	}
+}
+
+/**
+ * create_root_hv_pci_bus() - Expose a new root PCI bus
+ * @hbus:	Root PCI bus, as understood by this driver
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int create_root_hv_pci_bus(struct hv_pcibus_device *hbus)
+{
+	int error;
+	struct pci_host_bridge *bridge = hbus->bridge;
+
+	bridge->dev.parent = &hbus->hdev->device;
+	bridge->sysdata = &hbus->sysdata;
+	bridge->ops = &hv_pcifront_ops;
+
+	error = pci_scan_root_bus_bridge(bridge);
+	if (error)
+		return error;
+
+	pci_lock_rescan_remove();
+	hv_pci_assign_numa_node(hbus);
+	pci_bus_assign_resources(bridge->bus);
+	hv_pci_assign_slots(hbus);
+	pci_bus_add_devices(bridge->bus);
+	pci_unlock_rescan_remove();
+	hbus->state = hv_pcibus_installed;
+	return 0;
+}
+
+struct q_res_req_compl {
+	struct completion host_event;
+	struct hv_pci_dev *hpdev;
+};
+
+/**
+ * q_resource_requirements() - Query Resource Requirements
+ * @context:		The completion context.
+ * @resp:		The response that came from the host.
+ * @resp_packet_size:	The size in bytes of resp.
+ *
+ * This function is invoked on completion of a Query Resource
+ * Requirements packet.
+ */
+static void q_resource_requirements(void *context, struct pci_response *resp,
+				    int resp_packet_size)
+{
+	struct q_res_req_compl *completion = context;
+	struct pci_q_res_req_response *q_res_req =
+		(struct pci_q_res_req_response *)resp;
+	s32 status;
+	int i;
+
+	status = (resp_packet_size < sizeof(*q_res_req)) ? -1 : resp->status;
+	if (status < 0) {
+		dev_err(&completion->hpdev->hbus->hdev->device,
+			"query resource requirements failed: %x\n",
+			status);
+	} else {
+		for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+			completion->hpdev->probed_bar[i] =
+				q_res_req->probed_bar[i];
+		}
+	}
+
+	complete(&completion->host_event);
+}
+
+/**
+ * new_pcichild_device() - Create a new child device
+ * @hbus:	The internal struct tracking this root PCI bus.
+ * @desc:	The information supplied so far from the host
+ *              about the device.
+ *
+ * This function creates the tracking structure for a new child
+ * device and kicks off the process of figuring out what it is.
+ *
+ * Return: Pointer to the new tracking struct
+ */
+static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus,
+		struct hv_pcidev_description *desc)
+{
+	struct hv_pci_dev *hpdev;
+	struct pci_child_message *res_req;
+	struct q_res_req_compl comp_pkt;
+	struct {
+		struct pci_packet init_packet;
+		u8 buffer[sizeof(struct pci_child_message)];
+	} pkt;
+	unsigned long flags;
+	int ret;
+
+	hpdev = kzalloc(sizeof(*hpdev), GFP_KERNEL);
+	if (!hpdev)
+		return NULL;
+
+	hpdev->hbus = hbus;
+
+	memset(&pkt, 0, sizeof(pkt));
+	init_completion(&comp_pkt.host_event);
+	comp_pkt.hpdev = hpdev;
+	pkt.init_packet.compl_ctxt = &comp_pkt;
+	pkt.init_packet.completion_func = q_resource_requirements;
+	res_req = (struct pci_child_message *)&pkt.init_packet.message;
+	res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS;
+	res_req->wslot.slot = desc->win_slot.slot;
+
+	ret = vmbus_sendpacket(hbus->hdev->channel, res_req,
+			       sizeof(struct pci_child_message),
+			       (unsigned long)&pkt.init_packet,
+			       VM_PKT_DATA_INBAND,
+			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret)
+		goto error;
+
+	if (wait_for_response(hbus->hdev, &comp_pkt.host_event))
+		goto error;
+
+	hpdev->desc = *desc;
+	refcount_set(&hpdev->refs, 1);
+	get_pcichild(hpdev);
+	spin_lock_irqsave(&hbus->device_list_lock, flags);
+
+	list_add_tail(&hpdev->list_entry, &hbus->children);
+	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
+	return hpdev;
+
+error:
+	kfree(hpdev);
+	return NULL;
+}
+
+/**
+ * get_pcichild_wslot() - Find device from slot
+ * @hbus:	Root PCI bus, as understood by this driver
+ * @wslot:	Location on the bus
+ *
+ * This function looks up a PCI device and returns the internal
+ * representation of it.  It acquires a reference on it, so that
+ * the device won't be deleted while somebody is using it.  The
+ * caller is responsible for calling put_pcichild() to release
+ * this reference.
+ *
+ * Return:	Internal representation of a PCI device
+ */
+static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus,
+					     u32 wslot)
+{
+	unsigned long flags;
+	struct hv_pci_dev *iter, *hpdev = NULL;
+
+	spin_lock_irqsave(&hbus->device_list_lock, flags);
+	list_for_each_entry(iter, &hbus->children, list_entry) {
+		if (iter->desc.win_slot.slot == wslot) {
+			hpdev = iter;
+			get_pcichild(hpdev);
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
+
+	return hpdev;
+}
+
+/**
+ * pci_devices_present_work() - Handle new list of child devices
+ * @work:	Work struct embedded in struct hv_dr_work
+ *
+ * "Bus Relations" is the Windows term for "children of this
+ * bus."  The terminology is preserved here for people trying to
+ * debug the interaction between Hyper-V and Linux.  This
+ * function is called when the parent partition reports a list
+ * of functions that should be observed under this PCI Express
+ * port (bus).
+ *
+ * This function updates the list, and must tolerate being
+ * called multiple times with the same information.  The typical
+ * number of child devices is one, with very atypical cases
+ * involving three or four, so the algorithms used here can be
+ * simple and inefficient.
+ *
+ * It must also treat the omission of a previously observed device as
+ * notification that the device no longer exists.
+ *
+ * Note that this function is serialized with hv_eject_device_work(),
+ * because both are pushed to the ordered workqueue hbus->wq.
+ */
+static void pci_devices_present_work(struct work_struct *work)
+{
+	u32 child_no;
+	bool found;
+	struct hv_pcidev_description *new_desc;
+	struct hv_pci_dev *hpdev;
+	struct hv_pcibus_device *hbus;
+	struct list_head removed;
+	struct hv_dr_work *dr_wrk;
+	struct hv_dr_state *dr = NULL;
+	unsigned long flags;
+
+	dr_wrk = container_of(work, struct hv_dr_work, wrk);
+	hbus = dr_wrk->bus;
+	kfree(dr_wrk);
+
+	INIT_LIST_HEAD(&removed);
+
+	/* Pull this off the queue and process it if it was the last one. */
+	spin_lock_irqsave(&hbus->device_list_lock, flags);
+	while (!list_empty(&hbus->dr_list)) {
+		dr = list_first_entry(&hbus->dr_list, struct hv_dr_state,
+				      list_entry);
+		list_del(&dr->list_entry);
+
+		/* Throw this away if the list still has stuff in it. */
+		if (!list_empty(&hbus->dr_list)) {
+			kfree(dr);
+			continue;
+		}
+	}
+	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
+
+	if (!dr)
+		return;
+
+	mutex_lock(&hbus->state_lock);
+
+	/* First, mark all existing children as reported missing. */
+	spin_lock_irqsave(&hbus->device_list_lock, flags);
+	list_for_each_entry(hpdev, &hbus->children, list_entry) {
+		hpdev->reported_missing = true;
+	}
+	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
+
+	/* Next, add back any reported devices. */
+	for (child_no = 0; child_no < dr->device_count; child_no++) {
+		found = false;
+		new_desc = &dr->func[child_no];
+
+		spin_lock_irqsave(&hbus->device_list_lock, flags);
+		list_for_each_entry(hpdev, &hbus->children, list_entry) {
+			if ((hpdev->desc.win_slot.slot == new_desc->win_slot.slot) &&
+			    (hpdev->desc.v_id == new_desc->v_id) &&
+			    (hpdev->desc.d_id == new_desc->d_id) &&
+			    (hpdev->desc.ser == new_desc->ser)) {
+				hpdev->reported_missing = false;
+				found = true;
+			}
+		}
+		spin_unlock_irqrestore(&hbus->device_list_lock, flags);
+
+		if (!found) {
+			hpdev = new_pcichild_device(hbus, new_desc);
+			if (!hpdev)
+				dev_err(&hbus->hdev->device,
+					"couldn't record a child device.\n");
+		}
+	}
+
+	/* Move missing children to a list on the stack. */
+	spin_lock_irqsave(&hbus->device_list_lock, flags);
+	do {
+		found = false;
+		list_for_each_entry(hpdev, &hbus->children, list_entry) {
+			if (hpdev->reported_missing) {
+				found = true;
+				put_pcichild(hpdev);
+				list_move_tail(&hpdev->list_entry, &removed);
+				break;
+			}
+		}
+	} while (found);
+	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
+
+	/* Delete everything that should no longer exist. */
+	while (!list_empty(&removed)) {
+		hpdev = list_first_entry(&removed, struct hv_pci_dev,
+					 list_entry);
+		list_del(&hpdev->list_entry);
+
+		if (hpdev->pci_slot)
+			pci_destroy_slot(hpdev->pci_slot);
+
+		put_pcichild(hpdev);
+	}
+
+	switch (hbus->state) {
+	case hv_pcibus_installed:
+		/*
+		 * Tell the core to rescan bus
+		 * because there may have been changes.
+		 */
+		pci_lock_rescan_remove();
+		pci_scan_child_bus(hbus->bridge->bus);
+		hv_pci_assign_numa_node(hbus);
+		hv_pci_assign_slots(hbus);
+		pci_unlock_rescan_remove();
+		break;
+
+	case hv_pcibus_init:
+	case hv_pcibus_probed:
+		survey_child_resources(hbus);
+		break;
+
+	default:
+		break;
+	}
+
+	mutex_unlock(&hbus->state_lock);
+
+	kfree(dr);
+}
+
+/**
+ * hv_pci_start_relations_work() - Queue work to start device discovery
+ * @hbus:	Root PCI bus, as understood by this driver
+ * @dr:		The list of children returned from host
+ *
+ * Return:  0 on success, -errno on failure
+ */
+static int hv_pci_start_relations_work(struct hv_pcibus_device *hbus,
+				       struct hv_dr_state *dr)
+{
+	struct hv_dr_work *dr_wrk;
+	unsigned long flags;
+	bool pending_dr;
+
+	if (hbus->state == hv_pcibus_removing) {
+		dev_info(&hbus->hdev->device,
+			 "PCI VMBus BUS_RELATIONS: ignored\n");
+		return -ENOENT;
+	}
+
+	dr_wrk = kzalloc(sizeof(*dr_wrk), GFP_NOWAIT);
+	if (!dr_wrk)
+		return -ENOMEM;
+
+	INIT_WORK(&dr_wrk->wrk, pci_devices_present_work);
+	dr_wrk->bus = hbus;
+
+	spin_lock_irqsave(&hbus->device_list_lock, flags);
+	/*
+	 * If pending_dr is true, we have already queued a work,
+	 * which will see the new dr. Otherwise, we need to
+	 * queue a new work.
+	 */
+	pending_dr = !list_empty(&hbus->dr_list);
+	list_add_tail(&dr->list_entry, &hbus->dr_list);
+	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
+
+	if (pending_dr)
+		kfree(dr_wrk);
+	else
+		queue_work(hbus->wq, &dr_wrk->wrk);
+
+	return 0;
+}
+
+/**
+ * hv_pci_devices_present() - Handle list of new children
+ * @hbus:      Root PCI bus, as understood by this driver
+ * @relations: Packet from host listing children
+ *
+ * Process a new list of devices on the bus. The list of devices is
+ * discovered by VSP and sent to us via VSP message PCI_BUS_RELATIONS,
+ * whenever a new list of devices for this bus appears.
+ */
+static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
+				   struct pci_bus_relations *relations)
+{
+	struct hv_dr_state *dr;
+	int i;
+
+	dr = kzalloc(struct_size(dr, func, relations->device_count),
+		     GFP_NOWAIT);
+	if (!dr)
+		return;
+
+	dr->device_count = relations->device_count;
+	for (i = 0; i < dr->device_count; i++) {
+		dr->func[i].v_id = relations->func[i].v_id;
+		dr->func[i].d_id = relations->func[i].d_id;
+		dr->func[i].rev = relations->func[i].rev;
+		dr->func[i].prog_intf = relations->func[i].prog_intf;
+		dr->func[i].subclass = relations->func[i].subclass;
+		dr->func[i].base_class = relations->func[i].base_class;
+		dr->func[i].subsystem_id = relations->func[i].subsystem_id;
+		dr->func[i].win_slot = relations->func[i].win_slot;
+		dr->func[i].ser = relations->func[i].ser;
+	}
+
+	if (hv_pci_start_relations_work(hbus, dr))
+		kfree(dr);
+}
+
+/**
+ * hv_pci_devices_present2() - Handle list of new children
+ * @hbus:	Root PCI bus, as understood by this driver
+ * @relations:	Packet from host listing children
+ *
+ * This function is the v2 version of hv_pci_devices_present()
+ */
+static void hv_pci_devices_present2(struct hv_pcibus_device *hbus,
+				    struct pci_bus_relations2 *relations)
+{
+	struct hv_dr_state *dr;
+	int i;
+
+	dr = kzalloc(struct_size(dr, func, relations->device_count),
+		     GFP_NOWAIT);
+	if (!dr)
+		return;
+
+	dr->device_count = relations->device_count;
+	for (i = 0; i < dr->device_count; i++) {
+		dr->func[i].v_id = relations->func[i].v_id;
+		dr->func[i].d_id = relations->func[i].d_id;
+		dr->func[i].rev = relations->func[i].rev;
+		dr->func[i].prog_intf = relations->func[i].prog_intf;
+		dr->func[i].subclass = relations->func[i].subclass;
+		dr->func[i].base_class = relations->func[i].base_class;
+		dr->func[i].subsystem_id = relations->func[i].subsystem_id;
+		dr->func[i].win_slot = relations->func[i].win_slot;
+		dr->func[i].ser = relations->func[i].ser;
+		dr->func[i].flags = relations->func[i].flags;
+		dr->func[i].virtual_numa_node =
+			relations->func[i].virtual_numa_node;
+	}
+
+	if (hv_pci_start_relations_work(hbus, dr))
+		kfree(dr);
+}
+
+/**
+ * hv_eject_device_work() - Asynchronously handles ejection
+ * @work:	Work struct embedded in internal device struct
+ *
+ * This function handles ejecting a device.  Windows will
+ * attempt to gracefully eject a device, waiting 60 seconds to
+ * hear back from the guest OS that this completed successfully.
+ * If this timer expires, the device will be forcibly removed.
+ */
+static void hv_eject_device_work(struct work_struct *work)
+{
+	struct pci_eject_response *ejct_pkt;
+	struct hv_pcibus_device *hbus;
+	struct hv_pci_dev *hpdev;
+	struct pci_dev *pdev;
+	unsigned long flags;
+	int wslot;
+	struct {
+		struct pci_packet pkt;
+		u8 buffer[sizeof(struct pci_eject_response)];
+	} ctxt;
+
+	hpdev = container_of(work, struct hv_pci_dev, wrk);
+	hbus = hpdev->hbus;
+
+	mutex_lock(&hbus->state_lock);
+
+	/*
+	 * Ejection can come before or after the PCI bus has been set up, so
+	 * attempt to find it and tear down the bus state, if it exists.  This
+	 * must be done without constructs like pci_domain_nr(hbus->bridge->bus)
+	 * because hbus->bridge->bus may not exist yet.
+	 */
+	wslot = wslot_to_devfn(hpdev->desc.win_slot.slot);
+	pdev = pci_get_domain_bus_and_slot(hbus->bridge->domain_nr, 0, wslot);
+	if (pdev) {
+		pci_lock_rescan_remove();
+		pci_stop_and_remove_bus_device(pdev);
+		pci_dev_put(pdev);
+		pci_unlock_rescan_remove();
+	}
+
+	spin_lock_irqsave(&hbus->device_list_lock, flags);
+	list_del(&hpdev->list_entry);
+	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
+
+	if (hpdev->pci_slot)
+		pci_destroy_slot(hpdev->pci_slot);
+
+	memset(&ctxt, 0, sizeof(ctxt));
+	ejct_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
+	ejct_pkt->message_type.type = PCI_EJECTION_COMPLETE;
+	ejct_pkt->wslot.slot = hpdev->desc.win_slot.slot;
+	vmbus_sendpacket(hbus->hdev->channel, ejct_pkt,
+			 sizeof(*ejct_pkt), 0,
+			 VM_PKT_DATA_INBAND, 0);
+
+	/* For the get_pcichild() in hv_pci_eject_device() */
+	put_pcichild(hpdev);
+	/* For the two refs got in new_pcichild_device() */
+	put_pcichild(hpdev);
+	put_pcichild(hpdev);
+	/* hpdev has been freed. Do not use it any more. */
+
+	mutex_unlock(&hbus->state_lock);
+}
+
+/**
+ * hv_pci_eject_device() - Handles device ejection
+ * @hpdev:	Internal device tracking struct
+ *
+ * This function is invoked when an ejection packet arrives.  It
+ * just schedules work so that we don't re-enter the packet
+ * delivery code handling the ejection.
+ */
+static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
+{
+	struct hv_pcibus_device *hbus = hpdev->hbus;
+	struct hv_device *hdev = hbus->hdev;
+
+	if (hbus->state == hv_pcibus_removing) {
+		dev_info(&hdev->device, "PCI VMBus EJECT: ignored\n");
+		return;
+	}
+
+	get_pcichild(hpdev);
+	INIT_WORK(&hpdev->wrk, hv_eject_device_work);
+	queue_work(hbus->wq, &hpdev->wrk);
+}
+
+/**
+ * hv_pci_onchannelcallback() - Handles incoming packets
+ * @context:	Internal bus tracking struct
+ *
+ * This function is invoked whenever the host sends a packet to
+ * this channel (which is private to this root PCI bus).
+ */
+static void hv_pci_onchannelcallback(void *context)
+{
+	const int packet_size = 0x100;
+	int ret;
+	struct hv_pcibus_device *hbus = context;
+	struct vmbus_channel *chan = hbus->hdev->channel;
+	u32 bytes_recvd;
+	u64 req_id, req_addr;
+	struct vmpacket_descriptor *desc;
+	unsigned char *buffer;
+	int bufferlen = packet_size;
+	struct pci_packet *comp_packet;
+	struct pci_response *response;
+	struct pci_incoming_message *new_message;
+	struct pci_bus_relations *bus_rel;
+	struct pci_bus_relations2 *bus_rel2;
+	struct pci_dev_inval_block *inval;
+	struct pci_dev_incoming *dev_message;
+	struct hv_pci_dev *hpdev;
+	unsigned long flags;
+
+	buffer = kmalloc(bufferlen, GFP_ATOMIC);
+	if (!buffer)
+		return;
+
+	while (1) {
+		ret = vmbus_recvpacket_raw(chan, buffer, bufferlen,
+					   &bytes_recvd, &req_id);
+
+		if (ret == -ENOBUFS) {
+			kfree(buffer);
+			/* Handle large packet */
+			bufferlen = bytes_recvd;
+			buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
+			if (!buffer)
+				return;
+			continue;
+		}
+
+		/* Zero length indicates there are no more packets. */
+		if (ret || !bytes_recvd)
+			break;
+
+		/*
+		 * All incoming packets must be at least as large as a
+		 * response.
+		 */
+		if (bytes_recvd <= sizeof(struct pci_response))
+			continue;
+		desc = (struct vmpacket_descriptor *)buffer;
+
+		switch (desc->type) {
+		case VM_PKT_COMP:
+
+			lock_requestor(chan, flags);
+			req_addr = __vmbus_request_addr_match(chan, req_id,
+							      VMBUS_RQST_ADDR_ANY);
+			if (req_addr == VMBUS_RQST_ERROR) {
+				unlock_requestor(chan, flags);
+				dev_err(&hbus->hdev->device,
+					"Invalid transaction ID %llx\n",
+					req_id);
+				break;
+			}
+			comp_packet = (struct pci_packet *)req_addr;
+			response = (struct pci_response *)buffer;
+			/*
+			 * Call ->completion_func() within the critical section to make
+			 * sure that the packet pointer is still valid during the call:
+			 * here 'valid' means that there's a task still waiting for the
+			 * completion, and that the packet data is still on the waiting
+			 * task's stack.  Cf. hv_compose_msi_msg().
+			 */
+			comp_packet->completion_func(comp_packet->compl_ctxt,
+						     response,
+						     bytes_recvd);
+			unlock_requestor(chan, flags);
+			break;
+
+		case VM_PKT_DATA_INBAND:
+
+			new_message = (struct pci_incoming_message *)buffer;
+			switch (new_message->message_type.type) {
+			case PCI_BUS_RELATIONS:
+
+				bus_rel = (struct pci_bus_relations *)buffer;
+				if (bytes_recvd < sizeof(*bus_rel) ||
+				    bytes_recvd <
+					struct_size(bus_rel, func,
+						    bus_rel->device_count)) {
+					dev_err(&hbus->hdev->device,
+						"bus relations too small\n");
+					break;
+				}
+
+				hv_pci_devices_present(hbus, bus_rel);
+				break;
+
+			case PCI_BUS_RELATIONS2:
+
+				bus_rel2 = (struct pci_bus_relations2 *)buffer;
+				if (bytes_recvd < sizeof(*bus_rel2) ||
+				    bytes_recvd <
+					struct_size(bus_rel2, func,
+						    bus_rel2->device_count)) {
+					dev_err(&hbus->hdev->device,
+						"bus relations v2 too small\n");
+					break;
+				}
+
+				hv_pci_devices_present2(hbus, bus_rel2);
+				break;
+
+			case PCI_EJECT:
+
+				dev_message = (struct pci_dev_incoming *)buffer;
+				if (bytes_recvd < sizeof(*dev_message)) {
+					dev_err(&hbus->hdev->device,
+						"eject message too small\n");
+					break;
+				}
+				hpdev = get_pcichild_wslot(hbus,
+						      dev_message->wslot.slot);
+				if (hpdev) {
+					hv_pci_eject_device(hpdev);
+					put_pcichild(hpdev);
+				}
+				break;
+
+			case PCI_INVALIDATE_BLOCK:
+
+				inval = (struct pci_dev_inval_block *)buffer;
+				if (bytes_recvd < sizeof(*inval)) {
+					dev_err(&hbus->hdev->device,
+						"invalidate message too small\n");
+					break;
+				}
+				hpdev = get_pcichild_wslot(hbus,
+							   inval->wslot.slot);
+				if (hpdev) {
+					if (hpdev->block_invalidate) {
+						hpdev->block_invalidate(
+						    hpdev->invalidate_context,
+						    inval->block_mask);
+					}
+					put_pcichild(hpdev);
+				}
+				break;
+
+			default:
+				dev_warn(&hbus->hdev->device,
+					"Unimplemented protocol message %x\n",
+					new_message->message_type.type);
+				break;
+			}
+			break;
+
+		default:
+			dev_err(&hbus->hdev->device,
+				"unhandled packet type %d, tid %llx len %d\n",
+				desc->type, req_id, bytes_recvd);
+			break;
+		}
+	}
+
+	kfree(buffer);
+}
+
+/**
+ * hv_pci_protocol_negotiation() - Set up protocol
+ * @hdev:		VMBus's tracking struct for this root PCI bus.
+ * @version:		Array of supported channel protocol versions in
+ *			the order of probing - highest go first.
+ * @num_version:	Number of elements in the version array.
+ *
+ * This driver is intended to support running on Windows 10
+ * (server) and later versions. It will not run on earlier
+ * versions, as they assume that many of the operations which
+ * Linux needs accomplished with a spinlock held were done via
+ * asynchronous messaging via VMBus.  Windows 10 increases the
+ * surface area of PCI emulation so that these actions can take
+ * place by suspending a virtual processor for their duration.
+ *
+ * This function negotiates the channel protocol version,
+ * failing if the host doesn't support the necessary protocol
+ * level.
+ */
+static int hv_pci_protocol_negotiation(struct hv_device *hdev,
+				       enum pci_protocol_version_t version[],
+				       int num_version)
+{
+	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
+	struct pci_version_request *version_req;
+	struct hv_pci_compl comp_pkt;
+	struct pci_packet *pkt;
+	int ret;
+	int i;
+
+	/*
+	 * Initiate the handshake with the host and negotiate
+	 * a version that the host can support. We start with the
+	 * highest version number and go down if the host cannot
+	 * support it.
+	 */
+	pkt = kzalloc(sizeof(*pkt) + sizeof(*version_req), GFP_KERNEL);
+	if (!pkt)
+		return -ENOMEM;
+
+	init_completion(&comp_pkt.host_event);
+	pkt->completion_func = hv_pci_generic_compl;
+	pkt->compl_ctxt = &comp_pkt;
+	version_req = (struct pci_version_request *)&pkt->message;
+	version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
+
+	for (i = 0; i < num_version; i++) {
+		version_req->protocol_version = version[i];
+		ret = vmbus_sendpacket(hdev->channel, version_req,
+				sizeof(struct pci_version_request),
+				(unsigned long)pkt, VM_PKT_DATA_INBAND,
+				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+		if (!ret)
+			ret = wait_for_response(hdev, &comp_pkt.host_event);
+
+		if (ret) {
+			dev_err(&hdev->device,
+				"PCI Pass-through VSP failed to request version: %d",
+				ret);
+			goto exit;
+		}
+
+		if (comp_pkt.completion_status >= 0) {
+			hbus->protocol_version = version[i];
+			dev_info(&hdev->device,
+				"PCI VMBus probing: Using version %#x\n",
+				hbus->protocol_version);
+			goto exit;
+		}
+
+		if (comp_pkt.completion_status != STATUS_REVISION_MISMATCH) {
+			dev_err(&hdev->device,
+				"PCI Pass-through VSP failed version request: %#x",
+				comp_pkt.completion_status);
+			ret = -EPROTO;
+			goto exit;
+		}
+
+		reinit_completion(&comp_pkt.host_event);
+	}
+
+	dev_err(&hdev->device,
+		"PCI pass-through VSP failed to find supported version");
+	ret = -EPROTO;
+
+exit:
+	kfree(pkt);
+	return ret;
+}
+
+/**
+ * hv_pci_free_bridge_windows() - Release memory regions for the
+ * bus
+ * @hbus:	Root PCI bus, as understood by this driver
+ */
+static void hv_pci_free_bridge_windows(struct hv_pcibus_device *hbus)
+{
+	/*
+	 * Set the resources back to the way they looked when they
+	 * were allocated by setting IORESOURCE_BUSY again.
+	 */
+
+	if (hbus->low_mmio_space && hbus->low_mmio_res) {
+		hbus->low_mmio_res->flags |= IORESOURCE_BUSY;
+		vmbus_free_mmio(hbus->low_mmio_res->start,
+				resource_size(hbus->low_mmio_res));
+	}
+
+	if (hbus->high_mmio_space && hbus->high_mmio_res) {
+		hbus->high_mmio_res->flags |= IORESOURCE_BUSY;
+		vmbus_free_mmio(hbus->high_mmio_res->start,
+				resource_size(hbus->high_mmio_res));
+	}
+}
+
+/**
+ * hv_pci_allocate_bridge_windows() - Allocate memory regions
+ * for the bus
+ * @hbus:	Root PCI bus, as understood by this driver
+ *
+ * This function calls vmbus_allocate_mmio(), which is itself a
+ * bit of a compromise.  Ideally, we might change the pnp layer
+ * in the kernel such that it comprehends either PCI devices
+ * which are "grandchildren of ACPI," with some intermediate bus
+ * node (in this case, VMBus) or change it such that it
+ * understands VMBus.  The pnp layer, however, has been declared
+ * deprecated, and not subject to change.
+ *
+ * The workaround, implemented here, is to ask VMBus to allocate
+ * MMIO space for this bus.  VMBus itself knows which ranges are
+ * appropriate by looking at its own ACPI objects.  Then, after
+ * these ranges are claimed, they're modified to look like they
+ * would have looked if the ACPI and pnp code had allocated
+ * bridge windows.  These descriptors have to exist in this form
+ * in order to satisfy the code which will get invoked when the
+ * endpoint PCI function driver calls request_mem_region() or
+ * request_mem_region_exclusive().
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int hv_pci_allocate_bridge_windows(struct hv_pcibus_device *hbus)
+{
+	resource_size_t align;
+	int ret;
+
+	if (hbus->low_mmio_space) {
+		align = 1ULL << (63 - __builtin_clzll(hbus->low_mmio_space));
+		ret = vmbus_allocate_mmio(&hbus->low_mmio_res, hbus->hdev, 0,
+					  (u64)(u32)0xffffffff,
+					  hbus->low_mmio_space,
+					  align, false);
+		if (ret) {
+			dev_err(&hbus->hdev->device,
+				"Need %#llx of low MMIO space. Consider reconfiguring the VM.\n",
+				hbus->low_mmio_space);
+			return ret;
+		}
+
+		/* Modify this resource to become a bridge window. */
+		hbus->low_mmio_res->flags |= IORESOURCE_WINDOW;
+		hbus->low_mmio_res->flags &= ~IORESOURCE_BUSY;
+		pci_add_resource(&hbus->bridge->windows, hbus->low_mmio_res);
+	}
+
+	if (hbus->high_mmio_space) {
+		align = 1ULL << (63 - __builtin_clzll(hbus->high_mmio_space));
+		ret = vmbus_allocate_mmio(&hbus->high_mmio_res, hbus->hdev,
+					  0x100000000, -1,
+					  hbus->high_mmio_space, align,
+					  false);
+		if (ret) {
+			dev_err(&hbus->hdev->device,
+				"Need %#llx of high MMIO space. Consider reconfiguring the VM.\n",
+				hbus->high_mmio_space);
+			goto release_low_mmio;
+		}
+
+		/* Modify this resource to become a bridge window. */
+		hbus->high_mmio_res->flags |= IORESOURCE_WINDOW;
+		hbus->high_mmio_res->flags &= ~IORESOURCE_BUSY;
+		pci_add_resource(&hbus->bridge->windows, hbus->high_mmio_res);
+	}
+
+	return 0;
+
+release_low_mmio:
+	if (hbus->low_mmio_res) {
+		vmbus_free_mmio(hbus->low_mmio_res->start,
+				resource_size(hbus->low_mmio_res));
+	}
+
+	return ret;
+}
+
+/**
+ * hv_allocate_config_window() - Find MMIO space for PCI Config
+ * @hbus:	Root PCI bus, as understood by this driver
+ *
+ * This function claims memory-mapped I/O space for accessing
+ * configuration space for the functions on this bus.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int hv_allocate_config_window(struct hv_pcibus_device *hbus)
+{
+	int ret;
+
+	/*
+	 * Set up a region of MMIO space to use for accessing configuration
+	 * space.
+	 */
+	ret = vmbus_allocate_mmio(&hbus->mem_config, hbus->hdev, 0, -1,
+				  PCI_CONFIG_MMIO_LENGTH, 0x1000, false);
+	if (ret)
+		return ret;
+
+	/*
+	 * vmbus_allocate_mmio() gets used for allocating both device endpoint
+	 * resource claims (those which cannot be overlapped) and the ranges
+	 * which are valid for the children of this bus, which are intended
+	 * to be overlapped by those children.  Set the flag on this claim
+	 * meaning that this region can't be overlapped.
+	 */
+
+	hbus->mem_config->flags |= IORESOURCE_BUSY;
+
+	return 0;
+}
+
+static void hv_free_config_window(struct hv_pcibus_device *hbus)
+{
+	vmbus_free_mmio(hbus->mem_config->start, PCI_CONFIG_MMIO_LENGTH);
+}
+
+static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs);
+
+/**
+ * hv_pci_enter_d0() - Bring the "bus" into the D0 power state
+ * @hdev:	VMBus's tracking struct for this root PCI bus
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int hv_pci_enter_d0(struct hv_device *hdev)
+{
+	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
+	struct pci_bus_d0_entry *d0_entry;
+	struct hv_pci_compl comp_pkt;
+	struct pci_packet *pkt;
+	bool retry = true;
+	int ret;
+
+enter_d0_retry:
+	/*
+	 * Tell the host that the bus is ready to use, and moved into the
+	 * powered-on state.  This includes telling the host which region
+	 * of memory-mapped I/O space has been chosen for configuration space
+	 * access.
+	 */
+	pkt = kzalloc(sizeof(*pkt) + sizeof(*d0_entry), GFP_KERNEL);
+	if (!pkt)
+		return -ENOMEM;
+
+	init_completion(&comp_pkt.host_event);
+	pkt->completion_func = hv_pci_generic_compl;
+	pkt->compl_ctxt = &comp_pkt;
+	d0_entry = (struct pci_bus_d0_entry *)&pkt->message;
+	d0_entry->message_type.type = PCI_BUS_D0ENTRY;
+	d0_entry->mmio_base = hbus->mem_config->start;
+
+	ret = vmbus_sendpacket(hdev->channel, d0_entry, sizeof(*d0_entry),
+			       (unsigned long)pkt, VM_PKT_DATA_INBAND,
+			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (!ret)
+		ret = wait_for_response(hdev, &comp_pkt.host_event);
+
+	if (ret)
+		goto exit;
+
+	/*
+	 * In certain case (Kdump) the pci device of interest was
+	 * not cleanly shut down and resource is still held on host
+	 * side, the host could return invalid device status.
+	 * We need to explicitly request host to release the resource
+	 * and try to enter D0 again.
+	 */
+	if (comp_pkt.completion_status < 0 && retry) {
+		retry = false;
+
+		dev_err(&hdev->device, "Retrying D0 Entry\n");
+
+		/*
+		 * Hv_pci_bus_exit() calls hv_send_resource_released()
+		 * to free up resources of its child devices.
+		 * In the kdump kernel we need to set the
+		 * wslot_res_allocated to 255 so it scans all child
+		 * devices to release resources allocated in the
+		 * normal kernel before panic happened.
+		 */
+		hbus->wslot_res_allocated = 255;
+
+		ret = hv_pci_bus_exit(hdev, true);
+
+		if (ret == 0) {
+			kfree(pkt);
+			goto enter_d0_retry;
+		}
+		dev_err(&hdev->device,
+			"Retrying D0 failed with ret %d\n", ret);
+	}
+
+	if (comp_pkt.completion_status < 0) {
+		dev_err(&hdev->device,
+			"PCI Pass-through VSP failed D0 Entry with status %x\n",
+			comp_pkt.completion_status);
+		ret = -EPROTO;
+		goto exit;
+	}
+
+	ret = 0;
+
+exit:
+	kfree(pkt);
+	return ret;
+}
+
+/**
+ * hv_pci_query_relations() - Ask host to send list of child
+ * devices
+ * @hdev:	VMBus's tracking struct for this root PCI bus
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int hv_pci_query_relations(struct hv_device *hdev)
+{
+	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
+	struct pci_message message;
+	struct completion comp;
+	int ret;
+
+	/* Ask the host to send along the list of child devices */
+	init_completion(&comp);
+	if (cmpxchg(&hbus->survey_event, NULL, &comp))
+		return -ENOTEMPTY;
+
+	memset(&message, 0, sizeof(message));
+	message.type = PCI_QUERY_BUS_RELATIONS;
+
+	ret = vmbus_sendpacket(hdev->channel, &message, sizeof(message),
+			       0, VM_PKT_DATA_INBAND, 0);
+	if (!ret)
+		ret = wait_for_response(hdev, &comp);
+
+	/*
+	 * In the case of fast device addition/removal, it's possible that
+	 * vmbus_sendpacket() or wait_for_response() returns -ENODEV but we
+	 * already got a PCI_BUS_RELATIONS* message from the host and the
+	 * channel callback already scheduled a work to hbus->wq, which can be
+	 * running pci_devices_present_work() -> survey_child_resources() ->
+	 * complete(&hbus->survey_event), even after hv_pci_query_relations()
+	 * exits and the stack variable 'comp' is no longer valid; as a result,
+	 * a hang or a page fault may happen when the complete() calls
+	 * raw_spin_lock_irqsave(). Flush hbus->wq before we exit from
+	 * hv_pci_query_relations() to avoid the issues. Note: if 'ret' is
+	 * -ENODEV, there can't be any more work item scheduled to hbus->wq
+	 * after the flush_workqueue(): see vmbus_onoffer_rescind() ->
+	 * vmbus_reset_channel_cb(), vmbus_rescind_cleanup() ->
+	 * channel->rescind = true.
+	 */
+	flush_workqueue(hbus->wq);
+
+	return ret;
+}
+
+/**
+ * hv_send_resources_allocated() - Report local resource choices
+ * @hdev:	VMBus's tracking struct for this root PCI bus
+ *
+ * The host OS is expecting to be sent a request as a message
+ * which contains all the resources that the device will use.
+ * The response contains those same resources, "translated"
+ * which is to say, the values which should be used by the
+ * hardware, when it delivers an interrupt.  (MMIO resources are
+ * used in local terms.)  This is nice for Windows, and lines up
+ * with the FDO/PDO split, which doesn't exist in Linux.  Linux
+ * is deeply expecting to scan an emulated PCI configuration
+ * space.  So this message is sent here only to drive the state
+ * machine on the host forward.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int hv_send_resources_allocated(struct hv_device *hdev)
+{
+	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
+	struct pci_resources_assigned *res_assigned;
+	struct pci_resources_assigned2 *res_assigned2;
+	struct hv_pci_compl comp_pkt;
+	struct hv_pci_dev *hpdev;
+	struct pci_packet *pkt;
+	size_t size_res;
+	int wslot;
+	int ret;
+
+	size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2)
+			? sizeof(*res_assigned) : sizeof(*res_assigned2);
+
+	pkt = kmalloc(sizeof(*pkt) + size_res, GFP_KERNEL);
+	if (!pkt)
+		return -ENOMEM;
+
+	ret = 0;
+
+	for (wslot = 0; wslot < 256; wslot++) {
+		hpdev = get_pcichild_wslot(hbus, wslot);
+		if (!hpdev)
+			continue;
+
+		memset(pkt, 0, sizeof(*pkt) + size_res);
+		init_completion(&comp_pkt.host_event);
+		pkt->completion_func = hv_pci_generic_compl;
+		pkt->compl_ctxt = &comp_pkt;
+
+		if (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2) {
+			res_assigned =
+				(struct pci_resources_assigned *)&pkt->message;
+			res_assigned->message_type.type =
+				PCI_RESOURCES_ASSIGNED;
+			res_assigned->wslot.slot = hpdev->desc.win_slot.slot;
+		} else {
+			res_assigned2 =
+				(struct pci_resources_assigned2 *)&pkt->message;
+			res_assigned2->message_type.type =
+				PCI_RESOURCES_ASSIGNED2;
+			res_assigned2->wslot.slot = hpdev->desc.win_slot.slot;
+		}
+		put_pcichild(hpdev);
+
+		ret = vmbus_sendpacket(hdev->channel, &pkt->message,
+				size_res, (unsigned long)pkt,
+				VM_PKT_DATA_INBAND,
+				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+		if (!ret)
+			ret = wait_for_response(hdev, &comp_pkt.host_event);
+		if (ret)
+			break;
+
+		if (comp_pkt.completion_status < 0) {
+			ret = -EPROTO;
+			dev_err(&hdev->device,
+				"resource allocated returned 0x%x",
+				comp_pkt.completion_status);
+			break;
+		}
+
+		hbus->wslot_res_allocated = wslot;
+	}
+
+	kfree(pkt);
+	return ret;
+}
+
+/**
+ * hv_send_resources_released() - Report local resources
+ * released
+ * @hdev:	VMBus's tracking struct for this root PCI bus
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int hv_send_resources_released(struct hv_device *hdev)
+{
+	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
+	struct pci_child_message pkt;
+	struct hv_pci_dev *hpdev;
+	int wslot;
+	int ret;
+
+	for (wslot = hbus->wslot_res_allocated; wslot >= 0; wslot--) {
+		hpdev = get_pcichild_wslot(hbus, wslot);
+		if (!hpdev)
+			continue;
+
+		memset(&pkt, 0, sizeof(pkt));
+		pkt.message_type.type = PCI_RESOURCES_RELEASED;
+		pkt.wslot.slot = hpdev->desc.win_slot.slot;
+
+		put_pcichild(hpdev);
+
+		ret = vmbus_sendpacket(hdev->channel, &pkt, sizeof(pkt), 0,
+				       VM_PKT_DATA_INBAND, 0);
+		if (ret)
+			return ret;
+
+		hbus->wslot_res_allocated = wslot - 1;
+	}
+
+	hbus->wslot_res_allocated = -1;
+
+	return 0;
+}
+
+#define HVPCI_DOM_MAP_SIZE (64 * 1024)
+static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE);
+
+/*
+ * PCI domain number 0 is used by emulated devices on Gen1 VMs, so define 0
+ * as invalid for passthrough PCI devices of this driver.
+ */
+#define HVPCI_DOM_INVALID 0
+
+/**
+ * hv_get_dom_num() - Get a valid PCI domain number
+ * Check if the PCI domain number is in use, and return another number if
+ * it is in use.
+ *
+ * @dom: Requested domain number
+ *
+ * return: domain number on success, HVPCI_DOM_INVALID on failure
+ */
+static u16 hv_get_dom_num(u16 dom)
+{
+	unsigned int i;
+
+	if (test_and_set_bit(dom, hvpci_dom_map) == 0)
+		return dom;
+
+	for_each_clear_bit(i, hvpci_dom_map, HVPCI_DOM_MAP_SIZE) {
+		if (test_and_set_bit(i, hvpci_dom_map) == 0)
+			return i;
+	}
+
+	return HVPCI_DOM_INVALID;
+}
+
+/**
+ * hv_put_dom_num() - Mark the PCI domain number as free
+ * @dom: Domain number to be freed
+ */
+static void hv_put_dom_num(u16 dom)
+{
+	clear_bit(dom, hvpci_dom_map);
+}
+
+/**
+ * hv_pci_probe() - New VMBus channel probe, for a root PCI bus
+ * @hdev:	VMBus's tracking struct for this root PCI bus
+ * @dev_id:	Identifies the device itself
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int hv_pci_probe(struct hv_device *hdev,
+			const struct hv_vmbus_device_id *dev_id)
+{
+	struct pci_host_bridge *bridge;
+	struct hv_pcibus_device *hbus;
+	u16 dom_req, dom;
+	char *name;
+	int ret;
+
+	bridge = devm_pci_alloc_host_bridge(&hdev->device, 0);
+	if (!bridge)
+		return -ENOMEM;
+
+	hbus = kzalloc(sizeof(*hbus), GFP_KERNEL);
+	if (!hbus)
+		return -ENOMEM;
+
+	hbus->bridge = bridge;
+	mutex_init(&hbus->state_lock);
+	hbus->state = hv_pcibus_init;
+	hbus->wslot_res_allocated = -1;
+
+	/*
+	 * The PCI bus "domain" is what is called "segment" in ACPI and other
+	 * specs. Pull it from the instance ID, to get something usually
+	 * unique. In rare cases of collision, we will find out another number
+	 * not in use.
+	 *
+	 * Note that, since this code only runs in a Hyper-V VM, Hyper-V
+	 * together with this guest driver can guarantee that (1) The only
+	 * domain used by Gen1 VMs for something that looks like a physical
+	 * PCI bus (which is actually emulated by the hypervisor) is domain 0.
+	 * (2) There will be no overlap between domains (after fixing possible
+	 * collisions) in the same VM.
+	 */
+	dom_req = hdev->dev_instance.b[5] << 8 | hdev->dev_instance.b[4];
+	dom = hv_get_dom_num(dom_req);
+
+	if (dom == HVPCI_DOM_INVALID) {
+		dev_err(&hdev->device,
+			"Unable to use dom# 0x%x or other numbers", dom_req);
+		ret = -EINVAL;
+		goto free_bus;
+	}
+
+	if (dom != dom_req)
+		dev_info(&hdev->device,
+			 "PCI dom# 0x%x has collision, using 0x%x",
+			 dom_req, dom);
+
+	hbus->bridge->domain_nr = dom;
+#ifdef CONFIG_X86
+	hbus->sysdata.domain = dom;
+	hbus->use_calls = !!(ms_hyperv.hints & HV_X64_USE_MMIO_HYPERCALLS);
+#elif defined(CONFIG_ARM64)
+	/*
+	 * Set the PCI bus parent to be the corresponding VMbus
+	 * device. Then the VMbus device will be assigned as the
+	 * ACPI companion in pcibios_root_bridge_prepare() and
+	 * pci_dma_configure() will propagate device coherence
+	 * information to devices created on the bus.
+	 */
+	hbus->sysdata.parent = hdev->device.parent;
+	hbus->use_calls = false;
+#endif
+
+	hbus->hdev = hdev;
+	INIT_LIST_HEAD(&hbus->children);
+	INIT_LIST_HEAD(&hbus->dr_list);
+	spin_lock_init(&hbus->config_lock);
+	spin_lock_init(&hbus->device_list_lock);
+	hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0,
+					   hbus->bridge->domain_nr);
+	if (!hbus->wq) {
+		ret = -ENOMEM;
+		goto free_dom;
+	}
+
+	hdev->channel->next_request_id_callback = vmbus_next_request_id;
+	hdev->channel->request_addr_callback = vmbus_request_addr;
+	hdev->channel->rqstor_size = HV_PCI_RQSTOR_SIZE;
+
+	ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
+			 hv_pci_onchannelcallback, hbus);
+	if (ret)
+		goto destroy_wq;
+
+	hv_set_drvdata(hdev, hbus);
+
+	ret = hv_pci_protocol_negotiation(hdev, pci_protocol_versions,
+					  ARRAY_SIZE(pci_protocol_versions));
+	if (ret)
+		goto close;
+
+	ret = hv_allocate_config_window(hbus);
+	if (ret)
+		goto close;
+
+	hbus->cfg_addr = ioremap(hbus->mem_config->start,
+				 PCI_CONFIG_MMIO_LENGTH);
+	if (!hbus->cfg_addr) {
+		dev_err(&hdev->device,
+			"Unable to map a virtual address for config space\n");
+		ret = -ENOMEM;
+		goto free_config;
+	}
+
+	name = kasprintf(GFP_KERNEL, "%pUL", &hdev->dev_instance);
+	if (!name) {
+		ret = -ENOMEM;
+		goto unmap;
+	}
+
+	hbus->fwnode = irq_domain_alloc_named_fwnode(name);
+	kfree(name);
+	if (!hbus->fwnode) {
+		ret = -ENOMEM;
+		goto unmap;
+	}
+
+	ret = hv_pcie_init_irq_domain(hbus);
+	if (ret)
+		goto free_fwnode;
+
+	ret = hv_pci_query_relations(hdev);
+	if (ret)
+		goto free_irq_domain;
+
+	mutex_lock(&hbus->state_lock);
+
+	ret = hv_pci_enter_d0(hdev);
+	if (ret)
+		goto release_state_lock;
+
+	ret = hv_pci_allocate_bridge_windows(hbus);
+	if (ret)
+		goto exit_d0;
+
+	ret = hv_send_resources_allocated(hdev);
+	if (ret)
+		goto free_windows;
+
+	prepopulate_bars(hbus);
+
+	hbus->state = hv_pcibus_probed;
+
+	ret = create_root_hv_pci_bus(hbus);
+	if (ret)
+		goto free_windows;
+
+	mutex_unlock(&hbus->state_lock);
+	return 0;
+
+free_windows:
+	hv_pci_free_bridge_windows(hbus);
+exit_d0:
+	(void) hv_pci_bus_exit(hdev, true);
+release_state_lock:
+	mutex_unlock(&hbus->state_lock);
+free_irq_domain:
+	irq_domain_remove(hbus->irq_domain);
+free_fwnode:
+	irq_domain_free_fwnode(hbus->fwnode);
+unmap:
+	iounmap(hbus->cfg_addr);
+free_config:
+	hv_free_config_window(hbus);
+close:
+	vmbus_close(hdev->channel);
+destroy_wq:
+	destroy_workqueue(hbus->wq);
+free_dom:
+	hv_put_dom_num(hbus->bridge->domain_nr);
+free_bus:
+	kfree(hbus);
+	return ret;
+}
+
+static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs)
+{
+	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
+	struct vmbus_channel *chan = hdev->channel;
+	struct {
+		struct pci_packet teardown_packet;
+		u8 buffer[sizeof(struct pci_message)];
+	} pkt;
+	struct hv_pci_compl comp_pkt;
+	struct hv_pci_dev *hpdev, *tmp;
+	unsigned long flags;
+	u64 trans_id;
+	int ret;
+
+	/*
+	 * After the host sends the RESCIND_CHANNEL message, it doesn't
+	 * access the per-channel ringbuffer any longer.
+	 */
+	if (chan->rescind)
+		return 0;
+
+	if (!keep_devs) {
+		struct list_head removed;
+
+		/* Move all present children to the list on stack */
+		INIT_LIST_HEAD(&removed);
+		spin_lock_irqsave(&hbus->device_list_lock, flags);
+		list_for_each_entry_safe(hpdev, tmp, &hbus->children, list_entry)
+			list_move_tail(&hpdev->list_entry, &removed);
+		spin_unlock_irqrestore(&hbus->device_list_lock, flags);
+
+		/* Remove all children in the list */
+		list_for_each_entry_safe(hpdev, tmp, &removed, list_entry) {
+			list_del(&hpdev->list_entry);
+			if (hpdev->pci_slot)
+				pci_destroy_slot(hpdev->pci_slot);
+			/* For the two refs got in new_pcichild_device() */
+			put_pcichild(hpdev);
+			put_pcichild(hpdev);
+		}
+	}
+
+	ret = hv_send_resources_released(hdev);
+	if (ret) {
+		dev_err(&hdev->device,
+			"Couldn't send resources released packet(s)\n");
+		return ret;
+	}
+
+	memset(&pkt.teardown_packet, 0, sizeof(pkt.teardown_packet));
+	init_completion(&comp_pkt.host_event);
+	pkt.teardown_packet.completion_func = hv_pci_generic_compl;
+	pkt.teardown_packet.compl_ctxt = &comp_pkt;
+	pkt.teardown_packet.message[0].type = PCI_BUS_D0EXIT;
+
+	ret = vmbus_sendpacket_getid(chan, &pkt.teardown_packet.message,
+				     sizeof(struct pci_message),
+				     (unsigned long)&pkt.teardown_packet,
+				     &trans_id, VM_PKT_DATA_INBAND,
+				     VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret)
+		return ret;
+
+	if (wait_for_completion_timeout(&comp_pkt.host_event, 10 * HZ) == 0) {
+		/*
+		 * The completion packet on the stack becomes invalid after
+		 * 'return'; remove the ID from the VMbus requestor if the
+		 * identifier is still mapped to/associated with the packet.
+		 *
+		 * Cf. hv_pci_onchannelcallback().
+		 */
+		vmbus_request_addr_match(chan, trans_id,
+					 (unsigned long)&pkt.teardown_packet);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+/**
+ * hv_pci_remove() - Remove routine for this VMBus channel
+ * @hdev:	VMBus's tracking struct for this root PCI bus
+ */
+static void hv_pci_remove(struct hv_device *hdev)
+{
+	struct hv_pcibus_device *hbus;
+
+	hbus = hv_get_drvdata(hdev);
+	if (hbus->state == hv_pcibus_installed) {
+		tasklet_disable(&hdev->channel->callback_event);
+		hbus->state = hv_pcibus_removing;
+		tasklet_enable(&hdev->channel->callback_event);
+		destroy_workqueue(hbus->wq);
+		hbus->wq = NULL;
+		/*
+		 * At this point, no work is running or can be scheduled
+		 * on hbus-wq. We can't race with hv_pci_devices_present()
+		 * or hv_pci_eject_device(), it's safe to proceed.
+		 */
+
+		/* Remove the bus from PCI's point of view. */
+		pci_lock_rescan_remove();
+		pci_stop_root_bus(hbus->bridge->bus);
+		hv_pci_remove_slots(hbus);
+		pci_remove_root_bus(hbus->bridge->bus);
+		pci_unlock_rescan_remove();
+	}
+
+	hv_pci_bus_exit(hdev, false);
+
+	vmbus_close(hdev->channel);
+
+	iounmap(hbus->cfg_addr);
+	hv_free_config_window(hbus);
+	hv_pci_free_bridge_windows(hbus);
+	irq_domain_remove(hbus->irq_domain);
+	irq_domain_free_fwnode(hbus->fwnode);
+
+	hv_put_dom_num(hbus->bridge->domain_nr);
+
+	kfree(hbus);
+}
+
+static int hv_pci_suspend(struct hv_device *hdev)
+{
+	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
+	enum hv_pcibus_state old_state;
+	int ret;
+
+	/*
+	 * hv_pci_suspend() must make sure there are no pending work items
+	 * before calling vmbus_close(), since it runs in a process context
+	 * as a callback in dpm_suspend().  When it starts to run, the channel
+	 * callback hv_pci_onchannelcallback(), which runs in a tasklet
+	 * context, can be still running concurrently and scheduling new work
+	 * items onto hbus->wq in hv_pci_devices_present() and
+	 * hv_pci_eject_device(), and the work item handlers can access the
+	 * vmbus channel, which can be being closed by hv_pci_suspend(), e.g.
+	 * the work item handler pci_devices_present_work() ->
+	 * new_pcichild_device() writes to the vmbus channel.
+	 *
+	 * To eliminate the race, hv_pci_suspend() disables the channel
+	 * callback tasklet, sets hbus->state to hv_pcibus_removing, and
+	 * re-enables the tasklet. This way, when hv_pci_suspend() proceeds,
+	 * it knows that no new work item can be scheduled, and then it flushes
+	 * hbus->wq and safely closes the vmbus channel.
+	 */
+	tasklet_disable(&hdev->channel->callback_event);
+
+	/* Change the hbus state to prevent new work items. */
+	old_state = hbus->state;
+	if (hbus->state == hv_pcibus_installed)
+		hbus->state = hv_pcibus_removing;
+
+	tasklet_enable(&hdev->channel->callback_event);
+
+	if (old_state != hv_pcibus_installed)
+		return -EINVAL;
+
+	flush_workqueue(hbus->wq);
+
+	ret = hv_pci_bus_exit(hdev, true);
+	if (ret)
+		return ret;
+
+	vmbus_close(hdev->channel);
+
+	return 0;
+}
+
+static int hv_pci_restore_msi_msg(struct pci_dev *pdev, void *arg)
+{
+	struct irq_data *irq_data;
+	struct msi_desc *entry;
+	int ret = 0;
+
+	if (!pdev->msi_enabled && !pdev->msix_enabled)
+		return 0;
+
+	msi_lock_descs(&pdev->dev);
+	msi_for_each_desc(entry, &pdev->dev, MSI_DESC_ASSOCIATED) {
+		irq_data = irq_get_irq_data(entry->irq);
+		if (WARN_ON_ONCE(!irq_data)) {
+			ret = -EINVAL;
+			break;
+		}
+
+		hv_compose_msi_msg(irq_data, &entry->msg);
+	}
+	msi_unlock_descs(&pdev->dev);
+
+	return ret;
+}
+
+/*
+ * Upon resume, pci_restore_msi_state() -> ... ->  __pci_write_msi_msg()
+ * directly writes the MSI/MSI-X registers via MMIO, but since Hyper-V
+ * doesn't trap and emulate the MMIO accesses, here hv_compose_msi_msg()
+ * must be used to ask Hyper-V to re-create the IOMMU Interrupt Remapping
+ * Table entries.
+ */
+static void hv_pci_restore_msi_state(struct hv_pcibus_device *hbus)
+{
+	pci_walk_bus(hbus->bridge->bus, hv_pci_restore_msi_msg, NULL);
+}
+
+static int hv_pci_resume(struct hv_device *hdev)
+{
+	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
+	enum pci_protocol_version_t version[1];
+	int ret;
+
+	hbus->state = hv_pcibus_init;
+
+	hdev->channel->next_request_id_callback = vmbus_next_request_id;
+	hdev->channel->request_addr_callback = vmbus_request_addr;
+	hdev->channel->rqstor_size = HV_PCI_RQSTOR_SIZE;
+
+	ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
+			 hv_pci_onchannelcallback, hbus);
+	if (ret)
+		return ret;
+
+	/* Only use the version that was in use before hibernation. */
+	version[0] = hbus->protocol_version;
+	ret = hv_pci_protocol_negotiation(hdev, version, 1);
+	if (ret)
+		goto out;
+
+	ret = hv_pci_query_relations(hdev);
+	if (ret)
+		goto out;
+
+	mutex_lock(&hbus->state_lock);
+
+	ret = hv_pci_enter_d0(hdev);
+	if (ret)
+		goto release_state_lock;
+
+	ret = hv_send_resources_allocated(hdev);
+	if (ret)
+		goto release_state_lock;
+
+	prepopulate_bars(hbus);
+
+	hv_pci_restore_msi_state(hbus);
+
+	hbus->state = hv_pcibus_installed;
+	mutex_unlock(&hbus->state_lock);
+	return 0;
+
+release_state_lock:
+	mutex_unlock(&hbus->state_lock);
+out:
+	vmbus_close(hdev->channel);
+	return ret;
+}
+
+static const struct hv_vmbus_device_id hv_pci_id_table[] = {
+	/* PCI Pass-through Class ID */
+	/* 44C4F61D-4444-4400-9D52-802E27EDE19F */
+	{ HV_PCIE_GUID, },
+	{ },
+};
+
+MODULE_DEVICE_TABLE(vmbus, hv_pci_id_table);
+
+static struct hv_driver hv_pci_drv = {
+	.name		= "hv_pci",
+	.id_table	= hv_pci_id_table,
+	.probe		= hv_pci_probe,
+	.remove		= hv_pci_remove,
+	.suspend	= hv_pci_suspend,
+	.resume		= hv_pci_resume,
+};
+
+static void __exit exit_hv_pci_drv(void)
+{
+	vmbus_driver_unregister(&hv_pci_drv);
+
+	hvpci_block_ops.read_block = NULL;
+	hvpci_block_ops.write_block = NULL;
+	hvpci_block_ops.reg_blk_invalidate = NULL;
+}
+
+static int __init init_hv_pci_drv(void)
+{
+	int ret;
+
+	if (!hv_is_hyperv_initialized())
+		return -ENODEV;
+
+	ret = hv_pci_irqchip_init();
+	if (ret)
+		return ret;
+
+	/* Set the invalid domain number's bit, so it will not be used */
+	set_bit(HVPCI_DOM_INVALID, hvpci_dom_map);
+
+	/* Initialize PCI block r/w interface */
+	hvpci_block_ops.read_block = hv_read_config_block;
+	hvpci_block_ops.write_block = hv_write_config_block;
+	hvpci_block_ops.reg_blk_invalidate = hv_register_block_invalidate;
+
+	return vmbus_driver_register(&hv_pci_drv);
+}
+
+module_init(init_hv_pci_drv);
+module_exit(exit_hv_pci_drv);
+
+MODULE_DESCRIPTION("Hyper-V PCI");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pci-ixp4xx.c b/drivers/pci/controller/pci-ixp4xx.c
new file mode 100644
index 000000000..acb85e0d5
--- /dev/null
+++ b/drivers/pci/controller/pci-ixp4xx.c
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for Intel IXP4xx PCI host controller
+ *
+ * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org>
+ *
+ * Based on the IXP4xx arch/arm/mach-ixp4xx/common-pci.c driver
+ * Copyright (C) 2002 Intel Corporation
+ * Copyright (C) 2003 Greg Ungerer <gerg@linux-m68k.org>
+ * Copyright (C) 2003-2004 MontaVista Software, Inc.
+ * Copyright (C) 2005 Deepak Saxena <dsaxena@plexity.net>
+ * Copyright (C) 2005 Alessandro Zummo <a.zummo@towertech.it>
+ *
+ * TODO:
+ * - Test IO-space access
+ * - DMA support
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/bits.h>
+#include "../pci.h"
+
+/* Register offsets */
+#define IXP4XX_PCI_NP_AD		0x00
+#define IXP4XX_PCI_NP_CBE		0x04
+#define IXP4XX_PCI_NP_WDATA		0x08
+#define IXP4XX_PCI_NP_RDATA		0x0c
+#define IXP4XX_PCI_CRP_AD_CBE		0x10
+#define IXP4XX_PCI_CRP_WDATA		0x14
+#define IXP4XX_PCI_CRP_RDATA		0x18
+#define IXP4XX_PCI_CSR			0x1c
+#define IXP4XX_PCI_ISR			0x20
+#define IXP4XX_PCI_INTEN		0x24
+#define IXP4XX_PCI_DMACTRL		0x28
+#define IXP4XX_PCI_AHBMEMBASE		0x2c
+#define IXP4XX_PCI_AHBIOBASE		0x30
+#define IXP4XX_PCI_PCIMEMBASE		0x34
+#define IXP4XX_PCI_AHBDOORBELL		0x38
+#define IXP4XX_PCI_PCIDOORBELL		0x3c
+#define IXP4XX_PCI_ATPDMA0_AHBADDR	0x40
+#define IXP4XX_PCI_ATPDMA0_PCIADDR	0x44
+#define IXP4XX_PCI_ATPDMA0_LENADDR	0x48
+#define IXP4XX_PCI_ATPDMA1_AHBADDR	0x4c
+#define IXP4XX_PCI_ATPDMA1_PCIADDR	0x50
+#define IXP4XX_PCI_ATPDMA1_LENADDR	0x54
+
+/* CSR bit definitions */
+#define IXP4XX_PCI_CSR_HOST		BIT(0)
+#define IXP4XX_PCI_CSR_ARBEN		BIT(1)
+#define IXP4XX_PCI_CSR_ADS		BIT(2)
+#define IXP4XX_PCI_CSR_PDS		BIT(3)
+#define IXP4XX_PCI_CSR_ABE		BIT(4)
+#define IXP4XX_PCI_CSR_DBT		BIT(5)
+#define IXP4XX_PCI_CSR_ASE		BIT(8)
+#define IXP4XX_PCI_CSR_IC		BIT(15)
+#define IXP4XX_PCI_CSR_PRST		BIT(16)
+
+/* ISR (Interrupt status) Register bit definitions */
+#define IXP4XX_PCI_ISR_PSE		BIT(0)
+#define IXP4XX_PCI_ISR_PFE		BIT(1)
+#define IXP4XX_PCI_ISR_PPE		BIT(2)
+#define IXP4XX_PCI_ISR_AHBE		BIT(3)
+#define IXP4XX_PCI_ISR_APDC		BIT(4)
+#define IXP4XX_PCI_ISR_PADC		BIT(5)
+#define IXP4XX_PCI_ISR_ADB		BIT(6)
+#define IXP4XX_PCI_ISR_PDB		BIT(7)
+
+/* INTEN (Interrupt Enable) Register bit definitions */
+#define IXP4XX_PCI_INTEN_PSE		BIT(0)
+#define IXP4XX_PCI_INTEN_PFE		BIT(1)
+#define IXP4XX_PCI_INTEN_PPE		BIT(2)
+#define IXP4XX_PCI_INTEN_AHBE		BIT(3)
+#define IXP4XX_PCI_INTEN_APDC		BIT(4)
+#define IXP4XX_PCI_INTEN_PADC		BIT(5)
+#define IXP4XX_PCI_INTEN_ADB		BIT(6)
+#define IXP4XX_PCI_INTEN_PDB		BIT(7)
+
+/* Shift value for byte enable on NP cmd/byte enable register */
+#define IXP4XX_PCI_NP_CBE_BESL		4
+
+/* PCI commands supported by NP access unit */
+#define NP_CMD_IOREAD			0x2
+#define NP_CMD_IOWRITE			0x3
+#define NP_CMD_CONFIGREAD		0xa
+#define NP_CMD_CONFIGWRITE		0xb
+#define NP_CMD_MEMREAD			0x6
+#define	NP_CMD_MEMWRITE			0x7
+
+/* Constants for CRP access into local config space */
+#define CRP_AD_CBE_BESL         20
+#define CRP_AD_CBE_WRITE	0x00010000
+
+/* Special PCI configuration space registers for this controller */
+#define IXP4XX_PCI_RTOTTO		0x40
+
+struct ixp4xx_pci {
+	struct device *dev;
+	void __iomem *base;
+	bool errata_hammer;
+	bool host_mode;
+};
+
+/*
+ * The IXP4xx has a peculiar address bus that will change the
+ * byte order on SoC peripherals depending on whether the device
+ * operates in big-endian or little-endian mode. That means that
+ * readl() and writel() that always use little-endian access
+ * will not work for SoC peripherals such as the PCI controller
+ * when used in big-endian mode. The accesses to the individual
+ * PCI devices on the other hand, are always little-endian and
+ * can use readl() and writel().
+ *
+ * For local AHB bus access we need to use __raw_[readl|writel]()
+ * to make sure that we access the SoC devices in the CPU native
+ * endianness.
+ */
+static inline u32 ixp4xx_readl(struct ixp4xx_pci *p, u32 reg)
+{
+	return __raw_readl(p->base + reg);
+}
+
+static inline void ixp4xx_writel(struct ixp4xx_pci *p, u32 reg, u32 val)
+{
+	__raw_writel(val, p->base + reg);
+}
+
+static int ixp4xx_pci_check_master_abort(struct ixp4xx_pci *p)
+{
+	u32 isr = ixp4xx_readl(p, IXP4XX_PCI_ISR);
+
+	if (isr & IXP4XX_PCI_ISR_PFE) {
+		/* Make sure the master abort bit is reset */
+		ixp4xx_writel(p, IXP4XX_PCI_ISR, IXP4XX_PCI_ISR_PFE);
+		dev_dbg(p->dev, "master abort detected\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ixp4xx_pci_read_indirect(struct ixp4xx_pci *p, u32 addr, u32 cmd, u32 *data)
+{
+	ixp4xx_writel(p, IXP4XX_PCI_NP_AD, addr);
+
+	if (p->errata_hammer) {
+		int i;
+
+		/*
+		 * PCI workaround - only works if NP PCI space reads have
+		 * no side effects. Hammer the register and read twice 8
+		 * times. last one will be good.
+		 */
+		for (i = 0; i < 8; i++) {
+			ixp4xx_writel(p, IXP4XX_PCI_NP_CBE, cmd);
+			*data = ixp4xx_readl(p, IXP4XX_PCI_NP_RDATA);
+			*data = ixp4xx_readl(p, IXP4XX_PCI_NP_RDATA);
+		}
+	} else {
+		ixp4xx_writel(p, IXP4XX_PCI_NP_CBE, cmd);
+		*data = ixp4xx_readl(p, IXP4XX_PCI_NP_RDATA);
+	}
+
+	return ixp4xx_pci_check_master_abort(p);
+}
+
+static int ixp4xx_pci_write_indirect(struct ixp4xx_pci *p, u32 addr, u32 cmd, u32 data)
+{
+	ixp4xx_writel(p, IXP4XX_PCI_NP_AD, addr);
+
+	/* Set up the write */
+	ixp4xx_writel(p, IXP4XX_PCI_NP_CBE, cmd);
+
+	/* Execute the write by writing to NP_WDATA */
+	ixp4xx_writel(p, IXP4XX_PCI_NP_WDATA, data);
+
+	return ixp4xx_pci_check_master_abort(p);
+}
+
+static u32 ixp4xx_config_addr(u8 bus_num, u16 devfn, int where)
+{
+	/* Root bus is always 0 in this hardware */
+	if (bus_num == 0) {
+		/* type 0 */
+		return (PCI_CONF1_ADDRESS(0, 0, PCI_FUNC(devfn), where) &
+			~PCI_CONF1_ENABLE) | BIT(32-PCI_SLOT(devfn));
+	} else {
+		/* type 1 */
+		return (PCI_CONF1_ADDRESS(bus_num, PCI_SLOT(devfn),
+					  PCI_FUNC(devfn), where) &
+			~PCI_CONF1_ENABLE) | 1;
+	}
+}
+
+/*
+ * CRP functions are "Controller Configuration Port" accesses
+ * initiated from within this driver itself to read/write PCI
+ * control information in the config space.
+ */
+static u32 ixp4xx_crp_byte_lane_enable_bits(u32 n, int size)
+{
+	if (size == 1)
+		return (0xf & ~BIT(n)) << CRP_AD_CBE_BESL;
+	if (size == 2)
+		return (0xf & ~(BIT(n) | BIT(n+1))) << CRP_AD_CBE_BESL;
+	if (size == 4)
+		return 0;
+	return 0xffffffff;
+}
+
+static int ixp4xx_crp_read_config(struct ixp4xx_pci *p, int where, int size,
+				  u32 *value)
+{
+	u32 n, cmd, val;
+
+	n = where % 4;
+	cmd = where & ~3;
+
+	dev_dbg(p->dev, "%s from %d size %d cmd %08x\n",
+		__func__, where, size, cmd);
+
+	ixp4xx_writel(p, IXP4XX_PCI_CRP_AD_CBE, cmd);
+	val = ixp4xx_readl(p, IXP4XX_PCI_CRP_RDATA);
+
+	val >>= (8*n);
+	switch (size) {
+	case 1:
+		val &= U8_MAX;
+		dev_dbg(p->dev, "%s read byte %02x\n", __func__, val);
+		break;
+	case 2:
+		val &= U16_MAX;
+		dev_dbg(p->dev, "%s read word %04x\n", __func__, val);
+		break;
+	case 4:
+		val &= U32_MAX;
+		dev_dbg(p->dev, "%s read long %08x\n", __func__, val);
+		break;
+	default:
+		/* Should not happen */
+		dev_err(p->dev, "%s illegal size\n", __func__);
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+	*value = val;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int ixp4xx_crp_write_config(struct ixp4xx_pci *p, int where, int size,
+				   u32 value)
+{
+	u32 n, cmd, val;
+
+	n = where % 4;
+	cmd = ixp4xx_crp_byte_lane_enable_bits(n, size);
+	if (cmd == 0xffffffff)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	cmd |= where & ~3;
+	cmd |= CRP_AD_CBE_WRITE;
+
+	val = value << (8*n);
+
+	dev_dbg(p->dev, "%s to %d size %d cmd %08x val %08x\n",
+		__func__, where, size, cmd, val);
+
+	ixp4xx_writel(p, IXP4XX_PCI_CRP_AD_CBE, cmd);
+	ixp4xx_writel(p, IXP4XX_PCI_CRP_WDATA, val);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Then follows the functions that read and write from the common PCI
+ * configuration space.
+ */
+static u32 ixp4xx_byte_lane_enable_bits(u32 n, int size)
+{
+	if (size == 1)
+		return (0xf & ~BIT(n)) << 4;
+	if (size == 2)
+		return (0xf & ~(BIT(n) | BIT(n+1))) << 4;
+	if (size == 4)
+		return 0;
+	return 0xffffffff;
+}
+
+static int ixp4xx_pci_read_config(struct pci_bus *bus, unsigned int devfn,
+				  int where, int size, u32 *value)
+{
+	struct ixp4xx_pci *p = bus->sysdata;
+	u32 n, addr, val, cmd;
+	u8 bus_num = bus->number;
+	int ret;
+
+	*value = 0xffffffff;
+	n = where % 4;
+	cmd = ixp4xx_byte_lane_enable_bits(n, size);
+	if (cmd == 0xffffffff)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	addr = ixp4xx_config_addr(bus_num, devfn, where);
+	cmd |= NP_CMD_CONFIGREAD;
+	dev_dbg(p->dev, "read_config from %d size %d dev %d:%d:%d address: %08x cmd: %08x\n",
+		where, size, bus_num, PCI_SLOT(devfn), PCI_FUNC(devfn), addr, cmd);
+
+	ret = ixp4xx_pci_read_indirect(p, addr, cmd, &val);
+	if (ret)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	val >>= (8*n);
+	switch (size) {
+	case 1:
+		val &= U8_MAX;
+		dev_dbg(p->dev, "%s read byte %02x\n", __func__, val);
+		break;
+	case 2:
+		val &= U16_MAX;
+		dev_dbg(p->dev, "%s read word %04x\n", __func__, val);
+		break;
+	case 4:
+		val &= U32_MAX;
+		dev_dbg(p->dev, "%s read long %08x\n", __func__, val);
+		break;
+	default:
+		/* Should not happen */
+		dev_err(p->dev, "%s illegal size\n", __func__);
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+	*value = val;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int ixp4xx_pci_write_config(struct pci_bus *bus,  unsigned int devfn,
+				   int where, int size, u32 value)
+{
+	struct ixp4xx_pci *p = bus->sysdata;
+	u32 n, addr, val, cmd;
+	u8 bus_num = bus->number;
+	int ret;
+
+	n = where % 4;
+	cmd = ixp4xx_byte_lane_enable_bits(n, size);
+	if (cmd == 0xffffffff)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	addr = ixp4xx_config_addr(bus_num, devfn, where);
+	cmd |= NP_CMD_CONFIGWRITE;
+	val = value << (8*n);
+
+	dev_dbg(p->dev, "write_config_byte %#x to %d size %d dev %d:%d:%d addr: %08x cmd %08x\n",
+		value, where, size, bus_num, PCI_SLOT(devfn), PCI_FUNC(devfn), addr, cmd);
+
+	ret = ixp4xx_pci_write_indirect(p, addr, cmd, val);
+	if (ret)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops ixp4xx_pci_ops = {
+	.read = ixp4xx_pci_read_config,
+	.write = ixp4xx_pci_write_config,
+};
+
+static u32 ixp4xx_pci_addr_to_64mconf(phys_addr_t addr)
+{
+	u8 base;
+
+	base = ((addr & 0xff000000) >> 24);
+	return (base << 24) | ((base + 1) << 16)
+		| ((base + 2) << 8) | (base + 3);
+}
+
+static int ixp4xx_pci_parse_map_ranges(struct ixp4xx_pci *p)
+{
+	struct device *dev = p->dev;
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(p);
+	struct resource_entry *win;
+	struct resource *res;
+	phys_addr_t addr;
+
+	win = resource_list_first_type(&bridge->windows, IORESOURCE_MEM);
+	if (win) {
+		u32 pcimembase;
+
+		res = win->res;
+		addr = res->start - win->offset;
+
+		if (res->flags & IORESOURCE_PREFETCH)
+			res->name = "IXP4xx PCI PRE-MEM";
+		else
+			res->name = "IXP4xx PCI NON-PRE-MEM";
+
+		dev_dbg(dev, "%s window %pR, bus addr %pa\n",
+			res->name, res, &addr);
+		if (resource_size(res) != SZ_64M) {
+			dev_err(dev, "memory range is not 64MB\n");
+			return -EINVAL;
+		}
+
+		pcimembase = ixp4xx_pci_addr_to_64mconf(addr);
+		/* Commit configuration */
+		ixp4xx_writel(p, IXP4XX_PCI_PCIMEMBASE, pcimembase);
+	} else {
+		dev_err(dev, "no AHB memory mapping defined\n");
+	}
+
+	win = resource_list_first_type(&bridge->windows, IORESOURCE_IO);
+	if (win) {
+		res = win->res;
+
+		addr = pci_pio_to_address(res->start);
+		if (addr & 0xff) {
+			dev_err(dev, "IO mem at uneven address: %pa\n", &addr);
+			return -EINVAL;
+		}
+
+		res->name = "IXP4xx PCI IO MEM";
+		/*
+		 * Setup I/O space location for PCI->AHB access, the
+		 * upper 24 bits of the address goes into the lower
+		 * 24 bits of this register.
+		 */
+		ixp4xx_writel(p, IXP4XX_PCI_AHBIOBASE, (addr >> 8));
+	} else {
+		dev_info(dev, "no IO space AHB memory mapping defined\n");
+	}
+
+	return 0;
+}
+
+static int ixp4xx_pci_parse_map_dma_ranges(struct ixp4xx_pci *p)
+{
+	struct device *dev = p->dev;
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(p);
+	struct resource_entry *win;
+	struct resource *res;
+	phys_addr_t addr;
+	u32 ahbmembase;
+
+	win = resource_list_first_type(&bridge->dma_ranges, IORESOURCE_MEM);
+	if (win) {
+		res = win->res;
+		addr = res->start - win->offset;
+
+		if (resource_size(res) != SZ_64M) {
+			dev_err(dev, "DMA memory range is not 64MB\n");
+			return -EINVAL;
+		}
+
+		dev_dbg(dev, "DMA MEM BASE: %pa\n", &addr);
+		/*
+		 * 4 PCI-to-AHB windows of 16 MB each, write the 8 high bits
+		 * into each byte of the PCI_AHBMEMBASE register.
+		 */
+		ahbmembase = ixp4xx_pci_addr_to_64mconf(addr);
+		/* Commit AHB membase */
+		ixp4xx_writel(p, IXP4XX_PCI_AHBMEMBASE, ahbmembase);
+	} else {
+		dev_err(dev, "no DMA memory range defined\n");
+	}
+
+	return 0;
+}
+
+/* Only used to get context for abort handling */
+static struct ixp4xx_pci *ixp4xx_pci_abort_singleton;
+
+static int ixp4xx_pci_abort_handler(unsigned long addr, unsigned int fsr,
+				    struct pt_regs *regs)
+{
+	struct ixp4xx_pci *p = ixp4xx_pci_abort_singleton;
+	u32 isr, status;
+	int ret;
+
+	isr = ixp4xx_readl(p, IXP4XX_PCI_ISR);
+	ret = ixp4xx_crp_read_config(p, PCI_STATUS, 2, &status);
+	if (ret) {
+		dev_err(p->dev, "unable to read abort status\n");
+		return -EINVAL;
+	}
+
+	dev_err(p->dev,
+		"PCI: abort_handler addr = %#lx, isr = %#x, status = %#x\n",
+		addr, isr, status);
+
+	/* Make sure the Master Abort bit is reset */
+	ixp4xx_writel(p, IXP4XX_PCI_ISR, IXP4XX_PCI_ISR_PFE);
+	status |= PCI_STATUS_REC_MASTER_ABORT;
+	ret = ixp4xx_crp_write_config(p, PCI_STATUS, 2, status);
+	if (ret)
+		dev_err(p->dev, "unable to clear abort status bit\n");
+
+	/*
+	 * If it was an imprecise abort, then we need to correct the
+	 * return address to be _after_ the instruction.
+	 */
+	if (fsr & (1 << 10)) {
+		dev_err(p->dev, "imprecise abort\n");
+		regs->ARM_pc += 4;
+	}
+
+	return 0;
+}
+
+static int __init ixp4xx_pci_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct ixp4xx_pci *p;
+	struct pci_host_bridge *host;
+	int ret;
+	u32 val;
+	phys_addr_t addr;
+	u32 basereg[4] = {
+		PCI_BASE_ADDRESS_0,
+		PCI_BASE_ADDRESS_1,
+		PCI_BASE_ADDRESS_2,
+		PCI_BASE_ADDRESS_3,
+	};
+	int i;
+
+	host = devm_pci_alloc_host_bridge(dev, sizeof(*p));
+	if (!host)
+		return -ENOMEM;
+
+	host->ops = &ixp4xx_pci_ops;
+	p = pci_host_bridge_priv(host);
+	host->sysdata = p;
+	p->dev = dev;
+	dev_set_drvdata(dev, p);
+
+	/*
+	 * Set up quirk for erratic behaviour in the 42x variant
+	 * when accessing config space.
+	 */
+	if (of_device_is_compatible(np, "intel,ixp42x-pci")) {
+		p->errata_hammer = true;
+		dev_info(dev, "activate hammering errata\n");
+	}
+
+	p->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(p->base))
+		return PTR_ERR(p->base);
+
+	val = ixp4xx_readl(p, IXP4XX_PCI_CSR);
+	p->host_mode = !!(val & IXP4XX_PCI_CSR_HOST);
+	dev_info(dev, "controller is in %s mode\n",
+		 p->host_mode ? "host" : "option");
+
+	/* Hook in our fault handler for PCI errors */
+	ixp4xx_pci_abort_singleton = p;
+	hook_fault_code(16+6, ixp4xx_pci_abort_handler, SIGBUS, 0,
+			"imprecise external abort");
+
+	ret = ixp4xx_pci_parse_map_ranges(p);
+	if (ret)
+		return ret;
+
+	ret = ixp4xx_pci_parse_map_dma_ranges(p);
+	if (ret)
+		return ret;
+
+	/* This is only configured in host mode */
+	if (p->host_mode) {
+		addr = __pa(PAGE_OFFSET);
+		/* This is a noop (0x00) but explains what is going on */
+		addr |= PCI_BASE_ADDRESS_SPACE_MEMORY;
+
+		for (i = 0; i < 4; i++) {
+			/* Write this directly into the config space */
+			ret = ixp4xx_crp_write_config(p, basereg[i], 4, addr);
+			if (ret)
+				dev_err(dev, "failed to set up PCI_BASE_ADDRESS_%d\n", i);
+			else
+				dev_info(dev, "set PCI_BASE_ADDR_%d to %pa\n", i, &addr);
+			addr += SZ_16M;
+		}
+
+		/*
+		 * Enable CSR window at 64 MiB to allow PCI masters to continue
+		 * prefetching past the 64 MiB boundary, if all AHB to PCI
+		 * windows are consecutive.
+		 */
+		ret = ixp4xx_crp_write_config(p, PCI_BASE_ADDRESS_4, 4, addr);
+		if (ret)
+			dev_err(dev, "failed to set up PCI_BASE_ADDRESS_4\n");
+		else
+			dev_info(dev, "set PCI_BASE_ADDR_4 to %pa\n", &addr);
+
+		/*
+		 * Put the IO memory window at the very end of physical memory
+		 * at 0xfffffc00. This is when the system is trying to access IO
+		 * memory over AHB.
+		 */
+		addr = 0xfffffc00;
+		addr |= PCI_BASE_ADDRESS_SPACE_IO;
+		ret = ixp4xx_crp_write_config(p, PCI_BASE_ADDRESS_5, 4, addr);
+		if (ret)
+			dev_err(dev, "failed to set up PCI_BASE_ADDRESS_5\n");
+		else
+			dev_info(dev, "set PCI_BASE_ADDR_5 to %pa\n", &addr);
+
+		/*
+		 * Retry timeout to 0x80
+		 * Transfer ready timeout to 0xff
+		 */
+		ret = ixp4xx_crp_write_config(p, IXP4XX_PCI_RTOTTO, 4,
+					      0x000080ff);
+		if (ret)
+			dev_err(dev, "failed to set up TRDY limit\n");
+		else
+			dev_info(dev, "set TRDY limit to 0x80ff\n");
+	}
+
+	/* Clear interrupts */
+	val = IXP4XX_PCI_ISR_PSE | IXP4XX_PCI_ISR_PFE | IXP4XX_PCI_ISR_PPE | IXP4XX_PCI_ISR_AHBE;
+	ixp4xx_writel(p, IXP4XX_PCI_ISR, val);
+
+	/*
+	 * Set Initialize Complete in PCI Control Register: allow IXP4XX to
+	 * generate PCI configuration cycles. Specify that the AHB bus is
+	 * operating in big-endian mode. Set up byte lane swapping between
+	 * little-endian PCI and the big-endian AHB bus.
+	 */
+	val = IXP4XX_PCI_CSR_IC | IXP4XX_PCI_CSR_ABE;
+	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+		val |= (IXP4XX_PCI_CSR_PDS | IXP4XX_PCI_CSR_ADS);
+	ixp4xx_writel(p, IXP4XX_PCI_CSR, val);
+
+	ret = ixp4xx_crp_write_config(p, PCI_COMMAND, 2, PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY);
+	if (ret)
+		dev_err(dev, "unable to initialize master and command memory\n");
+	else
+		dev_info(dev, "initialized as master\n");
+
+	pci_host_probe(host);
+
+	return 0;
+}
+
+static const struct of_device_id ixp4xx_pci_of_match[] = {
+	{
+		.compatible = "intel,ixp42x-pci",
+	},
+	{
+		.compatible = "intel,ixp43x-pci",
+	},
+	{},
+};
+
+/*
+ * This driver needs to be a builtin module with suppressed bind
+ * attributes since the probe() is initializing a hard exception
+ * handler and this can only be done from __init-tagged code
+ * sections. This module cannot be removed and inserted at all.
+ */
+static struct platform_driver ixp4xx_pci_driver = {
+	.driver = {
+		.name = "ixp4xx-pci",
+		.suppress_bind_attrs = true,
+		.of_match_table = ixp4xx_pci_of_match,
+	},
+};
+builtin_platform_driver_probe(ixp4xx_pci_driver, ixp4xx_pci_probe);
diff --git a/drivers/pci/controller/pci-loongson.c b/drivers/pci/controller/pci-loongson.c
new file mode 100644
index 000000000..8b34ccff0
--- /dev/null
+++ b/drivers/pci/controller/pci-loongson.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Loongson PCI Host Controller Driver
+ *
+ * Copyright (C) 2020 Jiaxun Yang <jiaxun.yang@flygoat.com>
+ */
+
+#include <linux/of.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/pci-acpi.h>
+#include <linux/pci-ecam.h>
+
+#include "../pci.h"
+
+/* Device IDs */
+#define DEV_LS2K_PCIE_PORT0	0x1a05
+#define DEV_LS7A_PCIE_PORT0	0x7a09
+#define DEV_LS7A_PCIE_PORT1	0x7a19
+#define DEV_LS7A_PCIE_PORT2	0x7a29
+#define DEV_LS7A_PCIE_PORT3	0x7a39
+#define DEV_LS7A_PCIE_PORT4	0x7a49
+#define DEV_LS7A_PCIE_PORT5	0x7a59
+#define DEV_LS7A_PCIE_PORT6	0x7a69
+
+#define DEV_LS2K_APB	0x7a02
+#define DEV_LS7A_GMAC	0x7a03
+#define DEV_LS7A_DC1	0x7a06
+#define DEV_LS7A_LPC	0x7a0c
+#define DEV_LS7A_AHCI	0x7a08
+#define DEV_LS7A_CONF	0x7a10
+#define DEV_LS7A_GNET	0x7a13
+#define DEV_LS7A_EHCI	0x7a14
+#define DEV_LS7A_DC2	0x7a36
+#define DEV_LS7A_HDMI	0x7a37
+
+#define FLAG_CFG0	BIT(0)
+#define FLAG_CFG1	BIT(1)
+#define FLAG_DEV_FIX	BIT(2)
+#define FLAG_DEV_HIDDEN	BIT(3)
+
+struct loongson_pci_data {
+	u32 flags;
+	struct pci_ops *ops;
+};
+
+struct loongson_pci {
+	void __iomem *cfg0_base;
+	void __iomem *cfg1_base;
+	struct platform_device *pdev;
+	const struct loongson_pci_data *data;
+};
+
+/* Fixup wrong class code in PCIe bridges */
+static void bridge_class_quirk(struct pci_dev *dev)
+{
+	dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_PCIE_PORT0, bridge_class_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_PCIE_PORT1, bridge_class_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_PCIE_PORT2, bridge_class_quirk);
+
+static void system_bus_quirk(struct pci_dev *pdev)
+{
+	/*
+	 * The address space consumed by these devices is outside the
+	 * resources of the host bridge.
+	 */
+	pdev->mmio_always_on = 1;
+	pdev->non_compliant_bars = 1;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS2K_APB, system_bus_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_CONF, system_bus_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_LPC, system_bus_quirk);
+
+/*
+ * Some Loongson PCIe ports have hardware limitations on their Maximum Read
+ * Request Size. They can't handle anything larger than this.  Sane
+ * firmware will set proper MRRS at boot, so we only need no_inc_mrrs for
+ * bridges. However, some MIPS Loongson firmware doesn't set MRRS properly,
+ * so we have to enforce maximum safe MRRS, which is 256 bytes.
+ */
+#ifdef CONFIG_MIPS
+static void loongson_set_min_mrrs_quirk(struct pci_dev *pdev)
+{
+	struct pci_bus *bus = pdev->bus;
+	struct pci_dev *bridge;
+	static const struct pci_device_id bridge_devids[] = {
+		{ PCI_VDEVICE(LOONGSON, DEV_LS2K_PCIE_PORT0) },
+		{ PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT0) },
+		{ PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT1) },
+		{ PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT2) },
+		{ PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT3) },
+		{ PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT4) },
+		{ PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT5) },
+		{ PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT6) },
+		{ 0, },
+	};
+
+	/* look for the matching bridge */
+	while (!pci_is_root_bus(bus)) {
+		bridge = bus->self;
+		bus = bus->parent;
+
+		if (pci_match_id(bridge_devids, bridge)) {
+			if (pcie_get_readrq(pdev) > 256) {
+				pci_info(pdev, "limiting MRRS to 256\n");
+				pcie_set_readrq(pdev, 256);
+			}
+			break;
+		}
+	}
+}
+DECLARE_PCI_FIXUP_ENABLE(PCI_ANY_ID, PCI_ANY_ID, loongson_set_min_mrrs_quirk);
+#endif
+
+static void loongson_mrrs_quirk(struct pci_dev *pdev)
+{
+	struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus);
+
+	bridge->no_inc_mrrs = 1;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS2K_PCIE_PORT0, loongson_mrrs_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_PCIE_PORT0, loongson_mrrs_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_PCIE_PORT1, loongson_mrrs_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_PCIE_PORT2, loongson_mrrs_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_PCIE_PORT3, loongson_mrrs_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_PCIE_PORT4, loongson_mrrs_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_PCIE_PORT5, loongson_mrrs_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_PCIE_PORT6, loongson_mrrs_quirk);
+
+static void loongson_pci_pin_quirk(struct pci_dev *pdev)
+{
+	pdev->pin = 1 + (PCI_FUNC(pdev->devfn) & 3);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_DC1, loongson_pci_pin_quirk);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_DC2, loongson_pci_pin_quirk);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_GMAC, loongson_pci_pin_quirk);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_AHCI, loongson_pci_pin_quirk);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_EHCI, loongson_pci_pin_quirk);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_GNET, loongson_pci_pin_quirk);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON,
+			DEV_LS7A_HDMI, loongson_pci_pin_quirk);
+
+static struct loongson_pci *pci_bus_to_loongson_pci(struct pci_bus *bus)
+{
+	struct pci_config_window *cfg;
+
+	if (acpi_disabled)
+		return (struct loongson_pci *)(bus->sysdata);
+
+	cfg = bus->sysdata;
+	return (struct loongson_pci *)(cfg->priv);
+}
+
+static void __iomem *cfg0_map(struct loongson_pci *priv, struct pci_bus *bus,
+			      unsigned int devfn, int where)
+{
+	unsigned long addroff = 0x0;
+	unsigned char busnum = bus->number;
+
+	if (!pci_is_root_bus(bus)) {
+		addroff |= BIT(24); /* Type 1 Access */
+		addroff |= (busnum << 16);
+	}
+	addroff |= (devfn << 8) | where;
+	return priv->cfg0_base + addroff;
+}
+
+static void __iomem *cfg1_map(struct loongson_pci *priv, struct pci_bus *bus,
+			      unsigned int devfn, int where)
+{
+	unsigned long addroff = 0x0;
+	unsigned char busnum = bus->number;
+
+	if (!pci_is_root_bus(bus)) {
+		addroff |= BIT(28); /* Type 1 Access */
+		addroff |= (busnum << 16);
+	}
+	addroff |= (devfn << 8) | (where & 0xff) | ((where & 0xf00) << 16);
+	return priv->cfg1_base + addroff;
+}
+
+static bool pdev_may_exist(struct pci_bus *bus, unsigned int device,
+			   unsigned int function)
+{
+	return !(pci_is_root_bus(bus) &&
+		(device >= 9 && device <= 20) && (function > 0));
+}
+
+static void __iomem *pci_loongson_map_bus(struct pci_bus *bus,
+					  unsigned int devfn, int where)
+{
+	unsigned int device = PCI_SLOT(devfn);
+	unsigned int function = PCI_FUNC(devfn);
+	struct loongson_pci *priv = pci_bus_to_loongson_pci(bus);
+
+	/*
+	 * Do not read more than one device on the bus other than
+	 * the host bus.
+	 */
+	if ((priv->data->flags & FLAG_DEV_FIX) && bus->self) {
+		if (!pci_is_root_bus(bus) && (device > 0))
+			return NULL;
+	}
+
+	/* Don't access non-existent devices */
+	if (priv->data->flags & FLAG_DEV_HIDDEN) {
+		if (!pdev_may_exist(bus, device, function))
+			return NULL;
+	}
+
+	/* CFG0 can only access standard space */
+	if (where < PCI_CFG_SPACE_SIZE && priv->cfg0_base)
+		return cfg0_map(priv, bus, devfn, where);
+
+	/* CFG1 can access extended space */
+	if (where < PCI_CFG_SPACE_EXP_SIZE && priv->cfg1_base)
+		return cfg1_map(priv, bus, devfn, where);
+
+	return NULL;
+}
+
+#ifdef CONFIG_OF
+
+static int loongson_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	int irq;
+	u8 val;
+
+	irq = of_irq_parse_and_map_pci(dev, slot, pin);
+	if (irq > 0)
+		return irq;
+
+	/* Care i8259 legacy systems */
+	pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &val);
+	/* i8259 only have 15 IRQs */
+	if (val > 15)
+		return 0;
+
+	return val;
+}
+
+/* LS2K/LS7A accept 8/16/32-bit PCI config operations */
+static struct pci_ops loongson_pci_ops = {
+	.map_bus = pci_loongson_map_bus,
+	.read	= pci_generic_config_read,
+	.write	= pci_generic_config_write,
+};
+
+/* RS780/SR5690 only accept 32-bit PCI config operations */
+static struct pci_ops loongson_pci_ops32 = {
+	.map_bus = pci_loongson_map_bus,
+	.read	= pci_generic_config_read32,
+	.write	= pci_generic_config_write32,
+};
+
+static const struct loongson_pci_data ls2k_pci_data = {
+	.flags = FLAG_CFG1 | FLAG_DEV_FIX | FLAG_DEV_HIDDEN,
+	.ops = &loongson_pci_ops,
+};
+
+static const struct loongson_pci_data ls7a_pci_data = {
+	.flags = FLAG_CFG1 | FLAG_DEV_FIX | FLAG_DEV_HIDDEN,
+	.ops = &loongson_pci_ops,
+};
+
+static const struct loongson_pci_data rs780e_pci_data = {
+	.flags = FLAG_CFG0,
+	.ops = &loongson_pci_ops32,
+};
+
+static const struct of_device_id loongson_pci_of_match[] = {
+	{ .compatible = "loongson,ls2k-pci",
+		.data = &ls2k_pci_data, },
+	{ .compatible = "loongson,ls7a-pci",
+		.data = &ls7a_pci_data, },
+	{ .compatible = "loongson,rs780e-pci",
+		.data = &rs780e_pci_data, },
+	{}
+};
+
+static int loongson_pci_probe(struct platform_device *pdev)
+{
+	struct loongson_pci *priv;
+	struct device *dev = &pdev->dev;
+	struct device_node *node = dev->of_node;
+	struct pci_host_bridge *bridge;
+	struct resource *regs;
+
+	if (!node)
+		return -ENODEV;
+
+	bridge = devm_pci_alloc_host_bridge(dev, sizeof(*priv));
+	if (!bridge)
+		return -ENODEV;
+
+	priv = pci_host_bridge_priv(bridge);
+	priv->pdev = pdev;
+	priv->data = of_device_get_match_data(dev);
+
+	if (priv->data->flags & FLAG_CFG0) {
+		regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!regs)
+			dev_err(dev, "missing mem resources for cfg0\n");
+		else {
+			priv->cfg0_base = devm_pci_remap_cfg_resource(dev, regs);
+			if (IS_ERR(priv->cfg0_base))
+				return PTR_ERR(priv->cfg0_base);
+		}
+	}
+
+	if (priv->data->flags & FLAG_CFG1) {
+		regs = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+		if (!regs)
+			dev_info(dev, "missing mem resource for cfg1\n");
+		else {
+			priv->cfg1_base = devm_pci_remap_cfg_resource(dev, regs);
+			if (IS_ERR(priv->cfg1_base))
+				priv->cfg1_base = NULL;
+		}
+	}
+
+	bridge->sysdata = priv;
+	bridge->ops = priv->data->ops;
+	bridge->map_irq = loongson_map_irq;
+
+	return pci_host_probe(bridge);
+}
+
+static struct platform_driver loongson_pci_driver = {
+	.driver = {
+		.name = "loongson-pci",
+		.of_match_table = loongson_pci_of_match,
+	},
+	.probe = loongson_pci_probe,
+};
+builtin_platform_driver(loongson_pci_driver);
+
+#endif
+
+#ifdef CONFIG_ACPI
+
+static int loongson_pci_ecam_init(struct pci_config_window *cfg)
+{
+	struct device *dev = cfg->parent;
+	struct loongson_pci *priv;
+	struct loongson_pci_data *data;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	cfg->priv = priv;
+	data->flags = FLAG_CFG1 | FLAG_DEV_HIDDEN;
+	priv->data = data;
+	priv->cfg1_base = cfg->win - (cfg->busr.start << 16);
+
+	return 0;
+}
+
+const struct pci_ecam_ops loongson_pci_ecam_ops = {
+	.bus_shift = 16,
+	.init	   = loongson_pci_ecam_init,
+	.pci_ops   = {
+		.map_bus = pci_loongson_map_bus,
+		.read	 = pci_generic_config_read,
+		.write	 = pci_generic_config_write,
+	}
+};
+
+#endif
diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
new file mode 100644
index 000000000..29fe09c99
--- /dev/null
+++ b/drivers/pci/controller/pci-mvebu.c
@@ -0,0 +1,1737 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe driver for Marvell Armada 370 and Armada XP SoCs
+ *
+ * Author: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/init.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/mbus.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+
+#include "../pci.h"
+#include "../pci-bridge-emul.h"
+
+/*
+ * PCIe unit register offsets.
+ */
+#define PCIE_DEV_ID_OFF		0x0000
+#define PCIE_CMD_OFF		0x0004
+#define PCIE_DEV_REV_OFF	0x0008
+#define PCIE_BAR_LO_OFF(n)	(0x0010 + ((n) << 3))
+#define PCIE_BAR_HI_OFF(n)	(0x0014 + ((n) << 3))
+#define PCIE_SSDEV_ID_OFF	0x002c
+#define PCIE_CAP_PCIEXP		0x0060
+#define PCIE_CAP_PCIERR_OFF	0x0100
+#define PCIE_BAR_CTRL_OFF(n)	(0x1804 + (((n) - 1) * 4))
+#define PCIE_WIN04_CTRL_OFF(n)	(0x1820 + ((n) << 4))
+#define PCIE_WIN04_BASE_OFF(n)	(0x1824 + ((n) << 4))
+#define PCIE_WIN04_REMAP_OFF(n)	(0x182c + ((n) << 4))
+#define PCIE_WIN5_CTRL_OFF	0x1880
+#define PCIE_WIN5_BASE_OFF	0x1884
+#define PCIE_WIN5_REMAP_OFF	0x188c
+#define PCIE_CONF_ADDR_OFF	0x18f8
+#define  PCIE_CONF_ADDR_EN		0x80000000
+#define  PCIE_CONF_REG(r)		((((r) & 0xf00) << 16) | ((r) & 0xfc))
+#define  PCIE_CONF_BUS(b)		(((b) & 0xff) << 16)
+#define  PCIE_CONF_DEV(d)		(((d) & 0x1f) << 11)
+#define  PCIE_CONF_FUNC(f)		(((f) & 0x7) << 8)
+#define  PCIE_CONF_ADDR(bus, devfn, where) \
+	(PCIE_CONF_BUS(bus) | PCIE_CONF_DEV(PCI_SLOT(devfn))    | \
+	 PCIE_CONF_FUNC(PCI_FUNC(devfn)) | PCIE_CONF_REG(where) | \
+	 PCIE_CONF_ADDR_EN)
+#define PCIE_CONF_DATA_OFF	0x18fc
+#define PCIE_INT_CAUSE_OFF	0x1900
+#define PCIE_INT_UNMASK_OFF	0x1910
+#define  PCIE_INT_INTX(i)		BIT(24+i)
+#define  PCIE_INT_PM_PME		BIT(28)
+#define  PCIE_INT_ALL_MASK		GENMASK(31, 0)
+#define PCIE_CTRL_OFF		0x1a00
+#define  PCIE_CTRL_X1_MODE		0x0001
+#define  PCIE_CTRL_RC_MODE		BIT(1)
+#define  PCIE_CTRL_MASTER_HOT_RESET	BIT(24)
+#define PCIE_STAT_OFF		0x1a04
+#define  PCIE_STAT_BUS                  0xff00
+#define  PCIE_STAT_DEV                  0x1f0000
+#define  PCIE_STAT_LINK_DOWN		BIT(0)
+#define PCIE_SSPL_OFF		0x1a0c
+#define  PCIE_SSPL_VALUE_SHIFT		0
+#define  PCIE_SSPL_VALUE_MASK		GENMASK(7, 0)
+#define  PCIE_SSPL_SCALE_SHIFT		8
+#define  PCIE_SSPL_SCALE_MASK		GENMASK(9, 8)
+#define  PCIE_SSPL_ENABLE		BIT(16)
+#define PCIE_RC_RTSTA		0x1a14
+#define PCIE_DEBUG_CTRL         0x1a60
+#define  PCIE_DEBUG_SOFT_RESET		BIT(20)
+
+struct mvebu_pcie_port;
+
+/* Structure representing all PCIe interfaces */
+struct mvebu_pcie {
+	struct platform_device *pdev;
+	struct mvebu_pcie_port *ports;
+	struct resource io;
+	struct resource realio;
+	struct resource mem;
+	int nports;
+};
+
+struct mvebu_pcie_window {
+	phys_addr_t base;
+	phys_addr_t remap;
+	size_t size;
+};
+
+/* Structure representing one PCIe interface */
+struct mvebu_pcie_port {
+	char *name;
+	void __iomem *base;
+	u32 port;
+	u32 lane;
+	bool is_x4;
+	int devfn;
+	unsigned int mem_target;
+	unsigned int mem_attr;
+	unsigned int io_target;
+	unsigned int io_attr;
+	struct clk *clk;
+	struct gpio_desc *reset_gpio;
+	char *reset_name;
+	struct pci_bridge_emul bridge;
+	struct device_node *dn;
+	struct mvebu_pcie *pcie;
+	struct mvebu_pcie_window memwin;
+	struct mvebu_pcie_window iowin;
+	u32 saved_pcie_stat;
+	struct resource regs;
+	u8 slot_power_limit_value;
+	u8 slot_power_limit_scale;
+	struct irq_domain *intx_irq_domain;
+	raw_spinlock_t irq_lock;
+	int intx_irq;
+};
+
+static inline void mvebu_writel(struct mvebu_pcie_port *port, u32 val, u32 reg)
+{
+	writel(val, port->base + reg);
+}
+
+static inline u32 mvebu_readl(struct mvebu_pcie_port *port, u32 reg)
+{
+	return readl(port->base + reg);
+}
+
+static inline bool mvebu_has_ioport(struct mvebu_pcie_port *port)
+{
+	return port->io_target != -1 && port->io_attr != -1;
+}
+
+static bool mvebu_pcie_link_up(struct mvebu_pcie_port *port)
+{
+	return !(mvebu_readl(port, PCIE_STAT_OFF) & PCIE_STAT_LINK_DOWN);
+}
+
+static u8 mvebu_pcie_get_local_bus_nr(struct mvebu_pcie_port *port)
+{
+	return (mvebu_readl(port, PCIE_STAT_OFF) & PCIE_STAT_BUS) >> 8;
+}
+
+static void mvebu_pcie_set_local_bus_nr(struct mvebu_pcie_port *port, int nr)
+{
+	u32 stat;
+
+	stat = mvebu_readl(port, PCIE_STAT_OFF);
+	stat &= ~PCIE_STAT_BUS;
+	stat |= nr << 8;
+	mvebu_writel(port, stat, PCIE_STAT_OFF);
+}
+
+static void mvebu_pcie_set_local_dev_nr(struct mvebu_pcie_port *port, int nr)
+{
+	u32 stat;
+
+	stat = mvebu_readl(port, PCIE_STAT_OFF);
+	stat &= ~PCIE_STAT_DEV;
+	stat |= nr << 16;
+	mvebu_writel(port, stat, PCIE_STAT_OFF);
+}
+
+static void mvebu_pcie_disable_wins(struct mvebu_pcie_port *port)
+{
+	int i;
+
+	mvebu_writel(port, 0, PCIE_BAR_LO_OFF(0));
+	mvebu_writel(port, 0, PCIE_BAR_HI_OFF(0));
+
+	for (i = 1; i < 3; i++) {
+		mvebu_writel(port, 0, PCIE_BAR_CTRL_OFF(i));
+		mvebu_writel(port, 0, PCIE_BAR_LO_OFF(i));
+		mvebu_writel(port, 0, PCIE_BAR_HI_OFF(i));
+	}
+
+	for (i = 0; i < 5; i++) {
+		mvebu_writel(port, 0, PCIE_WIN04_CTRL_OFF(i));
+		mvebu_writel(port, 0, PCIE_WIN04_BASE_OFF(i));
+		mvebu_writel(port, 0, PCIE_WIN04_REMAP_OFF(i));
+	}
+
+	mvebu_writel(port, 0, PCIE_WIN5_CTRL_OFF);
+	mvebu_writel(port, 0, PCIE_WIN5_BASE_OFF);
+	mvebu_writel(port, 0, PCIE_WIN5_REMAP_OFF);
+}
+
+/*
+ * Setup PCIE BARs and Address Decode Wins:
+ * BAR[0] -> internal registers (needed for MSI)
+ * BAR[1] -> covers all DRAM banks
+ * BAR[2] -> Disabled
+ * WIN[0-3] -> DRAM bank[0-3]
+ */
+static void mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
+{
+	const struct mbus_dram_target_info *dram;
+	u32 size;
+	int i;
+
+	dram = mv_mbus_dram_info();
+
+	/* First, disable and clear BARs and windows. */
+	mvebu_pcie_disable_wins(port);
+
+	/* Setup windows for DDR banks.  Count total DDR size on the fly. */
+	size = 0;
+	for (i = 0; i < dram->num_cs; i++) {
+		const struct mbus_dram_window *cs = dram->cs + i;
+
+		mvebu_writel(port, cs->base & 0xffff0000,
+			     PCIE_WIN04_BASE_OFF(i));
+		mvebu_writel(port, 0, PCIE_WIN04_REMAP_OFF(i));
+		mvebu_writel(port,
+			     ((cs->size - 1) & 0xffff0000) |
+			     (cs->mbus_attr << 8) |
+			     (dram->mbus_dram_target_id << 4) | 1,
+			     PCIE_WIN04_CTRL_OFF(i));
+
+		size += cs->size;
+	}
+
+	/* Round up 'size' to the nearest power of two. */
+	if ((size & (size - 1)) != 0)
+		size = 1 << fls(size);
+
+	/* Setup BAR[1] to all DRAM banks. */
+	mvebu_writel(port, dram->cs[0].base, PCIE_BAR_LO_OFF(1));
+	mvebu_writel(port, 0, PCIE_BAR_HI_OFF(1));
+	mvebu_writel(port, ((size - 1) & 0xffff0000) | 1,
+		     PCIE_BAR_CTRL_OFF(1));
+
+	/*
+	 * Point BAR[0] to the device's internal registers.
+	 */
+	mvebu_writel(port, round_down(port->regs.start, SZ_1M), PCIE_BAR_LO_OFF(0));
+	mvebu_writel(port, 0, PCIE_BAR_HI_OFF(0));
+}
+
+static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
+{
+	u32 ctrl, lnkcap, cmd, dev_rev, unmask, sspl;
+
+	/* Setup PCIe controller to Root Complex mode. */
+	ctrl = mvebu_readl(port, PCIE_CTRL_OFF);
+	ctrl |= PCIE_CTRL_RC_MODE;
+	mvebu_writel(port, ctrl, PCIE_CTRL_OFF);
+
+	/*
+	 * Set Maximum Link Width to X1 or X4 in Root Port's PCIe Link
+	 * Capability register. This register is defined by PCIe specification
+	 * as read-only but this mvebu controller has it as read-write and must
+	 * be set to number of SerDes PCIe lanes (1 or 4). If this register is
+	 * not set correctly then link with endpoint card is not established.
+	 */
+	lnkcap = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_LNKCAP);
+	lnkcap &= ~PCI_EXP_LNKCAP_MLW;
+	lnkcap |= FIELD_PREP(PCI_EXP_LNKCAP_MLW, port->is_x4 ? 4 : 1);
+	mvebu_writel(port, lnkcap, PCIE_CAP_PCIEXP + PCI_EXP_LNKCAP);
+
+	/* Disable Root Bridge I/O space, memory space and bus mastering. */
+	cmd = mvebu_readl(port, PCIE_CMD_OFF);
+	cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+	mvebu_writel(port, cmd, PCIE_CMD_OFF);
+
+	/*
+	 * Change Class Code of PCI Bridge device to PCI Bridge (0x6004)
+	 * because default value is Memory controller (0x5080).
+	 *
+	 * Note that this mvebu PCI Bridge does not have compliant Type 1
+	 * Configuration Space. Header Type is reported as Type 0 and it
+	 * has format of Type 0 config space.
+	 *
+	 * Moreover Type 0 BAR registers (ranges 0x10 - 0x28 and 0x30 - 0x34)
+	 * have the same format in Marvell's specification as in PCIe
+	 * specification, but their meaning is totally different and they do
+	 * different things: they are aliased into internal mvebu registers
+	 * (e.g. PCIE_BAR_LO_OFF) and these should not be changed or
+	 * reconfigured by pci device drivers.
+	 *
+	 * Therefore driver uses emulation of PCI Bridge which emulates
+	 * access to configuration space via internal mvebu registers or
+	 * emulated configuration buffer. Driver access these PCI Bridge
+	 * directly for simplification, but these registers can be accessed
+	 * also via standard mvebu way for accessing PCI config space.
+	 */
+	dev_rev = mvebu_readl(port, PCIE_DEV_REV_OFF);
+	dev_rev &= ~0xffffff00;
+	dev_rev |= PCI_CLASS_BRIDGE_PCI_NORMAL << 8;
+	mvebu_writel(port, dev_rev, PCIE_DEV_REV_OFF);
+
+	/* Point PCIe unit MBUS decode windows to DRAM space. */
+	mvebu_pcie_setup_wins(port);
+
+	/*
+	 * Program Root Port to automatically send Set_Slot_Power_Limit
+	 * PCIe Message when changing status from Dl_Down to Dl_Up and valid
+	 * slot power limit was specified.
+	 */
+	sspl = mvebu_readl(port, PCIE_SSPL_OFF);
+	sspl &= ~(PCIE_SSPL_VALUE_MASK | PCIE_SSPL_SCALE_MASK | PCIE_SSPL_ENABLE);
+	if (port->slot_power_limit_value) {
+		sspl |= port->slot_power_limit_value << PCIE_SSPL_VALUE_SHIFT;
+		sspl |= port->slot_power_limit_scale << PCIE_SSPL_SCALE_SHIFT;
+		sspl |= PCIE_SSPL_ENABLE;
+	}
+	mvebu_writel(port, sspl, PCIE_SSPL_OFF);
+
+	/* Mask all interrupt sources. */
+	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_UNMASK_OFF);
+
+	/* Clear all interrupt causes. */
+	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_CAUSE_OFF);
+
+	/* Check if "intx" interrupt was specified in DT. */
+	if (port->intx_irq > 0)
+		return;
+
+	/*
+	 * Fallback code when "intx" interrupt was not specified in DT:
+	 * Unmask all legacy INTx interrupts as driver does not provide a way
+	 * for masking and unmasking of individual legacy INTx interrupts.
+	 * Legacy INTx are reported via one shared GIC source and therefore
+	 * kernel cannot distinguish which individual legacy INTx was triggered.
+	 * These interrupts are shared, so it should not cause any issue. Just
+	 * performance penalty as every PCIe interrupt handler needs to be
+	 * called when some interrupt is triggered.
+	 */
+	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
+	unmask |= PCIE_INT_INTX(0) | PCIE_INT_INTX(1) |
+		  PCIE_INT_INTX(2) | PCIE_INT_INTX(3);
+	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
+}
+
+static struct mvebu_pcie_port *mvebu_pcie_find_port(struct mvebu_pcie *pcie,
+						    struct pci_bus *bus,
+						    int devfn);
+
+static int mvebu_pcie_child_rd_conf(struct pci_bus *bus, u32 devfn, int where,
+				    int size, u32 *val)
+{
+	struct mvebu_pcie *pcie = bus->sysdata;
+	struct mvebu_pcie_port *port;
+	void __iomem *conf_data;
+
+	port = mvebu_pcie_find_port(pcie, bus, devfn);
+	if (!port)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (!mvebu_pcie_link_up(port))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	conf_data = port->base + PCIE_CONF_DATA_OFF;
+
+	mvebu_writel(port, PCIE_CONF_ADDR(bus->number, devfn, where),
+		     PCIE_CONF_ADDR_OFF);
+
+	switch (size) {
+	case 1:
+		*val = readb_relaxed(conf_data + (where & 3));
+		break;
+	case 2:
+		*val = readw_relaxed(conf_data + (where & 2));
+		break;
+	case 4:
+		*val = readl_relaxed(conf_data);
+		break;
+	default:
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int mvebu_pcie_child_wr_conf(struct pci_bus *bus, u32 devfn,
+				    int where, int size, u32 val)
+{
+	struct mvebu_pcie *pcie = bus->sysdata;
+	struct mvebu_pcie_port *port;
+	void __iomem *conf_data;
+
+	port = mvebu_pcie_find_port(pcie, bus, devfn);
+	if (!port)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (!mvebu_pcie_link_up(port))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	conf_data = port->base + PCIE_CONF_DATA_OFF;
+
+	mvebu_writel(port, PCIE_CONF_ADDR(bus->number, devfn, where),
+		     PCIE_CONF_ADDR_OFF);
+
+	switch (size) {
+	case 1:
+		writeb(val, conf_data + (where & 3));
+		break;
+	case 2:
+		writew(val, conf_data + (where & 2));
+		break;
+	case 4:
+		writel(val, conf_data);
+		break;
+	default:
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops mvebu_pcie_child_ops = {
+	.read = mvebu_pcie_child_rd_conf,
+	.write = mvebu_pcie_child_wr_conf,
+};
+
+/*
+ * Remove windows, starting from the largest ones to the smallest
+ * ones.
+ */
+static void mvebu_pcie_del_windows(struct mvebu_pcie_port *port,
+				   phys_addr_t base, size_t size)
+{
+	while (size) {
+		size_t sz = 1 << (fls(size) - 1);
+
+		mvebu_mbus_del_window(base, sz);
+		base += sz;
+		size -= sz;
+	}
+}
+
+/*
+ * MBus windows can only have a power of two size, but PCI BARs do not
+ * have this constraint. Therefore, we have to split the PCI BAR into
+ * areas each having a power of two size. We start from the largest
+ * one (i.e highest order bit set in the size).
+ */
+static int mvebu_pcie_add_windows(struct mvebu_pcie_port *port,
+				   unsigned int target, unsigned int attribute,
+				   phys_addr_t base, size_t size,
+				   phys_addr_t remap)
+{
+	size_t size_mapped = 0;
+
+	while (size) {
+		size_t sz = 1 << (fls(size) - 1);
+		int ret;
+
+		ret = mvebu_mbus_add_window_remap_by_id(target, attribute, base,
+							sz, remap);
+		if (ret) {
+			phys_addr_t end = base + sz - 1;
+
+			dev_err(&port->pcie->pdev->dev,
+				"Could not create MBus window at [mem %pa-%pa]: %d\n",
+				&base, &end, ret);
+			mvebu_pcie_del_windows(port, base - size_mapped,
+					       size_mapped);
+			return ret;
+		}
+
+		size -= sz;
+		size_mapped += sz;
+		base += sz;
+		if (remap != MVEBU_MBUS_NO_REMAP)
+			remap += sz;
+	}
+
+	return 0;
+}
+
+static int mvebu_pcie_set_window(struct mvebu_pcie_port *port,
+				  unsigned int target, unsigned int attribute,
+				  const struct mvebu_pcie_window *desired,
+				  struct mvebu_pcie_window *cur)
+{
+	int ret;
+
+	if (desired->base == cur->base && desired->remap == cur->remap &&
+	    desired->size == cur->size)
+		return 0;
+
+	if (cur->size != 0) {
+		mvebu_pcie_del_windows(port, cur->base, cur->size);
+		cur->size = 0;
+		cur->base = 0;
+
+		/*
+		 * If something tries to change the window while it is enabled
+		 * the change will not be done atomically. That would be
+		 * difficult to do in the general case.
+		 */
+	}
+
+	if (desired->size == 0)
+		return 0;
+
+	ret = mvebu_pcie_add_windows(port, target, attribute, desired->base,
+				     desired->size, desired->remap);
+	if (ret) {
+		cur->size = 0;
+		cur->base = 0;
+		return ret;
+	}
+
+	*cur = *desired;
+	return 0;
+}
+
+static int mvebu_pcie_handle_iobase_change(struct mvebu_pcie_port *port)
+{
+	struct mvebu_pcie_window desired = {};
+	struct pci_bridge_emul_conf *conf = &port->bridge.conf;
+
+	/* Are the new iobase/iolimit values invalid? */
+	if (conf->iolimit < conf->iobase ||
+	    le16_to_cpu(conf->iolimitupper) < le16_to_cpu(conf->iobaseupper))
+		return mvebu_pcie_set_window(port, port->io_target, port->io_attr,
+					     &desired, &port->iowin);
+
+	/*
+	 * We read the PCI-to-PCI bridge emulated registers, and
+	 * calculate the base address and size of the address decoding
+	 * window to setup, according to the PCI-to-PCI bridge
+	 * specifications. iobase is the bus address, port->iowin_base
+	 * is the CPU address.
+	 */
+	desired.remap = ((conf->iobase & 0xF0) << 8) |
+			(le16_to_cpu(conf->iobaseupper) << 16);
+	desired.base = port->pcie->io.start + desired.remap;
+	desired.size = ((0xFFF | ((conf->iolimit & 0xF0) << 8) |
+			 (le16_to_cpu(conf->iolimitupper) << 16)) -
+			desired.remap) +
+		       1;
+
+	return mvebu_pcie_set_window(port, port->io_target, port->io_attr, &desired,
+				     &port->iowin);
+}
+
+static int mvebu_pcie_handle_membase_change(struct mvebu_pcie_port *port)
+{
+	struct mvebu_pcie_window desired = {.remap = MVEBU_MBUS_NO_REMAP};
+	struct pci_bridge_emul_conf *conf = &port->bridge.conf;
+
+	/* Are the new membase/memlimit values invalid? */
+	if (le16_to_cpu(conf->memlimit) < le16_to_cpu(conf->membase))
+		return mvebu_pcie_set_window(port, port->mem_target, port->mem_attr,
+					     &desired, &port->memwin);
+
+	/*
+	 * We read the PCI-to-PCI bridge emulated registers, and
+	 * calculate the base address and size of the address decoding
+	 * window to setup, according to the PCI-to-PCI bridge
+	 * specifications.
+	 */
+	desired.base = ((le16_to_cpu(conf->membase) & 0xFFF0) << 16);
+	desired.size = (((le16_to_cpu(conf->memlimit) & 0xFFF0) << 16) | 0xFFFFF) -
+		       desired.base + 1;
+
+	return mvebu_pcie_set_window(port, port->mem_target, port->mem_attr, &desired,
+				     &port->memwin);
+}
+
+static pci_bridge_emul_read_status_t
+mvebu_pci_bridge_emul_base_conf_read(struct pci_bridge_emul *bridge,
+				     int reg, u32 *value)
+{
+	struct mvebu_pcie_port *port = bridge->data;
+
+	switch (reg) {
+	case PCI_COMMAND:
+		*value = mvebu_readl(port, PCIE_CMD_OFF);
+		break;
+
+	case PCI_PRIMARY_BUS: {
+		/*
+		 * From the whole 32bit register we support reading from HW only
+		 * secondary bus number which is mvebu local bus number.
+		 * Other bits are retrieved only from emulated config buffer.
+		 */
+		__le32 *cfgspace = (__le32 *)&bridge->conf;
+		u32 val = le32_to_cpu(cfgspace[PCI_PRIMARY_BUS / 4]);
+		val &= ~0xff00;
+		val |= mvebu_pcie_get_local_bus_nr(port) << 8;
+		*value = val;
+		break;
+	}
+
+	case PCI_INTERRUPT_LINE: {
+		/*
+		 * From the whole 32bit register we support reading from HW only
+		 * one bit: PCI_BRIDGE_CTL_BUS_RESET.
+		 * Other bits are retrieved only from emulated config buffer.
+		 */
+		__le32 *cfgspace = (__le32 *)&bridge->conf;
+		u32 val = le32_to_cpu(cfgspace[PCI_INTERRUPT_LINE / 4]);
+		if (mvebu_readl(port, PCIE_CTRL_OFF) & PCIE_CTRL_MASTER_HOT_RESET)
+			val |= PCI_BRIDGE_CTL_BUS_RESET << 16;
+		else
+			val &= ~(PCI_BRIDGE_CTL_BUS_RESET << 16);
+		*value = val;
+		break;
+	}
+
+	default:
+		return PCI_BRIDGE_EMUL_NOT_HANDLED;
+	}
+
+	return PCI_BRIDGE_EMUL_HANDLED;
+}
+
+static pci_bridge_emul_read_status_t
+mvebu_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge,
+				     int reg, u32 *value)
+{
+	struct mvebu_pcie_port *port = bridge->data;
+
+	switch (reg) {
+	case PCI_EXP_DEVCAP:
+		*value = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_DEVCAP);
+		break;
+
+	case PCI_EXP_DEVCTL:
+		*value = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_DEVCTL);
+		break;
+
+	case PCI_EXP_LNKCAP:
+		/*
+		 * PCIe requires that the Clock Power Management capability bit
+		 * is hard-wired to zero for downstream ports but HW returns 1.
+		 * Additionally enable Data Link Layer Link Active Reporting
+		 * Capable bit as DL_Active indication is provided too.
+		 */
+		*value = (mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_LNKCAP) &
+			  ~PCI_EXP_LNKCAP_CLKPM) | PCI_EXP_LNKCAP_DLLLARC;
+		break;
+
+	case PCI_EXP_LNKCTL:
+		/* DL_Active indication is provided via PCIE_STAT_OFF */
+		*value = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_LNKCTL) |
+			 (mvebu_pcie_link_up(port) ?
+			  (PCI_EXP_LNKSTA_DLLLA << 16) : 0);
+		break;
+
+	case PCI_EXP_SLTCTL: {
+		u16 slotctl = le16_to_cpu(bridge->pcie_conf.slotctl);
+		u16 slotsta = le16_to_cpu(bridge->pcie_conf.slotsta);
+		u32 val = 0;
+		/*
+		 * When slot power limit was not specified in DT then
+		 * ASPL_DISABLE bit is stored only in emulated config space.
+		 * Otherwise reflect status of PCIE_SSPL_ENABLE bit in HW.
+		 */
+		if (!port->slot_power_limit_value)
+			val |= slotctl & PCI_EXP_SLTCTL_ASPL_DISABLE;
+		else if (!(mvebu_readl(port, PCIE_SSPL_OFF) & PCIE_SSPL_ENABLE))
+			val |= PCI_EXP_SLTCTL_ASPL_DISABLE;
+		/* This callback is 32-bit and in high bits is slot status. */
+		val |= slotsta << 16;
+		*value = val;
+		break;
+	}
+
+	case PCI_EXP_RTSTA:
+		*value = mvebu_readl(port, PCIE_RC_RTSTA);
+		break;
+
+	case PCI_EXP_DEVCAP2:
+		*value = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_DEVCAP2);
+		break;
+
+	case PCI_EXP_DEVCTL2:
+		*value = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_DEVCTL2);
+		break;
+
+	case PCI_EXP_LNKCTL2:
+		*value = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_LNKCTL2);
+		break;
+
+	default:
+		return PCI_BRIDGE_EMUL_NOT_HANDLED;
+	}
+
+	return PCI_BRIDGE_EMUL_HANDLED;
+}
+
+static pci_bridge_emul_read_status_t
+mvebu_pci_bridge_emul_ext_conf_read(struct pci_bridge_emul *bridge,
+				    int reg, u32 *value)
+{
+	struct mvebu_pcie_port *port = bridge->data;
+
+	switch (reg) {
+	case 0:
+	case PCI_ERR_UNCOR_STATUS:
+	case PCI_ERR_UNCOR_MASK:
+	case PCI_ERR_UNCOR_SEVER:
+	case PCI_ERR_COR_STATUS:
+	case PCI_ERR_COR_MASK:
+	case PCI_ERR_CAP:
+	case PCI_ERR_HEADER_LOG+0:
+	case PCI_ERR_HEADER_LOG+4:
+	case PCI_ERR_HEADER_LOG+8:
+	case PCI_ERR_HEADER_LOG+12:
+	case PCI_ERR_ROOT_COMMAND:
+	case PCI_ERR_ROOT_STATUS:
+	case PCI_ERR_ROOT_ERR_SRC:
+		*value = mvebu_readl(port, PCIE_CAP_PCIERR_OFF + reg);
+		break;
+
+	default:
+		return PCI_BRIDGE_EMUL_NOT_HANDLED;
+	}
+
+	return PCI_BRIDGE_EMUL_HANDLED;
+}
+
+static void
+mvebu_pci_bridge_emul_base_conf_write(struct pci_bridge_emul *bridge,
+				      int reg, u32 old, u32 new, u32 mask)
+{
+	struct mvebu_pcie_port *port = bridge->data;
+	struct pci_bridge_emul_conf *conf = &bridge->conf;
+
+	switch (reg) {
+	case PCI_COMMAND:
+		mvebu_writel(port, new, PCIE_CMD_OFF);
+		break;
+
+	case PCI_IO_BASE:
+		if ((mask & 0xffff) && mvebu_has_ioport(port) &&
+		    mvebu_pcie_handle_iobase_change(port)) {
+			/* On error disable IO range */
+			conf->iobase &= ~0xf0;
+			conf->iolimit &= ~0xf0;
+			conf->iobase |= 0xf0;
+			conf->iobaseupper = cpu_to_le16(0x0000);
+			conf->iolimitupper = cpu_to_le16(0x0000);
+		}
+		break;
+
+	case PCI_MEMORY_BASE:
+		if (mvebu_pcie_handle_membase_change(port)) {
+			/* On error disable mem range */
+			conf->membase = cpu_to_le16(le16_to_cpu(conf->membase) & ~0xfff0);
+			conf->memlimit = cpu_to_le16(le16_to_cpu(conf->memlimit) & ~0xfff0);
+			conf->membase = cpu_to_le16(le16_to_cpu(conf->membase) | 0xfff0);
+		}
+		break;
+
+	case PCI_IO_BASE_UPPER16:
+		if (mvebu_has_ioport(port) &&
+		    mvebu_pcie_handle_iobase_change(port)) {
+			/* On error disable IO range */
+			conf->iobase &= ~0xf0;
+			conf->iolimit &= ~0xf0;
+			conf->iobase |= 0xf0;
+			conf->iobaseupper = cpu_to_le16(0x0000);
+			conf->iolimitupper = cpu_to_le16(0x0000);
+		}
+		break;
+
+	case PCI_PRIMARY_BUS:
+		if (mask & 0xff00)
+			mvebu_pcie_set_local_bus_nr(port, conf->secondary_bus);
+		break;
+
+	case PCI_INTERRUPT_LINE:
+		if (mask & (PCI_BRIDGE_CTL_BUS_RESET << 16)) {
+			u32 ctrl = mvebu_readl(port, PCIE_CTRL_OFF);
+			if (new & (PCI_BRIDGE_CTL_BUS_RESET << 16))
+				ctrl |= PCIE_CTRL_MASTER_HOT_RESET;
+			else
+				ctrl &= ~PCIE_CTRL_MASTER_HOT_RESET;
+			mvebu_writel(port, ctrl, PCIE_CTRL_OFF);
+		}
+		break;
+
+	default:
+		break;
+	}
+}
+
+static void
+mvebu_pci_bridge_emul_pcie_conf_write(struct pci_bridge_emul *bridge,
+				      int reg, u32 old, u32 new, u32 mask)
+{
+	struct mvebu_pcie_port *port = bridge->data;
+
+	switch (reg) {
+	case PCI_EXP_DEVCTL:
+		mvebu_writel(port, new, PCIE_CAP_PCIEXP + PCI_EXP_DEVCTL);
+		break;
+
+	case PCI_EXP_LNKCTL:
+		/*
+		 * PCIe requires that the Enable Clock Power Management bit
+		 * is hard-wired to zero for downstream ports but HW allows
+		 * to change it.
+		 */
+		new &= ~PCI_EXP_LNKCTL_CLKREQ_EN;
+
+		mvebu_writel(port, new, PCIE_CAP_PCIEXP + PCI_EXP_LNKCTL);
+		break;
+
+	case PCI_EXP_SLTCTL:
+		/*
+		 * Allow to change PCIE_SSPL_ENABLE bit only when slot power
+		 * limit was specified in DT and configured into HW.
+		 */
+		if ((mask & PCI_EXP_SLTCTL_ASPL_DISABLE) &&
+		    port->slot_power_limit_value) {
+			u32 sspl = mvebu_readl(port, PCIE_SSPL_OFF);
+			if (new & PCI_EXP_SLTCTL_ASPL_DISABLE)
+				sspl &= ~PCIE_SSPL_ENABLE;
+			else
+				sspl |= PCIE_SSPL_ENABLE;
+			mvebu_writel(port, sspl, PCIE_SSPL_OFF);
+		}
+		break;
+
+	case PCI_EXP_RTSTA:
+		/*
+		 * PME Status bit in Root Status Register (PCIE_RC_RTSTA)
+		 * is read-only and can be cleared only by writing 0b to the
+		 * Interrupt Cause RW0C register (PCIE_INT_CAUSE_OFF). So
+		 * clear PME via Interrupt Cause.
+		 */
+		if (new & PCI_EXP_RTSTA_PME)
+			mvebu_writel(port, ~PCIE_INT_PM_PME, PCIE_INT_CAUSE_OFF);
+		break;
+
+	case PCI_EXP_DEVCTL2:
+		mvebu_writel(port, new, PCIE_CAP_PCIEXP + PCI_EXP_DEVCTL2);
+		break;
+
+	case PCI_EXP_LNKCTL2:
+		mvebu_writel(port, new, PCIE_CAP_PCIEXP + PCI_EXP_LNKCTL2);
+		break;
+
+	default:
+		break;
+	}
+}
+
+static void
+mvebu_pci_bridge_emul_ext_conf_write(struct pci_bridge_emul *bridge,
+				     int reg, u32 old, u32 new, u32 mask)
+{
+	struct mvebu_pcie_port *port = bridge->data;
+
+	switch (reg) {
+	/* These are W1C registers, so clear other bits */
+	case PCI_ERR_UNCOR_STATUS:
+	case PCI_ERR_COR_STATUS:
+	case PCI_ERR_ROOT_STATUS:
+		new &= mask;
+		fallthrough;
+
+	case PCI_ERR_UNCOR_MASK:
+	case PCI_ERR_UNCOR_SEVER:
+	case PCI_ERR_COR_MASK:
+	case PCI_ERR_CAP:
+	case PCI_ERR_HEADER_LOG+0:
+	case PCI_ERR_HEADER_LOG+4:
+	case PCI_ERR_HEADER_LOG+8:
+	case PCI_ERR_HEADER_LOG+12:
+	case PCI_ERR_ROOT_COMMAND:
+	case PCI_ERR_ROOT_ERR_SRC:
+		mvebu_writel(port, new, PCIE_CAP_PCIERR_OFF + reg);
+		break;
+
+	default:
+		break;
+	}
+}
+
+static const struct pci_bridge_emul_ops mvebu_pci_bridge_emul_ops = {
+	.read_base = mvebu_pci_bridge_emul_base_conf_read,
+	.write_base = mvebu_pci_bridge_emul_base_conf_write,
+	.read_pcie = mvebu_pci_bridge_emul_pcie_conf_read,
+	.write_pcie = mvebu_pci_bridge_emul_pcie_conf_write,
+	.read_ext = mvebu_pci_bridge_emul_ext_conf_read,
+	.write_ext = mvebu_pci_bridge_emul_ext_conf_write,
+};
+
+/*
+ * Initialize the configuration space of the PCI-to-PCI bridge
+ * associated with the given PCIe interface.
+ */
+static int mvebu_pci_bridge_emul_init(struct mvebu_pcie_port *port)
+{
+	unsigned int bridge_flags = PCI_BRIDGE_EMUL_NO_PREFMEM_FORWARD;
+	struct pci_bridge_emul *bridge = &port->bridge;
+	u32 dev_id = mvebu_readl(port, PCIE_DEV_ID_OFF);
+	u32 dev_rev = mvebu_readl(port, PCIE_DEV_REV_OFF);
+	u32 ssdev_id = mvebu_readl(port, PCIE_SSDEV_ID_OFF);
+	u32 pcie_cap = mvebu_readl(port, PCIE_CAP_PCIEXP);
+	u8 pcie_cap_ver = ((pcie_cap >> 16) & PCI_EXP_FLAGS_VERS);
+
+	bridge->conf.vendor = cpu_to_le16(dev_id & 0xffff);
+	bridge->conf.device = cpu_to_le16(dev_id >> 16);
+	bridge->conf.class_revision = cpu_to_le32(dev_rev & 0xff);
+
+	if (mvebu_has_ioport(port)) {
+		/* We support 32 bits I/O addressing */
+		bridge->conf.iobase = PCI_IO_RANGE_TYPE_32;
+		bridge->conf.iolimit = PCI_IO_RANGE_TYPE_32;
+	} else {
+		bridge_flags |= PCI_BRIDGE_EMUL_NO_IO_FORWARD;
+	}
+
+	/*
+	 * Older mvebu hardware provides PCIe Capability structure only in
+	 * version 1. New hardware provides it in version 2.
+	 * Enable slot support which is emulated.
+	 */
+	bridge->pcie_conf.cap = cpu_to_le16(pcie_cap_ver | PCI_EXP_FLAGS_SLOT);
+
+	/*
+	 * Set Presence Detect State bit permanently as there is no support for
+	 * unplugging PCIe card from the slot. Assume that PCIe card is always
+	 * connected in slot.
+	 *
+	 * Set physical slot number to port+1 as mvebu ports are indexed from
+	 * zero and zero value is reserved for ports within the same silicon
+	 * as Root Port which is not mvebu case.
+	 *
+	 * Also set correct slot power limit.
+	 */
+	bridge->pcie_conf.slotcap = cpu_to_le32(
+		FIELD_PREP(PCI_EXP_SLTCAP_SPLV, port->slot_power_limit_value) |
+		FIELD_PREP(PCI_EXP_SLTCAP_SPLS, port->slot_power_limit_scale) |
+		FIELD_PREP(PCI_EXP_SLTCAP_PSN, port->port+1));
+	bridge->pcie_conf.slotsta = cpu_to_le16(PCI_EXP_SLTSTA_PDS);
+
+	bridge->subsystem_vendor_id = ssdev_id & 0xffff;
+	bridge->subsystem_id = ssdev_id >> 16;
+	bridge->has_pcie = true;
+	bridge->pcie_start = PCIE_CAP_PCIEXP;
+	bridge->data = port;
+	bridge->ops = &mvebu_pci_bridge_emul_ops;
+
+	return pci_bridge_emul_init(bridge, bridge_flags);
+}
+
+static inline struct mvebu_pcie *sys_to_pcie(struct pci_sys_data *sys)
+{
+	return sys->private_data;
+}
+
+static struct mvebu_pcie_port *mvebu_pcie_find_port(struct mvebu_pcie *pcie,
+						    struct pci_bus *bus,
+						    int devfn)
+{
+	int i;
+
+	for (i = 0; i < pcie->nports; i++) {
+		struct mvebu_pcie_port *port = &pcie->ports[i];
+
+		if (!port->base)
+			continue;
+
+		if (bus->number == 0 && port->devfn == devfn)
+			return port;
+		if (bus->number != 0 &&
+		    bus->number >= port->bridge.conf.secondary_bus &&
+		    bus->number <= port->bridge.conf.subordinate_bus)
+			return port;
+	}
+
+	return NULL;
+}
+
+/* PCI configuration space write function */
+static int mvebu_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
+			      int where, int size, u32 val)
+{
+	struct mvebu_pcie *pcie = bus->sysdata;
+	struct mvebu_pcie_port *port;
+
+	port = mvebu_pcie_find_port(pcie, bus, devfn);
+	if (!port)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return pci_bridge_emul_conf_write(&port->bridge, where, size, val);
+}
+
+/* PCI configuration space read function */
+static int mvebu_pcie_rd_conf(struct pci_bus *bus, u32 devfn, int where,
+			      int size, u32 *val)
+{
+	struct mvebu_pcie *pcie = bus->sysdata;
+	struct mvebu_pcie_port *port;
+
+	port = mvebu_pcie_find_port(pcie, bus, devfn);
+	if (!port)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return pci_bridge_emul_conf_read(&port->bridge, where, size, val);
+}
+
+static struct pci_ops mvebu_pcie_ops = {
+	.read = mvebu_pcie_rd_conf,
+	.write = mvebu_pcie_wr_conf,
+};
+
+static void mvebu_pcie_intx_irq_mask(struct irq_data *d)
+{
+	struct mvebu_pcie_port *port = d->domain->host_data;
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 unmask;
+
+	raw_spin_lock_irqsave(&port->irq_lock, flags);
+	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
+	unmask &= ~PCIE_INT_INTX(hwirq);
+	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
+	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
+}
+
+static void mvebu_pcie_intx_irq_unmask(struct irq_data *d)
+{
+	struct mvebu_pcie_port *port = d->domain->host_data;
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 unmask;
+
+	raw_spin_lock_irqsave(&port->irq_lock, flags);
+	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
+	unmask |= PCIE_INT_INTX(hwirq);
+	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
+	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
+}
+
+static struct irq_chip intx_irq_chip = {
+	.name = "mvebu-INTx",
+	.irq_mask = mvebu_pcie_intx_irq_mask,
+	.irq_unmask = mvebu_pcie_intx_irq_unmask,
+};
+
+static int mvebu_pcie_intx_irq_map(struct irq_domain *h,
+				   unsigned int virq, irq_hw_number_t hwirq)
+{
+	struct mvebu_pcie_port *port = h->host_data;
+
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_chip_and_handler(virq, &intx_irq_chip, handle_level_irq);
+	irq_set_chip_data(virq, port);
+
+	return 0;
+}
+
+static const struct irq_domain_ops mvebu_pcie_intx_irq_domain_ops = {
+	.map = mvebu_pcie_intx_irq_map,
+	.xlate = irq_domain_xlate_onecell,
+};
+
+static int mvebu_pcie_init_irq_domain(struct mvebu_pcie_port *port)
+{
+	struct device *dev = &port->pcie->pdev->dev;
+	struct device_node *pcie_intc_node;
+
+	raw_spin_lock_init(&port->irq_lock);
+
+	pcie_intc_node = of_get_next_child(port->dn, NULL);
+	if (!pcie_intc_node) {
+		dev_err(dev, "No PCIe Intc node found for %s\n", port->name);
+		return -ENODEV;
+	}
+
+	port->intx_irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
+						      &mvebu_pcie_intx_irq_domain_ops,
+						      port);
+	of_node_put(pcie_intc_node);
+	if (!port->intx_irq_domain) {
+		dev_err(dev, "Failed to get INTx IRQ domain for %s\n", port->name);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void mvebu_pcie_irq_handler(struct irq_desc *desc)
+{
+	struct mvebu_pcie_port *port = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct device *dev = &port->pcie->pdev->dev;
+	u32 cause, unmask, status;
+	int i;
+
+	chained_irq_enter(chip, desc);
+
+	cause = mvebu_readl(port, PCIE_INT_CAUSE_OFF);
+	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
+	status = cause & unmask;
+
+	/* Process legacy INTx interrupts */
+	for (i = 0; i < PCI_NUM_INTX; i++) {
+		if (!(status & PCIE_INT_INTX(i)))
+			continue;
+
+		if (generic_handle_domain_irq(port->intx_irq_domain, i) == -EINVAL)
+			dev_err_ratelimited(dev, "unexpected INT%c IRQ\n", (char)i+'A');
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static int mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	/* Interrupt support on mvebu emulated bridges is not implemented yet */
+	if (dev->bus->number == 0)
+		return 0; /* Proper return code 0 == NO_IRQ */
+
+	return of_irq_parse_and_map_pci(dev, slot, pin);
+}
+
+static resource_size_t mvebu_pcie_align_resource(struct pci_dev *dev,
+						 const struct resource *res,
+						 resource_size_t start,
+						 resource_size_t size,
+						 resource_size_t align)
+{
+	if (dev->bus->number != 0)
+		return start;
+
+	/*
+	 * On the PCI-to-PCI bridge side, the I/O windows must have at
+	 * least a 64 KB size and the memory windows must have at
+	 * least a 1 MB size. Moreover, MBus windows need to have a
+	 * base address aligned on their size, and their size must be
+	 * a power of two. This means that if the BAR doesn't have a
+	 * power of two size, several MBus windows will actually be
+	 * created. We need to ensure that the biggest MBus window
+	 * (which will be the first one) is aligned on its size, which
+	 * explains the rounddown_pow_of_two() being done here.
+	 */
+	if (res->flags & IORESOURCE_IO)
+		return round_up(start, max_t(resource_size_t, SZ_64K,
+					     rounddown_pow_of_two(size)));
+	else if (res->flags & IORESOURCE_MEM)
+		return round_up(start, max_t(resource_size_t, SZ_1M,
+					     rounddown_pow_of_two(size)));
+	else
+		return start;
+}
+
+static void __iomem *mvebu_pcie_map_registers(struct platform_device *pdev,
+					      struct device_node *np,
+					      struct mvebu_pcie_port *port)
+{
+	int ret = 0;
+
+	ret = of_address_to_resource(np, 0, &port->regs);
+	if (ret)
+		return (void __iomem *)ERR_PTR(ret);
+
+	return devm_ioremap_resource(&pdev->dev, &port->regs);
+}
+
+#define DT_FLAGS_TO_TYPE(flags)       (((flags) >> 24) & 0x03)
+#define    DT_TYPE_IO                 0x1
+#define    DT_TYPE_MEM32              0x2
+#define DT_CPUADDR_TO_TARGET(cpuaddr) (((cpuaddr) >> 56) & 0xFF)
+#define DT_CPUADDR_TO_ATTR(cpuaddr)   (((cpuaddr) >> 48) & 0xFF)
+
+static int mvebu_get_tgt_attr(struct device_node *np, int devfn,
+			      unsigned long type,
+			      unsigned int *tgt,
+			      unsigned int *attr)
+{
+	const int na = 3, ns = 2;
+	const __be32 *range;
+	int rlen, nranges, rangesz, pna, i;
+
+	*tgt = -1;
+	*attr = -1;
+
+	range = of_get_property(np, "ranges", &rlen);
+	if (!range)
+		return -EINVAL;
+
+	pna = of_n_addr_cells(np);
+	rangesz = pna + na + ns;
+	nranges = rlen / sizeof(__be32) / rangesz;
+
+	for (i = 0; i < nranges; i++, range += rangesz) {
+		u32 flags = of_read_number(range, 1);
+		u32 slot = of_read_number(range + 1, 1);
+		u64 cpuaddr = of_read_number(range + na, pna);
+		unsigned long rtype;
+
+		if (DT_FLAGS_TO_TYPE(flags) == DT_TYPE_IO)
+			rtype = IORESOURCE_IO;
+		else if (DT_FLAGS_TO_TYPE(flags) == DT_TYPE_MEM32)
+			rtype = IORESOURCE_MEM;
+		else
+			continue;
+
+		if (slot == PCI_SLOT(devfn) && type == rtype) {
+			*tgt = DT_CPUADDR_TO_TARGET(cpuaddr);
+			*attr = DT_CPUADDR_TO_ATTR(cpuaddr);
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+static int mvebu_pcie_suspend(struct device *dev)
+{
+	struct mvebu_pcie *pcie;
+	int i;
+
+	pcie = dev_get_drvdata(dev);
+	for (i = 0; i < pcie->nports; i++) {
+		struct mvebu_pcie_port *port = pcie->ports + i;
+		if (!port->base)
+			continue;
+		port->saved_pcie_stat = mvebu_readl(port, PCIE_STAT_OFF);
+	}
+
+	return 0;
+}
+
+static int mvebu_pcie_resume(struct device *dev)
+{
+	struct mvebu_pcie *pcie;
+	int i;
+
+	pcie = dev_get_drvdata(dev);
+	for (i = 0; i < pcie->nports; i++) {
+		struct mvebu_pcie_port *port = pcie->ports + i;
+		if (!port->base)
+			continue;
+		mvebu_writel(port, port->saved_pcie_stat, PCIE_STAT_OFF);
+		mvebu_pcie_setup_hw(port);
+	}
+
+	return 0;
+}
+
+static void mvebu_pcie_port_clk_put(void *data)
+{
+	struct mvebu_pcie_port *port = data;
+
+	clk_put(port->clk);
+}
+
+static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie,
+	struct mvebu_pcie_port *port, struct device_node *child)
+{
+	struct device *dev = &pcie->pdev->dev;
+	u32 slot_power_limit;
+	int ret;
+	u32 num_lanes;
+
+	port->pcie = pcie;
+
+	if (of_property_read_u32(child, "marvell,pcie-port", &port->port)) {
+		dev_warn(dev, "ignoring %pOF, missing pcie-port property\n",
+			 child);
+		goto skip;
+	}
+
+	if (of_property_read_u32(child, "marvell,pcie-lane", &port->lane))
+		port->lane = 0;
+
+	if (!of_property_read_u32(child, "num-lanes", &num_lanes) && num_lanes == 4)
+		port->is_x4 = true;
+
+	port->name = devm_kasprintf(dev, GFP_KERNEL, "pcie%d.%d", port->port,
+				    port->lane);
+	if (!port->name) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	port->devfn = of_pci_get_devfn(child);
+	if (port->devfn < 0)
+		goto skip;
+	if (PCI_FUNC(port->devfn) != 0) {
+		dev_err(dev, "%s: invalid function number, must be zero\n",
+			port->name);
+		goto skip;
+	}
+
+	ret = mvebu_get_tgt_attr(dev->of_node, port->devfn, IORESOURCE_MEM,
+				 &port->mem_target, &port->mem_attr);
+	if (ret < 0) {
+		dev_err(dev, "%s: cannot get tgt/attr for mem window\n",
+			port->name);
+		goto skip;
+	}
+
+	if (resource_size(&pcie->io) != 0) {
+		mvebu_get_tgt_attr(dev->of_node, port->devfn, IORESOURCE_IO,
+				   &port->io_target, &port->io_attr);
+	} else {
+		port->io_target = -1;
+		port->io_attr = -1;
+	}
+
+	/*
+	 * Old DT bindings do not contain "intx" interrupt
+	 * so do not fail probing driver when interrupt does not exist.
+	 */
+	port->intx_irq = of_irq_get_byname(child, "intx");
+	if (port->intx_irq == -EPROBE_DEFER) {
+		ret = port->intx_irq;
+		goto err;
+	}
+	if (port->intx_irq <= 0) {
+		dev_warn(dev, "%s: legacy INTx interrupts cannot be masked individually, "
+			      "%pOF does not contain intx interrupt\n",
+			 port->name, child);
+	}
+
+	port->reset_name = devm_kasprintf(dev, GFP_KERNEL, "%s-reset",
+					  port->name);
+	if (!port->reset_name) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	port->reset_gpio = devm_fwnode_gpiod_get(dev, of_fwnode_handle(child),
+						 "reset", GPIOD_OUT_HIGH,
+						 port->name);
+	ret = PTR_ERR_OR_ZERO(port->reset_gpio);
+	if (ret) {
+		if (ret != -ENOENT)
+			goto err;
+		/* reset gpio is optional */
+		port->reset_gpio = NULL;
+		devm_kfree(dev, port->reset_name);
+		port->reset_name = NULL;
+	}
+
+	slot_power_limit = of_pci_get_slot_power_limit(child,
+				&port->slot_power_limit_value,
+				&port->slot_power_limit_scale);
+	if (slot_power_limit)
+		dev_info(dev, "%s: Slot power limit %u.%uW\n",
+			 port->name,
+			 slot_power_limit / 1000,
+			 (slot_power_limit / 100) % 10);
+
+	port->clk = of_clk_get_by_name(child, NULL);
+	if (IS_ERR(port->clk)) {
+		dev_err(dev, "%s: cannot get clock\n", port->name);
+		goto skip;
+	}
+
+	ret = devm_add_action(dev, mvebu_pcie_port_clk_put, port);
+	if (ret < 0) {
+		clk_put(port->clk);
+		goto err;
+	}
+
+	return 1;
+
+skip:
+	ret = 0;
+
+	/* In the case of skipping, we need to free these */
+	devm_kfree(dev, port->reset_name);
+	port->reset_name = NULL;
+	devm_kfree(dev, port->name);
+	port->name = NULL;
+
+err:
+	return ret;
+}
+
+/*
+ * Power up a PCIe port.  PCIe requires the refclk to be stable for 100µs
+ * prior to releasing PERST.  See table 2-4 in section 2.6.2 AC Specifications
+ * of the PCI Express Card Electromechanical Specification, 1.1.
+ */
+static int mvebu_pcie_powerup(struct mvebu_pcie_port *port)
+{
+	int ret;
+
+	ret = clk_prepare_enable(port->clk);
+	if (ret < 0)
+		return ret;
+
+	if (port->reset_gpio) {
+		u32 reset_udelay = PCI_PM_D3COLD_WAIT * 1000;
+
+		of_property_read_u32(port->dn, "reset-delay-us",
+				     &reset_udelay);
+
+		udelay(100);
+
+		gpiod_set_value_cansleep(port->reset_gpio, 0);
+		msleep(reset_udelay / 1000);
+	}
+
+	return 0;
+}
+
+/*
+ * Power down a PCIe port.  Strictly, PCIe requires us to place the card
+ * in D3hot state before asserting PERST#.
+ */
+static void mvebu_pcie_powerdown(struct mvebu_pcie_port *port)
+{
+	gpiod_set_value_cansleep(port->reset_gpio, 1);
+
+	clk_disable_unprepare(port->clk);
+}
+
+/*
+ * devm_of_pci_get_host_bridge_resources() only sets up translateable resources,
+ * so we need extra resource setup parsing our special DT properties encoding
+ * the MEM and IO apertures.
+ */
+static int mvebu_pcie_parse_request_resources(struct mvebu_pcie *pcie)
+{
+	struct device *dev = &pcie->pdev->dev;
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(pcie);
+	int ret;
+
+	/* Get the PCIe memory aperture */
+	mvebu_mbus_get_pcie_mem_aperture(&pcie->mem);
+	if (resource_size(&pcie->mem) == 0) {
+		dev_err(dev, "invalid memory aperture size\n");
+		return -EINVAL;
+	}
+
+	pcie->mem.name = "PCI MEM";
+	pci_add_resource(&bridge->windows, &pcie->mem);
+	ret = devm_request_resource(dev, &iomem_resource, &pcie->mem);
+	if (ret)
+		return ret;
+
+	/* Get the PCIe IO aperture */
+	mvebu_mbus_get_pcie_io_aperture(&pcie->io);
+
+	if (resource_size(&pcie->io) != 0) {
+		pcie->realio.flags = pcie->io.flags;
+		pcie->realio.start = PCIBIOS_MIN_IO;
+		pcie->realio.end = min_t(resource_size_t,
+					 IO_SPACE_LIMIT - SZ_64K,
+					 resource_size(&pcie->io) - 1);
+		pcie->realio.name = "PCI I/O";
+
+		ret = devm_pci_remap_iospace(dev, &pcie->realio, pcie->io.start);
+		if (ret)
+			return ret;
+
+		pci_add_resource(&bridge->windows, &pcie->realio);
+		ret = devm_request_resource(dev, &ioport_resource, &pcie->realio);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int mvebu_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct mvebu_pcie *pcie;
+	struct pci_host_bridge *bridge;
+	struct device_node *np = dev->of_node;
+	struct device_node *child;
+	int num, i, ret;
+
+	bridge = devm_pci_alloc_host_bridge(dev, sizeof(struct mvebu_pcie));
+	if (!bridge)
+		return -ENOMEM;
+
+	pcie = pci_host_bridge_priv(bridge);
+	pcie->pdev = pdev;
+	platform_set_drvdata(pdev, pcie);
+
+	ret = mvebu_pcie_parse_request_resources(pcie);
+	if (ret)
+		return ret;
+
+	num = of_get_available_child_count(np);
+
+	pcie->ports = devm_kcalloc(dev, num, sizeof(*pcie->ports), GFP_KERNEL);
+	if (!pcie->ports)
+		return -ENOMEM;
+
+	i = 0;
+	for_each_available_child_of_node(np, child) {
+		struct mvebu_pcie_port *port = &pcie->ports[i];
+
+		ret = mvebu_pcie_parse_port(pcie, port, child);
+		if (ret < 0) {
+			of_node_put(child);
+			return ret;
+		} else if (ret == 0) {
+			continue;
+		}
+
+		port->dn = child;
+		i++;
+	}
+	pcie->nports = i;
+
+	for (i = 0; i < pcie->nports; i++) {
+		struct mvebu_pcie_port *port = &pcie->ports[i];
+		int irq = port->intx_irq;
+
+		child = port->dn;
+		if (!child)
+			continue;
+
+		ret = mvebu_pcie_powerup(port);
+		if (ret < 0)
+			continue;
+
+		port->base = mvebu_pcie_map_registers(pdev, child, port);
+		if (IS_ERR(port->base)) {
+			dev_err(dev, "%s: cannot map registers\n", port->name);
+			port->base = NULL;
+			mvebu_pcie_powerdown(port);
+			continue;
+		}
+
+		ret = mvebu_pci_bridge_emul_init(port);
+		if (ret < 0) {
+			dev_err(dev, "%s: cannot init emulated bridge\n",
+				port->name);
+			devm_iounmap(dev, port->base);
+			port->base = NULL;
+			mvebu_pcie_powerdown(port);
+			continue;
+		}
+
+		if (irq > 0) {
+			ret = mvebu_pcie_init_irq_domain(port);
+			if (ret) {
+				dev_err(dev, "%s: cannot init irq domain\n",
+					port->name);
+				pci_bridge_emul_cleanup(&port->bridge);
+				devm_iounmap(dev, port->base);
+				port->base = NULL;
+				mvebu_pcie_powerdown(port);
+				continue;
+			}
+			irq_set_chained_handler_and_data(irq,
+							 mvebu_pcie_irq_handler,
+							 port);
+		}
+
+		/*
+		 * PCIe topology exported by mvebu hw is quite complicated. In
+		 * reality has something like N fully independent host bridges
+		 * where each host bridge has one PCIe Root Port (which acts as
+		 * PCI Bridge device). Each host bridge has its own independent
+		 * internal registers, independent access to PCI config space,
+		 * independent interrupt lines, independent window and memory
+		 * access configuration. But additionally there is some kind of
+		 * peer-to-peer support between PCIe devices behind different
+		 * host bridges limited just to forwarding of memory and I/O
+		 * transactions (forwarding of error messages and config cycles
+		 * is not supported). So we could say there are N independent
+		 * PCIe Root Complexes.
+		 *
+		 * For this kind of setup DT should have been structured into
+		 * N independent PCIe controllers / host bridges. But instead
+		 * structure in past was defined to put PCIe Root Ports of all
+		 * host bridges into one bus zero, like in classic multi-port
+		 * Root Complex setup with just one host bridge.
+		 *
+		 * This means that pci-mvebu.c driver provides "virtual" bus 0
+		 * on which registers all PCIe Root Ports (PCI Bridge devices)
+		 * specified in DT by their BDF addresses and virtually routes
+		 * PCI config access of each PCI bridge device to specific PCIe
+		 * host bridge.
+		 *
+		 * Normally PCI Bridge should choose between Type 0 and Type 1
+		 * config requests based on primary and secondary bus numbers
+		 * configured on the bridge itself. But because mvebu PCI Bridge
+		 * does not have registers for primary and secondary bus numbers
+		 * in its config space, it determinates type of config requests
+		 * via its own custom way.
+		 *
+		 * There are two options how mvebu determinate type of config
+		 * request.
+		 *
+		 * 1. If Secondary Bus Number Enable bit is not set or is not
+		 * available (applies for pre-XP PCIe controllers) then Type 0
+		 * is used if target bus number equals Local Bus Number (bits
+		 * [15:8] in register 0x1a04) and target device number differs
+		 * from Local Device Number (bits [20:16] in register 0x1a04).
+		 * Type 1 is used if target bus number differs from Local Bus
+		 * Number. And when target bus number equals Local Bus Number
+		 * and target device equals Local Device Number then request is
+		 * routed to Local PCI Bridge (PCIe Root Port).
+		 *
+		 * 2. If Secondary Bus Number Enable bit is set (bit 7 in
+		 * register 0x1a2c) then mvebu hw determinate type of config
+		 * request like compliant PCI Bridge based on primary bus number
+		 * which is configured via Local Bus Number (bits [15:8] in
+		 * register 0x1a04) and secondary bus number which is configured
+		 * via Secondary Bus Number (bits [7:0] in register 0x1a2c).
+		 * Local PCI Bridge (PCIe Root Port) is available on primary bus
+		 * as device with Local Device Number (bits [20:16] in register
+		 * 0x1a04).
+		 *
+		 * Secondary Bus Number Enable bit is disabled by default and
+		 * option 2. is not available on pre-XP PCIe controllers. Hence
+		 * this driver always use option 1.
+		 *
+		 * Basically it means that primary and secondary buses shares
+		 * one virtual number configured via Local Bus Number bits and
+		 * Local Device Number bits determinates if accessing primary
+		 * or secondary bus. Set Local Device Number to 1 and redirect
+		 * all writes of PCI Bridge Secondary Bus Number register to
+		 * Local Bus Number (bits [15:8] in register 0x1a04).
+		 *
+		 * So when accessing devices on buses behind secondary bus
+		 * number it would work correctly. And also when accessing
+		 * device 0 at secondary bus number via config space would be
+		 * correctly routed to secondary bus. Due to issues described
+		 * in mvebu_pcie_setup_hw(), PCI Bridges at primary bus (zero)
+		 * are not accessed directly via PCI config space but rarher
+		 * indirectly via kernel emulated PCI bridge driver.
+		 */
+		mvebu_pcie_setup_hw(port);
+		mvebu_pcie_set_local_dev_nr(port, 1);
+		mvebu_pcie_set_local_bus_nr(port, 0);
+	}
+
+	bridge->sysdata = pcie;
+	bridge->ops = &mvebu_pcie_ops;
+	bridge->child_ops = &mvebu_pcie_child_ops;
+	bridge->align_resource = mvebu_pcie_align_resource;
+	bridge->map_irq = mvebu_pcie_map_irq;
+
+	return pci_host_probe(bridge);
+}
+
+static void mvebu_pcie_remove(struct platform_device *pdev)
+{
+	struct mvebu_pcie *pcie = platform_get_drvdata(pdev);
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(pcie);
+	u32 cmd, sspl;
+	int i;
+
+	/* Remove PCI bus with all devices. */
+	pci_lock_rescan_remove();
+	pci_stop_root_bus(bridge->bus);
+	pci_remove_root_bus(bridge->bus);
+	pci_unlock_rescan_remove();
+
+	for (i = 0; i < pcie->nports; i++) {
+		struct mvebu_pcie_port *port = &pcie->ports[i];
+		int irq = port->intx_irq;
+
+		if (!port->base)
+			continue;
+
+		/* Disable Root Bridge I/O space, memory space and bus mastering. */
+		cmd = mvebu_readl(port, PCIE_CMD_OFF);
+		cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+		mvebu_writel(port, cmd, PCIE_CMD_OFF);
+
+		/* Mask all interrupt sources. */
+		mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_UNMASK_OFF);
+
+		/* Clear all interrupt causes. */
+		mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_CAUSE_OFF);
+
+		if (irq > 0)
+			irq_set_chained_handler_and_data(irq, NULL, NULL);
+
+		/* Remove IRQ domains. */
+		if (port->intx_irq_domain)
+			irq_domain_remove(port->intx_irq_domain);
+
+		/* Free config space for emulated root bridge. */
+		pci_bridge_emul_cleanup(&port->bridge);
+
+		/* Disable sending Set_Slot_Power_Limit PCIe Message. */
+		sspl = mvebu_readl(port, PCIE_SSPL_OFF);
+		sspl &= ~(PCIE_SSPL_VALUE_MASK | PCIE_SSPL_SCALE_MASK | PCIE_SSPL_ENABLE);
+		mvebu_writel(port, sspl, PCIE_SSPL_OFF);
+
+		/* Disable and clear BARs and windows. */
+		mvebu_pcie_disable_wins(port);
+
+		/* Delete PCIe IO and MEM windows. */
+		if (port->iowin.size)
+			mvebu_pcie_del_windows(port, port->iowin.base, port->iowin.size);
+		if (port->memwin.size)
+			mvebu_pcie_del_windows(port, port->memwin.base, port->memwin.size);
+
+		/* Power down card and disable clocks. Must be the last step. */
+		mvebu_pcie_powerdown(port);
+	}
+}
+
+static const struct of_device_id mvebu_pcie_of_match_table[] = {
+	{ .compatible = "marvell,armada-xp-pcie", },
+	{ .compatible = "marvell,armada-370-pcie", },
+	{ .compatible = "marvell,dove-pcie", },
+	{ .compatible = "marvell,kirkwood-pcie", },
+	{},
+};
+
+static const struct dev_pm_ops mvebu_pcie_pm_ops = {
+	NOIRQ_SYSTEM_SLEEP_PM_OPS(mvebu_pcie_suspend, mvebu_pcie_resume)
+};
+
+static struct platform_driver mvebu_pcie_driver = {
+	.driver = {
+		.name = "mvebu-pcie",
+		.of_match_table = mvebu_pcie_of_match_table,
+		.pm = &mvebu_pcie_pm_ops,
+	},
+	.probe = mvebu_pcie_probe,
+	.remove_new = mvebu_pcie_remove,
+};
+module_platform_driver(mvebu_pcie_driver);
+
+MODULE_AUTHOR("Thomas Petazzoni <thomas.petazzoni@bootlin.com>");
+MODULE_AUTHOR("Pali Rohár <pali@kernel.org>");
+MODULE_DESCRIPTION("Marvell EBU PCIe controller");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pci-rcar-gen2.c b/drivers/pci/controller/pci-rcar-gen2.c
new file mode 100644
index 000000000..d29866485
--- /dev/null
+++ b/drivers/pci/controller/pci-rcar-gen2.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  pci-rcar-gen2: internal PCI bus support
+ *
+ * Copyright (C) 2013 Renesas Solutions Corp.
+ * Copyright (C) 2013 Cogent Embedded, Inc.
+ *
+ * Author: Valentine Barshak <valentine.barshak@cogentembedded.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+
+#include "../pci.h"
+
+/* AHB-PCI Bridge PCI communication registers */
+#define RCAR_AHBPCI_PCICOM_OFFSET	0x800
+
+#define RCAR_PCIAHB_WIN1_CTR_REG	(RCAR_AHBPCI_PCICOM_OFFSET + 0x00)
+#define RCAR_PCIAHB_WIN2_CTR_REG	(RCAR_AHBPCI_PCICOM_OFFSET + 0x04)
+#define RCAR_PCIAHB_PREFETCH0		0x0
+#define RCAR_PCIAHB_PREFETCH4		0x1
+#define RCAR_PCIAHB_PREFETCH8		0x2
+#define RCAR_PCIAHB_PREFETCH16		0x3
+
+#define RCAR_AHBPCI_WIN1_CTR_REG	(RCAR_AHBPCI_PCICOM_OFFSET + 0x10)
+#define RCAR_AHBPCI_WIN2_CTR_REG	(RCAR_AHBPCI_PCICOM_OFFSET + 0x14)
+#define RCAR_AHBPCI_WIN_CTR_MEM		(3 << 1)
+#define RCAR_AHBPCI_WIN_CTR_CFG		(5 << 1)
+#define RCAR_AHBPCI_WIN1_HOST		(1 << 30)
+#define RCAR_AHBPCI_WIN1_DEVICE		(1 << 31)
+
+#define RCAR_PCI_INT_ENABLE_REG		(RCAR_AHBPCI_PCICOM_OFFSET + 0x20)
+#define RCAR_PCI_INT_STATUS_REG		(RCAR_AHBPCI_PCICOM_OFFSET + 0x24)
+#define RCAR_PCI_INT_SIGTABORT		(1 << 0)
+#define RCAR_PCI_INT_SIGRETABORT	(1 << 1)
+#define RCAR_PCI_INT_REMABORT		(1 << 2)
+#define RCAR_PCI_INT_PERR		(1 << 3)
+#define RCAR_PCI_INT_SIGSERR		(1 << 4)
+#define RCAR_PCI_INT_RESERR		(1 << 5)
+#define RCAR_PCI_INT_WIN1ERR		(1 << 12)
+#define RCAR_PCI_INT_WIN2ERR		(1 << 13)
+#define RCAR_PCI_INT_A			(1 << 16)
+#define RCAR_PCI_INT_B			(1 << 17)
+#define RCAR_PCI_INT_PME		(1 << 19)
+#define RCAR_PCI_INT_ALLERRORS (RCAR_PCI_INT_SIGTABORT		| \
+				RCAR_PCI_INT_SIGRETABORT	| \
+				RCAR_PCI_INT_REMABORT		| \
+				RCAR_PCI_INT_PERR		| \
+				RCAR_PCI_INT_SIGSERR		| \
+				RCAR_PCI_INT_RESERR		| \
+				RCAR_PCI_INT_WIN1ERR		| \
+				RCAR_PCI_INT_WIN2ERR)
+
+#define RCAR_AHB_BUS_CTR_REG		(RCAR_AHBPCI_PCICOM_OFFSET + 0x30)
+#define RCAR_AHB_BUS_MMODE_HTRANS	(1 << 0)
+#define RCAR_AHB_BUS_MMODE_BYTE_BURST	(1 << 1)
+#define RCAR_AHB_BUS_MMODE_WR_INCR	(1 << 2)
+#define RCAR_AHB_BUS_MMODE_HBUS_REQ	(1 << 7)
+#define RCAR_AHB_BUS_SMODE_READYCTR	(1 << 17)
+#define RCAR_AHB_BUS_MODE		(RCAR_AHB_BUS_MMODE_HTRANS |	\
+					RCAR_AHB_BUS_MMODE_BYTE_BURST |	\
+					RCAR_AHB_BUS_MMODE_WR_INCR |	\
+					RCAR_AHB_BUS_MMODE_HBUS_REQ |	\
+					RCAR_AHB_BUS_SMODE_READYCTR)
+
+#define RCAR_USBCTR_REG			(RCAR_AHBPCI_PCICOM_OFFSET + 0x34)
+#define RCAR_USBCTR_USBH_RST		(1 << 0)
+#define RCAR_USBCTR_PCICLK_MASK		(1 << 1)
+#define RCAR_USBCTR_PLL_RST		(1 << 2)
+#define RCAR_USBCTR_DIRPD		(1 << 8)
+#define RCAR_USBCTR_PCIAHB_WIN2_EN	(1 << 9)
+#define RCAR_USBCTR_PCIAHB_WIN1_256M	(0 << 10)
+#define RCAR_USBCTR_PCIAHB_WIN1_512M	(1 << 10)
+#define RCAR_USBCTR_PCIAHB_WIN1_1G	(2 << 10)
+#define RCAR_USBCTR_PCIAHB_WIN1_2G	(3 << 10)
+#define RCAR_USBCTR_PCIAHB_WIN1_MASK	(3 << 10)
+
+#define RCAR_PCI_ARBITER_CTR_REG	(RCAR_AHBPCI_PCICOM_OFFSET + 0x40)
+#define RCAR_PCI_ARBITER_PCIREQ0	(1 << 0)
+#define RCAR_PCI_ARBITER_PCIREQ1	(1 << 1)
+#define RCAR_PCI_ARBITER_PCIBP_MODE	(1 << 12)
+
+#define RCAR_PCI_UNIT_REV_REG		(RCAR_AHBPCI_PCICOM_OFFSET + 0x48)
+
+struct rcar_pci {
+	struct device *dev;
+	void __iomem *reg;
+	struct resource mem_res;
+	struct resource *cfg_res;
+	int irq;
+};
+
+/* PCI configuration space operations */
+static void __iomem *rcar_pci_cfg_base(struct pci_bus *bus, unsigned int devfn,
+				       int where)
+{
+	struct rcar_pci *priv = bus->sysdata;
+	int slot, val;
+
+	if (!pci_is_root_bus(bus) || PCI_FUNC(devfn))
+		return NULL;
+
+	/* Only one EHCI/OHCI device built-in */
+	slot = PCI_SLOT(devfn);
+	if (slot > 2)
+		return NULL;
+
+	/* bridge logic only has registers to 0x40 */
+	if (slot == 0x0 && where >= 0x40)
+		return NULL;
+
+	val = slot ? RCAR_AHBPCI_WIN1_DEVICE | RCAR_AHBPCI_WIN_CTR_CFG :
+		     RCAR_AHBPCI_WIN1_HOST | RCAR_AHBPCI_WIN_CTR_CFG;
+
+	iowrite32(val, priv->reg + RCAR_AHBPCI_WIN1_CTR_REG);
+	return priv->reg + (slot >> 1) * 0x100 + where;
+}
+
+#ifdef CONFIG_PCI_DEBUG
+/* if debug enabled, then attach an error handler irq to the bridge */
+
+static irqreturn_t rcar_pci_err_irq(int irq, void *pw)
+{
+	struct rcar_pci *priv = pw;
+	struct device *dev = priv->dev;
+	u32 status = ioread32(priv->reg + RCAR_PCI_INT_STATUS_REG);
+
+	if (status & RCAR_PCI_INT_ALLERRORS) {
+		dev_err(dev, "error irq: status %08x\n", status);
+
+		/* clear the error(s) */
+		iowrite32(status & RCAR_PCI_INT_ALLERRORS,
+			  priv->reg + RCAR_PCI_INT_STATUS_REG);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static void rcar_pci_setup_errirq(struct rcar_pci *priv)
+{
+	struct device *dev = priv->dev;
+	int ret;
+	u32 val;
+
+	ret = devm_request_irq(dev, priv->irq, rcar_pci_err_irq,
+			       IRQF_SHARED, "error irq", priv);
+	if (ret) {
+		dev_err(dev, "cannot claim IRQ for error handling\n");
+		return;
+	}
+
+	val = ioread32(priv->reg + RCAR_PCI_INT_ENABLE_REG);
+	val |= RCAR_PCI_INT_ALLERRORS;
+	iowrite32(val, priv->reg + RCAR_PCI_INT_ENABLE_REG);
+}
+#else
+static inline void rcar_pci_setup_errirq(struct rcar_pci *priv) { }
+#endif
+
+/* PCI host controller setup */
+static void rcar_pci_setup(struct rcar_pci *priv)
+{
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(priv);
+	struct device *dev = priv->dev;
+	void __iomem *reg = priv->reg;
+	struct resource_entry *entry;
+	unsigned long window_size;
+	unsigned long window_addr;
+	unsigned long window_pci;
+	u32 val;
+
+	entry = resource_list_first_type(&bridge->dma_ranges, IORESOURCE_MEM);
+	if (!entry) {
+		window_addr = 0x40000000;
+		window_pci = 0x40000000;
+		window_size = SZ_1G;
+	} else {
+		window_addr = entry->res->start;
+		window_pci = entry->res->start - entry->offset;
+		window_size = resource_size(entry->res);
+	}
+
+	pm_runtime_enable(dev);
+	pm_runtime_get_sync(dev);
+
+	val = ioread32(reg + RCAR_PCI_UNIT_REV_REG);
+	dev_info(dev, "PCI: revision %x\n", val);
+
+	/* Disable Direct Power Down State and assert reset */
+	val = ioread32(reg + RCAR_USBCTR_REG) & ~RCAR_USBCTR_DIRPD;
+	val |= RCAR_USBCTR_USBH_RST | RCAR_USBCTR_PLL_RST;
+	iowrite32(val, reg + RCAR_USBCTR_REG);
+	udelay(4);
+
+	/* De-assert reset and reset PCIAHB window1 size */
+	val &= ~(RCAR_USBCTR_PCIAHB_WIN1_MASK | RCAR_USBCTR_PCICLK_MASK |
+		 RCAR_USBCTR_USBH_RST | RCAR_USBCTR_PLL_RST);
+
+	/* Setup PCIAHB window1 size */
+	switch (window_size) {
+	case SZ_2G:
+		val |= RCAR_USBCTR_PCIAHB_WIN1_2G;
+		break;
+	case SZ_1G:
+		val |= RCAR_USBCTR_PCIAHB_WIN1_1G;
+		break;
+	case SZ_512M:
+		val |= RCAR_USBCTR_PCIAHB_WIN1_512M;
+		break;
+	default:
+		pr_warn("unknown window size %ld - defaulting to 256M\n",
+			window_size);
+		window_size = SZ_256M;
+		fallthrough;
+	case SZ_256M:
+		val |= RCAR_USBCTR_PCIAHB_WIN1_256M;
+		break;
+	}
+	iowrite32(val, reg + RCAR_USBCTR_REG);
+
+	/* Configure AHB master and slave modes */
+	iowrite32(RCAR_AHB_BUS_MODE, reg + RCAR_AHB_BUS_CTR_REG);
+
+	/* Configure PCI arbiter */
+	val = ioread32(reg + RCAR_PCI_ARBITER_CTR_REG);
+	val |= RCAR_PCI_ARBITER_PCIREQ0 | RCAR_PCI_ARBITER_PCIREQ1 |
+	       RCAR_PCI_ARBITER_PCIBP_MODE;
+	iowrite32(val, reg + RCAR_PCI_ARBITER_CTR_REG);
+
+	/* PCI-AHB mapping */
+	iowrite32(window_addr | RCAR_PCIAHB_PREFETCH16,
+		  reg + RCAR_PCIAHB_WIN1_CTR_REG);
+
+	/* AHB-PCI mapping: OHCI/EHCI registers */
+	val = priv->mem_res.start | RCAR_AHBPCI_WIN_CTR_MEM;
+	iowrite32(val, reg + RCAR_AHBPCI_WIN2_CTR_REG);
+
+	/* Enable AHB-PCI bridge PCI configuration access */
+	iowrite32(RCAR_AHBPCI_WIN1_HOST | RCAR_AHBPCI_WIN_CTR_CFG,
+		  reg + RCAR_AHBPCI_WIN1_CTR_REG);
+	/* Set PCI-AHB Window1 address */
+	iowrite32(window_pci | PCI_BASE_ADDRESS_MEM_PREFETCH,
+		  reg + PCI_BASE_ADDRESS_1);
+	/* Set AHB-PCI bridge PCI communication area address */
+	val = priv->cfg_res->start + RCAR_AHBPCI_PCICOM_OFFSET;
+	iowrite32(val, reg + PCI_BASE_ADDRESS_0);
+
+	val = ioread32(reg + PCI_COMMAND);
+	val |= PCI_COMMAND_SERR | PCI_COMMAND_PARITY |
+	       PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+	iowrite32(val, reg + PCI_COMMAND);
+
+	/* Enable PCI interrupts */
+	iowrite32(RCAR_PCI_INT_A | RCAR_PCI_INT_B | RCAR_PCI_INT_PME,
+		  reg + RCAR_PCI_INT_ENABLE_REG);
+
+	rcar_pci_setup_errirq(priv);
+}
+
+static struct pci_ops rcar_pci_ops = {
+	.map_bus = rcar_pci_cfg_base,
+	.read	= pci_generic_config_read,
+	.write	= pci_generic_config_write,
+};
+
+static int rcar_pci_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *cfg_res, *mem_res;
+	struct rcar_pci *priv;
+	struct pci_host_bridge *bridge;
+	void __iomem *reg;
+
+	bridge = devm_pci_alloc_host_bridge(dev, sizeof(*priv));
+	if (!bridge)
+		return -ENOMEM;
+
+	priv = pci_host_bridge_priv(bridge);
+	bridge->sysdata = priv;
+
+	reg = devm_platform_get_and_ioremap_resource(pdev, 0, &cfg_res);
+	if (IS_ERR(reg))
+		return PTR_ERR(reg);
+
+	mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!mem_res || !mem_res->start)
+		return -ENODEV;
+
+	if (mem_res->start & 0xFFFF)
+		return -EINVAL;
+
+	priv->mem_res = *mem_res;
+	priv->cfg_res = cfg_res;
+
+	priv->irq = platform_get_irq(pdev, 0);
+	priv->reg = reg;
+	priv->dev = dev;
+
+	if (priv->irq < 0) {
+		dev_err(dev, "no valid irq found\n");
+		return priv->irq;
+	}
+
+	bridge->ops = &rcar_pci_ops;
+
+	pci_add_flags(PCI_REASSIGN_ALL_BUS);
+
+	rcar_pci_setup(priv);
+
+	return pci_host_probe(bridge);
+}
+
+static const struct of_device_id rcar_pci_of_match[] = {
+	{ .compatible = "renesas,pci-r8a7790", },
+	{ .compatible = "renesas,pci-r8a7791", },
+	{ .compatible = "renesas,pci-r8a7794", },
+	{ .compatible = "renesas,pci-rcar-gen2", },
+	{ .compatible = "renesas,pci-rzn1", },
+	{ },
+};
+
+static struct platform_driver rcar_pci_driver = {
+	.driver = {
+		.name = "pci-rcar-gen2",
+		.suppress_bind_attrs = true,
+		.of_match_table = rcar_pci_of_match,
+	},
+	.probe = rcar_pci_probe,
+};
+builtin_platform_driver(rcar_pci_driver);
diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c
new file mode 100644
index 000000000..038d974a3
--- /dev/null
+++ b/drivers/pci/controller/pci-tegra.c
@@ -0,0 +1,2811 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCIe host controller driver for Tegra SoCs
+ *
+ * Copyright (c) 2010, CompuLab, Ltd.
+ * Author: Mike Rapoport <mike@compulab.co.il>
+ *
+ * Based on NVIDIA PCIe driver
+ * Copyright (c) 2008-2009, NVIDIA Corporation.
+ *
+ * Bits taken from arch/arm/mach-dove/pcie.c
+ *
+ * Author: Thierry Reding <treding@nvidia.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/irq.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/regulator/consumer.h>
+
+#include <soc/tegra/cpuidle.h>
+#include <soc/tegra/pmc.h>
+
+#include "../pci.h"
+
+#define INT_PCI_MSI_NR (8 * 32)
+
+/* register definitions */
+
+#define AFI_AXI_BAR0_SZ	0x00
+#define AFI_AXI_BAR1_SZ	0x04
+#define AFI_AXI_BAR2_SZ	0x08
+#define AFI_AXI_BAR3_SZ	0x0c
+#define AFI_AXI_BAR4_SZ	0x10
+#define AFI_AXI_BAR5_SZ	0x14
+
+#define AFI_AXI_BAR0_START	0x18
+#define AFI_AXI_BAR1_START	0x1c
+#define AFI_AXI_BAR2_START	0x20
+#define AFI_AXI_BAR3_START	0x24
+#define AFI_AXI_BAR4_START	0x28
+#define AFI_AXI_BAR5_START	0x2c
+
+#define AFI_FPCI_BAR0	0x30
+#define AFI_FPCI_BAR1	0x34
+#define AFI_FPCI_BAR2	0x38
+#define AFI_FPCI_BAR3	0x3c
+#define AFI_FPCI_BAR4	0x40
+#define AFI_FPCI_BAR5	0x44
+
+#define AFI_CACHE_BAR0_SZ	0x48
+#define AFI_CACHE_BAR0_ST	0x4c
+#define AFI_CACHE_BAR1_SZ	0x50
+#define AFI_CACHE_BAR1_ST	0x54
+
+#define AFI_MSI_BAR_SZ		0x60
+#define AFI_MSI_FPCI_BAR_ST	0x64
+#define AFI_MSI_AXI_BAR_ST	0x68
+
+#define AFI_MSI_VEC(x)		(0x6c + ((x) * 4))
+#define AFI_MSI_EN_VEC(x)	(0x8c + ((x) * 4))
+
+#define AFI_CONFIGURATION		0xac
+#define  AFI_CONFIGURATION_EN_FPCI		(1 << 0)
+#define  AFI_CONFIGURATION_CLKEN_OVERRIDE	(1 << 31)
+
+#define AFI_FPCI_ERROR_MASKS	0xb0
+
+#define AFI_INTR_MASK		0xb4
+#define  AFI_INTR_MASK_INT_MASK	(1 << 0)
+#define  AFI_INTR_MASK_MSI_MASK	(1 << 8)
+
+#define AFI_INTR_CODE			0xb8
+#define  AFI_INTR_CODE_MASK		0xf
+#define  AFI_INTR_INI_SLAVE_ERROR	1
+#define  AFI_INTR_INI_DECODE_ERROR	2
+#define  AFI_INTR_TARGET_ABORT		3
+#define  AFI_INTR_MASTER_ABORT		4
+#define  AFI_INTR_INVALID_WRITE		5
+#define  AFI_INTR_LEGACY		6
+#define  AFI_INTR_FPCI_DECODE_ERROR	7
+#define  AFI_INTR_AXI_DECODE_ERROR	8
+#define  AFI_INTR_FPCI_TIMEOUT		9
+#define  AFI_INTR_PE_PRSNT_SENSE	10
+#define  AFI_INTR_PE_CLKREQ_SENSE	11
+#define  AFI_INTR_CLKCLAMP_SENSE	12
+#define  AFI_INTR_RDY4PD_SENSE		13
+#define  AFI_INTR_P2P_ERROR		14
+
+#define AFI_INTR_SIGNATURE	0xbc
+#define AFI_UPPER_FPCI_ADDRESS	0xc0
+#define AFI_SM_INTR_ENABLE	0xc4
+#define  AFI_SM_INTR_INTA_ASSERT	(1 << 0)
+#define  AFI_SM_INTR_INTB_ASSERT	(1 << 1)
+#define  AFI_SM_INTR_INTC_ASSERT	(1 << 2)
+#define  AFI_SM_INTR_INTD_ASSERT	(1 << 3)
+#define  AFI_SM_INTR_INTA_DEASSERT	(1 << 4)
+#define  AFI_SM_INTR_INTB_DEASSERT	(1 << 5)
+#define  AFI_SM_INTR_INTC_DEASSERT	(1 << 6)
+#define  AFI_SM_INTR_INTD_DEASSERT	(1 << 7)
+
+#define AFI_AFI_INTR_ENABLE		0xc8
+#define  AFI_INTR_EN_INI_SLVERR		(1 << 0)
+#define  AFI_INTR_EN_INI_DECERR		(1 << 1)
+#define  AFI_INTR_EN_TGT_SLVERR		(1 << 2)
+#define  AFI_INTR_EN_TGT_DECERR		(1 << 3)
+#define  AFI_INTR_EN_TGT_WRERR		(1 << 4)
+#define  AFI_INTR_EN_DFPCI_DECERR	(1 << 5)
+#define  AFI_INTR_EN_AXI_DECERR		(1 << 6)
+#define  AFI_INTR_EN_FPCI_TIMEOUT	(1 << 7)
+#define  AFI_INTR_EN_PRSNT_SENSE	(1 << 8)
+
+#define AFI_PCIE_PME		0xf0
+
+#define AFI_PCIE_CONFIG					0x0f8
+#define  AFI_PCIE_CONFIG_PCIE_DISABLE(x)		(1 << ((x) + 1))
+#define  AFI_PCIE_CONFIG_PCIE_DISABLE_ALL		0xe
+#define  AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_MASK	(0xf << 20)
+#define  AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_SINGLE	(0x0 << 20)
+#define  AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_420	(0x0 << 20)
+#define  AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_X2_X1	(0x0 << 20)
+#define  AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_401	(0x0 << 20)
+#define  AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_DUAL	(0x1 << 20)
+#define  AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_222	(0x1 << 20)
+#define  AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_X4_X1	(0x1 << 20)
+#define  AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_211	(0x1 << 20)
+#define  AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_411	(0x2 << 20)
+#define  AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_111	(0x2 << 20)
+#define  AFI_PCIE_CONFIG_PCIE_CLKREQ_GPIO(x)		(1 << ((x) + 29))
+#define  AFI_PCIE_CONFIG_PCIE_CLKREQ_GPIO_ALL		(0x7 << 29)
+
+#define AFI_FUSE			0x104
+#define  AFI_FUSE_PCIE_T0_GEN2_DIS	(1 << 2)
+
+#define AFI_PEX0_CTRL			0x110
+#define AFI_PEX1_CTRL			0x118
+#define  AFI_PEX_CTRL_RST		(1 << 0)
+#define  AFI_PEX_CTRL_CLKREQ_EN		(1 << 1)
+#define  AFI_PEX_CTRL_REFCLK_EN		(1 << 3)
+#define  AFI_PEX_CTRL_OVERRIDE_EN	(1 << 4)
+
+#define AFI_PLLE_CONTROL		0x160
+#define  AFI_PLLE_CONTROL_BYPASS_PADS2PLLE_CONTROL (1 << 9)
+#define  AFI_PLLE_CONTROL_PADS2PLLE_CONTROL_EN (1 << 1)
+
+#define AFI_PEXBIAS_CTRL_0		0x168
+
+#define RP_ECTL_2_R1	0x00000e84
+#define  RP_ECTL_2_R1_RX_CTLE_1C_MASK		0xffff
+
+#define RP_ECTL_4_R1	0x00000e8c
+#define  RP_ECTL_4_R1_RX_CDR_CTRL_1C_MASK	(0xffff << 16)
+#define  RP_ECTL_4_R1_RX_CDR_CTRL_1C_SHIFT	16
+
+#define RP_ECTL_5_R1	0x00000e90
+#define  RP_ECTL_5_R1_RX_EQ_CTRL_L_1C_MASK	0xffffffff
+
+#define RP_ECTL_6_R1	0x00000e94
+#define  RP_ECTL_6_R1_RX_EQ_CTRL_H_1C_MASK	0xffffffff
+
+#define RP_ECTL_2_R2	0x00000ea4
+#define  RP_ECTL_2_R2_RX_CTLE_1C_MASK	0xffff
+
+#define RP_ECTL_4_R2	0x00000eac
+#define  RP_ECTL_4_R2_RX_CDR_CTRL_1C_MASK	(0xffff << 16)
+#define  RP_ECTL_4_R2_RX_CDR_CTRL_1C_SHIFT	16
+
+#define RP_ECTL_5_R2	0x00000eb0
+#define  RP_ECTL_5_R2_RX_EQ_CTRL_L_1C_MASK	0xffffffff
+
+#define RP_ECTL_6_R2	0x00000eb4
+#define  RP_ECTL_6_R2_RX_EQ_CTRL_H_1C_MASK	0xffffffff
+
+#define RP_VEND_XP	0x00000f00
+#define  RP_VEND_XP_DL_UP			(1 << 30)
+#define  RP_VEND_XP_OPPORTUNISTIC_ACK		(1 << 27)
+#define  RP_VEND_XP_OPPORTUNISTIC_UPDATEFC	(1 << 28)
+#define  RP_VEND_XP_UPDATE_FC_THRESHOLD_MASK	(0xff << 18)
+
+#define RP_VEND_CTL0	0x00000f44
+#define  RP_VEND_CTL0_DSK_RST_PULSE_WIDTH_MASK	(0xf << 12)
+#define  RP_VEND_CTL0_DSK_RST_PULSE_WIDTH	(0x9 << 12)
+
+#define RP_VEND_CTL1	0x00000f48
+#define  RP_VEND_CTL1_ERPT	(1 << 13)
+
+#define RP_VEND_XP_BIST	0x00000f4c
+#define  RP_VEND_XP_BIST_GOTO_L1_L2_AFTER_DLLP_DONE	(1 << 28)
+
+#define RP_VEND_CTL2 0x00000fa8
+#define  RP_VEND_CTL2_PCA_ENABLE (1 << 7)
+
+#define RP_PRIV_MISC	0x00000fe0
+#define  RP_PRIV_MISC_PRSNT_MAP_EP_PRSNT		(0xe << 0)
+#define  RP_PRIV_MISC_PRSNT_MAP_EP_ABSNT		(0xf << 0)
+#define  RP_PRIV_MISC_CTLR_CLK_CLAMP_THRESHOLD_MASK	(0x7f << 16)
+#define  RP_PRIV_MISC_CTLR_CLK_CLAMP_THRESHOLD		(0xf << 16)
+#define  RP_PRIV_MISC_CTLR_CLK_CLAMP_ENABLE		(1 << 23)
+#define  RP_PRIV_MISC_TMS_CLK_CLAMP_THRESHOLD_MASK	(0x7f << 24)
+#define  RP_PRIV_MISC_TMS_CLK_CLAMP_THRESHOLD		(0xf << 24)
+#define  RP_PRIV_MISC_TMS_CLK_CLAMP_ENABLE		(1 << 31)
+
+#define RP_LINK_CONTROL_STATUS			0x00000090
+#define  RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE	0x20000000
+#define  RP_LINK_CONTROL_STATUS_LINKSTAT_MASK	0x3fff0000
+
+#define RP_LINK_CONTROL_STATUS_2		0x000000b0
+
+#define PADS_CTL_SEL		0x0000009c
+
+#define PADS_CTL		0x000000a0
+#define  PADS_CTL_IDDQ_1L	(1 << 0)
+#define  PADS_CTL_TX_DATA_EN_1L	(1 << 6)
+#define  PADS_CTL_RX_DATA_EN_1L	(1 << 10)
+
+#define PADS_PLL_CTL_TEGRA20			0x000000b8
+#define PADS_PLL_CTL_TEGRA30			0x000000b4
+#define  PADS_PLL_CTL_RST_B4SM			(1 << 1)
+#define  PADS_PLL_CTL_LOCKDET			(1 << 8)
+#define  PADS_PLL_CTL_REFCLK_MASK		(0x3 << 16)
+#define  PADS_PLL_CTL_REFCLK_INTERNAL_CML	(0 << 16)
+#define  PADS_PLL_CTL_REFCLK_INTERNAL_CMOS	(1 << 16)
+#define  PADS_PLL_CTL_REFCLK_EXTERNAL		(2 << 16)
+#define  PADS_PLL_CTL_TXCLKREF_MASK		(0x1 << 20)
+#define  PADS_PLL_CTL_TXCLKREF_DIV10		(0 << 20)
+#define  PADS_PLL_CTL_TXCLKREF_DIV5		(1 << 20)
+#define  PADS_PLL_CTL_TXCLKREF_BUF_EN		(1 << 22)
+
+#define PADS_REFCLK_CFG0			0x000000c8
+#define PADS_REFCLK_CFG1			0x000000cc
+#define PADS_REFCLK_BIAS			0x000000d0
+
+/*
+ * Fields in PADS_REFCLK_CFG*. Those registers form an array of 16-bit
+ * entries, one entry per PCIe port. These field definitions and desired
+ * values aren't in the TRM, but do come from NVIDIA.
+ */
+#define PADS_REFCLK_CFG_TERM_SHIFT		2  /* 6:2 */
+#define PADS_REFCLK_CFG_E_TERM_SHIFT		7
+#define PADS_REFCLK_CFG_PREDI_SHIFT		8  /* 11:8 */
+#define PADS_REFCLK_CFG_DRVI_SHIFT		12 /* 15:12 */
+
+#define PME_ACK_TIMEOUT 10000
+#define LINK_RETRAIN_TIMEOUT 100000 /* in usec */
+
+struct tegra_msi {
+	DECLARE_BITMAP(used, INT_PCI_MSI_NR);
+	struct irq_domain *domain;
+	struct mutex map_lock;
+	spinlock_t mask_lock;
+	void *virt;
+	dma_addr_t phys;
+	int irq;
+};
+
+/* used to differentiate between Tegra SoC generations */
+struct tegra_pcie_port_soc {
+	struct {
+		u8 turnoff_bit;
+		u8 ack_bit;
+	} pme;
+};
+
+struct tegra_pcie_soc {
+	unsigned int num_ports;
+	const struct tegra_pcie_port_soc *ports;
+	unsigned int msi_base_shift;
+	unsigned long afi_pex2_ctrl;
+	u32 pads_pll_ctl;
+	u32 tx_ref_sel;
+	u32 pads_refclk_cfg0;
+	u32 pads_refclk_cfg1;
+	u32 update_fc_threshold;
+	bool has_pex_clkreq_en;
+	bool has_pex_bias_ctrl;
+	bool has_intr_prsnt_sense;
+	bool has_cml_clk;
+	bool has_gen2;
+	bool force_pca_enable;
+	bool program_uphy;
+	bool update_clamp_threshold;
+	bool program_deskew_time;
+	bool update_fc_timer;
+	bool has_cache_bars;
+	struct {
+		struct {
+			u32 rp_ectl_2_r1;
+			u32 rp_ectl_4_r1;
+			u32 rp_ectl_5_r1;
+			u32 rp_ectl_6_r1;
+			u32 rp_ectl_2_r2;
+			u32 rp_ectl_4_r2;
+			u32 rp_ectl_5_r2;
+			u32 rp_ectl_6_r2;
+		} regs;
+		bool enable;
+	} ectl;
+};
+
+struct tegra_pcie {
+	struct device *dev;
+
+	void __iomem *pads;
+	void __iomem *afi;
+	void __iomem *cfg;
+	int irq;
+
+	struct resource cs;
+
+	struct clk *pex_clk;
+	struct clk *afi_clk;
+	struct clk *pll_e;
+	struct clk *cml_clk;
+
+	struct reset_control *pex_rst;
+	struct reset_control *afi_rst;
+	struct reset_control *pcie_xrst;
+
+	bool legacy_phy;
+	struct phy *phy;
+
+	struct tegra_msi msi;
+
+	struct list_head ports;
+	u32 xbar_config;
+
+	struct regulator_bulk_data *supplies;
+	unsigned int num_supplies;
+
+	const struct tegra_pcie_soc *soc;
+	struct dentry *debugfs;
+};
+
+static inline struct tegra_pcie *msi_to_pcie(struct tegra_msi *msi)
+{
+	return container_of(msi, struct tegra_pcie, msi);
+}
+
+struct tegra_pcie_port {
+	struct tegra_pcie *pcie;
+	struct device_node *np;
+	struct list_head list;
+	struct resource regs;
+	void __iomem *base;
+	unsigned int index;
+	unsigned int lanes;
+
+	struct phy **phys;
+
+	struct gpio_desc *reset_gpio;
+};
+
+static inline void afi_writel(struct tegra_pcie *pcie, u32 value,
+			      unsigned long offset)
+{
+	writel(value, pcie->afi + offset);
+}
+
+static inline u32 afi_readl(struct tegra_pcie *pcie, unsigned long offset)
+{
+	return readl(pcie->afi + offset);
+}
+
+static inline void pads_writel(struct tegra_pcie *pcie, u32 value,
+			       unsigned long offset)
+{
+	writel(value, pcie->pads + offset);
+}
+
+static inline u32 pads_readl(struct tegra_pcie *pcie, unsigned long offset)
+{
+	return readl(pcie->pads + offset);
+}
+
+/*
+ * The configuration space mapping on Tegra is somewhat similar to the ECAM
+ * defined by PCIe. However it deviates a bit in how the 4 bits for extended
+ * register accesses are mapped:
+ *
+ *    [27:24] extended register number
+ *    [23:16] bus number
+ *    [15:11] device number
+ *    [10: 8] function number
+ *    [ 7: 0] register number
+ *
+ * Mapping the whole extended configuration space would require 256 MiB of
+ * virtual address space, only a small part of which will actually be used.
+ *
+ * To work around this, a 4 KiB region is used to generate the required
+ * configuration transaction with relevant B:D:F and register offset values.
+ * This is achieved by dynamically programming base address and size of
+ * AFI_AXI_BAR used for end point config space mapping to make sure that the
+ * address (access to which generates correct config transaction) falls in
+ * this 4 KiB region.
+ */
+static unsigned int tegra_pcie_conf_offset(u8 bus, unsigned int devfn,
+					   unsigned int where)
+{
+	return ((where & 0xf00) << 16) | (bus << 16) | (PCI_SLOT(devfn) << 11) |
+	       (PCI_FUNC(devfn) << 8) | (where & 0xff);
+}
+
+static void __iomem *tegra_pcie_map_bus(struct pci_bus *bus,
+					unsigned int devfn,
+					int where)
+{
+	struct tegra_pcie *pcie = bus->sysdata;
+	void __iomem *addr = NULL;
+
+	if (bus->number == 0) {
+		unsigned int slot = PCI_SLOT(devfn);
+		struct tegra_pcie_port *port;
+
+		list_for_each_entry(port, &pcie->ports, list) {
+			if (port->index + 1 == slot) {
+				addr = port->base + (where & ~3);
+				break;
+			}
+		}
+	} else {
+		unsigned int offset;
+		u32 base;
+
+		offset = tegra_pcie_conf_offset(bus->number, devfn, where);
+
+		/* move 4 KiB window to offset within the FPCI region */
+		base = 0xfe100000 + ((offset & ~(SZ_4K - 1)) >> 8);
+		afi_writel(pcie, base, AFI_FPCI_BAR0);
+
+		/* move to correct offset within the 4 KiB page */
+		addr = pcie->cfg + (offset & (SZ_4K - 1));
+	}
+
+	return addr;
+}
+
+static int tegra_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
+				  int where, int size, u32 *value)
+{
+	if (bus->number == 0)
+		return pci_generic_config_read32(bus, devfn, where, size,
+						 value);
+
+	return pci_generic_config_read(bus, devfn, where, size, value);
+}
+
+static int tegra_pcie_config_write(struct pci_bus *bus, unsigned int devfn,
+				   int where, int size, u32 value)
+{
+	if (bus->number == 0)
+		return pci_generic_config_write32(bus, devfn, where, size,
+						  value);
+
+	return pci_generic_config_write(bus, devfn, where, size, value);
+}
+
+static struct pci_ops tegra_pcie_ops = {
+	.map_bus = tegra_pcie_map_bus,
+	.read = tegra_pcie_config_read,
+	.write = tegra_pcie_config_write,
+};
+
+static unsigned long tegra_pcie_port_get_pex_ctrl(struct tegra_pcie_port *port)
+{
+	const struct tegra_pcie_soc *soc = port->pcie->soc;
+	unsigned long ret = 0;
+
+	switch (port->index) {
+	case 0:
+		ret = AFI_PEX0_CTRL;
+		break;
+
+	case 1:
+		ret = AFI_PEX1_CTRL;
+		break;
+
+	case 2:
+		ret = soc->afi_pex2_ctrl;
+		break;
+	}
+
+	return ret;
+}
+
+static void tegra_pcie_port_reset(struct tegra_pcie_port *port)
+{
+	unsigned long ctrl = tegra_pcie_port_get_pex_ctrl(port);
+	unsigned long value;
+
+	/* pulse reset signal */
+	if (port->reset_gpio) {
+		gpiod_set_value(port->reset_gpio, 1);
+	} else {
+		value = afi_readl(port->pcie, ctrl);
+		value &= ~AFI_PEX_CTRL_RST;
+		afi_writel(port->pcie, value, ctrl);
+	}
+
+	usleep_range(1000, 2000);
+
+	if (port->reset_gpio) {
+		gpiod_set_value(port->reset_gpio, 0);
+	} else {
+		value = afi_readl(port->pcie, ctrl);
+		value |= AFI_PEX_CTRL_RST;
+		afi_writel(port->pcie, value, ctrl);
+	}
+}
+
+static void tegra_pcie_enable_rp_features(struct tegra_pcie_port *port)
+{
+	const struct tegra_pcie_soc *soc = port->pcie->soc;
+	u32 value;
+
+	/* Enable AER capability */
+	value = readl(port->base + RP_VEND_CTL1);
+	value |= RP_VEND_CTL1_ERPT;
+	writel(value, port->base + RP_VEND_CTL1);
+
+	/* Optimal settings to enhance bandwidth */
+	value = readl(port->base + RP_VEND_XP);
+	value |= RP_VEND_XP_OPPORTUNISTIC_ACK;
+	value |= RP_VEND_XP_OPPORTUNISTIC_UPDATEFC;
+	writel(value, port->base + RP_VEND_XP);
+
+	/*
+	 * LTSSM will wait for DLLP to finish before entering L1 or L2,
+	 * to avoid truncation of PM messages which results in receiver errors
+	 */
+	value = readl(port->base + RP_VEND_XP_BIST);
+	value |= RP_VEND_XP_BIST_GOTO_L1_L2_AFTER_DLLP_DONE;
+	writel(value, port->base + RP_VEND_XP_BIST);
+
+	value = readl(port->base + RP_PRIV_MISC);
+	value |= RP_PRIV_MISC_CTLR_CLK_CLAMP_ENABLE;
+	value |= RP_PRIV_MISC_TMS_CLK_CLAMP_ENABLE;
+
+	if (soc->update_clamp_threshold) {
+		value &= ~(RP_PRIV_MISC_CTLR_CLK_CLAMP_THRESHOLD_MASK |
+				RP_PRIV_MISC_TMS_CLK_CLAMP_THRESHOLD_MASK);
+		value |= RP_PRIV_MISC_CTLR_CLK_CLAMP_THRESHOLD |
+			RP_PRIV_MISC_TMS_CLK_CLAMP_THRESHOLD;
+	}
+
+	writel(value, port->base + RP_PRIV_MISC);
+}
+
+static void tegra_pcie_program_ectl_settings(struct tegra_pcie_port *port)
+{
+	const struct tegra_pcie_soc *soc = port->pcie->soc;
+	u32 value;
+
+	value = readl(port->base + RP_ECTL_2_R1);
+	value &= ~RP_ECTL_2_R1_RX_CTLE_1C_MASK;
+	value |= soc->ectl.regs.rp_ectl_2_r1;
+	writel(value, port->base + RP_ECTL_2_R1);
+
+	value = readl(port->base + RP_ECTL_4_R1);
+	value &= ~RP_ECTL_4_R1_RX_CDR_CTRL_1C_MASK;
+	value |= soc->ectl.regs.rp_ectl_4_r1 <<
+				RP_ECTL_4_R1_RX_CDR_CTRL_1C_SHIFT;
+	writel(value, port->base + RP_ECTL_4_R1);
+
+	value = readl(port->base + RP_ECTL_5_R1);
+	value &= ~RP_ECTL_5_R1_RX_EQ_CTRL_L_1C_MASK;
+	value |= soc->ectl.regs.rp_ectl_5_r1;
+	writel(value, port->base + RP_ECTL_5_R1);
+
+	value = readl(port->base + RP_ECTL_6_R1);
+	value &= ~RP_ECTL_6_R1_RX_EQ_CTRL_H_1C_MASK;
+	value |= soc->ectl.regs.rp_ectl_6_r1;
+	writel(value, port->base + RP_ECTL_6_R1);
+
+	value = readl(port->base + RP_ECTL_2_R2);
+	value &= ~RP_ECTL_2_R2_RX_CTLE_1C_MASK;
+	value |= soc->ectl.regs.rp_ectl_2_r2;
+	writel(value, port->base + RP_ECTL_2_R2);
+
+	value = readl(port->base + RP_ECTL_4_R2);
+	value &= ~RP_ECTL_4_R2_RX_CDR_CTRL_1C_MASK;
+	value |= soc->ectl.regs.rp_ectl_4_r2 <<
+				RP_ECTL_4_R2_RX_CDR_CTRL_1C_SHIFT;
+	writel(value, port->base + RP_ECTL_4_R2);
+
+	value = readl(port->base + RP_ECTL_5_R2);
+	value &= ~RP_ECTL_5_R2_RX_EQ_CTRL_L_1C_MASK;
+	value |= soc->ectl.regs.rp_ectl_5_r2;
+	writel(value, port->base + RP_ECTL_5_R2);
+
+	value = readl(port->base + RP_ECTL_6_R2);
+	value &= ~RP_ECTL_6_R2_RX_EQ_CTRL_H_1C_MASK;
+	value |= soc->ectl.regs.rp_ectl_6_r2;
+	writel(value, port->base + RP_ECTL_6_R2);
+}
+
+static void tegra_pcie_apply_sw_fixup(struct tegra_pcie_port *port)
+{
+	const struct tegra_pcie_soc *soc = port->pcie->soc;
+	u32 value;
+
+	/*
+	 * Sometimes link speed change from Gen2 to Gen1 fails due to
+	 * instability in deskew logic on lane-0. Increase the deskew
+	 * retry time to resolve this issue.
+	 */
+	if (soc->program_deskew_time) {
+		value = readl(port->base + RP_VEND_CTL0);
+		value &= ~RP_VEND_CTL0_DSK_RST_PULSE_WIDTH_MASK;
+		value |= RP_VEND_CTL0_DSK_RST_PULSE_WIDTH;
+		writel(value, port->base + RP_VEND_CTL0);
+	}
+
+	if (soc->update_fc_timer) {
+		value = readl(port->base + RP_VEND_XP);
+		value &= ~RP_VEND_XP_UPDATE_FC_THRESHOLD_MASK;
+		value |= soc->update_fc_threshold;
+		writel(value, port->base + RP_VEND_XP);
+	}
+
+	/*
+	 * PCIe link doesn't come up with few legacy PCIe endpoints if
+	 * root port advertises both Gen-1 and Gen-2 speeds in Tegra.
+	 * Hence, the strategy followed here is to initially advertise
+	 * only Gen-1 and after link is up, retrain link to Gen-2 speed
+	 */
+	value = readl(port->base + RP_LINK_CONTROL_STATUS_2);
+	value &= ~PCI_EXP_LNKSTA_CLS;
+	value |= PCI_EXP_LNKSTA_CLS_2_5GB;
+	writel(value, port->base + RP_LINK_CONTROL_STATUS_2);
+}
+
+static void tegra_pcie_port_enable(struct tegra_pcie_port *port)
+{
+	unsigned long ctrl = tegra_pcie_port_get_pex_ctrl(port);
+	const struct tegra_pcie_soc *soc = port->pcie->soc;
+	unsigned long value;
+
+	/* enable reference clock */
+	value = afi_readl(port->pcie, ctrl);
+	value |= AFI_PEX_CTRL_REFCLK_EN;
+
+	if (soc->has_pex_clkreq_en)
+		value |= AFI_PEX_CTRL_CLKREQ_EN;
+
+	value |= AFI_PEX_CTRL_OVERRIDE_EN;
+
+	afi_writel(port->pcie, value, ctrl);
+
+	tegra_pcie_port_reset(port);
+
+	if (soc->force_pca_enable) {
+		value = readl(port->base + RP_VEND_CTL2);
+		value |= RP_VEND_CTL2_PCA_ENABLE;
+		writel(value, port->base + RP_VEND_CTL2);
+	}
+
+	tegra_pcie_enable_rp_features(port);
+
+	if (soc->ectl.enable)
+		tegra_pcie_program_ectl_settings(port);
+
+	tegra_pcie_apply_sw_fixup(port);
+}
+
+static void tegra_pcie_port_disable(struct tegra_pcie_port *port)
+{
+	unsigned long ctrl = tegra_pcie_port_get_pex_ctrl(port);
+	const struct tegra_pcie_soc *soc = port->pcie->soc;
+	unsigned long value;
+
+	/* assert port reset */
+	value = afi_readl(port->pcie, ctrl);
+	value &= ~AFI_PEX_CTRL_RST;
+	afi_writel(port->pcie, value, ctrl);
+
+	/* disable reference clock */
+	value = afi_readl(port->pcie, ctrl);
+
+	if (soc->has_pex_clkreq_en)
+		value &= ~AFI_PEX_CTRL_CLKREQ_EN;
+
+	value &= ~AFI_PEX_CTRL_REFCLK_EN;
+	afi_writel(port->pcie, value, ctrl);
+
+	/* disable PCIe port and set CLKREQ# as GPIO to allow PLLE power down */
+	value = afi_readl(port->pcie, AFI_PCIE_CONFIG);
+	value |= AFI_PCIE_CONFIG_PCIE_DISABLE(port->index);
+	value |= AFI_PCIE_CONFIG_PCIE_CLKREQ_GPIO(port->index);
+	afi_writel(port->pcie, value, AFI_PCIE_CONFIG);
+}
+
+static void tegra_pcie_port_free(struct tegra_pcie_port *port)
+{
+	struct tegra_pcie *pcie = port->pcie;
+	struct device *dev = pcie->dev;
+
+	devm_iounmap(dev, port->base);
+	devm_release_mem_region(dev, port->regs.start,
+				resource_size(&port->regs));
+	list_del(&port->list);
+	devm_kfree(dev, port);
+}
+
+/* Tegra PCIE root complex wrongly reports device class */
+static void tegra_pcie_fixup_class(struct pci_dev *dev)
+{
+	dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0bf0, tegra_pcie_fixup_class);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0bf1, tegra_pcie_fixup_class);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1c, tegra_pcie_fixup_class);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1d, tegra_pcie_fixup_class);
+
+/* Tegra20 and Tegra30 PCIE requires relaxed ordering */
+static void tegra_pcie_relax_enable(struct pci_dev *dev)
+{
+	pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0bf0, tegra_pcie_relax_enable);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0bf1, tegra_pcie_relax_enable);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0e1c, tegra_pcie_relax_enable);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0e1d, tegra_pcie_relax_enable);
+
+static int tegra_pcie_map_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
+{
+	struct tegra_pcie *pcie = pdev->bus->sysdata;
+	int irq;
+
+	tegra_cpuidle_pcie_irqs_in_use();
+
+	irq = of_irq_parse_and_map_pci(pdev, slot, pin);
+	if (!irq)
+		irq = pcie->irq;
+
+	return irq;
+}
+
+static irqreturn_t tegra_pcie_isr(int irq, void *arg)
+{
+	static const char * const err_msg[] = {
+		"Unknown",
+		"AXI slave error",
+		"AXI decode error",
+		"Target abort",
+		"Master abort",
+		"Invalid write",
+		"Legacy interrupt",
+		"Response decoding error",
+		"AXI response decoding error",
+		"Transaction timeout",
+		"Slot present pin change",
+		"Slot clock request change",
+		"TMS clock ramp change",
+		"TMS ready for power down",
+		"Peer2Peer error",
+	};
+	struct tegra_pcie *pcie = arg;
+	struct device *dev = pcie->dev;
+	u32 code, signature;
+
+	code = afi_readl(pcie, AFI_INTR_CODE) & AFI_INTR_CODE_MASK;
+	signature = afi_readl(pcie, AFI_INTR_SIGNATURE);
+	afi_writel(pcie, 0, AFI_INTR_CODE);
+
+	if (code == AFI_INTR_LEGACY)
+		return IRQ_NONE;
+
+	if (code >= ARRAY_SIZE(err_msg))
+		code = 0;
+
+	/*
+	 * do not pollute kernel log with master abort reports since they
+	 * happen a lot during enumeration
+	 */
+	if (code == AFI_INTR_MASTER_ABORT || code == AFI_INTR_PE_PRSNT_SENSE)
+		dev_dbg(dev, "%s, signature: %08x\n", err_msg[code], signature);
+	else
+		dev_err(dev, "%s, signature: %08x\n", err_msg[code], signature);
+
+	if (code == AFI_INTR_TARGET_ABORT || code == AFI_INTR_MASTER_ABORT ||
+	    code == AFI_INTR_FPCI_DECODE_ERROR) {
+		u32 fpci = afi_readl(pcie, AFI_UPPER_FPCI_ADDRESS) & 0xff;
+		u64 address = (u64)fpci << 32 | (signature & 0xfffffffc);
+
+		if (code == AFI_INTR_MASTER_ABORT)
+			dev_dbg(dev, "  FPCI address: %10llx\n", address);
+		else
+			dev_err(dev, "  FPCI address: %10llx\n", address);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * FPCI map is as follows:
+ * - 0xfdfc000000: I/O space
+ * - 0xfdfe000000: type 0 configuration space
+ * - 0xfdff000000: type 1 configuration space
+ * - 0xfe00000000: type 0 extended configuration space
+ * - 0xfe10000000: type 1 extended configuration space
+ */
+static void tegra_pcie_setup_translations(struct tegra_pcie *pcie)
+{
+	u32 size;
+	struct resource_entry *entry;
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(pcie);
+
+	/* Bar 0: type 1 extended configuration space */
+	size = resource_size(&pcie->cs);
+	afi_writel(pcie, pcie->cs.start, AFI_AXI_BAR0_START);
+	afi_writel(pcie, size >> 12, AFI_AXI_BAR0_SZ);
+
+	resource_list_for_each_entry(entry, &bridge->windows) {
+		u32 fpci_bar, axi_address;
+		struct resource *res = entry->res;
+
+		size = resource_size(res);
+
+		switch (resource_type(res)) {
+		case IORESOURCE_IO:
+			/* Bar 1: downstream IO bar */
+			fpci_bar = 0xfdfc0000;
+			axi_address = pci_pio_to_address(res->start);
+			afi_writel(pcie, axi_address, AFI_AXI_BAR1_START);
+			afi_writel(pcie, size >> 12, AFI_AXI_BAR1_SZ);
+			afi_writel(pcie, fpci_bar, AFI_FPCI_BAR1);
+			break;
+		case IORESOURCE_MEM:
+			fpci_bar = (((res->start >> 12) & 0x0fffffff) << 4) | 0x1;
+			axi_address = res->start;
+
+			if (res->flags & IORESOURCE_PREFETCH) {
+				/* Bar 2: prefetchable memory BAR */
+				afi_writel(pcie, axi_address, AFI_AXI_BAR2_START);
+				afi_writel(pcie, size >> 12, AFI_AXI_BAR2_SZ);
+				afi_writel(pcie, fpci_bar, AFI_FPCI_BAR2);
+
+			} else {
+				/* Bar 3: non prefetchable memory BAR */
+				afi_writel(pcie, axi_address, AFI_AXI_BAR3_START);
+				afi_writel(pcie, size >> 12, AFI_AXI_BAR3_SZ);
+				afi_writel(pcie, fpci_bar, AFI_FPCI_BAR3);
+			}
+			break;
+		}
+	}
+
+	/* NULL out the remaining BARs as they are not used */
+	afi_writel(pcie, 0, AFI_AXI_BAR4_START);
+	afi_writel(pcie, 0, AFI_AXI_BAR4_SZ);
+	afi_writel(pcie, 0, AFI_FPCI_BAR4);
+
+	afi_writel(pcie, 0, AFI_AXI_BAR5_START);
+	afi_writel(pcie, 0, AFI_AXI_BAR5_SZ);
+	afi_writel(pcie, 0, AFI_FPCI_BAR5);
+
+	if (pcie->soc->has_cache_bars) {
+		/* map all upstream transactions as uncached */
+		afi_writel(pcie, 0, AFI_CACHE_BAR0_ST);
+		afi_writel(pcie, 0, AFI_CACHE_BAR0_SZ);
+		afi_writel(pcie, 0, AFI_CACHE_BAR1_ST);
+		afi_writel(pcie, 0, AFI_CACHE_BAR1_SZ);
+	}
+
+	/* MSI translations are setup only when needed */
+	afi_writel(pcie, 0, AFI_MSI_FPCI_BAR_ST);
+	afi_writel(pcie, 0, AFI_MSI_BAR_SZ);
+	afi_writel(pcie, 0, AFI_MSI_AXI_BAR_ST);
+	afi_writel(pcie, 0, AFI_MSI_BAR_SZ);
+}
+
+static int tegra_pcie_pll_wait(struct tegra_pcie *pcie, unsigned long timeout)
+{
+	const struct tegra_pcie_soc *soc = pcie->soc;
+	u32 value;
+
+	timeout = jiffies + msecs_to_jiffies(timeout);
+
+	while (time_before(jiffies, timeout)) {
+		value = pads_readl(pcie, soc->pads_pll_ctl);
+		if (value & PADS_PLL_CTL_LOCKDET)
+			return 0;
+	}
+
+	return -ETIMEDOUT;
+}
+
+static int tegra_pcie_phy_enable(struct tegra_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	const struct tegra_pcie_soc *soc = pcie->soc;
+	u32 value;
+	int err;
+
+	/* initialize internal PHY, enable up to 16 PCIE lanes */
+	pads_writel(pcie, 0x0, PADS_CTL_SEL);
+
+	/* override IDDQ to 1 on all 4 lanes */
+	value = pads_readl(pcie, PADS_CTL);
+	value |= PADS_CTL_IDDQ_1L;
+	pads_writel(pcie, value, PADS_CTL);
+
+	/*
+	 * Set up PHY PLL inputs select PLLE output as refclock,
+	 * set TX ref sel to div10 (not div5).
+	 */
+	value = pads_readl(pcie, soc->pads_pll_ctl);
+	value &= ~(PADS_PLL_CTL_REFCLK_MASK | PADS_PLL_CTL_TXCLKREF_MASK);
+	value |= PADS_PLL_CTL_REFCLK_INTERNAL_CML | soc->tx_ref_sel;
+	pads_writel(pcie, value, soc->pads_pll_ctl);
+
+	/* reset PLL */
+	value = pads_readl(pcie, soc->pads_pll_ctl);
+	value &= ~PADS_PLL_CTL_RST_B4SM;
+	pads_writel(pcie, value, soc->pads_pll_ctl);
+
+	usleep_range(20, 100);
+
+	/* take PLL out of reset  */
+	value = pads_readl(pcie, soc->pads_pll_ctl);
+	value |= PADS_PLL_CTL_RST_B4SM;
+	pads_writel(pcie, value, soc->pads_pll_ctl);
+
+	/* wait for the PLL to lock */
+	err = tegra_pcie_pll_wait(pcie, 500);
+	if (err < 0) {
+		dev_err(dev, "PLL failed to lock: %d\n", err);
+		return err;
+	}
+
+	/* turn off IDDQ override */
+	value = pads_readl(pcie, PADS_CTL);
+	value &= ~PADS_CTL_IDDQ_1L;
+	pads_writel(pcie, value, PADS_CTL);
+
+	/* enable TX/RX data */
+	value = pads_readl(pcie, PADS_CTL);
+	value |= PADS_CTL_TX_DATA_EN_1L | PADS_CTL_RX_DATA_EN_1L;
+	pads_writel(pcie, value, PADS_CTL);
+
+	return 0;
+}
+
+static int tegra_pcie_phy_disable(struct tegra_pcie *pcie)
+{
+	const struct tegra_pcie_soc *soc = pcie->soc;
+	u32 value;
+
+	/* disable TX/RX data */
+	value = pads_readl(pcie, PADS_CTL);
+	value &= ~(PADS_CTL_TX_DATA_EN_1L | PADS_CTL_RX_DATA_EN_1L);
+	pads_writel(pcie, value, PADS_CTL);
+
+	/* override IDDQ */
+	value = pads_readl(pcie, PADS_CTL);
+	value |= PADS_CTL_IDDQ_1L;
+	pads_writel(pcie, value, PADS_CTL);
+
+	/* reset PLL */
+	value = pads_readl(pcie, soc->pads_pll_ctl);
+	value &= ~PADS_PLL_CTL_RST_B4SM;
+	pads_writel(pcie, value, soc->pads_pll_ctl);
+
+	usleep_range(20, 100);
+
+	return 0;
+}
+
+static int tegra_pcie_port_phy_power_on(struct tegra_pcie_port *port)
+{
+	struct device *dev = port->pcie->dev;
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < port->lanes; i++) {
+		err = phy_power_on(port->phys[i]);
+		if (err < 0) {
+			dev_err(dev, "failed to power on PHY#%u: %d\n", i, err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int tegra_pcie_port_phy_power_off(struct tegra_pcie_port *port)
+{
+	struct device *dev = port->pcie->dev;
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < port->lanes; i++) {
+		err = phy_power_off(port->phys[i]);
+		if (err < 0) {
+			dev_err(dev, "failed to power off PHY#%u: %d\n", i,
+				err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int tegra_pcie_phy_power_on(struct tegra_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct tegra_pcie_port *port;
+	int err;
+
+	if (pcie->legacy_phy) {
+		if (pcie->phy)
+			err = phy_power_on(pcie->phy);
+		else
+			err = tegra_pcie_phy_enable(pcie);
+
+		if (err < 0)
+			dev_err(dev, "failed to power on PHY: %d\n", err);
+
+		return err;
+	}
+
+	list_for_each_entry(port, &pcie->ports, list) {
+		err = tegra_pcie_port_phy_power_on(port);
+		if (err < 0) {
+			dev_err(dev,
+				"failed to power on PCIe port %u PHY: %d\n",
+				port->index, err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int tegra_pcie_phy_power_off(struct tegra_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct tegra_pcie_port *port;
+	int err;
+
+	if (pcie->legacy_phy) {
+		if (pcie->phy)
+			err = phy_power_off(pcie->phy);
+		else
+			err = tegra_pcie_phy_disable(pcie);
+
+		if (err < 0)
+			dev_err(dev, "failed to power off PHY: %d\n", err);
+
+		return err;
+	}
+
+	list_for_each_entry(port, &pcie->ports, list) {
+		err = tegra_pcie_port_phy_power_off(port);
+		if (err < 0) {
+			dev_err(dev,
+				"failed to power off PCIe port %u PHY: %d\n",
+				port->index, err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static void tegra_pcie_enable_controller(struct tegra_pcie *pcie)
+{
+	const struct tegra_pcie_soc *soc = pcie->soc;
+	struct tegra_pcie_port *port;
+	unsigned long value;
+
+	/* enable PLL power down */
+	if (pcie->phy) {
+		value = afi_readl(pcie, AFI_PLLE_CONTROL);
+		value &= ~AFI_PLLE_CONTROL_BYPASS_PADS2PLLE_CONTROL;
+		value |= AFI_PLLE_CONTROL_PADS2PLLE_CONTROL_EN;
+		afi_writel(pcie, value, AFI_PLLE_CONTROL);
+	}
+
+	/* power down PCIe slot clock bias pad */
+	if (soc->has_pex_bias_ctrl)
+		afi_writel(pcie, 0, AFI_PEXBIAS_CTRL_0);
+
+	/* configure mode and disable all ports */
+	value = afi_readl(pcie, AFI_PCIE_CONFIG);
+	value &= ~AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_MASK;
+	value |= AFI_PCIE_CONFIG_PCIE_DISABLE_ALL | pcie->xbar_config;
+	value |= AFI_PCIE_CONFIG_PCIE_CLKREQ_GPIO_ALL;
+
+	list_for_each_entry(port, &pcie->ports, list) {
+		value &= ~AFI_PCIE_CONFIG_PCIE_DISABLE(port->index);
+		value &= ~AFI_PCIE_CONFIG_PCIE_CLKREQ_GPIO(port->index);
+	}
+
+	afi_writel(pcie, value, AFI_PCIE_CONFIG);
+
+	if (soc->has_gen2) {
+		value = afi_readl(pcie, AFI_FUSE);
+		value &= ~AFI_FUSE_PCIE_T0_GEN2_DIS;
+		afi_writel(pcie, value, AFI_FUSE);
+	} else {
+		value = afi_readl(pcie, AFI_FUSE);
+		value |= AFI_FUSE_PCIE_T0_GEN2_DIS;
+		afi_writel(pcie, value, AFI_FUSE);
+	}
+
+	/* Disable AFI dynamic clock gating and enable PCIe */
+	value = afi_readl(pcie, AFI_CONFIGURATION);
+	value |= AFI_CONFIGURATION_EN_FPCI;
+	value |= AFI_CONFIGURATION_CLKEN_OVERRIDE;
+	afi_writel(pcie, value, AFI_CONFIGURATION);
+
+	value = AFI_INTR_EN_INI_SLVERR | AFI_INTR_EN_INI_DECERR |
+		AFI_INTR_EN_TGT_SLVERR | AFI_INTR_EN_TGT_DECERR |
+		AFI_INTR_EN_TGT_WRERR | AFI_INTR_EN_DFPCI_DECERR;
+
+	if (soc->has_intr_prsnt_sense)
+		value |= AFI_INTR_EN_PRSNT_SENSE;
+
+	afi_writel(pcie, value, AFI_AFI_INTR_ENABLE);
+	afi_writel(pcie, 0xffffffff, AFI_SM_INTR_ENABLE);
+
+	/* don't enable MSI for now, only when needed */
+	afi_writel(pcie, AFI_INTR_MASK_INT_MASK, AFI_INTR_MASK);
+
+	/* disable all exceptions */
+	afi_writel(pcie, 0, AFI_FPCI_ERROR_MASKS);
+}
+
+static void tegra_pcie_power_off(struct tegra_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	const struct tegra_pcie_soc *soc = pcie->soc;
+	int err;
+
+	reset_control_assert(pcie->afi_rst);
+
+	clk_disable_unprepare(pcie->pll_e);
+	if (soc->has_cml_clk)
+		clk_disable_unprepare(pcie->cml_clk);
+	clk_disable_unprepare(pcie->afi_clk);
+
+	if (!dev->pm_domain)
+		tegra_powergate_power_off(TEGRA_POWERGATE_PCIE);
+
+	err = regulator_bulk_disable(pcie->num_supplies, pcie->supplies);
+	if (err < 0)
+		dev_warn(dev, "failed to disable regulators: %d\n", err);
+}
+
+static int tegra_pcie_power_on(struct tegra_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	const struct tegra_pcie_soc *soc = pcie->soc;
+	int err;
+
+	reset_control_assert(pcie->pcie_xrst);
+	reset_control_assert(pcie->afi_rst);
+	reset_control_assert(pcie->pex_rst);
+
+	if (!dev->pm_domain)
+		tegra_powergate_power_off(TEGRA_POWERGATE_PCIE);
+
+	/* enable regulators */
+	err = regulator_bulk_enable(pcie->num_supplies, pcie->supplies);
+	if (err < 0)
+		dev_err(dev, "failed to enable regulators: %d\n", err);
+
+	if (!dev->pm_domain) {
+		err = tegra_powergate_power_on(TEGRA_POWERGATE_PCIE);
+		if (err) {
+			dev_err(dev, "failed to power ungate: %d\n", err);
+			goto regulator_disable;
+		}
+		err = tegra_powergate_remove_clamping(TEGRA_POWERGATE_PCIE);
+		if (err) {
+			dev_err(dev, "failed to remove clamp: %d\n", err);
+			goto powergate;
+		}
+	}
+
+	err = clk_prepare_enable(pcie->afi_clk);
+	if (err < 0) {
+		dev_err(dev, "failed to enable AFI clock: %d\n", err);
+		goto powergate;
+	}
+
+	if (soc->has_cml_clk) {
+		err = clk_prepare_enable(pcie->cml_clk);
+		if (err < 0) {
+			dev_err(dev, "failed to enable CML clock: %d\n", err);
+			goto disable_afi_clk;
+		}
+	}
+
+	err = clk_prepare_enable(pcie->pll_e);
+	if (err < 0) {
+		dev_err(dev, "failed to enable PLLE clock: %d\n", err);
+		goto disable_cml_clk;
+	}
+
+	reset_control_deassert(pcie->afi_rst);
+
+	return 0;
+
+disable_cml_clk:
+	if (soc->has_cml_clk)
+		clk_disable_unprepare(pcie->cml_clk);
+disable_afi_clk:
+	clk_disable_unprepare(pcie->afi_clk);
+powergate:
+	if (!dev->pm_domain)
+		tegra_powergate_power_off(TEGRA_POWERGATE_PCIE);
+regulator_disable:
+	regulator_bulk_disable(pcie->num_supplies, pcie->supplies);
+
+	return err;
+}
+
+static void tegra_pcie_apply_pad_settings(struct tegra_pcie *pcie)
+{
+	const struct tegra_pcie_soc *soc = pcie->soc;
+
+	/* Configure the reference clock driver */
+	pads_writel(pcie, soc->pads_refclk_cfg0, PADS_REFCLK_CFG0);
+
+	if (soc->num_ports > 2)
+		pads_writel(pcie, soc->pads_refclk_cfg1, PADS_REFCLK_CFG1);
+}
+
+static int tegra_pcie_clocks_get(struct tegra_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	const struct tegra_pcie_soc *soc = pcie->soc;
+
+	pcie->pex_clk = devm_clk_get(dev, "pex");
+	if (IS_ERR(pcie->pex_clk))
+		return PTR_ERR(pcie->pex_clk);
+
+	pcie->afi_clk = devm_clk_get(dev, "afi");
+	if (IS_ERR(pcie->afi_clk))
+		return PTR_ERR(pcie->afi_clk);
+
+	pcie->pll_e = devm_clk_get(dev, "pll_e");
+	if (IS_ERR(pcie->pll_e))
+		return PTR_ERR(pcie->pll_e);
+
+	if (soc->has_cml_clk) {
+		pcie->cml_clk = devm_clk_get(dev, "cml");
+		if (IS_ERR(pcie->cml_clk))
+			return PTR_ERR(pcie->cml_clk);
+	}
+
+	return 0;
+}
+
+static int tegra_pcie_resets_get(struct tegra_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+
+	pcie->pex_rst = devm_reset_control_get_exclusive(dev, "pex");
+	if (IS_ERR(pcie->pex_rst))
+		return PTR_ERR(pcie->pex_rst);
+
+	pcie->afi_rst = devm_reset_control_get_exclusive(dev, "afi");
+	if (IS_ERR(pcie->afi_rst))
+		return PTR_ERR(pcie->afi_rst);
+
+	pcie->pcie_xrst = devm_reset_control_get_exclusive(dev, "pcie_x");
+	if (IS_ERR(pcie->pcie_xrst))
+		return PTR_ERR(pcie->pcie_xrst);
+
+	return 0;
+}
+
+static int tegra_pcie_phys_get_legacy(struct tegra_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	int err;
+
+	pcie->phy = devm_phy_optional_get(dev, "pcie");
+	if (IS_ERR(pcie->phy)) {
+		err = PTR_ERR(pcie->phy);
+		dev_err(dev, "failed to get PHY: %d\n", err);
+		return err;
+	}
+
+	err = phy_init(pcie->phy);
+	if (err < 0) {
+		dev_err(dev, "failed to initialize PHY: %d\n", err);
+		return err;
+	}
+
+	pcie->legacy_phy = true;
+
+	return 0;
+}
+
+static struct phy *devm_of_phy_optional_get_index(struct device *dev,
+						  struct device_node *np,
+						  const char *consumer,
+						  unsigned int index)
+{
+	struct phy *phy;
+	char *name;
+
+	name = kasprintf(GFP_KERNEL, "%s-%u", consumer, index);
+	if (!name)
+		return ERR_PTR(-ENOMEM);
+
+	phy = devm_of_phy_optional_get(dev, np, name);
+	kfree(name);
+
+	return phy;
+}
+
+static int tegra_pcie_port_get_phys(struct tegra_pcie_port *port)
+{
+	struct device *dev = port->pcie->dev;
+	struct phy *phy;
+	unsigned int i;
+	int err;
+
+	port->phys = devm_kcalloc(dev, sizeof(phy), port->lanes, GFP_KERNEL);
+	if (!port->phys)
+		return -ENOMEM;
+
+	for (i = 0; i < port->lanes; i++) {
+		phy = devm_of_phy_optional_get_index(dev, port->np, "pcie", i);
+		if (IS_ERR(phy)) {
+			dev_err(dev, "failed to get PHY#%u: %ld\n", i,
+				PTR_ERR(phy));
+			return PTR_ERR(phy);
+		}
+
+		err = phy_init(phy);
+		if (err < 0) {
+			dev_err(dev, "failed to initialize PHY#%u: %d\n", i,
+				err);
+			return err;
+		}
+
+		port->phys[i] = phy;
+	}
+
+	return 0;
+}
+
+static int tegra_pcie_phys_get(struct tegra_pcie *pcie)
+{
+	const struct tegra_pcie_soc *soc = pcie->soc;
+	struct device_node *np = pcie->dev->of_node;
+	struct tegra_pcie_port *port;
+	int err;
+
+	if (!soc->has_gen2 || of_property_present(np, "phys"))
+		return tegra_pcie_phys_get_legacy(pcie);
+
+	list_for_each_entry(port, &pcie->ports, list) {
+		err = tegra_pcie_port_get_phys(port);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static void tegra_pcie_phys_put(struct tegra_pcie *pcie)
+{
+	struct tegra_pcie_port *port;
+	struct device *dev = pcie->dev;
+	int err, i;
+
+	if (pcie->legacy_phy) {
+		err = phy_exit(pcie->phy);
+		if (err < 0)
+			dev_err(dev, "failed to teardown PHY: %d\n", err);
+		return;
+	}
+
+	list_for_each_entry(port, &pcie->ports, list) {
+		for (i = 0; i < port->lanes; i++) {
+			err = phy_exit(port->phys[i]);
+			if (err < 0)
+				dev_err(dev, "failed to teardown PHY#%u: %d\n",
+					i, err);
+		}
+	}
+}
+
+static int tegra_pcie_get_resources(struct tegra_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct resource *res;
+	const struct tegra_pcie_soc *soc = pcie->soc;
+	int err;
+
+	err = tegra_pcie_clocks_get(pcie);
+	if (err) {
+		dev_err(dev, "failed to get clocks: %d\n", err);
+		return err;
+	}
+
+	err = tegra_pcie_resets_get(pcie);
+	if (err) {
+		dev_err(dev, "failed to get resets: %d\n", err);
+		return err;
+	}
+
+	if (soc->program_uphy) {
+		err = tegra_pcie_phys_get(pcie);
+		if (err < 0) {
+			dev_err(dev, "failed to get PHYs: %d\n", err);
+			return err;
+		}
+	}
+
+	pcie->pads = devm_platform_ioremap_resource_byname(pdev, "pads");
+	if (IS_ERR(pcie->pads)) {
+		err = PTR_ERR(pcie->pads);
+		goto phys_put;
+	}
+
+	pcie->afi = devm_platform_ioremap_resource_byname(pdev, "afi");
+	if (IS_ERR(pcie->afi)) {
+		err = PTR_ERR(pcie->afi);
+		goto phys_put;
+	}
+
+	/* request configuration space, but remap later, on demand */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cs");
+	if (!res) {
+		err = -EADDRNOTAVAIL;
+		goto phys_put;
+	}
+
+	pcie->cs = *res;
+
+	/* constrain configuration space to 4 KiB */
+	pcie->cs.end = pcie->cs.start + SZ_4K - 1;
+
+	pcie->cfg = devm_ioremap_resource(dev, &pcie->cs);
+	if (IS_ERR(pcie->cfg)) {
+		err = PTR_ERR(pcie->cfg);
+		goto phys_put;
+	}
+
+	/* request interrupt */
+	err = platform_get_irq_byname(pdev, "intr");
+	if (err < 0)
+		goto phys_put;
+
+	pcie->irq = err;
+
+	err = request_irq(pcie->irq, tegra_pcie_isr, IRQF_SHARED, "PCIE", pcie);
+	if (err) {
+		dev_err(dev, "failed to register IRQ: %d\n", err);
+		goto phys_put;
+	}
+
+	return 0;
+
+phys_put:
+	if (soc->program_uphy)
+		tegra_pcie_phys_put(pcie);
+
+	return err;
+}
+
+static int tegra_pcie_put_resources(struct tegra_pcie *pcie)
+{
+	const struct tegra_pcie_soc *soc = pcie->soc;
+
+	if (pcie->irq > 0)
+		free_irq(pcie->irq, pcie);
+
+	if (soc->program_uphy)
+		tegra_pcie_phys_put(pcie);
+
+	return 0;
+}
+
+static void tegra_pcie_pme_turnoff(struct tegra_pcie_port *port)
+{
+	struct tegra_pcie *pcie = port->pcie;
+	const struct tegra_pcie_soc *soc = pcie->soc;
+	int err;
+	u32 val;
+	u8 ack_bit;
+
+	val = afi_readl(pcie, AFI_PCIE_PME);
+	val |= (0x1 << soc->ports[port->index].pme.turnoff_bit);
+	afi_writel(pcie, val, AFI_PCIE_PME);
+
+	ack_bit = soc->ports[port->index].pme.ack_bit;
+	err = readl_poll_timeout(pcie->afi + AFI_PCIE_PME, val,
+				 val & (0x1 << ack_bit), 1, PME_ACK_TIMEOUT);
+	if (err)
+		dev_err(pcie->dev, "PME Ack is not received on port: %d\n",
+			port->index);
+
+	usleep_range(10000, 11000);
+
+	val = afi_readl(pcie, AFI_PCIE_PME);
+	val &= ~(0x1 << soc->ports[port->index].pme.turnoff_bit);
+	afi_writel(pcie, val, AFI_PCIE_PME);
+}
+
+static void tegra_pcie_msi_irq(struct irq_desc *desc)
+{
+	struct tegra_pcie *pcie = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct tegra_msi *msi = &pcie->msi;
+	struct device *dev = pcie->dev;
+	unsigned int i;
+
+	chained_irq_enter(chip, desc);
+
+	for (i = 0; i < 8; i++) {
+		unsigned long reg = afi_readl(pcie, AFI_MSI_VEC(i));
+
+		while (reg) {
+			unsigned int offset = find_first_bit(&reg, 32);
+			unsigned int index = i * 32 + offset;
+			int ret;
+
+			ret = generic_handle_domain_irq(msi->domain->parent, index);
+			if (ret) {
+				/*
+				 * that's weird who triggered this?
+				 * just clear it
+				 */
+				dev_info(dev, "unexpected MSI\n");
+				afi_writel(pcie, BIT(index % 32), AFI_MSI_VEC(index));
+			}
+
+			/* see if there's any more pending in this vector */
+			reg = afi_readl(pcie, AFI_MSI_VEC(i));
+		}
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static void tegra_msi_top_irq_ack(struct irq_data *d)
+{
+	irq_chip_ack_parent(d);
+}
+
+static void tegra_msi_top_irq_mask(struct irq_data *d)
+{
+	pci_msi_mask_irq(d);
+	irq_chip_mask_parent(d);
+}
+
+static void tegra_msi_top_irq_unmask(struct irq_data *d)
+{
+	pci_msi_unmask_irq(d);
+	irq_chip_unmask_parent(d);
+}
+
+static struct irq_chip tegra_msi_top_chip = {
+	.name		= "Tegra PCIe MSI",
+	.irq_ack	= tegra_msi_top_irq_ack,
+	.irq_mask	= tegra_msi_top_irq_mask,
+	.irq_unmask	= tegra_msi_top_irq_unmask,
+};
+
+static void tegra_msi_irq_ack(struct irq_data *d)
+{
+	struct tegra_msi *msi = irq_data_get_irq_chip_data(d);
+	struct tegra_pcie *pcie = msi_to_pcie(msi);
+	unsigned int index = d->hwirq / 32;
+
+	/* clear the interrupt */
+	afi_writel(pcie, BIT(d->hwirq % 32), AFI_MSI_VEC(index));
+}
+
+static void tegra_msi_irq_mask(struct irq_data *d)
+{
+	struct tegra_msi *msi = irq_data_get_irq_chip_data(d);
+	struct tegra_pcie *pcie = msi_to_pcie(msi);
+	unsigned int index = d->hwirq / 32;
+	unsigned long flags;
+	u32 value;
+
+	spin_lock_irqsave(&msi->mask_lock, flags);
+	value = afi_readl(pcie, AFI_MSI_EN_VEC(index));
+	value &= ~BIT(d->hwirq % 32);
+	afi_writel(pcie, value, AFI_MSI_EN_VEC(index));
+	spin_unlock_irqrestore(&msi->mask_lock, flags);
+}
+
+static void tegra_msi_irq_unmask(struct irq_data *d)
+{
+	struct tegra_msi *msi = irq_data_get_irq_chip_data(d);
+	struct tegra_pcie *pcie = msi_to_pcie(msi);
+	unsigned int index = d->hwirq / 32;
+	unsigned long flags;
+	u32 value;
+
+	spin_lock_irqsave(&msi->mask_lock, flags);
+	value = afi_readl(pcie, AFI_MSI_EN_VEC(index));
+	value |= BIT(d->hwirq % 32);
+	afi_writel(pcie, value, AFI_MSI_EN_VEC(index));
+	spin_unlock_irqrestore(&msi->mask_lock, flags);
+}
+
+static int tegra_msi_set_affinity(struct irq_data *d, const struct cpumask *mask, bool force)
+{
+	return -EINVAL;
+}
+
+static void tegra_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct tegra_msi *msi = irq_data_get_irq_chip_data(data);
+
+	msg->address_lo = lower_32_bits(msi->phys);
+	msg->address_hi = upper_32_bits(msi->phys);
+	msg->data = data->hwirq;
+}
+
+static struct irq_chip tegra_msi_bottom_chip = {
+	.name			= "Tegra MSI",
+	.irq_ack		= tegra_msi_irq_ack,
+	.irq_mask		= tegra_msi_irq_mask,
+	.irq_unmask		= tegra_msi_irq_unmask,
+	.irq_set_affinity 	= tegra_msi_set_affinity,
+	.irq_compose_msi_msg	= tegra_compose_msi_msg,
+};
+
+static int tegra_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs, void *args)
+{
+	struct tegra_msi *msi = domain->host_data;
+	unsigned int i;
+	int hwirq;
+
+	mutex_lock(&msi->map_lock);
+
+	hwirq = bitmap_find_free_region(msi->used, INT_PCI_MSI_NR, order_base_2(nr_irqs));
+
+	mutex_unlock(&msi->map_lock);
+
+	if (hwirq < 0)
+		return -ENOSPC;
+
+	for (i = 0; i < nr_irqs; i++)
+		irq_domain_set_info(domain, virq + i, hwirq + i,
+				    &tegra_msi_bottom_chip, domain->host_data,
+				    handle_edge_irq, NULL, NULL);
+
+	tegra_cpuidle_pcie_irqs_in_use();
+
+	return 0;
+}
+
+static void tegra_msi_domain_free(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct tegra_msi *msi = domain->host_data;
+
+	mutex_lock(&msi->map_lock);
+
+	bitmap_release_region(msi->used, d->hwirq, order_base_2(nr_irqs));
+
+	mutex_unlock(&msi->map_lock);
+}
+
+static const struct irq_domain_ops tegra_msi_domain_ops = {
+	.alloc = tegra_msi_domain_alloc,
+	.free = tegra_msi_domain_free,
+};
+
+static struct msi_domain_info tegra_msi_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		   MSI_FLAG_PCI_MSIX),
+	.chip	= &tegra_msi_top_chip,
+};
+
+static int tegra_allocate_domains(struct tegra_msi *msi)
+{
+	struct tegra_pcie *pcie = msi_to_pcie(msi);
+	struct fwnode_handle *fwnode = dev_fwnode(pcie->dev);
+	struct irq_domain *parent;
+
+	parent = irq_domain_create_linear(fwnode, INT_PCI_MSI_NR,
+					  &tegra_msi_domain_ops, msi);
+	if (!parent) {
+		dev_err(pcie->dev, "failed to create IRQ domain\n");
+		return -ENOMEM;
+	}
+	irq_domain_update_bus_token(parent, DOMAIN_BUS_NEXUS);
+
+	msi->domain = pci_msi_create_irq_domain(fwnode, &tegra_msi_info, parent);
+	if (!msi->domain) {
+		dev_err(pcie->dev, "failed to create MSI domain\n");
+		irq_domain_remove(parent);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void tegra_free_domains(struct tegra_msi *msi)
+{
+	struct irq_domain *parent = msi->domain->parent;
+
+	irq_domain_remove(msi->domain);
+	irq_domain_remove(parent);
+}
+
+static int tegra_pcie_msi_setup(struct tegra_pcie *pcie)
+{
+	struct platform_device *pdev = to_platform_device(pcie->dev);
+	struct tegra_msi *msi = &pcie->msi;
+	struct device *dev = pcie->dev;
+	int err;
+
+	mutex_init(&msi->map_lock);
+	spin_lock_init(&msi->mask_lock);
+
+	if (IS_ENABLED(CONFIG_PCI_MSI)) {
+		err = tegra_allocate_domains(msi);
+		if (err)
+			return err;
+	}
+
+	err = platform_get_irq_byname(pdev, "msi");
+	if (err < 0)
+		goto free_irq_domain;
+
+	msi->irq = err;
+
+	irq_set_chained_handler_and_data(msi->irq, tegra_pcie_msi_irq, pcie);
+
+	/* Though the PCIe controller can address >32-bit address space, to
+	 * facilitate endpoints that support only 32-bit MSI target address,
+	 * the mask is set to 32-bit to make sure that MSI target address is
+	 * always a 32-bit address
+	 */
+	err = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
+	if (err < 0) {
+		dev_err(dev, "failed to set DMA coherent mask: %d\n", err);
+		goto free_irq;
+	}
+
+	msi->virt = dma_alloc_attrs(dev, PAGE_SIZE, &msi->phys, GFP_KERNEL,
+				    DMA_ATTR_NO_KERNEL_MAPPING);
+	if (!msi->virt) {
+		dev_err(dev, "failed to allocate DMA memory for MSI\n");
+		err = -ENOMEM;
+		goto free_irq;
+	}
+
+	return 0;
+
+free_irq:
+	irq_set_chained_handler_and_data(msi->irq, NULL, NULL);
+free_irq_domain:
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		tegra_free_domains(msi);
+
+	return err;
+}
+
+static void tegra_pcie_enable_msi(struct tegra_pcie *pcie)
+{
+	const struct tegra_pcie_soc *soc = pcie->soc;
+	struct tegra_msi *msi = &pcie->msi;
+	u32 reg, msi_state[INT_PCI_MSI_NR / 32];
+	int i;
+
+	afi_writel(pcie, msi->phys >> soc->msi_base_shift, AFI_MSI_FPCI_BAR_ST);
+	afi_writel(pcie, msi->phys, AFI_MSI_AXI_BAR_ST);
+	/* this register is in 4K increments */
+	afi_writel(pcie, 1, AFI_MSI_BAR_SZ);
+
+	/* Restore the MSI allocation state */
+	bitmap_to_arr32(msi_state, msi->used, INT_PCI_MSI_NR);
+	for (i = 0; i < ARRAY_SIZE(msi_state); i++)
+		afi_writel(pcie, msi_state[i], AFI_MSI_EN_VEC(i));
+
+	/* and unmask the MSI interrupt */
+	reg = afi_readl(pcie, AFI_INTR_MASK);
+	reg |= AFI_INTR_MASK_MSI_MASK;
+	afi_writel(pcie, reg, AFI_INTR_MASK);
+}
+
+static void tegra_pcie_msi_teardown(struct tegra_pcie *pcie)
+{
+	struct tegra_msi *msi = &pcie->msi;
+	unsigned int i, irq;
+
+	dma_free_attrs(pcie->dev, PAGE_SIZE, msi->virt, msi->phys,
+		       DMA_ATTR_NO_KERNEL_MAPPING);
+
+	for (i = 0; i < INT_PCI_MSI_NR; i++) {
+		irq = irq_find_mapping(msi->domain, i);
+		if (irq > 0)
+			irq_domain_free_irqs(irq, 1);
+	}
+
+	irq_set_chained_handler_and_data(msi->irq, NULL, NULL);
+
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		tegra_free_domains(msi);
+}
+
+static int tegra_pcie_disable_msi(struct tegra_pcie *pcie)
+{
+	u32 value;
+
+	/* mask the MSI interrupt */
+	value = afi_readl(pcie, AFI_INTR_MASK);
+	value &= ~AFI_INTR_MASK_MSI_MASK;
+	afi_writel(pcie, value, AFI_INTR_MASK);
+
+	return 0;
+}
+
+static void tegra_pcie_disable_interrupts(struct tegra_pcie *pcie)
+{
+	u32 value;
+
+	value = afi_readl(pcie, AFI_INTR_MASK);
+	value &= ~AFI_INTR_MASK_INT_MASK;
+	afi_writel(pcie, value, AFI_INTR_MASK);
+}
+
+static int tegra_pcie_get_xbar_config(struct tegra_pcie *pcie, u32 lanes,
+				      u32 *xbar)
+{
+	struct device *dev = pcie->dev;
+	struct device_node *np = dev->of_node;
+
+	if (of_device_is_compatible(np, "nvidia,tegra186-pcie")) {
+		switch (lanes) {
+		case 0x010004:
+			dev_info(dev, "4x1, 1x1 configuration\n");
+			*xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_401;
+			return 0;
+
+		case 0x010102:
+			dev_info(dev, "2x1, 1X1, 1x1 configuration\n");
+			*xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_211;
+			return 0;
+
+		case 0x010101:
+			dev_info(dev, "1x1, 1x1, 1x1 configuration\n");
+			*xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_111;
+			return 0;
+
+		default:
+			dev_info(dev, "wrong configuration updated in DT, "
+				 "switching to default 2x1, 1x1, 1x1 "
+				 "configuration\n");
+			*xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_211;
+			return 0;
+		}
+	} else if (of_device_is_compatible(np, "nvidia,tegra124-pcie") ||
+		   of_device_is_compatible(np, "nvidia,tegra210-pcie")) {
+		switch (lanes) {
+		case 0x0000104:
+			dev_info(dev, "4x1, 1x1 configuration\n");
+			*xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_X4_X1;
+			return 0;
+
+		case 0x0000102:
+			dev_info(dev, "2x1, 1x1 configuration\n");
+			*xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_X2_X1;
+			return 0;
+		}
+	} else if (of_device_is_compatible(np, "nvidia,tegra30-pcie")) {
+		switch (lanes) {
+		case 0x00000204:
+			dev_info(dev, "4x1, 2x1 configuration\n");
+			*xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_420;
+			return 0;
+
+		case 0x00020202:
+			dev_info(dev, "2x3 configuration\n");
+			*xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_222;
+			return 0;
+
+		case 0x00010104:
+			dev_info(dev, "4x1, 1x2 configuration\n");
+			*xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_411;
+			return 0;
+		}
+	} else if (of_device_is_compatible(np, "nvidia,tegra20-pcie")) {
+		switch (lanes) {
+		case 0x00000004:
+			dev_info(dev, "single-mode configuration\n");
+			*xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_SINGLE;
+			return 0;
+
+		case 0x00000202:
+			dev_info(dev, "dual-mode configuration\n");
+			*xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_DUAL;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Check whether a given set of supplies is available in a device tree node.
+ * This is used to check whether the new or the legacy device tree bindings
+ * should be used.
+ */
+static bool of_regulator_bulk_available(struct device_node *np,
+					struct regulator_bulk_data *supplies,
+					unsigned int num_supplies)
+{
+	char property[32];
+	unsigned int i;
+
+	for (i = 0; i < num_supplies; i++) {
+		snprintf(property, 32, "%s-supply", supplies[i].supply);
+
+		if (!of_property_present(np, property))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Old versions of the device tree binding for this device used a set of power
+ * supplies that didn't match the hardware inputs. This happened to work for a
+ * number of cases but is not future proof. However to preserve backwards-
+ * compatibility with old device trees, this function will try to use the old
+ * set of supplies.
+ */
+static int tegra_pcie_get_legacy_regulators(struct tegra_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct device_node *np = dev->of_node;
+
+	if (of_device_is_compatible(np, "nvidia,tegra30-pcie"))
+		pcie->num_supplies = 3;
+	else if (of_device_is_compatible(np, "nvidia,tegra20-pcie"))
+		pcie->num_supplies = 2;
+
+	if (pcie->num_supplies == 0) {
+		dev_err(dev, "device %pOF not supported in legacy mode\n", np);
+		return -ENODEV;
+	}
+
+	pcie->supplies = devm_kcalloc(dev, pcie->num_supplies,
+				      sizeof(*pcie->supplies),
+				      GFP_KERNEL);
+	if (!pcie->supplies)
+		return -ENOMEM;
+
+	pcie->supplies[0].supply = "pex-clk";
+	pcie->supplies[1].supply = "vdd";
+
+	if (pcie->num_supplies > 2)
+		pcie->supplies[2].supply = "avdd";
+
+	return devm_regulator_bulk_get(dev, pcie->num_supplies, pcie->supplies);
+}
+
+/*
+ * Obtains the list of regulators required for a particular generation of the
+ * IP block.
+ *
+ * This would've been nice to do simply by providing static tables for use
+ * with the regulator_bulk_*() API, but unfortunately Tegra30 is a bit quirky
+ * in that it has two pairs or AVDD_PEX and VDD_PEX supplies (PEXA and PEXB)
+ * and either seems to be optional depending on which ports are being used.
+ */
+static int tegra_pcie_get_regulators(struct tegra_pcie *pcie, u32 lane_mask)
+{
+	struct device *dev = pcie->dev;
+	struct device_node *np = dev->of_node;
+	unsigned int i = 0;
+
+	if (of_device_is_compatible(np, "nvidia,tegra186-pcie")) {
+		pcie->num_supplies = 4;
+
+		pcie->supplies = devm_kcalloc(pcie->dev, pcie->num_supplies,
+					      sizeof(*pcie->supplies),
+					      GFP_KERNEL);
+		if (!pcie->supplies)
+			return -ENOMEM;
+
+		pcie->supplies[i++].supply = "dvdd-pex";
+		pcie->supplies[i++].supply = "hvdd-pex-pll";
+		pcie->supplies[i++].supply = "hvdd-pex";
+		pcie->supplies[i++].supply = "vddio-pexctl-aud";
+	} else if (of_device_is_compatible(np, "nvidia,tegra210-pcie")) {
+		pcie->num_supplies = 3;
+
+		pcie->supplies = devm_kcalloc(pcie->dev, pcie->num_supplies,
+					      sizeof(*pcie->supplies),
+					      GFP_KERNEL);
+		if (!pcie->supplies)
+			return -ENOMEM;
+
+		pcie->supplies[i++].supply = "hvddio-pex";
+		pcie->supplies[i++].supply = "dvddio-pex";
+		pcie->supplies[i++].supply = "vddio-pex-ctl";
+	} else if (of_device_is_compatible(np, "nvidia,tegra124-pcie")) {
+		pcie->num_supplies = 4;
+
+		pcie->supplies = devm_kcalloc(dev, pcie->num_supplies,
+					      sizeof(*pcie->supplies),
+					      GFP_KERNEL);
+		if (!pcie->supplies)
+			return -ENOMEM;
+
+		pcie->supplies[i++].supply = "avddio-pex";
+		pcie->supplies[i++].supply = "dvddio-pex";
+		pcie->supplies[i++].supply = "hvdd-pex";
+		pcie->supplies[i++].supply = "vddio-pex-ctl";
+	} else if (of_device_is_compatible(np, "nvidia,tegra30-pcie")) {
+		bool need_pexa = false, need_pexb = false;
+
+		/* VDD_PEXA and AVDD_PEXA supply lanes 0 to 3 */
+		if (lane_mask & 0x0f)
+			need_pexa = true;
+
+		/* VDD_PEXB and AVDD_PEXB supply lanes 4 to 5 */
+		if (lane_mask & 0x30)
+			need_pexb = true;
+
+		pcie->num_supplies = 4 + (need_pexa ? 2 : 0) +
+					 (need_pexb ? 2 : 0);
+
+		pcie->supplies = devm_kcalloc(dev, pcie->num_supplies,
+					      sizeof(*pcie->supplies),
+					      GFP_KERNEL);
+		if (!pcie->supplies)
+			return -ENOMEM;
+
+		pcie->supplies[i++].supply = "avdd-pex-pll";
+		pcie->supplies[i++].supply = "hvdd-pex";
+		pcie->supplies[i++].supply = "vddio-pex-ctl";
+		pcie->supplies[i++].supply = "avdd-plle";
+
+		if (need_pexa) {
+			pcie->supplies[i++].supply = "avdd-pexa";
+			pcie->supplies[i++].supply = "vdd-pexa";
+		}
+
+		if (need_pexb) {
+			pcie->supplies[i++].supply = "avdd-pexb";
+			pcie->supplies[i++].supply = "vdd-pexb";
+		}
+	} else if (of_device_is_compatible(np, "nvidia,tegra20-pcie")) {
+		pcie->num_supplies = 5;
+
+		pcie->supplies = devm_kcalloc(dev, pcie->num_supplies,
+					      sizeof(*pcie->supplies),
+					      GFP_KERNEL);
+		if (!pcie->supplies)
+			return -ENOMEM;
+
+		pcie->supplies[0].supply = "avdd-pex";
+		pcie->supplies[1].supply = "vdd-pex";
+		pcie->supplies[2].supply = "avdd-pex-pll";
+		pcie->supplies[3].supply = "avdd-plle";
+		pcie->supplies[4].supply = "vddio-pex-clk";
+	}
+
+	if (of_regulator_bulk_available(dev->of_node, pcie->supplies,
+					pcie->num_supplies))
+		return devm_regulator_bulk_get(dev, pcie->num_supplies,
+					       pcie->supplies);
+
+	/*
+	 * If not all regulators are available for this new scheme, assume
+	 * that the device tree complies with an older version of the device
+	 * tree binding.
+	 */
+	dev_info(dev, "using legacy DT binding for power supplies\n");
+
+	devm_kfree(dev, pcie->supplies);
+	pcie->num_supplies = 0;
+
+	return tegra_pcie_get_legacy_regulators(pcie);
+}
+
+static int tegra_pcie_parse_dt(struct tegra_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct device_node *np = dev->of_node, *port;
+	const struct tegra_pcie_soc *soc = pcie->soc;
+	u32 lanes = 0, mask = 0;
+	unsigned int lane = 0;
+	int err;
+
+	/* parse root ports */
+	for_each_child_of_node(np, port) {
+		struct tegra_pcie_port *rp;
+		unsigned int index;
+		u32 value;
+		char *label;
+
+		err = of_pci_get_devfn(port);
+		if (err < 0) {
+			dev_err(dev, "failed to parse address: %d\n", err);
+			goto err_node_put;
+		}
+
+		index = PCI_SLOT(err);
+
+		if (index < 1 || index > soc->num_ports) {
+			dev_err(dev, "invalid port number: %d\n", index);
+			err = -EINVAL;
+			goto err_node_put;
+		}
+
+		index--;
+
+		err = of_property_read_u32(port, "nvidia,num-lanes", &value);
+		if (err < 0) {
+			dev_err(dev, "failed to parse # of lanes: %d\n",
+				err);
+			goto err_node_put;
+		}
+
+		if (value > 16) {
+			dev_err(dev, "invalid # of lanes: %u\n", value);
+			err = -EINVAL;
+			goto err_node_put;
+		}
+
+		lanes |= value << (index << 3);
+
+		if (!of_device_is_available(port)) {
+			lane += value;
+			continue;
+		}
+
+		mask |= ((1 << value) - 1) << lane;
+		lane += value;
+
+		rp = devm_kzalloc(dev, sizeof(*rp), GFP_KERNEL);
+		if (!rp) {
+			err = -ENOMEM;
+			goto err_node_put;
+		}
+
+		err = of_address_to_resource(port, 0, &rp->regs);
+		if (err < 0) {
+			dev_err(dev, "failed to parse address: %d\n", err);
+			goto err_node_put;
+		}
+
+		INIT_LIST_HEAD(&rp->list);
+		rp->index = index;
+		rp->lanes = value;
+		rp->pcie = pcie;
+		rp->np = port;
+
+		rp->base = devm_pci_remap_cfg_resource(dev, &rp->regs);
+		if (IS_ERR(rp->base)) {
+			err = PTR_ERR(rp->base);
+			goto err_node_put;
+		}
+
+		label = devm_kasprintf(dev, GFP_KERNEL, "pex-reset-%u", index);
+		if (!label) {
+			err = -ENOMEM;
+			goto err_node_put;
+		}
+
+		/*
+		 * Returns -ENOENT if reset-gpios property is not populated
+		 * and in this case fall back to using AFI per port register
+		 * to toggle PERST# SFIO line.
+		 */
+		rp->reset_gpio = devm_fwnode_gpiod_get(dev,
+						       of_fwnode_handle(port),
+						       "reset",
+						       GPIOD_OUT_LOW,
+						       label);
+		if (IS_ERR(rp->reset_gpio)) {
+			if (PTR_ERR(rp->reset_gpio) == -ENOENT) {
+				rp->reset_gpio = NULL;
+			} else {
+				dev_err(dev, "failed to get reset GPIO: %ld\n",
+					PTR_ERR(rp->reset_gpio));
+				err = PTR_ERR(rp->reset_gpio);
+				goto err_node_put;
+			}
+		}
+
+		list_add_tail(&rp->list, &pcie->ports);
+	}
+
+	err = tegra_pcie_get_xbar_config(pcie, lanes, &pcie->xbar_config);
+	if (err < 0) {
+		dev_err(dev, "invalid lane configuration\n");
+		return err;
+	}
+
+	err = tegra_pcie_get_regulators(pcie, mask);
+	if (err < 0)
+		return err;
+
+	return 0;
+
+err_node_put:
+	of_node_put(port);
+	return err;
+}
+
+/*
+ * FIXME: If there are no PCIe cards attached, then calling this function
+ * can result in the increase of the bootup time as there are big timeout
+ * loops.
+ */
+#define TEGRA_PCIE_LINKUP_TIMEOUT	200	/* up to 1.2 seconds */
+static bool tegra_pcie_port_check_link(struct tegra_pcie_port *port)
+{
+	struct device *dev = port->pcie->dev;
+	unsigned int retries = 3;
+	unsigned long value;
+
+	/* override presence detection */
+	value = readl(port->base + RP_PRIV_MISC);
+	value &= ~RP_PRIV_MISC_PRSNT_MAP_EP_ABSNT;
+	value |= RP_PRIV_MISC_PRSNT_MAP_EP_PRSNT;
+	writel(value, port->base + RP_PRIV_MISC);
+
+	do {
+		unsigned int timeout = TEGRA_PCIE_LINKUP_TIMEOUT;
+
+		do {
+			value = readl(port->base + RP_VEND_XP);
+
+			if (value & RP_VEND_XP_DL_UP)
+				break;
+
+			usleep_range(1000, 2000);
+		} while (--timeout);
+
+		if (!timeout) {
+			dev_dbg(dev, "link %u down, retrying\n", port->index);
+			goto retry;
+		}
+
+		timeout = TEGRA_PCIE_LINKUP_TIMEOUT;
+
+		do {
+			value = readl(port->base + RP_LINK_CONTROL_STATUS);
+
+			if (value & RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE)
+				return true;
+
+			usleep_range(1000, 2000);
+		} while (--timeout);
+
+retry:
+		tegra_pcie_port_reset(port);
+	} while (--retries);
+
+	return false;
+}
+
+static void tegra_pcie_change_link_speed(struct tegra_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct tegra_pcie_port *port;
+	ktime_t deadline;
+	u32 value;
+
+	list_for_each_entry(port, &pcie->ports, list) {
+		/*
+		 * "Supported Link Speeds Vector" in "Link Capabilities 2"
+		 * is not supported by Tegra. tegra_pcie_change_link_speed()
+		 * is called only for Tegra chips which support Gen2.
+		 * So there no harm if supported link speed is not verified.
+		 */
+		value = readl(port->base + RP_LINK_CONTROL_STATUS_2);
+		value &= ~PCI_EXP_LNKSTA_CLS;
+		value |= PCI_EXP_LNKSTA_CLS_5_0GB;
+		writel(value, port->base + RP_LINK_CONTROL_STATUS_2);
+
+		/*
+		 * Poll until link comes back from recovery to avoid race
+		 * condition.
+		 */
+		deadline = ktime_add_us(ktime_get(), LINK_RETRAIN_TIMEOUT);
+
+		while (ktime_before(ktime_get(), deadline)) {
+			value = readl(port->base + RP_LINK_CONTROL_STATUS);
+			if ((value & PCI_EXP_LNKSTA_LT) == 0)
+				break;
+
+			usleep_range(2000, 3000);
+		}
+
+		if (value & PCI_EXP_LNKSTA_LT)
+			dev_warn(dev, "PCIe port %u link is in recovery\n",
+				 port->index);
+
+		/* Retrain the link */
+		value = readl(port->base + RP_LINK_CONTROL_STATUS);
+		value |= PCI_EXP_LNKCTL_RL;
+		writel(value, port->base + RP_LINK_CONTROL_STATUS);
+
+		deadline = ktime_add_us(ktime_get(), LINK_RETRAIN_TIMEOUT);
+
+		while (ktime_before(ktime_get(), deadline)) {
+			value = readl(port->base + RP_LINK_CONTROL_STATUS);
+			if ((value & PCI_EXP_LNKSTA_LT) == 0)
+				break;
+
+			usleep_range(2000, 3000);
+		}
+
+		if (value & PCI_EXP_LNKSTA_LT)
+			dev_err(dev, "failed to retrain link of port %u\n",
+				port->index);
+	}
+}
+
+static void tegra_pcie_enable_ports(struct tegra_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct tegra_pcie_port *port, *tmp;
+
+	list_for_each_entry_safe(port, tmp, &pcie->ports, list) {
+		dev_info(dev, "probing port %u, using %u lanes\n",
+			 port->index, port->lanes);
+
+		tegra_pcie_port_enable(port);
+	}
+
+	/* Start LTSSM from Tegra side */
+	reset_control_deassert(pcie->pcie_xrst);
+
+	list_for_each_entry_safe(port, tmp, &pcie->ports, list) {
+		if (tegra_pcie_port_check_link(port))
+			continue;
+
+		dev_info(dev, "link %u down, ignoring\n", port->index);
+
+		tegra_pcie_port_disable(port);
+		tegra_pcie_port_free(port);
+	}
+
+	if (pcie->soc->has_gen2)
+		tegra_pcie_change_link_speed(pcie);
+}
+
+static void tegra_pcie_disable_ports(struct tegra_pcie *pcie)
+{
+	struct tegra_pcie_port *port, *tmp;
+
+	reset_control_assert(pcie->pcie_xrst);
+
+	list_for_each_entry_safe(port, tmp, &pcie->ports, list)
+		tegra_pcie_port_disable(port);
+}
+
+static const struct tegra_pcie_port_soc tegra20_pcie_ports[] = {
+	{ .pme.turnoff_bit = 0, .pme.ack_bit =  5 },
+	{ .pme.turnoff_bit = 8, .pme.ack_bit = 10 },
+};
+
+static const struct tegra_pcie_soc tegra20_pcie = {
+	.num_ports = 2,
+	.ports = tegra20_pcie_ports,
+	.msi_base_shift = 0,
+	.pads_pll_ctl = PADS_PLL_CTL_TEGRA20,
+	.tx_ref_sel = PADS_PLL_CTL_TXCLKREF_DIV10,
+	.pads_refclk_cfg0 = 0xfa5cfa5c,
+	.has_pex_clkreq_en = false,
+	.has_pex_bias_ctrl = false,
+	.has_intr_prsnt_sense = false,
+	.has_cml_clk = false,
+	.has_gen2 = false,
+	.force_pca_enable = false,
+	.program_uphy = true,
+	.update_clamp_threshold = false,
+	.program_deskew_time = false,
+	.update_fc_timer = false,
+	.has_cache_bars = true,
+	.ectl.enable = false,
+};
+
+static const struct tegra_pcie_port_soc tegra30_pcie_ports[] = {
+	{ .pme.turnoff_bit =  0, .pme.ack_bit =  5 },
+	{ .pme.turnoff_bit =  8, .pme.ack_bit = 10 },
+	{ .pme.turnoff_bit = 16, .pme.ack_bit = 18 },
+};
+
+static const struct tegra_pcie_soc tegra30_pcie = {
+	.num_ports = 3,
+	.ports = tegra30_pcie_ports,
+	.msi_base_shift = 8,
+	.afi_pex2_ctrl = 0x128,
+	.pads_pll_ctl = PADS_PLL_CTL_TEGRA30,
+	.tx_ref_sel = PADS_PLL_CTL_TXCLKREF_BUF_EN,
+	.pads_refclk_cfg0 = 0xfa5cfa5c,
+	.pads_refclk_cfg1 = 0xfa5cfa5c,
+	.has_pex_clkreq_en = true,
+	.has_pex_bias_ctrl = true,
+	.has_intr_prsnt_sense = true,
+	.has_cml_clk = true,
+	.has_gen2 = false,
+	.force_pca_enable = false,
+	.program_uphy = true,
+	.update_clamp_threshold = false,
+	.program_deskew_time = false,
+	.update_fc_timer = false,
+	.has_cache_bars = false,
+	.ectl.enable = false,
+};
+
+static const struct tegra_pcie_soc tegra124_pcie = {
+	.num_ports = 2,
+	.ports = tegra20_pcie_ports,
+	.msi_base_shift = 8,
+	.pads_pll_ctl = PADS_PLL_CTL_TEGRA30,
+	.tx_ref_sel = PADS_PLL_CTL_TXCLKREF_BUF_EN,
+	.pads_refclk_cfg0 = 0x44ac44ac,
+	.has_pex_clkreq_en = true,
+	.has_pex_bias_ctrl = true,
+	.has_intr_prsnt_sense = true,
+	.has_cml_clk = true,
+	.has_gen2 = true,
+	.force_pca_enable = false,
+	.program_uphy = true,
+	.update_clamp_threshold = true,
+	.program_deskew_time = false,
+	.update_fc_timer = false,
+	.has_cache_bars = false,
+	.ectl.enable = false,
+};
+
+static const struct tegra_pcie_soc tegra210_pcie = {
+	.num_ports = 2,
+	.ports = tegra20_pcie_ports,
+	.msi_base_shift = 8,
+	.pads_pll_ctl = PADS_PLL_CTL_TEGRA30,
+	.tx_ref_sel = PADS_PLL_CTL_TXCLKREF_BUF_EN,
+	.pads_refclk_cfg0 = 0x90b890b8,
+	/* FC threshold is bit[25:18] */
+	.update_fc_threshold = 0x01800000,
+	.has_pex_clkreq_en = true,
+	.has_pex_bias_ctrl = true,
+	.has_intr_prsnt_sense = true,
+	.has_cml_clk = true,
+	.has_gen2 = true,
+	.force_pca_enable = true,
+	.program_uphy = true,
+	.update_clamp_threshold = true,
+	.program_deskew_time = true,
+	.update_fc_timer = true,
+	.has_cache_bars = false,
+	.ectl = {
+		.regs = {
+			.rp_ectl_2_r1 = 0x0000000f,
+			.rp_ectl_4_r1 = 0x00000067,
+			.rp_ectl_5_r1 = 0x55010000,
+			.rp_ectl_6_r1 = 0x00000001,
+			.rp_ectl_2_r2 = 0x0000008f,
+			.rp_ectl_4_r2 = 0x000000c7,
+			.rp_ectl_5_r2 = 0x55010000,
+			.rp_ectl_6_r2 = 0x00000001,
+		},
+		.enable = true,
+	},
+};
+
+static const struct tegra_pcie_port_soc tegra186_pcie_ports[] = {
+	{ .pme.turnoff_bit =  0, .pme.ack_bit =  5 },
+	{ .pme.turnoff_bit =  8, .pme.ack_bit = 10 },
+	{ .pme.turnoff_bit = 12, .pme.ack_bit = 14 },
+};
+
+static const struct tegra_pcie_soc tegra186_pcie = {
+	.num_ports = 3,
+	.ports = tegra186_pcie_ports,
+	.msi_base_shift = 8,
+	.afi_pex2_ctrl = 0x19c,
+	.pads_pll_ctl = PADS_PLL_CTL_TEGRA30,
+	.tx_ref_sel = PADS_PLL_CTL_TXCLKREF_BUF_EN,
+	.pads_refclk_cfg0 = 0x80b880b8,
+	.pads_refclk_cfg1 = 0x000480b8,
+	.has_pex_clkreq_en = true,
+	.has_pex_bias_ctrl = true,
+	.has_intr_prsnt_sense = true,
+	.has_cml_clk = false,
+	.has_gen2 = true,
+	.force_pca_enable = false,
+	.program_uphy = false,
+	.update_clamp_threshold = false,
+	.program_deskew_time = false,
+	.update_fc_timer = false,
+	.has_cache_bars = false,
+	.ectl.enable = false,
+};
+
+static const struct of_device_id tegra_pcie_of_match[] = {
+	{ .compatible = "nvidia,tegra186-pcie", .data = &tegra186_pcie },
+	{ .compatible = "nvidia,tegra210-pcie", .data = &tegra210_pcie },
+	{ .compatible = "nvidia,tegra124-pcie", .data = &tegra124_pcie },
+	{ .compatible = "nvidia,tegra30-pcie", .data = &tegra30_pcie },
+	{ .compatible = "nvidia,tegra20-pcie", .data = &tegra20_pcie },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, tegra_pcie_of_match);
+
+static void *tegra_pcie_ports_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct tegra_pcie *pcie = s->private;
+
+	if (list_empty(&pcie->ports))
+		return NULL;
+
+	seq_puts(s, "Index  Status\n");
+
+	return seq_list_start(&pcie->ports, *pos);
+}
+
+static void *tegra_pcie_ports_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct tegra_pcie *pcie = s->private;
+
+	return seq_list_next(v, &pcie->ports, pos);
+}
+
+static void tegra_pcie_ports_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int tegra_pcie_ports_seq_show(struct seq_file *s, void *v)
+{
+	bool up = false, active = false;
+	struct tegra_pcie_port *port;
+	unsigned int value;
+
+	port = list_entry(v, struct tegra_pcie_port, list);
+
+	value = readl(port->base + RP_VEND_XP);
+
+	if (value & RP_VEND_XP_DL_UP)
+		up = true;
+
+	value = readl(port->base + RP_LINK_CONTROL_STATUS);
+
+	if (value & RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE)
+		active = true;
+
+	seq_printf(s, "%2u     ", port->index);
+
+	if (up)
+		seq_puts(s, "up");
+
+	if (active) {
+		if (up)
+			seq_puts(s, ", ");
+
+		seq_puts(s, "active");
+	}
+
+	seq_puts(s, "\n");
+	return 0;
+}
+
+static const struct seq_operations tegra_pcie_ports_sops = {
+	.start = tegra_pcie_ports_seq_start,
+	.next = tegra_pcie_ports_seq_next,
+	.stop = tegra_pcie_ports_seq_stop,
+	.show = tegra_pcie_ports_seq_show,
+};
+
+DEFINE_SEQ_ATTRIBUTE(tegra_pcie_ports);
+
+static void tegra_pcie_debugfs_exit(struct tegra_pcie *pcie)
+{
+	debugfs_remove_recursive(pcie->debugfs);
+	pcie->debugfs = NULL;
+}
+
+static void tegra_pcie_debugfs_init(struct tegra_pcie *pcie)
+{
+	pcie->debugfs = debugfs_create_dir("pcie", NULL);
+
+	debugfs_create_file("ports", S_IFREG | S_IRUGO, pcie->debugfs, pcie,
+			    &tegra_pcie_ports_fops);
+}
+
+static int tegra_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct pci_host_bridge *host;
+	struct tegra_pcie *pcie;
+	int err;
+
+	host = devm_pci_alloc_host_bridge(dev, sizeof(*pcie));
+	if (!host)
+		return -ENOMEM;
+
+	pcie = pci_host_bridge_priv(host);
+	host->sysdata = pcie;
+	platform_set_drvdata(pdev, pcie);
+
+	pcie->soc = of_device_get_match_data(dev);
+	INIT_LIST_HEAD(&pcie->ports);
+	pcie->dev = dev;
+
+	err = tegra_pcie_parse_dt(pcie);
+	if (err < 0)
+		return err;
+
+	err = tegra_pcie_get_resources(pcie);
+	if (err < 0) {
+		dev_err(dev, "failed to request resources: %d\n", err);
+		return err;
+	}
+
+	err = tegra_pcie_msi_setup(pcie);
+	if (err < 0) {
+		dev_err(dev, "failed to enable MSI support: %d\n", err);
+		goto put_resources;
+	}
+
+	pm_runtime_enable(pcie->dev);
+	err = pm_runtime_get_sync(pcie->dev);
+	if (err < 0) {
+		dev_err(dev, "fail to enable pcie controller: %d\n", err);
+		goto pm_runtime_put;
+	}
+
+	host->ops = &tegra_pcie_ops;
+	host->map_irq = tegra_pcie_map_irq;
+
+	err = pci_host_probe(host);
+	if (err < 0) {
+		dev_err(dev, "failed to register host: %d\n", err);
+		goto pm_runtime_put;
+	}
+
+	if (IS_ENABLED(CONFIG_DEBUG_FS))
+		tegra_pcie_debugfs_init(pcie);
+
+	return 0;
+
+pm_runtime_put:
+	pm_runtime_put_sync(pcie->dev);
+	pm_runtime_disable(pcie->dev);
+	tegra_pcie_msi_teardown(pcie);
+put_resources:
+	tegra_pcie_put_resources(pcie);
+	return err;
+}
+
+static void tegra_pcie_remove(struct platform_device *pdev)
+{
+	struct tegra_pcie *pcie = platform_get_drvdata(pdev);
+	struct pci_host_bridge *host = pci_host_bridge_from_priv(pcie);
+	struct tegra_pcie_port *port, *tmp;
+
+	if (IS_ENABLED(CONFIG_DEBUG_FS))
+		tegra_pcie_debugfs_exit(pcie);
+
+	pci_stop_root_bus(host->bus);
+	pci_remove_root_bus(host->bus);
+	pm_runtime_put_sync(pcie->dev);
+	pm_runtime_disable(pcie->dev);
+
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		tegra_pcie_msi_teardown(pcie);
+
+	tegra_pcie_put_resources(pcie);
+
+	list_for_each_entry_safe(port, tmp, &pcie->ports, list)
+		tegra_pcie_port_free(port);
+}
+
+static int tegra_pcie_pm_suspend(struct device *dev)
+{
+	struct tegra_pcie *pcie = dev_get_drvdata(dev);
+	struct tegra_pcie_port *port;
+	int err;
+
+	list_for_each_entry(port, &pcie->ports, list)
+		tegra_pcie_pme_turnoff(port);
+
+	tegra_pcie_disable_ports(pcie);
+
+	/*
+	 * AFI_INTR is unmasked in tegra_pcie_enable_controller(), mask it to
+	 * avoid unwanted interrupts raised by AFI after pex_rst is asserted.
+	 */
+	tegra_pcie_disable_interrupts(pcie);
+
+	if (pcie->soc->program_uphy) {
+		err = tegra_pcie_phy_power_off(pcie);
+		if (err < 0)
+			dev_err(dev, "failed to power off PHY(s): %d\n", err);
+	}
+
+	reset_control_assert(pcie->pex_rst);
+	clk_disable_unprepare(pcie->pex_clk);
+
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		tegra_pcie_disable_msi(pcie);
+
+	pinctrl_pm_select_idle_state(dev);
+	tegra_pcie_power_off(pcie);
+
+	return 0;
+}
+
+static int tegra_pcie_pm_resume(struct device *dev)
+{
+	struct tegra_pcie *pcie = dev_get_drvdata(dev);
+	int err;
+
+	err = tegra_pcie_power_on(pcie);
+	if (err) {
+		dev_err(dev, "tegra pcie power on fail: %d\n", err);
+		return err;
+	}
+
+	err = pinctrl_pm_select_default_state(dev);
+	if (err < 0) {
+		dev_err(dev, "failed to disable PCIe IO DPD: %d\n", err);
+		goto poweroff;
+	}
+
+	tegra_pcie_enable_controller(pcie);
+	tegra_pcie_setup_translations(pcie);
+
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		tegra_pcie_enable_msi(pcie);
+
+	err = clk_prepare_enable(pcie->pex_clk);
+	if (err) {
+		dev_err(dev, "failed to enable PEX clock: %d\n", err);
+		goto pex_dpd_enable;
+	}
+
+	reset_control_deassert(pcie->pex_rst);
+
+	if (pcie->soc->program_uphy) {
+		err = tegra_pcie_phy_power_on(pcie);
+		if (err < 0) {
+			dev_err(dev, "failed to power on PHY(s): %d\n", err);
+			goto disable_pex_clk;
+		}
+	}
+
+	tegra_pcie_apply_pad_settings(pcie);
+	tegra_pcie_enable_ports(pcie);
+
+	return 0;
+
+disable_pex_clk:
+	reset_control_assert(pcie->pex_rst);
+	clk_disable_unprepare(pcie->pex_clk);
+pex_dpd_enable:
+	pinctrl_pm_select_idle_state(dev);
+poweroff:
+	tegra_pcie_power_off(pcie);
+
+	return err;
+}
+
+static const struct dev_pm_ops tegra_pcie_pm_ops = {
+	RUNTIME_PM_OPS(tegra_pcie_pm_suspend, tegra_pcie_pm_resume, NULL)
+	NOIRQ_SYSTEM_SLEEP_PM_OPS(tegra_pcie_pm_suspend, tegra_pcie_pm_resume)
+};
+
+static struct platform_driver tegra_pcie_driver = {
+	.driver = {
+		.name = "tegra-pcie",
+		.of_match_table = tegra_pcie_of_match,
+		.suppress_bind_attrs = true,
+		.pm = &tegra_pcie_pm_ops,
+	},
+	.probe = tegra_pcie_probe,
+	.remove_new = tegra_pcie_remove,
+};
+module_platform_driver(tegra_pcie_driver);
diff --git a/drivers/pci/controller/pci-thunder-ecam.c b/drivers/pci/controller/pci-thunder-ecam.c
new file mode 100644
index 000000000..b5bd10a62
--- /dev/null
+++ b/drivers/pci/controller/pci-thunder-ecam.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015, 2016 Cavium, Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/of_pci.h>
+#include <linux/of.h>
+#include <linux/pci-ecam.h>
+#include <linux/platform_device.h>
+
+#if defined(CONFIG_PCI_HOST_THUNDER_ECAM) || (defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS))
+
+static void set_val(u32 v, int where, int size, u32 *val)
+{
+	int shift = (where & 3) * 8;
+
+	pr_debug("set_val %04x: %08x\n", (unsigned int)(where & ~3), v);
+	v >>= shift;
+	if (size == 1)
+		v &= 0xff;
+	else if (size == 2)
+		v &= 0xffff;
+	*val = v;
+}
+
+static int handle_ea_bar(u32 e0, int bar, struct pci_bus *bus,
+			 unsigned int devfn, int where, int size, u32 *val)
+{
+	void __iomem *addr;
+	u32 v;
+
+	/* Entries are 16-byte aligned; bits[2,3] select word in entry */
+	int where_a = where & 0xc;
+
+	if (where_a == 0) {
+		set_val(e0, where, size, val);
+		return PCIBIOS_SUCCESSFUL;
+	}
+	if (where_a == 0x4) {
+		addr = bus->ops->map_bus(bus, devfn, bar); /* BAR 0 */
+		if (!addr)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+		v = readl(addr);
+		v &= ~0xf;
+		v |= 2; /* EA entry-1. Base-L */
+		set_val(v, where, size, val);
+		return PCIBIOS_SUCCESSFUL;
+	}
+	if (where_a == 0x8) {
+		u32 barl_orig;
+		u32 barl_rb;
+
+		addr = bus->ops->map_bus(bus, devfn, bar); /* BAR 0 */
+		if (!addr)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+		barl_orig = readl(addr + 0);
+		writel(0xffffffff, addr + 0);
+		barl_rb = readl(addr + 0);
+		writel(barl_orig, addr + 0);
+		/* zeros in unsettable bits */
+		v = ~barl_rb & ~3;
+		v |= 0xc; /* EA entry-2. Offset-L */
+		set_val(v, where, size, val);
+		return PCIBIOS_SUCCESSFUL;
+	}
+	if (where_a == 0xc) {
+		addr = bus->ops->map_bus(bus, devfn, bar + 4); /* BAR 1 */
+		if (!addr)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+		v = readl(addr); /* EA entry-3. Base-H */
+		set_val(v, where, size, val);
+		return PCIBIOS_SUCCESSFUL;
+	}
+	return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+static int thunder_ecam_p2_config_read(struct pci_bus *bus, unsigned int devfn,
+				       int where, int size, u32 *val)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	int where_a = where & ~3;
+	void __iomem *addr;
+	u32 node_bits;
+	u32 v;
+
+	/* EA Base[63:32] may be missing some bits ... */
+	switch (where_a) {
+	case 0xa8:
+	case 0xbc:
+	case 0xd0:
+	case 0xe4:
+		break;
+	default:
+		return pci_generic_config_read(bus, devfn, where, size, val);
+	}
+
+	addr = bus->ops->map_bus(bus, devfn, where_a);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	v = readl(addr);
+
+	/*
+	 * Bit 44 of the 64-bit Base must match the same bit in
+	 * the config space access window.  Since we are working with
+	 * the high-order 32 bits, shift everything down by 32 bits.
+	 */
+	node_bits = upper_32_bits(cfg->res.start) & (1 << 12);
+
+	v |= node_bits;
+	set_val(v, where, size, val);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int thunder_ecam_config_read(struct pci_bus *bus, unsigned int devfn,
+				    int where, int size, u32 *val)
+{
+	u32 v;
+	u32 vendor_device;
+	u32 class_rev;
+	void __iomem *addr;
+	int cfg_type;
+	int where_a = where & ~3;
+
+	addr = bus->ops->map_bus(bus, devfn, 0xc);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	v = readl(addr);
+
+	/* Check for non type-00 header */
+	cfg_type = (v >> 16) & 0x7f;
+
+	addr = bus->ops->map_bus(bus, devfn, 8);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	class_rev = readl(addr);
+	if (class_rev == 0xffffffff)
+		goto no_emulation;
+
+	if ((class_rev & 0xff) >= 8) {
+		/* Pass-2 handling */
+		if (cfg_type)
+			goto no_emulation;
+		return thunder_ecam_p2_config_read(bus, devfn, where,
+						   size, val);
+	}
+
+	/*
+	 * All BARs have fixed addresses specified by the EA
+	 * capability; they must return zero on read.
+	 */
+	if (cfg_type == 0 &&
+	    ((where >= 0x10 && where < 0x2c) ||
+	     (where >= 0x1a4 && where < 0x1bc))) {
+		/* BAR or SR-IOV BAR */
+		*val = 0;
+		return PCIBIOS_SUCCESSFUL;
+	}
+
+	addr = bus->ops->map_bus(bus, devfn, 0);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	vendor_device = readl(addr);
+	if (vendor_device == 0xffffffff)
+		goto no_emulation;
+
+	pr_debug("%04x:%04x - Fix pass#: %08x, where: %03x, devfn: %03x\n",
+		 vendor_device & 0xffff, vendor_device >> 16, class_rev,
+		 (unsigned int)where, devfn);
+
+	/* Check for non type-00 header */
+	if (cfg_type == 0) {
+		bool has_msix;
+		bool is_nic = (vendor_device == 0xa01e177d);
+		bool is_tns = (vendor_device == 0xa01f177d);
+
+		addr = bus->ops->map_bus(bus, devfn, 0x70);
+		if (!addr)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+		/* E_CAP */
+		v = readl(addr);
+		has_msix = (v & 0xff00) != 0;
+
+		if (!has_msix && where_a == 0x70) {
+			v |= 0xbc00; /* next capability is EA at 0xbc */
+			set_val(v, where, size, val);
+			return PCIBIOS_SUCCESSFUL;
+		}
+		if (where_a == 0xb0) {
+			addr = bus->ops->map_bus(bus, devfn, where_a);
+			if (!addr)
+				return PCIBIOS_DEVICE_NOT_FOUND;
+
+			v = readl(addr);
+			if (v & 0xff00)
+				pr_err("Bad MSIX cap header: %08x\n", v);
+			v |= 0xbc00; /* next capability is EA at 0xbc */
+			set_val(v, where, size, val);
+			return PCIBIOS_SUCCESSFUL;
+		}
+		if (where_a == 0xbc) {
+			if (is_nic)
+				v = 0x40014; /* EA last in chain, 4 entries */
+			else if (is_tns)
+				v = 0x30014; /* EA last in chain, 3 entries */
+			else if (has_msix)
+				v = 0x20014; /* EA last in chain, 2 entries */
+			else
+				v = 0x10014; /* EA last in chain, 1 entry */
+			set_val(v, where, size, val);
+			return PCIBIOS_SUCCESSFUL;
+		}
+		if (where_a >= 0xc0 && where_a < 0xd0)
+			/* EA entry-0. PP=0, BAR0 Size:3 */
+			return handle_ea_bar(0x80ff0003,
+					     0x10, bus, devfn, where,
+					     size, val);
+		if (where_a >= 0xd0 && where_a < 0xe0 && has_msix)
+			 /* EA entry-1. PP=0, BAR4 Size:3 */
+			return handle_ea_bar(0x80ff0043,
+					     0x20, bus, devfn, where,
+					     size, val);
+		if (where_a >= 0xe0 && where_a < 0xf0 && is_tns)
+			/* EA entry-2. PP=0, BAR2, Size:3 */
+			return handle_ea_bar(0x80ff0023,
+					     0x18, bus, devfn, where,
+					     size, val);
+		if (where_a >= 0xe0 && where_a < 0xf0 && is_nic)
+			/* EA entry-2. PP=4, VF_BAR0 (9), Size:3 */
+			return handle_ea_bar(0x80ff0493,
+					     0x1a4, bus, devfn, where,
+					     size, val);
+		if (where_a >= 0xf0 && where_a < 0x100 && is_nic)
+			/* EA entry-3. PP=4, VF_BAR4 (d), Size:3 */
+			return handle_ea_bar(0x80ff04d3,
+					     0x1b4, bus, devfn, where,
+					     size, val);
+	} else if (cfg_type == 1) {
+		bool is_rsl_bridge = devfn == 0x08;
+		bool is_rad_bridge = devfn == 0xa0;
+		bool is_zip_bridge = devfn == 0xa8;
+		bool is_dfa_bridge = devfn == 0xb0;
+		bool is_nic_bridge = devfn == 0x10;
+
+		if (where_a == 0x70) {
+			addr = bus->ops->map_bus(bus, devfn, where_a);
+			if (!addr)
+				return PCIBIOS_DEVICE_NOT_FOUND;
+
+			v = readl(addr);
+			if (v & 0xff00)
+				pr_err("Bad PCIe cap header: %08x\n", v);
+			v |= 0xbc00; /* next capability is EA at 0xbc */
+			set_val(v, where, size, val);
+			return PCIBIOS_SUCCESSFUL;
+		}
+		if (where_a == 0xbc) {
+			if (is_nic_bridge)
+				v = 0x10014; /* EA last in chain, 1 entry */
+			else
+				v = 0x00014; /* EA last in chain, no entries */
+			set_val(v, where, size, val);
+			return PCIBIOS_SUCCESSFUL;
+		}
+		if (where_a == 0xc0) {
+			if (is_rsl_bridge || is_nic_bridge)
+				v = 0x0101; /* subordinate:secondary = 1:1 */
+			else if (is_rad_bridge)
+				v = 0x0202; /* subordinate:secondary = 2:2 */
+			else if (is_zip_bridge)
+				v = 0x0303; /* subordinate:secondary = 3:3 */
+			else if (is_dfa_bridge)
+				v = 0x0404; /* subordinate:secondary = 4:4 */
+			set_val(v, where, size, val);
+			return PCIBIOS_SUCCESSFUL;
+		}
+		if (where_a == 0xc4 && is_nic_bridge) {
+			/* Enabled, not-Write, SP=ff, PP=05, BEI=6, ES=4 */
+			v = 0x80ff0564;
+			set_val(v, where, size, val);
+			return PCIBIOS_SUCCESSFUL;
+		}
+		if (where_a == 0xc8 && is_nic_bridge) {
+			v = 0x00000002; /* Base-L 64-bit */
+			set_val(v, where, size, val);
+			return PCIBIOS_SUCCESSFUL;
+		}
+		if (where_a == 0xcc && is_nic_bridge) {
+			v = 0xfffffffe; /* MaxOffset-L 64-bit */
+			set_val(v, where, size, val);
+			return PCIBIOS_SUCCESSFUL;
+		}
+		if (where_a == 0xd0 && is_nic_bridge) {
+			v = 0x00008430; /* NIC Base-H */
+			set_val(v, where, size, val);
+			return PCIBIOS_SUCCESSFUL;
+		}
+		if (where_a == 0xd4 && is_nic_bridge) {
+			v = 0x0000000f; /* MaxOffset-H */
+			set_val(v, where, size, val);
+			return PCIBIOS_SUCCESSFUL;
+		}
+	}
+no_emulation:
+	return pci_generic_config_read(bus, devfn, where, size, val);
+}
+
+static int thunder_ecam_config_write(struct pci_bus *bus, unsigned int devfn,
+				     int where, int size, u32 val)
+{
+	/*
+	 * All BARs have fixed addresses; ignore BAR writes so they
+	 * don't get corrupted.
+	 */
+	if ((where >= 0x10 && where < 0x2c) ||
+	    (where >= 0x1a4 && where < 0x1bc))
+		/* BAR or SR-IOV BAR */
+		return PCIBIOS_SUCCESSFUL;
+
+	return pci_generic_config_write(bus, devfn, where, size, val);
+}
+
+const struct pci_ecam_ops pci_thunder_ecam_ops = {
+	.pci_ops	= {
+		.map_bus        = pci_ecam_map_bus,
+		.read           = thunder_ecam_config_read,
+		.write          = thunder_ecam_config_write,
+	}
+};
+
+#ifdef CONFIG_PCI_HOST_THUNDER_ECAM
+
+static const struct of_device_id thunder_ecam_of_match[] = {
+	{
+		.compatible = "cavium,pci-host-thunder-ecam",
+		.data = &pci_thunder_ecam_ops,
+	},
+	{ },
+};
+
+static struct platform_driver thunder_ecam_driver = {
+	.driver = {
+		.name = KBUILD_MODNAME,
+		.of_match_table = thunder_ecam_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = pci_host_common_probe,
+};
+builtin_platform_driver(thunder_ecam_driver);
+
+#endif
+#endif
diff --git a/drivers/pci/controller/pci-thunder-pem.c b/drivers/pci/controller/pci-thunder-pem.c
new file mode 100644
index 000000000..06a9855cb
--- /dev/null
+++ b/drivers/pci/controller/pci-thunder-pem.c
@@ -0,0 +1,480 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015 - 2016 Cavium, Inc.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/pci-ecam.h>
+#include <linux/platform_device.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include "../pci.h"
+
+#if defined(CONFIG_PCI_HOST_THUNDER_PEM) || (defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS))
+
+#define PEM_CFG_WR 0x28
+#define PEM_CFG_RD 0x30
+
+/*
+ * Enhanced Configuration Access Mechanism (ECAM)
+ *
+ * N.B. This is a non-standard platform-specific ECAM bus shift value.  For
+ * standard values defined in the PCI Express Base Specification see
+ * include/linux/pci-ecam.h.
+ */
+#define THUNDER_PCIE_ECAM_BUS_SHIFT	24
+
+struct thunder_pem_pci {
+	u32		ea_entry[3];
+	void __iomem	*pem_reg_base;
+};
+
+static int thunder_pem_bridge_read(struct pci_bus *bus, unsigned int devfn,
+				   int where, int size, u32 *val)
+{
+	u64 read_val, tmp_val;
+	struct pci_config_window *cfg = bus->sysdata;
+	struct thunder_pem_pci *pem_pci = (struct thunder_pem_pci *)cfg->priv;
+
+	if (devfn != 0 || where >= 2048)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/*
+	 * 32-bit accesses only.  Write the address to the low order
+	 * bits of PEM_CFG_RD, then trigger the read by reading back.
+	 * The config data lands in the upper 32-bits of PEM_CFG_RD.
+	 */
+	read_val = where & ~3ull;
+	writeq(read_val, pem_pci->pem_reg_base + PEM_CFG_RD);
+	read_val = readq(pem_pci->pem_reg_base + PEM_CFG_RD);
+	read_val >>= 32;
+
+	/*
+	 * The config space contains some garbage, fix it up.  Also
+	 * synthesize an EA capability for the BAR used by MSI-X.
+	 */
+	switch (where & ~3) {
+	case 0x40:
+		read_val &= 0xffff00ff;
+		read_val |= 0x00007000; /* Skip MSI CAP */
+		break;
+	case 0x70: /* Express Cap */
+		/*
+		 * Change PME interrupt to vector 2 on T88 where it
+		 * reads as 0, else leave it alone.
+		 */
+		if (!(read_val & (0x1f << 25)))
+			read_val |= (2u << 25);
+		break;
+	case 0xb0: /* MSI-X Cap */
+		/* TableSize=2 or 4, Next Cap is EA */
+		read_val &= 0xc00000ff;
+		/*
+		 * If Express Cap(0x70) raw PME vector reads as 0 we are on
+		 * T88 and TableSize is reported as 4, else TableSize
+		 * is 2.
+		 */
+		writeq(0x70, pem_pci->pem_reg_base + PEM_CFG_RD);
+		tmp_val = readq(pem_pci->pem_reg_base + PEM_CFG_RD);
+		tmp_val >>= 32;
+		if (!(tmp_val & (0x1f << 25)))
+			read_val |= 0x0003bc00;
+		else
+			read_val |= 0x0001bc00;
+		break;
+	case 0xb4:
+		/* Table offset=0, BIR=0 */
+		read_val = 0x00000000;
+		break;
+	case 0xb8:
+		/* BPA offset=0xf0000, BIR=0 */
+		read_val = 0x000f0000;
+		break;
+	case 0xbc:
+		/* EA, 1 entry, no next Cap */
+		read_val = 0x00010014;
+		break;
+	case 0xc0:
+		/* DW2 for type-1 */
+		read_val = 0x00000000;
+		break;
+	case 0xc4:
+		/* Entry BEI=0, PP=0x00, SP=0xff, ES=3 */
+		read_val = 0x80ff0003;
+		break;
+	case 0xc8:
+		read_val = pem_pci->ea_entry[0];
+		break;
+	case 0xcc:
+		read_val = pem_pci->ea_entry[1];
+		break;
+	case 0xd0:
+		read_val = pem_pci->ea_entry[2];
+		break;
+	default:
+		break;
+	}
+	read_val >>= (8 * (where & 3));
+	switch (size) {
+	case 1:
+		read_val &= 0xff;
+		break;
+	case 2:
+		read_val &= 0xffff;
+		break;
+	default:
+		break;
+	}
+	*val = read_val;
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int thunder_pem_config_read(struct pci_bus *bus, unsigned int devfn,
+				   int where, int size, u32 *val)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+
+	if (bus->number < cfg->busr.start ||
+	    bus->number > cfg->busr.end)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/*
+	 * The first device on the bus is the PEM PCIe bridge.
+	 * Special case its config access.
+	 */
+	if (bus->number == cfg->busr.start)
+		return thunder_pem_bridge_read(bus, devfn, where, size, val);
+
+	return pci_generic_config_read(bus, devfn, where, size, val);
+}
+
+/*
+ * Some of the w1c_bits below also include read-only or non-writable
+ * reserved bits, this makes the code simpler and is OK as the bits
+ * are not affected by writing zeros to them.
+ */
+static u32 thunder_pem_bridge_w1c_bits(u64 where_aligned)
+{
+	u32 w1c_bits = 0;
+
+	switch (where_aligned) {
+	case 0x04: /* Command/Status */
+	case 0x1c: /* Base and I/O Limit/Secondary Status */
+		w1c_bits = 0xff000000;
+		break;
+	case 0x44: /* Power Management Control and Status */
+		w1c_bits = 0xfffffe00;
+		break;
+	case 0x78: /* Device Control/Device Status */
+	case 0x80: /* Link Control/Link Status */
+	case 0x88: /* Slot Control/Slot Status */
+	case 0x90: /* Root Status */
+	case 0xa0: /* Link Control 2 Registers/Link Status 2 */
+		w1c_bits = 0xffff0000;
+		break;
+	case 0x104: /* Uncorrectable Error Status */
+	case 0x110: /* Correctable Error Status */
+	case 0x130: /* Error Status */
+	case 0x160: /* Link Control 4 */
+		w1c_bits = 0xffffffff;
+		break;
+	default:
+		break;
+	}
+	return w1c_bits;
+}
+
+/* Some bits must be written to one so they appear to be read-only. */
+static u32 thunder_pem_bridge_w1_bits(u64 where_aligned)
+{
+	u32 w1_bits;
+
+	switch (where_aligned) {
+	case 0x1c: /* I/O Base / I/O Limit, Secondary Status */
+		/* Force 32-bit I/O addressing. */
+		w1_bits = 0x0101;
+		break;
+	case 0x24: /* Prefetchable Memory Base / Prefetchable Memory Limit */
+		/* Force 64-bit addressing */
+		w1_bits = 0x00010001;
+		break;
+	default:
+		w1_bits = 0;
+		break;
+	}
+	return w1_bits;
+}
+
+static int thunder_pem_bridge_write(struct pci_bus *bus, unsigned int devfn,
+				    int where, int size, u32 val)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	struct thunder_pem_pci *pem_pci = (struct thunder_pem_pci *)cfg->priv;
+	u64 write_val, read_val;
+	u64 where_aligned = where & ~3ull;
+	u32 mask = 0;
+
+
+	if (devfn != 0 || where >= 2048)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/*
+	 * 32-bit accesses only.  If the write is for a size smaller
+	 * than 32-bits, we must first read the 32-bit value and merge
+	 * in the desired bits and then write the whole 32-bits back
+	 * out.
+	 */
+	switch (size) {
+	case 1:
+		writeq(where_aligned, pem_pci->pem_reg_base + PEM_CFG_RD);
+		read_val = readq(pem_pci->pem_reg_base + PEM_CFG_RD);
+		read_val >>= 32;
+		mask = ~(0xff << (8 * (where & 3)));
+		read_val &= mask;
+		val = (val & 0xff) << (8 * (where & 3));
+		val |= (u32)read_val;
+		break;
+	case 2:
+		writeq(where_aligned, pem_pci->pem_reg_base + PEM_CFG_RD);
+		read_val = readq(pem_pci->pem_reg_base + PEM_CFG_RD);
+		read_val >>= 32;
+		mask = ~(0xffff << (8 * (where & 3)));
+		read_val &= mask;
+		val = (val & 0xffff) << (8 * (where & 3));
+		val |= (u32)read_val;
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * By expanding the write width to 32 bits, we may
+	 * inadvertently hit some W1C bits that were not intended to
+	 * be written.  Calculate the mask that must be applied to the
+	 * data to be written to avoid these cases.
+	 */
+	if (mask) {
+		u32 w1c_bits = thunder_pem_bridge_w1c_bits(where);
+
+		if (w1c_bits) {
+			mask &= w1c_bits;
+			val &= ~mask;
+		}
+	}
+
+	/*
+	 * Some bits must be read-only with value of one.  Since the
+	 * access method allows these to be cleared if a zero is
+	 * written, force them to one before writing.
+	 */
+	val |= thunder_pem_bridge_w1_bits(where_aligned);
+
+	/*
+	 * Low order bits are the config address, the high order 32
+	 * bits are the data to be written.
+	 */
+	write_val = (((u64)val) << 32) | where_aligned;
+	writeq(write_val, pem_pci->pem_reg_base + PEM_CFG_WR);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn,
+				    int where, int size, u32 val)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+
+	if (bus->number < cfg->busr.start ||
+	    bus->number > cfg->busr.end)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	/*
+	 * The first device on the bus is the PEM PCIe bridge.
+	 * Special case its config access.
+	 */
+	if (bus->number == cfg->busr.start)
+		return thunder_pem_bridge_write(bus, devfn, where, size, val);
+
+
+	return pci_generic_config_write(bus, devfn, where, size, val);
+}
+
+static int thunder_pem_init(struct device *dev, struct pci_config_window *cfg,
+			    struct resource *res_pem)
+{
+	struct thunder_pem_pci *pem_pci;
+	resource_size_t bar4_start;
+
+	pem_pci = devm_kzalloc(dev, sizeof(*pem_pci), GFP_KERNEL);
+	if (!pem_pci)
+		return -ENOMEM;
+
+	pem_pci->pem_reg_base = devm_ioremap(dev, res_pem->start, 0x10000);
+	if (!pem_pci->pem_reg_base)
+		return -ENOMEM;
+
+	/*
+	 * The MSI-X BAR for the PEM and AER interrupts is located at
+	 * a fixed offset from the PEM register base.  Generate a
+	 * fragment of the synthesized Enhanced Allocation capability
+	 * structure here for the BAR.
+	 */
+	bar4_start = res_pem->start + 0xf00000;
+	pem_pci->ea_entry[0] = lower_32_bits(bar4_start) | 2;
+	pem_pci->ea_entry[1] = lower_32_bits(res_pem->end - bar4_start) & ~3u;
+	pem_pci->ea_entry[2] = upper_32_bits(bar4_start);
+
+	cfg->priv = pem_pci;
+	return 0;
+}
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
+
+#define PEM_RES_BASE		0x87e0c0000000ULL
+#define PEM_NODE_MASK		GENMASK_ULL(45, 44)
+#define PEM_INDX_MASK		GENMASK_ULL(26, 24)
+#define PEM_MIN_DOM_IN_NODE	4
+#define PEM_MAX_DOM_IN_NODE	10
+
+static void thunder_pem_reserve_range(struct device *dev, int seg,
+				      struct resource *r)
+{
+	resource_size_t start = r->start, end = r->end;
+	struct resource *res;
+	const char *regionid;
+
+	regionid = kasprintf(GFP_KERNEL, "PEM RC:%d", seg);
+	if (!regionid)
+		return;
+
+	res = request_mem_region(start, end - start + 1, regionid);
+	if (res)
+		res->flags &= ~IORESOURCE_BUSY;
+	else
+		kfree(regionid);
+
+	dev_info(dev, "%pR %s reserved\n", r,
+		 res ? "has been" : "could not be");
+}
+
+static void thunder_pem_legacy_fw(struct acpi_pci_root *root,
+				 struct resource *res_pem)
+{
+	int node = acpi_get_node(root->device->handle);
+	int index;
+
+	if (node == NUMA_NO_NODE)
+		node = 0;
+
+	index = root->segment - PEM_MIN_DOM_IN_NODE;
+	index -= node * PEM_MAX_DOM_IN_NODE;
+	res_pem->start = PEM_RES_BASE | FIELD_PREP(PEM_NODE_MASK, node) |
+					FIELD_PREP(PEM_INDX_MASK, index);
+	res_pem->flags = IORESOURCE_MEM;
+}
+
+static int thunder_pem_acpi_init(struct pci_config_window *cfg)
+{
+	struct device *dev = cfg->parent;
+	struct acpi_device *adev = to_acpi_device(dev);
+	struct acpi_pci_root *root = acpi_driver_data(adev);
+	struct resource *res_pem;
+	int ret;
+
+	res_pem = devm_kzalloc(&adev->dev, sizeof(*res_pem), GFP_KERNEL);
+	if (!res_pem)
+		return -ENOMEM;
+
+	ret = acpi_get_rc_resources(dev, "CAVA02B", root->segment, res_pem);
+
+	/*
+	 * If we fail to gather resources it means that we run with old
+	 * FW where we need to calculate PEM-specific resources manually.
+	 */
+	if (ret) {
+		thunder_pem_legacy_fw(root, res_pem);
+		/*
+		 * Reserve 64K size PEM specific resources. The full 16M range
+		 * size is required for thunder_pem_init() call.
+		 */
+		res_pem->end = res_pem->start + SZ_64K - 1;
+		thunder_pem_reserve_range(dev, root->segment, res_pem);
+		res_pem->end = res_pem->start + SZ_16M - 1;
+
+		/* Reserve PCI configuration space as well. */
+		thunder_pem_reserve_range(dev, root->segment, &cfg->res);
+	}
+
+	return thunder_pem_init(dev, cfg, res_pem);
+}
+
+const struct pci_ecam_ops thunder_pem_ecam_ops = {
+	.bus_shift	= THUNDER_PCIE_ECAM_BUS_SHIFT,
+	.init		= thunder_pem_acpi_init,
+	.pci_ops	= {
+		.map_bus	= pci_ecam_map_bus,
+		.read		= thunder_pem_config_read,
+		.write		= thunder_pem_config_write,
+	}
+};
+
+#endif
+
+#ifdef CONFIG_PCI_HOST_THUNDER_PEM
+
+static int thunder_pem_platform_init(struct pci_config_window *cfg)
+{
+	struct device *dev = cfg->parent;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct resource *res_pem;
+
+	if (!dev->of_node)
+		return -EINVAL;
+
+	/*
+	 * The second register range is the PEM bridge to the PCIe
+	 * bus.  It has a different config access method than those
+	 * devices behind the bridge.
+	 */
+	res_pem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!res_pem) {
+		dev_err(dev, "missing \"reg[1]\"property\n");
+		return -EINVAL;
+	}
+
+	return thunder_pem_init(dev, cfg, res_pem);
+}
+
+static const struct pci_ecam_ops pci_thunder_pem_ops = {
+	.bus_shift	= THUNDER_PCIE_ECAM_BUS_SHIFT,
+	.init		= thunder_pem_platform_init,
+	.pci_ops	= {
+		.map_bus	= pci_ecam_map_bus,
+		.read		= thunder_pem_config_read,
+		.write		= thunder_pem_config_write,
+	}
+};
+
+static const struct of_device_id thunder_pem_of_match[] = {
+	{
+		.compatible = "cavium,pci-host-thunder-pem",
+		.data = &pci_thunder_pem_ops,
+	},
+	{ },
+};
+
+static struct platform_driver thunder_pem_driver = {
+	.driver = {
+		.name = KBUILD_MODNAME,
+		.of_match_table = thunder_pem_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = pci_host_common_probe,
+};
+builtin_platform_driver(thunder_pem_driver);
+
+#endif
+#endif
diff --git a/drivers/pci/controller/pci-v3-semi.c b/drivers/pci/controller/pci-v3-semi.c
new file mode 100644
index 000000000..460a82532
--- /dev/null
+++ b/drivers/pci/controller/pci-v3-semi.c
@@ -0,0 +1,907 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for V3 Semiconductor PCI Local Bus to PCI Bridge
+ * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org>
+ *
+ * Based on the code from arch/arm/mach-integrator/pci_v3.c
+ * Copyright (C) 1999 ARM Limited
+ * Copyright (C) 2000-2001 Deep Blue Solutions Ltd
+ *
+ * Contributors to the old driver include:
+ * Russell King <linux@armlinux.org.uk>
+ * David A. Rusling <david.rusling@linaro.org> (uHAL, ARM Firmware suite)
+ * Rob Herring <robh@kernel.org>
+ * Liviu Dudau <Liviu.Dudau@arm.com>
+ * Grant Likely <grant.likely@secretlab.ca>
+ * Arnd Bergmann <arnd@arndb.de>
+ * Bjorn Helgaas <bhelgaas@google.com>
+ */
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+#include <linux/irq.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
+#include <linux/clk.h>
+
+#include "../pci.h"
+
+#define V3_PCI_VENDOR			0x00000000
+#define V3_PCI_DEVICE			0x00000002
+#define V3_PCI_CMD			0x00000004
+#define V3_PCI_STAT			0x00000006
+#define V3_PCI_CC_REV			0x00000008
+#define V3_PCI_HDR_CFG			0x0000000C
+#define V3_PCI_IO_BASE			0x00000010
+#define V3_PCI_BASE0			0x00000014
+#define V3_PCI_BASE1			0x00000018
+#define V3_PCI_SUB_VENDOR		0x0000002C
+#define V3_PCI_SUB_ID			0x0000002E
+#define V3_PCI_ROM			0x00000030
+#define V3_PCI_BPARAM			0x0000003C
+#define V3_PCI_MAP0			0x00000040
+#define V3_PCI_MAP1			0x00000044
+#define V3_PCI_INT_STAT			0x00000048
+#define V3_PCI_INT_CFG			0x0000004C
+#define V3_LB_BASE0			0x00000054
+#define V3_LB_BASE1			0x00000058
+#define V3_LB_MAP0			0x0000005E
+#define V3_LB_MAP1			0x00000062
+#define V3_LB_BASE2			0x00000064
+#define V3_LB_MAP2			0x00000066
+#define V3_LB_SIZE			0x00000068
+#define V3_LB_IO_BASE			0x0000006E
+#define V3_FIFO_CFG			0x00000070
+#define V3_FIFO_PRIORITY		0x00000072
+#define V3_FIFO_STAT			0x00000074
+#define V3_LB_ISTAT			0x00000076
+#define V3_LB_IMASK			0x00000077
+#define V3_SYSTEM			0x00000078
+#define V3_LB_CFG			0x0000007A
+#define V3_PCI_CFG			0x0000007C
+#define V3_DMA_PCI_ADR0			0x00000080
+#define V3_DMA_PCI_ADR1			0x00000090
+#define V3_DMA_LOCAL_ADR0		0x00000084
+#define V3_DMA_LOCAL_ADR1		0x00000094
+#define V3_DMA_LENGTH0			0x00000088
+#define V3_DMA_LENGTH1			0x00000098
+#define V3_DMA_CSR0			0x0000008B
+#define V3_DMA_CSR1			0x0000009B
+#define V3_DMA_CTLB_ADR0		0x0000008C
+#define V3_DMA_CTLB_ADR1		0x0000009C
+#define V3_DMA_DELAY			0x000000E0
+#define V3_MAIL_DATA			0x000000C0
+#define V3_PCI_MAIL_IEWR		0x000000D0
+#define V3_PCI_MAIL_IERD		0x000000D2
+#define V3_LB_MAIL_IEWR			0x000000D4
+#define V3_LB_MAIL_IERD			0x000000D6
+#define V3_MAIL_WR_STAT			0x000000D8
+#define V3_MAIL_RD_STAT			0x000000DA
+#define V3_QBA_MAP			0x000000DC
+
+/* PCI STATUS bits */
+#define V3_PCI_STAT_PAR_ERR		BIT(15)
+#define V3_PCI_STAT_SYS_ERR		BIT(14)
+#define V3_PCI_STAT_M_ABORT_ERR		BIT(13)
+#define V3_PCI_STAT_T_ABORT_ERR		BIT(12)
+
+/* LB ISTAT bits */
+#define V3_LB_ISTAT_MAILBOX		BIT(7)
+#define V3_LB_ISTAT_PCI_RD		BIT(6)
+#define V3_LB_ISTAT_PCI_WR		BIT(5)
+#define V3_LB_ISTAT_PCI_INT		BIT(4)
+#define V3_LB_ISTAT_PCI_PERR		BIT(3)
+#define V3_LB_ISTAT_I2O_QWR		BIT(2)
+#define V3_LB_ISTAT_DMA1		BIT(1)
+#define V3_LB_ISTAT_DMA0		BIT(0)
+
+/* PCI COMMAND bits */
+#define V3_COMMAND_M_FBB_EN		BIT(9)
+#define V3_COMMAND_M_SERR_EN		BIT(8)
+#define V3_COMMAND_M_PAR_EN		BIT(6)
+#define V3_COMMAND_M_MASTER_EN		BIT(2)
+#define V3_COMMAND_M_MEM_EN		BIT(1)
+#define V3_COMMAND_M_IO_EN		BIT(0)
+
+/* SYSTEM bits */
+#define V3_SYSTEM_M_RST_OUT		BIT(15)
+#define V3_SYSTEM_M_LOCK		BIT(14)
+#define V3_SYSTEM_UNLOCK		0xa05f
+
+/* PCI CFG bits */
+#define V3_PCI_CFG_M_I2O_EN		BIT(15)
+#define V3_PCI_CFG_M_IO_REG_DIS		BIT(14)
+#define V3_PCI_CFG_M_IO_DIS		BIT(13)
+#define V3_PCI_CFG_M_EN3V		BIT(12)
+#define V3_PCI_CFG_M_RETRY_EN		BIT(10)
+#define V3_PCI_CFG_M_AD_LOW1		BIT(9)
+#define V3_PCI_CFG_M_AD_LOW0		BIT(8)
+/*
+ * This is the value applied to C/BE[3:1], with bit 0 always held 0
+ * during DMA access.
+ */
+#define V3_PCI_CFG_M_RTYPE_SHIFT	5
+#define V3_PCI_CFG_M_WTYPE_SHIFT	1
+#define V3_PCI_CFG_TYPE_DEFAULT		0x3
+
+/* PCI BASE bits (PCI -> Local Bus) */
+#define V3_PCI_BASE_M_ADR_BASE		0xFFF00000U
+#define V3_PCI_BASE_M_ADR_BASEL		0x000FFF00U
+#define V3_PCI_BASE_M_PREFETCH		BIT(3)
+#define V3_PCI_BASE_M_TYPE		(3 << 1)
+#define V3_PCI_BASE_M_IO		BIT(0)
+
+/* PCI MAP bits (PCI -> Local bus) */
+#define V3_PCI_MAP_M_MAP_ADR		0xFFF00000U
+#define V3_PCI_MAP_M_RD_POST_INH	BIT(15)
+#define V3_PCI_MAP_M_ROM_SIZE		(3 << 10)
+#define V3_PCI_MAP_M_SWAP		(3 << 8)
+#define V3_PCI_MAP_M_ADR_SIZE		0x000000F0U
+#define V3_PCI_MAP_M_REG_EN		BIT(1)
+#define V3_PCI_MAP_M_ENABLE		BIT(0)
+
+/* LB_BASE0,1 bits (Local bus -> PCI) */
+#define V3_LB_BASE_ADR_BASE		0xfff00000U
+#define V3_LB_BASE_SWAP			(3 << 8)
+#define V3_LB_BASE_ADR_SIZE		(15 << 4)
+#define V3_LB_BASE_PREFETCH		BIT(3)
+#define V3_LB_BASE_ENABLE		BIT(0)
+
+#define V3_LB_BASE_ADR_SIZE_1MB		(0 << 4)
+#define V3_LB_BASE_ADR_SIZE_2MB		(1 << 4)
+#define V3_LB_BASE_ADR_SIZE_4MB		(2 << 4)
+#define V3_LB_BASE_ADR_SIZE_8MB		(3 << 4)
+#define V3_LB_BASE_ADR_SIZE_16MB	(4 << 4)
+#define V3_LB_BASE_ADR_SIZE_32MB	(5 << 4)
+#define V3_LB_BASE_ADR_SIZE_64MB	(6 << 4)
+#define V3_LB_BASE_ADR_SIZE_128MB	(7 << 4)
+#define V3_LB_BASE_ADR_SIZE_256MB	(8 << 4)
+#define V3_LB_BASE_ADR_SIZE_512MB	(9 << 4)
+#define V3_LB_BASE_ADR_SIZE_1GB		(10 << 4)
+#define V3_LB_BASE_ADR_SIZE_2GB		(11 << 4)
+
+#define v3_addr_to_lb_base(a)	((a) & V3_LB_BASE_ADR_BASE)
+
+/* LB_MAP0,1 bits (Local bus -> PCI) */
+#define V3_LB_MAP_MAP_ADR		0xfff0U
+#define V3_LB_MAP_TYPE			(7 << 1)
+#define V3_LB_MAP_AD_LOW_EN		BIT(0)
+
+#define V3_LB_MAP_TYPE_IACK		(0 << 1)
+#define V3_LB_MAP_TYPE_IO		(1 << 1)
+#define V3_LB_MAP_TYPE_MEM		(3 << 1)
+#define V3_LB_MAP_TYPE_CONFIG		(5 << 1)
+#define V3_LB_MAP_TYPE_MEM_MULTIPLE	(6 << 1)
+
+#define v3_addr_to_lb_map(a)	(((a) >> 16) & V3_LB_MAP_MAP_ADR)
+
+/* LB_BASE2 bits (Local bus -> PCI IO) */
+#define V3_LB_BASE2_ADR_BASE		0xff00U
+#define V3_LB_BASE2_SWAP_AUTO		(3 << 6)
+#define V3_LB_BASE2_ENABLE		BIT(0)
+
+#define v3_addr_to_lb_base2(a)	(((a) >> 16) & V3_LB_BASE2_ADR_BASE)
+
+/* LB_MAP2 bits (Local bus -> PCI IO) */
+#define V3_LB_MAP2_MAP_ADR		0xff00U
+
+#define v3_addr_to_lb_map2(a)	(((a) >> 16) & V3_LB_MAP2_MAP_ADR)
+
+/* FIFO priority bits */
+#define V3_FIFO_PRIO_LOCAL		BIT(12)
+#define V3_FIFO_PRIO_LB_RD1_FLUSH_EOB	BIT(10)
+#define V3_FIFO_PRIO_LB_RD1_FLUSH_AP1	BIT(11)
+#define V3_FIFO_PRIO_LB_RD1_FLUSH_ANY	(BIT(10)|BIT(11))
+#define V3_FIFO_PRIO_LB_RD0_FLUSH_EOB	BIT(8)
+#define V3_FIFO_PRIO_LB_RD0_FLUSH_AP1	BIT(9)
+#define V3_FIFO_PRIO_LB_RD0_FLUSH_ANY	(BIT(8)|BIT(9))
+#define V3_FIFO_PRIO_PCI		BIT(4)
+#define V3_FIFO_PRIO_PCI_RD1_FLUSH_EOB	BIT(2)
+#define V3_FIFO_PRIO_PCI_RD1_FLUSH_AP1	BIT(3)
+#define V3_FIFO_PRIO_PCI_RD1_FLUSH_ANY	(BIT(2)|BIT(3))
+#define V3_FIFO_PRIO_PCI_RD0_FLUSH_EOB	BIT(0)
+#define V3_FIFO_PRIO_PCI_RD0_FLUSH_AP1	BIT(1)
+#define V3_FIFO_PRIO_PCI_RD0_FLUSH_ANY	(BIT(0)|BIT(1))
+
+/* Local bus configuration bits */
+#define V3_LB_CFG_LB_TO_64_CYCLES	0x0000
+#define V3_LB_CFG_LB_TO_256_CYCLES	BIT(13)
+#define V3_LB_CFG_LB_TO_512_CYCLES	BIT(14)
+#define V3_LB_CFG_LB_TO_1024_CYCLES	(BIT(13)|BIT(14))
+#define V3_LB_CFG_LB_RST		BIT(12)
+#define V3_LB_CFG_LB_PPC_RDY		BIT(11)
+#define V3_LB_CFG_LB_LB_INT		BIT(10)
+#define V3_LB_CFG_LB_ERR_EN		BIT(9)
+#define V3_LB_CFG_LB_RDY_EN		BIT(8)
+#define V3_LB_CFG_LB_BE_IMODE		BIT(7)
+#define V3_LB_CFG_LB_BE_OMODE		BIT(6)
+#define V3_LB_CFG_LB_ENDIAN		BIT(5)
+#define V3_LB_CFG_LB_PARK_EN		BIT(4)
+#define V3_LB_CFG_LB_FBB_DIS		BIT(2)
+
+/* ARM Integrator-specific extended control registers */
+#define INTEGRATOR_SC_PCI_OFFSET	0x18
+#define INTEGRATOR_SC_PCI_ENABLE	BIT(0)
+#define INTEGRATOR_SC_PCI_INTCLR	BIT(1)
+#define INTEGRATOR_SC_LBFADDR_OFFSET	0x20
+#define INTEGRATOR_SC_LBFCODE_OFFSET	0x24
+
+struct v3_pci {
+	struct device *dev;
+	void __iomem *base;
+	void __iomem *config_base;
+	u32 config_mem;
+	u32 non_pre_mem;
+	u32 pre_mem;
+	phys_addr_t non_pre_bus_addr;
+	phys_addr_t pre_bus_addr;
+	struct regmap *map;
+};
+
+/*
+ * The V3 PCI interface chip in Integrator provides several windows from
+ * local bus memory into the PCI memory areas. Unfortunately, there
+ * are not really enough windows for our usage, therefore we reuse
+ * one of the windows for access to PCI configuration space. On the
+ * Integrator/AP, the memory map is as follows:
+ *
+ * Local Bus Memory         Usage
+ *
+ * 40000000 - 4FFFFFFF      PCI memory.  256M non-prefetchable
+ * 50000000 - 5FFFFFFF      PCI memory.  256M prefetchable
+ * 60000000 - 60FFFFFF      PCI IO.  16M
+ * 61000000 - 61FFFFFF      PCI Configuration. 16M
+ *
+ * There are three V3 windows, each described by a pair of V3 registers.
+ * These are LB_BASE0/LB_MAP0, LB_BASE1/LB_MAP1 and LB_BASE2/LB_MAP2.
+ * Base0 and Base1 can be used for any type of PCI memory access.   Base2
+ * can be used either for PCI I/O or for I20 accesses.  By default, uHAL
+ * uses this only for PCI IO space.
+ *
+ * Normally these spaces are mapped using the following base registers:
+ *
+ * Usage Local Bus Memory         Base/Map registers used
+ *
+ * Mem   40000000 - 4FFFFFFF      LB_BASE0/LB_MAP0
+ * Mem   50000000 - 5FFFFFFF      LB_BASE1/LB_MAP1
+ * IO    60000000 - 60FFFFFF      LB_BASE2/LB_MAP2
+ * Cfg   61000000 - 61FFFFFF
+ *
+ * This means that I20 and PCI configuration space accesses will fail.
+ * When PCI configuration accesses are needed (via the uHAL PCI
+ * configuration space primitives) we must remap the spaces as follows:
+ *
+ * Usage Local Bus Memory         Base/Map registers used
+ *
+ * Mem   40000000 - 4FFFFFFF      LB_BASE0/LB_MAP0
+ * Mem   50000000 - 5FFFFFFF      LB_BASE0/LB_MAP0
+ * IO    60000000 - 60FFFFFF      LB_BASE2/LB_MAP2
+ * Cfg   61000000 - 61FFFFFF      LB_BASE1/LB_MAP1
+ *
+ * To make this work, the code depends on overlapping windows working.
+ * The V3 chip translates an address by checking its range within
+ * each of the BASE/MAP pairs in turn (in ascending register number
+ * order).  It will use the first matching pair.   So, for example,
+ * if the same address is mapped by both LB_BASE0/LB_MAP0 and
+ * LB_BASE1/LB_MAP1, the V3 will use the translation from
+ * LB_BASE0/LB_MAP0.
+ *
+ * To allow PCI Configuration space access, the code enlarges the
+ * window mapped by LB_BASE0/LB_MAP0 from 256M to 512M.  This occludes
+ * the windows currently mapped by LB_BASE1/LB_MAP1 so that it can
+ * be remapped for use by configuration cycles.
+ *
+ * At the end of the PCI Configuration space accesses,
+ * LB_BASE1/LB_MAP1 is reset to map PCI Memory.  Finally the window
+ * mapped by LB_BASE0/LB_MAP0 is reduced in size from 512M to 256M to
+ * reveal the now restored LB_BASE1/LB_MAP1 window.
+ *
+ * NOTE: We do not set up I2O mapping.  I suspect that this is only
+ * for an intelligent (target) device.  Using I2O disables most of
+ * the mappings into PCI memory.
+ */
+static void __iomem *v3_map_bus(struct pci_bus *bus,
+				unsigned int devfn, int offset)
+{
+	struct v3_pci *v3 = bus->sysdata;
+	unsigned int address, mapaddress, busnr;
+
+	busnr = bus->number;
+	if (busnr == 0) {
+		int slot = PCI_SLOT(devfn);
+
+		/*
+		 * local bus segment so need a type 0 config cycle
+		 *
+		 * build the PCI configuration "address" with one-hot in
+		 * A31-A11
+		 *
+		 * mapaddress:
+		 *  3:1 = config cycle (101)
+		 *  0   = PCI A1 & A0 are 0 (0)
+		 */
+		address = PCI_FUNC(devfn) << 8;
+		mapaddress = V3_LB_MAP_TYPE_CONFIG;
+
+		if (slot > 12)
+			/*
+			 * high order bits are handled by the MAP register
+			 */
+			mapaddress |= BIT(slot - 5);
+		else
+			/*
+			 * low order bits handled directly in the address
+			 */
+			address |= BIT(slot + 11);
+	} else {
+		/*
+		 * not the local bus segment so need a type 1 config cycle
+		 *
+		 * address:
+		 *  23:16 = bus number
+		 *  15:11 = slot number (7:3 of devfn)
+		 *  10:8  = func number (2:0 of devfn)
+		 *
+		 * mapaddress:
+		 *  3:1 = config cycle (101)
+		 *  0   = PCI A1 & A0 from host bus (1)
+		 */
+		mapaddress = V3_LB_MAP_TYPE_CONFIG | V3_LB_MAP_AD_LOW_EN;
+		address = (busnr << 16) | (devfn << 8);
+	}
+
+	/*
+	 * Set up base0 to see all 512Mbytes of memory space (not
+	 * prefetchable), this frees up base1 for re-use by
+	 * configuration memory
+	 */
+	writel(v3_addr_to_lb_base(v3->non_pre_mem) |
+	       V3_LB_BASE_ADR_SIZE_512MB | V3_LB_BASE_ENABLE,
+	       v3->base + V3_LB_BASE0);
+
+	/*
+	 * Set up base1/map1 to point into configuration space.
+	 * The config mem is always 16MB.
+	 */
+	writel(v3_addr_to_lb_base(v3->config_mem) |
+	       V3_LB_BASE_ADR_SIZE_16MB | V3_LB_BASE_ENABLE,
+	       v3->base + V3_LB_BASE1);
+	writew(mapaddress, v3->base + V3_LB_MAP1);
+
+	return v3->config_base + address + offset;
+}
+
+static void v3_unmap_bus(struct v3_pci *v3)
+{
+	/*
+	 * Reassign base1 for use by prefetchable PCI memory
+	 */
+	writel(v3_addr_to_lb_base(v3->pre_mem) |
+	       V3_LB_BASE_ADR_SIZE_256MB | V3_LB_BASE_PREFETCH |
+	       V3_LB_BASE_ENABLE,
+	       v3->base + V3_LB_BASE1);
+	writew(v3_addr_to_lb_map(v3->pre_bus_addr) |
+	       V3_LB_MAP_TYPE_MEM, /* was V3_LB_MAP_TYPE_MEM_MULTIPLE */
+	       v3->base + V3_LB_MAP1);
+
+	/*
+	 * And shrink base0 back to a 256M window (NOTE: MAP0 already correct)
+	 */
+	writel(v3_addr_to_lb_base(v3->non_pre_mem) |
+	       V3_LB_BASE_ADR_SIZE_256MB | V3_LB_BASE_ENABLE,
+	       v3->base + V3_LB_BASE0);
+}
+
+static int v3_pci_read_config(struct pci_bus *bus, unsigned int fn,
+			      int config, int size, u32 *value)
+{
+	struct v3_pci *v3 = bus->sysdata;
+	int ret;
+
+	dev_dbg(&bus->dev,
+		"[read]  slt: %.2d, fnc: %d, cnf: 0x%.2X, val (%d bytes): 0x%.8X\n",
+		PCI_SLOT(fn), PCI_FUNC(fn), config, size, *value);
+	ret = pci_generic_config_read(bus, fn, config, size, value);
+	v3_unmap_bus(v3);
+	return ret;
+}
+
+static int v3_pci_write_config(struct pci_bus *bus, unsigned int fn,
+				    int config, int size, u32 value)
+{
+	struct v3_pci *v3 = bus->sysdata;
+	int ret;
+
+	dev_dbg(&bus->dev,
+		"[write] slt: %.2d, fnc: %d, cnf: 0x%.2X, val (%d bytes): 0x%.8X\n",
+		PCI_SLOT(fn), PCI_FUNC(fn), config, size, value);
+	ret = pci_generic_config_write(bus, fn, config, size, value);
+	v3_unmap_bus(v3);
+	return ret;
+}
+
+static struct pci_ops v3_pci_ops = {
+	.map_bus = v3_map_bus,
+	.read = v3_pci_read_config,
+	.write = v3_pci_write_config,
+};
+
+static irqreturn_t v3_irq(int irq, void *data)
+{
+	struct v3_pci *v3 = data;
+	struct device *dev = v3->dev;
+	u32 status;
+
+	status = readw(v3->base + V3_PCI_STAT);
+	if (status & V3_PCI_STAT_PAR_ERR)
+		dev_err(dev, "parity error interrupt\n");
+	if (status & V3_PCI_STAT_SYS_ERR)
+		dev_err(dev, "system error interrupt\n");
+	if (status & V3_PCI_STAT_M_ABORT_ERR)
+		dev_err(dev, "master abort error interrupt\n");
+	if (status & V3_PCI_STAT_T_ABORT_ERR)
+		dev_err(dev, "target abort error interrupt\n");
+	writew(status, v3->base + V3_PCI_STAT);
+
+	status = readb(v3->base + V3_LB_ISTAT);
+	if (status & V3_LB_ISTAT_MAILBOX)
+		dev_info(dev, "PCI mailbox interrupt\n");
+	if (status & V3_LB_ISTAT_PCI_RD)
+		dev_err(dev, "PCI target LB->PCI READ abort interrupt\n");
+	if (status & V3_LB_ISTAT_PCI_WR)
+		dev_err(dev, "PCI target LB->PCI WRITE abort interrupt\n");
+	if (status &  V3_LB_ISTAT_PCI_INT)
+		dev_info(dev, "PCI pin interrupt\n");
+	if (status & V3_LB_ISTAT_PCI_PERR)
+		dev_err(dev, "PCI parity error interrupt\n");
+	if (status & V3_LB_ISTAT_I2O_QWR)
+		dev_info(dev, "I2O inbound post queue interrupt\n");
+	if (status & V3_LB_ISTAT_DMA1)
+		dev_info(dev, "DMA channel 1 interrupt\n");
+	if (status & V3_LB_ISTAT_DMA0)
+		dev_info(dev, "DMA channel 0 interrupt\n");
+	/* Clear all possible interrupts on the local bus */
+	writeb(0, v3->base + V3_LB_ISTAT);
+	if (v3->map)
+		regmap_write(v3->map, INTEGRATOR_SC_PCI_OFFSET,
+			     INTEGRATOR_SC_PCI_ENABLE |
+			     INTEGRATOR_SC_PCI_INTCLR);
+
+	return IRQ_HANDLED;
+}
+
+static int v3_integrator_init(struct v3_pci *v3)
+{
+	unsigned int val;
+
+	v3->map =
+		syscon_regmap_lookup_by_compatible("arm,integrator-ap-syscon");
+	if (IS_ERR(v3->map)) {
+		dev_err(v3->dev, "no syscon\n");
+		return -ENODEV;
+	}
+
+	regmap_read(v3->map, INTEGRATOR_SC_PCI_OFFSET, &val);
+	/* Take the PCI bridge out of reset, clear IRQs */
+	regmap_write(v3->map, INTEGRATOR_SC_PCI_OFFSET,
+		     INTEGRATOR_SC_PCI_ENABLE |
+		     INTEGRATOR_SC_PCI_INTCLR);
+
+	if (!(val & INTEGRATOR_SC_PCI_ENABLE)) {
+		/* If we were in reset we need to sleep a bit */
+		msleep(230);
+
+		/* Set the physical base for the controller itself */
+		writel(0x6200, v3->base + V3_LB_IO_BASE);
+
+		/* Wait for the mailbox to settle after reset */
+		do {
+			writeb(0xaa, v3->base + V3_MAIL_DATA);
+			writeb(0x55, v3->base + V3_MAIL_DATA + 4);
+		} while (readb(v3->base + V3_MAIL_DATA) != 0xaa &&
+			 readb(v3->base + V3_MAIL_DATA) != 0x55);
+	}
+
+	dev_info(v3->dev, "initialized PCI V3 Integrator/AP integration\n");
+
+	return 0;
+}
+
+static int v3_pci_setup_resource(struct v3_pci *v3,
+				 struct pci_host_bridge *host,
+				 struct resource_entry *win)
+{
+	struct device *dev = v3->dev;
+	struct resource *mem;
+	struct resource *io;
+
+	switch (resource_type(win->res)) {
+	case IORESOURCE_IO:
+		io = win->res;
+
+		/* Setup window 2 - PCI I/O */
+		writel(v3_addr_to_lb_base2(pci_pio_to_address(io->start)) |
+		       V3_LB_BASE2_ENABLE,
+		       v3->base + V3_LB_BASE2);
+		writew(v3_addr_to_lb_map2(io->start - win->offset),
+		       v3->base + V3_LB_MAP2);
+		break;
+	case IORESOURCE_MEM:
+		mem = win->res;
+		if (mem->flags & IORESOURCE_PREFETCH) {
+			mem->name = "V3 PCI PRE-MEM";
+			v3->pre_mem = mem->start;
+			v3->pre_bus_addr = mem->start - win->offset;
+			dev_dbg(dev, "PREFETCHABLE MEM window %pR, bus addr %pap\n",
+				mem, &v3->pre_bus_addr);
+			if (resource_size(mem) != SZ_256M) {
+				dev_err(dev, "prefetchable memory range is not 256MB\n");
+				return -EINVAL;
+			}
+			if (v3->non_pre_mem &&
+			    (mem->start != v3->non_pre_mem + SZ_256M)) {
+				dev_err(dev,
+					"prefetchable memory is not adjacent to non-prefetchable memory\n");
+				return -EINVAL;
+			}
+			/* Setup window 1 - PCI prefetchable memory */
+			writel(v3_addr_to_lb_base(v3->pre_mem) |
+			       V3_LB_BASE_ADR_SIZE_256MB |
+			       V3_LB_BASE_PREFETCH |
+			       V3_LB_BASE_ENABLE,
+			       v3->base + V3_LB_BASE1);
+			writew(v3_addr_to_lb_map(v3->pre_bus_addr) |
+			       V3_LB_MAP_TYPE_MEM, /* Was V3_LB_MAP_TYPE_MEM_MULTIPLE */
+			       v3->base + V3_LB_MAP1);
+		} else {
+			mem->name = "V3 PCI NON-PRE-MEM";
+			v3->non_pre_mem = mem->start;
+			v3->non_pre_bus_addr = mem->start - win->offset;
+			dev_dbg(dev, "NON-PREFETCHABLE MEM window %pR, bus addr %pap\n",
+				mem, &v3->non_pre_bus_addr);
+			if (resource_size(mem) != SZ_256M) {
+				dev_err(dev,
+					"non-prefetchable memory range is not 256MB\n");
+				return -EINVAL;
+			}
+			/* Setup window 0 - PCI non-prefetchable memory */
+			writel(v3_addr_to_lb_base(v3->non_pre_mem) |
+			       V3_LB_BASE_ADR_SIZE_256MB |
+			       V3_LB_BASE_ENABLE,
+			       v3->base + V3_LB_BASE0);
+			writew(v3_addr_to_lb_map(v3->non_pre_bus_addr) |
+			       V3_LB_MAP_TYPE_MEM,
+			       v3->base + V3_LB_MAP0);
+		}
+		break;
+	case IORESOURCE_BUS:
+		break;
+	default:
+		dev_info(dev, "Unknown resource type %lu\n",
+			 resource_type(win->res));
+		break;
+	}
+
+	return 0;
+}
+
+static int v3_get_dma_range_config(struct v3_pci *v3,
+				   struct resource_entry *entry,
+				   u32 *pci_base, u32 *pci_map)
+{
+	struct device *dev = v3->dev;
+	u64 cpu_addr = entry->res->start;
+	u64 cpu_end = entry->res->end;
+	u64 pci_end = cpu_end - entry->offset;
+	u64 pci_addr = entry->res->start - entry->offset;
+	u32 val;
+
+	if (pci_addr & ~V3_PCI_BASE_M_ADR_BASE) {
+		dev_err(dev, "illegal range, only PCI bits 31..20 allowed\n");
+		return -EINVAL;
+	}
+	val = ((u32)pci_addr) & V3_PCI_BASE_M_ADR_BASE;
+	*pci_base = val;
+
+	if (cpu_addr & ~V3_PCI_MAP_M_MAP_ADR) {
+		dev_err(dev, "illegal range, only CPU bits 31..20 allowed\n");
+		return -EINVAL;
+	}
+	val = ((u32)cpu_addr) & V3_PCI_MAP_M_MAP_ADR;
+
+	switch (resource_size(entry->res)) {
+	case SZ_1M:
+		val |= V3_LB_BASE_ADR_SIZE_1MB;
+		break;
+	case SZ_2M:
+		val |= V3_LB_BASE_ADR_SIZE_2MB;
+		break;
+	case SZ_4M:
+		val |= V3_LB_BASE_ADR_SIZE_4MB;
+		break;
+	case SZ_8M:
+		val |= V3_LB_BASE_ADR_SIZE_8MB;
+		break;
+	case SZ_16M:
+		val |= V3_LB_BASE_ADR_SIZE_16MB;
+		break;
+	case SZ_32M:
+		val |= V3_LB_BASE_ADR_SIZE_32MB;
+		break;
+	case SZ_64M:
+		val |= V3_LB_BASE_ADR_SIZE_64MB;
+		break;
+	case SZ_128M:
+		val |= V3_LB_BASE_ADR_SIZE_128MB;
+		break;
+	case SZ_256M:
+		val |= V3_LB_BASE_ADR_SIZE_256MB;
+		break;
+	case SZ_512M:
+		val |= V3_LB_BASE_ADR_SIZE_512MB;
+		break;
+	case SZ_1G:
+		val |= V3_LB_BASE_ADR_SIZE_1GB;
+		break;
+	case SZ_2G:
+		val |= V3_LB_BASE_ADR_SIZE_2GB;
+		break;
+	default:
+		dev_err(v3->dev, "illegal dma memory chunk size\n");
+		return -EINVAL;
+	}
+	val |= V3_PCI_MAP_M_REG_EN | V3_PCI_MAP_M_ENABLE;
+	*pci_map = val;
+
+	dev_dbg(dev,
+		"DMA MEM CPU: 0x%016llx -> 0x%016llx => "
+		"PCI: 0x%016llx -> 0x%016llx base %08x map %08x\n",
+		cpu_addr, cpu_end,
+		pci_addr, pci_end,
+		*pci_base, *pci_map);
+
+	return 0;
+}
+
+static int v3_pci_parse_map_dma_ranges(struct v3_pci *v3,
+				       struct device_node *np)
+{
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(v3);
+	struct device *dev = v3->dev;
+	struct resource_entry *entry;
+	int i = 0;
+
+	resource_list_for_each_entry(entry, &bridge->dma_ranges) {
+		int ret;
+		u32 pci_base, pci_map;
+
+		ret = v3_get_dma_range_config(v3, entry, &pci_base, &pci_map);
+		if (ret)
+			return ret;
+
+		if (i == 0) {
+			writel(pci_base, v3->base + V3_PCI_BASE0);
+			writel(pci_map, v3->base + V3_PCI_MAP0);
+		} else if (i == 1) {
+			writel(pci_base, v3->base + V3_PCI_BASE1);
+			writel(pci_map, v3->base + V3_PCI_MAP1);
+		} else {
+			dev_err(dev, "too many ranges, only two supported\n");
+			dev_err(dev, "range %d ignored\n", i);
+		}
+		i++;
+	}
+	return 0;
+}
+
+static int v3_pci_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct resource *regs;
+	struct resource_entry *win;
+	struct v3_pci *v3;
+	struct pci_host_bridge *host;
+	struct clk *clk;
+	u16 val;
+	int irq;
+	int ret;
+
+	host = devm_pci_alloc_host_bridge(dev, sizeof(*v3));
+	if (!host)
+		return -ENOMEM;
+
+	host->ops = &v3_pci_ops;
+	v3 = pci_host_bridge_priv(host);
+	host->sysdata = v3;
+	v3->dev = dev;
+
+	/* Get and enable host clock */
+	clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(clk)) {
+		dev_err(dev, "clock not found\n");
+		return PTR_ERR(clk);
+	}
+	ret = clk_prepare_enable(clk);
+	if (ret) {
+		dev_err(dev, "unable to enable clock\n");
+		return ret;
+	}
+
+	v3->base = devm_platform_get_and_ioremap_resource(pdev, 0, &regs);
+	if (IS_ERR(v3->base))
+		return PTR_ERR(v3->base);
+	/*
+	 * The hardware has a register with the physical base address
+	 * of the V3 controller itself, verify that this is the same
+	 * as the physical memory we've remapped it from.
+	 */
+	if (readl(v3->base + V3_LB_IO_BASE) != (regs->start >> 16))
+		dev_err(dev, "V3_LB_IO_BASE = %08x but device is @%pR\n",
+			readl(v3->base + V3_LB_IO_BASE), regs);
+
+	/* Configuration space is 16MB directly mapped */
+	regs = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (resource_size(regs) != SZ_16M) {
+		dev_err(dev, "config mem is not 16MB!\n");
+		return -EINVAL;
+	}
+	v3->config_mem = regs->start;
+	v3->config_base = devm_ioremap_resource(dev, regs);
+	if (IS_ERR(v3->config_base))
+		return PTR_ERR(v3->config_base);
+
+	/* Get and request error IRQ resource */
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	ret = devm_request_irq(dev, irq, v3_irq, 0,
+			"PCIv3 error", v3);
+	if (ret < 0) {
+		dev_err(dev,
+			"unable to request PCIv3 error IRQ %d (%d)\n",
+			irq, ret);
+		return ret;
+	}
+
+	/*
+	 * Unlock V3 registers, but only if they were previously locked.
+	 */
+	if (readw(v3->base + V3_SYSTEM) & V3_SYSTEM_M_LOCK)
+		writew(V3_SYSTEM_UNLOCK, v3->base + V3_SYSTEM);
+
+	/* Disable all slave access while we set up the windows */
+	val = readw(v3->base + V3_PCI_CMD);
+	val &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+	writew(val, v3->base + V3_PCI_CMD);
+
+	/* Put the PCI bus into reset */
+	val = readw(v3->base + V3_SYSTEM);
+	val &= ~V3_SYSTEM_M_RST_OUT;
+	writew(val, v3->base + V3_SYSTEM);
+
+	/* Retry until we're ready */
+	val = readw(v3->base + V3_PCI_CFG);
+	val |= V3_PCI_CFG_M_RETRY_EN;
+	writew(val, v3->base + V3_PCI_CFG);
+
+	/* Set up the local bus protocol */
+	val = readw(v3->base + V3_LB_CFG);
+	val |= V3_LB_CFG_LB_BE_IMODE; /* Byte enable input */
+	val |= V3_LB_CFG_LB_BE_OMODE; /* Byte enable output */
+	val &= ~V3_LB_CFG_LB_ENDIAN; /* Little endian */
+	val &= ~V3_LB_CFG_LB_PPC_RDY; /* TODO: when using on PPC403Gx, set to 1 */
+	writew(val, v3->base + V3_LB_CFG);
+
+	/* Enable the PCI bus master */
+	val = readw(v3->base + V3_PCI_CMD);
+	val |= PCI_COMMAND_MASTER;
+	writew(val, v3->base + V3_PCI_CMD);
+
+	/* Get the I/O and memory ranges from DT */
+	resource_list_for_each_entry(win, &host->windows) {
+		ret = v3_pci_setup_resource(v3, host, win);
+		if (ret) {
+			dev_err(dev, "error setting up resources\n");
+			return ret;
+		}
+	}
+	ret = v3_pci_parse_map_dma_ranges(v3, np);
+	if (ret)
+		return ret;
+
+	/*
+	 * Disable PCI to host IO cycles, enable I/O buffers @3.3V,
+	 * set AD_LOW0 to 1 if one of the LB_MAP registers choose
+	 * to use this (should be unused).
+	 */
+	writel(0x00000000, v3->base + V3_PCI_IO_BASE);
+	val = V3_PCI_CFG_M_IO_REG_DIS | V3_PCI_CFG_M_IO_DIS |
+		V3_PCI_CFG_M_EN3V | V3_PCI_CFG_M_AD_LOW0;
+	/*
+	 * DMA read and write from PCI bus commands types
+	 */
+	val |=  V3_PCI_CFG_TYPE_DEFAULT << V3_PCI_CFG_M_RTYPE_SHIFT;
+	val |=  V3_PCI_CFG_TYPE_DEFAULT << V3_PCI_CFG_M_WTYPE_SHIFT;
+	writew(val, v3->base + V3_PCI_CFG);
+
+	/*
+	 * Set the V3 FIFO such that writes have higher priority than
+	 * reads, and local bus write causes local bus read fifo flush
+	 * on aperture 1. Same for PCI.
+	 */
+	writew(V3_FIFO_PRIO_LB_RD1_FLUSH_AP1 |
+	       V3_FIFO_PRIO_LB_RD0_FLUSH_AP1 |
+	       V3_FIFO_PRIO_PCI_RD1_FLUSH_AP1 |
+	       V3_FIFO_PRIO_PCI_RD0_FLUSH_AP1,
+	       v3->base + V3_FIFO_PRIORITY);
+
+
+	/*
+	 * Clear any error interrupts, and enable parity and write error
+	 * interrupts
+	 */
+	writeb(0, v3->base + V3_LB_ISTAT);
+	val = readw(v3->base + V3_LB_CFG);
+	val |= V3_LB_CFG_LB_LB_INT;
+	writew(val, v3->base + V3_LB_CFG);
+	writeb(V3_LB_ISTAT_PCI_WR | V3_LB_ISTAT_PCI_PERR,
+	       v3->base + V3_LB_IMASK);
+
+	/* Special Integrator initialization */
+	if (of_device_is_compatible(np, "arm,integrator-ap-pci")) {
+		ret = v3_integrator_init(v3);
+		if (ret)
+			return ret;
+	}
+
+	/* Post-init: enable PCI memory and invalidate (master already on) */
+	val = readw(v3->base + V3_PCI_CMD);
+	val |= PCI_COMMAND_MEMORY | PCI_COMMAND_INVALIDATE;
+	writew(val, v3->base + V3_PCI_CMD);
+
+	/* Clear pending interrupts */
+	writeb(0, v3->base + V3_LB_ISTAT);
+	/* Read or write errors and parity errors cause interrupts */
+	writeb(V3_LB_ISTAT_PCI_RD | V3_LB_ISTAT_PCI_WR | V3_LB_ISTAT_PCI_PERR,
+	       v3->base + V3_LB_IMASK);
+
+	/* Take the PCI bus out of reset so devices can initialize */
+	val = readw(v3->base + V3_SYSTEM);
+	val |= V3_SYSTEM_M_RST_OUT;
+	writew(val, v3->base + V3_SYSTEM);
+
+	/*
+	 * Re-lock the system register.
+	 */
+	val = readw(v3->base + V3_SYSTEM);
+	val |= V3_SYSTEM_M_LOCK;
+	writew(val, v3->base + V3_SYSTEM);
+
+	return pci_host_probe(host);
+}
+
+static const struct of_device_id v3_pci_of_match[] = {
+	{
+		.compatible = "v3,v360epc-pci",
+	},
+	{},
+};
+
+static struct platform_driver v3_pci_driver = {
+	.driver = {
+		.name = "pci-v3-semi",
+		.of_match_table = v3_pci_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe  = v3_pci_probe,
+};
+builtin_platform_driver(v3_pci_driver);
diff --git a/drivers/pci/controller/pci-versatile.c b/drivers/pci/controller/pci-versatile.c
new file mode 100644
index 000000000..e9a6758fe
--- /dev/null
+++ b/drivers/pci/controller/pci-versatile.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2004 Koninklijke Philips Electronics NV
+ *
+ * Conversion to platform driver and DT:
+ * Copyright 2014 Linaro Ltd.
+ *
+ * 14/04/2005 Initial version, colin.king@philips.com
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+
+#include "../pci.h"
+
+static void __iomem *versatile_pci_base;
+static void __iomem *versatile_cfg_base[2];
+
+#define PCI_IMAP(m)		(versatile_pci_base + ((m) * 4))
+#define PCI_SMAP(m)		(versatile_pci_base + 0x14 + ((m) * 4))
+#define PCI_SELFID		(versatile_pci_base + 0xc)
+
+#define VP_PCI_DEVICE_ID		0x030010ee
+#define VP_PCI_CLASS_ID			0x0b400000
+
+static u32 pci_slot_ignore;
+
+static int __init versatile_pci_slot_ignore(char *str)
+{
+	int slot;
+
+	while (get_option(&str, &slot)) {
+		if ((slot < 0) || (slot > 31))
+			pr_err("Illegal slot value: %d\n", slot);
+		else
+			pci_slot_ignore |= (1 << slot);
+	}
+	return 1;
+}
+__setup("pci_slot_ignore=", versatile_pci_slot_ignore);
+
+
+static void __iomem *versatile_map_bus(struct pci_bus *bus,
+				       unsigned int devfn, int offset)
+{
+	unsigned int busnr = bus->number;
+
+	if (pci_slot_ignore & (1 << PCI_SLOT(devfn)))
+		return NULL;
+
+	return versatile_cfg_base[1] + ((busnr << 16) | (devfn << 8) | offset);
+}
+
+static struct pci_ops pci_versatile_ops = {
+	.map_bus = versatile_map_bus,
+	.read	= pci_generic_config_read32,
+	.write	= pci_generic_config_write,
+};
+
+static int versatile_pci_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	struct resource_entry *entry;
+	int i, myslot = -1, mem = 1;
+	u32 val;
+	void __iomem *local_pci_cfg_base;
+	struct pci_host_bridge *bridge;
+
+	bridge = devm_pci_alloc_host_bridge(dev, 0);
+	if (!bridge)
+		return -ENOMEM;
+
+	versatile_pci_base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(versatile_pci_base))
+		return PTR_ERR(versatile_pci_base);
+
+	versatile_cfg_base[0] = devm_platform_ioremap_resource(pdev, 1);
+	if (IS_ERR(versatile_cfg_base[0]))
+		return PTR_ERR(versatile_cfg_base[0]);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+	versatile_cfg_base[1] = devm_pci_remap_cfg_resource(dev, res);
+	if (IS_ERR(versatile_cfg_base[1]))
+		return PTR_ERR(versatile_cfg_base[1]);
+
+	resource_list_for_each_entry(entry, &bridge->windows) {
+		if (resource_type(entry->res) == IORESOURCE_MEM) {
+			writel(entry->res->start >> 28, PCI_IMAP(mem));
+			writel(__pa(PAGE_OFFSET) >> 28, PCI_SMAP(mem));
+			mem++;
+		}
+	}
+
+	/*
+	 * We need to discover the PCI core first to configure itself
+	 * before the main PCI probing is performed
+	 */
+	for (i = 0; i < 32; i++) {
+		if ((readl(versatile_cfg_base[0] + (i << 11) + PCI_VENDOR_ID) == VP_PCI_DEVICE_ID) &&
+		    (readl(versatile_cfg_base[0] + (i << 11) + PCI_CLASS_REVISION) == VP_PCI_CLASS_ID)) {
+			myslot = i;
+			break;
+		}
+	}
+	if (myslot == -1) {
+		dev_err(dev, "Cannot find PCI core!\n");
+		return -EIO;
+	}
+	/*
+	 * Do not to map Versatile FPGA PCI device into memory space
+	 */
+	pci_slot_ignore |= (1 << myslot);
+
+	dev_info(dev, "PCI core found (slot %d)\n", myslot);
+
+	writel(myslot, PCI_SELFID);
+	local_pci_cfg_base = versatile_cfg_base[1] + (myslot << 11);
+
+	val = readl(local_pci_cfg_base + PCI_COMMAND);
+	val |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | PCI_COMMAND_INVALIDATE;
+	writel(val, local_pci_cfg_base + PCI_COMMAND);
+
+	/*
+	 * Configure the PCI inbound memory windows to be 1:1 mapped to SDRAM
+	 */
+	writel(__pa(PAGE_OFFSET), local_pci_cfg_base + PCI_BASE_ADDRESS_0);
+	writel(__pa(PAGE_OFFSET), local_pci_cfg_base + PCI_BASE_ADDRESS_1);
+	writel(__pa(PAGE_OFFSET), local_pci_cfg_base + PCI_BASE_ADDRESS_2);
+
+	/*
+	 * For many years the kernel and QEMU were symbiotically buggy
+	 * in that they both assumed the same broken IRQ mapping.
+	 * QEMU therefore attempts to auto-detect old broken kernels
+	 * so that they still work on newer QEMU as they did on old
+	 * QEMU. Since we now use the correct (ie matching-hardware)
+	 * IRQ mapping we write a definitely different value to a
+	 * PCI_INTERRUPT_LINE register to tell QEMU that we expect
+	 * real hardware behaviour and it need not be backwards
+	 * compatible for us. This write is harmless on real hardware.
+	 */
+	writel(0, versatile_cfg_base[0] + PCI_INTERRUPT_LINE);
+
+	pci_add_flags(PCI_REASSIGN_ALL_BUS);
+
+	bridge->ops = &pci_versatile_ops;
+
+	return pci_host_probe(bridge);
+}
+
+static const struct of_device_id versatile_pci_of_match[] = {
+	{ .compatible = "arm,versatile-pci", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, versatile_pci_of_match);
+
+static struct platform_driver versatile_pci_driver = {
+	.driver = {
+		.name = "versatile-pci",
+		.of_match_table = versatile_pci_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = versatile_pci_probe,
+};
+module_platform_driver(versatile_pci_driver);
+
+MODULE_DESCRIPTION("Versatile PCI driver");
diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c
new file mode 100644
index 000000000..3ce38dfd0
--- /dev/null
+++ b/drivers/pci/controller/pci-xgene-msi.c
@@ -0,0 +1,528 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * APM X-Gene MSI Driver
+ *
+ * Copyright (c) 2014, Applied Micro Circuits Corporation
+ * Author: Tanmay Inamdar <tinamdar@apm.com>
+ *	   Duc Dang <dhdang@apm.com>
+ */
+#include <linux/cpu.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/of_pci.h>
+
+#define MSI_IR0			0x000000
+#define MSI_INT0		0x800000
+#define IDX_PER_GROUP		8
+#define IRQS_PER_IDX		16
+#define NR_HW_IRQS		16
+#define NR_MSI_VEC		(IDX_PER_GROUP * IRQS_PER_IDX * NR_HW_IRQS)
+
+struct xgene_msi_group {
+	struct xgene_msi	*msi;
+	int			gic_irq;
+	u32			msi_grp;
+};
+
+struct xgene_msi {
+	struct device_node	*node;
+	struct irq_domain	*inner_domain;
+	struct irq_domain	*msi_domain;
+	u64			msi_addr;
+	void __iomem		*msi_regs;
+	unsigned long		*bitmap;
+	struct mutex		bitmap_lock;
+	struct xgene_msi_group	*msi_groups;
+	int			num_cpus;
+};
+
+/* Global data */
+static struct xgene_msi xgene_msi_ctrl;
+
+static struct irq_chip xgene_msi_top_irq_chip = {
+	.name		= "X-Gene1 MSI",
+	.irq_enable	= pci_msi_unmask_irq,
+	.irq_disable	= pci_msi_mask_irq,
+	.irq_mask	= pci_msi_mask_irq,
+	.irq_unmask	= pci_msi_unmask_irq,
+};
+
+static struct  msi_domain_info xgene_msi_domain_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		  MSI_FLAG_PCI_MSIX),
+	.chip	= &xgene_msi_top_irq_chip,
+};
+
+/*
+ * X-Gene v1 has 16 groups of MSI termination registers MSInIRx, where
+ * n is group number (0..F), x is index of registers in each group (0..7)
+ * The register layout is as follows:
+ * MSI0IR0			base_addr
+ * MSI0IR1			base_addr +  0x10000
+ * ...				...
+ * MSI0IR6			base_addr +  0x60000
+ * MSI0IR7			base_addr +  0x70000
+ * MSI1IR0			base_addr +  0x80000
+ * MSI1IR1			base_addr +  0x90000
+ * ...				...
+ * MSI1IR7			base_addr +  0xF0000
+ * MSI2IR0			base_addr + 0x100000
+ * ...				...
+ * MSIFIR0			base_addr + 0x780000
+ * MSIFIR1			base_addr + 0x790000
+ * ...				...
+ * MSIFIR7			base_addr + 0x7F0000
+ * MSIINT0			base_addr + 0x800000
+ * MSIINT1			base_addr + 0x810000
+ * ...				...
+ * MSIINTF			base_addr + 0x8F0000
+ *
+ * Each index register supports 16 MSI vectors (0..15) to generate interrupt.
+ * There are total 16 GIC IRQs assigned for these 16 groups of MSI termination
+ * registers.
+ *
+ * Each MSI termination group has 1 MSIINTn register (n is 0..15) to indicate
+ * the MSI pending status caused by 1 of its 8 index registers.
+ */
+
+/* MSInIRx read helper */
+static u32 xgene_msi_ir_read(struct xgene_msi *msi,
+				    u32 msi_grp, u32 msir_idx)
+{
+	return readl_relaxed(msi->msi_regs + MSI_IR0 +
+			      (msi_grp << 19) + (msir_idx << 16));
+}
+
+/* MSIINTn read helper */
+static u32 xgene_msi_int_read(struct xgene_msi *msi, u32 msi_grp)
+{
+	return readl_relaxed(msi->msi_regs + MSI_INT0 + (msi_grp << 16));
+}
+
+/*
+ * With 2048 MSI vectors supported, the MSI message can be constructed using
+ * following scheme:
+ * - Divide into 8 256-vector groups
+ *		Group 0: 0-255
+ *		Group 1: 256-511
+ *		Group 2: 512-767
+ *		...
+ *		Group 7: 1792-2047
+ * - Each 256-vector group is divided into 16 16-vector groups
+ *	As an example: 16 16-vector groups for 256-vector group 0-255 is
+ *		Group 0: 0-15
+ *		Group 1: 16-32
+ *		...
+ *		Group 15: 240-255
+ * - The termination address of MSI vector in 256-vector group n and 16-vector
+ *   group x is the address of MSIxIRn
+ * - The data for MSI vector in 16-vector group x is x
+ */
+static u32 hwirq_to_reg_set(unsigned long hwirq)
+{
+	return (hwirq / (NR_HW_IRQS * IRQS_PER_IDX));
+}
+
+static u32 hwirq_to_group(unsigned long hwirq)
+{
+	return (hwirq % NR_HW_IRQS);
+}
+
+static u32 hwirq_to_msi_data(unsigned long hwirq)
+{
+	return ((hwirq / NR_HW_IRQS) % IRQS_PER_IDX);
+}
+
+static void xgene_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct xgene_msi *msi = irq_data_get_irq_chip_data(data);
+	u32 reg_set = hwirq_to_reg_set(data->hwirq);
+	u32 group = hwirq_to_group(data->hwirq);
+	u64 target_addr = msi->msi_addr + (((8 * group) + reg_set) << 16);
+
+	msg->address_hi = upper_32_bits(target_addr);
+	msg->address_lo = lower_32_bits(target_addr);
+	msg->data = hwirq_to_msi_data(data->hwirq);
+}
+
+/*
+ * X-Gene v1 only has 16 MSI GIC IRQs for 2048 MSI vectors.  To maintain
+ * the expected behaviour of .set_affinity for each MSI interrupt, the 16
+ * MSI GIC IRQs are statically allocated to 8 X-Gene v1 cores (2 GIC IRQs
+ * for each core).  The MSI vector is moved fom 1 MSI GIC IRQ to another
+ * MSI GIC IRQ to steer its MSI interrupt to correct X-Gene v1 core.  As a
+ * consequence, the total MSI vectors that X-Gene v1 supports will be
+ * reduced to 256 (2048/8) vectors.
+ */
+static int hwirq_to_cpu(unsigned long hwirq)
+{
+	return (hwirq % xgene_msi_ctrl.num_cpus);
+}
+
+static unsigned long hwirq_to_canonical_hwirq(unsigned long hwirq)
+{
+	return (hwirq - hwirq_to_cpu(hwirq));
+}
+
+static int xgene_msi_set_affinity(struct irq_data *irqdata,
+				  const struct cpumask *mask, bool force)
+{
+	int target_cpu = cpumask_first(mask);
+	int curr_cpu;
+
+	curr_cpu = hwirq_to_cpu(irqdata->hwirq);
+	if (curr_cpu == target_cpu)
+		return IRQ_SET_MASK_OK_DONE;
+
+	/* Update MSI number to target the new CPU */
+	irqdata->hwirq = hwirq_to_canonical_hwirq(irqdata->hwirq) + target_cpu;
+
+	return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip xgene_msi_bottom_irq_chip = {
+	.name			= "MSI",
+	.irq_set_affinity       = xgene_msi_set_affinity,
+	.irq_compose_msi_msg	= xgene_compose_msi_msg,
+};
+
+static int xgene_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs, void *args)
+{
+	struct xgene_msi *msi = domain->host_data;
+	int msi_irq;
+
+	mutex_lock(&msi->bitmap_lock);
+
+	msi_irq = bitmap_find_next_zero_area(msi->bitmap, NR_MSI_VEC, 0,
+					     msi->num_cpus, 0);
+	if (msi_irq < NR_MSI_VEC)
+		bitmap_set(msi->bitmap, msi_irq, msi->num_cpus);
+	else
+		msi_irq = -ENOSPC;
+
+	mutex_unlock(&msi->bitmap_lock);
+
+	if (msi_irq < 0)
+		return msi_irq;
+
+	irq_domain_set_info(domain, virq, msi_irq,
+			    &xgene_msi_bottom_irq_chip, domain->host_data,
+			    handle_simple_irq, NULL, NULL);
+
+	return 0;
+}
+
+static void xgene_irq_domain_free(struct irq_domain *domain,
+				  unsigned int virq, unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct xgene_msi *msi = irq_data_get_irq_chip_data(d);
+	u32 hwirq;
+
+	mutex_lock(&msi->bitmap_lock);
+
+	hwirq = hwirq_to_canonical_hwirq(d->hwirq);
+	bitmap_clear(msi->bitmap, hwirq, msi->num_cpus);
+
+	mutex_unlock(&msi->bitmap_lock);
+
+	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+}
+
+static const struct irq_domain_ops msi_domain_ops = {
+	.alloc  = xgene_irq_domain_alloc,
+	.free   = xgene_irq_domain_free,
+};
+
+static int xgene_allocate_domains(struct xgene_msi *msi)
+{
+	msi->inner_domain = irq_domain_add_linear(NULL, NR_MSI_VEC,
+						  &msi_domain_ops, msi);
+	if (!msi->inner_domain)
+		return -ENOMEM;
+
+	msi->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(msi->node),
+						    &xgene_msi_domain_info,
+						    msi->inner_domain);
+
+	if (!msi->msi_domain) {
+		irq_domain_remove(msi->inner_domain);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void xgene_free_domains(struct xgene_msi *msi)
+{
+	if (msi->msi_domain)
+		irq_domain_remove(msi->msi_domain);
+	if (msi->inner_domain)
+		irq_domain_remove(msi->inner_domain);
+}
+
+static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi)
+{
+	xgene_msi->bitmap = bitmap_zalloc(NR_MSI_VEC, GFP_KERNEL);
+	if (!xgene_msi->bitmap)
+		return -ENOMEM;
+
+	mutex_init(&xgene_msi->bitmap_lock);
+
+	xgene_msi->msi_groups = kcalloc(NR_HW_IRQS,
+					sizeof(struct xgene_msi_group),
+					GFP_KERNEL);
+	if (!xgene_msi->msi_groups)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void xgene_msi_isr(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct xgene_msi_group *msi_groups;
+	struct xgene_msi *xgene_msi;
+	int msir_index, msir_val, hw_irq, ret;
+	u32 intr_index, grp_select, msi_grp;
+
+	chained_irq_enter(chip, desc);
+
+	msi_groups = irq_desc_get_handler_data(desc);
+	xgene_msi = msi_groups->msi;
+	msi_grp = msi_groups->msi_grp;
+
+	/*
+	 * MSIINTn (n is 0..F) indicates if there is a pending MSI interrupt
+	 * If bit x of this register is set (x is 0..7), one or more interrupts
+	 * corresponding to MSInIRx is set.
+	 */
+	grp_select = xgene_msi_int_read(xgene_msi, msi_grp);
+	while (grp_select) {
+		msir_index = ffs(grp_select) - 1;
+		/*
+		 * Calculate MSInIRx address to read to check for interrupts
+		 * (refer to termination address and data assignment
+		 * described in xgene_compose_msi_msg() )
+		 */
+		msir_val = xgene_msi_ir_read(xgene_msi, msi_grp, msir_index);
+		while (msir_val) {
+			intr_index = ffs(msir_val) - 1;
+			/*
+			 * Calculate MSI vector number (refer to the termination
+			 * address and data assignment described in
+			 * xgene_compose_msi_msg function)
+			 */
+			hw_irq = (((msir_index * IRQS_PER_IDX) + intr_index) *
+				 NR_HW_IRQS) + msi_grp;
+			/*
+			 * As we have multiple hw_irq that maps to single MSI,
+			 * always look up the virq using the hw_irq as seen from
+			 * CPU0
+			 */
+			hw_irq = hwirq_to_canonical_hwirq(hw_irq);
+			ret = generic_handle_domain_irq(xgene_msi->inner_domain, hw_irq);
+			WARN_ON_ONCE(ret);
+			msir_val &= ~(1 << intr_index);
+		}
+		grp_select &= ~(1 << msir_index);
+
+		if (!grp_select) {
+			/*
+			 * We handled all interrupts happened in this group,
+			 * resample this group MSI_INTx register in case
+			 * something else has been made pending in the meantime
+			 */
+			grp_select = xgene_msi_int_read(xgene_msi, msi_grp);
+		}
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static enum cpuhp_state pci_xgene_online;
+
+static void xgene_msi_remove(struct platform_device *pdev)
+{
+	struct xgene_msi *msi = platform_get_drvdata(pdev);
+
+	if (pci_xgene_online)
+		cpuhp_remove_state(pci_xgene_online);
+	cpuhp_remove_state(CPUHP_PCI_XGENE_DEAD);
+
+	kfree(msi->msi_groups);
+
+	bitmap_free(msi->bitmap);
+	msi->bitmap = NULL;
+
+	xgene_free_domains(msi);
+}
+
+static int xgene_msi_hwirq_alloc(unsigned int cpu)
+{
+	struct xgene_msi *msi = &xgene_msi_ctrl;
+	struct xgene_msi_group *msi_group;
+	cpumask_var_t mask;
+	int i;
+	int err;
+
+	for (i = cpu; i < NR_HW_IRQS; i += msi->num_cpus) {
+		msi_group = &msi->msi_groups[i];
+		if (!msi_group->gic_irq)
+			continue;
+
+		irq_set_chained_handler_and_data(msi_group->gic_irq,
+			xgene_msi_isr, msi_group);
+
+		/*
+		 * Statically allocate MSI GIC IRQs to each CPU core.
+		 * With 8-core X-Gene v1, 2 MSI GIC IRQs are allocated
+		 * to each core.
+		 */
+		if (alloc_cpumask_var(&mask, GFP_KERNEL)) {
+			cpumask_clear(mask);
+			cpumask_set_cpu(cpu, mask);
+			err = irq_set_affinity(msi_group->gic_irq, mask);
+			if (err)
+				pr_err("failed to set affinity for GIC IRQ");
+			free_cpumask_var(mask);
+		} else {
+			pr_err("failed to alloc CPU mask for affinity\n");
+			err = -EINVAL;
+		}
+
+		if (err) {
+			irq_set_chained_handler_and_data(msi_group->gic_irq,
+							 NULL, NULL);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int xgene_msi_hwirq_free(unsigned int cpu)
+{
+	struct xgene_msi *msi = &xgene_msi_ctrl;
+	struct xgene_msi_group *msi_group;
+	int i;
+
+	for (i = cpu; i < NR_HW_IRQS; i += msi->num_cpus) {
+		msi_group = &msi->msi_groups[i];
+		if (!msi_group->gic_irq)
+			continue;
+
+		irq_set_chained_handler_and_data(msi_group->gic_irq, NULL,
+						 NULL);
+	}
+	return 0;
+}
+
+static const struct of_device_id xgene_msi_match_table[] = {
+	{.compatible = "apm,xgene1-msi"},
+	{},
+};
+
+static int xgene_msi_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	int rc, irq_index;
+	struct xgene_msi *xgene_msi;
+	int virt_msir;
+	u32 msi_val, msi_idx;
+
+	xgene_msi = &xgene_msi_ctrl;
+
+	platform_set_drvdata(pdev, xgene_msi);
+
+	xgene_msi->msi_regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+	if (IS_ERR(xgene_msi->msi_regs)) {
+		rc = PTR_ERR(xgene_msi->msi_regs);
+		goto error;
+	}
+	xgene_msi->msi_addr = res->start;
+	xgene_msi->node = pdev->dev.of_node;
+	xgene_msi->num_cpus = num_possible_cpus();
+
+	rc = xgene_msi_init_allocator(xgene_msi);
+	if (rc) {
+		dev_err(&pdev->dev, "Error allocating MSI bitmap\n");
+		goto error;
+	}
+
+	rc = xgene_allocate_domains(xgene_msi);
+	if (rc) {
+		dev_err(&pdev->dev, "Failed to allocate MSI domain\n");
+		goto error;
+	}
+
+	for (irq_index = 0; irq_index < NR_HW_IRQS; irq_index++) {
+		virt_msir = platform_get_irq(pdev, irq_index);
+		if (virt_msir < 0) {
+			rc = virt_msir;
+			goto error;
+		}
+		xgene_msi->msi_groups[irq_index].gic_irq = virt_msir;
+		xgene_msi->msi_groups[irq_index].msi_grp = irq_index;
+		xgene_msi->msi_groups[irq_index].msi = xgene_msi;
+	}
+
+	/*
+	 * MSInIRx registers are read-to-clear; before registering
+	 * interrupt handlers, read all of them to clear spurious
+	 * interrupts that may occur before the driver is probed.
+	 */
+	for (irq_index = 0; irq_index < NR_HW_IRQS; irq_index++) {
+		for (msi_idx = 0; msi_idx < IDX_PER_GROUP; msi_idx++)
+			xgene_msi_ir_read(xgene_msi, irq_index, msi_idx);
+
+		/* Read MSIINTn to confirm */
+		msi_val = xgene_msi_int_read(xgene_msi, irq_index);
+		if (msi_val) {
+			dev_err(&pdev->dev, "Failed to clear spurious IRQ\n");
+			rc = -EINVAL;
+			goto error;
+		}
+	}
+
+	rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "pci/xgene:online",
+			       xgene_msi_hwirq_alloc, NULL);
+	if (rc < 0)
+		goto err_cpuhp;
+	pci_xgene_online = rc;
+	rc = cpuhp_setup_state(CPUHP_PCI_XGENE_DEAD, "pci/xgene:dead", NULL,
+			       xgene_msi_hwirq_free);
+	if (rc)
+		goto err_cpuhp;
+
+	dev_info(&pdev->dev, "APM X-Gene PCIe MSI driver loaded\n");
+
+	return 0;
+
+err_cpuhp:
+	dev_err(&pdev->dev, "failed to add CPU MSI notifier\n");
+error:
+	xgene_msi_remove(pdev);
+	return rc;
+}
+
+static struct platform_driver xgene_msi_driver = {
+	.driver = {
+		.name = "xgene-msi",
+		.of_match_table = xgene_msi_match_table,
+	},
+	.probe = xgene_msi_probe,
+	.remove_new = xgene_msi_remove,
+};
+
+static int __init xgene_pcie_msi_init(void)
+{
+	return platform_driver_register(&xgene_msi_driver);
+}
+subsys_initcall(xgene_pcie_msi_init);
diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c
new file mode 100644
index 000000000..887b4941f
--- /dev/null
+++ b/drivers/pci/controller/pci-xgene.c
@@ -0,0 +1,649 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * APM X-Gene PCIe Driver
+ *
+ * Copyright (c) 2014 Applied Micro Circuits Corporation.
+ *
+ * Author: Tanmay Inamdar <tinamdar@apm.com>.
+ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/jiffies.h>
+#include <linux/memblock.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/pci-ecam.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "../pci.h"
+
+#define PCIECORE_CTLANDSTATUS		0x50
+#define PIM1_1L				0x80
+#define IBAR2				0x98
+#define IR2MSK				0x9c
+#define PIM2_1L				0xa0
+#define IBAR3L				0xb4
+#define IR3MSKL				0xbc
+#define PIM3_1L				0xc4
+#define OMR1BARL			0x100
+#define OMR2BARL			0x118
+#define OMR3BARL			0x130
+#define CFGBARL				0x154
+#define CFGBARH				0x158
+#define CFGCTL				0x15c
+#define RTDID				0x160
+#define BRIDGE_CFG_0			0x2000
+#define BRIDGE_CFG_4			0x2010
+#define BRIDGE_STATUS_0			0x2600
+
+#define LINK_UP_MASK			0x00000100
+#define AXI_EP_CFG_ACCESS		0x10000
+#define EN_COHERENCY			0xF0000000
+#define EN_REG				0x00000001
+#define OB_LO_IO			0x00000002
+#define XGENE_PCIE_DEVICEID		0xE004
+#define PIPE_PHY_RATE_RD(src)		((0xc000 & (u32)(src)) >> 0xe)
+
+#define XGENE_V1_PCI_EXP_CAP		0x40
+
+/* PCIe IP version */
+#define XGENE_PCIE_IP_VER_UNKN		0
+#define XGENE_PCIE_IP_VER_1		1
+#define XGENE_PCIE_IP_VER_2		2
+
+#if defined(CONFIG_PCI_XGENE) || (defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS))
+struct xgene_pcie {
+	struct device_node	*node;
+	struct device		*dev;
+	struct clk		*clk;
+	void __iomem		*csr_base;
+	void __iomem		*cfg_base;
+	unsigned long		cfg_addr;
+	bool			link_up;
+	u32			version;
+};
+
+static u32 xgene_pcie_readl(struct xgene_pcie *port, u32 reg)
+{
+	return readl(port->csr_base + reg);
+}
+
+static void xgene_pcie_writel(struct xgene_pcie *port, u32 reg, u32 val)
+{
+	writel(val, port->csr_base + reg);
+}
+
+static inline u32 pcie_bar_low_val(u32 addr, u32 flags)
+{
+	return (addr & PCI_BASE_ADDRESS_MEM_MASK) | flags;
+}
+
+static inline struct xgene_pcie *pcie_bus_to_port(struct pci_bus *bus)
+{
+	struct pci_config_window *cfg;
+
+	if (acpi_disabled)
+		return (struct xgene_pcie *)(bus->sysdata);
+
+	cfg = bus->sysdata;
+	return (struct xgene_pcie *)(cfg->priv);
+}
+
+/*
+ * When the address bit [17:16] is 2'b01, the Configuration access will be
+ * treated as Type 1 and it will be forwarded to external PCIe device.
+ */
+static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
+{
+	struct xgene_pcie *port = pcie_bus_to_port(bus);
+
+	if (bus->number >= (bus->primary + 1))
+		return port->cfg_base + AXI_EP_CFG_ACCESS;
+
+	return port->cfg_base;
+}
+
+/*
+ * For Configuration request, RTDID register is used as Bus Number,
+ * Device Number and Function number of the header fields.
+ */
+static void xgene_pcie_set_rtdid_reg(struct pci_bus *bus, uint devfn)
+{
+	struct xgene_pcie *port = pcie_bus_to_port(bus);
+	unsigned int b, d, f;
+	u32 rtdid_val = 0;
+
+	b = bus->number;
+	d = PCI_SLOT(devfn);
+	f = PCI_FUNC(devfn);
+
+	if (!pci_is_root_bus(bus))
+		rtdid_val = (b << 8) | (d << 3) | f;
+
+	xgene_pcie_writel(port, RTDID, rtdid_val);
+	/* read the register back to ensure flush */
+	xgene_pcie_readl(port, RTDID);
+}
+
+/*
+ * X-Gene PCIe port uses BAR0-BAR1 of RC's configuration space as
+ * the translation from PCI bus to native BUS.  Entire DDR region
+ * is mapped into PCIe space using these registers, so it can be
+ * reached by DMA from EP devices.  The BAR0/1 of bridge should be
+ * hidden during enumeration to avoid the sizing and resource allocation
+ * by PCIe core.
+ */
+static bool xgene_pcie_hide_rc_bars(struct pci_bus *bus, int offset)
+{
+	if (pci_is_root_bus(bus) && ((offset == PCI_BASE_ADDRESS_0) ||
+				     (offset == PCI_BASE_ADDRESS_1)))
+		return true;
+
+	return false;
+}
+
+static void __iomem *xgene_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
+					int offset)
+{
+	if ((pci_is_root_bus(bus) && devfn != 0) ||
+	    xgene_pcie_hide_rc_bars(bus, offset))
+		return NULL;
+
+	xgene_pcie_set_rtdid_reg(bus, devfn);
+	return xgene_pcie_get_cfg_base(bus) + offset;
+}
+
+static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
+				    int where, int size, u32 *val)
+{
+	struct xgene_pcie *port = pcie_bus_to_port(bus);
+
+	if (pci_generic_config_read32(bus, devfn, where & ~0x3, 4, val) !=
+	    PCIBIOS_SUCCESSFUL)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/*
+	 * The v1 controller has a bug in its Configuration Request Retry
+	 * Status (CRS) logic: when CRS Software Visibility is enabled and
+	 * we read the Vendor and Device ID of a non-existent device, the
+	 * controller fabricates return data of 0xFFFF0001 ("device exists
+	 * but is not ready") instead of 0xFFFFFFFF (PCI_ERROR_RESPONSE)
+	 * ("device does not exist").  This causes the PCI core to retry
+	 * the read until it times out.  Avoid this by not claiming to
+	 * support CRS SV.
+	 */
+	if (pci_is_root_bus(bus) && (port->version == XGENE_PCIE_IP_VER_1) &&
+	    ((where & ~0x3) == XGENE_V1_PCI_EXP_CAP + PCI_EXP_RTCTL))
+		*val &= ~(PCI_EXP_RTCAP_CRSVIS << 16);
+
+	if (size <= 2)
+		*val = (*val >> (8 * (where & 3))) & ((1 << (size * 8)) - 1);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+#endif
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
+static int xgene_get_csr_resource(struct acpi_device *adev,
+				  struct resource *res)
+{
+	struct device *dev = &adev->dev;
+	struct resource_entry *entry;
+	struct list_head list;
+	unsigned long flags;
+	int ret;
+
+	INIT_LIST_HEAD(&list);
+	flags = IORESOURCE_MEM;
+	ret = acpi_dev_get_resources(adev, &list,
+				     acpi_dev_filter_resource_type_cb,
+				     (void *) flags);
+	if (ret < 0) {
+		dev_err(dev, "failed to parse _CRS method, error code %d\n",
+			ret);
+		return ret;
+	}
+
+	if (ret == 0) {
+		dev_err(dev, "no IO and memory resources present in _CRS\n");
+		return -EINVAL;
+	}
+
+	entry = list_first_entry(&list, struct resource_entry, node);
+	*res = *entry->res;
+	acpi_dev_free_resource_list(&list);
+	return 0;
+}
+
+static int xgene_pcie_ecam_init(struct pci_config_window *cfg, u32 ipversion)
+{
+	struct device *dev = cfg->parent;
+	struct acpi_device *adev = to_acpi_device(dev);
+	struct xgene_pcie *port;
+	struct resource csr;
+	int ret;
+
+	port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+
+	ret = xgene_get_csr_resource(adev, &csr);
+	if (ret) {
+		dev_err(dev, "can't get CSR resource\n");
+		return ret;
+	}
+	port->csr_base = devm_pci_remap_cfg_resource(dev, &csr);
+	if (IS_ERR(port->csr_base))
+		return PTR_ERR(port->csr_base);
+
+	port->cfg_base = cfg->win;
+	port->version = ipversion;
+
+	cfg->priv = port;
+	return 0;
+}
+
+static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
+{
+	return xgene_pcie_ecam_init(cfg, XGENE_PCIE_IP_VER_1);
+}
+
+const struct pci_ecam_ops xgene_v1_pcie_ecam_ops = {
+	.init		= xgene_v1_pcie_ecam_init,
+	.pci_ops	= {
+		.map_bus	= xgene_pcie_map_bus,
+		.read		= xgene_pcie_config_read32,
+		.write		= pci_generic_config_write,
+	}
+};
+
+static int xgene_v2_pcie_ecam_init(struct pci_config_window *cfg)
+{
+	return xgene_pcie_ecam_init(cfg, XGENE_PCIE_IP_VER_2);
+}
+
+const struct pci_ecam_ops xgene_v2_pcie_ecam_ops = {
+	.init		= xgene_v2_pcie_ecam_init,
+	.pci_ops	= {
+		.map_bus	= xgene_pcie_map_bus,
+		.read		= xgene_pcie_config_read32,
+		.write		= pci_generic_config_write,
+	}
+};
+#endif
+
+#if defined(CONFIG_PCI_XGENE)
+static u64 xgene_pcie_set_ib_mask(struct xgene_pcie *port, u32 addr,
+				  u32 flags, u64 size)
+{
+	u64 mask = (~(size - 1) & PCI_BASE_ADDRESS_MEM_MASK) | flags;
+	u32 val32 = 0;
+	u32 val;
+
+	val32 = xgene_pcie_readl(port, addr);
+	val = (val32 & 0x0000ffff) | (lower_32_bits(mask) << 16);
+	xgene_pcie_writel(port, addr, val);
+
+	val32 = xgene_pcie_readl(port, addr + 0x04);
+	val = (val32 & 0xffff0000) | (lower_32_bits(mask) >> 16);
+	xgene_pcie_writel(port, addr + 0x04, val);
+
+	val32 = xgene_pcie_readl(port, addr + 0x04);
+	val = (val32 & 0x0000ffff) | (upper_32_bits(mask) << 16);
+	xgene_pcie_writel(port, addr + 0x04, val);
+
+	val32 = xgene_pcie_readl(port, addr + 0x08);
+	val = (val32 & 0xffff0000) | (upper_32_bits(mask) >> 16);
+	xgene_pcie_writel(port, addr + 0x08, val);
+
+	return mask;
+}
+
+static void xgene_pcie_linkup(struct xgene_pcie *port,
+			      u32 *lanes, u32 *speed)
+{
+	u32 val32;
+
+	port->link_up = false;
+	val32 = xgene_pcie_readl(port, PCIECORE_CTLANDSTATUS);
+	if (val32 & LINK_UP_MASK) {
+		port->link_up = true;
+		*speed = PIPE_PHY_RATE_RD(val32);
+		val32 = xgene_pcie_readl(port, BRIDGE_STATUS_0);
+		*lanes = val32 >> 26;
+	}
+}
+
+static int xgene_pcie_init_port(struct xgene_pcie *port)
+{
+	struct device *dev = port->dev;
+	int rc;
+
+	port->clk = clk_get(dev, NULL);
+	if (IS_ERR(port->clk)) {
+		dev_err(dev, "clock not available\n");
+		return -ENODEV;
+	}
+
+	rc = clk_prepare_enable(port->clk);
+	if (rc) {
+		dev_err(dev, "clock enable failed\n");
+		return rc;
+	}
+
+	return 0;
+}
+
+static int xgene_pcie_map_reg(struct xgene_pcie *port,
+			      struct platform_device *pdev)
+{
+	struct device *dev = port->dev;
+	struct resource *res;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "csr");
+	port->csr_base = devm_pci_remap_cfg_resource(dev, res);
+	if (IS_ERR(port->csr_base))
+		return PTR_ERR(port->csr_base);
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cfg");
+	port->cfg_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(port->cfg_base))
+		return PTR_ERR(port->cfg_base);
+	port->cfg_addr = res->start;
+
+	return 0;
+}
+
+static void xgene_pcie_setup_ob_reg(struct xgene_pcie *port,
+				    struct resource *res, u32 offset,
+				    u64 cpu_addr, u64 pci_addr)
+{
+	struct device *dev = port->dev;
+	resource_size_t size = resource_size(res);
+	u64 restype = resource_type(res);
+	u64 mask = 0;
+	u32 min_size;
+	u32 flag = EN_REG;
+
+	if (restype == IORESOURCE_MEM) {
+		min_size = SZ_128M;
+	} else {
+		min_size = 128;
+		flag |= OB_LO_IO;
+	}
+
+	if (size >= min_size)
+		mask = ~(size - 1) | flag;
+	else
+		dev_warn(dev, "res size 0x%llx less than minimum 0x%x\n",
+			 (u64)size, min_size);
+
+	xgene_pcie_writel(port, offset, lower_32_bits(cpu_addr));
+	xgene_pcie_writel(port, offset + 0x04, upper_32_bits(cpu_addr));
+	xgene_pcie_writel(port, offset + 0x08, lower_32_bits(mask));
+	xgene_pcie_writel(port, offset + 0x0c, upper_32_bits(mask));
+	xgene_pcie_writel(port, offset + 0x10, lower_32_bits(pci_addr));
+	xgene_pcie_writel(port, offset + 0x14, upper_32_bits(pci_addr));
+}
+
+static void xgene_pcie_setup_cfg_reg(struct xgene_pcie *port)
+{
+	u64 addr = port->cfg_addr;
+
+	xgene_pcie_writel(port, CFGBARL, lower_32_bits(addr));
+	xgene_pcie_writel(port, CFGBARH, upper_32_bits(addr));
+	xgene_pcie_writel(port, CFGCTL, EN_REG);
+}
+
+static int xgene_pcie_map_ranges(struct xgene_pcie *port)
+{
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(port);
+	struct resource_entry *window;
+	struct device *dev = port->dev;
+
+	resource_list_for_each_entry(window, &bridge->windows) {
+		struct resource *res = window->res;
+		u64 restype = resource_type(res);
+
+		dev_dbg(dev, "%pR\n", res);
+
+		switch (restype) {
+		case IORESOURCE_IO:
+			xgene_pcie_setup_ob_reg(port, res, OMR3BARL,
+						pci_pio_to_address(res->start),
+						res->start - window->offset);
+			break;
+		case IORESOURCE_MEM:
+			if (res->flags & IORESOURCE_PREFETCH)
+				xgene_pcie_setup_ob_reg(port, res, OMR2BARL,
+							res->start,
+							res->start -
+							window->offset);
+			else
+				xgene_pcie_setup_ob_reg(port, res, OMR1BARL,
+							res->start,
+							res->start -
+							window->offset);
+			break;
+		case IORESOURCE_BUS:
+			break;
+		default:
+			dev_err(dev, "invalid resource %pR\n", res);
+			return -EINVAL;
+		}
+	}
+	xgene_pcie_setup_cfg_reg(port);
+	return 0;
+}
+
+static void xgene_pcie_setup_pims(struct xgene_pcie *port, u32 pim_reg,
+				  u64 pim, u64 size)
+{
+	xgene_pcie_writel(port, pim_reg, lower_32_bits(pim));
+	xgene_pcie_writel(port, pim_reg + 0x04,
+			  upper_32_bits(pim) | EN_COHERENCY);
+	xgene_pcie_writel(port, pim_reg + 0x10, lower_32_bits(size));
+	xgene_pcie_writel(port, pim_reg + 0x14, upper_32_bits(size));
+}
+
+/*
+ * X-Gene PCIe support maximum 3 inbound memory regions
+ * This function helps to select a region based on size of region
+ */
+static int xgene_pcie_select_ib_reg(u8 *ib_reg_mask, u64 size)
+{
+	if ((size > 4) && (size < SZ_16M) && !(*ib_reg_mask & (1 << 1))) {
+		*ib_reg_mask |= (1 << 1);
+		return 1;
+	}
+
+	if ((size > SZ_1K) && (size < SZ_1T) && !(*ib_reg_mask & (1 << 0))) {
+		*ib_reg_mask |= (1 << 0);
+		return 0;
+	}
+
+	if ((size > SZ_1M) && (size < SZ_1T) && !(*ib_reg_mask & (1 << 2))) {
+		*ib_reg_mask |= (1 << 2);
+		return 2;
+	}
+
+	return -EINVAL;
+}
+
+static void xgene_pcie_setup_ib_reg(struct xgene_pcie *port,
+				    struct of_pci_range *range, u8 *ib_reg_mask)
+{
+	void __iomem *cfg_base = port->cfg_base;
+	struct device *dev = port->dev;
+	void __iomem *bar_addr;
+	u32 pim_reg;
+	u64 cpu_addr = range->cpu_addr;
+	u64 pci_addr = range->pci_addr;
+	u64 size = range->size;
+	u64 mask = ~(size - 1) | EN_REG;
+	u32 flags = PCI_BASE_ADDRESS_MEM_TYPE_64;
+	u32 bar_low;
+	int region;
+
+	region = xgene_pcie_select_ib_reg(ib_reg_mask, range->size);
+	if (region < 0) {
+		dev_warn(dev, "invalid pcie dma-range config\n");
+		return;
+	}
+
+	if (range->flags & IORESOURCE_PREFETCH)
+		flags |= PCI_BASE_ADDRESS_MEM_PREFETCH;
+
+	bar_low = pcie_bar_low_val((u32)cpu_addr, flags);
+	switch (region) {
+	case 0:
+		xgene_pcie_set_ib_mask(port, BRIDGE_CFG_4, flags, size);
+		bar_addr = cfg_base + PCI_BASE_ADDRESS_0;
+		writel(bar_low, bar_addr);
+		writel(upper_32_bits(cpu_addr), bar_addr + 0x4);
+		pim_reg = PIM1_1L;
+		break;
+	case 1:
+		xgene_pcie_writel(port, IBAR2, bar_low);
+		xgene_pcie_writel(port, IR2MSK, lower_32_bits(mask));
+		pim_reg = PIM2_1L;
+		break;
+	case 2:
+		xgene_pcie_writel(port, IBAR3L, bar_low);
+		xgene_pcie_writel(port, IBAR3L + 0x4, upper_32_bits(cpu_addr));
+		xgene_pcie_writel(port, IR3MSKL, lower_32_bits(mask));
+		xgene_pcie_writel(port, IR3MSKL + 0x4, upper_32_bits(mask));
+		pim_reg = PIM3_1L;
+		break;
+	}
+
+	xgene_pcie_setup_pims(port, pim_reg, pci_addr, ~(size - 1));
+}
+
+static int xgene_pcie_parse_map_dma_ranges(struct xgene_pcie *port)
+{
+	struct device_node *np = port->node;
+	struct of_pci_range range;
+	struct of_pci_range_parser parser;
+	struct device *dev = port->dev;
+	u8 ib_reg_mask = 0;
+
+	if (of_pci_dma_range_parser_init(&parser, np)) {
+		dev_err(dev, "missing dma-ranges property\n");
+		return -EINVAL;
+	}
+
+	/* Get the dma-ranges from DT */
+	for_each_of_pci_range(&parser, &range) {
+		u64 end = range.cpu_addr + range.size - 1;
+
+		dev_dbg(dev, "0x%08x 0x%016llx..0x%016llx -> 0x%016llx\n",
+			range.flags, range.cpu_addr, end, range.pci_addr);
+		xgene_pcie_setup_ib_reg(port, &range, &ib_reg_mask);
+	}
+	return 0;
+}
+
+/* clear BAR configuration which was done by firmware */
+static void xgene_pcie_clear_config(struct xgene_pcie *port)
+{
+	int i;
+
+	for (i = PIM1_1L; i <= CFGCTL; i += 4)
+		xgene_pcie_writel(port, i, 0);
+}
+
+static int xgene_pcie_setup(struct xgene_pcie *port)
+{
+	struct device *dev = port->dev;
+	u32 val, lanes = 0, speed = 0;
+	int ret;
+
+	xgene_pcie_clear_config(port);
+
+	/* setup the vendor and device IDs correctly */
+	val = (XGENE_PCIE_DEVICEID << 16) | PCI_VENDOR_ID_AMCC;
+	xgene_pcie_writel(port, BRIDGE_CFG_0, val);
+
+	ret = xgene_pcie_map_ranges(port);
+	if (ret)
+		return ret;
+
+	ret = xgene_pcie_parse_map_dma_ranges(port);
+	if (ret)
+		return ret;
+
+	xgene_pcie_linkup(port, &lanes, &speed);
+	if (!port->link_up)
+		dev_info(dev, "(rc) link down\n");
+	else
+		dev_info(dev, "(rc) x%d gen-%d link up\n", lanes, speed + 1);
+	return 0;
+}
+
+static struct pci_ops xgene_pcie_ops = {
+	.map_bus = xgene_pcie_map_bus,
+	.read = xgene_pcie_config_read32,
+	.write = pci_generic_config_write32,
+};
+
+static int xgene_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *dn = dev->of_node;
+	struct xgene_pcie *port;
+	struct pci_host_bridge *bridge;
+	int ret;
+
+	bridge = devm_pci_alloc_host_bridge(dev, sizeof(*port));
+	if (!bridge)
+		return -ENOMEM;
+
+	port = pci_host_bridge_priv(bridge);
+
+	port->node = of_node_get(dn);
+	port->dev = dev;
+
+	port->version = XGENE_PCIE_IP_VER_UNKN;
+	if (of_device_is_compatible(port->node, "apm,xgene-pcie"))
+		port->version = XGENE_PCIE_IP_VER_1;
+
+	ret = xgene_pcie_map_reg(port, pdev);
+	if (ret)
+		return ret;
+
+	ret = xgene_pcie_init_port(port);
+	if (ret)
+		return ret;
+
+	ret = xgene_pcie_setup(port);
+	if (ret)
+		return ret;
+
+	bridge->sysdata = port;
+	bridge->ops = &xgene_pcie_ops;
+
+	return pci_host_probe(bridge);
+}
+
+static const struct of_device_id xgene_pcie_match_table[] = {
+	{.compatible = "apm,xgene-pcie",},
+	{},
+};
+
+static struct platform_driver xgene_pcie_driver = {
+	.driver = {
+		.name = "xgene-pcie",
+		.of_match_table = xgene_pcie_match_table,
+		.suppress_bind_attrs = true,
+	},
+	.probe = xgene_pcie_probe,
+};
+builtin_platform_driver(xgene_pcie_driver);
+#endif
diff --git a/drivers/pci/controller/pcie-altera-msi.c b/drivers/pci/controller/pcie-altera-msi.c
new file mode 100644
index 000000000..6ad542749
--- /dev/null
+++ b/drivers/pci/controller/pcie-altera-msi.c
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Altera PCIe MSI support
+ *
+ * Author: Ley Foon Tan <lftan@altera.com>
+ *
+ * Copyright Altera Corporation (C) 2013-2015. All rights reserved
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#define MSI_STATUS		0x0
+#define MSI_ERROR		0x4
+#define MSI_INTMASK		0x8
+
+#define MAX_MSI_VECTORS		32
+
+struct altera_msi {
+	DECLARE_BITMAP(used, MAX_MSI_VECTORS);
+	struct mutex		lock;	/* protect "used" bitmap */
+	struct platform_device	*pdev;
+	struct irq_domain	*msi_domain;
+	struct irq_domain	*inner_domain;
+	void __iomem		*csr_base;
+	void __iomem		*vector_base;
+	phys_addr_t		vector_phy;
+	u32			num_of_vectors;
+	int			irq;
+};
+
+static inline void msi_writel(struct altera_msi *msi, const u32 value,
+			      const u32 reg)
+{
+	writel_relaxed(value, msi->csr_base + reg);
+}
+
+static inline u32 msi_readl(struct altera_msi *msi, const u32 reg)
+{
+	return readl_relaxed(msi->csr_base + reg);
+}
+
+static void altera_msi_isr(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct altera_msi *msi;
+	unsigned long status;
+	u32 bit;
+	int ret;
+
+	chained_irq_enter(chip, desc);
+	msi = irq_desc_get_handler_data(desc);
+
+	while ((status = msi_readl(msi, MSI_STATUS)) != 0) {
+		for_each_set_bit(bit, &status, msi->num_of_vectors) {
+			/* Dummy read from vector to clear the interrupt */
+			readl_relaxed(msi->vector_base + (bit * sizeof(u32)));
+
+			ret = generic_handle_domain_irq(msi->inner_domain, bit);
+			if (ret)
+				dev_err_ratelimited(&msi->pdev->dev, "unexpected MSI\n");
+		}
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static struct irq_chip altera_msi_irq_chip = {
+	.name = "Altera PCIe MSI",
+	.irq_mask = pci_msi_mask_irq,
+	.irq_unmask = pci_msi_unmask_irq,
+};
+
+static struct msi_domain_info altera_msi_domain_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		     MSI_FLAG_PCI_MSIX),
+	.chip	= &altera_msi_irq_chip,
+};
+
+static void altera_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct altera_msi *msi = irq_data_get_irq_chip_data(data);
+	phys_addr_t addr = msi->vector_phy + (data->hwirq * sizeof(u32));
+
+	msg->address_lo = lower_32_bits(addr);
+	msg->address_hi = upper_32_bits(addr);
+	msg->data = data->hwirq;
+
+	dev_dbg(&msi->pdev->dev, "msi#%d address_hi %#x address_lo %#x\n",
+		(int)data->hwirq, msg->address_hi, msg->address_lo);
+}
+
+static int altera_msi_set_affinity(struct irq_data *irq_data,
+				   const struct cpumask *mask, bool force)
+{
+	 return -EINVAL;
+}
+
+static struct irq_chip altera_msi_bottom_irq_chip = {
+	.name			= "Altera MSI",
+	.irq_compose_msi_msg	= altera_compose_msi_msg,
+	.irq_set_affinity	= altera_msi_set_affinity,
+};
+
+static int altera_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				   unsigned int nr_irqs, void *args)
+{
+	struct altera_msi *msi = domain->host_data;
+	unsigned long bit;
+	u32 mask;
+
+	WARN_ON(nr_irqs != 1);
+	mutex_lock(&msi->lock);
+
+	bit = find_first_zero_bit(msi->used, msi->num_of_vectors);
+	if (bit >= msi->num_of_vectors) {
+		mutex_unlock(&msi->lock);
+		return -ENOSPC;
+	}
+
+	set_bit(bit, msi->used);
+
+	mutex_unlock(&msi->lock);
+
+	irq_domain_set_info(domain, virq, bit, &altera_msi_bottom_irq_chip,
+			    domain->host_data, handle_simple_irq,
+			    NULL, NULL);
+
+	mask = msi_readl(msi, MSI_INTMASK);
+	mask |= 1 << bit;
+	msi_writel(msi, mask, MSI_INTMASK);
+
+	return 0;
+}
+
+static void altera_irq_domain_free(struct irq_domain *domain,
+				   unsigned int virq, unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct altera_msi *msi = irq_data_get_irq_chip_data(d);
+	u32 mask;
+
+	mutex_lock(&msi->lock);
+
+	if (!test_bit(d->hwirq, msi->used)) {
+		dev_err(&msi->pdev->dev, "trying to free unused MSI#%lu\n",
+			d->hwirq);
+	} else {
+		__clear_bit(d->hwirq, msi->used);
+		mask = msi_readl(msi, MSI_INTMASK);
+		mask &= ~(1 << d->hwirq);
+		msi_writel(msi, mask, MSI_INTMASK);
+	}
+
+	mutex_unlock(&msi->lock);
+}
+
+static const struct irq_domain_ops msi_domain_ops = {
+	.alloc	= altera_irq_domain_alloc,
+	.free	= altera_irq_domain_free,
+};
+
+static int altera_allocate_domains(struct altera_msi *msi)
+{
+	struct fwnode_handle *fwnode = of_node_to_fwnode(msi->pdev->dev.of_node);
+
+	msi->inner_domain = irq_domain_add_linear(NULL, msi->num_of_vectors,
+					     &msi_domain_ops, msi);
+	if (!msi->inner_domain) {
+		dev_err(&msi->pdev->dev, "failed to create IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	msi->msi_domain = pci_msi_create_irq_domain(fwnode,
+				&altera_msi_domain_info, msi->inner_domain);
+	if (!msi->msi_domain) {
+		dev_err(&msi->pdev->dev, "failed to create MSI domain\n");
+		irq_domain_remove(msi->inner_domain);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void altera_free_domains(struct altera_msi *msi)
+{
+	irq_domain_remove(msi->msi_domain);
+	irq_domain_remove(msi->inner_domain);
+}
+
+static void altera_msi_remove(struct platform_device *pdev)
+{
+	struct altera_msi *msi = platform_get_drvdata(pdev);
+
+	msi_writel(msi, 0, MSI_INTMASK);
+	irq_set_chained_handler_and_data(msi->irq, NULL, NULL);
+
+	altera_free_domains(msi);
+
+	platform_set_drvdata(pdev, NULL);
+}
+
+static int altera_msi_probe(struct platform_device *pdev)
+{
+	struct altera_msi *msi;
+	struct device_node *np = pdev->dev.of_node;
+	struct resource *res;
+	int ret;
+
+	msi = devm_kzalloc(&pdev->dev, sizeof(struct altera_msi),
+			   GFP_KERNEL);
+	if (!msi)
+		return -ENOMEM;
+
+	mutex_init(&msi->lock);
+	msi->pdev = pdev;
+
+	msi->csr_base = devm_platform_ioremap_resource_byname(pdev, "csr");
+	if (IS_ERR(msi->csr_base)) {
+		dev_err(&pdev->dev, "failed to map csr memory\n");
+		return PTR_ERR(msi->csr_base);
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					   "vector_slave");
+	msi->vector_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(msi->vector_base))
+		return PTR_ERR(msi->vector_base);
+
+	msi->vector_phy = res->start;
+
+	if (of_property_read_u32(np, "num-vectors", &msi->num_of_vectors)) {
+		dev_err(&pdev->dev, "failed to parse the number of vectors\n");
+		return -EINVAL;
+	}
+
+	ret = altera_allocate_domains(msi);
+	if (ret)
+		return ret;
+
+	msi->irq = platform_get_irq(pdev, 0);
+	if (msi->irq < 0) {
+		ret = msi->irq;
+		goto err;
+	}
+
+	irq_set_chained_handler_and_data(msi->irq, altera_msi_isr, msi);
+	platform_set_drvdata(pdev, msi);
+
+	return 0;
+
+err:
+	altera_msi_remove(pdev);
+	return ret;
+}
+
+static const struct of_device_id altera_msi_of_match[] = {
+	{ .compatible = "altr,msi-1.0", NULL },
+	{ },
+};
+
+static struct platform_driver altera_msi_driver = {
+	.driver = {
+		.name = "altera-msi",
+		.of_match_table = altera_msi_of_match,
+	},
+	.probe = altera_msi_probe,
+	.remove_new = altera_msi_remove,
+};
+
+static int __init altera_msi_init(void)
+{
+	return platform_driver_register(&altera_msi_driver);
+}
+
+static void __exit altera_msi_exit(void)
+{
+	platform_driver_unregister(&altera_msi_driver);
+}
+
+subsys_initcall(altera_msi_init);
+MODULE_DEVICE_TABLE(of, altera_msi_of_match);
+module_exit(altera_msi_exit);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pcie-altera.c b/drivers/pci/controller/pcie-altera.c
new file mode 100644
index 000000000..a9536dc4b
--- /dev/null
+++ b/drivers/pci/controller/pcie-altera.c
@@ -0,0 +1,829 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright Altera Corporation (C) 2013-2015. All rights reserved
+ *
+ * Author: Ley Foon Tan <lftan@altera.com>
+ * Description: Altera PCIe host controller driver
+ */
+
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "../pci.h"
+
+#define RP_TX_REG0			0x2000
+#define RP_TX_REG1			0x2004
+#define RP_TX_CNTRL			0x2008
+#define RP_TX_EOP			0x2
+#define RP_TX_SOP			0x1
+#define RP_RXCPL_STATUS			0x2010
+#define RP_RXCPL_EOP			0x2
+#define RP_RXCPL_SOP			0x1
+#define RP_RXCPL_REG0			0x2014
+#define RP_RXCPL_REG1			0x2018
+#define P2A_INT_STATUS			0x3060
+#define P2A_INT_STS_ALL			0xf
+#define P2A_INT_ENABLE			0x3070
+#define P2A_INT_ENA_ALL			0xf
+#define RP_LTSSM			0x3c64
+#define RP_LTSSM_MASK			0x1f
+#define LTSSM_L0			0xf
+
+#define S10_RP_TX_CNTRL			0x2004
+#define S10_RP_RXCPL_REG		0x2008
+#define S10_RP_RXCPL_STATUS		0x200C
+#define S10_RP_CFG_ADDR(pcie, reg)	\
+	(((pcie)->hip_base) + (reg) + (1 << 20))
+#define S10_RP_SECONDARY(pcie)		\
+	readb(S10_RP_CFG_ADDR(pcie, PCI_SECONDARY_BUS))
+
+/* TLP configuration type 0 and 1 */
+#define TLP_FMTTYPE_CFGRD0		0x04	/* Configuration Read Type 0 */
+#define TLP_FMTTYPE_CFGWR0		0x44	/* Configuration Write Type 0 */
+#define TLP_FMTTYPE_CFGRD1		0x05	/* Configuration Read Type 1 */
+#define TLP_FMTTYPE_CFGWR1		0x45	/* Configuration Write Type 1 */
+#define TLP_PAYLOAD_SIZE		0x01
+#define TLP_READ_TAG			0x1d
+#define TLP_WRITE_TAG			0x10
+#define RP_DEVFN			0
+#define TLP_REQ_ID(bus, devfn)		(((bus) << 8) | (devfn))
+#define TLP_CFG_DW0(pcie, cfg)		\
+		(((cfg) << 24) |	\
+		  TLP_PAYLOAD_SIZE)
+#define TLP_CFG_DW1(pcie, tag, be)	\
+	(((TLP_REQ_ID(pcie->root_bus_nr,  RP_DEVFN)) << 16) | (tag << 8) | (be))
+#define TLP_CFG_DW2(bus, devfn, offset)	\
+				(((bus) << 24) | ((devfn) << 16) | (offset))
+#define TLP_COMP_STATUS(s)		(((s) >> 13) & 7)
+#define TLP_BYTE_COUNT(s)		(((s) >> 0) & 0xfff)
+#define TLP_HDR_SIZE			3
+#define TLP_LOOP			500
+
+#define LINK_UP_TIMEOUT			HZ
+#define LINK_RETRAIN_TIMEOUT		HZ
+
+#define DWORD_MASK			3
+
+#define S10_TLP_FMTTYPE_CFGRD0		0x05
+#define S10_TLP_FMTTYPE_CFGRD1		0x04
+#define S10_TLP_FMTTYPE_CFGWR0		0x45
+#define S10_TLP_FMTTYPE_CFGWR1		0x44
+
+enum altera_pcie_version {
+	ALTERA_PCIE_V1 = 0,
+	ALTERA_PCIE_V2,
+};
+
+struct altera_pcie {
+	struct platform_device	*pdev;
+	void __iomem		*cra_base;
+	void __iomem		*hip_base;
+	int			irq;
+	u8			root_bus_nr;
+	struct irq_domain	*irq_domain;
+	struct resource		bus_range;
+	const struct altera_pcie_data	*pcie_data;
+};
+
+struct altera_pcie_ops {
+	int (*tlp_read_pkt)(struct altera_pcie *pcie, u32 *value);
+	void (*tlp_write_pkt)(struct altera_pcie *pcie, u32 *headers,
+			      u32 data, bool align);
+	bool (*get_link_status)(struct altera_pcie *pcie);
+	int (*rp_read_cfg)(struct altera_pcie *pcie, int where,
+			   int size, u32 *value);
+	int (*rp_write_cfg)(struct altera_pcie *pcie, u8 busno,
+			    int where, int size, u32 value);
+};
+
+struct altera_pcie_data {
+	const struct altera_pcie_ops *ops;
+	enum altera_pcie_version version;
+	u32 cap_offset;		/* PCIe capability structure register offset */
+	u32 cfgrd0;
+	u32 cfgrd1;
+	u32 cfgwr0;
+	u32 cfgwr1;
+};
+
+struct tlp_rp_regpair_t {
+	u32 ctrl;
+	u32 reg0;
+	u32 reg1;
+};
+
+static inline void cra_writel(struct altera_pcie *pcie, const u32 value,
+			      const u32 reg)
+{
+	writel_relaxed(value, pcie->cra_base + reg);
+}
+
+static inline u32 cra_readl(struct altera_pcie *pcie, const u32 reg)
+{
+	return readl_relaxed(pcie->cra_base + reg);
+}
+
+static bool altera_pcie_link_up(struct altera_pcie *pcie)
+{
+	return !!((cra_readl(pcie, RP_LTSSM) & RP_LTSSM_MASK) == LTSSM_L0);
+}
+
+static bool s10_altera_pcie_link_up(struct altera_pcie *pcie)
+{
+	void __iomem *addr = S10_RP_CFG_ADDR(pcie,
+				   pcie->pcie_data->cap_offset +
+				   PCI_EXP_LNKSTA);
+
+	return !!(readw(addr) & PCI_EXP_LNKSTA_DLLLA);
+}
+
+/*
+ * Altera PCIe port uses BAR0 of RC's configuration space as the translation
+ * from PCI bus to native BUS.  Entire DDR region is mapped into PCIe space
+ * using these registers, so it can be reached by DMA from EP devices.
+ * This BAR0 will also access to MSI vector when receiving MSI/MSIX interrupt
+ * from EP devices, eventually trigger interrupt to GIC.  The BAR0 of bridge
+ * should be hidden during enumeration to avoid the sizing and resource
+ * allocation by PCIe core.
+ */
+static bool altera_pcie_hide_rc_bar(struct pci_bus *bus, unsigned int  devfn,
+				    int offset)
+{
+	if (pci_is_root_bus(bus) && (devfn == 0) &&
+	    (offset == PCI_BASE_ADDRESS_0))
+		return true;
+
+	return false;
+}
+
+static void tlp_write_tx(struct altera_pcie *pcie,
+			 struct tlp_rp_regpair_t *tlp_rp_regdata)
+{
+	cra_writel(pcie, tlp_rp_regdata->reg0, RP_TX_REG0);
+	cra_writel(pcie, tlp_rp_regdata->reg1, RP_TX_REG1);
+	cra_writel(pcie, tlp_rp_regdata->ctrl, RP_TX_CNTRL);
+}
+
+static void s10_tlp_write_tx(struct altera_pcie *pcie, u32 reg0, u32 ctrl)
+{
+	cra_writel(pcie, reg0, RP_TX_REG0);
+	cra_writel(pcie, ctrl, S10_RP_TX_CNTRL);
+}
+
+static bool altera_pcie_valid_device(struct altera_pcie *pcie,
+				     struct pci_bus *bus, int dev)
+{
+	/* If there is no link, then there is no device */
+	if (bus->number != pcie->root_bus_nr) {
+		if (!pcie->pcie_data->ops->get_link_status(pcie))
+			return false;
+	}
+
+	/* access only one slot on each root port */
+	if (bus->number == pcie->root_bus_nr && dev > 0)
+		return false;
+
+	return true;
+}
+
+static int tlp_read_packet(struct altera_pcie *pcie, u32 *value)
+{
+	int i;
+	bool sop = false;
+	u32 ctrl;
+	u32 reg0, reg1;
+	u32 comp_status = 1;
+
+	/*
+	 * Minimum 2 loops to read TLP headers and 1 loop to read data
+	 * payload.
+	 */
+	for (i = 0; i < TLP_LOOP; i++) {
+		ctrl = cra_readl(pcie, RP_RXCPL_STATUS);
+		if ((ctrl & RP_RXCPL_SOP) || (ctrl & RP_RXCPL_EOP) || sop) {
+			reg0 = cra_readl(pcie, RP_RXCPL_REG0);
+			reg1 = cra_readl(pcie, RP_RXCPL_REG1);
+
+			if (ctrl & RP_RXCPL_SOP) {
+				sop = true;
+				comp_status = TLP_COMP_STATUS(reg1);
+			}
+
+			if (ctrl & RP_RXCPL_EOP) {
+				if (comp_status)
+					return PCIBIOS_DEVICE_NOT_FOUND;
+
+				if (value)
+					*value = reg0;
+
+				return PCIBIOS_SUCCESSFUL;
+			}
+		}
+		udelay(5);
+	}
+
+	return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+static int s10_tlp_read_packet(struct altera_pcie *pcie, u32 *value)
+{
+	u32 ctrl;
+	u32 comp_status;
+	u32 dw[4];
+	u32 count;
+	struct device *dev = &pcie->pdev->dev;
+
+	for (count = 0; count < TLP_LOOP; count++) {
+		ctrl = cra_readl(pcie, S10_RP_RXCPL_STATUS);
+		if (ctrl & RP_RXCPL_SOP) {
+			/* Read first DW */
+			dw[0] = cra_readl(pcie, S10_RP_RXCPL_REG);
+			break;
+		}
+
+		udelay(5);
+	}
+
+	/* SOP detection failed, return error */
+	if (count == TLP_LOOP)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	count = 1;
+
+	/* Poll for EOP */
+	while (count < ARRAY_SIZE(dw)) {
+		ctrl = cra_readl(pcie, S10_RP_RXCPL_STATUS);
+		dw[count++] = cra_readl(pcie, S10_RP_RXCPL_REG);
+		if (ctrl & RP_RXCPL_EOP) {
+			comp_status = TLP_COMP_STATUS(dw[1]);
+			if (comp_status)
+				return PCIBIOS_DEVICE_NOT_FOUND;
+
+			if (value && TLP_BYTE_COUNT(dw[1]) == sizeof(u32) &&
+			    count == 4)
+				*value = dw[3];
+
+			return PCIBIOS_SUCCESSFUL;
+		}
+	}
+
+	dev_warn(dev, "Malformed TLP packet\n");
+
+	return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+static void tlp_write_packet(struct altera_pcie *pcie, u32 *headers,
+			     u32 data, bool align)
+{
+	struct tlp_rp_regpair_t tlp_rp_regdata;
+
+	tlp_rp_regdata.reg0 = headers[0];
+	tlp_rp_regdata.reg1 = headers[1];
+	tlp_rp_regdata.ctrl = RP_TX_SOP;
+	tlp_write_tx(pcie, &tlp_rp_regdata);
+
+	if (align) {
+		tlp_rp_regdata.reg0 = headers[2];
+		tlp_rp_regdata.reg1 = 0;
+		tlp_rp_regdata.ctrl = 0;
+		tlp_write_tx(pcie, &tlp_rp_regdata);
+
+		tlp_rp_regdata.reg0 = data;
+		tlp_rp_regdata.reg1 = 0;
+	} else {
+		tlp_rp_regdata.reg0 = headers[2];
+		tlp_rp_regdata.reg1 = data;
+	}
+
+	tlp_rp_regdata.ctrl = RP_TX_EOP;
+	tlp_write_tx(pcie, &tlp_rp_regdata);
+}
+
+static void s10_tlp_write_packet(struct altera_pcie *pcie, u32 *headers,
+				 u32 data, bool dummy)
+{
+	s10_tlp_write_tx(pcie, headers[0], RP_TX_SOP);
+	s10_tlp_write_tx(pcie, headers[1], 0);
+	s10_tlp_write_tx(pcie, headers[2], 0);
+	s10_tlp_write_tx(pcie, data, RP_TX_EOP);
+}
+
+static void get_tlp_header(struct altera_pcie *pcie, u8 bus, u32 devfn,
+			   int where, u8 byte_en, bool read, u32 *headers)
+{
+	u8 cfg;
+	u8 cfg0 = read ? pcie->pcie_data->cfgrd0 : pcie->pcie_data->cfgwr0;
+	u8 cfg1 = read ? pcie->pcie_data->cfgrd1 : pcie->pcie_data->cfgwr1;
+	u8 tag = read ? TLP_READ_TAG : TLP_WRITE_TAG;
+
+	if (pcie->pcie_data->version == ALTERA_PCIE_V1)
+		cfg = (bus == pcie->root_bus_nr) ? cfg0 : cfg1;
+	else
+		cfg = (bus > S10_RP_SECONDARY(pcie)) ? cfg0 : cfg1;
+
+	headers[0] = TLP_CFG_DW0(pcie, cfg);
+	headers[1] = TLP_CFG_DW1(pcie, tag, byte_en);
+	headers[2] = TLP_CFG_DW2(bus, devfn, where);
+}
+
+static int tlp_cfg_dword_read(struct altera_pcie *pcie, u8 bus, u32 devfn,
+			      int where, u8 byte_en, u32 *value)
+{
+	u32 headers[TLP_HDR_SIZE];
+
+	get_tlp_header(pcie, bus, devfn, where, byte_en, true,
+		       headers);
+
+	pcie->pcie_data->ops->tlp_write_pkt(pcie, headers, 0, false);
+
+	return pcie->pcie_data->ops->tlp_read_pkt(pcie, value);
+}
+
+static int tlp_cfg_dword_write(struct altera_pcie *pcie, u8 bus, u32 devfn,
+			       int where, u8 byte_en, u32 value)
+{
+	u32 headers[TLP_HDR_SIZE];
+	int ret;
+
+	get_tlp_header(pcie, bus, devfn, where, byte_en, false,
+		       headers);
+
+	/* check alignment to Qword */
+	if ((where & 0x7) == 0)
+		pcie->pcie_data->ops->tlp_write_pkt(pcie, headers,
+						    value, true);
+	else
+		pcie->pcie_data->ops->tlp_write_pkt(pcie, headers,
+						    value, false);
+
+	ret = pcie->pcie_data->ops->tlp_read_pkt(pcie, NULL);
+	if (ret != PCIBIOS_SUCCESSFUL)
+		return ret;
+
+	/*
+	 * Monitor changes to PCI_PRIMARY_BUS register on root port
+	 * and update local copy of root bus number accordingly.
+	 */
+	if ((bus == pcie->root_bus_nr) && (where == PCI_PRIMARY_BUS))
+		pcie->root_bus_nr = (u8)(value);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int s10_rp_read_cfg(struct altera_pcie *pcie, int where,
+			   int size, u32 *value)
+{
+	void __iomem *addr = S10_RP_CFG_ADDR(pcie, where);
+
+	switch (size) {
+	case 1:
+		*value = readb(addr);
+		break;
+	case 2:
+		*value = readw(addr);
+		break;
+	default:
+		*value = readl(addr);
+		break;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int s10_rp_write_cfg(struct altera_pcie *pcie, u8 busno,
+			    int where, int size, u32 value)
+{
+	void __iomem *addr = S10_RP_CFG_ADDR(pcie, where);
+
+	switch (size) {
+	case 1:
+		writeb(value, addr);
+		break;
+	case 2:
+		writew(value, addr);
+		break;
+	default:
+		writel(value, addr);
+		break;
+	}
+
+	/*
+	 * Monitor changes to PCI_PRIMARY_BUS register on root port
+	 * and update local copy of root bus number accordingly.
+	 */
+	if (busno == pcie->root_bus_nr && where == PCI_PRIMARY_BUS)
+		pcie->root_bus_nr = value & 0xff;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int _altera_pcie_cfg_read(struct altera_pcie *pcie, u8 busno,
+				 unsigned int devfn, int where, int size,
+				 u32 *value)
+{
+	int ret;
+	u32 data;
+	u8 byte_en;
+
+	if (busno == pcie->root_bus_nr && pcie->pcie_data->ops->rp_read_cfg)
+		return pcie->pcie_data->ops->rp_read_cfg(pcie, where,
+							 size, value);
+
+	switch (size) {
+	case 1:
+		byte_en = 1 << (where & 3);
+		break;
+	case 2:
+		byte_en = 3 << (where & 3);
+		break;
+	default:
+		byte_en = 0xf;
+		break;
+	}
+
+	ret = tlp_cfg_dword_read(pcie, busno, devfn,
+				 (where & ~DWORD_MASK), byte_en, &data);
+	if (ret != PCIBIOS_SUCCESSFUL)
+		return ret;
+
+	switch (size) {
+	case 1:
+		*value = (data >> (8 * (where & 0x3))) & 0xff;
+		break;
+	case 2:
+		*value = (data >> (8 * (where & 0x2))) & 0xffff;
+		break;
+	default:
+		*value = data;
+		break;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int _altera_pcie_cfg_write(struct altera_pcie *pcie, u8 busno,
+				  unsigned int devfn, int where, int size,
+				  u32 value)
+{
+	u32 data32;
+	u32 shift = 8 * (where & 3);
+	u8 byte_en;
+
+	if (busno == pcie->root_bus_nr && pcie->pcie_data->ops->rp_write_cfg)
+		return pcie->pcie_data->ops->rp_write_cfg(pcie, busno,
+						     where, size, value);
+
+	switch (size) {
+	case 1:
+		data32 = (value & 0xff) << shift;
+		byte_en = 1 << (where & 3);
+		break;
+	case 2:
+		data32 = (value & 0xffff) << shift;
+		byte_en = 3 << (where & 3);
+		break;
+	default:
+		data32 = value;
+		byte_en = 0xf;
+		break;
+	}
+
+	return tlp_cfg_dword_write(pcie, busno, devfn, (where & ~DWORD_MASK),
+				   byte_en, data32);
+}
+
+static int altera_pcie_cfg_read(struct pci_bus *bus, unsigned int devfn,
+				int where, int size, u32 *value)
+{
+	struct altera_pcie *pcie = bus->sysdata;
+
+	if (altera_pcie_hide_rc_bar(bus, devfn, where))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	if (!altera_pcie_valid_device(pcie, bus, PCI_SLOT(devfn)))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return _altera_pcie_cfg_read(pcie, bus->number, devfn, where, size,
+				     value);
+}
+
+static int altera_pcie_cfg_write(struct pci_bus *bus, unsigned int devfn,
+				 int where, int size, u32 value)
+{
+	struct altera_pcie *pcie = bus->sysdata;
+
+	if (altera_pcie_hide_rc_bar(bus, devfn, where))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	if (!altera_pcie_valid_device(pcie, bus, PCI_SLOT(devfn)))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return _altera_pcie_cfg_write(pcie, bus->number, devfn, where, size,
+				     value);
+}
+
+static struct pci_ops altera_pcie_ops = {
+	.read = altera_pcie_cfg_read,
+	.write = altera_pcie_cfg_write,
+};
+
+static int altera_read_cap_word(struct altera_pcie *pcie, u8 busno,
+				unsigned int devfn, int offset, u16 *value)
+{
+	u32 data;
+	int ret;
+
+	ret = _altera_pcie_cfg_read(pcie, busno, devfn,
+				    pcie->pcie_data->cap_offset + offset,
+				    sizeof(*value),
+				    &data);
+	*value = data;
+	return ret;
+}
+
+static int altera_write_cap_word(struct altera_pcie *pcie, u8 busno,
+				 unsigned int devfn, int offset, u16 value)
+{
+	return _altera_pcie_cfg_write(pcie, busno, devfn,
+				      pcie->pcie_data->cap_offset + offset,
+				      sizeof(value),
+				      value);
+}
+
+static void altera_wait_link_retrain(struct altera_pcie *pcie)
+{
+	struct device *dev = &pcie->pdev->dev;
+	u16 reg16;
+	unsigned long start_jiffies;
+
+	/* Wait for link training end. */
+	start_jiffies = jiffies;
+	for (;;) {
+		altera_read_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN,
+				     PCI_EXP_LNKSTA, &reg16);
+		if (!(reg16 & PCI_EXP_LNKSTA_LT))
+			break;
+
+		if (time_after(jiffies, start_jiffies + LINK_RETRAIN_TIMEOUT)) {
+			dev_err(dev, "link retrain timeout\n");
+			break;
+		}
+		udelay(100);
+	}
+
+	/* Wait for link is up */
+	start_jiffies = jiffies;
+	for (;;) {
+		if (pcie->pcie_data->ops->get_link_status(pcie))
+			break;
+
+		if (time_after(jiffies, start_jiffies + LINK_UP_TIMEOUT)) {
+			dev_err(dev, "link up timeout\n");
+			break;
+		}
+		udelay(100);
+	}
+}
+
+static void altera_pcie_retrain(struct altera_pcie *pcie)
+{
+	u16 linkcap, linkstat, linkctl;
+
+	if (!pcie->pcie_data->ops->get_link_status(pcie))
+		return;
+
+	/*
+	 * Set the retrain bit if the PCIe rootport support > 2.5GB/s, but
+	 * current speed is 2.5 GB/s.
+	 */
+	altera_read_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN, PCI_EXP_LNKCAP,
+			     &linkcap);
+	if ((linkcap & PCI_EXP_LNKCAP_SLS) <= PCI_EXP_LNKCAP_SLS_2_5GB)
+		return;
+
+	altera_read_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN, PCI_EXP_LNKSTA,
+			     &linkstat);
+	if ((linkstat & PCI_EXP_LNKSTA_CLS) == PCI_EXP_LNKSTA_CLS_2_5GB) {
+		altera_read_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN,
+				     PCI_EXP_LNKCTL, &linkctl);
+		linkctl |= PCI_EXP_LNKCTL_RL;
+		altera_write_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN,
+				      PCI_EXP_LNKCTL, linkctl);
+
+		altera_wait_link_retrain(pcie);
+	}
+}
+
+static int altera_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
+				irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &dummy_irq_chip, handle_simple_irq);
+	irq_set_chip_data(irq, domain->host_data);
+	return 0;
+}
+
+static const struct irq_domain_ops intx_domain_ops = {
+	.map = altera_pcie_intx_map,
+	.xlate = pci_irqd_intx_xlate,
+};
+
+static void altera_pcie_isr(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct altera_pcie *pcie;
+	struct device *dev;
+	unsigned long status;
+	u32 bit;
+	int ret;
+
+	chained_irq_enter(chip, desc);
+	pcie = irq_desc_get_handler_data(desc);
+	dev = &pcie->pdev->dev;
+
+	while ((status = cra_readl(pcie, P2A_INT_STATUS)
+		& P2A_INT_STS_ALL) != 0) {
+		for_each_set_bit(bit, &status, PCI_NUM_INTX) {
+			/* clear interrupts */
+			cra_writel(pcie, 1 << bit, P2A_INT_STATUS);
+
+			ret = generic_handle_domain_irq(pcie->irq_domain, bit);
+			if (ret)
+				dev_err_ratelimited(dev, "unexpected IRQ, INT%d\n", bit);
+		}
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static int altera_pcie_init_irq_domain(struct altera_pcie *pcie)
+{
+	struct device *dev = &pcie->pdev->dev;
+	struct device_node *node = dev->of_node;
+
+	/* Setup INTx */
+	pcie->irq_domain = irq_domain_add_linear(node, PCI_NUM_INTX,
+					&intx_domain_ops, pcie);
+	if (!pcie->irq_domain) {
+		dev_err(dev, "Failed to get a INTx IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void altera_pcie_irq_teardown(struct altera_pcie *pcie)
+{
+	irq_set_chained_handler_and_data(pcie->irq, NULL, NULL);
+	irq_domain_remove(pcie->irq_domain);
+	irq_dispose_mapping(pcie->irq);
+}
+
+static int altera_pcie_parse_dt(struct altera_pcie *pcie)
+{
+	struct platform_device *pdev = pcie->pdev;
+
+	pcie->cra_base = devm_platform_ioremap_resource_byname(pdev, "Cra");
+	if (IS_ERR(pcie->cra_base))
+		return PTR_ERR(pcie->cra_base);
+
+	if (pcie->pcie_data->version == ALTERA_PCIE_V2) {
+		pcie->hip_base =
+			devm_platform_ioremap_resource_byname(pdev, "Hip");
+		if (IS_ERR(pcie->hip_base))
+			return PTR_ERR(pcie->hip_base);
+	}
+
+	/* setup IRQ */
+	pcie->irq = platform_get_irq(pdev, 0);
+	if (pcie->irq < 0)
+		return pcie->irq;
+
+	irq_set_chained_handler_and_data(pcie->irq, altera_pcie_isr, pcie);
+	return 0;
+}
+
+static void altera_pcie_host_init(struct altera_pcie *pcie)
+{
+	altera_pcie_retrain(pcie);
+}
+
+static const struct altera_pcie_ops altera_pcie_ops_1_0 = {
+	.tlp_read_pkt = tlp_read_packet,
+	.tlp_write_pkt = tlp_write_packet,
+	.get_link_status = altera_pcie_link_up,
+};
+
+static const struct altera_pcie_ops altera_pcie_ops_2_0 = {
+	.tlp_read_pkt = s10_tlp_read_packet,
+	.tlp_write_pkt = s10_tlp_write_packet,
+	.get_link_status = s10_altera_pcie_link_up,
+	.rp_read_cfg = s10_rp_read_cfg,
+	.rp_write_cfg = s10_rp_write_cfg,
+};
+
+static const struct altera_pcie_data altera_pcie_1_0_data = {
+	.ops = &altera_pcie_ops_1_0,
+	.cap_offset = 0x80,
+	.version = ALTERA_PCIE_V1,
+	.cfgrd0 = TLP_FMTTYPE_CFGRD0,
+	.cfgrd1 = TLP_FMTTYPE_CFGRD1,
+	.cfgwr0 = TLP_FMTTYPE_CFGWR0,
+	.cfgwr1 = TLP_FMTTYPE_CFGWR1,
+};
+
+static const struct altera_pcie_data altera_pcie_2_0_data = {
+	.ops = &altera_pcie_ops_2_0,
+	.version = ALTERA_PCIE_V2,
+	.cap_offset = 0x70,
+	.cfgrd0 = S10_TLP_FMTTYPE_CFGRD0,
+	.cfgrd1 = S10_TLP_FMTTYPE_CFGRD1,
+	.cfgwr0 = S10_TLP_FMTTYPE_CFGWR0,
+	.cfgwr1 = S10_TLP_FMTTYPE_CFGWR1,
+};
+
+static const struct of_device_id altera_pcie_of_match[] = {
+	{.compatible = "altr,pcie-root-port-1.0",
+	 .data = &altera_pcie_1_0_data },
+	{.compatible = "altr,pcie-root-port-2.0",
+	 .data = &altera_pcie_2_0_data },
+	{},
+};
+
+static int altera_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct altera_pcie *pcie;
+	struct pci_host_bridge *bridge;
+	int ret;
+	const struct altera_pcie_data *data;
+
+	bridge = devm_pci_alloc_host_bridge(dev, sizeof(*pcie));
+	if (!bridge)
+		return -ENOMEM;
+
+	pcie = pci_host_bridge_priv(bridge);
+	pcie->pdev = pdev;
+	platform_set_drvdata(pdev, pcie);
+
+	data = of_device_get_match_data(&pdev->dev);
+	if (!data)
+		return -ENODEV;
+
+	pcie->pcie_data = data;
+
+	ret = altera_pcie_parse_dt(pcie);
+	if (ret) {
+		dev_err(dev, "Parsing DT failed\n");
+		return ret;
+	}
+
+	ret = altera_pcie_init_irq_domain(pcie);
+	if (ret) {
+		dev_err(dev, "Failed creating IRQ Domain\n");
+		return ret;
+	}
+
+	/* clear all interrupts */
+	cra_writel(pcie, P2A_INT_STS_ALL, P2A_INT_STATUS);
+	/* enable all interrupts */
+	cra_writel(pcie, P2A_INT_ENA_ALL, P2A_INT_ENABLE);
+	altera_pcie_host_init(pcie);
+
+	bridge->sysdata = pcie;
+	bridge->busnr = pcie->root_bus_nr;
+	bridge->ops = &altera_pcie_ops;
+
+	return pci_host_probe(bridge);
+}
+
+static void altera_pcie_remove(struct platform_device *pdev)
+{
+	struct altera_pcie *pcie = platform_get_drvdata(pdev);
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(pcie);
+
+	pci_stop_root_bus(bridge->bus);
+	pci_remove_root_bus(bridge->bus);
+	altera_pcie_irq_teardown(pcie);
+}
+
+static struct platform_driver altera_pcie_driver = {
+	.probe		= altera_pcie_probe,
+	.remove_new	= altera_pcie_remove,
+	.driver = {
+		.name	= "altera-pcie",
+		.of_match_table = altera_pcie_of_match,
+	},
+};
+
+MODULE_DEVICE_TABLE(of, altera_pcie_of_match);
+module_platform_driver(altera_pcie_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c
new file mode 100644
index 000000000..f7a248393
--- /dev/null
+++ b/drivers/pci/controller/pcie-apple.c
@@ -0,0 +1,842 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host bridge driver for Apple system-on-chips.
+ *
+ * The HW is ECAM compliant, so once the controller is initialized,
+ * the driver mostly deals MSI mapping and handling of per-port
+ * interrupts (INTx, management and error signals).
+ *
+ * Initialization requires enabling power and clocks, along with a
+ * number of register pokes.
+ *
+ * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
+ * Copyright (C) 2021 Google LLC
+ * Copyright (C) 2021 Corellium LLC
+ * Copyright (C) 2021 Mark Kettenis <kettenis@openbsd.org>
+ *
+ * Author: Alyssa Rosenzweig <alyssa@rosenzweig.io>
+ * Author: Marc Zyngier <maz@kernel.org>
+ */
+
+#include <linux/gpio/consumer.h>
+#include <linux/kernel.h>
+#include <linux/iopoll.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/notifier.h>
+#include <linux/of_irq.h>
+#include <linux/pci-ecam.h>
+
+#define CORE_RC_PHYIF_CTL		0x00024
+#define   CORE_RC_PHYIF_CTL_RUN		BIT(0)
+#define CORE_RC_PHYIF_STAT		0x00028
+#define   CORE_RC_PHYIF_STAT_REFCLK	BIT(4)
+#define CORE_RC_CTL			0x00050
+#define   CORE_RC_CTL_RUN		BIT(0)
+#define CORE_RC_STAT			0x00058
+#define   CORE_RC_STAT_READY		BIT(0)
+#define CORE_FABRIC_STAT		0x04000
+#define   CORE_FABRIC_STAT_MASK		0x001F001F
+#define CORE_LANE_CFG(port)		(0x84000 + 0x4000 * (port))
+#define   CORE_LANE_CFG_REFCLK0REQ	BIT(0)
+#define   CORE_LANE_CFG_REFCLK1REQ	BIT(1)
+#define   CORE_LANE_CFG_REFCLK0ACK	BIT(2)
+#define   CORE_LANE_CFG_REFCLK1ACK	BIT(3)
+#define   CORE_LANE_CFG_REFCLKEN	(BIT(9) | BIT(10))
+#define CORE_LANE_CTL(port)		(0x84004 + 0x4000 * (port))
+#define   CORE_LANE_CTL_CFGACC		BIT(15)
+
+#define PORT_LTSSMCTL			0x00080
+#define   PORT_LTSSMCTL_START		BIT(0)
+#define PORT_INTSTAT			0x00100
+#define   PORT_INT_TUNNEL_ERR		31
+#define   PORT_INT_CPL_TIMEOUT		23
+#define   PORT_INT_RID2SID_MAPERR	22
+#define   PORT_INT_CPL_ABORT		21
+#define   PORT_INT_MSI_BAD_DATA		19
+#define   PORT_INT_MSI_ERR		18
+#define   PORT_INT_REQADDR_GT32		17
+#define   PORT_INT_AF_TIMEOUT		15
+#define   PORT_INT_LINK_DOWN		14
+#define   PORT_INT_LINK_UP		12
+#define   PORT_INT_LINK_BWMGMT		11
+#define   PORT_INT_AER_MASK		(15 << 4)
+#define   PORT_INT_PORT_ERR		4
+#define   PORT_INT_INTx(i)		i
+#define   PORT_INT_INTx_MASK		15
+#define PORT_INTMSK			0x00104
+#define PORT_INTMSKSET			0x00108
+#define PORT_INTMSKCLR			0x0010c
+#define PORT_MSICFG			0x00124
+#define   PORT_MSICFG_EN		BIT(0)
+#define   PORT_MSICFG_L2MSINUM_SHIFT	4
+#define PORT_MSIBASE			0x00128
+#define   PORT_MSIBASE_1_SHIFT		16
+#define PORT_MSIADDR			0x00168
+#define PORT_LINKSTS			0x00208
+#define   PORT_LINKSTS_UP		BIT(0)
+#define   PORT_LINKSTS_BUSY		BIT(2)
+#define PORT_LINKCMDSTS			0x00210
+#define PORT_OUTS_NPREQS		0x00284
+#define   PORT_OUTS_NPREQS_REQ		BIT(24)
+#define   PORT_OUTS_NPREQS_CPL		BIT(16)
+#define PORT_RXWR_FIFO			0x00288
+#define   PORT_RXWR_FIFO_HDR		GENMASK(15, 10)
+#define   PORT_RXWR_FIFO_DATA		GENMASK(9, 0)
+#define PORT_RXRD_FIFO			0x0028C
+#define   PORT_RXRD_FIFO_REQ		GENMASK(6, 0)
+#define PORT_OUTS_CPLS			0x00290
+#define   PORT_OUTS_CPLS_SHRD		GENMASK(14, 8)
+#define   PORT_OUTS_CPLS_WAIT		GENMASK(6, 0)
+#define PORT_APPCLK			0x00800
+#define   PORT_APPCLK_EN		BIT(0)
+#define   PORT_APPCLK_CGDIS		BIT(8)
+#define PORT_STATUS			0x00804
+#define   PORT_STATUS_READY		BIT(0)
+#define PORT_REFCLK			0x00810
+#define   PORT_REFCLK_EN		BIT(0)
+#define   PORT_REFCLK_CGDIS		BIT(8)
+#define PORT_PERST			0x00814
+#define   PORT_PERST_OFF		BIT(0)
+#define PORT_RID2SID(i16)		(0x00828 + 4 * (i16))
+#define   PORT_RID2SID_VALID		BIT(31)
+#define   PORT_RID2SID_SID_SHIFT	16
+#define   PORT_RID2SID_BUS_SHIFT	8
+#define   PORT_RID2SID_DEV_SHIFT	3
+#define   PORT_RID2SID_FUNC_SHIFT	0
+#define PORT_OUTS_PREQS_HDR		0x00980
+#define   PORT_OUTS_PREQS_HDR_MASK	GENMASK(9, 0)
+#define PORT_OUTS_PREQS_DATA		0x00984
+#define   PORT_OUTS_PREQS_DATA_MASK	GENMASK(15, 0)
+#define PORT_TUNCTRL			0x00988
+#define   PORT_TUNCTRL_PERST_ON		BIT(0)
+#define   PORT_TUNCTRL_PERST_ACK_REQ	BIT(1)
+#define PORT_TUNSTAT			0x0098c
+#define   PORT_TUNSTAT_PERST_ON		BIT(0)
+#define   PORT_TUNSTAT_PERST_ACK_PEND	BIT(1)
+#define PORT_PREFMEM_ENABLE		0x00994
+
+#define MAX_RID2SID			64
+
+/*
+ * The doorbell address is set to 0xfffff000, which by convention
+ * matches what MacOS does, and it is possible to use any other
+ * address (in the bottom 4GB, as the base register is only 32bit).
+ * However, it has to be excluded from the IOVA range, and the DART
+ * driver has to know about it.
+ */
+#define DOORBELL_ADDR		CONFIG_PCIE_APPLE_MSI_DOORBELL_ADDR
+
+struct apple_pcie {
+	struct mutex		lock;
+	struct device		*dev;
+	void __iomem            *base;
+	struct irq_domain	*domain;
+	unsigned long		*bitmap;
+	struct list_head	ports;
+	struct completion	event;
+	struct irq_fwspec	fwspec;
+	u32			nvecs;
+};
+
+struct apple_pcie_port {
+	struct apple_pcie	*pcie;
+	struct device_node	*np;
+	void __iomem		*base;
+	struct irq_domain	*domain;
+	struct list_head	entry;
+	DECLARE_BITMAP(sid_map, MAX_RID2SID);
+	int			sid_map_sz;
+	int			idx;
+};
+
+static void rmw_set(u32 set, void __iomem *addr)
+{
+	writel_relaxed(readl_relaxed(addr) | set, addr);
+}
+
+static void rmw_clear(u32 clr, void __iomem *addr)
+{
+	writel_relaxed(readl_relaxed(addr) & ~clr, addr);
+}
+
+static void apple_msi_top_irq_mask(struct irq_data *d)
+{
+	pci_msi_mask_irq(d);
+	irq_chip_mask_parent(d);
+}
+
+static void apple_msi_top_irq_unmask(struct irq_data *d)
+{
+	pci_msi_unmask_irq(d);
+	irq_chip_unmask_parent(d);
+}
+
+static struct irq_chip apple_msi_top_chip = {
+	.name			= "PCIe MSI",
+	.irq_mask		= apple_msi_top_irq_mask,
+	.irq_unmask		= apple_msi_top_irq_unmask,
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+	.irq_set_type		= irq_chip_set_type_parent,
+};
+
+static void apple_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	msg->address_hi = upper_32_bits(DOORBELL_ADDR);
+	msg->address_lo = lower_32_bits(DOORBELL_ADDR);
+	msg->data = data->hwirq;
+}
+
+static struct irq_chip apple_msi_bottom_chip = {
+	.name			= "MSI",
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+	.irq_set_type		= irq_chip_set_type_parent,
+	.irq_compose_msi_msg	= apple_msi_compose_msg,
+};
+
+static int apple_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs, void *args)
+{
+	struct apple_pcie *pcie = domain->host_data;
+	struct irq_fwspec fwspec = pcie->fwspec;
+	unsigned int i;
+	int ret, hwirq;
+
+	mutex_lock(&pcie->lock);
+
+	hwirq = bitmap_find_free_region(pcie->bitmap, pcie->nvecs,
+					order_base_2(nr_irqs));
+
+	mutex_unlock(&pcie->lock);
+
+	if (hwirq < 0)
+		return -ENOSPC;
+
+	fwspec.param[fwspec.param_count - 2] += hwirq;
+
+	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &fwspec);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < nr_irqs; i++) {
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &apple_msi_bottom_chip,
+					      domain->host_data);
+	}
+
+	return 0;
+}
+
+static void apple_msi_domain_free(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct apple_pcie *pcie = domain->host_data;
+
+	mutex_lock(&pcie->lock);
+
+	bitmap_release_region(pcie->bitmap, d->hwirq, order_base_2(nr_irqs));
+
+	mutex_unlock(&pcie->lock);
+}
+
+static const struct irq_domain_ops apple_msi_domain_ops = {
+	.alloc	= apple_msi_domain_alloc,
+	.free	= apple_msi_domain_free,
+};
+
+static struct msi_domain_info apple_msi_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		   MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX),
+	.chip	= &apple_msi_top_chip,
+};
+
+static void apple_port_irq_mask(struct irq_data *data)
+{
+	struct apple_pcie_port *port = irq_data_get_irq_chip_data(data);
+
+	writel_relaxed(BIT(data->hwirq), port->base + PORT_INTMSKSET);
+}
+
+static void apple_port_irq_unmask(struct irq_data *data)
+{
+	struct apple_pcie_port *port = irq_data_get_irq_chip_data(data);
+
+	writel_relaxed(BIT(data->hwirq), port->base + PORT_INTMSKCLR);
+}
+
+static bool hwirq_is_intx(unsigned int hwirq)
+{
+	return BIT(hwirq) & PORT_INT_INTx_MASK;
+}
+
+static void apple_port_irq_ack(struct irq_data *data)
+{
+	struct apple_pcie_port *port = irq_data_get_irq_chip_data(data);
+
+	if (!hwirq_is_intx(data->hwirq))
+		writel_relaxed(BIT(data->hwirq), port->base + PORT_INTSTAT);
+}
+
+static int apple_port_irq_set_type(struct irq_data *data, unsigned int type)
+{
+	/*
+	 * It doesn't seem that there is any way to configure the
+	 * trigger, so assume INTx have to be level (as per the spec),
+	 * and the rest is edge (which looks likely).
+	 */
+	if (hwirq_is_intx(data->hwirq) ^ !!(type & IRQ_TYPE_LEVEL_MASK))
+		return -EINVAL;
+
+	irqd_set_trigger_type(data, type);
+	return 0;
+}
+
+static struct irq_chip apple_port_irqchip = {
+	.name		= "PCIe",
+	.irq_ack	= apple_port_irq_ack,
+	.irq_mask	= apple_port_irq_mask,
+	.irq_unmask	= apple_port_irq_unmask,
+	.irq_set_type	= apple_port_irq_set_type,
+};
+
+static int apple_port_irq_domain_alloc(struct irq_domain *domain,
+				       unsigned int virq, unsigned int nr_irqs,
+				       void *args)
+{
+	struct apple_pcie_port *port = domain->host_data;
+	struct irq_fwspec *fwspec = args;
+	int i;
+
+	for (i = 0; i < nr_irqs; i++) {
+		irq_flow_handler_t flow = handle_edge_irq;
+		unsigned int type = IRQ_TYPE_EDGE_RISING;
+
+		if (hwirq_is_intx(fwspec->param[0] + i)) {
+			flow = handle_level_irq;
+			type = IRQ_TYPE_LEVEL_HIGH;
+		}
+
+		irq_domain_set_info(domain, virq + i, fwspec->param[0] + i,
+				    &apple_port_irqchip, port, flow,
+				    NULL, NULL);
+
+		irq_set_irq_type(virq + i, type);
+	}
+
+	return 0;
+}
+
+static void apple_port_irq_domain_free(struct irq_domain *domain,
+				       unsigned int virq, unsigned int nr_irqs)
+{
+	int i;
+
+	for (i = 0; i < nr_irqs; i++) {
+		struct irq_data *d = irq_domain_get_irq_data(domain, virq + i);
+
+		irq_set_handler(virq + i, NULL);
+		irq_domain_reset_irq_data(d);
+	}
+}
+
+static const struct irq_domain_ops apple_port_irq_domain_ops = {
+	.translate	= irq_domain_translate_onecell,
+	.alloc		= apple_port_irq_domain_alloc,
+	.free		= apple_port_irq_domain_free,
+};
+
+static void apple_port_irq_handler(struct irq_desc *desc)
+{
+	struct apple_pcie_port *port = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned long stat;
+	int i;
+
+	chained_irq_enter(chip, desc);
+
+	stat = readl_relaxed(port->base + PORT_INTSTAT);
+
+	for_each_set_bit(i, &stat, 32)
+		generic_handle_domain_irq(port->domain, i);
+
+	chained_irq_exit(chip, desc);
+}
+
+static int apple_pcie_port_setup_irq(struct apple_pcie_port *port)
+{
+	struct fwnode_handle *fwnode = &port->np->fwnode;
+	unsigned int irq;
+
+	/* FIXME: consider moving each interrupt under each port */
+	irq = irq_of_parse_and_map(to_of_node(dev_fwnode(port->pcie->dev)),
+				   port->idx);
+	if (!irq)
+		return -ENXIO;
+
+	port->domain = irq_domain_create_linear(fwnode, 32,
+						&apple_port_irq_domain_ops,
+						port);
+	if (!port->domain)
+		return -ENOMEM;
+
+	/* Disable all interrupts */
+	writel_relaxed(~0, port->base + PORT_INTMSKSET);
+	writel_relaxed(~0, port->base + PORT_INTSTAT);
+
+	irq_set_chained_handler_and_data(irq, apple_port_irq_handler, port);
+
+	/* Configure MSI base address */
+	BUILD_BUG_ON(upper_32_bits(DOORBELL_ADDR));
+	writel_relaxed(lower_32_bits(DOORBELL_ADDR), port->base + PORT_MSIADDR);
+
+	/* Enable MSIs, shared between all ports */
+	writel_relaxed(0, port->base + PORT_MSIBASE);
+	writel_relaxed((ilog2(port->pcie->nvecs) << PORT_MSICFG_L2MSINUM_SHIFT) |
+		       PORT_MSICFG_EN, port->base + PORT_MSICFG);
+
+	return 0;
+}
+
+static irqreturn_t apple_pcie_port_irq(int irq, void *data)
+{
+	struct apple_pcie_port *port = data;
+	unsigned int hwirq = irq_domain_get_irq_data(port->domain, irq)->hwirq;
+
+	switch (hwirq) {
+	case PORT_INT_LINK_UP:
+		dev_info_ratelimited(port->pcie->dev, "Link up on %pOF\n",
+				     port->np);
+		complete_all(&port->pcie->event);
+		break;
+	case PORT_INT_LINK_DOWN:
+		dev_info_ratelimited(port->pcie->dev, "Link down on %pOF\n",
+				     port->np);
+		break;
+	default:
+		return IRQ_NONE;
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int apple_pcie_port_register_irqs(struct apple_pcie_port *port)
+{
+	static struct {
+		unsigned int	hwirq;
+		const char	*name;
+	} port_irqs[] = {
+		{ PORT_INT_LINK_UP,	"Link up",	},
+		{ PORT_INT_LINK_DOWN,	"Link down",	},
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(port_irqs); i++) {
+		struct irq_fwspec fwspec = {
+			.fwnode		= &port->np->fwnode,
+			.param_count	= 1,
+			.param		= {
+				[0]	= port_irqs[i].hwirq,
+			},
+		};
+		unsigned int irq;
+		int ret;
+
+		irq = irq_domain_alloc_irqs(port->domain, 1, NUMA_NO_NODE,
+					    &fwspec);
+		if (WARN_ON(!irq))
+			continue;
+
+		ret = request_irq(irq, apple_pcie_port_irq, 0,
+				  port_irqs[i].name, port);
+		WARN_ON(ret);
+	}
+
+	return 0;
+}
+
+static int apple_pcie_setup_refclk(struct apple_pcie *pcie,
+				   struct apple_pcie_port *port)
+{
+	u32 stat;
+	int res;
+
+	res = readl_relaxed_poll_timeout(pcie->base + CORE_RC_PHYIF_STAT, stat,
+					 stat & CORE_RC_PHYIF_STAT_REFCLK,
+					 100, 50000);
+	if (res < 0)
+		return res;
+
+	rmw_set(CORE_LANE_CTL_CFGACC, pcie->base + CORE_LANE_CTL(port->idx));
+	rmw_set(CORE_LANE_CFG_REFCLK0REQ, pcie->base + CORE_LANE_CFG(port->idx));
+
+	res = readl_relaxed_poll_timeout(pcie->base + CORE_LANE_CFG(port->idx),
+					 stat, stat & CORE_LANE_CFG_REFCLK0ACK,
+					 100, 50000);
+	if (res < 0)
+		return res;
+
+	rmw_set(CORE_LANE_CFG_REFCLK1REQ, pcie->base + CORE_LANE_CFG(port->idx));
+	res = readl_relaxed_poll_timeout(pcie->base + CORE_LANE_CFG(port->idx),
+					 stat, stat & CORE_LANE_CFG_REFCLK1ACK,
+					 100, 50000);
+
+	if (res < 0)
+		return res;
+
+	rmw_clear(CORE_LANE_CTL_CFGACC, pcie->base + CORE_LANE_CTL(port->idx));
+
+	rmw_set(CORE_LANE_CFG_REFCLKEN, pcie->base + CORE_LANE_CFG(port->idx));
+	rmw_set(PORT_REFCLK_EN, port->base + PORT_REFCLK);
+
+	return 0;
+}
+
+static u32 apple_pcie_rid2sid_write(struct apple_pcie_port *port,
+				    int idx, u32 val)
+{
+	writel_relaxed(val, port->base + PORT_RID2SID(idx));
+	/* Read back to ensure completion of the write */
+	return readl_relaxed(port->base + PORT_RID2SID(idx));
+}
+
+static int apple_pcie_setup_port(struct apple_pcie *pcie,
+				 struct device_node *np)
+{
+	struct platform_device *platform = to_platform_device(pcie->dev);
+	struct apple_pcie_port *port;
+	struct gpio_desc *reset;
+	u32 stat, idx;
+	int ret, i;
+
+	reset = devm_fwnode_gpiod_get(pcie->dev, of_fwnode_handle(np), "reset",
+				      GPIOD_OUT_LOW, "PERST#");
+	if (IS_ERR(reset))
+		return PTR_ERR(reset);
+
+	port = devm_kzalloc(pcie->dev, sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+
+	ret = of_property_read_u32_index(np, "reg", 0, &idx);
+	if (ret)
+		return ret;
+
+	/* Use the first reg entry to work out the port index */
+	port->idx = idx >> 11;
+	port->pcie = pcie;
+	port->np = np;
+
+	port->base = devm_platform_ioremap_resource(platform, port->idx + 2);
+	if (IS_ERR(port->base))
+		return PTR_ERR(port->base);
+
+	rmw_set(PORT_APPCLK_EN, port->base + PORT_APPCLK);
+
+	/* Assert PERST# before setting up the clock */
+	gpiod_set_value(reset, 1);
+
+	ret = apple_pcie_setup_refclk(pcie, port);
+	if (ret < 0)
+		return ret;
+
+	/* The minimal Tperst-clk value is 100us (PCIe CEM r5.0, 2.9.2) */
+	usleep_range(100, 200);
+
+	/* Deassert PERST# */
+	rmw_set(PORT_PERST_OFF, port->base + PORT_PERST);
+	gpiod_set_value(reset, 0);
+
+	/* Wait for 100ms after PERST# deassertion (PCIe r5.0, 6.6.1) */
+	msleep(100);
+
+	ret = readl_relaxed_poll_timeout(port->base + PORT_STATUS, stat,
+					 stat & PORT_STATUS_READY, 100, 250000);
+	if (ret < 0) {
+		dev_err(pcie->dev, "port %pOF ready wait timeout\n", np);
+		return ret;
+	}
+
+	rmw_clear(PORT_REFCLK_CGDIS, port->base + PORT_REFCLK);
+	rmw_clear(PORT_APPCLK_CGDIS, port->base + PORT_APPCLK);
+
+	ret = apple_pcie_port_setup_irq(port);
+	if (ret)
+		return ret;
+
+	/* Reset all RID/SID mappings, and check for RAZ/WI registers */
+	for (i = 0; i < MAX_RID2SID; i++) {
+		if (apple_pcie_rid2sid_write(port, i, 0xbad1d) != 0xbad1d)
+			break;
+		apple_pcie_rid2sid_write(port, i, 0);
+	}
+
+	dev_dbg(pcie->dev, "%pOF: %d RID/SID mapping entries\n", np, i);
+
+	port->sid_map_sz = i;
+
+	list_add_tail(&port->entry, &pcie->ports);
+	init_completion(&pcie->event);
+
+	ret = apple_pcie_port_register_irqs(port);
+	WARN_ON(ret);
+
+	writel_relaxed(PORT_LTSSMCTL_START, port->base + PORT_LTSSMCTL);
+
+	if (!wait_for_completion_timeout(&pcie->event, HZ / 10))
+		dev_warn(pcie->dev, "%pOF link didn't come up\n", np);
+
+	return 0;
+}
+
+static int apple_msi_init(struct apple_pcie *pcie)
+{
+	struct fwnode_handle *fwnode = dev_fwnode(pcie->dev);
+	struct of_phandle_args args = {};
+	struct irq_domain *parent;
+	int ret;
+
+	ret = of_parse_phandle_with_args(to_of_node(fwnode), "msi-ranges",
+					 "#interrupt-cells", 0, &args);
+	if (ret)
+		return ret;
+
+	ret = of_property_read_u32_index(to_of_node(fwnode), "msi-ranges",
+					 args.args_count + 1, &pcie->nvecs);
+	if (ret)
+		return ret;
+
+	of_phandle_args_to_fwspec(args.np, args.args, args.args_count,
+				  &pcie->fwspec);
+
+	pcie->bitmap = devm_bitmap_zalloc(pcie->dev, pcie->nvecs, GFP_KERNEL);
+	if (!pcie->bitmap)
+		return -ENOMEM;
+
+	parent = irq_find_matching_fwspec(&pcie->fwspec, DOMAIN_BUS_WIRED);
+	if (!parent) {
+		dev_err(pcie->dev, "failed to find parent domain\n");
+		return -ENXIO;
+	}
+
+	parent = irq_domain_create_hierarchy(parent, 0, pcie->nvecs, fwnode,
+					     &apple_msi_domain_ops, pcie);
+	if (!parent) {
+		dev_err(pcie->dev, "failed to create IRQ domain\n");
+		return -ENOMEM;
+	}
+	irq_domain_update_bus_token(parent, DOMAIN_BUS_NEXUS);
+
+	pcie->domain = pci_msi_create_irq_domain(fwnode, &apple_msi_info,
+						 parent);
+	if (!pcie->domain) {
+		dev_err(pcie->dev, "failed to create MSI domain\n");
+		irq_domain_remove(parent);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static struct apple_pcie_port *apple_pcie_get_port(struct pci_dev *pdev)
+{
+	struct pci_config_window *cfg = pdev->sysdata;
+	struct apple_pcie *pcie = cfg->priv;
+	struct pci_dev *port_pdev;
+	struct apple_pcie_port *port;
+
+	/* Find the root port this device is on */
+	port_pdev = pcie_find_root_port(pdev);
+
+	/* If finding the port itself, nothing to do */
+	if (WARN_ON(!port_pdev) || pdev == port_pdev)
+		return NULL;
+
+	list_for_each_entry(port, &pcie->ports, entry) {
+		if (port->idx == PCI_SLOT(port_pdev->devfn))
+			return port;
+	}
+
+	return NULL;
+}
+
+static int apple_pcie_add_device(struct apple_pcie_port *port,
+				 struct pci_dev *pdev)
+{
+	u32 sid, rid = pci_dev_id(pdev);
+	int idx, err;
+
+	dev_dbg(&pdev->dev, "added to bus %s, index %d\n",
+		pci_name(pdev->bus->self), port->idx);
+
+	err = of_map_id(port->pcie->dev->of_node, rid, "iommu-map",
+			"iommu-map-mask", NULL, &sid);
+	if (err)
+		return err;
+
+	mutex_lock(&port->pcie->lock);
+
+	idx = bitmap_find_free_region(port->sid_map, port->sid_map_sz, 0);
+	if (idx >= 0) {
+		apple_pcie_rid2sid_write(port, idx,
+					 PORT_RID2SID_VALID |
+					 (sid << PORT_RID2SID_SID_SHIFT) | rid);
+
+		dev_dbg(&pdev->dev, "mapping RID%x to SID%x (index %d)\n",
+			rid, sid, idx);
+	}
+
+	mutex_unlock(&port->pcie->lock);
+
+	return idx >= 0 ? 0 : -ENOSPC;
+}
+
+static void apple_pcie_release_device(struct apple_pcie_port *port,
+				      struct pci_dev *pdev)
+{
+	u32 rid = pci_dev_id(pdev);
+	int idx;
+
+	mutex_lock(&port->pcie->lock);
+
+	for_each_set_bit(idx, port->sid_map, port->sid_map_sz) {
+		u32 val;
+
+		val = readl_relaxed(port->base + PORT_RID2SID(idx));
+		if ((val & 0xffff) == rid) {
+			apple_pcie_rid2sid_write(port, idx, 0);
+			bitmap_release_region(port->sid_map, idx, 0);
+			dev_dbg(&pdev->dev, "Released %x (%d)\n", val, idx);
+			break;
+		}
+	}
+
+	mutex_unlock(&port->pcie->lock);
+}
+
+static int apple_pcie_bus_notifier(struct notifier_block *nb,
+				   unsigned long action,
+				   void *data)
+{
+	struct device *dev = data;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct apple_pcie_port *port;
+	int err;
+
+	/*
+	 * This is a bit ugly. We assume that if we get notified for
+	 * any PCI device, we must be in charge of it, and that there
+	 * is no other PCI controller in the whole system. It probably
+	 * holds for now, but who knows for how long?
+	 */
+	port = apple_pcie_get_port(pdev);
+	if (!port)
+		return NOTIFY_DONE;
+
+	switch (action) {
+	case BUS_NOTIFY_ADD_DEVICE:
+		err = apple_pcie_add_device(port, pdev);
+		if (err)
+			return notifier_from_errno(err);
+		break;
+	case BUS_NOTIFY_DEL_DEVICE:
+		apple_pcie_release_device(port, pdev);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block apple_pcie_nb = {
+	.notifier_call = apple_pcie_bus_notifier,
+};
+
+static int apple_pcie_init(struct pci_config_window *cfg)
+{
+	struct device *dev = cfg->parent;
+	struct platform_device *platform = to_platform_device(dev);
+	struct device_node *of_port;
+	struct apple_pcie *pcie;
+	int ret;
+
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	pcie->dev = dev;
+
+	mutex_init(&pcie->lock);
+
+	pcie->base = devm_platform_ioremap_resource(platform, 1);
+	if (IS_ERR(pcie->base))
+		return PTR_ERR(pcie->base);
+
+	cfg->priv = pcie;
+	INIT_LIST_HEAD(&pcie->ports);
+
+	ret = apple_msi_init(pcie);
+	if (ret)
+		return ret;
+
+	for_each_child_of_node(dev->of_node, of_port) {
+		ret = apple_pcie_setup_port(pcie, of_port);
+		if (ret) {
+			dev_err(pcie->dev, "Port %pOF setup fail: %d\n", of_port, ret);
+			of_node_put(of_port);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int apple_pcie_probe(struct platform_device *pdev)
+{
+	int ret;
+
+	ret = bus_register_notifier(&pci_bus_type, &apple_pcie_nb);
+	if (ret)
+		return ret;
+
+	ret = pci_host_common_probe(pdev);
+	if (ret)
+		bus_unregister_notifier(&pci_bus_type, &apple_pcie_nb);
+
+	return ret;
+}
+
+static const struct pci_ecam_ops apple_pcie_cfg_ecam_ops = {
+	.init		= apple_pcie_init,
+	.pci_ops	= {
+		.map_bus	= pci_ecam_map_bus,
+		.read		= pci_generic_config_read,
+		.write		= pci_generic_config_write,
+	}
+};
+
+static const struct of_device_id apple_pcie_of_match[] = {
+	{ .compatible = "apple,pcie", .data = &apple_pcie_cfg_ecam_ops },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, apple_pcie_of_match);
+
+static struct platform_driver apple_pcie_driver = {
+	.probe	= apple_pcie_probe,
+	.driver	= {
+		.name			= "pcie-apple",
+		.of_match_table		= apple_pcie_of_match,
+		.suppress_bind_attrs	= true,
+	},
+};
+module_platform_driver(apple_pcie_driver);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
new file mode 100644
index 000000000..f9dd6622f
--- /dev/null
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -0,0 +1,1628 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2009 - 2019 Broadcom */
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/compiler.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/ioport.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/log2.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/pci-ecam.h>
+#include <linux/printk.h>
+#include <linux/regulator/consumer.h>
+#include <linux/reset.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "../pci.h"
+
+/* BRCM_PCIE_CAP_REGS - Offset for the mandatory capability config regs */
+#define BRCM_PCIE_CAP_REGS				0x00ac
+
+/* Broadcom STB PCIe Register Offsets */
+#define PCIE_RC_CFG_VENDOR_VENDOR_SPECIFIC_REG1				0x0188
+#define  PCIE_RC_CFG_VENDOR_VENDOR_SPECIFIC_REG1_ENDIAN_MODE_BAR2_MASK	0xc
+#define  PCIE_RC_CFG_VENDOR_SPCIFIC_REG1_LITTLE_ENDIAN			0x0
+
+#define PCIE_RC_CFG_PRIV1_ID_VAL3			0x043c
+#define  PCIE_RC_CFG_PRIV1_ID_VAL3_CLASS_CODE_MASK	0xffffff
+
+#define PCIE_RC_CFG_PRIV1_LINK_CAPABILITY			0x04dc
+#define  PCIE_RC_CFG_PRIV1_LINK_CAPABILITY_ASPM_SUPPORT_MASK	0xc00
+
+#define PCIE_RC_DL_MDIO_ADDR				0x1100
+#define PCIE_RC_DL_MDIO_WR_DATA				0x1104
+#define PCIE_RC_DL_MDIO_RD_DATA				0x1108
+
+#define PCIE_MISC_MISC_CTRL				0x4008
+#define  PCIE_MISC_MISC_CTRL_PCIE_RCB_64B_MODE_MASK	0x80
+#define  PCIE_MISC_MISC_CTRL_PCIE_RCB_MPS_MODE_MASK	0x400
+#define  PCIE_MISC_MISC_CTRL_SCB_ACCESS_EN_MASK		0x1000
+#define  PCIE_MISC_MISC_CTRL_CFG_READ_UR_MODE_MASK	0x2000
+#define  PCIE_MISC_MISC_CTRL_MAX_BURST_SIZE_MASK	0x300000
+
+#define  PCIE_MISC_MISC_CTRL_SCB0_SIZE_MASK		0xf8000000
+#define  PCIE_MISC_MISC_CTRL_SCB1_SIZE_MASK		0x07c00000
+#define  PCIE_MISC_MISC_CTRL_SCB2_SIZE_MASK		0x0000001f
+#define  SCB_SIZE_MASK(x) PCIE_MISC_MISC_CTRL_SCB ## x ## _SIZE_MASK
+
+#define PCIE_MISC_CPU_2_PCIE_MEM_WIN0_LO		0x400c
+#define PCIE_MEM_WIN0_LO(win)	\
+		PCIE_MISC_CPU_2_PCIE_MEM_WIN0_LO + ((win) * 8)
+
+#define PCIE_MISC_CPU_2_PCIE_MEM_WIN0_HI		0x4010
+#define PCIE_MEM_WIN0_HI(win)	\
+		PCIE_MISC_CPU_2_PCIE_MEM_WIN0_HI + ((win) * 8)
+
+#define PCIE_MISC_RC_BAR1_CONFIG_LO			0x402c
+#define  PCIE_MISC_RC_BAR1_CONFIG_LO_SIZE_MASK		0x1f
+
+#define PCIE_MISC_RC_BAR2_CONFIG_LO			0x4034
+#define  PCIE_MISC_RC_BAR2_CONFIG_LO_SIZE_MASK		0x1f
+#define PCIE_MISC_RC_BAR2_CONFIG_HI			0x4038
+
+#define PCIE_MISC_RC_BAR3_CONFIG_LO			0x403c
+#define  PCIE_MISC_RC_BAR3_CONFIG_LO_SIZE_MASK		0x1f
+
+#define PCIE_MISC_MSI_BAR_CONFIG_LO			0x4044
+#define PCIE_MISC_MSI_BAR_CONFIG_HI			0x4048
+
+#define PCIE_MISC_MSI_DATA_CONFIG			0x404c
+#define  PCIE_MISC_MSI_DATA_CONFIG_VAL_32		0xffe06540
+#define  PCIE_MISC_MSI_DATA_CONFIG_VAL_8		0xfff86540
+
+#define PCIE_MISC_PCIE_CTRL				0x4064
+#define  PCIE_MISC_PCIE_CTRL_PCIE_L23_REQUEST_MASK	0x1
+#define PCIE_MISC_PCIE_CTRL_PCIE_PERSTB_MASK		0x4
+
+#define PCIE_MISC_PCIE_STATUS				0x4068
+#define  PCIE_MISC_PCIE_STATUS_PCIE_PORT_MASK		0x80
+#define  PCIE_MISC_PCIE_STATUS_PCIE_DL_ACTIVE_MASK	0x20
+#define  PCIE_MISC_PCIE_STATUS_PCIE_PHYLINKUP_MASK	0x10
+#define  PCIE_MISC_PCIE_STATUS_PCIE_LINK_IN_L23_MASK	0x40
+
+#define PCIE_MISC_REVISION				0x406c
+#define  BRCM_PCIE_HW_REV_33				0x0303
+#define  BRCM_PCIE_HW_REV_3_20				0x0320
+
+#define PCIE_MISC_CPU_2_PCIE_MEM_WIN0_BASE_LIMIT		0x4070
+#define  PCIE_MISC_CPU_2_PCIE_MEM_WIN0_BASE_LIMIT_LIMIT_MASK	0xfff00000
+#define  PCIE_MISC_CPU_2_PCIE_MEM_WIN0_BASE_LIMIT_BASE_MASK	0xfff0
+#define PCIE_MEM_WIN0_BASE_LIMIT(win)	\
+		PCIE_MISC_CPU_2_PCIE_MEM_WIN0_BASE_LIMIT + ((win) * 4)
+
+#define PCIE_MISC_CPU_2_PCIE_MEM_WIN0_BASE_HI			0x4080
+#define  PCIE_MISC_CPU_2_PCIE_MEM_WIN0_BASE_HI_BASE_MASK	0xff
+#define PCIE_MEM_WIN0_BASE_HI(win)	\
+		PCIE_MISC_CPU_2_PCIE_MEM_WIN0_BASE_HI + ((win) * 8)
+
+#define PCIE_MISC_CPU_2_PCIE_MEM_WIN0_LIMIT_HI			0x4084
+#define  PCIE_MISC_CPU_2_PCIE_MEM_WIN0_LIMIT_HI_LIMIT_MASK	0xff
+#define PCIE_MEM_WIN0_LIMIT_HI(win)	\
+		PCIE_MISC_CPU_2_PCIE_MEM_WIN0_LIMIT_HI + ((win) * 8)
+
+#define PCIE_MISC_HARD_PCIE_HARD_DEBUG					0x4204
+#define  PCIE_MISC_HARD_PCIE_HARD_DEBUG_CLKREQ_DEBUG_ENABLE_MASK	0x2
+#define  PCIE_MISC_HARD_PCIE_HARD_DEBUG_SERDES_IDDQ_MASK		0x08000000
+#define  PCIE_BMIPS_MISC_HARD_PCIE_HARD_DEBUG_SERDES_IDDQ_MASK		0x00800000
+
+
+#define PCIE_INTR2_CPU_BASE		0x4300
+#define PCIE_MSI_INTR2_BASE		0x4500
+/* Offsets from PCIE_INTR2_CPU_BASE and PCIE_MSI_INTR2_BASE */
+#define  MSI_INT_STATUS			0x0
+#define  MSI_INT_CLR			0x8
+#define  MSI_INT_MASK_SET		0x10
+#define  MSI_INT_MASK_CLR		0x14
+
+#define PCIE_EXT_CFG_DATA				0x8000
+#define PCIE_EXT_CFG_INDEX				0x9000
+
+#define  PCIE_RGR1_SW_INIT_1_PERST_MASK			0x1
+#define  PCIE_RGR1_SW_INIT_1_PERST_SHIFT		0x0
+
+#define RGR1_SW_INIT_1_INIT_GENERIC_MASK		0x2
+#define RGR1_SW_INIT_1_INIT_GENERIC_SHIFT		0x1
+#define RGR1_SW_INIT_1_INIT_7278_MASK			0x1
+#define RGR1_SW_INIT_1_INIT_7278_SHIFT			0x0
+
+/* PCIe parameters */
+#define BRCM_NUM_PCIE_OUT_WINS		0x4
+#define BRCM_INT_PCI_MSI_NR		32
+#define BRCM_INT_PCI_MSI_LEGACY_NR	8
+#define BRCM_INT_PCI_MSI_SHIFT		0
+#define BRCM_INT_PCI_MSI_MASK		GENMASK(BRCM_INT_PCI_MSI_NR - 1, 0)
+#define BRCM_INT_PCI_MSI_LEGACY_MASK	GENMASK(31, \
+						32 - BRCM_INT_PCI_MSI_LEGACY_NR)
+
+/* MSI target addresses */
+#define BRCM_MSI_TARGET_ADDR_LT_4GB	0x0fffffffcULL
+#define BRCM_MSI_TARGET_ADDR_GT_4GB	0xffffffffcULL
+
+/* MDIO registers */
+#define MDIO_PORT0			0x0
+#define MDIO_DATA_MASK			0x7fffffff
+#define MDIO_PORT_MASK			0xf0000
+#define MDIO_REGAD_MASK			0xffff
+#define MDIO_CMD_MASK			0xfff00000
+#define MDIO_CMD_READ			0x1
+#define MDIO_CMD_WRITE			0x0
+#define MDIO_DATA_DONE_MASK		0x80000000
+#define MDIO_RD_DONE(x)			(((x) & MDIO_DATA_DONE_MASK) ? 1 : 0)
+#define MDIO_WT_DONE(x)			(((x) & MDIO_DATA_DONE_MASK) ? 0 : 1)
+#define SSC_REGS_ADDR			0x1100
+#define SET_ADDR_OFFSET			0x1f
+#define SSC_CNTL_OFFSET			0x2
+#define SSC_CNTL_OVRD_EN_MASK		0x8000
+#define SSC_CNTL_OVRD_VAL_MASK		0x4000
+#define SSC_STATUS_OFFSET		0x1
+#define SSC_STATUS_SSC_MASK		0x400
+#define SSC_STATUS_PLL_LOCK_MASK	0x800
+#define PCIE_BRCM_MAX_MEMC		3
+
+#define IDX_ADDR(pcie)			(pcie->reg_offsets[EXT_CFG_INDEX])
+#define DATA_ADDR(pcie)			(pcie->reg_offsets[EXT_CFG_DATA])
+#define PCIE_RGR1_SW_INIT_1(pcie)	(pcie->reg_offsets[RGR1_SW_INIT_1])
+
+/* Rescal registers */
+#define PCIE_DVT_PMU_PCIE_PHY_CTRL				0xc700
+#define  PCIE_DVT_PMU_PCIE_PHY_CTRL_DAST_NFLDS			0x3
+#define  PCIE_DVT_PMU_PCIE_PHY_CTRL_DAST_DIG_RESET_MASK		0x4
+#define  PCIE_DVT_PMU_PCIE_PHY_CTRL_DAST_DIG_RESET_SHIFT	0x2
+#define  PCIE_DVT_PMU_PCIE_PHY_CTRL_DAST_RESET_MASK		0x2
+#define  PCIE_DVT_PMU_PCIE_PHY_CTRL_DAST_RESET_SHIFT		0x1
+#define  PCIE_DVT_PMU_PCIE_PHY_CTRL_DAST_PWRDN_MASK		0x1
+#define  PCIE_DVT_PMU_PCIE_PHY_CTRL_DAST_PWRDN_SHIFT		0x0
+
+/* Forward declarations */
+struct brcm_pcie;
+
+enum {
+	RGR1_SW_INIT_1,
+	EXT_CFG_INDEX,
+	EXT_CFG_DATA,
+};
+
+enum {
+	RGR1_SW_INIT_1_INIT_MASK,
+	RGR1_SW_INIT_1_INIT_SHIFT,
+};
+
+enum pcie_type {
+	GENERIC,
+	BCM7425,
+	BCM7435,
+	BCM4908,
+	BCM7278,
+	BCM2711,
+};
+
+struct pcie_cfg_data {
+	const int *offsets;
+	const enum pcie_type type;
+	void (*perst_set)(struct brcm_pcie *pcie, u32 val);
+	void (*bridge_sw_init_set)(struct brcm_pcie *pcie, u32 val);
+};
+
+struct subdev_regulators {
+	unsigned int num_supplies;
+	struct regulator_bulk_data supplies[];
+};
+
+struct brcm_msi {
+	struct device		*dev;
+	void __iomem		*base;
+	struct device_node	*np;
+	struct irq_domain	*msi_domain;
+	struct irq_domain	*inner_domain;
+	struct mutex		lock; /* guards the alloc/free operations */
+	u64			target_addr;
+	int			irq;
+	DECLARE_BITMAP(used, BRCM_INT_PCI_MSI_NR);
+	bool			legacy;
+	/* Some chips have MSIs in bits [31..24] of a shared register. */
+	int			legacy_shift;
+	int			nr; /* No. of MSI available, depends on chip */
+	/* This is the base pointer for interrupt status/set/clr regs */
+	void __iomem		*intr_base;
+};
+
+/* Internal PCIe Host Controller Information.*/
+struct brcm_pcie {
+	struct device		*dev;
+	void __iomem		*base;
+	struct clk		*clk;
+	struct device_node	*np;
+	bool			ssc;
+	int			gen;
+	u64			msi_target_addr;
+	struct brcm_msi		*msi;
+	const int		*reg_offsets;
+	enum pcie_type		type;
+	struct reset_control	*rescal;
+	struct reset_control	*perst_reset;
+	int			num_memc;
+	u64			memc_size[PCIE_BRCM_MAX_MEMC];
+	u32			hw_rev;
+	void			(*perst_set)(struct brcm_pcie *pcie, u32 val);
+	void			(*bridge_sw_init_set)(struct brcm_pcie *pcie, u32 val);
+	struct subdev_regulators *sr;
+	bool			ep_wakeup_capable;
+};
+
+static inline bool is_bmips(const struct brcm_pcie *pcie)
+{
+	return pcie->type == BCM7435 || pcie->type == BCM7425;
+}
+
+/*
+ * This is to convert the size of the inbound "BAR" region to the
+ * non-linear values of PCIE_X_MISC_RC_BAR[123]_CONFIG_LO.SIZE
+ */
+static int brcm_pcie_encode_ibar_size(u64 size)
+{
+	int log2_in = ilog2(size);
+
+	if (log2_in >= 12 && log2_in <= 15)
+		/* Covers 4KB to 32KB (inclusive) */
+		return (log2_in - 12) + 0x1c;
+	else if (log2_in >= 16 && log2_in <= 35)
+		/* Covers 64KB to 32GB, (inclusive) */
+		return log2_in - 15;
+	/* Something is awry so disable */
+	return 0;
+}
+
+static u32 brcm_pcie_mdio_form_pkt(int port, int regad, int cmd)
+{
+	u32 pkt = 0;
+
+	pkt |= FIELD_PREP(MDIO_PORT_MASK, port);
+	pkt |= FIELD_PREP(MDIO_REGAD_MASK, regad);
+	pkt |= FIELD_PREP(MDIO_CMD_MASK, cmd);
+
+	return pkt;
+}
+
+/* negative return value indicates error */
+static int brcm_pcie_mdio_read(void __iomem *base, u8 port, u8 regad, u32 *val)
+{
+	u32 data;
+	int err;
+
+	writel(brcm_pcie_mdio_form_pkt(port, regad, MDIO_CMD_READ),
+		   base + PCIE_RC_DL_MDIO_ADDR);
+	readl(base + PCIE_RC_DL_MDIO_ADDR);
+	err = readl_poll_timeout_atomic(base + PCIE_RC_DL_MDIO_RD_DATA, data,
+					MDIO_RD_DONE(data), 10, 100);
+	*val = FIELD_GET(MDIO_DATA_MASK, data);
+
+	return err;
+}
+
+/* negative return value indicates error */
+static int brcm_pcie_mdio_write(void __iomem *base, u8 port,
+				u8 regad, u16 wrdata)
+{
+	u32 data;
+	int err;
+
+	writel(brcm_pcie_mdio_form_pkt(port, regad, MDIO_CMD_WRITE),
+		   base + PCIE_RC_DL_MDIO_ADDR);
+	readl(base + PCIE_RC_DL_MDIO_ADDR);
+	writel(MDIO_DATA_DONE_MASK | wrdata, base + PCIE_RC_DL_MDIO_WR_DATA);
+
+	err = readw_poll_timeout_atomic(base + PCIE_RC_DL_MDIO_WR_DATA, data,
+					MDIO_WT_DONE(data), 10, 100);
+	return err;
+}
+
+/*
+ * Configures device for Spread Spectrum Clocking (SSC) mode; a negative
+ * return value indicates error.
+ */
+static int brcm_pcie_set_ssc(struct brcm_pcie *pcie)
+{
+	int pll, ssc;
+	int ret;
+	u32 tmp;
+
+	ret = brcm_pcie_mdio_write(pcie->base, MDIO_PORT0, SET_ADDR_OFFSET,
+				   SSC_REGS_ADDR);
+	if (ret < 0)
+		return ret;
+
+	ret = brcm_pcie_mdio_read(pcie->base, MDIO_PORT0,
+				  SSC_CNTL_OFFSET, &tmp);
+	if (ret < 0)
+		return ret;
+
+	u32p_replace_bits(&tmp, 1, SSC_CNTL_OVRD_EN_MASK);
+	u32p_replace_bits(&tmp, 1, SSC_CNTL_OVRD_VAL_MASK);
+	ret = brcm_pcie_mdio_write(pcie->base, MDIO_PORT0,
+				   SSC_CNTL_OFFSET, tmp);
+	if (ret < 0)
+		return ret;
+
+	usleep_range(1000, 2000);
+	ret = brcm_pcie_mdio_read(pcie->base, MDIO_PORT0,
+				  SSC_STATUS_OFFSET, &tmp);
+	if (ret < 0)
+		return ret;
+
+	ssc = FIELD_GET(SSC_STATUS_SSC_MASK, tmp);
+	pll = FIELD_GET(SSC_STATUS_PLL_LOCK_MASK, tmp);
+
+	return ssc && pll ? 0 : -EIO;
+}
+
+/* Limits operation to a specific generation (1, 2, or 3) */
+static void brcm_pcie_set_gen(struct brcm_pcie *pcie, int gen)
+{
+	u16 lnkctl2 = readw(pcie->base + BRCM_PCIE_CAP_REGS + PCI_EXP_LNKCTL2);
+	u32 lnkcap = readl(pcie->base + BRCM_PCIE_CAP_REGS + PCI_EXP_LNKCAP);
+
+	lnkcap = (lnkcap & ~PCI_EXP_LNKCAP_SLS) | gen;
+	writel(lnkcap, pcie->base + BRCM_PCIE_CAP_REGS + PCI_EXP_LNKCAP);
+
+	lnkctl2 = (lnkctl2 & ~0xf) | gen;
+	writew(lnkctl2, pcie->base + BRCM_PCIE_CAP_REGS + PCI_EXP_LNKCTL2);
+}
+
+static void brcm_pcie_set_outbound_win(struct brcm_pcie *pcie,
+				       unsigned int win, u64 cpu_addr,
+				       u64 pcie_addr, u64 size)
+{
+	u32 cpu_addr_mb_high, limit_addr_mb_high;
+	phys_addr_t cpu_addr_mb, limit_addr_mb;
+	int high_addr_shift;
+	u32 tmp;
+
+	/* Set the base of the pcie_addr window */
+	writel(lower_32_bits(pcie_addr), pcie->base + PCIE_MEM_WIN0_LO(win));
+	writel(upper_32_bits(pcie_addr), pcie->base + PCIE_MEM_WIN0_HI(win));
+
+	/* Write the addr base & limit lower bits (in MBs) */
+	cpu_addr_mb = cpu_addr / SZ_1M;
+	limit_addr_mb = (cpu_addr + size - 1) / SZ_1M;
+
+	tmp = readl(pcie->base + PCIE_MEM_WIN0_BASE_LIMIT(win));
+	u32p_replace_bits(&tmp, cpu_addr_mb,
+			  PCIE_MISC_CPU_2_PCIE_MEM_WIN0_BASE_LIMIT_BASE_MASK);
+	u32p_replace_bits(&tmp, limit_addr_mb,
+			  PCIE_MISC_CPU_2_PCIE_MEM_WIN0_BASE_LIMIT_LIMIT_MASK);
+	writel(tmp, pcie->base + PCIE_MEM_WIN0_BASE_LIMIT(win));
+
+	if (is_bmips(pcie))
+		return;
+
+	/* Write the cpu & limit addr upper bits */
+	high_addr_shift =
+		HWEIGHT32(PCIE_MISC_CPU_2_PCIE_MEM_WIN0_BASE_LIMIT_BASE_MASK);
+
+	cpu_addr_mb_high = cpu_addr_mb >> high_addr_shift;
+	tmp = readl(pcie->base + PCIE_MEM_WIN0_BASE_HI(win));
+	u32p_replace_bits(&tmp, cpu_addr_mb_high,
+			  PCIE_MISC_CPU_2_PCIE_MEM_WIN0_BASE_HI_BASE_MASK);
+	writel(tmp, pcie->base + PCIE_MEM_WIN0_BASE_HI(win));
+
+	limit_addr_mb_high = limit_addr_mb >> high_addr_shift;
+	tmp = readl(pcie->base + PCIE_MEM_WIN0_LIMIT_HI(win));
+	u32p_replace_bits(&tmp, limit_addr_mb_high,
+			  PCIE_MISC_CPU_2_PCIE_MEM_WIN0_LIMIT_HI_LIMIT_MASK);
+	writel(tmp, pcie->base + PCIE_MEM_WIN0_LIMIT_HI(win));
+}
+
+static struct irq_chip brcm_msi_irq_chip = {
+	.name            = "BRCM STB PCIe MSI",
+	.irq_ack         = irq_chip_ack_parent,
+	.irq_mask        = pci_msi_mask_irq,
+	.irq_unmask      = pci_msi_unmask_irq,
+};
+
+static struct msi_domain_info brcm_msi_domain_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		   MSI_FLAG_MULTI_PCI_MSI),
+	.chip	= &brcm_msi_irq_chip,
+};
+
+static void brcm_pcie_msi_isr(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned long status;
+	struct brcm_msi *msi;
+	struct device *dev;
+	u32 bit;
+
+	chained_irq_enter(chip, desc);
+	msi = irq_desc_get_handler_data(desc);
+	dev = msi->dev;
+
+	status = readl(msi->intr_base + MSI_INT_STATUS);
+	status >>= msi->legacy_shift;
+
+	for_each_set_bit(bit, &status, msi->nr) {
+		int ret;
+		ret = generic_handle_domain_irq(msi->inner_domain, bit);
+		if (ret)
+			dev_dbg(dev, "unexpected MSI\n");
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static void brcm_msi_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct brcm_msi *msi = irq_data_get_irq_chip_data(data);
+
+	msg->address_lo = lower_32_bits(msi->target_addr);
+	msg->address_hi = upper_32_bits(msi->target_addr);
+	msg->data = (0xffff & PCIE_MISC_MSI_DATA_CONFIG_VAL_32) | data->hwirq;
+}
+
+static int brcm_msi_set_affinity(struct irq_data *irq_data,
+				 const struct cpumask *mask, bool force)
+{
+	return -EINVAL;
+}
+
+static void brcm_msi_ack_irq(struct irq_data *data)
+{
+	struct brcm_msi *msi = irq_data_get_irq_chip_data(data);
+	const int shift_amt = data->hwirq + msi->legacy_shift;
+
+	writel(1 << shift_amt, msi->intr_base + MSI_INT_CLR);
+}
+
+
+static struct irq_chip brcm_msi_bottom_irq_chip = {
+	.name			= "BRCM STB MSI",
+	.irq_compose_msi_msg	= brcm_msi_compose_msi_msg,
+	.irq_set_affinity	= brcm_msi_set_affinity,
+	.irq_ack                = brcm_msi_ack_irq,
+};
+
+static int brcm_msi_alloc(struct brcm_msi *msi, unsigned int nr_irqs)
+{
+	int hwirq;
+
+	mutex_lock(&msi->lock);
+	hwirq = bitmap_find_free_region(msi->used, msi->nr,
+					order_base_2(nr_irqs));
+	mutex_unlock(&msi->lock);
+
+	return hwirq;
+}
+
+static void brcm_msi_free(struct brcm_msi *msi, unsigned long hwirq,
+			  unsigned int nr_irqs)
+{
+	mutex_lock(&msi->lock);
+	bitmap_release_region(msi->used, hwirq, order_base_2(nr_irqs));
+	mutex_unlock(&msi->lock);
+}
+
+static int brcm_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				 unsigned int nr_irqs, void *args)
+{
+	struct brcm_msi *msi = domain->host_data;
+	int hwirq, i;
+
+	hwirq = brcm_msi_alloc(msi, nr_irqs);
+
+	if (hwirq < 0)
+		return hwirq;
+
+	for (i = 0; i < nr_irqs; i++)
+		irq_domain_set_info(domain, virq + i, hwirq + i,
+				    &brcm_msi_bottom_irq_chip, domain->host_data,
+				    handle_edge_irq, NULL, NULL);
+	return 0;
+}
+
+static void brcm_irq_domain_free(struct irq_domain *domain,
+				 unsigned int virq, unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct brcm_msi *msi = irq_data_get_irq_chip_data(d);
+
+	brcm_msi_free(msi, d->hwirq, nr_irqs);
+}
+
+static const struct irq_domain_ops msi_domain_ops = {
+	.alloc	= brcm_irq_domain_alloc,
+	.free	= brcm_irq_domain_free,
+};
+
+static int brcm_allocate_domains(struct brcm_msi *msi)
+{
+	struct fwnode_handle *fwnode = of_node_to_fwnode(msi->np);
+	struct device *dev = msi->dev;
+
+	msi->inner_domain = irq_domain_add_linear(NULL, msi->nr, &msi_domain_ops, msi);
+	if (!msi->inner_domain) {
+		dev_err(dev, "failed to create IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	msi->msi_domain = pci_msi_create_irq_domain(fwnode,
+						    &brcm_msi_domain_info,
+						    msi->inner_domain);
+	if (!msi->msi_domain) {
+		dev_err(dev, "failed to create MSI domain\n");
+		irq_domain_remove(msi->inner_domain);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void brcm_free_domains(struct brcm_msi *msi)
+{
+	irq_domain_remove(msi->msi_domain);
+	irq_domain_remove(msi->inner_domain);
+}
+
+static void brcm_msi_remove(struct brcm_pcie *pcie)
+{
+	struct brcm_msi *msi = pcie->msi;
+
+	if (!msi)
+		return;
+	irq_set_chained_handler_and_data(msi->irq, NULL, NULL);
+	brcm_free_domains(msi);
+}
+
+static void brcm_msi_set_regs(struct brcm_msi *msi)
+{
+	u32 val = msi->legacy ? BRCM_INT_PCI_MSI_LEGACY_MASK :
+				BRCM_INT_PCI_MSI_MASK;
+
+	writel(val, msi->intr_base + MSI_INT_MASK_CLR);
+	writel(val, msi->intr_base + MSI_INT_CLR);
+
+	/*
+	 * The 0 bit of PCIE_MISC_MSI_BAR_CONFIG_LO is repurposed to MSI
+	 * enable, which we set to 1.
+	 */
+	writel(lower_32_bits(msi->target_addr) | 0x1,
+	       msi->base + PCIE_MISC_MSI_BAR_CONFIG_LO);
+	writel(upper_32_bits(msi->target_addr),
+	       msi->base + PCIE_MISC_MSI_BAR_CONFIG_HI);
+
+	val = msi->legacy ? PCIE_MISC_MSI_DATA_CONFIG_VAL_8 : PCIE_MISC_MSI_DATA_CONFIG_VAL_32;
+	writel(val, msi->base + PCIE_MISC_MSI_DATA_CONFIG);
+}
+
+static int brcm_pcie_enable_msi(struct brcm_pcie *pcie)
+{
+	struct brcm_msi *msi;
+	int irq, ret;
+	struct device *dev = pcie->dev;
+
+	irq = irq_of_parse_and_map(dev->of_node, 1);
+	if (irq <= 0) {
+		dev_err(dev, "cannot map MSI interrupt\n");
+		return -ENODEV;
+	}
+
+	msi = devm_kzalloc(dev, sizeof(struct brcm_msi), GFP_KERNEL);
+	if (!msi)
+		return -ENOMEM;
+
+	mutex_init(&msi->lock);
+	msi->dev = dev;
+	msi->base = pcie->base;
+	msi->np = pcie->np;
+	msi->target_addr = pcie->msi_target_addr;
+	msi->irq = irq;
+	msi->legacy = pcie->hw_rev < BRCM_PCIE_HW_REV_33;
+
+	/*
+	 * Sanity check to make sure that the 'used' bitmap in struct brcm_msi
+	 * is large enough.
+	 */
+	BUILD_BUG_ON(BRCM_INT_PCI_MSI_LEGACY_NR > BRCM_INT_PCI_MSI_NR);
+
+	if (msi->legacy) {
+		msi->intr_base = msi->base + PCIE_INTR2_CPU_BASE;
+		msi->nr = BRCM_INT_PCI_MSI_LEGACY_NR;
+		msi->legacy_shift = 24;
+	} else {
+		msi->intr_base = msi->base + PCIE_MSI_INTR2_BASE;
+		msi->nr = BRCM_INT_PCI_MSI_NR;
+		msi->legacy_shift = 0;
+	}
+
+	ret = brcm_allocate_domains(msi);
+	if (ret)
+		return ret;
+
+	irq_set_chained_handler_and_data(msi->irq, brcm_pcie_msi_isr, msi);
+
+	brcm_msi_set_regs(msi);
+	pcie->msi = msi;
+
+	return 0;
+}
+
+/* The controller is capable of serving in both RC and EP roles */
+static bool brcm_pcie_rc_mode(struct brcm_pcie *pcie)
+{
+	void __iomem *base = pcie->base;
+	u32 val = readl(base + PCIE_MISC_PCIE_STATUS);
+
+	return !!FIELD_GET(PCIE_MISC_PCIE_STATUS_PCIE_PORT_MASK, val);
+}
+
+static bool brcm_pcie_link_up(struct brcm_pcie *pcie)
+{
+	u32 val = readl(pcie->base + PCIE_MISC_PCIE_STATUS);
+	u32 dla = FIELD_GET(PCIE_MISC_PCIE_STATUS_PCIE_DL_ACTIVE_MASK, val);
+	u32 plu = FIELD_GET(PCIE_MISC_PCIE_STATUS_PCIE_PHYLINKUP_MASK, val);
+
+	return dla && plu;
+}
+
+static void __iomem *brcm_pcie_map_bus(struct pci_bus *bus,
+				       unsigned int devfn, int where)
+{
+	struct brcm_pcie *pcie = bus->sysdata;
+	void __iomem *base = pcie->base;
+	int idx;
+
+	/* Accesses to the RC go right to the RC registers if !devfn */
+	if (pci_is_root_bus(bus))
+		return devfn ? NULL : base + PCIE_ECAM_REG(where);
+
+	/* An access to our HW w/o link-up will cause a CPU Abort */
+	if (!brcm_pcie_link_up(pcie))
+		return NULL;
+
+	/* For devices, write to the config space index register */
+	idx = PCIE_ECAM_OFFSET(bus->number, devfn, 0);
+	writel(idx, pcie->base + PCIE_EXT_CFG_INDEX);
+	return base + PCIE_EXT_CFG_DATA + PCIE_ECAM_REG(where);
+}
+
+static void __iomem *brcm7425_pcie_map_bus(struct pci_bus *bus,
+					   unsigned int devfn, int where)
+{
+	struct brcm_pcie *pcie = bus->sysdata;
+	void __iomem *base = pcie->base;
+	int idx;
+
+	/* Accesses to the RC go right to the RC registers if !devfn */
+	if (pci_is_root_bus(bus))
+		return devfn ? NULL : base + PCIE_ECAM_REG(where);
+
+	/* An access to our HW w/o link-up will cause a CPU Abort */
+	if (!brcm_pcie_link_up(pcie))
+		return NULL;
+
+	/* For devices, write to the config space index register */
+	idx = PCIE_ECAM_OFFSET(bus->number, devfn, where);
+	writel(idx, base + IDX_ADDR(pcie));
+	return base + DATA_ADDR(pcie);
+}
+
+static void brcm_pcie_bridge_sw_init_set_generic(struct brcm_pcie *pcie, u32 val)
+{
+	u32 tmp, mask =  RGR1_SW_INIT_1_INIT_GENERIC_MASK;
+	u32 shift = RGR1_SW_INIT_1_INIT_GENERIC_SHIFT;
+
+	tmp = readl(pcie->base + PCIE_RGR1_SW_INIT_1(pcie));
+	tmp = (tmp & ~mask) | ((val << shift) & mask);
+	writel(tmp, pcie->base + PCIE_RGR1_SW_INIT_1(pcie));
+}
+
+static void brcm_pcie_bridge_sw_init_set_7278(struct brcm_pcie *pcie, u32 val)
+{
+	u32 tmp, mask =  RGR1_SW_INIT_1_INIT_7278_MASK;
+	u32 shift = RGR1_SW_INIT_1_INIT_7278_SHIFT;
+
+	tmp = readl(pcie->base + PCIE_RGR1_SW_INIT_1(pcie));
+	tmp = (tmp & ~mask) | ((val << shift) & mask);
+	writel(tmp, pcie->base + PCIE_RGR1_SW_INIT_1(pcie));
+}
+
+static void brcm_pcie_perst_set_4908(struct brcm_pcie *pcie, u32 val)
+{
+	if (WARN_ONCE(!pcie->perst_reset, "missing PERST# reset controller\n"))
+		return;
+
+	if (val)
+		reset_control_assert(pcie->perst_reset);
+	else
+		reset_control_deassert(pcie->perst_reset);
+}
+
+static void brcm_pcie_perst_set_7278(struct brcm_pcie *pcie, u32 val)
+{
+	u32 tmp;
+
+	/* Perst bit has moved and assert value is 0 */
+	tmp = readl(pcie->base + PCIE_MISC_PCIE_CTRL);
+	u32p_replace_bits(&tmp, !val, PCIE_MISC_PCIE_CTRL_PCIE_PERSTB_MASK);
+	writel(tmp, pcie->base +  PCIE_MISC_PCIE_CTRL);
+}
+
+static void brcm_pcie_perst_set_generic(struct brcm_pcie *pcie, u32 val)
+{
+	u32 tmp;
+
+	tmp = readl(pcie->base + PCIE_RGR1_SW_INIT_1(pcie));
+	u32p_replace_bits(&tmp, val, PCIE_RGR1_SW_INIT_1_PERST_MASK);
+	writel(tmp, pcie->base + PCIE_RGR1_SW_INIT_1(pcie));
+}
+
+static int brcm_pcie_get_rc_bar2_size_and_offset(struct brcm_pcie *pcie,
+							u64 *rc_bar2_size,
+							u64 *rc_bar2_offset)
+{
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(pcie);
+	struct resource_entry *entry;
+	struct device *dev = pcie->dev;
+	u64 lowest_pcie_addr = ~(u64)0;
+	int ret, i = 0;
+	u64 size = 0;
+
+	resource_list_for_each_entry(entry, &bridge->dma_ranges) {
+		u64 pcie_beg = entry->res->start - entry->offset;
+
+		size += entry->res->end - entry->res->start + 1;
+		if (pcie_beg < lowest_pcie_addr)
+			lowest_pcie_addr = pcie_beg;
+	}
+
+	if (lowest_pcie_addr == ~(u64)0) {
+		dev_err(dev, "DT node has no dma-ranges\n");
+		return -EINVAL;
+	}
+
+	ret = of_property_read_variable_u64_array(pcie->np, "brcm,scb-sizes", pcie->memc_size, 1,
+						  PCIE_BRCM_MAX_MEMC);
+
+	if (ret <= 0) {
+		/* Make an educated guess */
+		pcie->num_memc = 1;
+		pcie->memc_size[0] = 1ULL << fls64(size - 1);
+	} else {
+		pcie->num_memc = ret;
+	}
+
+	/* Each memc is viewed through a "port" that is a power of 2 */
+	for (i = 0, size = 0; i < pcie->num_memc; i++)
+		size += pcie->memc_size[i];
+
+	/* System memory starts at this address in PCIe-space */
+	*rc_bar2_offset = lowest_pcie_addr;
+	/* The sum of all memc views must also be a power of 2 */
+	*rc_bar2_size = 1ULL << fls64(size - 1);
+
+	/*
+	 * We validate the inbound memory view even though we should trust
+	 * whatever the device-tree provides. This is because of an HW issue on
+	 * early Raspberry Pi 4's revisions (bcm2711). It turns out its
+	 * firmware has to dynamically edit dma-ranges due to a bug on the
+	 * PCIe controller integration, which prohibits any access above the
+	 * lower 3GB of memory. Given this, we decided to keep the dma-ranges
+	 * in check, avoiding hard to debug device-tree related issues in the
+	 * future:
+	 *
+	 * The PCIe host controller by design must set the inbound viewport to
+	 * be a contiguous arrangement of all of the system's memory.  In
+	 * addition, its size mut be a power of two.  To further complicate
+	 * matters, the viewport must start on a pcie-address that is aligned
+	 * on a multiple of its size.  If a portion of the viewport does not
+	 * represent system memory -- e.g. 3GB of memory requires a 4GB
+	 * viewport -- we can map the outbound memory in or after 3GB and even
+	 * though the viewport will overlap the outbound memory the controller
+	 * will know to send outbound memory downstream and everything else
+	 * upstream.
+	 *
+	 * For example:
+	 *
+	 * - The best-case scenario, memory up to 3GB, is to place the inbound
+	 *   region in the first 4GB of pcie-space, as some legacy devices can
+	 *   only address 32bits. We would also like to put the MSI under 4GB
+	 *   as well, since some devices require a 32bit MSI target address.
+	 *
+	 * - If the system memory is 4GB or larger we cannot start the inbound
+	 *   region at location 0 (since we have to allow some space for
+	 *   outbound memory @ 3GB). So instead it will  start at the 1x
+	 *   multiple of its size
+	 */
+	if (!*rc_bar2_size || (*rc_bar2_offset & (*rc_bar2_size - 1)) ||
+	    (*rc_bar2_offset < SZ_4G && *rc_bar2_offset > SZ_2G)) {
+		dev_err(dev, "Invalid rc_bar2_offset/size: size 0x%llx, off 0x%llx\n",
+			*rc_bar2_size, *rc_bar2_offset);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int brcm_pcie_setup(struct brcm_pcie *pcie)
+{
+	u64 rc_bar2_offset, rc_bar2_size;
+	void __iomem *base = pcie->base;
+	struct pci_host_bridge *bridge;
+	struct resource_entry *entry;
+	u32 tmp, burst, aspm_support;
+	int num_out_wins = 0;
+	int ret, memc;
+
+	/* Reset the bridge */
+	pcie->bridge_sw_init_set(pcie, 1);
+
+	/* Ensure that PERST# is asserted; some bootloaders may deassert it. */
+	if (pcie->type == BCM2711)
+		pcie->perst_set(pcie, 1);
+
+	usleep_range(100, 200);
+
+	/* Take the bridge out of reset */
+	pcie->bridge_sw_init_set(pcie, 0);
+
+	tmp = readl(base + PCIE_MISC_HARD_PCIE_HARD_DEBUG);
+	if (is_bmips(pcie))
+		tmp &= ~PCIE_BMIPS_MISC_HARD_PCIE_HARD_DEBUG_SERDES_IDDQ_MASK;
+	else
+		tmp &= ~PCIE_MISC_HARD_PCIE_HARD_DEBUG_SERDES_IDDQ_MASK;
+	writel(tmp, base + PCIE_MISC_HARD_PCIE_HARD_DEBUG);
+	/* Wait for SerDes to be stable */
+	usleep_range(100, 200);
+
+	/*
+	 * SCB_MAX_BURST_SIZE is a two bit field.  For GENERIC chips it
+	 * is encoded as 0=128, 1=256, 2=512, 3=Rsvd, for BCM7278 it
+	 * is encoded as 0=Rsvd, 1=128, 2=256, 3=512.
+	 */
+	if (is_bmips(pcie))
+		burst = 0x1; /* 256 bytes */
+	else if (pcie->type == BCM2711)
+		burst = 0x0; /* 128 bytes */
+	else if (pcie->type == BCM7278)
+		burst = 0x3; /* 512 bytes */
+	else
+		burst = 0x2; /* 512 bytes */
+
+	/*
+	 * Set SCB_MAX_BURST_SIZE, CFG_READ_UR_MODE, SCB_ACCESS_EN,
+	 * RCB_MPS_MODE, RCB_64B_MODE
+	 */
+	tmp = readl(base + PCIE_MISC_MISC_CTRL);
+	u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_SCB_ACCESS_EN_MASK);
+	u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_CFG_READ_UR_MODE_MASK);
+	u32p_replace_bits(&tmp, burst, PCIE_MISC_MISC_CTRL_MAX_BURST_SIZE_MASK);
+	u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_PCIE_RCB_MPS_MODE_MASK);
+	u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_PCIE_RCB_64B_MODE_MASK);
+	writel(tmp, base + PCIE_MISC_MISC_CTRL);
+
+	ret = brcm_pcie_get_rc_bar2_size_and_offset(pcie, &rc_bar2_size,
+						    &rc_bar2_offset);
+	if (ret)
+		return ret;
+
+	tmp = lower_32_bits(rc_bar2_offset);
+	u32p_replace_bits(&tmp, brcm_pcie_encode_ibar_size(rc_bar2_size),
+			  PCIE_MISC_RC_BAR2_CONFIG_LO_SIZE_MASK);
+	writel(tmp, base + PCIE_MISC_RC_BAR2_CONFIG_LO);
+	writel(upper_32_bits(rc_bar2_offset),
+	       base + PCIE_MISC_RC_BAR2_CONFIG_HI);
+
+	tmp = readl(base + PCIE_MISC_MISC_CTRL);
+	for (memc = 0; memc < pcie->num_memc; memc++) {
+		u32 scb_size_val = ilog2(pcie->memc_size[memc]) - 15;
+
+		if (memc == 0)
+			u32p_replace_bits(&tmp, scb_size_val, SCB_SIZE_MASK(0));
+		else if (memc == 1)
+			u32p_replace_bits(&tmp, scb_size_val, SCB_SIZE_MASK(1));
+		else if (memc == 2)
+			u32p_replace_bits(&tmp, scb_size_val, SCB_SIZE_MASK(2));
+	}
+	writel(tmp, base + PCIE_MISC_MISC_CTRL);
+
+	/*
+	 * We ideally want the MSI target address to be located in the 32bit
+	 * addressable memory area. Some devices might depend on it. This is
+	 * possible either when the inbound window is located above the lower
+	 * 4GB or when the inbound area is smaller than 4GB (taking into
+	 * account the rounding-up we're forced to perform).
+	 */
+	if (rc_bar2_offset >= SZ_4G || (rc_bar2_size + rc_bar2_offset) < SZ_4G)
+		pcie->msi_target_addr = BRCM_MSI_TARGET_ADDR_LT_4GB;
+	else
+		pcie->msi_target_addr = BRCM_MSI_TARGET_ADDR_GT_4GB;
+
+	if (!brcm_pcie_rc_mode(pcie)) {
+		dev_err(pcie->dev, "PCIe RC controller misconfigured as Endpoint\n");
+		return -EINVAL;
+	}
+
+	/* disable the PCIe->GISB memory window (RC_BAR1) */
+	tmp = readl(base + PCIE_MISC_RC_BAR1_CONFIG_LO);
+	tmp &= ~PCIE_MISC_RC_BAR1_CONFIG_LO_SIZE_MASK;
+	writel(tmp, base + PCIE_MISC_RC_BAR1_CONFIG_LO);
+
+	/* disable the PCIe->SCB memory window (RC_BAR3) */
+	tmp = readl(base + PCIE_MISC_RC_BAR3_CONFIG_LO);
+	tmp &= ~PCIE_MISC_RC_BAR3_CONFIG_LO_SIZE_MASK;
+	writel(tmp, base + PCIE_MISC_RC_BAR3_CONFIG_LO);
+
+	/* Don't advertise L0s capability if 'aspm-no-l0s' */
+	aspm_support = PCIE_LINK_STATE_L1;
+	if (!of_property_read_bool(pcie->np, "aspm-no-l0s"))
+		aspm_support |= PCIE_LINK_STATE_L0S;
+	tmp = readl(base + PCIE_RC_CFG_PRIV1_LINK_CAPABILITY);
+	u32p_replace_bits(&tmp, aspm_support,
+		PCIE_RC_CFG_PRIV1_LINK_CAPABILITY_ASPM_SUPPORT_MASK);
+	writel(tmp, base + PCIE_RC_CFG_PRIV1_LINK_CAPABILITY);
+
+	/*
+	 * For config space accesses on the RC, show the right class for
+	 * a PCIe-PCIe bridge (the default setting is to be EP mode).
+	 */
+	tmp = readl(base + PCIE_RC_CFG_PRIV1_ID_VAL3);
+	u32p_replace_bits(&tmp, 0x060400,
+			  PCIE_RC_CFG_PRIV1_ID_VAL3_CLASS_CODE_MASK);
+	writel(tmp, base + PCIE_RC_CFG_PRIV1_ID_VAL3);
+
+	bridge = pci_host_bridge_from_priv(pcie);
+	resource_list_for_each_entry(entry, &bridge->windows) {
+		struct resource *res = entry->res;
+
+		if (resource_type(res) != IORESOURCE_MEM)
+			continue;
+
+		if (num_out_wins >= BRCM_NUM_PCIE_OUT_WINS) {
+			dev_err(pcie->dev, "too many outbound wins\n");
+			return -EINVAL;
+		}
+
+		if (is_bmips(pcie)) {
+			u64 start = res->start;
+			unsigned int j, nwins = resource_size(res) / SZ_128M;
+
+			/* bmips PCIe outbound windows have a 128MB max size */
+			if (nwins > BRCM_NUM_PCIE_OUT_WINS)
+				nwins = BRCM_NUM_PCIE_OUT_WINS;
+			for (j = 0; j < nwins; j++, start += SZ_128M)
+				brcm_pcie_set_outbound_win(pcie, j, start,
+							   start - entry->offset,
+							   SZ_128M);
+			break;
+		}
+		brcm_pcie_set_outbound_win(pcie, num_out_wins, res->start,
+					   res->start - entry->offset,
+					   resource_size(res));
+		num_out_wins++;
+	}
+
+	/* PCIe->SCB endian mode for BAR */
+	tmp = readl(base + PCIE_RC_CFG_VENDOR_VENDOR_SPECIFIC_REG1);
+	u32p_replace_bits(&tmp, PCIE_RC_CFG_VENDOR_SPCIFIC_REG1_LITTLE_ENDIAN,
+		PCIE_RC_CFG_VENDOR_VENDOR_SPECIFIC_REG1_ENDIAN_MODE_BAR2_MASK);
+	writel(tmp, base + PCIE_RC_CFG_VENDOR_VENDOR_SPECIFIC_REG1);
+
+	return 0;
+}
+
+static int brcm_pcie_start_link(struct brcm_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	void __iomem *base = pcie->base;
+	u16 nlw, cls, lnksta;
+	bool ssc_good = false;
+	u32 tmp;
+	int ret, i;
+
+	/* Unassert the fundamental reset */
+	pcie->perst_set(pcie, 0);
+
+	/*
+	 * Wait for 100ms after PERST# deassertion; see PCIe CEM specification
+	 * sections 2.2, PCIe r5.0, 6.6.1.
+	 */
+	msleep(100);
+
+	/*
+	 * Give the RC/EP even more time to wake up, before trying to
+	 * configure RC.  Intermittently check status for link-up, up to a
+	 * total of 100ms.
+	 */
+	for (i = 0; i < 100 && !brcm_pcie_link_up(pcie); i += 5)
+		msleep(5);
+
+	if (!brcm_pcie_link_up(pcie)) {
+		dev_err(dev, "link down\n");
+		return -ENODEV;
+	}
+
+	if (pcie->gen)
+		brcm_pcie_set_gen(pcie, pcie->gen);
+
+	if (pcie->ssc) {
+		ret = brcm_pcie_set_ssc(pcie);
+		if (ret == 0)
+			ssc_good = true;
+		else
+			dev_err(dev, "failed attempt to enter ssc mode\n");
+	}
+
+	lnksta = readw(base + BRCM_PCIE_CAP_REGS + PCI_EXP_LNKSTA);
+	cls = FIELD_GET(PCI_EXP_LNKSTA_CLS, lnksta);
+	nlw = FIELD_GET(PCI_EXP_LNKSTA_NLW, lnksta);
+	dev_info(dev, "link up, %s x%u %s\n",
+		 pci_speed_string(pcie_link_speed[cls]), nlw,
+		 ssc_good ? "(SSC)" : "(!SSC)");
+
+	/*
+	 * Refclk from RC should be gated with CLKREQ# input when ASPM L0s,L1
+	 * is enabled => setting the CLKREQ_DEBUG_ENABLE field to 1.
+	 */
+	tmp = readl(base + PCIE_MISC_HARD_PCIE_HARD_DEBUG);
+	tmp |= PCIE_MISC_HARD_PCIE_HARD_DEBUG_CLKREQ_DEBUG_ENABLE_MASK;
+	writel(tmp, base + PCIE_MISC_HARD_PCIE_HARD_DEBUG);
+
+	return 0;
+}
+
+static const char * const supplies[] = {
+	"vpcie3v3",
+	"vpcie3v3aux",
+	"vpcie12v",
+};
+
+static void *alloc_subdev_regulators(struct device *dev)
+{
+	const size_t size = sizeof(struct subdev_regulators) +
+		sizeof(struct regulator_bulk_data) * ARRAY_SIZE(supplies);
+	struct subdev_regulators *sr;
+	int i;
+
+	sr = devm_kzalloc(dev, size, GFP_KERNEL);
+	if (sr) {
+		sr->num_supplies = ARRAY_SIZE(supplies);
+		for (i = 0; i < ARRAY_SIZE(supplies); i++)
+			sr->supplies[i].supply = supplies[i];
+	}
+
+	return sr;
+}
+
+static int brcm_pcie_add_bus(struct pci_bus *bus)
+{
+	struct brcm_pcie *pcie = bus->sysdata;
+	struct device *dev = &bus->dev;
+	struct subdev_regulators *sr;
+	int ret;
+
+	if (!bus->parent || !pci_is_root_bus(bus->parent))
+		return 0;
+
+	if (dev->of_node) {
+		sr = alloc_subdev_regulators(dev);
+		if (!sr) {
+			dev_info(dev, "Can't allocate regulators for downstream device\n");
+			goto no_regulators;
+		}
+
+		pcie->sr = sr;
+
+		ret = regulator_bulk_get(dev, sr->num_supplies, sr->supplies);
+		if (ret) {
+			dev_info(dev, "No regulators for downstream device\n");
+			goto no_regulators;
+		}
+
+		ret = regulator_bulk_enable(sr->num_supplies, sr->supplies);
+		if (ret) {
+			dev_err(dev, "Can't enable regulators for downstream device\n");
+			regulator_bulk_free(sr->num_supplies, sr->supplies);
+			pcie->sr = NULL;
+		}
+	}
+
+no_regulators:
+	brcm_pcie_start_link(pcie);
+	return 0;
+}
+
+static void brcm_pcie_remove_bus(struct pci_bus *bus)
+{
+	struct brcm_pcie *pcie = bus->sysdata;
+	struct subdev_regulators *sr = pcie->sr;
+	struct device *dev = &bus->dev;
+
+	if (!sr)
+		return;
+
+	if (regulator_bulk_disable(sr->num_supplies, sr->supplies))
+		dev_err(dev, "Failed to disable regulators for downstream device\n");
+	regulator_bulk_free(sr->num_supplies, sr->supplies);
+	pcie->sr = NULL;
+}
+
+/* L23 is a low-power PCIe link state */
+static void brcm_pcie_enter_l23(struct brcm_pcie *pcie)
+{
+	void __iomem *base = pcie->base;
+	int l23, i;
+	u32 tmp;
+
+	/* Assert request for L23 */
+	tmp = readl(base + PCIE_MISC_PCIE_CTRL);
+	u32p_replace_bits(&tmp, 1, PCIE_MISC_PCIE_CTRL_PCIE_L23_REQUEST_MASK);
+	writel(tmp, base + PCIE_MISC_PCIE_CTRL);
+
+	/* Wait up to 36 msec for L23 */
+	tmp = readl(base + PCIE_MISC_PCIE_STATUS);
+	l23 = FIELD_GET(PCIE_MISC_PCIE_STATUS_PCIE_LINK_IN_L23_MASK, tmp);
+	for (i = 0; i < 15 && !l23; i++) {
+		usleep_range(2000, 2400);
+		tmp = readl(base + PCIE_MISC_PCIE_STATUS);
+		l23 = FIELD_GET(PCIE_MISC_PCIE_STATUS_PCIE_LINK_IN_L23_MASK,
+				tmp);
+	}
+
+	if (!l23)
+		dev_err(pcie->dev, "failed to enter low-power link state\n");
+}
+
+static int brcm_phy_cntl(struct brcm_pcie *pcie, const int start)
+{
+	static const u32 shifts[PCIE_DVT_PMU_PCIE_PHY_CTRL_DAST_NFLDS] = {
+		PCIE_DVT_PMU_PCIE_PHY_CTRL_DAST_PWRDN_SHIFT,
+		PCIE_DVT_PMU_PCIE_PHY_CTRL_DAST_RESET_SHIFT,
+		PCIE_DVT_PMU_PCIE_PHY_CTRL_DAST_DIG_RESET_SHIFT,};
+	static const u32 masks[PCIE_DVT_PMU_PCIE_PHY_CTRL_DAST_NFLDS] = {
+		PCIE_DVT_PMU_PCIE_PHY_CTRL_DAST_PWRDN_MASK,
+		PCIE_DVT_PMU_PCIE_PHY_CTRL_DAST_RESET_MASK,
+		PCIE_DVT_PMU_PCIE_PHY_CTRL_DAST_DIG_RESET_MASK,};
+	const int beg = start ? 0 : PCIE_DVT_PMU_PCIE_PHY_CTRL_DAST_NFLDS - 1;
+	const int end = start ? PCIE_DVT_PMU_PCIE_PHY_CTRL_DAST_NFLDS : -1;
+	u32 tmp, combined_mask = 0;
+	u32 val;
+	void __iomem *base = pcie->base;
+	int i, ret;
+
+	for (i = beg; i != end; start ? i++ : i--) {
+		val = start ? BIT_MASK(shifts[i]) : 0;
+		tmp = readl(base + PCIE_DVT_PMU_PCIE_PHY_CTRL);
+		tmp = (tmp & ~masks[i]) | (val & masks[i]);
+		writel(tmp, base + PCIE_DVT_PMU_PCIE_PHY_CTRL);
+		usleep_range(50, 200);
+		combined_mask |= masks[i];
+	}
+
+	tmp = readl(base + PCIE_DVT_PMU_PCIE_PHY_CTRL);
+	val = start ? combined_mask : 0;
+
+	ret = (tmp & combined_mask) == val ? 0 : -EIO;
+	if (ret)
+		dev_err(pcie->dev, "failed to %s phy\n", (start ? "start" : "stop"));
+
+	return ret;
+}
+
+static inline int brcm_phy_start(struct brcm_pcie *pcie)
+{
+	return pcie->rescal ? brcm_phy_cntl(pcie, 1) : 0;
+}
+
+static inline int brcm_phy_stop(struct brcm_pcie *pcie)
+{
+	return pcie->rescal ? brcm_phy_cntl(pcie, 0) : 0;
+}
+
+static void brcm_pcie_turn_off(struct brcm_pcie *pcie)
+{
+	void __iomem *base = pcie->base;
+	int tmp;
+
+	if (brcm_pcie_link_up(pcie))
+		brcm_pcie_enter_l23(pcie);
+	/* Assert fundamental reset */
+	pcie->perst_set(pcie, 1);
+
+	/* Deassert request for L23 in case it was asserted */
+	tmp = readl(base + PCIE_MISC_PCIE_CTRL);
+	u32p_replace_bits(&tmp, 0, PCIE_MISC_PCIE_CTRL_PCIE_L23_REQUEST_MASK);
+	writel(tmp, base + PCIE_MISC_PCIE_CTRL);
+
+	/* Turn off SerDes */
+	tmp = readl(base + PCIE_MISC_HARD_PCIE_HARD_DEBUG);
+	u32p_replace_bits(&tmp, 1, PCIE_MISC_HARD_PCIE_HARD_DEBUG_SERDES_IDDQ_MASK);
+	writel(tmp, base + PCIE_MISC_HARD_PCIE_HARD_DEBUG);
+
+	/* Shutdown PCIe bridge */
+	pcie->bridge_sw_init_set(pcie, 1);
+}
+
+static int pci_dev_may_wakeup(struct pci_dev *dev, void *data)
+{
+	bool *ret = data;
+
+	if (device_may_wakeup(&dev->dev)) {
+		*ret = true;
+		dev_info(&dev->dev, "Possible wake-up device; regulators will not be disabled\n");
+	}
+	return (int) *ret;
+}
+
+static int brcm_pcie_suspend_noirq(struct device *dev)
+{
+	struct brcm_pcie *pcie = dev_get_drvdata(dev);
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(pcie);
+	int ret;
+
+	brcm_pcie_turn_off(pcie);
+	/*
+	 * If brcm_phy_stop() returns an error, just dev_err(). If we
+	 * return the error it will cause the suspend to fail and this is a
+	 * forgivable offense that will probably be erased on resume.
+	 */
+	if (brcm_phy_stop(pcie))
+		dev_err(dev, "Could not stop phy for suspend\n");
+
+	ret = reset_control_rearm(pcie->rescal);
+	if (ret) {
+		dev_err(dev, "Could not rearm rescal reset\n");
+		return ret;
+	}
+
+	if (pcie->sr) {
+		/*
+		 * Now turn off the regulators, but if at least one
+		 * downstream device is enabled as a wake-up source, do not
+		 * turn off regulators.
+		 */
+		pcie->ep_wakeup_capable = false;
+		pci_walk_bus(bridge->bus, pci_dev_may_wakeup,
+			     &pcie->ep_wakeup_capable);
+		if (!pcie->ep_wakeup_capable) {
+			ret = regulator_bulk_disable(pcie->sr->num_supplies,
+						     pcie->sr->supplies);
+			if (ret) {
+				dev_err(dev, "Could not turn off regulators\n");
+				reset_control_reset(pcie->rescal);
+				return ret;
+			}
+		}
+	}
+	clk_disable_unprepare(pcie->clk);
+
+	return 0;
+}
+
+static int brcm_pcie_resume_noirq(struct device *dev)
+{
+	struct brcm_pcie *pcie = dev_get_drvdata(dev);
+	void __iomem *base;
+	u32 tmp;
+	int ret;
+
+	base = pcie->base;
+	ret = clk_prepare_enable(pcie->clk);
+	if (ret)
+		return ret;
+
+	ret = reset_control_reset(pcie->rescal);
+	if (ret)
+		goto err_disable_clk;
+
+	ret = brcm_phy_start(pcie);
+	if (ret)
+		goto err_reset;
+
+	/* Take bridge out of reset so we can access the SERDES reg */
+	pcie->bridge_sw_init_set(pcie, 0);
+
+	/* SERDES_IDDQ = 0 */
+	tmp = readl(base + PCIE_MISC_HARD_PCIE_HARD_DEBUG);
+	u32p_replace_bits(&tmp, 0, PCIE_MISC_HARD_PCIE_HARD_DEBUG_SERDES_IDDQ_MASK);
+	writel(tmp, base + PCIE_MISC_HARD_PCIE_HARD_DEBUG);
+
+	/* wait for serdes to be stable */
+	udelay(100);
+
+	ret = brcm_pcie_setup(pcie);
+	if (ret)
+		goto err_reset;
+
+	if (pcie->sr) {
+		if (pcie->ep_wakeup_capable) {
+			/*
+			 * We are resuming from a suspend.  In the suspend we
+			 * did not disable the power supplies, so there is
+			 * no need to enable them (and falsely increase their
+			 * usage count).
+			 */
+			pcie->ep_wakeup_capable = false;
+		} else {
+			ret = regulator_bulk_enable(pcie->sr->num_supplies,
+						    pcie->sr->supplies);
+			if (ret) {
+				dev_err(dev, "Could not turn on regulators\n");
+				goto err_reset;
+			}
+		}
+	}
+
+	ret = brcm_pcie_start_link(pcie);
+	if (ret)
+		goto err_regulator;
+
+	if (pcie->msi)
+		brcm_msi_set_regs(pcie->msi);
+
+	return 0;
+
+err_regulator:
+	if (pcie->sr)
+		regulator_bulk_disable(pcie->sr->num_supplies, pcie->sr->supplies);
+err_reset:
+	reset_control_rearm(pcie->rescal);
+err_disable_clk:
+	clk_disable_unprepare(pcie->clk);
+	return ret;
+}
+
+static void __brcm_pcie_remove(struct brcm_pcie *pcie)
+{
+	brcm_msi_remove(pcie);
+	brcm_pcie_turn_off(pcie);
+	if (brcm_phy_stop(pcie))
+		dev_err(pcie->dev, "Could not stop phy\n");
+	if (reset_control_rearm(pcie->rescal))
+		dev_err(pcie->dev, "Could not rearm rescal reset\n");
+	clk_disable_unprepare(pcie->clk);
+}
+
+static void brcm_pcie_remove(struct platform_device *pdev)
+{
+	struct brcm_pcie *pcie = platform_get_drvdata(pdev);
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(pcie);
+
+	pci_stop_root_bus(bridge->bus);
+	pci_remove_root_bus(bridge->bus);
+	__brcm_pcie_remove(pcie);
+}
+
+static const int pcie_offsets[] = {
+	[RGR1_SW_INIT_1] = 0x9210,
+	[EXT_CFG_INDEX]  = 0x9000,
+	[EXT_CFG_DATA]   = 0x9004,
+};
+
+static const int pcie_offsets_bmips_7425[] = {
+	[RGR1_SW_INIT_1] = 0x8010,
+	[EXT_CFG_INDEX]  = 0x8300,
+	[EXT_CFG_DATA]   = 0x8304,
+};
+
+static const struct pcie_cfg_data generic_cfg = {
+	.offsets	= pcie_offsets,
+	.type		= GENERIC,
+	.perst_set	= brcm_pcie_perst_set_generic,
+	.bridge_sw_init_set = brcm_pcie_bridge_sw_init_set_generic,
+};
+
+static const struct pcie_cfg_data bcm7425_cfg = {
+	.offsets	= pcie_offsets_bmips_7425,
+	.type		= BCM7425,
+	.perst_set	= brcm_pcie_perst_set_generic,
+	.bridge_sw_init_set = brcm_pcie_bridge_sw_init_set_generic,
+};
+
+static const struct pcie_cfg_data bcm7435_cfg = {
+	.offsets	= pcie_offsets,
+	.type		= BCM7435,
+	.perst_set	= brcm_pcie_perst_set_generic,
+	.bridge_sw_init_set = brcm_pcie_bridge_sw_init_set_generic,
+};
+
+static const struct pcie_cfg_data bcm4908_cfg = {
+	.offsets	= pcie_offsets,
+	.type		= BCM4908,
+	.perst_set	= brcm_pcie_perst_set_4908,
+	.bridge_sw_init_set = brcm_pcie_bridge_sw_init_set_generic,
+};
+
+static const int pcie_offset_bcm7278[] = {
+	[RGR1_SW_INIT_1] = 0xc010,
+	[EXT_CFG_INDEX] = 0x9000,
+	[EXT_CFG_DATA] = 0x9004,
+};
+
+static const struct pcie_cfg_data bcm7278_cfg = {
+	.offsets	= pcie_offset_bcm7278,
+	.type		= BCM7278,
+	.perst_set	= brcm_pcie_perst_set_7278,
+	.bridge_sw_init_set = brcm_pcie_bridge_sw_init_set_7278,
+};
+
+static const struct pcie_cfg_data bcm2711_cfg = {
+	.offsets	= pcie_offsets,
+	.type		= BCM2711,
+	.perst_set	= brcm_pcie_perst_set_generic,
+	.bridge_sw_init_set = brcm_pcie_bridge_sw_init_set_generic,
+};
+
+static const struct of_device_id brcm_pcie_match[] = {
+	{ .compatible = "brcm,bcm2711-pcie", .data = &bcm2711_cfg },
+	{ .compatible = "brcm,bcm4908-pcie", .data = &bcm4908_cfg },
+	{ .compatible = "brcm,bcm7211-pcie", .data = &generic_cfg },
+	{ .compatible = "brcm,bcm7278-pcie", .data = &bcm7278_cfg },
+	{ .compatible = "brcm,bcm7216-pcie", .data = &bcm7278_cfg },
+	{ .compatible = "brcm,bcm7445-pcie", .data = &generic_cfg },
+	{ .compatible = "brcm,bcm7435-pcie", .data = &bcm7435_cfg },
+	{ .compatible = "brcm,bcm7425-pcie", .data = &bcm7425_cfg },
+	{},
+};
+
+static struct pci_ops brcm_pcie_ops = {
+	.map_bus = brcm_pcie_map_bus,
+	.read = pci_generic_config_read,
+	.write = pci_generic_config_write,
+	.add_bus = brcm_pcie_add_bus,
+	.remove_bus = brcm_pcie_remove_bus,
+};
+
+static struct pci_ops brcm7425_pcie_ops = {
+	.map_bus = brcm7425_pcie_map_bus,
+	.read = pci_generic_config_read32,
+	.write = pci_generic_config_write32,
+	.add_bus = brcm_pcie_add_bus,
+	.remove_bus = brcm_pcie_remove_bus,
+};
+
+static int brcm_pcie_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node, *msi_np;
+	struct pci_host_bridge *bridge;
+	const struct pcie_cfg_data *data;
+	struct brcm_pcie *pcie;
+	int ret;
+
+	bridge = devm_pci_alloc_host_bridge(&pdev->dev, sizeof(*pcie));
+	if (!bridge)
+		return -ENOMEM;
+
+	data = of_device_get_match_data(&pdev->dev);
+	if (!data) {
+		pr_err("failed to look up compatible string\n");
+		return -EINVAL;
+	}
+
+	pcie = pci_host_bridge_priv(bridge);
+	pcie->dev = &pdev->dev;
+	pcie->np = np;
+	pcie->reg_offsets = data->offsets;
+	pcie->type = data->type;
+	pcie->perst_set = data->perst_set;
+	pcie->bridge_sw_init_set = data->bridge_sw_init_set;
+
+	pcie->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(pcie->base))
+		return PTR_ERR(pcie->base);
+
+	pcie->clk = devm_clk_get_optional(&pdev->dev, "sw_pcie");
+	if (IS_ERR(pcie->clk))
+		return PTR_ERR(pcie->clk);
+
+	ret = of_pci_get_max_link_speed(np);
+	pcie->gen = (ret < 0) ? 0 : ret;
+
+	pcie->ssc = of_property_read_bool(np, "brcm,enable-ssc");
+
+	ret = clk_prepare_enable(pcie->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "could not enable clock\n");
+		return ret;
+	}
+	pcie->rescal = devm_reset_control_get_optional_shared(&pdev->dev, "rescal");
+	if (IS_ERR(pcie->rescal)) {
+		clk_disable_unprepare(pcie->clk);
+		return PTR_ERR(pcie->rescal);
+	}
+	pcie->perst_reset = devm_reset_control_get_optional_exclusive(&pdev->dev, "perst");
+	if (IS_ERR(pcie->perst_reset)) {
+		clk_disable_unprepare(pcie->clk);
+		return PTR_ERR(pcie->perst_reset);
+	}
+
+	ret = reset_control_reset(pcie->rescal);
+	if (ret)
+		dev_err(&pdev->dev, "failed to deassert 'rescal'\n");
+
+	ret = brcm_phy_start(pcie);
+	if (ret) {
+		reset_control_rearm(pcie->rescal);
+		clk_disable_unprepare(pcie->clk);
+		return ret;
+	}
+
+	ret = brcm_pcie_setup(pcie);
+	if (ret)
+		goto fail;
+
+	pcie->hw_rev = readl(pcie->base + PCIE_MISC_REVISION);
+	if (pcie->type == BCM4908 && pcie->hw_rev >= BRCM_PCIE_HW_REV_3_20) {
+		dev_err(pcie->dev, "hardware revision with unsupported PERST# setup\n");
+		ret = -ENODEV;
+		goto fail;
+	}
+
+	msi_np = of_parse_phandle(pcie->np, "msi-parent", 0);
+	if (pci_msi_enabled() && msi_np == pcie->np) {
+		ret = brcm_pcie_enable_msi(pcie);
+		if (ret) {
+			dev_err(pcie->dev, "probe of internal MSI failed");
+			goto fail;
+		}
+	}
+
+	bridge->ops = pcie->type == BCM7425 ? &brcm7425_pcie_ops : &brcm_pcie_ops;
+	bridge->sysdata = pcie;
+
+	platform_set_drvdata(pdev, pcie);
+
+	ret = pci_host_probe(bridge);
+	if (!ret && !brcm_pcie_link_up(pcie))
+		ret = -ENODEV;
+
+	if (ret) {
+		brcm_pcie_remove(pdev);
+		return ret;
+	}
+
+	return 0;
+
+fail:
+	__brcm_pcie_remove(pcie);
+	return ret;
+}
+
+MODULE_DEVICE_TABLE(of, brcm_pcie_match);
+
+static const struct dev_pm_ops brcm_pcie_pm_ops = {
+	.suspend_noirq = brcm_pcie_suspend_noirq,
+	.resume_noirq = brcm_pcie_resume_noirq,
+};
+
+static struct platform_driver brcm_pcie_driver = {
+	.probe = brcm_pcie_probe,
+	.remove_new = brcm_pcie_remove,
+	.driver = {
+		.name = "brcm-pcie",
+		.of_match_table = brcm_pcie_match,
+		.pm = &brcm_pcie_pm_ops,
+	},
+};
+module_platform_driver(brcm_pcie_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Broadcom STB PCIe RC driver");
+MODULE_AUTHOR("Broadcom");
diff --git a/drivers/pci/controller/pcie-hisi-error.c b/drivers/pci/controller/pcie-hisi-error.c
new file mode 100644
index 000000000..ad9d5ffcd
--- /dev/null
+++ b/drivers/pci/controller/pcie-hisi-error.c
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for handling the PCIe controller errors on
+ * HiSilicon HIP SoCs.
+ *
+ * Copyright (c) 2020 HiSilicon Limited.
+ */
+
+#include <linux/acpi.h>
+#include <acpi/ghes.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/kfifo.h>
+#include <linux/spinlock.h>
+
+/* HISI PCIe controller error definitions */
+#define HISI_PCIE_ERR_MISC_REGS	33
+
+#define HISI_PCIE_LOCAL_VALID_VERSION		BIT(0)
+#define HISI_PCIE_LOCAL_VALID_SOC_ID		BIT(1)
+#define HISI_PCIE_LOCAL_VALID_SOCKET_ID		BIT(2)
+#define HISI_PCIE_LOCAL_VALID_NIMBUS_ID		BIT(3)
+#define HISI_PCIE_LOCAL_VALID_SUB_MODULE_ID	BIT(4)
+#define HISI_PCIE_LOCAL_VALID_CORE_ID		BIT(5)
+#define HISI_PCIE_LOCAL_VALID_PORT_ID		BIT(6)
+#define HISI_PCIE_LOCAL_VALID_ERR_TYPE		BIT(7)
+#define HISI_PCIE_LOCAL_VALID_ERR_SEVERITY	BIT(8)
+#define HISI_PCIE_LOCAL_VALID_ERR_MISC		9
+
+static guid_t hisi_pcie_sec_guid =
+	GUID_INIT(0xB2889FC9, 0xE7D7, 0x4F9D,
+		  0xA8, 0x67, 0xAF, 0x42, 0xE9, 0x8B, 0xE7, 0x72);
+
+/*
+ * Firmware reports the socket port ID where the error occurred.  These
+ * macros convert that to the core ID and core port ID required by the
+ * ACPI reset method.
+ */
+#define HISI_PCIE_PORT_ID(core, v)       (((v) >> 1) + ((core) << 3))
+#define HISI_PCIE_CORE_ID(v)             ((v) >> 3)
+#define HISI_PCIE_CORE_PORT_ID(v)        (((v) & 7) << 1)
+
+struct hisi_pcie_error_data {
+	u64	val_bits;
+	u8	version;
+	u8	soc_id;
+	u8	socket_id;
+	u8	nimbus_id;
+	u8	sub_module_id;
+	u8	core_id;
+	u8	port_id;
+	u8	err_severity;
+	u16	err_type;
+	u8	reserv[2];
+	u32	err_misc[HISI_PCIE_ERR_MISC_REGS];
+};
+
+struct hisi_pcie_error_private {
+	struct notifier_block	nb;
+	struct device *dev;
+};
+
+enum hisi_pcie_submodule_id {
+	HISI_PCIE_SUB_MODULE_ID_AP,
+	HISI_PCIE_SUB_MODULE_ID_TL,
+	HISI_PCIE_SUB_MODULE_ID_MAC,
+	HISI_PCIE_SUB_MODULE_ID_DL,
+	HISI_PCIE_SUB_MODULE_ID_SDI,
+};
+
+static const char * const hisi_pcie_sub_module[] = {
+	[HISI_PCIE_SUB_MODULE_ID_AP]	= "AP Layer",
+	[HISI_PCIE_SUB_MODULE_ID_TL]	= "TL Layer",
+	[HISI_PCIE_SUB_MODULE_ID_MAC]	= "MAC Layer",
+	[HISI_PCIE_SUB_MODULE_ID_DL]	= "DL Layer",
+	[HISI_PCIE_SUB_MODULE_ID_SDI]	= "SDI Layer",
+};
+
+enum hisi_pcie_err_severity {
+	HISI_PCIE_ERR_SEV_RECOVERABLE,
+	HISI_PCIE_ERR_SEV_FATAL,
+	HISI_PCIE_ERR_SEV_CORRECTED,
+	HISI_PCIE_ERR_SEV_NONE,
+};
+
+static const char * const hisi_pcie_error_sev[] = {
+	[HISI_PCIE_ERR_SEV_RECOVERABLE]	= "recoverable",
+	[HISI_PCIE_ERR_SEV_FATAL]	= "fatal",
+	[HISI_PCIE_ERR_SEV_CORRECTED]	= "corrected",
+	[HISI_PCIE_ERR_SEV_NONE]	= "none",
+};
+
+static const char *hisi_pcie_get_string(const char * const *array,
+					size_t n, u32 id)
+{
+	u32 index;
+
+	for (index = 0; index < n; index++) {
+		if (index == id && array[index])
+			return array[index];
+	}
+
+	return "unknown";
+}
+
+static int hisi_pcie_port_reset(struct platform_device *pdev,
+				u32 chip_id, u32 port_id)
+{
+	struct device *dev = &pdev->dev;
+	acpi_handle handle = ACPI_HANDLE(dev);
+	union acpi_object arg[3];
+	struct acpi_object_list arg_list;
+	acpi_status s;
+	unsigned long long data = 0;
+
+	arg[0].type = ACPI_TYPE_INTEGER;
+	arg[0].integer.value = chip_id;
+	arg[1].type = ACPI_TYPE_INTEGER;
+	arg[1].integer.value = HISI_PCIE_CORE_ID(port_id);
+	arg[2].type = ACPI_TYPE_INTEGER;
+	arg[2].integer.value = HISI_PCIE_CORE_PORT_ID(port_id);
+
+	arg_list.count = 3;
+	arg_list.pointer = arg;
+
+	s = acpi_evaluate_integer(handle, "RST", &arg_list, &data);
+	if (ACPI_FAILURE(s)) {
+		dev_err(dev, "No RST method\n");
+		return -EIO;
+	}
+
+	if (data) {
+		dev_err(dev, "Failed to Reset\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int hisi_pcie_port_do_recovery(struct platform_device *dev,
+				      u32 chip_id, u32 port_id)
+{
+	acpi_status s;
+	struct device *device = &dev->dev;
+	acpi_handle root_handle = ACPI_HANDLE(device);
+	struct acpi_pci_root *pci_root;
+	struct pci_bus *root_bus;
+	struct pci_dev *pdev;
+	u32 domain, busnr, devfn;
+
+	s = acpi_get_parent(root_handle, &root_handle);
+	if (ACPI_FAILURE(s))
+		return -ENODEV;
+	pci_root = acpi_pci_find_root(root_handle);
+	if (!pci_root)
+		return -ENODEV;
+	root_bus = pci_root->bus;
+	domain = pci_root->segment;
+
+	busnr = root_bus->number;
+	devfn = PCI_DEVFN(port_id, 0);
+	pdev = pci_get_domain_bus_and_slot(domain, busnr, devfn);
+	if (!pdev) {
+		dev_info(device, "Fail to get root port %04x:%02x:%02x.%d device\n",
+			 domain, busnr, PCI_SLOT(devfn), PCI_FUNC(devfn));
+		return -ENODEV;
+	}
+
+	pci_stop_and_remove_bus_device_locked(pdev);
+	pci_dev_put(pdev);
+
+	if (hisi_pcie_port_reset(dev, chip_id, port_id))
+		return -EIO;
+
+	/*
+	 * The initialization time of subordinate devices after
+	 * hot reset is no more than 1s, which is required by
+	 * the PCI spec v5.0 sec 6.6.1. The time will shorten
+	 * if Readiness Notifications mechanisms are used. But
+	 * wait 1s here to adapt any conditions.
+	 */
+	ssleep(1UL);
+
+	/* add root port and downstream devices */
+	pci_lock_rescan_remove();
+	pci_rescan_bus(root_bus);
+	pci_unlock_rescan_remove();
+
+	return 0;
+}
+
+static void hisi_pcie_handle_error(struct platform_device *pdev,
+				   const struct hisi_pcie_error_data *edata)
+{
+	struct device *dev = &pdev->dev;
+	int idx, rc;
+	const unsigned long valid_bits[] = {BITMAP_FROM_U64(edata->val_bits)};
+
+	if (edata->val_bits == 0) {
+		dev_warn(dev, "%s: no valid error information\n", __func__);
+		return;
+	}
+
+	dev_info(dev, "\nHISI : HIP : PCIe controller error\n");
+	if (edata->val_bits & HISI_PCIE_LOCAL_VALID_SOC_ID)
+		dev_info(dev, "Table version = %d\n", edata->version);
+	if (edata->val_bits & HISI_PCIE_LOCAL_VALID_SOCKET_ID)
+		dev_info(dev, "Socket ID = %d\n", edata->socket_id);
+	if (edata->val_bits & HISI_PCIE_LOCAL_VALID_NIMBUS_ID)
+		dev_info(dev, "Nimbus ID = %d\n", edata->nimbus_id);
+	if (edata->val_bits & HISI_PCIE_LOCAL_VALID_SUB_MODULE_ID)
+		dev_info(dev, "Sub Module = %s\n",
+			 hisi_pcie_get_string(hisi_pcie_sub_module,
+					      ARRAY_SIZE(hisi_pcie_sub_module),
+					      edata->sub_module_id));
+	if (edata->val_bits & HISI_PCIE_LOCAL_VALID_CORE_ID)
+		dev_info(dev, "Core ID = core%d\n", edata->core_id);
+	if (edata->val_bits & HISI_PCIE_LOCAL_VALID_PORT_ID)
+		dev_info(dev, "Port ID = port%d\n", edata->port_id);
+	if (edata->val_bits & HISI_PCIE_LOCAL_VALID_ERR_SEVERITY)
+		dev_info(dev, "Error severity = %s\n",
+			 hisi_pcie_get_string(hisi_pcie_error_sev,
+					      ARRAY_SIZE(hisi_pcie_error_sev),
+					      edata->err_severity));
+	if (edata->val_bits & HISI_PCIE_LOCAL_VALID_ERR_TYPE)
+		dev_info(dev, "Error type = 0x%x\n", edata->err_type);
+
+	dev_info(dev, "Reg Dump:\n");
+	idx = HISI_PCIE_LOCAL_VALID_ERR_MISC;
+	for_each_set_bit_from(idx, valid_bits,
+			      HISI_PCIE_LOCAL_VALID_ERR_MISC + HISI_PCIE_ERR_MISC_REGS)
+		dev_info(dev, "ERR_MISC_%d = 0x%x\n", idx - HISI_PCIE_LOCAL_VALID_ERR_MISC,
+			 edata->err_misc[idx - HISI_PCIE_LOCAL_VALID_ERR_MISC]);
+
+	if (edata->err_severity != HISI_PCIE_ERR_SEV_RECOVERABLE)
+		return;
+
+	/* Recovery for the PCIe controller errors, try reset
+	 * PCI port for the error recovery
+	 */
+	rc = hisi_pcie_port_do_recovery(pdev, edata->socket_id,
+			HISI_PCIE_PORT_ID(edata->core_id, edata->port_id));
+	if (rc)
+		dev_info(dev, "fail to do hisi pcie port reset\n");
+}
+
+static int hisi_pcie_notify_error(struct notifier_block *nb,
+				  unsigned long event, void *data)
+{
+	struct acpi_hest_generic_data *gdata = data;
+	const struct hisi_pcie_error_data *error_data = acpi_hest_get_payload(gdata);
+	struct hisi_pcie_error_private *priv;
+	struct device *dev;
+	struct platform_device *pdev;
+	guid_t err_sec_guid;
+	u8 socket;
+
+	import_guid(&err_sec_guid, gdata->section_type);
+	if (!guid_equal(&err_sec_guid, &hisi_pcie_sec_guid))
+		return NOTIFY_DONE;
+
+	priv = container_of(nb, struct hisi_pcie_error_private, nb);
+	dev = priv->dev;
+
+	if (device_property_read_u8(dev, "socket", &socket))
+		return NOTIFY_DONE;
+
+	if (error_data->socket_id != socket)
+		return NOTIFY_DONE;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	hisi_pcie_handle_error(pdev, error_data);
+
+	return NOTIFY_OK;
+}
+
+static int hisi_pcie_error_handler_probe(struct platform_device *pdev)
+{
+	struct hisi_pcie_error_private *priv;
+	int ret;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->nb.notifier_call = hisi_pcie_notify_error;
+	priv->dev = &pdev->dev;
+	ret = ghes_register_vendor_record_notifier(&priv->nb);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"Failed to register hisi pcie controller error handler with apei\n");
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, priv);
+
+	return 0;
+}
+
+static void hisi_pcie_error_handler_remove(struct platform_device *pdev)
+{
+	struct hisi_pcie_error_private *priv = platform_get_drvdata(pdev);
+
+	ghes_unregister_vendor_record_notifier(&priv->nb);
+}
+
+static const struct acpi_device_id hisi_pcie_acpi_match[] = {
+	{ "HISI0361", 0 },
+	{ }
+};
+
+static struct platform_driver hisi_pcie_error_handler_driver = {
+	.driver = {
+		.name	= "hisi-pcie-error-handler",
+		.acpi_match_table = hisi_pcie_acpi_match,
+	},
+	.probe		= hisi_pcie_error_handler_probe,
+	.remove_new	= hisi_pcie_error_handler_remove,
+};
+module_platform_driver(hisi_pcie_error_handler_driver);
+
+MODULE_DESCRIPTION("HiSilicon HIP PCIe controller error handling driver");
diff --git a/drivers/pci/controller/pcie-iproc-bcma.c b/drivers/pci/controller/pcie-iproc-bcma.c
new file mode 100644
index 000000000..99a999004
--- /dev/null
+++ b/drivers/pci/controller/pcie-iproc-bcma.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015 Broadcom Corporation
+ * Copyright (C) 2015 Hauke Mehrtens <hauke@hauke-m.de>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/phy/phy.h>
+#include <linux/bcma/bcma.h>
+#include <linux/ioport.h>
+
+#include "pcie-iproc.h"
+
+
+/* NS: CLASS field is R/O, and set to wrong 0x200 value */
+static void bcma_pcie2_fixup_class(struct pci_dev *dev)
+{
+	dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0x8011, bcma_pcie2_fixup_class);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0x8012, bcma_pcie2_fixup_class);
+
+static int iproc_bcma_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	struct iproc_pcie *pcie = dev->sysdata;
+	struct bcma_device *bdev = container_of(pcie->dev, struct bcma_device, dev);
+
+	return bcma_core_irq(bdev, 5);
+}
+
+static int iproc_bcma_pcie_probe(struct bcma_device *bdev)
+{
+	struct device *dev = &bdev->dev;
+	struct iproc_pcie *pcie;
+	struct pci_host_bridge *bridge;
+	int ret;
+
+	bridge = devm_pci_alloc_host_bridge(dev, sizeof(*pcie));
+	if (!bridge)
+		return -ENOMEM;
+
+	pcie = pci_host_bridge_priv(bridge);
+
+	pcie->dev = dev;
+
+	pcie->type = IPROC_PCIE_PAXB_BCMA;
+	pcie->base = bdev->io_addr;
+	if (!pcie->base) {
+		dev_err(dev, "no controller registers\n");
+		return -ENOMEM;
+	}
+
+	pcie->base_addr = bdev->addr;
+
+	pcie->mem.start = bdev->addr_s[0];
+	pcie->mem.end = bdev->addr_s[0] + SZ_128M - 1;
+	pcie->mem.name = "PCIe MEM space";
+	pcie->mem.flags = IORESOURCE_MEM;
+	pci_add_resource(&bridge->windows, &pcie->mem);
+	ret = devm_request_pci_bus_resources(dev, &bridge->windows);
+	if (ret)
+		return ret;
+
+	pcie->map_irq = iproc_bcma_pcie_map_irq;
+
+	bcma_set_drvdata(bdev, pcie);
+
+	return iproc_pcie_setup(pcie, &bridge->windows);
+}
+
+static void iproc_bcma_pcie_remove(struct bcma_device *bdev)
+{
+	struct iproc_pcie *pcie = bcma_get_drvdata(bdev);
+
+	iproc_pcie_remove(pcie);
+}
+
+static const struct bcma_device_id iproc_bcma_pcie_table[] = {
+	BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_NS_PCIEG2, BCMA_ANY_REV, BCMA_ANY_CLASS),
+	{},
+};
+MODULE_DEVICE_TABLE(bcma, iproc_bcma_pcie_table);
+
+static struct bcma_driver iproc_bcma_pcie_driver = {
+	.name		= KBUILD_MODNAME,
+	.id_table	= iproc_bcma_pcie_table,
+	.probe		= iproc_bcma_pcie_probe,
+	.remove		= iproc_bcma_pcie_remove,
+};
+module_bcma_driver(iproc_bcma_pcie_driver);
+
+MODULE_AUTHOR("Hauke Mehrtens");
+MODULE_DESCRIPTION("Broadcom iProc PCIe BCMA driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pcie-iproc-msi.c b/drivers/pci/controller/pcie-iproc-msi.c
new file mode 100644
index 000000000..649fcb449
--- /dev/null
+++ b/drivers/pci/controller/pcie-iproc-msi.c
@@ -0,0 +1,681 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015 Broadcom Corporation
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/msi.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+
+#include "pcie-iproc.h"
+
+#define IPROC_MSI_INTR_EN_SHIFT        11
+#define IPROC_MSI_INTR_EN              BIT(IPROC_MSI_INTR_EN_SHIFT)
+#define IPROC_MSI_INT_N_EVENT_SHIFT    1
+#define IPROC_MSI_INT_N_EVENT          BIT(IPROC_MSI_INT_N_EVENT_SHIFT)
+#define IPROC_MSI_EQ_EN_SHIFT          0
+#define IPROC_MSI_EQ_EN                BIT(IPROC_MSI_EQ_EN_SHIFT)
+
+#define IPROC_MSI_EQ_MASK              0x3f
+
+/* Max number of GIC interrupts */
+#define NR_HW_IRQS                     6
+
+/* Number of entries in each event queue */
+#define EQ_LEN                         64
+
+/* Size of each event queue memory region */
+#define EQ_MEM_REGION_SIZE             SZ_4K
+
+/* Size of each MSI address region */
+#define MSI_MEM_REGION_SIZE            SZ_4K
+
+enum iproc_msi_reg {
+	IPROC_MSI_EQ_PAGE = 0,
+	IPROC_MSI_EQ_PAGE_UPPER,
+	IPROC_MSI_PAGE,
+	IPROC_MSI_PAGE_UPPER,
+	IPROC_MSI_CTRL,
+	IPROC_MSI_EQ_HEAD,
+	IPROC_MSI_EQ_TAIL,
+	IPROC_MSI_INTS_EN,
+	IPROC_MSI_REG_SIZE,
+};
+
+struct iproc_msi;
+
+/**
+ * struct iproc_msi_grp - iProc MSI group
+ *
+ * One MSI group is allocated per GIC interrupt, serviced by one iProc MSI
+ * event queue.
+ *
+ * @msi: pointer to iProc MSI data
+ * @gic_irq: GIC interrupt
+ * @eq: Event queue number
+ */
+struct iproc_msi_grp {
+	struct iproc_msi *msi;
+	int gic_irq;
+	unsigned int eq;
+};
+
+/**
+ * struct iproc_msi - iProc event queue based MSI
+ *
+ * Only meant to be used on platforms without MSI support integrated into the
+ * GIC.
+ *
+ * @pcie: pointer to iProc PCIe data
+ * @reg_offsets: MSI register offsets
+ * @grps: MSI groups
+ * @nr_irqs: number of total interrupts connected to GIC
+ * @nr_cpus: number of toal CPUs
+ * @has_inten_reg: indicates the MSI interrupt enable register needs to be
+ * set explicitly (required for some legacy platforms)
+ * @bitmap: MSI vector bitmap
+ * @bitmap_lock: lock to protect access to the MSI bitmap
+ * @nr_msi_vecs: total number of MSI vectors
+ * @inner_domain: inner IRQ domain
+ * @msi_domain: MSI IRQ domain
+ * @nr_eq_region: required number of 4K aligned memory region for MSI event
+ * queues
+ * @nr_msi_region: required number of 4K aligned address region for MSI posted
+ * writes
+ * @eq_cpu: pointer to allocated memory region for MSI event queues
+ * @eq_dma: DMA address of MSI event queues
+ * @msi_addr: MSI address
+ */
+struct iproc_msi {
+	struct iproc_pcie *pcie;
+	const u16 (*reg_offsets)[IPROC_MSI_REG_SIZE];
+	struct iproc_msi_grp *grps;
+	int nr_irqs;
+	int nr_cpus;
+	bool has_inten_reg;
+	unsigned long *bitmap;
+	struct mutex bitmap_lock;
+	unsigned int nr_msi_vecs;
+	struct irq_domain *inner_domain;
+	struct irq_domain *msi_domain;
+	unsigned int nr_eq_region;
+	unsigned int nr_msi_region;
+	void *eq_cpu;
+	dma_addr_t eq_dma;
+	phys_addr_t msi_addr;
+};
+
+static const u16 iproc_msi_reg_paxb[NR_HW_IRQS][IPROC_MSI_REG_SIZE] = {
+	{ 0x200, 0x2c0, 0x204, 0x2c4, 0x210, 0x250, 0x254, 0x208 },
+	{ 0x200, 0x2c0, 0x204, 0x2c4, 0x214, 0x258, 0x25c, 0x208 },
+	{ 0x200, 0x2c0, 0x204, 0x2c4, 0x218, 0x260, 0x264, 0x208 },
+	{ 0x200, 0x2c0, 0x204, 0x2c4, 0x21c, 0x268, 0x26c, 0x208 },
+	{ 0x200, 0x2c0, 0x204, 0x2c4, 0x220, 0x270, 0x274, 0x208 },
+	{ 0x200, 0x2c0, 0x204, 0x2c4, 0x224, 0x278, 0x27c, 0x208 },
+};
+
+static const u16 iproc_msi_reg_paxc[NR_HW_IRQS][IPROC_MSI_REG_SIZE] = {
+	{ 0xc00, 0xc04, 0xc08, 0xc0c, 0xc40, 0xc50, 0xc60 },
+	{ 0xc10, 0xc14, 0xc18, 0xc1c, 0xc44, 0xc54, 0xc64 },
+	{ 0xc20, 0xc24, 0xc28, 0xc2c, 0xc48, 0xc58, 0xc68 },
+	{ 0xc30, 0xc34, 0xc38, 0xc3c, 0xc4c, 0xc5c, 0xc6c },
+};
+
+static inline u32 iproc_msi_read_reg(struct iproc_msi *msi,
+				     enum iproc_msi_reg reg,
+				     unsigned int eq)
+{
+	struct iproc_pcie *pcie = msi->pcie;
+
+	return readl_relaxed(pcie->base + msi->reg_offsets[eq][reg]);
+}
+
+static inline void iproc_msi_write_reg(struct iproc_msi *msi,
+				       enum iproc_msi_reg reg,
+				       int eq, u32 val)
+{
+	struct iproc_pcie *pcie = msi->pcie;
+
+	writel_relaxed(val, pcie->base + msi->reg_offsets[eq][reg]);
+}
+
+static inline u32 hwirq_to_group(struct iproc_msi *msi, unsigned long hwirq)
+{
+	return (hwirq % msi->nr_irqs);
+}
+
+static inline unsigned int iproc_msi_addr_offset(struct iproc_msi *msi,
+						 unsigned long hwirq)
+{
+	if (msi->nr_msi_region > 1)
+		return hwirq_to_group(msi, hwirq) * MSI_MEM_REGION_SIZE;
+	else
+		return hwirq_to_group(msi, hwirq) * sizeof(u32);
+}
+
+static inline unsigned int iproc_msi_eq_offset(struct iproc_msi *msi, u32 eq)
+{
+	if (msi->nr_eq_region > 1)
+		return eq * EQ_MEM_REGION_SIZE;
+	else
+		return eq * EQ_LEN * sizeof(u32);
+}
+
+static struct irq_chip iproc_msi_irq_chip = {
+	.name = "iProc-MSI",
+};
+
+static struct msi_domain_info iproc_msi_domain_info = {
+	.flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		MSI_FLAG_PCI_MSIX,
+	.chip = &iproc_msi_irq_chip,
+};
+
+/*
+ * In iProc PCIe core, each MSI group is serviced by a GIC interrupt and a
+ * dedicated event queue.  Each MSI group can support up to 64 MSI vectors.
+ *
+ * The number of MSI groups varies between different iProc SoCs.  The total
+ * number of CPU cores also varies.  To support MSI IRQ affinity, we
+ * distribute GIC interrupts across all available CPUs.  MSI vector is moved
+ * from one GIC interrupt to another to steer to the target CPU.
+ *
+ * Assuming:
+ * - the number of MSI groups is M
+ * - the number of CPU cores is N
+ * - M is always a multiple of N
+ *
+ * Total number of raw MSI vectors = M * 64
+ * Total number of supported MSI vectors = (M * 64) / N
+ */
+static inline int hwirq_to_cpu(struct iproc_msi *msi, unsigned long hwirq)
+{
+	return (hwirq % msi->nr_cpus);
+}
+
+static inline unsigned long hwirq_to_canonical_hwirq(struct iproc_msi *msi,
+						     unsigned long hwirq)
+{
+	return (hwirq - hwirq_to_cpu(msi, hwirq));
+}
+
+static int iproc_msi_irq_set_affinity(struct irq_data *data,
+				      const struct cpumask *mask, bool force)
+{
+	struct iproc_msi *msi = irq_data_get_irq_chip_data(data);
+	int target_cpu = cpumask_first(mask);
+	int curr_cpu;
+	int ret;
+
+	curr_cpu = hwirq_to_cpu(msi, data->hwirq);
+	if (curr_cpu == target_cpu)
+		ret = IRQ_SET_MASK_OK_DONE;
+	else {
+		/* steer MSI to the target CPU */
+		data->hwirq = hwirq_to_canonical_hwirq(msi, data->hwirq) + target_cpu;
+		ret = IRQ_SET_MASK_OK;
+	}
+
+	irq_data_update_effective_affinity(data, cpumask_of(target_cpu));
+
+	return ret;
+}
+
+static void iproc_msi_irq_compose_msi_msg(struct irq_data *data,
+					  struct msi_msg *msg)
+{
+	struct iproc_msi *msi = irq_data_get_irq_chip_data(data);
+	dma_addr_t addr;
+
+	addr = msi->msi_addr + iproc_msi_addr_offset(msi, data->hwirq);
+	msg->address_lo = lower_32_bits(addr);
+	msg->address_hi = upper_32_bits(addr);
+	msg->data = data->hwirq << 5;
+}
+
+static struct irq_chip iproc_msi_bottom_irq_chip = {
+	.name = "MSI",
+	.irq_set_affinity = iproc_msi_irq_set_affinity,
+	.irq_compose_msi_msg = iproc_msi_irq_compose_msi_msg,
+};
+
+static int iproc_msi_irq_domain_alloc(struct irq_domain *domain,
+				      unsigned int virq, unsigned int nr_irqs,
+				      void *args)
+{
+	struct iproc_msi *msi = domain->host_data;
+	int hwirq, i;
+
+	if (msi->nr_cpus > 1 && nr_irqs > 1)
+		return -EINVAL;
+
+	mutex_lock(&msi->bitmap_lock);
+
+	/*
+	 * Allocate 'nr_irqs' multiplied by 'nr_cpus' number of MSI vectors
+	 * each time
+	 */
+	hwirq = bitmap_find_free_region(msi->bitmap, msi->nr_msi_vecs,
+					order_base_2(msi->nr_cpus * nr_irqs));
+
+	mutex_unlock(&msi->bitmap_lock);
+
+	if (hwirq < 0)
+		return -ENOSPC;
+
+	for (i = 0; i < nr_irqs; i++) {
+		irq_domain_set_info(domain, virq + i, hwirq + i,
+				    &iproc_msi_bottom_irq_chip,
+				    domain->host_data, handle_simple_irq,
+				    NULL, NULL);
+	}
+
+	return 0;
+}
+
+static void iproc_msi_irq_domain_free(struct irq_domain *domain,
+				      unsigned int virq, unsigned int nr_irqs)
+{
+	struct irq_data *data = irq_domain_get_irq_data(domain, virq);
+	struct iproc_msi *msi = irq_data_get_irq_chip_data(data);
+	unsigned int hwirq;
+
+	mutex_lock(&msi->bitmap_lock);
+
+	hwirq = hwirq_to_canonical_hwirq(msi, data->hwirq);
+	bitmap_release_region(msi->bitmap, hwirq,
+			      order_base_2(msi->nr_cpus * nr_irqs));
+
+	mutex_unlock(&msi->bitmap_lock);
+
+	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+}
+
+static const struct irq_domain_ops msi_domain_ops = {
+	.alloc = iproc_msi_irq_domain_alloc,
+	.free = iproc_msi_irq_domain_free,
+};
+
+static inline u32 decode_msi_hwirq(struct iproc_msi *msi, u32 eq, u32 head)
+{
+	u32 __iomem *msg;
+	u32 hwirq;
+	unsigned int offs;
+
+	offs = iproc_msi_eq_offset(msi, eq) + head * sizeof(u32);
+	msg = (u32 __iomem *)(msi->eq_cpu + offs);
+	hwirq = readl(msg);
+	hwirq = (hwirq >> 5) + (hwirq & 0x1f);
+
+	/*
+	 * Since we have multiple hwirq mapped to a single MSI vector,
+	 * now we need to derive the hwirq at CPU0.  It can then be used to
+	 * mapped back to virq.
+	 */
+	return hwirq_to_canonical_hwirq(msi, hwirq);
+}
+
+static void iproc_msi_handler(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct iproc_msi_grp *grp;
+	struct iproc_msi *msi;
+	u32 eq, head, tail, nr_events;
+	unsigned long hwirq;
+
+	chained_irq_enter(chip, desc);
+
+	grp = irq_desc_get_handler_data(desc);
+	msi = grp->msi;
+	eq = grp->eq;
+
+	/*
+	 * iProc MSI event queue is tracked by head and tail pointers.  Head
+	 * pointer indicates the next entry (MSI data) to be consumed by SW in
+	 * the queue and needs to be updated by SW.  iProc MSI core uses the
+	 * tail pointer as the next data insertion point.
+	 *
+	 * Entries between head and tail pointers contain valid MSI data.  MSI
+	 * data is guaranteed to be in the event queue memory before the tail
+	 * pointer is updated by the iProc MSI core.
+	 */
+	head = iproc_msi_read_reg(msi, IPROC_MSI_EQ_HEAD,
+				  eq) & IPROC_MSI_EQ_MASK;
+	do {
+		tail = iproc_msi_read_reg(msi, IPROC_MSI_EQ_TAIL,
+					  eq) & IPROC_MSI_EQ_MASK;
+
+		/*
+		 * Figure out total number of events (MSI data) to be
+		 * processed.
+		 */
+		nr_events = (tail < head) ?
+			(EQ_LEN - (head - tail)) : (tail - head);
+		if (!nr_events)
+			break;
+
+		/* process all outstanding events */
+		while (nr_events--) {
+			hwirq = decode_msi_hwirq(msi, eq, head);
+			generic_handle_domain_irq(msi->inner_domain, hwirq);
+
+			head++;
+			head %= EQ_LEN;
+		}
+
+		/*
+		 * Now all outstanding events have been processed.  Update the
+		 * head pointer.
+		 */
+		iproc_msi_write_reg(msi, IPROC_MSI_EQ_HEAD, eq, head);
+
+		/*
+		 * Now go read the tail pointer again to see if there are new
+		 * outstanding events that came in during the above window.
+		 */
+	} while (true);
+
+	chained_irq_exit(chip, desc);
+}
+
+static void iproc_msi_enable(struct iproc_msi *msi)
+{
+	int i, eq;
+	u32 val;
+
+	/* Program memory region for each event queue */
+	for (i = 0; i < msi->nr_eq_region; i++) {
+		dma_addr_t addr = msi->eq_dma + (i * EQ_MEM_REGION_SIZE);
+
+		iproc_msi_write_reg(msi, IPROC_MSI_EQ_PAGE, i,
+				    lower_32_bits(addr));
+		iproc_msi_write_reg(msi, IPROC_MSI_EQ_PAGE_UPPER, i,
+				    upper_32_bits(addr));
+	}
+
+	/* Program address region for MSI posted writes */
+	for (i = 0; i < msi->nr_msi_region; i++) {
+		phys_addr_t addr = msi->msi_addr + (i * MSI_MEM_REGION_SIZE);
+
+		iproc_msi_write_reg(msi, IPROC_MSI_PAGE, i,
+				    lower_32_bits(addr));
+		iproc_msi_write_reg(msi, IPROC_MSI_PAGE_UPPER, i,
+				    upper_32_bits(addr));
+	}
+
+	for (eq = 0; eq < msi->nr_irqs; eq++) {
+		/* Enable MSI event queue */
+		val = IPROC_MSI_INTR_EN | IPROC_MSI_INT_N_EVENT |
+			IPROC_MSI_EQ_EN;
+		iproc_msi_write_reg(msi, IPROC_MSI_CTRL, eq, val);
+
+		/*
+		 * Some legacy platforms require the MSI interrupt enable
+		 * register to be set explicitly.
+		 */
+		if (msi->has_inten_reg) {
+			val = iproc_msi_read_reg(msi, IPROC_MSI_INTS_EN, eq);
+			val |= BIT(eq);
+			iproc_msi_write_reg(msi, IPROC_MSI_INTS_EN, eq, val);
+		}
+	}
+}
+
+static void iproc_msi_disable(struct iproc_msi *msi)
+{
+	u32 eq, val;
+
+	for (eq = 0; eq < msi->nr_irqs; eq++) {
+		if (msi->has_inten_reg) {
+			val = iproc_msi_read_reg(msi, IPROC_MSI_INTS_EN, eq);
+			val &= ~BIT(eq);
+			iproc_msi_write_reg(msi, IPROC_MSI_INTS_EN, eq, val);
+		}
+
+		val = iproc_msi_read_reg(msi, IPROC_MSI_CTRL, eq);
+		val &= ~(IPROC_MSI_INTR_EN | IPROC_MSI_INT_N_EVENT |
+			 IPROC_MSI_EQ_EN);
+		iproc_msi_write_reg(msi, IPROC_MSI_CTRL, eq, val);
+	}
+}
+
+static int iproc_msi_alloc_domains(struct device_node *node,
+				   struct iproc_msi *msi)
+{
+	msi->inner_domain = irq_domain_add_linear(NULL, msi->nr_msi_vecs,
+						  &msi_domain_ops, msi);
+	if (!msi->inner_domain)
+		return -ENOMEM;
+
+	msi->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(node),
+						    &iproc_msi_domain_info,
+						    msi->inner_domain);
+	if (!msi->msi_domain) {
+		irq_domain_remove(msi->inner_domain);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void iproc_msi_free_domains(struct iproc_msi *msi)
+{
+	if (msi->msi_domain)
+		irq_domain_remove(msi->msi_domain);
+
+	if (msi->inner_domain)
+		irq_domain_remove(msi->inner_domain);
+}
+
+static void iproc_msi_irq_free(struct iproc_msi *msi, unsigned int cpu)
+{
+	int i;
+
+	for (i = cpu; i < msi->nr_irqs; i += msi->nr_cpus) {
+		irq_set_chained_handler_and_data(msi->grps[i].gic_irq,
+						 NULL, NULL);
+	}
+}
+
+static int iproc_msi_irq_setup(struct iproc_msi *msi, unsigned int cpu)
+{
+	int i, ret;
+	cpumask_var_t mask;
+	struct iproc_pcie *pcie = msi->pcie;
+
+	for (i = cpu; i < msi->nr_irqs; i += msi->nr_cpus) {
+		irq_set_chained_handler_and_data(msi->grps[i].gic_irq,
+						 iproc_msi_handler,
+						 &msi->grps[i]);
+		/* Dedicate GIC interrupt to each CPU core */
+		if (alloc_cpumask_var(&mask, GFP_KERNEL)) {
+			cpumask_clear(mask);
+			cpumask_set_cpu(cpu, mask);
+			ret = irq_set_affinity(msi->grps[i].gic_irq, mask);
+			if (ret)
+				dev_err(pcie->dev,
+					"failed to set affinity for IRQ%d\n",
+					msi->grps[i].gic_irq);
+			free_cpumask_var(mask);
+		} else {
+			dev_err(pcie->dev, "failed to alloc CPU mask\n");
+			ret = -EINVAL;
+		}
+
+		if (ret) {
+			/* Free all configured/unconfigured IRQs */
+			iproc_msi_irq_free(msi, cpu);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+int iproc_msi_init(struct iproc_pcie *pcie, struct device_node *node)
+{
+	struct iproc_msi *msi;
+	int i, ret;
+	unsigned int cpu;
+
+	if (!of_device_is_compatible(node, "brcm,iproc-msi"))
+		return -ENODEV;
+
+	if (!of_property_read_bool(node, "msi-controller"))
+		return -ENODEV;
+
+	if (pcie->msi)
+		return -EBUSY;
+
+	msi = devm_kzalloc(pcie->dev, sizeof(*msi), GFP_KERNEL);
+	if (!msi)
+		return -ENOMEM;
+
+	msi->pcie = pcie;
+	pcie->msi = msi;
+	msi->msi_addr = pcie->base_addr;
+	mutex_init(&msi->bitmap_lock);
+	msi->nr_cpus = num_possible_cpus();
+
+	if (msi->nr_cpus == 1)
+		iproc_msi_domain_info.flags |=  MSI_FLAG_MULTI_PCI_MSI;
+
+	msi->nr_irqs = of_irq_count(node);
+	if (!msi->nr_irqs) {
+		dev_err(pcie->dev, "found no MSI GIC interrupt\n");
+		return -ENODEV;
+	}
+
+	if (msi->nr_irqs > NR_HW_IRQS) {
+		dev_warn(pcie->dev, "too many MSI GIC interrupts defined %d\n",
+			 msi->nr_irqs);
+		msi->nr_irqs = NR_HW_IRQS;
+	}
+
+	if (msi->nr_irqs < msi->nr_cpus) {
+		dev_err(pcie->dev,
+			"not enough GIC interrupts for MSI affinity\n");
+		return -EINVAL;
+	}
+
+	if (msi->nr_irqs % msi->nr_cpus != 0) {
+		msi->nr_irqs -= msi->nr_irqs % msi->nr_cpus;
+		dev_warn(pcie->dev, "Reducing number of interrupts to %d\n",
+			 msi->nr_irqs);
+	}
+
+	switch (pcie->type) {
+	case IPROC_PCIE_PAXB_BCMA:
+	case IPROC_PCIE_PAXB:
+		msi->reg_offsets = iproc_msi_reg_paxb;
+		msi->nr_eq_region = 1;
+		msi->nr_msi_region = 1;
+		break;
+	case IPROC_PCIE_PAXC:
+		msi->reg_offsets = iproc_msi_reg_paxc;
+		msi->nr_eq_region = msi->nr_irqs;
+		msi->nr_msi_region = msi->nr_irqs;
+		break;
+	default:
+		dev_err(pcie->dev, "incompatible iProc PCIe interface\n");
+		return -EINVAL;
+	}
+
+	msi->has_inten_reg = of_property_read_bool(node, "brcm,pcie-msi-inten");
+
+	msi->nr_msi_vecs = msi->nr_irqs * EQ_LEN;
+	msi->bitmap = devm_bitmap_zalloc(pcie->dev, msi->nr_msi_vecs,
+					 GFP_KERNEL);
+	if (!msi->bitmap)
+		return -ENOMEM;
+
+	msi->grps = devm_kcalloc(pcie->dev, msi->nr_irqs, sizeof(*msi->grps),
+				 GFP_KERNEL);
+	if (!msi->grps)
+		return -ENOMEM;
+
+	for (i = 0; i < msi->nr_irqs; i++) {
+		unsigned int irq = irq_of_parse_and_map(node, i);
+
+		if (!irq) {
+			dev_err(pcie->dev, "unable to parse/map interrupt\n");
+			ret = -ENODEV;
+			goto free_irqs;
+		}
+		msi->grps[i].gic_irq = irq;
+		msi->grps[i].msi = msi;
+		msi->grps[i].eq = i;
+	}
+
+	/* Reserve memory for event queue and make sure memories are zeroed */
+	msi->eq_cpu = dma_alloc_coherent(pcie->dev,
+					 msi->nr_eq_region * EQ_MEM_REGION_SIZE,
+					 &msi->eq_dma, GFP_KERNEL);
+	if (!msi->eq_cpu) {
+		ret = -ENOMEM;
+		goto free_irqs;
+	}
+
+	ret = iproc_msi_alloc_domains(node, msi);
+	if (ret) {
+		dev_err(pcie->dev, "failed to create MSI domains\n");
+		goto free_eq_dma;
+	}
+
+	for_each_online_cpu(cpu) {
+		ret = iproc_msi_irq_setup(msi, cpu);
+		if (ret)
+			goto free_msi_irq;
+	}
+
+	iproc_msi_enable(msi);
+
+	return 0;
+
+free_msi_irq:
+	for_each_online_cpu(cpu)
+		iproc_msi_irq_free(msi, cpu);
+	iproc_msi_free_domains(msi);
+
+free_eq_dma:
+	dma_free_coherent(pcie->dev, msi->nr_eq_region * EQ_MEM_REGION_SIZE,
+			  msi->eq_cpu, msi->eq_dma);
+
+free_irqs:
+	for (i = 0; i < msi->nr_irqs; i++) {
+		if (msi->grps[i].gic_irq)
+			irq_dispose_mapping(msi->grps[i].gic_irq);
+	}
+	pcie->msi = NULL;
+	return ret;
+}
+EXPORT_SYMBOL(iproc_msi_init);
+
+void iproc_msi_exit(struct iproc_pcie *pcie)
+{
+	struct iproc_msi *msi = pcie->msi;
+	unsigned int i, cpu;
+
+	if (!msi)
+		return;
+
+	iproc_msi_disable(msi);
+
+	for_each_online_cpu(cpu)
+		iproc_msi_irq_free(msi, cpu);
+
+	iproc_msi_free_domains(msi);
+
+	dma_free_coherent(pcie->dev, msi->nr_eq_region * EQ_MEM_REGION_SIZE,
+			  msi->eq_cpu, msi->eq_dma);
+
+	for (i = 0; i < msi->nr_irqs; i++) {
+		if (msi->grps[i].gic_irq)
+			irq_dispose_mapping(msi->grps[i].gic_irq);
+	}
+}
+EXPORT_SYMBOL(iproc_msi_exit);
diff --git a/drivers/pci/controller/pcie-iproc-platform.c b/drivers/pci/controller/pcie-iproc-platform.c
new file mode 100644
index 000000000..acdc583d2
--- /dev/null
+++ b/drivers/pci/controller/pcie-iproc-platform.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015 Broadcom Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/phy/phy.h>
+
+#include "../pci.h"
+#include "pcie-iproc.h"
+
+static const struct of_device_id iproc_pcie_of_match_table[] = {
+	{
+		.compatible = "brcm,iproc-pcie",
+		.data = (int *)IPROC_PCIE_PAXB,
+	}, {
+		.compatible = "brcm,iproc-pcie-paxb-v2",
+		.data = (int *)IPROC_PCIE_PAXB_V2,
+	}, {
+		.compatible = "brcm,iproc-pcie-paxc",
+		.data = (int *)IPROC_PCIE_PAXC,
+	}, {
+		.compatible = "brcm,iproc-pcie-paxc-v2",
+		.data = (int *)IPROC_PCIE_PAXC_V2,
+	},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, iproc_pcie_of_match_table);
+
+static int iproc_pltfm_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct iproc_pcie *pcie;
+	struct device_node *np = dev->of_node;
+	struct resource reg;
+	struct pci_host_bridge *bridge;
+	int ret;
+
+	bridge = devm_pci_alloc_host_bridge(dev, sizeof(*pcie));
+	if (!bridge)
+		return -ENOMEM;
+
+	pcie = pci_host_bridge_priv(bridge);
+
+	pcie->dev = dev;
+	pcie->type = (enum iproc_pcie_type) of_device_get_match_data(dev);
+
+	ret = of_address_to_resource(np, 0, &reg);
+	if (ret < 0) {
+		dev_err(dev, "unable to obtain controller resources\n");
+		return ret;
+	}
+
+	pcie->base = devm_pci_remap_cfgspace(dev, reg.start,
+					     resource_size(&reg));
+	if (!pcie->base) {
+		dev_err(dev, "unable to map controller registers\n");
+		return -ENOMEM;
+	}
+	pcie->base_addr = reg.start;
+
+	if (of_property_read_bool(np, "brcm,pcie-ob")) {
+		u32 val;
+
+		ret = of_property_read_u32(np, "brcm,pcie-ob-axi-offset",
+					   &val);
+		if (ret) {
+			dev_err(dev,
+				"missing brcm,pcie-ob-axi-offset property\n");
+			return ret;
+		}
+		pcie->ob.axi_offset = val;
+		pcie->need_ob_cfg = true;
+	}
+
+	/*
+	 * DT nodes are not used by all platforms that use the iProc PCIe
+	 * core driver. For platforms that require explicit inbound mapping
+	 * configuration, "dma-ranges" would have been present in DT
+	 */
+	pcie->need_ib_cfg = of_property_read_bool(np, "dma-ranges");
+
+	/* PHY use is optional */
+	pcie->phy = devm_phy_optional_get(dev, "pcie-phy");
+	if (IS_ERR(pcie->phy))
+		return PTR_ERR(pcie->phy);
+
+	/* PAXC doesn't support legacy IRQs, skip mapping */
+	switch (pcie->type) {
+	case IPROC_PCIE_PAXC:
+	case IPROC_PCIE_PAXC_V2:
+		pcie->map_irq = NULL;
+		break;
+	default:
+		break;
+	}
+
+	ret = iproc_pcie_setup(pcie, &bridge->windows);
+	if (ret) {
+		dev_err(dev, "PCIe controller setup failed\n");
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, pcie);
+	return 0;
+}
+
+static void iproc_pltfm_pcie_remove(struct platform_device *pdev)
+{
+	struct iproc_pcie *pcie = platform_get_drvdata(pdev);
+
+	iproc_pcie_remove(pcie);
+}
+
+static void iproc_pltfm_pcie_shutdown(struct platform_device *pdev)
+{
+	struct iproc_pcie *pcie = platform_get_drvdata(pdev);
+
+	iproc_pcie_shutdown(pcie);
+}
+
+static struct platform_driver iproc_pltfm_pcie_driver = {
+	.driver = {
+		.name = "iproc-pcie",
+		.of_match_table = of_match_ptr(iproc_pcie_of_match_table),
+	},
+	.probe = iproc_pltfm_pcie_probe,
+	.remove_new = iproc_pltfm_pcie_remove,
+	.shutdown = iproc_pltfm_pcie_shutdown,
+};
+module_platform_driver(iproc_pltfm_pcie_driver);
+
+MODULE_AUTHOR("Ray Jui <rjui@broadcom.com>");
+MODULE_DESCRIPTION("Broadcom iPROC PCIe platform driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pcie-iproc.c b/drivers/pci/controller/pcie-iproc.c
new file mode 100644
index 000000000..bd1c98b68
--- /dev/null
+++ b/drivers/pci/controller/pcie-iproc.c
@@ -0,0 +1,1598 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2014 Hauke Mehrtens <hauke@hauke-m.de>
+ * Copyright (C) 2015 Broadcom Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/pci-ecam.h>
+#include <linux/msi.h>
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/mbus.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/irqchip/arm-gic-v3.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/phy/phy.h>
+
+#include "pcie-iproc.h"
+
+#define EP_PERST_SOURCE_SELECT_SHIFT	2
+#define EP_PERST_SOURCE_SELECT		BIT(EP_PERST_SOURCE_SELECT_SHIFT)
+#define EP_MODE_SURVIVE_PERST_SHIFT	1
+#define EP_MODE_SURVIVE_PERST		BIT(EP_MODE_SURVIVE_PERST_SHIFT)
+#define RC_PCIE_RST_OUTPUT_SHIFT	0
+#define RC_PCIE_RST_OUTPUT		BIT(RC_PCIE_RST_OUTPUT_SHIFT)
+#define PAXC_RESET_MASK			0x7f
+
+#define GIC_V3_CFG_SHIFT		0
+#define GIC_V3_CFG			BIT(GIC_V3_CFG_SHIFT)
+
+#define MSI_ENABLE_CFG_SHIFT		0
+#define MSI_ENABLE_CFG			BIT(MSI_ENABLE_CFG_SHIFT)
+
+#define CFG_IND_ADDR_MASK		0x00001ffc
+
+#define CFG_ADDR_REG_NUM_MASK		0x00000ffc
+#define CFG_ADDR_CFG_TYPE_1		1
+
+#define SYS_RC_INTX_MASK		0xf
+
+#define PCIE_PHYLINKUP_SHIFT		3
+#define PCIE_PHYLINKUP			BIT(PCIE_PHYLINKUP_SHIFT)
+#define PCIE_DL_ACTIVE_SHIFT		2
+#define PCIE_DL_ACTIVE			BIT(PCIE_DL_ACTIVE_SHIFT)
+
+#define APB_ERR_EN_SHIFT		0
+#define APB_ERR_EN			BIT(APB_ERR_EN_SHIFT)
+
+#define CFG_RD_SUCCESS			0
+#define CFG_RD_UR			1
+#define CFG_RD_CRS			2
+#define CFG_RD_CA			3
+#define CFG_RETRY_STATUS		0xffff0001
+#define CFG_RETRY_STATUS_TIMEOUT_US	500000 /* 500 milliseconds */
+
+/* derive the enum index of the outbound/inbound mapping registers */
+#define MAP_REG(base_reg, index)	((base_reg) + (index) * 2)
+
+/*
+ * Maximum number of outbound mapping window sizes that can be supported by any
+ * OARR/OMAP mapping pair
+ */
+#define MAX_NUM_OB_WINDOW_SIZES		4
+
+#define OARR_VALID_SHIFT		0
+#define OARR_VALID			BIT(OARR_VALID_SHIFT)
+#define OARR_SIZE_CFG_SHIFT		1
+
+/*
+ * Maximum number of inbound mapping region sizes that can be supported by an
+ * IARR
+ */
+#define MAX_NUM_IB_REGION_SIZES		9
+
+#define IMAP_VALID_SHIFT		0
+#define IMAP_VALID			BIT(IMAP_VALID_SHIFT)
+
+#define IPROC_PCI_PM_CAP		0x48
+#define IPROC_PCI_PM_CAP_MASK		0xffff
+#define IPROC_PCI_EXP_CAP		0xac
+
+#define IPROC_PCIE_REG_INVALID		0xffff
+
+/**
+ * struct iproc_pcie_ob_map - iProc PCIe outbound mapping controller-specific
+ * parameters
+ * @window_sizes: list of supported outbound mapping window sizes in MB
+ * @nr_sizes: number of supported outbound mapping window sizes
+ */
+struct iproc_pcie_ob_map {
+	resource_size_t window_sizes[MAX_NUM_OB_WINDOW_SIZES];
+	unsigned int nr_sizes;
+};
+
+static const struct iproc_pcie_ob_map paxb_ob_map[] = {
+	{
+		/* OARR0/OMAP0 */
+		.window_sizes = { 128, 256 },
+		.nr_sizes = 2,
+	},
+	{
+		/* OARR1/OMAP1 */
+		.window_sizes = { 128, 256 },
+		.nr_sizes = 2,
+	},
+};
+
+static const struct iproc_pcie_ob_map paxb_v2_ob_map[] = {
+	{
+		/* OARR0/OMAP0 */
+		.window_sizes = { 128, 256 },
+		.nr_sizes = 2,
+	},
+	{
+		/* OARR1/OMAP1 */
+		.window_sizes = { 128, 256 },
+		.nr_sizes = 2,
+	},
+	{
+		/* OARR2/OMAP2 */
+		.window_sizes = { 128, 256, 512, 1024 },
+		.nr_sizes = 4,
+	},
+	{
+		/* OARR3/OMAP3 */
+		.window_sizes = { 128, 256, 512, 1024 },
+		.nr_sizes = 4,
+	},
+};
+
+/**
+ * enum iproc_pcie_ib_map_type - iProc PCIe inbound mapping type
+ * @IPROC_PCIE_IB_MAP_MEM: DDR memory
+ * @IPROC_PCIE_IB_MAP_IO: device I/O memory
+ * @IPROC_PCIE_IB_MAP_INVALID: invalid or unused
+ */
+enum iproc_pcie_ib_map_type {
+	IPROC_PCIE_IB_MAP_MEM = 0,
+	IPROC_PCIE_IB_MAP_IO,
+	IPROC_PCIE_IB_MAP_INVALID
+};
+
+/**
+ * struct iproc_pcie_ib_map - iProc PCIe inbound mapping controller-specific
+ * parameters
+ * @type: inbound mapping region type
+ * @size_unit: inbound mapping region size unit, could be SZ_1K, SZ_1M, or
+ * SZ_1G
+ * @region_sizes: list of supported inbound mapping region sizes in KB, MB, or
+ * GB, depending on the size unit
+ * @nr_sizes: number of supported inbound mapping region sizes
+ * @nr_windows: number of supported inbound mapping windows for the region
+ * @imap_addr_offset: register offset between the upper and lower 32-bit
+ * IMAP address registers
+ * @imap_window_offset: register offset between each IMAP window
+ */
+struct iproc_pcie_ib_map {
+	enum iproc_pcie_ib_map_type type;
+	unsigned int size_unit;
+	resource_size_t region_sizes[MAX_NUM_IB_REGION_SIZES];
+	unsigned int nr_sizes;
+	unsigned int nr_windows;
+	u16 imap_addr_offset;
+	u16 imap_window_offset;
+};
+
+static const struct iproc_pcie_ib_map paxb_v2_ib_map[] = {
+	{
+		/* IARR0/IMAP0 */
+		.type = IPROC_PCIE_IB_MAP_IO,
+		.size_unit = SZ_1K,
+		.region_sizes = { 32 },
+		.nr_sizes = 1,
+		.nr_windows = 8,
+		.imap_addr_offset = 0x40,
+		.imap_window_offset = 0x4,
+	},
+	{
+		/* IARR1/IMAP1 */
+		.type = IPROC_PCIE_IB_MAP_MEM,
+		.size_unit = SZ_1M,
+		.region_sizes = { 8 },
+		.nr_sizes = 1,
+		.nr_windows = 8,
+		.imap_addr_offset = 0x4,
+		.imap_window_offset = 0x8,
+
+	},
+	{
+		/* IARR2/IMAP2 */
+		.type = IPROC_PCIE_IB_MAP_MEM,
+		.size_unit = SZ_1M,
+		.region_sizes = { 64, 128, 256, 512, 1024, 2048, 4096, 8192,
+				  16384 },
+		.nr_sizes = 9,
+		.nr_windows = 1,
+		.imap_addr_offset = 0x4,
+		.imap_window_offset = 0x8,
+	},
+	{
+		/* IARR3/IMAP3 */
+		.type = IPROC_PCIE_IB_MAP_MEM,
+		.size_unit = SZ_1G,
+		.region_sizes = { 1, 2, 4, 8, 16, 32 },
+		.nr_sizes = 6,
+		.nr_windows = 8,
+		.imap_addr_offset = 0x4,
+		.imap_window_offset = 0x8,
+	},
+	{
+		/* IARR4/IMAP4 */
+		.type = IPROC_PCIE_IB_MAP_MEM,
+		.size_unit = SZ_1G,
+		.region_sizes = { 32, 64, 128, 256, 512 },
+		.nr_sizes = 5,
+		.nr_windows = 8,
+		.imap_addr_offset = 0x4,
+		.imap_window_offset = 0x8,
+	},
+};
+
+/*
+ * iProc PCIe host registers
+ */
+enum iproc_pcie_reg {
+	/* clock/reset signal control */
+	IPROC_PCIE_CLK_CTRL = 0,
+
+	/*
+	 * To allow MSI to be steered to an external MSI controller (e.g., ARM
+	 * GICv3 ITS)
+	 */
+	IPROC_PCIE_MSI_GIC_MODE,
+
+	/*
+	 * IPROC_PCIE_MSI_BASE_ADDR and IPROC_PCIE_MSI_WINDOW_SIZE define the
+	 * window where the MSI posted writes are written, for the writes to be
+	 * interpreted as MSI writes.
+	 */
+	IPROC_PCIE_MSI_BASE_ADDR,
+	IPROC_PCIE_MSI_WINDOW_SIZE,
+
+	/*
+	 * To hold the address of the register where the MSI writes are
+	 * programmed.  When ARM GICv3 ITS is used, this should be programmed
+	 * with the address of the GITS_TRANSLATER register.
+	 */
+	IPROC_PCIE_MSI_ADDR_LO,
+	IPROC_PCIE_MSI_ADDR_HI,
+
+	/* enable MSI */
+	IPROC_PCIE_MSI_EN_CFG,
+
+	/* allow access to root complex configuration space */
+	IPROC_PCIE_CFG_IND_ADDR,
+	IPROC_PCIE_CFG_IND_DATA,
+
+	/* allow access to device configuration space */
+	IPROC_PCIE_CFG_ADDR,
+	IPROC_PCIE_CFG_DATA,
+
+	/* enable INTx */
+	IPROC_PCIE_INTX_EN,
+
+	/* outbound address mapping */
+	IPROC_PCIE_OARR0,
+	IPROC_PCIE_OMAP0,
+	IPROC_PCIE_OARR1,
+	IPROC_PCIE_OMAP1,
+	IPROC_PCIE_OARR2,
+	IPROC_PCIE_OMAP2,
+	IPROC_PCIE_OARR3,
+	IPROC_PCIE_OMAP3,
+
+	/* inbound address mapping */
+	IPROC_PCIE_IARR0,
+	IPROC_PCIE_IMAP0,
+	IPROC_PCIE_IARR1,
+	IPROC_PCIE_IMAP1,
+	IPROC_PCIE_IARR2,
+	IPROC_PCIE_IMAP2,
+	IPROC_PCIE_IARR3,
+	IPROC_PCIE_IMAP3,
+	IPROC_PCIE_IARR4,
+	IPROC_PCIE_IMAP4,
+
+	/* config read status */
+	IPROC_PCIE_CFG_RD_STATUS,
+
+	/* link status */
+	IPROC_PCIE_LINK_STATUS,
+
+	/* enable APB error for unsupported requests */
+	IPROC_PCIE_APB_ERR_EN,
+
+	/* total number of core registers */
+	IPROC_PCIE_MAX_NUM_REG,
+};
+
+/* iProc PCIe PAXB BCMA registers */
+static const u16 iproc_pcie_reg_paxb_bcma[IPROC_PCIE_MAX_NUM_REG] = {
+	[IPROC_PCIE_CLK_CTRL]		= 0x000,
+	[IPROC_PCIE_CFG_IND_ADDR]	= 0x120,
+	[IPROC_PCIE_CFG_IND_DATA]	= 0x124,
+	[IPROC_PCIE_CFG_ADDR]		= 0x1f8,
+	[IPROC_PCIE_CFG_DATA]		= 0x1fc,
+	[IPROC_PCIE_INTX_EN]		= 0x330,
+	[IPROC_PCIE_LINK_STATUS]	= 0xf0c,
+};
+
+/* iProc PCIe PAXB registers */
+static const u16 iproc_pcie_reg_paxb[IPROC_PCIE_MAX_NUM_REG] = {
+	[IPROC_PCIE_CLK_CTRL]		= 0x000,
+	[IPROC_PCIE_CFG_IND_ADDR]	= 0x120,
+	[IPROC_PCIE_CFG_IND_DATA]	= 0x124,
+	[IPROC_PCIE_CFG_ADDR]		= 0x1f8,
+	[IPROC_PCIE_CFG_DATA]		= 0x1fc,
+	[IPROC_PCIE_INTX_EN]		= 0x330,
+	[IPROC_PCIE_OARR0]		= 0xd20,
+	[IPROC_PCIE_OMAP0]		= 0xd40,
+	[IPROC_PCIE_OARR1]		= 0xd28,
+	[IPROC_PCIE_OMAP1]		= 0xd48,
+	[IPROC_PCIE_LINK_STATUS]	= 0xf0c,
+	[IPROC_PCIE_APB_ERR_EN]		= 0xf40,
+};
+
+/* iProc PCIe PAXB v2 registers */
+static const u16 iproc_pcie_reg_paxb_v2[IPROC_PCIE_MAX_NUM_REG] = {
+	[IPROC_PCIE_CLK_CTRL]		= 0x000,
+	[IPROC_PCIE_CFG_IND_ADDR]	= 0x120,
+	[IPROC_PCIE_CFG_IND_DATA]	= 0x124,
+	[IPROC_PCIE_CFG_ADDR]		= 0x1f8,
+	[IPROC_PCIE_CFG_DATA]		= 0x1fc,
+	[IPROC_PCIE_INTX_EN]		= 0x330,
+	[IPROC_PCIE_OARR0]		= 0xd20,
+	[IPROC_PCIE_OMAP0]		= 0xd40,
+	[IPROC_PCIE_OARR1]		= 0xd28,
+	[IPROC_PCIE_OMAP1]		= 0xd48,
+	[IPROC_PCIE_OARR2]		= 0xd60,
+	[IPROC_PCIE_OMAP2]		= 0xd68,
+	[IPROC_PCIE_OARR3]		= 0xdf0,
+	[IPROC_PCIE_OMAP3]		= 0xdf8,
+	[IPROC_PCIE_IARR0]		= 0xd00,
+	[IPROC_PCIE_IMAP0]		= 0xc00,
+	[IPROC_PCIE_IARR1]		= 0xd08,
+	[IPROC_PCIE_IMAP1]		= 0xd70,
+	[IPROC_PCIE_IARR2]		= 0xd10,
+	[IPROC_PCIE_IMAP2]		= 0xcc0,
+	[IPROC_PCIE_IARR3]		= 0xe00,
+	[IPROC_PCIE_IMAP3]		= 0xe08,
+	[IPROC_PCIE_IARR4]		= 0xe68,
+	[IPROC_PCIE_IMAP4]		= 0xe70,
+	[IPROC_PCIE_CFG_RD_STATUS]	= 0xee0,
+	[IPROC_PCIE_LINK_STATUS]	= 0xf0c,
+	[IPROC_PCIE_APB_ERR_EN]		= 0xf40,
+};
+
+/* iProc PCIe PAXC v1 registers */
+static const u16 iproc_pcie_reg_paxc[IPROC_PCIE_MAX_NUM_REG] = {
+	[IPROC_PCIE_CLK_CTRL]		= 0x000,
+	[IPROC_PCIE_CFG_IND_ADDR]	= 0x1f0,
+	[IPROC_PCIE_CFG_IND_DATA]	= 0x1f4,
+	[IPROC_PCIE_CFG_ADDR]		= 0x1f8,
+	[IPROC_PCIE_CFG_DATA]		= 0x1fc,
+};
+
+/* iProc PCIe PAXC v2 registers */
+static const u16 iproc_pcie_reg_paxc_v2[IPROC_PCIE_MAX_NUM_REG] = {
+	[IPROC_PCIE_MSI_GIC_MODE]	= 0x050,
+	[IPROC_PCIE_MSI_BASE_ADDR]	= 0x074,
+	[IPROC_PCIE_MSI_WINDOW_SIZE]	= 0x078,
+	[IPROC_PCIE_MSI_ADDR_LO]	= 0x07c,
+	[IPROC_PCIE_MSI_ADDR_HI]	= 0x080,
+	[IPROC_PCIE_MSI_EN_CFG]		= 0x09c,
+	[IPROC_PCIE_CFG_IND_ADDR]	= 0x1f0,
+	[IPROC_PCIE_CFG_IND_DATA]	= 0x1f4,
+	[IPROC_PCIE_CFG_ADDR]		= 0x1f8,
+	[IPROC_PCIE_CFG_DATA]		= 0x1fc,
+};
+
+/*
+ * List of device IDs of controllers that have corrupted capability list that
+ * require SW fixup
+ */
+static const u16 iproc_pcie_corrupt_cap_did[] = {
+	0x16cd,
+	0x16f0,
+	0xd802,
+	0xd804
+};
+
+static inline struct iproc_pcie *iproc_data(struct pci_bus *bus)
+{
+	struct iproc_pcie *pcie = bus->sysdata;
+	return pcie;
+}
+
+static inline bool iproc_pcie_reg_is_invalid(u16 reg_offset)
+{
+	return !!(reg_offset == IPROC_PCIE_REG_INVALID);
+}
+
+static inline u16 iproc_pcie_reg_offset(struct iproc_pcie *pcie,
+					enum iproc_pcie_reg reg)
+{
+	return pcie->reg_offsets[reg];
+}
+
+static inline u32 iproc_pcie_read_reg(struct iproc_pcie *pcie,
+				      enum iproc_pcie_reg reg)
+{
+	u16 offset = iproc_pcie_reg_offset(pcie, reg);
+
+	if (iproc_pcie_reg_is_invalid(offset))
+		return 0;
+
+	return readl(pcie->base + offset);
+}
+
+static inline void iproc_pcie_write_reg(struct iproc_pcie *pcie,
+					enum iproc_pcie_reg reg, u32 val)
+{
+	u16 offset = iproc_pcie_reg_offset(pcie, reg);
+
+	if (iproc_pcie_reg_is_invalid(offset))
+		return;
+
+	writel(val, pcie->base + offset);
+}
+
+/*
+ * APB error forwarding can be disabled during access of configuration
+ * registers of the endpoint device, to prevent unsupported requests
+ * (typically seen during enumeration with multi-function devices) from
+ * triggering a system exception.
+ */
+static inline void iproc_pcie_apb_err_disable(struct pci_bus *bus,
+					      bool disable)
+{
+	struct iproc_pcie *pcie = iproc_data(bus);
+	u32 val;
+
+	if (bus->number && pcie->has_apb_err_disable) {
+		val = iproc_pcie_read_reg(pcie, IPROC_PCIE_APB_ERR_EN);
+		if (disable)
+			val &= ~APB_ERR_EN;
+		else
+			val |= APB_ERR_EN;
+		iproc_pcie_write_reg(pcie, IPROC_PCIE_APB_ERR_EN, val);
+	}
+}
+
+static void __iomem *iproc_pcie_map_ep_cfg_reg(struct iproc_pcie *pcie,
+					       unsigned int busno,
+					       unsigned int devfn,
+					       int where)
+{
+	u16 offset;
+	u32 val;
+
+	/* EP device access */
+	val = ALIGN_DOWN(PCIE_ECAM_OFFSET(busno, devfn, where), 4) |
+		CFG_ADDR_CFG_TYPE_1;
+
+	iproc_pcie_write_reg(pcie, IPROC_PCIE_CFG_ADDR, val);
+	offset = iproc_pcie_reg_offset(pcie, IPROC_PCIE_CFG_DATA);
+
+	if (iproc_pcie_reg_is_invalid(offset))
+		return NULL;
+
+	return (pcie->base + offset);
+}
+
+static unsigned int iproc_pcie_cfg_retry(struct iproc_pcie *pcie,
+					 void __iomem *cfg_data_p)
+{
+	int timeout = CFG_RETRY_STATUS_TIMEOUT_US;
+	unsigned int data;
+	u32 status;
+
+	/*
+	 * As per PCIe spec r3.1, sec 2.3.2, CRS Software Visibility only
+	 * affects config reads of the Vendor ID.  For config writes or any
+	 * other config reads, the Root may automatically reissue the
+	 * configuration request again as a new request.
+	 *
+	 * For config reads, this hardware returns CFG_RETRY_STATUS data
+	 * when it receives a CRS completion, regardless of the address of
+	 * the read or the CRS Software Visibility Enable bit.  As a
+	 * partial workaround for this, we retry in software any read that
+	 * returns CFG_RETRY_STATUS.
+	 *
+	 * Note that a non-Vendor ID config register may have a value of
+	 * CFG_RETRY_STATUS.  If we read that, we can't distinguish it from
+	 * a CRS completion, so we will incorrectly retry the read and
+	 * eventually return the wrong data (0xffffffff).
+	 */
+	data = readl(cfg_data_p);
+	while (data == CFG_RETRY_STATUS && timeout--) {
+		/*
+		 * CRS state is set in CFG_RD status register
+		 * This will handle the case where CFG_RETRY_STATUS is
+		 * valid config data.
+		 */
+		status = iproc_pcie_read_reg(pcie, IPROC_PCIE_CFG_RD_STATUS);
+		if (status != CFG_RD_CRS)
+			return data;
+
+		udelay(1);
+		data = readl(cfg_data_p);
+	}
+
+	if (data == CFG_RETRY_STATUS)
+		data = 0xffffffff;
+
+	return data;
+}
+
+static void iproc_pcie_fix_cap(struct iproc_pcie *pcie, int where, u32 *val)
+{
+	u32 i, dev_id;
+
+	switch (where & ~0x3) {
+	case PCI_VENDOR_ID:
+		dev_id = *val >> 16;
+
+		/*
+		 * Activate fixup for those controllers that have corrupted
+		 * capability list registers
+		 */
+		for (i = 0; i < ARRAY_SIZE(iproc_pcie_corrupt_cap_did); i++)
+			if (dev_id == iproc_pcie_corrupt_cap_did[i])
+				pcie->fix_paxc_cap = true;
+		break;
+
+	case IPROC_PCI_PM_CAP:
+		if (pcie->fix_paxc_cap) {
+			/* advertise PM, force next capability to PCIe */
+			*val &= ~IPROC_PCI_PM_CAP_MASK;
+			*val |= IPROC_PCI_EXP_CAP << 8 | PCI_CAP_ID_PM;
+		}
+		break;
+
+	case IPROC_PCI_EXP_CAP:
+		if (pcie->fix_paxc_cap) {
+			/* advertise root port, version 2, terminate here */
+			*val = (PCI_EXP_TYPE_ROOT_PORT << 4 | 2) << 16 |
+				PCI_CAP_ID_EXP;
+		}
+		break;
+
+	case IPROC_PCI_EXP_CAP + PCI_EXP_RTCTL:
+		/* Don't advertise CRS SV support */
+		*val &= ~(PCI_EXP_RTCAP_CRSVIS << 16);
+		break;
+
+	default:
+		break;
+	}
+}
+
+static int iproc_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
+				  int where, int size, u32 *val)
+{
+	struct iproc_pcie *pcie = iproc_data(bus);
+	unsigned int busno = bus->number;
+	void __iomem *cfg_data_p;
+	unsigned int data;
+	int ret;
+
+	/* root complex access */
+	if (busno == 0) {
+		ret = pci_generic_config_read32(bus, devfn, where, size, val);
+		if (ret == PCIBIOS_SUCCESSFUL)
+			iproc_pcie_fix_cap(pcie, where, val);
+
+		return ret;
+	}
+
+	cfg_data_p = iproc_pcie_map_ep_cfg_reg(pcie, busno, devfn, where);
+
+	if (!cfg_data_p)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	data = iproc_pcie_cfg_retry(pcie, cfg_data_p);
+
+	*val = data;
+	if (size <= 2)
+		*val = (data >> (8 * (where & 3))) & ((1 << (size * 8)) - 1);
+
+	/*
+	 * For PAXC and PAXCv2, the total number of PFs that one can enumerate
+	 * depends on the firmware configuration. Unfortunately, due to an ASIC
+	 * bug, unconfigured PFs cannot be properly hidden from the root
+	 * complex. As a result, write access to these PFs will cause bus lock
+	 * up on the embedded processor
+	 *
+	 * Since all unconfigured PFs are left with an incorrect, staled device
+	 * ID of 0x168e (PCI_DEVICE_ID_NX2_57810), we try to catch those access
+	 * early here and reject them all
+	 */
+#define DEVICE_ID_MASK     0xffff0000
+#define DEVICE_ID_SHIFT    16
+	if (pcie->rej_unconfig_pf &&
+	    (where & CFG_ADDR_REG_NUM_MASK) == PCI_VENDOR_ID)
+		if ((*val & DEVICE_ID_MASK) ==
+		    (PCI_DEVICE_ID_NX2_57810 << DEVICE_ID_SHIFT))
+			return PCIBIOS_FUNC_NOT_SUPPORTED;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Note access to the configuration registers are protected at the higher layer
+ * by 'pci_lock' in drivers/pci/access.c
+ */
+static void __iomem *iproc_pcie_map_cfg_bus(struct iproc_pcie *pcie,
+					    int busno, unsigned int devfn,
+					    int where)
+{
+	u16 offset;
+
+	/* root complex access */
+	if (busno == 0) {
+		if (PCIE_ECAM_DEVFN(devfn) > 0)
+			return NULL;
+
+		iproc_pcie_write_reg(pcie, IPROC_PCIE_CFG_IND_ADDR,
+				     where & CFG_IND_ADDR_MASK);
+		offset = iproc_pcie_reg_offset(pcie, IPROC_PCIE_CFG_IND_DATA);
+		if (iproc_pcie_reg_is_invalid(offset))
+			return NULL;
+		else
+			return (pcie->base + offset);
+	}
+
+	return iproc_pcie_map_ep_cfg_reg(pcie, busno, devfn, where);
+}
+
+static void __iomem *iproc_pcie_bus_map_cfg_bus(struct pci_bus *bus,
+						unsigned int devfn,
+						int where)
+{
+	return iproc_pcie_map_cfg_bus(iproc_data(bus), bus->number, devfn,
+				      where);
+}
+
+static int iproc_pci_raw_config_read32(struct iproc_pcie *pcie,
+				       unsigned int devfn, int where,
+				       int size, u32 *val)
+{
+	void __iomem *addr;
+
+	addr = iproc_pcie_map_cfg_bus(pcie, 0, devfn, where & ~0x3);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	*val = readl(addr);
+
+	if (size <= 2)
+		*val = (*val >> (8 * (where & 3))) & ((1 << (size * 8)) - 1);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int iproc_pci_raw_config_write32(struct iproc_pcie *pcie,
+					unsigned int devfn, int where,
+					int size, u32 val)
+{
+	void __iomem *addr;
+	u32 mask, tmp;
+
+	addr = iproc_pcie_map_cfg_bus(pcie, 0, devfn, where & ~0x3);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (size == 4) {
+		writel(val, addr);
+		return PCIBIOS_SUCCESSFUL;
+	}
+
+	mask = ~(((1 << (size * 8)) - 1) << ((where & 0x3) * 8));
+	tmp = readl(addr) & mask;
+	tmp |= val << ((where & 0x3) * 8);
+	writel(tmp, addr);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int iproc_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
+				    int where, int size, u32 *val)
+{
+	int ret;
+	struct iproc_pcie *pcie = iproc_data(bus);
+
+	iproc_pcie_apb_err_disable(bus, true);
+	if (pcie->iproc_cfg_read)
+		ret = iproc_pcie_config_read(bus, devfn, where, size, val);
+	else
+		ret = pci_generic_config_read32(bus, devfn, where, size, val);
+	iproc_pcie_apb_err_disable(bus, false);
+
+	return ret;
+}
+
+static int iproc_pcie_config_write32(struct pci_bus *bus, unsigned int devfn,
+				     int where, int size, u32 val)
+{
+	int ret;
+
+	iproc_pcie_apb_err_disable(bus, true);
+	ret = pci_generic_config_write32(bus, devfn, where, size, val);
+	iproc_pcie_apb_err_disable(bus, false);
+
+	return ret;
+}
+
+static struct pci_ops iproc_pcie_ops = {
+	.map_bus = iproc_pcie_bus_map_cfg_bus,
+	.read = iproc_pcie_config_read32,
+	.write = iproc_pcie_config_write32,
+};
+
+static void iproc_pcie_perst_ctrl(struct iproc_pcie *pcie, bool assert)
+{
+	u32 val;
+
+	/*
+	 * PAXC and the internal emulated endpoint device downstream should not
+	 * be reset.  If firmware has been loaded on the endpoint device at an
+	 * earlier boot stage, reset here causes issues.
+	 */
+	if (pcie->ep_is_internal)
+		return;
+
+	if (assert) {
+		val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
+		val &= ~EP_PERST_SOURCE_SELECT & ~EP_MODE_SURVIVE_PERST &
+			~RC_PCIE_RST_OUTPUT;
+		iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
+		udelay(250);
+	} else {
+		val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
+		val |= RC_PCIE_RST_OUTPUT;
+		iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
+		msleep(100);
+	}
+}
+
+int iproc_pcie_shutdown(struct iproc_pcie *pcie)
+{
+	iproc_pcie_perst_ctrl(pcie, true);
+	msleep(500);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iproc_pcie_shutdown);
+
+static int iproc_pcie_check_link(struct iproc_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	u32 hdr_type, link_ctrl, link_status, class, val;
+	bool link_is_active = false;
+
+	/*
+	 * PAXC connects to emulated endpoint devices directly and does not
+	 * have a Serdes.  Therefore skip the link detection logic here.
+	 */
+	if (pcie->ep_is_internal)
+		return 0;
+
+	val = iproc_pcie_read_reg(pcie, IPROC_PCIE_LINK_STATUS);
+	if (!(val & PCIE_PHYLINKUP) || !(val & PCIE_DL_ACTIVE)) {
+		dev_err(dev, "PHY or data link is INACTIVE!\n");
+		return -ENODEV;
+	}
+
+	/* make sure we are not in EP mode */
+	iproc_pci_raw_config_read32(pcie, 0, PCI_HEADER_TYPE, 1, &hdr_type);
+	if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE) {
+		dev_err(dev, "in EP mode, hdr=%#02x\n", hdr_type);
+		return -EFAULT;
+	}
+
+	/* force class to PCI_CLASS_BRIDGE_PCI_NORMAL (0x060400) */
+#define PCI_BRIDGE_CTRL_REG_OFFSET	0x43c
+#define PCI_BRIDGE_CTRL_REG_CLASS_MASK	0xffffff
+	iproc_pci_raw_config_read32(pcie, 0, PCI_BRIDGE_CTRL_REG_OFFSET,
+				    4, &class);
+	class &= ~PCI_BRIDGE_CTRL_REG_CLASS_MASK;
+	class |= PCI_CLASS_BRIDGE_PCI_NORMAL;
+	iproc_pci_raw_config_write32(pcie, 0, PCI_BRIDGE_CTRL_REG_OFFSET,
+				     4, class);
+
+	/* check link status to see if link is active */
+	iproc_pci_raw_config_read32(pcie, 0, IPROC_PCI_EXP_CAP + PCI_EXP_LNKSTA,
+				    2, &link_status);
+	if (link_status & PCI_EXP_LNKSTA_NLW)
+		link_is_active = true;
+
+	if (!link_is_active) {
+		/* try GEN 1 link speed */
+#define PCI_TARGET_LINK_SPEED_MASK	0xf
+#define PCI_TARGET_LINK_SPEED_GEN2	0x2
+#define PCI_TARGET_LINK_SPEED_GEN1	0x1
+		iproc_pci_raw_config_read32(pcie, 0,
+					    IPROC_PCI_EXP_CAP + PCI_EXP_LNKCTL2,
+					    4, &link_ctrl);
+		if ((link_ctrl & PCI_TARGET_LINK_SPEED_MASK) ==
+		    PCI_TARGET_LINK_SPEED_GEN2) {
+			link_ctrl &= ~PCI_TARGET_LINK_SPEED_MASK;
+			link_ctrl |= PCI_TARGET_LINK_SPEED_GEN1;
+			iproc_pci_raw_config_write32(pcie, 0,
+					IPROC_PCI_EXP_CAP + PCI_EXP_LNKCTL2,
+					4, link_ctrl);
+			msleep(100);
+
+			iproc_pci_raw_config_read32(pcie, 0,
+					IPROC_PCI_EXP_CAP + PCI_EXP_LNKSTA,
+					2, &link_status);
+			if (link_status & PCI_EXP_LNKSTA_NLW)
+				link_is_active = true;
+		}
+	}
+
+	dev_info(dev, "link: %s\n", link_is_active ? "UP" : "DOWN");
+
+	return link_is_active ? 0 : -ENODEV;
+}
+
+static void iproc_pcie_enable(struct iproc_pcie *pcie)
+{
+	iproc_pcie_write_reg(pcie, IPROC_PCIE_INTX_EN, SYS_RC_INTX_MASK);
+}
+
+static inline bool iproc_pcie_ob_is_valid(struct iproc_pcie *pcie,
+					  int window_idx)
+{
+	u32 val;
+
+	val = iproc_pcie_read_reg(pcie, MAP_REG(IPROC_PCIE_OARR0, window_idx));
+
+	return !!(val & OARR_VALID);
+}
+
+static inline int iproc_pcie_ob_write(struct iproc_pcie *pcie, int window_idx,
+				      int size_idx, u64 axi_addr, u64 pci_addr)
+{
+	struct device *dev = pcie->dev;
+	u16 oarr_offset, omap_offset;
+
+	/*
+	 * Derive the OARR/OMAP offset from the first pair (OARR0/OMAP0) based
+	 * on window index.
+	 */
+	oarr_offset = iproc_pcie_reg_offset(pcie, MAP_REG(IPROC_PCIE_OARR0,
+							  window_idx));
+	omap_offset = iproc_pcie_reg_offset(pcie, MAP_REG(IPROC_PCIE_OMAP0,
+							  window_idx));
+	if (iproc_pcie_reg_is_invalid(oarr_offset) ||
+	    iproc_pcie_reg_is_invalid(omap_offset))
+		return -EINVAL;
+
+	/*
+	 * Program the OARR registers.  The upper 32-bit OARR register is
+	 * always right after the lower 32-bit OARR register.
+	 */
+	writel(lower_32_bits(axi_addr) | (size_idx << OARR_SIZE_CFG_SHIFT) |
+	       OARR_VALID, pcie->base + oarr_offset);
+	writel(upper_32_bits(axi_addr), pcie->base + oarr_offset + 4);
+
+	/* now program the OMAP registers */
+	writel(lower_32_bits(pci_addr), pcie->base + omap_offset);
+	writel(upper_32_bits(pci_addr), pcie->base + omap_offset + 4);
+
+	dev_dbg(dev, "ob window [%d]: offset 0x%x axi %pap pci %pap\n",
+		window_idx, oarr_offset, &axi_addr, &pci_addr);
+	dev_dbg(dev, "oarr lo 0x%x oarr hi 0x%x\n",
+		readl(pcie->base + oarr_offset),
+		readl(pcie->base + oarr_offset + 4));
+	dev_dbg(dev, "omap lo 0x%x omap hi 0x%x\n",
+		readl(pcie->base + omap_offset),
+		readl(pcie->base + omap_offset + 4));
+
+	return 0;
+}
+
+/*
+ * Some iProc SoCs require the SW to configure the outbound address mapping
+ *
+ * Outbound address translation:
+ *
+ * iproc_pcie_address = axi_address - axi_offset
+ * OARR = iproc_pcie_address
+ * OMAP = pci_addr
+ *
+ * axi_addr -> iproc_pcie_address -> OARR -> OMAP -> pci_address
+ */
+static int iproc_pcie_setup_ob(struct iproc_pcie *pcie, u64 axi_addr,
+			       u64 pci_addr, resource_size_t size)
+{
+	struct iproc_pcie_ob *ob = &pcie->ob;
+	struct device *dev = pcie->dev;
+	int ret = -EINVAL, window_idx, size_idx;
+
+	if (axi_addr < ob->axi_offset) {
+		dev_err(dev, "axi address %pap less than offset %pap\n",
+			&axi_addr, &ob->axi_offset);
+		return -EINVAL;
+	}
+
+	/*
+	 * Translate the AXI address to the internal address used by the iProc
+	 * PCIe core before programming the OARR
+	 */
+	axi_addr -= ob->axi_offset;
+
+	/* iterate through all OARR/OMAP mapping windows */
+	for (window_idx = ob->nr_windows - 1; window_idx >= 0; window_idx--) {
+		const struct iproc_pcie_ob_map *ob_map =
+			&pcie->ob_map[window_idx];
+
+		/*
+		 * If current outbound window is already in use, move on to the
+		 * next one.
+		 */
+		if (iproc_pcie_ob_is_valid(pcie, window_idx))
+			continue;
+
+		/*
+		 * Iterate through all supported window sizes within the
+		 * OARR/OMAP pair to find a match.  Go through the window sizes
+		 * in a descending order.
+		 */
+		for (size_idx = ob_map->nr_sizes - 1; size_idx >= 0;
+		     size_idx--) {
+			resource_size_t window_size =
+				ob_map->window_sizes[size_idx] * SZ_1M;
+
+			/*
+			 * Keep iterating until we reach the last window and
+			 * with the minimal window size at index zero. In this
+			 * case, we take a compromise by mapping it using the
+			 * minimum window size that can be supported
+			 */
+			if (size < window_size) {
+				if (size_idx > 0 || window_idx > 0)
+					continue;
+
+				/*
+				 * For the corner case of reaching the minimal
+				 * window size that can be supported on the
+				 * last window
+				 */
+				axi_addr = ALIGN_DOWN(axi_addr, window_size);
+				pci_addr = ALIGN_DOWN(pci_addr, window_size);
+				size = window_size;
+			}
+
+			if (!IS_ALIGNED(axi_addr, window_size) ||
+			    !IS_ALIGNED(pci_addr, window_size)) {
+				dev_err(dev,
+					"axi %pap or pci %pap not aligned\n",
+					&axi_addr, &pci_addr);
+				return -EINVAL;
+			}
+
+			/*
+			 * Match found!  Program both OARR and OMAP and mark
+			 * them as a valid entry.
+			 */
+			ret = iproc_pcie_ob_write(pcie, window_idx, size_idx,
+						  axi_addr, pci_addr);
+			if (ret)
+				goto err_ob;
+
+			size -= window_size;
+			if (size == 0)
+				return 0;
+
+			/*
+			 * If we are here, we are done with the current window,
+			 * but not yet finished all mappings.  Need to move on
+			 * to the next window.
+			 */
+			axi_addr += window_size;
+			pci_addr += window_size;
+			break;
+		}
+	}
+
+err_ob:
+	dev_err(dev, "unable to configure outbound mapping\n");
+	dev_err(dev,
+		"axi %pap, axi offset %pap, pci %pap, res size %pap\n",
+		&axi_addr, &ob->axi_offset, &pci_addr, &size);
+
+	return ret;
+}
+
+static int iproc_pcie_map_ranges(struct iproc_pcie *pcie,
+				 struct list_head *resources)
+{
+	struct device *dev = pcie->dev;
+	struct resource_entry *window;
+	int ret;
+
+	resource_list_for_each_entry(window, resources) {
+		struct resource *res = window->res;
+		u64 res_type = resource_type(res);
+
+		switch (res_type) {
+		case IORESOURCE_IO:
+		case IORESOURCE_BUS:
+			break;
+		case IORESOURCE_MEM:
+			ret = iproc_pcie_setup_ob(pcie, res->start,
+						  res->start - window->offset,
+						  resource_size(res));
+			if (ret)
+				return ret;
+			break;
+		default:
+			dev_err(dev, "invalid resource %pR\n", res);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static inline bool iproc_pcie_ib_is_in_use(struct iproc_pcie *pcie,
+					   int region_idx)
+{
+	const struct iproc_pcie_ib_map *ib_map = &pcie->ib_map[region_idx];
+	u32 val;
+
+	val = iproc_pcie_read_reg(pcie, MAP_REG(IPROC_PCIE_IARR0, region_idx));
+
+	return !!(val & (BIT(ib_map->nr_sizes) - 1));
+}
+
+static inline bool iproc_pcie_ib_check_type(const struct iproc_pcie_ib_map *ib_map,
+					    enum iproc_pcie_ib_map_type type)
+{
+	return !!(ib_map->type == type);
+}
+
+static int iproc_pcie_ib_write(struct iproc_pcie *pcie, int region_idx,
+			       int size_idx, int nr_windows, u64 axi_addr,
+			       u64 pci_addr, resource_size_t size)
+{
+	struct device *dev = pcie->dev;
+	const struct iproc_pcie_ib_map *ib_map = &pcie->ib_map[region_idx];
+	u16 iarr_offset, imap_offset;
+	u32 val;
+	int window_idx;
+
+	iarr_offset = iproc_pcie_reg_offset(pcie,
+				MAP_REG(IPROC_PCIE_IARR0, region_idx));
+	imap_offset = iproc_pcie_reg_offset(pcie,
+				MAP_REG(IPROC_PCIE_IMAP0, region_idx));
+	if (iproc_pcie_reg_is_invalid(iarr_offset) ||
+	    iproc_pcie_reg_is_invalid(imap_offset))
+		return -EINVAL;
+
+	dev_dbg(dev, "ib region [%d]: offset 0x%x axi %pap pci %pap\n",
+		region_idx, iarr_offset, &axi_addr, &pci_addr);
+
+	/*
+	 * Program the IARR registers.  The upper 32-bit IARR register is
+	 * always right after the lower 32-bit IARR register.
+	 */
+	writel(lower_32_bits(pci_addr) | BIT(size_idx),
+	       pcie->base + iarr_offset);
+	writel(upper_32_bits(pci_addr), pcie->base + iarr_offset + 4);
+
+	dev_dbg(dev, "iarr lo 0x%x iarr hi 0x%x\n",
+		readl(pcie->base + iarr_offset),
+		readl(pcie->base + iarr_offset + 4));
+
+	/*
+	 * Now program the IMAP registers.  Each IARR region may have one or
+	 * more IMAP windows.
+	 */
+	size >>= ilog2(nr_windows);
+	for (window_idx = 0; window_idx < nr_windows; window_idx++) {
+		val = readl(pcie->base + imap_offset);
+		val |= lower_32_bits(axi_addr) | IMAP_VALID;
+		writel(val, pcie->base + imap_offset);
+		writel(upper_32_bits(axi_addr),
+		       pcie->base + imap_offset + ib_map->imap_addr_offset);
+
+		dev_dbg(dev, "imap window [%d] lo 0x%x hi 0x%x\n",
+			window_idx, readl(pcie->base + imap_offset),
+			readl(pcie->base + imap_offset +
+			      ib_map->imap_addr_offset));
+
+		imap_offset += ib_map->imap_window_offset;
+		axi_addr += size;
+	}
+
+	return 0;
+}
+
+static int iproc_pcie_setup_ib(struct iproc_pcie *pcie,
+			       struct resource_entry *entry,
+			       enum iproc_pcie_ib_map_type type)
+{
+	struct device *dev = pcie->dev;
+	struct iproc_pcie_ib *ib = &pcie->ib;
+	int ret;
+	unsigned int region_idx, size_idx;
+	u64 axi_addr = entry->res->start;
+	u64 pci_addr = entry->res->start - entry->offset;
+	resource_size_t size = resource_size(entry->res);
+
+	/* iterate through all IARR mapping regions */
+	for (region_idx = 0; region_idx < ib->nr_regions; region_idx++) {
+		const struct iproc_pcie_ib_map *ib_map =
+			&pcie->ib_map[region_idx];
+
+		/*
+		 * If current inbound region is already in use or not a
+		 * compatible type, move on to the next.
+		 */
+		if (iproc_pcie_ib_is_in_use(pcie, region_idx) ||
+		    !iproc_pcie_ib_check_type(ib_map, type))
+			continue;
+
+		/* iterate through all supported region sizes to find a match */
+		for (size_idx = 0; size_idx < ib_map->nr_sizes; size_idx++) {
+			resource_size_t region_size =
+			ib_map->region_sizes[size_idx] * ib_map->size_unit;
+
+			if (size != region_size)
+				continue;
+
+			if (!IS_ALIGNED(axi_addr, region_size) ||
+			    !IS_ALIGNED(pci_addr, region_size)) {
+				dev_err(dev,
+					"axi %pap or pci %pap not aligned\n",
+					&axi_addr, &pci_addr);
+				return -EINVAL;
+			}
+
+			/* Match found!  Program IARR and all IMAP windows. */
+			ret = iproc_pcie_ib_write(pcie, region_idx, size_idx,
+						  ib_map->nr_windows, axi_addr,
+						  pci_addr, size);
+			if (ret)
+				goto err_ib;
+			else
+				return 0;
+
+		}
+	}
+	ret = -EINVAL;
+
+err_ib:
+	dev_err(dev, "unable to configure inbound mapping\n");
+	dev_err(dev, "axi %pap, pci %pap, res size %pap\n",
+		&axi_addr, &pci_addr, &size);
+
+	return ret;
+}
+
+static int iproc_pcie_map_dma_ranges(struct iproc_pcie *pcie)
+{
+	struct pci_host_bridge *host = pci_host_bridge_from_priv(pcie);
+	struct resource_entry *entry;
+	int ret = 0;
+
+	resource_list_for_each_entry(entry, &host->dma_ranges) {
+		/* Each range entry corresponds to an inbound mapping region */
+		ret = iproc_pcie_setup_ib(pcie, entry, IPROC_PCIE_IB_MAP_MEM);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+static void iproc_pcie_invalidate_mapping(struct iproc_pcie *pcie)
+{
+	struct iproc_pcie_ib *ib = &pcie->ib;
+	struct iproc_pcie_ob *ob = &pcie->ob;
+	int idx;
+
+	if (pcie->ep_is_internal)
+		return;
+
+	if (pcie->need_ob_cfg) {
+		/* iterate through all OARR mapping regions */
+		for (idx = ob->nr_windows - 1; idx >= 0; idx--) {
+			iproc_pcie_write_reg(pcie,
+					     MAP_REG(IPROC_PCIE_OARR0, idx), 0);
+		}
+	}
+
+	if (pcie->need_ib_cfg) {
+		/* iterate through all IARR mapping regions */
+		for (idx = 0; idx < ib->nr_regions; idx++) {
+			iproc_pcie_write_reg(pcie,
+					     MAP_REG(IPROC_PCIE_IARR0, idx), 0);
+		}
+	}
+}
+
+static int iproce_pcie_get_msi(struct iproc_pcie *pcie,
+			       struct device_node *msi_node,
+			       u64 *msi_addr)
+{
+	struct device *dev = pcie->dev;
+	int ret;
+	struct resource res;
+
+	/*
+	 * Check if 'msi-map' points to ARM GICv3 ITS, which is the only
+	 * supported external MSI controller that requires steering.
+	 */
+	if (!of_device_is_compatible(msi_node, "arm,gic-v3-its")) {
+		dev_err(dev, "unable to find compatible MSI controller\n");
+		return -ENODEV;
+	}
+
+	/* derive GITS_TRANSLATER address from GICv3 */
+	ret = of_address_to_resource(msi_node, 0, &res);
+	if (ret < 0) {
+		dev_err(dev, "unable to obtain MSI controller resources\n");
+		return ret;
+	}
+
+	*msi_addr = res.start + GITS_TRANSLATER;
+	return 0;
+}
+
+static int iproc_pcie_paxb_v2_msi_steer(struct iproc_pcie *pcie, u64 msi_addr)
+{
+	int ret;
+	struct resource_entry entry;
+
+	memset(&entry, 0, sizeof(entry));
+	entry.res = &entry.__res;
+
+	msi_addr &= ~(SZ_32K - 1);
+	entry.res->start = msi_addr;
+	entry.res->end = msi_addr + SZ_32K - 1;
+
+	ret = iproc_pcie_setup_ib(pcie, &entry, IPROC_PCIE_IB_MAP_IO);
+	return ret;
+}
+
+static void iproc_pcie_paxc_v2_msi_steer(struct iproc_pcie *pcie, u64 msi_addr,
+					 bool enable)
+{
+	u32 val;
+
+	if (!enable) {
+		/*
+		 * Disable PAXC MSI steering. All write transfers will be
+		 * treated as non-MSI transfers
+		 */
+		val = iproc_pcie_read_reg(pcie, IPROC_PCIE_MSI_EN_CFG);
+		val &= ~MSI_ENABLE_CFG;
+		iproc_pcie_write_reg(pcie, IPROC_PCIE_MSI_EN_CFG, val);
+		return;
+	}
+
+	/*
+	 * Program bits [43:13] of address of GITS_TRANSLATER register into
+	 * bits [30:0] of the MSI base address register.  In fact, in all iProc
+	 * based SoCs, all I/O register bases are well below the 32-bit
+	 * boundary, so we can safely assume bits [43:32] are always zeros.
+	 */
+	iproc_pcie_write_reg(pcie, IPROC_PCIE_MSI_BASE_ADDR,
+			     (u32)(msi_addr >> 13));
+
+	/* use a default 8K window size */
+	iproc_pcie_write_reg(pcie, IPROC_PCIE_MSI_WINDOW_SIZE, 0);
+
+	/* steering MSI to GICv3 ITS */
+	val = iproc_pcie_read_reg(pcie, IPROC_PCIE_MSI_GIC_MODE);
+	val |= GIC_V3_CFG;
+	iproc_pcie_write_reg(pcie, IPROC_PCIE_MSI_GIC_MODE, val);
+
+	/*
+	 * Program bits [43:2] of address of GITS_TRANSLATER register into the
+	 * iProc MSI address registers.
+	 */
+	msi_addr >>= 2;
+	iproc_pcie_write_reg(pcie, IPROC_PCIE_MSI_ADDR_HI,
+			     upper_32_bits(msi_addr));
+	iproc_pcie_write_reg(pcie, IPROC_PCIE_MSI_ADDR_LO,
+			     lower_32_bits(msi_addr));
+
+	/* enable MSI */
+	val = iproc_pcie_read_reg(pcie, IPROC_PCIE_MSI_EN_CFG);
+	val |= MSI_ENABLE_CFG;
+	iproc_pcie_write_reg(pcie, IPROC_PCIE_MSI_EN_CFG, val);
+}
+
+static int iproc_pcie_msi_steer(struct iproc_pcie *pcie,
+				struct device_node *msi_node)
+{
+	struct device *dev = pcie->dev;
+	int ret;
+	u64 msi_addr;
+
+	ret = iproce_pcie_get_msi(pcie, msi_node, &msi_addr);
+	if (ret < 0) {
+		dev_err(dev, "msi steering failed\n");
+		return ret;
+	}
+
+	switch (pcie->type) {
+	case IPROC_PCIE_PAXB_V2:
+		ret = iproc_pcie_paxb_v2_msi_steer(pcie, msi_addr);
+		if (ret)
+			return ret;
+		break;
+	case IPROC_PCIE_PAXC_V2:
+		iproc_pcie_paxc_v2_msi_steer(pcie, msi_addr, true);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int iproc_pcie_msi_enable(struct iproc_pcie *pcie)
+{
+	struct device_node *msi_node;
+	int ret;
+
+	/*
+	 * Either the "msi-parent" or the "msi-map" phandle needs to exist
+	 * for us to obtain the MSI node.
+	 */
+
+	msi_node = of_parse_phandle(pcie->dev->of_node, "msi-parent", 0);
+	if (!msi_node) {
+		const __be32 *msi_map = NULL;
+		int len;
+		u32 phandle;
+
+		msi_map = of_get_property(pcie->dev->of_node, "msi-map", &len);
+		if (!msi_map)
+			return -ENODEV;
+
+		phandle = be32_to_cpup(msi_map + 1);
+		msi_node = of_find_node_by_phandle(phandle);
+		if (!msi_node)
+			return -ENODEV;
+	}
+
+	/*
+	 * Certain revisions of the iProc PCIe controller require additional
+	 * configurations to steer the MSI writes towards an external MSI
+	 * controller.
+	 */
+	if (pcie->need_msi_steer) {
+		ret = iproc_pcie_msi_steer(pcie, msi_node);
+		if (ret)
+			goto out_put_node;
+	}
+
+	/*
+	 * If another MSI controller is being used, the call below should fail
+	 * but that is okay
+	 */
+	ret = iproc_msi_init(pcie, msi_node);
+
+out_put_node:
+	of_node_put(msi_node);
+	return ret;
+}
+
+static void iproc_pcie_msi_disable(struct iproc_pcie *pcie)
+{
+	iproc_msi_exit(pcie);
+}
+
+static int iproc_pcie_rev_init(struct iproc_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	unsigned int reg_idx;
+	const u16 *regs;
+
+	switch (pcie->type) {
+	case IPROC_PCIE_PAXB_BCMA:
+		regs = iproc_pcie_reg_paxb_bcma;
+		break;
+	case IPROC_PCIE_PAXB:
+		regs = iproc_pcie_reg_paxb;
+		pcie->has_apb_err_disable = true;
+		if (pcie->need_ob_cfg) {
+			pcie->ob_map = paxb_ob_map;
+			pcie->ob.nr_windows = ARRAY_SIZE(paxb_ob_map);
+		}
+		break;
+	case IPROC_PCIE_PAXB_V2:
+		regs = iproc_pcie_reg_paxb_v2;
+		pcie->iproc_cfg_read = true;
+		pcie->has_apb_err_disable = true;
+		if (pcie->need_ob_cfg) {
+			pcie->ob_map = paxb_v2_ob_map;
+			pcie->ob.nr_windows = ARRAY_SIZE(paxb_v2_ob_map);
+		}
+		pcie->ib.nr_regions = ARRAY_SIZE(paxb_v2_ib_map);
+		pcie->ib_map = paxb_v2_ib_map;
+		pcie->need_msi_steer = true;
+		dev_warn(dev, "reads of config registers that contain %#x return incorrect data\n",
+			 CFG_RETRY_STATUS);
+		break;
+	case IPROC_PCIE_PAXC:
+		regs = iproc_pcie_reg_paxc;
+		pcie->ep_is_internal = true;
+		pcie->iproc_cfg_read = true;
+		pcie->rej_unconfig_pf = true;
+		break;
+	case IPROC_PCIE_PAXC_V2:
+		regs = iproc_pcie_reg_paxc_v2;
+		pcie->ep_is_internal = true;
+		pcie->iproc_cfg_read = true;
+		pcie->rej_unconfig_pf = true;
+		pcie->need_msi_steer = true;
+		break;
+	default:
+		dev_err(dev, "incompatible iProc PCIe interface\n");
+		return -EINVAL;
+	}
+
+	pcie->reg_offsets = devm_kcalloc(dev, IPROC_PCIE_MAX_NUM_REG,
+					 sizeof(*pcie->reg_offsets),
+					 GFP_KERNEL);
+	if (!pcie->reg_offsets)
+		return -ENOMEM;
+
+	/* go through the register table and populate all valid registers */
+	pcie->reg_offsets[0] = (pcie->type == IPROC_PCIE_PAXC_V2) ?
+		IPROC_PCIE_REG_INVALID : regs[0];
+	for (reg_idx = 1; reg_idx < IPROC_PCIE_MAX_NUM_REG; reg_idx++)
+		pcie->reg_offsets[reg_idx] = regs[reg_idx] ?
+			regs[reg_idx] : IPROC_PCIE_REG_INVALID;
+
+	return 0;
+}
+
+int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res)
+{
+	struct device *dev;
+	int ret;
+	struct pci_dev *pdev;
+	struct pci_host_bridge *host = pci_host_bridge_from_priv(pcie);
+
+	dev = pcie->dev;
+
+	ret = iproc_pcie_rev_init(pcie);
+	if (ret) {
+		dev_err(dev, "unable to initialize controller parameters\n");
+		return ret;
+	}
+
+	ret = phy_init(pcie->phy);
+	if (ret) {
+		dev_err(dev, "unable to initialize PCIe PHY\n");
+		return ret;
+	}
+
+	ret = phy_power_on(pcie->phy);
+	if (ret) {
+		dev_err(dev, "unable to power on PCIe PHY\n");
+		goto err_exit_phy;
+	}
+
+	iproc_pcie_perst_ctrl(pcie, true);
+	iproc_pcie_perst_ctrl(pcie, false);
+
+	iproc_pcie_invalidate_mapping(pcie);
+
+	if (pcie->need_ob_cfg) {
+		ret = iproc_pcie_map_ranges(pcie, res);
+		if (ret) {
+			dev_err(dev, "map failed\n");
+			goto err_power_off_phy;
+		}
+	}
+
+	if (pcie->need_ib_cfg) {
+		ret = iproc_pcie_map_dma_ranges(pcie);
+		if (ret && ret != -ENOENT)
+			goto err_power_off_phy;
+	}
+
+	ret = iproc_pcie_check_link(pcie);
+	if (ret) {
+		dev_err(dev, "no PCIe EP device detected\n");
+		goto err_power_off_phy;
+	}
+
+	iproc_pcie_enable(pcie);
+
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		if (iproc_pcie_msi_enable(pcie))
+			dev_info(dev, "not using iProc MSI\n");
+
+	host->ops = &iproc_pcie_ops;
+	host->sysdata = pcie;
+	host->map_irq = pcie->map_irq;
+
+	ret = pci_host_probe(host);
+	if (ret < 0) {
+		dev_err(dev, "failed to scan host: %d\n", ret);
+		goto err_power_off_phy;
+	}
+
+	for_each_pci_bridge(pdev, host->bus) {
+		if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT)
+			pcie_print_link_status(pdev);
+	}
+
+	return 0;
+
+err_power_off_phy:
+	phy_power_off(pcie->phy);
+err_exit_phy:
+	phy_exit(pcie->phy);
+	return ret;
+}
+EXPORT_SYMBOL(iproc_pcie_setup);
+
+void iproc_pcie_remove(struct iproc_pcie *pcie)
+{
+	struct pci_host_bridge *host = pci_host_bridge_from_priv(pcie);
+
+	pci_stop_root_bus(host->bus);
+	pci_remove_root_bus(host->bus);
+
+	iproc_pcie_msi_disable(pcie);
+
+	phy_power_off(pcie->phy);
+	phy_exit(pcie->phy);
+}
+EXPORT_SYMBOL(iproc_pcie_remove);
+
+/*
+ * The MSI parsing logic in certain revisions of Broadcom PAXC based root
+ * complex does not work and needs to be disabled
+ */
+static void quirk_paxc_disable_msi_parsing(struct pci_dev *pdev)
+{
+	struct iproc_pcie *pcie = iproc_data(pdev->bus);
+
+	if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+		iproc_pcie_paxc_v2_msi_steer(pcie, 0, false);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0x16f0,
+			quirk_paxc_disable_msi_parsing);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0xd802,
+			quirk_paxc_disable_msi_parsing);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0xd804,
+			quirk_paxc_disable_msi_parsing);
+
+static void quirk_paxc_bridge(struct pci_dev *pdev)
+{
+	/*
+	 * The PCI config space is shared with the PAXC root port and the first
+	 * Ethernet device.  So, we need to workaround this by telling the PCI
+	 * code that the bridge is not an Ethernet device.
+	 */
+	if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+		pdev->class = PCI_CLASS_BRIDGE_PCI_NORMAL;
+
+	/*
+	 * MPSS is not being set properly (as it is currently 0).  This is
+	 * because that area of the PCI config space is hard coded to zero, and
+	 * is not modifiable by firmware.  Set this to 2 (e.g., 512 byte MPS)
+	 * so that the MPS can be set to the real max value.
+	 */
+	pdev->pcie_mpss = 2;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0x16cd, quirk_paxc_bridge);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0x16f0, quirk_paxc_bridge);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0xd750, quirk_paxc_bridge);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0xd802, quirk_paxc_bridge);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0xd804, quirk_paxc_bridge);
+
+MODULE_AUTHOR("Ray Jui <rjui@broadcom.com>");
+MODULE_DESCRIPTION("Broadcom iPROC PCIe common driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pcie-iproc.h b/drivers/pci/controller/pcie-iproc.h
new file mode 100644
index 000000000..969ded03b
--- /dev/null
+++ b/drivers/pci/controller/pcie-iproc.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2014-2015 Broadcom Corporation
+ */
+
+#ifndef _PCIE_IPROC_H
+#define _PCIE_IPROC_H
+
+/**
+ * enum iproc_pcie_type - iProc PCIe interface type
+ * @IPROC_PCIE_PAXB_BCMA: BCMA-based host controllers
+ * @IPROC_PCIE_PAXB:	  PAXB-based host controllers for
+ *			  NS, NSP, Cygnus, NS2, and Pegasus SOCs
+ * @IPROC_PCIE_PAXB_V2:   PAXB-based host controllers for Stingray SoCs
+ * @IPROC_PCIE_PAXC:	  PAXC-based host controllers
+ * @IPROC_PCIE_PAXC_V2:   PAXC-based host controllers (second generation)
+ *
+ * PAXB is the wrapper used in root complex that can be connected to an
+ * external endpoint device.
+ *
+ * PAXC is the wrapper used in root complex dedicated for internal emulated
+ * endpoint devices.
+ */
+enum iproc_pcie_type {
+	IPROC_PCIE_PAXB_BCMA = 0,
+	IPROC_PCIE_PAXB,
+	IPROC_PCIE_PAXB_V2,
+	IPROC_PCIE_PAXC,
+	IPROC_PCIE_PAXC_V2,
+};
+
+/**
+ * struct iproc_pcie_ob - iProc PCIe outbound mapping
+ * @axi_offset: offset from the AXI address to the internal address used by
+ * the iProc PCIe core
+ * @nr_windows: total number of supported outbound mapping windows
+ */
+struct iproc_pcie_ob {
+	resource_size_t axi_offset;
+	unsigned int nr_windows;
+};
+
+/**
+ * struct iproc_pcie_ib - iProc PCIe inbound mapping
+ * @nr_regions: total number of supported inbound mapping regions
+ */
+struct iproc_pcie_ib {
+	unsigned int nr_regions;
+};
+
+struct iproc_pcie_ob_map;
+struct iproc_pcie_ib_map;
+struct iproc_msi;
+
+/**
+ * struct iproc_pcie - iProc PCIe device
+ * @dev: pointer to device data structure
+ * @type: iProc PCIe interface type
+ * @reg_offsets: register offsets
+ * @base: PCIe host controller I/O register base
+ * @base_addr: PCIe host controller register base physical address
+ * @mem: host bridge memory window resource
+ * @phy: optional PHY device that controls the Serdes
+ * @map_irq: function callback to map interrupts
+ * @ep_is_internal: indicates an internal emulated endpoint device is connected
+ * @iproc_cfg_read: indicates the iProc config read function should be used
+ * @rej_unconfig_pf: indicates the root complex needs to detect and reject
+ * enumeration against unconfigured physical functions emulated in the ASIC
+ * @has_apb_err_disable: indicates the controller can be configured to prevent
+ * unsupported request from being forwarded as an APB bus error
+ * @fix_paxc_cap: indicates the controller has corrupted capability list in its
+ * config space registers and requires SW based fixup
+ *
+ * @need_ob_cfg: indicates SW needs to configure the outbound mapping window
+ * @ob: outbound mapping related parameters
+ * @ob_map: outbound mapping related parameters specific to the controller
+ *
+ * @need_ib_cfg: indicates SW needs to configure the inbound mapping window
+ * @ib: inbound mapping related parameters
+ * @ib_map: outbound mapping region related parameters
+ *
+ * @need_msi_steer: indicates additional configuration of the iProc PCIe
+ * controller is required to steer MSI writes to external interrupt controller
+ * @msi: MSI data
+ */
+struct iproc_pcie {
+	struct device *dev;
+	enum iproc_pcie_type type;
+	u16 *reg_offsets;
+	void __iomem *base;
+	phys_addr_t base_addr;
+	struct resource mem;
+	struct phy *phy;
+	int (*map_irq)(const struct pci_dev *, u8, u8);
+	bool ep_is_internal;
+	bool iproc_cfg_read;
+	bool rej_unconfig_pf;
+	bool has_apb_err_disable;
+	bool fix_paxc_cap;
+
+	bool need_ob_cfg;
+	struct iproc_pcie_ob ob;
+	const struct iproc_pcie_ob_map *ob_map;
+
+	bool need_ib_cfg;
+	struct iproc_pcie_ib ib;
+	const struct iproc_pcie_ib_map *ib_map;
+
+	bool need_msi_steer;
+	struct iproc_msi *msi;
+};
+
+int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res);
+void iproc_pcie_remove(struct iproc_pcie *pcie);
+int iproc_pcie_shutdown(struct iproc_pcie *pcie);
+
+#ifdef CONFIG_PCIE_IPROC_MSI
+int iproc_msi_init(struct iproc_pcie *pcie, struct device_node *node);
+void iproc_msi_exit(struct iproc_pcie *pcie);
+#else
+static inline int iproc_msi_init(struct iproc_pcie *pcie,
+				 struct device_node *node)
+{
+	return -ENODEV;
+}
+static inline void iproc_msi_exit(struct iproc_pcie *pcie)
+{
+}
+#endif
+
+#endif /* _PCIE_IPROC_H */
diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c
new file mode 100644
index 000000000..975b3024f
--- /dev/null
+++ b/drivers/pci/controller/pcie-mediatek-gen3.c
@@ -0,0 +1,1094 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MediaTek PCIe host controller driver.
+ *
+ * Copyright (c) 2020 MediaTek Inc.
+ * Author: Jianjun Wang <jianjun.wang@mediatek.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/iopoll.h>
+#include <linux/irq.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+
+#include "../pci.h"
+
+#define PCIE_SETTING_REG		0x80
+#define PCIE_PCI_IDS_1			0x9c
+#define PCI_CLASS(class)		(class << 8)
+#define PCIE_RC_MODE			BIT(0)
+
+#define PCIE_CFGNUM_REG			0x140
+#define PCIE_CFG_DEVFN(devfn)		((devfn) & GENMASK(7, 0))
+#define PCIE_CFG_BUS(bus)		(((bus) << 8) & GENMASK(15, 8))
+#define PCIE_CFG_BYTE_EN(bytes)		(((bytes) << 16) & GENMASK(19, 16))
+#define PCIE_CFG_FORCE_BYTE_EN		BIT(20)
+#define PCIE_CFG_OFFSET_ADDR		0x1000
+#define PCIE_CFG_HEADER(bus, devfn) \
+	(PCIE_CFG_BUS(bus) | PCIE_CFG_DEVFN(devfn))
+
+#define PCIE_RST_CTRL_REG		0x148
+#define PCIE_MAC_RSTB			BIT(0)
+#define PCIE_PHY_RSTB			BIT(1)
+#define PCIE_BRG_RSTB			BIT(2)
+#define PCIE_PE_RSTB			BIT(3)
+
+#define PCIE_LTSSM_STATUS_REG		0x150
+#define PCIE_LTSSM_STATE_MASK		GENMASK(28, 24)
+#define PCIE_LTSSM_STATE(val)		((val & PCIE_LTSSM_STATE_MASK) >> 24)
+#define PCIE_LTSSM_STATE_L2_IDLE	0x14
+
+#define PCIE_LINK_STATUS_REG		0x154
+#define PCIE_PORT_LINKUP		BIT(8)
+
+#define PCIE_MSI_SET_NUM		8
+#define PCIE_MSI_IRQS_PER_SET		32
+#define PCIE_MSI_IRQS_NUM \
+	(PCIE_MSI_IRQS_PER_SET * PCIE_MSI_SET_NUM)
+
+#define PCIE_INT_ENABLE_REG		0x180
+#define PCIE_MSI_ENABLE			GENMASK(PCIE_MSI_SET_NUM + 8 - 1, 8)
+#define PCIE_MSI_SHIFT			8
+#define PCIE_INTX_SHIFT			24
+#define PCIE_INTX_ENABLE \
+	GENMASK(PCIE_INTX_SHIFT + PCI_NUM_INTX - 1, PCIE_INTX_SHIFT)
+
+#define PCIE_INT_STATUS_REG		0x184
+#define PCIE_MSI_SET_ENABLE_REG		0x190
+#define PCIE_MSI_SET_ENABLE		GENMASK(PCIE_MSI_SET_NUM - 1, 0)
+
+#define PCIE_MSI_SET_BASE_REG		0xc00
+#define PCIE_MSI_SET_OFFSET		0x10
+#define PCIE_MSI_SET_STATUS_OFFSET	0x04
+#define PCIE_MSI_SET_ENABLE_OFFSET	0x08
+
+#define PCIE_MSI_SET_ADDR_HI_BASE	0xc80
+#define PCIE_MSI_SET_ADDR_HI_OFFSET	0x04
+
+#define PCIE_ICMD_PM_REG		0x198
+#define PCIE_TURN_OFF_LINK		BIT(4)
+
+#define PCIE_MISC_CTRL_REG		0x348
+#define PCIE_DISABLE_DVFSRC_VLT_REQ	BIT(1)
+
+#define PCIE_TRANS_TABLE_BASE_REG	0x800
+#define PCIE_ATR_SRC_ADDR_MSB_OFFSET	0x4
+#define PCIE_ATR_TRSL_ADDR_LSB_OFFSET	0x8
+#define PCIE_ATR_TRSL_ADDR_MSB_OFFSET	0xc
+#define PCIE_ATR_TRSL_PARAM_OFFSET	0x10
+#define PCIE_ATR_TLB_SET_OFFSET		0x20
+
+#define PCIE_MAX_TRANS_TABLES		8
+#define PCIE_ATR_EN			BIT(0)
+#define PCIE_ATR_SIZE(size) \
+	(((((size) - 1) << 1) & GENMASK(6, 1)) | PCIE_ATR_EN)
+#define PCIE_ATR_ID(id)			((id) & GENMASK(3, 0))
+#define PCIE_ATR_TYPE_MEM		PCIE_ATR_ID(0)
+#define PCIE_ATR_TYPE_IO		PCIE_ATR_ID(1)
+#define PCIE_ATR_TLP_TYPE(type)		(((type) << 16) & GENMASK(18, 16))
+#define PCIE_ATR_TLP_TYPE_MEM		PCIE_ATR_TLP_TYPE(0)
+#define PCIE_ATR_TLP_TYPE_IO		PCIE_ATR_TLP_TYPE(2)
+
+/**
+ * struct mtk_msi_set - MSI information for each set
+ * @base: IO mapped register base
+ * @msg_addr: MSI message address
+ * @saved_irq_state: IRQ enable state saved at suspend time
+ */
+struct mtk_msi_set {
+	void __iomem *base;
+	phys_addr_t msg_addr;
+	u32 saved_irq_state;
+};
+
+/**
+ * struct mtk_gen3_pcie - PCIe port information
+ * @dev: pointer to PCIe device
+ * @base: IO mapped register base
+ * @reg_base: physical register base
+ * @mac_reset: MAC reset control
+ * @phy_reset: PHY reset control
+ * @phy: PHY controller block
+ * @clks: PCIe clocks
+ * @num_clks: PCIe clocks count for this port
+ * @irq: PCIe controller interrupt number
+ * @saved_irq_state: IRQ enable state saved at suspend time
+ * @irq_lock: lock protecting IRQ register access
+ * @intx_domain: legacy INTx IRQ domain
+ * @msi_domain: MSI IRQ domain
+ * @msi_bottom_domain: MSI IRQ bottom domain
+ * @msi_sets: MSI sets information
+ * @lock: lock protecting IRQ bit map
+ * @msi_irq_in_use: bit map for assigned MSI IRQ
+ */
+struct mtk_gen3_pcie {
+	struct device *dev;
+	void __iomem *base;
+	phys_addr_t reg_base;
+	struct reset_control *mac_reset;
+	struct reset_control *phy_reset;
+	struct phy *phy;
+	struct clk_bulk_data *clks;
+	int num_clks;
+
+	int irq;
+	u32 saved_irq_state;
+	raw_spinlock_t irq_lock;
+	struct irq_domain *intx_domain;
+	struct irq_domain *msi_domain;
+	struct irq_domain *msi_bottom_domain;
+	struct mtk_msi_set msi_sets[PCIE_MSI_SET_NUM];
+	struct mutex lock;
+	DECLARE_BITMAP(msi_irq_in_use, PCIE_MSI_IRQS_NUM);
+};
+
+/* LTSSM state in PCIE_LTSSM_STATUS_REG bit[28:24] */
+static const char *const ltssm_str[] = {
+	"detect.quiet",			/* 0x00 */
+	"detect.active",		/* 0x01 */
+	"polling.active",		/* 0x02 */
+	"polling.compliance",		/* 0x03 */
+	"polling.configuration",	/* 0x04 */
+	"config.linkwidthstart",	/* 0x05 */
+	"config.linkwidthaccept",	/* 0x06 */
+	"config.lanenumwait",		/* 0x07 */
+	"config.lanenumaccept",		/* 0x08 */
+	"config.complete",		/* 0x09 */
+	"config.idle",			/* 0x0A */
+	"recovery.receiverlock",	/* 0x0B */
+	"recovery.equalization",	/* 0x0C */
+	"recovery.speed",		/* 0x0D */
+	"recovery.receiverconfig",	/* 0x0E */
+	"recovery.idle",		/* 0x0F */
+	"L0",				/* 0x10 */
+	"L0s",				/* 0x11 */
+	"L1.entry",			/* 0x12 */
+	"L1.idle",			/* 0x13 */
+	"L2.idle",			/* 0x14 */
+	"L2.transmitwake",		/* 0x15 */
+	"disable",			/* 0x16 */
+	"loopback.entry",		/* 0x17 */
+	"loopback.active",		/* 0x18 */
+	"loopback.exit",		/* 0x19 */
+	"hotreset",			/* 0x1A */
+};
+
+/**
+ * mtk_pcie_config_tlp_header() - Configure a configuration TLP header
+ * @bus: PCI bus to query
+ * @devfn: device/function number
+ * @where: offset in config space
+ * @size: data size in TLP header
+ *
+ * Set byte enable field and device information in configuration TLP header.
+ */
+static void mtk_pcie_config_tlp_header(struct pci_bus *bus, unsigned int devfn,
+					int where, int size)
+{
+	struct mtk_gen3_pcie *pcie = bus->sysdata;
+	int bytes;
+	u32 val;
+
+	bytes = (GENMASK(size - 1, 0) & 0xf) << (where & 0x3);
+
+	val = PCIE_CFG_FORCE_BYTE_EN | PCIE_CFG_BYTE_EN(bytes) |
+	      PCIE_CFG_HEADER(bus->number, devfn);
+
+	writel_relaxed(val, pcie->base + PCIE_CFGNUM_REG);
+}
+
+static void __iomem *mtk_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
+				      int where)
+{
+	struct mtk_gen3_pcie *pcie = bus->sysdata;
+
+	return pcie->base + PCIE_CFG_OFFSET_ADDR + where;
+}
+
+static int mtk_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
+				int where, int size, u32 *val)
+{
+	mtk_pcie_config_tlp_header(bus, devfn, where, size);
+
+	return pci_generic_config_read32(bus, devfn, where, size, val);
+}
+
+static int mtk_pcie_config_write(struct pci_bus *bus, unsigned int devfn,
+				 int where, int size, u32 val)
+{
+	mtk_pcie_config_tlp_header(bus, devfn, where, size);
+
+	if (size <= 2)
+		val <<= (where & 0x3) * 8;
+
+	return pci_generic_config_write32(bus, devfn, where, 4, val);
+}
+
+static struct pci_ops mtk_pcie_ops = {
+	.map_bus = mtk_pcie_map_bus,
+	.read  = mtk_pcie_config_read,
+	.write = mtk_pcie_config_write,
+};
+
+static int mtk_pcie_set_trans_table(struct mtk_gen3_pcie *pcie,
+				    resource_size_t cpu_addr,
+				    resource_size_t pci_addr,
+				    resource_size_t size,
+				    unsigned long type, int *num)
+{
+	resource_size_t remaining = size;
+	resource_size_t table_size;
+	resource_size_t addr_align;
+	const char *range_type;
+	void __iomem *table;
+	u32 val;
+
+	while (remaining && (*num < PCIE_MAX_TRANS_TABLES)) {
+		/* Table size needs to be a power of 2 */
+		table_size = BIT(fls(remaining) - 1);
+
+		if (cpu_addr > 0) {
+			addr_align = BIT(ffs(cpu_addr) - 1);
+			table_size = min(table_size, addr_align);
+		}
+
+		/* Minimum size of translate table is 4KiB */
+		if (table_size < 0x1000) {
+			dev_err(pcie->dev, "illegal table size %#llx\n",
+				(unsigned long long)table_size);
+			return -EINVAL;
+		}
+
+		table = pcie->base + PCIE_TRANS_TABLE_BASE_REG + *num * PCIE_ATR_TLB_SET_OFFSET;
+		writel_relaxed(lower_32_bits(cpu_addr) | PCIE_ATR_SIZE(fls(table_size) - 1), table);
+		writel_relaxed(upper_32_bits(cpu_addr), table + PCIE_ATR_SRC_ADDR_MSB_OFFSET);
+		writel_relaxed(lower_32_bits(pci_addr), table + PCIE_ATR_TRSL_ADDR_LSB_OFFSET);
+		writel_relaxed(upper_32_bits(pci_addr), table + PCIE_ATR_TRSL_ADDR_MSB_OFFSET);
+
+		if (type == IORESOURCE_IO) {
+			val = PCIE_ATR_TYPE_IO | PCIE_ATR_TLP_TYPE_IO;
+			range_type = "IO";
+		} else {
+			val = PCIE_ATR_TYPE_MEM | PCIE_ATR_TLP_TYPE_MEM;
+			range_type = "MEM";
+		}
+
+		writel_relaxed(val, table + PCIE_ATR_TRSL_PARAM_OFFSET);
+
+		dev_dbg(pcie->dev, "set %s trans window[%d]: cpu_addr = %#llx, pci_addr = %#llx, size = %#llx\n",
+			range_type, *num, (unsigned long long)cpu_addr,
+			(unsigned long long)pci_addr, (unsigned long long)table_size);
+
+		cpu_addr += table_size;
+		pci_addr += table_size;
+		remaining -= table_size;
+		(*num)++;
+	}
+
+	if (remaining)
+		dev_warn(pcie->dev, "not enough translate table for addr: %#llx, limited to [%d]\n",
+			 (unsigned long long)cpu_addr, PCIE_MAX_TRANS_TABLES);
+
+	return 0;
+}
+
+static void mtk_pcie_enable_msi(struct mtk_gen3_pcie *pcie)
+{
+	int i;
+	u32 val;
+
+	for (i = 0; i < PCIE_MSI_SET_NUM; i++) {
+		struct mtk_msi_set *msi_set = &pcie->msi_sets[i];
+
+		msi_set->base = pcie->base + PCIE_MSI_SET_BASE_REG +
+				i * PCIE_MSI_SET_OFFSET;
+		msi_set->msg_addr = pcie->reg_base + PCIE_MSI_SET_BASE_REG +
+				    i * PCIE_MSI_SET_OFFSET;
+
+		/* Configure the MSI capture address */
+		writel_relaxed(lower_32_bits(msi_set->msg_addr), msi_set->base);
+		writel_relaxed(upper_32_bits(msi_set->msg_addr),
+			       pcie->base + PCIE_MSI_SET_ADDR_HI_BASE +
+			       i * PCIE_MSI_SET_ADDR_HI_OFFSET);
+	}
+
+	val = readl_relaxed(pcie->base + PCIE_MSI_SET_ENABLE_REG);
+	val |= PCIE_MSI_SET_ENABLE;
+	writel_relaxed(val, pcie->base + PCIE_MSI_SET_ENABLE_REG);
+
+	val = readl_relaxed(pcie->base + PCIE_INT_ENABLE_REG);
+	val |= PCIE_MSI_ENABLE;
+	writel_relaxed(val, pcie->base + PCIE_INT_ENABLE_REG);
+}
+
+static int mtk_pcie_startup_port(struct mtk_gen3_pcie *pcie)
+{
+	struct resource_entry *entry;
+	struct pci_host_bridge *host = pci_host_bridge_from_priv(pcie);
+	unsigned int table_index = 0;
+	int err;
+	u32 val;
+
+	/* Set as RC mode */
+	val = readl_relaxed(pcie->base + PCIE_SETTING_REG);
+	val |= PCIE_RC_MODE;
+	writel_relaxed(val, pcie->base + PCIE_SETTING_REG);
+
+	/* Set class code */
+	val = readl_relaxed(pcie->base + PCIE_PCI_IDS_1);
+	val &= ~GENMASK(31, 8);
+	val |= PCI_CLASS(PCI_CLASS_BRIDGE_PCI_NORMAL);
+	writel_relaxed(val, pcie->base + PCIE_PCI_IDS_1);
+
+	/* Mask all INTx interrupts */
+	val = readl_relaxed(pcie->base + PCIE_INT_ENABLE_REG);
+	val &= ~PCIE_INTX_ENABLE;
+	writel_relaxed(val, pcie->base + PCIE_INT_ENABLE_REG);
+
+	/* Disable DVFSRC voltage request */
+	val = readl_relaxed(pcie->base + PCIE_MISC_CTRL_REG);
+	val |= PCIE_DISABLE_DVFSRC_VLT_REQ;
+	writel_relaxed(val, pcie->base + PCIE_MISC_CTRL_REG);
+
+	/* Assert all reset signals */
+	val = readl_relaxed(pcie->base + PCIE_RST_CTRL_REG);
+	val |= PCIE_MAC_RSTB | PCIE_PHY_RSTB | PCIE_BRG_RSTB | PCIE_PE_RSTB;
+	writel_relaxed(val, pcie->base + PCIE_RST_CTRL_REG);
+
+	/*
+	 * Described in PCIe CEM specification sections 2.2 (PERST# Signal)
+	 * and 2.2.1 (Initial Power-Up (G3 to S0)).
+	 * The deassertion of PERST# should be delayed 100ms (TPVPERL)
+	 * for the power and clock to become stable.
+	 */
+	msleep(100);
+
+	/* De-assert reset signals */
+	val &= ~(PCIE_MAC_RSTB | PCIE_PHY_RSTB | PCIE_BRG_RSTB | PCIE_PE_RSTB);
+	writel_relaxed(val, pcie->base + PCIE_RST_CTRL_REG);
+
+	/* Check if the link is up or not */
+	err = readl_poll_timeout(pcie->base + PCIE_LINK_STATUS_REG, val,
+				 !!(val & PCIE_PORT_LINKUP), 20,
+				 PCI_PM_D3COLD_WAIT * USEC_PER_MSEC);
+	if (err) {
+		const char *ltssm_state;
+		int ltssm_index;
+
+		val = readl_relaxed(pcie->base + PCIE_LTSSM_STATUS_REG);
+		ltssm_index = PCIE_LTSSM_STATE(val);
+		ltssm_state = ltssm_index >= ARRAY_SIZE(ltssm_str) ?
+			      "Unknown state" : ltssm_str[ltssm_index];
+		dev_err(pcie->dev,
+			"PCIe link down, current LTSSM state: %s (%#x)\n",
+			ltssm_state, val);
+		return err;
+	}
+
+	mtk_pcie_enable_msi(pcie);
+
+	/* Set PCIe translation windows */
+	resource_list_for_each_entry(entry, &host->windows) {
+		struct resource *res = entry->res;
+		unsigned long type = resource_type(res);
+		resource_size_t cpu_addr;
+		resource_size_t pci_addr;
+		resource_size_t size;
+
+		if (type == IORESOURCE_IO)
+			cpu_addr = pci_pio_to_address(res->start);
+		else if (type == IORESOURCE_MEM)
+			cpu_addr = res->start;
+		else
+			continue;
+
+		pci_addr = res->start - entry->offset;
+		size = resource_size(res);
+		err = mtk_pcie_set_trans_table(pcie, cpu_addr, pci_addr, size,
+					       type, &table_index);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int mtk_pcie_set_affinity(struct irq_data *data,
+				 const struct cpumask *mask, bool force)
+{
+	return -EINVAL;
+}
+
+static void mtk_pcie_msi_irq_mask(struct irq_data *data)
+{
+	pci_msi_mask_irq(data);
+	irq_chip_mask_parent(data);
+}
+
+static void mtk_pcie_msi_irq_unmask(struct irq_data *data)
+{
+	pci_msi_unmask_irq(data);
+	irq_chip_unmask_parent(data);
+}
+
+static struct irq_chip mtk_msi_irq_chip = {
+	.irq_ack = irq_chip_ack_parent,
+	.irq_mask = mtk_pcie_msi_irq_mask,
+	.irq_unmask = mtk_pcie_msi_irq_unmask,
+	.name = "MSI",
+};
+
+static struct msi_domain_info mtk_msi_domain_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		   MSI_FLAG_PCI_MSIX | MSI_FLAG_MULTI_PCI_MSI),
+	.chip	= &mtk_msi_irq_chip,
+};
+
+static void mtk_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data);
+	struct mtk_gen3_pcie *pcie = data->domain->host_data;
+	unsigned long hwirq;
+
+	hwirq =	data->hwirq % PCIE_MSI_IRQS_PER_SET;
+
+	msg->address_hi = upper_32_bits(msi_set->msg_addr);
+	msg->address_lo = lower_32_bits(msi_set->msg_addr);
+	msg->data = hwirq;
+	dev_dbg(pcie->dev, "msi#%#lx address_hi %#x address_lo %#x data %d\n",
+		hwirq, msg->address_hi, msg->address_lo, msg->data);
+}
+
+static void mtk_msi_bottom_irq_ack(struct irq_data *data)
+{
+	struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data);
+	unsigned long hwirq;
+
+	hwirq =	data->hwirq % PCIE_MSI_IRQS_PER_SET;
+
+	writel_relaxed(BIT(hwirq), msi_set->base + PCIE_MSI_SET_STATUS_OFFSET);
+}
+
+static void mtk_msi_bottom_irq_mask(struct irq_data *data)
+{
+	struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data);
+	struct mtk_gen3_pcie *pcie = data->domain->host_data;
+	unsigned long hwirq, flags;
+	u32 val;
+
+	hwirq =	data->hwirq % PCIE_MSI_IRQS_PER_SET;
+
+	raw_spin_lock_irqsave(&pcie->irq_lock, flags);
+	val = readl_relaxed(msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
+	val &= ~BIT(hwirq);
+	writel_relaxed(val, msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
+	raw_spin_unlock_irqrestore(&pcie->irq_lock, flags);
+}
+
+static void mtk_msi_bottom_irq_unmask(struct irq_data *data)
+{
+	struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data);
+	struct mtk_gen3_pcie *pcie = data->domain->host_data;
+	unsigned long hwirq, flags;
+	u32 val;
+
+	hwirq =	data->hwirq % PCIE_MSI_IRQS_PER_SET;
+
+	raw_spin_lock_irqsave(&pcie->irq_lock, flags);
+	val = readl_relaxed(msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
+	val |= BIT(hwirq);
+	writel_relaxed(val, msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
+	raw_spin_unlock_irqrestore(&pcie->irq_lock, flags);
+}
+
+static struct irq_chip mtk_msi_bottom_irq_chip = {
+	.irq_ack		= mtk_msi_bottom_irq_ack,
+	.irq_mask		= mtk_msi_bottom_irq_mask,
+	.irq_unmask		= mtk_msi_bottom_irq_unmask,
+	.irq_compose_msi_msg	= mtk_compose_msi_msg,
+	.irq_set_affinity	= mtk_pcie_set_affinity,
+	.name			= "MSI",
+};
+
+static int mtk_msi_bottom_domain_alloc(struct irq_domain *domain,
+				       unsigned int virq, unsigned int nr_irqs,
+				       void *arg)
+{
+	struct mtk_gen3_pcie *pcie = domain->host_data;
+	struct mtk_msi_set *msi_set;
+	int i, hwirq, set_idx;
+
+	mutex_lock(&pcie->lock);
+
+	hwirq = bitmap_find_free_region(pcie->msi_irq_in_use, PCIE_MSI_IRQS_NUM,
+					order_base_2(nr_irqs));
+
+	mutex_unlock(&pcie->lock);
+
+	if (hwirq < 0)
+		return -ENOSPC;
+
+	set_idx = hwirq / PCIE_MSI_IRQS_PER_SET;
+	msi_set = &pcie->msi_sets[set_idx];
+
+	for (i = 0; i < nr_irqs; i++)
+		irq_domain_set_info(domain, virq + i, hwirq + i,
+				    &mtk_msi_bottom_irq_chip, msi_set,
+				    handle_edge_irq, NULL, NULL);
+
+	return 0;
+}
+
+static void mtk_msi_bottom_domain_free(struct irq_domain *domain,
+				       unsigned int virq, unsigned int nr_irqs)
+{
+	struct mtk_gen3_pcie *pcie = domain->host_data;
+	struct irq_data *data = irq_domain_get_irq_data(domain, virq);
+
+	mutex_lock(&pcie->lock);
+
+	bitmap_release_region(pcie->msi_irq_in_use, data->hwirq,
+			      order_base_2(nr_irqs));
+
+	mutex_unlock(&pcie->lock);
+
+	irq_domain_free_irqs_common(domain, virq, nr_irqs);
+}
+
+static const struct irq_domain_ops mtk_msi_bottom_domain_ops = {
+	.alloc = mtk_msi_bottom_domain_alloc,
+	.free = mtk_msi_bottom_domain_free,
+};
+
+static void mtk_intx_mask(struct irq_data *data)
+{
+	struct mtk_gen3_pcie *pcie = irq_data_get_irq_chip_data(data);
+	unsigned long flags;
+	u32 val;
+
+	raw_spin_lock_irqsave(&pcie->irq_lock, flags);
+	val = readl_relaxed(pcie->base + PCIE_INT_ENABLE_REG);
+	val &= ~BIT(data->hwirq + PCIE_INTX_SHIFT);
+	writel_relaxed(val, pcie->base + PCIE_INT_ENABLE_REG);
+	raw_spin_unlock_irqrestore(&pcie->irq_lock, flags);
+}
+
+static void mtk_intx_unmask(struct irq_data *data)
+{
+	struct mtk_gen3_pcie *pcie = irq_data_get_irq_chip_data(data);
+	unsigned long flags;
+	u32 val;
+
+	raw_spin_lock_irqsave(&pcie->irq_lock, flags);
+	val = readl_relaxed(pcie->base + PCIE_INT_ENABLE_REG);
+	val |= BIT(data->hwirq + PCIE_INTX_SHIFT);
+	writel_relaxed(val, pcie->base + PCIE_INT_ENABLE_REG);
+	raw_spin_unlock_irqrestore(&pcie->irq_lock, flags);
+}
+
+/**
+ * mtk_intx_eoi() - Clear INTx IRQ status at the end of interrupt
+ * @data: pointer to chip specific data
+ *
+ * As an emulated level IRQ, its interrupt status will remain
+ * until the corresponding de-assert message is received; hence that
+ * the status can only be cleared when the interrupt has been serviced.
+ */
+static void mtk_intx_eoi(struct irq_data *data)
+{
+	struct mtk_gen3_pcie *pcie = irq_data_get_irq_chip_data(data);
+	unsigned long hwirq;
+
+	hwirq = data->hwirq + PCIE_INTX_SHIFT;
+	writel_relaxed(BIT(hwirq), pcie->base + PCIE_INT_STATUS_REG);
+}
+
+static struct irq_chip mtk_intx_irq_chip = {
+	.irq_mask		= mtk_intx_mask,
+	.irq_unmask		= mtk_intx_unmask,
+	.irq_eoi		= mtk_intx_eoi,
+	.irq_set_affinity	= mtk_pcie_set_affinity,
+	.name			= "INTx",
+};
+
+static int mtk_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
+			     irq_hw_number_t hwirq)
+{
+	irq_set_chip_data(irq, domain->host_data);
+	irq_set_chip_and_handler_name(irq, &mtk_intx_irq_chip,
+				      handle_fasteoi_irq, "INTx");
+	return 0;
+}
+
+static const struct irq_domain_ops intx_domain_ops = {
+	.map = mtk_pcie_intx_map,
+};
+
+static int mtk_pcie_init_irq_domains(struct mtk_gen3_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct device_node *intc_node, *node = dev->of_node;
+	int ret;
+
+	raw_spin_lock_init(&pcie->irq_lock);
+
+	/* Setup INTx */
+	intc_node = of_get_child_by_name(node, "interrupt-controller");
+	if (!intc_node) {
+		dev_err(dev, "missing interrupt-controller node\n");
+		return -ENODEV;
+	}
+
+	pcie->intx_domain = irq_domain_add_linear(intc_node, PCI_NUM_INTX,
+						  &intx_domain_ops, pcie);
+	if (!pcie->intx_domain) {
+		dev_err(dev, "failed to create INTx IRQ domain\n");
+		ret = -ENODEV;
+		goto out_put_node;
+	}
+
+	/* Setup MSI */
+	mutex_init(&pcie->lock);
+
+	pcie->msi_bottom_domain = irq_domain_add_linear(node, PCIE_MSI_IRQS_NUM,
+				  &mtk_msi_bottom_domain_ops, pcie);
+	if (!pcie->msi_bottom_domain) {
+		dev_err(dev, "failed to create MSI bottom domain\n");
+		ret = -ENODEV;
+		goto err_msi_bottom_domain;
+	}
+
+	pcie->msi_domain = pci_msi_create_irq_domain(dev->fwnode,
+						     &mtk_msi_domain_info,
+						     pcie->msi_bottom_domain);
+	if (!pcie->msi_domain) {
+		dev_err(dev, "failed to create MSI domain\n");
+		ret = -ENODEV;
+		goto err_msi_domain;
+	}
+
+	of_node_put(intc_node);
+	return 0;
+
+err_msi_domain:
+	irq_domain_remove(pcie->msi_bottom_domain);
+err_msi_bottom_domain:
+	irq_domain_remove(pcie->intx_domain);
+out_put_node:
+	of_node_put(intc_node);
+	return ret;
+}
+
+static void mtk_pcie_irq_teardown(struct mtk_gen3_pcie *pcie)
+{
+	irq_set_chained_handler_and_data(pcie->irq, NULL, NULL);
+
+	if (pcie->intx_domain)
+		irq_domain_remove(pcie->intx_domain);
+
+	if (pcie->msi_domain)
+		irq_domain_remove(pcie->msi_domain);
+
+	if (pcie->msi_bottom_domain)
+		irq_domain_remove(pcie->msi_bottom_domain);
+
+	irq_dispose_mapping(pcie->irq);
+}
+
+static void mtk_pcie_msi_handler(struct mtk_gen3_pcie *pcie, int set_idx)
+{
+	struct mtk_msi_set *msi_set = &pcie->msi_sets[set_idx];
+	unsigned long msi_enable, msi_status;
+	irq_hw_number_t bit, hwirq;
+
+	msi_enable = readl_relaxed(msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
+
+	do {
+		msi_status = readl_relaxed(msi_set->base +
+					   PCIE_MSI_SET_STATUS_OFFSET);
+		msi_status &= msi_enable;
+		if (!msi_status)
+			break;
+
+		for_each_set_bit(bit, &msi_status, PCIE_MSI_IRQS_PER_SET) {
+			hwirq = bit + set_idx * PCIE_MSI_IRQS_PER_SET;
+			generic_handle_domain_irq(pcie->msi_bottom_domain, hwirq);
+		}
+	} while (true);
+}
+
+static void mtk_pcie_irq_handler(struct irq_desc *desc)
+{
+	struct mtk_gen3_pcie *pcie = irq_desc_get_handler_data(desc);
+	struct irq_chip *irqchip = irq_desc_get_chip(desc);
+	unsigned long status;
+	irq_hw_number_t irq_bit = PCIE_INTX_SHIFT;
+
+	chained_irq_enter(irqchip, desc);
+
+	status = readl_relaxed(pcie->base + PCIE_INT_STATUS_REG);
+	for_each_set_bit_from(irq_bit, &status, PCI_NUM_INTX +
+			      PCIE_INTX_SHIFT)
+		generic_handle_domain_irq(pcie->intx_domain,
+					  irq_bit - PCIE_INTX_SHIFT);
+
+	irq_bit = PCIE_MSI_SHIFT;
+	for_each_set_bit_from(irq_bit, &status, PCIE_MSI_SET_NUM +
+			      PCIE_MSI_SHIFT) {
+		mtk_pcie_msi_handler(pcie, irq_bit - PCIE_MSI_SHIFT);
+
+		writel_relaxed(BIT(irq_bit), pcie->base + PCIE_INT_STATUS_REG);
+	}
+
+	chained_irq_exit(irqchip, desc);
+}
+
+static int mtk_pcie_setup_irq(struct mtk_gen3_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	int err;
+
+	err = mtk_pcie_init_irq_domains(pcie);
+	if (err)
+		return err;
+
+	pcie->irq = platform_get_irq(pdev, 0);
+	if (pcie->irq < 0)
+		return pcie->irq;
+
+	irq_set_chained_handler_and_data(pcie->irq, mtk_pcie_irq_handler, pcie);
+
+	return 0;
+}
+
+static int mtk_pcie_parse_port(struct mtk_gen3_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct resource *regs;
+	int ret;
+
+	regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pcie-mac");
+	if (!regs)
+		return -EINVAL;
+	pcie->base = devm_ioremap_resource(dev, regs);
+	if (IS_ERR(pcie->base)) {
+		dev_err(dev, "failed to map register base\n");
+		return PTR_ERR(pcie->base);
+	}
+
+	pcie->reg_base = regs->start;
+
+	pcie->phy_reset = devm_reset_control_get_optional_exclusive(dev, "phy");
+	if (IS_ERR(pcie->phy_reset)) {
+		ret = PTR_ERR(pcie->phy_reset);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "failed to get PHY reset\n");
+
+		return ret;
+	}
+
+	pcie->mac_reset = devm_reset_control_get_optional_exclusive(dev, "mac");
+	if (IS_ERR(pcie->mac_reset)) {
+		ret = PTR_ERR(pcie->mac_reset);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "failed to get MAC reset\n");
+
+		return ret;
+	}
+
+	pcie->phy = devm_phy_optional_get(dev, "pcie-phy");
+	if (IS_ERR(pcie->phy)) {
+		ret = PTR_ERR(pcie->phy);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "failed to get PHY\n");
+
+		return ret;
+	}
+
+	pcie->num_clks = devm_clk_bulk_get_all(dev, &pcie->clks);
+	if (pcie->num_clks < 0) {
+		dev_err(dev, "failed to get clocks\n");
+		return pcie->num_clks;
+	}
+
+	return 0;
+}
+
+static int mtk_pcie_power_up(struct mtk_gen3_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	int err;
+
+	/* PHY power on and enable pipe clock */
+	reset_control_deassert(pcie->phy_reset);
+
+	err = phy_init(pcie->phy);
+	if (err) {
+		dev_err(dev, "failed to initialize PHY\n");
+		goto err_phy_init;
+	}
+
+	err = phy_power_on(pcie->phy);
+	if (err) {
+		dev_err(dev, "failed to power on PHY\n");
+		goto err_phy_on;
+	}
+
+	/* MAC power on and enable transaction layer clocks */
+	reset_control_deassert(pcie->mac_reset);
+
+	pm_runtime_enable(dev);
+	pm_runtime_get_sync(dev);
+
+	err = clk_bulk_prepare_enable(pcie->num_clks, pcie->clks);
+	if (err) {
+		dev_err(dev, "failed to enable clocks\n");
+		goto err_clk_init;
+	}
+
+	return 0;
+
+err_clk_init:
+	pm_runtime_put_sync(dev);
+	pm_runtime_disable(dev);
+	reset_control_assert(pcie->mac_reset);
+	phy_power_off(pcie->phy);
+err_phy_on:
+	phy_exit(pcie->phy);
+err_phy_init:
+	reset_control_assert(pcie->phy_reset);
+
+	return err;
+}
+
+static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie)
+{
+	clk_bulk_disable_unprepare(pcie->num_clks, pcie->clks);
+
+	pm_runtime_put_sync(pcie->dev);
+	pm_runtime_disable(pcie->dev);
+	reset_control_assert(pcie->mac_reset);
+
+	phy_power_off(pcie->phy);
+	phy_exit(pcie->phy);
+	reset_control_assert(pcie->phy_reset);
+}
+
+static int mtk_pcie_setup(struct mtk_gen3_pcie *pcie)
+{
+	int err;
+
+	err = mtk_pcie_parse_port(pcie);
+	if (err)
+		return err;
+
+	/*
+	 * The controller may have been left out of reset by the bootloader
+	 * so make sure that we get a clean start by asserting resets here.
+	 */
+	reset_control_assert(pcie->phy_reset);
+	reset_control_assert(pcie->mac_reset);
+	usleep_range(10, 20);
+
+	/* Don't touch the hardware registers before power up */
+	err = mtk_pcie_power_up(pcie);
+	if (err)
+		return err;
+
+	/* Try link up */
+	err = mtk_pcie_startup_port(pcie);
+	if (err)
+		goto err_setup;
+
+	err = mtk_pcie_setup_irq(pcie);
+	if (err)
+		goto err_setup;
+
+	return 0;
+
+err_setup:
+	mtk_pcie_power_down(pcie);
+
+	return err;
+}
+
+static int mtk_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct mtk_gen3_pcie *pcie;
+	struct pci_host_bridge *host;
+	int err;
+
+	host = devm_pci_alloc_host_bridge(dev, sizeof(*pcie));
+	if (!host)
+		return -ENOMEM;
+
+	pcie = pci_host_bridge_priv(host);
+
+	pcie->dev = dev;
+	platform_set_drvdata(pdev, pcie);
+
+	err = mtk_pcie_setup(pcie);
+	if (err)
+		return err;
+
+	host->ops = &mtk_pcie_ops;
+	host->sysdata = pcie;
+
+	err = pci_host_probe(host);
+	if (err) {
+		mtk_pcie_irq_teardown(pcie);
+		mtk_pcie_power_down(pcie);
+		return err;
+	}
+
+	return 0;
+}
+
+static void mtk_pcie_remove(struct platform_device *pdev)
+{
+	struct mtk_gen3_pcie *pcie = platform_get_drvdata(pdev);
+	struct pci_host_bridge *host = pci_host_bridge_from_priv(pcie);
+
+	pci_lock_rescan_remove();
+	pci_stop_root_bus(host->bus);
+	pci_remove_root_bus(host->bus);
+	pci_unlock_rescan_remove();
+
+	mtk_pcie_irq_teardown(pcie);
+	mtk_pcie_power_down(pcie);
+}
+
+static void mtk_pcie_irq_save(struct mtk_gen3_pcie *pcie)
+{
+	int i;
+
+	raw_spin_lock(&pcie->irq_lock);
+
+	pcie->saved_irq_state = readl_relaxed(pcie->base + PCIE_INT_ENABLE_REG);
+
+	for (i = 0; i < PCIE_MSI_SET_NUM; i++) {
+		struct mtk_msi_set *msi_set = &pcie->msi_sets[i];
+
+		msi_set->saved_irq_state = readl_relaxed(msi_set->base +
+					   PCIE_MSI_SET_ENABLE_OFFSET);
+	}
+
+	raw_spin_unlock(&pcie->irq_lock);
+}
+
+static void mtk_pcie_irq_restore(struct mtk_gen3_pcie *pcie)
+{
+	int i;
+
+	raw_spin_lock(&pcie->irq_lock);
+
+	writel_relaxed(pcie->saved_irq_state, pcie->base + PCIE_INT_ENABLE_REG);
+
+	for (i = 0; i < PCIE_MSI_SET_NUM; i++) {
+		struct mtk_msi_set *msi_set = &pcie->msi_sets[i];
+
+		writel_relaxed(msi_set->saved_irq_state,
+			       msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
+	}
+
+	raw_spin_unlock(&pcie->irq_lock);
+}
+
+static int mtk_pcie_turn_off_link(struct mtk_gen3_pcie *pcie)
+{
+	u32 val;
+
+	val = readl_relaxed(pcie->base + PCIE_ICMD_PM_REG);
+	val |= PCIE_TURN_OFF_LINK;
+	writel_relaxed(val, pcie->base + PCIE_ICMD_PM_REG);
+
+	/* Check the link is L2 */
+	return readl_poll_timeout(pcie->base + PCIE_LTSSM_STATUS_REG, val,
+				  (PCIE_LTSSM_STATE(val) ==
+				   PCIE_LTSSM_STATE_L2_IDLE), 20,
+				   50 * USEC_PER_MSEC);
+}
+
+static int mtk_pcie_suspend_noirq(struct device *dev)
+{
+	struct mtk_gen3_pcie *pcie = dev_get_drvdata(dev);
+	int err;
+	u32 val;
+
+	/* Trigger link to L2 state */
+	err = mtk_pcie_turn_off_link(pcie);
+	if (err) {
+		dev_err(pcie->dev, "cannot enter L2 state\n");
+		return err;
+	}
+
+	/* Pull down the PERST# pin */
+	val = readl_relaxed(pcie->base + PCIE_RST_CTRL_REG);
+	val |= PCIE_PE_RSTB;
+	writel_relaxed(val, pcie->base + PCIE_RST_CTRL_REG);
+
+	dev_dbg(pcie->dev, "entered L2 states successfully");
+
+	mtk_pcie_irq_save(pcie);
+	mtk_pcie_power_down(pcie);
+
+	return 0;
+}
+
+static int mtk_pcie_resume_noirq(struct device *dev)
+{
+	struct mtk_gen3_pcie *pcie = dev_get_drvdata(dev);
+	int err;
+
+	err = mtk_pcie_power_up(pcie);
+	if (err)
+		return err;
+
+	err = mtk_pcie_startup_port(pcie);
+	if (err) {
+		mtk_pcie_power_down(pcie);
+		return err;
+	}
+
+	mtk_pcie_irq_restore(pcie);
+
+	return 0;
+}
+
+static const struct dev_pm_ops mtk_pcie_pm_ops = {
+	NOIRQ_SYSTEM_SLEEP_PM_OPS(mtk_pcie_suspend_noirq,
+				  mtk_pcie_resume_noirq)
+};
+
+static const struct of_device_id mtk_pcie_of_match[] = {
+	{ .compatible = "mediatek,mt8192-pcie" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mtk_pcie_of_match);
+
+static struct platform_driver mtk_pcie_driver = {
+	.probe = mtk_pcie_probe,
+	.remove_new = mtk_pcie_remove,
+	.driver = {
+		.name = "mtk-pcie-gen3",
+		.of_match_table = mtk_pcie_of_match,
+		.pm = &mtk_pcie_pm_ops,
+	},
+};
+
+module_platform_driver(mtk_pcie_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pcie-mediatek.c b/drivers/pci/controller/pcie-mediatek.c
new file mode 100644
index 000000000..48372013f
--- /dev/null
+++ b/drivers/pci/controller/pcie-mediatek.c
@@ -0,0 +1,1255 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MediaTek PCIe host controller driver.
+ *
+ * Copyright (c) 2017 MediaTek Inc.
+ * Author: Ryder Lee <ryder.lee@mediatek.com>
+ *	   Honghui Zhang <honghui.zhang@mediatek.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/iopoll.h>
+#include <linux/irq.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/msi.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+
+#include "../pci.h"
+
+/* PCIe shared registers */
+#define PCIE_SYS_CFG		0x00
+#define PCIE_INT_ENABLE		0x0c
+#define PCIE_CFG_ADDR		0x20
+#define PCIE_CFG_DATA		0x24
+
+/* PCIe per port registers */
+#define PCIE_BAR0_SETUP		0x10
+#define PCIE_CLASS		0x34
+#define PCIE_LINK_STATUS	0x50
+
+#define PCIE_PORT_INT_EN(x)	BIT(20 + (x))
+#define PCIE_PORT_PERST(x)	BIT(1 + (x))
+#define PCIE_PORT_LINKUP	BIT(0)
+#define PCIE_BAR_MAP_MAX	GENMASK(31, 16)
+
+#define PCIE_BAR_ENABLE		BIT(0)
+#define PCIE_REVISION_ID	BIT(0)
+#define PCIE_CLASS_CODE		(0x60400 << 8)
+#define PCIE_CONF_REG(regn)	(((regn) & GENMASK(7, 2)) | \
+				((((regn) >> 8) & GENMASK(3, 0)) << 24))
+#define PCIE_CONF_FUN(fun)	(((fun) << 8) & GENMASK(10, 8))
+#define PCIE_CONF_DEV(dev)	(((dev) << 11) & GENMASK(15, 11))
+#define PCIE_CONF_BUS(bus)	(((bus) << 16) & GENMASK(23, 16))
+#define PCIE_CONF_ADDR(regn, fun, dev, bus) \
+	(PCIE_CONF_REG(regn) | PCIE_CONF_FUN(fun) | \
+	 PCIE_CONF_DEV(dev) | PCIE_CONF_BUS(bus))
+
+/* MediaTek specific configuration registers */
+#define PCIE_FTS_NUM		0x70c
+#define PCIE_FTS_NUM_MASK	GENMASK(15, 8)
+#define PCIE_FTS_NUM_L0(x)	((x) & 0xff << 8)
+
+#define PCIE_FC_CREDIT		0x73c
+#define PCIE_FC_CREDIT_MASK	(GENMASK(31, 31) | GENMASK(28, 16))
+#define PCIE_FC_CREDIT_VAL(x)	((x) << 16)
+
+/* PCIe V2 share registers */
+#define PCIE_SYS_CFG_V2		0x0
+#define PCIE_CSR_LTSSM_EN(x)	BIT(0 + (x) * 8)
+#define PCIE_CSR_ASPM_L1_EN(x)	BIT(1 + (x) * 8)
+
+/* PCIe V2 per-port registers */
+#define PCIE_MSI_VECTOR		0x0c0
+
+#define PCIE_CONF_VEND_ID	0x100
+#define PCIE_CONF_DEVICE_ID	0x102
+#define PCIE_CONF_CLASS_ID	0x106
+
+#define PCIE_INT_MASK		0x420
+#define INTX_MASK		GENMASK(19, 16)
+#define INTX_SHIFT		16
+#define PCIE_INT_STATUS		0x424
+#define MSI_STATUS		BIT(23)
+#define PCIE_IMSI_STATUS	0x42c
+#define PCIE_IMSI_ADDR		0x430
+#define MSI_MASK		BIT(23)
+#define MTK_MSI_IRQS_NUM	32
+
+#define PCIE_AHB_TRANS_BASE0_L	0x438
+#define PCIE_AHB_TRANS_BASE0_H	0x43c
+#define AHB2PCIE_SIZE(x)	((x) & GENMASK(4, 0))
+#define PCIE_AXI_WINDOW0	0x448
+#define WIN_ENABLE		BIT(7)
+/*
+ * Define PCIe to AHB window size as 2^33 to support max 8GB address space
+ * translate, support least 4GB DRAM size access from EP DMA(physical DRAM
+ * start from 0x40000000).
+ */
+#define PCIE2AHB_SIZE	0x21
+
+/* PCIe V2 configuration transaction header */
+#define PCIE_CFG_HEADER0	0x460
+#define PCIE_CFG_HEADER1	0x464
+#define PCIE_CFG_HEADER2	0x468
+#define PCIE_CFG_WDATA		0x470
+#define PCIE_APP_TLP_REQ	0x488
+#define PCIE_CFG_RDATA		0x48c
+#define APP_CFG_REQ		BIT(0)
+#define APP_CPL_STATUS		GENMASK(7, 5)
+
+#define CFG_WRRD_TYPE_0		4
+#define CFG_WR_FMT		2
+#define CFG_RD_FMT		0
+
+#define CFG_DW0_LENGTH(length)	((length) & GENMASK(9, 0))
+#define CFG_DW0_TYPE(type)	(((type) << 24) & GENMASK(28, 24))
+#define CFG_DW0_FMT(fmt)	(((fmt) << 29) & GENMASK(31, 29))
+#define CFG_DW2_REGN(regn)	((regn) & GENMASK(11, 2))
+#define CFG_DW2_FUN(fun)	(((fun) << 16) & GENMASK(18, 16))
+#define CFG_DW2_DEV(dev)	(((dev) << 19) & GENMASK(23, 19))
+#define CFG_DW2_BUS(bus)	(((bus) << 24) & GENMASK(31, 24))
+#define CFG_HEADER_DW0(type, fmt) \
+	(CFG_DW0_LENGTH(1) | CFG_DW0_TYPE(type) | CFG_DW0_FMT(fmt))
+#define CFG_HEADER_DW1(where, size) \
+	(GENMASK(((size) - 1), 0) << ((where) & 0x3))
+#define CFG_HEADER_DW2(regn, fun, dev, bus) \
+	(CFG_DW2_REGN(regn) | CFG_DW2_FUN(fun) | \
+	CFG_DW2_DEV(dev) | CFG_DW2_BUS(bus))
+
+#define PCIE_RST_CTRL		0x510
+#define PCIE_PHY_RSTB		BIT(0)
+#define PCIE_PIPE_SRSTB		BIT(1)
+#define PCIE_MAC_SRSTB		BIT(2)
+#define PCIE_CRSTB		BIT(3)
+#define PCIE_PERSTB		BIT(8)
+#define PCIE_LINKDOWN_RST_EN	GENMASK(15, 13)
+#define PCIE_LINK_STATUS_V2	0x804
+#define PCIE_PORT_LINKUP_V2	BIT(10)
+
+struct mtk_pcie_port;
+
+/**
+ * struct mtk_pcie_soc - differentiate between host generations
+ * @need_fix_class_id: whether this host's class ID needed to be fixed or not
+ * @need_fix_device_id: whether this host's device ID needed to be fixed or not
+ * @no_msi: Bridge has no MSI support, and relies on an external block
+ * @device_id: device ID which this host need to be fixed
+ * @ops: pointer to configuration access functions
+ * @startup: pointer to controller setting functions
+ * @setup_irq: pointer to initialize IRQ functions
+ */
+struct mtk_pcie_soc {
+	bool need_fix_class_id;
+	bool need_fix_device_id;
+	bool no_msi;
+	unsigned int device_id;
+	struct pci_ops *ops;
+	int (*startup)(struct mtk_pcie_port *port);
+	int (*setup_irq)(struct mtk_pcie_port *port, struct device_node *node);
+};
+
+/**
+ * struct mtk_pcie_port - PCIe port information
+ * @base: IO mapped register base
+ * @list: port list
+ * @pcie: pointer to PCIe host info
+ * @reset: pointer to port reset control
+ * @sys_ck: pointer to transaction/data link layer clock
+ * @ahb_ck: pointer to AHB slave interface operating clock for CSR access
+ *          and RC initiated MMIO access
+ * @axi_ck: pointer to application layer MMIO channel operating clock
+ * @aux_ck: pointer to pe2_mac_bridge and pe2_mac_core operating clock
+ *          when pcie_mac_ck/pcie_pipe_ck is turned off
+ * @obff_ck: pointer to OBFF functional block operating clock
+ * @pipe_ck: pointer to LTSSM and PHY/MAC layer operating clock
+ * @phy: pointer to PHY control block
+ * @slot: port slot
+ * @irq: GIC irq
+ * @irq_domain: legacy INTx IRQ domain
+ * @inner_domain: inner IRQ domain
+ * @msi_domain: MSI IRQ domain
+ * @lock: protect the msi_irq_in_use bitmap
+ * @msi_irq_in_use: bit map for assigned MSI IRQ
+ */
+struct mtk_pcie_port {
+	void __iomem *base;
+	struct list_head list;
+	struct mtk_pcie *pcie;
+	struct reset_control *reset;
+	struct clk *sys_ck;
+	struct clk *ahb_ck;
+	struct clk *axi_ck;
+	struct clk *aux_ck;
+	struct clk *obff_ck;
+	struct clk *pipe_ck;
+	struct phy *phy;
+	u32 slot;
+	int irq;
+	struct irq_domain *irq_domain;
+	struct irq_domain *inner_domain;
+	struct irq_domain *msi_domain;
+	struct mutex lock;
+	DECLARE_BITMAP(msi_irq_in_use, MTK_MSI_IRQS_NUM);
+};
+
+/**
+ * struct mtk_pcie - PCIe host information
+ * @dev: pointer to PCIe device
+ * @base: IO mapped register base
+ * @cfg: IO mapped register map for PCIe config
+ * @free_ck: free-run reference clock
+ * @mem: non-prefetchable memory resource
+ * @ports: pointer to PCIe port information
+ * @soc: pointer to SoC-dependent operations
+ */
+struct mtk_pcie {
+	struct device *dev;
+	void __iomem *base;
+	struct regmap *cfg;
+	struct clk *free_ck;
+
+	struct list_head ports;
+	const struct mtk_pcie_soc *soc;
+};
+
+static void mtk_pcie_subsys_powerdown(struct mtk_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+
+	clk_disable_unprepare(pcie->free_ck);
+
+	pm_runtime_put_sync(dev);
+	pm_runtime_disable(dev);
+}
+
+static void mtk_pcie_port_free(struct mtk_pcie_port *port)
+{
+	struct mtk_pcie *pcie = port->pcie;
+	struct device *dev = pcie->dev;
+
+	devm_iounmap(dev, port->base);
+	list_del(&port->list);
+	devm_kfree(dev, port);
+}
+
+static void mtk_pcie_put_resources(struct mtk_pcie *pcie)
+{
+	struct mtk_pcie_port *port, *tmp;
+
+	list_for_each_entry_safe(port, tmp, &pcie->ports, list) {
+		phy_power_off(port->phy);
+		phy_exit(port->phy);
+		clk_disable_unprepare(port->pipe_ck);
+		clk_disable_unprepare(port->obff_ck);
+		clk_disable_unprepare(port->axi_ck);
+		clk_disable_unprepare(port->aux_ck);
+		clk_disable_unprepare(port->ahb_ck);
+		clk_disable_unprepare(port->sys_ck);
+		mtk_pcie_port_free(port);
+	}
+
+	mtk_pcie_subsys_powerdown(pcie);
+}
+
+static int mtk_pcie_check_cfg_cpld(struct mtk_pcie_port *port)
+{
+	u32 val;
+	int err;
+
+	err = readl_poll_timeout_atomic(port->base + PCIE_APP_TLP_REQ, val,
+					!(val & APP_CFG_REQ), 10,
+					100 * USEC_PER_MSEC);
+	if (err)
+		return PCIBIOS_SET_FAILED;
+
+	if (readl(port->base + PCIE_APP_TLP_REQ) & APP_CPL_STATUS)
+		return PCIBIOS_SET_FAILED;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int mtk_pcie_hw_rd_cfg(struct mtk_pcie_port *port, u32 bus, u32 devfn,
+			      int where, int size, u32 *val)
+{
+	u32 tmp;
+
+	/* Write PCIe configuration transaction header for Cfgrd */
+	writel(CFG_HEADER_DW0(CFG_WRRD_TYPE_0, CFG_RD_FMT),
+	       port->base + PCIE_CFG_HEADER0);
+	writel(CFG_HEADER_DW1(where, size), port->base + PCIE_CFG_HEADER1);
+	writel(CFG_HEADER_DW2(where, PCI_FUNC(devfn), PCI_SLOT(devfn), bus),
+	       port->base + PCIE_CFG_HEADER2);
+
+	/* Trigger h/w to transmit Cfgrd TLP */
+	tmp = readl(port->base + PCIE_APP_TLP_REQ);
+	tmp |= APP_CFG_REQ;
+	writel(tmp, port->base + PCIE_APP_TLP_REQ);
+
+	/* Check completion status */
+	if (mtk_pcie_check_cfg_cpld(port))
+		return PCIBIOS_SET_FAILED;
+
+	/* Read cpld payload of Cfgrd */
+	*val = readl(port->base + PCIE_CFG_RDATA);
+
+	if (size == 1)
+		*val = (*val >> (8 * (where & 3))) & 0xff;
+	else if (size == 2)
+		*val = (*val >> (8 * (where & 3))) & 0xffff;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int mtk_pcie_hw_wr_cfg(struct mtk_pcie_port *port, u32 bus, u32 devfn,
+			      int where, int size, u32 val)
+{
+	/* Write PCIe configuration transaction header for Cfgwr */
+	writel(CFG_HEADER_DW0(CFG_WRRD_TYPE_0, CFG_WR_FMT),
+	       port->base + PCIE_CFG_HEADER0);
+	writel(CFG_HEADER_DW1(where, size), port->base + PCIE_CFG_HEADER1);
+	writel(CFG_HEADER_DW2(where, PCI_FUNC(devfn), PCI_SLOT(devfn), bus),
+	       port->base + PCIE_CFG_HEADER2);
+
+	/* Write Cfgwr data */
+	val = val << 8 * (where & 3);
+	writel(val, port->base + PCIE_CFG_WDATA);
+
+	/* Trigger h/w to transmit Cfgwr TLP */
+	val = readl(port->base + PCIE_APP_TLP_REQ);
+	val |= APP_CFG_REQ;
+	writel(val, port->base + PCIE_APP_TLP_REQ);
+
+	/* Check completion status */
+	return mtk_pcie_check_cfg_cpld(port);
+}
+
+static struct mtk_pcie_port *mtk_pcie_find_port(struct pci_bus *bus,
+						unsigned int devfn)
+{
+	struct mtk_pcie *pcie = bus->sysdata;
+	struct mtk_pcie_port *port;
+	struct pci_dev *dev = NULL;
+
+	/*
+	 * Walk the bus hierarchy to get the devfn value
+	 * of the port in the root bus.
+	 */
+	while (bus && bus->number) {
+		dev = bus->self;
+		bus = dev->bus;
+		devfn = dev->devfn;
+	}
+
+	list_for_each_entry(port, &pcie->ports, list)
+		if (port->slot == PCI_SLOT(devfn))
+			return port;
+
+	return NULL;
+}
+
+static int mtk_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
+				int where, int size, u32 *val)
+{
+	struct mtk_pcie_port *port;
+	u32 bn = bus->number;
+
+	port = mtk_pcie_find_port(bus, devfn);
+	if (!port)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return mtk_pcie_hw_rd_cfg(port, bn, devfn, where, size, val);
+}
+
+static int mtk_pcie_config_write(struct pci_bus *bus, unsigned int devfn,
+				 int where, int size, u32 val)
+{
+	struct mtk_pcie_port *port;
+	u32 bn = bus->number;
+
+	port = mtk_pcie_find_port(bus, devfn);
+	if (!port)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return mtk_pcie_hw_wr_cfg(port, bn, devfn, where, size, val);
+}
+
+static struct pci_ops mtk_pcie_ops_v2 = {
+	.read  = mtk_pcie_config_read,
+	.write = mtk_pcie_config_write,
+};
+
+static void mtk_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct mtk_pcie_port *port = irq_data_get_irq_chip_data(data);
+	phys_addr_t addr;
+
+	/* MT2712/MT7622 only support 32-bit MSI addresses */
+	addr = virt_to_phys(port->base + PCIE_MSI_VECTOR);
+	msg->address_hi = 0;
+	msg->address_lo = lower_32_bits(addr);
+
+	msg->data = data->hwirq;
+
+	dev_dbg(port->pcie->dev, "msi#%d address_hi %#x address_lo %#x\n",
+		(int)data->hwirq, msg->address_hi, msg->address_lo);
+}
+
+static int mtk_msi_set_affinity(struct irq_data *irq_data,
+				const struct cpumask *mask, bool force)
+{
+	 return -EINVAL;
+}
+
+static void mtk_msi_ack_irq(struct irq_data *data)
+{
+	struct mtk_pcie_port *port = irq_data_get_irq_chip_data(data);
+	u32 hwirq = data->hwirq;
+
+	writel(1 << hwirq, port->base + PCIE_IMSI_STATUS);
+}
+
+static struct irq_chip mtk_msi_bottom_irq_chip = {
+	.name			= "MTK MSI",
+	.irq_compose_msi_msg	= mtk_compose_msi_msg,
+	.irq_set_affinity	= mtk_msi_set_affinity,
+	.irq_ack		= mtk_msi_ack_irq,
+};
+
+static int mtk_pcie_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				     unsigned int nr_irqs, void *args)
+{
+	struct mtk_pcie_port *port = domain->host_data;
+	unsigned long bit;
+
+	WARN_ON(nr_irqs != 1);
+	mutex_lock(&port->lock);
+
+	bit = find_first_zero_bit(port->msi_irq_in_use, MTK_MSI_IRQS_NUM);
+	if (bit >= MTK_MSI_IRQS_NUM) {
+		mutex_unlock(&port->lock);
+		return -ENOSPC;
+	}
+
+	__set_bit(bit, port->msi_irq_in_use);
+
+	mutex_unlock(&port->lock);
+
+	irq_domain_set_info(domain, virq, bit, &mtk_msi_bottom_irq_chip,
+			    domain->host_data, handle_edge_irq,
+			    NULL, NULL);
+
+	return 0;
+}
+
+static void mtk_pcie_irq_domain_free(struct irq_domain *domain,
+				     unsigned int virq, unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct mtk_pcie_port *port = irq_data_get_irq_chip_data(d);
+
+	mutex_lock(&port->lock);
+
+	if (!test_bit(d->hwirq, port->msi_irq_in_use))
+		dev_err(port->pcie->dev, "trying to free unused MSI#%lu\n",
+			d->hwirq);
+	else
+		__clear_bit(d->hwirq, port->msi_irq_in_use);
+
+	mutex_unlock(&port->lock);
+
+	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+}
+
+static const struct irq_domain_ops msi_domain_ops = {
+	.alloc	= mtk_pcie_irq_domain_alloc,
+	.free	= mtk_pcie_irq_domain_free,
+};
+
+static struct irq_chip mtk_msi_irq_chip = {
+	.name		= "MTK PCIe MSI",
+	.irq_ack	= irq_chip_ack_parent,
+	.irq_mask	= pci_msi_mask_irq,
+	.irq_unmask	= pci_msi_unmask_irq,
+};
+
+static struct msi_domain_info mtk_msi_domain_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		   MSI_FLAG_PCI_MSIX),
+	.chip	= &mtk_msi_irq_chip,
+};
+
+static int mtk_pcie_allocate_msi_domains(struct mtk_pcie_port *port)
+{
+	struct fwnode_handle *fwnode = of_node_to_fwnode(port->pcie->dev->of_node);
+
+	mutex_init(&port->lock);
+
+	port->inner_domain = irq_domain_create_linear(fwnode, MTK_MSI_IRQS_NUM,
+						      &msi_domain_ops, port);
+	if (!port->inner_domain) {
+		dev_err(port->pcie->dev, "failed to create IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	port->msi_domain = pci_msi_create_irq_domain(fwnode, &mtk_msi_domain_info,
+						     port->inner_domain);
+	if (!port->msi_domain) {
+		dev_err(port->pcie->dev, "failed to create MSI domain\n");
+		irq_domain_remove(port->inner_domain);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void mtk_pcie_enable_msi(struct mtk_pcie_port *port)
+{
+	u32 val;
+	phys_addr_t msg_addr;
+
+	msg_addr = virt_to_phys(port->base + PCIE_MSI_VECTOR);
+	val = lower_32_bits(msg_addr);
+	writel(val, port->base + PCIE_IMSI_ADDR);
+
+	val = readl(port->base + PCIE_INT_MASK);
+	val &= ~MSI_MASK;
+	writel(val, port->base + PCIE_INT_MASK);
+}
+
+static void mtk_pcie_irq_teardown(struct mtk_pcie *pcie)
+{
+	struct mtk_pcie_port *port, *tmp;
+
+	list_for_each_entry_safe(port, tmp, &pcie->ports, list) {
+		irq_set_chained_handler_and_data(port->irq, NULL, NULL);
+
+		if (port->irq_domain)
+			irq_domain_remove(port->irq_domain);
+
+		if (IS_ENABLED(CONFIG_PCI_MSI)) {
+			if (port->msi_domain)
+				irq_domain_remove(port->msi_domain);
+			if (port->inner_domain)
+				irq_domain_remove(port->inner_domain);
+		}
+
+		irq_dispose_mapping(port->irq);
+	}
+}
+
+static int mtk_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
+			     irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &dummy_irq_chip, handle_simple_irq);
+	irq_set_chip_data(irq, domain->host_data);
+
+	return 0;
+}
+
+static const struct irq_domain_ops intx_domain_ops = {
+	.map = mtk_pcie_intx_map,
+};
+
+static int mtk_pcie_init_irq_domain(struct mtk_pcie_port *port,
+				    struct device_node *node)
+{
+	struct device *dev = port->pcie->dev;
+	struct device_node *pcie_intc_node;
+	int ret;
+
+	/* Setup INTx */
+	pcie_intc_node = of_get_next_child(node, NULL);
+	if (!pcie_intc_node) {
+		dev_err(dev, "no PCIe Intc node found\n");
+		return -ENODEV;
+	}
+
+	port->irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
+						 &intx_domain_ops, port);
+	of_node_put(pcie_intc_node);
+	if (!port->irq_domain) {
+		dev_err(dev, "failed to get INTx IRQ domain\n");
+		return -ENODEV;
+	}
+
+	if (IS_ENABLED(CONFIG_PCI_MSI)) {
+		ret = mtk_pcie_allocate_msi_domains(port);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void mtk_pcie_intr_handler(struct irq_desc *desc)
+{
+	struct mtk_pcie_port *port = irq_desc_get_handler_data(desc);
+	struct irq_chip *irqchip = irq_desc_get_chip(desc);
+	unsigned long status;
+	u32 bit = INTX_SHIFT;
+
+	chained_irq_enter(irqchip, desc);
+
+	status = readl(port->base + PCIE_INT_STATUS);
+	if (status & INTX_MASK) {
+		for_each_set_bit_from(bit, &status, PCI_NUM_INTX + INTX_SHIFT) {
+			/* Clear the INTx */
+			writel(1 << bit, port->base + PCIE_INT_STATUS);
+			generic_handle_domain_irq(port->irq_domain,
+						  bit - INTX_SHIFT);
+		}
+	}
+
+	if (IS_ENABLED(CONFIG_PCI_MSI)) {
+		if (status & MSI_STATUS){
+			unsigned long imsi_status;
+
+			/*
+			 * The interrupt status can be cleared even if the
+			 * MSI status remains pending. As such, given the
+			 * edge-triggered interrupt type, its status should
+			 * be cleared before being dispatched to the
+			 * handler of the underlying device.
+			 */
+			writel(MSI_STATUS, port->base + PCIE_INT_STATUS);
+			while ((imsi_status = readl(port->base + PCIE_IMSI_STATUS))) {
+				for_each_set_bit(bit, &imsi_status, MTK_MSI_IRQS_NUM)
+					generic_handle_domain_irq(port->inner_domain, bit);
+			}
+		}
+	}
+
+	chained_irq_exit(irqchip, desc);
+}
+
+static int mtk_pcie_setup_irq(struct mtk_pcie_port *port,
+			      struct device_node *node)
+{
+	struct mtk_pcie *pcie = port->pcie;
+	struct device *dev = pcie->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	int err;
+
+	err = mtk_pcie_init_irq_domain(port, node);
+	if (err) {
+		dev_err(dev, "failed to init PCIe IRQ domain\n");
+		return err;
+	}
+
+	if (of_property_present(dev->of_node, "interrupt-names"))
+		port->irq = platform_get_irq_byname(pdev, "pcie_irq");
+	else
+		port->irq = platform_get_irq(pdev, port->slot);
+
+	if (port->irq < 0)
+		return port->irq;
+
+	irq_set_chained_handler_and_data(port->irq,
+					 mtk_pcie_intr_handler, port);
+
+	return 0;
+}
+
+static int mtk_pcie_startup_port_v2(struct mtk_pcie_port *port)
+{
+	struct mtk_pcie *pcie = port->pcie;
+	struct pci_host_bridge *host = pci_host_bridge_from_priv(pcie);
+	struct resource *mem = NULL;
+	struct resource_entry *entry;
+	const struct mtk_pcie_soc *soc = port->pcie->soc;
+	u32 val;
+	int err;
+
+	entry = resource_list_first_type(&host->windows, IORESOURCE_MEM);
+	if (entry)
+		mem = entry->res;
+	if (!mem)
+		return -EINVAL;
+
+	/* MT7622 platforms need to enable LTSSM and ASPM from PCIe subsys */
+	if (pcie->base) {
+		val = readl(pcie->base + PCIE_SYS_CFG_V2);
+		val |= PCIE_CSR_LTSSM_EN(port->slot) |
+		       PCIE_CSR_ASPM_L1_EN(port->slot);
+		writel(val, pcie->base + PCIE_SYS_CFG_V2);
+	} else if (pcie->cfg) {
+		val = PCIE_CSR_LTSSM_EN(port->slot) |
+		      PCIE_CSR_ASPM_L1_EN(port->slot);
+		regmap_update_bits(pcie->cfg, PCIE_SYS_CFG_V2, val, val);
+	}
+
+	/* Assert all reset signals */
+	writel(0, port->base + PCIE_RST_CTRL);
+
+	/*
+	 * Enable PCIe link down reset, if link status changed from link up to
+	 * link down, this will reset MAC control registers and configuration
+	 * space.
+	 */
+	writel(PCIE_LINKDOWN_RST_EN, port->base + PCIE_RST_CTRL);
+
+	/*
+	 * Described in PCIe CEM specification sections 2.2 (PERST# Signal) and
+	 * 2.2.1 (Initial Power-Up (G3 to S0)). The deassertion of PERST# should
+	 * be delayed 100ms (TPVPERL) for the power and clock to become stable.
+	 */
+	msleep(100);
+
+	/* De-assert PHY, PE, PIPE, MAC and configuration reset	*/
+	val = readl(port->base + PCIE_RST_CTRL);
+	val |= PCIE_PHY_RSTB | PCIE_PERSTB | PCIE_PIPE_SRSTB |
+	       PCIE_MAC_SRSTB | PCIE_CRSTB;
+	writel(val, port->base + PCIE_RST_CTRL);
+
+	/* Set up vendor ID and class code */
+	if (soc->need_fix_class_id) {
+		val = PCI_VENDOR_ID_MEDIATEK;
+		writew(val, port->base + PCIE_CONF_VEND_ID);
+
+		val = PCI_CLASS_BRIDGE_PCI;
+		writew(val, port->base + PCIE_CONF_CLASS_ID);
+	}
+
+	if (soc->need_fix_device_id)
+		writew(soc->device_id, port->base + PCIE_CONF_DEVICE_ID);
+
+	/* 100ms timeout value should be enough for Gen1/2 training */
+	err = readl_poll_timeout(port->base + PCIE_LINK_STATUS_V2, val,
+				 !!(val & PCIE_PORT_LINKUP_V2), 20,
+				 100 * USEC_PER_MSEC);
+	if (err)
+		return -ETIMEDOUT;
+
+	/* Set INTx mask */
+	val = readl(port->base + PCIE_INT_MASK);
+	val &= ~INTX_MASK;
+	writel(val, port->base + PCIE_INT_MASK);
+
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		mtk_pcie_enable_msi(port);
+
+	/* Set AHB to PCIe translation windows */
+	val = lower_32_bits(mem->start) |
+	      AHB2PCIE_SIZE(fls(resource_size(mem)));
+	writel(val, port->base + PCIE_AHB_TRANS_BASE0_L);
+
+	val = upper_32_bits(mem->start);
+	writel(val, port->base + PCIE_AHB_TRANS_BASE0_H);
+
+	/* Set PCIe to AXI translation memory space.*/
+	val = PCIE2AHB_SIZE | WIN_ENABLE;
+	writel(val, port->base + PCIE_AXI_WINDOW0);
+
+	return 0;
+}
+
+static void __iomem *mtk_pcie_map_bus(struct pci_bus *bus,
+				      unsigned int devfn, int where)
+{
+	struct mtk_pcie *pcie = bus->sysdata;
+
+	writel(PCIE_CONF_ADDR(where, PCI_FUNC(devfn), PCI_SLOT(devfn),
+			      bus->number), pcie->base + PCIE_CFG_ADDR);
+
+	return pcie->base + PCIE_CFG_DATA + (where & 3);
+}
+
+static struct pci_ops mtk_pcie_ops = {
+	.map_bus = mtk_pcie_map_bus,
+	.read  = pci_generic_config_read,
+	.write = pci_generic_config_write,
+};
+
+static int mtk_pcie_startup_port(struct mtk_pcie_port *port)
+{
+	struct mtk_pcie *pcie = port->pcie;
+	u32 func = PCI_FUNC(port->slot);
+	u32 slot = PCI_SLOT(port->slot << 3);
+	u32 val;
+	int err;
+
+	/* assert port PERST_N */
+	val = readl(pcie->base + PCIE_SYS_CFG);
+	val |= PCIE_PORT_PERST(port->slot);
+	writel(val, pcie->base + PCIE_SYS_CFG);
+
+	/* de-assert port PERST_N */
+	val = readl(pcie->base + PCIE_SYS_CFG);
+	val &= ~PCIE_PORT_PERST(port->slot);
+	writel(val, pcie->base + PCIE_SYS_CFG);
+
+	/* 100ms timeout value should be enough for Gen1/2 training */
+	err = readl_poll_timeout(port->base + PCIE_LINK_STATUS, val,
+				 !!(val & PCIE_PORT_LINKUP), 20,
+				 100 * USEC_PER_MSEC);
+	if (err)
+		return -ETIMEDOUT;
+
+	/* enable interrupt */
+	val = readl(pcie->base + PCIE_INT_ENABLE);
+	val |= PCIE_PORT_INT_EN(port->slot);
+	writel(val, pcie->base + PCIE_INT_ENABLE);
+
+	/* map to all DDR region. We need to set it before cfg operation. */
+	writel(PCIE_BAR_MAP_MAX | PCIE_BAR_ENABLE,
+	       port->base + PCIE_BAR0_SETUP);
+
+	/* configure class code and revision ID */
+	writel(PCIE_CLASS_CODE | PCIE_REVISION_ID, port->base + PCIE_CLASS);
+
+	/* configure FC credit */
+	writel(PCIE_CONF_ADDR(PCIE_FC_CREDIT, func, slot, 0),
+	       pcie->base + PCIE_CFG_ADDR);
+	val = readl(pcie->base + PCIE_CFG_DATA);
+	val &= ~PCIE_FC_CREDIT_MASK;
+	val |= PCIE_FC_CREDIT_VAL(0x806c);
+	writel(PCIE_CONF_ADDR(PCIE_FC_CREDIT, func, slot, 0),
+	       pcie->base + PCIE_CFG_ADDR);
+	writel(val, pcie->base + PCIE_CFG_DATA);
+
+	/* configure RC FTS number to 250 when it leaves L0s */
+	writel(PCIE_CONF_ADDR(PCIE_FTS_NUM, func, slot, 0),
+	       pcie->base + PCIE_CFG_ADDR);
+	val = readl(pcie->base + PCIE_CFG_DATA);
+	val &= ~PCIE_FTS_NUM_MASK;
+	val |= PCIE_FTS_NUM_L0(0x50);
+	writel(PCIE_CONF_ADDR(PCIE_FTS_NUM, func, slot, 0),
+	       pcie->base + PCIE_CFG_ADDR);
+	writel(val, pcie->base + PCIE_CFG_DATA);
+
+	return 0;
+}
+
+static void mtk_pcie_enable_port(struct mtk_pcie_port *port)
+{
+	struct mtk_pcie *pcie = port->pcie;
+	struct device *dev = pcie->dev;
+	int err;
+
+	err = clk_prepare_enable(port->sys_ck);
+	if (err) {
+		dev_err(dev, "failed to enable sys_ck%d clock\n", port->slot);
+		goto err_sys_clk;
+	}
+
+	err = clk_prepare_enable(port->ahb_ck);
+	if (err) {
+		dev_err(dev, "failed to enable ahb_ck%d\n", port->slot);
+		goto err_ahb_clk;
+	}
+
+	err = clk_prepare_enable(port->aux_ck);
+	if (err) {
+		dev_err(dev, "failed to enable aux_ck%d\n", port->slot);
+		goto err_aux_clk;
+	}
+
+	err = clk_prepare_enable(port->axi_ck);
+	if (err) {
+		dev_err(dev, "failed to enable axi_ck%d\n", port->slot);
+		goto err_axi_clk;
+	}
+
+	err = clk_prepare_enable(port->obff_ck);
+	if (err) {
+		dev_err(dev, "failed to enable obff_ck%d\n", port->slot);
+		goto err_obff_clk;
+	}
+
+	err = clk_prepare_enable(port->pipe_ck);
+	if (err) {
+		dev_err(dev, "failed to enable pipe_ck%d\n", port->slot);
+		goto err_pipe_clk;
+	}
+
+	reset_control_assert(port->reset);
+	reset_control_deassert(port->reset);
+
+	err = phy_init(port->phy);
+	if (err) {
+		dev_err(dev, "failed to initialize port%d phy\n", port->slot);
+		goto err_phy_init;
+	}
+
+	err = phy_power_on(port->phy);
+	if (err) {
+		dev_err(dev, "failed to power on port%d phy\n", port->slot);
+		goto err_phy_on;
+	}
+
+	if (!pcie->soc->startup(port))
+		return;
+
+	dev_info(dev, "Port%d link down\n", port->slot);
+
+	phy_power_off(port->phy);
+err_phy_on:
+	phy_exit(port->phy);
+err_phy_init:
+	clk_disable_unprepare(port->pipe_ck);
+err_pipe_clk:
+	clk_disable_unprepare(port->obff_ck);
+err_obff_clk:
+	clk_disable_unprepare(port->axi_ck);
+err_axi_clk:
+	clk_disable_unprepare(port->aux_ck);
+err_aux_clk:
+	clk_disable_unprepare(port->ahb_ck);
+err_ahb_clk:
+	clk_disable_unprepare(port->sys_ck);
+err_sys_clk:
+	mtk_pcie_port_free(port);
+}
+
+static int mtk_pcie_parse_port(struct mtk_pcie *pcie,
+			       struct device_node *node,
+			       int slot)
+{
+	struct mtk_pcie_port *port;
+	struct device *dev = pcie->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	char name[10];
+	int err;
+
+	port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+
+	snprintf(name, sizeof(name), "port%d", slot);
+	port->base = devm_platform_ioremap_resource_byname(pdev, name);
+	if (IS_ERR(port->base)) {
+		dev_err(dev, "failed to map port%d base\n", slot);
+		return PTR_ERR(port->base);
+	}
+
+	snprintf(name, sizeof(name), "sys_ck%d", slot);
+	port->sys_ck = devm_clk_get(dev, name);
+	if (IS_ERR(port->sys_ck)) {
+		dev_err(dev, "failed to get sys_ck%d clock\n", slot);
+		return PTR_ERR(port->sys_ck);
+	}
+
+	/* sys_ck might be divided into the following parts in some chips */
+	snprintf(name, sizeof(name), "ahb_ck%d", slot);
+	port->ahb_ck = devm_clk_get_optional(dev, name);
+	if (IS_ERR(port->ahb_ck))
+		return PTR_ERR(port->ahb_ck);
+
+	snprintf(name, sizeof(name), "axi_ck%d", slot);
+	port->axi_ck = devm_clk_get_optional(dev, name);
+	if (IS_ERR(port->axi_ck))
+		return PTR_ERR(port->axi_ck);
+
+	snprintf(name, sizeof(name), "aux_ck%d", slot);
+	port->aux_ck = devm_clk_get_optional(dev, name);
+	if (IS_ERR(port->aux_ck))
+		return PTR_ERR(port->aux_ck);
+
+	snprintf(name, sizeof(name), "obff_ck%d", slot);
+	port->obff_ck = devm_clk_get_optional(dev, name);
+	if (IS_ERR(port->obff_ck))
+		return PTR_ERR(port->obff_ck);
+
+	snprintf(name, sizeof(name), "pipe_ck%d", slot);
+	port->pipe_ck = devm_clk_get_optional(dev, name);
+	if (IS_ERR(port->pipe_ck))
+		return PTR_ERR(port->pipe_ck);
+
+	snprintf(name, sizeof(name), "pcie-rst%d", slot);
+	port->reset = devm_reset_control_get_optional_exclusive(dev, name);
+	if (PTR_ERR(port->reset) == -EPROBE_DEFER)
+		return PTR_ERR(port->reset);
+
+	/* some platforms may use default PHY setting */
+	snprintf(name, sizeof(name), "pcie-phy%d", slot);
+	port->phy = devm_phy_optional_get(dev, name);
+	if (IS_ERR(port->phy))
+		return PTR_ERR(port->phy);
+
+	port->slot = slot;
+	port->pcie = pcie;
+
+	if (pcie->soc->setup_irq) {
+		err = pcie->soc->setup_irq(port, node);
+		if (err)
+			return err;
+	}
+
+	INIT_LIST_HEAD(&port->list);
+	list_add_tail(&port->list, &pcie->ports);
+
+	return 0;
+}
+
+static int mtk_pcie_subsys_powerup(struct mtk_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct resource *regs;
+	struct device_node *cfg_node;
+	int err;
+
+	/* get shared registers, which are optional */
+	regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "subsys");
+	if (regs) {
+		pcie->base = devm_ioremap_resource(dev, regs);
+		if (IS_ERR(pcie->base))
+			return PTR_ERR(pcie->base);
+	}
+
+	cfg_node = of_find_compatible_node(NULL, NULL,
+					   "mediatek,generic-pciecfg");
+	if (cfg_node) {
+		pcie->cfg = syscon_node_to_regmap(cfg_node);
+		of_node_put(cfg_node);
+		if (IS_ERR(pcie->cfg))
+			return PTR_ERR(pcie->cfg);
+	}
+
+	pcie->free_ck = devm_clk_get(dev, "free_ck");
+	if (IS_ERR(pcie->free_ck)) {
+		if (PTR_ERR(pcie->free_ck) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+
+		pcie->free_ck = NULL;
+	}
+
+	pm_runtime_enable(dev);
+	pm_runtime_get_sync(dev);
+
+	/* enable top level clock */
+	err = clk_prepare_enable(pcie->free_ck);
+	if (err) {
+		dev_err(dev, "failed to enable free_ck\n");
+		goto err_free_ck;
+	}
+
+	return 0;
+
+err_free_ck:
+	pm_runtime_put_sync(dev);
+	pm_runtime_disable(dev);
+
+	return err;
+}
+
+static int mtk_pcie_setup(struct mtk_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct device_node *node = dev->of_node, *child;
+	struct mtk_pcie_port *port, *tmp;
+	int err, slot;
+
+	slot = of_get_pci_domain_nr(dev->of_node);
+	if (slot < 0) {
+		for_each_available_child_of_node(node, child) {
+			err = of_pci_get_devfn(child);
+			if (err < 0) {
+				dev_err(dev, "failed to get devfn: %d\n", err);
+				goto error_put_node;
+			}
+
+			slot = PCI_SLOT(err);
+
+			err = mtk_pcie_parse_port(pcie, child, slot);
+			if (err)
+				goto error_put_node;
+		}
+	} else {
+		err = mtk_pcie_parse_port(pcie, node, slot);
+		if (err)
+			return err;
+	}
+
+	err = mtk_pcie_subsys_powerup(pcie);
+	if (err)
+		return err;
+
+	/* enable each port, and then check link status */
+	list_for_each_entry_safe(port, tmp, &pcie->ports, list)
+		mtk_pcie_enable_port(port);
+
+	/* power down PCIe subsys if slots are all empty (link down) */
+	if (list_empty(&pcie->ports))
+		mtk_pcie_subsys_powerdown(pcie);
+
+	return 0;
+error_put_node:
+	of_node_put(child);
+	return err;
+}
+
+static int mtk_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct mtk_pcie *pcie;
+	struct pci_host_bridge *host;
+	int err;
+
+	host = devm_pci_alloc_host_bridge(dev, sizeof(*pcie));
+	if (!host)
+		return -ENOMEM;
+
+	pcie = pci_host_bridge_priv(host);
+
+	pcie->dev = dev;
+	pcie->soc = of_device_get_match_data(dev);
+	platform_set_drvdata(pdev, pcie);
+	INIT_LIST_HEAD(&pcie->ports);
+
+	err = mtk_pcie_setup(pcie);
+	if (err)
+		return err;
+
+	host->ops = pcie->soc->ops;
+	host->sysdata = pcie;
+	host->msi_domain = pcie->soc->no_msi;
+
+	err = pci_host_probe(host);
+	if (err)
+		goto put_resources;
+
+	return 0;
+
+put_resources:
+	if (!list_empty(&pcie->ports))
+		mtk_pcie_put_resources(pcie);
+
+	return err;
+}
+
+
+static void mtk_pcie_free_resources(struct mtk_pcie *pcie)
+{
+	struct pci_host_bridge *host = pci_host_bridge_from_priv(pcie);
+	struct list_head *windows = &host->windows;
+
+	pci_free_resource_list(windows);
+}
+
+static void mtk_pcie_remove(struct platform_device *pdev)
+{
+	struct mtk_pcie *pcie = platform_get_drvdata(pdev);
+	struct pci_host_bridge *host = pci_host_bridge_from_priv(pcie);
+
+	pci_stop_root_bus(host->bus);
+	pci_remove_root_bus(host->bus);
+	mtk_pcie_free_resources(pcie);
+
+	mtk_pcie_irq_teardown(pcie);
+
+	mtk_pcie_put_resources(pcie);
+}
+
+static int mtk_pcie_suspend_noirq(struct device *dev)
+{
+	struct mtk_pcie *pcie = dev_get_drvdata(dev);
+	struct mtk_pcie_port *port;
+
+	if (list_empty(&pcie->ports))
+		return 0;
+
+	list_for_each_entry(port, &pcie->ports, list) {
+		clk_disable_unprepare(port->pipe_ck);
+		clk_disable_unprepare(port->obff_ck);
+		clk_disable_unprepare(port->axi_ck);
+		clk_disable_unprepare(port->aux_ck);
+		clk_disable_unprepare(port->ahb_ck);
+		clk_disable_unprepare(port->sys_ck);
+		phy_power_off(port->phy);
+		phy_exit(port->phy);
+	}
+
+	clk_disable_unprepare(pcie->free_ck);
+
+	return 0;
+}
+
+static int mtk_pcie_resume_noirq(struct device *dev)
+{
+	struct mtk_pcie *pcie = dev_get_drvdata(dev);
+	struct mtk_pcie_port *port, *tmp;
+
+	if (list_empty(&pcie->ports))
+		return 0;
+
+	clk_prepare_enable(pcie->free_ck);
+
+	list_for_each_entry_safe(port, tmp, &pcie->ports, list)
+		mtk_pcie_enable_port(port);
+
+	/* In case of EP was removed while system suspend. */
+	if (list_empty(&pcie->ports))
+		clk_disable_unprepare(pcie->free_ck);
+
+	return 0;
+}
+
+static const struct dev_pm_ops mtk_pcie_pm_ops = {
+	NOIRQ_SYSTEM_SLEEP_PM_OPS(mtk_pcie_suspend_noirq,
+				  mtk_pcie_resume_noirq)
+};
+
+static const struct mtk_pcie_soc mtk_pcie_soc_v1 = {
+	.no_msi = true,
+	.ops = &mtk_pcie_ops,
+	.startup = mtk_pcie_startup_port,
+};
+
+static const struct mtk_pcie_soc mtk_pcie_soc_mt2712 = {
+	.ops = &mtk_pcie_ops_v2,
+	.startup = mtk_pcie_startup_port_v2,
+	.setup_irq = mtk_pcie_setup_irq,
+};
+
+static const struct mtk_pcie_soc mtk_pcie_soc_mt7622 = {
+	.need_fix_class_id = true,
+	.ops = &mtk_pcie_ops_v2,
+	.startup = mtk_pcie_startup_port_v2,
+	.setup_irq = mtk_pcie_setup_irq,
+};
+
+static const struct mtk_pcie_soc mtk_pcie_soc_mt7629 = {
+	.need_fix_class_id = true,
+	.need_fix_device_id = true,
+	.device_id = PCI_DEVICE_ID_MEDIATEK_7629,
+	.ops = &mtk_pcie_ops_v2,
+	.startup = mtk_pcie_startup_port_v2,
+	.setup_irq = mtk_pcie_setup_irq,
+};
+
+static const struct of_device_id mtk_pcie_ids[] = {
+	{ .compatible = "mediatek,mt2701-pcie", .data = &mtk_pcie_soc_v1 },
+	{ .compatible = "mediatek,mt7623-pcie", .data = &mtk_pcie_soc_v1 },
+	{ .compatible = "mediatek,mt2712-pcie", .data = &mtk_pcie_soc_mt2712 },
+	{ .compatible = "mediatek,mt7622-pcie", .data = &mtk_pcie_soc_mt7622 },
+	{ .compatible = "mediatek,mt7629-pcie", .data = &mtk_pcie_soc_mt7629 },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mtk_pcie_ids);
+
+static struct platform_driver mtk_pcie_driver = {
+	.probe = mtk_pcie_probe,
+	.remove_new = mtk_pcie_remove,
+	.driver = {
+		.name = "mtk-pcie",
+		.of_match_table = mtk_pcie_ids,
+		.suppress_bind_attrs = true,
+		.pm = &mtk_pcie_pm_ops,
+	},
+};
+module_platform_driver(mtk_pcie_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pcie-microchip-host.c b/drivers/pci/controller/pcie-microchip-host.c
new file mode 100644
index 000000000..137fb8570
--- /dev/null
+++ b/drivers/pci/controller/pcie-microchip-host.c
@@ -0,0 +1,1216 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Microchip AXI PCIe Bridge host controller driver
+ *
+ * Copyright (c) 2018 - 2020 Microchip Corporation. All rights reserved.
+ *
+ * Author: Daire McNamara <daire.mcnamara@microchip.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/pci-ecam.h>
+#include <linux/platform_device.h>
+
+#include "../pci.h"
+
+/* Number of MSI IRQs */
+#define MC_MAX_NUM_MSI_IRQS			32
+
+/* PCIe Bridge Phy and Controller Phy offsets */
+#define MC_PCIE1_BRIDGE_ADDR			0x00008000u
+#define MC_PCIE1_CTRL_ADDR			0x0000a000u
+
+#define MC_PCIE_BRIDGE_ADDR			(MC_PCIE1_BRIDGE_ADDR)
+#define MC_PCIE_CTRL_ADDR			(MC_PCIE1_CTRL_ADDR)
+
+/* PCIe Bridge Phy Regs */
+#define PCIE_PCI_IRQ_DW0			0xa8
+#define  MSIX_CAP_MASK				BIT(31)
+#define  NUM_MSI_MSGS_MASK			GENMASK(6, 4)
+#define  NUM_MSI_MSGS_SHIFT			4
+
+#define IMASK_LOCAL				0x180
+#define  DMA_END_ENGINE_0_MASK			0x00000000u
+#define  DMA_END_ENGINE_0_SHIFT			0
+#define  DMA_END_ENGINE_1_MASK			0x00000000u
+#define  DMA_END_ENGINE_1_SHIFT			1
+#define  DMA_ERROR_ENGINE_0_MASK		0x00000100u
+#define  DMA_ERROR_ENGINE_0_SHIFT		8
+#define  DMA_ERROR_ENGINE_1_MASK		0x00000200u
+#define  DMA_ERROR_ENGINE_1_SHIFT		9
+#define  A_ATR_EVT_POST_ERR_MASK		0x00010000u
+#define  A_ATR_EVT_POST_ERR_SHIFT		16
+#define  A_ATR_EVT_FETCH_ERR_MASK		0x00020000u
+#define  A_ATR_EVT_FETCH_ERR_SHIFT		17
+#define  A_ATR_EVT_DISCARD_ERR_MASK		0x00040000u
+#define  A_ATR_EVT_DISCARD_ERR_SHIFT		18
+#define  A_ATR_EVT_DOORBELL_MASK		0x00000000u
+#define  A_ATR_EVT_DOORBELL_SHIFT		19
+#define  P_ATR_EVT_POST_ERR_MASK		0x00100000u
+#define  P_ATR_EVT_POST_ERR_SHIFT		20
+#define  P_ATR_EVT_FETCH_ERR_MASK		0x00200000u
+#define  P_ATR_EVT_FETCH_ERR_SHIFT		21
+#define  P_ATR_EVT_DISCARD_ERR_MASK		0x00400000u
+#define  P_ATR_EVT_DISCARD_ERR_SHIFT		22
+#define  P_ATR_EVT_DOORBELL_MASK		0x00000000u
+#define  P_ATR_EVT_DOORBELL_SHIFT		23
+#define  PM_MSI_INT_INTA_MASK			0x01000000u
+#define  PM_MSI_INT_INTA_SHIFT			24
+#define  PM_MSI_INT_INTB_MASK			0x02000000u
+#define  PM_MSI_INT_INTB_SHIFT			25
+#define  PM_MSI_INT_INTC_MASK			0x04000000u
+#define  PM_MSI_INT_INTC_SHIFT			26
+#define  PM_MSI_INT_INTD_MASK			0x08000000u
+#define  PM_MSI_INT_INTD_SHIFT			27
+#define  PM_MSI_INT_INTX_MASK			0x0f000000u
+#define  PM_MSI_INT_INTX_SHIFT			24
+#define  PM_MSI_INT_MSI_MASK			0x10000000u
+#define  PM_MSI_INT_MSI_SHIFT			28
+#define  PM_MSI_INT_AER_EVT_MASK		0x20000000u
+#define  PM_MSI_INT_AER_EVT_SHIFT		29
+#define  PM_MSI_INT_EVENTS_MASK			0x40000000u
+#define  PM_MSI_INT_EVENTS_SHIFT		30
+#define  PM_MSI_INT_SYS_ERR_MASK		0x80000000u
+#define  PM_MSI_INT_SYS_ERR_SHIFT		31
+#define  NUM_LOCAL_EVENTS			15
+#define ISTATUS_LOCAL				0x184
+#define IMASK_HOST				0x188
+#define ISTATUS_HOST				0x18c
+#define IMSI_ADDR				0x190
+#define ISTATUS_MSI				0x194
+
+/* PCIe Master table init defines */
+#define ATR0_PCIE_WIN0_SRCADDR_PARAM		0x600u
+#define  ATR0_PCIE_ATR_SIZE			0x25
+#define  ATR0_PCIE_ATR_SIZE_SHIFT		1
+#define ATR0_PCIE_WIN0_SRC_ADDR			0x604u
+#define ATR0_PCIE_WIN0_TRSL_ADDR_LSB		0x608u
+#define ATR0_PCIE_WIN0_TRSL_ADDR_UDW		0x60cu
+#define ATR0_PCIE_WIN0_TRSL_PARAM		0x610u
+
+/* PCIe AXI slave table init defines */
+#define ATR0_AXI4_SLV0_SRCADDR_PARAM		0x800u
+#define  ATR_SIZE_SHIFT				1
+#define  ATR_IMPL_ENABLE			1
+#define ATR0_AXI4_SLV0_SRC_ADDR			0x804u
+#define ATR0_AXI4_SLV0_TRSL_ADDR_LSB		0x808u
+#define ATR0_AXI4_SLV0_TRSL_ADDR_UDW		0x80cu
+#define ATR0_AXI4_SLV0_TRSL_PARAM		0x810u
+#define  PCIE_TX_RX_INTERFACE			0x00000000u
+#define  PCIE_CONFIG_INTERFACE			0x00000001u
+
+#define ATR_ENTRY_SIZE				32
+
+/* PCIe Controller Phy Regs */
+#define SEC_ERROR_EVENT_CNT			0x20
+#define DED_ERROR_EVENT_CNT			0x24
+#define SEC_ERROR_INT				0x28
+#define  SEC_ERROR_INT_TX_RAM_SEC_ERR_INT	GENMASK(3, 0)
+#define  SEC_ERROR_INT_RX_RAM_SEC_ERR_INT	GENMASK(7, 4)
+#define  SEC_ERROR_INT_PCIE2AXI_RAM_SEC_ERR_INT	GENMASK(11, 8)
+#define  SEC_ERROR_INT_AXI2PCIE_RAM_SEC_ERR_INT	GENMASK(15, 12)
+#define  SEC_ERROR_INT_ALL_RAM_SEC_ERR_INT	GENMASK(15, 0)
+#define  NUM_SEC_ERROR_INTS			(4)
+#define SEC_ERROR_INT_MASK			0x2c
+#define DED_ERROR_INT				0x30
+#define  DED_ERROR_INT_TX_RAM_DED_ERR_INT	GENMASK(3, 0)
+#define  DED_ERROR_INT_RX_RAM_DED_ERR_INT	GENMASK(7, 4)
+#define  DED_ERROR_INT_PCIE2AXI_RAM_DED_ERR_INT	GENMASK(11, 8)
+#define  DED_ERROR_INT_AXI2PCIE_RAM_DED_ERR_INT	GENMASK(15, 12)
+#define  DED_ERROR_INT_ALL_RAM_DED_ERR_INT	GENMASK(15, 0)
+#define  NUM_DED_ERROR_INTS			(4)
+#define DED_ERROR_INT_MASK			0x34
+#define ECC_CONTROL				0x38
+#define  ECC_CONTROL_TX_RAM_INJ_ERROR_0		BIT(0)
+#define  ECC_CONTROL_TX_RAM_INJ_ERROR_1		BIT(1)
+#define  ECC_CONTROL_TX_RAM_INJ_ERROR_2		BIT(2)
+#define  ECC_CONTROL_TX_RAM_INJ_ERROR_3		BIT(3)
+#define  ECC_CONTROL_RX_RAM_INJ_ERROR_0		BIT(4)
+#define  ECC_CONTROL_RX_RAM_INJ_ERROR_1		BIT(5)
+#define  ECC_CONTROL_RX_RAM_INJ_ERROR_2		BIT(6)
+#define  ECC_CONTROL_RX_RAM_INJ_ERROR_3		BIT(7)
+#define  ECC_CONTROL_PCIE2AXI_RAM_INJ_ERROR_0	BIT(8)
+#define  ECC_CONTROL_PCIE2AXI_RAM_INJ_ERROR_1	BIT(9)
+#define  ECC_CONTROL_PCIE2AXI_RAM_INJ_ERROR_2	BIT(10)
+#define  ECC_CONTROL_PCIE2AXI_RAM_INJ_ERROR_3	BIT(11)
+#define  ECC_CONTROL_AXI2PCIE_RAM_INJ_ERROR_0	BIT(12)
+#define  ECC_CONTROL_AXI2PCIE_RAM_INJ_ERROR_1	BIT(13)
+#define  ECC_CONTROL_AXI2PCIE_RAM_INJ_ERROR_2	BIT(14)
+#define  ECC_CONTROL_AXI2PCIE_RAM_INJ_ERROR_3	BIT(15)
+#define  ECC_CONTROL_TX_RAM_ECC_BYPASS		BIT(24)
+#define  ECC_CONTROL_RX_RAM_ECC_BYPASS		BIT(25)
+#define  ECC_CONTROL_PCIE2AXI_RAM_ECC_BYPASS	BIT(26)
+#define  ECC_CONTROL_AXI2PCIE_RAM_ECC_BYPASS	BIT(27)
+#define PCIE_EVENT_INT				0x14c
+#define  PCIE_EVENT_INT_L2_EXIT_INT		BIT(0)
+#define  PCIE_EVENT_INT_HOTRST_EXIT_INT		BIT(1)
+#define  PCIE_EVENT_INT_DLUP_EXIT_INT		BIT(2)
+#define  PCIE_EVENT_INT_MASK			GENMASK(2, 0)
+#define  PCIE_EVENT_INT_L2_EXIT_INT_MASK	BIT(16)
+#define  PCIE_EVENT_INT_HOTRST_EXIT_INT_MASK	BIT(17)
+#define  PCIE_EVENT_INT_DLUP_EXIT_INT_MASK	BIT(18)
+#define  PCIE_EVENT_INT_ENB_MASK		GENMASK(18, 16)
+#define  PCIE_EVENT_INT_ENB_SHIFT		16
+#define  NUM_PCIE_EVENTS			(3)
+
+/* PCIe Config space MSI capability structure */
+#define MC_MSI_CAP_CTRL_OFFSET			0xe0u
+
+/* Events */
+#define EVENT_PCIE_L2_EXIT			0
+#define EVENT_PCIE_HOTRST_EXIT			1
+#define EVENT_PCIE_DLUP_EXIT			2
+#define EVENT_SEC_TX_RAM_SEC_ERR		3
+#define EVENT_SEC_RX_RAM_SEC_ERR		4
+#define EVENT_SEC_PCIE2AXI_RAM_SEC_ERR		5
+#define EVENT_SEC_AXI2PCIE_RAM_SEC_ERR		6
+#define EVENT_DED_TX_RAM_DED_ERR		7
+#define EVENT_DED_RX_RAM_DED_ERR		8
+#define EVENT_DED_PCIE2AXI_RAM_DED_ERR		9
+#define EVENT_DED_AXI2PCIE_RAM_DED_ERR		10
+#define EVENT_LOCAL_DMA_END_ENGINE_0		11
+#define EVENT_LOCAL_DMA_END_ENGINE_1		12
+#define EVENT_LOCAL_DMA_ERROR_ENGINE_0		13
+#define EVENT_LOCAL_DMA_ERROR_ENGINE_1		14
+#define EVENT_LOCAL_A_ATR_EVT_POST_ERR		15
+#define EVENT_LOCAL_A_ATR_EVT_FETCH_ERR		16
+#define EVENT_LOCAL_A_ATR_EVT_DISCARD_ERR	17
+#define EVENT_LOCAL_A_ATR_EVT_DOORBELL		18
+#define EVENT_LOCAL_P_ATR_EVT_POST_ERR		19
+#define EVENT_LOCAL_P_ATR_EVT_FETCH_ERR		20
+#define EVENT_LOCAL_P_ATR_EVT_DISCARD_ERR	21
+#define EVENT_LOCAL_P_ATR_EVT_DOORBELL		22
+#define EVENT_LOCAL_PM_MSI_INT_INTX		23
+#define EVENT_LOCAL_PM_MSI_INT_MSI		24
+#define EVENT_LOCAL_PM_MSI_INT_AER_EVT		25
+#define EVENT_LOCAL_PM_MSI_INT_EVENTS		26
+#define EVENT_LOCAL_PM_MSI_INT_SYS_ERR		27
+#define NUM_EVENTS				28
+
+#define PCIE_EVENT_CAUSE(x, s)	\
+	[EVENT_PCIE_ ## x] = { __stringify(x), s }
+
+#define SEC_ERROR_CAUSE(x, s) \
+	[EVENT_SEC_ ## x] = { __stringify(x), s }
+
+#define DED_ERROR_CAUSE(x, s) \
+	[EVENT_DED_ ## x] = { __stringify(x), s }
+
+#define LOCAL_EVENT_CAUSE(x, s) \
+	[EVENT_LOCAL_ ## x] = { __stringify(x), s }
+
+#define PCIE_EVENT(x) \
+	.base = MC_PCIE_CTRL_ADDR, \
+	.offset = PCIE_EVENT_INT, \
+	.mask_offset = PCIE_EVENT_INT, \
+	.mask_high = 1, \
+	.mask = PCIE_EVENT_INT_ ## x ## _INT, \
+	.enb_mask = PCIE_EVENT_INT_ENB_MASK
+
+#define SEC_EVENT(x) \
+	.base = MC_PCIE_CTRL_ADDR, \
+	.offset = SEC_ERROR_INT, \
+	.mask_offset = SEC_ERROR_INT_MASK, \
+	.mask = SEC_ERROR_INT_ ## x ## _INT, \
+	.mask_high = 1, \
+	.enb_mask = 0
+
+#define DED_EVENT(x) \
+	.base = MC_PCIE_CTRL_ADDR, \
+	.offset = DED_ERROR_INT, \
+	.mask_offset = DED_ERROR_INT_MASK, \
+	.mask_high = 1, \
+	.mask = DED_ERROR_INT_ ## x ## _INT, \
+	.enb_mask = 0
+
+#define LOCAL_EVENT(x) \
+	.base = MC_PCIE_BRIDGE_ADDR, \
+	.offset = ISTATUS_LOCAL, \
+	.mask_offset = IMASK_LOCAL, \
+	.mask_high = 0, \
+	.mask = x ## _MASK, \
+	.enb_mask = 0
+
+#define PCIE_EVENT_TO_EVENT_MAP(x) \
+	{ PCIE_EVENT_INT_ ## x ## _INT, EVENT_PCIE_ ## x }
+
+#define SEC_ERROR_TO_EVENT_MAP(x) \
+	{ SEC_ERROR_INT_ ## x ## _INT, EVENT_SEC_ ## x }
+
+#define DED_ERROR_TO_EVENT_MAP(x) \
+	{ DED_ERROR_INT_ ## x ## _INT, EVENT_DED_ ## x }
+
+#define LOCAL_STATUS_TO_EVENT_MAP(x) \
+	{ x ## _MASK, EVENT_LOCAL_ ## x }
+
+struct event_map {
+	u32 reg_mask;
+	u32 event_bit;
+};
+
+struct mc_msi {
+	struct mutex lock;		/* Protect used bitmap */
+	struct irq_domain *msi_domain;
+	struct irq_domain *dev_domain;
+	u32 num_vectors;
+	u64 vector_phy;
+	DECLARE_BITMAP(used, MC_MAX_NUM_MSI_IRQS);
+};
+
+struct mc_pcie {
+	void __iomem *axi_base_addr;
+	struct device *dev;
+	struct irq_domain *intx_domain;
+	struct irq_domain *event_domain;
+	raw_spinlock_t lock;
+	struct mc_msi msi;
+};
+
+struct cause {
+	const char *sym;
+	const char *str;
+};
+
+static const struct cause event_cause[NUM_EVENTS] = {
+	PCIE_EVENT_CAUSE(L2_EXIT, "L2 exit event"),
+	PCIE_EVENT_CAUSE(HOTRST_EXIT, "Hot reset exit event"),
+	PCIE_EVENT_CAUSE(DLUP_EXIT, "DLUP exit event"),
+	SEC_ERROR_CAUSE(TX_RAM_SEC_ERR,  "sec error in tx buffer"),
+	SEC_ERROR_CAUSE(RX_RAM_SEC_ERR,  "sec error in rx buffer"),
+	SEC_ERROR_CAUSE(PCIE2AXI_RAM_SEC_ERR,  "sec error in pcie2axi buffer"),
+	SEC_ERROR_CAUSE(AXI2PCIE_RAM_SEC_ERR,  "sec error in axi2pcie buffer"),
+	DED_ERROR_CAUSE(TX_RAM_DED_ERR,  "ded error in tx buffer"),
+	DED_ERROR_CAUSE(RX_RAM_DED_ERR,  "ded error in rx buffer"),
+	DED_ERROR_CAUSE(PCIE2AXI_RAM_DED_ERR,  "ded error in pcie2axi buffer"),
+	DED_ERROR_CAUSE(AXI2PCIE_RAM_DED_ERR,  "ded error in axi2pcie buffer"),
+	LOCAL_EVENT_CAUSE(DMA_ERROR_ENGINE_0, "dma engine 0 error"),
+	LOCAL_EVENT_CAUSE(DMA_ERROR_ENGINE_1, "dma engine 1 error"),
+	LOCAL_EVENT_CAUSE(A_ATR_EVT_POST_ERR, "axi write request error"),
+	LOCAL_EVENT_CAUSE(A_ATR_EVT_FETCH_ERR, "axi read request error"),
+	LOCAL_EVENT_CAUSE(A_ATR_EVT_DISCARD_ERR, "axi read timeout"),
+	LOCAL_EVENT_CAUSE(P_ATR_EVT_POST_ERR, "pcie write request error"),
+	LOCAL_EVENT_CAUSE(P_ATR_EVT_FETCH_ERR, "pcie read request error"),
+	LOCAL_EVENT_CAUSE(P_ATR_EVT_DISCARD_ERR, "pcie read timeout"),
+	LOCAL_EVENT_CAUSE(PM_MSI_INT_AER_EVT, "aer event"),
+	LOCAL_EVENT_CAUSE(PM_MSI_INT_EVENTS, "pm/ltr/hotplug event"),
+	LOCAL_EVENT_CAUSE(PM_MSI_INT_SYS_ERR, "system error"),
+};
+
+static struct event_map pcie_event_to_event[] = {
+	PCIE_EVENT_TO_EVENT_MAP(L2_EXIT),
+	PCIE_EVENT_TO_EVENT_MAP(HOTRST_EXIT),
+	PCIE_EVENT_TO_EVENT_MAP(DLUP_EXIT),
+};
+
+static struct event_map sec_error_to_event[] = {
+	SEC_ERROR_TO_EVENT_MAP(TX_RAM_SEC_ERR),
+	SEC_ERROR_TO_EVENT_MAP(RX_RAM_SEC_ERR),
+	SEC_ERROR_TO_EVENT_MAP(PCIE2AXI_RAM_SEC_ERR),
+	SEC_ERROR_TO_EVENT_MAP(AXI2PCIE_RAM_SEC_ERR),
+};
+
+static struct event_map ded_error_to_event[] = {
+	DED_ERROR_TO_EVENT_MAP(TX_RAM_DED_ERR),
+	DED_ERROR_TO_EVENT_MAP(RX_RAM_DED_ERR),
+	DED_ERROR_TO_EVENT_MAP(PCIE2AXI_RAM_DED_ERR),
+	DED_ERROR_TO_EVENT_MAP(AXI2PCIE_RAM_DED_ERR),
+};
+
+static struct event_map local_status_to_event[] = {
+	LOCAL_STATUS_TO_EVENT_MAP(DMA_END_ENGINE_0),
+	LOCAL_STATUS_TO_EVENT_MAP(DMA_END_ENGINE_1),
+	LOCAL_STATUS_TO_EVENT_MAP(DMA_ERROR_ENGINE_0),
+	LOCAL_STATUS_TO_EVENT_MAP(DMA_ERROR_ENGINE_1),
+	LOCAL_STATUS_TO_EVENT_MAP(A_ATR_EVT_POST_ERR),
+	LOCAL_STATUS_TO_EVENT_MAP(A_ATR_EVT_FETCH_ERR),
+	LOCAL_STATUS_TO_EVENT_MAP(A_ATR_EVT_DISCARD_ERR),
+	LOCAL_STATUS_TO_EVENT_MAP(A_ATR_EVT_DOORBELL),
+	LOCAL_STATUS_TO_EVENT_MAP(P_ATR_EVT_POST_ERR),
+	LOCAL_STATUS_TO_EVENT_MAP(P_ATR_EVT_FETCH_ERR),
+	LOCAL_STATUS_TO_EVENT_MAP(P_ATR_EVT_DISCARD_ERR),
+	LOCAL_STATUS_TO_EVENT_MAP(P_ATR_EVT_DOORBELL),
+	LOCAL_STATUS_TO_EVENT_MAP(PM_MSI_INT_INTX),
+	LOCAL_STATUS_TO_EVENT_MAP(PM_MSI_INT_MSI),
+	LOCAL_STATUS_TO_EVENT_MAP(PM_MSI_INT_AER_EVT),
+	LOCAL_STATUS_TO_EVENT_MAP(PM_MSI_INT_EVENTS),
+	LOCAL_STATUS_TO_EVENT_MAP(PM_MSI_INT_SYS_ERR),
+};
+
+static struct {
+	u32 base;
+	u32 offset;
+	u32 mask;
+	u32 shift;
+	u32 enb_mask;
+	u32 mask_high;
+	u32 mask_offset;
+} event_descs[] = {
+	{ PCIE_EVENT(L2_EXIT) },
+	{ PCIE_EVENT(HOTRST_EXIT) },
+	{ PCIE_EVENT(DLUP_EXIT) },
+	{ SEC_EVENT(TX_RAM_SEC_ERR) },
+	{ SEC_EVENT(RX_RAM_SEC_ERR) },
+	{ SEC_EVENT(PCIE2AXI_RAM_SEC_ERR) },
+	{ SEC_EVENT(AXI2PCIE_RAM_SEC_ERR) },
+	{ DED_EVENT(TX_RAM_DED_ERR) },
+	{ DED_EVENT(RX_RAM_DED_ERR) },
+	{ DED_EVENT(PCIE2AXI_RAM_DED_ERR) },
+	{ DED_EVENT(AXI2PCIE_RAM_DED_ERR) },
+	{ LOCAL_EVENT(DMA_END_ENGINE_0) },
+	{ LOCAL_EVENT(DMA_END_ENGINE_1) },
+	{ LOCAL_EVENT(DMA_ERROR_ENGINE_0) },
+	{ LOCAL_EVENT(DMA_ERROR_ENGINE_1) },
+	{ LOCAL_EVENT(A_ATR_EVT_POST_ERR) },
+	{ LOCAL_EVENT(A_ATR_EVT_FETCH_ERR) },
+	{ LOCAL_EVENT(A_ATR_EVT_DISCARD_ERR) },
+	{ LOCAL_EVENT(A_ATR_EVT_DOORBELL) },
+	{ LOCAL_EVENT(P_ATR_EVT_POST_ERR) },
+	{ LOCAL_EVENT(P_ATR_EVT_FETCH_ERR) },
+	{ LOCAL_EVENT(P_ATR_EVT_DISCARD_ERR) },
+	{ LOCAL_EVENT(P_ATR_EVT_DOORBELL) },
+	{ LOCAL_EVENT(PM_MSI_INT_INTX) },
+	{ LOCAL_EVENT(PM_MSI_INT_MSI) },
+	{ LOCAL_EVENT(PM_MSI_INT_AER_EVT) },
+	{ LOCAL_EVENT(PM_MSI_INT_EVENTS) },
+	{ LOCAL_EVENT(PM_MSI_INT_SYS_ERR) },
+};
+
+static char poss_clks[][5] = { "fic0", "fic1", "fic2", "fic3" };
+
+static struct mc_pcie *port;
+
+static void mc_pcie_enable_msi(struct mc_pcie *port, void __iomem *ecam)
+{
+	struct mc_msi *msi = &port->msi;
+	u16 reg;
+	u8 queue_size;
+
+	/* Fixup MSI enable flag */
+	reg = readw_relaxed(ecam + MC_MSI_CAP_CTRL_OFFSET + PCI_MSI_FLAGS);
+	reg |= PCI_MSI_FLAGS_ENABLE;
+	writew_relaxed(reg, ecam + MC_MSI_CAP_CTRL_OFFSET + PCI_MSI_FLAGS);
+
+	/* Fixup PCI MSI queue flags */
+	queue_size = FIELD_GET(PCI_MSI_FLAGS_QMASK, reg);
+	reg |= FIELD_PREP(PCI_MSI_FLAGS_QSIZE, queue_size);
+	writew_relaxed(reg, ecam + MC_MSI_CAP_CTRL_OFFSET + PCI_MSI_FLAGS);
+
+	/* Fixup MSI addr fields */
+	writel_relaxed(lower_32_bits(msi->vector_phy),
+		       ecam + MC_MSI_CAP_CTRL_OFFSET + PCI_MSI_ADDRESS_LO);
+	writel_relaxed(upper_32_bits(msi->vector_phy),
+		       ecam + MC_MSI_CAP_CTRL_OFFSET + PCI_MSI_ADDRESS_HI);
+}
+
+static void mc_handle_msi(struct irq_desc *desc)
+{
+	struct mc_pcie *port = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct device *dev = port->dev;
+	struct mc_msi *msi = &port->msi;
+	void __iomem *bridge_base_addr =
+		port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+	unsigned long status;
+	u32 bit;
+	int ret;
+
+	chained_irq_enter(chip, desc);
+
+	status = readl_relaxed(bridge_base_addr + ISTATUS_LOCAL);
+	if (status & PM_MSI_INT_MSI_MASK) {
+		writel_relaxed(status & PM_MSI_INT_MSI_MASK, bridge_base_addr + ISTATUS_LOCAL);
+		status = readl_relaxed(bridge_base_addr + ISTATUS_MSI);
+		for_each_set_bit(bit, &status, msi->num_vectors) {
+			ret = generic_handle_domain_irq(msi->dev_domain, bit);
+			if (ret)
+				dev_err_ratelimited(dev, "bad MSI IRQ %d\n",
+						    bit);
+		}
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static void mc_msi_bottom_irq_ack(struct irq_data *data)
+{
+	struct mc_pcie *port = irq_data_get_irq_chip_data(data);
+	void __iomem *bridge_base_addr =
+		port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+	u32 bitpos = data->hwirq;
+
+	writel_relaxed(BIT(bitpos), bridge_base_addr + ISTATUS_MSI);
+}
+
+static void mc_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct mc_pcie *port = irq_data_get_irq_chip_data(data);
+	phys_addr_t addr = port->msi.vector_phy;
+
+	msg->address_lo = lower_32_bits(addr);
+	msg->address_hi = upper_32_bits(addr);
+	msg->data = data->hwirq;
+
+	dev_dbg(port->dev, "msi#%x address_hi %#x address_lo %#x\n",
+		(int)data->hwirq, msg->address_hi, msg->address_lo);
+}
+
+static int mc_msi_set_affinity(struct irq_data *irq_data,
+			       const struct cpumask *mask, bool force)
+{
+	return -EINVAL;
+}
+
+static struct irq_chip mc_msi_bottom_irq_chip = {
+	.name = "Microchip MSI",
+	.irq_ack = mc_msi_bottom_irq_ack,
+	.irq_compose_msi_msg = mc_compose_msi_msg,
+	.irq_set_affinity = mc_msi_set_affinity,
+};
+
+static int mc_irq_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				   unsigned int nr_irqs, void *args)
+{
+	struct mc_pcie *port = domain->host_data;
+	struct mc_msi *msi = &port->msi;
+	unsigned long bit;
+
+	mutex_lock(&msi->lock);
+	bit = find_first_zero_bit(msi->used, msi->num_vectors);
+	if (bit >= msi->num_vectors) {
+		mutex_unlock(&msi->lock);
+		return -ENOSPC;
+	}
+
+	set_bit(bit, msi->used);
+
+	irq_domain_set_info(domain, virq, bit, &mc_msi_bottom_irq_chip,
+			    domain->host_data, handle_edge_irq, NULL, NULL);
+
+	mutex_unlock(&msi->lock);
+
+	return 0;
+}
+
+static void mc_irq_msi_domain_free(struct irq_domain *domain, unsigned int virq,
+				   unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct mc_pcie *port = irq_data_get_irq_chip_data(d);
+	struct mc_msi *msi = &port->msi;
+
+	mutex_lock(&msi->lock);
+
+	if (test_bit(d->hwirq, msi->used))
+		__clear_bit(d->hwirq, msi->used);
+	else
+		dev_err(port->dev, "trying to free unused MSI%lu\n", d->hwirq);
+
+	mutex_unlock(&msi->lock);
+}
+
+static const struct irq_domain_ops msi_domain_ops = {
+	.alloc	= mc_irq_msi_domain_alloc,
+	.free	= mc_irq_msi_domain_free,
+};
+
+static struct irq_chip mc_msi_irq_chip = {
+	.name = "Microchip PCIe MSI",
+	.irq_ack = irq_chip_ack_parent,
+	.irq_mask = pci_msi_mask_irq,
+	.irq_unmask = pci_msi_unmask_irq,
+};
+
+static struct msi_domain_info mc_msi_domain_info = {
+	.flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		  MSI_FLAG_PCI_MSIX),
+	.chip = &mc_msi_irq_chip,
+};
+
+static int mc_allocate_msi_domains(struct mc_pcie *port)
+{
+	struct device *dev = port->dev;
+	struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node);
+	struct mc_msi *msi = &port->msi;
+
+	mutex_init(&port->msi.lock);
+
+	msi->dev_domain = irq_domain_add_linear(NULL, msi->num_vectors,
+						&msi_domain_ops, port);
+	if (!msi->dev_domain) {
+		dev_err(dev, "failed to create IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	msi->msi_domain = pci_msi_create_irq_domain(fwnode, &mc_msi_domain_info,
+						    msi->dev_domain);
+	if (!msi->msi_domain) {
+		dev_err(dev, "failed to create MSI domain\n");
+		irq_domain_remove(msi->dev_domain);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void mc_handle_intx(struct irq_desc *desc)
+{
+	struct mc_pcie *port = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct device *dev = port->dev;
+	void __iomem *bridge_base_addr =
+		port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+	unsigned long status;
+	u32 bit;
+	int ret;
+
+	chained_irq_enter(chip, desc);
+
+	status = readl_relaxed(bridge_base_addr + ISTATUS_LOCAL);
+	if (status & PM_MSI_INT_INTX_MASK) {
+		status &= PM_MSI_INT_INTX_MASK;
+		status >>= PM_MSI_INT_INTX_SHIFT;
+		for_each_set_bit(bit, &status, PCI_NUM_INTX) {
+			ret = generic_handle_domain_irq(port->intx_domain, bit);
+			if (ret)
+				dev_err_ratelimited(dev, "bad INTx IRQ %d\n",
+						    bit);
+		}
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static void mc_ack_intx_irq(struct irq_data *data)
+{
+	struct mc_pcie *port = irq_data_get_irq_chip_data(data);
+	void __iomem *bridge_base_addr =
+		port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+	u32 mask = BIT(data->hwirq + PM_MSI_INT_INTX_SHIFT);
+
+	writel_relaxed(mask, bridge_base_addr + ISTATUS_LOCAL);
+}
+
+static void mc_mask_intx_irq(struct irq_data *data)
+{
+	struct mc_pcie *port = irq_data_get_irq_chip_data(data);
+	void __iomem *bridge_base_addr =
+		port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+	unsigned long flags;
+	u32 mask = BIT(data->hwirq + PM_MSI_INT_INTX_SHIFT);
+	u32 val;
+
+	raw_spin_lock_irqsave(&port->lock, flags);
+	val = readl_relaxed(bridge_base_addr + IMASK_LOCAL);
+	val &= ~mask;
+	writel_relaxed(val, bridge_base_addr + IMASK_LOCAL);
+	raw_spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void mc_unmask_intx_irq(struct irq_data *data)
+{
+	struct mc_pcie *port = irq_data_get_irq_chip_data(data);
+	void __iomem *bridge_base_addr =
+		port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+	unsigned long flags;
+	u32 mask = BIT(data->hwirq + PM_MSI_INT_INTX_SHIFT);
+	u32 val;
+
+	raw_spin_lock_irqsave(&port->lock, flags);
+	val = readl_relaxed(bridge_base_addr + IMASK_LOCAL);
+	val |= mask;
+	writel_relaxed(val, bridge_base_addr + IMASK_LOCAL);
+	raw_spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static struct irq_chip mc_intx_irq_chip = {
+	.name = "Microchip PCIe INTx",
+	.irq_ack = mc_ack_intx_irq,
+	.irq_mask = mc_mask_intx_irq,
+	.irq_unmask = mc_unmask_intx_irq,
+};
+
+static int mc_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
+			    irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &mc_intx_irq_chip, handle_level_irq);
+	irq_set_chip_data(irq, domain->host_data);
+
+	return 0;
+}
+
+static const struct irq_domain_ops intx_domain_ops = {
+	.map = mc_pcie_intx_map,
+};
+
+static inline u32 reg_to_event(u32 reg, struct event_map field)
+{
+	return (reg & field.reg_mask) ? BIT(field.event_bit) : 0;
+}
+
+static u32 pcie_events(struct mc_pcie *port)
+{
+	void __iomem *ctrl_base_addr = port->axi_base_addr + MC_PCIE_CTRL_ADDR;
+	u32 reg = readl_relaxed(ctrl_base_addr + PCIE_EVENT_INT);
+	u32 val = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pcie_event_to_event); i++)
+		val |= reg_to_event(reg, pcie_event_to_event[i]);
+
+	return val;
+}
+
+static u32 sec_errors(struct mc_pcie *port)
+{
+	void __iomem *ctrl_base_addr = port->axi_base_addr + MC_PCIE_CTRL_ADDR;
+	u32 reg = readl_relaxed(ctrl_base_addr + SEC_ERROR_INT);
+	u32 val = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(sec_error_to_event); i++)
+		val |= reg_to_event(reg, sec_error_to_event[i]);
+
+	return val;
+}
+
+static u32 ded_errors(struct mc_pcie *port)
+{
+	void __iomem *ctrl_base_addr = port->axi_base_addr + MC_PCIE_CTRL_ADDR;
+	u32 reg = readl_relaxed(ctrl_base_addr + DED_ERROR_INT);
+	u32 val = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ded_error_to_event); i++)
+		val |= reg_to_event(reg, ded_error_to_event[i]);
+
+	return val;
+}
+
+static u32 local_events(struct mc_pcie *port)
+{
+	void __iomem *bridge_base_addr = port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+	u32 reg = readl_relaxed(bridge_base_addr + ISTATUS_LOCAL);
+	u32 val = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(local_status_to_event); i++)
+		val |= reg_to_event(reg, local_status_to_event[i]);
+
+	return val;
+}
+
+static u32 get_events(struct mc_pcie *port)
+{
+	u32 events = 0;
+
+	events |= pcie_events(port);
+	events |= sec_errors(port);
+	events |= ded_errors(port);
+	events |= local_events(port);
+
+	return events;
+}
+
+static irqreturn_t mc_event_handler(int irq, void *dev_id)
+{
+	struct mc_pcie *port = dev_id;
+	struct device *dev = port->dev;
+	struct irq_data *data;
+
+	data = irq_domain_get_irq_data(port->event_domain, irq);
+
+	if (event_cause[data->hwirq].str)
+		dev_err_ratelimited(dev, "%s\n", event_cause[data->hwirq].str);
+	else
+		dev_err_ratelimited(dev, "bad event IRQ %ld\n", data->hwirq);
+
+	return IRQ_HANDLED;
+}
+
+static void mc_handle_event(struct irq_desc *desc)
+{
+	struct mc_pcie *port = irq_desc_get_handler_data(desc);
+	unsigned long events;
+	u32 bit;
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+
+	chained_irq_enter(chip, desc);
+
+	events = get_events(port);
+
+	for_each_set_bit(bit, &events, NUM_EVENTS)
+		generic_handle_domain_irq(port->event_domain, bit);
+
+	chained_irq_exit(chip, desc);
+}
+
+static void mc_ack_event_irq(struct irq_data *data)
+{
+	struct mc_pcie *port = irq_data_get_irq_chip_data(data);
+	u32 event = data->hwirq;
+	void __iomem *addr;
+	u32 mask;
+
+	addr = port->axi_base_addr + event_descs[event].base +
+		event_descs[event].offset;
+	mask = event_descs[event].mask;
+	mask |= event_descs[event].enb_mask;
+
+	writel_relaxed(mask, addr);
+}
+
+static void mc_mask_event_irq(struct irq_data *data)
+{
+	struct mc_pcie *port = irq_data_get_irq_chip_data(data);
+	u32 event = data->hwirq;
+	void __iomem *addr;
+	u32 mask;
+	u32 val;
+
+	addr = port->axi_base_addr + event_descs[event].base +
+		event_descs[event].mask_offset;
+	mask = event_descs[event].mask;
+	if (event_descs[event].enb_mask) {
+		mask <<= PCIE_EVENT_INT_ENB_SHIFT;
+		mask &= PCIE_EVENT_INT_ENB_MASK;
+	}
+
+	if (!event_descs[event].mask_high)
+		mask = ~mask;
+
+	raw_spin_lock(&port->lock);
+	val = readl_relaxed(addr);
+	if (event_descs[event].mask_high)
+		val |= mask;
+	else
+		val &= mask;
+
+	writel_relaxed(val, addr);
+	raw_spin_unlock(&port->lock);
+}
+
+static void mc_unmask_event_irq(struct irq_data *data)
+{
+	struct mc_pcie *port = irq_data_get_irq_chip_data(data);
+	u32 event = data->hwirq;
+	void __iomem *addr;
+	u32 mask;
+	u32 val;
+
+	addr = port->axi_base_addr + event_descs[event].base +
+		event_descs[event].mask_offset;
+	mask = event_descs[event].mask;
+
+	if (event_descs[event].enb_mask)
+		mask <<= PCIE_EVENT_INT_ENB_SHIFT;
+
+	if (event_descs[event].mask_high)
+		mask = ~mask;
+
+	if (event_descs[event].enb_mask)
+		mask &= PCIE_EVENT_INT_ENB_MASK;
+
+	raw_spin_lock(&port->lock);
+	val = readl_relaxed(addr);
+	if (event_descs[event].mask_high)
+		val &= mask;
+	else
+		val |= mask;
+	writel_relaxed(val, addr);
+	raw_spin_unlock(&port->lock);
+}
+
+static struct irq_chip mc_event_irq_chip = {
+	.name = "Microchip PCIe EVENT",
+	.irq_ack = mc_ack_event_irq,
+	.irq_mask = mc_mask_event_irq,
+	.irq_unmask = mc_unmask_event_irq,
+};
+
+static int mc_pcie_event_map(struct irq_domain *domain, unsigned int irq,
+			     irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &mc_event_irq_chip, handle_level_irq);
+	irq_set_chip_data(irq, domain->host_data);
+
+	return 0;
+}
+
+static const struct irq_domain_ops event_domain_ops = {
+	.map = mc_pcie_event_map,
+};
+
+static inline void mc_pcie_deinit_clk(void *data)
+{
+	struct clk *clk = data;
+
+	clk_disable_unprepare(clk);
+}
+
+static inline struct clk *mc_pcie_init_clk(struct device *dev, const char *id)
+{
+	struct clk *clk;
+	int ret;
+
+	clk = devm_clk_get_optional(dev, id);
+	if (IS_ERR(clk))
+		return clk;
+	if (!clk)
+		return clk;
+
+	ret = clk_prepare_enable(clk);
+	if (ret)
+		return ERR_PTR(ret);
+
+	devm_add_action_or_reset(dev, mc_pcie_deinit_clk, clk);
+
+	return clk;
+}
+
+static int mc_pcie_init_clks(struct device *dev)
+{
+	int i;
+	struct clk *fic;
+
+	/*
+	 * PCIe may be clocked via Fabric Interface using between 1 and 4
+	 * clocks. Scan DT for clocks and enable them if present
+	 */
+	for (i = 0; i < ARRAY_SIZE(poss_clks); i++) {
+		fic = mc_pcie_init_clk(dev, poss_clks[i]);
+		if (IS_ERR(fic))
+			return PTR_ERR(fic);
+	}
+
+	return 0;
+}
+
+static int mc_pcie_init_irq_domains(struct mc_pcie *port)
+{
+	struct device *dev = port->dev;
+	struct device_node *node = dev->of_node;
+	struct device_node *pcie_intc_node;
+
+	/* Setup INTx */
+	pcie_intc_node = of_get_next_child(node, NULL);
+	if (!pcie_intc_node) {
+		dev_err(dev, "failed to find PCIe Intc node\n");
+		return -EINVAL;
+	}
+
+	port->event_domain = irq_domain_add_linear(pcie_intc_node, NUM_EVENTS,
+						   &event_domain_ops, port);
+	if (!port->event_domain) {
+		dev_err(dev, "failed to get event domain\n");
+		of_node_put(pcie_intc_node);
+		return -ENOMEM;
+	}
+
+	irq_domain_update_bus_token(port->event_domain, DOMAIN_BUS_NEXUS);
+
+	port->intx_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
+						  &intx_domain_ops, port);
+	if (!port->intx_domain) {
+		dev_err(dev, "failed to get an INTx IRQ domain\n");
+		of_node_put(pcie_intc_node);
+		return -ENOMEM;
+	}
+
+	irq_domain_update_bus_token(port->intx_domain, DOMAIN_BUS_WIRED);
+
+	of_node_put(pcie_intc_node);
+	raw_spin_lock_init(&port->lock);
+
+	return mc_allocate_msi_domains(port);
+}
+
+static void mc_pcie_setup_window(void __iomem *bridge_base_addr, u32 index,
+				 phys_addr_t axi_addr, phys_addr_t pci_addr,
+				 size_t size)
+{
+	u32 atr_sz = ilog2(size) - 1;
+	u32 val;
+
+	if (index == 0)
+		val = PCIE_CONFIG_INTERFACE;
+	else
+		val = PCIE_TX_RX_INTERFACE;
+
+	writel(val, bridge_base_addr + (index * ATR_ENTRY_SIZE) +
+	       ATR0_AXI4_SLV0_TRSL_PARAM);
+
+	val = lower_32_bits(axi_addr) | (atr_sz << ATR_SIZE_SHIFT) |
+			    ATR_IMPL_ENABLE;
+	writel(val, bridge_base_addr + (index * ATR_ENTRY_SIZE) +
+	       ATR0_AXI4_SLV0_SRCADDR_PARAM);
+
+	val = upper_32_bits(axi_addr);
+	writel(val, bridge_base_addr + (index * ATR_ENTRY_SIZE) +
+	       ATR0_AXI4_SLV0_SRC_ADDR);
+
+	val = lower_32_bits(pci_addr);
+	writel(val, bridge_base_addr + (index * ATR_ENTRY_SIZE) +
+	       ATR0_AXI4_SLV0_TRSL_ADDR_LSB);
+
+	val = upper_32_bits(pci_addr);
+	writel(val, bridge_base_addr + (index * ATR_ENTRY_SIZE) +
+	       ATR0_AXI4_SLV0_TRSL_ADDR_UDW);
+
+	val = readl(bridge_base_addr + ATR0_PCIE_WIN0_SRCADDR_PARAM);
+	val |= (ATR0_PCIE_ATR_SIZE << ATR0_PCIE_ATR_SIZE_SHIFT);
+	writel(val, bridge_base_addr + ATR0_PCIE_WIN0_SRCADDR_PARAM);
+	writel(0, bridge_base_addr + ATR0_PCIE_WIN0_SRC_ADDR);
+}
+
+static int mc_pcie_setup_windows(struct platform_device *pdev,
+				 struct mc_pcie *port)
+{
+	void __iomem *bridge_base_addr =
+		port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+	struct pci_host_bridge *bridge = platform_get_drvdata(pdev);
+	struct resource_entry *entry;
+	u64 pci_addr;
+	u32 index = 1;
+
+	resource_list_for_each_entry(entry, &bridge->windows) {
+		if (resource_type(entry->res) == IORESOURCE_MEM) {
+			pci_addr = entry->res->start - entry->offset;
+			mc_pcie_setup_window(bridge_base_addr, index,
+					     entry->res->start, pci_addr,
+					     resource_size(entry->res));
+			index++;
+		}
+	}
+
+	return 0;
+}
+
+static inline void mc_clear_secs(struct mc_pcie *port)
+{
+	void __iomem *ctrl_base_addr = port->axi_base_addr + MC_PCIE_CTRL_ADDR;
+
+	writel_relaxed(SEC_ERROR_INT_ALL_RAM_SEC_ERR_INT, ctrl_base_addr +
+		       SEC_ERROR_INT);
+	writel_relaxed(0, ctrl_base_addr + SEC_ERROR_EVENT_CNT);
+}
+
+static inline void mc_clear_deds(struct mc_pcie *port)
+{
+	void __iomem *ctrl_base_addr = port->axi_base_addr + MC_PCIE_CTRL_ADDR;
+
+	writel_relaxed(DED_ERROR_INT_ALL_RAM_DED_ERR_INT, ctrl_base_addr +
+		       DED_ERROR_INT);
+	writel_relaxed(0, ctrl_base_addr + DED_ERROR_EVENT_CNT);
+}
+
+static void mc_disable_interrupts(struct mc_pcie *port)
+{
+	void __iomem *bridge_base_addr = port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+	void __iomem *ctrl_base_addr = port->axi_base_addr + MC_PCIE_CTRL_ADDR;
+	u32 val;
+
+	/* Ensure ECC bypass is enabled */
+	val = ECC_CONTROL_TX_RAM_ECC_BYPASS |
+	      ECC_CONTROL_RX_RAM_ECC_BYPASS |
+	      ECC_CONTROL_PCIE2AXI_RAM_ECC_BYPASS |
+	      ECC_CONTROL_AXI2PCIE_RAM_ECC_BYPASS;
+	writel_relaxed(val, ctrl_base_addr + ECC_CONTROL);
+
+	/* Disable SEC errors and clear any outstanding */
+	writel_relaxed(SEC_ERROR_INT_ALL_RAM_SEC_ERR_INT, ctrl_base_addr +
+		       SEC_ERROR_INT_MASK);
+	mc_clear_secs(port);
+
+	/* Disable DED errors and clear any outstanding */
+	writel_relaxed(DED_ERROR_INT_ALL_RAM_DED_ERR_INT, ctrl_base_addr +
+		       DED_ERROR_INT_MASK);
+	mc_clear_deds(port);
+
+	/* Disable local interrupts and clear any outstanding */
+	writel_relaxed(0, bridge_base_addr + IMASK_LOCAL);
+	writel_relaxed(GENMASK(31, 0), bridge_base_addr + ISTATUS_LOCAL);
+	writel_relaxed(GENMASK(31, 0), bridge_base_addr + ISTATUS_MSI);
+
+	/* Disable PCIe events and clear any outstanding */
+	val = PCIE_EVENT_INT_L2_EXIT_INT |
+	      PCIE_EVENT_INT_HOTRST_EXIT_INT |
+	      PCIE_EVENT_INT_DLUP_EXIT_INT |
+	      PCIE_EVENT_INT_L2_EXIT_INT_MASK |
+	      PCIE_EVENT_INT_HOTRST_EXIT_INT_MASK |
+	      PCIE_EVENT_INT_DLUP_EXIT_INT_MASK;
+	writel_relaxed(val, ctrl_base_addr + PCIE_EVENT_INT);
+
+	/* Disable host interrupts and clear any outstanding */
+	writel_relaxed(0, bridge_base_addr + IMASK_HOST);
+	writel_relaxed(GENMASK(31, 0), bridge_base_addr + ISTATUS_HOST);
+}
+
+static int mc_init_interrupts(struct platform_device *pdev, struct mc_pcie *port)
+{
+	struct device *dev = &pdev->dev;
+	int irq;
+	int i, intx_irq, msi_irq, event_irq;
+	int ret;
+
+	ret = mc_pcie_init_irq_domains(port);
+	if (ret) {
+		dev_err(dev, "failed creating IRQ domains\n");
+		return ret;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return -ENODEV;
+
+	for (i = 0; i < NUM_EVENTS; i++) {
+		event_irq = irq_create_mapping(port->event_domain, i);
+		if (!event_irq) {
+			dev_err(dev, "failed to map hwirq %d\n", i);
+			return -ENXIO;
+		}
+
+		ret = devm_request_irq(dev, event_irq, mc_event_handler,
+				       0, event_cause[i].sym, port);
+		if (ret) {
+			dev_err(dev, "failed to request IRQ %d\n", event_irq);
+			return ret;
+		}
+	}
+
+	intx_irq = irq_create_mapping(port->event_domain,
+				      EVENT_LOCAL_PM_MSI_INT_INTX);
+	if (!intx_irq) {
+		dev_err(dev, "failed to map INTx interrupt\n");
+		return -ENXIO;
+	}
+
+	/* Plug the INTx chained handler */
+	irq_set_chained_handler_and_data(intx_irq, mc_handle_intx, port);
+
+	msi_irq = irq_create_mapping(port->event_domain,
+				     EVENT_LOCAL_PM_MSI_INT_MSI);
+	if (!msi_irq)
+		return -ENXIO;
+
+	/* Plug the MSI chained handler */
+	irq_set_chained_handler_and_data(msi_irq, mc_handle_msi, port);
+
+	/* Plug the main event chained handler */
+	irq_set_chained_handler_and_data(irq, mc_handle_event, port);
+
+	return 0;
+}
+
+static int mc_platform_init(struct pci_config_window *cfg)
+{
+	struct device *dev = cfg->parent;
+	struct platform_device *pdev = to_platform_device(dev);
+	void __iomem *bridge_base_addr =
+		port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+	int ret;
+
+	/* Configure address translation table 0 for PCIe config space */
+	mc_pcie_setup_window(bridge_base_addr, 0, cfg->res.start,
+			     cfg->res.start,
+			     resource_size(&cfg->res));
+
+	/* Need some fixups in config space */
+	mc_pcie_enable_msi(port, cfg->win);
+
+	/* Configure non-config space outbound ranges */
+	ret = mc_pcie_setup_windows(pdev, port);
+	if (ret)
+		return ret;
+
+	/* Address translation is up; safe to enable interrupts */
+	ret = mc_init_interrupts(pdev, port);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int mc_host_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	void __iomem *bridge_base_addr;
+	int ret;
+	u32 val;
+
+	port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+
+	port->dev = dev;
+
+	port->axi_base_addr = devm_platform_ioremap_resource(pdev, 1);
+	if (IS_ERR(port->axi_base_addr))
+		return PTR_ERR(port->axi_base_addr);
+
+	mc_disable_interrupts(port);
+
+	bridge_base_addr = port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+
+	/* Allow enabling MSI by disabling MSI-X */
+	val = readl(bridge_base_addr + PCIE_PCI_IRQ_DW0);
+	val &= ~MSIX_CAP_MASK;
+	writel(val, bridge_base_addr + PCIE_PCI_IRQ_DW0);
+
+	/* Pick num vectors from bitfile programmed onto FPGA fabric */
+	val = readl(bridge_base_addr + PCIE_PCI_IRQ_DW0);
+	val &= NUM_MSI_MSGS_MASK;
+	val >>= NUM_MSI_MSGS_SHIFT;
+
+	port->msi.num_vectors = 1 << val;
+
+	/* Pick vector address from design */
+	port->msi.vector_phy = readl_relaxed(bridge_base_addr + IMSI_ADDR);
+
+	ret = mc_pcie_init_clks(dev);
+	if (ret) {
+		dev_err(dev, "failed to get clock resources, error %d\n", ret);
+		return -ENODEV;
+	}
+
+	return pci_host_common_probe(pdev);
+}
+
+static const struct pci_ecam_ops mc_ecam_ops = {
+	.init = mc_platform_init,
+	.pci_ops = {
+		.map_bus = pci_ecam_map_bus,
+		.read = pci_generic_config_read,
+		.write = pci_generic_config_write,
+	}
+};
+
+static const struct of_device_id mc_pcie_of_match[] = {
+	{
+		.compatible = "microchip,pcie-host-1.0",
+		.data = &mc_ecam_ops,
+	},
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, mc_pcie_of_match);
+
+static struct platform_driver mc_pcie_driver = {
+	.probe = mc_host_probe,
+	.driver = {
+		.name = "microchip-pcie",
+		.of_match_table = mc_pcie_of_match,
+		.suppress_bind_attrs = true,
+	},
+};
+
+builtin_platform_driver(mc_pcie_driver);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Microchip PCIe host controller driver");
+MODULE_AUTHOR("Daire McNamara <daire.mcnamara@microchip.com>");
diff --git a/drivers/pci/controller/pcie-mt7621.c b/drivers/pci/controller/pcie-mt7621.c
new file mode 100644
index 000000000..79e225edb
--- /dev/null
+++ b/drivers/pci/controller/pcie-mt7621.c
@@ -0,0 +1,552 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * BRIEF MODULE DESCRIPTION
+ *     PCI init for Ralink RT2880 solution
+ *
+ * Copyright 2007 Ralink Inc. (bruce_chang@ralinktech.com.tw)
+ *
+ * May 2007 Bruce Chang
+ * Initial Release
+ *
+ * May 2009 Bruce Chang
+ * support RT2880/RT3883 PCIe
+ *
+ * May 2011 Bruce Chang
+ * support RT6855/MT7620 PCIe
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/sys_soc.h>
+
+#include "../pci.h"
+
+/* MediaTek-specific configuration registers */
+#define PCIE_FTS_NUM			0x70c
+#define PCIE_FTS_NUM_MASK		GENMASK(15, 8)
+#define PCIE_FTS_NUM_L0(x)		(((x) & 0xff) << 8)
+
+/* Host-PCI bridge registers */
+#define RALINK_PCI_PCICFG_ADDR		0x0000
+#define RALINK_PCI_PCIMSK_ADDR		0x000c
+#define RALINK_PCI_CONFIG_ADDR		0x0020
+#define RALINK_PCI_CONFIG_DATA		0x0024
+#define RALINK_PCI_MEMBASE		0x0028
+#define RALINK_PCI_IOBASE		0x002c
+
+/* PCIe RC control registers */
+#define RALINK_PCI_ID			0x0030
+#define RALINK_PCI_CLASS		0x0034
+#define RALINK_PCI_SUBID		0x0038
+#define RALINK_PCI_STATUS		0x0050
+
+/* Some definition values */
+#define PCIE_REVISION_ID		BIT(0)
+#define PCIE_CLASS_CODE			(0x60400 << 8)
+#define PCIE_BAR_MAP_MAX		GENMASK(30, 16)
+#define PCIE_BAR_ENABLE			BIT(0)
+#define PCIE_PORT_INT_EN(x)		BIT(20 + (x))
+#define PCIE_PORT_LINKUP		BIT(0)
+#define PCIE_PORT_CNT			3
+
+#define INIT_PORTS_DELAY_MS		100
+#define PERST_DELAY_MS			100
+
+/**
+ * struct mt7621_pcie_port - PCIe port information
+ * @base: I/O mapped register base
+ * @list: port list
+ * @pcie: pointer to PCIe host info
+ * @clk: pointer to the port clock gate
+ * @phy: pointer to PHY control block
+ * @pcie_rst: pointer to port reset control
+ * @gpio_rst: gpio reset
+ * @slot: port slot
+ * @enabled: indicates if port is enabled
+ */
+struct mt7621_pcie_port {
+	void __iomem *base;
+	struct list_head list;
+	struct mt7621_pcie *pcie;
+	struct clk *clk;
+	struct phy *phy;
+	struct reset_control *pcie_rst;
+	struct gpio_desc *gpio_rst;
+	u32 slot;
+	bool enabled;
+};
+
+/**
+ * struct mt7621_pcie - PCIe host information
+ * @base: IO Mapped Register Base
+ * @dev: Pointer to PCIe device
+ * @ports: pointer to PCIe port information
+ * @resets_inverted: depends on chip revision
+ * reset lines are inverted.
+ */
+struct mt7621_pcie {
+	struct device *dev;
+	void __iomem *base;
+	struct list_head ports;
+	bool resets_inverted;
+};
+
+static inline u32 pcie_read(struct mt7621_pcie *pcie, u32 reg)
+{
+	return readl_relaxed(pcie->base + reg);
+}
+
+static inline void pcie_write(struct mt7621_pcie *pcie, u32 val, u32 reg)
+{
+	writel_relaxed(val, pcie->base + reg);
+}
+
+static inline u32 pcie_port_read(struct mt7621_pcie_port *port, u32 reg)
+{
+	return readl_relaxed(port->base + reg);
+}
+
+static inline void pcie_port_write(struct mt7621_pcie_port *port,
+				   u32 val, u32 reg)
+{
+	writel_relaxed(val, port->base + reg);
+}
+
+static void __iomem *mt7621_pcie_map_bus(struct pci_bus *bus,
+					 unsigned int devfn, int where)
+{
+	struct mt7621_pcie *pcie = bus->sysdata;
+	u32 address = PCI_CONF1_EXT_ADDRESS(bus->number, PCI_SLOT(devfn),
+					    PCI_FUNC(devfn), where);
+
+	writel_relaxed(address, pcie->base + RALINK_PCI_CONFIG_ADDR);
+
+	return pcie->base + RALINK_PCI_CONFIG_DATA + (where & 3);
+}
+
+static struct pci_ops mt7621_pcie_ops = {
+	.map_bus	= mt7621_pcie_map_bus,
+	.read		= pci_generic_config_read,
+	.write		= pci_generic_config_write,
+};
+
+static u32 read_config(struct mt7621_pcie *pcie, unsigned int dev, u32 reg)
+{
+	u32 address = PCI_CONF1_EXT_ADDRESS(0, dev, 0, reg);
+
+	pcie_write(pcie, address, RALINK_PCI_CONFIG_ADDR);
+	return pcie_read(pcie, RALINK_PCI_CONFIG_DATA);
+}
+
+static void write_config(struct mt7621_pcie *pcie, unsigned int dev,
+			 u32 reg, u32 val)
+{
+	u32 address = PCI_CONF1_EXT_ADDRESS(0, dev, 0, reg);
+
+	pcie_write(pcie, address, RALINK_PCI_CONFIG_ADDR);
+	pcie_write(pcie, val, RALINK_PCI_CONFIG_DATA);
+}
+
+static inline void mt7621_rst_gpio_pcie_assert(struct mt7621_pcie_port *port)
+{
+	if (port->gpio_rst)
+		gpiod_set_value(port->gpio_rst, 1);
+}
+
+static inline void mt7621_rst_gpio_pcie_deassert(struct mt7621_pcie_port *port)
+{
+	if (port->gpio_rst)
+		gpiod_set_value(port->gpio_rst, 0);
+}
+
+static inline bool mt7621_pcie_port_is_linkup(struct mt7621_pcie_port *port)
+{
+	return (pcie_port_read(port, RALINK_PCI_STATUS) & PCIE_PORT_LINKUP) != 0;
+}
+
+static inline void mt7621_control_assert(struct mt7621_pcie_port *port)
+{
+	struct mt7621_pcie *pcie = port->pcie;
+
+	if (pcie->resets_inverted)
+		reset_control_assert(port->pcie_rst);
+	else
+		reset_control_deassert(port->pcie_rst);
+}
+
+static inline void mt7621_control_deassert(struct mt7621_pcie_port *port)
+{
+	struct mt7621_pcie *pcie = port->pcie;
+
+	if (pcie->resets_inverted)
+		reset_control_deassert(port->pcie_rst);
+	else
+		reset_control_assert(port->pcie_rst);
+}
+
+static int mt7621_pcie_parse_port(struct mt7621_pcie *pcie,
+				  struct device_node *node,
+				  int slot)
+{
+	struct mt7621_pcie_port *port;
+	struct device *dev = pcie->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	char name[10];
+	int err;
+
+	port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+
+	port->base = devm_platform_ioremap_resource(pdev, slot + 1);
+	if (IS_ERR(port->base))
+		return PTR_ERR(port->base);
+
+	port->clk = devm_get_clk_from_child(dev, node, NULL);
+	if (IS_ERR(port->clk)) {
+		dev_err(dev, "failed to get pcie%d clock\n", slot);
+		return PTR_ERR(port->clk);
+	}
+
+	port->pcie_rst = of_reset_control_get_exclusive(node, NULL);
+	if (PTR_ERR(port->pcie_rst) == -EPROBE_DEFER) {
+		dev_err(dev, "failed to get pcie%d reset control\n", slot);
+		return PTR_ERR(port->pcie_rst);
+	}
+
+	snprintf(name, sizeof(name), "pcie-phy%d", slot);
+	port->phy = devm_of_phy_get(dev, node, name);
+	if (IS_ERR(port->phy)) {
+		dev_err(dev, "failed to get pcie-phy%d\n", slot);
+		err = PTR_ERR(port->phy);
+		goto remove_reset;
+	}
+
+	port->gpio_rst = devm_gpiod_get_index_optional(dev, "reset", slot,
+						       GPIOD_OUT_LOW);
+	if (IS_ERR(port->gpio_rst)) {
+		dev_err(dev, "failed to get GPIO for PCIe%d\n", slot);
+		err = PTR_ERR(port->gpio_rst);
+		goto remove_reset;
+	}
+
+	port->slot = slot;
+	port->pcie = pcie;
+
+	INIT_LIST_HEAD(&port->list);
+	list_add_tail(&port->list, &pcie->ports);
+
+	return 0;
+
+remove_reset:
+	reset_control_put(port->pcie_rst);
+	return err;
+}
+
+static int mt7621_pcie_parse_dt(struct mt7621_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct device_node *node = dev->of_node, *child;
+	int err;
+
+	pcie->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(pcie->base))
+		return PTR_ERR(pcie->base);
+
+	for_each_available_child_of_node(node, child) {
+		int slot;
+
+		err = of_pci_get_devfn(child);
+		if (err < 0) {
+			of_node_put(child);
+			dev_err(dev, "failed to parse devfn: %d\n", err);
+			return err;
+		}
+
+		slot = PCI_SLOT(err);
+
+		err = mt7621_pcie_parse_port(pcie, child, slot);
+		if (err) {
+			of_node_put(child);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int mt7621_pcie_init_port(struct mt7621_pcie_port *port)
+{
+	struct mt7621_pcie *pcie = port->pcie;
+	struct device *dev = pcie->dev;
+	u32 slot = port->slot;
+	int err;
+
+	err = phy_init(port->phy);
+	if (err) {
+		dev_err(dev, "failed to initialize port%d phy\n", slot);
+		return err;
+	}
+
+	err = phy_power_on(port->phy);
+	if (err) {
+		dev_err(dev, "failed to power on port%d phy\n", slot);
+		phy_exit(port->phy);
+		return err;
+	}
+
+	port->enabled = true;
+
+	return 0;
+}
+
+static void mt7621_pcie_reset_assert(struct mt7621_pcie *pcie)
+{
+	struct mt7621_pcie_port *port;
+
+	list_for_each_entry(port, &pcie->ports, list) {
+		/* PCIe RC reset assert */
+		mt7621_control_assert(port);
+
+		/* PCIe EP reset assert */
+		mt7621_rst_gpio_pcie_assert(port);
+	}
+
+	msleep(PERST_DELAY_MS);
+}
+
+static void mt7621_pcie_reset_rc_deassert(struct mt7621_pcie *pcie)
+{
+	struct mt7621_pcie_port *port;
+
+	list_for_each_entry(port, &pcie->ports, list)
+		mt7621_control_deassert(port);
+}
+
+static void mt7621_pcie_reset_ep_deassert(struct mt7621_pcie *pcie)
+{
+	struct mt7621_pcie_port *port;
+
+	list_for_each_entry(port, &pcie->ports, list)
+		mt7621_rst_gpio_pcie_deassert(port);
+
+	msleep(PERST_DELAY_MS);
+}
+
+static int mt7621_pcie_init_ports(struct mt7621_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct mt7621_pcie_port *port, *tmp;
+	u8 num_disabled = 0;
+	int err;
+
+	mt7621_pcie_reset_assert(pcie);
+	mt7621_pcie_reset_rc_deassert(pcie);
+
+	list_for_each_entry_safe(port, tmp, &pcie->ports, list) {
+		u32 slot = port->slot;
+
+		if (slot == 1) {
+			port->enabled = true;
+			continue;
+		}
+
+		err = mt7621_pcie_init_port(port);
+		if (err) {
+			dev_err(dev, "initializing port %d failed\n", slot);
+			list_del(&port->list);
+		}
+	}
+
+	msleep(INIT_PORTS_DELAY_MS);
+	mt7621_pcie_reset_ep_deassert(pcie);
+
+	tmp = NULL;
+	list_for_each_entry(port, &pcie->ports, list) {
+		u32 slot = port->slot;
+
+		if (!mt7621_pcie_port_is_linkup(port)) {
+			dev_info(dev, "pcie%d no card, disable it (RST & CLK)\n",
+				 slot);
+			mt7621_control_assert(port);
+			port->enabled = false;
+			num_disabled++;
+
+			if (slot == 0) {
+				tmp = port;
+				continue;
+			}
+
+			if (slot == 1 && tmp && !tmp->enabled)
+				phy_power_off(tmp->phy);
+		}
+	}
+
+	return (num_disabled != PCIE_PORT_CNT) ? 0 : -ENODEV;
+}
+
+static void mt7621_pcie_enable_port(struct mt7621_pcie_port *port)
+{
+	struct mt7621_pcie *pcie = port->pcie;
+	u32 slot = port->slot;
+	u32 val;
+
+	/* enable pcie interrupt */
+	val = pcie_read(pcie, RALINK_PCI_PCIMSK_ADDR);
+	val |= PCIE_PORT_INT_EN(slot);
+	pcie_write(pcie, val, RALINK_PCI_PCIMSK_ADDR);
+
+	/* map 2G DDR region */
+	pcie_port_write(port, PCIE_BAR_MAP_MAX | PCIE_BAR_ENABLE,
+			PCI_BASE_ADDRESS_0);
+
+	/* configure class code and revision ID */
+	pcie_port_write(port, PCIE_CLASS_CODE | PCIE_REVISION_ID,
+			RALINK_PCI_CLASS);
+
+	/* configure RC FTS number to 250 when it leaves L0s */
+	val = read_config(pcie, slot, PCIE_FTS_NUM);
+	val &= ~PCIE_FTS_NUM_MASK;
+	val |= PCIE_FTS_NUM_L0(0x50);
+	write_config(pcie, slot, PCIE_FTS_NUM, val);
+}
+
+static int mt7621_pcie_enable_ports(struct pci_host_bridge *host)
+{
+	struct mt7621_pcie *pcie = pci_host_bridge_priv(host);
+	struct device *dev = pcie->dev;
+	struct mt7621_pcie_port *port;
+	struct resource_entry *entry;
+	int err;
+
+	entry = resource_list_first_type(&host->windows, IORESOURCE_IO);
+	if (!entry) {
+		dev_err(dev, "cannot get io resource\n");
+		return -EINVAL;
+	}
+
+	/* Setup MEMWIN and IOWIN */
+	pcie_write(pcie, 0xffffffff, RALINK_PCI_MEMBASE);
+	pcie_write(pcie, entry->res->start - entry->offset, RALINK_PCI_IOBASE);
+
+	list_for_each_entry(port, &pcie->ports, list) {
+		if (port->enabled) {
+			err = clk_prepare_enable(port->clk);
+			if (err) {
+				dev_err(dev, "enabling clk pcie%d\n",
+					port->slot);
+				return err;
+			}
+
+			mt7621_pcie_enable_port(port);
+			dev_info(dev, "PCIE%d enabled\n", port->slot);
+		}
+	}
+
+	return 0;
+}
+
+static int mt7621_pcie_register_host(struct pci_host_bridge *host)
+{
+	struct mt7621_pcie *pcie = pci_host_bridge_priv(host);
+
+	host->ops = &mt7621_pcie_ops;
+	host->sysdata = pcie;
+	return pci_host_probe(host);
+}
+
+static const struct soc_device_attribute mt7621_pcie_quirks_match[] = {
+	{ .soc_id = "mt7621", .revision = "E2" },
+	{ /* sentinel */ }
+};
+
+static int mt7621_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	const struct soc_device_attribute *attr;
+	struct mt7621_pcie_port *port;
+	struct mt7621_pcie *pcie;
+	struct pci_host_bridge *bridge;
+	int err;
+
+	if (!dev->of_node)
+		return -ENODEV;
+
+	bridge = devm_pci_alloc_host_bridge(dev, sizeof(*pcie));
+	if (!bridge)
+		return -ENOMEM;
+
+	pcie = pci_host_bridge_priv(bridge);
+	pcie->dev = dev;
+	platform_set_drvdata(pdev, pcie);
+	INIT_LIST_HEAD(&pcie->ports);
+
+	attr = soc_device_match(mt7621_pcie_quirks_match);
+	if (attr)
+		pcie->resets_inverted = true;
+
+	err = mt7621_pcie_parse_dt(pcie);
+	if (err) {
+		dev_err(dev, "parsing DT failed\n");
+		return err;
+	}
+
+	err = mt7621_pcie_init_ports(pcie);
+	if (err) {
+		dev_err(dev, "nothing connected in virtual bridges\n");
+		return 0;
+	}
+
+	err = mt7621_pcie_enable_ports(bridge);
+	if (err) {
+		dev_err(dev, "error enabling pcie ports\n");
+		goto remove_resets;
+	}
+
+	return mt7621_pcie_register_host(bridge);
+
+remove_resets:
+	list_for_each_entry(port, &pcie->ports, list)
+		reset_control_put(port->pcie_rst);
+
+	return err;
+}
+
+static void mt7621_pcie_remove(struct platform_device *pdev)
+{
+	struct mt7621_pcie *pcie = platform_get_drvdata(pdev);
+	struct mt7621_pcie_port *port;
+
+	list_for_each_entry(port, &pcie->ports, list)
+		reset_control_put(port->pcie_rst);
+}
+
+static const struct of_device_id mt7621_pcie_ids[] = {
+	{ .compatible = "mediatek,mt7621-pci" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mt7621_pcie_ids);
+
+static struct platform_driver mt7621_pcie_driver = {
+	.probe = mt7621_pcie_probe,
+	.remove_new = mt7621_pcie_remove,
+	.driver = {
+		.name = "mt7621-pci",
+		.of_match_table = mt7621_pcie_ids,
+	},
+};
+builtin_platform_driver(mt7621_pcie_driver);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pcie-rcar-ep.c b/drivers/pci/controller/pcie-rcar-ep.c
new file mode 100644
index 000000000..f9682df1d
--- /dev/null
+++ b/drivers/pci/controller/pcie-rcar-ep.c
@@ -0,0 +1,561 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe endpoint driver for Renesas R-Car SoCs
+ *  Copyright (c) 2020 Renesas Electronics Europe GmbH
+ *
+ * Author: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/pci-epc.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include "pcie-rcar.h"
+
+#define RCAR_EPC_MAX_FUNCTIONS		1
+
+/* Structure representing the PCIe interface */
+struct rcar_pcie_endpoint {
+	struct rcar_pcie	pcie;
+	phys_addr_t		*ob_mapped_addr;
+	struct pci_epc_mem_window *ob_window;
+	u8			max_functions;
+	unsigned int		bar_to_atu[MAX_NR_INBOUND_MAPS];
+	unsigned long		*ib_window_map;
+	u32			num_ib_windows;
+	u32			num_ob_windows;
+};
+
+static void rcar_pcie_ep_hw_init(struct rcar_pcie *pcie)
+{
+	u32 val;
+
+	rcar_pci_write_reg(pcie, 0, PCIETCTLR);
+
+	/* Set endpoint mode */
+	rcar_pci_write_reg(pcie, 0, PCIEMSR);
+
+	/* Initialize default capabilities. */
+	rcar_rmw32(pcie, REXPCAP(0), 0xff, PCI_CAP_ID_EXP);
+	rcar_rmw32(pcie, REXPCAP(PCI_EXP_FLAGS),
+		   PCI_EXP_FLAGS_TYPE, PCI_EXP_TYPE_ENDPOINT << 4);
+	rcar_rmw32(pcie, RCONF(PCI_HEADER_TYPE), 0x7f,
+		   PCI_HEADER_TYPE_NORMAL);
+
+	/* Write out the physical slot number = 0 */
+	rcar_rmw32(pcie, REXPCAP(PCI_EXP_SLTCAP), PCI_EXP_SLTCAP_PSN, 0);
+
+	val = rcar_pci_read_reg(pcie, EXPCAP(1));
+	/* device supports fixed 128 bytes MPSS */
+	val &= ~GENMASK(2, 0);
+	rcar_pci_write_reg(pcie, val, EXPCAP(1));
+
+	val = rcar_pci_read_reg(pcie, EXPCAP(2));
+	/* read requests size 128 bytes */
+	val &= ~GENMASK(14, 12);
+	/* payload size 128 bytes */
+	val &= ~GENMASK(7, 5);
+	rcar_pci_write_reg(pcie, val, EXPCAP(2));
+
+	/* Set target link speed to 5.0 GT/s */
+	rcar_rmw32(pcie, EXPCAP(12), PCI_EXP_LNKSTA_CLS,
+		   PCI_EXP_LNKSTA_CLS_5_0GB);
+
+	/* Set the completion timer timeout to the maximum 50ms. */
+	rcar_rmw32(pcie, TLCTLR + 1, 0x3f, 50);
+
+	/* Terminate list of capabilities (Next Capability Offset=0) */
+	rcar_rmw32(pcie, RVCCAP(0), 0xfff00000, 0);
+
+	/* flush modifications */
+	wmb();
+}
+
+static int rcar_pcie_ep_get_window(struct rcar_pcie_endpoint *ep,
+				   phys_addr_t addr)
+{
+	int i;
+
+	for (i = 0; i < ep->num_ob_windows; i++)
+		if (ep->ob_window[i].phys_base == addr)
+			return i;
+
+	return -EINVAL;
+}
+
+static int rcar_pcie_parse_outbound_ranges(struct rcar_pcie_endpoint *ep,
+					   struct platform_device *pdev)
+{
+	struct rcar_pcie *pcie = &ep->pcie;
+	char outbound_name[10];
+	struct resource *res;
+	unsigned int i = 0;
+
+	ep->num_ob_windows = 0;
+	for (i = 0; i < RCAR_PCI_MAX_RESOURCES; i++) {
+		sprintf(outbound_name, "memory%u", i);
+		res = platform_get_resource_byname(pdev,
+						   IORESOURCE_MEM,
+						   outbound_name);
+		if (!res) {
+			dev_err(pcie->dev, "missing outbound window %u\n", i);
+			return -EINVAL;
+		}
+		if (!devm_request_mem_region(&pdev->dev, res->start,
+					     resource_size(res),
+					     outbound_name)) {
+			dev_err(pcie->dev, "Cannot request memory region %s.\n",
+				outbound_name);
+			return -EIO;
+		}
+
+		ep->ob_window[i].phys_base = res->start;
+		ep->ob_window[i].size = resource_size(res);
+		/* controller doesn't support multiple allocation
+		 * from same window, so set page_size to window size
+		 */
+		ep->ob_window[i].page_size = resource_size(res);
+	}
+	ep->num_ob_windows = i;
+
+	return 0;
+}
+
+static int rcar_pcie_ep_get_pdata(struct rcar_pcie_endpoint *ep,
+				  struct platform_device *pdev)
+{
+	struct rcar_pcie *pcie = &ep->pcie;
+	struct pci_epc_mem_window *window;
+	struct device *dev = pcie->dev;
+	struct resource res;
+	int err;
+
+	err = of_address_to_resource(dev->of_node, 0, &res);
+	if (err)
+		return err;
+	pcie->base = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(pcie->base))
+		return PTR_ERR(pcie->base);
+
+	ep->ob_window = devm_kcalloc(dev, RCAR_PCI_MAX_RESOURCES,
+				     sizeof(*window), GFP_KERNEL);
+	if (!ep->ob_window)
+		return -ENOMEM;
+
+	rcar_pcie_parse_outbound_ranges(ep, pdev);
+
+	err = of_property_read_u8(dev->of_node, "max-functions",
+				  &ep->max_functions);
+	if (err < 0 || ep->max_functions > RCAR_EPC_MAX_FUNCTIONS)
+		ep->max_functions = RCAR_EPC_MAX_FUNCTIONS;
+
+	return 0;
+}
+
+static int rcar_pcie_ep_write_header(struct pci_epc *epc, u8 fn, u8 vfn,
+				     struct pci_epf_header *hdr)
+{
+	struct rcar_pcie_endpoint *ep = epc_get_drvdata(epc);
+	struct rcar_pcie *pcie = &ep->pcie;
+	u32 val;
+
+	if (!fn)
+		val = hdr->vendorid;
+	else
+		val = rcar_pci_read_reg(pcie, IDSETR0);
+	val |= hdr->deviceid << 16;
+	rcar_pci_write_reg(pcie, val, IDSETR0);
+
+	val = hdr->revid;
+	val |= hdr->progif_code << 8;
+	val |= hdr->subclass_code << 16;
+	val |= hdr->baseclass_code << 24;
+	rcar_pci_write_reg(pcie, val, IDSETR1);
+
+	if (!fn)
+		val = hdr->subsys_vendor_id;
+	else
+		val = rcar_pci_read_reg(pcie, SUBIDSETR);
+	val |= hdr->subsys_id << 16;
+	rcar_pci_write_reg(pcie, val, SUBIDSETR);
+
+	if (hdr->interrupt_pin > PCI_INTERRUPT_INTA)
+		return -EINVAL;
+	val = rcar_pci_read_reg(pcie, PCICONF(15));
+	val |= (hdr->interrupt_pin << 8);
+	rcar_pci_write_reg(pcie, val, PCICONF(15));
+
+	return 0;
+}
+
+static int rcar_pcie_ep_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+				struct pci_epf_bar *epf_bar)
+{
+	int flags = epf_bar->flags | LAR_ENABLE | LAM_64BIT;
+	struct rcar_pcie_endpoint *ep = epc_get_drvdata(epc);
+	u64 size = 1ULL << fls64(epf_bar->size - 1);
+	dma_addr_t cpu_addr = epf_bar->phys_addr;
+	enum pci_barno bar = epf_bar->barno;
+	struct rcar_pcie *pcie = &ep->pcie;
+	u32 mask;
+	int idx;
+	int err;
+
+	idx = find_first_zero_bit(ep->ib_window_map, ep->num_ib_windows);
+	if (idx >= ep->num_ib_windows) {
+		dev_err(pcie->dev, "no free inbound window\n");
+		return -EINVAL;
+	}
+
+	if ((flags & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO)
+		flags |= IO_SPACE;
+
+	ep->bar_to_atu[bar] = idx;
+	/* use 64-bit BARs */
+	set_bit(idx, ep->ib_window_map);
+	set_bit(idx + 1, ep->ib_window_map);
+
+	if (cpu_addr > 0) {
+		unsigned long nr_zeros = __ffs64(cpu_addr);
+		u64 alignment = 1ULL << nr_zeros;
+
+		size = min(size, alignment);
+	}
+
+	size = min(size, 1ULL << 32);
+
+	mask = roundup_pow_of_two(size) - 1;
+	mask &= ~0xf;
+
+	rcar_pcie_set_inbound(pcie, cpu_addr,
+			      0x0, mask | flags, idx, false);
+
+	err = rcar_pcie_wait_for_phyrdy(pcie);
+	if (err) {
+		dev_err(pcie->dev, "phy not ready\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void rcar_pcie_ep_clear_bar(struct pci_epc *epc, u8 fn, u8 vfn,
+				   struct pci_epf_bar *epf_bar)
+{
+	struct rcar_pcie_endpoint *ep = epc_get_drvdata(epc);
+	enum pci_barno bar = epf_bar->barno;
+	u32 atu_index = ep->bar_to_atu[bar];
+
+	rcar_pcie_set_inbound(&ep->pcie, 0x0, 0x0, 0x0, bar, false);
+
+	clear_bit(atu_index, ep->ib_window_map);
+	clear_bit(atu_index + 1, ep->ib_window_map);
+}
+
+static int rcar_pcie_ep_set_msi(struct pci_epc *epc, u8 fn, u8 vfn,
+				u8 interrupts)
+{
+	struct rcar_pcie_endpoint *ep = epc_get_drvdata(epc);
+	struct rcar_pcie *pcie = &ep->pcie;
+	u32 flags;
+
+	flags = rcar_pci_read_reg(pcie, MSICAP(fn));
+	flags |= interrupts << MSICAP0_MMESCAP_OFFSET;
+	rcar_pci_write_reg(pcie, flags, MSICAP(fn));
+
+	return 0;
+}
+
+static int rcar_pcie_ep_get_msi(struct pci_epc *epc, u8 fn, u8 vfn)
+{
+	struct rcar_pcie_endpoint *ep = epc_get_drvdata(epc);
+	struct rcar_pcie *pcie = &ep->pcie;
+	u32 flags;
+
+	flags = rcar_pci_read_reg(pcie, MSICAP(fn));
+	if (!(flags & MSICAP0_MSIE))
+		return -EINVAL;
+
+	return ((flags & MSICAP0_MMESE_MASK) >> MSICAP0_MMESE_OFFSET);
+}
+
+static int rcar_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, u8 vfn,
+				 phys_addr_t addr, u64 pci_addr, size_t size)
+{
+	struct rcar_pcie_endpoint *ep = epc_get_drvdata(epc);
+	struct rcar_pcie *pcie = &ep->pcie;
+	struct resource_entry win;
+	struct resource res;
+	int window;
+	int err;
+
+	/* check if we have a link. */
+	err = rcar_pcie_wait_for_dl(pcie);
+	if (err) {
+		dev_err(pcie->dev, "link not up\n");
+		return err;
+	}
+
+	window = rcar_pcie_ep_get_window(ep, addr);
+	if (window < 0) {
+		dev_err(pcie->dev, "failed to get corresponding window\n");
+		return -EINVAL;
+	}
+
+	memset(&win, 0x0, sizeof(win));
+	memset(&res, 0x0, sizeof(res));
+	res.start = pci_addr;
+	res.end = pci_addr + size - 1;
+	res.flags = IORESOURCE_MEM;
+	win.res = &res;
+
+	rcar_pcie_set_outbound(pcie, window, &win);
+
+	ep->ob_mapped_addr[window] = addr;
+
+	return 0;
+}
+
+static void rcar_pcie_ep_unmap_addr(struct pci_epc *epc, u8 fn, u8 vfn,
+				    phys_addr_t addr)
+{
+	struct rcar_pcie_endpoint *ep = epc_get_drvdata(epc);
+	struct resource_entry win;
+	struct resource res;
+	int idx;
+
+	for (idx = 0; idx < ep->num_ob_windows; idx++)
+		if (ep->ob_mapped_addr[idx] == addr)
+			break;
+
+	if (idx >= ep->num_ob_windows)
+		return;
+
+	memset(&win, 0x0, sizeof(win));
+	memset(&res, 0x0, sizeof(res));
+	win.res = &res;
+	rcar_pcie_set_outbound(&ep->pcie, idx, &win);
+
+	ep->ob_mapped_addr[idx] = 0;
+}
+
+static int rcar_pcie_ep_assert_intx(struct rcar_pcie_endpoint *ep,
+				    u8 fn, u8 intx)
+{
+	struct rcar_pcie *pcie = &ep->pcie;
+	u32 val;
+
+	val = rcar_pci_read_reg(pcie, PCIEMSITXR);
+	if ((val & PCI_MSI_FLAGS_ENABLE)) {
+		dev_err(pcie->dev, "MSI is enabled, cannot assert INTx\n");
+		return -EINVAL;
+	}
+
+	val = rcar_pci_read_reg(pcie, PCICONF(1));
+	if ((val & INTDIS)) {
+		dev_err(pcie->dev, "INTx message transmission is disabled\n");
+		return -EINVAL;
+	}
+
+	val = rcar_pci_read_reg(pcie, PCIEINTXR);
+	if ((val & ASTINTX)) {
+		dev_err(pcie->dev, "INTx is already asserted\n");
+		return -EINVAL;
+	}
+
+	val |= ASTINTX;
+	rcar_pci_write_reg(pcie, val, PCIEINTXR);
+	usleep_range(1000, 1001);
+	val = rcar_pci_read_reg(pcie, PCIEINTXR);
+	val &= ~ASTINTX;
+	rcar_pci_write_reg(pcie, val, PCIEINTXR);
+
+	return 0;
+}
+
+static int rcar_pcie_ep_assert_msi(struct rcar_pcie *pcie,
+				   u8 fn, u8 interrupt_num)
+{
+	u16 msi_count;
+	u32 val;
+
+	/* Check MSI enable bit */
+	val = rcar_pci_read_reg(pcie, MSICAP(fn));
+	if (!(val & MSICAP0_MSIE))
+		return -EINVAL;
+
+	/* Get MSI numbers from MME */
+	msi_count = ((val & MSICAP0_MMESE_MASK) >> MSICAP0_MMESE_OFFSET);
+	msi_count = 1 << msi_count;
+
+	if (!interrupt_num || interrupt_num > msi_count)
+		return -EINVAL;
+
+	val = rcar_pci_read_reg(pcie, PCIEMSITXR);
+	rcar_pci_write_reg(pcie, val | (interrupt_num - 1), PCIEMSITXR);
+
+	return 0;
+}
+
+static int rcar_pcie_ep_raise_irq(struct pci_epc *epc, u8 fn, u8 vfn,
+				  enum pci_epc_irq_type type,
+				  u16 interrupt_num)
+{
+	struct rcar_pcie_endpoint *ep = epc_get_drvdata(epc);
+
+	switch (type) {
+	case PCI_EPC_IRQ_LEGACY:
+		return rcar_pcie_ep_assert_intx(ep, fn, 0);
+
+	case PCI_EPC_IRQ_MSI:
+		return rcar_pcie_ep_assert_msi(&ep->pcie, fn, interrupt_num);
+
+	default:
+		return -EINVAL;
+	}
+}
+
+static int rcar_pcie_ep_start(struct pci_epc *epc)
+{
+	struct rcar_pcie_endpoint *ep = epc_get_drvdata(epc);
+
+	rcar_pci_write_reg(&ep->pcie, MACCTLR_INIT_VAL, MACCTLR);
+	rcar_pci_write_reg(&ep->pcie, CFINIT, PCIETCTLR);
+
+	return 0;
+}
+
+static void rcar_pcie_ep_stop(struct pci_epc *epc)
+{
+	struct rcar_pcie_endpoint *ep = epc_get_drvdata(epc);
+
+	rcar_pci_write_reg(&ep->pcie, 0, PCIETCTLR);
+}
+
+static const struct pci_epc_features rcar_pcie_epc_features = {
+	.linkup_notifier = false,
+	.msi_capable = true,
+	.msix_capable = false,
+	/* use 64-bit BARs so mark BAR[1,3,5] as reserved */
+	.reserved_bar = 1 << BAR_1 | 1 << BAR_3 | 1 << BAR_5,
+	.bar_fixed_64bit = 1 << BAR_0 | 1 << BAR_2 | 1 << BAR_4,
+	.bar_fixed_size[0] = 128,
+	.bar_fixed_size[2] = 256,
+	.bar_fixed_size[4] = 256,
+};
+
+static const struct pci_epc_features*
+rcar_pcie_ep_get_features(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
+{
+	return &rcar_pcie_epc_features;
+}
+
+static const struct pci_epc_ops rcar_pcie_epc_ops = {
+	.write_header	= rcar_pcie_ep_write_header,
+	.set_bar	= rcar_pcie_ep_set_bar,
+	.clear_bar	= rcar_pcie_ep_clear_bar,
+	.set_msi	= rcar_pcie_ep_set_msi,
+	.get_msi	= rcar_pcie_ep_get_msi,
+	.map_addr	= rcar_pcie_ep_map_addr,
+	.unmap_addr	= rcar_pcie_ep_unmap_addr,
+	.raise_irq	= rcar_pcie_ep_raise_irq,
+	.start		= rcar_pcie_ep_start,
+	.stop		= rcar_pcie_ep_stop,
+	.get_features	= rcar_pcie_ep_get_features,
+};
+
+static const struct of_device_id rcar_pcie_ep_of_match[] = {
+	{ .compatible = "renesas,r8a774c0-pcie-ep", },
+	{ .compatible = "renesas,rcar-gen3-pcie-ep" },
+	{ },
+};
+
+static int rcar_pcie_ep_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct rcar_pcie_endpoint *ep;
+	struct rcar_pcie *pcie;
+	struct pci_epc *epc;
+	int err;
+
+	ep = devm_kzalloc(dev, sizeof(*ep), GFP_KERNEL);
+	if (!ep)
+		return -ENOMEM;
+
+	pcie = &ep->pcie;
+	pcie->dev = dev;
+
+	pm_runtime_enable(dev);
+	err = pm_runtime_resume_and_get(dev);
+	if (err < 0) {
+		dev_err(dev, "pm_runtime_resume_and_get failed\n");
+		goto err_pm_disable;
+	}
+
+	err = rcar_pcie_ep_get_pdata(ep, pdev);
+	if (err < 0) {
+		dev_err(dev, "failed to request resources: %d\n", err);
+		goto err_pm_put;
+	}
+
+	ep->num_ib_windows = MAX_NR_INBOUND_MAPS;
+	ep->ib_window_map =
+			devm_kcalloc(dev, BITS_TO_LONGS(ep->num_ib_windows),
+				     sizeof(long), GFP_KERNEL);
+	if (!ep->ib_window_map) {
+		err = -ENOMEM;
+		dev_err(dev, "failed to allocate memory for inbound map\n");
+		goto err_pm_put;
+	}
+
+	ep->ob_mapped_addr = devm_kcalloc(dev, ep->num_ob_windows,
+					  sizeof(*ep->ob_mapped_addr),
+					  GFP_KERNEL);
+	if (!ep->ob_mapped_addr) {
+		err = -ENOMEM;
+		dev_err(dev, "failed to allocate memory for outbound memory pointers\n");
+		goto err_pm_put;
+	}
+
+	epc = devm_pci_epc_create(dev, &rcar_pcie_epc_ops);
+	if (IS_ERR(epc)) {
+		dev_err(dev, "failed to create epc device\n");
+		err = PTR_ERR(epc);
+		goto err_pm_put;
+	}
+
+	epc->max_functions = ep->max_functions;
+	epc_set_drvdata(epc, ep);
+
+	rcar_pcie_ep_hw_init(pcie);
+
+	err = pci_epc_multi_mem_init(epc, ep->ob_window, ep->num_ob_windows);
+	if (err < 0) {
+		dev_err(dev, "failed to initialize the epc memory space\n");
+		goto err_pm_put;
+	}
+
+	return 0;
+
+err_pm_put:
+	pm_runtime_put(dev);
+
+err_pm_disable:
+	pm_runtime_disable(dev);
+
+	return err;
+}
+
+static struct platform_driver rcar_pcie_ep_driver = {
+	.driver = {
+		.name = "rcar-pcie-ep",
+		.of_match_table = rcar_pcie_ep_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = rcar_pcie_ep_probe,
+};
+builtin_platform_driver(rcar_pcie_ep_driver);
diff --git a/drivers/pci/controller/pcie-rcar-host.c b/drivers/pci/controller/pcie-rcar-host.c
new file mode 100644
index 000000000..88975e40e
--- /dev/null
+++ b/drivers/pci/controller/pcie-rcar-host.c
@@ -0,0 +1,1155 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe driver for Renesas R-Car SoCs
+ *  Copyright (C) 2014-2020 Renesas Electronics Europe Ltd
+ *
+ * Based on:
+ *  arch/sh/drivers/pci/pcie-sh7786.c
+ *  arch/sh/drivers/pci/ops-sh7786.c
+ *  Copyright (C) 2009 - 2011  Paul Mundt
+ *
+ * Author: Phil Edworthy <phil.edworthy@renesas.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/iopoll.h>
+#include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include "pcie-rcar.h"
+
+struct rcar_msi {
+	DECLARE_BITMAP(used, INT_PCI_MSI_NR);
+	struct irq_domain *domain;
+	struct mutex map_lock;
+	spinlock_t mask_lock;
+	int irq1;
+	int irq2;
+};
+
+/* Structure representing the PCIe interface */
+struct rcar_pcie_host {
+	struct rcar_pcie	pcie;
+	struct phy		*phy;
+	struct clk		*bus_clk;
+	struct			rcar_msi msi;
+	int			(*phy_init_fn)(struct rcar_pcie_host *host);
+};
+
+static DEFINE_SPINLOCK(pmsr_lock);
+
+static int rcar_pcie_wakeup(struct device *pcie_dev, void __iomem *pcie_base)
+{
+	unsigned long flags;
+	u32 pmsr, val;
+	int ret = 0;
+
+	spin_lock_irqsave(&pmsr_lock, flags);
+
+	if (!pcie_base || pm_runtime_suspended(pcie_dev)) {
+		ret = -EINVAL;
+		goto unlock_exit;
+	}
+
+	pmsr = readl(pcie_base + PMSR);
+
+	/*
+	 * Test if the PCIe controller received PM_ENTER_L1 DLLP and
+	 * the PCIe controller is not in L1 link state. If true, apply
+	 * fix, which will put the controller into L1 link state, from
+	 * which it can return to L0s/L0 on its own.
+	 */
+	if ((pmsr & PMEL1RX) && ((pmsr & PMSTATE) != PMSTATE_L1)) {
+		writel(L1IATN, pcie_base + PMCTLR);
+		ret = readl_poll_timeout_atomic(pcie_base + PMSR, val,
+						val & L1FAEG, 10, 1000);
+		WARN(ret, "Timeout waiting for L1 link state, ret=%d\n", ret);
+		writel(L1FAEG | PMEL1RX, pcie_base + PMSR);
+	}
+
+unlock_exit:
+	spin_unlock_irqrestore(&pmsr_lock, flags);
+	return ret;
+}
+
+static struct rcar_pcie_host *msi_to_host(struct rcar_msi *msi)
+{
+	return container_of(msi, struct rcar_pcie_host, msi);
+}
+
+static u32 rcar_read_conf(struct rcar_pcie *pcie, int where)
+{
+	unsigned int shift = BITS_PER_BYTE * (where & 3);
+	u32 val = rcar_pci_read_reg(pcie, where & ~3);
+
+	return val >> shift;
+}
+
+#ifdef CONFIG_ARM
+#define __rcar_pci_rw_reg_workaround(instr)				\
+		"	.arch armv7-a\n"				\
+		"1:	" instr " %1, [%2]\n"				\
+		"2:	isb\n"						\
+		"3:	.pushsection .text.fixup,\"ax\"\n"		\
+		"	.align	2\n"					\
+		"4:	mov	%0, #" __stringify(PCIBIOS_SET_FAILED) "\n" \
+		"	b	3b\n"					\
+		"	.popsection\n"					\
+		"	.pushsection __ex_table,\"a\"\n"		\
+		"	.align	3\n"					\
+		"	.long	1b, 4b\n"				\
+		"	.long	2b, 4b\n"				\
+		"	.popsection\n"
+#endif
+
+static int rcar_pci_write_reg_workaround(struct rcar_pcie *pcie, u32 val,
+					 unsigned int reg)
+{
+	int error = PCIBIOS_SUCCESSFUL;
+#ifdef CONFIG_ARM
+	asm volatile(
+		__rcar_pci_rw_reg_workaround("str")
+	: "+r"(error):"r"(val), "r"(pcie->base + reg) : "memory");
+#else
+	rcar_pci_write_reg(pcie, val, reg);
+#endif
+	return error;
+}
+
+static int rcar_pci_read_reg_workaround(struct rcar_pcie *pcie, u32 *val,
+					unsigned int reg)
+{
+	int error = PCIBIOS_SUCCESSFUL;
+#ifdef CONFIG_ARM
+	asm volatile(
+		__rcar_pci_rw_reg_workaround("ldr")
+	: "+r"(error), "=r"(*val) : "r"(pcie->base + reg) : "memory");
+
+	if (error != PCIBIOS_SUCCESSFUL)
+		PCI_SET_ERROR_RESPONSE(val);
+#else
+	*val = rcar_pci_read_reg(pcie, reg);
+#endif
+	return error;
+}
+
+/* Serialization is provided by 'pci_lock' in drivers/pci/access.c */
+static int rcar_pcie_config_access(struct rcar_pcie_host *host,
+		unsigned char access_type, struct pci_bus *bus,
+		unsigned int devfn, int where, u32 *data)
+{
+	struct rcar_pcie *pcie = &host->pcie;
+	unsigned int dev, func, reg, index;
+	int ret;
+
+	/* Wake the bus up in case it is in L1 state. */
+	ret = rcar_pcie_wakeup(pcie->dev, pcie->base);
+	if (ret) {
+		PCI_SET_ERROR_RESPONSE(data);
+		return PCIBIOS_SET_FAILED;
+	}
+
+	dev = PCI_SLOT(devfn);
+	func = PCI_FUNC(devfn);
+	reg = where & ~3;
+	index = reg / 4;
+
+	/*
+	 * While each channel has its own memory-mapped extended config
+	 * space, it's generally only accessible when in endpoint mode.
+	 * When in root complex mode, the controller is unable to target
+	 * itself with either type 0 or type 1 accesses, and indeed, any
+	 * controller initiated target transfer to its own config space
+	 * result in a completer abort.
+	 *
+	 * Each channel effectively only supports a single device, but as
+	 * the same channel <-> device access works for any PCI_SLOT()
+	 * value, we cheat a bit here and bind the controller's config
+	 * space to devfn 0 in order to enable self-enumeration. In this
+	 * case the regular ECAR/ECDR path is sidelined and the mangled
+	 * config access itself is initiated as an internal bus transaction.
+	 */
+	if (pci_is_root_bus(bus)) {
+		if (dev != 0)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+		if (access_type == RCAR_PCI_ACCESS_READ)
+			*data = rcar_pci_read_reg(pcie, PCICONF(index));
+		else
+			rcar_pci_write_reg(pcie, *data, PCICONF(index));
+
+		return PCIBIOS_SUCCESSFUL;
+	}
+
+	/* Clear errors */
+	rcar_pci_write_reg(pcie, rcar_pci_read_reg(pcie, PCIEERRFR), PCIEERRFR);
+
+	/* Set the PIO address */
+	rcar_pci_write_reg(pcie, PCIE_CONF_BUS(bus->number) |
+		PCIE_CONF_DEV(dev) | PCIE_CONF_FUNC(func) | reg, PCIECAR);
+
+	/* Enable the configuration access */
+	if (pci_is_root_bus(bus->parent))
+		rcar_pci_write_reg(pcie, PCIECCTLR_CCIE | TYPE0, PCIECCTLR);
+	else
+		rcar_pci_write_reg(pcie, PCIECCTLR_CCIE | TYPE1, PCIECCTLR);
+
+	/* Check for errors */
+	if (rcar_pci_read_reg(pcie, PCIEERRFR) & UNSUPPORTED_REQUEST)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/* Check for master and target aborts */
+	if (rcar_read_conf(pcie, RCONF(PCI_STATUS)) &
+		(PCI_STATUS_REC_MASTER_ABORT | PCI_STATUS_REC_TARGET_ABORT))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (access_type == RCAR_PCI_ACCESS_READ)
+		ret = rcar_pci_read_reg_workaround(pcie, data, PCIECDR);
+	else
+		ret = rcar_pci_write_reg_workaround(pcie, *data, PCIECDR);
+
+	/* Disable the configuration access */
+	rcar_pci_write_reg(pcie, 0, PCIECCTLR);
+
+	return ret;
+}
+
+static int rcar_pcie_read_conf(struct pci_bus *bus, unsigned int devfn,
+			       int where, int size, u32 *val)
+{
+	struct rcar_pcie_host *host = bus->sysdata;
+	int ret;
+
+	ret = rcar_pcie_config_access(host, RCAR_PCI_ACCESS_READ,
+				      bus, devfn, where, val);
+	if (ret != PCIBIOS_SUCCESSFUL)
+		return ret;
+
+	if (size == 1)
+		*val = (*val >> (BITS_PER_BYTE * (where & 3))) & 0xff;
+	else if (size == 2)
+		*val = (*val >> (BITS_PER_BYTE * (where & 2))) & 0xffff;
+
+	dev_dbg(&bus->dev, "pcie-config-read: bus=%3d devfn=0x%04x where=0x%04x size=%d val=0x%08x\n",
+		bus->number, devfn, where, size, *val);
+
+	return ret;
+}
+
+/* Serialization is provided by 'pci_lock' in drivers/pci/access.c */
+static int rcar_pcie_write_conf(struct pci_bus *bus, unsigned int devfn,
+				int where, int size, u32 val)
+{
+	struct rcar_pcie_host *host = bus->sysdata;
+	unsigned int shift;
+	u32 data;
+	int ret;
+
+	ret = rcar_pcie_config_access(host, RCAR_PCI_ACCESS_READ,
+				      bus, devfn, where, &data);
+	if (ret != PCIBIOS_SUCCESSFUL)
+		return ret;
+
+	dev_dbg(&bus->dev, "pcie-config-write: bus=%3d devfn=0x%04x where=0x%04x size=%d val=0x%08x\n",
+		bus->number, devfn, where, size, val);
+
+	if (size == 1) {
+		shift = BITS_PER_BYTE * (where & 3);
+		data &= ~(0xff << shift);
+		data |= ((val & 0xff) << shift);
+	} else if (size == 2) {
+		shift = BITS_PER_BYTE * (where & 2);
+		data &= ~(0xffff << shift);
+		data |= ((val & 0xffff) << shift);
+	} else
+		data = val;
+
+	ret = rcar_pcie_config_access(host, RCAR_PCI_ACCESS_WRITE,
+				      bus, devfn, where, &data);
+
+	return ret;
+}
+
+static struct pci_ops rcar_pcie_ops = {
+	.read	= rcar_pcie_read_conf,
+	.write	= rcar_pcie_write_conf,
+};
+
+static void rcar_pcie_force_speedup(struct rcar_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	unsigned int timeout = 1000;
+	u32 macsr;
+
+	if ((rcar_pci_read_reg(pcie, MACS2R) & LINK_SPEED) != LINK_SPEED_5_0GTS)
+		return;
+
+	if (rcar_pci_read_reg(pcie, MACCTLR) & SPEED_CHANGE) {
+		dev_err(dev, "Speed change already in progress\n");
+		return;
+	}
+
+	macsr = rcar_pci_read_reg(pcie, MACSR);
+	if ((macsr & LINK_SPEED) == LINK_SPEED_5_0GTS)
+		goto done;
+
+	/* Set target link speed to 5.0 GT/s */
+	rcar_rmw32(pcie, EXPCAP(12), PCI_EXP_LNKSTA_CLS,
+		   PCI_EXP_LNKSTA_CLS_5_0GB);
+
+	/* Set speed change reason as intentional factor */
+	rcar_rmw32(pcie, MACCGSPSETR, SPCNGRSN, 0);
+
+	/* Clear SPCHGFIN, SPCHGSUC, and SPCHGFAIL */
+	if (macsr & (SPCHGFIN | SPCHGSUC | SPCHGFAIL))
+		rcar_pci_write_reg(pcie, macsr, MACSR);
+
+	/* Start link speed change */
+	rcar_rmw32(pcie, MACCTLR, SPEED_CHANGE, SPEED_CHANGE);
+
+	while (timeout--) {
+		macsr = rcar_pci_read_reg(pcie, MACSR);
+		if (macsr & SPCHGFIN) {
+			/* Clear the interrupt bits */
+			rcar_pci_write_reg(pcie, macsr, MACSR);
+
+			if (macsr & SPCHGFAIL)
+				dev_err(dev, "Speed change failed\n");
+
+			goto done;
+		}
+
+		msleep(1);
+	}
+
+	dev_err(dev, "Speed change timed out\n");
+
+done:
+	dev_info(dev, "Current link speed is %s GT/s\n",
+		 (macsr & LINK_SPEED) == LINK_SPEED_5_0GTS ? "5" : "2.5");
+}
+
+static void rcar_pcie_hw_enable(struct rcar_pcie_host *host)
+{
+	struct rcar_pcie *pcie = &host->pcie;
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(host);
+	struct resource_entry *win;
+	LIST_HEAD(res);
+	int i = 0;
+
+	/* Try setting 5 GT/s link speed */
+	rcar_pcie_force_speedup(pcie);
+
+	/* Setup PCI resources */
+	resource_list_for_each_entry(win, &bridge->windows) {
+		struct resource *res = win->res;
+
+		if (!res->flags)
+			continue;
+
+		switch (resource_type(res)) {
+		case IORESOURCE_IO:
+		case IORESOURCE_MEM:
+			rcar_pcie_set_outbound(pcie, i, win);
+			i++;
+			break;
+		}
+	}
+}
+
+static int rcar_pcie_enable(struct rcar_pcie_host *host)
+{
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(host);
+
+	rcar_pcie_hw_enable(host);
+
+	pci_add_flags(PCI_REASSIGN_ALL_BUS);
+
+	bridge->sysdata = host;
+	bridge->ops = &rcar_pcie_ops;
+
+	return pci_host_probe(bridge);
+}
+
+static int phy_wait_for_ack(struct rcar_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	unsigned int timeout = 100;
+
+	while (timeout--) {
+		if (rcar_pci_read_reg(pcie, H1_PCIEPHYADRR) & PHY_ACK)
+			return 0;
+
+		udelay(100);
+	}
+
+	dev_err(dev, "Access to PCIe phy timed out\n");
+
+	return -ETIMEDOUT;
+}
+
+static void phy_write_reg(struct rcar_pcie *pcie,
+			  unsigned int rate, u32 addr,
+			  unsigned int lane, u32 data)
+{
+	u32 phyaddr;
+
+	phyaddr = WRITE_CMD |
+		((rate & 1) << RATE_POS) |
+		((lane & 0xf) << LANE_POS) |
+		((addr & 0xff) << ADR_POS);
+
+	/* Set write data */
+	rcar_pci_write_reg(pcie, data, H1_PCIEPHYDOUTR);
+	rcar_pci_write_reg(pcie, phyaddr, H1_PCIEPHYADRR);
+
+	/* Ignore errors as they will be dealt with if the data link is down */
+	phy_wait_for_ack(pcie);
+
+	/* Clear command */
+	rcar_pci_write_reg(pcie, 0, H1_PCIEPHYDOUTR);
+	rcar_pci_write_reg(pcie, 0, H1_PCIEPHYADRR);
+
+	/* Ignore errors as they will be dealt with if the data link is down */
+	phy_wait_for_ack(pcie);
+}
+
+static int rcar_pcie_hw_init(struct rcar_pcie *pcie)
+{
+	int err;
+
+	/* Begin initialization */
+	rcar_pci_write_reg(pcie, 0, PCIETCTLR);
+
+	/* Set mode */
+	rcar_pci_write_reg(pcie, 1, PCIEMSR);
+
+	err = rcar_pcie_wait_for_phyrdy(pcie);
+	if (err)
+		return err;
+
+	/*
+	 * Initial header for port config space is type 1, set the device
+	 * class to match. Hardware takes care of propagating the IDSETR
+	 * settings, so there is no need to bother with a quirk.
+	 */
+	rcar_pci_write_reg(pcie, PCI_CLASS_BRIDGE_PCI_NORMAL << 8, IDSETR1);
+
+	/*
+	 * Setup Secondary Bus Number & Subordinate Bus Number, even though
+	 * they aren't used, to avoid bridge being detected as broken.
+	 */
+	rcar_rmw32(pcie, RCONF(PCI_SECONDARY_BUS), 0xff, 1);
+	rcar_rmw32(pcie, RCONF(PCI_SUBORDINATE_BUS), 0xff, 1);
+
+	/* Initialize default capabilities. */
+	rcar_rmw32(pcie, REXPCAP(0), 0xff, PCI_CAP_ID_EXP);
+	rcar_rmw32(pcie, REXPCAP(PCI_EXP_FLAGS),
+		PCI_EXP_FLAGS_TYPE, PCI_EXP_TYPE_ROOT_PORT << 4);
+	rcar_rmw32(pcie, RCONF(PCI_HEADER_TYPE), 0x7f,
+		PCI_HEADER_TYPE_BRIDGE);
+
+	/* Enable data link layer active state reporting */
+	rcar_rmw32(pcie, REXPCAP(PCI_EXP_LNKCAP), PCI_EXP_LNKCAP_DLLLARC,
+		PCI_EXP_LNKCAP_DLLLARC);
+
+	/* Write out the physical slot number = 0 */
+	rcar_rmw32(pcie, REXPCAP(PCI_EXP_SLTCAP), PCI_EXP_SLTCAP_PSN, 0);
+
+	/* Set the completion timer timeout to the maximum 50ms. */
+	rcar_rmw32(pcie, TLCTLR + 1, 0x3f, 50);
+
+	/* Terminate list of capabilities (Next Capability Offset=0) */
+	rcar_rmw32(pcie, RVCCAP(0), 0xfff00000, 0);
+
+	/* Enable MSI */
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		rcar_pci_write_reg(pcie, 0x801f0000, PCIEMSITXR);
+
+	rcar_pci_write_reg(pcie, MACCTLR_INIT_VAL, MACCTLR);
+
+	/* Finish initialization - establish a PCI Express link */
+	rcar_pci_write_reg(pcie, CFINIT, PCIETCTLR);
+
+	/* This will timeout if we don't have a link. */
+	err = rcar_pcie_wait_for_dl(pcie);
+	if (err)
+		return err;
+
+	/* Enable INTx interrupts */
+	rcar_rmw32(pcie, PCIEINTXR, 0, 0xF << 8);
+
+	wmb();
+
+	return 0;
+}
+
+static int rcar_pcie_phy_init_h1(struct rcar_pcie_host *host)
+{
+	struct rcar_pcie *pcie = &host->pcie;
+
+	/* Initialize the phy */
+	phy_write_reg(pcie, 0, 0x42, 0x1, 0x0EC34191);
+	phy_write_reg(pcie, 1, 0x42, 0x1, 0x0EC34180);
+	phy_write_reg(pcie, 0, 0x43, 0x1, 0x00210188);
+	phy_write_reg(pcie, 1, 0x43, 0x1, 0x00210188);
+	phy_write_reg(pcie, 0, 0x44, 0x1, 0x015C0014);
+	phy_write_reg(pcie, 1, 0x44, 0x1, 0x015C0014);
+	phy_write_reg(pcie, 1, 0x4C, 0x1, 0x786174A0);
+	phy_write_reg(pcie, 1, 0x4D, 0x1, 0x048000BB);
+	phy_write_reg(pcie, 0, 0x51, 0x1, 0x079EC062);
+	phy_write_reg(pcie, 0, 0x52, 0x1, 0x20000000);
+	phy_write_reg(pcie, 1, 0x52, 0x1, 0x20000000);
+	phy_write_reg(pcie, 1, 0x56, 0x1, 0x00003806);
+
+	phy_write_reg(pcie, 0, 0x60, 0x1, 0x004B03A5);
+	phy_write_reg(pcie, 0, 0x64, 0x1, 0x3F0F1F0F);
+	phy_write_reg(pcie, 0, 0x66, 0x1, 0x00008000);
+
+	return 0;
+}
+
+static int rcar_pcie_phy_init_gen2(struct rcar_pcie_host *host)
+{
+	struct rcar_pcie *pcie = &host->pcie;
+
+	/*
+	 * These settings come from the R-Car Series, 2nd Generation User's
+	 * Manual, section 50.3.1 (2) Initialization of the physical layer.
+	 */
+	rcar_pci_write_reg(pcie, 0x000f0030, GEN2_PCIEPHYADDR);
+	rcar_pci_write_reg(pcie, 0x00381203, GEN2_PCIEPHYDATA);
+	rcar_pci_write_reg(pcie, 0x00000001, GEN2_PCIEPHYCTRL);
+	rcar_pci_write_reg(pcie, 0x00000006, GEN2_PCIEPHYCTRL);
+
+	rcar_pci_write_reg(pcie, 0x000f0054, GEN2_PCIEPHYADDR);
+	/* The following value is for DC connection, no termination resistor */
+	rcar_pci_write_reg(pcie, 0x13802007, GEN2_PCIEPHYDATA);
+	rcar_pci_write_reg(pcie, 0x00000001, GEN2_PCIEPHYCTRL);
+	rcar_pci_write_reg(pcie, 0x00000006, GEN2_PCIEPHYCTRL);
+
+	return 0;
+}
+
+static int rcar_pcie_phy_init_gen3(struct rcar_pcie_host *host)
+{
+	int err;
+
+	err = phy_init(host->phy);
+	if (err)
+		return err;
+
+	err = phy_power_on(host->phy);
+	if (err)
+		phy_exit(host->phy);
+
+	return err;
+}
+
+static irqreturn_t rcar_pcie_msi_irq(int irq, void *data)
+{
+	struct rcar_pcie_host *host = data;
+	struct rcar_pcie *pcie = &host->pcie;
+	struct rcar_msi *msi = &host->msi;
+	struct device *dev = pcie->dev;
+	unsigned long reg;
+
+	reg = rcar_pci_read_reg(pcie, PCIEMSIFR);
+
+	/* MSI & INTx share an interrupt - we only handle MSI here */
+	if (!reg)
+		return IRQ_NONE;
+
+	while (reg) {
+		unsigned int index = find_first_bit(&reg, 32);
+		int ret;
+
+		ret = generic_handle_domain_irq(msi->domain->parent, index);
+		if (ret) {
+			/* Unknown MSI, just clear it */
+			dev_dbg(dev, "unexpected MSI\n");
+			rcar_pci_write_reg(pcie, BIT(index), PCIEMSIFR);
+		}
+
+		/* see if there's any more pending in this vector */
+		reg = rcar_pci_read_reg(pcie, PCIEMSIFR);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void rcar_msi_top_irq_ack(struct irq_data *d)
+{
+	irq_chip_ack_parent(d);
+}
+
+static void rcar_msi_top_irq_mask(struct irq_data *d)
+{
+	pci_msi_mask_irq(d);
+	irq_chip_mask_parent(d);
+}
+
+static void rcar_msi_top_irq_unmask(struct irq_data *d)
+{
+	pci_msi_unmask_irq(d);
+	irq_chip_unmask_parent(d);
+}
+
+static struct irq_chip rcar_msi_top_chip = {
+	.name		= "PCIe MSI",
+	.irq_ack	= rcar_msi_top_irq_ack,
+	.irq_mask	= rcar_msi_top_irq_mask,
+	.irq_unmask	= rcar_msi_top_irq_unmask,
+};
+
+static void rcar_msi_irq_ack(struct irq_data *d)
+{
+	struct rcar_msi *msi = irq_data_get_irq_chip_data(d);
+	struct rcar_pcie *pcie = &msi_to_host(msi)->pcie;
+
+	/* clear the interrupt */
+	rcar_pci_write_reg(pcie, BIT(d->hwirq), PCIEMSIFR);
+}
+
+static void rcar_msi_irq_mask(struct irq_data *d)
+{
+	struct rcar_msi *msi = irq_data_get_irq_chip_data(d);
+	struct rcar_pcie *pcie = &msi_to_host(msi)->pcie;
+	unsigned long flags;
+	u32 value;
+
+	spin_lock_irqsave(&msi->mask_lock, flags);
+	value = rcar_pci_read_reg(pcie, PCIEMSIIER);
+	value &= ~BIT(d->hwirq);
+	rcar_pci_write_reg(pcie, value, PCIEMSIIER);
+	spin_unlock_irqrestore(&msi->mask_lock, flags);
+}
+
+static void rcar_msi_irq_unmask(struct irq_data *d)
+{
+	struct rcar_msi *msi = irq_data_get_irq_chip_data(d);
+	struct rcar_pcie *pcie = &msi_to_host(msi)->pcie;
+	unsigned long flags;
+	u32 value;
+
+	spin_lock_irqsave(&msi->mask_lock, flags);
+	value = rcar_pci_read_reg(pcie, PCIEMSIIER);
+	value |= BIT(d->hwirq);
+	rcar_pci_write_reg(pcie, value, PCIEMSIIER);
+	spin_unlock_irqrestore(&msi->mask_lock, flags);
+}
+
+static int rcar_msi_set_affinity(struct irq_data *d, const struct cpumask *mask, bool force)
+{
+	return -EINVAL;
+}
+
+static void rcar_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct rcar_msi *msi = irq_data_get_irq_chip_data(data);
+	struct rcar_pcie *pcie = &msi_to_host(msi)->pcie;
+
+	msg->address_lo = rcar_pci_read_reg(pcie, PCIEMSIALR) & ~MSIFE;
+	msg->address_hi = rcar_pci_read_reg(pcie, PCIEMSIAUR);
+	msg->data = data->hwirq;
+}
+
+static struct irq_chip rcar_msi_bottom_chip = {
+	.name			= "R-Car MSI",
+	.irq_ack		= rcar_msi_irq_ack,
+	.irq_mask		= rcar_msi_irq_mask,
+	.irq_unmask		= rcar_msi_irq_unmask,
+	.irq_set_affinity 	= rcar_msi_set_affinity,
+	.irq_compose_msi_msg	= rcar_compose_msi_msg,
+};
+
+static int rcar_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs, void *args)
+{
+	struct rcar_msi *msi = domain->host_data;
+	unsigned int i;
+	int hwirq;
+
+	mutex_lock(&msi->map_lock);
+
+	hwirq = bitmap_find_free_region(msi->used, INT_PCI_MSI_NR, order_base_2(nr_irqs));
+
+	mutex_unlock(&msi->map_lock);
+
+	if (hwirq < 0)
+		return -ENOSPC;
+
+	for (i = 0; i < nr_irqs; i++)
+		irq_domain_set_info(domain, virq + i, hwirq + i,
+				    &rcar_msi_bottom_chip, domain->host_data,
+				    handle_edge_irq, NULL, NULL);
+
+	return 0;
+}
+
+static void rcar_msi_domain_free(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct rcar_msi *msi = domain->host_data;
+
+	mutex_lock(&msi->map_lock);
+
+	bitmap_release_region(msi->used, d->hwirq, order_base_2(nr_irqs));
+
+	mutex_unlock(&msi->map_lock);
+}
+
+static const struct irq_domain_ops rcar_msi_domain_ops = {
+	.alloc	= rcar_msi_domain_alloc,
+	.free	= rcar_msi_domain_free,
+};
+
+static struct msi_domain_info rcar_msi_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		   MSI_FLAG_MULTI_PCI_MSI),
+	.chip	= &rcar_msi_top_chip,
+};
+
+static int rcar_allocate_domains(struct rcar_msi *msi)
+{
+	struct rcar_pcie *pcie = &msi_to_host(msi)->pcie;
+	struct fwnode_handle *fwnode = dev_fwnode(pcie->dev);
+	struct irq_domain *parent;
+
+	parent = irq_domain_create_linear(fwnode, INT_PCI_MSI_NR,
+					  &rcar_msi_domain_ops, msi);
+	if (!parent) {
+		dev_err(pcie->dev, "failed to create IRQ domain\n");
+		return -ENOMEM;
+	}
+	irq_domain_update_bus_token(parent, DOMAIN_BUS_NEXUS);
+
+	msi->domain = pci_msi_create_irq_domain(fwnode, &rcar_msi_info, parent);
+	if (!msi->domain) {
+		dev_err(pcie->dev, "failed to create MSI domain\n");
+		irq_domain_remove(parent);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void rcar_free_domains(struct rcar_msi *msi)
+{
+	struct irq_domain *parent = msi->domain->parent;
+
+	irq_domain_remove(msi->domain);
+	irq_domain_remove(parent);
+}
+
+static int rcar_pcie_enable_msi(struct rcar_pcie_host *host)
+{
+	struct rcar_pcie *pcie = &host->pcie;
+	struct device *dev = pcie->dev;
+	struct rcar_msi *msi = &host->msi;
+	struct resource res;
+	int err;
+
+	mutex_init(&msi->map_lock);
+	spin_lock_init(&msi->mask_lock);
+
+	err = of_address_to_resource(dev->of_node, 0, &res);
+	if (err)
+		return err;
+
+	err = rcar_allocate_domains(msi);
+	if (err)
+		return err;
+
+	/* Two irqs are for MSI, but they are also used for non-MSI irqs */
+	err = devm_request_irq(dev, msi->irq1, rcar_pcie_msi_irq,
+			       IRQF_SHARED | IRQF_NO_THREAD,
+			       rcar_msi_bottom_chip.name, host);
+	if (err < 0) {
+		dev_err(dev, "failed to request IRQ: %d\n", err);
+		goto err;
+	}
+
+	err = devm_request_irq(dev, msi->irq2, rcar_pcie_msi_irq,
+			       IRQF_SHARED | IRQF_NO_THREAD,
+			       rcar_msi_bottom_chip.name, host);
+	if (err < 0) {
+		dev_err(dev, "failed to request IRQ: %d\n", err);
+		goto err;
+	}
+
+	/* disable all MSIs */
+	rcar_pci_write_reg(pcie, 0, PCIEMSIIER);
+
+	/*
+	 * Setup MSI data target using RC base address address, which
+	 * is guaranteed to be in the low 32bit range on any R-Car HW.
+	 */
+	rcar_pci_write_reg(pcie, lower_32_bits(res.start) | MSIFE, PCIEMSIALR);
+	rcar_pci_write_reg(pcie, upper_32_bits(res.start), PCIEMSIAUR);
+
+	return 0;
+
+err:
+	rcar_free_domains(msi);
+	return err;
+}
+
+static void rcar_pcie_teardown_msi(struct rcar_pcie_host *host)
+{
+	struct rcar_pcie *pcie = &host->pcie;
+
+	/* Disable all MSI interrupts */
+	rcar_pci_write_reg(pcie, 0, PCIEMSIIER);
+
+	/* Disable address decoding of the MSI interrupt, MSIFE */
+	rcar_pci_write_reg(pcie, 0, PCIEMSIALR);
+
+	rcar_free_domains(&host->msi);
+}
+
+static int rcar_pcie_get_resources(struct rcar_pcie_host *host)
+{
+	struct rcar_pcie *pcie = &host->pcie;
+	struct device *dev = pcie->dev;
+	struct resource res;
+	int err, i;
+
+	host->phy = devm_phy_optional_get(dev, "pcie");
+	if (IS_ERR(host->phy))
+		return PTR_ERR(host->phy);
+
+	err = of_address_to_resource(dev->of_node, 0, &res);
+	if (err)
+		return err;
+
+	pcie->base = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(pcie->base))
+		return PTR_ERR(pcie->base);
+
+	host->bus_clk = devm_clk_get(dev, "pcie_bus");
+	if (IS_ERR(host->bus_clk)) {
+		dev_err(dev, "cannot get pcie bus clock\n");
+		return PTR_ERR(host->bus_clk);
+	}
+
+	i = irq_of_parse_and_map(dev->of_node, 0);
+	if (!i) {
+		dev_err(dev, "cannot get platform resources for msi interrupt\n");
+		err = -ENOENT;
+		goto err_irq1;
+	}
+	host->msi.irq1 = i;
+
+	i = irq_of_parse_and_map(dev->of_node, 1);
+	if (!i) {
+		dev_err(dev, "cannot get platform resources for msi interrupt\n");
+		err = -ENOENT;
+		goto err_irq2;
+	}
+	host->msi.irq2 = i;
+
+	return 0;
+
+err_irq2:
+	irq_dispose_mapping(host->msi.irq1);
+err_irq1:
+	return err;
+}
+
+static int rcar_pcie_inbound_ranges(struct rcar_pcie *pcie,
+				    struct resource_entry *entry,
+				    int *index)
+{
+	u64 restype = entry->res->flags;
+	u64 cpu_addr = entry->res->start;
+	u64 cpu_end = entry->res->end;
+	u64 pci_addr = entry->res->start - entry->offset;
+	u32 flags = LAM_64BIT | LAR_ENABLE;
+	u64 mask;
+	u64 size = resource_size(entry->res);
+	int idx = *index;
+
+	if (restype & IORESOURCE_PREFETCH)
+		flags |= LAM_PREFETCH;
+
+	while (cpu_addr < cpu_end) {
+		if (idx >= MAX_NR_INBOUND_MAPS - 1) {
+			dev_err(pcie->dev, "Failed to map inbound regions!\n");
+			return -EINVAL;
+		}
+		/*
+		 * If the size of the range is larger than the alignment of
+		 * the start address, we have to use multiple entries to
+		 * perform the mapping.
+		 */
+		if (cpu_addr > 0) {
+			unsigned long nr_zeros = __ffs64(cpu_addr);
+			u64 alignment = 1ULL << nr_zeros;
+
+			size = min(size, alignment);
+		}
+		/* Hardware supports max 4GiB inbound region */
+		size = min(size, 1ULL << 32);
+
+		mask = roundup_pow_of_two(size) - 1;
+		mask &= ~0xf;
+
+		rcar_pcie_set_inbound(pcie, cpu_addr, pci_addr,
+				      lower_32_bits(mask) | flags, idx, true);
+
+		pci_addr += size;
+		cpu_addr += size;
+		idx += 2;
+	}
+	*index = idx;
+
+	return 0;
+}
+
+static int rcar_pcie_parse_map_dma_ranges(struct rcar_pcie_host *host)
+{
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(host);
+	struct resource_entry *entry;
+	int index = 0, err = 0;
+
+	resource_list_for_each_entry(entry, &bridge->dma_ranges) {
+		err = rcar_pcie_inbound_ranges(&host->pcie, entry, &index);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+static const struct of_device_id rcar_pcie_of_match[] = {
+	{ .compatible = "renesas,pcie-r8a7779",
+	  .data = rcar_pcie_phy_init_h1 },
+	{ .compatible = "renesas,pcie-r8a7790",
+	  .data = rcar_pcie_phy_init_gen2 },
+	{ .compatible = "renesas,pcie-r8a7791",
+	  .data = rcar_pcie_phy_init_gen2 },
+	{ .compatible = "renesas,pcie-rcar-gen2",
+	  .data = rcar_pcie_phy_init_gen2 },
+	{ .compatible = "renesas,pcie-r8a7795",
+	  .data = rcar_pcie_phy_init_gen3 },
+	{ .compatible = "renesas,pcie-rcar-gen3",
+	  .data = rcar_pcie_phy_init_gen3 },
+	{},
+};
+
+static int rcar_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct rcar_pcie_host *host;
+	struct rcar_pcie *pcie;
+	u32 data;
+	int err;
+	struct pci_host_bridge *bridge;
+
+	bridge = devm_pci_alloc_host_bridge(dev, sizeof(*host));
+	if (!bridge)
+		return -ENOMEM;
+
+	host = pci_host_bridge_priv(bridge);
+	pcie = &host->pcie;
+	pcie->dev = dev;
+	platform_set_drvdata(pdev, host);
+
+	pm_runtime_enable(pcie->dev);
+	err = pm_runtime_get_sync(pcie->dev);
+	if (err < 0) {
+		dev_err(pcie->dev, "pm_runtime_get_sync failed\n");
+		goto err_pm_put;
+	}
+
+	err = rcar_pcie_get_resources(host);
+	if (err < 0) {
+		dev_err(dev, "failed to request resources: %d\n", err);
+		goto err_pm_put;
+	}
+
+	err = clk_prepare_enable(host->bus_clk);
+	if (err) {
+		dev_err(dev, "failed to enable bus clock: %d\n", err);
+		goto err_unmap_msi_irqs;
+	}
+
+	err = rcar_pcie_parse_map_dma_ranges(host);
+	if (err)
+		goto err_clk_disable;
+
+	host->phy_init_fn = of_device_get_match_data(dev);
+	err = host->phy_init_fn(host);
+	if (err) {
+		dev_err(dev, "failed to init PCIe PHY\n");
+		goto err_clk_disable;
+	}
+
+	/* Failure to get a link might just be that no cards are inserted */
+	if (rcar_pcie_hw_init(pcie)) {
+		dev_info(dev, "PCIe link down\n");
+		err = -ENODEV;
+		goto err_phy_shutdown;
+	}
+
+	data = rcar_pci_read_reg(pcie, MACSR);
+	dev_info(dev, "PCIe x%d: link up\n", (data >> 20) & 0x3f);
+
+	if (IS_ENABLED(CONFIG_PCI_MSI)) {
+		err = rcar_pcie_enable_msi(host);
+		if (err < 0) {
+			dev_err(dev,
+				"failed to enable MSI support: %d\n",
+				err);
+			goto err_phy_shutdown;
+		}
+	}
+
+	err = rcar_pcie_enable(host);
+	if (err)
+		goto err_msi_teardown;
+
+	return 0;
+
+err_msi_teardown:
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		rcar_pcie_teardown_msi(host);
+
+err_phy_shutdown:
+	if (host->phy) {
+		phy_power_off(host->phy);
+		phy_exit(host->phy);
+	}
+
+err_clk_disable:
+	clk_disable_unprepare(host->bus_clk);
+
+err_unmap_msi_irqs:
+	irq_dispose_mapping(host->msi.irq2);
+	irq_dispose_mapping(host->msi.irq1);
+
+err_pm_put:
+	pm_runtime_put(dev);
+	pm_runtime_disable(dev);
+
+	return err;
+}
+
+static int rcar_pcie_resume(struct device *dev)
+{
+	struct rcar_pcie_host *host = dev_get_drvdata(dev);
+	struct rcar_pcie *pcie = &host->pcie;
+	unsigned int data;
+	int err;
+
+	err = rcar_pcie_parse_map_dma_ranges(host);
+	if (err)
+		return 0;
+
+	/* Failure to get a link might just be that no cards are inserted */
+	err = host->phy_init_fn(host);
+	if (err) {
+		dev_info(dev, "PCIe link down\n");
+		return 0;
+	}
+
+	data = rcar_pci_read_reg(pcie, MACSR);
+	dev_info(dev, "PCIe x%d: link up\n", (data >> 20) & 0x3f);
+
+	/* Enable MSI */
+	if (IS_ENABLED(CONFIG_PCI_MSI)) {
+		struct resource res;
+		u32 val;
+
+		of_address_to_resource(dev->of_node, 0, &res);
+		rcar_pci_write_reg(pcie, upper_32_bits(res.start), PCIEMSIAUR);
+		rcar_pci_write_reg(pcie, lower_32_bits(res.start) | MSIFE, PCIEMSIALR);
+
+		bitmap_to_arr32(&val, host->msi.used, INT_PCI_MSI_NR);
+		rcar_pci_write_reg(pcie, val, PCIEMSIIER);
+	}
+
+	rcar_pcie_hw_enable(host);
+
+	return 0;
+}
+
+static int rcar_pcie_resume_noirq(struct device *dev)
+{
+	struct rcar_pcie_host *host = dev_get_drvdata(dev);
+	struct rcar_pcie *pcie = &host->pcie;
+
+	if (rcar_pci_read_reg(pcie, PMSR) &&
+	    !(rcar_pci_read_reg(pcie, PCIETCTLR) & DL_DOWN))
+		return 0;
+
+	/* Re-establish the PCIe link */
+	rcar_pci_write_reg(pcie, MACCTLR_INIT_VAL, MACCTLR);
+	rcar_pci_write_reg(pcie, CFINIT, PCIETCTLR);
+	return rcar_pcie_wait_for_dl(pcie);
+}
+
+static const struct dev_pm_ops rcar_pcie_pm_ops = {
+	SYSTEM_SLEEP_PM_OPS(NULL, rcar_pcie_resume)
+	.resume_noirq = rcar_pcie_resume_noirq,
+};
+
+static struct platform_driver rcar_pcie_driver = {
+	.driver = {
+		.name = "rcar-pcie",
+		.of_match_table = rcar_pcie_of_match,
+		.pm = &rcar_pcie_pm_ops,
+		.suppress_bind_attrs = true,
+	},
+	.probe = rcar_pcie_probe,
+};
+
+#ifdef CONFIG_ARM
+static int rcar_pcie_aarch32_abort_handler(unsigned long addr,
+		unsigned int fsr, struct pt_regs *regs)
+{
+	return !fixup_exception(regs);
+}
+
+static const struct of_device_id rcar_pcie_abort_handler_of_match[] __initconst = {
+	{ .compatible = "renesas,pcie-r8a7779" },
+	{ .compatible = "renesas,pcie-r8a7790" },
+	{ .compatible = "renesas,pcie-r8a7791" },
+	{ .compatible = "renesas,pcie-rcar-gen2" },
+	{},
+};
+
+static int __init rcar_pcie_init(void)
+{
+	if (of_find_matching_node(NULL, rcar_pcie_abort_handler_of_match)) {
+#ifdef CONFIG_ARM_LPAE
+		hook_fault_code(17, rcar_pcie_aarch32_abort_handler, SIGBUS, 0,
+				"asynchronous external abort");
+#else
+		hook_fault_code(22, rcar_pcie_aarch32_abort_handler, SIGBUS, 0,
+				"imprecise external abort");
+#endif
+	}
+
+	return platform_driver_register(&rcar_pcie_driver);
+}
+device_initcall(rcar_pcie_init);
+#else
+builtin_platform_driver(rcar_pcie_driver);
+#endif
diff --git a/drivers/pci/controller/pcie-rcar.c b/drivers/pci/controller/pcie-rcar.c
new file mode 100644
index 000000000..7583699ef
--- /dev/null
+++ b/drivers/pci/controller/pcie-rcar.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe driver for Renesas R-Car SoCs
+ *  Copyright (C) 2014-2020 Renesas Electronics Europe Ltd
+ *
+ * Author: Phil Edworthy <phil.edworthy@renesas.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/pci.h>
+
+#include "pcie-rcar.h"
+
+void rcar_pci_write_reg(struct rcar_pcie *pcie, u32 val, unsigned int reg)
+{
+	writel(val, pcie->base + reg);
+}
+
+u32 rcar_pci_read_reg(struct rcar_pcie *pcie, unsigned int reg)
+{
+	return readl(pcie->base + reg);
+}
+
+void rcar_rmw32(struct rcar_pcie *pcie, int where, u32 mask, u32 data)
+{
+	unsigned int shift = BITS_PER_BYTE * (where & 3);
+	u32 val = rcar_pci_read_reg(pcie, where & ~3);
+
+	val &= ~(mask << shift);
+	val |= data << shift;
+	rcar_pci_write_reg(pcie, val, where & ~3);
+}
+
+int rcar_pcie_wait_for_phyrdy(struct rcar_pcie *pcie)
+{
+	unsigned int timeout = 10;
+
+	while (timeout--) {
+		if (rcar_pci_read_reg(pcie, PCIEPHYSR) & PHYRDY)
+			return 0;
+
+		msleep(5);
+	}
+
+	return -ETIMEDOUT;
+}
+
+int rcar_pcie_wait_for_dl(struct rcar_pcie *pcie)
+{
+	unsigned int timeout = 10000;
+
+	while (timeout--) {
+		if ((rcar_pci_read_reg(pcie, PCIETSTR) & DATA_LINK_ACTIVE))
+			return 0;
+
+		udelay(5);
+		cpu_relax();
+	}
+
+	return -ETIMEDOUT;
+}
+
+void rcar_pcie_set_outbound(struct rcar_pcie *pcie, int win,
+			    struct resource_entry *window)
+{
+	/* Setup PCIe address space mappings for each resource */
+	struct resource *res = window->res;
+	resource_size_t res_start;
+	resource_size_t size;
+	u32 mask;
+
+	rcar_pci_write_reg(pcie, 0x00000000, PCIEPTCTLR(win));
+
+	/*
+	 * The PAMR mask is calculated in units of 128Bytes, which
+	 * keeps things pretty simple.
+	 */
+	size = resource_size(res);
+	if (size > 128)
+		mask = (roundup_pow_of_two(size) / SZ_128) - 1;
+	else
+		mask = 0x0;
+	rcar_pci_write_reg(pcie, mask << 7, PCIEPAMR(win));
+
+	if (res->flags & IORESOURCE_IO)
+		res_start = pci_pio_to_address(res->start) - window->offset;
+	else
+		res_start = res->start - window->offset;
+
+	rcar_pci_write_reg(pcie, upper_32_bits(res_start), PCIEPAUR(win));
+	rcar_pci_write_reg(pcie, lower_32_bits(res_start) & ~0x7F,
+			   PCIEPALR(win));
+
+	/* First resource is for IO */
+	mask = PAR_ENABLE;
+	if (res->flags & IORESOURCE_IO)
+		mask |= IO_SPACE;
+
+	rcar_pci_write_reg(pcie, mask, PCIEPTCTLR(win));
+}
+
+void rcar_pcie_set_inbound(struct rcar_pcie *pcie, u64 cpu_addr,
+			   u64 pci_addr, u64 flags, int idx, bool host)
+{
+	/*
+	 * Set up 64-bit inbound regions as the range parser doesn't
+	 * distinguish between 32 and 64-bit types.
+	 */
+	if (host)
+		rcar_pci_write_reg(pcie, lower_32_bits(pci_addr),
+				   PCIEPRAR(idx));
+	rcar_pci_write_reg(pcie, lower_32_bits(cpu_addr), PCIELAR(idx));
+	rcar_pci_write_reg(pcie, flags, PCIELAMR(idx));
+
+	if (host)
+		rcar_pci_write_reg(pcie, upper_32_bits(pci_addr),
+				   PCIEPRAR(idx + 1));
+	rcar_pci_write_reg(pcie, upper_32_bits(cpu_addr), PCIELAR(idx + 1));
+	rcar_pci_write_reg(pcie, 0, PCIELAMR(idx + 1));
+}
diff --git a/drivers/pci/controller/pcie-rcar.h b/drivers/pci/controller/pcie-rcar.h
new file mode 100644
index 000000000..6799f8139
--- /dev/null
+++ b/drivers/pci/controller/pcie-rcar.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PCIe driver for Renesas R-Car SoCs
+ *  Copyright (C) 2014-2020 Renesas Electronics Europe Ltd
+ *
+ * Author: Phil Edworthy <phil.edworthy@renesas.com>
+ */
+
+#ifndef _PCIE_RCAR_H
+#define _PCIE_RCAR_H
+
+#define PCIECAR			0x000010
+#define PCIECCTLR		0x000018
+#define  PCIECCTLR_CCIE		BIT(31)
+#define  TYPE0			(0 << 8)
+#define  TYPE1			BIT(8)
+#define PCIECDR			0x000020
+#define PCIEMSR			0x000028
+#define PCIEINTXR		0x000400
+#define  ASTINTX		BIT(16)
+#define PCIEPHYSR		0x0007f0
+#define  PHYRDY			BIT(0)
+#define PCIEMSITXR		0x000840
+
+/* Transfer control */
+#define PCIETCTLR		0x02000
+#define  DL_DOWN		BIT(3)
+#define  CFINIT			BIT(0)
+#define PCIETSTR		0x02004
+#define  DATA_LINK_ACTIVE	BIT(0)
+#define PCIEERRFR		0x02020
+#define  UNSUPPORTED_REQUEST	BIT(4)
+#define PCIEMSIFR		0x02044
+#define PCIEMSIALR		0x02048
+#define  MSIFE			BIT(0)
+#define PCIEMSIAUR		0x0204c
+#define PCIEMSIIER		0x02050
+
+/* root port address */
+#define PCIEPRAR(x)		(0x02080 + ((x) * 0x4))
+
+/* local address reg & mask */
+#define PCIELAR(x)		(0x02200 + ((x) * 0x20))
+#define PCIELAMR(x)		(0x02208 + ((x) * 0x20))
+#define  LAM_PREFETCH		BIT(3)
+#define  LAM_64BIT		BIT(2)
+#define  LAR_ENABLE		BIT(1)
+
+/* PCIe address reg & mask */
+#define PCIEPALR(x)		(0x03400 + ((x) * 0x20))
+#define PCIEPAUR(x)		(0x03404 + ((x) * 0x20))
+#define PCIEPAMR(x)		(0x03408 + ((x) * 0x20))
+#define PCIEPTCTLR(x)		(0x0340c + ((x) * 0x20))
+#define  PAR_ENABLE		BIT(31)
+#define  IO_SPACE		BIT(8)
+
+/* Configuration */
+#define PCICONF(x)		(0x010000 + ((x) * 0x4))
+#define  INTDIS			BIT(10)
+#define PMCAP(x)		(0x010040 + ((x) * 0x4))
+#define MSICAP(x)		(0x010050 + ((x) * 0x4))
+#define  MSICAP0_MSIE		BIT(16)
+#define  MSICAP0_MMESCAP_OFFSET	17
+#define  MSICAP0_MMESE_OFFSET	20
+#define  MSICAP0_MMESE_MASK	GENMASK(22, 20)
+#define EXPCAP(x)		(0x010070 + ((x) * 0x4))
+#define VCCAP(x)		(0x010100 + ((x) * 0x4))
+
+/* link layer */
+#define IDSETR0			0x011000
+#define IDSETR1			0x011004
+#define SUBIDSETR		0x011024
+#define TLCTLR			0x011048
+#define MACSR			0x011054
+#define  SPCHGFIN		BIT(4)
+#define  SPCHGFAIL		BIT(6)
+#define  SPCHGSUC		BIT(7)
+#define  LINK_SPEED		(0xf << 16)
+#define  LINK_SPEED_2_5GTS	(1 << 16)
+#define  LINK_SPEED_5_0GTS	(2 << 16)
+#define MACCTLR			0x011058
+#define  MACCTLR_NFTS_MASK	GENMASK(23, 16)	/* The name is from SH7786 */
+#define  SPEED_CHANGE		BIT(24)
+#define  SCRAMBLE_DISABLE	BIT(27)
+#define  LTSMDIS		BIT(31)
+#define  MACCTLR_INIT_VAL	(LTSMDIS | MACCTLR_NFTS_MASK)
+#define PMSR			0x01105c
+#define  L1FAEG			BIT(31)
+#define  PMEL1RX		BIT(23)
+#define  PMSTATE		GENMASK(18, 16)
+#define  PMSTATE_L1		(3 << 16)
+#define PMCTLR			0x011060
+#define  L1IATN			BIT(31)
+
+#define MACS2R			0x011078
+#define MACCGSPSETR		0x011084
+#define  SPCNGRSN		BIT(31)
+
+/* R-Car H1 PHY */
+#define H1_PCIEPHYADRR		0x04000c
+#define  WRITE_CMD		BIT(16)
+#define  PHY_ACK		BIT(24)
+#define  RATE_POS		12
+#define  LANE_POS		8
+#define  ADR_POS		0
+#define H1_PCIEPHYDOUTR		0x040014
+
+/* R-Car Gen2 PHY */
+#define GEN2_PCIEPHYADDR	0x780
+#define GEN2_PCIEPHYDATA	0x784
+#define GEN2_PCIEPHYCTRL	0x78c
+
+#define INT_PCI_MSI_NR		32
+
+#define RCONF(x)		(PCICONF(0) + (x))
+#define RPMCAP(x)		(PMCAP(0) + (x))
+#define REXPCAP(x)		(EXPCAP(0) + (x))
+#define RVCCAP(x)		(VCCAP(0) + (x))
+
+#define PCIE_CONF_BUS(b)	(((b) & 0xff) << 24)
+#define PCIE_CONF_DEV(d)	(((d) & 0x1f) << 19)
+#define PCIE_CONF_FUNC(f)	(((f) & 0x7) << 16)
+
+#define RCAR_PCI_MAX_RESOURCES	4
+#define MAX_NR_INBOUND_MAPS	6
+
+struct rcar_pcie {
+	struct device		*dev;
+	void __iomem		*base;
+};
+
+enum {
+	RCAR_PCI_ACCESS_READ,
+	RCAR_PCI_ACCESS_WRITE,
+};
+
+void rcar_pci_write_reg(struct rcar_pcie *pcie, u32 val, unsigned int reg);
+u32 rcar_pci_read_reg(struct rcar_pcie *pcie, unsigned int reg);
+void rcar_rmw32(struct rcar_pcie *pcie, int where, u32 mask, u32 data);
+int rcar_pcie_wait_for_phyrdy(struct rcar_pcie *pcie);
+int rcar_pcie_wait_for_dl(struct rcar_pcie *pcie);
+void rcar_pcie_set_outbound(struct rcar_pcie *pcie, int win,
+			    struct resource_entry *window);
+void rcar_pcie_set_inbound(struct rcar_pcie *pcie, u64 cpu_addr,
+			   u64 pci_addr, u64 flags, int idx, bool host);
+
+#endif
diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c
new file mode 100644
index 000000000..0af0e965f
--- /dev/null
+++ b/drivers/pci/controller/pcie-rockchip-ep.c
@@ -0,0 +1,631 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Rockchip AXI PCIe endpoint controller driver
+ *
+ * Copyright (c) 2018 Rockchip, Inc.
+ *
+ * Author: Shawn Lin <shawn.lin@rock-chips.com>
+ *         Simon Xue <xxm@rock-chips.com>
+ */
+
+#include <linux/configfs.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/pci-epc.h>
+#include <linux/platform_device.h>
+#include <linux/pci-epf.h>
+#include <linux/sizes.h>
+
+#include "pcie-rockchip.h"
+
+/**
+ * struct rockchip_pcie_ep - private data for PCIe endpoint controller driver
+ * @rockchip: Rockchip PCIe controller
+ * @epc: PCI EPC device
+ * @max_regions: maximum number of regions supported by hardware
+ * @ob_region_map: bitmask of mapped outbound regions
+ * @ob_addr: base addresses in the AXI bus where the outbound regions start
+ * @irq_phys_addr: base address on the AXI bus where the MSI/legacy IRQ
+ *		   dedicated outbound regions is mapped.
+ * @irq_cpu_addr: base address in the CPU space where a write access triggers
+ *		  the sending of a memory write (MSI) / normal message (legacy
+ *		  IRQ) TLP through the PCIe bus.
+ * @irq_pci_addr: used to save the current mapping of the MSI/legacy IRQ
+ *		  dedicated outbound region.
+ * @irq_pci_fn: the latest PCI function that has updated the mapping of
+ *		the MSI/legacy IRQ dedicated outbound region.
+ * @irq_pending: bitmask of asserted legacy IRQs.
+ */
+struct rockchip_pcie_ep {
+	struct rockchip_pcie	rockchip;
+	struct pci_epc		*epc;
+	u32			max_regions;
+	unsigned long		ob_region_map;
+	phys_addr_t		*ob_addr;
+	phys_addr_t		irq_phys_addr;
+	void __iomem		*irq_cpu_addr;
+	u64			irq_pci_addr;
+	u8			irq_pci_fn;
+	u8			irq_pending;
+};
+
+static void rockchip_pcie_clear_ep_ob_atu(struct rockchip_pcie *rockchip,
+					  u32 region)
+{
+	rockchip_pcie_write(rockchip, 0,
+			    ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0(region));
+	rockchip_pcie_write(rockchip, 0,
+			    ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR1(region));
+	rockchip_pcie_write(rockchip, 0,
+			    ROCKCHIP_PCIE_AT_OB_REGION_DESC0(region));
+	rockchip_pcie_write(rockchip, 0,
+			    ROCKCHIP_PCIE_AT_OB_REGION_DESC1(region));
+}
+
+static void rockchip_pcie_prog_ep_ob_atu(struct rockchip_pcie *rockchip, u8 fn,
+					 u32 r, u64 cpu_addr, u64 pci_addr,
+					 size_t size)
+{
+	int num_pass_bits = fls64(size - 1);
+	u32 addr0, addr1, desc0;
+
+	if (num_pass_bits < 8)
+		num_pass_bits = 8;
+
+	addr0 = ((num_pass_bits - 1) & PCIE_CORE_OB_REGION_ADDR0_NUM_BITS) |
+		(lower_32_bits(pci_addr) & PCIE_CORE_OB_REGION_ADDR0_LO_ADDR);
+	addr1 = upper_32_bits(pci_addr);
+	desc0 = ROCKCHIP_PCIE_AT_OB_REGION_DESC0_DEVFN(fn) | AXI_WRAPPER_MEM_WRITE;
+
+	/* PCI bus address region */
+	rockchip_pcie_write(rockchip, addr0,
+			    ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0(r));
+	rockchip_pcie_write(rockchip, addr1,
+			    ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR1(r));
+	rockchip_pcie_write(rockchip, desc0,
+			    ROCKCHIP_PCIE_AT_OB_REGION_DESC0(r));
+	rockchip_pcie_write(rockchip, 0,
+			    ROCKCHIP_PCIE_AT_OB_REGION_DESC1(r));
+}
+
+static int rockchip_pcie_ep_write_header(struct pci_epc *epc, u8 fn, u8 vfn,
+					 struct pci_epf_header *hdr)
+{
+	u32 reg;
+	struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
+	struct rockchip_pcie *rockchip = &ep->rockchip;
+
+	/* All functions share the same vendor ID with function 0 */
+	if (fn == 0) {
+		u32 vid_regs = (hdr->vendorid & GENMASK(15, 0)) |
+			       (hdr->subsys_vendor_id & GENMASK(31, 16)) << 16;
+
+		rockchip_pcie_write(rockchip, vid_regs,
+				    PCIE_CORE_CONFIG_VENDOR);
+	}
+
+	reg = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_DID_VID);
+	reg = (reg & 0xFFFF) | (hdr->deviceid << 16);
+	rockchip_pcie_write(rockchip, reg, PCIE_EP_CONFIG_DID_VID);
+
+	rockchip_pcie_write(rockchip,
+			    hdr->revid |
+			    hdr->progif_code << 8 |
+			    hdr->subclass_code << 16 |
+			    hdr->baseclass_code << 24,
+			    ROCKCHIP_PCIE_EP_FUNC_BASE(fn) + PCI_REVISION_ID);
+	rockchip_pcie_write(rockchip, hdr->cache_line_size,
+			    ROCKCHIP_PCIE_EP_FUNC_BASE(fn) +
+			    PCI_CACHE_LINE_SIZE);
+	rockchip_pcie_write(rockchip, hdr->subsys_id << 16,
+			    ROCKCHIP_PCIE_EP_FUNC_BASE(fn) +
+			    PCI_SUBSYSTEM_VENDOR_ID);
+	rockchip_pcie_write(rockchip, hdr->interrupt_pin << 8,
+			    ROCKCHIP_PCIE_EP_FUNC_BASE(fn) +
+			    PCI_INTERRUPT_LINE);
+
+	return 0;
+}
+
+static int rockchip_pcie_ep_set_bar(struct pci_epc *epc, u8 fn, u8 vfn,
+				    struct pci_epf_bar *epf_bar)
+{
+	struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
+	struct rockchip_pcie *rockchip = &ep->rockchip;
+	dma_addr_t bar_phys = epf_bar->phys_addr;
+	enum pci_barno bar = epf_bar->barno;
+	int flags = epf_bar->flags;
+	u32 addr0, addr1, reg, cfg, b, aperture, ctrl;
+	u64 sz;
+
+	/* BAR size is 2^(aperture + 7) */
+	sz = max_t(size_t, epf_bar->size, MIN_EP_APERTURE);
+
+	/*
+	 * roundup_pow_of_two() returns an unsigned long, which is not suited
+	 * for 64bit values.
+	 */
+	sz = 1ULL << fls64(sz - 1);
+	aperture = ilog2(sz) - 7; /* 128B -> 0, 256B -> 1, 512B -> 2, ... */
+
+	if ((flags & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) {
+		ctrl = ROCKCHIP_PCIE_CORE_BAR_CFG_CTRL_IO_32BITS;
+	} else {
+		bool is_prefetch = !!(flags & PCI_BASE_ADDRESS_MEM_PREFETCH);
+		bool is_64bits = sz > SZ_2G;
+
+		if (is_64bits && (bar & 1))
+			return -EINVAL;
+
+		if (is_64bits && is_prefetch)
+			ctrl =
+			    ROCKCHIP_PCIE_CORE_BAR_CFG_CTRL_PREFETCH_MEM_64BITS;
+		else if (is_prefetch)
+			ctrl =
+			    ROCKCHIP_PCIE_CORE_BAR_CFG_CTRL_PREFETCH_MEM_32BITS;
+		else if (is_64bits)
+			ctrl = ROCKCHIP_PCIE_CORE_BAR_CFG_CTRL_MEM_64BITS;
+		else
+			ctrl = ROCKCHIP_PCIE_CORE_BAR_CFG_CTRL_MEM_32BITS;
+	}
+
+	if (bar < BAR_4) {
+		reg = ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG0(fn);
+		b = bar;
+	} else {
+		reg = ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG1(fn);
+		b = bar - BAR_4;
+	}
+
+	addr0 = lower_32_bits(bar_phys);
+	addr1 = upper_32_bits(bar_phys);
+
+	cfg = rockchip_pcie_read(rockchip, reg);
+	cfg &= ~(ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG_BAR_APERTURE_MASK(b) |
+		 ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG_BAR_CTRL_MASK(b));
+	cfg |= (ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG_BAR_APERTURE(b, aperture) |
+		ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG_BAR_CTRL(b, ctrl));
+
+	rockchip_pcie_write(rockchip, cfg, reg);
+	rockchip_pcie_write(rockchip, addr0,
+			    ROCKCHIP_PCIE_AT_IB_EP_FUNC_BAR_ADDR0(fn, bar));
+	rockchip_pcie_write(rockchip, addr1,
+			    ROCKCHIP_PCIE_AT_IB_EP_FUNC_BAR_ADDR1(fn, bar));
+
+	return 0;
+}
+
+static void rockchip_pcie_ep_clear_bar(struct pci_epc *epc, u8 fn, u8 vfn,
+				       struct pci_epf_bar *epf_bar)
+{
+	struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
+	struct rockchip_pcie *rockchip = &ep->rockchip;
+	u32 reg, cfg, b, ctrl;
+	enum pci_barno bar = epf_bar->barno;
+
+	if (bar < BAR_4) {
+		reg = ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG0(fn);
+		b = bar;
+	} else {
+		reg = ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG1(fn);
+		b = bar - BAR_4;
+	}
+
+	ctrl = ROCKCHIP_PCIE_CORE_BAR_CFG_CTRL_DISABLED;
+	cfg = rockchip_pcie_read(rockchip, reg);
+	cfg &= ~(ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG_BAR_APERTURE_MASK(b) |
+		 ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG_BAR_CTRL_MASK(b));
+	cfg |= ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG_BAR_CTRL(b, ctrl);
+
+	rockchip_pcie_write(rockchip, cfg, reg);
+	rockchip_pcie_write(rockchip, 0x0,
+			    ROCKCHIP_PCIE_AT_IB_EP_FUNC_BAR_ADDR0(fn, bar));
+	rockchip_pcie_write(rockchip, 0x0,
+			    ROCKCHIP_PCIE_AT_IB_EP_FUNC_BAR_ADDR1(fn, bar));
+}
+
+static inline u32 rockchip_ob_region(phys_addr_t addr)
+{
+	return (addr >> ilog2(SZ_1M)) & 0x1f;
+}
+
+static int rockchip_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, u8 vfn,
+				     phys_addr_t addr, u64 pci_addr,
+				     size_t size)
+{
+	struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
+	struct rockchip_pcie *pcie = &ep->rockchip;
+	u32 r = rockchip_ob_region(addr);
+
+	rockchip_pcie_prog_ep_ob_atu(pcie, fn, r, addr, pci_addr, size);
+
+	set_bit(r, &ep->ob_region_map);
+	ep->ob_addr[r] = addr;
+
+	return 0;
+}
+
+static void rockchip_pcie_ep_unmap_addr(struct pci_epc *epc, u8 fn, u8 vfn,
+					phys_addr_t addr)
+{
+	struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
+	struct rockchip_pcie *rockchip = &ep->rockchip;
+	u32 r;
+
+	for (r = 0; r < ep->max_regions; r++)
+		if (ep->ob_addr[r] == addr)
+			break;
+
+	if (r == ep->max_regions)
+		return;
+
+	rockchip_pcie_clear_ep_ob_atu(rockchip, r);
+
+	ep->ob_addr[r] = 0;
+	clear_bit(r, &ep->ob_region_map);
+}
+
+static int rockchip_pcie_ep_set_msi(struct pci_epc *epc, u8 fn, u8 vfn,
+				    u8 multi_msg_cap)
+{
+	struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
+	struct rockchip_pcie *rockchip = &ep->rockchip;
+	u32 flags;
+
+	flags = rockchip_pcie_read(rockchip,
+				   ROCKCHIP_PCIE_EP_FUNC_BASE(fn) +
+				   ROCKCHIP_PCIE_EP_MSI_CTRL_REG);
+	flags &= ~ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_MASK;
+	flags |=
+	   (multi_msg_cap << ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_OFFSET) |
+	   (PCI_MSI_FLAGS_64BIT << ROCKCHIP_PCIE_EP_MSI_FLAGS_OFFSET);
+	flags &= ~ROCKCHIP_PCIE_EP_MSI_CTRL_MASK_MSI_CAP;
+	rockchip_pcie_write(rockchip, flags,
+			    ROCKCHIP_PCIE_EP_FUNC_BASE(fn) +
+			    ROCKCHIP_PCIE_EP_MSI_CTRL_REG);
+	return 0;
+}
+
+static int rockchip_pcie_ep_get_msi(struct pci_epc *epc, u8 fn, u8 vfn)
+{
+	struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
+	struct rockchip_pcie *rockchip = &ep->rockchip;
+	u32 flags;
+
+	flags = rockchip_pcie_read(rockchip,
+				   ROCKCHIP_PCIE_EP_FUNC_BASE(fn) +
+				   ROCKCHIP_PCIE_EP_MSI_CTRL_REG);
+	if (!(flags & ROCKCHIP_PCIE_EP_MSI_CTRL_ME))
+		return -EINVAL;
+
+	return ((flags & ROCKCHIP_PCIE_EP_MSI_CTRL_MME_MASK) >>
+			ROCKCHIP_PCIE_EP_MSI_CTRL_MME_OFFSET);
+}
+
+static void rockchip_pcie_ep_assert_intx(struct rockchip_pcie_ep *ep, u8 fn,
+					 u8 intx, bool do_assert)
+{
+	struct rockchip_pcie *rockchip = &ep->rockchip;
+
+	intx &= 3;
+
+	if (do_assert) {
+		ep->irq_pending |= BIT(intx);
+		rockchip_pcie_write(rockchip,
+				    PCIE_CLIENT_INT_IN_ASSERT |
+				    PCIE_CLIENT_INT_PEND_ST_PEND,
+				    PCIE_CLIENT_LEGACY_INT_CTRL);
+	} else {
+		ep->irq_pending &= ~BIT(intx);
+		rockchip_pcie_write(rockchip,
+				    PCIE_CLIENT_INT_IN_DEASSERT |
+				    PCIE_CLIENT_INT_PEND_ST_NORMAL,
+				    PCIE_CLIENT_LEGACY_INT_CTRL);
+	}
+}
+
+static int rockchip_pcie_ep_send_legacy_irq(struct rockchip_pcie_ep *ep, u8 fn,
+					    u8 intx)
+{
+	u16 cmd;
+
+	cmd = rockchip_pcie_read(&ep->rockchip,
+				 ROCKCHIP_PCIE_EP_FUNC_BASE(fn) +
+				 ROCKCHIP_PCIE_EP_CMD_STATUS);
+
+	if (cmd & PCI_COMMAND_INTX_DISABLE)
+		return -EINVAL;
+
+	/*
+	 * Should add some delay between toggling INTx per TRM vaguely saying
+	 * it depends on some cycles of the AHB bus clock to function it. So
+	 * add sufficient 1ms here.
+	 */
+	rockchip_pcie_ep_assert_intx(ep, fn, intx, true);
+	mdelay(1);
+	rockchip_pcie_ep_assert_intx(ep, fn, intx, false);
+	return 0;
+}
+
+static int rockchip_pcie_ep_send_msi_irq(struct rockchip_pcie_ep *ep, u8 fn,
+					 u8 interrupt_num)
+{
+	struct rockchip_pcie *rockchip = &ep->rockchip;
+	u32 flags, mme, data, data_mask;
+	u8 msi_count;
+	u64 pci_addr;
+	u32 r;
+
+	/* Check MSI enable bit */
+	flags = rockchip_pcie_read(&ep->rockchip,
+				   ROCKCHIP_PCIE_EP_FUNC_BASE(fn) +
+				   ROCKCHIP_PCIE_EP_MSI_CTRL_REG);
+	if (!(flags & ROCKCHIP_PCIE_EP_MSI_CTRL_ME))
+		return -EINVAL;
+
+	/* Get MSI numbers from MME */
+	mme = ((flags & ROCKCHIP_PCIE_EP_MSI_CTRL_MME_MASK) >>
+			ROCKCHIP_PCIE_EP_MSI_CTRL_MME_OFFSET);
+	msi_count = 1 << mme;
+	if (!interrupt_num || interrupt_num > msi_count)
+		return -EINVAL;
+
+	/* Set MSI private data */
+	data_mask = msi_count - 1;
+	data = rockchip_pcie_read(rockchip,
+				  ROCKCHIP_PCIE_EP_FUNC_BASE(fn) +
+				  ROCKCHIP_PCIE_EP_MSI_CTRL_REG +
+				  PCI_MSI_DATA_64);
+	data = (data & ~data_mask) | ((interrupt_num - 1) & data_mask);
+
+	/* Get MSI PCI address */
+	pci_addr = rockchip_pcie_read(rockchip,
+				      ROCKCHIP_PCIE_EP_FUNC_BASE(fn) +
+				      ROCKCHIP_PCIE_EP_MSI_CTRL_REG +
+				      PCI_MSI_ADDRESS_HI);
+	pci_addr <<= 32;
+	pci_addr |= rockchip_pcie_read(rockchip,
+				       ROCKCHIP_PCIE_EP_FUNC_BASE(fn) +
+				       ROCKCHIP_PCIE_EP_MSI_CTRL_REG +
+				       PCI_MSI_ADDRESS_LO);
+
+	/* Set the outbound region if needed. */
+	if (unlikely(ep->irq_pci_addr != (pci_addr & PCIE_ADDR_MASK) ||
+		     ep->irq_pci_fn != fn)) {
+		r = rockchip_ob_region(ep->irq_phys_addr);
+		rockchip_pcie_prog_ep_ob_atu(rockchip, fn, r,
+					     ep->irq_phys_addr,
+					     pci_addr & PCIE_ADDR_MASK,
+					     ~PCIE_ADDR_MASK + 1);
+		ep->irq_pci_addr = (pci_addr & PCIE_ADDR_MASK);
+		ep->irq_pci_fn = fn;
+	}
+
+	writew(data, ep->irq_cpu_addr + (pci_addr & ~PCIE_ADDR_MASK));
+	return 0;
+}
+
+static int rockchip_pcie_ep_raise_irq(struct pci_epc *epc, u8 fn, u8 vfn,
+				      enum pci_epc_irq_type type,
+				      u16 interrupt_num)
+{
+	struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
+
+	switch (type) {
+	case PCI_EPC_IRQ_LEGACY:
+		return rockchip_pcie_ep_send_legacy_irq(ep, fn, 0);
+	case PCI_EPC_IRQ_MSI:
+		return rockchip_pcie_ep_send_msi_irq(ep, fn, interrupt_num);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int rockchip_pcie_ep_start(struct pci_epc *epc)
+{
+	struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
+	struct rockchip_pcie *rockchip = &ep->rockchip;
+	struct pci_epf *epf;
+	u32 cfg;
+
+	cfg = BIT(0);
+	list_for_each_entry(epf, &epc->pci_epf, list)
+		cfg |= BIT(epf->func_no);
+
+	rockchip_pcie_write(rockchip, cfg, PCIE_CORE_PHY_FUNC_CFG);
+
+	return 0;
+}
+
+static const struct pci_epc_features rockchip_pcie_epc_features = {
+	.linkup_notifier = false,
+	.msi_capable = true,
+	.msix_capable = false,
+	.align = 256,
+};
+
+static const struct pci_epc_features*
+rockchip_pcie_ep_get_features(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
+{
+	return &rockchip_pcie_epc_features;
+}
+
+static const struct pci_epc_ops rockchip_pcie_epc_ops = {
+	.write_header	= rockchip_pcie_ep_write_header,
+	.set_bar	= rockchip_pcie_ep_set_bar,
+	.clear_bar	= rockchip_pcie_ep_clear_bar,
+	.map_addr	= rockchip_pcie_ep_map_addr,
+	.unmap_addr	= rockchip_pcie_ep_unmap_addr,
+	.set_msi	= rockchip_pcie_ep_set_msi,
+	.get_msi	= rockchip_pcie_ep_get_msi,
+	.raise_irq	= rockchip_pcie_ep_raise_irq,
+	.start		= rockchip_pcie_ep_start,
+	.get_features	= rockchip_pcie_ep_get_features,
+};
+
+static int rockchip_pcie_parse_ep_dt(struct rockchip_pcie *rockchip,
+				     struct rockchip_pcie_ep *ep)
+{
+	struct device *dev = rockchip->dev;
+	int err;
+
+	err = rockchip_pcie_parse_dt(rockchip);
+	if (err)
+		return err;
+
+	err = rockchip_pcie_get_phys(rockchip);
+	if (err)
+		return err;
+
+	err = of_property_read_u32(dev->of_node,
+				   "rockchip,max-outbound-regions",
+				   &ep->max_regions);
+	if (err < 0 || ep->max_regions > MAX_REGION_LIMIT)
+		ep->max_regions = MAX_REGION_LIMIT;
+
+	ep->ob_region_map = 0;
+
+	err = of_property_read_u8(dev->of_node, "max-functions",
+				  &ep->epc->max_functions);
+	if (err < 0)
+		ep->epc->max_functions = 1;
+
+	return 0;
+}
+
+static const struct of_device_id rockchip_pcie_ep_of_match[] = {
+	{ .compatible = "rockchip,rk3399-pcie-ep"},
+	{},
+};
+
+static int rockchip_pcie_ep_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct rockchip_pcie_ep *ep;
+	struct rockchip_pcie *rockchip;
+	struct pci_epc *epc;
+	size_t max_regions;
+	struct pci_epc_mem_window *windows = NULL;
+	int err, i;
+	u32 cfg_msi, cfg_msix_cp;
+
+	ep = devm_kzalloc(dev, sizeof(*ep), GFP_KERNEL);
+	if (!ep)
+		return -ENOMEM;
+
+	rockchip = &ep->rockchip;
+	rockchip->is_rc = false;
+	rockchip->dev = dev;
+
+	epc = devm_pci_epc_create(dev, &rockchip_pcie_epc_ops);
+	if (IS_ERR(epc)) {
+		dev_err(dev, "failed to create epc device\n");
+		return PTR_ERR(epc);
+	}
+
+	ep->epc = epc;
+	epc_set_drvdata(epc, ep);
+
+	err = rockchip_pcie_parse_ep_dt(rockchip, ep);
+	if (err)
+		return err;
+
+	err = rockchip_pcie_enable_clocks(rockchip);
+	if (err)
+		return err;
+
+	err = rockchip_pcie_init_port(rockchip);
+	if (err)
+		goto err_disable_clocks;
+
+	/* Establish the link automatically */
+	rockchip_pcie_write(rockchip, PCIE_CLIENT_LINK_TRAIN_ENABLE,
+			    PCIE_CLIENT_CONFIG);
+
+	max_regions = ep->max_regions;
+	ep->ob_addr = devm_kcalloc(dev, max_regions, sizeof(*ep->ob_addr),
+				   GFP_KERNEL);
+
+	if (!ep->ob_addr) {
+		err = -ENOMEM;
+		goto err_uninit_port;
+	}
+
+	/* Only enable function 0 by default */
+	rockchip_pcie_write(rockchip, BIT(0), PCIE_CORE_PHY_FUNC_CFG);
+
+	windows = devm_kcalloc(dev, ep->max_regions,
+			       sizeof(struct pci_epc_mem_window), GFP_KERNEL);
+	if (!windows) {
+		err = -ENOMEM;
+		goto err_uninit_port;
+	}
+	for (i = 0; i < ep->max_regions; i++) {
+		windows[i].phys_base = rockchip->mem_res->start + (SZ_1M * i);
+		windows[i].size = SZ_1M;
+		windows[i].page_size = SZ_1M;
+	}
+	err = pci_epc_multi_mem_init(epc, windows, ep->max_regions);
+	devm_kfree(dev, windows);
+
+	if (err < 0) {
+		dev_err(dev, "failed to initialize the memory space\n");
+		goto err_uninit_port;
+	}
+
+	ep->irq_cpu_addr = pci_epc_mem_alloc_addr(epc, &ep->irq_phys_addr,
+						  SZ_1M);
+	if (!ep->irq_cpu_addr) {
+		dev_err(dev, "failed to reserve memory space for MSI\n");
+		err = -ENOMEM;
+		goto err_epc_mem_exit;
+	}
+
+	ep->irq_pci_addr = ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR;
+
+	/*
+	 * MSI-X is not supported but the controller still advertises the MSI-X
+	 * capability by default, which can lead to the Root Complex side
+	 * allocating MSI-X vectors which cannot be used. Avoid this by skipping
+	 * the MSI-X capability entry in the PCIe capabilities linked-list: get
+	 * the next pointer from the MSI-X entry and set that in the MSI
+	 * capability entry (which is the previous entry). This way the MSI-X
+	 * entry is skipped (left out of the linked-list) and not advertised.
+	 */
+	cfg_msi = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_BASE +
+				     ROCKCHIP_PCIE_EP_MSI_CTRL_REG);
+
+	cfg_msi &= ~ROCKCHIP_PCIE_EP_MSI_CP1_MASK;
+
+	cfg_msix_cp = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_BASE +
+					 ROCKCHIP_PCIE_EP_MSIX_CAP_REG) &
+					 ROCKCHIP_PCIE_EP_MSIX_CAP_CP_MASK;
+
+	cfg_msi |= cfg_msix_cp;
+
+	rockchip_pcie_write(rockchip, cfg_msi,
+			    PCIE_EP_CONFIG_BASE + ROCKCHIP_PCIE_EP_MSI_CTRL_REG);
+
+	rockchip_pcie_write(rockchip, PCIE_CLIENT_CONF_ENABLE,
+			    PCIE_CLIENT_CONFIG);
+
+	return 0;
+err_epc_mem_exit:
+	pci_epc_mem_exit(epc);
+err_uninit_port:
+	rockchip_pcie_deinit_phys(rockchip);
+err_disable_clocks:
+	rockchip_pcie_disable_clocks(rockchip);
+	return err;
+}
+
+static struct platform_driver rockchip_pcie_ep_driver = {
+	.driver = {
+		.name = "rockchip-pcie-ep",
+		.of_match_table = rockchip_pcie_ep_of_match,
+	},
+	.probe = rockchip_pcie_ep_probe,
+};
+
+builtin_platform_driver(rockchip_pcie_ep_driver);
diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c
new file mode 100644
index 000000000..afbbdccd1
--- /dev/null
+++ b/drivers/pci/controller/pcie-rockchip-host.c
@@ -0,0 +1,1056 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Rockchip AXI PCIe host controller driver
+ *
+ * Copyright (c) 2016 Rockchip, Inc.
+ *
+ * Author: Shawn Lin <shawn.lin@rock-chips.com>
+ *         Wenrui Li <wenrui.li@rock-chips.com>
+ *
+ * Bits taken from Synopsys DesignWare Host controller driver and
+ * ARM PCI Host generic driver.
+ */
+
+#include <linux/bitrev.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/irq.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/regmap.h>
+
+#include "../pci.h"
+#include "pcie-rockchip.h"
+
+static void rockchip_pcie_enable_bw_int(struct rockchip_pcie *rockchip)
+{
+	u32 status;
+
+	status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_LCS);
+	status |= (PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
+	rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_LCS);
+}
+
+static void rockchip_pcie_clr_bw_int(struct rockchip_pcie *rockchip)
+{
+	u32 status;
+
+	status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_LCS);
+	status |= (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS) << 16;
+	rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_LCS);
+}
+
+static void rockchip_pcie_update_txcredit_mui(struct rockchip_pcie *rockchip)
+{
+	u32 val;
+
+	/* Update Tx credit maximum update interval */
+	val = rockchip_pcie_read(rockchip, PCIE_CORE_TXCREDIT_CFG1);
+	val &= ~PCIE_CORE_TXCREDIT_CFG1_MUI_MASK;
+	val |= PCIE_CORE_TXCREDIT_CFG1_MUI_ENCODE(24000);	/* ns */
+	rockchip_pcie_write(rockchip, val, PCIE_CORE_TXCREDIT_CFG1);
+}
+
+static int rockchip_pcie_valid_device(struct rockchip_pcie *rockchip,
+				      struct pci_bus *bus, int dev)
+{
+	/*
+	 * Access only one slot on each root port.
+	 * Do not read more than one device on the bus directly attached
+	 * to RC's downstream side.
+	 */
+	if (pci_is_root_bus(bus) || pci_is_root_bus(bus->parent))
+		return dev == 0;
+
+	return 1;
+}
+
+static u8 rockchip_pcie_lane_map(struct rockchip_pcie *rockchip)
+{
+	u32 val;
+	u8 map;
+
+	if (rockchip->legacy_phy)
+		return GENMASK(MAX_LANE_NUM - 1, 0);
+
+	val = rockchip_pcie_read(rockchip, PCIE_CORE_LANE_MAP);
+	map = val & PCIE_CORE_LANE_MAP_MASK;
+
+	/* The link may be using a reverse-indexed mapping. */
+	if (val & PCIE_CORE_LANE_MAP_REVERSE)
+		map = bitrev8(map) >> 4;
+
+	return map;
+}
+
+static int rockchip_pcie_rd_own_conf(struct rockchip_pcie *rockchip,
+				     int where, int size, u32 *val)
+{
+	void __iomem *addr;
+
+	addr = rockchip->apb_base + PCIE_RC_CONFIG_NORMAL_BASE + where;
+
+	if (!IS_ALIGNED((uintptr_t)addr, size)) {
+		*val = 0;
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	}
+
+	if (size == 4) {
+		*val = readl(addr);
+	} else if (size == 2) {
+		*val = readw(addr);
+	} else if (size == 1) {
+		*val = readb(addr);
+	} else {
+		*val = 0;
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int rockchip_pcie_wr_own_conf(struct rockchip_pcie *rockchip,
+				     int where, int size, u32 val)
+{
+	u32 mask, tmp, offset;
+	void __iomem *addr;
+
+	offset = where & ~0x3;
+	addr = rockchip->apb_base + PCIE_RC_CONFIG_NORMAL_BASE + offset;
+
+	if (size == 4) {
+		writel(val, addr);
+		return PCIBIOS_SUCCESSFUL;
+	}
+
+	mask = ~(((1 << (size * 8)) - 1) << ((where & 0x3) * 8));
+
+	/*
+	 * N.B. This read/modify/write isn't safe in general because it can
+	 * corrupt RW1C bits in adjacent registers.  But the hardware
+	 * doesn't support smaller writes.
+	 */
+	tmp = readl(addr) & mask;
+	tmp |= val << ((where & 0x3) * 8);
+	writel(tmp, addr);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int rockchip_pcie_rd_other_conf(struct rockchip_pcie *rockchip,
+				       struct pci_bus *bus, u32 devfn,
+				       int where, int size, u32 *val)
+{
+	void __iomem *addr;
+
+	addr = rockchip->reg_base + PCIE_ECAM_OFFSET(bus->number, devfn, where);
+
+	if (!IS_ALIGNED((uintptr_t)addr, size)) {
+		*val = 0;
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	}
+
+	if (pci_is_root_bus(bus->parent))
+		rockchip_pcie_cfg_configuration_accesses(rockchip,
+						AXI_WRAPPER_TYPE0_CFG);
+	else
+		rockchip_pcie_cfg_configuration_accesses(rockchip,
+						AXI_WRAPPER_TYPE1_CFG);
+
+	if (size == 4) {
+		*val = readl(addr);
+	} else if (size == 2) {
+		*val = readw(addr);
+	} else if (size == 1) {
+		*val = readb(addr);
+	} else {
+		*val = 0;
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int rockchip_pcie_wr_other_conf(struct rockchip_pcie *rockchip,
+				       struct pci_bus *bus, u32 devfn,
+				       int where, int size, u32 val)
+{
+	void __iomem *addr;
+
+	addr = rockchip->reg_base + PCIE_ECAM_OFFSET(bus->number, devfn, where);
+
+	if (!IS_ALIGNED((uintptr_t)addr, size))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	if (pci_is_root_bus(bus->parent))
+		rockchip_pcie_cfg_configuration_accesses(rockchip,
+						AXI_WRAPPER_TYPE0_CFG);
+	else
+		rockchip_pcie_cfg_configuration_accesses(rockchip,
+						AXI_WRAPPER_TYPE1_CFG);
+
+	if (size == 4)
+		writel(val, addr);
+	else if (size == 2)
+		writew(val, addr);
+	else if (size == 1)
+		writeb(val, addr);
+	else
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int rockchip_pcie_rd_conf(struct pci_bus *bus, u32 devfn, int where,
+				 int size, u32 *val)
+{
+	struct rockchip_pcie *rockchip = bus->sysdata;
+
+	if (!rockchip_pcie_valid_device(rockchip, bus, PCI_SLOT(devfn)))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (pci_is_root_bus(bus))
+		return rockchip_pcie_rd_own_conf(rockchip, where, size, val);
+
+	return rockchip_pcie_rd_other_conf(rockchip, bus, devfn, where, size,
+					   val);
+}
+
+static int rockchip_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
+				 int where, int size, u32 val)
+{
+	struct rockchip_pcie *rockchip = bus->sysdata;
+
+	if (!rockchip_pcie_valid_device(rockchip, bus, PCI_SLOT(devfn)))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (pci_is_root_bus(bus))
+		return rockchip_pcie_wr_own_conf(rockchip, where, size, val);
+
+	return rockchip_pcie_wr_other_conf(rockchip, bus, devfn, where, size,
+					   val);
+}
+
+static struct pci_ops rockchip_pcie_ops = {
+	.read = rockchip_pcie_rd_conf,
+	.write = rockchip_pcie_wr_conf,
+};
+
+static void rockchip_pcie_set_power_limit(struct rockchip_pcie *rockchip)
+{
+	int curr;
+	u32 status, scale, power;
+
+	if (IS_ERR(rockchip->vpcie3v3))
+		return;
+
+	/*
+	 * Set RC's captured slot power limit and scale if
+	 * vpcie3v3 available. The default values are both zero
+	 * which means the software should set these two according
+	 * to the actual power supply.
+	 */
+	curr = regulator_get_current_limit(rockchip->vpcie3v3);
+	if (curr <= 0)
+		return;
+
+	scale = 3; /* 0.001x */
+	curr = curr / 1000; /* convert to mA */
+	power = (curr * 3300) / 1000; /* milliwatt */
+	while (power > PCIE_RC_CONFIG_DCR_CSPL_LIMIT) {
+		if (!scale) {
+			dev_warn(rockchip->dev, "invalid power supply\n");
+			return;
+		}
+		scale--;
+		power = power / 10;
+	}
+
+	status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_DCR);
+	status |= (power << PCIE_RC_CONFIG_DCR_CSPL_SHIFT) |
+		  (scale << PCIE_RC_CONFIG_DCR_CPLS_SHIFT);
+	rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_DCR);
+}
+
+/**
+ * rockchip_pcie_host_init_port - Initialize hardware
+ * @rockchip: PCIe port information
+ */
+static int rockchip_pcie_host_init_port(struct rockchip_pcie *rockchip)
+{
+	struct device *dev = rockchip->dev;
+	int err, i = MAX_LANE_NUM;
+	u32 status;
+
+	gpiod_set_value_cansleep(rockchip->ep_gpio, 0);
+
+	err = rockchip_pcie_init_port(rockchip);
+	if (err)
+		return err;
+
+	/* Fix the transmitted FTS count desired to exit from L0s. */
+	status = rockchip_pcie_read(rockchip, PCIE_CORE_CTRL_PLC1);
+	status = (status & ~PCIE_CORE_CTRL_PLC1_FTS_MASK) |
+		 (PCIE_CORE_CTRL_PLC1_FTS_CNT << PCIE_CORE_CTRL_PLC1_FTS_SHIFT);
+	rockchip_pcie_write(rockchip, status, PCIE_CORE_CTRL_PLC1);
+
+	rockchip_pcie_set_power_limit(rockchip);
+
+	/* Set RC's clock architecture as common clock */
+	status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_LCS);
+	status |= PCI_EXP_LNKSTA_SLC << 16;
+	rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_LCS);
+
+	/* Set RC's RCB to 128 */
+	status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_LCS);
+	status |= PCI_EXP_LNKCTL_RCB;
+	rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_LCS);
+
+	/* Enable Gen1 training */
+	rockchip_pcie_write(rockchip, PCIE_CLIENT_LINK_TRAIN_ENABLE,
+			    PCIE_CLIENT_CONFIG);
+
+	gpiod_set_value_cansleep(rockchip->ep_gpio, 1);
+
+	/* 500ms timeout value should be enough for Gen1/2 training */
+	err = readl_poll_timeout(rockchip->apb_base + PCIE_CLIENT_BASIC_STATUS1,
+				 status, PCIE_LINK_UP(status), 20,
+				 500 * USEC_PER_MSEC);
+	if (err) {
+		dev_err(dev, "PCIe link training gen1 timeout!\n");
+		goto err_power_off_phy;
+	}
+
+	if (rockchip->link_gen == 2) {
+		/*
+		 * Enable retrain for gen2. This should be configured only after
+		 * gen1 finished.
+		 */
+		status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_LCS);
+		status |= PCI_EXP_LNKCTL_RL;
+		rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_LCS);
+
+		err = readl_poll_timeout(rockchip->apb_base + PCIE_CORE_CTRL,
+					 status, PCIE_LINK_IS_GEN2(status), 20,
+					 500 * USEC_PER_MSEC);
+		if (err)
+			dev_dbg(dev, "PCIe link training gen2 timeout, fall back to gen1!\n");
+	}
+
+	/* Check the final link width from negotiated lane counter from MGMT */
+	status = rockchip_pcie_read(rockchip, PCIE_CORE_CTRL);
+	status = 0x1 << ((status & PCIE_CORE_PL_CONF_LANE_MASK) >>
+			  PCIE_CORE_PL_CONF_LANE_SHIFT);
+	dev_dbg(dev, "current link width is x%d\n", status);
+
+	/* Power off unused lane(s) */
+	rockchip->lanes_map = rockchip_pcie_lane_map(rockchip);
+	for (i = 0; i < MAX_LANE_NUM; i++) {
+		if (!(rockchip->lanes_map & BIT(i))) {
+			dev_dbg(dev, "idling lane %d\n", i);
+			phy_power_off(rockchip->phys[i]);
+		}
+	}
+
+	rockchip_pcie_write(rockchip, ROCKCHIP_VENDOR_ID,
+			    PCIE_CORE_CONFIG_VENDOR);
+	rockchip_pcie_write(rockchip,
+			    PCI_CLASS_BRIDGE_PCI_NORMAL << 8,
+			    PCIE_RC_CONFIG_RID_CCR);
+
+	/* Clear THP cap's next cap pointer to remove L1 substate cap */
+	status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_THP_CAP);
+	status &= ~PCIE_RC_CONFIG_THP_CAP_NEXT_MASK;
+	rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_THP_CAP);
+
+	/* Clear L0s from RC's link cap */
+	if (of_property_read_bool(dev->of_node, "aspm-no-l0s")) {
+		status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_LINK_CAP);
+		status &= ~PCIE_RC_CONFIG_LINK_CAP_L0S;
+		rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_LINK_CAP);
+	}
+
+	status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_DCSR);
+	status &= ~PCIE_RC_CONFIG_DCSR_MPS_MASK;
+	status |= PCIE_RC_CONFIG_DCSR_MPS_256;
+	rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_DCSR);
+
+	return 0;
+err_power_off_phy:
+	while (i--)
+		phy_power_off(rockchip->phys[i]);
+	i = MAX_LANE_NUM;
+	while (i--)
+		phy_exit(rockchip->phys[i]);
+	return err;
+}
+
+static irqreturn_t rockchip_pcie_subsys_irq_handler(int irq, void *arg)
+{
+	struct rockchip_pcie *rockchip = arg;
+	struct device *dev = rockchip->dev;
+	u32 reg;
+	u32 sub_reg;
+
+	reg = rockchip_pcie_read(rockchip, PCIE_CLIENT_INT_STATUS);
+	if (reg & PCIE_CLIENT_INT_LOCAL) {
+		dev_dbg(dev, "local interrupt received\n");
+		sub_reg = rockchip_pcie_read(rockchip, PCIE_CORE_INT_STATUS);
+		if (sub_reg & PCIE_CORE_INT_PRFPE)
+			dev_dbg(dev, "parity error detected while reading from the PNP receive FIFO RAM\n");
+
+		if (sub_reg & PCIE_CORE_INT_CRFPE)
+			dev_dbg(dev, "parity error detected while reading from the Completion Receive FIFO RAM\n");
+
+		if (sub_reg & PCIE_CORE_INT_RRPE)
+			dev_dbg(dev, "parity error detected while reading from replay buffer RAM\n");
+
+		if (sub_reg & PCIE_CORE_INT_PRFO)
+			dev_dbg(dev, "overflow occurred in the PNP receive FIFO\n");
+
+		if (sub_reg & PCIE_CORE_INT_CRFO)
+			dev_dbg(dev, "overflow occurred in the completion receive FIFO\n");
+
+		if (sub_reg & PCIE_CORE_INT_RT)
+			dev_dbg(dev, "replay timer timed out\n");
+
+		if (sub_reg & PCIE_CORE_INT_RTR)
+			dev_dbg(dev, "replay timer rolled over after 4 transmissions of the same TLP\n");
+
+		if (sub_reg & PCIE_CORE_INT_PE)
+			dev_dbg(dev, "phy error detected on receive side\n");
+
+		if (sub_reg & PCIE_CORE_INT_MTR)
+			dev_dbg(dev, "malformed TLP received from the link\n");
+
+		if (sub_reg & PCIE_CORE_INT_UCR)
+			dev_dbg(dev, "malformed TLP received from the link\n");
+
+		if (sub_reg & PCIE_CORE_INT_FCE)
+			dev_dbg(dev, "an error was observed in the flow control advertisements from the other side\n");
+
+		if (sub_reg & PCIE_CORE_INT_CT)
+			dev_dbg(dev, "a request timed out waiting for completion\n");
+
+		if (sub_reg & PCIE_CORE_INT_UTC)
+			dev_dbg(dev, "unmapped TC error\n");
+
+		if (sub_reg & PCIE_CORE_INT_MMVC)
+			dev_dbg(dev, "MSI mask register changes\n");
+
+		rockchip_pcie_write(rockchip, sub_reg, PCIE_CORE_INT_STATUS);
+	} else if (reg & PCIE_CLIENT_INT_PHY) {
+		dev_dbg(dev, "phy link changes\n");
+		rockchip_pcie_update_txcredit_mui(rockchip);
+		rockchip_pcie_clr_bw_int(rockchip);
+	}
+
+	rockchip_pcie_write(rockchip, reg & PCIE_CLIENT_INT_LOCAL,
+			    PCIE_CLIENT_INT_STATUS);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t rockchip_pcie_client_irq_handler(int irq, void *arg)
+{
+	struct rockchip_pcie *rockchip = arg;
+	struct device *dev = rockchip->dev;
+	u32 reg;
+
+	reg = rockchip_pcie_read(rockchip, PCIE_CLIENT_INT_STATUS);
+	if (reg & PCIE_CLIENT_INT_LEGACY_DONE)
+		dev_dbg(dev, "legacy done interrupt received\n");
+
+	if (reg & PCIE_CLIENT_INT_MSG)
+		dev_dbg(dev, "message done interrupt received\n");
+
+	if (reg & PCIE_CLIENT_INT_HOT_RST)
+		dev_dbg(dev, "hot reset interrupt received\n");
+
+	if (reg & PCIE_CLIENT_INT_DPA)
+		dev_dbg(dev, "dpa interrupt received\n");
+
+	if (reg & PCIE_CLIENT_INT_FATAL_ERR)
+		dev_dbg(dev, "fatal error interrupt received\n");
+
+	if (reg & PCIE_CLIENT_INT_NFATAL_ERR)
+		dev_dbg(dev, "no fatal error interrupt received\n");
+
+	if (reg & PCIE_CLIENT_INT_CORR_ERR)
+		dev_dbg(dev, "correctable error interrupt received\n");
+
+	if (reg & PCIE_CLIENT_INT_PHY)
+		dev_dbg(dev, "phy interrupt received\n");
+
+	rockchip_pcie_write(rockchip, reg & (PCIE_CLIENT_INT_LEGACY_DONE |
+			      PCIE_CLIENT_INT_MSG | PCIE_CLIENT_INT_HOT_RST |
+			      PCIE_CLIENT_INT_DPA | PCIE_CLIENT_INT_FATAL_ERR |
+			      PCIE_CLIENT_INT_NFATAL_ERR |
+			      PCIE_CLIENT_INT_CORR_ERR |
+			      PCIE_CLIENT_INT_PHY),
+		   PCIE_CLIENT_INT_STATUS);
+
+	return IRQ_HANDLED;
+}
+
+static void rockchip_pcie_legacy_int_handler(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct rockchip_pcie *rockchip = irq_desc_get_handler_data(desc);
+	struct device *dev = rockchip->dev;
+	u32 reg;
+	u32 hwirq;
+	int ret;
+
+	chained_irq_enter(chip, desc);
+
+	reg = rockchip_pcie_read(rockchip, PCIE_CLIENT_INT_STATUS);
+	reg = (reg & PCIE_CLIENT_INTR_MASK) >> PCIE_CLIENT_INTR_SHIFT;
+
+	while (reg) {
+		hwirq = ffs(reg) - 1;
+		reg &= ~BIT(hwirq);
+
+		ret = generic_handle_domain_irq(rockchip->irq_domain, hwirq);
+		if (ret)
+			dev_err(dev, "unexpected IRQ, INT%d\n", hwirq);
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static int rockchip_pcie_setup_irq(struct rockchip_pcie *rockchip)
+{
+	int irq, err;
+	struct device *dev = rockchip->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	irq = platform_get_irq_byname(pdev, "sys");
+	if (irq < 0)
+		return irq;
+
+	err = devm_request_irq(dev, irq, rockchip_pcie_subsys_irq_handler,
+			       IRQF_SHARED, "pcie-sys", rockchip);
+	if (err) {
+		dev_err(dev, "failed to request PCIe subsystem IRQ\n");
+		return err;
+	}
+
+	irq = platform_get_irq_byname(pdev, "legacy");
+	if (irq < 0)
+		return irq;
+
+	irq_set_chained_handler_and_data(irq,
+					 rockchip_pcie_legacy_int_handler,
+					 rockchip);
+
+	irq = platform_get_irq_byname(pdev, "client");
+	if (irq < 0)
+		return irq;
+
+	err = devm_request_irq(dev, irq, rockchip_pcie_client_irq_handler,
+			       IRQF_SHARED, "pcie-client", rockchip);
+	if (err) {
+		dev_err(dev, "failed to request PCIe client IRQ\n");
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * rockchip_pcie_parse_host_dt - Parse Device Tree
+ * @rockchip: PCIe port information
+ *
+ * Return: '0' on success and error value on failure
+ */
+static int rockchip_pcie_parse_host_dt(struct rockchip_pcie *rockchip)
+{
+	struct device *dev = rockchip->dev;
+	int err;
+
+	err = rockchip_pcie_parse_dt(rockchip);
+	if (err)
+		return err;
+
+	rockchip->vpcie12v = devm_regulator_get_optional(dev, "vpcie12v");
+	if (IS_ERR(rockchip->vpcie12v)) {
+		if (PTR_ERR(rockchip->vpcie12v) != -ENODEV)
+			return PTR_ERR(rockchip->vpcie12v);
+		dev_info(dev, "no vpcie12v regulator found\n");
+	}
+
+	rockchip->vpcie3v3 = devm_regulator_get_optional(dev, "vpcie3v3");
+	if (IS_ERR(rockchip->vpcie3v3)) {
+		if (PTR_ERR(rockchip->vpcie3v3) != -ENODEV)
+			return PTR_ERR(rockchip->vpcie3v3);
+		dev_info(dev, "no vpcie3v3 regulator found\n");
+	}
+
+	rockchip->vpcie1v8 = devm_regulator_get(dev, "vpcie1v8");
+	if (IS_ERR(rockchip->vpcie1v8))
+		return PTR_ERR(rockchip->vpcie1v8);
+
+	rockchip->vpcie0v9 = devm_regulator_get(dev, "vpcie0v9");
+	if (IS_ERR(rockchip->vpcie0v9))
+		return PTR_ERR(rockchip->vpcie0v9);
+
+	return 0;
+}
+
+static int rockchip_pcie_set_vpcie(struct rockchip_pcie *rockchip)
+{
+	struct device *dev = rockchip->dev;
+	int err;
+
+	if (!IS_ERR(rockchip->vpcie12v)) {
+		err = regulator_enable(rockchip->vpcie12v);
+		if (err) {
+			dev_err(dev, "fail to enable vpcie12v regulator\n");
+			goto err_out;
+		}
+	}
+
+	if (!IS_ERR(rockchip->vpcie3v3)) {
+		err = regulator_enable(rockchip->vpcie3v3);
+		if (err) {
+			dev_err(dev, "fail to enable vpcie3v3 regulator\n");
+			goto err_disable_12v;
+		}
+	}
+
+	err = regulator_enable(rockchip->vpcie1v8);
+	if (err) {
+		dev_err(dev, "fail to enable vpcie1v8 regulator\n");
+		goto err_disable_3v3;
+	}
+
+	err = regulator_enable(rockchip->vpcie0v9);
+	if (err) {
+		dev_err(dev, "fail to enable vpcie0v9 regulator\n");
+		goto err_disable_1v8;
+	}
+
+	return 0;
+
+err_disable_1v8:
+	regulator_disable(rockchip->vpcie1v8);
+err_disable_3v3:
+	if (!IS_ERR(rockchip->vpcie3v3))
+		regulator_disable(rockchip->vpcie3v3);
+err_disable_12v:
+	if (!IS_ERR(rockchip->vpcie12v))
+		regulator_disable(rockchip->vpcie12v);
+err_out:
+	return err;
+}
+
+static void rockchip_pcie_enable_interrupts(struct rockchip_pcie *rockchip)
+{
+	rockchip_pcie_write(rockchip, (PCIE_CLIENT_INT_CLI << 16) &
+			    (~PCIE_CLIENT_INT_CLI), PCIE_CLIENT_INT_MASK);
+	rockchip_pcie_write(rockchip, (u32)(~PCIE_CORE_INT),
+			    PCIE_CORE_INT_MASK);
+
+	rockchip_pcie_enable_bw_int(rockchip);
+}
+
+static int rockchip_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
+				  irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &dummy_irq_chip, handle_simple_irq);
+	irq_set_chip_data(irq, domain->host_data);
+
+	return 0;
+}
+
+static const struct irq_domain_ops intx_domain_ops = {
+	.map = rockchip_pcie_intx_map,
+};
+
+static int rockchip_pcie_init_irq_domain(struct rockchip_pcie *rockchip)
+{
+	struct device *dev = rockchip->dev;
+	struct device_node *intc = of_get_next_child(dev->of_node, NULL);
+
+	if (!intc) {
+		dev_err(dev, "missing child interrupt-controller node\n");
+		return -EINVAL;
+	}
+
+	rockchip->irq_domain = irq_domain_add_linear(intc, PCI_NUM_INTX,
+						    &intx_domain_ops, rockchip);
+	of_node_put(intc);
+	if (!rockchip->irq_domain) {
+		dev_err(dev, "failed to get a INTx IRQ domain\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rockchip_pcie_prog_ob_atu(struct rockchip_pcie *rockchip,
+				     int region_no, int type, u8 num_pass_bits,
+				     u32 lower_addr, u32 upper_addr)
+{
+	u32 ob_addr_0;
+	u32 ob_addr_1;
+	u32 ob_desc_0;
+	u32 aw_offset;
+
+	if (region_no >= MAX_AXI_WRAPPER_REGION_NUM)
+		return -EINVAL;
+	if (num_pass_bits + 1 < 8)
+		return -EINVAL;
+	if (num_pass_bits > 63)
+		return -EINVAL;
+	if (region_no == 0) {
+		if (AXI_REGION_0_SIZE < (2ULL << num_pass_bits))
+			return -EINVAL;
+	}
+	if (region_no != 0) {
+		if (AXI_REGION_SIZE < (2ULL << num_pass_bits))
+			return -EINVAL;
+	}
+
+	aw_offset = (region_no << OB_REG_SIZE_SHIFT);
+
+	ob_addr_0 = num_pass_bits & PCIE_CORE_OB_REGION_ADDR0_NUM_BITS;
+	ob_addr_0 |= lower_addr & PCIE_CORE_OB_REGION_ADDR0_LO_ADDR;
+	ob_addr_1 = upper_addr;
+	ob_desc_0 = (1 << 23 | type);
+
+	rockchip_pcie_write(rockchip, ob_addr_0,
+			    PCIE_CORE_OB_REGION_ADDR0 + aw_offset);
+	rockchip_pcie_write(rockchip, ob_addr_1,
+			    PCIE_CORE_OB_REGION_ADDR1 + aw_offset);
+	rockchip_pcie_write(rockchip, ob_desc_0,
+			    PCIE_CORE_OB_REGION_DESC0 + aw_offset);
+	rockchip_pcie_write(rockchip, 0,
+			    PCIE_CORE_OB_REGION_DESC1 + aw_offset);
+
+	return 0;
+}
+
+static int rockchip_pcie_prog_ib_atu(struct rockchip_pcie *rockchip,
+				     int region_no, u8 num_pass_bits,
+				     u32 lower_addr, u32 upper_addr)
+{
+	u32 ib_addr_0;
+	u32 ib_addr_1;
+	u32 aw_offset;
+
+	if (region_no > MAX_AXI_IB_ROOTPORT_REGION_NUM)
+		return -EINVAL;
+	if (num_pass_bits + 1 < MIN_AXI_ADDR_BITS_PASSED)
+		return -EINVAL;
+	if (num_pass_bits > 63)
+		return -EINVAL;
+
+	aw_offset = (region_no << IB_ROOT_PORT_REG_SIZE_SHIFT);
+
+	ib_addr_0 = num_pass_bits & PCIE_CORE_IB_REGION_ADDR0_NUM_BITS;
+	ib_addr_0 |= (lower_addr << 8) & PCIE_CORE_IB_REGION_ADDR0_LO_ADDR;
+	ib_addr_1 = upper_addr;
+
+	rockchip_pcie_write(rockchip, ib_addr_0, PCIE_RP_IB_ADDR0 + aw_offset);
+	rockchip_pcie_write(rockchip, ib_addr_1, PCIE_RP_IB_ADDR1 + aw_offset);
+
+	return 0;
+}
+
+static int rockchip_pcie_cfg_atu(struct rockchip_pcie *rockchip)
+{
+	struct device *dev = rockchip->dev;
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(rockchip);
+	struct resource_entry *entry;
+	u64 pci_addr, size;
+	int offset;
+	int err;
+	int reg_no;
+
+	rockchip_pcie_cfg_configuration_accesses(rockchip,
+						 AXI_WRAPPER_TYPE0_CFG);
+	entry = resource_list_first_type(&bridge->windows, IORESOURCE_MEM);
+	if (!entry)
+		return -ENODEV;
+
+	size = resource_size(entry->res);
+	pci_addr = entry->res->start - entry->offset;
+	rockchip->msg_bus_addr = pci_addr;
+
+	for (reg_no = 0; reg_no < (size >> 20); reg_no++) {
+		err = rockchip_pcie_prog_ob_atu(rockchip, reg_no + 1,
+						AXI_WRAPPER_MEM_WRITE,
+						20 - 1,
+						pci_addr + (reg_no << 20),
+						0);
+		if (err) {
+			dev_err(dev, "program RC mem outbound ATU failed\n");
+			return err;
+		}
+	}
+
+	err = rockchip_pcie_prog_ib_atu(rockchip, 2, 32 - 1, 0x0, 0);
+	if (err) {
+		dev_err(dev, "program RC mem inbound ATU failed\n");
+		return err;
+	}
+
+	entry = resource_list_first_type(&bridge->windows, IORESOURCE_IO);
+	if (!entry)
+		return -ENODEV;
+
+	/* store the register number offset to program RC io outbound ATU */
+	offset = size >> 20;
+
+	size = resource_size(entry->res);
+	pci_addr = entry->res->start - entry->offset;
+
+	for (reg_no = 0; reg_no < (size >> 20); reg_no++) {
+		err = rockchip_pcie_prog_ob_atu(rockchip,
+						reg_no + 1 + offset,
+						AXI_WRAPPER_IO_WRITE,
+						20 - 1,
+						pci_addr + (reg_no << 20),
+						0);
+		if (err) {
+			dev_err(dev, "program RC io outbound ATU failed\n");
+			return err;
+		}
+	}
+
+	/* assign message regions */
+	rockchip_pcie_prog_ob_atu(rockchip, reg_no + 1 + offset,
+				  AXI_WRAPPER_NOR_MSG,
+				  20 - 1, 0, 0);
+
+	rockchip->msg_bus_addr += ((reg_no + offset) << 20);
+	return err;
+}
+
+static int rockchip_pcie_wait_l2(struct rockchip_pcie *rockchip)
+{
+	u32 value;
+	int err;
+
+	/* send PME_TURN_OFF message */
+	writel(0x0, rockchip->msg_region + PCIE_RC_SEND_PME_OFF);
+
+	/* read LTSSM and wait for falling into L2 link state */
+	err = readl_poll_timeout(rockchip->apb_base + PCIE_CLIENT_DEBUG_OUT_0,
+				 value, PCIE_LINK_IS_L2(value), 20,
+				 jiffies_to_usecs(5 * HZ));
+	if (err) {
+		dev_err(rockchip->dev, "PCIe link enter L2 timeout!\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static int rockchip_pcie_suspend_noirq(struct device *dev)
+{
+	struct rockchip_pcie *rockchip = dev_get_drvdata(dev);
+	int ret;
+
+	/* disable core and cli int since we don't need to ack PME_ACK */
+	rockchip_pcie_write(rockchip, (PCIE_CLIENT_INT_CLI << 16) |
+			    PCIE_CLIENT_INT_CLI, PCIE_CLIENT_INT_MASK);
+	rockchip_pcie_write(rockchip, (u32)PCIE_CORE_INT, PCIE_CORE_INT_MASK);
+
+	ret = rockchip_pcie_wait_l2(rockchip);
+	if (ret) {
+		rockchip_pcie_enable_interrupts(rockchip);
+		return ret;
+	}
+
+	rockchip_pcie_deinit_phys(rockchip);
+
+	rockchip_pcie_disable_clocks(rockchip);
+
+	regulator_disable(rockchip->vpcie0v9);
+
+	return ret;
+}
+
+static int rockchip_pcie_resume_noirq(struct device *dev)
+{
+	struct rockchip_pcie *rockchip = dev_get_drvdata(dev);
+	int err;
+
+	err = regulator_enable(rockchip->vpcie0v9);
+	if (err) {
+		dev_err(dev, "fail to enable vpcie0v9 regulator\n");
+		return err;
+	}
+
+	err = rockchip_pcie_enable_clocks(rockchip);
+	if (err)
+		goto err_disable_0v9;
+
+	err = rockchip_pcie_host_init_port(rockchip);
+	if (err)
+		goto err_pcie_resume;
+
+	err = rockchip_pcie_cfg_atu(rockchip);
+	if (err)
+		goto err_err_deinit_port;
+
+	/* Need this to enter L1 again */
+	rockchip_pcie_update_txcredit_mui(rockchip);
+	rockchip_pcie_enable_interrupts(rockchip);
+
+	return 0;
+
+err_err_deinit_port:
+	rockchip_pcie_deinit_phys(rockchip);
+err_pcie_resume:
+	rockchip_pcie_disable_clocks(rockchip);
+err_disable_0v9:
+	regulator_disable(rockchip->vpcie0v9);
+	return err;
+}
+
+static int rockchip_pcie_probe(struct platform_device *pdev)
+{
+	struct rockchip_pcie *rockchip;
+	struct device *dev = &pdev->dev;
+	struct pci_host_bridge *bridge;
+	int err;
+
+	if (!dev->of_node)
+		return -ENODEV;
+
+	bridge = devm_pci_alloc_host_bridge(dev, sizeof(*rockchip));
+	if (!bridge)
+		return -ENOMEM;
+
+	rockchip = pci_host_bridge_priv(bridge);
+
+	platform_set_drvdata(pdev, rockchip);
+	rockchip->dev = dev;
+	rockchip->is_rc = true;
+
+	err = rockchip_pcie_parse_host_dt(rockchip);
+	if (err)
+		return err;
+
+	err = rockchip_pcie_enable_clocks(rockchip);
+	if (err)
+		return err;
+
+	err = rockchip_pcie_set_vpcie(rockchip);
+	if (err) {
+		dev_err(dev, "failed to set vpcie regulator\n");
+		goto err_set_vpcie;
+	}
+
+	err = rockchip_pcie_host_init_port(rockchip);
+	if (err)
+		goto err_vpcie;
+
+	err = rockchip_pcie_init_irq_domain(rockchip);
+	if (err < 0)
+		goto err_deinit_port;
+
+	err = rockchip_pcie_cfg_atu(rockchip);
+	if (err)
+		goto err_remove_irq_domain;
+
+	rockchip->msg_region = devm_ioremap(dev, rockchip->msg_bus_addr, SZ_1M);
+	if (!rockchip->msg_region) {
+		err = -ENOMEM;
+		goto err_remove_irq_domain;
+	}
+
+	bridge->sysdata = rockchip;
+	bridge->ops = &rockchip_pcie_ops;
+
+	err = rockchip_pcie_setup_irq(rockchip);
+	if (err)
+		goto err_remove_irq_domain;
+
+	rockchip_pcie_enable_interrupts(rockchip);
+
+	err = pci_host_probe(bridge);
+	if (err < 0)
+		goto err_remove_irq_domain;
+
+	return 0;
+
+err_remove_irq_domain:
+	irq_domain_remove(rockchip->irq_domain);
+err_deinit_port:
+	rockchip_pcie_deinit_phys(rockchip);
+err_vpcie:
+	if (!IS_ERR(rockchip->vpcie12v))
+		regulator_disable(rockchip->vpcie12v);
+	if (!IS_ERR(rockchip->vpcie3v3))
+		regulator_disable(rockchip->vpcie3v3);
+	regulator_disable(rockchip->vpcie1v8);
+	regulator_disable(rockchip->vpcie0v9);
+err_set_vpcie:
+	rockchip_pcie_disable_clocks(rockchip);
+	return err;
+}
+
+static void rockchip_pcie_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct rockchip_pcie *rockchip = dev_get_drvdata(dev);
+	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(rockchip);
+
+	pci_stop_root_bus(bridge->bus);
+	pci_remove_root_bus(bridge->bus);
+	irq_domain_remove(rockchip->irq_domain);
+
+	rockchip_pcie_deinit_phys(rockchip);
+
+	rockchip_pcie_disable_clocks(rockchip);
+
+	if (!IS_ERR(rockchip->vpcie12v))
+		regulator_disable(rockchip->vpcie12v);
+	if (!IS_ERR(rockchip->vpcie3v3))
+		regulator_disable(rockchip->vpcie3v3);
+	regulator_disable(rockchip->vpcie1v8);
+	regulator_disable(rockchip->vpcie0v9);
+}
+
+static const struct dev_pm_ops rockchip_pcie_pm_ops = {
+	NOIRQ_SYSTEM_SLEEP_PM_OPS(rockchip_pcie_suspend_noirq,
+				  rockchip_pcie_resume_noirq)
+};
+
+static const struct of_device_id rockchip_pcie_of_match[] = {
+	{ .compatible = "rockchip,rk3399-pcie", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, rockchip_pcie_of_match);
+
+static struct platform_driver rockchip_pcie_driver = {
+	.driver = {
+		.name = "rockchip-pcie",
+		.of_match_table = rockchip_pcie_of_match,
+		.pm = &rockchip_pcie_pm_ops,
+	},
+	.probe = rockchip_pcie_probe,
+	.remove_new = rockchip_pcie_remove,
+};
+module_platform_driver(rockchip_pcie_driver);
+
+MODULE_AUTHOR("Rockchip Inc");
+MODULE_DESCRIPTION("Rockchip AXI PCIe driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pcie-rockchip.c b/drivers/pci/controller/pcie-rockchip.c
new file mode 100644
index 000000000..0ef2e622d
--- /dev/null
+++ b/drivers/pci/controller/pcie-rockchip.c
@@ -0,0 +1,441 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Rockchip AXI PCIe host controller driver
+ *
+ * Copyright (c) 2016 Rockchip, Inc.
+ *
+ * Author: Shawn Lin <shawn.lin@rock-chips.com>
+ *         Wenrui Li <wenrui.li@rock-chips.com>
+ *
+ * Bits taken from Synopsys DesignWare Host controller driver and
+ * ARM PCI Host generic driver.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/iopoll.h>
+#include <linux/of.h>
+#include <linux/of_pci.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+
+#include "../pci.h"
+#include "pcie-rockchip.h"
+
+int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip)
+{
+	struct device *dev = rockchip->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct device_node *node = dev->of_node;
+	struct resource *regs;
+	int err;
+
+	if (rockchip->is_rc) {
+		regs = platform_get_resource_byname(pdev,
+						    IORESOURCE_MEM,
+						    "axi-base");
+		rockchip->reg_base = devm_pci_remap_cfg_resource(dev, regs);
+		if (IS_ERR(rockchip->reg_base))
+			return PTR_ERR(rockchip->reg_base);
+	} else {
+		rockchip->mem_res =
+			platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						     "mem-base");
+		if (!rockchip->mem_res)
+			return -EINVAL;
+	}
+
+	rockchip->apb_base =
+		devm_platform_ioremap_resource_byname(pdev, "apb-base");
+	if (IS_ERR(rockchip->apb_base))
+		return PTR_ERR(rockchip->apb_base);
+
+	err = rockchip_pcie_get_phys(rockchip);
+	if (err)
+		return err;
+
+	rockchip->lanes = 1;
+	err = of_property_read_u32(node, "num-lanes", &rockchip->lanes);
+	if (!err && (rockchip->lanes == 0 ||
+		     rockchip->lanes == 3 ||
+		     rockchip->lanes > 4)) {
+		dev_warn(dev, "invalid num-lanes, default to use one lane\n");
+		rockchip->lanes = 1;
+	}
+
+	rockchip->link_gen = of_pci_get_max_link_speed(node);
+	if (rockchip->link_gen < 0 || rockchip->link_gen > 2)
+		rockchip->link_gen = 2;
+
+	rockchip->core_rst = devm_reset_control_get_exclusive(dev, "core");
+	if (IS_ERR(rockchip->core_rst)) {
+		if (PTR_ERR(rockchip->core_rst) != -EPROBE_DEFER)
+			dev_err(dev, "missing core reset property in node\n");
+		return PTR_ERR(rockchip->core_rst);
+	}
+
+	rockchip->mgmt_rst = devm_reset_control_get_exclusive(dev, "mgmt");
+	if (IS_ERR(rockchip->mgmt_rst)) {
+		if (PTR_ERR(rockchip->mgmt_rst) != -EPROBE_DEFER)
+			dev_err(dev, "missing mgmt reset property in node\n");
+		return PTR_ERR(rockchip->mgmt_rst);
+	}
+
+	rockchip->mgmt_sticky_rst = devm_reset_control_get_exclusive(dev,
+								"mgmt-sticky");
+	if (IS_ERR(rockchip->mgmt_sticky_rst)) {
+		if (PTR_ERR(rockchip->mgmt_sticky_rst) != -EPROBE_DEFER)
+			dev_err(dev, "missing mgmt-sticky reset property in node\n");
+		return PTR_ERR(rockchip->mgmt_sticky_rst);
+	}
+
+	rockchip->pipe_rst = devm_reset_control_get_exclusive(dev, "pipe");
+	if (IS_ERR(rockchip->pipe_rst)) {
+		if (PTR_ERR(rockchip->pipe_rst) != -EPROBE_DEFER)
+			dev_err(dev, "missing pipe reset property in node\n");
+		return PTR_ERR(rockchip->pipe_rst);
+	}
+
+	rockchip->pm_rst = devm_reset_control_get_exclusive(dev, "pm");
+	if (IS_ERR(rockchip->pm_rst)) {
+		if (PTR_ERR(rockchip->pm_rst) != -EPROBE_DEFER)
+			dev_err(dev, "missing pm reset property in node\n");
+		return PTR_ERR(rockchip->pm_rst);
+	}
+
+	rockchip->pclk_rst = devm_reset_control_get_exclusive(dev, "pclk");
+	if (IS_ERR(rockchip->pclk_rst)) {
+		if (PTR_ERR(rockchip->pclk_rst) != -EPROBE_DEFER)
+			dev_err(dev, "missing pclk reset property in node\n");
+		return PTR_ERR(rockchip->pclk_rst);
+	}
+
+	rockchip->aclk_rst = devm_reset_control_get_exclusive(dev, "aclk");
+	if (IS_ERR(rockchip->aclk_rst)) {
+		if (PTR_ERR(rockchip->aclk_rst) != -EPROBE_DEFER)
+			dev_err(dev, "missing aclk reset property in node\n");
+		return PTR_ERR(rockchip->aclk_rst);
+	}
+
+	if (rockchip->is_rc) {
+		rockchip->ep_gpio = devm_gpiod_get_optional(dev, "ep",
+							    GPIOD_OUT_HIGH);
+		if (IS_ERR(rockchip->ep_gpio))
+			return dev_err_probe(dev, PTR_ERR(rockchip->ep_gpio),
+					     "failed to get ep GPIO\n");
+	}
+
+	rockchip->aclk_pcie = devm_clk_get(dev, "aclk");
+	if (IS_ERR(rockchip->aclk_pcie)) {
+		dev_err(dev, "aclk clock not found\n");
+		return PTR_ERR(rockchip->aclk_pcie);
+	}
+
+	rockchip->aclk_perf_pcie = devm_clk_get(dev, "aclk-perf");
+	if (IS_ERR(rockchip->aclk_perf_pcie)) {
+		dev_err(dev, "aclk_perf clock not found\n");
+		return PTR_ERR(rockchip->aclk_perf_pcie);
+	}
+
+	rockchip->hclk_pcie = devm_clk_get(dev, "hclk");
+	if (IS_ERR(rockchip->hclk_pcie)) {
+		dev_err(dev, "hclk clock not found\n");
+		return PTR_ERR(rockchip->hclk_pcie);
+	}
+
+	rockchip->clk_pcie_pm = devm_clk_get(dev, "pm");
+	if (IS_ERR(rockchip->clk_pcie_pm)) {
+		dev_err(dev, "pm clock not found\n");
+		return PTR_ERR(rockchip->clk_pcie_pm);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rockchip_pcie_parse_dt);
+
+#define rockchip_pcie_read_addr(addr) rockchip_pcie_read(rockchip, addr)
+/* 100 ms max wait time for PHY PLLs to lock */
+#define RK_PHY_PLL_LOCK_TIMEOUT_US 100000
+/* Sleep should be less than 20ms */
+#define RK_PHY_PLL_LOCK_SLEEP_US 1000
+
+int rockchip_pcie_init_port(struct rockchip_pcie *rockchip)
+{
+	struct device *dev = rockchip->dev;
+	int err, i;
+	u32 regs;
+
+	err = reset_control_assert(rockchip->aclk_rst);
+	if (err) {
+		dev_err(dev, "assert aclk_rst err %d\n", err);
+		return err;
+	}
+
+	err = reset_control_assert(rockchip->pclk_rst);
+	if (err) {
+		dev_err(dev, "assert pclk_rst err %d\n", err);
+		return err;
+	}
+
+	err = reset_control_assert(rockchip->pm_rst);
+	if (err) {
+		dev_err(dev, "assert pm_rst err %d\n", err);
+		return err;
+	}
+
+	for (i = 0; i < MAX_LANE_NUM; i++) {
+		err = phy_init(rockchip->phys[i]);
+		if (err) {
+			dev_err(dev, "init phy%d err %d\n", i, err);
+			goto err_exit_phy;
+		}
+	}
+
+	err = reset_control_assert(rockchip->core_rst);
+	if (err) {
+		dev_err(dev, "assert core_rst err %d\n", err);
+		goto err_exit_phy;
+	}
+
+	err = reset_control_assert(rockchip->mgmt_rst);
+	if (err) {
+		dev_err(dev, "assert mgmt_rst err %d\n", err);
+		goto err_exit_phy;
+	}
+
+	err = reset_control_assert(rockchip->mgmt_sticky_rst);
+	if (err) {
+		dev_err(dev, "assert mgmt_sticky_rst err %d\n", err);
+		goto err_exit_phy;
+	}
+
+	err = reset_control_assert(rockchip->pipe_rst);
+	if (err) {
+		dev_err(dev, "assert pipe_rst err %d\n", err);
+		goto err_exit_phy;
+	}
+
+	udelay(10);
+
+	err = reset_control_deassert(rockchip->pm_rst);
+	if (err) {
+		dev_err(dev, "deassert pm_rst err %d\n", err);
+		goto err_exit_phy;
+	}
+
+	err = reset_control_deassert(rockchip->aclk_rst);
+	if (err) {
+		dev_err(dev, "deassert aclk_rst err %d\n", err);
+		goto err_exit_phy;
+	}
+
+	err = reset_control_deassert(rockchip->pclk_rst);
+	if (err) {
+		dev_err(dev, "deassert pclk_rst err %d\n", err);
+		goto err_exit_phy;
+	}
+
+	if (rockchip->link_gen == 2)
+		rockchip_pcie_write(rockchip, PCIE_CLIENT_GEN_SEL_2,
+				    PCIE_CLIENT_CONFIG);
+	else
+		rockchip_pcie_write(rockchip, PCIE_CLIENT_GEN_SEL_1,
+				    PCIE_CLIENT_CONFIG);
+
+	regs = PCIE_CLIENT_LINK_TRAIN_ENABLE | PCIE_CLIENT_ARI_ENABLE |
+	       PCIE_CLIENT_CONF_LANE_NUM(rockchip->lanes);
+
+	if (rockchip->is_rc)
+		regs |= PCIE_CLIENT_CONF_ENABLE | PCIE_CLIENT_MODE_RC;
+	else
+		regs |= PCIE_CLIENT_CONF_DISABLE | PCIE_CLIENT_MODE_EP;
+
+	rockchip_pcie_write(rockchip, regs, PCIE_CLIENT_CONFIG);
+
+	for (i = 0; i < MAX_LANE_NUM; i++) {
+		err = phy_power_on(rockchip->phys[i]);
+		if (err) {
+			dev_err(dev, "power on phy%d err %d\n", i, err);
+			goto err_power_off_phy;
+		}
+	}
+
+	err = readx_poll_timeout(rockchip_pcie_read_addr,
+				 PCIE_CLIENT_SIDE_BAND_STATUS,
+				 regs, !(regs & PCIE_CLIENT_PHY_ST),
+				 RK_PHY_PLL_LOCK_SLEEP_US,
+				 RK_PHY_PLL_LOCK_TIMEOUT_US);
+	if (err) {
+		dev_err(dev, "PHY PLLs could not lock, %d\n", err);
+		goto err_power_off_phy;
+	}
+
+	/*
+	 * Please don't reorder the deassert sequence of the following
+	 * four reset pins.
+	 */
+	err = reset_control_deassert(rockchip->mgmt_sticky_rst);
+	if (err) {
+		dev_err(dev, "deassert mgmt_sticky_rst err %d\n", err);
+		goto err_power_off_phy;
+	}
+
+	err = reset_control_deassert(rockchip->core_rst);
+	if (err) {
+		dev_err(dev, "deassert core_rst err %d\n", err);
+		goto err_power_off_phy;
+	}
+
+	err = reset_control_deassert(rockchip->mgmt_rst);
+	if (err) {
+		dev_err(dev, "deassert mgmt_rst err %d\n", err);
+		goto err_power_off_phy;
+	}
+
+	err = reset_control_deassert(rockchip->pipe_rst);
+	if (err) {
+		dev_err(dev, "deassert pipe_rst err %d\n", err);
+		goto err_power_off_phy;
+	}
+
+	return 0;
+err_power_off_phy:
+	while (i--)
+		phy_power_off(rockchip->phys[i]);
+	i = MAX_LANE_NUM;
+err_exit_phy:
+	while (i--)
+		phy_exit(rockchip->phys[i]);
+	return err;
+}
+EXPORT_SYMBOL_GPL(rockchip_pcie_init_port);
+
+int rockchip_pcie_get_phys(struct rockchip_pcie *rockchip)
+{
+	struct device *dev = rockchip->dev;
+	struct phy *phy;
+	char *name;
+	u32 i;
+
+	phy = devm_phy_get(dev, "pcie-phy");
+	if (!IS_ERR(phy)) {
+		rockchip->legacy_phy = true;
+		rockchip->phys[0] = phy;
+		dev_warn(dev, "legacy phy model is deprecated!\n");
+		return 0;
+	}
+
+	if (PTR_ERR(phy) == -EPROBE_DEFER)
+		return PTR_ERR(phy);
+
+	dev_dbg(dev, "missing legacy phy; search for per-lane PHY\n");
+
+	for (i = 0; i < MAX_LANE_NUM; i++) {
+		name = kasprintf(GFP_KERNEL, "pcie-phy-%u", i);
+		if (!name)
+			return -ENOMEM;
+
+		phy = devm_of_phy_get(dev, dev->of_node, name);
+		kfree(name);
+
+		if (IS_ERR(phy)) {
+			if (PTR_ERR(phy) != -EPROBE_DEFER)
+				dev_err(dev, "missing phy for lane %d: %ld\n",
+					i, PTR_ERR(phy));
+			return PTR_ERR(phy);
+		}
+
+		rockchip->phys[i] = phy;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rockchip_pcie_get_phys);
+
+void rockchip_pcie_deinit_phys(struct rockchip_pcie *rockchip)
+{
+	int i;
+
+	for (i = 0; i < MAX_LANE_NUM; i++) {
+		/* inactive lanes are already powered off */
+		if (rockchip->lanes_map & BIT(i))
+			phy_power_off(rockchip->phys[i]);
+		phy_exit(rockchip->phys[i]);
+	}
+}
+EXPORT_SYMBOL_GPL(rockchip_pcie_deinit_phys);
+
+int rockchip_pcie_enable_clocks(struct rockchip_pcie *rockchip)
+{
+	struct device *dev = rockchip->dev;
+	int err;
+
+	err = clk_prepare_enable(rockchip->aclk_pcie);
+	if (err) {
+		dev_err(dev, "unable to enable aclk_pcie clock\n");
+		return err;
+	}
+
+	err = clk_prepare_enable(rockchip->aclk_perf_pcie);
+	if (err) {
+		dev_err(dev, "unable to enable aclk_perf_pcie clock\n");
+		goto err_aclk_perf_pcie;
+	}
+
+	err = clk_prepare_enable(rockchip->hclk_pcie);
+	if (err) {
+		dev_err(dev, "unable to enable hclk_pcie clock\n");
+		goto err_hclk_pcie;
+	}
+
+	err = clk_prepare_enable(rockchip->clk_pcie_pm);
+	if (err) {
+		dev_err(dev, "unable to enable clk_pcie_pm clock\n");
+		goto err_clk_pcie_pm;
+	}
+
+	return 0;
+
+err_clk_pcie_pm:
+	clk_disable_unprepare(rockchip->hclk_pcie);
+err_hclk_pcie:
+	clk_disable_unprepare(rockchip->aclk_perf_pcie);
+err_aclk_perf_pcie:
+	clk_disable_unprepare(rockchip->aclk_pcie);
+	return err;
+}
+EXPORT_SYMBOL_GPL(rockchip_pcie_enable_clocks);
+
+void rockchip_pcie_disable_clocks(void *data)
+{
+	struct rockchip_pcie *rockchip = data;
+
+	clk_disable_unprepare(rockchip->clk_pcie_pm);
+	clk_disable_unprepare(rockchip->hclk_pcie);
+	clk_disable_unprepare(rockchip->aclk_perf_pcie);
+	clk_disable_unprepare(rockchip->aclk_pcie);
+}
+EXPORT_SYMBOL_GPL(rockchip_pcie_disable_clocks);
+
+void rockchip_pcie_cfg_configuration_accesses(
+		struct rockchip_pcie *rockchip, u32 type)
+{
+	u32 ob_desc_0;
+
+	/* Configuration Accesses for region 0 */
+	rockchip_pcie_write(rockchip, 0x0, PCIE_RC_BAR_CONF);
+
+	rockchip_pcie_write(rockchip,
+			    (RC_REGION_0_ADDR_TRANS_L + RC_REGION_0_PASS_BITS),
+			    PCIE_CORE_OB_REGION_ADDR0);
+	rockchip_pcie_write(rockchip, RC_REGION_0_ADDR_TRANS_H,
+			    PCIE_CORE_OB_REGION_ADDR1);
+	ob_desc_0 = rockchip_pcie_read(rockchip, PCIE_CORE_OB_REGION_DESC0);
+	ob_desc_0 &= ~(RC_REGION_0_TYPE_MASK);
+	ob_desc_0 |= (type | (0x1 << 23));
+	rockchip_pcie_write(rockchip, ob_desc_0, PCIE_CORE_OB_REGION_DESC0);
+	rockchip_pcie_write(rockchip, 0x0, PCIE_CORE_OB_REGION_DESC1);
+}
+EXPORT_SYMBOL_GPL(rockchip_pcie_cfg_configuration_accesses);
diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h
new file mode 100644
index 000000000..6111de35f
--- /dev/null
+++ b/drivers/pci/controller/pcie-rockchip.h
@@ -0,0 +1,343 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Rockchip AXI PCIe controller driver
+ *
+ * Copyright (c) 2018 Rockchip, Inc.
+ *
+ * Author: Shawn Lin <shawn.lin@rock-chips.com>
+ *
+ */
+
+#ifndef _PCIE_ROCKCHIP_H
+#define _PCIE_ROCKCHIP_H
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/pci-ecam.h>
+
+/*
+ * The upper 16 bits of PCIE_CLIENT_CONFIG are a write mask for the lower 16
+ * bits.  This allows atomic updates of the register without locking.
+ */
+#define HIWORD_UPDATE(mask, val)	(((mask) << 16) | (val))
+#define HIWORD_UPDATE_BIT(val)		HIWORD_UPDATE(val, val)
+
+#define ENCODE_LANES(x)			((((x) >> 1) & 3) << 4)
+#define MAX_LANE_NUM			4
+#define MAX_REGION_LIMIT		32
+#define MIN_EP_APERTURE			28
+
+#define PCIE_CLIENT_BASE		0x0
+#define PCIE_CLIENT_CONFIG		(PCIE_CLIENT_BASE + 0x00)
+#define   PCIE_CLIENT_CONF_ENABLE	  HIWORD_UPDATE_BIT(0x0001)
+#define   PCIE_CLIENT_CONF_DISABLE       HIWORD_UPDATE(0x0001, 0)
+#define   PCIE_CLIENT_LINK_TRAIN_ENABLE	  HIWORD_UPDATE_BIT(0x0002)
+#define   PCIE_CLIENT_ARI_ENABLE	  HIWORD_UPDATE_BIT(0x0008)
+#define   PCIE_CLIENT_CONF_LANE_NUM(x)	  HIWORD_UPDATE(0x0030, ENCODE_LANES(x))
+#define   PCIE_CLIENT_MODE_RC		  HIWORD_UPDATE_BIT(0x0040)
+#define   PCIE_CLIENT_MODE_EP            HIWORD_UPDATE(0x0040, 0)
+#define   PCIE_CLIENT_GEN_SEL_1		  HIWORD_UPDATE(0x0080, 0)
+#define   PCIE_CLIENT_GEN_SEL_2		  HIWORD_UPDATE_BIT(0x0080)
+#define PCIE_CLIENT_LEGACY_INT_CTRL	(PCIE_CLIENT_BASE + 0x0c)
+#define   PCIE_CLIENT_INT_IN_ASSERT		HIWORD_UPDATE_BIT(0x0002)
+#define   PCIE_CLIENT_INT_IN_DEASSERT		HIWORD_UPDATE(0x0002, 0)
+#define   PCIE_CLIENT_INT_PEND_ST_PEND		HIWORD_UPDATE_BIT(0x0001)
+#define   PCIE_CLIENT_INT_PEND_ST_NORMAL	HIWORD_UPDATE(0x0001, 0)
+#define PCIE_CLIENT_SIDE_BAND_STATUS	(PCIE_CLIENT_BASE + 0x20)
+#define   PCIE_CLIENT_PHY_ST			BIT(12)
+#define PCIE_CLIENT_DEBUG_OUT_0		(PCIE_CLIENT_BASE + 0x3c)
+#define   PCIE_CLIENT_DEBUG_LTSSM_MASK		GENMASK(5, 0)
+#define   PCIE_CLIENT_DEBUG_LTSSM_L1		0x18
+#define   PCIE_CLIENT_DEBUG_LTSSM_L2		0x19
+#define PCIE_CLIENT_BASIC_STATUS1	(PCIE_CLIENT_BASE + 0x48)
+#define   PCIE_CLIENT_LINK_STATUS_UP		0x00300000
+#define   PCIE_CLIENT_LINK_STATUS_MASK		0x00300000
+#define PCIE_CLIENT_INT_MASK		(PCIE_CLIENT_BASE + 0x4c)
+#define PCIE_CLIENT_INT_STATUS		(PCIE_CLIENT_BASE + 0x50)
+#define   PCIE_CLIENT_INTR_MASK			GENMASK(8, 5)
+#define   PCIE_CLIENT_INTR_SHIFT		5
+#define   PCIE_CLIENT_INT_LEGACY_DONE		BIT(15)
+#define   PCIE_CLIENT_INT_MSG			BIT(14)
+#define   PCIE_CLIENT_INT_HOT_RST		BIT(13)
+#define   PCIE_CLIENT_INT_DPA			BIT(12)
+#define   PCIE_CLIENT_INT_FATAL_ERR		BIT(11)
+#define   PCIE_CLIENT_INT_NFATAL_ERR		BIT(10)
+#define   PCIE_CLIENT_INT_CORR_ERR		BIT(9)
+#define   PCIE_CLIENT_INT_INTD			BIT(8)
+#define   PCIE_CLIENT_INT_INTC			BIT(7)
+#define   PCIE_CLIENT_INT_INTB			BIT(6)
+#define   PCIE_CLIENT_INT_INTA			BIT(5)
+#define   PCIE_CLIENT_INT_LOCAL			BIT(4)
+#define   PCIE_CLIENT_INT_UDMA			BIT(3)
+#define   PCIE_CLIENT_INT_PHY			BIT(2)
+#define   PCIE_CLIENT_INT_HOT_PLUG		BIT(1)
+#define   PCIE_CLIENT_INT_PWR_STCG		BIT(0)
+
+#define PCIE_CLIENT_INT_LEGACY \
+	(PCIE_CLIENT_INT_INTA | PCIE_CLIENT_INT_INTB | \
+	PCIE_CLIENT_INT_INTC | PCIE_CLIENT_INT_INTD)
+
+#define PCIE_CLIENT_INT_CLI \
+	(PCIE_CLIENT_INT_CORR_ERR | PCIE_CLIENT_INT_NFATAL_ERR | \
+	PCIE_CLIENT_INT_FATAL_ERR | PCIE_CLIENT_INT_DPA | \
+	PCIE_CLIENT_INT_HOT_RST | PCIE_CLIENT_INT_MSG | \
+	PCIE_CLIENT_INT_LEGACY_DONE | PCIE_CLIENT_INT_LEGACY | \
+	PCIE_CLIENT_INT_PHY)
+
+#define PCIE_CORE_CTRL_MGMT_BASE	0x900000
+#define PCIE_CORE_CTRL			(PCIE_CORE_CTRL_MGMT_BASE + 0x000)
+#define   PCIE_CORE_PL_CONF_SPEED_5G		0x00000008
+#define   PCIE_CORE_PL_CONF_SPEED_MASK		0x00000018
+#define   PCIE_CORE_PL_CONF_LANE_MASK		0x00000006
+#define   PCIE_CORE_PL_CONF_LANE_SHIFT		1
+#define PCIE_CORE_CTRL_PLC1		(PCIE_CORE_CTRL_MGMT_BASE + 0x004)
+#define   PCIE_CORE_CTRL_PLC1_FTS_MASK		GENMASK(23, 8)
+#define   PCIE_CORE_CTRL_PLC1_FTS_SHIFT		8
+#define   PCIE_CORE_CTRL_PLC1_FTS_CNT		0xffff
+#define PCIE_CORE_TXCREDIT_CFG1		(PCIE_CORE_CTRL_MGMT_BASE + 0x020)
+#define   PCIE_CORE_TXCREDIT_CFG1_MUI_MASK	0xFFFF0000
+#define   PCIE_CORE_TXCREDIT_CFG1_MUI_SHIFT	16
+#define   PCIE_CORE_TXCREDIT_CFG1_MUI_ENCODE(x) \
+		(((x) >> 3) << PCIE_CORE_TXCREDIT_CFG1_MUI_SHIFT)
+#define PCIE_CORE_LANE_MAP             (PCIE_CORE_CTRL_MGMT_BASE + 0x200)
+#define   PCIE_CORE_LANE_MAP_MASK              0x0000000f
+#define   PCIE_CORE_LANE_MAP_REVERSE           BIT(16)
+#define PCIE_CORE_INT_STATUS		(PCIE_CORE_CTRL_MGMT_BASE + 0x20c)
+#define   PCIE_CORE_INT_PRFPE			BIT(0)
+#define   PCIE_CORE_INT_CRFPE			BIT(1)
+#define   PCIE_CORE_INT_RRPE			BIT(2)
+#define   PCIE_CORE_INT_PRFO			BIT(3)
+#define   PCIE_CORE_INT_CRFO			BIT(4)
+#define   PCIE_CORE_INT_RT			BIT(5)
+#define   PCIE_CORE_INT_RTR			BIT(6)
+#define   PCIE_CORE_INT_PE			BIT(7)
+#define   PCIE_CORE_INT_MTR			BIT(8)
+#define   PCIE_CORE_INT_UCR			BIT(9)
+#define   PCIE_CORE_INT_FCE			BIT(10)
+#define   PCIE_CORE_INT_CT			BIT(11)
+#define   PCIE_CORE_INT_UTC			BIT(18)
+#define   PCIE_CORE_INT_MMVC			BIT(19)
+#define PCIE_CORE_CONFIG_VENDOR		(PCIE_CORE_CTRL_MGMT_BASE + 0x44)
+#define PCIE_CORE_INT_MASK		(PCIE_CORE_CTRL_MGMT_BASE + 0x210)
+#define PCIE_CORE_PHY_FUNC_CFG		(PCIE_CORE_CTRL_MGMT_BASE + 0x2c0)
+#define PCIE_RC_BAR_CONF		(PCIE_CORE_CTRL_MGMT_BASE + 0x300)
+#define ROCKCHIP_PCIE_CORE_BAR_CFG_CTRL_DISABLED		0x0
+#define ROCKCHIP_PCIE_CORE_BAR_CFG_CTRL_IO_32BITS		0x1
+#define ROCKCHIP_PCIE_CORE_BAR_CFG_CTRL_MEM_32BITS		0x4
+#define ROCKCHIP_PCIE_CORE_BAR_CFG_CTRL_PREFETCH_MEM_32BITS	0x5
+#define ROCKCHIP_PCIE_CORE_BAR_CFG_CTRL_MEM_64BITS		0x6
+#define ROCKCHIP_PCIE_CORE_BAR_CFG_CTRL_PREFETCH_MEM_64BITS	0x7
+
+#define PCIE_CORE_INT \
+		(PCIE_CORE_INT_PRFPE | PCIE_CORE_INT_CRFPE | \
+		 PCIE_CORE_INT_RRPE | PCIE_CORE_INT_CRFO | \
+		 PCIE_CORE_INT_RT | PCIE_CORE_INT_RTR | \
+		 PCIE_CORE_INT_PE | PCIE_CORE_INT_MTR | \
+		 PCIE_CORE_INT_UCR | PCIE_CORE_INT_FCE | \
+		 PCIE_CORE_INT_CT | PCIE_CORE_INT_UTC | \
+		 PCIE_CORE_INT_MMVC)
+
+#define PCIE_RC_RP_ATS_BASE		0x400000
+#define PCIE_RC_CONFIG_NORMAL_BASE	0x800000
+#define PCIE_EP_PF_CONFIG_REGS_BASE	0x800000
+#define PCIE_RC_CONFIG_BASE		0xa00000
+#define PCIE_EP_CONFIG_BASE		0xa00000
+#define PCIE_EP_CONFIG_DID_VID		(PCIE_EP_CONFIG_BASE + 0x00)
+#define PCIE_RC_CONFIG_RID_CCR		(PCIE_RC_CONFIG_BASE + 0x08)
+#define PCIE_RC_CONFIG_DCR		(PCIE_RC_CONFIG_BASE + 0xc4)
+#define   PCIE_RC_CONFIG_DCR_CSPL_SHIFT		18
+#define   PCIE_RC_CONFIG_DCR_CSPL_LIMIT		0xff
+#define   PCIE_RC_CONFIG_DCR_CPLS_SHIFT		26
+#define PCIE_RC_CONFIG_DCSR		(PCIE_RC_CONFIG_BASE + 0xc8)
+#define   PCIE_RC_CONFIG_DCSR_MPS_MASK		GENMASK(7, 5)
+#define   PCIE_RC_CONFIG_DCSR_MPS_256		(0x1 << 5)
+#define PCIE_RC_CONFIG_LINK_CAP		(PCIE_RC_CONFIG_BASE + 0xcc)
+#define   PCIE_RC_CONFIG_LINK_CAP_L0S		BIT(10)
+#define PCIE_RC_CONFIG_LCS		(PCIE_RC_CONFIG_BASE + 0xd0)
+#define PCIE_RC_CONFIG_L1_SUBSTATE_CTRL2 (PCIE_RC_CONFIG_BASE + 0x90c)
+#define PCIE_RC_CONFIG_THP_CAP		(PCIE_RC_CONFIG_BASE + 0x274)
+#define   PCIE_RC_CONFIG_THP_CAP_NEXT_MASK	GENMASK(31, 20)
+
+#define MAX_AXI_IB_ROOTPORT_REGION_NUM		3
+#define MIN_AXI_ADDR_BITS_PASSED		8
+#define PCIE_ADDR_MASK			GENMASK_ULL(63, MIN_AXI_ADDR_BITS_PASSED)
+#define PCIE_CORE_AXI_CONF_BASE		0xc00000
+#define PCIE_CORE_OB_REGION_ADDR0	(PCIE_CORE_AXI_CONF_BASE + 0x0)
+#define   PCIE_CORE_OB_REGION_ADDR0_NUM_BITS	0x3f
+#define   PCIE_CORE_OB_REGION_ADDR0_LO_ADDR	PCIE_ADDR_MASK
+#define PCIE_CORE_OB_REGION_ADDR1	(PCIE_CORE_AXI_CONF_BASE + 0x4)
+#define PCIE_CORE_OB_REGION_DESC0	(PCIE_CORE_AXI_CONF_BASE + 0x8)
+#define PCIE_CORE_OB_REGION_DESC1	(PCIE_CORE_AXI_CONF_BASE + 0xc)
+
+#define PCIE_CORE_AXI_INBOUND_BASE	0xc00800
+#define PCIE_RP_IB_ADDR0		(PCIE_CORE_AXI_INBOUND_BASE + 0x0)
+#define   PCIE_CORE_IB_REGION_ADDR0_NUM_BITS	0x3f
+#define   PCIE_CORE_IB_REGION_ADDR0_LO_ADDR	PCIE_ADDR_MASK
+#define PCIE_RP_IB_ADDR1		(PCIE_CORE_AXI_INBOUND_BASE + 0x4)
+
+/* Size of one AXI Region (not Region 0) */
+#define AXI_REGION_SIZE				BIT(20)
+/* Size of Region 0, equal to sum of sizes of other regions */
+#define AXI_REGION_0_SIZE			(32 * (0x1 << 20))
+#define OB_REG_SIZE_SHIFT			5
+#define IB_ROOT_PORT_REG_SIZE_SHIFT		3
+#define AXI_WRAPPER_IO_WRITE			0x6
+#define AXI_WRAPPER_MEM_WRITE			0x2
+#define AXI_WRAPPER_TYPE0_CFG			0xa
+#define AXI_WRAPPER_TYPE1_CFG			0xb
+#define AXI_WRAPPER_NOR_MSG			0xc
+
+#define PCIE_RC_SEND_PME_OFF			0x11960
+#define ROCKCHIP_VENDOR_ID			0x1d87
+#define PCIE_LINK_IS_L2(x) \
+	(((x) & PCIE_CLIENT_DEBUG_LTSSM_MASK) == PCIE_CLIENT_DEBUG_LTSSM_L2)
+#define PCIE_LINK_UP(x) \
+	(((x) & PCIE_CLIENT_LINK_STATUS_MASK) == PCIE_CLIENT_LINK_STATUS_UP)
+#define PCIE_LINK_IS_GEN2(x) \
+	(((x) & PCIE_CORE_PL_CONF_SPEED_MASK) == PCIE_CORE_PL_CONF_SPEED_5G)
+
+#define RC_REGION_0_ADDR_TRANS_H		0x00000000
+#define RC_REGION_0_ADDR_TRANS_L		0x00000000
+#define RC_REGION_0_PASS_BITS			(25 - 1)
+#define RC_REGION_0_TYPE_MASK			GENMASK(3, 0)
+#define MAX_AXI_WRAPPER_REGION_NUM		33
+
+#define ROCKCHIP_PCIE_MSG_ROUTING_TO_RC		0x0
+#define ROCKCHIP_PCIE_MSG_ROUTING_VIA_ADDR		0x1
+#define ROCKCHIP_PCIE_MSG_ROUTING_VIA_ID		0x2
+#define ROCKCHIP_PCIE_MSG_ROUTING_BROADCAST		0x3
+#define ROCKCHIP_PCIE_MSG_ROUTING_LOCAL_INTX		0x4
+#define ROCKCHIP_PCIE_MSG_ROUTING_PME_ACK		0x5
+#define ROCKCHIP_PCIE_MSG_CODE_ASSERT_INTA		0x20
+#define ROCKCHIP_PCIE_MSG_CODE_ASSERT_INTB		0x21
+#define ROCKCHIP_PCIE_MSG_CODE_ASSERT_INTC		0x22
+#define ROCKCHIP_PCIE_MSG_CODE_ASSERT_INTD		0x23
+#define ROCKCHIP_PCIE_MSG_CODE_DEASSERT_INTA		0x24
+#define ROCKCHIP_PCIE_MSG_CODE_DEASSERT_INTB		0x25
+#define ROCKCHIP_PCIE_MSG_CODE_DEASSERT_INTC		0x26
+#define ROCKCHIP_PCIE_MSG_CODE_DEASSERT_INTD		0x27
+#define ROCKCHIP_PCIE_MSG_ROUTING_MASK			GENMASK(7, 5)
+#define ROCKCHIP_PCIE_MSG_ROUTING(route) \
+	(((route) << 5) & ROCKCHIP_PCIE_MSG_ROUTING_MASK)
+#define ROCKCHIP_PCIE_MSG_CODE_MASK			GENMASK(15, 8)
+#define ROCKCHIP_PCIE_MSG_CODE(code) \
+	(((code) << 8) & ROCKCHIP_PCIE_MSG_CODE_MASK)
+#define ROCKCHIP_PCIE_MSG_NO_DATA			BIT(16)
+
+#define ROCKCHIP_PCIE_EP_CMD_STATUS			0x4
+#define   ROCKCHIP_PCIE_EP_CMD_STATUS_IS		BIT(19)
+#define ROCKCHIP_PCIE_EP_MSI_CTRL_REG			0x90
+#define   ROCKCHIP_PCIE_EP_MSI_CP1_OFFSET		8
+#define   ROCKCHIP_PCIE_EP_MSI_CP1_MASK			GENMASK(15, 8)
+#define   ROCKCHIP_PCIE_EP_MSI_FLAGS_OFFSET		16
+#define   ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_OFFSET		17
+#define   ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_MASK		GENMASK(19, 17)
+#define   ROCKCHIP_PCIE_EP_MSI_CTRL_MME_OFFSET		20
+#define   ROCKCHIP_PCIE_EP_MSI_CTRL_MME_MASK		GENMASK(22, 20)
+#define   ROCKCHIP_PCIE_EP_MSI_CTRL_ME				BIT(16)
+#define   ROCKCHIP_PCIE_EP_MSI_CTRL_MASK_MSI_CAP	BIT(24)
+#define ROCKCHIP_PCIE_EP_MSIX_CAP_REG			0xb0
+#define   ROCKCHIP_PCIE_EP_MSIX_CAP_CP_OFFSET		8
+#define   ROCKCHIP_PCIE_EP_MSIX_CAP_CP_MASK		GENMASK(15, 8)
+#define ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR				0x1
+#define ROCKCHIP_PCIE_EP_PCI_LEGACY_IRQ_ADDR		0x3
+#define ROCKCHIP_PCIE_EP_FUNC_BASE(fn) \
+	(PCIE_EP_PF_CONFIG_REGS_BASE + (((fn) << 12) & GENMASK(19, 12)))
+#define ROCKCHIP_PCIE_EP_VIRT_FUNC_BASE(fn) \
+	(PCIE_EP_PF_CONFIG_REGS_BASE + 0x10000 + (((fn) << 12) & GENMASK(19, 12)))
+#define ROCKCHIP_PCIE_AT_IB_EP_FUNC_BAR_ADDR0(fn, bar) \
+	(PCIE_CORE_AXI_CONF_BASE + 0x0828 + (fn) * 0x0040 + (bar) * 0x0008)
+#define ROCKCHIP_PCIE_AT_IB_EP_FUNC_BAR_ADDR1(fn, bar) \
+	(PCIE_CORE_AXI_CONF_BASE + 0x082c + (fn) * 0x0040 + (bar) * 0x0008)
+#define ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0_DEVFN_MASK	GENMASK(19, 12)
+#define ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0_DEVFN(devfn) \
+	(((devfn) << 12) & \
+		 ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0_DEVFN_MASK)
+#define ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0_BUS_MASK	GENMASK(27, 20)
+#define ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0_BUS(bus) \
+		(((bus) << 20) & ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0_BUS_MASK)
+#define PCIE_RC_EP_ATR_OB_REGIONS_1_32 (PCIE_CORE_AXI_CONF_BASE + 0x0020)
+#define ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0(r) \
+		(PCIE_RC_EP_ATR_OB_REGIONS_1_32 + 0x0000 + ((r) & 0x1f) * 0x0020)
+#define ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR1(r) \
+		(PCIE_RC_EP_ATR_OB_REGIONS_1_32 + 0x0004 + ((r) & 0x1f) * 0x0020)
+#define ROCKCHIP_PCIE_AT_OB_REGION_DESC0_HARDCODED_RID	BIT(23)
+#define ROCKCHIP_PCIE_AT_OB_REGION_DESC0_DEVFN_MASK	GENMASK(31, 24)
+#define ROCKCHIP_PCIE_AT_OB_REGION_DESC0_DEVFN(devfn) \
+		(((devfn) << 24) & ROCKCHIP_PCIE_AT_OB_REGION_DESC0_DEVFN_MASK)
+#define ROCKCHIP_PCIE_AT_OB_REGION_DESC0(r) \
+		(PCIE_RC_EP_ATR_OB_REGIONS_1_32 + 0x0008 + ((r) & 0x1f) * 0x0020)
+#define ROCKCHIP_PCIE_AT_OB_REGION_DESC1(r) \
+		(PCIE_RC_EP_ATR_OB_REGIONS_1_32 + 0x000c + ((r) & 0x1f) * 0x0020)
+#define ROCKCHIP_PCIE_AT_OB_REGION_DESC2(r) \
+		(PCIE_RC_EP_ATR_OB_REGIONS_1_32 + 0x0010 + ((r) & 0x1f) * 0x0020)
+
+#define ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG0(fn) \
+		(PCIE_CORE_CTRL_MGMT_BASE + 0x0240 + (fn) * 0x0008)
+#define ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG1(fn) \
+		(PCIE_CORE_CTRL_MGMT_BASE + 0x0244 + (fn) * 0x0008)
+#define ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG_BAR_APERTURE_MASK(b) \
+		(GENMASK(4, 0) << ((b) * 8))
+#define ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG_BAR_APERTURE(b, a) \
+		(((a) << ((b) * 8)) & \
+		 ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG_BAR_APERTURE_MASK(b))
+#define ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG_BAR_CTRL_MASK(b) \
+		(GENMASK(7, 5) << ((b) * 8))
+#define ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG_BAR_CTRL(b, c) \
+		(((c) << ((b) * 8 + 5)) & \
+		 ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG_BAR_CTRL_MASK(b))
+
+struct rockchip_pcie {
+	void	__iomem *reg_base;		/* DT axi-base */
+	void	__iomem *apb_base;		/* DT apb-base */
+	bool    legacy_phy;
+	struct  phy *phys[MAX_LANE_NUM];
+	struct	reset_control *core_rst;
+	struct	reset_control *mgmt_rst;
+	struct	reset_control *mgmt_sticky_rst;
+	struct	reset_control *pipe_rst;
+	struct	reset_control *pm_rst;
+	struct	reset_control *aclk_rst;
+	struct	reset_control *pclk_rst;
+	struct	clk *aclk_pcie;
+	struct	clk *aclk_perf_pcie;
+	struct	clk *hclk_pcie;
+	struct	clk *clk_pcie_pm;
+	struct	regulator *vpcie12v; /* 12V power supply */
+	struct	regulator *vpcie3v3; /* 3.3V power supply */
+	struct	regulator *vpcie1v8; /* 1.8V power supply */
+	struct	regulator *vpcie0v9; /* 0.9V power supply */
+	struct	gpio_desc *ep_gpio;
+	u32	lanes;
+	u8      lanes_map;
+	int	link_gen;
+	struct	device *dev;
+	struct	irq_domain *irq_domain;
+	int     offset;
+	void    __iomem *msg_region;
+	phys_addr_t msg_bus_addr;
+	bool is_rc;
+	struct resource *mem_res;
+};
+
+static u32 rockchip_pcie_read(struct rockchip_pcie *rockchip, u32 reg)
+{
+	return readl(rockchip->apb_base + reg);
+}
+
+static void rockchip_pcie_write(struct rockchip_pcie *rockchip, u32 val,
+				u32 reg)
+{
+	writel(val, rockchip->apb_base + reg);
+}
+
+int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip);
+int rockchip_pcie_init_port(struct rockchip_pcie *rockchip);
+int rockchip_pcie_get_phys(struct rockchip_pcie *rockchip);
+void rockchip_pcie_deinit_phys(struct rockchip_pcie *rockchip);
+int rockchip_pcie_enable_clocks(struct rockchip_pcie *rockchip);
+void rockchip_pcie_disable_clocks(void *data);
+void rockchip_pcie_cfg_configuration_accesses(
+		struct rockchip_pcie *rockchip, u32 type);
+
+#endif /* _PCIE_ROCKCHIP_H */
diff --git a/drivers/pci/controller/pcie-xilinx-cpm.c b/drivers/pci/controller/pcie-xilinx-cpm.c
new file mode 100644
index 000000000..4a787a941
--- /dev/null
+++ b/drivers/pci/controller/pcie-xilinx-cpm.c
@@ -0,0 +1,663 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCIe host controller driver for Xilinx Versal CPM DMA Bridge
+ *
+ * (C) Copyright 2019 - 2020, Xilinx, Inc.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/pci-ecam.h>
+
+#include "../pci.h"
+
+/* Register definitions */
+#define XILINX_CPM_PCIE_REG_IDR		0x00000E10
+#define XILINX_CPM_PCIE_REG_IMR		0x00000E14
+#define XILINX_CPM_PCIE_REG_PSCR	0x00000E1C
+#define XILINX_CPM_PCIE_REG_RPSC	0x00000E20
+#define XILINX_CPM_PCIE_REG_RPEFR	0x00000E2C
+#define XILINX_CPM_PCIE_REG_IDRN	0x00000E38
+#define XILINX_CPM_PCIE_REG_IDRN_MASK	0x00000E3C
+#define XILINX_CPM_PCIE_MISC_IR_STATUS	0x00000340
+#define XILINX_CPM_PCIE_MISC_IR_ENABLE	0x00000348
+#define XILINX_CPM_PCIE_MISC_IR_LOCAL	BIT(1)
+
+#define XILINX_CPM_PCIE_IR_STATUS       0x000002A0
+#define XILINX_CPM_PCIE_IR_ENABLE       0x000002A8
+#define XILINX_CPM_PCIE_IR_LOCAL        BIT(0)
+
+/* Interrupt registers definitions */
+#define XILINX_CPM_PCIE_INTR_LINK_DOWN		0
+#define XILINX_CPM_PCIE_INTR_HOT_RESET		3
+#define XILINX_CPM_PCIE_INTR_CFG_PCIE_TIMEOUT	4
+#define XILINX_CPM_PCIE_INTR_CFG_TIMEOUT	8
+#define XILINX_CPM_PCIE_INTR_CORRECTABLE	9
+#define XILINX_CPM_PCIE_INTR_NONFATAL		10
+#define XILINX_CPM_PCIE_INTR_FATAL		11
+#define XILINX_CPM_PCIE_INTR_CFG_ERR_POISON	12
+#define XILINX_CPM_PCIE_INTR_PME_TO_ACK_RCVD	15
+#define XILINX_CPM_PCIE_INTR_INTX		16
+#define XILINX_CPM_PCIE_INTR_PM_PME_RCVD	17
+#define XILINX_CPM_PCIE_INTR_SLV_UNSUPP		20
+#define XILINX_CPM_PCIE_INTR_SLV_UNEXP		21
+#define XILINX_CPM_PCIE_INTR_SLV_COMPL		22
+#define XILINX_CPM_PCIE_INTR_SLV_ERRP		23
+#define XILINX_CPM_PCIE_INTR_SLV_CMPABT		24
+#define XILINX_CPM_PCIE_INTR_SLV_ILLBUR		25
+#define XILINX_CPM_PCIE_INTR_MST_DECERR		26
+#define XILINX_CPM_PCIE_INTR_MST_SLVERR		27
+#define XILINX_CPM_PCIE_INTR_SLV_PCIE_TIMEOUT	28
+
+#define IMR(x) BIT(XILINX_CPM_PCIE_INTR_ ##x)
+
+#define XILINX_CPM_PCIE_IMR_ALL_MASK			\
+	(						\
+		IMR(LINK_DOWN)		|		\
+		IMR(HOT_RESET)		|		\
+		IMR(CFG_PCIE_TIMEOUT)	|		\
+		IMR(CFG_TIMEOUT)	|		\
+		IMR(CORRECTABLE)	|		\
+		IMR(NONFATAL)		|		\
+		IMR(FATAL)		|		\
+		IMR(CFG_ERR_POISON)	|		\
+		IMR(PME_TO_ACK_RCVD)	|		\
+		IMR(INTX)		|		\
+		IMR(PM_PME_RCVD)	|		\
+		IMR(SLV_UNSUPP)		|		\
+		IMR(SLV_UNEXP)		|		\
+		IMR(SLV_COMPL)		|		\
+		IMR(SLV_ERRP)		|		\
+		IMR(SLV_CMPABT)		|		\
+		IMR(SLV_ILLBUR)		|		\
+		IMR(MST_DECERR)		|		\
+		IMR(MST_SLVERR)		|		\
+		IMR(SLV_PCIE_TIMEOUT)			\
+	)
+
+#define XILINX_CPM_PCIE_IDR_ALL_MASK		0xFFFFFFFF
+#define XILINX_CPM_PCIE_IDRN_MASK		GENMASK(19, 16)
+#define XILINX_CPM_PCIE_IDRN_SHIFT		16
+
+/* Root Port Error FIFO Read Register definitions */
+#define XILINX_CPM_PCIE_RPEFR_ERR_VALID		BIT(18)
+#define XILINX_CPM_PCIE_RPEFR_REQ_ID		GENMASK(15, 0)
+#define XILINX_CPM_PCIE_RPEFR_ALL_MASK		0xFFFFFFFF
+
+/* Root Port Status/control Register definitions */
+#define XILINX_CPM_PCIE_REG_RPSC_BEN		BIT(0)
+
+/* Phy Status/Control Register definitions */
+#define XILINX_CPM_PCIE_REG_PSCR_LNKUP		BIT(11)
+
+enum xilinx_cpm_version {
+	CPM,
+	CPM5,
+};
+
+/**
+ * struct xilinx_cpm_variant - CPM variant information
+ * @version: CPM version
+ */
+struct xilinx_cpm_variant {
+	enum xilinx_cpm_version version;
+};
+
+/**
+ * struct xilinx_cpm_pcie - PCIe port information
+ * @dev: Device pointer
+ * @reg_base: Bridge Register Base
+ * @cpm_base: CPM System Level Control and Status Register(SLCR) Base
+ * @intx_domain: Legacy IRQ domain pointer
+ * @cpm_domain: CPM IRQ domain pointer
+ * @cfg: Holds mappings of config space window
+ * @intx_irq: legacy interrupt number
+ * @irq: Error interrupt number
+ * @lock: lock protecting shared register access
+ * @variant: CPM version check pointer
+ */
+struct xilinx_cpm_pcie {
+	struct device			*dev;
+	void __iomem			*reg_base;
+	void __iomem			*cpm_base;
+	struct irq_domain		*intx_domain;
+	struct irq_domain		*cpm_domain;
+	struct pci_config_window	*cfg;
+	int				intx_irq;
+	int				irq;
+	raw_spinlock_t			lock;
+	const struct xilinx_cpm_variant   *variant;
+};
+
+static u32 pcie_read(struct xilinx_cpm_pcie *port, u32 reg)
+{
+	return readl_relaxed(port->reg_base + reg);
+}
+
+static void pcie_write(struct xilinx_cpm_pcie *port,
+		       u32 val, u32 reg)
+{
+	writel_relaxed(val, port->reg_base + reg);
+}
+
+static bool cpm_pcie_link_up(struct xilinx_cpm_pcie *port)
+{
+	return (pcie_read(port, XILINX_CPM_PCIE_REG_PSCR) &
+		XILINX_CPM_PCIE_REG_PSCR_LNKUP);
+}
+
+static void cpm_pcie_clear_err_interrupts(struct xilinx_cpm_pcie *port)
+{
+	unsigned long val = pcie_read(port, XILINX_CPM_PCIE_REG_RPEFR);
+
+	if (val & XILINX_CPM_PCIE_RPEFR_ERR_VALID) {
+		dev_dbg(port->dev, "Requester ID %lu\n",
+			val & XILINX_CPM_PCIE_RPEFR_REQ_ID);
+		pcie_write(port, XILINX_CPM_PCIE_RPEFR_ALL_MASK,
+			   XILINX_CPM_PCIE_REG_RPEFR);
+	}
+}
+
+static void xilinx_cpm_mask_leg_irq(struct irq_data *data)
+{
+	struct xilinx_cpm_pcie *port = irq_data_get_irq_chip_data(data);
+	unsigned long flags;
+	u32 mask;
+	u32 val;
+
+	mask = BIT(data->hwirq + XILINX_CPM_PCIE_IDRN_SHIFT);
+	raw_spin_lock_irqsave(&port->lock, flags);
+	val = pcie_read(port, XILINX_CPM_PCIE_REG_IDRN_MASK);
+	pcie_write(port, (val & (~mask)), XILINX_CPM_PCIE_REG_IDRN_MASK);
+	raw_spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void xilinx_cpm_unmask_leg_irq(struct irq_data *data)
+{
+	struct xilinx_cpm_pcie *port = irq_data_get_irq_chip_data(data);
+	unsigned long flags;
+	u32 mask;
+	u32 val;
+
+	mask = BIT(data->hwirq + XILINX_CPM_PCIE_IDRN_SHIFT);
+	raw_spin_lock_irqsave(&port->lock, flags);
+	val = pcie_read(port, XILINX_CPM_PCIE_REG_IDRN_MASK);
+	pcie_write(port, (val | mask), XILINX_CPM_PCIE_REG_IDRN_MASK);
+	raw_spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static struct irq_chip xilinx_cpm_leg_irq_chip = {
+	.name		= "INTx",
+	.irq_mask	= xilinx_cpm_mask_leg_irq,
+	.irq_unmask	= xilinx_cpm_unmask_leg_irq,
+};
+
+/**
+ * xilinx_cpm_pcie_intx_map - Set the handler for the INTx and mark IRQ as valid
+ * @domain: IRQ domain
+ * @irq: Virtual IRQ number
+ * @hwirq: HW interrupt number
+ *
+ * Return: Always returns 0.
+ */
+static int xilinx_cpm_pcie_intx_map(struct irq_domain *domain,
+				    unsigned int irq, irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &xilinx_cpm_leg_irq_chip,
+				 handle_level_irq);
+	irq_set_chip_data(irq, domain->host_data);
+	irq_set_status_flags(irq, IRQ_LEVEL);
+
+	return 0;
+}
+
+/* INTx IRQ Domain operations */
+static const struct irq_domain_ops intx_domain_ops = {
+	.map = xilinx_cpm_pcie_intx_map,
+};
+
+static void xilinx_cpm_pcie_intx_flow(struct irq_desc *desc)
+{
+	struct xilinx_cpm_pcie *port = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned long val;
+	int i;
+
+	chained_irq_enter(chip, desc);
+
+	val = FIELD_GET(XILINX_CPM_PCIE_IDRN_MASK,
+			pcie_read(port, XILINX_CPM_PCIE_REG_IDRN));
+
+	for_each_set_bit(i, &val, PCI_NUM_INTX)
+		generic_handle_domain_irq(port->intx_domain, i);
+
+	chained_irq_exit(chip, desc);
+}
+
+static void xilinx_cpm_mask_event_irq(struct irq_data *d)
+{
+	struct xilinx_cpm_pcie *port = irq_data_get_irq_chip_data(d);
+	u32 val;
+
+	raw_spin_lock(&port->lock);
+	val = pcie_read(port, XILINX_CPM_PCIE_REG_IMR);
+	val &= ~BIT(d->hwirq);
+	pcie_write(port, val, XILINX_CPM_PCIE_REG_IMR);
+	raw_spin_unlock(&port->lock);
+}
+
+static void xilinx_cpm_unmask_event_irq(struct irq_data *d)
+{
+	struct xilinx_cpm_pcie *port = irq_data_get_irq_chip_data(d);
+	u32 val;
+
+	raw_spin_lock(&port->lock);
+	val = pcie_read(port, XILINX_CPM_PCIE_REG_IMR);
+	val |= BIT(d->hwirq);
+	pcie_write(port, val, XILINX_CPM_PCIE_REG_IMR);
+	raw_spin_unlock(&port->lock);
+}
+
+static struct irq_chip xilinx_cpm_event_irq_chip = {
+	.name		= "RC-Event",
+	.irq_mask	= xilinx_cpm_mask_event_irq,
+	.irq_unmask	= xilinx_cpm_unmask_event_irq,
+};
+
+static int xilinx_cpm_pcie_event_map(struct irq_domain *domain,
+				     unsigned int irq, irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &xilinx_cpm_event_irq_chip,
+				 handle_level_irq);
+	irq_set_chip_data(irq, domain->host_data);
+	irq_set_status_flags(irq, IRQ_LEVEL);
+	return 0;
+}
+
+static const struct irq_domain_ops event_domain_ops = {
+	.map = xilinx_cpm_pcie_event_map,
+};
+
+static void xilinx_cpm_pcie_event_flow(struct irq_desc *desc)
+{
+	struct xilinx_cpm_pcie *port = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned long val;
+	int i;
+
+	chained_irq_enter(chip, desc);
+	val =  pcie_read(port, XILINX_CPM_PCIE_REG_IDR);
+	val &= pcie_read(port, XILINX_CPM_PCIE_REG_IMR);
+	for_each_set_bit(i, &val, 32)
+		generic_handle_domain_irq(port->cpm_domain, i);
+	pcie_write(port, val, XILINX_CPM_PCIE_REG_IDR);
+
+	if (port->variant->version == CPM5) {
+		val = readl_relaxed(port->cpm_base + XILINX_CPM_PCIE_IR_STATUS);
+		if (val)
+			writel_relaxed(val, port->cpm_base +
+					    XILINX_CPM_PCIE_IR_STATUS);
+	}
+
+	/*
+	 * XILINX_CPM_PCIE_MISC_IR_STATUS register is mapped to
+	 * CPM SLCR block.
+	 */
+	val = readl_relaxed(port->cpm_base + XILINX_CPM_PCIE_MISC_IR_STATUS);
+	if (val)
+		writel_relaxed(val,
+			       port->cpm_base + XILINX_CPM_PCIE_MISC_IR_STATUS);
+
+	chained_irq_exit(chip, desc);
+}
+
+#define _IC(x, s)                              \
+	[XILINX_CPM_PCIE_INTR_ ## x] = { __stringify(x), s }
+
+static const struct {
+	const char      *sym;
+	const char      *str;
+} intr_cause[32] = {
+	_IC(LINK_DOWN,		"Link Down"),
+	_IC(HOT_RESET,		"Hot reset"),
+	_IC(CFG_TIMEOUT,	"ECAM access timeout"),
+	_IC(CORRECTABLE,	"Correctable error message"),
+	_IC(NONFATAL,		"Non fatal error message"),
+	_IC(FATAL,		"Fatal error message"),
+	_IC(SLV_UNSUPP,		"Slave unsupported request"),
+	_IC(SLV_UNEXP,		"Slave unexpected completion"),
+	_IC(SLV_COMPL,		"Slave completion timeout"),
+	_IC(SLV_ERRP,		"Slave Error Poison"),
+	_IC(SLV_CMPABT,		"Slave Completer Abort"),
+	_IC(SLV_ILLBUR,		"Slave Illegal Burst"),
+	_IC(MST_DECERR,		"Master decode error"),
+	_IC(MST_SLVERR,		"Master slave error"),
+	_IC(CFG_PCIE_TIMEOUT,	"PCIe ECAM access timeout"),
+	_IC(CFG_ERR_POISON,	"ECAM poisoned completion received"),
+	_IC(PME_TO_ACK_RCVD,	"PME_TO_ACK message received"),
+	_IC(PM_PME_RCVD,	"PM_PME message received"),
+	_IC(SLV_PCIE_TIMEOUT,	"PCIe completion timeout received"),
+};
+
+static irqreturn_t xilinx_cpm_pcie_intr_handler(int irq, void *dev_id)
+{
+	struct xilinx_cpm_pcie *port = dev_id;
+	struct device *dev = port->dev;
+	struct irq_data *d;
+
+	d = irq_domain_get_irq_data(port->cpm_domain, irq);
+
+	switch (d->hwirq) {
+	case XILINX_CPM_PCIE_INTR_CORRECTABLE:
+	case XILINX_CPM_PCIE_INTR_NONFATAL:
+	case XILINX_CPM_PCIE_INTR_FATAL:
+		cpm_pcie_clear_err_interrupts(port);
+		fallthrough;
+
+	default:
+		if (intr_cause[d->hwirq].str)
+			dev_warn(dev, "%s\n", intr_cause[d->hwirq].str);
+		else
+			dev_warn(dev, "Unknown IRQ %ld\n", d->hwirq);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void xilinx_cpm_free_irq_domains(struct xilinx_cpm_pcie *port)
+{
+	if (port->intx_domain) {
+		irq_domain_remove(port->intx_domain);
+		port->intx_domain = NULL;
+	}
+
+	if (port->cpm_domain) {
+		irq_domain_remove(port->cpm_domain);
+		port->cpm_domain = NULL;
+	}
+}
+
+/**
+ * xilinx_cpm_pcie_init_irq_domain - Initialize IRQ domain
+ * @port: PCIe port information
+ *
+ * Return: '0' on success and error value on failure
+ */
+static int xilinx_cpm_pcie_init_irq_domain(struct xilinx_cpm_pcie *port)
+{
+	struct device *dev = port->dev;
+	struct device_node *node = dev->of_node;
+	struct device_node *pcie_intc_node;
+
+	/* Setup INTx */
+	pcie_intc_node = of_get_next_child(node, NULL);
+	if (!pcie_intc_node) {
+		dev_err(dev, "No PCIe Intc node found\n");
+		return -EINVAL;
+	}
+
+	port->cpm_domain = irq_domain_add_linear(pcie_intc_node, 32,
+						 &event_domain_ops,
+						 port);
+	if (!port->cpm_domain)
+		goto out;
+
+	irq_domain_update_bus_token(port->cpm_domain, DOMAIN_BUS_NEXUS);
+
+	port->intx_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
+						  &intx_domain_ops,
+						  port);
+	if (!port->intx_domain)
+		goto out;
+
+	irq_domain_update_bus_token(port->intx_domain, DOMAIN_BUS_WIRED);
+
+	of_node_put(pcie_intc_node);
+	raw_spin_lock_init(&port->lock);
+
+	return 0;
+out:
+	xilinx_cpm_free_irq_domains(port);
+	of_node_put(pcie_intc_node);
+	dev_err(dev, "Failed to allocate IRQ domains\n");
+
+	return -ENOMEM;
+}
+
+static int xilinx_cpm_setup_irq(struct xilinx_cpm_pcie *port)
+{
+	struct device *dev = port->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	int i, irq;
+
+	port->irq = platform_get_irq(pdev, 0);
+	if (port->irq < 0)
+		return port->irq;
+
+	for (i = 0; i < ARRAY_SIZE(intr_cause); i++) {
+		int err;
+
+		if (!intr_cause[i].str)
+			continue;
+
+		irq = irq_create_mapping(port->cpm_domain, i);
+		if (!irq) {
+			dev_err(dev, "Failed to map interrupt\n");
+			return -ENXIO;
+		}
+
+		err = devm_request_irq(dev, irq, xilinx_cpm_pcie_intr_handler,
+				       0, intr_cause[i].sym, port);
+		if (err) {
+			dev_err(dev, "Failed to request IRQ %d\n", irq);
+			return err;
+		}
+	}
+
+	port->intx_irq = irq_create_mapping(port->cpm_domain,
+					    XILINX_CPM_PCIE_INTR_INTX);
+	if (!port->intx_irq) {
+		dev_err(dev, "Failed to map INTx interrupt\n");
+		return -ENXIO;
+	}
+
+	/* Plug the INTx chained handler */
+	irq_set_chained_handler_and_data(port->intx_irq,
+					 xilinx_cpm_pcie_intx_flow, port);
+
+	/* Plug the main event chained handler */
+	irq_set_chained_handler_and_data(port->irq,
+					 xilinx_cpm_pcie_event_flow, port);
+
+	return 0;
+}
+
+/**
+ * xilinx_cpm_pcie_init_port - Initialize hardware
+ * @port: PCIe port information
+ */
+static void xilinx_cpm_pcie_init_port(struct xilinx_cpm_pcie *port)
+{
+	if (cpm_pcie_link_up(port))
+		dev_info(port->dev, "PCIe Link is UP\n");
+	else
+		dev_info(port->dev, "PCIe Link is DOWN\n");
+
+	/* Disable all interrupts */
+	pcie_write(port, ~XILINX_CPM_PCIE_IDR_ALL_MASK,
+		   XILINX_CPM_PCIE_REG_IMR);
+
+	/* Clear pending interrupts */
+	pcie_write(port, pcie_read(port, XILINX_CPM_PCIE_REG_IDR) &
+		   XILINX_CPM_PCIE_IMR_ALL_MASK,
+		   XILINX_CPM_PCIE_REG_IDR);
+
+	/*
+	 * XILINX_CPM_PCIE_MISC_IR_ENABLE register is mapped to
+	 * CPM SLCR block.
+	 */
+	writel(XILINX_CPM_PCIE_MISC_IR_LOCAL,
+	       port->cpm_base + XILINX_CPM_PCIE_MISC_IR_ENABLE);
+
+	if (port->variant->version == CPM5) {
+		writel(XILINX_CPM_PCIE_IR_LOCAL,
+		       port->cpm_base + XILINX_CPM_PCIE_IR_ENABLE);
+	}
+
+	/* Enable the Bridge enable bit */
+	pcie_write(port, pcie_read(port, XILINX_CPM_PCIE_REG_RPSC) |
+		   XILINX_CPM_PCIE_REG_RPSC_BEN,
+		   XILINX_CPM_PCIE_REG_RPSC);
+}
+
+/**
+ * xilinx_cpm_pcie_parse_dt - Parse Device tree
+ * @port: PCIe port information
+ * @bus_range: Bus resource
+ *
+ * Return: '0' on success and error value on failure
+ */
+static int xilinx_cpm_pcie_parse_dt(struct xilinx_cpm_pcie *port,
+				    struct resource *bus_range)
+{
+	struct device *dev = port->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct resource *res;
+
+	port->cpm_base = devm_platform_ioremap_resource_byname(pdev,
+							       "cpm_slcr");
+	if (IS_ERR(port->cpm_base))
+		return PTR_ERR(port->cpm_base);
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cfg");
+	if (!res)
+		return -ENXIO;
+
+	port->cfg = pci_ecam_create(dev, res, bus_range,
+				    &pci_generic_ecam_ops);
+	if (IS_ERR(port->cfg))
+		return PTR_ERR(port->cfg);
+
+	if (port->variant->version == CPM5) {
+		port->reg_base = devm_platform_ioremap_resource_byname(pdev,
+								    "cpm_csr");
+		if (IS_ERR(port->reg_base))
+			return PTR_ERR(port->reg_base);
+	} else {
+		port->reg_base = port->cfg->win;
+	}
+
+	return 0;
+}
+
+static void xilinx_cpm_free_interrupts(struct xilinx_cpm_pcie *port)
+{
+	irq_set_chained_handler_and_data(port->intx_irq, NULL, NULL);
+	irq_set_chained_handler_and_data(port->irq, NULL, NULL);
+}
+
+/**
+ * xilinx_cpm_pcie_probe - Probe function
+ * @pdev: Platform device pointer
+ *
+ * Return: '0' on success and error value on failure
+ */
+static int xilinx_cpm_pcie_probe(struct platform_device *pdev)
+{
+	struct xilinx_cpm_pcie *port;
+	struct device *dev = &pdev->dev;
+	struct pci_host_bridge *bridge;
+	struct resource_entry *bus;
+	int err;
+
+	bridge = devm_pci_alloc_host_bridge(dev, sizeof(*port));
+	if (!bridge)
+		return -ENODEV;
+
+	port = pci_host_bridge_priv(bridge);
+
+	port->dev = dev;
+
+	err = xilinx_cpm_pcie_init_irq_domain(port);
+	if (err)
+		return err;
+
+	bus = resource_list_first_type(&bridge->windows, IORESOURCE_BUS);
+	if (!bus)
+		return -ENODEV;
+
+	port->variant = of_device_get_match_data(dev);
+
+	err = xilinx_cpm_pcie_parse_dt(port, bus->res);
+	if (err) {
+		dev_err(dev, "Parsing DT failed\n");
+		goto err_parse_dt;
+	}
+
+	xilinx_cpm_pcie_init_port(port);
+
+	err = xilinx_cpm_setup_irq(port);
+	if (err) {
+		dev_err(dev, "Failed to set up interrupts\n");
+		goto err_setup_irq;
+	}
+
+	bridge->sysdata = port->cfg;
+	bridge->ops = (struct pci_ops *)&pci_generic_ecam_ops.pci_ops;
+
+	err = pci_host_probe(bridge);
+	if (err < 0)
+		goto err_host_bridge;
+
+	return 0;
+
+err_host_bridge:
+	xilinx_cpm_free_interrupts(port);
+err_setup_irq:
+	pci_ecam_free(port->cfg);
+err_parse_dt:
+	xilinx_cpm_free_irq_domains(port);
+	return err;
+}
+
+static const struct xilinx_cpm_variant cpm_host = {
+	.version = CPM,
+};
+
+static const struct xilinx_cpm_variant cpm5_host = {
+	.version = CPM5,
+};
+
+static const struct of_device_id xilinx_cpm_pcie_of_match[] = {
+	{
+		.compatible = "xlnx,versal-cpm-host-1.00",
+		.data = &cpm_host,
+	},
+	{
+		.compatible = "xlnx,versal-cpm5-host",
+		.data = &cpm5_host,
+	},
+	{}
+};
+
+static struct platform_driver xilinx_cpm_pcie_driver = {
+	.driver = {
+		.name = "xilinx-cpm-pcie",
+		.of_match_table = xilinx_cpm_pcie_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = xilinx_cpm_pcie_probe,
+};
+
+builtin_platform_driver(xilinx_cpm_pcie_driver);
diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c
new file mode 100644
index 000000000..176686bdb
--- /dev/null
+++ b/drivers/pci/controller/pcie-xilinx-nwl.c
@@ -0,0 +1,847 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCIe host controller driver for NWL PCIe Bridge
+ * Based on pcie-xilinx.c, pci-tegra.c
+ *
+ * (C) Copyright 2014 - 2015, Xilinx, Inc.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/pci-ecam.h>
+#include <linux/platform_device.h>
+#include <linux/irqchip/chained_irq.h>
+
+#include "../pci.h"
+
+/* Bridge core config registers */
+#define BRCFG_PCIE_RX0			0x00000000
+#define BRCFG_PCIE_RX1			0x00000004
+#define BRCFG_INTERRUPT			0x00000010
+#define BRCFG_PCIE_RX_MSG_FILTER	0x00000020
+
+/* Egress - Bridge translation registers */
+#define E_BREG_CAPABILITIES		0x00000200
+#define E_BREG_CONTROL			0x00000208
+#define E_BREG_BASE_LO			0x00000210
+#define E_BREG_BASE_HI			0x00000214
+#define E_ECAM_CAPABILITIES		0x00000220
+#define E_ECAM_CONTROL			0x00000228
+#define E_ECAM_BASE_LO			0x00000230
+#define E_ECAM_BASE_HI			0x00000234
+
+/* Ingress - address translations */
+#define I_MSII_CAPABILITIES		0x00000300
+#define I_MSII_CONTROL			0x00000308
+#define I_MSII_BASE_LO			0x00000310
+#define I_MSII_BASE_HI			0x00000314
+
+#define I_ISUB_CONTROL			0x000003E8
+#define SET_ISUB_CONTROL		BIT(0)
+/* Rxed msg fifo  - Interrupt status registers */
+#define MSGF_MISC_STATUS		0x00000400
+#define MSGF_MISC_MASK			0x00000404
+#define MSGF_LEG_STATUS			0x00000420
+#define MSGF_LEG_MASK			0x00000424
+#define MSGF_MSI_STATUS_LO		0x00000440
+#define MSGF_MSI_STATUS_HI		0x00000444
+#define MSGF_MSI_MASK_LO		0x00000448
+#define MSGF_MSI_MASK_HI		0x0000044C
+
+/* Msg filter mask bits */
+#define CFG_ENABLE_PM_MSG_FWD		BIT(1)
+#define CFG_ENABLE_INT_MSG_FWD		BIT(2)
+#define CFG_ENABLE_ERR_MSG_FWD		BIT(3)
+#define CFG_ENABLE_MSG_FILTER_MASK	(CFG_ENABLE_PM_MSG_FWD | \
+					CFG_ENABLE_INT_MSG_FWD | \
+					CFG_ENABLE_ERR_MSG_FWD)
+
+/* Misc interrupt status mask bits */
+#define MSGF_MISC_SR_RXMSG_AVAIL	BIT(0)
+#define MSGF_MISC_SR_RXMSG_OVER		BIT(1)
+#define MSGF_MISC_SR_SLAVE_ERR		BIT(4)
+#define MSGF_MISC_SR_MASTER_ERR		BIT(5)
+#define MSGF_MISC_SR_I_ADDR_ERR		BIT(6)
+#define MSGF_MISC_SR_E_ADDR_ERR		BIT(7)
+#define MSGF_MISC_SR_FATAL_AER		BIT(16)
+#define MSGF_MISC_SR_NON_FATAL_AER	BIT(17)
+#define MSGF_MISC_SR_CORR_AER		BIT(18)
+#define MSGF_MISC_SR_UR_DETECT		BIT(20)
+#define MSGF_MISC_SR_NON_FATAL_DEV	BIT(22)
+#define MSGF_MISC_SR_FATAL_DEV		BIT(23)
+#define MSGF_MISC_SR_LINK_DOWN		BIT(24)
+#define MSGF_MSIC_SR_LINK_AUTO_BWIDTH	BIT(25)
+#define MSGF_MSIC_SR_LINK_BWIDTH	BIT(26)
+
+#define MSGF_MISC_SR_MASKALL		(MSGF_MISC_SR_RXMSG_AVAIL | \
+					MSGF_MISC_SR_RXMSG_OVER | \
+					MSGF_MISC_SR_SLAVE_ERR | \
+					MSGF_MISC_SR_MASTER_ERR | \
+					MSGF_MISC_SR_I_ADDR_ERR | \
+					MSGF_MISC_SR_E_ADDR_ERR | \
+					MSGF_MISC_SR_FATAL_AER | \
+					MSGF_MISC_SR_NON_FATAL_AER | \
+					MSGF_MISC_SR_CORR_AER | \
+					MSGF_MISC_SR_UR_DETECT | \
+					MSGF_MISC_SR_NON_FATAL_DEV | \
+					MSGF_MISC_SR_FATAL_DEV | \
+					MSGF_MISC_SR_LINK_DOWN | \
+					MSGF_MSIC_SR_LINK_AUTO_BWIDTH | \
+					MSGF_MSIC_SR_LINK_BWIDTH)
+
+/* Legacy interrupt status mask bits */
+#define MSGF_LEG_SR_INTA		BIT(0)
+#define MSGF_LEG_SR_INTB		BIT(1)
+#define MSGF_LEG_SR_INTC		BIT(2)
+#define MSGF_LEG_SR_INTD		BIT(3)
+#define MSGF_LEG_SR_MASKALL		(MSGF_LEG_SR_INTA | MSGF_LEG_SR_INTB | \
+					MSGF_LEG_SR_INTC | MSGF_LEG_SR_INTD)
+
+/* MSI interrupt status mask bits */
+#define MSGF_MSI_SR_LO_MASK		GENMASK(31, 0)
+#define MSGF_MSI_SR_HI_MASK		GENMASK(31, 0)
+
+#define MSII_PRESENT			BIT(0)
+#define MSII_ENABLE			BIT(0)
+#define MSII_STATUS_ENABLE		BIT(15)
+
+/* Bridge config interrupt mask */
+#define BRCFG_INTERRUPT_MASK		BIT(0)
+#define BREG_PRESENT			BIT(0)
+#define BREG_ENABLE			BIT(0)
+#define BREG_ENABLE_FORCE		BIT(1)
+
+/* E_ECAM status mask bits */
+#define E_ECAM_PRESENT			BIT(0)
+#define E_ECAM_CR_ENABLE		BIT(0)
+#define E_ECAM_SIZE_LOC			GENMASK(20, 16)
+#define E_ECAM_SIZE_SHIFT		16
+#define NWL_ECAM_VALUE_DEFAULT		12
+
+#define CFG_DMA_REG_BAR			GENMASK(2, 0)
+#define CFG_PCIE_CACHE			GENMASK(7, 0)
+
+#define INT_PCI_MSI_NR			(2 * 32)
+
+/* Readin the PS_LINKUP */
+#define PS_LINKUP_OFFSET		0x00000238
+#define PCIE_PHY_LINKUP_BIT		BIT(0)
+#define PHY_RDY_LINKUP_BIT		BIT(1)
+
+/* Parameters for the waiting for link up routine */
+#define LINK_WAIT_MAX_RETRIES          10
+#define LINK_WAIT_USLEEP_MIN           90000
+#define LINK_WAIT_USLEEP_MAX           100000
+
+struct nwl_msi {			/* MSI information */
+	struct irq_domain *msi_domain;
+	DECLARE_BITMAP(bitmap, INT_PCI_MSI_NR);
+	struct irq_domain *dev_domain;
+	struct mutex lock;		/* protect bitmap variable */
+	int irq_msi0;
+	int irq_msi1;
+};
+
+struct nwl_pcie {
+	struct device *dev;
+	void __iomem *breg_base;
+	void __iomem *pcireg_base;
+	void __iomem *ecam_base;
+	phys_addr_t phys_breg_base;	/* Physical Bridge Register Base */
+	phys_addr_t phys_pcie_reg_base;	/* Physical PCIe Controller Base */
+	phys_addr_t phys_ecam_base;	/* Physical Configuration Base */
+	u32 breg_size;
+	u32 pcie_reg_size;
+	u32 ecam_size;
+	int irq_intx;
+	int irq_misc;
+	u32 ecam_value;
+	u8 last_busno;
+	struct nwl_msi msi;
+	struct irq_domain *legacy_irq_domain;
+	struct clk *clk;
+	raw_spinlock_t leg_mask_lock;
+};
+
+static inline u32 nwl_bridge_readl(struct nwl_pcie *pcie, u32 off)
+{
+	return readl(pcie->breg_base + off);
+}
+
+static inline void nwl_bridge_writel(struct nwl_pcie *pcie, u32 val, u32 off)
+{
+	writel(val, pcie->breg_base + off);
+}
+
+static bool nwl_pcie_link_up(struct nwl_pcie *pcie)
+{
+	if (readl(pcie->pcireg_base + PS_LINKUP_OFFSET) & PCIE_PHY_LINKUP_BIT)
+		return true;
+	return false;
+}
+
+static bool nwl_phy_link_up(struct nwl_pcie *pcie)
+{
+	if (readl(pcie->pcireg_base + PS_LINKUP_OFFSET) & PHY_RDY_LINKUP_BIT)
+		return true;
+	return false;
+}
+
+static int nwl_wait_for_link(struct nwl_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	int retries;
+
+	/* check if the link is up or not */
+	for (retries = 0; retries < LINK_WAIT_MAX_RETRIES; retries++) {
+		if (nwl_phy_link_up(pcie))
+			return 0;
+		usleep_range(LINK_WAIT_USLEEP_MIN, LINK_WAIT_USLEEP_MAX);
+	}
+
+	dev_err(dev, "PHY link never came up\n");
+	return -ETIMEDOUT;
+}
+
+static bool nwl_pcie_valid_device(struct pci_bus *bus, unsigned int devfn)
+{
+	struct nwl_pcie *pcie = bus->sysdata;
+
+	/* Check link before accessing downstream ports */
+	if (!pci_is_root_bus(bus)) {
+		if (!nwl_pcie_link_up(pcie))
+			return false;
+	} else if (devfn > 0)
+		/* Only one device down on each root port */
+		return false;
+
+	return true;
+}
+
+/**
+ * nwl_pcie_map_bus - Get configuration base
+ *
+ * @bus: Bus structure of current bus
+ * @devfn: Device/function
+ * @where: Offset from base
+ *
+ * Return: Base address of the configuration space needed to be
+ *	   accessed.
+ */
+static void __iomem *nwl_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
+				      int where)
+{
+	struct nwl_pcie *pcie = bus->sysdata;
+
+	if (!nwl_pcie_valid_device(bus, devfn))
+		return NULL;
+
+	return pcie->ecam_base + PCIE_ECAM_OFFSET(bus->number, devfn, where);
+}
+
+/* PCIe operations */
+static struct pci_ops nwl_pcie_ops = {
+	.map_bus = nwl_pcie_map_bus,
+	.read  = pci_generic_config_read,
+	.write = pci_generic_config_write,
+};
+
+static irqreturn_t nwl_pcie_misc_handler(int irq, void *data)
+{
+	struct nwl_pcie *pcie = data;
+	struct device *dev = pcie->dev;
+	u32 misc_stat;
+
+	/* Checking for misc interrupts */
+	misc_stat = nwl_bridge_readl(pcie, MSGF_MISC_STATUS) &
+				     MSGF_MISC_SR_MASKALL;
+	if (!misc_stat)
+		return IRQ_NONE;
+
+	if (misc_stat & MSGF_MISC_SR_RXMSG_OVER)
+		dev_err(dev, "Received Message FIFO Overflow\n");
+
+	if (misc_stat & MSGF_MISC_SR_SLAVE_ERR)
+		dev_err(dev, "Slave error\n");
+
+	if (misc_stat & MSGF_MISC_SR_MASTER_ERR)
+		dev_err(dev, "Master error\n");
+
+	if (misc_stat & MSGF_MISC_SR_I_ADDR_ERR)
+		dev_err(dev, "In Misc Ingress address translation error\n");
+
+	if (misc_stat & MSGF_MISC_SR_E_ADDR_ERR)
+		dev_err(dev, "In Misc Egress address translation error\n");
+
+	if (misc_stat & MSGF_MISC_SR_FATAL_AER)
+		dev_err(dev, "Fatal Error in AER Capability\n");
+
+	if (misc_stat & MSGF_MISC_SR_NON_FATAL_AER)
+		dev_err(dev, "Non-Fatal Error in AER Capability\n");
+
+	if (misc_stat & MSGF_MISC_SR_CORR_AER)
+		dev_err(dev, "Correctable Error in AER Capability\n");
+
+	if (misc_stat & MSGF_MISC_SR_UR_DETECT)
+		dev_err(dev, "Unsupported request Detected\n");
+
+	if (misc_stat & MSGF_MISC_SR_NON_FATAL_DEV)
+		dev_err(dev, "Non-Fatal Error Detected\n");
+
+	if (misc_stat & MSGF_MISC_SR_FATAL_DEV)
+		dev_err(dev, "Fatal Error Detected\n");
+
+	if (misc_stat & MSGF_MSIC_SR_LINK_AUTO_BWIDTH)
+		dev_info(dev, "Link Autonomous Bandwidth Management Status bit set\n");
+
+	if (misc_stat & MSGF_MSIC_SR_LINK_BWIDTH)
+		dev_info(dev, "Link Bandwidth Management Status bit set\n");
+
+	/* Clear misc interrupt status */
+	nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS);
+
+	return IRQ_HANDLED;
+}
+
+static void nwl_pcie_leg_handler(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct nwl_pcie *pcie;
+	unsigned long status;
+	u32 bit;
+
+	chained_irq_enter(chip, desc);
+	pcie = irq_desc_get_handler_data(desc);
+
+	while ((status = nwl_bridge_readl(pcie, MSGF_LEG_STATUS) &
+				MSGF_LEG_SR_MASKALL) != 0) {
+		for_each_set_bit(bit, &status, PCI_NUM_INTX)
+			generic_handle_domain_irq(pcie->legacy_irq_domain, bit);
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static void nwl_pcie_handle_msi_irq(struct nwl_pcie *pcie, u32 status_reg)
+{
+	struct nwl_msi *msi = &pcie->msi;
+	unsigned long status;
+	u32 bit;
+
+	while ((status = nwl_bridge_readl(pcie, status_reg)) != 0) {
+		for_each_set_bit(bit, &status, 32) {
+			nwl_bridge_writel(pcie, 1 << bit, status_reg);
+			generic_handle_domain_irq(msi->dev_domain, bit);
+		}
+	}
+}
+
+static void nwl_pcie_msi_handler_high(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct nwl_pcie *pcie = irq_desc_get_handler_data(desc);
+
+	chained_irq_enter(chip, desc);
+	nwl_pcie_handle_msi_irq(pcie, MSGF_MSI_STATUS_HI);
+	chained_irq_exit(chip, desc);
+}
+
+static void nwl_pcie_msi_handler_low(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct nwl_pcie *pcie = irq_desc_get_handler_data(desc);
+
+	chained_irq_enter(chip, desc);
+	nwl_pcie_handle_msi_irq(pcie, MSGF_MSI_STATUS_LO);
+	chained_irq_exit(chip, desc);
+}
+
+static void nwl_mask_leg_irq(struct irq_data *data)
+{
+	struct nwl_pcie *pcie = irq_data_get_irq_chip_data(data);
+	unsigned long flags;
+	u32 mask;
+	u32 val;
+
+	mask = 1 << (data->hwirq - 1);
+	raw_spin_lock_irqsave(&pcie->leg_mask_lock, flags);
+	val = nwl_bridge_readl(pcie, MSGF_LEG_MASK);
+	nwl_bridge_writel(pcie, (val & (~mask)), MSGF_LEG_MASK);
+	raw_spin_unlock_irqrestore(&pcie->leg_mask_lock, flags);
+}
+
+static void nwl_unmask_leg_irq(struct irq_data *data)
+{
+	struct nwl_pcie *pcie = irq_data_get_irq_chip_data(data);
+	unsigned long flags;
+	u32 mask;
+	u32 val;
+
+	mask = 1 << (data->hwirq - 1);
+	raw_spin_lock_irqsave(&pcie->leg_mask_lock, flags);
+	val = nwl_bridge_readl(pcie, MSGF_LEG_MASK);
+	nwl_bridge_writel(pcie, (val | mask), MSGF_LEG_MASK);
+	raw_spin_unlock_irqrestore(&pcie->leg_mask_lock, flags);
+}
+
+static struct irq_chip nwl_leg_irq_chip = {
+	.name = "nwl_pcie:legacy",
+	.irq_enable = nwl_unmask_leg_irq,
+	.irq_disable = nwl_mask_leg_irq,
+	.irq_mask = nwl_mask_leg_irq,
+	.irq_unmask = nwl_unmask_leg_irq,
+};
+
+static int nwl_legacy_map(struct irq_domain *domain, unsigned int irq,
+			  irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &nwl_leg_irq_chip, handle_level_irq);
+	irq_set_chip_data(irq, domain->host_data);
+	irq_set_status_flags(irq, IRQ_LEVEL);
+
+	return 0;
+}
+
+static const struct irq_domain_ops legacy_domain_ops = {
+	.map = nwl_legacy_map,
+	.xlate = pci_irqd_intx_xlate,
+};
+
+#ifdef CONFIG_PCI_MSI
+static struct irq_chip nwl_msi_irq_chip = {
+	.name = "nwl_pcie:msi",
+	.irq_enable = pci_msi_unmask_irq,
+	.irq_disable = pci_msi_mask_irq,
+	.irq_mask = pci_msi_mask_irq,
+	.irq_unmask = pci_msi_unmask_irq,
+};
+
+static struct msi_domain_info nwl_msi_domain_info = {
+	.flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		  MSI_FLAG_MULTI_PCI_MSI),
+	.chip = &nwl_msi_irq_chip,
+};
+#endif
+
+static void nwl_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct nwl_pcie *pcie = irq_data_get_irq_chip_data(data);
+	phys_addr_t msi_addr = pcie->phys_pcie_reg_base;
+
+	msg->address_lo = lower_32_bits(msi_addr);
+	msg->address_hi = upper_32_bits(msi_addr);
+	msg->data = data->hwirq;
+}
+
+static int nwl_msi_set_affinity(struct irq_data *irq_data,
+				const struct cpumask *mask, bool force)
+{
+	return -EINVAL;
+}
+
+static struct irq_chip nwl_irq_chip = {
+	.name = "Xilinx MSI",
+	.irq_compose_msi_msg = nwl_compose_msi_msg,
+	.irq_set_affinity = nwl_msi_set_affinity,
+};
+
+static int nwl_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				unsigned int nr_irqs, void *args)
+{
+	struct nwl_pcie *pcie = domain->host_data;
+	struct nwl_msi *msi = &pcie->msi;
+	int bit;
+	int i;
+
+	mutex_lock(&msi->lock);
+	bit = bitmap_find_free_region(msi->bitmap, INT_PCI_MSI_NR,
+				      get_count_order(nr_irqs));
+	if (bit < 0) {
+		mutex_unlock(&msi->lock);
+		return -ENOSPC;
+	}
+
+	for (i = 0; i < nr_irqs; i++) {
+		irq_domain_set_info(domain, virq + i, bit + i, &nwl_irq_chip,
+				    domain->host_data, handle_simple_irq,
+				    NULL, NULL);
+	}
+	mutex_unlock(&msi->lock);
+	return 0;
+}
+
+static void nwl_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+				unsigned int nr_irqs)
+{
+	struct irq_data *data = irq_domain_get_irq_data(domain, virq);
+	struct nwl_pcie *pcie = irq_data_get_irq_chip_data(data);
+	struct nwl_msi *msi = &pcie->msi;
+
+	mutex_lock(&msi->lock);
+	bitmap_release_region(msi->bitmap, data->hwirq,
+			      get_count_order(nr_irqs));
+	mutex_unlock(&msi->lock);
+}
+
+static const struct irq_domain_ops dev_msi_domain_ops = {
+	.alloc  = nwl_irq_domain_alloc,
+	.free   = nwl_irq_domain_free,
+};
+
+static int nwl_pcie_init_msi_irq_domain(struct nwl_pcie *pcie)
+{
+#ifdef CONFIG_PCI_MSI
+	struct device *dev = pcie->dev;
+	struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node);
+	struct nwl_msi *msi = &pcie->msi;
+
+	msi->dev_domain = irq_domain_add_linear(NULL, INT_PCI_MSI_NR,
+						&dev_msi_domain_ops, pcie);
+	if (!msi->dev_domain) {
+		dev_err(dev, "failed to create dev IRQ domain\n");
+		return -ENOMEM;
+	}
+	msi->msi_domain = pci_msi_create_irq_domain(fwnode,
+						    &nwl_msi_domain_info,
+						    msi->dev_domain);
+	if (!msi->msi_domain) {
+		dev_err(dev, "failed to create msi IRQ domain\n");
+		irq_domain_remove(msi->dev_domain);
+		return -ENOMEM;
+	}
+#endif
+	return 0;
+}
+
+static int nwl_pcie_init_irq_domain(struct nwl_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct device_node *node = dev->of_node;
+	struct device_node *legacy_intc_node;
+
+	legacy_intc_node = of_get_next_child(node, NULL);
+	if (!legacy_intc_node) {
+		dev_err(dev, "No legacy intc node found\n");
+		return -EINVAL;
+	}
+
+	pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
+							PCI_NUM_INTX,
+							&legacy_domain_ops,
+							pcie);
+	of_node_put(legacy_intc_node);
+	if (!pcie->legacy_irq_domain) {
+		dev_err(dev, "failed to create IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	raw_spin_lock_init(&pcie->leg_mask_lock);
+	nwl_pcie_init_msi_irq_domain(pcie);
+	return 0;
+}
+
+static int nwl_pcie_enable_msi(struct nwl_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct nwl_msi *msi = &pcie->msi;
+	unsigned long base;
+	int ret;
+
+	mutex_init(&msi->lock);
+
+	/* Get msi_1 IRQ number */
+	msi->irq_msi1 = platform_get_irq_byname(pdev, "msi1");
+	if (msi->irq_msi1 < 0)
+		return -EINVAL;
+
+	irq_set_chained_handler_and_data(msi->irq_msi1,
+					 nwl_pcie_msi_handler_high, pcie);
+
+	/* Get msi_0 IRQ number */
+	msi->irq_msi0 = platform_get_irq_byname(pdev, "msi0");
+	if (msi->irq_msi0 < 0)
+		return -EINVAL;
+
+	irq_set_chained_handler_and_data(msi->irq_msi0,
+					 nwl_pcie_msi_handler_low, pcie);
+
+	/* Check for msii_present bit */
+	ret = nwl_bridge_readl(pcie, I_MSII_CAPABILITIES) & MSII_PRESENT;
+	if (!ret) {
+		dev_err(dev, "MSI not present\n");
+		return -EIO;
+	}
+
+	/* Enable MSII */
+	nwl_bridge_writel(pcie, nwl_bridge_readl(pcie, I_MSII_CONTROL) |
+			  MSII_ENABLE, I_MSII_CONTROL);
+
+	/* Enable MSII status */
+	nwl_bridge_writel(pcie, nwl_bridge_readl(pcie, I_MSII_CONTROL) |
+			  MSII_STATUS_ENABLE, I_MSII_CONTROL);
+
+	/* setup AFI/FPCI range */
+	base = pcie->phys_pcie_reg_base;
+	nwl_bridge_writel(pcie, lower_32_bits(base), I_MSII_BASE_LO);
+	nwl_bridge_writel(pcie, upper_32_bits(base), I_MSII_BASE_HI);
+
+	/*
+	 * For high range MSI interrupts: disable, clear any pending,
+	 * and enable
+	 */
+	nwl_bridge_writel(pcie, 0, MSGF_MSI_MASK_HI);
+
+	nwl_bridge_writel(pcie, nwl_bridge_readl(pcie,  MSGF_MSI_STATUS_HI) &
+			  MSGF_MSI_SR_HI_MASK, MSGF_MSI_STATUS_HI);
+
+	nwl_bridge_writel(pcie, MSGF_MSI_SR_HI_MASK, MSGF_MSI_MASK_HI);
+
+	/*
+	 * For low range MSI interrupts: disable, clear any pending,
+	 * and enable
+	 */
+	nwl_bridge_writel(pcie, 0, MSGF_MSI_MASK_LO);
+
+	nwl_bridge_writel(pcie, nwl_bridge_readl(pcie, MSGF_MSI_STATUS_LO) &
+			  MSGF_MSI_SR_LO_MASK, MSGF_MSI_STATUS_LO);
+
+	nwl_bridge_writel(pcie, MSGF_MSI_SR_LO_MASK, MSGF_MSI_MASK_LO);
+
+	return 0;
+}
+
+static int nwl_pcie_bridge_init(struct nwl_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	u32 breg_val, ecam_val, first_busno = 0;
+	int err;
+
+	breg_val = nwl_bridge_readl(pcie, E_BREG_CAPABILITIES) & BREG_PRESENT;
+	if (!breg_val) {
+		dev_err(dev, "BREG is not present\n");
+		return breg_val;
+	}
+
+	/* Write bridge_off to breg base */
+	nwl_bridge_writel(pcie, lower_32_bits(pcie->phys_breg_base),
+			  E_BREG_BASE_LO);
+	nwl_bridge_writel(pcie, upper_32_bits(pcie->phys_breg_base),
+			  E_BREG_BASE_HI);
+
+	/* Enable BREG */
+	nwl_bridge_writel(pcie, ~BREG_ENABLE_FORCE & BREG_ENABLE,
+			  E_BREG_CONTROL);
+
+	/* Disable DMA channel registers */
+	nwl_bridge_writel(pcie, nwl_bridge_readl(pcie, BRCFG_PCIE_RX0) |
+			  CFG_DMA_REG_BAR, BRCFG_PCIE_RX0);
+
+	/* Enable Ingress subtractive decode translation */
+	nwl_bridge_writel(pcie, SET_ISUB_CONTROL, I_ISUB_CONTROL);
+
+	/* Enable msg filtering details */
+	nwl_bridge_writel(pcie, CFG_ENABLE_MSG_FILTER_MASK,
+			  BRCFG_PCIE_RX_MSG_FILTER);
+
+	/* This routes the PCIe DMA traffic to go through CCI path */
+	if (of_dma_is_coherent(dev->of_node))
+		nwl_bridge_writel(pcie, nwl_bridge_readl(pcie, BRCFG_PCIE_RX1) |
+				  CFG_PCIE_CACHE, BRCFG_PCIE_RX1);
+
+	err = nwl_wait_for_link(pcie);
+	if (err)
+		return err;
+
+	ecam_val = nwl_bridge_readl(pcie, E_ECAM_CAPABILITIES) & E_ECAM_PRESENT;
+	if (!ecam_val) {
+		dev_err(dev, "ECAM is not present\n");
+		return ecam_val;
+	}
+
+	/* Enable ECAM */
+	nwl_bridge_writel(pcie, nwl_bridge_readl(pcie, E_ECAM_CONTROL) |
+			  E_ECAM_CR_ENABLE, E_ECAM_CONTROL);
+
+	nwl_bridge_writel(pcie, nwl_bridge_readl(pcie, E_ECAM_CONTROL) |
+			  (pcie->ecam_value << E_ECAM_SIZE_SHIFT),
+			  E_ECAM_CONTROL);
+
+	nwl_bridge_writel(pcie, lower_32_bits(pcie->phys_ecam_base),
+			  E_ECAM_BASE_LO);
+	nwl_bridge_writel(pcie, upper_32_bits(pcie->phys_ecam_base),
+			  E_ECAM_BASE_HI);
+
+	/* Get bus range */
+	ecam_val = nwl_bridge_readl(pcie, E_ECAM_CONTROL);
+	pcie->last_busno = (ecam_val & E_ECAM_SIZE_LOC) >> E_ECAM_SIZE_SHIFT;
+	/* Write primary, secondary and subordinate bus numbers */
+	ecam_val = first_busno;
+	ecam_val |= (first_busno + 1) << 8;
+	ecam_val |= (pcie->last_busno << E_ECAM_SIZE_SHIFT);
+	writel(ecam_val, (pcie->ecam_base + PCI_PRIMARY_BUS));
+
+	if (nwl_pcie_link_up(pcie))
+		dev_info(dev, "Link is UP\n");
+	else
+		dev_info(dev, "Link is DOWN\n");
+
+	/* Get misc IRQ number */
+	pcie->irq_misc = platform_get_irq_byname(pdev, "misc");
+	if (pcie->irq_misc < 0)
+		return -EINVAL;
+
+	err = devm_request_irq(dev, pcie->irq_misc,
+			       nwl_pcie_misc_handler, IRQF_SHARED,
+			       "nwl_pcie:misc", pcie);
+	if (err) {
+		dev_err(dev, "fail to register misc IRQ#%d\n",
+			pcie->irq_misc);
+		return err;
+	}
+
+	/* Disable all misc interrupts */
+	nwl_bridge_writel(pcie, (u32)~MSGF_MISC_SR_MASKALL, MSGF_MISC_MASK);
+
+	/* Clear pending misc interrupts */
+	nwl_bridge_writel(pcie, nwl_bridge_readl(pcie, MSGF_MISC_STATUS) &
+			  MSGF_MISC_SR_MASKALL, MSGF_MISC_STATUS);
+
+	/* Enable all misc interrupts */
+	nwl_bridge_writel(pcie, MSGF_MISC_SR_MASKALL, MSGF_MISC_MASK);
+
+	/* Disable all legacy interrupts */
+	nwl_bridge_writel(pcie, (u32)~MSGF_LEG_SR_MASKALL, MSGF_LEG_MASK);
+
+	/* Clear pending legacy interrupts */
+	nwl_bridge_writel(pcie, nwl_bridge_readl(pcie, MSGF_LEG_STATUS) &
+			  MSGF_LEG_SR_MASKALL, MSGF_LEG_STATUS);
+
+	/* Enable all legacy interrupts */
+	nwl_bridge_writel(pcie, MSGF_LEG_SR_MASKALL, MSGF_LEG_MASK);
+
+	/* Enable the bridge config interrupt */
+	nwl_bridge_writel(pcie, nwl_bridge_readl(pcie, BRCFG_INTERRUPT) |
+			  BRCFG_INTERRUPT_MASK, BRCFG_INTERRUPT);
+
+	return 0;
+}
+
+static int nwl_pcie_parse_dt(struct nwl_pcie *pcie,
+			     struct platform_device *pdev)
+{
+	struct device *dev = pcie->dev;
+	struct resource *res;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "breg");
+	pcie->breg_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pcie->breg_base))
+		return PTR_ERR(pcie->breg_base);
+	pcie->phys_breg_base = res->start;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pcireg");
+	pcie->pcireg_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pcie->pcireg_base))
+		return PTR_ERR(pcie->pcireg_base);
+	pcie->phys_pcie_reg_base = res->start;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cfg");
+	pcie->ecam_base = devm_pci_remap_cfg_resource(dev, res);
+	if (IS_ERR(pcie->ecam_base))
+		return PTR_ERR(pcie->ecam_base);
+	pcie->phys_ecam_base = res->start;
+
+	/* Get intx IRQ number */
+	pcie->irq_intx = platform_get_irq_byname(pdev, "intx");
+	if (pcie->irq_intx < 0)
+		return pcie->irq_intx;
+
+	irq_set_chained_handler_and_data(pcie->irq_intx,
+					 nwl_pcie_leg_handler, pcie);
+
+	return 0;
+}
+
+static const struct of_device_id nwl_pcie_of_match[] = {
+	{ .compatible = "xlnx,nwl-pcie-2.11", },
+	{}
+};
+
+static int nwl_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct nwl_pcie *pcie;
+	struct pci_host_bridge *bridge;
+	int err;
+
+	bridge = devm_pci_alloc_host_bridge(dev, sizeof(*pcie));
+	if (!bridge)
+		return -ENODEV;
+
+	pcie = pci_host_bridge_priv(bridge);
+
+	pcie->dev = dev;
+	pcie->ecam_value = NWL_ECAM_VALUE_DEFAULT;
+
+	err = nwl_pcie_parse_dt(pcie, pdev);
+	if (err) {
+		dev_err(dev, "Parsing DT failed\n");
+		return err;
+	}
+
+	pcie->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(pcie->clk))
+		return PTR_ERR(pcie->clk);
+
+	err = clk_prepare_enable(pcie->clk);
+	if (err) {
+		dev_err(dev, "can't enable PCIe ref clock\n");
+		return err;
+	}
+
+	err = nwl_pcie_bridge_init(pcie);
+	if (err) {
+		dev_err(dev, "HW Initialization failed\n");
+		return err;
+	}
+
+	err = nwl_pcie_init_irq_domain(pcie);
+	if (err) {
+		dev_err(dev, "Failed creating IRQ Domain\n");
+		return err;
+	}
+
+	bridge->sysdata = pcie;
+	bridge->ops = &nwl_pcie_ops;
+
+	if (IS_ENABLED(CONFIG_PCI_MSI)) {
+		err = nwl_pcie_enable_msi(pcie);
+		if (err < 0) {
+			dev_err(dev, "failed to enable MSI support: %d\n", err);
+			return err;
+		}
+	}
+
+	return pci_host_probe(bridge);
+}
+
+static struct platform_driver nwl_pcie_driver = {
+	.driver = {
+		.name = "nwl-pcie",
+		.suppress_bind_attrs = true,
+		.of_match_table = nwl_pcie_of_match,
+	},
+	.probe = nwl_pcie_probe,
+};
+builtin_platform_driver(nwl_pcie_driver);
diff --git a/drivers/pci/controller/pcie-xilinx.c b/drivers/pci/controller/pcie-xilinx.c
new file mode 100644
index 000000000..cb6e9f7b0
--- /dev/null
+++ b/drivers/pci/controller/pcie-xilinx.c
@@ -0,0 +1,621 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCIe host controller driver for Xilinx AXI PCIe Bridge
+ *
+ * Copyright (c) 2012 - 2014 Xilinx, Inc.
+ *
+ * Based on the Tegra PCIe driver
+ *
+ * Bits taken from Synopsys DesignWare Host controller driver and
+ * ARM PCI Host generic driver.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/of_irq.h>
+#include <linux/pci.h>
+#include <linux/pci-ecam.h>
+#include <linux/platform_device.h>
+
+#include "../pci.h"
+
+/* Register definitions */
+#define XILINX_PCIE_REG_BIR		0x00000130
+#define XILINX_PCIE_REG_IDR		0x00000138
+#define XILINX_PCIE_REG_IMR		0x0000013c
+#define XILINX_PCIE_REG_PSCR		0x00000144
+#define XILINX_PCIE_REG_RPSC		0x00000148
+#define XILINX_PCIE_REG_MSIBASE1	0x0000014c
+#define XILINX_PCIE_REG_MSIBASE2	0x00000150
+#define XILINX_PCIE_REG_RPEFR		0x00000154
+#define XILINX_PCIE_REG_RPIFR1		0x00000158
+#define XILINX_PCIE_REG_RPIFR2		0x0000015c
+
+/* Interrupt registers definitions */
+#define XILINX_PCIE_INTR_LINK_DOWN	BIT(0)
+#define XILINX_PCIE_INTR_ECRC_ERR	BIT(1)
+#define XILINX_PCIE_INTR_STR_ERR	BIT(2)
+#define XILINX_PCIE_INTR_HOT_RESET	BIT(3)
+#define XILINX_PCIE_INTR_CFG_TIMEOUT	BIT(8)
+#define XILINX_PCIE_INTR_CORRECTABLE	BIT(9)
+#define XILINX_PCIE_INTR_NONFATAL	BIT(10)
+#define XILINX_PCIE_INTR_FATAL		BIT(11)
+#define XILINX_PCIE_INTR_INTX		BIT(16)
+#define XILINX_PCIE_INTR_MSI		BIT(17)
+#define XILINX_PCIE_INTR_SLV_UNSUPP	BIT(20)
+#define XILINX_PCIE_INTR_SLV_UNEXP	BIT(21)
+#define XILINX_PCIE_INTR_SLV_COMPL	BIT(22)
+#define XILINX_PCIE_INTR_SLV_ERRP	BIT(23)
+#define XILINX_PCIE_INTR_SLV_CMPABT	BIT(24)
+#define XILINX_PCIE_INTR_SLV_ILLBUR	BIT(25)
+#define XILINX_PCIE_INTR_MST_DECERR	BIT(26)
+#define XILINX_PCIE_INTR_MST_SLVERR	BIT(27)
+#define XILINX_PCIE_INTR_MST_ERRP	BIT(28)
+#define XILINX_PCIE_IMR_ALL_MASK	0x1FF30FED
+#define XILINX_PCIE_IMR_ENABLE_MASK	0x1FF30F0D
+#define XILINX_PCIE_IDR_ALL_MASK	0xFFFFFFFF
+
+/* Root Port Error FIFO Read Register definitions */
+#define XILINX_PCIE_RPEFR_ERR_VALID	BIT(18)
+#define XILINX_PCIE_RPEFR_REQ_ID	GENMASK(15, 0)
+#define XILINX_PCIE_RPEFR_ALL_MASK	0xFFFFFFFF
+
+/* Root Port Interrupt FIFO Read Register 1 definitions */
+#define XILINX_PCIE_RPIFR1_INTR_VALID	BIT(31)
+#define XILINX_PCIE_RPIFR1_MSI_INTR	BIT(30)
+#define XILINX_PCIE_RPIFR1_INTR_MASK	GENMASK(28, 27)
+#define XILINX_PCIE_RPIFR1_ALL_MASK	0xFFFFFFFF
+#define XILINX_PCIE_RPIFR1_INTR_SHIFT	27
+
+/* Bridge Info Register definitions */
+#define XILINX_PCIE_BIR_ECAM_SZ_MASK	GENMASK(18, 16)
+#define XILINX_PCIE_BIR_ECAM_SZ_SHIFT	16
+
+/* Root Port Interrupt FIFO Read Register 2 definitions */
+#define XILINX_PCIE_RPIFR2_MSG_DATA	GENMASK(15, 0)
+
+/* Root Port Status/control Register definitions */
+#define XILINX_PCIE_REG_RPSC_BEN	BIT(0)
+
+/* Phy Status/Control Register definitions */
+#define XILINX_PCIE_REG_PSCR_LNKUP	BIT(11)
+
+/* Number of MSI IRQs */
+#define XILINX_NUM_MSI_IRQS		128
+
+/**
+ * struct xilinx_pcie - PCIe port information
+ * @dev: Device pointer
+ * @reg_base: IO Mapped Register Base
+ * @msi_map: Bitmap of allocated MSIs
+ * @map_lock: Mutex protecting the MSI allocation
+ * @msi_domain: MSI IRQ domain pointer
+ * @leg_domain: Legacy IRQ domain pointer
+ * @resources: Bus Resources
+ */
+struct xilinx_pcie {
+	struct device *dev;
+	void __iomem *reg_base;
+	unsigned long msi_map[BITS_TO_LONGS(XILINX_NUM_MSI_IRQS)];
+	struct mutex map_lock;
+	struct irq_domain *msi_domain;
+	struct irq_domain *leg_domain;
+	struct list_head resources;
+};
+
+static inline u32 pcie_read(struct xilinx_pcie *pcie, u32 reg)
+{
+	return readl(pcie->reg_base + reg);
+}
+
+static inline void pcie_write(struct xilinx_pcie *pcie, u32 val, u32 reg)
+{
+	writel(val, pcie->reg_base + reg);
+}
+
+static inline bool xilinx_pcie_link_up(struct xilinx_pcie *pcie)
+{
+	return (pcie_read(pcie, XILINX_PCIE_REG_PSCR) &
+		XILINX_PCIE_REG_PSCR_LNKUP) ? 1 : 0;
+}
+
+/**
+ * xilinx_pcie_clear_err_interrupts - Clear Error Interrupts
+ * @pcie: PCIe port information
+ */
+static void xilinx_pcie_clear_err_interrupts(struct xilinx_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	unsigned long val = pcie_read(pcie, XILINX_PCIE_REG_RPEFR);
+
+	if (val & XILINX_PCIE_RPEFR_ERR_VALID) {
+		dev_dbg(dev, "Requester ID %lu\n",
+			val & XILINX_PCIE_RPEFR_REQ_ID);
+		pcie_write(pcie, XILINX_PCIE_RPEFR_ALL_MASK,
+			   XILINX_PCIE_REG_RPEFR);
+	}
+}
+
+/**
+ * xilinx_pcie_valid_device - Check if a valid device is present on bus
+ * @bus: PCI Bus structure
+ * @devfn: device/function
+ *
+ * Return: 'true' on success and 'false' if invalid device is found
+ */
+static bool xilinx_pcie_valid_device(struct pci_bus *bus, unsigned int devfn)
+{
+	struct xilinx_pcie *pcie = bus->sysdata;
+
+	/* Check if link is up when trying to access downstream pcie ports */
+	if (!pci_is_root_bus(bus)) {
+		if (!xilinx_pcie_link_up(pcie))
+			return false;
+	} else if (devfn > 0) {
+		/* Only one device down on each root port */
+		return false;
+	}
+	return true;
+}
+
+/**
+ * xilinx_pcie_map_bus - Get configuration base
+ * @bus: PCI Bus structure
+ * @devfn: Device/function
+ * @where: Offset from base
+ *
+ * Return: Base address of the configuration space needed to be
+ *	   accessed.
+ */
+static void __iomem *xilinx_pcie_map_bus(struct pci_bus *bus,
+					 unsigned int devfn, int where)
+{
+	struct xilinx_pcie *pcie = bus->sysdata;
+
+	if (!xilinx_pcie_valid_device(bus, devfn))
+		return NULL;
+
+	return pcie->reg_base + PCIE_ECAM_OFFSET(bus->number, devfn, where);
+}
+
+/* PCIe operations */
+static struct pci_ops xilinx_pcie_ops = {
+	.map_bus = xilinx_pcie_map_bus,
+	.read	= pci_generic_config_read,
+	.write	= pci_generic_config_write,
+};
+
+/* MSI functions */
+
+static void xilinx_msi_top_irq_ack(struct irq_data *d)
+{
+	/*
+	 * xilinx_pcie_intr_handler() will have performed the Ack.
+	 * Eventually, this should be fixed and the Ack be moved in
+	 * the respective callbacks for INTx and MSI.
+	 */
+}
+
+static struct irq_chip xilinx_msi_top_chip = {
+	.name		= "PCIe MSI",
+	.irq_ack	= xilinx_msi_top_irq_ack,
+};
+
+static int xilinx_msi_set_affinity(struct irq_data *d, const struct cpumask *mask, bool force)
+{
+	return -EINVAL;
+}
+
+static void xilinx_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct xilinx_pcie *pcie = irq_data_get_irq_chip_data(data);
+	phys_addr_t pa = ALIGN_DOWN(virt_to_phys(pcie), SZ_4K);
+
+	msg->address_lo = lower_32_bits(pa);
+	msg->address_hi = upper_32_bits(pa);
+	msg->data = data->hwirq;
+}
+
+static struct irq_chip xilinx_msi_bottom_chip = {
+	.name			= "Xilinx MSI",
+	.irq_set_affinity 	= xilinx_msi_set_affinity,
+	.irq_compose_msi_msg	= xilinx_compose_msi_msg,
+};
+
+static int xilinx_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs, void *args)
+{
+	struct xilinx_pcie *pcie = domain->host_data;
+	int hwirq, i;
+
+	mutex_lock(&pcie->map_lock);
+
+	hwirq = bitmap_find_free_region(pcie->msi_map, XILINX_NUM_MSI_IRQS, order_base_2(nr_irqs));
+
+	mutex_unlock(&pcie->map_lock);
+
+	if (hwirq < 0)
+		return -ENOSPC;
+
+	for (i = 0; i < nr_irqs; i++)
+		irq_domain_set_info(domain, virq + i, hwirq + i,
+				    &xilinx_msi_bottom_chip, domain->host_data,
+				    handle_edge_irq, NULL, NULL);
+
+	return 0;
+}
+
+static void xilinx_msi_domain_free(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct xilinx_pcie *pcie = domain->host_data;
+
+	mutex_lock(&pcie->map_lock);
+
+	bitmap_release_region(pcie->msi_map, d->hwirq, order_base_2(nr_irqs));
+
+	mutex_unlock(&pcie->map_lock);
+}
+
+static const struct irq_domain_ops xilinx_msi_domain_ops = {
+	.alloc	= xilinx_msi_domain_alloc,
+	.free	= xilinx_msi_domain_free,
+};
+
+static struct msi_domain_info xilinx_msi_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS),
+	.chip	= &xilinx_msi_top_chip,
+};
+
+static int xilinx_allocate_msi_domains(struct xilinx_pcie *pcie)
+{
+	struct fwnode_handle *fwnode = dev_fwnode(pcie->dev);
+	struct irq_domain *parent;
+
+	parent = irq_domain_create_linear(fwnode, XILINX_NUM_MSI_IRQS,
+					  &xilinx_msi_domain_ops, pcie);
+	if (!parent) {
+		dev_err(pcie->dev, "failed to create IRQ domain\n");
+		return -ENOMEM;
+	}
+	irq_domain_update_bus_token(parent, DOMAIN_BUS_NEXUS);
+
+	pcie->msi_domain = pci_msi_create_irq_domain(fwnode, &xilinx_msi_info, parent);
+	if (!pcie->msi_domain) {
+		dev_err(pcie->dev, "failed to create MSI domain\n");
+		irq_domain_remove(parent);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void xilinx_free_msi_domains(struct xilinx_pcie *pcie)
+{
+	struct irq_domain *parent = pcie->msi_domain->parent;
+
+	irq_domain_remove(pcie->msi_domain);
+	irq_domain_remove(parent);
+}
+
+/* INTx Functions */
+
+/**
+ * xilinx_pcie_intx_map - Set the handler for the INTx and mark IRQ as valid
+ * @domain: IRQ domain
+ * @irq: Virtual IRQ number
+ * @hwirq: HW interrupt number
+ *
+ * Return: Always returns 0.
+ */
+static int xilinx_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
+				irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &dummy_irq_chip, handle_simple_irq);
+	irq_set_chip_data(irq, domain->host_data);
+
+	return 0;
+}
+
+/* INTx IRQ Domain operations */
+static const struct irq_domain_ops intx_domain_ops = {
+	.map = xilinx_pcie_intx_map,
+	.xlate = pci_irqd_intx_xlate,
+};
+
+/* PCIe HW Functions */
+
+/**
+ * xilinx_pcie_intr_handler - Interrupt Service Handler
+ * @irq: IRQ number
+ * @data: PCIe port information
+ *
+ * Return: IRQ_HANDLED on success and IRQ_NONE on failure
+ */
+static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data)
+{
+	struct xilinx_pcie *pcie = (struct xilinx_pcie *)data;
+	struct device *dev = pcie->dev;
+	u32 val, mask, status;
+
+	/* Read interrupt decode and mask registers */
+	val = pcie_read(pcie, XILINX_PCIE_REG_IDR);
+	mask = pcie_read(pcie, XILINX_PCIE_REG_IMR);
+
+	status = val & mask;
+	if (!status)
+		return IRQ_NONE;
+
+	if (status & XILINX_PCIE_INTR_LINK_DOWN)
+		dev_warn(dev, "Link Down\n");
+
+	if (status & XILINX_PCIE_INTR_ECRC_ERR)
+		dev_warn(dev, "ECRC failed\n");
+
+	if (status & XILINX_PCIE_INTR_STR_ERR)
+		dev_warn(dev, "Streaming error\n");
+
+	if (status & XILINX_PCIE_INTR_HOT_RESET)
+		dev_info(dev, "Hot reset\n");
+
+	if (status & XILINX_PCIE_INTR_CFG_TIMEOUT)
+		dev_warn(dev, "ECAM access timeout\n");
+
+	if (status & XILINX_PCIE_INTR_CORRECTABLE) {
+		dev_warn(dev, "Correctable error message\n");
+		xilinx_pcie_clear_err_interrupts(pcie);
+	}
+
+	if (status & XILINX_PCIE_INTR_NONFATAL) {
+		dev_warn(dev, "Non fatal error message\n");
+		xilinx_pcie_clear_err_interrupts(pcie);
+	}
+
+	if (status & XILINX_PCIE_INTR_FATAL) {
+		dev_warn(dev, "Fatal error message\n");
+		xilinx_pcie_clear_err_interrupts(pcie);
+	}
+
+	if (status & (XILINX_PCIE_INTR_INTX | XILINX_PCIE_INTR_MSI)) {
+		struct irq_domain *domain;
+
+		val = pcie_read(pcie, XILINX_PCIE_REG_RPIFR1);
+
+		/* Check whether interrupt valid */
+		if (!(val & XILINX_PCIE_RPIFR1_INTR_VALID)) {
+			dev_warn(dev, "RP Intr FIFO1 read error\n");
+			goto error;
+		}
+
+		/* Decode the IRQ number */
+		if (val & XILINX_PCIE_RPIFR1_MSI_INTR) {
+			val = pcie_read(pcie, XILINX_PCIE_REG_RPIFR2) &
+				XILINX_PCIE_RPIFR2_MSG_DATA;
+			domain = pcie->msi_domain->parent;
+		} else {
+			val = (val & XILINX_PCIE_RPIFR1_INTR_MASK) >>
+				XILINX_PCIE_RPIFR1_INTR_SHIFT;
+			domain = pcie->leg_domain;
+		}
+
+		/* Clear interrupt FIFO register 1 */
+		pcie_write(pcie, XILINX_PCIE_RPIFR1_ALL_MASK,
+			   XILINX_PCIE_REG_RPIFR1);
+
+		generic_handle_domain_irq(domain, val);
+	}
+
+	if (status & XILINX_PCIE_INTR_SLV_UNSUPP)
+		dev_warn(dev, "Slave unsupported request\n");
+
+	if (status & XILINX_PCIE_INTR_SLV_UNEXP)
+		dev_warn(dev, "Slave unexpected completion\n");
+
+	if (status & XILINX_PCIE_INTR_SLV_COMPL)
+		dev_warn(dev, "Slave completion timeout\n");
+
+	if (status & XILINX_PCIE_INTR_SLV_ERRP)
+		dev_warn(dev, "Slave Error Poison\n");
+
+	if (status & XILINX_PCIE_INTR_SLV_CMPABT)
+		dev_warn(dev, "Slave Completer Abort\n");
+
+	if (status & XILINX_PCIE_INTR_SLV_ILLBUR)
+		dev_warn(dev, "Slave Illegal Burst\n");
+
+	if (status & XILINX_PCIE_INTR_MST_DECERR)
+		dev_warn(dev, "Master decode error\n");
+
+	if (status & XILINX_PCIE_INTR_MST_SLVERR)
+		dev_warn(dev, "Master slave error\n");
+
+	if (status & XILINX_PCIE_INTR_MST_ERRP)
+		dev_warn(dev, "Master error poison\n");
+
+error:
+	/* Clear the Interrupt Decode register */
+	pcie_write(pcie, status, XILINX_PCIE_REG_IDR);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * xilinx_pcie_init_irq_domain - Initialize IRQ domain
+ * @pcie: PCIe port information
+ *
+ * Return: '0' on success and error value on failure
+ */
+static int xilinx_pcie_init_irq_domain(struct xilinx_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct device_node *pcie_intc_node;
+	int ret;
+
+	/* Setup INTx */
+	pcie_intc_node = of_get_next_child(dev->of_node, NULL);
+	if (!pcie_intc_node) {
+		dev_err(dev, "No PCIe Intc node found\n");
+		return -ENODEV;
+	}
+
+	pcie->leg_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
+						 &intx_domain_ops,
+						 pcie);
+	of_node_put(pcie_intc_node);
+	if (!pcie->leg_domain) {
+		dev_err(dev, "Failed to get a INTx IRQ domain\n");
+		return -ENODEV;
+	}
+
+	/* Setup MSI */
+	if (IS_ENABLED(CONFIG_PCI_MSI)) {
+		phys_addr_t pa = ALIGN_DOWN(virt_to_phys(pcie), SZ_4K);
+
+		ret = xilinx_allocate_msi_domains(pcie);
+		if (ret)
+			return ret;
+
+		pcie_write(pcie, upper_32_bits(pa), XILINX_PCIE_REG_MSIBASE1);
+		pcie_write(pcie, lower_32_bits(pa), XILINX_PCIE_REG_MSIBASE2);
+	}
+
+	return 0;
+}
+
+/**
+ * xilinx_pcie_init_port - Initialize hardware
+ * @pcie: PCIe port information
+ */
+static void xilinx_pcie_init_port(struct xilinx_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+
+	if (xilinx_pcie_link_up(pcie))
+		dev_info(dev, "PCIe Link is UP\n");
+	else
+		dev_info(dev, "PCIe Link is DOWN\n");
+
+	/* Disable all interrupts */
+	pcie_write(pcie, ~XILINX_PCIE_IDR_ALL_MASK,
+		   XILINX_PCIE_REG_IMR);
+
+	/* Clear pending interrupts */
+	pcie_write(pcie, pcie_read(pcie, XILINX_PCIE_REG_IDR) &
+			 XILINX_PCIE_IMR_ALL_MASK,
+		   XILINX_PCIE_REG_IDR);
+
+	/* Enable all interrupts we handle */
+	pcie_write(pcie, XILINX_PCIE_IMR_ENABLE_MASK, XILINX_PCIE_REG_IMR);
+
+	/* Enable the Bridge enable bit */
+	pcie_write(pcie, pcie_read(pcie, XILINX_PCIE_REG_RPSC) |
+			 XILINX_PCIE_REG_RPSC_BEN,
+		   XILINX_PCIE_REG_RPSC);
+}
+
+/**
+ * xilinx_pcie_parse_dt - Parse Device tree
+ * @pcie: PCIe port information
+ *
+ * Return: '0' on success and error value on failure
+ */
+static int xilinx_pcie_parse_dt(struct xilinx_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct device_node *node = dev->of_node;
+	struct resource regs;
+	unsigned int irq;
+	int err;
+
+	err = of_address_to_resource(node, 0, &regs);
+	if (err) {
+		dev_err(dev, "missing \"reg\" property\n");
+		return err;
+	}
+
+	pcie->reg_base = devm_pci_remap_cfg_resource(dev, &regs);
+	if (IS_ERR(pcie->reg_base))
+		return PTR_ERR(pcie->reg_base);
+
+	irq = irq_of_parse_and_map(node, 0);
+	err = devm_request_irq(dev, irq, xilinx_pcie_intr_handler,
+			       IRQF_SHARED | IRQF_NO_THREAD,
+			       "xilinx-pcie", pcie);
+	if (err) {
+		dev_err(dev, "unable to request irq %d\n", irq);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * xilinx_pcie_probe - Probe function
+ * @pdev: Platform device pointer
+ *
+ * Return: '0' on success and error value on failure
+ */
+static int xilinx_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct xilinx_pcie *pcie;
+	struct pci_host_bridge *bridge;
+	int err;
+
+	if (!dev->of_node)
+		return -ENODEV;
+
+	bridge = devm_pci_alloc_host_bridge(dev, sizeof(*pcie));
+	if (!bridge)
+		return -ENODEV;
+
+	pcie = pci_host_bridge_priv(bridge);
+	mutex_init(&pcie->map_lock);
+	pcie->dev = dev;
+
+	err = xilinx_pcie_parse_dt(pcie);
+	if (err) {
+		dev_err(dev, "Parsing DT failed\n");
+		return err;
+	}
+
+	xilinx_pcie_init_port(pcie);
+
+	err = xilinx_pcie_init_irq_domain(pcie);
+	if (err) {
+		dev_err(dev, "Failed creating IRQ Domain\n");
+		return err;
+	}
+
+	bridge->sysdata = pcie;
+	bridge->ops = &xilinx_pcie_ops;
+
+	err = pci_host_probe(bridge);
+	if (err)
+		xilinx_free_msi_domains(pcie);
+
+	return err;
+}
+
+static const struct of_device_id xilinx_pcie_of_match[] = {
+	{ .compatible = "xlnx,axi-pcie-host-1.00.a", },
+	{}
+};
+
+static struct platform_driver xilinx_pcie_driver = {
+	.driver = {
+		.name = "xilinx-pcie",
+		.of_match_table = xilinx_pcie_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = xilinx_pcie_probe,
+};
+builtin_platform_driver(xilinx_pcie_driver);
diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
new file mode 100644
index 000000000..6ac0afae0
--- /dev/null
+++ b/drivers/pci/controller/vmd.c
@@ -0,0 +1,1135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Volume Management Device driver
+ * Copyright (c) 2015, Intel Corporation.
+ */
+
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/pci-ecam.h>
+#include <linux/srcu.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+
+#include <asm/irqdomain.h>
+
+#define VMD_CFGBAR	0
+#define VMD_MEMBAR1	2
+#define VMD_MEMBAR2	4
+
+#define PCI_REG_VMCAP		0x40
+#define BUS_RESTRICT_CAP(vmcap)	(vmcap & 0x1)
+#define PCI_REG_VMCONFIG	0x44
+#define BUS_RESTRICT_CFG(vmcfg)	((vmcfg >> 8) & 0x3)
+#define VMCONFIG_MSI_REMAP	0x2
+#define PCI_REG_VMLOCK		0x70
+#define MB2_SHADOW_EN(vmlock)	(vmlock & 0x2)
+
+#define MB2_SHADOW_OFFSET	0x2000
+#define MB2_SHADOW_SIZE		16
+
+enum vmd_features {
+	/*
+	 * Device may contain registers which hint the physical location of the
+	 * membars, in order to allow proper address translation during
+	 * resource assignment to enable guest virtualization
+	 */
+	VMD_FEAT_HAS_MEMBAR_SHADOW		= (1 << 0),
+
+	/*
+	 * Device may provide root port configuration information which limits
+	 * bus numbering
+	 */
+	VMD_FEAT_HAS_BUS_RESTRICTIONS		= (1 << 1),
+
+	/*
+	 * Device contains physical location shadow registers in
+	 * vendor-specific capability space
+	 */
+	VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP	= (1 << 2),
+
+	/*
+	 * Device may use MSI-X vector 0 for software triggering and will not
+	 * be used for MSI remapping
+	 */
+	VMD_FEAT_OFFSET_FIRST_VECTOR		= (1 << 3),
+
+	/*
+	 * Device can bypass remapping MSI-X transactions into its MSI-X table,
+	 * avoiding the requirement of a VMD MSI domain for child device
+	 * interrupt handling.
+	 */
+	VMD_FEAT_CAN_BYPASS_MSI_REMAP		= (1 << 4),
+
+	/*
+	 * Enable ASPM on the PCIE root ports and set the default LTR of the
+	 * storage devices on platforms where these values are not configured by
+	 * BIOS. This is needed for laptops, which require these settings for
+	 * proper power management of the SoC.
+	 */
+	VMD_FEAT_BIOS_PM_QUIRK		= (1 << 5),
+};
+
+#define VMD_BIOS_PM_QUIRK_LTR	0x1003	/* 3145728 ns */
+
+#define VMD_FEATS_CLIENT	(VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP |	\
+				 VMD_FEAT_HAS_BUS_RESTRICTIONS |	\
+				 VMD_FEAT_OFFSET_FIRST_VECTOR |		\
+				 VMD_FEAT_BIOS_PM_QUIRK)
+
+static DEFINE_IDA(vmd_instance_ida);
+
+/*
+ * Lock for manipulating VMD IRQ lists.
+ */
+static DEFINE_RAW_SPINLOCK(list_lock);
+
+/**
+ * struct vmd_irq - private data to map driver IRQ to the VMD shared vector
+ * @node:	list item for parent traversal.
+ * @irq:	back pointer to parent.
+ * @enabled:	true if driver enabled IRQ
+ * @virq:	the virtual IRQ value provided to the requesting driver.
+ *
+ * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to
+ * a VMD IRQ using this structure.
+ */
+struct vmd_irq {
+	struct list_head	node;
+	struct vmd_irq_list	*irq;
+	bool			enabled;
+	unsigned int		virq;
+};
+
+/**
+ * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector
+ * @irq_list:	the list of irq's the VMD one demuxes to.
+ * @srcu:	SRCU struct for local synchronization.
+ * @count:	number of child IRQs assigned to this vector; used to track
+ *		sharing.
+ * @virq:	The underlying VMD Linux interrupt number
+ */
+struct vmd_irq_list {
+	struct list_head	irq_list;
+	struct srcu_struct	srcu;
+	unsigned int		count;
+	unsigned int		virq;
+};
+
+struct vmd_dev {
+	struct pci_dev		*dev;
+
+	spinlock_t		cfg_lock;
+	void __iomem		*cfgbar;
+
+	int msix_count;
+	struct vmd_irq_list	*irqs;
+
+	struct pci_sysdata	sysdata;
+	struct resource		resources[3];
+	struct irq_domain	*irq_domain;
+	struct pci_bus		*bus;
+	u8			busn_start;
+	u8			first_vec;
+	char			*name;
+	int			instance;
+};
+
+static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus)
+{
+	return container_of(bus->sysdata, struct vmd_dev, sysdata);
+}
+
+static inline unsigned int index_from_irqs(struct vmd_dev *vmd,
+					   struct vmd_irq_list *irqs)
+{
+	return irqs - vmd->irqs;
+}
+
+/*
+ * Drivers managing a device in a VMD domain allocate their own IRQs as before,
+ * but the MSI entry for the hardware it's driving will be programmed with a
+ * destination ID for the VMD MSI-X table.  The VMD muxes interrupts in its
+ * domain into one of its own, and the VMD driver de-muxes these for the
+ * handlers sharing that VMD IRQ.  The vmd irq_domain provides the operations
+ * and irq_chip to set this up.
+ */
+static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct vmd_irq *vmdirq = data->chip_data;
+	struct vmd_irq_list *irq = vmdirq->irq;
+	struct vmd_dev *vmd = irq_data_get_irq_handler_data(data);
+
+	memset(msg, 0, sizeof(*msg));
+	msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
+	msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
+	msg->arch_addr_lo.destid_0_7 = index_from_irqs(vmd, irq);
+}
+
+/*
+ * We rely on MSI_FLAG_USE_DEF_CHIP_OPS to set the IRQ mask/unmask ops.
+ */
+static void vmd_irq_enable(struct irq_data *data)
+{
+	struct vmd_irq *vmdirq = data->chip_data;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&list_lock, flags);
+	WARN_ON(vmdirq->enabled);
+	list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list);
+	vmdirq->enabled = true;
+	raw_spin_unlock_irqrestore(&list_lock, flags);
+
+	data->chip->irq_unmask(data);
+}
+
+static void vmd_irq_disable(struct irq_data *data)
+{
+	struct vmd_irq *vmdirq = data->chip_data;
+	unsigned long flags;
+
+	data->chip->irq_mask(data);
+
+	raw_spin_lock_irqsave(&list_lock, flags);
+	if (vmdirq->enabled) {
+		list_del_rcu(&vmdirq->node);
+		vmdirq->enabled = false;
+	}
+	raw_spin_unlock_irqrestore(&list_lock, flags);
+}
+
+/*
+ * XXX: Stubbed until we develop acceptable way to not create conflicts with
+ * other devices sharing the same vector.
+ */
+static int vmd_irq_set_affinity(struct irq_data *data,
+				const struct cpumask *dest, bool force)
+{
+	return -EINVAL;
+}
+
+static struct irq_chip vmd_msi_controller = {
+	.name			= "VMD-MSI",
+	.irq_enable		= vmd_irq_enable,
+	.irq_disable		= vmd_irq_disable,
+	.irq_compose_msi_msg	= vmd_compose_msi_msg,
+	.irq_set_affinity	= vmd_irq_set_affinity,
+};
+
+static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info,
+				     msi_alloc_info_t *arg)
+{
+	return 0;
+}
+
+/*
+ * XXX: We can be even smarter selecting the best IRQ once we solve the
+ * affinity problem.
+ */
+static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc)
+{
+	unsigned long flags;
+	int i, best;
+
+	if (vmd->msix_count == 1 + vmd->first_vec)
+		return &vmd->irqs[vmd->first_vec];
+
+	/*
+	 * White list for fast-interrupt handlers. All others will share the
+	 * "slow" interrupt vector.
+	 */
+	switch (msi_desc_to_pci_dev(desc)->class) {
+	case PCI_CLASS_STORAGE_EXPRESS:
+		break;
+	default:
+		return &vmd->irqs[vmd->first_vec];
+	}
+
+	raw_spin_lock_irqsave(&list_lock, flags);
+	best = vmd->first_vec + 1;
+	for (i = best; i < vmd->msix_count; i++)
+		if (vmd->irqs[i].count < vmd->irqs[best].count)
+			best = i;
+	vmd->irqs[best].count++;
+	raw_spin_unlock_irqrestore(&list_lock, flags);
+
+	return &vmd->irqs[best];
+}
+
+static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
+			unsigned int virq, irq_hw_number_t hwirq,
+			msi_alloc_info_t *arg)
+{
+	struct msi_desc *desc = arg->desc;
+	struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus);
+	struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL);
+
+	if (!vmdirq)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&vmdirq->node);
+	vmdirq->irq = vmd_next_irq(vmd, desc);
+	vmdirq->virq = virq;
+
+	irq_domain_set_info(domain, virq, vmdirq->irq->virq, info->chip, vmdirq,
+			    handle_untracked_irq, vmd, NULL);
+	return 0;
+}
+
+static void vmd_msi_free(struct irq_domain *domain,
+			struct msi_domain_info *info, unsigned int virq)
+{
+	struct vmd_irq *vmdirq = irq_get_chip_data(virq);
+	unsigned long flags;
+
+	synchronize_srcu(&vmdirq->irq->srcu);
+
+	/* XXX: Potential optimization to rebalance */
+	raw_spin_lock_irqsave(&list_lock, flags);
+	vmdirq->irq->count--;
+	raw_spin_unlock_irqrestore(&list_lock, flags);
+
+	kfree(vmdirq);
+}
+
+static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev,
+			   int nvec, msi_alloc_info_t *arg)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct vmd_dev *vmd = vmd_from_bus(pdev->bus);
+
+	if (nvec > vmd->msix_count)
+		return vmd->msix_count;
+
+	memset(arg, 0, sizeof(*arg));
+	return 0;
+}
+
+static void vmd_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
+{
+	arg->desc = desc;
+}
+
+static struct msi_domain_ops vmd_msi_domain_ops = {
+	.get_hwirq	= vmd_get_hwirq,
+	.msi_init	= vmd_msi_init,
+	.msi_free	= vmd_msi_free,
+	.msi_prepare	= vmd_msi_prepare,
+	.set_desc	= vmd_set_desc,
+};
+
+static struct msi_domain_info vmd_msi_domain_info = {
+	.flags		= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+			  MSI_FLAG_PCI_MSIX,
+	.ops		= &vmd_msi_domain_ops,
+	.chip		= &vmd_msi_controller,
+};
+
+static void vmd_set_msi_remapping(struct vmd_dev *vmd, bool enable)
+{
+	u16 reg;
+
+	pci_read_config_word(vmd->dev, PCI_REG_VMCONFIG, &reg);
+	reg = enable ? (reg & ~VMCONFIG_MSI_REMAP) :
+		       (reg | VMCONFIG_MSI_REMAP);
+	pci_write_config_word(vmd->dev, PCI_REG_VMCONFIG, reg);
+}
+
+static int vmd_create_irq_domain(struct vmd_dev *vmd)
+{
+	struct fwnode_handle *fn;
+
+	fn = irq_domain_alloc_named_id_fwnode("VMD-MSI", vmd->sysdata.domain);
+	if (!fn)
+		return -ENODEV;
+
+	vmd->irq_domain = pci_msi_create_irq_domain(fn, &vmd_msi_domain_info, NULL);
+	if (!vmd->irq_domain) {
+		irq_domain_free_fwnode(fn);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static void vmd_remove_irq_domain(struct vmd_dev *vmd)
+{
+	/*
+	 * Some production BIOS won't enable remapping between soft reboots.
+	 * Ensure remapping is restored before unloading the driver.
+	 */
+	if (!vmd->msix_count)
+		vmd_set_msi_remapping(vmd, true);
+
+	if (vmd->irq_domain) {
+		struct fwnode_handle *fn = vmd->irq_domain->fwnode;
+
+		irq_domain_remove(vmd->irq_domain);
+		irq_domain_free_fwnode(fn);
+	}
+}
+
+static void __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus,
+				  unsigned int devfn, int reg, int len)
+{
+	unsigned int busnr_ecam = bus->number - vmd->busn_start;
+	u32 offset = PCIE_ECAM_OFFSET(busnr_ecam, devfn, reg);
+
+	if (offset + len >= resource_size(&vmd->dev->resource[VMD_CFGBAR]))
+		return NULL;
+
+	return vmd->cfgbar + offset;
+}
+
+/*
+ * CPU may deadlock if config space is not serialized on some versions of this
+ * hardware, so all config space access is done under a spinlock.
+ */
+static int vmd_pci_read(struct pci_bus *bus, unsigned int devfn, int reg,
+			int len, u32 *value)
+{
+	struct vmd_dev *vmd = vmd_from_bus(bus);
+	void __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
+	unsigned long flags;
+	int ret = 0;
+
+	if (!addr)
+		return -EFAULT;
+
+	spin_lock_irqsave(&vmd->cfg_lock, flags);
+	switch (len) {
+	case 1:
+		*value = readb(addr);
+		break;
+	case 2:
+		*value = readw(addr);
+		break;
+	case 4:
+		*value = readl(addr);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	spin_unlock_irqrestore(&vmd->cfg_lock, flags);
+	return ret;
+}
+
+/*
+ * VMD h/w converts non-posted config writes to posted memory writes. The
+ * read-back in this function forces the completion so it returns only after
+ * the config space was written, as expected.
+ */
+static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg,
+			 int len, u32 value)
+{
+	struct vmd_dev *vmd = vmd_from_bus(bus);
+	void __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
+	unsigned long flags;
+	int ret = 0;
+
+	if (!addr)
+		return -EFAULT;
+
+	spin_lock_irqsave(&vmd->cfg_lock, flags);
+	switch (len) {
+	case 1:
+		writeb(value, addr);
+		readb(addr);
+		break;
+	case 2:
+		writew(value, addr);
+		readw(addr);
+		break;
+	case 4:
+		writel(value, addr);
+		readl(addr);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	spin_unlock_irqrestore(&vmd->cfg_lock, flags);
+	return ret;
+}
+
+static struct pci_ops vmd_ops = {
+	.read		= vmd_pci_read,
+	.write		= vmd_pci_write,
+};
+
+#ifdef CONFIG_ACPI
+static struct acpi_device *vmd_acpi_find_companion(struct pci_dev *pci_dev)
+{
+	struct pci_host_bridge *bridge;
+	u32 busnr, addr;
+
+	if (pci_dev->bus->ops != &vmd_ops)
+		return NULL;
+
+	bridge = pci_find_host_bridge(pci_dev->bus);
+	busnr = pci_dev->bus->number - bridge->bus->number;
+	/*
+	 * The address computation below is only applicable to relative bus
+	 * numbers below 32.
+	 */
+	if (busnr > 31)
+		return NULL;
+
+	addr = (busnr << 24) | ((u32)pci_dev->devfn << 16) | 0x8000FFFFU;
+
+	dev_dbg(&pci_dev->dev, "Looking for ACPI companion (address 0x%x)\n",
+		addr);
+
+	return acpi_find_child_device(ACPI_COMPANION(bridge->dev.parent), addr,
+				      false);
+}
+
+static bool hook_installed;
+
+static void vmd_acpi_begin(void)
+{
+	if (pci_acpi_set_companion_lookup_hook(vmd_acpi_find_companion))
+		return;
+
+	hook_installed = true;
+}
+
+static void vmd_acpi_end(void)
+{
+	if (!hook_installed)
+		return;
+
+	pci_acpi_clear_companion_lookup_hook();
+	hook_installed = false;
+}
+#else
+static inline void vmd_acpi_begin(void) { }
+static inline void vmd_acpi_end(void) { }
+#endif /* CONFIG_ACPI */
+
+static void vmd_domain_reset(struct vmd_dev *vmd)
+{
+	u16 bus, max_buses = resource_size(&vmd->resources[0]);
+	u8 dev, functions, fn, hdr_type;
+	char __iomem *base;
+
+	for (bus = 0; bus < max_buses; bus++) {
+		for (dev = 0; dev < 32; dev++) {
+			base = vmd->cfgbar + PCIE_ECAM_OFFSET(bus,
+						PCI_DEVFN(dev, 0), 0);
+
+			hdr_type = readb(base + PCI_HEADER_TYPE);
+
+			functions = (hdr_type & 0x80) ? 8 : 1;
+			for (fn = 0; fn < functions; fn++) {
+				base = vmd->cfgbar + PCIE_ECAM_OFFSET(bus,
+						PCI_DEVFN(dev, fn), 0);
+
+				hdr_type = readb(base + PCI_HEADER_TYPE) &
+						PCI_HEADER_TYPE_MASK;
+
+				if (hdr_type != PCI_HEADER_TYPE_BRIDGE ||
+				    (readw(base + PCI_CLASS_DEVICE) !=
+				     PCI_CLASS_BRIDGE_PCI))
+					continue;
+
+				/*
+				 * Temporarily disable the I/O range before updating
+				 * PCI_IO_BASE.
+				 */
+				writel(0x0000ffff, base + PCI_IO_BASE_UPPER16);
+				/* Update lower 16 bits of I/O base/limit */
+				writew(0x00f0, base + PCI_IO_BASE);
+				/* Update upper 16 bits of I/O base/limit */
+				writel(0, base + PCI_IO_BASE_UPPER16);
+
+				/* MMIO Base/Limit */
+				writel(0x0000fff0, base + PCI_MEMORY_BASE);
+
+				/* Prefetchable MMIO Base/Limit */
+				writel(0, base + PCI_PREF_LIMIT_UPPER32);
+				writel(0x0000fff0, base + PCI_PREF_MEMORY_BASE);
+				writel(0xffffffff, base + PCI_PREF_BASE_UPPER32);
+			}
+		}
+	}
+}
+
+static void vmd_attach_resources(struct vmd_dev *vmd)
+{
+	vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1];
+	vmd->dev->resource[VMD_MEMBAR2].child = &vmd->resources[2];
+}
+
+static void vmd_detach_resources(struct vmd_dev *vmd)
+{
+	vmd->dev->resource[VMD_MEMBAR1].child = NULL;
+	vmd->dev->resource[VMD_MEMBAR2].child = NULL;
+}
+
+/*
+ * VMD domains start at 0x10000 to not clash with ACPI _SEG domains.
+ * Per ACPI r6.0, sec 6.5.6,  _SEG returns an integer, of which the lower
+ * 16 bits are the PCI Segment Group (domain) number.  Other bits are
+ * currently reserved.
+ */
+static int vmd_find_free_domain(void)
+{
+	int domain = 0xffff;
+	struct pci_bus *bus = NULL;
+
+	while ((bus = pci_find_next_bus(bus)) != NULL)
+		domain = max_t(int, domain, pci_domain_nr(bus));
+	return domain + 1;
+}
+
+static int vmd_get_phys_offsets(struct vmd_dev *vmd, bool native_hint,
+				resource_size_t *offset1,
+				resource_size_t *offset2)
+{
+	struct pci_dev *dev = vmd->dev;
+	u64 phys1, phys2;
+
+	if (native_hint) {
+		u32 vmlock;
+		int ret;
+
+		ret = pci_read_config_dword(dev, PCI_REG_VMLOCK, &vmlock);
+		if (ret || PCI_POSSIBLE_ERROR(vmlock))
+			return -ENODEV;
+
+		if (MB2_SHADOW_EN(vmlock)) {
+			void __iomem *membar2;
+
+			membar2 = pci_iomap(dev, VMD_MEMBAR2, 0);
+			if (!membar2)
+				return -ENOMEM;
+			phys1 = readq(membar2 + MB2_SHADOW_OFFSET);
+			phys2 = readq(membar2 + MB2_SHADOW_OFFSET + 8);
+			pci_iounmap(dev, membar2);
+		} else
+			return 0;
+	} else {
+		/* Hypervisor-Emulated Vendor-Specific Capability */
+		int pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
+		u32 reg, regu;
+
+		pci_read_config_dword(dev, pos + 4, &reg);
+
+		/* "SHDW" */
+		if (pos && reg == 0x53484457) {
+			pci_read_config_dword(dev, pos + 8, &reg);
+			pci_read_config_dword(dev, pos + 12, &regu);
+			phys1 = (u64) regu << 32 | reg;
+
+			pci_read_config_dword(dev, pos + 16, &reg);
+			pci_read_config_dword(dev, pos + 20, &regu);
+			phys2 = (u64) regu << 32 | reg;
+		} else
+			return 0;
+	}
+
+	*offset1 = dev->resource[VMD_MEMBAR1].start -
+			(phys1 & PCI_BASE_ADDRESS_MEM_MASK);
+	*offset2 = dev->resource[VMD_MEMBAR2].start -
+			(phys2 & PCI_BASE_ADDRESS_MEM_MASK);
+
+	return 0;
+}
+
+static int vmd_get_bus_number_start(struct vmd_dev *vmd)
+{
+	struct pci_dev *dev = vmd->dev;
+	u16 reg;
+
+	pci_read_config_word(dev, PCI_REG_VMCAP, &reg);
+	if (BUS_RESTRICT_CAP(reg)) {
+		pci_read_config_word(dev, PCI_REG_VMCONFIG, &reg);
+
+		switch (BUS_RESTRICT_CFG(reg)) {
+		case 0:
+			vmd->busn_start = 0;
+			break;
+		case 1:
+			vmd->busn_start = 128;
+			break;
+		case 2:
+			vmd->busn_start = 224;
+			break;
+		default:
+			pci_err(dev, "Unknown Bus Offset Setting (%d)\n",
+				BUS_RESTRICT_CFG(reg));
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
+static irqreturn_t vmd_irq(int irq, void *data)
+{
+	struct vmd_irq_list *irqs = data;
+	struct vmd_irq *vmdirq;
+	int idx;
+
+	idx = srcu_read_lock(&irqs->srcu);
+	list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node)
+		generic_handle_irq(vmdirq->virq);
+	srcu_read_unlock(&irqs->srcu, idx);
+
+	return IRQ_HANDLED;
+}
+
+static int vmd_alloc_irqs(struct vmd_dev *vmd)
+{
+	struct pci_dev *dev = vmd->dev;
+	int i, err;
+
+	vmd->msix_count = pci_msix_vec_count(dev);
+	if (vmd->msix_count < 0)
+		return -ENODEV;
+
+	vmd->msix_count = pci_alloc_irq_vectors(dev, vmd->first_vec + 1,
+						vmd->msix_count, PCI_IRQ_MSIX);
+	if (vmd->msix_count < 0)
+		return vmd->msix_count;
+
+	vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs),
+				 GFP_KERNEL);
+	if (!vmd->irqs)
+		return -ENOMEM;
+
+	for (i = 0; i < vmd->msix_count; i++) {
+		err = init_srcu_struct(&vmd->irqs[i].srcu);
+		if (err)
+			return err;
+
+		INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
+		vmd->irqs[i].virq = pci_irq_vector(dev, i);
+		err = devm_request_irq(&dev->dev, vmd->irqs[i].virq,
+				       vmd_irq, IRQF_NO_THREAD,
+				       vmd->name, &vmd->irqs[i]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Since VMD is an aperture to regular PCIe root ports, only allow it to
+ * control features that the OS is allowed to control on the physical PCI bus.
+ */
+static void vmd_copy_host_bridge_flags(struct pci_host_bridge *root_bridge,
+				       struct pci_host_bridge *vmd_bridge)
+{
+	vmd_bridge->native_pcie_hotplug = root_bridge->native_pcie_hotplug;
+	vmd_bridge->native_shpc_hotplug = root_bridge->native_shpc_hotplug;
+	vmd_bridge->native_aer = root_bridge->native_aer;
+	vmd_bridge->native_pme = root_bridge->native_pme;
+	vmd_bridge->native_ltr = root_bridge->native_ltr;
+	vmd_bridge->native_dpc = root_bridge->native_dpc;
+}
+
+/*
+ * Enable ASPM and LTR settings on devices that aren't configured by BIOS.
+ */
+static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata)
+{
+	unsigned long features = *(unsigned long *)userdata;
+	u16 ltr = VMD_BIOS_PM_QUIRK_LTR;
+	u32 ltr_reg;
+	int pos;
+
+	if (!(features & VMD_FEAT_BIOS_PM_QUIRK))
+		return 0;
+
+	pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL);
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_LTR);
+	if (!pos)
+		return 0;
+
+	/*
+	 * Skip if the max snoop LTR is non-zero, indicating BIOS has set it
+	 * so the LTR quirk is not needed.
+	 */
+	pci_read_config_dword(pdev, pos + PCI_LTR_MAX_SNOOP_LAT, &ltr_reg);
+	if (!!(ltr_reg & (PCI_LTR_VALUE_MASK | PCI_LTR_SCALE_MASK)))
+		return 0;
+
+	/*
+	 * Set the default values to the maximum required by the platform to
+	 * allow the deepest power management savings. Write as a DWORD where
+	 * the lower word is the max snoop latency and the upper word is the
+	 * max non-snoop latency.
+	 */
+	ltr_reg = (ltr << 16) | ltr;
+	pci_write_config_dword(pdev, pos + PCI_LTR_MAX_SNOOP_LAT, ltr_reg);
+	pci_info(pdev, "VMD: Default LTR value set by driver\n");
+
+	return 0;
+}
+
+static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
+{
+	struct pci_sysdata *sd = &vmd->sysdata;
+	struct resource *res;
+	u32 upper_bits;
+	unsigned long flags;
+	LIST_HEAD(resources);
+	resource_size_t offset[2] = {0};
+	resource_size_t membar2_offset = 0x2000;
+	struct pci_bus *child;
+	struct pci_dev *dev;
+	int ret;
+
+	/*
+	 * Shadow registers may exist in certain VMD device ids which allow
+	 * guests to correctly assign host physical addresses to the root ports
+	 * and child devices. These registers will either return the host value
+	 * or 0, depending on an enable bit in the VMD device.
+	 */
+	if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
+		membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
+		ret = vmd_get_phys_offsets(vmd, true, &offset[0], &offset[1]);
+		if (ret)
+			return ret;
+	} else if (features & VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP) {
+		ret = vmd_get_phys_offsets(vmd, false, &offset[0], &offset[1]);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * Certain VMD devices may have a root port configuration option which
+	 * limits the bus range to between 0-127, 128-255, or 224-255
+	 */
+	if (features & VMD_FEAT_HAS_BUS_RESTRICTIONS) {
+		ret = vmd_get_bus_number_start(vmd);
+		if (ret)
+			return ret;
+	}
+
+	res = &vmd->dev->resource[VMD_CFGBAR];
+	vmd->resources[0] = (struct resource) {
+		.name  = "VMD CFGBAR",
+		.start = vmd->busn_start,
+		.end   = vmd->busn_start + (resource_size(res) >> 20) - 1,
+		.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED,
+	};
+
+	/*
+	 * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can
+	 * put 32-bit resources in the window.
+	 *
+	 * There's no hardware reason why a 64-bit window *couldn't*
+	 * contain a 32-bit resource, but pbus_size_mem() computes the
+	 * bridge window size assuming a 64-bit window will contain no
+	 * 32-bit resources.  __pci_assign_resource() enforces that
+	 * artificial restriction to make sure everything will fit.
+	 *
+	 * The only way we could use a 64-bit non-prefetchable MEMBAR is
+	 * if its address is <4GB so that we can convert it to a 32-bit
+	 * resource.  To be visible to the host OS, all VMD endpoints must
+	 * be initially configured by platform BIOS, which includes setting
+	 * up these resources.  We can assume the device is configured
+	 * according to the platform needs.
+	 */
+	res = &vmd->dev->resource[VMD_MEMBAR1];
+	upper_bits = upper_32_bits(res->end);
+	flags = res->flags & ~IORESOURCE_SIZEALIGN;
+	if (!upper_bits)
+		flags &= ~IORESOURCE_MEM_64;
+	vmd->resources[1] = (struct resource) {
+		.name  = "VMD MEMBAR1",
+		.start = res->start,
+		.end   = res->end,
+		.flags = flags,
+		.parent = res,
+	};
+
+	res = &vmd->dev->resource[VMD_MEMBAR2];
+	upper_bits = upper_32_bits(res->end);
+	flags = res->flags & ~IORESOURCE_SIZEALIGN;
+	if (!upper_bits)
+		flags &= ~IORESOURCE_MEM_64;
+	vmd->resources[2] = (struct resource) {
+		.name  = "VMD MEMBAR2",
+		.start = res->start + membar2_offset,
+		.end   = res->end,
+		.flags = flags,
+		.parent = res,
+	};
+
+	sd->vmd_dev = vmd->dev;
+	sd->domain = vmd_find_free_domain();
+	if (sd->domain < 0)
+		return sd->domain;
+
+	sd->node = pcibus_to_node(vmd->dev->bus);
+
+	/*
+	 * Currently MSI remapping must be enabled in guest passthrough mode
+	 * due to some missing interrupt remapping plumbing. This is probably
+	 * acceptable because the guest is usually CPU-limited and MSI
+	 * remapping doesn't become a performance bottleneck.
+	 */
+	if (!(features & VMD_FEAT_CAN_BYPASS_MSI_REMAP) ||
+	    offset[0] || offset[1]) {
+		ret = vmd_alloc_irqs(vmd);
+		if (ret)
+			return ret;
+
+		vmd_set_msi_remapping(vmd, true);
+
+		ret = vmd_create_irq_domain(vmd);
+		if (ret)
+			return ret;
+
+		/*
+		 * Override the IRQ domain bus token so the domain can be
+		 * distinguished from a regular PCI/MSI domain.
+		 */
+		irq_domain_update_bus_token(vmd->irq_domain, DOMAIN_BUS_VMD_MSI);
+	} else {
+		vmd_set_msi_remapping(vmd, false);
+	}
+
+	pci_add_resource(&resources, &vmd->resources[0]);
+	pci_add_resource_offset(&resources, &vmd->resources[1], offset[0]);
+	pci_add_resource_offset(&resources, &vmd->resources[2], offset[1]);
+
+	vmd->bus = pci_create_root_bus(&vmd->dev->dev, vmd->busn_start,
+				       &vmd_ops, sd, &resources);
+	if (!vmd->bus) {
+		pci_free_resource_list(&resources);
+		vmd_remove_irq_domain(vmd);
+		return -ENODEV;
+	}
+
+	vmd_copy_host_bridge_flags(pci_find_host_bridge(vmd->dev->bus),
+				   to_pci_host_bridge(vmd->bus->bridge));
+
+	vmd_attach_resources(vmd);
+	if (vmd->irq_domain)
+		dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain);
+	else
+		dev_set_msi_domain(&vmd->bus->dev,
+				   dev_get_msi_domain(&vmd->dev->dev));
+
+	vmd_acpi_begin();
+
+	pci_scan_child_bus(vmd->bus);
+	vmd_domain_reset(vmd);
+
+	/* When Intel VMD is enabled, the OS does not discover the Root Ports
+	 * owned by Intel VMD within the MMCFG space. pci_reset_bus() applies
+	 * a reset to the parent of the PCI device supplied as argument. This
+	 * is why we pass a child device, so the reset can be triggered at
+	 * the Intel bridge level and propagated to all the children in the
+	 * hierarchy.
+	 */
+	list_for_each_entry(child, &vmd->bus->children, node) {
+		if (!list_empty(&child->devices)) {
+			dev = list_first_entry(&child->devices,
+					       struct pci_dev, bus_list);
+			ret = pci_reset_bus(dev);
+			if (ret)
+				pci_warn(dev, "can't reset device: %d\n", ret);
+
+			break;
+		}
+	}
+
+	pci_assign_unassigned_bus_resources(vmd->bus);
+
+	pci_walk_bus(vmd->bus, vmd_pm_enable_quirk, &features);
+
+	/*
+	 * VMD root buses are virtual and don't return true on pci_is_pcie()
+	 * and will fail pcie_bus_configure_settings() early. It can instead be
+	 * run on each of the real root ports.
+	 */
+	list_for_each_entry(child, &vmd->bus->children, node)
+		pcie_bus_configure_settings(child);
+
+	pci_bus_add_devices(vmd->bus);
+
+	vmd_acpi_end();
+
+	WARN(sysfs_create_link(&vmd->dev->dev.kobj, &vmd->bus->dev.kobj,
+			       "domain"), "Can't create symlink to domain\n");
+	return 0;
+}
+
+static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	unsigned long features = (unsigned long) id->driver_data;
+	struct vmd_dev *vmd;
+	int err;
+
+	if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20))
+		return -ENOMEM;
+
+	vmd = devm_kzalloc(&dev->dev, sizeof(*vmd), GFP_KERNEL);
+	if (!vmd)
+		return -ENOMEM;
+
+	vmd->dev = dev;
+	vmd->instance = ida_simple_get(&vmd_instance_ida, 0, 0, GFP_KERNEL);
+	if (vmd->instance < 0)
+		return vmd->instance;
+
+	vmd->name = devm_kasprintf(&dev->dev, GFP_KERNEL, "vmd%d",
+				   vmd->instance);
+	if (!vmd->name) {
+		err = -ENOMEM;
+		goto out_release_instance;
+	}
+
+	err = pcim_enable_device(dev);
+	if (err < 0)
+		goto out_release_instance;
+
+	vmd->cfgbar = pcim_iomap(dev, VMD_CFGBAR, 0);
+	if (!vmd->cfgbar) {
+		err = -ENOMEM;
+		goto out_release_instance;
+	}
+
+	pci_set_master(dev);
+	if (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64)) &&
+	    dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32))) {
+		err = -ENODEV;
+		goto out_release_instance;
+	}
+
+	if (features & VMD_FEAT_OFFSET_FIRST_VECTOR)
+		vmd->first_vec = 1;
+
+	spin_lock_init(&vmd->cfg_lock);
+	pci_set_drvdata(dev, vmd);
+	err = vmd_enable_domain(vmd, features);
+	if (err)
+		goto out_release_instance;
+
+	dev_info(&vmd->dev->dev, "Bound to PCI domain %04x\n",
+		 vmd->sysdata.domain);
+	return 0;
+
+ out_release_instance:
+	ida_simple_remove(&vmd_instance_ida, vmd->instance);
+	return err;
+}
+
+static void vmd_cleanup_srcu(struct vmd_dev *vmd)
+{
+	int i;
+
+	for (i = 0; i < vmd->msix_count; i++)
+		cleanup_srcu_struct(&vmd->irqs[i].srcu);
+}
+
+static void vmd_remove(struct pci_dev *dev)
+{
+	struct vmd_dev *vmd = pci_get_drvdata(dev);
+
+	sysfs_remove_link(&vmd->dev->dev.kobj, "domain");
+	pci_stop_root_bus(vmd->bus);
+	pci_remove_root_bus(vmd->bus);
+	vmd_cleanup_srcu(vmd);
+	vmd_detach_resources(vmd);
+	vmd_remove_irq_domain(vmd);
+	ida_simple_remove(&vmd_instance_ida, vmd->instance);
+}
+
+static void vmd_shutdown(struct pci_dev *dev)
+{
+        struct vmd_dev *vmd = pci_get_drvdata(dev);
+
+        vmd_remove_irq_domain(vmd);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int vmd_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct vmd_dev *vmd = pci_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < vmd->msix_count; i++)
+		devm_free_irq(dev, vmd->irqs[i].virq, &vmd->irqs[i]);
+
+	return 0;
+}
+
+static int vmd_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct vmd_dev *vmd = pci_get_drvdata(pdev);
+	int err, i;
+
+       if (vmd->irq_domain)
+               vmd_set_msi_remapping(vmd, true);
+       else
+               vmd_set_msi_remapping(vmd, false);
+
+	for (i = 0; i < vmd->msix_count; i++) {
+		err = devm_request_irq(dev, vmd->irqs[i].virq,
+				       vmd_irq, IRQF_NO_THREAD,
+				       vmd->name, &vmd->irqs[i]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+#endif
+static SIMPLE_DEV_PM_OPS(vmd_dev_pm_ops, vmd_suspend, vmd_resume);
+
+static const struct pci_device_id vmd_ids[] = {
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_VMD_201D),
+		.driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP,},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_VMD_28C0),
+		.driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW |
+				VMD_FEAT_HAS_BUS_RESTRICTIONS |
+				VMD_FEAT_CAN_BYPASS_MSI_REMAP,},
+	{PCI_VDEVICE(INTEL, 0x467f),
+		.driver_data = VMD_FEATS_CLIENT,},
+	{PCI_VDEVICE(INTEL, 0x4c3d),
+		.driver_data = VMD_FEATS_CLIENT,},
+	{PCI_VDEVICE(INTEL, 0xa77f),
+		.driver_data = VMD_FEATS_CLIENT,},
+	{PCI_VDEVICE(INTEL, 0x7d0b),
+		.driver_data = VMD_FEATS_CLIENT,},
+	{PCI_VDEVICE(INTEL, 0xad0b),
+		.driver_data = VMD_FEATS_CLIENT,},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_VMD_9A0B),
+		.driver_data = VMD_FEATS_CLIENT,},
+	{0,}
+};
+MODULE_DEVICE_TABLE(pci, vmd_ids);
+
+static struct pci_driver vmd_drv = {
+	.name		= "vmd",
+	.id_table	= vmd_ids,
+	.probe		= vmd_probe,
+	.remove		= vmd_remove,
+	.shutdown	= vmd_shutdown,
+	.driver		= {
+		.pm	= &vmd_dev_pm_ops,
+	},
+};
+module_pci_driver(vmd_drv);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION("0.6");
diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
new file mode 100644
index 000000000..e3aab5eda
--- /dev/null
+++ b/drivers/pci/doe.c
@@ -0,0 +1,724 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Data Object Exchange
+ *	PCIe r6.0, sec 6.30 DOE
+ *
+ * Copyright (C) 2021 Huawei
+ *	Jonathan Cameron <Jonathan.Cameron@huawei.com>
+ *
+ * Copyright (C) 2022 Intel Corporation
+ *	Ira Weiny <ira.weiny@intel.com>
+ */
+
+#define dev_fmt(fmt) "DOE: " fmt
+
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/pci-doe.h>
+#include <linux/workqueue.h>
+
+#include "pci.h"
+
+#define PCI_DOE_PROTOCOL_DISCOVERY 0
+
+/* Timeout of 1 second from 6.30.2 Operation, PCI Spec r6.0 */
+#define PCI_DOE_TIMEOUT HZ
+#define PCI_DOE_POLL_INTERVAL	(PCI_DOE_TIMEOUT / 128)
+
+#define PCI_DOE_FLAG_CANCEL	0
+#define PCI_DOE_FLAG_DEAD	1
+
+/* Max data object length is 2^18 dwords */
+#define PCI_DOE_MAX_LENGTH	(1 << 18)
+
+/**
+ * struct pci_doe_mb - State for a single DOE mailbox
+ *
+ * This state is used to manage a single DOE mailbox capability.  All fields
+ * should be considered opaque to the consumers and the structure passed into
+ * the helpers below after being created by pci_doe_create_mb().
+ *
+ * @pdev: PCI device this mailbox belongs to
+ * @cap_offset: Capability offset
+ * @prots: Array of protocols supported (encoded as long values)
+ * @wq: Wait queue for work item
+ * @work_queue: Queue of pci_doe_work items
+ * @flags: Bit array of PCI_DOE_FLAG_* flags
+ */
+struct pci_doe_mb {
+	struct pci_dev *pdev;
+	u16 cap_offset;
+	struct xarray prots;
+
+	wait_queue_head_t wq;
+	struct workqueue_struct *work_queue;
+	unsigned long flags;
+};
+
+struct pci_doe_protocol {
+	u16 vid;
+	u8 type;
+};
+
+/**
+ * struct pci_doe_task - represents a single query/response
+ *
+ * @prot: DOE Protocol
+ * @request_pl: The request payload
+ * @request_pl_sz: Size of the request payload (bytes)
+ * @response_pl: The response payload
+ * @response_pl_sz: Size of the response payload (bytes)
+ * @rv: Return value.  Length of received response or error (bytes)
+ * @complete: Called when task is complete
+ * @private: Private data for the consumer
+ * @work: Used internally by the mailbox
+ * @doe_mb: Used internally by the mailbox
+ */
+struct pci_doe_task {
+	struct pci_doe_protocol prot;
+	const __le32 *request_pl;
+	size_t request_pl_sz;
+	__le32 *response_pl;
+	size_t response_pl_sz;
+	int rv;
+	void (*complete)(struct pci_doe_task *task);
+	void *private;
+
+	/* initialized by pci_doe_submit_task() */
+	struct work_struct work;
+	struct pci_doe_mb *doe_mb;
+};
+
+static int pci_doe_wait(struct pci_doe_mb *doe_mb, unsigned long timeout)
+{
+	if (wait_event_timeout(doe_mb->wq,
+			       test_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags),
+			       timeout))
+		return -EIO;
+	return 0;
+}
+
+static void pci_doe_write_ctrl(struct pci_doe_mb *doe_mb, u32 val)
+{
+	struct pci_dev *pdev = doe_mb->pdev;
+	int offset = doe_mb->cap_offset;
+
+	pci_write_config_dword(pdev, offset + PCI_DOE_CTRL, val);
+}
+
+static int pci_doe_abort(struct pci_doe_mb *doe_mb)
+{
+	struct pci_dev *pdev = doe_mb->pdev;
+	int offset = doe_mb->cap_offset;
+	unsigned long timeout_jiffies;
+
+	pci_dbg(pdev, "[%x] Issuing Abort\n", offset);
+
+	timeout_jiffies = jiffies + PCI_DOE_TIMEOUT;
+	pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_ABORT);
+
+	do {
+		int rc;
+		u32 val;
+
+		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
+		if (rc)
+			return rc;
+		pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
+
+		/* Abort success! */
+		if (!FIELD_GET(PCI_DOE_STATUS_ERROR, val) &&
+		    !FIELD_GET(PCI_DOE_STATUS_BUSY, val))
+			return 0;
+
+	} while (!time_after(jiffies, timeout_jiffies));
+
+	/* Abort has timed out and the MB is dead */
+	pci_err(pdev, "[%x] ABORT timed out\n", offset);
+	return -EIO;
+}
+
+static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
+			    struct pci_doe_task *task)
+{
+	struct pci_dev *pdev = doe_mb->pdev;
+	int offset = doe_mb->cap_offset;
+	size_t length, remainder;
+	u32 val;
+	int i;
+
+	/*
+	 * Check the DOE busy bit is not set. If it is set, this could indicate
+	 * someone other than Linux (e.g. firmware) is using the mailbox. Note
+	 * it is expected that firmware and OS will negotiate access rights via
+	 * an, as yet to be defined, method.
+	 */
+	pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
+	if (FIELD_GET(PCI_DOE_STATUS_BUSY, val))
+		return -EBUSY;
+
+	if (FIELD_GET(PCI_DOE_STATUS_ERROR, val))
+		return -EIO;
+
+	/* Length is 2 DW of header + length of payload in DW */
+	length = 2 + DIV_ROUND_UP(task->request_pl_sz, sizeof(__le32));
+	if (length > PCI_DOE_MAX_LENGTH)
+		return -EIO;
+	if (length == PCI_DOE_MAX_LENGTH)
+		length = 0;
+
+	/* Write DOE Header */
+	val = FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_VID, task->prot.vid) |
+		FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, task->prot.type);
+	pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, val);
+	pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
+			       FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH,
+					  length));
+
+	/* Write payload */
+	for (i = 0; i < task->request_pl_sz / sizeof(__le32); i++)
+		pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
+				       le32_to_cpu(task->request_pl[i]));
+
+	/* Write last payload dword */
+	remainder = task->request_pl_sz % sizeof(__le32);
+	if (remainder) {
+		val = 0;
+		memcpy(&val, &task->request_pl[i], remainder);
+		le32_to_cpus(&val);
+		pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, val);
+	}
+
+	pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_GO);
+
+	return 0;
+}
+
+static bool pci_doe_data_obj_ready(struct pci_doe_mb *doe_mb)
+{
+	struct pci_dev *pdev = doe_mb->pdev;
+	int offset = doe_mb->cap_offset;
+	u32 val;
+
+	pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
+	if (FIELD_GET(PCI_DOE_STATUS_DATA_OBJECT_READY, val))
+		return true;
+	return false;
+}
+
+static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
+{
+	size_t length, payload_length, remainder, received;
+	struct pci_dev *pdev = doe_mb->pdev;
+	int offset = doe_mb->cap_offset;
+	int i = 0;
+	u32 val;
+
+	/* Read the first dword to get the protocol */
+	pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
+	if ((FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID, val) != task->prot.vid) ||
+	    (FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, val) != task->prot.type)) {
+		dev_err_ratelimited(&pdev->dev, "[%x] expected [VID, Protocol] = [%04x, %02x], got [%04x, %02x]\n",
+				    doe_mb->cap_offset, task->prot.vid, task->prot.type,
+				    FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID, val),
+				    FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, val));
+		return -EIO;
+	}
+
+	pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
+	/* Read the second dword to get the length */
+	pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
+	pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
+
+	length = FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH, val);
+	/* A value of 0x0 indicates max data object length */
+	if (!length)
+		length = PCI_DOE_MAX_LENGTH;
+	if (length < 2)
+		return -EIO;
+
+	/* First 2 dwords have already been read */
+	length -= 2;
+	received = task->response_pl_sz;
+	payload_length = DIV_ROUND_UP(task->response_pl_sz, sizeof(__le32));
+	remainder = task->response_pl_sz % sizeof(__le32);
+
+	/* remainder signifies number of data bytes in last payload dword */
+	if (!remainder)
+		remainder = sizeof(__le32);
+
+	if (length < payload_length) {
+		received = length * sizeof(__le32);
+		payload_length = length;
+		remainder = sizeof(__le32);
+	}
+
+	if (payload_length) {
+		/* Read all payload dwords except the last */
+		for (; i < payload_length - 1; i++) {
+			pci_read_config_dword(pdev, offset + PCI_DOE_READ,
+					      &val);
+			task->response_pl[i] = cpu_to_le32(val);
+			pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
+		}
+
+		/* Read last payload dword */
+		pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
+		cpu_to_le32s(&val);
+		memcpy(&task->response_pl[i], &val, remainder);
+		/* Prior to the last ack, ensure Data Object Ready */
+		if (!pci_doe_data_obj_ready(doe_mb))
+			return -EIO;
+		pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
+		i++;
+	}
+
+	/* Flush excess length */
+	for (; i < length; i++) {
+		pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
+		pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
+	}
+
+	/* Final error check to pick up on any since Data Object Ready */
+	pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
+	if (FIELD_GET(PCI_DOE_STATUS_ERROR, val))
+		return -EIO;
+
+	return received;
+}
+
+static void signal_task_complete(struct pci_doe_task *task, int rv)
+{
+	task->rv = rv;
+	destroy_work_on_stack(&task->work);
+	task->complete(task);
+}
+
+static void signal_task_abort(struct pci_doe_task *task, int rv)
+{
+	struct pci_doe_mb *doe_mb = task->doe_mb;
+	struct pci_dev *pdev = doe_mb->pdev;
+
+	if (pci_doe_abort(doe_mb)) {
+		/*
+		 * If the device can't process an abort; set the mailbox dead
+		 *	- no more submissions
+		 */
+		pci_err(pdev, "[%x] Abort failed marking mailbox dead\n",
+			doe_mb->cap_offset);
+		set_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags);
+	}
+	signal_task_complete(task, rv);
+}
+
+static void doe_statemachine_work(struct work_struct *work)
+{
+	struct pci_doe_task *task = container_of(work, struct pci_doe_task,
+						 work);
+	struct pci_doe_mb *doe_mb = task->doe_mb;
+	struct pci_dev *pdev = doe_mb->pdev;
+	int offset = doe_mb->cap_offset;
+	unsigned long timeout_jiffies;
+	u32 val;
+	int rc;
+
+	if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags)) {
+		signal_task_complete(task, -EIO);
+		return;
+	}
+
+	/* Send request */
+	rc = pci_doe_send_req(doe_mb, task);
+	if (rc) {
+		/*
+		 * The specification does not provide any guidance on how to
+		 * resolve conflicting requests from other entities.
+		 * Furthermore, it is likely that busy will not be detected
+		 * most of the time.  Flag any detection of status busy with an
+		 * error.
+		 */
+		if (rc == -EBUSY)
+			dev_err_ratelimited(&pdev->dev, "[%x] busy detected; another entity is sending conflicting requests\n",
+					    offset);
+		signal_task_abort(task, rc);
+		return;
+	}
+
+	timeout_jiffies = jiffies + PCI_DOE_TIMEOUT;
+	/* Poll for response */
+retry_resp:
+	pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
+	if (FIELD_GET(PCI_DOE_STATUS_ERROR, val)) {
+		signal_task_abort(task, -EIO);
+		return;
+	}
+
+	if (!FIELD_GET(PCI_DOE_STATUS_DATA_OBJECT_READY, val)) {
+		if (time_after(jiffies, timeout_jiffies)) {
+			signal_task_abort(task, -EIO);
+			return;
+		}
+		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
+		if (rc) {
+			signal_task_abort(task, rc);
+			return;
+		}
+		goto retry_resp;
+	}
+
+	rc  = pci_doe_recv_resp(doe_mb, task);
+	if (rc < 0) {
+		signal_task_abort(task, rc);
+		return;
+	}
+
+	signal_task_complete(task, rc);
+}
+
+static void pci_doe_task_complete(struct pci_doe_task *task)
+{
+	complete(task->private);
+}
+
+static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
+			     u8 *protocol)
+{
+	u32 request_pl = FIELD_PREP(PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX,
+				    *index);
+	__le32 request_pl_le = cpu_to_le32(request_pl);
+	__le32 response_pl_le;
+	u32 response_pl;
+	int rc;
+
+	rc = pci_doe(doe_mb, PCI_VENDOR_ID_PCI_SIG, PCI_DOE_PROTOCOL_DISCOVERY,
+		     &request_pl_le, sizeof(request_pl_le),
+		     &response_pl_le, sizeof(response_pl_le));
+	if (rc < 0)
+		return rc;
+
+	if (rc != sizeof(response_pl_le))
+		return -EIO;
+
+	response_pl = le32_to_cpu(response_pl_le);
+	*vid = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID, response_pl);
+	*protocol = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL,
+			      response_pl);
+	*index = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX,
+			   response_pl);
+
+	return 0;
+}
+
+static void *pci_doe_xa_prot_entry(u16 vid, u8 prot)
+{
+	return xa_mk_value((vid << 8) | prot);
+}
+
+static int pci_doe_cache_protocols(struct pci_doe_mb *doe_mb)
+{
+	u8 index = 0;
+	u8 xa_idx = 0;
+
+	do {
+		int rc;
+		u16 vid;
+		u8 prot;
+
+		rc = pci_doe_discovery(doe_mb, &index, &vid, &prot);
+		if (rc)
+			return rc;
+
+		pci_dbg(doe_mb->pdev,
+			"[%x] Found protocol %d vid: %x prot: %x\n",
+			doe_mb->cap_offset, xa_idx, vid, prot);
+
+		rc = xa_insert(&doe_mb->prots, xa_idx++,
+			       pci_doe_xa_prot_entry(vid, prot), GFP_KERNEL);
+		if (rc)
+			return rc;
+	} while (index);
+
+	return 0;
+}
+
+static void pci_doe_cancel_tasks(struct pci_doe_mb *doe_mb)
+{
+	/* Stop all pending work items from starting */
+	set_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags);
+
+	/* Cancel an in progress work item, if necessary */
+	set_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags);
+	wake_up(&doe_mb->wq);
+}
+
+/**
+ * pci_doe_create_mb() - Create a DOE mailbox object
+ *
+ * @pdev: PCI device to create the DOE mailbox for
+ * @cap_offset: Offset of the DOE mailbox
+ *
+ * Create a single mailbox object to manage the mailbox protocol at the
+ * cap_offset specified.
+ *
+ * RETURNS: created mailbox object on success
+ *	    ERR_PTR(-errno) on failure
+ */
+static struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev,
+					    u16 cap_offset)
+{
+	struct pci_doe_mb *doe_mb;
+	int rc;
+
+	doe_mb = kzalloc(sizeof(*doe_mb), GFP_KERNEL);
+	if (!doe_mb)
+		return ERR_PTR(-ENOMEM);
+
+	doe_mb->pdev = pdev;
+	doe_mb->cap_offset = cap_offset;
+	init_waitqueue_head(&doe_mb->wq);
+	xa_init(&doe_mb->prots);
+
+	doe_mb->work_queue = alloc_ordered_workqueue("%s %s DOE [%x]", 0,
+						dev_bus_name(&pdev->dev),
+						pci_name(pdev),
+						doe_mb->cap_offset);
+	if (!doe_mb->work_queue) {
+		pci_err(pdev, "[%x] failed to allocate work queue\n",
+			doe_mb->cap_offset);
+		rc = -ENOMEM;
+		goto err_free;
+	}
+
+	/* Reset the mailbox by issuing an abort */
+	rc = pci_doe_abort(doe_mb);
+	if (rc) {
+		pci_err(pdev, "[%x] failed to reset mailbox with abort command : %d\n",
+			doe_mb->cap_offset, rc);
+		goto err_destroy_wq;
+	}
+
+	/*
+	 * The state machine and the mailbox should be in sync now;
+	 * Use the mailbox to query protocols.
+	 */
+	rc = pci_doe_cache_protocols(doe_mb);
+	if (rc) {
+		pci_err(pdev, "[%x] failed to cache protocols : %d\n",
+			doe_mb->cap_offset, rc);
+		goto err_cancel;
+	}
+
+	return doe_mb;
+
+err_cancel:
+	pci_doe_cancel_tasks(doe_mb);
+	xa_destroy(&doe_mb->prots);
+err_destroy_wq:
+	destroy_workqueue(doe_mb->work_queue);
+err_free:
+	kfree(doe_mb);
+	return ERR_PTR(rc);
+}
+
+/**
+ * pci_doe_destroy_mb() - Destroy a DOE mailbox object
+ *
+ * @doe_mb: DOE mailbox
+ *
+ * Destroy all internal data structures created for the DOE mailbox.
+ */
+static void pci_doe_destroy_mb(struct pci_doe_mb *doe_mb)
+{
+	pci_doe_cancel_tasks(doe_mb);
+	xa_destroy(&doe_mb->prots);
+	destroy_workqueue(doe_mb->work_queue);
+	kfree(doe_mb);
+}
+
+/**
+ * pci_doe_supports_prot() - Return if the DOE instance supports the given
+ *			     protocol
+ * @doe_mb: DOE mailbox capability to query
+ * @vid: Protocol Vendor ID
+ * @type: Protocol type
+ *
+ * RETURNS: True if the DOE mailbox supports the protocol specified
+ */
+static bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
+{
+	unsigned long index;
+	void *entry;
+
+	/* The discovery protocol must always be supported */
+	if (vid == PCI_VENDOR_ID_PCI_SIG && type == PCI_DOE_PROTOCOL_DISCOVERY)
+		return true;
+
+	xa_for_each(&doe_mb->prots, index, entry)
+		if (entry == pci_doe_xa_prot_entry(vid, type))
+			return true;
+
+	return false;
+}
+
+/**
+ * pci_doe_submit_task() - Submit a task to be processed by the state machine
+ *
+ * @doe_mb: DOE mailbox capability to submit to
+ * @task: task to be queued
+ *
+ * Submit a DOE task (request/response) to the DOE mailbox to be processed.
+ * Returns upon queueing the task object.  If the queue is full this function
+ * will sleep until there is room in the queue.
+ *
+ * task->complete will be called when the state machine is done processing this
+ * task.
+ *
+ * @task must be allocated on the stack.
+ *
+ * Excess data will be discarded.
+ *
+ * RETURNS: 0 when task has been successfully queued, -ERRNO on error
+ */
+static int pci_doe_submit_task(struct pci_doe_mb *doe_mb,
+			       struct pci_doe_task *task)
+{
+	if (!pci_doe_supports_prot(doe_mb, task->prot.vid, task->prot.type))
+		return -EINVAL;
+
+	if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags))
+		return -EIO;
+
+	task->doe_mb = doe_mb;
+	INIT_WORK_ONSTACK(&task->work, doe_statemachine_work);
+	queue_work(doe_mb->work_queue, &task->work);
+	return 0;
+}
+
+/**
+ * pci_doe() - Perform Data Object Exchange
+ *
+ * @doe_mb: DOE Mailbox
+ * @vendor: Vendor ID
+ * @type: Data Object Type
+ * @request: Request payload
+ * @request_sz: Size of request payload (bytes)
+ * @response: Response payload
+ * @response_sz: Size of response payload (bytes)
+ *
+ * Submit @request to @doe_mb and store the @response.
+ * The DOE exchange is performed synchronously and may therefore sleep.
+ *
+ * Payloads are treated as opaque byte streams which are transmitted verbatim,
+ * without byte-swapping.  If payloads contain little-endian register values,
+ * the caller is responsible for conversion with cpu_to_le32() / le32_to_cpu().
+ *
+ * For convenience, arbitrary payload sizes are allowed even though PCIe r6.0
+ * sec 6.30.1 specifies the Data Object Header 2 "Length" in dwords.  The last
+ * (partial) dword is copied with byte granularity and padded with zeroes if
+ * necessary.  Callers are thus relieved of using dword-sized bounce buffers.
+ *
+ * RETURNS: Length of received response or negative errno.
+ * Received data in excess of @response_sz is discarded.
+ * The length may be smaller than @response_sz and the caller
+ * is responsible for checking that.
+ */
+int pci_doe(struct pci_doe_mb *doe_mb, u16 vendor, u8 type,
+	    const void *request, size_t request_sz,
+	    void *response, size_t response_sz)
+{
+	DECLARE_COMPLETION_ONSTACK(c);
+	struct pci_doe_task task = {
+		.prot.vid = vendor,
+		.prot.type = type,
+		.request_pl = request,
+		.request_pl_sz = request_sz,
+		.response_pl = response,
+		.response_pl_sz = response_sz,
+		.complete = pci_doe_task_complete,
+		.private = &c,
+	};
+	int rc;
+
+	rc = pci_doe_submit_task(doe_mb, &task);
+	if (rc)
+		return rc;
+
+	wait_for_completion(&c);
+
+	return task.rv;
+}
+EXPORT_SYMBOL_GPL(pci_doe);
+
+/**
+ * pci_find_doe_mailbox() - Find Data Object Exchange mailbox
+ *
+ * @pdev: PCI device
+ * @vendor: Vendor ID
+ * @type: Data Object Type
+ *
+ * Find first DOE mailbox of a PCI device which supports the given protocol.
+ *
+ * RETURNS: Pointer to the DOE mailbox or NULL if none was found.
+ */
+struct pci_doe_mb *pci_find_doe_mailbox(struct pci_dev *pdev, u16 vendor,
+					u8 type)
+{
+	struct pci_doe_mb *doe_mb;
+	unsigned long index;
+
+	xa_for_each(&pdev->doe_mbs, index, doe_mb)
+		if (pci_doe_supports_prot(doe_mb, vendor, type))
+			return doe_mb;
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(pci_find_doe_mailbox);
+
+void pci_doe_init(struct pci_dev *pdev)
+{
+	struct pci_doe_mb *doe_mb;
+	u16 offset = 0;
+	int rc;
+
+	xa_init(&pdev->doe_mbs);
+
+	while ((offset = pci_find_next_ext_capability(pdev, offset,
+						      PCI_EXT_CAP_ID_DOE))) {
+		doe_mb = pci_doe_create_mb(pdev, offset);
+		if (IS_ERR(doe_mb)) {
+			pci_err(pdev, "[%x] failed to create mailbox: %ld\n",
+				offset, PTR_ERR(doe_mb));
+			continue;
+		}
+
+		rc = xa_insert(&pdev->doe_mbs, offset, doe_mb, GFP_KERNEL);
+		if (rc) {
+			pci_err(pdev, "[%x] failed to insert mailbox: %d\n",
+				offset, rc);
+			pci_doe_destroy_mb(doe_mb);
+		}
+	}
+}
+
+void pci_doe_destroy(struct pci_dev *pdev)
+{
+	struct pci_doe_mb *doe_mb;
+	unsigned long index;
+
+	xa_for_each(&pdev->doe_mbs, index, doe_mb)
+		pci_doe_destroy_mb(doe_mb);
+
+	xa_destroy(&pdev->doe_mbs);
+}
+
+void pci_doe_disconnected(struct pci_dev *pdev)
+{
+	struct pci_doe_mb *doe_mb;
+	unsigned long index;
+
+	xa_for_each(&pdev->doe_mbs, index, doe_mb)
+		pci_doe_cancel_tasks(doe_mb);
+}
diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c
new file mode 100644
index 000000000..1c40d2506
--- /dev/null
+++ b/drivers/pci/ecam.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2016 Broadcom
+ */
+
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci-ecam.h>
+#include <linux/slab.h>
+
+/*
+ * On 64-bit systems, we do a single ioremap for the whole config space
+ * since we have enough virtual address range available.  On 32-bit, we
+ * ioremap the config space for each bus individually.
+ */
+static const bool per_bus_mapping = !IS_ENABLED(CONFIG_64BIT);
+
+/*
+ * Create a PCI config space window
+ *  - reserve mem region
+ *  - alloc struct pci_config_window with space for all mappings
+ *  - ioremap the config space
+ */
+struct pci_config_window *pci_ecam_create(struct device *dev,
+		struct resource *cfgres, struct resource *busr,
+		const struct pci_ecam_ops *ops)
+{
+	unsigned int bus_shift = ops->bus_shift;
+	struct pci_config_window *cfg;
+	unsigned int bus_range, bus_range_max, bsz;
+	struct resource *conflict;
+	int err;
+
+	if (busr->start > busr->end)
+		return ERR_PTR(-EINVAL);
+
+	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+	if (!cfg)
+		return ERR_PTR(-ENOMEM);
+
+	/* ECAM-compliant platforms need not supply ops->bus_shift */
+	if (!bus_shift)
+		bus_shift = PCIE_ECAM_BUS_SHIFT;
+
+	cfg->parent = dev;
+	cfg->ops = ops;
+	cfg->busr.start = busr->start;
+	cfg->busr.end = busr->end;
+	cfg->busr.flags = IORESOURCE_BUS;
+	cfg->bus_shift = bus_shift;
+	bus_range = resource_size(&cfg->busr);
+	bus_range_max = resource_size(cfgres) >> bus_shift;
+	if (bus_range > bus_range_max) {
+		bus_range = bus_range_max;
+		cfg->busr.end = busr->start + bus_range - 1;
+		dev_warn(dev, "ECAM area %pR can only accommodate %pR (reduced from %pR desired)\n",
+			 cfgres, &cfg->busr, busr);
+	}
+	bsz = 1 << bus_shift;
+
+	cfg->res.start = cfgres->start;
+	cfg->res.end = cfgres->end;
+	cfg->res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	cfg->res.name = "PCI ECAM";
+
+	conflict = request_resource_conflict(&iomem_resource, &cfg->res);
+	if (conflict) {
+		err = -EBUSY;
+		dev_err(dev, "can't claim ECAM area %pR: address conflict with %s %pR\n",
+			&cfg->res, conflict->name, conflict);
+		goto err_exit;
+	}
+
+	if (per_bus_mapping) {
+		cfg->winp = kcalloc(bus_range, sizeof(*cfg->winp), GFP_KERNEL);
+		if (!cfg->winp)
+			goto err_exit_malloc;
+	} else {
+		cfg->win = pci_remap_cfgspace(cfgres->start, bus_range * bsz);
+		if (!cfg->win)
+			goto err_exit_iomap;
+	}
+
+	if (ops->init) {
+		err = ops->init(cfg);
+		if (err)
+			goto err_exit;
+	}
+	dev_info(dev, "ECAM at %pR for %pR\n", &cfg->res, &cfg->busr);
+	return cfg;
+
+err_exit_iomap:
+	dev_err(dev, "ECAM ioremap failed\n");
+err_exit_malloc:
+	err = -ENOMEM;
+err_exit:
+	pci_ecam_free(cfg);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(pci_ecam_create);
+
+void pci_ecam_free(struct pci_config_window *cfg)
+{
+	int i;
+
+	if (per_bus_mapping) {
+		if (cfg->winp) {
+			for (i = 0; i < resource_size(&cfg->busr); i++)
+				if (cfg->winp[i])
+					iounmap(cfg->winp[i]);
+			kfree(cfg->winp);
+		}
+	} else {
+		if (cfg->win)
+			iounmap(cfg->win);
+	}
+	if (cfg->res.parent)
+		release_resource(&cfg->res);
+	kfree(cfg);
+}
+EXPORT_SYMBOL_GPL(pci_ecam_free);
+
+static int pci_ecam_add_bus(struct pci_bus *bus)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	unsigned int bsz = 1 << cfg->bus_shift;
+	unsigned int busn = bus->number;
+	phys_addr_t start;
+
+	if (!per_bus_mapping)
+		return 0;
+
+	if (busn < cfg->busr.start || busn > cfg->busr.end)
+		return -EINVAL;
+
+	busn -= cfg->busr.start;
+	start = cfg->res.start + busn * bsz;
+
+	cfg->winp[busn] = pci_remap_cfgspace(start, bsz);
+	if (!cfg->winp[busn])
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void pci_ecam_remove_bus(struct pci_bus *bus)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	unsigned int busn = bus->number;
+
+	if (!per_bus_mapping || busn < cfg->busr.start || busn > cfg->busr.end)
+		return;
+
+	busn -= cfg->busr.start;
+	if (cfg->winp[busn]) {
+		iounmap(cfg->winp[busn]);
+		cfg->winp[busn] = NULL;
+	}
+}
+
+/*
+ * Function to implement the pci_ops ->map_bus method
+ */
+void __iomem *pci_ecam_map_bus(struct pci_bus *bus, unsigned int devfn,
+			       int where)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	unsigned int bus_shift = cfg->ops->bus_shift;
+	unsigned int devfn_shift = cfg->ops->bus_shift - 8;
+	unsigned int busn = bus->number;
+	void __iomem *base;
+	u32 bus_offset, devfn_offset;
+
+	if (busn < cfg->busr.start || busn > cfg->busr.end)
+		return NULL;
+
+	busn -= cfg->busr.start;
+	if (per_bus_mapping) {
+		base = cfg->winp[busn];
+		busn = 0;
+	} else
+		base = cfg->win;
+
+	if (cfg->ops->bus_shift) {
+		bus_offset = (busn & PCIE_ECAM_BUS_MASK) << bus_shift;
+		devfn_offset = (devfn & PCIE_ECAM_DEVFN_MASK) << devfn_shift;
+		where &= PCIE_ECAM_REG_MASK;
+
+		return base + (bus_offset | devfn_offset | where);
+	}
+
+	return base + PCIE_ECAM_OFFSET(busn, devfn, where);
+}
+EXPORT_SYMBOL_GPL(pci_ecam_map_bus);
+
+/* ECAM ops */
+const struct pci_ecam_ops pci_generic_ecam_ops = {
+	.pci_ops	= {
+		.add_bus	= pci_ecam_add_bus,
+		.remove_bus	= pci_ecam_remove_bus,
+		.map_bus	= pci_ecam_map_bus,
+		.read		= pci_generic_config_read,
+		.write		= pci_generic_config_write,
+	}
+};
+EXPORT_SYMBOL_GPL(pci_generic_ecam_ops);
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
+/* ECAM ops for 32-bit access only (non-compliant) */
+const struct pci_ecam_ops pci_32b_ops = {
+	.pci_ops	= {
+		.add_bus	= pci_ecam_add_bus,
+		.remove_bus	= pci_ecam_remove_bus,
+		.map_bus	= pci_ecam_map_bus,
+		.read		= pci_generic_config_read32,
+		.write		= pci_generic_config_write32,
+	}
+};
+
+/* ECAM ops for 32-bit read only (non-compliant) */
+const struct pci_ecam_ops pci_32b_read_ops = {
+	.pci_ops	= {
+		.add_bus	= pci_ecam_add_bus,
+		.remove_bus	= pci_ecam_remove_bus,
+		.map_bus	= pci_ecam_map_bus,
+		.read		= pci_generic_config_read32,
+		.write		= pci_generic_config_write,
+	}
+};
+#endif
diff --git a/drivers/pci/endpoint/Kconfig b/drivers/pci/endpoint/Kconfig
new file mode 100644
index 000000000..17bbdc9bb
--- /dev/null
+++ b/drivers/pci/endpoint/Kconfig
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# PCI Endpoint Support
+#
+
+menu "PCI Endpoint"
+
+config PCI_ENDPOINT
+	bool "PCI Endpoint Support"
+	depends on HAVE_PCI
+	help
+	   Enable this configuration option to support configurable PCI
+	   endpoint. This should be enabled if the platform has a PCI
+	   controller that can operate in endpoint mode.
+
+	   Enabling this option will build the endpoint library, which
+	   includes endpoint controller library and endpoint function
+	   library.
+
+	   If in doubt, say "N" to disable Endpoint support.
+
+config PCI_ENDPOINT_CONFIGFS
+	bool "PCI Endpoint Configfs Support"
+	depends on PCI_ENDPOINT
+	select CONFIGFS_FS
+	help
+	   This will enable the configfs entry that can be used to
+	   configure the endpoint function and used to bind the
+	   function with a endpoint controller.
+
+source "drivers/pci/endpoint/functions/Kconfig"
+
+endmenu
diff --git a/drivers/pci/endpoint/Makefile b/drivers/pci/endpoint/Makefile
new file mode 100644
index 000000000..95b2fe47e
--- /dev/null
+++ b/drivers/pci/endpoint/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for PCI Endpoint Support
+#
+
+obj-$(CONFIG_PCI_ENDPOINT_CONFIGFS)	+= pci-ep-cfs.o
+obj-$(CONFIG_PCI_ENDPOINT)		+= pci-epc-core.o pci-epf-core.o\
+					   pci-epc-mem.o functions/
diff --git a/drivers/pci/endpoint/functions/Kconfig b/drivers/pci/endpoint/functions/Kconfig
new file mode 100644
index 000000000..0c9cea069
--- /dev/null
+++ b/drivers/pci/endpoint/functions/Kconfig
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# PCI Endpoint Functions
+#
+
+config PCI_EPF_TEST
+	tristate "PCI Endpoint Test driver"
+	depends on PCI_ENDPOINT
+	select CRC32
+	help
+	   Enable this configuration option to enable the test driver
+	   for PCI Endpoint.
+
+	   If in doubt, say "N" to disable Endpoint test driver.
+
+config PCI_EPF_NTB
+	tristate "PCI Endpoint NTB driver"
+	depends on PCI_ENDPOINT
+	select CONFIGFS_FS
+	help
+	  Select this configuration option to enable the Non-Transparent
+	  Bridge (NTB) driver for PCI Endpoint. NTB driver implements NTB
+	  controller functionality using multiple PCIe endpoint instances.
+	  It can support NTB endpoint function devices created using
+	  device tree.
+
+	  If in doubt, say "N" to disable Endpoint NTB driver.
+
+config PCI_EPF_VNTB
+	tristate "PCI Endpoint Virtual NTB driver"
+	depends on PCI_ENDPOINT
+	depends on NTB
+	select CONFIGFS_FS
+	help
+	  Select this configuration option to enable the Non-Transparent
+	  Bridge (NTB) driver for PCIe Endpoint. NTB driver implements NTB
+	  between PCI Root Port and PCIe Endpoint.
+
+	  If in doubt, say "N" to disable Endpoint NTB driver.
+
+config PCI_EPF_MHI
+	tristate "PCI Endpoint driver for MHI bus"
+	depends on PCI_ENDPOINT && MHI_BUS_EP
+	help
+	   Enable this configuration option to enable the PCI Endpoint
+	   driver for Modem Host Interface (MHI) bus in Qualcomm Endpoint
+	   devices such as SDX55.
+
+	   If in doubt, say "N" to disable Endpoint driver for MHI bus.
diff --git a/drivers/pci/endpoint/functions/Makefile b/drivers/pci/endpoint/functions/Makefile
new file mode 100644
index 000000000..696473fce
--- /dev/null
+++ b/drivers/pci/endpoint/functions/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for PCI Endpoint Functions
+#
+
+obj-$(CONFIG_PCI_EPF_TEST)		+= pci-epf-test.o
+obj-$(CONFIG_PCI_EPF_NTB)		+= pci-epf-ntb.o
+obj-$(CONFIG_PCI_EPF_VNTB) 		+= pci-epf-vntb.o
+obj-$(CONFIG_PCI_EPF_MHI)		+= pci-epf-mhi.o
diff --git a/drivers/pci/endpoint/functions/pci-epf-mhi.c b/drivers/pci/endpoint/functions/pci-epf-mhi.c
new file mode 100644
index 000000000..6dc918a8a
--- /dev/null
+++ b/drivers/pci/endpoint/functions/pci-epf-mhi.c
@@ -0,0 +1,716 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI EPF driver for MHI Endpoint devices
+ *
+ * Copyright (C) 2023 Linaro Ltd.
+ * Author: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/mhi_ep.h>
+#include <linux/module.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/pci-epc.h>
+#include <linux/pci-epf.h>
+
+#define MHI_VERSION_1_0 0x01000000
+
+#define to_epf_mhi(cntrl) container_of(cntrl, struct pci_epf_mhi, cntrl)
+
+/* Platform specific flags */
+#define MHI_EPF_USE_DMA BIT(0)
+
+struct pci_epf_mhi_ep_info {
+	const struct mhi_ep_cntrl_config *config;
+	struct pci_epf_header *epf_header;
+	enum pci_barno bar_num;
+	u32 epf_flags;
+	u32 msi_count;
+	u32 mru;
+	u32 flags;
+};
+
+#define MHI_EP_CHANNEL_CONFIG(ch_num, ch_name, direction)	\
+	{							\
+		.num = ch_num,					\
+		.name = ch_name,				\
+		.dir = direction,				\
+	}
+
+#define MHI_EP_CHANNEL_CONFIG_UL(ch_num, ch_name)		\
+	MHI_EP_CHANNEL_CONFIG(ch_num, ch_name, DMA_TO_DEVICE)
+
+#define MHI_EP_CHANNEL_CONFIG_DL(ch_num, ch_name)		\
+	MHI_EP_CHANNEL_CONFIG(ch_num, ch_name, DMA_FROM_DEVICE)
+
+static const struct mhi_ep_channel_config mhi_v1_channels[] = {
+	MHI_EP_CHANNEL_CONFIG_UL(0, "LOOPBACK"),
+	MHI_EP_CHANNEL_CONFIG_DL(1, "LOOPBACK"),
+	MHI_EP_CHANNEL_CONFIG_UL(2, "SAHARA"),
+	MHI_EP_CHANNEL_CONFIG_DL(3, "SAHARA"),
+	MHI_EP_CHANNEL_CONFIG_UL(4, "DIAG"),
+	MHI_EP_CHANNEL_CONFIG_DL(5, "DIAG"),
+	MHI_EP_CHANNEL_CONFIG_UL(6, "SSR"),
+	MHI_EP_CHANNEL_CONFIG_DL(7, "SSR"),
+	MHI_EP_CHANNEL_CONFIG_UL(8, "QDSS"),
+	MHI_EP_CHANNEL_CONFIG_DL(9, "QDSS"),
+	MHI_EP_CHANNEL_CONFIG_UL(10, "EFS"),
+	MHI_EP_CHANNEL_CONFIG_DL(11, "EFS"),
+	MHI_EP_CHANNEL_CONFIG_UL(12, "MBIM"),
+	MHI_EP_CHANNEL_CONFIG_DL(13, "MBIM"),
+	MHI_EP_CHANNEL_CONFIG_UL(14, "QMI"),
+	MHI_EP_CHANNEL_CONFIG_DL(15, "QMI"),
+	MHI_EP_CHANNEL_CONFIG_UL(16, "QMI"),
+	MHI_EP_CHANNEL_CONFIG_DL(17, "QMI"),
+	MHI_EP_CHANNEL_CONFIG_UL(18, "IP-CTRL-1"),
+	MHI_EP_CHANNEL_CONFIG_DL(19, "IP-CTRL-1"),
+	MHI_EP_CHANNEL_CONFIG_UL(20, "IPCR"),
+	MHI_EP_CHANNEL_CONFIG_DL(21, "IPCR"),
+	MHI_EP_CHANNEL_CONFIG_UL(32, "DUN"),
+	MHI_EP_CHANNEL_CONFIG_DL(33, "DUN"),
+	MHI_EP_CHANNEL_CONFIG_UL(46, "IP_SW0"),
+	MHI_EP_CHANNEL_CONFIG_DL(47, "IP_SW0"),
+};
+
+static const struct mhi_ep_cntrl_config mhi_v1_config = {
+	.max_channels = 128,
+	.num_channels = ARRAY_SIZE(mhi_v1_channels),
+	.ch_cfg = mhi_v1_channels,
+	.mhi_version = MHI_VERSION_1_0,
+};
+
+static struct pci_epf_header sdx55_header = {
+	.vendorid = PCI_VENDOR_ID_QCOM,
+	.deviceid = 0x0306,
+	.baseclass_code = PCI_BASE_CLASS_COMMUNICATION,
+	.subclass_code = PCI_CLASS_COMMUNICATION_MODEM & 0xff,
+	.interrupt_pin	= PCI_INTERRUPT_INTA,
+};
+
+static const struct pci_epf_mhi_ep_info sdx55_info = {
+	.config = &mhi_v1_config,
+	.epf_header = &sdx55_header,
+	.bar_num = BAR_0,
+	.epf_flags = PCI_BASE_ADDRESS_MEM_TYPE_32,
+	.msi_count = 32,
+	.mru = 0x8000,
+};
+
+static struct pci_epf_header sm8450_header = {
+	.vendorid = PCI_VENDOR_ID_QCOM,
+	.deviceid = 0x0306,
+	.baseclass_code = PCI_CLASS_OTHERS,
+	.interrupt_pin = PCI_INTERRUPT_INTA,
+};
+
+static const struct pci_epf_mhi_ep_info sm8450_info = {
+	.config = &mhi_v1_config,
+	.epf_header = &sm8450_header,
+	.bar_num = BAR_0,
+	.epf_flags = PCI_BASE_ADDRESS_MEM_TYPE_32,
+	.msi_count = 32,
+	.mru = 0x8000,
+	.flags = MHI_EPF_USE_DMA,
+};
+
+struct pci_epf_mhi {
+	const struct pci_epc_features *epc_features;
+	const struct pci_epf_mhi_ep_info *info;
+	struct mhi_ep_cntrl mhi_cntrl;
+	struct pci_epf *epf;
+	struct mutex lock;
+	void __iomem *mmio;
+	resource_size_t mmio_phys;
+	struct dma_chan *dma_chan_tx;
+	struct dma_chan *dma_chan_rx;
+	u32 mmio_size;
+	int irq;
+};
+
+static size_t get_align_offset(struct pci_epf_mhi *epf_mhi, u64 addr)
+{
+	return addr & (epf_mhi->epc_features->align -1);
+}
+
+static int __pci_epf_mhi_alloc_map(struct mhi_ep_cntrl *mhi_cntrl, u64 pci_addr,
+				 phys_addr_t *paddr, void __iomem **vaddr,
+				 size_t offset, size_t size)
+{
+	struct pci_epf_mhi *epf_mhi = to_epf_mhi(mhi_cntrl);
+	struct pci_epf *epf = epf_mhi->epf;
+	struct pci_epc *epc = epf->epc;
+	int ret;
+
+	*vaddr = pci_epc_mem_alloc_addr(epc, paddr, size + offset);
+	if (!*vaddr)
+		return -ENOMEM;
+
+	ret = pci_epc_map_addr(epc, epf->func_no, epf->vfunc_no, *paddr,
+			       pci_addr - offset, size + offset);
+	if (ret) {
+		pci_epc_mem_free_addr(epc, *paddr, *vaddr, size + offset);
+		return ret;
+	}
+
+	*paddr = *paddr + offset;
+	*vaddr = *vaddr + offset;
+
+	return 0;
+}
+
+static int pci_epf_mhi_alloc_map(struct mhi_ep_cntrl *mhi_cntrl, u64 pci_addr,
+				 phys_addr_t *paddr, void __iomem **vaddr,
+				 size_t size)
+{
+	struct pci_epf_mhi *epf_mhi = to_epf_mhi(mhi_cntrl);
+	size_t offset = get_align_offset(epf_mhi, pci_addr);
+
+	return __pci_epf_mhi_alloc_map(mhi_cntrl, pci_addr, paddr, vaddr,
+				      offset, size);
+}
+
+static void __pci_epf_mhi_unmap_free(struct mhi_ep_cntrl *mhi_cntrl,
+				     u64 pci_addr, phys_addr_t paddr,
+				     void __iomem *vaddr, size_t offset,
+				     size_t size)
+{
+	struct pci_epf_mhi *epf_mhi = to_epf_mhi(mhi_cntrl);
+	struct pci_epf *epf = epf_mhi->epf;
+	struct pci_epc *epc = epf->epc;
+
+	pci_epc_unmap_addr(epc, epf->func_no, epf->vfunc_no, paddr - offset);
+	pci_epc_mem_free_addr(epc, paddr - offset, vaddr - offset,
+			      size + offset);
+}
+
+static void pci_epf_mhi_unmap_free(struct mhi_ep_cntrl *mhi_cntrl, u64 pci_addr,
+				   phys_addr_t paddr, void __iomem *vaddr,
+				   size_t size)
+{
+	struct pci_epf_mhi *epf_mhi = to_epf_mhi(mhi_cntrl);
+	size_t offset = get_align_offset(epf_mhi, pci_addr);
+
+	__pci_epf_mhi_unmap_free(mhi_cntrl, pci_addr, paddr, vaddr, offset,
+				 size);
+}
+
+static void pci_epf_mhi_raise_irq(struct mhi_ep_cntrl *mhi_cntrl, u32 vector)
+{
+	struct pci_epf_mhi *epf_mhi = to_epf_mhi(mhi_cntrl);
+	struct pci_epf *epf = epf_mhi->epf;
+	struct pci_epc *epc = epf->epc;
+
+	/*
+	 * MHI supplies 0 based MSI vectors but the API expects the vector
+	 * number to start from 1, so we need to increment the vector by 1.
+	 */
+	pci_epc_raise_irq(epc, epf->func_no, epf->vfunc_no, PCI_EPC_IRQ_MSI,
+			  vector + 1);
+}
+
+static int pci_epf_mhi_iatu_read(struct mhi_ep_cntrl *mhi_cntrl,
+				 struct mhi_ep_buf_info *buf_info)
+{
+	struct pci_epf_mhi *epf_mhi = to_epf_mhi(mhi_cntrl);
+	size_t offset = get_align_offset(epf_mhi, buf_info->host_addr);
+	void __iomem *tre_buf;
+	phys_addr_t tre_phys;
+	int ret;
+
+	mutex_lock(&epf_mhi->lock);
+
+	ret = __pci_epf_mhi_alloc_map(mhi_cntrl, buf_info->host_addr, &tre_phys,
+				      &tre_buf, offset, buf_info->size);
+	if (ret) {
+		mutex_unlock(&epf_mhi->lock);
+		return ret;
+	}
+
+	memcpy_fromio(buf_info->dev_addr, tre_buf, buf_info->size);
+
+	__pci_epf_mhi_unmap_free(mhi_cntrl, buf_info->host_addr, tre_phys,
+				 tre_buf, offset, buf_info->size);
+
+	mutex_unlock(&epf_mhi->lock);
+
+	return 0;
+}
+
+static int pci_epf_mhi_iatu_write(struct mhi_ep_cntrl *mhi_cntrl,
+				  struct mhi_ep_buf_info *buf_info)
+{
+	struct pci_epf_mhi *epf_mhi = to_epf_mhi(mhi_cntrl);
+	size_t offset = get_align_offset(epf_mhi, buf_info->host_addr);
+	void __iomem *tre_buf;
+	phys_addr_t tre_phys;
+	int ret;
+
+	mutex_lock(&epf_mhi->lock);
+
+	ret = __pci_epf_mhi_alloc_map(mhi_cntrl, buf_info->host_addr, &tre_phys,
+				      &tre_buf, offset, buf_info->size);
+	if (ret) {
+		mutex_unlock(&epf_mhi->lock);
+		return ret;
+	}
+
+	memcpy_toio(tre_buf, buf_info->dev_addr, buf_info->size);
+
+	__pci_epf_mhi_unmap_free(mhi_cntrl, buf_info->host_addr, tre_phys,
+				 tre_buf, offset, buf_info->size);
+
+	mutex_unlock(&epf_mhi->lock);
+
+	return 0;
+}
+
+static void pci_epf_mhi_dma_callback(void *param)
+{
+	complete(param);
+}
+
+static int pci_epf_mhi_edma_read(struct mhi_ep_cntrl *mhi_cntrl,
+				 struct mhi_ep_buf_info *buf_info)
+{
+	struct pci_epf_mhi *epf_mhi = to_epf_mhi(mhi_cntrl);
+	struct device *dma_dev = epf_mhi->epf->epc->dev.parent;
+	struct dma_chan *chan = epf_mhi->dma_chan_rx;
+	struct device *dev = &epf_mhi->epf->dev;
+	DECLARE_COMPLETION_ONSTACK(complete);
+	struct dma_async_tx_descriptor *desc;
+	struct dma_slave_config config = {};
+	dma_cookie_t cookie;
+	dma_addr_t dst_addr;
+	int ret;
+
+	if (buf_info->size < SZ_4K)
+		return pci_epf_mhi_iatu_read(mhi_cntrl, buf_info);
+
+	mutex_lock(&epf_mhi->lock);
+
+	config.direction = DMA_DEV_TO_MEM;
+	config.src_addr = buf_info->host_addr;
+
+	ret = dmaengine_slave_config(chan, &config);
+	if (ret) {
+		dev_err(dev, "Failed to configure DMA channel\n");
+		goto err_unlock;
+	}
+
+	dst_addr = dma_map_single(dma_dev, buf_info->dev_addr, buf_info->size,
+				  DMA_FROM_DEVICE);
+	ret = dma_mapping_error(dma_dev, dst_addr);
+	if (ret) {
+		dev_err(dev, "Failed to map remote memory\n");
+		goto err_unlock;
+	}
+
+	desc = dmaengine_prep_slave_single(chan, dst_addr, buf_info->size,
+					   DMA_DEV_TO_MEM,
+					   DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
+	if (!desc) {
+		dev_err(dev, "Failed to prepare DMA\n");
+		ret = -EIO;
+		goto err_unmap;
+	}
+
+	desc->callback = pci_epf_mhi_dma_callback;
+	desc->callback_param = &complete;
+
+	cookie = dmaengine_submit(desc);
+	ret = dma_submit_error(cookie);
+	if (ret) {
+		dev_err(dev, "Failed to do DMA submit\n");
+		goto err_unmap;
+	}
+
+	dma_async_issue_pending(chan);
+	ret = wait_for_completion_timeout(&complete, msecs_to_jiffies(1000));
+	if (!ret) {
+		dev_err(dev, "DMA transfer timeout\n");
+		dmaengine_terminate_sync(chan);
+		ret = -ETIMEDOUT;
+	}
+
+err_unmap:
+	dma_unmap_single(dma_dev, dst_addr, buf_info->size, DMA_FROM_DEVICE);
+err_unlock:
+	mutex_unlock(&epf_mhi->lock);
+
+	return ret;
+}
+
+static int pci_epf_mhi_edma_write(struct mhi_ep_cntrl *mhi_cntrl,
+				  struct mhi_ep_buf_info *buf_info)
+{
+	struct pci_epf_mhi *epf_mhi = to_epf_mhi(mhi_cntrl);
+	struct device *dma_dev = epf_mhi->epf->epc->dev.parent;
+	struct dma_chan *chan = epf_mhi->dma_chan_tx;
+	struct device *dev = &epf_mhi->epf->dev;
+	DECLARE_COMPLETION_ONSTACK(complete);
+	struct dma_async_tx_descriptor *desc;
+	struct dma_slave_config config = {};
+	dma_cookie_t cookie;
+	dma_addr_t src_addr;
+	int ret;
+
+	if (buf_info->size < SZ_4K)
+		return pci_epf_mhi_iatu_write(mhi_cntrl, buf_info);
+
+	mutex_lock(&epf_mhi->lock);
+
+	config.direction = DMA_MEM_TO_DEV;
+	config.dst_addr = buf_info->host_addr;
+
+	ret = dmaengine_slave_config(chan, &config);
+	if (ret) {
+		dev_err(dev, "Failed to configure DMA channel\n");
+		goto err_unlock;
+	}
+
+	src_addr = dma_map_single(dma_dev, buf_info->dev_addr, buf_info->size,
+				  DMA_TO_DEVICE);
+	ret = dma_mapping_error(dma_dev, src_addr);
+	if (ret) {
+		dev_err(dev, "Failed to map remote memory\n");
+		goto err_unlock;
+	}
+
+	desc = dmaengine_prep_slave_single(chan, src_addr, buf_info->size,
+					   DMA_MEM_TO_DEV,
+					   DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
+	if (!desc) {
+		dev_err(dev, "Failed to prepare DMA\n");
+		ret = -EIO;
+		goto err_unmap;
+	}
+
+	desc->callback = pci_epf_mhi_dma_callback;
+	desc->callback_param = &complete;
+
+	cookie = dmaengine_submit(desc);
+	ret = dma_submit_error(cookie);
+	if (ret) {
+		dev_err(dev, "Failed to do DMA submit\n");
+		goto err_unmap;
+	}
+
+	dma_async_issue_pending(chan);
+	ret = wait_for_completion_timeout(&complete, msecs_to_jiffies(1000));
+	if (!ret) {
+		dev_err(dev, "DMA transfer timeout\n");
+		dmaengine_terminate_sync(chan);
+		ret = -ETIMEDOUT;
+	}
+
+err_unmap:
+	dma_unmap_single(dma_dev, src_addr, buf_info->size, DMA_TO_DEVICE);
+err_unlock:
+	mutex_unlock(&epf_mhi->lock);
+
+	return ret;
+}
+
+struct epf_dma_filter {
+	struct device *dev;
+	u32 dma_mask;
+};
+
+static bool pci_epf_mhi_filter(struct dma_chan *chan, void *node)
+{
+	struct epf_dma_filter *filter = node;
+	struct dma_slave_caps caps;
+
+	memset(&caps, 0, sizeof(caps));
+	dma_get_slave_caps(chan, &caps);
+
+	return chan->device->dev == filter->dev && filter->dma_mask &
+					caps.directions;
+}
+
+static int pci_epf_mhi_dma_init(struct pci_epf_mhi *epf_mhi)
+{
+	struct device *dma_dev = epf_mhi->epf->epc->dev.parent;
+	struct device *dev = &epf_mhi->epf->dev;
+	struct epf_dma_filter filter;
+	dma_cap_mask_t mask;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	filter.dev = dma_dev;
+	filter.dma_mask = BIT(DMA_MEM_TO_DEV);
+	epf_mhi->dma_chan_tx = dma_request_channel(mask, pci_epf_mhi_filter,
+						   &filter);
+	if (IS_ERR_OR_NULL(epf_mhi->dma_chan_tx)) {
+		dev_err(dev, "Failed to request tx channel\n");
+		return -ENODEV;
+	}
+
+	filter.dma_mask = BIT(DMA_DEV_TO_MEM);
+	epf_mhi->dma_chan_rx = dma_request_channel(mask, pci_epf_mhi_filter,
+						   &filter);
+	if (IS_ERR_OR_NULL(epf_mhi->dma_chan_rx)) {
+		dev_err(dev, "Failed to request rx channel\n");
+		dma_release_channel(epf_mhi->dma_chan_tx);
+		epf_mhi->dma_chan_tx = NULL;
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static void pci_epf_mhi_dma_deinit(struct pci_epf_mhi *epf_mhi)
+{
+	dma_release_channel(epf_mhi->dma_chan_tx);
+	dma_release_channel(epf_mhi->dma_chan_rx);
+	epf_mhi->dma_chan_tx = NULL;
+	epf_mhi->dma_chan_rx = NULL;
+}
+
+static int pci_epf_mhi_core_init(struct pci_epf *epf)
+{
+	struct pci_epf_mhi *epf_mhi = epf_get_drvdata(epf);
+	const struct pci_epf_mhi_ep_info *info = epf_mhi->info;
+	struct pci_epf_bar *epf_bar = &epf->bar[info->bar_num];
+	struct pci_epc *epc = epf->epc;
+	struct device *dev = &epf->dev;
+	int ret;
+
+	epf_bar->phys_addr = epf_mhi->mmio_phys;
+	epf_bar->size = epf_mhi->mmio_size;
+	epf_bar->barno = info->bar_num;
+	epf_bar->flags = info->epf_flags;
+	ret = pci_epc_set_bar(epc, epf->func_no, epf->vfunc_no, epf_bar);
+	if (ret) {
+		dev_err(dev, "Failed to set BAR: %d\n", ret);
+		return ret;
+	}
+
+	ret = pci_epc_set_msi(epc, epf->func_no, epf->vfunc_no,
+			      order_base_2(info->msi_count));
+	if (ret) {
+		dev_err(dev, "Failed to set MSI configuration: %d\n", ret);
+		return ret;
+	}
+
+	ret = pci_epc_write_header(epc, epf->func_no, epf->vfunc_no,
+				   epf->header);
+	if (ret) {
+		dev_err(dev, "Failed to set Configuration header: %d\n", ret);
+		return ret;
+	}
+
+	epf_mhi->epc_features = pci_epc_get_features(epc, epf->func_no, epf->vfunc_no);
+	if (!epf_mhi->epc_features)
+		return -ENODATA;
+
+	return 0;
+}
+
+static int pci_epf_mhi_link_up(struct pci_epf *epf)
+{
+	struct pci_epf_mhi *epf_mhi = epf_get_drvdata(epf);
+	const struct pci_epf_mhi_ep_info *info = epf_mhi->info;
+	struct mhi_ep_cntrl *mhi_cntrl = &epf_mhi->mhi_cntrl;
+	struct pci_epc *epc = epf->epc;
+	struct device *dev = &epf->dev;
+	int ret;
+
+	if (info->flags & MHI_EPF_USE_DMA) {
+		ret = pci_epf_mhi_dma_init(epf_mhi);
+		if (ret) {
+			dev_err(dev, "Failed to initialize DMA: %d\n", ret);
+			return ret;
+		}
+	}
+
+	mhi_cntrl->mmio = epf_mhi->mmio;
+	mhi_cntrl->irq = epf_mhi->irq;
+	mhi_cntrl->mru = info->mru;
+
+	/* Assign the struct dev of PCI EP as MHI controller device */
+	mhi_cntrl->cntrl_dev = epc->dev.parent;
+	mhi_cntrl->raise_irq = pci_epf_mhi_raise_irq;
+	mhi_cntrl->alloc_map = pci_epf_mhi_alloc_map;
+	mhi_cntrl->unmap_free = pci_epf_mhi_unmap_free;
+	if (info->flags & MHI_EPF_USE_DMA) {
+		mhi_cntrl->read_from_host = pci_epf_mhi_edma_read;
+		mhi_cntrl->write_to_host = pci_epf_mhi_edma_write;
+	} else {
+		mhi_cntrl->read_from_host = pci_epf_mhi_iatu_read;
+		mhi_cntrl->write_to_host = pci_epf_mhi_iatu_write;
+	}
+
+	/* Register the MHI EP controller */
+	ret = mhi_ep_register_controller(mhi_cntrl, info->config);
+	if (ret) {
+		dev_err(dev, "Failed to register MHI EP controller: %d\n", ret);
+		if (info->flags & MHI_EPF_USE_DMA)
+			pci_epf_mhi_dma_deinit(epf_mhi);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int pci_epf_mhi_link_down(struct pci_epf *epf)
+{
+	struct pci_epf_mhi *epf_mhi = epf_get_drvdata(epf);
+	const struct pci_epf_mhi_ep_info *info = epf_mhi->info;
+	struct mhi_ep_cntrl *mhi_cntrl = &epf_mhi->mhi_cntrl;
+
+	if (mhi_cntrl->mhi_dev) {
+		mhi_ep_power_down(mhi_cntrl);
+		if (info->flags & MHI_EPF_USE_DMA)
+			pci_epf_mhi_dma_deinit(epf_mhi);
+		mhi_ep_unregister_controller(mhi_cntrl);
+	}
+
+	return 0;
+}
+
+static int pci_epf_mhi_bme(struct pci_epf *epf)
+{
+	struct pci_epf_mhi *epf_mhi = epf_get_drvdata(epf);
+	const struct pci_epf_mhi_ep_info *info = epf_mhi->info;
+	struct mhi_ep_cntrl *mhi_cntrl = &epf_mhi->mhi_cntrl;
+	struct device *dev = &epf->dev;
+	int ret;
+
+	/*
+	 * Power up the MHI EP stack if link is up and stack is in power down
+	 * state.
+	 */
+	if (!mhi_cntrl->enabled && mhi_cntrl->mhi_dev) {
+		ret = mhi_ep_power_up(mhi_cntrl);
+		if (ret) {
+			dev_err(dev, "Failed to power up MHI EP: %d\n", ret);
+			if (info->flags & MHI_EPF_USE_DMA)
+				pci_epf_mhi_dma_deinit(epf_mhi);
+			mhi_ep_unregister_controller(mhi_cntrl);
+		}
+	}
+
+	return 0;
+}
+
+static int pci_epf_mhi_bind(struct pci_epf *epf)
+{
+	struct pci_epf_mhi *epf_mhi = epf_get_drvdata(epf);
+	struct pci_epc *epc = epf->epc;
+	struct platform_device *pdev = to_platform_device(epc->dev.parent);
+	struct resource *res;
+	int ret;
+
+	/* Get MMIO base address from Endpoint controller */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "mmio");
+	epf_mhi->mmio_phys = res->start;
+	epf_mhi->mmio_size = resource_size(res);
+
+	epf_mhi->mmio = ioremap(epf_mhi->mmio_phys, epf_mhi->mmio_size);
+	if (!epf_mhi->mmio)
+		return -ENOMEM;
+
+	ret = platform_get_irq_byname(pdev, "doorbell");
+	if (ret < 0) {
+		iounmap(epf_mhi->mmio);
+		return ret;
+	}
+
+	epf_mhi->irq = ret;
+
+	return 0;
+}
+
+static void pci_epf_mhi_unbind(struct pci_epf *epf)
+{
+	struct pci_epf_mhi *epf_mhi = epf_get_drvdata(epf);
+	const struct pci_epf_mhi_ep_info *info = epf_mhi->info;
+	struct pci_epf_bar *epf_bar = &epf->bar[info->bar_num];
+	struct mhi_ep_cntrl *mhi_cntrl = &epf_mhi->mhi_cntrl;
+	struct pci_epc *epc = epf->epc;
+
+	/*
+	 * Forcefully power down the MHI EP stack. Only way to bring the MHI EP
+	 * stack back to working state after successive bind is by getting BME
+	 * from host.
+	 */
+	if (mhi_cntrl->mhi_dev) {
+		mhi_ep_power_down(mhi_cntrl);
+		if (info->flags & MHI_EPF_USE_DMA)
+			pci_epf_mhi_dma_deinit(epf_mhi);
+		mhi_ep_unregister_controller(mhi_cntrl);
+	}
+
+	iounmap(epf_mhi->mmio);
+	pci_epc_clear_bar(epc, epf->func_no, epf->vfunc_no, epf_bar);
+}
+
+static struct pci_epc_event_ops pci_epf_mhi_event_ops = {
+	.core_init = pci_epf_mhi_core_init,
+	.link_up = pci_epf_mhi_link_up,
+	.link_down = pci_epf_mhi_link_down,
+	.bme = pci_epf_mhi_bme,
+};
+
+static int pci_epf_mhi_probe(struct pci_epf *epf,
+			     const struct pci_epf_device_id *id)
+{
+	struct pci_epf_mhi_ep_info *info =
+			(struct pci_epf_mhi_ep_info *)id->driver_data;
+	struct pci_epf_mhi *epf_mhi;
+	struct device *dev = &epf->dev;
+
+	epf_mhi = devm_kzalloc(dev, sizeof(*epf_mhi), GFP_KERNEL);
+	if (!epf_mhi)
+		return -ENOMEM;
+
+	epf->header = info->epf_header;
+	epf_mhi->info = info;
+	epf_mhi->epf = epf;
+
+	epf->event_ops = &pci_epf_mhi_event_ops;
+
+	mutex_init(&epf_mhi->lock);
+
+	epf_set_drvdata(epf, epf_mhi);
+
+	return 0;
+}
+
+static const struct pci_epf_device_id pci_epf_mhi_ids[] = {
+	{ .name = "sdx55", .driver_data = (kernel_ulong_t)&sdx55_info },
+	{ .name = "sm8450", .driver_data = (kernel_ulong_t)&sm8450_info },
+	{},
+};
+
+static struct pci_epf_ops pci_epf_mhi_ops = {
+	.unbind	= pci_epf_mhi_unbind,
+	.bind	= pci_epf_mhi_bind,
+};
+
+static struct pci_epf_driver pci_epf_mhi_driver = {
+	.driver.name	= "pci_epf_mhi",
+	.probe		= pci_epf_mhi_probe,
+	.id_table	= pci_epf_mhi_ids,
+	.ops		= &pci_epf_mhi_ops,
+	.owner		= THIS_MODULE,
+};
+
+static int __init pci_epf_mhi_init(void)
+{
+	return pci_epf_register_driver(&pci_epf_mhi_driver);
+}
+module_init(pci_epf_mhi_init);
+
+static void __exit pci_epf_mhi_exit(void)
+{
+	pci_epf_unregister_driver(&pci_epf_mhi_driver);
+}
+module_exit(pci_epf_mhi_exit);
+
+MODULE_DESCRIPTION("PCI EPF driver for MHI Endpoint devices");
+MODULE_AUTHOR("Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pci/endpoint/functions/pci-epf-ntb.c b/drivers/pci/endpoint/functions/pci-epf-ntb.c
new file mode 100644
index 000000000..9aac2c6f3
--- /dev/null
+++ b/drivers/pci/endpoint/functions/pci-epf-ntb.c
@@ -0,0 +1,2149 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Endpoint Function Driver to implement Non-Transparent Bridge functionality
+ *
+ * Copyright (C) 2020 Texas Instruments
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ */
+
+/*
+ * The PCI NTB function driver configures the SoC with multiple PCIe Endpoint
+ * (EP) controller instances (see diagram below) in such a way that
+ * transactions from one EP controller are routed to the other EP controller.
+ * Once PCI NTB function driver configures the SoC with multiple EP instances,
+ * HOST1 and HOST2 can communicate with each other using SoC as a bridge.
+ *
+ *    +-------------+                                   +-------------+
+ *    |             |                                   |             |
+ *    |    HOST1    |                                   |    HOST2    |
+ *    |             |                                   |             |
+ *    +------^------+                                   +------^------+
+ *           |                                                 |
+ *           |                                                 |
+ * +---------|-------------------------------------------------|---------+
+ * |  +------v------+                                   +------v------+  |
+ * |  |             |                                   |             |  |
+ * |  |     EP      |                                   |     EP      |  |
+ * |  | CONTROLLER1 |                                   | CONTROLLER2 |  |
+ * |  |             <----------------------------------->             |  |
+ * |  |             |                                   |             |  |
+ * |  |             |                                   |             |  |
+ * |  |             |  SoC With Multiple EP Instances   |             |  |
+ * |  |             |  (Configured using NTB Function)  |             |  |
+ * |  +-------------+                                   +-------------+  |
+ * +---------------------------------------------------------------------+
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/pci-epc.h>
+#include <linux/pci-epf.h>
+
+static struct workqueue_struct *kpcintb_workqueue;
+
+#define COMMAND_CONFIGURE_DOORBELL	1
+#define COMMAND_TEARDOWN_DOORBELL	2
+#define COMMAND_CONFIGURE_MW		3
+#define COMMAND_TEARDOWN_MW		4
+#define COMMAND_LINK_UP			5
+#define COMMAND_LINK_DOWN		6
+
+#define COMMAND_STATUS_OK		1
+#define COMMAND_STATUS_ERROR		2
+
+#define LINK_STATUS_UP			BIT(0)
+
+#define SPAD_COUNT			64
+#define DB_COUNT			4
+#define NTB_MW_OFFSET			2
+#define DB_COUNT_MASK			GENMASK(15, 0)
+#define MSIX_ENABLE			BIT(16)
+#define MAX_DB_COUNT			32
+#define MAX_MW				4
+
+enum epf_ntb_bar {
+	BAR_CONFIG,
+	BAR_PEER_SPAD,
+	BAR_DB_MW1,
+	BAR_MW2,
+	BAR_MW3,
+	BAR_MW4,
+};
+
+struct epf_ntb {
+	u32 num_mws;
+	u32 db_count;
+	u32 spad_count;
+	struct pci_epf *epf;
+	u64 mws_size[MAX_MW];
+	struct config_group group;
+	struct epf_ntb_epc *epc[2];
+};
+
+#define to_epf_ntb(epf_group) container_of((epf_group), struct epf_ntb, group)
+
+struct epf_ntb_epc {
+	u8 func_no;
+	u8 vfunc_no;
+	bool linkup;
+	bool is_msix;
+	int msix_bar;
+	u32 spad_size;
+	struct pci_epc *epc;
+	struct epf_ntb *epf_ntb;
+	void __iomem *mw_addr[6];
+	size_t msix_table_offset;
+	struct epf_ntb_ctrl *reg;
+	struct pci_epf_bar *epf_bar;
+	enum pci_barno epf_ntb_bar[6];
+	struct delayed_work cmd_handler;
+	enum pci_epc_interface_type type;
+	const struct pci_epc_features *epc_features;
+};
+
+struct epf_ntb_ctrl {
+	u32	command;
+	u32	argument;
+	u16	command_status;
+	u16	link_status;
+	u32	topology;
+	u64	addr;
+	u64	size;
+	u32	num_mws;
+	u32	mw1_offset;
+	u32	spad_offset;
+	u32	spad_count;
+	u32	db_entry_size;
+	u32	db_data[MAX_DB_COUNT];
+	u32	db_offset[MAX_DB_COUNT];
+} __packed;
+
+static struct pci_epf_header epf_ntb_header = {
+	.vendorid	= PCI_ANY_ID,
+	.deviceid	= PCI_ANY_ID,
+	.baseclass_code	= PCI_BASE_CLASS_MEMORY,
+	.interrupt_pin	= PCI_INTERRUPT_INTA,
+};
+
+/**
+ * epf_ntb_link_up() - Raise link_up interrupt to both the hosts
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @link_up: true or false indicating Link is UP or Down
+ *
+ * Once NTB function in HOST1 and the NTB function in HOST2 invoke
+ * ntb_link_enable(), this NTB function driver will trigger a link event to
+ * the NTB client in both the hosts.
+ */
+static int epf_ntb_link_up(struct epf_ntb *ntb, bool link_up)
+{
+	enum pci_epc_interface_type type;
+	enum pci_epc_irq_type irq_type;
+	struct epf_ntb_epc *ntb_epc;
+	struct epf_ntb_ctrl *ctrl;
+	struct pci_epc *epc;
+	u8 func_no, vfunc_no;
+	bool is_msix;
+	int ret;
+
+	for (type = PRIMARY_INTERFACE; type <= SECONDARY_INTERFACE; type++) {
+		ntb_epc = ntb->epc[type];
+		epc = ntb_epc->epc;
+		func_no = ntb_epc->func_no;
+		vfunc_no = ntb_epc->vfunc_no;
+		is_msix = ntb_epc->is_msix;
+		ctrl = ntb_epc->reg;
+		if (link_up)
+			ctrl->link_status |= LINK_STATUS_UP;
+		else
+			ctrl->link_status &= ~LINK_STATUS_UP;
+		irq_type = is_msix ? PCI_EPC_IRQ_MSIX : PCI_EPC_IRQ_MSI;
+		ret = pci_epc_raise_irq(epc, func_no, vfunc_no, irq_type, 1);
+		if (ret) {
+			dev_err(&epc->dev,
+				"%s intf: Failed to raise Link Up IRQ\n",
+				pci_epc_interface_string(type));
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * epf_ntb_configure_mw() - Configure the Outbound Address Space for one host
+ *   to access the memory window of other host
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ * @mw: Index of the memory window (either 0, 1, 2 or 3)
+ *
+ * +-----------------+    +---->+----------------+-----------+-----------------+
+ * |       BAR0      |    |     |   Doorbell 1   +-----------> MSI|X ADDRESS 1 |
+ * +-----------------+    |     +----------------+           +-----------------+
+ * |       BAR1      |    |     |   Doorbell 2   +---------+ |                 |
+ * +-----------------+----+     +----------------+         | |                 |
+ * |       BAR2      |          |   Doorbell 3   +-------+ | +-----------------+
+ * +-----------------+----+     +----------------+       | +-> MSI|X ADDRESS 2 |
+ * |       BAR3      |    |     |   Doorbell 4   +-----+ |   +-----------------+
+ * +-----------------+    |     |----------------+     | |   |                 |
+ * |       BAR4      |    |     |                |     | |   +-----------------+
+ * +-----------------+    |     |      MW1       +---+ | +-->+ MSI|X ADDRESS 3||
+ * |       BAR5      |    |     |                |   | |     +-----------------+
+ * +-----------------+    +---->-----------------+   | |     |                 |
+ *   EP CONTROLLER 1            |                |   | |     +-----------------+
+ *                              |                |   | +---->+ MSI|X ADDRESS 4 |
+ *                              +----------------+   |       +-----------------+
+ *                      (A)      EP CONTROLLER 2     |       |                 |
+ *                                 (OB SPACE)        |       |                 |
+ *                                                   +------->      MW1        |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                   (B)     +-----------------+
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           +-----------------+
+ *                                                           PCI Address Space
+ *                                                           (Managed by HOST2)
+ *
+ * This function performs stage (B) in the above diagram (see MW1) i.e., map OB
+ * address space of memory window to PCI address space.
+ *
+ * This operation requires 3 parameters
+ *  1) Address in the outbound address space
+ *  2) Address in the PCI Address space
+ *  3) Size of the address region to be mapped
+ *
+ * The address in the outbound address space (for MW1, MW2, MW3 and MW4) is
+ * stored in epf_bar corresponding to BAR_DB_MW1 for MW1 and BAR_MW2, BAR_MW3
+ * BAR_MW4 for rest of the BARs of epf_ntb_epc that is connected to HOST1. This
+ * is populated in epf_ntb_alloc_peer_mem() in this driver.
+ *
+ * The address and size of the PCI address region that has to be mapped would
+ * be provided by HOST2 in ctrl->addr and ctrl->size of epf_ntb_epc that is
+ * connected to HOST2.
+ *
+ * Please note Memory window1 (MW1) and Doorbell registers together will be
+ * mapped to a single BAR (BAR2) above for 32-bit BARs. The exact BAR that's
+ * used for Memory window (MW) can be obtained from epf_ntb_bar[BAR_DB_MW1],
+ * epf_ntb_bar[BAR_MW2], epf_ntb_bar[BAR_MW2], epf_ntb_bar[BAR_MW2].
+ */
+static int epf_ntb_configure_mw(struct epf_ntb *ntb,
+				enum pci_epc_interface_type type, u32 mw)
+{
+	struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+	struct pci_epf_bar *peer_epf_bar;
+	enum pci_barno peer_barno;
+	struct epf_ntb_ctrl *ctrl;
+	phys_addr_t phys_addr;
+	u8 func_no, vfunc_no;
+	struct pci_epc *epc;
+	u64 addr, size;
+	int ret = 0;
+
+	ntb_epc = ntb->epc[type];
+	epc = ntb_epc->epc;
+
+	peer_ntb_epc = ntb->epc[!type];
+	peer_barno = peer_ntb_epc->epf_ntb_bar[mw + NTB_MW_OFFSET];
+	peer_epf_bar = &peer_ntb_epc->epf_bar[peer_barno];
+
+	phys_addr = peer_epf_bar->phys_addr;
+	ctrl = ntb_epc->reg;
+	addr = ctrl->addr;
+	size = ctrl->size;
+	if (mw + NTB_MW_OFFSET == BAR_DB_MW1)
+		phys_addr += ctrl->mw1_offset;
+
+	if (size > ntb->mws_size[mw]) {
+		dev_err(&epc->dev,
+			"%s intf: MW: %d Req Sz:%llxx > Supported Sz:%llx\n",
+			pci_epc_interface_string(type), mw, size,
+			ntb->mws_size[mw]);
+		ret = -EINVAL;
+		goto err_invalid_size;
+	}
+
+	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
+
+	ret = pci_epc_map_addr(epc, func_no, vfunc_no, phys_addr, addr, size);
+	if (ret)
+		dev_err(&epc->dev,
+			"%s intf: Failed to map memory window %d address\n",
+			pci_epc_interface_string(type), mw);
+
+err_invalid_size:
+
+	return ret;
+}
+
+/**
+ * epf_ntb_teardown_mw() - Teardown the configured OB ATU
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ * @mw: Index of the memory window (either 0, 1, 2 or 3)
+ *
+ * Teardown the configured OB ATU configured in epf_ntb_configure_mw() using
+ * pci_epc_unmap_addr()
+ */
+static void epf_ntb_teardown_mw(struct epf_ntb *ntb,
+				enum pci_epc_interface_type type, u32 mw)
+{
+	struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+	struct pci_epf_bar *peer_epf_bar;
+	enum pci_barno peer_barno;
+	struct epf_ntb_ctrl *ctrl;
+	phys_addr_t phys_addr;
+	u8 func_no, vfunc_no;
+	struct pci_epc *epc;
+
+	ntb_epc = ntb->epc[type];
+	epc = ntb_epc->epc;
+
+	peer_ntb_epc = ntb->epc[!type];
+	peer_barno = peer_ntb_epc->epf_ntb_bar[mw + NTB_MW_OFFSET];
+	peer_epf_bar = &peer_ntb_epc->epf_bar[peer_barno];
+
+	phys_addr = peer_epf_bar->phys_addr;
+	ctrl = ntb_epc->reg;
+	if (mw + NTB_MW_OFFSET == BAR_DB_MW1)
+		phys_addr += ctrl->mw1_offset;
+	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
+
+	pci_epc_unmap_addr(epc, func_no, vfunc_no, phys_addr);
+}
+
+/**
+ * epf_ntb_configure_msi() - Map OB address space to MSI address
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ * @db_count: Number of doorbell interrupts to map
+ *
+ *+-----------------+    +----->+----------------+-----------+-----------------+
+ *|       BAR0      |    |      |   Doorbell 1   +---+------->   MSI ADDRESS   |
+ *+-----------------+    |      +----------------+   |       +-----------------+
+ *|       BAR1      |    |      |   Doorbell 2   +---+       |                 |
+ *+-----------------+----+      +----------------+   |       |                 |
+ *|       BAR2      |           |   Doorbell 3   +---+       |                 |
+ *+-----------------+----+      +----------------+   |       |                 |
+ *|       BAR3      |    |      |   Doorbell 4   +---+       |                 |
+ *+-----------------+    |      |----------------+           |                 |
+ *|       BAR4      |    |      |                |           |                 |
+ *+-----------------+    |      |      MW1       |           |                 |
+ *|       BAR5      |    |      |                |           |                 |
+ *+-----------------+    +----->-----------------+           |                 |
+ *  EP CONTROLLER 1             |                |           |                 |
+ *                              |                |           |                 |
+ *                              +----------------+           +-----------------+
+ *                     (A)       EP CONTROLLER 2             |                 |
+ *                                 (OB SPACE)                |                 |
+ *                                                           |      MW1        |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                   (B)     +-----------------+
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           +-----------------+
+ *                                                           PCI Address Space
+ *                                                           (Managed by HOST2)
+ *
+ *
+ * This function performs stage (B) in the above diagram (see Doorbell 1,
+ * Doorbell 2, Doorbell 3, Doorbell 4) i.e map OB address space corresponding to
+ * doorbell to MSI address in PCI address space.
+ *
+ * This operation requires 3 parameters
+ *  1) Address reserved for doorbell in the outbound address space
+ *  2) MSI-X address in the PCIe Address space
+ *  3) Number of MSI-X interrupts that has to be configured
+ *
+ * The address in the outbound address space (for the Doorbell) is stored in
+ * epf_bar corresponding to BAR_DB_MW1 of epf_ntb_epc that is connected to
+ * HOST1. This is populated in epf_ntb_alloc_peer_mem() in this driver along
+ * with address for MW1.
+ *
+ * pci_epc_map_msi_irq() takes the MSI address from MSI capability register
+ * and maps the OB address (obtained in epf_ntb_alloc_peer_mem()) to the MSI
+ * address.
+ *
+ * epf_ntb_configure_msi() also stores the MSI data to raise each interrupt
+ * in db_data of the peer's control region. This helps the peer to raise
+ * doorbell of the other host by writing db_data to the BAR corresponding to
+ * BAR_DB_MW1.
+ */
+static int epf_ntb_configure_msi(struct epf_ntb *ntb,
+				 enum pci_epc_interface_type type, u16 db_count)
+{
+	struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+	u32 db_entry_size, db_data, db_offset;
+	struct pci_epf_bar *peer_epf_bar;
+	struct epf_ntb_ctrl *peer_ctrl;
+	enum pci_barno peer_barno;
+	phys_addr_t phys_addr;
+	u8 func_no, vfunc_no;
+	struct pci_epc *epc;
+	int ret, i;
+
+	ntb_epc = ntb->epc[type];
+	epc = ntb_epc->epc;
+
+	peer_ntb_epc = ntb->epc[!type];
+	peer_barno = peer_ntb_epc->epf_ntb_bar[BAR_DB_MW1];
+	peer_epf_bar = &peer_ntb_epc->epf_bar[peer_barno];
+	peer_ctrl = peer_ntb_epc->reg;
+	db_entry_size = peer_ctrl->db_entry_size;
+
+	phys_addr = peer_epf_bar->phys_addr;
+	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
+
+	ret = pci_epc_map_msi_irq(epc, func_no, vfunc_no, phys_addr, db_count,
+				  db_entry_size, &db_data, &db_offset);
+	if (ret) {
+		dev_err(&epc->dev, "%s intf: Failed to map MSI IRQ\n",
+			pci_epc_interface_string(type));
+		return ret;
+	}
+
+	for (i = 0; i < db_count; i++) {
+		peer_ctrl->db_data[i] = db_data | i;
+		peer_ctrl->db_offset[i] = db_offset;
+	}
+
+	return 0;
+}
+
+/**
+ * epf_ntb_configure_msix() - Map OB address space to MSI-X address
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ * @db_count: Number of doorbell interrupts to map
+ *
+ *+-----------------+    +----->+----------------+-----------+-----------------+
+ *|       BAR0      |    |      |   Doorbell 1   +-----------> MSI-X ADDRESS 1 |
+ *+-----------------+    |      +----------------+           +-----------------+
+ *|       BAR1      |    |      |   Doorbell 2   +---------+ |                 |
+ *+-----------------+----+      +----------------+         | |                 |
+ *|       BAR2      |           |   Doorbell 3   +-------+ | +-----------------+
+ *+-----------------+----+      +----------------+       | +-> MSI-X ADDRESS 2 |
+ *|       BAR3      |    |      |   Doorbell 4   +-----+ |   +-----------------+
+ *+-----------------+    |      |----------------+     | |   |                 |
+ *|       BAR4      |    |      |                |     | |   +-----------------+
+ *+-----------------+    |      |      MW1       +     | +-->+ MSI-X ADDRESS 3||
+ *|       BAR5      |    |      |                |     |     +-----------------+
+ *+-----------------+    +----->-----------------+     |     |                 |
+ *  EP CONTROLLER 1             |                |     |     +-----------------+
+ *                              |                |     +---->+ MSI-X ADDRESS 4 |
+ *                              +----------------+           +-----------------+
+ *                     (A)       EP CONTROLLER 2             |                 |
+ *                                 (OB SPACE)                |                 |
+ *                                                           |      MW1        |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                   (B)     +-----------------+
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           +-----------------+
+ *                                                           PCI Address Space
+ *                                                           (Managed by HOST2)
+ *
+ * This function performs stage (B) in the above diagram (see Doorbell 1,
+ * Doorbell 2, Doorbell 3, Doorbell 4) i.e map OB address space corresponding to
+ * doorbell to MSI-X address in PCI address space.
+ *
+ * This operation requires 3 parameters
+ *  1) Address reserved for doorbell in the outbound address space
+ *  2) MSI-X address in the PCIe Address space
+ *  3) Number of MSI-X interrupts that has to be configured
+ *
+ * The address in the outbound address space (for the Doorbell) is stored in
+ * epf_bar corresponding to BAR_DB_MW1 of epf_ntb_epc that is connected to
+ * HOST1. This is populated in epf_ntb_alloc_peer_mem() in this driver along
+ * with address for MW1.
+ *
+ * The MSI-X address is in the MSI-X table of EP CONTROLLER 2 and
+ * the count of doorbell is in ctrl->argument of epf_ntb_epc that is connected
+ * to HOST2. MSI-X table is stored memory mapped to ntb_epc->msix_bar and the
+ * offset is in ntb_epc->msix_table_offset. From this epf_ntb_configure_msix()
+ * gets the MSI-X address and data.
+ *
+ * epf_ntb_configure_msix() also stores the MSI-X data to raise each interrupt
+ * in db_data of the peer's control region. This helps the peer to raise
+ * doorbell of the other host by writing db_data to the BAR corresponding to
+ * BAR_DB_MW1.
+ */
+static int epf_ntb_configure_msix(struct epf_ntb *ntb,
+				  enum pci_epc_interface_type type,
+				  u16 db_count)
+{
+	const struct pci_epc_features *epc_features;
+	struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+	struct pci_epf_bar *peer_epf_bar, *epf_bar;
+	struct pci_epf_msix_tbl *msix_tbl;
+	struct epf_ntb_ctrl *peer_ctrl;
+	u32 db_entry_size, msg_data;
+	enum pci_barno peer_barno;
+	phys_addr_t phys_addr;
+	u8 func_no, vfunc_no;
+	struct pci_epc *epc;
+	size_t align;
+	u64 msg_addr;
+	int ret, i;
+
+	ntb_epc = ntb->epc[type];
+	epc = ntb_epc->epc;
+
+	epf_bar = &ntb_epc->epf_bar[ntb_epc->msix_bar];
+	msix_tbl = epf_bar->addr + ntb_epc->msix_table_offset;
+
+	peer_ntb_epc = ntb->epc[!type];
+	peer_barno = peer_ntb_epc->epf_ntb_bar[BAR_DB_MW1];
+	peer_epf_bar = &peer_ntb_epc->epf_bar[peer_barno];
+	phys_addr = peer_epf_bar->phys_addr;
+	peer_ctrl = peer_ntb_epc->reg;
+	epc_features = ntb_epc->epc_features;
+	align = epc_features->align;
+
+	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
+	db_entry_size = peer_ctrl->db_entry_size;
+
+	for (i = 0; i < db_count; i++) {
+		msg_addr = ALIGN_DOWN(msix_tbl[i].msg_addr, align);
+		msg_data = msix_tbl[i].msg_data;
+		ret = pci_epc_map_addr(epc, func_no, vfunc_no, phys_addr, msg_addr,
+				       db_entry_size);
+		if (ret) {
+			dev_err(&epc->dev,
+				"%s intf: Failed to configure MSI-X IRQ\n",
+				pci_epc_interface_string(type));
+			return ret;
+		}
+		phys_addr = phys_addr + db_entry_size;
+		peer_ctrl->db_data[i] = msg_data;
+		peer_ctrl->db_offset[i] = msix_tbl[i].msg_addr & (align - 1);
+	}
+	ntb_epc->is_msix = true;
+
+	return 0;
+}
+
+/**
+ * epf_ntb_configure_db() - Configure the Outbound Address Space for one host
+ *   to ring the doorbell of other host
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ * @db_count: Count of the number of doorbells that has to be configured
+ * @msix: Indicates whether MSI-X or MSI should be used
+ *
+ * Invokes epf_ntb_configure_msix() or epf_ntb_configure_msi() required for
+ * one HOST to ring the doorbell of other HOST.
+ */
+static int epf_ntb_configure_db(struct epf_ntb *ntb,
+				enum pci_epc_interface_type type,
+				u16 db_count, bool msix)
+{
+	struct epf_ntb_epc *ntb_epc;
+	struct pci_epc *epc;
+	int ret;
+
+	if (db_count > MAX_DB_COUNT)
+		return -EINVAL;
+
+	ntb_epc = ntb->epc[type];
+	epc = ntb_epc->epc;
+
+	if (msix)
+		ret = epf_ntb_configure_msix(ntb, type, db_count);
+	else
+		ret = epf_ntb_configure_msi(ntb, type, db_count);
+
+	if (ret)
+		dev_err(&epc->dev, "%s intf: Failed to configure DB\n",
+			pci_epc_interface_string(type));
+
+	return ret;
+}
+
+/**
+ * epf_ntb_teardown_db() - Unmap address in OB address space to MSI/MSI-X
+ *   address
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Invoke pci_epc_unmap_addr() to unmap OB address to MSI/MSI-X address.
+ */
+static void
+epf_ntb_teardown_db(struct epf_ntb *ntb, enum pci_epc_interface_type type)
+{
+	struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+	struct pci_epf_bar *peer_epf_bar;
+	enum pci_barno peer_barno;
+	phys_addr_t phys_addr;
+	u8 func_no, vfunc_no;
+	struct pci_epc *epc;
+
+	ntb_epc = ntb->epc[type];
+	epc = ntb_epc->epc;
+
+	peer_ntb_epc = ntb->epc[!type];
+	peer_barno = peer_ntb_epc->epf_ntb_bar[BAR_DB_MW1];
+	peer_epf_bar = &peer_ntb_epc->epf_bar[peer_barno];
+	phys_addr = peer_epf_bar->phys_addr;
+	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
+
+	pci_epc_unmap_addr(epc, func_no, vfunc_no, phys_addr);
+}
+
+/**
+ * epf_ntb_cmd_handler() - Handle commands provided by the NTB Host
+ * @work: work_struct for the two epf_ntb_epc (PRIMARY and SECONDARY)
+ *
+ * Workqueue function that gets invoked for the two epf_ntb_epc
+ * periodically (once every 5ms) to see if it has received any commands
+ * from NTB host. The host can send commands to configure doorbell or
+ * configure memory window or to update link status.
+ */
+static void epf_ntb_cmd_handler(struct work_struct *work)
+{
+	enum pci_epc_interface_type type;
+	struct epf_ntb_epc *ntb_epc;
+	struct epf_ntb_ctrl *ctrl;
+	u32 command, argument;
+	struct epf_ntb *ntb;
+	struct device *dev;
+	u16 db_count;
+	bool is_msix;
+	int ret;
+
+	ntb_epc = container_of(work, struct epf_ntb_epc, cmd_handler.work);
+	ctrl = ntb_epc->reg;
+	command = ctrl->command;
+	if (!command)
+		goto reset_handler;
+	argument = ctrl->argument;
+
+	ctrl->command = 0;
+	ctrl->argument = 0;
+
+	ctrl = ntb_epc->reg;
+	type = ntb_epc->type;
+	ntb = ntb_epc->epf_ntb;
+	dev = &ntb->epf->dev;
+
+	switch (command) {
+	case COMMAND_CONFIGURE_DOORBELL:
+		db_count = argument & DB_COUNT_MASK;
+		is_msix = argument & MSIX_ENABLE;
+		ret = epf_ntb_configure_db(ntb, type, db_count, is_msix);
+		if (ret < 0)
+			ctrl->command_status = COMMAND_STATUS_ERROR;
+		else
+			ctrl->command_status = COMMAND_STATUS_OK;
+		break;
+	case COMMAND_TEARDOWN_DOORBELL:
+		epf_ntb_teardown_db(ntb, type);
+		ctrl->command_status = COMMAND_STATUS_OK;
+		break;
+	case COMMAND_CONFIGURE_MW:
+		ret = epf_ntb_configure_mw(ntb, type, argument);
+		if (ret < 0)
+			ctrl->command_status = COMMAND_STATUS_ERROR;
+		else
+			ctrl->command_status = COMMAND_STATUS_OK;
+		break;
+	case COMMAND_TEARDOWN_MW:
+		epf_ntb_teardown_mw(ntb, type, argument);
+		ctrl->command_status = COMMAND_STATUS_OK;
+		break;
+	case COMMAND_LINK_UP:
+		ntb_epc->linkup = true;
+		if (ntb->epc[PRIMARY_INTERFACE]->linkup &&
+		    ntb->epc[SECONDARY_INTERFACE]->linkup) {
+			ret = epf_ntb_link_up(ntb, true);
+			if (ret < 0)
+				ctrl->command_status = COMMAND_STATUS_ERROR;
+			else
+				ctrl->command_status = COMMAND_STATUS_OK;
+			goto reset_handler;
+		}
+		ctrl->command_status = COMMAND_STATUS_OK;
+		break;
+	case COMMAND_LINK_DOWN:
+		ntb_epc->linkup = false;
+		ret = epf_ntb_link_up(ntb, false);
+		if (ret < 0)
+			ctrl->command_status = COMMAND_STATUS_ERROR;
+		else
+			ctrl->command_status = COMMAND_STATUS_OK;
+		break;
+	default:
+		dev_err(dev, "%s intf UNKNOWN command: %d\n",
+			pci_epc_interface_string(type), command);
+		break;
+	}
+
+reset_handler:
+	queue_delayed_work(kpcintb_workqueue, &ntb_epc->cmd_handler,
+			   msecs_to_jiffies(5));
+}
+
+/**
+ * epf_ntb_peer_spad_bar_clear() - Clear Peer Scratchpad BAR
+ * @ntb_epc: EPC associated with one of the HOST which holds peer's outbound
+ *	     address.
+ *
+ *+-----------------+------->+------------------+        +-----------------+
+ *|       BAR0      |        |  CONFIG REGION   |        |       BAR0      |
+ *+-----------------+----+   +------------------+<-------+-----------------+
+ *|       BAR1      |    |   |SCRATCHPAD REGION |        |       BAR1      |
+ *+-----------------+    +-->+------------------+<-------+-----------------+
+ *|       BAR2      |            Local Memory            |       BAR2      |
+ *+-----------------+                                    +-----------------+
+ *|       BAR3      |                                    |       BAR3      |
+ *+-----------------+                                    +-----------------+
+ *|       BAR4      |                                    |       BAR4      |
+ *+-----------------+                                    +-----------------+
+ *|       BAR5      |                                    |       BAR5      |
+ *+-----------------+                                    +-----------------+
+ *  EP CONTROLLER 1                                        EP CONTROLLER 2
+ *
+ * Clear BAR1 of EP CONTROLLER 2 which contains the HOST2's peer scratchpad
+ * region. While BAR1 is the default peer scratchpad BAR, an NTB could have
+ * other BARs for peer scratchpad (because of 64-bit BARs or reserved BARs).
+ * This function can get the exact BAR used for peer scratchpad from
+ * epf_ntb_bar[BAR_PEER_SPAD].
+ *
+ * Since HOST2's peer scratchpad is also HOST1's self scratchpad, this function
+ * gets the address of peer scratchpad from
+ * peer_ntb_epc->epf_ntb_bar[BAR_CONFIG].
+ */
+static void epf_ntb_peer_spad_bar_clear(struct epf_ntb_epc *ntb_epc)
+{
+	struct pci_epf_bar *epf_bar;
+	enum pci_barno barno;
+	u8 func_no, vfunc_no;
+	struct pci_epc *epc;
+
+	epc = ntb_epc->epc;
+	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
+	barno = ntb_epc->epf_ntb_bar[BAR_PEER_SPAD];
+	epf_bar = &ntb_epc->epf_bar[barno];
+	pci_epc_clear_bar(epc, func_no, vfunc_no, epf_bar);
+}
+
+/**
+ * epf_ntb_peer_spad_bar_set() - Set peer scratchpad BAR
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ *+-----------------+------->+------------------+        +-----------------+
+ *|       BAR0      |        |  CONFIG REGION   |        |       BAR0      |
+ *+-----------------+----+   +------------------+<-------+-----------------+
+ *|       BAR1      |    |   |SCRATCHPAD REGION |        |       BAR1      |
+ *+-----------------+    +-->+------------------+<-------+-----------------+
+ *|       BAR2      |            Local Memory            |       BAR2      |
+ *+-----------------+                                    +-----------------+
+ *|       BAR3      |                                    |       BAR3      |
+ *+-----------------+                                    +-----------------+
+ *|       BAR4      |                                    |       BAR4      |
+ *+-----------------+                                    +-----------------+
+ *|       BAR5      |                                    |       BAR5      |
+ *+-----------------+                                    +-----------------+
+ *  EP CONTROLLER 1                                        EP CONTROLLER 2
+ *
+ * Set BAR1 of EP CONTROLLER 2 which contains the HOST2's peer scratchpad
+ * region. While BAR1 is the default peer scratchpad BAR, an NTB could have
+ * other BARs for peer scratchpad (because of 64-bit BARs or reserved BARs).
+ * This function can get the exact BAR used for peer scratchpad from
+ * epf_ntb_bar[BAR_PEER_SPAD].
+ *
+ * Since HOST2's peer scratchpad is also HOST1's self scratchpad, this function
+ * gets the address of peer scratchpad from
+ * peer_ntb_epc->epf_ntb_bar[BAR_CONFIG].
+ */
+static int epf_ntb_peer_spad_bar_set(struct epf_ntb *ntb,
+				     enum pci_epc_interface_type type)
+{
+	struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+	struct pci_epf_bar *peer_epf_bar, *epf_bar;
+	enum pci_barno peer_barno, barno;
+	u32 peer_spad_offset;
+	u8 func_no, vfunc_no;
+	struct pci_epc *epc;
+	struct device *dev;
+	int ret;
+
+	dev = &ntb->epf->dev;
+
+	peer_ntb_epc = ntb->epc[!type];
+	peer_barno = peer_ntb_epc->epf_ntb_bar[BAR_CONFIG];
+	peer_epf_bar = &peer_ntb_epc->epf_bar[peer_barno];
+
+	ntb_epc = ntb->epc[type];
+	barno = ntb_epc->epf_ntb_bar[BAR_PEER_SPAD];
+	epf_bar = &ntb_epc->epf_bar[barno];
+	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
+	epc = ntb_epc->epc;
+
+	peer_spad_offset = peer_ntb_epc->reg->spad_offset;
+	epf_bar->phys_addr = peer_epf_bar->phys_addr + peer_spad_offset;
+	epf_bar->size = peer_ntb_epc->spad_size;
+	epf_bar->barno = barno;
+	epf_bar->flags = PCI_BASE_ADDRESS_MEM_TYPE_32;
+
+	ret = pci_epc_set_bar(epc, func_no, vfunc_no, epf_bar);
+	if (ret) {
+		dev_err(dev, "%s intf: peer SPAD BAR set failed\n",
+			pci_epc_interface_string(type));
+		return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * epf_ntb_config_sspad_bar_clear() - Clear Config + Self scratchpad BAR
+ * @ntb_epc: EPC associated with one of the HOST which holds peer's outbound
+ *	     address.
+ *
+ * +-----------------+------->+------------------+        +-----------------+
+ * |       BAR0      |        |  CONFIG REGION   |        |       BAR0      |
+ * +-----------------+----+   +------------------+<-------+-----------------+
+ * |       BAR1      |    |   |SCRATCHPAD REGION |        |       BAR1      |
+ * +-----------------+    +-->+------------------+<-------+-----------------+
+ * |       BAR2      |            Local Memory            |       BAR2      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR3      |                                    |       BAR3      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR4      |                                    |       BAR4      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR5      |                                    |       BAR5      |
+ * +-----------------+                                    +-----------------+
+ *   EP CONTROLLER 1                                        EP CONTROLLER 2
+ *
+ * Clear BAR0 of EP CONTROLLER 1 which contains the HOST1's config and
+ * self scratchpad region (removes inbound ATU configuration). While BAR0 is
+ * the default self scratchpad BAR, an NTB could have other BARs for self
+ * scratchpad (because of reserved BARs). This function can get the exact BAR
+ * used for self scratchpad from epf_ntb_bar[BAR_CONFIG].
+ *
+ * Please note the self scratchpad region and config region is combined to
+ * a single region and mapped using the same BAR. Also note HOST2's peer
+ * scratchpad is HOST1's self scratchpad.
+ */
+static void epf_ntb_config_sspad_bar_clear(struct epf_ntb_epc *ntb_epc)
+{
+	struct pci_epf_bar *epf_bar;
+	enum pci_barno barno;
+	u8 func_no, vfunc_no;
+	struct pci_epc *epc;
+
+	epc = ntb_epc->epc;
+	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
+	barno = ntb_epc->epf_ntb_bar[BAR_CONFIG];
+	epf_bar = &ntb_epc->epf_bar[barno];
+	pci_epc_clear_bar(epc, func_no, vfunc_no, epf_bar);
+}
+
+/**
+ * epf_ntb_config_sspad_bar_set() - Set Config + Self scratchpad BAR
+ * @ntb_epc: EPC associated with one of the HOST which holds peer's outbound
+ *	     address.
+ *
+ * +-----------------+------->+------------------+        +-----------------+
+ * |       BAR0      |        |  CONFIG REGION   |        |       BAR0      |
+ * +-----------------+----+   +------------------+<-------+-----------------+
+ * |       BAR1      |    |   |SCRATCHPAD REGION |        |       BAR1      |
+ * +-----------------+    +-->+------------------+<-------+-----------------+
+ * |       BAR2      |            Local Memory            |       BAR2      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR3      |                                    |       BAR3      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR4      |                                    |       BAR4      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR5      |                                    |       BAR5      |
+ * +-----------------+                                    +-----------------+
+ *   EP CONTROLLER 1                                        EP CONTROLLER 2
+ *
+ * Map BAR0 of EP CONTROLLER 1 which contains the HOST1's config and
+ * self scratchpad region. While BAR0 is the default self scratchpad BAR, an
+ * NTB could have other BARs for self scratchpad (because of reserved BARs).
+ * This function can get the exact BAR used for self scratchpad from
+ * epf_ntb_bar[BAR_CONFIG].
+ *
+ * Please note the self scratchpad region and config region is combined to
+ * a single region and mapped using the same BAR. Also note HOST2's peer
+ * scratchpad is HOST1's self scratchpad.
+ */
+static int epf_ntb_config_sspad_bar_set(struct epf_ntb_epc *ntb_epc)
+{
+	struct pci_epf_bar *epf_bar;
+	enum pci_barno barno;
+	u8 func_no, vfunc_no;
+	struct epf_ntb *ntb;
+	struct pci_epc *epc;
+	struct device *dev;
+	int ret;
+
+	ntb = ntb_epc->epf_ntb;
+	dev = &ntb->epf->dev;
+
+	epc = ntb_epc->epc;
+	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
+	barno = ntb_epc->epf_ntb_bar[BAR_CONFIG];
+	epf_bar = &ntb_epc->epf_bar[barno];
+
+	ret = pci_epc_set_bar(epc, func_no, vfunc_no, epf_bar);
+	if (ret) {
+		dev_err(dev, "%s inft: Config/Status/SPAD BAR set failed\n",
+			pci_epc_interface_string(ntb_epc->type));
+		return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * epf_ntb_config_spad_bar_free() - Free the physical memory associated with
+ *   config + scratchpad region
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ *
+ * +-----------------+------->+------------------+        +-----------------+
+ * |       BAR0      |        |  CONFIG REGION   |        |       BAR0      |
+ * +-----------------+----+   +------------------+<-------+-----------------+
+ * |       BAR1      |    |   |SCRATCHPAD REGION |        |       BAR1      |
+ * +-----------------+    +-->+------------------+<-------+-----------------+
+ * |       BAR2      |            Local Memory            |       BAR2      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR3      |                                    |       BAR3      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR4      |                                    |       BAR4      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR5      |                                    |       BAR5      |
+ * +-----------------+                                    +-----------------+
+ *   EP CONTROLLER 1                                        EP CONTROLLER 2
+ *
+ * Free the Local Memory mentioned in the above diagram. After invoking this
+ * function, any of config + self scratchpad region of HOST1 or peer scratchpad
+ * region of HOST2 should not be accessed.
+ */
+static void epf_ntb_config_spad_bar_free(struct epf_ntb *ntb)
+{
+	enum pci_epc_interface_type type;
+	struct epf_ntb_epc *ntb_epc;
+	enum pci_barno barno;
+	struct pci_epf *epf;
+
+	epf = ntb->epf;
+	for (type = PRIMARY_INTERFACE; type <= SECONDARY_INTERFACE; type++) {
+		ntb_epc = ntb->epc[type];
+		barno = ntb_epc->epf_ntb_bar[BAR_CONFIG];
+		if (ntb_epc->reg)
+			pci_epf_free_space(epf, ntb_epc->reg, barno, type);
+	}
+}
+
+/**
+ * epf_ntb_config_spad_bar_alloc() - Allocate memory for config + scratchpad
+ *   region
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * +-----------------+------->+------------------+        +-----------------+
+ * |       BAR0      |        |  CONFIG REGION   |        |       BAR0      |
+ * +-----------------+----+   +------------------+<-------+-----------------+
+ * |       BAR1      |    |   |SCRATCHPAD REGION |        |       BAR1      |
+ * +-----------------+    +-->+------------------+<-------+-----------------+
+ * |       BAR2      |            Local Memory            |       BAR2      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR3      |                                    |       BAR3      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR4      |                                    |       BAR4      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR5      |                                    |       BAR5      |
+ * +-----------------+                                    +-----------------+
+ *   EP CONTROLLER 1                                        EP CONTROLLER 2
+ *
+ * Allocate the Local Memory mentioned in the above diagram. The size of
+ * CONFIG REGION is sizeof(struct epf_ntb_ctrl) and size of SCRATCHPAD REGION
+ * is obtained from "spad-count" configfs entry.
+ *
+ * The size of both config region and scratchpad region has to be aligned,
+ * since the scratchpad region will also be mapped as PEER SCRATCHPAD of
+ * other host using a separate BAR.
+ */
+static int epf_ntb_config_spad_bar_alloc(struct epf_ntb *ntb,
+					 enum pci_epc_interface_type type)
+{
+	const struct pci_epc_features *peer_epc_features, *epc_features;
+	struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+	size_t msix_table_size, pba_size, align;
+	enum pci_barno peer_barno, barno;
+	struct epf_ntb_ctrl *ctrl;
+	u32 spad_size, ctrl_size;
+	u64 size, peer_size;
+	struct pci_epf *epf;
+	struct device *dev;
+	bool msix_capable;
+	u32 spad_count;
+	void *base;
+
+	epf = ntb->epf;
+	dev = &epf->dev;
+	ntb_epc = ntb->epc[type];
+
+	epc_features = ntb_epc->epc_features;
+	barno = ntb_epc->epf_ntb_bar[BAR_CONFIG];
+	size = epc_features->bar_fixed_size[barno];
+	align = epc_features->align;
+
+	peer_ntb_epc = ntb->epc[!type];
+	peer_epc_features = peer_ntb_epc->epc_features;
+	peer_barno = ntb_epc->epf_ntb_bar[BAR_PEER_SPAD];
+	peer_size = peer_epc_features->bar_fixed_size[peer_barno];
+
+	/* Check if epc_features is populated incorrectly */
+	if ((!IS_ALIGNED(size, align)))
+		return -EINVAL;
+
+	spad_count = ntb->spad_count;
+
+	ctrl_size = sizeof(struct epf_ntb_ctrl);
+	spad_size = spad_count * 4;
+
+	msix_capable = epc_features->msix_capable;
+	if (msix_capable) {
+		msix_table_size = PCI_MSIX_ENTRY_SIZE * ntb->db_count;
+		ctrl_size = ALIGN(ctrl_size, 8);
+		ntb_epc->msix_table_offset = ctrl_size;
+		ntb_epc->msix_bar = barno;
+		/* Align to QWORD or 8 Bytes */
+		pba_size = ALIGN(DIV_ROUND_UP(ntb->db_count, 8), 8);
+		ctrl_size = ctrl_size + msix_table_size + pba_size;
+	}
+
+	if (!align) {
+		ctrl_size = roundup_pow_of_two(ctrl_size);
+		spad_size = roundup_pow_of_two(spad_size);
+	} else {
+		ctrl_size = ALIGN(ctrl_size, align);
+		spad_size = ALIGN(spad_size, align);
+	}
+
+	if (peer_size) {
+		if (peer_size < spad_size)
+			spad_count = peer_size / 4;
+		spad_size = peer_size;
+	}
+
+	/*
+	 * In order to make sure SPAD offset is aligned to its size,
+	 * expand control region size to the size of SPAD if SPAD size
+	 * is greater than control region size.
+	 */
+	if (spad_size > ctrl_size)
+		ctrl_size = spad_size;
+
+	if (!size)
+		size = ctrl_size + spad_size;
+	else if (size < ctrl_size + spad_size)
+		return -EINVAL;
+
+	base = pci_epf_alloc_space(epf, size, barno, align, type);
+	if (!base) {
+		dev_err(dev, "%s intf: Config/Status/SPAD alloc region fail\n",
+			pci_epc_interface_string(type));
+		return -ENOMEM;
+	}
+
+	ntb_epc->reg = base;
+
+	ctrl = ntb_epc->reg;
+	ctrl->spad_offset = ctrl_size;
+	ctrl->spad_count = spad_count;
+	ctrl->num_mws = ntb->num_mws;
+	ctrl->db_entry_size = align ? align : 4;
+	ntb_epc->spad_size = spad_size;
+
+	return 0;
+}
+
+/**
+ * epf_ntb_config_spad_bar_alloc_interface() - Allocate memory for config +
+ *   scratchpad region for each of PRIMARY and SECONDARY interface
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ *
+ * Wrapper for epf_ntb_config_spad_bar_alloc() which allocates memory for
+ * config + scratchpad region for a specific interface
+ */
+static int epf_ntb_config_spad_bar_alloc_interface(struct epf_ntb *ntb)
+{
+	enum pci_epc_interface_type type;
+	struct device *dev;
+	int ret;
+
+	dev = &ntb->epf->dev;
+
+	for (type = PRIMARY_INTERFACE; type <= SECONDARY_INTERFACE; type++) {
+		ret = epf_ntb_config_spad_bar_alloc(ntb, type);
+		if (ret) {
+			dev_err(dev, "%s intf: Config/SPAD BAR alloc failed\n",
+				pci_epc_interface_string(type));
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * epf_ntb_free_peer_mem() - Free memory allocated in peers outbound address
+ *   space
+ * @ntb_epc: EPC associated with one of the HOST which holds peers outbound
+ *   address regions
+ *
+ * +-----------------+    +---->+----------------+-----------+-----------------+
+ * |       BAR0      |    |     |   Doorbell 1   +-----------> MSI|X ADDRESS 1 |
+ * +-----------------+    |     +----------------+           +-----------------+
+ * |       BAR1      |    |     |   Doorbell 2   +---------+ |                 |
+ * +-----------------+----+     +----------------+         | |                 |
+ * |       BAR2      |          |   Doorbell 3   +-------+ | +-----------------+
+ * +-----------------+----+     +----------------+       | +-> MSI|X ADDRESS 2 |
+ * |       BAR3      |    |     |   Doorbell 4   +-----+ |   +-----------------+
+ * +-----------------+    |     |----------------+     | |   |                 |
+ * |       BAR4      |    |     |                |     | |   +-----------------+
+ * +-----------------+    |     |      MW1       +---+ | +-->+ MSI|X ADDRESS 3||
+ * |       BAR5      |    |     |                |   | |     +-----------------+
+ * +-----------------+    +---->-----------------+   | |     |                 |
+ *   EP CONTROLLER 1            |                |   | |     +-----------------+
+ *                              |                |   | +---->+ MSI|X ADDRESS 4 |
+ *                              +----------------+   |       +-----------------+
+ *                      (A)      EP CONTROLLER 2     |       |                 |
+ *                                 (OB SPACE)        |       |                 |
+ *                                                   +------->      MW1        |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                   (B)     +-----------------+
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           +-----------------+
+ *                                                           PCI Address Space
+ *                                                           (Managed by HOST2)
+ *
+ * Free memory allocated in EP CONTROLLER 2 (OB SPACE) in the above diagram.
+ * It'll free Doorbell 1, Doorbell 2, Doorbell 3, Doorbell 4, MW1 (and MW2, MW3,
+ * MW4).
+ */
+static void epf_ntb_free_peer_mem(struct epf_ntb_epc *ntb_epc)
+{
+	struct pci_epf_bar *epf_bar;
+	void __iomem *mw_addr;
+	phys_addr_t phys_addr;
+	enum epf_ntb_bar bar;
+	enum pci_barno barno;
+	struct pci_epc *epc;
+	size_t size;
+
+	epc = ntb_epc->epc;
+
+	for (bar = BAR_DB_MW1; bar < BAR_MW4; bar++) {
+		barno = ntb_epc->epf_ntb_bar[bar];
+		mw_addr = ntb_epc->mw_addr[barno];
+		epf_bar = &ntb_epc->epf_bar[barno];
+		phys_addr = epf_bar->phys_addr;
+		size = epf_bar->size;
+		if (mw_addr) {
+			pci_epc_mem_free_addr(epc, phys_addr, mw_addr, size);
+			ntb_epc->mw_addr[barno] = NULL;
+		}
+	}
+}
+
+/**
+ * epf_ntb_db_mw_bar_clear() - Clear doorbell and memory BAR
+ * @ntb_epc: EPC associated with one of the HOST which holds peer's outbound
+ *   address
+ *
+ * +-----------------+    +---->+----------------+-----------+-----------------+
+ * |       BAR0      |    |     |   Doorbell 1   +-----------> MSI|X ADDRESS 1 |
+ * +-----------------+    |     +----------------+           +-----------------+
+ * |       BAR1      |    |     |   Doorbell 2   +---------+ |                 |
+ * +-----------------+----+     +----------------+         | |                 |
+ * |       BAR2      |          |   Doorbell 3   +-------+ | +-----------------+
+ * +-----------------+----+     +----------------+       | +-> MSI|X ADDRESS 2 |
+ * |       BAR3      |    |     |   Doorbell 4   +-----+ |   +-----------------+
+ * +-----------------+    |     |----------------+     | |   |                 |
+ * |       BAR4      |    |     |                |     | |   +-----------------+
+ * +-----------------+    |     |      MW1       +---+ | +-->+ MSI|X ADDRESS 3||
+ * |       BAR5      |    |     |                |   | |     +-----------------+
+ * +-----------------+    +---->-----------------+   | |     |                 |
+ *   EP CONTROLLER 1            |                |   | |     +-----------------+
+ *                              |                |   | +---->+ MSI|X ADDRESS 4 |
+ *                              +----------------+   |       +-----------------+
+ *                      (A)      EP CONTROLLER 2     |       |                 |
+ *                                 (OB SPACE)        |       |                 |
+ *                                                   +------->      MW1        |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                   (B)     +-----------------+
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           +-----------------+
+ *                                                           PCI Address Space
+ *                                                           (Managed by HOST2)
+ *
+ * Clear doorbell and memory BARs (remove inbound ATU configuration). In the above
+ * diagram it clears BAR2 TO BAR5 of EP CONTROLLER 1 (Doorbell BAR, MW1 BAR, MW2
+ * BAR, MW3 BAR and MW4 BAR).
+ */
+static void epf_ntb_db_mw_bar_clear(struct epf_ntb_epc *ntb_epc)
+{
+	struct pci_epf_bar *epf_bar;
+	enum epf_ntb_bar bar;
+	enum pci_barno barno;
+	u8 func_no, vfunc_no;
+	struct pci_epc *epc;
+
+	epc = ntb_epc->epc;
+
+	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
+
+	for (bar = BAR_DB_MW1; bar < BAR_MW4; bar++) {
+		barno = ntb_epc->epf_ntb_bar[bar];
+		epf_bar = &ntb_epc->epf_bar[barno];
+		pci_epc_clear_bar(epc, func_no, vfunc_no, epf_bar);
+	}
+}
+
+/**
+ * epf_ntb_db_mw_bar_cleanup() - Clear doorbell/memory BAR and free memory
+ *   allocated in peers outbound address space
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Wrapper for epf_ntb_db_mw_bar_clear() to clear HOST1's BAR and
+ * epf_ntb_free_peer_mem() which frees up HOST2 outbound memory.
+ */
+static void epf_ntb_db_mw_bar_cleanup(struct epf_ntb *ntb,
+				      enum pci_epc_interface_type type)
+{
+	struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+
+	ntb_epc = ntb->epc[type];
+	peer_ntb_epc = ntb->epc[!type];
+
+	epf_ntb_db_mw_bar_clear(ntb_epc);
+	epf_ntb_free_peer_mem(peer_ntb_epc);
+}
+
+/**
+ * epf_ntb_configure_interrupt() - Configure MSI/MSI-X capability
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Configure MSI/MSI-X capability for each interface with number of
+ * interrupts equal to "db_count" configfs entry.
+ */
+static int epf_ntb_configure_interrupt(struct epf_ntb *ntb,
+				       enum pci_epc_interface_type type)
+{
+	const struct pci_epc_features *epc_features;
+	bool msix_capable, msi_capable;
+	struct epf_ntb_epc *ntb_epc;
+	u8 func_no, vfunc_no;
+	struct pci_epc *epc;
+	struct device *dev;
+	u32 db_count;
+	int ret;
+
+	ntb_epc = ntb->epc[type];
+	dev = &ntb->epf->dev;
+
+	epc_features = ntb_epc->epc_features;
+	msix_capable = epc_features->msix_capable;
+	msi_capable = epc_features->msi_capable;
+
+	if (!(msix_capable || msi_capable)) {
+		dev_err(dev, "MSI or MSI-X is required for doorbell\n");
+		return -EINVAL;
+	}
+
+	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
+
+	db_count = ntb->db_count;
+	if (db_count > MAX_DB_COUNT) {
+		dev_err(dev, "DB count cannot be more than %d\n", MAX_DB_COUNT);
+		return -EINVAL;
+	}
+
+	ntb->db_count = db_count;
+	epc = ntb_epc->epc;
+
+	if (msi_capable) {
+		ret = pci_epc_set_msi(epc, func_no, vfunc_no, db_count);
+		if (ret) {
+			dev_err(dev, "%s intf: MSI configuration failed\n",
+				pci_epc_interface_string(type));
+			return ret;
+		}
+	}
+
+	if (msix_capable) {
+		ret = pci_epc_set_msix(epc, func_no, vfunc_no, db_count,
+				       ntb_epc->msix_bar,
+				       ntb_epc->msix_table_offset);
+		if (ret) {
+			dev_err(dev, "MSI configuration failed\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * epf_ntb_alloc_peer_mem() - Allocate memory in peer's outbound address space
+ * @dev: The PCI device.
+ * @ntb_epc: EPC associated with one of the HOST whose BAR holds peer's outbound
+ *   address
+ * @bar: BAR of @ntb_epc in for which memory has to be allocated (could be
+ *   BAR_DB_MW1, BAR_MW2, BAR_MW3, BAR_MW4)
+ * @peer_ntb_epc: EPC associated with HOST whose outbound address space is
+ *   used by @ntb_epc
+ * @size: Size of the address region that has to be allocated in peers OB SPACE
+ *
+ *
+ * +-----------------+    +---->+----------------+-----------+-----------------+
+ * |       BAR0      |    |     |   Doorbell 1   +-----------> MSI|X ADDRESS 1 |
+ * +-----------------+    |     +----------------+           +-----------------+
+ * |       BAR1      |    |     |   Doorbell 2   +---------+ |                 |
+ * +-----------------+----+     +----------------+         | |                 |
+ * |       BAR2      |          |   Doorbell 3   +-------+ | +-----------------+
+ * +-----------------+----+     +----------------+       | +-> MSI|X ADDRESS 2 |
+ * |       BAR3      |    |     |   Doorbell 4   +-----+ |   +-----------------+
+ * +-----------------+    |     |----------------+     | |   |                 |
+ * |       BAR4      |    |     |                |     | |   +-----------------+
+ * +-----------------+    |     |      MW1       +---+ | +-->+ MSI|X ADDRESS 3||
+ * |       BAR5      |    |     |                |   | |     +-----------------+
+ * +-----------------+    +---->-----------------+   | |     |                 |
+ *   EP CONTROLLER 1            |                |   | |     +-----------------+
+ *                              |                |   | +---->+ MSI|X ADDRESS 4 |
+ *                              +----------------+   |       +-----------------+
+ *                      (A)      EP CONTROLLER 2     |       |                 |
+ *                                 (OB SPACE)        |       |                 |
+ *                                                   +------->      MW1        |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                   (B)     +-----------------+
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           +-----------------+
+ *                                                           PCI Address Space
+ *                                                           (Managed by HOST2)
+ *
+ * Allocate memory in OB space of EP CONTROLLER 2 in the above diagram. Allocate
+ * for Doorbell 1, Doorbell 2, Doorbell 3, Doorbell 4, MW1 (and MW2, MW3, MW4).
+ */
+static int epf_ntb_alloc_peer_mem(struct device *dev,
+				  struct epf_ntb_epc *ntb_epc,
+				  enum epf_ntb_bar bar,
+				  struct epf_ntb_epc *peer_ntb_epc,
+				  size_t size)
+{
+	const struct pci_epc_features *epc_features;
+	struct pci_epf_bar *epf_bar;
+	struct pci_epc *peer_epc;
+	phys_addr_t phys_addr;
+	void __iomem *mw_addr;
+	enum pci_barno barno;
+	size_t align;
+
+	epc_features = ntb_epc->epc_features;
+	align = epc_features->align;
+
+	if (size < 128)
+		size = 128;
+
+	if (align)
+		size = ALIGN(size, align);
+	else
+		size = roundup_pow_of_two(size);
+
+	peer_epc = peer_ntb_epc->epc;
+	mw_addr = pci_epc_mem_alloc_addr(peer_epc, &phys_addr, size);
+	if (!mw_addr) {
+		dev_err(dev, "%s intf: Failed to allocate OB address\n",
+			pci_epc_interface_string(peer_ntb_epc->type));
+		return -ENOMEM;
+	}
+
+	barno = ntb_epc->epf_ntb_bar[bar];
+	epf_bar = &ntb_epc->epf_bar[barno];
+	ntb_epc->mw_addr[barno] = mw_addr;
+
+	epf_bar->phys_addr = phys_addr;
+	epf_bar->size = size;
+	epf_bar->barno = barno;
+	epf_bar->flags = PCI_BASE_ADDRESS_MEM_TYPE_32;
+
+	return 0;
+}
+
+/**
+ * epf_ntb_db_mw_bar_init() - Configure Doorbell and Memory window BARs
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Wrapper for epf_ntb_alloc_peer_mem() and pci_epc_set_bar() that allocates
+ * memory in OB address space of HOST2 and configures BAR of HOST1
+ */
+static int epf_ntb_db_mw_bar_init(struct epf_ntb *ntb,
+				  enum pci_epc_interface_type type)
+{
+	const struct pci_epc_features *epc_features;
+	struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+	struct pci_epf_bar *epf_bar;
+	struct epf_ntb_ctrl *ctrl;
+	u32 num_mws, db_count;
+	enum epf_ntb_bar bar;
+	enum pci_barno barno;
+	u8 func_no, vfunc_no;
+	struct pci_epc *epc;
+	struct device *dev;
+	size_t align;
+	int ret, i;
+	u64 size;
+
+	ntb_epc = ntb->epc[type];
+	peer_ntb_epc = ntb->epc[!type];
+
+	dev = &ntb->epf->dev;
+	epc_features = ntb_epc->epc_features;
+	align = epc_features->align;
+	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
+	epc = ntb_epc->epc;
+	num_mws = ntb->num_mws;
+	db_count = ntb->db_count;
+
+	for (bar = BAR_DB_MW1, i = 0; i < num_mws; bar++, i++) {
+		if (bar == BAR_DB_MW1) {
+			align = align ? align : 4;
+			size = db_count * align;
+			size = ALIGN(size, ntb->mws_size[i]);
+			ctrl = ntb_epc->reg;
+			ctrl->mw1_offset = size;
+			size += ntb->mws_size[i];
+		} else {
+			size = ntb->mws_size[i];
+		}
+
+		ret = epf_ntb_alloc_peer_mem(dev, ntb_epc, bar,
+					     peer_ntb_epc, size);
+		if (ret) {
+			dev_err(dev, "%s intf: DoorBell mem alloc failed\n",
+				pci_epc_interface_string(type));
+			goto err_alloc_peer_mem;
+		}
+
+		barno = ntb_epc->epf_ntb_bar[bar];
+		epf_bar = &ntb_epc->epf_bar[barno];
+
+		ret = pci_epc_set_bar(epc, func_no, vfunc_no, epf_bar);
+		if (ret) {
+			dev_err(dev, "%s intf: DoorBell BAR set failed\n",
+				pci_epc_interface_string(type));
+			goto err_alloc_peer_mem;
+		}
+	}
+
+	return 0;
+
+err_alloc_peer_mem:
+	epf_ntb_db_mw_bar_cleanup(ntb, type);
+
+	return ret;
+}
+
+/**
+ * epf_ntb_epc_destroy_interface() - Cleanup NTB EPC interface
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Unbind NTB function device from EPC and relinquish reference to pci_epc
+ * for each of the interface.
+ */
+static void epf_ntb_epc_destroy_interface(struct epf_ntb *ntb,
+					  enum pci_epc_interface_type type)
+{
+	struct epf_ntb_epc *ntb_epc;
+	struct pci_epc *epc;
+	struct pci_epf *epf;
+
+	if (type < 0)
+		return;
+
+	epf = ntb->epf;
+	ntb_epc = ntb->epc[type];
+	if (!ntb_epc)
+		return;
+	epc = ntb_epc->epc;
+	pci_epc_remove_epf(epc, epf, type);
+	pci_epc_put(epc);
+}
+
+/**
+ * epf_ntb_epc_destroy() - Cleanup NTB EPC interface
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ *
+ * Wrapper for epf_ntb_epc_destroy_interface() to cleanup all the NTB interfaces
+ */
+static void epf_ntb_epc_destroy(struct epf_ntb *ntb)
+{
+	enum pci_epc_interface_type type;
+
+	for (type = PRIMARY_INTERFACE; type <= SECONDARY_INTERFACE; type++)
+		epf_ntb_epc_destroy_interface(ntb, type);
+}
+
+/**
+ * epf_ntb_epc_create_interface() - Create and initialize NTB EPC interface
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @epc: struct pci_epc to which a particular NTB interface should be associated
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Allocate memory for NTB EPC interface and initialize it.
+ */
+static int epf_ntb_epc_create_interface(struct epf_ntb *ntb,
+					struct pci_epc *epc,
+					enum pci_epc_interface_type type)
+{
+	const struct pci_epc_features *epc_features;
+	struct pci_epf_bar *epf_bar;
+	struct epf_ntb_epc *ntb_epc;
+	u8 func_no, vfunc_no;
+	struct pci_epf *epf;
+	struct device *dev;
+
+	dev = &ntb->epf->dev;
+
+	ntb_epc = devm_kzalloc(dev, sizeof(*ntb_epc), GFP_KERNEL);
+	if (!ntb_epc)
+		return -ENOMEM;
+
+	epf = ntb->epf;
+	vfunc_no = epf->vfunc_no;
+	if (type == PRIMARY_INTERFACE) {
+		func_no = epf->func_no;
+		epf_bar = epf->bar;
+	} else {
+		func_no = epf->sec_epc_func_no;
+		epf_bar = epf->sec_epc_bar;
+	}
+
+	ntb_epc->linkup = false;
+	ntb_epc->epc = epc;
+	ntb_epc->func_no = func_no;
+	ntb_epc->vfunc_no = vfunc_no;
+	ntb_epc->type = type;
+	ntb_epc->epf_bar = epf_bar;
+	ntb_epc->epf_ntb = ntb;
+
+	epc_features = pci_epc_get_features(epc, func_no, vfunc_no);
+	if (!epc_features)
+		return -EINVAL;
+	ntb_epc->epc_features = epc_features;
+
+	ntb->epc[type] = ntb_epc;
+
+	return 0;
+}
+
+/**
+ * epf_ntb_epc_create() - Create and initialize NTB EPC interface
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ *
+ * Get a reference to EPC device and bind NTB function device to that EPC
+ * for each of the interface. It is also a wrapper to
+ * epf_ntb_epc_create_interface() to allocate memory for NTB EPC interface
+ * and initialize it
+ */
+static int epf_ntb_epc_create(struct epf_ntb *ntb)
+{
+	struct pci_epf *epf;
+	struct device *dev;
+	int ret;
+
+	epf = ntb->epf;
+	dev = &epf->dev;
+
+	ret = epf_ntb_epc_create_interface(ntb, epf->epc, PRIMARY_INTERFACE);
+	if (ret) {
+		dev_err(dev, "PRIMARY intf: Fail to create NTB EPC\n");
+		return ret;
+	}
+
+	ret = epf_ntb_epc_create_interface(ntb, epf->sec_epc,
+					   SECONDARY_INTERFACE);
+	if (ret) {
+		dev_err(dev, "SECONDARY intf: Fail to create NTB EPC\n");
+		goto err_epc_create;
+	}
+
+	return 0;
+
+err_epc_create:
+	epf_ntb_epc_destroy_interface(ntb, PRIMARY_INTERFACE);
+
+	return ret;
+}
+
+/**
+ * epf_ntb_init_epc_bar_interface() - Identify BARs to be used for each of
+ *   the NTB constructs (scratchpad region, doorbell, memorywindow)
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Identify the free BARs to be used for each of BAR_CONFIG, BAR_PEER_SPAD,
+ * BAR_DB_MW1, BAR_MW2, BAR_MW3 and BAR_MW4.
+ */
+static int epf_ntb_init_epc_bar_interface(struct epf_ntb *ntb,
+					  enum pci_epc_interface_type type)
+{
+	const struct pci_epc_features *epc_features;
+	struct epf_ntb_epc *ntb_epc;
+	enum pci_barno barno;
+	enum epf_ntb_bar bar;
+	struct device *dev;
+	u32 num_mws;
+	int i;
+
+	barno = BAR_0;
+	ntb_epc = ntb->epc[type];
+	num_mws = ntb->num_mws;
+	dev = &ntb->epf->dev;
+	epc_features = ntb_epc->epc_features;
+
+	/* These are required BARs which are mandatory for NTB functionality */
+	for (bar = BAR_CONFIG; bar <= BAR_DB_MW1; bar++, barno++) {
+		barno = pci_epc_get_next_free_bar(epc_features, barno);
+		if (barno < 0) {
+			dev_err(dev, "%s intf: Fail to get NTB function BAR\n",
+				pci_epc_interface_string(type));
+			return barno;
+		}
+		ntb_epc->epf_ntb_bar[bar] = barno;
+	}
+
+	/* These are optional BARs which don't impact NTB functionality */
+	for (bar = BAR_MW2, i = 1; i < num_mws; bar++, barno++, i++) {
+		barno = pci_epc_get_next_free_bar(epc_features, barno);
+		if (barno < 0) {
+			ntb->num_mws = i;
+			dev_dbg(dev, "BAR not available for > MW%d\n", i + 1);
+		}
+		ntb_epc->epf_ntb_bar[bar] = barno;
+	}
+
+	return 0;
+}
+
+/**
+ * epf_ntb_init_epc_bar() - Identify BARs to be used for each of the NTB
+ * constructs (scratchpad region, doorbell, memorywindow)
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ *
+ * Wrapper to epf_ntb_init_epc_bar_interface() to identify the free BARs
+ * to be used for each of BAR_CONFIG, BAR_PEER_SPAD, BAR_DB_MW1, BAR_MW2,
+ * BAR_MW3 and BAR_MW4 for all the interfaces.
+ */
+static int epf_ntb_init_epc_bar(struct epf_ntb *ntb)
+{
+	enum pci_epc_interface_type type;
+	struct device *dev;
+	int ret;
+
+	dev = &ntb->epf->dev;
+	for (type = PRIMARY_INTERFACE; type <= SECONDARY_INTERFACE; type++) {
+		ret = epf_ntb_init_epc_bar_interface(ntb, type);
+		if (ret) {
+			dev_err(dev, "Fail to init EPC bar for %s interface\n",
+				pci_epc_interface_string(type));
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * epf_ntb_epc_init_interface() - Initialize NTB interface
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Wrapper to initialize a particular EPC interface and start the workqueue
+ * to check for commands from host. This function will write to the
+ * EP controller HW for configuring it.
+ */
+static int epf_ntb_epc_init_interface(struct epf_ntb *ntb,
+				      enum pci_epc_interface_type type)
+{
+	struct epf_ntb_epc *ntb_epc;
+	u8 func_no, vfunc_no;
+	struct pci_epc *epc;
+	struct pci_epf *epf;
+	struct device *dev;
+	int ret;
+
+	ntb_epc = ntb->epc[type];
+	epf = ntb->epf;
+	dev = &epf->dev;
+	epc = ntb_epc->epc;
+	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
+
+	ret = epf_ntb_config_sspad_bar_set(ntb->epc[type]);
+	if (ret) {
+		dev_err(dev, "%s intf: Config/self SPAD BAR init failed\n",
+			pci_epc_interface_string(type));
+		return ret;
+	}
+
+	ret = epf_ntb_peer_spad_bar_set(ntb, type);
+	if (ret) {
+		dev_err(dev, "%s intf: Peer SPAD BAR init failed\n",
+			pci_epc_interface_string(type));
+		goto err_peer_spad_bar_init;
+	}
+
+	ret = epf_ntb_configure_interrupt(ntb, type);
+	if (ret) {
+		dev_err(dev, "%s intf: Interrupt configuration failed\n",
+			pci_epc_interface_string(type));
+		goto err_peer_spad_bar_init;
+	}
+
+	ret = epf_ntb_db_mw_bar_init(ntb, type);
+	if (ret) {
+		dev_err(dev, "%s intf: DB/MW BAR init failed\n",
+			pci_epc_interface_string(type));
+		goto err_db_mw_bar_init;
+	}
+
+	if (vfunc_no <= 1) {
+		ret = pci_epc_write_header(epc, func_no, vfunc_no, epf->header);
+		if (ret) {
+			dev_err(dev, "%s intf: Configuration header write failed\n",
+				pci_epc_interface_string(type));
+			goto err_write_header;
+		}
+	}
+
+	INIT_DELAYED_WORK(&ntb->epc[type]->cmd_handler, epf_ntb_cmd_handler);
+	queue_work(kpcintb_workqueue, &ntb->epc[type]->cmd_handler.work);
+
+	return 0;
+
+err_write_header:
+	epf_ntb_db_mw_bar_cleanup(ntb, type);
+
+err_db_mw_bar_init:
+	epf_ntb_peer_spad_bar_clear(ntb->epc[type]);
+
+err_peer_spad_bar_init:
+	epf_ntb_config_sspad_bar_clear(ntb->epc[type]);
+
+	return ret;
+}
+
+/**
+ * epf_ntb_epc_cleanup_interface() - Cleanup NTB interface
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Wrapper to cleanup a particular NTB interface.
+ */
+static void epf_ntb_epc_cleanup_interface(struct epf_ntb *ntb,
+					  enum pci_epc_interface_type type)
+{
+	struct epf_ntb_epc *ntb_epc;
+
+	if (type < 0)
+		return;
+
+	ntb_epc = ntb->epc[type];
+	cancel_delayed_work(&ntb_epc->cmd_handler);
+	epf_ntb_db_mw_bar_cleanup(ntb, type);
+	epf_ntb_peer_spad_bar_clear(ntb_epc);
+	epf_ntb_config_sspad_bar_clear(ntb_epc);
+}
+
+/**
+ * epf_ntb_epc_cleanup() - Cleanup all NTB interfaces
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ *
+ * Wrapper to cleanup all NTB interfaces.
+ */
+static void epf_ntb_epc_cleanup(struct epf_ntb *ntb)
+{
+	enum pci_epc_interface_type type;
+
+	for (type = PRIMARY_INTERFACE; type <= SECONDARY_INTERFACE; type++)
+		epf_ntb_epc_cleanup_interface(ntb, type);
+}
+
+/**
+ * epf_ntb_epc_init() - Initialize all NTB interfaces
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ *
+ * Wrapper to initialize all NTB interface and start the workqueue
+ * to check for commands from host.
+ */
+static int epf_ntb_epc_init(struct epf_ntb *ntb)
+{
+	enum pci_epc_interface_type type;
+	struct device *dev;
+	int ret;
+
+	dev = &ntb->epf->dev;
+
+	for (type = PRIMARY_INTERFACE; type <= SECONDARY_INTERFACE; type++) {
+		ret = epf_ntb_epc_init_interface(ntb, type);
+		if (ret) {
+			dev_err(dev, "%s intf: Failed to initialize\n",
+				pci_epc_interface_string(type));
+			goto err_init_type;
+		}
+	}
+
+	return 0;
+
+err_init_type:
+	epf_ntb_epc_cleanup_interface(ntb, type - 1);
+
+	return ret;
+}
+
+/**
+ * epf_ntb_bind() - Initialize endpoint controller to provide NTB functionality
+ * @epf: NTB endpoint function device
+ *
+ * Initialize both the endpoint controllers associated with NTB function device.
+ * Invoked when a primary interface or secondary interface is bound to EPC
+ * device. This function will succeed only when EPC is bound to both the
+ * interfaces.
+ */
+static int epf_ntb_bind(struct pci_epf *epf)
+{
+	struct epf_ntb *ntb = epf_get_drvdata(epf);
+	struct device *dev = &epf->dev;
+	int ret;
+
+	if (!epf->epc) {
+		dev_dbg(dev, "PRIMARY EPC interface not yet bound\n");
+		return 0;
+	}
+
+	if (!epf->sec_epc) {
+		dev_dbg(dev, "SECONDARY EPC interface not yet bound\n");
+		return 0;
+	}
+
+	ret = epf_ntb_epc_create(ntb);
+	if (ret) {
+		dev_err(dev, "Failed to create NTB EPC\n");
+		return ret;
+	}
+
+	ret = epf_ntb_init_epc_bar(ntb);
+	if (ret) {
+		dev_err(dev, "Failed to create NTB EPC\n");
+		goto err_bar_init;
+	}
+
+	ret = epf_ntb_config_spad_bar_alloc_interface(ntb);
+	if (ret) {
+		dev_err(dev, "Failed to allocate BAR memory\n");
+		goto err_bar_alloc;
+	}
+
+	ret = epf_ntb_epc_init(ntb);
+	if (ret) {
+		dev_err(dev, "Failed to initialize EPC\n");
+		goto err_bar_alloc;
+	}
+
+	epf_set_drvdata(epf, ntb);
+
+	return 0;
+
+err_bar_alloc:
+	epf_ntb_config_spad_bar_free(ntb);
+
+err_bar_init:
+	epf_ntb_epc_destroy(ntb);
+
+	return ret;
+}
+
+/**
+ * epf_ntb_unbind() - Cleanup the initialization from epf_ntb_bind()
+ * @epf: NTB endpoint function device
+ *
+ * Cleanup the initialization from epf_ntb_bind()
+ */
+static void epf_ntb_unbind(struct pci_epf *epf)
+{
+	struct epf_ntb *ntb = epf_get_drvdata(epf);
+
+	epf_ntb_epc_cleanup(ntb);
+	epf_ntb_config_spad_bar_free(ntb);
+	epf_ntb_epc_destroy(ntb);
+}
+
+#define EPF_NTB_R(_name)						\
+static ssize_t epf_ntb_##_name##_show(struct config_item *item,		\
+				      char *page)			\
+{									\
+	struct config_group *group = to_config_group(item);		\
+	struct epf_ntb *ntb = to_epf_ntb(group);			\
+									\
+	return sysfs_emit(page, "%d\n", ntb->_name);			\
+}
+
+#define EPF_NTB_W(_name)						\
+static ssize_t epf_ntb_##_name##_store(struct config_item *item,	\
+				       const char *page, size_t len)	\
+{									\
+	struct config_group *group = to_config_group(item);		\
+	struct epf_ntb *ntb = to_epf_ntb(group);			\
+	u32 val;							\
+									\
+	if (kstrtou32(page, 0, &val) < 0)				\
+		return -EINVAL;						\
+									\
+	ntb->_name = val;						\
+									\
+	return len;							\
+}
+
+#define EPF_NTB_MW_R(_name)						\
+static ssize_t epf_ntb_##_name##_show(struct config_item *item,		\
+				      char *page)			\
+{									\
+	struct config_group *group = to_config_group(item);		\
+	struct epf_ntb *ntb = to_epf_ntb(group);			\
+	int win_no;							\
+									\
+	sscanf(#_name, "mw%d", &win_no);				\
+									\
+	return sysfs_emit(page, "%lld\n", ntb->mws_size[win_no - 1]);	\
+}
+
+#define EPF_NTB_MW_W(_name)						\
+static ssize_t epf_ntb_##_name##_store(struct config_item *item,	\
+				       const char *page, size_t len)	\
+{									\
+	struct config_group *group = to_config_group(item);		\
+	struct epf_ntb *ntb = to_epf_ntb(group);			\
+	struct device *dev = &ntb->epf->dev;				\
+	int win_no;							\
+	u64 val;							\
+									\
+	if (kstrtou64(page, 0, &val) < 0)				\
+		return -EINVAL;						\
+									\
+	if (sscanf(#_name, "mw%d", &win_no) != 1)			\
+		return -EINVAL;						\
+									\
+	if (ntb->num_mws < win_no) {					\
+		dev_err(dev, "Invalid num_nws: %d value\n", ntb->num_mws); \
+		return -EINVAL;						\
+	}								\
+									\
+	ntb->mws_size[win_no - 1] = val;				\
+									\
+	return len;							\
+}
+
+static ssize_t epf_ntb_num_mws_store(struct config_item *item,
+				     const char *page, size_t len)
+{
+	struct config_group *group = to_config_group(item);
+	struct epf_ntb *ntb = to_epf_ntb(group);
+	u32 val;
+
+	if (kstrtou32(page, 0, &val) < 0)
+		return -EINVAL;
+
+	if (val > MAX_MW)
+		return -EINVAL;
+
+	ntb->num_mws = val;
+
+	return len;
+}
+
+EPF_NTB_R(spad_count)
+EPF_NTB_W(spad_count)
+EPF_NTB_R(db_count)
+EPF_NTB_W(db_count)
+EPF_NTB_R(num_mws)
+EPF_NTB_MW_R(mw1)
+EPF_NTB_MW_W(mw1)
+EPF_NTB_MW_R(mw2)
+EPF_NTB_MW_W(mw2)
+EPF_NTB_MW_R(mw3)
+EPF_NTB_MW_W(mw3)
+EPF_NTB_MW_R(mw4)
+EPF_NTB_MW_W(mw4)
+
+CONFIGFS_ATTR(epf_ntb_, spad_count);
+CONFIGFS_ATTR(epf_ntb_, db_count);
+CONFIGFS_ATTR(epf_ntb_, num_mws);
+CONFIGFS_ATTR(epf_ntb_, mw1);
+CONFIGFS_ATTR(epf_ntb_, mw2);
+CONFIGFS_ATTR(epf_ntb_, mw3);
+CONFIGFS_ATTR(epf_ntb_, mw4);
+
+static struct configfs_attribute *epf_ntb_attrs[] = {
+	&epf_ntb_attr_spad_count,
+	&epf_ntb_attr_db_count,
+	&epf_ntb_attr_num_mws,
+	&epf_ntb_attr_mw1,
+	&epf_ntb_attr_mw2,
+	&epf_ntb_attr_mw3,
+	&epf_ntb_attr_mw4,
+	NULL,
+};
+
+static const struct config_item_type ntb_group_type = {
+	.ct_attrs	= epf_ntb_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+/**
+ * epf_ntb_add_cfs() - Add configfs directory specific to NTB
+ * @epf: NTB endpoint function device
+ * @group: A pointer to the config_group structure referencing a group of
+ *	   config_items of a specific type that belong to a specific sub-system.
+ *
+ * Add configfs directory specific to NTB. This directory will hold
+ * NTB specific properties like db_count, spad_count, num_mws etc.,
+ */
+static struct config_group *epf_ntb_add_cfs(struct pci_epf *epf,
+					    struct config_group *group)
+{
+	struct epf_ntb *ntb = epf_get_drvdata(epf);
+	struct config_group *ntb_group = &ntb->group;
+	struct device *dev = &epf->dev;
+
+	config_group_init_type_name(ntb_group, dev_name(dev), &ntb_group_type);
+
+	return ntb_group;
+}
+
+/**
+ * epf_ntb_probe() - Probe NTB function driver
+ * @epf: NTB endpoint function device
+ * @id: NTB endpoint function device ID
+ *
+ * Probe NTB function driver when endpoint function bus detects a NTB
+ * endpoint function.
+ */
+static int epf_ntb_probe(struct pci_epf *epf,
+			 const struct pci_epf_device_id *id)
+{
+	struct epf_ntb *ntb;
+	struct device *dev;
+
+	dev = &epf->dev;
+
+	ntb = devm_kzalloc(dev, sizeof(*ntb), GFP_KERNEL);
+	if (!ntb)
+		return -ENOMEM;
+
+	epf->header = &epf_ntb_header;
+	ntb->epf = epf;
+	epf_set_drvdata(epf, ntb);
+
+	return 0;
+}
+
+static struct pci_epf_ops epf_ntb_ops = {
+	.bind	= epf_ntb_bind,
+	.unbind	= epf_ntb_unbind,
+	.add_cfs = epf_ntb_add_cfs,
+};
+
+static const struct pci_epf_device_id epf_ntb_ids[] = {
+	{
+		.name = "pci_epf_ntb",
+	},
+	{},
+};
+
+static struct pci_epf_driver epf_ntb_driver = {
+	.driver.name	= "pci_epf_ntb",
+	.probe		= epf_ntb_probe,
+	.id_table	= epf_ntb_ids,
+	.ops		= &epf_ntb_ops,
+	.owner		= THIS_MODULE,
+};
+
+static int __init epf_ntb_init(void)
+{
+	int ret;
+
+	kpcintb_workqueue = alloc_workqueue("kpcintb", WQ_MEM_RECLAIM |
+					    WQ_HIGHPRI, 0);
+	ret = pci_epf_register_driver(&epf_ntb_driver);
+	if (ret) {
+		destroy_workqueue(kpcintb_workqueue);
+		pr_err("Failed to register pci epf ntb driver --> %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+module_init(epf_ntb_init);
+
+static void __exit epf_ntb_exit(void)
+{
+	pci_epf_unregister_driver(&epf_ntb_driver);
+	destroy_workqueue(kpcintb_workqueue);
+}
+module_exit(epf_ntb_exit);
+
+MODULE_DESCRIPTION("PCI EPF NTB DRIVER");
+MODULE_AUTHOR("Kishon Vijay Abraham I <kishon@ti.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c
new file mode 100644
index 000000000..1f0d2b842
--- /dev/null
+++ b/drivers/pci/endpoint/functions/pci-epf-test.c
@@ -0,0 +1,1021 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test driver to test endpoint functionality
+ *
+ * Copyright (C) 2017 Texas Instruments
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ */
+
+#include <linux/crc32.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci_ids.h>
+#include <linux/random.h>
+
+#include <linux/pci-epc.h>
+#include <linux/pci-epf.h>
+#include <linux/pci_regs.h>
+
+#define IRQ_TYPE_LEGACY			0
+#define IRQ_TYPE_MSI			1
+#define IRQ_TYPE_MSIX			2
+
+#define COMMAND_RAISE_LEGACY_IRQ	BIT(0)
+#define COMMAND_RAISE_MSI_IRQ		BIT(1)
+#define COMMAND_RAISE_MSIX_IRQ		BIT(2)
+#define COMMAND_READ			BIT(3)
+#define COMMAND_WRITE			BIT(4)
+#define COMMAND_COPY			BIT(5)
+
+#define STATUS_READ_SUCCESS		BIT(0)
+#define STATUS_READ_FAIL		BIT(1)
+#define STATUS_WRITE_SUCCESS		BIT(2)
+#define STATUS_WRITE_FAIL		BIT(3)
+#define STATUS_COPY_SUCCESS		BIT(4)
+#define STATUS_COPY_FAIL		BIT(5)
+#define STATUS_IRQ_RAISED		BIT(6)
+#define STATUS_SRC_ADDR_INVALID		BIT(7)
+#define STATUS_DST_ADDR_INVALID		BIT(8)
+
+#define FLAG_USE_DMA			BIT(0)
+
+#define TIMER_RESOLUTION		1
+
+static struct workqueue_struct *kpcitest_workqueue;
+
+struct pci_epf_test {
+	void			*reg[PCI_STD_NUM_BARS];
+	struct pci_epf		*epf;
+	enum pci_barno		test_reg_bar;
+	size_t			msix_table_offset;
+	struct delayed_work	cmd_handler;
+	struct dma_chan		*dma_chan_tx;
+	struct dma_chan		*dma_chan_rx;
+	struct dma_chan		*transfer_chan;
+	dma_cookie_t		transfer_cookie;
+	enum dma_status		transfer_status;
+	struct completion	transfer_complete;
+	bool			dma_supported;
+	bool			dma_private;
+	const struct pci_epc_features *epc_features;
+};
+
+struct pci_epf_test_reg {
+	u32	magic;
+	u32	command;
+	u32	status;
+	u64	src_addr;
+	u64	dst_addr;
+	u32	size;
+	u32	checksum;
+	u32	irq_type;
+	u32	irq_number;
+	u32	flags;
+} __packed;
+
+static struct pci_epf_header test_header = {
+	.vendorid	= PCI_ANY_ID,
+	.deviceid	= PCI_ANY_ID,
+	.baseclass_code = PCI_CLASS_OTHERS,
+	.interrupt_pin	= PCI_INTERRUPT_INTA,
+};
+
+static size_t bar_size[] = { 512, 512, 1024, 16384, 131072, 1048576 };
+
+static void pci_epf_test_dma_callback(void *param)
+{
+	struct pci_epf_test *epf_test = param;
+	struct dma_tx_state state;
+
+	epf_test->transfer_status =
+		dmaengine_tx_status(epf_test->transfer_chan,
+				    epf_test->transfer_cookie, &state);
+	if (epf_test->transfer_status == DMA_COMPLETE ||
+	    epf_test->transfer_status == DMA_ERROR)
+		complete(&epf_test->transfer_complete);
+}
+
+/**
+ * pci_epf_test_data_transfer() - Function that uses dmaengine API to transfer
+ *				  data between PCIe EP and remote PCIe RC
+ * @epf_test: the EPF test device that performs the data transfer operation
+ * @dma_dst: The destination address of the data transfer. It can be a physical
+ *	     address given by pci_epc_mem_alloc_addr or DMA mapping APIs.
+ * @dma_src: The source address of the data transfer. It can be a physical
+ *	     address given by pci_epc_mem_alloc_addr or DMA mapping APIs.
+ * @len: The size of the data transfer
+ * @dma_remote: remote RC physical address
+ * @dir: DMA transfer direction
+ *
+ * Function that uses dmaengine API to transfer data between PCIe EP and remote
+ * PCIe RC. The source and destination address can be a physical address given
+ * by pci_epc_mem_alloc_addr or the one obtained using DMA mapping APIs.
+ *
+ * The function returns '0' on success and negative value on failure.
+ */
+static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test,
+				      dma_addr_t dma_dst, dma_addr_t dma_src,
+				      size_t len, dma_addr_t dma_remote,
+				      enum dma_transfer_direction dir)
+{
+	struct dma_chan *chan = (dir == DMA_MEM_TO_DEV) ?
+				 epf_test->dma_chan_tx : epf_test->dma_chan_rx;
+	dma_addr_t dma_local = (dir == DMA_MEM_TO_DEV) ? dma_src : dma_dst;
+	enum dma_ctrl_flags flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
+	struct pci_epf *epf = epf_test->epf;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_slave_config sconf = {};
+	struct device *dev = &epf->dev;
+	int ret;
+
+	if (IS_ERR_OR_NULL(chan)) {
+		dev_err(dev, "Invalid DMA memcpy channel\n");
+		return -EINVAL;
+	}
+
+	if (epf_test->dma_private) {
+		sconf.direction = dir;
+		if (dir == DMA_MEM_TO_DEV)
+			sconf.dst_addr = dma_remote;
+		else
+			sconf.src_addr = dma_remote;
+
+		if (dmaengine_slave_config(chan, &sconf)) {
+			dev_err(dev, "DMA slave config fail\n");
+			return -EIO;
+		}
+		tx = dmaengine_prep_slave_single(chan, dma_local, len, dir,
+						 flags);
+	} else {
+		tx = dmaengine_prep_dma_memcpy(chan, dma_dst, dma_src, len,
+					       flags);
+	}
+
+	if (!tx) {
+		dev_err(dev, "Failed to prepare DMA memcpy\n");
+		return -EIO;
+	}
+
+	reinit_completion(&epf_test->transfer_complete);
+	epf_test->transfer_chan = chan;
+	tx->callback = pci_epf_test_dma_callback;
+	tx->callback_param = epf_test;
+	epf_test->transfer_cookie = dmaengine_submit(tx);
+
+	ret = dma_submit_error(epf_test->transfer_cookie);
+	if (ret) {
+		dev_err(dev, "Failed to do DMA tx_submit %d\n", ret);
+		goto terminate;
+	}
+
+	dma_async_issue_pending(chan);
+	ret = wait_for_completion_interruptible(&epf_test->transfer_complete);
+	if (ret < 0) {
+		dev_err(dev, "DMA wait_for_completion interrupted\n");
+		goto terminate;
+	}
+
+	if (epf_test->transfer_status == DMA_ERROR) {
+		dev_err(dev, "DMA transfer failed\n");
+		ret = -EIO;
+	}
+
+terminate:
+	dmaengine_terminate_sync(chan);
+
+	return ret;
+}
+
+struct epf_dma_filter {
+	struct device *dev;
+	u32 dma_mask;
+};
+
+static bool epf_dma_filter_fn(struct dma_chan *chan, void *node)
+{
+	struct epf_dma_filter *filter = node;
+	struct dma_slave_caps caps;
+
+	memset(&caps, 0, sizeof(caps));
+	dma_get_slave_caps(chan, &caps);
+
+	return chan->device->dev == filter->dev
+		&& (filter->dma_mask & caps.directions);
+}
+
+/**
+ * pci_epf_test_init_dma_chan() - Function to initialize EPF test DMA channel
+ * @epf_test: the EPF test device that performs data transfer operation
+ *
+ * Function to initialize EPF test DMA channel.
+ */
+static int pci_epf_test_init_dma_chan(struct pci_epf_test *epf_test)
+{
+	struct pci_epf *epf = epf_test->epf;
+	struct device *dev = &epf->dev;
+	struct epf_dma_filter filter;
+	struct dma_chan *dma_chan;
+	dma_cap_mask_t mask;
+	int ret;
+
+	filter.dev = epf->epc->dev.parent;
+	filter.dma_mask = BIT(DMA_DEV_TO_MEM);
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+	dma_chan = dma_request_channel(mask, epf_dma_filter_fn, &filter);
+	if (!dma_chan) {
+		dev_info(dev, "Failed to get private DMA rx channel. Falling back to generic one\n");
+		goto fail_back_tx;
+	}
+
+	epf_test->dma_chan_rx = dma_chan;
+
+	filter.dma_mask = BIT(DMA_MEM_TO_DEV);
+	dma_chan = dma_request_channel(mask, epf_dma_filter_fn, &filter);
+
+	if (!dma_chan) {
+		dev_info(dev, "Failed to get private DMA tx channel. Falling back to generic one\n");
+		goto fail_back_rx;
+	}
+
+	epf_test->dma_chan_tx = dma_chan;
+	epf_test->dma_private = true;
+
+	init_completion(&epf_test->transfer_complete);
+
+	return 0;
+
+fail_back_rx:
+	dma_release_channel(epf_test->dma_chan_rx);
+	epf_test->dma_chan_tx = NULL;
+
+fail_back_tx:
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_MEMCPY, mask);
+
+	dma_chan = dma_request_chan_by_mask(&mask);
+	if (IS_ERR(dma_chan)) {
+		ret = PTR_ERR(dma_chan);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get DMA channel\n");
+		return ret;
+	}
+	init_completion(&epf_test->transfer_complete);
+
+	epf_test->dma_chan_tx = epf_test->dma_chan_rx = dma_chan;
+
+	return 0;
+}
+
+/**
+ * pci_epf_test_clean_dma_chan() - Function to cleanup EPF test DMA channel
+ * @epf_test: the EPF test device that performs data transfer operation
+ *
+ * Helper to cleanup EPF test DMA channel.
+ */
+static void pci_epf_test_clean_dma_chan(struct pci_epf_test *epf_test)
+{
+	if (!epf_test->dma_supported)
+		return;
+
+	dma_release_channel(epf_test->dma_chan_tx);
+	if (epf_test->dma_chan_tx == epf_test->dma_chan_rx) {
+		epf_test->dma_chan_tx = NULL;
+		epf_test->dma_chan_rx = NULL;
+		return;
+	}
+
+	dma_release_channel(epf_test->dma_chan_rx);
+	epf_test->dma_chan_rx = NULL;
+
+	return;
+}
+
+static void pci_epf_test_print_rate(struct pci_epf_test *epf_test,
+				    const char *op, u64 size,
+				    struct timespec64 *start,
+				    struct timespec64 *end, bool dma)
+{
+	struct timespec64 ts = timespec64_sub(*end, *start);
+	u64 rate = 0, ns;
+
+	/* calculate the rate */
+	ns = timespec64_to_ns(&ts);
+	if (ns)
+		rate = div64_u64(size * NSEC_PER_SEC, ns * 1000);
+
+	dev_info(&epf_test->epf->dev,
+		 "%s => Size: %llu B, DMA: %s, Time: %llu.%09u s, Rate: %llu KB/s\n",
+		 op, size, dma ? "YES" : "NO",
+		 (u64)ts.tv_sec, (u32)ts.tv_nsec, rate);
+}
+
+static void pci_epf_test_copy(struct pci_epf_test *epf_test,
+			      struct pci_epf_test_reg *reg)
+{
+	int ret;
+	void __iomem *src_addr;
+	void __iomem *dst_addr;
+	phys_addr_t src_phys_addr;
+	phys_addr_t dst_phys_addr;
+	struct timespec64 start, end;
+	struct pci_epf *epf = epf_test->epf;
+	struct device *dev = &epf->dev;
+	struct pci_epc *epc = epf->epc;
+
+	src_addr = pci_epc_mem_alloc_addr(epc, &src_phys_addr, reg->size);
+	if (!src_addr) {
+		dev_err(dev, "Failed to allocate source address\n");
+		reg->status = STATUS_SRC_ADDR_INVALID;
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	ret = pci_epc_map_addr(epc, epf->func_no, epf->vfunc_no, src_phys_addr,
+			       reg->src_addr, reg->size);
+	if (ret) {
+		dev_err(dev, "Failed to map source address\n");
+		reg->status = STATUS_SRC_ADDR_INVALID;
+		goto err_src_addr;
+	}
+
+	dst_addr = pci_epc_mem_alloc_addr(epc, &dst_phys_addr, reg->size);
+	if (!dst_addr) {
+		dev_err(dev, "Failed to allocate destination address\n");
+		reg->status = STATUS_DST_ADDR_INVALID;
+		ret = -ENOMEM;
+		goto err_src_map_addr;
+	}
+
+	ret = pci_epc_map_addr(epc, epf->func_no, epf->vfunc_no, dst_phys_addr,
+			       reg->dst_addr, reg->size);
+	if (ret) {
+		dev_err(dev, "Failed to map destination address\n");
+		reg->status = STATUS_DST_ADDR_INVALID;
+		goto err_dst_addr;
+	}
+
+	ktime_get_ts64(&start);
+	if (reg->flags & FLAG_USE_DMA) {
+		if (epf_test->dma_private) {
+			dev_err(dev, "Cannot transfer data using DMA\n");
+			ret = -EINVAL;
+			goto err_map_addr;
+		}
+
+		ret = pci_epf_test_data_transfer(epf_test, dst_phys_addr,
+						 src_phys_addr, reg->size, 0,
+						 DMA_MEM_TO_MEM);
+		if (ret)
+			dev_err(dev, "Data transfer failed\n");
+	} else {
+		void *buf;
+
+		buf = kzalloc(reg->size, GFP_KERNEL);
+		if (!buf) {
+			ret = -ENOMEM;
+			goto err_map_addr;
+		}
+
+		memcpy_fromio(buf, src_addr, reg->size);
+		memcpy_toio(dst_addr, buf, reg->size);
+		kfree(buf);
+	}
+	ktime_get_ts64(&end);
+	pci_epf_test_print_rate(epf_test, "COPY", reg->size, &start, &end,
+				reg->flags & FLAG_USE_DMA);
+
+err_map_addr:
+	pci_epc_unmap_addr(epc, epf->func_no, epf->vfunc_no, dst_phys_addr);
+
+err_dst_addr:
+	pci_epc_mem_free_addr(epc, dst_phys_addr, dst_addr, reg->size);
+
+err_src_map_addr:
+	pci_epc_unmap_addr(epc, epf->func_no, epf->vfunc_no, src_phys_addr);
+
+err_src_addr:
+	pci_epc_mem_free_addr(epc, src_phys_addr, src_addr, reg->size);
+
+err:
+	if (!ret)
+		reg->status |= STATUS_COPY_SUCCESS;
+	else
+		reg->status |= STATUS_COPY_FAIL;
+}
+
+static void pci_epf_test_read(struct pci_epf_test *epf_test,
+			      struct pci_epf_test_reg *reg)
+{
+	int ret;
+	void __iomem *src_addr;
+	void *buf;
+	u32 crc32;
+	phys_addr_t phys_addr;
+	phys_addr_t dst_phys_addr;
+	struct timespec64 start, end;
+	struct pci_epf *epf = epf_test->epf;
+	struct device *dev = &epf->dev;
+	struct pci_epc *epc = epf->epc;
+	struct device *dma_dev = epf->epc->dev.parent;
+
+	src_addr = pci_epc_mem_alloc_addr(epc, &phys_addr, reg->size);
+	if (!src_addr) {
+		dev_err(dev, "Failed to allocate address\n");
+		reg->status = STATUS_SRC_ADDR_INVALID;
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	ret = pci_epc_map_addr(epc, epf->func_no, epf->vfunc_no, phys_addr,
+			       reg->src_addr, reg->size);
+	if (ret) {
+		dev_err(dev, "Failed to map address\n");
+		reg->status = STATUS_SRC_ADDR_INVALID;
+		goto err_addr;
+	}
+
+	buf = kzalloc(reg->size, GFP_KERNEL);
+	if (!buf) {
+		ret = -ENOMEM;
+		goto err_map_addr;
+	}
+
+	if (reg->flags & FLAG_USE_DMA) {
+		dst_phys_addr = dma_map_single(dma_dev, buf, reg->size,
+					       DMA_FROM_DEVICE);
+		if (dma_mapping_error(dma_dev, dst_phys_addr)) {
+			dev_err(dev, "Failed to map destination buffer addr\n");
+			ret = -ENOMEM;
+			goto err_dma_map;
+		}
+
+		ktime_get_ts64(&start);
+		ret = pci_epf_test_data_transfer(epf_test, dst_phys_addr,
+						 phys_addr, reg->size,
+						 reg->src_addr, DMA_DEV_TO_MEM);
+		if (ret)
+			dev_err(dev, "Data transfer failed\n");
+		ktime_get_ts64(&end);
+
+		dma_unmap_single(dma_dev, dst_phys_addr, reg->size,
+				 DMA_FROM_DEVICE);
+	} else {
+		ktime_get_ts64(&start);
+		memcpy_fromio(buf, src_addr, reg->size);
+		ktime_get_ts64(&end);
+	}
+
+	pci_epf_test_print_rate(epf_test, "READ", reg->size, &start, &end,
+				reg->flags & FLAG_USE_DMA);
+
+	crc32 = crc32_le(~0, buf, reg->size);
+	if (crc32 != reg->checksum)
+		ret = -EIO;
+
+err_dma_map:
+	kfree(buf);
+
+err_map_addr:
+	pci_epc_unmap_addr(epc, epf->func_no, epf->vfunc_no, phys_addr);
+
+err_addr:
+	pci_epc_mem_free_addr(epc, phys_addr, src_addr, reg->size);
+
+err:
+	if (!ret)
+		reg->status |= STATUS_READ_SUCCESS;
+	else
+		reg->status |= STATUS_READ_FAIL;
+}
+
+static void pci_epf_test_write(struct pci_epf_test *epf_test,
+			       struct pci_epf_test_reg *reg)
+{
+	int ret;
+	void __iomem *dst_addr;
+	void *buf;
+	phys_addr_t phys_addr;
+	phys_addr_t src_phys_addr;
+	struct timespec64 start, end;
+	struct pci_epf *epf = epf_test->epf;
+	struct device *dev = &epf->dev;
+	struct pci_epc *epc = epf->epc;
+	struct device *dma_dev = epf->epc->dev.parent;
+
+	dst_addr = pci_epc_mem_alloc_addr(epc, &phys_addr, reg->size);
+	if (!dst_addr) {
+		dev_err(dev, "Failed to allocate address\n");
+		reg->status = STATUS_DST_ADDR_INVALID;
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	ret = pci_epc_map_addr(epc, epf->func_no, epf->vfunc_no, phys_addr,
+			       reg->dst_addr, reg->size);
+	if (ret) {
+		dev_err(dev, "Failed to map address\n");
+		reg->status = STATUS_DST_ADDR_INVALID;
+		goto err_addr;
+	}
+
+	buf = kzalloc(reg->size, GFP_KERNEL);
+	if (!buf) {
+		ret = -ENOMEM;
+		goto err_map_addr;
+	}
+
+	get_random_bytes(buf, reg->size);
+	reg->checksum = crc32_le(~0, buf, reg->size);
+
+	if (reg->flags & FLAG_USE_DMA) {
+		src_phys_addr = dma_map_single(dma_dev, buf, reg->size,
+					       DMA_TO_DEVICE);
+		if (dma_mapping_error(dma_dev, src_phys_addr)) {
+			dev_err(dev, "Failed to map source buffer addr\n");
+			ret = -ENOMEM;
+			goto err_dma_map;
+		}
+
+		ktime_get_ts64(&start);
+
+		ret = pci_epf_test_data_transfer(epf_test, phys_addr,
+						 src_phys_addr, reg->size,
+						 reg->dst_addr,
+						 DMA_MEM_TO_DEV);
+		if (ret)
+			dev_err(dev, "Data transfer failed\n");
+		ktime_get_ts64(&end);
+
+		dma_unmap_single(dma_dev, src_phys_addr, reg->size,
+				 DMA_TO_DEVICE);
+	} else {
+		ktime_get_ts64(&start);
+		memcpy_toio(dst_addr, buf, reg->size);
+		ktime_get_ts64(&end);
+	}
+
+	pci_epf_test_print_rate(epf_test, "WRITE", reg->size, &start, &end,
+				reg->flags & FLAG_USE_DMA);
+
+	/*
+	 * wait 1ms inorder for the write to complete. Without this delay L3
+	 * error in observed in the host system.
+	 */
+	usleep_range(1000, 2000);
+
+err_dma_map:
+	kfree(buf);
+
+err_map_addr:
+	pci_epc_unmap_addr(epc, epf->func_no, epf->vfunc_no, phys_addr);
+
+err_addr:
+	pci_epc_mem_free_addr(epc, phys_addr, dst_addr, reg->size);
+
+err:
+	if (!ret)
+		reg->status |= STATUS_WRITE_SUCCESS;
+	else
+		reg->status |= STATUS_WRITE_FAIL;
+}
+
+static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test,
+				   struct pci_epf_test_reg *reg)
+{
+	struct pci_epf *epf = epf_test->epf;
+	struct device *dev = &epf->dev;
+	struct pci_epc *epc = epf->epc;
+	u32 status = reg->status | STATUS_IRQ_RAISED;
+	int count;
+
+	/*
+	 * Set the status before raising the IRQ to ensure that the host sees
+	 * the updated value when it gets the IRQ.
+	 */
+	WRITE_ONCE(reg->status, status);
+
+	switch (reg->irq_type) {
+	case IRQ_TYPE_LEGACY:
+		pci_epc_raise_irq(epc, epf->func_no, epf->vfunc_no,
+				  PCI_EPC_IRQ_LEGACY, 0);
+		break;
+	case IRQ_TYPE_MSI:
+		count = pci_epc_get_msi(epc, epf->func_no, epf->vfunc_no);
+		if (reg->irq_number > count || count <= 0) {
+			dev_err(dev, "Invalid MSI IRQ number %d / %d\n",
+				reg->irq_number, count);
+			return;
+		}
+		pci_epc_raise_irq(epc, epf->func_no, epf->vfunc_no,
+				  PCI_EPC_IRQ_MSI, reg->irq_number);
+		break;
+	case IRQ_TYPE_MSIX:
+		count = pci_epc_get_msix(epc, epf->func_no, epf->vfunc_no);
+		if (reg->irq_number > count || count <= 0) {
+			dev_err(dev, "Invalid MSIX IRQ number %d / %d\n",
+				reg->irq_number, count);
+			return;
+		}
+		pci_epc_raise_irq(epc, epf->func_no, epf->vfunc_no,
+				  PCI_EPC_IRQ_MSIX, reg->irq_number);
+		break;
+	default:
+		dev_err(dev, "Failed to raise IRQ, unknown type\n");
+		break;
+	}
+}
+
+static void pci_epf_test_cmd_handler(struct work_struct *work)
+{
+	u32 command;
+	struct pci_epf_test *epf_test = container_of(work, struct pci_epf_test,
+						     cmd_handler.work);
+	struct pci_epf *epf = epf_test->epf;
+	struct device *dev = &epf->dev;
+	enum pci_barno test_reg_bar = epf_test->test_reg_bar;
+	struct pci_epf_test_reg *reg = epf_test->reg[test_reg_bar];
+
+	command = READ_ONCE(reg->command);
+	if (!command)
+		goto reset_handler;
+
+	WRITE_ONCE(reg->command, 0);
+	WRITE_ONCE(reg->status, 0);
+
+	if ((READ_ONCE(reg->flags) & FLAG_USE_DMA) &&
+	    !epf_test->dma_supported) {
+		dev_err(dev, "Cannot transfer data using DMA\n");
+		goto reset_handler;
+	}
+
+	if (reg->irq_type > IRQ_TYPE_MSIX) {
+		dev_err(dev, "Failed to detect IRQ type\n");
+		goto reset_handler;
+	}
+
+	switch (command) {
+	case COMMAND_RAISE_LEGACY_IRQ:
+	case COMMAND_RAISE_MSI_IRQ:
+	case COMMAND_RAISE_MSIX_IRQ:
+		pci_epf_test_raise_irq(epf_test, reg);
+		break;
+	case COMMAND_WRITE:
+		pci_epf_test_write(epf_test, reg);
+		pci_epf_test_raise_irq(epf_test, reg);
+		break;
+	case COMMAND_READ:
+		pci_epf_test_read(epf_test, reg);
+		pci_epf_test_raise_irq(epf_test, reg);
+		break;
+	case COMMAND_COPY:
+		pci_epf_test_copy(epf_test, reg);
+		pci_epf_test_raise_irq(epf_test, reg);
+		break;
+	default:
+		dev_err(dev, "Invalid command 0x%x\n", command);
+		break;
+	}
+
+reset_handler:
+	queue_delayed_work(kpcitest_workqueue, &epf_test->cmd_handler,
+			   msecs_to_jiffies(1));
+}
+
+static void pci_epf_test_unbind(struct pci_epf *epf)
+{
+	struct pci_epf_test *epf_test = epf_get_drvdata(epf);
+	struct pci_epc *epc = epf->epc;
+	struct pci_epf_bar *epf_bar;
+	int bar;
+
+	cancel_delayed_work(&epf_test->cmd_handler);
+	pci_epf_test_clean_dma_chan(epf_test);
+	for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) {
+		epf_bar = &epf->bar[bar];
+
+		if (epf_test->reg[bar]) {
+			pci_epc_clear_bar(epc, epf->func_no, epf->vfunc_no,
+					  epf_bar);
+			pci_epf_free_space(epf, epf_test->reg[bar], bar,
+					   PRIMARY_INTERFACE);
+		}
+	}
+}
+
+static int pci_epf_test_set_bar(struct pci_epf *epf)
+{
+	int bar, add;
+	int ret;
+	struct pci_epf_bar *epf_bar;
+	struct pci_epc *epc = epf->epc;
+	struct device *dev = &epf->dev;
+	struct pci_epf_test *epf_test = epf_get_drvdata(epf);
+	enum pci_barno test_reg_bar = epf_test->test_reg_bar;
+	const struct pci_epc_features *epc_features;
+
+	epc_features = epf_test->epc_features;
+
+	for (bar = 0; bar < PCI_STD_NUM_BARS; bar += add) {
+		epf_bar = &epf->bar[bar];
+		/*
+		 * pci_epc_set_bar() sets PCI_BASE_ADDRESS_MEM_TYPE_64
+		 * if the specific implementation required a 64-bit BAR,
+		 * even if we only requested a 32-bit BAR.
+		 */
+		add = (epf_bar->flags & PCI_BASE_ADDRESS_MEM_TYPE_64) ? 2 : 1;
+
+		if (!!(epc_features->reserved_bar & (1 << bar)))
+			continue;
+
+		ret = pci_epc_set_bar(epc, epf->func_no, epf->vfunc_no,
+				      epf_bar);
+		if (ret) {
+			pci_epf_free_space(epf, epf_test->reg[bar], bar,
+					   PRIMARY_INTERFACE);
+			dev_err(dev, "Failed to set BAR%d\n", bar);
+			if (bar == test_reg_bar)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int pci_epf_test_core_init(struct pci_epf *epf)
+{
+	struct pci_epf_test *epf_test = epf_get_drvdata(epf);
+	struct pci_epf_header *header = epf->header;
+	const struct pci_epc_features *epc_features;
+	struct pci_epc *epc = epf->epc;
+	struct device *dev = &epf->dev;
+	bool msix_capable = false;
+	bool msi_capable = true;
+	int ret;
+
+	epc_features = pci_epc_get_features(epc, epf->func_no, epf->vfunc_no);
+	if (epc_features) {
+		msix_capable = epc_features->msix_capable;
+		msi_capable = epc_features->msi_capable;
+	}
+
+	if (epf->vfunc_no <= 1) {
+		ret = pci_epc_write_header(epc, epf->func_no, epf->vfunc_no, header);
+		if (ret) {
+			dev_err(dev, "Configuration header write failed\n");
+			return ret;
+		}
+	}
+
+	ret = pci_epf_test_set_bar(epf);
+	if (ret)
+		return ret;
+
+	if (msi_capable) {
+		ret = pci_epc_set_msi(epc, epf->func_no, epf->vfunc_no,
+				      epf->msi_interrupts);
+		if (ret) {
+			dev_err(dev, "MSI configuration failed\n");
+			return ret;
+		}
+	}
+
+	if (msix_capable) {
+		ret = pci_epc_set_msix(epc, epf->func_no, epf->vfunc_no,
+				       epf->msix_interrupts,
+				       epf_test->test_reg_bar,
+				       epf_test->msix_table_offset);
+		if (ret) {
+			dev_err(dev, "MSI-X configuration failed\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int pci_epf_test_link_up(struct pci_epf *epf)
+{
+	struct pci_epf_test *epf_test = epf_get_drvdata(epf);
+
+	queue_delayed_work(kpcitest_workqueue, &epf_test->cmd_handler,
+			   msecs_to_jiffies(1));
+
+	return 0;
+}
+
+static const struct pci_epc_event_ops pci_epf_test_event_ops = {
+	.core_init = pci_epf_test_core_init,
+	.link_up = pci_epf_test_link_up,
+};
+
+static int pci_epf_test_alloc_space(struct pci_epf *epf)
+{
+	struct pci_epf_test *epf_test = epf_get_drvdata(epf);
+	struct device *dev = &epf->dev;
+	struct pci_epf_bar *epf_bar;
+	size_t msix_table_size = 0;
+	size_t test_reg_bar_size;
+	size_t pba_size = 0;
+	bool msix_capable;
+	void *base;
+	int bar, add;
+	enum pci_barno test_reg_bar = epf_test->test_reg_bar;
+	const struct pci_epc_features *epc_features;
+	size_t test_reg_size;
+
+	epc_features = epf_test->epc_features;
+
+	test_reg_bar_size = ALIGN(sizeof(struct pci_epf_test_reg), 128);
+
+	msix_capable = epc_features->msix_capable;
+	if (msix_capable) {
+		msix_table_size = PCI_MSIX_ENTRY_SIZE * epf->msix_interrupts;
+		epf_test->msix_table_offset = test_reg_bar_size;
+		/* Align to QWORD or 8 Bytes */
+		pba_size = ALIGN(DIV_ROUND_UP(epf->msix_interrupts, 8), 8);
+	}
+	test_reg_size = test_reg_bar_size + msix_table_size + pba_size;
+
+	if (epc_features->bar_fixed_size[test_reg_bar]) {
+		if (test_reg_size > bar_size[test_reg_bar])
+			return -ENOMEM;
+		test_reg_size = bar_size[test_reg_bar];
+	}
+
+	base = pci_epf_alloc_space(epf, test_reg_size, test_reg_bar,
+				   epc_features->align, PRIMARY_INTERFACE);
+	if (!base) {
+		dev_err(dev, "Failed to allocated register space\n");
+		return -ENOMEM;
+	}
+	epf_test->reg[test_reg_bar] = base;
+
+	for (bar = 0; bar < PCI_STD_NUM_BARS; bar += add) {
+		epf_bar = &epf->bar[bar];
+		add = (epf_bar->flags & PCI_BASE_ADDRESS_MEM_TYPE_64) ? 2 : 1;
+
+		if (bar == test_reg_bar)
+			continue;
+
+		if (!!(epc_features->reserved_bar & (1 << bar)))
+			continue;
+
+		base = pci_epf_alloc_space(epf, bar_size[bar], bar,
+					   epc_features->align,
+					   PRIMARY_INTERFACE);
+		if (!base)
+			dev_err(dev, "Failed to allocate space for BAR%d\n",
+				bar);
+		epf_test->reg[bar] = base;
+	}
+
+	return 0;
+}
+
+static void pci_epf_configure_bar(struct pci_epf *epf,
+				  const struct pci_epc_features *epc_features)
+{
+	struct pci_epf_bar *epf_bar;
+	bool bar_fixed_64bit;
+	int i;
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		epf_bar = &epf->bar[i];
+		bar_fixed_64bit = !!(epc_features->bar_fixed_64bit & (1 << i));
+		if (bar_fixed_64bit)
+			epf_bar->flags |= PCI_BASE_ADDRESS_MEM_TYPE_64;
+		if (epc_features->bar_fixed_size[i])
+			bar_size[i] = epc_features->bar_fixed_size[i];
+	}
+}
+
+static int pci_epf_test_bind(struct pci_epf *epf)
+{
+	int ret;
+	struct pci_epf_test *epf_test = epf_get_drvdata(epf);
+	const struct pci_epc_features *epc_features;
+	enum pci_barno test_reg_bar = BAR_0;
+	struct pci_epc *epc = epf->epc;
+	bool linkup_notifier = false;
+	bool core_init_notifier = false;
+
+	if (WARN_ON_ONCE(!epc))
+		return -EINVAL;
+
+	epc_features = pci_epc_get_features(epc, epf->func_no, epf->vfunc_no);
+	if (!epc_features) {
+		dev_err(&epf->dev, "epc_features not implemented\n");
+		return -EOPNOTSUPP;
+	}
+
+	linkup_notifier = epc_features->linkup_notifier;
+	core_init_notifier = epc_features->core_init_notifier;
+	test_reg_bar = pci_epc_get_first_free_bar(epc_features);
+	if (test_reg_bar < 0)
+		return -EINVAL;
+	pci_epf_configure_bar(epf, epc_features);
+
+	epf_test->test_reg_bar = test_reg_bar;
+	epf_test->epc_features = epc_features;
+
+	ret = pci_epf_test_alloc_space(epf);
+	if (ret)
+		return ret;
+
+	if (!core_init_notifier) {
+		ret = pci_epf_test_core_init(epf);
+		if (ret)
+			return ret;
+	}
+
+	epf_test->dma_supported = true;
+
+	ret = pci_epf_test_init_dma_chan(epf_test);
+	if (ret)
+		epf_test->dma_supported = false;
+
+	if (!linkup_notifier && !core_init_notifier)
+		queue_work(kpcitest_workqueue, &epf_test->cmd_handler.work);
+
+	return 0;
+}
+
+static const struct pci_epf_device_id pci_epf_test_ids[] = {
+	{
+		.name = "pci_epf_test",
+	},
+	{},
+};
+
+static int pci_epf_test_probe(struct pci_epf *epf,
+			      const struct pci_epf_device_id *id)
+{
+	struct pci_epf_test *epf_test;
+	struct device *dev = &epf->dev;
+
+	epf_test = devm_kzalloc(dev, sizeof(*epf_test), GFP_KERNEL);
+	if (!epf_test)
+		return -ENOMEM;
+
+	epf->header = &test_header;
+	epf_test->epf = epf;
+
+	INIT_DELAYED_WORK(&epf_test->cmd_handler, pci_epf_test_cmd_handler);
+
+	epf->event_ops = &pci_epf_test_event_ops;
+
+	epf_set_drvdata(epf, epf_test);
+	return 0;
+}
+
+static struct pci_epf_ops ops = {
+	.unbind	= pci_epf_test_unbind,
+	.bind	= pci_epf_test_bind,
+};
+
+static struct pci_epf_driver test_driver = {
+	.driver.name	= "pci_epf_test",
+	.probe		= pci_epf_test_probe,
+	.id_table	= pci_epf_test_ids,
+	.ops		= &ops,
+	.owner		= THIS_MODULE,
+};
+
+static int __init pci_epf_test_init(void)
+{
+	int ret;
+
+	kpcitest_workqueue = alloc_workqueue("kpcitest",
+					     WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+	if (!kpcitest_workqueue) {
+		pr_err("Failed to allocate the kpcitest work queue\n");
+		return -ENOMEM;
+	}
+
+	ret = pci_epf_register_driver(&test_driver);
+	if (ret) {
+		destroy_workqueue(kpcitest_workqueue);
+		pr_err("Failed to register pci epf test driver --> %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+module_init(pci_epf_test_init);
+
+static void __exit pci_epf_test_exit(void)
+{
+	if (kpcitest_workqueue)
+		destroy_workqueue(kpcitest_workqueue);
+	pci_epf_unregister_driver(&test_driver);
+}
+module_exit(pci_epf_test_exit);
+
+MODULE_DESCRIPTION("PCI EPF TEST DRIVER");
+MODULE_AUTHOR("Kishon Vijay Abraham I <kishon@ti.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c
new file mode 100644
index 000000000..3f6012856
--- /dev/null
+++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c
@@ -0,0 +1,1468 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Endpoint Function Driver to implement Non-Transparent Bridge functionality
+ * Between PCI RC and EP
+ *
+ * Copyright (C) 2020 Texas Instruments
+ * Copyright (C) 2022 NXP
+ *
+ * Based on pci-epf-ntb.c
+ * Author: Frank Li <Frank.Li@nxp.com>
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ */
+
+/*
+ * +------------+         +---------------------------------------+
+ * |            |         |                                       |
+ * +------------+         |                        +--------------+
+ * | NTB        |         |                        | NTB          |
+ * | NetDev     |         |                        | NetDev       |
+ * +------------+         |                        +--------------+
+ * | NTB        |         |                        | NTB          |
+ * | Transfer   |         |                        | Transfer     |
+ * +------------+         |                        +--------------+
+ * |            |         |                        |              |
+ * |  PCI NTB   |         |                        |              |
+ * |    EPF     |         |                        |              |
+ * |   Driver   |         |                        | PCI Virtual  |
+ * |            |         +---------------+        | NTB Driver   |
+ * |            |         | PCI EP NTB    |<------>|              |
+ * |            |         |  FN Driver    |        |              |
+ * +------------+         +---------------+        +--------------+
+ * |            |         |               |        |              |
+ * |  PCI Bus   | <-----> |  PCI EP Bus   |        |  Virtual PCI |
+ * |            |  PCI    |               |        |     Bus      |
+ * +------------+         +---------------+--------+--------------+
+ * PCIe Root Port                        PCI EP
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/pci-epc.h>
+#include <linux/pci-epf.h>
+#include <linux/ntb.h>
+
+static struct workqueue_struct *kpcintb_workqueue;
+
+#define COMMAND_CONFIGURE_DOORBELL	1
+#define COMMAND_TEARDOWN_DOORBELL	2
+#define COMMAND_CONFIGURE_MW		3
+#define COMMAND_TEARDOWN_MW		4
+#define COMMAND_LINK_UP			5
+#define COMMAND_LINK_DOWN		6
+
+#define COMMAND_STATUS_OK		1
+#define COMMAND_STATUS_ERROR		2
+
+#define LINK_STATUS_UP			BIT(0)
+
+#define SPAD_COUNT			64
+#define DB_COUNT			4
+#define NTB_MW_OFFSET			2
+#define DB_COUNT_MASK			GENMASK(15, 0)
+#define MSIX_ENABLE			BIT(16)
+#define MAX_DB_COUNT			32
+#define MAX_MW				4
+
+enum epf_ntb_bar {
+	BAR_CONFIG,
+	BAR_DB,
+	BAR_MW0,
+	BAR_MW1,
+	BAR_MW2,
+};
+
+/*
+ * +--------------------------------------------------+ Base
+ * |                                                  |
+ * |                                                  |
+ * |                                                  |
+ * |          Common Control Register                 |
+ * |                                                  |
+ * |                                                  |
+ * |                                                  |
+ * +-----------------------+--------------------------+ Base+spad_offset
+ * |                       |                          |
+ * |    Peer Spad Space    |    Spad Space            |
+ * |                       |                          |
+ * |                       |                          |
+ * +-----------------------+--------------------------+ Base+spad_offset
+ * |                       |                          |     +spad_count * 4
+ * |                       |                          |
+ * |     Spad Space        |   Peer Spad Space        |
+ * |                       |                          |
+ * +-----------------------+--------------------------+
+ *       Virtual PCI             PCIe Endpoint
+ *       NTB Driver               NTB Driver
+ */
+struct epf_ntb_ctrl {
+	u32 command;
+	u32 argument;
+	u16 command_status;
+	u16 link_status;
+	u32 topology;
+	u64 addr;
+	u64 size;
+	u32 num_mws;
+	u32 reserved;
+	u32 spad_offset;
+	u32 spad_count;
+	u32 db_entry_size;
+	u32 db_data[MAX_DB_COUNT];
+	u32 db_offset[MAX_DB_COUNT];
+} __packed;
+
+struct epf_ntb {
+	struct ntb_dev ntb;
+	struct pci_epf *epf;
+	struct config_group group;
+
+	u32 num_mws;
+	u32 db_count;
+	u32 spad_count;
+	u64 mws_size[MAX_MW];
+	u64 db;
+	u32 vbus_number;
+	u16 vntb_pid;
+	u16 vntb_vid;
+
+	bool linkup;
+	u32 spad_size;
+
+	enum pci_barno epf_ntb_bar[6];
+
+	struct epf_ntb_ctrl *reg;
+
+	u32 *epf_db;
+
+	phys_addr_t vpci_mw_phy[MAX_MW];
+	void __iomem *vpci_mw_addr[MAX_MW];
+
+	struct delayed_work cmd_handler;
+};
+
+#define to_epf_ntb(epf_group) container_of((epf_group), struct epf_ntb, group)
+#define ntb_ndev(__ntb) container_of(__ntb, struct epf_ntb, ntb)
+
+static struct pci_epf_header epf_ntb_header = {
+	.vendorid	= PCI_ANY_ID,
+	.deviceid	= PCI_ANY_ID,
+	.baseclass_code	= PCI_BASE_CLASS_MEMORY,
+	.interrupt_pin	= PCI_INTERRUPT_INTA,
+};
+
+/**
+ * epf_ntb_link_up() - Raise link_up interrupt to Virtual Host (VHOST)
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
+ * @link_up: true or false indicating Link is UP or Down
+ *
+ * Once NTB function in HOST invoke ntb_link_enable(),
+ * this NTB function driver will trigger a link event to VHOST.
+ *
+ * Returns: Zero for success, or an error code in case of failure
+ */
+static int epf_ntb_link_up(struct epf_ntb *ntb, bool link_up)
+{
+	if (link_up)
+		ntb->reg->link_status |= LINK_STATUS_UP;
+	else
+		ntb->reg->link_status &= ~LINK_STATUS_UP;
+
+	ntb_link_event(&ntb->ntb);
+	return 0;
+}
+
+/**
+ * epf_ntb_configure_mw() - Configure the Outbound Address Space for VHOST
+ *   to access the memory window of HOST
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
+ * @mw: Index of the memory window (either 0, 1, 2 or 3)
+ *
+ *                          EP Outbound Window
+ * +--------+              +-----------+
+ * |        |              |           |
+ * |        |              |           |
+ * |        |              |           |
+ * |        |              |           |
+ * |        |              +-----------+
+ * | Virtual|              | Memory Win|
+ * | NTB    | -----------> |           |
+ * | Driver |              |           |
+ * |        |              +-----------+
+ * |        |              |           |
+ * |        |              |           |
+ * +--------+              +-----------+
+ *  VHOST                   PCI EP
+ *
+ * Returns: Zero for success, or an error code in case of failure
+ */
+static int epf_ntb_configure_mw(struct epf_ntb *ntb, u32 mw)
+{
+	phys_addr_t phys_addr;
+	u8 func_no, vfunc_no;
+	u64 addr, size;
+	int ret = 0;
+
+	phys_addr = ntb->vpci_mw_phy[mw];
+	addr = ntb->reg->addr;
+	size = ntb->reg->size;
+
+	func_no = ntb->epf->func_no;
+	vfunc_no = ntb->epf->vfunc_no;
+
+	ret = pci_epc_map_addr(ntb->epf->epc, func_no, vfunc_no, phys_addr, addr, size);
+	if (ret)
+		dev_err(&ntb->epf->epc->dev,
+			"Failed to map memory window %d address\n", mw);
+	return ret;
+}
+
+/**
+ * epf_ntb_teardown_mw() - Teardown the configured OB ATU
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
+ * @mw: Index of the memory window (either 0, 1, 2 or 3)
+ *
+ * Teardown the configured OB ATU configured in epf_ntb_configure_mw() using
+ * pci_epc_unmap_addr()
+ */
+static void epf_ntb_teardown_mw(struct epf_ntb *ntb, u32 mw)
+{
+	pci_epc_unmap_addr(ntb->epf->epc,
+			   ntb->epf->func_no,
+			   ntb->epf->vfunc_no,
+			   ntb->vpci_mw_phy[mw]);
+}
+
+/**
+ * epf_ntb_cmd_handler() - Handle commands provided by the NTB HOST
+ * @work: work_struct for the epf_ntb_epc
+ *
+ * Workqueue function that gets invoked for the two epf_ntb_epc
+ * periodically (once every 5ms) to see if it has received any commands
+ * from NTB HOST. The HOST can send commands to configure doorbell or
+ * configure memory window or to update link status.
+ */
+static void epf_ntb_cmd_handler(struct work_struct *work)
+{
+	struct epf_ntb_ctrl *ctrl;
+	u32 command, argument;
+	struct epf_ntb *ntb;
+	struct device *dev;
+	int ret;
+	int i;
+
+	ntb = container_of(work, struct epf_ntb, cmd_handler.work);
+
+	for (i = 1; i < ntb->db_count; i++) {
+		if (ntb->epf_db[i]) {
+			ntb->db |= 1 << (i - 1);
+			ntb_db_event(&ntb->ntb, i);
+			ntb->epf_db[i] = 0;
+		}
+	}
+
+	ctrl = ntb->reg;
+	command = ctrl->command;
+	if (!command)
+		goto reset_handler;
+	argument = ctrl->argument;
+
+	ctrl->command = 0;
+	ctrl->argument = 0;
+
+	ctrl = ntb->reg;
+	dev = &ntb->epf->dev;
+
+	switch (command) {
+	case COMMAND_CONFIGURE_DOORBELL:
+		ctrl->command_status = COMMAND_STATUS_OK;
+		break;
+	case COMMAND_TEARDOWN_DOORBELL:
+		ctrl->command_status = COMMAND_STATUS_OK;
+		break;
+	case COMMAND_CONFIGURE_MW:
+		ret = epf_ntb_configure_mw(ntb, argument);
+		if (ret < 0)
+			ctrl->command_status = COMMAND_STATUS_ERROR;
+		else
+			ctrl->command_status = COMMAND_STATUS_OK;
+		break;
+	case COMMAND_TEARDOWN_MW:
+		epf_ntb_teardown_mw(ntb, argument);
+		ctrl->command_status = COMMAND_STATUS_OK;
+		break;
+	case COMMAND_LINK_UP:
+		ntb->linkup = true;
+		ret = epf_ntb_link_up(ntb, true);
+		if (ret < 0)
+			ctrl->command_status = COMMAND_STATUS_ERROR;
+		else
+			ctrl->command_status = COMMAND_STATUS_OK;
+		goto reset_handler;
+	case COMMAND_LINK_DOWN:
+		ntb->linkup = false;
+		ret = epf_ntb_link_up(ntb, false);
+		if (ret < 0)
+			ctrl->command_status = COMMAND_STATUS_ERROR;
+		else
+			ctrl->command_status = COMMAND_STATUS_OK;
+		break;
+	default:
+		dev_err(dev, "UNKNOWN command: %d\n", command);
+		break;
+	}
+
+reset_handler:
+	queue_delayed_work(kpcintb_workqueue, &ntb->cmd_handler,
+			   msecs_to_jiffies(5));
+}
+
+/**
+ * epf_ntb_config_sspad_bar_clear() - Clear Config + Self scratchpad BAR
+ * @ntb: EPC associated with one of the HOST which holds peer's outbound
+ *	 address.
+ *
+ * Clear BAR0 of EP CONTROLLER 1 which contains the HOST1's config and
+ * self scratchpad region (removes inbound ATU configuration). While BAR0 is
+ * the default self scratchpad BAR, an NTB could have other BARs for self
+ * scratchpad (because of reserved BARs). This function can get the exact BAR
+ * used for self scratchpad from epf_ntb_bar[BAR_CONFIG].
+ *
+ * Please note the self scratchpad region and config region is combined to
+ * a single region and mapped using the same BAR. Also note VHOST's peer
+ * scratchpad is HOST's self scratchpad.
+ *
+ * Returns: void
+ */
+static void epf_ntb_config_sspad_bar_clear(struct epf_ntb *ntb)
+{
+	struct pci_epf_bar *epf_bar;
+	enum pci_barno barno;
+
+	barno = ntb->epf_ntb_bar[BAR_CONFIG];
+	epf_bar = &ntb->epf->bar[barno];
+
+	pci_epc_clear_bar(ntb->epf->epc, ntb->epf->func_no, ntb->epf->vfunc_no, epf_bar);
+}
+
+/**
+ * epf_ntb_config_sspad_bar_set() - Set Config + Self scratchpad BAR
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
+ *
+ * Map BAR0 of EP CONTROLLER which contains the VHOST's config and
+ * self scratchpad region.
+ *
+ * Please note the self scratchpad region and config region is combined to
+ * a single region and mapped using the same BAR.
+ *
+ * Returns: Zero for success, or an error code in case of failure
+ */
+static int epf_ntb_config_sspad_bar_set(struct epf_ntb *ntb)
+{
+	struct pci_epf_bar *epf_bar;
+	enum pci_barno barno;
+	u8 func_no, vfunc_no;
+	struct device *dev;
+	int ret;
+
+	dev = &ntb->epf->dev;
+	func_no = ntb->epf->func_no;
+	vfunc_no = ntb->epf->vfunc_no;
+	barno = ntb->epf_ntb_bar[BAR_CONFIG];
+	epf_bar = &ntb->epf->bar[barno];
+
+	ret = pci_epc_set_bar(ntb->epf->epc, func_no, vfunc_no, epf_bar);
+	if (ret) {
+		dev_err(dev, "inft: Config/Status/SPAD BAR set failed\n");
+		return ret;
+	}
+	return 0;
+}
+
+/**
+ * epf_ntb_config_spad_bar_free() - Free the physical memory associated with
+ *   config + scratchpad region
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
+ */
+static void epf_ntb_config_spad_bar_free(struct epf_ntb *ntb)
+{
+	enum pci_barno barno;
+
+	barno = ntb->epf_ntb_bar[BAR_CONFIG];
+	pci_epf_free_space(ntb->epf, ntb->reg, barno, 0);
+}
+
+/**
+ * epf_ntb_config_spad_bar_alloc() - Allocate memory for config + scratchpad
+ *   region
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
+ *
+ * Allocate the Local Memory mentioned in the above diagram. The size of
+ * CONFIG REGION is sizeof(struct epf_ntb_ctrl) and size of SCRATCHPAD REGION
+ * is obtained from "spad-count" configfs entry.
+ *
+ * Returns: Zero for success, or an error code in case of failure
+ */
+static int epf_ntb_config_spad_bar_alloc(struct epf_ntb *ntb)
+{
+	size_t align;
+	enum pci_barno barno;
+	struct epf_ntb_ctrl *ctrl;
+	u32 spad_size, ctrl_size;
+	u64 size;
+	struct pci_epf *epf = ntb->epf;
+	struct device *dev = &epf->dev;
+	u32 spad_count;
+	void *base;
+	int i;
+	const struct pci_epc_features *epc_features = pci_epc_get_features(epf->epc,
+								epf->func_no,
+								epf->vfunc_no);
+	barno = ntb->epf_ntb_bar[BAR_CONFIG];
+	size = epc_features->bar_fixed_size[barno];
+	align = epc_features->align;
+
+	if ((!IS_ALIGNED(size, align)))
+		return -EINVAL;
+
+	spad_count = ntb->spad_count;
+
+	ctrl_size = sizeof(struct epf_ntb_ctrl);
+	spad_size = 2 * spad_count * sizeof(u32);
+
+	if (!align) {
+		ctrl_size = roundup_pow_of_two(ctrl_size);
+		spad_size = roundup_pow_of_two(spad_size);
+	} else {
+		ctrl_size = ALIGN(ctrl_size, align);
+		spad_size = ALIGN(spad_size, align);
+	}
+
+	if (!size)
+		size = ctrl_size + spad_size;
+	else if (size < ctrl_size + spad_size)
+		return -EINVAL;
+
+	base = pci_epf_alloc_space(epf, size, barno, align, 0);
+	if (!base) {
+		dev_err(dev, "Config/Status/SPAD alloc region fail\n");
+		return -ENOMEM;
+	}
+
+	ntb->reg = base;
+
+	ctrl = ntb->reg;
+	ctrl->spad_offset = ctrl_size;
+
+	ctrl->spad_count = spad_count;
+	ctrl->num_mws = ntb->num_mws;
+	ntb->spad_size = spad_size;
+
+	ctrl->db_entry_size = sizeof(u32);
+
+	for (i = 0; i < ntb->db_count; i++) {
+		ntb->reg->db_data[i] = 1 + i;
+		ntb->reg->db_offset[i] = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * epf_ntb_configure_interrupt() - Configure MSI/MSI-X capability
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
+ *
+ * Configure MSI/MSI-X capability for each interface with number of
+ * interrupts equal to "db_count" configfs entry.
+ *
+ * Returns: Zero for success, or an error code in case of failure
+ */
+static int epf_ntb_configure_interrupt(struct epf_ntb *ntb)
+{
+	const struct pci_epc_features *epc_features;
+	struct device *dev;
+	u32 db_count;
+	int ret;
+
+	dev = &ntb->epf->dev;
+
+	epc_features = pci_epc_get_features(ntb->epf->epc, ntb->epf->func_no, ntb->epf->vfunc_no);
+
+	if (!(epc_features->msix_capable || epc_features->msi_capable)) {
+		dev_err(dev, "MSI or MSI-X is required for doorbell\n");
+		return -EINVAL;
+	}
+
+	db_count = ntb->db_count;
+	if (db_count > MAX_DB_COUNT) {
+		dev_err(dev, "DB count cannot be more than %d\n", MAX_DB_COUNT);
+		return -EINVAL;
+	}
+
+	ntb->db_count = db_count;
+
+	if (epc_features->msi_capable) {
+		ret = pci_epc_set_msi(ntb->epf->epc,
+				      ntb->epf->func_no,
+				      ntb->epf->vfunc_no,
+				      16);
+		if (ret) {
+			dev_err(dev, "MSI configuration failed\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * epf_ntb_db_bar_init() - Configure Doorbell window BARs
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
+ *
+ * Returns: Zero for success, or an error code in case of failure
+ */
+static int epf_ntb_db_bar_init(struct epf_ntb *ntb)
+{
+	const struct pci_epc_features *epc_features;
+	u32 align;
+	struct device *dev = &ntb->epf->dev;
+	int ret;
+	struct pci_epf_bar *epf_bar;
+	void __iomem *mw_addr;
+	enum pci_barno barno;
+	size_t size = sizeof(u32) * ntb->db_count;
+
+	epc_features = pci_epc_get_features(ntb->epf->epc,
+					    ntb->epf->func_no,
+					    ntb->epf->vfunc_no);
+	align = epc_features->align;
+
+	if (size < 128)
+		size = 128;
+
+	if (align)
+		size = ALIGN(size, align);
+	else
+		size = roundup_pow_of_two(size);
+
+	barno = ntb->epf_ntb_bar[BAR_DB];
+
+	mw_addr = pci_epf_alloc_space(ntb->epf, size, barno, align, 0);
+	if (!mw_addr) {
+		dev_err(dev, "Failed to allocate OB address\n");
+		return -ENOMEM;
+	}
+
+	ntb->epf_db = mw_addr;
+
+	epf_bar = &ntb->epf->bar[barno];
+
+	ret = pci_epc_set_bar(ntb->epf->epc, ntb->epf->func_no, ntb->epf->vfunc_no, epf_bar);
+	if (ret) {
+		dev_err(dev, "Doorbell BAR set failed\n");
+			goto err_alloc_peer_mem;
+	}
+	return ret;
+
+err_alloc_peer_mem:
+	pci_epf_free_space(ntb->epf, mw_addr, barno, 0);
+	return -1;
+}
+
+static void epf_ntb_mw_bar_clear(struct epf_ntb *ntb, int num_mws);
+
+/**
+ * epf_ntb_db_bar_clear() - Clear doorbell BAR and free memory
+ *   allocated in peer's outbound address space
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
+ */
+static void epf_ntb_db_bar_clear(struct epf_ntb *ntb)
+{
+	enum pci_barno barno;
+
+	barno = ntb->epf_ntb_bar[BAR_DB];
+	pci_epf_free_space(ntb->epf, ntb->epf_db, barno, 0);
+	pci_epc_clear_bar(ntb->epf->epc,
+			  ntb->epf->func_no,
+			  ntb->epf->vfunc_no,
+			  &ntb->epf->bar[barno]);
+}
+
+/**
+ * epf_ntb_mw_bar_init() - Configure Memory window BARs
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
+ *
+ * Returns: Zero for success, or an error code in case of failure
+ */
+static int epf_ntb_mw_bar_init(struct epf_ntb *ntb)
+{
+	int ret = 0;
+	int i;
+	u64 size;
+	enum pci_barno barno;
+	struct device *dev = &ntb->epf->dev;
+
+	for (i = 0; i < ntb->num_mws; i++) {
+		size = ntb->mws_size[i];
+		barno = ntb->epf_ntb_bar[BAR_MW0 + i];
+
+		ntb->epf->bar[barno].barno = barno;
+		ntb->epf->bar[barno].size = size;
+		ntb->epf->bar[barno].addr = NULL;
+		ntb->epf->bar[barno].phys_addr = 0;
+		ntb->epf->bar[barno].flags |= upper_32_bits(size) ?
+				PCI_BASE_ADDRESS_MEM_TYPE_64 :
+				PCI_BASE_ADDRESS_MEM_TYPE_32;
+
+		ret = pci_epc_set_bar(ntb->epf->epc,
+				      ntb->epf->func_no,
+				      ntb->epf->vfunc_no,
+				      &ntb->epf->bar[barno]);
+		if (ret) {
+			dev_err(dev, "MW set failed\n");
+			goto err_alloc_mem;
+		}
+
+		/* Allocate EPC outbound memory windows to vpci vntb device */
+		ntb->vpci_mw_addr[i] = pci_epc_mem_alloc_addr(ntb->epf->epc,
+							      &ntb->vpci_mw_phy[i],
+							      size);
+		if (!ntb->vpci_mw_addr[i]) {
+			ret = -ENOMEM;
+			dev_err(dev, "Failed to allocate source address\n");
+			goto err_set_bar;
+		}
+	}
+
+	return ret;
+
+err_set_bar:
+	pci_epc_clear_bar(ntb->epf->epc,
+			  ntb->epf->func_no,
+			  ntb->epf->vfunc_no,
+			  &ntb->epf->bar[barno]);
+err_alloc_mem:
+	epf_ntb_mw_bar_clear(ntb, i);
+	return ret;
+}
+
+/**
+ * epf_ntb_mw_bar_clear() - Clear Memory window BARs
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
+ * @num_mws: the number of Memory window BARs that to be cleared
+ */
+static void epf_ntb_mw_bar_clear(struct epf_ntb *ntb, int num_mws)
+{
+	enum pci_barno barno;
+	int i;
+
+	for (i = 0; i < num_mws; i++) {
+		barno = ntb->epf_ntb_bar[BAR_MW0 + i];
+		pci_epc_clear_bar(ntb->epf->epc,
+				  ntb->epf->func_no,
+				  ntb->epf->vfunc_no,
+				  &ntb->epf->bar[barno]);
+
+		pci_epc_mem_free_addr(ntb->epf->epc,
+				      ntb->vpci_mw_phy[i],
+				      ntb->vpci_mw_addr[i],
+				      ntb->mws_size[i]);
+	}
+}
+
+/**
+ * epf_ntb_epc_destroy() - Cleanup NTB EPC interface
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
+ *
+ * Wrapper for epf_ntb_epc_destroy_interface() to cleanup all the NTB interfaces
+ */
+static void epf_ntb_epc_destroy(struct epf_ntb *ntb)
+{
+	pci_epc_remove_epf(ntb->epf->epc, ntb->epf, 0);
+	pci_epc_put(ntb->epf->epc);
+}
+
+/**
+ * epf_ntb_init_epc_bar() - Identify BARs to be used for each of the NTB
+ * constructs (scratchpad region, doorbell, memorywindow)
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
+ *
+ * Returns: Zero for success, or an error code in case of failure
+ */
+static int epf_ntb_init_epc_bar(struct epf_ntb *ntb)
+{
+	const struct pci_epc_features *epc_features;
+	enum pci_barno barno;
+	enum epf_ntb_bar bar;
+	struct device *dev;
+	u32 num_mws;
+	int i;
+
+	barno = BAR_0;
+	num_mws = ntb->num_mws;
+	dev = &ntb->epf->dev;
+	epc_features = pci_epc_get_features(ntb->epf->epc, ntb->epf->func_no, ntb->epf->vfunc_no);
+
+	/* These are required BARs which are mandatory for NTB functionality */
+	for (bar = BAR_CONFIG; bar <= BAR_MW0; bar++, barno++) {
+		barno = pci_epc_get_next_free_bar(epc_features, barno);
+		if (barno < 0) {
+			dev_err(dev, "Fail to get NTB function BAR\n");
+			return barno;
+		}
+		ntb->epf_ntb_bar[bar] = barno;
+	}
+
+	/* These are optional BARs which don't impact NTB functionality */
+	for (bar = BAR_MW1, i = 1; i < num_mws; bar++, barno++, i++) {
+		barno = pci_epc_get_next_free_bar(epc_features, barno);
+		if (barno < 0) {
+			ntb->num_mws = i;
+			dev_dbg(dev, "BAR not available for > MW%d\n", i + 1);
+		}
+		ntb->epf_ntb_bar[bar] = barno;
+	}
+
+	return 0;
+}
+
+/**
+ * epf_ntb_epc_init() - Initialize NTB interface
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
+ *
+ * Wrapper to initialize a particular EPC interface and start the workqueue
+ * to check for commands from HOST. This function will write to the
+ * EP controller HW for configuring it.
+ *
+ * Returns: Zero for success, or an error code in case of failure
+ */
+static int epf_ntb_epc_init(struct epf_ntb *ntb)
+{
+	u8 func_no, vfunc_no;
+	struct pci_epc *epc;
+	struct pci_epf *epf;
+	struct device *dev;
+	int ret;
+
+	epf = ntb->epf;
+	dev = &epf->dev;
+	epc = epf->epc;
+	func_no = ntb->epf->func_no;
+	vfunc_no = ntb->epf->vfunc_no;
+
+	ret = epf_ntb_config_sspad_bar_set(ntb);
+	if (ret) {
+		dev_err(dev, "Config/self SPAD BAR init failed");
+		return ret;
+	}
+
+	ret = epf_ntb_configure_interrupt(ntb);
+	if (ret) {
+		dev_err(dev, "Interrupt configuration failed\n");
+		goto err_config_interrupt;
+	}
+
+	ret = epf_ntb_db_bar_init(ntb);
+	if (ret) {
+		dev_err(dev, "DB BAR init failed\n");
+		goto err_db_bar_init;
+	}
+
+	ret = epf_ntb_mw_bar_init(ntb);
+	if (ret) {
+		dev_err(dev, "MW BAR init failed\n");
+		goto err_mw_bar_init;
+	}
+
+	if (vfunc_no <= 1) {
+		ret = pci_epc_write_header(epc, func_no, vfunc_no, epf->header);
+		if (ret) {
+			dev_err(dev, "Configuration header write failed\n");
+			goto err_write_header;
+		}
+	}
+
+	INIT_DELAYED_WORK(&ntb->cmd_handler, epf_ntb_cmd_handler);
+	queue_work(kpcintb_workqueue, &ntb->cmd_handler.work);
+
+	return 0;
+
+err_write_header:
+	epf_ntb_mw_bar_clear(ntb, ntb->num_mws);
+err_mw_bar_init:
+	epf_ntb_db_bar_clear(ntb);
+err_db_bar_init:
+err_config_interrupt:
+	epf_ntb_config_sspad_bar_clear(ntb);
+
+	return ret;
+}
+
+
+/**
+ * epf_ntb_epc_cleanup() - Cleanup all NTB interfaces
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
+ *
+ * Wrapper to cleanup all NTB interfaces.
+ */
+static void epf_ntb_epc_cleanup(struct epf_ntb *ntb)
+{
+	epf_ntb_db_bar_clear(ntb);
+	epf_ntb_mw_bar_clear(ntb, ntb->num_mws);
+}
+
+#define EPF_NTB_R(_name)						\
+static ssize_t epf_ntb_##_name##_show(struct config_item *item,		\
+				      char *page)			\
+{									\
+	struct config_group *group = to_config_group(item);		\
+	struct epf_ntb *ntb = to_epf_ntb(group);			\
+									\
+	return sprintf(page, "%d\n", ntb->_name);			\
+}
+
+#define EPF_NTB_W(_name)						\
+static ssize_t epf_ntb_##_name##_store(struct config_item *item,	\
+				       const char *page, size_t len)	\
+{									\
+	struct config_group *group = to_config_group(item);		\
+	struct epf_ntb *ntb = to_epf_ntb(group);			\
+	u32 val;							\
+	int ret;							\
+									\
+	ret = kstrtou32(page, 0, &val);					\
+	if (ret)							\
+		return ret;						\
+									\
+	ntb->_name = val;						\
+									\
+	return len;							\
+}
+
+#define EPF_NTB_MW_R(_name)						\
+static ssize_t epf_ntb_##_name##_show(struct config_item *item,		\
+				      char *page)			\
+{									\
+	struct config_group *group = to_config_group(item);		\
+	struct epf_ntb *ntb = to_epf_ntb(group);			\
+	struct device *dev = &ntb->epf->dev;				\
+	int win_no;							\
+									\
+	if (sscanf(#_name, "mw%d", &win_no) != 1)			\
+		return -EINVAL;						\
+									\
+	if (win_no <= 0 || win_no > ntb->num_mws) {			\
+		dev_err(dev, "Invalid num_nws: %d value\n", ntb->num_mws); \
+		return -EINVAL;						\
+	}								\
+									\
+	return sprintf(page, "%lld\n", ntb->mws_size[win_no - 1]);	\
+}
+
+#define EPF_NTB_MW_W(_name)						\
+static ssize_t epf_ntb_##_name##_store(struct config_item *item,	\
+				       const char *page, size_t len)	\
+{									\
+	struct config_group *group = to_config_group(item);		\
+	struct epf_ntb *ntb = to_epf_ntb(group);			\
+	struct device *dev = &ntb->epf->dev;				\
+	int win_no;							\
+	u64 val;							\
+	int ret;							\
+									\
+	ret = kstrtou64(page, 0, &val);					\
+	if (ret)							\
+		return ret;						\
+									\
+	if (sscanf(#_name, "mw%d", &win_no) != 1)			\
+		return -EINVAL;						\
+									\
+	if (win_no <= 0 || win_no > ntb->num_mws) {			\
+		dev_err(dev, "Invalid num_nws: %d value\n", ntb->num_mws); \
+		return -EINVAL;						\
+	}								\
+									\
+	ntb->mws_size[win_no - 1] = val;				\
+									\
+	return len;							\
+}
+
+static ssize_t epf_ntb_num_mws_store(struct config_item *item,
+				     const char *page, size_t len)
+{
+	struct config_group *group = to_config_group(item);
+	struct epf_ntb *ntb = to_epf_ntb(group);
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(page, 0, &val);
+	if (ret)
+		return ret;
+
+	if (val > MAX_MW)
+		return -EINVAL;
+
+	ntb->num_mws = val;
+
+	return len;
+}
+
+EPF_NTB_R(spad_count)
+EPF_NTB_W(spad_count)
+EPF_NTB_R(db_count)
+EPF_NTB_W(db_count)
+EPF_NTB_R(num_mws)
+EPF_NTB_R(vbus_number)
+EPF_NTB_W(vbus_number)
+EPF_NTB_R(vntb_pid)
+EPF_NTB_W(vntb_pid)
+EPF_NTB_R(vntb_vid)
+EPF_NTB_W(vntb_vid)
+EPF_NTB_MW_R(mw1)
+EPF_NTB_MW_W(mw1)
+EPF_NTB_MW_R(mw2)
+EPF_NTB_MW_W(mw2)
+EPF_NTB_MW_R(mw3)
+EPF_NTB_MW_W(mw3)
+EPF_NTB_MW_R(mw4)
+EPF_NTB_MW_W(mw4)
+
+CONFIGFS_ATTR(epf_ntb_, spad_count);
+CONFIGFS_ATTR(epf_ntb_, db_count);
+CONFIGFS_ATTR(epf_ntb_, num_mws);
+CONFIGFS_ATTR(epf_ntb_, mw1);
+CONFIGFS_ATTR(epf_ntb_, mw2);
+CONFIGFS_ATTR(epf_ntb_, mw3);
+CONFIGFS_ATTR(epf_ntb_, mw4);
+CONFIGFS_ATTR(epf_ntb_, vbus_number);
+CONFIGFS_ATTR(epf_ntb_, vntb_pid);
+CONFIGFS_ATTR(epf_ntb_, vntb_vid);
+
+static struct configfs_attribute *epf_ntb_attrs[] = {
+	&epf_ntb_attr_spad_count,
+	&epf_ntb_attr_db_count,
+	&epf_ntb_attr_num_mws,
+	&epf_ntb_attr_mw1,
+	&epf_ntb_attr_mw2,
+	&epf_ntb_attr_mw3,
+	&epf_ntb_attr_mw4,
+	&epf_ntb_attr_vbus_number,
+	&epf_ntb_attr_vntb_pid,
+	&epf_ntb_attr_vntb_vid,
+	NULL,
+};
+
+static const struct config_item_type ntb_group_type = {
+	.ct_attrs	= epf_ntb_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+/**
+ * epf_ntb_add_cfs() - Add configfs directory specific to NTB
+ * @epf: NTB endpoint function device
+ * @group: A pointer to the config_group structure referencing a group of
+ *	   config_items of a specific type that belong to a specific sub-system.
+ *
+ * Add configfs directory specific to NTB. This directory will hold
+ * NTB specific properties like db_count, spad_count, num_mws etc.,
+ *
+ * Returns: Pointer to config_group
+ */
+static struct config_group *epf_ntb_add_cfs(struct pci_epf *epf,
+					    struct config_group *group)
+{
+	struct epf_ntb *ntb = epf_get_drvdata(epf);
+	struct config_group *ntb_group = &ntb->group;
+	struct device *dev = &epf->dev;
+
+	config_group_init_type_name(ntb_group, dev_name(dev), &ntb_group_type);
+
+	return ntb_group;
+}
+
+/*==== virtual PCI bus driver, which only load virtual NTB PCI driver ====*/
+
+static u32 pci_space[] = {
+	0xffffffff,	/* Device ID, Vendor ID */
+	0,		/* Status, Command */
+	0xffffffff,	/* Base Class, Subclass, Prog Intf, Revision ID */
+	0x40,		/* BIST, Header Type, Latency Timer, Cache Line Size */
+	0,		/* BAR 0 */
+	0,		/* BAR 1 */
+	0,		/* BAR 2 */
+	0,		/* BAR 3 */
+	0,		/* BAR 4 */
+	0,		/* BAR 5 */
+	0,		/* Cardbus CIS Pointer */
+	0,		/* Subsystem ID, Subsystem Vendor ID */
+	0,		/* ROM Base Address */
+	0,		/* Reserved, Capabilities Pointer */
+	0,		/* Reserved */
+	0,		/* Max_Lat, Min_Gnt, Interrupt Pin, Interrupt Line */
+};
+
+static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val)
+{
+	if (devfn == 0) {
+		memcpy(val, ((u8 *)pci_space) + where, size);
+		return PCIBIOS_SUCCESSFUL;
+	}
+	return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val)
+{
+	return 0;
+}
+
+static struct pci_ops vpci_ops = {
+	.read = pci_read,
+	.write = pci_write,
+};
+
+static int vpci_scan_bus(void *sysdata)
+{
+	struct pci_bus *vpci_bus;
+	struct epf_ntb *ndev = sysdata;
+
+	vpci_bus = pci_scan_bus(ndev->vbus_number, &vpci_ops, sysdata);
+	if (vpci_bus)
+		pr_err("create pci bus\n");
+
+	pci_bus_add_devices(vpci_bus);
+
+	return 0;
+}
+
+/*==================== Virtual PCIe NTB driver ==========================*/
+
+static int vntb_epf_mw_count(struct ntb_dev *ntb, int pidx)
+{
+	struct epf_ntb *ndev = ntb_ndev(ntb);
+
+	return ndev->num_mws;
+}
+
+static int vntb_epf_spad_count(struct ntb_dev *ntb)
+{
+	return ntb_ndev(ntb)->spad_count;
+}
+
+static int vntb_epf_peer_mw_count(struct ntb_dev *ntb)
+{
+	return ntb_ndev(ntb)->num_mws;
+}
+
+static u64 vntb_epf_db_valid_mask(struct ntb_dev *ntb)
+{
+	return BIT_ULL(ntb_ndev(ntb)->db_count) - 1;
+}
+
+static int vntb_epf_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+	return 0;
+}
+
+static int vntb_epf_mw_set_trans(struct ntb_dev *ndev, int pidx, int idx,
+		dma_addr_t addr, resource_size_t size)
+{
+	struct epf_ntb *ntb = ntb_ndev(ndev);
+	struct pci_epf_bar *epf_bar;
+	enum pci_barno barno;
+	int ret;
+	struct device *dev;
+
+	dev = &ntb->ntb.dev;
+	barno = ntb->epf_ntb_bar[BAR_MW0 + idx];
+	epf_bar = &ntb->epf->bar[barno];
+	epf_bar->phys_addr = addr;
+	epf_bar->barno = barno;
+	epf_bar->size = size;
+
+	ret = pci_epc_set_bar(ntb->epf->epc, 0, 0, epf_bar);
+	if (ret) {
+		dev_err(dev, "failure set mw trans\n");
+		return ret;
+	}
+	return 0;
+}
+
+static int vntb_epf_mw_clear_trans(struct ntb_dev *ntb, int pidx, int idx)
+{
+	return 0;
+}
+
+static int vntb_epf_peer_mw_get_addr(struct ntb_dev *ndev, int idx,
+				phys_addr_t *base, resource_size_t *size)
+{
+
+	struct epf_ntb *ntb = ntb_ndev(ndev);
+
+	if (base)
+		*base = ntb->vpci_mw_phy[idx];
+
+	if (size)
+		*size = ntb->mws_size[idx];
+
+	return 0;
+}
+
+static int vntb_epf_link_enable(struct ntb_dev *ntb,
+			enum ntb_speed max_speed,
+			enum ntb_width max_width)
+{
+	return 0;
+}
+
+static u32 vntb_epf_spad_read(struct ntb_dev *ndev, int idx)
+{
+	struct epf_ntb *ntb = ntb_ndev(ndev);
+	int off = ntb->reg->spad_offset, ct = ntb->reg->spad_count * sizeof(u32);
+	u32 val;
+	void __iomem *base = (void __iomem *)ntb->reg;
+
+	val = readl(base + off + ct + idx * sizeof(u32));
+	return val;
+}
+
+static int vntb_epf_spad_write(struct ntb_dev *ndev, int idx, u32 val)
+{
+	struct epf_ntb *ntb = ntb_ndev(ndev);
+	struct epf_ntb_ctrl *ctrl = ntb->reg;
+	int off = ctrl->spad_offset, ct = ctrl->spad_count * sizeof(u32);
+	void __iomem *base = (void __iomem *)ntb->reg;
+
+	writel(val, base + off + ct + idx * sizeof(u32));
+	return 0;
+}
+
+static u32 vntb_epf_peer_spad_read(struct ntb_dev *ndev, int pidx, int idx)
+{
+	struct epf_ntb *ntb = ntb_ndev(ndev);
+	struct epf_ntb_ctrl *ctrl = ntb->reg;
+	int off = ctrl->spad_offset;
+	void __iomem *base = (void __iomem *)ntb->reg;
+	u32 val;
+
+	val = readl(base + off + idx * sizeof(u32));
+	return val;
+}
+
+static int vntb_epf_peer_spad_write(struct ntb_dev *ndev, int pidx, int idx, u32 val)
+{
+	struct epf_ntb *ntb = ntb_ndev(ndev);
+	struct epf_ntb_ctrl *ctrl = ntb->reg;
+	int off = ctrl->spad_offset;
+	void __iomem *base = (void __iomem *)ntb->reg;
+
+	writel(val, base + off + idx * sizeof(u32));
+	return 0;
+}
+
+static int vntb_epf_peer_db_set(struct ntb_dev *ndev, u64 db_bits)
+{
+	u32 interrupt_num = ffs(db_bits) + 1;
+	struct epf_ntb *ntb = ntb_ndev(ndev);
+	u8 func_no, vfunc_no;
+	int ret;
+
+	func_no = ntb->epf->func_no;
+	vfunc_no = ntb->epf->vfunc_no;
+
+	ret = pci_epc_raise_irq(ntb->epf->epc,
+				func_no,
+				vfunc_no,
+				PCI_EPC_IRQ_MSI,
+				interrupt_num + 1);
+	if (ret)
+		dev_err(&ntb->ntb.dev, "Failed to raise IRQ\n");
+
+	return ret;
+}
+
+static u64 vntb_epf_db_read(struct ntb_dev *ndev)
+{
+	struct epf_ntb *ntb = ntb_ndev(ndev);
+
+	return ntb->db;
+}
+
+static int vntb_epf_mw_get_align(struct ntb_dev *ndev, int pidx, int idx,
+			resource_size_t *addr_align,
+			resource_size_t *size_align,
+			resource_size_t *size_max)
+{
+	struct epf_ntb *ntb = ntb_ndev(ndev);
+
+	if (addr_align)
+		*addr_align = SZ_4K;
+
+	if (size_align)
+		*size_align = 1;
+
+	if (size_max)
+		*size_max = ntb->mws_size[idx];
+
+	return 0;
+}
+
+static u64 vntb_epf_link_is_up(struct ntb_dev *ndev,
+			enum ntb_speed *speed,
+			enum ntb_width *width)
+{
+	struct epf_ntb *ntb = ntb_ndev(ndev);
+
+	return ntb->reg->link_status;
+}
+
+static int vntb_epf_db_clear_mask(struct ntb_dev *ndev, u64 db_bits)
+{
+	return 0;
+}
+
+static int vntb_epf_db_clear(struct ntb_dev *ndev, u64 db_bits)
+{
+	struct epf_ntb *ntb = ntb_ndev(ndev);
+
+	ntb->db &= ~db_bits;
+	return 0;
+}
+
+static int vntb_epf_link_disable(struct ntb_dev *ntb)
+{
+	return 0;
+}
+
+static const struct ntb_dev_ops vntb_epf_ops = {
+	.mw_count		= vntb_epf_mw_count,
+	.spad_count		= vntb_epf_spad_count,
+	.peer_mw_count		= vntb_epf_peer_mw_count,
+	.db_valid_mask		= vntb_epf_db_valid_mask,
+	.db_set_mask		= vntb_epf_db_set_mask,
+	.mw_set_trans		= vntb_epf_mw_set_trans,
+	.mw_clear_trans		= vntb_epf_mw_clear_trans,
+	.peer_mw_get_addr	= vntb_epf_peer_mw_get_addr,
+	.link_enable		= vntb_epf_link_enable,
+	.spad_read		= vntb_epf_spad_read,
+	.spad_write		= vntb_epf_spad_write,
+	.peer_spad_read		= vntb_epf_peer_spad_read,
+	.peer_spad_write	= vntb_epf_peer_spad_write,
+	.peer_db_set		= vntb_epf_peer_db_set,
+	.db_read		= vntb_epf_db_read,
+	.mw_get_align		= vntb_epf_mw_get_align,
+	.link_is_up		= vntb_epf_link_is_up,
+	.db_clear_mask		= vntb_epf_db_clear_mask,
+	.db_clear		= vntb_epf_db_clear,
+	.link_disable		= vntb_epf_link_disable,
+};
+
+static int pci_vntb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	int ret;
+	struct epf_ntb *ndev = (struct epf_ntb *)pdev->sysdata;
+	struct device *dev = &pdev->dev;
+
+	ndev->ntb.pdev = pdev;
+	ndev->ntb.topo = NTB_TOPO_NONE;
+	ndev->ntb.ops =  &vntb_epf_ops;
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+	if (ret) {
+		dev_err(dev, "Cannot set DMA mask\n");
+		return -EINVAL;
+	}
+
+	ret = ntb_register_device(&ndev->ntb);
+	if (ret) {
+		dev_err(dev, "Failed to register NTB device\n");
+		goto err_register_dev;
+	}
+
+	dev_dbg(dev, "PCI Virtual NTB driver loaded\n");
+	return 0;
+
+err_register_dev:
+	put_device(&ndev->ntb.dev);
+	return -EINVAL;
+}
+
+static struct pci_device_id pci_vntb_table[] = {
+	{
+		PCI_DEVICE(0xffff, 0xffff),
+	},
+	{},
+};
+
+static struct pci_driver vntb_pci_driver = {
+	.name           = "pci-vntb",
+	.id_table       = pci_vntb_table,
+	.probe          = pci_vntb_probe,
+};
+
+/* ============ PCIe EPF Driver Bind ====================*/
+
+/**
+ * epf_ntb_bind() - Initialize endpoint controller to provide NTB functionality
+ * @epf: NTB endpoint function device
+ *
+ * Initialize both the endpoint controllers associated with NTB function device.
+ * Invoked when a primary interface or secondary interface is bound to EPC
+ * device. This function will succeed only when EPC is bound to both the
+ * interfaces.
+ *
+ * Returns: Zero for success, or an error code in case of failure
+ */
+static int epf_ntb_bind(struct pci_epf *epf)
+{
+	struct epf_ntb *ntb = epf_get_drvdata(epf);
+	struct device *dev = &epf->dev;
+	int ret;
+
+	if (!epf->epc) {
+		dev_dbg(dev, "PRIMARY EPC interface not yet bound\n");
+		return 0;
+	}
+
+	ret = epf_ntb_init_epc_bar(ntb);
+	if (ret) {
+		dev_err(dev, "Failed to create NTB EPC\n");
+		goto err_bar_init;
+	}
+
+	ret = epf_ntb_config_spad_bar_alloc(ntb);
+	if (ret) {
+		dev_err(dev, "Failed to allocate BAR memory\n");
+		goto err_bar_alloc;
+	}
+
+	ret = epf_ntb_epc_init(ntb);
+	if (ret) {
+		dev_err(dev, "Failed to initialize EPC\n");
+		goto err_bar_alloc;
+	}
+
+	epf_set_drvdata(epf, ntb);
+
+	pci_space[0] = (ntb->vntb_pid << 16) | ntb->vntb_vid;
+	pci_vntb_table[0].vendor = ntb->vntb_vid;
+	pci_vntb_table[0].device = ntb->vntb_pid;
+
+	ret = pci_register_driver(&vntb_pci_driver);
+	if (ret) {
+		dev_err(dev, "failure register vntb pci driver\n");
+		goto err_bar_alloc;
+	}
+
+	vpci_scan_bus(ntb);
+
+	return 0;
+
+err_bar_alloc:
+	epf_ntb_config_spad_bar_free(ntb);
+
+err_bar_init:
+	epf_ntb_epc_destroy(ntb);
+
+	return ret;
+}
+
+/**
+ * epf_ntb_unbind() - Cleanup the initialization from epf_ntb_bind()
+ * @epf: NTB endpoint function device
+ *
+ * Cleanup the initialization from epf_ntb_bind()
+ */
+static void epf_ntb_unbind(struct pci_epf *epf)
+{
+	struct epf_ntb *ntb = epf_get_drvdata(epf);
+
+	epf_ntb_epc_cleanup(ntb);
+	epf_ntb_config_spad_bar_free(ntb);
+	epf_ntb_epc_destroy(ntb);
+
+	pci_unregister_driver(&vntb_pci_driver);
+}
+
+// EPF driver probe
+static struct pci_epf_ops epf_ntb_ops = {
+	.bind   = epf_ntb_bind,
+	.unbind = epf_ntb_unbind,
+	.add_cfs = epf_ntb_add_cfs,
+};
+
+/**
+ * epf_ntb_probe() - Probe NTB function driver
+ * @epf: NTB endpoint function device
+ * @id: NTB endpoint function device ID
+ *
+ * Probe NTB function driver when endpoint function bus detects a NTB
+ * endpoint function.
+ *
+ * Returns: Zero for success, or an error code in case of failure
+ */
+static int epf_ntb_probe(struct pci_epf *epf,
+			 const struct pci_epf_device_id *id)
+{
+	struct epf_ntb *ntb;
+	struct device *dev;
+
+	dev = &epf->dev;
+
+	ntb = devm_kzalloc(dev, sizeof(*ntb), GFP_KERNEL);
+	if (!ntb)
+		return -ENOMEM;
+
+	epf->header = &epf_ntb_header;
+	ntb->epf = epf;
+	ntb->vbus_number = 0xff;
+	epf_set_drvdata(epf, ntb);
+
+	dev_info(dev, "pci-ep epf driver loaded\n");
+	return 0;
+}
+
+static const struct pci_epf_device_id epf_ntb_ids[] = {
+	{
+		.name = "pci_epf_vntb",
+	},
+	{},
+};
+
+static struct pci_epf_driver epf_ntb_driver = {
+	.driver.name    = "pci_epf_vntb",
+	.probe          = epf_ntb_probe,
+	.id_table       = epf_ntb_ids,
+	.ops            = &epf_ntb_ops,
+	.owner          = THIS_MODULE,
+};
+
+static int __init epf_ntb_init(void)
+{
+	int ret;
+
+	kpcintb_workqueue = alloc_workqueue("kpcintb", WQ_MEM_RECLAIM |
+					    WQ_HIGHPRI, 0);
+	ret = pci_epf_register_driver(&epf_ntb_driver);
+	if (ret) {
+		destroy_workqueue(kpcintb_workqueue);
+		pr_err("Failed to register pci epf ntb driver --> %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+module_init(epf_ntb_init);
+
+static void __exit epf_ntb_exit(void)
+{
+	pci_epf_unregister_driver(&epf_ntb_driver);
+	destroy_workqueue(kpcintb_workqueue);
+}
+module_exit(epf_ntb_exit);
+
+MODULE_DESCRIPTION("PCI EPF NTB DRIVER");
+MODULE_AUTHOR("Frank Li <Frank.li@nxp.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/endpoint/pci-ep-cfs.c b/drivers/pci/endpoint/pci-ep-cfs.c
new file mode 100644
index 000000000..0ea64e24e
--- /dev/null
+++ b/drivers/pci/endpoint/pci-ep-cfs.c
@@ -0,0 +1,767 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * configfs to configure the PCI endpoint
+ *
+ * Copyright (C) 2017 Texas Instruments
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ */
+
+#include <linux/module.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+
+#include <linux/pci-epc.h>
+#include <linux/pci-epf.h>
+#include <linux/pci-ep-cfs.h>
+
+static DEFINE_IDR(functions_idr);
+static DEFINE_MUTEX(functions_mutex);
+static struct config_group *functions_group;
+static struct config_group *controllers_group;
+
+struct pci_epf_group {
+	struct config_group group;
+	struct config_group primary_epc_group;
+	struct config_group secondary_epc_group;
+	struct config_group *type_group;
+	struct delayed_work cfs_work;
+	struct pci_epf *epf;
+	int index;
+};
+
+struct pci_epc_group {
+	struct config_group group;
+	struct pci_epc *epc;
+	bool start;
+};
+
+static inline struct pci_epf_group *to_pci_epf_group(struct config_item *item)
+{
+	return container_of(to_config_group(item), struct pci_epf_group, group);
+}
+
+static inline struct pci_epc_group *to_pci_epc_group(struct config_item *item)
+{
+	return container_of(to_config_group(item), struct pci_epc_group, group);
+}
+
+static int pci_secondary_epc_epf_link(struct config_item *epf_item,
+				      struct config_item *epc_item)
+{
+	int ret;
+	struct pci_epf_group *epf_group = to_pci_epf_group(epf_item->ci_parent);
+	struct pci_epc_group *epc_group = to_pci_epc_group(epc_item);
+	struct pci_epc *epc = epc_group->epc;
+	struct pci_epf *epf = epf_group->epf;
+
+	ret = pci_epc_add_epf(epc, epf, SECONDARY_INTERFACE);
+	if (ret)
+		return ret;
+
+	ret = pci_epf_bind(epf);
+	if (ret) {
+		pci_epc_remove_epf(epc, epf, SECONDARY_INTERFACE);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void pci_secondary_epc_epf_unlink(struct config_item *epc_item,
+					 struct config_item *epf_item)
+{
+	struct pci_epf_group *epf_group = to_pci_epf_group(epf_item->ci_parent);
+	struct pci_epc_group *epc_group = to_pci_epc_group(epc_item);
+	struct pci_epc *epc;
+	struct pci_epf *epf;
+
+	WARN_ON_ONCE(epc_group->start);
+
+	epc = epc_group->epc;
+	epf = epf_group->epf;
+	pci_epf_unbind(epf);
+	pci_epc_remove_epf(epc, epf, SECONDARY_INTERFACE);
+}
+
+static struct configfs_item_operations pci_secondary_epc_item_ops = {
+	.allow_link	= pci_secondary_epc_epf_link,
+	.drop_link	= pci_secondary_epc_epf_unlink,
+};
+
+static const struct config_item_type pci_secondary_epc_type = {
+	.ct_item_ops	= &pci_secondary_epc_item_ops,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct config_group
+*pci_ep_cfs_add_secondary_group(struct pci_epf_group *epf_group)
+{
+	struct config_group *secondary_epc_group;
+
+	secondary_epc_group = &epf_group->secondary_epc_group;
+	config_group_init_type_name(secondary_epc_group, "secondary",
+				    &pci_secondary_epc_type);
+	configfs_register_group(&epf_group->group, secondary_epc_group);
+
+	return secondary_epc_group;
+}
+
+static int pci_primary_epc_epf_link(struct config_item *epf_item,
+				    struct config_item *epc_item)
+{
+	int ret;
+	struct pci_epf_group *epf_group = to_pci_epf_group(epf_item->ci_parent);
+	struct pci_epc_group *epc_group = to_pci_epc_group(epc_item);
+	struct pci_epc *epc = epc_group->epc;
+	struct pci_epf *epf = epf_group->epf;
+
+	ret = pci_epc_add_epf(epc, epf, PRIMARY_INTERFACE);
+	if (ret)
+		return ret;
+
+	ret = pci_epf_bind(epf);
+	if (ret) {
+		pci_epc_remove_epf(epc, epf, PRIMARY_INTERFACE);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void pci_primary_epc_epf_unlink(struct config_item *epc_item,
+				       struct config_item *epf_item)
+{
+	struct pci_epf_group *epf_group = to_pci_epf_group(epf_item->ci_parent);
+	struct pci_epc_group *epc_group = to_pci_epc_group(epc_item);
+	struct pci_epc *epc;
+	struct pci_epf *epf;
+
+	WARN_ON_ONCE(epc_group->start);
+
+	epc = epc_group->epc;
+	epf = epf_group->epf;
+	pci_epf_unbind(epf);
+	pci_epc_remove_epf(epc, epf, PRIMARY_INTERFACE);
+}
+
+static struct configfs_item_operations pci_primary_epc_item_ops = {
+	.allow_link	= pci_primary_epc_epf_link,
+	.drop_link	= pci_primary_epc_epf_unlink,
+};
+
+static const struct config_item_type pci_primary_epc_type = {
+	.ct_item_ops	= &pci_primary_epc_item_ops,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct config_group
+*pci_ep_cfs_add_primary_group(struct pci_epf_group *epf_group)
+{
+	struct config_group *primary_epc_group = &epf_group->primary_epc_group;
+
+	config_group_init_type_name(primary_epc_group, "primary",
+				    &pci_primary_epc_type);
+	configfs_register_group(&epf_group->group, primary_epc_group);
+
+	return primary_epc_group;
+}
+
+static ssize_t pci_epc_start_store(struct config_item *item, const char *page,
+				   size_t len)
+{
+	int ret;
+	bool start;
+	struct pci_epc *epc;
+	struct pci_epc_group *epc_group = to_pci_epc_group(item);
+
+	epc = epc_group->epc;
+
+	if (kstrtobool(page, &start) < 0)
+		return -EINVAL;
+
+	if (start == epc_group->start)
+		return -EALREADY;
+
+	if (!start) {
+		pci_epc_stop(epc);
+		epc_group->start = 0;
+		return len;
+	}
+
+	ret = pci_epc_start(epc);
+	if (ret) {
+		dev_err(&epc->dev, "failed to start endpoint controller\n");
+		return -EINVAL;
+	}
+
+	epc_group->start = start;
+
+	return len;
+}
+
+static ssize_t pci_epc_start_show(struct config_item *item, char *page)
+{
+	return sysfs_emit(page, "%d\n", to_pci_epc_group(item)->start);
+}
+
+CONFIGFS_ATTR(pci_epc_, start);
+
+static struct configfs_attribute *pci_epc_attrs[] = {
+	&pci_epc_attr_start,
+	NULL,
+};
+
+static int pci_epc_epf_link(struct config_item *epc_item,
+			    struct config_item *epf_item)
+{
+	int ret;
+	struct pci_epf_group *epf_group = to_pci_epf_group(epf_item);
+	struct pci_epc_group *epc_group = to_pci_epc_group(epc_item);
+	struct pci_epc *epc = epc_group->epc;
+	struct pci_epf *epf = epf_group->epf;
+
+	ret = pci_epc_add_epf(epc, epf, PRIMARY_INTERFACE);
+	if (ret)
+		return ret;
+
+	ret = pci_epf_bind(epf);
+	if (ret) {
+		pci_epc_remove_epf(epc, epf, PRIMARY_INTERFACE);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void pci_epc_epf_unlink(struct config_item *epc_item,
+			       struct config_item *epf_item)
+{
+	struct pci_epc *epc;
+	struct pci_epf *epf;
+	struct pci_epf_group *epf_group = to_pci_epf_group(epf_item);
+	struct pci_epc_group *epc_group = to_pci_epc_group(epc_item);
+
+	WARN_ON_ONCE(epc_group->start);
+
+	epc = epc_group->epc;
+	epf = epf_group->epf;
+	pci_epf_unbind(epf);
+	pci_epc_remove_epf(epc, epf, PRIMARY_INTERFACE);
+}
+
+static struct configfs_item_operations pci_epc_item_ops = {
+	.allow_link	= pci_epc_epf_link,
+	.drop_link	= pci_epc_epf_unlink,
+};
+
+static const struct config_item_type pci_epc_type = {
+	.ct_item_ops	= &pci_epc_item_ops,
+	.ct_attrs	= pci_epc_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+struct config_group *pci_ep_cfs_add_epc_group(const char *name)
+{
+	int ret;
+	struct pci_epc *epc;
+	struct config_group *group;
+	struct pci_epc_group *epc_group;
+
+	epc_group = kzalloc(sizeof(*epc_group), GFP_KERNEL);
+	if (!epc_group) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	group = &epc_group->group;
+
+	config_group_init_type_name(group, name, &pci_epc_type);
+	ret = configfs_register_group(controllers_group, group);
+	if (ret) {
+		pr_err("failed to register configfs group for %s\n", name);
+		goto err_register_group;
+	}
+
+	epc = pci_epc_get(name);
+	if (IS_ERR(epc)) {
+		ret = PTR_ERR(epc);
+		goto err_epc_get;
+	}
+
+	epc_group->epc = epc;
+
+	return group;
+
+err_epc_get:
+	configfs_unregister_group(group);
+
+err_register_group:
+	kfree(epc_group);
+
+err:
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(pci_ep_cfs_add_epc_group);
+
+void pci_ep_cfs_remove_epc_group(struct config_group *group)
+{
+	struct pci_epc_group *epc_group;
+
+	if (!group)
+		return;
+
+	epc_group = container_of(group, struct pci_epc_group, group);
+	pci_epc_put(epc_group->epc);
+	configfs_unregister_group(&epc_group->group);
+	kfree(epc_group);
+}
+EXPORT_SYMBOL(pci_ep_cfs_remove_epc_group);
+
+#define PCI_EPF_HEADER_R(_name)						       \
+static ssize_t pci_epf_##_name##_show(struct config_item *item,	char *page)    \
+{									       \
+	struct pci_epf *epf = to_pci_epf_group(item)->epf;		       \
+	if (WARN_ON_ONCE(!epf->header))					       \
+		return -EINVAL;						       \
+	return sysfs_emit(page, "0x%04x\n", epf->header->_name);	       \
+}
+
+#define PCI_EPF_HEADER_W_u32(_name)					       \
+static ssize_t pci_epf_##_name##_store(struct config_item *item,	       \
+				       const char *page, size_t len)	       \
+{									       \
+	u32 val;							       \
+	struct pci_epf *epf = to_pci_epf_group(item)->epf;		       \
+	if (WARN_ON_ONCE(!epf->header))					       \
+		return -EINVAL;						       \
+	if (kstrtou32(page, 0, &val) < 0)				       \
+		return -EINVAL;						       \
+	epf->header->_name = val;					       \
+	return len;							       \
+}
+
+#define PCI_EPF_HEADER_W_u16(_name)					       \
+static ssize_t pci_epf_##_name##_store(struct config_item *item,	       \
+				       const char *page, size_t len)	       \
+{									       \
+	u16 val;							       \
+	struct pci_epf *epf = to_pci_epf_group(item)->epf;		       \
+	if (WARN_ON_ONCE(!epf->header))					       \
+		return -EINVAL;						       \
+	if (kstrtou16(page, 0, &val) < 0)				       \
+		return -EINVAL;						       \
+	epf->header->_name = val;					       \
+	return len;							       \
+}
+
+#define PCI_EPF_HEADER_W_u8(_name)					       \
+static ssize_t pci_epf_##_name##_store(struct config_item *item,	       \
+				       const char *page, size_t len)	       \
+{									       \
+	u8 val;								       \
+	struct pci_epf *epf = to_pci_epf_group(item)->epf;		       \
+	if (WARN_ON_ONCE(!epf->header))					       \
+		return -EINVAL;						       \
+	if (kstrtou8(page, 0, &val) < 0)				       \
+		return -EINVAL;						       \
+	epf->header->_name = val;					       \
+	return len;							       \
+}
+
+static ssize_t pci_epf_msi_interrupts_store(struct config_item *item,
+					    const char *page, size_t len)
+{
+	u8 val;
+
+	if (kstrtou8(page, 0, &val) < 0)
+		return -EINVAL;
+
+	to_pci_epf_group(item)->epf->msi_interrupts = val;
+
+	return len;
+}
+
+static ssize_t pci_epf_msi_interrupts_show(struct config_item *item,
+					   char *page)
+{
+	return sysfs_emit(page, "%d\n",
+			  to_pci_epf_group(item)->epf->msi_interrupts);
+}
+
+static ssize_t pci_epf_msix_interrupts_store(struct config_item *item,
+					     const char *page, size_t len)
+{
+	u16 val;
+
+	if (kstrtou16(page, 0, &val) < 0)
+		return -EINVAL;
+
+	to_pci_epf_group(item)->epf->msix_interrupts = val;
+
+	return len;
+}
+
+static ssize_t pci_epf_msix_interrupts_show(struct config_item *item,
+					    char *page)
+{
+	return sysfs_emit(page, "%d\n",
+			  to_pci_epf_group(item)->epf->msix_interrupts);
+}
+
+PCI_EPF_HEADER_R(vendorid)
+PCI_EPF_HEADER_W_u16(vendorid)
+
+PCI_EPF_HEADER_R(deviceid)
+PCI_EPF_HEADER_W_u16(deviceid)
+
+PCI_EPF_HEADER_R(revid)
+PCI_EPF_HEADER_W_u8(revid)
+
+PCI_EPF_HEADER_R(progif_code)
+PCI_EPF_HEADER_W_u8(progif_code)
+
+PCI_EPF_HEADER_R(subclass_code)
+PCI_EPF_HEADER_W_u8(subclass_code)
+
+PCI_EPF_HEADER_R(baseclass_code)
+PCI_EPF_HEADER_W_u8(baseclass_code)
+
+PCI_EPF_HEADER_R(cache_line_size)
+PCI_EPF_HEADER_W_u8(cache_line_size)
+
+PCI_EPF_HEADER_R(subsys_vendor_id)
+PCI_EPF_HEADER_W_u16(subsys_vendor_id)
+
+PCI_EPF_HEADER_R(subsys_id)
+PCI_EPF_HEADER_W_u16(subsys_id)
+
+PCI_EPF_HEADER_R(interrupt_pin)
+PCI_EPF_HEADER_W_u8(interrupt_pin)
+
+CONFIGFS_ATTR(pci_epf_, vendorid);
+CONFIGFS_ATTR(pci_epf_, deviceid);
+CONFIGFS_ATTR(pci_epf_, revid);
+CONFIGFS_ATTR(pci_epf_, progif_code);
+CONFIGFS_ATTR(pci_epf_, subclass_code);
+CONFIGFS_ATTR(pci_epf_, baseclass_code);
+CONFIGFS_ATTR(pci_epf_, cache_line_size);
+CONFIGFS_ATTR(pci_epf_, subsys_vendor_id);
+CONFIGFS_ATTR(pci_epf_, subsys_id);
+CONFIGFS_ATTR(pci_epf_, interrupt_pin);
+CONFIGFS_ATTR(pci_epf_, msi_interrupts);
+CONFIGFS_ATTR(pci_epf_, msix_interrupts);
+
+static struct configfs_attribute *pci_epf_attrs[] = {
+	&pci_epf_attr_vendorid,
+	&pci_epf_attr_deviceid,
+	&pci_epf_attr_revid,
+	&pci_epf_attr_progif_code,
+	&pci_epf_attr_subclass_code,
+	&pci_epf_attr_baseclass_code,
+	&pci_epf_attr_cache_line_size,
+	&pci_epf_attr_subsys_vendor_id,
+	&pci_epf_attr_subsys_id,
+	&pci_epf_attr_interrupt_pin,
+	&pci_epf_attr_msi_interrupts,
+	&pci_epf_attr_msix_interrupts,
+	NULL,
+};
+
+static int pci_epf_vepf_link(struct config_item *epf_pf_item,
+			     struct config_item *epf_vf_item)
+{
+	struct pci_epf_group *epf_vf_group = to_pci_epf_group(epf_vf_item);
+	struct pci_epf_group *epf_pf_group = to_pci_epf_group(epf_pf_item);
+	struct pci_epf *epf_pf = epf_pf_group->epf;
+	struct pci_epf *epf_vf = epf_vf_group->epf;
+
+	return pci_epf_add_vepf(epf_pf, epf_vf);
+}
+
+static void pci_epf_vepf_unlink(struct config_item *epf_pf_item,
+				struct config_item *epf_vf_item)
+{
+	struct pci_epf_group *epf_vf_group = to_pci_epf_group(epf_vf_item);
+	struct pci_epf_group *epf_pf_group = to_pci_epf_group(epf_pf_item);
+	struct pci_epf *epf_pf = epf_pf_group->epf;
+	struct pci_epf *epf_vf = epf_vf_group->epf;
+
+	pci_epf_remove_vepf(epf_pf, epf_vf);
+}
+
+static void pci_epf_release(struct config_item *item)
+{
+	struct pci_epf_group *epf_group = to_pci_epf_group(item);
+
+	mutex_lock(&functions_mutex);
+	idr_remove(&functions_idr, epf_group->index);
+	mutex_unlock(&functions_mutex);
+	pci_epf_destroy(epf_group->epf);
+	kfree(epf_group);
+}
+
+static struct configfs_item_operations pci_epf_ops = {
+	.allow_link		= pci_epf_vepf_link,
+	.drop_link		= pci_epf_vepf_unlink,
+	.release		= pci_epf_release,
+};
+
+static const struct config_item_type pci_epf_type = {
+	.ct_item_ops	= &pci_epf_ops,
+	.ct_attrs	= pci_epf_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+/**
+ * pci_epf_type_add_cfs() - Help function drivers to expose function specific
+ *                          attributes in configfs
+ * @epf: the EPF device that has to be configured using configfs
+ * @group: the parent configfs group (corresponding to entries in
+ *         pci_epf_device_id)
+ *
+ * Invoke to expose function specific attributes in configfs.
+ *
+ * Return: A pointer to a config_group structure or NULL if the function driver
+ * does not have anything to expose (attributes configured by user) or if
+ * the function driver does not implement the add_cfs() method.
+ *
+ * Returns an error pointer if this function is called for an unbound EPF device
+ * or if the EPF driver add_cfs() method fails.
+ */
+static struct config_group *pci_epf_type_add_cfs(struct pci_epf *epf,
+						 struct config_group *group)
+{
+	struct config_group *epf_type_group;
+
+	if (!epf->driver) {
+		dev_err(&epf->dev, "epf device not bound to driver\n");
+		return ERR_PTR(-ENODEV);
+	}
+
+	if (!epf->driver->ops->add_cfs)
+		return NULL;
+
+	mutex_lock(&epf->lock);
+	epf_type_group = epf->driver->ops->add_cfs(epf, group);
+	mutex_unlock(&epf->lock);
+
+	return epf_type_group;
+}
+
+static void pci_ep_cfs_add_type_group(struct pci_epf_group *epf_group)
+{
+	struct config_group *group;
+
+	group = pci_epf_type_add_cfs(epf_group->epf, &epf_group->group);
+	if (!group)
+		return;
+
+	if (IS_ERR(group)) {
+		dev_err(&epf_group->epf->dev,
+			"failed to create epf type specific attributes\n");
+		return;
+	}
+
+	configfs_register_group(&epf_group->group, group);
+}
+
+static void pci_epf_cfs_work(struct work_struct *work)
+{
+	struct pci_epf_group *epf_group;
+	struct config_group *group;
+
+	epf_group = container_of(work, struct pci_epf_group, cfs_work.work);
+	group = pci_ep_cfs_add_primary_group(epf_group);
+	if (IS_ERR(group)) {
+		pr_err("failed to create 'primary' EPC interface\n");
+		return;
+	}
+
+	group = pci_ep_cfs_add_secondary_group(epf_group);
+	if (IS_ERR(group)) {
+		pr_err("failed to create 'secondary' EPC interface\n");
+		return;
+	}
+
+	pci_ep_cfs_add_type_group(epf_group);
+}
+
+static struct config_group *pci_epf_make(struct config_group *group,
+					 const char *name)
+{
+	struct pci_epf_group *epf_group;
+	struct pci_epf *epf;
+	char *epf_name;
+	int index, err;
+
+	epf_group = kzalloc(sizeof(*epf_group), GFP_KERNEL);
+	if (!epf_group)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_lock(&functions_mutex);
+	index = idr_alloc(&functions_idr, epf_group, 0, 0, GFP_KERNEL);
+	mutex_unlock(&functions_mutex);
+	if (index < 0) {
+		err = index;
+		goto free_group;
+	}
+
+	epf_group->index = index;
+
+	config_group_init_type_name(&epf_group->group, name, &pci_epf_type);
+
+	epf_name = kasprintf(GFP_KERNEL, "%s.%d",
+			     group->cg_item.ci_name, epf_group->index);
+	if (!epf_name) {
+		err = -ENOMEM;
+		goto remove_idr;
+	}
+
+	epf = pci_epf_create(epf_name);
+	if (IS_ERR(epf)) {
+		pr_err("failed to create endpoint function device\n");
+		err = -EINVAL;
+		goto free_name;
+	}
+
+	epf->group = &epf_group->group;
+	epf_group->epf = epf;
+
+	kfree(epf_name);
+
+	INIT_DELAYED_WORK(&epf_group->cfs_work, pci_epf_cfs_work);
+	queue_delayed_work(system_wq, &epf_group->cfs_work,
+			   msecs_to_jiffies(1));
+
+	return &epf_group->group;
+
+free_name:
+	kfree(epf_name);
+
+remove_idr:
+	mutex_lock(&functions_mutex);
+	idr_remove(&functions_idr, epf_group->index);
+	mutex_unlock(&functions_mutex);
+
+free_group:
+	kfree(epf_group);
+
+	return ERR_PTR(err);
+}
+
+static void pci_epf_drop(struct config_group *group, struct config_item *item)
+{
+	config_item_put(item);
+}
+
+static struct configfs_group_operations pci_epf_group_ops = {
+	.make_group     = &pci_epf_make,
+	.drop_item      = &pci_epf_drop,
+};
+
+static const struct config_item_type pci_epf_group_type = {
+	.ct_group_ops	= &pci_epf_group_ops,
+	.ct_owner	= THIS_MODULE,
+};
+
+struct config_group *pci_ep_cfs_add_epf_group(const char *name)
+{
+	struct config_group *group;
+
+	group = configfs_register_default_group(functions_group, name,
+						&pci_epf_group_type);
+	if (IS_ERR(group))
+		pr_err("failed to register configfs group for %s function\n",
+		       name);
+
+	return group;
+}
+EXPORT_SYMBOL(pci_ep_cfs_add_epf_group);
+
+void pci_ep_cfs_remove_epf_group(struct config_group *group)
+{
+	if (IS_ERR_OR_NULL(group))
+		return;
+
+	configfs_unregister_default_group(group);
+}
+EXPORT_SYMBOL(pci_ep_cfs_remove_epf_group);
+
+static const struct config_item_type pci_functions_type = {
+	.ct_owner	= THIS_MODULE,
+};
+
+static const struct config_item_type pci_controllers_type = {
+	.ct_owner	= THIS_MODULE,
+};
+
+static const struct config_item_type pci_ep_type = {
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct configfs_subsystem pci_ep_cfs_subsys = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf = "pci_ep",
+			.ci_type = &pci_ep_type,
+		},
+	},
+	.su_mutex = __MUTEX_INITIALIZER(pci_ep_cfs_subsys.su_mutex),
+};
+
+static int __init pci_ep_cfs_init(void)
+{
+	int ret;
+	struct config_group *root = &pci_ep_cfs_subsys.su_group;
+
+	config_group_init(root);
+
+	ret = configfs_register_subsystem(&pci_ep_cfs_subsys);
+	if (ret) {
+		pr_err("Error %d while registering subsystem %s\n",
+		       ret, root->cg_item.ci_namebuf);
+		goto err;
+	}
+
+	functions_group = configfs_register_default_group(root, "functions",
+							  &pci_functions_type);
+	if (IS_ERR(functions_group)) {
+		ret = PTR_ERR(functions_group);
+		pr_err("Error %d while registering functions group\n",
+		       ret);
+		goto err_functions_group;
+	}
+
+	controllers_group =
+		configfs_register_default_group(root, "controllers",
+						&pci_controllers_type);
+	if (IS_ERR(controllers_group)) {
+		ret = PTR_ERR(controllers_group);
+		pr_err("Error %d while registering controllers group\n",
+		       ret);
+		goto err_controllers_group;
+	}
+
+	return 0;
+
+err_controllers_group:
+	configfs_unregister_default_group(functions_group);
+
+err_functions_group:
+	configfs_unregister_subsystem(&pci_ep_cfs_subsys);
+
+err:
+	return ret;
+}
+module_init(pci_ep_cfs_init);
+
+static void __exit pci_ep_cfs_exit(void)
+{
+	configfs_unregister_default_group(controllers_group);
+	configfs_unregister_default_group(functions_group);
+	configfs_unregister_subsystem(&pci_ep_cfs_subsys);
+}
+module_exit(pci_ep_cfs_exit);
+
+MODULE_DESCRIPTION("PCI EP CONFIGFS");
+MODULE_AUTHOR("Kishon Vijay Abraham I <kishon@ti.com>");
diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c
new file mode 100644
index 000000000..a7d3a9239
--- /dev/null
+++ b/drivers/pci/endpoint/pci-epc-core.c
@@ -0,0 +1,931 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Endpoint *Controller* (EPC) library
+ *
+ * Copyright (C) 2017 Texas Instruments
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ */
+
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#include <linux/pci-epc.h>
+#include <linux/pci-epf.h>
+#include <linux/pci-ep-cfs.h>
+
+static struct class *pci_epc_class;
+
+static void devm_pci_epc_release(struct device *dev, void *res)
+{
+	struct pci_epc *epc = *(struct pci_epc **)res;
+
+	pci_epc_destroy(epc);
+}
+
+static int devm_pci_epc_match(struct device *dev, void *res, void *match_data)
+{
+	struct pci_epc **epc = res;
+
+	return *epc == match_data;
+}
+
+/**
+ * pci_epc_put() - release the PCI endpoint controller
+ * @epc: epc returned by pci_epc_get()
+ *
+ * release the refcount the caller obtained by invoking pci_epc_get()
+ */
+void pci_epc_put(struct pci_epc *epc)
+{
+	if (!epc || IS_ERR(epc))
+		return;
+
+	module_put(epc->ops->owner);
+	put_device(&epc->dev);
+}
+EXPORT_SYMBOL_GPL(pci_epc_put);
+
+/**
+ * pci_epc_get() - get the PCI endpoint controller
+ * @epc_name: device name of the endpoint controller
+ *
+ * Invoke to get struct pci_epc * corresponding to the device name of the
+ * endpoint controller
+ */
+struct pci_epc *pci_epc_get(const char *epc_name)
+{
+	int ret = -EINVAL;
+	struct pci_epc *epc;
+	struct device *dev;
+	struct class_dev_iter iter;
+
+	class_dev_iter_init(&iter, pci_epc_class, NULL, NULL);
+	while ((dev = class_dev_iter_next(&iter))) {
+		if (strcmp(epc_name, dev_name(dev)))
+			continue;
+
+		epc = to_pci_epc(dev);
+		if (!try_module_get(epc->ops->owner)) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		class_dev_iter_exit(&iter);
+		get_device(&epc->dev);
+		return epc;
+	}
+
+err:
+	class_dev_iter_exit(&iter);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(pci_epc_get);
+
+/**
+ * pci_epc_get_first_free_bar() - helper to get first unreserved BAR
+ * @epc_features: pci_epc_features structure that holds the reserved bar bitmap
+ *
+ * Invoke to get the first unreserved BAR that can be used by the endpoint
+ * function. For any incorrect value in reserved_bar return '0'.
+ */
+enum pci_barno
+pci_epc_get_first_free_bar(const struct pci_epc_features *epc_features)
+{
+	return pci_epc_get_next_free_bar(epc_features, BAR_0);
+}
+EXPORT_SYMBOL_GPL(pci_epc_get_first_free_bar);
+
+/**
+ * pci_epc_get_next_free_bar() - helper to get unreserved BAR starting from @bar
+ * @epc_features: pci_epc_features structure that holds the reserved bar bitmap
+ * @bar: the starting BAR number from where unreserved BAR should be searched
+ *
+ * Invoke to get the next unreserved BAR starting from @bar that can be used
+ * for endpoint function. For any incorrect value in reserved_bar return '0'.
+ */
+enum pci_barno pci_epc_get_next_free_bar(const struct pci_epc_features
+					 *epc_features, enum pci_barno bar)
+{
+	unsigned long free_bar;
+
+	if (!epc_features)
+		return BAR_0;
+
+	/* If 'bar - 1' is a 64-bit BAR, move to the next BAR */
+	if ((epc_features->bar_fixed_64bit << 1) & 1 << bar)
+		bar++;
+
+	/* Find if the reserved BAR is also a 64-bit BAR */
+	free_bar = epc_features->reserved_bar & epc_features->bar_fixed_64bit;
+
+	/* Set the adjacent bit if the reserved BAR is also a 64-bit BAR */
+	free_bar <<= 1;
+	free_bar |= epc_features->reserved_bar;
+
+	free_bar = find_next_zero_bit(&free_bar, 6, bar);
+	if (free_bar > 5)
+		return NO_BAR;
+
+	return free_bar;
+}
+EXPORT_SYMBOL_GPL(pci_epc_get_next_free_bar);
+
+/**
+ * pci_epc_get_features() - get the features supported by EPC
+ * @epc: the features supported by *this* EPC device will be returned
+ * @func_no: the features supported by the EPC device specific to the
+ *	     endpoint function with func_no will be returned
+ * @vfunc_no: the features supported by the EPC device specific to the
+ *	     virtual endpoint function with vfunc_no will be returned
+ *
+ * Invoke to get the features provided by the EPC which may be
+ * specific to an endpoint function. Returns pci_epc_features on success
+ * and NULL for any failures.
+ */
+const struct pci_epc_features *pci_epc_get_features(struct pci_epc *epc,
+						    u8 func_no, u8 vfunc_no)
+{
+	const struct pci_epc_features *epc_features;
+
+	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
+		return NULL;
+
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return NULL;
+
+	if (!epc->ops->get_features)
+		return NULL;
+
+	mutex_lock(&epc->lock);
+	epc_features = epc->ops->get_features(epc, func_no, vfunc_no);
+	mutex_unlock(&epc->lock);
+
+	return epc_features;
+}
+EXPORT_SYMBOL_GPL(pci_epc_get_features);
+
+/**
+ * pci_epc_stop() - stop the PCI link
+ * @epc: the link of the EPC device that has to be stopped
+ *
+ * Invoke to stop the PCI link
+ */
+void pci_epc_stop(struct pci_epc *epc)
+{
+	if (IS_ERR(epc) || !epc->ops->stop)
+		return;
+
+	mutex_lock(&epc->lock);
+	epc->ops->stop(epc);
+	mutex_unlock(&epc->lock);
+}
+EXPORT_SYMBOL_GPL(pci_epc_stop);
+
+/**
+ * pci_epc_start() - start the PCI link
+ * @epc: the link of *this* EPC device has to be started
+ *
+ * Invoke to start the PCI link
+ */
+int pci_epc_start(struct pci_epc *epc)
+{
+	int ret;
+
+	if (IS_ERR(epc))
+		return -EINVAL;
+
+	if (!epc->ops->start)
+		return 0;
+
+	mutex_lock(&epc->lock);
+	ret = epc->ops->start(epc);
+	mutex_unlock(&epc->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_epc_start);
+
+/**
+ * pci_epc_raise_irq() - interrupt the host system
+ * @epc: the EPC device which has to interrupt the host
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
+ * @type: specify the type of interrupt; legacy, MSI or MSI-X
+ * @interrupt_num: the MSI or MSI-X interrupt number with range (1-N)
+ *
+ * Invoke to raise an legacy, MSI or MSI-X interrupt
+ */
+int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+		      enum pci_epc_irq_type type, u16 interrupt_num)
+{
+	int ret;
+
+	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
+		return -EINVAL;
+
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return -EINVAL;
+
+	if (!epc->ops->raise_irq)
+		return 0;
+
+	mutex_lock(&epc->lock);
+	ret = epc->ops->raise_irq(epc, func_no, vfunc_no, type, interrupt_num);
+	mutex_unlock(&epc->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_epc_raise_irq);
+
+/**
+ * pci_epc_map_msi_irq() - Map physical address to MSI address and return
+ *                         MSI data
+ * @epc: the EPC device which has the MSI capability
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
+ * @phys_addr: the physical address of the outbound region
+ * @interrupt_num: the MSI interrupt number with range (1-N)
+ * @entry_size: Size of Outbound address region for each interrupt
+ * @msi_data: the data that should be written in order to raise MSI interrupt
+ *            with interrupt number as 'interrupt num'
+ * @msi_addr_offset: Offset of MSI address from the aligned outbound address
+ *                   to which the MSI address is mapped
+ *
+ * Invoke to map physical address to MSI address and return MSI data. The
+ * physical address should be an address in the outbound region. This is
+ * required to implement doorbell functionality of NTB wherein EPC on either
+ * side of the interface (primary and secondary) can directly write to the
+ * physical address (in outbound region) of the other interface to ring
+ * doorbell.
+ */
+int pci_epc_map_msi_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+			phys_addr_t phys_addr, u8 interrupt_num, u32 entry_size,
+			u32 *msi_data, u32 *msi_addr_offset)
+{
+	int ret;
+
+	if (IS_ERR_OR_NULL(epc))
+		return -EINVAL;
+
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return -EINVAL;
+
+	if (!epc->ops->map_msi_irq)
+		return -EINVAL;
+
+	mutex_lock(&epc->lock);
+	ret = epc->ops->map_msi_irq(epc, func_no, vfunc_no, phys_addr,
+				    interrupt_num, entry_size, msi_data,
+				    msi_addr_offset);
+	mutex_unlock(&epc->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_epc_map_msi_irq);
+
+/**
+ * pci_epc_get_msi() - get the number of MSI interrupt numbers allocated
+ * @epc: the EPC device to which MSI interrupts was requested
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
+ *
+ * Invoke to get the number of MSI interrupts allocated by the RC
+ */
+int pci_epc_get_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
+{
+	int interrupt;
+
+	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
+		return 0;
+
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return 0;
+
+	if (!epc->ops->get_msi)
+		return 0;
+
+	mutex_lock(&epc->lock);
+	interrupt = epc->ops->get_msi(epc, func_no, vfunc_no);
+	mutex_unlock(&epc->lock);
+
+	if (interrupt < 0)
+		return 0;
+
+	interrupt = 1 << interrupt;
+
+	return interrupt;
+}
+EXPORT_SYMBOL_GPL(pci_epc_get_msi);
+
+/**
+ * pci_epc_set_msi() - set the number of MSI interrupt numbers required
+ * @epc: the EPC device on which MSI has to be configured
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
+ * @interrupts: number of MSI interrupts required by the EPF
+ *
+ * Invoke to set the required number of MSI interrupts.
+ */
+int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no, u8 interrupts)
+{
+	int ret;
+	u8 encode_int;
+
+	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions ||
+	    interrupts < 1 || interrupts > 32)
+		return -EINVAL;
+
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return -EINVAL;
+
+	if (!epc->ops->set_msi)
+		return 0;
+
+	encode_int = order_base_2(interrupts);
+
+	mutex_lock(&epc->lock);
+	ret = epc->ops->set_msi(epc, func_no, vfunc_no, encode_int);
+	mutex_unlock(&epc->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_epc_set_msi);
+
+/**
+ * pci_epc_get_msix() - get the number of MSI-X interrupt numbers allocated
+ * @epc: the EPC device to which MSI-X interrupts was requested
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
+ *
+ * Invoke to get the number of MSI-X interrupts allocated by the RC
+ */
+int pci_epc_get_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
+{
+	int interrupt;
+
+	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
+		return 0;
+
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return 0;
+
+	if (!epc->ops->get_msix)
+		return 0;
+
+	mutex_lock(&epc->lock);
+	interrupt = epc->ops->get_msix(epc, func_no, vfunc_no);
+	mutex_unlock(&epc->lock);
+
+	if (interrupt < 0)
+		return 0;
+
+	return interrupt + 1;
+}
+EXPORT_SYMBOL_GPL(pci_epc_get_msix);
+
+/**
+ * pci_epc_set_msix() - set the number of MSI-X interrupt numbers required
+ * @epc: the EPC device on which MSI-X has to be configured
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
+ * @interrupts: number of MSI-X interrupts required by the EPF
+ * @bir: BAR where the MSI-X table resides
+ * @offset: Offset pointing to the start of MSI-X table
+ *
+ * Invoke to set the required number of MSI-X interrupts.
+ */
+int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+		     u16 interrupts, enum pci_barno bir, u32 offset)
+{
+	int ret;
+
+	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions ||
+	    interrupts < 1 || interrupts > 2048)
+		return -EINVAL;
+
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return -EINVAL;
+
+	if (!epc->ops->set_msix)
+		return 0;
+
+	mutex_lock(&epc->lock);
+	ret = epc->ops->set_msix(epc, func_no, vfunc_no, interrupts - 1, bir,
+				 offset);
+	mutex_unlock(&epc->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_epc_set_msix);
+
+/**
+ * pci_epc_unmap_addr() - unmap CPU address from PCI address
+ * @epc: the EPC device on which address is allocated
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
+ * @phys_addr: physical address of the local system
+ *
+ * Invoke to unmap the CPU address from PCI address.
+ */
+void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+			phys_addr_t phys_addr)
+{
+	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
+		return;
+
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return;
+
+	if (!epc->ops->unmap_addr)
+		return;
+
+	mutex_lock(&epc->lock);
+	epc->ops->unmap_addr(epc, func_no, vfunc_no, phys_addr);
+	mutex_unlock(&epc->lock);
+}
+EXPORT_SYMBOL_GPL(pci_epc_unmap_addr);
+
+/**
+ * pci_epc_map_addr() - map CPU address to PCI address
+ * @epc: the EPC device on which address is allocated
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
+ * @phys_addr: physical address of the local system
+ * @pci_addr: PCI address to which the physical address should be mapped
+ * @size: the size of the allocation
+ *
+ * Invoke to map CPU address with PCI address.
+ */
+int pci_epc_map_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+		     phys_addr_t phys_addr, u64 pci_addr, size_t size)
+{
+	int ret;
+
+	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
+		return -EINVAL;
+
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return -EINVAL;
+
+	if (!epc->ops->map_addr)
+		return 0;
+
+	mutex_lock(&epc->lock);
+	ret = epc->ops->map_addr(epc, func_no, vfunc_no, phys_addr, pci_addr,
+				 size);
+	mutex_unlock(&epc->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_epc_map_addr);
+
+/**
+ * pci_epc_clear_bar() - reset the BAR
+ * @epc: the EPC device for which the BAR has to be cleared
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
+ * @epf_bar: the struct epf_bar that contains the BAR information
+ *
+ * Invoke to reset the BAR of the endpoint device.
+ */
+void pci_epc_clear_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+		       struct pci_epf_bar *epf_bar)
+{
+	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions ||
+	    (epf_bar->barno == BAR_5 &&
+	     epf_bar->flags & PCI_BASE_ADDRESS_MEM_TYPE_64))
+		return;
+
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return;
+
+	if (!epc->ops->clear_bar)
+		return;
+
+	mutex_lock(&epc->lock);
+	epc->ops->clear_bar(epc, func_no, vfunc_no, epf_bar);
+	mutex_unlock(&epc->lock);
+}
+EXPORT_SYMBOL_GPL(pci_epc_clear_bar);
+
+/**
+ * pci_epc_set_bar() - configure BAR in order for host to assign PCI addr space
+ * @epc: the EPC device on which BAR has to be configured
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
+ * @epf_bar: the struct epf_bar that contains the BAR information
+ *
+ * Invoke to configure the BAR of the endpoint device.
+ */
+int pci_epc_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+		    struct pci_epf_bar *epf_bar)
+{
+	int ret;
+	int flags = epf_bar->flags;
+
+	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions ||
+	    (epf_bar->barno == BAR_5 &&
+	     flags & PCI_BASE_ADDRESS_MEM_TYPE_64) ||
+	    (flags & PCI_BASE_ADDRESS_SPACE_IO &&
+	     flags & PCI_BASE_ADDRESS_IO_MASK) ||
+	    (upper_32_bits(epf_bar->size) &&
+	     !(flags & PCI_BASE_ADDRESS_MEM_TYPE_64)))
+		return -EINVAL;
+
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return -EINVAL;
+
+	if (!epc->ops->set_bar)
+		return 0;
+
+	mutex_lock(&epc->lock);
+	ret = epc->ops->set_bar(epc, func_no, vfunc_no, epf_bar);
+	mutex_unlock(&epc->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_epc_set_bar);
+
+/**
+ * pci_epc_write_header() - write standard configuration header
+ * @epc: the EPC device to which the configuration header should be written
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
+ * @header: standard configuration header fields
+ *
+ * Invoke to write the configuration header to the endpoint controller. Every
+ * endpoint controller will have a dedicated location to which the standard
+ * configuration header would be written. The callback function should write
+ * the header fields to this dedicated location.
+ */
+int pci_epc_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+			 struct pci_epf_header *header)
+{
+	int ret;
+
+	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
+		return -EINVAL;
+
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return -EINVAL;
+
+	/* Only Virtual Function #1 has deviceID */
+	if (vfunc_no > 1)
+		return -EINVAL;
+
+	if (!epc->ops->write_header)
+		return 0;
+
+	mutex_lock(&epc->lock);
+	ret = epc->ops->write_header(epc, func_no, vfunc_no, header);
+	mutex_unlock(&epc->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_epc_write_header);
+
+/**
+ * pci_epc_add_epf() - bind PCI endpoint function to an endpoint controller
+ * @epc: the EPC device to which the endpoint function should be added
+ * @epf: the endpoint function to be added
+ * @type: Identifies if the EPC is connected to the primary or secondary
+ *        interface of EPF
+ *
+ * A PCI endpoint device can have one or more functions. In the case of PCIe,
+ * the specification allows up to 8 PCIe endpoint functions. Invoke
+ * pci_epc_add_epf() to add a PCI endpoint function to an endpoint controller.
+ */
+int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf,
+		    enum pci_epc_interface_type type)
+{
+	struct list_head *list;
+	u32 func_no;
+	int ret = 0;
+
+	if (IS_ERR_OR_NULL(epc) || epf->is_vf)
+		return -EINVAL;
+
+	if (type == PRIMARY_INTERFACE && epf->epc)
+		return -EBUSY;
+
+	if (type == SECONDARY_INTERFACE && epf->sec_epc)
+		return -EBUSY;
+
+	mutex_lock(&epc->list_lock);
+	func_no = find_first_zero_bit(&epc->function_num_map,
+				      BITS_PER_LONG);
+	if (func_no >= BITS_PER_LONG) {
+		ret = -EINVAL;
+		goto ret;
+	}
+
+	if (func_no > epc->max_functions - 1) {
+		dev_err(&epc->dev, "Exceeding max supported Function Number\n");
+		ret = -EINVAL;
+		goto ret;
+	}
+
+	set_bit(func_no, &epc->function_num_map);
+	if (type == PRIMARY_INTERFACE) {
+		epf->func_no = func_no;
+		epf->epc = epc;
+		list = &epf->list;
+	} else {
+		epf->sec_epc_func_no = func_no;
+		epf->sec_epc = epc;
+		list = &epf->sec_epc_list;
+	}
+
+	list_add_tail(list, &epc->pci_epf);
+ret:
+	mutex_unlock(&epc->list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_epc_add_epf);
+
+/**
+ * pci_epc_remove_epf() - remove PCI endpoint function from endpoint controller
+ * @epc: the EPC device from which the endpoint function should be removed
+ * @epf: the endpoint function to be removed
+ * @type: identifies if the EPC is connected to the primary or secondary
+ *        interface of EPF
+ *
+ * Invoke to remove PCI endpoint function from the endpoint controller.
+ */
+void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf,
+			enum pci_epc_interface_type type)
+{
+	struct list_head *list;
+	u32 func_no = 0;
+
+	if (!epc || IS_ERR(epc) || !epf)
+		return;
+
+	if (type == PRIMARY_INTERFACE) {
+		func_no = epf->func_no;
+		list = &epf->list;
+	} else {
+		func_no = epf->sec_epc_func_no;
+		list = &epf->sec_epc_list;
+	}
+
+	mutex_lock(&epc->list_lock);
+	clear_bit(func_no, &epc->function_num_map);
+	list_del(list);
+	epf->epc = NULL;
+	mutex_unlock(&epc->list_lock);
+}
+EXPORT_SYMBOL_GPL(pci_epc_remove_epf);
+
+/**
+ * pci_epc_linkup() - Notify the EPF device that EPC device has established a
+ *		      connection with the Root Complex.
+ * @epc: the EPC device which has established link with the host
+ *
+ * Invoke to Notify the EPF device that the EPC device has established a
+ * connection with the Root Complex.
+ */
+void pci_epc_linkup(struct pci_epc *epc)
+{
+	struct pci_epf *epf;
+
+	if (!epc || IS_ERR(epc))
+		return;
+
+	mutex_lock(&epc->list_lock);
+	list_for_each_entry(epf, &epc->pci_epf, list) {
+		mutex_lock(&epf->lock);
+		if (epf->event_ops && epf->event_ops->link_up)
+			epf->event_ops->link_up(epf);
+		mutex_unlock(&epf->lock);
+	}
+	mutex_unlock(&epc->list_lock);
+}
+EXPORT_SYMBOL_GPL(pci_epc_linkup);
+
+/**
+ * pci_epc_linkdown() - Notify the EPF device that EPC device has dropped the
+ *			connection with the Root Complex.
+ * @epc: the EPC device which has dropped the link with the host
+ *
+ * Invoke to Notify the EPF device that the EPC device has dropped the
+ * connection with the Root Complex.
+ */
+void pci_epc_linkdown(struct pci_epc *epc)
+{
+	struct pci_epf *epf;
+
+	if (!epc || IS_ERR(epc))
+		return;
+
+	mutex_lock(&epc->list_lock);
+	list_for_each_entry(epf, &epc->pci_epf, list) {
+		mutex_lock(&epf->lock);
+		if (epf->event_ops && epf->event_ops->link_down)
+			epf->event_ops->link_down(epf);
+		mutex_unlock(&epf->lock);
+	}
+	mutex_unlock(&epc->list_lock);
+}
+EXPORT_SYMBOL_GPL(pci_epc_linkdown);
+
+/**
+ * pci_epc_init_notify() - Notify the EPF device that EPC device's core
+ *			   initialization is completed.
+ * @epc: the EPC device whose core initialization is completed
+ *
+ * Invoke to Notify the EPF device that the EPC device's initialization
+ * is completed.
+ */
+void pci_epc_init_notify(struct pci_epc *epc)
+{
+	struct pci_epf *epf;
+
+	if (!epc || IS_ERR(epc))
+		return;
+
+	mutex_lock(&epc->list_lock);
+	list_for_each_entry(epf, &epc->pci_epf, list) {
+		mutex_lock(&epf->lock);
+		if (epf->event_ops && epf->event_ops->core_init)
+			epf->event_ops->core_init(epf);
+		mutex_unlock(&epf->lock);
+	}
+	mutex_unlock(&epc->list_lock);
+}
+EXPORT_SYMBOL_GPL(pci_epc_init_notify);
+
+/**
+ * pci_epc_bme_notify() - Notify the EPF device that the EPC device has received
+ *			  the BME event from the Root complex
+ * @epc: the EPC device that received the BME event
+ *
+ * Invoke to Notify the EPF device that the EPC device has received the Bus
+ * Master Enable (BME) event from the Root complex
+ */
+void pci_epc_bme_notify(struct pci_epc *epc)
+{
+	struct pci_epf *epf;
+
+	if (!epc || IS_ERR(epc))
+		return;
+
+	mutex_lock(&epc->list_lock);
+	list_for_each_entry(epf, &epc->pci_epf, list) {
+		mutex_lock(&epf->lock);
+		if (epf->event_ops && epf->event_ops->bme)
+			epf->event_ops->bme(epf);
+		mutex_unlock(&epf->lock);
+	}
+	mutex_unlock(&epc->list_lock);
+}
+EXPORT_SYMBOL_GPL(pci_epc_bme_notify);
+
+/**
+ * pci_epc_destroy() - destroy the EPC device
+ * @epc: the EPC device that has to be destroyed
+ *
+ * Invoke to destroy the PCI EPC device
+ */
+void pci_epc_destroy(struct pci_epc *epc)
+{
+	pci_ep_cfs_remove_epc_group(epc->group);
+	device_unregister(&epc->dev);
+}
+EXPORT_SYMBOL_GPL(pci_epc_destroy);
+
+/**
+ * devm_pci_epc_destroy() - destroy the EPC device
+ * @dev: device that wants to destroy the EPC
+ * @epc: the EPC device that has to be destroyed
+ *
+ * Invoke to destroy the devres associated with this
+ * pci_epc and destroy the EPC device.
+ */
+void devm_pci_epc_destroy(struct device *dev, struct pci_epc *epc)
+{
+	int r;
+
+	r = devres_destroy(dev, devm_pci_epc_release, devm_pci_epc_match,
+			   epc);
+	dev_WARN_ONCE(dev, r, "couldn't find PCI EPC resource\n");
+}
+EXPORT_SYMBOL_GPL(devm_pci_epc_destroy);
+
+static void pci_epc_release(struct device *dev)
+{
+	kfree(to_pci_epc(dev));
+}
+
+/**
+ * __pci_epc_create() - create a new endpoint controller (EPC) device
+ * @dev: device that is creating the new EPC
+ * @ops: function pointers for performing EPC operations
+ * @owner: the owner of the module that creates the EPC device
+ *
+ * Invoke to create a new EPC device and add it to pci_epc class.
+ */
+struct pci_epc *
+__pci_epc_create(struct device *dev, const struct pci_epc_ops *ops,
+		 struct module *owner)
+{
+	int ret;
+	struct pci_epc *epc;
+
+	if (WARN_ON(!dev)) {
+		ret = -EINVAL;
+		goto err_ret;
+	}
+
+	epc = kzalloc(sizeof(*epc), GFP_KERNEL);
+	if (!epc) {
+		ret = -ENOMEM;
+		goto err_ret;
+	}
+
+	mutex_init(&epc->lock);
+	mutex_init(&epc->list_lock);
+	INIT_LIST_HEAD(&epc->pci_epf);
+
+	device_initialize(&epc->dev);
+	epc->dev.class = pci_epc_class;
+	epc->dev.parent = dev;
+	epc->dev.release = pci_epc_release;
+	epc->ops = ops;
+
+	ret = dev_set_name(&epc->dev, "%s", dev_name(dev));
+	if (ret)
+		goto put_dev;
+
+	ret = device_add(&epc->dev);
+	if (ret)
+		goto put_dev;
+
+	epc->group = pci_ep_cfs_add_epc_group(dev_name(dev));
+
+	return epc;
+
+put_dev:
+	put_device(&epc->dev);
+
+err_ret:
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(__pci_epc_create);
+
+/**
+ * __devm_pci_epc_create() - create a new endpoint controller (EPC) device
+ * @dev: device that is creating the new EPC
+ * @ops: function pointers for performing EPC operations
+ * @owner: the owner of the module that creates the EPC device
+ *
+ * Invoke to create a new EPC device and add it to pci_epc class.
+ * While at that, it also associates the device with the pci_epc using devres.
+ * On driver detach, release function is invoked on the devres data,
+ * then, devres data is freed.
+ */
+struct pci_epc *
+__devm_pci_epc_create(struct device *dev, const struct pci_epc_ops *ops,
+		      struct module *owner)
+{
+	struct pci_epc **ptr, *epc;
+
+	ptr = devres_alloc(devm_pci_epc_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	epc = __pci_epc_create(dev, ops, owner);
+	if (!IS_ERR(epc)) {
+		*ptr = epc;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return epc;
+}
+EXPORT_SYMBOL_GPL(__devm_pci_epc_create);
+
+static int __init pci_epc_init(void)
+{
+	pci_epc_class = class_create("pci_epc");
+	if (IS_ERR(pci_epc_class)) {
+		pr_err("failed to create pci epc class --> %ld\n",
+		       PTR_ERR(pci_epc_class));
+		return PTR_ERR(pci_epc_class);
+	}
+
+	return 0;
+}
+module_init(pci_epc_init);
+
+static void __exit pci_epc_exit(void)
+{
+	class_destroy(pci_epc_class);
+}
+module_exit(pci_epc_exit);
+
+MODULE_DESCRIPTION("PCI EPC Library");
+MODULE_AUTHOR("Kishon Vijay Abraham I <kishon@ti.com>");
diff --git a/drivers/pci/endpoint/pci-epc-mem.c b/drivers/pci/endpoint/pci-epc-mem.c
new file mode 100644
index 000000000..a9c028f58
--- /dev/null
+++ b/drivers/pci/endpoint/pci-epc-mem.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Endpoint *Controller* Address Space Management
+ *
+ * Copyright (C) 2017 Texas Instruments
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/pci-epc.h>
+
+/**
+ * pci_epc_mem_get_order() - determine the allocation order of a memory size
+ * @mem: address space of the endpoint controller
+ * @size: the size for which to get the order
+ *
+ * Reimplement get_order() for mem->page_size since the generic get_order
+ * always gets order with a constant PAGE_SIZE.
+ */
+static int pci_epc_mem_get_order(struct pci_epc_mem *mem, size_t size)
+{
+	int order;
+	unsigned int page_shift = ilog2(mem->window.page_size);
+
+	size--;
+	size >>= page_shift;
+#if BITS_PER_LONG == 32
+	order = fls(size);
+#else
+	order = fls64(size);
+#endif
+	return order;
+}
+
+/**
+ * pci_epc_multi_mem_init() - initialize the pci_epc_mem structure
+ * @epc: the EPC device that invoked pci_epc_mem_init
+ * @windows: pointer to windows supported by the device
+ * @num_windows: number of windows device supports
+ *
+ * Invoke to initialize the pci_epc_mem structure used by the
+ * endpoint functions to allocate mapped PCI address.
+ */
+int pci_epc_multi_mem_init(struct pci_epc *epc,
+			   struct pci_epc_mem_window *windows,
+			   unsigned int num_windows)
+{
+	struct pci_epc_mem *mem = NULL;
+	unsigned long *bitmap = NULL;
+	unsigned int page_shift;
+	size_t page_size;
+	int bitmap_size;
+	int pages;
+	int ret;
+	int i;
+
+	epc->num_windows = 0;
+
+	if (!windows || !num_windows)
+		return -EINVAL;
+
+	epc->windows = kcalloc(num_windows, sizeof(*epc->windows), GFP_KERNEL);
+	if (!epc->windows)
+		return -ENOMEM;
+
+	for (i = 0; i < num_windows; i++) {
+		page_size = windows[i].page_size;
+		if (page_size < PAGE_SIZE)
+			page_size = PAGE_SIZE;
+		page_shift = ilog2(page_size);
+		pages = windows[i].size >> page_shift;
+		bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
+
+		mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+		if (!mem) {
+			ret = -ENOMEM;
+			i--;
+			goto err_mem;
+		}
+
+		bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+		if (!bitmap) {
+			ret = -ENOMEM;
+			kfree(mem);
+			i--;
+			goto err_mem;
+		}
+
+		mem->window.phys_base = windows[i].phys_base;
+		mem->window.size = windows[i].size;
+		mem->window.page_size = page_size;
+		mem->bitmap = bitmap;
+		mem->pages = pages;
+		mutex_init(&mem->lock);
+		epc->windows[i] = mem;
+	}
+
+	epc->mem = epc->windows[0];
+	epc->num_windows = num_windows;
+
+	return 0;
+
+err_mem:
+	for (; i >= 0; i--) {
+		mem = epc->windows[i];
+		kfree(mem->bitmap);
+		kfree(mem);
+	}
+	kfree(epc->windows);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_epc_multi_mem_init);
+
+/**
+ * pci_epc_mem_init() - Initialize the pci_epc_mem structure
+ * @epc: the EPC device that invoked pci_epc_mem_init
+ * @base: Physical address of the window region
+ * @size: Total Size of the window region
+ * @page_size: Page size of the window region
+ *
+ * Invoke to initialize a single pci_epc_mem structure used by the
+ * endpoint functions to allocate memory for mapping the PCI host memory
+ */
+int pci_epc_mem_init(struct pci_epc *epc, phys_addr_t base,
+		     size_t size, size_t page_size)
+{
+	struct pci_epc_mem_window mem_window;
+
+	mem_window.phys_base = base;
+	mem_window.size = size;
+	mem_window.page_size = page_size;
+
+	return pci_epc_multi_mem_init(epc, &mem_window, 1);
+}
+EXPORT_SYMBOL_GPL(pci_epc_mem_init);
+
+/**
+ * pci_epc_mem_exit() - cleanup the pci_epc_mem structure
+ * @epc: the EPC device that invoked pci_epc_mem_exit
+ *
+ * Invoke to cleanup the pci_epc_mem structure allocated in
+ * pci_epc_mem_init().
+ */
+void pci_epc_mem_exit(struct pci_epc *epc)
+{
+	struct pci_epc_mem *mem;
+	int i;
+
+	if (!epc->num_windows)
+		return;
+
+	for (i = 0; i < epc->num_windows; i++) {
+		mem = epc->windows[i];
+		kfree(mem->bitmap);
+		kfree(mem);
+	}
+	kfree(epc->windows);
+
+	epc->windows = NULL;
+	epc->mem = NULL;
+	epc->num_windows = 0;
+}
+EXPORT_SYMBOL_GPL(pci_epc_mem_exit);
+
+/**
+ * pci_epc_mem_alloc_addr() - allocate memory address from EPC addr space
+ * @epc: the EPC device on which memory has to be allocated
+ * @phys_addr: populate the allocated physical address here
+ * @size: the size of the address space that has to be allocated
+ *
+ * Invoke to allocate memory address from the EPC address space. This
+ * is usually done to map the remote RC address into the local system.
+ */
+void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc,
+				     phys_addr_t *phys_addr, size_t size)
+{
+	void __iomem *virt_addr = NULL;
+	struct pci_epc_mem *mem;
+	unsigned int page_shift;
+	size_t align_size;
+	int pageno;
+	int order;
+	int i;
+
+	for (i = 0; i < epc->num_windows; i++) {
+		mem = epc->windows[i];
+		mutex_lock(&mem->lock);
+		align_size = ALIGN(size, mem->window.page_size);
+		order = pci_epc_mem_get_order(mem, align_size);
+
+		pageno = bitmap_find_free_region(mem->bitmap, mem->pages,
+						 order);
+		if (pageno >= 0) {
+			page_shift = ilog2(mem->window.page_size);
+			*phys_addr = mem->window.phys_base +
+				((phys_addr_t)pageno << page_shift);
+			virt_addr = ioremap(*phys_addr, align_size);
+			if (!virt_addr) {
+				bitmap_release_region(mem->bitmap,
+						      pageno, order);
+				mutex_unlock(&mem->lock);
+				continue;
+			}
+			mutex_unlock(&mem->lock);
+			return virt_addr;
+		}
+		mutex_unlock(&mem->lock);
+	}
+
+	return virt_addr;
+}
+EXPORT_SYMBOL_GPL(pci_epc_mem_alloc_addr);
+
+static struct pci_epc_mem *pci_epc_get_matching_window(struct pci_epc *epc,
+						       phys_addr_t phys_addr)
+{
+	struct pci_epc_mem *mem;
+	int i;
+
+	for (i = 0; i < epc->num_windows; i++) {
+		mem = epc->windows[i];
+
+		if (phys_addr >= mem->window.phys_base &&
+		    phys_addr < (mem->window.phys_base + mem->window.size))
+			return mem;
+	}
+
+	return NULL;
+}
+
+/**
+ * pci_epc_mem_free_addr() - free the allocated memory address
+ * @epc: the EPC device on which memory was allocated
+ * @phys_addr: the allocated physical address
+ * @virt_addr: virtual address of the allocated mem space
+ * @size: the size of the allocated address space
+ *
+ * Invoke to free the memory allocated using pci_epc_mem_alloc_addr.
+ */
+void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr,
+			   void __iomem *virt_addr, size_t size)
+{
+	struct pci_epc_mem *mem;
+	unsigned int page_shift;
+	size_t page_size;
+	int pageno;
+	int order;
+
+	mem = pci_epc_get_matching_window(epc, phys_addr);
+	if (!mem) {
+		pr_err("failed to get matching window\n");
+		return;
+	}
+
+	page_size = mem->window.page_size;
+	page_shift = ilog2(page_size);
+	iounmap(virt_addr);
+	pageno = (phys_addr - mem->window.phys_base) >> page_shift;
+	size = ALIGN(size, page_size);
+	order = pci_epc_mem_get_order(mem, size);
+	mutex_lock(&mem->lock);
+	bitmap_release_region(mem->bitmap, pageno, order);
+	mutex_unlock(&mem->lock);
+}
+EXPORT_SYMBOL_GPL(pci_epc_mem_free_addr);
+
+MODULE_DESCRIPTION("PCI EPC Address Space Management");
+MODULE_AUTHOR("Kishon Vijay Abraham I <kishon@ti.com>");
diff --git a/drivers/pci/endpoint/pci-epf-core.c b/drivers/pci/endpoint/pci-epf-core.c
new file mode 100644
index 000000000..2c32de667
--- /dev/null
+++ b/drivers/pci/endpoint/pci-epf-core.c
@@ -0,0 +1,538 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Endpoint *Function* (EPF) library
+ *
+ * Copyright (C) 2017 Texas Instruments
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ */
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#include <linux/pci-epc.h>
+#include <linux/pci-epf.h>
+#include <linux/pci-ep-cfs.h>
+
+static DEFINE_MUTEX(pci_epf_mutex);
+
+static struct bus_type pci_epf_bus_type;
+static const struct device_type pci_epf_type;
+
+/**
+ * pci_epf_unbind() - Notify the function driver that the binding between the
+ *		      EPF device and EPC device has been lost
+ * @epf: the EPF device which has lost the binding with the EPC device
+ *
+ * Invoke to notify the function driver that the binding between the EPF device
+ * and EPC device has been lost.
+ */
+void pci_epf_unbind(struct pci_epf *epf)
+{
+	struct pci_epf *epf_vf;
+
+	if (!epf->driver) {
+		dev_WARN(&epf->dev, "epf device not bound to driver\n");
+		return;
+	}
+
+	mutex_lock(&epf->lock);
+	list_for_each_entry(epf_vf, &epf->pci_vepf, list) {
+		if (epf_vf->is_bound)
+			epf_vf->driver->ops->unbind(epf_vf);
+	}
+	if (epf->is_bound)
+		epf->driver->ops->unbind(epf);
+	mutex_unlock(&epf->lock);
+	module_put(epf->driver->owner);
+}
+EXPORT_SYMBOL_GPL(pci_epf_unbind);
+
+/**
+ * pci_epf_bind() - Notify the function driver that the EPF device has been
+ *		    bound to a EPC device
+ * @epf: the EPF device which has been bound to the EPC device
+ *
+ * Invoke to notify the function driver that it has been bound to a EPC device
+ */
+int pci_epf_bind(struct pci_epf *epf)
+{
+	struct device *dev = &epf->dev;
+	struct pci_epf *epf_vf;
+	u8 func_no, vfunc_no;
+	struct pci_epc *epc;
+	int ret;
+
+	if (!epf->driver) {
+		dev_WARN(dev, "epf device not bound to driver\n");
+		return -EINVAL;
+	}
+
+	if (!try_module_get(epf->driver->owner))
+		return -EAGAIN;
+
+	mutex_lock(&epf->lock);
+	list_for_each_entry(epf_vf, &epf->pci_vepf, list) {
+		vfunc_no = epf_vf->vfunc_no;
+
+		if (vfunc_no < 1) {
+			dev_err(dev, "Invalid virtual function number\n");
+			ret = -EINVAL;
+			goto ret;
+		}
+
+		epc = epf->epc;
+		func_no = epf->func_no;
+		if (!IS_ERR_OR_NULL(epc)) {
+			if (!epc->max_vfs) {
+				dev_err(dev, "No support for virt function\n");
+				ret = -EINVAL;
+				goto ret;
+			}
+
+			if (vfunc_no > epc->max_vfs[func_no]) {
+				dev_err(dev, "PF%d: Exceeds max vfunc number\n",
+					func_no);
+				ret = -EINVAL;
+				goto ret;
+			}
+		}
+
+		epc = epf->sec_epc;
+		func_no = epf->sec_epc_func_no;
+		if (!IS_ERR_OR_NULL(epc)) {
+			if (!epc->max_vfs) {
+				dev_err(dev, "No support for virt function\n");
+				ret = -EINVAL;
+				goto ret;
+			}
+
+			if (vfunc_no > epc->max_vfs[func_no]) {
+				dev_err(dev, "PF%d: Exceeds max vfunc number\n",
+					func_no);
+				ret = -EINVAL;
+				goto ret;
+			}
+		}
+
+		epf_vf->func_no = epf->func_no;
+		epf_vf->sec_epc_func_no = epf->sec_epc_func_no;
+		epf_vf->epc = epf->epc;
+		epf_vf->sec_epc = epf->sec_epc;
+		ret = epf_vf->driver->ops->bind(epf_vf);
+		if (ret)
+			goto ret;
+		epf_vf->is_bound = true;
+	}
+
+	ret = epf->driver->ops->bind(epf);
+	if (ret)
+		goto ret;
+	epf->is_bound = true;
+
+	mutex_unlock(&epf->lock);
+	return 0;
+
+ret:
+	mutex_unlock(&epf->lock);
+	pci_epf_unbind(epf);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_epf_bind);
+
+/**
+ * pci_epf_add_vepf() - associate virtual EP function to physical EP function
+ * @epf_pf: the physical EP function to which the virtual EP function should be
+ *   associated
+ * @epf_vf: the virtual EP function to be added
+ *
+ * A physical endpoint function can be associated with multiple virtual
+ * endpoint functions. Invoke pci_epf_add_epf() to add a virtual PCI endpoint
+ * function to a physical PCI endpoint function.
+ */
+int pci_epf_add_vepf(struct pci_epf *epf_pf, struct pci_epf *epf_vf)
+{
+	u32 vfunc_no;
+
+	if (IS_ERR_OR_NULL(epf_pf) || IS_ERR_OR_NULL(epf_vf))
+		return -EINVAL;
+
+	if (epf_pf->epc || epf_vf->epc || epf_vf->epf_pf)
+		return -EBUSY;
+
+	if (epf_pf->sec_epc || epf_vf->sec_epc)
+		return -EBUSY;
+
+	mutex_lock(&epf_pf->lock);
+	vfunc_no = find_first_zero_bit(&epf_pf->vfunction_num_map,
+				       BITS_PER_LONG);
+	if (vfunc_no >= BITS_PER_LONG) {
+		mutex_unlock(&epf_pf->lock);
+		return -EINVAL;
+	}
+
+	set_bit(vfunc_no, &epf_pf->vfunction_num_map);
+	epf_vf->vfunc_no = vfunc_no;
+
+	epf_vf->epf_pf = epf_pf;
+	epf_vf->is_vf = true;
+
+	list_add_tail(&epf_vf->list, &epf_pf->pci_vepf);
+	mutex_unlock(&epf_pf->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_epf_add_vepf);
+
+/**
+ * pci_epf_remove_vepf() - remove virtual EP function from physical EP function
+ * @epf_pf: the physical EP function from which the virtual EP function should
+ *   be removed
+ * @epf_vf: the virtual EP function to be removed
+ *
+ * Invoke to remove a virtual endpoint function from the physical endpoint
+ * function.
+ */
+void pci_epf_remove_vepf(struct pci_epf *epf_pf, struct pci_epf *epf_vf)
+{
+	if (IS_ERR_OR_NULL(epf_pf) || IS_ERR_OR_NULL(epf_vf))
+		return;
+
+	mutex_lock(&epf_pf->lock);
+	clear_bit(epf_vf->vfunc_no, &epf_pf->vfunction_num_map);
+	list_del(&epf_vf->list);
+	mutex_unlock(&epf_pf->lock);
+}
+EXPORT_SYMBOL_GPL(pci_epf_remove_vepf);
+
+/**
+ * pci_epf_free_space() - free the allocated PCI EPF register space
+ * @epf: the EPF device from whom to free the memory
+ * @addr: the virtual address of the PCI EPF register space
+ * @bar: the BAR number corresponding to the register space
+ * @type: Identifies if the allocated space is for primary EPC or secondary EPC
+ *
+ * Invoke to free the allocated PCI EPF register space.
+ */
+void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar,
+			enum pci_epc_interface_type type)
+{
+	struct device *dev;
+	struct pci_epf_bar *epf_bar;
+	struct pci_epc *epc;
+
+	if (!addr)
+		return;
+
+	if (type == PRIMARY_INTERFACE) {
+		epc = epf->epc;
+		epf_bar = epf->bar;
+	} else {
+		epc = epf->sec_epc;
+		epf_bar = epf->sec_epc_bar;
+	}
+
+	dev = epc->dev.parent;
+	dma_free_coherent(dev, epf_bar[bar].size, addr,
+			  epf_bar[bar].phys_addr);
+
+	epf_bar[bar].phys_addr = 0;
+	epf_bar[bar].addr = NULL;
+	epf_bar[bar].size = 0;
+	epf_bar[bar].barno = 0;
+	epf_bar[bar].flags = 0;
+}
+EXPORT_SYMBOL_GPL(pci_epf_free_space);
+
+/**
+ * pci_epf_alloc_space() - allocate memory for the PCI EPF register space
+ * @epf: the EPF device to whom allocate the memory
+ * @size: the size of the memory that has to be allocated
+ * @bar: the BAR number corresponding to the allocated register space
+ * @align: alignment size for the allocation region
+ * @type: Identifies if the allocation is for primary EPC or secondary EPC
+ *
+ * Invoke to allocate memory for the PCI EPF register space.
+ */
+void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar,
+			  size_t align, enum pci_epc_interface_type type)
+{
+	struct pci_epf_bar *epf_bar;
+	dma_addr_t phys_addr;
+	struct pci_epc *epc;
+	struct device *dev;
+	void *space;
+
+	if (size < 128)
+		size = 128;
+
+	if (align)
+		size = ALIGN(size, align);
+	else
+		size = roundup_pow_of_two(size);
+
+	if (type == PRIMARY_INTERFACE) {
+		epc = epf->epc;
+		epf_bar = epf->bar;
+	} else {
+		epc = epf->sec_epc;
+		epf_bar = epf->sec_epc_bar;
+	}
+
+	dev = epc->dev.parent;
+	space = dma_alloc_coherent(dev, size, &phys_addr, GFP_KERNEL);
+	if (!space) {
+		dev_err(dev, "failed to allocate mem space\n");
+		return NULL;
+	}
+
+	epf_bar[bar].phys_addr = phys_addr;
+	epf_bar[bar].addr = space;
+	epf_bar[bar].size = size;
+	epf_bar[bar].barno = bar;
+	epf_bar[bar].flags |= upper_32_bits(size) ?
+				PCI_BASE_ADDRESS_MEM_TYPE_64 :
+				PCI_BASE_ADDRESS_MEM_TYPE_32;
+
+	return space;
+}
+EXPORT_SYMBOL_GPL(pci_epf_alloc_space);
+
+static void pci_epf_remove_cfs(struct pci_epf_driver *driver)
+{
+	struct config_group *group, *tmp;
+
+	if (!IS_ENABLED(CONFIG_PCI_ENDPOINT_CONFIGFS))
+		return;
+
+	mutex_lock(&pci_epf_mutex);
+	list_for_each_entry_safe(group, tmp, &driver->epf_group, group_entry)
+		pci_ep_cfs_remove_epf_group(group);
+	list_del(&driver->epf_group);
+	mutex_unlock(&pci_epf_mutex);
+}
+
+/**
+ * pci_epf_unregister_driver() - unregister the PCI EPF driver
+ * @driver: the PCI EPF driver that has to be unregistered
+ *
+ * Invoke to unregister the PCI EPF driver.
+ */
+void pci_epf_unregister_driver(struct pci_epf_driver *driver)
+{
+	pci_epf_remove_cfs(driver);
+	driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(pci_epf_unregister_driver);
+
+static int pci_epf_add_cfs(struct pci_epf_driver *driver)
+{
+	struct config_group *group;
+	const struct pci_epf_device_id *id;
+
+	if (!IS_ENABLED(CONFIG_PCI_ENDPOINT_CONFIGFS))
+		return 0;
+
+	INIT_LIST_HEAD(&driver->epf_group);
+
+	id = driver->id_table;
+	while (id->name[0]) {
+		group = pci_ep_cfs_add_epf_group(id->name);
+		if (IS_ERR(group)) {
+			pci_epf_remove_cfs(driver);
+			return PTR_ERR(group);
+		}
+
+		mutex_lock(&pci_epf_mutex);
+		list_add_tail(&group->group_entry, &driver->epf_group);
+		mutex_unlock(&pci_epf_mutex);
+		id++;
+	}
+
+	return 0;
+}
+
+/**
+ * __pci_epf_register_driver() - register a new PCI EPF driver
+ * @driver: structure representing PCI EPF driver
+ * @owner: the owner of the module that registers the PCI EPF driver
+ *
+ * Invoke to register a new PCI EPF driver.
+ */
+int __pci_epf_register_driver(struct pci_epf_driver *driver,
+			      struct module *owner)
+{
+	int ret;
+
+	if (!driver->ops)
+		return -EINVAL;
+
+	if (!driver->ops->bind || !driver->ops->unbind)
+		return -EINVAL;
+
+	driver->driver.bus = &pci_epf_bus_type;
+	driver->driver.owner = owner;
+
+	ret = driver_register(&driver->driver);
+	if (ret)
+		return ret;
+
+	pci_epf_add_cfs(driver);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__pci_epf_register_driver);
+
+/**
+ * pci_epf_destroy() - destroy the created PCI EPF device
+ * @epf: the PCI EPF device that has to be destroyed.
+ *
+ * Invoke to destroy the PCI EPF device created by invoking pci_epf_create().
+ */
+void pci_epf_destroy(struct pci_epf *epf)
+{
+	device_unregister(&epf->dev);
+}
+EXPORT_SYMBOL_GPL(pci_epf_destroy);
+
+/**
+ * pci_epf_create() - create a new PCI EPF device
+ * @name: the name of the PCI EPF device. This name will be used to bind the
+ *	  EPF device to a EPF driver
+ *
+ * Invoke to create a new PCI EPF device by providing the name of the function
+ * device.
+ */
+struct pci_epf *pci_epf_create(const char *name)
+{
+	int ret;
+	struct pci_epf *epf;
+	struct device *dev;
+	int len;
+
+	epf = kzalloc(sizeof(*epf), GFP_KERNEL);
+	if (!epf)
+		return ERR_PTR(-ENOMEM);
+
+	len = strchrnul(name, '.') - name;
+	epf->name = kstrndup(name, len, GFP_KERNEL);
+	if (!epf->name) {
+		kfree(epf);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* VFs are numbered starting with 1. So set BIT(0) by default */
+	epf->vfunction_num_map = 1;
+	INIT_LIST_HEAD(&epf->pci_vepf);
+
+	dev = &epf->dev;
+	device_initialize(dev);
+	dev->bus = &pci_epf_bus_type;
+	dev->type = &pci_epf_type;
+	mutex_init(&epf->lock);
+
+	ret = dev_set_name(dev, "%s", name);
+	if (ret) {
+		put_device(dev);
+		return ERR_PTR(ret);
+	}
+
+	ret = device_add(dev);
+	if (ret) {
+		put_device(dev);
+		return ERR_PTR(ret);
+	}
+
+	return epf;
+}
+EXPORT_SYMBOL_GPL(pci_epf_create);
+
+static void pci_epf_dev_release(struct device *dev)
+{
+	struct pci_epf *epf = to_pci_epf(dev);
+
+	kfree(epf->name);
+	kfree(epf);
+}
+
+static const struct device_type pci_epf_type = {
+	.release	= pci_epf_dev_release,
+};
+
+static const struct pci_epf_device_id *
+pci_epf_match_id(const struct pci_epf_device_id *id, const struct pci_epf *epf)
+{
+	while (id->name[0]) {
+		if (strcmp(epf->name, id->name) == 0)
+			return id;
+		id++;
+	}
+
+	return NULL;
+}
+
+static int pci_epf_device_match(struct device *dev, struct device_driver *drv)
+{
+	struct pci_epf *epf = to_pci_epf(dev);
+	struct pci_epf_driver *driver = to_pci_epf_driver(drv);
+
+	if (driver->id_table)
+		return !!pci_epf_match_id(driver->id_table, epf);
+
+	return !strcmp(epf->name, drv->name);
+}
+
+static int pci_epf_device_probe(struct device *dev)
+{
+	struct pci_epf *epf = to_pci_epf(dev);
+	struct pci_epf_driver *driver = to_pci_epf_driver(dev->driver);
+
+	if (!driver->probe)
+		return -ENODEV;
+
+	epf->driver = driver;
+
+	return driver->probe(epf, pci_epf_match_id(driver->id_table, epf));
+}
+
+static void pci_epf_device_remove(struct device *dev)
+{
+	struct pci_epf *epf = to_pci_epf(dev);
+	struct pci_epf_driver *driver = to_pci_epf_driver(dev->driver);
+
+	if (driver->remove)
+		driver->remove(epf);
+	epf->driver = NULL;
+}
+
+static struct bus_type pci_epf_bus_type = {
+	.name		= "pci-epf",
+	.match		= pci_epf_device_match,
+	.probe		= pci_epf_device_probe,
+	.remove		= pci_epf_device_remove,
+};
+
+static int __init pci_epf_init(void)
+{
+	int ret;
+
+	ret = bus_register(&pci_epf_bus_type);
+	if (ret) {
+		pr_err("failed to register pci epf bus --> %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+module_init(pci_epf_init);
+
+static void __exit pci_epf_exit(void)
+{
+	bus_unregister(&pci_epf_bus_type);
+}
+module_exit(pci_epf_exit);
+
+MODULE_DESCRIPTION("PCI EPF Library");
+MODULE_AUTHOR("Kishon Vijay Abraham I <kishon@ti.com>");
diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c
new file mode 100644
index 000000000..afa50b446
--- /dev/null
+++ b/drivers/pci/host-bridge.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Host bridge related code
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+
+#include "pci.h"
+
+static struct pci_bus *find_pci_root_bus(struct pci_bus *bus)
+{
+	while (bus->parent)
+		bus = bus->parent;
+
+	return bus;
+}
+
+struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus)
+{
+	struct pci_bus *root_bus = find_pci_root_bus(bus);
+
+	return to_pci_host_bridge(root_bus->bridge);
+}
+EXPORT_SYMBOL_GPL(pci_find_host_bridge);
+
+struct device *pci_get_host_bridge_device(struct pci_dev *dev)
+{
+	struct pci_bus *root_bus = find_pci_root_bus(dev->bus);
+	struct device *bridge = root_bus->bridge;
+
+	kobject_get(&bridge->kobj);
+	return bridge;
+}
+
+void  pci_put_host_bridge_device(struct device *dev)
+{
+	kobject_put(&dev->kobj);
+}
+
+void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
+				 void (*release_fn)(struct pci_host_bridge *),
+				 void *release_data)
+{
+	bridge->release_fn = release_fn;
+	bridge->release_data = release_data;
+}
+EXPORT_SYMBOL_GPL(pci_set_host_bridge_release);
+
+void pcibios_resource_to_bus(struct pci_bus *bus, struct pci_bus_region *region,
+			     struct resource *res)
+{
+	struct pci_host_bridge *bridge = pci_find_host_bridge(bus);
+	struct resource_entry *window;
+	resource_size_t offset = 0;
+
+	resource_list_for_each_entry(window, &bridge->windows) {
+		if (resource_contains(window->res, res)) {
+			offset = window->offset;
+			break;
+		}
+	}
+
+	region->start = res->start - offset;
+	region->end = res->end - offset;
+}
+EXPORT_SYMBOL(pcibios_resource_to_bus);
+
+static bool region_contains(struct pci_bus_region *region1,
+			    struct pci_bus_region *region2)
+{
+	return region1->start <= region2->start && region1->end >= region2->end;
+}
+
+void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res,
+			     struct pci_bus_region *region)
+{
+	struct pci_host_bridge *bridge = pci_find_host_bridge(bus);
+	struct resource_entry *window;
+	resource_size_t offset = 0;
+
+	resource_list_for_each_entry(window, &bridge->windows) {
+		struct pci_bus_region bus_region;
+
+		if (resource_type(res) != resource_type(window->res))
+			continue;
+
+		bus_region.start = window->res->start - window->offset;
+		bus_region.end = window->res->end - window->offset;
+
+		if (region_contains(&bus_region, region)) {
+			offset = window->offset;
+			break;
+		}
+	}
+
+	res->start = region->start + offset;
+	res->end = region->end + offset;
+}
+EXPORT_SYMBOL(pcibios_bus_to_resource);
diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig
new file mode 100644
index 000000000..48113b210
--- /dev/null
+++ b/drivers/pci/hotplug/Kconfig
@@ -0,0 +1,164 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# PCI Hotplug support
+#
+
+menuconfig HOTPLUG_PCI
+	bool "Support for PCI Hotplug"
+	depends on PCI && SYSFS
+	default y if USB4
+	help
+	  Say Y here if you have a motherboard with a PCI Hotplug controller.
+	  This allows you to add and remove PCI cards while the machine is
+	  powered up and running.
+
+	  Thunderbolt/USB4 PCIe tunneling depends on native PCIe hotplug.
+
+	  When in doubt, say N.
+
+if HOTPLUG_PCI
+
+config HOTPLUG_PCI_COMPAQ
+	tristate "Compaq PCI Hotplug driver"
+	depends on X86 && PCI_BIOS
+	help
+	  Say Y here if you have a motherboard with a Compaq PCI Hotplug
+	  controller.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called cpqphp.
+
+	  When in doubt, say N.
+
+config HOTPLUG_PCI_COMPAQ_NVRAM
+	bool "Save configuration into NVRAM on Compaq servers"
+	depends on HOTPLUG_PCI_COMPAQ
+	help
+	  Say Y here if you have a Compaq server that has a PCI Hotplug
+	  controller.  This will allow the PCI Hotplug driver to store the PCI
+	  system configuration options in NVRAM.
+
+	  When in doubt, say N.
+
+config HOTPLUG_PCI_IBM
+	tristate "IBM PCI Hotplug driver"
+	depends on X86_IO_APIC && X86 && PCI_BIOS
+	help
+	  Say Y here if you have a motherboard with a IBM PCI Hotplug
+	  controller.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ibmphp.
+
+	  When in doubt, say N.
+
+config HOTPLUG_PCI_ACPI
+	bool "ACPI PCI Hotplug driver"
+	depends on HOTPLUG_PCI=y && ((!ACPI_DOCK && ACPI) || (ACPI_DOCK))
+	help
+	  Say Y here if you have a system that supports PCI Hotplug using
+	  ACPI.
+
+	  When in doubt, say N.
+
+config HOTPLUG_PCI_ACPI_IBM
+	tristate "ACPI PCI Hotplug driver IBM extensions"
+	depends on HOTPLUG_PCI_ACPI
+	help
+	  Say Y here if you have an IBM system that supports PCI Hotplug using
+	  ACPI.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called acpiphp_ibm.
+
+	  When in doubt, say N.
+
+config HOTPLUG_PCI_CPCI
+	bool "CompactPCI Hotplug driver"
+	help
+	  Say Y here if you have a CompactPCI system card with CompactPCI
+	  hotswap support per the PICMG 2.1 specification.
+
+	  When in doubt, say N.
+
+config HOTPLUG_PCI_CPCI_ZT5550
+	tristate "Ziatech ZT5550 CompactPCI Hotplug driver"
+	depends on HOTPLUG_PCI_CPCI && X86
+	help
+	  Say Y here if you have an Performance Technologies (formerly Intel,
+	  formerly just Ziatech) Ziatech ZT5550 CompactPCI system card.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called cpcihp_zt5550.
+
+	  When in doubt, say N.
+
+config HOTPLUG_PCI_CPCI_GENERIC
+	tristate "Generic port I/O CompactPCI Hotplug driver"
+	depends on HOTPLUG_PCI_CPCI && X86
+	help
+	  Say Y here if you have a CompactPCI system card that exposes the #ENUM
+	  hotswap signal as a bit in a system register that can be read through
+	  standard port I/O.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called cpcihp_generic.
+
+	  When in doubt, say N.
+
+config HOTPLUG_PCI_SHPC
+	bool "SHPC PCI Hotplug driver"
+	help
+	  Say Y here if you have a motherboard with a SHPC PCI Hotplug
+	  controller.
+
+	  When in doubt, say N.
+
+config HOTPLUG_PCI_POWERNV
+	tristate "PowerPC PowerNV PCI Hotplug driver"
+	depends on PPC_POWERNV && EEH
+	select OF_DYNAMIC
+	help
+	  Say Y here if you run PowerPC PowerNV platform that supports
+	  PCI Hotplug
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called pnv-php.
+
+	  When in doubt, say N.
+
+config HOTPLUG_PCI_RPA
+	tristate "RPA PCI Hotplug driver"
+	depends on PPC_PSERIES && EEH
+	help
+	  Say Y here if you have a RPA system that supports PCI Hotplug.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called rpaphp.
+
+	  When in doubt, say N.
+
+config HOTPLUG_PCI_RPA_DLPAR
+	tristate "RPA Dynamic Logical Partitioning for I/O slots"
+	depends on HOTPLUG_PCI_RPA
+	help
+	  Say Y here if your system supports Dynamic Logical Partitioning
+	  for I/O slots.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called rpadlpar_io.
+
+	  When in doubt, say N.
+
+config HOTPLUG_PCI_S390
+	bool "System z PCI Hotplug Support"
+	depends on S390 && 64BIT
+	help
+	  Say Y here if you want to use the System z PCI Hotplug
+	  driver for PCI devices. Without this driver it is not
+	  possible to access stand-by PCI functions nor to deconfigure
+	  PCI functions.
+
+	  When in doubt, say Y.
+
+endif # HOTPLUG_PCI
diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile
new file mode 100644
index 000000000..519698322
--- /dev/null
+++ b/drivers/pci/hotplug/Makefile
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the Linux kernel pci hotplug controller drivers.
+#
+
+obj-$(CONFIG_HOTPLUG_PCI)		+= pci_hotplug.o
+obj-$(CONFIG_HOTPLUG_PCI_COMPAQ)	+= cpqphp.o
+obj-$(CONFIG_HOTPLUG_PCI_IBM)		+= ibmphp.o
+
+# native drivers should be linked before acpiphp in order to allow the
+# native driver to attempt to bind first. We can then fall back to
+# generic support.
+
+obj-$(CONFIG_HOTPLUG_PCI_PCIE)		+= pciehp.o
+obj-$(CONFIG_HOTPLUG_PCI_CPCI_ZT5550)	+= cpcihp_zt5550.o
+obj-$(CONFIG_HOTPLUG_PCI_CPCI_GENERIC)	+= cpcihp_generic.o
+obj-$(CONFIG_HOTPLUG_PCI_SHPC)		+= shpchp.o
+obj-$(CONFIG_HOTPLUG_PCI_POWERNV)	+= pnv-php.o
+obj-$(CONFIG_HOTPLUG_PCI_RPA)		+= rpaphp.o
+obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR)	+= rpadlpar_io.o
+obj-$(CONFIG_HOTPLUG_PCI_ACPI)		+= acpiphp.o
+obj-$(CONFIG_HOTPLUG_PCI_S390)		+= s390_pci_hpc.o
+
+# acpiphp_ibm extends acpiphp, so should be linked afterwards.
+
+obj-$(CONFIG_HOTPLUG_PCI_ACPI_IBM)	+= acpiphp_ibm.o
+
+pci_hotplug-objs	:=	pci_hotplug_core.o
+
+ifdef CONFIG_HOTPLUG_PCI_CPCI
+pci_hotplug-objs	+=	cpci_hotplug_core.o	\
+				cpci_hotplug_pci.o
+endif
+ifdef CONFIG_ACPI
+pci_hotplug-objs	+=	acpi_pcihp.o
+endif
+
+cpqphp-objs		:=	cpqphp_core.o	\
+				cpqphp_ctrl.o	\
+				cpqphp_sysfs.o	\
+				cpqphp_pci.o
+cpqphp-$(CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM) += cpqphp_nvram.o
+cpqphp-objs += $(cpqphp-y)
+
+ibmphp-objs		:=	ibmphp_core.o	\
+				ibmphp_ebda.o	\
+				ibmphp_pci.o	\
+				ibmphp_res.o	\
+				ibmphp_hpc.o
+
+acpiphp-objs		:=	acpiphp_core.o	\
+				acpiphp_glue.o
+
+pnv-php-objs		:=	pnv_php.o
+
+rpaphp-objs		:=	rpaphp_core.o	\
+				rpaphp_pci.o	\
+				rpaphp_slot.o
+
+rpadlpar_io-objs	:=	rpadlpar_core.o \
+				rpadlpar_sysfs.o
+
+pciehp-objs		:=	pciehp_core.o	\
+				pciehp_ctrl.o	\
+				pciehp_pci.o	\
+				pciehp_hpc.o
+
+shpchp-objs		:=	shpchp_core.o	\
+				shpchp_ctrl.o	\
+				shpchp_pci.o	\
+				shpchp_sysfs.o	\
+				shpchp_hpc.o
diff --git a/drivers/pci/hotplug/TODO b/drivers/pci/hotplug/TODO
new file mode 100644
index 000000000..fdb8dd6ea
--- /dev/null
+++ b/drivers/pci/hotplug/TODO
@@ -0,0 +1,63 @@
+Contributions are solicited in particular to remedy the following issues:
+
+cpcihp:
+
+* There are no implementations of the ->hardware_test, ->get_power and
+  ->set_power callbacks in struct cpci_hp_controller_ops.  Why were they
+  introduced?  Can they be removed from the struct?
+
+cpqphp:
+
+* The driver spawns a kthread cpqhp_event_thread() which is woken by the
+  hardirq handler cpqhp_ctrl_intr().  Convert this to threaded IRQ handling.
+  The kthread is also woken from the timer pushbutton_helper_thread(),
+  convert it to call irq_wake_thread().  Use pciehp as a template.
+
+* A large portion of cpqphp_ctrl.c and cpqphp_pci.c concerns resource
+  management.  Doesn't this duplicate functionality in the core?
+
+ibmphp:
+
+* Implementations of hotplug_slot_ops callbacks such as get_adapter_present()
+  in ibmphp_core.c create a copy of the struct slot on the stack, then perform
+  the actual operation on that copy.  Determine if this overhead is necessary,
+  delete it if not.  The functions also perform a NULL pointer check on the
+  struct hotplug_slot, this seems superfluous.
+
+* Several functions access the pci_slot member in struct hotplug_slot even
+  though pci_hotplug.h declares it private.  See get_max_bus_speed() for an
+  example.  Either the pci_slot member should no longer be declared private
+  or ibmphp should store a pointer to its bus in struct slot.  Probably the
+  former.
+
+* ibmphp_init_devno() takes a struct slot **, it could instead take a
+  struct slot *.
+
+* The return value of pci_hp_register() is not checked.
+
+* The various slot data structures are difficult to follow and need to be
+  simplified.  A lot of functions are too large and too complex, they need
+  to be broken up into smaller, manageable pieces.  Negative examples are
+  ebda_rsrc_controller() and configure_bridge().
+
+* A large portion of ibmphp_res.c and ibmphp_pci.c concerns resource
+  management.  Doesn't this duplicate functionality in the core?
+
+sgi_hotplug:
+
+* Several functions access the pci_slot member in struct hotplug_slot even
+  though pci_hotplug.h declares it private.  See sn_hp_destroy() for an
+  example.  Either the pci_slot member should no longer be declared private
+  or sgi_hotplug should store a pointer to it in struct slot.  Probably the
+  former.
+
+shpchp:
+
+* There is only a single implementation of struct hpc_ops.  Can the struct be
+  removed and its functions invoked directly?  This has already been done in
+  pciehp with commit 82a9e79ef132 ("PCI: pciehp: remove hpc_ops").  Clarify
+  if there was a specific reason not to apply the same change to shpchp.
+
+* The hardirq handler shpc_isr() queues events on a workqueue.  It can be
+  simplified by converting it to threaded IRQ handling.  Use pciehp as a
+  template.
diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c
new file mode 100644
index 000000000..4fedebf2f
--- /dev/null
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Common ACPI functions for hot plug platforms
+ *
+ * Copyright (C) 2006 Intel Corporation
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <kristen.c.accardi@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <linux/acpi.h>
+#include <linux/pci-acpi.h>
+#include <linux/slab.h>
+
+#define MY_NAME	"acpi_pcihp"
+
+#define dbg(fmt, arg...) do { if (debug_acpi) printk(KERN_DEBUG "%s: %s: " fmt, MY_NAME, __func__, ## arg); } while (0)
+#define err(format, arg...) printk(KERN_ERR "%s: " format, MY_NAME, ## arg)
+#define info(format, arg...) printk(KERN_INFO "%s: " format, MY_NAME, ## arg)
+#define warn(format, arg...) printk(KERN_WARNING "%s: " format, MY_NAME, ## arg)
+
+#define	METHOD_NAME__SUN	"_SUN"
+#define	METHOD_NAME_OSHP	"OSHP"
+
+static bool debug_acpi;
+
+/* acpi_run_oshp - get control of hotplug from the firmware
+ *
+ * @handle - the handle of the hotplug controller.
+ */
+static acpi_status acpi_run_oshp(acpi_handle handle)
+{
+	acpi_status		status;
+	struct acpi_buffer	string = { ACPI_ALLOCATE_BUFFER, NULL };
+
+	acpi_get_name(handle, ACPI_FULL_PATHNAME, &string);
+
+	/* run OSHP */
+	status = acpi_evaluate_object(handle, METHOD_NAME_OSHP, NULL, NULL);
+	if (ACPI_FAILURE(status))
+		if (status != AE_NOT_FOUND)
+			printk(KERN_ERR "%s:%s OSHP fails=0x%x\n",
+			       __func__, (char *)string.pointer, status);
+		else
+			dbg("%s:%s OSHP not found\n",
+			    __func__, (char *)string.pointer);
+	else
+		pr_debug("%s:%s OSHP passes\n", __func__,
+			(char *)string.pointer);
+
+	kfree(string.pointer);
+	return status;
+}
+
+/**
+ * acpi_get_hp_hw_control_from_firmware
+ * @pdev: the pci_dev of the bridge that has a hotplug controller
+ *
+ * Attempt to take hotplug control from firmware.
+ */
+int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev)
+{
+	const struct pci_host_bridge *host;
+	const struct acpi_pci_root *root;
+	acpi_status status;
+	acpi_handle chandle, handle;
+	struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL };
+
+	/*
+	 * If there's no ACPI host bridge (i.e., ACPI support is compiled
+	 * into the kernel but the hardware platform doesn't support ACPI),
+	 * there's nothing to do here.
+	 */
+	host = pci_find_host_bridge(pdev->bus);
+	root = acpi_pci_find_root(ACPI_HANDLE(&host->dev));
+	if (!root)
+		return 0;
+
+	/*
+	 * If _OSC exists, it determines whether we're allowed to manage
+	 * the SHPC.  We executed it while enumerating the host bridge.
+	 */
+	if (root->osc_support_set) {
+		if (host->native_shpc_hotplug)
+			return 0;
+		return -ENODEV;
+	}
+
+	/*
+	 * In the absence of _OSC, we're always allowed to manage the SHPC.
+	 * However, if an OSHP method is present, we must execute it so the
+	 * firmware can transfer control to the OS, e.g., direct interrupts
+	 * to the OS instead of to the firmware.
+	 *
+	 * N.B. The PCI Firmware Spec (r3.2, sec 4.8) does not endorse
+	 * searching up the ACPI hierarchy, so the loops below are suspect.
+	 */
+	handle = ACPI_HANDLE(&pdev->dev);
+	if (!handle) {
+		/*
+		 * This hotplug controller was not listed in the ACPI name
+		 * space at all. Try to get ACPI handle of parent PCI bus.
+		 */
+		struct pci_bus *pbus;
+		for (pbus = pdev->bus; pbus; pbus = pbus->parent) {
+			handle = acpi_pci_get_bridge_handle(pbus);
+			if (handle)
+				break;
+		}
+	}
+
+	while (handle) {
+		acpi_get_name(handle, ACPI_FULL_PATHNAME, &string);
+		pci_info(pdev, "Requesting control of SHPC hotplug via OSHP (%s)\n",
+			 (char *)string.pointer);
+		status = acpi_run_oshp(handle);
+		if (ACPI_SUCCESS(status))
+			goto got_one;
+		if (acpi_is_root_bridge(handle))
+			break;
+		chandle = handle;
+		status = acpi_get_parent(chandle, &handle);
+		if (ACPI_FAILURE(status))
+			break;
+	}
+
+	pci_info(pdev, "Cannot get control of SHPC hotplug\n");
+	kfree(string.pointer);
+	return -ENODEV;
+got_one:
+	pci_info(pdev, "Gained control of SHPC hotplug (%s)\n",
+		 (char *)string.pointer);
+	kfree(string.pointer);
+	return 0;
+}
+EXPORT_SYMBOL(acpi_get_hp_hw_control_from_firmware);
+
+static int pcihp_is_ejectable(acpi_handle handle)
+{
+	acpi_status status;
+	unsigned long long removable;
+	if (!acpi_has_method(handle, "_ADR"))
+		return 0;
+	if (acpi_has_method(handle, "_EJ0"))
+		return 1;
+	status = acpi_evaluate_integer(handle, "_RMV", NULL, &removable);
+	if (ACPI_SUCCESS(status) && removable)
+		return 1;
+	return 0;
+}
+
+/**
+ * acpi_pci_check_ejectable - check if handle is ejectable ACPI PCI slot
+ * @pbus: the PCI bus of the PCI slot corresponding to 'handle'
+ * @handle: ACPI handle to check
+ *
+ * Return 1 if handle is ejectable PCI slot, 0 otherwise.
+ */
+int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle)
+{
+	acpi_handle bridge_handle, parent_handle;
+
+	bridge_handle = acpi_pci_get_bridge_handle(pbus);
+	if (!bridge_handle)
+		return 0;
+	if ((ACPI_FAILURE(acpi_get_parent(handle, &parent_handle))))
+		return 0;
+	if (bridge_handle != parent_handle)
+		return 0;
+	return pcihp_is_ejectable(handle);
+}
+EXPORT_SYMBOL_GPL(acpi_pci_check_ejectable);
+
+static acpi_status
+check_hotplug(acpi_handle handle, u32 lvl, void *context, void **rv)
+{
+	int *found = (int *)context;
+	if (pcihp_is_ejectable(handle)) {
+		*found = 1;
+		return AE_CTRL_TERMINATE;
+	}
+	return AE_OK;
+}
+
+/**
+ * acpi_pci_detect_ejectable - check if the PCI bus has ejectable slots
+ * @handle: handle of the PCI bus to scan
+ *
+ * Returns 1 if the PCI bus has ACPI based ejectable slots, 0 otherwise.
+ */
+int acpi_pci_detect_ejectable(acpi_handle handle)
+{
+	int found = 0;
+
+	if (!handle)
+		return found;
+
+	acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
+			    check_hotplug, NULL, (void *)&found, NULL);
+	return found;
+}
+EXPORT_SYMBOL_GPL(acpi_pci_detect_ejectable);
+
+module_param(debug_acpi, bool, 0644);
+MODULE_PARM_DESC(debug_acpi, "Debugging mode for ACPI enabled or not");
diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h
new file mode 100644
index 000000000..5745be601
--- /dev/null
+++ b/drivers/pci/hotplug/acpiphp.h
@@ -0,0 +1,187 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * ACPI PCI Hot Plug Controller Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM Corp.
+ * Copyright (C) 2002 Hiroshi Aono (h-aono@ap.jp.nec.com)
+ * Copyright (C) 2002,2003 Takayoshi Kochi (t-kochi@bq.jp.nec.com)
+ * Copyright (C) 2002,2003 NEC Corporation
+ * Copyright (C) 2003-2005 Matthew Wilcox (willy@infradead.org)
+ * Copyright (C) 2003-2005 Hewlett Packard
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <gregkh@us.ibm.com>,
+ *		    <t-kochi@bq.jp.nec.com>
+ *
+ */
+
+#ifndef _ACPIPHP_H
+#define _ACPIPHP_H
+
+#include <linux/acpi.h>
+#include <linux/mutex.h>
+#include <linux/pci_hotplug.h>
+
+struct acpiphp_context;
+struct acpiphp_bridge;
+struct acpiphp_slot;
+
+/*
+ * struct slot - slot information for each *physical* slot
+ */
+struct slot {
+	struct hotplug_slot	hotplug_slot;
+	struct acpiphp_slot	*acpi_slot;
+	unsigned int sun;	/* ACPI _SUN (Slot User Number) value */
+};
+
+static inline const char *slot_name(struct slot *slot)
+{
+	return hotplug_slot_name(&slot->hotplug_slot);
+}
+
+static inline struct slot *to_slot(struct hotplug_slot *hotplug_slot)
+{
+	return container_of(hotplug_slot, struct slot, hotplug_slot);
+}
+
+/*
+ * struct acpiphp_bridge - PCI bridge information
+ *
+ * for each bridge device in ACPI namespace
+ */
+struct acpiphp_bridge {
+	struct list_head list;
+	struct list_head slots;
+	struct kref ref;
+
+	struct acpiphp_context *context;
+
+	int nr_slots;
+
+	/* This bus (host bridge) or Secondary bus (PCI-to-PCI bridge) */
+	struct pci_bus *pci_bus;
+
+	/* PCI-to-PCI bridge device */
+	struct pci_dev *pci_dev;
+
+	bool is_going_away;
+};
+
+
+/*
+ * struct acpiphp_slot - PCI slot information
+ *
+ * PCI slot information for each *physical* PCI slot
+ */
+struct acpiphp_slot {
+	struct list_head node;
+	struct pci_bus *bus;
+	struct list_head funcs;		/* one slot may have different
+					   objects (i.e. for each function) */
+	struct slot *slot;
+
+	u8		device;		/* pci device# */
+	u32		flags;		/* see below */
+};
+
+
+/*
+ * struct acpiphp_func - PCI function information
+ *
+ * PCI function information for each object in ACPI namespace
+ * typically 8 objects per slot (i.e. for each PCI function)
+ */
+struct acpiphp_func {
+	struct acpiphp_bridge *parent;
+	struct acpiphp_slot *slot;
+
+	struct list_head sibling;
+
+	u8		function;	/* pci function# */
+	u32		flags;		/* see below */
+};
+
+struct acpiphp_context {
+	struct acpi_hotplug_context hp;
+	struct acpiphp_func func;
+	struct acpiphp_bridge *bridge;
+	unsigned int refcount;
+};
+
+static inline struct acpiphp_context *to_acpiphp_context(struct acpi_hotplug_context *hp)
+{
+	return container_of(hp, struct acpiphp_context, hp);
+}
+
+static inline struct acpiphp_context *func_to_context(struct acpiphp_func *func)
+{
+	return container_of(func, struct acpiphp_context, func);
+}
+
+static inline struct acpi_device *func_to_acpi_device(struct acpiphp_func *func)
+{
+	return func_to_context(func)->hp.self;
+}
+
+static inline acpi_handle func_to_handle(struct acpiphp_func *func)
+{
+	return func_to_acpi_device(func)->handle;
+}
+
+struct acpiphp_root_context {
+	struct acpi_hotplug_context hp;
+	struct acpiphp_bridge *root_bridge;
+};
+
+static inline struct acpiphp_root_context *to_acpiphp_root_context(struct acpi_hotplug_context *hp)
+{
+	return container_of(hp, struct acpiphp_root_context, hp);
+}
+
+/*
+ * struct acpiphp_attention_info - device specific attention registration
+ *
+ * ACPI has no generic method of setting/getting attention status
+ * this allows for device specific driver registration
+ */
+struct acpiphp_attention_info {
+	int (*set_attn)(struct hotplug_slot *slot, u8 status);
+	int (*get_attn)(struct hotplug_slot *slot, u8 *status);
+	struct module *owner;
+};
+
+/* ACPI _STA method value (ignore bit 4; battery present) */
+#define ACPI_STA_ALL			(0x0000000f)
+
+/* slot flags */
+
+#define SLOT_ENABLED		(0x00000001)
+#define SLOT_IS_GOING_AWAY	(0x00000002)
+
+/* function flags */
+
+#define FUNC_HAS_STA		(0x00000001)
+#define FUNC_HAS_EJ0		(0x00000002)
+
+/* function prototypes */
+
+/* acpiphp_core.c */
+int acpiphp_register_attention(struct acpiphp_attention_info *info);
+int acpiphp_unregister_attention(struct acpiphp_attention_info *info);
+int acpiphp_register_hotplug_slot(struct acpiphp_slot *slot, unsigned int sun);
+void acpiphp_unregister_hotplug_slot(struct acpiphp_slot *slot);
+
+int acpiphp_enable_slot(struct acpiphp_slot *slot);
+int acpiphp_disable_slot(struct acpiphp_slot *slot);
+u8 acpiphp_get_power_status(struct acpiphp_slot *slot);
+u8 acpiphp_get_latch_status(struct acpiphp_slot *slot);
+u8 acpiphp_get_adapter_status(struct acpiphp_slot *slot);
+
+/* variables */
+extern bool acpiphp_disabled;
+
+#endif /* _ACPIPHP_H */
diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
new file mode 100644
index 000000000..c02257f4b
--- /dev/null
+++ b/drivers/pci/hotplug/acpiphp_core.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * ACPI PCI Hot Plug Controller Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM Corp.
+ * Copyright (C) 2002 Hiroshi Aono (h-aono@ap.jp.nec.com)
+ * Copyright (C) 2002,2003 Takayoshi Kochi (t-kochi@bq.jp.nec.com)
+ * Copyright (C) 2002,2003 NEC Corporation
+ * Copyright (C) 2003-2005 Matthew Wilcox (willy@infradead.org)
+ * Copyright (C) 2003-2005 Hewlett Packard
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <kristen.c.accardi@intel.com>
+ *
+ */
+
+#define pr_fmt(fmt) "acpiphp: " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/pci_hotplug.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include "acpiphp.h"
+
+/* name size which is used for entries in pcihpfs */
+#define SLOT_NAME_SIZE  21              /* {_SUN} */
+
+bool acpiphp_disabled;
+
+/* local variables */
+static struct acpiphp_attention_info *attention_info;
+
+#define DRIVER_VERSION	"0.5"
+#define DRIVER_AUTHOR	"Greg Kroah-Hartman <gregkh@us.ibm.com>, Takayoshi Kochi <t-kochi@bq.jp.nec.com>, Matthew Wilcox <willy@infradead.org>"
+#define DRIVER_DESC	"ACPI Hot Plug PCI Controller Driver"
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_PARM_DESC(disable, "disable acpiphp driver");
+module_param_named(disable, acpiphp_disabled, bool, 0444);
+
+static int enable_slot(struct hotplug_slot *slot);
+static int disable_slot(struct hotplug_slot *slot);
+static int set_attention_status(struct hotplug_slot *slot, u8 value);
+static int get_power_status(struct hotplug_slot *slot, u8 *value);
+static int get_attention_status(struct hotplug_slot *slot, u8 *value);
+static int get_latch_status(struct hotplug_slot *slot, u8 *value);
+static int get_adapter_status(struct hotplug_slot *slot, u8 *value);
+
+static const struct hotplug_slot_ops acpi_hotplug_slot_ops = {
+	.enable_slot		= enable_slot,
+	.disable_slot		= disable_slot,
+	.set_attention_status	= set_attention_status,
+	.get_power_status	= get_power_status,
+	.get_attention_status	= get_attention_status,
+	.get_latch_status	= get_latch_status,
+	.get_adapter_status	= get_adapter_status,
+};
+
+/**
+ * acpiphp_register_attention - set attention LED callback
+ * @info: must be completely filled with LED callbacks
+ *
+ * Description: This is used to register a hardware specific ACPI
+ * driver that manipulates the attention LED.  All the fields in
+ * info must be set.
+ */
+int acpiphp_register_attention(struct acpiphp_attention_info *info)
+{
+	int retval = -EINVAL;
+
+	if (info && info->owner && info->set_attn &&
+			info->get_attn && !attention_info) {
+		retval = 0;
+		attention_info = info;
+	}
+	return retval;
+}
+EXPORT_SYMBOL_GPL(acpiphp_register_attention);
+
+
+/**
+ * acpiphp_unregister_attention - unset attention LED callback
+ * @info: must match the pointer used to register
+ *
+ * Description: This is used to un-register a hardware specific acpi
+ * driver that manipulates the attention LED.  The pointer to the
+ * info struct must be the same as the one used to set it.
+ */
+int acpiphp_unregister_attention(struct acpiphp_attention_info *info)
+{
+	int retval = -EINVAL;
+
+	if (info && attention_info == info) {
+		attention_info = NULL;
+		retval = 0;
+	}
+	return retval;
+}
+EXPORT_SYMBOL_GPL(acpiphp_unregister_attention);
+
+
+/**
+ * enable_slot - power on and enable a slot
+ * @hotplug_slot: slot to enable
+ *
+ * Actual tasks are done in acpiphp_enable_slot()
+ */
+static int enable_slot(struct hotplug_slot *hotplug_slot)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+
+	pr_debug("%s - physical_slot = %s\n", __func__, slot_name(slot));
+
+	/* enable the specified slot */
+	return acpiphp_enable_slot(slot->acpi_slot);
+}
+
+
+/**
+ * disable_slot - disable and power off a slot
+ * @hotplug_slot: slot to disable
+ *
+ * Actual tasks are done in acpiphp_disable_slot()
+ */
+static int disable_slot(struct hotplug_slot *hotplug_slot)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+
+	pr_debug("%s - physical_slot = %s\n", __func__, slot_name(slot));
+
+	/* disable the specified slot */
+	return acpiphp_disable_slot(slot->acpi_slot);
+}
+
+
+/**
+ * set_attention_status - set attention LED
+ * @hotplug_slot: slot to set attention LED on
+ * @status: value to set attention LED to (0 or 1)
+ *
+ * attention status LED, so we use a callback that
+ * was registered with us.  This allows hardware specific
+ * ACPI implementations to blink the light for us.
+ */
+static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
+{
+	int retval = -ENODEV;
+
+	pr_debug("%s - physical_slot = %s\n", __func__,
+		hotplug_slot_name(hotplug_slot));
+
+	if (attention_info && try_module_get(attention_info->owner)) {
+		retval = attention_info->set_attn(hotplug_slot, status);
+		module_put(attention_info->owner);
+	} else
+		attention_info = NULL;
+	return retval;
+}
+
+
+/**
+ * get_power_status - get power status of a slot
+ * @hotplug_slot: slot to get status
+ * @value: pointer to store status
+ *
+ * Some platforms may not implement _STA method properly.
+ * In that case, the value returned may not be reliable.
+ */
+static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+
+	pr_debug("%s - physical_slot = %s\n", __func__, slot_name(slot));
+
+	*value = acpiphp_get_power_status(slot->acpi_slot);
+
+	return 0;
+}
+
+
+/**
+ * get_attention_status - get attention LED status
+ * @hotplug_slot: slot to get status from
+ * @value: returns with value of attention LED
+ *
+ * ACPI doesn't have known method to determine the state
+ * of the attention status LED, so we use a callback that
+ * was registered with us.  This allows hardware specific
+ * ACPI implementations to determine its state.
+ */
+static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	int retval = -EINVAL;
+
+	pr_debug("%s - physical_slot = %s\n", __func__,
+		hotplug_slot_name(hotplug_slot));
+
+	if (attention_info && try_module_get(attention_info->owner)) {
+		retval = attention_info->get_attn(hotplug_slot, value);
+		module_put(attention_info->owner);
+	} else
+		attention_info = NULL;
+	return retval;
+}
+
+
+/**
+ * get_latch_status - get latch status of a slot
+ * @hotplug_slot: slot to get status
+ * @value: pointer to store status
+ *
+ * ACPI doesn't provide any formal means to access latch status.
+ * Instead, we fake latch status from _STA.
+ */
+static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+
+	pr_debug("%s - physical_slot = %s\n", __func__, slot_name(slot));
+
+	*value = acpiphp_get_latch_status(slot->acpi_slot);
+
+	return 0;
+}
+
+
+/**
+ * get_adapter_status - get adapter status of a slot
+ * @hotplug_slot: slot to get status
+ * @value: pointer to store status
+ *
+ * ACPI doesn't provide any formal means to access adapter status.
+ * Instead, we fake adapter status from _STA.
+ */
+static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+
+	pr_debug("%s - physical_slot = %s\n", __func__, slot_name(slot));
+
+	*value = acpiphp_get_adapter_status(slot->acpi_slot);
+
+	return 0;
+}
+
+/* callback routine to initialize 'struct slot' for each slot */
+int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot,
+				  unsigned int sun)
+{
+	struct slot *slot;
+	int retval = -ENOMEM;
+	char name[SLOT_NAME_SIZE];
+
+	slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+	if (!slot)
+		goto error;
+
+	slot->hotplug_slot.ops = &acpi_hotplug_slot_ops;
+
+	slot->acpi_slot = acpiphp_slot;
+
+	acpiphp_slot->slot = slot;
+	slot->sun = sun;
+	snprintf(name, SLOT_NAME_SIZE, "%u", sun);
+
+	retval = pci_hp_register(&slot->hotplug_slot, acpiphp_slot->bus,
+				 acpiphp_slot->device, name);
+	if (retval == -EBUSY)
+		goto error_slot;
+	if (retval) {
+		pr_err("pci_hp_register failed with error %d\n", retval);
+		goto error_slot;
+	}
+
+	pr_info("Slot [%s] registered\n", slot_name(slot));
+
+	return 0;
+error_slot:
+	kfree(slot);
+error:
+	return retval;
+}
+
+
+void acpiphp_unregister_hotplug_slot(struct acpiphp_slot *acpiphp_slot)
+{
+	struct slot *slot = acpiphp_slot->slot;
+
+	pr_info("Slot [%s] unregistered\n", slot_name(slot));
+
+	pci_hp_deregister(&slot->hotplug_slot);
+	kfree(slot);
+}
+
+
+void __init acpiphp_init(void)
+{
+	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "%s\n",
+		acpiphp_disabled ? ", disabled by user; please report a bug"
+				 : "");
+}
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
new file mode 100644
index 000000000..5b1f271c6
--- /dev/null
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -0,0 +1,1068 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * ACPI PCI HotPlug glue functions to ACPI CA subsystem
+ *
+ * Copyright (C) 2002,2003 Takayoshi Kochi (t-kochi@bq.jp.nec.com)
+ * Copyright (C) 2002 Hiroshi Aono (h-aono@ap.jp.nec.com)
+ * Copyright (C) 2002,2003 NEC Corporation
+ * Copyright (C) 2003-2005 Matthew Wilcox (willy@infradead.org)
+ * Copyright (C) 2003-2005 Hewlett Packard
+ * Copyright (C) 2005 Rajesh Shah (rajesh.shah@intel.com)
+ * Copyright (C) 2005 Intel Corporation
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <kristen.c.accardi@intel.com>
+ *
+ */
+
+/*
+ * Lifetime rules for pci_dev:
+ *  - The one in acpiphp_bridge has its refcount elevated by pci_get_slot()
+ *    when the bridge is scanned and it loses a refcount when the bridge
+ *    is removed.
+ *  - When a P2P bridge is present, we elevate the refcount on the subordinate
+ *    bus. It loses the refcount when the driver unloads.
+ */
+
+#define pr_fmt(fmt) "acpiphp_glue: " fmt
+
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <linux/pci-acpi.h>
+#include <linux/pm_runtime.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+
+#include "../pci.h"
+#include "acpiphp.h"
+
+static LIST_HEAD(bridge_list);
+static DEFINE_MUTEX(bridge_mutex);
+
+static int acpiphp_hotplug_notify(struct acpi_device *adev, u32 type);
+static void acpiphp_post_dock_fixup(struct acpi_device *adev);
+static void acpiphp_sanitize_bus(struct pci_bus *bus);
+static void hotplug_event(u32 type, struct acpiphp_context *context);
+static void free_bridge(struct kref *kref);
+
+/**
+ * acpiphp_init_context - Create hotplug context and grab a reference to it.
+ * @adev: ACPI device object to create the context for.
+ *
+ * Call under acpi_hp_context_lock.
+ */
+static struct acpiphp_context *acpiphp_init_context(struct acpi_device *adev)
+{
+	struct acpiphp_context *context;
+
+	context = kzalloc(sizeof(*context), GFP_KERNEL);
+	if (!context)
+		return NULL;
+
+	context->refcount = 1;
+	context->hp.notify = acpiphp_hotplug_notify;
+	context->hp.fixup = acpiphp_post_dock_fixup;
+	acpi_set_hp_context(adev, &context->hp);
+	return context;
+}
+
+/**
+ * acpiphp_get_context - Get hotplug context and grab a reference to it.
+ * @adev: ACPI device object to get the context for.
+ *
+ * Call under acpi_hp_context_lock.
+ */
+static struct acpiphp_context *acpiphp_get_context(struct acpi_device *adev)
+{
+	struct acpiphp_context *context;
+
+	if (!adev->hp)
+		return NULL;
+
+	context = to_acpiphp_context(adev->hp);
+	context->refcount++;
+	return context;
+}
+
+/**
+ * acpiphp_put_context - Drop a reference to ACPI hotplug context.
+ * @context: ACPI hotplug context to drop a reference to.
+ *
+ * The context object is removed if there are no more references to it.
+ *
+ * Call under acpi_hp_context_lock.
+ */
+static void acpiphp_put_context(struct acpiphp_context *context)
+{
+	if (--context->refcount)
+		return;
+
+	WARN_ON(context->bridge);
+	context->hp.self->hp = NULL;
+	kfree(context);
+}
+
+static inline void get_bridge(struct acpiphp_bridge *bridge)
+{
+	kref_get(&bridge->ref);
+}
+
+static inline void put_bridge(struct acpiphp_bridge *bridge)
+{
+	kref_put(&bridge->ref, free_bridge);
+}
+
+static struct acpiphp_context *acpiphp_grab_context(struct acpi_device *adev)
+{
+	struct acpiphp_context *context;
+
+	acpi_lock_hp_context();
+
+	context = acpiphp_get_context(adev);
+	if (!context)
+		goto unlock;
+
+	if (context->func.parent->is_going_away) {
+		acpiphp_put_context(context);
+		context = NULL;
+		goto unlock;
+	}
+
+	get_bridge(context->func.parent);
+	acpiphp_put_context(context);
+
+unlock:
+	acpi_unlock_hp_context();
+	return context;
+}
+
+static void acpiphp_let_context_go(struct acpiphp_context *context)
+{
+	put_bridge(context->func.parent);
+}
+
+static void free_bridge(struct kref *kref)
+{
+	struct acpiphp_context *context;
+	struct acpiphp_bridge *bridge;
+	struct acpiphp_slot *slot, *next;
+	struct acpiphp_func *func, *tmp;
+
+	acpi_lock_hp_context();
+
+	bridge = container_of(kref, struct acpiphp_bridge, ref);
+
+	list_for_each_entry_safe(slot, next, &bridge->slots, node) {
+		list_for_each_entry_safe(func, tmp, &slot->funcs, sibling)
+			acpiphp_put_context(func_to_context(func));
+
+		kfree(slot);
+	}
+
+	context = bridge->context;
+	/* Root bridges will not have hotplug context. */
+	if (context) {
+		/* Release the reference taken by acpiphp_enumerate_slots(). */
+		put_bridge(context->func.parent);
+		context->bridge = NULL;
+		acpiphp_put_context(context);
+	}
+
+	put_device(&bridge->pci_bus->dev);
+	pci_dev_put(bridge->pci_dev);
+	kfree(bridge);
+
+	acpi_unlock_hp_context();
+}
+
+/**
+ * acpiphp_post_dock_fixup - Post-dock fixups for PCI devices.
+ * @adev: ACPI device object corresponding to a PCI device.
+ *
+ * TBD - figure out a way to only call fixups for systems that require them.
+ */
+static void acpiphp_post_dock_fixup(struct acpi_device *adev)
+{
+	struct acpiphp_context *context = acpiphp_grab_context(adev);
+	struct pci_bus *bus;
+	u32 buses;
+
+	if (!context)
+		return;
+
+	bus = context->func.slot->bus;
+	if (!bus->self)
+		goto out;
+
+	/* fixup bad _DCK function that rewrites
+	 * secondary bridge on slot
+	 */
+	pci_read_config_dword(bus->self, PCI_PRIMARY_BUS, &buses);
+
+	if (((buses >> 8) & 0xff) != bus->busn_res.start) {
+		buses = (buses & 0xff000000)
+			| ((unsigned int)(bus->primary)     <<  0)
+			| ((unsigned int)(bus->busn_res.start)   <<  8)
+			| ((unsigned int)(bus->busn_res.end) << 16);
+		pci_write_config_dword(bus->self, PCI_PRIMARY_BUS, buses);
+	}
+
+ out:
+	acpiphp_let_context_go(context);
+}
+
+/**
+ * acpiphp_add_context - Add ACPIPHP context to an ACPI device object.
+ * @handle: ACPI handle of the object to add a context to.
+ * @lvl: Not used.
+ * @data: The object's parent ACPIPHP bridge.
+ * @rv: Not used.
+ */
+static acpi_status acpiphp_add_context(acpi_handle handle, u32 lvl, void *data,
+				       void **rv)
+{
+	struct acpi_device *adev = acpi_fetch_acpi_dev(handle);
+	struct acpiphp_bridge *bridge = data;
+	struct acpiphp_context *context;
+	struct acpiphp_slot *slot;
+	struct acpiphp_func *newfunc;
+	acpi_status status = AE_OK;
+	unsigned long long adr;
+	int device, function;
+	struct pci_bus *pbus = bridge->pci_bus;
+	struct pci_dev *pdev = bridge->pci_dev;
+	u32 val;
+
+	if (!adev)
+		return AE_OK;
+
+	status = acpi_evaluate_integer(handle, "_ADR", NULL, &adr);
+	if (ACPI_FAILURE(status)) {
+		if (status != AE_NOT_FOUND)
+			acpi_handle_warn(handle,
+				"can't evaluate _ADR (%#x)\n", status);
+		return AE_OK;
+	}
+
+	device = (adr >> 16) & 0xffff;
+	function = adr & 0xffff;
+
+	acpi_lock_hp_context();
+	context = acpiphp_init_context(adev);
+	if (!context) {
+		acpi_unlock_hp_context();
+		acpi_handle_err(handle, "No hotplug context\n");
+		return AE_NOT_EXIST;
+	}
+	newfunc = &context->func;
+	newfunc->function = function;
+	newfunc->parent = bridge;
+	acpi_unlock_hp_context();
+
+	/*
+	 * If this is a dock device, its _EJ0 should be executed by the dock
+	 * notify handler after calling _DCK.
+	 */
+	if (!is_dock_device(adev) && acpi_has_method(handle, "_EJ0"))
+		newfunc->flags = FUNC_HAS_EJ0;
+
+	if (acpi_has_method(handle, "_STA"))
+		newfunc->flags |= FUNC_HAS_STA;
+
+	/* search for objects that share the same slot */
+	list_for_each_entry(slot, &bridge->slots, node)
+		if (slot->device == device)
+			goto slot_found;
+
+	slot = kzalloc(sizeof(struct acpiphp_slot), GFP_KERNEL);
+	if (!slot) {
+		acpi_lock_hp_context();
+		acpiphp_put_context(context);
+		acpi_unlock_hp_context();
+		return AE_NO_MEMORY;
+	}
+
+	slot->bus = bridge->pci_bus;
+	slot->device = device;
+	INIT_LIST_HEAD(&slot->funcs);
+
+	list_add_tail(&slot->node, &bridge->slots);
+
+	/*
+	 * Expose slots to user space for functions that have _EJ0 or _RMV or
+	 * are located in dock stations.  Do not expose them for devices handled
+	 * by the native PCIe hotplug (PCIeHP) or standard PCI hotplug
+	 * (SHPCHP), because that code is supposed to expose slots to user
+	 * space in those cases.
+	 */
+	if ((acpi_pci_check_ejectable(pbus, handle) || is_dock_device(adev))
+	    && !(pdev && hotplug_is_native(pdev))) {
+		unsigned long long sun;
+		int retval;
+
+		bridge->nr_slots++;
+		status = acpi_evaluate_integer(handle, "_SUN", NULL, &sun);
+		if (ACPI_FAILURE(status))
+			sun = bridge->nr_slots;
+
+		pr_debug("found ACPI PCI Hotplug slot %llu at PCI %04x:%02x:%02x\n",
+		    sun, pci_domain_nr(pbus), pbus->number, device);
+
+		retval = acpiphp_register_hotplug_slot(slot, sun);
+		if (retval) {
+			slot->slot = NULL;
+			bridge->nr_slots--;
+			if (retval == -EBUSY)
+				pr_warn("Slot %llu already registered by another hotplug driver\n", sun);
+			else
+				pr_warn("acpiphp_register_hotplug_slot failed (err code = 0x%x)\n", retval);
+		}
+		/* Even if the slot registration fails, we can still use it. */
+	}
+
+ slot_found:
+	newfunc->slot = slot;
+	list_add_tail(&newfunc->sibling, &slot->funcs);
+
+	if (pci_bus_read_dev_vendor_id(pbus, PCI_DEVFN(device, function),
+				       &val, 60*1000))
+		slot->flags |= SLOT_ENABLED;
+
+	return AE_OK;
+}
+
+static void cleanup_bridge(struct acpiphp_bridge *bridge)
+{
+	struct acpiphp_slot *slot;
+	struct acpiphp_func *func;
+
+	list_for_each_entry(slot, &bridge->slots, node) {
+		list_for_each_entry(func, &slot->funcs, sibling) {
+			struct acpi_device *adev = func_to_acpi_device(func);
+
+			acpi_lock_hp_context();
+			adev->hp->notify = NULL;
+			adev->hp->fixup = NULL;
+			acpi_unlock_hp_context();
+		}
+		slot->flags |= SLOT_IS_GOING_AWAY;
+		if (slot->slot)
+			acpiphp_unregister_hotplug_slot(slot);
+	}
+
+	mutex_lock(&bridge_mutex);
+	list_del(&bridge->list);
+	mutex_unlock(&bridge_mutex);
+
+	acpi_lock_hp_context();
+	bridge->is_going_away = true;
+	acpi_unlock_hp_context();
+}
+
+/**
+ * acpiphp_max_busnr - return the highest reserved bus number under the given bus.
+ * @bus: bus to start search with
+ */
+static unsigned char acpiphp_max_busnr(struct pci_bus *bus)
+{
+	struct pci_bus *tmp;
+	unsigned char max, n;
+
+	/*
+	 * pci_bus_max_busnr will return the highest
+	 * reserved busnr for all these children.
+	 * that is equivalent to the bus->subordinate
+	 * value.  We don't want to use the parent's
+	 * bus->subordinate value because it could have
+	 * padding in it.
+	 */
+	max = bus->busn_res.start;
+
+	list_for_each_entry(tmp, &bus->children, node) {
+		n = pci_bus_max_busnr(tmp);
+		if (n > max)
+			max = n;
+	}
+	return max;
+}
+
+static void acpiphp_set_acpi_region(struct acpiphp_slot *slot)
+{
+	struct acpiphp_func *func;
+
+	list_for_each_entry(func, &slot->funcs, sibling) {
+		/* _REG is optional, we don't care about if there is failure */
+		acpi_evaluate_reg(func_to_handle(func),
+				  ACPI_ADR_SPACE_PCI_CONFIG,
+				  ACPI_REG_CONNECT);
+	}
+}
+
+static void check_hotplug_bridge(struct acpiphp_slot *slot, struct pci_dev *dev)
+{
+	struct acpiphp_func *func;
+
+	/* quirk, or pcie could set it already */
+	if (dev->is_hotplug_bridge)
+		return;
+
+	/*
+	 * In the PCIe case, only Root Ports and Downstream Ports are capable of
+	 * accommodating hotplug devices, so avoid marking Upstream Ports as
+	 * "hotplug bridges".
+	 */
+	if (pci_is_pcie(dev) && pci_pcie_type(dev) == PCI_EXP_TYPE_UPSTREAM)
+		return;
+
+	list_for_each_entry(func, &slot->funcs, sibling) {
+		if (PCI_FUNC(dev->devfn) == func->function) {
+			dev->is_hotplug_bridge = 1;
+			break;
+		}
+	}
+}
+
+static int acpiphp_rescan_slot(struct acpiphp_slot *slot)
+{
+	struct acpiphp_func *func;
+
+	list_for_each_entry(func, &slot->funcs, sibling) {
+		struct acpi_device *adev = func_to_acpi_device(func);
+
+		acpi_bus_scan(adev->handle);
+		if (acpi_device_enumerated(adev))
+			acpi_device_set_power(adev, ACPI_STATE_D0);
+	}
+	return pci_scan_slot(slot->bus, PCI_DEVFN(slot->device, 0));
+}
+
+static void acpiphp_native_scan_bridge(struct pci_dev *bridge)
+{
+	struct pci_bus *bus = bridge->subordinate;
+	struct pci_dev *dev;
+	int max;
+
+	if (!bus)
+		return;
+
+	max = bus->busn_res.start;
+	/* Scan already configured non-hotplug bridges */
+	for_each_pci_bridge(dev, bus) {
+		if (!hotplug_is_native(dev))
+			max = pci_scan_bridge(bus, dev, max, 0);
+	}
+
+	/* Scan non-hotplug bridges that need to be reconfigured */
+	for_each_pci_bridge(dev, bus) {
+		if (hotplug_is_native(dev))
+			continue;
+
+		max = pci_scan_bridge(bus, dev, max, 1);
+		if (dev->subordinate) {
+			pcibios_resource_survey_bus(dev->subordinate);
+			pci_bus_size_bridges(dev->subordinate);
+			pci_bus_assign_resources(dev->subordinate);
+		}
+	}
+}
+
+/**
+ * enable_slot - enable, configure a slot
+ * @slot: slot to be enabled
+ * @bridge: true if enable is for the whole bridge (not a single slot)
+ *
+ * This function should be called per *physical slot*,
+ * not per each slot object in ACPI namespace.
+ */
+static void enable_slot(struct acpiphp_slot *slot, bool bridge)
+{
+	struct pci_dev *dev;
+	struct pci_bus *bus = slot->bus;
+	struct acpiphp_func *func;
+
+	if (bridge && bus->self && hotplug_is_native(bus->self)) {
+		/*
+		 * If native hotplug is used, it will take care of hotplug
+		 * slot management and resource allocation for hotplug
+		 * bridges. However, ACPI hotplug may still be used for
+		 * non-hotplug bridges to bring in additional devices such
+		 * as a Thunderbolt host controller.
+		 */
+		for_each_pci_bridge(dev, bus) {
+			if (PCI_SLOT(dev->devfn) == slot->device)
+				acpiphp_native_scan_bridge(dev);
+		}
+	} else {
+		LIST_HEAD(add_list);
+		int max, pass;
+
+		acpiphp_rescan_slot(slot);
+		max = acpiphp_max_busnr(bus);
+		for (pass = 0; pass < 2; pass++) {
+			for_each_pci_bridge(dev, bus) {
+				if (PCI_SLOT(dev->devfn) != slot->device)
+					continue;
+
+				max = pci_scan_bridge(bus, dev, max, pass);
+				if (pass && dev->subordinate) {
+					check_hotplug_bridge(slot, dev);
+					pcibios_resource_survey_bus(dev->subordinate);
+					__pci_bus_size_bridges(dev->subordinate,
+							       &add_list);
+				}
+			}
+		}
+		__pci_bus_assign_resources(bus, &add_list, NULL);
+	}
+
+	acpiphp_sanitize_bus(bus);
+	pcie_bus_configure_settings(bus);
+	acpiphp_set_acpi_region(slot);
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		/* Assume that newly added devices are powered on already. */
+		if (!pci_dev_is_added(dev))
+			dev->current_state = PCI_D0;
+	}
+
+	pci_bus_add_devices(bus);
+
+	slot->flags |= SLOT_ENABLED;
+	list_for_each_entry(func, &slot->funcs, sibling) {
+		dev = pci_get_slot(bus, PCI_DEVFN(slot->device,
+						  func->function));
+		if (!dev) {
+			/* Do not set SLOT_ENABLED flag if some funcs
+			   are not added. */
+			slot->flags &= ~SLOT_ENABLED;
+			continue;
+		}
+		pci_dev_put(dev);
+	}
+}
+
+/**
+ * disable_slot - disable a slot
+ * @slot: ACPI PHP slot
+ */
+static void disable_slot(struct acpiphp_slot *slot)
+{
+	struct pci_bus *bus = slot->bus;
+	struct pci_dev *dev, *prev;
+	struct acpiphp_func *func;
+
+	/*
+	 * enable_slot() enumerates all functions in this device via
+	 * pci_scan_slot(), whether they have associated ACPI hotplug
+	 * methods (_EJ0, etc.) or not.  Therefore, we remove all functions
+	 * here.
+	 */
+	list_for_each_entry_safe_reverse(dev, prev, &bus->devices, bus_list)
+		if (PCI_SLOT(dev->devfn) == slot->device)
+			pci_stop_and_remove_bus_device(dev);
+
+	list_for_each_entry(func, &slot->funcs, sibling)
+		acpi_bus_trim(func_to_acpi_device(func));
+
+	slot->flags &= ~SLOT_ENABLED;
+}
+
+static bool slot_no_hotplug(struct acpiphp_slot *slot)
+{
+	struct pci_bus *bus = slot->bus;
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		if (PCI_SLOT(dev->devfn) == slot->device && dev->ignore_hotplug)
+			return true;
+	}
+	return false;
+}
+
+/**
+ * get_slot_status - get ACPI slot status
+ * @slot: ACPI PHP slot
+ *
+ * If a slot has _STA for each function and if any one of them
+ * returned non-zero status, return it.
+ *
+ * If a slot doesn't have _STA and if any one of its functions'
+ * configuration space is configured, return 0x0f as a _STA.
+ *
+ * Otherwise return 0.
+ */
+static unsigned int get_slot_status(struct acpiphp_slot *slot)
+{
+	unsigned long long sta = 0;
+	struct acpiphp_func *func;
+	u32 dvid;
+
+	list_for_each_entry(func, &slot->funcs, sibling) {
+		if (func->flags & FUNC_HAS_STA) {
+			acpi_status status;
+
+			status = acpi_evaluate_integer(func_to_handle(func),
+						       "_STA", NULL, &sta);
+			if (ACPI_SUCCESS(status) && sta)
+				break;
+		} else {
+			if (pci_bus_read_dev_vendor_id(slot->bus,
+					PCI_DEVFN(slot->device, func->function),
+					&dvid, 0)) {
+				sta = ACPI_STA_ALL;
+				break;
+			}
+		}
+	}
+
+	if (!sta) {
+		/*
+		 * Check for the slot itself since it may be that the
+		 * ACPI slot is a device below PCIe upstream port so in
+		 * that case it may not even be reachable yet.
+		 */
+		if (pci_bus_read_dev_vendor_id(slot->bus,
+				PCI_DEVFN(slot->device, 0), &dvid, 0)) {
+			sta = ACPI_STA_ALL;
+		}
+	}
+
+	return (unsigned int)sta;
+}
+
+static inline bool device_status_valid(unsigned int sta)
+{
+	/*
+	 * ACPI spec says that _STA may return bit 0 clear with bit 3 set
+	 * if the device is valid but does not require a device driver to be
+	 * loaded (Section 6.3.7 of ACPI 5.0A).
+	 */
+	unsigned int mask = ACPI_STA_DEVICE_ENABLED | ACPI_STA_DEVICE_FUNCTIONING;
+	return (sta & mask) == mask;
+}
+
+/**
+ * trim_stale_devices - remove PCI devices that are not responding.
+ * @dev: PCI device to start walking the hierarchy from.
+ */
+static void trim_stale_devices(struct pci_dev *dev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&dev->dev);
+	struct pci_bus *bus = dev->subordinate;
+	bool alive = dev->ignore_hotplug;
+
+	if (adev) {
+		acpi_status status;
+		unsigned long long sta;
+
+		status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta);
+		alive = alive || (ACPI_SUCCESS(status) && device_status_valid(sta));
+	}
+	if (!alive)
+		alive = pci_device_is_present(dev);
+
+	if (!alive) {
+		pci_dev_set_disconnected(dev, NULL);
+		if (pci_has_subordinate(dev))
+			pci_walk_bus(dev->subordinate, pci_dev_set_disconnected,
+				     NULL);
+
+		pci_stop_and_remove_bus_device(dev);
+		if (adev)
+			acpi_bus_trim(adev);
+	} else if (bus) {
+		struct pci_dev *child, *tmp;
+
+		/* The device is a bridge. so check the bus below it. */
+		pm_runtime_get_sync(&dev->dev);
+		list_for_each_entry_safe_reverse(child, tmp, &bus->devices, bus_list)
+			trim_stale_devices(child);
+
+		pm_runtime_put(&dev->dev);
+	}
+}
+
+/**
+ * acpiphp_check_bridge - re-enumerate devices
+ * @bridge: where to begin re-enumeration
+ *
+ * Iterate over all slots under this bridge and make sure that if a
+ * card is present they are enabled, and if not they are disabled.
+ */
+static void acpiphp_check_bridge(struct acpiphp_bridge *bridge)
+{
+	struct acpiphp_slot *slot;
+
+	/* Bail out if the bridge is going away. */
+	if (bridge->is_going_away)
+		return;
+
+	if (bridge->pci_dev)
+		pm_runtime_get_sync(&bridge->pci_dev->dev);
+
+	list_for_each_entry(slot, &bridge->slots, node) {
+		struct pci_bus *bus = slot->bus;
+		struct pci_dev *dev, *tmp;
+
+		if (slot_no_hotplug(slot)) {
+			; /* do nothing */
+		} else if (device_status_valid(get_slot_status(slot))) {
+			/* remove stale devices if any */
+			list_for_each_entry_safe_reverse(dev, tmp,
+							 &bus->devices, bus_list)
+				if (PCI_SLOT(dev->devfn) == slot->device)
+					trim_stale_devices(dev);
+
+			/* configure all functions */
+			enable_slot(slot, true);
+		} else {
+			disable_slot(slot);
+		}
+	}
+
+	if (bridge->pci_dev)
+		pm_runtime_put(&bridge->pci_dev->dev);
+}
+
+/*
+ * Remove devices for which we could not assign resources, call
+ * arch specific code to fix-up the bus
+ */
+static void acpiphp_sanitize_bus(struct pci_bus *bus)
+{
+	struct pci_dev *dev, *tmp;
+	int i;
+	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM;
+
+	list_for_each_entry_safe_reverse(dev, tmp, &bus->devices, bus_list) {
+		for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) {
+			struct resource *res = &dev->resource[i];
+			if ((res->flags & type_mask) && !res->start &&
+					res->end) {
+				/* Could not assign a required resources
+				 * for this device, remove it */
+				pci_stop_and_remove_bus_device(dev);
+				break;
+			}
+		}
+	}
+}
+
+/*
+ * ACPI event handlers
+ */
+
+void acpiphp_check_host_bridge(struct acpi_device *adev)
+{
+	struct acpiphp_bridge *bridge = NULL;
+
+	acpi_lock_hp_context();
+	if (adev->hp) {
+		bridge = to_acpiphp_root_context(adev->hp)->root_bridge;
+		if (bridge)
+			get_bridge(bridge);
+	}
+	acpi_unlock_hp_context();
+	if (bridge) {
+		pci_lock_rescan_remove();
+
+		acpiphp_check_bridge(bridge);
+
+		pci_unlock_rescan_remove();
+		put_bridge(bridge);
+	}
+}
+
+static int acpiphp_disable_and_eject_slot(struct acpiphp_slot *slot);
+
+static void hotplug_event(u32 type, struct acpiphp_context *context)
+{
+	acpi_handle handle = context->hp.self->handle;
+	struct acpiphp_func *func = &context->func;
+	struct acpiphp_slot *slot = func->slot;
+	struct acpiphp_bridge *bridge;
+
+	acpi_lock_hp_context();
+	bridge = context->bridge;
+	if (bridge)
+		get_bridge(bridge);
+
+	acpi_unlock_hp_context();
+
+	pci_lock_rescan_remove();
+
+	switch (type) {
+	case ACPI_NOTIFY_BUS_CHECK:
+		/* bus re-enumerate */
+		acpi_handle_debug(handle, "Bus check in %s()\n", __func__);
+		if (bridge)
+			acpiphp_check_bridge(bridge);
+		else if (!(slot->flags & SLOT_IS_GOING_AWAY))
+			enable_slot(slot, false);
+
+		break;
+
+	case ACPI_NOTIFY_DEVICE_CHECK:
+		/* device check */
+		acpi_handle_debug(handle, "Device check in %s()\n", __func__);
+		if (bridge) {
+			acpiphp_check_bridge(bridge);
+		} else if (!(slot->flags & SLOT_IS_GOING_AWAY)) {
+			/*
+			 * Check if anything has changed in the slot and rescan
+			 * from the parent if that's the case.
+			 */
+			if (acpiphp_rescan_slot(slot))
+				acpiphp_check_bridge(func->parent);
+		}
+		break;
+
+	case ACPI_NOTIFY_EJECT_REQUEST:
+		/* request device eject */
+		acpi_handle_debug(handle, "Eject request in %s()\n", __func__);
+		acpiphp_disable_and_eject_slot(slot);
+		break;
+	}
+
+	pci_unlock_rescan_remove();
+	if (bridge)
+		put_bridge(bridge);
+}
+
+static int acpiphp_hotplug_notify(struct acpi_device *adev, u32 type)
+{
+	struct acpiphp_context *context;
+
+	context = acpiphp_grab_context(adev);
+	if (!context)
+		return -ENODATA;
+
+	hotplug_event(type, context);
+	acpiphp_let_context_go(context);
+	return 0;
+}
+
+/**
+ * acpiphp_enumerate_slots - Enumerate PCI slots for a given bus.
+ * @bus: PCI bus to enumerate the slots for.
+ *
+ * A "slot" is an object associated with a PCI device number.  All functions
+ * (PCI devices) with the same bus and device number belong to the same slot.
+ */
+void acpiphp_enumerate_slots(struct pci_bus *bus)
+{
+	struct acpiphp_bridge *bridge;
+	struct acpi_device *adev;
+	acpi_handle handle;
+	acpi_status status;
+
+	if (acpiphp_disabled)
+		return;
+
+	adev = ACPI_COMPANION(bus->bridge);
+	if (!adev)
+		return;
+
+	handle = adev->handle;
+	bridge = kzalloc(sizeof(struct acpiphp_bridge), GFP_KERNEL);
+	if (!bridge)
+		return;
+
+	INIT_LIST_HEAD(&bridge->slots);
+	kref_init(&bridge->ref);
+	bridge->pci_dev = pci_dev_get(bus->self);
+	bridge->pci_bus = bus;
+
+	/*
+	 * Grab a ref to the subordinate PCI bus in case the bus is
+	 * removed via PCI core logical hotplug. The ref pins the bus
+	 * (which we access during module unload).
+	 */
+	get_device(&bus->dev);
+
+	acpi_lock_hp_context();
+	if (pci_is_root_bus(bridge->pci_bus)) {
+		struct acpiphp_root_context *root_context;
+
+		root_context = kzalloc(sizeof(*root_context), GFP_KERNEL);
+		if (!root_context)
+			goto err;
+
+		root_context->root_bridge = bridge;
+		acpi_set_hp_context(adev, &root_context->hp);
+	} else {
+		struct acpiphp_context *context;
+
+		/*
+		 * This bridge should have been registered as a hotplug function
+		 * under its parent, so the context should be there, unless the
+		 * parent is going to be handled by pciehp, in which case this
+		 * bridge is not interesting to us either.
+		 */
+		context = acpiphp_get_context(adev);
+		if (!context)
+			goto err;
+
+		bridge->context = context;
+		context->bridge = bridge;
+		/* Get a reference to the parent bridge. */
+		get_bridge(context->func.parent);
+	}
+	acpi_unlock_hp_context();
+
+	/* Must be added to the list prior to calling acpiphp_add_context(). */
+	mutex_lock(&bridge_mutex);
+	list_add(&bridge->list, &bridge_list);
+	mutex_unlock(&bridge_mutex);
+
+	/* register all slot objects under this bridge */
+	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
+				     acpiphp_add_context, NULL, bridge, NULL);
+	if (ACPI_FAILURE(status)) {
+		acpi_handle_err(handle, "failed to register slots\n");
+		cleanup_bridge(bridge);
+		put_bridge(bridge);
+	}
+	return;
+
+ err:
+	acpi_unlock_hp_context();
+	put_device(&bus->dev);
+	pci_dev_put(bridge->pci_dev);
+	kfree(bridge);
+}
+
+static void acpiphp_drop_bridge(struct acpiphp_bridge *bridge)
+{
+	if (pci_is_root_bus(bridge->pci_bus)) {
+		struct acpiphp_root_context *root_context;
+		struct acpi_device *adev;
+
+		acpi_lock_hp_context();
+		adev = ACPI_COMPANION(bridge->pci_bus->bridge);
+		root_context = to_acpiphp_root_context(adev->hp);
+		adev->hp = NULL;
+		acpi_unlock_hp_context();
+		kfree(root_context);
+	}
+	cleanup_bridge(bridge);
+	put_bridge(bridge);
+}
+
+/**
+ * acpiphp_remove_slots - Remove slot objects associated with a given bus.
+ * @bus: PCI bus to remove the slot objects for.
+ */
+void acpiphp_remove_slots(struct pci_bus *bus)
+{
+	struct acpiphp_bridge *bridge;
+
+	if (acpiphp_disabled)
+		return;
+
+	mutex_lock(&bridge_mutex);
+	list_for_each_entry(bridge, &bridge_list, list)
+		if (bridge->pci_bus == bus) {
+			mutex_unlock(&bridge_mutex);
+			acpiphp_drop_bridge(bridge);
+			return;
+		}
+
+	mutex_unlock(&bridge_mutex);
+}
+
+/**
+ * acpiphp_enable_slot - power on slot
+ * @slot: ACPI PHP slot
+ */
+int acpiphp_enable_slot(struct acpiphp_slot *slot)
+{
+	pci_lock_rescan_remove();
+
+	if (slot->flags & SLOT_IS_GOING_AWAY) {
+		pci_unlock_rescan_remove();
+		return -ENODEV;
+	}
+
+	/* configure all functions */
+	if (!(slot->flags & SLOT_ENABLED))
+		enable_slot(slot, false);
+
+	pci_unlock_rescan_remove();
+	return 0;
+}
+
+/**
+ * acpiphp_disable_and_eject_slot - power off and eject slot
+ * @slot: ACPI PHP slot
+ */
+static int acpiphp_disable_and_eject_slot(struct acpiphp_slot *slot)
+{
+	struct acpiphp_func *func;
+
+	if (slot->flags & SLOT_IS_GOING_AWAY)
+		return -ENODEV;
+
+	/* unconfigure all functions */
+	disable_slot(slot);
+
+	list_for_each_entry(func, &slot->funcs, sibling)
+		if (func->flags & FUNC_HAS_EJ0) {
+			acpi_handle handle = func_to_handle(func);
+
+			if (ACPI_FAILURE(acpi_evaluate_ej0(handle)))
+				acpi_handle_err(handle, "_EJ0 failed\n");
+
+			break;
+		}
+
+	return 0;
+}
+
+int acpiphp_disable_slot(struct acpiphp_slot *slot)
+{
+	int ret;
+
+	/*
+	 * Acquire acpi_scan_lock to ensure that the execution of _EJ0 in
+	 * acpiphp_disable_and_eject_slot() will be synchronized properly.
+	 */
+	acpi_scan_lock_acquire();
+	pci_lock_rescan_remove();
+	ret = acpiphp_disable_and_eject_slot(slot);
+	pci_unlock_rescan_remove();
+	acpi_scan_lock_release();
+	return ret;
+}
+
+/*
+ * slot enabled:  1
+ * slot disabled: 0
+ */
+u8 acpiphp_get_power_status(struct acpiphp_slot *slot)
+{
+	return (slot->flags & SLOT_ENABLED);
+}
+
+/*
+ * latch   open:  1
+ * latch closed:  0
+ */
+u8 acpiphp_get_latch_status(struct acpiphp_slot *slot)
+{
+	return !(get_slot_status(slot) & ACPI_STA_DEVICE_UI);
+}
+
+/*
+ * adapter presence : 1
+ *          absence : 0
+ */
+u8 acpiphp_get_adapter_status(struct acpiphp_slot *slot)
+{
+	return !!get_slot_status(slot);
+}
diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c
new file mode 100644
index 000000000..8f3a0a33f
--- /dev/null
+++ b/drivers/pci/hotplug/acpiphp_ibm.c
@@ -0,0 +1,490 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * ACPI PCI Hot Plug IBM Extension
+ *
+ * Copyright (C) 2004 Vernon Mauery <vernux@us.ibm.com>
+ * Copyright (C) 2004 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <vernux@us.ibm.com>
+ *
+ */
+
+#define pr_fmt(fmt) "acpiphp_ibm: " fmt
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sysfs.h>
+#include <linux/kobject.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/uaccess.h>
+
+#include "acpiphp.h"
+#include "../pci.h"
+
+#define DRIVER_VERSION	"1.0.1"
+#define DRIVER_AUTHOR	"Irene Zubarev <zubarev@us.ibm.com>, Vernon Mauery <vernux@us.ibm.com>"
+#define DRIVER_DESC	"ACPI Hot Plug PCI Controller Driver IBM extension"
+
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRIVER_VERSION);
+
+#define FOUND_APCI 0x61504349
+/* these are the names for the IBM ACPI pseudo-device */
+#define IBM_HARDWARE_ID1 "IBM37D0"
+#define IBM_HARDWARE_ID2 "IBM37D4"
+
+#define hpslot_to_sun(A) (to_slot(A)->sun)
+
+/* union apci_descriptor - allows access to the
+ * various device descriptors that are embedded in the
+ * aPCI table
+ */
+union apci_descriptor {
+	struct {
+		char sig[4];
+		u8   len;
+	} header;
+	struct {
+		u8  type;
+		u8  len;
+		u16 slot_id;
+		u8  bus_id;
+		u8  dev_num;
+		u8  slot_num;
+		u8  slot_attr[2];
+		u8  attn;
+		u8  status[2];
+		u8  sun;
+		u8  res[3];
+	} slot;
+	struct {
+		u8 type;
+		u8 len;
+	} generic;
+};
+
+/* struct notification - keeps info about the device
+ * that cause the ACPI notification event
+ */
+struct notification {
+	struct acpi_device *device;
+	u8                  event;
+};
+
+static int ibm_set_attention_status(struct hotplug_slot *slot, u8 status);
+static int ibm_get_attention_status(struct hotplug_slot *slot, u8 *status);
+static void ibm_handle_events(acpi_handle handle, u32 event, void *context);
+static int ibm_get_table_from_acpi(char **bufp);
+static ssize_t ibm_read_apci_table(struct file *filp, struct kobject *kobj,
+				   struct bin_attribute *bin_attr,
+				   char *buffer, loff_t pos, size_t size);
+static acpi_status __init ibm_find_acpi_device(acpi_handle handle,
+		u32 lvl, void *context, void **rv);
+static int __init ibm_acpiphp_init(void);
+static void __exit ibm_acpiphp_exit(void);
+
+static acpi_handle ibm_acpi_handle;
+static struct notification ibm_note;
+static struct bin_attribute ibm_apci_table_attr __ro_after_init = {
+	    .attr = {
+		    .name = "apci_table",
+		    .mode = S_IRUGO,
+	    },
+	    .read = ibm_read_apci_table,
+	    .write = NULL,
+};
+static struct acpiphp_attention_info ibm_attention_info =
+{
+	.set_attn = ibm_set_attention_status,
+	.get_attn = ibm_get_attention_status,
+	.owner = THIS_MODULE,
+};
+
+/**
+ * ibm_slot_from_id - workaround for bad ibm hardware
+ * @id: the slot number that linux refers to the slot by
+ *
+ * Description: This method returns the aCPI slot descriptor
+ * corresponding to the Linux slot number.  This descriptor
+ * has info about the aPCI slot id and attention status.
+ * This descriptor must be freed using kfree when done.
+ */
+static union apci_descriptor *ibm_slot_from_id(int id)
+{
+	int ind = 0, size;
+	union apci_descriptor *ret = NULL, *des;
+	char *table;
+
+	size = ibm_get_table_from_acpi(&table);
+	if (size < 0)
+		return NULL;
+	des = (union apci_descriptor *)table;
+	if (memcmp(des->header.sig, "aPCI", 4) != 0)
+		goto ibm_slot_done;
+
+	des = (union apci_descriptor *)&table[ind += des->header.len];
+	while (ind < size && (des->generic.type != 0x82 ||
+			des->slot.slot_num != id)) {
+		des = (union apci_descriptor *)&table[ind += des->generic.len];
+	}
+
+	if (ind < size && des->slot.slot_num == id)
+		ret = des;
+
+ibm_slot_done:
+	if (ret) {
+		ret = kmalloc(sizeof(union apci_descriptor), GFP_KERNEL);
+		if (ret)
+			memcpy(ret, des, sizeof(union apci_descriptor));
+	}
+	kfree(table);
+	return ret;
+}
+
+/**
+ * ibm_set_attention_status - callback method to set the attention LED
+ * @slot: the hotplug_slot to work with
+ * @status: what to set the LED to (0 or 1)
+ *
+ * Description: This method is registered with the acpiphp module as a
+ * callback to do the device specific task of setting the LED status.
+ */
+static int ibm_set_attention_status(struct hotplug_slot *slot, u8 status)
+{
+	union acpi_object args[2];
+	struct acpi_object_list params = { .pointer = args, .count = 2 };
+	acpi_status stat;
+	unsigned long long rc;
+	union apci_descriptor *ibm_slot;
+	int id = hpslot_to_sun(slot);
+
+	ibm_slot = ibm_slot_from_id(id);
+	if (!ibm_slot) {
+		pr_err("APLS null ACPI descriptor for slot %d\n", id);
+		return -ENODEV;
+	}
+
+	pr_debug("%s: set slot %d (%d) attention status to %d\n", __func__,
+			ibm_slot->slot.slot_num, ibm_slot->slot.slot_id,
+			(status ? 1 : 0));
+
+	args[0].type = ACPI_TYPE_INTEGER;
+	args[0].integer.value = ibm_slot->slot.slot_id;
+	args[1].type = ACPI_TYPE_INTEGER;
+	args[1].integer.value = (status) ? 1 : 0;
+
+	kfree(ibm_slot);
+
+	stat = acpi_evaluate_integer(ibm_acpi_handle, "APLS", &params, &rc);
+	if (ACPI_FAILURE(stat)) {
+		pr_err("APLS evaluation failed:  0x%08x\n", stat);
+		return -ENODEV;
+	} else if (!rc) {
+		pr_err("APLS method failed:  0x%08llx\n", rc);
+		return -ERANGE;
+	}
+	return 0;
+}
+
+/**
+ * ibm_get_attention_status - callback method to get attention LED status
+ * @slot: the hotplug_slot to work with
+ * @status: returns what the LED is set to (0 or 1)
+ *
+ * Description: This method is registered with the acpiphp module as a
+ * callback to do the device specific task of getting the LED status.
+ *
+ * Because there is no direct method of getting the LED status directly
+ * from an ACPI call, we read the aPCI table and parse out our
+ * slot descriptor to read the status from that.
+ */
+static int ibm_get_attention_status(struct hotplug_slot *slot, u8 *status)
+{
+	union apci_descriptor *ibm_slot;
+	int id = hpslot_to_sun(slot);
+
+	ibm_slot = ibm_slot_from_id(id);
+	if (!ibm_slot) {
+		pr_err("APLS null ACPI descriptor for slot %d\n", id);
+		return -ENODEV;
+	}
+
+	if (ibm_slot->slot.attn & 0xa0 || ibm_slot->slot.status[1] & 0x08)
+		*status = 1;
+	else
+		*status = 0;
+
+	pr_debug("%s: get slot %d (%d) attention status is %d\n", __func__,
+			ibm_slot->slot.slot_num, ibm_slot->slot.slot_id,
+			*status);
+
+	kfree(ibm_slot);
+	return 0;
+}
+
+/**
+ * ibm_handle_events - listens for ACPI events for the IBM37D0 device
+ * @handle: an ACPI handle to the device that caused the event
+ * @event: the event info (device specific)
+ * @context: passed context (our notification struct)
+ *
+ * Description: This method is registered as a callback with the ACPI
+ * subsystem it is called when this device has an event to notify the OS of.
+ *
+ * The events actually come from the device as two events that get
+ * synthesized into one event with data by this function.  The event
+ * ID comes first and then the slot number that caused it.  We report
+ * this as one event to the OS.
+ *
+ * From section 5.6.2.2 of the ACPI 2.0 spec, I understand that the OSPM will
+ * only re-enable the interrupt that causes this event AFTER this method
+ * has returned, thereby enforcing serial access for the notification struct.
+ */
+static void ibm_handle_events(acpi_handle handle, u32 event, void *context)
+{
+	u8 detail = event & 0x0f;
+	u8 subevent = event & 0xf0;
+	struct notification *note = context;
+
+	pr_debug("%s: Received notification %02x\n", __func__, event);
+
+	if (subevent == 0x80) {
+		pr_debug("%s: generating bus event\n", __func__);
+		acpi_bus_generate_netlink_event(note->device->pnp.device_class,
+						  dev_name(&note->device->dev),
+						  note->event, detail);
+	} else
+		note->event = event;
+}
+
+/**
+ * ibm_get_table_from_acpi - reads the APLS buffer from ACPI
+ * @bufp: address to pointer to allocate for the table
+ *
+ * Description: This method reads the APLS buffer in from ACPI and
+ * stores the "stripped" table into a single buffer
+ * it allocates and passes the address back in bufp.
+ *
+ * If NULL is passed in as buffer, this method only calculates
+ * the size of the table and returns that without filling
+ * in the buffer.
+ *
+ * Returns < 0 on error or the size of the table on success.
+ */
+static int ibm_get_table_from_acpi(char **bufp)
+{
+	union acpi_object *package;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	acpi_status status;
+	char *lbuf = NULL;
+	int i, size = -EIO;
+
+	status = acpi_evaluate_object(ibm_acpi_handle, "APCI", NULL, &buffer);
+	if (ACPI_FAILURE(status)) {
+		pr_err("%s:  APCI evaluation failed\n", __func__);
+		return -ENODEV;
+	}
+
+	package = (union acpi_object *) buffer.pointer;
+	if (!(package) ||
+			(package->type != ACPI_TYPE_PACKAGE) ||
+			!(package->package.elements)) {
+		pr_err("%s:  Invalid APCI object\n", __func__);
+		goto read_table_done;
+	}
+
+	for (size = 0, i = 0; i < package->package.count; i++) {
+		if (package->package.elements[i].type != ACPI_TYPE_BUFFER) {
+			pr_err("%s:  Invalid APCI element %d\n", __func__, i);
+			goto read_table_done;
+		}
+		size += package->package.elements[i].buffer.length;
+	}
+
+	if (bufp == NULL)
+		goto read_table_done;
+
+	lbuf = kzalloc(size, GFP_KERNEL);
+	pr_debug("%s: element count: %i, ASL table size: %i, &table = 0x%p\n",
+			__func__, package->package.count, size, lbuf);
+
+	if (lbuf) {
+		*bufp = lbuf;
+	} else {
+		size = -ENOMEM;
+		goto read_table_done;
+	}
+
+	size = 0;
+	for (i = 0; i < package->package.count; i++) {
+		memcpy(&lbuf[size],
+				package->package.elements[i].buffer.pointer,
+				package->package.elements[i].buffer.length);
+		size += package->package.elements[i].buffer.length;
+	}
+
+read_table_done:
+	kfree(buffer.pointer);
+	return size;
+}
+
+/**
+ * ibm_read_apci_table - callback for the sysfs apci_table file
+ * @filp: the open sysfs file
+ * @kobj: the kobject this binary attribute is a part of
+ * @bin_attr: struct bin_attribute for this file
+ * @buffer: the kernel space buffer to fill
+ * @pos: the offset into the file
+ * @size: the number of bytes requested
+ *
+ * Description: Gets registered with sysfs as the reader callback
+ * to be executed when /sys/bus/pci/slots/apci_table gets read.
+ *
+ * Since we don't get notified on open and close for this file,
+ * things get really tricky here...
+ * our solution is to only allow reading the table in all at once.
+ */
+static ssize_t ibm_read_apci_table(struct file *filp, struct kobject *kobj,
+				   struct bin_attribute *bin_attr,
+				   char *buffer, loff_t pos, size_t size)
+{
+	int bytes_read = -EINVAL;
+	char *table = NULL;
+
+	pr_debug("%s: pos = %d, size = %zd\n", __func__, (int)pos, size);
+
+	if (pos == 0) {
+		bytes_read = ibm_get_table_from_acpi(&table);
+		if (bytes_read > 0 && bytes_read <= size)
+			memcpy(buffer, table, bytes_read);
+		kfree(table);
+	}
+	return bytes_read;
+}
+
+/**
+ * ibm_find_acpi_device - callback to find our ACPI device
+ * @handle: the ACPI handle of the device we are inspecting
+ * @lvl: depth into the namespace tree
+ * @context: a pointer to our handle to fill when we find the device
+ * @rv: a return value to fill if desired
+ *
+ * Description: Used as a callback when calling acpi_walk_namespace
+ * to find our device.  When this method returns non-zero
+ * acpi_walk_namespace quits its search and returns our value.
+ */
+static acpi_status __init ibm_find_acpi_device(acpi_handle handle,
+		u32 lvl, void *context, void **rv)
+{
+	acpi_handle *phandle = (acpi_handle *)context;
+	unsigned long long current_status = 0;
+	acpi_status status;
+	struct acpi_device_info *info;
+	int retval = 0;
+
+	status = acpi_get_object_info(handle, &info);
+	if (ACPI_FAILURE(status)) {
+		pr_err("%s:  Failed to get device information status=0x%x\n",
+			__func__, status);
+		return retval;
+	}
+
+	acpi_bus_get_status_handle(handle, &current_status);
+
+	if (current_status && (info->valid & ACPI_VALID_HID) &&
+			(!strcmp(info->hardware_id.string, IBM_HARDWARE_ID1) ||
+			 !strcmp(info->hardware_id.string, IBM_HARDWARE_ID2))) {
+		pr_debug("found hardware: %s, handle: %p\n",
+			info->hardware_id.string, handle);
+		*phandle = handle;
+		/* returning non-zero causes the search to stop
+		 * and returns this value to the caller of
+		 * acpi_walk_namespace, but it also causes some warnings
+		 * in the acpi debug code to print...
+		 */
+		retval = FOUND_APCI;
+	}
+	kfree(info);
+	return retval;
+}
+
+static int __init ibm_acpiphp_init(void)
+{
+	int retval = 0;
+	acpi_status status;
+	struct acpi_device *device;
+	struct kobject *sysdir = &pci_slots_kset->kobj;
+
+	pr_debug("%s\n", __func__);
+
+	if (acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
+			ACPI_UINT32_MAX, ibm_find_acpi_device, NULL,
+			&ibm_acpi_handle, NULL) != FOUND_APCI) {
+		pr_err("%s: acpi_walk_namespace failed\n", __func__);
+		retval = -ENODEV;
+		goto init_return;
+	}
+	pr_debug("%s: found IBM aPCI device\n", __func__);
+	device = acpi_fetch_acpi_dev(ibm_acpi_handle);
+	if (!device) {
+		pr_err("%s: acpi_fetch_acpi_dev failed\n", __func__);
+		retval = -ENODEV;
+		goto init_return;
+	}
+	if (acpiphp_register_attention(&ibm_attention_info)) {
+		retval = -ENODEV;
+		goto init_return;
+	}
+
+	ibm_note.device = device;
+	status = acpi_install_notify_handler(ibm_acpi_handle,
+			ACPI_DEVICE_NOTIFY, ibm_handle_events,
+			&ibm_note);
+	if (ACPI_FAILURE(status)) {
+		pr_err("%s: Failed to register notification handler\n",
+				__func__);
+		retval = -EBUSY;
+		goto init_cleanup;
+	}
+
+	ibm_apci_table_attr.size = ibm_get_table_from_acpi(NULL);
+	retval = sysfs_create_bin_file(sysdir, &ibm_apci_table_attr);
+
+	return retval;
+
+init_cleanup:
+	acpiphp_unregister_attention(&ibm_attention_info);
+init_return:
+	return retval;
+}
+
+static void __exit ibm_acpiphp_exit(void)
+{
+	acpi_status status;
+	struct kobject *sysdir = &pci_slots_kset->kobj;
+
+	pr_debug("%s\n", __func__);
+
+	if (acpiphp_unregister_attention(&ibm_attention_info))
+		pr_err("%s: attention info deregistration failed", __func__);
+
+	status = acpi_remove_notify_handler(
+			   ibm_acpi_handle,
+			   ACPI_DEVICE_NOTIFY,
+			   ibm_handle_events);
+	if (ACPI_FAILURE(status))
+		pr_err("%s: Notification handler removal failed\n", __func__);
+	/* remove the /sys entries */
+	sysfs_remove_bin_file(sysdir, &ibm_apci_table_attr);
+}
+
+module_init(ibm_acpiphp_init);
+module_exit(ibm_acpiphp_exit);
diff --git a/drivers/pci/hotplug/cpci_hotplug.h b/drivers/pci/hotplug/cpci_hotplug.h
new file mode 100644
index 000000000..6d8970d8c
--- /dev/null
+++ b/drivers/pci/hotplug/cpci_hotplug.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * CompactPCI Hot Plug Core Functions
+ *
+ * Copyright (C) 2002 SOMA Networks, Inc.
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <scottm@somanetworks.com>
+ */
+
+#ifndef _CPCI_HOTPLUG_H
+#define _CPCI_HOTPLUG_H
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+
+/* PICMG 2.1 R2.0 HS CSR bits: */
+#define HS_CSR_INS	0x0080
+#define HS_CSR_EXT	0x0040
+#define HS_CSR_PI	0x0030
+#define HS_CSR_LOO	0x0008
+#define HS_CSR_PIE	0x0004
+#define HS_CSR_EIM	0x0002
+#define HS_CSR_DHA	0x0001
+
+struct slot {
+	u8 number;
+	unsigned int devfn;
+	struct pci_bus *bus;
+	struct pci_dev *dev;
+	unsigned int latch_status:1;
+	unsigned int adapter_status:1;
+	unsigned int extracting;
+	struct hotplug_slot hotplug_slot;
+	struct list_head slot_list;
+};
+
+struct cpci_hp_controller_ops {
+	int (*query_enum)(void);
+	int (*enable_irq)(void);
+	int (*disable_irq)(void);
+	int (*check_irq)(void *dev_id);
+	int (*hardware_test)(struct slot *slot, u32 value);
+	u8  (*get_power)(struct slot *slot);
+	int (*set_power)(struct slot *slot, int value);
+};
+
+struct cpci_hp_controller {
+	unsigned int irq;
+	unsigned long irq_flags;
+	char *devname;
+	void *dev_id;
+	char *name;
+	struct cpci_hp_controller_ops *ops;
+};
+
+static inline const char *slot_name(struct slot *slot)
+{
+	return hotplug_slot_name(&slot->hotplug_slot);
+}
+
+static inline struct slot *to_slot(struct hotplug_slot *hotplug_slot)
+{
+	return container_of(hotplug_slot, struct slot, hotplug_slot);
+}
+
+int cpci_hp_register_controller(struct cpci_hp_controller *controller);
+int cpci_hp_unregister_controller(struct cpci_hp_controller *controller);
+int cpci_hp_register_bus(struct pci_bus *bus, u8 first, u8 last);
+int cpci_hp_unregister_bus(struct pci_bus *bus);
+int cpci_hp_start(void);
+int cpci_hp_stop(void);
+
+/* Global variables */
+extern int cpci_debug;
+
+/*
+ * Internal function prototypes, these functions should not be used by
+ * board/chassis drivers.
+ */
+u8 cpci_get_attention_status(struct slot *slot);
+u16 cpci_get_hs_csr(struct slot *slot);
+int cpci_set_attention_status(struct slot *slot, int status);
+int cpci_check_and_clear_ins(struct slot *slot);
+int cpci_check_ext(struct slot *slot);
+int cpci_clear_ext(struct slot *slot);
+int cpci_led_on(struct slot *slot);
+int cpci_led_off(struct slot *slot);
+int cpci_configure_slot(struct slot *slot);
+int cpci_unconfigure_slot(struct slot *slot);
+
+#ifdef CONFIG_HOTPLUG_PCI_CPCI
+int cpci_hotplug_init(int debug);
+#else
+static inline int cpci_hotplug_init(int debug) { return 0; }
+#endif
+
+#endif	/* _CPCI_HOTPLUG_H */
diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c
new file mode 100644
index 000000000..d0559d2fa
--- /dev/null
+++ b/drivers/pci/hotplug/cpci_hotplug_core.c
@@ -0,0 +1,644 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * CompactPCI Hot Plug Driver
+ *
+ * Copyright (C) 2002,2005 SOMA Networks, Inc.
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <scottm@somanetworks.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include "cpci_hotplug.h"
+
+#define DRIVER_AUTHOR	"Scott Murray <scottm@somanetworks.com>"
+#define DRIVER_DESC	"CompactPCI Hot Plug Core"
+
+#define MY_NAME	"cpci_hotplug"
+
+#define dbg(format, arg...)					\
+	do {							\
+		if (cpci_debug)					\
+			printk(KERN_DEBUG "%s: " format "\n",	\
+				MY_NAME, ## arg);		\
+	} while (0)
+#define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME, ## arg)
+#define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME, ## arg)
+#define warn(format, arg...) printk(KERN_WARNING "%s: " format "\n", MY_NAME, ## arg)
+
+/* local variables */
+static DECLARE_RWSEM(list_rwsem);
+static LIST_HEAD(slot_list);
+static int slots;
+static atomic_t extracting;
+int cpci_debug;
+static struct cpci_hp_controller *controller;
+static struct task_struct *cpci_thread;
+static int thread_finished;
+
+static int enable_slot(struct hotplug_slot *slot);
+static int disable_slot(struct hotplug_slot *slot);
+static int set_attention_status(struct hotplug_slot *slot, u8 value);
+static int get_power_status(struct hotplug_slot *slot, u8 *value);
+static int get_attention_status(struct hotplug_slot *slot, u8 *value);
+static int get_adapter_status(struct hotplug_slot *slot, u8 *value);
+static int get_latch_status(struct hotplug_slot *slot, u8 *value);
+
+static const struct hotplug_slot_ops cpci_hotplug_slot_ops = {
+	.enable_slot = enable_slot,
+	.disable_slot = disable_slot,
+	.set_attention_status = set_attention_status,
+	.get_power_status = get_power_status,
+	.get_attention_status = get_attention_status,
+	.get_adapter_status = get_adapter_status,
+	.get_latch_status = get_latch_status,
+};
+
+static int
+enable_slot(struct hotplug_slot *hotplug_slot)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+	int retval = 0;
+
+	dbg("%s - physical_slot = %s", __func__, slot_name(slot));
+
+	if (controller->ops->set_power)
+		retval = controller->ops->set_power(slot, 1);
+	return retval;
+}
+
+static int
+disable_slot(struct hotplug_slot *hotplug_slot)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+	int retval = 0;
+
+	dbg("%s - physical_slot = %s", __func__, slot_name(slot));
+
+	down_write(&list_rwsem);
+
+	/* Unconfigure device */
+	dbg("%s - unconfiguring slot %s", __func__, slot_name(slot));
+	retval = cpci_unconfigure_slot(slot);
+	if (retval) {
+		err("%s - could not unconfigure slot %s",
+		    __func__, slot_name(slot));
+		goto disable_error;
+	}
+	dbg("%s - finished unconfiguring slot %s", __func__, slot_name(slot));
+
+	/* Clear EXT (by setting it) */
+	if (cpci_clear_ext(slot)) {
+		err("%s - could not clear EXT for slot %s",
+		    __func__, slot_name(slot));
+		retval = -ENODEV;
+		goto disable_error;
+	}
+	cpci_led_on(slot);
+
+	if (controller->ops->set_power) {
+		retval = controller->ops->set_power(slot, 0);
+		if (retval)
+			goto disable_error;
+	}
+
+	slot->adapter_status = 0;
+
+	if (slot->extracting) {
+		slot->extracting = 0;
+		atomic_dec(&extracting);
+	}
+disable_error:
+	up_write(&list_rwsem);
+	return retval;
+}
+
+static u8
+cpci_get_power_status(struct slot *slot)
+{
+	u8 power = 1;
+
+	if (controller->ops->get_power)
+		power = controller->ops->get_power(slot);
+	return power;
+}
+
+static int
+get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+
+	*value = cpci_get_power_status(slot);
+	return 0;
+}
+
+static int
+get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+
+	*value = cpci_get_attention_status(slot);
+	return 0;
+}
+
+static int
+set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
+{
+	return cpci_set_attention_status(to_slot(hotplug_slot), status);
+}
+
+static int
+get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+
+	*value = slot->adapter_status;
+	return 0;
+}
+
+static int
+get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+
+	*value = slot->latch_status;
+	return 0;
+}
+
+static void release_slot(struct slot *slot)
+{
+	pci_dev_put(slot->dev);
+	kfree(slot);
+}
+
+#define SLOT_NAME_SIZE	6
+
+int
+cpci_hp_register_bus(struct pci_bus *bus, u8 first, u8 last)
+{
+	struct slot *slot;
+	char name[SLOT_NAME_SIZE];
+	int status;
+	int i;
+
+	if (!(controller && bus))
+		return -ENODEV;
+
+	/*
+	 * Create a structure for each slot, and register that slot
+	 * with the pci_hotplug subsystem.
+	 */
+	for (i = first; i <= last; ++i) {
+		slot = kzalloc(sizeof(struct slot), GFP_KERNEL);
+		if (!slot) {
+			status = -ENOMEM;
+			goto error;
+		}
+
+		slot->bus = bus;
+		slot->number = i;
+		slot->devfn = PCI_DEVFN(i, 0);
+
+		snprintf(name, SLOT_NAME_SIZE, "%02x:%02x", bus->number, i);
+
+		slot->hotplug_slot.ops = &cpci_hotplug_slot_ops;
+
+		dbg("registering slot %s", name);
+		status = pci_hp_register(&slot->hotplug_slot, bus, i, name);
+		if (status) {
+			err("pci_hp_register failed with error %d", status);
+			goto error_slot;
+		}
+		dbg("slot registered with name: %s", slot_name(slot));
+
+		/* Add slot to our internal list */
+		down_write(&list_rwsem);
+		list_add(&slot->slot_list, &slot_list);
+		slots++;
+		up_write(&list_rwsem);
+	}
+	return 0;
+error_slot:
+	kfree(slot);
+error:
+	return status;
+}
+EXPORT_SYMBOL_GPL(cpci_hp_register_bus);
+
+int
+cpci_hp_unregister_bus(struct pci_bus *bus)
+{
+	struct slot *slot;
+	struct slot *tmp;
+	int status = 0;
+
+	down_write(&list_rwsem);
+	if (!slots) {
+		up_write(&list_rwsem);
+		return -1;
+	}
+	list_for_each_entry_safe(slot, tmp, &slot_list, slot_list) {
+		if (slot->bus == bus) {
+			list_del(&slot->slot_list);
+			slots--;
+
+			dbg("deregistering slot %s", slot_name(slot));
+			pci_hp_deregister(&slot->hotplug_slot);
+			release_slot(slot);
+		}
+	}
+	up_write(&list_rwsem);
+	return status;
+}
+EXPORT_SYMBOL_GPL(cpci_hp_unregister_bus);
+
+/* This is the interrupt mode interrupt handler */
+static irqreturn_t
+cpci_hp_intr(int irq, void *data)
+{
+	dbg("entered cpci_hp_intr");
+
+	/* Check to see if it was our interrupt */
+	if ((controller->irq_flags & IRQF_SHARED) &&
+	    !controller->ops->check_irq(controller->dev_id)) {
+		dbg("exited cpci_hp_intr, not our interrupt");
+		return IRQ_NONE;
+	}
+
+	/* Disable ENUM interrupt */
+	controller->ops->disable_irq();
+
+	/* Trigger processing by the event thread */
+	wake_up_process(cpci_thread);
+	return IRQ_HANDLED;
+}
+
+/*
+ * According to PICMG 2.1 R2.0, section 6.3.2, upon
+ * initialization, the system driver shall clear the
+ * INS bits of the cold-inserted devices.
+ */
+static int
+init_slots(int clear_ins)
+{
+	struct slot *slot;
+	struct pci_dev *dev;
+
+	dbg("%s - enter", __func__);
+	down_read(&list_rwsem);
+	if (!slots) {
+		up_read(&list_rwsem);
+		return -1;
+	}
+	list_for_each_entry(slot, &slot_list, slot_list) {
+		dbg("%s - looking at slot %s", __func__, slot_name(slot));
+		if (clear_ins && cpci_check_and_clear_ins(slot))
+			dbg("%s - cleared INS for slot %s",
+			    __func__, slot_name(slot));
+		dev = pci_get_slot(slot->bus, PCI_DEVFN(slot->number, 0));
+		if (dev) {
+			slot->adapter_status = 1;
+			slot->latch_status = 1;
+			slot->dev = dev;
+		}
+	}
+	up_read(&list_rwsem);
+	dbg("%s - exit", __func__);
+	return 0;
+}
+
+static int
+check_slots(void)
+{
+	struct slot *slot;
+	int extracted;
+	int inserted;
+	u16 hs_csr;
+
+	down_read(&list_rwsem);
+	if (!slots) {
+		up_read(&list_rwsem);
+		err("no slots registered, shutting down");
+		return -1;
+	}
+	extracted = inserted = 0;
+	list_for_each_entry(slot, &slot_list, slot_list) {
+		dbg("%s - looking at slot %s", __func__, slot_name(slot));
+		if (cpci_check_and_clear_ins(slot)) {
+			/*
+			 * Some broken hardware (e.g. PLX 9054AB) asserts
+			 * ENUM# twice...
+			 */
+			if (slot->dev) {
+				warn("slot %s already inserted",
+				     slot_name(slot));
+				inserted++;
+				continue;
+			}
+
+			/* Process insertion */
+			dbg("%s - slot %s inserted", __func__, slot_name(slot));
+
+			/* GSM, debug */
+			hs_csr = cpci_get_hs_csr(slot);
+			dbg("%s - slot %s HS_CSR (1) = %04x",
+			    __func__, slot_name(slot), hs_csr);
+
+			/* Configure device */
+			dbg("%s - configuring slot %s",
+			    __func__, slot_name(slot));
+			if (cpci_configure_slot(slot)) {
+				err("%s - could not configure slot %s",
+				    __func__, slot_name(slot));
+				continue;
+			}
+			dbg("%s - finished configuring slot %s",
+			    __func__, slot_name(slot));
+
+			/* GSM, debug */
+			hs_csr = cpci_get_hs_csr(slot);
+			dbg("%s - slot %s HS_CSR (2) = %04x",
+			    __func__, slot_name(slot), hs_csr);
+
+			slot->latch_status = 1;
+			slot->adapter_status = 1;
+
+			cpci_led_off(slot);
+
+			/* GSM, debug */
+			hs_csr = cpci_get_hs_csr(slot);
+			dbg("%s - slot %s HS_CSR (3) = %04x",
+			    __func__, slot_name(slot), hs_csr);
+
+			inserted++;
+		} else if (cpci_check_ext(slot)) {
+			/* Process extraction request */
+			dbg("%s - slot %s extracted",
+			    __func__, slot_name(slot));
+
+			/* GSM, debug */
+			hs_csr = cpci_get_hs_csr(slot);
+			dbg("%s - slot %s HS_CSR = %04x",
+			    __func__, slot_name(slot), hs_csr);
+
+			if (!slot->extracting) {
+				slot->latch_status = 0;
+				slot->extracting = 1;
+				atomic_inc(&extracting);
+			}
+			extracted++;
+		} else if (slot->extracting) {
+			hs_csr = cpci_get_hs_csr(slot);
+			if (hs_csr == 0xffff) {
+				/*
+				 * Hmmm, we're likely hosed at this point, should we
+				 * bother trying to tell the driver or not?
+				 */
+				err("card in slot %s was improperly removed",
+				    slot_name(slot));
+				slot->adapter_status = 0;
+				slot->extracting = 0;
+				atomic_dec(&extracting);
+			}
+		}
+	}
+	up_read(&list_rwsem);
+	dbg("inserted=%d, extracted=%d, extracting=%d",
+	    inserted, extracted, atomic_read(&extracting));
+	if (inserted || extracted)
+		return extracted;
+	else if (!atomic_read(&extracting)) {
+		err("cannot find ENUM# source, shutting down");
+		return -1;
+	}
+	return 0;
+}
+
+/* This is the interrupt mode worker thread body */
+static int
+event_thread(void *data)
+{
+	int rc;
+
+	dbg("%s - event thread started", __func__);
+	while (1) {
+		dbg("event thread sleeping");
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+		if (kthread_should_stop())
+			break;
+		do {
+			rc = check_slots();
+			if (rc > 0) {
+				/* Give userspace a chance to handle extraction */
+				msleep(500);
+			} else if (rc < 0) {
+				dbg("%s - error checking slots", __func__);
+				thread_finished = 1;
+				goto out;
+			}
+		} while (atomic_read(&extracting) && !kthread_should_stop());
+		if (kthread_should_stop())
+			break;
+
+		/* Re-enable ENUM# interrupt */
+		dbg("%s - re-enabling irq", __func__);
+		controller->ops->enable_irq();
+	}
+ out:
+	return 0;
+}
+
+/* This is the polling mode worker thread body */
+static int
+poll_thread(void *data)
+{
+	int rc;
+
+	while (1) {
+		if (kthread_should_stop() || signal_pending(current))
+			break;
+		if (controller->ops->query_enum()) {
+			do {
+				rc = check_slots();
+				if (rc > 0) {
+					/* Give userspace a chance to handle extraction */
+					msleep(500);
+				} else if (rc < 0) {
+					dbg("%s - error checking slots", __func__);
+					thread_finished = 1;
+					goto out;
+				}
+			} while (atomic_read(&extracting) && !kthread_should_stop());
+		}
+		msleep(100);
+	}
+ out:
+	return 0;
+}
+
+static int
+cpci_start_thread(void)
+{
+	if (controller->irq)
+		cpci_thread = kthread_run(event_thread, NULL, "cpci_hp_eventd");
+	else
+		cpci_thread = kthread_run(poll_thread, NULL, "cpci_hp_polld");
+	if (IS_ERR(cpci_thread)) {
+		err("Can't start up our thread");
+		return PTR_ERR(cpci_thread);
+	}
+	thread_finished = 0;
+	return 0;
+}
+
+static void
+cpci_stop_thread(void)
+{
+	kthread_stop(cpci_thread);
+	thread_finished = 1;
+}
+
+int
+cpci_hp_register_controller(struct cpci_hp_controller *new_controller)
+{
+	int status = 0;
+
+	if (controller)
+		return -1;
+	if (!(new_controller && new_controller->ops))
+		return -EINVAL;
+	if (new_controller->irq) {
+		if (!(new_controller->ops->enable_irq &&
+		     new_controller->ops->disable_irq))
+			status = -EINVAL;
+		if (request_irq(new_controller->irq,
+			       cpci_hp_intr,
+			       new_controller->irq_flags,
+			       MY_NAME,
+			       new_controller->dev_id)) {
+			err("Can't get irq %d for the hotplug cPCI controller",
+			    new_controller->irq);
+			status = -ENODEV;
+		}
+		dbg("%s - acquired controller irq %d",
+		    __func__, new_controller->irq);
+	}
+	if (!status)
+		controller = new_controller;
+	return status;
+}
+EXPORT_SYMBOL_GPL(cpci_hp_register_controller);
+
+static void
+cleanup_slots(void)
+{
+	struct slot *slot;
+	struct slot *tmp;
+
+	/*
+	 * Unregister all of our slots with the pci_hotplug subsystem,
+	 * and free up all memory that we had allocated.
+	 */
+	down_write(&list_rwsem);
+	if (!slots)
+		goto cleanup_null;
+	list_for_each_entry_safe(slot, tmp, &slot_list, slot_list) {
+		list_del(&slot->slot_list);
+		pci_hp_deregister(&slot->hotplug_slot);
+		release_slot(slot);
+	}
+cleanup_null:
+	up_write(&list_rwsem);
+}
+
+int
+cpci_hp_unregister_controller(struct cpci_hp_controller *old_controller)
+{
+	int status = 0;
+
+	if (controller) {
+		if (!thread_finished)
+			cpci_stop_thread();
+		if (controller->irq)
+			free_irq(controller->irq, controller->dev_id);
+		controller = NULL;
+		cleanup_slots();
+	} else
+		status = -ENODEV;
+	return status;
+}
+EXPORT_SYMBOL_GPL(cpci_hp_unregister_controller);
+
+int
+cpci_hp_start(void)
+{
+	static int first = 1;
+	int status;
+
+	dbg("%s - enter", __func__);
+	if (!controller)
+		return -ENODEV;
+
+	down_read(&list_rwsem);
+	if (list_empty(&slot_list)) {
+		up_read(&list_rwsem);
+		return -ENODEV;
+	}
+	up_read(&list_rwsem);
+
+	status = init_slots(first);
+	if (first)
+		first = 0;
+	if (status)
+		return status;
+
+	status = cpci_start_thread();
+	if (status)
+		return status;
+	dbg("%s - thread started", __func__);
+
+	if (controller->irq) {
+		/* Start enum interrupt processing */
+		dbg("%s - enabling irq", __func__);
+		controller->ops->enable_irq();
+	}
+	dbg("%s - exit", __func__);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cpci_hp_start);
+
+int
+cpci_hp_stop(void)
+{
+	if (!controller)
+		return -ENODEV;
+	if (controller->irq) {
+		/* Stop enum interrupt processing */
+		dbg("%s - disabling irq", __func__);
+		controller->ops->disable_irq();
+	}
+	cpci_stop_thread();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cpci_hp_stop);
+
+int __init
+cpci_hotplug_init(int debug)
+{
+	cpci_debug = debug;
+	return 0;
+}
diff --git a/drivers/pci/hotplug/cpci_hotplug_pci.c b/drivers/pci/hotplug/cpci_hotplug_pci.c
new file mode 100644
index 000000000..6c48066ac
--- /dev/null
+++ b/drivers/pci/hotplug/cpci_hotplug_pci.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * CompactPCI Hot Plug Driver PCI functions
+ *
+ * Copyright (C) 2002,2005 by SOMA Networks, Inc.
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <scottm@somanetworks.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <linux/proc_fs.h>
+#include "../pci.h"
+#include "cpci_hotplug.h"
+
+#define MY_NAME	"cpci_hotplug"
+
+#define dbg(format, arg...)					\
+	do {							\
+		if (cpci_debug)					\
+			printk(KERN_DEBUG "%s: " format "\n",	\
+				MY_NAME, ## arg);		\
+	} while (0)
+#define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME, ## arg)
+#define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME, ## arg)
+#define warn(format, arg...) printk(KERN_WARNING "%s: " format "\n", MY_NAME, ## arg)
+
+
+u8 cpci_get_attention_status(struct slot *slot)
+{
+	int hs_cap;
+	u16 hs_csr;
+
+	hs_cap = pci_bus_find_capability(slot->bus,
+					 slot->devfn,
+					 PCI_CAP_ID_CHSWP);
+	if (!hs_cap)
+		return 0;
+
+	if (pci_bus_read_config_word(slot->bus,
+				     slot->devfn,
+				     hs_cap + 2,
+				     &hs_csr))
+		return 0;
+
+	return hs_csr & 0x0008 ? 1 : 0;
+}
+
+int cpci_set_attention_status(struct slot *slot, int status)
+{
+	int hs_cap;
+	u16 hs_csr;
+
+	hs_cap = pci_bus_find_capability(slot->bus,
+					 slot->devfn,
+					 PCI_CAP_ID_CHSWP);
+	if (!hs_cap)
+		return 0;
+	if (pci_bus_read_config_word(slot->bus,
+				     slot->devfn,
+				     hs_cap + 2,
+				     &hs_csr))
+		return 0;
+	if (status)
+		hs_csr |= HS_CSR_LOO;
+	else
+		hs_csr &= ~HS_CSR_LOO;
+	if (pci_bus_write_config_word(slot->bus,
+				      slot->devfn,
+				      hs_cap + 2,
+				      hs_csr))
+		return 0;
+	return 1;
+}
+
+u16 cpci_get_hs_csr(struct slot *slot)
+{
+	int hs_cap;
+	u16 hs_csr;
+
+	hs_cap = pci_bus_find_capability(slot->bus,
+					 slot->devfn,
+					 PCI_CAP_ID_CHSWP);
+	if (!hs_cap)
+		return 0xFFFF;
+	if (pci_bus_read_config_word(slot->bus,
+				     slot->devfn,
+				     hs_cap + 2,
+				     &hs_csr))
+		return 0xFFFF;
+	return hs_csr;
+}
+
+int cpci_check_and_clear_ins(struct slot *slot)
+{
+	int hs_cap;
+	u16 hs_csr;
+	int ins = 0;
+
+	hs_cap = pci_bus_find_capability(slot->bus,
+					 slot->devfn,
+					 PCI_CAP_ID_CHSWP);
+	if (!hs_cap)
+		return 0;
+	if (pci_bus_read_config_word(slot->bus,
+				     slot->devfn,
+				     hs_cap + 2,
+				     &hs_csr))
+		return 0;
+	if (hs_csr & HS_CSR_INS) {
+		/* Clear INS (by setting it) */
+		if (pci_bus_write_config_word(slot->bus,
+					      slot->devfn,
+					      hs_cap + 2,
+					      hs_csr))
+			ins = 0;
+		else
+			ins = 1;
+	}
+	return ins;
+}
+
+int cpci_check_ext(struct slot *slot)
+{
+	int hs_cap;
+	u16 hs_csr;
+	int ext = 0;
+
+	hs_cap = pci_bus_find_capability(slot->bus,
+					 slot->devfn,
+					 PCI_CAP_ID_CHSWP);
+	if (!hs_cap)
+		return 0;
+	if (pci_bus_read_config_word(slot->bus,
+				     slot->devfn,
+				     hs_cap + 2,
+				     &hs_csr))
+		return 0;
+	if (hs_csr & HS_CSR_EXT)
+		ext = 1;
+	return ext;
+}
+
+int cpci_clear_ext(struct slot *slot)
+{
+	int hs_cap;
+	u16 hs_csr;
+
+	hs_cap = pci_bus_find_capability(slot->bus,
+					 slot->devfn,
+					 PCI_CAP_ID_CHSWP);
+	if (!hs_cap)
+		return -ENODEV;
+	if (pci_bus_read_config_word(slot->bus,
+				     slot->devfn,
+				     hs_cap + 2,
+				     &hs_csr))
+		return -ENODEV;
+	if (hs_csr & HS_CSR_EXT) {
+		/* Clear EXT (by setting it) */
+		if (pci_bus_write_config_word(slot->bus,
+					      slot->devfn,
+					      hs_cap + 2,
+					      hs_csr))
+			return -ENODEV;
+	}
+	return 0;
+}
+
+int cpci_led_on(struct slot *slot)
+{
+	int hs_cap;
+	u16 hs_csr;
+
+	hs_cap = pci_bus_find_capability(slot->bus,
+					 slot->devfn,
+					 PCI_CAP_ID_CHSWP);
+	if (!hs_cap)
+		return -ENODEV;
+	if (pci_bus_read_config_word(slot->bus,
+				     slot->devfn,
+				     hs_cap + 2,
+				     &hs_csr))
+		return -ENODEV;
+	if ((hs_csr & HS_CSR_LOO) != HS_CSR_LOO) {
+		hs_csr |= HS_CSR_LOO;
+		if (pci_bus_write_config_word(slot->bus,
+					      slot->devfn,
+					      hs_cap + 2,
+					      hs_csr)) {
+			err("Could not set LOO for slot %s", slot_name(slot));
+			return -ENODEV;
+		}
+	}
+	return 0;
+}
+
+int cpci_led_off(struct slot *slot)
+{
+	int hs_cap;
+	u16 hs_csr;
+
+	hs_cap = pci_bus_find_capability(slot->bus,
+					 slot->devfn,
+					 PCI_CAP_ID_CHSWP);
+	if (!hs_cap)
+		return -ENODEV;
+	if (pci_bus_read_config_word(slot->bus,
+				     slot->devfn,
+				     hs_cap + 2,
+				     &hs_csr))
+		return -ENODEV;
+	if (hs_csr & HS_CSR_LOO) {
+		hs_csr &= ~HS_CSR_LOO;
+		if (pci_bus_write_config_word(slot->bus,
+					      slot->devfn,
+					      hs_cap + 2,
+					      hs_csr)) {
+			err("Could not clear LOO for slot %s", slot_name(slot));
+			return -ENODEV;
+		}
+	}
+	return 0;
+}
+
+
+/*
+ * Device configuration functions
+ */
+
+int cpci_configure_slot(struct slot *slot)
+{
+	struct pci_dev *dev;
+	struct pci_bus *parent;
+	int ret = 0;
+
+	dbg("%s - enter", __func__);
+
+	pci_lock_rescan_remove();
+
+	if (slot->dev == NULL) {
+		dbg("pci_dev null, finding %02x:%02x:%x",
+		    slot->bus->number, PCI_SLOT(slot->devfn), PCI_FUNC(slot->devfn));
+		slot->dev = pci_get_slot(slot->bus, slot->devfn);
+	}
+
+	/* Still NULL? Well then scan for it! */
+	if (slot->dev == NULL) {
+		int n;
+		dbg("pci_dev still null");
+
+		/*
+		 * This will generate pci_dev structures for all functions, but
+		 * we will only call this case when lookup fails.
+		 */
+		n = pci_scan_slot(slot->bus, slot->devfn);
+		dbg("%s: pci_scan_slot returned %d", __func__, n);
+		slot->dev = pci_get_slot(slot->bus, slot->devfn);
+		if (slot->dev == NULL) {
+			err("Could not find PCI device for slot %02x", slot->number);
+			ret = -ENODEV;
+			goto out;
+		}
+	}
+	parent = slot->dev->bus;
+
+	for_each_pci_bridge(dev, parent) {
+		if (PCI_SLOT(dev->devfn) == PCI_SLOT(slot->devfn))
+			pci_hp_add_bridge(dev);
+	}
+
+	pci_assign_unassigned_bridge_resources(parent->self);
+
+	pci_bus_add_devices(parent);
+
+ out:
+	pci_unlock_rescan_remove();
+	dbg("%s - exit", __func__);
+	return ret;
+}
+
+int cpci_unconfigure_slot(struct slot *slot)
+{
+	struct pci_dev *dev, *temp;
+
+	dbg("%s - enter", __func__);
+	if (!slot->dev) {
+		err("No device for slot %02x\n", slot->number);
+		return -ENODEV;
+	}
+
+	pci_lock_rescan_remove();
+
+	list_for_each_entry_safe(dev, temp, &slot->bus->devices, bus_list) {
+		if (PCI_SLOT(dev->devfn) != PCI_SLOT(slot->devfn))
+			continue;
+		pci_dev_get(dev);
+		pci_stop_and_remove_bus_device(dev);
+		pci_dev_put(dev);
+	}
+	pci_dev_put(slot->dev);
+	slot->dev = NULL;
+
+	pci_unlock_rescan_remove();
+
+	dbg("%s - exit", __func__);
+	return 0;
+}
diff --git a/drivers/pci/hotplug/cpcihp_generic.c b/drivers/pci/hotplug/cpcihp_generic.c
new file mode 100644
index 000000000..17d71edf0
--- /dev/null
+++ b/drivers/pci/hotplug/cpcihp_generic.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * cpcihp_generic.c
+ *
+ * Generic port I/O CompactPCI driver
+ *
+ * Copyright 2002 SOMA Networks, Inc.
+ * Copyright 2001 Intel San Luis Obispo
+ * Copyright 2000,2001 MontaVista Software Inc.
+ *
+ * This generic CompactPCI hotplug driver should allow using the PCI hotplug
+ * mechanism on any CompactPCI board that exposes the #ENUM signal as a bit
+ * in a system register that can be read through standard port I/O.
+ *
+ * Send feedback to <scottm@somanetworks.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include "cpci_hotplug.h"
+
+#define DRIVER_VERSION	"0.1"
+#define DRIVER_AUTHOR	"Scott Murray <scottm@somanetworks.com>"
+#define DRIVER_DESC	"Generic port I/O CompactPCI Hot Plug Driver"
+
+#if !defined(MODULE)
+#define MY_NAME	"cpcihp_generic"
+#else
+#define MY_NAME	THIS_MODULE->name
+#endif
+
+#define dbg(format, arg...)					\
+	do {							\
+		if (debug)					\
+			printk(KERN_DEBUG "%s: " format "\n",	\
+				MY_NAME, ## arg);		\
+	} while (0)
+#define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME, ## arg)
+#define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME, ## arg)
+#define warn(format, arg...) printk(KERN_WARNING "%s: " format "\n", MY_NAME, ## arg)
+
+/* local variables */
+static bool debug;
+static char *bridge;
+static u8 bridge_busnr;
+static u8 bridge_slot;
+static struct pci_bus *bus;
+static u8 first_slot;
+static u8 last_slot;
+static u16 port;
+static unsigned int enum_bit;
+static u8 enum_mask;
+
+static struct cpci_hp_controller_ops generic_hpc_ops;
+static struct cpci_hp_controller generic_hpc;
+
+static int __init validate_parameters(void)
+{
+	char *str;
+	char *p;
+	unsigned long tmp;
+
+	if (!bridge) {
+		info("not configured, disabling.");
+		return -EINVAL;
+	}
+	str = bridge;
+	if (!*str)
+		return -EINVAL;
+
+	tmp = simple_strtoul(str, &p, 16);
+	if (p == str || tmp > 0xff) {
+		err("Invalid hotplug bus bridge device bus number");
+		return -EINVAL;
+	}
+	bridge_busnr = (u8) tmp;
+	dbg("bridge_busnr = 0x%02x", bridge_busnr);
+	if (*p != ':') {
+		err("Invalid hotplug bus bridge device");
+		return -EINVAL;
+	}
+	str = p + 1;
+	tmp = simple_strtoul(str, &p, 16);
+	if (p == str || tmp > 0x1f) {
+		err("Invalid hotplug bus bridge device slot number");
+		return -EINVAL;
+	}
+	bridge_slot = (u8) tmp;
+	dbg("bridge_slot = 0x%02x", bridge_slot);
+
+	dbg("first_slot = 0x%02x", first_slot);
+	dbg("last_slot = 0x%02x", last_slot);
+	if (!(first_slot && last_slot)) {
+		err("Need to specify first_slot and last_slot");
+		return -EINVAL;
+	}
+	if (last_slot < first_slot) {
+		err("first_slot must be less than last_slot");
+		return -EINVAL;
+	}
+
+	dbg("port = 0x%04x", port);
+	dbg("enum_bit = 0x%02x", enum_bit);
+	if (enum_bit > 7) {
+		err("Invalid #ENUM bit");
+		return -EINVAL;
+	}
+	enum_mask = 1 << enum_bit;
+	return 0;
+}
+
+static int query_enum(void)
+{
+	u8 value;
+
+	value = inb_p(port);
+	return ((value & enum_mask) == enum_mask);
+}
+
+static int __init cpcihp_generic_init(void)
+{
+	int status;
+	struct resource *r;
+	struct pci_dev *dev;
+
+	info(DRIVER_DESC " version: " DRIVER_VERSION);
+	status = validate_parameters();
+	if (status)
+		return status;
+
+	r = request_region(port, 1, "#ENUM hotswap signal register");
+	if (!r)
+		return -EBUSY;
+
+	dev = pci_get_domain_bus_and_slot(0, bridge_busnr,
+					  PCI_DEVFN(bridge_slot, 0));
+	if (!dev || dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
+		err("Invalid bridge device %s", bridge);
+		pci_dev_put(dev);
+		return -EINVAL;
+	}
+	bus = dev->subordinate;
+	pci_dev_put(dev);
+
+	memset(&generic_hpc, 0, sizeof(struct cpci_hp_controller));
+	generic_hpc_ops.query_enum = query_enum;
+	generic_hpc.ops = &generic_hpc_ops;
+
+	status = cpci_hp_register_controller(&generic_hpc);
+	if (status != 0) {
+		err("Could not register cPCI hotplug controller");
+		return -ENODEV;
+	}
+	dbg("registered controller");
+
+	status = cpci_hp_register_bus(bus, first_slot, last_slot);
+	if (status != 0) {
+		err("Could not register cPCI hotplug bus");
+		goto init_bus_register_error;
+	}
+	dbg("registered bus");
+
+	status = cpci_hp_start();
+	if (status != 0) {
+		err("Could not started cPCI hotplug system");
+		goto init_start_error;
+	}
+	dbg("started cpci hp system");
+	return 0;
+init_start_error:
+	cpci_hp_unregister_bus(bus);
+init_bus_register_error:
+	cpci_hp_unregister_controller(&generic_hpc);
+	err("status = %d", status);
+	return status;
+
+}
+
+static void __exit cpcihp_generic_exit(void)
+{
+	cpci_hp_stop();
+	cpci_hp_unregister_bus(bus);
+	cpci_hp_unregister_controller(&generic_hpc);
+	release_region(port, 1);
+}
+
+module_init(cpcihp_generic_init);
+module_exit(cpcihp_generic_exit);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+module_param(debug, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(debug, "Debugging mode enabled or not");
+module_param(bridge, charp, 0);
+MODULE_PARM_DESC(bridge, "Hotswap bus bridge device, <bus>:<slot> (bus and slot are in hexadecimal)");
+module_param(first_slot, byte, 0);
+MODULE_PARM_DESC(first_slot, "Hotswap bus first slot number");
+module_param(last_slot, byte, 0);
+MODULE_PARM_DESC(last_slot, "Hotswap bus last slot number");
+module_param_hw(port, ushort, ioport, 0);
+MODULE_PARM_DESC(port, "#ENUM signal I/O port");
+module_param(enum_bit, uint, 0);
+MODULE_PARM_DESC(enum_bit, "#ENUM signal bit (0-7)");
diff --git a/drivers/pci/hotplug/cpcihp_zt5550.c b/drivers/pci/hotplug/cpcihp_zt5550.c
new file mode 100644
index 000000000..ae63e5a39
--- /dev/null
+++ b/drivers/pci/hotplug/cpcihp_zt5550.c
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * cpcihp_zt5550.c
+ *
+ * Intel/Ziatech ZT5550 CompactPCI Host Controller driver
+ *
+ * Copyright 2002 SOMA Networks, Inc.
+ * Copyright 2001 Intel San Luis Obispo
+ * Copyright 2000,2001 MontaVista Software Inc.
+ *
+ * Send feedback to <scottm@somanetworks.com>
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/signal.h>	/* IRQF_SHARED */
+#include "cpci_hotplug.h"
+#include "cpcihp_zt5550.h"
+
+#define DRIVER_VERSION	"0.2"
+#define DRIVER_AUTHOR	"Scott Murray <scottm@somanetworks.com>"
+#define DRIVER_DESC	"ZT5550 CompactPCI Hot Plug Driver"
+
+#define MY_NAME	"cpcihp_zt5550"
+
+#define dbg(format, arg...)					\
+	do {							\
+		if (debug)					\
+			printk(KERN_DEBUG "%s: " format "\n",	\
+				MY_NAME, ## arg);		\
+	} while (0)
+#define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME, ## arg)
+#define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME, ## arg)
+#define warn(format, arg...) printk(KERN_WARNING "%s: " format "\n", MY_NAME, ## arg)
+
+/* local variables */
+static bool debug;
+static bool poll;
+static struct cpci_hp_controller_ops zt5550_hpc_ops;
+static struct cpci_hp_controller zt5550_hpc;
+
+/* Primary cPCI bus bridge device */
+static struct pci_dev *bus0_dev;
+static struct pci_bus *bus0;
+
+/* Host controller device */
+static struct pci_dev *hc_dev;
+
+/* Host controller register addresses */
+static void __iomem *hc_registers;
+static void __iomem *csr_hc_index;
+static void __iomem *csr_hc_data;
+static void __iomem *csr_int_status;
+static void __iomem *csr_int_mask;
+
+
+static int zt5550_hc_config(struct pci_dev *pdev)
+{
+	int ret;
+
+	/* Since we know that no boards exist with two HC chips, treat it as an error */
+	if (hc_dev) {
+		err("too many host controller devices?");
+		return -EBUSY;
+	}
+
+	ret = pci_enable_device(pdev);
+	if (ret) {
+		err("cannot enable %s\n", pci_name(pdev));
+		return ret;
+	}
+
+	hc_dev = pdev;
+	dbg("hc_dev = %p", hc_dev);
+	dbg("pci resource start %llx", (unsigned long long)pci_resource_start(hc_dev, 1));
+	dbg("pci resource len %llx", (unsigned long long)pci_resource_len(hc_dev, 1));
+
+	if (!request_mem_region(pci_resource_start(hc_dev, 1),
+				pci_resource_len(hc_dev, 1), MY_NAME)) {
+		err("cannot reserve MMIO region");
+		ret = -ENOMEM;
+		goto exit_disable_device;
+	}
+
+	hc_registers =
+	    ioremap(pci_resource_start(hc_dev, 1), pci_resource_len(hc_dev, 1));
+	if (!hc_registers) {
+		err("cannot remap MMIO region %llx @ %llx",
+			(unsigned long long)pci_resource_len(hc_dev, 1),
+			(unsigned long long)pci_resource_start(hc_dev, 1));
+		ret = -ENODEV;
+		goto exit_release_region;
+	}
+
+	csr_hc_index = hc_registers + CSR_HCINDEX;
+	csr_hc_data = hc_registers + CSR_HCDATA;
+	csr_int_status = hc_registers + CSR_INTSTAT;
+	csr_int_mask = hc_registers + CSR_INTMASK;
+
+	/*
+	 * Disable host control, fault and serial interrupts
+	 */
+	dbg("disabling host control, fault and serial interrupts");
+	writeb((u8) HC_INT_MASK_REG, csr_hc_index);
+	writeb((u8) ALL_INDEXED_INTS_MASK, csr_hc_data);
+	dbg("disabled host control, fault and serial interrupts");
+
+	/*
+	 * Disable timer0, timer1 and ENUM interrupts
+	 */
+	dbg("disabling timer0, timer1 and ENUM interrupts");
+	writeb((u8) ALL_DIRECT_INTS_MASK, csr_int_mask);
+	dbg("disabled timer0, timer1 and ENUM interrupts");
+	return 0;
+
+exit_release_region:
+	release_mem_region(pci_resource_start(hc_dev, 1),
+			   pci_resource_len(hc_dev, 1));
+exit_disable_device:
+	pci_disable_device(hc_dev);
+	return ret;
+}
+
+static int zt5550_hc_cleanup(void)
+{
+	if (!hc_dev)
+		return -ENODEV;
+
+	iounmap(hc_registers);
+	release_mem_region(pci_resource_start(hc_dev, 1),
+			   pci_resource_len(hc_dev, 1));
+	pci_disable_device(hc_dev);
+	return 0;
+}
+
+static int zt5550_hc_query_enum(void)
+{
+	u8 value;
+
+	value = inb_p(ENUM_PORT);
+	return ((value & ENUM_MASK) == ENUM_MASK);
+}
+
+static int zt5550_hc_check_irq(void *dev_id)
+{
+	int ret;
+	u8 reg;
+
+	ret = 0;
+	if (dev_id == zt5550_hpc.dev_id) {
+		reg = readb(csr_int_status);
+		if (reg)
+			ret = 1;
+	}
+	return ret;
+}
+
+static int zt5550_hc_enable_irq(void)
+{
+	u8 reg;
+
+	if (hc_dev == NULL)
+		return -ENODEV;
+
+	reg = readb(csr_int_mask);
+	reg = reg & ~ENUM_INT_MASK;
+	writeb(reg, csr_int_mask);
+	return 0;
+}
+
+static int zt5550_hc_disable_irq(void)
+{
+	u8 reg;
+
+	if (hc_dev == NULL)
+		return -ENODEV;
+
+	reg = readb(csr_int_mask);
+	reg = reg | ENUM_INT_MASK;
+	writeb(reg, csr_int_mask);
+	return 0;
+}
+
+static int zt5550_hc_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	int status;
+
+	status = zt5550_hc_config(pdev);
+	if (status != 0)
+		return status;
+
+	dbg("returned from zt5550_hc_config");
+
+	memset(&zt5550_hpc, 0, sizeof(struct cpci_hp_controller));
+	zt5550_hpc_ops.query_enum = zt5550_hc_query_enum;
+	zt5550_hpc.ops = &zt5550_hpc_ops;
+	if (!poll) {
+		zt5550_hpc.irq = hc_dev->irq;
+		zt5550_hpc.irq_flags = IRQF_SHARED;
+		zt5550_hpc.dev_id = hc_dev;
+
+		zt5550_hpc_ops.enable_irq = zt5550_hc_enable_irq;
+		zt5550_hpc_ops.disable_irq = zt5550_hc_disable_irq;
+		zt5550_hpc_ops.check_irq = zt5550_hc_check_irq;
+	} else {
+		info("using ENUM# polling mode");
+	}
+
+	status = cpci_hp_register_controller(&zt5550_hpc);
+	if (status != 0) {
+		err("could not register cPCI hotplug controller");
+		goto init_hc_error;
+	}
+	dbg("registered controller");
+
+	/* Look for first device matching cPCI bus's bridge vendor and device IDs */
+	bus0_dev = pci_get_device(PCI_VENDOR_ID_DEC,
+				  PCI_DEVICE_ID_DEC_21154, NULL);
+	if (!bus0_dev) {
+		status = -ENODEV;
+		goto init_register_error;
+	}
+	bus0 = bus0_dev->subordinate;
+	pci_dev_put(bus0_dev);
+
+	status = cpci_hp_register_bus(bus0, 0x0a, 0x0f);
+	if (status != 0) {
+		err("could not register cPCI hotplug bus");
+		goto init_register_error;
+	}
+	dbg("registered bus");
+
+	status = cpci_hp_start();
+	if (status != 0) {
+		err("could not started cPCI hotplug system");
+		cpci_hp_unregister_bus(bus0);
+		goto init_register_error;
+	}
+	dbg("started cpci hp system");
+
+	return 0;
+init_register_error:
+	cpci_hp_unregister_controller(&zt5550_hpc);
+init_hc_error:
+	err("status = %d", status);
+	zt5550_hc_cleanup();
+	return status;
+
+}
+
+static void zt5550_hc_remove_one(struct pci_dev *pdev)
+{
+	cpci_hp_stop();
+	cpci_hp_unregister_bus(bus0);
+	cpci_hp_unregister_controller(&zt5550_hpc);
+	zt5550_hc_cleanup();
+}
+
+
+static const struct pci_device_id zt5550_hc_pci_tbl[] = {
+	{ PCI_VENDOR_ID_ZIATECH, PCI_DEVICE_ID_ZIATECH_5550_HC, PCI_ANY_ID, PCI_ANY_ID, },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, zt5550_hc_pci_tbl);
+
+static struct pci_driver zt5550_hc_driver = {
+	.name		= "zt5550_hc",
+	.id_table	= zt5550_hc_pci_tbl,
+	.probe		= zt5550_hc_init_one,
+	.remove		= zt5550_hc_remove_one,
+};
+
+static int __init zt5550_init(void)
+{
+	struct resource *r;
+	int rc;
+
+	info(DRIVER_DESC " version: " DRIVER_VERSION);
+	r = request_region(ENUM_PORT, 1, "#ENUM hotswap signal register");
+	if (!r)
+		return -EBUSY;
+
+	rc = pci_register_driver(&zt5550_hc_driver);
+	if (rc < 0)
+		release_region(ENUM_PORT, 1);
+	return rc;
+}
+
+static void __exit
+zt5550_exit(void)
+{
+	pci_unregister_driver(&zt5550_hc_driver);
+	release_region(ENUM_PORT, 1);
+}
+
+module_init(zt5550_init);
+module_exit(zt5550_exit);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+module_param(debug, bool, 0644);
+MODULE_PARM_DESC(debug, "Debugging mode enabled or not");
+module_param(poll, bool, 0644);
+MODULE_PARM_DESC(poll, "#ENUM polling mode enabled or not");
diff --git a/drivers/pci/hotplug/cpcihp_zt5550.h b/drivers/pci/hotplug/cpcihp_zt5550.h
new file mode 100644
index 000000000..5ea10df83
--- /dev/null
+++ b/drivers/pci/hotplug/cpcihp_zt5550.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * cpcihp_zt5550.h
+ *
+ * Intel/Ziatech ZT5550 CompactPCI Host Controller driver definitions
+ *
+ * Copyright 2002 SOMA Networks, Inc.
+ * Copyright 2001 Intel San Luis Obispo
+ * Copyright 2000,2001 MontaVista Software Inc.
+ *
+ * Send feedback to <scottm@somanetworks.com>
+ */
+
+#ifndef _CPCIHP_ZT5550_H
+#define _CPCIHP_ZT5550_H
+
+/* Direct registers */
+#define CSR_HCINDEX		0x00
+#define CSR_HCDATA		0x04
+#define CSR_INTSTAT		0x08
+#define CSR_INTMASK		0x09
+#define CSR_CNT0CMD		0x0C
+#define CSR_CNT1CMD		0x0E
+#define CSR_CNT0		0x10
+#define CSR_CNT1		0x14
+
+/* Masks for interrupt bits in CSR_INTMASK direct register */
+#define CNT0_INT_MASK		0x01
+#define CNT1_INT_MASK		0x02
+#define ENUM_INT_MASK		0x04
+#define ALL_DIRECT_INTS_MASK	0x07
+
+/* Indexed registers (through CSR_INDEX, CSR_DATA) */
+#define HC_INT_MASK_REG		0x04
+#define HC_STATUS_REG		0x08
+#define HC_CMD_REG		0x0C
+#define ARB_CONFIG_GNT_REG	0x10
+#define ARB_CONFIG_CFG_REG	0x12
+#define ARB_CONFIG_REG		0x10
+#define ISOL_CONFIG_REG		0x18
+#define FAULT_STATUS_REG	0x20
+#define FAULT_CONFIG_REG	0x24
+#define WD_CONFIG_REG		0x2C
+#define HC_DIAG_REG		0x30
+#define SERIAL_COMM_REG		0x34
+#define SERIAL_OUT_REG		0x38
+#define SERIAL_IN_REG		0x3C
+
+/* Masks for interrupt bits in HC_INT_MASK_REG indexed register */
+#define SERIAL_INT_MASK		0x01
+#define FAULT_INT_MASK		0x02
+#define HCF_INT_MASK		0x04
+#define ALL_INDEXED_INTS_MASK	0x07
+
+/* Digital I/O port storing ENUM# */
+#define ENUM_PORT	0xE1
+/* Mask to get to the ENUM# bit on the bus */
+#define ENUM_MASK	0x40
+
+#endif				/* _CPCIHP_ZT5550_H */
diff --git a/drivers/pci/hotplug/cpqphp.h b/drivers/pci/hotplug/cpqphp.h
new file mode 100644
index 000000000..2f7b49ea9
--- /dev/null
+++ b/drivers/pci/hotplug/cpqphp.h
@@ -0,0 +1,729 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Compaq Hot Plug Controller Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>
+ *
+ */
+#ifndef _CPQPHP_H
+#define _CPQPHP_H
+
+#include <linux/interrupt.h>
+#include <linux/io.h>		/* for read? and write? functions */
+#include <linux/delay.h>	/* for delays */
+#include <linux/mutex.h>
+#include <linux/sched/signal.h>	/* for signal_pending() */
+
+#define MY_NAME	"cpqphp"
+
+#define dbg(fmt, arg...) do { if (cpqhp_debug) printk(KERN_DEBUG "%s: " fmt, MY_NAME, ## arg); } while (0)
+#define err(format, arg...) printk(KERN_ERR "%s: " format, MY_NAME, ## arg)
+#define info(format, arg...) printk(KERN_INFO "%s: " format, MY_NAME, ## arg)
+#define warn(format, arg...) printk(KERN_WARNING "%s: " format, MY_NAME, ## arg)
+
+
+
+struct smbios_system_slot {
+	u8 type;
+	u8 length;
+	u16 handle;
+	u8 name_string_num;
+	u8 slot_type;
+	u8 slot_width;
+	u8 slot_current_usage;
+	u8 slot_length;
+	u16 slot_number;
+	u8 properties1;
+	u8 properties2;
+} __attribute__ ((packed));
+
+/* offsets to the smbios generic type based on the above structure layout */
+enum smbios_system_slot_offsets {
+	SMBIOS_SLOT_GENERIC_TYPE =	offsetof(struct smbios_system_slot, type),
+	SMBIOS_SLOT_GENERIC_LENGTH =	offsetof(struct smbios_system_slot, length),
+	SMBIOS_SLOT_GENERIC_HANDLE =	offsetof(struct smbios_system_slot, handle),
+	SMBIOS_SLOT_NAME_STRING_NUM =	offsetof(struct smbios_system_slot, name_string_num),
+	SMBIOS_SLOT_TYPE =		offsetof(struct smbios_system_slot, slot_type),
+	SMBIOS_SLOT_WIDTH =		offsetof(struct smbios_system_slot, slot_width),
+	SMBIOS_SLOT_CURRENT_USAGE =	offsetof(struct smbios_system_slot, slot_current_usage),
+	SMBIOS_SLOT_LENGTH =		offsetof(struct smbios_system_slot, slot_length),
+	SMBIOS_SLOT_NUMBER =		offsetof(struct smbios_system_slot, slot_number),
+	SMBIOS_SLOT_PROPERTIES1 =	offsetof(struct smbios_system_slot, properties1),
+	SMBIOS_SLOT_PROPERTIES2 =	offsetof(struct smbios_system_slot, properties2),
+};
+
+struct smbios_generic {
+	u8 type;
+	u8 length;
+	u16 handle;
+} __attribute__ ((packed));
+
+/* offsets to the smbios generic type based on the above structure layout */
+enum smbios_generic_offsets {
+	SMBIOS_GENERIC_TYPE =	offsetof(struct smbios_generic, type),
+	SMBIOS_GENERIC_LENGTH =	offsetof(struct smbios_generic, length),
+	SMBIOS_GENERIC_HANDLE =	offsetof(struct smbios_generic, handle),
+};
+
+struct smbios_entry_point {
+	char anchor[4];
+	u8 ep_checksum;
+	u8 ep_length;
+	u8 major_version;
+	u8 minor_version;
+	u16 max_size_entry;
+	u8 ep_rev;
+	u8 reserved[5];
+	char int_anchor[5];
+	u8 int_checksum;
+	u16 st_length;
+	u32 st_address;
+	u16 number_of_entrys;
+	u8 bcd_rev;
+} __attribute__ ((packed));
+
+/* offsets to the smbios entry point based on the above structure layout */
+enum smbios_entry_point_offsets {
+	ANCHOR =		offsetof(struct smbios_entry_point, anchor[0]),
+	EP_CHECKSUM =		offsetof(struct smbios_entry_point, ep_checksum),
+	EP_LENGTH =		offsetof(struct smbios_entry_point, ep_length),
+	MAJOR_VERSION =		offsetof(struct smbios_entry_point, major_version),
+	MINOR_VERSION =		offsetof(struct smbios_entry_point, minor_version),
+	MAX_SIZE_ENTRY =	offsetof(struct smbios_entry_point, max_size_entry),
+	EP_REV =		offsetof(struct smbios_entry_point, ep_rev),
+	INT_ANCHOR =		offsetof(struct smbios_entry_point, int_anchor[0]),
+	INT_CHECKSUM =		offsetof(struct smbios_entry_point, int_checksum),
+	ST_LENGTH =		offsetof(struct smbios_entry_point, st_length),
+	ST_ADDRESS =		offsetof(struct smbios_entry_point, st_address),
+	NUMBER_OF_ENTRYS =	offsetof(struct smbios_entry_point, number_of_entrys),
+	BCD_REV =		offsetof(struct smbios_entry_point, bcd_rev),
+};
+
+struct ctrl_reg {			/* offset */
+	u8	slot_RST;		/* 0x00 */
+	u8	slot_enable;		/* 0x01 */
+	u16	misc;			/* 0x02 */
+	u32	led_control;		/* 0x04 */
+	u32	int_input_clear;	/* 0x08 */
+	u32	int_mask;		/* 0x0a */
+	u8	reserved0;		/* 0x10 */
+	u8	reserved1;		/* 0x11 */
+	u8	reserved2;		/* 0x12 */
+	u8	gen_output_AB;		/* 0x13 */
+	u32	non_int_input;		/* 0x14 */
+	u32	reserved3;		/* 0x18 */
+	u32	reserved4;		/* 0x1a */
+	u32	reserved5;		/* 0x20 */
+	u8	reserved6;		/* 0x24 */
+	u8	reserved7;		/* 0x25 */
+	u16	reserved8;		/* 0x26 */
+	u8	slot_mask;		/* 0x28 */
+	u8	reserved9;		/* 0x29 */
+	u8	reserved10;		/* 0x2a */
+	u8	reserved11;		/* 0x2b */
+	u8	slot_SERR;		/* 0x2c */
+	u8	slot_power;		/* 0x2d */
+	u8	reserved12;		/* 0x2e */
+	u8	reserved13;		/* 0x2f */
+	u8	next_curr_freq;		/* 0x30 */
+	u8	reset_freq_mode;	/* 0x31 */
+} __attribute__ ((packed));
+
+/* offsets to the controller registers based on the above structure layout */
+enum ctrl_offsets {
+	SLOT_RST =		offsetof(struct ctrl_reg, slot_RST),
+	SLOT_ENABLE =		offsetof(struct ctrl_reg, slot_enable),
+	MISC =			offsetof(struct ctrl_reg, misc),
+	LED_CONTROL =		offsetof(struct ctrl_reg, led_control),
+	INT_INPUT_CLEAR =	offsetof(struct ctrl_reg, int_input_clear),
+	INT_MASK =		offsetof(struct ctrl_reg, int_mask),
+	CTRL_RESERVED0 =	offsetof(struct ctrl_reg, reserved0),
+	CTRL_RESERVED1 =	offsetof(struct ctrl_reg, reserved1),
+	CTRL_RESERVED2 =	offsetof(struct ctrl_reg, reserved1),
+	GEN_OUTPUT_AB =		offsetof(struct ctrl_reg, gen_output_AB),
+	NON_INT_INPUT =		offsetof(struct ctrl_reg, non_int_input),
+	CTRL_RESERVED3 =	offsetof(struct ctrl_reg, reserved3),
+	CTRL_RESERVED4 =	offsetof(struct ctrl_reg, reserved4),
+	CTRL_RESERVED5 =	offsetof(struct ctrl_reg, reserved5),
+	CTRL_RESERVED6 =	offsetof(struct ctrl_reg, reserved6),
+	CTRL_RESERVED7 =	offsetof(struct ctrl_reg, reserved7),
+	CTRL_RESERVED8 =	offsetof(struct ctrl_reg, reserved8),
+	SLOT_MASK =		offsetof(struct ctrl_reg, slot_mask),
+	CTRL_RESERVED9 =	offsetof(struct ctrl_reg, reserved9),
+	CTRL_RESERVED10 =	offsetof(struct ctrl_reg, reserved10),
+	CTRL_RESERVED11 =	offsetof(struct ctrl_reg, reserved11),
+	SLOT_SERR =		offsetof(struct ctrl_reg, slot_SERR),
+	SLOT_POWER =		offsetof(struct ctrl_reg, slot_power),
+	NEXT_CURR_FREQ =	offsetof(struct ctrl_reg, next_curr_freq),
+	RESET_FREQ_MODE =	offsetof(struct ctrl_reg, reset_freq_mode),
+};
+
+struct hrt {
+	char sig0;
+	char sig1;
+	char sig2;
+	char sig3;
+	u16 unused_IRQ;
+	u16 PCIIRQ;
+	u8 number_of_entries;
+	u8 revision;
+	u16 reserved1;
+	u32 reserved2;
+} __attribute__ ((packed));
+
+/* offsets to the hotplug resource table registers based on the above
+ * structure layout
+ */
+enum hrt_offsets {
+	SIG0 =			offsetof(struct hrt, sig0),
+	SIG1 =			offsetof(struct hrt, sig1),
+	SIG2 =			offsetof(struct hrt, sig2),
+	SIG3 =			offsetof(struct hrt, sig3),
+	UNUSED_IRQ =		offsetof(struct hrt, unused_IRQ),
+	PCIIRQ =		offsetof(struct hrt, PCIIRQ),
+	NUMBER_OF_ENTRIES =	offsetof(struct hrt, number_of_entries),
+	REVISION =		offsetof(struct hrt, revision),
+	HRT_RESERVED1 =		offsetof(struct hrt, reserved1),
+	HRT_RESERVED2 =		offsetof(struct hrt, reserved2),
+};
+
+struct slot_rt {
+	u8 dev_func;
+	u8 primary_bus;
+	u8 secondary_bus;
+	u8 max_bus;
+	u16 io_base;
+	u16 io_length;
+	u16 mem_base;
+	u16 mem_length;
+	u16 pre_mem_base;
+	u16 pre_mem_length;
+} __attribute__ ((packed));
+
+/* offsets to the hotplug slot resource table registers based on the above
+ * structure layout
+ */
+enum slot_rt_offsets {
+	DEV_FUNC =		offsetof(struct slot_rt, dev_func),
+	PRIMARY_BUS =		offsetof(struct slot_rt, primary_bus),
+	SECONDARY_BUS =		offsetof(struct slot_rt, secondary_bus),
+	MAX_BUS =		offsetof(struct slot_rt, max_bus),
+	IO_BASE =		offsetof(struct slot_rt, io_base),
+	IO_LENGTH =		offsetof(struct slot_rt, io_length),
+	MEM_BASE =		offsetof(struct slot_rt, mem_base),
+	MEM_LENGTH =		offsetof(struct slot_rt, mem_length),
+	PRE_MEM_BASE =		offsetof(struct slot_rt, pre_mem_base),
+	PRE_MEM_LENGTH =	offsetof(struct slot_rt, pre_mem_length),
+};
+
+struct pci_func {
+	struct pci_func *next;
+	u8 bus;
+	u8 device;
+	u8 function;
+	u8 is_a_board;
+	u16 status;
+	u8 configured;
+	u8 switch_save;
+	u8 presence_save;
+	u32 base_length[0x06];
+	u8 base_type[0x06];
+	u16 reserved2;
+	u32 config_space[0x20];
+	struct pci_resource *mem_head;
+	struct pci_resource *p_mem_head;
+	struct pci_resource *io_head;
+	struct pci_resource *bus_head;
+	struct timer_list *p_task_event;
+	struct pci_dev *pci_dev;
+};
+
+struct slot {
+	struct slot *next;
+	u8 bus;
+	u8 device;
+	u8 number;
+	u8 is_a_board;
+	u8 configured;
+	u8 state;
+	u8 switch_save;
+	u8 presence_save;
+	u32 capabilities;
+	u16 reserved2;
+	struct timer_list task_event;
+	u8 hp_slot;
+	struct controller *ctrl;
+	void __iomem *p_sm_slot;
+	struct hotplug_slot hotplug_slot;
+};
+
+struct pci_resource {
+	struct pci_resource *next;
+	u32 base;
+	u32 length;
+};
+
+struct event_info {
+	u32 event_type;
+	u8 hp_slot;
+};
+
+struct controller {
+	struct controller *next;
+	u32 ctrl_int_comp;
+	struct mutex crit_sect;	/* critical section mutex */
+	void __iomem *hpc_reg;	/* cookie for our pci controller location */
+	struct pci_resource *mem_head;
+	struct pci_resource *p_mem_head;
+	struct pci_resource *io_head;
+	struct pci_resource *bus_head;
+	struct pci_dev *pci_dev;
+	struct pci_bus *pci_bus;
+	struct event_info event_queue[10];
+	struct slot *slot;
+	u8 next_event;
+	u8 interrupt;
+	u8 cfgspc_irq;
+	u8 bus;			/* bus number for the pci hotplug controller */
+	u8 rev;
+	u8 slot_device_offset;
+	u8 first_slot;
+	u8 add_support;
+	u8 push_flag;
+	u8 push_button;			/* 0 = no pushbutton, 1 = pushbutton present */
+	u8 slot_switch_type;		/* 0 = no switch, 1 = switch present */
+	u8 defeature_PHP;		/* 0 = PHP not supported, 1 = PHP supported */
+	u8 alternate_base_address;	/* 0 = not supported, 1 = supported */
+	u8 pci_config_space;		/* Index/data access to working registers 0 = not supported, 1 = supported */
+	u8 pcix_speed_capability;	/* PCI-X */
+	u8 pcix_support;		/* PCI-X */
+	u16 vendor_id;
+	struct work_struct int_task_event;
+	wait_queue_head_t queue;	/* sleep & wake process */
+	struct dentry *dentry;		/* debugfs dentry */
+};
+
+struct irq_mapping {
+	u8 barber_pole;
+	u8 valid_INT;
+	u8 interrupt[4];
+};
+
+struct resource_lists {
+	struct pci_resource *mem_head;
+	struct pci_resource *p_mem_head;
+	struct pci_resource *io_head;
+	struct pci_resource *bus_head;
+	struct irq_mapping *irqs;
+};
+
+#define ROM_PHY_ADDR			0x0F0000
+#define ROM_PHY_LEN			0x00ffff
+
+#define PCI_HPC_ID			0xA0F7
+#define PCI_SUB_HPC_ID			0xA2F7
+#define PCI_SUB_HPC_ID2			0xA2F8
+#define PCI_SUB_HPC_ID3			0xA2F9
+#define PCI_SUB_HPC_ID_INTC		0xA2FA
+#define PCI_SUB_HPC_ID4			0xA2FD
+
+#define INT_BUTTON_IGNORE		0
+#define INT_PRESENCE_ON			1
+#define INT_PRESENCE_OFF		2
+#define INT_SWITCH_CLOSE		3
+#define INT_SWITCH_OPEN			4
+#define INT_POWER_FAULT			5
+#define INT_POWER_FAULT_CLEAR		6
+#define INT_BUTTON_PRESS		7
+#define INT_BUTTON_RELEASE		8
+#define INT_BUTTON_CANCEL		9
+
+#define STATIC_STATE			0
+#define BLINKINGON_STATE		1
+#define BLINKINGOFF_STATE		2
+#define POWERON_STATE			3
+#define POWEROFF_STATE			4
+
+#define PCISLOT_INTERLOCK_CLOSED	0x00000001
+#define PCISLOT_ADAPTER_PRESENT		0x00000002
+#define PCISLOT_POWERED			0x00000004
+#define PCISLOT_66_MHZ_OPERATION	0x00000008
+#define PCISLOT_64_BIT_OPERATION	0x00000010
+#define PCISLOT_REPLACE_SUPPORTED	0x00000020
+#define PCISLOT_ADD_SUPPORTED		0x00000040
+#define PCISLOT_INTERLOCK_SUPPORTED	0x00000080
+#define PCISLOT_66_MHZ_SUPPORTED	0x00000100
+#define PCISLOT_64_BIT_SUPPORTED	0x00000200
+
+#define PCI_TO_PCI_BRIDGE_CLASS		0x00060400
+
+#define INTERLOCK_OPEN			0x00000002
+#define ADD_NOT_SUPPORTED		0x00000003
+#define CARD_FUNCTIONING		0x00000005
+#define ADAPTER_NOT_SAME		0x00000006
+#define NO_ADAPTER_PRESENT		0x00000009
+#define NOT_ENOUGH_RESOURCES		0x0000000B
+#define DEVICE_TYPE_NOT_SUPPORTED	0x0000000C
+#define POWER_FAILURE			0x0000000E
+
+#define REMOVE_NOT_SUPPORTED		0x00000003
+
+
+/*
+ * error Messages
+ */
+#define msg_initialization_err	"Initialization failure, error=%d\n"
+#define msg_HPC_rev_error	"Unsupported revision of the PCI hot plug controller found.\n"
+#define msg_HPC_non_compaq_or_intel	"The PCI hot plug controller is not supported by this driver.\n"
+#define msg_HPC_not_supported	"this system is not supported by this version of cpqphpd. Upgrade to a newer version of cpqphpd\n"
+#define msg_unable_to_save	"unable to store PCI hot plug add resource information. This system must be rebooted before adding any PCI devices.\n"
+#define msg_button_on		"PCI slot #%d - powering on due to button press.\n"
+#define msg_button_off		"PCI slot #%d - powering off due to button press.\n"
+#define msg_button_cancel	"PCI slot #%d - action canceled due to button press.\n"
+#define msg_button_ignore	"PCI slot #%d - button press ignored.  (action in progress...)\n"
+
+
+/* debugfs functions for the hotplug controller info */
+void cpqhp_initialize_debugfs(void);
+void cpqhp_shutdown_debugfs(void);
+void cpqhp_create_debugfs_files(struct controller *ctrl);
+void cpqhp_remove_debugfs_files(struct controller *ctrl);
+
+/* controller functions */
+void cpqhp_pushbutton_thread(struct timer_list *t);
+irqreturn_t cpqhp_ctrl_intr(int IRQ, void *data);
+int cpqhp_find_available_resources(struct controller *ctrl,
+				   void __iomem *rom_start);
+int cpqhp_event_start_thread(void);
+void cpqhp_event_stop_thread(void);
+struct pci_func *cpqhp_slot_create(unsigned char busnumber);
+struct pci_func *cpqhp_slot_find(unsigned char bus, unsigned char device,
+				 unsigned char index);
+int cpqhp_process_SI(struct controller *ctrl, struct pci_func *func);
+int cpqhp_process_SS(struct controller *ctrl, struct pci_func *func);
+int cpqhp_hardware_test(struct controller *ctrl, int test_num);
+
+/* resource functions */
+int	cpqhp_resource_sort_and_combine(struct pci_resource **head);
+
+/* pci functions */
+int cpqhp_set_irq(u8 bus_num, u8 dev_num, u8 int_pin, u8 irq_num);
+int cpqhp_get_bus_dev(struct controller *ctrl, u8 *bus_num, u8 *dev_num,
+		      u8 slot);
+int cpqhp_save_config(struct controller *ctrl, int busnumber, int is_hot_plug);
+int cpqhp_save_base_addr_length(struct controller *ctrl, struct pci_func *func);
+int cpqhp_save_used_resources(struct controller *ctrl, struct pci_func *func);
+int cpqhp_configure_board(struct controller *ctrl, struct pci_func *func);
+int cpqhp_save_slot_config(struct controller *ctrl, struct pci_func *new_slot);
+int cpqhp_valid_replace(struct controller *ctrl, struct pci_func *func);
+void cpqhp_destroy_board_resources(struct pci_func *func);
+int cpqhp_return_board_resources(struct pci_func *func,
+				 struct resource_lists *resources);
+void cpqhp_destroy_resource_list(struct resource_lists *resources);
+int cpqhp_configure_device(struct controller *ctrl, struct pci_func *func);
+int cpqhp_unconfigure_device(struct pci_func *func);
+
+/* Global variables */
+extern int cpqhp_debug;
+extern int cpqhp_legacy_mode;
+extern struct controller *cpqhp_ctrl_list;
+extern struct pci_func *cpqhp_slot_list[256];
+extern struct irq_routing_table *cpqhp_routing_table;
+
+/* these can be gotten rid of, but for debugging they are purty */
+extern u8 cpqhp_nic_irq;
+extern u8 cpqhp_disk_irq;
+
+
+/* inline functions */
+
+static inline const char *slot_name(struct slot *slot)
+{
+	return hotplug_slot_name(&slot->hotplug_slot);
+}
+
+static inline struct slot *to_slot(struct hotplug_slot *hotplug_slot)
+{
+	return container_of(hotplug_slot, struct slot, hotplug_slot);
+}
+
+/*
+ * return_resource
+ *
+ * Puts node back in the resource list pointed to by head
+ */
+static inline void return_resource(struct pci_resource **head,
+				   struct pci_resource *node)
+{
+	if (!node || !head)
+		return;
+	node->next = *head;
+	*head = node;
+}
+
+static inline void set_SOGO(struct controller *ctrl)
+{
+	u16 misc;
+
+	misc = readw(ctrl->hpc_reg + MISC);
+	misc = (misc | 0x0001) & 0xFFFB;
+	writew(misc, ctrl->hpc_reg + MISC);
+}
+
+
+static inline void amber_LED_on(struct controller *ctrl, u8 slot)
+{
+	u32 led_control;
+
+	led_control = readl(ctrl->hpc_reg + LED_CONTROL);
+	led_control |= (0x01010000L << slot);
+	writel(led_control, ctrl->hpc_reg + LED_CONTROL);
+}
+
+
+static inline void amber_LED_off(struct controller *ctrl, u8 slot)
+{
+	u32 led_control;
+
+	led_control = readl(ctrl->hpc_reg + LED_CONTROL);
+	led_control &= ~(0x01010000L << slot);
+	writel(led_control, ctrl->hpc_reg + LED_CONTROL);
+}
+
+
+static inline int read_amber_LED(struct controller *ctrl, u8 slot)
+{
+	u32 led_control;
+
+	led_control = readl(ctrl->hpc_reg + LED_CONTROL);
+	led_control &= (0x01010000L << slot);
+
+	return led_control ? 1 : 0;
+}
+
+
+static inline void green_LED_on(struct controller *ctrl, u8 slot)
+{
+	u32 led_control;
+
+	led_control = readl(ctrl->hpc_reg + LED_CONTROL);
+	led_control |= 0x0101L << slot;
+	writel(led_control, ctrl->hpc_reg + LED_CONTROL);
+}
+
+static inline void green_LED_off(struct controller *ctrl, u8 slot)
+{
+	u32 led_control;
+
+	led_control = readl(ctrl->hpc_reg + LED_CONTROL);
+	led_control &= ~(0x0101L << slot);
+	writel(led_control, ctrl->hpc_reg + LED_CONTROL);
+}
+
+
+static inline void green_LED_blink(struct controller *ctrl, u8 slot)
+{
+	u32 led_control;
+
+	led_control = readl(ctrl->hpc_reg + LED_CONTROL);
+	led_control &= ~(0x0101L << slot);
+	led_control |= (0x0001L << slot);
+	writel(led_control, ctrl->hpc_reg + LED_CONTROL);
+}
+
+
+static inline void slot_disable(struct controller *ctrl, u8 slot)
+{
+	u8 slot_enable;
+
+	slot_enable = readb(ctrl->hpc_reg + SLOT_ENABLE);
+	slot_enable &= ~(0x01 << slot);
+	writeb(slot_enable, ctrl->hpc_reg + SLOT_ENABLE);
+}
+
+
+static inline void slot_enable(struct controller *ctrl, u8 slot)
+{
+	u8 slot_enable;
+
+	slot_enable = readb(ctrl->hpc_reg + SLOT_ENABLE);
+	slot_enable |= (0x01 << slot);
+	writeb(slot_enable, ctrl->hpc_reg + SLOT_ENABLE);
+}
+
+
+static inline u8 is_slot_enabled(struct controller *ctrl, u8 slot)
+{
+	u8 slot_enable;
+
+	slot_enable = readb(ctrl->hpc_reg + SLOT_ENABLE);
+	slot_enable &= (0x01 << slot);
+	return slot_enable ? 1 : 0;
+}
+
+
+static inline u8 read_slot_enable(struct controller *ctrl)
+{
+	return readb(ctrl->hpc_reg + SLOT_ENABLE);
+}
+
+
+/**
+ * get_controller_speed - find the current frequency/mode of controller.
+ *
+ * @ctrl: controller to get frequency/mode for.
+ *
+ * Returns controller speed.
+ */
+static inline u8 get_controller_speed(struct controller *ctrl)
+{
+	u8 curr_freq;
+	u16 misc;
+
+	if (ctrl->pcix_support) {
+		curr_freq = readb(ctrl->hpc_reg + NEXT_CURR_FREQ);
+		if ((curr_freq & 0xB0) == 0xB0)
+			return PCI_SPEED_133MHz_PCIX;
+		if ((curr_freq & 0xA0) == 0xA0)
+			return PCI_SPEED_100MHz_PCIX;
+		if ((curr_freq & 0x90) == 0x90)
+			return PCI_SPEED_66MHz_PCIX;
+		if (curr_freq & 0x10)
+			return PCI_SPEED_66MHz;
+
+		return PCI_SPEED_33MHz;
+	}
+
+	misc = readw(ctrl->hpc_reg + MISC);
+	return (misc & 0x0800) ? PCI_SPEED_66MHz : PCI_SPEED_33MHz;
+}
+
+
+/**
+ * get_adapter_speed - find the max supported frequency/mode of adapter.
+ *
+ * @ctrl: hotplug controller.
+ * @hp_slot: hotplug slot where adapter is installed.
+ *
+ * Returns adapter speed.
+ */
+static inline u8 get_adapter_speed(struct controller *ctrl, u8 hp_slot)
+{
+	u32 temp_dword = readl(ctrl->hpc_reg + NON_INT_INPUT);
+	dbg("slot: %d, PCIXCAP: %8x\n", hp_slot, temp_dword);
+	if (ctrl->pcix_support) {
+		if (temp_dword & (0x10000 << hp_slot))
+			return PCI_SPEED_133MHz_PCIX;
+		if (temp_dword & (0x100 << hp_slot))
+			return PCI_SPEED_66MHz_PCIX;
+	}
+
+	if (temp_dword & (0x01 << hp_slot))
+		return PCI_SPEED_66MHz;
+
+	return PCI_SPEED_33MHz;
+}
+
+static inline void enable_slot_power(struct controller *ctrl, u8 slot)
+{
+	u8 slot_power;
+
+	slot_power = readb(ctrl->hpc_reg + SLOT_POWER);
+	slot_power |= (0x01 << slot);
+	writeb(slot_power, ctrl->hpc_reg + SLOT_POWER);
+}
+
+static inline void disable_slot_power(struct controller *ctrl, u8 slot)
+{
+	u8 slot_power;
+
+	slot_power = readb(ctrl->hpc_reg + SLOT_POWER);
+	slot_power &= ~(0x01 << slot);
+	writeb(slot_power, ctrl->hpc_reg + SLOT_POWER);
+}
+
+
+static inline int cpq_get_attention_status(struct controller *ctrl, struct slot *slot)
+{
+	u8 hp_slot;
+
+	hp_slot = slot->device - ctrl->slot_device_offset;
+
+	return read_amber_LED(ctrl, hp_slot);
+}
+
+
+static inline int get_slot_enabled(struct controller *ctrl, struct slot *slot)
+{
+	u8 hp_slot;
+
+	hp_slot = slot->device - ctrl->slot_device_offset;
+
+	return is_slot_enabled(ctrl, hp_slot);
+}
+
+
+static inline int cpq_get_latch_status(struct controller *ctrl,
+				       struct slot *slot)
+{
+	u32 status;
+	u8 hp_slot;
+
+	hp_slot = slot->device - ctrl->slot_device_offset;
+	dbg("%s: slot->device = %d, ctrl->slot_device_offset = %d\n",
+	    __func__, slot->device, ctrl->slot_device_offset);
+
+	status = (readl(ctrl->hpc_reg + INT_INPUT_CLEAR) & (0x01L << hp_slot));
+
+	return (status == 0) ? 1 : 0;
+}
+
+
+static inline int get_presence_status(struct controller *ctrl,
+				      struct slot *slot)
+{
+	int presence_save = 0;
+	u8 hp_slot;
+	u32 tempdword;
+
+	hp_slot = slot->device - ctrl->slot_device_offset;
+
+	tempdword = readl(ctrl->hpc_reg + INT_INPUT_CLEAR);
+	presence_save = (int) ((((~tempdword) >> 23) | ((~tempdword) >> 15))
+				>> hp_slot) & 0x02;
+
+	return presence_save;
+}
+
+static inline int wait_for_ctrl_irq(struct controller *ctrl)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	int retval = 0;
+
+	dbg("%s - start\n", __func__);
+	add_wait_queue(&ctrl->queue, &wait);
+	/* Sleep for up to 1 second to wait for the LED to change. */
+	msleep_interruptible(1000);
+	remove_wait_queue(&ctrl->queue, &wait);
+	if (signal_pending(current))
+		retval =  -EINTR;
+
+	dbg("%s - end\n", __func__);
+	return retval;
+}
+
+#include <asm/pci_x86.h>
+static inline int cpqhp_routing_table_length(void)
+{
+	BUG_ON(cpqhp_routing_table == NULL);
+	return ((cpqhp_routing_table->size - sizeof(struct irq_routing_table)) /
+		sizeof(struct irq_info));
+}
+
+#endif
diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c
new file mode 100644
index 000000000..c94b40e64
--- /dev/null
+++ b/drivers/pci/hotplug/cpqphp_core.c
@@ -0,0 +1,1407 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Compaq Hot Plug Controller Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman <greg@kroah.com>
+ * Copyright (C) 2001 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>
+ *
+ * Jan 12, 2003 -	Added 66/100/133MHz PCI-X support,
+ *			Torben Mathiasen <torben.mathiasen@hp.com>
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+
+#include <linux/uaccess.h>
+
+#include "cpqphp.h"
+#include "cpqphp_nvram.h"
+
+
+/* Global variables */
+int cpqhp_debug;
+int cpqhp_legacy_mode;
+struct controller *cpqhp_ctrl_list;	/* = NULL */
+struct pci_func *cpqhp_slot_list[256];
+struct irq_routing_table *cpqhp_routing_table;
+
+/* local variables */
+static void __iomem *smbios_table;
+static void __iomem *smbios_start;
+static void __iomem *cpqhp_rom_start;
+static bool power_mode;
+static bool debug;
+static int initialized;
+
+#define DRIVER_VERSION	"0.9.8"
+#define DRIVER_AUTHOR	"Dan Zink <dan.zink@compaq.com>, Greg Kroah-Hartman <greg@kroah.com>"
+#define DRIVER_DESC	"Compaq Hot Plug PCI Controller Driver"
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+
+module_param(power_mode, bool, 0644);
+MODULE_PARM_DESC(power_mode, "Power mode enabled or not");
+
+module_param(debug, bool, 0644);
+MODULE_PARM_DESC(debug, "Debugging mode enabled or not");
+
+#define CPQHPC_MODULE_MINOR 208
+
+static inline int is_slot64bit(struct slot *slot)
+{
+	return (readb(slot->p_sm_slot + SMBIOS_SLOT_WIDTH) == 0x06) ? 1 : 0;
+}
+
+static inline int is_slot66mhz(struct slot *slot)
+{
+	return (readb(slot->p_sm_slot + SMBIOS_SLOT_TYPE) == 0x0E) ? 1 : 0;
+}
+
+/**
+ * detect_SMBIOS_pointer - find the System Management BIOS Table in mem region.
+ * @begin: begin pointer for region to be scanned.
+ * @end: end pointer for region to be scanned.
+ *
+ * Returns pointer to the head of the SMBIOS tables (or %NULL).
+ */
+static void __iomem *detect_SMBIOS_pointer(void __iomem *begin, void __iomem *end)
+{
+	void __iomem *fp;
+	void __iomem *endp;
+	u8 temp1, temp2, temp3, temp4;
+	int status = 0;
+
+	endp = (end - sizeof(u32) + 1);
+
+	for (fp = begin; fp <= endp; fp += 16) {
+		temp1 = readb(fp);
+		temp2 = readb(fp+1);
+		temp3 = readb(fp+2);
+		temp4 = readb(fp+3);
+		if (temp1 == '_' &&
+		    temp2 == 'S' &&
+		    temp3 == 'M' &&
+		    temp4 == '_') {
+			status = 1;
+			break;
+		}
+	}
+
+	if (!status)
+		fp = NULL;
+
+	dbg("Discovered SMBIOS Entry point at %p\n", fp);
+
+	return fp;
+}
+
+/**
+ * init_SERR - Initializes the per slot SERR generation.
+ * @ctrl: controller to use
+ *
+ * For unexpected switch opens
+ */
+static int init_SERR(struct controller *ctrl)
+{
+	u32 tempdword;
+	u32 number_of_slots;
+
+	if (!ctrl)
+		return 1;
+
+	tempdword = ctrl->first_slot;
+
+	number_of_slots = readb(ctrl->hpc_reg + SLOT_MASK) & 0x0F;
+	/* Loop through slots */
+	while (number_of_slots) {
+		writeb(0, ctrl->hpc_reg + SLOT_SERR);
+		tempdword++;
+		number_of_slots--;
+	}
+
+	return 0;
+}
+
+static int init_cpqhp_routing_table(void)
+{
+	int len;
+
+	cpqhp_routing_table = pcibios_get_irq_routing_table();
+	if (cpqhp_routing_table == NULL)
+		return -ENOMEM;
+
+	len = cpqhp_routing_table_length();
+	if (len == 0) {
+		kfree(cpqhp_routing_table);
+		cpqhp_routing_table = NULL;
+		return -1;
+	}
+
+	return 0;
+}
+
+/* nice debugging output */
+static void pci_print_IRQ_route(void)
+{
+	int len;
+	int loop;
+	u8 tbus, tdevice, tslot;
+
+	len = cpqhp_routing_table_length();
+
+	dbg("bus dev func slot\n");
+	for (loop = 0; loop < len; ++loop) {
+		tbus = cpqhp_routing_table->slots[loop].bus;
+		tdevice = cpqhp_routing_table->slots[loop].devfn;
+		tslot = cpqhp_routing_table->slots[loop].slot;
+		dbg("%d %d %d %d\n", tbus, tdevice >> 3, tdevice & 0x7, tslot);
+
+	}
+}
+
+
+/**
+ * get_subsequent_smbios_entry: get the next entry from bios table.
+ * @smbios_start: where to start in the SMBIOS table
+ * @smbios_table: location of the SMBIOS table
+ * @curr: %NULL or pointer to previously returned structure
+ *
+ * Gets the first entry if previous == NULL;
+ * otherwise, returns the next entry.
+ * Uses global SMBIOS Table pointer.
+ *
+ * Returns a pointer to an SMBIOS structure or NULL if none found.
+ */
+static void __iomem *get_subsequent_smbios_entry(void __iomem *smbios_start,
+						void __iomem *smbios_table,
+						void __iomem *curr)
+{
+	u8 bail = 0;
+	u8 previous_byte = 1;
+	void __iomem *p_temp;
+	void __iomem *p_max;
+
+	if (!smbios_table || !curr)
+		return NULL;
+
+	/* set p_max to the end of the table */
+	p_max = smbios_start + readw(smbios_table + ST_LENGTH);
+
+	p_temp = curr;
+	p_temp += readb(curr + SMBIOS_GENERIC_LENGTH);
+
+	while ((p_temp < p_max) && !bail) {
+		/* Look for the double NULL terminator
+		 * The first condition is the previous byte
+		 * and the second is the curr
+		 */
+		if (!previous_byte && !(readb(p_temp)))
+			bail = 1;
+
+		previous_byte = readb(p_temp);
+		p_temp++;
+	}
+
+	if (p_temp < p_max)
+		return p_temp;
+	else
+		return NULL;
+}
+
+
+/**
+ * get_SMBIOS_entry - return the requested SMBIOS entry or %NULL
+ * @smbios_start: where to start in the SMBIOS table
+ * @smbios_table: location of the SMBIOS table
+ * @type: SMBIOS structure type to be returned
+ * @previous: %NULL or pointer to previously returned structure
+ *
+ * Gets the first entry of the specified type if previous == %NULL;
+ * Otherwise, returns the next entry of the given type.
+ * Uses global SMBIOS Table pointer.
+ * Uses get_subsequent_smbios_entry.
+ *
+ * Returns a pointer to an SMBIOS structure or %NULL if none found.
+ */
+static void __iomem *get_SMBIOS_entry(void __iomem *smbios_start,
+					void __iomem *smbios_table,
+					u8 type,
+					void __iomem *previous)
+{
+	if (!smbios_table)
+		return NULL;
+
+	if (!previous)
+		previous = smbios_start;
+	else
+		previous = get_subsequent_smbios_entry(smbios_start,
+					smbios_table, previous);
+
+	while (previous)
+		if (readb(previous + SMBIOS_GENERIC_TYPE) != type)
+			previous = get_subsequent_smbios_entry(smbios_start,
+						smbios_table, previous);
+		else
+			break;
+
+	return previous;
+}
+
+static int ctrl_slot_cleanup(struct controller *ctrl)
+{
+	struct slot *old_slot, *next_slot;
+
+	old_slot = ctrl->slot;
+	ctrl->slot = NULL;
+
+	while (old_slot) {
+		next_slot = old_slot->next;
+		pci_hp_deregister(&old_slot->hotplug_slot);
+		kfree(old_slot);
+		old_slot = next_slot;
+	}
+
+	cpqhp_remove_debugfs_files(ctrl);
+
+	/* Free IRQ associated with hot plug device */
+	free_irq(ctrl->interrupt, ctrl);
+	/* Unmap the memory */
+	iounmap(ctrl->hpc_reg);
+	/* Finally reclaim PCI mem */
+	release_mem_region(pci_resource_start(ctrl->pci_dev, 0),
+			   pci_resource_len(ctrl->pci_dev, 0));
+
+	return 0;
+}
+
+
+/**
+ * get_slot_mapping - determine logical slot mapping for PCI device
+ *
+ * Won't work for more than one PCI-PCI bridge in a slot.
+ *
+ * @bus: pointer to the PCI bus structure
+ * @bus_num: bus number of PCI device
+ * @dev_num: device number of PCI device
+ * @slot: Pointer to u8 where slot number will	be returned
+ *
+ * Output:	SUCCESS or FAILURE
+ */
+static int
+get_slot_mapping(struct pci_bus *bus, u8 bus_num, u8 dev_num, u8 *slot)
+{
+	u32 work;
+	long len;
+	long loop;
+
+	u8 tbus, tdevice, tslot, bridgeSlot;
+
+	dbg("%s: %p, %d, %d, %p\n", __func__, bus, bus_num, dev_num, slot);
+
+	bridgeSlot = 0xFF;
+
+	len = cpqhp_routing_table_length();
+	for (loop = 0; loop < len; ++loop) {
+		tbus = cpqhp_routing_table->slots[loop].bus;
+		tdevice = cpqhp_routing_table->slots[loop].devfn >> 3;
+		tslot = cpqhp_routing_table->slots[loop].slot;
+
+		if ((tbus == bus_num) && (tdevice == dev_num)) {
+			*slot = tslot;
+			return 0;
+		} else {
+			/* Did not get a match on the target PCI device. Check
+			 * if the current IRQ table entry is a PCI-to-PCI
+			 * bridge device.  If so, and it's secondary bus
+			 * matches the bus number for the target device, I need
+			 * to save the bridge's slot number.  If I can not find
+			 * an entry for the target device, I will have to
+			 * assume it's on the other side of the bridge, and
+			 * assign it the bridge's slot.
+			 */
+			bus->number = tbus;
+			pci_bus_read_config_dword(bus, PCI_DEVFN(tdevice, 0),
+						PCI_CLASS_REVISION, &work);
+
+			if ((work >> 8) == PCI_TO_PCI_BRIDGE_CLASS) {
+				pci_bus_read_config_dword(bus,
+							PCI_DEVFN(tdevice, 0),
+							PCI_PRIMARY_BUS, &work);
+				// See if bridge's secondary bus matches target bus.
+				if (((work >> 8) & 0x000000FF) == (long) bus_num)
+					bridgeSlot = tslot;
+			}
+		}
+
+	}
+
+	/* If we got here, we didn't find an entry in the IRQ mapping table for
+	 * the target PCI device.  If we did determine that the target device
+	 * is on the other side of a PCI-to-PCI bridge, return the slot number
+	 * for the bridge.
+	 */
+	if (bridgeSlot != 0xFF) {
+		*slot = bridgeSlot;
+		return 0;
+	}
+	/* Couldn't find an entry in the routing table for this PCI device */
+	return -1;
+}
+
+
+/**
+ * cpqhp_set_attention_status - Turns the Amber LED for a slot on or off
+ * @ctrl: struct controller to use
+ * @func: PCI device/function info
+ * @status: LED control flag: 1 = LED on, 0 = LED off
+ */
+static int
+cpqhp_set_attention_status(struct controller *ctrl, struct pci_func *func,
+				u32 status)
+{
+	u8 hp_slot;
+
+	if (func == NULL)
+		return 1;
+
+	hp_slot = func->device - ctrl->slot_device_offset;
+
+	/* Wait for exclusive access to hardware */
+	mutex_lock(&ctrl->crit_sect);
+
+	if (status == 1)
+		amber_LED_on(ctrl, hp_slot);
+	else if (status == 0)
+		amber_LED_off(ctrl, hp_slot);
+	else {
+		/* Done with exclusive hardware access */
+		mutex_unlock(&ctrl->crit_sect);
+		return 1;
+	}
+
+	set_SOGO(ctrl);
+
+	/* Wait for SOBS to be unset */
+	wait_for_ctrl_irq(ctrl);
+
+	/* Done with exclusive hardware access */
+	mutex_unlock(&ctrl->crit_sect);
+
+	return 0;
+}
+
+
+/**
+ * set_attention_status - Turns the Amber LED for a slot on or off
+ * @hotplug_slot: slot to change LED on
+ * @status: LED control flag
+ */
+static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
+{
+	struct pci_func *slot_func;
+	struct slot *slot = to_slot(hotplug_slot);
+	struct controller *ctrl = slot->ctrl;
+	u8 bus;
+	u8 devfn;
+	u8 device;
+	u8 function;
+
+	dbg("%s - physical_slot = %s\n", __func__, slot_name(slot));
+
+	if (cpqhp_get_bus_dev(ctrl, &bus, &devfn, slot->number) == -1)
+		return -ENODEV;
+
+	device = devfn >> 3;
+	function = devfn & 0x7;
+	dbg("bus, dev, fn = %d, %d, %d\n", bus, device, function);
+
+	slot_func = cpqhp_slot_find(bus, device, function);
+	if (!slot_func)
+		return -ENODEV;
+
+	return cpqhp_set_attention_status(ctrl, slot_func, status);
+}
+
+
+static int process_SI(struct hotplug_slot *hotplug_slot)
+{
+	struct pci_func *slot_func;
+	struct slot *slot = to_slot(hotplug_slot);
+	struct controller *ctrl = slot->ctrl;
+	u8 bus;
+	u8 devfn;
+	u8 device;
+	u8 function;
+
+	dbg("%s - physical_slot = %s\n", __func__, slot_name(slot));
+
+	if (cpqhp_get_bus_dev(ctrl, &bus, &devfn, slot->number) == -1)
+		return -ENODEV;
+
+	device = devfn >> 3;
+	function = devfn & 0x7;
+	dbg("bus, dev, fn = %d, %d, %d\n", bus, device, function);
+
+	slot_func = cpqhp_slot_find(bus, device, function);
+	if (!slot_func)
+		return -ENODEV;
+
+	slot_func->bus = bus;
+	slot_func->device = device;
+	slot_func->function = function;
+	slot_func->configured = 0;
+	dbg("board_added(%p, %p)\n", slot_func, ctrl);
+	return cpqhp_process_SI(ctrl, slot_func);
+}
+
+
+static int process_SS(struct hotplug_slot *hotplug_slot)
+{
+	struct pci_func *slot_func;
+	struct slot *slot = to_slot(hotplug_slot);
+	struct controller *ctrl = slot->ctrl;
+	u8 bus;
+	u8 devfn;
+	u8 device;
+	u8 function;
+
+	dbg("%s - physical_slot = %s\n", __func__, slot_name(slot));
+
+	if (cpqhp_get_bus_dev(ctrl, &bus, &devfn, slot->number) == -1)
+		return -ENODEV;
+
+	device = devfn >> 3;
+	function = devfn & 0x7;
+	dbg("bus, dev, fn = %d, %d, %d\n", bus, device, function);
+
+	slot_func = cpqhp_slot_find(bus, device, function);
+	if (!slot_func)
+		return -ENODEV;
+
+	dbg("In %s, slot_func = %p, ctrl = %p\n", __func__, slot_func, ctrl);
+	return cpqhp_process_SS(ctrl, slot_func);
+}
+
+
+static int hardware_test(struct hotplug_slot *hotplug_slot, u32 value)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+	struct controller *ctrl = slot->ctrl;
+
+	dbg("%s - physical_slot = %s\n", __func__, slot_name(slot));
+
+	return cpqhp_hardware_test(ctrl, value);
+}
+
+
+static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+	struct controller *ctrl = slot->ctrl;
+
+	dbg("%s - physical_slot = %s\n", __func__, slot_name(slot));
+
+	*value = get_slot_enabled(ctrl, slot);
+	return 0;
+}
+
+static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+	struct controller *ctrl = slot->ctrl;
+
+	dbg("%s - physical_slot = %s\n", __func__, slot_name(slot));
+
+	*value = cpq_get_attention_status(ctrl, slot);
+	return 0;
+}
+
+static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+	struct controller *ctrl = slot->ctrl;
+
+	dbg("%s - physical_slot = %s\n", __func__, slot_name(slot));
+
+	*value = cpq_get_latch_status(ctrl, slot);
+
+	return 0;
+}
+
+static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+	struct controller *ctrl = slot->ctrl;
+
+	dbg("%s - physical_slot = %s\n", __func__, slot_name(slot));
+
+	*value = get_presence_status(ctrl, slot);
+
+	return 0;
+}
+
+static const struct hotplug_slot_ops cpqphp_hotplug_slot_ops = {
+	.set_attention_status =	set_attention_status,
+	.enable_slot =		process_SI,
+	.disable_slot =		process_SS,
+	.hardware_test =	hardware_test,
+	.get_power_status =	get_power_status,
+	.get_attention_status =	get_attention_status,
+	.get_latch_status =	get_latch_status,
+	.get_adapter_status =	get_adapter_status,
+};
+
+#define SLOT_NAME_SIZE 10
+
+static int ctrl_slot_setup(struct controller *ctrl,
+			void __iomem *smbios_start,
+			void __iomem *smbios_table)
+{
+	struct slot *slot;
+	struct pci_bus *bus = ctrl->pci_bus;
+	u8 number_of_slots;
+	u8 slot_device;
+	u8 slot_number;
+	u8 ctrl_slot;
+	u32 tempdword;
+	char name[SLOT_NAME_SIZE];
+	void __iomem *slot_entry = NULL;
+	int result;
+
+	dbg("%s\n", __func__);
+
+	tempdword = readl(ctrl->hpc_reg + INT_INPUT_CLEAR);
+
+	number_of_slots = readb(ctrl->hpc_reg + SLOT_MASK) & 0x0F;
+	slot_device = readb(ctrl->hpc_reg + SLOT_MASK) >> 4;
+	slot_number = ctrl->first_slot;
+
+	while (number_of_slots) {
+		slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+		if (!slot) {
+			result = -ENOMEM;
+			goto error;
+		}
+
+		slot->ctrl = ctrl;
+		slot->bus = ctrl->bus;
+		slot->device = slot_device;
+		slot->number = slot_number;
+		dbg("slot->number = %u\n", slot->number);
+
+		slot_entry = get_SMBIOS_entry(smbios_start, smbios_table, 9,
+					slot_entry);
+
+		while (slot_entry && (readw(slot_entry + SMBIOS_SLOT_NUMBER) !=
+				slot->number)) {
+			slot_entry = get_SMBIOS_entry(smbios_start,
+						smbios_table, 9, slot_entry);
+		}
+
+		slot->p_sm_slot = slot_entry;
+
+		timer_setup(&slot->task_event, cpqhp_pushbutton_thread, 0);
+		slot->task_event.expires = jiffies + 5 * HZ;
+
+		/*FIXME: these capabilities aren't used but if they are
+		 *	 they need to be correctly implemented
+		 */
+		slot->capabilities |= PCISLOT_REPLACE_SUPPORTED;
+		slot->capabilities |= PCISLOT_INTERLOCK_SUPPORTED;
+
+		if (is_slot64bit(slot))
+			slot->capabilities |= PCISLOT_64_BIT_SUPPORTED;
+		if (is_slot66mhz(slot))
+			slot->capabilities |= PCISLOT_66_MHZ_SUPPORTED;
+		if (bus->cur_bus_speed == PCI_SPEED_66MHz)
+			slot->capabilities |= PCISLOT_66_MHZ_OPERATION;
+
+		ctrl_slot =
+			slot_device - (readb(ctrl->hpc_reg + SLOT_MASK) >> 4);
+
+		/* Check presence */
+		slot->capabilities |=
+			((((~tempdword) >> 23) |
+			 ((~tempdword) >> 15)) >> ctrl_slot) & 0x02;
+		/* Check the switch state */
+		slot->capabilities |=
+			((~tempdword & 0xFF) >> ctrl_slot) & 0x01;
+		/* Check the slot enable */
+		slot->capabilities |=
+			((read_slot_enable(ctrl) << 2) >> ctrl_slot) & 0x04;
+
+		/* register this slot with the hotplug pci core */
+		snprintf(name, SLOT_NAME_SIZE, "%u", slot->number);
+		slot->hotplug_slot.ops = &cpqphp_hotplug_slot_ops;
+
+		dbg("registering bus %d, dev %d, number %d, ctrl->slot_device_offset %d, slot %d\n",
+				slot->bus, slot->device,
+				slot->number, ctrl->slot_device_offset,
+				slot_number);
+		result = pci_hp_register(&slot->hotplug_slot,
+					 ctrl->pci_dev->bus,
+					 slot->device,
+					 name);
+		if (result) {
+			err("pci_hp_register failed with error %d\n", result);
+			goto error_slot;
+		}
+
+		slot->next = ctrl->slot;
+		ctrl->slot = slot;
+
+		number_of_slots--;
+		slot_device++;
+		slot_number++;
+	}
+
+	return 0;
+error_slot:
+	kfree(slot);
+error:
+	return result;
+}
+
+static int one_time_init(void)
+{
+	int loop;
+	int retval = 0;
+
+	if (initialized)
+		return 0;
+
+	power_mode = 0;
+
+	retval = init_cpqhp_routing_table();
+	if (retval)
+		goto error;
+
+	if (cpqhp_debug)
+		pci_print_IRQ_route();
+
+	dbg("Initialize + Start the notification mechanism\n");
+
+	retval = cpqhp_event_start_thread();
+	if (retval)
+		goto error;
+
+	dbg("Initialize slot lists\n");
+	for (loop = 0; loop < 256; loop++)
+		cpqhp_slot_list[loop] = NULL;
+
+	/* FIXME: We also need to hook the NMI handler eventually.
+	 * this also needs to be worked with Christoph
+	 * register_NMI_handler();
+	 */
+	/* Map rom address */
+	cpqhp_rom_start = ioremap(ROM_PHY_ADDR, ROM_PHY_LEN);
+	if (!cpqhp_rom_start) {
+		err("Could not ioremap memory region for ROM\n");
+		retval = -EIO;
+		goto error;
+	}
+
+	/* Now, map the int15 entry point if we are on compaq specific
+	 * hardware
+	 */
+	compaq_nvram_init(cpqhp_rom_start);
+
+	/* Map smbios table entry point structure */
+	smbios_table = detect_SMBIOS_pointer(cpqhp_rom_start,
+					cpqhp_rom_start + ROM_PHY_LEN);
+	if (!smbios_table) {
+		err("Could not find the SMBIOS pointer in memory\n");
+		retval = -EIO;
+		goto error_rom_start;
+	}
+
+	smbios_start = ioremap(readl(smbios_table + ST_ADDRESS),
+					readw(smbios_table + ST_LENGTH));
+	if (!smbios_start) {
+		err("Could not ioremap memory region taken from SMBIOS values\n");
+		retval = -EIO;
+		goto error_smbios_start;
+	}
+
+	initialized = 1;
+
+	return retval;
+
+error_smbios_start:
+	iounmap(smbios_start);
+error_rom_start:
+	iounmap(cpqhp_rom_start);
+error:
+	return retval;
+}
+
+static int cpqhpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	u8 num_of_slots = 0;
+	u8 hp_slot = 0;
+	u8 device;
+	u8 bus_cap;
+	u16 temp_word;
+	u16 vendor_id;
+	u16 subsystem_vid;
+	u16 subsystem_deviceid;
+	u32 rc;
+	struct controller *ctrl;
+	struct pci_func *func;
+	struct pci_bus *bus;
+	int err;
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		printk(KERN_ERR MY_NAME ": cannot enable PCI device %s (%d)\n",
+			pci_name(pdev), err);
+		return err;
+	}
+
+	bus = pdev->subordinate;
+	if (!bus) {
+		pci_notice(pdev, "the device is not a bridge, skipping\n");
+		rc = -ENODEV;
+		goto err_disable_device;
+	}
+
+	/* Need to read VID early b/c it's used to differentiate CPQ and INTC
+	 * discovery
+	 */
+	vendor_id = pdev->vendor;
+	if ((vendor_id != PCI_VENDOR_ID_COMPAQ) &&
+	    (vendor_id != PCI_VENDOR_ID_INTEL)) {
+		err(msg_HPC_non_compaq_or_intel);
+		rc = -ENODEV;
+		goto err_disable_device;
+	}
+	dbg("Vendor ID: %x\n", vendor_id);
+
+	dbg("revision: %d\n", pdev->revision);
+	if ((vendor_id == PCI_VENDOR_ID_COMPAQ) && (!pdev->revision)) {
+		err(msg_HPC_rev_error);
+		rc = -ENODEV;
+		goto err_disable_device;
+	}
+
+	/* Check for the proper subsystem IDs
+	 * Intel uses a different SSID programming model than Compaq.
+	 * For Intel, each SSID bit identifies a PHP capability.
+	 * Also Intel HPCs may have RID=0.
+	 */
+	if ((pdev->revision <= 2) && (vendor_id != PCI_VENDOR_ID_INTEL)) {
+		err(msg_HPC_not_supported);
+		rc = -ENODEV;
+		goto err_disable_device;
+	}
+
+	/* TODO: This code can be made to support non-Compaq or Intel
+	 * subsystem IDs
+	 */
+	subsystem_vid = pdev->subsystem_vendor;
+	dbg("Subsystem Vendor ID: %x\n", subsystem_vid);
+	if ((subsystem_vid != PCI_VENDOR_ID_COMPAQ) && (subsystem_vid != PCI_VENDOR_ID_INTEL)) {
+		err(msg_HPC_non_compaq_or_intel);
+		rc = -ENODEV;
+		goto err_disable_device;
+	}
+
+	ctrl = kzalloc(sizeof(struct controller), GFP_KERNEL);
+	if (!ctrl) {
+		rc = -ENOMEM;
+		goto err_disable_device;
+	}
+
+	subsystem_deviceid = pdev->subsystem_device;
+
+	info("Hot Plug Subsystem Device ID: %x\n", subsystem_deviceid);
+
+	/* Set Vendor ID, so it can be accessed later from other
+	 * functions
+	 */
+	ctrl->vendor_id = vendor_id;
+
+	switch (subsystem_vid) {
+	case PCI_VENDOR_ID_COMPAQ:
+		if (pdev->revision >= 0x13) { /* CIOBX */
+			ctrl->push_flag = 1;
+			ctrl->slot_switch_type = 1;
+			ctrl->push_button = 1;
+			ctrl->pci_config_space = 1;
+			ctrl->defeature_PHP = 1;
+			ctrl->pcix_support = 1;
+			ctrl->pcix_speed_capability = 1;
+			pci_read_config_byte(pdev, 0x41, &bus_cap);
+			if (bus_cap & 0x80) {
+				dbg("bus max supports 133MHz PCI-X\n");
+				bus->max_bus_speed = PCI_SPEED_133MHz_PCIX;
+				break;
+			}
+			if (bus_cap & 0x40) {
+				dbg("bus max supports 100MHz PCI-X\n");
+				bus->max_bus_speed = PCI_SPEED_100MHz_PCIX;
+				break;
+			}
+			if (bus_cap & 0x20) {
+				dbg("bus max supports 66MHz PCI-X\n");
+				bus->max_bus_speed = PCI_SPEED_66MHz_PCIX;
+				break;
+			}
+			if (bus_cap & 0x10) {
+				dbg("bus max supports 66MHz PCI\n");
+				bus->max_bus_speed = PCI_SPEED_66MHz;
+				break;
+			}
+
+			break;
+		}
+
+		switch (subsystem_deviceid) {
+		case PCI_SUB_HPC_ID:
+			/* Original 6500/7000 implementation */
+			ctrl->slot_switch_type = 1;
+			bus->max_bus_speed = PCI_SPEED_33MHz;
+			ctrl->push_button = 0;
+			ctrl->pci_config_space = 1;
+			ctrl->defeature_PHP = 1;
+			ctrl->pcix_support = 0;
+			ctrl->pcix_speed_capability = 0;
+			break;
+		case PCI_SUB_HPC_ID2:
+			/* First Pushbutton implementation */
+			ctrl->push_flag = 1;
+			ctrl->slot_switch_type = 1;
+			bus->max_bus_speed = PCI_SPEED_33MHz;
+			ctrl->push_button = 1;
+			ctrl->pci_config_space = 1;
+			ctrl->defeature_PHP = 1;
+			ctrl->pcix_support = 0;
+			ctrl->pcix_speed_capability = 0;
+			break;
+		case PCI_SUB_HPC_ID_INTC:
+			/* Third party (6500/7000) */
+			ctrl->slot_switch_type = 1;
+			bus->max_bus_speed = PCI_SPEED_33MHz;
+			ctrl->push_button = 0;
+			ctrl->pci_config_space = 1;
+			ctrl->defeature_PHP = 1;
+			ctrl->pcix_support = 0;
+			ctrl->pcix_speed_capability = 0;
+			break;
+		case PCI_SUB_HPC_ID3:
+			/* First 66 Mhz implementation */
+			ctrl->push_flag = 1;
+			ctrl->slot_switch_type = 1;
+			bus->max_bus_speed = PCI_SPEED_66MHz;
+			ctrl->push_button = 1;
+			ctrl->pci_config_space = 1;
+			ctrl->defeature_PHP = 1;
+			ctrl->pcix_support = 0;
+			ctrl->pcix_speed_capability = 0;
+			break;
+		case PCI_SUB_HPC_ID4:
+			/* First PCI-X implementation, 100MHz */
+			ctrl->push_flag = 1;
+			ctrl->slot_switch_type = 1;
+			bus->max_bus_speed = PCI_SPEED_100MHz_PCIX;
+			ctrl->push_button = 1;
+			ctrl->pci_config_space = 1;
+			ctrl->defeature_PHP = 1;
+			ctrl->pcix_support = 1;
+			ctrl->pcix_speed_capability = 0;
+			break;
+		default:
+			err(msg_HPC_not_supported);
+			rc = -ENODEV;
+			goto err_free_ctrl;
+		}
+		break;
+
+	case PCI_VENDOR_ID_INTEL:
+		/* Check for speed capability (0=33, 1=66) */
+		if (subsystem_deviceid & 0x0001)
+			bus->max_bus_speed = PCI_SPEED_66MHz;
+		else
+			bus->max_bus_speed = PCI_SPEED_33MHz;
+
+		/* Check for push button */
+		if (subsystem_deviceid & 0x0002)
+			ctrl->push_button = 0;
+		else
+			ctrl->push_button = 1;
+
+		/* Check for slot switch type (0=mechanical, 1=not mechanical) */
+		if (subsystem_deviceid & 0x0004)
+			ctrl->slot_switch_type = 0;
+		else
+			ctrl->slot_switch_type = 1;
+
+		/* PHP Status (0=De-feature PHP, 1=Normal operation) */
+		if (subsystem_deviceid & 0x0008)
+			ctrl->defeature_PHP = 1;	/* PHP supported */
+		else
+			ctrl->defeature_PHP = 0;	/* PHP not supported */
+
+		/* Alternate Base Address Register Interface
+		 * (0=not supported, 1=supported)
+		 */
+		if (subsystem_deviceid & 0x0010)
+			ctrl->alternate_base_address = 1;
+		else
+			ctrl->alternate_base_address = 0;
+
+		/* PCI Config Space Index (0=not supported, 1=supported) */
+		if (subsystem_deviceid & 0x0020)
+			ctrl->pci_config_space = 1;
+		else
+			ctrl->pci_config_space = 0;
+
+		/* PCI-X support */
+		if (subsystem_deviceid & 0x0080) {
+			ctrl->pcix_support = 1;
+			if (subsystem_deviceid & 0x0040)
+				/* 133MHz PCI-X if bit 7 is 1 */
+				ctrl->pcix_speed_capability = 1;
+			else
+				/* 100MHz PCI-X if bit 7 is 1 and bit 0 is 0, */
+				/* 66MHz PCI-X if bit 7 is 1 and bit 0 is 1 */
+				ctrl->pcix_speed_capability = 0;
+		} else {
+			/* Conventional PCI */
+			ctrl->pcix_support = 0;
+			ctrl->pcix_speed_capability = 0;
+		}
+		break;
+
+	default:
+		err(msg_HPC_not_supported);
+		rc = -ENODEV;
+		goto err_free_ctrl;
+	}
+
+	/* Tell the user that we found one. */
+	info("Initializing the PCI hot plug controller residing on PCI bus %d\n",
+					pdev->bus->number);
+
+	dbg("Hotplug controller capabilities:\n");
+	dbg("    speed_capability       %d\n", bus->max_bus_speed);
+	dbg("    slot_switch_type       %s\n", ctrl->slot_switch_type ?
+					"switch present" : "no switch");
+	dbg("    defeature_PHP          %s\n", ctrl->defeature_PHP ?
+					"PHP supported" : "PHP not supported");
+	dbg("    alternate_base_address %s\n", ctrl->alternate_base_address ?
+					"supported" : "not supported");
+	dbg("    pci_config_space       %s\n", ctrl->pci_config_space ?
+					"supported" : "not supported");
+	dbg("    pcix_speed_capability  %s\n", ctrl->pcix_speed_capability ?
+					"supported" : "not supported");
+	dbg("    pcix_support           %s\n", ctrl->pcix_support ?
+					"supported" : "not supported");
+
+	ctrl->pci_dev = pdev;
+	pci_set_drvdata(pdev, ctrl);
+
+	/* make our own copy of the pci bus structure,
+	 * as we like tweaking it a lot */
+	ctrl->pci_bus = kmemdup(pdev->bus, sizeof(*ctrl->pci_bus), GFP_KERNEL);
+	if (!ctrl->pci_bus) {
+		err("out of memory\n");
+		rc = -ENOMEM;
+		goto err_free_ctrl;
+	}
+
+	ctrl->bus = pdev->bus->number;
+	ctrl->rev = pdev->revision;
+	dbg("bus device function rev: %d %d %d %d\n", ctrl->bus,
+		PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), ctrl->rev);
+
+	mutex_init(&ctrl->crit_sect);
+	init_waitqueue_head(&ctrl->queue);
+
+	/* initialize our threads if they haven't already been started up */
+	rc = one_time_init();
+	if (rc)
+		goto err_free_bus;
+
+	dbg("pdev = %p\n", pdev);
+	dbg("pci resource start %llx\n", (unsigned long long)pci_resource_start(pdev, 0));
+	dbg("pci resource len %llx\n", (unsigned long long)pci_resource_len(pdev, 0));
+
+	if (!request_mem_region(pci_resource_start(pdev, 0),
+				pci_resource_len(pdev, 0), MY_NAME)) {
+		err("cannot reserve MMIO region\n");
+		rc = -ENOMEM;
+		goto err_free_bus;
+	}
+
+	ctrl->hpc_reg = ioremap(pci_resource_start(pdev, 0),
+					pci_resource_len(pdev, 0));
+	if (!ctrl->hpc_reg) {
+		err("cannot remap MMIO region %llx @ %llx\n",
+		    (unsigned long long)pci_resource_len(pdev, 0),
+		    (unsigned long long)pci_resource_start(pdev, 0));
+		rc = -ENODEV;
+		goto err_free_mem_region;
+	}
+
+	/* Check for 66Mhz operation */
+	bus->cur_bus_speed = get_controller_speed(ctrl);
+
+
+	/********************************************************
+	 *
+	 *              Save configuration headers for this and
+	 *              subordinate PCI buses
+	 *
+	 ********************************************************/
+
+	/* find the physical slot number of the first hot plug slot */
+
+	/* Get slot won't work for devices behind bridges, but
+	 * in this case it will always be called for the "base"
+	 * bus/dev/func of a slot.
+	 * CS: this is leveraging the PCIIRQ routing code from the kernel
+	 * (pci-pc.c: get_irq_routing_table) */
+	rc = get_slot_mapping(ctrl->pci_bus, pdev->bus->number,
+				(readb(ctrl->hpc_reg + SLOT_MASK) >> 4),
+				&(ctrl->first_slot));
+	dbg("get_slot_mapping: first_slot = %d, returned = %d\n",
+				ctrl->first_slot, rc);
+	if (rc) {
+		err(msg_initialization_err, rc);
+		goto err_iounmap;
+	}
+
+	/* Store PCI Config Space for all devices on this bus */
+	rc = cpqhp_save_config(ctrl, ctrl->bus, readb(ctrl->hpc_reg + SLOT_MASK));
+	if (rc) {
+		err("%s: unable to save PCI configuration data, error %d\n",
+				__func__, rc);
+		goto err_iounmap;
+	}
+
+	/*
+	 * Get IO, memory, and IRQ resources for new devices
+	 */
+	/* The next line is required for cpqhp_find_available_resources */
+	ctrl->interrupt = pdev->irq;
+	if (ctrl->interrupt < 0x10) {
+		cpqhp_legacy_mode = 1;
+		dbg("System seems to be configured for Full Table Mapped MPS mode\n");
+	}
+
+	ctrl->cfgspc_irq = 0;
+	pci_read_config_byte(pdev, PCI_INTERRUPT_LINE, &ctrl->cfgspc_irq);
+
+	rc = cpqhp_find_available_resources(ctrl, cpqhp_rom_start);
+	ctrl->add_support = !rc;
+	if (rc) {
+		dbg("cpqhp_find_available_resources = 0x%x\n", rc);
+		err("unable to locate PCI configuration resources for hot plug add.\n");
+		goto err_iounmap;
+	}
+
+	/*
+	 * Finish setting up the hot plug ctrl device
+	 */
+	ctrl->slot_device_offset = readb(ctrl->hpc_reg + SLOT_MASK) >> 4;
+	dbg("NumSlots %d\n", ctrl->slot_device_offset);
+
+	ctrl->next_event = 0;
+
+	/* Setup the slot information structures */
+	rc = ctrl_slot_setup(ctrl, smbios_start, smbios_table);
+	if (rc) {
+		err(msg_initialization_err, 6);
+		err("%s: unable to save PCI configuration data, error %d\n",
+			__func__, rc);
+		goto err_iounmap;
+	}
+
+	/* Mask all general input interrupts */
+	writel(0xFFFFFFFFL, ctrl->hpc_reg + INT_MASK);
+
+	/* set up the interrupt */
+	dbg("HPC interrupt = %d\n", ctrl->interrupt);
+	if (request_irq(ctrl->interrupt, cpqhp_ctrl_intr,
+			IRQF_SHARED, MY_NAME, ctrl)) {
+		err("Can't get irq %d for the hotplug pci controller\n",
+			ctrl->interrupt);
+		rc = -ENODEV;
+		goto err_iounmap;
+	}
+
+	/* Enable Shift Out interrupt and clear it, also enable SERR on power
+	 * fault
+	 */
+	temp_word = readw(ctrl->hpc_reg + MISC);
+	temp_word |= 0x4006;
+	writew(temp_word, ctrl->hpc_reg + MISC);
+
+	/* Changed 05/05/97 to clear all interrupts at start */
+	writel(0xFFFFFFFFL, ctrl->hpc_reg + INT_INPUT_CLEAR);
+
+	ctrl->ctrl_int_comp = readl(ctrl->hpc_reg + INT_INPUT_CLEAR);
+
+	writel(0x0L, ctrl->hpc_reg + INT_MASK);
+
+	if (!cpqhp_ctrl_list) {
+		cpqhp_ctrl_list = ctrl;
+		ctrl->next = NULL;
+	} else {
+		ctrl->next = cpqhp_ctrl_list;
+		cpqhp_ctrl_list = ctrl;
+	}
+
+	/* turn off empty slots here unless command line option "ON" set
+	 * Wait for exclusive access to hardware
+	 */
+	mutex_lock(&ctrl->crit_sect);
+
+	num_of_slots = readb(ctrl->hpc_reg + SLOT_MASK) & 0x0F;
+
+	/* find first device number for the ctrl */
+	device = readb(ctrl->hpc_reg + SLOT_MASK) >> 4;
+
+	while (num_of_slots) {
+		dbg("num_of_slots: %d\n", num_of_slots);
+		func = cpqhp_slot_find(ctrl->bus, device, 0);
+		if (!func)
+			break;
+
+		hp_slot = func->device - ctrl->slot_device_offset;
+		dbg("hp_slot: %d\n", hp_slot);
+
+		/* We have to save the presence info for these slots */
+		temp_word = ctrl->ctrl_int_comp >> 16;
+		func->presence_save = (temp_word >> hp_slot) & 0x01;
+		func->presence_save |= (temp_word >> (hp_slot + 7)) & 0x02;
+
+		if (ctrl->ctrl_int_comp & (0x1L << hp_slot))
+			func->switch_save = 0;
+		else
+			func->switch_save = 0x10;
+
+		if (!power_mode)
+			if (!func->is_a_board) {
+				green_LED_off(ctrl, hp_slot);
+				slot_disable(ctrl, hp_slot);
+			}
+
+		device++;
+		num_of_slots--;
+	}
+
+	if (!power_mode) {
+		set_SOGO(ctrl);
+		/* Wait for SOBS to be unset */
+		wait_for_ctrl_irq(ctrl);
+	}
+
+	rc = init_SERR(ctrl);
+	if (rc) {
+		err("init_SERR failed\n");
+		mutex_unlock(&ctrl->crit_sect);
+		goto err_free_irq;
+	}
+
+	/* Done with exclusive hardware access */
+	mutex_unlock(&ctrl->crit_sect);
+
+	cpqhp_create_debugfs_files(ctrl);
+
+	return 0;
+
+err_free_irq:
+	free_irq(ctrl->interrupt, ctrl);
+err_iounmap:
+	iounmap(ctrl->hpc_reg);
+err_free_mem_region:
+	release_mem_region(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
+err_free_bus:
+	kfree(ctrl->pci_bus);
+err_free_ctrl:
+	kfree(ctrl);
+err_disable_device:
+	pci_disable_device(pdev);
+	return rc;
+}
+
+static void __exit unload_cpqphpd(void)
+{
+	struct pci_func *next;
+	struct pci_func *TempSlot;
+	int loop;
+	u32 rc;
+	struct controller *ctrl;
+	struct controller *tctrl;
+	struct pci_resource *res;
+	struct pci_resource *tres;
+
+	compaq_nvram_store(cpqhp_rom_start);
+
+	ctrl = cpqhp_ctrl_list;
+
+	while (ctrl) {
+		if (ctrl->hpc_reg) {
+			u16 misc;
+			rc = read_slot_enable(ctrl);
+
+			writeb(0, ctrl->hpc_reg + SLOT_SERR);
+			writel(0xFFFFFFC0L | ~rc, ctrl->hpc_reg + INT_MASK);
+
+			misc = readw(ctrl->hpc_reg + MISC);
+			misc &= 0xFFFD;
+			writew(misc, ctrl->hpc_reg + MISC);
+		}
+
+		ctrl_slot_cleanup(ctrl);
+
+		res = ctrl->io_head;
+		while (res) {
+			tres = res;
+			res = res->next;
+			kfree(tres);
+		}
+
+		res = ctrl->mem_head;
+		while (res) {
+			tres = res;
+			res = res->next;
+			kfree(tres);
+		}
+
+		res = ctrl->p_mem_head;
+		while (res) {
+			tres = res;
+			res = res->next;
+			kfree(tres);
+		}
+
+		res = ctrl->bus_head;
+		while (res) {
+			tres = res;
+			res = res->next;
+			kfree(tres);
+		}
+
+		kfree(ctrl->pci_bus);
+
+		tctrl = ctrl;
+		ctrl = ctrl->next;
+		kfree(tctrl);
+	}
+
+	for (loop = 0; loop < 256; loop++) {
+		next = cpqhp_slot_list[loop];
+		while (next != NULL) {
+			res = next->io_head;
+			while (res) {
+				tres = res;
+				res = res->next;
+				kfree(tres);
+			}
+
+			res = next->mem_head;
+			while (res) {
+				tres = res;
+				res = res->next;
+				kfree(tres);
+			}
+
+			res = next->p_mem_head;
+			while (res) {
+				tres = res;
+				res = res->next;
+				kfree(tres);
+			}
+
+			res = next->bus_head;
+			while (res) {
+				tres = res;
+				res = res->next;
+				kfree(tres);
+			}
+
+			TempSlot = next;
+			next = next->next;
+			kfree(TempSlot);
+		}
+	}
+
+	/* Stop the notification mechanism */
+	if (initialized)
+		cpqhp_event_stop_thread();
+
+	/* unmap the rom address */
+	if (cpqhp_rom_start)
+		iounmap(cpqhp_rom_start);
+	if (smbios_start)
+		iounmap(smbios_start);
+}
+
+static const struct pci_device_id hpcd_pci_tbl[] = {
+	{
+	/* handle any PCI Hotplug controller */
+	.class =        ((PCI_CLASS_SYSTEM_PCI_HOTPLUG << 8) | 0x00),
+	.class_mask =   ~0,
+
+	/* no matter who makes it */
+	.vendor =       PCI_ANY_ID,
+	.device =       PCI_ANY_ID,
+	.subvendor =    PCI_ANY_ID,
+	.subdevice =    PCI_ANY_ID,
+
+	}, { /* end: all zeroes */ }
+};
+
+MODULE_DEVICE_TABLE(pci, hpcd_pci_tbl);
+
+static struct pci_driver cpqhpc_driver = {
+	.name =		"compaq_pci_hotplug",
+	.id_table =	hpcd_pci_tbl,
+	.probe =	cpqhpc_probe,
+	/* remove:	cpqhpc_remove_one, */
+};
+
+static int __init cpqhpc_init(void)
+{
+	int result;
+
+	cpqhp_debug = debug;
+
+	info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
+	cpqhp_initialize_debugfs();
+	result = pci_register_driver(&cpqhpc_driver);
+	dbg("pci_register_driver = %d\n", result);
+	return result;
+}
+
+static void __exit cpqhpc_cleanup(void)
+{
+	dbg("unload_cpqphpd()\n");
+	unload_cpqphpd();
+
+	dbg("pci_unregister_driver\n");
+	pci_unregister_driver(&cpqhpc_driver);
+	cpqhp_shutdown_debugfs();
+}
+
+module_init(cpqhpc_init);
+module_exit(cpqhpc_cleanup);
diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c
new file mode 100644
index 000000000..e429ecddc
--- /dev/null
+++ b/drivers/pci/hotplug/cpqphp_ctrl.c
@@ -0,0 +1,2911 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Compaq Hot Plug Controller Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/wait.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <linux/kthread.h>
+#include "cpqphp.h"
+
+static u32 configure_new_device(struct controller *ctrl, struct pci_func *func,
+			u8 behind_bridge, struct resource_lists *resources);
+static int configure_new_function(struct controller *ctrl, struct pci_func *func,
+			u8 behind_bridge, struct resource_lists *resources);
+static void interrupt_event_handler(struct controller *ctrl);
+
+
+static struct task_struct *cpqhp_event_thread;
+static struct timer_list *pushbutton_pending;	/* = NULL */
+
+/* delay is in jiffies to wait for */
+static void long_delay(int delay)
+{
+	/*
+	 * XXX(hch): if someone is bored please convert all callers
+	 * to call msleep_interruptible directly.  They really want
+	 * to specify timeouts in natural units and spend a lot of
+	 * effort converting them to jiffies..
+	 */
+	msleep_interruptible(jiffies_to_msecs(delay));
+}
+
+
+/* FIXME: The following line needs to be somewhere else... */
+#define WRONG_BUS_FREQUENCY 0x07
+static u8 handle_switch_change(u8 change, struct controller *ctrl)
+{
+	int hp_slot;
+	u8 rc = 0;
+	u16 temp_word;
+	struct pci_func *func;
+	struct event_info *taskInfo;
+
+	if (!change)
+		return 0;
+
+	/* Switch Change */
+	dbg("cpqsbd:  Switch interrupt received.\n");
+
+	for (hp_slot = 0; hp_slot < 6; hp_slot++) {
+		if (change & (0x1L << hp_slot)) {
+			/*
+			 * this one changed.
+			 */
+			func = cpqhp_slot_find(ctrl->bus,
+				(hp_slot + ctrl->slot_device_offset), 0);
+
+			/* this is the structure that tells the worker thread
+			 * what to do
+			 */
+			taskInfo = &(ctrl->event_queue[ctrl->next_event]);
+			ctrl->next_event = (ctrl->next_event + 1) % 10;
+			taskInfo->hp_slot = hp_slot;
+
+			rc++;
+
+			temp_word = ctrl->ctrl_int_comp >> 16;
+			func->presence_save = (temp_word >> hp_slot) & 0x01;
+			func->presence_save |= (temp_word >> (hp_slot + 7)) & 0x02;
+
+			if (ctrl->ctrl_int_comp & (0x1L << hp_slot)) {
+				/*
+				 * Switch opened
+				 */
+
+				func->switch_save = 0;
+
+				taskInfo->event_type = INT_SWITCH_OPEN;
+			} else {
+				/*
+				 * Switch closed
+				 */
+
+				func->switch_save = 0x10;
+
+				taskInfo->event_type = INT_SWITCH_CLOSE;
+			}
+		}
+	}
+
+	return rc;
+}
+
+/**
+ * cpqhp_find_slot - find the struct slot of given device
+ * @ctrl: scan lots of this controller
+ * @device: the device id to find
+ */
+static struct slot *cpqhp_find_slot(struct controller *ctrl, u8 device)
+{
+	struct slot *slot = ctrl->slot;
+
+	while (slot && (slot->device != device))
+		slot = slot->next;
+
+	return slot;
+}
+
+
+static u8 handle_presence_change(u16 change, struct controller *ctrl)
+{
+	int hp_slot;
+	u8 rc = 0;
+	u8 temp_byte;
+	u16 temp_word;
+	struct pci_func *func;
+	struct event_info *taskInfo;
+	struct slot *p_slot;
+
+	if (!change)
+		return 0;
+
+	/*
+	 * Presence Change
+	 */
+	dbg("cpqsbd:  Presence/Notify input change.\n");
+	dbg("         Changed bits are 0x%4.4x\n", change);
+
+	for (hp_slot = 0; hp_slot < 6; hp_slot++) {
+		if (change & (0x0101 << hp_slot)) {
+			/*
+			 * this one changed.
+			 */
+			func = cpqhp_slot_find(ctrl->bus,
+				(hp_slot + ctrl->slot_device_offset), 0);
+
+			taskInfo = &(ctrl->event_queue[ctrl->next_event]);
+			ctrl->next_event = (ctrl->next_event + 1) % 10;
+			taskInfo->hp_slot = hp_slot;
+
+			rc++;
+
+			p_slot = cpqhp_find_slot(ctrl, hp_slot + (readb(ctrl->hpc_reg + SLOT_MASK) >> 4));
+			if (!p_slot)
+				return 0;
+
+			/* If the switch closed, must be a button
+			 * If not in button mode, nevermind
+			 */
+			if (func->switch_save && (ctrl->push_button == 1)) {
+				temp_word = ctrl->ctrl_int_comp >> 16;
+				temp_byte = (temp_word >> hp_slot) & 0x01;
+				temp_byte |= (temp_word >> (hp_slot + 7)) & 0x02;
+
+				if (temp_byte != func->presence_save) {
+					/*
+					 * button Pressed (doesn't do anything)
+					 */
+					dbg("hp_slot %d button pressed\n", hp_slot);
+					taskInfo->event_type = INT_BUTTON_PRESS;
+				} else {
+					/*
+					 * button Released - TAKE ACTION!!!!
+					 */
+					dbg("hp_slot %d button released\n", hp_slot);
+					taskInfo->event_type = INT_BUTTON_RELEASE;
+
+					/* Cancel if we are still blinking */
+					if ((p_slot->state == BLINKINGON_STATE)
+					    || (p_slot->state == BLINKINGOFF_STATE)) {
+						taskInfo->event_type = INT_BUTTON_CANCEL;
+						dbg("hp_slot %d button cancel\n", hp_slot);
+					} else if ((p_slot->state == POWERON_STATE)
+						   || (p_slot->state == POWEROFF_STATE)) {
+						/* info(msg_button_ignore, p_slot->number); */
+						taskInfo->event_type = INT_BUTTON_IGNORE;
+						dbg("hp_slot %d button ignore\n", hp_slot);
+					}
+				}
+			} else {
+				/* Switch is open, assume a presence change
+				 * Save the presence state
+				 */
+				temp_word = ctrl->ctrl_int_comp >> 16;
+				func->presence_save = (temp_word >> hp_slot) & 0x01;
+				func->presence_save |= (temp_word >> (hp_slot + 7)) & 0x02;
+
+				if ((!(ctrl->ctrl_int_comp & (0x010000 << hp_slot))) ||
+				    (!(ctrl->ctrl_int_comp & (0x01000000 << hp_slot)))) {
+					/* Present */
+					taskInfo->event_type = INT_PRESENCE_ON;
+				} else {
+					/* Not Present */
+					taskInfo->event_type = INT_PRESENCE_OFF;
+				}
+			}
+		}
+	}
+
+	return rc;
+}
+
+
+static u8 handle_power_fault(u8 change, struct controller *ctrl)
+{
+	int hp_slot;
+	u8 rc = 0;
+	struct pci_func *func;
+	struct event_info *taskInfo;
+
+	if (!change)
+		return 0;
+
+	/*
+	 * power fault
+	 */
+
+	info("power fault interrupt\n");
+
+	for (hp_slot = 0; hp_slot < 6; hp_slot++) {
+		if (change & (0x01 << hp_slot)) {
+			/*
+			 * this one changed.
+			 */
+			func = cpqhp_slot_find(ctrl->bus,
+				(hp_slot + ctrl->slot_device_offset), 0);
+
+			taskInfo = &(ctrl->event_queue[ctrl->next_event]);
+			ctrl->next_event = (ctrl->next_event + 1) % 10;
+			taskInfo->hp_slot = hp_slot;
+
+			rc++;
+
+			if (ctrl->ctrl_int_comp & (0x00000100 << hp_slot)) {
+				/*
+				 * power fault Cleared
+				 */
+				func->status = 0x00;
+
+				taskInfo->event_type = INT_POWER_FAULT_CLEAR;
+			} else {
+				/*
+				 * power fault
+				 */
+				taskInfo->event_type = INT_POWER_FAULT;
+
+				if (ctrl->rev < 4) {
+					amber_LED_on(ctrl, hp_slot);
+					green_LED_off(ctrl, hp_slot);
+					set_SOGO(ctrl);
+
+					/* this is a fatal condition, we want
+					 * to crash the machine to protect from
+					 * data corruption. simulated_NMI
+					 * shouldn't ever return */
+					/* FIXME
+					simulated_NMI(hp_slot, ctrl); */
+
+					/* The following code causes a software
+					 * crash just in case simulated_NMI did
+					 * return */
+					/*FIXME
+					panic(msg_power_fault); */
+				} else {
+					/* set power fault status for this board */
+					func->status = 0xFF;
+					info("power fault bit %x set\n", hp_slot);
+				}
+			}
+		}
+	}
+
+	return rc;
+}
+
+
+/**
+ * sort_by_size - sort nodes on the list by their length, smallest first.
+ * @head: list to sort
+ */
+static int sort_by_size(struct pci_resource **head)
+{
+	struct pci_resource *current_res;
+	struct pci_resource *next_res;
+	int out_of_order = 1;
+
+	if (!(*head))
+		return 1;
+
+	if (!((*head)->next))
+		return 0;
+
+	while (out_of_order) {
+		out_of_order = 0;
+
+		/* Special case for swapping list head */
+		if (((*head)->next) &&
+		    ((*head)->length > (*head)->next->length)) {
+			out_of_order++;
+			current_res = *head;
+			*head = (*head)->next;
+			current_res->next = (*head)->next;
+			(*head)->next = current_res;
+		}
+
+		current_res = *head;
+
+		while (current_res->next && current_res->next->next) {
+			if (current_res->next->length > current_res->next->next->length) {
+				out_of_order++;
+				next_res = current_res->next;
+				current_res->next = current_res->next->next;
+				current_res = current_res->next;
+				next_res->next = current_res->next;
+				current_res->next = next_res;
+			} else
+				current_res = current_res->next;
+		}
+	}  /* End of out_of_order loop */
+
+	return 0;
+}
+
+
+/**
+ * sort_by_max_size - sort nodes on the list by their length, largest first.
+ * @head: list to sort
+ */
+static int sort_by_max_size(struct pci_resource **head)
+{
+	struct pci_resource *current_res;
+	struct pci_resource *next_res;
+	int out_of_order = 1;
+
+	if (!(*head))
+		return 1;
+
+	if (!((*head)->next))
+		return 0;
+
+	while (out_of_order) {
+		out_of_order = 0;
+
+		/* Special case for swapping list head */
+		if (((*head)->next) &&
+		    ((*head)->length < (*head)->next->length)) {
+			out_of_order++;
+			current_res = *head;
+			*head = (*head)->next;
+			current_res->next = (*head)->next;
+			(*head)->next = current_res;
+		}
+
+		current_res = *head;
+
+		while (current_res->next && current_res->next->next) {
+			if (current_res->next->length < current_res->next->next->length) {
+				out_of_order++;
+				next_res = current_res->next;
+				current_res->next = current_res->next->next;
+				current_res = current_res->next;
+				next_res->next = current_res->next;
+				current_res->next = next_res;
+			} else
+				current_res = current_res->next;
+		}
+	}  /* End of out_of_order loop */
+
+	return 0;
+}
+
+
+/**
+ * do_pre_bridge_resource_split - find node of resources that are unused
+ * @head: new list head
+ * @orig_head: original list head
+ * @alignment: max node size (?)
+ */
+static struct pci_resource *do_pre_bridge_resource_split(struct pci_resource **head,
+				struct pci_resource **orig_head, u32 alignment)
+{
+	struct pci_resource *prevnode = NULL;
+	struct pci_resource *node;
+	struct pci_resource *split_node;
+	u32 rc;
+	u32 temp_dword;
+	dbg("do_pre_bridge_resource_split\n");
+
+	if (!(*head) || !(*orig_head))
+		return NULL;
+
+	rc = cpqhp_resource_sort_and_combine(head);
+
+	if (rc)
+		return NULL;
+
+	if ((*head)->base != (*orig_head)->base)
+		return NULL;
+
+	if ((*head)->length == (*orig_head)->length)
+		return NULL;
+
+
+	/* If we got here, there the bridge requires some of the resource, but
+	 * we may be able to split some off of the front
+	 */
+
+	node = *head;
+
+	if (node->length & (alignment - 1)) {
+		/* this one isn't an aligned length, so we'll make a new entry
+		 * and split it up.
+		 */
+		split_node = kmalloc(sizeof(*split_node), GFP_KERNEL);
+
+		if (!split_node)
+			return NULL;
+
+		temp_dword = (node->length | (alignment-1)) + 1 - alignment;
+
+		split_node->base = node->base;
+		split_node->length = temp_dword;
+
+		node->length -= temp_dword;
+		node->base += split_node->length;
+
+		/* Put it in the list */
+		*head = split_node;
+		split_node->next = node;
+	}
+
+	if (node->length < alignment)
+		return NULL;
+
+	/* Now unlink it */
+	if (*head == node) {
+		*head = node->next;
+	} else {
+		prevnode = *head;
+		while (prevnode->next != node)
+			prevnode = prevnode->next;
+
+		prevnode->next = node->next;
+	}
+	node->next = NULL;
+
+	return node;
+}
+
+
+/**
+ * do_bridge_resource_split - find one node of resources that aren't in use
+ * @head: list head
+ * @alignment: max node size (?)
+ */
+static struct pci_resource *do_bridge_resource_split(struct pci_resource **head, u32 alignment)
+{
+	struct pci_resource *prevnode = NULL;
+	struct pci_resource *node;
+	u32 rc;
+	u32 temp_dword;
+
+	rc = cpqhp_resource_sort_and_combine(head);
+
+	if (rc)
+		return NULL;
+
+	node = *head;
+
+	while (node->next) {
+		prevnode = node;
+		node = node->next;
+		kfree(prevnode);
+	}
+
+	if (node->length < alignment)
+		goto error;
+
+	if (node->base & (alignment - 1)) {
+		/* Short circuit if adjusted size is too small */
+		temp_dword = (node->base | (alignment-1)) + 1;
+		if ((node->length - (temp_dword - node->base)) < alignment)
+			goto error;
+
+		node->length -= (temp_dword - node->base);
+		node->base = temp_dword;
+	}
+
+	if (node->length & (alignment - 1))
+		/* There's stuff in use after this node */
+		goto error;
+
+	return node;
+error:
+	kfree(node);
+	return NULL;
+}
+
+
+/**
+ * get_io_resource - find first node of given size not in ISA aliasing window.
+ * @head: list to search
+ * @size: size of node to find, must be a power of two.
+ *
+ * Description: This function sorts the resource list by size and then
+ * returns the first node of "size" length that is not in the ISA aliasing
+ * window.  If it finds a node larger than "size" it will split it up.
+ */
+static struct pci_resource *get_io_resource(struct pci_resource **head, u32 size)
+{
+	struct pci_resource *prevnode;
+	struct pci_resource *node;
+	struct pci_resource *split_node;
+	u32 temp_dword;
+
+	if (!(*head))
+		return NULL;
+
+	if (cpqhp_resource_sort_and_combine(head))
+		return NULL;
+
+	if (sort_by_size(head))
+		return NULL;
+
+	for (node = *head; node; node = node->next) {
+		if (node->length < size)
+			continue;
+
+		if (node->base & (size - 1)) {
+			/* this one isn't base aligned properly
+			 * so we'll make a new entry and split it up
+			 */
+			temp_dword = (node->base | (size-1)) + 1;
+
+			/* Short circuit if adjusted size is too small */
+			if ((node->length - (temp_dword - node->base)) < size)
+				continue;
+
+			split_node = kmalloc(sizeof(*split_node), GFP_KERNEL);
+
+			if (!split_node)
+				return NULL;
+
+			split_node->base = node->base;
+			split_node->length = temp_dword - node->base;
+			node->base = temp_dword;
+			node->length -= split_node->length;
+
+			/* Put it in the list */
+			split_node->next = node->next;
+			node->next = split_node;
+		} /* End of non-aligned base */
+
+		/* Don't need to check if too small since we already did */
+		if (node->length > size) {
+			/* this one is longer than we need
+			 * so we'll make a new entry and split it up
+			 */
+			split_node = kmalloc(sizeof(*split_node), GFP_KERNEL);
+
+			if (!split_node)
+				return NULL;
+
+			split_node->base = node->base + size;
+			split_node->length = node->length - size;
+			node->length = size;
+
+			/* Put it in the list */
+			split_node->next = node->next;
+			node->next = split_node;
+		}  /* End of too big on top end */
+
+		/* For IO make sure it's not in the ISA aliasing space */
+		if (node->base & 0x300L)
+			continue;
+
+		/* If we got here, then it is the right size
+		 * Now take it out of the list and break
+		 */
+		if (*head == node) {
+			*head = node->next;
+		} else {
+			prevnode = *head;
+			while (prevnode->next != node)
+				prevnode = prevnode->next;
+
+			prevnode->next = node->next;
+		}
+		node->next = NULL;
+		break;
+	}
+
+	return node;
+}
+
+
+/**
+ * get_max_resource - get largest node which has at least the given size.
+ * @head: the list to search the node in
+ * @size: the minimum size of the node to find
+ *
+ * Description: Gets the largest node that is at least "size" big from the
+ * list pointed to by head.  It aligns the node on top and bottom
+ * to "size" alignment before returning it.
+ */
+static struct pci_resource *get_max_resource(struct pci_resource **head, u32 size)
+{
+	struct pci_resource *max;
+	struct pci_resource *temp;
+	struct pci_resource *split_node;
+	u32 temp_dword;
+
+	if (cpqhp_resource_sort_and_combine(head))
+		return NULL;
+
+	if (sort_by_max_size(head))
+		return NULL;
+
+	for (max = *head; max; max = max->next) {
+		/* If not big enough we could probably just bail,
+		 * instead we'll continue to the next.
+		 */
+		if (max->length < size)
+			continue;
+
+		if (max->base & (size - 1)) {
+			/* this one isn't base aligned properly
+			 * so we'll make a new entry and split it up
+			 */
+			temp_dword = (max->base | (size-1)) + 1;
+
+			/* Short circuit if adjusted size is too small */
+			if ((max->length - (temp_dword - max->base)) < size)
+				continue;
+
+			split_node = kmalloc(sizeof(*split_node), GFP_KERNEL);
+
+			if (!split_node)
+				return NULL;
+
+			split_node->base = max->base;
+			split_node->length = temp_dword - max->base;
+			max->base = temp_dword;
+			max->length -= split_node->length;
+
+			split_node->next = max->next;
+			max->next = split_node;
+		}
+
+		if ((max->base + max->length) & (size - 1)) {
+			/* this one isn't end aligned properly at the top
+			 * so we'll make a new entry and split it up
+			 */
+			split_node = kmalloc(sizeof(*split_node), GFP_KERNEL);
+
+			if (!split_node)
+				return NULL;
+			temp_dword = ((max->base + max->length) & ~(size - 1));
+			split_node->base = temp_dword;
+			split_node->length = max->length + max->base
+					     - split_node->base;
+			max->length -= split_node->length;
+
+			split_node->next = max->next;
+			max->next = split_node;
+		}
+
+		/* Make sure it didn't shrink too much when we aligned it */
+		if (max->length < size)
+			continue;
+
+		/* Now take it out of the list */
+		temp = *head;
+		if (temp == max) {
+			*head = max->next;
+		} else {
+			while (temp && temp->next != max)
+				temp = temp->next;
+
+			if (temp)
+				temp->next = max->next;
+		}
+
+		max->next = NULL;
+		break;
+	}
+
+	return max;
+}
+
+
+/**
+ * get_resource - find resource of given size and split up larger ones.
+ * @head: the list to search for resources
+ * @size: the size limit to use
+ *
+ * Description: This function sorts the resource list by size and then
+ * returns the first node of "size" length.  If it finds a node
+ * larger than "size" it will split it up.
+ *
+ * size must be a power of two.
+ */
+static struct pci_resource *get_resource(struct pci_resource **head, u32 size)
+{
+	struct pci_resource *prevnode;
+	struct pci_resource *node;
+	struct pci_resource *split_node;
+	u32 temp_dword;
+
+	if (cpqhp_resource_sort_and_combine(head))
+		return NULL;
+
+	if (sort_by_size(head))
+		return NULL;
+
+	for (node = *head; node; node = node->next) {
+		dbg("%s: req_size =%x node=%p, base=%x, length=%x\n",
+		    __func__, size, node, node->base, node->length);
+		if (node->length < size)
+			continue;
+
+		if (node->base & (size - 1)) {
+			dbg("%s: not aligned\n", __func__);
+			/* this one isn't base aligned properly
+			 * so we'll make a new entry and split it up
+			 */
+			temp_dword = (node->base | (size-1)) + 1;
+
+			/* Short circuit if adjusted size is too small */
+			if ((node->length - (temp_dword - node->base)) < size)
+				continue;
+
+			split_node = kmalloc(sizeof(*split_node), GFP_KERNEL);
+
+			if (!split_node)
+				return NULL;
+
+			split_node->base = node->base;
+			split_node->length = temp_dword - node->base;
+			node->base = temp_dword;
+			node->length -= split_node->length;
+
+			split_node->next = node->next;
+			node->next = split_node;
+		} /* End of non-aligned base */
+
+		/* Don't need to check if too small since we already did */
+		if (node->length > size) {
+			dbg("%s: too big\n", __func__);
+			/* this one is longer than we need
+			 * so we'll make a new entry and split it up
+			 */
+			split_node = kmalloc(sizeof(*split_node), GFP_KERNEL);
+
+			if (!split_node)
+				return NULL;
+
+			split_node->base = node->base + size;
+			split_node->length = node->length - size;
+			node->length = size;
+
+			/* Put it in the list */
+			split_node->next = node->next;
+			node->next = split_node;
+		}  /* End of too big on top end */
+
+		dbg("%s: got one!!!\n", __func__);
+		/* If we got here, then it is the right size
+		 * Now take it out of the list */
+		if (*head == node) {
+			*head = node->next;
+		} else {
+			prevnode = *head;
+			while (prevnode->next != node)
+				prevnode = prevnode->next;
+
+			prevnode->next = node->next;
+		}
+		node->next = NULL;
+		break;
+	}
+	return node;
+}
+
+
+/**
+ * cpqhp_resource_sort_and_combine - sort nodes by base addresses and clean up
+ * @head: the list to sort and clean up
+ *
+ * Description: Sorts all of the nodes in the list in ascending order by
+ * their base addresses.  Also does garbage collection by
+ * combining adjacent nodes.
+ *
+ * Returns %0 if success.
+ */
+int cpqhp_resource_sort_and_combine(struct pci_resource **head)
+{
+	struct pci_resource *node1;
+	struct pci_resource *node2;
+	int out_of_order = 1;
+
+	dbg("%s: head = %p, *head = %p\n", __func__, head, *head);
+
+	if (!(*head))
+		return 1;
+
+	dbg("*head->next = %p\n", (*head)->next);
+
+	if (!(*head)->next)
+		return 0;	/* only one item on the list, already sorted! */
+
+	dbg("*head->base = 0x%x\n", (*head)->base);
+	dbg("*head->next->base = 0x%x\n", (*head)->next->base);
+	while (out_of_order) {
+		out_of_order = 0;
+
+		/* Special case for swapping list head */
+		if (((*head)->next) &&
+		    ((*head)->base > (*head)->next->base)) {
+			node1 = *head;
+			(*head) = (*head)->next;
+			node1->next = (*head)->next;
+			(*head)->next = node1;
+			out_of_order++;
+		}
+
+		node1 = (*head);
+
+		while (node1->next && node1->next->next) {
+			if (node1->next->base > node1->next->next->base) {
+				out_of_order++;
+				node2 = node1->next;
+				node1->next = node1->next->next;
+				node1 = node1->next;
+				node2->next = node1->next;
+				node1->next = node2;
+			} else
+				node1 = node1->next;
+		}
+	}  /* End of out_of_order loop */
+
+	node1 = *head;
+
+	while (node1 && node1->next) {
+		if ((node1->base + node1->length) == node1->next->base) {
+			/* Combine */
+			dbg("8..\n");
+			node1->length += node1->next->length;
+			node2 = node1->next;
+			node1->next = node1->next->next;
+			kfree(node2);
+		} else
+			node1 = node1->next;
+	}
+
+	return 0;
+}
+
+
+irqreturn_t cpqhp_ctrl_intr(int IRQ, void *data)
+{
+	struct controller *ctrl = data;
+	u8 schedule_flag = 0;
+	u8 reset;
+	u16 misc;
+	u32 Diff;
+
+
+	misc = readw(ctrl->hpc_reg + MISC);
+	/*
+	 * Check to see if it was our interrupt
+	 */
+	if (!(misc & 0x000C))
+		return IRQ_NONE;
+
+	if (misc & 0x0004) {
+		/*
+		 * Serial Output interrupt Pending
+		 */
+
+		/* Clear the interrupt */
+		misc |= 0x0004;
+		writew(misc, ctrl->hpc_reg + MISC);
+
+		/* Read to clear posted writes */
+		misc = readw(ctrl->hpc_reg + MISC);
+
+		dbg("%s - waking up\n", __func__);
+		wake_up_interruptible(&ctrl->queue);
+	}
+
+	if (misc & 0x0008) {
+		/* General-interrupt-input interrupt Pending */
+		Diff = readl(ctrl->hpc_reg + INT_INPUT_CLEAR) ^ ctrl->ctrl_int_comp;
+
+		ctrl->ctrl_int_comp = readl(ctrl->hpc_reg + INT_INPUT_CLEAR);
+
+		/* Clear the interrupt */
+		writel(Diff, ctrl->hpc_reg + INT_INPUT_CLEAR);
+
+		/* Read it back to clear any posted writes */
+		readl(ctrl->hpc_reg + INT_INPUT_CLEAR);
+
+		if (!Diff)
+			/* Clear all interrupts */
+			writel(0xFFFFFFFF, ctrl->hpc_reg + INT_INPUT_CLEAR);
+
+		schedule_flag += handle_switch_change((u8)(Diff & 0xFFL), ctrl);
+		schedule_flag += handle_presence_change((u16)((Diff & 0xFFFF0000L) >> 16), ctrl);
+		schedule_flag += handle_power_fault((u8)((Diff & 0xFF00L) >> 8), ctrl);
+	}
+
+	reset = readb(ctrl->hpc_reg + RESET_FREQ_MODE);
+	if (reset & 0x40) {
+		/* Bus reset has completed */
+		reset &= 0xCF;
+		writeb(reset, ctrl->hpc_reg + RESET_FREQ_MODE);
+		reset = readb(ctrl->hpc_reg + RESET_FREQ_MODE);
+		wake_up_interruptible(&ctrl->queue);
+	}
+
+	if (schedule_flag) {
+		wake_up_process(cpqhp_event_thread);
+		dbg("Waking even thread");
+	}
+	return IRQ_HANDLED;
+}
+
+
+/**
+ * cpqhp_slot_create - Creates a node and adds it to the proper bus.
+ * @busnumber: bus where new node is to be located
+ *
+ * Returns pointer to the new node or %NULL if unsuccessful.
+ */
+struct pci_func *cpqhp_slot_create(u8 busnumber)
+{
+	struct pci_func *new_slot;
+	struct pci_func *next;
+
+	new_slot = kzalloc(sizeof(*new_slot), GFP_KERNEL);
+	if (new_slot == NULL)
+		return new_slot;
+
+	new_slot->next = NULL;
+	new_slot->configured = 1;
+
+	if (cpqhp_slot_list[busnumber] == NULL) {
+		cpqhp_slot_list[busnumber] = new_slot;
+	} else {
+		next = cpqhp_slot_list[busnumber];
+		while (next->next != NULL)
+			next = next->next;
+		next->next = new_slot;
+	}
+	return new_slot;
+}
+
+
+/**
+ * slot_remove - Removes a node from the linked list of slots.
+ * @old_slot: slot to remove
+ *
+ * Returns %0 if successful, !0 otherwise.
+ */
+static int slot_remove(struct pci_func *old_slot)
+{
+	struct pci_func *next;
+
+	if (old_slot == NULL)
+		return 1;
+
+	next = cpqhp_slot_list[old_slot->bus];
+	if (next == NULL)
+		return 1;
+
+	if (next == old_slot) {
+		cpqhp_slot_list[old_slot->bus] = old_slot->next;
+		cpqhp_destroy_board_resources(old_slot);
+		kfree(old_slot);
+		return 0;
+	}
+
+	while ((next->next != old_slot) && (next->next != NULL))
+		next = next->next;
+
+	if (next->next == old_slot) {
+		next->next = old_slot->next;
+		cpqhp_destroy_board_resources(old_slot);
+		kfree(old_slot);
+		return 0;
+	} else
+		return 2;
+}
+
+
+/**
+ * bridge_slot_remove - Removes a node from the linked list of slots.
+ * @bridge: bridge to remove
+ *
+ * Returns %0 if successful, !0 otherwise.
+ */
+static int bridge_slot_remove(struct pci_func *bridge)
+{
+	u8 subordinateBus, secondaryBus;
+	u8 tempBus;
+	struct pci_func *next;
+
+	secondaryBus = (bridge->config_space[0x06] >> 8) & 0xFF;
+	subordinateBus = (bridge->config_space[0x06] >> 16) & 0xFF;
+
+	for (tempBus = secondaryBus; tempBus <= subordinateBus; tempBus++) {
+		next = cpqhp_slot_list[tempBus];
+
+		while (!slot_remove(next))
+			next = cpqhp_slot_list[tempBus];
+	}
+
+	next = cpqhp_slot_list[bridge->bus];
+
+	if (next == NULL)
+		return 1;
+
+	if (next == bridge) {
+		cpqhp_slot_list[bridge->bus] = bridge->next;
+		goto out;
+	}
+
+	while ((next->next != bridge) && (next->next != NULL))
+		next = next->next;
+
+	if (next->next != bridge)
+		return 2;
+	next->next = bridge->next;
+out:
+	kfree(bridge);
+	return 0;
+}
+
+
+/**
+ * cpqhp_slot_find - Looks for a node by bus, and device, multiple functions accessed
+ * @bus: bus to find
+ * @device: device to find
+ * @index: is %0 for first function found, %1 for the second...
+ *
+ * Returns pointer to the node if successful, %NULL otherwise.
+ */
+struct pci_func *cpqhp_slot_find(u8 bus, u8 device, u8 index)
+{
+	int found = -1;
+	struct pci_func *func;
+
+	func = cpqhp_slot_list[bus];
+
+	if ((func == NULL) || ((func->device == device) && (index == 0)))
+		return func;
+
+	if (func->device == device)
+		found++;
+
+	while (func->next != NULL) {
+		func = func->next;
+
+		if (func->device == device)
+			found++;
+
+		if (found == index)
+			return func;
+	}
+
+	return NULL;
+}
+
+
+/* DJZ: I don't think is_bridge will work as is.
+ * FIXME */
+static int is_bridge(struct pci_func *func)
+{
+	/* Check the header type */
+	if (((func->config_space[0x03] >> 16) & 0xFF) == 0x01)
+		return 1;
+	else
+		return 0;
+}
+
+
+/**
+ * set_controller_speed - set the frequency and/or mode of a specific controller segment.
+ * @ctrl: controller to change frequency/mode for.
+ * @adapter_speed: the speed of the adapter we want to match.
+ * @hp_slot: the slot number where the adapter is installed.
+ *
+ * Returns %0 if we successfully change frequency and/or mode to match the
+ * adapter speed.
+ */
+static u8 set_controller_speed(struct controller *ctrl, u8 adapter_speed, u8 hp_slot)
+{
+	struct slot *slot;
+	struct pci_bus *bus = ctrl->pci_bus;
+	u8 reg;
+	u8 slot_power = readb(ctrl->hpc_reg + SLOT_POWER);
+	u16 reg16;
+	u32 leds = readl(ctrl->hpc_reg + LED_CONTROL);
+
+	if (bus->cur_bus_speed == adapter_speed)
+		return 0;
+
+	/* We don't allow freq/mode changes if we find another adapter running
+	 * in another slot on this controller
+	 */
+	for (slot = ctrl->slot; slot; slot = slot->next) {
+		if (slot->device == (hp_slot + ctrl->slot_device_offset))
+			continue;
+		if (get_presence_status(ctrl, slot) == 0)
+			continue;
+		/* If another adapter is running on the same segment but at a
+		 * lower speed/mode, we allow the new adapter to function at
+		 * this rate if supported
+		 */
+		if (bus->cur_bus_speed < adapter_speed)
+			return 0;
+
+		return 1;
+	}
+
+	/* If the controller doesn't support freq/mode changes and the
+	 * controller is running at a higher mode, we bail
+	 */
+	if ((bus->cur_bus_speed > adapter_speed) && (!ctrl->pcix_speed_capability))
+		return 1;
+
+	/* But we allow the adapter to run at a lower rate if possible */
+	if ((bus->cur_bus_speed < adapter_speed) && (!ctrl->pcix_speed_capability))
+		return 0;
+
+	/* We try to set the max speed supported by both the adapter and
+	 * controller
+	 */
+	if (bus->max_bus_speed < adapter_speed) {
+		if (bus->cur_bus_speed == bus->max_bus_speed)
+			return 0;
+		adapter_speed = bus->max_bus_speed;
+	}
+
+	writel(0x0L, ctrl->hpc_reg + LED_CONTROL);
+	writeb(0x00, ctrl->hpc_reg + SLOT_ENABLE);
+
+	set_SOGO(ctrl);
+	wait_for_ctrl_irq(ctrl);
+
+	if (adapter_speed != PCI_SPEED_133MHz_PCIX)
+		reg = 0xF5;
+	else
+		reg = 0xF4;
+	pci_write_config_byte(ctrl->pci_dev, 0x41, reg);
+
+	reg16 = readw(ctrl->hpc_reg + NEXT_CURR_FREQ);
+	reg16 &= ~0x000F;
+	switch (adapter_speed) {
+		case(PCI_SPEED_133MHz_PCIX):
+			reg = 0x75;
+			reg16 |= 0xB;
+			break;
+		case(PCI_SPEED_100MHz_PCIX):
+			reg = 0x74;
+			reg16 |= 0xA;
+			break;
+		case(PCI_SPEED_66MHz_PCIX):
+			reg = 0x73;
+			reg16 |= 0x9;
+			break;
+		case(PCI_SPEED_66MHz):
+			reg = 0x73;
+			reg16 |= 0x1;
+			break;
+		default: /* 33MHz PCI 2.2 */
+			reg = 0x71;
+			break;
+
+	}
+	reg16 |= 0xB << 12;
+	writew(reg16, ctrl->hpc_reg + NEXT_CURR_FREQ);
+
+	mdelay(5);
+
+	/* Re-enable interrupts */
+	writel(0, ctrl->hpc_reg + INT_MASK);
+
+	pci_write_config_byte(ctrl->pci_dev, 0x41, reg);
+
+	/* Restart state machine */
+	reg = ~0xF;
+	pci_read_config_byte(ctrl->pci_dev, 0x43, &reg);
+	pci_write_config_byte(ctrl->pci_dev, 0x43, reg);
+
+	/* Only if mode change...*/
+	if (((bus->cur_bus_speed == PCI_SPEED_66MHz) && (adapter_speed == PCI_SPEED_66MHz_PCIX)) ||
+		((bus->cur_bus_speed == PCI_SPEED_66MHz_PCIX) && (adapter_speed == PCI_SPEED_66MHz)))
+			set_SOGO(ctrl);
+
+	wait_for_ctrl_irq(ctrl);
+	mdelay(1100);
+
+	/* Restore LED/Slot state */
+	writel(leds, ctrl->hpc_reg + LED_CONTROL);
+	writeb(slot_power, ctrl->hpc_reg + SLOT_ENABLE);
+
+	set_SOGO(ctrl);
+	wait_for_ctrl_irq(ctrl);
+
+	bus->cur_bus_speed = adapter_speed;
+	slot = cpqhp_find_slot(ctrl, hp_slot + ctrl->slot_device_offset);
+
+	info("Successfully changed frequency/mode for adapter in slot %d\n",
+			slot->number);
+	return 0;
+}
+
+/* the following routines constitute the bulk of the
+ * hotplug controller logic
+ */
+
+
+/**
+ * board_replaced - Called after a board has been replaced in the system.
+ * @func: PCI device/function information
+ * @ctrl: hotplug controller
+ *
+ * This is only used if we don't have resources for hot add.
+ * Turns power on for the board.
+ * Checks to see if board is the same.
+ * If board is same, reconfigures it.
+ * If board isn't same, turns it back off.
+ */
+static u32 board_replaced(struct pci_func *func, struct controller *ctrl)
+{
+	struct pci_bus *bus = ctrl->pci_bus;
+	u8 hp_slot;
+	u8 temp_byte;
+	u8 adapter_speed;
+	u32 rc = 0;
+
+	hp_slot = func->device - ctrl->slot_device_offset;
+
+	/*
+	 * The switch is open.
+	 */
+	if (readl(ctrl->hpc_reg + INT_INPUT_CLEAR) & (0x01L << hp_slot))
+		rc = INTERLOCK_OPEN;
+	/*
+	 * The board is already on
+	 */
+	else if (is_slot_enabled(ctrl, hp_slot))
+		rc = CARD_FUNCTIONING;
+	else {
+		mutex_lock(&ctrl->crit_sect);
+
+		/* turn on board without attaching to the bus */
+		enable_slot_power(ctrl, hp_slot);
+
+		set_SOGO(ctrl);
+
+		/* Wait for SOBS to be unset */
+		wait_for_ctrl_irq(ctrl);
+
+		/* Change bits in slot power register to force another shift out
+		 * NOTE: this is to work around the timer bug */
+		temp_byte = readb(ctrl->hpc_reg + SLOT_POWER);
+		writeb(0x00, ctrl->hpc_reg + SLOT_POWER);
+		writeb(temp_byte, ctrl->hpc_reg + SLOT_POWER);
+
+		set_SOGO(ctrl);
+
+		/* Wait for SOBS to be unset */
+		wait_for_ctrl_irq(ctrl);
+
+		adapter_speed = get_adapter_speed(ctrl, hp_slot);
+		if (bus->cur_bus_speed != adapter_speed)
+			if (set_controller_speed(ctrl, adapter_speed, hp_slot))
+				rc = WRONG_BUS_FREQUENCY;
+
+		/* turn off board without attaching to the bus */
+		disable_slot_power(ctrl, hp_slot);
+
+		set_SOGO(ctrl);
+
+		/* Wait for SOBS to be unset */
+		wait_for_ctrl_irq(ctrl);
+
+		mutex_unlock(&ctrl->crit_sect);
+
+		if (rc)
+			return rc;
+
+		mutex_lock(&ctrl->crit_sect);
+
+		slot_enable(ctrl, hp_slot);
+		green_LED_blink(ctrl, hp_slot);
+
+		amber_LED_off(ctrl, hp_slot);
+
+		set_SOGO(ctrl);
+
+		/* Wait for SOBS to be unset */
+		wait_for_ctrl_irq(ctrl);
+
+		mutex_unlock(&ctrl->crit_sect);
+
+		/* Wait for ~1 second because of hot plug spec */
+		long_delay(1*HZ);
+
+		/* Check for a power fault */
+		if (func->status == 0xFF) {
+			/* power fault occurred, but it was benign */
+			rc = POWER_FAILURE;
+			func->status = 0;
+		} else
+			rc = cpqhp_valid_replace(ctrl, func);
+
+		if (!rc) {
+			/* It must be the same board */
+
+			rc = cpqhp_configure_board(ctrl, func);
+
+			/* If configuration fails, turn it off
+			 * Get slot won't work for devices behind
+			 * bridges, but in this case it will always be
+			 * called for the "base" bus/dev/func of an
+			 * adapter.
+			 */
+
+			mutex_lock(&ctrl->crit_sect);
+
+			amber_LED_on(ctrl, hp_slot);
+			green_LED_off(ctrl, hp_slot);
+			slot_disable(ctrl, hp_slot);
+
+			set_SOGO(ctrl);
+
+			/* Wait for SOBS to be unset */
+			wait_for_ctrl_irq(ctrl);
+
+			mutex_unlock(&ctrl->crit_sect);
+
+			if (rc)
+				return rc;
+			else
+				return 1;
+
+		} else {
+			/* Something is wrong
+
+			 * Get slot won't work for devices behind bridges, but
+			 * in this case it will always be called for the "base"
+			 * bus/dev/func of an adapter.
+			 */
+
+			mutex_lock(&ctrl->crit_sect);
+
+			amber_LED_on(ctrl, hp_slot);
+			green_LED_off(ctrl, hp_slot);
+			slot_disable(ctrl, hp_slot);
+
+			set_SOGO(ctrl);
+
+			/* Wait for SOBS to be unset */
+			wait_for_ctrl_irq(ctrl);
+
+			mutex_unlock(&ctrl->crit_sect);
+		}
+
+	}
+	return rc;
+
+}
+
+
+/**
+ * board_added - Called after a board has been added to the system.
+ * @func: PCI device/function info
+ * @ctrl: hotplug controller
+ *
+ * Turns power on for the board.
+ * Configures board.
+ */
+static u32 board_added(struct pci_func *func, struct controller *ctrl)
+{
+	u8 hp_slot;
+	u8 temp_byte;
+	u8 adapter_speed;
+	int index;
+	u32 temp_register = 0xFFFFFFFF;
+	u32 rc = 0;
+	struct pci_func *new_slot = NULL;
+	struct pci_bus *bus = ctrl->pci_bus;
+	struct resource_lists res_lists;
+
+	hp_slot = func->device - ctrl->slot_device_offset;
+	dbg("%s: func->device, slot_offset, hp_slot = %d, %d ,%d\n",
+	    __func__, func->device, ctrl->slot_device_offset, hp_slot);
+
+	mutex_lock(&ctrl->crit_sect);
+
+	/* turn on board without attaching to the bus */
+	enable_slot_power(ctrl, hp_slot);
+
+	set_SOGO(ctrl);
+
+	/* Wait for SOBS to be unset */
+	wait_for_ctrl_irq(ctrl);
+
+	/* Change bits in slot power register to force another shift out
+	 * NOTE: this is to work around the timer bug
+	 */
+	temp_byte = readb(ctrl->hpc_reg + SLOT_POWER);
+	writeb(0x00, ctrl->hpc_reg + SLOT_POWER);
+	writeb(temp_byte, ctrl->hpc_reg + SLOT_POWER);
+
+	set_SOGO(ctrl);
+
+	/* Wait for SOBS to be unset */
+	wait_for_ctrl_irq(ctrl);
+
+	adapter_speed = get_adapter_speed(ctrl, hp_slot);
+	if (bus->cur_bus_speed != adapter_speed)
+		if (set_controller_speed(ctrl, adapter_speed, hp_slot))
+			rc = WRONG_BUS_FREQUENCY;
+
+	/* turn off board without attaching to the bus */
+	disable_slot_power(ctrl, hp_slot);
+
+	set_SOGO(ctrl);
+
+	/* Wait for SOBS to be unset */
+	wait_for_ctrl_irq(ctrl);
+
+	mutex_unlock(&ctrl->crit_sect);
+
+	if (rc)
+		return rc;
+
+	cpqhp_find_slot(ctrl, hp_slot + ctrl->slot_device_offset);
+
+	/* turn on board and blink green LED */
+
+	dbg("%s: before down\n", __func__);
+	mutex_lock(&ctrl->crit_sect);
+	dbg("%s: after down\n", __func__);
+
+	dbg("%s: before slot_enable\n", __func__);
+	slot_enable(ctrl, hp_slot);
+
+	dbg("%s: before green_LED_blink\n", __func__);
+	green_LED_blink(ctrl, hp_slot);
+
+	dbg("%s: before amber_LED_blink\n", __func__);
+	amber_LED_off(ctrl, hp_slot);
+
+	dbg("%s: before set_SOGO\n", __func__);
+	set_SOGO(ctrl);
+
+	/* Wait for SOBS to be unset */
+	dbg("%s: before wait_for_ctrl_irq\n", __func__);
+	wait_for_ctrl_irq(ctrl);
+	dbg("%s: after wait_for_ctrl_irq\n", __func__);
+
+	dbg("%s: before up\n", __func__);
+	mutex_unlock(&ctrl->crit_sect);
+	dbg("%s: after up\n", __func__);
+
+	/* Wait for ~1 second because of hot plug spec */
+	dbg("%s: before long_delay\n", __func__);
+	long_delay(1*HZ);
+	dbg("%s: after long_delay\n", __func__);
+
+	dbg("%s: func status = %x\n", __func__, func->status);
+	/* Check for a power fault */
+	if (func->status == 0xFF) {
+		/* power fault occurred, but it was benign */
+		temp_register = 0xFFFFFFFF;
+		dbg("%s: temp register set to %x by power fault\n", __func__, temp_register);
+		rc = POWER_FAILURE;
+		func->status = 0;
+	} else {
+		/* Get vendor/device ID u32 */
+		ctrl->pci_bus->number = func->bus;
+		rc = pci_bus_read_config_dword(ctrl->pci_bus, PCI_DEVFN(func->device, func->function), PCI_VENDOR_ID, &temp_register);
+		dbg("%s: pci_read_config_dword returns %d\n", __func__, rc);
+		dbg("%s: temp_register is %x\n", __func__, temp_register);
+
+		if (rc != 0) {
+			/* Something's wrong here */
+			temp_register = 0xFFFFFFFF;
+			dbg("%s: temp register set to %x by error\n", __func__, temp_register);
+		}
+		/* Preset return code.  It will be changed later if things go okay. */
+		rc = NO_ADAPTER_PRESENT;
+	}
+
+	/* All F's is an empty slot or an invalid board */
+	if (temp_register != 0xFFFFFFFF) {
+		res_lists.io_head = ctrl->io_head;
+		res_lists.mem_head = ctrl->mem_head;
+		res_lists.p_mem_head = ctrl->p_mem_head;
+		res_lists.bus_head = ctrl->bus_head;
+		res_lists.irqs = NULL;
+
+		rc = configure_new_device(ctrl, func, 0, &res_lists);
+
+		dbg("%s: back from configure_new_device\n", __func__);
+		ctrl->io_head = res_lists.io_head;
+		ctrl->mem_head = res_lists.mem_head;
+		ctrl->p_mem_head = res_lists.p_mem_head;
+		ctrl->bus_head = res_lists.bus_head;
+
+		cpqhp_resource_sort_and_combine(&(ctrl->mem_head));
+		cpqhp_resource_sort_and_combine(&(ctrl->p_mem_head));
+		cpqhp_resource_sort_and_combine(&(ctrl->io_head));
+		cpqhp_resource_sort_and_combine(&(ctrl->bus_head));
+
+		if (rc) {
+			mutex_lock(&ctrl->crit_sect);
+
+			amber_LED_on(ctrl, hp_slot);
+			green_LED_off(ctrl, hp_slot);
+			slot_disable(ctrl, hp_slot);
+
+			set_SOGO(ctrl);
+
+			/* Wait for SOBS to be unset */
+			wait_for_ctrl_irq(ctrl);
+
+			mutex_unlock(&ctrl->crit_sect);
+			return rc;
+		} else {
+			cpqhp_save_slot_config(ctrl, func);
+		}
+
+
+		func->status = 0;
+		func->switch_save = 0x10;
+		func->is_a_board = 0x01;
+
+		/* next, we will instantiate the linux pci_dev structures (with
+		 * appropriate driver notification, if already present) */
+		dbg("%s: configure linux pci_dev structure\n", __func__);
+		index = 0;
+		do {
+			new_slot = cpqhp_slot_find(ctrl->bus, func->device, index++);
+			if (new_slot && !new_slot->pci_dev)
+				cpqhp_configure_device(ctrl, new_slot);
+		} while (new_slot);
+
+		mutex_lock(&ctrl->crit_sect);
+
+		green_LED_on(ctrl, hp_slot);
+
+		set_SOGO(ctrl);
+
+		/* Wait for SOBS to be unset */
+		wait_for_ctrl_irq(ctrl);
+
+		mutex_unlock(&ctrl->crit_sect);
+	} else {
+		mutex_lock(&ctrl->crit_sect);
+
+		amber_LED_on(ctrl, hp_slot);
+		green_LED_off(ctrl, hp_slot);
+		slot_disable(ctrl, hp_slot);
+
+		set_SOGO(ctrl);
+
+		/* Wait for SOBS to be unset */
+		wait_for_ctrl_irq(ctrl);
+
+		mutex_unlock(&ctrl->crit_sect);
+
+		return rc;
+	}
+	return 0;
+}
+
+
+/**
+ * remove_board - Turns off slot and LEDs
+ * @func: PCI device/function info
+ * @replace_flag: whether replacing or adding a new device
+ * @ctrl: target controller
+ */
+static u32 remove_board(struct pci_func *func, u32 replace_flag, struct controller *ctrl)
+{
+	int index;
+	u8 skip = 0;
+	u8 device;
+	u8 hp_slot;
+	u8 temp_byte;
+	struct resource_lists res_lists;
+	struct pci_func *temp_func;
+
+	if (cpqhp_unconfigure_device(func))
+		return 1;
+
+	device = func->device;
+
+	hp_slot = func->device - ctrl->slot_device_offset;
+	dbg("In %s, hp_slot = %d\n", __func__, hp_slot);
+
+	/* When we get here, it is safe to change base address registers.
+	 * We will attempt to save the base address register lengths */
+	if (replace_flag || !ctrl->add_support)
+		cpqhp_save_base_addr_length(ctrl, func);
+	else if (!func->bus_head && !func->mem_head &&
+		 !func->p_mem_head && !func->io_head) {
+		/* Here we check to see if we've saved any of the board's
+		 * resources already.  If so, we'll skip the attempt to
+		 * determine what's being used. */
+		index = 0;
+		temp_func = cpqhp_slot_find(func->bus, func->device, index++);
+		while (temp_func) {
+			if (temp_func->bus_head || temp_func->mem_head
+			    || temp_func->p_mem_head || temp_func->io_head) {
+				skip = 1;
+				break;
+			}
+			temp_func = cpqhp_slot_find(temp_func->bus, temp_func->device, index++);
+		}
+
+		if (!skip)
+			cpqhp_save_used_resources(ctrl, func);
+	}
+	/* Change status to shutdown */
+	if (func->is_a_board)
+		func->status = 0x01;
+	func->configured = 0;
+
+	mutex_lock(&ctrl->crit_sect);
+
+	green_LED_off(ctrl, hp_slot);
+	slot_disable(ctrl, hp_slot);
+
+	set_SOGO(ctrl);
+
+	/* turn off SERR for slot */
+	temp_byte = readb(ctrl->hpc_reg + SLOT_SERR);
+	temp_byte &= ~(0x01 << hp_slot);
+	writeb(temp_byte, ctrl->hpc_reg + SLOT_SERR);
+
+	/* Wait for SOBS to be unset */
+	wait_for_ctrl_irq(ctrl);
+
+	mutex_unlock(&ctrl->crit_sect);
+
+	if (!replace_flag && ctrl->add_support) {
+		while (func) {
+			res_lists.io_head = ctrl->io_head;
+			res_lists.mem_head = ctrl->mem_head;
+			res_lists.p_mem_head = ctrl->p_mem_head;
+			res_lists.bus_head = ctrl->bus_head;
+
+			cpqhp_return_board_resources(func, &res_lists);
+
+			ctrl->io_head = res_lists.io_head;
+			ctrl->mem_head = res_lists.mem_head;
+			ctrl->p_mem_head = res_lists.p_mem_head;
+			ctrl->bus_head = res_lists.bus_head;
+
+			cpqhp_resource_sort_and_combine(&(ctrl->mem_head));
+			cpqhp_resource_sort_and_combine(&(ctrl->p_mem_head));
+			cpqhp_resource_sort_and_combine(&(ctrl->io_head));
+			cpqhp_resource_sort_and_combine(&(ctrl->bus_head));
+
+			if (is_bridge(func)) {
+				bridge_slot_remove(func);
+			} else
+				slot_remove(func);
+
+			func = cpqhp_slot_find(ctrl->bus, device, 0);
+		}
+
+		/* Setup slot structure with entry for empty slot */
+		func = cpqhp_slot_create(ctrl->bus);
+
+		if (func == NULL)
+			return 1;
+
+		func->bus = ctrl->bus;
+		func->device = device;
+		func->function = 0;
+		func->configured = 0;
+		func->switch_save = 0x10;
+		func->is_a_board = 0;
+		func->p_task_event = NULL;
+	}
+
+	return 0;
+}
+
+static void pushbutton_helper_thread(struct timer_list *t)
+{
+	pushbutton_pending = t;
+
+	wake_up_process(cpqhp_event_thread);
+}
+
+
+/* this is the main worker thread */
+static int event_thread(void *data)
+{
+	struct controller *ctrl;
+
+	while (1) {
+		dbg("!!!!event_thread sleeping\n");
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+
+		if (kthread_should_stop())
+			break;
+		/* Do stuff here */
+		if (pushbutton_pending)
+			cpqhp_pushbutton_thread(pushbutton_pending);
+		else
+			for (ctrl = cpqhp_ctrl_list; ctrl; ctrl = ctrl->next)
+				interrupt_event_handler(ctrl);
+	}
+	dbg("event_thread signals exit\n");
+	return 0;
+}
+
+int cpqhp_event_start_thread(void)
+{
+	cpqhp_event_thread = kthread_run(event_thread, NULL, "phpd_event");
+	if (IS_ERR(cpqhp_event_thread)) {
+		err("Can't start up our event thread\n");
+		return PTR_ERR(cpqhp_event_thread);
+	}
+
+	return 0;
+}
+
+
+void cpqhp_event_stop_thread(void)
+{
+	kthread_stop(cpqhp_event_thread);
+}
+
+
+static void interrupt_event_handler(struct controller *ctrl)
+{
+	int loop;
+	int change = 1;
+	struct pci_func *func;
+	u8 hp_slot;
+	struct slot *p_slot;
+
+	while (change) {
+		change = 0;
+
+		for (loop = 0; loop < 10; loop++) {
+			/* dbg("loop %d\n", loop); */
+			if (ctrl->event_queue[loop].event_type != 0) {
+				hp_slot = ctrl->event_queue[loop].hp_slot;
+
+				func = cpqhp_slot_find(ctrl->bus, (hp_slot + ctrl->slot_device_offset), 0);
+				if (!func)
+					return;
+
+				p_slot = cpqhp_find_slot(ctrl, hp_slot + ctrl->slot_device_offset);
+				if (!p_slot)
+					return;
+
+				dbg("hp_slot %d, func %p, p_slot %p\n",
+				    hp_slot, func, p_slot);
+
+				if (ctrl->event_queue[loop].event_type == INT_BUTTON_PRESS) {
+					dbg("button pressed\n");
+				} else if (ctrl->event_queue[loop].event_type ==
+					   INT_BUTTON_CANCEL) {
+					dbg("button cancel\n");
+					del_timer(&p_slot->task_event);
+
+					mutex_lock(&ctrl->crit_sect);
+
+					if (p_slot->state == BLINKINGOFF_STATE) {
+						/* slot is on */
+						dbg("turn on green LED\n");
+						green_LED_on(ctrl, hp_slot);
+					} else if (p_slot->state == BLINKINGON_STATE) {
+						/* slot is off */
+						dbg("turn off green LED\n");
+						green_LED_off(ctrl, hp_slot);
+					}
+
+					info(msg_button_cancel, p_slot->number);
+
+					p_slot->state = STATIC_STATE;
+
+					amber_LED_off(ctrl, hp_slot);
+
+					set_SOGO(ctrl);
+
+					/* Wait for SOBS to be unset */
+					wait_for_ctrl_irq(ctrl);
+
+					mutex_unlock(&ctrl->crit_sect);
+				}
+				/*** button Released (No action on press...) */
+				else if (ctrl->event_queue[loop].event_type == INT_BUTTON_RELEASE) {
+					dbg("button release\n");
+
+					if (is_slot_enabled(ctrl, hp_slot)) {
+						dbg("slot is on\n");
+						p_slot->state = BLINKINGOFF_STATE;
+						info(msg_button_off, p_slot->number);
+					} else {
+						dbg("slot is off\n");
+						p_slot->state = BLINKINGON_STATE;
+						info(msg_button_on, p_slot->number);
+					}
+					mutex_lock(&ctrl->crit_sect);
+
+					dbg("blink green LED and turn off amber\n");
+
+					amber_LED_off(ctrl, hp_slot);
+					green_LED_blink(ctrl, hp_slot);
+
+					set_SOGO(ctrl);
+
+					/* Wait for SOBS to be unset */
+					wait_for_ctrl_irq(ctrl);
+
+					mutex_unlock(&ctrl->crit_sect);
+					timer_setup(&p_slot->task_event,
+						    pushbutton_helper_thread,
+						    0);
+					p_slot->hp_slot = hp_slot;
+					p_slot->ctrl = ctrl;
+/*					p_slot->physical_slot = physical_slot; */
+					p_slot->task_event.expires = jiffies + 5 * HZ;   /* 5 second delay */
+
+					dbg("add_timer p_slot = %p\n", p_slot);
+					add_timer(&p_slot->task_event);
+				}
+				/***********POWER FAULT */
+				else if (ctrl->event_queue[loop].event_type == INT_POWER_FAULT) {
+					dbg("power fault\n");
+				}
+
+				ctrl->event_queue[loop].event_type = 0;
+
+				change = 1;
+			}
+		}		/* End of FOR loop */
+	}
+}
+
+
+/**
+ * cpqhp_pushbutton_thread - handle pushbutton events
+ * @t: pointer to struct timer_list which holds all timer-related callbacks
+ *
+ * Scheduled procedure to handle blocking stuff for the pushbuttons.
+ * Handles all pending events and exits.
+ */
+void cpqhp_pushbutton_thread(struct timer_list *t)
+{
+	u8 hp_slot;
+	struct pci_func *func;
+	struct slot *p_slot = from_timer(p_slot, t, task_event);
+	struct controller *ctrl = (struct controller *) p_slot->ctrl;
+
+	pushbutton_pending = NULL;
+	hp_slot = p_slot->hp_slot;
+
+	if (is_slot_enabled(ctrl, hp_slot)) {
+		p_slot->state = POWEROFF_STATE;
+		/* power Down board */
+		func = cpqhp_slot_find(p_slot->bus, p_slot->device, 0);
+		dbg("In power_down_board, func = %p, ctrl = %p\n", func, ctrl);
+		if (!func) {
+			dbg("Error! func NULL in %s\n", __func__);
+			return;
+		}
+
+		if (cpqhp_process_SS(ctrl, func) != 0) {
+			amber_LED_on(ctrl, hp_slot);
+			green_LED_on(ctrl, hp_slot);
+
+			set_SOGO(ctrl);
+
+			/* Wait for SOBS to be unset */
+			wait_for_ctrl_irq(ctrl);
+		}
+
+		p_slot->state = STATIC_STATE;
+	} else {
+		p_slot->state = POWERON_STATE;
+		/* slot is off */
+
+		func = cpqhp_slot_find(p_slot->bus, p_slot->device, 0);
+		dbg("In add_board, func = %p, ctrl = %p\n", func, ctrl);
+		if (!func) {
+			dbg("Error! func NULL in %s\n", __func__);
+			return;
+		}
+
+		if (ctrl != NULL) {
+			if (cpqhp_process_SI(ctrl, func) != 0) {
+				amber_LED_on(ctrl, hp_slot);
+				green_LED_off(ctrl, hp_slot);
+
+				set_SOGO(ctrl);
+
+				/* Wait for SOBS to be unset */
+				wait_for_ctrl_irq(ctrl);
+			}
+		}
+
+		p_slot->state = STATIC_STATE;
+	}
+}
+
+
+int cpqhp_process_SI(struct controller *ctrl, struct pci_func *func)
+{
+	u8 device, hp_slot;
+	u16 temp_word;
+	u32 tempdword;
+	int rc;
+	struct slot *p_slot;
+
+	tempdword = 0;
+
+	device = func->device;
+	hp_slot = device - ctrl->slot_device_offset;
+	p_slot = cpqhp_find_slot(ctrl, device);
+
+	/* Check to see if the interlock is closed */
+	tempdword = readl(ctrl->hpc_reg + INT_INPUT_CLEAR);
+
+	if (tempdword & (0x01 << hp_slot))
+		return 1;
+
+	if (func->is_a_board) {
+		rc = board_replaced(func, ctrl);
+	} else {
+		/* add board */
+		slot_remove(func);
+
+		func = cpqhp_slot_create(ctrl->bus);
+		if (func == NULL)
+			return 1;
+
+		func->bus = ctrl->bus;
+		func->device = device;
+		func->function = 0;
+		func->configured = 0;
+		func->is_a_board = 1;
+
+		/* We have to save the presence info for these slots */
+		temp_word = ctrl->ctrl_int_comp >> 16;
+		func->presence_save = (temp_word >> hp_slot) & 0x01;
+		func->presence_save |= (temp_word >> (hp_slot + 7)) & 0x02;
+
+		if (ctrl->ctrl_int_comp & (0x1L << hp_slot)) {
+			func->switch_save = 0;
+		} else {
+			func->switch_save = 0x10;
+		}
+
+		rc = board_added(func, ctrl);
+		if (rc) {
+			if (is_bridge(func)) {
+				bridge_slot_remove(func);
+			} else
+				slot_remove(func);
+
+			/* Setup slot structure with entry for empty slot */
+			func = cpqhp_slot_create(ctrl->bus);
+
+			if (func == NULL)
+				return 1;
+
+			func->bus = ctrl->bus;
+			func->device = device;
+			func->function = 0;
+			func->configured = 0;
+			func->is_a_board = 0;
+
+			/* We have to save the presence info for these slots */
+			temp_word = ctrl->ctrl_int_comp >> 16;
+			func->presence_save = (temp_word >> hp_slot) & 0x01;
+			func->presence_save |=
+			(temp_word >> (hp_slot + 7)) & 0x02;
+
+			if (ctrl->ctrl_int_comp & (0x1L << hp_slot)) {
+				func->switch_save = 0;
+			} else {
+				func->switch_save = 0x10;
+			}
+		}
+	}
+
+	if (rc)
+		dbg("%s: rc = %d\n", __func__, rc);
+
+	return rc;
+}
+
+
+int cpqhp_process_SS(struct controller *ctrl, struct pci_func *func)
+{
+	u8 device, class_code, header_type, BCR;
+	u8 index = 0;
+	u8 replace_flag;
+	u32 rc = 0;
+	unsigned int devfn;
+	struct slot *p_slot;
+	struct pci_bus *pci_bus = ctrl->pci_bus;
+
+	device = func->device;
+	func = cpqhp_slot_find(ctrl->bus, device, index++);
+	p_slot = cpqhp_find_slot(ctrl, device);
+
+	/* Make sure there are no video controllers here */
+	while (func && !rc) {
+		pci_bus->number = func->bus;
+		devfn = PCI_DEVFN(func->device, func->function);
+
+		/* Check the Class Code */
+		rc = pci_bus_read_config_byte(pci_bus, devfn, 0x0B, &class_code);
+		if (rc)
+			return rc;
+
+		if (class_code == PCI_BASE_CLASS_DISPLAY) {
+			/* Display/Video adapter (not supported) */
+			rc = REMOVE_NOT_SUPPORTED;
+		} else {
+			/* See if it's a bridge */
+			rc = pci_bus_read_config_byte(pci_bus, devfn, PCI_HEADER_TYPE, &header_type);
+			if (rc)
+				return rc;
+
+			/* If it's a bridge, check the VGA Enable bit */
+			if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
+				rc = pci_bus_read_config_byte(pci_bus, devfn, PCI_BRIDGE_CONTROL, &BCR);
+				if (rc)
+					return rc;
+
+				/* If the VGA Enable bit is set, remove isn't
+				 * supported */
+				if (BCR & PCI_BRIDGE_CTL_VGA)
+					rc = REMOVE_NOT_SUPPORTED;
+			}
+		}
+
+		func = cpqhp_slot_find(ctrl->bus, device, index++);
+	}
+
+	func = cpqhp_slot_find(ctrl->bus, device, 0);
+	if ((func != NULL) && !rc) {
+		/* FIXME: Replace flag should be passed into process_SS */
+		replace_flag = !(ctrl->add_support);
+		rc = remove_board(func, replace_flag, ctrl);
+	} else if (!rc) {
+		rc = 1;
+	}
+
+	return rc;
+}
+
+/**
+ * switch_leds - switch the leds, go from one site to the other.
+ * @ctrl: controller to use
+ * @num_of_slots: number of slots to use
+ * @work_LED: LED control value
+ * @direction: 1 to start from the left side, 0 to start right.
+ */
+static void switch_leds(struct controller *ctrl, const int num_of_slots,
+			u32 *work_LED, const int direction)
+{
+	int loop;
+
+	for (loop = 0; loop < num_of_slots; loop++) {
+		if (direction)
+			*work_LED = *work_LED >> 1;
+		else
+			*work_LED = *work_LED << 1;
+		writel(*work_LED, ctrl->hpc_reg + LED_CONTROL);
+
+		set_SOGO(ctrl);
+
+		/* Wait for SOGO interrupt */
+		wait_for_ctrl_irq(ctrl);
+
+		/* Get ready for next iteration */
+		long_delay((2*HZ)/10);
+	}
+}
+
+/**
+ * cpqhp_hardware_test - runs hardware tests
+ * @ctrl: target controller
+ * @test_num: the number written to the "test" file in sysfs.
+ *
+ * For hot plug ctrl folks to play with.
+ */
+int cpqhp_hardware_test(struct controller *ctrl, int test_num)
+{
+	u32 save_LED;
+	u32 work_LED;
+	int loop;
+	int num_of_slots;
+
+	num_of_slots = readb(ctrl->hpc_reg + SLOT_MASK) & 0x0f;
+
+	switch (test_num) {
+	case 1:
+		/* Do stuff here! */
+
+		/* Do that funky LED thing */
+		/* so we can restore them later */
+		save_LED = readl(ctrl->hpc_reg + LED_CONTROL);
+		work_LED = 0x01010101;
+		switch_leds(ctrl, num_of_slots, &work_LED, 0);
+		switch_leds(ctrl, num_of_slots, &work_LED, 1);
+		switch_leds(ctrl, num_of_slots, &work_LED, 0);
+		switch_leds(ctrl, num_of_slots, &work_LED, 1);
+
+		work_LED = 0x01010000;
+		writel(work_LED, ctrl->hpc_reg + LED_CONTROL);
+		switch_leds(ctrl, num_of_slots, &work_LED, 0);
+		switch_leds(ctrl, num_of_slots, &work_LED, 1);
+		work_LED = 0x00000101;
+		writel(work_LED, ctrl->hpc_reg + LED_CONTROL);
+		switch_leds(ctrl, num_of_slots, &work_LED, 0);
+		switch_leds(ctrl, num_of_slots, &work_LED, 1);
+
+		work_LED = 0x01010000;
+		writel(work_LED, ctrl->hpc_reg + LED_CONTROL);
+		for (loop = 0; loop < num_of_slots; loop++) {
+			set_SOGO(ctrl);
+
+			/* Wait for SOGO interrupt */
+			wait_for_ctrl_irq(ctrl);
+
+			/* Get ready for next iteration */
+			long_delay((3*HZ)/10);
+			work_LED = work_LED >> 16;
+			writel(work_LED, ctrl->hpc_reg + LED_CONTROL);
+
+			set_SOGO(ctrl);
+
+			/* Wait for SOGO interrupt */
+			wait_for_ctrl_irq(ctrl);
+
+			/* Get ready for next iteration */
+			long_delay((3*HZ)/10);
+			work_LED = work_LED << 16;
+			writel(work_LED, ctrl->hpc_reg + LED_CONTROL);
+			work_LED = work_LED << 1;
+			writel(work_LED, ctrl->hpc_reg + LED_CONTROL);
+		}
+
+		/* put it back the way it was */
+		writel(save_LED, ctrl->hpc_reg + LED_CONTROL);
+
+		set_SOGO(ctrl);
+
+		/* Wait for SOBS to be unset */
+		wait_for_ctrl_irq(ctrl);
+		break;
+	case 2:
+		/* Do other stuff here! */
+		break;
+	case 3:
+		/* and more... */
+		break;
+	}
+	return 0;
+}
+
+
+/**
+ * configure_new_device - Configures the PCI header information of one board.
+ * @ctrl: pointer to controller structure
+ * @func: pointer to function structure
+ * @behind_bridge: 1 if this is a recursive call, 0 if not
+ * @resources: pointer to set of resource lists
+ *
+ * Returns 0 if success.
+ */
+static u32 configure_new_device(struct controller  *ctrl, struct pci_func  *func,
+				 u8 behind_bridge, struct resource_lists  *resources)
+{
+	u8 temp_byte, function, max_functions, stop_it;
+	int rc;
+	u32 ID;
+	struct pci_func *new_slot;
+	int index;
+
+	new_slot = func;
+
+	dbg("%s\n", __func__);
+	/* Check for Multi-function device */
+	ctrl->pci_bus->number = func->bus;
+	rc = pci_bus_read_config_byte(ctrl->pci_bus, PCI_DEVFN(func->device, func->function), 0x0E, &temp_byte);
+	if (rc) {
+		dbg("%s: rc = %d\n", __func__, rc);
+		return rc;
+	}
+
+	if (temp_byte & 0x80)	/* Multi-function device */
+		max_functions = 8;
+	else
+		max_functions = 1;
+
+	function = 0;
+
+	do {
+		rc = configure_new_function(ctrl, new_slot, behind_bridge, resources);
+
+		if (rc) {
+			dbg("configure_new_function failed %d\n", rc);
+			index = 0;
+
+			while (new_slot) {
+				new_slot = cpqhp_slot_find(new_slot->bus, new_slot->device, index++);
+
+				if (new_slot)
+					cpqhp_return_board_resources(new_slot, resources);
+			}
+
+			return rc;
+		}
+
+		function++;
+
+		stop_it = 0;
+
+		/* The following loop skips to the next present function
+		 * and creates a board structure */
+
+		while ((function < max_functions) && (!stop_it)) {
+			pci_bus_read_config_dword(ctrl->pci_bus, PCI_DEVFN(func->device, function), 0x00, &ID);
+
+			if (PCI_POSSIBLE_ERROR(ID)) {
+				function++;
+			} else {
+				/* Setup slot structure. */
+				new_slot = cpqhp_slot_create(func->bus);
+
+				if (new_slot == NULL)
+					return 1;
+
+				new_slot->bus = func->bus;
+				new_slot->device = func->device;
+				new_slot->function = function;
+				new_slot->is_a_board = 1;
+				new_slot->status = 0;
+
+				stop_it++;
+			}
+		}
+
+	} while (function < max_functions);
+	dbg("returning from configure_new_device\n");
+
+	return 0;
+}
+
+
+/*
+ * Configuration logic that involves the hotplug data structures and
+ * their bookkeeping
+ */
+
+
+/**
+ * configure_new_function - Configures the PCI header information of one device
+ * @ctrl: pointer to controller structure
+ * @func: pointer to function structure
+ * @behind_bridge: 1 if this is a recursive call, 0 if not
+ * @resources: pointer to set of resource lists
+ *
+ * Calls itself recursively for bridged devices.
+ * Returns 0 if success.
+ */
+static int configure_new_function(struct controller *ctrl, struct pci_func *func,
+				   u8 behind_bridge,
+				   struct resource_lists *resources)
+{
+	int cloop;
+	u8 IRQ = 0;
+	u8 temp_byte;
+	u8 device;
+	u8 class_code;
+	u16 command;
+	u16 temp_word;
+	u32 temp_dword;
+	u32 rc;
+	u32 temp_register;
+	u32 base;
+	u32 ID;
+	unsigned int devfn;
+	struct pci_resource *mem_node;
+	struct pci_resource *p_mem_node;
+	struct pci_resource *io_node;
+	struct pci_resource *bus_node;
+	struct pci_resource *hold_mem_node;
+	struct pci_resource *hold_p_mem_node;
+	struct pci_resource *hold_IO_node;
+	struct pci_resource *hold_bus_node;
+	struct irq_mapping irqs;
+	struct pci_func *new_slot;
+	struct pci_bus *pci_bus;
+	struct resource_lists temp_resources;
+
+	pci_bus = ctrl->pci_bus;
+	pci_bus->number = func->bus;
+	devfn = PCI_DEVFN(func->device, func->function);
+
+	/* Check for Bridge */
+	rc = pci_bus_read_config_byte(pci_bus, devfn, PCI_HEADER_TYPE, &temp_byte);
+	if (rc)
+		return rc;
+
+	if ((temp_byte & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
+		/* set Primary bus */
+		dbg("set Primary bus = %d\n", func->bus);
+		rc = pci_bus_write_config_byte(pci_bus, devfn, PCI_PRIMARY_BUS, func->bus);
+		if (rc)
+			return rc;
+
+		/* find range of buses to use */
+		dbg("find ranges of buses to use\n");
+		bus_node = get_max_resource(&(resources->bus_head), 1);
+
+		/* If we don't have any buses to allocate, we can't continue */
+		if (!bus_node)
+			return -ENOMEM;
+
+		/* set Secondary bus */
+		temp_byte = bus_node->base;
+		dbg("set Secondary bus = %d\n", bus_node->base);
+		rc = pci_bus_write_config_byte(pci_bus, devfn, PCI_SECONDARY_BUS, temp_byte);
+		if (rc)
+			return rc;
+
+		/* set subordinate bus */
+		temp_byte = bus_node->base + bus_node->length - 1;
+		dbg("set subordinate bus = %d\n", bus_node->base + bus_node->length - 1);
+		rc = pci_bus_write_config_byte(pci_bus, devfn, PCI_SUBORDINATE_BUS, temp_byte);
+		if (rc)
+			return rc;
+
+		/* set subordinate Latency Timer and base Latency Timer */
+		temp_byte = 0x40;
+		rc = pci_bus_write_config_byte(pci_bus, devfn, PCI_SEC_LATENCY_TIMER, temp_byte);
+		if (rc)
+			return rc;
+		rc = pci_bus_write_config_byte(pci_bus, devfn, PCI_LATENCY_TIMER, temp_byte);
+		if (rc)
+			return rc;
+
+		/* set Cache Line size */
+		temp_byte = 0x08;
+		rc = pci_bus_write_config_byte(pci_bus, devfn, PCI_CACHE_LINE_SIZE, temp_byte);
+		if (rc)
+			return rc;
+
+		/* Setup the IO, memory, and prefetchable windows */
+		io_node = get_max_resource(&(resources->io_head), 0x1000);
+		if (!io_node)
+			return -ENOMEM;
+		mem_node = get_max_resource(&(resources->mem_head), 0x100000);
+		if (!mem_node)
+			return -ENOMEM;
+		p_mem_node = get_max_resource(&(resources->p_mem_head), 0x100000);
+		if (!p_mem_node)
+			return -ENOMEM;
+		dbg("Setup the IO, memory, and prefetchable windows\n");
+		dbg("io_node\n");
+		dbg("(base, len, next) (%x, %x, %p)\n", io_node->base,
+					io_node->length, io_node->next);
+		dbg("mem_node\n");
+		dbg("(base, len, next) (%x, %x, %p)\n", mem_node->base,
+					mem_node->length, mem_node->next);
+		dbg("p_mem_node\n");
+		dbg("(base, len, next) (%x, %x, %p)\n", p_mem_node->base,
+					p_mem_node->length, p_mem_node->next);
+
+		/* set up the IRQ info */
+		if (!resources->irqs) {
+			irqs.barber_pole = 0;
+			irqs.interrupt[0] = 0;
+			irqs.interrupt[1] = 0;
+			irqs.interrupt[2] = 0;
+			irqs.interrupt[3] = 0;
+			irqs.valid_INT = 0;
+		} else {
+			irqs.barber_pole = resources->irqs->barber_pole;
+			irqs.interrupt[0] = resources->irqs->interrupt[0];
+			irqs.interrupt[1] = resources->irqs->interrupt[1];
+			irqs.interrupt[2] = resources->irqs->interrupt[2];
+			irqs.interrupt[3] = resources->irqs->interrupt[3];
+			irqs.valid_INT = resources->irqs->valid_INT;
+		}
+
+		/* set up resource lists that are now aligned on top and bottom
+		 * for anything behind the bridge. */
+		temp_resources.bus_head = bus_node;
+		temp_resources.io_head = io_node;
+		temp_resources.mem_head = mem_node;
+		temp_resources.p_mem_head = p_mem_node;
+		temp_resources.irqs = &irqs;
+
+		/* Make copies of the nodes we are going to pass down so that
+		 * if there is a problem,we can just use these to free resources
+		 */
+		hold_bus_node = kmalloc(sizeof(*hold_bus_node), GFP_KERNEL);
+		hold_IO_node = kmalloc(sizeof(*hold_IO_node), GFP_KERNEL);
+		hold_mem_node = kmalloc(sizeof(*hold_mem_node), GFP_KERNEL);
+		hold_p_mem_node = kmalloc(sizeof(*hold_p_mem_node), GFP_KERNEL);
+
+		if (!hold_bus_node || !hold_IO_node || !hold_mem_node || !hold_p_mem_node) {
+			kfree(hold_bus_node);
+			kfree(hold_IO_node);
+			kfree(hold_mem_node);
+			kfree(hold_p_mem_node);
+
+			return 1;
+		}
+
+		memcpy(hold_bus_node, bus_node, sizeof(struct pci_resource));
+
+		bus_node->base += 1;
+		bus_node->length -= 1;
+		bus_node->next = NULL;
+
+		/* If we have IO resources copy them and fill in the bridge's
+		 * IO range registers */
+		memcpy(hold_IO_node, io_node, sizeof(struct pci_resource));
+		io_node->next = NULL;
+
+		/* set IO base and Limit registers */
+		temp_byte = io_node->base >> 8;
+		rc = pci_bus_write_config_byte(pci_bus, devfn, PCI_IO_BASE, temp_byte);
+
+		temp_byte = (io_node->base + io_node->length - 1) >> 8;
+		rc = pci_bus_write_config_byte(pci_bus, devfn, PCI_IO_LIMIT, temp_byte);
+
+		/* Copy the memory resources and fill in the bridge's memory
+		 * range registers.
+		 */
+		memcpy(hold_mem_node, mem_node, sizeof(struct pci_resource));
+		mem_node->next = NULL;
+
+		/* set Mem base and Limit registers */
+		temp_word = mem_node->base >> 16;
+		rc = pci_bus_write_config_word(pci_bus, devfn, PCI_MEMORY_BASE, temp_word);
+
+		temp_word = (mem_node->base + mem_node->length - 1) >> 16;
+		rc = pci_bus_write_config_word(pci_bus, devfn, PCI_MEMORY_LIMIT, temp_word);
+
+		memcpy(hold_p_mem_node, p_mem_node, sizeof(struct pci_resource));
+		p_mem_node->next = NULL;
+
+		/* set Pre Mem base and Limit registers */
+		temp_word = p_mem_node->base >> 16;
+		rc = pci_bus_write_config_word(pci_bus, devfn, PCI_PREF_MEMORY_BASE, temp_word);
+
+		temp_word = (p_mem_node->base + p_mem_node->length - 1) >> 16;
+		rc = pci_bus_write_config_word(pci_bus, devfn, PCI_PREF_MEMORY_LIMIT, temp_word);
+
+		/* Adjust this to compensate for extra adjustment in first loop
+		 */
+		irqs.barber_pole--;
+
+		rc = 0;
+
+		/* Here we actually find the devices and configure them */
+		for (device = 0; (device <= 0x1F) && !rc; device++) {
+			irqs.barber_pole = (irqs.barber_pole + 1) & 0x03;
+
+			ID = 0xFFFFFFFF;
+			pci_bus->number = hold_bus_node->base;
+			pci_bus_read_config_dword(pci_bus, PCI_DEVFN(device, 0), 0x00, &ID);
+			pci_bus->number = func->bus;
+
+			if (!PCI_POSSIBLE_ERROR(ID)) {	  /*  device present */
+				/* Setup slot structure. */
+				new_slot = cpqhp_slot_create(hold_bus_node->base);
+
+				if (new_slot == NULL) {
+					rc = -ENOMEM;
+					continue;
+				}
+
+				new_slot->bus = hold_bus_node->base;
+				new_slot->device = device;
+				new_slot->function = 0;
+				new_slot->is_a_board = 1;
+				new_slot->status = 0;
+
+				rc = configure_new_device(ctrl, new_slot, 1, &temp_resources);
+				dbg("configure_new_device rc=0x%x\n", rc);
+			}	/* End of IF (device in slot?) */
+		}		/* End of FOR loop */
+
+		if (rc)
+			goto free_and_out;
+		/* save the interrupt routing information */
+		if (resources->irqs) {
+			resources->irqs->interrupt[0] = irqs.interrupt[0];
+			resources->irqs->interrupt[1] = irqs.interrupt[1];
+			resources->irqs->interrupt[2] = irqs.interrupt[2];
+			resources->irqs->interrupt[3] = irqs.interrupt[3];
+			resources->irqs->valid_INT = irqs.valid_INT;
+		} else if (!behind_bridge) {
+			/* We need to hook up the interrupts here */
+			for (cloop = 0; cloop < 4; cloop++) {
+				if (irqs.valid_INT & (0x01 << cloop)) {
+					rc = cpqhp_set_irq(func->bus, func->device,
+							   cloop + 1, irqs.interrupt[cloop]);
+					if (rc)
+						goto free_and_out;
+				}
+			}	/* end of for loop */
+		}
+		/* Return unused bus resources
+		 * First use the temporary node to store information for
+		 * the board */
+		if (bus_node && temp_resources.bus_head) {
+			hold_bus_node->length = bus_node->base - hold_bus_node->base;
+
+			hold_bus_node->next = func->bus_head;
+			func->bus_head = hold_bus_node;
+
+			temp_byte = temp_resources.bus_head->base - 1;
+
+			/* set subordinate bus */
+			rc = pci_bus_write_config_byte(pci_bus, devfn, PCI_SUBORDINATE_BUS, temp_byte);
+
+			if (temp_resources.bus_head->length == 0) {
+				kfree(temp_resources.bus_head);
+				temp_resources.bus_head = NULL;
+			} else {
+				return_resource(&(resources->bus_head), temp_resources.bus_head);
+			}
+		}
+
+		/* If we have IO space available and there is some left,
+		 * return the unused portion */
+		if (hold_IO_node && temp_resources.io_head) {
+			io_node = do_pre_bridge_resource_split(&(temp_resources.io_head),
+							       &hold_IO_node, 0x1000);
+
+			/* Check if we were able to split something off */
+			if (io_node) {
+				hold_IO_node->base = io_node->base + io_node->length;
+
+				temp_byte = (hold_IO_node->base) >> 8;
+				rc = pci_bus_write_config_word(pci_bus, devfn, PCI_IO_BASE, temp_byte);
+
+				return_resource(&(resources->io_head), io_node);
+			}
+
+			io_node = do_bridge_resource_split(&(temp_resources.io_head), 0x1000);
+
+			/* Check if we were able to split something off */
+			if (io_node) {
+				/* First use the temporary node to store
+				 * information for the board */
+				hold_IO_node->length = io_node->base - hold_IO_node->base;
+
+				/* If we used any, add it to the board's list */
+				if (hold_IO_node->length) {
+					hold_IO_node->next = func->io_head;
+					func->io_head = hold_IO_node;
+
+					temp_byte = (io_node->base - 1) >> 8;
+					rc = pci_bus_write_config_byte(pci_bus, devfn, PCI_IO_LIMIT, temp_byte);
+
+					return_resource(&(resources->io_head), io_node);
+				} else {
+					/* it doesn't need any IO */
+					temp_word = 0x0000;
+					rc = pci_bus_write_config_word(pci_bus, devfn, PCI_IO_LIMIT, temp_word);
+
+					return_resource(&(resources->io_head), io_node);
+					kfree(hold_IO_node);
+				}
+			} else {
+				/* it used most of the range */
+				hold_IO_node->next = func->io_head;
+				func->io_head = hold_IO_node;
+			}
+		} else if (hold_IO_node) {
+			/* it used the whole range */
+			hold_IO_node->next = func->io_head;
+			func->io_head = hold_IO_node;
+		}
+		/* If we have memory space available and there is some left,
+		 * return the unused portion */
+		if (hold_mem_node && temp_resources.mem_head) {
+			mem_node = do_pre_bridge_resource_split(&(temp_resources.  mem_head),
+								&hold_mem_node, 0x100000);
+
+			/* Check if we were able to split something off */
+			if (mem_node) {
+				hold_mem_node->base = mem_node->base + mem_node->length;
+
+				temp_word = (hold_mem_node->base) >> 16;
+				rc = pci_bus_write_config_word(pci_bus, devfn, PCI_MEMORY_BASE, temp_word);
+
+				return_resource(&(resources->mem_head), mem_node);
+			}
+
+			mem_node = do_bridge_resource_split(&(temp_resources.mem_head), 0x100000);
+
+			/* Check if we were able to split something off */
+			if (mem_node) {
+				/* First use the temporary node to store
+				 * information for the board */
+				hold_mem_node->length = mem_node->base - hold_mem_node->base;
+
+				if (hold_mem_node->length) {
+					hold_mem_node->next = func->mem_head;
+					func->mem_head = hold_mem_node;
+
+					/* configure end address */
+					temp_word = (mem_node->base - 1) >> 16;
+					rc = pci_bus_write_config_word(pci_bus, devfn, PCI_MEMORY_LIMIT, temp_word);
+
+					/* Return unused resources to the pool */
+					return_resource(&(resources->mem_head), mem_node);
+				} else {
+					/* it doesn't need any Mem */
+					temp_word = 0x0000;
+					rc = pci_bus_write_config_word(pci_bus, devfn, PCI_MEMORY_LIMIT, temp_word);
+
+					return_resource(&(resources->mem_head), mem_node);
+					kfree(hold_mem_node);
+				}
+			} else {
+				/* it used most of the range */
+				hold_mem_node->next = func->mem_head;
+				func->mem_head = hold_mem_node;
+			}
+		} else if (hold_mem_node) {
+			/* it used the whole range */
+			hold_mem_node->next = func->mem_head;
+			func->mem_head = hold_mem_node;
+		}
+		/* If we have prefetchable memory space available and there
+		 * is some left at the end, return the unused portion */
+		if (temp_resources.p_mem_head) {
+			p_mem_node = do_pre_bridge_resource_split(&(temp_resources.p_mem_head),
+								  &hold_p_mem_node, 0x100000);
+
+			/* Check if we were able to split something off */
+			if (p_mem_node) {
+				hold_p_mem_node->base = p_mem_node->base + p_mem_node->length;
+
+				temp_word = (hold_p_mem_node->base) >> 16;
+				rc = pci_bus_write_config_word(pci_bus, devfn, PCI_PREF_MEMORY_BASE, temp_word);
+
+				return_resource(&(resources->p_mem_head), p_mem_node);
+			}
+
+			p_mem_node = do_bridge_resource_split(&(temp_resources.p_mem_head), 0x100000);
+
+			/* Check if we were able to split something off */
+			if (p_mem_node) {
+				/* First use the temporary node to store
+				 * information for the board */
+				hold_p_mem_node->length = p_mem_node->base - hold_p_mem_node->base;
+
+				/* If we used any, add it to the board's list */
+				if (hold_p_mem_node->length) {
+					hold_p_mem_node->next = func->p_mem_head;
+					func->p_mem_head = hold_p_mem_node;
+
+					temp_word = (p_mem_node->base - 1) >> 16;
+					rc = pci_bus_write_config_word(pci_bus, devfn, PCI_PREF_MEMORY_LIMIT, temp_word);
+
+					return_resource(&(resources->p_mem_head), p_mem_node);
+				} else {
+					/* it doesn't need any PMem */
+					temp_word = 0x0000;
+					rc = pci_bus_write_config_word(pci_bus, devfn, PCI_PREF_MEMORY_LIMIT, temp_word);
+
+					return_resource(&(resources->p_mem_head), p_mem_node);
+					kfree(hold_p_mem_node);
+				}
+			} else {
+				/* it used the most of the range */
+				hold_p_mem_node->next = func->p_mem_head;
+				func->p_mem_head = hold_p_mem_node;
+			}
+		} else if (hold_p_mem_node) {
+			/* it used the whole range */
+			hold_p_mem_node->next = func->p_mem_head;
+			func->p_mem_head = hold_p_mem_node;
+		}
+		/* We should be configuring an IRQ and the bridge's base address
+		 * registers if it needs them.  Although we have never seen such
+		 * a device */
+
+		/* enable card */
+		command = 0x0157;	/* = PCI_COMMAND_IO |
+					 *   PCI_COMMAND_MEMORY |
+					 *   PCI_COMMAND_MASTER |
+					 *   PCI_COMMAND_INVALIDATE |
+					 *   PCI_COMMAND_PARITY |
+					 *   PCI_COMMAND_SERR */
+		rc = pci_bus_write_config_word(pci_bus, devfn, PCI_COMMAND, command);
+
+		/* set Bridge Control Register */
+		command = 0x07;		/* = PCI_BRIDGE_CTL_PARITY |
+					 *   PCI_BRIDGE_CTL_SERR |
+					 *   PCI_BRIDGE_CTL_NO_ISA */
+		rc = pci_bus_write_config_word(pci_bus, devfn, PCI_BRIDGE_CONTROL, command);
+	} else if ((temp_byte & 0x7F) == PCI_HEADER_TYPE_NORMAL) {
+		/* Standard device */
+		rc = pci_bus_read_config_byte(pci_bus, devfn, 0x0B, &class_code);
+
+		if (class_code == PCI_BASE_CLASS_DISPLAY) {
+			/* Display (video) adapter (not supported) */
+			return DEVICE_TYPE_NOT_SUPPORTED;
+		}
+		/* Figure out IO and memory needs */
+		for (cloop = 0x10; cloop <= 0x24; cloop += 4) {
+			temp_register = 0xFFFFFFFF;
+
+			dbg("CND: bus=%d, devfn=%d, offset=%d\n", pci_bus->number, devfn, cloop);
+			rc = pci_bus_write_config_dword(pci_bus, devfn, cloop, temp_register);
+
+			rc = pci_bus_read_config_dword(pci_bus, devfn, cloop, &temp_register);
+			dbg("CND: base = 0x%x\n", temp_register);
+
+			if (temp_register) {	  /* If this register is implemented */
+				if ((temp_register & 0x03L) == 0x01) {
+					/* Map IO */
+
+					/* set base = amount of IO space */
+					base = temp_register & 0xFFFFFFFC;
+					base = ~base + 1;
+
+					dbg("CND:      length = 0x%x\n", base);
+					io_node = get_io_resource(&(resources->io_head), base);
+					if (!io_node)
+						return -ENOMEM;
+					dbg("Got io_node start = %8.8x, length = %8.8x next (%p)\n",
+					    io_node->base, io_node->length, io_node->next);
+					dbg("func (%p) io_head (%p)\n", func, func->io_head);
+
+					/* allocate the resource to the board */
+					base = io_node->base;
+					io_node->next = func->io_head;
+					func->io_head = io_node;
+				} else if ((temp_register & 0x0BL) == 0x08) {
+					/* Map prefetchable memory */
+					base = temp_register & 0xFFFFFFF0;
+					base = ~base + 1;
+
+					dbg("CND:      length = 0x%x\n", base);
+					p_mem_node = get_resource(&(resources->p_mem_head), base);
+
+					/* allocate the resource to the board */
+					if (p_mem_node) {
+						base = p_mem_node->base;
+
+						p_mem_node->next = func->p_mem_head;
+						func->p_mem_head = p_mem_node;
+					} else
+						return -ENOMEM;
+				} else if ((temp_register & 0x0BL) == 0x00) {
+					/* Map memory */
+					base = temp_register & 0xFFFFFFF0;
+					base = ~base + 1;
+
+					dbg("CND:      length = 0x%x\n", base);
+					mem_node = get_resource(&(resources->mem_head), base);
+
+					/* allocate the resource to the board */
+					if (mem_node) {
+						base = mem_node->base;
+
+						mem_node->next = func->mem_head;
+						func->mem_head = mem_node;
+					} else
+						return -ENOMEM;
+				} else {
+					/* Reserved bits or requesting space below 1M */
+					return NOT_ENOUGH_RESOURCES;
+				}
+
+				rc = pci_bus_write_config_dword(pci_bus, devfn, cloop, base);
+
+				/* Check for 64-bit base */
+				if ((temp_register & 0x07L) == 0x04) {
+					cloop += 4;
+
+					/* Upper 32 bits of address always zero
+					 * on today's systems */
+					/* FIXME this is probably not true on
+					 * Alpha and ia64??? */
+					base = 0;
+					rc = pci_bus_write_config_dword(pci_bus, devfn, cloop, base);
+				}
+			}
+		}		/* End of base register loop */
+		if (cpqhp_legacy_mode) {
+			/* Figure out which interrupt pin this function uses */
+			rc = pci_bus_read_config_byte(pci_bus, devfn,
+				PCI_INTERRUPT_PIN, &temp_byte);
+
+			/* If this function needs an interrupt and we are behind
+			 * a bridge and the pin is tied to something that's
+			 * already mapped, set this one the same */
+			if (temp_byte && resources->irqs &&
+			    (resources->irqs->valid_INT &
+			     (0x01 << ((temp_byte + resources->irqs->barber_pole - 1) & 0x03)))) {
+				/* We have to share with something already set up */
+				IRQ = resources->irqs->interrupt[(temp_byte +
+					resources->irqs->barber_pole - 1) & 0x03];
+			} else {
+				/* Program IRQ based on card type */
+				rc = pci_bus_read_config_byte(pci_bus, devfn, 0x0B, &class_code);
+
+				if (class_code == PCI_BASE_CLASS_STORAGE)
+					IRQ = cpqhp_disk_irq;
+				else
+					IRQ = cpqhp_nic_irq;
+			}
+
+			/* IRQ Line */
+			rc = pci_bus_write_config_byte(pci_bus, devfn, PCI_INTERRUPT_LINE, IRQ);
+		}
+
+		if (!behind_bridge) {
+			rc = cpqhp_set_irq(func->bus, func->device, temp_byte, IRQ);
+			if (rc)
+				return 1;
+		} else {
+			/* TBD - this code may also belong in the other clause
+			 * of this If statement */
+			resources->irqs->interrupt[(temp_byte + resources->irqs->barber_pole - 1) & 0x03] = IRQ;
+			resources->irqs->valid_INT |= 0x01 << (temp_byte + resources->irqs->barber_pole - 1) & 0x03;
+		}
+
+		/* Latency Timer */
+		temp_byte = 0x40;
+		rc = pci_bus_write_config_byte(pci_bus, devfn,
+					PCI_LATENCY_TIMER, temp_byte);
+
+		/* Cache Line size */
+		temp_byte = 0x08;
+		rc = pci_bus_write_config_byte(pci_bus, devfn,
+					PCI_CACHE_LINE_SIZE, temp_byte);
+
+		/* disable ROM base Address */
+		temp_dword = 0x00L;
+		rc = pci_bus_write_config_word(pci_bus, devfn,
+					PCI_ROM_ADDRESS, temp_dword);
+
+		/* enable card */
+		temp_word = 0x0157;	/* = PCI_COMMAND_IO |
+					 *   PCI_COMMAND_MEMORY |
+					 *   PCI_COMMAND_MASTER |
+					 *   PCI_COMMAND_INVALIDATE |
+					 *   PCI_COMMAND_PARITY |
+					 *   PCI_COMMAND_SERR */
+		rc = pci_bus_write_config_word(pci_bus, devfn,
+					PCI_COMMAND, temp_word);
+	} else {		/* End of Not-A-Bridge else */
+		/* It's some strange type of PCI adapter (Cardbus?) */
+		return DEVICE_TYPE_NOT_SUPPORTED;
+	}
+
+	func->configured = 1;
+
+	return 0;
+free_and_out:
+	cpqhp_destroy_resource_list(&temp_resources);
+
+	return_resource(&(resources->bus_head), hold_bus_node);
+	return_resource(&(resources->io_head), hold_IO_node);
+	return_resource(&(resources->mem_head), hold_mem_node);
+	return_resource(&(resources->p_mem_head), hold_p_mem_node);
+	return rc;
+}
diff --git a/drivers/pci/hotplug/cpqphp_nvram.c b/drivers/pci/hotplug/cpqphp_nvram.c
new file mode 100644
index 000000000..7a65d427a
--- /dev/null
+++ b/drivers/pci/hotplug/cpqphp_nvram.c
@@ -0,0 +1,650 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Compaq Hot Plug Controller Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <linux/uaccess.h>
+#include "cpqphp.h"
+#include "cpqphp_nvram.h"
+
+
+#define ROM_INT15_PHY_ADDR		0x0FF859
+#define READ_EV				0xD8A4
+#define WRITE_EV			0xD8A5
+
+struct register_foo {
+	union {
+		unsigned long lword;		/* eax */
+		unsigned short word;		/* ax */
+
+		struct {
+			unsigned char low;	/* al */
+			unsigned char high;	/* ah */
+		} byte;
+	} data;
+
+	unsigned char opcode;	/* see below */
+	unsigned long length;	/* if the reg. is a pointer, how much data */
+} __attribute__ ((packed));
+
+struct all_reg {
+	struct register_foo eax_reg;
+	struct register_foo ebx_reg;
+	struct register_foo ecx_reg;
+	struct register_foo edx_reg;
+	struct register_foo edi_reg;
+	struct register_foo esi_reg;
+	struct register_foo eflags_reg;
+} __attribute__ ((packed));
+
+
+struct ev_hrt_header {
+	u8 Version;
+	u8 num_of_ctrl;
+	u8 next;
+};
+
+struct ev_hrt_ctrl {
+	u8 bus;
+	u8 device;
+	u8 function;
+	u8 mem_avail;
+	u8 p_mem_avail;
+	u8 io_avail;
+	u8 bus_avail;
+	u8 next;
+};
+
+
+static u8 evbuffer_init;
+static u8 evbuffer_length;
+static u8 evbuffer[1024];
+
+static void __iomem *compaq_int15_entry_point;
+
+/* lock for ordering int15_bios_call() */
+static DEFINE_SPINLOCK(int15_lock);
+
+
+/* This is a series of function that deals with
+ * setting & getting the hotplug resource table in some environment variable.
+ */
+
+/*
+ * We really shouldn't be doing this unless there is a _very_ good reason to!!!
+ * greg k-h
+ */
+
+
+static u32 add_byte(u32 **p_buffer, u8 value, u32 *used, u32 *avail)
+{
+	u8 **tByte;
+
+	if ((*used + 1) > *avail)
+		return(1);
+
+	*((u8 *)*p_buffer) = value;
+	tByte = (u8 **)p_buffer;
+	(*tByte)++;
+	*used += 1;
+	return(0);
+}
+
+
+static u32 add_dword(u32 **p_buffer, u32 value, u32 *used, u32 *avail)
+{
+	if ((*used + 4) > *avail)
+		return(1);
+
+	**p_buffer = value;
+	(*p_buffer)++;
+	*used += 4;
+	return(0);
+}
+
+
+/*
+ * check_for_compaq_ROM
+ *
+ * this routine verifies that the ROM OEM string is 'COMPAQ'
+ *
+ * returns 0 for non-Compaq ROM, 1 for Compaq ROM
+ */
+static int check_for_compaq_ROM(void __iomem *rom_start)
+{
+	u8 temp1, temp2, temp3, temp4, temp5, temp6;
+	int result = 0;
+
+	temp1 = readb(rom_start + 0xffea + 0);
+	temp2 = readb(rom_start + 0xffea + 1);
+	temp3 = readb(rom_start + 0xffea + 2);
+	temp4 = readb(rom_start + 0xffea + 3);
+	temp5 = readb(rom_start + 0xffea + 4);
+	temp6 = readb(rom_start + 0xffea + 5);
+	if ((temp1 == 'C') &&
+	    (temp2 == 'O') &&
+	    (temp3 == 'M') &&
+	    (temp4 == 'P') &&
+	    (temp5 == 'A') &&
+	    (temp6 == 'Q')) {
+		result = 1;
+	}
+	dbg("%s - returned %d\n", __func__, result);
+	return result;
+}
+
+
+static u32 access_EV(u16 operation, u8 *ev_name, u8 *buffer, u32 *buf_size)
+{
+	unsigned long flags;
+	int op = operation;
+	int ret_val;
+
+	if (!compaq_int15_entry_point)
+		return -ENODEV;
+
+	spin_lock_irqsave(&int15_lock, flags);
+	__asm__ (
+		"xorl   %%ebx,%%ebx\n" \
+		"xorl    %%edx,%%edx\n" \
+		"pushf\n" \
+		"push %%cs\n" \
+		"cli\n" \
+		"call *%6\n"
+		: "=c" (*buf_size), "=a" (ret_val)
+		: "a" (op), "c" (*buf_size), "S" (ev_name),
+		"D" (buffer), "m" (compaq_int15_entry_point)
+		: "%ebx", "%edx");
+	spin_unlock_irqrestore(&int15_lock, flags);
+
+	return((ret_val & 0xFF00) >> 8);
+}
+
+
+/*
+ * load_HRT
+ *
+ * Read the hot plug Resource Table from NVRAM
+ */
+static int load_HRT(void __iomem *rom_start)
+{
+	u32 available;
+	u32 temp_dword;
+	u8 temp_byte = 0xFF;
+	u32 rc;
+
+	if (!check_for_compaq_ROM(rom_start))
+		return -ENODEV;
+
+	available = 1024;
+
+	/* Now load the EV */
+	temp_dword = available;
+
+	rc = access_EV(READ_EV, "CQTHPS", evbuffer, &temp_dword);
+
+	evbuffer_length = temp_dword;
+
+	/* We're maintaining the resource lists so write FF to invalidate old
+	 * info
+	 */
+	temp_dword = 1;
+
+	rc = access_EV(WRITE_EV, "CQTHPS", &temp_byte, &temp_dword);
+
+	return rc;
+}
+
+
+/*
+ * store_HRT
+ *
+ * Save the hot plug Resource Table in NVRAM
+ */
+static u32 store_HRT(void __iomem *rom_start)
+{
+	u32 *buffer;
+	u32 *pFill;
+	u32 usedbytes;
+	u32 available;
+	u32 temp_dword;
+	u32 rc;
+	u8 loop;
+	u8 numCtrl = 0;
+	struct controller *ctrl;
+	struct pci_resource *resNode;
+	struct ev_hrt_header *p_EV_header;
+	struct ev_hrt_ctrl *p_ev_ctrl;
+
+	available = 1024;
+
+	if (!check_for_compaq_ROM(rom_start))
+		return(1);
+
+	buffer = (u32 *) evbuffer;
+
+	if (!buffer)
+		return(1);
+
+	pFill = buffer;
+	usedbytes = 0;
+
+	p_EV_header = (struct ev_hrt_header *) pFill;
+
+	ctrl = cpqhp_ctrl_list;
+
+	/* The revision of this structure */
+	rc = add_byte(&pFill, 1 + ctrl->push_flag, &usedbytes, &available);
+	if (rc)
+		return(rc);
+
+	/* The number of controllers */
+	rc = add_byte(&pFill, 1, &usedbytes, &available);
+	if (rc)
+		return(rc);
+
+	while (ctrl) {
+		p_ev_ctrl = (struct ev_hrt_ctrl *) pFill;
+
+		numCtrl++;
+
+		/* The bus number */
+		rc = add_byte(&pFill, ctrl->bus, &usedbytes, &available);
+		if (rc)
+			return(rc);
+
+		/* The device Number */
+		rc = add_byte(&pFill, PCI_SLOT(ctrl->pci_dev->devfn), &usedbytes, &available);
+		if (rc)
+			return(rc);
+
+		/* The function Number */
+		rc = add_byte(&pFill, PCI_FUNC(ctrl->pci_dev->devfn), &usedbytes, &available);
+		if (rc)
+			return(rc);
+
+		/* Skip the number of available entries */
+		rc = add_dword(&pFill, 0, &usedbytes, &available);
+		if (rc)
+			return(rc);
+
+		/* Figure out memory Available */
+
+		resNode = ctrl->mem_head;
+
+		loop = 0;
+
+		while (resNode) {
+			loop++;
+
+			/* base */
+			rc = add_dword(&pFill, resNode->base, &usedbytes, &available);
+			if (rc)
+				return(rc);
+
+			/* length */
+			rc = add_dword(&pFill, resNode->length, &usedbytes, &available);
+			if (rc)
+				return(rc);
+
+			resNode = resNode->next;
+		}
+
+		/* Fill in the number of entries */
+		p_ev_ctrl->mem_avail = loop;
+
+		/* Figure out prefetchable memory Available */
+
+		resNode = ctrl->p_mem_head;
+
+		loop = 0;
+
+		while (resNode) {
+			loop++;
+
+			/* base */
+			rc = add_dword(&pFill, resNode->base, &usedbytes, &available);
+			if (rc)
+				return(rc);
+
+			/* length */
+			rc = add_dword(&pFill, resNode->length, &usedbytes, &available);
+			if (rc)
+				return(rc);
+
+			resNode = resNode->next;
+		}
+
+		/* Fill in the number of entries */
+		p_ev_ctrl->p_mem_avail = loop;
+
+		/* Figure out IO Available */
+
+		resNode = ctrl->io_head;
+
+		loop = 0;
+
+		while (resNode) {
+			loop++;
+
+			/* base */
+			rc = add_dword(&pFill, resNode->base, &usedbytes, &available);
+			if (rc)
+				return(rc);
+
+			/* length */
+			rc = add_dword(&pFill, resNode->length, &usedbytes, &available);
+			if (rc)
+				return(rc);
+
+			resNode = resNode->next;
+		}
+
+		/* Fill in the number of entries */
+		p_ev_ctrl->io_avail = loop;
+
+		/* Figure out bus Available */
+
+		resNode = ctrl->bus_head;
+
+		loop = 0;
+
+		while (resNode) {
+			loop++;
+
+			/* base */
+			rc = add_dword(&pFill, resNode->base, &usedbytes, &available);
+			if (rc)
+				return(rc);
+
+			/* length */
+			rc = add_dword(&pFill, resNode->length, &usedbytes, &available);
+			if (rc)
+				return(rc);
+
+			resNode = resNode->next;
+		}
+
+		/* Fill in the number of entries */
+		p_ev_ctrl->bus_avail = loop;
+
+		ctrl = ctrl->next;
+	}
+
+	p_EV_header->num_of_ctrl = numCtrl;
+
+	/* Now store the EV */
+
+	temp_dword = usedbytes;
+
+	rc = access_EV(WRITE_EV, "CQTHPS", (u8 *) buffer, &temp_dword);
+
+	dbg("usedbytes = 0x%x, length = 0x%x\n", usedbytes, temp_dword);
+
+	evbuffer_length = temp_dword;
+
+	if (rc) {
+		err(msg_unable_to_save);
+		return(1);
+	}
+
+	return(0);
+}
+
+
+void compaq_nvram_init(void __iomem *rom_start)
+{
+	if (rom_start)
+		compaq_int15_entry_point = (rom_start + ROM_INT15_PHY_ADDR - ROM_PHY_ADDR);
+
+	dbg("int15 entry  = %p\n", compaq_int15_entry_point);
+}
+
+
+int compaq_nvram_load(void __iomem *rom_start, struct controller *ctrl)
+{
+	u8 bus, device, function;
+	u8 nummem, numpmem, numio, numbus;
+	u32 rc;
+	u8 *p_byte;
+	struct pci_resource *mem_node;
+	struct pci_resource *p_mem_node;
+	struct pci_resource *io_node;
+	struct pci_resource *bus_node;
+	struct ev_hrt_ctrl *p_ev_ctrl;
+	struct ev_hrt_header *p_EV_header;
+
+	if (!evbuffer_init) {
+		/* Read the resource list information in from NVRAM */
+		if (load_HRT(rom_start))
+			memset(evbuffer, 0, 1024);
+
+		evbuffer_init = 1;
+	}
+
+	/* If we saved information in NVRAM, use it now */
+	p_EV_header = (struct ev_hrt_header *) evbuffer;
+
+	/* The following code is for systems where version 1.0 of this
+	 * driver has been loaded, but doesn't support the hardware.
+	 * In that case, the driver would incorrectly store something
+	 * in NVRAM.
+	 */
+	if ((p_EV_header->Version == 2) ||
+	    ((p_EV_header->Version == 1) && !ctrl->push_flag)) {
+		p_byte = &(p_EV_header->next);
+
+		p_ev_ctrl = (struct ev_hrt_ctrl *) &(p_EV_header->next);
+
+		p_byte += 3;
+
+		if (p_byte > ((u8 *)p_EV_header + evbuffer_length))
+			return 2;
+
+		bus = p_ev_ctrl->bus;
+		device = p_ev_ctrl->device;
+		function = p_ev_ctrl->function;
+
+		while ((bus != ctrl->bus) ||
+		       (device != PCI_SLOT(ctrl->pci_dev->devfn)) ||
+		       (function != PCI_FUNC(ctrl->pci_dev->devfn))) {
+			nummem = p_ev_ctrl->mem_avail;
+			numpmem = p_ev_ctrl->p_mem_avail;
+			numio = p_ev_ctrl->io_avail;
+			numbus = p_ev_ctrl->bus_avail;
+
+			p_byte += 4;
+
+			if (p_byte > ((u8 *)p_EV_header + evbuffer_length))
+				return 2;
+
+			/* Skip forward to the next entry */
+			p_byte += (nummem + numpmem + numio + numbus) * 8;
+
+			if (p_byte > ((u8 *)p_EV_header + evbuffer_length))
+				return 2;
+
+			p_ev_ctrl = (struct ev_hrt_ctrl *) p_byte;
+
+			p_byte += 3;
+
+			if (p_byte > ((u8 *)p_EV_header + evbuffer_length))
+				return 2;
+
+			bus = p_ev_ctrl->bus;
+			device = p_ev_ctrl->device;
+			function = p_ev_ctrl->function;
+		}
+
+		nummem = p_ev_ctrl->mem_avail;
+		numpmem = p_ev_ctrl->p_mem_avail;
+		numio = p_ev_ctrl->io_avail;
+		numbus = p_ev_ctrl->bus_avail;
+
+		p_byte += 4;
+
+		if (p_byte > ((u8 *)p_EV_header + evbuffer_length))
+			return 2;
+
+		while (nummem--) {
+			mem_node = kmalloc(sizeof(struct pci_resource), GFP_KERNEL);
+
+			if (!mem_node)
+				break;
+
+			mem_node->base = *(u32 *)p_byte;
+			dbg("mem base = %8.8x\n", mem_node->base);
+			p_byte += 4;
+
+			if (p_byte > ((u8 *)p_EV_header + evbuffer_length)) {
+				kfree(mem_node);
+				return 2;
+			}
+
+			mem_node->length = *(u32 *)p_byte;
+			dbg("mem length = %8.8x\n", mem_node->length);
+			p_byte += 4;
+
+			if (p_byte > ((u8 *)p_EV_header + evbuffer_length)) {
+				kfree(mem_node);
+				return 2;
+			}
+
+			mem_node->next = ctrl->mem_head;
+			ctrl->mem_head = mem_node;
+		}
+
+		while (numpmem--) {
+			p_mem_node = kmalloc(sizeof(struct pci_resource), GFP_KERNEL);
+
+			if (!p_mem_node)
+				break;
+
+			p_mem_node->base = *(u32 *)p_byte;
+			dbg("pre-mem base = %8.8x\n", p_mem_node->base);
+			p_byte += 4;
+
+			if (p_byte > ((u8 *)p_EV_header + evbuffer_length)) {
+				kfree(p_mem_node);
+				return 2;
+			}
+
+			p_mem_node->length = *(u32 *)p_byte;
+			dbg("pre-mem length = %8.8x\n", p_mem_node->length);
+			p_byte += 4;
+
+			if (p_byte > ((u8 *)p_EV_header + evbuffer_length)) {
+				kfree(p_mem_node);
+				return 2;
+			}
+
+			p_mem_node->next = ctrl->p_mem_head;
+			ctrl->p_mem_head = p_mem_node;
+		}
+
+		while (numio--) {
+			io_node = kmalloc(sizeof(struct pci_resource), GFP_KERNEL);
+
+			if (!io_node)
+				break;
+
+			io_node->base = *(u32 *)p_byte;
+			dbg("io base = %8.8x\n", io_node->base);
+			p_byte += 4;
+
+			if (p_byte > ((u8 *)p_EV_header + evbuffer_length)) {
+				kfree(io_node);
+				return 2;
+			}
+
+			io_node->length = *(u32 *)p_byte;
+			dbg("io length = %8.8x\n", io_node->length);
+			p_byte += 4;
+
+			if (p_byte > ((u8 *)p_EV_header + evbuffer_length)) {
+				kfree(io_node);
+				return 2;
+			}
+
+			io_node->next = ctrl->io_head;
+			ctrl->io_head = io_node;
+		}
+
+		while (numbus--) {
+			bus_node = kmalloc(sizeof(struct pci_resource), GFP_KERNEL);
+
+			if (!bus_node)
+				break;
+
+			bus_node->base = *(u32 *)p_byte;
+			p_byte += 4;
+
+			if (p_byte > ((u8 *)p_EV_header + evbuffer_length)) {
+				kfree(bus_node);
+				return 2;
+			}
+
+			bus_node->length = *(u32 *)p_byte;
+			p_byte += 4;
+
+			if (p_byte > ((u8 *)p_EV_header + evbuffer_length)) {
+				kfree(bus_node);
+				return 2;
+			}
+
+			bus_node->next = ctrl->bus_head;
+			ctrl->bus_head = bus_node;
+		}
+
+		/* If all of the following fail, we don't have any resources for
+		 * hot plug add
+		 */
+		rc = 1;
+		rc &= cpqhp_resource_sort_and_combine(&(ctrl->mem_head));
+		rc &= cpqhp_resource_sort_and_combine(&(ctrl->p_mem_head));
+		rc &= cpqhp_resource_sort_and_combine(&(ctrl->io_head));
+		rc &= cpqhp_resource_sort_and_combine(&(ctrl->bus_head));
+
+		if (rc)
+			return(rc);
+	} else {
+		if ((evbuffer[0] != 0) && (!ctrl->push_flag))
+			return 1;
+	}
+
+	return 0;
+}
+
+
+int compaq_nvram_store(void __iomem *rom_start)
+{
+	int rc = 1;
+
+	if (rom_start == NULL)
+		return -ENODEV;
+
+	if (evbuffer_init) {
+		rc = store_HRT(rom_start);
+		if (rc)
+			err(msg_unable_to_save);
+	}
+	return rc;
+}
+
diff --git a/drivers/pci/hotplug/cpqphp_nvram.h b/drivers/pci/hotplug/cpqphp_nvram.h
new file mode 100644
index 000000000..70e879b6a
--- /dev/null
+++ b/drivers/pci/hotplug/cpqphp_nvram.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Compaq Hot Plug Controller Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>
+ *
+ */
+
+#ifndef _CPQPHP_NVRAM_H
+#define _CPQPHP_NVRAM_H
+
+#ifndef CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM
+
+static inline void compaq_nvram_init(void __iomem *rom_start) { }
+
+static inline int compaq_nvram_load(void __iomem *rom_start, struct controller *ctrl)
+{
+	return 0;
+}
+
+static inline int compaq_nvram_store(void __iomem *rom_start)
+{
+	return 0;
+}
+
+#else
+
+void compaq_nvram_init(void __iomem *rom_start);
+int compaq_nvram_load(void __iomem *rom_start, struct controller *ctrl);
+int compaq_nvram_store(void __iomem *rom_start);
+
+#endif
+
+#endif
+
diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c
new file mode 100644
index 000000000..3b248426a
--- /dev/null
+++ b/drivers/pci/hotplug/cpqphp_pci.c
@@ -0,0 +1,1562 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Compaq Hot Plug Controller Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/proc_fs.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include "../pci.h"
+#include "cpqphp.h"
+#include "cpqphp_nvram.h"
+
+
+u8 cpqhp_nic_irq;
+u8 cpqhp_disk_irq;
+
+static u16 unused_IRQ;
+
+/*
+ * detect_HRT_floating_pointer
+ *
+ * find the Hot Plug Resource Table in the specified region of memory.
+ *
+ */
+static void __iomem *detect_HRT_floating_pointer(void __iomem *begin, void __iomem *end)
+{
+	void __iomem *fp;
+	void __iomem *endp;
+	u8 temp1, temp2, temp3, temp4;
+	int status = 0;
+
+	endp = (end - sizeof(struct hrt) + 1);
+
+	for (fp = begin; fp <= endp; fp += 16) {
+		temp1 = readb(fp + SIG0);
+		temp2 = readb(fp + SIG1);
+		temp3 = readb(fp + SIG2);
+		temp4 = readb(fp + SIG3);
+		if (temp1 == '$' &&
+		    temp2 == 'H' &&
+		    temp3 == 'R' &&
+		    temp4 == 'T') {
+			status = 1;
+			break;
+		}
+	}
+
+	if (!status)
+		fp = NULL;
+
+	dbg("Discovered Hotplug Resource Table at %p\n", fp);
+	return fp;
+}
+
+
+int cpqhp_configure_device(struct controller *ctrl, struct pci_func *func)
+{
+	struct pci_bus *child;
+	int num;
+
+	pci_lock_rescan_remove();
+
+	if (func->pci_dev == NULL)
+		func->pci_dev = pci_get_domain_bus_and_slot(0, func->bus,
+							PCI_DEVFN(func->device,
+							func->function));
+
+	/* No pci device, we need to create it then */
+	if (func->pci_dev == NULL) {
+		dbg("INFO: pci_dev still null\n");
+
+		num = pci_scan_slot(ctrl->pci_dev->bus, PCI_DEVFN(func->device, func->function));
+		if (num)
+			pci_bus_add_devices(ctrl->pci_dev->bus);
+
+		func->pci_dev = pci_get_domain_bus_and_slot(0, func->bus,
+							PCI_DEVFN(func->device,
+							func->function));
+		if (func->pci_dev == NULL) {
+			dbg("ERROR: pci_dev still null\n");
+			goto out;
+		}
+	}
+
+	if (func->pci_dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+		pci_hp_add_bridge(func->pci_dev);
+		child = func->pci_dev->subordinate;
+		if (child)
+			pci_bus_add_devices(child);
+	}
+
+	pci_dev_put(func->pci_dev);
+
+ out:
+	pci_unlock_rescan_remove();
+	return 0;
+}
+
+
+int cpqhp_unconfigure_device(struct pci_func *func)
+{
+	int j;
+
+	dbg("%s: bus/dev/func = %x/%x/%x\n", __func__, func->bus, func->device, func->function);
+
+	pci_lock_rescan_remove();
+	for (j = 0; j < 8 ; j++) {
+		struct pci_dev *temp = pci_get_domain_bus_and_slot(0,
+							func->bus,
+							PCI_DEVFN(func->device,
+							j));
+		if (temp) {
+			pci_dev_put(temp);
+			pci_stop_and_remove_bus_device(temp);
+		}
+	}
+	pci_unlock_rescan_remove();
+	return 0;
+}
+
+static int PCI_RefinedAccessConfig(struct pci_bus *bus, unsigned int devfn, u8 offset, u32 *value)
+{
+	u32 vendID = 0;
+
+	if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &vendID) == -1)
+		return -1;
+	if (vendID == 0xffffffff)
+		return -1;
+	return pci_bus_read_config_dword(bus, devfn, offset, value);
+}
+
+
+/*
+ * cpqhp_set_irq
+ *
+ * @bus_num: bus number of PCI device
+ * @dev_num: device number of PCI device
+ * @slot: pointer to u8 where slot number will be returned
+ */
+int cpqhp_set_irq(u8 bus_num, u8 dev_num, u8 int_pin, u8 irq_num)
+{
+	int rc = 0;
+
+	if (cpqhp_legacy_mode) {
+		struct pci_dev *fakedev;
+		struct pci_bus *fakebus;
+		u16 temp_word;
+
+		fakedev = kmalloc(sizeof(*fakedev), GFP_KERNEL);
+		fakebus = kmalloc(sizeof(*fakebus), GFP_KERNEL);
+		if (!fakedev || !fakebus) {
+			kfree(fakedev);
+			kfree(fakebus);
+			return -ENOMEM;
+		}
+
+		fakedev->devfn = dev_num << 3;
+		fakedev->bus = fakebus;
+		fakebus->number = bus_num;
+		dbg("%s: dev %d, bus %d, pin %d, num %d\n",
+		    __func__, dev_num, bus_num, int_pin, irq_num);
+		rc = pcibios_set_irq_routing(fakedev, int_pin - 1, irq_num);
+		kfree(fakedev);
+		kfree(fakebus);
+		dbg("%s: rc %d\n", __func__, rc);
+		if (!rc)
+			return !rc;
+
+		/* set the Edge Level Control Register (ELCR) */
+		temp_word = inb(0x4d0);
+		temp_word |= inb(0x4d1) << 8;
+
+		temp_word |= 0x01 << irq_num;
+
+		/* This should only be for x86 as it sets the Edge Level
+		 * Control Register
+		 */
+		outb((u8)(temp_word & 0xFF), 0x4d0);
+		outb((u8)((temp_word & 0xFF00) >> 8), 0x4d1);
+		rc = 0;
+	}
+
+	return rc;
+}
+
+
+static int PCI_ScanBusForNonBridge(struct controller *ctrl, u8 bus_num, u8 *dev_num)
+{
+	u16 tdevice;
+	u32 work;
+	u8 tbus;
+
+	ctrl->pci_bus->number = bus_num;
+
+	for (tdevice = 0; tdevice < 0xFF; tdevice++) {
+		/* Scan for access first */
+		if (PCI_RefinedAccessConfig(ctrl->pci_bus, tdevice, 0x08, &work) == -1)
+			continue;
+		dbg("Looking for nonbridge bus_num %d dev_num %d\n", bus_num, tdevice);
+		/* Yep we got one. Not a bridge ? */
+		if ((work >> 8) != PCI_TO_PCI_BRIDGE_CLASS) {
+			*dev_num = tdevice;
+			dbg("found it !\n");
+			return 0;
+		}
+	}
+	for (tdevice = 0; tdevice < 0xFF; tdevice++) {
+		/* Scan for access first */
+		if (PCI_RefinedAccessConfig(ctrl->pci_bus, tdevice, 0x08, &work) == -1)
+			continue;
+		dbg("Looking for bridge bus_num %d dev_num %d\n", bus_num, tdevice);
+		/* Yep we got one. bridge ? */
+		if ((work >> 8) == PCI_TO_PCI_BRIDGE_CLASS) {
+			pci_bus_read_config_byte(ctrl->pci_bus, PCI_DEVFN(tdevice, 0), PCI_SECONDARY_BUS, &tbus);
+			/* XXX: no recursion, wtf? */
+			dbg("Recurse on bus_num %d tdevice %d\n", tbus, tdevice);
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+
+static int PCI_GetBusDevHelper(struct controller *ctrl, u8 *bus_num, u8 *dev_num, u8 slot, u8 nobridge)
+{
+	int loop, len;
+	u32 work;
+	u8 tbus, tdevice, tslot;
+
+	len = cpqhp_routing_table_length();
+	for (loop = 0; loop < len; ++loop) {
+		tbus = cpqhp_routing_table->slots[loop].bus;
+		tdevice = cpqhp_routing_table->slots[loop].devfn;
+		tslot = cpqhp_routing_table->slots[loop].slot;
+
+		if (tslot == slot) {
+			*bus_num = tbus;
+			*dev_num = tdevice;
+			ctrl->pci_bus->number = tbus;
+			pci_bus_read_config_dword(ctrl->pci_bus, *dev_num, PCI_VENDOR_ID, &work);
+			if (!nobridge || (work == 0xffffffff))
+				return 0;
+
+			dbg("bus_num %d devfn %d\n", *bus_num, *dev_num);
+			pci_bus_read_config_dword(ctrl->pci_bus, *dev_num, PCI_CLASS_REVISION, &work);
+			dbg("work >> 8 (%x) = BRIDGE (%x)\n", work >> 8, PCI_TO_PCI_BRIDGE_CLASS);
+
+			if ((work >> 8) == PCI_TO_PCI_BRIDGE_CLASS) {
+				pci_bus_read_config_byte(ctrl->pci_bus, *dev_num, PCI_SECONDARY_BUS, &tbus);
+				dbg("Scan bus for Non Bridge: bus %d\n", tbus);
+				if (PCI_ScanBusForNonBridge(ctrl, tbus, dev_num) == 0) {
+					*bus_num = tbus;
+					return 0;
+				}
+			} else
+				return 0;
+		}
+	}
+	return -1;
+}
+
+
+int cpqhp_get_bus_dev(struct controller *ctrl, u8 *bus_num, u8 *dev_num, u8 slot)
+{
+	/* plain (bridges allowed) */
+	return PCI_GetBusDevHelper(ctrl, bus_num, dev_num, slot, 0);
+}
+
+
+/* More PCI configuration routines; this time centered around hotplug
+ * controller
+ */
+
+
+/*
+ * cpqhp_save_config
+ *
+ * Reads configuration for all slots in a PCI bus and saves info.
+ *
+ * Note:  For non-hot plug buses, the slot # saved is the device #
+ *
+ * returns 0 if success
+ */
+int cpqhp_save_config(struct controller *ctrl, int busnumber, int is_hot_plug)
+{
+	long rc;
+	u8 class_code;
+	u8 header_type;
+	u32 ID;
+	u8 secondary_bus;
+	struct pci_func *new_slot;
+	int sub_bus;
+	int FirstSupported;
+	int LastSupported;
+	int max_functions;
+	int function;
+	u8 DevError;
+	int device = 0;
+	int cloop = 0;
+	int stop_it;
+	int index;
+	u16 devfn;
+
+	/* Decide which slots are supported */
+
+	if (is_hot_plug) {
+		/*
+		 * is_hot_plug is the slot mask
+		 */
+		FirstSupported = is_hot_plug >> 4;
+		LastSupported = FirstSupported + (is_hot_plug & 0x0F) - 1;
+	} else {
+		FirstSupported = 0;
+		LastSupported = 0x1F;
+	}
+
+	/* Save PCI configuration space for all devices in supported slots */
+	ctrl->pci_bus->number = busnumber;
+	for (device = FirstSupported; device <= LastSupported; device++) {
+		ID = 0xFFFFFFFF;
+		rc = pci_bus_read_config_dword(ctrl->pci_bus, PCI_DEVFN(device, 0), PCI_VENDOR_ID, &ID);
+
+		if (ID == 0xFFFFFFFF) {
+			if (is_hot_plug) {
+				/* Setup slot structure with entry for empty
+				 * slot
+				 */
+				new_slot = cpqhp_slot_create(busnumber);
+				if (new_slot == NULL)
+					return 1;
+
+				new_slot->bus = (u8) busnumber;
+				new_slot->device = (u8) device;
+				new_slot->function = 0;
+				new_slot->is_a_board = 0;
+				new_slot->presence_save = 0;
+				new_slot->switch_save = 0;
+			}
+			continue;
+		}
+
+		rc = pci_bus_read_config_byte(ctrl->pci_bus, PCI_DEVFN(device, 0), 0x0B, &class_code);
+		if (rc)
+			return rc;
+
+		rc = pci_bus_read_config_byte(ctrl->pci_bus, PCI_DEVFN(device, 0), PCI_HEADER_TYPE, &header_type);
+		if (rc)
+			return rc;
+
+		/* If multi-function device, set max_functions to 8 */
+		if (header_type & 0x80)
+			max_functions = 8;
+		else
+			max_functions = 1;
+
+		function = 0;
+
+		do {
+			DevError = 0;
+			if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
+				/* Recurse the subordinate bus
+				 * get the subordinate bus number
+				 */
+				rc = pci_bus_read_config_byte(ctrl->pci_bus, PCI_DEVFN(device, function), PCI_SECONDARY_BUS, &secondary_bus);
+				if (rc) {
+					return rc;
+				} else {
+					sub_bus = (int) secondary_bus;
+
+					/* Save secondary bus cfg spc
+					 * with this recursive call.
+					 */
+					rc = cpqhp_save_config(ctrl, sub_bus, 0);
+					if (rc)
+						return rc;
+					ctrl->pci_bus->number = busnumber;
+				}
+			}
+
+			index = 0;
+			new_slot = cpqhp_slot_find(busnumber, device, index++);
+			while (new_slot &&
+			       (new_slot->function != (u8) function))
+				new_slot = cpqhp_slot_find(busnumber, device, index++);
+
+			if (!new_slot) {
+				/* Setup slot structure. */
+				new_slot = cpqhp_slot_create(busnumber);
+				if (new_slot == NULL)
+					return 1;
+			}
+
+			new_slot->bus = (u8) busnumber;
+			new_slot->device = (u8) device;
+			new_slot->function = (u8) function;
+			new_slot->is_a_board = 1;
+			new_slot->switch_save = 0x10;
+			/* In case of unsupported board */
+			new_slot->status = DevError;
+			devfn = (new_slot->device << 3) | new_slot->function;
+			new_slot->pci_dev = pci_get_domain_bus_and_slot(0,
+							new_slot->bus, devfn);
+
+			for (cloop = 0; cloop < 0x20; cloop++) {
+				rc = pci_bus_read_config_dword(ctrl->pci_bus, PCI_DEVFN(device, function), cloop << 2, (u32 *) &(new_slot->config_space[cloop]));
+				if (rc)
+					return rc;
+			}
+
+			pci_dev_put(new_slot->pci_dev);
+
+			function++;
+
+			stop_it = 0;
+
+			/* this loop skips to the next present function
+			 * reading in Class Code and Header type.
+			 */
+			while ((function < max_functions) && (!stop_it)) {
+				rc = pci_bus_read_config_dword(ctrl->pci_bus, PCI_DEVFN(device, function), PCI_VENDOR_ID, &ID);
+				if (ID == 0xFFFFFFFF) {
+					function++;
+					continue;
+				}
+				rc = pci_bus_read_config_byte(ctrl->pci_bus, PCI_DEVFN(device, function), 0x0B, &class_code);
+				if (rc)
+					return rc;
+
+				rc = pci_bus_read_config_byte(ctrl->pci_bus, PCI_DEVFN(device, function), PCI_HEADER_TYPE, &header_type);
+				if (rc)
+					return rc;
+
+				stop_it++;
+			}
+
+		} while (function < max_functions);
+	}			/* End of FOR loop */
+
+	return 0;
+}
+
+
+/*
+ * cpqhp_save_slot_config
+ *
+ * Saves configuration info for all PCI devices in a given slot
+ * including subordinate buses.
+ *
+ * returns 0 if success
+ */
+int cpqhp_save_slot_config(struct controller *ctrl, struct pci_func *new_slot)
+{
+	long rc;
+	u8 class_code;
+	u8 header_type;
+	u32 ID;
+	u8 secondary_bus;
+	int sub_bus;
+	int max_functions;
+	int function = 0;
+	int cloop;
+	int stop_it;
+
+	ID = 0xFFFFFFFF;
+
+	ctrl->pci_bus->number = new_slot->bus;
+	pci_bus_read_config_dword(ctrl->pci_bus, PCI_DEVFN(new_slot->device, 0), PCI_VENDOR_ID, &ID);
+
+	if (ID == 0xFFFFFFFF)
+		return 2;
+
+	pci_bus_read_config_byte(ctrl->pci_bus, PCI_DEVFN(new_slot->device, 0), 0x0B, &class_code);
+	pci_bus_read_config_byte(ctrl->pci_bus, PCI_DEVFN(new_slot->device, 0), PCI_HEADER_TYPE, &header_type);
+
+	if (header_type & 0x80)	/* Multi-function device */
+		max_functions = 8;
+	else
+		max_functions = 1;
+
+	while (function < max_functions) {
+		if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
+			/*  Recurse the subordinate bus */
+			pci_bus_read_config_byte(ctrl->pci_bus, PCI_DEVFN(new_slot->device, function), PCI_SECONDARY_BUS, &secondary_bus);
+
+			sub_bus = (int) secondary_bus;
+
+			/* Save the config headers for the secondary
+			 * bus.
+			 */
+			rc = cpqhp_save_config(ctrl, sub_bus, 0);
+			if (rc)
+				return(rc);
+			ctrl->pci_bus->number = new_slot->bus;
+
+		}
+
+		new_slot->status = 0;
+
+		for (cloop = 0; cloop < 0x20; cloop++)
+			pci_bus_read_config_dword(ctrl->pci_bus, PCI_DEVFN(new_slot->device, function), cloop << 2, (u32 *) &(new_slot->config_space[cloop]));
+
+		function++;
+
+		stop_it = 0;
+
+		/* this loop skips to the next present function
+		 * reading in the Class Code and the Header type.
+		 */
+		while ((function < max_functions) && (!stop_it)) {
+			pci_bus_read_config_dword(ctrl->pci_bus, PCI_DEVFN(new_slot->device, function), PCI_VENDOR_ID, &ID);
+
+			if (ID == 0xFFFFFFFF)
+				function++;
+			else {
+				pci_bus_read_config_byte(ctrl->pci_bus, PCI_DEVFN(new_slot->device, function), 0x0B, &class_code);
+				pci_bus_read_config_byte(ctrl->pci_bus, PCI_DEVFN(new_slot->device, function), PCI_HEADER_TYPE, &header_type);
+				stop_it++;
+			}
+		}
+
+	}
+
+	return 0;
+}
+
+
+/*
+ * cpqhp_save_base_addr_length
+ *
+ * Saves the length of all base address registers for the
+ * specified slot.  this is for hot plug REPLACE
+ *
+ * returns 0 if success
+ */
+int cpqhp_save_base_addr_length(struct controller *ctrl, struct pci_func *func)
+{
+	u8 cloop;
+	u8 header_type;
+	u8 secondary_bus;
+	u8 type;
+	int sub_bus;
+	u32 temp_register;
+	u32 base;
+	u32 rc;
+	struct pci_func *next;
+	int index = 0;
+	struct pci_bus *pci_bus = ctrl->pci_bus;
+	unsigned int devfn;
+
+	func = cpqhp_slot_find(func->bus, func->device, index++);
+
+	while (func != NULL) {
+		pci_bus->number = func->bus;
+		devfn = PCI_DEVFN(func->device, func->function);
+
+		/* Check for Bridge */
+		pci_bus_read_config_byte(pci_bus, devfn, PCI_HEADER_TYPE, &header_type);
+
+		if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
+			pci_bus_read_config_byte(pci_bus, devfn, PCI_SECONDARY_BUS, &secondary_bus);
+
+			sub_bus = (int) secondary_bus;
+
+			next = cpqhp_slot_list[sub_bus];
+
+			while (next != NULL) {
+				rc = cpqhp_save_base_addr_length(ctrl, next);
+				if (rc)
+					return rc;
+
+				next = next->next;
+			}
+			pci_bus->number = func->bus;
+
+			/* FIXME: this loop is duplicated in the non-bridge
+			 * case.  The two could be rolled together Figure out
+			 * IO and memory base lengths
+			 */
+			for (cloop = 0x10; cloop <= 0x14; cloop += 4) {
+				temp_register = 0xFFFFFFFF;
+				pci_bus_write_config_dword(pci_bus, devfn, cloop, temp_register);
+				pci_bus_read_config_dword(pci_bus, devfn, cloop, &base);
+				/* If this register is implemented */
+				if (base) {
+					if (base & 0x01L) {
+						/* IO base
+						 * set base = amount of IO space
+						 * requested
+						 */
+						base = base & 0xFFFFFFFE;
+						base = (~base) + 1;
+
+						type = 1;
+					} else {
+						/* memory base */
+						base = base & 0xFFFFFFF0;
+						base = (~base) + 1;
+
+						type = 0;
+					}
+				} else {
+					base = 0x0L;
+					type = 0;
+				}
+
+				/* Save information in slot structure */
+				func->base_length[(cloop - 0x10) >> 2] =
+				base;
+				func->base_type[(cloop - 0x10) >> 2] = type;
+
+			}	/* End of base register loop */
+
+		} else if ((header_type & 0x7F) == 0x00) {
+			/* Figure out IO and memory base lengths */
+			for (cloop = 0x10; cloop <= 0x24; cloop += 4) {
+				temp_register = 0xFFFFFFFF;
+				pci_bus_write_config_dword(pci_bus, devfn, cloop, temp_register);
+				pci_bus_read_config_dword(pci_bus, devfn, cloop, &base);
+
+				/* If this register is implemented */
+				if (base) {
+					if (base & 0x01L) {
+						/* IO base
+						 * base = amount of IO space
+						 * requested
+						 */
+						base = base & 0xFFFFFFFE;
+						base = (~base) + 1;
+
+						type = 1;
+					} else {
+						/* memory base
+						 * base = amount of memory
+						 * space requested
+						 */
+						base = base & 0xFFFFFFF0;
+						base = (~base) + 1;
+
+						type = 0;
+					}
+				} else {
+					base = 0x0L;
+					type = 0;
+				}
+
+				/* Save information in slot structure */
+				func->base_length[(cloop - 0x10) >> 2] = base;
+				func->base_type[(cloop - 0x10) >> 2] = type;
+
+			}	/* End of base register loop */
+
+		} else {	  /* Some other unknown header type */
+		}
+
+		/* find the next device in this slot */
+		func = cpqhp_slot_find(func->bus, func->device, index++);
+	}
+
+	return(0);
+}
+
+
+/*
+ * cpqhp_save_used_resources
+ *
+ * Stores used resource information for existing boards.  this is
+ * for boards that were in the system when this driver was loaded.
+ * this function is for hot plug ADD
+ *
+ * returns 0 if success
+ */
+int cpqhp_save_used_resources(struct controller *ctrl, struct pci_func *func)
+{
+	u8 cloop;
+	u8 header_type;
+	u8 secondary_bus;
+	u8 temp_byte;
+	u8 b_base;
+	u8 b_length;
+	u16 command;
+	u16 save_command;
+	u16 w_base;
+	u16 w_length;
+	u32 temp_register;
+	u32 save_base;
+	u32 base;
+	int index = 0;
+	struct pci_resource *mem_node;
+	struct pci_resource *p_mem_node;
+	struct pci_resource *io_node;
+	struct pci_resource *bus_node;
+	struct pci_bus *pci_bus = ctrl->pci_bus;
+	unsigned int devfn;
+
+	func = cpqhp_slot_find(func->bus, func->device, index++);
+
+	while ((func != NULL) && func->is_a_board) {
+		pci_bus->number = func->bus;
+		devfn = PCI_DEVFN(func->device, func->function);
+
+		/* Save the command register */
+		pci_bus_read_config_word(pci_bus, devfn, PCI_COMMAND, &save_command);
+
+		/* disable card */
+		command = 0x00;
+		pci_bus_write_config_word(pci_bus, devfn, PCI_COMMAND, command);
+
+		/* Check for Bridge */
+		pci_bus_read_config_byte(pci_bus, devfn, PCI_HEADER_TYPE, &header_type);
+
+		if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
+			/* Clear Bridge Control Register */
+			command = 0x00;
+			pci_bus_write_config_word(pci_bus, devfn, PCI_BRIDGE_CONTROL, command);
+			pci_bus_read_config_byte(pci_bus, devfn, PCI_SECONDARY_BUS, &secondary_bus);
+			pci_bus_read_config_byte(pci_bus, devfn, PCI_SUBORDINATE_BUS, &temp_byte);
+
+			bus_node = kmalloc(sizeof(*bus_node), GFP_KERNEL);
+			if (!bus_node)
+				return -ENOMEM;
+
+			bus_node->base = secondary_bus;
+			bus_node->length = temp_byte - secondary_bus + 1;
+
+			bus_node->next = func->bus_head;
+			func->bus_head = bus_node;
+
+			/* Save IO base and Limit registers */
+			pci_bus_read_config_byte(pci_bus, devfn, PCI_IO_BASE, &b_base);
+			pci_bus_read_config_byte(pci_bus, devfn, PCI_IO_LIMIT, &b_length);
+
+			if ((b_base <= b_length) && (save_command & 0x01)) {
+				io_node = kmalloc(sizeof(*io_node), GFP_KERNEL);
+				if (!io_node)
+					return -ENOMEM;
+
+				io_node->base = (b_base & 0xF0) << 8;
+				io_node->length = (b_length - b_base + 0x10) << 8;
+
+				io_node->next = func->io_head;
+				func->io_head = io_node;
+			}
+
+			/* Save memory base and Limit registers */
+			pci_bus_read_config_word(pci_bus, devfn, PCI_MEMORY_BASE, &w_base);
+			pci_bus_read_config_word(pci_bus, devfn, PCI_MEMORY_LIMIT, &w_length);
+
+			if ((w_base <= w_length) && (save_command & 0x02)) {
+				mem_node = kmalloc(sizeof(*mem_node), GFP_KERNEL);
+				if (!mem_node)
+					return -ENOMEM;
+
+				mem_node->base = w_base << 16;
+				mem_node->length = (w_length - w_base + 0x10) << 16;
+
+				mem_node->next = func->mem_head;
+				func->mem_head = mem_node;
+			}
+
+			/* Save prefetchable memory base and Limit registers */
+			pci_bus_read_config_word(pci_bus, devfn, PCI_PREF_MEMORY_BASE, &w_base);
+			pci_bus_read_config_word(pci_bus, devfn, PCI_PREF_MEMORY_LIMIT, &w_length);
+
+			if ((w_base <= w_length) && (save_command & 0x02)) {
+				p_mem_node = kmalloc(sizeof(*p_mem_node), GFP_KERNEL);
+				if (!p_mem_node)
+					return -ENOMEM;
+
+				p_mem_node->base = w_base << 16;
+				p_mem_node->length = (w_length - w_base + 0x10) << 16;
+
+				p_mem_node->next = func->p_mem_head;
+				func->p_mem_head = p_mem_node;
+			}
+			/* Figure out IO and memory base lengths */
+			for (cloop = 0x10; cloop <= 0x14; cloop += 4) {
+				pci_bus_read_config_dword(pci_bus, devfn, cloop, &save_base);
+
+				temp_register = 0xFFFFFFFF;
+				pci_bus_write_config_dword(pci_bus, devfn, cloop, temp_register);
+				pci_bus_read_config_dword(pci_bus, devfn, cloop, &base);
+
+				temp_register = base;
+
+				/* If this register is implemented */
+				if (base) {
+					if (((base & 0x03L) == 0x01)
+					    && (save_command & 0x01)) {
+						/* IO base
+						 * set temp_register = amount
+						 * of IO space requested
+						 */
+						temp_register = base & 0xFFFFFFFE;
+						temp_register = (~temp_register) + 1;
+
+						io_node = kmalloc(sizeof(*io_node),
+								GFP_KERNEL);
+						if (!io_node)
+							return -ENOMEM;
+
+						io_node->base =
+						save_base & (~0x03L);
+						io_node->length = temp_register;
+
+						io_node->next = func->io_head;
+						func->io_head = io_node;
+					} else
+						if (((base & 0x0BL) == 0x08)
+						    && (save_command & 0x02)) {
+						/* prefetchable memory base */
+						temp_register = base & 0xFFFFFFF0;
+						temp_register = (~temp_register) + 1;
+
+						p_mem_node = kmalloc(sizeof(*p_mem_node),
+								GFP_KERNEL);
+						if (!p_mem_node)
+							return -ENOMEM;
+
+						p_mem_node->base = save_base & (~0x0FL);
+						p_mem_node->length = temp_register;
+
+						p_mem_node->next = func->p_mem_head;
+						func->p_mem_head = p_mem_node;
+					} else
+						if (((base & 0x0BL) == 0x00)
+						    && (save_command & 0x02)) {
+						/* prefetchable memory base */
+						temp_register = base & 0xFFFFFFF0;
+						temp_register = (~temp_register) + 1;
+
+						mem_node = kmalloc(sizeof(*mem_node),
+								GFP_KERNEL);
+						if (!mem_node)
+							return -ENOMEM;
+
+						mem_node->base = save_base & (~0x0FL);
+						mem_node->length = temp_register;
+
+						mem_node->next = func->mem_head;
+						func->mem_head = mem_node;
+					} else
+						return(1);
+				}
+			}	/* End of base register loop */
+		/* Standard header */
+		} else if ((header_type & 0x7F) == 0x00) {
+			/* Figure out IO and memory base lengths */
+			for (cloop = 0x10; cloop <= 0x24; cloop += 4) {
+				pci_bus_read_config_dword(pci_bus, devfn, cloop, &save_base);
+
+				temp_register = 0xFFFFFFFF;
+				pci_bus_write_config_dword(pci_bus, devfn, cloop, temp_register);
+				pci_bus_read_config_dword(pci_bus, devfn, cloop, &base);
+
+				temp_register = base;
+
+				/* If this register is implemented */
+				if (base) {
+					if (((base & 0x03L) == 0x01)
+					    && (save_command & 0x01)) {
+						/* IO base
+						 * set temp_register = amount
+						 * of IO space requested
+						 */
+						temp_register = base & 0xFFFFFFFE;
+						temp_register = (~temp_register) + 1;
+
+						io_node = kmalloc(sizeof(*io_node),
+								GFP_KERNEL);
+						if (!io_node)
+							return -ENOMEM;
+
+						io_node->base = save_base & (~0x01L);
+						io_node->length = temp_register;
+
+						io_node->next = func->io_head;
+						func->io_head = io_node;
+					} else
+						if (((base & 0x0BL) == 0x08)
+						    && (save_command & 0x02)) {
+						/* prefetchable memory base */
+						temp_register = base & 0xFFFFFFF0;
+						temp_register = (~temp_register) + 1;
+
+						p_mem_node = kmalloc(sizeof(*p_mem_node),
+								GFP_KERNEL);
+						if (!p_mem_node)
+							return -ENOMEM;
+
+						p_mem_node->base = save_base & (~0x0FL);
+						p_mem_node->length = temp_register;
+
+						p_mem_node->next = func->p_mem_head;
+						func->p_mem_head = p_mem_node;
+					} else
+						if (((base & 0x0BL) == 0x00)
+						    && (save_command & 0x02)) {
+						/* prefetchable memory base */
+						temp_register = base & 0xFFFFFFF0;
+						temp_register = (~temp_register) + 1;
+
+						mem_node = kmalloc(sizeof(*mem_node),
+								GFP_KERNEL);
+						if (!mem_node)
+							return -ENOMEM;
+
+						mem_node->base = save_base & (~0x0FL);
+						mem_node->length = temp_register;
+
+						mem_node->next = func->mem_head;
+						func->mem_head = mem_node;
+					} else
+						return(1);
+				}
+			}	/* End of base register loop */
+		}
+
+		/* find the next device in this slot */
+		func = cpqhp_slot_find(func->bus, func->device, index++);
+	}
+
+	return 0;
+}
+
+
+/*
+ * cpqhp_configure_board
+ *
+ * Copies saved configuration information to one slot.
+ * this is called recursively for bridge devices.
+ * this is for hot plug REPLACE!
+ *
+ * returns 0 if success
+ */
+int cpqhp_configure_board(struct controller *ctrl, struct pci_func *func)
+{
+	int cloop;
+	u8 header_type;
+	u8 secondary_bus;
+	int sub_bus;
+	struct pci_func *next;
+	u32 temp;
+	u32 rc;
+	int index = 0;
+	struct pci_bus *pci_bus = ctrl->pci_bus;
+	unsigned int devfn;
+
+	func = cpqhp_slot_find(func->bus, func->device, index++);
+
+	while (func != NULL) {
+		pci_bus->number = func->bus;
+		devfn = PCI_DEVFN(func->device, func->function);
+
+		/* Start at the top of config space so that the control
+		 * registers are programmed last
+		 */
+		for (cloop = 0x3C; cloop > 0; cloop -= 4)
+			pci_bus_write_config_dword(pci_bus, devfn, cloop, func->config_space[cloop >> 2]);
+
+		pci_bus_read_config_byte(pci_bus, devfn, PCI_HEADER_TYPE, &header_type);
+
+		/* If this is a bridge device, restore subordinate devices */
+		if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
+			pci_bus_read_config_byte(pci_bus, devfn, PCI_SECONDARY_BUS, &secondary_bus);
+
+			sub_bus = (int) secondary_bus;
+
+			next = cpqhp_slot_list[sub_bus];
+
+			while (next != NULL) {
+				rc = cpqhp_configure_board(ctrl, next);
+				if (rc)
+					return rc;
+
+				next = next->next;
+			}
+		} else {
+
+			/* Check all the base Address Registers to make sure
+			 * they are the same.  If not, the board is different.
+			 */
+
+			for (cloop = 16; cloop < 40; cloop += 4) {
+				pci_bus_read_config_dword(pci_bus, devfn, cloop, &temp);
+
+				if (temp != func->config_space[cloop >> 2]) {
+					dbg("Config space compare failure!!! offset = %x\n", cloop);
+					dbg("bus = %x, device = %x, function = %x\n", func->bus, func->device, func->function);
+					dbg("temp = %x, config space = %x\n\n", temp, func->config_space[cloop >> 2]);
+					return 1;
+				}
+			}
+		}
+
+		func->configured = 1;
+
+		func = cpqhp_slot_find(func->bus, func->device, index++);
+	}
+
+	return 0;
+}
+
+
+/*
+ * cpqhp_valid_replace
+ *
+ * this function checks to see if a board is the same as the
+ * one it is replacing.  this check will detect if the device's
+ * vendor or device id's are the same
+ *
+ * returns 0 if the board is the same nonzero otherwise
+ */
+int cpqhp_valid_replace(struct controller *ctrl, struct pci_func *func)
+{
+	u8 cloop;
+	u8 header_type;
+	u8 secondary_bus;
+	u8 type;
+	u32 temp_register = 0;
+	u32 base;
+	u32 rc;
+	struct pci_func *next;
+	int index = 0;
+	struct pci_bus *pci_bus = ctrl->pci_bus;
+	unsigned int devfn;
+
+	if (!func->is_a_board)
+		return(ADD_NOT_SUPPORTED);
+
+	func = cpqhp_slot_find(func->bus, func->device, index++);
+
+	while (func != NULL) {
+		pci_bus->number = func->bus;
+		devfn = PCI_DEVFN(func->device, func->function);
+
+		pci_bus_read_config_dword(pci_bus, devfn, PCI_VENDOR_ID, &temp_register);
+
+		/* No adapter present */
+		if (temp_register == 0xFFFFFFFF)
+			return(NO_ADAPTER_PRESENT);
+
+		if (temp_register != func->config_space[0])
+			return(ADAPTER_NOT_SAME);
+
+		/* Check for same revision number and class code */
+		pci_bus_read_config_dword(pci_bus, devfn, PCI_CLASS_REVISION, &temp_register);
+
+		/* Adapter not the same */
+		if (temp_register != func->config_space[0x08 >> 2])
+			return(ADAPTER_NOT_SAME);
+
+		/* Check for Bridge */
+		pci_bus_read_config_byte(pci_bus, devfn, PCI_HEADER_TYPE, &header_type);
+
+		if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
+			/* In order to continue checking, we must program the
+			 * bus registers in the bridge to respond to accesses
+			 * for its subordinate bus(es)
+			 */
+
+			temp_register = func->config_space[0x18 >> 2];
+			pci_bus_write_config_dword(pci_bus, devfn, PCI_PRIMARY_BUS, temp_register);
+
+			secondary_bus = (temp_register >> 8) & 0xFF;
+
+			next = cpqhp_slot_list[secondary_bus];
+
+			while (next != NULL) {
+				rc = cpqhp_valid_replace(ctrl, next);
+				if (rc)
+					return rc;
+
+				next = next->next;
+			}
+
+		}
+		/* Check to see if it is a standard config header */
+		else if ((header_type & 0x7F) == PCI_HEADER_TYPE_NORMAL) {
+			/* Check subsystem vendor and ID */
+			pci_bus_read_config_dword(pci_bus, devfn, PCI_SUBSYSTEM_VENDOR_ID, &temp_register);
+
+			if (temp_register != func->config_space[0x2C >> 2]) {
+				/* If it's a SMART-2 and the register isn't
+				 * filled in, ignore the difference because
+				 * they just have an old rev of the firmware
+				 */
+				if (!((func->config_space[0] == 0xAE100E11)
+				      && (temp_register == 0x00L)))
+					return(ADAPTER_NOT_SAME);
+			}
+			/* Figure out IO and memory base lengths */
+			for (cloop = 0x10; cloop <= 0x24; cloop += 4) {
+				temp_register = 0xFFFFFFFF;
+				pci_bus_write_config_dword(pci_bus, devfn, cloop, temp_register);
+				pci_bus_read_config_dword(pci_bus, devfn, cloop, &base);
+
+				/* If this register is implemented */
+				if (base) {
+					if (base & 0x01L) {
+						/* IO base
+						 * set base = amount of IO
+						 * space requested
+						 */
+						base = base & 0xFFFFFFFE;
+						base = (~base) + 1;
+
+						type = 1;
+					} else {
+						/* memory base */
+						base = base & 0xFFFFFFF0;
+						base = (~base) + 1;
+
+						type = 0;
+					}
+				} else {
+					base = 0x0L;
+					type = 0;
+				}
+
+				/* Check information in slot structure */
+				if (func->base_length[(cloop - 0x10) >> 2] != base)
+					return(ADAPTER_NOT_SAME);
+
+				if (func->base_type[(cloop - 0x10) >> 2] != type)
+					return(ADAPTER_NOT_SAME);
+
+			}	/* End of base register loop */
+
+		}		/* End of (type 0 config space) else */
+		else {
+			/* this is not a type 0 or 1 config space header so
+			 * we don't know how to do it
+			 */
+			return(DEVICE_TYPE_NOT_SUPPORTED);
+		}
+
+		/* Get the next function */
+		func = cpqhp_slot_find(func->bus, func->device, index++);
+	}
+
+
+	return 0;
+}
+
+
+/*
+ * cpqhp_find_available_resources
+ *
+ * Finds available memory, IO, and IRQ resources for programming
+ * devices which may be added to the system
+ * this function is for hot plug ADD!
+ *
+ * returns 0 if success
+ */
+int cpqhp_find_available_resources(struct controller *ctrl, void __iomem *rom_start)
+{
+	u8 temp;
+	u8 populated_slot;
+	u8 bridged_slot;
+	void __iomem *one_slot;
+	void __iomem *rom_resource_table;
+	struct pci_func *func = NULL;
+	int i = 10, index;
+	u32 temp_dword, rc;
+	struct pci_resource *mem_node;
+	struct pci_resource *p_mem_node;
+	struct pci_resource *io_node;
+	struct pci_resource *bus_node;
+
+	rom_resource_table = detect_HRT_floating_pointer(rom_start, rom_start+0xffff);
+	dbg("rom_resource_table = %p\n", rom_resource_table);
+
+	if (rom_resource_table == NULL)
+		return -ENODEV;
+
+	/* Sum all resources and setup resource maps */
+	unused_IRQ = readl(rom_resource_table + UNUSED_IRQ);
+	dbg("unused_IRQ = %x\n", unused_IRQ);
+
+	temp = 0;
+	while (unused_IRQ) {
+		if (unused_IRQ & 1) {
+			cpqhp_disk_irq = temp;
+			break;
+		}
+		unused_IRQ = unused_IRQ >> 1;
+		temp++;
+	}
+
+	dbg("cpqhp_disk_irq= %d\n", cpqhp_disk_irq);
+	unused_IRQ = unused_IRQ >> 1;
+	temp++;
+
+	while (unused_IRQ) {
+		if (unused_IRQ & 1) {
+			cpqhp_nic_irq = temp;
+			break;
+		}
+		unused_IRQ = unused_IRQ >> 1;
+		temp++;
+	}
+
+	dbg("cpqhp_nic_irq= %d\n", cpqhp_nic_irq);
+	unused_IRQ = readl(rom_resource_table + PCIIRQ);
+
+	temp = 0;
+
+	if (!cpqhp_nic_irq)
+		cpqhp_nic_irq = ctrl->cfgspc_irq;
+
+	if (!cpqhp_disk_irq)
+		cpqhp_disk_irq = ctrl->cfgspc_irq;
+
+	dbg("cpqhp_disk_irq, cpqhp_nic_irq= %d, %d\n", cpqhp_disk_irq, cpqhp_nic_irq);
+
+	rc = compaq_nvram_load(rom_start, ctrl);
+	if (rc)
+		return rc;
+
+	one_slot = rom_resource_table + sizeof(struct hrt);
+
+	i = readb(rom_resource_table + NUMBER_OF_ENTRIES);
+	dbg("number_of_entries = %d\n", i);
+
+	if (!readb(one_slot + SECONDARY_BUS))
+		return 1;
+
+	dbg("dev|IO base|length|Mem base|length|Pre base|length|PB SB MB\n");
+
+	while (i && readb(one_slot + SECONDARY_BUS)) {
+		u8 dev_func = readb(one_slot + DEV_FUNC);
+		u8 primary_bus = readb(one_slot + PRIMARY_BUS);
+		u8 secondary_bus = readb(one_slot + SECONDARY_BUS);
+		u8 max_bus = readb(one_slot + MAX_BUS);
+		u16 io_base = readw(one_slot + IO_BASE);
+		u16 io_length = readw(one_slot + IO_LENGTH);
+		u16 mem_base = readw(one_slot + MEM_BASE);
+		u16 mem_length = readw(one_slot + MEM_LENGTH);
+		u16 pre_mem_base = readw(one_slot + PRE_MEM_BASE);
+		u16 pre_mem_length = readw(one_slot + PRE_MEM_LENGTH);
+
+		dbg("%2.2x | %4.4x  | %4.4x | %4.4x   | %4.4x | %4.4x   | %4.4x |%2.2x %2.2x %2.2x\n",
+		    dev_func, io_base, io_length, mem_base, mem_length, pre_mem_base, pre_mem_length,
+		    primary_bus, secondary_bus, max_bus);
+
+		/* If this entry isn't for our controller's bus, ignore it */
+		if (primary_bus != ctrl->bus) {
+			i--;
+			one_slot += sizeof(struct slot_rt);
+			continue;
+		}
+		/* find out if this entry is for an occupied slot */
+		ctrl->pci_bus->number = primary_bus;
+		pci_bus_read_config_dword(ctrl->pci_bus, dev_func, PCI_VENDOR_ID, &temp_dword);
+		dbg("temp_D_word = %x\n", temp_dword);
+
+		if (temp_dword != 0xFFFFFFFF) {
+			index = 0;
+			func = cpqhp_slot_find(primary_bus, dev_func >> 3, 0);
+
+			while (func && (func->function != (dev_func & 0x07))) {
+				dbg("func = %p (bus, dev, fun) = (%d, %d, %d)\n", func, primary_bus, dev_func >> 3, index);
+				func = cpqhp_slot_find(primary_bus, dev_func >> 3, index++);
+			}
+
+			/* If we can't find a match, skip this table entry */
+			if (!func) {
+				i--;
+				one_slot += sizeof(struct slot_rt);
+				continue;
+			}
+			/* this may not work and shouldn't be used */
+			if (secondary_bus != primary_bus)
+				bridged_slot = 1;
+			else
+				bridged_slot = 0;
+
+			populated_slot = 1;
+		} else {
+			populated_slot = 0;
+			bridged_slot = 0;
+		}
+
+
+		/* If we've got a valid IO base, use it */
+
+		temp_dword = io_base + io_length;
+
+		if ((io_base) && (temp_dword < 0x10000)) {
+			io_node = kmalloc(sizeof(*io_node), GFP_KERNEL);
+			if (!io_node)
+				return -ENOMEM;
+
+			io_node->base = io_base;
+			io_node->length = io_length;
+
+			dbg("found io_node(base, length) = %x, %x\n",
+					io_node->base, io_node->length);
+			dbg("populated slot =%d \n", populated_slot);
+			if (!populated_slot) {
+				io_node->next = ctrl->io_head;
+				ctrl->io_head = io_node;
+			} else {
+				io_node->next = func->io_head;
+				func->io_head = io_node;
+			}
+		}
+
+		/* If we've got a valid memory base, use it */
+		temp_dword = mem_base + mem_length;
+		if ((mem_base) && (temp_dword < 0x10000)) {
+			mem_node = kmalloc(sizeof(*mem_node), GFP_KERNEL);
+			if (!mem_node)
+				return -ENOMEM;
+
+			mem_node->base = mem_base << 16;
+
+			mem_node->length = mem_length << 16;
+
+			dbg("found mem_node(base, length) = %x, %x\n",
+					mem_node->base, mem_node->length);
+			dbg("populated slot =%d \n", populated_slot);
+			if (!populated_slot) {
+				mem_node->next = ctrl->mem_head;
+				ctrl->mem_head = mem_node;
+			} else {
+				mem_node->next = func->mem_head;
+				func->mem_head = mem_node;
+			}
+		}
+
+		/* If we've got a valid prefetchable memory base, and
+		 * the base + length isn't greater than 0xFFFF
+		 */
+		temp_dword = pre_mem_base + pre_mem_length;
+		if ((pre_mem_base) && (temp_dword < 0x10000)) {
+			p_mem_node = kmalloc(sizeof(*p_mem_node), GFP_KERNEL);
+			if (!p_mem_node)
+				return -ENOMEM;
+
+			p_mem_node->base = pre_mem_base << 16;
+
+			p_mem_node->length = pre_mem_length << 16;
+			dbg("found p_mem_node(base, length) = %x, %x\n",
+					p_mem_node->base, p_mem_node->length);
+			dbg("populated slot =%d \n", populated_slot);
+
+			if (!populated_slot) {
+				p_mem_node->next = ctrl->p_mem_head;
+				ctrl->p_mem_head = p_mem_node;
+			} else {
+				p_mem_node->next = func->p_mem_head;
+				func->p_mem_head = p_mem_node;
+			}
+		}
+
+		/* If we've got a valid bus number, use it
+		 * The second condition is to ignore bus numbers on
+		 * populated slots that don't have PCI-PCI bridges
+		 */
+		if (secondary_bus && (secondary_bus != primary_bus)) {
+			bus_node = kmalloc(sizeof(*bus_node), GFP_KERNEL);
+			if (!bus_node)
+				return -ENOMEM;
+
+			bus_node->base = secondary_bus;
+			bus_node->length = max_bus - secondary_bus + 1;
+			dbg("found bus_node(base, length) = %x, %x\n",
+					bus_node->base, bus_node->length);
+			dbg("populated slot =%d \n", populated_slot);
+			if (!populated_slot) {
+				bus_node->next = ctrl->bus_head;
+				ctrl->bus_head = bus_node;
+			} else {
+				bus_node->next = func->bus_head;
+				func->bus_head = bus_node;
+			}
+		}
+
+		i--;
+		one_slot += sizeof(struct slot_rt);
+	}
+
+	/* If all of the following fail, we don't have any resources for
+	 * hot plug add
+	 */
+	rc = 1;
+	rc &= cpqhp_resource_sort_and_combine(&(ctrl->mem_head));
+	rc &= cpqhp_resource_sort_and_combine(&(ctrl->p_mem_head));
+	rc &= cpqhp_resource_sort_and_combine(&(ctrl->io_head));
+	rc &= cpqhp_resource_sort_and_combine(&(ctrl->bus_head));
+
+	return rc;
+}
+
+
+/*
+ * cpqhp_return_board_resources
+ *
+ * this routine returns all resources allocated to a board to
+ * the available pool.
+ *
+ * returns 0 if success
+ */
+int cpqhp_return_board_resources(struct pci_func *func, struct resource_lists *resources)
+{
+	int rc = 0;
+	struct pci_resource *node;
+	struct pci_resource *t_node;
+	dbg("%s\n", __func__);
+
+	if (!func)
+		return 1;
+
+	node = func->io_head;
+	func->io_head = NULL;
+	while (node) {
+		t_node = node->next;
+		return_resource(&(resources->io_head), node);
+		node = t_node;
+	}
+
+	node = func->mem_head;
+	func->mem_head = NULL;
+	while (node) {
+		t_node = node->next;
+		return_resource(&(resources->mem_head), node);
+		node = t_node;
+	}
+
+	node = func->p_mem_head;
+	func->p_mem_head = NULL;
+	while (node) {
+		t_node = node->next;
+		return_resource(&(resources->p_mem_head), node);
+		node = t_node;
+	}
+
+	node = func->bus_head;
+	func->bus_head = NULL;
+	while (node) {
+		t_node = node->next;
+		return_resource(&(resources->bus_head), node);
+		node = t_node;
+	}
+
+	rc |= cpqhp_resource_sort_and_combine(&(resources->mem_head));
+	rc |= cpqhp_resource_sort_and_combine(&(resources->p_mem_head));
+	rc |= cpqhp_resource_sort_and_combine(&(resources->io_head));
+	rc |= cpqhp_resource_sort_and_combine(&(resources->bus_head));
+
+	return rc;
+}
+
+
+/*
+ * cpqhp_destroy_resource_list
+ *
+ * Puts node back in the resource list pointed to by head
+ */
+void cpqhp_destroy_resource_list(struct resource_lists *resources)
+{
+	struct pci_resource *res, *tres;
+
+	res = resources->io_head;
+	resources->io_head = NULL;
+
+	while (res) {
+		tres = res;
+		res = res->next;
+		kfree(tres);
+	}
+
+	res = resources->mem_head;
+	resources->mem_head = NULL;
+
+	while (res) {
+		tres = res;
+		res = res->next;
+		kfree(tres);
+	}
+
+	res = resources->p_mem_head;
+	resources->p_mem_head = NULL;
+
+	while (res) {
+		tres = res;
+		res = res->next;
+		kfree(tres);
+	}
+
+	res = resources->bus_head;
+	resources->bus_head = NULL;
+
+	while (res) {
+		tres = res;
+		res = res->next;
+		kfree(tres);
+	}
+}
+
+
+/*
+ * cpqhp_destroy_board_resources
+ *
+ * Puts node back in the resource list pointed to by head
+ */
+void cpqhp_destroy_board_resources(struct pci_func *func)
+{
+	struct pci_resource *res, *tres;
+
+	res = func->io_head;
+	func->io_head = NULL;
+
+	while (res) {
+		tres = res;
+		res = res->next;
+		kfree(tres);
+	}
+
+	res = func->mem_head;
+	func->mem_head = NULL;
+
+	while (res) {
+		tres = res;
+		res = res->next;
+		kfree(tres);
+	}
+
+	res = func->p_mem_head;
+	func->p_mem_head = NULL;
+
+	while (res) {
+		tres = res;
+		res = res->next;
+		kfree(tres);
+	}
+
+	res = func->bus_head;
+	func->bus_head = NULL;
+
+	while (res) {
+		tres = res;
+		res = res->next;
+		kfree(tres);
+	}
+}
diff --git a/drivers/pci/hotplug/cpqphp_sysfs.c b/drivers/pci/hotplug/cpqphp_sysfs.c
new file mode 100644
index 000000000..fed1360ee
--- /dev/null
+++ b/drivers/pci/hotplug/cpqphp_sysfs.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Compaq Hot Plug Controller Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001,2003 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/workqueue.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <linux/mutex.h>
+#include <linux/debugfs.h>
+#include "cpqphp.h"
+
+static DEFINE_MUTEX(cpqphp_mutex);
+static int show_ctrl(struct controller *ctrl, char *buf)
+{
+	char *out = buf;
+	int index;
+	struct pci_resource *res;
+
+	out += sprintf(buf, "Free resources: memory\n");
+	index = 11;
+	res = ctrl->mem_head;
+	while (res && index--) {
+		out += sprintf(out, "start = %8.8x, length = %8.8x\n", res->base, res->length);
+		res = res->next;
+	}
+	out += sprintf(out, "Free resources: prefetchable memory\n");
+	index = 11;
+	res = ctrl->p_mem_head;
+	while (res && index--) {
+		out += sprintf(out, "start = %8.8x, length = %8.8x\n", res->base, res->length);
+		res = res->next;
+	}
+	out += sprintf(out, "Free resources: IO\n");
+	index = 11;
+	res = ctrl->io_head;
+	while (res && index--) {
+		out += sprintf(out, "start = %8.8x, length = %8.8x\n", res->base, res->length);
+		res = res->next;
+	}
+	out += sprintf(out, "Free resources: bus numbers\n");
+	index = 11;
+	res = ctrl->bus_head;
+	while (res && index--) {
+		out += sprintf(out, "start = %8.8x, length = %8.8x\n", res->base, res->length);
+		res = res->next;
+	}
+
+	return out - buf;
+}
+
+static int show_dev(struct controller *ctrl, char *buf)
+{
+	char *out = buf;
+	int index;
+	struct pci_resource *res;
+	struct pci_func *new_slot;
+	struct slot *slot;
+
+	slot = ctrl->slot;
+
+	while (slot) {
+		new_slot = cpqhp_slot_find(slot->bus, slot->device, 0);
+		if (!new_slot)
+			break;
+		out += sprintf(out, "assigned resources: memory\n");
+		index = 11;
+		res = new_slot->mem_head;
+		while (res && index--) {
+			out += sprintf(out, "start = %8.8x, length = %8.8x\n", res->base, res->length);
+			res = res->next;
+		}
+		out += sprintf(out, "assigned resources: prefetchable memory\n");
+		index = 11;
+		res = new_slot->p_mem_head;
+		while (res && index--) {
+			out += sprintf(out, "start = %8.8x, length = %8.8x\n", res->base, res->length);
+			res = res->next;
+		}
+		out += sprintf(out, "assigned resources: IO\n");
+		index = 11;
+		res = new_slot->io_head;
+		while (res && index--) {
+			out += sprintf(out, "start = %8.8x, length = %8.8x\n", res->base, res->length);
+			res = res->next;
+		}
+		out += sprintf(out, "assigned resources: bus numbers\n");
+		index = 11;
+		res = new_slot->bus_head;
+		while (res && index--) {
+			out += sprintf(out, "start = %8.8x, length = %8.8x\n", res->base, res->length);
+			res = res->next;
+		}
+		slot = slot->next;
+	}
+
+	return out - buf;
+}
+
+static int spew_debug_info(struct controller *ctrl, char *data, int size)
+{
+	int used;
+
+	used = size - show_ctrl(ctrl, data);
+	used = (size - used) - show_dev(ctrl, &data[used]);
+	return used;
+}
+
+struct ctrl_dbg {
+	int size;
+	char *data;
+	struct controller *ctrl;
+};
+
+#define MAX_OUTPUT	(4*PAGE_SIZE)
+
+static int open(struct inode *inode, struct file *file)
+{
+	struct controller *ctrl = inode->i_private;
+	struct ctrl_dbg *dbg;
+	int retval = -ENOMEM;
+
+	mutex_lock(&cpqphp_mutex);
+	dbg = kmalloc(sizeof(*dbg), GFP_KERNEL);
+	if (!dbg)
+		goto exit;
+	dbg->data = kmalloc(MAX_OUTPUT, GFP_KERNEL);
+	if (!dbg->data) {
+		kfree(dbg);
+		goto exit;
+	}
+	dbg->size = spew_debug_info(ctrl, dbg->data, MAX_OUTPUT);
+	file->private_data = dbg;
+	retval = 0;
+exit:
+	mutex_unlock(&cpqphp_mutex);
+	return retval;
+}
+
+static loff_t lseek(struct file *file, loff_t off, int whence)
+{
+	struct ctrl_dbg *dbg = file->private_data;
+	return fixed_size_llseek(file, off, whence, dbg->size);
+}
+
+static ssize_t read(struct file *file, char __user *buf,
+		    size_t nbytes, loff_t *ppos)
+{
+	struct ctrl_dbg *dbg = file->private_data;
+	return simple_read_from_buffer(buf, nbytes, ppos, dbg->data, dbg->size);
+}
+
+static int release(struct inode *inode, struct file *file)
+{
+	struct ctrl_dbg *dbg = file->private_data;
+
+	kfree(dbg->data);
+	kfree(dbg);
+	return 0;
+}
+
+static const struct file_operations debug_ops = {
+	.owner = THIS_MODULE,
+	.open = open,
+	.llseek = lseek,
+	.read = read,
+	.release = release,
+};
+
+static struct dentry *root;
+
+void cpqhp_initialize_debugfs(void)
+{
+	if (!root)
+		root = debugfs_create_dir("cpqhp", NULL);
+}
+
+void cpqhp_shutdown_debugfs(void)
+{
+	debugfs_remove(root);
+}
+
+void cpqhp_create_debugfs_files(struct controller *ctrl)
+{
+	ctrl->dentry = debugfs_create_file(dev_name(&ctrl->pci_dev->dev),
+					   S_IRUGO, root, ctrl, &debug_ops);
+}
+
+void cpqhp_remove_debugfs_files(struct controller *ctrl)
+{
+	debugfs_remove(ctrl->dentry);
+	ctrl->dentry = NULL;
+}
+
diff --git a/drivers/pci/hotplug/ibmphp.h b/drivers/pci/hotplug/ibmphp.h
new file mode 100644
index 000000000..41eafe511
--- /dev/null
+++ b/drivers/pci/hotplug/ibmphp.h
@@ -0,0 +1,748 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef __IBMPHP_H
+#define __IBMPHP_H
+
+/*
+ * IBM Hot Plug Controller Driver
+ *
+ * Written By: Jyoti Shah, Tong Yu, Irene Zubarev, IBM Corporation
+ *
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001-2003 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <gregkh@us.ibm.com>
+ *
+ */
+
+#include <linux/pci_hotplug.h>
+
+extern int ibmphp_debug;
+
+#if !defined(MODULE)
+	#define MY_NAME "ibmphpd"
+#else
+	#define MY_NAME THIS_MODULE->name
+#endif
+#define debug(fmt, arg...) do { if (ibmphp_debug == 1) printk(KERN_DEBUG "%s: " fmt, MY_NAME, ## arg); } while (0)
+#define debug_pci(fmt, arg...) do { if (ibmphp_debug) printk(KERN_DEBUG "%s: " fmt, MY_NAME, ## arg); } while (0)
+#define err(format, arg...) printk(KERN_ERR "%s: " format, MY_NAME, ## arg)
+#define info(format, arg...) printk(KERN_INFO "%s: " format, MY_NAME, ## arg)
+#define warn(format, arg...) printk(KERN_WARNING "%s: " format, MY_NAME, ## arg)
+
+
+/* EBDA stuff */
+
+/***********************************************************
+* SLOT CAPABILITY                                          *
+***********************************************************/
+
+#define EBDA_SLOT_133_MAX		0x20
+#define EBDA_SLOT_100_MAX		0x10
+#define EBDA_SLOT_66_MAX		0x02
+#define EBDA_SLOT_PCIX_CAP		0x08
+
+
+/************************************************************
+*  RESOURCE TYPE                                             *
+************************************************************/
+
+#define EBDA_RSRC_TYPE_MASK		0x03
+#define EBDA_IO_RSRC_TYPE		0x00
+#define EBDA_MEM_RSRC_TYPE		0x01
+#define EBDA_PFM_RSRC_TYPE		0x03
+#define EBDA_RES_RSRC_TYPE		0x02
+
+
+/*************************************************************
+*  IO RESTRICTION TYPE                                       *
+*************************************************************/
+
+#define EBDA_IO_RESTRI_MASK		0x0c
+#define EBDA_NO_RESTRI			0x00
+#define EBDA_AVO_VGA_ADDR		0x04
+#define EBDA_AVO_VGA_ADDR_AND_ALIA	0x08
+#define EBDA_AVO_ISA_ADDR		0x0c
+
+
+/**************************************************************
+*  DEVICE TYPE DEF                                            *
+**************************************************************/
+
+#define EBDA_DEV_TYPE_MASK		0x10
+#define EBDA_PCI_DEV			0x10
+#define EBDA_NON_PCI_DEV		0x00
+
+
+/***************************************************************
+*  PRIMARY DEF DEFINITION                                      *
+***************************************************************/
+
+#define EBDA_PRI_DEF_MASK		0x20
+#define EBDA_PRI_PCI_BUS_INFO		0x20
+#define EBDA_NORM_DEV_RSRC_INFO		0x00
+
+
+//--------------------------------------------------------------
+// RIO TABLE DATA STRUCTURE
+//--------------------------------------------------------------
+
+struct rio_table_hdr {
+	u8 ver_num;
+	u8 scal_count;
+	u8 riodev_count;
+	u16 offset;
+};
+
+//-------------------------------------------------------------
+// SCALABILITY DETAIL
+//-------------------------------------------------------------
+
+struct scal_detail {
+	u8 node_id;
+	u32 cbar;
+	u8 port0_node_connect;
+	u8 port0_port_connect;
+	u8 port1_node_connect;
+	u8 port1_port_connect;
+	u8 port2_node_connect;
+	u8 port2_port_connect;
+	u8 chassis_num;
+//	struct list_head scal_detail_list;
+};
+
+//--------------------------------------------------------------
+// RIO DETAIL
+//--------------------------------------------------------------
+
+struct rio_detail {
+	u8 rio_node_id;
+	u32 bbar;
+	u8 rio_type;
+	u8 owner_id;
+	u8 port0_node_connect;
+	u8 port0_port_connect;
+	u8 port1_node_connect;
+	u8 port1_port_connect;
+	u8 first_slot_num;
+	u8 status;
+	u8 wpindex;
+	u8 chassis_num;
+	struct list_head rio_detail_list;
+};
+
+struct opt_rio {
+	u8 rio_type;
+	u8 chassis_num;
+	u8 first_slot_num;
+	u8 middle_num;
+	struct list_head opt_rio_list;
+};
+
+struct opt_rio_lo {
+	u8 rio_type;
+	u8 chassis_num;
+	u8 first_slot_num;
+	u8 middle_num;
+	u8 pack_count;
+	struct list_head opt_rio_lo_list;
+};
+
+/****************************************************************
+*  HPC DESCRIPTOR NODE                                          *
+****************************************************************/
+
+struct ebda_hpc_list {
+	u8 format;
+	u16 num_ctlrs;
+	short phys_addr;
+//      struct list_head ebda_hpc_list;
+};
+/*****************************************************************
+*   IN HPC DATA STRUCTURE, THE ASSOCIATED SLOT AND BUS           *
+*   STRUCTURE                                                    *
+*****************************************************************/
+
+struct ebda_hpc_slot {
+	u8 slot_num;
+	u32 slot_bus_num;
+	u8 ctl_index;
+	u8 slot_cap;
+};
+
+struct ebda_hpc_bus {
+	u32 bus_num;
+	u8 slots_at_33_conv;
+	u8 slots_at_66_conv;
+	u8 slots_at_66_pcix;
+	u8 slots_at_100_pcix;
+	u8 slots_at_133_pcix;
+};
+
+
+/********************************************************************
+*   THREE TYPE OF HOT PLUG CONTROLLER                                *
+********************************************************************/
+
+struct isa_ctlr_access {
+	u16 io_start;
+	u16 io_end;
+};
+
+struct pci_ctlr_access {
+	u8 bus;
+	u8 dev_fun;
+};
+
+struct wpeg_i2c_ctlr_access {
+	ulong wpegbbar;
+	u8 i2c_addr;
+};
+
+#define HPC_DEVICE_ID		0x0246
+#define HPC_SUBSYSTEM_ID	0x0247
+#define HPC_PCI_OFFSET		0x40
+/*************************************************************************
+*   RSTC DESCRIPTOR NODE                                                 *
+*************************************************************************/
+
+struct ebda_rsrc_list {
+	u8 format;
+	u16 num_entries;
+	u16 phys_addr;
+	struct ebda_rsrc_list *next;
+};
+
+
+/***************************************************************************
+*   PCI RSRC NODE                                                          *
+***************************************************************************/
+
+struct ebda_pci_rsrc {
+	u8 rsrc_type;
+	u8 bus_num;
+	u8 dev_fun;
+	u32 start_addr;
+	u32 end_addr;
+	u8 marked;	/* for NVRAM */
+	struct list_head ebda_pci_rsrc_list;
+};
+
+
+/***********************************************************
+* BUS_INFO DATE STRUCTURE                                  *
+***********************************************************/
+
+struct bus_info {
+	u8 slot_min;
+	u8 slot_max;
+	u8 slot_count;
+	u8 busno;
+	u8 controller_id;
+	u8 current_speed;
+	u8 current_bus_mode;
+	u8 index;
+	u8 slots_at_33_conv;
+	u8 slots_at_66_conv;
+	u8 slots_at_66_pcix;
+	u8 slots_at_100_pcix;
+	u8 slots_at_133_pcix;
+	struct list_head bus_info_list;
+};
+
+
+/***********************************************************
+* GLOBAL VARIABLES                                         *
+***********************************************************/
+extern struct list_head ibmphp_ebda_pci_rsrc_head;
+extern struct list_head ibmphp_slot_head;
+/***********************************************************
+* FUNCTION PROTOTYPES                                      *
+***********************************************************/
+
+void ibmphp_free_ebda_hpc_queue(void);
+int ibmphp_access_ebda(void);
+struct slot *ibmphp_get_slot_from_physical_num(u8);
+void ibmphp_free_bus_info_queue(void);
+void ibmphp_free_ebda_pci_rsrc_queue(void);
+struct bus_info *ibmphp_find_same_bus_num(u32);
+int ibmphp_get_bus_index(u8);
+u16 ibmphp_get_total_controllers(void);
+int ibmphp_register_pci(void);
+
+/* passed parameters */
+#define MEM		0
+#define IO		1
+#define PFMEM		2
+
+/* bit masks */
+#define RESTYPE		0x03
+#define IOMASK		0x00	/* will need to take its complement */
+#define MMASK		0x01
+#define PFMASK		0x03
+#define PCIDEVMASK	0x10	/* we should always have PCI devices */
+#define PRIMARYBUSMASK	0x20
+
+/* pci specific defines */
+#define PCI_VENDOR_ID_NOTVALID		0xFFFF
+#define PCI_HEADER_TYPE_MULTIDEVICE	0x80
+#define PCI_HEADER_TYPE_MULTIBRIDGE	0x81
+
+#define LATENCY		0x64
+#define CACHE		64
+#define DEVICEENABLE	0x015F		/* CPQ has 0x0157 */
+
+#define IOBRIDGE	0x1000		/* 4k */
+#define MEMBRIDGE	0x100000	/* 1M */
+
+/* irqs */
+#define SCSI_IRQ	0x09
+#define LAN_IRQ		0x0A
+#define OTHER_IRQ	0x0B
+
+/* Data Structures */
+
+/* type is of the form x x xx xx
+ *                     | |  |  |_ 00 - I/O, 01 - Memory, 11 - PFMemory
+ *                     | |  - 00 - No Restrictions, 01 - Avoid VGA, 10 - Avoid
+ *                     | |    VGA and their aliases, 11 - Avoid ISA
+ *                     | - 1 - PCI device, 0 - non pci device
+ *                     - 1 - Primary PCI Bus Information (0 if Normal device)
+ * the IO restrictions [2:3] are only for primary buses
+ */
+
+
+/* we need this struct because there could be several resource blocks
+ * allocated per primary bus in the EBDA
+ */
+struct range_node {
+	int rangeno;
+	u32 start;
+	u32 end;
+	struct range_node *next;
+};
+
+struct bus_node {
+	u8 busno;
+	int noIORanges;
+	struct range_node *rangeIO;
+	int noMemRanges;
+	struct range_node *rangeMem;
+	int noPFMemRanges;
+	struct range_node *rangePFMem;
+	int needIOUpdate;
+	int needMemUpdate;
+	int needPFMemUpdate;
+	struct resource_node *firstIO;	/* first IO resource on the Bus */
+	struct resource_node *firstMem;	/* first memory resource on the Bus */
+	struct resource_node *firstPFMem;	/* first prefetchable memory resource on the Bus */
+	struct resource_node *firstPFMemFromMem;	/* when run out of pfmem available, taking from Mem */
+	struct list_head bus_list;
+};
+
+struct resource_node {
+	int rangeno;
+	u8 busno;
+	u8 devfunc;
+	u32 start;
+	u32 end;
+	u32 len;
+	int type;		/* MEM, IO, PFMEM */
+	u8 fromMem;		/* this is to indicate that the range is from
+				 * the Memory bucket rather than from PFMem */
+	struct resource_node *next;
+	struct resource_node *nextRange;	/* for the other mem range on bus */
+};
+
+struct res_needed {
+	u32 mem;
+	u32 pfmem;
+	u32 io;
+	u8 not_correct;		/* needed for return */
+	int devices[32];	/* for device numbers behind this bridge */
+};
+
+/* functions */
+
+int ibmphp_rsrc_init(void);
+int ibmphp_add_resource(struct resource_node *);
+int ibmphp_remove_resource(struct resource_node *);
+int ibmphp_find_resource(struct bus_node *, u32, struct resource_node **, int);
+int ibmphp_check_resource(struct resource_node *, u8);
+int ibmphp_remove_bus(struct bus_node *, u8);
+void ibmphp_free_resources(void);
+int ibmphp_add_pfmem_from_mem(struct resource_node *);
+struct bus_node *ibmphp_find_res_bus(u8);
+void ibmphp_print_test(void);	/* for debugging purposes */
+
+int ibmphp_hpc_readslot(struct slot *, u8, u8 *);
+int ibmphp_hpc_writeslot(struct slot *, u8);
+void ibmphp_lock_operations(void);
+void ibmphp_unlock_operations(void);
+int ibmphp_hpc_start_poll_thread(void);
+void ibmphp_hpc_stop_poll_thread(void);
+
+//----------------------------------------------------------------------------
+
+
+//----------------------------------------------------------------------------
+// HPC return codes
+//----------------------------------------------------------------------------
+#define HPC_ERROR			0xFF
+
+//-----------------------------------------------------------------------------
+// BUS INFO
+//-----------------------------------------------------------------------------
+#define BUS_SPEED			0x30
+#define BUS_MODE			0x40
+#define BUS_MODE_PCIX			0x01
+#define BUS_MODE_PCI			0x00
+#define BUS_SPEED_2			0x20
+#define BUS_SPEED_1			0x10
+#define BUS_SPEED_33			0x00
+#define BUS_SPEED_66			0x01
+#define BUS_SPEED_100			0x02
+#define BUS_SPEED_133			0x03
+#define BUS_SPEED_66PCIX		0x04
+#define BUS_SPEED_66UNKNOWN		0x05
+#define BUS_STATUS_AVAILABLE		0x01
+#define BUS_CONTROL_AVAILABLE		0x02
+#define SLOT_LATCH_REGS_SUPPORTED	0x10
+
+#define PRGM_MODEL_REV_LEVEL		0xF0
+#define MAX_ADAPTER_NONE		0x09
+
+//----------------------------------------------------------------------------
+// HPC 'write' operations/commands
+//----------------------------------------------------------------------------
+//	Command			Code	State	Write to reg
+//					Machine	at index
+//-------------------------	----	-------	------------
+#define HPC_CTLR_ENABLEIRQ	0x00	// N	15
+#define HPC_CTLR_DISABLEIRQ	0x01	// N	15
+#define HPC_SLOT_OFF		0x02	// Y	0-14
+#define HPC_SLOT_ON		0x03	// Y	0-14
+#define HPC_SLOT_ATTNOFF	0x04	// N	0-14
+#define HPC_SLOT_ATTNON		0x05	// N	0-14
+#define HPC_CTLR_CLEARIRQ	0x06	// N	15
+#define HPC_CTLR_RESET		0x07	// Y	15
+#define HPC_CTLR_IRQSTEER	0x08	// N	15
+#define HPC_BUS_33CONVMODE	0x09	// Y	31-34
+#define HPC_BUS_66CONVMODE	0x0A	// Y	31-34
+#define HPC_BUS_66PCIXMODE	0x0B	// Y	31-34
+#define HPC_BUS_100PCIXMODE	0x0C	// Y	31-34
+#define HPC_BUS_133PCIXMODE	0x0D	// Y	31-34
+#define HPC_ALLSLOT_OFF		0x11	// Y	15
+#define HPC_ALLSLOT_ON		0x12	// Y	15
+#define HPC_SLOT_BLINKLED	0x13	// N	0-14
+
+//----------------------------------------------------------------------------
+// read commands
+//----------------------------------------------------------------------------
+#define READ_SLOTSTATUS		0x01
+#define READ_EXTSLOTSTATUS	0x02
+#define READ_BUSSTATUS		0x03
+#define READ_CTLRSTATUS		0x04
+#define READ_ALLSTAT		0x05
+#define READ_ALLSLOT		0x06
+#define READ_SLOTLATCHLOWREG	0x07
+#define READ_REVLEVEL		0x08
+#define READ_HPCOPTIONS		0x09
+//----------------------------------------------------------------------------
+// slot status
+//----------------------------------------------------------------------------
+#define HPC_SLOT_POWER		0x01
+#define HPC_SLOT_CONNECT	0x02
+#define HPC_SLOT_ATTN		0x04
+#define HPC_SLOT_PRSNT2		0x08
+#define HPC_SLOT_PRSNT1		0x10
+#define HPC_SLOT_PWRGD		0x20
+#define HPC_SLOT_BUS_SPEED	0x40
+#define HPC_SLOT_LATCH		0x80
+
+//----------------------------------------------------------------------------
+// HPC_SLOT_POWER status return codes
+//----------------------------------------------------------------------------
+#define HPC_SLOT_POWER_OFF	0x00
+#define HPC_SLOT_POWER_ON	0x01
+
+//----------------------------------------------------------------------------
+// HPC_SLOT_CONNECT status return codes
+//----------------------------------------------------------------------------
+#define HPC_SLOT_CONNECTED	0x00
+#define HPC_SLOT_DISCONNECTED	0x01
+
+//----------------------------------------------------------------------------
+// HPC_SLOT_ATTN status return codes
+//----------------------------------------------------------------------------
+#define HPC_SLOT_ATTN_OFF	0x00
+#define HPC_SLOT_ATTN_ON	0x01
+#define HPC_SLOT_ATTN_BLINK	0x02
+
+//----------------------------------------------------------------------------
+// HPC_SLOT_PRSNT status return codes
+//----------------------------------------------------------------------------
+#define HPC_SLOT_EMPTY		0x00
+#define HPC_SLOT_PRSNT_7	0x01
+#define HPC_SLOT_PRSNT_15	0x02
+#define HPC_SLOT_PRSNT_25	0x03
+
+//----------------------------------------------------------------------------
+// HPC_SLOT_PWRGD status return codes
+//----------------------------------------------------------------------------
+#define HPC_SLOT_PWRGD_FAULT_NONE	0x00
+#define HPC_SLOT_PWRGD_GOOD		0x01
+
+//----------------------------------------------------------------------------
+// HPC_SLOT_BUS_SPEED status return codes
+//----------------------------------------------------------------------------
+#define HPC_SLOT_BUS_SPEED_OK	0x00
+#define HPC_SLOT_BUS_SPEED_MISM	0x01
+
+//----------------------------------------------------------------------------
+// HPC_SLOT_LATCH status return codes
+//----------------------------------------------------------------------------
+#define HPC_SLOT_LATCH_OPEN	0x01	// NOTE : in PCI spec bit off = open
+#define HPC_SLOT_LATCH_CLOSED	0x00	// NOTE : in PCI spec bit on  = closed
+
+
+//----------------------------------------------------------------------------
+// extended slot status
+//----------------------------------------------------------------------------
+#define HPC_SLOT_PCIX		0x01
+#define HPC_SLOT_SPEED1		0x02
+#define HPC_SLOT_SPEED2		0x04
+#define HPC_SLOT_BLINK_ATTN	0x08
+#define HPC_SLOT_RSRVD1		0x10
+#define HPC_SLOT_RSRVD2		0x20
+#define HPC_SLOT_BUS_MODE	0x40
+#define HPC_SLOT_RSRVD3		0x80
+
+//----------------------------------------------------------------------------
+// HPC_XSLOT_PCIX_CAP status return codes
+//----------------------------------------------------------------------------
+#define HPC_SLOT_PCIX_NO	0x00
+#define HPC_SLOT_PCIX_YES	0x01
+
+//----------------------------------------------------------------------------
+// HPC_XSLOT_SPEED status return codes
+//----------------------------------------------------------------------------
+#define HPC_SLOT_SPEED_33	0x00
+#define HPC_SLOT_SPEED_66	0x01
+#define HPC_SLOT_SPEED_133	0x02
+
+//----------------------------------------------------------------------------
+// HPC_XSLOT_ATTN_BLINK status return codes
+//----------------------------------------------------------------------------
+#define HPC_SLOT_ATTN_BLINK_OFF	0x00
+#define HPC_SLOT_ATTN_BLINK_ON	0x01
+
+//----------------------------------------------------------------------------
+// HPC_XSLOT_BUS_MODE status return codes
+//----------------------------------------------------------------------------
+#define HPC_SLOT_BUS_MODE_OK	0x00
+#define HPC_SLOT_BUS_MODE_MISM	0x01
+
+//----------------------------------------------------------------------------
+// Controller status
+//----------------------------------------------------------------------------
+#define HPC_CTLR_WORKING	0x01
+#define HPC_CTLR_FINISHED	0x02
+#define HPC_CTLR_RESULT0	0x04
+#define HPC_CTLR_RESULT1	0x08
+#define HPC_CTLR_RESULE2	0x10
+#define HPC_CTLR_RESULT3	0x20
+#define HPC_CTLR_IRQ_ROUTG	0x40
+#define HPC_CTLR_IRQ_PENDG	0x80
+
+//----------------------------------------------------------------------------
+// HPC_CTLR_WORKING status return codes
+//----------------------------------------------------------------------------
+#define HPC_CTLR_WORKING_NO	0x00
+#define HPC_CTLR_WORKING_YES	0x01
+
+//----------------------------------------------------------------------------
+// HPC_CTLR_FINISHED status return codes
+//----------------------------------------------------------------------------
+#define HPC_CTLR_FINISHED_NO	0x00
+#define HPC_CTLR_FINISHED_YES	0x01
+
+//----------------------------------------------------------------------------
+// HPC_CTLR_RESULT status return codes
+//----------------------------------------------------------------------------
+#define HPC_CTLR_RESULT_SUCCESS	0x00
+#define HPC_CTLR_RESULT_FAILED	0x01
+#define HPC_CTLR_RESULT_RSVD	0x02
+#define HPC_CTLR_RESULT_NORESP	0x03
+
+
+//----------------------------------------------------------------------------
+// macro for slot info
+//----------------------------------------------------------------------------
+#define SLOT_POWER(s)	((u8) ((s & HPC_SLOT_POWER) \
+	? HPC_SLOT_POWER_ON : HPC_SLOT_POWER_OFF))
+
+#define SLOT_CONNECT(s)	((u8) ((s & HPC_SLOT_CONNECT) \
+	? HPC_SLOT_DISCONNECTED : HPC_SLOT_CONNECTED))
+
+#define SLOT_ATTN(s, es)	((u8) ((es & HPC_SLOT_BLINK_ATTN) \
+	? HPC_SLOT_ATTN_BLINK \
+	: ((s & HPC_SLOT_ATTN) ? HPC_SLOT_ATTN_ON : HPC_SLOT_ATTN_OFF)))
+
+#define SLOT_PRESENT(s)	((u8) ((s & HPC_SLOT_PRSNT1) \
+	? ((s & HPC_SLOT_PRSNT2) ? HPC_SLOT_EMPTY : HPC_SLOT_PRSNT_15) \
+	: ((s & HPC_SLOT_PRSNT2) ? HPC_SLOT_PRSNT_25 : HPC_SLOT_PRSNT_7)))
+
+#define SLOT_PWRGD(s)	((u8) ((s & HPC_SLOT_PWRGD) \
+	? HPC_SLOT_PWRGD_GOOD : HPC_SLOT_PWRGD_FAULT_NONE))
+
+#define SLOT_BUS_SPEED(s)	((u8) ((s & HPC_SLOT_BUS_SPEED) \
+	? HPC_SLOT_BUS_SPEED_MISM : HPC_SLOT_BUS_SPEED_OK))
+
+#define SLOT_LATCH(s)	((u8) ((s & HPC_SLOT_LATCH) \
+	? HPC_SLOT_LATCH_CLOSED : HPC_SLOT_LATCH_OPEN))
+
+#define SLOT_PCIX(es)	((u8) ((es & HPC_SLOT_PCIX) \
+	? HPC_SLOT_PCIX_YES : HPC_SLOT_PCIX_NO))
+
+#define SLOT_SPEED(es)	((u8) ((es & HPC_SLOT_SPEED2) \
+	? ((es & HPC_SLOT_SPEED1) ? HPC_SLOT_SPEED_133   \
+				: HPC_SLOT_SPEED_66)   \
+	: HPC_SLOT_SPEED_33))
+
+#define SLOT_BUS_MODE(es)	((u8) ((es & HPC_SLOT_BUS_MODE) \
+	? HPC_SLOT_BUS_MODE_MISM : HPC_SLOT_BUS_MODE_OK))
+
+//--------------------------------------------------------------------------
+// macro for bus info
+//---------------------------------------------------------------------------
+#define CURRENT_BUS_SPEED(s)	((u8) (s & BUS_SPEED_2) \
+	? ((s & BUS_SPEED_1) ? BUS_SPEED_133 : BUS_SPEED_100) \
+	: ((s & BUS_SPEED_1) ? BUS_SPEED_66 : BUS_SPEED_33))
+
+#define CURRENT_BUS_MODE(s)	((u8) (s & BUS_MODE) ? BUS_MODE_PCIX : BUS_MODE_PCI)
+
+#define READ_BUS_STATUS(s)	((u8) (s->options & BUS_STATUS_AVAILABLE))
+
+#define READ_BUS_MODE(s)	((s->revision & PRGM_MODEL_REV_LEVEL) >= 0x20)
+
+#define SET_BUS_STATUS(s)	((u8) (s->options & BUS_CONTROL_AVAILABLE))
+
+#define READ_SLOT_LATCH(s)	((u8) (s->options & SLOT_LATCH_REGS_SUPPORTED))
+
+//----------------------------------------------------------------------------
+// macro for controller info
+//----------------------------------------------------------------------------
+#define CTLR_WORKING(c) ((u8) ((c & HPC_CTLR_WORKING) \
+	? HPC_CTLR_WORKING_YES : HPC_CTLR_WORKING_NO))
+#define CTLR_FINISHED(c) ((u8) ((c & HPC_CTLR_FINISHED) \
+	? HPC_CTLR_FINISHED_YES : HPC_CTLR_FINISHED_NO))
+#define CTLR_RESULT(c) ((u8) ((c & HPC_CTLR_RESULT1)  \
+	? ((c & HPC_CTLR_RESULT0) ? HPC_CTLR_RESULT_NORESP \
+				: HPC_CTLR_RESULT_RSVD)  \
+	: ((c & HPC_CTLR_RESULT0) ? HPC_CTLR_RESULT_FAILED \
+				: HPC_CTLR_RESULT_SUCCESS)))
+
+// command that affect the state machine of HPC
+#define NEEDTOCHECK_CMDSTATUS(c) ((c == HPC_SLOT_OFF)        || \
+				  (c == HPC_SLOT_ON)         || \
+				  (c == HPC_CTLR_RESET)      || \
+				  (c == HPC_BUS_33CONVMODE)  || \
+				  (c == HPC_BUS_66CONVMODE)  || \
+				  (c == HPC_BUS_66PCIXMODE)  || \
+				  (c == HPC_BUS_100PCIXMODE) || \
+				  (c == HPC_BUS_133PCIXMODE) || \
+				  (c == HPC_ALLSLOT_OFF)     || \
+				  (c == HPC_ALLSLOT_ON))
+
+
+/* Core part of the driver */
+
+#define ENABLE		1
+#define DISABLE		0
+
+#define CARD_INFO	0x07
+#define PCIX133		0x07
+#define PCIX66		0x05
+#define PCI66		0x04
+
+extern struct pci_bus *ibmphp_pci_bus;
+
+/* Variables */
+
+struct pci_func {
+	struct pci_dev *dev;	/* from the OS */
+	u8 busno;
+	u8 device;
+	u8 function;
+	struct resource_node *io[6];
+	struct resource_node *mem[6];
+	struct resource_node *pfmem[6];
+	struct pci_func *next;
+	int devices[32];	/* for bridge config */
+	u8 irq[4];		/* for interrupt config */
+	u8 bus;			/* flag for unconfiguring, to say if PPB */
+};
+
+struct slot {
+	u8 bus;
+	u8 device;
+	u8 number;
+	u8 real_physical_slot_num;
+	u32 capabilities;
+	u8 supported_speed;
+	u8 supported_bus_mode;
+	u8 flag;		/* this is for disable slot and polling */
+	u8 ctlr_index;
+	struct hotplug_slot hotplug_slot;
+	struct controller *ctrl;
+	struct pci_func *func;
+	u8 irq[4];
+	int bit_mode;		/* 0 = 32, 1 = 64 */
+	struct bus_info *bus_on;
+	struct list_head ibm_slot_list;
+	u8 status;
+	u8 ext_status;
+	u8 busstatus;
+};
+
+struct controller {
+	struct ebda_hpc_slot *slots;
+	struct ebda_hpc_bus *buses;
+	struct pci_dev *ctrl_dev; /* in case where controller is PCI */
+	u8 starting_slot_num;	/* starting and ending slot #'s this ctrl controls*/
+	u8 ending_slot_num;
+	u8 revision;
+	u8 options;		/* which options HPC supports */
+	u8 status;
+	u8 ctlr_id;
+	u8 slot_count;
+	u8 bus_count;
+	u8 ctlr_relative_id;
+	u32 irq;
+	union {
+		struct isa_ctlr_access isa_ctlr;
+		struct pci_ctlr_access pci_ctlr;
+		struct wpeg_i2c_ctlr_access wpeg_ctlr;
+	} u;
+	u8 ctlr_type;
+	struct list_head ebda_hpc_list;
+};
+
+/* Functions */
+
+int ibmphp_init_devno(struct slot **);	/* This function is called from EBDA, so we need it not be static */
+int ibmphp_do_disable_slot(struct slot *slot_cur);
+int ibmphp_update_slot_info(struct slot *);	/* This function is called from HPC, so we need it to not be static */
+int ibmphp_configure_card(struct pci_func *, u8);
+int ibmphp_unconfigure_card(struct slot **, int);
+extern const struct hotplug_slot_ops ibmphp_hotplug_slot_ops;
+
+static inline struct slot *to_slot(struct hotplug_slot *hotplug_slot)
+{
+	return container_of(hotplug_slot, struct slot, hotplug_slot);
+}
+
+#endif				//__IBMPHP_H
+
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
new file mode 100644
index 000000000..197997e26
--- /dev/null
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -0,0 +1,1252 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IBM Hot Plug Controller Driver
+ *
+ * Written By: Chuck Cole, Jyoti Shah, Tong Yu, Irene Zubarev, IBM Corporation
+ *
+ * Copyright (C) 2001,2003 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001-2003 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <gregkh@us.ibm.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/wait.h>
+#include "../pci.h"
+#include <asm/pci_x86.h>		/* for struct irq_routing_table */
+#include <asm/io_apic.h>
+#include "ibmphp.h"
+
+#define attn_on(sl)  ibmphp_hpc_writeslot(sl, HPC_SLOT_ATTNON)
+#define attn_off(sl) ibmphp_hpc_writeslot(sl, HPC_SLOT_ATTNOFF)
+#define attn_LED_blink(sl) ibmphp_hpc_writeslot(sl, HPC_SLOT_BLINKLED)
+#define get_ctrl_revision(sl, rev) ibmphp_hpc_readslot(sl, READ_REVLEVEL, rev)
+#define get_hpc_options(sl, opt) ibmphp_hpc_readslot(sl, READ_HPCOPTIONS, opt)
+
+#define DRIVER_VERSION	"0.6"
+#define DRIVER_DESC	"IBM Hot Plug PCI Controller Driver"
+
+int ibmphp_debug;
+
+static bool debug;
+module_param(debug, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(debug, "Debugging mode enabled or not");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION(DRIVER_DESC);
+
+struct pci_bus *ibmphp_pci_bus;
+static int max_slots;
+
+static int irqs[16];    /* PIC mode IRQs we're using so far (in case MPS
+			 * tables don't provide default info for empty slots */
+
+static int init_flag;
+
+static inline int get_cur_bus_info(struct slot **sl)
+{
+	int rc = 1;
+	struct slot *slot_cur = *sl;
+
+	debug("options = %x\n", slot_cur->ctrl->options);
+	debug("revision = %x\n", slot_cur->ctrl->revision);
+
+	if (READ_BUS_STATUS(slot_cur->ctrl))
+		rc = ibmphp_hpc_readslot(slot_cur, READ_BUSSTATUS, NULL);
+
+	if (rc)
+		return rc;
+
+	slot_cur->bus_on->current_speed = CURRENT_BUS_SPEED(slot_cur->busstatus);
+	if (READ_BUS_MODE(slot_cur->ctrl))
+		slot_cur->bus_on->current_bus_mode =
+				CURRENT_BUS_MODE(slot_cur->busstatus);
+	else
+		slot_cur->bus_on->current_bus_mode = 0xFF;
+
+	debug("busstatus = %x, bus_speed = %x, bus_mode = %x\n",
+			slot_cur->busstatus,
+			slot_cur->bus_on->current_speed,
+			slot_cur->bus_on->current_bus_mode);
+
+	*sl = slot_cur;
+	return 0;
+}
+
+static inline int slot_update(struct slot **sl)
+{
+	int rc;
+	rc = ibmphp_hpc_readslot(*sl, READ_ALLSTAT, NULL);
+	if (rc)
+		return rc;
+	if (!init_flag)
+		rc = get_cur_bus_info(sl);
+	return rc;
+}
+
+static int __init get_max_slots(void)
+{
+	struct slot *slot_cur;
+	u8 slot_count = 0;
+
+	list_for_each_entry(slot_cur, &ibmphp_slot_head, ibm_slot_list) {
+		/* sometimes the hot-pluggable slots start with 4 (not always from 1) */
+		slot_count = max(slot_count, slot_cur->number);
+	}
+	return slot_count;
+}
+
+/* This routine will put the correct slot->device information per slot.  It's
+ * called from initialization of the slot structures. It will also assign
+ * interrupt numbers per each slot.
+ * Parameters: struct slot
+ * Returns 0 or errors
+ */
+int ibmphp_init_devno(struct slot **cur_slot)
+{
+	struct irq_routing_table *rtable;
+	int len;
+	int loop;
+	int i;
+
+	rtable = pcibios_get_irq_routing_table();
+	if (!rtable) {
+		err("no BIOS routing table...\n");
+		return -ENOMEM;
+	}
+
+	len = (rtable->size - sizeof(struct irq_routing_table)) /
+			sizeof(struct irq_info);
+
+	if (!len) {
+		kfree(rtable);
+		return -1;
+	}
+	for (loop = 0; loop < len; loop++) {
+		if ((*cur_slot)->number == rtable->slots[loop].slot &&
+		    (*cur_slot)->bus == rtable->slots[loop].bus) {
+			(*cur_slot)->device = PCI_SLOT(rtable->slots[loop].devfn);
+			for (i = 0; i < 4; i++)
+				(*cur_slot)->irq[i] = IO_APIC_get_PCI_irq_vector((int) (*cur_slot)->bus,
+						(int) (*cur_slot)->device, i);
+
+			debug("(*cur_slot)->irq[0] = %x\n",
+					(*cur_slot)->irq[0]);
+			debug("(*cur_slot)->irq[1] = %x\n",
+					(*cur_slot)->irq[1]);
+			debug("(*cur_slot)->irq[2] = %x\n",
+					(*cur_slot)->irq[2]);
+			debug("(*cur_slot)->irq[3] = %x\n",
+					(*cur_slot)->irq[3]);
+
+			debug("rtable->exclusive_irqs = %x\n",
+					rtable->exclusive_irqs);
+			debug("rtable->slots[loop].irq[0].bitmap = %x\n",
+					rtable->slots[loop].irq[0].bitmap);
+			debug("rtable->slots[loop].irq[1].bitmap = %x\n",
+					rtable->slots[loop].irq[1].bitmap);
+			debug("rtable->slots[loop].irq[2].bitmap = %x\n",
+					rtable->slots[loop].irq[2].bitmap);
+			debug("rtable->slots[loop].irq[3].bitmap = %x\n",
+					rtable->slots[loop].irq[3].bitmap);
+
+			debug("rtable->slots[loop].irq[0].link = %x\n",
+					rtable->slots[loop].irq[0].link);
+			debug("rtable->slots[loop].irq[1].link = %x\n",
+					rtable->slots[loop].irq[1].link);
+			debug("rtable->slots[loop].irq[2].link = %x\n",
+					rtable->slots[loop].irq[2].link);
+			debug("rtable->slots[loop].irq[3].link = %x\n",
+					rtable->slots[loop].irq[3].link);
+			debug("end of init_devno\n");
+			kfree(rtable);
+			return 0;
+		}
+	}
+
+	kfree(rtable);
+	return -1;
+}
+
+static inline int power_on(struct slot *slot_cur)
+{
+	u8 cmd = HPC_SLOT_ON;
+	int retval;
+
+	retval = ibmphp_hpc_writeslot(slot_cur, cmd);
+	if (retval) {
+		err("power on failed\n");
+		return retval;
+	}
+	if (CTLR_RESULT(slot_cur->ctrl->status)) {
+		err("command not completed successfully in power_on\n");
+		return -EIO;
+	}
+	msleep(3000);	/* For ServeRAID cards, and some 66 PCI */
+	return 0;
+}
+
+static inline int power_off(struct slot *slot_cur)
+{
+	u8 cmd = HPC_SLOT_OFF;
+	int retval;
+
+	retval = ibmphp_hpc_writeslot(slot_cur, cmd);
+	if (retval) {
+		err("power off failed\n");
+		return retval;
+	}
+	if (CTLR_RESULT(slot_cur->ctrl->status)) {
+		err("command not completed successfully in power_off\n");
+		retval = -EIO;
+	}
+	return retval;
+}
+
+static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 value)
+{
+	int rc = 0;
+	struct slot *pslot;
+	u8 cmd = 0x00;     /* avoid compiler warning */
+
+	debug("set_attention_status - Entry hotplug_slot[%lx] value[%x]\n",
+			(ulong) hotplug_slot, value);
+	ibmphp_lock_operations();
+
+
+	if (hotplug_slot) {
+		switch (value) {
+		case HPC_SLOT_ATTN_OFF:
+			cmd = HPC_SLOT_ATTNOFF;
+			break;
+		case HPC_SLOT_ATTN_ON:
+			cmd = HPC_SLOT_ATTNON;
+			break;
+		case HPC_SLOT_ATTN_BLINK:
+			cmd = HPC_SLOT_BLINKLED;
+			break;
+		default:
+			rc = -ENODEV;
+			err("set_attention_status - Error : invalid input [%x]\n",
+					value);
+			break;
+		}
+		if (rc == 0) {
+			pslot = to_slot(hotplug_slot);
+			rc = ibmphp_hpc_writeslot(pslot, cmd);
+		}
+	} else
+		rc = -ENODEV;
+
+	ibmphp_unlock_operations();
+
+	debug("set_attention_status - Exit rc[%d]\n", rc);
+	return rc;
+}
+
+static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	int rc = -ENODEV;
+	struct slot *pslot;
+	struct slot myslot;
+
+	debug("get_attention_status - Entry hotplug_slot[%lx] pvalue[%lx]\n",
+					(ulong) hotplug_slot, (ulong) value);
+
+	ibmphp_lock_operations();
+	if (hotplug_slot) {
+		pslot = to_slot(hotplug_slot);
+		memcpy(&myslot, pslot, sizeof(struct slot));
+		rc = ibmphp_hpc_readslot(pslot, READ_SLOTSTATUS,
+					 &myslot.status);
+		if (!rc)
+			rc = ibmphp_hpc_readslot(pslot, READ_EXTSLOTSTATUS,
+						 &myslot.ext_status);
+		if (!rc)
+			*value = SLOT_ATTN(myslot.status, myslot.ext_status);
+	}
+
+	ibmphp_unlock_operations();
+	debug("get_attention_status - Exit rc[%d] value[%x]\n", rc, *value);
+	return rc;
+}
+
+static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	int rc = -ENODEV;
+	struct slot *pslot;
+	struct slot myslot;
+
+	debug("get_latch_status - Entry hotplug_slot[%lx] pvalue[%lx]\n",
+					(ulong) hotplug_slot, (ulong) value);
+	ibmphp_lock_operations();
+	if (hotplug_slot) {
+		pslot = to_slot(hotplug_slot);
+		memcpy(&myslot, pslot, sizeof(struct slot));
+		rc = ibmphp_hpc_readslot(pslot, READ_SLOTSTATUS,
+					 &myslot.status);
+		if (!rc)
+			*value = SLOT_LATCH(myslot.status);
+	}
+
+	ibmphp_unlock_operations();
+	debug("get_latch_status - Exit rc[%d] rc[%x] value[%x]\n",
+			rc, rc, *value);
+	return rc;
+}
+
+
+static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	int rc = -ENODEV;
+	struct slot *pslot;
+	struct slot myslot;
+
+	debug("get_power_status - Entry hotplug_slot[%lx] pvalue[%lx]\n",
+					(ulong) hotplug_slot, (ulong) value);
+	ibmphp_lock_operations();
+	if (hotplug_slot) {
+		pslot = to_slot(hotplug_slot);
+		memcpy(&myslot, pslot, sizeof(struct slot));
+		rc = ibmphp_hpc_readslot(pslot, READ_SLOTSTATUS,
+					 &myslot.status);
+		if (!rc)
+			*value = SLOT_PWRGD(myslot.status);
+	}
+
+	ibmphp_unlock_operations();
+	debug("get_power_status - Exit rc[%d] rc[%x] value[%x]\n",
+			rc, rc, *value);
+	return rc;
+}
+
+static int get_adapter_present(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	int rc = -ENODEV;
+	struct slot *pslot;
+	u8 present;
+	struct slot myslot;
+
+	debug("get_adapter_status - Entry hotplug_slot[%lx] pvalue[%lx]\n",
+					(ulong) hotplug_slot, (ulong) value);
+	ibmphp_lock_operations();
+	if (hotplug_slot) {
+		pslot = to_slot(hotplug_slot);
+		memcpy(&myslot, pslot, sizeof(struct slot));
+		rc = ibmphp_hpc_readslot(pslot, READ_SLOTSTATUS,
+					 &myslot.status);
+		if (!rc) {
+			present = SLOT_PRESENT(myslot.status);
+			if (present == HPC_SLOT_EMPTY)
+				*value = 0;
+			else
+				*value = 1;
+		}
+	}
+
+	ibmphp_unlock_operations();
+	debug("get_adapter_present - Exit rc[%d] value[%x]\n", rc, *value);
+	return rc;
+}
+
+static int get_max_bus_speed(struct slot *slot)
+{
+	int rc = 0;
+	u8 mode = 0;
+	enum pci_bus_speed speed;
+	struct pci_bus *bus = slot->hotplug_slot.pci_slot->bus;
+
+	debug("%s - Entry slot[%p]\n", __func__, slot);
+
+	ibmphp_lock_operations();
+	mode = slot->supported_bus_mode;
+	speed = slot->supported_speed;
+	ibmphp_unlock_operations();
+
+	switch (speed) {
+	case BUS_SPEED_33:
+		break;
+	case BUS_SPEED_66:
+		if (mode == BUS_MODE_PCIX)
+			speed += 0x01;
+		break;
+	case BUS_SPEED_100:
+	case BUS_SPEED_133:
+		speed += 0x01;
+		break;
+	default:
+		/* Note (will need to change): there would be soon 256, 512 also */
+		rc = -ENODEV;
+	}
+
+	if (!rc)
+		bus->max_bus_speed = speed;
+
+	debug("%s - Exit rc[%d] speed[%x]\n", __func__, rc, speed);
+	return rc;
+}
+
+/****************************************************************************
+ * This routine will initialize the ops data structure used in the validate
+ * function. It will also power off empty slots that are powered on since BIOS
+ * leaves those on, albeit disconnected
+ ****************************************************************************/
+static int __init init_ops(void)
+{
+	struct slot *slot_cur;
+	int retval;
+	int rc;
+
+	list_for_each_entry(slot_cur, &ibmphp_slot_head, ibm_slot_list) {
+		debug("BEFORE GETTING SLOT STATUS, slot # %x\n",
+							slot_cur->number);
+		if (slot_cur->ctrl->revision == 0xFF)
+			if (get_ctrl_revision(slot_cur,
+						&slot_cur->ctrl->revision))
+				return -1;
+
+		if (slot_cur->bus_on->current_speed == 0xFF)
+			if (get_cur_bus_info(&slot_cur))
+				return -1;
+		get_max_bus_speed(slot_cur);
+
+		if (slot_cur->ctrl->options == 0xFF)
+			if (get_hpc_options(slot_cur, &slot_cur->ctrl->options))
+				return -1;
+
+		retval = slot_update(&slot_cur);
+		if (retval)
+			return retval;
+
+		debug("status = %x\n", slot_cur->status);
+		debug("ext_status = %x\n", slot_cur->ext_status);
+		debug("SLOT_POWER = %x\n", SLOT_POWER(slot_cur->status));
+		debug("SLOT_PRESENT = %x\n", SLOT_PRESENT(slot_cur->status));
+		debug("SLOT_LATCH = %x\n", SLOT_LATCH(slot_cur->status));
+
+		if ((SLOT_PWRGD(slot_cur->status)) &&
+		    !(SLOT_PRESENT(slot_cur->status)) &&
+		    !(SLOT_LATCH(slot_cur->status))) {
+			debug("BEFORE POWER OFF COMMAND\n");
+				rc = power_off(slot_cur);
+				if (rc)
+					return rc;
+
+	/*		retval = slot_update(&slot_cur);
+	 *		if (retval)
+	 *			return retval;
+	 *		ibmphp_update_slot_info(slot_cur);
+	 */
+		}
+	}
+	init_flag = 0;
+	return 0;
+}
+
+/* This operation will check whether the slot is within the bounds and
+ * the operation is valid to perform on that slot
+ * Parameters: slot, operation
+ * Returns: 0 or error codes
+ */
+static int validate(struct slot *slot_cur, int opn)
+{
+	int number;
+	int retval;
+
+	if (!slot_cur)
+		return -ENODEV;
+	number = slot_cur->number;
+	if ((number > max_slots) || (number < 0))
+		return -EBADSLT;
+	debug("slot_number in validate is %d\n", slot_cur->number);
+
+	retval = slot_update(&slot_cur);
+	if (retval)
+		return retval;
+
+	switch (opn) {
+		case ENABLE:
+			if (!(SLOT_PWRGD(slot_cur->status)) &&
+			     (SLOT_PRESENT(slot_cur->status)) &&
+			     !(SLOT_LATCH(slot_cur->status)))
+				return 0;
+			break;
+		case DISABLE:
+			if ((SLOT_PWRGD(slot_cur->status)) &&
+			    (SLOT_PRESENT(slot_cur->status)) &&
+			    !(SLOT_LATCH(slot_cur->status)))
+				return 0;
+			break;
+		default:
+			break;
+	}
+	err("validate failed....\n");
+	return -EINVAL;
+}
+
+/****************************************************************************
+ * This routine is for updating the data structures in the hotplug core
+ * Parameters: struct slot
+ * Returns: 0 or error
+ ****************************************************************************/
+int ibmphp_update_slot_info(struct slot *slot_cur)
+{
+	struct pci_bus *bus = slot_cur->hotplug_slot.pci_slot->bus;
+	u8 bus_speed;
+	u8 mode;
+
+	bus_speed = slot_cur->bus_on->current_speed;
+	mode = slot_cur->bus_on->current_bus_mode;
+
+	switch (bus_speed) {
+		case BUS_SPEED_33:
+			break;
+		case BUS_SPEED_66:
+			if (mode == BUS_MODE_PCIX)
+				bus_speed += 0x01;
+			else if (mode == BUS_MODE_PCI)
+				;
+			else
+				bus_speed = PCI_SPEED_UNKNOWN;
+			break;
+		case BUS_SPEED_100:
+		case BUS_SPEED_133:
+			bus_speed += 0x01;
+			break;
+		default:
+			bus_speed = PCI_SPEED_UNKNOWN;
+	}
+
+	bus->cur_bus_speed = bus_speed;
+	// To do: bus_names
+
+	return 0;
+}
+
+
+/******************************************************************************
+ * This function will return the pci_func, given bus and devfunc, or NULL.  It
+ * is called from visit routines
+ ******************************************************************************/
+
+static struct pci_func *ibm_slot_find(u8 busno, u8 device, u8 function)
+{
+	struct pci_func *func_cur;
+	struct slot *slot_cur;
+	list_for_each_entry(slot_cur, &ibmphp_slot_head, ibm_slot_list) {
+		if (slot_cur->func) {
+			func_cur = slot_cur->func;
+			while (func_cur) {
+				if ((func_cur->busno == busno) &&
+						(func_cur->device == device) &&
+						(func_cur->function == function))
+					return func_cur;
+				func_cur = func_cur->next;
+			}
+		}
+	}
+	return NULL;
+}
+
+/*************************************************************
+ * This routine frees up memory used by struct slot, including
+ * the pointers to pci_func, bus, hotplug_slot, controller,
+ * and deregistering from the hotplug core
+ *************************************************************/
+static void free_slots(void)
+{
+	struct slot *slot_cur, *next;
+
+	debug("%s -- enter\n", __func__);
+
+	list_for_each_entry_safe(slot_cur, next, &ibmphp_slot_head,
+				 ibm_slot_list) {
+		pci_hp_del(&slot_cur->hotplug_slot);
+		slot_cur->ctrl = NULL;
+		slot_cur->bus_on = NULL;
+
+		/*
+		 * We don't want to actually remove the resources,
+		 * since ibmphp_free_resources() will do just that.
+		 */
+		ibmphp_unconfigure_card(&slot_cur, -1);
+
+		pci_hp_destroy(&slot_cur->hotplug_slot);
+		kfree(slot_cur);
+	}
+	debug("%s -- exit\n", __func__);
+}
+
+static void ibm_unconfigure_device(struct pci_func *func)
+{
+	struct pci_dev *temp;
+	u8 j;
+
+	debug("inside %s\n", __func__);
+	debug("func->device = %x, func->function = %x\n",
+					func->device, func->function);
+	debug("func->device << 3 | 0x0  = %x\n", func->device << 3 | 0x0);
+
+	pci_lock_rescan_remove();
+
+	for (j = 0; j < 0x08; j++) {
+		temp = pci_get_domain_bus_and_slot(0, func->busno,
+						   (func->device << 3) | j);
+		if (temp) {
+			pci_stop_and_remove_bus_device(temp);
+			pci_dev_put(temp);
+		}
+	}
+
+	pci_dev_put(func->dev);
+
+	pci_unlock_rescan_remove();
+}
+
+/*
+ * The following function is to fix kernel bug regarding
+ * getting bus entries, here we manually add those primary
+ * bus entries to kernel bus structure whenever apply
+ */
+static u8 bus_structure_fixup(u8 busno)
+{
+	struct pci_bus *bus, *b;
+	struct pci_dev *dev;
+	u16 l;
+
+	if (pci_find_bus(0, busno) || !(ibmphp_find_same_bus_num(busno)))
+		return 1;
+
+	bus = kmalloc(sizeof(*bus), GFP_KERNEL);
+	if (!bus)
+		return 1;
+
+	dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev) {
+		kfree(bus);
+		return 1;
+	}
+
+	bus->number = busno;
+	bus->ops = ibmphp_pci_bus->ops;
+	dev->bus = bus;
+	for (dev->devfn = 0; dev->devfn < 256; dev->devfn += 8) {
+		if (!pci_read_config_word(dev, PCI_VENDOR_ID, &l) &&
+					(l != 0x0000) && (l != 0xffff)) {
+			debug("%s - Inside bus_structure_fixup()\n",
+							__func__);
+			b = pci_scan_bus(busno, ibmphp_pci_bus->ops, NULL);
+			if (!b)
+				continue;
+
+			pci_bus_add_devices(b);
+			break;
+		}
+	}
+
+	kfree(dev);
+	kfree(bus);
+
+	return 0;
+}
+
+static int ibm_configure_device(struct pci_func *func)
+{
+	struct pci_bus *child;
+	int num;
+	int flag = 0;	/* this is to make sure we don't double scan the bus,
+					for bridged devices primarily */
+
+	pci_lock_rescan_remove();
+
+	if (!(bus_structure_fixup(func->busno)))
+		flag = 1;
+	if (func->dev == NULL)
+		func->dev = pci_get_domain_bus_and_slot(0, func->busno,
+				PCI_DEVFN(func->device, func->function));
+
+	if (func->dev == NULL) {
+		struct pci_bus *bus = pci_find_bus(0, func->busno);
+		if (!bus)
+			goto out;
+
+		num = pci_scan_slot(bus,
+				PCI_DEVFN(func->device, func->function));
+		if (num)
+			pci_bus_add_devices(bus);
+
+		func->dev = pci_get_domain_bus_and_slot(0, func->busno,
+				PCI_DEVFN(func->device, func->function));
+		if (func->dev == NULL) {
+			err("ERROR... : pci_dev still NULL\n");
+			goto out;
+		}
+	}
+	if (!(flag) && (func->dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) {
+		pci_hp_add_bridge(func->dev);
+		child = func->dev->subordinate;
+		if (child)
+			pci_bus_add_devices(child);
+	}
+
+ out:
+	pci_unlock_rescan_remove();
+	return 0;
+}
+
+/*******************************************************
+ * Returns whether the bus is empty or not
+ *******************************************************/
+static int is_bus_empty(struct slot *slot_cur)
+{
+	int rc;
+	struct slot *tmp_slot;
+	u8 i = slot_cur->bus_on->slot_min;
+
+	while (i <= slot_cur->bus_on->slot_max) {
+		if (i == slot_cur->number) {
+			i++;
+			continue;
+		}
+		tmp_slot = ibmphp_get_slot_from_physical_num(i);
+		if (!tmp_slot)
+			return 0;
+		rc = slot_update(&tmp_slot);
+		if (rc)
+			return 0;
+		if (SLOT_PRESENT(tmp_slot->status) &&
+					SLOT_PWRGD(tmp_slot->status))
+			return 0;
+		i++;
+	}
+	return 1;
+}
+
+/***********************************************************
+ * If the HPC permits and the bus currently empty, tries to set the
+ * bus speed and mode at the maximum card and bus capability
+ * Parameters: slot
+ * Returns: bus is set (0) or error code
+ ***********************************************************/
+static int set_bus(struct slot *slot_cur)
+{
+	int rc;
+	u8 speed;
+	u8 cmd = 0x0;
+	int retval;
+	static const struct pci_device_id ciobx[] = {
+		{ PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, 0x0101) },
+		{ },
+	};
+
+	debug("%s - entry slot # %d\n", __func__, slot_cur->number);
+	if (SET_BUS_STATUS(slot_cur->ctrl) && is_bus_empty(slot_cur)) {
+		rc = slot_update(&slot_cur);
+		if (rc)
+			return rc;
+		speed = SLOT_SPEED(slot_cur->ext_status);
+		debug("ext_status = %x, speed = %x\n", slot_cur->ext_status, speed);
+		switch (speed) {
+		case HPC_SLOT_SPEED_33:
+			cmd = HPC_BUS_33CONVMODE;
+			break;
+		case HPC_SLOT_SPEED_66:
+			if (SLOT_PCIX(slot_cur->ext_status)) {
+				if ((slot_cur->supported_speed >= BUS_SPEED_66) &&
+						(slot_cur->supported_bus_mode == BUS_MODE_PCIX))
+					cmd = HPC_BUS_66PCIXMODE;
+				else if (!SLOT_BUS_MODE(slot_cur->ext_status))
+					/* if max slot/bus capability is 66 pci
+					and there's no bus mode mismatch, then
+					the adapter supports 66 pci */
+					cmd = HPC_BUS_66CONVMODE;
+				else
+					cmd = HPC_BUS_33CONVMODE;
+			} else {
+				if (slot_cur->supported_speed >= BUS_SPEED_66)
+					cmd = HPC_BUS_66CONVMODE;
+				else
+					cmd = HPC_BUS_33CONVMODE;
+			}
+			break;
+		case HPC_SLOT_SPEED_133:
+			switch (slot_cur->supported_speed) {
+			case BUS_SPEED_33:
+				cmd = HPC_BUS_33CONVMODE;
+				break;
+			case BUS_SPEED_66:
+				if (slot_cur->supported_bus_mode == BUS_MODE_PCIX)
+					cmd = HPC_BUS_66PCIXMODE;
+				else
+					cmd = HPC_BUS_66CONVMODE;
+				break;
+			case BUS_SPEED_100:
+				cmd = HPC_BUS_100PCIXMODE;
+				break;
+			case BUS_SPEED_133:
+				/* This is to take care of the bug in CIOBX chip */
+				if (pci_dev_present(ciobx))
+					ibmphp_hpc_writeslot(slot_cur,
+							HPC_BUS_100PCIXMODE);
+				cmd = HPC_BUS_133PCIXMODE;
+				break;
+			default:
+				err("Wrong bus speed\n");
+				return -ENODEV;
+			}
+			break;
+		default:
+			err("wrong slot speed\n");
+			return -ENODEV;
+		}
+		debug("setting bus speed for slot %d, cmd %x\n",
+						slot_cur->number, cmd);
+		retval = ibmphp_hpc_writeslot(slot_cur, cmd);
+		if (retval) {
+			err("setting bus speed failed\n");
+			return retval;
+		}
+		if (CTLR_RESULT(slot_cur->ctrl->status)) {
+			err("command not completed successfully in set_bus\n");
+			return -EIO;
+		}
+	}
+	/* This is for x440, once Brandon fixes the firmware,
+	will not need this delay */
+	msleep(1000);
+	debug("%s -Exit\n", __func__);
+	return 0;
+}
+
+/* This routine checks the bus limitations that the slot is on from the BIOS.
+ * This is used in deciding whether or not to power up the slot.
+ * (electrical/spec limitations. For example, >1 133 MHz or >2 66 PCI cards on
+ * same bus)
+ * Parameters: slot
+ * Returns: 0 = no limitations, -EINVAL = exceeded limitations on the bus
+ */
+static int check_limitations(struct slot *slot_cur)
+{
+	u8 i;
+	struct slot *tmp_slot;
+	u8 count = 0;
+	u8 limitation = 0;
+
+	for (i = slot_cur->bus_on->slot_min; i <= slot_cur->bus_on->slot_max; i++) {
+		tmp_slot = ibmphp_get_slot_from_physical_num(i);
+		if (!tmp_slot)
+			return -ENODEV;
+		if ((SLOT_PWRGD(tmp_slot->status)) &&
+					!(SLOT_CONNECT(tmp_slot->status)))
+			count++;
+	}
+	get_cur_bus_info(&slot_cur);
+	switch (slot_cur->bus_on->current_speed) {
+	case BUS_SPEED_33:
+		limitation = slot_cur->bus_on->slots_at_33_conv;
+		break;
+	case BUS_SPEED_66:
+		if (slot_cur->bus_on->current_bus_mode == BUS_MODE_PCIX)
+			limitation = slot_cur->bus_on->slots_at_66_pcix;
+		else
+			limitation = slot_cur->bus_on->slots_at_66_conv;
+		break;
+	case BUS_SPEED_100:
+		limitation = slot_cur->bus_on->slots_at_100_pcix;
+		break;
+	case BUS_SPEED_133:
+		limitation = slot_cur->bus_on->slots_at_133_pcix;
+		break;
+	}
+
+	if ((count + 1) > limitation)
+		return -EINVAL;
+	return 0;
+}
+
+static inline void print_card_capability(struct slot *slot_cur)
+{
+	info("capability of the card is ");
+	if ((slot_cur->ext_status & CARD_INFO) == PCIX133)
+		info("   133 MHz PCI-X\n");
+	else if ((slot_cur->ext_status & CARD_INFO) == PCIX66)
+		info("    66 MHz PCI-X\n");
+	else if ((slot_cur->ext_status & CARD_INFO) == PCI66)
+		info("    66 MHz PCI\n");
+	else
+		info("    33 MHz PCI\n");
+
+}
+
+/* This routine will power on the slot, configure the device(s) and find the
+ * drivers for them.
+ * Parameters: hotplug_slot
+ * Returns: 0 or failure codes
+ */
+static int enable_slot(struct hotplug_slot *hs)
+{
+	int rc, i, rcpr;
+	struct slot *slot_cur;
+	u8 function;
+	struct pci_func *tmp_func;
+
+	ibmphp_lock_operations();
+
+	debug("ENABLING SLOT........\n");
+	slot_cur = to_slot(hs);
+
+	rc = validate(slot_cur, ENABLE);
+	if (rc) {
+		err("validate function failed\n");
+		goto error_nopower;
+	}
+
+	attn_LED_blink(slot_cur);
+
+	rc = set_bus(slot_cur);
+	if (rc) {
+		err("was not able to set the bus\n");
+		goto error_nopower;
+	}
+
+	/*-----------------debugging------------------------------*/
+	get_cur_bus_info(&slot_cur);
+	debug("the current bus speed right after set_bus = %x\n",
+					slot_cur->bus_on->current_speed);
+	/*----------------------------------------------------------*/
+
+	rc = check_limitations(slot_cur);
+	if (rc) {
+		err("Adding this card exceeds the limitations of this bus.\n");
+		err("(i.e., >1 133MHz cards running on same bus, or >2 66 PCI cards running on same bus.\n");
+		err("Try hot-adding into another bus\n");
+		rc = -EINVAL;
+		goto error_nopower;
+	}
+
+	rc = power_on(slot_cur);
+
+	if (rc) {
+		err("something wrong when powering up... please see below for details\n");
+		/* need to turn off before on, otherwise, blinking overwrites */
+		attn_off(slot_cur);
+		attn_on(slot_cur);
+		if (slot_update(&slot_cur)) {
+			attn_off(slot_cur);
+			attn_on(slot_cur);
+			rc = -ENODEV;
+			goto exit;
+		}
+		/* Check to see the error of why it failed */
+		if ((SLOT_POWER(slot_cur->status)) &&
+					!(SLOT_PWRGD(slot_cur->status)))
+			err("power fault occurred trying to power up\n");
+		else if (SLOT_BUS_SPEED(slot_cur->status)) {
+			err("bus speed mismatch occurred.  please check current bus speed and card capability\n");
+			print_card_capability(slot_cur);
+		} else if (SLOT_BUS_MODE(slot_cur->ext_status)) {
+			err("bus mode mismatch occurred.  please check current bus mode and card capability\n");
+			print_card_capability(slot_cur);
+		}
+		ibmphp_update_slot_info(slot_cur);
+		goto exit;
+	}
+	debug("after power_on\n");
+	/*-----------------------debugging---------------------------*/
+	get_cur_bus_info(&slot_cur);
+	debug("the current bus speed right after power_on = %x\n",
+					slot_cur->bus_on->current_speed);
+	/*----------------------------------------------------------*/
+
+	rc = slot_update(&slot_cur);
+	if (rc)
+		goto error_power;
+
+	rc = -EINVAL;
+	if (SLOT_POWER(slot_cur->status) && !(SLOT_PWRGD(slot_cur->status))) {
+		err("power fault occurred trying to power up...\n");
+		goto error_power;
+	}
+	if (SLOT_POWER(slot_cur->status) && (SLOT_BUS_SPEED(slot_cur->status))) {
+		err("bus speed mismatch occurred.  please check current bus speed and card capability\n");
+		print_card_capability(slot_cur);
+		goto error_power;
+	}
+	/* Don't think this case will happen after above checks...
+	 * but just in case, for paranoia sake */
+	if (!(SLOT_POWER(slot_cur->status))) {
+		err("power on failed...\n");
+		goto error_power;
+	}
+
+	slot_cur->func = kzalloc(sizeof(struct pci_func), GFP_KERNEL);
+	if (!slot_cur->func) {
+		/* do update_slot_info here? */
+		rc = -ENOMEM;
+		goto error_power;
+	}
+	slot_cur->func->busno = slot_cur->bus;
+	slot_cur->func->device = slot_cur->device;
+	for (i = 0; i < 4; i++)
+		slot_cur->func->irq[i] = slot_cur->irq[i];
+
+	debug("b4 configure_card, slot_cur->bus = %x, slot_cur->device = %x\n",
+					slot_cur->bus, slot_cur->device);
+
+	if (ibmphp_configure_card(slot_cur->func, slot_cur->number)) {
+		err("configure_card was unsuccessful...\n");
+		/* true because don't need to actually deallocate resources,
+		 * just remove references */
+		ibmphp_unconfigure_card(&slot_cur, 1);
+		debug("after unconfigure_card\n");
+		slot_cur->func = NULL;
+		rc = -ENOMEM;
+		goto error_power;
+	}
+
+	function = 0x00;
+	do {
+		tmp_func = ibm_slot_find(slot_cur->bus, slot_cur->func->device,
+							function++);
+		if (tmp_func && !(tmp_func->dev))
+			ibm_configure_device(tmp_func);
+	} while (tmp_func);
+
+	attn_off(slot_cur);
+	if (slot_update(&slot_cur)) {
+		rc = -EFAULT;
+		goto exit;
+	}
+	ibmphp_print_test();
+	rc = ibmphp_update_slot_info(slot_cur);
+exit:
+	ibmphp_unlock_operations();
+	return rc;
+
+error_nopower:
+	attn_off(slot_cur);	/* need to turn off if was blinking b4 */
+	attn_on(slot_cur);
+error_cont:
+	rcpr = slot_update(&slot_cur);
+	if (rcpr) {
+		rc = rcpr;
+		goto exit;
+	}
+	ibmphp_update_slot_info(slot_cur);
+	goto exit;
+
+error_power:
+	attn_off(slot_cur);	/* need to turn off if was blinking b4 */
+	attn_on(slot_cur);
+	rcpr = power_off(slot_cur);
+	if (rcpr) {
+		rc = rcpr;
+		goto exit;
+	}
+	goto error_cont;
+}
+
+/**************************************************************
+* HOT REMOVING ADAPTER CARD                                   *
+* INPUT: POINTER TO THE HOTPLUG SLOT STRUCTURE                *
+* OUTPUT: SUCCESS 0 ; FAILURE: UNCONFIGURE , VALIDATE         *
+*		DISABLE POWER ,                               *
+**************************************************************/
+static int ibmphp_disable_slot(struct hotplug_slot *hotplug_slot)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+	int rc;
+
+	ibmphp_lock_operations();
+	rc = ibmphp_do_disable_slot(slot);
+	ibmphp_unlock_operations();
+	return rc;
+}
+
+int ibmphp_do_disable_slot(struct slot *slot_cur)
+{
+	int rc;
+	u8 flag;
+
+	debug("DISABLING SLOT...\n");
+
+	if ((slot_cur == NULL) || (slot_cur->ctrl == NULL))
+		return -ENODEV;
+
+	flag = slot_cur->flag;
+	slot_cur->flag = 1;
+
+	if (flag == 1) {
+		rc = validate(slot_cur, DISABLE);
+			/* checking if powered off already & valid slot # */
+		if (rc)
+			goto error;
+	}
+	attn_LED_blink(slot_cur);
+
+	if (slot_cur->func == NULL) {
+		/* We need this for functions that were there on bootup */
+		slot_cur->func = kzalloc(sizeof(struct pci_func), GFP_KERNEL);
+		if (!slot_cur->func) {
+			rc = -ENOMEM;
+			goto error;
+		}
+		slot_cur->func->busno = slot_cur->bus;
+		slot_cur->func->device = slot_cur->device;
+	}
+
+	ibm_unconfigure_device(slot_cur->func);
+
+	/*
+	 * If we got here from latch suddenly opening on operating card or
+	 * a power fault, there's no power to the card, so cannot
+	 * read from it to determine what resources it occupied.  This operation
+	 * is forbidden anyhow.  The best we can do is remove it from kernel
+	 * lists at least */
+
+	if (!flag) {
+		attn_off(slot_cur);
+		return 0;
+	}
+
+	rc = ibmphp_unconfigure_card(&slot_cur, 0);
+	slot_cur->func = NULL;
+	debug("in disable_slot. after unconfigure_card\n");
+	if (rc) {
+		err("could not unconfigure card.\n");
+		goto error;
+	}
+
+	rc = ibmphp_hpc_writeslot(slot_cur, HPC_SLOT_OFF);
+	if (rc)
+		goto error;
+
+	attn_off(slot_cur);
+	rc = slot_update(&slot_cur);
+	if (rc)
+		goto exit;
+
+	rc = ibmphp_update_slot_info(slot_cur);
+	ibmphp_print_test();
+exit:
+	return rc;
+
+error:
+	/*  Need to turn off if was blinking b4 */
+	attn_off(slot_cur);
+	attn_on(slot_cur);
+	if (slot_update(&slot_cur)) {
+		rc = -EFAULT;
+		goto exit;
+	}
+	if (flag)
+		ibmphp_update_slot_info(slot_cur);
+	goto exit;
+}
+
+const struct hotplug_slot_ops ibmphp_hotplug_slot_ops = {
+	.set_attention_status =		set_attention_status,
+	.enable_slot =			enable_slot,
+	.disable_slot =			ibmphp_disable_slot,
+	.hardware_test =		NULL,
+	.get_power_status =		get_power_status,
+	.get_attention_status =		get_attention_status,
+	.get_latch_status =		get_latch_status,
+	.get_adapter_status =		get_adapter_present,
+};
+
+static void ibmphp_unload(void)
+{
+	free_slots();
+	debug("after slots\n");
+	ibmphp_free_resources();
+	debug("after resources\n");
+	ibmphp_free_bus_info_queue();
+	debug("after bus info\n");
+	ibmphp_free_ebda_hpc_queue();
+	debug("after ebda hpc\n");
+	ibmphp_free_ebda_pci_rsrc_queue();
+	debug("after ebda pci rsrc\n");
+	kfree(ibmphp_pci_bus);
+}
+
+static int __init ibmphp_init(void)
+{
+	struct pci_bus *bus;
+	int i = 0;
+	int rc = 0;
+
+	init_flag = 1;
+
+	info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
+
+	ibmphp_pci_bus = kmalloc(sizeof(*ibmphp_pci_bus), GFP_KERNEL);
+	if (!ibmphp_pci_bus) {
+		rc = -ENOMEM;
+		goto exit;
+	}
+
+	bus = pci_find_bus(0, 0);
+	if (!bus) {
+		err("Can't find the root pci bus, can not continue\n");
+		rc = -ENODEV;
+		goto error;
+	}
+	memcpy(ibmphp_pci_bus, bus, sizeof(*ibmphp_pci_bus));
+
+	ibmphp_debug = debug;
+
+	for (i = 0; i < 16; i++)
+		irqs[i] = 0;
+
+	rc = ibmphp_access_ebda();
+	if (rc)
+		goto error;
+	debug("after ibmphp_access_ebda()\n");
+
+	rc = ibmphp_rsrc_init();
+	if (rc)
+		goto error;
+	debug("AFTER Resource & EBDA INITIALIZATIONS\n");
+
+	max_slots = get_max_slots();
+
+	rc = ibmphp_register_pci();
+	if (rc)
+		goto error;
+
+	if (init_ops()) {
+		rc = -ENODEV;
+		goto error;
+	}
+
+	ibmphp_print_test();
+	rc = ibmphp_hpc_start_poll_thread();
+	if (rc)
+		goto error;
+
+exit:
+	return rc;
+
+error:
+	ibmphp_unload();
+	goto exit;
+}
+
+static void __exit ibmphp_exit(void)
+{
+	ibmphp_hpc_stop_poll_thread();
+	debug("after polling\n");
+	ibmphp_unload();
+	debug("done\n");
+}
+
+module_init(ibmphp_init);
+module_exit(ibmphp_exit);
diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
new file mode 100644
index 000000000..7fb75401a
--- /dev/null
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -0,0 +1,1118 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IBM Hot Plug Controller Driver
+ *
+ * Written By: Tong Yu, IBM Corporation
+ *
+ * Copyright (C) 2001,2003 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001-2003 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <gregkh@us.ibm.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include "ibmphp.h"
+
+/*
+ * POST builds data blocks(in this data block definition, a char-1
+ * byte, short(or word)-2 byte, long(dword)-4 byte) in the Extended
+ * BIOS Data Area which describe the configuration of the hot-plug
+ * controllers and resources used by the PCI Hot-Plug devices.
+ *
+ * This file walks EBDA, maps data block from physical addr,
+ * reconstruct linked lists about all system resource(MEM, PFM, IO)
+ * already assigned by POST, as well as linked lists about hot plug
+ * controllers (ctlr#, slot#, bus&slot features...)
+ */
+
+/* Global lists */
+LIST_HEAD(ibmphp_ebda_pci_rsrc_head);
+LIST_HEAD(ibmphp_slot_head);
+
+/* Local variables */
+static struct ebda_hpc_list *hpc_list_ptr;
+static struct ebda_rsrc_list *rsrc_list_ptr;
+static struct rio_table_hdr *rio_table_ptr = NULL;
+static LIST_HEAD(ebda_hpc_head);
+static LIST_HEAD(bus_info_head);
+static LIST_HEAD(rio_vg_head);
+static LIST_HEAD(rio_lo_head);
+static LIST_HEAD(opt_vg_head);
+static LIST_HEAD(opt_lo_head);
+static void __iomem *io_mem;
+
+/* Local functions */
+static int ebda_rsrc_controller(void);
+static int ebda_rsrc_rsrc(void);
+static int ebda_rio_table(void);
+
+static struct ebda_hpc_list * __init alloc_ebda_hpc_list(void)
+{
+	return kzalloc(sizeof(struct ebda_hpc_list), GFP_KERNEL);
+}
+
+static struct controller *alloc_ebda_hpc(u32 slot_count, u32 bus_count)
+{
+	struct controller *controller;
+	struct ebda_hpc_slot *slots;
+	struct ebda_hpc_bus *buses;
+
+	controller = kzalloc(sizeof(struct controller), GFP_KERNEL);
+	if (!controller)
+		goto error;
+
+	slots = kcalloc(slot_count, sizeof(struct ebda_hpc_slot), GFP_KERNEL);
+	if (!slots)
+		goto error_contr;
+	controller->slots = slots;
+
+	buses = kcalloc(bus_count, sizeof(struct ebda_hpc_bus), GFP_KERNEL);
+	if (!buses)
+		goto error_slots;
+	controller->buses = buses;
+
+	return controller;
+error_slots:
+	kfree(controller->slots);
+error_contr:
+	kfree(controller);
+error:
+	return NULL;
+}
+
+static void free_ebda_hpc(struct controller *controller)
+{
+	kfree(controller->slots);
+	kfree(controller->buses);
+	kfree(controller);
+}
+
+static struct ebda_rsrc_list * __init alloc_ebda_rsrc_list(void)
+{
+	return kzalloc(sizeof(struct ebda_rsrc_list), GFP_KERNEL);
+}
+
+static struct ebda_pci_rsrc *alloc_ebda_pci_rsrc(void)
+{
+	return kzalloc(sizeof(struct ebda_pci_rsrc), GFP_KERNEL);
+}
+
+static void __init print_bus_info(void)
+{
+	struct bus_info *ptr;
+
+	list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
+		debug("%s - slot_min = %x\n", __func__, ptr->slot_min);
+		debug("%s - slot_max = %x\n", __func__, ptr->slot_max);
+		debug("%s - slot_count = %x\n", __func__, ptr->slot_count);
+		debug("%s - bus# = %x\n", __func__, ptr->busno);
+		debug("%s - current_speed = %x\n", __func__, ptr->current_speed);
+		debug("%s - controller_id = %x\n", __func__, ptr->controller_id);
+
+		debug("%s - slots_at_33_conv = %x\n", __func__, ptr->slots_at_33_conv);
+		debug("%s - slots_at_66_conv = %x\n", __func__, ptr->slots_at_66_conv);
+		debug("%s - slots_at_66_pcix = %x\n", __func__, ptr->slots_at_66_pcix);
+		debug("%s - slots_at_100_pcix = %x\n", __func__, ptr->slots_at_100_pcix);
+		debug("%s - slots_at_133_pcix = %x\n", __func__, ptr->slots_at_133_pcix);
+
+	}
+}
+
+static void print_lo_info(void)
+{
+	struct rio_detail *ptr;
+	debug("print_lo_info ----\n");
+	list_for_each_entry(ptr, &rio_lo_head, rio_detail_list) {
+		debug("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id);
+		debug("%s - rio_type = %x\n", __func__, ptr->rio_type);
+		debug("%s - owner_id = %x\n", __func__, ptr->owner_id);
+		debug("%s - first_slot_num = %x\n", __func__, ptr->first_slot_num);
+		debug("%s - wpindex = %x\n", __func__, ptr->wpindex);
+		debug("%s - chassis_num = %x\n", __func__, ptr->chassis_num);
+
+	}
+}
+
+static void print_vg_info(void)
+{
+	struct rio_detail *ptr;
+	debug("%s ---\n", __func__);
+	list_for_each_entry(ptr, &rio_vg_head, rio_detail_list) {
+		debug("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id);
+		debug("%s - rio_type = %x\n", __func__, ptr->rio_type);
+		debug("%s - owner_id = %x\n", __func__, ptr->owner_id);
+		debug("%s - first_slot_num = %x\n", __func__, ptr->first_slot_num);
+		debug("%s - wpindex = %x\n", __func__, ptr->wpindex);
+		debug("%s - chassis_num = %x\n", __func__, ptr->chassis_num);
+
+	}
+}
+
+static void __init print_ebda_pci_rsrc(void)
+{
+	struct ebda_pci_rsrc *ptr;
+
+	list_for_each_entry(ptr, &ibmphp_ebda_pci_rsrc_head, ebda_pci_rsrc_list) {
+		debug("%s - rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n",
+			__func__, ptr->rsrc_type, ptr->bus_num, ptr->dev_fun, ptr->start_addr, ptr->end_addr);
+	}
+}
+
+static void __init print_ibm_slot(void)
+{
+	struct slot *ptr;
+
+	list_for_each_entry(ptr, &ibmphp_slot_head, ibm_slot_list) {
+		debug("%s - slot_number: %x\n", __func__, ptr->number);
+	}
+}
+
+static void __init print_opt_vg(void)
+{
+	struct opt_rio *ptr;
+	debug("%s ---\n", __func__);
+	list_for_each_entry(ptr, &opt_vg_head, opt_rio_list) {
+		debug("%s - rio_type %x\n", __func__, ptr->rio_type);
+		debug("%s - chassis_num: %x\n", __func__, ptr->chassis_num);
+		debug("%s - first_slot_num: %x\n", __func__, ptr->first_slot_num);
+		debug("%s - middle_num: %x\n", __func__, ptr->middle_num);
+	}
+}
+
+static void __init print_ebda_hpc(void)
+{
+	struct controller *hpc_ptr;
+	u16 index;
+
+	list_for_each_entry(hpc_ptr, &ebda_hpc_head, ebda_hpc_list) {
+		for (index = 0; index < hpc_ptr->slot_count; index++) {
+			debug("%s - physical slot#: %x\n", __func__, hpc_ptr->slots[index].slot_num);
+			debug("%s - pci bus# of the slot: %x\n", __func__, hpc_ptr->slots[index].slot_bus_num);
+			debug("%s - index into ctlr addr: %x\n", __func__, hpc_ptr->slots[index].ctl_index);
+			debug("%s - cap of the slot: %x\n", __func__, hpc_ptr->slots[index].slot_cap);
+		}
+
+		for (index = 0; index < hpc_ptr->bus_count; index++)
+			debug("%s - bus# of each bus controlled by this ctlr: %x\n", __func__, hpc_ptr->buses[index].bus_num);
+
+		debug("%s - type of hpc: %x\n", __func__, hpc_ptr->ctlr_type);
+		switch (hpc_ptr->ctlr_type) {
+		case 1:
+			debug("%s - bus: %x\n", __func__, hpc_ptr->u.pci_ctlr.bus);
+			debug("%s - dev_fun: %x\n", __func__, hpc_ptr->u.pci_ctlr.dev_fun);
+			debug("%s - irq: %x\n", __func__, hpc_ptr->irq);
+			break;
+
+		case 0:
+			debug("%s - io_start: %x\n", __func__, hpc_ptr->u.isa_ctlr.io_start);
+			debug("%s - io_end: %x\n", __func__, hpc_ptr->u.isa_ctlr.io_end);
+			debug("%s - irq: %x\n", __func__, hpc_ptr->irq);
+			break;
+
+		case 2:
+		case 4:
+			debug("%s - wpegbbar: %lx\n", __func__, hpc_ptr->u.wpeg_ctlr.wpegbbar);
+			debug("%s - i2c_addr: %x\n", __func__, hpc_ptr->u.wpeg_ctlr.i2c_addr);
+			debug("%s - irq: %x\n", __func__, hpc_ptr->irq);
+			break;
+		}
+	}
+}
+
+int __init ibmphp_access_ebda(void)
+{
+	u8 format, num_ctlrs, rio_complete, hs_complete, ebda_sz;
+	u16 ebda_seg, num_entries, next_offset, offset, blk_id, sub_addr, re, rc_id, re_id, base;
+	int rc = 0;
+
+
+	rio_complete = 0;
+	hs_complete = 0;
+
+	io_mem = ioremap((0x40 << 4) + 0x0e, 2);
+	if (!io_mem)
+		return -ENOMEM;
+	ebda_seg = readw(io_mem);
+	iounmap(io_mem);
+	debug("returned ebda segment: %x\n", ebda_seg);
+
+	io_mem = ioremap(ebda_seg<<4, 1);
+	if (!io_mem)
+		return -ENOMEM;
+	ebda_sz = readb(io_mem);
+	iounmap(io_mem);
+	debug("ebda size: %d(KiB)\n", ebda_sz);
+	if (ebda_sz == 0)
+		return -ENOMEM;
+
+	io_mem = ioremap(ebda_seg<<4, (ebda_sz * 1024));
+	if (!io_mem)
+		return -ENOMEM;
+	next_offset = 0x180;
+
+	for (;;) {
+		offset = next_offset;
+
+		/* Make sure what we read is still in the mapped section */
+		if (WARN(offset > (ebda_sz * 1024 - 4),
+			 "ibmphp_ebda: next read is beyond ebda_sz\n"))
+			break;
+
+		next_offset = readw(io_mem + offset);	/* offset of next blk */
+
+		offset += 2;
+		if (next_offset == 0)	/* 0 indicate it's last blk */
+			break;
+		blk_id = readw(io_mem + offset);	/* this blk id */
+
+		offset += 2;
+		/* check if it is hot swap block or rio block */
+		if (blk_id != 0x4853 && blk_id != 0x4752)
+			continue;
+		/* found hs table */
+		if (blk_id == 0x4853) {
+			debug("now enter hot swap block---\n");
+			debug("hot blk id: %x\n", blk_id);
+			format = readb(io_mem + offset);
+
+			offset += 1;
+			if (format != 4)
+				goto error_nodev;
+			debug("hot blk format: %x\n", format);
+			/* hot swap sub blk */
+			base = offset;
+
+			sub_addr = base;
+			re = readw(io_mem + sub_addr);	/* next sub blk */
+
+			sub_addr += 2;
+			rc_id = readw(io_mem + sub_addr);	/* sub blk id */
+
+			sub_addr += 2;
+			if (rc_id != 0x5243)
+				goto error_nodev;
+			/* rc sub blk signature  */
+			num_ctlrs = readb(io_mem + sub_addr);
+
+			sub_addr += 1;
+			hpc_list_ptr = alloc_ebda_hpc_list();
+			if (!hpc_list_ptr) {
+				rc = -ENOMEM;
+				goto out;
+			}
+			hpc_list_ptr->format = format;
+			hpc_list_ptr->num_ctlrs = num_ctlrs;
+			hpc_list_ptr->phys_addr = sub_addr;	/*  offset of RSRC_CONTROLLER blk */
+			debug("info about hpc descriptor---\n");
+			debug("hot blk format: %x\n", format);
+			debug("num of controller: %x\n", num_ctlrs);
+			debug("offset of hpc data structure entries: %x\n ", sub_addr);
+
+			sub_addr = base + re;	/* re sub blk */
+			/* FIXME: rc is never used/checked */
+			rc = readw(io_mem + sub_addr);	/* next sub blk */
+
+			sub_addr += 2;
+			re_id = readw(io_mem + sub_addr);	/* sub blk id */
+
+			sub_addr += 2;
+			if (re_id != 0x5245)
+				goto error_nodev;
+
+			/* signature of re */
+			num_entries = readw(io_mem + sub_addr);
+
+			sub_addr += 2;	/* offset of RSRC_ENTRIES blk */
+			rsrc_list_ptr = alloc_ebda_rsrc_list();
+			if (!rsrc_list_ptr) {
+				rc = -ENOMEM;
+				goto out;
+			}
+			rsrc_list_ptr->format = format;
+			rsrc_list_ptr->num_entries = num_entries;
+			rsrc_list_ptr->phys_addr = sub_addr;
+
+			debug("info about rsrc descriptor---\n");
+			debug("format: %x\n", format);
+			debug("num of rsrc: %x\n", num_entries);
+			debug("offset of rsrc data structure entries: %x\n ", sub_addr);
+
+			hs_complete = 1;
+		} else {
+		/* found rio table, blk_id == 0x4752 */
+			debug("now enter io table ---\n");
+			debug("rio blk id: %x\n", blk_id);
+
+			rio_table_ptr = kzalloc(sizeof(struct rio_table_hdr), GFP_KERNEL);
+			if (!rio_table_ptr) {
+				rc = -ENOMEM;
+				goto out;
+			}
+			rio_table_ptr->ver_num = readb(io_mem + offset);
+			rio_table_ptr->scal_count = readb(io_mem + offset + 1);
+			rio_table_ptr->riodev_count = readb(io_mem + offset + 2);
+			rio_table_ptr->offset = offset + 3 ;
+
+			debug("info about rio table hdr ---\n");
+			debug("ver_num: %x\nscal_count: %x\nriodev_count: %x\noffset of rio table: %x\n ",
+				rio_table_ptr->ver_num, rio_table_ptr->scal_count,
+				rio_table_ptr->riodev_count, rio_table_ptr->offset);
+
+			rio_complete = 1;
+		}
+	}
+
+	if (!hs_complete && !rio_complete)
+		goto error_nodev;
+
+	if (rio_table_ptr) {
+		if (rio_complete && rio_table_ptr->ver_num == 3) {
+			rc = ebda_rio_table();
+			if (rc)
+				goto out;
+		}
+	}
+	rc = ebda_rsrc_controller();
+	if (rc)
+		goto out;
+
+	rc = ebda_rsrc_rsrc();
+	goto out;
+error_nodev:
+	rc = -ENODEV;
+out:
+	iounmap(io_mem);
+	return rc;
+}
+
+/*
+ * map info of scalability details and rio details from physical address
+ */
+static int __init ebda_rio_table(void)
+{
+	u16 offset;
+	u8 i;
+	struct rio_detail *rio_detail_ptr;
+
+	offset = rio_table_ptr->offset;
+	offset += 12 * rio_table_ptr->scal_count;
+
+	// we do concern about rio details
+	for (i = 0; i < rio_table_ptr->riodev_count; i++) {
+		rio_detail_ptr = kzalloc(sizeof(struct rio_detail), GFP_KERNEL);
+		if (!rio_detail_ptr)
+			return -ENOMEM;
+		rio_detail_ptr->rio_node_id = readb(io_mem + offset);
+		rio_detail_ptr->bbar = readl(io_mem + offset + 1);
+		rio_detail_ptr->rio_type = readb(io_mem + offset + 5);
+		rio_detail_ptr->owner_id = readb(io_mem + offset + 6);
+		rio_detail_ptr->port0_node_connect = readb(io_mem + offset + 7);
+		rio_detail_ptr->port0_port_connect = readb(io_mem + offset + 8);
+		rio_detail_ptr->port1_node_connect = readb(io_mem + offset + 9);
+		rio_detail_ptr->port1_port_connect = readb(io_mem + offset + 10);
+		rio_detail_ptr->first_slot_num = readb(io_mem + offset + 11);
+		rio_detail_ptr->status = readb(io_mem + offset + 12);
+		rio_detail_ptr->wpindex = readb(io_mem + offset + 13);
+		rio_detail_ptr->chassis_num = readb(io_mem + offset + 14);
+//		debug("rio_node_id: %x\nbbar: %x\nrio_type: %x\nowner_id: %x\nport0_node: %x\nport0_port: %x\nport1_node: %x\nport1_port: %x\nfirst_slot_num: %x\nstatus: %x\n", rio_detail_ptr->rio_node_id, rio_detail_ptr->bbar, rio_detail_ptr->rio_type, rio_detail_ptr->owner_id, rio_detail_ptr->port0_node_connect, rio_detail_ptr->port0_port_connect, rio_detail_ptr->port1_node_connect, rio_detail_ptr->port1_port_connect, rio_detail_ptr->first_slot_num, rio_detail_ptr->status);
+		//create linked list of chassis
+		if (rio_detail_ptr->rio_type == 4 || rio_detail_ptr->rio_type == 5)
+			list_add(&rio_detail_ptr->rio_detail_list, &rio_vg_head);
+		//create linked list of expansion box
+		else if (rio_detail_ptr->rio_type == 6 || rio_detail_ptr->rio_type == 7)
+			list_add(&rio_detail_ptr->rio_detail_list, &rio_lo_head);
+		else
+			// not in my concern
+			kfree(rio_detail_ptr);
+		offset += 15;
+	}
+	print_lo_info();
+	print_vg_info();
+	return 0;
+}
+
+/*
+ * reorganizing linked list of chassis
+ */
+static struct opt_rio *search_opt_vg(u8 chassis_num)
+{
+	struct opt_rio *ptr;
+	list_for_each_entry(ptr, &opt_vg_head, opt_rio_list) {
+		if (ptr->chassis_num == chassis_num)
+			return ptr;
+	}
+	return NULL;
+}
+
+static int __init combine_wpg_for_chassis(void)
+{
+	struct opt_rio *opt_rio_ptr = NULL;
+	struct rio_detail *rio_detail_ptr = NULL;
+
+	list_for_each_entry(rio_detail_ptr, &rio_vg_head, rio_detail_list) {
+		opt_rio_ptr = search_opt_vg(rio_detail_ptr->chassis_num);
+		if (!opt_rio_ptr) {
+			opt_rio_ptr = kzalloc(sizeof(struct opt_rio), GFP_KERNEL);
+			if (!opt_rio_ptr)
+				return -ENOMEM;
+			opt_rio_ptr->rio_type = rio_detail_ptr->rio_type;
+			opt_rio_ptr->chassis_num = rio_detail_ptr->chassis_num;
+			opt_rio_ptr->first_slot_num = rio_detail_ptr->first_slot_num;
+			opt_rio_ptr->middle_num = rio_detail_ptr->first_slot_num;
+			list_add(&opt_rio_ptr->opt_rio_list, &opt_vg_head);
+		} else {
+			opt_rio_ptr->first_slot_num = min(opt_rio_ptr->first_slot_num, rio_detail_ptr->first_slot_num);
+			opt_rio_ptr->middle_num = max(opt_rio_ptr->middle_num, rio_detail_ptr->first_slot_num);
+		}
+	}
+	print_opt_vg();
+	return 0;
+}
+
+/*
+ * reorganizing linked list of expansion box
+ */
+static struct opt_rio_lo *search_opt_lo(u8 chassis_num)
+{
+	struct opt_rio_lo *ptr;
+	list_for_each_entry(ptr, &opt_lo_head, opt_rio_lo_list) {
+		if (ptr->chassis_num == chassis_num)
+			return ptr;
+	}
+	return NULL;
+}
+
+static int combine_wpg_for_expansion(void)
+{
+	struct opt_rio_lo *opt_rio_lo_ptr = NULL;
+	struct rio_detail *rio_detail_ptr = NULL;
+
+	list_for_each_entry(rio_detail_ptr, &rio_lo_head, rio_detail_list) {
+		opt_rio_lo_ptr = search_opt_lo(rio_detail_ptr->chassis_num);
+		if (!opt_rio_lo_ptr) {
+			opt_rio_lo_ptr = kzalloc(sizeof(struct opt_rio_lo), GFP_KERNEL);
+			if (!opt_rio_lo_ptr)
+				return -ENOMEM;
+			opt_rio_lo_ptr->rio_type = rio_detail_ptr->rio_type;
+			opt_rio_lo_ptr->chassis_num = rio_detail_ptr->chassis_num;
+			opt_rio_lo_ptr->first_slot_num = rio_detail_ptr->first_slot_num;
+			opt_rio_lo_ptr->middle_num = rio_detail_ptr->first_slot_num;
+			opt_rio_lo_ptr->pack_count = 1;
+
+			list_add(&opt_rio_lo_ptr->opt_rio_lo_list, &opt_lo_head);
+		} else {
+			opt_rio_lo_ptr->first_slot_num = min(opt_rio_lo_ptr->first_slot_num, rio_detail_ptr->first_slot_num);
+			opt_rio_lo_ptr->middle_num = max(opt_rio_lo_ptr->middle_num, rio_detail_ptr->first_slot_num);
+			opt_rio_lo_ptr->pack_count = 2;
+		}
+	}
+	return 0;
+}
+
+
+/* Since we don't know the max slot number per each chassis, hence go
+ * through the list of all chassis to find out the range
+ * Arguments: slot_num, 1st slot number of the chassis we think we are on,
+ * var (0 = chassis, 1 = expansion box)
+ */
+static int first_slot_num(u8 slot_num, u8 first_slot, u8 var)
+{
+	struct opt_rio *opt_vg_ptr = NULL;
+	struct opt_rio_lo *opt_lo_ptr = NULL;
+	int rc = 0;
+
+	if (!var) {
+		list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) {
+			if ((first_slot < opt_vg_ptr->first_slot_num) && (slot_num >= opt_vg_ptr->first_slot_num)) {
+				rc = -ENODEV;
+				break;
+			}
+		}
+	} else {
+		list_for_each_entry(opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
+			if ((first_slot < opt_lo_ptr->first_slot_num) && (slot_num >= opt_lo_ptr->first_slot_num)) {
+				rc = -ENODEV;
+				break;
+			}
+		}
+	}
+	return rc;
+}
+
+static struct opt_rio_lo *find_rxe_num(u8 slot_num)
+{
+	struct opt_rio_lo *opt_lo_ptr;
+
+	list_for_each_entry(opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
+		//check to see if this slot_num belongs to expansion box
+		if ((slot_num >= opt_lo_ptr->first_slot_num) && (!first_slot_num(slot_num, opt_lo_ptr->first_slot_num, 1)))
+			return opt_lo_ptr;
+	}
+	return NULL;
+}
+
+static struct opt_rio *find_chassis_num(u8 slot_num)
+{
+	struct opt_rio *opt_vg_ptr;
+
+	list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) {
+		//check to see if this slot_num belongs to chassis
+		if ((slot_num >= opt_vg_ptr->first_slot_num) && (!first_slot_num(slot_num, opt_vg_ptr->first_slot_num, 0)))
+			return opt_vg_ptr;
+	}
+	return NULL;
+}
+
+/* This routine will find out how many slots are in the chassis, so that
+ * the slot numbers for rxe100 would start from 1, and not from 7, or 6 etc
+ */
+static u8 calculate_first_slot(u8 slot_num)
+{
+	u8 first_slot = 1;
+	struct slot *slot_cur;
+
+	list_for_each_entry(slot_cur, &ibmphp_slot_head, ibm_slot_list) {
+		if (slot_cur->ctrl) {
+			if ((slot_cur->ctrl->ctlr_type != 4) && (slot_cur->ctrl->ending_slot_num > first_slot) && (slot_num > slot_cur->ctrl->ending_slot_num))
+				first_slot = slot_cur->ctrl->ending_slot_num;
+		}
+	}
+	return first_slot + 1;
+
+}
+
+#define SLOT_NAME_SIZE 30
+
+static char *create_file_name(struct slot *slot_cur)
+{
+	struct opt_rio *opt_vg_ptr = NULL;
+	struct opt_rio_lo *opt_lo_ptr = NULL;
+	static char str[SLOT_NAME_SIZE];
+	int which = 0; /* rxe = 1, chassis = 0 */
+	u8 number = 1; /* either chassis or rxe # */
+	u8 first_slot = 1;
+	u8 slot_num;
+	u8 flag = 0;
+
+	if (!slot_cur) {
+		err("Structure passed is empty\n");
+		return NULL;
+	}
+
+	slot_num = slot_cur->number;
+
+	memset(str, 0, sizeof(str));
+
+	if (rio_table_ptr) {
+		if (rio_table_ptr->ver_num == 3) {
+			opt_vg_ptr = find_chassis_num(slot_num);
+			opt_lo_ptr = find_rxe_num(slot_num);
+		}
+	}
+	if (opt_vg_ptr) {
+		if (opt_lo_ptr) {
+			if ((slot_num - opt_vg_ptr->first_slot_num) > (slot_num - opt_lo_ptr->first_slot_num)) {
+				number = opt_lo_ptr->chassis_num;
+				first_slot = opt_lo_ptr->first_slot_num;
+				which = 1; /* it is RXE */
+			} else {
+				first_slot = opt_vg_ptr->first_slot_num;
+				number = opt_vg_ptr->chassis_num;
+				which = 0;
+			}
+		} else {
+			first_slot = opt_vg_ptr->first_slot_num;
+			number = opt_vg_ptr->chassis_num;
+			which = 0;
+		}
+		++flag;
+	} else if (opt_lo_ptr) {
+		number = opt_lo_ptr->chassis_num;
+		first_slot = opt_lo_ptr->first_slot_num;
+		which = 1;
+		++flag;
+	} else if (rio_table_ptr) {
+		if (rio_table_ptr->ver_num == 3) {
+			/* if both NULL and we DO have correct RIO table in BIOS */
+			return NULL;
+		}
+	}
+	if (!flag) {
+		if (slot_cur->ctrl->ctlr_type == 4) {
+			first_slot = calculate_first_slot(slot_num);
+			which = 1;
+		} else {
+			which = 0;
+		}
+	}
+
+	sprintf(str, "%s%dslot%d",
+		which == 0 ? "chassis" : "rxe",
+		number, slot_num - first_slot + 1);
+	return str;
+}
+
+static int fillslotinfo(struct hotplug_slot *hotplug_slot)
+{
+	struct slot *slot;
+	int rc = 0;
+
+	slot = to_slot(hotplug_slot);
+	rc = ibmphp_hpc_readslot(slot, READ_ALLSTAT, NULL);
+	return rc;
+}
+
+static struct pci_driver ibmphp_driver;
+
+/*
+ * map info (ctlr-id, slot count, slot#.. bus count, bus#, ctlr type...) of
+ * each hpc from physical address to a list of hot plug controllers based on
+ * hpc descriptors.
+ */
+static int __init ebda_rsrc_controller(void)
+{
+	u16 addr, addr_slot, addr_bus;
+	u8 ctlr_id, temp, bus_index;
+	u16 ctlr, slot, bus;
+	u16 slot_num, bus_num, index;
+	struct controller *hpc_ptr;
+	struct ebda_hpc_bus *bus_ptr;
+	struct ebda_hpc_slot *slot_ptr;
+	struct bus_info *bus_info_ptr1, *bus_info_ptr2;
+	int rc;
+	struct slot *tmp_slot;
+	char name[SLOT_NAME_SIZE];
+
+	addr = hpc_list_ptr->phys_addr;
+	for (ctlr = 0; ctlr < hpc_list_ptr->num_ctlrs; ctlr++) {
+		bus_index = 1;
+		ctlr_id = readb(io_mem + addr);
+		addr += 1;
+		slot_num = readb(io_mem + addr);
+
+		addr += 1;
+		addr_slot = addr;	/* offset of slot structure */
+		addr += (slot_num * 4);
+
+		bus_num = readb(io_mem + addr);
+
+		addr += 1;
+		addr_bus = addr;	/* offset of bus */
+		addr += (bus_num * 9);	/* offset of ctlr_type */
+		temp = readb(io_mem + addr);
+
+		addr += 1;
+		/* init hpc structure */
+		hpc_ptr = alloc_ebda_hpc(slot_num, bus_num);
+		if (!hpc_ptr) {
+			return -ENOMEM;
+		}
+		hpc_ptr->ctlr_id = ctlr_id;
+		hpc_ptr->ctlr_relative_id = ctlr;
+		hpc_ptr->slot_count = slot_num;
+		hpc_ptr->bus_count = bus_num;
+		debug("now enter ctlr data structure ---\n");
+		debug("ctlr id: %x\n", ctlr_id);
+		debug("ctlr_relative_id: %x\n", hpc_ptr->ctlr_relative_id);
+		debug("count of slots controlled by this ctlr: %x\n", slot_num);
+		debug("count of buses controlled by this ctlr: %x\n", bus_num);
+
+		/* init slot structure, fetch slot, bus, cap... */
+		slot_ptr = hpc_ptr->slots;
+		for (slot = 0; slot < slot_num; slot++) {
+			slot_ptr->slot_num = readb(io_mem + addr_slot);
+			slot_ptr->slot_bus_num = readb(io_mem + addr_slot + slot_num);
+			slot_ptr->ctl_index = readb(io_mem + addr_slot + 2*slot_num);
+			slot_ptr->slot_cap = readb(io_mem + addr_slot + 3*slot_num);
+
+			// create bus_info lined list --- if only one slot per bus: slot_min = slot_max
+
+			bus_info_ptr2 = ibmphp_find_same_bus_num(slot_ptr->slot_bus_num);
+			if (!bus_info_ptr2) {
+				bus_info_ptr1 = kzalloc(sizeof(struct bus_info), GFP_KERNEL);
+				if (!bus_info_ptr1) {
+					rc = -ENOMEM;
+					goto error_no_slot;
+				}
+				bus_info_ptr1->slot_min = slot_ptr->slot_num;
+				bus_info_ptr1->slot_max = slot_ptr->slot_num;
+				bus_info_ptr1->slot_count += 1;
+				bus_info_ptr1->busno = slot_ptr->slot_bus_num;
+				bus_info_ptr1->index = bus_index++;
+				bus_info_ptr1->current_speed = 0xff;
+				bus_info_ptr1->current_bus_mode = 0xff;
+
+				bus_info_ptr1->controller_id = hpc_ptr->ctlr_id;
+
+				list_add_tail(&bus_info_ptr1->bus_info_list, &bus_info_head);
+
+			} else {
+				bus_info_ptr2->slot_min = min(bus_info_ptr2->slot_min, slot_ptr->slot_num);
+				bus_info_ptr2->slot_max = max(bus_info_ptr2->slot_max, slot_ptr->slot_num);
+				bus_info_ptr2->slot_count += 1;
+
+			}
+
+			// end of creating the bus_info linked list
+
+			slot_ptr++;
+			addr_slot += 1;
+		}
+
+		/* init bus structure */
+		bus_ptr = hpc_ptr->buses;
+		for (bus = 0; bus < bus_num; bus++) {
+			bus_ptr->bus_num = readb(io_mem + addr_bus + bus);
+			bus_ptr->slots_at_33_conv = readb(io_mem + addr_bus + bus_num + 8 * bus);
+			bus_ptr->slots_at_66_conv = readb(io_mem + addr_bus + bus_num + 8 * bus + 1);
+
+			bus_ptr->slots_at_66_pcix = readb(io_mem + addr_bus + bus_num + 8 * bus + 2);
+
+			bus_ptr->slots_at_100_pcix = readb(io_mem + addr_bus + bus_num + 8 * bus + 3);
+
+			bus_ptr->slots_at_133_pcix = readb(io_mem + addr_bus + bus_num + 8 * bus + 4);
+
+			bus_info_ptr2 = ibmphp_find_same_bus_num(bus_ptr->bus_num);
+			if (bus_info_ptr2) {
+				bus_info_ptr2->slots_at_33_conv = bus_ptr->slots_at_33_conv;
+				bus_info_ptr2->slots_at_66_conv = bus_ptr->slots_at_66_conv;
+				bus_info_ptr2->slots_at_66_pcix = bus_ptr->slots_at_66_pcix;
+				bus_info_ptr2->slots_at_100_pcix = bus_ptr->slots_at_100_pcix;
+				bus_info_ptr2->slots_at_133_pcix = bus_ptr->slots_at_133_pcix;
+			}
+			bus_ptr++;
+		}
+
+		hpc_ptr->ctlr_type = temp;
+
+		switch (hpc_ptr->ctlr_type) {
+			case 1:
+				hpc_ptr->u.pci_ctlr.bus = readb(io_mem + addr);
+				hpc_ptr->u.pci_ctlr.dev_fun = readb(io_mem + addr + 1);
+				hpc_ptr->irq = readb(io_mem + addr + 2);
+				addr += 3;
+				debug("ctrl bus = %x, ctlr devfun = %x, irq = %x\n",
+					hpc_ptr->u.pci_ctlr.bus,
+					hpc_ptr->u.pci_ctlr.dev_fun, hpc_ptr->irq);
+				break;
+
+			case 0:
+				hpc_ptr->u.isa_ctlr.io_start = readw(io_mem + addr);
+				hpc_ptr->u.isa_ctlr.io_end = readw(io_mem + addr + 2);
+				if (!request_region(hpc_ptr->u.isa_ctlr.io_start,
+						     (hpc_ptr->u.isa_ctlr.io_end - hpc_ptr->u.isa_ctlr.io_start + 1),
+						     "ibmphp")) {
+					rc = -ENODEV;
+					goto error_no_slot;
+				}
+				hpc_ptr->irq = readb(io_mem + addr + 4);
+				addr += 5;
+				break;
+
+			case 2:
+			case 4:
+				hpc_ptr->u.wpeg_ctlr.wpegbbar = readl(io_mem + addr);
+				hpc_ptr->u.wpeg_ctlr.i2c_addr = readb(io_mem + addr + 4);
+				hpc_ptr->irq = readb(io_mem + addr + 5);
+				addr += 6;
+				break;
+			default:
+				rc = -ENODEV;
+				goto error_no_slot;
+		}
+
+		//reorganize chassis' linked list
+		combine_wpg_for_chassis();
+		combine_wpg_for_expansion();
+		hpc_ptr->revision = 0xff;
+		hpc_ptr->options = 0xff;
+		hpc_ptr->starting_slot_num = hpc_ptr->slots[0].slot_num;
+		hpc_ptr->ending_slot_num = hpc_ptr->slots[slot_num-1].slot_num;
+
+		// register slots with hpc core as well as create linked list of ibm slot
+		for (index = 0; index < hpc_ptr->slot_count; index++) {
+			tmp_slot = kzalloc(sizeof(*tmp_slot), GFP_KERNEL);
+			if (!tmp_slot) {
+				rc = -ENOMEM;
+				goto error_no_slot;
+			}
+
+			tmp_slot->flag = 1;
+
+			tmp_slot->capabilities = hpc_ptr->slots[index].slot_cap;
+			if ((hpc_ptr->slots[index].slot_cap & EBDA_SLOT_133_MAX) == EBDA_SLOT_133_MAX)
+				tmp_slot->supported_speed =  3;
+			else if ((hpc_ptr->slots[index].slot_cap & EBDA_SLOT_100_MAX) == EBDA_SLOT_100_MAX)
+				tmp_slot->supported_speed =  2;
+			else if ((hpc_ptr->slots[index].slot_cap & EBDA_SLOT_66_MAX) == EBDA_SLOT_66_MAX)
+				tmp_slot->supported_speed =  1;
+
+			if ((hpc_ptr->slots[index].slot_cap & EBDA_SLOT_PCIX_CAP) == EBDA_SLOT_PCIX_CAP)
+				tmp_slot->supported_bus_mode = 1;
+			else
+				tmp_slot->supported_bus_mode = 0;
+
+
+			tmp_slot->bus = hpc_ptr->slots[index].slot_bus_num;
+
+			bus_info_ptr1 = ibmphp_find_same_bus_num(hpc_ptr->slots[index].slot_bus_num);
+			if (!bus_info_ptr1) {
+				rc = -ENODEV;
+				goto error;
+			}
+			tmp_slot->bus_on = bus_info_ptr1;
+			bus_info_ptr1 = NULL;
+			tmp_slot->ctrl = hpc_ptr;
+
+			tmp_slot->ctlr_index = hpc_ptr->slots[index].ctl_index;
+			tmp_slot->number = hpc_ptr->slots[index].slot_num;
+
+			rc = fillslotinfo(&tmp_slot->hotplug_slot);
+			if (rc)
+				goto error;
+
+			rc = ibmphp_init_devno(&tmp_slot);
+			if (rc)
+				goto error;
+			tmp_slot->hotplug_slot.ops = &ibmphp_hotplug_slot_ops;
+
+			// end of registering ibm slot with hotplug core
+
+			list_add(&tmp_slot->ibm_slot_list, &ibmphp_slot_head);
+		}
+
+		print_bus_info();
+		list_add(&hpc_ptr->ebda_hpc_list, &ebda_hpc_head);
+
+	}			/* each hpc  */
+
+	list_for_each_entry(tmp_slot, &ibmphp_slot_head, ibm_slot_list) {
+		snprintf(name, SLOT_NAME_SIZE, "%s", create_file_name(tmp_slot));
+		pci_hp_register(&tmp_slot->hotplug_slot,
+			pci_find_bus(0, tmp_slot->bus), tmp_slot->device, name);
+	}
+
+	print_ebda_hpc();
+	print_ibm_slot();
+	return 0;
+
+error:
+	kfree(tmp_slot);
+error_no_slot:
+	free_ebda_hpc(hpc_ptr);
+	return rc;
+}
+
+/*
+ * map info (bus, devfun, start addr, end addr..) of i/o, memory,
+ * pfm from the physical addr to a list of resource.
+ */
+static int __init ebda_rsrc_rsrc(void)
+{
+	u16 addr;
+	short rsrc;
+	u8 type, rsrc_type;
+	struct ebda_pci_rsrc *rsrc_ptr;
+
+	addr = rsrc_list_ptr->phys_addr;
+	debug("now entering rsrc land\n");
+	debug("offset of rsrc: %x\n", rsrc_list_ptr->phys_addr);
+
+	for (rsrc = 0; rsrc < rsrc_list_ptr->num_entries; rsrc++) {
+		type = readb(io_mem + addr);
+
+		addr += 1;
+		rsrc_type = type & EBDA_RSRC_TYPE_MASK;
+
+		if (rsrc_type == EBDA_IO_RSRC_TYPE) {
+			rsrc_ptr = alloc_ebda_pci_rsrc();
+			if (!rsrc_ptr) {
+				iounmap(io_mem);
+				return -ENOMEM;
+			}
+			rsrc_ptr->rsrc_type = type;
+
+			rsrc_ptr->bus_num = readb(io_mem + addr);
+			rsrc_ptr->dev_fun = readb(io_mem + addr + 1);
+			rsrc_ptr->start_addr = readw(io_mem + addr + 2);
+			rsrc_ptr->end_addr = readw(io_mem + addr + 4);
+			addr += 6;
+
+			debug("rsrc from io type ----\n");
+			debug("rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n",
+				rsrc_ptr->rsrc_type, rsrc_ptr->bus_num, rsrc_ptr->dev_fun, rsrc_ptr->start_addr, rsrc_ptr->end_addr);
+
+			list_add(&rsrc_ptr->ebda_pci_rsrc_list, &ibmphp_ebda_pci_rsrc_head);
+		}
+
+		if (rsrc_type == EBDA_MEM_RSRC_TYPE || rsrc_type == EBDA_PFM_RSRC_TYPE) {
+			rsrc_ptr = alloc_ebda_pci_rsrc();
+			if (!rsrc_ptr) {
+				iounmap(io_mem);
+				return -ENOMEM;
+			}
+			rsrc_ptr->rsrc_type = type;
+
+			rsrc_ptr->bus_num = readb(io_mem + addr);
+			rsrc_ptr->dev_fun = readb(io_mem + addr + 1);
+			rsrc_ptr->start_addr = readl(io_mem + addr + 2);
+			rsrc_ptr->end_addr = readl(io_mem + addr + 6);
+			addr += 10;
+
+			debug("rsrc from mem or pfm ---\n");
+			debug("rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n",
+				rsrc_ptr->rsrc_type, rsrc_ptr->bus_num, rsrc_ptr->dev_fun, rsrc_ptr->start_addr, rsrc_ptr->end_addr);
+
+			list_add(&rsrc_ptr->ebda_pci_rsrc_list, &ibmphp_ebda_pci_rsrc_head);
+		}
+	}
+	kfree(rsrc_list_ptr);
+	rsrc_list_ptr = NULL;
+	print_ebda_pci_rsrc();
+	return 0;
+}
+
+u16 ibmphp_get_total_controllers(void)
+{
+	return hpc_list_ptr->num_ctlrs;
+}
+
+struct slot *ibmphp_get_slot_from_physical_num(u8 physical_num)
+{
+	struct slot *slot;
+
+	list_for_each_entry(slot, &ibmphp_slot_head, ibm_slot_list) {
+		if (slot->number == physical_num)
+			return slot;
+	}
+	return NULL;
+}
+
+/* To find:
+ *	- the smallest slot number
+ *	- the largest slot number
+ *	- the total number of the slots based on each bus
+ *	  (if only one slot per bus slot_min = slot_max )
+ */
+struct bus_info *ibmphp_find_same_bus_num(u32 num)
+{
+	struct bus_info *ptr;
+
+	list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
+		if (ptr->busno == num)
+			 return ptr;
+	}
+	return NULL;
+}
+
+/*  Finding relative bus number, in order to map corresponding
+ *  bus register
+ */
+int ibmphp_get_bus_index(u8 num)
+{
+	struct bus_info *ptr;
+
+	list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
+		if (ptr->busno == num)
+			return ptr->index;
+	}
+	return -ENODEV;
+}
+
+void ibmphp_free_bus_info_queue(void)
+{
+	struct bus_info *bus_info, *next;
+
+	list_for_each_entry_safe(bus_info, next, &bus_info_head,
+				 bus_info_list) {
+		kfree (bus_info);
+	}
+}
+
+void ibmphp_free_ebda_hpc_queue(void)
+{
+	struct controller *controller = NULL, *next;
+	int pci_flag = 0;
+
+	list_for_each_entry_safe(controller, next, &ebda_hpc_head,
+				 ebda_hpc_list) {
+		if (controller->ctlr_type == 0)
+			release_region(controller->u.isa_ctlr.io_start, (controller->u.isa_ctlr.io_end - controller->u.isa_ctlr.io_start + 1));
+		else if ((controller->ctlr_type == 1) && (!pci_flag)) {
+			++pci_flag;
+			pci_unregister_driver(&ibmphp_driver);
+		}
+		free_ebda_hpc(controller);
+	}
+}
+
+void ibmphp_free_ebda_pci_rsrc_queue(void)
+{
+	struct ebda_pci_rsrc *resource, *next;
+
+	list_for_each_entry_safe(resource, next, &ibmphp_ebda_pci_rsrc_head,
+				 ebda_pci_rsrc_list) {
+		kfree (resource);
+		resource = NULL;
+	}
+}
+
+static const struct pci_device_id id_table[] = {
+	{
+		.vendor		= PCI_VENDOR_ID_IBM,
+		.device		= HPC_DEVICE_ID,
+		.subvendor	= PCI_VENDOR_ID_IBM,
+		.subdevice	= HPC_SUBSYSTEM_ID,
+		.class		= ((PCI_CLASS_SYSTEM_PCI_HOTPLUG << 8) | 0x00),
+	}, {}
+};
+
+MODULE_DEVICE_TABLE(pci, id_table);
+
+static int ibmphp_probe(struct pci_dev *, const struct pci_device_id *);
+static struct pci_driver ibmphp_driver = {
+	.name		= "ibmphp",
+	.id_table	= id_table,
+	.probe		= ibmphp_probe,
+};
+
+int ibmphp_register_pci(void)
+{
+	struct controller *ctrl;
+	int rc = 0;
+
+	list_for_each_entry(ctrl, &ebda_hpc_head, ebda_hpc_list) {
+		if (ctrl->ctlr_type == 1) {
+			rc = pci_register_driver(&ibmphp_driver);
+			break;
+		}
+	}
+	return rc;
+}
+static int ibmphp_probe(struct pci_dev *dev, const struct pci_device_id *ids)
+{
+	struct controller *ctrl;
+
+	debug("inside ibmphp_probe\n");
+
+	list_for_each_entry(ctrl, &ebda_hpc_head, ebda_hpc_list) {
+		if (ctrl->ctlr_type == 1) {
+			if ((dev->devfn == ctrl->u.pci_ctlr.dev_fun) && (dev->bus->number == ctrl->u.pci_ctlr.bus)) {
+				ctrl->ctrl_dev = dev;
+				debug("found device!!!\n");
+				debug("dev->device = %x, dev->subsystem_device = %x\n", dev->device, dev->subsystem_device);
+				return 0;
+			}
+		}
+	}
+	return -ENODEV;
+}
diff --git a/drivers/pci/hotplug/ibmphp_hpc.c b/drivers/pci/hotplug/ibmphp_hpc.c
new file mode 100644
index 000000000..a5720d12e
--- /dev/null
+++ b/drivers/pci/hotplug/ibmphp_hpc.c
@@ -0,0 +1,1094 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IBM Hot Plug Controller Driver
+ *
+ * Written By: Jyoti Shah, IBM Corporation
+ *
+ * Copyright (C) 2001-2003 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <gregkh@us.ibm.com>
+ *                  <jshah@us.ibm.com>
+ *
+ */
+
+#include <linux/wait.h>
+#include <linux/time.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/kthread.h>
+#include "ibmphp.h"
+
+static int to_debug = 0;
+#define debug_polling(fmt, arg...)	do { if (to_debug) debug(fmt, arg); } while (0)
+
+//----------------------------------------------------------------------------
+// timeout values
+//----------------------------------------------------------------------------
+#define CMD_COMPLETE_TOUT_SEC	60	// give HPC 60 sec to finish cmd
+#define HPC_CTLR_WORKING_TOUT	60	// give HPC 60 sec to finish cmd
+#define HPC_GETACCESS_TIMEOUT	60	// seconds
+#define POLL_INTERVAL_SEC	2	// poll HPC every 2 seconds
+#define POLL_LATCH_CNT		5	// poll latch 5 times, then poll slots
+
+//----------------------------------------------------------------------------
+// Winnipeg Architected Register Offsets
+//----------------------------------------------------------------------------
+#define WPG_I2CMBUFL_OFFSET	0x08	// I2C Message Buffer Low
+#define WPG_I2CMOSUP_OFFSET	0x10	// I2C Master Operation Setup Reg
+#define WPG_I2CMCNTL_OFFSET	0x20	// I2C Master Control Register
+#define WPG_I2CPARM_OFFSET	0x40	// I2C Parameter Register
+#define WPG_I2CSTAT_OFFSET	0x70	// I2C Status Register
+
+//----------------------------------------------------------------------------
+// Winnipeg Store Type commands (Add this commands to the register offset)
+//----------------------------------------------------------------------------
+#define WPG_I2C_AND		0x1000	// I2C AND operation
+#define WPG_I2C_OR		0x2000	// I2C OR operation
+
+//----------------------------------------------------------------------------
+// Command set for I2C Master Operation Setup Register
+//----------------------------------------------------------------------------
+#define WPG_READATADDR_MASK	0x00010000	// read,bytes,I2C shifted,index
+#define WPG_WRITEATADDR_MASK	0x40010000	// write,bytes,I2C shifted,index
+#define WPG_READDIRECT_MASK	0x10010000
+#define WPG_WRITEDIRECT_MASK	0x60010000
+
+
+//----------------------------------------------------------------------------
+// bit masks for I2C Master Control Register
+//----------------------------------------------------------------------------
+#define WPG_I2CMCNTL_STARTOP_MASK	0x00000002	// Start the Operation
+
+//----------------------------------------------------------------------------
+//
+//----------------------------------------------------------------------------
+#define WPG_I2C_IOREMAP_SIZE	0x2044	// size of linear address interval
+
+//----------------------------------------------------------------------------
+// command index
+//----------------------------------------------------------------------------
+#define WPG_1ST_SLOT_INDEX	0x01	// index - 1st slot for ctlr
+#define WPG_CTLR_INDEX		0x0F	// index - ctlr
+#define WPG_1ST_EXTSLOT_INDEX	0x10	// index - 1st ext slot for ctlr
+#define WPG_1ST_BUS_INDEX	0x1F	// index - 1st bus for ctlr
+
+//----------------------------------------------------------------------------
+// macro utilities
+//----------------------------------------------------------------------------
+// if bits 20,22,25,26,27,29,30 are OFF return 1
+#define HPC_I2CSTATUS_CHECK(s)	((u8)((s & 0x00000A76) ? 0 : 1))
+
+//----------------------------------------------------------------------------
+// global variables
+//----------------------------------------------------------------------------
+static DEFINE_MUTEX(sem_hpcaccess);	// lock access to HPC
+static DEFINE_MUTEX(operations_mutex);	// lock all operations and
+					// access to data structures
+static DECLARE_COMPLETION(exit_complete); // make sure polling thread goes away
+static struct task_struct *ibmphp_poll_thread;
+//----------------------------------------------------------------------------
+// local function prototypes
+//----------------------------------------------------------------------------
+static u8 i2c_ctrl_read(struct controller *, void __iomem *, u8);
+static u8 i2c_ctrl_write(struct controller *, void __iomem *, u8, u8);
+static u8 hpc_writecmdtoindex(u8, u8);
+static u8 hpc_readcmdtoindex(u8, u8);
+static void get_hpc_access(void);
+static void free_hpc_access(void);
+static int poll_hpc(void *data);
+static int process_changeinstatus(struct slot *, struct slot *);
+static int process_changeinlatch(u8, u8, struct controller *);
+static int hpc_wait_ctlr_notworking(int, struct controller *, void __iomem *, u8 *);
+//----------------------------------------------------------------------------
+
+
+/*----------------------------------------------------------------------
+* Name:    i2c_ctrl_read
+*
+* Action:  read from HPC over I2C
+*
+*---------------------------------------------------------------------*/
+static u8 i2c_ctrl_read(struct controller *ctlr_ptr, void __iomem *WPGBbar, u8 index)
+{
+	u8 status;
+	int i;
+	void __iomem *wpg_addr;	// base addr + offset
+	unsigned long wpg_data;	// data to/from WPG LOHI format
+	unsigned long ultemp;
+	unsigned long data;	// actual data HILO format
+
+	debug_polling("%s - Entry WPGBbar[%p] index[%x] \n", __func__, WPGBbar, index);
+
+	//--------------------------------------------------------------------
+	// READ - step 1
+	// read at address, byte length, I2C address (shifted), index
+	// or read direct, byte length, index
+	if (ctlr_ptr->ctlr_type == 0x02) {
+		data = WPG_READATADDR_MASK;
+		// fill in I2C address
+		ultemp = (unsigned long)ctlr_ptr->u.wpeg_ctlr.i2c_addr;
+		ultemp = ultemp >> 1;
+		data |= (ultemp << 8);
+
+		// fill in index
+		data |= (unsigned long)index;
+	} else if (ctlr_ptr->ctlr_type == 0x04) {
+		data = WPG_READDIRECT_MASK;
+
+		// fill in index
+		ultemp = (unsigned long)index;
+		ultemp = ultemp << 8;
+		data |= ultemp;
+	} else {
+		err("this controller type is not supported \n");
+		return HPC_ERROR;
+	}
+
+	wpg_data = swab32(data);	// swap data before writing
+	wpg_addr = WPGBbar + WPG_I2CMOSUP_OFFSET;
+	writel(wpg_data, wpg_addr);
+
+	//--------------------------------------------------------------------
+	// READ - step 2 : clear the message buffer
+	data = 0x00000000;
+	wpg_data = swab32(data);
+	wpg_addr = WPGBbar + WPG_I2CMBUFL_OFFSET;
+	writel(wpg_data, wpg_addr);
+
+	//--------------------------------------------------------------------
+	// READ - step 3 : issue start operation, I2C master control bit 30:ON
+	//                 2020 : [20] OR operation at [20] offset 0x20
+	data = WPG_I2CMCNTL_STARTOP_MASK;
+	wpg_data = swab32(data);
+	wpg_addr = WPGBbar + WPG_I2CMCNTL_OFFSET + WPG_I2C_OR;
+	writel(wpg_data, wpg_addr);
+
+	//--------------------------------------------------------------------
+	// READ - step 4 : wait until start operation bit clears
+	i = CMD_COMPLETE_TOUT_SEC;
+	while (i) {
+		msleep(10);
+		wpg_addr = WPGBbar + WPG_I2CMCNTL_OFFSET;
+		wpg_data = readl(wpg_addr);
+		data = swab32(wpg_data);
+		if (!(data & WPG_I2CMCNTL_STARTOP_MASK))
+			break;
+		i--;
+	}
+	if (i == 0) {
+		debug("%s - Error : WPG timeout\n", __func__);
+		return HPC_ERROR;
+	}
+	//--------------------------------------------------------------------
+	// READ - step 5 : read I2C status register
+	i = CMD_COMPLETE_TOUT_SEC;
+	while (i) {
+		msleep(10);
+		wpg_addr = WPGBbar + WPG_I2CSTAT_OFFSET;
+		wpg_data = readl(wpg_addr);
+		data = swab32(wpg_data);
+		if (HPC_I2CSTATUS_CHECK(data))
+			break;
+		i--;
+	}
+	if (i == 0) {
+		debug("ctrl_read - Exit Error:I2C timeout\n");
+		return HPC_ERROR;
+	}
+
+	//--------------------------------------------------------------------
+	// READ - step 6 : get DATA
+	wpg_addr = WPGBbar + WPG_I2CMBUFL_OFFSET;
+	wpg_data = readl(wpg_addr);
+	data = swab32(wpg_data);
+
+	status = (u8) data;
+
+	debug_polling("%s - Exit index[%x] status[%x]\n", __func__, index, status);
+
+	return (status);
+}
+
+/*----------------------------------------------------------------------
+* Name:    i2c_ctrl_write
+*
+* Action:  write to HPC over I2C
+*
+* Return   0 or error codes
+*---------------------------------------------------------------------*/
+static u8 i2c_ctrl_write(struct controller *ctlr_ptr, void __iomem *WPGBbar, u8 index, u8 cmd)
+{
+	u8 rc;
+	void __iomem *wpg_addr;	// base addr + offset
+	unsigned long wpg_data;	// data to/from WPG LOHI format
+	unsigned long ultemp;
+	unsigned long data;	// actual data HILO format
+	int i;
+
+	debug_polling("%s - Entry WPGBbar[%p] index[%x] cmd[%x]\n", __func__, WPGBbar, index, cmd);
+
+	rc = 0;
+	//--------------------------------------------------------------------
+	// WRITE - step 1
+	// write at address, byte length, I2C address (shifted), index
+	// or write direct, byte length, index
+	data = 0x00000000;
+
+	if (ctlr_ptr->ctlr_type == 0x02) {
+		data = WPG_WRITEATADDR_MASK;
+		// fill in I2C address
+		ultemp = (unsigned long)ctlr_ptr->u.wpeg_ctlr.i2c_addr;
+		ultemp = ultemp >> 1;
+		data |= (ultemp << 8);
+
+		// fill in index
+		data |= (unsigned long)index;
+	} else if (ctlr_ptr->ctlr_type == 0x04) {
+		data = WPG_WRITEDIRECT_MASK;
+
+		// fill in index
+		ultemp = (unsigned long)index;
+		ultemp = ultemp << 8;
+		data |= ultemp;
+	} else {
+		err("this controller type is not supported \n");
+		return HPC_ERROR;
+	}
+
+	wpg_data = swab32(data);	// swap data before writing
+	wpg_addr = WPGBbar + WPG_I2CMOSUP_OFFSET;
+	writel(wpg_data, wpg_addr);
+
+	//--------------------------------------------------------------------
+	// WRITE - step 2 : clear the message buffer
+	data = 0x00000000 | (unsigned long)cmd;
+	wpg_data = swab32(data);
+	wpg_addr = WPGBbar + WPG_I2CMBUFL_OFFSET;
+	writel(wpg_data, wpg_addr);
+
+	//--------------------------------------------------------------------
+	// WRITE - step 3 : issue start operation,I2C master control bit 30:ON
+	//                 2020 : [20] OR operation at [20] offset 0x20
+	data = WPG_I2CMCNTL_STARTOP_MASK;
+	wpg_data = swab32(data);
+	wpg_addr = WPGBbar + WPG_I2CMCNTL_OFFSET + WPG_I2C_OR;
+	writel(wpg_data, wpg_addr);
+
+	//--------------------------------------------------------------------
+	// WRITE - step 4 : wait until start operation bit clears
+	i = CMD_COMPLETE_TOUT_SEC;
+	while (i) {
+		msleep(10);
+		wpg_addr = WPGBbar + WPG_I2CMCNTL_OFFSET;
+		wpg_data = readl(wpg_addr);
+		data = swab32(wpg_data);
+		if (!(data & WPG_I2CMCNTL_STARTOP_MASK))
+			break;
+		i--;
+	}
+	if (i == 0) {
+		debug("%s - Exit Error:WPG timeout\n", __func__);
+		rc = HPC_ERROR;
+	}
+
+	//--------------------------------------------------------------------
+	// WRITE - step 5 : read I2C status register
+	i = CMD_COMPLETE_TOUT_SEC;
+	while (i) {
+		msleep(10);
+		wpg_addr = WPGBbar + WPG_I2CSTAT_OFFSET;
+		wpg_data = readl(wpg_addr);
+		data = swab32(wpg_data);
+		if (HPC_I2CSTATUS_CHECK(data))
+			break;
+		i--;
+	}
+	if (i == 0) {
+		debug("ctrl_read - Error : I2C timeout\n");
+		rc = HPC_ERROR;
+	}
+
+	debug_polling("%s Exit rc[%x]\n", __func__, rc);
+	return (rc);
+}
+
+//------------------------------------------------------------
+//  Read from ISA type HPC
+//------------------------------------------------------------
+static u8 isa_ctrl_read(struct controller *ctlr_ptr, u8 offset)
+{
+	u16 start_address;
+	u8 data;
+
+	start_address = ctlr_ptr->u.isa_ctlr.io_start;
+	data = inb(start_address + offset);
+	return data;
+}
+
+//--------------------------------------------------------------
+// Write to ISA type HPC
+//--------------------------------------------------------------
+static void isa_ctrl_write(struct controller *ctlr_ptr, u8 offset, u8 data)
+{
+	u16 start_address;
+	u16 port_address;
+
+	start_address = ctlr_ptr->u.isa_ctlr.io_start;
+	port_address = start_address + (u16) offset;
+	outb(data, port_address);
+}
+
+static u8 pci_ctrl_read(struct controller *ctrl, u8 offset)
+{
+	u8 data = 0x00;
+	debug("inside pci_ctrl_read\n");
+	if (ctrl->ctrl_dev)
+		pci_read_config_byte(ctrl->ctrl_dev, HPC_PCI_OFFSET + offset, &data);
+	return data;
+}
+
+static u8 pci_ctrl_write(struct controller *ctrl, u8 offset, u8 data)
+{
+	u8 rc = -ENODEV;
+	debug("inside pci_ctrl_write\n");
+	if (ctrl->ctrl_dev) {
+		pci_write_config_byte(ctrl->ctrl_dev, HPC_PCI_OFFSET + offset, data);
+		rc = 0;
+	}
+	return rc;
+}
+
+static u8 ctrl_read(struct controller *ctlr, void __iomem *base, u8 offset)
+{
+	u8 rc;
+	switch (ctlr->ctlr_type) {
+	case 0:
+		rc = isa_ctrl_read(ctlr, offset);
+		break;
+	case 1:
+		rc = pci_ctrl_read(ctlr, offset);
+		break;
+	case 2:
+	case 4:
+		rc = i2c_ctrl_read(ctlr, base, offset);
+		break;
+	default:
+		return -ENODEV;
+	}
+	return rc;
+}
+
+static u8 ctrl_write(struct controller *ctlr, void __iomem *base, u8 offset, u8 data)
+{
+	u8 rc = 0;
+	switch (ctlr->ctlr_type) {
+	case 0:
+		isa_ctrl_write(ctlr, offset, data);
+		break;
+	case 1:
+		rc = pci_ctrl_write(ctlr, offset, data);
+		break;
+	case 2:
+	case 4:
+		rc = i2c_ctrl_write(ctlr, base, offset, data);
+		break;
+	default:
+		return -ENODEV;
+	}
+	return rc;
+}
+/*----------------------------------------------------------------------
+* Name:    hpc_writecmdtoindex()
+*
+* Action:  convert a write command to proper index within a controller
+*
+* Return   index, HPC_ERROR
+*---------------------------------------------------------------------*/
+static u8 hpc_writecmdtoindex(u8 cmd, u8 index)
+{
+	u8 rc;
+
+	switch (cmd) {
+	case HPC_CTLR_ENABLEIRQ:	// 0x00.N.15
+	case HPC_CTLR_CLEARIRQ:	// 0x06.N.15
+	case HPC_CTLR_RESET:	// 0x07.N.15
+	case HPC_CTLR_IRQSTEER:	// 0x08.N.15
+	case HPC_CTLR_DISABLEIRQ:	// 0x01.N.15
+	case HPC_ALLSLOT_ON:	// 0x11.N.15
+	case HPC_ALLSLOT_OFF:	// 0x12.N.15
+		rc = 0x0F;
+		break;
+
+	case HPC_SLOT_OFF:	// 0x02.Y.0-14
+	case HPC_SLOT_ON:	// 0x03.Y.0-14
+	case HPC_SLOT_ATTNOFF:	// 0x04.N.0-14
+	case HPC_SLOT_ATTNON:	// 0x05.N.0-14
+	case HPC_SLOT_BLINKLED:	// 0x13.N.0-14
+		rc = index;
+		break;
+
+	case HPC_BUS_33CONVMODE:
+	case HPC_BUS_66CONVMODE:
+	case HPC_BUS_66PCIXMODE:
+	case HPC_BUS_100PCIXMODE:
+	case HPC_BUS_133PCIXMODE:
+		rc = index + WPG_1ST_BUS_INDEX - 1;
+		break;
+
+	default:
+		err("hpc_writecmdtoindex - Error invalid cmd[%x]\n", cmd);
+		rc = HPC_ERROR;
+	}
+
+	return rc;
+}
+
+/*----------------------------------------------------------------------
+* Name:    hpc_readcmdtoindex()
+*
+* Action:  convert a read command to proper index within a controller
+*
+* Return   index, HPC_ERROR
+*---------------------------------------------------------------------*/
+static u8 hpc_readcmdtoindex(u8 cmd, u8 index)
+{
+	u8 rc;
+
+	switch (cmd) {
+	case READ_CTLRSTATUS:
+		rc = 0x0F;
+		break;
+	case READ_SLOTSTATUS:
+	case READ_ALLSTAT:
+		rc = index;
+		break;
+	case READ_EXTSLOTSTATUS:
+		rc = index + WPG_1ST_EXTSLOT_INDEX;
+		break;
+	case READ_BUSSTATUS:
+		rc = index + WPG_1ST_BUS_INDEX - 1;
+		break;
+	case READ_SLOTLATCHLOWREG:
+		rc = 0x28;
+		break;
+	case READ_REVLEVEL:
+		rc = 0x25;
+		break;
+	case READ_HPCOPTIONS:
+		rc = 0x27;
+		break;
+	default:
+		rc = HPC_ERROR;
+	}
+	return rc;
+}
+
+/*----------------------------------------------------------------------
+* Name:    HPCreadslot()
+*
+* Action:  issue a READ command to HPC
+*
+* Input:   pslot   - cannot be NULL for READ_ALLSTAT
+*          pstatus - can be NULL for READ_ALLSTAT
+*
+* Return   0 or error codes
+*---------------------------------------------------------------------*/
+int ibmphp_hpc_readslot(struct slot *pslot, u8 cmd, u8 *pstatus)
+{
+	void __iomem *wpg_bbar = NULL;
+	struct controller *ctlr_ptr;
+	u8 index, status;
+	int rc = 0;
+	int busindex;
+
+	debug_polling("%s - Entry pslot[%p] cmd[%x] pstatus[%p]\n", __func__, pslot, cmd, pstatus);
+
+	if ((pslot == NULL)
+	    || ((pstatus == NULL) && (cmd != READ_ALLSTAT) && (cmd != READ_BUSSTATUS))) {
+		rc = -EINVAL;
+		err("%s - Error invalid pointer, rc[%d]\n", __func__, rc);
+		return rc;
+	}
+
+	if (cmd == READ_BUSSTATUS) {
+		busindex = ibmphp_get_bus_index(pslot->bus);
+		if (busindex < 0) {
+			rc = -EINVAL;
+			err("%s - Exit Error:invalid bus, rc[%d]\n", __func__, rc);
+			return rc;
+		} else
+			index = (u8) busindex;
+	} else
+		index = pslot->ctlr_index;
+
+	index = hpc_readcmdtoindex(cmd, index);
+
+	if (index == HPC_ERROR) {
+		rc = -EINVAL;
+		err("%s - Exit Error:invalid index, rc[%d]\n", __func__, rc);
+		return rc;
+	}
+
+	ctlr_ptr = pslot->ctrl;
+
+	get_hpc_access();
+
+	//--------------------------------------------------------------------
+	// map physical address to logical address
+	//--------------------------------------------------------------------
+	if ((ctlr_ptr->ctlr_type == 2) || (ctlr_ptr->ctlr_type == 4))
+		wpg_bbar = ioremap(ctlr_ptr->u.wpeg_ctlr.wpegbbar, WPG_I2C_IOREMAP_SIZE);
+
+	//--------------------------------------------------------------------
+	// check controller status before reading
+	//--------------------------------------------------------------------
+	rc = hpc_wait_ctlr_notworking(HPC_CTLR_WORKING_TOUT, ctlr_ptr, wpg_bbar, &status);
+	if (!rc) {
+		switch (cmd) {
+		case READ_ALLSTAT:
+			// update the slot structure
+			pslot->ctrl->status = status;
+			pslot->status = ctrl_read(ctlr_ptr, wpg_bbar, index);
+			rc = hpc_wait_ctlr_notworking(HPC_CTLR_WORKING_TOUT, ctlr_ptr, wpg_bbar,
+						       &status);
+			if (!rc)
+				pslot->ext_status = ctrl_read(ctlr_ptr, wpg_bbar, index + WPG_1ST_EXTSLOT_INDEX);
+
+			break;
+
+		case READ_SLOTSTATUS:
+			// DO NOT update the slot structure
+			*pstatus = ctrl_read(ctlr_ptr, wpg_bbar, index);
+			break;
+
+		case READ_EXTSLOTSTATUS:
+			// DO NOT update the slot structure
+			*pstatus = ctrl_read(ctlr_ptr, wpg_bbar, index);
+			break;
+
+		case READ_CTLRSTATUS:
+			// DO NOT update the slot structure
+			*pstatus = status;
+			break;
+
+		case READ_BUSSTATUS:
+			pslot->busstatus = ctrl_read(ctlr_ptr, wpg_bbar, index);
+			break;
+		case READ_REVLEVEL:
+			*pstatus = ctrl_read(ctlr_ptr, wpg_bbar, index);
+			break;
+		case READ_HPCOPTIONS:
+			*pstatus = ctrl_read(ctlr_ptr, wpg_bbar, index);
+			break;
+		case READ_SLOTLATCHLOWREG:
+			// DO NOT update the slot structure
+			*pstatus = ctrl_read(ctlr_ptr, wpg_bbar, index);
+			break;
+
+			// Not used
+		case READ_ALLSLOT:
+			list_for_each_entry(pslot, &ibmphp_slot_head,
+					    ibm_slot_list) {
+				index = pslot->ctlr_index;
+				rc = hpc_wait_ctlr_notworking(HPC_CTLR_WORKING_TOUT, ctlr_ptr,
+								wpg_bbar, &status);
+				if (!rc) {
+					pslot->status = ctrl_read(ctlr_ptr, wpg_bbar, index);
+					rc = hpc_wait_ctlr_notworking(HPC_CTLR_WORKING_TOUT,
+									ctlr_ptr, wpg_bbar, &status);
+					if (!rc)
+						pslot->ext_status =
+						    ctrl_read(ctlr_ptr, wpg_bbar,
+								index + WPG_1ST_EXTSLOT_INDEX);
+				} else {
+					err("%s - Error ctrl_read failed\n", __func__);
+					rc = -EINVAL;
+					break;
+				}
+			}
+			break;
+		default:
+			rc = -EINVAL;
+			break;
+		}
+	}
+	//--------------------------------------------------------------------
+	// cleanup
+	//--------------------------------------------------------------------
+
+	// remove physical to logical address mapping
+	if ((ctlr_ptr->ctlr_type == 2) || (ctlr_ptr->ctlr_type == 4))
+		iounmap(wpg_bbar);
+
+	free_hpc_access();
+
+	debug_polling("%s - Exit rc[%d]\n", __func__, rc);
+	return rc;
+}
+
+/*----------------------------------------------------------------------
+* Name:    ibmphp_hpc_writeslot()
+*
+* Action: issue a WRITE command to HPC
+*---------------------------------------------------------------------*/
+int ibmphp_hpc_writeslot(struct slot *pslot, u8 cmd)
+{
+	void __iomem *wpg_bbar = NULL;
+	struct controller *ctlr_ptr;
+	u8 index, status;
+	int busindex;
+	u8 done;
+	int rc = 0;
+	int timeout;
+
+	debug_polling("%s - Entry pslot[%p] cmd[%x]\n", __func__, pslot, cmd);
+	if (pslot == NULL) {
+		rc = -EINVAL;
+		err("%s - Error Exit rc[%d]\n", __func__, rc);
+		return rc;
+	}
+
+	if ((cmd == HPC_BUS_33CONVMODE) || (cmd == HPC_BUS_66CONVMODE) ||
+		(cmd == HPC_BUS_66PCIXMODE) || (cmd == HPC_BUS_100PCIXMODE) ||
+		(cmd == HPC_BUS_133PCIXMODE)) {
+		busindex = ibmphp_get_bus_index(pslot->bus);
+		if (busindex < 0) {
+			rc = -EINVAL;
+			err("%s - Exit Error:invalid bus, rc[%d]\n", __func__, rc);
+			return rc;
+		} else
+			index = (u8) busindex;
+	} else
+		index = pslot->ctlr_index;
+
+	index = hpc_writecmdtoindex(cmd, index);
+
+	if (index == HPC_ERROR) {
+		rc = -EINVAL;
+		err("%s - Error Exit rc[%d]\n", __func__, rc);
+		return rc;
+	}
+
+	ctlr_ptr = pslot->ctrl;
+
+	get_hpc_access();
+
+	//--------------------------------------------------------------------
+	// map physical address to logical address
+	//--------------------------------------------------------------------
+	if ((ctlr_ptr->ctlr_type == 2) || (ctlr_ptr->ctlr_type == 4)) {
+		wpg_bbar = ioremap(ctlr_ptr->u.wpeg_ctlr.wpegbbar, WPG_I2C_IOREMAP_SIZE);
+
+		debug("%s - ctlr id[%x] physical[%lx] logical[%lx] i2c[%x]\n", __func__,
+		ctlr_ptr->ctlr_id, (ulong) (ctlr_ptr->u.wpeg_ctlr.wpegbbar), (ulong) wpg_bbar,
+		ctlr_ptr->u.wpeg_ctlr.i2c_addr);
+	}
+	//--------------------------------------------------------------------
+	// check controller status before writing
+	//--------------------------------------------------------------------
+	rc = hpc_wait_ctlr_notworking(HPC_CTLR_WORKING_TOUT, ctlr_ptr, wpg_bbar, &status);
+	if (!rc) {
+
+		ctrl_write(ctlr_ptr, wpg_bbar, index, cmd);
+
+		//--------------------------------------------------------------------
+		// check controller is still not working on the command
+		//--------------------------------------------------------------------
+		timeout = CMD_COMPLETE_TOUT_SEC;
+		done = 0;
+		while (!done) {
+			rc = hpc_wait_ctlr_notworking(HPC_CTLR_WORKING_TOUT, ctlr_ptr, wpg_bbar,
+							&status);
+			if (!rc) {
+				if (NEEDTOCHECK_CMDSTATUS(cmd)) {
+					if (CTLR_FINISHED(status) == HPC_CTLR_FINISHED_YES)
+						done = 1;
+				} else
+					done = 1;
+			}
+			if (!done) {
+				msleep(1000);
+				if (timeout < 1) {
+					done = 1;
+					err("%s - Error command complete timeout\n", __func__);
+					rc = -EFAULT;
+				} else
+					timeout--;
+			}
+		}
+		ctlr_ptr->status = status;
+	}
+	// cleanup
+
+	// remove physical to logical address mapping
+	if ((ctlr_ptr->ctlr_type == 2) || (ctlr_ptr->ctlr_type == 4))
+		iounmap(wpg_bbar);
+	free_hpc_access();
+
+	debug_polling("%s - Exit rc[%d]\n", __func__, rc);
+	return rc;
+}
+
+/*----------------------------------------------------------------------
+* Name:    get_hpc_access()
+*
+* Action: make sure only one process can access HPC at one time
+*---------------------------------------------------------------------*/
+static void get_hpc_access(void)
+{
+	mutex_lock(&sem_hpcaccess);
+}
+
+/*----------------------------------------------------------------------
+* Name:    free_hpc_access()
+*---------------------------------------------------------------------*/
+void free_hpc_access(void)
+{
+	mutex_unlock(&sem_hpcaccess);
+}
+
+/*----------------------------------------------------------------------
+* Name:    ibmphp_lock_operations()
+*
+* Action: make sure only one process can change the data structure
+*---------------------------------------------------------------------*/
+void ibmphp_lock_operations(void)
+{
+	mutex_lock(&operations_mutex);
+	to_debug = 1;
+}
+
+/*----------------------------------------------------------------------
+* Name:    ibmphp_unlock_operations()
+*---------------------------------------------------------------------*/
+void ibmphp_unlock_operations(void)
+{
+	debug("%s - Entry\n", __func__);
+	mutex_unlock(&operations_mutex);
+	to_debug = 0;
+	debug("%s - Exit\n", __func__);
+}
+
+/*----------------------------------------------------------------------
+* Name:    poll_hpc()
+*---------------------------------------------------------------------*/
+#define POLL_LATCH_REGISTER	0
+#define POLL_SLOTS		1
+#define POLL_SLEEP		2
+static int poll_hpc(void *data)
+{
+	struct slot myslot;
+	struct slot *pslot = NULL;
+	int rc;
+	int poll_state = POLL_LATCH_REGISTER;
+	u8 oldlatchlow = 0x00;
+	u8 curlatchlow = 0x00;
+	int poll_count = 0;
+	u8 ctrl_count = 0x00;
+
+	debug("%s - Entry\n", __func__);
+
+	while (!kthread_should_stop()) {
+		/* try to get the lock to do some kind of hardware access */
+		mutex_lock(&operations_mutex);
+
+		switch (poll_state) {
+		case POLL_LATCH_REGISTER:
+			oldlatchlow = curlatchlow;
+			ctrl_count = 0x00;
+			list_for_each_entry(pslot, &ibmphp_slot_head,
+					    ibm_slot_list) {
+				if (ctrl_count >= ibmphp_get_total_controllers())
+					break;
+				if (pslot->ctrl->ctlr_relative_id == ctrl_count) {
+					ctrl_count++;
+					if (READ_SLOT_LATCH(pslot->ctrl)) {
+						rc = ibmphp_hpc_readslot(pslot,
+									  READ_SLOTLATCHLOWREG,
+									  &curlatchlow);
+						if (oldlatchlow != curlatchlow)
+							process_changeinlatch(oldlatchlow,
+									       curlatchlow,
+									       pslot->ctrl);
+					}
+				}
+			}
+			++poll_count;
+			poll_state = POLL_SLEEP;
+			break;
+		case POLL_SLOTS:
+			list_for_each_entry(pslot, &ibmphp_slot_head,
+					    ibm_slot_list) {
+				// make a copy of the old status
+				memcpy((void *) &myslot, (void *) pslot,
+					sizeof(struct slot));
+				rc = ibmphp_hpc_readslot(pslot, READ_ALLSTAT, NULL);
+				if ((myslot.status != pslot->status)
+				    || (myslot.ext_status != pslot->ext_status))
+					process_changeinstatus(pslot, &myslot);
+			}
+			ctrl_count = 0x00;
+			list_for_each_entry(pslot, &ibmphp_slot_head,
+					    ibm_slot_list) {
+				if (ctrl_count >= ibmphp_get_total_controllers())
+					break;
+				if (pslot->ctrl->ctlr_relative_id == ctrl_count) {
+					ctrl_count++;
+					if (READ_SLOT_LATCH(pslot->ctrl))
+						rc = ibmphp_hpc_readslot(pslot,
+									  READ_SLOTLATCHLOWREG,
+									  &curlatchlow);
+				}
+			}
+			++poll_count;
+			poll_state = POLL_SLEEP;
+			break;
+		case POLL_SLEEP:
+			/* don't sleep with a lock on the hardware */
+			mutex_unlock(&operations_mutex);
+			msleep(POLL_INTERVAL_SEC * 1000);
+
+			if (kthread_should_stop())
+				goto out_sleep;
+
+			mutex_lock(&operations_mutex);
+
+			if (poll_count >= POLL_LATCH_CNT) {
+				poll_count = 0;
+				poll_state = POLL_SLOTS;
+			} else
+				poll_state = POLL_LATCH_REGISTER;
+			break;
+		}
+		/* give up the hardware semaphore */
+		mutex_unlock(&operations_mutex);
+		/* sleep for a short time just for good measure */
+out_sleep:
+		msleep(100);
+	}
+	complete(&exit_complete);
+	debug("%s - Exit\n", __func__);
+	return 0;
+}
+
+
+/*----------------------------------------------------------------------
+* Name:    process_changeinstatus
+*
+* Action:  compare old and new slot status, process the change in status
+*
+* Input:   pointer to slot struct, old slot struct
+*
+* Return   0 or error codes
+* Value:
+*
+* Side
+* Effects: None.
+*
+* Notes:
+*---------------------------------------------------------------------*/
+static int process_changeinstatus(struct slot *pslot, struct slot *poldslot)
+{
+	u8 status;
+	int rc = 0;
+	u8 disable = 0;
+	u8 update = 0;
+
+	debug("process_changeinstatus - Entry pslot[%p], poldslot[%p]\n", pslot, poldslot);
+
+	// bit 0 - HPC_SLOT_POWER
+	if ((pslot->status & 0x01) != (poldslot->status & 0x01))
+		update = 1;
+
+	// bit 1 - HPC_SLOT_CONNECT
+	// ignore
+
+	// bit 2 - HPC_SLOT_ATTN
+	if ((pslot->status & 0x04) != (poldslot->status & 0x04))
+		update = 1;
+
+	// bit 3 - HPC_SLOT_PRSNT2
+	// bit 4 - HPC_SLOT_PRSNT1
+	if (((pslot->status & 0x08) != (poldslot->status & 0x08))
+		|| ((pslot->status & 0x10) != (poldslot->status & 0x10)))
+		update = 1;
+
+	// bit 5 - HPC_SLOT_PWRGD
+	if ((pslot->status & 0x20) != (poldslot->status & 0x20))
+		// OFF -> ON: ignore, ON -> OFF: disable slot
+		if ((poldslot->status & 0x20) && (SLOT_CONNECT(poldslot->status) == HPC_SLOT_CONNECTED) && (SLOT_PRESENT(poldslot->status)))
+			disable = 1;
+
+	// bit 6 - HPC_SLOT_BUS_SPEED
+	// ignore
+
+	// bit 7 - HPC_SLOT_LATCH
+	if ((pslot->status & 0x80) != (poldslot->status & 0x80)) {
+		update = 1;
+		// OPEN -> CLOSE
+		if (pslot->status & 0x80) {
+			if (SLOT_PWRGD(pslot->status)) {
+				// power goes on and off after closing latch
+				// check again to make sure power is still ON
+				msleep(1000);
+				rc = ibmphp_hpc_readslot(pslot, READ_SLOTSTATUS, &status);
+				if (SLOT_PWRGD(status))
+					update = 1;
+				else	// overwrite power in pslot to OFF
+					pslot->status &= ~HPC_SLOT_POWER;
+			}
+		}
+		// CLOSE -> OPEN
+		else if ((SLOT_PWRGD(poldslot->status) == HPC_SLOT_PWRGD_GOOD)
+			&& (SLOT_CONNECT(poldslot->status) == HPC_SLOT_CONNECTED) && (SLOT_PRESENT(poldslot->status))) {
+			disable = 1;
+		}
+		// else - ignore
+	}
+	// bit 4 - HPC_SLOT_BLINK_ATTN
+	if ((pslot->ext_status & 0x08) != (poldslot->ext_status & 0x08))
+		update = 1;
+
+	if (disable) {
+		debug("process_changeinstatus - disable slot\n");
+		pslot->flag = 0;
+		rc = ibmphp_do_disable_slot(pslot);
+	}
+
+	if (update || disable)
+		ibmphp_update_slot_info(pslot);
+
+	debug("%s - Exit rc[%d] disable[%x] update[%x]\n", __func__, rc, disable, update);
+
+	return rc;
+}
+
+/*----------------------------------------------------------------------
+* Name:    process_changeinlatch
+*
+* Action:  compare old and new latch reg status, process the change
+*
+* Input:   old and current latch register status
+*
+* Return   0 or error codes
+* Value:
+*---------------------------------------------------------------------*/
+static int process_changeinlatch(u8 old, u8 new, struct controller *ctrl)
+{
+	struct slot myslot, *pslot;
+	u8 i;
+	u8 mask;
+	int rc = 0;
+
+	debug("%s - Entry old[%x], new[%x]\n", __func__, old, new);
+	// bit 0 reserved, 0 is LSB, check bit 1-6 for 6 slots
+
+	for (i = ctrl->starting_slot_num; i <= ctrl->ending_slot_num; i++) {
+		mask = 0x01 << i;
+		if ((mask & old) != (mask & new)) {
+			pslot = ibmphp_get_slot_from_physical_num(i);
+			if (pslot) {
+				memcpy((void *) &myslot, (void *) pslot, sizeof(struct slot));
+				rc = ibmphp_hpc_readslot(pslot, READ_ALLSTAT, NULL);
+				debug("%s - call process_changeinstatus for slot[%d]\n", __func__, i);
+				process_changeinstatus(pslot, &myslot);
+			} else {
+				rc = -EINVAL;
+				err("%s - Error bad pointer for slot[%d]\n", __func__, i);
+			}
+		}
+	}
+	debug("%s - Exit rc[%d]\n", __func__, rc);
+	return rc;
+}
+
+/*----------------------------------------------------------------------
+* Name:    ibmphp_hpc_start_poll_thread
+*
+* Action:  start polling thread
+*---------------------------------------------------------------------*/
+int __init ibmphp_hpc_start_poll_thread(void)
+{
+	debug("%s - Entry\n", __func__);
+
+	ibmphp_poll_thread = kthread_run(poll_hpc, NULL, "hpc_poll");
+	if (IS_ERR(ibmphp_poll_thread)) {
+		err("%s - Error, thread not started\n", __func__);
+		return PTR_ERR(ibmphp_poll_thread);
+	}
+	return 0;
+}
+
+/*----------------------------------------------------------------------
+* Name:    ibmphp_hpc_stop_poll_thread
+*
+* Action:  stop polling thread and cleanup
+*---------------------------------------------------------------------*/
+void __exit ibmphp_hpc_stop_poll_thread(void)
+{
+	debug("%s - Entry\n", __func__);
+
+	kthread_stop(ibmphp_poll_thread);
+	debug("before locking operations\n");
+	ibmphp_lock_operations();
+	debug("after locking operations\n");
+
+	// wait for poll thread to exit
+	debug("before exit_complete down\n");
+	wait_for_completion(&exit_complete);
+	debug("after exit_completion down\n");
+
+	// cleanup
+	debug("before free_hpc_access\n");
+	free_hpc_access();
+	debug("after free_hpc_access\n");
+	ibmphp_unlock_operations();
+	debug("after unlock operations\n");
+
+	debug("%s - Exit\n", __func__);
+}
+
+/*----------------------------------------------------------------------
+* Name:    hpc_wait_ctlr_notworking
+*
+* Action:  wait until the controller is in a not working state
+*
+* Return   0, HPC_ERROR
+* Value:
+*---------------------------------------------------------------------*/
+static int hpc_wait_ctlr_notworking(int timeout, struct controller *ctlr_ptr, void __iomem *wpg_bbar,
+				    u8 *pstatus)
+{
+	int rc = 0;
+	u8 done = 0;
+
+	debug_polling("hpc_wait_ctlr_notworking - Entry timeout[%d]\n", timeout);
+
+	while (!done) {
+		*pstatus = ctrl_read(ctlr_ptr, wpg_bbar, WPG_CTLR_INDEX);
+		if (*pstatus == HPC_ERROR) {
+			rc = HPC_ERROR;
+			done = 1;
+		}
+		if (CTLR_WORKING(*pstatus) == HPC_CTLR_WORKING_NO)
+			done = 1;
+		if (!done) {
+			msleep(1000);
+			if (timeout < 1) {
+				done = 1;
+				err("HPCreadslot - Error ctlr timeout\n");
+				rc = HPC_ERROR;
+			} else
+				timeout--;
+		}
+	}
+	debug_polling("hpc_wait_ctlr_notworking - Exit rc[%x] status[%x]\n", rc, *pstatus);
+	return rc;
+}
diff --git a/drivers/pci/hotplug/ibmphp_pci.c b/drivers/pci/hotplug/ibmphp_pci.c
new file mode 100644
index 000000000..50038e5f9
--- /dev/null
+++ b/drivers/pci/hotplug/ibmphp_pci.c
@@ -0,0 +1,1689 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IBM Hot Plug Controller Driver
+ *
+ * Written By: Irene Zubarev, IBM Corporation
+ *
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001,2002 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <gregkh@us.ibm.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/list.h>
+#include "ibmphp.h"
+
+
+static int configure_device(struct pci_func *);
+static int configure_bridge(struct pci_func **, u8);
+static struct res_needed *scan_behind_bridge(struct pci_func *, u8);
+static int add_new_bus(struct bus_node *, struct resource_node *, struct resource_node *, struct resource_node *, u8);
+static u8 find_sec_number(u8 primary_busno, u8 slotno);
+
+/*
+ * NOTE..... If BIOS doesn't provide default routing, we assign:
+ * 9 for SCSI, 10 for LAN adapters, and 11 for everything else.
+ * If adapter is bridged, then we assign 11 to it and devices behind it.
+ * We also assign the same irq numbers for multi function devices.
+ * These are PIC mode, so shouldn't matter n.e.ways (hopefully)
+ */
+static void assign_alt_irq(struct pci_func *cur_func, u8 class_code)
+{
+	int j;
+	for (j = 0; j < 4; j++) {
+		if (cur_func->irq[j] == 0xff) {
+			switch (class_code) {
+				case PCI_BASE_CLASS_STORAGE:
+					cur_func->irq[j] = SCSI_IRQ;
+					break;
+				case PCI_BASE_CLASS_NETWORK:
+					cur_func->irq[j] = LAN_IRQ;
+					break;
+				default:
+					cur_func->irq[j] = OTHER_IRQ;
+					break;
+			}
+		}
+	}
+}
+
+/*
+ * Configures the device to be added (will allocate needed resources if it
+ * can), the device can be a bridge or a regular pci device, can also be
+ * multi-functional
+ *
+ * Input: function to be added
+ *
+ * TO DO:  The error case with Multifunction device or multi function bridge,
+ * if there is an error, will need to go through all previous functions and
+ * unconfigure....or can add some code into unconfigure_card....
+ */
+int ibmphp_configure_card(struct pci_func *func, u8 slotno)
+{
+	u16 vendor_id;
+	u32 class;
+	u8 class_code;
+	u8 hdr_type, device, sec_number;
+	u8 function;
+	struct pci_func *newfunc;	/* for multi devices */
+	struct pci_func *cur_func, *prev_func;
+	int rc, i, j;
+	int cleanup_count;
+	u8 flag;
+	u8 valid_device = 0x00; /* to see if we are able to read from card any device info at all */
+
+	debug("inside configure_card, func->busno = %x\n", func->busno);
+
+	device = func->device;
+	cur_func = func;
+
+	/* We only get bus and device from IRQ routing table.  So at this point,
+	 * func->busno is correct, and func->device contains only device (at the 5
+	 * highest bits)
+	 */
+
+	/* For every function on the card */
+	for (function = 0x00; function < 0x08; function++) {
+		unsigned int devfn = PCI_DEVFN(device, function);
+		ibmphp_pci_bus->number = cur_func->busno;
+
+		cur_func->function = function;
+
+		debug("inside the loop, cur_func->busno = %x, cur_func->device = %x, cur_func->function = %x\n",
+			cur_func->busno, cur_func->device, cur_func->function);
+
+		pci_bus_read_config_word(ibmphp_pci_bus, devfn, PCI_VENDOR_ID, &vendor_id);
+
+		debug("vendor_id is %x\n", vendor_id);
+		if (vendor_id != PCI_VENDOR_ID_NOTVALID) {
+			/* found correct device!!! */
+			debug("found valid device, vendor_id = %x\n", vendor_id);
+
+			++valid_device;
+
+			/* header: x x x x x x x x
+			 *         | |___________|=> 1=PPB bridge, 0=normal device, 2=CardBus Bridge
+			 *         |_=> 0 = single function device, 1 = multi-function device
+			 */
+
+			pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_HEADER_TYPE, &hdr_type);
+			pci_bus_read_config_dword(ibmphp_pci_bus, devfn, PCI_CLASS_REVISION, &class);
+
+			class_code = class >> 24;
+			debug("hrd_type = %x, class = %x, class_code %x\n", hdr_type, class, class_code);
+			class >>= 8;	/* to take revision out, class = class.subclass.prog i/f */
+			if (class == PCI_CLASS_NOT_DEFINED_VGA) {
+				err("The device %x is VGA compatible and as is not supported for hot plugging. "
+				     "Please choose another device.\n", cur_func->device);
+				return -ENODEV;
+			} else if (class == PCI_CLASS_DISPLAY_VGA) {
+				err("The device %x is not supported for hot plugging. Please choose another device.\n",
+				     cur_func->device);
+				return -ENODEV;
+			}
+			switch (hdr_type) {
+				case PCI_HEADER_TYPE_NORMAL:
+					debug("single device case.... vendor id = %x, hdr_type = %x, class = %x\n", vendor_id, hdr_type, class);
+					assign_alt_irq(cur_func, class_code);
+					rc = configure_device(cur_func);
+					if (rc < 0) {
+						/* We need to do this in case some other BARs were properly inserted */
+						err("was not able to configure devfunc %x on bus %x.\n",
+						     cur_func->device, cur_func->busno);
+						cleanup_count = 6;
+						goto error;
+					}
+					cur_func->next = NULL;
+					function = 0x8;
+					break;
+				case PCI_HEADER_TYPE_MULTIDEVICE:
+					assign_alt_irq(cur_func, class_code);
+					rc = configure_device(cur_func);
+					if (rc < 0) {
+						/* We need to do this in case some other BARs were properly inserted */
+						err("was not able to configure devfunc %x on bus %x...bailing out\n",
+						     cur_func->device, cur_func->busno);
+						cleanup_count = 6;
+						goto error;
+					}
+					newfunc = kzalloc(sizeof(*newfunc), GFP_KERNEL);
+					if (!newfunc)
+						return -ENOMEM;
+
+					newfunc->busno = cur_func->busno;
+					newfunc->device = device;
+					cur_func->next = newfunc;
+					cur_func = newfunc;
+					for (j = 0; j < 4; j++)
+						newfunc->irq[j] = cur_func->irq[j];
+					break;
+				case PCI_HEADER_TYPE_MULTIBRIDGE:
+					class >>= 8;
+					if (class != PCI_CLASS_BRIDGE_PCI) {
+						err("This %x is not PCI-to-PCI bridge, and as is not supported for hot-plugging.  Please insert another card.\n",
+						     cur_func->device);
+						return -ENODEV;
+					}
+					assign_alt_irq(cur_func, class_code);
+					rc = configure_bridge(&cur_func, slotno);
+					if (rc == -ENODEV) {
+						err("You chose to insert Single Bridge, or nested bridges, this is not supported...\n");
+						err("Bus %x, devfunc %x\n", cur_func->busno, cur_func->device);
+						return rc;
+					}
+					if (rc) {
+						/* We need to do this in case some other BARs were properly inserted */
+						err("was not able to hot-add PPB properly.\n");
+						func->bus = 1; /* To indicate to the unconfigure function that this is a PPB */
+						cleanup_count = 2;
+						goto error;
+					}
+
+					pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_SECONDARY_BUS, &sec_number);
+					flag = 0;
+					for (i = 0; i < 32; i++) {
+						if (func->devices[i]) {
+							newfunc = kzalloc(sizeof(*newfunc), GFP_KERNEL);
+							if (!newfunc)
+								return -ENOMEM;
+
+							newfunc->busno = sec_number;
+							newfunc->device = (u8) i;
+							for (j = 0; j < 4; j++)
+								newfunc->irq[j] = cur_func->irq[j];
+
+							if (flag) {
+								for (prev_func = cur_func; prev_func->next; prev_func = prev_func->next) ;
+								prev_func->next = newfunc;
+							} else
+								cur_func->next = newfunc;
+
+							rc = ibmphp_configure_card(newfunc, slotno);
+							/* This could only happen if kmalloc failed */
+							if (rc) {
+								/* We need to do this in case bridge itself got configured properly, but devices behind it failed */
+								func->bus = 1; /* To indicate to the unconfigure function that this is a PPB */
+								cleanup_count = 2;
+								goto error;
+							}
+							flag = 1;
+						}
+					}
+
+					newfunc = kzalloc(sizeof(*newfunc), GFP_KERNEL);
+					if (!newfunc)
+						return -ENOMEM;
+
+					newfunc->busno = cur_func->busno;
+					newfunc->device = device;
+					for (j = 0; j < 4; j++)
+						newfunc->irq[j] = cur_func->irq[j];
+					for (prev_func = cur_func; prev_func->next; prev_func = prev_func->next);
+					prev_func->next = newfunc;
+					cur_func = newfunc;
+					break;
+				case PCI_HEADER_TYPE_BRIDGE:
+					class >>= 8;
+					debug("class now is %x\n", class);
+					if (class != PCI_CLASS_BRIDGE_PCI) {
+						err("This %x is not PCI-to-PCI bridge, and as is not supported for hot-plugging.  Please insert another card.\n",
+						     cur_func->device);
+						return -ENODEV;
+					}
+
+					assign_alt_irq(cur_func, class_code);
+
+					debug("cur_func->busno b4 configure_bridge is %x\n", cur_func->busno);
+					rc = configure_bridge(&cur_func, slotno);
+					if (rc == -ENODEV) {
+						err("You chose to insert Single Bridge, or nested bridges, this is not supported...\n");
+						err("Bus %x, devfunc %x\n", cur_func->busno, cur_func->device);
+						return rc;
+					}
+					if (rc) {
+						/* We need to do this in case some other BARs were properly inserted */
+						func->bus = 1; /* To indicate to the unconfigure function that this is a PPB */
+						err("was not able to hot-add PPB properly.\n");
+						cleanup_count = 2;
+						goto error;
+					}
+					debug("cur_func->busno = %x, device = %x, function = %x\n",
+						cur_func->busno, device, function);
+					pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_SECONDARY_BUS, &sec_number);
+					debug("after configuring bridge..., sec_number = %x\n", sec_number);
+					flag = 0;
+					for (i = 0; i < 32; i++) {
+						if (func->devices[i]) {
+							debug("inside for loop, device is %x\n", i);
+							newfunc = kzalloc(sizeof(*newfunc), GFP_KERNEL);
+							if (!newfunc)
+								return -ENOMEM;
+
+							newfunc->busno = sec_number;
+							newfunc->device = (u8) i;
+							for (j = 0; j < 4; j++)
+								newfunc->irq[j] = cur_func->irq[j];
+
+							if (flag) {
+								for (prev_func = cur_func; prev_func->next; prev_func = prev_func->next);
+								prev_func->next = newfunc;
+							} else
+								cur_func->next = newfunc;
+
+							rc = ibmphp_configure_card(newfunc, slotno);
+
+							/* Again, this case should not happen... For complete paranoia, will need to call remove_bus */
+							if (rc) {
+								/* We need to do this in case some other BARs were properly inserted */
+								func->bus = 1; /* To indicate to the unconfigure function that this is a PPB */
+								cleanup_count = 2;
+								goto error;
+							}
+							flag = 1;
+						}
+					}
+
+					function = 0x8;
+					break;
+				default:
+					err("MAJOR PROBLEM!!!!, header type not supported? %x\n", hdr_type);
+					return -ENXIO;
+			}	/* end of switch */
+		}	/* end of valid device */
+	}	/* end of for */
+
+	if (!valid_device) {
+		err("Cannot find any valid devices on the card.  Or unable to read from card.\n");
+		return -ENODEV;
+	}
+
+	return 0;
+
+error:
+	for (i = 0; i < cleanup_count; i++) {
+		if (cur_func->io[i]) {
+			ibmphp_remove_resource(cur_func->io[i]);
+			cur_func->io[i] = NULL;
+		} else if (cur_func->pfmem[i]) {
+			ibmphp_remove_resource(cur_func->pfmem[i]);
+			cur_func->pfmem[i] = NULL;
+		} else if (cur_func->mem[i]) {
+			ibmphp_remove_resource(cur_func->mem[i]);
+			cur_func->mem[i] = NULL;
+		}
+	}
+	return rc;
+}
+
+/*
+ * This function configures the pci BARs of a single device.
+ * Input: pointer to the pci_func
+ * Output: configured PCI, 0, or error
+ */
+static int configure_device(struct pci_func *func)
+{
+	u32 bar[6];
+	static const u32 address[] = {
+		PCI_BASE_ADDRESS_0,
+		PCI_BASE_ADDRESS_1,
+		PCI_BASE_ADDRESS_2,
+		PCI_BASE_ADDRESS_3,
+		PCI_BASE_ADDRESS_4,
+		PCI_BASE_ADDRESS_5,
+		0
+	};
+	u8 irq;
+	int count;
+	int len[6];
+	struct resource_node *io[6];
+	struct resource_node *mem[6];
+	struct resource_node *mem_tmp;
+	struct resource_node *pfmem[6];
+	unsigned int devfn;
+
+	debug("%s - inside\n", __func__);
+
+	devfn = PCI_DEVFN(func->device, func->function);
+	ibmphp_pci_bus->number = func->busno;
+
+	for (count = 0; address[count]; count++) {	/* for 6 BARs */
+
+		/* not sure if i need this.  per scott, said maybe need * something like this
+		   if devices don't adhere 100% to the spec, so don't want to write
+		   to the reserved bits
+
+		pcibios_read_config_byte(cur_func->busno, cur_func->device,
+		PCI_BASE_ADDRESS_0 + 4 * count, &tmp);
+		if (tmp & 0x01) // IO
+			pcibios_write_config_dword(cur_func->busno, cur_func->device,
+			PCI_BASE_ADDRESS_0 + 4 * count, 0xFFFFFFFD);
+		else  // Memory
+			pcibios_write_config_dword(cur_func->busno, cur_func->device,
+			PCI_BASE_ADDRESS_0 + 4 * count, 0xFFFFFFFF);
+		 */
+		pci_bus_write_config_dword(ibmphp_pci_bus, devfn, address[count], 0xFFFFFFFF);
+		pci_bus_read_config_dword(ibmphp_pci_bus, devfn, address[count], &bar[count]);
+
+		if (!bar[count])	/* This BAR is not implemented */
+			continue;
+
+		debug("Device %x BAR %d wants %x\n", func->device, count, bar[count]);
+
+		if (bar[count] & PCI_BASE_ADDRESS_SPACE_IO) {
+			/* This is IO */
+			debug("inside IO SPACE\n");
+
+			len[count] = bar[count] & 0xFFFFFFFC;
+			len[count] = ~len[count] + 1;
+
+			debug("len[count] in IO %x, count %d\n", len[count], count);
+
+			io[count] = kzalloc(sizeof(struct resource_node), GFP_KERNEL);
+
+			if (!io[count])
+				return -ENOMEM;
+
+			io[count]->type = IO;
+			io[count]->busno = func->busno;
+			io[count]->devfunc = PCI_DEVFN(func->device, func->function);
+			io[count]->len = len[count];
+			if (ibmphp_check_resource(io[count], 0) == 0) {
+				ibmphp_add_resource(io[count]);
+				func->io[count] = io[count];
+			} else {
+				err("cannot allocate requested io for bus %x device %x function %x len %x\n",
+				     func->busno, func->device, func->function, len[count]);
+				kfree(io[count]);
+				return -EIO;
+			}
+			pci_bus_write_config_dword(ibmphp_pci_bus, devfn, address[count], func->io[count]->start);
+
+			/* _______________This is for debugging purposes only_____________________ */
+			debug("b4 writing, the IO address is %x\n", func->io[count]->start);
+			pci_bus_read_config_dword(ibmphp_pci_bus, devfn, address[count], &bar[count]);
+			debug("after writing.... the start address is %x\n", bar[count]);
+			/* _________________________________________________________________________*/
+
+		} else {
+			/* This is Memory */
+			if (bar[count] & PCI_BASE_ADDRESS_MEM_PREFETCH) {
+				/* pfmem */
+				debug("PFMEM SPACE\n");
+
+				len[count] = bar[count] & 0xFFFFFFF0;
+				len[count] = ~len[count] + 1;
+
+				debug("len[count] in PFMEM %x, count %d\n", len[count], count);
+
+				pfmem[count] = kzalloc(sizeof(struct resource_node), GFP_KERNEL);
+				if (!pfmem[count])
+					return -ENOMEM;
+
+				pfmem[count]->type = PFMEM;
+				pfmem[count]->busno = func->busno;
+				pfmem[count]->devfunc = PCI_DEVFN(func->device,
+							func->function);
+				pfmem[count]->len = len[count];
+				pfmem[count]->fromMem = 0;
+				if (ibmphp_check_resource(pfmem[count], 0) == 0) {
+					ibmphp_add_resource(pfmem[count]);
+					func->pfmem[count] = pfmem[count];
+				} else {
+					mem_tmp = kzalloc(sizeof(*mem_tmp), GFP_KERNEL);
+					if (!mem_tmp) {
+						kfree(pfmem[count]);
+						return -ENOMEM;
+					}
+					mem_tmp->type = MEM;
+					mem_tmp->busno = pfmem[count]->busno;
+					mem_tmp->devfunc = pfmem[count]->devfunc;
+					mem_tmp->len = pfmem[count]->len;
+					debug("there's no pfmem... going into mem.\n");
+					if (ibmphp_check_resource(mem_tmp, 0) == 0) {
+						ibmphp_add_resource(mem_tmp);
+						pfmem[count]->fromMem = 1;
+						pfmem[count]->rangeno = mem_tmp->rangeno;
+						pfmem[count]->start = mem_tmp->start;
+						pfmem[count]->end = mem_tmp->end;
+						ibmphp_add_pfmem_from_mem(pfmem[count]);
+						func->pfmem[count] = pfmem[count];
+					} else {
+						err("cannot allocate requested pfmem for bus %x, device %x, len %x\n",
+						     func->busno, func->device, len[count]);
+						kfree(mem_tmp);
+						kfree(pfmem[count]);
+						return -EIO;
+					}
+				}
+
+				pci_bus_write_config_dword(ibmphp_pci_bus, devfn, address[count], func->pfmem[count]->start);
+
+				/*_______________This is for debugging purposes only______________________________*/
+				debug("b4 writing, start address is %x\n", func->pfmem[count]->start);
+				pci_bus_read_config_dword(ibmphp_pci_bus, devfn, address[count], &bar[count]);
+				debug("after writing, start address is %x\n", bar[count]);
+				/*_________________________________________________________________________________*/
+
+				if (bar[count] & PCI_BASE_ADDRESS_MEM_TYPE_64) {	/* takes up another dword */
+					debug("inside the mem 64 case, count %d\n", count);
+					count += 1;
+					/* on the 2nd dword, write all 0s, since we can't handle them n.e.ways */
+					pci_bus_write_config_dword(ibmphp_pci_bus, devfn, address[count], 0x00000000);
+				}
+			} else {
+				/* regular memory */
+				debug("REGULAR MEM SPACE\n");
+
+				len[count] = bar[count] & 0xFFFFFFF0;
+				len[count] = ~len[count] + 1;
+
+				debug("len[count] in Mem %x, count %d\n", len[count], count);
+
+				mem[count] = kzalloc(sizeof(struct resource_node), GFP_KERNEL);
+				if (!mem[count])
+					return -ENOMEM;
+
+				mem[count]->type = MEM;
+				mem[count]->busno = func->busno;
+				mem[count]->devfunc = PCI_DEVFN(func->device,
+							func->function);
+				mem[count]->len = len[count];
+				if (ibmphp_check_resource(mem[count], 0) == 0) {
+					ibmphp_add_resource(mem[count]);
+					func->mem[count] = mem[count];
+				} else {
+					err("cannot allocate requested mem for bus %x, device %x, len %x\n",
+					     func->busno, func->device, len[count]);
+					kfree(mem[count]);
+					return -EIO;
+				}
+				pci_bus_write_config_dword(ibmphp_pci_bus, devfn, address[count], func->mem[count]->start);
+				/* _______________________This is for debugging purposes only _______________________*/
+				debug("b4 writing, start address is %x\n", func->mem[count]->start);
+				pci_bus_read_config_dword(ibmphp_pci_bus, devfn, address[count], &bar[count]);
+				debug("after writing, the address is %x\n", bar[count]);
+				/* __________________________________________________________________________________*/
+
+				if (bar[count] & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+					/* takes up another dword */
+					debug("inside mem 64 case, reg. mem, count %d\n", count);
+					count += 1;
+					/* on the 2nd dword, write all 0s, since we can't handle them n.e.ways */
+					pci_bus_write_config_dword(ibmphp_pci_bus, devfn, address[count], 0x00000000);
+				}
+			}
+		}		/* end of mem */
+	}			/* end of for */
+
+	func->bus = 0;		/* To indicate that this is not a PPB */
+	pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_INTERRUPT_PIN, &irq);
+	if ((irq > 0x00) && (irq < 0x05))
+		pci_bus_write_config_byte(ibmphp_pci_bus, devfn, PCI_INTERRUPT_LINE, func->irq[irq - 1]);
+
+	pci_bus_write_config_byte(ibmphp_pci_bus, devfn, PCI_CACHE_LINE_SIZE, CACHE);
+	pci_bus_write_config_byte(ibmphp_pci_bus, devfn, PCI_LATENCY_TIMER, LATENCY);
+
+	pci_bus_write_config_dword(ibmphp_pci_bus, devfn, PCI_ROM_ADDRESS, 0x00L);
+	pci_bus_write_config_word(ibmphp_pci_bus, devfn, PCI_COMMAND, DEVICEENABLE);
+
+	return 0;
+}
+
+/******************************************************************************
+ * This routine configures a PCI-2-PCI bridge and the functions behind it
+ * Parameters: pci_func
+ * Returns:
+ ******************************************************************************/
+static int configure_bridge(struct pci_func **func_passed, u8 slotno)
+{
+	int count;
+	int i;
+	int rc;
+	u8 sec_number;
+	u8 io_base;
+	u16 pfmem_base;
+	u32 bar[2];
+	u32 len[2];
+	u8 flag_io = 0;
+	u8 flag_mem = 0;
+	u8 flag_pfmem = 0;
+	u8 need_io_upper = 0;
+	u8 need_pfmem_upper = 0;
+	struct res_needed *amount_needed = NULL;
+	struct resource_node *io = NULL;
+	struct resource_node *bus_io[2] = {NULL, NULL};
+	struct resource_node *mem = NULL;
+	struct resource_node *bus_mem[2] = {NULL, NULL};
+	struct resource_node *mem_tmp = NULL;
+	struct resource_node *pfmem = NULL;
+	struct resource_node *bus_pfmem[2] = {NULL, NULL};
+	struct bus_node *bus;
+	static const u32 address[] = {
+		PCI_BASE_ADDRESS_0,
+		PCI_BASE_ADDRESS_1,
+		0
+	};
+	struct pci_func *func = *func_passed;
+	unsigned int devfn;
+	u8 irq;
+	int retval;
+
+	debug("%s - enter\n", __func__);
+
+	devfn = PCI_DEVFN(func->function, func->device);
+	ibmphp_pci_bus->number = func->busno;
+
+	/* Configuring necessary info for the bridge so that we could see the devices
+	 * behind it
+	 */
+
+	pci_bus_write_config_byte(ibmphp_pci_bus, devfn, PCI_PRIMARY_BUS, func->busno);
+
+	/* _____________________For debugging purposes only __________________________
+	pci_bus_config_byte(ibmphp_pci_bus, devfn, PCI_PRIMARY_BUS, &pri_number);
+	debug("primary # written into the bridge is %x\n", pri_number);
+	 ___________________________________________________________________________*/
+
+	/* in EBDA, only get allocated 1 additional bus # per slot */
+	sec_number = find_sec_number(func->busno, slotno);
+	if (sec_number == 0xff) {
+		err("cannot allocate secondary bus number for the bridged device\n");
+		return -EINVAL;
+	}
+
+	debug("after find_sec_number, the number we got is %x\n", sec_number);
+	debug("AFTER FIND_SEC_NUMBER, func->busno IS %x\n", func->busno);
+
+	pci_bus_write_config_byte(ibmphp_pci_bus, devfn, PCI_SECONDARY_BUS, sec_number);
+
+	/* __________________For debugging purposes only __________________________________
+	pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_SECONDARY_BUS, &sec_number);
+	debug("sec_number after write/read is %x\n", sec_number);
+	 ________________________________________________________________________________*/
+
+	pci_bus_write_config_byte(ibmphp_pci_bus, devfn, PCI_SUBORDINATE_BUS, sec_number);
+
+	/* __________________For debugging purposes only ____________________________________
+	pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_SUBORDINATE_BUS, &sec_number);
+	debug("subordinate number after write/read is %x\n", sec_number);
+	 __________________________________________________________________________________*/
+
+	pci_bus_write_config_byte(ibmphp_pci_bus, devfn, PCI_CACHE_LINE_SIZE, CACHE);
+	pci_bus_write_config_byte(ibmphp_pci_bus, devfn, PCI_LATENCY_TIMER, LATENCY);
+	pci_bus_write_config_byte(ibmphp_pci_bus, devfn, PCI_SEC_LATENCY_TIMER, LATENCY);
+
+	debug("func->busno is %x\n", func->busno);
+	debug("sec_number after writing is %x\n", sec_number);
+
+
+	/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+	   !!!!!!!!!!!!!!!NEED TO ADD!!!  FAST BACK-TO-BACK ENABLE!!!!!!!!!!!!!!!!!!!!
+	   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
+
+
+	/* First we need to allocate mem/io for the bridge itself in case it needs it */
+	for (count = 0; address[count]; count++) {	/* for 2 BARs */
+		pci_bus_write_config_dword(ibmphp_pci_bus, devfn, address[count], 0xFFFFFFFF);
+		pci_bus_read_config_dword(ibmphp_pci_bus, devfn, address[count], &bar[count]);
+
+		if (!bar[count]) {
+			/* This BAR is not implemented */
+			debug("so we come here then, eh?, count = %d\n", count);
+			continue;
+		}
+		//  tmp_bar = bar[count];
+
+		debug("Bar %d wants %x\n", count, bar[count]);
+
+		if (bar[count] & PCI_BASE_ADDRESS_SPACE_IO) {
+			/* This is IO */
+			len[count] = bar[count] & 0xFFFFFFFC;
+			len[count] = ~len[count] + 1;
+
+			debug("len[count] in IO = %x\n", len[count]);
+
+			bus_io[count] = kzalloc(sizeof(struct resource_node), GFP_KERNEL);
+
+			if (!bus_io[count]) {
+				retval = -ENOMEM;
+				goto error;
+			}
+			bus_io[count]->type = IO;
+			bus_io[count]->busno = func->busno;
+			bus_io[count]->devfunc = PCI_DEVFN(func->device,
+							func->function);
+			bus_io[count]->len = len[count];
+			if (ibmphp_check_resource(bus_io[count], 0) == 0) {
+				ibmphp_add_resource(bus_io[count]);
+				func->io[count] = bus_io[count];
+			} else {
+				err("cannot allocate requested io for bus %x, device %x, len %x\n",
+				     func->busno, func->device, len[count]);
+				kfree(bus_io[count]);
+				return -EIO;
+			}
+
+			pci_bus_write_config_dword(ibmphp_pci_bus, devfn, address[count], func->io[count]->start);
+
+		} else {
+			/* This is Memory */
+			if (bar[count] & PCI_BASE_ADDRESS_MEM_PREFETCH) {
+				/* pfmem */
+				len[count] = bar[count] & 0xFFFFFFF0;
+				len[count] = ~len[count] + 1;
+
+				debug("len[count] in PFMEM = %x\n", len[count]);
+
+				bus_pfmem[count] = kzalloc(sizeof(struct resource_node), GFP_KERNEL);
+				if (!bus_pfmem[count]) {
+					retval = -ENOMEM;
+					goto error;
+				}
+				bus_pfmem[count]->type = PFMEM;
+				bus_pfmem[count]->busno = func->busno;
+				bus_pfmem[count]->devfunc = PCI_DEVFN(func->device,
+							func->function);
+				bus_pfmem[count]->len = len[count];
+				bus_pfmem[count]->fromMem = 0;
+				if (ibmphp_check_resource(bus_pfmem[count], 0) == 0) {
+					ibmphp_add_resource(bus_pfmem[count]);
+					func->pfmem[count] = bus_pfmem[count];
+				} else {
+					mem_tmp = kzalloc(sizeof(*mem_tmp), GFP_KERNEL);
+					if (!mem_tmp) {
+						retval = -ENOMEM;
+						goto error;
+					}
+					mem_tmp->type = MEM;
+					mem_tmp->busno = bus_pfmem[count]->busno;
+					mem_tmp->devfunc = bus_pfmem[count]->devfunc;
+					mem_tmp->len = bus_pfmem[count]->len;
+					if (ibmphp_check_resource(mem_tmp, 0) == 0) {
+						ibmphp_add_resource(mem_tmp);
+						bus_pfmem[count]->fromMem = 1;
+						bus_pfmem[count]->rangeno = mem_tmp->rangeno;
+						ibmphp_add_pfmem_from_mem(bus_pfmem[count]);
+						func->pfmem[count] = bus_pfmem[count];
+					} else {
+						err("cannot allocate requested pfmem for bus %x, device %x, len %x\n",
+						     func->busno, func->device, len[count]);
+						kfree(mem_tmp);
+						kfree(bus_pfmem[count]);
+						return -EIO;
+					}
+				}
+
+				pci_bus_write_config_dword(ibmphp_pci_bus, devfn, address[count], func->pfmem[count]->start);
+
+				if (bar[count] & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+					/* takes up another dword */
+					count += 1;
+					/* on the 2nd dword, write all 0s, since we can't handle them n.e.ways */
+					pci_bus_write_config_dword(ibmphp_pci_bus, devfn, address[count], 0x00000000);
+
+				}
+			} else {
+				/* regular memory */
+				len[count] = bar[count] & 0xFFFFFFF0;
+				len[count] = ~len[count] + 1;
+
+				debug("len[count] in Memory is %x\n", len[count]);
+
+				bus_mem[count] = kzalloc(sizeof(struct resource_node), GFP_KERNEL);
+				if (!bus_mem[count]) {
+					retval = -ENOMEM;
+					goto error;
+				}
+				bus_mem[count]->type = MEM;
+				bus_mem[count]->busno = func->busno;
+				bus_mem[count]->devfunc = PCI_DEVFN(func->device,
+							func->function);
+				bus_mem[count]->len = len[count];
+				if (ibmphp_check_resource(bus_mem[count], 0) == 0) {
+					ibmphp_add_resource(bus_mem[count]);
+					func->mem[count] = bus_mem[count];
+				} else {
+					err("cannot allocate requested mem for bus %x, device %x, len %x\n",
+					     func->busno, func->device, len[count]);
+					kfree(bus_mem[count]);
+					return -EIO;
+				}
+
+				pci_bus_write_config_dword(ibmphp_pci_bus, devfn, address[count], func->mem[count]->start);
+
+				if (bar[count] & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+					/* takes up another dword */
+					count += 1;
+					/* on the 2nd dword, write all 0s, since we can't handle them n.e.ways */
+					pci_bus_write_config_dword(ibmphp_pci_bus, devfn, address[count], 0x00000000);
+
+				}
+			}
+		}		/* end of mem */
+	}			/* end of for  */
+
+	/* Now need to see how much space the devices behind the bridge needed */
+	amount_needed = scan_behind_bridge(func, sec_number);
+	if (amount_needed == NULL)
+		return -ENOMEM;
+
+	ibmphp_pci_bus->number = func->busno;
+	debug("after coming back from scan_behind_bridge\n");
+	debug("amount_needed->not_correct = %x\n", amount_needed->not_correct);
+	debug("amount_needed->io = %x\n", amount_needed->io);
+	debug("amount_needed->mem = %x\n", amount_needed->mem);
+	debug("amount_needed->pfmem =  %x\n", amount_needed->pfmem);
+
+	if (amount_needed->not_correct) {
+		debug("amount_needed is not correct\n");
+		for (count = 0; address[count]; count++) {
+			/* for 2 BARs */
+			if (bus_io[count]) {
+				ibmphp_remove_resource(bus_io[count]);
+				func->io[count] = NULL;
+			} else if (bus_pfmem[count]) {
+				ibmphp_remove_resource(bus_pfmem[count]);
+				func->pfmem[count] = NULL;
+			} else if (bus_mem[count]) {
+				ibmphp_remove_resource(bus_mem[count]);
+				func->mem[count] = NULL;
+			}
+		}
+		kfree(amount_needed);
+		return -ENODEV;
+	}
+
+	if (!amount_needed->io) {
+		debug("it doesn't want IO?\n");
+		flag_io = 1;
+	} else {
+		debug("it wants %x IO behind the bridge\n", amount_needed->io);
+		io = kzalloc(sizeof(*io), GFP_KERNEL);
+
+		if (!io) {
+			retval = -ENOMEM;
+			goto error;
+		}
+		io->type = IO;
+		io->busno = func->busno;
+		io->devfunc = PCI_DEVFN(func->device, func->function);
+		io->len = amount_needed->io;
+		if (ibmphp_check_resource(io, 1) == 0) {
+			debug("were we able to add io\n");
+			ibmphp_add_resource(io);
+			flag_io = 1;
+		}
+	}
+
+	if (!amount_needed->mem) {
+		debug("it doesn't want n.e.memory?\n");
+		flag_mem = 1;
+	} else {
+		debug("it wants %x memory behind the bridge\n", amount_needed->mem);
+		mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+		if (!mem) {
+			retval = -ENOMEM;
+			goto error;
+		}
+		mem->type = MEM;
+		mem->busno = func->busno;
+		mem->devfunc = PCI_DEVFN(func->device, func->function);
+		mem->len = amount_needed->mem;
+		if (ibmphp_check_resource(mem, 1) == 0) {
+			ibmphp_add_resource(mem);
+			flag_mem = 1;
+			debug("were we able to add mem\n");
+		}
+	}
+
+	if (!amount_needed->pfmem) {
+		debug("it doesn't want n.e.pfmem mem?\n");
+		flag_pfmem = 1;
+	} else {
+		debug("it wants %x pfmemory behind the bridge\n", amount_needed->pfmem);
+		pfmem = kzalloc(sizeof(*pfmem), GFP_KERNEL);
+		if (!pfmem) {
+			retval = -ENOMEM;
+			goto error;
+		}
+		pfmem->type = PFMEM;
+		pfmem->busno = func->busno;
+		pfmem->devfunc = PCI_DEVFN(func->device, func->function);
+		pfmem->len = amount_needed->pfmem;
+		pfmem->fromMem = 0;
+		if (ibmphp_check_resource(pfmem, 1) == 0) {
+			ibmphp_add_resource(pfmem);
+			flag_pfmem = 1;
+		} else {
+			mem_tmp = kzalloc(sizeof(*mem_tmp), GFP_KERNEL);
+			if (!mem_tmp) {
+				retval = -ENOMEM;
+				goto error;
+			}
+			mem_tmp->type = MEM;
+			mem_tmp->busno = pfmem->busno;
+			mem_tmp->devfunc = pfmem->devfunc;
+			mem_tmp->len = pfmem->len;
+			if (ibmphp_check_resource(mem_tmp, 1) == 0) {
+				ibmphp_add_resource(mem_tmp);
+				pfmem->fromMem = 1;
+				pfmem->rangeno = mem_tmp->rangeno;
+				ibmphp_add_pfmem_from_mem(pfmem);
+				flag_pfmem = 1;
+			}
+		}
+	}
+
+	debug("b4 if (flag_io && flag_mem && flag_pfmem)\n");
+	debug("flag_io = %x, flag_mem = %x, flag_pfmem = %x\n", flag_io, flag_mem, flag_pfmem);
+
+	if (flag_io && flag_mem && flag_pfmem) {
+		/* If on bootup, there was a bridged card in this slot,
+		 * then card was removed and ibmphp got unloaded and loaded
+		 * back again, there's no way for us to remove the bus
+		 * struct, so no need to kmalloc, can use existing node
+		 */
+		bus = ibmphp_find_res_bus(sec_number);
+		if (!bus) {
+			bus = kzalloc(sizeof(*bus), GFP_KERNEL);
+			if (!bus) {
+				retval = -ENOMEM;
+				goto error;
+			}
+			bus->busno = sec_number;
+			debug("b4 adding new bus\n");
+			rc = add_new_bus(bus, io, mem, pfmem, func->busno);
+		} else if (!(bus->rangeIO) && !(bus->rangeMem) && !(bus->rangePFMem))
+			rc = add_new_bus(bus, io, mem, pfmem, 0xFF);
+		else {
+			err("expected bus structure not empty?\n");
+			retval = -EIO;
+			goto error;
+		}
+		if (rc) {
+			if (rc == -ENOMEM) {
+				ibmphp_remove_bus(bus, func->busno);
+				kfree(amount_needed);
+				return rc;
+			}
+			retval = rc;
+			goto error;
+		}
+		pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_IO_BASE, &io_base);
+		pci_bus_read_config_word(ibmphp_pci_bus, devfn, PCI_PREF_MEMORY_BASE, &pfmem_base);
+
+		if ((io_base & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) {
+			debug("io 32\n");
+			need_io_upper = 1;
+		}
+		if ((pfmem_base & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) {
+			debug("pfmem 64\n");
+			need_pfmem_upper = 1;
+		}
+
+		if (bus->noIORanges) {
+			pci_bus_write_config_byte(ibmphp_pci_bus, devfn, PCI_IO_BASE, 0x00 | bus->rangeIO->start >> 8);
+			pci_bus_write_config_byte(ibmphp_pci_bus, devfn, PCI_IO_LIMIT, 0x00 | bus->rangeIO->end >> 8);
+
+			/* _______________This is for debugging purposes only ____________________
+			pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_IO_BASE, &temp);
+			debug("io_base = %x\n", (temp & PCI_IO_RANGE_TYPE_MASK) << 8);
+			pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_IO_LIMIT, &temp);
+			debug("io_limit = %x\n", (temp & PCI_IO_RANGE_TYPE_MASK) << 8);
+			 ________________________________________________________________________*/
+
+			if (need_io_upper) {	/* since can't support n.e.ways */
+				pci_bus_write_config_word(ibmphp_pci_bus, devfn, PCI_IO_BASE_UPPER16, 0x0000);
+				pci_bus_write_config_word(ibmphp_pci_bus, devfn, PCI_IO_LIMIT_UPPER16, 0x0000);
+			}
+		} else {
+			pci_bus_write_config_byte(ibmphp_pci_bus, devfn, PCI_IO_BASE, 0x00);
+			pci_bus_write_config_byte(ibmphp_pci_bus, devfn, PCI_IO_LIMIT, 0x00);
+		}
+
+		if (bus->noMemRanges) {
+			pci_bus_write_config_word(ibmphp_pci_bus, devfn, PCI_MEMORY_BASE, 0x0000 | bus->rangeMem->start >> 16);
+			pci_bus_write_config_word(ibmphp_pci_bus, devfn, PCI_MEMORY_LIMIT, 0x0000 | bus->rangeMem->end >> 16);
+
+			/* ____________________This is for debugging purposes only ________________________
+			pci_bus_read_config_word(ibmphp_pci_bus, devfn, PCI_MEMORY_BASE, &temp);
+			debug("mem_base = %x\n", (temp & PCI_MEMORY_RANGE_TYPE_MASK) << 16);
+			pci_bus_read_config_word(ibmphp_pci_bus, devfn, PCI_MEMORY_LIMIT, &temp);
+			debug("mem_limit = %x\n", (temp & PCI_MEMORY_RANGE_TYPE_MASK) << 16);
+			 __________________________________________________________________________________*/
+
+		} else {
+			pci_bus_write_config_word(ibmphp_pci_bus, devfn, PCI_MEMORY_BASE, 0xffff);
+			pci_bus_write_config_word(ibmphp_pci_bus, devfn, PCI_MEMORY_LIMIT, 0x0000);
+		}
+		if (bus->noPFMemRanges) {
+			pci_bus_write_config_word(ibmphp_pci_bus, devfn, PCI_PREF_MEMORY_BASE, 0x0000 | bus->rangePFMem->start >> 16);
+			pci_bus_write_config_word(ibmphp_pci_bus, devfn, PCI_PREF_MEMORY_LIMIT, 0x0000 | bus->rangePFMem->end >> 16);
+
+			/* __________________________This is for debugging purposes only _______________________
+			pci_bus_read_config_word(ibmphp_pci_bus, devfn, PCI_PREF_MEMORY_BASE, &temp);
+			debug("pfmem_base = %x", (temp & PCI_MEMORY_RANGE_TYPE_MASK) << 16);
+			pci_bus_read_config_word(ibmphp_pci_bus, devfn, PCI_PREF_MEMORY_LIMIT, &temp);
+			debug("pfmem_limit = %x\n", (temp & PCI_MEMORY_RANGE_TYPE_MASK) << 16);
+			 ______________________________________________________________________________________*/
+
+			if (need_pfmem_upper) {	/* since can't support n.e.ways */
+				pci_bus_write_config_dword(ibmphp_pci_bus, devfn, PCI_PREF_BASE_UPPER32, 0x00000000);
+				pci_bus_write_config_dword(ibmphp_pci_bus, devfn, PCI_PREF_LIMIT_UPPER32, 0x00000000);
+			}
+		} else {
+			pci_bus_write_config_word(ibmphp_pci_bus, devfn, PCI_PREF_MEMORY_BASE, 0xffff);
+			pci_bus_write_config_word(ibmphp_pci_bus, devfn, PCI_PREF_MEMORY_LIMIT, 0x0000);
+		}
+
+		debug("b4 writing control information\n");
+
+		pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_INTERRUPT_PIN, &irq);
+		if ((irq > 0x00) && (irq < 0x05))
+			pci_bus_write_config_byte(ibmphp_pci_bus, devfn, PCI_INTERRUPT_LINE, func->irq[irq - 1]);
+		/*
+		pci_bus_write_config_byte(ibmphp_pci_bus, devfn, PCI_BRIDGE_CONTROL, ctrl);
+		pci_bus_write_config_byte(ibmphp_pci_bus, devfn, PCI_BRIDGE_CONTROL, PCI_BRIDGE_CTL_PARITY);
+		pci_bus_write_config_byte(ibmphp_pci_bus, devfn, PCI_BRIDGE_CONTROL, PCI_BRIDGE_CTL_SERR);
+		 */
+
+		pci_bus_write_config_word(ibmphp_pci_bus, devfn, PCI_COMMAND, DEVICEENABLE);
+		pci_bus_write_config_word(ibmphp_pci_bus, devfn, PCI_BRIDGE_CONTROL, 0x07);
+		for (i = 0; i < 32; i++) {
+			if (amount_needed->devices[i]) {
+				debug("device where devices[i] is 1 = %x\n", i);
+				func->devices[i] = 1;
+			}
+		}
+		func->bus = 1;	/* For unconfiguring, to indicate it's PPB */
+		func_passed = &func;
+		debug("func->busno b4 returning is %x\n", func->busno);
+		debug("func->busno b4 returning in the other structure is %x\n", (*func_passed)->busno);
+		kfree(amount_needed);
+		return 0;
+	} else {
+		err("Configuring bridge was unsuccessful...\n");
+		mem_tmp = NULL;
+		retval = -EIO;
+		goto error;
+	}
+
+error:
+	kfree(amount_needed);
+	if (pfmem)
+		ibmphp_remove_resource(pfmem);
+	if (io)
+		ibmphp_remove_resource(io);
+	if (mem)
+		ibmphp_remove_resource(mem);
+	for (i = 0; i < 2; i++) {	/* for 2 BARs */
+		if (bus_io[i]) {
+			ibmphp_remove_resource(bus_io[i]);
+			func->io[i] = NULL;
+		} else if (bus_pfmem[i]) {
+			ibmphp_remove_resource(bus_pfmem[i]);
+			func->pfmem[i] = NULL;
+		} else if (bus_mem[i]) {
+			ibmphp_remove_resource(bus_mem[i]);
+			func->mem[i] = NULL;
+		}
+	}
+	return retval;
+}
+
+/*****************************************************************************
+ * This function adds up the amount of resources needed behind the PPB bridge
+ * and passes it to the configure_bridge function
+ * Input: bridge function
+ * Output: amount of resources needed
+ *****************************************************************************/
+static struct res_needed *scan_behind_bridge(struct pci_func *func, u8 busno)
+{
+	int count, len[6];
+	u16 vendor_id;
+	u8 hdr_type;
+	u8 device, function;
+	unsigned int devfn;
+	int howmany = 0;	/*this is to see if there are any devices behind the bridge */
+
+	u32 bar[6], class;
+	static const u32 address[] = {
+		PCI_BASE_ADDRESS_0,
+		PCI_BASE_ADDRESS_1,
+		PCI_BASE_ADDRESS_2,
+		PCI_BASE_ADDRESS_3,
+		PCI_BASE_ADDRESS_4,
+		PCI_BASE_ADDRESS_5,
+		0
+	};
+	struct res_needed *amount;
+
+	amount = kzalloc(sizeof(*amount), GFP_KERNEL);
+	if (amount == NULL)
+		return NULL;
+
+	ibmphp_pci_bus->number = busno;
+
+	debug("the bus_no behind the bridge is %x\n", busno);
+	debug("scanning devices behind the bridge...\n");
+	for (device = 0; device < 32; device++) {
+		amount->devices[device] = 0;
+		for (function = 0; function < 8; function++) {
+			devfn = PCI_DEVFN(device, function);
+
+			pci_bus_read_config_word(ibmphp_pci_bus, devfn, PCI_VENDOR_ID, &vendor_id);
+
+			if (vendor_id != PCI_VENDOR_ID_NOTVALID) {
+				/* found correct device!!! */
+				howmany++;
+
+				pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_HEADER_TYPE, &hdr_type);
+				pci_bus_read_config_dword(ibmphp_pci_bus, devfn, PCI_CLASS_REVISION, &class);
+
+				debug("hdr_type behind the bridge is %x\n", hdr_type);
+				if ((hdr_type & 0x7f) == PCI_HEADER_TYPE_BRIDGE) {
+					err("embedded bridges not supported for hot-plugging.\n");
+					amount->not_correct = 1;
+					return amount;
+				}
+
+				class >>= 8;	/* to take revision out, class = class.subclass.prog i/f */
+				if (class == PCI_CLASS_NOT_DEFINED_VGA) {
+					err("The device %x is VGA compatible and as is not supported for hot plugging.  Please choose another device.\n", device);
+					amount->not_correct = 1;
+					return amount;
+				} else if (class == PCI_CLASS_DISPLAY_VGA) {
+					err("The device %x is not supported for hot plugging.  Please choose another device.\n", device);
+					amount->not_correct = 1;
+					return amount;
+				}
+
+				amount->devices[device] = 1;
+
+				for (count = 0; address[count]; count++) {
+					/* for 6 BARs */
+					/*
+					pci_bus_read_config_byte(ibmphp_pci_bus, devfn, address[count], &tmp);
+					if (tmp & 0x01) // IO
+						pci_bus_write_config_dword(ibmphp_pci_bus, devfn, address[count], 0xFFFFFFFD);
+					else // MEMORY
+						pci_bus_write_config_dword(ibmphp_pci_bus, devfn, address[count], 0xFFFFFFFF);
+					*/
+					pci_bus_write_config_dword(ibmphp_pci_bus, devfn, address[count], 0xFFFFFFFF);
+					pci_bus_read_config_dword(ibmphp_pci_bus, devfn, address[count], &bar[count]);
+
+					debug("what is bar[count]? %x, count = %d\n", bar[count], count);
+
+					if (!bar[count])	/* This BAR is not implemented */
+						continue;
+
+					//tmp_bar = bar[count];
+
+					debug("count %d device %x function %x wants %x resources\n", count, device, function, bar[count]);
+
+					if (bar[count] & PCI_BASE_ADDRESS_SPACE_IO) {
+						/* This is IO */
+						len[count] = bar[count] & 0xFFFFFFFC;
+						len[count] = ~len[count] + 1;
+						amount->io += len[count];
+					} else {
+						/* This is Memory */
+						if (bar[count] & PCI_BASE_ADDRESS_MEM_PREFETCH) {
+							/* pfmem */
+							len[count] = bar[count] & 0xFFFFFFF0;
+							len[count] = ~len[count] + 1;
+							amount->pfmem += len[count];
+							if (bar[count] & PCI_BASE_ADDRESS_MEM_TYPE_64)
+								/* takes up another dword */
+								count += 1;
+
+						} else {
+							/* regular memory */
+							len[count] = bar[count] & 0xFFFFFFF0;
+							len[count] = ~len[count] + 1;
+							amount->mem += len[count];
+							if (bar[count] & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+								/* takes up another dword */
+								count += 1;
+							}
+						}
+					}
+				}	/* end for */
+			}	/* end if (valid) */
+		}	/* end for */
+	}	/* end for */
+
+	if (!howmany)
+		amount->not_correct = 1;
+	else
+		amount->not_correct = 0;
+	if ((amount->io) && (amount->io < IOBRIDGE))
+		amount->io = IOBRIDGE;
+	if ((amount->mem) && (amount->mem < MEMBRIDGE))
+		amount->mem = MEMBRIDGE;
+	if ((amount->pfmem) && (amount->pfmem < MEMBRIDGE))
+		amount->pfmem = MEMBRIDGE;
+	return amount;
+}
+
+/* The following 3 unconfigure_boot_ routines deal with the case when we had the card
+ * upon bootup in the system, since we don't allocate func to such case, we need to read
+ * the start addresses from pci config space and then find the corresponding entries in
+ * our resource lists.  The functions return either 0, -ENODEV, or -1 (general failure)
+ * Change: we also call these functions even if we configured the card ourselves (i.e., not
+ * the bootup case), since it should work same way
+ */
+static int unconfigure_boot_device(u8 busno, u8 device, u8 function)
+{
+	u32 start_address;
+	static const u32 address[] = {
+		PCI_BASE_ADDRESS_0,
+		PCI_BASE_ADDRESS_1,
+		PCI_BASE_ADDRESS_2,
+		PCI_BASE_ADDRESS_3,
+		PCI_BASE_ADDRESS_4,
+		PCI_BASE_ADDRESS_5,
+		0
+	};
+	int count;
+	struct resource_node *io;
+	struct resource_node *mem;
+	struct resource_node *pfmem;
+	struct bus_node *bus;
+	u32 end_address;
+	u32 temp_end;
+	u32 size;
+	u32 tmp_address;
+	unsigned int devfn;
+
+	debug("%s - enter\n", __func__);
+
+	bus = ibmphp_find_res_bus(busno);
+	if (!bus) {
+		debug("cannot find corresponding bus.\n");
+		return -EINVAL;
+	}
+
+	devfn = PCI_DEVFN(device, function);
+	ibmphp_pci_bus->number = busno;
+	for (count = 0; address[count]; count++) {	/* for 6 BARs */
+		pci_bus_read_config_dword(ibmphp_pci_bus, devfn, address[count], &start_address);
+
+		/* We can do this here, b/c by that time the device driver of the card has been stopped */
+
+		pci_bus_write_config_dword(ibmphp_pci_bus, devfn, address[count], 0xFFFFFFFF);
+		pci_bus_read_config_dword(ibmphp_pci_bus, devfn, address[count], &size);
+		pci_bus_write_config_dword(ibmphp_pci_bus, devfn, address[count], start_address);
+
+		debug("start_address is %x\n", start_address);
+		debug("busno, device, function %x %x %x\n", busno, device, function);
+		if (!size) {
+			/* This BAR is not implemented */
+			debug("is this bar no implemented?, count = %d\n", count);
+			continue;
+		}
+		tmp_address = start_address;
+		if (start_address & PCI_BASE_ADDRESS_SPACE_IO) {
+			/* This is IO */
+			start_address &= PCI_BASE_ADDRESS_IO_MASK;
+			size = size & 0xFFFFFFFC;
+			size = ~size + 1;
+			end_address = start_address + size - 1;
+			if (ibmphp_find_resource(bus, start_address, &io, IO))
+				goto report_search_failure;
+
+			debug("io->start = %x\n", io->start);
+			temp_end = io->end;
+			start_address = io->end + 1;
+			ibmphp_remove_resource(io);
+			/* This is needed b/c of the old I/O restrictions in the BIOS */
+			while (temp_end < end_address) {
+				if (ibmphp_find_resource(bus, start_address,
+							 &io, IO))
+					goto report_search_failure;
+
+				debug("io->start = %x\n", io->start);
+				temp_end = io->end;
+				start_address = io->end + 1;
+				ibmphp_remove_resource(io);
+			}
+
+			/* ????????? DO WE NEED TO WRITE ANYTHING INTO THE PCI CONFIG SPACE BACK ?????????? */
+		} else {
+			/* This is Memory */
+			if (start_address & PCI_BASE_ADDRESS_MEM_PREFETCH) {
+				/* pfmem */
+				debug("start address of pfmem is %x\n", start_address);
+				start_address &= PCI_BASE_ADDRESS_MEM_MASK;
+
+				if (ibmphp_find_resource(bus, start_address, &pfmem, PFMEM) < 0) {
+					err("cannot find corresponding PFMEM resource to remove\n");
+					return -EIO;
+				}
+				if (pfmem) {
+					debug("pfmem->start = %x\n", pfmem->start);
+
+					ibmphp_remove_resource(pfmem);
+				}
+			} else {
+				/* regular memory */
+				debug("start address of mem is %x\n", start_address);
+				start_address &= PCI_BASE_ADDRESS_MEM_MASK;
+
+				if (ibmphp_find_resource(bus, start_address, &mem, MEM) < 0) {
+					err("cannot find corresponding MEM resource to remove\n");
+					return -EIO;
+				}
+				if (mem) {
+					debug("mem->start = %x\n", mem->start);
+
+					ibmphp_remove_resource(mem);
+				}
+			}
+			if (tmp_address & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+				/* takes up another dword */
+				count += 1;
+			}
+		}	/* end of mem */
+	}	/* end of for */
+
+	return 0;
+
+report_search_failure:
+	err("cannot find corresponding IO resource to remove\n");
+	return -EIO;
+}
+
+static int unconfigure_boot_bridge(u8 busno, u8 device, u8 function)
+{
+	int count;
+	int bus_no, pri_no, sub_no, sec_no = 0;
+	u32 start_address, tmp_address;
+	u8 sec_number, sub_number, pri_number;
+	struct resource_node *io = NULL;
+	struct resource_node *mem = NULL;
+	struct resource_node *pfmem = NULL;
+	struct bus_node *bus;
+	static const u32 address[] = {
+		PCI_BASE_ADDRESS_0,
+		PCI_BASE_ADDRESS_1,
+		0
+	};
+	unsigned int devfn;
+
+	devfn = PCI_DEVFN(device, function);
+	ibmphp_pci_bus->number = busno;
+	bus_no = (int) busno;
+	debug("busno is %x\n", busno);
+	pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_PRIMARY_BUS, &pri_number);
+	debug("%s - busno = %x, primary_number = %x\n", __func__, busno, pri_number);
+
+	pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_SECONDARY_BUS, &sec_number);
+	debug("sec_number is %x\n", sec_number);
+	sec_no = (int) sec_number;
+	pri_no = (int) pri_number;
+	if (pri_no != bus_no) {
+		err("primary numbers in our structures and pci config space don't match.\n");
+		return -EINVAL;
+	}
+
+	pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_SUBORDINATE_BUS, &sub_number);
+	sub_no = (int) sub_number;
+	debug("sub_no is %d, sec_no is %d\n", sub_no, sec_no);
+	if (sec_no != sub_number) {
+		err("there're more buses behind this bridge.  Hot removal is not supported.  Please choose another card\n");
+		return -ENODEV;
+	}
+
+	bus = ibmphp_find_res_bus(sec_number);
+	if (!bus) {
+		err("cannot find Bus structure for the bridged device\n");
+		return -EINVAL;
+	}
+	debug("bus->busno is %x\n", bus->busno);
+	debug("sec_number is %x\n", sec_number);
+
+	ibmphp_remove_bus(bus, busno);
+
+	for (count = 0; address[count]; count++) {
+		/* for 2 BARs */
+		pci_bus_read_config_dword(ibmphp_pci_bus, devfn, address[count], &start_address);
+
+		if (!start_address) {
+			/* This BAR is not implemented */
+			continue;
+		}
+
+		tmp_address = start_address;
+
+		if (start_address & PCI_BASE_ADDRESS_SPACE_IO) {
+			/* This is IO */
+			start_address &= PCI_BASE_ADDRESS_IO_MASK;
+			if (ibmphp_find_resource(bus, start_address, &io, IO) < 0) {
+				err("cannot find corresponding IO resource to remove\n");
+				return -EIO;
+			}
+			if (io)
+				debug("io->start = %x\n", io->start);
+
+			ibmphp_remove_resource(io);
+
+			/* ????????? DO WE NEED TO WRITE ANYTHING INTO THE PCI CONFIG SPACE BACK ?????????? */
+		} else {
+			/* This is Memory */
+			if (start_address & PCI_BASE_ADDRESS_MEM_PREFETCH) {
+				/* pfmem */
+				start_address &= PCI_BASE_ADDRESS_MEM_MASK;
+				if (ibmphp_find_resource(bus, start_address, &pfmem, PFMEM) < 0) {
+					err("cannot find corresponding PFMEM resource to remove\n");
+					return -EINVAL;
+				}
+				if (pfmem) {
+					debug("pfmem->start = %x\n", pfmem->start);
+
+					ibmphp_remove_resource(pfmem);
+				}
+			} else {
+				/* regular memory */
+				start_address &= PCI_BASE_ADDRESS_MEM_MASK;
+				if (ibmphp_find_resource(bus, start_address, &mem, MEM) < 0) {
+					err("cannot find corresponding MEM resource to remove\n");
+					return -EINVAL;
+				}
+				if (mem) {
+					debug("mem->start = %x\n", mem->start);
+
+					ibmphp_remove_resource(mem);
+				}
+			}
+			if (tmp_address & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+				/* takes up another dword */
+				count += 1;
+			}
+		}	/* end of mem */
+	}	/* end of for */
+	debug("%s - exiting, returning success\n", __func__);
+	return 0;
+}
+
+static int unconfigure_boot_card(struct slot *slot_cur)
+{
+	u16 vendor_id;
+	u32 class;
+	u8 hdr_type;
+	u8 device;
+	u8 busno;
+	u8 function;
+	int rc;
+	unsigned int devfn;
+	u8 valid_device = 0x00; /* To see if we are ever able to find valid device and read it */
+
+	debug("%s - enter\n", __func__);
+
+	device = slot_cur->device;
+	busno = slot_cur->bus;
+
+	debug("b4 for loop, device is %x\n", device);
+	/* For every function on the card */
+	for (function = 0x0; function < 0x08; function++) {
+		devfn = PCI_DEVFN(device, function);
+		ibmphp_pci_bus->number = busno;
+
+		pci_bus_read_config_word(ibmphp_pci_bus, devfn, PCI_VENDOR_ID, &vendor_id);
+
+		if (vendor_id != PCI_VENDOR_ID_NOTVALID) {
+			/* found correct device!!! */
+			++valid_device;
+
+			debug("%s - found correct device\n", __func__);
+
+			/* header: x x x x x x x x
+			 *         | |___________|=> 1=PPB bridge, 0=normal device, 2=CardBus Bridge
+			 *         |_=> 0 = single function device, 1 = multi-function device
+			 */
+
+			pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_HEADER_TYPE, &hdr_type);
+			pci_bus_read_config_dword(ibmphp_pci_bus, devfn, PCI_CLASS_REVISION, &class);
+
+			debug("hdr_type %x, class %x\n", hdr_type, class);
+			class >>= 8;	/* to take revision out, class = class.subclass.prog i/f */
+			if (class == PCI_CLASS_NOT_DEFINED_VGA) {
+				err("The device %x function %x is VGA compatible and is not supported for hot removing.  Please choose another device.\n", device, function);
+				return -ENODEV;
+			} else if (class == PCI_CLASS_DISPLAY_VGA) {
+				err("The device %x function %x is not supported for hot removing.  Please choose another device.\n", device, function);
+				return -ENODEV;
+			}
+
+			switch (hdr_type) {
+				case PCI_HEADER_TYPE_NORMAL:
+					rc = unconfigure_boot_device(busno, device, function);
+					if (rc) {
+						err("was not able to unconfigure device %x func %x on bus %x. bailing out...\n",
+						     device, function, busno);
+						return rc;
+					}
+					function = 0x8;
+					break;
+				case PCI_HEADER_TYPE_MULTIDEVICE:
+					rc = unconfigure_boot_device(busno, device, function);
+					if (rc) {
+						err("was not able to unconfigure device %x func %x on bus %x. bailing out...\n",
+						     device, function, busno);
+						return rc;
+					}
+					break;
+				case PCI_HEADER_TYPE_BRIDGE:
+					class >>= 8;
+					if (class != PCI_CLASS_BRIDGE_PCI) {
+						err("This device %x function %x is not PCI-to-PCI bridge, and is not supported for hot-removing.  Please try another card.\n", device, function);
+						return -ENODEV;
+					}
+					rc = unconfigure_boot_bridge(busno, device, function);
+					if (rc != 0) {
+						err("was not able to hot-remove PPB properly.\n");
+						return rc;
+					}
+
+					function = 0x8;
+					break;
+				case PCI_HEADER_TYPE_MULTIBRIDGE:
+					class >>= 8;
+					if (class != PCI_CLASS_BRIDGE_PCI) {
+						err("This device %x function %x is not PCI-to-PCI bridge,  and is not supported for hot-removing.  Please try another card.\n", device, function);
+						return -ENODEV;
+					}
+					rc = unconfigure_boot_bridge(busno, device, function);
+					if (rc != 0) {
+						err("was not able to hot-remove PPB properly.\n");
+						return rc;
+					}
+					break;
+				default:
+					err("MAJOR PROBLEM!!!! Cannot read device's header\n");
+					return -1;
+			}	/* end of switch */
+		}	/* end of valid device */
+	}	/* end of for */
+
+	if (!valid_device) {
+		err("Could not find device to unconfigure.  Or could not read the card.\n");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * free the resources of the card (multi, single, or bridged)
+ * Parameters: slot, flag to say if this is for removing entire module or just
+ * unconfiguring the device
+ * TO DO:  will probably need to add some code in case there was some resource,
+ * to remove it... this is from when we have errors in the configure_card...
+ *			!!!!!!!!!!!!!!!!!!!!!!!!!FOR BUSES!!!!!!!!!!!!
+ * Returns: 0, -1, -ENODEV
+ */
+int ibmphp_unconfigure_card(struct slot **slot_cur, int the_end)
+{
+	int i;
+	int count;
+	int rc;
+	struct slot *sl = *slot_cur;
+	struct pci_func *cur_func = NULL;
+	struct pci_func *temp_func;
+
+	debug("%s - enter\n", __func__);
+
+	if (!the_end) {
+		/* Need to unconfigure the card */
+		rc = unconfigure_boot_card(sl);
+		if ((rc == -ENODEV) || (rc == -EIO) || (rc == -EINVAL)) {
+			/* In all other cases, will still need to get rid of func structure if it exists */
+			return rc;
+		}
+	}
+
+	if (sl->func) {
+		cur_func = sl->func;
+		while (cur_func) {
+			/* TO DO: WILL MOST LIKELY NEED TO GET RID OF THE BUS STRUCTURE FROM RESOURCES AS WELL */
+			if (cur_func->bus) {
+				/* in other words, it's a PPB */
+				count = 2;
+			} else {
+				count = 6;
+			}
+
+			for (i = 0; i < count; i++) {
+				if (cur_func->io[i]) {
+					debug("io[%d] exists\n", i);
+					if (the_end > 0)
+						ibmphp_remove_resource(cur_func->io[i]);
+					cur_func->io[i] = NULL;
+				}
+				if (cur_func->mem[i]) {
+					debug("mem[%d] exists\n", i);
+					if (the_end > 0)
+						ibmphp_remove_resource(cur_func->mem[i]);
+					cur_func->mem[i] = NULL;
+				}
+				if (cur_func->pfmem[i]) {
+					debug("pfmem[%d] exists\n", i);
+					if (the_end > 0)
+						ibmphp_remove_resource(cur_func->pfmem[i]);
+					cur_func->pfmem[i] = NULL;
+				}
+			}
+
+			temp_func = cur_func->next;
+			kfree(cur_func);
+			cur_func = temp_func;
+		}
+	}
+
+	sl->func = NULL;
+	*slot_cur = sl;
+	debug("%s - exit\n", __func__);
+	return 0;
+}
+
+/*
+ * add a new bus resulting from hot-plugging a PPB bridge with devices
+ *
+ * Input: bus and the amount of resources needed (we know we can assign those,
+ *        since they've been checked already
+ * Output: bus added to the correct spot
+ *         0, -1, error
+ */
+static int add_new_bus(struct bus_node *bus, struct resource_node *io, struct resource_node *mem, struct resource_node *pfmem, u8 parent_busno)
+{
+	struct range_node *io_range = NULL;
+	struct range_node *mem_range = NULL;
+	struct range_node *pfmem_range = NULL;
+	struct bus_node *cur_bus = NULL;
+
+	/* Trying to find the parent bus number */
+	if (parent_busno != 0xFF) {
+		cur_bus	= ibmphp_find_res_bus(parent_busno);
+		if (!cur_bus) {
+			err("strange, cannot find bus which is supposed to be at the system... something is terribly wrong...\n");
+			return -ENODEV;
+		}
+
+		list_add(&bus->bus_list, &cur_bus->bus_list);
+	}
+	if (io) {
+		io_range = kzalloc(sizeof(*io_range), GFP_KERNEL);
+		if (!io_range)
+			return -ENOMEM;
+
+		io_range->start = io->start;
+		io_range->end = io->end;
+		io_range->rangeno = 1;
+		bus->noIORanges = 1;
+		bus->rangeIO = io_range;
+	}
+	if (mem) {
+		mem_range = kzalloc(sizeof(*mem_range), GFP_KERNEL);
+		if (!mem_range)
+			return -ENOMEM;
+
+		mem_range->start = mem->start;
+		mem_range->end = mem->end;
+		mem_range->rangeno = 1;
+		bus->noMemRanges = 1;
+		bus->rangeMem = mem_range;
+	}
+	if (pfmem) {
+		pfmem_range = kzalloc(sizeof(*pfmem_range), GFP_KERNEL);
+		if (!pfmem_range)
+			return -ENOMEM;
+
+		pfmem_range->start = pfmem->start;
+		pfmem_range->end = pfmem->end;
+		pfmem_range->rangeno = 1;
+		bus->noPFMemRanges = 1;
+		bus->rangePFMem = pfmem_range;
+	}
+	return 0;
+}
+
+/*
+ * find the 1st available bus number for PPB to set as its secondary bus
+ * Parameters: bus_number of the primary bus
+ * Returns: bus_number of the secondary bus or 0xff in case of failure
+ */
+static u8 find_sec_number(u8 primary_busno, u8 slotno)
+{
+	int min, max;
+	u8 busno;
+	struct bus_info *bus;
+	struct bus_node *bus_cur;
+
+	bus = ibmphp_find_same_bus_num(primary_busno);
+	if (!bus) {
+		err("cannot get slot range of the bus from the BIOS\n");
+		return 0xff;
+	}
+	max = bus->slot_max;
+	min = bus->slot_min;
+	if ((slotno > max) || (slotno < min)) {
+		err("got the wrong range\n");
+		return 0xff;
+	}
+	busno = (u8) (slotno - (u8) min);
+	busno += primary_busno + 0x01;
+	bus_cur = ibmphp_find_res_bus(busno);
+	/* either there is no such bus number, or there are no ranges, which
+	 * can only happen if we removed the bridged device in previous load
+	 * of the driver, and now only have the skeleton bus struct
+	 */
+	if ((!bus_cur) || (!(bus_cur->rangeIO) && !(bus_cur->rangeMem) && !(bus_cur->rangePFMem)))
+		return busno;
+	return 0xff;
+}
diff --git a/drivers/pci/hotplug/ibmphp_res.c b/drivers/pci/hotplug/ibmphp_res.c
new file mode 100644
index 000000000..4a72ade2c
--- /dev/null
+++ b/drivers/pci/hotplug/ibmphp_res.c
@@ -0,0 +1,2118 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IBM Hot Plug Controller Driver
+ *
+ * Written By: Irene Zubarev, IBM Corporation
+ *
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001,2002 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <gregkh@us.ibm.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include "ibmphp.h"
+
+static int flags = 0;		/* for testing */
+
+static void update_resources(struct bus_node *bus_cur, int type, int rangeno);
+static int once_over(void);
+static int remove_ranges(struct bus_node *, struct bus_node *);
+static int update_bridge_ranges(struct bus_node **);
+static int add_bus_range(int type, struct range_node *, struct bus_node *);
+static void fix_resources(struct bus_node *);
+static struct bus_node *find_bus_wprev(u8, struct bus_node **, u8);
+
+static LIST_HEAD(gbuses);
+
+static struct bus_node * __init alloc_error_bus(struct ebda_pci_rsrc *curr, u8 busno, int flag)
+{
+	struct bus_node *newbus;
+
+	if (!(curr) && !(flag)) {
+		err("NULL pointer passed\n");
+		return NULL;
+	}
+
+	newbus = kzalloc(sizeof(struct bus_node), GFP_KERNEL);
+	if (!newbus)
+		return NULL;
+
+	if (flag)
+		newbus->busno = busno;
+	else
+		newbus->busno = curr->bus_num;
+	list_add_tail(&newbus->bus_list, &gbuses);
+	return newbus;
+}
+
+static struct resource_node * __init alloc_resources(struct ebda_pci_rsrc *curr)
+{
+	struct resource_node *rs;
+
+	if (!curr) {
+		err("NULL passed to allocate\n");
+		return NULL;
+	}
+
+	rs = kzalloc(sizeof(struct resource_node), GFP_KERNEL);
+	if (!rs)
+		return NULL;
+
+	rs->busno = curr->bus_num;
+	rs->devfunc = curr->dev_fun;
+	rs->start = curr->start_addr;
+	rs->end = curr->end_addr;
+	rs->len = curr->end_addr - curr->start_addr + 1;
+	return rs;
+}
+
+static int __init alloc_bus_range(struct bus_node **new_bus, struct range_node **new_range, struct ebda_pci_rsrc *curr, int flag, u8 first_bus)
+{
+	struct bus_node *newbus;
+	struct range_node *newrange;
+	u8 num_ranges = 0;
+
+	if (first_bus) {
+		newbus = kzalloc(sizeof(struct bus_node), GFP_KERNEL);
+		if (!newbus)
+			return -ENOMEM;
+
+		newbus->busno = curr->bus_num;
+	} else {
+		newbus = *new_bus;
+		switch (flag) {
+			case MEM:
+				num_ranges = newbus->noMemRanges;
+				break;
+			case PFMEM:
+				num_ranges = newbus->noPFMemRanges;
+				break;
+			case IO:
+				num_ranges = newbus->noIORanges;
+				break;
+		}
+	}
+
+	newrange = kzalloc(sizeof(struct range_node), GFP_KERNEL);
+	if (!newrange) {
+		if (first_bus)
+			kfree(newbus);
+		return -ENOMEM;
+	}
+	newrange->start = curr->start_addr;
+	newrange->end = curr->end_addr;
+
+	if (first_bus || (!num_ranges))
+		newrange->rangeno = 1;
+	else {
+		/* need to insert our range */
+		add_bus_range(flag, newrange, newbus);
+		debug("%d resource Primary Bus inserted on bus %x [%x - %x]\n", flag, newbus->busno, newrange->start, newrange->end);
+	}
+
+	switch (flag) {
+		case MEM:
+			newbus->rangeMem = newrange;
+			if (first_bus)
+				newbus->noMemRanges = 1;
+			else {
+				debug("First Memory Primary on bus %x, [%x - %x]\n", newbus->busno, newrange->start, newrange->end);
+				++newbus->noMemRanges;
+				fix_resources(newbus);
+			}
+			break;
+		case IO:
+			newbus->rangeIO = newrange;
+			if (first_bus)
+				newbus->noIORanges = 1;
+			else {
+				debug("First IO Primary on bus %x, [%x - %x]\n", newbus->busno, newrange->start, newrange->end);
+				++newbus->noIORanges;
+				fix_resources(newbus);
+			}
+			break;
+		case PFMEM:
+			newbus->rangePFMem = newrange;
+			if (first_bus)
+				newbus->noPFMemRanges = 1;
+			else {
+				debug("1st PFMemory Primary on Bus %x [%x - %x]\n", newbus->busno, newrange->start, newrange->end);
+				++newbus->noPFMemRanges;
+				fix_resources(newbus);
+			}
+
+			break;
+	}
+
+	*new_bus = newbus;
+	*new_range = newrange;
+	return 0;
+}
+
+
+/* Notes:
+ * 1. The ranges are ordered.  The buses are not ordered.  (First come)
+ *
+ * 2. If cannot allocate out of PFMem range, allocate from Mem ranges.  PFmemFromMem
+ * are not sorted. (no need since use mem node). To not change the entire code, we
+ * also add mem node whenever this case happens so as not to change
+ * ibmphp_check_mem_resource etc(and since it really is taking Mem resource)
+ */
+
+/*****************************************************************************
+ * This is the Resource Management initialization function.  It will go through
+ * the Resource list taken from EBDA and fill in this module's data structures
+ *
+ * THIS IS NOT TAKING INTO CONSIDERATION IO RESTRICTIONS OF PRIMARY BUSES,
+ * SINCE WE'RE GOING TO ASSUME FOR NOW WE DON'T HAVE THOSE ON OUR BUSES FOR NOW
+ *
+ * Input: ptr to the head of the resource list from EBDA
+ * Output: 0, -1 or error codes
+ ***************************************************************************/
+int __init ibmphp_rsrc_init(void)
+{
+	struct ebda_pci_rsrc *curr;
+	struct range_node *newrange = NULL;
+	struct bus_node *newbus = NULL;
+	struct bus_node *bus_cur;
+	struct bus_node *bus_prev;
+	struct resource_node *new_io = NULL;
+	struct resource_node *new_mem = NULL;
+	struct resource_node *new_pfmem = NULL;
+	int rc;
+
+	list_for_each_entry(curr, &ibmphp_ebda_pci_rsrc_head,
+			    ebda_pci_rsrc_list) {
+		if (!(curr->rsrc_type & PCIDEVMASK)) {
+			/* EBDA still lists non PCI devices, so ignore... */
+			debug("this is not a PCI DEVICE in rsrc_init, please take care\n");
+			// continue;
+		}
+
+		/* this is a primary bus resource */
+		if (curr->rsrc_type & PRIMARYBUSMASK) {
+			/* memory */
+			if ((curr->rsrc_type & RESTYPE) == MMASK) {
+				/* no bus structure exists in place yet */
+				if (list_empty(&gbuses)) {
+					rc = alloc_bus_range(&newbus, &newrange, curr, MEM, 1);
+					if (rc)
+						return rc;
+					list_add_tail(&newbus->bus_list, &gbuses);
+					debug("gbuses = NULL, Memory Primary Bus %x [%x - %x]\n", newbus->busno, newrange->start, newrange->end);
+				} else {
+					bus_cur = find_bus_wprev(curr->bus_num, &bus_prev, 1);
+					/* found our bus */
+					if (bus_cur) {
+						rc = alloc_bus_range(&bus_cur, &newrange, curr, MEM, 0);
+						if (rc)
+							return rc;
+					} else {
+						/* went through all the buses and didn't find ours, need to create a new bus node */
+						rc = alloc_bus_range(&newbus, &newrange, curr, MEM, 1);
+						if (rc)
+							return rc;
+
+						list_add_tail(&newbus->bus_list, &gbuses);
+						debug("New Bus, Memory Primary Bus %x [%x - %x]\n", newbus->busno, newrange->start, newrange->end);
+					}
+				}
+			} else if ((curr->rsrc_type & RESTYPE) == PFMASK) {
+				/* prefetchable memory */
+				if (list_empty(&gbuses)) {
+					/* no bus structure exists in place yet */
+					rc = alloc_bus_range(&newbus, &newrange, curr, PFMEM, 1);
+					if (rc)
+						return rc;
+					list_add_tail(&newbus->bus_list, &gbuses);
+					debug("gbuses = NULL, PFMemory Primary Bus %x [%x - %x]\n", newbus->busno, newrange->start, newrange->end);
+				} else {
+					bus_cur = find_bus_wprev(curr->bus_num, &bus_prev, 1);
+					if (bus_cur) {
+						/* found our bus */
+						rc = alloc_bus_range(&bus_cur, &newrange, curr, PFMEM, 0);
+						if (rc)
+							return rc;
+					} else {
+						/* went through all the buses and didn't find ours, need to create a new bus node */
+						rc = alloc_bus_range(&newbus, &newrange, curr, PFMEM, 1);
+						if (rc)
+							return rc;
+						list_add_tail(&newbus->bus_list, &gbuses);
+						debug("1st Bus, PFMemory Primary Bus %x [%x - %x]\n", newbus->busno, newrange->start, newrange->end);
+					}
+				}
+			} else if ((curr->rsrc_type & RESTYPE) == IOMASK) {
+				/* IO */
+				if (list_empty(&gbuses)) {
+					/* no bus structure exists in place yet */
+					rc = alloc_bus_range(&newbus, &newrange, curr, IO, 1);
+					if (rc)
+						return rc;
+					list_add_tail(&newbus->bus_list, &gbuses);
+					debug("gbuses = NULL, IO Primary Bus %x [%x - %x]\n", newbus->busno, newrange->start, newrange->end);
+				} else {
+					bus_cur = find_bus_wprev(curr->bus_num, &bus_prev, 1);
+					if (bus_cur) {
+						rc = alloc_bus_range(&bus_cur, &newrange, curr, IO, 0);
+						if (rc)
+							return rc;
+					} else {
+						/* went through all the buses and didn't find ours, need to create a new bus node */
+						rc = alloc_bus_range(&newbus, &newrange, curr, IO, 1);
+						if (rc)
+							return rc;
+						list_add_tail(&newbus->bus_list, &gbuses);
+						debug("1st Bus, IO Primary Bus %x [%x - %x]\n", newbus->busno, newrange->start, newrange->end);
+					}
+				}
+
+			} else {
+				;	/* type is reserved  WHAT TO DO IN THIS CASE???
+					   NOTHING TO DO??? */
+			}
+		} else {
+			/* regular pci device resource */
+			if ((curr->rsrc_type & RESTYPE) == MMASK) {
+				/* Memory resource */
+				new_mem = alloc_resources(curr);
+				if (!new_mem)
+					return -ENOMEM;
+				new_mem->type = MEM;
+				/*
+				 * if it didn't find the bus, means PCI dev
+				 * came b4 the Primary Bus info, so need to
+				 * create a bus rangeno becomes a problem...
+				 * assign a -1 and then update once the range
+				 * actually appears...
+				 */
+				if (ibmphp_add_resource(new_mem) < 0) {
+					newbus = alloc_error_bus(curr, 0, 0);
+					if (!newbus)
+						return -ENOMEM;
+					newbus->firstMem = new_mem;
+					++newbus->needMemUpdate;
+					new_mem->rangeno = -1;
+				}
+				debug("Memory resource for device %x, bus %x, [%x - %x]\n", new_mem->devfunc, new_mem->busno, new_mem->start, new_mem->end);
+
+			} else if ((curr->rsrc_type & RESTYPE) == PFMASK) {
+				/* PFMemory resource */
+				new_pfmem = alloc_resources(curr);
+				if (!new_pfmem)
+					return -ENOMEM;
+				new_pfmem->type = PFMEM;
+				new_pfmem->fromMem = 0;
+				if (ibmphp_add_resource(new_pfmem) < 0) {
+					newbus = alloc_error_bus(curr, 0, 0);
+					if (!newbus)
+						return -ENOMEM;
+					newbus->firstPFMem = new_pfmem;
+					++newbus->needPFMemUpdate;
+					new_pfmem->rangeno = -1;
+				}
+
+				debug("PFMemory resource for device %x, bus %x, [%x - %x]\n", new_pfmem->devfunc, new_pfmem->busno, new_pfmem->start, new_pfmem->end);
+			} else if ((curr->rsrc_type & RESTYPE) == IOMASK) {
+				/* IO resource */
+				new_io = alloc_resources(curr);
+				if (!new_io)
+					return -ENOMEM;
+				new_io->type = IO;
+
+				/*
+				 * if it didn't find the bus, means PCI dev
+				 * came b4 the Primary Bus info, so need to
+				 * create a bus rangeno becomes a problem...
+				 * Can assign a -1 and then update once the
+				 * range actually appears...
+				 */
+				if (ibmphp_add_resource(new_io) < 0) {
+					newbus = alloc_error_bus(curr, 0, 0);
+					if (!newbus)
+						return -ENOMEM;
+					newbus->firstIO = new_io;
+					++newbus->needIOUpdate;
+					new_io->rangeno = -1;
+				}
+				debug("IO resource for device %x, bus %x, [%x - %x]\n", new_io->devfunc, new_io->busno, new_io->start, new_io->end);
+			}
+		}
+	}
+
+	list_for_each_entry(bus_cur, &gbuses, bus_list) {
+		/* This is to get info about PPB resources, since EBDA doesn't put this info into the primary bus info */
+		rc = update_bridge_ranges(&bus_cur);
+		if (rc)
+			return rc;
+	}
+	return once_over();	/* This is to align ranges (so no -1) */
+}
+
+/********************************************************************************
+ * This function adds a range into a sorted list of ranges per bus for a particular
+ * range type, it then calls another routine to update the range numbers on the
+ * pci devices' resources for the appropriate resource
+ *
+ * Input: type of the resource, range to add, current bus
+ * Output: 0 or -1, bus and range ptrs
+ ********************************************************************************/
+static int add_bus_range(int type, struct range_node *range, struct bus_node *bus_cur)
+{
+	struct range_node *range_cur = NULL;
+	struct range_node *range_prev;
+	int count = 0, i_init;
+	int noRanges = 0;
+
+	switch (type) {
+		case MEM:
+			range_cur = bus_cur->rangeMem;
+			noRanges = bus_cur->noMemRanges;
+			break;
+		case PFMEM:
+			range_cur = bus_cur->rangePFMem;
+			noRanges = bus_cur->noPFMemRanges;
+			break;
+		case IO:
+			range_cur = bus_cur->rangeIO;
+			noRanges = bus_cur->noIORanges;
+			break;
+	}
+
+	range_prev = NULL;
+	while (range_cur) {
+		if (range->start < range_cur->start)
+			break;
+		range_prev = range_cur;
+		range_cur = range_cur->next;
+		count = count + 1;
+	}
+	if (!count) {
+		/* our range will go at the beginning of the list */
+		switch (type) {
+			case MEM:
+				bus_cur->rangeMem = range;
+				break;
+			case PFMEM:
+				bus_cur->rangePFMem = range;
+				break;
+			case IO:
+				bus_cur->rangeIO = range;
+				break;
+		}
+		range->next = range_cur;
+		range->rangeno = 1;
+		i_init = 0;
+	} else if (!range_cur) {
+		/* our range will go at the end of the list */
+		range->next = NULL;
+		range_prev->next = range;
+		range->rangeno = range_prev->rangeno + 1;
+		return 0;
+	} else {
+		/* the range is in the middle */
+		range_prev->next = range;
+		range->next = range_cur;
+		range->rangeno = range_cur->rangeno;
+		i_init = range_prev->rangeno;
+	}
+
+	for (count = i_init; count < noRanges; ++count) {
+		++range_cur->rangeno;
+		range_cur = range_cur->next;
+	}
+
+	update_resources(bus_cur, type, i_init + 1);
+	return 0;
+}
+
+/*******************************************************************************
+ * This routine goes through the list of resources of type 'type' and updates
+ * the range numbers that they correspond to.  It was called from add_bus_range fnc
+ *
+ * Input: bus, type of the resource, the rangeno starting from which to update
+ ******************************************************************************/
+static void update_resources(struct bus_node *bus_cur, int type, int rangeno)
+{
+	struct resource_node *res = NULL;
+	u8 eol = 0;	/* end of list indicator */
+
+	switch (type) {
+		case MEM:
+			if (bus_cur->firstMem)
+				res = bus_cur->firstMem;
+			break;
+		case PFMEM:
+			if (bus_cur->firstPFMem)
+				res = bus_cur->firstPFMem;
+			break;
+		case IO:
+			if (bus_cur->firstIO)
+				res = bus_cur->firstIO;
+			break;
+	}
+
+	if (res) {
+		while (res) {
+			if (res->rangeno == rangeno)
+				break;
+			if (res->next)
+				res = res->next;
+			else if (res->nextRange)
+				res = res->nextRange;
+			else {
+				eol = 1;
+				break;
+			}
+		}
+
+		if (!eol) {
+			/* found the range */
+			while (res) {
+				++res->rangeno;
+				res = res->next;
+			}
+		}
+	}
+}
+
+static void fix_me(struct resource_node *res, struct bus_node *bus_cur, struct range_node *range)
+{
+	char *str = "";
+	switch (res->type) {
+		case IO:
+			str = "io";
+			break;
+		case MEM:
+			str = "mem";
+			break;
+		case PFMEM:
+			str = "pfmem";
+			break;
+	}
+
+	while (res) {
+		if (res->rangeno == -1) {
+			while (range) {
+				if ((res->start >= range->start) && (res->end <= range->end)) {
+					res->rangeno = range->rangeno;
+					debug("%s->rangeno in fix_resources is %d\n", str, res->rangeno);
+					switch (res->type) {
+						case IO:
+							--bus_cur->needIOUpdate;
+							break;
+						case MEM:
+							--bus_cur->needMemUpdate;
+							break;
+						case PFMEM:
+							--bus_cur->needPFMemUpdate;
+							break;
+					}
+					break;
+				}
+				range = range->next;
+			}
+		}
+		if (res->next)
+			res = res->next;
+		else
+			res = res->nextRange;
+	}
+
+}
+
+/*****************************************************************************
+ * This routine reassigns the range numbers to the resources that had a -1
+ * This case can happen only if upon initialization, resources taken by pci dev
+ * appear in EBDA before the resources allocated for that bus, since we don't
+ * know the range, we assign -1, and this routine is called after a new range
+ * is assigned to see the resources with unknown range belong to the added range
+ *
+ * Input: current bus
+ * Output: none, list of resources for that bus are fixed if can be
+ *******************************************************************************/
+static void fix_resources(struct bus_node *bus_cur)
+{
+	struct range_node *range;
+	struct resource_node *res;
+
+	debug("%s - bus_cur->busno = %d\n", __func__, bus_cur->busno);
+
+	if (bus_cur->needIOUpdate) {
+		res = bus_cur->firstIO;
+		range = bus_cur->rangeIO;
+		fix_me(res, bus_cur, range);
+	}
+	if (bus_cur->needMemUpdate) {
+		res = bus_cur->firstMem;
+		range = bus_cur->rangeMem;
+		fix_me(res, bus_cur, range);
+	}
+	if (bus_cur->needPFMemUpdate) {
+		res = bus_cur->firstPFMem;
+		range = bus_cur->rangePFMem;
+		fix_me(res, bus_cur, range);
+	}
+}
+
+/*******************************************************************************
+ * This routine adds a resource to the list of resources to the appropriate bus
+ * based on their resource type and sorted by their starting addresses.  It assigns
+ * the ptrs to next and nextRange if needed.
+ *
+ * Input: resource ptr
+ * Output: ptrs assigned (to the node)
+ * 0 or -1
+ *******************************************************************************/
+int ibmphp_add_resource(struct resource_node *res)
+{
+	struct resource_node *res_cur;
+	struct resource_node *res_prev;
+	struct bus_node *bus_cur;
+	struct range_node *range_cur = NULL;
+	struct resource_node *res_start = NULL;
+
+	debug("%s - enter\n", __func__);
+
+	if (!res) {
+		err("NULL passed to add\n");
+		return -ENODEV;
+	}
+
+	bus_cur = find_bus_wprev(res->busno, NULL, 0);
+
+	if (!bus_cur) {
+		/* didn't find a bus, something's wrong!!! */
+		debug("no bus in the system, either pci_dev's wrong or allocation failed\n");
+		return -ENODEV;
+	}
+
+	/* Normal case */
+	switch (res->type) {
+		case IO:
+			range_cur = bus_cur->rangeIO;
+			res_start = bus_cur->firstIO;
+			break;
+		case MEM:
+			range_cur = bus_cur->rangeMem;
+			res_start = bus_cur->firstMem;
+			break;
+		case PFMEM:
+			range_cur = bus_cur->rangePFMem;
+			res_start = bus_cur->firstPFMem;
+			break;
+		default:
+			err("cannot read the type of the resource to add... problem\n");
+			return -EINVAL;
+	}
+	while (range_cur) {
+		if ((res->start >= range_cur->start) && (res->end <= range_cur->end)) {
+			res->rangeno = range_cur->rangeno;
+			break;
+		}
+		range_cur = range_cur->next;
+	}
+
+	/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+	 * this is again the case of rangeno = -1
+	 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+	 */
+
+	if (!range_cur) {
+		switch (res->type) {
+			case IO:
+				++bus_cur->needIOUpdate;
+				break;
+			case MEM:
+				++bus_cur->needMemUpdate;
+				break;
+			case PFMEM:
+				++bus_cur->needPFMemUpdate;
+				break;
+		}
+		res->rangeno = -1;
+	}
+
+	debug("The range is %d\n", res->rangeno);
+	if (!res_start) {
+		/* no first{IO,Mem,Pfmem} on the bus, 1st IO/Mem/Pfmem resource ever */
+		switch (res->type) {
+			case IO:
+				bus_cur->firstIO = res;
+				break;
+			case MEM:
+				bus_cur->firstMem = res;
+				break;
+			case PFMEM:
+				bus_cur->firstPFMem = res;
+				break;
+		}
+		res->next = NULL;
+		res->nextRange = NULL;
+	} else {
+		res_cur = res_start;
+		res_prev = NULL;
+
+		debug("res_cur->rangeno is %d\n", res_cur->rangeno);
+
+		while (res_cur) {
+			if (res_cur->rangeno >= res->rangeno)
+				break;
+			res_prev = res_cur;
+			if (res_cur->next)
+				res_cur = res_cur->next;
+			else
+				res_cur = res_cur->nextRange;
+		}
+
+		if (!res_cur) {
+			/* at the end of the resource list */
+			debug("i should be here, [%x - %x]\n", res->start, res->end);
+			res_prev->nextRange = res;
+			res->next = NULL;
+			res->nextRange = NULL;
+		} else if (res_cur->rangeno == res->rangeno) {
+			/* in the same range */
+			while (res_cur) {
+				if (res->start < res_cur->start)
+					break;
+				res_prev = res_cur;
+				res_cur = res_cur->next;
+			}
+			if (!res_cur) {
+				/* the last resource in this range */
+				res_prev->next = res;
+				res->next = NULL;
+				res->nextRange = res_prev->nextRange;
+				res_prev->nextRange = NULL;
+			} else if (res->start < res_cur->start) {
+				/* at the beginning or middle of the range */
+				if (!res_prev)	{
+					switch (res->type) {
+						case IO:
+							bus_cur->firstIO = res;
+							break;
+						case MEM:
+							bus_cur->firstMem = res;
+							break;
+						case PFMEM:
+							bus_cur->firstPFMem = res;
+							break;
+					}
+				} else if (res_prev->rangeno == res_cur->rangeno)
+					res_prev->next = res;
+				else
+					res_prev->nextRange = res;
+
+				res->next = res_cur;
+				res->nextRange = NULL;
+			}
+		} else {
+			/* this is the case where it is 1st occurrence of the range */
+			if (!res_prev) {
+				/* at the beginning of the resource list */
+				res->next = NULL;
+				switch (res->type) {
+					case IO:
+						res->nextRange = bus_cur->firstIO;
+						bus_cur->firstIO = res;
+						break;
+					case MEM:
+						res->nextRange = bus_cur->firstMem;
+						bus_cur->firstMem = res;
+						break;
+					case PFMEM:
+						res->nextRange = bus_cur->firstPFMem;
+						bus_cur->firstPFMem = res;
+						break;
+				}
+			} else if (res_cur->rangeno > res->rangeno) {
+				/* in the middle of the resource list */
+				res_prev->nextRange = res;
+				res->next = NULL;
+				res->nextRange = res_cur;
+			}
+		}
+	}
+
+	debug("%s - exit\n", __func__);
+	return 0;
+}
+
+/****************************************************************************
+ * This routine will remove the resource from the list of resources
+ *
+ * Input: io, mem, and/or pfmem resource to be deleted
+ * Output: modified resource list
+ *        0 or error code
+ ****************************************************************************/
+int ibmphp_remove_resource(struct resource_node *res)
+{
+	struct bus_node *bus_cur;
+	struct resource_node *res_cur = NULL;
+	struct resource_node *res_prev;
+	struct resource_node *mem_cur;
+	char *type = "";
+
+	if (!res)  {
+		err("resource to remove is NULL\n");
+		return -ENODEV;
+	}
+
+	bus_cur = find_bus_wprev(res->busno, NULL, 0);
+
+	if (!bus_cur) {
+		err("cannot find corresponding bus of the io resource to remove  bailing out...\n");
+		return -ENODEV;
+	}
+
+	switch (res->type) {
+		case IO:
+			res_cur = bus_cur->firstIO;
+			type = "io";
+			break;
+		case MEM:
+			res_cur = bus_cur->firstMem;
+			type = "mem";
+			break;
+		case PFMEM:
+			res_cur = bus_cur->firstPFMem;
+			type = "pfmem";
+			break;
+		default:
+			err("unknown type for resource to remove\n");
+			return -EINVAL;
+	}
+	res_prev = NULL;
+
+	while (res_cur) {
+		if ((res_cur->start == res->start) && (res_cur->end == res->end))
+			break;
+		res_prev = res_cur;
+		if (res_cur->next)
+			res_cur = res_cur->next;
+		else
+			res_cur = res_cur->nextRange;
+	}
+
+	if (!res_cur) {
+		if (res->type == PFMEM) {
+			/*
+			 * case where pfmem might be in the PFMemFromMem list
+			 * so will also need to remove the corresponding mem
+			 * entry
+			 */
+			res_cur = bus_cur->firstPFMemFromMem;
+			res_prev = NULL;
+
+			while (res_cur) {
+				if ((res_cur->start == res->start) && (res_cur->end == res->end)) {
+					mem_cur = bus_cur->firstMem;
+					while (mem_cur) {
+						if ((mem_cur->start == res_cur->start)
+						    && (mem_cur->end == res_cur->end))
+							break;
+						if (mem_cur->next)
+							mem_cur = mem_cur->next;
+						else
+							mem_cur = mem_cur->nextRange;
+					}
+					if (!mem_cur) {
+						err("cannot find corresponding mem node for pfmem...\n");
+						return -EINVAL;
+					}
+
+					ibmphp_remove_resource(mem_cur);
+					if (!res_prev)
+						bus_cur->firstPFMemFromMem = res_cur->next;
+					else
+						res_prev->next = res_cur->next;
+					kfree(res_cur);
+					return 0;
+				}
+				res_prev = res_cur;
+				if (res_cur->next)
+					res_cur = res_cur->next;
+				else
+					res_cur = res_cur->nextRange;
+			}
+			if (!res_cur) {
+				err("cannot find pfmem to delete...\n");
+				return -EINVAL;
+			}
+		} else {
+			err("the %s resource is not in the list to be deleted...\n", type);
+			return -EINVAL;
+		}
+	}
+	if (!res_prev) {
+		/* first device to be deleted */
+		if (res_cur->next) {
+			switch (res->type) {
+				case IO:
+					bus_cur->firstIO = res_cur->next;
+					break;
+				case MEM:
+					bus_cur->firstMem = res_cur->next;
+					break;
+				case PFMEM:
+					bus_cur->firstPFMem = res_cur->next;
+					break;
+			}
+		} else if (res_cur->nextRange) {
+			switch (res->type) {
+				case IO:
+					bus_cur->firstIO = res_cur->nextRange;
+					break;
+				case MEM:
+					bus_cur->firstMem = res_cur->nextRange;
+					break;
+				case PFMEM:
+					bus_cur->firstPFMem = res_cur->nextRange;
+					break;
+			}
+		} else {
+			switch (res->type) {
+				case IO:
+					bus_cur->firstIO = NULL;
+					break;
+				case MEM:
+					bus_cur->firstMem = NULL;
+					break;
+				case PFMEM:
+					bus_cur->firstPFMem = NULL;
+					break;
+			}
+		}
+		kfree(res_cur);
+		return 0;
+	} else {
+		if (res_cur->next) {
+			if (res_prev->rangeno == res_cur->rangeno)
+				res_prev->next = res_cur->next;
+			else
+				res_prev->nextRange = res_cur->next;
+		} else if (res_cur->nextRange) {
+			res_prev->next = NULL;
+			res_prev->nextRange = res_cur->nextRange;
+		} else {
+			res_prev->next = NULL;
+			res_prev->nextRange = NULL;
+		}
+		kfree(res_cur);
+		return 0;
+	}
+
+	return 0;
+}
+
+static struct range_node *find_range(struct bus_node *bus_cur, struct resource_node *res)
+{
+	struct range_node *range = NULL;
+
+	switch (res->type) {
+		case IO:
+			range = bus_cur->rangeIO;
+			break;
+		case MEM:
+			range = bus_cur->rangeMem;
+			break;
+		case PFMEM:
+			range = bus_cur->rangePFMem;
+			break;
+		default:
+			err("cannot read resource type in find_range\n");
+	}
+
+	while (range) {
+		if (res->rangeno == range->rangeno)
+			break;
+		range = range->next;
+	}
+	return range;
+}
+
+/*****************************************************************************
+ * This routine will check to make sure the io/mem/pfmem->len that the device asked for
+ * can fit w/i our list of available IO/MEM/PFMEM resources.  If cannot, returns -EINVAL,
+ * otherwise, returns 0
+ *
+ * Input: resource
+ * Output: the correct start and end address are inputted into the resource node,
+ *        0 or -EINVAL
+ *****************************************************************************/
+int ibmphp_check_resource(struct resource_node *res, u8 bridge)
+{
+	struct bus_node *bus_cur;
+	struct range_node *range = NULL;
+	struct resource_node *res_prev;
+	struct resource_node *res_cur = NULL;
+	u32 len_cur = 0, start_cur = 0, len_tmp = 0;
+	int noranges = 0;
+	u32 tmp_start;		/* this is to make sure start address is divisible by the length needed */
+	u32 tmp_divide;
+	u8 flag = 0;
+
+	if (!res)
+		return -EINVAL;
+
+	if (bridge) {
+		/* The rules for bridges are different, 4K divisible for IO, 1M for (pf)mem*/
+		if (res->type == IO)
+			tmp_divide = IOBRIDGE;
+		else
+			tmp_divide = MEMBRIDGE;
+	} else
+		tmp_divide = res->len;
+
+	bus_cur = find_bus_wprev(res->busno, NULL, 0);
+
+	if (!bus_cur) {
+		/* didn't find a bus, something's wrong!!! */
+		debug("no bus in the system, either pci_dev's wrong or allocation failed\n");
+		return -EINVAL;
+	}
+
+	debug("%s - enter\n", __func__);
+	debug("bus_cur->busno is %d\n", bus_cur->busno);
+
+	/* This is a quick fix to not mess up with the code very much.  i.e.,
+	 * 2000-2fff, len = 1000, but when we compare, we need it to be fff */
+	res->len -= 1;
+
+	switch (res->type) {
+		case IO:
+			res_cur = bus_cur->firstIO;
+			noranges = bus_cur->noIORanges;
+			break;
+		case MEM:
+			res_cur = bus_cur->firstMem;
+			noranges = bus_cur->noMemRanges;
+			break;
+		case PFMEM:
+			res_cur = bus_cur->firstPFMem;
+			noranges = bus_cur->noPFMemRanges;
+			break;
+		default:
+			err("wrong type of resource to check\n");
+			return -EINVAL;
+	}
+	res_prev = NULL;
+
+	while (res_cur) {
+		range = find_range(bus_cur, res_cur);
+		debug("%s - rangeno = %d\n", __func__, res_cur->rangeno);
+
+		if (!range) {
+			err("no range for the device exists... bailing out...\n");
+			return -EINVAL;
+		}
+
+		/* found our range */
+		if (!res_prev) {
+			/* first time in the loop */
+			len_tmp = res_cur->start - 1 - range->start;
+
+			if ((res_cur->start != range->start) && (len_tmp >= res->len)) {
+				debug("len_tmp = %x\n", len_tmp);
+
+				if ((len_tmp < len_cur) || (len_cur == 0)) {
+
+					if ((range->start % tmp_divide) == 0) {
+						/* just perfect, starting address is divisible by length */
+						flag = 1;
+						len_cur = len_tmp;
+						start_cur = range->start;
+					} else {
+						/* Needs adjusting */
+						tmp_start = range->start;
+						flag = 0;
+
+						while ((len_tmp = res_cur->start - 1 - tmp_start) >= res->len) {
+							if ((tmp_start % tmp_divide) == 0) {
+								flag = 1;
+								len_cur = len_tmp;
+								start_cur = tmp_start;
+								break;
+							}
+							tmp_start += tmp_divide - tmp_start % tmp_divide;
+							if (tmp_start >= res_cur->start - 1)
+								break;
+						}
+					}
+
+					if (flag && len_cur == res->len) {
+						debug("but we are not here, right?\n");
+						res->start = start_cur;
+						res->len += 1; /* To restore the balance */
+						res->end = res->start + res->len - 1;
+						return 0;
+					}
+				}
+			}
+		}
+		if (!res_cur->next) {
+			/* last device on the range */
+			len_tmp = range->end - (res_cur->end + 1);
+
+			if ((range->end != res_cur->end) && (len_tmp >= res->len)) {
+				debug("len_tmp = %x\n", len_tmp);
+				if ((len_tmp < len_cur) || (len_cur == 0)) {
+
+					if (((res_cur->end + 1) % tmp_divide) == 0) {
+						/* just perfect, starting address is divisible by length */
+						flag = 1;
+						len_cur = len_tmp;
+						start_cur = res_cur->end + 1;
+					} else {
+						/* Needs adjusting */
+						tmp_start = res_cur->end + 1;
+						flag = 0;
+
+						while ((len_tmp = range->end - tmp_start) >= res->len) {
+							if ((tmp_start % tmp_divide) == 0) {
+								flag = 1;
+								len_cur = len_tmp;
+								start_cur = tmp_start;
+								break;
+							}
+							tmp_start += tmp_divide - tmp_start % tmp_divide;
+							if (tmp_start >= range->end)
+								break;
+						}
+					}
+					if (flag && len_cur == res->len) {
+						res->start = start_cur;
+						res->len += 1; /* To restore the balance */
+						res->end = res->start + res->len - 1;
+						return 0;
+					}
+				}
+			}
+		}
+
+		if (res_prev) {
+			if (res_prev->rangeno != res_cur->rangeno) {
+				/* 1st device on this range */
+				len_tmp = res_cur->start - 1 - range->start;
+
+				if ((res_cur->start != range->start) &&	(len_tmp >= res->len)) {
+					if ((len_tmp < len_cur) || (len_cur == 0)) {
+						if ((range->start % tmp_divide) == 0) {
+							/* just perfect, starting address is divisible by length */
+							flag = 1;
+							len_cur = len_tmp;
+							start_cur = range->start;
+						} else {
+							/* Needs adjusting */
+							tmp_start = range->start;
+							flag = 0;
+
+							while ((len_tmp = res_cur->start - 1 - tmp_start) >= res->len) {
+								if ((tmp_start % tmp_divide) == 0) {
+									flag = 1;
+									len_cur = len_tmp;
+									start_cur = tmp_start;
+									break;
+								}
+								tmp_start += tmp_divide - tmp_start % tmp_divide;
+								if (tmp_start >= res_cur->start - 1)
+									break;
+							}
+						}
+
+						if (flag && len_cur == res->len) {
+							res->start = start_cur;
+							res->len += 1; /* To restore the balance */
+							res->end = res->start + res->len - 1;
+							return 0;
+						}
+					}
+				}
+			} else {
+				/* in the same range */
+				len_tmp = res_cur->start - 1 - res_prev->end - 1;
+
+				if (len_tmp >= res->len) {
+					if ((len_tmp < len_cur) || (len_cur == 0)) {
+						if (((res_prev->end + 1) % tmp_divide) == 0) {
+							/* just perfect, starting address's divisible by length */
+							flag = 1;
+							len_cur = len_tmp;
+							start_cur = res_prev->end + 1;
+						} else {
+							/* Needs adjusting */
+							tmp_start = res_prev->end + 1;
+							flag = 0;
+
+							while ((len_tmp = res_cur->start - 1 - tmp_start) >= res->len) {
+								if ((tmp_start % tmp_divide) == 0) {
+									flag = 1;
+									len_cur = len_tmp;
+									start_cur = tmp_start;
+									break;
+								}
+								tmp_start += tmp_divide - tmp_start % tmp_divide;
+								if (tmp_start >= res_cur->start - 1)
+									break;
+							}
+						}
+
+						if (flag && len_cur == res->len) {
+							res->start = start_cur;
+							res->len += 1; /* To restore the balance */
+							res->end = res->start + res->len - 1;
+							return 0;
+						}
+					}
+				}
+			}
+		}
+		/* end if (res_prev) */
+		res_prev = res_cur;
+		if (res_cur->next)
+			res_cur = res_cur->next;
+		else
+			res_cur = res_cur->nextRange;
+	}	/* end of while */
+
+
+	if (!res_prev) {
+		/* 1st device ever */
+		/* need to find appropriate range */
+		switch (res->type) {
+			case IO:
+				range = bus_cur->rangeIO;
+				break;
+			case MEM:
+				range = bus_cur->rangeMem;
+				break;
+			case PFMEM:
+				range = bus_cur->rangePFMem;
+				break;
+		}
+		while (range) {
+			len_tmp = range->end - range->start;
+
+			if (len_tmp >= res->len) {
+				if ((len_tmp < len_cur) || (len_cur == 0)) {
+					if ((range->start % tmp_divide) == 0) {
+						/* just perfect, starting address's divisible by length */
+						flag = 1;
+						len_cur = len_tmp;
+						start_cur = range->start;
+					} else {
+						/* Needs adjusting */
+						tmp_start = range->start;
+						flag = 0;
+
+						while ((len_tmp = range->end - tmp_start) >= res->len) {
+							if ((tmp_start % tmp_divide) == 0) {
+								flag = 1;
+								len_cur = len_tmp;
+								start_cur = tmp_start;
+								break;
+							}
+							tmp_start += tmp_divide - tmp_start % tmp_divide;
+							if (tmp_start >= range->end)
+								break;
+						}
+					}
+
+					if (flag && len_cur == res->len) {
+						res->start = start_cur;
+						res->len += 1; /* To restore the balance */
+						res->end = res->start + res->len - 1;
+						return 0;
+					}
+				}
+			}
+			range = range->next;
+		}		/* end of while */
+
+		if ((!range) && (len_cur == 0)) {
+			/* have gone through the list of devices and ranges and haven't found n.e.thing */
+			err("no appropriate range.. bailing out...\n");
+			return -EINVAL;
+		} else if (len_cur) {
+			res->start = start_cur;
+			res->len += 1; /* To restore the balance */
+			res->end = res->start + res->len - 1;
+			return 0;
+		}
+	}
+
+	if (!res_cur) {
+		debug("prev->rangeno = %d, noranges = %d\n", res_prev->rangeno, noranges);
+		if (res_prev->rangeno < noranges) {
+			/* if there're more ranges out there to check */
+			switch (res->type) {
+				case IO:
+					range = bus_cur->rangeIO;
+					break;
+				case MEM:
+					range = bus_cur->rangeMem;
+					break;
+				case PFMEM:
+					range = bus_cur->rangePFMem;
+					break;
+			}
+			while (range) {
+				len_tmp = range->end - range->start;
+
+				if (len_tmp >= res->len) {
+					if ((len_tmp < len_cur) || (len_cur == 0)) {
+						if ((range->start % tmp_divide) == 0) {
+							/* just perfect, starting address's divisible by length */
+							flag = 1;
+							len_cur = len_tmp;
+							start_cur = range->start;
+						} else {
+							/* Needs adjusting */
+							tmp_start = range->start;
+							flag = 0;
+
+							while ((len_tmp = range->end - tmp_start) >= res->len) {
+								if ((tmp_start % tmp_divide) == 0) {
+									flag = 1;
+									len_cur = len_tmp;
+									start_cur = tmp_start;
+									break;
+								}
+								tmp_start += tmp_divide - tmp_start % tmp_divide;
+								if (tmp_start >= range->end)
+									break;
+							}
+						}
+
+						if (flag && len_cur == res->len) {
+							res->start = start_cur;
+							res->len += 1; /* To restore the balance */
+							res->end = res->start + res->len - 1;
+							return 0;
+						}
+					}
+				}
+				range = range->next;
+			}	/* end of while */
+
+			if ((!range) && (len_cur == 0)) {
+				/* have gone through the list of devices and ranges and haven't found n.e.thing */
+				err("no appropriate range.. bailing out...\n");
+				return -EINVAL;
+			} else if (len_cur) {
+				res->start = start_cur;
+				res->len += 1; /* To restore the balance */
+				res->end = res->start + res->len - 1;
+				return 0;
+			}
+		} else {
+			/* no more ranges to check on */
+			if (len_cur) {
+				res->start = start_cur;
+				res->len += 1; /* To restore the balance */
+				res->end = res->start + res->len - 1;
+				return 0;
+			} else {
+				/* have gone through the list of devices and haven't found n.e.thing */
+				err("no appropriate range.. bailing out...\n");
+				return -EINVAL;
+			}
+		}
+	}	/* end if (!res_cur) */
+	return -EINVAL;
+}
+
+/********************************************************************************
+ * This routine is called from remove_card if the card contained PPB.
+ * It will remove all the resources on the bus as well as the bus itself
+ * Input: Bus
+ * Output: 0, -ENODEV
+ ********************************************************************************/
+int ibmphp_remove_bus(struct bus_node *bus, u8 parent_busno)
+{
+	struct resource_node *res_cur;
+	struct resource_node *res_tmp;
+	struct bus_node *prev_bus;
+	int rc;
+
+	prev_bus = find_bus_wprev(parent_busno, NULL, 0);
+
+	if (!prev_bus) {
+		debug("something terribly wrong. Cannot find parent bus to the one to remove\n");
+		return -ENODEV;
+	}
+
+	debug("In ibmphp_remove_bus... prev_bus->busno is %x\n", prev_bus->busno);
+
+	rc = remove_ranges(bus, prev_bus);
+	if (rc)
+		return rc;
+
+	if (bus->firstIO) {
+		res_cur = bus->firstIO;
+		while (res_cur) {
+			res_tmp = res_cur;
+			if (res_cur->next)
+				res_cur = res_cur->next;
+			else
+				res_cur = res_cur->nextRange;
+			kfree(res_tmp);
+			res_tmp = NULL;
+		}
+		bus->firstIO = NULL;
+	}
+	if (bus->firstMem) {
+		res_cur = bus->firstMem;
+		while (res_cur) {
+			res_tmp = res_cur;
+			if (res_cur->next)
+				res_cur = res_cur->next;
+			else
+				res_cur = res_cur->nextRange;
+			kfree(res_tmp);
+			res_tmp = NULL;
+		}
+		bus->firstMem = NULL;
+	}
+	if (bus->firstPFMem) {
+		res_cur = bus->firstPFMem;
+		while (res_cur) {
+			res_tmp = res_cur;
+			if (res_cur->next)
+				res_cur = res_cur->next;
+			else
+				res_cur = res_cur->nextRange;
+			kfree(res_tmp);
+			res_tmp = NULL;
+		}
+		bus->firstPFMem = NULL;
+	}
+
+	if (bus->firstPFMemFromMem) {
+		res_cur = bus->firstPFMemFromMem;
+		while (res_cur) {
+			res_tmp = res_cur;
+			res_cur = res_cur->next;
+
+			kfree(res_tmp);
+			res_tmp = NULL;
+		}
+		bus->firstPFMemFromMem = NULL;
+	}
+
+	list_del(&bus->bus_list);
+	kfree(bus);
+	return 0;
+}
+
+/******************************************************************************
+ * This routine deletes the ranges from a given bus, and the entries from the
+ * parent's bus in the resources
+ * Input: current bus, previous bus
+ * Output: 0, -EINVAL
+ ******************************************************************************/
+static int remove_ranges(struct bus_node *bus_cur, struct bus_node *bus_prev)
+{
+	struct range_node *range_cur;
+	struct range_node *range_tmp;
+	int i;
+	struct resource_node *res = NULL;
+
+	if (bus_cur->noIORanges) {
+		range_cur = bus_cur->rangeIO;
+		for (i = 0; i < bus_cur->noIORanges; i++) {
+			if (ibmphp_find_resource(bus_prev, range_cur->start, &res, IO) < 0)
+				return -EINVAL;
+			ibmphp_remove_resource(res);
+
+			range_tmp = range_cur;
+			range_cur = range_cur->next;
+			kfree(range_tmp);
+			range_tmp = NULL;
+		}
+		bus_cur->rangeIO = NULL;
+	}
+	if (bus_cur->noMemRanges) {
+		range_cur = bus_cur->rangeMem;
+		for (i = 0; i < bus_cur->noMemRanges; i++) {
+			if (ibmphp_find_resource(bus_prev, range_cur->start, &res, MEM) < 0)
+				return -EINVAL;
+
+			ibmphp_remove_resource(res);
+			range_tmp = range_cur;
+			range_cur = range_cur->next;
+			kfree(range_tmp);
+			range_tmp = NULL;
+		}
+		bus_cur->rangeMem = NULL;
+	}
+	if (bus_cur->noPFMemRanges) {
+		range_cur = bus_cur->rangePFMem;
+		for (i = 0; i < bus_cur->noPFMemRanges; i++) {
+			if (ibmphp_find_resource(bus_prev, range_cur->start, &res, PFMEM) < 0)
+				return -EINVAL;
+
+			ibmphp_remove_resource(res);
+			range_tmp = range_cur;
+			range_cur = range_cur->next;
+			kfree(range_tmp);
+			range_tmp = NULL;
+		}
+		bus_cur->rangePFMem = NULL;
+	}
+	return 0;
+}
+
+/*
+ * find the resource node in the bus
+ * Input: Resource needed, start address of the resource, type of resource
+ */
+int ibmphp_find_resource(struct bus_node *bus, u32 start_address, struct resource_node **res, int flag)
+{
+	struct resource_node *res_cur = NULL;
+	char *type = "";
+
+	if (!bus) {
+		err("The bus passed in NULL to find resource\n");
+		return -ENODEV;
+	}
+
+	switch (flag) {
+		case IO:
+			res_cur = bus->firstIO;
+			type = "io";
+			break;
+		case MEM:
+			res_cur = bus->firstMem;
+			type = "mem";
+			break;
+		case PFMEM:
+			res_cur = bus->firstPFMem;
+			type = "pfmem";
+			break;
+		default:
+			err("wrong type of flag\n");
+			return -EINVAL;
+	}
+
+	while (res_cur) {
+		if (res_cur->start == start_address) {
+			*res = res_cur;
+			break;
+		}
+		if (res_cur->next)
+			res_cur = res_cur->next;
+		else
+			res_cur = res_cur->nextRange;
+	}
+
+	if (!res_cur) {
+		if (flag == PFMEM) {
+			res_cur = bus->firstPFMemFromMem;
+			while (res_cur) {
+				if (res_cur->start == start_address) {
+					*res = res_cur;
+					break;
+				}
+				res_cur = res_cur->next;
+			}
+			if (!res_cur) {
+				debug("SOS...cannot find %s resource in the bus.\n", type);
+				return -EINVAL;
+			}
+		} else {
+			debug("SOS... cannot find %s resource in the bus.\n", type);
+			return -EINVAL;
+		}
+	}
+
+	if (*res)
+		debug("*res->start = %x\n", (*res)->start);
+
+	return 0;
+}
+
+/***********************************************************************
+ * This routine will free the resource structures used by the
+ * system.  It is called from cleanup routine for the module
+ * Parameters: none
+ * Returns: none
+ ***********************************************************************/
+void ibmphp_free_resources(void)
+{
+	struct bus_node *bus_cur = NULL, *next;
+	struct bus_node *bus_tmp;
+	struct range_node *range_cur;
+	struct range_node *range_tmp;
+	struct resource_node *res_cur;
+	struct resource_node *res_tmp;
+	int i = 0;
+	flags = 1;
+
+	list_for_each_entry_safe(bus_cur, next, &gbuses, bus_list) {
+		if (bus_cur->noIORanges) {
+			range_cur = bus_cur->rangeIO;
+			for (i = 0; i < bus_cur->noIORanges; i++) {
+				if (!range_cur)
+					break;
+				range_tmp = range_cur;
+				range_cur = range_cur->next;
+				kfree(range_tmp);
+				range_tmp = NULL;
+			}
+		}
+		if (bus_cur->noMemRanges) {
+			range_cur = bus_cur->rangeMem;
+			for (i = 0; i < bus_cur->noMemRanges; i++) {
+				if (!range_cur)
+					break;
+				range_tmp = range_cur;
+				range_cur = range_cur->next;
+				kfree(range_tmp);
+				range_tmp = NULL;
+			}
+		}
+		if (bus_cur->noPFMemRanges) {
+			range_cur = bus_cur->rangePFMem;
+			for (i = 0; i < bus_cur->noPFMemRanges; i++) {
+				if (!range_cur)
+					break;
+				range_tmp = range_cur;
+				range_cur = range_cur->next;
+				kfree(range_tmp);
+				range_tmp = NULL;
+			}
+		}
+
+		if (bus_cur->firstIO) {
+			res_cur = bus_cur->firstIO;
+			while (res_cur) {
+				res_tmp = res_cur;
+				if (res_cur->next)
+					res_cur = res_cur->next;
+				else
+					res_cur = res_cur->nextRange;
+				kfree(res_tmp);
+				res_tmp = NULL;
+			}
+			bus_cur->firstIO = NULL;
+		}
+		if (bus_cur->firstMem) {
+			res_cur = bus_cur->firstMem;
+			while (res_cur) {
+				res_tmp = res_cur;
+				if (res_cur->next)
+					res_cur = res_cur->next;
+				else
+					res_cur = res_cur->nextRange;
+				kfree(res_tmp);
+				res_tmp = NULL;
+			}
+			bus_cur->firstMem = NULL;
+		}
+		if (bus_cur->firstPFMem) {
+			res_cur = bus_cur->firstPFMem;
+			while (res_cur) {
+				res_tmp = res_cur;
+				if (res_cur->next)
+					res_cur = res_cur->next;
+				else
+					res_cur = res_cur->nextRange;
+				kfree(res_tmp);
+				res_tmp = NULL;
+			}
+			bus_cur->firstPFMem = NULL;
+		}
+
+		if (bus_cur->firstPFMemFromMem) {
+			res_cur = bus_cur->firstPFMemFromMem;
+			while (res_cur) {
+				res_tmp = res_cur;
+				res_cur = res_cur->next;
+
+				kfree(res_tmp);
+				res_tmp = NULL;
+			}
+			bus_cur->firstPFMemFromMem = NULL;
+		}
+
+		bus_tmp = bus_cur;
+		list_del(&bus_cur->bus_list);
+		kfree(bus_tmp);
+		bus_tmp = NULL;
+	}
+}
+
+/*********************************************************************************
+ * This function will go over the PFmem resources to check if the EBDA allocated
+ * pfmem out of memory buckets of the bus.  If so, it will change the range numbers
+ * and a flag to indicate that this resource is out of memory. It will also move the
+ * Pfmem out of the pfmem resource list to the PFMemFromMem list, and will create
+ * a new Mem node
+ * This routine is called right after initialization
+ *******************************************************************************/
+static int __init once_over(void)
+{
+	struct resource_node *pfmem_cur;
+	struct resource_node *pfmem_prev;
+	struct resource_node *mem;
+	struct bus_node *bus_cur;
+
+	list_for_each_entry(bus_cur, &gbuses, bus_list) {
+		if ((!bus_cur->rangePFMem) && (bus_cur->firstPFMem)) {
+			for (pfmem_cur = bus_cur->firstPFMem, pfmem_prev = NULL; pfmem_cur; pfmem_prev = pfmem_cur, pfmem_cur = pfmem_cur->next) {
+				pfmem_cur->fromMem = 1;
+				if (pfmem_prev)
+					pfmem_prev->next = pfmem_cur->next;
+				else
+					bus_cur->firstPFMem = pfmem_cur->next;
+
+				if (!bus_cur->firstPFMemFromMem)
+					pfmem_cur->next = NULL;
+				else
+					/* we don't need to sort PFMemFromMem since we're using mem node for
+					   all the real work anyways, so just insert at the beginning of the
+					   list
+					 */
+					pfmem_cur->next = bus_cur->firstPFMemFromMem;
+
+				bus_cur->firstPFMemFromMem = pfmem_cur;
+
+				mem = kzalloc(sizeof(struct resource_node), GFP_KERNEL);
+				if (!mem)
+					return -ENOMEM;
+
+				mem->type = MEM;
+				mem->busno = pfmem_cur->busno;
+				mem->devfunc = pfmem_cur->devfunc;
+				mem->start = pfmem_cur->start;
+				mem->end = pfmem_cur->end;
+				mem->len = pfmem_cur->len;
+				if (ibmphp_add_resource(mem) < 0)
+					err("Trouble...trouble... EBDA allocated pfmem from mem, but system doesn't display it has this space... unless not PCI device...\n");
+				pfmem_cur->rangeno = mem->rangeno;
+			}	/* end for pfmem */
+		}	/* end if */
+	}	/* end list_for_each bus */
+	return 0;
+}
+
+int ibmphp_add_pfmem_from_mem(struct resource_node *pfmem)
+{
+	struct bus_node *bus_cur = find_bus_wprev(pfmem->busno, NULL, 0);
+
+	if (!bus_cur) {
+		err("cannot find bus of pfmem to add...\n");
+		return -ENODEV;
+	}
+
+	if (bus_cur->firstPFMemFromMem)
+		pfmem->next = bus_cur->firstPFMemFromMem;
+	else
+		pfmem->next = NULL;
+
+	bus_cur->firstPFMemFromMem = pfmem;
+
+	return 0;
+}
+
+/* This routine just goes through the buses to see if the bus already exists.
+ * It is called from ibmphp_find_sec_number, to find out a secondary bus number for
+ * bridged cards
+ * Parameters: bus_number
+ * Returns: Bus pointer or NULL
+ */
+struct bus_node *ibmphp_find_res_bus(u8 bus_number)
+{
+	return find_bus_wprev(bus_number, NULL, 0);
+}
+
+static struct bus_node *find_bus_wprev(u8 bus_number, struct bus_node **prev, u8 flag)
+{
+	struct bus_node *bus_cur;
+
+	list_for_each_entry(bus_cur, &gbuses, bus_list) {
+		if (flag)
+			*prev = list_prev_entry(bus_cur, bus_list);
+		if (bus_cur->busno == bus_number)
+			return bus_cur;
+	}
+
+	return NULL;
+}
+
+void ibmphp_print_test(void)
+{
+	int i = 0;
+	struct bus_node *bus_cur = NULL;
+	struct range_node *range;
+	struct resource_node *res;
+
+	debug_pci("*****************START**********************\n");
+
+	if ((!list_empty(&gbuses)) && flags) {
+		err("The GBUSES is not NULL?!?!?!?!?\n");
+		return;
+	}
+
+	list_for_each_entry(bus_cur, &gbuses, bus_list) {
+		debug_pci ("This is bus # %d.  There are\n", bus_cur->busno);
+		debug_pci ("IORanges = %d\t", bus_cur->noIORanges);
+		debug_pci ("MemRanges = %d\t", bus_cur->noMemRanges);
+		debug_pci ("PFMemRanges = %d\n", bus_cur->noPFMemRanges);
+		debug_pci ("The IO Ranges are as follows:\n");
+		if (bus_cur->rangeIO) {
+			range = bus_cur->rangeIO;
+			for (i = 0; i < bus_cur->noIORanges; i++) {
+				debug_pci("rangeno is %d\n", range->rangeno);
+				debug_pci("[%x - %x]\n", range->start, range->end);
+				range = range->next;
+			}
+		}
+
+		debug_pci("The Mem Ranges are as follows:\n");
+		if (bus_cur->rangeMem) {
+			range = bus_cur->rangeMem;
+			for (i = 0; i < bus_cur->noMemRanges; i++) {
+				debug_pci("rangeno is %d\n", range->rangeno);
+				debug_pci("[%x - %x]\n", range->start, range->end);
+				range = range->next;
+			}
+		}
+
+		debug_pci("The PFMem Ranges are as follows:\n");
+
+		if (bus_cur->rangePFMem) {
+			range = bus_cur->rangePFMem;
+			for (i = 0; i < bus_cur->noPFMemRanges; i++) {
+				debug_pci("rangeno is %d\n", range->rangeno);
+				debug_pci("[%x - %x]\n", range->start, range->end);
+				range = range->next;
+			}
+		}
+
+		debug_pci("The resources on this bus are as follows\n");
+
+		debug_pci("IO...\n");
+		if (bus_cur->firstIO) {
+			res = bus_cur->firstIO;
+			while (res) {
+				debug_pci("The range # is %d\n", res->rangeno);
+				debug_pci("The bus, devfnc is %d, %x\n", res->busno, res->devfunc);
+				debug_pci("[%x - %x], len=%x\n", res->start, res->end, res->len);
+				if (res->next)
+					res = res->next;
+				else if (res->nextRange)
+					res = res->nextRange;
+				else
+					break;
+			}
+		}
+		debug_pci("Mem...\n");
+		if (bus_cur->firstMem) {
+			res = bus_cur->firstMem;
+			while (res) {
+				debug_pci("The range # is %d\n", res->rangeno);
+				debug_pci("The bus, devfnc is %d, %x\n", res->busno, res->devfunc);
+				debug_pci("[%x - %x], len=%x\n", res->start, res->end, res->len);
+				if (res->next)
+					res = res->next;
+				else if (res->nextRange)
+					res = res->nextRange;
+				else
+					break;
+			}
+		}
+		debug_pci("PFMem...\n");
+		if (bus_cur->firstPFMem) {
+			res = bus_cur->firstPFMem;
+			while (res) {
+				debug_pci("The range # is %d\n", res->rangeno);
+				debug_pci("The bus, devfnc is %d, %x\n", res->busno, res->devfunc);
+				debug_pci("[%x - %x], len=%x\n", res->start, res->end, res->len);
+				if (res->next)
+					res = res->next;
+				else if (res->nextRange)
+					res = res->nextRange;
+				else
+					break;
+			}
+		}
+
+		debug_pci("PFMemFromMem...\n");
+		if (bus_cur->firstPFMemFromMem) {
+			res = bus_cur->firstPFMemFromMem;
+			while (res) {
+				debug_pci("The range # is %d\n", res->rangeno);
+				debug_pci("The bus, devfnc is %d, %x\n", res->busno, res->devfunc);
+				debug_pci("[%x - %x], len=%x\n", res->start, res->end, res->len);
+				res = res->next;
+			}
+		}
+	}
+	debug_pci("***********************END***********************\n");
+}
+
+static int range_exists_already(struct range_node *range, struct bus_node *bus_cur, u8 type)
+{
+	struct range_node *range_cur = NULL;
+	switch (type) {
+		case IO:
+			range_cur = bus_cur->rangeIO;
+			break;
+		case MEM:
+			range_cur = bus_cur->rangeMem;
+			break;
+		case PFMEM:
+			range_cur = bus_cur->rangePFMem;
+			break;
+		default:
+			err("wrong type passed to find out if range already exists\n");
+			return -ENODEV;
+	}
+
+	while (range_cur) {
+		if ((range_cur->start == range->start) && (range_cur->end == range->end))
+			return 1;
+		range_cur = range_cur->next;
+	}
+
+	return 0;
+}
+
+/* This routine will read the windows for any PPB we have and update the
+ * range info for the secondary bus, and will also input this info into
+ * primary bus, since BIOS doesn't. This is for PPB that are in the system
+ * on bootup.  For bridged cards that were added during previous load of the
+ * driver, only the ranges and the bus structure are added, the devices are
+ * added from NVRAM
+ * Input: primary busno
+ * Returns: none
+ * Note: this function doesn't take into account IO restrictions etc,
+ *	 so will only work for bridges with no video/ISA devices behind them It
+ *	 also will not work for onboard PPBs that can have more than 1 *bus
+ *	 behind them All these are TO DO.
+ *	 Also need to add more error checkings... (from fnc returns etc)
+ */
+static int __init update_bridge_ranges(struct bus_node **bus)
+{
+	u8 sec_busno, device, function, hdr_type, start_io_address, end_io_address;
+	u16 vendor_id, upper_io_start, upper_io_end, start_mem_address, end_mem_address;
+	u32 start_address, end_address, upper_start, upper_end;
+	struct bus_node *bus_sec;
+	struct bus_node *bus_cur;
+	struct resource_node *io;
+	struct resource_node *mem;
+	struct resource_node *pfmem;
+	struct range_node *range;
+	unsigned int devfn;
+
+	bus_cur = *bus;
+	if (!bus_cur)
+		return -ENODEV;
+	ibmphp_pci_bus->number = bus_cur->busno;
+
+	debug("inside %s\n", __func__);
+	debug("bus_cur->busno = %x\n", bus_cur->busno);
+
+	for (device = 0; device < 32; device++) {
+		for (function = 0x00; function < 0x08; function++) {
+			devfn = PCI_DEVFN(device, function);
+			pci_bus_read_config_word(ibmphp_pci_bus, devfn, PCI_VENDOR_ID, &vendor_id);
+
+			if (vendor_id != PCI_VENDOR_ID_NOTVALID) {
+				/* found correct device!!! */
+				pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_HEADER_TYPE, &hdr_type);
+
+				switch (hdr_type) {
+					case PCI_HEADER_TYPE_NORMAL:
+						function = 0x8;
+						break;
+					case PCI_HEADER_TYPE_MULTIDEVICE:
+						break;
+					case PCI_HEADER_TYPE_BRIDGE:
+						function = 0x8;
+						fallthrough;
+					case PCI_HEADER_TYPE_MULTIBRIDGE:
+						/* We assume here that only 1 bus behind the bridge
+						   TO DO: add functionality for several:
+						   temp = secondary;
+						   while (temp < subordinate) {
+						   ...
+						   temp++;
+						   }
+						 */
+						pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_SECONDARY_BUS, &sec_busno);
+						bus_sec = find_bus_wprev(sec_busno, NULL, 0);
+						/* this bus structure doesn't exist yet, PPB was configured during previous loading of ibmphp */
+						if (!bus_sec) {
+							alloc_error_bus(NULL, sec_busno, 1);
+							/* the rest will be populated during NVRAM call */
+							return 0;
+						}
+						pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_IO_BASE, &start_io_address);
+						pci_bus_read_config_byte(ibmphp_pci_bus, devfn, PCI_IO_LIMIT, &end_io_address);
+						pci_bus_read_config_word(ibmphp_pci_bus, devfn, PCI_IO_BASE_UPPER16, &upper_io_start);
+						pci_bus_read_config_word(ibmphp_pci_bus, devfn, PCI_IO_LIMIT_UPPER16, &upper_io_end);
+						start_address = (start_io_address & PCI_IO_RANGE_MASK) << 8;
+						start_address |= (upper_io_start << 16);
+						end_address = (end_io_address & PCI_IO_RANGE_MASK) << 8;
+						end_address |= (upper_io_end << 16);
+
+						if ((start_address) && (start_address <= end_address)) {
+							range = kzalloc(sizeof(struct range_node), GFP_KERNEL);
+							if (!range)
+								return -ENOMEM;
+
+							range->start = start_address;
+							range->end = end_address + 0xfff;
+
+							if (bus_sec->noIORanges > 0) {
+								if (!range_exists_already(range, bus_sec, IO)) {
+									add_bus_range(IO, range, bus_sec);
+									++bus_sec->noIORanges;
+								} else {
+									kfree(range);
+									range = NULL;
+								}
+							} else {
+								/* 1st IO Range on the bus */
+								range->rangeno = 1;
+								bus_sec->rangeIO = range;
+								++bus_sec->noIORanges;
+							}
+							fix_resources(bus_sec);
+
+							if (ibmphp_find_resource(bus_cur, start_address, &io, IO)) {
+								io = kzalloc(sizeof(struct resource_node), GFP_KERNEL);
+								if (!io) {
+									kfree(range);
+									return -ENOMEM;
+								}
+								io->type = IO;
+								io->busno = bus_cur->busno;
+								io->devfunc = ((device << 3) | (function & 0x7));
+								io->start = start_address;
+								io->end = end_address + 0xfff;
+								io->len = io->end - io->start + 1;
+								ibmphp_add_resource(io);
+							}
+						}
+
+						pci_bus_read_config_word(ibmphp_pci_bus, devfn, PCI_MEMORY_BASE, &start_mem_address);
+						pci_bus_read_config_word(ibmphp_pci_bus, devfn, PCI_MEMORY_LIMIT, &end_mem_address);
+
+						start_address = 0x00000000 | (start_mem_address & PCI_MEMORY_RANGE_MASK) << 16;
+						end_address = 0x00000000 | (end_mem_address & PCI_MEMORY_RANGE_MASK) << 16;
+
+						if ((start_address) && (start_address <= end_address)) {
+
+							range = kzalloc(sizeof(struct range_node), GFP_KERNEL);
+							if (!range)
+								return -ENOMEM;
+
+							range->start = start_address;
+							range->end = end_address + 0xfffff;
+
+							if (bus_sec->noMemRanges > 0) {
+								if (!range_exists_already(range, bus_sec, MEM)) {
+									add_bus_range(MEM, range, bus_sec);
+									++bus_sec->noMemRanges;
+								} else {
+									kfree(range);
+									range = NULL;
+								}
+							} else {
+								/* 1st Mem Range on the bus */
+								range->rangeno = 1;
+								bus_sec->rangeMem = range;
+								++bus_sec->noMemRanges;
+							}
+
+							fix_resources(bus_sec);
+
+							if (ibmphp_find_resource(bus_cur, start_address, &mem, MEM)) {
+								mem = kzalloc(sizeof(struct resource_node), GFP_KERNEL);
+								if (!mem) {
+									kfree(range);
+									return -ENOMEM;
+								}
+								mem->type = MEM;
+								mem->busno = bus_cur->busno;
+								mem->devfunc = ((device << 3) | (function & 0x7));
+								mem->start = start_address;
+								mem->end = end_address + 0xfffff;
+								mem->len = mem->end - mem->start + 1;
+								ibmphp_add_resource(mem);
+							}
+						}
+						pci_bus_read_config_word(ibmphp_pci_bus, devfn, PCI_PREF_MEMORY_BASE, &start_mem_address);
+						pci_bus_read_config_word(ibmphp_pci_bus, devfn, PCI_PREF_MEMORY_LIMIT, &end_mem_address);
+						pci_bus_read_config_dword(ibmphp_pci_bus, devfn, PCI_PREF_BASE_UPPER32, &upper_start);
+						pci_bus_read_config_dword(ibmphp_pci_bus, devfn, PCI_PREF_LIMIT_UPPER32, &upper_end);
+						start_address = 0x00000000 | (start_mem_address & PCI_MEMORY_RANGE_MASK) << 16;
+						end_address = 0x00000000 | (end_mem_address & PCI_MEMORY_RANGE_MASK) << 16;
+#if BITS_PER_LONG == 64
+						start_address |= ((long) upper_start) << 32;
+						end_address |= ((long) upper_end) << 32;
+#endif
+
+						if ((start_address) && (start_address <= end_address)) {
+
+							range = kzalloc(sizeof(struct range_node), GFP_KERNEL);
+							if (!range)
+								return -ENOMEM;
+
+							range->start = start_address;
+							range->end = end_address + 0xfffff;
+
+							if (bus_sec->noPFMemRanges > 0) {
+								if (!range_exists_already(range, bus_sec, PFMEM)) {
+									add_bus_range(PFMEM, range, bus_sec);
+									++bus_sec->noPFMemRanges;
+								} else {
+									kfree(range);
+									range = NULL;
+								}
+							} else {
+								/* 1st PFMem Range on the bus */
+								range->rangeno = 1;
+								bus_sec->rangePFMem = range;
+								++bus_sec->noPFMemRanges;
+							}
+
+							fix_resources(bus_sec);
+							if (ibmphp_find_resource(bus_cur, start_address, &pfmem, PFMEM)) {
+								pfmem = kzalloc(sizeof(struct resource_node), GFP_KERNEL);
+								if (!pfmem) {
+									kfree(range);
+									return -ENOMEM;
+								}
+								pfmem->type = PFMEM;
+								pfmem->busno = bus_cur->busno;
+								pfmem->devfunc = ((device << 3) | (function & 0x7));
+								pfmem->start = start_address;
+								pfmem->end = end_address + 0xfffff;
+								pfmem->len = pfmem->end - pfmem->start + 1;
+								pfmem->fromMem = 0;
+
+								ibmphp_add_resource(pfmem);
+							}
+						}
+						break;
+				}	/* end of switch */
+			}	/* end if vendor */
+		}	/* end for function */
+	}	/* end for device */
+
+	return 0;
+}
diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
new file mode 100644
index 000000000..058d5937d
--- /dev/null
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -0,0 +1,580 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCI HotPlug Controller Core
+ *
+ * Copyright (C) 2001-2002 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001-2002 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <kristen.c.accardi@intel.com>
+ *
+ * Authors:
+ *   Greg Kroah-Hartman <greg@kroah.com>
+ *   Scott Murray <scottm@somanetworks.com>
+ */
+
+#include <linux/module.h>	/* try_module_get & module_put */
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <linux/uaccess.h>
+#include "../pci.h"
+#include "cpci_hotplug.h"
+
+#define MY_NAME	"pci_hotplug"
+
+#define dbg(fmt, arg...) do { if (debug) printk(KERN_DEBUG "%s: %s: " fmt, MY_NAME, __func__, ## arg); } while (0)
+#define err(format, arg...) printk(KERN_ERR "%s: " format, MY_NAME, ## arg)
+#define info(format, arg...) printk(KERN_INFO "%s: " format, MY_NAME, ## arg)
+#define warn(format, arg...) printk(KERN_WARNING "%s: " format, MY_NAME, ## arg)
+
+/* local variables */
+static bool debug;
+
+static LIST_HEAD(pci_hotplug_slot_list);
+static DEFINE_MUTEX(pci_hp_mutex);
+
+/* Weee, fun with macros... */
+#define GET_STATUS(name, type)	\
+static int get_##name(struct hotplug_slot *slot, type *value)		\
+{									\
+	const struct hotplug_slot_ops *ops = slot->ops;			\
+	int retval = 0;							\
+	if (!try_module_get(slot->owner))				\
+		return -ENODEV;						\
+	if (ops->get_##name)						\
+		retval = ops->get_##name(slot, value);			\
+	module_put(slot->owner);					\
+	return retval;							\
+}
+
+GET_STATUS(power_status, u8)
+GET_STATUS(attention_status, u8)
+GET_STATUS(latch_status, u8)
+GET_STATUS(adapter_status, u8)
+
+static ssize_t power_read_file(struct pci_slot *pci_slot, char *buf)
+{
+	int retval;
+	u8 value;
+
+	retval = get_power_status(pci_slot->hotplug, &value);
+	if (retval)
+		return retval;
+
+	return sysfs_emit(buf, "%d\n", value);
+}
+
+static ssize_t power_write_file(struct pci_slot *pci_slot, const char *buf,
+				size_t count)
+{
+	struct hotplug_slot *slot = pci_slot->hotplug;
+	unsigned long lpower;
+	u8 power;
+	int retval = 0;
+
+	lpower = simple_strtoul(buf, NULL, 10);
+	power = (u8)(lpower & 0xff);
+	dbg("power = %d\n", power);
+
+	if (!try_module_get(slot->owner)) {
+		retval = -ENODEV;
+		goto exit;
+	}
+	switch (power) {
+	case 0:
+		if (slot->ops->disable_slot)
+			retval = slot->ops->disable_slot(slot);
+		break;
+
+	case 1:
+		if (slot->ops->enable_slot)
+			retval = slot->ops->enable_slot(slot);
+		break;
+
+	default:
+		err("Illegal value specified for power\n");
+		retval = -EINVAL;
+	}
+	module_put(slot->owner);
+
+exit:
+	if (retval)
+		return retval;
+	return count;
+}
+
+static struct pci_slot_attribute hotplug_slot_attr_power = {
+	.attr = {.name = "power", .mode = S_IFREG | S_IRUGO | S_IWUSR},
+	.show = power_read_file,
+	.store = power_write_file
+};
+
+static ssize_t attention_read_file(struct pci_slot *pci_slot, char *buf)
+{
+	int retval;
+	u8 value;
+
+	retval = get_attention_status(pci_slot->hotplug, &value);
+	if (retval)
+		return retval;
+
+	return sysfs_emit(buf, "%d\n", value);
+}
+
+static ssize_t attention_write_file(struct pci_slot *pci_slot, const char *buf,
+				    size_t count)
+{
+	struct hotplug_slot *slot = pci_slot->hotplug;
+	const struct hotplug_slot_ops *ops = slot->ops;
+	unsigned long lattention;
+	u8 attention;
+	int retval = 0;
+
+	lattention = simple_strtoul(buf, NULL, 10);
+	attention = (u8)(lattention & 0xff);
+	dbg(" - attention = %d\n", attention);
+
+	if (!try_module_get(slot->owner)) {
+		retval = -ENODEV;
+		goto exit;
+	}
+	if (ops->set_attention_status)
+		retval = ops->set_attention_status(slot, attention);
+	module_put(slot->owner);
+
+exit:
+	if (retval)
+		return retval;
+	return count;
+}
+
+static struct pci_slot_attribute hotplug_slot_attr_attention = {
+	.attr = {.name = "attention", .mode = S_IFREG | S_IRUGO | S_IWUSR},
+	.show = attention_read_file,
+	.store = attention_write_file
+};
+
+static ssize_t latch_read_file(struct pci_slot *pci_slot, char *buf)
+{
+	int retval;
+	u8 value;
+
+	retval = get_latch_status(pci_slot->hotplug, &value);
+	if (retval)
+		return retval;
+
+	return sysfs_emit(buf, "%d\n", value);
+}
+
+static struct pci_slot_attribute hotplug_slot_attr_latch = {
+	.attr = {.name = "latch", .mode = S_IFREG | S_IRUGO},
+	.show = latch_read_file,
+};
+
+static ssize_t presence_read_file(struct pci_slot *pci_slot, char *buf)
+{
+	int retval;
+	u8 value;
+
+	retval = get_adapter_status(pci_slot->hotplug, &value);
+	if (retval)
+		return retval;
+
+	return sysfs_emit(buf, "%d\n", value);
+}
+
+static struct pci_slot_attribute hotplug_slot_attr_presence = {
+	.attr = {.name = "adapter", .mode = S_IFREG | S_IRUGO},
+	.show = presence_read_file,
+};
+
+static ssize_t test_write_file(struct pci_slot *pci_slot, const char *buf,
+			       size_t count)
+{
+	struct hotplug_slot *slot = pci_slot->hotplug;
+	unsigned long ltest;
+	u32 test;
+	int retval = 0;
+
+	ltest = simple_strtoul(buf, NULL, 10);
+	test = (u32)(ltest & 0xffffffff);
+	dbg("test = %d\n", test);
+
+	if (!try_module_get(slot->owner)) {
+		retval = -ENODEV;
+		goto exit;
+	}
+	if (slot->ops->hardware_test)
+		retval = slot->ops->hardware_test(slot, test);
+	module_put(slot->owner);
+
+exit:
+	if (retval)
+		return retval;
+	return count;
+}
+
+static struct pci_slot_attribute hotplug_slot_attr_test = {
+	.attr = {.name = "test", .mode = S_IFREG | S_IRUGO | S_IWUSR},
+	.store = test_write_file
+};
+
+static bool has_power_file(struct pci_slot *pci_slot)
+{
+	struct hotplug_slot *slot = pci_slot->hotplug;
+
+	if ((!slot) || (!slot->ops))
+		return false;
+	if ((slot->ops->enable_slot) ||
+	    (slot->ops->disable_slot) ||
+	    (slot->ops->get_power_status))
+		return true;
+	return false;
+}
+
+static bool has_attention_file(struct pci_slot *pci_slot)
+{
+	struct hotplug_slot *slot = pci_slot->hotplug;
+
+	if ((!slot) || (!slot->ops))
+		return false;
+	if ((slot->ops->set_attention_status) ||
+	    (slot->ops->get_attention_status))
+		return true;
+	return false;
+}
+
+static bool has_latch_file(struct pci_slot *pci_slot)
+{
+	struct hotplug_slot *slot = pci_slot->hotplug;
+
+	if ((!slot) || (!slot->ops))
+		return false;
+	if (slot->ops->get_latch_status)
+		return true;
+	return false;
+}
+
+static bool has_adapter_file(struct pci_slot *pci_slot)
+{
+	struct hotplug_slot *slot = pci_slot->hotplug;
+
+	if ((!slot) || (!slot->ops))
+		return false;
+	if (slot->ops->get_adapter_status)
+		return true;
+	return false;
+}
+
+static bool has_test_file(struct pci_slot *pci_slot)
+{
+	struct hotplug_slot *slot = pci_slot->hotplug;
+
+	if ((!slot) || (!slot->ops))
+		return false;
+	if (slot->ops->hardware_test)
+		return true;
+	return false;
+}
+
+static int fs_add_slot(struct pci_slot *pci_slot)
+{
+	int retval = 0;
+
+	/* Create symbolic link to the hotplug driver module */
+	pci_hp_create_module_link(pci_slot);
+
+	if (has_power_file(pci_slot)) {
+		retval = sysfs_create_file(&pci_slot->kobj,
+					   &hotplug_slot_attr_power.attr);
+		if (retval)
+			goto exit_power;
+	}
+
+	if (has_attention_file(pci_slot)) {
+		retval = sysfs_create_file(&pci_slot->kobj,
+					   &hotplug_slot_attr_attention.attr);
+		if (retval)
+			goto exit_attention;
+	}
+
+	if (has_latch_file(pci_slot)) {
+		retval = sysfs_create_file(&pci_slot->kobj,
+					   &hotplug_slot_attr_latch.attr);
+		if (retval)
+			goto exit_latch;
+	}
+
+	if (has_adapter_file(pci_slot)) {
+		retval = sysfs_create_file(&pci_slot->kobj,
+					   &hotplug_slot_attr_presence.attr);
+		if (retval)
+			goto exit_adapter;
+	}
+
+	if (has_test_file(pci_slot)) {
+		retval = sysfs_create_file(&pci_slot->kobj,
+					   &hotplug_slot_attr_test.attr);
+		if (retval)
+			goto exit_test;
+	}
+
+	goto exit;
+
+exit_test:
+	if (has_adapter_file(pci_slot))
+		sysfs_remove_file(&pci_slot->kobj,
+				  &hotplug_slot_attr_presence.attr);
+exit_adapter:
+	if (has_latch_file(pci_slot))
+		sysfs_remove_file(&pci_slot->kobj, &hotplug_slot_attr_latch.attr);
+exit_latch:
+	if (has_attention_file(pci_slot))
+		sysfs_remove_file(&pci_slot->kobj,
+				  &hotplug_slot_attr_attention.attr);
+exit_attention:
+	if (has_power_file(pci_slot))
+		sysfs_remove_file(&pci_slot->kobj, &hotplug_slot_attr_power.attr);
+exit_power:
+	pci_hp_remove_module_link(pci_slot);
+exit:
+	return retval;
+}
+
+static void fs_remove_slot(struct pci_slot *pci_slot)
+{
+	if (has_power_file(pci_slot))
+		sysfs_remove_file(&pci_slot->kobj, &hotplug_slot_attr_power.attr);
+
+	if (has_attention_file(pci_slot))
+		sysfs_remove_file(&pci_slot->kobj,
+				  &hotplug_slot_attr_attention.attr);
+
+	if (has_latch_file(pci_slot))
+		sysfs_remove_file(&pci_slot->kobj, &hotplug_slot_attr_latch.attr);
+
+	if (has_adapter_file(pci_slot))
+		sysfs_remove_file(&pci_slot->kobj,
+				  &hotplug_slot_attr_presence.attr);
+
+	if (has_test_file(pci_slot))
+		sysfs_remove_file(&pci_slot->kobj, &hotplug_slot_attr_test.attr);
+
+	pci_hp_remove_module_link(pci_slot);
+}
+
+static struct hotplug_slot *get_slot_from_name(const char *name)
+{
+	struct hotplug_slot *slot;
+
+	list_for_each_entry(slot, &pci_hotplug_slot_list, slot_list) {
+		if (strcmp(hotplug_slot_name(slot), name) == 0)
+			return slot;
+	}
+	return NULL;
+}
+
+/**
+ * __pci_hp_register - register a hotplug_slot with the PCI hotplug subsystem
+ * @bus: bus this slot is on
+ * @slot: pointer to the &struct hotplug_slot to register
+ * @devnr: device number
+ * @name: name registered with kobject core
+ * @owner: caller module owner
+ * @mod_name: caller module name
+ *
+ * Prepares a hotplug slot for in-kernel use and immediately publishes it to
+ * user space in one go.  Drivers may alternatively carry out the two steps
+ * separately by invoking pci_hp_initialize() and pci_hp_add().
+ *
+ * Returns 0 if successful, anything else for an error.
+ */
+int __pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus,
+		      int devnr, const char *name,
+		      struct module *owner, const char *mod_name)
+{
+	int result;
+
+	result = __pci_hp_initialize(slot, bus, devnr, name, owner, mod_name);
+	if (result)
+		return result;
+
+	result = pci_hp_add(slot);
+	if (result)
+		pci_hp_destroy(slot);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(__pci_hp_register);
+
+/**
+ * __pci_hp_initialize - prepare hotplug slot for in-kernel use
+ * @slot: pointer to the &struct hotplug_slot to initialize
+ * @bus: bus this slot is on
+ * @devnr: slot number
+ * @name: name registered with kobject core
+ * @owner: caller module owner
+ * @mod_name: caller module name
+ *
+ * Allocate and fill in a PCI slot for use by a hotplug driver.  Once this has
+ * been called, the driver may invoke hotplug_slot_name() to get the slot's
+ * unique name.  The driver must be prepared to handle a ->reset_slot callback
+ * from this point on.
+ *
+ * Returns 0 on success or a negative int on error.
+ */
+int __pci_hp_initialize(struct hotplug_slot *slot, struct pci_bus *bus,
+			int devnr, const char *name, struct module *owner,
+			const char *mod_name)
+{
+	struct pci_slot *pci_slot;
+
+	if (slot == NULL)
+		return -ENODEV;
+	if (slot->ops == NULL)
+		return -EINVAL;
+
+	slot->owner = owner;
+	slot->mod_name = mod_name;
+
+	/*
+	 * No problems if we call this interface from both ACPI_PCI_SLOT
+	 * driver and call it here again. If we've already created the
+	 * pci_slot, the interface will simply bump the refcount.
+	 */
+	pci_slot = pci_create_slot(bus, devnr, name, slot);
+	if (IS_ERR(pci_slot))
+		return PTR_ERR(pci_slot);
+
+	slot->pci_slot = pci_slot;
+	pci_slot->hotplug = slot;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__pci_hp_initialize);
+
+/**
+ * pci_hp_add - publish hotplug slot to user space
+ * @slot: pointer to the &struct hotplug_slot to publish
+ *
+ * Make a hotplug slot's sysfs interface available and inform user space of its
+ * addition by sending a uevent.  The hotplug driver must be prepared to handle
+ * all &struct hotplug_slot_ops callbacks from this point on.
+ *
+ * Returns 0 on success or a negative int on error.
+ */
+int pci_hp_add(struct hotplug_slot *slot)
+{
+	struct pci_slot *pci_slot = slot->pci_slot;
+	int result;
+
+	result = fs_add_slot(pci_slot);
+	if (result)
+		return result;
+
+	kobject_uevent(&pci_slot->kobj, KOBJ_ADD);
+	mutex_lock(&pci_hp_mutex);
+	list_add(&slot->slot_list, &pci_hotplug_slot_list);
+	mutex_unlock(&pci_hp_mutex);
+	dbg("Added slot %s to the list\n", hotplug_slot_name(slot));
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_hp_add);
+
+/**
+ * pci_hp_deregister - deregister a hotplug_slot with the PCI hotplug subsystem
+ * @slot: pointer to the &struct hotplug_slot to deregister
+ *
+ * The @slot must have been registered with the pci hotplug subsystem
+ * previously with a call to pci_hp_register().
+ *
+ * Returns 0 if successful, anything else for an error.
+ */
+void pci_hp_deregister(struct hotplug_slot *slot)
+{
+	pci_hp_del(slot);
+	pci_hp_destroy(slot);
+}
+EXPORT_SYMBOL_GPL(pci_hp_deregister);
+
+/**
+ * pci_hp_del - unpublish hotplug slot from user space
+ * @slot: pointer to the &struct hotplug_slot to unpublish
+ *
+ * Remove a hotplug slot's sysfs interface.
+ *
+ * Returns 0 on success or a negative int on error.
+ */
+void pci_hp_del(struct hotplug_slot *slot)
+{
+	struct hotplug_slot *temp;
+
+	if (WARN_ON(!slot))
+		return;
+
+	mutex_lock(&pci_hp_mutex);
+	temp = get_slot_from_name(hotplug_slot_name(slot));
+	if (WARN_ON(temp != slot)) {
+		mutex_unlock(&pci_hp_mutex);
+		return;
+	}
+
+	list_del(&slot->slot_list);
+	mutex_unlock(&pci_hp_mutex);
+	dbg("Removed slot %s from the list\n", hotplug_slot_name(slot));
+	fs_remove_slot(slot->pci_slot);
+}
+EXPORT_SYMBOL_GPL(pci_hp_del);
+
+/**
+ * pci_hp_destroy - remove hotplug slot from in-kernel use
+ * @slot: pointer to the &struct hotplug_slot to destroy
+ *
+ * Destroy a PCI slot used by a hotplug driver.  Once this has been called,
+ * the driver may no longer invoke hotplug_slot_name() to get the slot's
+ * unique name.  The driver no longer needs to handle a ->reset_slot callback
+ * from this point on.
+ *
+ * Returns 0 on success or a negative int on error.
+ */
+void pci_hp_destroy(struct hotplug_slot *slot)
+{
+	struct pci_slot *pci_slot = slot->pci_slot;
+
+	slot->pci_slot = NULL;
+	pci_slot->hotplug = NULL;
+	pci_destroy_slot(pci_slot);
+}
+EXPORT_SYMBOL_GPL(pci_hp_destroy);
+
+static int __init pci_hotplug_init(void)
+{
+	int result;
+
+	result = cpci_hotplug_init(debug);
+	if (result) {
+		err("cpci_hotplug_init with error %d\n", result);
+		return result;
+	}
+
+	return result;
+}
+device_initcall(pci_hotplug_init);
+
+/*
+ * not really modular, but the easiest way to keep compat with existing
+ * bootargs behaviour is to continue using module_param here.
+ */
+module_param(debug, bool, 0644);
+MODULE_PARM_DESC(debug, "Debugging mode enabled or not");
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
new file mode 100644
index 000000000..e0a614ace
--- /dev/null
+++ b/drivers/pci/hotplug/pciehp.h
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * PCI Express Hot Plug Controller Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM Corp.
+ * Copyright (C) 2003-2004 Intel Corporation
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
+ *
+ */
+#ifndef _PCIEHP_H
+#define _PCIEHP_H
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/workqueue.h>
+
+#include "../pcie/portdrv.h"
+
+extern bool pciehp_poll_mode;
+extern int pciehp_poll_time;
+
+/*
+ * Set CONFIG_DYNAMIC_DEBUG=y and boot with 'dyndbg="file pciehp* +p"' to
+ * enable debug messages.
+ */
+#define ctrl_dbg(ctrl, format, arg...)					\
+	pci_dbg(ctrl->pcie->port, format, ## arg)
+#define ctrl_err(ctrl, format, arg...)					\
+	pci_err(ctrl->pcie->port, format, ## arg)
+#define ctrl_info(ctrl, format, arg...)					\
+	pci_info(ctrl->pcie->port, format, ## arg)
+#define ctrl_warn(ctrl, format, arg...)					\
+	pci_warn(ctrl->pcie->port, format, ## arg)
+
+#define SLOT_NAME_SIZE 10
+
+/**
+ * struct controller - PCIe hotplug controller
+ * @pcie: pointer to the controller's PCIe port service device
+ * @slot_cap: cached copy of the Slot Capabilities register
+ * @inband_presence_disabled: In-Band Presence Detect Disable supported by
+ *	controller and disabled per spec recommendation (PCIe r5.0, appendix I
+ *	implementation note)
+ * @slot_ctrl: cached copy of the Slot Control register
+ * @ctrl_lock: serializes writes to the Slot Control register
+ * @cmd_started: jiffies when the Slot Control register was last written;
+ *	the next write is allowed 1 second later, absent a Command Completed
+ *	interrupt (PCIe r4.0, sec 6.7.3.2)
+ * @cmd_busy: flag set on Slot Control register write, cleared by IRQ handler
+ *	on reception of a Command Completed event
+ * @queue: wait queue to wake up on reception of a Command Completed event,
+ *	used for synchronous writes to the Slot Control register
+ * @pending_events: used by the IRQ handler to save events retrieved from the
+ *	Slot Status register for later consumption by the IRQ thread
+ * @notification_enabled: whether the IRQ was requested successfully
+ * @power_fault_detected: whether a power fault was detected by the hardware
+ *	that has not yet been cleared by the user
+ * @poll_thread: thread to poll for slot events if no IRQ is available,
+ *	enabled with pciehp_poll_mode module parameter
+ * @state: current state machine position
+ * @state_lock: protects reads and writes of @state;
+ *	protects scheduling, execution and cancellation of @button_work
+ * @button_work: work item to turn the slot on or off after 5 seconds
+ *	in response to an Attention Button press
+ * @hotplug_slot: structure registered with the PCI hotplug core
+ * @reset_lock: prevents access to the Data Link Layer Link Active bit in the
+ *	Link Status register and to the Presence Detect State bit in the Slot
+ *	Status register during a slot reset which may cause them to flap
+ * @depth: Number of additional hotplug ports in the path to the root bus,
+ *	used as lock subclass for @reset_lock
+ * @ist_running: flag to keep user request waiting while IRQ thread is running
+ * @request_result: result of last user request submitted to the IRQ thread
+ * @requester: wait queue to wake up on completion of user request,
+ *	used for synchronous slot enable/disable request via sysfs
+ *
+ * PCIe hotplug has a 1:1 relationship between controller and slot, hence
+ * unlike other drivers, the two aren't represented by separate structures.
+ */
+struct controller {
+	struct pcie_device *pcie;
+
+	u32 slot_cap;				/* capabilities and quirks */
+	unsigned int inband_presence_disabled:1;
+
+	u16 slot_ctrl;				/* control register access */
+	struct mutex ctrl_lock;
+	unsigned long cmd_started;
+	unsigned int cmd_busy:1;
+	wait_queue_head_t queue;
+
+	atomic_t pending_events;		/* event handling */
+	unsigned int notification_enabled:1;
+	unsigned int power_fault_detected;
+	struct task_struct *poll_thread;
+
+	u8 state;				/* state machine */
+	struct mutex state_lock;
+	struct delayed_work button_work;
+
+	struct hotplug_slot hotplug_slot;	/* hotplug core interface */
+	struct rw_semaphore reset_lock;
+	unsigned int depth;
+	unsigned int ist_running;
+	int request_result;
+	wait_queue_head_t requester;
+};
+
+/**
+ * DOC: Slot state
+ *
+ * @OFF_STATE: slot is powered off, no subordinate devices are enumerated
+ * @BLINKINGON_STATE: slot will be powered on after the 5 second delay,
+ *	Power Indicator is blinking
+ * @BLINKINGOFF_STATE: slot will be powered off after the 5 second delay,
+ *	Power Indicator is blinking
+ * @POWERON_STATE: slot is currently powering on
+ * @POWEROFF_STATE: slot is currently powering off
+ * @ON_STATE: slot is powered on, subordinate devices have been enumerated
+ */
+#define OFF_STATE			0
+#define BLINKINGON_STATE		1
+#define BLINKINGOFF_STATE		2
+#define POWERON_STATE			3
+#define POWEROFF_STATE			4
+#define ON_STATE			5
+
+/**
+ * DOC: Flags to request an action from the IRQ thread
+ *
+ * These are stored together with events read from the Slot Status register,
+ * hence must be greater than its 16-bit width.
+ *
+ * %DISABLE_SLOT: Disable the slot in response to a user request via sysfs or
+ *	an Attention Button press after the 5 second delay
+ * %RERUN_ISR: Used by the IRQ handler to inform the IRQ thread that the
+ *	hotplug port was inaccessible when the interrupt occurred, requiring
+ *	that the IRQ handler is rerun by the IRQ thread after it has made the
+ *	hotplug port accessible by runtime resuming its parents to D0
+ */
+#define DISABLE_SLOT		(1 << 16)
+#define RERUN_ISR		(1 << 17)
+
+#define ATTN_BUTTN(ctrl)	((ctrl)->slot_cap & PCI_EXP_SLTCAP_ABP)
+#define POWER_CTRL(ctrl)	((ctrl)->slot_cap & PCI_EXP_SLTCAP_PCP)
+#define MRL_SENS(ctrl)		((ctrl)->slot_cap & PCI_EXP_SLTCAP_MRLSP)
+#define ATTN_LED(ctrl)		((ctrl)->slot_cap & PCI_EXP_SLTCAP_AIP)
+#define PWR_LED(ctrl)		((ctrl)->slot_cap & PCI_EXP_SLTCAP_PIP)
+#define NO_CMD_CMPL(ctrl)	((ctrl)->slot_cap & PCI_EXP_SLTCAP_NCCS)
+#define PSN(ctrl)		(((ctrl)->slot_cap & PCI_EXP_SLTCAP_PSN) >> 19)
+
+void pciehp_request(struct controller *ctrl, int action);
+void pciehp_handle_button_press(struct controller *ctrl);
+void pciehp_handle_disable_request(struct controller *ctrl);
+void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events);
+int pciehp_configure_device(struct controller *ctrl);
+void pciehp_unconfigure_device(struct controller *ctrl, bool presence);
+void pciehp_queue_pushbutton_work(struct work_struct *work);
+struct controller *pcie_init(struct pcie_device *dev);
+int pcie_init_notification(struct controller *ctrl);
+void pcie_shutdown_notification(struct controller *ctrl);
+void pcie_clear_hotplug_events(struct controller *ctrl);
+void pcie_enable_interrupt(struct controller *ctrl);
+void pcie_disable_interrupt(struct controller *ctrl);
+int pciehp_power_on_slot(struct controller *ctrl);
+void pciehp_power_off_slot(struct controller *ctrl);
+void pciehp_get_power_status(struct controller *ctrl, u8 *status);
+
+#define INDICATOR_NOOP -1	/* Leave indicator unchanged */
+void pciehp_set_indicators(struct controller *ctrl, int pwr, int attn);
+
+void pciehp_get_latch_status(struct controller *ctrl, u8 *status);
+int pciehp_query_power_fault(struct controller *ctrl);
+int pciehp_card_present(struct controller *ctrl);
+int pciehp_card_present_or_link_active(struct controller *ctrl);
+int pciehp_check_link_status(struct controller *ctrl);
+int pciehp_check_link_active(struct controller *ctrl);
+void pciehp_release_ctrl(struct controller *ctrl);
+
+int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot);
+int pciehp_sysfs_disable_slot(struct hotplug_slot *hotplug_slot);
+int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe);
+int pciehp_get_attention_status(struct hotplug_slot *hotplug_slot, u8 *status);
+int pciehp_set_raw_indicator_status(struct hotplug_slot *h_slot, u8 status);
+int pciehp_get_raw_indicator_status(struct hotplug_slot *h_slot, u8 *status);
+
+int pciehp_slot_reset(struct pcie_device *dev);
+
+static inline const char *slot_name(struct controller *ctrl)
+{
+	return hotplug_slot_name(&ctrl->hotplug_slot);
+}
+
+static inline struct controller *to_ctrl(struct hotplug_slot *hotplug_slot)
+{
+	return container_of(hotplug_slot, struct controller, hotplug_slot);
+}
+
+#endif				/* _PCIEHP_H */
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
new file mode 100644
index 000000000..4042d87d5
--- /dev/null
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCI Express Hot Plug Controller Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM Corp.
+ * Copyright (C) 2003-2004 Intel Corporation
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
+ *
+ * Authors:
+ *   Dan Zink <dan.zink@compaq.com>
+ *   Greg Kroah-Hartman <greg@kroah.com>
+ *   Dely Sy <dely.l.sy@intel.com>"
+ */
+
+#define pr_fmt(fmt) "pciehp: " fmt
+#define dev_fmt pr_fmt
+
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include "pciehp.h"
+
+#include "../pci.h"
+
+/* Global variables */
+bool pciehp_poll_mode;
+int pciehp_poll_time;
+
+/*
+ * not really modular, but the easiest way to keep compat with existing
+ * bootargs behaviour is to continue using module_param here.
+ */
+module_param(pciehp_poll_mode, bool, 0644);
+module_param(pciehp_poll_time, int, 0644);
+MODULE_PARM_DESC(pciehp_poll_mode, "Using polling mechanism for hot-plug events or not");
+MODULE_PARM_DESC(pciehp_poll_time, "Polling mechanism frequency, in seconds");
+
+static int set_attention_status(struct hotplug_slot *slot, u8 value);
+static int get_power_status(struct hotplug_slot *slot, u8 *value);
+static int get_latch_status(struct hotplug_slot *slot, u8 *value);
+static int get_adapter_status(struct hotplug_slot *slot, u8 *value);
+
+static int init_slot(struct controller *ctrl)
+{
+	struct hotplug_slot_ops *ops;
+	char name[SLOT_NAME_SIZE];
+	int retval;
+
+	/* Setup hotplug slot ops */
+	ops = kzalloc(sizeof(*ops), GFP_KERNEL);
+	if (!ops)
+		return -ENOMEM;
+
+	ops->enable_slot = pciehp_sysfs_enable_slot;
+	ops->disable_slot = pciehp_sysfs_disable_slot;
+	ops->get_power_status = get_power_status;
+	ops->get_adapter_status = get_adapter_status;
+	ops->reset_slot = pciehp_reset_slot;
+	if (MRL_SENS(ctrl))
+		ops->get_latch_status = get_latch_status;
+	if (ATTN_LED(ctrl)) {
+		ops->get_attention_status = pciehp_get_attention_status;
+		ops->set_attention_status = set_attention_status;
+	} else if (ctrl->pcie->port->hotplug_user_indicators) {
+		ops->get_attention_status = pciehp_get_raw_indicator_status;
+		ops->set_attention_status = pciehp_set_raw_indicator_status;
+	}
+
+	/* register this slot with the hotplug pci core */
+	ctrl->hotplug_slot.ops = ops;
+	snprintf(name, SLOT_NAME_SIZE, "%u", PSN(ctrl));
+
+	retval = pci_hp_initialize(&ctrl->hotplug_slot,
+				   ctrl->pcie->port->subordinate, 0, name);
+	if (retval) {
+		ctrl_err(ctrl, "pci_hp_initialize failed: error %d\n", retval);
+		kfree(ops);
+	}
+	return retval;
+}
+
+static void cleanup_slot(struct controller *ctrl)
+{
+	struct hotplug_slot *hotplug_slot = &ctrl->hotplug_slot;
+
+	pci_hp_destroy(hotplug_slot);
+	kfree(hotplug_slot->ops);
+}
+
+/*
+ * set_attention_status - Turns the Attention Indicator on, off or blinking
+ */
+static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
+{
+	struct controller *ctrl = to_ctrl(hotplug_slot);
+	struct pci_dev *pdev = ctrl->pcie->port;
+
+	if (status)
+		status <<= PCI_EXP_SLTCTL_ATTN_IND_SHIFT;
+	else
+		status = PCI_EXP_SLTCTL_ATTN_IND_OFF;
+
+	pci_config_pm_runtime_get(pdev);
+	pciehp_set_indicators(ctrl, INDICATOR_NOOP, status);
+	pci_config_pm_runtime_put(pdev);
+	return 0;
+}
+
+static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct controller *ctrl = to_ctrl(hotplug_slot);
+	struct pci_dev *pdev = ctrl->pcie->port;
+
+	pci_config_pm_runtime_get(pdev);
+	pciehp_get_power_status(ctrl, value);
+	pci_config_pm_runtime_put(pdev);
+	return 0;
+}
+
+static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct controller *ctrl = to_ctrl(hotplug_slot);
+	struct pci_dev *pdev = ctrl->pcie->port;
+
+	pci_config_pm_runtime_get(pdev);
+	pciehp_get_latch_status(ctrl, value);
+	pci_config_pm_runtime_put(pdev);
+	return 0;
+}
+
+static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct controller *ctrl = to_ctrl(hotplug_slot);
+	struct pci_dev *pdev = ctrl->pcie->port;
+	int ret;
+
+	pci_config_pm_runtime_get(pdev);
+	ret = pciehp_card_present_or_link_active(ctrl);
+	pci_config_pm_runtime_put(pdev);
+	if (ret < 0)
+		return ret;
+
+	*value = ret;
+	return 0;
+}
+
+/**
+ * pciehp_check_presence() - synthesize event if presence has changed
+ * @ctrl: controller to check
+ *
+ * On probe and resume, an explicit presence check is necessary to bring up an
+ * occupied slot or bring down an unoccupied slot.  This can't be triggered by
+ * events in the Slot Status register, they may be stale and are therefore
+ * cleared.  Secondly, sending an interrupt for "events that occur while
+ * interrupt generation is disabled [when] interrupt generation is subsequently
+ * enabled" is optional per PCIe r4.0, sec 6.7.3.4.
+ */
+static void pciehp_check_presence(struct controller *ctrl)
+{
+	int occupied;
+
+	down_read_nested(&ctrl->reset_lock, ctrl->depth);
+	mutex_lock(&ctrl->state_lock);
+
+	occupied = pciehp_card_present_or_link_active(ctrl);
+	if ((occupied > 0 && (ctrl->state == OFF_STATE ||
+			  ctrl->state == BLINKINGON_STATE)) ||
+	    (!occupied && (ctrl->state == ON_STATE ||
+			   ctrl->state == BLINKINGOFF_STATE)))
+		pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
+
+	mutex_unlock(&ctrl->state_lock);
+	up_read(&ctrl->reset_lock);
+}
+
+static int pciehp_probe(struct pcie_device *dev)
+{
+	int rc;
+	struct controller *ctrl;
+
+	/* If this is not a "hotplug" service, we have no business here. */
+	if (dev->service != PCIE_PORT_SERVICE_HP)
+		return -ENODEV;
+
+	if (!dev->port->subordinate) {
+		/* Can happen if we run out of bus numbers during probe */
+		pci_err(dev->port,
+			"Hotplug bridge without secondary bus, ignoring\n");
+		return -ENODEV;
+	}
+
+	ctrl = pcie_init(dev);
+	if (!ctrl) {
+		pci_err(dev->port, "Controller initialization failed\n");
+		return -ENODEV;
+	}
+	set_service_data(dev, ctrl);
+
+	/* Setup the slot information structures */
+	rc = init_slot(ctrl);
+	if (rc) {
+		if (rc == -EBUSY)
+			ctrl_warn(ctrl, "Slot already registered by another hotplug driver\n");
+		else
+			ctrl_err(ctrl, "Slot initialization failed (%d)\n", rc);
+		goto err_out_release_ctlr;
+	}
+
+	/* Enable events after we have setup the data structures */
+	rc = pcie_init_notification(ctrl);
+	if (rc) {
+		ctrl_err(ctrl, "Notification initialization failed (%d)\n", rc);
+		goto err_out_free_ctrl_slot;
+	}
+
+	/* Publish to user space */
+	rc = pci_hp_add(&ctrl->hotplug_slot);
+	if (rc) {
+		ctrl_err(ctrl, "Publication to user space failed (%d)\n", rc);
+		goto err_out_shutdown_notification;
+	}
+
+	pciehp_check_presence(ctrl);
+
+	return 0;
+
+err_out_shutdown_notification:
+	pcie_shutdown_notification(ctrl);
+err_out_free_ctrl_slot:
+	cleanup_slot(ctrl);
+err_out_release_ctlr:
+	pciehp_release_ctrl(ctrl);
+	return -ENODEV;
+}
+
+static void pciehp_remove(struct pcie_device *dev)
+{
+	struct controller *ctrl = get_service_data(dev);
+
+	pci_hp_del(&ctrl->hotplug_slot);
+	pcie_shutdown_notification(ctrl);
+	cleanup_slot(ctrl);
+	pciehp_release_ctrl(ctrl);
+}
+
+#ifdef CONFIG_PM
+static bool pme_is_native(struct pcie_device *dev)
+{
+	const struct pci_host_bridge *host;
+
+	host = pci_find_host_bridge(dev->port->bus);
+	return pcie_ports_native || host->native_pme;
+}
+
+static void pciehp_disable_interrupt(struct pcie_device *dev)
+{
+	/*
+	 * Disable hotplug interrupt so that it does not trigger
+	 * immediately when the downstream link goes down.
+	 */
+	if (pme_is_native(dev))
+		pcie_disable_interrupt(get_service_data(dev));
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int pciehp_suspend(struct pcie_device *dev)
+{
+	/*
+	 * If the port is already runtime suspended we can keep it that
+	 * way.
+	 */
+	if (dev_pm_skip_suspend(&dev->port->dev))
+		return 0;
+
+	pciehp_disable_interrupt(dev);
+	return 0;
+}
+
+static int pciehp_resume_noirq(struct pcie_device *dev)
+{
+	struct controller *ctrl = get_service_data(dev);
+
+	/* pci_restore_state() just wrote to the Slot Control register */
+	ctrl->cmd_started = jiffies;
+	ctrl->cmd_busy = true;
+
+	/* clear spurious events from rediscovery of inserted card */
+	if (ctrl->state == ON_STATE || ctrl->state == BLINKINGOFF_STATE)
+		pcie_clear_hotplug_events(ctrl);
+
+	return 0;
+}
+#endif
+
+static int pciehp_resume(struct pcie_device *dev)
+{
+	struct controller *ctrl = get_service_data(dev);
+
+	if (pme_is_native(dev))
+		pcie_enable_interrupt(ctrl);
+
+	pciehp_check_presence(ctrl);
+
+	return 0;
+}
+
+static int pciehp_runtime_suspend(struct pcie_device *dev)
+{
+	pciehp_disable_interrupt(dev);
+	return 0;
+}
+
+static int pciehp_runtime_resume(struct pcie_device *dev)
+{
+	struct controller *ctrl = get_service_data(dev);
+
+	/* pci_restore_state() just wrote to the Slot Control register */
+	ctrl->cmd_started = jiffies;
+	ctrl->cmd_busy = true;
+
+	/* clear spurious events from rediscovery of inserted card */
+	if ((ctrl->state == ON_STATE || ctrl->state == BLINKINGOFF_STATE) &&
+	     pme_is_native(dev))
+		pcie_clear_hotplug_events(ctrl);
+
+	return pciehp_resume(dev);
+}
+#endif /* PM */
+
+static struct pcie_port_service_driver hpdriver_portdrv = {
+	.name		= "pciehp",
+	.port_type	= PCIE_ANY_PORT,
+	.service	= PCIE_PORT_SERVICE_HP,
+
+	.probe		= pciehp_probe,
+	.remove		= pciehp_remove,
+
+#ifdef	CONFIG_PM
+#ifdef	CONFIG_PM_SLEEP
+	.suspend	= pciehp_suspend,
+	.resume_noirq	= pciehp_resume_noirq,
+	.resume		= pciehp_resume,
+#endif
+	.runtime_suspend = pciehp_runtime_suspend,
+	.runtime_resume	= pciehp_runtime_resume,
+#endif	/* PM */
+
+	.slot_reset	= pciehp_slot_reset,
+};
+
+int __init pcie_hp_init(void)
+{
+	int retval = 0;
+
+	retval = pcie_port_service_register(&hpdriver_portdrv);
+	pr_debug("pcie_port_service_register = %d\n", retval);
+	if (retval)
+		pr_debug("Failure to register service\n");
+
+	return retval;
+}
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
new file mode 100644
index 000000000..dcdbfcf40
--- /dev/null
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -0,0 +1,440 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCI Express Hot Plug Controller Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM Corp.
+ * Copyright (C) 2003-2004 Intel Corporation
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
+ *
+ */
+
+#define dev_fmt(fmt) "pciehp: " fmt
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pm_runtime.h>
+#include <linux/pci.h>
+#include "pciehp.h"
+
+/* The following routines constitute the bulk of the
+   hotplug controller logic
+ */
+
+#define SAFE_REMOVAL	 true
+#define SURPRISE_REMOVAL false
+
+static void set_slot_off(struct controller *ctrl)
+{
+	/*
+	 * Turn off slot, turn on attention indicator, turn off power
+	 * indicator
+	 */
+	if (POWER_CTRL(ctrl)) {
+		pciehp_power_off_slot(ctrl);
+
+		/*
+		 * After turning power off, we must wait for at least 1 second
+		 * before taking any action that relies on power having been
+		 * removed from the slot/adapter.
+		 */
+		msleep(1000);
+	}
+
+	pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+			      PCI_EXP_SLTCTL_ATTN_IND_ON);
+}
+
+/**
+ * board_added - Called after a board has been added to the system.
+ * @ctrl: PCIe hotplug controller where board is added
+ *
+ * Turns power on for the board.
+ * Configures board.
+ */
+static int board_added(struct controller *ctrl)
+{
+	int retval = 0;
+	struct pci_bus *parent = ctrl->pcie->port->subordinate;
+
+	if (POWER_CTRL(ctrl)) {
+		/* Power on slot */
+		retval = pciehp_power_on_slot(ctrl);
+		if (retval)
+			return retval;
+	}
+
+	pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK,
+			      INDICATOR_NOOP);
+
+	/* Check link training status */
+	retval = pciehp_check_link_status(ctrl);
+	if (retval)
+		goto err_exit;
+
+	/* Check for a power fault */
+	if (ctrl->power_fault_detected || pciehp_query_power_fault(ctrl)) {
+		ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(ctrl));
+		retval = -EIO;
+		goto err_exit;
+	}
+
+	retval = pciehp_configure_device(ctrl);
+	if (retval) {
+		if (retval != -EEXIST) {
+			ctrl_err(ctrl, "Cannot add device at %04x:%02x:00\n",
+				 pci_domain_nr(parent), parent->number);
+			goto err_exit;
+		}
+	}
+
+	pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON,
+			      PCI_EXP_SLTCTL_ATTN_IND_OFF);
+	return 0;
+
+err_exit:
+	set_slot_off(ctrl);
+	return retval;
+}
+
+/**
+ * remove_board - Turn off slot and Power Indicator
+ * @ctrl: PCIe hotplug controller where board is being removed
+ * @safe_removal: whether the board is safely removed (versus surprise removed)
+ */
+static void remove_board(struct controller *ctrl, bool safe_removal)
+{
+	pciehp_unconfigure_device(ctrl, safe_removal);
+
+	if (POWER_CTRL(ctrl)) {
+		pciehp_power_off_slot(ctrl);
+
+		/*
+		 * After turning power off, we must wait for at least 1 second
+		 * before taking any action that relies on power having been
+		 * removed from the slot/adapter.
+		 */
+		msleep(1000);
+
+		/* Ignore link or presence changes caused by power off */
+		atomic_and(~(PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC),
+			   &ctrl->pending_events);
+	}
+
+	pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+			      INDICATOR_NOOP);
+}
+
+static int pciehp_enable_slot(struct controller *ctrl);
+static int pciehp_disable_slot(struct controller *ctrl, bool safe_removal);
+
+void pciehp_request(struct controller *ctrl, int action)
+{
+	atomic_or(action, &ctrl->pending_events);
+	if (!pciehp_poll_mode)
+		irq_wake_thread(ctrl->pcie->irq, ctrl);
+}
+
+void pciehp_queue_pushbutton_work(struct work_struct *work)
+{
+	struct controller *ctrl = container_of(work, struct controller,
+					       button_work.work);
+
+	mutex_lock(&ctrl->state_lock);
+	switch (ctrl->state) {
+	case BLINKINGOFF_STATE:
+		pciehp_request(ctrl, DISABLE_SLOT);
+		break;
+	case BLINKINGON_STATE:
+		pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
+		break;
+	default:
+		break;
+	}
+	mutex_unlock(&ctrl->state_lock);
+}
+
+void pciehp_handle_button_press(struct controller *ctrl)
+{
+	mutex_lock(&ctrl->state_lock);
+	switch (ctrl->state) {
+	case OFF_STATE:
+	case ON_STATE:
+		if (ctrl->state == ON_STATE) {
+			ctrl->state = BLINKINGOFF_STATE;
+			ctrl_info(ctrl, "Slot(%s): Button press: will power off in 5 sec\n",
+				  slot_name(ctrl));
+		} else {
+			ctrl->state = BLINKINGON_STATE;
+			ctrl_info(ctrl, "Slot(%s): Button press: will power on in 5 sec\n",
+				  slot_name(ctrl));
+		}
+		/* blink power indicator and turn off attention */
+		pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK,
+				      PCI_EXP_SLTCTL_ATTN_IND_OFF);
+		schedule_delayed_work(&ctrl->button_work, 5 * HZ);
+		break;
+	case BLINKINGOFF_STATE:
+	case BLINKINGON_STATE:
+		/*
+		 * Cancel if we are still blinking; this means that we
+		 * press the attention again before the 5 sec. limit
+		 * expires to cancel hot-add or hot-remove
+		 */
+		cancel_delayed_work(&ctrl->button_work);
+		if (ctrl->state == BLINKINGOFF_STATE) {
+			ctrl->state = ON_STATE;
+			pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON,
+					      PCI_EXP_SLTCTL_ATTN_IND_OFF);
+			ctrl_info(ctrl, "Slot(%s): Button press: canceling request to power off\n",
+				  slot_name(ctrl));
+		} else {
+			ctrl->state = OFF_STATE;
+			pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+					      PCI_EXP_SLTCTL_ATTN_IND_OFF);
+			ctrl_info(ctrl, "Slot(%s): Button press: canceling request to power on\n",
+				  slot_name(ctrl));
+		}
+		break;
+	default:
+		ctrl_err(ctrl, "Slot(%s): Button press: ignoring invalid state %#x\n",
+			 slot_name(ctrl), ctrl->state);
+		break;
+	}
+	mutex_unlock(&ctrl->state_lock);
+}
+
+void pciehp_handle_disable_request(struct controller *ctrl)
+{
+	mutex_lock(&ctrl->state_lock);
+	switch (ctrl->state) {
+	case BLINKINGON_STATE:
+	case BLINKINGOFF_STATE:
+		cancel_delayed_work(&ctrl->button_work);
+		break;
+	}
+	ctrl->state = POWEROFF_STATE;
+	mutex_unlock(&ctrl->state_lock);
+
+	ctrl->request_result = pciehp_disable_slot(ctrl, SAFE_REMOVAL);
+}
+
+void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events)
+{
+	int present, link_active;
+
+	/*
+	 * If the slot is on and presence or link has changed, turn it off.
+	 * Even if it's occupied again, we cannot assume the card is the same.
+	 */
+	mutex_lock(&ctrl->state_lock);
+	switch (ctrl->state) {
+	case BLINKINGOFF_STATE:
+		cancel_delayed_work(&ctrl->button_work);
+		fallthrough;
+	case ON_STATE:
+		ctrl->state = POWEROFF_STATE;
+		mutex_unlock(&ctrl->state_lock);
+		if (events & PCI_EXP_SLTSTA_DLLSC)
+			ctrl_info(ctrl, "Slot(%s): Link Down\n",
+				  slot_name(ctrl));
+		if (events & PCI_EXP_SLTSTA_PDC)
+			ctrl_info(ctrl, "Slot(%s): Card not present\n",
+				  slot_name(ctrl));
+		pciehp_disable_slot(ctrl, SURPRISE_REMOVAL);
+		break;
+	default:
+		mutex_unlock(&ctrl->state_lock);
+		break;
+	}
+
+	/* Turn the slot on if it's occupied or link is up */
+	mutex_lock(&ctrl->state_lock);
+	present = pciehp_card_present(ctrl);
+	link_active = pciehp_check_link_active(ctrl);
+	if (present <= 0 && link_active <= 0) {
+		if (ctrl->state == BLINKINGON_STATE) {
+			ctrl->state = OFF_STATE;
+			cancel_delayed_work(&ctrl->button_work);
+			pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+					      INDICATOR_NOOP);
+			ctrl_info(ctrl, "Slot(%s): Card not present\n",
+				  slot_name(ctrl));
+		}
+		mutex_unlock(&ctrl->state_lock);
+		return;
+	}
+
+	switch (ctrl->state) {
+	case BLINKINGON_STATE:
+		cancel_delayed_work(&ctrl->button_work);
+		fallthrough;
+	case OFF_STATE:
+		ctrl->state = POWERON_STATE;
+		mutex_unlock(&ctrl->state_lock);
+		if (present)
+			ctrl_info(ctrl, "Slot(%s): Card present\n",
+				  slot_name(ctrl));
+		if (link_active)
+			ctrl_info(ctrl, "Slot(%s): Link Up\n",
+				  slot_name(ctrl));
+		ctrl->request_result = pciehp_enable_slot(ctrl);
+		break;
+	default:
+		mutex_unlock(&ctrl->state_lock);
+		break;
+	}
+}
+
+static int __pciehp_enable_slot(struct controller *ctrl)
+{
+	u8 getstatus = 0;
+
+	if (MRL_SENS(ctrl)) {
+		pciehp_get_latch_status(ctrl, &getstatus);
+		if (getstatus) {
+			ctrl_info(ctrl, "Slot(%s): Latch open\n",
+				  slot_name(ctrl));
+			return -ENODEV;
+		}
+	}
+
+	if (POWER_CTRL(ctrl)) {
+		pciehp_get_power_status(ctrl, &getstatus);
+		if (getstatus) {
+			ctrl_info(ctrl, "Slot(%s): Already enabled\n",
+				  slot_name(ctrl));
+			return 0;
+		}
+	}
+
+	return board_added(ctrl);
+}
+
+static int pciehp_enable_slot(struct controller *ctrl)
+{
+	int ret;
+
+	pm_runtime_get_sync(&ctrl->pcie->port->dev);
+	ret = __pciehp_enable_slot(ctrl);
+	if (ret && ATTN_BUTTN(ctrl))
+		/* may be blinking */
+		pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+				      INDICATOR_NOOP);
+	pm_runtime_put(&ctrl->pcie->port->dev);
+
+	mutex_lock(&ctrl->state_lock);
+	ctrl->state = ret ? OFF_STATE : ON_STATE;
+	mutex_unlock(&ctrl->state_lock);
+
+	return ret;
+}
+
+static int __pciehp_disable_slot(struct controller *ctrl, bool safe_removal)
+{
+	u8 getstatus = 0;
+
+	if (POWER_CTRL(ctrl)) {
+		pciehp_get_power_status(ctrl, &getstatus);
+		if (!getstatus) {
+			ctrl_info(ctrl, "Slot(%s): Already disabled\n",
+				  slot_name(ctrl));
+			return -EINVAL;
+		}
+	}
+
+	remove_board(ctrl, safe_removal);
+	return 0;
+}
+
+static int pciehp_disable_slot(struct controller *ctrl, bool safe_removal)
+{
+	int ret;
+
+	pm_runtime_get_sync(&ctrl->pcie->port->dev);
+	ret = __pciehp_disable_slot(ctrl, safe_removal);
+	pm_runtime_put(&ctrl->pcie->port->dev);
+
+	mutex_lock(&ctrl->state_lock);
+	ctrl->state = OFF_STATE;
+	mutex_unlock(&ctrl->state_lock);
+
+	return ret;
+}
+
+int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot)
+{
+	struct controller *ctrl = to_ctrl(hotplug_slot);
+
+	mutex_lock(&ctrl->state_lock);
+	switch (ctrl->state) {
+	case BLINKINGON_STATE:
+	case OFF_STATE:
+		mutex_unlock(&ctrl->state_lock);
+		/*
+		 * The IRQ thread becomes a no-op if the user pulls out the
+		 * card before the thread wakes up, so initialize to -ENODEV.
+		 */
+		ctrl->request_result = -ENODEV;
+		pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
+		wait_event(ctrl->requester,
+			   !atomic_read(&ctrl->pending_events) &&
+			   !ctrl->ist_running);
+		return ctrl->request_result;
+	case POWERON_STATE:
+		ctrl_info(ctrl, "Slot(%s): Already in powering on state\n",
+			  slot_name(ctrl));
+		break;
+	case BLINKINGOFF_STATE:
+	case ON_STATE:
+	case POWEROFF_STATE:
+		ctrl_info(ctrl, "Slot(%s): Already enabled\n",
+			  slot_name(ctrl));
+		break;
+	default:
+		ctrl_err(ctrl, "Slot(%s): Invalid state %#x\n",
+			 slot_name(ctrl), ctrl->state);
+		break;
+	}
+	mutex_unlock(&ctrl->state_lock);
+
+	return -ENODEV;
+}
+
+int pciehp_sysfs_disable_slot(struct hotplug_slot *hotplug_slot)
+{
+	struct controller *ctrl = to_ctrl(hotplug_slot);
+
+	mutex_lock(&ctrl->state_lock);
+	switch (ctrl->state) {
+	case BLINKINGOFF_STATE:
+	case ON_STATE:
+		mutex_unlock(&ctrl->state_lock);
+		pciehp_request(ctrl, DISABLE_SLOT);
+		wait_event(ctrl->requester,
+			   !atomic_read(&ctrl->pending_events) &&
+			   !ctrl->ist_running);
+		return ctrl->request_result;
+	case POWEROFF_STATE:
+		ctrl_info(ctrl, "Slot(%s): Already in powering off state\n",
+			  slot_name(ctrl));
+		break;
+	case BLINKINGON_STATE:
+	case OFF_STATE:
+	case POWERON_STATE:
+		ctrl_info(ctrl, "Slot(%s): Already disabled\n",
+			  slot_name(ctrl));
+		break;
+	default:
+		ctrl_err(ctrl, "Slot(%s): Invalid state %#x\n",
+			 slot_name(ctrl), ctrl->state);
+		break;
+	}
+	mutex_unlock(&ctrl->state_lock);
+
+	return -ENODEV;
+}
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
new file mode 100644
index 000000000..fd713abdf
--- /dev/null
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -0,0 +1,1088 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCI Express PCI Hot Plug Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM Corp.
+ * Copyright (C) 2003-2004 Intel Corporation
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>,<kristen.c.accardi@intel.com>
+ */
+
+#define dev_fmt(fmt) "pciehp: " fmt
+
+#include <linux/dmi.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/jiffies.h>
+#include <linux/kthread.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+
+#include "../pci.h"
+#include "pciehp.h"
+
+static const struct dmi_system_id inband_presence_disabled_dmi_table[] = {
+	/*
+	 * Match all Dell systems, as some Dell systems have inband
+	 * presence disabled on NVMe slots (but don't support the bit to
+	 * report it). Setting inband presence disabled should have no
+	 * negative effect, except on broken hotplug slots that never
+	 * assert presence detect--and those will still work, they will
+	 * just have a bit of extra delay before being probed.
+	 */
+	{
+		.ident = "Dell System",
+		.matches = {
+			DMI_MATCH(DMI_OEM_STRING, "Dell System"),
+		},
+	},
+	{}
+};
+
+static inline struct pci_dev *ctrl_dev(struct controller *ctrl)
+{
+	return ctrl->pcie->port;
+}
+
+static irqreturn_t pciehp_isr(int irq, void *dev_id);
+static irqreturn_t pciehp_ist(int irq, void *dev_id);
+static int pciehp_poll(void *data);
+
+static inline int pciehp_request_irq(struct controller *ctrl)
+{
+	int retval, irq = ctrl->pcie->irq;
+
+	if (pciehp_poll_mode) {
+		ctrl->poll_thread = kthread_run(&pciehp_poll, ctrl,
+						"pciehp_poll-%s",
+						slot_name(ctrl));
+		return PTR_ERR_OR_ZERO(ctrl->poll_thread);
+	}
+
+	/* Installs the interrupt handler */
+	retval = request_threaded_irq(irq, pciehp_isr, pciehp_ist,
+				      IRQF_SHARED, "pciehp", ctrl);
+	if (retval)
+		ctrl_err(ctrl, "Cannot get irq %d for the hotplug controller\n",
+			 irq);
+	return retval;
+}
+
+static inline void pciehp_free_irq(struct controller *ctrl)
+{
+	if (pciehp_poll_mode)
+		kthread_stop(ctrl->poll_thread);
+	else
+		free_irq(ctrl->pcie->irq, ctrl);
+}
+
+static int pcie_poll_cmd(struct controller *ctrl, int timeout)
+{
+	struct pci_dev *pdev = ctrl_dev(ctrl);
+	u16 slot_status;
+
+	do {
+		pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
+		if (PCI_POSSIBLE_ERROR(slot_status)) {
+			ctrl_info(ctrl, "%s: no response from device\n",
+				  __func__);
+			return 0;
+		}
+
+		if (slot_status & PCI_EXP_SLTSTA_CC) {
+			pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,
+						   PCI_EXP_SLTSTA_CC);
+			ctrl->cmd_busy = 0;
+			smp_mb();
+			return 1;
+		}
+		msleep(10);
+		timeout -= 10;
+	} while (timeout >= 0);
+	return 0;	/* timeout */
+}
+
+static void pcie_wait_cmd(struct controller *ctrl)
+{
+	unsigned int msecs = pciehp_poll_mode ? 2500 : 1000;
+	unsigned long duration = msecs_to_jiffies(msecs);
+	unsigned long cmd_timeout = ctrl->cmd_started + duration;
+	unsigned long now, timeout;
+	int rc;
+
+	/*
+	 * If the controller does not generate notifications for command
+	 * completions, we never need to wait between writes.
+	 */
+	if (NO_CMD_CMPL(ctrl))
+		return;
+
+	if (!ctrl->cmd_busy)
+		return;
+
+	/*
+	 * Even if the command has already timed out, we want to call
+	 * pcie_poll_cmd() so it can clear PCI_EXP_SLTSTA_CC.
+	 */
+	now = jiffies;
+	if (time_before_eq(cmd_timeout, now))
+		timeout = 1;
+	else
+		timeout = cmd_timeout - now;
+
+	if (ctrl->slot_ctrl & PCI_EXP_SLTCTL_HPIE &&
+	    ctrl->slot_ctrl & PCI_EXP_SLTCTL_CCIE)
+		rc = wait_event_timeout(ctrl->queue, !ctrl->cmd_busy, timeout);
+	else
+		rc = pcie_poll_cmd(ctrl, jiffies_to_msecs(timeout));
+
+	if (!rc)
+		ctrl_info(ctrl, "Timeout on hotplug command %#06x (issued %u msec ago)\n",
+			  ctrl->slot_ctrl,
+			  jiffies_to_msecs(jiffies - ctrl->cmd_started));
+}
+
+#define CC_ERRATUM_MASK		(PCI_EXP_SLTCTL_PCC |	\
+				 PCI_EXP_SLTCTL_PIC |	\
+				 PCI_EXP_SLTCTL_AIC |	\
+				 PCI_EXP_SLTCTL_EIC)
+
+static void pcie_do_write_cmd(struct controller *ctrl, u16 cmd,
+			      u16 mask, bool wait)
+{
+	struct pci_dev *pdev = ctrl_dev(ctrl);
+	u16 slot_ctrl_orig, slot_ctrl;
+
+	mutex_lock(&ctrl->ctrl_lock);
+
+	/*
+	 * Always wait for any previous command that might still be in progress
+	 */
+	pcie_wait_cmd(ctrl);
+
+	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl);
+	if (PCI_POSSIBLE_ERROR(slot_ctrl)) {
+		ctrl_info(ctrl, "%s: no response from device\n", __func__);
+		goto out;
+	}
+
+	slot_ctrl_orig = slot_ctrl;
+	slot_ctrl &= ~mask;
+	slot_ctrl |= (cmd & mask);
+	ctrl->cmd_busy = 1;
+	smp_mb();
+	ctrl->slot_ctrl = slot_ctrl;
+	pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, slot_ctrl);
+	ctrl->cmd_started = jiffies;
+
+	/*
+	 * Controllers with the Intel CF118 and similar errata advertise
+	 * Command Completed support, but they only set Command Completed
+	 * if we change the "Control" bits for power, power indicator,
+	 * attention indicator, or interlock.  If we only change the
+	 * "Enable" bits, they never set the Command Completed bit.
+	 */
+	if (pdev->broken_cmd_compl &&
+	    (slot_ctrl_orig & CC_ERRATUM_MASK) == (slot_ctrl & CC_ERRATUM_MASK))
+		ctrl->cmd_busy = 0;
+
+	/*
+	 * Optionally wait for the hardware to be ready for a new command,
+	 * indicating completion of the above issued command.
+	 */
+	if (wait)
+		pcie_wait_cmd(ctrl);
+
+out:
+	mutex_unlock(&ctrl->ctrl_lock);
+}
+
+/**
+ * pcie_write_cmd - Issue controller command
+ * @ctrl: controller to which the command is issued
+ * @cmd:  command value written to slot control register
+ * @mask: bitmask of slot control register to be modified
+ */
+static void pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
+{
+	pcie_do_write_cmd(ctrl, cmd, mask, true);
+}
+
+/* Same as above without waiting for the hardware to latch */
+static void pcie_write_cmd_nowait(struct controller *ctrl, u16 cmd, u16 mask)
+{
+	pcie_do_write_cmd(ctrl, cmd, mask, false);
+}
+
+/**
+ * pciehp_check_link_active() - Is the link active
+ * @ctrl: PCIe hotplug controller
+ *
+ * Check whether the downstream link is currently active. Note it is
+ * possible that the card is removed immediately after this so the
+ * caller may need to take it into account.
+ *
+ * If the hotplug controller itself is not available anymore returns
+ * %-ENODEV.
+ */
+int pciehp_check_link_active(struct controller *ctrl)
+{
+	struct pci_dev *pdev = ctrl_dev(ctrl);
+	u16 lnk_status;
+	int ret;
+
+	ret = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
+	if (ret == PCIBIOS_DEVICE_NOT_FOUND || PCI_POSSIBLE_ERROR(lnk_status))
+		return -ENODEV;
+
+	ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);
+	ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status);
+
+	return ret;
+}
+
+static bool pci_bus_check_dev(struct pci_bus *bus, int devfn)
+{
+	u32 l;
+	int count = 0;
+	int delay = 1000, step = 20;
+	bool found = false;
+
+	do {
+		found = pci_bus_read_dev_vendor_id(bus, devfn, &l, 0);
+		count++;
+
+		if (found)
+			break;
+
+		msleep(step);
+		delay -= step;
+	} while (delay > 0);
+
+	if (count > 1)
+		pr_debug("pci %04x:%02x:%02x.%d id reading try %d times with interval %d ms to get %08x\n",
+			pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
+			PCI_FUNC(devfn), count, step, l);
+
+	return found;
+}
+
+static void pcie_wait_for_presence(struct pci_dev *pdev)
+{
+	int timeout = 1250;
+	u16 slot_status;
+
+	do {
+		pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
+		if (slot_status & PCI_EXP_SLTSTA_PDS)
+			return;
+		msleep(10);
+		timeout -= 10;
+	} while (timeout > 0);
+}
+
+int pciehp_check_link_status(struct controller *ctrl)
+{
+	struct pci_dev *pdev = ctrl_dev(ctrl);
+	bool found;
+	u16 lnk_status;
+
+	if (!pcie_wait_for_link(pdev, true)) {
+		ctrl_info(ctrl, "Slot(%s): No link\n", slot_name(ctrl));
+		return -1;
+	}
+
+	if (ctrl->inband_presence_disabled)
+		pcie_wait_for_presence(pdev);
+
+	found = pci_bus_check_dev(ctrl->pcie->port->subordinate,
+					PCI_DEVFN(0, 0));
+
+	/* ignore link or presence changes up to this point */
+	if (found)
+		atomic_and(~(PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC),
+			   &ctrl->pending_events);
+
+	pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
+	ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status);
+	if ((lnk_status & PCI_EXP_LNKSTA_LT) ||
+	    !(lnk_status & PCI_EXP_LNKSTA_NLW)) {
+		ctrl_info(ctrl, "Slot(%s): Cannot train link: status %#06x\n",
+			  slot_name(ctrl), lnk_status);
+		return -1;
+	}
+
+	pcie_update_link_speed(ctrl->pcie->port->subordinate, lnk_status);
+
+	if (!found) {
+		ctrl_info(ctrl, "Slot(%s): No device found\n",
+			  slot_name(ctrl));
+		return -1;
+	}
+
+	return 0;
+}
+
+static int __pciehp_link_set(struct controller *ctrl, bool enable)
+{
+	struct pci_dev *pdev = ctrl_dev(ctrl);
+
+	pcie_capability_clear_and_set_word(pdev, PCI_EXP_LNKCTL,
+					   PCI_EXP_LNKCTL_LD,
+					   enable ? 0 : PCI_EXP_LNKCTL_LD);
+
+	return 0;
+}
+
+static int pciehp_link_enable(struct controller *ctrl)
+{
+	return __pciehp_link_set(ctrl, true);
+}
+
+int pciehp_get_raw_indicator_status(struct hotplug_slot *hotplug_slot,
+				    u8 *status)
+{
+	struct controller *ctrl = to_ctrl(hotplug_slot);
+	struct pci_dev *pdev = ctrl_dev(ctrl);
+	u16 slot_ctrl;
+
+	pci_config_pm_runtime_get(pdev);
+	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl);
+	pci_config_pm_runtime_put(pdev);
+	*status = (slot_ctrl & (PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC)) >> 6;
+	return 0;
+}
+
+int pciehp_get_attention_status(struct hotplug_slot *hotplug_slot, u8 *status)
+{
+	struct controller *ctrl = to_ctrl(hotplug_slot);
+	struct pci_dev *pdev = ctrl_dev(ctrl);
+	u16 slot_ctrl;
+
+	pci_config_pm_runtime_get(pdev);
+	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl);
+	pci_config_pm_runtime_put(pdev);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x, value read %x\n", __func__,
+		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_ctrl);
+
+	switch (slot_ctrl & PCI_EXP_SLTCTL_AIC) {
+	case PCI_EXP_SLTCTL_ATTN_IND_ON:
+		*status = 1;	/* On */
+		break;
+	case PCI_EXP_SLTCTL_ATTN_IND_BLINK:
+		*status = 2;	/* Blink */
+		break;
+	case PCI_EXP_SLTCTL_ATTN_IND_OFF:
+		*status = 0;	/* Off */
+		break;
+	default:
+		*status = 0xFF;
+		break;
+	}
+
+	return 0;
+}
+
+void pciehp_get_power_status(struct controller *ctrl, u8 *status)
+{
+	struct pci_dev *pdev = ctrl_dev(ctrl);
+	u16 slot_ctrl;
+
+	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x value read %x\n", __func__,
+		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_ctrl);
+
+	switch (slot_ctrl & PCI_EXP_SLTCTL_PCC) {
+	case PCI_EXP_SLTCTL_PWR_ON:
+		*status = 1;	/* On */
+		break;
+	case PCI_EXP_SLTCTL_PWR_OFF:
+		*status = 0;	/* Off */
+		break;
+	default:
+		*status = 0xFF;
+		break;
+	}
+}
+
+void pciehp_get_latch_status(struct controller *ctrl, u8 *status)
+{
+	struct pci_dev *pdev = ctrl_dev(ctrl);
+	u16 slot_status;
+
+	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
+	*status = !!(slot_status & PCI_EXP_SLTSTA_MRLSS);
+}
+
+/**
+ * pciehp_card_present() - Is the card present
+ * @ctrl: PCIe hotplug controller
+ *
+ * Function checks whether the card is currently present in the slot and
+ * in that case returns true. Note it is possible that the card is
+ * removed immediately after the check so the caller may need to take
+ * this into account.
+ *
+ * It the hotplug controller itself is not available anymore returns
+ * %-ENODEV.
+ */
+int pciehp_card_present(struct controller *ctrl)
+{
+	struct pci_dev *pdev = ctrl_dev(ctrl);
+	u16 slot_status;
+	int ret;
+
+	ret = pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
+	if (ret == PCIBIOS_DEVICE_NOT_FOUND || PCI_POSSIBLE_ERROR(slot_status))
+		return -ENODEV;
+
+	return !!(slot_status & PCI_EXP_SLTSTA_PDS);
+}
+
+/**
+ * pciehp_card_present_or_link_active() - whether given slot is occupied
+ * @ctrl: PCIe hotplug controller
+ *
+ * Unlike pciehp_card_present(), which determines presence solely from the
+ * Presence Detect State bit, this helper also returns true if the Link Active
+ * bit is set.  This is a concession to broken hotplug ports which hardwire
+ * Presence Detect State to zero, such as Wilocity's [1ae9:0200].
+ *
+ * Returns: %1 if the slot is occupied and %0 if it is not. If the hotplug
+ *	    port is not present anymore returns %-ENODEV.
+ */
+int pciehp_card_present_or_link_active(struct controller *ctrl)
+{
+	int ret;
+
+	ret = pciehp_card_present(ctrl);
+	if (ret)
+		return ret;
+
+	return pciehp_check_link_active(ctrl);
+}
+
+int pciehp_query_power_fault(struct controller *ctrl)
+{
+	struct pci_dev *pdev = ctrl_dev(ctrl);
+	u16 slot_status;
+
+	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
+	return !!(slot_status & PCI_EXP_SLTSTA_PFD);
+}
+
+int pciehp_set_raw_indicator_status(struct hotplug_slot *hotplug_slot,
+				    u8 status)
+{
+	struct controller *ctrl = to_ctrl(hotplug_slot);
+	struct pci_dev *pdev = ctrl_dev(ctrl);
+
+	pci_config_pm_runtime_get(pdev);
+	pcie_write_cmd_nowait(ctrl, status << 6,
+			      PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC);
+	pci_config_pm_runtime_put(pdev);
+	return 0;
+}
+
+/**
+ * pciehp_set_indicators() - set attention indicator, power indicator, or both
+ * @ctrl: PCIe hotplug controller
+ * @pwr: one of:
+ *	PCI_EXP_SLTCTL_PWR_IND_ON
+ *	PCI_EXP_SLTCTL_PWR_IND_BLINK
+ *	PCI_EXP_SLTCTL_PWR_IND_OFF
+ * @attn: one of:
+ *	PCI_EXP_SLTCTL_ATTN_IND_ON
+ *	PCI_EXP_SLTCTL_ATTN_IND_BLINK
+ *	PCI_EXP_SLTCTL_ATTN_IND_OFF
+ *
+ * Either @pwr or @attn can also be INDICATOR_NOOP to leave that indicator
+ * unchanged.
+ */
+void pciehp_set_indicators(struct controller *ctrl, int pwr, int attn)
+{
+	u16 cmd = 0, mask = 0;
+
+	if (PWR_LED(ctrl) && pwr != INDICATOR_NOOP) {
+		cmd |= (pwr & PCI_EXP_SLTCTL_PIC);
+		mask |= PCI_EXP_SLTCTL_PIC;
+	}
+
+	if (ATTN_LED(ctrl) && attn != INDICATOR_NOOP) {
+		cmd |= (attn & PCI_EXP_SLTCTL_AIC);
+		mask |= PCI_EXP_SLTCTL_AIC;
+	}
+
+	if (cmd) {
+		pcie_write_cmd_nowait(ctrl, cmd, mask);
+		ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
+			 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, cmd);
+	}
+}
+
+int pciehp_power_on_slot(struct controller *ctrl)
+{
+	struct pci_dev *pdev = ctrl_dev(ctrl);
+	u16 slot_status;
+	int retval;
+
+	/* Clear power-fault bit from previous power failures */
+	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
+	if (slot_status & PCI_EXP_SLTSTA_PFD)
+		pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,
+					   PCI_EXP_SLTSTA_PFD);
+	ctrl->power_fault_detected = 0;
+
+	pcie_write_cmd(ctrl, PCI_EXP_SLTCTL_PWR_ON, PCI_EXP_SLTCTL_PCC);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
+		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL,
+		 PCI_EXP_SLTCTL_PWR_ON);
+
+	retval = pciehp_link_enable(ctrl);
+	if (retval)
+		ctrl_err(ctrl, "%s: Can not enable the link!\n", __func__);
+
+	return retval;
+}
+
+void pciehp_power_off_slot(struct controller *ctrl)
+{
+	pcie_write_cmd(ctrl, PCI_EXP_SLTCTL_PWR_OFF, PCI_EXP_SLTCTL_PCC);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
+		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL,
+		 PCI_EXP_SLTCTL_PWR_OFF);
+}
+
+static void pciehp_ignore_dpc_link_change(struct controller *ctrl,
+					  struct pci_dev *pdev, int irq)
+{
+	/*
+	 * Ignore link changes which occurred while waiting for DPC recovery.
+	 * Could be several if DPC triggered multiple times consecutively.
+	 */
+	synchronize_hardirq(irq);
+	atomic_and(~PCI_EXP_SLTSTA_DLLSC, &ctrl->pending_events);
+	if (pciehp_poll_mode)
+		pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,
+					   PCI_EXP_SLTSTA_DLLSC);
+	ctrl_info(ctrl, "Slot(%s): Link Down/Up ignored (recovered by DPC)\n",
+		  slot_name(ctrl));
+
+	/*
+	 * If the link is unexpectedly down after successful recovery,
+	 * the corresponding link change may have been ignored above.
+	 * Synthesize it to ensure that it is acted on.
+	 */
+	down_read_nested(&ctrl->reset_lock, ctrl->depth);
+	if (!pciehp_check_link_active(ctrl))
+		pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC);
+	up_read(&ctrl->reset_lock);
+}
+
+static irqreturn_t pciehp_isr(int irq, void *dev_id)
+{
+	struct controller *ctrl = (struct controller *)dev_id;
+	struct pci_dev *pdev = ctrl_dev(ctrl);
+	struct device *parent = pdev->dev.parent;
+	u16 status, events = 0;
+
+	/*
+	 * Interrupts only occur in D3hot or shallower and only if enabled
+	 * in the Slot Control register (PCIe r4.0, sec 6.7.3.4).
+	 */
+	if (pdev->current_state == PCI_D3cold ||
+	    (!(ctrl->slot_ctrl & PCI_EXP_SLTCTL_HPIE) && !pciehp_poll_mode))
+		return IRQ_NONE;
+
+	/*
+	 * Keep the port accessible by holding a runtime PM ref on its parent.
+	 * Defer resume of the parent to the IRQ thread if it's suspended.
+	 * Mask the interrupt until then.
+	 */
+	if (parent) {
+		pm_runtime_get_noresume(parent);
+		if (!pm_runtime_active(parent)) {
+			pm_runtime_put(parent);
+			disable_irq_nosync(irq);
+			atomic_or(RERUN_ISR, &ctrl->pending_events);
+			return IRQ_WAKE_THREAD;
+		}
+	}
+
+read_status:
+	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &status);
+	if (PCI_POSSIBLE_ERROR(status)) {
+		ctrl_info(ctrl, "%s: no response from device\n", __func__);
+		if (parent)
+			pm_runtime_put(parent);
+		return IRQ_NONE;
+	}
+
+	/*
+	 * Slot Status contains plain status bits as well as event
+	 * notification bits; right now we only want the event bits.
+	 */
+	status &= PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
+		  PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC |
+		  PCI_EXP_SLTSTA_DLLSC;
+
+	/*
+	 * If we've already reported a power fault, don't report it again
+	 * until we've done something to handle it.
+	 */
+	if (ctrl->power_fault_detected)
+		status &= ~PCI_EXP_SLTSTA_PFD;
+	else if (status & PCI_EXP_SLTSTA_PFD)
+		ctrl->power_fault_detected = true;
+
+	events |= status;
+	if (!events) {
+		if (parent)
+			pm_runtime_put(parent);
+		return IRQ_NONE;
+	}
+
+	if (status) {
+		pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, status);
+
+		/*
+		 * In MSI mode, all event bits must be zero before the port
+		 * will send a new interrupt (PCIe Base Spec r5.0 sec 6.7.3.4).
+		 * So re-read the Slot Status register in case a bit was set
+		 * between read and write.
+		 */
+		if (pci_dev_msi_enabled(pdev) && !pciehp_poll_mode)
+			goto read_status;
+	}
+
+	ctrl_dbg(ctrl, "pending interrupts %#06x from Slot Status\n", events);
+	if (parent)
+		pm_runtime_put(parent);
+
+	/*
+	 * Command Completed notifications are not deferred to the
+	 * IRQ thread because it may be waiting for their arrival.
+	 */
+	if (events & PCI_EXP_SLTSTA_CC) {
+		ctrl->cmd_busy = 0;
+		smp_mb();
+		wake_up(&ctrl->queue);
+
+		if (events == PCI_EXP_SLTSTA_CC)
+			return IRQ_HANDLED;
+
+		events &= ~PCI_EXP_SLTSTA_CC;
+	}
+
+	if (pdev->ignore_hotplug) {
+		ctrl_dbg(ctrl, "ignoring hotplug event %#06x\n", events);
+		return IRQ_HANDLED;
+	}
+
+	/* Save pending events for consumption by IRQ thread. */
+	atomic_or(events, &ctrl->pending_events);
+	return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t pciehp_ist(int irq, void *dev_id)
+{
+	struct controller *ctrl = (struct controller *)dev_id;
+	struct pci_dev *pdev = ctrl_dev(ctrl);
+	irqreturn_t ret;
+	u32 events;
+
+	ctrl->ist_running = true;
+	pci_config_pm_runtime_get(pdev);
+
+	/* rerun pciehp_isr() if the port was inaccessible on interrupt */
+	if (atomic_fetch_and(~RERUN_ISR, &ctrl->pending_events) & RERUN_ISR) {
+		ret = pciehp_isr(irq, dev_id);
+		enable_irq(irq);
+		if (ret != IRQ_WAKE_THREAD)
+			goto out;
+	}
+
+	synchronize_hardirq(irq);
+	events = atomic_xchg(&ctrl->pending_events, 0);
+	if (!events) {
+		ret = IRQ_NONE;
+		goto out;
+	}
+
+	/* Check Attention Button Pressed */
+	if (events & PCI_EXP_SLTSTA_ABP)
+		pciehp_handle_button_press(ctrl);
+
+	/* Check Power Fault Detected */
+	if (events & PCI_EXP_SLTSTA_PFD) {
+		ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(ctrl));
+		pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+				      PCI_EXP_SLTCTL_ATTN_IND_ON);
+	}
+
+	/*
+	 * Ignore Link Down/Up events caused by Downstream Port Containment
+	 * if recovery from the error succeeded.
+	 */
+	if ((events & PCI_EXP_SLTSTA_DLLSC) && pci_dpc_recovered(pdev) &&
+	    ctrl->state == ON_STATE) {
+		events &= ~PCI_EXP_SLTSTA_DLLSC;
+		pciehp_ignore_dpc_link_change(ctrl, pdev, irq);
+	}
+
+	/*
+	 * Disable requests have higher priority than Presence Detect Changed
+	 * or Data Link Layer State Changed events.
+	 */
+	down_read_nested(&ctrl->reset_lock, ctrl->depth);
+	if (events & DISABLE_SLOT)
+		pciehp_handle_disable_request(ctrl);
+	else if (events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC))
+		pciehp_handle_presence_or_link_change(ctrl, events);
+	up_read(&ctrl->reset_lock);
+
+	ret = IRQ_HANDLED;
+out:
+	pci_config_pm_runtime_put(pdev);
+	ctrl->ist_running = false;
+	wake_up(&ctrl->requester);
+	return ret;
+}
+
+static int pciehp_poll(void *data)
+{
+	struct controller *ctrl = data;
+
+	schedule_timeout_idle(10 * HZ); /* start with 10 sec delay */
+
+	while (!kthread_should_stop()) {
+		/* poll for interrupt events or user requests */
+		while (pciehp_isr(IRQ_NOTCONNECTED, ctrl) == IRQ_WAKE_THREAD ||
+		       atomic_read(&ctrl->pending_events))
+			pciehp_ist(IRQ_NOTCONNECTED, ctrl);
+
+		if (pciehp_poll_time <= 0 || pciehp_poll_time > 60)
+			pciehp_poll_time = 2; /* clamp to sane value */
+
+		schedule_timeout_idle(pciehp_poll_time * HZ);
+	}
+
+	return 0;
+}
+
+static void pcie_enable_notification(struct controller *ctrl)
+{
+	u16 cmd, mask;
+
+	/*
+	 * TBD: Power fault detected software notification support.
+	 *
+	 * Power fault detected software notification is not enabled
+	 * now, because it caused power fault detected interrupt storm
+	 * on some machines. On those machines, power fault detected
+	 * bit in the slot status register was set again immediately
+	 * when it is cleared in the interrupt service routine, and
+	 * next power fault detected interrupt was notified again.
+	 */
+
+	/*
+	 * Always enable link events: thus link-up and link-down shall
+	 * always be treated as hotplug and unplug respectively. Enable
+	 * presence detect only if Attention Button is not present.
+	 */
+	cmd = PCI_EXP_SLTCTL_DLLSCE;
+	if (ATTN_BUTTN(ctrl))
+		cmd |= PCI_EXP_SLTCTL_ABPE;
+	else
+		cmd |= PCI_EXP_SLTCTL_PDCE;
+	if (!pciehp_poll_mode)
+		cmd |= PCI_EXP_SLTCTL_HPIE;
+	if (!pciehp_poll_mode && !NO_CMD_CMPL(ctrl))
+		cmd |= PCI_EXP_SLTCTL_CCIE;
+
+	mask = (PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE |
+		PCI_EXP_SLTCTL_PFDE |
+		PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE |
+		PCI_EXP_SLTCTL_DLLSCE);
+
+	pcie_write_cmd_nowait(ctrl, cmd, mask);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
+		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, cmd);
+}
+
+static void pcie_disable_notification(struct controller *ctrl)
+{
+	u16 mask;
+
+	mask = (PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE |
+		PCI_EXP_SLTCTL_MRLSCE | PCI_EXP_SLTCTL_PFDE |
+		PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE |
+		PCI_EXP_SLTCTL_DLLSCE);
+	pcie_write_cmd(ctrl, 0, mask);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
+		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, 0);
+}
+
+void pcie_clear_hotplug_events(struct controller *ctrl)
+{
+	pcie_capability_write_word(ctrl_dev(ctrl), PCI_EXP_SLTSTA,
+				   PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
+}
+
+void pcie_enable_interrupt(struct controller *ctrl)
+{
+	u16 mask;
+
+	mask = PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_DLLSCE;
+	pcie_write_cmd(ctrl, mask, mask);
+}
+
+void pcie_disable_interrupt(struct controller *ctrl)
+{
+	u16 mask;
+
+	/*
+	 * Mask hot-plug interrupt to prevent it triggering immediately
+	 * when the link goes inactive (we still get PME when any of the
+	 * enabled events is detected). Same goes with Link Layer State
+	 * changed event which generates PME immediately when the link goes
+	 * inactive so mask it as well.
+	 */
+	mask = PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_DLLSCE;
+	pcie_write_cmd(ctrl, 0, mask);
+}
+
+/**
+ * pciehp_slot_reset() - ignore link event caused by error-induced hot reset
+ * @dev: PCI Express port service device
+ *
+ * Called from pcie_portdrv_slot_reset() after AER or DPC initiated a reset
+ * further up in the hierarchy to recover from an error.  The reset was
+ * propagated down to this hotplug port.  Ignore the resulting link flap.
+ * If the link failed to retrain successfully, synthesize the ignored event.
+ * Surprise removal during reset is detected through Presence Detect Changed.
+ */
+int pciehp_slot_reset(struct pcie_device *dev)
+{
+	struct controller *ctrl = get_service_data(dev);
+
+	if (ctrl->state != ON_STATE)
+		return 0;
+
+	pcie_capability_write_word(dev->port, PCI_EXP_SLTSTA,
+				   PCI_EXP_SLTSTA_DLLSC);
+
+	if (!pciehp_check_link_active(ctrl))
+		pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC);
+
+	return 0;
+}
+
+/*
+ * pciehp has a 1:1 bus:slot relationship so we ultimately want a secondary
+ * bus reset of the bridge, but at the same time we want to ensure that it is
+ * not seen as a hot-unplug, followed by the hot-plug of the device. Thus,
+ * disable link state notification and presence detection change notification
+ * momentarily, if we see that they could interfere. Also, clear any spurious
+ * events after.
+ */
+int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe)
+{
+	struct controller *ctrl = to_ctrl(hotplug_slot);
+	struct pci_dev *pdev = ctrl_dev(ctrl);
+	u16 stat_mask = 0, ctrl_mask = 0;
+	int rc;
+
+	if (probe)
+		return 0;
+
+	down_write_nested(&ctrl->reset_lock, ctrl->depth);
+
+	if (!ATTN_BUTTN(ctrl)) {
+		ctrl_mask |= PCI_EXP_SLTCTL_PDCE;
+		stat_mask |= PCI_EXP_SLTSTA_PDC;
+	}
+	ctrl_mask |= PCI_EXP_SLTCTL_DLLSCE;
+	stat_mask |= PCI_EXP_SLTSTA_DLLSC;
+
+	pcie_write_cmd(ctrl, 0, ctrl_mask);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
+		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, 0);
+
+	rc = pci_bridge_secondary_bus_reset(ctrl->pcie->port);
+
+	pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, stat_mask);
+	pcie_write_cmd_nowait(ctrl, ctrl_mask, ctrl_mask);
+	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
+		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, ctrl_mask);
+
+	up_write(&ctrl->reset_lock);
+	return rc;
+}
+
+int pcie_init_notification(struct controller *ctrl)
+{
+	if (pciehp_request_irq(ctrl))
+		return -1;
+	pcie_enable_notification(ctrl);
+	ctrl->notification_enabled = 1;
+	return 0;
+}
+
+void pcie_shutdown_notification(struct controller *ctrl)
+{
+	if (ctrl->notification_enabled) {
+		pcie_disable_notification(ctrl);
+		pciehp_free_irq(ctrl);
+		ctrl->notification_enabled = 0;
+	}
+}
+
+static inline void dbg_ctrl(struct controller *ctrl)
+{
+	struct pci_dev *pdev = ctrl->pcie->port;
+	u16 reg16;
+
+	ctrl_dbg(ctrl, "Slot Capabilities      : 0x%08x\n", ctrl->slot_cap);
+	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &reg16);
+	ctrl_dbg(ctrl, "Slot Status            : 0x%04x\n", reg16);
+	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &reg16);
+	ctrl_dbg(ctrl, "Slot Control           : 0x%04x\n", reg16);
+}
+
+#define FLAG(x, y)	(((x) & (y)) ? '+' : '-')
+
+static inline int pcie_hotplug_depth(struct pci_dev *dev)
+{
+	struct pci_bus *bus = dev->bus;
+	int depth = 0;
+
+	while (bus->parent) {
+		bus = bus->parent;
+		if (bus->self && bus->self->is_hotplug_bridge)
+			depth++;
+	}
+
+	return depth;
+}
+
+struct controller *pcie_init(struct pcie_device *dev)
+{
+	struct controller *ctrl;
+	u32 slot_cap, slot_cap2;
+	u8 poweron;
+	struct pci_dev *pdev = dev->port;
+	struct pci_bus *subordinate = pdev->subordinate;
+
+	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
+	if (!ctrl)
+		return NULL;
+
+	ctrl->pcie = dev;
+	ctrl->depth = pcie_hotplug_depth(dev->port);
+	pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &slot_cap);
+
+	if (pdev->hotplug_user_indicators)
+		slot_cap &= ~(PCI_EXP_SLTCAP_AIP | PCI_EXP_SLTCAP_PIP);
+
+	/*
+	 * We assume no Thunderbolt controllers support Command Complete events,
+	 * but some controllers falsely claim they do.
+	 */
+	if (pdev->is_thunderbolt)
+		slot_cap |= PCI_EXP_SLTCAP_NCCS;
+
+	ctrl->slot_cap = slot_cap;
+	mutex_init(&ctrl->ctrl_lock);
+	mutex_init(&ctrl->state_lock);
+	init_rwsem(&ctrl->reset_lock);
+	init_waitqueue_head(&ctrl->requester);
+	init_waitqueue_head(&ctrl->queue);
+	INIT_DELAYED_WORK(&ctrl->button_work, pciehp_queue_pushbutton_work);
+	dbg_ctrl(ctrl);
+
+	down_read(&pci_bus_sem);
+	ctrl->state = list_empty(&subordinate->devices) ? OFF_STATE : ON_STATE;
+	up_read(&pci_bus_sem);
+
+	pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP2, &slot_cap2);
+	if (slot_cap2 & PCI_EXP_SLTCAP2_IBPD) {
+		pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_IBPD_DISABLE,
+				      PCI_EXP_SLTCTL_IBPD_DISABLE);
+		ctrl->inband_presence_disabled = 1;
+	}
+
+	if (dmi_first_match(inband_presence_disabled_dmi_table))
+		ctrl->inband_presence_disabled = 1;
+
+	/* Clear all remaining event bits in Slot Status register. */
+	pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,
+		PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
+		PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_CC |
+		PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC);
+
+	ctrl_info(ctrl, "Slot #%d AttnBtn%c PwrCtrl%c MRL%c AttnInd%c PwrInd%c HotPlug%c Surprise%c Interlock%c NoCompl%c IbPresDis%c LLActRep%c%s\n",
+		(slot_cap & PCI_EXP_SLTCAP_PSN) >> 19,
+		FLAG(slot_cap, PCI_EXP_SLTCAP_ABP),
+		FLAG(slot_cap, PCI_EXP_SLTCAP_PCP),
+		FLAG(slot_cap, PCI_EXP_SLTCAP_MRLSP),
+		FLAG(slot_cap, PCI_EXP_SLTCAP_AIP),
+		FLAG(slot_cap, PCI_EXP_SLTCAP_PIP),
+		FLAG(slot_cap, PCI_EXP_SLTCAP_HPC),
+		FLAG(slot_cap, PCI_EXP_SLTCAP_HPS),
+		FLAG(slot_cap, PCI_EXP_SLTCAP_EIP),
+		FLAG(slot_cap, PCI_EXP_SLTCAP_NCCS),
+		FLAG(slot_cap2, PCI_EXP_SLTCAP2_IBPD),
+		FLAG(pdev->link_active_reporting, true),
+		pdev->broken_cmd_compl ? " (with Cmd Compl erratum)" : "");
+
+	/*
+	 * If empty slot's power status is on, turn power off.  The IRQ isn't
+	 * requested yet, so avoid triggering a notification with this command.
+	 */
+	if (POWER_CTRL(ctrl)) {
+		pciehp_get_power_status(ctrl, &poweron);
+		if (!pciehp_card_present_or_link_active(ctrl) && poweron) {
+			pcie_disable_notification(ctrl);
+			pciehp_power_off_slot(ctrl);
+		}
+	}
+
+	return ctrl;
+}
+
+void pciehp_release_ctrl(struct controller *ctrl)
+{
+	cancel_delayed_work_sync(&ctrl->button_work);
+	kfree(ctrl);
+}
+
+static void quirk_cmd_compl(struct pci_dev *pdev)
+{
+	u32 slot_cap;
+
+	if (pci_is_pcie(pdev)) {
+		pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &slot_cap);
+		if (slot_cap & PCI_EXP_SLTCAP_HPC &&
+		    !(slot_cap & PCI_EXP_SLTCAP_NCCS))
+			pdev->broken_cmd_compl = 1;
+	}
+}
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
+			      PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x010e,
+			      PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0110,
+			      PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0400,
+			      PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0401,
+			      PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_HXT, 0x0401,
+			      PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
new file mode 100644
index 000000000..ad12515a4
--- /dev/null
+++ b/drivers/pci/hotplug/pciehp_pci.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCI Express Hot Plug Controller Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM Corp.
+ * Copyright (C) 2003-2004 Intel Corporation
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
+ *
+ */
+
+#define dev_fmt(fmt) "pciehp: " fmt
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include "../pci.h"
+#include "pciehp.h"
+
+/**
+ * pciehp_configure_device() - enumerate PCI devices below a hotplug bridge
+ * @ctrl: PCIe hotplug controller
+ *
+ * Enumerate PCI devices below a hotplug bridge and add them to the system.
+ * Return 0 on success, %-EEXIST if the devices are already enumerated or
+ * %-ENODEV if enumeration failed.
+ */
+int pciehp_configure_device(struct controller *ctrl)
+{
+	struct pci_dev *dev;
+	struct pci_dev *bridge = ctrl->pcie->port;
+	struct pci_bus *parent = bridge->subordinate;
+	int num, ret = 0;
+
+	pci_lock_rescan_remove();
+
+	dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
+	if (dev) {
+		/*
+		 * The device is already there. Either configured by the
+		 * boot firmware or a previous hotplug event.
+		 */
+		ctrl_dbg(ctrl, "Device %s already exists at %04x:%02x:00, skipping hot-add\n",
+			 pci_name(dev), pci_domain_nr(parent), parent->number);
+		pci_dev_put(dev);
+		ret = -EEXIST;
+		goto out;
+	}
+
+	num = pci_scan_slot(parent, PCI_DEVFN(0, 0));
+	if (num == 0) {
+		ctrl_err(ctrl, "No new device found\n");
+		ret = -ENODEV;
+		goto out;
+	}
+
+	for_each_pci_bridge(dev, parent)
+		pci_hp_add_bridge(dev);
+
+	pci_assign_unassigned_bridge_resources(bridge);
+	pcie_bus_configure_settings(parent);
+
+	/*
+	 * Release reset_lock during driver binding
+	 * to avoid AB-BA deadlock with device_lock.
+	 */
+	up_read(&ctrl->reset_lock);
+	pci_bus_add_devices(parent);
+	down_read_nested(&ctrl->reset_lock, ctrl->depth);
+
+ out:
+	pci_unlock_rescan_remove();
+	return ret;
+}
+
+/**
+ * pciehp_unconfigure_device() - remove PCI devices below a hotplug bridge
+ * @ctrl: PCIe hotplug controller
+ * @presence: whether the card is still present in the slot;
+ *	true for safe removal via sysfs or an Attention Button press,
+ *	false for surprise removal
+ *
+ * Unbind PCI devices below a hotplug bridge from their drivers and remove
+ * them from the system.  Safely removed devices are quiesced.  Surprise
+ * removed devices are marked as such to prevent further accesses.
+ */
+void pciehp_unconfigure_device(struct controller *ctrl, bool presence)
+{
+	struct pci_dev *dev, *temp;
+	struct pci_bus *parent = ctrl->pcie->port->subordinate;
+	u16 command;
+
+	ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:00\n",
+		 __func__, pci_domain_nr(parent), parent->number);
+
+	if (!presence)
+		pci_walk_bus(parent, pci_dev_set_disconnected, NULL);
+
+	pci_lock_rescan_remove();
+
+	/*
+	 * Stopping an SR-IOV PF device removes all the associated VFs,
+	 * which will update the bus->devices list and confuse the
+	 * iterator.  Therefore, iterate in reverse so we remove the VFs
+	 * first, then the PF.  We do the same in pci_stop_bus_device().
+	 */
+	list_for_each_entry_safe_reverse(dev, temp, &parent->devices,
+					 bus_list) {
+		pci_dev_get(dev);
+
+		/*
+		 * Release reset_lock during driver unbinding
+		 * to avoid AB-BA deadlock with device_lock.
+		 */
+		up_read(&ctrl->reset_lock);
+		pci_stop_and_remove_bus_device(dev);
+		down_read_nested(&ctrl->reset_lock, ctrl->depth);
+
+		/*
+		 * Ensure that no new Requests will be generated from
+		 * the device.
+		 */
+		if (presence) {
+			pci_read_config_word(dev, PCI_COMMAND, &command);
+			command &= ~(PCI_COMMAND_MASTER | PCI_COMMAND_SERR);
+			command |= PCI_COMMAND_INTX_DISABLE;
+			pci_write_config_word(dev, PCI_COMMAND, command);
+		}
+		pci_dev_put(dev);
+	}
+
+	pci_unlock_rescan_remove();
+}
diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
new file mode 100644
index 000000000..881d42063
--- /dev/null
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -0,0 +1,1049 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCI Hotplug Driver for PowerPC PowerNV platform.
+ *
+ * Copyright Gavin Shan, IBM Corporation 2016.
+ */
+
+#include <linux/libfdt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <linux/of_fdt.h>
+
+#include <asm/opal.h>
+#include <asm/pnv-pci.h>
+#include <asm/ppc-pci.h>
+
+#define DRIVER_VERSION	"0.1"
+#define DRIVER_AUTHOR	"Gavin Shan, IBM Corporation"
+#define DRIVER_DESC	"PowerPC PowerNV PCI Hotplug Driver"
+
+#define SLOT_WARN(sl, x...) \
+	((sl)->pdev ? pci_warn((sl)->pdev, x) : dev_warn(&(sl)->bus->dev, x))
+
+struct pnv_php_event {
+	bool			added;
+	struct pnv_php_slot	*php_slot;
+	struct work_struct	work;
+};
+
+static LIST_HEAD(pnv_php_slot_list);
+static DEFINE_SPINLOCK(pnv_php_lock);
+
+static void pnv_php_register(struct device_node *dn);
+static void pnv_php_unregister_one(struct device_node *dn);
+static void pnv_php_unregister(struct device_node *dn);
+
+static void pnv_php_disable_irq(struct pnv_php_slot *php_slot,
+				bool disable_device)
+{
+	struct pci_dev *pdev = php_slot->pdev;
+	int irq = php_slot->irq;
+	u16 ctrl;
+
+	if (php_slot->irq > 0) {
+		pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &ctrl);
+		ctrl &= ~(PCI_EXP_SLTCTL_HPIE |
+			  PCI_EXP_SLTCTL_PDCE |
+			  PCI_EXP_SLTCTL_DLLSCE);
+		pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, ctrl);
+
+		free_irq(php_slot->irq, php_slot);
+		php_slot->irq = 0;
+	}
+
+	if (php_slot->wq) {
+		destroy_workqueue(php_slot->wq);
+		php_slot->wq = NULL;
+	}
+
+	if (disable_device || irq > 0) {
+		if (pdev->msix_enabled)
+			pci_disable_msix(pdev);
+		else if (pdev->msi_enabled)
+			pci_disable_msi(pdev);
+
+		pci_disable_device(pdev);
+	}
+}
+
+static void pnv_php_free_slot(struct kref *kref)
+{
+	struct pnv_php_slot *php_slot = container_of(kref,
+					struct pnv_php_slot, kref);
+
+	WARN_ON(!list_empty(&php_slot->children));
+	pnv_php_disable_irq(php_slot, false);
+	kfree(php_slot->name);
+	kfree(php_slot);
+}
+
+static inline void pnv_php_put_slot(struct pnv_php_slot *php_slot)
+{
+
+	if (!php_slot)
+		return;
+
+	kref_put(&php_slot->kref, pnv_php_free_slot);
+}
+
+static struct pnv_php_slot *pnv_php_match(struct device_node *dn,
+					  struct pnv_php_slot *php_slot)
+{
+	struct pnv_php_slot *target, *tmp;
+
+	if (php_slot->dn == dn) {
+		kref_get(&php_slot->kref);
+		return php_slot;
+	}
+
+	list_for_each_entry(tmp, &php_slot->children, link) {
+		target = pnv_php_match(dn, tmp);
+		if (target)
+			return target;
+	}
+
+	return NULL;
+}
+
+struct pnv_php_slot *pnv_php_find_slot(struct device_node *dn)
+{
+	struct pnv_php_slot *php_slot, *tmp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pnv_php_lock, flags);
+	list_for_each_entry(tmp, &pnv_php_slot_list, link) {
+		php_slot = pnv_php_match(dn, tmp);
+		if (php_slot) {
+			spin_unlock_irqrestore(&pnv_php_lock, flags);
+			return php_slot;
+		}
+	}
+	spin_unlock_irqrestore(&pnv_php_lock, flags);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(pnv_php_find_slot);
+
+/*
+ * Remove pdn for all children of the indicated device node.
+ * The function should remove pdn in a depth-first manner.
+ */
+static void pnv_php_rmv_pdns(struct device_node *dn)
+{
+	struct device_node *child;
+
+	for_each_child_of_node(dn, child) {
+		pnv_php_rmv_pdns(child);
+
+		pci_remove_device_node_info(child);
+	}
+}
+
+/*
+ * Detach all child nodes of the indicated device nodes. The
+ * function should handle device nodes in depth-first manner.
+ *
+ * We should not invoke of_node_release() as the memory for
+ * individual device node is part of large memory block. The
+ * large block is allocated from memblock (system bootup) or
+ * kmalloc() when unflattening the device tree by OF changeset.
+ * We can not free the large block allocated from memblock. For
+ * later case, it should be released at once.
+ */
+static void pnv_php_detach_device_nodes(struct device_node *parent)
+{
+	struct device_node *dn;
+
+	for_each_child_of_node(parent, dn) {
+		pnv_php_detach_device_nodes(dn);
+
+		of_node_put(dn);
+		of_detach_node(dn);
+	}
+}
+
+static void pnv_php_rmv_devtree(struct pnv_php_slot *php_slot)
+{
+	pnv_php_rmv_pdns(php_slot->dn);
+
+	/*
+	 * Decrease the refcount if the device nodes were created
+	 * through OF changeset before detaching them.
+	 */
+	if (php_slot->fdt)
+		of_changeset_destroy(&php_slot->ocs);
+	pnv_php_detach_device_nodes(php_slot->dn);
+
+	if (php_slot->fdt) {
+		kfree(php_slot->dt);
+		kfree(php_slot->fdt);
+		php_slot->dt        = NULL;
+		php_slot->dn->child = NULL;
+		php_slot->fdt       = NULL;
+	}
+}
+
+/*
+ * As the nodes in OF changeset are applied in reverse order, we
+ * need revert the nodes in advance so that we have correct node
+ * order after the changeset is applied.
+ */
+static void pnv_php_reverse_nodes(struct device_node *parent)
+{
+	struct device_node *child, *next;
+
+	/* In-depth first */
+	for_each_child_of_node(parent, child)
+		pnv_php_reverse_nodes(child);
+
+	/* Reverse the nodes in the child list */
+	child = parent->child;
+	parent->child = NULL;
+	while (child) {
+		next = child->sibling;
+
+		child->sibling = parent->child;
+		parent->child = child;
+		child = next;
+	}
+}
+
+static int pnv_php_populate_changeset(struct of_changeset *ocs,
+				      struct device_node *dn)
+{
+	struct device_node *child;
+	int ret = 0;
+
+	for_each_child_of_node(dn, child) {
+		ret = of_changeset_attach_node(ocs, child);
+		if (ret) {
+			of_node_put(child);
+			break;
+		}
+
+		ret = pnv_php_populate_changeset(ocs, child);
+		if (ret) {
+			of_node_put(child);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static void *pnv_php_add_one_pdn(struct device_node *dn, void *data)
+{
+	struct pci_controller *hose = (struct pci_controller *)data;
+	struct pci_dn *pdn;
+
+	pdn = pci_add_device_node_info(hose, dn);
+	if (!pdn)
+		return ERR_PTR(-ENOMEM);
+
+	return NULL;
+}
+
+static void pnv_php_add_pdns(struct pnv_php_slot *slot)
+{
+	struct pci_controller *hose = pci_bus_to_host(slot->bus);
+
+	pci_traverse_device_nodes(slot->dn, pnv_php_add_one_pdn, hose);
+}
+
+static int pnv_php_add_devtree(struct pnv_php_slot *php_slot)
+{
+	void *fdt, *fdt1, *dt;
+	int ret;
+
+	/* We don't know the FDT blob size. We try to get it through
+	 * maximal memory chunk and then copy it to another chunk that
+	 * fits the real size.
+	 */
+	fdt1 = kzalloc(0x10000, GFP_KERNEL);
+	if (!fdt1) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = pnv_pci_get_device_tree(php_slot->dn->phandle, fdt1, 0x10000);
+	if (ret) {
+		SLOT_WARN(php_slot, "Error %d getting FDT blob\n", ret);
+		goto free_fdt1;
+	}
+
+	fdt = kmemdup(fdt1, fdt_totalsize(fdt1), GFP_KERNEL);
+	if (!fdt) {
+		ret = -ENOMEM;
+		goto free_fdt1;
+	}
+
+	/* Unflatten device tree blob */
+	dt = of_fdt_unflatten_tree(fdt, php_slot->dn, NULL);
+	if (!dt) {
+		ret = -EINVAL;
+		SLOT_WARN(php_slot, "Cannot unflatten FDT\n");
+		goto free_fdt;
+	}
+
+	/* Initialize and apply the changeset */
+	of_changeset_init(&php_slot->ocs);
+	pnv_php_reverse_nodes(php_slot->dn);
+	ret = pnv_php_populate_changeset(&php_slot->ocs, php_slot->dn);
+	if (ret) {
+		pnv_php_reverse_nodes(php_slot->dn);
+		SLOT_WARN(php_slot, "Error %d populating changeset\n",
+			  ret);
+		goto free_dt;
+	}
+
+	php_slot->dn->child = NULL;
+	ret = of_changeset_apply(&php_slot->ocs);
+	if (ret) {
+		SLOT_WARN(php_slot, "Error %d applying changeset\n", ret);
+		goto destroy_changeset;
+	}
+
+	/* Add device node firmware data */
+	pnv_php_add_pdns(php_slot);
+	php_slot->fdt = fdt;
+	php_slot->dt  = dt;
+	kfree(fdt1);
+	goto out;
+
+destroy_changeset:
+	of_changeset_destroy(&php_slot->ocs);
+free_dt:
+	kfree(dt);
+	php_slot->dn->child = NULL;
+free_fdt:
+	kfree(fdt);
+free_fdt1:
+	kfree(fdt1);
+out:
+	return ret;
+}
+
+static inline struct pnv_php_slot *to_pnv_php_slot(struct hotplug_slot *slot)
+{
+	return container_of(slot, struct pnv_php_slot, slot);
+}
+
+int pnv_php_set_slot_power_state(struct hotplug_slot *slot,
+				 uint8_t state)
+{
+	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
+	struct opal_msg msg;
+	int ret;
+
+	ret = pnv_pci_set_power_state(php_slot->id, state, &msg);
+	if (ret > 0) {
+		if (be64_to_cpu(msg.params[1]) != php_slot->dn->phandle	||
+		    be64_to_cpu(msg.params[2]) != state) {
+			SLOT_WARN(php_slot, "Wrong msg (%lld, %lld, %lld)\n",
+				  be64_to_cpu(msg.params[1]),
+				  be64_to_cpu(msg.params[2]),
+				  be64_to_cpu(msg.params[3]));
+			return -ENOMSG;
+		}
+		if (be64_to_cpu(msg.params[3]) != OPAL_SUCCESS) {
+			ret = -ENODEV;
+			goto error;
+		}
+	} else if (ret < 0) {
+		goto error;
+	}
+
+	if (state == OPAL_PCI_SLOT_POWER_OFF || state == OPAL_PCI_SLOT_OFFLINE)
+		pnv_php_rmv_devtree(php_slot);
+	else
+		ret = pnv_php_add_devtree(php_slot);
+
+	return ret;
+
+error:
+	SLOT_WARN(php_slot, "Error %d powering %s\n",
+		  ret, (state == OPAL_PCI_SLOT_POWER_ON) ? "on" : "off");
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pnv_php_set_slot_power_state);
+
+static int pnv_php_get_power_state(struct hotplug_slot *slot, u8 *state)
+{
+	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
+	uint8_t power_state = OPAL_PCI_SLOT_POWER_ON;
+	int ret;
+
+	/*
+	 * Retrieve power status from firmware. If we fail
+	 * getting that, the power status fails back to
+	 * be on.
+	 */
+	ret = pnv_pci_get_power_state(php_slot->id, &power_state);
+	if (ret) {
+		SLOT_WARN(php_slot, "Error %d getting power status\n",
+			  ret);
+	} else {
+		*state = power_state;
+	}
+
+	return 0;
+}
+
+static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state)
+{
+	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
+	uint8_t presence = OPAL_PCI_SLOT_EMPTY;
+	int ret;
+
+	/*
+	 * Retrieve presence status from firmware. If we can't
+	 * get that, it will fail back to be empty.
+	 */
+	ret = pnv_pci_get_presence_state(php_slot->id, &presence);
+	if (ret >= 0) {
+		*state = presence;
+		ret = 0;
+	} else {
+		SLOT_WARN(php_slot, "Error %d getting presence\n", ret);
+	}
+
+	return ret;
+}
+
+static int pnv_php_get_attention_state(struct hotplug_slot *slot, u8 *state)
+{
+	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
+
+	*state = php_slot->attention_state;
+	return 0;
+}
+
+static int pnv_php_set_attention_state(struct hotplug_slot *slot, u8 state)
+{
+	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
+	struct pci_dev *bridge = php_slot->pdev;
+	u16 new, mask;
+
+	php_slot->attention_state = state;
+	if (!bridge)
+		return 0;
+
+	mask = PCI_EXP_SLTCTL_AIC;
+
+	if (state)
+		new = PCI_EXP_SLTCTL_ATTN_IND_ON;
+	else
+		new = PCI_EXP_SLTCTL_ATTN_IND_OFF;
+
+	pcie_capability_clear_and_set_word(bridge, PCI_EXP_SLTCTL, mask, new);
+
+	return 0;
+}
+
+static int pnv_php_enable(struct pnv_php_slot *php_slot, bool rescan)
+{
+	struct hotplug_slot *slot = &php_slot->slot;
+	uint8_t presence = OPAL_PCI_SLOT_EMPTY;
+	uint8_t power_status = OPAL_PCI_SLOT_POWER_ON;
+	int ret;
+
+	/* Check if the slot has been configured */
+	if (php_slot->state != PNV_PHP_STATE_REGISTERED)
+		return 0;
+
+	/* Retrieve slot presence status */
+	ret = pnv_php_get_adapter_state(slot, &presence);
+	if (ret)
+		return ret;
+
+	/*
+	 * Proceed if there have nothing behind the slot. However,
+	 * we should leave the slot in registered state at the
+	 * beginning. Otherwise, the PCI devices inserted afterwards
+	 * won't be probed and populated.
+	 */
+	if (presence == OPAL_PCI_SLOT_EMPTY) {
+		if (!php_slot->power_state_check) {
+			php_slot->power_state_check = true;
+
+			return 0;
+		}
+
+		goto scan;
+	}
+
+	/*
+	 * If the power supply to the slot is off, we can't detect
+	 * adapter presence state. That means we have to turn the
+	 * slot on before going to probe slot's presence state.
+	 *
+	 * On the first time, we don't change the power status to
+	 * boost system boot with assumption that the firmware
+	 * supplies consistent slot power status: empty slot always
+	 * has its power off and non-empty slot has its power on.
+	 */
+	if (!php_slot->power_state_check) {
+		php_slot->power_state_check = true;
+
+		ret = pnv_php_get_power_state(slot, &power_status);
+		if (ret)
+			return ret;
+
+		if (power_status != OPAL_PCI_SLOT_POWER_ON)
+			return 0;
+	}
+
+	/* Check the power status. Scan the slot if it is already on */
+	ret = pnv_php_get_power_state(slot, &power_status);
+	if (ret)
+		return ret;
+
+	if (power_status == OPAL_PCI_SLOT_POWER_ON)
+		goto scan;
+
+	/* Power is off, turn it on and then scan the slot */
+	ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_ON);
+	if (ret)
+		return ret;
+
+scan:
+	if (presence == OPAL_PCI_SLOT_PRESENT) {
+		if (rescan) {
+			pci_lock_rescan_remove();
+			pci_hp_add_devices(php_slot->bus);
+			pci_unlock_rescan_remove();
+		}
+
+		/* Rescan for child hotpluggable slots */
+		php_slot->state = PNV_PHP_STATE_POPULATED;
+		if (rescan)
+			pnv_php_register(php_slot->dn);
+	} else {
+		php_slot->state = PNV_PHP_STATE_POPULATED;
+	}
+
+	return 0;
+}
+
+static int pnv_php_reset_slot(struct hotplug_slot *slot, bool probe)
+{
+	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
+	struct pci_dev *bridge = php_slot->pdev;
+	uint16_t sts;
+
+	/*
+	 * The CAPI folks want pnv_php to drive OpenCAPI slots
+	 * which don't have a bridge. Only claim to support
+	 * reset_slot() if we have a bridge device (for now...)
+	 */
+	if (probe)
+		return !bridge;
+
+	/* mask our interrupt while resetting the bridge */
+	if (php_slot->irq > 0)
+		disable_irq(php_slot->irq);
+
+	pci_bridge_secondary_bus_reset(bridge);
+
+	/* clear any state changes that happened due to the reset */
+	pcie_capability_read_word(php_slot->pdev, PCI_EXP_SLTSTA, &sts);
+	sts &= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
+	pcie_capability_write_word(php_slot->pdev, PCI_EXP_SLTSTA, sts);
+
+	if (php_slot->irq > 0)
+		enable_irq(php_slot->irq);
+
+	return 0;
+}
+
+static int pnv_php_enable_slot(struct hotplug_slot *slot)
+{
+	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
+
+	return pnv_php_enable(php_slot, true);
+}
+
+static int pnv_php_disable_slot(struct hotplug_slot *slot)
+{
+	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
+	int ret;
+
+	/*
+	 * Allow to disable a slot already in the registered state to
+	 * cover cases where the slot couldn't be enabled and never
+	 * reached the populated state
+	 */
+	if (php_slot->state != PNV_PHP_STATE_POPULATED &&
+	    php_slot->state != PNV_PHP_STATE_REGISTERED)
+		return 0;
+
+	/* Remove all devices behind the slot */
+	pci_lock_rescan_remove();
+	pci_hp_remove_devices(php_slot->bus);
+	pci_unlock_rescan_remove();
+
+	/* Detach the child hotpluggable slots */
+	pnv_php_unregister(php_slot->dn);
+
+	/* Notify firmware and remove device nodes */
+	ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_OFF);
+
+	php_slot->state = PNV_PHP_STATE_REGISTERED;
+	return ret;
+}
+
+static const struct hotplug_slot_ops php_slot_ops = {
+	.get_power_status	= pnv_php_get_power_state,
+	.get_adapter_status	= pnv_php_get_adapter_state,
+	.get_attention_status	= pnv_php_get_attention_state,
+	.set_attention_status	= pnv_php_set_attention_state,
+	.enable_slot		= pnv_php_enable_slot,
+	.disable_slot		= pnv_php_disable_slot,
+	.reset_slot		= pnv_php_reset_slot,
+};
+
+static void pnv_php_release(struct pnv_php_slot *php_slot)
+{
+	unsigned long flags;
+
+	/* Remove from global or child list */
+	spin_lock_irqsave(&pnv_php_lock, flags);
+	list_del(&php_slot->link);
+	spin_unlock_irqrestore(&pnv_php_lock, flags);
+
+	/* Detach from parent */
+	pnv_php_put_slot(php_slot);
+	pnv_php_put_slot(php_slot->parent);
+}
+
+static struct pnv_php_slot *pnv_php_alloc_slot(struct device_node *dn)
+{
+	struct pnv_php_slot *php_slot;
+	struct pci_bus *bus;
+	const char *label;
+	uint64_t id;
+	int ret;
+
+	ret = of_property_read_string(dn, "ibm,slot-label", &label);
+	if (ret)
+		return NULL;
+
+	if (pnv_pci_get_slot_id(dn, &id))
+		return NULL;
+
+	bus = pci_find_bus_by_node(dn);
+	if (!bus)
+		return NULL;
+
+	php_slot = kzalloc(sizeof(*php_slot), GFP_KERNEL);
+	if (!php_slot)
+		return NULL;
+
+	php_slot->name = kstrdup(label, GFP_KERNEL);
+	if (!php_slot->name) {
+		kfree(php_slot);
+		return NULL;
+	}
+
+	if (dn->child && PCI_DN(dn->child))
+		php_slot->slot_no = PCI_SLOT(PCI_DN(dn->child)->devfn);
+	else
+		php_slot->slot_no = -1;   /* Placeholder slot */
+
+	kref_init(&php_slot->kref);
+	php_slot->state	                = PNV_PHP_STATE_INITIALIZED;
+	php_slot->dn	                = dn;
+	php_slot->pdev	                = bus->self;
+	php_slot->bus	                = bus;
+	php_slot->id	                = id;
+	php_slot->power_state_check     = false;
+	php_slot->slot.ops              = &php_slot_ops;
+
+	INIT_LIST_HEAD(&php_slot->children);
+	INIT_LIST_HEAD(&php_slot->link);
+
+	return php_slot;
+}
+
+static int pnv_php_register_slot(struct pnv_php_slot *php_slot)
+{
+	struct pnv_php_slot *parent;
+	struct device_node *dn = php_slot->dn;
+	unsigned long flags;
+	int ret;
+
+	/* Check if the slot is registered or not */
+	parent = pnv_php_find_slot(php_slot->dn);
+	if (parent) {
+		pnv_php_put_slot(parent);
+		return -EEXIST;
+	}
+
+	/* Register PCI slot */
+	ret = pci_hp_register(&php_slot->slot, php_slot->bus,
+			      php_slot->slot_no, php_slot->name);
+	if (ret) {
+		SLOT_WARN(php_slot, "Error %d registering slot\n", ret);
+		return ret;
+	}
+
+	/* Attach to the parent's child list or global list */
+	while ((dn = of_get_parent(dn))) {
+		if (!PCI_DN(dn)) {
+			of_node_put(dn);
+			break;
+		}
+
+		parent = pnv_php_find_slot(dn);
+		if (parent) {
+			of_node_put(dn);
+			break;
+		}
+
+		of_node_put(dn);
+	}
+
+	spin_lock_irqsave(&pnv_php_lock, flags);
+	php_slot->parent = parent;
+	if (parent)
+		list_add_tail(&php_slot->link, &parent->children);
+	else
+		list_add_tail(&php_slot->link, &pnv_php_slot_list);
+	spin_unlock_irqrestore(&pnv_php_lock, flags);
+
+	php_slot->state = PNV_PHP_STATE_REGISTERED;
+	return 0;
+}
+
+static int pnv_php_enable_msix(struct pnv_php_slot *php_slot)
+{
+	struct pci_dev *pdev = php_slot->pdev;
+	struct msix_entry entry;
+	int nr_entries, ret;
+	u16 pcie_flag;
+
+	/* Get total number of MSIx entries */
+	nr_entries = pci_msix_vec_count(pdev);
+	if (nr_entries < 0)
+		return nr_entries;
+
+	/* Check hotplug MSIx entry is in range */
+	pcie_capability_read_word(pdev, PCI_EXP_FLAGS, &pcie_flag);
+	entry.entry = (pcie_flag & PCI_EXP_FLAGS_IRQ) >> 9;
+	if (entry.entry >= nr_entries)
+		return -ERANGE;
+
+	/* Enable MSIx */
+	ret = pci_enable_msix_exact(pdev, &entry, 1);
+	if (ret) {
+		SLOT_WARN(php_slot, "Error %d enabling MSIx\n", ret);
+		return ret;
+	}
+
+	return entry.vector;
+}
+
+static void pnv_php_event_handler(struct work_struct *work)
+{
+	struct pnv_php_event *event =
+		container_of(work, struct pnv_php_event, work);
+	struct pnv_php_slot *php_slot = event->php_slot;
+
+	if (event->added)
+		pnv_php_enable_slot(&php_slot->slot);
+	else
+		pnv_php_disable_slot(&php_slot->slot);
+
+	kfree(event);
+}
+
+static irqreturn_t pnv_php_interrupt(int irq, void *data)
+{
+	struct pnv_php_slot *php_slot = data;
+	struct pci_dev *pchild, *pdev = php_slot->pdev;
+	struct eeh_dev *edev;
+	struct eeh_pe *pe;
+	struct pnv_php_event *event;
+	u16 sts, lsts;
+	u8 presence;
+	bool added;
+	unsigned long flags;
+	int ret;
+
+	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &sts);
+	sts &= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
+	pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, sts);
+
+	pci_dbg(pdev, "PCI slot [%s]: HP int! DLAct: %d, PresDet: %d\n",
+			php_slot->name,
+			!!(sts & PCI_EXP_SLTSTA_DLLSC),
+			!!(sts & PCI_EXP_SLTSTA_PDC));
+
+	if (sts & PCI_EXP_SLTSTA_DLLSC) {
+		pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lsts);
+		added = !!(lsts & PCI_EXP_LNKSTA_DLLLA);
+	} else if (!(php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC) &&
+		   (sts & PCI_EXP_SLTSTA_PDC)) {
+		ret = pnv_pci_get_presence_state(php_slot->id, &presence);
+		if (ret) {
+			SLOT_WARN(php_slot,
+				  "PCI slot [%s] error %d getting presence (0x%04x), to retry the operation.\n",
+				  php_slot->name, ret, sts);
+			return IRQ_HANDLED;
+		}
+
+		added = !!(presence == OPAL_PCI_SLOT_PRESENT);
+	} else {
+		pci_dbg(pdev, "PCI slot [%s]: Spurious IRQ?\n", php_slot->name);
+		return IRQ_NONE;
+	}
+
+	/* Freeze the removed PE to avoid unexpected error reporting */
+	if (!added) {
+		pchild = list_first_entry_or_null(&php_slot->bus->devices,
+						  struct pci_dev, bus_list);
+		edev = pchild ? pci_dev_to_eeh_dev(pchild) : NULL;
+		pe = edev ? edev->pe : NULL;
+		if (pe) {
+			eeh_serialize_lock(&flags);
+			eeh_pe_mark_isolated(pe);
+			eeh_serialize_unlock(flags);
+			eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE);
+		}
+	}
+
+	/*
+	 * The PE is left in frozen state if the event is missed. It's
+	 * fine as the PCI devices (PE) aren't functional any more.
+	 */
+	event = kzalloc(sizeof(*event), GFP_ATOMIC);
+	if (!event) {
+		SLOT_WARN(php_slot,
+			  "PCI slot [%s] missed hotplug event 0x%04x\n",
+			  php_slot->name, sts);
+		return IRQ_HANDLED;
+	}
+
+	pci_info(pdev, "PCI slot [%s] %s (IRQ: %d)\n",
+		 php_slot->name, added ? "added" : "removed", irq);
+	INIT_WORK(&event->work, pnv_php_event_handler);
+	event->added = added;
+	event->php_slot = php_slot;
+	queue_work(php_slot->wq, &event->work);
+
+	return IRQ_HANDLED;
+}
+
+static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq)
+{
+	struct pci_dev *pdev = php_slot->pdev;
+	u32 broken_pdc = 0;
+	u16 sts, ctrl;
+	int ret;
+
+	/* Allocate workqueue */
+	php_slot->wq = alloc_workqueue("pciehp-%s", 0, 0, php_slot->name);
+	if (!php_slot->wq) {
+		SLOT_WARN(php_slot, "Cannot alloc workqueue\n");
+		pnv_php_disable_irq(php_slot, true);
+		return;
+	}
+
+	/* Check PDC (Presence Detection Change) is broken or not */
+	ret = of_property_read_u32(php_slot->dn, "ibm,slot-broken-pdc",
+				   &broken_pdc);
+	if (!ret && broken_pdc)
+		php_slot->flags |= PNV_PHP_FLAG_BROKEN_PDC;
+
+	/* Clear pending interrupts */
+	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &sts);
+	if (php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC)
+		sts |= PCI_EXP_SLTSTA_DLLSC;
+	else
+		sts |= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
+	pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, sts);
+
+	/* Request the interrupt */
+	ret = request_irq(irq, pnv_php_interrupt, IRQF_SHARED,
+			  php_slot->name, php_slot);
+	if (ret) {
+		pnv_php_disable_irq(php_slot, true);
+		SLOT_WARN(php_slot, "Error %d enabling IRQ %d\n", ret, irq);
+		return;
+	}
+
+	/* Enable the interrupts */
+	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &ctrl);
+	if (php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC) {
+		ctrl &= ~PCI_EXP_SLTCTL_PDCE;
+		ctrl |= (PCI_EXP_SLTCTL_HPIE |
+			 PCI_EXP_SLTCTL_DLLSCE);
+	} else {
+		ctrl |= (PCI_EXP_SLTCTL_HPIE |
+			 PCI_EXP_SLTCTL_PDCE |
+			 PCI_EXP_SLTCTL_DLLSCE);
+	}
+	pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, ctrl);
+
+	/* The interrupt is initialized successfully when @irq is valid */
+	php_slot->irq = irq;
+}
+
+static void pnv_php_enable_irq(struct pnv_php_slot *php_slot)
+{
+	struct pci_dev *pdev = php_slot->pdev;
+	int irq, ret;
+
+	/*
+	 * The MSI/MSIx interrupt might have been occupied by other
+	 * drivers. Don't populate the surprise hotplug capability
+	 * in that case.
+	 */
+	if (pci_dev_msi_enabled(pdev))
+		return;
+
+	ret = pci_enable_device(pdev);
+	if (ret) {
+		SLOT_WARN(php_slot, "Error %d enabling device\n", ret);
+		return;
+	}
+
+	pci_set_master(pdev);
+
+	/* Enable MSIx interrupt */
+	irq = pnv_php_enable_msix(php_slot);
+	if (irq > 0) {
+		pnv_php_init_irq(php_slot, irq);
+		return;
+	}
+
+	/*
+	 * Use MSI if MSIx doesn't work. Fail back to legacy INTx
+	 * if MSI doesn't work either
+	 */
+	ret = pci_enable_msi(pdev);
+	if (!ret || pdev->irq) {
+		irq = pdev->irq;
+		pnv_php_init_irq(php_slot, irq);
+	}
+}
+
+static int pnv_php_register_one(struct device_node *dn)
+{
+	struct pnv_php_slot *php_slot;
+	u32 prop32;
+	int ret;
+
+	/* Check if it's hotpluggable slot */
+	ret = of_property_read_u32(dn, "ibm,slot-pluggable", &prop32);
+	if (ret || !prop32)
+		return -ENXIO;
+
+	ret = of_property_read_u32(dn, "ibm,reset-by-firmware", &prop32);
+	if (ret || !prop32)
+		return -ENXIO;
+
+	php_slot = pnv_php_alloc_slot(dn);
+	if (!php_slot)
+		return -ENODEV;
+
+	ret = pnv_php_register_slot(php_slot);
+	if (ret)
+		goto free_slot;
+
+	ret = pnv_php_enable(php_slot, false);
+	if (ret)
+		goto unregister_slot;
+
+	/* Enable interrupt if the slot supports surprise hotplug */
+	ret = of_property_read_u32(dn, "ibm,slot-surprise-pluggable", &prop32);
+	if (!ret && prop32)
+		pnv_php_enable_irq(php_slot);
+
+	return 0;
+
+unregister_slot:
+	pnv_php_unregister_one(php_slot->dn);
+free_slot:
+	pnv_php_put_slot(php_slot);
+	return ret;
+}
+
+static void pnv_php_register(struct device_node *dn)
+{
+	struct device_node *child;
+
+	/*
+	 * The parent slots should be registered before their
+	 * child slots.
+	 */
+	for_each_child_of_node(dn, child) {
+		pnv_php_register_one(child);
+		pnv_php_register(child);
+	}
+}
+
+static void pnv_php_unregister_one(struct device_node *dn)
+{
+	struct pnv_php_slot *php_slot;
+
+	php_slot = pnv_php_find_slot(dn);
+	if (!php_slot)
+		return;
+
+	php_slot->state = PNV_PHP_STATE_OFFLINE;
+	pci_hp_deregister(&php_slot->slot);
+	pnv_php_release(php_slot);
+	pnv_php_put_slot(php_slot);
+}
+
+static void pnv_php_unregister(struct device_node *dn)
+{
+	struct device_node *child;
+
+	/* The child slots should go before their parent slots */
+	for_each_child_of_node(dn, child) {
+		pnv_php_unregister(child);
+		pnv_php_unregister_one(child);
+	}
+}
+
+static int __init pnv_php_init(void)
+{
+	struct device_node *dn;
+
+	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
+	for_each_compatible_node(dn, NULL, "ibm,ioda2-phb")
+		pnv_php_register(dn);
+
+	for_each_compatible_node(dn, NULL, "ibm,ioda3-phb")
+		pnv_php_register(dn);
+
+	for_each_compatible_node(dn, NULL, "ibm,ioda2-npu2-opencapi-phb")
+		pnv_php_register_one(dn); /* slot directly under the PHB */
+	return 0;
+}
+
+static void __exit pnv_php_exit(void)
+{
+	struct device_node *dn;
+
+	for_each_compatible_node(dn, NULL, "ibm,ioda2-phb")
+		pnv_php_unregister(dn);
+
+	for_each_compatible_node(dn, NULL, "ibm,ioda3-phb")
+		pnv_php_unregister(dn);
+
+	for_each_compatible_node(dn, NULL, "ibm,ioda2-npu2-opencapi-phb")
+		pnv_php_unregister_one(dn); /* slot directly under the PHB */
+}
+
+module_init(pnv_php_init);
+module_exit(pnv_php_exit);
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/pci/hotplug/rpadlpar.h b/drivers/pci/hotplug/rpadlpar.h
new file mode 100644
index 000000000..1eeb55d33
--- /dev/null
+++ b/drivers/pci/hotplug/rpadlpar.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Interface for Dynamic Logical Partitioning of I/O Slots on
+ * RPA-compliant PPC64 platform.
+ *
+ * John Rose <johnrose@austin.ibm.com>
+ * October 2003
+ *
+ * Copyright (C) 2003 IBM.
+ */
+#ifndef _RPADLPAR_IO_H_
+#define _RPADLPAR_IO_H_
+
+int dlpar_sysfs_init(void);
+void dlpar_sysfs_exit(void);
+
+int dlpar_add_slot(char *drc_name);
+int dlpar_remove_slot(char *drc_name);
+
+#endif
diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c
new file mode 100644
index 000000000..980bb3afd
--- /dev/null
+++ b/drivers/pci/hotplug/rpadlpar_core.c
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Interface for Dynamic Logical Partitioning of I/O Slots on
+ * RPA-compliant PPC64 platform.
+ *
+ * John Rose <johnrose@austin.ibm.com>
+ * Linda Xie <lxie@us.ibm.com>
+ *
+ * October 2003
+ *
+ * Copyright (C) 2003 IBM.
+ */
+
+#undef DEBUG
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+
+#include <asm/pci-bridge.h>
+#include <linux/mutex.h>
+#include <asm/rtas.h>
+#include <asm/vio.h>
+#include <linux/firmware.h>
+
+#include "../pci.h"
+#include "rpaphp.h"
+#include "rpadlpar.h"
+
+static DEFINE_MUTEX(rpadlpar_mutex);
+
+#define DLPAR_MODULE_NAME "rpadlpar_io"
+
+#define NODE_TYPE_VIO  1
+#define NODE_TYPE_SLOT 2
+#define NODE_TYPE_PHB  3
+
+static struct device_node *find_vio_slot_node(char *drc_name)
+{
+	struct device_node *parent = of_find_node_by_name(NULL, "vdevice");
+	struct device_node *dn;
+	int rc;
+
+	if (!parent)
+		return NULL;
+
+	for_each_child_of_node(parent, dn) {
+		rc = rpaphp_check_drc_props(dn, drc_name, NULL);
+		if (rc == 0)
+			break;
+	}
+	of_node_put(parent);
+
+	return dn;
+}
+
+/* Find dlpar-capable pci node that contains the specified name and type */
+static struct device_node *find_php_slot_pci_node(char *drc_name,
+						  char *drc_type)
+{
+	struct device_node *np;
+	int rc;
+
+	for_each_node_by_name(np, "pci") {
+		rc = rpaphp_check_drc_props(np, drc_name, drc_type);
+		if (rc == 0)
+			break;
+	}
+
+	return np;
+}
+
+/* Returns a device_node with its reference count incremented */
+static struct device_node *find_dlpar_node(char *drc_name, int *node_type)
+{
+	struct device_node *dn;
+
+	dn = find_php_slot_pci_node(drc_name, "SLOT");
+	if (dn) {
+		*node_type = NODE_TYPE_SLOT;
+		return dn;
+	}
+
+	dn = find_php_slot_pci_node(drc_name, "PHB");
+	if (dn) {
+		*node_type = NODE_TYPE_PHB;
+		return dn;
+	}
+
+	dn = find_vio_slot_node(drc_name);
+	if (dn) {
+		*node_type = NODE_TYPE_VIO;
+		return dn;
+	}
+
+	return NULL;
+}
+
+/**
+ * find_php_slot - return hotplug slot structure for device node
+ * @dn: target &device_node
+ *
+ * This routine will return the hotplug slot structure
+ * for a given device node. Note that built-in PCI slots
+ * may be dlpar-able, but not hot-pluggable, so this routine
+ * will return NULL for built-in PCI slots.
+ */
+static struct slot *find_php_slot(struct device_node *dn)
+{
+	struct slot *slot, *next;
+
+	list_for_each_entry_safe(slot, next, &rpaphp_slot_head,
+				 rpaphp_slot_list) {
+		if (slot->dn == dn)
+			return slot;
+	}
+
+	return NULL;
+}
+
+static struct pci_dev *dlpar_find_new_dev(struct pci_bus *parent,
+					struct device_node *dev_dn)
+{
+	struct pci_dev *tmp = NULL;
+	struct device_node *child_dn;
+
+	list_for_each_entry(tmp, &parent->devices, bus_list) {
+		child_dn = pci_device_to_OF_node(tmp);
+		if (child_dn == dev_dn)
+			return tmp;
+	}
+	return NULL;
+}
+
+static void dlpar_pci_add_bus(struct device_node *dn)
+{
+	struct pci_dn *pdn = PCI_DN(dn);
+	struct pci_controller *phb = pdn->phb;
+	struct pci_dev *dev = NULL;
+
+	pseries_eeh_init_edev_recursive(pdn);
+
+	/* Add EADS device to PHB bus, adding new entry to bus->devices */
+	dev = of_create_pci_dev(dn, phb->bus, pdn->devfn);
+	if (!dev) {
+		printk(KERN_ERR "%s: failed to create pci dev for %pOF\n",
+				__func__, dn);
+		return;
+	}
+
+	/* Scan below the new bridge */
+	if (pci_is_bridge(dev))
+		of_scan_pci_bridge(dev);
+
+	/* Map IO space for child bus, which may or may not succeed */
+	pcibios_map_io_space(dev->subordinate);
+
+	/* Finish adding it : resource allocation, adding devices, etc...
+	 * Note that we need to perform the finish pass on the -parent-
+	 * bus of the EADS bridge so the bridge device itself gets
+	 * properly added
+	 */
+	pcibios_finish_adding_to_bus(phb->bus);
+}
+
+static int dlpar_add_pci_slot(char *drc_name, struct device_node *dn)
+{
+	struct pci_dev *dev;
+	struct pci_controller *phb;
+
+	if (pci_find_bus_by_node(dn))
+		return -EINVAL;
+
+	/* Add pci bus */
+	dlpar_pci_add_bus(dn);
+
+	/* Confirm new bridge dev was created */
+	phb = PCI_DN(dn)->phb;
+	dev = dlpar_find_new_dev(phb->bus, dn);
+
+	if (!dev) {
+		printk(KERN_ERR "%s: unable to add bus %s\n", __func__,
+			drc_name);
+		return -EIO;
+	}
+
+	if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
+		printk(KERN_ERR "%s: unexpected header type %d, unable to add bus %s\n",
+			__func__, dev->hdr_type, drc_name);
+		return -EIO;
+	}
+
+	/* Add hotplug slot */
+	if (rpaphp_add_slot(dn)) {
+		printk(KERN_ERR "%s: unable to add hotplug slot %s\n",
+			__func__, drc_name);
+		return -EIO;
+	}
+	return 0;
+}
+
+static int dlpar_remove_phb(char *drc_name, struct device_node *dn)
+{
+	struct slot *slot;
+	struct pci_dn *pdn;
+	int rc = 0;
+
+	if (!pci_find_bus_by_node(dn))
+		return -EINVAL;
+
+	/* If pci slot is hotpluggable, use hotplug to remove it */
+	slot = find_php_slot(dn);
+	if (slot && rpaphp_deregister_slot(slot)) {
+		printk(KERN_ERR "%s: unable to remove hotplug slot %s\n",
+		       __func__, drc_name);
+		return -EIO;
+	}
+
+	pdn = dn->data;
+	BUG_ON(!pdn || !pdn->phb);
+	rc = remove_phb_dynamic(pdn->phb);
+	if (rc < 0)
+		return rc;
+
+	pdn->phb = NULL;
+
+	return 0;
+}
+
+static int dlpar_add_phb(char *drc_name, struct device_node *dn)
+{
+	struct pci_controller *phb;
+
+	if (PCI_DN(dn) && PCI_DN(dn)->phb) {
+		/* PHB already exists */
+		return -EINVAL;
+	}
+
+	phb = init_phb_dynamic(dn);
+	if (!phb)
+		return -EIO;
+
+	if (rpaphp_add_slot(dn)) {
+		printk(KERN_ERR "%s: unable to add hotplug slot %s\n",
+			__func__, drc_name);
+		return -EIO;
+	}
+	return 0;
+}
+
+static int dlpar_add_vio_slot(char *drc_name, struct device_node *dn)
+{
+	struct vio_dev *vio_dev;
+
+	vio_dev = vio_find_node(dn);
+	if (vio_dev) {
+		put_device(&vio_dev->dev);
+		return -EINVAL;
+	}
+
+	if (!vio_register_device_node(dn)) {
+		printk(KERN_ERR
+			"%s: failed to register vio node %s\n",
+			__func__, drc_name);
+		return -EIO;
+	}
+	return 0;
+}
+
+/**
+ * dlpar_add_slot - DLPAR add an I/O Slot
+ * @drc_name: drc-name of newly added slot
+ *
+ * Make the hotplug module and the kernel aware of a newly added I/O Slot.
+ * Return Codes:
+ * 0			Success
+ * -ENODEV		Not a valid drc_name
+ * -EINVAL		Slot already added
+ * -ERESTARTSYS		Signalled before obtaining lock
+ * -EIO			Internal PCI Error
+ */
+int dlpar_add_slot(char *drc_name)
+{
+	struct device_node *dn = NULL;
+	int node_type;
+	int rc = -EIO;
+
+	if (mutex_lock_interruptible(&rpadlpar_mutex))
+		return -ERESTARTSYS;
+
+	/* Find newly added node */
+	dn = find_dlpar_node(drc_name, &node_type);
+	if (!dn) {
+		rc = -ENODEV;
+		goto exit;
+	}
+
+	switch (node_type) {
+		case NODE_TYPE_VIO:
+			rc = dlpar_add_vio_slot(drc_name, dn);
+			break;
+		case NODE_TYPE_SLOT:
+			rc = dlpar_add_pci_slot(drc_name, dn);
+			break;
+		case NODE_TYPE_PHB:
+			rc = dlpar_add_phb(drc_name, dn);
+			break;
+	}
+	of_node_put(dn);
+
+	printk(KERN_INFO "%s: slot %s added\n", DLPAR_MODULE_NAME, drc_name);
+exit:
+	mutex_unlock(&rpadlpar_mutex);
+	return rc;
+}
+
+/**
+ * dlpar_remove_vio_slot - DLPAR remove a virtual I/O Slot
+ * @drc_name: drc-name of newly added slot
+ * @dn: &device_node
+ *
+ * Remove the kernel and hotplug representations of an I/O Slot.
+ * Return Codes:
+ * 0			Success
+ * -EINVAL		Vio dev doesn't exist
+ */
+static int dlpar_remove_vio_slot(char *drc_name, struct device_node *dn)
+{
+	struct vio_dev *vio_dev;
+
+	vio_dev = vio_find_node(dn);
+	if (!vio_dev)
+		return -EINVAL;
+
+	vio_unregister_device(vio_dev);
+
+	put_device(&vio_dev->dev);
+
+	return 0;
+}
+
+/**
+ * dlpar_remove_pci_slot - DLPAR remove a PCI I/O Slot
+ * @drc_name: drc-name of newly added slot
+ * @dn: &device_node
+ *
+ * Remove the kernel and hotplug representations of a PCI I/O Slot.
+ * Return Codes:
+ * 0			Success
+ * -ENODEV		Not a valid drc_name
+ * -EIO			Internal PCI Error
+ */
+static int dlpar_remove_pci_slot(char *drc_name, struct device_node *dn)
+{
+	struct pci_bus *bus;
+	struct slot *slot;
+	int ret = 0;
+
+	pci_lock_rescan_remove();
+
+	bus = pci_find_bus_by_node(dn);
+	if (!bus) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	pr_debug("PCI: Removing PCI slot below EADS bridge %s\n",
+		 bus->self ? pci_name(bus->self) : "<!PHB!>");
+
+	slot = find_php_slot(dn);
+	if (slot) {
+		pr_debug("PCI: Removing hotplug slot for %04x:%02x...\n",
+			 pci_domain_nr(bus), bus->number);
+
+		if (rpaphp_deregister_slot(slot)) {
+			printk(KERN_ERR
+				"%s: unable to remove hotplug slot %s\n",
+				__func__, drc_name);
+			ret = -EIO;
+			goto out;
+		}
+	}
+
+	/* Remove all devices below slot */
+	pci_hp_remove_devices(bus);
+
+	/* Unmap PCI IO space */
+	if (pcibios_unmap_io_space(bus)) {
+		printk(KERN_ERR "%s: failed to unmap bus range\n",
+			__func__);
+		ret = -ERANGE;
+		goto out;
+	}
+
+	/* Remove the EADS bridge device itself */
+	BUG_ON(!bus->self);
+	pr_debug("PCI: Now removing bridge device %s\n", pci_name(bus->self));
+	pci_stop_and_remove_bus_device(bus->self);
+
+ out:
+	pci_unlock_rescan_remove();
+	return ret;
+}
+
+/**
+ * dlpar_remove_slot - DLPAR remove an I/O Slot
+ * @drc_name: drc-name of newly added slot
+ *
+ * Remove the kernel and hotplug representations of an I/O Slot.
+ * Return Codes:
+ * 0			Success
+ * -ENODEV		Not a valid drc_name
+ * -EINVAL		Slot already removed
+ * -ERESTARTSYS		Signalled before obtaining lock
+ * -EIO			Internal Error
+ */
+int dlpar_remove_slot(char *drc_name)
+{
+	struct device_node *dn;
+	int node_type;
+	int rc = 0;
+
+	if (mutex_lock_interruptible(&rpadlpar_mutex))
+		return -ERESTARTSYS;
+
+	dn = find_dlpar_node(drc_name, &node_type);
+	if (!dn) {
+		rc = -ENODEV;
+		goto exit;
+	}
+
+	switch (node_type) {
+		case NODE_TYPE_VIO:
+			rc = dlpar_remove_vio_slot(drc_name, dn);
+			break;
+		case NODE_TYPE_PHB:
+			rc = dlpar_remove_phb(drc_name, dn);
+			break;
+		case NODE_TYPE_SLOT:
+			rc = dlpar_remove_pci_slot(drc_name, dn);
+			break;
+	}
+	of_node_put(dn);
+	vm_unmap_aliases();
+
+	printk(KERN_INFO "%s: slot %s removed\n", DLPAR_MODULE_NAME, drc_name);
+exit:
+	mutex_unlock(&rpadlpar_mutex);
+	return rc;
+}
+
+static inline int is_dlpar_capable(void)
+{
+	int rc = rtas_token("ibm,configure-connector");
+
+	return (int) (rc != RTAS_UNKNOWN_SERVICE);
+}
+
+static int __init rpadlpar_io_init(void)
+{
+
+	if (!is_dlpar_capable()) {
+		printk(KERN_WARNING "%s: partition not DLPAR capable\n",
+			__func__);
+		return -EPERM;
+	}
+
+	return dlpar_sysfs_init();
+}
+
+static void __exit rpadlpar_io_exit(void)
+{
+	dlpar_sysfs_exit();
+}
+
+module_init(rpadlpar_io_init);
+module_exit(rpadlpar_io_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("RPA Dynamic Logical Partitioning driver for I/O slots");
diff --git a/drivers/pci/hotplug/rpadlpar_sysfs.c b/drivers/pci/hotplug/rpadlpar_sysfs.c
new file mode 100644
index 000000000..068b7810a
--- /dev/null
+++ b/drivers/pci/hotplug/rpadlpar_sysfs.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Interface for Dynamic Logical Partitioning of I/O Slots on
+ * RPA-compliant PPC64 platform.
+ *
+ * John Rose <johnrose@austin.ibm.com>
+ * October 2003
+ *
+ * Copyright (C) 2003 IBM.
+ */
+#include <linux/kobject.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include "rpaphp.h"
+#include "rpadlpar.h"
+#include "../pci.h"
+
+#define DLPAR_KOBJ_NAME       "control"
+
+/* Those two have no quotes because they are passed to __ATTR() which
+ * stringifies the argument (yuck !)
+ */
+#define ADD_SLOT_ATTR_NAME    add_slot
+#define REMOVE_SLOT_ATTR_NAME remove_slot
+
+static ssize_t add_slot_store(struct kobject *kobj, struct kobj_attribute *attr,
+			      const char *buf, size_t nbytes)
+{
+	char drc_name[MAX_DRC_NAME_LEN];
+	char *end;
+	int rc;
+
+	if (nbytes >= MAX_DRC_NAME_LEN)
+		return 0;
+
+	strscpy(drc_name, buf, nbytes + 1);
+
+	end = strchr(drc_name, '\n');
+	if (end)
+		*end = '\0';
+
+	rc = dlpar_add_slot(drc_name);
+	if (rc)
+		return rc;
+
+	return nbytes;
+}
+
+static ssize_t add_slot_show(struct kobject *kobj,
+			     struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "0\n");
+}
+
+static ssize_t remove_slot_store(struct kobject *kobj,
+				 struct kobj_attribute *attr,
+				 const char *buf, size_t nbytes)
+{
+	char drc_name[MAX_DRC_NAME_LEN];
+	int rc;
+	char *end;
+
+	if (nbytes >= MAX_DRC_NAME_LEN)
+		return 0;
+
+	strscpy(drc_name, buf, nbytes + 1);
+
+	end = strchr(drc_name, '\n');
+	if (end)
+		*end = '\0';
+
+	rc = dlpar_remove_slot(drc_name);
+	if (rc)
+		return rc;
+
+	return nbytes;
+}
+
+static ssize_t remove_slot_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "0\n");
+}
+
+static struct kobj_attribute add_slot_attr =
+	__ATTR(ADD_SLOT_ATTR_NAME, 0644, add_slot_show, add_slot_store);
+
+static struct kobj_attribute remove_slot_attr =
+	__ATTR(REMOVE_SLOT_ATTR_NAME, 0644, remove_slot_show, remove_slot_store);
+
+static struct attribute *default_attrs[] = {
+	&add_slot_attr.attr,
+	&remove_slot_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group dlpar_attr_group = {
+	.attrs = default_attrs,
+};
+
+static struct kobject *dlpar_kobj;
+
+int dlpar_sysfs_init(void)
+{
+	int error;
+
+	dlpar_kobj = kobject_create_and_add(DLPAR_KOBJ_NAME,
+					    &pci_slots_kset->kobj);
+	if (!dlpar_kobj)
+		return -EINVAL;
+
+	error = sysfs_create_group(dlpar_kobj, &dlpar_attr_group);
+	if (error)
+		kobject_put(dlpar_kobj);
+	return error;
+}
+
+void dlpar_sysfs_exit(void)
+{
+	sysfs_remove_group(dlpar_kobj, &dlpar_attr_group);
+	kobject_put(dlpar_kobj);
+}
diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h
new file mode 100644
index 000000000..bdc954d70
--- /dev/null
+++ b/drivers/pci/hotplug/rpaphp.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * PCI Hot Plug Controller Driver for RPA-compliant PPC64 platform.
+ *
+ * Copyright (C) 2003 Linda Xie <lxie@us.ibm.com>
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <lxie@us.ibm.com>,
+ *
+ */
+
+#ifndef _PPC64PHP_H
+#define _PPC64PHP_H
+
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+
+#define DR_INDICATOR 9002
+#define DR_ENTITY_SENSE 9003
+
+#define POWER_ON	100
+#define POWER_OFF	0
+
+#define LED_OFF		0
+#define LED_ON		1	/* continuous on */
+#define LED_ID		2	/* slow blinking */
+#define LED_ACTION	3	/* fast blinking */
+
+/* Sensor values from rtas_get-sensor */
+#define EMPTY           0	/* No card in slot */
+#define PRESENT         1	/* Card in slot */
+
+#define MY_NAME "rpaphp"
+extern bool rpaphp_debug;
+#define dbg(format, arg...)					\
+	do {							\
+		if (rpaphp_debug)				\
+			printk(KERN_DEBUG "%s: " format,	\
+				MY_NAME, ## arg);		\
+	} while (0)
+#define err(format, arg...) printk(KERN_ERR "%s: " format, MY_NAME, ## arg)
+#define info(format, arg...) printk(KERN_INFO "%s: " format, MY_NAME, ## arg)
+#define warn(format, arg...) printk(KERN_WARNING "%s: " format, MY_NAME, ## arg)
+
+/* slot states */
+
+#define	NOT_VALID	3
+#define	NOT_CONFIGURED	2
+#define	CONFIGURED	1
+#define	EMPTY		0
+
+/* DRC constants */
+
+#define MAX_DRC_NAME_LEN 64
+
+/*
+ * struct slot - slot information for each *physical* slot
+ */
+struct slot {
+	struct list_head rpaphp_slot_list;
+	int state;
+	u32 index;
+	u32 type;
+	u32 power_domain;
+	u8 attention_status;
+	char *name;
+	struct device_node *dn;
+	struct pci_bus *bus;
+	struct list_head *pci_devs;
+	struct hotplug_slot hotplug_slot;
+};
+
+extern const struct hotplug_slot_ops rpaphp_hotplug_slot_ops;
+extern struct list_head rpaphp_slot_head;
+
+static inline struct slot *to_slot(struct hotplug_slot *hotplug_slot)
+{
+	return container_of(hotplug_slot, struct slot, hotplug_slot);
+}
+
+/* function prototypes */
+
+/* rpaphp_pci.c */
+int rpaphp_enable_slot(struct slot *slot);
+int rpaphp_get_sensor_state(struct slot *slot, int *state);
+
+/* rpaphp_core.c */
+int rpaphp_add_slot(struct device_node *dn);
+int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
+		char *drc_type);
+
+/* rpaphp_slot.c */
+void dealloc_slot_struct(struct slot *slot);
+struct slot *alloc_slot_struct(struct device_node *dn, int drc_index, char *drc_name, int power_domain);
+int rpaphp_register_slot(struct slot *slot);
+int rpaphp_deregister_slot(struct slot *slot);
+
+#endif				/* _PPC64PHP_H */
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
new file mode 100644
index 000000000..2316de0fd
--- /dev/null
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -0,0 +1,541 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCI Hot Plug Controller Driver for RPA-compliant PPC64 platform.
+ * Copyright (C) 2003 Linda Xie <lxie@us.ibm.com>
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <lxie@us.ibm.com>
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <asm/firmware.h>
+#include <asm/eeh.h>       /* for eeh_add_device() */
+#include <asm/rtas.h>		/* rtas_call */
+#include <asm/pci-bridge.h>	/* for pci_controller */
+#include <asm/prom.h>
+#include "../pci.h"		/* for pci_add_new_bus */
+				/* and pci_do_scan_bus */
+#include "rpaphp.h"
+
+bool rpaphp_debug;
+LIST_HEAD(rpaphp_slot_head);
+EXPORT_SYMBOL_GPL(rpaphp_slot_head);
+
+#define DRIVER_VERSION	"0.1"
+#define DRIVER_AUTHOR	"Linda Xie <lxie@us.ibm.com>"
+#define DRIVER_DESC	"RPA HOT Plug PCI Controller Driver"
+
+#define MAX_LOC_CODE 128
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+
+module_param_named(debug, rpaphp_debug, bool, 0644);
+
+/**
+ * set_attention_status - set attention LED
+ * @hotplug_slot: target &hotplug_slot
+ * @value: LED control value
+ *
+ * echo 0 > attention -- set LED OFF
+ * echo 1 > attention -- set LED ON
+ * echo 2 > attention -- set LED ID(identify, light is blinking)
+ */
+static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 value)
+{
+	int rc;
+	struct slot *slot = to_slot(hotplug_slot);
+
+	switch (value) {
+	case 0:
+	case 1:
+	case 2:
+		break;
+	default:
+		value = 1;
+		break;
+	}
+
+	rc = rtas_set_indicator(DR_INDICATOR, slot->index, value);
+	if (!rc)
+		slot->attention_status = value;
+
+	return rc;
+}
+
+/**
+ * get_power_status - get power status of a slot
+ * @hotplug_slot: slot to get status
+ * @value: pointer to store status
+ */
+static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	int retval, level;
+	struct slot *slot = to_slot(hotplug_slot);
+
+	retval = rtas_get_power_level(slot->power_domain, &level);
+	if (!retval)
+		*value = level;
+	return retval;
+}
+
+/**
+ * get_attention_status - get attention LED status
+ * @hotplug_slot: slot to get status
+ * @value: pointer to store status
+ */
+static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+	*value = slot->attention_status;
+	return 0;
+}
+
+static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+	int rc, state;
+
+	rc = rpaphp_get_sensor_state(slot, &state);
+
+	*value = NOT_VALID;
+	if (rc)
+		return rc;
+
+	if (state == EMPTY)
+		*value = EMPTY;
+	else if (state == PRESENT)
+		*value = slot->state;
+
+	return 0;
+}
+
+static enum pci_bus_speed get_max_bus_speed(struct slot *slot)
+{
+	enum pci_bus_speed speed;
+	switch (slot->type) {
+	case 1:
+	case 2:
+	case 3:
+	case 4:
+	case 5:
+	case 6:
+		speed = PCI_SPEED_33MHz;	/* speed for case 1-6 */
+		break;
+	case 7:
+	case 8:
+		speed = PCI_SPEED_66MHz;
+		break;
+	case 11:
+	case 14:
+		speed = PCI_SPEED_66MHz_PCIX;
+		break;
+	case 12:
+	case 15:
+		speed = PCI_SPEED_100MHz_PCIX;
+		break;
+	case 13:
+	case 16:
+		speed = PCI_SPEED_133MHz_PCIX;
+		break;
+	default:
+		speed = PCI_SPEED_UNKNOWN;
+		break;
+	}
+
+	return speed;
+}
+
+static int get_children_props(struct device_node *dn, const __be32 **drc_indexes,
+			      const __be32 **drc_names, const __be32 **drc_types,
+			      const __be32 **drc_power_domains)
+{
+	const __be32 *indexes, *names, *types, *domains;
+
+	indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
+	names = of_get_property(dn, "ibm,drc-names", NULL);
+	types = of_get_property(dn, "ibm,drc-types", NULL);
+	domains = of_get_property(dn, "ibm,drc-power-domains", NULL);
+
+	if (!indexes || !names || !types || !domains) {
+		/* Slot does not have dynamically-removable children */
+		return -EINVAL;
+	}
+	if (drc_indexes)
+		*drc_indexes = indexes;
+	if (drc_names)
+		/* &drc_names[1] contains NULL terminated slot names */
+		*drc_names = names;
+	if (drc_types)
+		/* &drc_types[1] contains NULL terminated slot types */
+		*drc_types = types;
+	if (drc_power_domains)
+		*drc_power_domains = domains;
+
+	return 0;
+}
+
+
+/* Verify the existence of 'drc_name' and/or 'drc_type' within the
+ * current node.  First obtain its my-drc-index property.  Next,
+ * obtain the DRC info from its parent.  Use the my-drc-index for
+ * correlation, and obtain/validate the requested properties.
+ */
+
+static int rpaphp_check_drc_props_v1(struct device_node *dn, char *drc_name,
+				char *drc_type, unsigned int my_index)
+{
+	char *name_tmp, *type_tmp;
+	const __be32 *indexes, *names;
+	const __be32 *types, *domains;
+	int i, rc;
+
+	rc = get_children_props(dn->parent, &indexes, &names, &types, &domains);
+	if (rc < 0) {
+		return -EINVAL;
+	}
+
+	name_tmp = (char *) &names[1];
+	type_tmp = (char *) &types[1];
+
+	/* Iterate through parent properties, looking for my-drc-index */
+	for (i = 0; i < be32_to_cpu(indexes[0]); i++) {
+		if (be32_to_cpu(indexes[i + 1]) == my_index)
+			break;
+
+		name_tmp += (strlen(name_tmp) + 1);
+		type_tmp += (strlen(type_tmp) + 1);
+	}
+
+	if (((drc_name == NULL) || (drc_name && !strcmp(drc_name, name_tmp))) &&
+	    ((drc_type == NULL) || (drc_type && !strcmp(drc_type, type_tmp))))
+		return 0;
+
+	return -EINVAL;
+}
+
+static int rpaphp_check_drc_props_v2(struct device_node *dn, char *drc_name,
+				char *drc_type, unsigned int my_index)
+{
+	struct property *info;
+	unsigned int entries;
+	struct of_drc_info drc;
+	const __be32 *value;
+	char cell_drc_name[MAX_DRC_NAME_LEN];
+	int j;
+
+	info = of_find_property(dn->parent, "ibm,drc-info", NULL);
+	if (info == NULL)
+		return -EINVAL;
+
+	value = of_prop_next_u32(info, NULL, &entries);
+	if (!value)
+		return -EINVAL;
+	else
+		value++;
+
+	for (j = 0; j < entries; j++) {
+		of_read_drc_info_cell(&info, &value, &drc);
+
+		/* Should now know end of current entry */
+
+		/* Found it */
+		if (my_index >= drc.drc_index_start && my_index <= drc.last_drc_index) {
+			int index = my_index - drc.drc_index_start;
+			sprintf(cell_drc_name, "%s%d", drc.drc_name_prefix,
+				drc.drc_name_suffix_start + index);
+			break;
+		}
+	}
+
+	if (((drc_name == NULL) ||
+	     (drc_name && !strcmp(drc_name, cell_drc_name))) &&
+	    ((drc_type == NULL) ||
+	     (drc_type && !strcmp(drc_type, drc.drc_type))))
+		return 0;
+
+	return -EINVAL;
+}
+
+int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
+			char *drc_type)
+{
+	const __be32 *my_index;
+
+	my_index = of_get_property(dn, "ibm,my-drc-index", NULL);
+	if (!my_index) {
+		/* Node isn't DLPAR/hotplug capable */
+		return -EINVAL;
+	}
+
+	if (of_property_present(dn->parent, "ibm,drc-info"))
+		return rpaphp_check_drc_props_v2(dn, drc_name, drc_type,
+						be32_to_cpu(*my_index));
+	else
+		return rpaphp_check_drc_props_v1(dn, drc_name, drc_type,
+						be32_to_cpu(*my_index));
+}
+EXPORT_SYMBOL_GPL(rpaphp_check_drc_props);
+
+
+static int is_php_type(char *drc_type)
+{
+	char *endptr;
+
+	/* PCI Hotplug nodes have an integer for drc_type */
+	simple_strtoul(drc_type, &endptr, 10);
+	if (endptr == drc_type)
+		return 0;
+
+	return 1;
+}
+
+/**
+ * is_php_dn() - return 1 if this is a hotpluggable pci slot, else 0
+ * @dn: target &device_node
+ * @indexes: passed to get_children_props()
+ * @names: passed to get_children_props()
+ * @types: returned from get_children_props()
+ * @power_domains:
+ *
+ * This routine will return true only if the device node is
+ * a hotpluggable slot. This routine will return false
+ * for built-in pci slots (even when the built-in slots are
+ * dlparable.)
+ */
+static int is_php_dn(struct device_node *dn, const __be32 **indexes,
+		     const __be32 **names, const __be32 **types,
+		     const __be32 **power_domains)
+{
+	const __be32 *drc_types;
+	int rc;
+
+	rc = get_children_props(dn, indexes, names, &drc_types, power_domains);
+	if (rc < 0)
+		return 0;
+
+	if (!is_php_type((char *) &drc_types[1]))
+		return 0;
+
+	*types = drc_types;
+	return 1;
+}
+
+static int rpaphp_drc_info_add_slot(struct device_node *dn)
+{
+	struct slot *slot;
+	struct property *info;
+	struct of_drc_info drc;
+	char drc_name[MAX_DRC_NAME_LEN];
+	const __be32 *cur;
+	u32 count;
+	int retval = 0;
+
+	info = of_find_property(dn, "ibm,drc-info", NULL);
+	if (!info)
+		return 0;
+
+	cur = of_prop_next_u32(info, NULL, &count);
+	if (cur)
+		cur++;
+	else
+		return 0;
+
+	of_read_drc_info_cell(&info, &cur, &drc);
+	if (!is_php_type(drc.drc_type))
+		return 0;
+
+	sprintf(drc_name, "%s%d", drc.drc_name_prefix, drc.drc_name_suffix_start);
+
+	slot = alloc_slot_struct(dn, drc.drc_index_start, drc_name, drc.drc_power_domain);
+	if (!slot)
+		return -ENOMEM;
+
+	slot->type = simple_strtoul(drc.drc_type, NULL, 10);
+	retval = rpaphp_enable_slot(slot);
+	if (!retval)
+		retval = rpaphp_register_slot(slot);
+
+	if (retval)
+		dealloc_slot_struct(slot);
+
+	return retval;
+}
+
+static int rpaphp_drc_add_slot(struct device_node *dn)
+{
+	struct slot *slot;
+	int retval = 0;
+	int i;
+	const __be32 *indexes, *names, *types, *power_domains;
+	char *name, *type;
+
+	/* If this is not a hotplug slot, return without doing anything. */
+	if (!is_php_dn(dn, &indexes, &names, &types, &power_domains))
+		return 0;
+
+	dbg("Entry %s: dn=%pOF\n", __func__, dn);
+
+	/* register PCI devices */
+	name = (char *) &names[1];
+	type = (char *) &types[1];
+	for (i = 0; i < be32_to_cpu(indexes[0]); i++) {
+		int index;
+
+		index = be32_to_cpu(indexes[i + 1]);
+		slot = alloc_slot_struct(dn, index, name,
+					 be32_to_cpu(power_domains[i + 1]));
+		if (!slot)
+			return -ENOMEM;
+
+		slot->type = simple_strtoul(type, NULL, 10);
+
+		dbg("Found drc-index:0x%x drc-name:%s drc-type:%s\n",
+				index, name, type);
+
+		retval = rpaphp_enable_slot(slot);
+		if (!retval)
+			retval = rpaphp_register_slot(slot);
+
+		if (retval)
+			dealloc_slot_struct(slot);
+
+		name += strlen(name) + 1;
+		type += strlen(type) + 1;
+	}
+	dbg("%s - Exit: rc[%d]\n", __func__, retval);
+
+	/* XXX FIXME: reports a failure only if last entry in loop failed */
+	return retval;
+}
+
+/**
+ * rpaphp_add_slot -- declare a hotplug slot to the hotplug subsystem.
+ * @dn: device node of slot
+ *
+ * This subroutine will register a hotpluggable slot with the
+ * PCI hotplug infrastructure. This routine is typically called
+ * during boot time, if the hotplug slots are present at boot time,
+ * or is called later, by the dlpar add code, if the slot is
+ * being dynamically added during runtime.
+ *
+ * If the device node points at an embedded (built-in) slot, this
+ * routine will just return without doing anything, since embedded
+ * slots cannot be hotplugged.
+ *
+ * To remove a slot, it suffices to call rpaphp_deregister_slot().
+ */
+int rpaphp_add_slot(struct device_node *dn)
+{
+	if (!of_node_name_eq(dn, "pci"))
+		return 0;
+
+	if (of_property_present(dn, "ibm,drc-info"))
+		return rpaphp_drc_info_add_slot(dn);
+	else
+		return rpaphp_drc_add_slot(dn);
+}
+EXPORT_SYMBOL_GPL(rpaphp_add_slot);
+
+static void __exit cleanup_slots(void)
+{
+	struct slot *slot, *next;
+
+	/*
+	 * Unregister all of our slots with the pci_hotplug subsystem,
+	 * and free up all memory that we had allocated.
+	 */
+
+	list_for_each_entry_safe(slot, next, &rpaphp_slot_head,
+				 rpaphp_slot_list) {
+		list_del(&slot->rpaphp_slot_list);
+		pci_hp_deregister(&slot->hotplug_slot);
+		dealloc_slot_struct(slot);
+	}
+}
+
+static int __init rpaphp_init(void)
+{
+	struct device_node *dn;
+
+	info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
+
+	for_each_node_by_name(dn, "pci")
+		rpaphp_add_slot(dn);
+
+	return 0;
+}
+
+static void __exit rpaphp_exit(void)
+{
+	cleanup_slots();
+}
+
+static int enable_slot(struct hotplug_slot *hotplug_slot)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+	int state;
+	int retval;
+
+	if (slot->state == CONFIGURED)
+		return 0;
+
+	retval = rpaphp_get_sensor_state(slot, &state);
+	if (retval)
+		return retval;
+
+	if (state == PRESENT) {
+		pseries_eeh_init_edev_recursive(PCI_DN(slot->dn));
+
+		pci_lock_rescan_remove();
+		pci_hp_add_devices(slot->bus);
+		pci_unlock_rescan_remove();
+		slot->state = CONFIGURED;
+	} else if (state == EMPTY) {
+		slot->state = EMPTY;
+	} else {
+		err("%s: slot[%s] is in invalid state\n", __func__, slot->name);
+		slot->state = NOT_VALID;
+		return -EINVAL;
+	}
+
+	slot->bus->max_bus_speed = get_max_bus_speed(slot);
+	return 0;
+}
+
+static int disable_slot(struct hotplug_slot *hotplug_slot)
+{
+	struct slot *slot = to_slot(hotplug_slot);
+	if (slot->state == NOT_CONFIGURED)
+		return -EINVAL;
+
+	pci_lock_rescan_remove();
+	pci_hp_remove_devices(slot->bus);
+	pci_unlock_rescan_remove();
+	vm_unmap_aliases();
+
+	slot->state = NOT_CONFIGURED;
+	return 0;
+}
+
+const struct hotplug_slot_ops rpaphp_hotplug_slot_ops = {
+	.enable_slot = enable_slot,
+	.disable_slot = disable_slot,
+	.set_attention_status = set_attention_status,
+	.get_power_status = get_power_status,
+	.get_attention_status = get_attention_status,
+	.get_adapter_status = get_adapter_status,
+};
+
+module_init(rpaphp_init);
+module_exit(rpaphp_exit);
diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c
new file mode 100644
index 000000000..bcfd26ec6
--- /dev/null
+++ b/drivers/pci/hotplug/rpaphp_pci.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCI Hot Plug Controller Driver for RPA-compliant PPC64 platform.
+ * Copyright (C) 2003 Linda Xie <lxie@us.ibm.com>
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <lxie@us.ibm.com>
+ *
+ */
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/rtas.h>
+#include <asm/machdep.h>
+
+#include "../pci.h"		/* for pci_add_new_bus */
+#include "rpaphp.h"
+
+/*
+ * RTAS call get-sensor-state(DR_ENTITY_SENSE) return values as per PAPR:
+ * -- generic return codes ---
+ *    -1: Hardware Error
+ *    -2: RTAS_BUSY
+ *    -3: Invalid sensor. RTAS Parameter Error.
+ * -- rtas_get_sensor function specific return codes ---
+ * -9000: Need DR entity to be powered up and unisolated before RTAS call
+ * -9001: Need DR entity to be powered up, but not unisolated, before RTAS call
+ * -9002: DR entity unusable
+ *  990x: Extended delay - where x is a number in the range of 0-5
+ */
+#define RTAS_SLOT_UNISOLATED		-9000
+#define RTAS_SLOT_NOT_UNISOLATED	-9001
+#define RTAS_SLOT_NOT_USABLE		-9002
+
+static int rtas_get_sensor_errno(int rtas_rc)
+{
+	switch (rtas_rc) {
+	case 0:
+		/* Success case */
+		return 0;
+	case RTAS_SLOT_UNISOLATED:
+	case RTAS_SLOT_NOT_UNISOLATED:
+		return -EFAULT;
+	case RTAS_SLOT_NOT_USABLE:
+		return -ENODEV;
+	case RTAS_BUSY:
+	case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX:
+		return -EBUSY;
+	default:
+		return rtas_error_rc(rtas_rc);
+	}
+}
+
+/*
+ * get_adapter_status() can be called by the EEH handler during EEH recovery.
+ * On certain PHB failures, the RTAS call rtas_call(get-sensor-state) returns
+ * extended busy error (9902) until PHB is recovered by pHyp. The RTAS call
+ * interface rtas_get_sensor() loops over the RTAS call on extended delay
+ * return code (9902) until the return value is either success (0) or error
+ * (-1). This causes the EEH handler to get stuck for ~6 seconds before it
+ * could notify that the PCI error has been detected and stop any active
+ * operations. This sometimes causes EEH recovery to fail. To avoid this issue,
+ * invoke rtas_call(get-sensor-state) directly if the respective PE is in EEH
+ * recovery state and return -EBUSY error based on RTAS return status. This
+ * will help the EEH handler to notify the driver about the PCI error
+ * immediately and successfully proceed with EEH recovery steps.
+ */
+
+static int __rpaphp_get_sensor_state(struct slot *slot, int *state)
+{
+	int rc;
+	int token = rtas_token("get-sensor-state");
+	struct pci_dn *pdn;
+	struct eeh_pe *pe;
+	struct pci_controller *phb = PCI_DN(slot->dn)->phb;
+
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -ENOENT;
+
+	/*
+	 * Fallback to existing method for empty slot or PE isn't in EEH
+	 * recovery.
+	 */
+	pdn = list_first_entry_or_null(&PCI_DN(phb->dn)->child_list,
+					struct pci_dn, list);
+	if (!pdn)
+		goto fallback;
+
+	pe = eeh_dev_to_pe(pdn->edev);
+	if (pe && (pe->state & EEH_PE_RECOVERING)) {
+		rc = rtas_call(token, 2, 2, state, DR_ENTITY_SENSE,
+			       slot->index);
+		return rtas_get_sensor_errno(rc);
+	}
+fallback:
+	return rtas_get_sensor(DR_ENTITY_SENSE, slot->index, state);
+}
+
+int rpaphp_get_sensor_state(struct slot *slot, int *state)
+{
+	int rc;
+	int setlevel;
+
+	rc = __rpaphp_get_sensor_state(slot, state);
+
+	if (rc < 0) {
+		if (rc == -EFAULT || rc == -EEXIST) {
+			dbg("%s: slot must be power up to get sensor-state\n",
+			    __func__);
+
+			/* some slots have to be powered up
+			 * before get-sensor will succeed.
+			 */
+			rc = rtas_set_power_level(slot->power_domain, POWER_ON,
+						  &setlevel);
+			if (rc < 0) {
+				dbg("%s: power on slot[%s] failed rc=%d.\n",
+				    __func__, slot->name, rc);
+			} else {
+				rc = __rpaphp_get_sensor_state(slot, state);
+			}
+		} else if (rc == -ENODEV)
+			info("%s: slot is unusable\n", __func__);
+		else
+			err("%s failed to get sensor state\n", __func__);
+	}
+	return rc;
+}
+
+/**
+ * rpaphp_enable_slot - record slot state, config pci device
+ * @slot: target &slot
+ *
+ * Initialize values in the slot structure to indicate if there is a pci card
+ * plugged into the slot. If the slot is not empty, run the pcibios routine
+ * to get pcibios stuff correctly set up.
+ */
+int rpaphp_enable_slot(struct slot *slot)
+{
+	int rc, level, state;
+	struct pci_bus *bus;
+
+	slot->state = EMPTY;
+
+	/* Find out if the power is turned on for the slot */
+	rc = rtas_get_power_level(slot->power_domain, &level);
+	if (rc)
+		return rc;
+
+	/* Figure out if there is an adapter in the slot */
+	rc = rpaphp_get_sensor_state(slot, &state);
+	if (rc)
+		return rc;
+
+	bus = pci_find_bus_by_node(slot->dn);
+	if (!bus) {
+		err("%s: no pci_bus for dn %pOF\n", __func__, slot->dn);
+		return -EINVAL;
+	}
+
+	slot->bus = bus;
+	slot->pci_devs = &bus->devices;
+
+	/* if there's an adapter in the slot, go add the pci devices */
+	if (state == PRESENT) {
+		slot->state = NOT_CONFIGURED;
+
+		/* non-empty slot has to have child */
+		if (!slot->dn->child) {
+			err("%s: slot[%s]'s device_node doesn't have child for adapter\n",
+			    __func__, slot->name);
+			return -EINVAL;
+		}
+
+		if (list_empty(&bus->devices)) {
+			pseries_eeh_init_edev_recursive(PCI_DN(slot->dn));
+			pci_hp_add_devices(bus);
+		}
+
+		if (!list_empty(&bus->devices)) {
+			slot->state = CONFIGURED;
+		}
+
+		if (rpaphp_debug) {
+			struct pci_dev *dev;
+			dbg("%s: pci_devs of slot[%pOF]\n", __func__, slot->dn);
+			list_for_each_entry(dev, &bus->devices, bus_list)
+				dbg("\t%s\n", pci_name(dev));
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c
new file mode 100644
index 000000000..779eab12e
--- /dev/null
+++ b/drivers/pci/hotplug/rpaphp_slot.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * RPA Virtual I/O device functions
+ * Copyright (C) 2004 Linda Xie <lxie@us.ibm.com>
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <lxie@us.ibm.com>
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sysfs.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+
+#include <asm/rtas.h>
+#include "rpaphp.h"
+
+/* free up the memory used by a slot */
+void dealloc_slot_struct(struct slot *slot)
+{
+	of_node_put(slot->dn);
+	kfree(slot->name);
+	kfree(slot);
+}
+
+struct slot *alloc_slot_struct(struct device_node *dn,
+		int drc_index, char *drc_name, int power_domain)
+{
+	struct slot *slot;
+
+	slot = kzalloc(sizeof(struct slot), GFP_KERNEL);
+	if (!slot)
+		goto error_nomem;
+	slot->name = kstrdup(drc_name, GFP_KERNEL);
+	if (!slot->name)
+		goto error_slot;
+	slot->dn = of_node_get(dn);
+	slot->index = drc_index;
+	slot->power_domain = power_domain;
+	slot->hotplug_slot.ops = &rpaphp_hotplug_slot_ops;
+
+	return (slot);
+
+error_slot:
+	kfree(slot);
+error_nomem:
+	return NULL;
+}
+
+static int is_registered(struct slot *slot)
+{
+	struct slot *tmp_slot;
+
+	list_for_each_entry(tmp_slot, &rpaphp_slot_head, rpaphp_slot_list) {
+		if (!strcmp(tmp_slot->name, slot->name))
+			return 1;
+	}
+	return 0;
+}
+
+int rpaphp_deregister_slot(struct slot *slot)
+{
+	int retval = 0;
+	struct hotplug_slot *php_slot = &slot->hotplug_slot;
+
+	 dbg("%s - Entry: deregistering slot=%s\n",
+		__func__, slot->name);
+
+	list_del(&slot->rpaphp_slot_list);
+	pci_hp_deregister(php_slot);
+	dealloc_slot_struct(slot);
+
+	dbg("%s - Exit: rc[%d]\n", __func__, retval);
+	return retval;
+}
+EXPORT_SYMBOL_GPL(rpaphp_deregister_slot);
+
+int rpaphp_register_slot(struct slot *slot)
+{
+	struct hotplug_slot *php_slot = &slot->hotplug_slot;
+	struct device_node *child;
+	u32 my_index;
+	int retval;
+	int slotno = -1;
+
+	dbg("%s registering slot:path[%pOF] index[%x], name[%s] pdomain[%x] type[%d]\n",
+		__func__, slot->dn, slot->index, slot->name,
+		slot->power_domain, slot->type);
+
+	/* should not try to register the same slot twice */
+	if (is_registered(slot)) {
+		err("rpaphp_register_slot: slot[%s] is already registered\n", slot->name);
+		return -EAGAIN;
+	}
+
+	for_each_child_of_node(slot->dn, child) {
+		retval = of_property_read_u32(child, "ibm,my-drc-index", &my_index);
+		if (my_index == slot->index) {
+			slotno = PCI_SLOT(PCI_DN(child)->devfn);
+			of_node_put(child);
+			break;
+		}
+	}
+
+	retval = pci_hp_register(php_slot, slot->bus, slotno, slot->name);
+	if (retval) {
+		err("pci_hp_register failed with error %d\n", retval);
+		return retval;
+	}
+
+	/* add slot to our internal list */
+	list_add(&slot->rpaphp_slot_list, &rpaphp_slot_head);
+	info("Slot [%s] registered\n", slot->name);
+	return 0;
+}
diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c
new file mode 100644
index 000000000..a89b7de72
--- /dev/null
+++ b/drivers/pci/hotplug/s390_pci_hpc.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCI Hot Plug Controller Driver for System z
+ *
+ * Copyright 2012 IBM Corp.
+ *
+ * Author(s):
+ *   Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <asm/pci_debug.h>
+#include <asm/sclp.h>
+
+#define SLOT_NAME_SIZE	10
+
+static int enable_slot(struct hotplug_slot *hotplug_slot)
+{
+	struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev,
+					     hotplug_slot);
+	int rc;
+
+	if (zdev->state != ZPCI_FN_STATE_STANDBY)
+		return -EIO;
+
+	rc = sclp_pci_configure(zdev->fid);
+	zpci_dbg(3, "conf fid:%x, rc:%d\n", zdev->fid, rc);
+	if (rc)
+		return rc;
+	zdev->state = ZPCI_FN_STATE_CONFIGURED;
+
+	return zpci_scan_configured_device(zdev, zdev->fh);
+}
+
+static int disable_slot(struct hotplug_slot *hotplug_slot)
+{
+	struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev,
+					     hotplug_slot);
+	struct pci_dev *pdev;
+
+	if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+		return -EIO;
+
+	pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
+	if (pdev && pci_num_vf(pdev)) {
+		pci_dev_put(pdev);
+		return -EBUSY;
+	}
+	pci_dev_put(pdev);
+
+	return zpci_deconfigure_device(zdev);
+}
+
+static int reset_slot(struct hotplug_slot *hotplug_slot, bool probe)
+{
+	struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev,
+					     hotplug_slot);
+
+	if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+		return -EIO;
+	/*
+	 * We can't take the zdev->lock as reset_slot may be called during
+	 * probing and/or device removal which already happens under the
+	 * zdev->lock. Instead the user should use the higher level
+	 * pci_reset_function() or pci_bus_reset() which hold the PCI device
+	 * lock preventing concurrent removal. If not using these functions
+	 * holding the PCI device lock is required.
+	 */
+
+	/* As long as the function is configured we can reset */
+	if (probe)
+		return 0;
+
+	return zpci_hot_reset_device(zdev);
+}
+
+static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev,
+					     hotplug_slot);
+
+	*value = zpci_is_device_configured(zdev) ? 1 : 0;
+	return 0;
+}
+
+static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	/* if the slot exits it always contains a function */
+	*value = 1;
+	return 0;
+}
+
+static const struct hotplug_slot_ops s390_hotplug_slot_ops = {
+	.enable_slot =		enable_slot,
+	.disable_slot =		disable_slot,
+	.reset_slot =		reset_slot,
+	.get_power_status =	get_power_status,
+	.get_adapter_status =	get_adapter_status,
+};
+
+int zpci_init_slot(struct zpci_dev *zdev)
+{
+	char name[SLOT_NAME_SIZE];
+	struct zpci_bus *zbus = zdev->zbus;
+
+	zdev->hotplug_slot.ops = &s390_hotplug_slot_ops;
+
+	snprintf(name, SLOT_NAME_SIZE, "%08x", zdev->fid);
+	return pci_hp_register(&zdev->hotplug_slot, zbus->bus,
+			       zdev->devfn, name);
+}
+
+void zpci_exit_slot(struct zpci_dev *zdev)
+{
+	pci_hp_deregister(&zdev->hotplug_slot);
+}
diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h
new file mode 100644
index 000000000..3a97f4553
--- /dev/null
+++ b/drivers/pci/hotplug/shpchp.h
@@ -0,0 +1,323 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Standard Hot Plug Controller Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM
+ * Copyright (C) 2003-2004 Intel Corporation
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>,<kristen.c.accardi@intel.com>
+ *
+ */
+#ifndef _SHPCHP_H
+#define _SHPCHP_H
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <linux/delay.h>
+#include <linux/sched/signal.h>	/* signal_pending(), struct timer_list */
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+
+#if !defined(MODULE)
+	#define MY_NAME	"shpchp"
+#else
+	#define MY_NAME	THIS_MODULE->name
+#endif
+
+extern bool shpchp_poll_mode;
+extern int shpchp_poll_time;
+extern bool shpchp_debug;
+
+#define dbg(format, arg...)						\
+do {									\
+	if (shpchp_debug)						\
+		printk(KERN_DEBUG "%s: " format, MY_NAME, ## arg);	\
+} while (0)
+#define err(format, arg...)						\
+	printk(KERN_ERR "%s: " format, MY_NAME, ## arg)
+#define info(format, arg...)						\
+	printk(KERN_INFO "%s: " format, MY_NAME, ## arg)
+#define warn(format, arg...)						\
+	printk(KERN_WARNING "%s: " format, MY_NAME, ## arg)
+
+#define ctrl_dbg(ctrl, format, arg...)					\
+	do {								\
+		if (shpchp_debug)					\
+			pci_printk(KERN_DEBUG, ctrl->pci_dev,		\
+					format, ## arg);		\
+	} while (0)
+#define ctrl_err(ctrl, format, arg...)					\
+	pci_err(ctrl->pci_dev, format, ## arg)
+#define ctrl_info(ctrl, format, arg...)					\
+	pci_info(ctrl->pci_dev, format, ## arg)
+#define ctrl_warn(ctrl, format, arg...)					\
+	pci_warn(ctrl->pci_dev, format, ## arg)
+
+
+#define SLOT_NAME_SIZE 10
+struct slot {
+	u8 bus;
+	u8 device;
+	u16 status;
+	u32 number;
+	u8 is_a_board;
+	u8 state;
+	u8 attention_save;
+	u8 presence_save;
+	u8 latch_save;
+	u8 pwr_save;
+	struct controller *ctrl;
+	const struct hpc_ops *hpc_ops;
+	struct hotplug_slot hotplug_slot;
+	struct list_head	slot_list;
+	struct delayed_work work;	/* work for button event */
+	struct mutex lock;
+	struct workqueue_struct *wq;
+	u8 hp_slot;
+};
+
+struct event_info {
+	u32 event_type;
+	struct slot *p_slot;
+	struct work_struct work;
+};
+
+struct controller {
+	struct mutex crit_sect;		/* critical section mutex */
+	struct mutex cmd_lock;		/* command lock */
+	int num_slots;			/* Number of slots on ctlr */
+	int slot_num_inc;		/* 1 or -1 */
+	struct pci_dev *pci_dev;
+	struct list_head slot_list;
+	const struct hpc_ops *hpc_ops;
+	wait_queue_head_t queue;	/* sleep & wake process */
+	u8 slot_device_offset;
+	u32 pcix_misc2_reg;	/* for amd pogo errata */
+	u32 first_slot;		/* First physical slot number */
+	u32 cap_offset;
+	unsigned long mmio_base;
+	unsigned long mmio_size;
+	void __iomem *creg;
+	struct timer_list poll_timer;
+};
+
+/* Define AMD SHPC ID  */
+#define PCI_DEVICE_ID_AMD_POGO_7458	0x7458
+
+/* AMD PCI-X bridge registers */
+#define PCIX_MEM_BASE_LIMIT_OFFSET	0x1C
+#define PCIX_MISCII_OFFSET		0x48
+#define PCIX_MISC_BRIDGE_ERRORS_OFFSET	0x80
+
+/* AMD PCIX_MISCII masks and offsets */
+#define PERRNONFATALENABLE_MASK		0x00040000
+#define PERRFATALENABLE_MASK		0x00080000
+#define PERRFLOODENABLE_MASK		0x00100000
+#define SERRNONFATALENABLE_MASK		0x00200000
+#define SERRFATALENABLE_MASK		0x00400000
+
+/* AMD PCIX_MISC_BRIDGE_ERRORS masks and offsets */
+#define PERR_OBSERVED_MASK		0x00000001
+
+/* AMD PCIX_MEM_BASE_LIMIT masks */
+#define RSE_MASK			0x40000000
+
+#define INT_BUTTON_IGNORE		0
+#define INT_PRESENCE_ON			1
+#define INT_PRESENCE_OFF		2
+#define INT_SWITCH_CLOSE		3
+#define INT_SWITCH_OPEN			4
+#define INT_POWER_FAULT			5
+#define INT_POWER_FAULT_CLEAR		6
+#define INT_BUTTON_PRESS		7
+#define INT_BUTTON_RELEASE		8
+#define INT_BUTTON_CANCEL		9
+
+#define STATIC_STATE			0
+#define BLINKINGON_STATE		1
+#define BLINKINGOFF_STATE		2
+#define POWERON_STATE			3
+#define POWEROFF_STATE			4
+
+/* Error messages */
+#define INTERLOCK_OPEN			0x00000002
+#define ADD_NOT_SUPPORTED		0x00000003
+#define CARD_FUNCTIONING		0x00000005
+#define ADAPTER_NOT_SAME		0x00000006
+#define NO_ADAPTER_PRESENT		0x00000009
+#define NOT_ENOUGH_RESOURCES		0x0000000B
+#define DEVICE_TYPE_NOT_SUPPORTED	0x0000000C
+#define WRONG_BUS_FREQUENCY		0x0000000D
+#define POWER_FAILURE			0x0000000E
+
+int __must_check shpchp_create_ctrl_files(struct controller *ctrl);
+void shpchp_remove_ctrl_files(struct controller *ctrl);
+int shpchp_sysfs_enable_slot(struct slot *slot);
+int shpchp_sysfs_disable_slot(struct slot *slot);
+u8 shpchp_handle_attention_button(u8 hp_slot, struct controller *ctrl);
+u8 shpchp_handle_switch_change(u8 hp_slot, struct controller *ctrl);
+u8 shpchp_handle_presence_change(u8 hp_slot, struct controller *ctrl);
+u8 shpchp_handle_power_fault(u8 hp_slot, struct controller *ctrl);
+int shpchp_configure_device(struct slot *p_slot);
+void shpchp_unconfigure_device(struct slot *p_slot);
+void cleanup_slots(struct controller *ctrl);
+void shpchp_queue_pushbutton_work(struct work_struct *work);
+int shpc_init(struct controller *ctrl, struct pci_dev *pdev);
+
+static inline const char *slot_name(struct slot *slot)
+{
+	return hotplug_slot_name(&slot->hotplug_slot);
+}
+
+struct ctrl_reg {
+	volatile u32 base_offset;
+	volatile u32 slot_avail1;
+	volatile u32 slot_avail2;
+	volatile u32 slot_config;
+	volatile u16 sec_bus_config;
+	volatile u8  msi_ctrl;
+	volatile u8  prog_interface;
+	volatile u16 cmd;
+	volatile u16 cmd_status;
+	volatile u32 intr_loc;
+	volatile u32 serr_loc;
+	volatile u32 serr_intr_enable;
+	volatile u32 slot1;
+} __attribute__ ((packed));
+
+/* offsets to the controller registers based on the above structure layout */
+enum ctrl_offsets {
+	BASE_OFFSET	 = offsetof(struct ctrl_reg, base_offset),
+	SLOT_AVAIL1	 = offsetof(struct ctrl_reg, slot_avail1),
+	SLOT_AVAIL2	 = offsetof(struct ctrl_reg, slot_avail2),
+	SLOT_CONFIG	 = offsetof(struct ctrl_reg, slot_config),
+	SEC_BUS_CONFIG	 = offsetof(struct ctrl_reg, sec_bus_config),
+	MSI_CTRL	 = offsetof(struct ctrl_reg, msi_ctrl),
+	PROG_INTERFACE	 = offsetof(struct ctrl_reg, prog_interface),
+	CMD		 = offsetof(struct ctrl_reg, cmd),
+	CMD_STATUS	 = offsetof(struct ctrl_reg, cmd_status),
+	INTR_LOC	 = offsetof(struct ctrl_reg, intr_loc),
+	SERR_LOC	 = offsetof(struct ctrl_reg, serr_loc),
+	SERR_INTR_ENABLE = offsetof(struct ctrl_reg, serr_intr_enable),
+	SLOT1		 = offsetof(struct ctrl_reg, slot1),
+};
+
+static inline struct slot *get_slot(struct hotplug_slot *hotplug_slot)
+{
+	return container_of(hotplug_slot, struct slot, hotplug_slot);
+}
+
+static inline struct slot *shpchp_find_slot(struct controller *ctrl, u8 device)
+{
+	struct slot *slot;
+
+	list_for_each_entry(slot, &ctrl->slot_list, slot_list) {
+		if (slot->device == device)
+			return slot;
+	}
+
+	ctrl_err(ctrl, "Slot (device=0x%02x) not found\n", device);
+	return NULL;
+}
+
+static inline void amd_pogo_errata_save_misc_reg(struct slot *p_slot)
+{
+	u32 pcix_misc2_temp;
+
+	/* save MiscII register */
+	pci_read_config_dword(p_slot->ctrl->pci_dev, PCIX_MISCII_OFFSET, &pcix_misc2_temp);
+
+	p_slot->ctrl->pcix_misc2_reg = pcix_misc2_temp;
+
+	/* clear SERR/PERR enable bits */
+	pcix_misc2_temp &= ~SERRFATALENABLE_MASK;
+	pcix_misc2_temp &= ~SERRNONFATALENABLE_MASK;
+	pcix_misc2_temp &= ~PERRFLOODENABLE_MASK;
+	pcix_misc2_temp &= ~PERRFATALENABLE_MASK;
+	pcix_misc2_temp &= ~PERRNONFATALENABLE_MASK;
+	pci_write_config_dword(p_slot->ctrl->pci_dev, PCIX_MISCII_OFFSET, pcix_misc2_temp);
+}
+
+static inline void amd_pogo_errata_restore_misc_reg(struct slot *p_slot)
+{
+	u32 pcix_misc2_temp;
+	u32 pcix_bridge_errors_reg;
+	u32 pcix_mem_base_reg;
+	u8  perr_set;
+	u8  rse_set;
+
+	/* write-one-to-clear Bridge_Errors[ PERR_OBSERVED ] */
+	pci_read_config_dword(p_slot->ctrl->pci_dev, PCIX_MISC_BRIDGE_ERRORS_OFFSET, &pcix_bridge_errors_reg);
+	perr_set = pcix_bridge_errors_reg & PERR_OBSERVED_MASK;
+	if (perr_set) {
+		ctrl_dbg(p_slot->ctrl,
+			 "Bridge_Errors[ PERR_OBSERVED = %08X] (W1C)\n",
+			 perr_set);
+
+		pci_write_config_dword(p_slot->ctrl->pci_dev, PCIX_MISC_BRIDGE_ERRORS_OFFSET, perr_set);
+	}
+
+	/* write-one-to-clear Memory_Base_Limit[ RSE ] */
+	pci_read_config_dword(p_slot->ctrl->pci_dev, PCIX_MEM_BASE_LIMIT_OFFSET, &pcix_mem_base_reg);
+	rse_set = pcix_mem_base_reg & RSE_MASK;
+	if (rse_set) {
+		ctrl_dbg(p_slot->ctrl, "Memory_Base_Limit[ RSE ] (W1C)\n");
+
+		pci_write_config_dword(p_slot->ctrl->pci_dev, PCIX_MEM_BASE_LIMIT_OFFSET, rse_set);
+	}
+	/* restore MiscII register */
+	pci_read_config_dword(p_slot->ctrl->pci_dev, PCIX_MISCII_OFFSET, &pcix_misc2_temp);
+
+	if (p_slot->ctrl->pcix_misc2_reg & SERRFATALENABLE_MASK)
+		pcix_misc2_temp |= SERRFATALENABLE_MASK;
+	else
+		pcix_misc2_temp &= ~SERRFATALENABLE_MASK;
+
+	if (p_slot->ctrl->pcix_misc2_reg & SERRNONFATALENABLE_MASK)
+		pcix_misc2_temp |= SERRNONFATALENABLE_MASK;
+	else
+		pcix_misc2_temp &= ~SERRNONFATALENABLE_MASK;
+
+	if (p_slot->ctrl->pcix_misc2_reg & PERRFLOODENABLE_MASK)
+		pcix_misc2_temp |= PERRFLOODENABLE_MASK;
+	else
+		pcix_misc2_temp &= ~PERRFLOODENABLE_MASK;
+
+	if (p_slot->ctrl->pcix_misc2_reg & PERRFATALENABLE_MASK)
+		pcix_misc2_temp |= PERRFATALENABLE_MASK;
+	else
+		pcix_misc2_temp &= ~PERRFATALENABLE_MASK;
+
+	if (p_slot->ctrl->pcix_misc2_reg & PERRNONFATALENABLE_MASK)
+		pcix_misc2_temp |= PERRNONFATALENABLE_MASK;
+	else
+		pcix_misc2_temp &= ~PERRNONFATALENABLE_MASK;
+	pci_write_config_dword(p_slot->ctrl->pci_dev, PCIX_MISCII_OFFSET, pcix_misc2_temp);
+}
+
+struct hpc_ops {
+	int (*power_on_slot)(struct slot *slot);
+	int (*slot_enable)(struct slot *slot);
+	int (*slot_disable)(struct slot *slot);
+	int (*set_bus_speed_mode)(struct slot *slot, enum pci_bus_speed speed);
+	int (*get_power_status)(struct slot *slot, u8 *status);
+	int (*get_attention_status)(struct slot *slot, u8 *status);
+	int (*set_attention_status)(struct slot *slot, u8 status);
+	int (*get_latch_status)(struct slot *slot, u8 *status);
+	int (*get_adapter_status)(struct slot *slot, u8 *status);
+	int (*get_adapter_speed)(struct slot *slot, enum pci_bus_speed *speed);
+	int (*get_prog_int)(struct slot *slot, u8 *prog_int);
+	int (*query_power_fault)(struct slot *slot);
+	void (*green_led_on)(struct slot *slot);
+	void (*green_led_off)(struct slot *slot);
+	void (*green_led_blink)(struct slot *slot);
+	void (*release_ctlr)(struct controller *ctrl);
+	int (*check_cmd_status)(struct controller *ctrl);
+};
+
+#endif				/* _SHPCHP_H */
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c
new file mode 100644
index 000000000..56c7795ed
--- /dev/null
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Standard Hot Plug Controller Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM Corp.
+ * Copyright (C) 2003-2004 Intel Corporation
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include "shpchp.h"
+
+/* Global variables */
+bool shpchp_debug;
+bool shpchp_poll_mode;
+int shpchp_poll_time;
+
+#define DRIVER_VERSION	"0.4"
+#define DRIVER_AUTHOR	"Dan Zink <dan.zink@compaq.com>, Greg Kroah-Hartman <greg@kroah.com>, Dely Sy <dely.l.sy@intel.com>"
+#define DRIVER_DESC	"Standard Hot Plug PCI Controller Driver"
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+
+module_param(shpchp_debug, bool, 0644);
+module_param(shpchp_poll_mode, bool, 0644);
+module_param(shpchp_poll_time, int, 0644);
+MODULE_PARM_DESC(shpchp_debug, "Debugging mode enabled or not");
+MODULE_PARM_DESC(shpchp_poll_mode, "Using polling mechanism for hot-plug events or not");
+MODULE_PARM_DESC(shpchp_poll_time, "Polling mechanism frequency, in seconds");
+
+#define SHPC_MODULE_NAME "shpchp"
+
+static int set_attention_status(struct hotplug_slot *slot, u8 value);
+static int enable_slot(struct hotplug_slot *slot);
+static int disable_slot(struct hotplug_slot *slot);
+static int get_power_status(struct hotplug_slot *slot, u8 *value);
+static int get_attention_status(struct hotplug_slot *slot, u8 *value);
+static int get_latch_status(struct hotplug_slot *slot, u8 *value);
+static int get_adapter_status(struct hotplug_slot *slot, u8 *value);
+
+static const struct hotplug_slot_ops shpchp_hotplug_slot_ops = {
+	.set_attention_status =	set_attention_status,
+	.enable_slot =		enable_slot,
+	.disable_slot =		disable_slot,
+	.get_power_status =	get_power_status,
+	.get_attention_status =	get_attention_status,
+	.get_latch_status =	get_latch_status,
+	.get_adapter_status =	get_adapter_status,
+};
+
+static int init_slots(struct controller *ctrl)
+{
+	struct slot *slot;
+	struct hotplug_slot *hotplug_slot;
+	char name[SLOT_NAME_SIZE];
+	int retval;
+	int i;
+
+	for (i = 0; i < ctrl->num_slots; i++) {
+		slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+		if (!slot) {
+			retval = -ENOMEM;
+			goto error;
+		}
+
+		hotplug_slot = &slot->hotplug_slot;
+
+		slot->hp_slot = i;
+		slot->ctrl = ctrl;
+		slot->bus = ctrl->pci_dev->subordinate->number;
+		slot->device = ctrl->slot_device_offset + i;
+		slot->hpc_ops = ctrl->hpc_ops;
+		slot->number = ctrl->first_slot + (ctrl->slot_num_inc * i);
+
+		slot->wq = alloc_workqueue("shpchp-%d", 0, 0, slot->number);
+		if (!slot->wq) {
+			retval = -ENOMEM;
+			goto error_slot;
+		}
+
+		mutex_init(&slot->lock);
+		INIT_DELAYED_WORK(&slot->work, shpchp_queue_pushbutton_work);
+
+		/* register this slot with the hotplug pci core */
+		snprintf(name, SLOT_NAME_SIZE, "%d", slot->number);
+		hotplug_slot->ops = &shpchp_hotplug_slot_ops;
+
+		ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:%02x hp_slot=%x sun=%x slot_device_offset=%x\n",
+			 pci_domain_nr(ctrl->pci_dev->subordinate),
+			 slot->bus, slot->device, slot->hp_slot, slot->number,
+			 ctrl->slot_device_offset);
+		retval = pci_hp_register(hotplug_slot,
+				ctrl->pci_dev->subordinate, slot->device, name);
+		if (retval) {
+			ctrl_err(ctrl, "pci_hp_register failed with error %d\n",
+				 retval);
+			goto error_slotwq;
+		}
+
+		get_power_status(hotplug_slot, &slot->pwr_save);
+		get_attention_status(hotplug_slot, &slot->attention_save);
+		get_latch_status(hotplug_slot, &slot->latch_save);
+		get_adapter_status(hotplug_slot, &slot->presence_save);
+
+		list_add(&slot->slot_list, &ctrl->slot_list);
+	}
+
+	return 0;
+error_slotwq:
+	destroy_workqueue(slot->wq);
+error_slot:
+	kfree(slot);
+error:
+	return retval;
+}
+
+void cleanup_slots(struct controller *ctrl)
+{
+	struct slot *slot, *next;
+
+	list_for_each_entry_safe(slot, next, &ctrl->slot_list, slot_list) {
+		list_del(&slot->slot_list);
+		cancel_delayed_work(&slot->work);
+		destroy_workqueue(slot->wq);
+		pci_hp_deregister(&slot->hotplug_slot);
+		kfree(slot);
+	}
+}
+
+/*
+ * set_attention_status - Turns the Amber LED for a slot on, off or blink
+ */
+static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
+{
+	struct slot *slot = get_slot(hotplug_slot);
+
+	ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
+		 __func__, slot_name(slot));
+
+	slot->attention_save = status;
+	slot->hpc_ops->set_attention_status(slot, status);
+
+	return 0;
+}
+
+static int enable_slot(struct hotplug_slot *hotplug_slot)
+{
+	struct slot *slot = get_slot(hotplug_slot);
+
+	ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
+		 __func__, slot_name(slot));
+
+	return shpchp_sysfs_enable_slot(slot);
+}
+
+static int disable_slot(struct hotplug_slot *hotplug_slot)
+{
+	struct slot *slot = get_slot(hotplug_slot);
+
+	ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
+		 __func__, slot_name(slot));
+
+	return shpchp_sysfs_disable_slot(slot);
+}
+
+static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct slot *slot = get_slot(hotplug_slot);
+	int retval;
+
+	ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
+		 __func__, slot_name(slot));
+
+	retval = slot->hpc_ops->get_power_status(slot, value);
+	if (retval < 0)
+		*value = slot->pwr_save;
+
+	return 0;
+}
+
+static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct slot *slot = get_slot(hotplug_slot);
+	int retval;
+
+	ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
+		 __func__, slot_name(slot));
+
+	retval = slot->hpc_ops->get_attention_status(slot, value);
+	if (retval < 0)
+		*value = slot->attention_save;
+
+	return 0;
+}
+
+static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct slot *slot = get_slot(hotplug_slot);
+	int retval;
+
+	ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
+		 __func__, slot_name(slot));
+
+	retval = slot->hpc_ops->get_latch_status(slot, value);
+	if (retval < 0)
+		*value = slot->latch_save;
+
+	return 0;
+}
+
+static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
+{
+	struct slot *slot = get_slot(hotplug_slot);
+	int retval;
+
+	ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
+		 __func__, slot_name(slot));
+
+	retval = slot->hpc_ops->get_adapter_status(slot, value);
+	if (retval < 0)
+		*value = slot->presence_save;
+
+	return 0;
+}
+
+static bool shpc_capable(struct pci_dev *bridge)
+{
+	/*
+	 * It is assumed that AMD GOLAM chips support SHPC but they do not
+	 * have SHPC capability.
+	 */
+	if (bridge->vendor == PCI_VENDOR_ID_AMD &&
+	    bridge->device == PCI_DEVICE_ID_AMD_GOLAM_7450)
+		return true;
+
+	if (pci_find_capability(bridge, PCI_CAP_ID_SHPC))
+		return true;
+
+	return false;
+}
+
+static int shpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	int rc;
+	struct controller *ctrl;
+
+	if (!shpc_capable(pdev))
+		return -ENODEV;
+
+	if (acpi_get_hp_hw_control_from_firmware(pdev))
+		return -ENODEV;
+
+	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
+	if (!ctrl)
+		goto err_out_none;
+
+	INIT_LIST_HEAD(&ctrl->slot_list);
+
+	rc = shpc_init(ctrl, pdev);
+	if (rc) {
+		ctrl_dbg(ctrl, "Controller initialization failed\n");
+		goto err_out_free_ctrl;
+	}
+
+	pci_set_drvdata(pdev, ctrl);
+
+	/* Setup the slot information structures */
+	rc = init_slots(ctrl);
+	if (rc) {
+		ctrl_err(ctrl, "Slot initialization failed\n");
+		goto err_out_release_ctlr;
+	}
+
+	rc = shpchp_create_ctrl_files(ctrl);
+	if (rc)
+		goto err_cleanup_slots;
+
+	pdev->shpc_managed = 1;
+	return 0;
+
+err_cleanup_slots:
+	cleanup_slots(ctrl);
+err_out_release_ctlr:
+	ctrl->hpc_ops->release_ctlr(ctrl);
+err_out_free_ctrl:
+	kfree(ctrl);
+err_out_none:
+	return -ENODEV;
+}
+
+static void shpc_remove(struct pci_dev *dev)
+{
+	struct controller *ctrl = pci_get_drvdata(dev);
+
+	dev->shpc_managed = 0;
+	shpchp_remove_ctrl_files(ctrl);
+	ctrl->hpc_ops->release_ctlr(ctrl);
+	kfree(ctrl);
+}
+
+static const struct pci_device_id shpcd_pci_tbl[] = {
+	{PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_NORMAL, ~0)},
+	{ /* end: all zeroes */ }
+};
+MODULE_DEVICE_TABLE(pci, shpcd_pci_tbl);
+
+static struct pci_driver shpc_driver = {
+	.name =		SHPC_MODULE_NAME,
+	.id_table =	shpcd_pci_tbl,
+	.probe =	shpc_probe,
+	.remove =	shpc_remove,
+};
+
+static int __init shpcd_init(void)
+{
+	int retval;
+
+	retval = pci_register_driver(&shpc_driver);
+	dbg("%s: pci_register_driver = %d\n", __func__, retval);
+	info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
+
+	return retval;
+}
+
+static void __exit shpcd_cleanup(void)
+{
+	dbg("unload_shpchpd()\n");
+	pci_unregister_driver(&shpc_driver);
+	info(DRIVER_DESC " version: " DRIVER_VERSION " unloaded\n");
+}
+
+module_init(shpcd_init);
+module_exit(shpcd_cleanup);
diff --git a/drivers/pci/hotplug/shpchp_ctrl.c b/drivers/pci/hotplug/shpchp_ctrl.c
new file mode 100644
index 000000000..6a6705e0c
--- /dev/null
+++ b/drivers/pci/hotplug/shpchp_ctrl.c
@@ -0,0 +1,705 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Standard Hot Plug Controller Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM Corp.
+ * Copyright (C) 2003-2004 Intel Corporation
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include "../pci.h"
+#include "shpchp.h"
+
+static void interrupt_event_handler(struct work_struct *work);
+static int shpchp_enable_slot(struct slot *p_slot);
+static int shpchp_disable_slot(struct slot *p_slot);
+
+static int queue_interrupt_event(struct slot *p_slot, u32 event_type)
+{
+	struct event_info *info;
+
+	info = kmalloc(sizeof(*info), GFP_ATOMIC);
+	if (!info)
+		return -ENOMEM;
+
+	info->event_type = event_type;
+	info->p_slot = p_slot;
+	INIT_WORK(&info->work, interrupt_event_handler);
+
+	queue_work(p_slot->wq, &info->work);
+
+	return 0;
+}
+
+u8 shpchp_handle_attention_button(u8 hp_slot, struct controller *ctrl)
+{
+	struct slot *p_slot;
+	u32 event_type;
+
+	/* Attention Button Change */
+	ctrl_dbg(ctrl, "Attention button interrupt received\n");
+
+	p_slot = shpchp_find_slot(ctrl, hp_slot + ctrl->slot_device_offset);
+	p_slot->hpc_ops->get_adapter_status(p_slot, &(p_slot->presence_save));
+
+	/*
+	 *  Button pressed - See if need to TAKE ACTION!!!
+	 */
+	ctrl_info(ctrl, "Button pressed on Slot(%s)\n", slot_name(p_slot));
+	event_type = INT_BUTTON_PRESS;
+
+	queue_interrupt_event(p_slot, event_type);
+
+	return 0;
+
+}
+
+u8 shpchp_handle_switch_change(u8 hp_slot, struct controller *ctrl)
+{
+	struct slot *p_slot;
+	u8 getstatus;
+	u32 event_type;
+
+	/* Switch Change */
+	ctrl_dbg(ctrl, "Switch interrupt received\n");
+
+	p_slot = shpchp_find_slot(ctrl, hp_slot + ctrl->slot_device_offset);
+	p_slot->hpc_ops->get_adapter_status(p_slot, &(p_slot->presence_save));
+	p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+	ctrl_dbg(ctrl, "Card present %x Power status %x\n",
+		 p_slot->presence_save, p_slot->pwr_save);
+
+	if (getstatus) {
+		/*
+		 * Switch opened
+		 */
+		ctrl_info(ctrl, "Latch open on Slot(%s)\n", slot_name(p_slot));
+		event_type = INT_SWITCH_OPEN;
+		if (p_slot->pwr_save && p_slot->presence_save) {
+			event_type = INT_POWER_FAULT;
+			ctrl_err(ctrl, "Surprise Removal of card\n");
+		}
+	} else {
+		/*
+		 *  Switch closed
+		 */
+		ctrl_info(ctrl, "Latch close on Slot(%s)\n", slot_name(p_slot));
+		event_type = INT_SWITCH_CLOSE;
+	}
+
+	queue_interrupt_event(p_slot, event_type);
+
+	return 1;
+}
+
+u8 shpchp_handle_presence_change(u8 hp_slot, struct controller *ctrl)
+{
+	struct slot *p_slot;
+	u32 event_type;
+
+	/* Presence Change */
+	ctrl_dbg(ctrl, "Presence/Notify input change\n");
+
+	p_slot = shpchp_find_slot(ctrl, hp_slot + ctrl->slot_device_offset);
+
+	/*
+	 * Save the presence state
+	 */
+	p_slot->hpc_ops->get_adapter_status(p_slot, &(p_slot->presence_save));
+	if (p_slot->presence_save) {
+		/*
+		 * Card Present
+		 */
+		ctrl_info(ctrl, "Card present on Slot(%s)\n",
+			  slot_name(p_slot));
+		event_type = INT_PRESENCE_ON;
+	} else {
+		/*
+		 * Not Present
+		 */
+		ctrl_info(ctrl, "Card not present on Slot(%s)\n",
+			  slot_name(p_slot));
+		event_type = INT_PRESENCE_OFF;
+	}
+
+	queue_interrupt_event(p_slot, event_type);
+
+	return 1;
+}
+
+u8 shpchp_handle_power_fault(u8 hp_slot, struct controller *ctrl)
+{
+	struct slot *p_slot;
+	u32 event_type;
+
+	/* Power fault */
+	ctrl_dbg(ctrl, "Power fault interrupt received\n");
+
+	p_slot = shpchp_find_slot(ctrl, hp_slot + ctrl->slot_device_offset);
+
+	if (!(p_slot->hpc_ops->query_power_fault(p_slot))) {
+		/*
+		 * Power fault Cleared
+		 */
+		ctrl_info(ctrl, "Power fault cleared on Slot(%s)\n",
+			  slot_name(p_slot));
+		p_slot->status = 0x00;
+		event_type = INT_POWER_FAULT_CLEAR;
+	} else {
+		/*
+		 *   Power fault
+		 */
+		ctrl_info(ctrl, "Power fault on Slot(%s)\n", slot_name(p_slot));
+		event_type = INT_POWER_FAULT;
+		/* set power fault status for this board */
+		p_slot->status = 0xFF;
+		ctrl_info(ctrl, "Power fault bit %x set\n", hp_slot);
+	}
+
+	queue_interrupt_event(p_slot, event_type);
+
+	return 1;
+}
+
+/* The following routines constitute the bulk of the
+   hotplug controller logic
+ */
+static int change_bus_speed(struct controller *ctrl, struct slot *p_slot,
+		enum pci_bus_speed speed)
+{
+	int rc = 0;
+
+	ctrl_dbg(ctrl, "Change speed to %d\n", speed);
+	rc = p_slot->hpc_ops->set_bus_speed_mode(p_slot, speed);
+	if (rc) {
+		ctrl_err(ctrl, "%s: Issue of set bus speed mode command failed\n",
+			 __func__);
+		return WRONG_BUS_FREQUENCY;
+	}
+	return rc;
+}
+
+static int fix_bus_speed(struct controller *ctrl, struct slot *pslot,
+		u8 flag, enum pci_bus_speed asp, enum pci_bus_speed bsp,
+		enum pci_bus_speed msp)
+{
+	int rc = 0;
+
+	/*
+	 * If other slots on the same bus are occupied, we cannot
+	 * change the bus speed.
+	 */
+	if (flag) {
+		if (asp < bsp) {
+			ctrl_err(ctrl, "Speed of bus %x and adapter %x mismatch\n",
+				 bsp, asp);
+			rc = WRONG_BUS_FREQUENCY;
+		}
+		return rc;
+	}
+
+	if (asp < msp) {
+		if (bsp != asp)
+			rc = change_bus_speed(ctrl, pslot, asp);
+	} else {
+		if (bsp != msp)
+			rc = change_bus_speed(ctrl, pslot, msp);
+	}
+	return rc;
+}
+
+/**
+ * board_added - Called after a board has been added to the system.
+ * @p_slot: target &slot
+ *
+ * Turns power on for the board.
+ * Configures board.
+ */
+static int board_added(struct slot *p_slot)
+{
+	u8 hp_slot;
+	u8 slots_not_empty = 0;
+	int rc = 0;
+	enum pci_bus_speed asp, bsp, msp;
+	struct controller *ctrl = p_slot->ctrl;
+	struct pci_bus *parent = ctrl->pci_dev->subordinate;
+
+	hp_slot = p_slot->device - ctrl->slot_device_offset;
+
+	ctrl_dbg(ctrl, "%s: p_slot->device, slot_offset, hp_slot = %d, %d ,%d\n",
+		 __func__, p_slot->device, ctrl->slot_device_offset, hp_slot);
+
+	/* Power on slot without connecting to bus */
+	rc = p_slot->hpc_ops->power_on_slot(p_slot);
+	if (rc) {
+		ctrl_err(ctrl, "Failed to power on slot\n");
+		return -1;
+	}
+
+	if ((ctrl->pci_dev->vendor == 0x8086) && (ctrl->pci_dev->device == 0x0332)) {
+		rc = p_slot->hpc_ops->set_bus_speed_mode(p_slot, PCI_SPEED_33MHz);
+		if (rc) {
+			ctrl_err(ctrl, "%s: Issue of set bus speed mode command failed\n",
+				 __func__);
+			return WRONG_BUS_FREQUENCY;
+		}
+
+		/* turn on board, blink green LED, turn off Amber LED */
+		rc = p_slot->hpc_ops->slot_enable(p_slot);
+		if (rc) {
+			ctrl_err(ctrl, "Issue of Slot Enable command failed\n");
+			return rc;
+		}
+	}
+
+	rc = p_slot->hpc_ops->get_adapter_speed(p_slot, &asp);
+	if (rc) {
+		ctrl_err(ctrl, "Can't get adapter speed or bus mode mismatch\n");
+		return WRONG_BUS_FREQUENCY;
+	}
+
+	bsp = ctrl->pci_dev->subordinate->cur_bus_speed;
+	msp = ctrl->pci_dev->subordinate->max_bus_speed;
+
+	/* Check if there are other slots or devices on the same bus */
+	if (!list_empty(&ctrl->pci_dev->subordinate->devices))
+		slots_not_empty = 1;
+
+	ctrl_dbg(ctrl, "%s: slots_not_empty %d, adapter_speed %d, bus_speed %d, max_bus_speed %d\n",
+		 __func__, slots_not_empty, asp,
+		 bsp, msp);
+
+	rc = fix_bus_speed(ctrl, p_slot, slots_not_empty, asp, bsp, msp);
+	if (rc)
+		return rc;
+
+	/* turn on board, blink green LED, turn off Amber LED */
+	rc = p_slot->hpc_ops->slot_enable(p_slot);
+	if (rc) {
+		ctrl_err(ctrl, "Issue of Slot Enable command failed\n");
+		return rc;
+	}
+
+	/* Wait for ~1 second */
+	msleep(1000);
+
+	ctrl_dbg(ctrl, "%s: slot status = %x\n", __func__, p_slot->status);
+	/* Check for a power fault */
+	if (p_slot->status == 0xFF) {
+		/* power fault occurred, but it was benign */
+		ctrl_dbg(ctrl, "%s: Power fault\n", __func__);
+		p_slot->status = 0;
+		goto err_exit;
+	}
+
+	if (shpchp_configure_device(p_slot)) {
+		ctrl_err(ctrl, "Cannot add device at %04x:%02x:%02x\n",
+			 pci_domain_nr(parent), p_slot->bus, p_slot->device);
+		goto err_exit;
+	}
+
+	p_slot->status = 0;
+	p_slot->is_a_board = 0x01;
+	p_slot->pwr_save = 1;
+
+	p_slot->hpc_ops->green_led_on(p_slot);
+
+	return 0;
+
+err_exit:
+	/* turn off slot, turn on Amber LED, turn off Green LED */
+	rc = p_slot->hpc_ops->slot_disable(p_slot);
+	if (rc) {
+		ctrl_err(ctrl, "%s: Issue of Slot Disable command failed\n",
+			 __func__);
+		return rc;
+	}
+
+	return(rc);
+}
+
+
+/**
+ * remove_board - Turns off slot and LEDs
+ * @p_slot: target &slot
+ */
+static int remove_board(struct slot *p_slot)
+{
+	struct controller *ctrl = p_slot->ctrl;
+	u8 hp_slot;
+	int rc;
+
+	shpchp_unconfigure_device(p_slot);
+
+	hp_slot = p_slot->device - ctrl->slot_device_offset;
+	p_slot = shpchp_find_slot(ctrl, hp_slot + ctrl->slot_device_offset);
+
+	ctrl_dbg(ctrl, "%s: hp_slot = %d\n", __func__, hp_slot);
+
+	/* Change status to shutdown */
+	if (p_slot->is_a_board)
+		p_slot->status = 0x01;
+
+	/* turn off slot, turn on Amber LED, turn off Green LED */
+	rc = p_slot->hpc_ops->slot_disable(p_slot);
+	if (rc) {
+		ctrl_err(ctrl, "%s: Issue of Slot Disable command failed\n",
+			 __func__);
+		return rc;
+	}
+
+	rc = p_slot->hpc_ops->set_attention_status(p_slot, 0);
+	if (rc) {
+		ctrl_err(ctrl, "Issue of Set Attention command failed\n");
+		return rc;
+	}
+
+	p_slot->pwr_save = 0;
+	p_slot->is_a_board = 0;
+
+	return 0;
+}
+
+
+struct pushbutton_work_info {
+	struct slot *p_slot;
+	struct work_struct work;
+};
+
+/**
+ * shpchp_pushbutton_thread - handle pushbutton events
+ * @work: &struct work_struct to be handled
+ *
+ * Scheduled procedure to handle blocking stuff for the pushbuttons.
+ * Handles all pending events and exits.
+ */
+static void shpchp_pushbutton_thread(struct work_struct *work)
+{
+	struct pushbutton_work_info *info =
+		container_of(work, struct pushbutton_work_info, work);
+	struct slot *p_slot = info->p_slot;
+
+	mutex_lock(&p_slot->lock);
+	switch (p_slot->state) {
+	case POWEROFF_STATE:
+		mutex_unlock(&p_slot->lock);
+		shpchp_disable_slot(p_slot);
+		mutex_lock(&p_slot->lock);
+		p_slot->state = STATIC_STATE;
+		break;
+	case POWERON_STATE:
+		mutex_unlock(&p_slot->lock);
+		if (shpchp_enable_slot(p_slot))
+			p_slot->hpc_ops->green_led_off(p_slot);
+		mutex_lock(&p_slot->lock);
+		p_slot->state = STATIC_STATE;
+		break;
+	default:
+		break;
+	}
+	mutex_unlock(&p_slot->lock);
+
+	kfree(info);
+}
+
+void shpchp_queue_pushbutton_work(struct work_struct *work)
+{
+	struct slot *p_slot = container_of(work, struct slot, work.work);
+	struct pushbutton_work_info *info;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		ctrl_err(p_slot->ctrl, "%s: Cannot allocate memory\n",
+			 __func__);
+		return;
+	}
+	info->p_slot = p_slot;
+	INIT_WORK(&info->work, shpchp_pushbutton_thread);
+
+	mutex_lock(&p_slot->lock);
+	switch (p_slot->state) {
+	case BLINKINGOFF_STATE:
+		p_slot->state = POWEROFF_STATE;
+		break;
+	case BLINKINGON_STATE:
+		p_slot->state = POWERON_STATE;
+		break;
+	default:
+		kfree(info);
+		goto out;
+	}
+	queue_work(p_slot->wq, &info->work);
+ out:
+	mutex_unlock(&p_slot->lock);
+}
+
+static void update_slot_info(struct slot *slot)
+{
+	slot->hpc_ops->get_power_status(slot, &slot->pwr_save);
+	slot->hpc_ops->get_attention_status(slot, &slot->attention_save);
+	slot->hpc_ops->get_latch_status(slot, &slot->latch_save);
+	slot->hpc_ops->get_adapter_status(slot, &slot->presence_save);
+}
+
+/*
+ * Note: This function must be called with slot->lock held
+ */
+static void handle_button_press_event(struct slot *p_slot)
+{
+	u8 getstatus;
+	struct controller *ctrl = p_slot->ctrl;
+
+	switch (p_slot->state) {
+	case STATIC_STATE:
+		p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
+		if (getstatus) {
+			p_slot->state = BLINKINGOFF_STATE;
+			ctrl_info(ctrl, "PCI slot #%s - powering off due to button press\n",
+				  slot_name(p_slot));
+		} else {
+			p_slot->state = BLINKINGON_STATE;
+			ctrl_info(ctrl, "PCI slot #%s - powering on due to button press\n",
+				  slot_name(p_slot));
+		}
+		/* blink green LED and turn off amber */
+		p_slot->hpc_ops->green_led_blink(p_slot);
+		p_slot->hpc_ops->set_attention_status(p_slot, 0);
+
+		queue_delayed_work(p_slot->wq, &p_slot->work, 5*HZ);
+		break;
+	case BLINKINGOFF_STATE:
+	case BLINKINGON_STATE:
+		/*
+		 * Cancel if we are still blinking; this means that we
+		 * press the attention again before the 5 sec. limit
+		 * expires to cancel hot-add or hot-remove
+		 */
+		ctrl_info(ctrl, "Button cancel on Slot(%s)\n",
+			  slot_name(p_slot));
+		cancel_delayed_work(&p_slot->work);
+		if (p_slot->state == BLINKINGOFF_STATE)
+			p_slot->hpc_ops->green_led_on(p_slot);
+		else
+			p_slot->hpc_ops->green_led_off(p_slot);
+		p_slot->hpc_ops->set_attention_status(p_slot, 0);
+		ctrl_info(ctrl, "PCI slot #%s - action canceled due to button press\n",
+			  slot_name(p_slot));
+		p_slot->state = STATIC_STATE;
+		break;
+	case POWEROFF_STATE:
+	case POWERON_STATE:
+		/*
+		 * Ignore if the slot is on power-on or power-off state;
+		 * this means that the previous attention button action
+		 * to hot-add or hot-remove is undergoing
+		 */
+		ctrl_info(ctrl, "Button ignore on Slot(%s)\n",
+			  slot_name(p_slot));
+		update_slot_info(p_slot);
+		break;
+	default:
+		ctrl_warn(ctrl, "Not a valid state\n");
+		break;
+	}
+}
+
+static void interrupt_event_handler(struct work_struct *work)
+{
+	struct event_info *info = container_of(work, struct event_info, work);
+	struct slot *p_slot = info->p_slot;
+
+	mutex_lock(&p_slot->lock);
+	switch (info->event_type) {
+	case INT_BUTTON_PRESS:
+		handle_button_press_event(p_slot);
+		break;
+	case INT_POWER_FAULT:
+		ctrl_dbg(p_slot->ctrl, "%s: Power fault\n", __func__);
+		p_slot->hpc_ops->set_attention_status(p_slot, 1);
+		p_slot->hpc_ops->green_led_off(p_slot);
+		break;
+	default:
+		update_slot_info(p_slot);
+		break;
+	}
+	mutex_unlock(&p_slot->lock);
+
+	kfree(info);
+}
+
+
+static int shpchp_enable_slot (struct slot *p_slot)
+{
+	u8 getstatus = 0;
+	int rc, retval = -ENODEV;
+	struct controller *ctrl = p_slot->ctrl;
+
+	/* Check to see if (latch closed, card present, power off) */
+	mutex_lock(&p_slot->ctrl->crit_sect);
+	rc = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
+	if (rc || !getstatus) {
+		ctrl_info(ctrl, "No adapter on slot(%s)\n", slot_name(p_slot));
+		goto out;
+	}
+	rc = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+	if (rc || getstatus) {
+		ctrl_info(ctrl, "Latch open on slot(%s)\n", slot_name(p_slot));
+		goto out;
+	}
+	rc = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
+	if (rc || getstatus) {
+		ctrl_info(ctrl, "Already enabled on slot(%s)\n",
+			  slot_name(p_slot));
+		goto out;
+	}
+
+	p_slot->is_a_board = 1;
+
+	/* We have to save the presence info for these slots */
+	p_slot->hpc_ops->get_adapter_status(p_slot, &(p_slot->presence_save));
+	p_slot->hpc_ops->get_power_status(p_slot, &(p_slot->pwr_save));
+	ctrl_dbg(ctrl, "%s: p_slot->pwr_save %x\n", __func__, p_slot->pwr_save);
+	p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+
+	if ((p_slot->ctrl->pci_dev->vendor == PCI_VENDOR_ID_AMD &&
+	     p_slot->ctrl->pci_dev->device == PCI_DEVICE_ID_AMD_POGO_7458)
+	     && p_slot->ctrl->num_slots == 1) {
+		/* handle AMD POGO errata; this must be done before enable  */
+		amd_pogo_errata_save_misc_reg(p_slot);
+		retval = board_added(p_slot);
+		/* handle AMD POGO errata; this must be done after enable  */
+		amd_pogo_errata_restore_misc_reg(p_slot);
+	} else
+		retval = board_added(p_slot);
+
+	if (retval) {
+		p_slot->hpc_ops->get_adapter_status(p_slot,
+				&(p_slot->presence_save));
+		p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+	}
+
+	update_slot_info(p_slot);
+ out:
+	mutex_unlock(&p_slot->ctrl->crit_sect);
+	return retval;
+}
+
+
+static int shpchp_disable_slot (struct slot *p_slot)
+{
+	u8 getstatus = 0;
+	int rc, retval = -ENODEV;
+	struct controller *ctrl = p_slot->ctrl;
+
+	if (!p_slot->ctrl)
+		return -ENODEV;
+
+	/* Check to see if (latch closed, card present, power on) */
+	mutex_lock(&p_slot->ctrl->crit_sect);
+
+	rc = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
+	if (rc || !getstatus) {
+		ctrl_info(ctrl, "No adapter on slot(%s)\n", slot_name(p_slot));
+		goto out;
+	}
+	rc = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+	if (rc || getstatus) {
+		ctrl_info(ctrl, "Latch open on slot(%s)\n", slot_name(p_slot));
+		goto out;
+	}
+	rc = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
+	if (rc || !getstatus) {
+		ctrl_info(ctrl, "Already disabled on slot(%s)\n",
+			  slot_name(p_slot));
+		goto out;
+	}
+
+	retval = remove_board(p_slot);
+	update_slot_info(p_slot);
+ out:
+	mutex_unlock(&p_slot->ctrl->crit_sect);
+	return retval;
+}
+
+int shpchp_sysfs_enable_slot(struct slot *p_slot)
+{
+	int retval = -ENODEV;
+	struct controller *ctrl = p_slot->ctrl;
+
+	mutex_lock(&p_slot->lock);
+	switch (p_slot->state) {
+	case BLINKINGON_STATE:
+		cancel_delayed_work(&p_slot->work);
+		fallthrough;
+	case STATIC_STATE:
+		p_slot->state = POWERON_STATE;
+		mutex_unlock(&p_slot->lock);
+		retval = shpchp_enable_slot(p_slot);
+		mutex_lock(&p_slot->lock);
+		p_slot->state = STATIC_STATE;
+		break;
+	case POWERON_STATE:
+		ctrl_info(ctrl, "Slot %s is already in powering on state\n",
+			  slot_name(p_slot));
+		break;
+	case BLINKINGOFF_STATE:
+	case POWEROFF_STATE:
+		ctrl_info(ctrl, "Already enabled on slot %s\n",
+			  slot_name(p_slot));
+		break;
+	default:
+		ctrl_err(ctrl, "Not a valid state on slot %s\n",
+			 slot_name(p_slot));
+		break;
+	}
+	mutex_unlock(&p_slot->lock);
+
+	return retval;
+}
+
+int shpchp_sysfs_disable_slot(struct slot *p_slot)
+{
+	int retval = -ENODEV;
+	struct controller *ctrl = p_slot->ctrl;
+
+	mutex_lock(&p_slot->lock);
+	switch (p_slot->state) {
+	case BLINKINGOFF_STATE:
+		cancel_delayed_work(&p_slot->work);
+		fallthrough;
+	case STATIC_STATE:
+		p_slot->state = POWEROFF_STATE;
+		mutex_unlock(&p_slot->lock);
+		retval = shpchp_disable_slot(p_slot);
+		mutex_lock(&p_slot->lock);
+		p_slot->state = STATIC_STATE;
+		break;
+	case POWEROFF_STATE:
+		ctrl_info(ctrl, "Slot %s is already in powering off state\n",
+			  slot_name(p_slot));
+		break;
+	case BLINKINGON_STATE:
+	case POWERON_STATE:
+		ctrl_info(ctrl, "Already disabled on slot %s\n",
+			  slot_name(p_slot));
+		break;
+	default:
+		ctrl_err(ctrl, "Not a valid state on slot %s\n",
+			 slot_name(p_slot));
+		break;
+	}
+	mutex_unlock(&p_slot->lock);
+
+	return retval;
+}
diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c
new file mode 100644
index 000000000..48e4daefc
--- /dev/null
+++ b/drivers/pci/hotplug/shpchp_hpc.c
@@ -0,0 +1,1073 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Standard PCI Hot Plug Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM Corp.
+ * Copyright (C) 2003-2004 Intel Corporation
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>,<kristen.c.accardi@intel.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+
+#include "shpchp.h"
+
+/* Slot Available Register I field definition */
+#define SLOT_33MHZ		0x0000001f
+#define SLOT_66MHZ_PCIX		0x00001f00
+#define SLOT_100MHZ_PCIX	0x001f0000
+#define SLOT_133MHZ_PCIX	0x1f000000
+
+/* Slot Available Register II field definition */
+#define SLOT_66MHZ		0x0000001f
+#define SLOT_66MHZ_PCIX_266	0x00000f00
+#define SLOT_100MHZ_PCIX_266	0x0000f000
+#define SLOT_133MHZ_PCIX_266	0x000f0000
+#define SLOT_66MHZ_PCIX_533	0x00f00000
+#define SLOT_100MHZ_PCIX_533	0x0f000000
+#define SLOT_133MHZ_PCIX_533	0xf0000000
+
+/* Slot Configuration */
+#define SLOT_NUM		0x0000001F
+#define	FIRST_DEV_NUM		0x00001F00
+#define PSN			0x07FF0000
+#define	UPDOWN			0x20000000
+#define	MRLSENSOR		0x40000000
+#define ATTN_BUTTON		0x80000000
+
+/*
+ * Interrupt Locator Register definitions
+ */
+#define CMD_INTR_PENDING	(1 << 0)
+#define SLOT_INTR_PENDING(i)	(1 << (i + 1))
+
+/*
+ * Controller SERR-INT Register
+ */
+#define GLOBAL_INTR_MASK	(1 << 0)
+#define GLOBAL_SERR_MASK	(1 << 1)
+#define COMMAND_INTR_MASK	(1 << 2)
+#define ARBITER_SERR_MASK	(1 << 3)
+#define COMMAND_DETECTED	(1 << 16)
+#define ARBITER_DETECTED	(1 << 17)
+#define SERR_INTR_RSVDZ_MASK	0xfffc0000
+
+/*
+ * Logical Slot Register definitions
+ */
+#define SLOT_REG(i)		(SLOT1 + (4 * i))
+
+#define SLOT_STATE_SHIFT	(0)
+#define SLOT_STATE_MASK		(3 << 0)
+#define SLOT_STATE_PWRONLY	(1)
+#define SLOT_STATE_ENABLED	(2)
+#define SLOT_STATE_DISABLED	(3)
+#define PWR_LED_STATE_SHIFT	(2)
+#define PWR_LED_STATE_MASK	(3 << 2)
+#define ATN_LED_STATE_SHIFT	(4)
+#define ATN_LED_STATE_MASK	(3 << 4)
+#define ATN_LED_STATE_ON	(1)
+#define ATN_LED_STATE_BLINK	(2)
+#define ATN_LED_STATE_OFF	(3)
+#define POWER_FAULT		(1 << 6)
+#define ATN_BUTTON		(1 << 7)
+#define MRL_SENSOR		(1 << 8)
+#define MHZ66_CAP		(1 << 9)
+#define PRSNT_SHIFT		(10)
+#define PRSNT_MASK		(3 << 10)
+#define PCIX_CAP_SHIFT		(12)
+#define PCIX_CAP_MASK_PI1	(3 << 12)
+#define PCIX_CAP_MASK_PI2	(7 << 12)
+#define PRSNT_CHANGE_DETECTED	(1 << 16)
+#define ISO_PFAULT_DETECTED	(1 << 17)
+#define BUTTON_PRESS_DETECTED	(1 << 18)
+#define MRL_CHANGE_DETECTED	(1 << 19)
+#define CON_PFAULT_DETECTED	(1 << 20)
+#define PRSNT_CHANGE_INTR_MASK	(1 << 24)
+#define ISO_PFAULT_INTR_MASK	(1 << 25)
+#define BUTTON_PRESS_INTR_MASK	(1 << 26)
+#define MRL_CHANGE_INTR_MASK	(1 << 27)
+#define CON_PFAULT_INTR_MASK	(1 << 28)
+#define MRL_CHANGE_SERR_MASK	(1 << 29)
+#define CON_PFAULT_SERR_MASK	(1 << 30)
+#define SLOT_REG_RSVDZ_MASK	((1 << 15) | (7 << 21))
+
+/*
+ * SHPC Command Code definitions
+ *
+ *     Slot Operation				00h - 3Fh
+ *     Set Bus Segment Speed/Mode A		40h - 47h
+ *     Power-Only All Slots			48h
+ *     Enable All Slots				49h
+ *     Set Bus Segment Speed/Mode B (PI=2)	50h - 5Fh
+ *     Reserved Command Codes			60h - BFh
+ *     Vendor Specific Commands			C0h - FFh
+ */
+#define SET_SLOT_PWR		0x01	/* Slot Operation */
+#define SET_SLOT_ENABLE		0x02
+#define SET_SLOT_DISABLE	0x03
+#define SET_PWR_ON		0x04
+#define SET_PWR_BLINK		0x08
+#define SET_PWR_OFF		0x0c
+#define SET_ATTN_ON		0x10
+#define SET_ATTN_BLINK		0x20
+#define SET_ATTN_OFF		0x30
+#define SETA_PCI_33MHZ		0x40	/* Set Bus Segment Speed/Mode A */
+#define SETA_PCI_66MHZ		0x41
+#define SETA_PCIX_66MHZ		0x42
+#define SETA_PCIX_100MHZ	0x43
+#define SETA_PCIX_133MHZ	0x44
+#define SETA_RESERVED1		0x45
+#define SETA_RESERVED2		0x46
+#define SETA_RESERVED3		0x47
+#define SET_PWR_ONLY_ALL	0x48	/* Power-Only All Slots */
+#define SET_ENABLE_ALL		0x49	/* Enable All Slots */
+#define	SETB_PCI_33MHZ		0x50	/* Set Bus Segment Speed/Mode B */
+#define SETB_PCI_66MHZ		0x51
+#define SETB_PCIX_66MHZ_PM	0x52
+#define SETB_PCIX_100MHZ_PM	0x53
+#define SETB_PCIX_133MHZ_PM	0x54
+#define SETB_PCIX_66MHZ_EM	0x55
+#define SETB_PCIX_100MHZ_EM	0x56
+#define SETB_PCIX_133MHZ_EM	0x57
+#define SETB_PCIX_66MHZ_266	0x58
+#define SETB_PCIX_100MHZ_266	0x59
+#define SETB_PCIX_133MHZ_266	0x5a
+#define SETB_PCIX_66MHZ_533	0x5b
+#define SETB_PCIX_100MHZ_533	0x5c
+#define SETB_PCIX_133MHZ_533	0x5d
+#define SETB_RESERVED1		0x5e
+#define SETB_RESERVED2		0x5f
+
+/*
+ * SHPC controller command error code
+ */
+#define SWITCH_OPEN		0x1
+#define INVALID_CMD		0x2
+#define INVALID_SPEED_MODE	0x4
+
+/*
+ * For accessing SHPC Working Register Set via PCI Configuration Space
+ */
+#define DWORD_SELECT		0x2
+#define DWORD_DATA		0x4
+
+/* Field Offset in Logical Slot Register - byte boundary */
+#define SLOT_EVENT_LATCH	0x2
+#define SLOT_SERR_INT_MASK	0x3
+
+static irqreturn_t shpc_isr(int irq, void *dev_id);
+static void start_int_poll_timer(struct controller *ctrl, int sec);
+static int hpc_check_cmd_status(struct controller *ctrl);
+
+static inline u8 shpc_readb(struct controller *ctrl, int reg)
+{
+	return readb(ctrl->creg + reg);
+}
+
+static inline u16 shpc_readw(struct controller *ctrl, int reg)
+{
+	return readw(ctrl->creg + reg);
+}
+
+static inline void shpc_writew(struct controller *ctrl, int reg, u16 val)
+{
+	writew(val, ctrl->creg + reg);
+}
+
+static inline u32 shpc_readl(struct controller *ctrl, int reg)
+{
+	return readl(ctrl->creg + reg);
+}
+
+static inline void shpc_writel(struct controller *ctrl, int reg, u32 val)
+{
+	writel(val, ctrl->creg + reg);
+}
+
+static inline int shpc_indirect_read(struct controller *ctrl, int index,
+				     u32 *value)
+{
+	int rc;
+	u32 cap_offset = ctrl->cap_offset;
+	struct pci_dev *pdev = ctrl->pci_dev;
+
+	rc = pci_write_config_byte(pdev, cap_offset + DWORD_SELECT, index);
+	if (rc)
+		return rc;
+	return pci_read_config_dword(pdev, cap_offset + DWORD_DATA, value);
+}
+
+/*
+ * This is the interrupt polling timeout function.
+ */
+static void int_poll_timeout(struct timer_list *t)
+{
+	struct controller *ctrl = from_timer(ctrl, t, poll_timer);
+
+	/* Poll for interrupt events.  regs == NULL => polling */
+	shpc_isr(0, ctrl);
+
+	if (!shpchp_poll_time)
+		shpchp_poll_time = 2; /* default polling interval is 2 sec */
+
+	start_int_poll_timer(ctrl, shpchp_poll_time);
+}
+
+/*
+ * This function starts the interrupt polling timer.
+ */
+static void start_int_poll_timer(struct controller *ctrl, int sec)
+{
+	/* Clamp to sane value */
+	if ((sec <= 0) || (sec > 60))
+		sec = 2;
+
+	ctrl->poll_timer.expires = jiffies + sec * HZ;
+	add_timer(&ctrl->poll_timer);
+}
+
+static inline int is_ctrl_busy(struct controller *ctrl)
+{
+	u16 cmd_status = shpc_readw(ctrl, CMD_STATUS);
+	return cmd_status & 0x1;
+}
+
+/*
+ * Returns 1 if SHPC finishes executing a command within 1 sec,
+ * otherwise returns 0.
+ */
+static inline int shpc_poll_ctrl_busy(struct controller *ctrl)
+{
+	int i;
+
+	if (!is_ctrl_busy(ctrl))
+		return 1;
+
+	/* Check every 0.1 sec for a total of 1 sec */
+	for (i = 0; i < 10; i++) {
+		msleep(100);
+		if (!is_ctrl_busy(ctrl))
+			return 1;
+	}
+
+	return 0;
+}
+
+static inline int shpc_wait_cmd(struct controller *ctrl)
+{
+	int retval = 0;
+	unsigned long timeout = msecs_to_jiffies(1000);
+	int rc;
+
+	if (shpchp_poll_mode)
+		rc = shpc_poll_ctrl_busy(ctrl);
+	else
+		rc = wait_event_interruptible_timeout(ctrl->queue,
+						!is_ctrl_busy(ctrl), timeout);
+	if (!rc && is_ctrl_busy(ctrl)) {
+		retval = -EIO;
+		ctrl_err(ctrl, "Command not completed in 1000 msec\n");
+	} else if (rc < 0) {
+		retval = -EINTR;
+		ctrl_info(ctrl, "Command was interrupted by a signal\n");
+	}
+
+	return retval;
+}
+
+static int shpc_write_cmd(struct slot *slot, u8 t_slot, u8 cmd)
+{
+	struct controller *ctrl = slot->ctrl;
+	u16 cmd_status;
+	int retval = 0;
+	u16 temp_word;
+
+	mutex_lock(&slot->ctrl->cmd_lock);
+
+	if (!shpc_poll_ctrl_busy(ctrl)) {
+		/* After 1 sec and the controller is still busy */
+		ctrl_err(ctrl, "Controller is still busy after 1 sec\n");
+		retval = -EBUSY;
+		goto out;
+	}
+
+	++t_slot;
+	temp_word =  (t_slot << 8) | (cmd & 0xFF);
+	ctrl_dbg(ctrl, "%s: t_slot %x cmd %x\n", __func__, t_slot, cmd);
+
+	/* To make sure the Controller Busy bit is 0 before we send out the
+	 * command.
+	 */
+	shpc_writew(ctrl, CMD, temp_word);
+
+	/*
+	 * Wait for command completion.
+	 */
+	retval = shpc_wait_cmd(slot->ctrl);
+	if (retval)
+		goto out;
+
+	cmd_status = hpc_check_cmd_status(slot->ctrl);
+	if (cmd_status) {
+		ctrl_err(ctrl, "Failed to issued command 0x%x (error code = %d)\n",
+			 cmd, cmd_status);
+		retval = -EIO;
+	}
+ out:
+	mutex_unlock(&slot->ctrl->cmd_lock);
+	return retval;
+}
+
+static int hpc_check_cmd_status(struct controller *ctrl)
+{
+	int retval = 0;
+	u16 cmd_status = shpc_readw(ctrl, CMD_STATUS) & 0x000F;
+
+	switch (cmd_status >> 1) {
+	case 0:
+		retval = 0;
+		break;
+	case 1:
+		retval = SWITCH_OPEN;
+		ctrl_err(ctrl, "Switch opened!\n");
+		break;
+	case 2:
+		retval = INVALID_CMD;
+		ctrl_err(ctrl, "Invalid HPC command!\n");
+		break;
+	case 4:
+		retval = INVALID_SPEED_MODE;
+		ctrl_err(ctrl, "Invalid bus speed/mode!\n");
+		break;
+	default:
+		retval = cmd_status;
+	}
+
+	return retval;
+}
+
+
+static int hpc_get_attention_status(struct slot *slot, u8 *status)
+{
+	struct controller *ctrl = slot->ctrl;
+	u32 slot_reg = shpc_readl(ctrl, SLOT_REG(slot->hp_slot));
+	u8 state = (slot_reg & ATN_LED_STATE_MASK) >> ATN_LED_STATE_SHIFT;
+
+	switch (state) {
+	case ATN_LED_STATE_ON:
+		*status = 1;	/* On */
+		break;
+	case ATN_LED_STATE_BLINK:
+		*status = 2;	/* Blink */
+		break;
+	case ATN_LED_STATE_OFF:
+		*status = 0;	/* Off */
+		break;
+	default:
+		*status = 0xFF;	/* Reserved */
+		break;
+	}
+
+	return 0;
+}
+
+static int hpc_get_power_status(struct slot *slot, u8 *status)
+{
+	struct controller *ctrl = slot->ctrl;
+	u32 slot_reg = shpc_readl(ctrl, SLOT_REG(slot->hp_slot));
+	u8 state = (slot_reg & SLOT_STATE_MASK) >> SLOT_STATE_SHIFT;
+
+	switch (state) {
+	case SLOT_STATE_PWRONLY:
+		*status = 2;	/* Powered only */
+		break;
+	case SLOT_STATE_ENABLED:
+		*status = 1;	/* Enabled */
+		break;
+	case SLOT_STATE_DISABLED:
+		*status = 0;	/* Disabled */
+		break;
+	default:
+		*status = 0xFF;	/* Reserved */
+		break;
+	}
+
+	return 0;
+}
+
+
+static int hpc_get_latch_status(struct slot *slot, u8 *status)
+{
+	struct controller *ctrl = slot->ctrl;
+	u32 slot_reg = shpc_readl(ctrl, SLOT_REG(slot->hp_slot));
+
+	*status = !!(slot_reg & MRL_SENSOR);	/* 0 -> close; 1 -> open */
+
+	return 0;
+}
+
+static int hpc_get_adapter_status(struct slot *slot, u8 *status)
+{
+	struct controller *ctrl = slot->ctrl;
+	u32 slot_reg = shpc_readl(ctrl, SLOT_REG(slot->hp_slot));
+	u8 state = (slot_reg & PRSNT_MASK) >> PRSNT_SHIFT;
+
+	*status = (state != 0x3) ? 1 : 0;
+
+	return 0;
+}
+
+static int hpc_get_prog_int(struct slot *slot, u8 *prog_int)
+{
+	struct controller *ctrl = slot->ctrl;
+
+	*prog_int = shpc_readb(ctrl, PROG_INTERFACE);
+
+	return 0;
+}
+
+static int hpc_get_adapter_speed(struct slot *slot, enum pci_bus_speed *value)
+{
+	int retval = 0;
+	struct controller *ctrl = slot->ctrl;
+	u32 slot_reg = shpc_readl(ctrl, SLOT_REG(slot->hp_slot));
+	u8 m66_cap  = !!(slot_reg & MHZ66_CAP);
+	u8 pi, pcix_cap;
+
+	retval = hpc_get_prog_int(slot, &pi);
+	if (retval)
+		return retval;
+
+	switch (pi) {
+	case 1:
+		pcix_cap = (slot_reg & PCIX_CAP_MASK_PI1) >> PCIX_CAP_SHIFT;
+		break;
+	case 2:
+		pcix_cap = (slot_reg & PCIX_CAP_MASK_PI2) >> PCIX_CAP_SHIFT;
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	ctrl_dbg(ctrl, "%s: slot_reg = %x, pcix_cap = %x, m66_cap = %x\n",
+		 __func__, slot_reg, pcix_cap, m66_cap);
+
+	switch (pcix_cap) {
+	case 0x0:
+		*value = m66_cap ? PCI_SPEED_66MHz : PCI_SPEED_33MHz;
+		break;
+	case 0x1:
+		*value = PCI_SPEED_66MHz_PCIX;
+		break;
+	case 0x3:
+		*value = PCI_SPEED_133MHz_PCIX;
+		break;
+	case 0x4:
+		*value = PCI_SPEED_133MHz_PCIX_266;
+		break;
+	case 0x5:
+		*value = PCI_SPEED_133MHz_PCIX_533;
+		break;
+	case 0x2:
+	default:
+		*value = PCI_SPEED_UNKNOWN;
+		retval = -ENODEV;
+		break;
+	}
+
+	ctrl_dbg(ctrl, "Adapter speed = %d\n", *value);
+	return retval;
+}
+
+static int hpc_query_power_fault(struct slot *slot)
+{
+	struct controller *ctrl = slot->ctrl;
+	u32 slot_reg = shpc_readl(ctrl, SLOT_REG(slot->hp_slot));
+
+	/* Note: Logic 0 => fault */
+	return !(slot_reg & POWER_FAULT);
+}
+
+static int hpc_set_attention_status(struct slot *slot, u8 value)
+{
+	u8 slot_cmd = 0;
+
+	switch (value) {
+		case 0:
+			slot_cmd = SET_ATTN_OFF;	/* OFF */
+			break;
+		case 1:
+			slot_cmd = SET_ATTN_ON;		/* ON */
+			break;
+		case 2:
+			slot_cmd = SET_ATTN_BLINK;	/* BLINK */
+			break;
+		default:
+			return -1;
+	}
+
+	return shpc_write_cmd(slot, slot->hp_slot, slot_cmd);
+}
+
+
+static void hpc_set_green_led_on(struct slot *slot)
+{
+	shpc_write_cmd(slot, slot->hp_slot, SET_PWR_ON);
+}
+
+static void hpc_set_green_led_off(struct slot *slot)
+{
+	shpc_write_cmd(slot, slot->hp_slot, SET_PWR_OFF);
+}
+
+static void hpc_set_green_led_blink(struct slot *slot)
+{
+	shpc_write_cmd(slot, slot->hp_slot, SET_PWR_BLINK);
+}
+
+static void hpc_release_ctlr(struct controller *ctrl)
+{
+	int i;
+	u32 slot_reg, serr_int;
+
+	/*
+	 * Mask event interrupts and SERRs of all slots
+	 */
+	for (i = 0; i < ctrl->num_slots; i++) {
+		slot_reg = shpc_readl(ctrl, SLOT_REG(i));
+		slot_reg |= (PRSNT_CHANGE_INTR_MASK | ISO_PFAULT_INTR_MASK |
+			     BUTTON_PRESS_INTR_MASK | MRL_CHANGE_INTR_MASK |
+			     CON_PFAULT_INTR_MASK   | MRL_CHANGE_SERR_MASK |
+			     CON_PFAULT_SERR_MASK);
+		slot_reg &= ~SLOT_REG_RSVDZ_MASK;
+		shpc_writel(ctrl, SLOT_REG(i), slot_reg);
+	}
+
+	cleanup_slots(ctrl);
+
+	/*
+	 * Mask SERR and System Interrupt generation
+	 */
+	serr_int = shpc_readl(ctrl, SERR_INTR_ENABLE);
+	serr_int |= (GLOBAL_INTR_MASK  | GLOBAL_SERR_MASK |
+		     COMMAND_INTR_MASK | ARBITER_SERR_MASK);
+	serr_int &= ~SERR_INTR_RSVDZ_MASK;
+	shpc_writel(ctrl, SERR_INTR_ENABLE, serr_int);
+
+	if (shpchp_poll_mode)
+		del_timer(&ctrl->poll_timer);
+	else {
+		free_irq(ctrl->pci_dev->irq, ctrl);
+		pci_disable_msi(ctrl->pci_dev);
+	}
+
+	iounmap(ctrl->creg);
+	release_mem_region(ctrl->mmio_base, ctrl->mmio_size);
+}
+
+static int hpc_power_on_slot(struct slot *slot)
+{
+	int retval;
+
+	retval = shpc_write_cmd(slot, slot->hp_slot, SET_SLOT_PWR);
+	if (retval)
+		ctrl_err(slot->ctrl, "%s: Write command failed!\n", __func__);
+
+	return retval;
+}
+
+static int hpc_slot_enable(struct slot *slot)
+{
+	int retval;
+
+	/* Slot - Enable, Power Indicator - Blink, Attention Indicator - Off */
+	retval = shpc_write_cmd(slot, slot->hp_slot,
+			SET_SLOT_ENABLE | SET_PWR_BLINK | SET_ATTN_OFF);
+	if (retval)
+		ctrl_err(slot->ctrl, "%s: Write command failed!\n", __func__);
+
+	return retval;
+}
+
+static int hpc_slot_disable(struct slot *slot)
+{
+	int retval;
+
+	/* Slot - Disable, Power Indicator - Off, Attention Indicator - On */
+	retval = shpc_write_cmd(slot, slot->hp_slot,
+			SET_SLOT_DISABLE | SET_PWR_OFF | SET_ATTN_ON);
+	if (retval)
+		ctrl_err(slot->ctrl, "%s: Write command failed!\n", __func__);
+
+	return retval;
+}
+
+static int shpc_get_cur_bus_speed(struct controller *ctrl)
+{
+	int retval = 0;
+	struct pci_bus *bus = ctrl->pci_dev->subordinate;
+	enum pci_bus_speed bus_speed = PCI_SPEED_UNKNOWN;
+	u16 sec_bus_reg = shpc_readw(ctrl, SEC_BUS_CONFIG);
+	u8 pi = shpc_readb(ctrl, PROG_INTERFACE);
+	u8 speed_mode = (pi == 2) ? (sec_bus_reg & 0xF) : (sec_bus_reg & 0x7);
+
+	if ((pi == 1) && (speed_mode > 4)) {
+		retval = -ENODEV;
+		goto out;
+	}
+
+	switch (speed_mode) {
+	case 0x0:
+		bus_speed = PCI_SPEED_33MHz;
+		break;
+	case 0x1:
+		bus_speed = PCI_SPEED_66MHz;
+		break;
+	case 0x2:
+		bus_speed = PCI_SPEED_66MHz_PCIX;
+		break;
+	case 0x3:
+		bus_speed = PCI_SPEED_100MHz_PCIX;
+		break;
+	case 0x4:
+		bus_speed = PCI_SPEED_133MHz_PCIX;
+		break;
+	case 0x5:
+		bus_speed = PCI_SPEED_66MHz_PCIX_ECC;
+		break;
+	case 0x6:
+		bus_speed = PCI_SPEED_100MHz_PCIX_ECC;
+		break;
+	case 0x7:
+		bus_speed = PCI_SPEED_133MHz_PCIX_ECC;
+		break;
+	case 0x8:
+		bus_speed = PCI_SPEED_66MHz_PCIX_266;
+		break;
+	case 0x9:
+		bus_speed = PCI_SPEED_100MHz_PCIX_266;
+		break;
+	case 0xa:
+		bus_speed = PCI_SPEED_133MHz_PCIX_266;
+		break;
+	case 0xb:
+		bus_speed = PCI_SPEED_66MHz_PCIX_533;
+		break;
+	case 0xc:
+		bus_speed = PCI_SPEED_100MHz_PCIX_533;
+		break;
+	case 0xd:
+		bus_speed = PCI_SPEED_133MHz_PCIX_533;
+		break;
+	default:
+		retval = -ENODEV;
+		break;
+	}
+
+ out:
+	bus->cur_bus_speed = bus_speed;
+	dbg("Current bus speed = %d\n", bus_speed);
+	return retval;
+}
+
+
+static int hpc_set_bus_speed_mode(struct slot *slot, enum pci_bus_speed value)
+{
+	int retval;
+	struct controller *ctrl = slot->ctrl;
+	u8 pi, cmd;
+
+	pi = shpc_readb(ctrl, PROG_INTERFACE);
+	if ((pi == 1) && (value > PCI_SPEED_133MHz_PCIX))
+		return -EINVAL;
+
+	switch (value) {
+	case PCI_SPEED_33MHz:
+		cmd = SETA_PCI_33MHZ;
+		break;
+	case PCI_SPEED_66MHz:
+		cmd = SETA_PCI_66MHZ;
+		break;
+	case PCI_SPEED_66MHz_PCIX:
+		cmd = SETA_PCIX_66MHZ;
+		break;
+	case PCI_SPEED_100MHz_PCIX:
+		cmd = SETA_PCIX_100MHZ;
+		break;
+	case PCI_SPEED_133MHz_PCIX:
+		cmd = SETA_PCIX_133MHZ;
+		break;
+	case PCI_SPEED_66MHz_PCIX_ECC:
+		cmd = SETB_PCIX_66MHZ_EM;
+		break;
+	case PCI_SPEED_100MHz_PCIX_ECC:
+		cmd = SETB_PCIX_100MHZ_EM;
+		break;
+	case PCI_SPEED_133MHz_PCIX_ECC:
+		cmd = SETB_PCIX_133MHZ_EM;
+		break;
+	case PCI_SPEED_66MHz_PCIX_266:
+		cmd = SETB_PCIX_66MHZ_266;
+		break;
+	case PCI_SPEED_100MHz_PCIX_266:
+		cmd = SETB_PCIX_100MHZ_266;
+		break;
+	case PCI_SPEED_133MHz_PCIX_266:
+		cmd = SETB_PCIX_133MHZ_266;
+		break;
+	case PCI_SPEED_66MHz_PCIX_533:
+		cmd = SETB_PCIX_66MHZ_533;
+		break;
+	case PCI_SPEED_100MHz_PCIX_533:
+		cmd = SETB_PCIX_100MHZ_533;
+		break;
+	case PCI_SPEED_133MHz_PCIX_533:
+		cmd = SETB_PCIX_133MHZ_533;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	retval = shpc_write_cmd(slot, 0, cmd);
+	if (retval)
+		ctrl_err(ctrl, "%s: Write command failed!\n", __func__);
+	else
+		shpc_get_cur_bus_speed(ctrl);
+
+	return retval;
+}
+
+static irqreturn_t shpc_isr(int irq, void *dev_id)
+{
+	struct controller *ctrl = (struct controller *)dev_id;
+	u32 serr_int, slot_reg, intr_loc, intr_loc2;
+	int hp_slot;
+
+	/* Check to see if it was our interrupt */
+	intr_loc = shpc_readl(ctrl, INTR_LOC);
+	if (!intr_loc)
+		return IRQ_NONE;
+
+	ctrl_dbg(ctrl, "%s: intr_loc = %x\n", __func__, intr_loc);
+
+	if (!shpchp_poll_mode) {
+		/*
+		 * Mask Global Interrupt Mask - see implementation
+		 * note on p. 139 of SHPC spec rev 1.0
+		 */
+		serr_int = shpc_readl(ctrl, SERR_INTR_ENABLE);
+		serr_int |= GLOBAL_INTR_MASK;
+		serr_int &= ~SERR_INTR_RSVDZ_MASK;
+		shpc_writel(ctrl, SERR_INTR_ENABLE, serr_int);
+
+		intr_loc2 = shpc_readl(ctrl, INTR_LOC);
+		ctrl_dbg(ctrl, "%s: intr_loc2 = %x\n", __func__, intr_loc2);
+	}
+
+	if (intr_loc & CMD_INTR_PENDING) {
+		/*
+		 * Command Complete Interrupt Pending
+		 * RO only - clear by writing 1 to the Command Completion
+		 * Detect bit in Controller SERR-INT register
+		 */
+		serr_int = shpc_readl(ctrl, SERR_INTR_ENABLE);
+		serr_int &= ~SERR_INTR_RSVDZ_MASK;
+		shpc_writel(ctrl, SERR_INTR_ENABLE, serr_int);
+
+		wake_up_interruptible(&ctrl->queue);
+	}
+
+	if (!(intr_loc & ~CMD_INTR_PENDING))
+		goto out;
+
+	for (hp_slot = 0; hp_slot < ctrl->num_slots; hp_slot++) {
+		/* To find out which slot has interrupt pending */
+		if (!(intr_loc & SLOT_INTR_PENDING(hp_slot)))
+			continue;
+
+		slot_reg = shpc_readl(ctrl, SLOT_REG(hp_slot));
+		ctrl_dbg(ctrl, "Slot %x with intr, slot register = %x\n",
+			 hp_slot, slot_reg);
+
+		if (slot_reg & MRL_CHANGE_DETECTED)
+			shpchp_handle_switch_change(hp_slot, ctrl);
+
+		if (slot_reg & BUTTON_PRESS_DETECTED)
+			shpchp_handle_attention_button(hp_slot, ctrl);
+
+		if (slot_reg & PRSNT_CHANGE_DETECTED)
+			shpchp_handle_presence_change(hp_slot, ctrl);
+
+		if (slot_reg & (ISO_PFAULT_DETECTED | CON_PFAULT_DETECTED))
+			shpchp_handle_power_fault(hp_slot, ctrl);
+
+		/* Clear all slot events */
+		slot_reg &= ~SLOT_REG_RSVDZ_MASK;
+		shpc_writel(ctrl, SLOT_REG(hp_slot), slot_reg);
+	}
+ out:
+	if (!shpchp_poll_mode) {
+		/* Unmask Global Interrupt Mask */
+		serr_int = shpc_readl(ctrl, SERR_INTR_ENABLE);
+		serr_int &= ~(GLOBAL_INTR_MASK | SERR_INTR_RSVDZ_MASK);
+		shpc_writel(ctrl, SERR_INTR_ENABLE, serr_int);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int shpc_get_max_bus_speed(struct controller *ctrl)
+{
+	int retval = 0;
+	struct pci_bus *bus = ctrl->pci_dev->subordinate;
+	enum pci_bus_speed bus_speed = PCI_SPEED_UNKNOWN;
+	u8 pi = shpc_readb(ctrl, PROG_INTERFACE);
+	u32 slot_avail1 = shpc_readl(ctrl, SLOT_AVAIL1);
+	u32 slot_avail2 = shpc_readl(ctrl, SLOT_AVAIL2);
+
+	if (pi == 2) {
+		if (slot_avail2 & SLOT_133MHZ_PCIX_533)
+			bus_speed = PCI_SPEED_133MHz_PCIX_533;
+		else if (slot_avail2 & SLOT_100MHZ_PCIX_533)
+			bus_speed = PCI_SPEED_100MHz_PCIX_533;
+		else if (slot_avail2 & SLOT_66MHZ_PCIX_533)
+			bus_speed = PCI_SPEED_66MHz_PCIX_533;
+		else if (slot_avail2 & SLOT_133MHZ_PCIX_266)
+			bus_speed = PCI_SPEED_133MHz_PCIX_266;
+		else if (slot_avail2 & SLOT_100MHZ_PCIX_266)
+			bus_speed = PCI_SPEED_100MHz_PCIX_266;
+		else if (slot_avail2 & SLOT_66MHZ_PCIX_266)
+			bus_speed = PCI_SPEED_66MHz_PCIX_266;
+	}
+
+	if (bus_speed == PCI_SPEED_UNKNOWN) {
+		if (slot_avail1 & SLOT_133MHZ_PCIX)
+			bus_speed = PCI_SPEED_133MHz_PCIX;
+		else if (slot_avail1 & SLOT_100MHZ_PCIX)
+			bus_speed = PCI_SPEED_100MHz_PCIX;
+		else if (slot_avail1 & SLOT_66MHZ_PCIX)
+			bus_speed = PCI_SPEED_66MHz_PCIX;
+		else if (slot_avail2 & SLOT_66MHZ)
+			bus_speed = PCI_SPEED_66MHz;
+		else if (slot_avail1 & SLOT_33MHZ)
+			bus_speed = PCI_SPEED_33MHz;
+		else
+			retval = -ENODEV;
+	}
+
+	bus->max_bus_speed = bus_speed;
+	ctrl_dbg(ctrl, "Max bus speed = %d\n", bus_speed);
+
+	return retval;
+}
+
+static const struct hpc_ops shpchp_hpc_ops = {
+	.power_on_slot			= hpc_power_on_slot,
+	.slot_enable			= hpc_slot_enable,
+	.slot_disable			= hpc_slot_disable,
+	.set_bus_speed_mode		= hpc_set_bus_speed_mode,
+	.set_attention_status	= hpc_set_attention_status,
+	.get_power_status		= hpc_get_power_status,
+	.get_attention_status	= hpc_get_attention_status,
+	.get_latch_status		= hpc_get_latch_status,
+	.get_adapter_status		= hpc_get_adapter_status,
+
+	.get_adapter_speed		= hpc_get_adapter_speed,
+	.get_prog_int			= hpc_get_prog_int,
+
+	.query_power_fault		= hpc_query_power_fault,
+	.green_led_on			= hpc_set_green_led_on,
+	.green_led_off			= hpc_set_green_led_off,
+	.green_led_blink		= hpc_set_green_led_blink,
+
+	.release_ctlr			= hpc_release_ctlr,
+};
+
+int shpc_init(struct controller *ctrl, struct pci_dev *pdev)
+{
+	int rc = -1, num_slots = 0;
+	u8 hp_slot;
+	u32 shpc_base_offset;
+	u32 tempdword, slot_reg, slot_config;
+	u8 i;
+
+	ctrl->pci_dev = pdev;  /* pci_dev of the P2P bridge */
+	ctrl_dbg(ctrl, "Hotplug Controller:\n");
+
+	if (pdev->vendor == PCI_VENDOR_ID_AMD &&
+	    pdev->device == PCI_DEVICE_ID_AMD_GOLAM_7450) {
+		/* amd shpc driver doesn't use Base Offset; assume 0 */
+		ctrl->mmio_base = pci_resource_start(pdev, 0);
+		ctrl->mmio_size = pci_resource_len(pdev, 0);
+	} else {
+		ctrl->cap_offset = pci_find_capability(pdev, PCI_CAP_ID_SHPC);
+		if (!ctrl->cap_offset) {
+			ctrl_err(ctrl, "Cannot find PCI capability\n");
+			goto abort;
+		}
+		ctrl_dbg(ctrl, " cap_offset = %x\n", ctrl->cap_offset);
+
+		rc = shpc_indirect_read(ctrl, 0, &shpc_base_offset);
+		if (rc) {
+			ctrl_err(ctrl, "Cannot read base_offset\n");
+			goto abort;
+		}
+
+		rc = shpc_indirect_read(ctrl, 3, &tempdword);
+		if (rc) {
+			ctrl_err(ctrl, "Cannot read slot config\n");
+			goto abort;
+		}
+		num_slots = tempdword & SLOT_NUM;
+		ctrl_dbg(ctrl, " num_slots (indirect) %x\n", num_slots);
+
+		for (i = 0; i < 9 + num_slots; i++) {
+			rc = shpc_indirect_read(ctrl, i, &tempdword);
+			if (rc) {
+				ctrl_err(ctrl, "Cannot read creg (index = %d)\n",
+					 i);
+				goto abort;
+			}
+			ctrl_dbg(ctrl, " offset %d: value %x\n", i, tempdword);
+		}
+
+		ctrl->mmio_base =
+			pci_resource_start(pdev, 0) + shpc_base_offset;
+		ctrl->mmio_size = 0x24 + 0x4 * num_slots;
+	}
+
+	ctrl_info(ctrl, "HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n",
+		  pdev->vendor, pdev->device, pdev->subsystem_vendor,
+		  pdev->subsystem_device);
+
+	rc = pci_enable_device(pdev);
+	if (rc) {
+		ctrl_err(ctrl, "pci_enable_device failed\n");
+		goto abort;
+	}
+
+	if (!request_mem_region(ctrl->mmio_base, ctrl->mmio_size, MY_NAME)) {
+		ctrl_err(ctrl, "Cannot reserve MMIO region\n");
+		rc = -1;
+		goto abort;
+	}
+
+	ctrl->creg = ioremap(ctrl->mmio_base, ctrl->mmio_size);
+	if (!ctrl->creg) {
+		ctrl_err(ctrl, "Cannot remap MMIO region %lx @ %lx\n",
+			 ctrl->mmio_size, ctrl->mmio_base);
+		release_mem_region(ctrl->mmio_base, ctrl->mmio_size);
+		rc = -1;
+		goto abort;
+	}
+	ctrl_dbg(ctrl, "ctrl->creg %p\n", ctrl->creg);
+
+	mutex_init(&ctrl->crit_sect);
+	mutex_init(&ctrl->cmd_lock);
+
+	/* Setup wait queue */
+	init_waitqueue_head(&ctrl->queue);
+
+	ctrl->hpc_ops = &shpchp_hpc_ops;
+
+	/* Return PCI Controller Info */
+	slot_config = shpc_readl(ctrl, SLOT_CONFIG);
+	ctrl->slot_device_offset = (slot_config & FIRST_DEV_NUM) >> 8;
+	ctrl->num_slots = slot_config & SLOT_NUM;
+	ctrl->first_slot = (slot_config & PSN) >> 16;
+	ctrl->slot_num_inc = ((slot_config & UPDOWN) >> 29) ? 1 : -1;
+
+	/* Mask Global Interrupt Mask & Command Complete Interrupt Mask */
+	tempdword = shpc_readl(ctrl, SERR_INTR_ENABLE);
+	ctrl_dbg(ctrl, "SERR_INTR_ENABLE = %x\n", tempdword);
+	tempdword |= (GLOBAL_INTR_MASK  | GLOBAL_SERR_MASK |
+		      COMMAND_INTR_MASK | ARBITER_SERR_MASK);
+	tempdword &= ~SERR_INTR_RSVDZ_MASK;
+	shpc_writel(ctrl, SERR_INTR_ENABLE, tempdword);
+	tempdword = shpc_readl(ctrl, SERR_INTR_ENABLE);
+	ctrl_dbg(ctrl, "SERR_INTR_ENABLE = %x\n", tempdword);
+
+	/* Mask the MRL sensor SERR Mask of individual slot in
+	 * Slot SERR-INT Mask & clear all the existing event if any
+	 */
+	for (hp_slot = 0; hp_slot < ctrl->num_slots; hp_slot++) {
+		slot_reg = shpc_readl(ctrl, SLOT_REG(hp_slot));
+		ctrl_dbg(ctrl, "Default Logical Slot Register %d value %x\n",
+			 hp_slot, slot_reg);
+		slot_reg |= (PRSNT_CHANGE_INTR_MASK | ISO_PFAULT_INTR_MASK |
+			     BUTTON_PRESS_INTR_MASK | MRL_CHANGE_INTR_MASK |
+			     CON_PFAULT_INTR_MASK   | MRL_CHANGE_SERR_MASK |
+			     CON_PFAULT_SERR_MASK);
+		slot_reg &= ~SLOT_REG_RSVDZ_MASK;
+		shpc_writel(ctrl, SLOT_REG(hp_slot), slot_reg);
+	}
+
+	if (shpchp_poll_mode) {
+		/* Install interrupt polling timer. Start with 10 sec delay */
+		timer_setup(&ctrl->poll_timer, int_poll_timeout, 0);
+		start_int_poll_timer(ctrl, 10);
+	} else {
+		/* Installs the interrupt handler */
+		rc = pci_enable_msi(pdev);
+		if (rc) {
+			ctrl_info(ctrl, "Can't get msi for the hotplug controller\n");
+			ctrl_info(ctrl, "Use INTx for the hotplug controller\n");
+		} else {
+			pci_set_master(pdev);
+		}
+
+		rc = request_irq(ctrl->pci_dev->irq, shpc_isr, IRQF_SHARED,
+				 MY_NAME, (void *)ctrl);
+		ctrl_dbg(ctrl, "request_irq %d (returns %d)\n",
+			 ctrl->pci_dev->irq, rc);
+		if (rc) {
+			ctrl_err(ctrl, "Can't get irq %d for the hotplug controller\n",
+				 ctrl->pci_dev->irq);
+			goto abort_iounmap;
+		}
+	}
+	ctrl_dbg(ctrl, "HPC at %s irq=%x\n", pci_name(pdev), pdev->irq);
+
+	shpc_get_max_bus_speed(ctrl);
+	shpc_get_cur_bus_speed(ctrl);
+
+	/*
+	 * Unmask all event interrupts of all slots
+	 */
+	for (hp_slot = 0; hp_slot < ctrl->num_slots; hp_slot++) {
+		slot_reg = shpc_readl(ctrl, SLOT_REG(hp_slot));
+		ctrl_dbg(ctrl, "Default Logical Slot Register %d value %x\n",
+			 hp_slot, slot_reg);
+		slot_reg &= ~(PRSNT_CHANGE_INTR_MASK | ISO_PFAULT_INTR_MASK |
+			      BUTTON_PRESS_INTR_MASK | MRL_CHANGE_INTR_MASK |
+			      CON_PFAULT_INTR_MASK | SLOT_REG_RSVDZ_MASK);
+		shpc_writel(ctrl, SLOT_REG(hp_slot), slot_reg);
+	}
+	if (!shpchp_poll_mode) {
+		/* Unmask all general input interrupts and SERR */
+		tempdword = shpc_readl(ctrl, SERR_INTR_ENABLE);
+		tempdword &= ~(GLOBAL_INTR_MASK | COMMAND_INTR_MASK |
+			       SERR_INTR_RSVDZ_MASK);
+		shpc_writel(ctrl, SERR_INTR_ENABLE, tempdword);
+		tempdword = shpc_readl(ctrl, SERR_INTR_ENABLE);
+		ctrl_dbg(ctrl, "SERR_INTR_ENABLE = %x\n", tempdword);
+	}
+
+	return 0;
+
+	/* We end up here for the many possible ways to fail this API.  */
+abort_iounmap:
+	iounmap(ctrl->creg);
+abort:
+	return rc;
+}
diff --git a/drivers/pci/hotplug/shpchp_pci.c b/drivers/pci/hotplug/shpchp_pci.c
new file mode 100644
index 000000000..36db0c3c4
--- /dev/null
+++ b/drivers/pci/hotplug/shpchp_pci.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Standard Hot Plug Controller Driver
+ *
+ * Copyright (C) 1995,2001 Compaq Computer Corporation
+ * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (C) 2001 IBM Corp.
+ * Copyright (C) 2003-2004 Intel Corporation
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include "../pci.h"
+#include "shpchp.h"
+
+int shpchp_configure_device(struct slot *p_slot)
+{
+	struct pci_dev *dev;
+	struct controller *ctrl = p_slot->ctrl;
+	struct pci_dev *bridge = ctrl->pci_dev;
+	struct pci_bus *parent = bridge->subordinate;
+	int num, ret = 0;
+
+	pci_lock_rescan_remove();
+
+	dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, 0));
+	if (dev) {
+		ctrl_err(ctrl, "Device %s already exists at %04x:%02x:%02x, cannot hot-add\n",
+			 pci_name(dev), pci_domain_nr(parent),
+			 p_slot->bus, p_slot->device);
+		pci_dev_put(dev);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	num = pci_scan_slot(parent, PCI_DEVFN(p_slot->device, 0));
+	if (num == 0) {
+		ctrl_err(ctrl, "No new device found\n");
+		ret = -ENODEV;
+		goto out;
+	}
+
+	for_each_pci_bridge(dev, parent) {
+		if (PCI_SLOT(dev->devfn) == p_slot->device)
+			pci_hp_add_bridge(dev);
+	}
+
+	pci_assign_unassigned_bridge_resources(bridge);
+	pcie_bus_configure_settings(parent);
+	pci_bus_add_devices(parent);
+
+ out:
+	pci_unlock_rescan_remove();
+	return ret;
+}
+
+void shpchp_unconfigure_device(struct slot *p_slot)
+{
+	struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
+	struct pci_dev *dev, *temp;
+	struct controller *ctrl = p_slot->ctrl;
+
+	ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:%02x\n",
+		 __func__, pci_domain_nr(parent), p_slot->bus, p_slot->device);
+
+	pci_lock_rescan_remove();
+
+	list_for_each_entry_safe(dev, temp, &parent->devices, bus_list) {
+		if (PCI_SLOT(dev->devfn) != p_slot->device)
+			continue;
+
+		pci_dev_get(dev);
+		pci_stop_and_remove_bus_device(dev);
+		pci_dev_put(dev);
+	}
+
+	pci_unlock_rescan_remove();
+}
diff --git a/drivers/pci/hotplug/shpchp_sysfs.c b/drivers/pci/hotplug/shpchp_sysfs.c
new file mode 100644
index 000000000..01d47a42d
--- /dev/null
+++ b/drivers/pci/hotplug/shpchp_sysfs.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Compaq Hot Plug Controller Driver
+ *
+ * Copyright (c) 1995,2001 Compaq Computer Corporation
+ * Copyright (c) 2001,2003 Greg Kroah-Hartman (greg@kroah.com)
+ * Copyright (c) 2001 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * Send feedback to <greg@kroah.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include "shpchp.h"
+
+
+/* A few routines that create sysfs entries for the hot plug controller */
+
+static ssize_t show_ctrl(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev;
+	struct resource *res;
+	struct pci_bus *bus;
+	size_t len = 0;
+	int busnr;
+
+	pdev = to_pci_dev(dev);
+	bus = pdev->subordinate;
+
+	len += sysfs_emit_at(buf, len, "Free resources: memory\n");
+	pci_bus_for_each_resource(bus, res) {
+		if (res && (res->flags & IORESOURCE_MEM) &&
+				!(res->flags & IORESOURCE_PREFETCH)) {
+			len += sysfs_emit_at(buf, len,
+					     "start = %8.8llx, length = %8.8llx\n",
+					     (unsigned long long)res->start,
+					     (unsigned long long)resource_size(res));
+		}
+	}
+	len += sysfs_emit_at(buf, len, "Free resources: prefetchable memory\n");
+	pci_bus_for_each_resource(bus, res) {
+		if (res && (res->flags & IORESOURCE_MEM) &&
+			       (res->flags & IORESOURCE_PREFETCH)) {
+			len += sysfs_emit_at(buf, len,
+					     "start = %8.8llx, length = %8.8llx\n",
+					     (unsigned long long)res->start,
+					     (unsigned long long)resource_size(res));
+		}
+	}
+	len += sysfs_emit_at(buf, len, "Free resources: IO\n");
+	pci_bus_for_each_resource(bus, res) {
+		if (res && (res->flags & IORESOURCE_IO)) {
+			len += sysfs_emit_at(buf, len,
+					     "start = %8.8llx, length = %8.8llx\n",
+					     (unsigned long long)res->start,
+					     (unsigned long long)resource_size(res));
+		}
+	}
+	len += sysfs_emit_at(buf, len, "Free resources: bus numbers\n");
+	for (busnr = bus->busn_res.start; busnr <= bus->busn_res.end; busnr++) {
+		if (!pci_find_bus(pci_domain_nr(bus), busnr))
+			break;
+	}
+	if (busnr < bus->busn_res.end)
+		len += sysfs_emit_at(buf, len,
+				     "start = %8.8x, length = %8.8x\n",
+				     busnr, (int)(bus->busn_res.end - busnr));
+
+	return len;
+}
+static DEVICE_ATTR(ctrl, S_IRUGO, show_ctrl, NULL);
+
+int shpchp_create_ctrl_files(struct controller *ctrl)
+{
+	return device_create_file(&ctrl->pci_dev->dev, &dev_attr_ctrl);
+}
+
+void shpchp_remove_ctrl_files(struct controller *ctrl)
+{
+	device_remove_file(&ctrl->pci_dev->dev, &dev_attr_ctrl);
+}
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
new file mode 100644
index 000000000..25dbe85c4
--- /dev/null
+++ b/drivers/pci/iov.c
@@ -0,0 +1,1228 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Express I/O Virtualization (IOV) support
+ *   Single Root IOV 1.0
+ *   Address Translation Service 1.0
+ *
+ * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include "pci.h"
+
+#define VIRTFN_ID_LEN	17	/* "virtfn%u\0" for 2^32 - 1 */
+
+int pci_iov_virtfn_bus(struct pci_dev *dev, int vf_id)
+{
+	if (!dev->is_physfn)
+		return -EINVAL;
+	return dev->bus->number + ((dev->devfn + dev->sriov->offset +
+				    dev->sriov->stride * vf_id) >> 8);
+}
+
+int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id)
+{
+	if (!dev->is_physfn)
+		return -EINVAL;
+	return (dev->devfn + dev->sriov->offset +
+		dev->sriov->stride * vf_id) & 0xff;
+}
+EXPORT_SYMBOL_GPL(pci_iov_virtfn_devfn);
+
+int pci_iov_vf_id(struct pci_dev *dev)
+{
+	struct pci_dev *pf;
+
+	if (!dev->is_virtfn)
+		return -EINVAL;
+
+	pf = pci_physfn(dev);
+	return (pci_dev_id(dev) - (pci_dev_id(pf) + pf->sriov->offset)) /
+	       pf->sriov->stride;
+}
+EXPORT_SYMBOL_GPL(pci_iov_vf_id);
+
+/**
+ * pci_iov_get_pf_drvdata - Return the drvdata of a PF
+ * @dev: VF pci_dev
+ * @pf_driver: Device driver required to own the PF
+ *
+ * This must be called from a context that ensures that a VF driver is attached.
+ * The value returned is invalid once the VF driver completes its remove()
+ * callback.
+ *
+ * Locking is achieved by the driver core. A VF driver cannot be probed until
+ * pci_enable_sriov() is called and pci_disable_sriov() does not return until
+ * all VF drivers have completed their remove().
+ *
+ * The PF driver must call pci_disable_sriov() before it begins to destroy the
+ * drvdata.
+ */
+void *pci_iov_get_pf_drvdata(struct pci_dev *dev, struct pci_driver *pf_driver)
+{
+	struct pci_dev *pf_dev;
+
+	if (!dev->is_virtfn)
+		return ERR_PTR(-EINVAL);
+	pf_dev = dev->physfn;
+	if (pf_dev->driver != pf_driver)
+		return ERR_PTR(-EINVAL);
+	return pci_get_drvdata(pf_dev);
+}
+EXPORT_SYMBOL_GPL(pci_iov_get_pf_drvdata);
+
+/*
+ * Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may
+ * change when NumVFs changes.
+ *
+ * Update iov->offset and iov->stride when NumVFs is written.
+ */
+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
+{
+	struct pci_sriov *iov = dev->sriov;
+
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
+}
+
+/*
+ * The PF consumes one bus number.  NumVFs, First VF Offset, and VF Stride
+ * determine how many additional bus numbers will be consumed by VFs.
+ *
+ * Iterate over all valid NumVFs, validate offset and stride, and calculate
+ * the maximum number of bus numbers that could ever be required.
+ */
+static int compute_max_vf_buses(struct pci_dev *dev)
+{
+	struct pci_sriov *iov = dev->sriov;
+	int nr_virtfn, busnr, rc = 0;
+
+	for (nr_virtfn = iov->total_VFs; nr_virtfn; nr_virtfn--) {
+		pci_iov_set_numvfs(dev, nr_virtfn);
+		if (!iov->offset || (nr_virtfn > 1 && !iov->stride)) {
+			rc = -EIO;
+			goto out;
+		}
+
+		busnr = pci_iov_virtfn_bus(dev, nr_virtfn - 1);
+		if (busnr > iov->max_VF_buses)
+			iov->max_VF_buses = busnr;
+	}
+
+out:
+	pci_iov_set_numvfs(dev, 0);
+	return rc;
+}
+
+static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
+{
+	struct pci_bus *child;
+
+	if (bus->number == busnr)
+		return bus;
+
+	child = pci_find_bus(pci_domain_nr(bus), busnr);
+	if (child)
+		return child;
+
+	child = pci_add_new_bus(bus, NULL, busnr);
+	if (!child)
+		return NULL;
+
+	pci_bus_insert_busn_res(child, busnr, busnr);
+
+	return child;
+}
+
+static void virtfn_remove_bus(struct pci_bus *physbus, struct pci_bus *virtbus)
+{
+	if (physbus != virtbus && list_empty(&virtbus->devices))
+		pci_remove_bus(virtbus);
+}
+
+resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno)
+{
+	if (!dev->is_physfn)
+		return 0;
+
+	return dev->sriov->barsz[resno - PCI_IOV_RESOURCES];
+}
+
+static void pci_read_vf_config_common(struct pci_dev *virtfn)
+{
+	struct pci_dev *physfn = virtfn->physfn;
+
+	/*
+	 * Some config registers are the same across all associated VFs.
+	 * Read them once from VF0 so we can skip reading them from the
+	 * other VFs.
+	 *
+	 * PCIe r4.0, sec 9.3.4.1, technically doesn't require all VFs to
+	 * have the same Revision ID and Subsystem ID, but we assume they
+	 * do.
+	 */
+	pci_read_config_dword(virtfn, PCI_CLASS_REVISION,
+			      &physfn->sriov->class);
+	pci_read_config_byte(virtfn, PCI_HEADER_TYPE,
+			     &physfn->sriov->hdr_type);
+	pci_read_config_word(virtfn, PCI_SUBSYSTEM_VENDOR_ID,
+			     &physfn->sriov->subsystem_vendor);
+	pci_read_config_word(virtfn, PCI_SUBSYSTEM_ID,
+			     &physfn->sriov->subsystem_device);
+}
+
+int pci_iov_sysfs_link(struct pci_dev *dev,
+		struct pci_dev *virtfn, int id)
+{
+	char buf[VIRTFN_ID_LEN];
+	int rc;
+
+	sprintf(buf, "virtfn%u", id);
+	rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
+	if (rc)
+		goto failed;
+	rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
+	if (rc)
+		goto failed1;
+
+	kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
+
+	return 0;
+
+failed1:
+	sysfs_remove_link(&dev->dev.kobj, buf);
+failed:
+	return rc;
+}
+
+#ifdef CONFIG_PCI_MSI
+static ssize_t sriov_vf_total_msix_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	u32 vf_total_msix = 0;
+
+	device_lock(dev);
+	if (!pdev->driver || !pdev->driver->sriov_get_vf_total_msix)
+		goto unlock;
+
+	vf_total_msix = pdev->driver->sriov_get_vf_total_msix(pdev);
+unlock:
+	device_unlock(dev);
+	return sysfs_emit(buf, "%u\n", vf_total_msix);
+}
+static DEVICE_ATTR_RO(sriov_vf_total_msix);
+
+static ssize_t sriov_vf_msix_count_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct pci_dev *vf_dev = to_pci_dev(dev);
+	struct pci_dev *pdev = pci_physfn(vf_dev);
+	int val, ret = 0;
+
+	if (kstrtoint(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	if (val < 0)
+		return -EINVAL;
+
+	device_lock(&pdev->dev);
+	if (!pdev->driver || !pdev->driver->sriov_set_msix_vec_count) {
+		ret = -EOPNOTSUPP;
+		goto err_pdev;
+	}
+
+	device_lock(&vf_dev->dev);
+	if (vf_dev->driver) {
+		/*
+		 * A driver is already attached to this VF and has configured
+		 * itself based on the current MSI-X vector count. Changing
+		 * the vector size could mess up the driver, so block it.
+		 */
+		ret = -EBUSY;
+		goto err_dev;
+	}
+
+	ret = pdev->driver->sriov_set_msix_vec_count(vf_dev, val);
+
+err_dev:
+	device_unlock(&vf_dev->dev);
+err_pdev:
+	device_unlock(&pdev->dev);
+	return ret ? : count;
+}
+static DEVICE_ATTR_WO(sriov_vf_msix_count);
+#endif
+
+static struct attribute *sriov_vf_dev_attrs[] = {
+#ifdef CONFIG_PCI_MSI
+	&dev_attr_sriov_vf_msix_count.attr,
+#endif
+	NULL,
+};
+
+static umode_t sriov_vf_attrs_are_visible(struct kobject *kobj,
+					  struct attribute *a, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (!pdev->is_virtfn)
+		return 0;
+
+	return a->mode;
+}
+
+const struct attribute_group sriov_vf_dev_attr_group = {
+	.attrs = sriov_vf_dev_attrs,
+	.is_visible = sriov_vf_attrs_are_visible,
+};
+
+int pci_iov_add_virtfn(struct pci_dev *dev, int id)
+{
+	int i;
+	int rc = -ENOMEM;
+	u64 size;
+	struct pci_dev *virtfn;
+	struct resource *res;
+	struct pci_sriov *iov = dev->sriov;
+	struct pci_bus *bus;
+
+	bus = virtfn_add_bus(dev->bus, pci_iov_virtfn_bus(dev, id));
+	if (!bus)
+		goto failed;
+
+	virtfn = pci_alloc_dev(bus);
+	if (!virtfn)
+		goto failed0;
+
+	virtfn->devfn = pci_iov_virtfn_devfn(dev, id);
+	virtfn->vendor = dev->vendor;
+	virtfn->device = iov->vf_device;
+	virtfn->is_virtfn = 1;
+	virtfn->physfn = pci_dev_get(dev);
+	virtfn->no_command_memory = 1;
+
+	if (id == 0)
+		pci_read_vf_config_common(virtfn);
+
+	rc = pci_setup_device(virtfn);
+	if (rc)
+		goto failed1;
+
+	virtfn->dev.parent = dev->dev.parent;
+	virtfn->multifunction = 0;
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &dev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->parent)
+			continue;
+		virtfn->resource[i].name = pci_name(virtfn);
+		virtfn->resource[i].flags = res->flags;
+		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
+		virtfn->resource[i].start = res->start + size * id;
+		virtfn->resource[i].end = virtfn->resource[i].start + size - 1;
+		rc = request_resource(res, &virtfn->resource[i]);
+		BUG_ON(rc);
+	}
+
+	pci_device_add(virtfn, virtfn->bus);
+	rc = pci_iov_sysfs_link(dev, virtfn, id);
+	if (rc)
+		goto failed1;
+
+	pci_bus_add_device(virtfn);
+
+	return 0;
+
+failed1:
+	pci_stop_and_remove_bus_device(virtfn);
+	pci_dev_put(dev);
+failed0:
+	virtfn_remove_bus(dev->bus, bus);
+failed:
+
+	return rc;
+}
+
+void pci_iov_remove_virtfn(struct pci_dev *dev, int id)
+{
+	char buf[VIRTFN_ID_LEN];
+	struct pci_dev *virtfn;
+
+	virtfn = pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus),
+					     pci_iov_virtfn_bus(dev, id),
+					     pci_iov_virtfn_devfn(dev, id));
+	if (!virtfn)
+		return;
+
+	sprintf(buf, "virtfn%u", id);
+	sysfs_remove_link(&dev->dev.kobj, buf);
+	/*
+	 * pci_stop_dev() could have been called for this virtfn already,
+	 * so the directory for the virtfn may have been removed before.
+	 * Double check to avoid spurious sysfs warnings.
+	 */
+	if (virtfn->dev.kobj.sd)
+		sysfs_remove_link(&virtfn->dev.kobj, "physfn");
+
+	pci_stop_and_remove_bus_device(virtfn);
+	virtfn_remove_bus(dev->bus, virtfn->bus);
+
+	/* balance pci_get_domain_bus_and_slot() */
+	pci_dev_put(virtfn);
+	pci_dev_put(dev);
+}
+
+static ssize_t sriov_totalvfs_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return sysfs_emit(buf, "%u\n", pci_sriov_get_totalvfs(pdev));
+}
+
+static ssize_t sriov_numvfs_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	u16 num_vfs;
+
+	/* Serialize vs sriov_numvfs_store() so readers see valid num_VFs */
+	device_lock(&pdev->dev);
+	num_vfs = pdev->sriov->num_VFs;
+	device_unlock(&pdev->dev);
+
+	return sysfs_emit(buf, "%u\n", num_vfs);
+}
+
+/*
+ * num_vfs > 0; number of VFs to enable
+ * num_vfs = 0; disable all VFs
+ *
+ * Note: SRIOV spec does not allow partial VF
+ *	 disable, so it's all or none.
+ */
+static ssize_t sriov_numvfs_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int ret = 0;
+	u16 num_vfs;
+
+	if (kstrtou16(buf, 0, &num_vfs) < 0)
+		return -EINVAL;
+
+	if (num_vfs > pci_sriov_get_totalvfs(pdev))
+		return -ERANGE;
+
+	device_lock(&pdev->dev);
+
+	if (num_vfs == pdev->sriov->num_VFs)
+		goto exit;
+
+	/* is PF driver loaded */
+	if (!pdev->driver) {
+		pci_info(pdev, "no driver bound to device; cannot configure SR-IOV\n");
+		ret = -ENOENT;
+		goto exit;
+	}
+
+	/* is PF driver loaded w/callback */
+	if (!pdev->driver->sriov_configure) {
+		pci_info(pdev, "driver does not support SR-IOV configuration via sysfs\n");
+		ret = -ENOENT;
+		goto exit;
+	}
+
+	if (num_vfs == 0) {
+		/* disable VFs */
+		ret = pdev->driver->sriov_configure(pdev, 0);
+		goto exit;
+	}
+
+	/* enable VFs */
+	if (pdev->sriov->num_VFs) {
+		pci_warn(pdev, "%d VFs already enabled. Disable before enabling %d VFs\n",
+			 pdev->sriov->num_VFs, num_vfs);
+		ret = -EBUSY;
+		goto exit;
+	}
+
+	ret = pdev->driver->sriov_configure(pdev, num_vfs);
+	if (ret < 0)
+		goto exit;
+
+	if (ret != num_vfs)
+		pci_warn(pdev, "%d VFs requested; only %d enabled\n",
+			 num_vfs, ret);
+
+exit:
+	device_unlock(&pdev->dev);
+
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static ssize_t sriov_offset_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return sysfs_emit(buf, "%u\n", pdev->sriov->offset);
+}
+
+static ssize_t sriov_stride_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return sysfs_emit(buf, "%u\n", pdev->sriov->stride);
+}
+
+static ssize_t sriov_vf_device_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return sysfs_emit(buf, "%x\n", pdev->sriov->vf_device);
+}
+
+static ssize_t sriov_drivers_autoprobe_show(struct device *dev,
+					    struct device_attribute *attr,
+					    char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return sysfs_emit(buf, "%u\n", pdev->sriov->drivers_autoprobe);
+}
+
+static ssize_t sriov_drivers_autoprobe_store(struct device *dev,
+					     struct device_attribute *attr,
+					     const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	bool drivers_autoprobe;
+
+	if (kstrtobool(buf, &drivers_autoprobe) < 0)
+		return -EINVAL;
+
+	pdev->sriov->drivers_autoprobe = drivers_autoprobe;
+
+	return count;
+}
+
+static DEVICE_ATTR_RO(sriov_totalvfs);
+static DEVICE_ATTR_RW(sriov_numvfs);
+static DEVICE_ATTR_RO(sriov_offset);
+static DEVICE_ATTR_RO(sriov_stride);
+static DEVICE_ATTR_RO(sriov_vf_device);
+static DEVICE_ATTR_RW(sriov_drivers_autoprobe);
+
+static struct attribute *sriov_pf_dev_attrs[] = {
+	&dev_attr_sriov_totalvfs.attr,
+	&dev_attr_sriov_numvfs.attr,
+	&dev_attr_sriov_offset.attr,
+	&dev_attr_sriov_stride.attr,
+	&dev_attr_sriov_vf_device.attr,
+	&dev_attr_sriov_drivers_autoprobe.attr,
+#ifdef CONFIG_PCI_MSI
+	&dev_attr_sriov_vf_total_msix.attr,
+#endif
+	NULL,
+};
+
+static umode_t sriov_pf_attrs_are_visible(struct kobject *kobj,
+					  struct attribute *a, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+
+	if (!dev_is_pf(dev))
+		return 0;
+
+	return a->mode;
+}
+
+const struct attribute_group sriov_pf_dev_attr_group = {
+	.attrs = sriov_pf_dev_attrs,
+	.is_visible = sriov_pf_attrs_are_visible,
+};
+
+int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+	return 0;
+}
+
+int __weak pcibios_sriov_disable(struct pci_dev *pdev)
+{
+	return 0;
+}
+
+static int sriov_add_vfs(struct pci_dev *dev, u16 num_vfs)
+{
+	unsigned int i;
+	int rc;
+
+	if (dev->no_vf_scan)
+		return 0;
+
+	for (i = 0; i < num_vfs; i++) {
+		rc = pci_iov_add_virtfn(dev, i);
+		if (rc)
+			goto failed;
+	}
+	return 0;
+failed:
+	while (i--)
+		pci_iov_remove_virtfn(dev, i);
+
+	return rc;
+}
+
+static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
+{
+	int rc;
+	int i;
+	int nres;
+	u16 initial;
+	struct resource *res;
+	struct pci_dev *pdev;
+	struct pci_sriov *iov = dev->sriov;
+	int bars = 0;
+	int bus;
+
+	if (!nr_virtfn)
+		return 0;
+
+	if (iov->num_VFs)
+		return -EINVAL;
+
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial);
+	if (initial > iov->total_VFs ||
+	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total_VFs)))
+		return -EIO;
+
+	if (nr_virtfn < 0 || nr_virtfn > iov->total_VFs ||
+	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial)))
+		return -EINVAL;
+
+	nres = 0;
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		bars |= (1 << (i + PCI_IOV_RESOURCES));
+		res = &dev->resource[i + PCI_IOV_RESOURCES];
+		if (res->parent)
+			nres++;
+	}
+	if (nres != iov->nres) {
+		pci_err(dev, "not enough MMIO resources for SR-IOV\n");
+		return -ENOMEM;
+	}
+
+	bus = pci_iov_virtfn_bus(dev, nr_virtfn - 1);
+	if (bus > dev->bus->busn_res.end) {
+		pci_err(dev, "can't enable %d VFs (bus %02x out of range of %pR)\n",
+			nr_virtfn, bus, &dev->bus->busn_res);
+		return -ENOMEM;
+	}
+
+	if (pci_enable_resources(dev, bars)) {
+		pci_err(dev, "SR-IOV: IOV BARS not allocated\n");
+		return -ENOMEM;
+	}
+
+	if (iov->link != dev->devfn) {
+		pdev = pci_get_slot(dev->bus, iov->link);
+		if (!pdev)
+			return -ENODEV;
+
+		if (!pdev->is_physfn) {
+			pci_dev_put(pdev);
+			return -ENOSYS;
+		}
+
+		rc = sysfs_create_link(&dev->dev.kobj,
+					&pdev->dev.kobj, "dep_link");
+		pci_dev_put(pdev);
+		if (rc)
+			return rc;
+	}
+
+	iov->initial_VFs = initial;
+	if (nr_virtfn < initial)
+		initial = nr_virtfn;
+
+	rc = pcibios_sriov_enable(dev, initial);
+	if (rc) {
+		pci_err(dev, "failure %d from pcibios_sriov_enable()\n", rc);
+		goto err_pcibios;
+	}
+
+	pci_iov_set_numvfs(dev, nr_virtfn);
+	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
+	pci_cfg_access_lock(dev);
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+	msleep(100);
+	pci_cfg_access_unlock(dev);
+
+	rc = sriov_add_vfs(dev, initial);
+	if (rc)
+		goto err_pcibios;
+
+	kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
+	iov->num_VFs = nr_virtfn;
+
+	return 0;
+
+err_pcibios:
+	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
+	pci_cfg_access_lock(dev);
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+	ssleep(1);
+	pci_cfg_access_unlock(dev);
+
+	pcibios_sriov_disable(dev);
+
+	if (iov->link != dev->devfn)
+		sysfs_remove_link(&dev->dev.kobj, "dep_link");
+
+	pci_iov_set_numvfs(dev, 0);
+	return rc;
+}
+
+static void sriov_del_vfs(struct pci_dev *dev)
+{
+	struct pci_sriov *iov = dev->sriov;
+	int i;
+
+	for (i = 0; i < iov->num_VFs; i++)
+		pci_iov_remove_virtfn(dev, i);
+}
+
+static void sriov_disable(struct pci_dev *dev)
+{
+	struct pci_sriov *iov = dev->sriov;
+
+	if (!iov->num_VFs)
+		return;
+
+	sriov_del_vfs(dev);
+	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
+	pci_cfg_access_lock(dev);
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+	ssleep(1);
+	pci_cfg_access_unlock(dev);
+
+	pcibios_sriov_disable(dev);
+
+	if (iov->link != dev->devfn)
+		sysfs_remove_link(&dev->dev.kobj, "dep_link");
+
+	iov->num_VFs = 0;
+	pci_iov_set_numvfs(dev, 0);
+}
+
+static int sriov_init(struct pci_dev *dev, int pos)
+{
+	int i, bar64;
+	int rc;
+	int nres;
+	u32 pgsz;
+	u16 ctrl, total;
+	struct pci_sriov *iov;
+	struct resource *res;
+	struct pci_dev *pdev;
+
+	pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
+	if (ctrl & PCI_SRIOV_CTRL_VFE) {
+		pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
+		ssleep(1);
+	}
+
+	ctrl = 0;
+	list_for_each_entry(pdev, &dev->bus->devices, bus_list)
+		if (pdev->is_physfn)
+			goto found;
+
+	pdev = NULL;
+	if (pci_ari_enabled(dev->bus))
+		ctrl |= PCI_SRIOV_CTRL_ARI;
+
+found:
+	pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);
+
+	pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
+	if (!total)
+		return 0;
+
+	pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
+	i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
+	pgsz &= ~((1 << i) - 1);
+	if (!pgsz)
+		return -EIO;
+
+	pgsz &= ~(pgsz - 1);
+	pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);
+
+	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
+	if (!iov)
+		return -ENOMEM;
+
+	nres = 0;
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &dev->resource[i + PCI_IOV_RESOURCES];
+		/*
+		 * If it is already FIXED, don't change it, something
+		 * (perhaps EA or header fixups) wants it this way.
+		 */
+		if (res->flags & IORESOURCE_PCI_FIXED)
+			bar64 = (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
+		else
+			bar64 = __pci_read_base(dev, pci_bar_unknown, res,
+						pos + PCI_SRIOV_BAR + i * 4);
+		if (!res->flags)
+			continue;
+		if (resource_size(res) & (PAGE_SIZE - 1)) {
+			rc = -EIO;
+			goto failed;
+		}
+		iov->barsz[i] = resource_size(res);
+		res->end = res->start + resource_size(res) * total - 1;
+		pci_info(dev, "VF(n) BAR%d space: %pR (contains BAR%d for %d VFs)\n",
+			 i, res, i, total);
+		i += bar64;
+		nres++;
+	}
+
+	iov->pos = pos;
+	iov->nres = nres;
+	iov->ctrl = ctrl;
+	iov->total_VFs = total;
+	iov->driver_max_VFs = total;
+	pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &iov->vf_device);
+	iov->pgsz = pgsz;
+	iov->self = dev;
+	iov->drivers_autoprobe = true;
+	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
+	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
+	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)
+		iov->link = PCI_DEVFN(PCI_SLOT(dev->devfn), iov->link);
+
+	if (pdev)
+		iov->dev = pci_dev_get(pdev);
+	else
+		iov->dev = dev;
+
+	dev->sriov = iov;
+	dev->is_physfn = 1;
+	rc = compute_max_vf_buses(dev);
+	if (rc)
+		goto fail_max_buses;
+
+	return 0;
+
+fail_max_buses:
+	dev->sriov = NULL;
+	dev->is_physfn = 0;
+failed:
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &dev->resource[i + PCI_IOV_RESOURCES];
+		res->flags = 0;
+	}
+
+	kfree(iov);
+	return rc;
+}
+
+static void sriov_release(struct pci_dev *dev)
+{
+	BUG_ON(dev->sriov->num_VFs);
+
+	if (dev != dev->sriov->dev)
+		pci_dev_put(dev->sriov->dev);
+
+	kfree(dev->sriov);
+	dev->sriov = NULL;
+}
+
+static void sriov_restore_state(struct pci_dev *dev)
+{
+	int i;
+	u16 ctrl;
+	struct pci_sriov *iov = dev->sriov;
+
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl);
+	if (ctrl & PCI_SRIOV_CTRL_VFE)
+		return;
+
+	/*
+	 * Restore PCI_SRIOV_CTRL_ARI before pci_iov_set_numvfs() because
+	 * it reads offset & stride, which depend on PCI_SRIOV_CTRL_ARI.
+	 */
+	ctrl &= ~PCI_SRIOV_CTRL_ARI;
+	ctrl |= iov->ctrl & PCI_SRIOV_CTRL_ARI;
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, ctrl);
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
+		pci_update_resource(dev, i + PCI_IOV_RESOURCES);
+
+	pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
+	pci_iov_set_numvfs(dev, iov->num_VFs);
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+	if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
+		msleep(100);
+}
+
+/**
+ * pci_iov_init - initialize the IOV capability
+ * @dev: the PCI device
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+int pci_iov_init(struct pci_dev *dev)
+{
+	int pos;
+
+	if (!pci_is_pcie(dev))
+		return -ENODEV;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
+	if (pos)
+		return sriov_init(dev, pos);
+
+	return -ENODEV;
+}
+
+/**
+ * pci_iov_release - release resources used by the IOV capability
+ * @dev: the PCI device
+ */
+void pci_iov_release(struct pci_dev *dev)
+{
+	if (dev->is_physfn)
+		sriov_release(dev);
+}
+
+/**
+ * pci_iov_remove - clean up SR-IOV state after PF driver is detached
+ * @dev: the PCI device
+ */
+void pci_iov_remove(struct pci_dev *dev)
+{
+	struct pci_sriov *iov = dev->sriov;
+
+	if (!dev->is_physfn)
+		return;
+
+	iov->driver_max_VFs = iov->total_VFs;
+	if (iov->num_VFs)
+		pci_warn(dev, "driver left SR-IOV enabled after remove\n");
+}
+
+/**
+ * pci_iov_update_resource - update a VF BAR
+ * @dev: the PCI device
+ * @resno: the resource number
+ *
+ * Update a VF BAR in the SR-IOV capability of a PF.
+ */
+void pci_iov_update_resource(struct pci_dev *dev, int resno)
+{
+	struct pci_sriov *iov = dev->is_physfn ? dev->sriov : NULL;
+	struct resource *res = dev->resource + resno;
+	int vf_bar = resno - PCI_IOV_RESOURCES;
+	struct pci_bus_region region;
+	u16 cmd;
+	u32 new;
+	int reg;
+
+	/*
+	 * The generic pci_restore_bars() path calls this for all devices,
+	 * including VFs and non-SR-IOV devices.  If this is not a PF, we
+	 * have nothing to do.
+	 */
+	if (!iov)
+		return;
+
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &cmd);
+	if ((cmd & PCI_SRIOV_CTRL_VFE) && (cmd & PCI_SRIOV_CTRL_MSE)) {
+		dev_WARN(&dev->dev, "can't update enabled VF BAR%d %pR\n",
+			 vf_bar, res);
+		return;
+	}
+
+	/*
+	 * Ignore unimplemented BARs, unused resource slots for 64-bit
+	 * BARs, and non-movable resources, e.g., those described via
+	 * Enhanced Allocation.
+	 */
+	if (!res->flags)
+		return;
+
+	if (res->flags & IORESOURCE_UNSET)
+		return;
+
+	if (res->flags & IORESOURCE_PCI_FIXED)
+		return;
+
+	pcibios_resource_to_bus(dev->bus, &region, res);
+	new = region.start;
+	new |= res->flags & ~PCI_BASE_ADDRESS_MEM_MASK;
+
+	reg = iov->pos + PCI_SRIOV_BAR + 4 * vf_bar;
+	pci_write_config_dword(dev, reg, new);
+	if (res->flags & IORESOURCE_MEM_64) {
+		new = region.start >> 16 >> 16;
+		pci_write_config_dword(dev, reg + 4, new);
+	}
+}
+
+resource_size_t __weak pcibios_iov_resource_alignment(struct pci_dev *dev,
+						      int resno)
+{
+	return pci_iov_resource_size(dev, resno);
+}
+
+/**
+ * pci_sriov_resource_alignment - get resource alignment for VF BAR
+ * @dev: the PCI device
+ * @resno: the resource number
+ *
+ * Returns the alignment of the VF BAR found in the SR-IOV capability.
+ * This is not the same as the resource size which is defined as
+ * the VF BAR size multiplied by the number of VFs.  The alignment
+ * is just the VF BAR size.
+ */
+resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno)
+{
+	return pcibios_iov_resource_alignment(dev, resno);
+}
+
+/**
+ * pci_restore_iov_state - restore the state of the IOV capability
+ * @dev: the PCI device
+ */
+void pci_restore_iov_state(struct pci_dev *dev)
+{
+	if (dev->is_physfn)
+		sriov_restore_state(dev);
+}
+
+/**
+ * pci_vf_drivers_autoprobe - set PF property drivers_autoprobe for VFs
+ * @dev: the PCI device
+ * @auto_probe: set VF drivers auto probe flag
+ */
+void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool auto_probe)
+{
+	if (dev->is_physfn)
+		dev->sriov->drivers_autoprobe = auto_probe;
+}
+
+/**
+ * pci_iov_bus_range - find bus range used by Virtual Function
+ * @bus: the PCI bus
+ *
+ * Returns max number of buses (exclude current one) used by Virtual
+ * Functions.
+ */
+int pci_iov_bus_range(struct pci_bus *bus)
+{
+	int max = 0;
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		if (!dev->is_physfn)
+			continue;
+		if (dev->sriov->max_VF_buses > max)
+			max = dev->sriov->max_VF_buses;
+	}
+
+	return max ? max - bus->number : 0;
+}
+
+/**
+ * pci_enable_sriov - enable the SR-IOV capability
+ * @dev: the PCI device
+ * @nr_virtfn: number of virtual functions to enable
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
+{
+	might_sleep();
+
+	if (!dev->is_physfn)
+		return -ENOSYS;
+
+	return sriov_enable(dev, nr_virtfn);
+}
+EXPORT_SYMBOL_GPL(pci_enable_sriov);
+
+/**
+ * pci_disable_sriov - disable the SR-IOV capability
+ * @dev: the PCI device
+ */
+void pci_disable_sriov(struct pci_dev *dev)
+{
+	might_sleep();
+
+	if (!dev->is_physfn)
+		return;
+
+	sriov_disable(dev);
+}
+EXPORT_SYMBOL_GPL(pci_disable_sriov);
+
+/**
+ * pci_num_vf - return number of VFs associated with a PF device_release_driver
+ * @dev: the PCI device
+ *
+ * Returns number of VFs, or 0 if SR-IOV is not enabled.
+ */
+int pci_num_vf(struct pci_dev *dev)
+{
+	if (!dev->is_physfn)
+		return 0;
+
+	return dev->sriov->num_VFs;
+}
+EXPORT_SYMBOL_GPL(pci_num_vf);
+
+/**
+ * pci_vfs_assigned - returns number of VFs are assigned to a guest
+ * @dev: the PCI device
+ *
+ * Returns number of VFs belonging to this device that are assigned to a guest.
+ * If device is not a physical function returns 0.
+ */
+int pci_vfs_assigned(struct pci_dev *dev)
+{
+	struct pci_dev *vfdev;
+	unsigned int vfs_assigned = 0;
+	unsigned short dev_id;
+
+	/* only search if we are a PF */
+	if (!dev->is_physfn)
+		return 0;
+
+	/*
+	 * determine the device ID for the VFs, the vendor ID will be the
+	 * same as the PF so there is no need to check for that one
+	 */
+	dev_id = dev->sriov->vf_device;
+
+	/* loop through all the VFs to see if we own any that are assigned */
+	vfdev = pci_get_device(dev->vendor, dev_id, NULL);
+	while (vfdev) {
+		/*
+		 * It is considered assigned if it is a virtual function with
+		 * our dev as the physical function and the assigned bit is set
+		 */
+		if (vfdev->is_virtfn && (vfdev->physfn == dev) &&
+			pci_is_dev_assigned(vfdev))
+			vfs_assigned++;
+
+		vfdev = pci_get_device(dev->vendor, dev_id, vfdev);
+	}
+
+	return vfs_assigned;
+}
+EXPORT_SYMBOL_GPL(pci_vfs_assigned);
+
+/**
+ * pci_sriov_set_totalvfs -- reduce the TotalVFs available
+ * @dev: the PCI PF device
+ * @numvfs: number that should be used for TotalVFs supported
+ *
+ * Should be called from PF driver's probe routine with
+ * device's mutex held.
+ *
+ * Returns 0 if PF is an SRIOV-capable device and
+ * value of numvfs valid. If not a PF return -ENOSYS;
+ * if numvfs is invalid return -EINVAL;
+ * if VFs already enabled, return -EBUSY.
+ */
+int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs)
+{
+	if (!dev->is_physfn)
+		return -ENOSYS;
+
+	if (numvfs > dev->sriov->total_VFs)
+		return -EINVAL;
+
+	/* Shouldn't change if VFs already enabled */
+	if (dev->sriov->ctrl & PCI_SRIOV_CTRL_VFE)
+		return -EBUSY;
+
+	dev->sriov->driver_max_VFs = numvfs;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_sriov_set_totalvfs);
+
+/**
+ * pci_sriov_get_totalvfs -- get total VFs supported on this device
+ * @dev: the PCI PF device
+ *
+ * For a PCIe device with SRIOV support, return the PCIe
+ * SRIOV capability value of TotalVFs or the value of driver_max_VFs
+ * if the driver reduced it.  Otherwise 0.
+ */
+int pci_sriov_get_totalvfs(struct pci_dev *dev)
+{
+	if (!dev->is_physfn)
+		return 0;
+
+	return dev->sriov->driver_max_VFs;
+}
+EXPORT_SYMBOL_GPL(pci_sriov_get_totalvfs);
+
+/**
+ * pci_sriov_configure_simple - helper to configure SR-IOV
+ * @dev: the PCI device
+ * @nr_virtfn: number of virtual functions to enable, 0 to disable
+ *
+ * Enable or disable SR-IOV for devices that don't require any PF setup
+ * before enabling SR-IOV.  Return value is negative on error, or number of
+ * VFs allocated on success.
+ */
+int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn)
+{
+	int rc;
+
+	might_sleep();
+
+	if (!dev->is_physfn)
+		return -ENODEV;
+
+	if (pci_vfs_assigned(dev)) {
+		pci_warn(dev, "Cannot modify SR-IOV while VFs are assigned\n");
+		return -EPERM;
+	}
+
+	if (nr_virtfn == 0) {
+		sriov_disable(dev);
+		return 0;
+	}
+
+	rc = sriov_enable(dev, nr_virtfn);
+	if (rc < 0)
+		return rc;
+
+	return nr_virtfn;
+}
+EXPORT_SYMBOL_GPL(pci_sriov_configure_simple);
diff --git a/drivers/pci/irq.c b/drivers/pci/irq.c
new file mode 100644
index 000000000..0050e8f68
--- /dev/null
+++ b/drivers/pci/irq.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI IRQ handling code
+ *
+ * Copyright (c) 2008 James Bottomley <James.Bottomley@HansenPartnership.com>
+ * Copyright (C) 2017 Christoph Hellwig.
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+
+/**
+ * pci_request_irq - allocate an interrupt line for a PCI device
+ * @dev:	PCI device to operate on
+ * @nr:		device-relative interrupt vector index (0-based).
+ * @handler:	Function to be called when the IRQ occurs.
+ *		Primary handler for threaded interrupts.
+ *		If NULL and thread_fn != NULL the default primary handler is
+ *		installed.
+ * @thread_fn:	Function called from the IRQ handler thread
+ *		If NULL, no IRQ thread is created
+ * @dev_id:	Cookie passed back to the handler function
+ * @fmt:	Printf-like format string naming the handler
+ *
+ * This call allocates interrupt resources and enables the interrupt line and
+ * IRQ handling. From the point this call is made @handler and @thread_fn may
+ * be invoked.  All interrupts requested using this function might be shared.
+ *
+ * @dev_id must not be NULL and must be globally unique.
+ */
+int pci_request_irq(struct pci_dev *dev, unsigned int nr, irq_handler_t handler,
+		irq_handler_t thread_fn, void *dev_id, const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+	char *devname;
+	unsigned long irqflags = IRQF_SHARED;
+
+	if (!handler)
+		irqflags |= IRQF_ONESHOT;
+
+	va_start(ap, fmt);
+	devname = kvasprintf(GFP_KERNEL, fmt, ap);
+	va_end(ap);
+	if (!devname)
+		return -ENOMEM;
+
+	ret = request_threaded_irq(pci_irq_vector(dev, nr), handler, thread_fn,
+				   irqflags, devname, dev_id);
+	if (ret)
+		kfree(devname);
+	return ret;
+}
+EXPORT_SYMBOL(pci_request_irq);
+
+/**
+ * pci_free_irq - free an interrupt allocated with pci_request_irq
+ * @dev:	PCI device to operate on
+ * @nr:		device-relative interrupt vector index (0-based).
+ * @dev_id:	Device identity to free
+ *
+ * Remove an interrupt handler. The handler is removed and if the interrupt
+ * line is no longer in use by any driver it is disabled.  The caller must
+ * ensure the interrupt is disabled on the device before calling this function.
+ * The function does not return until any executing interrupts for this IRQ
+ * have completed.
+ *
+ * This function must not be called from interrupt context.
+ */
+void pci_free_irq(struct pci_dev *dev, unsigned int nr, void *dev_id)
+{
+	kfree(free_irq(pci_irq_vector(dev, nr), dev_id));
+}
+EXPORT_SYMBOL(pci_free_irq);
diff --git a/drivers/pci/mmap.c b/drivers/pci/mmap.c
new file mode 100644
index 000000000..450403905
--- /dev/null
+++ b/drivers/pci/mmap.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic PCI resource mmap helper
+ *
+ * Copyright © 2017 Amazon.com, Inc. or its affiliates.
+ *
+ * Author: David Woodhouse <dwmw2@infradead.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/pci.h>
+
+#ifdef ARCH_GENERIC_PCI_MMAP_RESOURCE
+
+static const struct vm_operations_struct pci_phys_vm_ops = {
+#ifdef CONFIG_HAVE_IOREMAP_PROT
+	.access = generic_access_phys,
+#endif
+};
+
+int pci_mmap_resource_range(struct pci_dev *pdev, int bar,
+			    struct vm_area_struct *vma,
+			    enum pci_mmap_state mmap_state, int write_combine)
+{
+	unsigned long size;
+	int ret;
+
+	size = ((pci_resource_len(pdev, bar) - 1) >> PAGE_SHIFT) + 1;
+	if (vma->vm_pgoff + vma_pages(vma) > size)
+		return -EINVAL;
+
+	if (write_combine)
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	else
+		vma->vm_page_prot = pgprot_device(vma->vm_page_prot);
+
+	if (mmap_state == pci_mmap_io) {
+		ret = pci_iobar_pfn(pdev, bar, vma);
+		if (ret)
+			return ret;
+	} else
+		vma->vm_pgoff += (pci_resource_start(pdev, bar) >> PAGE_SHIFT);
+
+	vma->vm_ops = &pci_phys_vm_ops;
+
+	return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+				  vma->vm_end - vma->vm_start,
+				  vma->vm_page_prot);
+}
+
+#endif
diff --git a/drivers/pci/msi/Makefile b/drivers/pci/msi/Makefile
new file mode 100644
index 000000000..839ff72d7
--- /dev/null
+++ b/drivers/pci/msi/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the PCI/MSI
+obj-$(CONFIG_PCI)			+= pcidev_msi.o
+obj-$(CONFIG_PCI_MSI)			+= api.o msi.o irqdomain.o
+obj-$(CONFIG_PCI_MSI_ARCH_FALLBACKS)	+= legacy.o
diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c
new file mode 100644
index 000000000..be679aa5d
--- /dev/null
+++ b/drivers/pci/msi/api.c
@@ -0,0 +1,458 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI MSI/MSI-X — Exported APIs for device drivers
+ *
+ * Copyright (C) 2003-2004 Intel
+ * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
+ * Copyright (C) 2016 Christoph Hellwig.
+ * Copyright (C) 2022 Linutronix GmbH
+ */
+
+#include <linux/export.h>
+#include <linux/irq.h>
+
+#include "msi.h"
+
+/**
+ * pci_enable_msi() - Enable MSI interrupt mode on device
+ * @dev: the PCI device to operate on
+ *
+ * Legacy device driver API to enable MSI interrupts mode on device and
+ * allocate a single interrupt vector. On success, the allocated vector
+ * Linux IRQ will be saved at @dev->irq. The driver must invoke
+ * pci_disable_msi() on cleanup.
+ *
+ * NOTE: The newer pci_alloc_irq_vectors() / pci_free_irq_vectors() API
+ * pair should, in general, be used instead.
+ *
+ * Return: 0 on success, errno otherwise
+ */
+int pci_enable_msi(struct pci_dev *dev)
+{
+	int rc = __pci_enable_msi_range(dev, 1, 1, NULL);
+	if (rc < 0)
+		return rc;
+	return 0;
+}
+EXPORT_SYMBOL(pci_enable_msi);
+
+/**
+ * pci_disable_msi() - Disable MSI interrupt mode on device
+ * @dev: the PCI device to operate on
+ *
+ * Legacy device driver API to disable MSI interrupt mode on device,
+ * free earlier allocated interrupt vectors, and restore INTx emulation.
+ * The PCI device Linux IRQ (@dev->irq) is restored to its default
+ * pin-assertion IRQ. This is the cleanup pair of pci_enable_msi().
+ *
+ * NOTE: The newer pci_alloc_irq_vectors() / pci_free_irq_vectors() API
+ * pair should, in general, be used instead.
+ */
+void pci_disable_msi(struct pci_dev *dev)
+{
+	if (!pci_msi_enabled() || !dev || !dev->msi_enabled)
+		return;
+
+	msi_lock_descs(&dev->dev);
+	pci_msi_shutdown(dev);
+	pci_free_msi_irqs(dev);
+	msi_unlock_descs(&dev->dev);
+}
+EXPORT_SYMBOL(pci_disable_msi);
+
+/**
+ * pci_msix_vec_count() - Get number of MSI-X interrupt vectors on device
+ * @dev: the PCI device to operate on
+ *
+ * Return: number of MSI-X interrupt vectors available on this device
+ * (i.e., the device's MSI-X capability structure "table size"), -EINVAL
+ * if the device is not MSI-X capable, other errnos otherwise.
+ */
+int pci_msix_vec_count(struct pci_dev *dev)
+{
+	u16 control;
+
+	if (!dev->msix_cap)
+		return -EINVAL;
+
+	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
+	return msix_table_size(control);
+}
+EXPORT_SYMBOL(pci_msix_vec_count);
+
+/**
+ * pci_enable_msix_range() - Enable MSI-X interrupt mode on device
+ * @dev:     the PCI device to operate on
+ * @entries: input/output parameter, array of MSI-X configuration entries
+ * @minvec:  minimum required number of MSI-X vectors
+ * @maxvec:  maximum desired number of MSI-X vectors
+ *
+ * Legacy device driver API to enable MSI-X interrupt mode on device and
+ * configure its MSI-X capability structure as appropriate.  The passed
+ * @entries array must have each of its members "entry" field set to a
+ * desired (valid) MSI-X vector number, where the range of valid MSI-X
+ * vector numbers can be queried through pci_msix_vec_count().  If
+ * successful, the driver must invoke pci_disable_msix() on cleanup.
+ *
+ * NOTE: The newer pci_alloc_irq_vectors() / pci_free_irq_vectors() API
+ * pair should, in general, be used instead.
+ *
+ * Return: number of MSI-X vectors allocated (which might be smaller
+ * than @maxvecs), where Linux IRQ numbers for such allocated vectors
+ * are saved back in the @entries array elements' "vector" field. Return
+ * -ENOSPC if less than @minvecs interrupt vectors are available.
+ * Return -EINVAL if one of the passed @entries members "entry" field
+ * was invalid or a duplicate, or if plain MSI interrupts mode was
+ * earlier enabled on device. Return other errnos otherwise.
+ */
+int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
+			  int minvec, int maxvec)
+{
+	return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0);
+}
+EXPORT_SYMBOL(pci_enable_msix_range);
+
+/**
+ * pci_msix_can_alloc_dyn - Query whether dynamic allocation after enabling
+ *			    MSI-X is supported
+ *
+ * @dev:	PCI device to operate on
+ *
+ * Return: True if supported, false otherwise
+ */
+bool pci_msix_can_alloc_dyn(struct pci_dev *dev)
+{
+	if (!dev->msix_cap)
+		return false;
+
+	return pci_msi_domain_supports(dev, MSI_FLAG_PCI_MSIX_ALLOC_DYN, DENY_LEGACY);
+}
+EXPORT_SYMBOL_GPL(pci_msix_can_alloc_dyn);
+
+/**
+ * pci_msix_alloc_irq_at - Allocate an MSI-X interrupt after enabling MSI-X
+ *			   at a given MSI-X vector index or any free vector index
+ *
+ * @dev:	PCI device to operate on
+ * @index:	Index to allocate. If @index == MSI_ANY_INDEX this allocates
+ *		the next free index in the MSI-X table
+ * @affdesc:	Optional pointer to an affinity descriptor structure. NULL otherwise
+ *
+ * Return: A struct msi_map
+ *
+ *	On success msi_map::index contains the allocated index (>= 0) and
+ *	msi_map::virq contains the allocated Linux interrupt number (> 0).
+ *
+ *	On fail msi_map::index contains the error code and msi_map::virq
+ *	is set to 0.
+ */
+struct msi_map pci_msix_alloc_irq_at(struct pci_dev *dev, unsigned int index,
+				     const struct irq_affinity_desc *affdesc)
+{
+	struct msi_map map = { .index = -ENOTSUPP };
+
+	if (!dev->msix_enabled)
+		return map;
+
+	if (!pci_msix_can_alloc_dyn(dev))
+		return map;
+
+	return msi_domain_alloc_irq_at(&dev->dev, MSI_DEFAULT_DOMAIN, index, affdesc, NULL);
+}
+EXPORT_SYMBOL_GPL(pci_msix_alloc_irq_at);
+
+/**
+ * pci_msix_free_irq - Free an interrupt on a PCI/MSIX interrupt domain
+ *
+ * @dev:	The PCI device to operate on
+ * @map:	A struct msi_map describing the interrupt to free
+ *
+ * Undo an interrupt vector allocation. Does not disable MSI-X.
+ */
+void pci_msix_free_irq(struct pci_dev *dev, struct msi_map map)
+{
+	if (WARN_ON_ONCE(map.index < 0 || map.virq <= 0))
+		return;
+	if (WARN_ON_ONCE(!pci_msix_can_alloc_dyn(dev)))
+		return;
+	msi_domain_free_irqs_range(&dev->dev, MSI_DEFAULT_DOMAIN, map.index, map.index);
+}
+EXPORT_SYMBOL_GPL(pci_msix_free_irq);
+
+/**
+ * pci_disable_msix() - Disable MSI-X interrupt mode on device
+ * @dev: the PCI device to operate on
+ *
+ * Legacy device driver API to disable MSI-X interrupt mode on device,
+ * free earlier-allocated interrupt vectors, and restore INTx.
+ * The PCI device Linux IRQ (@dev->irq) is restored to its default pin
+ * assertion IRQ. This is the cleanup pair of pci_enable_msix_range().
+ *
+ * NOTE: The newer pci_alloc_irq_vectors() / pci_free_irq_vectors() API
+ * pair should, in general, be used instead.
+ */
+void pci_disable_msix(struct pci_dev *dev)
+{
+	if (!pci_msi_enabled() || !dev || !dev->msix_enabled)
+		return;
+
+	msi_lock_descs(&dev->dev);
+	pci_msix_shutdown(dev);
+	pci_free_msi_irqs(dev);
+	msi_unlock_descs(&dev->dev);
+}
+EXPORT_SYMBOL(pci_disable_msix);
+
+/**
+ * pci_alloc_irq_vectors() - Allocate multiple device interrupt vectors
+ * @dev:      the PCI device to operate on
+ * @min_vecs: minimum required number of vectors (must be >= 1)
+ * @max_vecs: maximum desired number of vectors
+ * @flags:    One or more of:
+ *
+ *            * %PCI_IRQ_MSIX      Allow trying MSI-X vector allocations
+ *            * %PCI_IRQ_MSI       Allow trying MSI vector allocations
+ *
+ *            * %PCI_IRQ_LEGACY    Allow trying legacy INTx interrupts, if
+ *              and only if @min_vecs == 1
+ *
+ *            * %PCI_IRQ_AFFINITY  Auto-manage IRQs affinity by spreading
+ *              the vectors around available CPUs
+ *
+ * Allocate up to @max_vecs interrupt vectors on device. MSI-X irq
+ * vector allocation has a higher precedence over plain MSI, which has a
+ * higher precedence over legacy INTx emulation.
+ *
+ * Upon a successful allocation, the caller should use pci_irq_vector()
+ * to get the Linux IRQ number to be passed to request_threaded_irq().
+ * The driver must call pci_free_irq_vectors() on cleanup.
+ *
+ * Return: number of allocated vectors (which might be smaller than
+ * @max_vecs), -ENOSPC if less than @min_vecs interrupt vectors are
+ * available, other errnos otherwise.
+ */
+int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
+			  unsigned int max_vecs, unsigned int flags)
+{
+	return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs,
+					      flags, NULL);
+}
+EXPORT_SYMBOL(pci_alloc_irq_vectors);
+
+/**
+ * pci_alloc_irq_vectors_affinity() - Allocate multiple device interrupt
+ *                                    vectors with affinity requirements
+ * @dev:      the PCI device to operate on
+ * @min_vecs: minimum required number of vectors (must be >= 1)
+ * @max_vecs: maximum desired number of vectors
+ * @flags:    allocation flags, as in pci_alloc_irq_vectors()
+ * @affd:     affinity requirements (can be %NULL).
+ *
+ * Same as pci_alloc_irq_vectors(), but with the extra @affd parameter.
+ * Check that function docs, and &struct irq_affinity, for more details.
+ */
+int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
+				   unsigned int max_vecs, unsigned int flags,
+				   struct irq_affinity *affd)
+{
+	struct irq_affinity msi_default_affd = {0};
+	int nvecs = -ENOSPC;
+
+	if (flags & PCI_IRQ_AFFINITY) {
+		if (!affd)
+			affd = &msi_default_affd;
+	} else {
+		if (WARN_ON(affd))
+			affd = NULL;
+	}
+
+	if (flags & PCI_IRQ_MSIX) {
+		nvecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs,
+						affd, flags);
+		if (nvecs > 0)
+			return nvecs;
+	}
+
+	if (flags & PCI_IRQ_MSI) {
+		nvecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, affd);
+		if (nvecs > 0)
+			return nvecs;
+	}
+
+	/* use legacy IRQ if allowed */
+	if (flags & PCI_IRQ_LEGACY) {
+		if (min_vecs == 1 && dev->irq) {
+			/*
+			 * Invoke the affinity spreading logic to ensure that
+			 * the device driver can adjust queue configuration
+			 * for the single interrupt case.
+			 */
+			if (affd)
+				irq_create_affinity_masks(1, affd);
+			pci_intx(dev, 1);
+			return 1;
+		}
+	}
+
+	return nvecs;
+}
+EXPORT_SYMBOL(pci_alloc_irq_vectors_affinity);
+
+/**
+ * pci_irq_vector() - Get Linux IRQ number of a device interrupt vector
+ * @dev: the PCI device to operate on
+ * @nr:  device-relative interrupt vector index (0-based); has different
+ *       meanings, depending on interrupt mode:
+ *
+ *         * MSI-X     the index in the MSI-X vector table
+ *         * MSI       the index of the enabled MSI vectors
+ *         * INTx      must be 0
+ *
+ * Return: the Linux IRQ number, or -EINVAL if @nr is out of range
+ */
+int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
+{
+	unsigned int irq;
+
+	if (!dev->msi_enabled && !dev->msix_enabled)
+		return !nr ? dev->irq : -EINVAL;
+
+	irq = msi_get_virq(&dev->dev, nr);
+	return irq ? irq : -EINVAL;
+}
+EXPORT_SYMBOL(pci_irq_vector);
+
+/**
+ * pci_irq_get_affinity() - Get a device interrupt vector affinity
+ * @dev: the PCI device to operate on
+ * @nr:  device-relative interrupt vector index (0-based); has different
+ *       meanings, depending on interrupt mode:
+ *
+ *         * MSI-X     the index in the MSI-X vector table
+ *         * MSI       the index of the enabled MSI vectors
+ *         * INTx      must be 0
+ *
+ * Return: MSI/MSI-X vector affinity, NULL if @nr is out of range or if
+ * the MSI(-X) vector was allocated without explicit affinity
+ * requirements (e.g., by pci_enable_msi(), pci_enable_msix_range(), or
+ * pci_alloc_irq_vectors() without the %PCI_IRQ_AFFINITY flag). Return a
+ * generic set of CPU IDs representing all possible CPUs available
+ * during system boot if the device is in legacy INTx mode.
+ */
+const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
+{
+	int idx, irq = pci_irq_vector(dev, nr);
+	struct msi_desc *desc;
+
+	if (WARN_ON_ONCE(irq <= 0))
+		return NULL;
+
+	desc = irq_get_msi_desc(irq);
+	/* Non-MSI does not have the information handy */
+	if (!desc)
+		return cpu_possible_mask;
+
+	/* MSI[X] interrupts can be allocated without affinity descriptor */
+	if (!desc->affinity)
+		return NULL;
+
+	/*
+	 * MSI has a mask array in the descriptor.
+	 * MSI-X has a single mask.
+	 */
+	idx = dev->msi_enabled ? nr : 0;
+	return &desc->affinity[idx].mask;
+}
+EXPORT_SYMBOL(pci_irq_get_affinity);
+
+/**
+ * pci_ims_alloc_irq - Allocate an interrupt on a PCI/IMS interrupt domain
+ * @dev:	The PCI device to operate on
+ * @icookie:	Pointer to an IMS implementation specific cookie for this
+ *		IMS instance (PASID, queue ID, pointer...).
+ *		The cookie content is copied into the MSI descriptor for the
+ *		interrupt chip callbacks or domain specific setup functions.
+ * @affdesc:	Optional pointer to an interrupt affinity descriptor
+ *
+ * There is no index for IMS allocations as IMS is an implementation
+ * specific storage and does not have any direct associations between
+ * index, which might be a pure software construct, and device
+ * functionality. This association is established by the driver either via
+ * the index - if there is a hardware table - or in case of purely software
+ * managed IMS implementation the association happens via the
+ * irq_write_msi_msg() callback of the implementation specific interrupt
+ * chip, which utilizes the provided @icookie to store the MSI message in
+ * the appropriate place.
+ *
+ * Return: A struct msi_map
+ *
+ *	On success msi_map::index contains the allocated index (>= 0) and
+ *	msi_map::virq the allocated Linux interrupt number (> 0).
+ *
+ *	On fail msi_map::index contains the error code and msi_map::virq
+ *	is set to 0.
+ */
+struct msi_map pci_ims_alloc_irq(struct pci_dev *dev, union msi_instance_cookie *icookie,
+				 const struct irq_affinity_desc *affdesc)
+{
+	return msi_domain_alloc_irq_at(&dev->dev, MSI_SECONDARY_DOMAIN, MSI_ANY_INDEX,
+				       affdesc, icookie);
+}
+EXPORT_SYMBOL_GPL(pci_ims_alloc_irq);
+
+/**
+ * pci_ims_free_irq - Allocate an interrupt on a PCI/IMS interrupt domain
+ *		      which was allocated via pci_ims_alloc_irq()
+ * @dev:	The PCI device to operate on
+ * @map:	A struct msi_map describing the interrupt to free as
+ *		returned from pci_ims_alloc_irq()
+ */
+void pci_ims_free_irq(struct pci_dev *dev, struct msi_map map)
+{
+	if (WARN_ON_ONCE(map.index < 0 || map.virq <= 0))
+		return;
+	msi_domain_free_irqs_range(&dev->dev, MSI_SECONDARY_DOMAIN, map.index, map.index);
+}
+EXPORT_SYMBOL_GPL(pci_ims_free_irq);
+
+/**
+ * pci_free_irq_vectors() - Free previously allocated IRQs for a device
+ * @dev: the PCI device to operate on
+ *
+ * Undo the interrupt vector allocations and possible device MSI/MSI-X
+ * enablement earlier done through pci_alloc_irq_vectors_affinity() or
+ * pci_alloc_irq_vectors().
+ */
+void pci_free_irq_vectors(struct pci_dev *dev)
+{
+	pci_disable_msix(dev);
+	pci_disable_msi(dev);
+}
+EXPORT_SYMBOL(pci_free_irq_vectors);
+
+/**
+ * pci_restore_msi_state() - Restore cached MSI(-X) state on device
+ * @dev: the PCI device to operate on
+ *
+ * Write the Linux-cached MSI(-X) state back on device. This is
+ * typically useful upon system resume, or after an error-recovery PCI
+ * adapter reset.
+ */
+void pci_restore_msi_state(struct pci_dev *dev)
+{
+	__pci_restore_msi_state(dev);
+	__pci_restore_msix_state(dev);
+}
+EXPORT_SYMBOL_GPL(pci_restore_msi_state);
+
+/**
+ * pci_msi_enabled() - Are MSI(-X) interrupts enabled system-wide?
+ *
+ * Return: true if MSI has not been globally disabled through ACPI FADT,
+ * PCI bridge quirks, or the "pci=nomsi" kernel command-line option.
+ */
+int pci_msi_enabled(void)
+{
+	return pci_msi_enable;
+}
+EXPORT_SYMBOL(pci_msi_enabled);
diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c
new file mode 100644
index 000000000..c8be056c2
--- /dev/null
+++ b/drivers/pci/msi/irqdomain.c
@@ -0,0 +1,485 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Message Signaled Interrupt (MSI) - irqdomain support
+ */
+#include <linux/acpi_iort.h>
+#include <linux/irqdomain.h>
+#include <linux/of_irq.h>
+
+#include "msi.h"
+
+int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	struct irq_domain *domain;
+
+	domain = dev_get_msi_domain(&dev->dev);
+	if (domain && irq_domain_is_hierarchy(domain))
+		return msi_domain_alloc_irqs_all_locked(&dev->dev, MSI_DEFAULT_DOMAIN, nvec);
+
+	return pci_msi_legacy_setup_msi_irqs(dev, nvec, type);
+}
+
+void pci_msi_teardown_msi_irqs(struct pci_dev *dev)
+{
+	struct irq_domain *domain;
+
+	domain = dev_get_msi_domain(&dev->dev);
+	if (domain && irq_domain_is_hierarchy(domain)) {
+		msi_domain_free_irqs_all_locked(&dev->dev, MSI_DEFAULT_DOMAIN);
+	} else {
+		pci_msi_legacy_teardown_msi_irqs(dev);
+		msi_free_msi_descs(&dev->dev);
+	}
+}
+
+/**
+ * pci_msi_domain_write_msg - Helper to write MSI message to PCI config space
+ * @irq_data:	Pointer to interrupt data of the MSI interrupt
+ * @msg:	Pointer to the message
+ */
+static void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg)
+{
+	struct msi_desc *desc = irq_data_get_msi_desc(irq_data);
+
+	/*
+	 * For MSI-X desc->irq is always equal to irq_data->irq. For
+	 * MSI only the first interrupt of MULTI MSI passes the test.
+	 */
+	if (desc->irq == irq_data->irq)
+		__pci_write_msi_msg(desc, msg);
+}
+
+/**
+ * pci_msi_domain_calc_hwirq - Generate a unique ID for an MSI source
+ * @desc:	Pointer to the MSI descriptor
+ *
+ * The ID number is only used within the irqdomain.
+ */
+static irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc)
+{
+	struct pci_dev *dev = msi_desc_to_pci_dev(desc);
+
+	return (irq_hw_number_t)desc->msi_index |
+		pci_dev_id(dev) << 11 |
+		(pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 27;
+}
+
+static void pci_msi_domain_set_desc(msi_alloc_info_t *arg,
+				    struct msi_desc *desc)
+{
+	arg->desc = desc;
+	arg->hwirq = pci_msi_domain_calc_hwirq(desc);
+}
+
+static struct msi_domain_ops pci_msi_domain_ops_default = {
+	.set_desc	= pci_msi_domain_set_desc,
+};
+
+static void pci_msi_domain_update_dom_ops(struct msi_domain_info *info)
+{
+	struct msi_domain_ops *ops = info->ops;
+
+	if (ops == NULL) {
+		info->ops = &pci_msi_domain_ops_default;
+	} else {
+		if (ops->set_desc == NULL)
+			ops->set_desc = pci_msi_domain_set_desc;
+	}
+}
+
+static void pci_msi_domain_update_chip_ops(struct msi_domain_info *info)
+{
+	struct irq_chip *chip = info->chip;
+
+	BUG_ON(!chip);
+	if (!chip->irq_write_msi_msg)
+		chip->irq_write_msi_msg = pci_msi_domain_write_msg;
+	if (!chip->irq_mask)
+		chip->irq_mask = pci_msi_mask_irq;
+	if (!chip->irq_unmask)
+		chip->irq_unmask = pci_msi_unmask_irq;
+}
+
+/**
+ * pci_msi_create_irq_domain - Create a MSI interrupt domain
+ * @fwnode:	Optional fwnode of the interrupt controller
+ * @info:	MSI domain info
+ * @parent:	Parent irq domain
+ *
+ * Updates the domain and chip ops and creates a MSI interrupt domain.
+ *
+ * Returns:
+ * A domain pointer or NULL in case of failure.
+ */
+struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
+					     struct msi_domain_info *info,
+					     struct irq_domain *parent)
+{
+	if (WARN_ON(info->flags & MSI_FLAG_LEVEL_CAPABLE))
+		info->flags &= ~MSI_FLAG_LEVEL_CAPABLE;
+
+	if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS)
+		pci_msi_domain_update_dom_ops(info);
+	if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
+		pci_msi_domain_update_chip_ops(info);
+
+	/* Let the core code free MSI descriptors when freeing interrupts */
+	info->flags |= MSI_FLAG_FREE_MSI_DESCS;
+
+	info->flags |= MSI_FLAG_ACTIVATE_EARLY | MSI_FLAG_DEV_SYSFS;
+	if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE))
+		info->flags |= MSI_FLAG_MUST_REACTIVATE;
+
+	/* PCI-MSI is oneshot-safe */
+	info->chip->flags |= IRQCHIP_ONESHOT_SAFE;
+	/* Let the core update the bus token */
+	info->bus_token = DOMAIN_BUS_PCI_MSI;
+
+	return msi_create_irq_domain(fwnode, info, parent);
+}
+EXPORT_SYMBOL_GPL(pci_msi_create_irq_domain);
+
+/*
+ * Per device MSI[-X] domain functionality
+ */
+static void pci_device_domain_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
+{
+	arg->desc = desc;
+	arg->hwirq = desc->msi_index;
+}
+
+static void pci_irq_mask_msi(struct irq_data *data)
+{
+	struct msi_desc *desc = irq_data_get_msi_desc(data);
+
+	pci_msi_mask(desc, BIT(data->irq - desc->irq));
+}
+
+static void pci_irq_unmask_msi(struct irq_data *data)
+{
+	struct msi_desc *desc = irq_data_get_msi_desc(data);
+
+	pci_msi_unmask(desc, BIT(data->irq - desc->irq));
+}
+
+#ifdef CONFIG_GENERIC_IRQ_RESERVATION_MODE
+# define MSI_REACTIVATE		MSI_FLAG_MUST_REACTIVATE
+#else
+# define MSI_REACTIVATE		0
+#endif
+
+#define MSI_COMMON_FLAGS	(MSI_FLAG_FREE_MSI_DESCS |	\
+				 MSI_FLAG_ACTIVATE_EARLY |	\
+				 MSI_FLAG_DEV_SYSFS |		\
+				 MSI_REACTIVATE)
+
+static const struct msi_domain_template pci_msi_template = {
+	.chip = {
+		.name			= "PCI-MSI",
+		.irq_mask		= pci_irq_mask_msi,
+		.irq_unmask		= pci_irq_unmask_msi,
+		.irq_write_msi_msg	= pci_msi_domain_write_msg,
+		.flags			= IRQCHIP_ONESHOT_SAFE,
+	},
+
+	.ops = {
+		.set_desc		= pci_device_domain_set_desc,
+	},
+
+	.info = {
+		.flags			= MSI_COMMON_FLAGS | MSI_FLAG_MULTI_PCI_MSI,
+		.bus_token		= DOMAIN_BUS_PCI_DEVICE_MSI,
+	},
+};
+
+static void pci_irq_mask_msix(struct irq_data *data)
+{
+	pci_msix_mask(irq_data_get_msi_desc(data));
+}
+
+static void pci_irq_unmask_msix(struct irq_data *data)
+{
+	pci_msix_unmask(irq_data_get_msi_desc(data));
+}
+
+static void pci_msix_prepare_desc(struct irq_domain *domain, msi_alloc_info_t *arg,
+				  struct msi_desc *desc)
+{
+	/* Don't fiddle with preallocated MSI descriptors */
+	if (!desc->pci.mask_base)
+		msix_prepare_msi_desc(to_pci_dev(desc->dev), desc);
+}
+
+static const struct msi_domain_template pci_msix_template = {
+	.chip = {
+		.name			= "PCI-MSIX",
+		.irq_mask		= pci_irq_mask_msix,
+		.irq_unmask		= pci_irq_unmask_msix,
+		.irq_write_msi_msg	= pci_msi_domain_write_msg,
+		.flags			= IRQCHIP_ONESHOT_SAFE,
+	},
+
+	.ops = {
+		.prepare_desc		= pci_msix_prepare_desc,
+		.set_desc		= pci_device_domain_set_desc,
+	},
+
+	.info = {
+		.flags			= MSI_COMMON_FLAGS | MSI_FLAG_PCI_MSIX |
+					  MSI_FLAG_PCI_MSIX_ALLOC_DYN,
+		.bus_token		= DOMAIN_BUS_PCI_DEVICE_MSIX,
+	},
+};
+
+static bool pci_match_device_domain(struct pci_dev *pdev, enum irq_domain_bus_token bus_token)
+{
+	return msi_match_device_irq_domain(&pdev->dev, MSI_DEFAULT_DOMAIN, bus_token);
+}
+
+static bool pci_create_device_domain(struct pci_dev *pdev, const struct msi_domain_template *tmpl,
+				     unsigned int hwsize)
+{
+	struct irq_domain *domain = dev_get_msi_domain(&pdev->dev);
+
+	if (!domain || !irq_domain_is_msi_parent(domain))
+		return true;
+
+	return msi_create_device_irq_domain(&pdev->dev, MSI_DEFAULT_DOMAIN, tmpl,
+					    hwsize, NULL, NULL);
+}
+
+/**
+ * pci_setup_msi_device_domain - Setup a device MSI interrupt domain
+ * @pdev:	The PCI device to create the domain on
+ *
+ * Return:
+ *  True when:
+ *	- The device does not have a MSI parent irq domain associated,
+ *	  which keeps the legacy architecture specific and the global
+ *	  PCI/MSI domain models working
+ *	- The MSI domain exists already
+ *	- The MSI domain was successfully allocated
+ *  False when:
+ *	- MSI-X is enabled
+ *	- The domain creation fails.
+ *
+ * The created MSI domain is preserved until:
+ *	- The device is removed
+ *	- MSI is disabled and a MSI-X domain is created
+ */
+bool pci_setup_msi_device_domain(struct pci_dev *pdev)
+{
+	if (WARN_ON_ONCE(pdev->msix_enabled))
+		return false;
+
+	if (pci_match_device_domain(pdev, DOMAIN_BUS_PCI_DEVICE_MSI))
+		return true;
+	if (pci_match_device_domain(pdev, DOMAIN_BUS_PCI_DEVICE_MSIX))
+		msi_remove_device_irq_domain(&pdev->dev, MSI_DEFAULT_DOMAIN);
+
+	return pci_create_device_domain(pdev, &pci_msi_template, 1);
+}
+
+/**
+ * pci_setup_msix_device_domain - Setup a device MSI-X interrupt domain
+ * @pdev:	The PCI device to create the domain on
+ * @hwsize:	The size of the MSI-X vector table
+ *
+ * Return:
+ *  True when:
+ *	- The device does not have a MSI parent irq domain associated,
+ *	  which keeps the legacy architecture specific and the global
+ *	  PCI/MSI domain models working
+ *	- The MSI-X domain exists already
+ *	- The MSI-X domain was successfully allocated
+ *  False when:
+ *	- MSI is enabled
+ *	- The domain creation fails.
+ *
+ * The created MSI-X domain is preserved until:
+ *	- The device is removed
+ *	- MSI-X is disabled and a MSI domain is created
+ */
+bool pci_setup_msix_device_domain(struct pci_dev *pdev, unsigned int hwsize)
+{
+	if (WARN_ON_ONCE(pdev->msi_enabled))
+		return false;
+
+	if (pci_match_device_domain(pdev, DOMAIN_BUS_PCI_DEVICE_MSIX))
+		return true;
+	if (pci_match_device_domain(pdev, DOMAIN_BUS_PCI_DEVICE_MSI))
+		msi_remove_device_irq_domain(&pdev->dev, MSI_DEFAULT_DOMAIN);
+
+	return pci_create_device_domain(pdev, &pci_msix_template, hwsize);
+}
+
+/**
+ * pci_msi_domain_supports - Check for support of a particular feature flag
+ * @pdev:		The PCI device to operate on
+ * @feature_mask:	The feature mask to check for (full match)
+ * @mode:		If ALLOW_LEGACY this grants the feature when there is no irq domain
+ *			associated to the device. If DENY_LEGACY the lack of an irq domain
+ *			makes the feature unsupported
+ */
+bool pci_msi_domain_supports(struct pci_dev *pdev, unsigned int feature_mask,
+			     enum support_mode mode)
+{
+	struct msi_domain_info *info;
+	struct irq_domain *domain;
+	unsigned int supported;
+
+	domain = dev_get_msi_domain(&pdev->dev);
+
+	if (!domain || !irq_domain_is_hierarchy(domain))
+		return mode == ALLOW_LEGACY;
+
+	if (!irq_domain_is_msi_parent(domain)) {
+		/*
+		 * For "global" PCI/MSI interrupt domains the associated
+		 * msi_domain_info::flags is the authoritative source of
+		 * information.
+		 */
+		info = domain->host_data;
+		supported = info->flags;
+	} else {
+		/*
+		 * For MSI parent domains the supported feature set
+		 * is available in the parent ops. This makes checks
+		 * possible before actually instantiating the
+		 * per device domain because the parent is never
+		 * expanding the PCI/MSI functionality.
+		 */
+		supported = domain->msi_parent_ops->supported_flags;
+	}
+
+	return (supported & feature_mask) == feature_mask;
+}
+
+/**
+ * pci_create_ims_domain - Create a secondary IMS domain for a PCI device
+ * @pdev:	The PCI device to operate on
+ * @template:	The MSI info template which describes the domain
+ * @hwsize:	The size of the hardware entry table or 0 if the domain
+ *		is purely software managed
+ * @data:	Optional pointer to domain specific data to be stored
+ *		in msi_domain_info::data
+ *
+ * Return: True on success, false otherwise
+ *
+ * An IMS domain is expected to have the following constraints:
+ *	- The index space is managed by the core code
+ *
+ *	- There is no requirement for consecutive index ranges
+ *
+ *	- The interrupt chip must provide the following callbacks:
+ *		- irq_mask()
+ *		- irq_unmask()
+ *		- irq_write_msi_msg()
+ *
+ *	- The interrupt chip must provide the following optional callbacks
+ *	  when the irq_mask(), irq_unmask() and irq_write_msi_msg() callbacks
+ *	  cannot operate directly on hardware, e.g. in the case that the
+ *	  interrupt message store is in queue memory:
+ *		- irq_bus_lock()
+ *		- irq_bus_unlock()
+ *
+ *	  These callbacks are invoked from preemptible task context and are
+ *	  allowed to sleep. In this case the mandatory callbacks above just
+ *	  store the information. The irq_bus_unlock() callback is supposed
+ *	  to make the change effective before returning.
+ *
+ *	- Interrupt affinity setting is handled by the underlying parent
+ *	  interrupt domain and communicated to the IMS domain via
+ *	  irq_write_msi_msg().
+ *
+ * The domain is automatically destroyed when the PCI device is removed.
+ */
+bool pci_create_ims_domain(struct pci_dev *pdev, const struct msi_domain_template *template,
+			   unsigned int hwsize, void *data)
+{
+	struct irq_domain *domain = dev_get_msi_domain(&pdev->dev);
+
+	if (!domain || !irq_domain_is_msi_parent(domain))
+		return false;
+
+	if (template->info.bus_token != DOMAIN_BUS_PCI_DEVICE_IMS ||
+	    !(template->info.flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS) ||
+	    !(template->info.flags & MSI_FLAG_FREE_MSI_DESCS) ||
+	    !template->chip.irq_mask || !template->chip.irq_unmask ||
+	    !template->chip.irq_write_msi_msg || template->chip.irq_set_affinity)
+		return false;
+
+	return msi_create_device_irq_domain(&pdev->dev, MSI_SECONDARY_DOMAIN, template,
+					    hwsize, data, NULL);
+}
+EXPORT_SYMBOL_GPL(pci_create_ims_domain);
+
+/*
+ * Users of the generic MSI infrastructure expect a device to have a single ID,
+ * so with DMA aliases we have to pick the least-worst compromise. Devices with
+ * DMA phantom functions tend to still emit MSIs from the real function number,
+ * so we ignore those and only consider topological aliases where either the
+ * alias device or RID appears on a different bus number. We also make the
+ * reasonable assumption that bridges are walked in an upstream direction (so
+ * the last one seen wins), and the much braver assumption that the most likely
+ * case is that of PCI->PCIe so we should always use the alias RID. This echoes
+ * the logic from intel_irq_remapping's set_msi_sid(), which presumably works
+ * well enough in practice; in the face of the horrible PCIe<->PCI-X conditions
+ * for taking ownership all we can really do is close our eyes and hope...
+ */
+static int get_msi_id_cb(struct pci_dev *pdev, u16 alias, void *data)
+{
+	u32 *pa = data;
+	u8 bus = PCI_BUS_NUM(*pa);
+
+	if (pdev->bus->number != bus || PCI_BUS_NUM(alias) != bus)
+		*pa = alias;
+
+	return 0;
+}
+
+/**
+ * pci_msi_domain_get_msi_rid - Get the MSI requester id (RID)
+ * @domain:	The interrupt domain
+ * @pdev:	The PCI device.
+ *
+ * The RID for a device is formed from the alias, with a firmware
+ * supplied mapping applied
+ *
+ * Returns: The RID.
+ */
+u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev)
+{
+	struct device_node *of_node;
+	u32 rid = pci_dev_id(pdev);
+
+	pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid);
+
+	of_node = irq_domain_get_of_node(domain);
+	rid = of_node ? of_msi_map_id(&pdev->dev, of_node, rid) :
+			iort_msi_map_id(&pdev->dev, rid);
+
+	return rid;
+}
+
+/**
+ * pci_msi_get_device_domain - Get the MSI domain for a given PCI device
+ * @pdev:	The PCI device
+ *
+ * Use the firmware data to find a device-specific MSI domain
+ * (i.e. not one that is set as a default).
+ *
+ * Returns: The corresponding MSI domain or NULL if none has been found.
+ */
+struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev)
+{
+	struct irq_domain *dom;
+	u32 rid = pci_dev_id(pdev);
+
+	pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid);
+	dom = of_msi_map_get_device_domain(&pdev->dev, rid, DOMAIN_BUS_PCI_MSI);
+	if (!dom)
+		dom = iort_get_device_domain(&pdev->dev, rid,
+					     DOMAIN_BUS_PCI_MSI);
+	return dom;
+}
diff --git a/drivers/pci/msi/legacy.c b/drivers/pci/msi/legacy.c
new file mode 100644
index 000000000..db761adef
--- /dev/null
+++ b/drivers/pci/msi/legacy.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Message Signaled Interrupt (MSI).
+ *
+ * Legacy architecture specific setup and teardown mechanism.
+ */
+#include "msi.h"
+
+/* Arch hooks */
+int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+{
+	return -EINVAL;
+}
+
+void __weak arch_teardown_msi_irq(unsigned int irq)
+{
+}
+
+int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	struct msi_desc *desc;
+	int ret;
+
+	/*
+	 * If an architecture wants to support multiple MSI, it needs to
+	 * override arch_setup_msi_irqs()
+	 */
+	if (type == PCI_CAP_ID_MSI && nvec > 1)
+		return 1;
+
+	msi_for_each_desc(desc, &dev->dev, MSI_DESC_NOTASSOCIATED) {
+		ret = arch_setup_msi_irq(dev, desc);
+		if (ret)
+			return ret < 0 ? ret : -ENOSPC;
+	}
+
+	return 0;
+}
+
+void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
+{
+	struct msi_desc *desc;
+	int i;
+
+	msi_for_each_desc(desc, &dev->dev, MSI_DESC_ASSOCIATED) {
+		for (i = 0; i < desc->nvec_used; i++)
+			arch_teardown_msi_irq(desc->irq + i);
+	}
+}
+
+static int pci_msi_setup_check_result(struct pci_dev *dev, int type, int ret)
+{
+	struct msi_desc *desc;
+	int avail = 0;
+
+	if (type != PCI_CAP_ID_MSIX || ret >= 0)
+		return ret;
+
+	/* Scan the MSI descriptors for successfully allocated ones. */
+	msi_for_each_desc(desc, &dev->dev, MSI_DESC_ASSOCIATED)
+		avail++;
+
+	return avail ? avail : ret;
+}
+
+int pci_msi_legacy_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	int ret = arch_setup_msi_irqs(dev, nvec, type);
+
+	ret = pci_msi_setup_check_result(dev, type, ret);
+	if (!ret)
+		ret = msi_device_populate_sysfs(&dev->dev);
+	return ret;
+}
+
+void pci_msi_legacy_teardown_msi_irqs(struct pci_dev *dev)
+{
+	msi_device_destroy_sysfs(&dev->dev);
+	arch_teardown_msi_irqs(dev);
+}
diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c
new file mode 100644
index 000000000..ef1d8857a
--- /dev/null
+++ b/drivers/pci/msi/msi.c
@@ -0,0 +1,915 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Message Signaled Interrupt (MSI)
+ *
+ * Copyright (C) 2003-2004 Intel
+ * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
+ * Copyright (C) 2016 Christoph Hellwig.
+ */
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/irq.h>
+
+#include "../pci.h"
+#include "msi.h"
+
+int pci_msi_enable = 1;
+int pci_msi_ignore_mask;
+
+/**
+ * pci_msi_supported - check whether MSI may be enabled on a device
+ * @dev: pointer to the pci_dev data structure of MSI device function
+ * @nvec: how many MSIs have been requested?
+ *
+ * Look at global flags, the device itself, and its parent buses
+ * to determine if MSI/-X are supported for the device. If MSI/-X is
+ * supported return 1, else return 0.
+ **/
+static int pci_msi_supported(struct pci_dev *dev, int nvec)
+{
+	struct pci_bus *bus;
+
+	/* MSI must be globally enabled and supported by the device */
+	if (!pci_msi_enable)
+		return 0;
+
+	if (!dev || dev->no_msi)
+		return 0;
+
+	/*
+	 * You can't ask to have 0 or less MSIs configured.
+	 *  a) it's stupid ..
+	 *  b) the list manipulation code assumes nvec >= 1.
+	 */
+	if (nvec < 1)
+		return 0;
+
+	/*
+	 * Any bridge which does NOT route MSI transactions from its
+	 * secondary bus to its primary bus must set NO_MSI flag on
+	 * the secondary pci_bus.
+	 *
+	 * The NO_MSI flag can either be set directly by:
+	 * - arch-specific PCI host bus controller drivers (deprecated)
+	 * - quirks for specific PCI bridges
+	 *
+	 * or indirectly by platform-specific PCI host bridge drivers by
+	 * advertising the 'msi_domain' property, which results in
+	 * the NO_MSI flag when no MSI domain is found for this bridge
+	 * at probe time.
+	 */
+	for (bus = dev->bus; bus; bus = bus->parent)
+		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
+			return 0;
+
+	return 1;
+}
+
+static void pcim_msi_release(void *pcidev)
+{
+	struct pci_dev *dev = pcidev;
+
+	dev->is_msi_managed = false;
+	pci_free_irq_vectors(dev);
+}
+
+/*
+ * Needs to be separate from pcim_release to prevent an ordering problem
+ * vs. msi_device_data_release() in the MSI core code.
+ */
+static int pcim_setup_msi_release(struct pci_dev *dev)
+{
+	int ret;
+
+	if (!pci_is_managed(dev) || dev->is_msi_managed)
+		return 0;
+
+	ret = devm_add_action(&dev->dev, pcim_msi_release, dev);
+	if (!ret)
+		dev->is_msi_managed = true;
+	return ret;
+}
+
+/*
+ * Ordering vs. devres: msi device data has to be installed first so that
+ * pcim_msi_release() is invoked before it on device release.
+ */
+static int pci_setup_msi_context(struct pci_dev *dev)
+{
+	int ret = msi_setup_device_data(&dev->dev);
+
+	if (!ret)
+		ret = pcim_setup_msi_release(dev);
+	return ret;
+}
+
+/*
+ * Helper functions for mask/unmask and MSI message handling
+ */
+
+void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 set)
+{
+	raw_spinlock_t *lock = &to_pci_dev(desc->dev)->msi_lock;
+	unsigned long flags;
+
+	if (!desc->pci.msi_attrib.can_mask)
+		return;
+
+	raw_spin_lock_irqsave(lock, flags);
+	desc->pci.msi_mask &= ~clear;
+	desc->pci.msi_mask |= set;
+	pci_write_config_dword(msi_desc_to_pci_dev(desc), desc->pci.mask_pos,
+			       desc->pci.msi_mask);
+	raw_spin_unlock_irqrestore(lock, flags);
+}
+
+/**
+ * pci_msi_mask_irq - Generic IRQ chip callback to mask PCI/MSI interrupts
+ * @data:	pointer to irqdata associated to that interrupt
+ */
+void pci_msi_mask_irq(struct irq_data *data)
+{
+	struct msi_desc *desc = irq_data_get_msi_desc(data);
+
+	__pci_msi_mask_desc(desc, BIT(data->irq - desc->irq));
+}
+EXPORT_SYMBOL_GPL(pci_msi_mask_irq);
+
+/**
+ * pci_msi_unmask_irq - Generic IRQ chip callback to unmask PCI/MSI interrupts
+ * @data:	pointer to irqdata associated to that interrupt
+ */
+void pci_msi_unmask_irq(struct irq_data *data)
+{
+	struct msi_desc *desc = irq_data_get_msi_desc(data);
+
+	__pci_msi_unmask_desc(desc, BIT(data->irq - desc->irq));
+}
+EXPORT_SYMBOL_GPL(pci_msi_unmask_irq);
+
+void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
+{
+	struct pci_dev *dev = msi_desc_to_pci_dev(entry);
+
+	BUG_ON(dev->current_state != PCI_D0);
+
+	if (entry->pci.msi_attrib.is_msix) {
+		void __iomem *base = pci_msix_desc_addr(entry);
+
+		if (WARN_ON_ONCE(entry->pci.msi_attrib.is_virtual))
+			return;
+
+		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
+		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
+		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
+	} else {
+		int pos = dev->msi_cap;
+		u16 data;
+
+		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
+				      &msg->address_lo);
+		if (entry->pci.msi_attrib.is_64) {
+			pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
+					      &msg->address_hi);
+			pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data);
+		} else {
+			msg->address_hi = 0;
+			pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data);
+		}
+		msg->data = data;
+	}
+}
+
+static inline void pci_write_msg_msi(struct pci_dev *dev, struct msi_desc *desc,
+				     struct msi_msg *msg)
+{
+	int pos = dev->msi_cap;
+	u16 msgctl;
+
+	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
+	msgctl &= ~PCI_MSI_FLAGS_QSIZE;
+	msgctl |= desc->pci.msi_attrib.multiple << 4;
+	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl);
+
+	pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, msg->address_lo);
+	if (desc->pci.msi_attrib.is_64) {
+		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,  msg->address_hi);
+		pci_write_config_word(dev, pos + PCI_MSI_DATA_64, msg->data);
+	} else {
+		pci_write_config_word(dev, pos + PCI_MSI_DATA_32, msg->data);
+	}
+	/* Ensure that the writes are visible in the device */
+	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
+}
+
+static inline void pci_write_msg_msix(struct msi_desc *desc, struct msi_msg *msg)
+{
+	void __iomem *base = pci_msix_desc_addr(desc);
+	u32 ctrl = desc->pci.msix_ctrl;
+	bool unmasked = !(ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT);
+
+	if (desc->pci.msi_attrib.is_virtual)
+		return;
+	/*
+	 * The specification mandates that the entry is masked
+	 * when the message is modified:
+	 *
+	 * "If software changes the Address or Data value of an
+	 * entry while the entry is unmasked, the result is
+	 * undefined."
+	 */
+	if (unmasked)
+		pci_msix_write_vector_ctrl(desc, ctrl | PCI_MSIX_ENTRY_CTRL_MASKBIT);
+
+	writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
+	writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
+	writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
+
+	if (unmasked)
+		pci_msix_write_vector_ctrl(desc, ctrl);
+
+	/* Ensure that the writes are visible in the device */
+	readl(base + PCI_MSIX_ENTRY_DATA);
+}
+
+void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
+{
+	struct pci_dev *dev = msi_desc_to_pci_dev(entry);
+
+	if (dev->current_state != PCI_D0 || pci_dev_is_disconnected(dev)) {
+		/* Don't touch the hardware now */
+	} else if (entry->pci.msi_attrib.is_msix) {
+		pci_write_msg_msix(entry, msg);
+	} else {
+		pci_write_msg_msi(dev, entry, msg);
+	}
+
+	entry->msg = *msg;
+
+	if (entry->write_msi_msg)
+		entry->write_msi_msg(entry, entry->write_msi_msg_data);
+}
+
+void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
+{
+	struct msi_desc *entry = irq_get_msi_desc(irq);
+
+	__pci_write_msi_msg(entry, msg);
+}
+EXPORT_SYMBOL_GPL(pci_write_msi_msg);
+
+
+/* PCI/MSI specific functionality */
+
+static void pci_intx_for_msi(struct pci_dev *dev, int enable)
+{
+	if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG))
+		pci_intx(dev, enable);
+}
+
+static void pci_msi_set_enable(struct pci_dev *dev, int enable)
+{
+	u16 control;
+
+	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
+	control &= ~PCI_MSI_FLAGS_ENABLE;
+	if (enable)
+		control |= PCI_MSI_FLAGS_ENABLE;
+	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
+}
+
+static int msi_setup_msi_desc(struct pci_dev *dev, int nvec,
+			      struct irq_affinity_desc *masks)
+{
+	struct msi_desc desc;
+	u16 control;
+
+	/* MSI Entry Initialization */
+	memset(&desc, 0, sizeof(desc));
+
+	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
+	/* Lies, damned lies, and MSIs */
+	if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING)
+		control |= PCI_MSI_FLAGS_MASKBIT;
+	/* Respect XEN's mask disabling */
+	if (pci_msi_ignore_mask)
+		control &= ~PCI_MSI_FLAGS_MASKBIT;
+
+	desc.nvec_used			= nvec;
+	desc.pci.msi_attrib.is_64	= !!(control & PCI_MSI_FLAGS_64BIT);
+	desc.pci.msi_attrib.can_mask	= !!(control & PCI_MSI_FLAGS_MASKBIT);
+	desc.pci.msi_attrib.default_irq	= dev->irq;
+	desc.pci.msi_attrib.multi_cap	= (control & PCI_MSI_FLAGS_QMASK) >> 1;
+	desc.pci.msi_attrib.multiple	= ilog2(__roundup_pow_of_two(nvec));
+	desc.affinity			= masks;
+
+	if (control & PCI_MSI_FLAGS_64BIT)
+		desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
+	else
+		desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_32;
+
+	/* Save the initial mask status */
+	if (desc.pci.msi_attrib.can_mask)
+		pci_read_config_dword(dev, desc.pci.mask_pos, &desc.pci.msi_mask);
+
+	return msi_insert_msi_desc(&dev->dev, &desc);
+}
+
+static int msi_verify_entries(struct pci_dev *dev)
+{
+	struct msi_desc *entry;
+
+	if (!dev->no_64bit_msi)
+		return 0;
+
+	msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
+		if (entry->msg.address_hi) {
+			pci_err(dev, "arch assigned 64-bit MSI address %#x%08x but device only supports 32 bits\n",
+				entry->msg.address_hi, entry->msg.address_lo);
+			break;
+		}
+	}
+	return !entry ? 0 : -EIO;
+}
+
+/**
+ * msi_capability_init - configure device's MSI capability structure
+ * @dev: pointer to the pci_dev data structure of MSI device function
+ * @nvec: number of interrupts to allocate
+ * @affd: description of automatic IRQ affinity assignments (may be %NULL)
+ *
+ * Setup the MSI capability structure of the device with the requested
+ * number of interrupts.  A return value of zero indicates the successful
+ * setup of an entry with the new MSI IRQ.  A negative return value indicates
+ * an error, and a positive return value indicates the number of interrupts
+ * which could have been allocated.
+ */
+static int msi_capability_init(struct pci_dev *dev, int nvec,
+			       struct irq_affinity *affd)
+{
+	struct irq_affinity_desc *masks = NULL;
+	struct msi_desc *entry;
+	int ret;
+
+	/* Reject multi-MSI early on irq domain enabled architectures */
+	if (nvec > 1 && !pci_msi_domain_supports(dev, MSI_FLAG_MULTI_PCI_MSI, ALLOW_LEGACY))
+		return 1;
+
+	/*
+	 * Disable MSI during setup in the hardware, but mark it enabled
+	 * so that setup code can evaluate it.
+	 */
+	pci_msi_set_enable(dev, 0);
+	dev->msi_enabled = 1;
+
+	if (affd)
+		masks = irq_create_affinity_masks(nvec, affd);
+
+	msi_lock_descs(&dev->dev);
+	ret = msi_setup_msi_desc(dev, nvec, masks);
+	if (ret)
+		goto fail;
+
+	/* All MSIs are unmasked by default; mask them all */
+	entry = msi_first_desc(&dev->dev, MSI_DESC_ALL);
+	pci_msi_mask(entry, msi_multi_mask(entry));
+
+	/* Configure MSI capability structure */
+	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
+	if (ret)
+		goto err;
+
+	ret = msi_verify_entries(dev);
+	if (ret)
+		goto err;
+
+	/* Set MSI enabled bits	*/
+	pci_intx_for_msi(dev, 0);
+	pci_msi_set_enable(dev, 1);
+
+	pcibios_free_irq(dev);
+	dev->irq = entry->irq;
+	goto unlock;
+
+err:
+	pci_msi_unmask(entry, msi_multi_mask(entry));
+	pci_free_msi_irqs(dev);
+fail:
+	dev->msi_enabled = 0;
+unlock:
+	msi_unlock_descs(&dev->dev);
+	kfree(masks);
+	return ret;
+}
+
+int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
+			   struct irq_affinity *affd)
+{
+	int nvec;
+	int rc;
+
+	if (!pci_msi_supported(dev, minvec) || dev->current_state != PCI_D0)
+		return -EINVAL;
+
+	/* Check whether driver already requested MSI-X IRQs */
+	if (dev->msix_enabled) {
+		pci_info(dev, "can't enable MSI (MSI-X already enabled)\n");
+		return -EINVAL;
+	}
+
+	if (maxvec < minvec)
+		return -ERANGE;
+
+	if (WARN_ON_ONCE(dev->msi_enabled))
+		return -EINVAL;
+
+	nvec = pci_msi_vec_count(dev);
+	if (nvec < 0)
+		return nvec;
+	if (nvec < minvec)
+		return -ENOSPC;
+
+	if (nvec > maxvec)
+		nvec = maxvec;
+
+	rc = pci_setup_msi_context(dev);
+	if (rc)
+		return rc;
+
+	if (!pci_setup_msi_device_domain(dev))
+		return -ENODEV;
+
+	for (;;) {
+		if (affd) {
+			nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
+			if (nvec < minvec)
+				return -ENOSPC;
+		}
+
+		rc = msi_capability_init(dev, nvec, affd);
+		if (rc == 0)
+			return nvec;
+
+		if (rc < 0)
+			return rc;
+		if (rc < minvec)
+			return -ENOSPC;
+
+		nvec = rc;
+	}
+}
+
+/**
+ * pci_msi_vec_count - Return the number of MSI vectors a device can send
+ * @dev: device to report about
+ *
+ * This function returns the number of MSI vectors a device requested via
+ * Multiple Message Capable register. It returns a negative errno if the
+ * device is not capable sending MSI interrupts. Otherwise, the call succeeds
+ * and returns a power of two, up to a maximum of 2^5 (32), according to the
+ * MSI specification.
+ **/
+int pci_msi_vec_count(struct pci_dev *dev)
+{
+	int ret;
+	u16 msgctl;
+
+	if (!dev->msi_cap)
+		return -EINVAL;
+
+	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
+	ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
+
+	return ret;
+}
+EXPORT_SYMBOL(pci_msi_vec_count);
+
+/*
+ * Architecture override returns true when the PCI MSI message should be
+ * written by the generic restore function.
+ */
+bool __weak arch_restore_msi_irqs(struct pci_dev *dev)
+{
+	return true;
+}
+
+void __pci_restore_msi_state(struct pci_dev *dev)
+{
+	struct msi_desc *entry;
+	u16 control;
+
+	if (!dev->msi_enabled)
+		return;
+
+	entry = irq_get_msi_desc(dev->irq);
+
+	pci_intx_for_msi(dev, 0);
+	pci_msi_set_enable(dev, 0);
+	if (arch_restore_msi_irqs(dev))
+		__pci_write_msi_msg(entry, &entry->msg);
+
+	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
+	pci_msi_update_mask(entry, 0, 0);
+	control &= ~PCI_MSI_FLAGS_QSIZE;
+	control |= (entry->pci.msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
+	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
+}
+
+void pci_msi_shutdown(struct pci_dev *dev)
+{
+	struct msi_desc *desc;
+
+	if (!pci_msi_enable || !dev || !dev->msi_enabled)
+		return;
+
+	pci_msi_set_enable(dev, 0);
+	pci_intx_for_msi(dev, 1);
+	dev->msi_enabled = 0;
+
+	/* Return the device with MSI unmasked as initial states */
+	desc = msi_first_desc(&dev->dev, MSI_DESC_ALL);
+	if (!WARN_ON_ONCE(!desc))
+		pci_msi_unmask(desc, msi_multi_mask(desc));
+
+	/* Restore dev->irq to its default pin-assertion IRQ */
+	dev->irq = desc->pci.msi_attrib.default_irq;
+	pcibios_alloc_irq(dev);
+}
+
+/* PCI/MSI-X specific functionality */
+
+static void pci_msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set)
+{
+	u16 ctrl;
+
+	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
+	ctrl &= ~clear;
+	ctrl |= set;
+	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl);
+}
+
+static void __iomem *msix_map_region(struct pci_dev *dev,
+				     unsigned int nr_entries)
+{
+	resource_size_t phys_addr;
+	u32 table_offset;
+	unsigned long flags;
+	u8 bir;
+
+	pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE,
+			      &table_offset);
+	bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
+	flags = pci_resource_flags(dev, bir);
+	if (!flags || (flags & IORESOURCE_UNSET))
+		return NULL;
+
+	table_offset &= PCI_MSIX_TABLE_OFFSET;
+	phys_addr = pci_resource_start(dev, bir) + table_offset;
+
+	return ioremap(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
+}
+
+/**
+ * msix_prepare_msi_desc - Prepare a half initialized MSI descriptor for operation
+ * @dev:	The PCI device for which the descriptor is prepared
+ * @desc:	The MSI descriptor for preparation
+ *
+ * This is separate from msix_setup_msi_descs() below to handle dynamic
+ * allocations for MSI-X after initial enablement.
+ *
+ * Ideally the whole MSI-X setup would work that way, but there is no way to
+ * support this for the legacy arch_setup_msi_irqs() mechanism and for the
+ * fake irq domains like the x86 XEN one. Sigh...
+ *
+ * The descriptor is zeroed and only @desc::msi_index and @desc::affinity
+ * are set. When called from msix_setup_msi_descs() then the is_virtual
+ * attribute is initialized as well.
+ *
+ * Fill in the rest.
+ */
+void msix_prepare_msi_desc(struct pci_dev *dev, struct msi_desc *desc)
+{
+	desc->nvec_used				= 1;
+	desc->pci.msi_attrib.is_msix		= 1;
+	desc->pci.msi_attrib.is_64		= 1;
+	desc->pci.msi_attrib.default_irq	= dev->irq;
+	desc->pci.mask_base			= dev->msix_base;
+	desc->pci.msi_attrib.can_mask		= !pci_msi_ignore_mask &&
+						  !desc->pci.msi_attrib.is_virtual;
+
+	if (desc->pci.msi_attrib.can_mask) {
+		void __iomem *addr = pci_msix_desc_addr(desc);
+
+		desc->pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
+	}
+}
+
+static int msix_setup_msi_descs(struct pci_dev *dev, struct msix_entry *entries,
+				int nvec, struct irq_affinity_desc *masks)
+{
+	int ret = 0, i, vec_count = pci_msix_vec_count(dev);
+	struct irq_affinity_desc *curmsk;
+	struct msi_desc desc;
+
+	memset(&desc, 0, sizeof(desc));
+
+	for (i = 0, curmsk = masks; i < nvec; i++, curmsk++) {
+		desc.msi_index = entries ? entries[i].entry : i;
+		desc.affinity = masks ? curmsk : NULL;
+		desc.pci.msi_attrib.is_virtual = desc.msi_index >= vec_count;
+
+		msix_prepare_msi_desc(dev, &desc);
+
+		ret = msi_insert_msi_desc(&dev->dev, &desc);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+static void msix_update_entries(struct pci_dev *dev, struct msix_entry *entries)
+{
+	struct msi_desc *desc;
+
+	if (entries) {
+		msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL) {
+			entries->vector = desc->irq;
+			entries++;
+		}
+	}
+}
+
+static void msix_mask_all(void __iomem *base, int tsize)
+{
+	u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT;
+	int i;
+
+	if (pci_msi_ignore_mask)
+		return;
+
+	for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE)
+		writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL);
+}
+
+static int msix_setup_interrupts(struct pci_dev *dev, struct msix_entry *entries,
+				 int nvec, struct irq_affinity *affd)
+{
+	struct irq_affinity_desc *masks = NULL;
+	int ret;
+
+	if (affd)
+		masks = irq_create_affinity_masks(nvec, affd);
+
+	msi_lock_descs(&dev->dev);
+	ret = msix_setup_msi_descs(dev, entries, nvec, masks);
+	if (ret)
+		goto out_free;
+
+	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
+	if (ret)
+		goto out_free;
+
+	/* Check if all MSI entries honor device restrictions */
+	ret = msi_verify_entries(dev);
+	if (ret)
+		goto out_free;
+
+	msix_update_entries(dev, entries);
+	goto out_unlock;
+
+out_free:
+	pci_free_msi_irqs(dev);
+out_unlock:
+	msi_unlock_descs(&dev->dev);
+	kfree(masks);
+	return ret;
+}
+
+/**
+ * msix_capability_init - configure device's MSI-X capability
+ * @dev: pointer to the pci_dev data structure of MSI-X device function
+ * @entries: pointer to an array of struct msix_entry entries
+ * @nvec: number of @entries
+ * @affd: Optional pointer to enable automatic affinity assignment
+ *
+ * Setup the MSI-X capability structure of device function with a
+ * single MSI-X IRQ. A return of zero indicates the successful setup of
+ * requested MSI-X entries with allocated IRQs or non-zero for otherwise.
+ **/
+static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
+				int nvec, struct irq_affinity *affd)
+{
+	int ret, tsize;
+	u16 control;
+
+	/*
+	 * Some devices require MSI-X to be enabled before the MSI-X
+	 * registers can be accessed.  Mask all the vectors to prevent
+	 * interrupts coming in before they're fully set up.
+	 */
+	pci_msix_clear_and_set_ctrl(dev, 0, PCI_MSIX_FLAGS_MASKALL |
+				    PCI_MSIX_FLAGS_ENABLE);
+
+	/* Mark it enabled so setup functions can query it */
+	dev->msix_enabled = 1;
+
+	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
+	/* Request & Map MSI-X table region */
+	tsize = msix_table_size(control);
+	dev->msix_base = msix_map_region(dev, tsize);
+	if (!dev->msix_base) {
+		ret = -ENOMEM;
+		goto out_disable;
+	}
+
+	ret = msix_setup_interrupts(dev, entries, nvec, affd);
+	if (ret)
+		goto out_disable;
+
+	/* Disable INTX */
+	pci_intx_for_msi(dev, 0);
+
+	/*
+	 * Ensure that all table entries are masked to prevent
+	 * stale entries from firing in a crash kernel.
+	 *
+	 * Done late to deal with a broken Marvell NVME device
+	 * which takes the MSI-X mask bits into account even
+	 * when MSI-X is disabled, which prevents MSI delivery.
+	 */
+	msix_mask_all(dev->msix_base, tsize);
+	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
+
+	pcibios_free_irq(dev);
+	return 0;
+
+out_disable:
+	dev->msix_enabled = 0;
+	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0);
+
+	return ret;
+}
+
+static bool pci_msix_validate_entries(struct pci_dev *dev, struct msix_entry *entries, int nvec)
+{
+	bool nogap;
+	int i, j;
+
+	if (!entries)
+		return true;
+
+	nogap = pci_msi_domain_supports(dev, MSI_FLAG_MSIX_CONTIGUOUS, DENY_LEGACY);
+
+	for (i = 0; i < nvec; i++) {
+		/* Check for duplicate entries */
+		for (j = i + 1; j < nvec; j++) {
+			if (entries[i].entry == entries[j].entry)
+				return false;
+		}
+		/* Check for unsupported gaps */
+		if (nogap && entries[i].entry != i)
+			return false;
+	}
+	return true;
+}
+
+int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec,
+			    int maxvec, struct irq_affinity *affd, int flags)
+{
+	int hwsize, rc, nvec = maxvec;
+
+	if (maxvec < minvec)
+		return -ERANGE;
+
+	if (dev->msi_enabled) {
+		pci_info(dev, "can't enable MSI-X (MSI already enabled)\n");
+		return -EINVAL;
+	}
+
+	if (WARN_ON_ONCE(dev->msix_enabled))
+		return -EINVAL;
+
+	/* Check MSI-X early on irq domain enabled architectures */
+	if (!pci_msi_domain_supports(dev, MSI_FLAG_PCI_MSIX, ALLOW_LEGACY))
+		return -ENOTSUPP;
+
+	if (!pci_msi_supported(dev, nvec) || dev->current_state != PCI_D0)
+		return -EINVAL;
+
+	hwsize = pci_msix_vec_count(dev);
+	if (hwsize < 0)
+		return hwsize;
+
+	if (!pci_msix_validate_entries(dev, entries, nvec))
+		return -EINVAL;
+
+	if (hwsize < nvec) {
+		/* Keep the IRQ virtual hackery working */
+		if (flags & PCI_IRQ_VIRTUAL)
+			hwsize = nvec;
+		else
+			nvec = hwsize;
+	}
+
+	if (nvec < minvec)
+		return -ENOSPC;
+
+	rc = pci_setup_msi_context(dev);
+	if (rc)
+		return rc;
+
+	if (!pci_setup_msix_device_domain(dev, hwsize))
+		return -ENODEV;
+
+	for (;;) {
+		if (affd) {
+			nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
+			if (nvec < minvec)
+				return -ENOSPC;
+		}
+
+		rc = msix_capability_init(dev, entries, nvec, affd);
+		if (rc == 0)
+			return nvec;
+
+		if (rc < 0)
+			return rc;
+		if (rc < minvec)
+			return -ENOSPC;
+
+		nvec = rc;
+	}
+}
+
+void __pci_restore_msix_state(struct pci_dev *dev)
+{
+	struct msi_desc *entry;
+	bool write_msg;
+
+	if (!dev->msix_enabled)
+		return;
+
+	/* route the table */
+	pci_intx_for_msi(dev, 0);
+	pci_msix_clear_and_set_ctrl(dev, 0,
+				PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);
+
+	write_msg = arch_restore_msi_irqs(dev);
+
+	msi_lock_descs(&dev->dev);
+	msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
+		if (write_msg)
+			__pci_write_msi_msg(entry, &entry->msg);
+		pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl);
+	}
+	msi_unlock_descs(&dev->dev);
+
+	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
+}
+
+void pci_msix_shutdown(struct pci_dev *dev)
+{
+	struct msi_desc *desc;
+
+	if (!pci_msi_enable || !dev || !dev->msix_enabled)
+		return;
+
+	if (pci_dev_is_disconnected(dev)) {
+		dev->msix_enabled = 0;
+		return;
+	}
+
+	/* Return the device with MSI-X masked as initial states */
+	msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL)
+		pci_msix_mask(desc);
+
+	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
+	pci_intx_for_msi(dev, 1);
+	dev->msix_enabled = 0;
+	pcibios_alloc_irq(dev);
+}
+
+/* Common interfaces */
+
+void pci_free_msi_irqs(struct pci_dev *dev)
+{
+	pci_msi_teardown_msi_irqs(dev);
+
+	if (dev->msix_base) {
+		iounmap(dev->msix_base);
+		dev->msix_base = NULL;
+	}
+}
+
+/* Misc. infrastructure */
+
+struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
+{
+	return to_pci_dev(desc->dev);
+}
+EXPORT_SYMBOL(msi_desc_to_pci_dev);
+
+void pci_no_msi(void)
+{
+	pci_msi_enable = 0;
+}
diff --git a/drivers/pci/msi/msi.h b/drivers/pci/msi/msi.h
new file mode 100644
index 000000000..ee53cf079
--- /dev/null
+++ b/drivers/pci/msi/msi.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/pci.h>
+#include <linux/msi.h>
+
+#define msix_table_size(flags)	((flags & PCI_MSIX_FLAGS_QSIZE) + 1)
+
+int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
+void pci_msi_teardown_msi_irqs(struct pci_dev *dev);
+
+/* Mask/unmask helpers */
+void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 set);
+
+static inline void pci_msi_mask(struct msi_desc *desc, u32 mask)
+{
+	pci_msi_update_mask(desc, 0, mask);
+}
+
+static inline void pci_msi_unmask(struct msi_desc *desc, u32 mask)
+{
+	pci_msi_update_mask(desc, mask, 0);
+}
+
+static inline void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
+{
+	return desc->pci.mask_base + desc->msi_index * PCI_MSIX_ENTRY_SIZE;
+}
+
+/*
+ * This internal function does not flush PCI writes to the device.  All
+ * users must ensure that they read from the device before either assuming
+ * that the device state is up to date, or returning out of this file.
+ * It does not affect the msi_desc::msix_ctrl cache either. Use with care!
+ */
+static inline void pci_msix_write_vector_ctrl(struct msi_desc *desc, u32 ctrl)
+{
+	void __iomem *desc_addr = pci_msix_desc_addr(desc);
+
+	if (desc->pci.msi_attrib.can_mask)
+		writel(ctrl, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
+}
+
+static inline void pci_msix_mask(struct msi_desc *desc)
+{
+	desc->pci.msix_ctrl |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
+	pci_msix_write_vector_ctrl(desc, desc->pci.msix_ctrl);
+	/* Flush write to device */
+	readl(desc->pci.mask_base);
+}
+
+static inline void pci_msix_unmask(struct msi_desc *desc)
+{
+	desc->pci.msix_ctrl &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
+	pci_msix_write_vector_ctrl(desc, desc->pci.msix_ctrl);
+}
+
+static inline void __pci_msi_mask_desc(struct msi_desc *desc, u32 mask)
+{
+	if (desc->pci.msi_attrib.is_msix)
+		pci_msix_mask(desc);
+	else
+		pci_msi_mask(desc, mask);
+}
+
+static inline void __pci_msi_unmask_desc(struct msi_desc *desc, u32 mask)
+{
+	if (desc->pci.msi_attrib.is_msix)
+		pci_msix_unmask(desc);
+	else
+		pci_msi_unmask(desc, mask);
+}
+
+/*
+ * PCI 2.3 does not specify mask bits for each MSI interrupt.  Attempting to
+ * mask all MSI interrupts by clearing the MSI enable bit does not work
+ * reliably as devices without an INTx disable bit will then generate a
+ * level IRQ which will never be cleared.
+ */
+static inline __attribute_const__ u32 msi_multi_mask(struct msi_desc *desc)
+{
+	/* Don't shift by >= width of type */
+	if (desc->pci.msi_attrib.multi_cap >= 5)
+		return 0xffffffff;
+	return (1 << (1 << desc->pci.msi_attrib.multi_cap)) - 1;
+}
+
+void msix_prepare_msi_desc(struct pci_dev *dev, struct msi_desc *desc);
+
+/* Subsystem variables */
+extern int pci_msi_enable;
+
+/* MSI internal functions invoked from the public APIs */
+void pci_msi_shutdown(struct pci_dev *dev);
+void pci_msix_shutdown(struct pci_dev *dev);
+void pci_free_msi_irqs(struct pci_dev *dev);
+int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, struct irq_affinity *affd);
+int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec,
+			    int maxvec,  struct irq_affinity *affd, int flags);
+void __pci_restore_msi_state(struct pci_dev *dev);
+void __pci_restore_msix_state(struct pci_dev *dev);
+
+/* irq_domain related functionality */
+
+enum support_mode {
+	ALLOW_LEGACY,
+	DENY_LEGACY,
+};
+
+bool pci_msi_domain_supports(struct pci_dev *dev, unsigned int feature_mask, enum support_mode mode);
+bool pci_setup_msi_device_domain(struct pci_dev *pdev);
+bool pci_setup_msix_device_domain(struct pci_dev *pdev, unsigned int hwsize);
+
+/* Legacy (!IRQDOMAIN) fallbacks */
+
+#ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS
+int pci_msi_legacy_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
+void pci_msi_legacy_teardown_msi_irqs(struct pci_dev *dev);
+#else
+static inline int pci_msi_legacy_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	WARN_ON_ONCE(1);
+	return -ENODEV;
+}
+
+static inline void pci_msi_legacy_teardown_msi_irqs(struct pci_dev *dev)
+{
+	WARN_ON_ONCE(1);
+}
+#endif
diff --git a/drivers/pci/msi/pcidev_msi.c b/drivers/pci/msi/pcidev_msi.c
new file mode 100644
index 000000000..5520aff53
--- /dev/null
+++ b/drivers/pci/msi/pcidev_msi.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MSI[X} related functions which are available unconditionally.
+ */
+#include "../pci.h"
+
+/*
+ * Disable the MSI[X] hardware to avoid screaming interrupts during boot.
+ * This is the power on reset default so usually this should be a noop.
+ */
+
+void pci_msi_init(struct pci_dev *dev)
+{
+	u16 ctrl;
+
+	dev->msi_cap = pci_find_capability(dev, PCI_CAP_ID_MSI);
+	if (!dev->msi_cap)
+		return;
+
+	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &ctrl);
+	if (ctrl & PCI_MSI_FLAGS_ENABLE) {
+		pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS,
+				      ctrl & ~PCI_MSI_FLAGS_ENABLE);
+	}
+
+	if (!(ctrl & PCI_MSI_FLAGS_64BIT))
+		dev->no_64bit_msi = 1;
+}
+
+void pci_msix_init(struct pci_dev *dev)
+{
+	u16 ctrl;
+
+	dev->msix_cap = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+	if (!dev->msix_cap)
+		return;
+
+	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
+	if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
+		pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS,
+				      ctrl & ~PCI_MSIX_FLAGS_ENABLE);
+	}
+}
diff --git a/drivers/pci/of.c b/drivers/pci/of.c
new file mode 100644
index 000000000..51e3dd0ea
--- /dev/null
+++ b/drivers/pci/of.c
@@ -0,0 +1,788 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCI <-> OF mapping helpers
+ *
+ * Copyright 2011 IBM Corp.
+ */
+#define pr_fmt(fmt)	"PCI: OF: " fmt
+
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include "pci.h"
+
+#ifdef CONFIG_PCI
+/**
+ * pci_set_of_node - Find and set device's DT device_node
+ * @dev: the PCI device structure to fill
+ *
+ * Returns 0 on success with of_node set or when no device is described in the
+ * DT. Returns -ENODEV if the device is present, but disabled in the DT.
+ */
+int pci_set_of_node(struct pci_dev *dev)
+{
+	struct device_node *node;
+
+	if (!dev->bus->dev.of_node)
+		return 0;
+
+	node = of_pci_find_child_device(dev->bus->dev.of_node, dev->devfn);
+	if (!node)
+		return 0;
+
+	device_set_node(&dev->dev, of_fwnode_handle(node));
+	return 0;
+}
+
+void pci_release_of_node(struct pci_dev *dev)
+{
+	of_node_put(dev->dev.of_node);
+	device_set_node(&dev->dev, NULL);
+}
+
+void pci_set_bus_of_node(struct pci_bus *bus)
+{
+	struct device_node *node;
+
+	if (bus->self == NULL) {
+		node = pcibios_get_phb_of_node(bus);
+	} else {
+		node = of_node_get(bus->self->dev.of_node);
+		if (node && of_property_read_bool(node, "external-facing"))
+			bus->self->external_facing = true;
+	}
+
+	device_set_node(&bus->dev, of_fwnode_handle(node));
+}
+
+void pci_release_bus_of_node(struct pci_bus *bus)
+{
+	of_node_put(bus->dev.of_node);
+	device_set_node(&bus->dev, NULL);
+}
+
+struct device_node * __weak pcibios_get_phb_of_node(struct pci_bus *bus)
+{
+	/* This should only be called for PHBs */
+	if (WARN_ON(bus->self || bus->parent))
+		return NULL;
+
+	/*
+	 * Look for a node pointer in either the intermediary device we
+	 * create above the root bus or its own parent. Normally only
+	 * the later is populated.
+	 */
+	if (bus->bridge->of_node)
+		return of_node_get(bus->bridge->of_node);
+	if (bus->bridge->parent && bus->bridge->parent->of_node)
+		return of_node_get(bus->bridge->parent->of_node);
+	return NULL;
+}
+
+struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus)
+{
+#ifdef CONFIG_IRQ_DOMAIN
+	struct irq_domain *d;
+
+	if (!bus->dev.of_node)
+		return NULL;
+
+	/* Start looking for a phandle to an MSI controller. */
+	d = of_msi_get_domain(&bus->dev, bus->dev.of_node, DOMAIN_BUS_PCI_MSI);
+	if (d)
+		return d;
+
+	/*
+	 * If we don't have an msi-parent property, look for a domain
+	 * directly attached to the host bridge.
+	 */
+	d = irq_find_matching_host(bus->dev.of_node, DOMAIN_BUS_PCI_MSI);
+	if (d)
+		return d;
+
+	return irq_find_host(bus->dev.of_node);
+#else
+	return NULL;
+#endif
+}
+
+bool pci_host_of_has_msi_map(struct device *dev)
+{
+	if (dev && dev->of_node)
+		return of_get_property(dev->of_node, "msi-map", NULL);
+	return false;
+}
+
+static inline int __of_pci_pci_compare(struct device_node *node,
+				       unsigned int data)
+{
+	int devfn;
+
+	devfn = of_pci_get_devfn(node);
+	if (devfn < 0)
+		return 0;
+
+	return devfn == data;
+}
+
+struct device_node *of_pci_find_child_device(struct device_node *parent,
+					     unsigned int devfn)
+{
+	struct device_node *node, *node2;
+
+	for_each_child_of_node(parent, node) {
+		if (__of_pci_pci_compare(node, devfn))
+			return node;
+		/*
+		 * Some OFs create a parent node "multifunc-device" as
+		 * a fake root for all functions of a multi-function
+		 * device we go down them as well.
+		 */
+		if (of_node_name_eq(node, "multifunc-device")) {
+			for_each_child_of_node(node, node2) {
+				if (__of_pci_pci_compare(node2, devfn)) {
+					of_node_put(node);
+					return node2;
+				}
+			}
+		}
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(of_pci_find_child_device);
+
+/**
+ * of_pci_get_devfn() - Get device and function numbers for a device node
+ * @np: device node
+ *
+ * Parses a standard 5-cell PCI resource and returns an 8-bit value that can
+ * be passed to the PCI_SLOT() and PCI_FUNC() macros to extract the device
+ * and function numbers respectively. On error a negative error code is
+ * returned.
+ */
+int of_pci_get_devfn(struct device_node *np)
+{
+	u32 reg[5];
+	int error;
+
+	error = of_property_read_u32_array(np, "reg", reg, ARRAY_SIZE(reg));
+	if (error)
+		return error;
+
+	return (reg[0] >> 8) & 0xff;
+}
+EXPORT_SYMBOL_GPL(of_pci_get_devfn);
+
+/**
+ * of_pci_parse_bus_range() - parse the bus-range property of a PCI device
+ * @node: device node
+ * @res: address to a struct resource to return the bus-range
+ *
+ * Returns 0 on success or a negative error-code on failure.
+ */
+int of_pci_parse_bus_range(struct device_node *node, struct resource *res)
+{
+	u32 bus_range[2];
+	int error;
+
+	error = of_property_read_u32_array(node, "bus-range", bus_range,
+					   ARRAY_SIZE(bus_range));
+	if (error)
+		return error;
+
+	res->name = node->name;
+	res->start = bus_range[0];
+	res->end = bus_range[1];
+	res->flags = IORESOURCE_BUS;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_pci_parse_bus_range);
+
+/**
+ * of_get_pci_domain_nr - Find the host bridge domain number
+ *			  of the given device node.
+ * @node: Device tree node with the domain information.
+ *
+ * This function will try to obtain the host bridge domain number by finding
+ * a property called "linux,pci-domain" of the given device node.
+ *
+ * Return:
+ * * > 0	- On success, an associated domain number.
+ * * -EINVAL	- The property "linux,pci-domain" does not exist.
+ * * -ENODATA	- The linux,pci-domain" property does not have value.
+ * * -EOVERFLOW	- Invalid "linux,pci-domain" property value.
+ *
+ * Returns the associated domain number from DT in the range [0-0xffff], or
+ * a negative value if the required property is not found.
+ */
+int of_get_pci_domain_nr(struct device_node *node)
+{
+	u32 domain;
+	int error;
+
+	error = of_property_read_u32(node, "linux,pci-domain", &domain);
+	if (error)
+		return error;
+
+	return (u16)domain;
+}
+EXPORT_SYMBOL_GPL(of_get_pci_domain_nr);
+
+/**
+ * of_pci_check_probe_only - Setup probe only mode if linux,pci-probe-only
+ *                           is present and valid
+ */
+void of_pci_check_probe_only(void)
+{
+	u32 val;
+	int ret;
+
+	ret = of_property_read_u32(of_chosen, "linux,pci-probe-only", &val);
+	if (ret) {
+		if (ret == -ENODATA || ret == -EOVERFLOW)
+			pr_warn("linux,pci-probe-only without valid value, ignoring\n");
+		return;
+	}
+
+	if (val)
+		pci_add_flags(PCI_PROBE_ONLY);
+	else
+		pci_clear_flags(PCI_PROBE_ONLY);
+
+	pr_info("PROBE_ONLY %s\n", val ? "enabled" : "disabled");
+}
+EXPORT_SYMBOL_GPL(of_pci_check_probe_only);
+
+/**
+ * devm_of_pci_get_host_bridge_resources() - Resource-managed parsing of PCI
+ *                                           host bridge resources from DT
+ * @dev: host bridge device
+ * @busno: bus number associated with the bridge root bus
+ * @bus_max: maximum number of buses for this bridge
+ * @resources: list where the range of resources will be added after DT parsing
+ * @ib_resources: list where the range of inbound resources (with addresses
+ *                from 'dma-ranges') will be added after DT parsing
+ * @io_base: pointer to a variable that will contain on return the physical
+ * address for the start of the I/O range. Can be NULL if the caller doesn't
+ * expect I/O ranges to be present in the device tree.
+ *
+ * This function will parse the "ranges" property of a PCI host bridge device
+ * node and setup the resource mapping based on its content. It is expected
+ * that the property conforms with the Power ePAPR document.
+ *
+ * It returns zero if the range parsing has been successful or a standard error
+ * value if it failed.
+ */
+static int devm_of_pci_get_host_bridge_resources(struct device *dev,
+			unsigned char busno, unsigned char bus_max,
+			struct list_head *resources,
+			struct list_head *ib_resources,
+			resource_size_t *io_base)
+{
+	struct device_node *dev_node = dev->of_node;
+	struct resource *res, tmp_res;
+	struct resource *bus_range;
+	struct of_pci_range range;
+	struct of_pci_range_parser parser;
+	const char *range_type;
+	int err;
+
+	if (io_base)
+		*io_base = (resource_size_t)OF_BAD_ADDR;
+
+	bus_range = devm_kzalloc(dev, sizeof(*bus_range), GFP_KERNEL);
+	if (!bus_range)
+		return -ENOMEM;
+
+	dev_info(dev, "host bridge %pOF ranges:\n", dev_node);
+
+	err = of_pci_parse_bus_range(dev_node, bus_range);
+	if (err) {
+		bus_range->start = busno;
+		bus_range->end = bus_max;
+		bus_range->flags = IORESOURCE_BUS;
+		dev_info(dev, "  No bus range found for %pOF, using %pR\n",
+			 dev_node, bus_range);
+	} else {
+		if (bus_range->end > bus_range->start + bus_max)
+			bus_range->end = bus_range->start + bus_max;
+	}
+	pci_add_resource(resources, bus_range);
+
+	/* Check for ranges property */
+	err = of_pci_range_parser_init(&parser, dev_node);
+	if (err)
+		return 0;
+
+	dev_dbg(dev, "Parsing ranges property...\n");
+	for_each_of_pci_range(&parser, &range) {
+		/* Read next ranges element */
+		if ((range.flags & IORESOURCE_TYPE_BITS) == IORESOURCE_IO)
+			range_type = "IO";
+		else if ((range.flags & IORESOURCE_TYPE_BITS) == IORESOURCE_MEM)
+			range_type = "MEM";
+		else
+			range_type = "err";
+		dev_info(dev, "  %6s %#012llx..%#012llx -> %#012llx\n",
+			 range_type, range.cpu_addr,
+			 range.cpu_addr + range.size - 1, range.pci_addr);
+
+		/*
+		 * If we failed translation or got a zero-sized region
+		 * then skip this range
+		 */
+		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
+			continue;
+
+		err = of_pci_range_to_resource(&range, dev_node, &tmp_res);
+		if (err)
+			continue;
+
+		res = devm_kmemdup(dev, &tmp_res, sizeof(tmp_res), GFP_KERNEL);
+		if (!res) {
+			err = -ENOMEM;
+			goto failed;
+		}
+
+		if (resource_type(res) == IORESOURCE_IO) {
+			if (!io_base) {
+				dev_err(dev, "I/O range found for %pOF. Please provide an io_base pointer to save CPU base address\n",
+					dev_node);
+				err = -EINVAL;
+				goto failed;
+			}
+			if (*io_base != (resource_size_t)OF_BAD_ADDR)
+				dev_warn(dev, "More than one I/O resource converted for %pOF. CPU base address for old range lost!\n",
+					 dev_node);
+			*io_base = range.cpu_addr;
+		} else if (resource_type(res) == IORESOURCE_MEM) {
+			res->flags &= ~IORESOURCE_MEM_64;
+		}
+
+		pci_add_resource_offset(resources, res,	res->start - range.pci_addr);
+	}
+
+	/* Check for dma-ranges property */
+	if (!ib_resources)
+		return 0;
+	err = of_pci_dma_range_parser_init(&parser, dev_node);
+	if (err)
+		return 0;
+
+	dev_dbg(dev, "Parsing dma-ranges property...\n");
+	for_each_of_pci_range(&parser, &range) {
+		/*
+		 * If we failed translation or got a zero-sized region
+		 * then skip this range
+		 */
+		if (((range.flags & IORESOURCE_TYPE_BITS) != IORESOURCE_MEM) ||
+		    range.cpu_addr == OF_BAD_ADDR || range.size == 0)
+			continue;
+
+		dev_info(dev, "  %6s %#012llx..%#012llx -> %#012llx\n",
+			 "IB MEM", range.cpu_addr,
+			 range.cpu_addr + range.size - 1, range.pci_addr);
+
+
+		err = of_pci_range_to_resource(&range, dev_node, &tmp_res);
+		if (err)
+			continue;
+
+		res = devm_kmemdup(dev, &tmp_res, sizeof(tmp_res), GFP_KERNEL);
+		if (!res) {
+			err = -ENOMEM;
+			goto failed;
+		}
+
+		pci_add_resource_offset(ib_resources, res,
+					res->start - range.pci_addr);
+	}
+
+	return 0;
+
+failed:
+	pci_free_resource_list(resources);
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_OF_IRQ)
+/**
+ * of_irq_parse_pci - Resolve the interrupt for a PCI device
+ * @pdev:       the device whose interrupt is to be resolved
+ * @out_irq:    structure of_phandle_args filled by this function
+ *
+ * This function resolves the PCI interrupt for a given PCI device. If a
+ * device-node exists for a given pci_dev, it will use normal OF tree
+ * walking. If not, it will implement standard swizzling and walk up the
+ * PCI tree until an device-node is found, at which point it will finish
+ * resolving using the OF tree walking.
+ */
+static int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq)
+{
+	struct device_node *dn, *ppnode = NULL;
+	struct pci_dev *ppdev;
+	__be32 laddr[3];
+	u8 pin;
+	int rc;
+
+	/*
+	 * Check if we have a device node, if yes, fallback to standard
+	 * device tree parsing
+	 */
+	dn = pci_device_to_OF_node(pdev);
+	if (dn) {
+		rc = of_irq_parse_one(dn, 0, out_irq);
+		if (!rc)
+			return rc;
+	}
+
+	/*
+	 * Ok, we don't, time to have fun. Let's start by building up an
+	 * interrupt spec.  we assume #interrupt-cells is 1, which is standard
+	 * for PCI. If you do different, then don't use that routine.
+	 */
+	rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
+	if (rc != 0)
+		goto err;
+	/* No pin, exit with no error message. */
+	if (pin == 0)
+		return -ENODEV;
+
+	/* Local interrupt-map in the device node? Use it! */
+	if (of_property_present(dn, "interrupt-map")) {
+		pin = pci_swizzle_interrupt_pin(pdev, pin);
+		ppnode = dn;
+	}
+
+	/* Now we walk up the PCI tree */
+	while (!ppnode) {
+		/* Get the pci_dev of our parent */
+		ppdev = pdev->bus->self;
+
+		/* Ouch, it's a host bridge... */
+		if (ppdev == NULL) {
+			ppnode = pci_bus_to_OF_node(pdev->bus);
+
+			/* No node for host bridge ? give up */
+			if (ppnode == NULL) {
+				rc = -EINVAL;
+				goto err;
+			}
+		} else {
+			/* We found a P2P bridge, check if it has a node */
+			ppnode = pci_device_to_OF_node(ppdev);
+		}
+
+		/*
+		 * Ok, we have found a parent with a device-node, hand over to
+		 * the OF parsing code.
+		 * We build a unit address from the linux device to be used for
+		 * resolution. Note that we use the linux bus number which may
+		 * not match your firmware bus numbering.
+		 * Fortunately, in most cases, interrupt-map-mask doesn't
+		 * include the bus number as part of the matching.
+		 * You should still be careful about that though if you intend
+		 * to rely on this function (you ship a firmware that doesn't
+		 * create device nodes for all PCI devices).
+		 */
+		if (ppnode)
+			break;
+
+		/*
+		 * We can only get here if we hit a P2P bridge with no node;
+		 * let's do standard swizzling and try again
+		 */
+		pin = pci_swizzle_interrupt_pin(pdev, pin);
+		pdev = ppdev;
+	}
+
+	out_irq->np = ppnode;
+	out_irq->args_count = 1;
+	out_irq->args[0] = pin;
+	laddr[0] = cpu_to_be32((pdev->bus->number << 16) | (pdev->devfn << 8));
+	laddr[1] = laddr[2] = cpu_to_be32(0);
+	rc = of_irq_parse_raw(laddr, out_irq);
+	if (rc)
+		goto err;
+	return 0;
+err:
+	if (rc == -ENOENT) {
+		dev_warn(&pdev->dev,
+			"%s: no interrupt-map found, INTx interrupts not available\n",
+			__func__);
+		pr_warn_once("%s: possibly some PCI slots don't have level triggered interrupts capability\n",
+			__func__);
+	} else {
+		dev_err(&pdev->dev, "%s: failed with rc=%d\n", __func__, rc);
+	}
+	return rc;
+}
+
+/**
+ * of_irq_parse_and_map_pci() - Decode a PCI IRQ from the device tree and map to a VIRQ
+ * @dev: The PCI device needing an IRQ
+ * @slot: PCI slot number; passed when used as map_irq callback. Unused
+ * @pin: PCI IRQ pin number; passed when used as map_irq callback. Unused
+ *
+ * @slot and @pin are unused, but included in the function so that this
+ * function can be used directly as the map_irq callback to
+ * pci_assign_irq() and struct pci_host_bridge.map_irq pointer
+ */
+int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	struct of_phandle_args oirq;
+	int ret;
+
+	ret = of_irq_parse_pci(dev, &oirq);
+	if (ret)
+		return 0; /* Proper return code 0 == NO_IRQ */
+
+	return irq_create_of_mapping(&oirq);
+}
+EXPORT_SYMBOL_GPL(of_irq_parse_and_map_pci);
+#endif	/* CONFIG_OF_IRQ */
+
+static int pci_parse_request_of_pci_ranges(struct device *dev,
+					   struct pci_host_bridge *bridge)
+{
+	int err, res_valid = 0;
+	resource_size_t iobase;
+	struct resource_entry *win, *tmp;
+
+	INIT_LIST_HEAD(&bridge->windows);
+	INIT_LIST_HEAD(&bridge->dma_ranges);
+
+	err = devm_of_pci_get_host_bridge_resources(dev, 0, 0xff, &bridge->windows,
+						    &bridge->dma_ranges, &iobase);
+	if (err)
+		return err;
+
+	err = devm_request_pci_bus_resources(dev, &bridge->windows);
+	if (err)
+		return err;
+
+	resource_list_for_each_entry_safe(win, tmp, &bridge->windows) {
+		struct resource *res = win->res;
+
+		switch (resource_type(res)) {
+		case IORESOURCE_IO:
+			err = devm_pci_remap_iospace(dev, res, iobase);
+			if (err) {
+				dev_warn(dev, "error %d: failed to map resource %pR\n",
+					 err, res);
+				resource_list_destroy_entry(win);
+			}
+			break;
+		case IORESOURCE_MEM:
+			res_valid |= !(res->flags & IORESOURCE_PREFETCH);
+
+			if (!(res->flags & IORESOURCE_PREFETCH))
+				if (upper_32_bits(resource_size(res)))
+					dev_warn(dev, "Memory resource size exceeds max for 32 bits\n");
+
+			break;
+		}
+	}
+
+	if (!res_valid)
+		dev_warn(dev, "non-prefetchable memory resource required\n");
+
+	return 0;
+}
+
+int devm_of_pci_bridge_init(struct device *dev, struct pci_host_bridge *bridge)
+{
+	if (!dev->of_node)
+		return 0;
+
+	bridge->swizzle_irq = pci_common_swizzle;
+	bridge->map_irq = of_irq_parse_and_map_pci;
+
+	return pci_parse_request_of_pci_ranges(dev, bridge);
+}
+
+#ifdef CONFIG_PCI_DYNAMIC_OF_NODES
+
+void of_pci_remove_node(struct pci_dev *pdev)
+{
+	struct device_node *np;
+
+	np = pci_device_to_OF_node(pdev);
+	if (!np || !of_node_check_flag(np, OF_DYNAMIC))
+		return;
+	pdev->dev.of_node = NULL;
+
+	of_changeset_revert(np->data);
+	of_changeset_destroy(np->data);
+	of_node_put(np);
+}
+
+void of_pci_make_dev_node(struct pci_dev *pdev)
+{
+	struct device_node *ppnode, *np = NULL;
+	const char *pci_type;
+	struct of_changeset *cset;
+	const char *name;
+	int ret;
+
+	/*
+	 * If there is already a device tree node linked to this device,
+	 * return immediately.
+	 */
+	if (pci_device_to_OF_node(pdev))
+		return;
+
+	/* Check if there is device tree node for parent device */
+	if (!pdev->bus->self)
+		ppnode = pdev->bus->dev.of_node;
+	else
+		ppnode = pdev->bus->self->dev.of_node;
+	if (!ppnode)
+		return;
+
+	if (pci_is_bridge(pdev))
+		pci_type = "pci";
+	else
+		pci_type = "dev";
+
+	name = kasprintf(GFP_KERNEL, "%s@%x,%x", pci_type,
+			 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+	if (!name)
+		return;
+
+	cset = kmalloc(sizeof(*cset), GFP_KERNEL);
+	if (!cset)
+		goto out_free_name;
+	of_changeset_init(cset);
+
+	np = of_changeset_create_node(cset, ppnode, name);
+	if (!np)
+		goto out_destroy_cset;
+
+	ret = of_pci_add_properties(pdev, cset, np);
+	if (ret)
+		goto out_free_node;
+
+	ret = of_changeset_apply(cset);
+	if (ret)
+		goto out_free_node;
+
+	np->data = cset;
+	pdev->dev.of_node = np;
+	kfree(name);
+
+	return;
+
+out_free_node:
+	of_node_put(np);
+out_destroy_cset:
+	of_changeset_destroy(cset);
+	kfree(cset);
+out_free_name:
+	kfree(name);
+}
+#endif
+
+#endif /* CONFIG_PCI */
+
+/**
+ * of_pci_get_max_link_speed - Find the maximum link speed of the given device node.
+ * @node: Device tree node with the maximum link speed information.
+ *
+ * This function will try to find the limitation of link speed by finding
+ * a property called "max-link-speed" of the given device node.
+ *
+ * Return:
+ * * > 0	- On success, a maximum link speed.
+ * * -EINVAL	- Invalid "max-link-speed" property value, or failure to access
+ *		  the property of the device tree node.
+ *
+ * Returns the associated max link speed from DT, or a negative value if the
+ * required property is not found or is invalid.
+ */
+int of_pci_get_max_link_speed(struct device_node *node)
+{
+	u32 max_link_speed;
+
+	if (of_property_read_u32(node, "max-link-speed", &max_link_speed) ||
+	    max_link_speed == 0 || max_link_speed > 4)
+		return -EINVAL;
+
+	return max_link_speed;
+}
+EXPORT_SYMBOL_GPL(of_pci_get_max_link_speed);
+
+/**
+ * of_pci_get_slot_power_limit - Parses the "slot-power-limit-milliwatt"
+ *				 property.
+ *
+ * @node: device tree node with the slot power limit information
+ * @slot_power_limit_value: pointer where the value should be stored in PCIe
+ *			    Slot Capabilities Register format
+ * @slot_power_limit_scale: pointer where the scale should be stored in PCIe
+ *			    Slot Capabilities Register format
+ *
+ * Returns the slot power limit in milliwatts and if @slot_power_limit_value
+ * and @slot_power_limit_scale pointers are non-NULL, fills in the value and
+ * scale in format used by PCIe Slot Capabilities Register.
+ *
+ * If the property is not found or is invalid, returns 0.
+ */
+u32 of_pci_get_slot_power_limit(struct device_node *node,
+				u8 *slot_power_limit_value,
+				u8 *slot_power_limit_scale)
+{
+	u32 slot_power_limit_mw;
+	u8 value, scale;
+
+	if (of_property_read_u32(node, "slot-power-limit-milliwatt",
+				 &slot_power_limit_mw))
+		slot_power_limit_mw = 0;
+
+	/* Calculate Slot Power Limit Value and Slot Power Limit Scale */
+	if (slot_power_limit_mw == 0) {
+		value = 0x00;
+		scale = 0;
+	} else if (slot_power_limit_mw <= 255) {
+		value = slot_power_limit_mw;
+		scale = 3;
+	} else if (slot_power_limit_mw <= 255*10) {
+		value = slot_power_limit_mw / 10;
+		scale = 2;
+		slot_power_limit_mw = slot_power_limit_mw / 10 * 10;
+	} else if (slot_power_limit_mw <= 255*100) {
+		value = slot_power_limit_mw / 100;
+		scale = 1;
+		slot_power_limit_mw = slot_power_limit_mw / 100 * 100;
+	} else if (slot_power_limit_mw <= 239*1000) {
+		value = slot_power_limit_mw / 1000;
+		scale = 0;
+		slot_power_limit_mw = slot_power_limit_mw / 1000 * 1000;
+	} else if (slot_power_limit_mw < 250*1000) {
+		value = 0xEF;
+		scale = 0;
+		slot_power_limit_mw = 239*1000;
+	} else if (slot_power_limit_mw <= 600*1000) {
+		value = 0xF0 + (slot_power_limit_mw / 1000 - 250) / 25;
+		scale = 0;
+		slot_power_limit_mw = slot_power_limit_mw / (1000*25) * (1000*25);
+	} else {
+		value = 0xFE;
+		scale = 0;
+		slot_power_limit_mw = 600*1000;
+	}
+
+	if (slot_power_limit_value)
+		*slot_power_limit_value = value;
+
+	if (slot_power_limit_scale)
+		*slot_power_limit_scale = scale;
+
+	return slot_power_limit_mw;
+}
+EXPORT_SYMBOL_GPL(of_pci_get_slot_power_limit);
diff --git a/drivers/pci/of_property.c b/drivers/pci/of_property.c
new file mode 100644
index 000000000..c2c733415
--- /dev/null
+++ b/drivers/pci/of_property.c
@@ -0,0 +1,366 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
+ */
+
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include "pci.h"
+
+#define OF_PCI_ADDRESS_CELLS		3
+#define OF_PCI_SIZE_CELLS		2
+#define OF_PCI_MAX_INT_PIN		4
+
+struct of_pci_addr_pair {
+	u32		phys_addr[OF_PCI_ADDRESS_CELLS];
+	u32		size[OF_PCI_SIZE_CELLS];
+};
+
+/*
+ * Each entry in the ranges table is a tuple containing the child address,
+ * the parent address, and the size of the region in the child address space.
+ * Thus, for PCI, in each entry parent address is an address on the primary
+ * side and the child address is the corresponding address on the secondary
+ * side.
+ */
+struct of_pci_range {
+	u32		child_addr[OF_PCI_ADDRESS_CELLS];
+	u32		parent_addr[OF_PCI_ADDRESS_CELLS];
+	u32		size[OF_PCI_SIZE_CELLS];
+};
+
+#define OF_PCI_ADDR_SPACE_IO		0x1
+#define OF_PCI_ADDR_SPACE_MEM32		0x2
+#define OF_PCI_ADDR_SPACE_MEM64		0x3
+
+#define OF_PCI_ADDR_FIELD_NONRELOC	BIT(31)
+#define OF_PCI_ADDR_FIELD_SS		GENMASK(25, 24)
+#define OF_PCI_ADDR_FIELD_PREFETCH	BIT(30)
+#define OF_PCI_ADDR_FIELD_BUS		GENMASK(23, 16)
+#define OF_PCI_ADDR_FIELD_DEV		GENMASK(15, 11)
+#define OF_PCI_ADDR_FIELD_FUNC		GENMASK(10, 8)
+#define OF_PCI_ADDR_FIELD_REG		GENMASK(7, 0)
+
+enum of_pci_prop_compatible {
+	PROP_COMPAT_PCI_VVVV_DDDD,
+	PROP_COMPAT_PCICLASS_CCSSPP,
+	PROP_COMPAT_PCICLASS_CCSS,
+	PROP_COMPAT_NUM,
+};
+
+static void of_pci_set_address(struct pci_dev *pdev, u32 *prop, u64 addr,
+			       u32 reg_num, u32 flags, bool reloc)
+{
+	prop[0] = FIELD_PREP(OF_PCI_ADDR_FIELD_BUS, pdev->bus->number) |
+		FIELD_PREP(OF_PCI_ADDR_FIELD_DEV, PCI_SLOT(pdev->devfn)) |
+		FIELD_PREP(OF_PCI_ADDR_FIELD_FUNC, PCI_FUNC(pdev->devfn));
+	prop[0] |= flags | reg_num;
+	if (!reloc) {
+		prop[0] |= OF_PCI_ADDR_FIELD_NONRELOC;
+		prop[1] = upper_32_bits(addr);
+		prop[2] = lower_32_bits(addr);
+	}
+}
+
+static int of_pci_get_addr_flags(struct resource *res, u32 *flags)
+{
+	u32 ss;
+
+	if (res->flags & IORESOURCE_IO)
+		ss = OF_PCI_ADDR_SPACE_IO;
+	else if (res->flags & IORESOURCE_MEM_64)
+		ss = OF_PCI_ADDR_SPACE_MEM64;
+	else if (res->flags & IORESOURCE_MEM)
+		ss = OF_PCI_ADDR_SPACE_MEM32;
+	else
+		return -EINVAL;
+
+	*flags = 0;
+	if (res->flags & IORESOURCE_PREFETCH)
+		*flags |= OF_PCI_ADDR_FIELD_PREFETCH;
+
+	*flags |= FIELD_PREP(OF_PCI_ADDR_FIELD_SS, ss);
+
+	return 0;
+}
+
+static int of_pci_prop_bus_range(struct pci_dev *pdev,
+				 struct of_changeset *ocs,
+				 struct device_node *np)
+{
+	u32 bus_range[] = { pdev->subordinate->busn_res.start,
+			    pdev->subordinate->busn_res.end };
+
+	return of_changeset_add_prop_u32_array(ocs, np, "bus-range", bus_range,
+					       ARRAY_SIZE(bus_range));
+}
+
+static int of_pci_prop_ranges(struct pci_dev *pdev, struct of_changeset *ocs,
+			      struct device_node *np)
+{
+	struct of_pci_range *rp;
+	struct resource *res;
+	int i, j, ret;
+	u32 flags, num;
+	u64 val64;
+
+	if (pci_is_bridge(pdev)) {
+		num = PCI_BRIDGE_RESOURCE_NUM;
+		res = &pdev->resource[PCI_BRIDGE_RESOURCES];
+	} else {
+		num = PCI_STD_NUM_BARS;
+		res = &pdev->resource[PCI_STD_RESOURCES];
+	}
+
+	rp = kcalloc(num, sizeof(*rp), GFP_KERNEL);
+	if (!rp)
+		return -ENOMEM;
+
+	for (i = 0, j = 0; j < num; j++) {
+		if (!resource_size(&res[j]))
+			continue;
+
+		if (of_pci_get_addr_flags(&res[j], &flags))
+			continue;
+
+		val64 = res[j].start;
+		of_pci_set_address(pdev, rp[i].parent_addr, val64, 0, flags,
+				   false);
+		if (pci_is_bridge(pdev)) {
+			memcpy(rp[i].child_addr, rp[i].parent_addr,
+			       sizeof(rp[i].child_addr));
+		} else {
+			/*
+			 * For endpoint device, the lower 64-bits of child
+			 * address is always zero.
+			 */
+			rp[i].child_addr[0] = j;
+		}
+
+		val64 = resource_size(&res[j]);
+		rp[i].size[0] = upper_32_bits(val64);
+		rp[i].size[1] = lower_32_bits(val64);
+
+		i++;
+	}
+
+	ret = of_changeset_add_prop_u32_array(ocs, np, "ranges", (u32 *)rp,
+					      i * sizeof(*rp) / sizeof(u32));
+	kfree(rp);
+
+	return ret;
+}
+
+static int of_pci_prop_reg(struct pci_dev *pdev, struct of_changeset *ocs,
+			   struct device_node *np)
+{
+	struct of_pci_addr_pair reg = { 0 };
+
+	/* configuration space */
+	of_pci_set_address(pdev, reg.phys_addr, 0, 0, 0, true);
+
+	return of_changeset_add_prop_u32_array(ocs, np, "reg", (u32 *)&reg,
+					       sizeof(reg) / sizeof(u32));
+}
+
+static int of_pci_prop_interrupts(struct pci_dev *pdev,
+				  struct of_changeset *ocs,
+				  struct device_node *np)
+{
+	int ret;
+	u8 pin;
+
+	ret = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
+	if (ret != 0)
+		return ret;
+
+	if (!pin)
+		return 0;
+
+	return of_changeset_add_prop_u32(ocs, np, "interrupts", (u32)pin);
+}
+
+static int of_pci_prop_intr_map(struct pci_dev *pdev, struct of_changeset *ocs,
+				struct device_node *np)
+{
+	u32 i, addr_sz[OF_PCI_MAX_INT_PIN] = { 0 }, map_sz = 0;
+	struct of_phandle_args out_irq[OF_PCI_MAX_INT_PIN];
+	__be32 laddr[OF_PCI_ADDRESS_CELLS] = { 0 };
+	u32 int_map_mask[] = { 0xffff00, 0, 0, 7 };
+	struct device_node *pnode;
+	struct pci_dev *child;
+	u32 *int_map, *mapp;
+	int ret;
+	u8 pin;
+
+	pnode = pci_device_to_OF_node(pdev->bus->self);
+	if (!pnode)
+		pnode = pci_bus_to_OF_node(pdev->bus);
+
+	if (!pnode) {
+		pci_err(pdev, "failed to get parent device node");
+		return -EINVAL;
+	}
+
+	laddr[0] = cpu_to_be32((pdev->bus->number << 16) | (pdev->devfn << 8));
+	for (pin = 1; pin <= OF_PCI_MAX_INT_PIN;  pin++) {
+		i = pin - 1;
+		out_irq[i].np = pnode;
+		out_irq[i].args_count = 1;
+		out_irq[i].args[0] = pin;
+		ret = of_irq_parse_raw(laddr, &out_irq[i]);
+		if (ret) {
+			out_irq[i].np = NULL;
+			pci_dbg(pdev, "parse irq %d failed, ret %d", pin, ret);
+			continue;
+		}
+		of_property_read_u32(out_irq[i].np, "#address-cells",
+				     &addr_sz[i]);
+	}
+
+	list_for_each_entry(child, &pdev->subordinate->devices, bus_list) {
+		for (pin = 1; pin <= OF_PCI_MAX_INT_PIN; pin++) {
+			i = pci_swizzle_interrupt_pin(child, pin) - 1;
+			if (!out_irq[i].np)
+				continue;
+			map_sz += 5 + addr_sz[i] + out_irq[i].args_count;
+		}
+	}
+
+	/*
+	 * Parsing interrupt failed for all pins. In this case, it does not
+	 * need to generate interrupt-map property.
+	 */
+	if (!map_sz)
+		return 0;
+
+	int_map = kcalloc(map_sz, sizeof(u32), GFP_KERNEL);
+	mapp = int_map;
+
+	list_for_each_entry(child, &pdev->subordinate->devices, bus_list) {
+		for (pin = 1; pin <= OF_PCI_MAX_INT_PIN; pin++) {
+			i = pci_swizzle_interrupt_pin(child, pin) - 1;
+			if (!out_irq[i].np)
+				continue;
+
+			*mapp = (child->bus->number << 16) |
+				(child->devfn << 8);
+			mapp += OF_PCI_ADDRESS_CELLS;
+			*mapp = pin;
+			mapp++;
+			*mapp = out_irq[i].np->phandle;
+			mapp++;
+			if (addr_sz[i]) {
+				ret = of_property_read_u32_array(out_irq[i].np,
+								 "reg", mapp,
+								 addr_sz[i]);
+				if (ret)
+					goto failed;
+			}
+			mapp += addr_sz[i];
+			memcpy(mapp, out_irq[i].args,
+			       out_irq[i].args_count * sizeof(u32));
+			mapp += out_irq[i].args_count;
+		}
+	}
+
+	ret = of_changeset_add_prop_u32_array(ocs, np, "interrupt-map", int_map,
+					      map_sz);
+	if (ret)
+		goto failed;
+
+	ret = of_changeset_add_prop_u32(ocs, np, "#interrupt-cells", 1);
+	if (ret)
+		goto failed;
+
+	ret = of_changeset_add_prop_u32_array(ocs, np, "interrupt-map-mask",
+					      int_map_mask,
+					      ARRAY_SIZE(int_map_mask));
+	if (ret)
+		goto failed;
+
+	kfree(int_map);
+	return 0;
+
+failed:
+	kfree(int_map);
+	return ret;
+}
+
+static int of_pci_prop_compatible(struct pci_dev *pdev,
+				  struct of_changeset *ocs,
+				  struct device_node *np)
+{
+	const char *compat_strs[PROP_COMPAT_NUM] = { 0 };
+	int i, ret;
+
+	compat_strs[PROP_COMPAT_PCI_VVVV_DDDD] =
+		kasprintf(GFP_KERNEL, "pci%x,%x", pdev->vendor, pdev->device);
+	compat_strs[PROP_COMPAT_PCICLASS_CCSSPP] =
+		kasprintf(GFP_KERNEL, "pciclass,%06x", pdev->class);
+	compat_strs[PROP_COMPAT_PCICLASS_CCSS] =
+		kasprintf(GFP_KERNEL, "pciclass,%04x", pdev->class >> 8);
+
+	ret = of_changeset_add_prop_string_array(ocs, np, "compatible",
+						 compat_strs, PROP_COMPAT_NUM);
+	for (i = 0; i < PROP_COMPAT_NUM; i++)
+		kfree(compat_strs[i]);
+
+	return ret;
+}
+
+int of_pci_add_properties(struct pci_dev *pdev, struct of_changeset *ocs,
+			  struct device_node *np)
+{
+	int ret;
+
+	/*
+	 * The added properties will be released when the
+	 * changeset is destroyed.
+	 */
+	if (pci_is_bridge(pdev)) {
+		ret = of_changeset_add_prop_string(ocs, np, "device_type",
+						   "pci");
+		if (ret)
+			return ret;
+
+		ret = of_pci_prop_bus_range(pdev, ocs, np);
+		if (ret)
+			return ret;
+
+		ret = of_pci_prop_intr_map(pdev, ocs, np);
+		if (ret)
+			return ret;
+	}
+
+	ret = of_pci_prop_ranges(pdev, ocs, np);
+	if (ret)
+		return ret;
+
+	ret = of_changeset_add_prop_u32(ocs, np, "#address-cells",
+					OF_PCI_ADDRESS_CELLS);
+	if (ret)
+		return ret;
+
+	ret = of_changeset_add_prop_u32(ocs, np, "#size-cells",
+					OF_PCI_SIZE_CELLS);
+	if (ret)
+		return ret;
+
+	ret = of_pci_prop_reg(pdev, ocs, np);
+	if (ret)
+		return ret;
+
+	ret = of_pci_prop_compatible(pdev, ocs, np);
+	if (ret)
+		return ret;
+
+	ret = of_pci_prop_interrupts(pdev, ocs, np);
+	if (ret)
+		return ret;
+
+	return 0;
+}
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
new file mode 100644
index 000000000..fa7370f95
--- /dev/null
+++ b/drivers/pci/p2pdma.c
@@ -0,0 +1,1113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Peer 2 Peer DMA support.
+ *
+ * Copyright (c) 2016-2018, Logan Gunthorpe
+ * Copyright (c) 2016-2017, Microsemi Corporation
+ * Copyright (c) 2017, Christoph Hellwig
+ * Copyright (c) 2018, Eideticom Inc.
+ */
+
+#define pr_fmt(fmt) "pci-p2pdma: " fmt
+#include <linux/ctype.h>
+#include <linux/dma-map-ops.h>
+#include <linux/pci-p2pdma.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/genalloc.h>
+#include <linux/memremap.h>
+#include <linux/percpu-refcount.h>
+#include <linux/random.h>
+#include <linux/seq_buf.h>
+#include <linux/xarray.h>
+
+struct pci_p2pdma {
+	struct gen_pool *pool;
+	bool p2pmem_published;
+	struct xarray map_types;
+};
+
+struct pci_p2pdma_pagemap {
+	struct dev_pagemap pgmap;
+	struct pci_dev *provider;
+	u64 bus_offset;
+};
+
+static struct pci_p2pdma_pagemap *to_p2p_pgmap(struct dev_pagemap *pgmap)
+{
+	return container_of(pgmap, struct pci_p2pdma_pagemap, pgmap);
+}
+
+static ssize_t size_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_p2pdma *p2pdma;
+	size_t size = 0;
+
+	rcu_read_lock();
+	p2pdma = rcu_dereference(pdev->p2pdma);
+	if (p2pdma && p2pdma->pool)
+		size = gen_pool_size(p2pdma->pool);
+	rcu_read_unlock();
+
+	return sysfs_emit(buf, "%zd\n", size);
+}
+static DEVICE_ATTR_RO(size);
+
+static ssize_t available_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_p2pdma *p2pdma;
+	size_t avail = 0;
+
+	rcu_read_lock();
+	p2pdma = rcu_dereference(pdev->p2pdma);
+	if (p2pdma && p2pdma->pool)
+		avail = gen_pool_avail(p2pdma->pool);
+	rcu_read_unlock();
+
+	return sysfs_emit(buf, "%zd\n", avail);
+}
+static DEVICE_ATTR_RO(available);
+
+static ssize_t published_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_p2pdma *p2pdma;
+	bool published = false;
+
+	rcu_read_lock();
+	p2pdma = rcu_dereference(pdev->p2pdma);
+	if (p2pdma)
+		published = p2pdma->p2pmem_published;
+	rcu_read_unlock();
+
+	return sysfs_emit(buf, "%d\n", published);
+}
+static DEVICE_ATTR_RO(published);
+
+static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr, struct vm_area_struct *vma)
+{
+	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+	size_t len = vma->vm_end - vma->vm_start;
+	struct pci_p2pdma *p2pdma;
+	struct percpu_ref *ref;
+	unsigned long vaddr;
+	void *kaddr;
+	int ret;
+
+	/* prevent private mappings from being established */
+	if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) {
+		pci_info_ratelimited(pdev,
+				     "%s: fail, attempted private mapping\n",
+				     current->comm);
+		return -EINVAL;
+	}
+
+	if (vma->vm_pgoff) {
+		pci_info_ratelimited(pdev,
+				     "%s: fail, attempted mapping with non-zero offset\n",
+				     current->comm);
+		return -EINVAL;
+	}
+
+	rcu_read_lock();
+	p2pdma = rcu_dereference(pdev->p2pdma);
+	if (!p2pdma) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	kaddr = (void *)gen_pool_alloc_owner(p2pdma->pool, len, (void **)&ref);
+	if (!kaddr) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * vm_insert_page() can sleep, so a reference is taken to mapping
+	 * such that rcu_read_unlock() can be done before inserting the
+	 * pages
+	 */
+	if (unlikely(!percpu_ref_tryget_live_rcu(ref))) {
+		ret = -ENODEV;
+		goto out_free_mem;
+	}
+	rcu_read_unlock();
+
+	for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
+		ret = vm_insert_page(vma, vaddr, virt_to_page(kaddr));
+		if (ret) {
+			gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len);
+			return ret;
+		}
+		percpu_ref_get(ref);
+		put_page(virt_to_page(kaddr));
+		kaddr += PAGE_SIZE;
+		len -= PAGE_SIZE;
+	}
+
+	percpu_ref_put(ref);
+
+	return 0;
+out_free_mem:
+	gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len);
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
+static struct bin_attribute p2pmem_alloc_attr = {
+	.attr = { .name = "allocate", .mode = 0660 },
+	.mmap = p2pmem_alloc_mmap,
+	/*
+	 * Some places where we want to call mmap (ie. python) will check
+	 * that the file size is greater than the mmap size before allowing
+	 * the mmap to continue. To work around this, just set the size
+	 * to be very large.
+	 */
+	.size = SZ_1T,
+};
+
+static struct attribute *p2pmem_attrs[] = {
+	&dev_attr_size.attr,
+	&dev_attr_available.attr,
+	&dev_attr_published.attr,
+	NULL,
+};
+
+static struct bin_attribute *p2pmem_bin_attrs[] = {
+	&p2pmem_alloc_attr,
+	NULL,
+};
+
+static const struct attribute_group p2pmem_group = {
+	.attrs = p2pmem_attrs,
+	.bin_attrs = p2pmem_bin_attrs,
+	.name = "p2pmem",
+};
+
+static void p2pdma_page_free(struct page *page)
+{
+	struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page->pgmap);
+	/* safe to dereference while a reference is held to the percpu ref */
+	struct pci_p2pdma *p2pdma =
+		rcu_dereference_protected(pgmap->provider->p2pdma, 1);
+	struct percpu_ref *ref;
+
+	gen_pool_free_owner(p2pdma->pool, (uintptr_t)page_to_virt(page),
+			    PAGE_SIZE, (void **)&ref);
+	percpu_ref_put(ref);
+}
+
+static const struct dev_pagemap_ops p2pdma_pgmap_ops = {
+	.page_free = p2pdma_page_free,
+};
+
+static void pci_p2pdma_release(void *data)
+{
+	struct pci_dev *pdev = data;
+	struct pci_p2pdma *p2pdma;
+
+	p2pdma = rcu_dereference_protected(pdev->p2pdma, 1);
+	if (!p2pdma)
+		return;
+
+	/* Flush and disable pci_alloc_p2p_mem() */
+	pdev->p2pdma = NULL;
+	synchronize_rcu();
+
+	gen_pool_destroy(p2pdma->pool);
+	sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group);
+	xa_destroy(&p2pdma->map_types);
+}
+
+static int pci_p2pdma_setup(struct pci_dev *pdev)
+{
+	int error = -ENOMEM;
+	struct pci_p2pdma *p2p;
+
+	p2p = devm_kzalloc(&pdev->dev, sizeof(*p2p), GFP_KERNEL);
+	if (!p2p)
+		return -ENOMEM;
+
+	xa_init(&p2p->map_types);
+
+	p2p->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev));
+	if (!p2p->pool)
+		goto out;
+
+	error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev);
+	if (error)
+		goto out_pool_destroy;
+
+	error = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group);
+	if (error)
+		goto out_pool_destroy;
+
+	rcu_assign_pointer(pdev->p2pdma, p2p);
+	return 0;
+
+out_pool_destroy:
+	gen_pool_destroy(p2p->pool);
+out:
+	devm_kfree(&pdev->dev, p2p);
+	return error;
+}
+
+static void pci_p2pdma_unmap_mappings(void *data)
+{
+	struct pci_dev *pdev = data;
+
+	/*
+	 * Removing the alloc attribute from sysfs will call
+	 * unmap_mapping_range() on the inode, teardown any existing userspace
+	 * mappings and prevent new ones from being created.
+	 */
+	sysfs_remove_file_from_group(&pdev->dev.kobj, &p2pmem_alloc_attr.attr,
+				     p2pmem_group.name);
+}
+
+/**
+ * pci_p2pdma_add_resource - add memory for use as p2p memory
+ * @pdev: the device to add the memory to
+ * @bar: PCI BAR to add
+ * @size: size of the memory to add, may be zero to use the whole BAR
+ * @offset: offset into the PCI BAR
+ *
+ * The memory will be given ZONE_DEVICE struct pages so that it may
+ * be used with any DMA request.
+ */
+int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
+			    u64 offset)
+{
+	struct pci_p2pdma_pagemap *p2p_pgmap;
+	struct dev_pagemap *pgmap;
+	struct pci_p2pdma *p2pdma;
+	void *addr;
+	int error;
+
+	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM))
+		return -EINVAL;
+
+	if (offset >= pci_resource_len(pdev, bar))
+		return -EINVAL;
+
+	if (!size)
+		size = pci_resource_len(pdev, bar) - offset;
+
+	if (size + offset > pci_resource_len(pdev, bar))
+		return -EINVAL;
+
+	if (!pdev->p2pdma) {
+		error = pci_p2pdma_setup(pdev);
+		if (error)
+			return error;
+	}
+
+	p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL);
+	if (!p2p_pgmap)
+		return -ENOMEM;
+
+	pgmap = &p2p_pgmap->pgmap;
+	pgmap->range.start = pci_resource_start(pdev, bar) + offset;
+	pgmap->range.end = pgmap->range.start + size - 1;
+	pgmap->nr_range = 1;
+	pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
+	pgmap->ops = &p2pdma_pgmap_ops;
+
+	p2p_pgmap->provider = pdev;
+	p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) -
+		pci_resource_start(pdev, bar);
+
+	addr = devm_memremap_pages(&pdev->dev, pgmap);
+	if (IS_ERR(addr)) {
+		error = PTR_ERR(addr);
+		goto pgmap_free;
+	}
+
+	error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_unmap_mappings,
+					 pdev);
+	if (error)
+		goto pages_free;
+
+	p2pdma = rcu_dereference_protected(pdev->p2pdma, 1);
+	error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr,
+			pci_bus_address(pdev, bar) + offset,
+			range_len(&pgmap->range), dev_to_node(&pdev->dev),
+			&pgmap->ref);
+	if (error)
+		goto pages_free;
+
+	pci_info(pdev, "added peer-to-peer DMA memory %#llx-%#llx\n",
+		 pgmap->range.start, pgmap->range.end);
+
+	return 0;
+
+pages_free:
+	devm_memunmap_pages(&pdev->dev, pgmap);
+pgmap_free:
+	devm_kfree(&pdev->dev, pgmap);
+	return error;
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource);
+
+/*
+ * Note this function returns the parent PCI device with a
+ * reference taken. It is the caller's responsibility to drop
+ * the reference.
+ */
+static struct pci_dev *find_parent_pci_dev(struct device *dev)
+{
+	struct device *parent;
+
+	dev = get_device(dev);
+
+	while (dev) {
+		if (dev_is_pci(dev))
+			return to_pci_dev(dev);
+
+		parent = get_device(dev->parent);
+		put_device(dev);
+		dev = parent;
+	}
+
+	return NULL;
+}
+
+/*
+ * Check if a PCI bridge has its ACS redirection bits set to redirect P2P
+ * TLPs upstream via ACS. Returns 1 if the packets will be redirected
+ * upstream, 0 otherwise.
+ */
+static int pci_bridge_has_acs_redir(struct pci_dev *pdev)
+{
+	int pos;
+	u16 ctrl;
+
+	pos = pdev->acs_cap;
+	if (!pos)
+		return 0;
+
+	pci_read_config_word(pdev, pos + PCI_ACS_CTRL, &ctrl);
+
+	if (ctrl & (PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC))
+		return 1;
+
+	return 0;
+}
+
+static void seq_buf_print_bus_devfn(struct seq_buf *buf, struct pci_dev *pdev)
+{
+	if (!buf)
+		return;
+
+	seq_buf_printf(buf, "%s;", pci_name(pdev));
+}
+
+static bool cpu_supports_p2pdma(void)
+{
+#ifdef CONFIG_X86
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	/* Any AMD CPU whose family ID is Zen or newer supports p2pdma */
+	if (c->x86_vendor == X86_VENDOR_AMD && c->x86 >= 0x17)
+		return true;
+#endif
+
+	return false;
+}
+
+static const struct pci_p2pdma_whitelist_entry {
+	unsigned short vendor;
+	unsigned short device;
+	enum {
+		REQ_SAME_HOST_BRIDGE	= 1 << 0,
+	} flags;
+} pci_p2pdma_whitelist[] = {
+	/* Intel Xeon E5/Core i7 */
+	{PCI_VENDOR_ID_INTEL,	0x3c00, REQ_SAME_HOST_BRIDGE},
+	{PCI_VENDOR_ID_INTEL,	0x3c01, REQ_SAME_HOST_BRIDGE},
+	/* Intel Xeon E7 v3/Xeon E5 v3/Core i7 */
+	{PCI_VENDOR_ID_INTEL,	0x2f00, REQ_SAME_HOST_BRIDGE},
+	{PCI_VENDOR_ID_INTEL,	0x2f01, REQ_SAME_HOST_BRIDGE},
+	/* Intel Skylake-E */
+	{PCI_VENDOR_ID_INTEL,	0x2030, 0},
+	{PCI_VENDOR_ID_INTEL,	0x2031, 0},
+	{PCI_VENDOR_ID_INTEL,	0x2032, 0},
+	{PCI_VENDOR_ID_INTEL,	0x2033, 0},
+	{PCI_VENDOR_ID_INTEL,	0x2020, 0},
+	{PCI_VENDOR_ID_INTEL,	0x09a2, 0},
+	{}
+};
+
+/*
+ * If the first device on host's root bus is either devfn 00.0 or a PCIe
+ * Root Port, return it.  Otherwise return NULL.
+ *
+ * We often use a devfn 00.0 "host bridge" in the pci_p2pdma_whitelist[]
+ * (though there is no PCI/PCIe requirement for such a device).  On some
+ * platforms, e.g., Intel Skylake, there is no such host bridge device, and
+ * pci_p2pdma_whitelist[] may contain a Root Port at any devfn.
+ *
+ * This function is similar to pci_get_slot(host->bus, 0), but it does
+ * not take the pci_bus_sem lock since __host_bridge_whitelist() must not
+ * sleep.
+ *
+ * For this to be safe, the caller should hold a reference to a device on the
+ * bridge, which should ensure the host_bridge device will not be freed
+ * or removed from the head of the devices list.
+ */
+static struct pci_dev *pci_host_bridge_dev(struct pci_host_bridge *host)
+{
+	struct pci_dev *root;
+
+	root = list_first_entry_or_null(&host->bus->devices,
+					struct pci_dev, bus_list);
+
+	if (!root)
+		return NULL;
+
+	if (root->devfn == PCI_DEVFN(0, 0))
+		return root;
+
+	if (pci_pcie_type(root) == PCI_EXP_TYPE_ROOT_PORT)
+		return root;
+
+	return NULL;
+}
+
+static bool __host_bridge_whitelist(struct pci_host_bridge *host,
+				    bool same_host_bridge, bool warn)
+{
+	struct pci_dev *root = pci_host_bridge_dev(host);
+	const struct pci_p2pdma_whitelist_entry *entry;
+	unsigned short vendor, device;
+
+	if (!root)
+		return false;
+
+	vendor = root->vendor;
+	device = root->device;
+
+	for (entry = pci_p2pdma_whitelist; entry->vendor; entry++) {
+		if (vendor != entry->vendor || device != entry->device)
+			continue;
+		if (entry->flags & REQ_SAME_HOST_BRIDGE && !same_host_bridge)
+			return false;
+
+		return true;
+	}
+
+	if (warn)
+		pci_warn(root, "Host bridge not in P2PDMA whitelist: %04x:%04x\n",
+			 vendor, device);
+
+	return false;
+}
+
+/*
+ * If we can't find a common upstream bridge take a look at the root
+ * complex and compare it to a whitelist of known good hardware.
+ */
+static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b,
+				  bool warn)
+{
+	struct pci_host_bridge *host_a = pci_find_host_bridge(a->bus);
+	struct pci_host_bridge *host_b = pci_find_host_bridge(b->bus);
+
+	if (host_a == host_b)
+		return __host_bridge_whitelist(host_a, true, warn);
+
+	if (__host_bridge_whitelist(host_a, false, warn) &&
+	    __host_bridge_whitelist(host_b, false, warn))
+		return true;
+
+	return false;
+}
+
+static unsigned long map_types_idx(struct pci_dev *client)
+{
+	return (pci_domain_nr(client->bus) << 16) | pci_dev_id(client);
+}
+
+/*
+ * Calculate the P2PDMA mapping type and distance between two PCI devices.
+ *
+ * If the two devices are the same PCI function, return
+ * PCI_P2PDMA_MAP_BUS_ADDR and a distance of 0.
+ *
+ * If they are two functions of the same device, return
+ * PCI_P2PDMA_MAP_BUS_ADDR and a distance of 2 (one hop up to the bridge,
+ * then one hop back down to another function of the same device).
+ *
+ * In the case where two devices are connected to the same PCIe switch,
+ * return a distance of 4. This corresponds to the following PCI tree:
+ *
+ *     -+  Root Port
+ *      \+ Switch Upstream Port
+ *       +-+ Switch Downstream Port 0
+ *       + \- Device A
+ *       \-+ Switch Downstream Port 1
+ *         \- Device B
+ *
+ * The distance is 4 because we traverse from Device A to Downstream Port 0
+ * to the common Switch Upstream Port, back down to Downstream Port 1 and
+ * then to Device B. The mapping type returned depends on the ACS
+ * redirection setting of the ports along the path.
+ *
+ * If ACS redirect is set on any port in the path, traffic between the
+ * devices will go through the host bridge, so return
+ * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; otherwise return
+ * PCI_P2PDMA_MAP_BUS_ADDR.
+ *
+ * Any two devices that have a data path that goes through the host bridge
+ * will consult a whitelist. If the host bridge is in the whitelist, return
+ * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE with the distance set to the number of
+ * ports per above. If the device is not in the whitelist, return
+ * PCI_P2PDMA_MAP_NOT_SUPPORTED.
+ */
+static enum pci_p2pdma_map_type
+calc_map_type_and_dist(struct pci_dev *provider, struct pci_dev *client,
+		int *dist, bool verbose)
+{
+	enum pci_p2pdma_map_type map_type = PCI_P2PDMA_MAP_THRU_HOST_BRIDGE;
+	struct pci_dev *a = provider, *b = client, *bb;
+	bool acs_redirects = false;
+	struct pci_p2pdma *p2pdma;
+	struct seq_buf acs_list;
+	int acs_cnt = 0;
+	int dist_a = 0;
+	int dist_b = 0;
+	char buf[128];
+
+	seq_buf_init(&acs_list, buf, sizeof(buf));
+
+	/*
+	 * Note, we don't need to take references to devices returned by
+	 * pci_upstream_bridge() seeing we hold a reference to a child
+	 * device which will already hold a reference to the upstream bridge.
+	 */
+	while (a) {
+		dist_b = 0;
+
+		if (pci_bridge_has_acs_redir(a)) {
+			seq_buf_print_bus_devfn(&acs_list, a);
+			acs_cnt++;
+		}
+
+		bb = b;
+
+		while (bb) {
+			if (a == bb)
+				goto check_b_path_acs;
+
+			bb = pci_upstream_bridge(bb);
+			dist_b++;
+		}
+
+		a = pci_upstream_bridge(a);
+		dist_a++;
+	}
+
+	*dist = dist_a + dist_b;
+	goto map_through_host_bridge;
+
+check_b_path_acs:
+	bb = b;
+
+	while (bb) {
+		if (a == bb)
+			break;
+
+		if (pci_bridge_has_acs_redir(bb)) {
+			seq_buf_print_bus_devfn(&acs_list, bb);
+			acs_cnt++;
+		}
+
+		bb = pci_upstream_bridge(bb);
+	}
+
+	*dist = dist_a + dist_b;
+
+	if (!acs_cnt) {
+		map_type = PCI_P2PDMA_MAP_BUS_ADDR;
+		goto done;
+	}
+
+	if (verbose) {
+		acs_list.buffer[acs_list.len-1] = 0; /* drop final semicolon */
+		pci_warn(client, "ACS redirect is set between the client and provider (%s)\n",
+			 pci_name(provider));
+		pci_warn(client, "to disable ACS redirect for this path, add the kernel parameter: pci=disable_acs_redir=%s\n",
+			 acs_list.buffer);
+	}
+	acs_redirects = true;
+
+map_through_host_bridge:
+	if (!cpu_supports_p2pdma() &&
+	    !host_bridge_whitelist(provider, client, acs_redirects)) {
+		if (verbose)
+			pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge or whitelisted host bridge\n",
+				 pci_name(provider));
+		map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED;
+	}
+done:
+	rcu_read_lock();
+	p2pdma = rcu_dereference(provider->p2pdma);
+	if (p2pdma)
+		xa_store(&p2pdma->map_types, map_types_idx(client),
+			 xa_mk_value(map_type), GFP_KERNEL);
+	rcu_read_unlock();
+	return map_type;
+}
+
+/**
+ * pci_p2pdma_distance_many - Determine the cumulative distance between
+ *	a p2pdma provider and the clients in use.
+ * @provider: p2pdma provider to check against the client list
+ * @clients: array of devices to check (NULL-terminated)
+ * @num_clients: number of clients in the array
+ * @verbose: if true, print warnings for devices when we return -1
+ *
+ * Returns -1 if any of the clients are not compatible, otherwise returns a
+ * positive number where a lower number is the preferable choice. (If there's
+ * one client that's the same as the provider it will return 0, which is best
+ * choice).
+ *
+ * "compatible" means the provider and the clients are either all behind
+ * the same PCI root port or the host bridges connected to each of the devices
+ * are listed in the 'pci_p2pdma_whitelist'.
+ */
+int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients,
+			     int num_clients, bool verbose)
+{
+	enum pci_p2pdma_map_type map;
+	bool not_supported = false;
+	struct pci_dev *pci_client;
+	int total_dist = 0;
+	int i, distance;
+
+	if (num_clients == 0)
+		return -1;
+
+	for (i = 0; i < num_clients; i++) {
+		pci_client = find_parent_pci_dev(clients[i]);
+		if (!pci_client) {
+			if (verbose)
+				dev_warn(clients[i],
+					 "cannot be used for peer-to-peer DMA as it is not a PCI device\n");
+			return -1;
+		}
+
+		map = calc_map_type_and_dist(provider, pci_client, &distance,
+					     verbose);
+
+		pci_dev_put(pci_client);
+
+		if (map == PCI_P2PDMA_MAP_NOT_SUPPORTED)
+			not_supported = true;
+
+		if (not_supported && !verbose)
+			break;
+
+		total_dist += distance;
+	}
+
+	if (not_supported)
+		return -1;
+
+	return total_dist;
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_distance_many);
+
+/**
+ * pci_has_p2pmem - check if a given PCI device has published any p2pmem
+ * @pdev: PCI device to check
+ */
+bool pci_has_p2pmem(struct pci_dev *pdev)
+{
+	struct pci_p2pdma *p2pdma;
+	bool res;
+
+	rcu_read_lock();
+	p2pdma = rcu_dereference(pdev->p2pdma);
+	res = p2pdma && p2pdma->p2pmem_published;
+	rcu_read_unlock();
+
+	return res;
+}
+EXPORT_SYMBOL_GPL(pci_has_p2pmem);
+
+/**
+ * pci_p2pmem_find_many - find a peer-to-peer DMA memory device compatible with
+ *	the specified list of clients and shortest distance
+ * @clients: array of devices to check (NULL-terminated)
+ * @num_clients: number of client devices in the list
+ *
+ * If multiple devices are behind the same switch, the one "closest" to the
+ * client devices in use will be chosen first. (So if one of the providers is
+ * the same as one of the clients, that provider will be used ahead of any
+ * other providers that are unrelated). If multiple providers are an equal
+ * distance away, one will be chosen at random.
+ *
+ * Returns a pointer to the PCI device with a reference taken (use pci_dev_put
+ * to return the reference) or NULL if no compatible device is found. The
+ * found provider will also be assigned to the client list.
+ */
+struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients)
+{
+	struct pci_dev *pdev = NULL;
+	int distance;
+	int closest_distance = INT_MAX;
+	struct pci_dev **closest_pdevs;
+	int dev_cnt = 0;
+	const int max_devs = PAGE_SIZE / sizeof(*closest_pdevs);
+	int i;
+
+	closest_pdevs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!closest_pdevs)
+		return NULL;
+
+	for_each_pci_dev(pdev) {
+		if (!pci_has_p2pmem(pdev))
+			continue;
+
+		distance = pci_p2pdma_distance_many(pdev, clients,
+						    num_clients, false);
+		if (distance < 0 || distance > closest_distance)
+			continue;
+
+		if (distance == closest_distance && dev_cnt >= max_devs)
+			continue;
+
+		if (distance < closest_distance) {
+			for (i = 0; i < dev_cnt; i++)
+				pci_dev_put(closest_pdevs[i]);
+
+			dev_cnt = 0;
+			closest_distance = distance;
+		}
+
+		closest_pdevs[dev_cnt++] = pci_dev_get(pdev);
+	}
+
+	if (dev_cnt)
+		pdev = pci_dev_get(closest_pdevs[get_random_u32_below(dev_cnt)]);
+
+	for (i = 0; i < dev_cnt; i++)
+		pci_dev_put(closest_pdevs[i]);
+
+	kfree(closest_pdevs);
+	return pdev;
+}
+EXPORT_SYMBOL_GPL(pci_p2pmem_find_many);
+
+/**
+ * pci_alloc_p2pmem - allocate peer-to-peer DMA memory
+ * @pdev: the device to allocate memory from
+ * @size: number of bytes to allocate
+ *
+ * Returns the allocated memory or NULL on error.
+ */
+void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size)
+{
+	void *ret = NULL;
+	struct percpu_ref *ref;
+	struct pci_p2pdma *p2pdma;
+
+	/*
+	 * Pairs with synchronize_rcu() in pci_p2pdma_release() to
+	 * ensure pdev->p2pdma is non-NULL for the duration of the
+	 * read-lock.
+	 */
+	rcu_read_lock();
+	p2pdma = rcu_dereference(pdev->p2pdma);
+	if (unlikely(!p2pdma))
+		goto out;
+
+	ret = (void *)gen_pool_alloc_owner(p2pdma->pool, size, (void **) &ref);
+	if (!ret)
+		goto out;
+
+	if (unlikely(!percpu_ref_tryget_live_rcu(ref))) {
+		gen_pool_free(p2pdma->pool, (unsigned long) ret, size);
+		ret = NULL;
+		goto out;
+	}
+out:
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);
+
+/**
+ * pci_free_p2pmem - free peer-to-peer DMA memory
+ * @pdev: the device the memory was allocated from
+ * @addr: address of the memory that was allocated
+ * @size: number of bytes that were allocated
+ */
+void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size)
+{
+	struct percpu_ref *ref;
+	struct pci_p2pdma *p2pdma = rcu_dereference_protected(pdev->p2pdma, 1);
+
+	gen_pool_free_owner(p2pdma->pool, (uintptr_t)addr, size,
+			(void **) &ref);
+	percpu_ref_put(ref);
+}
+EXPORT_SYMBOL_GPL(pci_free_p2pmem);
+
+/**
+ * pci_p2pmem_virt_to_bus - return the PCI bus address for a given virtual
+ *	address obtained with pci_alloc_p2pmem()
+ * @pdev: the device the memory was allocated from
+ * @addr: address of the memory that was allocated
+ */
+pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev, void *addr)
+{
+	struct pci_p2pdma *p2pdma;
+
+	if (!addr)
+		return 0;
+
+	p2pdma = rcu_dereference_protected(pdev->p2pdma, 1);
+	if (!p2pdma)
+		return 0;
+
+	/*
+	 * Note: when we added the memory to the pool we used the PCI
+	 * bus address as the physical address. So gen_pool_virt_to_phys()
+	 * actually returns the bus address despite the misleading name.
+	 */
+	return gen_pool_virt_to_phys(p2pdma->pool, (unsigned long)addr);
+}
+EXPORT_SYMBOL_GPL(pci_p2pmem_virt_to_bus);
+
+/**
+ * pci_p2pmem_alloc_sgl - allocate peer-to-peer DMA memory in a scatterlist
+ * @pdev: the device to allocate memory from
+ * @nents: the number of SG entries in the list
+ * @length: number of bytes to allocate
+ *
+ * Return: %NULL on error or &struct scatterlist pointer and @nents on success
+ */
+struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev,
+					 unsigned int *nents, u32 length)
+{
+	struct scatterlist *sg;
+	void *addr;
+
+	sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+	if (!sg)
+		return NULL;
+
+	sg_init_table(sg, 1);
+
+	addr = pci_alloc_p2pmem(pdev, length);
+	if (!addr)
+		goto out_free_sg;
+
+	sg_set_buf(sg, addr, length);
+	*nents = 1;
+	return sg;
+
+out_free_sg:
+	kfree(sg);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(pci_p2pmem_alloc_sgl);
+
+/**
+ * pci_p2pmem_free_sgl - free a scatterlist allocated by pci_p2pmem_alloc_sgl()
+ * @pdev: the device to allocate memory from
+ * @sgl: the allocated scatterlist
+ */
+void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl)
+{
+	struct scatterlist *sg;
+	int count;
+
+	for_each_sg(sgl, sg, INT_MAX, count) {
+		if (!sg)
+			break;
+
+		pci_free_p2pmem(pdev, sg_virt(sg), sg->length);
+	}
+	kfree(sgl);
+}
+EXPORT_SYMBOL_GPL(pci_p2pmem_free_sgl);
+
+/**
+ * pci_p2pmem_publish - publish the peer-to-peer DMA memory for use by
+ *	other devices with pci_p2pmem_find()
+ * @pdev: the device with peer-to-peer DMA memory to publish
+ * @publish: set to true to publish the memory, false to unpublish it
+ *
+ * Published memory can be used by other PCI device drivers for
+ * peer-2-peer DMA operations. Non-published memory is reserved for
+ * exclusive use of the device driver that registers the peer-to-peer
+ * memory.
+ */
+void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
+{
+	struct pci_p2pdma *p2pdma;
+
+	rcu_read_lock();
+	p2pdma = rcu_dereference(pdev->p2pdma);
+	if (p2pdma)
+		p2pdma->p2pmem_published = publish;
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(pci_p2pmem_publish);
+
+static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap,
+						    struct device *dev)
+{
+	enum pci_p2pdma_map_type type = PCI_P2PDMA_MAP_NOT_SUPPORTED;
+	struct pci_dev *provider = to_p2p_pgmap(pgmap)->provider;
+	struct pci_dev *client;
+	struct pci_p2pdma *p2pdma;
+	int dist;
+
+	if (!provider->p2pdma)
+		return PCI_P2PDMA_MAP_NOT_SUPPORTED;
+
+	if (!dev_is_pci(dev))
+		return PCI_P2PDMA_MAP_NOT_SUPPORTED;
+
+	client = to_pci_dev(dev);
+
+	rcu_read_lock();
+	p2pdma = rcu_dereference(provider->p2pdma);
+
+	if (p2pdma)
+		type = xa_to_value(xa_load(&p2pdma->map_types,
+					   map_types_idx(client)));
+	rcu_read_unlock();
+
+	if (type == PCI_P2PDMA_MAP_UNKNOWN)
+		return calc_map_type_and_dist(provider, client, &dist, true);
+
+	return type;
+}
+
+/**
+ * pci_p2pdma_map_segment - map an sg segment determining the mapping type
+ * @state: State structure that should be declared outside of the for_each_sg()
+ *	loop and initialized to zero.
+ * @dev: DMA device that's doing the mapping operation
+ * @sg: scatterlist segment to map
+ *
+ * This is a helper to be used by non-IOMMU dma_map_sg() implementations where
+ * the sg segment is the same for the page_link and the dma_address.
+ *
+ * Attempt to map a single segment in an SGL with the PCI bus address.
+ * The segment must point to a PCI P2PDMA page and thus must be
+ * wrapped in a is_pci_p2pdma_page(sg_page(sg)) check.
+ *
+ * Returns the type of mapping used and maps the page if the type is
+ * PCI_P2PDMA_MAP_BUS_ADDR.
+ */
+enum pci_p2pdma_map_type
+pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
+		       struct scatterlist *sg)
+{
+	if (state->pgmap != sg_page(sg)->pgmap) {
+		state->pgmap = sg_page(sg)->pgmap;
+		state->map = pci_p2pdma_map_type(state->pgmap, dev);
+		state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset;
+	}
+
+	if (state->map == PCI_P2PDMA_MAP_BUS_ADDR) {
+		sg->dma_address = sg_phys(sg) + state->bus_off;
+		sg_dma_len(sg) = sg->length;
+		sg_dma_mark_bus_address(sg);
+	}
+
+	return state->map;
+}
+
+/**
+ * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store
+ *		to enable p2pdma
+ * @page: contents of the value to be stored
+ * @p2p_dev: returns the PCI device that was selected to be used
+ *		(if one was specified in the stored value)
+ * @use_p2pdma: returns whether to enable p2pdma or not
+ *
+ * Parses an attribute value to decide whether to enable p2pdma.
+ * The value can select a PCI device (using its full BDF device
+ * name) or a boolean (in any format kstrtobool() accepts). A false
+ * value disables p2pdma, a true value expects the caller
+ * to automatically find a compatible device and specifying a PCI device
+ * expects the caller to use the specific provider.
+ *
+ * pci_p2pdma_enable_show() should be used as the show operation for
+ * the attribute.
+ *
+ * Returns 0 on success
+ */
+int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev,
+			    bool *use_p2pdma)
+{
+	struct device *dev;
+
+	dev = bus_find_device_by_name(&pci_bus_type, NULL, page);
+	if (dev) {
+		*use_p2pdma = true;
+		*p2p_dev = to_pci_dev(dev);
+
+		if (!pci_has_p2pmem(*p2p_dev)) {
+			pci_err(*p2p_dev,
+				"PCI device has no peer-to-peer memory: %s\n",
+				page);
+			pci_dev_put(*p2p_dev);
+			return -ENODEV;
+		}
+
+		return 0;
+	} else if ((page[0] == '0' || page[0] == '1') && !iscntrl(page[1])) {
+		/*
+		 * If the user enters a PCI device that  doesn't exist
+		 * like "0000:01:00.1", we don't want kstrtobool to think
+		 * it's a '0' when it's clearly not what the user wanted.
+		 * So we require 0's and 1's to be exactly one character.
+		 */
+	} else if (!kstrtobool(page, use_p2pdma)) {
+		return 0;
+	}
+
+	pr_err("No such PCI device: %.*s\n", (int)strcspn(page, "\n"), page);
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_enable_store);
+
+/**
+ * pci_p2pdma_enable_show - show a configfs/sysfs attribute indicating
+ *		whether p2pdma is enabled
+ * @page: contents of the stored value
+ * @p2p_dev: the selected p2p device (NULL if no device is selected)
+ * @use_p2pdma: whether p2pdma has been enabled
+ *
+ * Attributes that use pci_p2pdma_enable_store() should use this function
+ * to show the value of the attribute.
+ *
+ * Returns 0 on success
+ */
+ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev,
+			       bool use_p2pdma)
+{
+	if (!use_p2pdma)
+		return sprintf(page, "0\n");
+
+	if (!p2p_dev)
+		return sprintf(page, "1\n");
+
+	return sprintf(page, "%s\n", pci_name(p2p_dev));
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_enable_show);
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
new file mode 100644
index 000000000..05b7357bd
--- /dev/null
+++ b/drivers/pci/pci-acpi.c
@@ -0,0 +1,1520 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI support in ACPI
+ *
+ * Copyright (C) 2005 David Shaohua Li <shaohua.li@intel.com>
+ * Copyright (C) 2004 Tom Long Nguyen <tom.l.nguyen@intel.com>
+ * Copyright (C) 2004 Intel Corp.
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/irqdomain.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/pci_hotplug.h>
+#include <linux/module.h>
+#include <linux/pci-acpi.h>
+#include <linux/pm_runtime.h>
+#include <linux/pm_qos.h>
+#include <linux/rwsem.h>
+#include "pci.h"
+
+/*
+ * The GUID is defined in the PCI Firmware Specification available
+ * here to PCI-SIG members:
+ * https://members.pcisig.com/wg/PCI-SIG/document/15350
+ */
+const guid_t pci_acpi_dsm_guid =
+	GUID_INIT(0xe5c937d0, 0x3553, 0x4d7a,
+		  0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d);
+
+#if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64)
+static int acpi_get_rc_addr(struct acpi_device *adev, struct resource *res)
+{
+	struct device *dev = &adev->dev;
+	struct resource_entry *entry;
+	struct list_head list;
+	unsigned long flags;
+	int ret;
+
+	INIT_LIST_HEAD(&list);
+	flags = IORESOURCE_MEM;
+	ret = acpi_dev_get_resources(adev, &list,
+				     acpi_dev_filter_resource_type_cb,
+				     (void *) flags);
+	if (ret < 0) {
+		dev_err(dev, "failed to parse _CRS method, error code %d\n",
+			ret);
+		return ret;
+	}
+
+	if (ret == 0) {
+		dev_err(dev, "no IO and memory resources present in _CRS\n");
+		return -EINVAL;
+	}
+
+	entry = list_first_entry(&list, struct resource_entry, node);
+	*res = *entry->res;
+	acpi_dev_free_resource_list(&list);
+	return 0;
+}
+
+static acpi_status acpi_match_rc(acpi_handle handle, u32 lvl, void *context,
+				 void **retval)
+{
+	u16 *segment = context;
+	unsigned long long uid;
+	acpi_status status;
+
+	status = acpi_evaluate_integer(handle, METHOD_NAME__UID, NULL, &uid);
+	if (ACPI_FAILURE(status) || uid != *segment)
+		return AE_CTRL_DEPTH;
+
+	*(acpi_handle *)retval = handle;
+	return AE_CTRL_TERMINATE;
+}
+
+int acpi_get_rc_resources(struct device *dev, const char *hid, u16 segment,
+			  struct resource *res)
+{
+	struct acpi_device *adev;
+	acpi_status status;
+	acpi_handle handle;
+	int ret;
+
+	status = acpi_get_devices(hid, acpi_match_rc, &segment, &handle);
+	if (ACPI_FAILURE(status)) {
+		dev_err(dev, "can't find _HID %s device to locate resources\n",
+			hid);
+		return -ENODEV;
+	}
+
+	adev = acpi_fetch_acpi_dev(handle);
+	if (!adev)
+		return -ENODEV;
+
+	ret = acpi_get_rc_addr(adev, res);
+	if (ret) {
+		dev_err(dev, "can't get resource from %s\n",
+			dev_name(&adev->dev));
+		return ret;
+	}
+
+	return 0;
+}
+#endif
+
+phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle)
+{
+	acpi_status status = AE_NOT_EXIST;
+	unsigned long long mcfg_addr;
+
+	if (handle)
+		status = acpi_evaluate_integer(handle, METHOD_NAME__CBA,
+					       NULL, &mcfg_addr);
+	if (ACPI_FAILURE(status))
+		return 0;
+
+	return (phys_addr_t)mcfg_addr;
+}
+
+/* _HPX PCI Setting Record (Type 0); same as _HPP */
+struct hpx_type0 {
+	u32 revision;		/* Not present in _HPP */
+	u8  cache_line_size;	/* Not applicable to PCIe */
+	u8  latency_timer;	/* Not applicable to PCIe */
+	u8  enable_serr;
+	u8  enable_perr;
+};
+
+static struct hpx_type0 pci_default_type0 = {
+	.revision = 1,
+	.cache_line_size = 8,
+	.latency_timer = 0x40,
+	.enable_serr = 0,
+	.enable_perr = 0,
+};
+
+static void program_hpx_type0(struct pci_dev *dev, struct hpx_type0 *hpx)
+{
+	u16 pci_cmd, pci_bctl;
+
+	if (!hpx)
+		hpx = &pci_default_type0;
+
+	if (hpx->revision > 1) {
+		pci_warn(dev, "PCI settings rev %d not supported; using defaults\n",
+			 hpx->revision);
+		hpx = &pci_default_type0;
+	}
+
+	pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, hpx->cache_line_size);
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, hpx->latency_timer);
+	pci_read_config_word(dev, PCI_COMMAND, &pci_cmd);
+	if (hpx->enable_serr)
+		pci_cmd |= PCI_COMMAND_SERR;
+	if (hpx->enable_perr)
+		pci_cmd |= PCI_COMMAND_PARITY;
+	pci_write_config_word(dev, PCI_COMMAND, pci_cmd);
+
+	/* Program bridge control value */
+	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+		pci_write_config_byte(dev, PCI_SEC_LATENCY_TIMER,
+				      hpx->latency_timer);
+		pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &pci_bctl);
+		if (hpx->enable_perr)
+			pci_bctl |= PCI_BRIDGE_CTL_PARITY;
+		pci_write_config_word(dev, PCI_BRIDGE_CONTROL, pci_bctl);
+	}
+}
+
+static acpi_status decode_type0_hpx_record(union acpi_object *record,
+					   struct hpx_type0 *hpx0)
+{
+	int i;
+	union acpi_object *fields = record->package.elements;
+	u32 revision = fields[1].integer.value;
+
+	switch (revision) {
+	case 1:
+		if (record->package.count != 6)
+			return AE_ERROR;
+		for (i = 2; i < 6; i++)
+			if (fields[i].type != ACPI_TYPE_INTEGER)
+				return AE_ERROR;
+		hpx0->revision        = revision;
+		hpx0->cache_line_size = fields[2].integer.value;
+		hpx0->latency_timer   = fields[3].integer.value;
+		hpx0->enable_serr     = fields[4].integer.value;
+		hpx0->enable_perr     = fields[5].integer.value;
+		break;
+	default:
+		pr_warn("%s: Type 0 Revision %d record not supported\n",
+		       __func__, revision);
+		return AE_ERROR;
+	}
+	return AE_OK;
+}
+
+/* _HPX PCI-X Setting Record (Type 1) */
+struct hpx_type1 {
+	u32 revision;
+	u8  max_mem_read;
+	u8  avg_max_split;
+	u16 tot_max_split;
+};
+
+static void program_hpx_type1(struct pci_dev *dev, struct hpx_type1 *hpx)
+{
+	int pos;
+
+	if (!hpx)
+		return;
+
+	pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+	if (!pos)
+		return;
+
+	pci_warn(dev, "PCI-X settings not supported\n");
+}
+
+static acpi_status decode_type1_hpx_record(union acpi_object *record,
+					   struct hpx_type1 *hpx1)
+{
+	int i;
+	union acpi_object *fields = record->package.elements;
+	u32 revision = fields[1].integer.value;
+
+	switch (revision) {
+	case 1:
+		if (record->package.count != 5)
+			return AE_ERROR;
+		for (i = 2; i < 5; i++)
+			if (fields[i].type != ACPI_TYPE_INTEGER)
+				return AE_ERROR;
+		hpx1->revision      = revision;
+		hpx1->max_mem_read  = fields[2].integer.value;
+		hpx1->avg_max_split = fields[3].integer.value;
+		hpx1->tot_max_split = fields[4].integer.value;
+		break;
+	default:
+		pr_warn("%s: Type 1 Revision %d record not supported\n",
+		       __func__, revision);
+		return AE_ERROR;
+	}
+	return AE_OK;
+}
+
+static bool pcie_root_rcb_set(struct pci_dev *dev)
+{
+	struct pci_dev *rp = pcie_find_root_port(dev);
+	u16 lnkctl;
+
+	if (!rp)
+		return false;
+
+	pcie_capability_read_word(rp, PCI_EXP_LNKCTL, &lnkctl);
+	if (lnkctl & PCI_EXP_LNKCTL_RCB)
+		return true;
+
+	return false;
+}
+
+/* _HPX PCI Express Setting Record (Type 2) */
+struct hpx_type2 {
+	u32 revision;
+	u32 unc_err_mask_and;
+	u32 unc_err_mask_or;
+	u32 unc_err_sever_and;
+	u32 unc_err_sever_or;
+	u32 cor_err_mask_and;
+	u32 cor_err_mask_or;
+	u32 adv_err_cap_and;
+	u32 adv_err_cap_or;
+	u16 pci_exp_devctl_and;
+	u16 pci_exp_devctl_or;
+	u16 pci_exp_lnkctl_and;
+	u16 pci_exp_lnkctl_or;
+	u32 sec_unc_err_sever_and;
+	u32 sec_unc_err_sever_or;
+	u32 sec_unc_err_mask_and;
+	u32 sec_unc_err_mask_or;
+};
+
+static void program_hpx_type2(struct pci_dev *dev, struct hpx_type2 *hpx)
+{
+	int pos;
+	u32 reg32;
+
+	if (!hpx)
+		return;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	if (hpx->revision > 1) {
+		pci_warn(dev, "PCIe settings rev %d not supported\n",
+			 hpx->revision);
+		return;
+	}
+
+	/*
+	 * Don't allow _HPX to change MPS or MRRS settings.  We manage
+	 * those to make sure they're consistent with the rest of the
+	 * platform.
+	 */
+	hpx->pci_exp_devctl_and |= PCI_EXP_DEVCTL_PAYLOAD |
+				    PCI_EXP_DEVCTL_READRQ;
+	hpx->pci_exp_devctl_or &= ~(PCI_EXP_DEVCTL_PAYLOAD |
+				    PCI_EXP_DEVCTL_READRQ);
+
+	/* Initialize Device Control Register */
+	pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
+			~hpx->pci_exp_devctl_and, hpx->pci_exp_devctl_or);
+
+	/* Initialize Link Control Register */
+	if (pcie_cap_has_lnkctl(dev)) {
+
+		/*
+		 * If the Root Port supports Read Completion Boundary of
+		 * 128, set RCB to 128.  Otherwise, clear it.
+		 */
+		hpx->pci_exp_lnkctl_and |= PCI_EXP_LNKCTL_RCB;
+		hpx->pci_exp_lnkctl_or &= ~PCI_EXP_LNKCTL_RCB;
+		if (pcie_root_rcb_set(dev))
+			hpx->pci_exp_lnkctl_or |= PCI_EXP_LNKCTL_RCB;
+
+		pcie_capability_clear_and_set_word(dev, PCI_EXP_LNKCTL,
+			~hpx->pci_exp_lnkctl_and, hpx->pci_exp_lnkctl_or);
+	}
+
+	/* Find Advanced Error Reporting Enhanced Capability */
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	if (!pos)
+		return;
+
+	/* Initialize Uncorrectable Error Mask Register */
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &reg32);
+	reg32 = (reg32 & hpx->unc_err_mask_and) | hpx->unc_err_mask_or;
+	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, reg32);
+
+	/* Initialize Uncorrectable Error Severity Register */
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &reg32);
+	reg32 = (reg32 & hpx->unc_err_sever_and) | hpx->unc_err_sever_or;
+	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, reg32);
+
+	/* Initialize Correctable Error Mask Register */
+	pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &reg32);
+	reg32 = (reg32 & hpx->cor_err_mask_and) | hpx->cor_err_mask_or;
+	pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg32);
+
+	/* Initialize Advanced Error Capabilities and Control Register */
+	pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32);
+	reg32 = (reg32 & hpx->adv_err_cap_and) | hpx->adv_err_cap_or;
+
+	/* Don't enable ECRC generation or checking if unsupported */
+	if (!(reg32 & PCI_ERR_CAP_ECRC_GENC))
+		reg32 &= ~PCI_ERR_CAP_ECRC_GENE;
+	if (!(reg32 & PCI_ERR_CAP_ECRC_CHKC))
+		reg32 &= ~PCI_ERR_CAP_ECRC_CHKE;
+	pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32);
+
+	/*
+	 * FIXME: The following two registers are not supported yet.
+	 *
+	 *   o Secondary Uncorrectable Error Severity Register
+	 *   o Secondary Uncorrectable Error Mask Register
+	 */
+}
+
+static acpi_status decode_type2_hpx_record(union acpi_object *record,
+					   struct hpx_type2 *hpx2)
+{
+	int i;
+	union acpi_object *fields = record->package.elements;
+	u32 revision = fields[1].integer.value;
+
+	switch (revision) {
+	case 1:
+		if (record->package.count != 18)
+			return AE_ERROR;
+		for (i = 2; i < 18; i++)
+			if (fields[i].type != ACPI_TYPE_INTEGER)
+				return AE_ERROR;
+		hpx2->revision      = revision;
+		hpx2->unc_err_mask_and      = fields[2].integer.value;
+		hpx2->unc_err_mask_or       = fields[3].integer.value;
+		hpx2->unc_err_sever_and     = fields[4].integer.value;
+		hpx2->unc_err_sever_or      = fields[5].integer.value;
+		hpx2->cor_err_mask_and      = fields[6].integer.value;
+		hpx2->cor_err_mask_or       = fields[7].integer.value;
+		hpx2->adv_err_cap_and       = fields[8].integer.value;
+		hpx2->adv_err_cap_or        = fields[9].integer.value;
+		hpx2->pci_exp_devctl_and    = fields[10].integer.value;
+		hpx2->pci_exp_devctl_or     = fields[11].integer.value;
+		hpx2->pci_exp_lnkctl_and    = fields[12].integer.value;
+		hpx2->pci_exp_lnkctl_or     = fields[13].integer.value;
+		hpx2->sec_unc_err_sever_and = fields[14].integer.value;
+		hpx2->sec_unc_err_sever_or  = fields[15].integer.value;
+		hpx2->sec_unc_err_mask_and  = fields[16].integer.value;
+		hpx2->sec_unc_err_mask_or   = fields[17].integer.value;
+		break;
+	default:
+		pr_warn("%s: Type 2 Revision %d record not supported\n",
+		       __func__, revision);
+		return AE_ERROR;
+	}
+	return AE_OK;
+}
+
+/* _HPX PCI Express Setting Record (Type 3) */
+struct hpx_type3 {
+	u16 device_type;
+	u16 function_type;
+	u16 config_space_location;
+	u16 pci_exp_cap_id;
+	u16 pci_exp_cap_ver;
+	u16 pci_exp_vendor_id;
+	u16 dvsec_id;
+	u16 dvsec_rev;
+	u16 match_offset;
+	u32 match_mask_and;
+	u32 match_value;
+	u16 reg_offset;
+	u32 reg_mask_and;
+	u32 reg_mask_or;
+};
+
+enum hpx_type3_dev_type {
+	HPX_TYPE_ENDPOINT	= BIT(0),
+	HPX_TYPE_LEG_END	= BIT(1),
+	HPX_TYPE_RC_END		= BIT(2),
+	HPX_TYPE_RC_EC		= BIT(3),
+	HPX_TYPE_ROOT_PORT	= BIT(4),
+	HPX_TYPE_UPSTREAM	= BIT(5),
+	HPX_TYPE_DOWNSTREAM	= BIT(6),
+	HPX_TYPE_PCI_BRIDGE	= BIT(7),
+	HPX_TYPE_PCIE_BRIDGE	= BIT(8),
+};
+
+static u16 hpx3_device_type(struct pci_dev *dev)
+{
+	u16 pcie_type = pci_pcie_type(dev);
+	static const int pcie_to_hpx3_type[] = {
+		[PCI_EXP_TYPE_ENDPOINT]    = HPX_TYPE_ENDPOINT,
+		[PCI_EXP_TYPE_LEG_END]     = HPX_TYPE_LEG_END,
+		[PCI_EXP_TYPE_RC_END]      = HPX_TYPE_RC_END,
+		[PCI_EXP_TYPE_RC_EC]       = HPX_TYPE_RC_EC,
+		[PCI_EXP_TYPE_ROOT_PORT]   = HPX_TYPE_ROOT_PORT,
+		[PCI_EXP_TYPE_UPSTREAM]    = HPX_TYPE_UPSTREAM,
+		[PCI_EXP_TYPE_DOWNSTREAM]  = HPX_TYPE_DOWNSTREAM,
+		[PCI_EXP_TYPE_PCI_BRIDGE]  = HPX_TYPE_PCI_BRIDGE,
+		[PCI_EXP_TYPE_PCIE_BRIDGE] = HPX_TYPE_PCIE_BRIDGE,
+	};
+
+	if (pcie_type >= ARRAY_SIZE(pcie_to_hpx3_type))
+		return 0;
+
+	return pcie_to_hpx3_type[pcie_type];
+}
+
+enum hpx_type3_fn_type {
+	HPX_FN_NORMAL		= BIT(0),
+	HPX_FN_SRIOV_PHYS	= BIT(1),
+	HPX_FN_SRIOV_VIRT	= BIT(2),
+};
+
+static u8 hpx3_function_type(struct pci_dev *dev)
+{
+	if (dev->is_virtfn)
+		return HPX_FN_SRIOV_VIRT;
+	else if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV) > 0)
+		return HPX_FN_SRIOV_PHYS;
+	else
+		return HPX_FN_NORMAL;
+}
+
+static bool hpx3_cap_ver_matches(u8 pcie_cap_id, u8 hpx3_cap_id)
+{
+	u8 cap_ver = hpx3_cap_id & 0xf;
+
+	if ((hpx3_cap_id & BIT(4)) && cap_ver >= pcie_cap_id)
+		return true;
+	else if (cap_ver == pcie_cap_id)
+		return true;
+
+	return false;
+}
+
+enum hpx_type3_cfg_loc {
+	HPX_CFG_PCICFG		= 0,
+	HPX_CFG_PCIE_CAP	= 1,
+	HPX_CFG_PCIE_CAP_EXT	= 2,
+	HPX_CFG_VEND_CAP	= 3,
+	HPX_CFG_DVSEC		= 4,
+	HPX_CFG_MAX,
+};
+
+static void program_hpx_type3_register(struct pci_dev *dev,
+				       const struct hpx_type3 *reg)
+{
+	u32 match_reg, write_reg, header, orig_value;
+	u16 pos;
+
+	if (!(hpx3_device_type(dev) & reg->device_type))
+		return;
+
+	if (!(hpx3_function_type(dev) & reg->function_type))
+		return;
+
+	switch (reg->config_space_location) {
+	case HPX_CFG_PCICFG:
+		pos = 0;
+		break;
+	case HPX_CFG_PCIE_CAP:
+		pos = pci_find_capability(dev, reg->pci_exp_cap_id);
+		if (pos == 0)
+			return;
+
+		break;
+	case HPX_CFG_PCIE_CAP_EXT:
+		pos = pci_find_ext_capability(dev, reg->pci_exp_cap_id);
+		if (pos == 0)
+			return;
+
+		pci_read_config_dword(dev, pos, &header);
+		if (!hpx3_cap_ver_matches(PCI_EXT_CAP_VER(header),
+					  reg->pci_exp_cap_ver))
+			return;
+
+		break;
+	case HPX_CFG_VEND_CAP:
+	case HPX_CFG_DVSEC:
+	default:
+		pci_warn(dev, "Encountered _HPX type 3 with unsupported config space location");
+		return;
+	}
+
+	pci_read_config_dword(dev, pos + reg->match_offset, &match_reg);
+
+	if ((match_reg & reg->match_mask_and) != reg->match_value)
+		return;
+
+	pci_read_config_dword(dev, pos + reg->reg_offset, &write_reg);
+	orig_value = write_reg;
+	write_reg &= reg->reg_mask_and;
+	write_reg |= reg->reg_mask_or;
+
+	if (orig_value == write_reg)
+		return;
+
+	pci_write_config_dword(dev, pos + reg->reg_offset, write_reg);
+
+	pci_dbg(dev, "Applied _HPX3 at [0x%x]: 0x%08x -> 0x%08x",
+		pos, orig_value, write_reg);
+}
+
+static void program_hpx_type3(struct pci_dev *dev, struct hpx_type3 *hpx)
+{
+	if (!hpx)
+		return;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	program_hpx_type3_register(dev, hpx);
+}
+
+static void parse_hpx3_register(struct hpx_type3 *hpx3_reg,
+				union acpi_object *reg_fields)
+{
+	hpx3_reg->device_type            = reg_fields[0].integer.value;
+	hpx3_reg->function_type          = reg_fields[1].integer.value;
+	hpx3_reg->config_space_location  = reg_fields[2].integer.value;
+	hpx3_reg->pci_exp_cap_id         = reg_fields[3].integer.value;
+	hpx3_reg->pci_exp_cap_ver        = reg_fields[4].integer.value;
+	hpx3_reg->pci_exp_vendor_id      = reg_fields[5].integer.value;
+	hpx3_reg->dvsec_id               = reg_fields[6].integer.value;
+	hpx3_reg->dvsec_rev              = reg_fields[7].integer.value;
+	hpx3_reg->match_offset           = reg_fields[8].integer.value;
+	hpx3_reg->match_mask_and         = reg_fields[9].integer.value;
+	hpx3_reg->match_value            = reg_fields[10].integer.value;
+	hpx3_reg->reg_offset             = reg_fields[11].integer.value;
+	hpx3_reg->reg_mask_and           = reg_fields[12].integer.value;
+	hpx3_reg->reg_mask_or            = reg_fields[13].integer.value;
+}
+
+static acpi_status program_type3_hpx_record(struct pci_dev *dev,
+					   union acpi_object *record)
+{
+	union acpi_object *fields = record->package.elements;
+	u32 desc_count, expected_length, revision;
+	union acpi_object *reg_fields;
+	struct hpx_type3 hpx3;
+	int i;
+
+	revision = fields[1].integer.value;
+	switch (revision) {
+	case 1:
+		desc_count = fields[2].integer.value;
+		expected_length = 3 + desc_count * 14;
+
+		if (record->package.count != expected_length)
+			return AE_ERROR;
+
+		for (i = 2; i < expected_length; i++)
+			if (fields[i].type != ACPI_TYPE_INTEGER)
+				return AE_ERROR;
+
+		for (i = 0; i < desc_count; i++) {
+			reg_fields = fields + 3 + i * 14;
+			parse_hpx3_register(&hpx3, reg_fields);
+			program_hpx_type3(dev, &hpx3);
+		}
+
+		break;
+	default:
+		printk(KERN_WARNING
+			"%s: Type 3 Revision %d record not supported\n",
+			__func__, revision);
+		return AE_ERROR;
+	}
+	return AE_OK;
+}
+
+static acpi_status acpi_run_hpx(struct pci_dev *dev, acpi_handle handle)
+{
+	acpi_status status;
+	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
+	union acpi_object *package, *record, *fields;
+	struct hpx_type0 hpx0;
+	struct hpx_type1 hpx1;
+	struct hpx_type2 hpx2;
+	u32 type;
+	int i;
+
+	status = acpi_evaluate_object(handle, "_HPX", NULL, &buffer);
+	if (ACPI_FAILURE(status))
+		return status;
+
+	package = (union acpi_object *)buffer.pointer;
+	if (package->type != ACPI_TYPE_PACKAGE) {
+		status = AE_ERROR;
+		goto exit;
+	}
+
+	for (i = 0; i < package->package.count; i++) {
+		record = &package->package.elements[i];
+		if (record->type != ACPI_TYPE_PACKAGE) {
+			status = AE_ERROR;
+			goto exit;
+		}
+
+		fields = record->package.elements;
+		if (fields[0].type != ACPI_TYPE_INTEGER ||
+		    fields[1].type != ACPI_TYPE_INTEGER) {
+			status = AE_ERROR;
+			goto exit;
+		}
+
+		type = fields[0].integer.value;
+		switch (type) {
+		case 0:
+			memset(&hpx0, 0, sizeof(hpx0));
+			status = decode_type0_hpx_record(record, &hpx0);
+			if (ACPI_FAILURE(status))
+				goto exit;
+			program_hpx_type0(dev, &hpx0);
+			break;
+		case 1:
+			memset(&hpx1, 0, sizeof(hpx1));
+			status = decode_type1_hpx_record(record, &hpx1);
+			if (ACPI_FAILURE(status))
+				goto exit;
+			program_hpx_type1(dev, &hpx1);
+			break;
+		case 2:
+			memset(&hpx2, 0, sizeof(hpx2));
+			status = decode_type2_hpx_record(record, &hpx2);
+			if (ACPI_FAILURE(status))
+				goto exit;
+			program_hpx_type2(dev, &hpx2);
+			break;
+		case 3:
+			status = program_type3_hpx_record(dev, record);
+			if (ACPI_FAILURE(status))
+				goto exit;
+			break;
+		default:
+			pr_err("%s: Type %d record not supported\n",
+			       __func__, type);
+			status = AE_ERROR;
+			goto exit;
+		}
+	}
+ exit:
+	kfree(buffer.pointer);
+	return status;
+}
+
+static acpi_status acpi_run_hpp(struct pci_dev *dev, acpi_handle handle)
+{
+	acpi_status status;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	union acpi_object *package, *fields;
+	struct hpx_type0 hpx0;
+	int i;
+
+	memset(&hpx0, 0, sizeof(hpx0));
+
+	status = acpi_evaluate_object(handle, "_HPP", NULL, &buffer);
+	if (ACPI_FAILURE(status))
+		return status;
+
+	package = (union acpi_object *) buffer.pointer;
+	if (package->type != ACPI_TYPE_PACKAGE ||
+	    package->package.count != 4) {
+		status = AE_ERROR;
+		goto exit;
+	}
+
+	fields = package->package.elements;
+	for (i = 0; i < 4; i++) {
+		if (fields[i].type != ACPI_TYPE_INTEGER) {
+			status = AE_ERROR;
+			goto exit;
+		}
+	}
+
+	hpx0.revision        = 1;
+	hpx0.cache_line_size = fields[0].integer.value;
+	hpx0.latency_timer   = fields[1].integer.value;
+	hpx0.enable_serr     = fields[2].integer.value;
+	hpx0.enable_perr     = fields[3].integer.value;
+
+	program_hpx_type0(dev, &hpx0);
+
+exit:
+	kfree(buffer.pointer);
+	return status;
+}
+
+/* pci_acpi_program_hp_params
+ *
+ * @dev - the pci_dev for which we want parameters
+ */
+int pci_acpi_program_hp_params(struct pci_dev *dev)
+{
+	acpi_status status;
+	acpi_handle handle, phandle;
+	struct pci_bus *pbus;
+
+	if (acpi_pci_disabled)
+		return -ENODEV;
+
+	handle = NULL;
+	for (pbus = dev->bus; pbus; pbus = pbus->parent) {
+		handle = acpi_pci_get_bridge_handle(pbus);
+		if (handle)
+			break;
+	}
+
+	/*
+	 * _HPP settings apply to all child buses, until another _HPP is
+	 * encountered. If we don't find an _HPP for the input pci dev,
+	 * look for it in the parent device scope since that would apply to
+	 * this pci dev.
+	 */
+	while (handle) {
+		status = acpi_run_hpx(dev, handle);
+		if (ACPI_SUCCESS(status))
+			return 0;
+		status = acpi_run_hpp(dev, handle);
+		if (ACPI_SUCCESS(status))
+			return 0;
+		if (acpi_is_root_bridge(handle))
+			break;
+		status = acpi_get_parent(handle, &phandle);
+		if (ACPI_FAILURE(status))
+			break;
+		handle = phandle;
+	}
+	return -ENODEV;
+}
+
+/**
+ * pciehp_is_native - Check whether a hotplug port is handled by the OS
+ * @bridge: Hotplug port to check
+ *
+ * Returns true if the given @bridge is handled by the native PCIe hotplug
+ * driver.
+ */
+bool pciehp_is_native(struct pci_dev *bridge)
+{
+	const struct pci_host_bridge *host;
+	u32 slot_cap;
+
+	if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
+		return false;
+
+	pcie_capability_read_dword(bridge, PCI_EXP_SLTCAP, &slot_cap);
+	if (!(slot_cap & PCI_EXP_SLTCAP_HPC))
+		return false;
+
+	if (pcie_ports_native)
+		return true;
+
+	host = pci_find_host_bridge(bridge->bus);
+	return host->native_pcie_hotplug;
+}
+
+/**
+ * shpchp_is_native - Check whether a hotplug port is handled by the OS
+ * @bridge: Hotplug port to check
+ *
+ * Returns true if the given @bridge is handled by the native SHPC hotplug
+ * driver.
+ */
+bool shpchp_is_native(struct pci_dev *bridge)
+{
+	return bridge->shpc_managed;
+}
+
+/**
+ * pci_acpi_wake_bus - Root bus wakeup notification fork function.
+ * @context: Device wakeup context.
+ */
+static void pci_acpi_wake_bus(struct acpi_device_wakeup_context *context)
+{
+	struct acpi_device *adev;
+	struct acpi_pci_root *root;
+
+	adev = container_of(context, struct acpi_device, wakeup.context);
+	root = acpi_driver_data(adev);
+	pci_pme_wakeup_bus(root->bus);
+}
+
+/**
+ * pci_acpi_wake_dev - PCI device wakeup notification work function.
+ * @context: Device wakeup context.
+ */
+static void pci_acpi_wake_dev(struct acpi_device_wakeup_context *context)
+{
+	struct pci_dev *pci_dev;
+
+	pci_dev = to_pci_dev(context->dev);
+
+	if (pci_dev->pme_poll)
+		pci_dev->pme_poll = false;
+
+	if (pci_dev->current_state == PCI_D3cold) {
+		pci_wakeup_event(pci_dev);
+		pm_request_resume(&pci_dev->dev);
+		return;
+	}
+
+	/* Clear PME Status if set. */
+	if (pci_dev->pme_support)
+		pci_check_pme_status(pci_dev);
+
+	pci_wakeup_event(pci_dev);
+	pm_request_resume(&pci_dev->dev);
+
+	pci_pme_wakeup_bus(pci_dev->subordinate);
+}
+
+/**
+ * pci_acpi_add_bus_pm_notifier - Register PM notifier for root PCI bus.
+ * @dev: PCI root bridge ACPI device.
+ */
+acpi_status pci_acpi_add_bus_pm_notifier(struct acpi_device *dev)
+{
+	return acpi_add_pm_notifier(dev, NULL, pci_acpi_wake_bus);
+}
+
+/**
+ * pci_acpi_add_pm_notifier - Register PM notifier for given PCI device.
+ * @dev: ACPI device to add the notifier for.
+ * @pci_dev: PCI device to check for the PME status if an event is signaled.
+ */
+acpi_status pci_acpi_add_pm_notifier(struct acpi_device *dev,
+				     struct pci_dev *pci_dev)
+{
+	return acpi_add_pm_notifier(dev, &pci_dev->dev, pci_acpi_wake_dev);
+}
+
+/*
+ * _SxD returns the D-state with the highest power
+ * (lowest D-state number) supported in the S-state "x".
+ *
+ * If the devices does not have a _PRW
+ * (Power Resources for Wake) supporting system wakeup from "x"
+ * then the OS is free to choose a lower power (higher number
+ * D-state) than the return value from _SxD.
+ *
+ * But if _PRW is enabled at S-state "x", the OS
+ * must not choose a power lower than _SxD --
+ * unless the device has an _SxW method specifying
+ * the lowest power (highest D-state number) the device
+ * may enter while still able to wake the system.
+ *
+ * ie. depending on global OS policy:
+ *
+ * if (_PRW at S-state x)
+ *	choose from highest power _SxD to lowest power _SxW
+ * else // no _PRW at S-state x
+ *	choose highest power _SxD or any lower power
+ */
+
+pci_power_t acpi_pci_choose_state(struct pci_dev *pdev)
+{
+	int acpi_state, d_max;
+
+	if (pdev->no_d3cold || !pdev->d3cold_allowed)
+		d_max = ACPI_STATE_D3_HOT;
+	else
+		d_max = ACPI_STATE_D3_COLD;
+	acpi_state = acpi_pm_device_sleep_state(&pdev->dev, NULL, d_max);
+	if (acpi_state < 0)
+		return PCI_POWER_ERROR;
+
+	switch (acpi_state) {
+	case ACPI_STATE_D0:
+		return PCI_D0;
+	case ACPI_STATE_D1:
+		return PCI_D1;
+	case ACPI_STATE_D2:
+		return PCI_D2;
+	case ACPI_STATE_D3_HOT:
+		return PCI_D3hot;
+	case ACPI_STATE_D3_COLD:
+		return PCI_D3cold;
+	}
+	return PCI_POWER_ERROR;
+}
+
+static struct acpi_device *acpi_pci_find_companion(struct device *dev);
+
+void pci_set_acpi_fwnode(struct pci_dev *dev)
+{
+	if (!dev_fwnode(&dev->dev) && !pci_dev_is_added(dev))
+		ACPI_COMPANION_SET(&dev->dev,
+				   acpi_pci_find_companion(&dev->dev));
+}
+
+/**
+ * pci_dev_acpi_reset - do a function level reset using _RST method
+ * @dev: device to reset
+ * @probe: if true, return 0 if device supports _RST
+ */
+int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
+{
+	acpi_handle handle = ACPI_HANDLE(&dev->dev);
+
+	if (!handle || !acpi_has_method(handle, "_RST"))
+		return -ENOTTY;
+
+	if (probe)
+		return 0;
+
+	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_RST", NULL, NULL))) {
+		pci_warn(dev, "ACPI _RST failed\n");
+		return -ENOTTY;
+	}
+
+	return 0;
+}
+
+bool acpi_pci_power_manageable(struct pci_dev *dev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&dev->dev);
+
+	return adev && acpi_device_power_manageable(adev);
+}
+
+bool acpi_pci_bridge_d3(struct pci_dev *dev)
+{
+	struct pci_dev *rpdev;
+	struct acpi_device *adev, *rpadev;
+	const union acpi_object *obj;
+
+	if (acpi_pci_disabled || !dev->is_hotplug_bridge)
+		return false;
+
+	adev = ACPI_COMPANION(&dev->dev);
+	if (adev) {
+		/*
+		 * If the bridge has _S0W, whether or not it can go into D3
+		 * depends on what is returned by that object.  In particular,
+		 * if the power state returned by _S0W is D2 or shallower,
+		 * entering D3 should not be allowed.
+		 */
+		if (acpi_dev_power_state_for_wake(adev) <= ACPI_STATE_D2)
+			return false;
+
+		/*
+		 * Otherwise, assume that the bridge can enter D3 so long as it
+		 * is power-manageable via ACPI.
+		 */
+		if (acpi_device_power_manageable(adev))
+			return true;
+	}
+
+	rpdev = pcie_find_root_port(dev);
+	if (!rpdev)
+		return false;
+
+	if (rpdev == dev)
+		rpadev = adev;
+	else
+		rpadev = ACPI_COMPANION(&rpdev->dev);
+
+	if (!rpadev)
+		return false;
+
+	/*
+	 * If the Root Port cannot signal wakeup signals at all, i.e., it
+	 * doesn't supply a wakeup GPE via _PRW, it cannot signal hotplug
+	 * events from low-power states including D3hot and D3cold.
+	 */
+	if (!rpadev->wakeup.flags.valid)
+		return false;
+
+	/*
+	 * In the bridge-below-a-Root-Port case, evaluate _S0W for the Root Port
+	 * to verify whether or not it can signal wakeup from D3.
+	 */
+	if (rpadev != adev &&
+	    acpi_dev_power_state_for_wake(rpadev) <= ACPI_STATE_D2)
+		return false;
+
+	/*
+	 * The "HotPlugSupportInD3" property in a Root Port _DSD indicates
+	 * the Port can signal hotplug events while in D3.  We assume any
+	 * bridges *below* that Root Port can also signal hotplug events
+	 * while in D3.
+	 */
+	if (!acpi_dev_get_property(rpadev, "HotPlugSupportInD3",
+				   ACPI_TYPE_INTEGER, &obj) &&
+	    obj->integer.value == 1)
+		return true;
+
+	return false;
+}
+
+static void acpi_pci_config_space_access(struct pci_dev *dev, bool enable)
+{
+	int val = enable ? ACPI_REG_CONNECT : ACPI_REG_DISCONNECT;
+	int ret = acpi_evaluate_reg(ACPI_HANDLE(&dev->dev),
+				    ACPI_ADR_SPACE_PCI_CONFIG, val);
+	if (ret)
+		pci_dbg(dev, "ACPI _REG %s evaluation failed (%d)\n",
+			enable ? "connect" : "disconnect", ret);
+}
+
+int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&dev->dev);
+	static const u8 state_conv[] = {
+		[PCI_D0] = ACPI_STATE_D0,
+		[PCI_D1] = ACPI_STATE_D1,
+		[PCI_D2] = ACPI_STATE_D2,
+		[PCI_D3hot] = ACPI_STATE_D3_HOT,
+		[PCI_D3cold] = ACPI_STATE_D3_COLD,
+	};
+	int error;
+
+	/* If the ACPI device has _EJ0, ignore the device */
+	if (!adev || acpi_has_method(adev->handle, "_EJ0"))
+		return -ENODEV;
+
+	switch (state) {
+	case PCI_D0:
+	case PCI_D1:
+	case PCI_D2:
+	case PCI_D3hot:
+	case PCI_D3cold:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (state == PCI_D3cold) {
+		if (dev_pm_qos_flags(&dev->dev, PM_QOS_FLAG_NO_POWER_OFF) ==
+				PM_QOS_FLAGS_ALL)
+			return -EBUSY;
+
+		/* Notify AML lack of PCI config space availability */
+		acpi_pci_config_space_access(dev, false);
+	}
+
+	error = acpi_device_set_power(adev, state_conv[state]);
+	if (error)
+		return error;
+
+	pci_dbg(dev, "power state changed by ACPI to %s\n",
+	        acpi_power_state_string(adev->power.state));
+
+	/*
+	 * Notify AML of PCI config space availability.  Config space is
+	 * accessible in all states except D3cold; the only transitions
+	 * that change availability are transitions to D3cold and from
+	 * D3cold to D0.
+	 */
+	if (state == PCI_D0)
+		acpi_pci_config_space_access(dev, true);
+
+	return 0;
+}
+
+pci_power_t acpi_pci_get_power_state(struct pci_dev *dev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&dev->dev);
+	static const pci_power_t state_conv[] = {
+		[ACPI_STATE_D0]      = PCI_D0,
+		[ACPI_STATE_D1]      = PCI_D1,
+		[ACPI_STATE_D2]      = PCI_D2,
+		[ACPI_STATE_D3_HOT]  = PCI_D3hot,
+		[ACPI_STATE_D3_COLD] = PCI_D3cold,
+	};
+	int state;
+
+	if (!adev || !acpi_device_power_manageable(adev))
+		return PCI_UNKNOWN;
+
+	state = adev->power.state;
+	if (state == ACPI_STATE_UNKNOWN)
+		return PCI_UNKNOWN;
+
+	return state_conv[state];
+}
+
+void acpi_pci_refresh_power_state(struct pci_dev *dev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&dev->dev);
+
+	if (adev && acpi_device_power_manageable(adev))
+		acpi_device_update_power(adev, NULL);
+}
+
+static int acpi_pci_propagate_wakeup(struct pci_bus *bus, bool enable)
+{
+	while (bus->parent) {
+		if (acpi_pm_device_can_wakeup(&bus->self->dev))
+			return acpi_pm_set_device_wakeup(&bus->self->dev, enable);
+
+		bus = bus->parent;
+	}
+
+	/* We have reached the root bus. */
+	if (bus->bridge) {
+		if (acpi_pm_device_can_wakeup(bus->bridge))
+			return acpi_pm_set_device_wakeup(bus->bridge, enable);
+	}
+	return 0;
+}
+
+int acpi_pci_wakeup(struct pci_dev *dev, bool enable)
+{
+	if (acpi_pci_disabled)
+		return 0;
+
+	if (acpi_pm_device_can_wakeup(&dev->dev))
+		return acpi_pm_set_device_wakeup(&dev->dev, enable);
+
+	return acpi_pci_propagate_wakeup(dev->bus, enable);
+}
+
+bool acpi_pci_need_resume(struct pci_dev *dev)
+{
+	struct acpi_device *adev;
+
+	if (acpi_pci_disabled)
+		return false;
+
+	/*
+	 * In some cases (eg. Samsung 305V4A) leaving a bridge in suspend over
+	 * system-wide suspend/resume confuses the platform firmware, so avoid
+	 * doing that.  According to Section 16.1.6 of ACPI 6.2, endpoint
+	 * devices are expected to be in D3 before invoking the S3 entry path
+	 * from the firmware, so they should not be affected by this issue.
+	 */
+	if (pci_is_bridge(dev) && acpi_target_system_state() != ACPI_STATE_S0)
+		return true;
+
+	adev = ACPI_COMPANION(&dev->dev);
+	if (!adev || !acpi_device_power_manageable(adev))
+		return false;
+
+	if (adev->wakeup.flags.valid &&
+	    device_may_wakeup(&dev->dev) != !!adev->wakeup.prepare_count)
+		return true;
+
+	if (acpi_target_system_state() == ACPI_STATE_S0)
+		return false;
+
+	return !!adev->power.flags.dsw_present;
+}
+
+void acpi_pci_add_bus(struct pci_bus *bus)
+{
+	union acpi_object *obj;
+	struct pci_host_bridge *bridge;
+
+	if (acpi_pci_disabled || !bus->bridge || !ACPI_HANDLE(bus->bridge))
+		return;
+
+	acpi_pci_slot_enumerate(bus);
+	acpiphp_enumerate_slots(bus);
+
+	/*
+	 * For a host bridge, check its _DSM for function 8 and if
+	 * that is available, mark it in pci_host_bridge.
+	 */
+	if (!pci_is_root_bus(bus))
+		return;
+
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(bus->bridge), &pci_acpi_dsm_guid, 3,
+				DSM_PCI_POWER_ON_RESET_DELAY, NULL);
+	if (!obj)
+		return;
+
+	if (obj->type == ACPI_TYPE_INTEGER && obj->integer.value == 1) {
+		bridge = pci_find_host_bridge(bus);
+		bridge->ignore_reset_delay = 1;
+	}
+	ACPI_FREE(obj);
+}
+
+void acpi_pci_remove_bus(struct pci_bus *bus)
+{
+	if (acpi_pci_disabled || !bus->bridge)
+		return;
+
+	acpiphp_remove_slots(bus);
+	acpi_pci_slot_remove(bus);
+}
+
+/* ACPI bus type */
+
+
+static DECLARE_RWSEM(pci_acpi_companion_lookup_sem);
+static struct acpi_device *(*pci_acpi_find_companion_hook)(struct pci_dev *);
+
+/**
+ * pci_acpi_set_companion_lookup_hook - Set ACPI companion lookup callback.
+ * @func: ACPI companion lookup callback pointer or NULL.
+ *
+ * Set a special ACPI companion lookup callback for PCI devices whose companion
+ * objects in the ACPI namespace have _ADR with non-standard bus-device-function
+ * encodings.
+ *
+ * Return 0 on success or a negative error code on failure (in which case no
+ * changes are made).
+ *
+ * The caller is responsible for the appropriate ordering of the invocations of
+ * this function with respect to the enumeration of the PCI devices needing the
+ * callback installed by it.
+ */
+int pci_acpi_set_companion_lookup_hook(struct acpi_device *(*func)(struct pci_dev *))
+{
+	int ret;
+
+	if (!func)
+		return -EINVAL;
+
+	down_write(&pci_acpi_companion_lookup_sem);
+
+	if (pci_acpi_find_companion_hook) {
+		ret = -EBUSY;
+	} else {
+		pci_acpi_find_companion_hook = func;
+		ret = 0;
+	}
+
+	up_write(&pci_acpi_companion_lookup_sem);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_acpi_set_companion_lookup_hook);
+
+/**
+ * pci_acpi_clear_companion_lookup_hook - Clear ACPI companion lookup callback.
+ *
+ * Clear the special ACPI companion lookup callback previously set by
+ * pci_acpi_set_companion_lookup_hook().  Block until the last running instance
+ * of the callback returns before clearing it.
+ *
+ * The caller is responsible for the appropriate ordering of the invocations of
+ * this function with respect to the enumeration of the PCI devices needing the
+ * callback cleared by it.
+ */
+void pci_acpi_clear_companion_lookup_hook(void)
+{
+	down_write(&pci_acpi_companion_lookup_sem);
+
+	pci_acpi_find_companion_hook = NULL;
+
+	up_write(&pci_acpi_companion_lookup_sem);
+}
+EXPORT_SYMBOL_GPL(pci_acpi_clear_companion_lookup_hook);
+
+static struct acpi_device *acpi_pci_find_companion(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct acpi_device *adev;
+	bool check_children;
+	u64 addr;
+
+	if (!dev->parent)
+		return NULL;
+
+	down_read(&pci_acpi_companion_lookup_sem);
+
+	adev = pci_acpi_find_companion_hook ?
+		pci_acpi_find_companion_hook(pci_dev) : NULL;
+
+	up_read(&pci_acpi_companion_lookup_sem);
+
+	if (adev)
+		return adev;
+
+	check_children = pci_is_bridge(pci_dev);
+	/* Please ref to ACPI spec for the syntax of _ADR */
+	addr = (PCI_SLOT(pci_dev->devfn) << 16) | PCI_FUNC(pci_dev->devfn);
+	adev = acpi_find_child_device(ACPI_COMPANION(dev->parent), addr,
+				      check_children);
+
+	/*
+	 * There may be ACPI device objects in the ACPI namespace that are
+	 * children of the device object representing the host bridge, but don't
+	 * represent PCI devices.  Both _HID and _ADR may be present for them,
+	 * even though that is against the specification (for example, see
+	 * Section 6.1 of ACPI 6.3), but in many cases the _ADR returns 0 which
+	 * appears to indicate that they should not be taken into consideration
+	 * as potential companions of PCI devices on the root bus.
+	 *
+	 * To catch this special case, disregard the returned device object if
+	 * it has a valid _HID, addr is 0 and the PCI device at hand is on the
+	 * root bus.
+	 */
+	if (adev && adev->pnp.type.platform_id && !addr &&
+	    pci_is_root_bus(pci_dev->bus))
+		return NULL;
+
+	return adev;
+}
+
+/**
+ * pci_acpi_optimize_delay - optimize PCI D3 and D3cold delay from ACPI
+ * @pdev: the PCI device whose delay is to be updated
+ * @handle: ACPI handle of this device
+ *
+ * Update the d3hot_delay and d3cold_delay of a PCI device from the ACPI _DSM
+ * control method of either the device itself or the PCI host bridge.
+ *
+ * Function 8, "Reset Delay," applies to the entire hierarchy below a PCI
+ * host bridge.  If it returns one, the OS may assume that all devices in
+ * the hierarchy have already completed power-on reset delays.
+ *
+ * Function 9, "Device Readiness Durations," applies only to the object
+ * where it is located.  It returns delay durations required after various
+ * events if the device requires less time than the spec requires.  Delays
+ * from this function take precedence over the Reset Delay function.
+ *
+ * These _DSM functions are defined by the draft ECN of January 28, 2014,
+ * titled "ACPI additions for FW latency optimizations."
+ */
+static void pci_acpi_optimize_delay(struct pci_dev *pdev,
+				    acpi_handle handle)
+{
+	struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus);
+	int value;
+	union acpi_object *obj, *elements;
+
+	if (bridge->ignore_reset_delay)
+		pdev->d3cold_delay = 0;
+
+	obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 3,
+				DSM_PCI_DEVICE_READINESS_DURATIONS, NULL);
+	if (!obj)
+		return;
+
+	if (obj->type == ACPI_TYPE_PACKAGE && obj->package.count == 5) {
+		elements = obj->package.elements;
+		if (elements[0].type == ACPI_TYPE_INTEGER) {
+			value = (int)elements[0].integer.value / 1000;
+			if (value < PCI_PM_D3COLD_WAIT)
+				pdev->d3cold_delay = value;
+		}
+		if (elements[3].type == ACPI_TYPE_INTEGER) {
+			value = (int)elements[3].integer.value / 1000;
+			if (value < PCI_PM_D3HOT_WAIT)
+				pdev->d3hot_delay = value;
+		}
+	}
+	ACPI_FREE(obj);
+}
+
+static void pci_acpi_set_external_facing(struct pci_dev *dev)
+{
+	u8 val;
+
+	if (pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT)
+		return;
+	if (device_property_read_u8(&dev->dev, "ExternalFacingPort", &val))
+		return;
+
+	/*
+	 * These root ports expose PCIe (including DMA) outside of the
+	 * system.  Everything downstream from them is external.
+	 */
+	if (val)
+		dev->external_facing = 1;
+}
+
+void pci_acpi_setup(struct device *dev, struct acpi_device *adev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+
+	pci_acpi_optimize_delay(pci_dev, adev->handle);
+	pci_acpi_set_external_facing(pci_dev);
+	pci_acpi_add_edr_notifier(pci_dev);
+
+	pci_acpi_add_pm_notifier(adev, pci_dev);
+	if (!adev->wakeup.flags.valid)
+		return;
+
+	device_set_wakeup_capable(dev, true);
+	/*
+	 * For bridges that can do D3 we enable wake automatically (as
+	 * we do for the power management itself in that case). The
+	 * reason is that the bridge may have additional methods such as
+	 * _DSW that need to be called.
+	 */
+	if (pci_dev->bridge_d3)
+		device_wakeup_enable(dev);
+
+	acpi_pci_wakeup(pci_dev, false);
+	acpi_device_power_add_dependent(adev, dev);
+
+	if (pci_is_bridge(pci_dev))
+		acpi_dev_power_up_children_with_adr(adev);
+}
+
+void pci_acpi_cleanup(struct device *dev, struct acpi_device *adev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+
+	pci_acpi_remove_edr_notifier(pci_dev);
+	pci_acpi_remove_pm_notifier(adev);
+	if (adev->wakeup.flags.valid) {
+		acpi_device_power_remove_dependent(adev, dev);
+		if (pci_dev->bridge_d3)
+			device_wakeup_disable(dev);
+
+		device_set_wakeup_capable(dev, false);
+	}
+}
+
+static struct fwnode_handle *(*pci_msi_get_fwnode_cb)(struct device *dev);
+
+/**
+ * pci_msi_register_fwnode_provider - Register callback to retrieve fwnode
+ * @fn:       Callback matching a device to a fwnode that identifies a PCI
+ *            MSI domain.
+ *
+ * This should be called by irqchip driver, which is the parent of
+ * the MSI domain to provide callback interface to query fwnode.
+ */
+void
+pci_msi_register_fwnode_provider(struct fwnode_handle *(*fn)(struct device *))
+{
+	pci_msi_get_fwnode_cb = fn;
+}
+
+/**
+ * pci_host_bridge_acpi_msi_domain - Retrieve MSI domain of a PCI host bridge
+ * @bus:      The PCI host bridge bus.
+ *
+ * This function uses the callback function registered by
+ * pci_msi_register_fwnode_provider() to retrieve the irq_domain with
+ * type DOMAIN_BUS_PCI_MSI of the specified host bridge bus.
+ * This returns NULL on error or when the domain is not found.
+ */
+struct irq_domain *pci_host_bridge_acpi_msi_domain(struct pci_bus *bus)
+{
+	struct fwnode_handle *fwnode;
+
+	if (!pci_msi_get_fwnode_cb)
+		return NULL;
+
+	fwnode = pci_msi_get_fwnode_cb(&bus->dev);
+	if (!fwnode)
+		return NULL;
+
+	return irq_find_matching_fwnode(fwnode, DOMAIN_BUS_PCI_MSI);
+}
+
+static int __init acpi_pci_init(void)
+{
+	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI) {
+		pr_info("ACPI FADT declares the system doesn't support MSI, so disable it\n");
+		pci_no_msi();
+	}
+
+	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
+		pr_info("ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n");
+		pcie_no_aspm();
+	}
+
+	if (acpi_pci_disabled)
+		return 0;
+
+	acpi_pci_slot_init();
+	acpiphp_init();
+
+	return 0;
+}
+arch_initcall(acpi_pci_init);
diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c
new file mode 100644
index 000000000..9334b2dd4
--- /dev/null
+++ b/drivers/pci/pci-bridge-emul.c
@@ -0,0 +1,621 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 Marvell
+ *
+ * Author: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
+ *
+ * This file helps PCI controller drivers implement a fake root port
+ * PCI bridge when the HW doesn't provide such a root port PCI
+ * bridge.
+ *
+ * It emulates a PCI bridge by providing a fake PCI configuration
+ * space (and optionally a PCIe capability configuration space) in
+ * memory. By default the read/write operations simply read and update
+ * this fake configuration space in memory. However, PCI controller
+ * drivers can provide through the 'struct pci_sw_bridge_ops'
+ * structure a set of operations to override or complement this
+ * default behavior.
+ */
+
+#include <linux/pci.h>
+#include "pci-bridge-emul.h"
+
+#define PCI_BRIDGE_CONF_END	PCI_STD_HEADER_SIZEOF
+#define PCI_CAP_SSID_SIZEOF	(PCI_SSVID_DEVICE_ID + 2)
+#define PCI_CAP_PCIE_SIZEOF	(PCI_EXP_SLTSTA2 + 2)
+
+/**
+ * struct pci_bridge_reg_behavior - register bits behaviors
+ * @ro:		Read-Only bits
+ * @rw:		Read-Write bits
+ * @w1c:	Write-1-to-Clear bits
+ *
+ * Reads and Writes will be filtered by specified behavior. All other bits not
+ * declared are assumed 'Reserved' and will return 0 on reads, per PCIe 5.0:
+ * "Reserved register fields must be read only and must return 0 (all 0's for
+ * multi-bit fields) when read".
+ */
+struct pci_bridge_reg_behavior {
+	/* Read-only bits */
+	u32 ro;
+
+	/* Read-write bits */
+	u32 rw;
+
+	/* Write-1-to-clear bits */
+	u32 w1c;
+};
+
+static const
+struct pci_bridge_reg_behavior pci_regs_behavior[PCI_STD_HEADER_SIZEOF / 4] = {
+	[PCI_VENDOR_ID / 4] = { .ro = ~0 },
+	[PCI_COMMAND / 4] = {
+		.rw = (PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
+		       PCI_COMMAND_MASTER | PCI_COMMAND_PARITY |
+		       PCI_COMMAND_SERR),
+		.ro = ((PCI_COMMAND_SPECIAL | PCI_COMMAND_INVALIDATE |
+			PCI_COMMAND_VGA_PALETTE | PCI_COMMAND_WAIT |
+			PCI_COMMAND_FAST_BACK) |
+		       (PCI_STATUS_CAP_LIST | PCI_STATUS_66MHZ |
+			PCI_STATUS_FAST_BACK | PCI_STATUS_DEVSEL_MASK) << 16),
+		.w1c = PCI_STATUS_ERROR_BITS << 16,
+	},
+	[PCI_CLASS_REVISION / 4] = { .ro = ~0 },
+
+	/*
+	 * Cache Line Size register: implement as read-only, we do not
+	 * pretend implementing "Memory Write and Invalidate"
+	 * transactions"
+	 *
+	 * Latency Timer Register: implemented as read-only, as "A
+	 * bridge that is not capable of a burst transfer of more than
+	 * two data phases on its primary interface is permitted to
+	 * hardwire the Latency Timer to a value of 16 or less"
+	 *
+	 * Header Type: always read-only
+	 *
+	 * BIST register: implemented as read-only, as "A bridge that
+	 * does not support BIST must implement this register as a
+	 * read-only register that returns 0 when read"
+	 */
+	[PCI_CACHE_LINE_SIZE / 4] = { .ro = ~0 },
+
+	/*
+	 * Base Address registers not used must be implemented as
+	 * read-only registers that return 0 when read.
+	 */
+	[PCI_BASE_ADDRESS_0 / 4] = { .ro = ~0 },
+	[PCI_BASE_ADDRESS_1 / 4] = { .ro = ~0 },
+
+	[PCI_PRIMARY_BUS / 4] = {
+		/* Primary, secondary and subordinate bus are RW */
+		.rw = GENMASK(24, 0),
+		/* Secondary latency is read-only */
+		.ro = GENMASK(31, 24),
+	},
+
+	[PCI_IO_BASE / 4] = {
+		/* The high four bits of I/O base/limit are RW */
+		.rw = (GENMASK(15, 12) | GENMASK(7, 4)),
+
+		/* The low four bits of I/O base/limit are RO */
+		.ro = (((PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK |
+			 PCI_STATUS_DEVSEL_MASK) << 16) |
+		       GENMASK(11, 8) | GENMASK(3, 0)),
+
+		.w1c = PCI_STATUS_ERROR_BITS << 16,
+	},
+
+	[PCI_MEMORY_BASE / 4] = {
+		/* The high 12-bits of mem base/limit are RW */
+		.rw = GENMASK(31, 20) | GENMASK(15, 4),
+
+		/* The low four bits of mem base/limit are RO */
+		.ro = GENMASK(19, 16) | GENMASK(3, 0),
+	},
+
+	[PCI_PREF_MEMORY_BASE / 4] = {
+		/* The high 12-bits of pref mem base/limit are RW */
+		.rw = GENMASK(31, 20) | GENMASK(15, 4),
+
+		/* The low four bits of pref mem base/limit are RO */
+		.ro = GENMASK(19, 16) | GENMASK(3, 0),
+	},
+
+	[PCI_PREF_BASE_UPPER32 / 4] = {
+		.rw = ~0,
+	},
+
+	[PCI_PREF_LIMIT_UPPER32 / 4] = {
+		.rw = ~0,
+	},
+
+	[PCI_IO_BASE_UPPER16 / 4] = {
+		.rw = ~0,
+	},
+
+	[PCI_CAPABILITY_LIST / 4] = {
+		.ro = GENMASK(7, 0),
+	},
+
+	/*
+	 * If expansion ROM is unsupported then ROM Base Address register must
+	 * be implemented as read-only register that return 0 when read, same
+	 * as for unused Base Address registers.
+	 */
+	[PCI_ROM_ADDRESS1 / 4] = {
+		.ro = ~0,
+	},
+
+	/*
+	 * Interrupt line (bits 7:0) are RW, interrupt pin (bits 15:8)
+	 * are RO, and bridge control (31:16) are a mix of RW, RO,
+	 * reserved and W1C bits
+	 */
+	[PCI_INTERRUPT_LINE / 4] = {
+		/* Interrupt line is RW */
+		.rw = (GENMASK(7, 0) |
+		       ((PCI_BRIDGE_CTL_PARITY |
+			 PCI_BRIDGE_CTL_SERR |
+			 PCI_BRIDGE_CTL_ISA |
+			 PCI_BRIDGE_CTL_VGA |
+			 PCI_BRIDGE_CTL_MASTER_ABORT |
+			 PCI_BRIDGE_CTL_BUS_RESET |
+			 BIT(8) | BIT(9) | BIT(11)) << 16)),
+
+		/* Interrupt pin is RO */
+		.ro = (GENMASK(15, 8) | ((PCI_BRIDGE_CTL_FAST_BACK) << 16)),
+
+		.w1c = BIT(10) << 16,
+	},
+};
+
+static const
+struct pci_bridge_reg_behavior pcie_cap_regs_behavior[PCI_CAP_PCIE_SIZEOF / 4] = {
+	[PCI_CAP_LIST_ID / 4] = {
+		/*
+		 * Capability ID, Next Capability Pointer and
+		 * bits [14:0] of Capabilities register are all read-only.
+		 * Bit 15 of Capabilities register is reserved.
+		 */
+		.ro = GENMASK(30, 0),
+	},
+
+	[PCI_EXP_DEVCAP / 4] = {
+		/*
+		 * Bits [31:29] and [17:16] are reserved.
+		 * Bits [27:18] are reserved for non-upstream ports.
+		 * Bits 28 and [14:6] are reserved for non-endpoint devices.
+		 * Other bits are read-only.
+		 */
+		.ro = BIT(15) | GENMASK(5, 0),
+	},
+
+	[PCI_EXP_DEVCTL / 4] = {
+		/*
+		 * Device control register is RW, except bit 15 which is
+		 * reserved for non-endpoints or non-PCIe-to-PCI/X bridges.
+		 */
+		.rw = GENMASK(14, 0),
+
+		/*
+		 * Device status register has bits 6 and [3:0] W1C, [5:4] RO,
+		 * the rest is reserved. Also bit 6 is reserved for non-upstream
+		 * ports.
+		 */
+		.w1c = GENMASK(3, 0) << 16,
+		.ro = GENMASK(5, 4) << 16,
+	},
+
+	[PCI_EXP_LNKCAP / 4] = {
+		/*
+		 * All bits are RO, except bit 23 which is reserved and
+		 * bit 18 which is reserved for non-upstream ports.
+		 */
+		.ro = lower_32_bits(~(BIT(23) | PCI_EXP_LNKCAP_CLKPM)),
+	},
+
+	[PCI_EXP_LNKCTL / 4] = {
+		/*
+		 * Link control has bits [15:14], [11:3] and [1:0] RW, the
+		 * rest is reserved. Bit 8 is reserved for non-upstream ports.
+		 *
+		 * Link status has bits [13:0] RO, and bits [15:14]
+		 * W1C.
+		 */
+		.rw = GENMASK(15, 14) | GENMASK(11, 9) | GENMASK(7, 3) | GENMASK(1, 0),
+		.ro = GENMASK(13, 0) << 16,
+		.w1c = GENMASK(15, 14) << 16,
+	},
+
+	[PCI_EXP_SLTCAP / 4] = {
+		.ro = ~0,
+	},
+
+	[PCI_EXP_SLTCTL / 4] = {
+		/*
+		 * Slot control has bits [14:0] RW, the rest is
+		 * reserved.
+		 *
+		 * Slot status has bits 8 and [4:0] W1C, bits [7:5] RO, the
+		 * rest is reserved.
+		 */
+		.rw = GENMASK(14, 0),
+		.w1c = (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
+			PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_PDC |
+			PCI_EXP_SLTSTA_CC | PCI_EXP_SLTSTA_DLLSC) << 16,
+		.ro = (PCI_EXP_SLTSTA_MRLSS | PCI_EXP_SLTSTA_PDS |
+		       PCI_EXP_SLTSTA_EIS) << 16,
+	},
+
+	[PCI_EXP_RTCTL / 4] = {
+		/*
+		 * Root control has bits [4:0] RW, the rest is
+		 * reserved.
+		 *
+		 * Root capabilities has bit 0 RO, the rest is reserved.
+		 */
+		.rw = (PCI_EXP_RTCTL_SECEE | PCI_EXP_RTCTL_SENFEE |
+		       PCI_EXP_RTCTL_SEFEE | PCI_EXP_RTCTL_PMEIE |
+		       PCI_EXP_RTCTL_CRSSVE),
+		.ro = PCI_EXP_RTCAP_CRSVIS << 16,
+	},
+
+	[PCI_EXP_RTSTA / 4] = {
+		/*
+		 * Root status has bits 17 and [15:0] RO, bit 16 W1C, the rest
+		 * is reserved.
+		 */
+		.ro = GENMASK(15, 0) | PCI_EXP_RTSTA_PENDING,
+		.w1c = PCI_EXP_RTSTA_PME,
+	},
+
+	[PCI_EXP_DEVCAP2 / 4] = {
+		/*
+		 * Device capabilities 2 register has reserved bits [30:27].
+		 * Also bits [26:24] are reserved for non-upstream ports.
+		 */
+		.ro = BIT(31) | GENMASK(23, 0),
+	},
+
+	[PCI_EXP_DEVCTL2 / 4] = {
+		/*
+		 * Device control 2 register is RW. Bit 11 is reserved for
+		 * non-upstream ports.
+		 *
+		 * Device status 2 register is reserved.
+		 */
+		.rw = GENMASK(15, 12) | GENMASK(10, 0),
+	},
+
+	[PCI_EXP_LNKCAP2 / 4] = {
+		/* Link capabilities 2 register has reserved bits [30:25] and 0. */
+		.ro = BIT(31) | GENMASK(24, 1),
+	},
+
+	[PCI_EXP_LNKCTL2 / 4] = {
+		/*
+		 * Link control 2 register is RW.
+		 *
+		 * Link status 2 register has bits 5, 15 W1C;
+		 * bits 10, 11 reserved and others are RO.
+		 */
+		.rw = GENMASK(15, 0),
+		.w1c = (BIT(15) | BIT(5)) << 16,
+		.ro = (GENMASK(14, 12) | GENMASK(9, 6) | GENMASK(4, 0)) << 16,
+	},
+
+	[PCI_EXP_SLTCAP2 / 4] = {
+		/* Slot capabilities 2 register is reserved. */
+	},
+
+	[PCI_EXP_SLTCTL2 / 4] = {
+		/* Both Slot control 2 and Slot status 2 registers are reserved. */
+	},
+};
+
+static pci_bridge_emul_read_status_t
+pci_bridge_emul_read_ssid(struct pci_bridge_emul *bridge, int reg, u32 *value)
+{
+	switch (reg) {
+	case PCI_CAP_LIST_ID:
+		*value = PCI_CAP_ID_SSVID |
+			((bridge->pcie_start > bridge->ssid_start) ? (bridge->pcie_start << 8) : 0);
+		return PCI_BRIDGE_EMUL_HANDLED;
+
+	case PCI_SSVID_VENDOR_ID:
+		*value = bridge->subsystem_vendor_id |
+			(bridge->subsystem_id << 16);
+		return PCI_BRIDGE_EMUL_HANDLED;
+
+	default:
+		return PCI_BRIDGE_EMUL_NOT_HANDLED;
+	}
+}
+
+/*
+ * Initialize a pci_bridge_emul structure to represent a fake PCI
+ * bridge configuration space. The caller needs to have initialized
+ * the PCI configuration space with whatever values make sense
+ * (typically at least vendor, device, revision), the ->ops pointer,
+ * and optionally ->data and ->has_pcie.
+ */
+int pci_bridge_emul_init(struct pci_bridge_emul *bridge,
+			 unsigned int flags)
+{
+	BUILD_BUG_ON(sizeof(bridge->conf) != PCI_BRIDGE_CONF_END);
+
+	/*
+	 * class_revision: Class is high 24 bits and revision is low 8 bit
+	 * of this member, while class for PCI Bridge Normal Decode has the
+	 * 24-bit value: PCI_CLASS_BRIDGE_PCI_NORMAL
+	 */
+	bridge->conf.class_revision |=
+		cpu_to_le32(PCI_CLASS_BRIDGE_PCI_NORMAL << 8);
+	bridge->conf.header_type = PCI_HEADER_TYPE_BRIDGE;
+	bridge->conf.cache_line_size = 0x10;
+	bridge->conf.status = cpu_to_le16(PCI_STATUS_CAP_LIST);
+	bridge->pci_regs_behavior = kmemdup(pci_regs_behavior,
+					    sizeof(pci_regs_behavior),
+					    GFP_KERNEL);
+	if (!bridge->pci_regs_behavior)
+		return -ENOMEM;
+
+	/* If ssid_start and pcie_start were not specified then choose the lowest possible value. */
+	if (!bridge->ssid_start && !bridge->pcie_start) {
+		if (bridge->subsystem_vendor_id)
+			bridge->ssid_start = PCI_BRIDGE_CONF_END;
+		if (bridge->has_pcie)
+			bridge->pcie_start = bridge->ssid_start + PCI_CAP_SSID_SIZEOF;
+	} else if (!bridge->ssid_start && bridge->subsystem_vendor_id) {
+		if (bridge->pcie_start - PCI_BRIDGE_CONF_END >= PCI_CAP_SSID_SIZEOF)
+			bridge->ssid_start = PCI_BRIDGE_CONF_END;
+		else
+			bridge->ssid_start = bridge->pcie_start + PCI_CAP_PCIE_SIZEOF;
+	} else if (!bridge->pcie_start && bridge->has_pcie) {
+		if (bridge->ssid_start - PCI_BRIDGE_CONF_END >= PCI_CAP_PCIE_SIZEOF)
+			bridge->pcie_start = PCI_BRIDGE_CONF_END;
+		else
+			bridge->pcie_start = bridge->ssid_start + PCI_CAP_SSID_SIZEOF;
+	}
+
+	bridge->conf.capabilities_pointer = min(bridge->ssid_start, bridge->pcie_start);
+
+	if (bridge->conf.capabilities_pointer)
+		bridge->conf.status |= cpu_to_le16(PCI_STATUS_CAP_LIST);
+
+	if (bridge->has_pcie) {
+		bridge->pcie_conf.cap_id = PCI_CAP_ID_EXP;
+		bridge->pcie_conf.next = (bridge->ssid_start > bridge->pcie_start) ?
+					 bridge->ssid_start : 0;
+		bridge->pcie_conf.cap |= cpu_to_le16(PCI_EXP_TYPE_ROOT_PORT << 4);
+		bridge->pcie_cap_regs_behavior =
+			kmemdup(pcie_cap_regs_behavior,
+				sizeof(pcie_cap_regs_behavior),
+				GFP_KERNEL);
+		if (!bridge->pcie_cap_regs_behavior) {
+			kfree(bridge->pci_regs_behavior);
+			return -ENOMEM;
+		}
+		/* These bits are applicable only for PCI and reserved on PCIe */
+		bridge->pci_regs_behavior[PCI_CACHE_LINE_SIZE / 4].ro &=
+			~GENMASK(15, 8);
+		bridge->pci_regs_behavior[PCI_COMMAND / 4].ro &=
+			~((PCI_COMMAND_SPECIAL | PCI_COMMAND_INVALIDATE |
+			   PCI_COMMAND_VGA_PALETTE | PCI_COMMAND_WAIT |
+			   PCI_COMMAND_FAST_BACK) |
+			  (PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK |
+			   PCI_STATUS_DEVSEL_MASK) << 16);
+		bridge->pci_regs_behavior[PCI_PRIMARY_BUS / 4].ro &=
+			~GENMASK(31, 24);
+		bridge->pci_regs_behavior[PCI_IO_BASE / 4].ro &=
+			~((PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK |
+			   PCI_STATUS_DEVSEL_MASK) << 16);
+		bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].rw &=
+			~((PCI_BRIDGE_CTL_MASTER_ABORT |
+			   BIT(8) | BIT(9) | BIT(11)) << 16);
+		bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].ro &=
+			~((PCI_BRIDGE_CTL_FAST_BACK) << 16);
+		bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].w1c &=
+			~(BIT(10) << 16);
+	}
+
+	if (flags & PCI_BRIDGE_EMUL_NO_PREFMEM_FORWARD) {
+		bridge->pci_regs_behavior[PCI_PREF_MEMORY_BASE / 4].ro = ~0;
+		bridge->pci_regs_behavior[PCI_PREF_MEMORY_BASE / 4].rw = 0;
+	}
+
+	if (flags & PCI_BRIDGE_EMUL_NO_IO_FORWARD) {
+		bridge->pci_regs_behavior[PCI_COMMAND / 4].ro |= PCI_COMMAND_IO;
+		bridge->pci_regs_behavior[PCI_COMMAND / 4].rw &= ~PCI_COMMAND_IO;
+		bridge->pci_regs_behavior[PCI_IO_BASE / 4].ro |= GENMASK(15, 0);
+		bridge->pci_regs_behavior[PCI_IO_BASE / 4].rw &= ~GENMASK(15, 0);
+		bridge->pci_regs_behavior[PCI_IO_BASE_UPPER16 / 4].ro = ~0;
+		bridge->pci_regs_behavior[PCI_IO_BASE_UPPER16 / 4].rw = 0;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_bridge_emul_init);
+
+/*
+ * Cleanup a pci_bridge_emul structure that was previously initialized
+ * using pci_bridge_emul_init().
+ */
+void pci_bridge_emul_cleanup(struct pci_bridge_emul *bridge)
+{
+	if (bridge->has_pcie)
+		kfree(bridge->pcie_cap_regs_behavior);
+	kfree(bridge->pci_regs_behavior);
+}
+EXPORT_SYMBOL_GPL(pci_bridge_emul_cleanup);
+
+/*
+ * Should be called by the PCI controller driver when reading the PCI
+ * configuration space of the fake bridge. It will call back the
+ * ->ops->read_base or ->ops->read_pcie operations.
+ */
+int pci_bridge_emul_conf_read(struct pci_bridge_emul *bridge, int where,
+			      int size, u32 *value)
+{
+	int ret;
+	int reg = where & ~3;
+	pci_bridge_emul_read_status_t (*read_op)(struct pci_bridge_emul *bridge,
+						 int reg, u32 *value);
+	__le32 *cfgspace;
+	const struct pci_bridge_reg_behavior *behavior;
+
+	if (reg < PCI_BRIDGE_CONF_END) {
+		/* Emulated PCI space */
+		read_op = bridge->ops->read_base;
+		cfgspace = (__le32 *) &bridge->conf;
+		behavior = bridge->pci_regs_behavior;
+	} else if (reg >= bridge->ssid_start && reg < bridge->ssid_start + PCI_CAP_SSID_SIZEOF &&
+		   bridge->subsystem_vendor_id) {
+		/* Emulated PCI Bridge Subsystem Vendor ID capability */
+		reg -= bridge->ssid_start;
+		read_op = pci_bridge_emul_read_ssid;
+		cfgspace = NULL;
+		behavior = NULL;
+	} else if (reg >= bridge->pcie_start && reg < bridge->pcie_start + PCI_CAP_PCIE_SIZEOF &&
+		   bridge->has_pcie) {
+		/* Our emulated PCIe capability */
+		reg -= bridge->pcie_start;
+		read_op = bridge->ops->read_pcie;
+		cfgspace = (__le32 *) &bridge->pcie_conf;
+		behavior = bridge->pcie_cap_regs_behavior;
+	} else if (reg >= PCI_CFG_SPACE_SIZE && bridge->has_pcie) {
+		/* PCIe extended capability space */
+		reg -= PCI_CFG_SPACE_SIZE;
+		read_op = bridge->ops->read_ext;
+		cfgspace = NULL;
+		behavior = NULL;
+	} else {
+		/* Not implemented */
+		*value = 0;
+		return PCIBIOS_SUCCESSFUL;
+	}
+
+	if (read_op)
+		ret = read_op(bridge, reg, value);
+	else
+		ret = PCI_BRIDGE_EMUL_NOT_HANDLED;
+
+	if (ret == PCI_BRIDGE_EMUL_NOT_HANDLED) {
+		if (cfgspace)
+			*value = le32_to_cpu(cfgspace[reg / 4]);
+		else
+			*value = 0;
+	}
+
+	/*
+	 * Make sure we never return any reserved bit with a value
+	 * different from 0.
+	 */
+	if (behavior)
+		*value &= behavior[reg / 4].ro | behavior[reg / 4].rw |
+			  behavior[reg / 4].w1c;
+
+	if (size == 1)
+		*value = (*value >> (8 * (where & 3))) & 0xff;
+	else if (size == 2)
+		*value = (*value >> (8 * (where & 3))) & 0xffff;
+	else if (size != 4)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+EXPORT_SYMBOL_GPL(pci_bridge_emul_conf_read);
+
+/*
+ * Should be called by the PCI controller driver when writing the PCI
+ * configuration space of the fake bridge. It will call back the
+ * ->ops->write_base or ->ops->write_pcie operations.
+ */
+int pci_bridge_emul_conf_write(struct pci_bridge_emul *bridge, int where,
+			       int size, u32 value)
+{
+	int reg = where & ~3;
+	int mask, ret, old, new, shift;
+	void (*write_op)(struct pci_bridge_emul *bridge, int reg,
+			 u32 old, u32 new, u32 mask);
+	__le32 *cfgspace;
+	const struct pci_bridge_reg_behavior *behavior;
+
+	ret = pci_bridge_emul_conf_read(bridge, reg, 4, &old);
+	if (ret != PCIBIOS_SUCCESSFUL)
+		return ret;
+
+	if (reg < PCI_BRIDGE_CONF_END) {
+		/* Emulated PCI space */
+		write_op = bridge->ops->write_base;
+		cfgspace = (__le32 *) &bridge->conf;
+		behavior = bridge->pci_regs_behavior;
+	} else if (reg >= bridge->pcie_start && reg < bridge->pcie_start + PCI_CAP_PCIE_SIZEOF &&
+		   bridge->has_pcie) {
+		/* Our emulated PCIe capability */
+		reg -= bridge->pcie_start;
+		write_op = bridge->ops->write_pcie;
+		cfgspace = (__le32 *) &bridge->pcie_conf;
+		behavior = bridge->pcie_cap_regs_behavior;
+	} else if (reg >= PCI_CFG_SPACE_SIZE && bridge->has_pcie) {
+		/* PCIe extended capability space */
+		reg -= PCI_CFG_SPACE_SIZE;
+		write_op = bridge->ops->write_ext;
+		cfgspace = NULL;
+		behavior = NULL;
+	} else {
+		/* Not implemented */
+		return PCIBIOS_SUCCESSFUL;
+	}
+
+	shift = (where & 0x3) * 8;
+
+	if (size == 4)
+		mask = 0xffffffff;
+	else if (size == 2)
+		mask = 0xffff << shift;
+	else if (size == 1)
+		mask = 0xff << shift;
+	else
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	if (behavior) {
+		/* Keep all bits, except the RW bits */
+		new = old & (~mask | ~behavior[reg / 4].rw);
+
+		/* Update the value of the RW bits */
+		new |= (value << shift) & (behavior[reg / 4].rw & mask);
+
+		/* Clear the W1C bits */
+		new &= ~((value << shift) & (behavior[reg / 4].w1c & mask));
+	} else {
+		new = old & ~mask;
+		new |= (value << shift) & mask;
+	}
+
+	if (cfgspace) {
+		/* Save the new value with the cleared W1C bits into the cfgspace */
+		cfgspace[reg / 4] = cpu_to_le32(new);
+	}
+
+	if (behavior) {
+		/*
+		 * Clear the W1C bits not specified by the write mask, so that the
+		 * write_op() does not clear them.
+		 */
+		new &= ~(behavior[reg / 4].w1c & ~mask);
+
+		/*
+		 * Set the W1C bits specified by the write mask, so that write_op()
+		 * knows about that they are to be cleared.
+		 */
+		new |= (value << shift) & (behavior[reg / 4].w1c & mask);
+	}
+
+	if (write_op)
+		write_op(bridge, reg, old, new, mask);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+EXPORT_SYMBOL_GPL(pci_bridge_emul_conf_write);
diff --git a/drivers/pci/pci-bridge-emul.h b/drivers/pci/pci-bridge-emul.h
new file mode 100644
index 000000000..2a0e59c7f
--- /dev/null
+++ b/drivers/pci/pci-bridge-emul.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PCI_BRIDGE_EMUL_H__
+#define __PCI_BRIDGE_EMUL_H__
+
+#include <linux/kernel.h>
+
+/* PCI configuration space of a PCI-to-PCI bridge. */
+struct pci_bridge_emul_conf {
+	__le16 vendor;
+	__le16 device;
+	__le16 command;
+	__le16 status;
+	__le32 class_revision;
+	u8 cache_line_size;
+	u8 latency_timer;
+	u8 header_type;
+	u8 bist;
+	__le32 bar[2];
+	u8 primary_bus;
+	u8 secondary_bus;
+	u8 subordinate_bus;
+	u8 secondary_latency_timer;
+	u8 iobase;
+	u8 iolimit;
+	__le16 secondary_status;
+	__le16 membase;
+	__le16 memlimit;
+	__le16 pref_mem_base;
+	__le16 pref_mem_limit;
+	__le32 prefbaseupper;
+	__le32 preflimitupper;
+	__le16 iobaseupper;
+	__le16 iolimitupper;
+	u8 capabilities_pointer;
+	u8 reserve[3];
+	__le32 romaddr;
+	u8 intline;
+	u8 intpin;
+	__le16 bridgectrl;
+};
+
+/* PCI configuration space of the PCIe capabilities */
+struct pci_bridge_emul_pcie_conf {
+	u8 cap_id;
+	u8 next;
+	__le16 cap;
+	__le32 devcap;
+	__le16 devctl;
+	__le16 devsta;
+	__le32 lnkcap;
+	__le16 lnkctl;
+	__le16 lnksta;
+	__le32 slotcap;
+	__le16 slotctl;
+	__le16 slotsta;
+	__le16 rootctl;
+	__le16 rootcap;
+	__le32 rootsta;
+	__le32 devcap2;
+	__le16 devctl2;
+	__le16 devsta2;
+	__le32 lnkcap2;
+	__le16 lnkctl2;
+	__le16 lnksta2;
+	__le32 slotcap2;
+	__le16 slotctl2;
+	__le16 slotsta2;
+};
+
+struct pci_bridge_emul;
+
+typedef enum { PCI_BRIDGE_EMUL_HANDLED,
+	       PCI_BRIDGE_EMUL_NOT_HANDLED } pci_bridge_emul_read_status_t;
+
+struct pci_bridge_emul_ops {
+	/*
+	 * Called when reading from the regular PCI bridge
+	 * configuration space. Return PCI_BRIDGE_EMUL_HANDLED when the
+	 * operation has handled the read operation and filled in the
+	 * *value, or PCI_BRIDGE_EMUL_NOT_HANDLED when the read should
+	 * be emulated by the common code by reading from the
+	 * in-memory copy of the configuration space.
+	 */
+	pci_bridge_emul_read_status_t (*read_base)(struct pci_bridge_emul *bridge,
+						   int reg, u32 *value);
+
+	/*
+	 * Same as ->read_base(), except it is for reading from the
+	 * PCIe capability configuration space.
+	 */
+	pci_bridge_emul_read_status_t (*read_pcie)(struct pci_bridge_emul *bridge,
+						   int reg, u32 *value);
+
+	/*
+	 * Same as ->read_base(), except it is for reading from the
+	 * PCIe extended capability configuration space.
+	 */
+	pci_bridge_emul_read_status_t (*read_ext)(struct pci_bridge_emul *bridge,
+						  int reg, u32 *value);
+
+	/*
+	 * Called when writing to the regular PCI bridge configuration
+	 * space. old is the current value, new is the new value being
+	 * written, and mask indicates which parts of the value are
+	 * being changed.
+	 */
+	void (*write_base)(struct pci_bridge_emul *bridge, int reg,
+			   u32 old, u32 new, u32 mask);
+
+	/*
+	 * Same as ->write_base(), except it is for writing from the
+	 * PCIe capability configuration space.
+	 */
+	void (*write_pcie)(struct pci_bridge_emul *bridge, int reg,
+			   u32 old, u32 new, u32 mask);
+
+	/*
+	 * Same as ->write_base(), except it is for writing from the
+	 * PCIe extended capability configuration space.
+	 */
+	void (*write_ext)(struct pci_bridge_emul *bridge, int reg,
+			  u32 old, u32 new, u32 mask);
+};
+
+struct pci_bridge_reg_behavior;
+
+struct pci_bridge_emul {
+	struct pci_bridge_emul_conf conf;
+	struct pci_bridge_emul_pcie_conf pcie_conf;
+	const struct pci_bridge_emul_ops *ops;
+	struct pci_bridge_reg_behavior *pci_regs_behavior;
+	struct pci_bridge_reg_behavior *pcie_cap_regs_behavior;
+	void *data;
+	u8 pcie_start;
+	u8 ssid_start;
+	bool has_pcie;
+	u16 subsystem_vendor_id;
+	u16 subsystem_id;
+};
+
+enum {
+	/*
+	 * PCI bridge does not support forwarding of prefetchable memory
+	 * requests between primary and secondary buses.
+	 */
+	PCI_BRIDGE_EMUL_NO_PREFMEM_FORWARD = BIT(0),
+
+	/*
+	 * PCI bridge does not support forwarding of IO requests between
+	 * primary and secondary buses.
+	 */
+	PCI_BRIDGE_EMUL_NO_IO_FORWARD = BIT(1),
+};
+
+int pci_bridge_emul_init(struct pci_bridge_emul *bridge,
+			 unsigned int flags);
+void pci_bridge_emul_cleanup(struct pci_bridge_emul *bridge);
+
+int pci_bridge_emul_conf_read(struct pci_bridge_emul *bridge, int where,
+			      int size, u32 *value);
+int pci_bridge_emul_conf_write(struct pci_bridge_emul *bridge, int where,
+			       int size, u32 value);
+
+#endif /* __PCI_BRIDGE_EMUL_H__ */
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
new file mode 100644
index 000000000..51ec9e7e7
--- /dev/null
+++ b/drivers/pci/pci-driver.c
@@ -0,0 +1,1739 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * (C) Copyright 2002-2004, 2007 Greg Kroah-Hartman <greg@kroah.com>
+ * (C) Copyright 2007 Novell Inc.
+ */
+
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/mempolicy.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/sched/isolation.h>
+#include <linux/cpu.h>
+#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
+#include <linux/kexec.h>
+#include <linux/of_device.h>
+#include <linux/acpi.h>
+#include <linux/dma-map-ops.h>
+#include <linux/iommu.h>
+#include "pci.h"
+#include "pcie/portdrv.h"
+
+struct pci_dynid {
+	struct list_head node;
+	struct pci_device_id id;
+};
+
+/**
+ * pci_add_dynid - add a new PCI device ID to this driver and re-probe devices
+ * @drv: target pci driver
+ * @vendor: PCI vendor ID
+ * @device: PCI device ID
+ * @subvendor: PCI subvendor ID
+ * @subdevice: PCI subdevice ID
+ * @class: PCI class
+ * @class_mask: PCI class mask
+ * @driver_data: private driver data
+ *
+ * Adds a new dynamic pci device ID to this driver and causes the
+ * driver to probe for all devices again.  @drv must have been
+ * registered prior to calling this function.
+ *
+ * CONTEXT:
+ * Does GFP_KERNEL allocation.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int pci_add_dynid(struct pci_driver *drv,
+		  unsigned int vendor, unsigned int device,
+		  unsigned int subvendor, unsigned int subdevice,
+		  unsigned int class, unsigned int class_mask,
+		  unsigned long driver_data)
+{
+	struct pci_dynid *dynid;
+
+	dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
+	if (!dynid)
+		return -ENOMEM;
+
+	dynid->id.vendor = vendor;
+	dynid->id.device = device;
+	dynid->id.subvendor = subvendor;
+	dynid->id.subdevice = subdevice;
+	dynid->id.class = class;
+	dynid->id.class_mask = class_mask;
+	dynid->id.driver_data = driver_data;
+
+	spin_lock(&drv->dynids.lock);
+	list_add_tail(&dynid->node, &drv->dynids.list);
+	spin_unlock(&drv->dynids.lock);
+
+	return driver_attach(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(pci_add_dynid);
+
+static void pci_free_dynids(struct pci_driver *drv)
+{
+	struct pci_dynid *dynid, *n;
+
+	spin_lock(&drv->dynids.lock);
+	list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
+		list_del(&dynid->node);
+		kfree(dynid);
+	}
+	spin_unlock(&drv->dynids.lock);
+}
+
+/**
+ * pci_match_id - See if a PCI device matches a given pci_id table
+ * @ids: array of PCI device ID structures to search in
+ * @dev: the PCI device structure to match against.
+ *
+ * Used by a driver to check whether a PCI device is in its list of
+ * supported devices.  Returns the matching pci_device_id structure or
+ * %NULL if there is no match.
+ *
+ * Deprecated; don't use this as it will not catch any dynamic IDs
+ * that a driver might want to check for.
+ */
+const struct pci_device_id *pci_match_id(const struct pci_device_id *ids,
+					 struct pci_dev *dev)
+{
+	if (ids) {
+		while (ids->vendor || ids->subvendor || ids->class_mask) {
+			if (pci_match_one_device(ids, dev))
+				return ids;
+			ids++;
+		}
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(pci_match_id);
+
+static const struct pci_device_id pci_device_id_any = {
+	.vendor = PCI_ANY_ID,
+	.device = PCI_ANY_ID,
+	.subvendor = PCI_ANY_ID,
+	.subdevice = PCI_ANY_ID,
+};
+
+/**
+ * pci_match_device - See if a device matches a driver's list of IDs
+ * @drv: the PCI driver to match against
+ * @dev: the PCI device structure to match against
+ *
+ * Used by a driver to check whether a PCI device is in its list of
+ * supported devices or in the dynids list, which may have been augmented
+ * via the sysfs "new_id" file.  Returns the matching pci_device_id
+ * structure or %NULL if there is no match.
+ */
+static const struct pci_device_id *pci_match_device(struct pci_driver *drv,
+						    struct pci_dev *dev)
+{
+	struct pci_dynid *dynid;
+	const struct pci_device_id *found_id = NULL, *ids;
+
+	/* When driver_override is set, only bind to the matching driver */
+	if (dev->driver_override && strcmp(dev->driver_override, drv->name))
+		return NULL;
+
+	/* Look at the dynamic ids first, before the static ones */
+	spin_lock(&drv->dynids.lock);
+	list_for_each_entry(dynid, &drv->dynids.list, node) {
+		if (pci_match_one_device(&dynid->id, dev)) {
+			found_id = &dynid->id;
+			break;
+		}
+	}
+	spin_unlock(&drv->dynids.lock);
+
+	if (found_id)
+		return found_id;
+
+	for (ids = drv->id_table; (found_id = pci_match_id(ids, dev));
+	     ids = found_id + 1) {
+		/*
+		 * The match table is split based on driver_override.
+		 * In case override_only was set, enforce driver_override
+		 * matching.
+		 */
+		if (found_id->override_only) {
+			if (dev->driver_override)
+				return found_id;
+		} else {
+			return found_id;
+		}
+	}
+
+	/* driver_override will always match, send a dummy id */
+	if (dev->driver_override)
+		return &pci_device_id_any;
+	return NULL;
+}
+
+/**
+ * new_id_store - sysfs frontend to pci_add_dynid()
+ * @driver: target device driver
+ * @buf: buffer for scanning device ID data
+ * @count: input size
+ *
+ * Allow PCI IDs to be added to an existing driver via sysfs.
+ */
+static ssize_t new_id_store(struct device_driver *driver, const char *buf,
+			    size_t count)
+{
+	struct pci_driver *pdrv = to_pci_driver(driver);
+	const struct pci_device_id *ids = pdrv->id_table;
+	u32 vendor, device, subvendor = PCI_ANY_ID,
+		subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
+	unsigned long driver_data = 0;
+	int fields;
+	int retval = 0;
+
+	fields = sscanf(buf, "%x %x %x %x %x %x %lx",
+			&vendor, &device, &subvendor, &subdevice,
+			&class, &class_mask, &driver_data);
+	if (fields < 2)
+		return -EINVAL;
+
+	if (fields != 7) {
+		struct pci_dev *pdev = kzalloc(sizeof(*pdev), GFP_KERNEL);
+		if (!pdev)
+			return -ENOMEM;
+
+		pdev->vendor = vendor;
+		pdev->device = device;
+		pdev->subsystem_vendor = subvendor;
+		pdev->subsystem_device = subdevice;
+		pdev->class = class;
+
+		if (pci_match_device(pdrv, pdev))
+			retval = -EEXIST;
+
+		kfree(pdev);
+
+		if (retval)
+			return retval;
+	}
+
+	/* Only accept driver_data values that match an existing id_table
+	   entry */
+	if (ids) {
+		retval = -EINVAL;
+		while (ids->vendor || ids->subvendor || ids->class_mask) {
+			if (driver_data == ids->driver_data) {
+				retval = 0;
+				break;
+			}
+			ids++;
+		}
+		if (retval)	/* No match */
+			return retval;
+	}
+
+	retval = pci_add_dynid(pdrv, vendor, device, subvendor, subdevice,
+			       class, class_mask, driver_data);
+	if (retval)
+		return retval;
+	return count;
+}
+static DRIVER_ATTR_WO(new_id);
+
+/**
+ * remove_id_store - remove a PCI device ID from this driver
+ * @driver: target device driver
+ * @buf: buffer for scanning device ID data
+ * @count: input size
+ *
+ * Removes a dynamic pci device ID to this driver.
+ */
+static ssize_t remove_id_store(struct device_driver *driver, const char *buf,
+			       size_t count)
+{
+	struct pci_dynid *dynid, *n;
+	struct pci_driver *pdrv = to_pci_driver(driver);
+	u32 vendor, device, subvendor = PCI_ANY_ID,
+		subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
+	int fields;
+	size_t retval = -ENODEV;
+
+	fields = sscanf(buf, "%x %x %x %x %x %x",
+			&vendor, &device, &subvendor, &subdevice,
+			&class, &class_mask);
+	if (fields < 2)
+		return -EINVAL;
+
+	spin_lock(&pdrv->dynids.lock);
+	list_for_each_entry_safe(dynid, n, &pdrv->dynids.list, node) {
+		struct pci_device_id *id = &dynid->id;
+		if ((id->vendor == vendor) &&
+		    (id->device == device) &&
+		    (subvendor == PCI_ANY_ID || id->subvendor == subvendor) &&
+		    (subdevice == PCI_ANY_ID || id->subdevice == subdevice) &&
+		    !((id->class ^ class) & class_mask)) {
+			list_del(&dynid->node);
+			kfree(dynid);
+			retval = count;
+			break;
+		}
+	}
+	spin_unlock(&pdrv->dynids.lock);
+
+	return retval;
+}
+static DRIVER_ATTR_WO(remove_id);
+
+static struct attribute *pci_drv_attrs[] = {
+	&driver_attr_new_id.attr,
+	&driver_attr_remove_id.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(pci_drv);
+
+struct drv_dev_and_id {
+	struct pci_driver *drv;
+	struct pci_dev *dev;
+	const struct pci_device_id *id;
+};
+
+static long local_pci_probe(void *_ddi)
+{
+	struct drv_dev_and_id *ddi = _ddi;
+	struct pci_dev *pci_dev = ddi->dev;
+	struct pci_driver *pci_drv = ddi->drv;
+	struct device *dev = &pci_dev->dev;
+	int rc;
+
+	/*
+	 * Unbound PCI devices are always put in D0, regardless of
+	 * runtime PM status.  During probe, the device is set to
+	 * active and the usage count is incremented.  If the driver
+	 * supports runtime PM, it should call pm_runtime_put_noidle(),
+	 * or any other runtime PM helper function decrementing the usage
+	 * count, in its probe routine and pm_runtime_get_noresume() in
+	 * its remove routine.
+	 */
+	pm_runtime_get_sync(dev);
+	pci_dev->driver = pci_drv;
+	rc = pci_drv->probe(pci_dev, ddi->id);
+	if (!rc)
+		return rc;
+	if (rc < 0) {
+		pci_dev->driver = NULL;
+		pm_runtime_put_sync(dev);
+		return rc;
+	}
+	/*
+	 * Probe function should return < 0 for failure, 0 for success
+	 * Treat values > 0 as success, but warn.
+	 */
+	pci_warn(pci_dev, "Driver probe function unexpectedly returned %d\n",
+		 rc);
+	return 0;
+}
+
+static bool pci_physfn_is_probed(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_IOV
+	return dev->is_virtfn && dev->physfn->is_probed;
+#else
+	return false;
+#endif
+}
+
+static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
+			  const struct pci_device_id *id)
+{
+	int error, node, cpu;
+	struct drv_dev_and_id ddi = { drv, dev, id };
+
+	/*
+	 * Execute driver initialization on node where the device is
+	 * attached.  This way the driver likely allocates its local memory
+	 * on the right node.
+	 */
+	node = dev_to_node(&dev->dev);
+	dev->is_probed = 1;
+
+	cpu_hotplug_disable();
+
+	/*
+	 * Prevent nesting work_on_cpu() for the case where a Virtual Function
+	 * device is probed from work_on_cpu() of the Physical device.
+	 */
+	if (node < 0 || node >= MAX_NUMNODES || !node_online(node) ||
+	    pci_physfn_is_probed(dev)) {
+		cpu = nr_cpu_ids;
+	} else {
+		cpumask_var_t wq_domain_mask;
+
+		if (!zalloc_cpumask_var(&wq_domain_mask, GFP_KERNEL)) {
+			error = -ENOMEM;
+			goto out;
+		}
+		cpumask_and(wq_domain_mask,
+			    housekeeping_cpumask(HK_TYPE_WQ),
+			    housekeeping_cpumask(HK_TYPE_DOMAIN));
+
+		cpu = cpumask_any_and(cpumask_of_node(node),
+				      wq_domain_mask);
+		free_cpumask_var(wq_domain_mask);
+	}
+
+	if (cpu < nr_cpu_ids)
+		error = work_on_cpu(cpu, local_pci_probe, &ddi);
+	else
+		error = local_pci_probe(&ddi);
+out:
+	dev->is_probed = 0;
+	cpu_hotplug_enable();
+	return error;
+}
+
+/**
+ * __pci_device_probe - check if a driver wants to claim a specific PCI device
+ * @drv: driver to call to check if it wants the PCI device
+ * @pci_dev: PCI device being probed
+ *
+ * returns 0 on success, else error.
+ * side-effect: pci_dev->driver is set to drv when drv claims pci_dev.
+ */
+static int __pci_device_probe(struct pci_driver *drv, struct pci_dev *pci_dev)
+{
+	const struct pci_device_id *id;
+	int error = 0;
+
+	if (drv->probe) {
+		error = -ENODEV;
+
+		id = pci_match_device(drv, pci_dev);
+		if (id)
+			error = pci_call_probe(drv, pci_dev, id);
+	}
+	return error;
+}
+
+int __weak pcibios_alloc_irq(struct pci_dev *dev)
+{
+	return 0;
+}
+
+void __weak pcibios_free_irq(struct pci_dev *dev)
+{
+}
+
+#ifdef CONFIG_PCI_IOV
+static inline bool pci_device_can_probe(struct pci_dev *pdev)
+{
+	return (!pdev->is_virtfn || pdev->physfn->sriov->drivers_autoprobe ||
+		pdev->driver_override);
+}
+#else
+static inline bool pci_device_can_probe(struct pci_dev *pdev)
+{
+	return true;
+}
+#endif
+
+static int pci_device_probe(struct device *dev)
+{
+	int error;
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct pci_driver *drv = to_pci_driver(dev->driver);
+
+	if (!pci_device_can_probe(pci_dev))
+		return -ENODEV;
+
+	pci_assign_irq(pci_dev);
+
+	error = pcibios_alloc_irq(pci_dev);
+	if (error < 0)
+		return error;
+
+	pci_dev_get(pci_dev);
+	error = __pci_device_probe(drv, pci_dev);
+	if (error) {
+		pcibios_free_irq(pci_dev);
+		pci_dev_put(pci_dev);
+	}
+
+	return error;
+}
+
+static void pci_device_remove(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct pci_driver *drv = pci_dev->driver;
+
+	if (drv->remove) {
+		pm_runtime_get_sync(dev);
+		drv->remove(pci_dev);
+		pm_runtime_put_noidle(dev);
+	}
+	pcibios_free_irq(pci_dev);
+	pci_dev->driver = NULL;
+	pci_iov_remove(pci_dev);
+
+	/* Undo the runtime PM settings in local_pci_probe() */
+	pm_runtime_put_sync(dev);
+
+	/*
+	 * If the device is still on, set the power state as "unknown",
+	 * since it might change by the next time we load the driver.
+	 */
+	if (pci_dev->current_state == PCI_D0)
+		pci_dev->current_state = PCI_UNKNOWN;
+
+	/*
+	 * We would love to complain here if pci_dev->is_enabled is set, that
+	 * the driver should have called pci_disable_device(), but the
+	 * unfortunate fact is there are too many odd BIOS and bridge setups
+	 * that don't like drivers doing that all of the time.
+	 * Oh well, we can dream of sane hardware when we sleep, no matter how
+	 * horrible the crap we have to deal with is when we are awake...
+	 */
+
+	pci_dev_put(pci_dev);
+}
+
+static void pci_device_shutdown(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct pci_driver *drv = pci_dev->driver;
+
+	pm_runtime_resume(dev);
+
+	if (drv && drv->shutdown)
+		drv->shutdown(pci_dev);
+
+	/*
+	 * If this is a kexec reboot, turn off Bus Master bit on the
+	 * device to tell it to not continue to do DMA. Don't touch
+	 * devices in D3cold or unknown states.
+	 * If it is not a kexec reboot, firmware will hit the PCI
+	 * devices with big hammer and stop their DMA any way.
+	 */
+	if (kexec_in_progress && (pci_dev->current_state <= PCI_D3hot))
+		pci_clear_master(pci_dev);
+}
+
+#ifdef CONFIG_PM_SLEEP
+
+/* Auxiliary functions used for system resume */
+
+/**
+ * pci_restore_standard_config - restore standard config registers of PCI device
+ * @pci_dev: PCI device to handle
+ */
+static int pci_restore_standard_config(struct pci_dev *pci_dev)
+{
+	pci_update_current_state(pci_dev, PCI_UNKNOWN);
+
+	if (pci_dev->current_state != PCI_D0) {
+		int error = pci_set_power_state(pci_dev, PCI_D0);
+		if (error)
+			return error;
+	}
+
+	pci_restore_state(pci_dev);
+	pci_pme_restore(pci_dev);
+	return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+#ifdef CONFIG_PM
+
+/* Auxiliary functions used for system resume and run-time resume */
+
+static void pci_pm_default_resume(struct pci_dev *pci_dev)
+{
+	pci_fixup_device(pci_fixup_resume, pci_dev);
+	pci_enable_wake(pci_dev, PCI_D0, false);
+}
+
+static void pci_pm_power_up_and_verify_state(struct pci_dev *pci_dev)
+{
+	pci_power_up(pci_dev);
+	pci_update_current_state(pci_dev, PCI_D0);
+}
+
+static void pci_pm_default_resume_early(struct pci_dev *pci_dev)
+{
+	pci_pm_power_up_and_verify_state(pci_dev);
+	pci_restore_state(pci_dev);
+	pci_pme_restore(pci_dev);
+}
+
+static void pci_pm_bridge_power_up_actions(struct pci_dev *pci_dev)
+{
+	int ret;
+
+	ret = pci_bridge_wait_for_secondary_bus(pci_dev, "resume");
+	if (ret) {
+		/*
+		 * The downstream link failed to come up, so mark the
+		 * devices below as disconnected to make sure we don't
+		 * attempt to resume them.
+		 */
+		pci_walk_bus(pci_dev->subordinate, pci_dev_set_disconnected,
+			     NULL);
+		return;
+	}
+
+	/*
+	 * When powering on a bridge from D3cold, the whole hierarchy may be
+	 * powered on into D0uninitialized state, resume them to give them a
+	 * chance to suspend again
+	 */
+	pci_resume_bus(pci_dev->subordinate);
+}
+
+#endif /* CONFIG_PM */
+
+#ifdef CONFIG_PM_SLEEP
+
+/*
+ * Default "suspend" method for devices that have no driver provided suspend,
+ * or not even a driver at all (second part).
+ */
+static void pci_pm_set_unknown_state(struct pci_dev *pci_dev)
+{
+	/*
+	 * mark its power state as "unknown", since we don't know if
+	 * e.g. the BIOS will change its device state when we suspend.
+	 */
+	if (pci_dev->current_state == PCI_D0)
+		pci_dev->current_state = PCI_UNKNOWN;
+}
+
+/*
+ * Default "resume" method for devices that have no driver provided resume,
+ * or not even a driver at all (second part).
+ */
+static int pci_pm_reenable_device(struct pci_dev *pci_dev)
+{
+	int retval;
+
+	/* if the device was enabled before suspend, re-enable */
+	retval = pci_reenable_device(pci_dev);
+	/*
+	 * if the device was busmaster before the suspend, make it busmaster
+	 * again
+	 */
+	if (pci_dev->is_busmaster)
+		pci_set_master(pci_dev);
+
+	return retval;
+}
+
+static int pci_legacy_suspend(struct device *dev, pm_message_t state)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct pci_driver *drv = pci_dev->driver;
+
+	if (drv && drv->suspend) {
+		pci_power_t prev = pci_dev->current_state;
+		int error;
+
+		error = drv->suspend(pci_dev, state);
+		suspend_report_result(dev, drv->suspend, error);
+		if (error)
+			return error;
+
+		if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0
+		    && pci_dev->current_state != PCI_UNKNOWN) {
+			pci_WARN_ONCE(pci_dev, pci_dev->current_state != prev,
+				      "PCI PM: Device state not saved by %pS\n",
+				      drv->suspend);
+		}
+	}
+
+	pci_fixup_device(pci_fixup_suspend, pci_dev);
+
+	return 0;
+}
+
+static int pci_legacy_suspend_late(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+
+	if (!pci_dev->state_saved)
+		pci_save_state(pci_dev);
+
+	pci_pm_set_unknown_state(pci_dev);
+
+	pci_fixup_device(pci_fixup_suspend_late, pci_dev);
+
+	return 0;
+}
+
+static int pci_legacy_resume(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct pci_driver *drv = pci_dev->driver;
+
+	pci_fixup_device(pci_fixup_resume, pci_dev);
+
+	return drv && drv->resume ?
+			drv->resume(pci_dev) : pci_pm_reenable_device(pci_dev);
+}
+
+/* Auxiliary functions used by the new power management framework */
+
+static void pci_pm_default_suspend(struct pci_dev *pci_dev)
+{
+	/* Disable non-bridge devices without PM support */
+	if (!pci_has_subordinate(pci_dev))
+		pci_disable_enabled_device(pci_dev);
+}
+
+static bool pci_has_legacy_pm_support(struct pci_dev *pci_dev)
+{
+	struct pci_driver *drv = pci_dev->driver;
+	bool ret = drv && (drv->suspend || drv->resume);
+
+	/*
+	 * Legacy PM support is used by default, so warn if the new framework is
+	 * supported as well.  Drivers are supposed to support either the
+	 * former, or the latter, but not both at the same time.
+	 */
+	pci_WARN(pci_dev, ret && drv->driver.pm, "device %04x:%04x\n",
+		 pci_dev->vendor, pci_dev->device);
+
+	return ret;
+}
+
+/* New power management framework */
+
+static int pci_pm_prepare(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	if (pm && pm->prepare) {
+		int error = pm->prepare(dev);
+		if (error < 0)
+			return error;
+
+		if (!error && dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_PREPARE))
+			return 0;
+	}
+	if (pci_dev_need_resume(pci_dev))
+		return 0;
+
+	/*
+	 * The PME setting needs to be adjusted here in case the direct-complete
+	 * optimization is used with respect to this device.
+	 */
+	pci_dev_adjust_pme(pci_dev);
+	return 1;
+}
+
+static void pci_pm_complete(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+
+	pci_dev_complete_resume(pci_dev);
+	pm_generic_complete(dev);
+
+	/* Resume device if platform firmware has put it in reset-power-on */
+	if (pm_runtime_suspended(dev) && pm_resume_via_firmware()) {
+		pci_power_t pre_sleep_state = pci_dev->current_state;
+
+		pci_refresh_power_state(pci_dev);
+		/*
+		 * On platforms with ACPI this check may also trigger for
+		 * devices sharing power resources if one of those power
+		 * resources has been activated as a result of a change of the
+		 * power state of another device sharing it.  However, in that
+		 * case it is also better to resume the device, in general.
+		 */
+		if (pci_dev->current_state < pre_sleep_state)
+			pm_request_resume(dev);
+	}
+}
+
+#else /* !CONFIG_PM_SLEEP */
+
+#define pci_pm_prepare	NULL
+#define pci_pm_complete	NULL
+
+#endif /* !CONFIG_PM_SLEEP */
+
+#ifdef CONFIG_SUSPEND
+static void pcie_pme_root_status_cleanup(struct pci_dev *pci_dev)
+{
+	/*
+	 * Some BIOSes forget to clear Root PME Status bits after system
+	 * wakeup, which breaks ACPI-based runtime wakeup on PCI Express.
+	 * Clear those bits now just in case (shouldn't hurt).
+	 */
+	if (pci_is_pcie(pci_dev) &&
+	    (pci_pcie_type(pci_dev) == PCI_EXP_TYPE_ROOT_PORT ||
+	     pci_pcie_type(pci_dev) == PCI_EXP_TYPE_RC_EC))
+		pcie_clear_root_pme_status(pci_dev);
+}
+
+static int pci_pm_suspend(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	pci_dev->skip_bus_pm = false;
+
+	/*
+	 * Disabling PTM allows some systems, e.g., Intel mobile chips
+	 * since Coffee Lake, to enter a lower-power PM state.
+	 */
+	pci_suspend_ptm(pci_dev);
+
+	if (pci_has_legacy_pm_support(pci_dev))
+		return pci_legacy_suspend(dev, PMSG_SUSPEND);
+
+	if (!pm) {
+		pci_pm_default_suspend(pci_dev);
+		return 0;
+	}
+
+	/*
+	 * PCI devices suspended at run time may need to be resumed at this
+	 * point, because in general it may be necessary to reconfigure them for
+	 * system suspend.  Namely, if the device is expected to wake up the
+	 * system from the sleep state, it may have to be reconfigured for this
+	 * purpose, or if the device is not expected to wake up the system from
+	 * the sleep state, it should be prevented from signaling wakeup events
+	 * going forward.
+	 *
+	 * Also if the driver of the device does not indicate that its system
+	 * suspend callbacks can cope with runtime-suspended devices, it is
+	 * better to resume the device from runtime suspend here.
+	 */
+	if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) ||
+	    pci_dev_need_resume(pci_dev)) {
+		pm_runtime_resume(dev);
+		pci_dev->state_saved = false;
+	} else {
+		pci_dev_adjust_pme(pci_dev);
+	}
+
+	if (pm->suspend) {
+		pci_power_t prev = pci_dev->current_state;
+		int error;
+
+		error = pm->suspend(dev);
+		suspend_report_result(dev, pm->suspend, error);
+		if (error)
+			return error;
+
+		if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0
+		    && pci_dev->current_state != PCI_UNKNOWN) {
+			pci_WARN_ONCE(pci_dev, pci_dev->current_state != prev,
+				      "PCI PM: State of device not saved by %pS\n",
+				      pm->suspend);
+		}
+	}
+
+	return 0;
+}
+
+static int pci_pm_suspend_late(struct device *dev)
+{
+	if (dev_pm_skip_suspend(dev))
+		return 0;
+
+	pci_fixup_device(pci_fixup_suspend, to_pci_dev(dev));
+
+	return pm_generic_suspend_late(dev);
+}
+
+static int pci_pm_suspend_noirq(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	if (dev_pm_skip_suspend(dev))
+		return 0;
+
+	if (pci_has_legacy_pm_support(pci_dev))
+		return pci_legacy_suspend_late(dev);
+
+	if (!pm) {
+		pci_save_state(pci_dev);
+		goto Fixup;
+	}
+
+	if (pm->suspend_noirq) {
+		pci_power_t prev = pci_dev->current_state;
+		int error;
+
+		error = pm->suspend_noirq(dev);
+		suspend_report_result(dev, pm->suspend_noirq, error);
+		if (error)
+			return error;
+
+		if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0
+		    && pci_dev->current_state != PCI_UNKNOWN) {
+			pci_WARN_ONCE(pci_dev, pci_dev->current_state != prev,
+				      "PCI PM: State of device not saved by %pS\n",
+				      pm->suspend_noirq);
+			goto Fixup;
+		}
+	}
+
+	if (!pci_dev->state_saved) {
+		pci_save_state(pci_dev);
+
+		/*
+		 * If the device is a bridge with a child in D0 below it,
+		 * it needs to stay in D0, so check skip_bus_pm to avoid
+		 * putting it into a low-power state in that case.
+		 */
+		if (!pci_dev->skip_bus_pm && pci_power_manageable(pci_dev))
+			pci_prepare_to_sleep(pci_dev);
+	}
+
+	pci_dbg(pci_dev, "PCI PM: Suspend power state: %s\n",
+		pci_power_name(pci_dev->current_state));
+
+	if (pci_dev->current_state == PCI_D0) {
+		pci_dev->skip_bus_pm = true;
+		/*
+		 * Per PCI PM r1.2, table 6-1, a bridge must be in D0 if any
+		 * downstream device is in D0, so avoid changing the power state
+		 * of the parent bridge by setting the skip_bus_pm flag for it.
+		 */
+		if (pci_dev->bus->self)
+			pci_dev->bus->self->skip_bus_pm = true;
+	}
+
+	if (pci_dev->skip_bus_pm && pm_suspend_no_platform()) {
+		pci_dbg(pci_dev, "PCI PM: Skipped\n");
+		goto Fixup;
+	}
+
+	pci_pm_set_unknown_state(pci_dev);
+
+	/*
+	 * Some BIOSes from ASUS have a bug: If a USB EHCI host controller's
+	 * PCI COMMAND register isn't 0, the BIOS assumes that the controller
+	 * hasn't been quiesced and tries to turn it off.  If the controller
+	 * is already in D3, this can hang or cause memory corruption.
+	 *
+	 * Since the value of the COMMAND register doesn't matter once the
+	 * device has been suspended, we can safely set it to 0 here.
+	 */
+	if (pci_dev->class == PCI_CLASS_SERIAL_USB_EHCI)
+		pci_write_config_word(pci_dev, PCI_COMMAND, 0);
+
+Fixup:
+	pci_fixup_device(pci_fixup_suspend_late, pci_dev);
+
+	/*
+	 * If the target system sleep state is suspend-to-idle, it is sufficient
+	 * to check whether or not the device's wakeup settings are good for
+	 * runtime PM.  Otherwise, the pm_resume_via_firmware() check will cause
+	 * pci_pm_complete() to take care of fixing up the device's state
+	 * anyway, if need be.
+	 */
+	if (device_can_wakeup(dev) && !device_may_wakeup(dev))
+		dev->power.may_skip_resume = false;
+
+	return 0;
+}
+
+static int pci_pm_resume_noirq(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+	pci_power_t prev_state = pci_dev->current_state;
+	bool skip_bus_pm = pci_dev->skip_bus_pm;
+
+	if (dev_pm_skip_resume(dev))
+		return 0;
+
+	/*
+	 * In the suspend-to-idle case, devices left in D0 during suspend will
+	 * stay in D0, so it is not necessary to restore or update their
+	 * configuration here and attempting to put them into D0 again is
+	 * pointless, so avoid doing that.
+	 */
+	if (!(skip_bus_pm && pm_suspend_no_platform()))
+		pci_pm_default_resume_early(pci_dev);
+
+	pci_fixup_device(pci_fixup_resume_early, pci_dev);
+	pcie_pme_root_status_cleanup(pci_dev);
+
+	if (!skip_bus_pm && prev_state == PCI_D3cold)
+		pci_pm_bridge_power_up_actions(pci_dev);
+
+	if (pci_has_legacy_pm_support(pci_dev))
+		return 0;
+
+	if (pm && pm->resume_noirq)
+		return pm->resume_noirq(dev);
+
+	return 0;
+}
+
+static int pci_pm_resume_early(struct device *dev)
+{
+	if (dev_pm_skip_resume(dev))
+		return 0;
+
+	return pm_generic_resume_early(dev);
+}
+
+static int pci_pm_resume(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	/*
+	 * This is necessary for the suspend error path in which resume is
+	 * called without restoring the standard config registers of the device.
+	 */
+	if (pci_dev->state_saved)
+		pci_restore_standard_config(pci_dev);
+
+	pci_resume_ptm(pci_dev);
+
+	if (pci_has_legacy_pm_support(pci_dev))
+		return pci_legacy_resume(dev);
+
+	pci_pm_default_resume(pci_dev);
+
+	if (pm) {
+		if (pm->resume)
+			return pm->resume(dev);
+	} else {
+		pci_pm_reenable_device(pci_dev);
+	}
+
+	return 0;
+}
+
+#else /* !CONFIG_SUSPEND */
+
+#define pci_pm_suspend		NULL
+#define pci_pm_suspend_late	NULL
+#define pci_pm_suspend_noirq	NULL
+#define pci_pm_resume		NULL
+#define pci_pm_resume_early	NULL
+#define pci_pm_resume_noirq	NULL
+
+#endif /* !CONFIG_SUSPEND */
+
+#ifdef CONFIG_HIBERNATE_CALLBACKS
+
+static int pci_pm_freeze(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	if (pci_has_legacy_pm_support(pci_dev))
+		return pci_legacy_suspend(dev, PMSG_FREEZE);
+
+	if (!pm) {
+		pci_pm_default_suspend(pci_dev);
+		return 0;
+	}
+
+	/*
+	 * Resume all runtime-suspended devices before creating a snapshot
+	 * image of system memory, because the restore kernel generally cannot
+	 * be expected to always handle them consistently and they need to be
+	 * put into the runtime-active metastate during system resume anyway,
+	 * so it is better to ensure that the state saved in the image will be
+	 * always consistent with that.
+	 */
+	pm_runtime_resume(dev);
+	pci_dev->state_saved = false;
+
+	if (pm->freeze) {
+		int error;
+
+		error = pm->freeze(dev);
+		suspend_report_result(dev, pm->freeze, error);
+		if (error)
+			return error;
+	}
+
+	return 0;
+}
+
+static int pci_pm_freeze_noirq(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	if (pci_has_legacy_pm_support(pci_dev))
+		return pci_legacy_suspend_late(dev);
+
+	if (pm && pm->freeze_noirq) {
+		int error;
+
+		error = pm->freeze_noirq(dev);
+		suspend_report_result(dev, pm->freeze_noirq, error);
+		if (error)
+			return error;
+	}
+
+	if (!pci_dev->state_saved)
+		pci_save_state(pci_dev);
+
+	pci_pm_set_unknown_state(pci_dev);
+
+	return 0;
+}
+
+static int pci_pm_thaw_noirq(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	/*
+	 * The pm->thaw_noirq() callback assumes the device has been
+	 * returned to D0 and its config state has been restored.
+	 *
+	 * In addition, pci_restore_state() restores MSI-X state in MMIO
+	 * space, which requires the device to be in D0, so return it to D0
+	 * in case the driver's "freeze" callbacks put it into a low-power
+	 * state.
+	 */
+	pci_pm_power_up_and_verify_state(pci_dev);
+	pci_restore_state(pci_dev);
+
+	if (pci_has_legacy_pm_support(pci_dev))
+		return 0;
+
+	if (pm && pm->thaw_noirq)
+		return pm->thaw_noirq(dev);
+
+	return 0;
+}
+
+static int pci_pm_thaw(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+	int error = 0;
+
+	if (pci_has_legacy_pm_support(pci_dev))
+		return pci_legacy_resume(dev);
+
+	if (pm) {
+		if (pm->thaw)
+			error = pm->thaw(dev);
+	} else {
+		pci_pm_reenable_device(pci_dev);
+	}
+
+	pci_dev->state_saved = false;
+
+	return error;
+}
+
+static int pci_pm_poweroff(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	if (pci_has_legacy_pm_support(pci_dev))
+		return pci_legacy_suspend(dev, PMSG_HIBERNATE);
+
+	if (!pm) {
+		pci_pm_default_suspend(pci_dev);
+		return 0;
+	}
+
+	/* The reason to do that is the same as in pci_pm_suspend(). */
+	if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) ||
+	    pci_dev_need_resume(pci_dev)) {
+		pm_runtime_resume(dev);
+		pci_dev->state_saved = false;
+	} else {
+		pci_dev_adjust_pme(pci_dev);
+	}
+
+	if (pm->poweroff) {
+		int error;
+
+		error = pm->poweroff(dev);
+		suspend_report_result(dev, pm->poweroff, error);
+		if (error)
+			return error;
+	}
+
+	return 0;
+}
+
+static int pci_pm_poweroff_late(struct device *dev)
+{
+	if (dev_pm_skip_suspend(dev))
+		return 0;
+
+	pci_fixup_device(pci_fixup_suspend, to_pci_dev(dev));
+
+	return pm_generic_poweroff_late(dev);
+}
+
+static int pci_pm_poweroff_noirq(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	if (dev_pm_skip_suspend(dev))
+		return 0;
+
+	if (pci_has_legacy_pm_support(pci_dev))
+		return pci_legacy_suspend_late(dev);
+
+	if (!pm) {
+		pci_fixup_device(pci_fixup_suspend_late, pci_dev);
+		return 0;
+	}
+
+	if (pm->poweroff_noirq) {
+		int error;
+
+		error = pm->poweroff_noirq(dev);
+		suspend_report_result(dev, pm->poweroff_noirq, error);
+		if (error)
+			return error;
+	}
+
+	if (!pci_dev->state_saved && !pci_has_subordinate(pci_dev))
+		pci_prepare_to_sleep(pci_dev);
+
+	/*
+	 * The reason for doing this here is the same as for the analogous code
+	 * in pci_pm_suspend_noirq().
+	 */
+	if (pci_dev->class == PCI_CLASS_SERIAL_USB_EHCI)
+		pci_write_config_word(pci_dev, PCI_COMMAND, 0);
+
+	pci_fixup_device(pci_fixup_suspend_late, pci_dev);
+
+	return 0;
+}
+
+static int pci_pm_restore_noirq(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	pci_pm_default_resume_early(pci_dev);
+	pci_fixup_device(pci_fixup_resume_early, pci_dev);
+
+	if (pci_has_legacy_pm_support(pci_dev))
+		return 0;
+
+	if (pm && pm->restore_noirq)
+		return pm->restore_noirq(dev);
+
+	return 0;
+}
+
+static int pci_pm_restore(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	/*
+	 * This is necessary for the hibernation error path in which restore is
+	 * called without restoring the standard config registers of the device.
+	 */
+	if (pci_dev->state_saved)
+		pci_restore_standard_config(pci_dev);
+
+	if (pci_has_legacy_pm_support(pci_dev))
+		return pci_legacy_resume(dev);
+
+	pci_pm_default_resume(pci_dev);
+
+	if (pm) {
+		if (pm->restore)
+			return pm->restore(dev);
+	} else {
+		pci_pm_reenable_device(pci_dev);
+	}
+
+	return 0;
+}
+
+#else /* !CONFIG_HIBERNATE_CALLBACKS */
+
+#define pci_pm_freeze		NULL
+#define pci_pm_freeze_noirq	NULL
+#define pci_pm_thaw		NULL
+#define pci_pm_thaw_noirq	NULL
+#define pci_pm_poweroff		NULL
+#define pci_pm_poweroff_late	NULL
+#define pci_pm_poweroff_noirq	NULL
+#define pci_pm_restore		NULL
+#define pci_pm_restore_noirq	NULL
+
+#endif /* !CONFIG_HIBERNATE_CALLBACKS */
+
+#ifdef CONFIG_PM
+
+static int pci_pm_runtime_suspend(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+	pci_power_t prev = pci_dev->current_state;
+	int error;
+
+	pci_suspend_ptm(pci_dev);
+
+	/*
+	 * If pci_dev->driver is not set (unbound), we leave the device in D0,
+	 * but it may go to D3cold when the bridge above it runtime suspends.
+	 * Save its config space in case that happens.
+	 */
+	if (!pci_dev->driver) {
+		pci_save_state(pci_dev);
+		return 0;
+	}
+
+	pci_dev->state_saved = false;
+	if (pm && pm->runtime_suspend) {
+		error = pm->runtime_suspend(dev);
+		/*
+		 * -EBUSY and -EAGAIN is used to request the runtime PM core
+		 * to schedule a new suspend, so log the event only with debug
+		 * log level.
+		 */
+		if (error == -EBUSY || error == -EAGAIN) {
+			pci_dbg(pci_dev, "can't suspend now (%ps returned %d)\n",
+				pm->runtime_suspend, error);
+			return error;
+		} else if (error) {
+			pci_err(pci_dev, "can't suspend (%ps returned %d)\n",
+				pm->runtime_suspend, error);
+			return error;
+		}
+	}
+
+	pci_fixup_device(pci_fixup_suspend, pci_dev);
+
+	if (pm && pm->runtime_suspend
+	    && !pci_dev->state_saved && pci_dev->current_state != PCI_D0
+	    && pci_dev->current_state != PCI_UNKNOWN) {
+		pci_WARN_ONCE(pci_dev, pci_dev->current_state != prev,
+			      "PCI PM: State of device not saved by %pS\n",
+			      pm->runtime_suspend);
+		return 0;
+	}
+
+	if (!pci_dev->state_saved) {
+		pci_save_state(pci_dev);
+		pci_finish_runtime_suspend(pci_dev);
+	}
+
+	return 0;
+}
+
+static int pci_pm_runtime_resume(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+	pci_power_t prev_state = pci_dev->current_state;
+	int error = 0;
+
+	/*
+	 * Restoring config space is necessary even if the device is not bound
+	 * to a driver because although we left it in D0, it may have gone to
+	 * D3cold when the bridge above it runtime suspended.
+	 */
+	pci_pm_default_resume_early(pci_dev);
+	pci_resume_ptm(pci_dev);
+
+	if (!pci_dev->driver)
+		return 0;
+
+	pci_fixup_device(pci_fixup_resume_early, pci_dev);
+	pci_pm_default_resume(pci_dev);
+
+	if (prev_state == PCI_D3cold)
+		pci_pm_bridge_power_up_actions(pci_dev);
+
+	if (pm && pm->runtime_resume)
+		error = pm->runtime_resume(dev);
+
+	return error;
+}
+
+static int pci_pm_runtime_idle(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	/*
+	 * If pci_dev->driver is not set (unbound), the device should
+	 * always remain in D0 regardless of the runtime PM status
+	 */
+	if (!pci_dev->driver)
+		return 0;
+
+	if (!pm)
+		return -ENOSYS;
+
+	if (pm->runtime_idle)
+		return pm->runtime_idle(dev);
+
+	return 0;
+}
+
+static const struct dev_pm_ops pci_dev_pm_ops = {
+	.prepare = pci_pm_prepare,
+	.complete = pci_pm_complete,
+	.suspend = pci_pm_suspend,
+	.suspend_late = pci_pm_suspend_late,
+	.resume = pci_pm_resume,
+	.resume_early = pci_pm_resume_early,
+	.freeze = pci_pm_freeze,
+	.thaw = pci_pm_thaw,
+	.poweroff = pci_pm_poweroff,
+	.poweroff_late = pci_pm_poweroff_late,
+	.restore = pci_pm_restore,
+	.suspend_noirq = pci_pm_suspend_noirq,
+	.resume_noirq = pci_pm_resume_noirq,
+	.freeze_noirq = pci_pm_freeze_noirq,
+	.thaw_noirq = pci_pm_thaw_noirq,
+	.poweroff_noirq = pci_pm_poweroff_noirq,
+	.restore_noirq = pci_pm_restore_noirq,
+	.runtime_suspend = pci_pm_runtime_suspend,
+	.runtime_resume = pci_pm_runtime_resume,
+	.runtime_idle = pci_pm_runtime_idle,
+};
+
+#define PCI_PM_OPS_PTR	(&pci_dev_pm_ops)
+
+#else /* !CONFIG_PM */
+
+#define pci_pm_runtime_suspend	NULL
+#define pci_pm_runtime_resume	NULL
+#define pci_pm_runtime_idle	NULL
+
+#define PCI_PM_OPS_PTR	NULL
+
+#endif /* !CONFIG_PM */
+
+/**
+ * __pci_register_driver - register a new pci driver
+ * @drv: the driver structure to register
+ * @owner: owner module of drv
+ * @mod_name: module name string
+ *
+ * Adds the driver structure to the list of registered drivers.
+ * Returns a negative value on error, otherwise 0.
+ * If no error occurred, the driver remains registered even if
+ * no device was claimed during registration.
+ */
+int __pci_register_driver(struct pci_driver *drv, struct module *owner,
+			  const char *mod_name)
+{
+	/* initialize common driver fields */
+	drv->driver.name = drv->name;
+	drv->driver.bus = &pci_bus_type;
+	drv->driver.owner = owner;
+	drv->driver.mod_name = mod_name;
+	drv->driver.groups = drv->groups;
+	drv->driver.dev_groups = drv->dev_groups;
+
+	spin_lock_init(&drv->dynids.lock);
+	INIT_LIST_HEAD(&drv->dynids.list);
+
+	/* register with core */
+	return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL(__pci_register_driver);
+
+/**
+ * pci_unregister_driver - unregister a pci driver
+ * @drv: the driver structure to unregister
+ *
+ * Deletes the driver structure from the list of registered PCI drivers,
+ * gives it a chance to clean up by calling its remove() function for
+ * each device it was responsible for, and marks those devices as
+ * driverless.
+ */
+
+void pci_unregister_driver(struct pci_driver *drv)
+{
+	driver_unregister(&drv->driver);
+	pci_free_dynids(drv);
+}
+EXPORT_SYMBOL(pci_unregister_driver);
+
+static struct pci_driver pci_compat_driver = {
+	.name = "compat"
+};
+
+/**
+ * pci_dev_driver - get the pci_driver of a device
+ * @dev: the device to query
+ *
+ * Returns the appropriate pci_driver structure or %NULL if there is no
+ * registered driver for the device.
+ */
+struct pci_driver *pci_dev_driver(const struct pci_dev *dev)
+{
+	int i;
+
+	if (dev->driver)
+		return dev->driver;
+
+	for (i = 0; i <= PCI_ROM_RESOURCE; i++)
+		if (dev->resource[i].flags & IORESOURCE_BUSY)
+			return &pci_compat_driver;
+
+	return NULL;
+}
+EXPORT_SYMBOL(pci_dev_driver);
+
+/**
+ * pci_bus_match - Tell if a PCI device structure has a matching PCI device id structure
+ * @dev: the PCI device structure to match against
+ * @drv: the device driver to search for matching PCI device id structures
+ *
+ * Used by a driver to check whether a PCI device present in the
+ * system is in its list of supported devices. Returns the matching
+ * pci_device_id structure or %NULL if there is no match.
+ */
+static int pci_bus_match(struct device *dev, struct device_driver *drv)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct pci_driver *pci_drv;
+	const struct pci_device_id *found_id;
+
+	if (!pci_dev->match_driver)
+		return 0;
+
+	pci_drv = to_pci_driver(drv);
+	found_id = pci_match_device(pci_drv, pci_dev);
+	if (found_id)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * pci_dev_get - increments the reference count of the pci device structure
+ * @dev: the device being referenced
+ *
+ * Each live reference to a device should be refcounted.
+ *
+ * Drivers for PCI devices should normally record such references in
+ * their probe() methods, when they bind to a device, and release
+ * them by calling pci_dev_put(), in their disconnect() methods.
+ *
+ * A pointer to the device with the incremented reference counter is returned.
+ */
+struct pci_dev *pci_dev_get(struct pci_dev *dev)
+{
+	if (dev)
+		get_device(&dev->dev);
+	return dev;
+}
+EXPORT_SYMBOL(pci_dev_get);
+
+/**
+ * pci_dev_put - release a use of the pci device structure
+ * @dev: device that's been disconnected
+ *
+ * Must be called when a user of a device is finished with it.  When the last
+ * user of the device calls this function, the memory of the device is freed.
+ */
+void pci_dev_put(struct pci_dev *dev)
+{
+	if (dev)
+		put_device(&dev->dev);
+}
+EXPORT_SYMBOL(pci_dev_put);
+
+static int pci_uevent(const struct device *dev, struct kobj_uevent_env *env)
+{
+	const struct pci_dev *pdev;
+
+	if (!dev)
+		return -ENODEV;
+
+	pdev = to_pci_dev(dev);
+
+	if (add_uevent_var(env, "PCI_CLASS=%04X", pdev->class))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "PCI_ID=%04X:%04X", pdev->vendor, pdev->device))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "PCI_SUBSYS_ID=%04X:%04X", pdev->subsystem_vendor,
+			   pdev->subsystem_device))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "PCI_SLOT_NAME=%s", pci_name(pdev)))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "MODALIAS=pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02X",
+			   pdev->vendor, pdev->device,
+			   pdev->subsystem_vendor, pdev->subsystem_device,
+			   (u8)(pdev->class >> 16), (u8)(pdev->class >> 8),
+			   (u8)(pdev->class)))
+		return -ENOMEM;
+
+	return 0;
+}
+
+#if defined(CONFIG_PCIEAER) || defined(CONFIG_EEH)
+/**
+ * pci_uevent_ers - emit a uevent during recovery path of PCI device
+ * @pdev: PCI device undergoing error recovery
+ * @err_type: type of error event
+ */
+void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type)
+{
+	int idx = 0;
+	char *envp[3];
+
+	switch (err_type) {
+	case PCI_ERS_RESULT_NONE:
+	case PCI_ERS_RESULT_CAN_RECOVER:
+		envp[idx++] = "ERROR_EVENT=BEGIN_RECOVERY";
+		envp[idx++] = "DEVICE_ONLINE=0";
+		break;
+	case PCI_ERS_RESULT_RECOVERED:
+		envp[idx++] = "ERROR_EVENT=SUCCESSFUL_RECOVERY";
+		envp[idx++] = "DEVICE_ONLINE=1";
+		break;
+	case PCI_ERS_RESULT_DISCONNECT:
+		envp[idx++] = "ERROR_EVENT=FAILED_RECOVERY";
+		envp[idx++] = "DEVICE_ONLINE=0";
+		break;
+	default:
+		break;
+	}
+
+	if (idx > 0) {
+		envp[idx++] = NULL;
+		kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, envp);
+	}
+}
+#endif
+
+static int pci_bus_num_vf(struct device *dev)
+{
+	return pci_num_vf(to_pci_dev(dev));
+}
+
+/**
+ * pci_dma_configure - Setup DMA configuration
+ * @dev: ptr to dev structure
+ *
+ * Function to update PCI devices's DMA configuration using the same
+ * info from the OF node or ACPI node of host bridge's parent (if any).
+ */
+static int pci_dma_configure(struct device *dev)
+{
+	struct pci_driver *driver = to_pci_driver(dev->driver);
+	struct device *bridge;
+	int ret = 0;
+
+	bridge = pci_get_host_bridge_device(to_pci_dev(dev));
+
+	if (IS_ENABLED(CONFIG_OF) && bridge->parent &&
+	    bridge->parent->of_node) {
+		ret = of_dma_configure(dev, bridge->parent->of_node, true);
+	} else if (has_acpi_companion(bridge)) {
+		struct acpi_device *adev = to_acpi_device_node(bridge->fwnode);
+
+		ret = acpi_dma_configure(dev, acpi_get_dma_attr(adev));
+	}
+
+	pci_put_host_bridge_device(bridge);
+
+	if (!ret && !driver->driver_managed_dma) {
+		ret = iommu_device_use_default_domain(dev);
+		if (ret)
+			arch_teardown_dma_ops(dev);
+	}
+
+	return ret;
+}
+
+static void pci_dma_cleanup(struct device *dev)
+{
+	struct pci_driver *driver = to_pci_driver(dev->driver);
+
+	if (!driver->driver_managed_dma)
+		iommu_device_unuse_default_domain(dev);
+}
+
+struct bus_type pci_bus_type = {
+	.name		= "pci",
+	.match		= pci_bus_match,
+	.uevent		= pci_uevent,
+	.probe		= pci_device_probe,
+	.remove		= pci_device_remove,
+	.shutdown	= pci_device_shutdown,
+	.dev_groups	= pci_dev_groups,
+	.bus_groups	= pci_bus_groups,
+	.drv_groups	= pci_drv_groups,
+	.pm		= PCI_PM_OPS_PTR,
+	.num_vf		= pci_bus_num_vf,
+	.dma_configure	= pci_dma_configure,
+	.dma_cleanup	= pci_dma_cleanup,
+};
+EXPORT_SYMBOL(pci_bus_type);
+
+#ifdef CONFIG_PCIEPORTBUS
+static int pcie_port_bus_match(struct device *dev, struct device_driver *drv)
+{
+	struct pcie_device *pciedev;
+	struct pcie_port_service_driver *driver;
+
+	if (drv->bus != &pcie_port_bus_type || dev->bus != &pcie_port_bus_type)
+		return 0;
+
+	pciedev = to_pcie_device(dev);
+	driver = to_service_driver(drv);
+
+	if (driver->service != pciedev->service)
+		return 0;
+
+	if (driver->port_type != PCIE_ANY_PORT &&
+	    driver->port_type != pci_pcie_type(pciedev->port))
+		return 0;
+
+	return 1;
+}
+
+struct bus_type pcie_port_bus_type = {
+	.name		= "pci_express",
+	.match		= pcie_port_bus_match,
+};
+#endif
+
+static int __init pci_driver_init(void)
+{
+	int ret;
+
+	ret = bus_register(&pci_bus_type);
+	if (ret)
+		return ret;
+
+#ifdef CONFIG_PCIEPORTBUS
+	ret = bus_register(&pcie_port_bus_type);
+	if (ret)
+		return ret;
+#endif
+	dma_debug_add_bus(&pci_bus_type);
+	return 0;
+}
+postcore_initcall(pci_driver_init);
diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c
new file mode 100644
index 000000000..0c6446519
--- /dev/null
+++ b/drivers/pci/pci-label.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Export the firmware instance and label associated with a PCI device to
+ * sysfs
+ *
+ * Copyright (C) 2010 Dell Inc.
+ * by Narendra K <Narendra_K@dell.com>,
+ * Jordan Hargrave <Jordan_Hargrave@dell.com>
+ *
+ * PCI Firmware Specification Revision 3.1 section 4.6.7 (DSM for Naming a
+ * PCI or PCI Express Device Under Operating Systems) defines an instance
+ * number and string name. This code retrieves them and exports them to sysfs.
+ * If the system firmware does not provide the ACPI _DSM (Device Specific
+ * Method), then the SMBIOS type 41 instance number and string is exported to
+ * sysfs.
+ *
+ * SMBIOS defines type 41 for onboard pci devices. This code retrieves
+ * the instance number and string from the type 41 record and exports
+ * it to sysfs.
+ *
+ * Please see https://linux.dell.com/files/biosdevname/ for more
+ * information.
+ */
+
+#include <linux/dmi.h>
+#include <linux/sysfs.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/nls.h>
+#include <linux/acpi.h>
+#include <linux/pci-acpi.h>
+#include "pci.h"
+
+static bool device_has_acpi_name(struct device *dev)
+{
+#ifdef CONFIG_ACPI
+	acpi_handle handle = ACPI_HANDLE(dev);
+
+	if (!handle)
+		return false;
+
+	return acpi_check_dsm(handle, &pci_acpi_dsm_guid, 0x2,
+			      1 << DSM_PCI_DEVICE_NAME);
+#else
+	return false;
+#endif
+}
+
+#ifdef CONFIG_DMI
+enum smbios_attr_enum {
+	SMBIOS_ATTR_NONE = 0,
+	SMBIOS_ATTR_LABEL_SHOW,
+	SMBIOS_ATTR_INSTANCE_SHOW,
+};
+
+static size_t find_smbios_instance_string(struct pci_dev *pdev, char *buf,
+					  enum smbios_attr_enum attribute)
+{
+	const struct dmi_device *dmi;
+	struct dmi_dev_onboard *donboard;
+	int domain_nr = pci_domain_nr(pdev->bus);
+	int bus = pdev->bus->number;
+	int devfn = pdev->devfn;
+
+	dmi = NULL;
+	while ((dmi = dmi_find_device(DMI_DEV_TYPE_DEV_ONBOARD,
+				      NULL, dmi)) != NULL) {
+		donboard = dmi->device_data;
+		if (donboard && donboard->segment == domain_nr &&
+				donboard->bus == bus &&
+				donboard->devfn == devfn) {
+			if (buf) {
+				if (attribute == SMBIOS_ATTR_INSTANCE_SHOW)
+					return sysfs_emit(buf, "%d\n",
+							  donboard->instance);
+				else if (attribute == SMBIOS_ATTR_LABEL_SHOW)
+					return sysfs_emit(buf, "%s\n",
+							  dmi->name);
+			}
+			return strlen(dmi->name);
+		}
+	}
+	return 0;
+}
+
+static ssize_t smbios_label_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return find_smbios_instance_string(pdev, buf,
+					   SMBIOS_ATTR_LABEL_SHOW);
+}
+static struct device_attribute dev_attr_smbios_label = __ATTR(label, 0444,
+						    smbios_label_show, NULL);
+
+static ssize_t index_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return find_smbios_instance_string(pdev, buf,
+					   SMBIOS_ATTR_INSTANCE_SHOW);
+}
+static DEVICE_ATTR_RO(index);
+
+static struct attribute *smbios_attrs[] = {
+	&dev_attr_smbios_label.attr,
+	&dev_attr_index.attr,
+	NULL,
+};
+
+static umode_t smbios_attr_is_visible(struct kobject *kobj, struct attribute *a,
+				      int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (device_has_acpi_name(dev))
+		return 0;
+
+	if (!find_smbios_instance_string(pdev, NULL, SMBIOS_ATTR_NONE))
+		return 0;
+
+	return a->mode;
+}
+
+const struct attribute_group pci_dev_smbios_attr_group = {
+	.attrs = smbios_attrs,
+	.is_visible = smbios_attr_is_visible,
+};
+#endif
+
+#ifdef CONFIG_ACPI
+enum acpi_attr_enum {
+	ACPI_ATTR_LABEL_SHOW,
+	ACPI_ATTR_INDEX_SHOW,
+};
+
+static int dsm_label_utf16s_to_utf8s(union acpi_object *obj, char *buf)
+{
+	int len;
+
+	len = utf16s_to_utf8s((const wchar_t *)obj->buffer.pointer,
+			      obj->buffer.length,
+			      UTF16_LITTLE_ENDIAN,
+			      buf, PAGE_SIZE - 1);
+	buf[len++] = '\n';
+
+	return len;
+}
+
+static int dsm_get_label(struct device *dev, char *buf,
+			 enum acpi_attr_enum attr)
+{
+	acpi_handle handle = ACPI_HANDLE(dev);
+	union acpi_object *obj, *tmp;
+	int len = 0;
+
+	if (!handle)
+		return -1;
+
+	obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 0x2,
+				DSM_PCI_DEVICE_NAME, NULL);
+	if (!obj)
+		return -1;
+
+	tmp = obj->package.elements;
+	if (obj->type == ACPI_TYPE_PACKAGE && obj->package.count == 2 &&
+	    tmp[0].type == ACPI_TYPE_INTEGER &&
+	    (tmp[1].type == ACPI_TYPE_STRING ||
+	     tmp[1].type == ACPI_TYPE_BUFFER)) {
+		/*
+		 * The second string element is optional even when
+		 * this _DSM is implemented; when not implemented,
+		 * this entry must return a null string.
+		 */
+		if (attr == ACPI_ATTR_INDEX_SHOW) {
+			len = sysfs_emit(buf, "%llu\n", tmp->integer.value);
+		} else if (attr == ACPI_ATTR_LABEL_SHOW) {
+			if (tmp[1].type == ACPI_TYPE_STRING)
+				len = sysfs_emit(buf, "%s\n",
+						 tmp[1].string.pointer);
+			else if (tmp[1].type == ACPI_TYPE_BUFFER)
+				len = dsm_label_utf16s_to_utf8s(tmp + 1, buf);
+		}
+	}
+
+	ACPI_FREE(obj);
+
+	return len > 0 ? len : -1;
+}
+
+static ssize_t label_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	return dsm_get_label(dev, buf, ACPI_ATTR_LABEL_SHOW);
+}
+static DEVICE_ATTR_RO(label);
+
+static ssize_t acpi_index_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	return dsm_get_label(dev, buf, ACPI_ATTR_INDEX_SHOW);
+}
+static DEVICE_ATTR_RO(acpi_index);
+
+static struct attribute *acpi_attrs[] = {
+	&dev_attr_label.attr,
+	&dev_attr_acpi_index.attr,
+	NULL,
+};
+
+static umode_t acpi_attr_is_visible(struct kobject *kobj, struct attribute *a,
+				    int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+
+	if (!device_has_acpi_name(dev))
+		return 0;
+
+	return a->mode;
+}
+
+const struct attribute_group pci_dev_acpi_attr_group = {
+	.attrs = acpi_attrs,
+	.is_visible = acpi_attr_is_visible,
+};
+#endif
diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c
new file mode 100644
index 000000000..fbfd78127
--- /dev/null
+++ b/drivers/pci/pci-mid.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel MID platform PM support
+ *
+ * Copyright (C) 2016, Intel Corporation
+ *
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ */
+
+#include <linux/init.h>
+#include <linux/pci.h>
+
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include <asm/intel-mid.h>
+
+#include "pci.h"
+
+static bool pci_mid_pm_enabled __read_mostly;
+
+bool pci_use_mid_pm(void)
+{
+	return pci_mid_pm_enabled;
+}
+
+int mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state)
+{
+	return intel_mid_pci_set_power_state(pdev, state);
+}
+
+pci_power_t mid_pci_get_power_state(struct pci_dev *pdev)
+{
+	return intel_mid_pci_get_power_state(pdev);
+}
+
+/*
+ * This table should be in sync with the one in
+ * arch/x86/platform/intel-mid/pwr.c.
+ */
+static const struct x86_cpu_id lpss_cpu_ids[] = {
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL_MID, NULL),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, NULL),
+	{}
+};
+
+static int __init mid_pci_init(void)
+{
+	const struct x86_cpu_id *id;
+
+	id = x86_match_cpu(lpss_cpu_ids);
+	if (id)
+		pci_mid_pm_enabled = true;
+
+	return 0;
+}
+arch_initcall(mid_pci_init);
diff --git a/drivers/pci/pci-pf-stub.c b/drivers/pci/pci-pf-stub.c
new file mode 100644
index 000000000..45855a5e9
--- /dev/null
+++ b/drivers/pci/pci-pf-stub.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/* pci-pf-stub - simple stub driver for PCI SR-IOV PF device
+ *
+ * This driver is meant to act as a "whitelist" for devices that provide
+ * SR-IOV functionality while at the same time not actually needing a
+ * driver of their own.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+
+/*
+ * pci_pf_stub_whitelist - White list of devices to bind pci-pf-stub onto
+ *
+ * This table provides the list of IDs this driver is supposed to bind
+ * onto.  You could think of this as a list of "quirked" devices where we
+ * are adding support for SR-IOV here since there are no other drivers
+ * that they would be running under.
+ */
+static const struct pci_device_id pci_pf_stub_whitelist[] = {
+	{ PCI_VDEVICE(AMAZON, 0x0053) },
+	/* required last entry */
+	{ 0 }
+};
+MODULE_DEVICE_TABLE(pci, pci_pf_stub_whitelist);
+
+static int pci_pf_stub_probe(struct pci_dev *dev,
+			     const struct pci_device_id *id)
+{
+	pci_info(dev, "claimed by pci-pf-stub\n");
+	return 0;
+}
+
+static struct pci_driver pf_stub_driver = {
+	.name			= "pci-pf-stub",
+	.id_table		= pci_pf_stub_whitelist,
+	.probe			= pci_pf_stub_probe,
+	.sriov_configure	= pci_sriov_configure_simple,
+};
+module_pci_driver(pf_stub_driver);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/pci/pci-stub.c b/drivers/pci/pci-stub.c
new file mode 100644
index 000000000..d1f4c1ce7
--- /dev/null
+++ b/drivers/pci/pci-stub.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple stub driver to reserve a PCI device
+ *
+ * Copyright (C) 2008 Red Hat, Inc.
+ * Author:
+ *	Chris Wright
+ *
+ * Usage is simple, allocate a new id to the stub driver and bind the
+ * device to it.  For example:
+ *
+ * # echo "8086 10f5" > /sys/bus/pci/drivers/pci-stub/new_id
+ * # echo -n 0000:00:19.0 > /sys/bus/pci/drivers/e1000e/unbind
+ * # echo -n 0000:00:19.0 > /sys/bus/pci/drivers/pci-stub/bind
+ * # ls -l /sys/bus/pci/devices/0000:00:19.0/driver
+ * .../0000:00:19.0/driver -> ../../../bus/pci/drivers/pci-stub
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+
+static char ids[1024] __initdata;
+
+module_param_string(ids, ids, sizeof(ids), 0);
+MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the stub driver, format is "
+		 "\"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\""
+		 " and multiple comma separated entries can be specified");
+
+static int pci_stub_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	pci_info(dev, "claimed by stub\n");
+	return 0;
+}
+
+static struct pci_driver stub_driver = {
+	.name		= "pci-stub",
+	.id_table	= NULL,	/* only dynamic id's */
+	.probe		= pci_stub_probe,
+	.driver_managed_dma = true,
+};
+
+static int __init pci_stub_init(void)
+{
+	char *p, *id;
+	int rc;
+
+	rc = pci_register_driver(&stub_driver);
+	if (rc)
+		return rc;
+
+	/* no ids passed actually */
+	if (ids[0] == '\0')
+		return 0;
+
+	/* add ids specified in the module parameter */
+	p = ids;
+	while ((id = strsep(&p, ","))) {
+		unsigned int vendor, device, subvendor = PCI_ANY_ID,
+			subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
+		int fields;
+
+		if (!strlen(id))
+			continue;
+
+		fields = sscanf(id, "%x:%x:%x:%x:%x:%x",
+				&vendor, &device, &subvendor, &subdevice,
+				&class, &class_mask);
+
+		if (fields < 2) {
+			pr_warn("pci-stub: invalid ID string \"%s\"\n", id);
+			continue;
+		}
+
+		pr_info("pci-stub: add %04X:%04X sub=%04X:%04X cls=%08X/%08X\n",
+		       vendor, device, subvendor, subdevice, class, class_mask);
+
+		rc = pci_add_dynid(&stub_driver, vendor, device,
+				   subvendor, subdevice, class, class_mask, 0);
+		if (rc)
+			pr_warn("pci-stub: failed to add dynamic ID (%d)\n",
+				rc);
+	}
+
+	return 0;
+}
+
+static void __exit pci_stub_exit(void)
+{
+	pci_unregister_driver(&stub_driver);
+}
+
+module_init(pci_stub_init);
+module_exit(pci_stub_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Chris Wright <chrisw@sous-sol.org>");
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
new file mode 100644
index 000000000..3317b9354
--- /dev/null
+++ b/drivers/pci/pci-sysfs.c
@@ -0,0 +1,1660 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * (C) Copyright 2002-2004 Greg Kroah-Hartman <greg@kroah.com>
+ * (C) Copyright 2002-2004 IBM Corp.
+ * (C) Copyright 2003 Matthew Wilcox
+ * (C) Copyright 2003 Hewlett-Packard
+ * (C) Copyright 2004 Jon Smirl <jonsmirl@yahoo.com>
+ * (C) Copyright 2004 Silicon Graphics, Inc. Jesse Barnes <jbarnes@sgi.com>
+ *
+ * File attributes for PCI devices
+ *
+ * Modeled after usb's driverfs.c
+ */
+
+#include <linux/bitfield.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/stat.h>
+#include <linux/export.h>
+#include <linux/topology.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/capability.h>
+#include <linux/security.h>
+#include <linux/slab.h>
+#include <linux/vgaarb.h>
+#include <linux/pm_runtime.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/aperture.h>
+#include "pci.h"
+
+static int sysfs_initialized;	/* = 0 */
+
+/* show configuration fields */
+#define pci_config_attr(field, format_string)				\
+static ssize_t								\
+field##_show(struct device *dev, struct device_attribute *attr, char *buf)				\
+{									\
+	struct pci_dev *pdev;						\
+									\
+	pdev = to_pci_dev(dev);						\
+	return sysfs_emit(buf, format_string, pdev->field);		\
+}									\
+static DEVICE_ATTR_RO(field)
+
+pci_config_attr(vendor, "0x%04x\n");
+pci_config_attr(device, "0x%04x\n");
+pci_config_attr(subsystem_vendor, "0x%04x\n");
+pci_config_attr(subsystem_device, "0x%04x\n");
+pci_config_attr(revision, "0x%02x\n");
+pci_config_attr(class, "0x%06x\n");
+
+static ssize_t irq_show(struct device *dev,
+			struct device_attribute *attr,
+			char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+#ifdef CONFIG_PCI_MSI
+	/*
+	 * For MSI, show the first MSI IRQ; for all other cases including
+	 * MSI-X, show the legacy INTx IRQ.
+	 */
+	if (pdev->msi_enabled)
+		return sysfs_emit(buf, "%u\n", pci_irq_vector(pdev, 0));
+#endif
+
+	return sysfs_emit(buf, "%u\n", pdev->irq);
+}
+static DEVICE_ATTR_RO(irq);
+
+static ssize_t broken_parity_status_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	return sysfs_emit(buf, "%u\n", pdev->broken_parity_status);
+}
+
+static ssize_t broken_parity_status_store(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	unsigned long val;
+
+	if (kstrtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	pdev->broken_parity_status = !!val;
+
+	return count;
+}
+static DEVICE_ATTR_RW(broken_parity_status);
+
+static ssize_t pci_dev_show_local_cpu(struct device *dev, bool list,
+				      struct device_attribute *attr, char *buf)
+{
+	const struct cpumask *mask;
+
+#ifdef CONFIG_NUMA
+	if (dev_to_node(dev) == NUMA_NO_NODE)
+		mask = cpu_online_mask;
+	else
+		mask = cpumask_of_node(dev_to_node(dev));
+#else
+	mask = cpumask_of_pcibus(to_pci_dev(dev)->bus);
+#endif
+	return cpumap_print_to_pagebuf(list, buf, mask);
+}
+
+static ssize_t local_cpus_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	return pci_dev_show_local_cpu(dev, false, attr, buf);
+}
+static DEVICE_ATTR_RO(local_cpus);
+
+static ssize_t local_cpulist_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	return pci_dev_show_local_cpu(dev, true, attr, buf);
+}
+static DEVICE_ATTR_RO(local_cpulist);
+
+/*
+ * PCI Bus Class Devices
+ */
+static ssize_t cpuaffinity_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	const struct cpumask *cpumask = cpumask_of_pcibus(to_pci_bus(dev));
+
+	return cpumap_print_to_pagebuf(false, buf, cpumask);
+}
+static DEVICE_ATTR_RO(cpuaffinity);
+
+static ssize_t cpulistaffinity_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	const struct cpumask *cpumask = cpumask_of_pcibus(to_pci_bus(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask);
+}
+static DEVICE_ATTR_RO(cpulistaffinity);
+
+static ssize_t power_state_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return sysfs_emit(buf, "%s\n", pci_power_name(pdev->current_state));
+}
+static DEVICE_ATTR_RO(power_state);
+
+/* show resources */
+static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	int i;
+	int max;
+	resource_size_t start, end;
+	size_t len = 0;
+
+	if (pci_dev->subordinate)
+		max = DEVICE_COUNT_RESOURCE;
+	else
+		max = PCI_BRIDGE_RESOURCES;
+
+	for (i = 0; i < max; i++) {
+		struct resource *res =  &pci_dev->resource[i];
+		pci_resource_to_user(pci_dev, i, res, &start, &end);
+		len += sysfs_emit_at(buf, len, "0x%016llx 0x%016llx 0x%016llx\n",
+				     (unsigned long long)start,
+				     (unsigned long long)end,
+				     (unsigned long long)res->flags);
+	}
+	return len;
+}
+static DEVICE_ATTR_RO(resource);
+
+static ssize_t max_link_speed_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return sysfs_emit(buf, "%s\n",
+			  pci_speed_string(pcie_get_speed_cap(pdev)));
+}
+static DEVICE_ATTR_RO(max_link_speed);
+
+static ssize_t max_link_width_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return sysfs_emit(buf, "%u\n", pcie_get_width_cap(pdev));
+}
+static DEVICE_ATTR_RO(max_link_width);
+
+static ssize_t current_link_speed_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	u16 linkstat;
+	int err;
+	enum pci_bus_speed speed;
+
+	err = pcie_capability_read_word(pci_dev, PCI_EXP_LNKSTA, &linkstat);
+	if (err)
+		return -EINVAL;
+
+	speed = pcie_link_speed[linkstat & PCI_EXP_LNKSTA_CLS];
+
+	return sysfs_emit(buf, "%s\n", pci_speed_string(speed));
+}
+static DEVICE_ATTR_RO(current_link_speed);
+
+static ssize_t current_link_width_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	u16 linkstat;
+	int err;
+
+	err = pcie_capability_read_word(pci_dev, PCI_EXP_LNKSTA, &linkstat);
+	if (err)
+		return -EINVAL;
+
+	return sysfs_emit(buf, "%u\n", FIELD_GET(PCI_EXP_LNKSTA_NLW, linkstat));
+}
+static DEVICE_ATTR_RO(current_link_width);
+
+static ssize_t secondary_bus_number_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	u8 sec_bus;
+	int err;
+
+	err = pci_read_config_byte(pci_dev, PCI_SECONDARY_BUS, &sec_bus);
+	if (err)
+		return -EINVAL;
+
+	return sysfs_emit(buf, "%u\n", sec_bus);
+}
+static DEVICE_ATTR_RO(secondary_bus_number);
+
+static ssize_t subordinate_bus_number_show(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	u8 sub_bus;
+	int err;
+
+	err = pci_read_config_byte(pci_dev, PCI_SUBORDINATE_BUS, &sub_bus);
+	if (err)
+		return -EINVAL;
+
+	return sysfs_emit(buf, "%u\n", sub_bus);
+}
+static DEVICE_ATTR_RO(subordinate_bus_number);
+
+static ssize_t ari_enabled_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+
+	return sysfs_emit(buf, "%u\n", pci_ari_enabled(pci_dev->bus));
+}
+static DEVICE_ATTR_RO(ari_enabled);
+
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+
+	return sysfs_emit(buf, "pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02X\n",
+			  pci_dev->vendor, pci_dev->device,
+			  pci_dev->subsystem_vendor, pci_dev->subsystem_device,
+			  (u8)(pci_dev->class >> 16), (u8)(pci_dev->class >> 8),
+			  (u8)(pci_dev->class));
+}
+static DEVICE_ATTR_RO(modalias);
+
+static ssize_t enable_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	unsigned long val;
+	ssize_t result = 0;
+
+	/* this can crash the machine when done on the "wrong" device */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (kstrtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	device_lock(dev);
+	if (dev->driver)
+		result = -EBUSY;
+	else if (val)
+		result = pci_enable_device(pdev);
+	else if (pci_is_enabled(pdev))
+		pci_disable_device(pdev);
+	else
+		result = -EIO;
+	device_unlock(dev);
+
+	return result < 0 ? result : count;
+}
+
+static ssize_t enable_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct pci_dev *pdev;
+
+	pdev = to_pci_dev(dev);
+	return sysfs_emit(buf, "%u\n", atomic_read(&pdev->enable_cnt));
+}
+static DEVICE_ATTR_RW(enable);
+
+#ifdef CONFIG_NUMA
+static ssize_t numa_node_store(struct device *dev,
+			       struct device_attribute *attr, const char *buf,
+			       size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int node;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (kstrtoint(buf, 0, &node) < 0)
+		return -EINVAL;
+
+	if ((node < 0 && node != NUMA_NO_NODE) || node >= MAX_NUMNODES)
+		return -EINVAL;
+
+	if (node != NUMA_NO_NODE && !node_online(node))
+		return -EINVAL;
+
+	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+	pci_alert(pdev, FW_BUG "Overriding NUMA node to %d.  Contact your vendor for updates.",
+		  node);
+
+	dev->numa_node = node;
+	return count;
+}
+
+static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	return sysfs_emit(buf, "%d\n", dev->numa_node);
+}
+static DEVICE_ATTR_RW(numa_node);
+#endif
+
+static ssize_t dma_mask_bits_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return sysfs_emit(buf, "%d\n", fls64(pdev->dma_mask));
+}
+static DEVICE_ATTR_RO(dma_mask_bits);
+
+static ssize_t consistent_dma_mask_bits_show(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	return sysfs_emit(buf, "%d\n", fls64(dev->coherent_dma_mask));
+}
+static DEVICE_ATTR_RO(consistent_dma_mask_bits);
+
+static ssize_t msi_bus_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_bus *subordinate = pdev->subordinate;
+
+	return sysfs_emit(buf, "%u\n", subordinate ?
+			  !(subordinate->bus_flags & PCI_BUS_FLAGS_NO_MSI)
+			    : !pdev->no_msi);
+}
+
+static ssize_t msi_bus_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_bus *subordinate = pdev->subordinate;
+	unsigned long val;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (kstrtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	/*
+	 * "no_msi" and "bus_flags" only affect what happens when a driver
+	 * requests MSI or MSI-X.  They don't affect any drivers that have
+	 * already requested MSI or MSI-X.
+	 */
+	if (!subordinate) {
+		pdev->no_msi = !val;
+		pci_info(pdev, "MSI/MSI-X %s for future drivers\n",
+			 val ? "allowed" : "disallowed");
+		return count;
+	}
+
+	if (val)
+		subordinate->bus_flags &= ~PCI_BUS_FLAGS_NO_MSI;
+	else
+		subordinate->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
+
+	dev_info(&subordinate->dev, "MSI/MSI-X %s for future drivers of devices on this bus\n",
+		 val ? "allowed" : "disallowed");
+	return count;
+}
+static DEVICE_ATTR_RW(msi_bus);
+
+static ssize_t rescan_store(const struct bus_type *bus, const char *buf, size_t count)
+{
+	unsigned long val;
+	struct pci_bus *b = NULL;
+
+	if (kstrtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	if (val) {
+		pci_lock_rescan_remove();
+		while ((b = pci_find_next_bus(b)) != NULL)
+			pci_rescan_bus(b);
+		pci_unlock_rescan_remove();
+	}
+	return count;
+}
+static BUS_ATTR_WO(rescan);
+
+static struct attribute *pci_bus_attrs[] = {
+	&bus_attr_rescan.attr,
+	NULL,
+};
+
+static const struct attribute_group pci_bus_group = {
+	.attrs = pci_bus_attrs,
+};
+
+const struct attribute_group *pci_bus_groups[] = {
+	&pci_bus_group,
+	NULL,
+};
+
+static ssize_t dev_rescan_store(struct device *dev,
+				struct device_attribute *attr, const char *buf,
+				size_t count)
+{
+	unsigned long val;
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (kstrtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	if (val) {
+		pci_lock_rescan_remove();
+		pci_rescan_bus(pdev->bus);
+		pci_unlock_rescan_remove();
+	}
+	return count;
+}
+static struct device_attribute dev_attr_dev_rescan = __ATTR(rescan, 0200, NULL,
+							    dev_rescan_store);
+
+static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	unsigned long val;
+
+	if (kstrtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	if (val && device_remove_file_self(dev, attr))
+		pci_stop_and_remove_bus_device_locked(to_pci_dev(dev));
+	return count;
+}
+static DEVICE_ATTR_IGNORE_LOCKDEP(remove, 0220, NULL,
+				  remove_store);
+
+static ssize_t bus_rescan_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	unsigned long val;
+	struct pci_bus *bus = to_pci_bus(dev);
+
+	if (kstrtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	if (val) {
+		pci_lock_rescan_remove();
+		if (!pci_is_root_bus(bus) && list_empty(&bus->devices))
+			pci_rescan_bus_bridge_resize(bus->self);
+		else
+			pci_rescan_bus(bus);
+		pci_unlock_rescan_remove();
+	}
+	return count;
+}
+static struct device_attribute dev_attr_bus_rescan = __ATTR(rescan, 0200, NULL,
+							    bus_rescan_store);
+
+#if defined(CONFIG_PM) && defined(CONFIG_ACPI)
+static ssize_t d3cold_allowed_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	unsigned long val;
+
+	if (kstrtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	pdev->d3cold_allowed = !!val;
+	pci_bridge_d3_update(pdev);
+
+	pm_runtime_resume(dev);
+
+	return count;
+}
+
+static ssize_t d3cold_allowed_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	return sysfs_emit(buf, "%u\n", pdev->d3cold_allowed);
+}
+static DEVICE_ATTR_RW(d3cold_allowed);
+#endif
+
+#ifdef CONFIG_OF
+static ssize_t devspec_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct device_node *np = pci_device_to_OF_node(pdev);
+
+	if (np == NULL)
+		return 0;
+	return sysfs_emit(buf, "%pOF\n", np);
+}
+static DEVICE_ATTR_RO(devspec);
+#endif
+
+static ssize_t driver_override_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int ret;
+
+	ret = driver_set_override(dev, &pdev->driver_override, buf, count);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static ssize_t driver_override_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	ssize_t len;
+
+	device_lock(dev);
+	len = sysfs_emit(buf, "%s\n", pdev->driver_override);
+	device_unlock(dev);
+	return len;
+}
+static DEVICE_ATTR_RW(driver_override);
+
+static struct attribute *pci_dev_attrs[] = {
+	&dev_attr_power_state.attr,
+	&dev_attr_resource.attr,
+	&dev_attr_vendor.attr,
+	&dev_attr_device.attr,
+	&dev_attr_subsystem_vendor.attr,
+	&dev_attr_subsystem_device.attr,
+	&dev_attr_revision.attr,
+	&dev_attr_class.attr,
+	&dev_attr_irq.attr,
+	&dev_attr_local_cpus.attr,
+	&dev_attr_local_cpulist.attr,
+	&dev_attr_modalias.attr,
+#ifdef CONFIG_NUMA
+	&dev_attr_numa_node.attr,
+#endif
+	&dev_attr_dma_mask_bits.attr,
+	&dev_attr_consistent_dma_mask_bits.attr,
+	&dev_attr_enable.attr,
+	&dev_attr_broken_parity_status.attr,
+	&dev_attr_msi_bus.attr,
+#if defined(CONFIG_PM) && defined(CONFIG_ACPI)
+	&dev_attr_d3cold_allowed.attr,
+#endif
+#ifdef CONFIG_OF
+	&dev_attr_devspec.attr,
+#endif
+	&dev_attr_driver_override.attr,
+	&dev_attr_ari_enabled.attr,
+	NULL,
+};
+
+static struct attribute *pci_bridge_attrs[] = {
+	&dev_attr_subordinate_bus_number.attr,
+	&dev_attr_secondary_bus_number.attr,
+	NULL,
+};
+
+static struct attribute *pcie_dev_attrs[] = {
+	&dev_attr_current_link_speed.attr,
+	&dev_attr_current_link_width.attr,
+	&dev_attr_max_link_width.attr,
+	&dev_attr_max_link_speed.attr,
+	NULL,
+};
+
+static struct attribute *pcibus_attrs[] = {
+	&dev_attr_bus_rescan.attr,
+	&dev_attr_cpuaffinity.attr,
+	&dev_attr_cpulistaffinity.attr,
+	NULL,
+};
+
+static const struct attribute_group pcibus_group = {
+	.attrs = pcibus_attrs,
+};
+
+const struct attribute_group *pcibus_groups[] = {
+	&pcibus_group,
+	NULL,
+};
+
+static ssize_t boot_vga_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_dev *vga_dev = vga_default_device();
+
+	if (vga_dev)
+		return sysfs_emit(buf, "%u\n", (pdev == vga_dev));
+
+	return sysfs_emit(buf, "%u\n",
+			  !!(pdev->resource[PCI_ROM_RESOURCE].flags &
+			     IORESOURCE_ROM_SHADOW));
+}
+static DEVICE_ATTR_RO(boot_vga);
+
+static ssize_t pci_read_config(struct file *filp, struct kobject *kobj,
+			       struct bin_attribute *bin_attr, char *buf,
+			       loff_t off, size_t count)
+{
+	struct pci_dev *dev = to_pci_dev(kobj_to_dev(kobj));
+	unsigned int size = 64;
+	loff_t init_off = off;
+	u8 *data = (u8 *) buf;
+
+	/* Several chips lock up trying to read undefined config space */
+	if (file_ns_capable(filp, &init_user_ns, CAP_SYS_ADMIN))
+		size = dev->cfg_size;
+	else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+		size = 128;
+
+	if (off > size)
+		return 0;
+	if (off + count > size) {
+		size -= off;
+		count = size;
+	} else {
+		size = count;
+	}
+
+	pci_config_pm_runtime_get(dev);
+
+	if ((off & 1) && size) {
+		u8 val;
+		pci_user_read_config_byte(dev, off, &val);
+		data[off - init_off] = val;
+		off++;
+		size--;
+	}
+
+	if ((off & 3) && size > 2) {
+		u16 val;
+		pci_user_read_config_word(dev, off, &val);
+		data[off - init_off] = val & 0xff;
+		data[off - init_off + 1] = (val >> 8) & 0xff;
+		off += 2;
+		size -= 2;
+	}
+
+	while (size > 3) {
+		u32 val;
+		pci_user_read_config_dword(dev, off, &val);
+		data[off - init_off] = val & 0xff;
+		data[off - init_off + 1] = (val >> 8) & 0xff;
+		data[off - init_off + 2] = (val >> 16) & 0xff;
+		data[off - init_off + 3] = (val >> 24) & 0xff;
+		off += 4;
+		size -= 4;
+		cond_resched();
+	}
+
+	if (size >= 2) {
+		u16 val;
+		pci_user_read_config_word(dev, off, &val);
+		data[off - init_off] = val & 0xff;
+		data[off - init_off + 1] = (val >> 8) & 0xff;
+		off += 2;
+		size -= 2;
+	}
+
+	if (size > 0) {
+		u8 val;
+		pci_user_read_config_byte(dev, off, &val);
+		data[off - init_off] = val;
+	}
+
+	pci_config_pm_runtime_put(dev);
+
+	return count;
+}
+
+static ssize_t pci_write_config(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *bin_attr, char *buf,
+				loff_t off, size_t count)
+{
+	struct pci_dev *dev = to_pci_dev(kobj_to_dev(kobj));
+	unsigned int size = count;
+	loff_t init_off = off;
+	u8 *data = (u8 *) buf;
+	int ret;
+
+	ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+	if (ret)
+		return ret;
+
+	if (resource_is_exclusive(&dev->driver_exclusive_resource, off,
+				  count)) {
+		pci_warn_once(dev, "%s: Unexpected write to kernel-exclusive config offset %llx",
+			      current->comm, off);
+		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+	}
+
+	if (off > dev->cfg_size)
+		return 0;
+	if (off + count > dev->cfg_size) {
+		size = dev->cfg_size - off;
+		count = size;
+	}
+
+	pci_config_pm_runtime_get(dev);
+
+	if ((off & 1) && size) {
+		pci_user_write_config_byte(dev, off, data[off - init_off]);
+		off++;
+		size--;
+	}
+
+	if ((off & 3) && size > 2) {
+		u16 val = data[off - init_off];
+		val |= (u16) data[off - init_off + 1] << 8;
+		pci_user_write_config_word(dev, off, val);
+		off += 2;
+		size -= 2;
+	}
+
+	while (size > 3) {
+		u32 val = data[off - init_off];
+		val |= (u32) data[off - init_off + 1] << 8;
+		val |= (u32) data[off - init_off + 2] << 16;
+		val |= (u32) data[off - init_off + 3] << 24;
+		pci_user_write_config_dword(dev, off, val);
+		off += 4;
+		size -= 4;
+	}
+
+	if (size >= 2) {
+		u16 val = data[off - init_off];
+		val |= (u16) data[off - init_off + 1] << 8;
+		pci_user_write_config_word(dev, off, val);
+		off += 2;
+		size -= 2;
+	}
+
+	if (size)
+		pci_user_write_config_byte(dev, off, data[off - init_off]);
+
+	pci_config_pm_runtime_put(dev);
+
+	return count;
+}
+static BIN_ATTR(config, 0644, pci_read_config, pci_write_config, 0);
+
+static struct bin_attribute *pci_dev_config_attrs[] = {
+	&bin_attr_config,
+	NULL,
+};
+
+static umode_t pci_dev_config_attr_is_visible(struct kobject *kobj,
+					      struct bin_attribute *a, int n)
+{
+	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+
+	a->size = PCI_CFG_SPACE_SIZE;
+	if (pdev->cfg_size > PCI_CFG_SPACE_SIZE)
+		a->size = PCI_CFG_SPACE_EXP_SIZE;
+
+	return a->attr.mode;
+}
+
+static const struct attribute_group pci_dev_config_attr_group = {
+	.bin_attrs = pci_dev_config_attrs,
+	.is_bin_visible = pci_dev_config_attr_is_visible,
+};
+
+#ifdef HAVE_PCI_LEGACY
+/**
+ * pci_read_legacy_io - read byte(s) from legacy I/O port space
+ * @filp: open sysfs file
+ * @kobj: kobject corresponding to file to read from
+ * @bin_attr: struct bin_attribute for this file
+ * @buf: buffer to store results
+ * @off: offset into legacy I/O port space
+ * @count: number of bytes to read
+ *
+ * Reads 1, 2, or 4 bytes from legacy I/O port space using an arch specific
+ * callback routine (pci_legacy_read).
+ */
+static ssize_t pci_read_legacy_io(struct file *filp, struct kobject *kobj,
+				  struct bin_attribute *bin_attr, char *buf,
+				  loff_t off, size_t count)
+{
+	struct pci_bus *bus = to_pci_bus(kobj_to_dev(kobj));
+
+	/* Only support 1, 2 or 4 byte accesses */
+	if (count != 1 && count != 2 && count != 4)
+		return -EINVAL;
+
+	return pci_legacy_read(bus, off, (u32 *)buf, count);
+}
+
+/**
+ * pci_write_legacy_io - write byte(s) to legacy I/O port space
+ * @filp: open sysfs file
+ * @kobj: kobject corresponding to file to read from
+ * @bin_attr: struct bin_attribute for this file
+ * @buf: buffer containing value to be written
+ * @off: offset into legacy I/O port space
+ * @count: number of bytes to write
+ *
+ * Writes 1, 2, or 4 bytes from legacy I/O port space using an arch specific
+ * callback routine (pci_legacy_write).
+ */
+static ssize_t pci_write_legacy_io(struct file *filp, struct kobject *kobj,
+				   struct bin_attribute *bin_attr, char *buf,
+				   loff_t off, size_t count)
+{
+	struct pci_bus *bus = to_pci_bus(kobj_to_dev(kobj));
+
+	/* Only support 1, 2 or 4 byte accesses */
+	if (count != 1 && count != 2 && count != 4)
+		return -EINVAL;
+
+	return pci_legacy_write(bus, off, *(u32 *)buf, count);
+}
+
+/**
+ * pci_mmap_legacy_mem - map legacy PCI memory into user memory space
+ * @filp: open sysfs file
+ * @kobj: kobject corresponding to device to be mapped
+ * @attr: struct bin_attribute for this file
+ * @vma: struct vm_area_struct passed to mmap
+ *
+ * Uses an arch specific callback, pci_mmap_legacy_mem_page_range, to mmap
+ * legacy memory space (first meg of bus space) into application virtual
+ * memory space.
+ */
+static int pci_mmap_legacy_mem(struct file *filp, struct kobject *kobj,
+			       struct bin_attribute *attr,
+			       struct vm_area_struct *vma)
+{
+	struct pci_bus *bus = to_pci_bus(kobj_to_dev(kobj));
+
+	return pci_mmap_legacy_page_range(bus, vma, pci_mmap_mem);
+}
+
+/**
+ * pci_mmap_legacy_io - map legacy PCI IO into user memory space
+ * @filp: open sysfs file
+ * @kobj: kobject corresponding to device to be mapped
+ * @attr: struct bin_attribute for this file
+ * @vma: struct vm_area_struct passed to mmap
+ *
+ * Uses an arch specific callback, pci_mmap_legacy_io_page_range, to mmap
+ * legacy IO space (first meg of bus space) into application virtual
+ * memory space. Returns -ENOSYS if the operation isn't supported
+ */
+static int pci_mmap_legacy_io(struct file *filp, struct kobject *kobj,
+			      struct bin_attribute *attr,
+			      struct vm_area_struct *vma)
+{
+	struct pci_bus *bus = to_pci_bus(kobj_to_dev(kobj));
+
+	return pci_mmap_legacy_page_range(bus, vma, pci_mmap_io);
+}
+
+/**
+ * pci_adjust_legacy_attr - adjustment of legacy file attributes
+ * @b: bus to create files under
+ * @mmap_type: I/O port or memory
+ *
+ * Stub implementation. Can be overridden by arch if necessary.
+ */
+void __weak pci_adjust_legacy_attr(struct pci_bus *b,
+				   enum pci_mmap_state mmap_type)
+{
+}
+
+/**
+ * pci_create_legacy_files - create legacy I/O port and memory files
+ * @b: bus to create files under
+ *
+ * Some platforms allow access to legacy I/O port and ISA memory space on
+ * a per-bus basis.  This routine creates the files and ties them into
+ * their associated read, write and mmap files from pci-sysfs.c
+ *
+ * On error unwind, but don't propagate the error to the caller
+ * as it is ok to set up the PCI bus without these files.
+ */
+void pci_create_legacy_files(struct pci_bus *b)
+{
+	int error;
+
+	if (!sysfs_initialized)
+		return;
+
+	b->legacy_io = kcalloc(2, sizeof(struct bin_attribute),
+			       GFP_ATOMIC);
+	if (!b->legacy_io)
+		goto kzalloc_err;
+
+	sysfs_bin_attr_init(b->legacy_io);
+	b->legacy_io->attr.name = "legacy_io";
+	b->legacy_io->size = 0xffff;
+	b->legacy_io->attr.mode = 0600;
+	b->legacy_io->read = pci_read_legacy_io;
+	b->legacy_io->write = pci_write_legacy_io;
+	b->legacy_io->mmap = pci_mmap_legacy_io;
+	b->legacy_io->f_mapping = iomem_get_mapping;
+	pci_adjust_legacy_attr(b, pci_mmap_io);
+	error = device_create_bin_file(&b->dev, b->legacy_io);
+	if (error)
+		goto legacy_io_err;
+
+	/* Allocated above after the legacy_io struct */
+	b->legacy_mem = b->legacy_io + 1;
+	sysfs_bin_attr_init(b->legacy_mem);
+	b->legacy_mem->attr.name = "legacy_mem";
+	b->legacy_mem->size = 1024*1024;
+	b->legacy_mem->attr.mode = 0600;
+	b->legacy_mem->mmap = pci_mmap_legacy_mem;
+	b->legacy_mem->f_mapping = iomem_get_mapping;
+	pci_adjust_legacy_attr(b, pci_mmap_mem);
+	error = device_create_bin_file(&b->dev, b->legacy_mem);
+	if (error)
+		goto legacy_mem_err;
+
+	return;
+
+legacy_mem_err:
+	device_remove_bin_file(&b->dev, b->legacy_io);
+legacy_io_err:
+	kfree(b->legacy_io);
+	b->legacy_io = NULL;
+kzalloc_err:
+	dev_warn(&b->dev, "could not create legacy I/O port and ISA memory resources in sysfs\n");
+}
+
+void pci_remove_legacy_files(struct pci_bus *b)
+{
+	if (b->legacy_io) {
+		device_remove_bin_file(&b->dev, b->legacy_io);
+		device_remove_bin_file(&b->dev, b->legacy_mem);
+		kfree(b->legacy_io); /* both are allocated here */
+	}
+}
+#endif /* HAVE_PCI_LEGACY */
+
+#if defined(HAVE_PCI_MMAP) || defined(ARCH_GENERIC_PCI_MMAP_RESOURCE)
+
+int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma,
+		  enum pci_mmap_api mmap_api)
+{
+	unsigned long nr, start, size;
+	resource_size_t pci_start = 0, pci_end;
+
+	if (pci_resource_len(pdev, resno) == 0)
+		return 0;
+	nr = vma_pages(vma);
+	start = vma->vm_pgoff;
+	size = ((pci_resource_len(pdev, resno) - 1) >> PAGE_SHIFT) + 1;
+	if (mmap_api == PCI_MMAP_PROCFS) {
+		pci_resource_to_user(pdev, resno, &pdev->resource[resno],
+				     &pci_start, &pci_end);
+		pci_start >>= PAGE_SHIFT;
+	}
+	if (start >= pci_start && start < pci_start + size &&
+			start + nr <= pci_start + size)
+		return 1;
+	return 0;
+}
+
+/**
+ * pci_mmap_resource - map a PCI resource into user memory space
+ * @kobj: kobject for mapping
+ * @attr: struct bin_attribute for the file being mapped
+ * @vma: struct vm_area_struct passed into the mmap
+ * @write_combine: 1 for write_combine mapping
+ *
+ * Use the regular PCI mapping routines to map a PCI resource into userspace.
+ */
+static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
+			     struct vm_area_struct *vma, int write_combine)
+{
+	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+	int bar = (unsigned long)attr->private;
+	enum pci_mmap_state mmap_type;
+	struct resource *res = &pdev->resource[bar];
+	int ret;
+
+	ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+	if (ret)
+		return ret;
+
+	if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(res->start))
+		return -EINVAL;
+
+	if (!pci_mmap_fits(pdev, bar, vma, PCI_MMAP_SYSFS))
+		return -EINVAL;
+
+	mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io;
+
+	return pci_mmap_resource_range(pdev, bar, vma, mmap_type, write_combine);
+}
+
+static int pci_mmap_resource_uc(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *attr,
+				struct vm_area_struct *vma)
+{
+	return pci_mmap_resource(kobj, attr, vma, 0);
+}
+
+static int pci_mmap_resource_wc(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *attr,
+				struct vm_area_struct *vma)
+{
+	return pci_mmap_resource(kobj, attr, vma, 1);
+}
+
+static ssize_t pci_resource_io(struct file *filp, struct kobject *kobj,
+			       struct bin_attribute *attr, char *buf,
+			       loff_t off, size_t count, bool write)
+{
+#ifdef CONFIG_HAS_IOPORT
+	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+	int bar = (unsigned long)attr->private;
+	unsigned long port = off;
+
+	port += pci_resource_start(pdev, bar);
+
+	if (port > pci_resource_end(pdev, bar))
+		return 0;
+
+	if (port + count - 1 > pci_resource_end(pdev, bar))
+		return -EINVAL;
+
+	switch (count) {
+	case 1:
+		if (write)
+			outb(*(u8 *)buf, port);
+		else
+			*(u8 *)buf = inb(port);
+		return 1;
+	case 2:
+		if (write)
+			outw(*(u16 *)buf, port);
+		else
+			*(u16 *)buf = inw(port);
+		return 2;
+	case 4:
+		if (write)
+			outl(*(u32 *)buf, port);
+		else
+			*(u32 *)buf = inl(port);
+		return 4;
+	}
+	return -EINVAL;
+#else
+	return -ENXIO;
+#endif
+}
+
+static ssize_t pci_read_resource_io(struct file *filp, struct kobject *kobj,
+				    struct bin_attribute *attr, char *buf,
+				    loff_t off, size_t count)
+{
+	return pci_resource_io(filp, kobj, attr, buf, off, count, false);
+}
+
+static ssize_t pci_write_resource_io(struct file *filp, struct kobject *kobj,
+				     struct bin_attribute *attr, char *buf,
+				     loff_t off, size_t count)
+{
+	int ret;
+
+	ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+	if (ret)
+		return ret;
+
+	return pci_resource_io(filp, kobj, attr, buf, off, count, true);
+}
+
+/**
+ * pci_remove_resource_files - cleanup resource files
+ * @pdev: dev to cleanup
+ *
+ * If we created resource files for @pdev, remove them from sysfs and
+ * free their resources.
+ */
+static void pci_remove_resource_files(struct pci_dev *pdev)
+{
+	int i;
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		struct bin_attribute *res_attr;
+
+		res_attr = pdev->res_attr[i];
+		if (res_attr) {
+			sysfs_remove_bin_file(&pdev->dev.kobj, res_attr);
+			kfree(res_attr);
+		}
+
+		res_attr = pdev->res_attr_wc[i];
+		if (res_attr) {
+			sysfs_remove_bin_file(&pdev->dev.kobj, res_attr);
+			kfree(res_attr);
+		}
+	}
+}
+
+static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine)
+{
+	/* allocate attribute structure, piggyback attribute name */
+	int name_len = write_combine ? 13 : 10;
+	struct bin_attribute *res_attr;
+	char *res_attr_name;
+	int retval;
+
+	res_attr = kzalloc(sizeof(*res_attr) + name_len, GFP_ATOMIC);
+	if (!res_attr)
+		return -ENOMEM;
+
+	res_attr_name = (char *)(res_attr + 1);
+
+	sysfs_bin_attr_init(res_attr);
+	if (write_combine) {
+		sprintf(res_attr_name, "resource%d_wc", num);
+		res_attr->mmap = pci_mmap_resource_wc;
+	} else {
+		sprintf(res_attr_name, "resource%d", num);
+		if (pci_resource_flags(pdev, num) & IORESOURCE_IO) {
+			res_attr->read = pci_read_resource_io;
+			res_attr->write = pci_write_resource_io;
+			if (arch_can_pci_mmap_io())
+				res_attr->mmap = pci_mmap_resource_uc;
+		} else {
+			res_attr->mmap = pci_mmap_resource_uc;
+		}
+	}
+	if (res_attr->mmap)
+		res_attr->f_mapping = iomem_get_mapping;
+	res_attr->attr.name = res_attr_name;
+	res_attr->attr.mode = 0600;
+	res_attr->size = pci_resource_len(pdev, num);
+	res_attr->private = (void *)(unsigned long)num;
+	retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
+	if (retval) {
+		kfree(res_attr);
+		return retval;
+	}
+
+	if (write_combine)
+		pdev->res_attr_wc[num] = res_attr;
+	else
+		pdev->res_attr[num] = res_attr;
+
+	return 0;
+}
+
+/**
+ * pci_create_resource_files - create resource files in sysfs for @dev
+ * @pdev: dev in question
+ *
+ * Walk the resources in @pdev creating files for each resource available.
+ */
+static int pci_create_resource_files(struct pci_dev *pdev)
+{
+	int i;
+	int retval;
+
+	/* Expose the PCI resources from this device as files */
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+
+		/* skip empty resources */
+		if (!pci_resource_len(pdev, i))
+			continue;
+
+		retval = pci_create_attr(pdev, i, 0);
+		/* for prefetchable resources, create a WC mappable file */
+		if (!retval && arch_can_pci_mmap_wc() &&
+		    pdev->resource[i].flags & IORESOURCE_PREFETCH)
+			retval = pci_create_attr(pdev, i, 1);
+		if (retval) {
+			pci_remove_resource_files(pdev);
+			return retval;
+		}
+	}
+	return 0;
+}
+#else /* !(defined(HAVE_PCI_MMAP) || defined(ARCH_GENERIC_PCI_MMAP_RESOURCE)) */
+int __weak pci_create_resource_files(struct pci_dev *dev) { return 0; }
+void __weak pci_remove_resource_files(struct pci_dev *dev) { return; }
+#endif
+
+/**
+ * pci_write_rom - used to enable access to the PCI ROM display
+ * @filp: sysfs file
+ * @kobj: kernel object handle
+ * @bin_attr: struct bin_attribute for this file
+ * @buf: user input
+ * @off: file offset
+ * @count: number of byte in input
+ *
+ * writing anything except 0 enables it
+ */
+static ssize_t pci_write_rom(struct file *filp, struct kobject *kobj,
+			     struct bin_attribute *bin_attr, char *buf,
+			     loff_t off, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+
+	if ((off ==  0) && (*buf == '0') && (count == 2))
+		pdev->rom_attr_enabled = 0;
+	else
+		pdev->rom_attr_enabled = 1;
+
+	return count;
+}
+
+/**
+ * pci_read_rom - read a PCI ROM
+ * @filp: sysfs file
+ * @kobj: kernel object handle
+ * @bin_attr: struct bin_attribute for this file
+ * @buf: where to put the data we read from the ROM
+ * @off: file offset
+ * @count: number of bytes to read
+ *
+ * Put @count bytes starting at @off into @buf from the ROM in the PCI
+ * device corresponding to @kobj.
+ */
+static ssize_t pci_read_rom(struct file *filp, struct kobject *kobj,
+			    struct bin_attribute *bin_attr, char *buf,
+			    loff_t off, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+	void __iomem *rom;
+	size_t size;
+
+	if (!pdev->rom_attr_enabled)
+		return -EINVAL;
+
+	rom = pci_map_rom(pdev, &size);	/* size starts out as PCI window size */
+	if (!rom || !size)
+		return -EIO;
+
+	if (off >= size)
+		count = 0;
+	else {
+		if (off + count > size)
+			count = size - off;
+
+		memcpy_fromio(buf, rom + off, count);
+	}
+	pci_unmap_rom(pdev, rom);
+
+	return count;
+}
+static BIN_ATTR(rom, 0600, pci_read_rom, pci_write_rom, 0);
+
+static struct bin_attribute *pci_dev_rom_attrs[] = {
+	&bin_attr_rom,
+	NULL,
+};
+
+static umode_t pci_dev_rom_attr_is_visible(struct kobject *kobj,
+					   struct bin_attribute *a, int n)
+{
+	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+	size_t rom_size;
+
+	/* If the device has a ROM, try to expose it in sysfs. */
+	rom_size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
+	if (!rom_size)
+		return 0;
+
+	a->size = rom_size;
+
+	return a->attr.mode;
+}
+
+static const struct attribute_group pci_dev_rom_attr_group = {
+	.bin_attrs = pci_dev_rom_attrs,
+	.is_bin_visible = pci_dev_rom_attr_is_visible,
+};
+
+static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	unsigned long val;
+	ssize_t result;
+
+	if (kstrtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	if (val != 1)
+		return -EINVAL;
+
+	pm_runtime_get_sync(dev);
+	result = pci_reset_function(pdev);
+	pm_runtime_put(dev);
+	if (result < 0)
+		return result;
+
+	return count;
+}
+static DEVICE_ATTR_WO(reset);
+
+static struct attribute *pci_dev_reset_attrs[] = {
+	&dev_attr_reset.attr,
+	NULL,
+};
+
+static umode_t pci_dev_reset_attr_is_visible(struct kobject *kobj,
+					     struct attribute *a, int n)
+{
+	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+
+	if (!pci_reset_supported(pdev))
+		return 0;
+
+	return a->mode;
+}
+
+static const struct attribute_group pci_dev_reset_attr_group = {
+	.attrs = pci_dev_reset_attrs,
+	.is_visible = pci_dev_reset_attr_is_visible,
+};
+
+#define pci_dev_resource_resize_attr(n)					\
+static ssize_t resource##n##_resize_show(struct device *dev,		\
+					 struct device_attribute *attr,	\
+					 char * buf)			\
+{									\
+	struct pci_dev *pdev = to_pci_dev(dev);				\
+	ssize_t ret;							\
+									\
+	pci_config_pm_runtime_get(pdev);				\
+									\
+	ret = sysfs_emit(buf, "%016llx\n",				\
+			 (u64)pci_rebar_get_possible_sizes(pdev, n));	\
+									\
+	pci_config_pm_runtime_put(pdev);				\
+									\
+	return ret;							\
+}									\
+									\
+static ssize_t resource##n##_resize_store(struct device *dev,		\
+					  struct device_attribute *attr,\
+					  const char *buf, size_t count)\
+{									\
+	struct pci_dev *pdev = to_pci_dev(dev);				\
+	unsigned long size, flags;					\
+	int ret, i;							\
+	u16 cmd;							\
+									\
+	if (kstrtoul(buf, 0, &size) < 0)				\
+		return -EINVAL;						\
+									\
+	device_lock(dev);						\
+	if (dev->driver) {						\
+		ret = -EBUSY;						\
+		goto unlock;						\
+	}								\
+									\
+	pci_config_pm_runtime_get(pdev);				\
+									\
+	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) {		\
+		ret = aperture_remove_conflicting_pci_devices(pdev,	\
+						"resourceN_resize");	\
+		if (ret)						\
+			goto pm_put;					\
+	}								\
+									\
+	pci_read_config_word(pdev, PCI_COMMAND, &cmd);			\
+	pci_write_config_word(pdev, PCI_COMMAND,			\
+			      cmd & ~PCI_COMMAND_MEMORY);		\
+									\
+	flags = pci_resource_flags(pdev, n);				\
+									\
+	pci_remove_resource_files(pdev);				\
+									\
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {			\
+		if (pci_resource_len(pdev, i) &&			\
+		    pci_resource_flags(pdev, i) == flags)		\
+			pci_release_resource(pdev, i);			\
+	}								\
+									\
+	ret = pci_resize_resource(pdev, n, size);			\
+									\
+	pci_assign_unassigned_bus_resources(pdev->bus);			\
+									\
+	if (pci_create_resource_files(pdev))				\
+		pci_warn(pdev, "Failed to recreate resource files after BAR resizing\n");\
+									\
+	pci_write_config_word(pdev, PCI_COMMAND, cmd);			\
+pm_put:									\
+	pci_config_pm_runtime_put(pdev);				\
+unlock:									\
+	device_unlock(dev);						\
+									\
+	return ret ? ret : count;					\
+}									\
+static DEVICE_ATTR_RW(resource##n##_resize)
+
+pci_dev_resource_resize_attr(0);
+pci_dev_resource_resize_attr(1);
+pci_dev_resource_resize_attr(2);
+pci_dev_resource_resize_attr(3);
+pci_dev_resource_resize_attr(4);
+pci_dev_resource_resize_attr(5);
+
+static struct attribute *resource_resize_attrs[] = {
+	&dev_attr_resource0_resize.attr,
+	&dev_attr_resource1_resize.attr,
+	&dev_attr_resource2_resize.attr,
+	&dev_attr_resource3_resize.attr,
+	&dev_attr_resource4_resize.attr,
+	&dev_attr_resource5_resize.attr,
+	NULL,
+};
+
+static umode_t resource_resize_is_visible(struct kobject *kobj,
+					  struct attribute *a, int n)
+{
+	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+
+	return pci_rebar_get_current_size(pdev, n) < 0 ? 0 : a->mode;
+}
+
+static const struct attribute_group pci_dev_resource_resize_group = {
+	.attrs = resource_resize_attrs,
+	.is_visible = resource_resize_is_visible,
+};
+
+int __must_check pci_create_sysfs_dev_files(struct pci_dev *pdev)
+{
+	if (!sysfs_initialized)
+		return -EACCES;
+
+	return pci_create_resource_files(pdev);
+}
+
+/**
+ * pci_remove_sysfs_dev_files - cleanup PCI specific sysfs files
+ * @pdev: device whose entries we should free
+ *
+ * Cleanup when @pdev is removed from sysfs.
+ */
+void pci_remove_sysfs_dev_files(struct pci_dev *pdev)
+{
+	if (!sysfs_initialized)
+		return;
+
+	pci_remove_resource_files(pdev);
+}
+
+static int __init pci_sysfs_init(void)
+{
+	struct pci_dev *pdev = NULL;
+	struct pci_bus *pbus = NULL;
+	int retval;
+
+	sysfs_initialized = 1;
+	for_each_pci_dev(pdev) {
+		retval = pci_create_sysfs_dev_files(pdev);
+		if (retval) {
+			pci_dev_put(pdev);
+			return retval;
+		}
+	}
+
+	while ((pbus = pci_find_next_bus(pbus)))
+		pci_create_legacy_files(pbus);
+
+	return 0;
+}
+late_initcall(pci_sysfs_init);
+
+static struct attribute *pci_dev_dev_attrs[] = {
+	&dev_attr_boot_vga.attr,
+	NULL,
+};
+
+static umode_t pci_dev_attrs_are_visible(struct kobject *kobj,
+					 struct attribute *a, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (a == &dev_attr_boot_vga.attr)
+		if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
+			return 0;
+
+	return a->mode;
+}
+
+static struct attribute *pci_dev_hp_attrs[] = {
+	&dev_attr_remove.attr,
+	&dev_attr_dev_rescan.attr,
+	NULL,
+};
+
+static umode_t pci_dev_hp_attrs_are_visible(struct kobject *kobj,
+					    struct attribute *a, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (pdev->is_virtfn)
+		return 0;
+
+	return a->mode;
+}
+
+static umode_t pci_bridge_attrs_are_visible(struct kobject *kobj,
+					    struct attribute *a, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (pci_is_bridge(pdev))
+		return a->mode;
+
+	return 0;
+}
+
+static umode_t pcie_dev_attrs_are_visible(struct kobject *kobj,
+					  struct attribute *a, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (pci_is_pcie(pdev))
+		return a->mode;
+
+	return 0;
+}
+
+static const struct attribute_group pci_dev_group = {
+	.attrs = pci_dev_attrs,
+};
+
+const struct attribute_group *pci_dev_groups[] = {
+	&pci_dev_group,
+	&pci_dev_config_attr_group,
+	&pci_dev_rom_attr_group,
+	&pci_dev_reset_attr_group,
+	&pci_dev_reset_method_attr_group,
+	&pci_dev_vpd_attr_group,
+#ifdef CONFIG_DMI
+	&pci_dev_smbios_attr_group,
+#endif
+#ifdef CONFIG_ACPI
+	&pci_dev_acpi_attr_group,
+#endif
+	&pci_dev_resource_resize_group,
+	NULL,
+};
+
+static const struct attribute_group pci_dev_hp_attr_group = {
+	.attrs = pci_dev_hp_attrs,
+	.is_visible = pci_dev_hp_attrs_are_visible,
+};
+
+static const struct attribute_group pci_dev_attr_group = {
+	.attrs = pci_dev_dev_attrs,
+	.is_visible = pci_dev_attrs_are_visible,
+};
+
+static const struct attribute_group pci_bridge_attr_group = {
+	.attrs = pci_bridge_attrs,
+	.is_visible = pci_bridge_attrs_are_visible,
+};
+
+static const struct attribute_group pcie_dev_attr_group = {
+	.attrs = pcie_dev_attrs,
+	.is_visible = pcie_dev_attrs_are_visible,
+};
+
+static const struct attribute_group *pci_dev_attr_groups[] = {
+	&pci_dev_attr_group,
+	&pci_dev_hp_attr_group,
+#ifdef CONFIG_PCI_IOV
+	&sriov_pf_dev_attr_group,
+	&sriov_vf_dev_attr_group,
+#endif
+	&pci_bridge_attr_group,
+	&pcie_dev_attr_group,
+#ifdef CONFIG_PCIEAER
+	&aer_stats_attr_group,
+#endif
+#ifdef CONFIG_PCIEASPM
+	&aspm_ctrl_attr_group,
+#endif
+	NULL,
+};
+
+const struct device_type pci_dev_type = {
+	.groups = pci_dev_attr_groups,
+};
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
new file mode 100644
index 000000000..59d6cb1a3
--- /dev/null
+++ b/drivers/pci/pci.c
@@ -0,0 +1,7059 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Bus Services, see include/linux/pci.h for further explanation.
+ *
+ * Copyright 1993 -- 1997 Drew Eckhardt, Frederic Potter,
+ * David Mosberger-Tang
+ *
+ * Copyright 1997 -- 2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/acpi.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/dmi.h>
+#include <linux/init.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/pm.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/log2.h>
+#include <linux/logic_pio.h>
+#include <linux/pm_wakeup.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/pm_runtime.h>
+#include <linux/pci_hotplug.h>
+#include <linux/vmalloc.h>
+#include <asm/dma.h>
+#include <linux/aer.h>
+#include <linux/bitfield.h>
+#include "pci.h"
+
+DEFINE_MUTEX(pci_slot_mutex);
+
+const char *pci_power_names[] = {
+	"error", "D0", "D1", "D2", "D3hot", "D3cold", "unknown",
+};
+EXPORT_SYMBOL_GPL(pci_power_names);
+
+#ifdef CONFIG_X86_32
+int isa_dma_bridge_buggy;
+EXPORT_SYMBOL(isa_dma_bridge_buggy);
+#endif
+
+int pci_pci_problems;
+EXPORT_SYMBOL(pci_pci_problems);
+
+unsigned int pci_pm_d3hot_delay;
+
+static void pci_pme_list_scan(struct work_struct *work);
+
+static LIST_HEAD(pci_pme_list);
+static DEFINE_MUTEX(pci_pme_list_mutex);
+static DECLARE_DELAYED_WORK(pci_pme_work, pci_pme_list_scan);
+
+struct pci_pme_device {
+	struct list_head list;
+	struct pci_dev *dev;
+};
+
+#define PME_TIMEOUT 1000 /* How long between PME checks */
+
+/*
+ * Following exit from Conventional Reset, devices must be ready within 1 sec
+ * (PCIe r6.0 sec 6.6.1).  A D3cold to D0 transition implies a Conventional
+ * Reset (PCIe r6.0 sec 5.8).
+ */
+#define PCI_RESET_WAIT 1000 /* msec */
+
+/*
+ * Devices may extend the 1 sec period through Request Retry Status
+ * completions (PCIe r6.0 sec 2.3.1).  The spec does not provide an upper
+ * limit, but 60 sec ought to be enough for any device to become
+ * responsive.
+ */
+#define PCIE_RESET_READY_POLL_MS 60000 /* msec */
+
+static void pci_dev_d3_sleep(struct pci_dev *dev)
+{
+	unsigned int delay_ms = max(dev->d3hot_delay, pci_pm_d3hot_delay);
+	unsigned int upper;
+
+	if (delay_ms) {
+		/* Use a 20% upper bound, 1ms minimum */
+		upper = max(DIV_ROUND_CLOSEST(delay_ms, 5), 1U);
+		usleep_range(delay_ms * USEC_PER_MSEC,
+			     (delay_ms + upper) * USEC_PER_MSEC);
+	}
+}
+
+bool pci_reset_supported(struct pci_dev *dev)
+{
+	return dev->reset_methods[0] != 0;
+}
+
+#ifdef CONFIG_PCI_DOMAINS
+int pci_domains_supported = 1;
+#endif
+
+#define DEFAULT_CARDBUS_IO_SIZE		(256)
+#define DEFAULT_CARDBUS_MEM_SIZE	(64*1024*1024)
+/* pci=cbmemsize=nnM,cbiosize=nn can override this */
+unsigned long pci_cardbus_io_size = DEFAULT_CARDBUS_IO_SIZE;
+unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE;
+
+#define DEFAULT_HOTPLUG_IO_SIZE		(256)
+#define DEFAULT_HOTPLUG_MMIO_SIZE	(2*1024*1024)
+#define DEFAULT_HOTPLUG_MMIO_PREF_SIZE	(2*1024*1024)
+/* hpiosize=nn can override this */
+unsigned long pci_hotplug_io_size  = DEFAULT_HOTPLUG_IO_SIZE;
+/*
+ * pci=hpmmiosize=nnM overrides non-prefetchable MMIO size,
+ * pci=hpmmioprefsize=nnM overrides prefetchable MMIO size;
+ * pci=hpmemsize=nnM overrides both
+ */
+unsigned long pci_hotplug_mmio_size = DEFAULT_HOTPLUG_MMIO_SIZE;
+unsigned long pci_hotplug_mmio_pref_size = DEFAULT_HOTPLUG_MMIO_PREF_SIZE;
+
+#define DEFAULT_HOTPLUG_BUS_SIZE	1
+unsigned long pci_hotplug_bus_size = DEFAULT_HOTPLUG_BUS_SIZE;
+
+
+/* PCIe MPS/MRRS strategy; can be overridden by kernel command-line param */
+#ifdef CONFIG_PCIE_BUS_TUNE_OFF
+enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_TUNE_OFF;
+#elif defined CONFIG_PCIE_BUS_SAFE
+enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_SAFE;
+#elif defined CONFIG_PCIE_BUS_PERFORMANCE
+enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_PERFORMANCE;
+#elif defined CONFIG_PCIE_BUS_PEER2PEER
+enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_PEER2PEER;
+#else
+enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_DEFAULT;
+#endif
+
+/*
+ * The default CLS is used if arch didn't set CLS explicitly and not
+ * all pci devices agree on the same value.  Arch can override either
+ * the dfl or actual value as it sees fit.  Don't forget this is
+ * measured in 32-bit words, not bytes.
+ */
+u8 pci_dfl_cache_line_size = L1_CACHE_BYTES >> 2;
+u8 pci_cache_line_size;
+
+/*
+ * If we set up a device for bus mastering, we need to check the latency
+ * timer as certain BIOSes forget to set it properly.
+ */
+unsigned int pcibios_max_latency = 255;
+
+/* If set, the PCIe ARI capability will not be used. */
+static bool pcie_ari_disabled;
+
+/* If set, the PCIe ATS capability will not be used. */
+static bool pcie_ats_disabled;
+
+/* If set, the PCI config space of each device is printed during boot. */
+bool pci_early_dump;
+
+bool pci_ats_disabled(void)
+{
+	return pcie_ats_disabled;
+}
+EXPORT_SYMBOL_GPL(pci_ats_disabled);
+
+/* Disable bridge_d3 for all PCIe ports */
+static bool pci_bridge_d3_disable;
+/* Force bridge_d3 for all PCIe ports */
+static bool pci_bridge_d3_force;
+
+static int __init pcie_port_pm_setup(char *str)
+{
+	if (!strcmp(str, "off"))
+		pci_bridge_d3_disable = true;
+	else if (!strcmp(str, "force"))
+		pci_bridge_d3_force = true;
+	return 1;
+}
+__setup("pcie_port_pm=", pcie_port_pm_setup);
+
+/**
+ * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children
+ * @bus: pointer to PCI bus structure to search
+ *
+ * Given a PCI bus, returns the highest PCI bus number present in the set
+ * including the given PCI bus and its list of child PCI buses.
+ */
+unsigned char pci_bus_max_busnr(struct pci_bus *bus)
+{
+	struct pci_bus *tmp;
+	unsigned char max, n;
+
+	max = bus->busn_res.end;
+	list_for_each_entry(tmp, &bus->children, node) {
+		n = pci_bus_max_busnr(tmp);
+		if (n > max)
+			max = n;
+	}
+	return max;
+}
+EXPORT_SYMBOL_GPL(pci_bus_max_busnr);
+
+/**
+ * pci_status_get_and_clear_errors - return and clear error bits in PCI_STATUS
+ * @pdev: the PCI device
+ *
+ * Returns error bits set in PCI_STATUS and clears them.
+ */
+int pci_status_get_and_clear_errors(struct pci_dev *pdev)
+{
+	u16 status;
+	int ret;
+
+	ret = pci_read_config_word(pdev, PCI_STATUS, &status);
+	if (ret != PCIBIOS_SUCCESSFUL)
+		return -EIO;
+
+	status &= PCI_STATUS_ERROR_BITS;
+	if (status)
+		pci_write_config_word(pdev, PCI_STATUS, status);
+
+	return status;
+}
+EXPORT_SYMBOL_GPL(pci_status_get_and_clear_errors);
+
+#ifdef CONFIG_HAS_IOMEM
+static void __iomem *__pci_ioremap_resource(struct pci_dev *pdev, int bar,
+					    bool write_combine)
+{
+	struct resource *res = &pdev->resource[bar];
+	resource_size_t start = res->start;
+	resource_size_t size = resource_size(res);
+
+	/*
+	 * Make sure the BAR is actually a memory resource, not an IO resource
+	 */
+	if (res->flags & IORESOURCE_UNSET || !(res->flags & IORESOURCE_MEM)) {
+		pci_err(pdev, "can't ioremap BAR %d: %pR\n", bar, res);
+		return NULL;
+	}
+
+	if (write_combine)
+		return ioremap_wc(start, size);
+
+	return ioremap(start, size);
+}
+
+void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar)
+{
+	return __pci_ioremap_resource(pdev, bar, false);
+}
+EXPORT_SYMBOL_GPL(pci_ioremap_bar);
+
+void __iomem *pci_ioremap_wc_bar(struct pci_dev *pdev, int bar)
+{
+	return __pci_ioremap_resource(pdev, bar, true);
+}
+EXPORT_SYMBOL_GPL(pci_ioremap_wc_bar);
+#endif
+
+/**
+ * pci_dev_str_match_path - test if a path string matches a device
+ * @dev: the PCI device to test
+ * @path: string to match the device against
+ * @endptr: pointer to the string after the match
+ *
+ * Test if a string (typically from a kernel parameter) formatted as a
+ * path of device/function addresses matches a PCI device. The string must
+ * be of the form:
+ *
+ *   [<domain>:]<bus>:<device>.<func>[/<device>.<func>]*
+ *
+ * A path for a device can be obtained using 'lspci -t'.  Using a path
+ * is more robust against bus renumbering than using only a single bus,
+ * device and function address.
+ *
+ * Returns 1 if the string matches the device, 0 if it does not and
+ * a negative error code if it fails to parse the string.
+ */
+static int pci_dev_str_match_path(struct pci_dev *dev, const char *path,
+				  const char **endptr)
+{
+	int ret;
+	unsigned int seg, bus, slot, func;
+	char *wpath, *p;
+	char end;
+
+	*endptr = strchrnul(path, ';');
+
+	wpath = kmemdup_nul(path, *endptr - path, GFP_ATOMIC);
+	if (!wpath)
+		return -ENOMEM;
+
+	while (1) {
+		p = strrchr(wpath, '/');
+		if (!p)
+			break;
+		ret = sscanf(p, "/%x.%x%c", &slot, &func, &end);
+		if (ret != 2) {
+			ret = -EINVAL;
+			goto free_and_exit;
+		}
+
+		if (dev->devfn != PCI_DEVFN(slot, func)) {
+			ret = 0;
+			goto free_and_exit;
+		}
+
+		/*
+		 * Note: we don't need to get a reference to the upstream
+		 * bridge because we hold a reference to the top level
+		 * device which should hold a reference to the bridge,
+		 * and so on.
+		 */
+		dev = pci_upstream_bridge(dev);
+		if (!dev) {
+			ret = 0;
+			goto free_and_exit;
+		}
+
+		*p = 0;
+	}
+
+	ret = sscanf(wpath, "%x:%x:%x.%x%c", &seg, &bus, &slot,
+		     &func, &end);
+	if (ret != 4) {
+		seg = 0;
+		ret = sscanf(wpath, "%x:%x.%x%c", &bus, &slot, &func, &end);
+		if (ret != 3) {
+			ret = -EINVAL;
+			goto free_and_exit;
+		}
+	}
+
+	ret = (seg == pci_domain_nr(dev->bus) &&
+	       bus == dev->bus->number &&
+	       dev->devfn == PCI_DEVFN(slot, func));
+
+free_and_exit:
+	kfree(wpath);
+	return ret;
+}
+
+/**
+ * pci_dev_str_match - test if a string matches a device
+ * @dev: the PCI device to test
+ * @p: string to match the device against
+ * @endptr: pointer to the string after the match
+ *
+ * Test if a string (typically from a kernel parameter) matches a specified
+ * PCI device. The string may be of one of the following formats:
+ *
+ *   [<domain>:]<bus>:<device>.<func>[/<device>.<func>]*
+ *   pci:<vendor>:<device>[:<subvendor>:<subdevice>]
+ *
+ * The first format specifies a PCI bus/device/function address which
+ * may change if new hardware is inserted, if motherboard firmware changes,
+ * or due to changes caused in kernel parameters. If the domain is
+ * left unspecified, it is taken to be 0.  In order to be robust against
+ * bus renumbering issues, a path of PCI device/function numbers may be used
+ * to address the specific device.  The path for a device can be determined
+ * through the use of 'lspci -t'.
+ *
+ * The second format matches devices using IDs in the configuration
+ * space which may match multiple devices in the system. A value of 0
+ * for any field will match all devices. (Note: this differs from
+ * in-kernel code that uses PCI_ANY_ID which is ~0; this is for
+ * legacy reasons and convenience so users don't have to specify
+ * FFFFFFFFs on the command line.)
+ *
+ * Returns 1 if the string matches the device, 0 if it does not and
+ * a negative error code if the string cannot be parsed.
+ */
+static int pci_dev_str_match(struct pci_dev *dev, const char *p,
+			     const char **endptr)
+{
+	int ret;
+	int count;
+	unsigned short vendor, device, subsystem_vendor, subsystem_device;
+
+	if (strncmp(p, "pci:", 4) == 0) {
+		/* PCI vendor/device (subvendor/subdevice) IDs are specified */
+		p += 4;
+		ret = sscanf(p, "%hx:%hx:%hx:%hx%n", &vendor, &device,
+			     &subsystem_vendor, &subsystem_device, &count);
+		if (ret != 4) {
+			ret = sscanf(p, "%hx:%hx%n", &vendor, &device, &count);
+			if (ret != 2)
+				return -EINVAL;
+
+			subsystem_vendor = 0;
+			subsystem_device = 0;
+		}
+
+		p += count;
+
+		if ((!vendor || vendor == dev->vendor) &&
+		    (!device || device == dev->device) &&
+		    (!subsystem_vendor ||
+			    subsystem_vendor == dev->subsystem_vendor) &&
+		    (!subsystem_device ||
+			    subsystem_device == dev->subsystem_device))
+			goto found;
+	} else {
+		/*
+		 * PCI Bus, Device, Function IDs are specified
+		 * (optionally, may include a path of devfns following it)
+		 */
+		ret = pci_dev_str_match_path(dev, p, &p);
+		if (ret < 0)
+			return ret;
+		else if (ret)
+			goto found;
+	}
+
+	*endptr = p;
+	return 0;
+
+found:
+	*endptr = p;
+	return 1;
+}
+
+static u8 __pci_find_next_cap_ttl(struct pci_bus *bus, unsigned int devfn,
+				  u8 pos, int cap, int *ttl)
+{
+	u8 id;
+	u16 ent;
+
+	pci_bus_read_config_byte(bus, devfn, pos, &pos);
+
+	while ((*ttl)--) {
+		if (pos < 0x40)
+			break;
+		pos &= ~3;
+		pci_bus_read_config_word(bus, devfn, pos, &ent);
+
+		id = ent & 0xff;
+		if (id == 0xff)
+			break;
+		if (id == cap)
+			return pos;
+		pos = (ent >> 8);
+	}
+	return 0;
+}
+
+static u8 __pci_find_next_cap(struct pci_bus *bus, unsigned int devfn,
+			      u8 pos, int cap)
+{
+	int ttl = PCI_FIND_CAP_TTL;
+
+	return __pci_find_next_cap_ttl(bus, devfn, pos, cap, &ttl);
+}
+
+u8 pci_find_next_capability(struct pci_dev *dev, u8 pos, int cap)
+{
+	return __pci_find_next_cap(dev->bus, dev->devfn,
+				   pos + PCI_CAP_LIST_NEXT, cap);
+}
+EXPORT_SYMBOL_GPL(pci_find_next_capability);
+
+static u8 __pci_bus_find_cap_start(struct pci_bus *bus,
+				    unsigned int devfn, u8 hdr_type)
+{
+	u16 status;
+
+	pci_bus_read_config_word(bus, devfn, PCI_STATUS, &status);
+	if (!(status & PCI_STATUS_CAP_LIST))
+		return 0;
+
+	switch (hdr_type) {
+	case PCI_HEADER_TYPE_NORMAL:
+	case PCI_HEADER_TYPE_BRIDGE:
+		return PCI_CAPABILITY_LIST;
+	case PCI_HEADER_TYPE_CARDBUS:
+		return PCI_CB_CAPABILITY_LIST;
+	}
+
+	return 0;
+}
+
+/**
+ * pci_find_capability - query for devices' capabilities
+ * @dev: PCI device to query
+ * @cap: capability code
+ *
+ * Tell if a device supports a given PCI capability.
+ * Returns the address of the requested capability structure within the
+ * device's PCI configuration space or 0 in case the device does not
+ * support it.  Possible values for @cap include:
+ *
+ *  %PCI_CAP_ID_PM           Power Management
+ *  %PCI_CAP_ID_AGP          Accelerated Graphics Port
+ *  %PCI_CAP_ID_VPD          Vital Product Data
+ *  %PCI_CAP_ID_SLOTID       Slot Identification
+ *  %PCI_CAP_ID_MSI          Message Signalled Interrupts
+ *  %PCI_CAP_ID_CHSWP        CompactPCI HotSwap
+ *  %PCI_CAP_ID_PCIX         PCI-X
+ *  %PCI_CAP_ID_EXP          PCI Express
+ */
+u8 pci_find_capability(struct pci_dev *dev, int cap)
+{
+	u8 pos;
+
+	pos = __pci_bus_find_cap_start(dev->bus, dev->devfn, dev->hdr_type);
+	if (pos)
+		pos = __pci_find_next_cap(dev->bus, dev->devfn, pos, cap);
+
+	return pos;
+}
+EXPORT_SYMBOL(pci_find_capability);
+
+/**
+ * pci_bus_find_capability - query for devices' capabilities
+ * @bus: the PCI bus to query
+ * @devfn: PCI device to query
+ * @cap: capability code
+ *
+ * Like pci_find_capability() but works for PCI devices that do not have a
+ * pci_dev structure set up yet.
+ *
+ * Returns the address of the requested capability structure within the
+ * device's PCI configuration space or 0 in case the device does not
+ * support it.
+ */
+u8 pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap)
+{
+	u8 hdr_type, pos;
+
+	pci_bus_read_config_byte(bus, devfn, PCI_HEADER_TYPE, &hdr_type);
+
+	pos = __pci_bus_find_cap_start(bus, devfn, hdr_type & 0x7f);
+	if (pos)
+		pos = __pci_find_next_cap(bus, devfn, pos, cap);
+
+	return pos;
+}
+EXPORT_SYMBOL(pci_bus_find_capability);
+
+/**
+ * pci_find_next_ext_capability - Find an extended capability
+ * @dev: PCI device to query
+ * @start: address at which to start looking (0 to start at beginning of list)
+ * @cap: capability code
+ *
+ * Returns the address of the next matching extended capability structure
+ * within the device's PCI configuration space or 0 if the device does
+ * not support it.  Some capabilities can occur several times, e.g., the
+ * vendor-specific capability, and this provides a way to find them all.
+ */
+u16 pci_find_next_ext_capability(struct pci_dev *dev, u16 start, int cap)
+{
+	u32 header;
+	int ttl;
+	u16 pos = PCI_CFG_SPACE_SIZE;
+
+	/* minimum 8 bytes per capability */
+	ttl = (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8;
+
+	if (dev->cfg_size <= PCI_CFG_SPACE_SIZE)
+		return 0;
+
+	if (start)
+		pos = start;
+
+	if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL)
+		return 0;
+
+	/*
+	 * If we have no capabilities, this is indicated by cap ID,
+	 * cap version and next pointer all being 0.
+	 */
+	if (header == 0)
+		return 0;
+
+	while (ttl-- > 0) {
+		if (PCI_EXT_CAP_ID(header) == cap && pos != start)
+			return pos;
+
+		pos = PCI_EXT_CAP_NEXT(header);
+		if (pos < PCI_CFG_SPACE_SIZE)
+			break;
+
+		if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL)
+			break;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_find_next_ext_capability);
+
+/**
+ * pci_find_ext_capability - Find an extended capability
+ * @dev: PCI device to query
+ * @cap: capability code
+ *
+ * Returns the address of the requested extended capability structure
+ * within the device's PCI configuration space or 0 if the device does
+ * not support it.  Possible values for @cap include:
+ *
+ *  %PCI_EXT_CAP_ID_ERR		Advanced Error Reporting
+ *  %PCI_EXT_CAP_ID_VC		Virtual Channel
+ *  %PCI_EXT_CAP_ID_DSN		Device Serial Number
+ *  %PCI_EXT_CAP_ID_PWR		Power Budgeting
+ */
+u16 pci_find_ext_capability(struct pci_dev *dev, int cap)
+{
+	return pci_find_next_ext_capability(dev, 0, cap);
+}
+EXPORT_SYMBOL_GPL(pci_find_ext_capability);
+
+/**
+ * pci_get_dsn - Read and return the 8-byte Device Serial Number
+ * @dev: PCI device to query
+ *
+ * Looks up the PCI_EXT_CAP_ID_DSN and reads the 8 bytes of the Device Serial
+ * Number.
+ *
+ * Returns the DSN, or zero if the capability does not exist.
+ */
+u64 pci_get_dsn(struct pci_dev *dev)
+{
+	u32 dword;
+	u64 dsn;
+	int pos;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DSN);
+	if (!pos)
+		return 0;
+
+	/*
+	 * The Device Serial Number is two dwords offset 4 bytes from the
+	 * capability position. The specification says that the first dword is
+	 * the lower half, and the second dword is the upper half.
+	 */
+	pos += 4;
+	pci_read_config_dword(dev, pos, &dword);
+	dsn = (u64)dword;
+	pci_read_config_dword(dev, pos + 4, &dword);
+	dsn |= ((u64)dword) << 32;
+
+	return dsn;
+}
+EXPORT_SYMBOL_GPL(pci_get_dsn);
+
+static u8 __pci_find_next_ht_cap(struct pci_dev *dev, u8 pos, int ht_cap)
+{
+	int rc, ttl = PCI_FIND_CAP_TTL;
+	u8 cap, mask;
+
+	if (ht_cap == HT_CAPTYPE_SLAVE || ht_cap == HT_CAPTYPE_HOST)
+		mask = HT_3BIT_CAP_MASK;
+	else
+		mask = HT_5BIT_CAP_MASK;
+
+	pos = __pci_find_next_cap_ttl(dev->bus, dev->devfn, pos,
+				      PCI_CAP_ID_HT, &ttl);
+	while (pos) {
+		rc = pci_read_config_byte(dev, pos + 3, &cap);
+		if (rc != PCIBIOS_SUCCESSFUL)
+			return 0;
+
+		if ((cap & mask) == ht_cap)
+			return pos;
+
+		pos = __pci_find_next_cap_ttl(dev->bus, dev->devfn,
+					      pos + PCI_CAP_LIST_NEXT,
+					      PCI_CAP_ID_HT, &ttl);
+	}
+
+	return 0;
+}
+
+/**
+ * pci_find_next_ht_capability - query a device's HyperTransport capabilities
+ * @dev: PCI device to query
+ * @pos: Position from which to continue searching
+ * @ht_cap: HyperTransport capability code
+ *
+ * To be used in conjunction with pci_find_ht_capability() to search for
+ * all capabilities matching @ht_cap. @pos should always be a value returned
+ * from pci_find_ht_capability().
+ *
+ * NB. To be 100% safe against broken PCI devices, the caller should take
+ * steps to avoid an infinite loop.
+ */
+u8 pci_find_next_ht_capability(struct pci_dev *dev, u8 pos, int ht_cap)
+{
+	return __pci_find_next_ht_cap(dev, pos + PCI_CAP_LIST_NEXT, ht_cap);
+}
+EXPORT_SYMBOL_GPL(pci_find_next_ht_capability);
+
+/**
+ * pci_find_ht_capability - query a device's HyperTransport capabilities
+ * @dev: PCI device to query
+ * @ht_cap: HyperTransport capability code
+ *
+ * Tell if a device supports a given HyperTransport capability.
+ * Returns an address within the device's PCI configuration space
+ * or 0 in case the device does not support the request capability.
+ * The address points to the PCI capability, of type PCI_CAP_ID_HT,
+ * which has a HyperTransport capability matching @ht_cap.
+ */
+u8 pci_find_ht_capability(struct pci_dev *dev, int ht_cap)
+{
+	u8 pos;
+
+	pos = __pci_bus_find_cap_start(dev->bus, dev->devfn, dev->hdr_type);
+	if (pos)
+		pos = __pci_find_next_ht_cap(dev, pos, ht_cap);
+
+	return pos;
+}
+EXPORT_SYMBOL_GPL(pci_find_ht_capability);
+
+/**
+ * pci_find_vsec_capability - Find a vendor-specific extended capability
+ * @dev: PCI device to query
+ * @vendor: Vendor ID for which capability is defined
+ * @cap: Vendor-specific capability ID
+ *
+ * If @dev has Vendor ID @vendor, search for a VSEC capability with
+ * VSEC ID @cap. If found, return the capability offset in
+ * config space; otherwise return 0.
+ */
+u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap)
+{
+	u16 vsec = 0;
+	u32 header;
+	int ret;
+
+	if (vendor != dev->vendor)
+		return 0;
+
+	while ((vsec = pci_find_next_ext_capability(dev, vsec,
+						     PCI_EXT_CAP_ID_VNDR))) {
+		ret = pci_read_config_dword(dev, vsec + PCI_VNDR_HEADER, &header);
+		if (ret != PCIBIOS_SUCCESSFUL)
+			continue;
+
+		if (PCI_VNDR_HEADER_ID(header) == cap)
+			return vsec;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_find_vsec_capability);
+
+/**
+ * pci_find_dvsec_capability - Find DVSEC for vendor
+ * @dev: PCI device to query
+ * @vendor: Vendor ID to match for the DVSEC
+ * @dvsec: Designated Vendor-specific capability ID
+ *
+ * If DVSEC has Vendor ID @vendor and DVSEC ID @dvsec return the capability
+ * offset in config space; otherwise return 0.
+ */
+u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec)
+{
+	int pos;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DVSEC);
+	if (!pos)
+		return 0;
+
+	while (pos) {
+		u16 v, id;
+
+		pci_read_config_word(dev, pos + PCI_DVSEC_HEADER1, &v);
+		pci_read_config_word(dev, pos + PCI_DVSEC_HEADER2, &id);
+		if (vendor == v && dvsec == id)
+			return pos;
+
+		pos = pci_find_next_ext_capability(dev, pos, PCI_EXT_CAP_ID_DVSEC);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_find_dvsec_capability);
+
+/**
+ * pci_find_parent_resource - return resource region of parent bus of given
+ *			      region
+ * @dev: PCI device structure contains resources to be searched
+ * @res: child resource record for which parent is sought
+ *
+ * For given resource region of given device, return the resource region of
+ * parent bus the given region is contained in.
+ */
+struct resource *pci_find_parent_resource(const struct pci_dev *dev,
+					  struct resource *res)
+{
+	const struct pci_bus *bus = dev->bus;
+	struct resource *r;
+
+	pci_bus_for_each_resource(bus, r) {
+		if (!r)
+			continue;
+		if (resource_contains(r, res)) {
+
+			/*
+			 * If the window is prefetchable but the BAR is
+			 * not, the allocator made a mistake.
+			 */
+			if (r->flags & IORESOURCE_PREFETCH &&
+			    !(res->flags & IORESOURCE_PREFETCH))
+				return NULL;
+
+			/*
+			 * If we're below a transparent bridge, there may
+			 * be both a positively-decoded aperture and a
+			 * subtractively-decoded region that contain the BAR.
+			 * We want the positively-decoded one, so this depends
+			 * on pci_bus_for_each_resource() giving us those
+			 * first.
+			 */
+			return r;
+		}
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(pci_find_parent_resource);
+
+/**
+ * pci_find_resource - Return matching PCI device resource
+ * @dev: PCI device to query
+ * @res: Resource to look for
+ *
+ * Goes over standard PCI resources (BARs) and checks if the given resource
+ * is partially or fully contained in any of them. In that case the
+ * matching resource is returned, %NULL otherwise.
+ */
+struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res)
+{
+	int i;
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		struct resource *r = &dev->resource[i];
+
+		if (r->start && resource_contains(r, res))
+			return r;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(pci_find_resource);
+
+/**
+ * pci_wait_for_pending - wait for @mask bit(s) to clear in status word @pos
+ * @dev: the PCI device to operate on
+ * @pos: config space offset of status word
+ * @mask: mask of bit(s) to care about in status word
+ *
+ * Return 1 when mask bit(s) in status word clear, 0 otherwise.
+ */
+int pci_wait_for_pending(struct pci_dev *dev, int pos, u16 mask)
+{
+	int i;
+
+	/* Wait for Transaction Pending bit clean */
+	for (i = 0; i < 4; i++) {
+		u16 status;
+		if (i)
+			msleep((1 << (i - 1)) * 100);
+
+		pci_read_config_word(dev, pos, &status);
+		if (!(status & mask))
+			return 1;
+	}
+
+	return 0;
+}
+
+static int pci_acs_enable;
+
+/**
+ * pci_request_acs - ask for ACS to be enabled if supported
+ */
+void pci_request_acs(void)
+{
+	pci_acs_enable = 1;
+}
+
+static const char *disable_acs_redir_param;
+
+/**
+ * pci_disable_acs_redir - disable ACS redirect capabilities
+ * @dev: the PCI device
+ *
+ * For only devices specified in the disable_acs_redir parameter.
+ */
+static void pci_disable_acs_redir(struct pci_dev *dev)
+{
+	int ret = 0;
+	const char *p;
+	int pos;
+	u16 ctrl;
+
+	if (!disable_acs_redir_param)
+		return;
+
+	p = disable_acs_redir_param;
+	while (*p) {
+		ret = pci_dev_str_match(dev, p, &p);
+		if (ret < 0) {
+			pr_info_once("PCI: Can't parse disable_acs_redir parameter: %s\n",
+				     disable_acs_redir_param);
+
+			break;
+		} else if (ret == 1) {
+			/* Found a match */
+			break;
+		}
+
+		if (*p != ';' && *p != ',') {
+			/* End of param or invalid format */
+			break;
+		}
+		p++;
+	}
+
+	if (ret != 1)
+		return;
+
+	if (!pci_dev_specific_disable_acs_redir(dev))
+		return;
+
+	pos = dev->acs_cap;
+	if (!pos) {
+		pci_warn(dev, "cannot disable ACS redirect for this hardware as it does not have ACS capabilities\n");
+		return;
+	}
+
+	pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl);
+
+	/* P2P Request & Completion Redirect */
+	ctrl &= ~(PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC);
+
+	pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl);
+
+	pci_info(dev, "disabled ACS redirect\n");
+}
+
+/**
+ * pci_std_enable_acs - enable ACS on devices using standard ACS capabilities
+ * @dev: the PCI device
+ */
+static void pci_std_enable_acs(struct pci_dev *dev)
+{
+	int pos;
+	u16 cap;
+	u16 ctrl;
+
+	pos = dev->acs_cap;
+	if (!pos)
+		return;
+
+	pci_read_config_word(dev, pos + PCI_ACS_CAP, &cap);
+	pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl);
+
+	/* Source Validation */
+	ctrl |= (cap & PCI_ACS_SV);
+
+	/* P2P Request Redirect */
+	ctrl |= (cap & PCI_ACS_RR);
+
+	/* P2P Completion Redirect */
+	ctrl |= (cap & PCI_ACS_CR);
+
+	/* Upstream Forwarding */
+	ctrl |= (cap & PCI_ACS_UF);
+
+	/* Enable Translation Blocking for external devices and noats */
+	if (pci_ats_disabled() || dev->external_facing || dev->untrusted)
+		ctrl |= (cap & PCI_ACS_TB);
+
+	pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl);
+}
+
+/**
+ * pci_enable_acs - enable ACS if hardware support it
+ * @dev: the PCI device
+ */
+static void pci_enable_acs(struct pci_dev *dev)
+{
+	if (!pci_acs_enable)
+		goto disable_acs_redir;
+
+	if (!pci_dev_specific_enable_acs(dev))
+		goto disable_acs_redir;
+
+	pci_std_enable_acs(dev);
+
+disable_acs_redir:
+	/*
+	 * Note: pci_disable_acs_redir() must be called even if ACS was not
+	 * enabled by the kernel because it may have been enabled by
+	 * platform firmware.  So if we are told to disable it, we should
+	 * always disable it after setting the kernel's default
+	 * preferences.
+	 */
+	pci_disable_acs_redir(dev);
+}
+
+/**
+ * pci_restore_bars - restore a device's BAR values (e.g. after wake-up)
+ * @dev: PCI device to have its BARs restored
+ *
+ * Restore the BAR values for a given device, so as to make it
+ * accessible by its driver.
+ */
+static void pci_restore_bars(struct pci_dev *dev)
+{
+	int i;
+
+	for (i = 0; i < PCI_BRIDGE_RESOURCES; i++)
+		pci_update_resource(dev, i);
+}
+
+static inline bool platform_pci_power_manageable(struct pci_dev *dev)
+{
+	if (pci_use_mid_pm())
+		return true;
+
+	return acpi_pci_power_manageable(dev);
+}
+
+static inline int platform_pci_set_power_state(struct pci_dev *dev,
+					       pci_power_t t)
+{
+	if (pci_use_mid_pm())
+		return mid_pci_set_power_state(dev, t);
+
+	return acpi_pci_set_power_state(dev, t);
+}
+
+static inline pci_power_t platform_pci_get_power_state(struct pci_dev *dev)
+{
+	if (pci_use_mid_pm())
+		return mid_pci_get_power_state(dev);
+
+	return acpi_pci_get_power_state(dev);
+}
+
+static inline void platform_pci_refresh_power_state(struct pci_dev *dev)
+{
+	if (!pci_use_mid_pm())
+		acpi_pci_refresh_power_state(dev);
+}
+
+static inline pci_power_t platform_pci_choose_state(struct pci_dev *dev)
+{
+	if (pci_use_mid_pm())
+		return PCI_POWER_ERROR;
+
+	return acpi_pci_choose_state(dev);
+}
+
+static inline int platform_pci_set_wakeup(struct pci_dev *dev, bool enable)
+{
+	if (pci_use_mid_pm())
+		return PCI_POWER_ERROR;
+
+	return acpi_pci_wakeup(dev, enable);
+}
+
+static inline bool platform_pci_need_resume(struct pci_dev *dev)
+{
+	if (pci_use_mid_pm())
+		return false;
+
+	return acpi_pci_need_resume(dev);
+}
+
+static inline bool platform_pci_bridge_d3(struct pci_dev *dev)
+{
+	if (pci_use_mid_pm())
+		return false;
+
+	return acpi_pci_bridge_d3(dev);
+}
+
+/**
+ * pci_update_current_state - Read power state of given device and cache it
+ * @dev: PCI device to handle.
+ * @state: State to cache in case the device doesn't have the PM capability
+ *
+ * The power state is read from the PMCSR register, which however is
+ * inaccessible in D3cold.  The platform firmware is therefore queried first
+ * to detect accessibility of the register.  In case the platform firmware
+ * reports an incorrect state or the device isn't power manageable by the
+ * platform at all, we try to detect D3cold by testing accessibility of the
+ * vendor ID in config space.
+ */
+void pci_update_current_state(struct pci_dev *dev, pci_power_t state)
+{
+	if (platform_pci_get_power_state(dev) == PCI_D3cold) {
+		dev->current_state = PCI_D3cold;
+	} else if (dev->pm_cap) {
+		u16 pmcsr;
+
+		pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+		if (PCI_POSSIBLE_ERROR(pmcsr)) {
+			dev->current_state = PCI_D3cold;
+			return;
+		}
+		dev->current_state = pmcsr & PCI_PM_CTRL_STATE_MASK;
+	} else {
+		dev->current_state = state;
+	}
+}
+
+/**
+ * pci_refresh_power_state - Refresh the given device's power state data
+ * @dev: Target PCI device.
+ *
+ * Ask the platform to refresh the devices power state information and invoke
+ * pci_update_current_state() to update its current PCI power state.
+ */
+void pci_refresh_power_state(struct pci_dev *dev)
+{
+	platform_pci_refresh_power_state(dev);
+	pci_update_current_state(dev, dev->current_state);
+}
+
+/**
+ * pci_platform_power_transition - Use platform to change device power state
+ * @dev: PCI device to handle.
+ * @state: State to put the device into.
+ */
+int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state)
+{
+	int error;
+
+	error = platform_pci_set_power_state(dev, state);
+	if (!error)
+		pci_update_current_state(dev, state);
+	else if (!dev->pm_cap) /* Fall back to PCI_D0 */
+		dev->current_state = PCI_D0;
+
+	return error;
+}
+EXPORT_SYMBOL_GPL(pci_platform_power_transition);
+
+static int pci_resume_one(struct pci_dev *pci_dev, void *ign)
+{
+	pm_request_resume(&pci_dev->dev);
+	return 0;
+}
+
+/**
+ * pci_resume_bus - Walk given bus and runtime resume devices on it
+ * @bus: Top bus of the subtree to walk.
+ */
+void pci_resume_bus(struct pci_bus *bus)
+{
+	if (bus)
+		pci_walk_bus(bus, pci_resume_one, NULL);
+}
+
+static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
+{
+	int delay = 1;
+	bool retrain = false;
+	struct pci_dev *bridge;
+
+	if (pci_is_pcie(dev)) {
+		bridge = pci_upstream_bridge(dev);
+		if (bridge)
+			retrain = true;
+	}
+
+	/*
+	 * After reset, the device should not silently discard config
+	 * requests, but it may still indicate that it needs more time by
+	 * responding to them with CRS completions.  The Root Port will
+	 * generally synthesize ~0 (PCI_ERROR_RESPONSE) data to complete
+	 * the read (except when CRS SV is enabled and the read was for the
+	 * Vendor ID; in that case it synthesizes 0x0001 data).
+	 *
+	 * Wait for the device to return a non-CRS completion.  Read the
+	 * Command register instead of Vendor ID so we don't have to
+	 * contend with the CRS SV value.
+	 */
+	for (;;) {
+		u32 id;
+
+		pci_read_config_dword(dev, PCI_COMMAND, &id);
+		if (!PCI_POSSIBLE_ERROR(id))
+			break;
+
+		if (delay > timeout) {
+			pci_warn(dev, "not ready %dms after %s; giving up\n",
+				 delay - 1, reset_type);
+			return -ENOTTY;
+		}
+
+		if (delay > PCI_RESET_WAIT) {
+			if (retrain) {
+				retrain = false;
+				if (pcie_failed_link_retrain(bridge)) {
+					delay = 1;
+					continue;
+				}
+			}
+			pci_info(dev, "not ready %dms after %s; waiting\n",
+				 delay - 1, reset_type);
+		}
+
+		msleep(delay);
+		delay *= 2;
+	}
+
+	if (delay > PCI_RESET_WAIT)
+		pci_info(dev, "ready %dms after %s\n", delay - 1,
+			 reset_type);
+
+	return 0;
+}
+
+/**
+ * pci_power_up - Put the given device into D0
+ * @dev: PCI device to power up
+ *
+ * On success, return 0 or 1, depending on whether or not it is necessary to
+ * restore the device's BARs subsequently (1 is returned in that case).
+ *
+ * On failure, return a negative error code.  Always return failure if @dev
+ * lacks a Power Management Capability, even if the platform was able to
+ * put the device in D0 via non-PCI means.
+ */
+int pci_power_up(struct pci_dev *dev)
+{
+	bool need_restore;
+	pci_power_t state;
+	u16 pmcsr;
+
+	platform_pci_set_power_state(dev, PCI_D0);
+
+	if (!dev->pm_cap) {
+		state = platform_pci_get_power_state(dev);
+		if (state == PCI_UNKNOWN)
+			dev->current_state = PCI_D0;
+		else
+			dev->current_state = state;
+
+		return -EIO;
+	}
+
+	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+	if (PCI_POSSIBLE_ERROR(pmcsr)) {
+		pci_err(dev, "Unable to change power state from %s to D0, device inaccessible\n",
+			pci_power_name(dev->current_state));
+		dev->current_state = PCI_D3cold;
+		return -EIO;
+	}
+
+	state = pmcsr & PCI_PM_CTRL_STATE_MASK;
+
+	need_restore = (state == PCI_D3hot || dev->current_state >= PCI_D3hot) &&
+			!(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET);
+
+	if (state == PCI_D0)
+		goto end;
+
+	/*
+	 * Force the entire word to 0. This doesn't affect PME_Status, disables
+	 * PME_En, and sets PowerState to 0.
+	 */
+	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, 0);
+
+	/* Mandatory transition delays; see PCI PM 1.2. */
+	if (state == PCI_D3hot)
+		pci_dev_d3_sleep(dev);
+	else if (state == PCI_D2)
+		udelay(PCI_PM_D2_DELAY);
+
+end:
+	dev->current_state = PCI_D0;
+	if (need_restore)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * pci_set_full_power_state - Put a PCI device into D0 and update its state
+ * @dev: PCI device to power up
+ *
+ * Call pci_power_up() to put @dev into D0, read from its PCI_PM_CTRL register
+ * to confirm the state change, restore its BARs if they might be lost and
+ * reconfigure ASPM in accordance with the new power state.
+ *
+ * If pci_restore_state() is going to be called right after a power state change
+ * to D0, it is more efficient to use pci_power_up() directly instead of this
+ * function.
+ */
+static int pci_set_full_power_state(struct pci_dev *dev)
+{
+	u16 pmcsr;
+	int ret;
+
+	ret = pci_power_up(dev);
+	if (ret < 0) {
+		if (dev->current_state == PCI_D0)
+			return 0;
+
+		return ret;
+	}
+
+	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+	dev->current_state = pmcsr & PCI_PM_CTRL_STATE_MASK;
+	if (dev->current_state != PCI_D0) {
+		pci_info_ratelimited(dev, "Refused to change power state from %s to D0\n",
+				     pci_power_name(dev->current_state));
+	} else if (ret > 0) {
+		/*
+		 * According to section 5.4.1 of the "PCI BUS POWER MANAGEMENT
+		 * INTERFACE SPECIFICATION, REV. 1.2", a device transitioning
+		 * from D3hot to D0 _may_ perform an internal reset, thereby
+		 * going to "D0 Uninitialized" rather than "D0 Initialized".
+		 * For example, at least some versions of the 3c905B and the
+		 * 3c556B exhibit this behaviour.
+		 *
+		 * At least some laptop BIOSen (e.g. the Thinkpad T21) leave
+		 * devices in a D3hot state at boot.  Consequently, we need to
+		 * restore at least the BARs so that the device will be
+		 * accessible to its driver.
+		 */
+		pci_restore_bars(dev);
+	}
+
+	if (dev->bus->self)
+		pcie_aspm_pm_state_change(dev->bus->self);
+
+	return 0;
+}
+
+/**
+ * __pci_dev_set_current_state - Set current state of a PCI device
+ * @dev: Device to handle
+ * @data: pointer to state to be set
+ */
+static int __pci_dev_set_current_state(struct pci_dev *dev, void *data)
+{
+	pci_power_t state = *(pci_power_t *)data;
+
+	dev->current_state = state;
+	return 0;
+}
+
+/**
+ * pci_bus_set_current_state - Walk given bus and set current state of devices
+ * @bus: Top bus of the subtree to walk.
+ * @state: state to be set
+ */
+void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
+{
+	if (bus)
+		pci_walk_bus(bus, __pci_dev_set_current_state, &state);
+}
+
+/**
+ * pci_set_low_power_state - Put a PCI device into a low-power state.
+ * @dev: PCI device to handle.
+ * @state: PCI power state (D1, D2, D3hot) to put the device into.
+ *
+ * Use the device's PCI_PM_CTRL register to put it into a low-power state.
+ *
+ * RETURN VALUE:
+ * -EINVAL if the requested state is invalid.
+ * -EIO if device does not support PCI PM or its PM capabilities register has a
+ * wrong version, or device doesn't support the requested state.
+ * 0 if device already is in the requested state.
+ * 0 if device's power state has been successfully changed.
+ */
+static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state)
+{
+	u16 pmcsr;
+
+	if (!dev->pm_cap)
+		return -EIO;
+
+	/*
+	 * Validate transition: We can enter D0 from any state, but if
+	 * we're already in a low-power state, we can only go deeper.  E.g.,
+	 * we can go from D1 to D3, but we can't go directly from D3 to D1;
+	 * we'd have to go from D3 to D0, then to D1.
+	 */
+	if (dev->current_state <= PCI_D3cold && dev->current_state > state) {
+		pci_dbg(dev, "Invalid power transition (from %s to %s)\n",
+			pci_power_name(dev->current_state),
+			pci_power_name(state));
+		return -EINVAL;
+	}
+
+	/* Check if this device supports the desired state */
+	if ((state == PCI_D1 && !dev->d1_support)
+	   || (state == PCI_D2 && !dev->d2_support))
+		return -EIO;
+
+	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+	if (PCI_POSSIBLE_ERROR(pmcsr)) {
+		pci_err(dev, "Unable to change power state from %s to %s, device inaccessible\n",
+			pci_power_name(dev->current_state),
+			pci_power_name(state));
+		dev->current_state = PCI_D3cold;
+		return -EIO;
+	}
+
+	pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+	pmcsr |= state;
+
+	/* Enter specified state */
+	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr);
+
+	/* Mandatory power management transition delays; see PCI PM 1.2. */
+	if (state == PCI_D3hot)
+		pci_dev_d3_sleep(dev);
+	else if (state == PCI_D2)
+		udelay(PCI_PM_D2_DELAY);
+
+	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+	dev->current_state = pmcsr & PCI_PM_CTRL_STATE_MASK;
+	if (dev->current_state != state)
+		pci_info_ratelimited(dev, "Refused to change power state from %s to %s\n",
+				     pci_power_name(dev->current_state),
+				     pci_power_name(state));
+
+	if (dev->bus->self)
+		pcie_aspm_pm_state_change(dev->bus->self);
+
+	return 0;
+}
+
+/**
+ * pci_set_power_state - Set the power state of a PCI device
+ * @dev: PCI device to handle.
+ * @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
+ *
+ * Transition a device to a new power state, using the platform firmware and/or
+ * the device's PCI PM registers.
+ *
+ * RETURN VALUE:
+ * -EINVAL if the requested state is invalid.
+ * -EIO if device does not support PCI PM or its PM capabilities register has a
+ * wrong version, or device doesn't support the requested state.
+ * 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
+ * 0 if device already is in the requested state.
+ * 0 if the transition is to D3 but D3 is not supported.
+ * 0 if device's power state has been successfully changed.
+ */
+int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
+{
+	int error;
+
+	/* Bound the state we're entering */
+	if (state > PCI_D3cold)
+		state = PCI_D3cold;
+	else if (state < PCI_D0)
+		state = PCI_D0;
+	else if ((state == PCI_D1 || state == PCI_D2) && pci_no_d1d2(dev))
+
+		/*
+		 * If the device or the parent bridge do not support PCI
+		 * PM, ignore the request if we're doing anything other
+		 * than putting it into D0 (which would only happen on
+		 * boot).
+		 */
+		return 0;
+
+	/* Check if we're already there */
+	if (dev->current_state == state)
+		return 0;
+
+	if (state == PCI_D0)
+		return pci_set_full_power_state(dev);
+
+	/*
+	 * This device is quirked not to be put into D3, so don't put it in
+	 * D3
+	 */
+	if (state >= PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3))
+		return 0;
+
+	if (state == PCI_D3cold) {
+		/*
+		 * To put the device in D3cold, put it into D3hot in the native
+		 * way, then put it into D3cold using platform ops.
+		 */
+		error = pci_set_low_power_state(dev, PCI_D3hot);
+
+		if (pci_platform_power_transition(dev, PCI_D3cold))
+			return error;
+
+		/* Powering off a bridge may power off the whole hierarchy */
+		if (dev->current_state == PCI_D3cold)
+			pci_bus_set_current_state(dev->subordinate, PCI_D3cold);
+	} else {
+		error = pci_set_low_power_state(dev, state);
+
+		if (pci_platform_power_transition(dev, state))
+			return error;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(pci_set_power_state);
+
+#define PCI_EXP_SAVE_REGS	7
+
+static struct pci_cap_saved_state *_pci_find_saved_cap(struct pci_dev *pci_dev,
+						       u16 cap, bool extended)
+{
+	struct pci_cap_saved_state *tmp;
+
+	hlist_for_each_entry(tmp, &pci_dev->saved_cap_space, next) {
+		if (tmp->cap.cap_extended == extended && tmp->cap.cap_nr == cap)
+			return tmp;
+	}
+	return NULL;
+}
+
+struct pci_cap_saved_state *pci_find_saved_cap(struct pci_dev *dev, char cap)
+{
+	return _pci_find_saved_cap(dev, cap, false);
+}
+
+struct pci_cap_saved_state *pci_find_saved_ext_cap(struct pci_dev *dev, u16 cap)
+{
+	return _pci_find_saved_cap(dev, cap, true);
+}
+
+static int pci_save_pcie_state(struct pci_dev *dev)
+{
+	int i = 0;
+	struct pci_cap_saved_state *save_state;
+	u16 *cap;
+
+	if (!pci_is_pcie(dev))
+		return 0;
+
+	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
+	if (!save_state) {
+		pci_err(dev, "buffer not found in %s\n", __func__);
+		return -ENOMEM;
+	}
+
+	cap = (u16 *)&save_state->cap.data[0];
+	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &cap[i++]);
+	pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &cap[i++]);
+	pcie_capability_read_word(dev, PCI_EXP_SLTCTL, &cap[i++]);
+	pcie_capability_read_word(dev, PCI_EXP_RTCTL,  &cap[i++]);
+	pcie_capability_read_word(dev, PCI_EXP_DEVCTL2, &cap[i++]);
+	pcie_capability_read_word(dev, PCI_EXP_LNKCTL2, &cap[i++]);
+	pcie_capability_read_word(dev, PCI_EXP_SLTCTL2, &cap[i++]);
+
+	return 0;
+}
+
+void pci_bridge_reconfigure_ltr(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCIEASPM
+	struct pci_dev *bridge;
+	u32 ctl;
+
+	bridge = pci_upstream_bridge(dev);
+	if (bridge && bridge->ltr_path) {
+		pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2, &ctl);
+		if (!(ctl & PCI_EXP_DEVCTL2_LTR_EN)) {
+			pci_dbg(bridge, "re-enabling LTR\n");
+			pcie_capability_set_word(bridge, PCI_EXP_DEVCTL2,
+						 PCI_EXP_DEVCTL2_LTR_EN);
+		}
+	}
+#endif
+}
+
+static void pci_restore_pcie_state(struct pci_dev *dev)
+{
+	int i = 0;
+	struct pci_cap_saved_state *save_state;
+	u16 *cap;
+
+	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
+	if (!save_state)
+		return;
+
+	/*
+	 * Downstream ports reset the LTR enable bit when link goes down.
+	 * Check and re-configure the bit here before restoring device.
+	 * PCIe r5.0, sec 7.5.3.16.
+	 */
+	pci_bridge_reconfigure_ltr(dev);
+
+	cap = (u16 *)&save_state->cap.data[0];
+	pcie_capability_write_word(dev, PCI_EXP_DEVCTL, cap[i++]);
+	pcie_capability_write_word(dev, PCI_EXP_LNKCTL, cap[i++]);
+	pcie_capability_write_word(dev, PCI_EXP_SLTCTL, cap[i++]);
+	pcie_capability_write_word(dev, PCI_EXP_RTCTL, cap[i++]);
+	pcie_capability_write_word(dev, PCI_EXP_DEVCTL2, cap[i++]);
+	pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, cap[i++]);
+	pcie_capability_write_word(dev, PCI_EXP_SLTCTL2, cap[i++]);
+}
+
+static int pci_save_pcix_state(struct pci_dev *dev)
+{
+	int pos;
+	struct pci_cap_saved_state *save_state;
+
+	pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+	if (!pos)
+		return 0;
+
+	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX);
+	if (!save_state) {
+		pci_err(dev, "buffer not found in %s\n", __func__);
+		return -ENOMEM;
+	}
+
+	pci_read_config_word(dev, pos + PCI_X_CMD,
+			     (u16 *)save_state->cap.data);
+
+	return 0;
+}
+
+static void pci_restore_pcix_state(struct pci_dev *dev)
+{
+	int i = 0, pos;
+	struct pci_cap_saved_state *save_state;
+	u16 *cap;
+
+	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX);
+	pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+	if (!save_state || !pos)
+		return;
+	cap = (u16 *)&save_state->cap.data[0];
+
+	pci_write_config_word(dev, pos + PCI_X_CMD, cap[i++]);
+}
+
+static void pci_save_ltr_state(struct pci_dev *dev)
+{
+	int ltr;
+	struct pci_cap_saved_state *save_state;
+	u32 *cap;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	ltr = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_LTR);
+	if (!ltr)
+		return;
+
+	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_LTR);
+	if (!save_state) {
+		pci_err(dev, "no suspend buffer for LTR; ASPM issues possible after resume\n");
+		return;
+	}
+
+	/* Some broken devices only support dword access to LTR */
+	cap = &save_state->cap.data[0];
+	pci_read_config_dword(dev, ltr + PCI_LTR_MAX_SNOOP_LAT, cap);
+}
+
+static void pci_restore_ltr_state(struct pci_dev *dev)
+{
+	struct pci_cap_saved_state *save_state;
+	int ltr;
+	u32 *cap;
+
+	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_LTR);
+	ltr = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_LTR);
+	if (!save_state || !ltr)
+		return;
+
+	/* Some broken devices only support dword access to LTR */
+	cap = &save_state->cap.data[0];
+	pci_write_config_dword(dev, ltr + PCI_LTR_MAX_SNOOP_LAT, *cap);
+}
+
+/**
+ * pci_save_state - save the PCI configuration space of a device before
+ *		    suspending
+ * @dev: PCI device that we're dealing with
+ */
+int pci_save_state(struct pci_dev *dev)
+{
+	int i;
+	/* XXX: 100% dword access ok here? */
+	for (i = 0; i < 16; i++) {
+		pci_read_config_dword(dev, i * 4, &dev->saved_config_space[i]);
+		pci_dbg(dev, "save config %#04x: %#010x\n",
+			i * 4, dev->saved_config_space[i]);
+	}
+	dev->state_saved = true;
+
+	i = pci_save_pcie_state(dev);
+	if (i != 0)
+		return i;
+
+	i = pci_save_pcix_state(dev);
+	if (i != 0)
+		return i;
+
+	pci_save_ltr_state(dev);
+	pci_save_dpc_state(dev);
+	pci_save_aer_state(dev);
+	pci_save_ptm_state(dev);
+	return pci_save_vc_state(dev);
+}
+EXPORT_SYMBOL(pci_save_state);
+
+static void pci_restore_config_dword(struct pci_dev *pdev, int offset,
+				     u32 saved_val, int retry, bool force)
+{
+	u32 val;
+
+	pci_read_config_dword(pdev, offset, &val);
+	if (!force && val == saved_val)
+		return;
+
+	for (;;) {
+		pci_dbg(pdev, "restore config %#04x: %#010x -> %#010x\n",
+			offset, val, saved_val);
+		pci_write_config_dword(pdev, offset, saved_val);
+		if (retry-- <= 0)
+			return;
+
+		pci_read_config_dword(pdev, offset, &val);
+		if (val == saved_val)
+			return;
+
+		mdelay(1);
+	}
+}
+
+static void pci_restore_config_space_range(struct pci_dev *pdev,
+					   int start, int end, int retry,
+					   bool force)
+{
+	int index;
+
+	for (index = end; index >= start; index--)
+		pci_restore_config_dword(pdev, 4 * index,
+					 pdev->saved_config_space[index],
+					 retry, force);
+}
+
+static void pci_restore_config_space(struct pci_dev *pdev)
+{
+	if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
+		pci_restore_config_space_range(pdev, 10, 15, 0, false);
+		/* Restore BARs before the command register. */
+		pci_restore_config_space_range(pdev, 4, 9, 10, false);
+		pci_restore_config_space_range(pdev, 0, 3, 0, false);
+	} else if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+		pci_restore_config_space_range(pdev, 12, 15, 0, false);
+
+		/*
+		 * Force rewriting of prefetch registers to avoid S3 resume
+		 * issues on Intel PCI bridges that occur when these
+		 * registers are not explicitly written.
+		 */
+		pci_restore_config_space_range(pdev, 9, 11, 0, true);
+		pci_restore_config_space_range(pdev, 0, 8, 0, false);
+	} else {
+		pci_restore_config_space_range(pdev, 0, 15, 0, false);
+	}
+}
+
+static void pci_restore_rebar_state(struct pci_dev *pdev)
+{
+	unsigned int pos, nbars, i;
+	u32 ctrl;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR);
+	if (!pos)
+		return;
+
+	pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+	nbars = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >>
+		    PCI_REBAR_CTRL_NBAR_SHIFT;
+
+	for (i = 0; i < nbars; i++, pos += 8) {
+		struct resource *res;
+		int bar_idx, size;
+
+		pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+		bar_idx = ctrl & PCI_REBAR_CTRL_BAR_IDX;
+		res = pdev->resource + bar_idx;
+		size = pci_rebar_bytes_to_size(resource_size(res));
+		ctrl &= ~PCI_REBAR_CTRL_BAR_SIZE;
+		ctrl |= size << PCI_REBAR_CTRL_BAR_SHIFT;
+		pci_write_config_dword(pdev, pos + PCI_REBAR_CTRL, ctrl);
+	}
+}
+
+/**
+ * pci_restore_state - Restore the saved state of a PCI device
+ * @dev: PCI device that we're dealing with
+ */
+void pci_restore_state(struct pci_dev *dev)
+{
+	if (!dev->state_saved)
+		return;
+
+	/*
+	 * Restore max latencies (in the LTR capability) before enabling
+	 * LTR itself (in the PCIe capability).
+	 */
+	pci_restore_ltr_state(dev);
+
+	pci_restore_pcie_state(dev);
+	pci_restore_pasid_state(dev);
+	pci_restore_pri_state(dev);
+	pci_restore_ats_state(dev);
+	pci_restore_vc_state(dev);
+	pci_restore_rebar_state(dev);
+	pci_restore_dpc_state(dev);
+	pci_restore_ptm_state(dev);
+
+	pci_aer_clear_status(dev);
+	pci_restore_aer_state(dev);
+
+	pci_restore_config_space(dev);
+
+	pci_restore_pcix_state(dev);
+	pci_restore_msi_state(dev);
+
+	/* Restore ACS and IOV configuration state */
+	pci_enable_acs(dev);
+	pci_restore_iov_state(dev);
+
+	dev->state_saved = false;
+}
+EXPORT_SYMBOL(pci_restore_state);
+
+struct pci_saved_state {
+	u32 config_space[16];
+	struct pci_cap_saved_data cap[];
+};
+
+/**
+ * pci_store_saved_state - Allocate and return an opaque struct containing
+ *			   the device saved state.
+ * @dev: PCI device that we're dealing with
+ *
+ * Return NULL if no state or error.
+ */
+struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev)
+{
+	struct pci_saved_state *state;
+	struct pci_cap_saved_state *tmp;
+	struct pci_cap_saved_data *cap;
+	size_t size;
+
+	if (!dev->state_saved)
+		return NULL;
+
+	size = sizeof(*state) + sizeof(struct pci_cap_saved_data);
+
+	hlist_for_each_entry(tmp, &dev->saved_cap_space, next)
+		size += sizeof(struct pci_cap_saved_data) + tmp->cap.size;
+
+	state = kzalloc(size, GFP_KERNEL);
+	if (!state)
+		return NULL;
+
+	memcpy(state->config_space, dev->saved_config_space,
+	       sizeof(state->config_space));
+
+	cap = state->cap;
+	hlist_for_each_entry(tmp, &dev->saved_cap_space, next) {
+		size_t len = sizeof(struct pci_cap_saved_data) + tmp->cap.size;
+		memcpy(cap, &tmp->cap, len);
+		cap = (struct pci_cap_saved_data *)((u8 *)cap + len);
+	}
+	/* Empty cap_save terminates list */
+
+	return state;
+}
+EXPORT_SYMBOL_GPL(pci_store_saved_state);
+
+/**
+ * pci_load_saved_state - Reload the provided save state into struct pci_dev.
+ * @dev: PCI device that we're dealing with
+ * @state: Saved state returned from pci_store_saved_state()
+ */
+int pci_load_saved_state(struct pci_dev *dev,
+			 struct pci_saved_state *state)
+{
+	struct pci_cap_saved_data *cap;
+
+	dev->state_saved = false;
+
+	if (!state)
+		return 0;
+
+	memcpy(dev->saved_config_space, state->config_space,
+	       sizeof(state->config_space));
+
+	cap = state->cap;
+	while (cap->size) {
+		struct pci_cap_saved_state *tmp;
+
+		tmp = _pci_find_saved_cap(dev, cap->cap_nr, cap->cap_extended);
+		if (!tmp || tmp->cap.size != cap->size)
+			return -EINVAL;
+
+		memcpy(tmp->cap.data, cap->data, tmp->cap.size);
+		cap = (struct pci_cap_saved_data *)((u8 *)cap +
+		       sizeof(struct pci_cap_saved_data) + cap->size);
+	}
+
+	dev->state_saved = true;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_load_saved_state);
+
+/**
+ * pci_load_and_free_saved_state - Reload the save state pointed to by state,
+ *				   and free the memory allocated for it.
+ * @dev: PCI device that we're dealing with
+ * @state: Pointer to saved state returned from pci_store_saved_state()
+ */
+int pci_load_and_free_saved_state(struct pci_dev *dev,
+				  struct pci_saved_state **state)
+{
+	int ret = pci_load_saved_state(dev, *state);
+	kfree(*state);
+	*state = NULL;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_load_and_free_saved_state);
+
+int __weak pcibios_enable_device(struct pci_dev *dev, int bars)
+{
+	return pci_enable_resources(dev, bars);
+}
+
+static int do_pci_enable_device(struct pci_dev *dev, int bars)
+{
+	int err;
+	struct pci_dev *bridge;
+	u16 cmd;
+	u8 pin;
+
+	err = pci_set_power_state(dev, PCI_D0);
+	if (err < 0 && err != -EIO)
+		return err;
+
+	bridge = pci_upstream_bridge(dev);
+	if (bridge)
+		pcie_aspm_powersave_config_link(bridge);
+
+	err = pcibios_enable_device(dev, bars);
+	if (err < 0)
+		return err;
+	pci_fixup_device(pci_fixup_enable, dev);
+
+	if (dev->msi_enabled || dev->msix_enabled)
+		return 0;
+
+	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+	if (pin) {
+		pci_read_config_word(dev, PCI_COMMAND, &cmd);
+		if (cmd & PCI_COMMAND_INTX_DISABLE)
+			pci_write_config_word(dev, PCI_COMMAND,
+					      cmd & ~PCI_COMMAND_INTX_DISABLE);
+	}
+
+	return 0;
+}
+
+/**
+ * pci_reenable_device - Resume abandoned device
+ * @dev: PCI device to be resumed
+ *
+ * NOTE: This function is a backend of pci_default_resume() and is not supposed
+ * to be called by normal code, write proper resume handler and use it instead.
+ */
+int pci_reenable_device(struct pci_dev *dev)
+{
+	if (pci_is_enabled(dev))
+		return do_pci_enable_device(dev, (1 << PCI_NUM_RESOURCES) - 1);
+	return 0;
+}
+EXPORT_SYMBOL(pci_reenable_device);
+
+static void pci_enable_bridge(struct pci_dev *dev)
+{
+	struct pci_dev *bridge;
+	int retval;
+
+	bridge = pci_upstream_bridge(dev);
+	if (bridge)
+		pci_enable_bridge(bridge);
+
+	if (pci_is_enabled(dev)) {
+		if (!dev->is_busmaster)
+			pci_set_master(dev);
+		return;
+	}
+
+	retval = pci_enable_device(dev);
+	if (retval)
+		pci_err(dev, "Error enabling bridge (%d), continuing\n",
+			retval);
+	pci_set_master(dev);
+}
+
+static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags)
+{
+	struct pci_dev *bridge;
+	int err;
+	int i, bars = 0;
+
+	/*
+	 * Power state could be unknown at this point, either due to a fresh
+	 * boot or a device removal call.  So get the current power state
+	 * so that things like MSI message writing will behave as expected
+	 * (e.g. if the device really is in D0 at enable time).
+	 */
+	pci_update_current_state(dev, dev->current_state);
+
+	if (atomic_inc_return(&dev->enable_cnt) > 1)
+		return 0;		/* already enabled */
+
+	bridge = pci_upstream_bridge(dev);
+	if (bridge)
+		pci_enable_bridge(bridge);
+
+	/* only skip sriov related */
+	for (i = 0; i <= PCI_ROM_RESOURCE; i++)
+		if (dev->resource[i].flags & flags)
+			bars |= (1 << i);
+	for (i = PCI_BRIDGE_RESOURCES; i < DEVICE_COUNT_RESOURCE; i++)
+		if (dev->resource[i].flags & flags)
+			bars |= (1 << i);
+
+	err = do_pci_enable_device(dev, bars);
+	if (err < 0)
+		atomic_dec(&dev->enable_cnt);
+	return err;
+}
+
+/**
+ * pci_enable_device_io - Initialize a device for use with IO space
+ * @dev: PCI device to be initialized
+ *
+ * Initialize device before it's used by a driver. Ask low-level code
+ * to enable I/O resources. Wake up the device if it was suspended.
+ * Beware, this function can fail.
+ */
+int pci_enable_device_io(struct pci_dev *dev)
+{
+	return pci_enable_device_flags(dev, IORESOURCE_IO);
+}
+EXPORT_SYMBOL(pci_enable_device_io);
+
+/**
+ * pci_enable_device_mem - Initialize a device for use with Memory space
+ * @dev: PCI device to be initialized
+ *
+ * Initialize device before it's used by a driver. Ask low-level code
+ * to enable Memory resources. Wake up the device if it was suspended.
+ * Beware, this function can fail.
+ */
+int pci_enable_device_mem(struct pci_dev *dev)
+{
+	return pci_enable_device_flags(dev, IORESOURCE_MEM);
+}
+EXPORT_SYMBOL(pci_enable_device_mem);
+
+/**
+ * pci_enable_device - Initialize device before it's used by a driver.
+ * @dev: PCI device to be initialized
+ *
+ * Initialize device before it's used by a driver. Ask low-level code
+ * to enable I/O and memory. Wake up the device if it was suspended.
+ * Beware, this function can fail.
+ *
+ * Note we don't actually enable the device many times if we call
+ * this function repeatedly (we just increment the count).
+ */
+int pci_enable_device(struct pci_dev *dev)
+{
+	return pci_enable_device_flags(dev, IORESOURCE_MEM | IORESOURCE_IO);
+}
+EXPORT_SYMBOL(pci_enable_device);
+
+/*
+ * Managed PCI resources.  This manages device on/off, INTx/MSI/MSI-X
+ * on/off and BAR regions.  pci_dev itself records MSI/MSI-X status, so
+ * there's no need to track it separately.  pci_devres is initialized
+ * when a device is enabled using managed PCI device enable interface.
+ */
+struct pci_devres {
+	unsigned int enabled:1;
+	unsigned int pinned:1;
+	unsigned int orig_intx:1;
+	unsigned int restore_intx:1;
+	unsigned int mwi:1;
+	u32 region_mask;
+};
+
+static void pcim_release(struct device *gendev, void *res)
+{
+	struct pci_dev *dev = to_pci_dev(gendev);
+	struct pci_devres *this = res;
+	int i;
+
+	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++)
+		if (this->region_mask & (1 << i))
+			pci_release_region(dev, i);
+
+	if (this->mwi)
+		pci_clear_mwi(dev);
+
+	if (this->restore_intx)
+		pci_intx(dev, this->orig_intx);
+
+	if (this->enabled && !this->pinned)
+		pci_disable_device(dev);
+}
+
+static struct pci_devres *get_pci_dr(struct pci_dev *pdev)
+{
+	struct pci_devres *dr, *new_dr;
+
+	dr = devres_find(&pdev->dev, pcim_release, NULL, NULL);
+	if (dr)
+		return dr;
+
+	new_dr = devres_alloc(pcim_release, sizeof(*new_dr), GFP_KERNEL);
+	if (!new_dr)
+		return NULL;
+	return devres_get(&pdev->dev, new_dr, NULL, NULL);
+}
+
+static struct pci_devres *find_pci_dr(struct pci_dev *pdev)
+{
+	if (pci_is_managed(pdev))
+		return devres_find(&pdev->dev, pcim_release, NULL, NULL);
+	return NULL;
+}
+
+/**
+ * pcim_enable_device - Managed pci_enable_device()
+ * @pdev: PCI device to be initialized
+ *
+ * Managed pci_enable_device().
+ */
+int pcim_enable_device(struct pci_dev *pdev)
+{
+	struct pci_devres *dr;
+	int rc;
+
+	dr = get_pci_dr(pdev);
+	if (unlikely(!dr))
+		return -ENOMEM;
+	if (dr->enabled)
+		return 0;
+
+	rc = pci_enable_device(pdev);
+	if (!rc) {
+		pdev->is_managed = 1;
+		dr->enabled = 1;
+	}
+	return rc;
+}
+EXPORT_SYMBOL(pcim_enable_device);
+
+/**
+ * pcim_pin_device - Pin managed PCI device
+ * @pdev: PCI device to pin
+ *
+ * Pin managed PCI device @pdev.  Pinned device won't be disabled on
+ * driver detach.  @pdev must have been enabled with
+ * pcim_enable_device().
+ */
+void pcim_pin_device(struct pci_dev *pdev)
+{
+	struct pci_devres *dr;
+
+	dr = find_pci_dr(pdev);
+	WARN_ON(!dr || !dr->enabled);
+	if (dr)
+		dr->pinned = 1;
+}
+EXPORT_SYMBOL(pcim_pin_device);
+
+/*
+ * pcibios_device_add - provide arch specific hooks when adding device dev
+ * @dev: the PCI device being added
+ *
+ * Permits the platform to provide architecture specific functionality when
+ * devices are added. This is the default implementation. Architecture
+ * implementations can override this.
+ */
+int __weak pcibios_device_add(struct pci_dev *dev)
+{
+	return 0;
+}
+
+/**
+ * pcibios_release_device - provide arch specific hooks when releasing
+ *			    device dev
+ * @dev: the PCI device being released
+ *
+ * Permits the platform to provide architecture specific functionality when
+ * devices are released. This is the default implementation. Architecture
+ * implementations can override this.
+ */
+void __weak pcibios_release_device(struct pci_dev *dev) {}
+
+/**
+ * pcibios_disable_device - disable arch specific PCI resources for device dev
+ * @dev: the PCI device to disable
+ *
+ * Disables architecture specific PCI resources for the device. This
+ * is the default implementation. Architecture implementations can
+ * override this.
+ */
+void __weak pcibios_disable_device(struct pci_dev *dev) {}
+
+/**
+ * pcibios_penalize_isa_irq - penalize an ISA IRQ
+ * @irq: ISA IRQ to penalize
+ * @active: IRQ active or not
+ *
+ * Permits the platform to provide architecture-specific functionality when
+ * penalizing ISA IRQs. This is the default implementation. Architecture
+ * implementations can override this.
+ */
+void __weak pcibios_penalize_isa_irq(int irq, int active) {}
+
+static void do_pci_disable_device(struct pci_dev *dev)
+{
+	u16 pci_command;
+
+	pci_read_config_word(dev, PCI_COMMAND, &pci_command);
+	if (pci_command & PCI_COMMAND_MASTER) {
+		pci_command &= ~PCI_COMMAND_MASTER;
+		pci_write_config_word(dev, PCI_COMMAND, pci_command);
+	}
+
+	pcibios_disable_device(dev);
+}
+
+/**
+ * pci_disable_enabled_device - Disable device without updating enable_cnt
+ * @dev: PCI device to disable
+ *
+ * NOTE: This function is a backend of PCI power management routines and is
+ * not supposed to be called drivers.
+ */
+void pci_disable_enabled_device(struct pci_dev *dev)
+{
+	if (pci_is_enabled(dev))
+		do_pci_disable_device(dev);
+}
+
+/**
+ * pci_disable_device - Disable PCI device after use
+ * @dev: PCI device to be disabled
+ *
+ * Signal to the system that the PCI device is not in use by the system
+ * anymore.  This only involves disabling PCI bus-mastering, if active.
+ *
+ * Note we don't actually disable the device until all callers of
+ * pci_enable_device() have called pci_disable_device().
+ */
+void pci_disable_device(struct pci_dev *dev)
+{
+	struct pci_devres *dr;
+
+	dr = find_pci_dr(dev);
+	if (dr)
+		dr->enabled = 0;
+
+	dev_WARN_ONCE(&dev->dev, atomic_read(&dev->enable_cnt) <= 0,
+		      "disabling already-disabled device");
+
+	if (atomic_dec_return(&dev->enable_cnt) != 0)
+		return;
+
+	do_pci_disable_device(dev);
+
+	dev->is_busmaster = 0;
+}
+EXPORT_SYMBOL(pci_disable_device);
+
+/**
+ * pcibios_set_pcie_reset_state - set reset state for device dev
+ * @dev: the PCIe device reset
+ * @state: Reset state to enter into
+ *
+ * Set the PCIe reset state for the device. This is the default
+ * implementation. Architecture implementations can override this.
+ */
+int __weak pcibios_set_pcie_reset_state(struct pci_dev *dev,
+					enum pcie_reset_state state)
+{
+	return -EINVAL;
+}
+
+/**
+ * pci_set_pcie_reset_state - set reset state for device dev
+ * @dev: the PCIe device reset
+ * @state: Reset state to enter into
+ *
+ * Sets the PCI reset state for the device.
+ */
+int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
+{
+	return pcibios_set_pcie_reset_state(dev, state);
+}
+EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state);
+
+#ifdef CONFIG_PCIEAER
+void pcie_clear_device_status(struct pci_dev *dev)
+{
+	u16 sta;
+
+	pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta);
+	pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta);
+}
+#endif
+
+/**
+ * pcie_clear_root_pme_status - Clear root port PME interrupt status.
+ * @dev: PCIe root port or event collector.
+ */
+void pcie_clear_root_pme_status(struct pci_dev *dev)
+{
+	pcie_capability_set_dword(dev, PCI_EXP_RTSTA, PCI_EXP_RTSTA_PME);
+}
+
+/**
+ * pci_check_pme_status - Check if given device has generated PME.
+ * @dev: Device to check.
+ *
+ * Check the PME status of the device and if set, clear it and clear PME enable
+ * (if set).  Return 'true' if PME status and PME enable were both set or
+ * 'false' otherwise.
+ */
+bool pci_check_pme_status(struct pci_dev *dev)
+{
+	int pmcsr_pos;
+	u16 pmcsr;
+	bool ret = false;
+
+	if (!dev->pm_cap)
+		return false;
+
+	pmcsr_pos = dev->pm_cap + PCI_PM_CTRL;
+	pci_read_config_word(dev, pmcsr_pos, &pmcsr);
+	if (!(pmcsr & PCI_PM_CTRL_PME_STATUS))
+		return false;
+
+	/* Clear PME status. */
+	pmcsr |= PCI_PM_CTRL_PME_STATUS;
+	if (pmcsr & PCI_PM_CTRL_PME_ENABLE) {
+		/* Disable PME to avoid interrupt flood. */
+		pmcsr &= ~PCI_PM_CTRL_PME_ENABLE;
+		ret = true;
+	}
+
+	pci_write_config_word(dev, pmcsr_pos, pmcsr);
+
+	return ret;
+}
+
+/**
+ * pci_pme_wakeup - Wake up a PCI device if its PME Status bit is set.
+ * @dev: Device to handle.
+ * @pme_poll_reset: Whether or not to reset the device's pme_poll flag.
+ *
+ * Check if @dev has generated PME and queue a resume request for it in that
+ * case.
+ */
+static int pci_pme_wakeup(struct pci_dev *dev, void *pme_poll_reset)
+{
+	if (pme_poll_reset && dev->pme_poll)
+		dev->pme_poll = false;
+
+	if (pci_check_pme_status(dev)) {
+		pci_wakeup_event(dev);
+		pm_request_resume(&dev->dev);
+	}
+	return 0;
+}
+
+/**
+ * pci_pme_wakeup_bus - Walk given bus and wake up devices on it, if necessary.
+ * @bus: Top bus of the subtree to walk.
+ */
+void pci_pme_wakeup_bus(struct pci_bus *bus)
+{
+	if (bus)
+		pci_walk_bus(bus, pci_pme_wakeup, (void *)true);
+}
+
+
+/**
+ * pci_pme_capable - check the capability of PCI device to generate PME#
+ * @dev: PCI device to handle.
+ * @state: PCI state from which device will issue PME#.
+ */
+bool pci_pme_capable(struct pci_dev *dev, pci_power_t state)
+{
+	if (!dev->pm_cap)
+		return false;
+
+	return !!(dev->pme_support & (1 << state));
+}
+EXPORT_SYMBOL(pci_pme_capable);
+
+static void pci_pme_list_scan(struct work_struct *work)
+{
+	struct pci_pme_device *pme_dev, *n;
+
+	mutex_lock(&pci_pme_list_mutex);
+	list_for_each_entry_safe(pme_dev, n, &pci_pme_list, list) {
+		struct pci_dev *pdev = pme_dev->dev;
+
+		if (pdev->pme_poll) {
+			struct pci_dev *bridge = pdev->bus->self;
+			struct device *dev = &pdev->dev;
+			int pm_status;
+
+			/*
+			 * If bridge is in low power state, the
+			 * configuration space of subordinate devices
+			 * may be not accessible
+			 */
+			if (bridge && bridge->current_state != PCI_D0)
+				continue;
+
+			/*
+			 * If the device is in a low power state it
+			 * should not be polled either.
+			 */
+			pm_status = pm_runtime_get_if_active(dev, true);
+			if (!pm_status)
+				continue;
+
+			if (pdev->current_state != PCI_D3cold)
+				pci_pme_wakeup(pdev, NULL);
+
+			if (pm_status > 0)
+				pm_runtime_put(dev);
+		} else {
+			list_del(&pme_dev->list);
+			kfree(pme_dev);
+		}
+	}
+	if (!list_empty(&pci_pme_list))
+		queue_delayed_work(system_freezable_wq, &pci_pme_work,
+				   msecs_to_jiffies(PME_TIMEOUT));
+	mutex_unlock(&pci_pme_list_mutex);
+}
+
+static void __pci_pme_active(struct pci_dev *dev, bool enable)
+{
+	u16 pmcsr;
+
+	if (!dev->pme_support)
+		return;
+
+	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+	/* Clear PME_Status by writing 1 to it and enable PME# */
+	pmcsr |= PCI_PM_CTRL_PME_STATUS | PCI_PM_CTRL_PME_ENABLE;
+	if (!enable)
+		pmcsr &= ~PCI_PM_CTRL_PME_ENABLE;
+
+	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr);
+}
+
+/**
+ * pci_pme_restore - Restore PME configuration after config space restore.
+ * @dev: PCI device to update.
+ */
+void pci_pme_restore(struct pci_dev *dev)
+{
+	u16 pmcsr;
+
+	if (!dev->pme_support)
+		return;
+
+	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+	if (dev->wakeup_prepared) {
+		pmcsr |= PCI_PM_CTRL_PME_ENABLE;
+		pmcsr &= ~PCI_PM_CTRL_PME_STATUS;
+	} else {
+		pmcsr &= ~PCI_PM_CTRL_PME_ENABLE;
+		pmcsr |= PCI_PM_CTRL_PME_STATUS;
+	}
+	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr);
+}
+
+/**
+ * pci_pme_active - enable or disable PCI device's PME# function
+ * @dev: PCI device to handle.
+ * @enable: 'true' to enable PME# generation; 'false' to disable it.
+ *
+ * The caller must verify that the device is capable of generating PME# before
+ * calling this function with @enable equal to 'true'.
+ */
+void pci_pme_active(struct pci_dev *dev, bool enable)
+{
+	__pci_pme_active(dev, enable);
+
+	/*
+	 * PCI (as opposed to PCIe) PME requires that the device have
+	 * its PME# line hooked up correctly. Not all hardware vendors
+	 * do this, so the PME never gets delivered and the device
+	 * remains asleep. The easiest way around this is to
+	 * periodically walk the list of suspended devices and check
+	 * whether any have their PME flag set. The assumption is that
+	 * we'll wake up often enough anyway that this won't be a huge
+	 * hit, and the power savings from the devices will still be a
+	 * win.
+	 *
+	 * Although PCIe uses in-band PME message instead of PME# line
+	 * to report PME, PME does not work for some PCIe devices in
+	 * reality.  For example, there are devices that set their PME
+	 * status bits, but don't really bother to send a PME message;
+	 * there are PCI Express Root Ports that don't bother to
+	 * trigger interrupts when they receive PME messages from the
+	 * devices below.  So PME poll is used for PCIe devices too.
+	 */
+
+	if (dev->pme_poll) {
+		struct pci_pme_device *pme_dev;
+		if (enable) {
+			pme_dev = kmalloc(sizeof(struct pci_pme_device),
+					  GFP_KERNEL);
+			if (!pme_dev) {
+				pci_warn(dev, "can't enable PME#\n");
+				return;
+			}
+			pme_dev->dev = dev;
+			mutex_lock(&pci_pme_list_mutex);
+			list_add(&pme_dev->list, &pci_pme_list);
+			if (list_is_singular(&pci_pme_list))
+				queue_delayed_work(system_freezable_wq,
+						   &pci_pme_work,
+						   msecs_to_jiffies(PME_TIMEOUT));
+			mutex_unlock(&pci_pme_list_mutex);
+		} else {
+			mutex_lock(&pci_pme_list_mutex);
+			list_for_each_entry(pme_dev, &pci_pme_list, list) {
+				if (pme_dev->dev == dev) {
+					list_del(&pme_dev->list);
+					kfree(pme_dev);
+					break;
+				}
+			}
+			mutex_unlock(&pci_pme_list_mutex);
+		}
+	}
+
+	pci_dbg(dev, "PME# %s\n", enable ? "enabled" : "disabled");
+}
+EXPORT_SYMBOL(pci_pme_active);
+
+/**
+ * __pci_enable_wake - enable PCI device as wakeup event source
+ * @dev: PCI device affected
+ * @state: PCI state from which device will issue wakeup events
+ * @enable: True to enable event generation; false to disable
+ *
+ * This enables the device as a wakeup event source, or disables it.
+ * When such events involves platform-specific hooks, those hooks are
+ * called automatically by this routine.
+ *
+ * Devices with legacy power management (no standard PCI PM capabilities)
+ * always require such platform hooks.
+ *
+ * RETURN VALUE:
+ * 0 is returned on success
+ * -EINVAL is returned if device is not supposed to wake up the system
+ * Error code depending on the platform is returned if both the platform and
+ * the native mechanism fail to enable the generation of wake-up events
+ */
+static int __pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable)
+{
+	int ret = 0;
+
+	/*
+	 * Bridges that are not power-manageable directly only signal
+	 * wakeup on behalf of subordinate devices which is set up
+	 * elsewhere, so skip them. However, bridges that are
+	 * power-manageable may signal wakeup for themselves (for example,
+	 * on a hotplug event) and they need to be covered here.
+	 */
+	if (!pci_power_manageable(dev))
+		return 0;
+
+	/* Don't do the same thing twice in a row for one device. */
+	if (!!enable == !!dev->wakeup_prepared)
+		return 0;
+
+	/*
+	 * According to "PCI System Architecture" 4th ed. by Tom Shanley & Don
+	 * Anderson we should be doing PME# wake enable followed by ACPI wake
+	 * enable.  To disable wake-up we call the platform first, for symmetry.
+	 */
+
+	if (enable) {
+		int error;
+
+		/*
+		 * Enable PME signaling if the device can signal PME from
+		 * D3cold regardless of whether or not it can signal PME from
+		 * the current target state, because that will allow it to
+		 * signal PME when the hierarchy above it goes into D3cold and
+		 * the device itself ends up in D3cold as a result of that.
+		 */
+		if (pci_pme_capable(dev, state) || pci_pme_capable(dev, PCI_D3cold))
+			pci_pme_active(dev, true);
+		else
+			ret = 1;
+		error = platform_pci_set_wakeup(dev, true);
+		if (ret)
+			ret = error;
+		if (!ret)
+			dev->wakeup_prepared = true;
+	} else {
+		platform_pci_set_wakeup(dev, false);
+		pci_pme_active(dev, false);
+		dev->wakeup_prepared = false;
+	}
+
+	return ret;
+}
+
+/**
+ * pci_enable_wake - change wakeup settings for a PCI device
+ * @pci_dev: Target device
+ * @state: PCI state from which device will issue wakeup events
+ * @enable: Whether or not to enable event generation
+ *
+ * If @enable is set, check device_may_wakeup() for the device before calling
+ * __pci_enable_wake() for it.
+ */
+int pci_enable_wake(struct pci_dev *pci_dev, pci_power_t state, bool enable)
+{
+	if (enable && !device_may_wakeup(&pci_dev->dev))
+		return -EINVAL;
+
+	return __pci_enable_wake(pci_dev, state, enable);
+}
+EXPORT_SYMBOL(pci_enable_wake);
+
+/**
+ * pci_wake_from_d3 - enable/disable device to wake up from D3_hot or D3_cold
+ * @dev: PCI device to prepare
+ * @enable: True to enable wake-up event generation; false to disable
+ *
+ * Many drivers want the device to wake up the system from D3_hot or D3_cold
+ * and this function allows them to set that up cleanly - pci_enable_wake()
+ * should not be called twice in a row to enable wake-up due to PCI PM vs ACPI
+ * ordering constraints.
+ *
+ * This function only returns error code if the device is not allowed to wake
+ * up the system from sleep or it is not capable of generating PME# from both
+ * D3_hot and D3_cold and the platform is unable to enable wake-up power for it.
+ */
+int pci_wake_from_d3(struct pci_dev *dev, bool enable)
+{
+	return pci_pme_capable(dev, PCI_D3cold) ?
+			pci_enable_wake(dev, PCI_D3cold, enable) :
+			pci_enable_wake(dev, PCI_D3hot, enable);
+}
+EXPORT_SYMBOL(pci_wake_from_d3);
+
+/**
+ * pci_target_state - find an appropriate low power state for a given PCI dev
+ * @dev: PCI device
+ * @wakeup: Whether or not wakeup functionality will be enabled for the device.
+ *
+ * Use underlying platform code to find a supported low power state for @dev.
+ * If the platform can't manage @dev, return the deepest state from which it
+ * can generate wake events, based on any available PME info.
+ */
+static pci_power_t pci_target_state(struct pci_dev *dev, bool wakeup)
+{
+	if (platform_pci_power_manageable(dev)) {
+		/*
+		 * Call the platform to find the target state for the device.
+		 */
+		pci_power_t state = platform_pci_choose_state(dev);
+
+		switch (state) {
+		case PCI_POWER_ERROR:
+		case PCI_UNKNOWN:
+			return PCI_D3hot;
+
+		case PCI_D1:
+		case PCI_D2:
+			if (pci_no_d1d2(dev))
+				return PCI_D3hot;
+		}
+
+		return state;
+	}
+
+	/*
+	 * If the device is in D3cold even though it's not power-manageable by
+	 * the platform, it may have been powered down by non-standard means.
+	 * Best to let it slumber.
+	 */
+	if (dev->current_state == PCI_D3cold)
+		return PCI_D3cold;
+	else if (!dev->pm_cap)
+		return PCI_D0;
+
+	if (wakeup && dev->pme_support) {
+		pci_power_t state = PCI_D3hot;
+
+		/*
+		 * Find the deepest state from which the device can generate
+		 * PME#.
+		 */
+		while (state && !(dev->pme_support & (1 << state)))
+			state--;
+
+		if (state)
+			return state;
+		else if (dev->pme_support & 1)
+			return PCI_D0;
+	}
+
+	return PCI_D3hot;
+}
+
+/**
+ * pci_prepare_to_sleep - prepare PCI device for system-wide transition
+ *			  into a sleep state
+ * @dev: Device to handle.
+ *
+ * Choose the power state appropriate for the device depending on whether
+ * it can wake up the system and/or is power manageable by the platform
+ * (PCI_D3hot is the default) and put the device into that state.
+ */
+int pci_prepare_to_sleep(struct pci_dev *dev)
+{
+	bool wakeup = device_may_wakeup(&dev->dev);
+	pci_power_t target_state = pci_target_state(dev, wakeup);
+	int error;
+
+	if (target_state == PCI_POWER_ERROR)
+		return -EIO;
+
+	pci_enable_wake(dev, target_state, wakeup);
+
+	error = pci_set_power_state(dev, target_state);
+
+	if (error)
+		pci_enable_wake(dev, target_state, false);
+
+	return error;
+}
+EXPORT_SYMBOL(pci_prepare_to_sleep);
+
+/**
+ * pci_back_from_sleep - turn PCI device on during system-wide transition
+ *			 into working state
+ * @dev: Device to handle.
+ *
+ * Disable device's system wake-up capability and put it into D0.
+ */
+int pci_back_from_sleep(struct pci_dev *dev)
+{
+	int ret = pci_set_power_state(dev, PCI_D0);
+
+	if (ret)
+		return ret;
+
+	pci_enable_wake(dev, PCI_D0, false);
+	return 0;
+}
+EXPORT_SYMBOL(pci_back_from_sleep);
+
+/**
+ * pci_finish_runtime_suspend - Carry out PCI-specific part of runtime suspend.
+ * @dev: PCI device being suspended.
+ *
+ * Prepare @dev to generate wake-up events at run time and put it into a low
+ * power state.
+ */
+int pci_finish_runtime_suspend(struct pci_dev *dev)
+{
+	pci_power_t target_state;
+	int error;
+
+	target_state = pci_target_state(dev, device_can_wakeup(&dev->dev));
+	if (target_state == PCI_POWER_ERROR)
+		return -EIO;
+
+	__pci_enable_wake(dev, target_state, pci_dev_run_wake(dev));
+
+	error = pci_set_power_state(dev, target_state);
+
+	if (error)
+		pci_enable_wake(dev, target_state, false);
+
+	return error;
+}
+
+/**
+ * pci_dev_run_wake - Check if device can generate run-time wake-up events.
+ * @dev: Device to check.
+ *
+ * Return true if the device itself is capable of generating wake-up events
+ * (through the platform or using the native PCIe PME) or if the device supports
+ * PME and one of its upstream bridges can generate wake-up events.
+ */
+bool pci_dev_run_wake(struct pci_dev *dev)
+{
+	struct pci_bus *bus = dev->bus;
+
+	if (!dev->pme_support)
+		return false;
+
+	/* PME-capable in principle, but not from the target power state */
+	if (!pci_pme_capable(dev, pci_target_state(dev, true)))
+		return false;
+
+	if (device_can_wakeup(&dev->dev))
+		return true;
+
+	while (bus->parent) {
+		struct pci_dev *bridge = bus->self;
+
+		if (device_can_wakeup(&bridge->dev))
+			return true;
+
+		bus = bus->parent;
+	}
+
+	/* We have reached the root bus. */
+	if (bus->bridge)
+		return device_can_wakeup(bus->bridge);
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(pci_dev_run_wake);
+
+/**
+ * pci_dev_need_resume - Check if it is necessary to resume the device.
+ * @pci_dev: Device to check.
+ *
+ * Return 'true' if the device is not runtime-suspended or it has to be
+ * reconfigured due to wakeup settings difference between system and runtime
+ * suspend, or the current power state of it is not suitable for the upcoming
+ * (system-wide) transition.
+ */
+bool pci_dev_need_resume(struct pci_dev *pci_dev)
+{
+	struct device *dev = &pci_dev->dev;
+	pci_power_t target_state;
+
+	if (!pm_runtime_suspended(dev) || platform_pci_need_resume(pci_dev))
+		return true;
+
+	target_state = pci_target_state(pci_dev, device_may_wakeup(dev));
+
+	/*
+	 * If the earlier platform check has not triggered, D3cold is just power
+	 * removal on top of D3hot, so no need to resume the device in that
+	 * case.
+	 */
+	return target_state != pci_dev->current_state &&
+		target_state != PCI_D3cold &&
+		pci_dev->current_state != PCI_D3hot;
+}
+
+/**
+ * pci_dev_adjust_pme - Adjust PME setting for a suspended device.
+ * @pci_dev: Device to check.
+ *
+ * If the device is suspended and it is not configured for system wakeup,
+ * disable PME for it to prevent it from waking up the system unnecessarily.
+ *
+ * Note that if the device's power state is D3cold and the platform check in
+ * pci_dev_need_resume() has not triggered, the device's configuration need not
+ * be changed.
+ */
+void pci_dev_adjust_pme(struct pci_dev *pci_dev)
+{
+	struct device *dev = &pci_dev->dev;
+
+	spin_lock_irq(&dev->power.lock);
+
+	if (pm_runtime_suspended(dev) && !device_may_wakeup(dev) &&
+	    pci_dev->current_state < PCI_D3cold)
+		__pci_pme_active(pci_dev, false);
+
+	spin_unlock_irq(&dev->power.lock);
+}
+
+/**
+ * pci_dev_complete_resume - Finalize resume from system sleep for a device.
+ * @pci_dev: Device to handle.
+ *
+ * If the device is runtime suspended and wakeup-capable, enable PME for it as
+ * it might have been disabled during the prepare phase of system suspend if
+ * the device was not configured for system wakeup.
+ */
+void pci_dev_complete_resume(struct pci_dev *pci_dev)
+{
+	struct device *dev = &pci_dev->dev;
+
+	if (!pci_dev_run_wake(pci_dev))
+		return;
+
+	spin_lock_irq(&dev->power.lock);
+
+	if (pm_runtime_suspended(dev) && pci_dev->current_state < PCI_D3cold)
+		__pci_pme_active(pci_dev, true);
+
+	spin_unlock_irq(&dev->power.lock);
+}
+
+/**
+ * pci_choose_state - Choose the power state of a PCI device.
+ * @dev: Target PCI device.
+ * @state: Target state for the whole system.
+ *
+ * Returns PCI power state suitable for @dev and @state.
+ */
+pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state)
+{
+	if (state.event == PM_EVENT_ON)
+		return PCI_D0;
+
+	return pci_target_state(dev, false);
+}
+EXPORT_SYMBOL(pci_choose_state);
+
+void pci_config_pm_runtime_get(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device *parent = dev->parent;
+
+	if (parent)
+		pm_runtime_get_sync(parent);
+	pm_runtime_get_noresume(dev);
+	/*
+	 * pdev->current_state is set to PCI_D3cold during suspending,
+	 * so wait until suspending completes
+	 */
+	pm_runtime_barrier(dev);
+	/*
+	 * Only need to resume devices in D3cold, because config
+	 * registers are still accessible for devices suspended but
+	 * not in D3cold.
+	 */
+	if (pdev->current_state == PCI_D3cold)
+		pm_runtime_resume(dev);
+}
+
+void pci_config_pm_runtime_put(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device *parent = dev->parent;
+
+	pm_runtime_put(dev);
+	if (parent)
+		pm_runtime_put_sync(parent);
+}
+
+static const struct dmi_system_id bridge_d3_blacklist[] = {
+#ifdef CONFIG_X86
+	{
+		/*
+		 * Gigabyte X299 root port is not marked as hotplug capable
+		 * which allows Linux to power manage it.  However, this
+		 * confuses the BIOS SMI handler so don't power manage root
+		 * ports on that system.
+		 */
+		.ident = "X299 DESIGNARE EX-CF",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
+			DMI_MATCH(DMI_BOARD_NAME, "X299 DESIGNARE EX-CF"),
+		},
+	},
+	{
+		/*
+		 * Downstream device is not accessible after putting a root port
+		 * into D3cold and back into D0 on Elo Continental Z2 board
+		 */
+		.ident = "Elo Continental Z2",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Elo Touch Solutions"),
+			DMI_MATCH(DMI_BOARD_NAME, "Geminilake"),
+			DMI_MATCH(DMI_BOARD_VERSION, "Continental Z2"),
+		},
+	},
+#endif
+	{ }
+};
+
+/**
+ * pci_bridge_d3_possible - Is it possible to put the bridge into D3
+ * @bridge: Bridge to check
+ *
+ * This function checks if it is possible to move the bridge to D3.
+ * Currently we only allow D3 for recent enough PCIe ports and Thunderbolt.
+ */
+bool pci_bridge_d3_possible(struct pci_dev *bridge)
+{
+	if (!pci_is_pcie(bridge))
+		return false;
+
+	switch (pci_pcie_type(bridge)) {
+	case PCI_EXP_TYPE_ROOT_PORT:
+	case PCI_EXP_TYPE_UPSTREAM:
+	case PCI_EXP_TYPE_DOWNSTREAM:
+		if (pci_bridge_d3_disable)
+			return false;
+
+		/*
+		 * Hotplug ports handled by firmware in System Management Mode
+		 * may not be put into D3 by the OS (Thunderbolt on non-Macs).
+		 */
+		if (bridge->is_hotplug_bridge && !pciehp_is_native(bridge))
+			return false;
+
+		if (pci_bridge_d3_force)
+			return true;
+
+		/* Even the oldest 2010 Thunderbolt controller supports D3. */
+		if (bridge->is_thunderbolt)
+			return true;
+
+		/* Platform might know better if the bridge supports D3 */
+		if (platform_pci_bridge_d3(bridge))
+			return true;
+
+		/*
+		 * Hotplug ports handled natively by the OS were not validated
+		 * by vendors for runtime D3 at least until 2018 because there
+		 * was no OS support.
+		 */
+		if (bridge->is_hotplug_bridge)
+			return false;
+
+		if (dmi_check_system(bridge_d3_blacklist))
+			return false;
+
+		/*
+		 * It should be safe to put PCIe ports from 2015 or newer
+		 * to D3.
+		 */
+		if (dmi_get_bios_year() >= 2015)
+			return true;
+		break;
+	}
+
+	return false;
+}
+
+static int pci_dev_check_d3cold(struct pci_dev *dev, void *data)
+{
+	bool *d3cold_ok = data;
+
+	if (/* The device needs to be allowed to go D3cold ... */
+	    dev->no_d3cold || !dev->d3cold_allowed ||
+
+	    /* ... and if it is wakeup capable to do so from D3cold. */
+	    (device_may_wakeup(&dev->dev) &&
+	     !pci_pme_capable(dev, PCI_D3cold)) ||
+
+	    /* If it is a bridge it must be allowed to go to D3. */
+	    !pci_power_manageable(dev))
+
+		*d3cold_ok = false;
+
+	return !*d3cold_ok;
+}
+
+/*
+ * pci_bridge_d3_update - Update bridge D3 capabilities
+ * @dev: PCI device which is changed
+ *
+ * Update upstream bridge PM capabilities accordingly depending on if the
+ * device PM configuration was changed or the device is being removed.  The
+ * change is also propagated upstream.
+ */
+void pci_bridge_d3_update(struct pci_dev *dev)
+{
+	bool remove = !device_is_registered(&dev->dev);
+	struct pci_dev *bridge;
+	bool d3cold_ok = true;
+
+	bridge = pci_upstream_bridge(dev);
+	if (!bridge || !pci_bridge_d3_possible(bridge))
+		return;
+
+	/*
+	 * If D3 is currently allowed for the bridge, removing one of its
+	 * children won't change that.
+	 */
+	if (remove && bridge->bridge_d3)
+		return;
+
+	/*
+	 * If D3 is currently allowed for the bridge and a child is added or
+	 * changed, disallowance of D3 can only be caused by that child, so
+	 * we only need to check that single device, not any of its siblings.
+	 *
+	 * If D3 is currently not allowed for the bridge, checking the device
+	 * first may allow us to skip checking its siblings.
+	 */
+	if (!remove)
+		pci_dev_check_d3cold(dev, &d3cold_ok);
+
+	/*
+	 * If D3 is currently not allowed for the bridge, this may be caused
+	 * either by the device being changed/removed or any of its siblings,
+	 * so we need to go through all children to find out if one of them
+	 * continues to block D3.
+	 */
+	if (d3cold_ok && !bridge->bridge_d3)
+		pci_walk_bus(bridge->subordinate, pci_dev_check_d3cold,
+			     &d3cold_ok);
+
+	if (bridge->bridge_d3 != d3cold_ok) {
+		bridge->bridge_d3 = d3cold_ok;
+		/* Propagate change to upstream bridges */
+		pci_bridge_d3_update(bridge);
+	}
+}
+
+/**
+ * pci_d3cold_enable - Enable D3cold for device
+ * @dev: PCI device to handle
+ *
+ * This function can be used in drivers to enable D3cold from the device
+ * they handle.  It also updates upstream PCI bridge PM capabilities
+ * accordingly.
+ */
+void pci_d3cold_enable(struct pci_dev *dev)
+{
+	if (dev->no_d3cold) {
+		dev->no_d3cold = false;
+		pci_bridge_d3_update(dev);
+	}
+}
+EXPORT_SYMBOL_GPL(pci_d3cold_enable);
+
+/**
+ * pci_d3cold_disable - Disable D3cold for device
+ * @dev: PCI device to handle
+ *
+ * This function can be used in drivers to disable D3cold from the device
+ * they handle.  It also updates upstream PCI bridge PM capabilities
+ * accordingly.
+ */
+void pci_d3cold_disable(struct pci_dev *dev)
+{
+	if (!dev->no_d3cold) {
+		dev->no_d3cold = true;
+		pci_bridge_d3_update(dev);
+	}
+}
+EXPORT_SYMBOL_GPL(pci_d3cold_disable);
+
+/**
+ * pci_pm_init - Initialize PM functions of given PCI device
+ * @dev: PCI device to handle.
+ */
+void pci_pm_init(struct pci_dev *dev)
+{
+	int pm;
+	u16 status;
+	u16 pmc;
+
+	pm_runtime_forbid(&dev->dev);
+	pm_runtime_set_active(&dev->dev);
+	pm_runtime_enable(&dev->dev);
+	device_enable_async_suspend(&dev->dev);
+	dev->wakeup_prepared = false;
+
+	dev->pm_cap = 0;
+	dev->pme_support = 0;
+
+	/* find PCI PM capability in list */
+	pm = pci_find_capability(dev, PCI_CAP_ID_PM);
+	if (!pm)
+		return;
+	/* Check device's ability to generate PME# */
+	pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc);
+
+	if ((pmc & PCI_PM_CAP_VER_MASK) > 3) {
+		pci_err(dev, "unsupported PM cap regs version (%u)\n",
+			pmc & PCI_PM_CAP_VER_MASK);
+		return;
+	}
+
+	dev->pm_cap = pm;
+	dev->d3hot_delay = PCI_PM_D3HOT_WAIT;
+	dev->d3cold_delay = PCI_PM_D3COLD_WAIT;
+	dev->bridge_d3 = pci_bridge_d3_possible(dev);
+	dev->d3cold_allowed = true;
+
+	dev->d1_support = false;
+	dev->d2_support = false;
+	if (!pci_no_d1d2(dev)) {
+		if (pmc & PCI_PM_CAP_D1)
+			dev->d1_support = true;
+		if (pmc & PCI_PM_CAP_D2)
+			dev->d2_support = true;
+
+		if (dev->d1_support || dev->d2_support)
+			pci_info(dev, "supports%s%s\n",
+				   dev->d1_support ? " D1" : "",
+				   dev->d2_support ? " D2" : "");
+	}
+
+	pmc &= PCI_PM_CAP_PME_MASK;
+	if (pmc) {
+		pci_info(dev, "PME# supported from%s%s%s%s%s\n",
+			 (pmc & PCI_PM_CAP_PME_D0) ? " D0" : "",
+			 (pmc & PCI_PM_CAP_PME_D1) ? " D1" : "",
+			 (pmc & PCI_PM_CAP_PME_D2) ? " D2" : "",
+			 (pmc & PCI_PM_CAP_PME_D3hot) ? " D3hot" : "",
+			 (pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : "");
+		dev->pme_support = pmc >> PCI_PM_CAP_PME_SHIFT;
+		dev->pme_poll = true;
+		/*
+		 * Make device's PM flags reflect the wake-up capability, but
+		 * let the user space enable it to wake up the system as needed.
+		 */
+		device_set_wakeup_capable(&dev->dev, true);
+		/* Disable the PME# generation functionality */
+		pci_pme_active(dev, false);
+	}
+
+	pci_read_config_word(dev, PCI_STATUS, &status);
+	if (status & PCI_STATUS_IMM_READY)
+		dev->imm_ready = 1;
+}
+
+static unsigned long pci_ea_flags(struct pci_dev *dev, u8 prop)
+{
+	unsigned long flags = IORESOURCE_PCI_FIXED | IORESOURCE_PCI_EA_BEI;
+
+	switch (prop) {
+	case PCI_EA_P_MEM:
+	case PCI_EA_P_VF_MEM:
+		flags |= IORESOURCE_MEM;
+		break;
+	case PCI_EA_P_MEM_PREFETCH:
+	case PCI_EA_P_VF_MEM_PREFETCH:
+		flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH;
+		break;
+	case PCI_EA_P_IO:
+		flags |= IORESOURCE_IO;
+		break;
+	default:
+		return 0;
+	}
+
+	return flags;
+}
+
+static struct resource *pci_ea_get_resource(struct pci_dev *dev, u8 bei,
+					    u8 prop)
+{
+	if (bei <= PCI_EA_BEI_BAR5 && prop <= PCI_EA_P_IO)
+		return &dev->resource[bei];
+#ifdef CONFIG_PCI_IOV
+	else if (bei >= PCI_EA_BEI_VF_BAR0 && bei <= PCI_EA_BEI_VF_BAR5 &&
+		 (prop == PCI_EA_P_VF_MEM || prop == PCI_EA_P_VF_MEM_PREFETCH))
+		return &dev->resource[PCI_IOV_RESOURCES +
+				      bei - PCI_EA_BEI_VF_BAR0];
+#endif
+	else if (bei == PCI_EA_BEI_ROM)
+		return &dev->resource[PCI_ROM_RESOURCE];
+	else
+		return NULL;
+}
+
+/* Read an Enhanced Allocation (EA) entry */
+static int pci_ea_read(struct pci_dev *dev, int offset)
+{
+	struct resource *res;
+	int ent_size, ent_offset = offset;
+	resource_size_t start, end;
+	unsigned long flags;
+	u32 dw0, bei, base, max_offset;
+	u8 prop;
+	bool support_64 = (sizeof(resource_size_t) >= 8);
+
+	pci_read_config_dword(dev, ent_offset, &dw0);
+	ent_offset += 4;
+
+	/* Entry size field indicates DWORDs after 1st */
+	ent_size = ((dw0 & PCI_EA_ES) + 1) << 2;
+
+	if (!(dw0 & PCI_EA_ENABLE)) /* Entry not enabled */
+		goto out;
+
+	bei = (dw0 & PCI_EA_BEI) >> 4;
+	prop = (dw0 & PCI_EA_PP) >> 8;
+
+	/*
+	 * If the Property is in the reserved range, try the Secondary
+	 * Property instead.
+	 */
+	if (prop > PCI_EA_P_BRIDGE_IO && prop < PCI_EA_P_MEM_RESERVED)
+		prop = (dw0 & PCI_EA_SP) >> 16;
+	if (prop > PCI_EA_P_BRIDGE_IO)
+		goto out;
+
+	res = pci_ea_get_resource(dev, bei, prop);
+	if (!res) {
+		pci_err(dev, "Unsupported EA entry BEI: %u\n", bei);
+		goto out;
+	}
+
+	flags = pci_ea_flags(dev, prop);
+	if (!flags) {
+		pci_err(dev, "Unsupported EA properties: %#x\n", prop);
+		goto out;
+	}
+
+	/* Read Base */
+	pci_read_config_dword(dev, ent_offset, &base);
+	start = (base & PCI_EA_FIELD_MASK);
+	ent_offset += 4;
+
+	/* Read MaxOffset */
+	pci_read_config_dword(dev, ent_offset, &max_offset);
+	ent_offset += 4;
+
+	/* Read Base MSBs (if 64-bit entry) */
+	if (base & PCI_EA_IS_64) {
+		u32 base_upper;
+
+		pci_read_config_dword(dev, ent_offset, &base_upper);
+		ent_offset += 4;
+
+		flags |= IORESOURCE_MEM_64;
+
+		/* entry starts above 32-bit boundary, can't use */
+		if (!support_64 && base_upper)
+			goto out;
+
+		if (support_64)
+			start |= ((u64)base_upper << 32);
+	}
+
+	end = start + (max_offset | 0x03);
+
+	/* Read MaxOffset MSBs (if 64-bit entry) */
+	if (max_offset & PCI_EA_IS_64) {
+		u32 max_offset_upper;
+
+		pci_read_config_dword(dev, ent_offset, &max_offset_upper);
+		ent_offset += 4;
+
+		flags |= IORESOURCE_MEM_64;
+
+		/* entry too big, can't use */
+		if (!support_64 && max_offset_upper)
+			goto out;
+
+		if (support_64)
+			end += ((u64)max_offset_upper << 32);
+	}
+
+	if (end < start) {
+		pci_err(dev, "EA Entry crosses address boundary\n");
+		goto out;
+	}
+
+	if (ent_size != ent_offset - offset) {
+		pci_err(dev, "EA Entry Size (%d) does not match length read (%d)\n",
+			ent_size, ent_offset - offset);
+		goto out;
+	}
+
+	res->name = pci_name(dev);
+	res->start = start;
+	res->end = end;
+	res->flags = flags;
+
+	if (bei <= PCI_EA_BEI_BAR5)
+		pci_info(dev, "BAR %d: %pR (from Enhanced Allocation, properties %#02x)\n",
+			   bei, res, prop);
+	else if (bei == PCI_EA_BEI_ROM)
+		pci_info(dev, "ROM: %pR (from Enhanced Allocation, properties %#02x)\n",
+			   res, prop);
+	else if (bei >= PCI_EA_BEI_VF_BAR0 && bei <= PCI_EA_BEI_VF_BAR5)
+		pci_info(dev, "VF BAR %d: %pR (from Enhanced Allocation, properties %#02x)\n",
+			   bei - PCI_EA_BEI_VF_BAR0, res, prop);
+	else
+		pci_info(dev, "BEI %d res: %pR (from Enhanced Allocation, properties %#02x)\n",
+			   bei, res, prop);
+
+out:
+	return offset + ent_size;
+}
+
+/* Enhanced Allocation Initialization */
+void pci_ea_init(struct pci_dev *dev)
+{
+	int ea;
+	u8 num_ent;
+	int offset;
+	int i;
+
+	/* find PCI EA capability in list */
+	ea = pci_find_capability(dev, PCI_CAP_ID_EA);
+	if (!ea)
+		return;
+
+	/* determine the number of entries */
+	pci_bus_read_config_byte(dev->bus, dev->devfn, ea + PCI_EA_NUM_ENT,
+					&num_ent);
+	num_ent &= PCI_EA_NUM_ENT_MASK;
+
+	offset = ea + PCI_EA_FIRST_ENT;
+
+	/* Skip DWORD 2 for type 1 functions */
+	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+		offset += 4;
+
+	/* parse each EA entry */
+	for (i = 0; i < num_ent; ++i)
+		offset = pci_ea_read(dev, offset);
+}
+
+static void pci_add_saved_cap(struct pci_dev *pci_dev,
+	struct pci_cap_saved_state *new_cap)
+{
+	hlist_add_head(&new_cap->next, &pci_dev->saved_cap_space);
+}
+
+/**
+ * _pci_add_cap_save_buffer - allocate buffer for saving given
+ *			      capability registers
+ * @dev: the PCI device
+ * @cap: the capability to allocate the buffer for
+ * @extended: Standard or Extended capability ID
+ * @size: requested size of the buffer
+ */
+static int _pci_add_cap_save_buffer(struct pci_dev *dev, u16 cap,
+				    bool extended, unsigned int size)
+{
+	int pos;
+	struct pci_cap_saved_state *save_state;
+
+	if (extended)
+		pos = pci_find_ext_capability(dev, cap);
+	else
+		pos = pci_find_capability(dev, cap);
+
+	if (!pos)
+		return 0;
+
+	save_state = kzalloc(sizeof(*save_state) + size, GFP_KERNEL);
+	if (!save_state)
+		return -ENOMEM;
+
+	save_state->cap.cap_nr = cap;
+	save_state->cap.cap_extended = extended;
+	save_state->cap.size = size;
+	pci_add_saved_cap(dev, save_state);
+
+	return 0;
+}
+
+int pci_add_cap_save_buffer(struct pci_dev *dev, char cap, unsigned int size)
+{
+	return _pci_add_cap_save_buffer(dev, cap, false, size);
+}
+
+int pci_add_ext_cap_save_buffer(struct pci_dev *dev, u16 cap, unsigned int size)
+{
+	return _pci_add_cap_save_buffer(dev, cap, true, size);
+}
+
+/**
+ * pci_allocate_cap_save_buffers - allocate buffers for saving capabilities
+ * @dev: the PCI device
+ */
+void pci_allocate_cap_save_buffers(struct pci_dev *dev)
+{
+	int error;
+
+	error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP,
+					PCI_EXP_SAVE_REGS * sizeof(u16));
+	if (error)
+		pci_err(dev, "unable to preallocate PCI Express save buffer\n");
+
+	error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_PCIX, sizeof(u16));
+	if (error)
+		pci_err(dev, "unable to preallocate PCI-X save buffer\n");
+
+	error = pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_LTR,
+					    2 * sizeof(u16));
+	if (error)
+		pci_err(dev, "unable to allocate suspend buffer for LTR\n");
+
+	pci_allocate_vc_save_buffers(dev);
+}
+
+void pci_free_cap_save_buffers(struct pci_dev *dev)
+{
+	struct pci_cap_saved_state *tmp;
+	struct hlist_node *n;
+
+	hlist_for_each_entry_safe(tmp, n, &dev->saved_cap_space, next)
+		kfree(tmp);
+}
+
+/**
+ * pci_configure_ari - enable or disable ARI forwarding
+ * @dev: the PCI device
+ *
+ * If @dev and its upstream bridge both support ARI, enable ARI in the
+ * bridge.  Otherwise, disable ARI in the bridge.
+ */
+void pci_configure_ari(struct pci_dev *dev)
+{
+	u32 cap;
+	struct pci_dev *bridge;
+
+	if (pcie_ari_disabled || !pci_is_pcie(dev) || dev->devfn)
+		return;
+
+	bridge = dev->bus->self;
+	if (!bridge)
+		return;
+
+	pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &cap);
+	if (!(cap & PCI_EXP_DEVCAP2_ARI))
+		return;
+
+	if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ARI)) {
+		pcie_capability_set_word(bridge, PCI_EXP_DEVCTL2,
+					 PCI_EXP_DEVCTL2_ARI);
+		bridge->ari_enabled = 1;
+	} else {
+		pcie_capability_clear_word(bridge, PCI_EXP_DEVCTL2,
+					   PCI_EXP_DEVCTL2_ARI);
+		bridge->ari_enabled = 0;
+	}
+}
+
+static bool pci_acs_flags_enabled(struct pci_dev *pdev, u16 acs_flags)
+{
+	int pos;
+	u16 cap, ctrl;
+
+	pos = pdev->acs_cap;
+	if (!pos)
+		return false;
+
+	/*
+	 * Except for egress control, capabilities are either required
+	 * or only required if controllable.  Features missing from the
+	 * capability field can therefore be assumed as hard-wired enabled.
+	 */
+	pci_read_config_word(pdev, pos + PCI_ACS_CAP, &cap);
+	acs_flags &= (cap | PCI_ACS_EC);
+
+	pci_read_config_word(pdev, pos + PCI_ACS_CTRL, &ctrl);
+	return (ctrl & acs_flags) == acs_flags;
+}
+
+/**
+ * pci_acs_enabled - test ACS against required flags for a given device
+ * @pdev: device to test
+ * @acs_flags: required PCI ACS flags
+ *
+ * Return true if the device supports the provided flags.  Automatically
+ * filters out flags that are not implemented on multifunction devices.
+ *
+ * Note that this interface checks the effective ACS capabilities of the
+ * device rather than the actual capabilities.  For instance, most single
+ * function endpoints are not required to support ACS because they have no
+ * opportunity for peer-to-peer access.  We therefore return 'true'
+ * regardless of whether the device exposes an ACS capability.  This makes
+ * it much easier for callers of this function to ignore the actual type
+ * or topology of the device when testing ACS support.
+ */
+bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags)
+{
+	int ret;
+
+	ret = pci_dev_specific_acs_enabled(pdev, acs_flags);
+	if (ret >= 0)
+		return ret > 0;
+
+	/*
+	 * Conventional PCI and PCI-X devices never support ACS, either
+	 * effectively or actually.  The shared bus topology implies that
+	 * any device on the bus can receive or snoop DMA.
+	 */
+	if (!pci_is_pcie(pdev))
+		return false;
+
+	switch (pci_pcie_type(pdev)) {
+	/*
+	 * PCI/X-to-PCIe bridges are not specifically mentioned by the spec,
+	 * but since their primary interface is PCI/X, we conservatively
+	 * handle them as we would a non-PCIe device.
+	 */
+	case PCI_EXP_TYPE_PCIE_BRIDGE:
+	/*
+	 * PCIe 3.0, 6.12.1 excludes ACS on these devices.  "ACS is never
+	 * applicable... must never implement an ACS Extended Capability...".
+	 * This seems arbitrary, but we take a conservative interpretation
+	 * of this statement.
+	 */
+	case PCI_EXP_TYPE_PCI_BRIDGE:
+	case PCI_EXP_TYPE_RC_EC:
+		return false;
+	/*
+	 * PCIe 3.0, 6.12.1.1 specifies that downstream and root ports should
+	 * implement ACS in order to indicate their peer-to-peer capabilities,
+	 * regardless of whether they are single- or multi-function devices.
+	 */
+	case PCI_EXP_TYPE_DOWNSTREAM:
+	case PCI_EXP_TYPE_ROOT_PORT:
+		return pci_acs_flags_enabled(pdev, acs_flags);
+	/*
+	 * PCIe 3.0, 6.12.1.2 specifies ACS capabilities that should be
+	 * implemented by the remaining PCIe types to indicate peer-to-peer
+	 * capabilities, but only when they are part of a multifunction
+	 * device.  The footnote for section 6.12 indicates the specific
+	 * PCIe types included here.
+	 */
+	case PCI_EXP_TYPE_ENDPOINT:
+	case PCI_EXP_TYPE_UPSTREAM:
+	case PCI_EXP_TYPE_LEG_END:
+	case PCI_EXP_TYPE_RC_END:
+		if (!pdev->multifunction)
+			break;
+
+		return pci_acs_flags_enabled(pdev, acs_flags);
+	}
+
+	/*
+	 * PCIe 3.0, 6.12.1.3 specifies no ACS capabilities are applicable
+	 * to single function devices with the exception of downstream ports.
+	 */
+	return true;
+}
+
+/**
+ * pci_acs_path_enabled - test ACS flags from start to end in a hierarchy
+ * @start: starting downstream device
+ * @end: ending upstream device or NULL to search to the root bus
+ * @acs_flags: required flags
+ *
+ * Walk up a device tree from start to end testing PCI ACS support.  If
+ * any step along the way does not support the required flags, return false.
+ */
+bool pci_acs_path_enabled(struct pci_dev *start,
+			  struct pci_dev *end, u16 acs_flags)
+{
+	struct pci_dev *pdev, *parent = start;
+
+	do {
+		pdev = parent;
+
+		if (!pci_acs_enabled(pdev, acs_flags))
+			return false;
+
+		if (pci_is_root_bus(pdev->bus))
+			return (end == NULL);
+
+		parent = pdev->bus->self;
+	} while (pdev != end);
+
+	return true;
+}
+
+/**
+ * pci_acs_init - Initialize ACS if hardware supports it
+ * @dev: the PCI device
+ */
+void pci_acs_init(struct pci_dev *dev)
+{
+	dev->acs_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
+
+	/*
+	 * Attempt to enable ACS regardless of capability because some Root
+	 * Ports (e.g. those quirked with *_intel_pch_acs_*) do not have
+	 * the standard ACS capability but still support ACS via those
+	 * quirks.
+	 */
+	pci_enable_acs(dev);
+}
+
+/**
+ * pci_rebar_find_pos - find position of resize ctrl reg for BAR
+ * @pdev: PCI device
+ * @bar: BAR to find
+ *
+ * Helper to find the position of the ctrl register for a BAR.
+ * Returns -ENOTSUPP if resizable BARs are not supported at all.
+ * Returns -ENOENT if no ctrl register for the BAR could be found.
+ */
+static int pci_rebar_find_pos(struct pci_dev *pdev, int bar)
+{
+	unsigned int pos, nbars, i;
+	u32 ctrl;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR);
+	if (!pos)
+		return -ENOTSUPP;
+
+	pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+	nbars = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >>
+		    PCI_REBAR_CTRL_NBAR_SHIFT;
+
+	for (i = 0; i < nbars; i++, pos += 8) {
+		int bar_idx;
+
+		pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+		bar_idx = ctrl & PCI_REBAR_CTRL_BAR_IDX;
+		if (bar_idx == bar)
+			return pos;
+	}
+
+	return -ENOENT;
+}
+
+/**
+ * pci_rebar_get_possible_sizes - get possible sizes for BAR
+ * @pdev: PCI device
+ * @bar: BAR to query
+ *
+ * Get the possible sizes of a resizable BAR as bitmask defined in the spec
+ * (bit 0=1MB, bit 19=512GB). Returns 0 if BAR isn't resizable.
+ */
+u32 pci_rebar_get_possible_sizes(struct pci_dev *pdev, int bar)
+{
+	int pos;
+	u32 cap;
+
+	pos = pci_rebar_find_pos(pdev, bar);
+	if (pos < 0)
+		return 0;
+
+	pci_read_config_dword(pdev, pos + PCI_REBAR_CAP, &cap);
+	cap = FIELD_GET(PCI_REBAR_CAP_SIZES, cap);
+
+	/* Sapphire RX 5600 XT Pulse has an invalid cap dword for BAR 0 */
+	if (pdev->vendor == PCI_VENDOR_ID_ATI && pdev->device == 0x731f &&
+	    bar == 0 && cap == 0x700)
+		return 0x3f00;
+
+	return cap;
+}
+EXPORT_SYMBOL(pci_rebar_get_possible_sizes);
+
+/**
+ * pci_rebar_get_current_size - get the current size of a BAR
+ * @pdev: PCI device
+ * @bar: BAR to set size to
+ *
+ * Read the size of a BAR from the resizable BAR config.
+ * Returns size if found or negative error code.
+ */
+int pci_rebar_get_current_size(struct pci_dev *pdev, int bar)
+{
+	int pos;
+	u32 ctrl;
+
+	pos = pci_rebar_find_pos(pdev, bar);
+	if (pos < 0)
+		return pos;
+
+	pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+	return (ctrl & PCI_REBAR_CTRL_BAR_SIZE) >> PCI_REBAR_CTRL_BAR_SHIFT;
+}
+
+/**
+ * pci_rebar_set_size - set a new size for a BAR
+ * @pdev: PCI device
+ * @bar: BAR to set size to
+ * @size: new size as defined in the spec (0=1MB, 19=512GB)
+ *
+ * Set the new size of a BAR as defined in the spec.
+ * Returns zero if resizing was successful, error code otherwise.
+ */
+int pci_rebar_set_size(struct pci_dev *pdev, int bar, int size)
+{
+	int pos;
+	u32 ctrl;
+
+	pos = pci_rebar_find_pos(pdev, bar);
+	if (pos < 0)
+		return pos;
+
+	pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+	ctrl &= ~PCI_REBAR_CTRL_BAR_SIZE;
+	ctrl |= size << PCI_REBAR_CTRL_BAR_SHIFT;
+	pci_write_config_dword(pdev, pos + PCI_REBAR_CTRL, ctrl);
+	return 0;
+}
+
+/**
+ * pci_enable_atomic_ops_to_root - enable AtomicOp requests to root port
+ * @dev: the PCI device
+ * @cap_mask: mask of desired AtomicOp sizes, including one or more of:
+ *	PCI_EXP_DEVCAP2_ATOMIC_COMP32
+ *	PCI_EXP_DEVCAP2_ATOMIC_COMP64
+ *	PCI_EXP_DEVCAP2_ATOMIC_COMP128
+ *
+ * Return 0 if all upstream bridges support AtomicOp routing, egress
+ * blocking is disabled on all upstream ports, and the root port supports
+ * the requested completion capabilities (32-bit, 64-bit and/or 128-bit
+ * AtomicOp completion), or negative otherwise.
+ */
+int pci_enable_atomic_ops_to_root(struct pci_dev *dev, u32 cap_mask)
+{
+	struct pci_bus *bus = dev->bus;
+	struct pci_dev *bridge;
+	u32 cap, ctl2;
+
+	/*
+	 * Per PCIe r5.0, sec 9.3.5.10, the AtomicOp Requester Enable bit
+	 * in Device Control 2 is reserved in VFs and the PF value applies
+	 * to all associated VFs.
+	 */
+	if (dev->is_virtfn)
+		return -EINVAL;
+
+	if (!pci_is_pcie(dev))
+		return -EINVAL;
+
+	/*
+	 * Per PCIe r4.0, sec 6.15, endpoints and root ports may be
+	 * AtomicOp requesters.  For now, we only support endpoints as
+	 * requesters and root ports as completers.  No endpoints as
+	 * completers, and no peer-to-peer.
+	 */
+
+	switch (pci_pcie_type(dev)) {
+	case PCI_EXP_TYPE_ENDPOINT:
+	case PCI_EXP_TYPE_LEG_END:
+	case PCI_EXP_TYPE_RC_END:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	while (bus->parent) {
+		bridge = bus->self;
+
+		pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &cap);
+
+		switch (pci_pcie_type(bridge)) {
+		/* Ensure switch ports support AtomicOp routing */
+		case PCI_EXP_TYPE_UPSTREAM:
+		case PCI_EXP_TYPE_DOWNSTREAM:
+			if (!(cap & PCI_EXP_DEVCAP2_ATOMIC_ROUTE))
+				return -EINVAL;
+			break;
+
+		/* Ensure root port supports all the sizes we care about */
+		case PCI_EXP_TYPE_ROOT_PORT:
+			if ((cap & cap_mask) != cap_mask)
+				return -EINVAL;
+			break;
+		}
+
+		/* Ensure upstream ports don't block AtomicOps on egress */
+		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_UPSTREAM) {
+			pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2,
+						   &ctl2);
+			if (ctl2 & PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK)
+				return -EINVAL;
+		}
+
+		bus = bus->parent;
+	}
+
+	pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
+				 PCI_EXP_DEVCTL2_ATOMIC_REQ);
+	return 0;
+}
+EXPORT_SYMBOL(pci_enable_atomic_ops_to_root);
+
+/**
+ * pci_swizzle_interrupt_pin - swizzle INTx for device behind bridge
+ * @dev: the PCI device
+ * @pin: the INTx pin (1=INTA, 2=INTB, 3=INTC, 4=INTD)
+ *
+ * Perform INTx swizzling for a device behind one level of bridge.  This is
+ * required by section 9.1 of the PCI-to-PCI bridge specification for devices
+ * behind bridges on add-in cards.  For devices with ARI enabled, the slot
+ * number is always 0 (see the Implementation Note in section 2.2.8.1 of
+ * the PCI Express Base Specification, Revision 2.1)
+ */
+u8 pci_swizzle_interrupt_pin(const struct pci_dev *dev, u8 pin)
+{
+	int slot;
+
+	if (pci_ari_enabled(dev->bus))
+		slot = 0;
+	else
+		slot = PCI_SLOT(dev->devfn);
+
+	return (((pin - 1) + slot) % 4) + 1;
+}
+
+int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
+{
+	u8 pin;
+
+	pin = dev->pin;
+	if (!pin)
+		return -1;
+
+	while (!pci_is_root_bus(dev->bus)) {
+		pin = pci_swizzle_interrupt_pin(dev, pin);
+		dev = dev->bus->self;
+	}
+	*bridge = dev;
+	return pin;
+}
+
+/**
+ * pci_common_swizzle - swizzle INTx all the way to root bridge
+ * @dev: the PCI device
+ * @pinp: pointer to the INTx pin value (1=INTA, 2=INTB, 3=INTD, 4=INTD)
+ *
+ * Perform INTx swizzling for a device.  This traverses through all PCI-to-PCI
+ * bridges all the way up to a PCI root bus.
+ */
+u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp)
+{
+	u8 pin = *pinp;
+
+	while (!pci_is_root_bus(dev->bus)) {
+		pin = pci_swizzle_interrupt_pin(dev, pin);
+		dev = dev->bus->self;
+	}
+	*pinp = pin;
+	return PCI_SLOT(dev->devfn);
+}
+EXPORT_SYMBOL_GPL(pci_common_swizzle);
+
+/**
+ * pci_release_region - Release a PCI bar
+ * @pdev: PCI device whose resources were previously reserved by
+ *	  pci_request_region()
+ * @bar: BAR to release
+ *
+ * Releases the PCI I/O and memory resources previously reserved by a
+ * successful call to pci_request_region().  Call this function only
+ * after all use of the PCI regions has ceased.
+ */
+void pci_release_region(struct pci_dev *pdev, int bar)
+{
+	struct pci_devres *dr;
+
+	if (pci_resource_len(pdev, bar) == 0)
+		return;
+	if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
+		release_region(pci_resource_start(pdev, bar),
+				pci_resource_len(pdev, bar));
+	else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
+		release_mem_region(pci_resource_start(pdev, bar),
+				pci_resource_len(pdev, bar));
+
+	dr = find_pci_dr(pdev);
+	if (dr)
+		dr->region_mask &= ~(1 << bar);
+}
+EXPORT_SYMBOL(pci_release_region);
+
+/**
+ * __pci_request_region - Reserved PCI I/O and memory resource
+ * @pdev: PCI device whose resources are to be reserved
+ * @bar: BAR to be reserved
+ * @res_name: Name to be associated with resource.
+ * @exclusive: whether the region access is exclusive or not
+ *
+ * Mark the PCI region associated with PCI device @pdev BAR @bar as
+ * being reserved by owner @res_name.  Do not access any
+ * address inside the PCI regions unless this call returns
+ * successfully.
+ *
+ * If @exclusive is set, then the region is marked so that userspace
+ * is explicitly not allowed to map the resource via /dev/mem or
+ * sysfs MMIO access.
+ *
+ * Returns 0 on success, or %EBUSY on error.  A warning
+ * message is also printed on failure.
+ */
+static int __pci_request_region(struct pci_dev *pdev, int bar,
+				const char *res_name, int exclusive)
+{
+	struct pci_devres *dr;
+
+	if (pci_resource_len(pdev, bar) == 0)
+		return 0;
+
+	if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) {
+		if (!request_region(pci_resource_start(pdev, bar),
+			    pci_resource_len(pdev, bar), res_name))
+			goto err_out;
+	} else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) {
+		if (!__request_mem_region(pci_resource_start(pdev, bar),
+					pci_resource_len(pdev, bar), res_name,
+					exclusive))
+			goto err_out;
+	}
+
+	dr = find_pci_dr(pdev);
+	if (dr)
+		dr->region_mask |= 1 << bar;
+
+	return 0;
+
+err_out:
+	pci_warn(pdev, "BAR %d: can't reserve %pR\n", bar,
+		 &pdev->resource[bar]);
+	return -EBUSY;
+}
+
+/**
+ * pci_request_region - Reserve PCI I/O and memory resource
+ * @pdev: PCI device whose resources are to be reserved
+ * @bar: BAR to be reserved
+ * @res_name: Name to be associated with resource
+ *
+ * Mark the PCI region associated with PCI device @pdev BAR @bar as
+ * being reserved by owner @res_name.  Do not access any
+ * address inside the PCI regions unless this call returns
+ * successfully.
+ *
+ * Returns 0 on success, or %EBUSY on error.  A warning
+ * message is also printed on failure.
+ */
+int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
+{
+	return __pci_request_region(pdev, bar, res_name, 0);
+}
+EXPORT_SYMBOL(pci_request_region);
+
+/**
+ * pci_release_selected_regions - Release selected PCI I/O and memory resources
+ * @pdev: PCI device whose resources were previously reserved
+ * @bars: Bitmask of BARs to be released
+ *
+ * Release selected PCI I/O and memory resources previously reserved.
+ * Call this function only after all use of the PCI regions has ceased.
+ */
+void pci_release_selected_regions(struct pci_dev *pdev, int bars)
+{
+	int i;
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++)
+		if (bars & (1 << i))
+			pci_release_region(pdev, i);
+}
+EXPORT_SYMBOL(pci_release_selected_regions);
+
+static int __pci_request_selected_regions(struct pci_dev *pdev, int bars,
+					  const char *res_name, int excl)
+{
+	int i;
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++)
+		if (bars & (1 << i))
+			if (__pci_request_region(pdev, i, res_name, excl))
+				goto err_out;
+	return 0;
+
+err_out:
+	while (--i >= 0)
+		if (bars & (1 << i))
+			pci_release_region(pdev, i);
+
+	return -EBUSY;
+}
+
+
+/**
+ * pci_request_selected_regions - Reserve selected PCI I/O and memory resources
+ * @pdev: PCI device whose resources are to be reserved
+ * @bars: Bitmask of BARs to be requested
+ * @res_name: Name to be associated with resource
+ */
+int pci_request_selected_regions(struct pci_dev *pdev, int bars,
+				 const char *res_name)
+{
+	return __pci_request_selected_regions(pdev, bars, res_name, 0);
+}
+EXPORT_SYMBOL(pci_request_selected_regions);
+
+int pci_request_selected_regions_exclusive(struct pci_dev *pdev, int bars,
+					   const char *res_name)
+{
+	return __pci_request_selected_regions(pdev, bars, res_name,
+			IORESOURCE_EXCLUSIVE);
+}
+EXPORT_SYMBOL(pci_request_selected_regions_exclusive);
+
+/**
+ * pci_release_regions - Release reserved PCI I/O and memory resources
+ * @pdev: PCI device whose resources were previously reserved by
+ *	  pci_request_regions()
+ *
+ * Releases all PCI I/O and memory resources previously reserved by a
+ * successful call to pci_request_regions().  Call this function only
+ * after all use of the PCI regions has ceased.
+ */
+
+void pci_release_regions(struct pci_dev *pdev)
+{
+	pci_release_selected_regions(pdev, (1 << PCI_STD_NUM_BARS) - 1);
+}
+EXPORT_SYMBOL(pci_release_regions);
+
+/**
+ * pci_request_regions - Reserve PCI I/O and memory resources
+ * @pdev: PCI device whose resources are to be reserved
+ * @res_name: Name to be associated with resource.
+ *
+ * Mark all PCI regions associated with PCI device @pdev as
+ * being reserved by owner @res_name.  Do not access any
+ * address inside the PCI regions unless this call returns
+ * successfully.
+ *
+ * Returns 0 on success, or %EBUSY on error.  A warning
+ * message is also printed on failure.
+ */
+int pci_request_regions(struct pci_dev *pdev, const char *res_name)
+{
+	return pci_request_selected_regions(pdev,
+			((1 << PCI_STD_NUM_BARS) - 1), res_name);
+}
+EXPORT_SYMBOL(pci_request_regions);
+
+/**
+ * pci_request_regions_exclusive - Reserve PCI I/O and memory resources
+ * @pdev: PCI device whose resources are to be reserved
+ * @res_name: Name to be associated with resource.
+ *
+ * Mark all PCI regions associated with PCI device @pdev as being reserved
+ * by owner @res_name.  Do not access any address inside the PCI regions
+ * unless this call returns successfully.
+ *
+ * pci_request_regions_exclusive() will mark the region so that /dev/mem
+ * and the sysfs MMIO access will not be allowed.
+ *
+ * Returns 0 on success, or %EBUSY on error.  A warning message is also
+ * printed on failure.
+ */
+int pci_request_regions_exclusive(struct pci_dev *pdev, const char *res_name)
+{
+	return pci_request_selected_regions_exclusive(pdev,
+				((1 << PCI_STD_NUM_BARS) - 1), res_name);
+}
+EXPORT_SYMBOL(pci_request_regions_exclusive);
+
+/*
+ * Record the PCI IO range (expressed as CPU physical address + size).
+ * Return a negative value if an error has occurred, zero otherwise
+ */
+int pci_register_io_range(struct fwnode_handle *fwnode, phys_addr_t addr,
+			resource_size_t	size)
+{
+	int ret = 0;
+#ifdef PCI_IOBASE
+	struct logic_pio_hwaddr *range;
+
+	if (!size || addr + size < addr)
+		return -EINVAL;
+
+	range = kzalloc(sizeof(*range), GFP_ATOMIC);
+	if (!range)
+		return -ENOMEM;
+
+	range->fwnode = fwnode;
+	range->size = size;
+	range->hw_start = addr;
+	range->flags = LOGIC_PIO_CPU_MMIO;
+
+	ret = logic_pio_register_range(range);
+	if (ret)
+		kfree(range);
+
+	/* Ignore duplicates due to deferred probing */
+	if (ret == -EEXIST)
+		ret = 0;
+#endif
+
+	return ret;
+}
+
+phys_addr_t pci_pio_to_address(unsigned long pio)
+{
+#ifdef PCI_IOBASE
+	if (pio < MMIO_UPPER_LIMIT)
+		return logic_pio_to_hwaddr(pio);
+#endif
+
+	return (phys_addr_t) OF_BAD_ADDR;
+}
+EXPORT_SYMBOL_GPL(pci_pio_to_address);
+
+unsigned long __weak pci_address_to_pio(phys_addr_t address)
+{
+#ifdef PCI_IOBASE
+	return logic_pio_trans_cpuaddr(address);
+#else
+	if (address > IO_SPACE_LIMIT)
+		return (unsigned long)-1;
+
+	return (unsigned long) address;
+#endif
+}
+
+/**
+ * pci_remap_iospace - Remap the memory mapped I/O space
+ * @res: Resource describing the I/O space
+ * @phys_addr: physical address of range to be mapped
+ *
+ * Remap the memory mapped I/O space described by the @res and the CPU
+ * physical address @phys_addr into virtual address space.  Only
+ * architectures that have memory mapped IO functions defined (and the
+ * PCI_IOBASE value defined) should call this function.
+ */
+#ifndef pci_remap_iospace
+int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr)
+{
+#if defined(PCI_IOBASE) && defined(CONFIG_MMU)
+	unsigned long vaddr = (unsigned long)PCI_IOBASE + res->start;
+
+	if (!(res->flags & IORESOURCE_IO))
+		return -EINVAL;
+
+	if (res->end > IO_SPACE_LIMIT)
+		return -EINVAL;
+
+	return ioremap_page_range(vaddr, vaddr + resource_size(res), phys_addr,
+				  pgprot_device(PAGE_KERNEL));
+#else
+	/*
+	 * This architecture does not have memory mapped I/O space,
+	 * so this function should never be called
+	 */
+	WARN_ONCE(1, "This architecture does not support memory mapped I/O\n");
+	return -ENODEV;
+#endif
+}
+EXPORT_SYMBOL(pci_remap_iospace);
+#endif
+
+/**
+ * pci_unmap_iospace - Unmap the memory mapped I/O space
+ * @res: resource to be unmapped
+ *
+ * Unmap the CPU virtual address @res from virtual address space.  Only
+ * architectures that have memory mapped IO functions defined (and the
+ * PCI_IOBASE value defined) should call this function.
+ */
+void pci_unmap_iospace(struct resource *res)
+{
+#if defined(PCI_IOBASE) && defined(CONFIG_MMU)
+	unsigned long vaddr = (unsigned long)PCI_IOBASE + res->start;
+
+	vunmap_range(vaddr, vaddr + resource_size(res));
+#endif
+}
+EXPORT_SYMBOL(pci_unmap_iospace);
+
+static void devm_pci_unmap_iospace(struct device *dev, void *ptr)
+{
+	struct resource **res = ptr;
+
+	pci_unmap_iospace(*res);
+}
+
+/**
+ * devm_pci_remap_iospace - Managed pci_remap_iospace()
+ * @dev: Generic device to remap IO address for
+ * @res: Resource describing the I/O space
+ * @phys_addr: physical address of range to be mapped
+ *
+ * Managed pci_remap_iospace().  Map is automatically unmapped on driver
+ * detach.
+ */
+int devm_pci_remap_iospace(struct device *dev, const struct resource *res,
+			   phys_addr_t phys_addr)
+{
+	const struct resource **ptr;
+	int error;
+
+	ptr = devres_alloc(devm_pci_unmap_iospace, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	error = pci_remap_iospace(res, phys_addr);
+	if (error) {
+		devres_free(ptr);
+	} else	{
+		*ptr = res;
+		devres_add(dev, ptr);
+	}
+
+	return error;
+}
+EXPORT_SYMBOL(devm_pci_remap_iospace);
+
+/**
+ * devm_pci_remap_cfgspace - Managed pci_remap_cfgspace()
+ * @dev: Generic device to remap IO address for
+ * @offset: Resource address to map
+ * @size: Size of map
+ *
+ * Managed pci_remap_cfgspace().  Map is automatically unmapped on driver
+ * detach.
+ */
+void __iomem *devm_pci_remap_cfgspace(struct device *dev,
+				      resource_size_t offset,
+				      resource_size_t size)
+{
+	void __iomem **ptr, *addr;
+
+	ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	addr = pci_remap_cfgspace(offset, size);
+	if (addr) {
+		*ptr = addr;
+		devres_add(dev, ptr);
+	} else
+		devres_free(ptr);
+
+	return addr;
+}
+EXPORT_SYMBOL(devm_pci_remap_cfgspace);
+
+/**
+ * devm_pci_remap_cfg_resource - check, request region and ioremap cfg resource
+ * @dev: generic device to handle the resource for
+ * @res: configuration space resource to be handled
+ *
+ * Checks that a resource is a valid memory region, requests the memory
+ * region and ioremaps with pci_remap_cfgspace() API that ensures the
+ * proper PCI configuration space memory attributes are guaranteed.
+ *
+ * All operations are managed and will be undone on driver detach.
+ *
+ * Returns a pointer to the remapped memory or an ERR_PTR() encoded error code
+ * on failure. Usage example::
+ *
+ *	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ *	base = devm_pci_remap_cfg_resource(&pdev->dev, res);
+ *	if (IS_ERR(base))
+ *		return PTR_ERR(base);
+ */
+void __iomem *devm_pci_remap_cfg_resource(struct device *dev,
+					  struct resource *res)
+{
+	resource_size_t size;
+	const char *name;
+	void __iomem *dest_ptr;
+
+	BUG_ON(!dev);
+
+	if (!res || resource_type(res) != IORESOURCE_MEM) {
+		dev_err(dev, "invalid resource\n");
+		return IOMEM_ERR_PTR(-EINVAL);
+	}
+
+	size = resource_size(res);
+
+	if (res->name)
+		name = devm_kasprintf(dev, GFP_KERNEL, "%s %s", dev_name(dev),
+				      res->name);
+	else
+		name = devm_kstrdup(dev, dev_name(dev), GFP_KERNEL);
+	if (!name)
+		return IOMEM_ERR_PTR(-ENOMEM);
+
+	if (!devm_request_mem_region(dev, res->start, size, name)) {
+		dev_err(dev, "can't request region for resource %pR\n", res);
+		return IOMEM_ERR_PTR(-EBUSY);
+	}
+
+	dest_ptr = devm_pci_remap_cfgspace(dev, res->start, size);
+	if (!dest_ptr) {
+		dev_err(dev, "ioremap failed for resource %pR\n", res);
+		devm_release_mem_region(dev, res->start, size);
+		dest_ptr = IOMEM_ERR_PTR(-ENOMEM);
+	}
+
+	return dest_ptr;
+}
+EXPORT_SYMBOL(devm_pci_remap_cfg_resource);
+
+static void __pci_set_master(struct pci_dev *dev, bool enable)
+{
+	u16 old_cmd, cmd;
+
+	pci_read_config_word(dev, PCI_COMMAND, &old_cmd);
+	if (enable)
+		cmd = old_cmd | PCI_COMMAND_MASTER;
+	else
+		cmd = old_cmd & ~PCI_COMMAND_MASTER;
+	if (cmd != old_cmd) {
+		pci_dbg(dev, "%s bus mastering\n",
+			enable ? "enabling" : "disabling");
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+	dev->is_busmaster = enable;
+}
+
+/**
+ * pcibios_setup - process "pci=" kernel boot arguments
+ * @str: string used to pass in "pci=" kernel boot arguments
+ *
+ * Process kernel boot arguments.  This is the default implementation.
+ * Architecture specific implementations can override this as necessary.
+ */
+char * __weak __init pcibios_setup(char *str)
+{
+	return str;
+}
+
+/**
+ * pcibios_set_master - enable PCI bus-mastering for device dev
+ * @dev: the PCI device to enable
+ *
+ * Enables PCI bus-mastering for the device.  This is the default
+ * implementation.  Architecture specific implementations can override
+ * this if necessary.
+ */
+void __weak pcibios_set_master(struct pci_dev *dev)
+{
+	u8 lat;
+
+	/* The latency timer doesn't apply to PCIe (either Type 0 or Type 1) */
+	if (pci_is_pcie(dev))
+		return;
+
+	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
+	if (lat < 16)
+		lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
+	else if (lat > pcibios_max_latency)
+		lat = pcibios_max_latency;
+	else
+		return;
+
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
+}
+
+/**
+ * pci_set_master - enables bus-mastering for device dev
+ * @dev: the PCI device to enable
+ *
+ * Enables bus-mastering on the device and calls pcibios_set_master()
+ * to do the needed arch specific settings.
+ */
+void pci_set_master(struct pci_dev *dev)
+{
+	__pci_set_master(dev, true);
+	pcibios_set_master(dev);
+}
+EXPORT_SYMBOL(pci_set_master);
+
+/**
+ * pci_clear_master - disables bus-mastering for device dev
+ * @dev: the PCI device to disable
+ */
+void pci_clear_master(struct pci_dev *dev)
+{
+	__pci_set_master(dev, false);
+}
+EXPORT_SYMBOL(pci_clear_master);
+
+/**
+ * pci_set_cacheline_size - ensure the CACHE_LINE_SIZE register is programmed
+ * @dev: the PCI device for which MWI is to be enabled
+ *
+ * Helper function for pci_set_mwi.
+ * Originally copied from drivers/net/acenic.c.
+ * Copyright 1998-2001 by Jes Sorensen, <jes@trained-monkey.org>.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int pci_set_cacheline_size(struct pci_dev *dev)
+{
+	u8 cacheline_size;
+
+	if (!pci_cache_line_size)
+		return -EINVAL;
+
+	/* Validate current setting: the PCI_CACHE_LINE_SIZE must be
+	   equal to or multiple of the right value. */
+	pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &cacheline_size);
+	if (cacheline_size >= pci_cache_line_size &&
+	    (cacheline_size % pci_cache_line_size) == 0)
+		return 0;
+
+	/* Write the correct value. */
+	pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, pci_cache_line_size);
+	/* Read it back. */
+	pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &cacheline_size);
+	if (cacheline_size == pci_cache_line_size)
+		return 0;
+
+	pci_dbg(dev, "cache line size of %d is not supported\n",
+		   pci_cache_line_size << 2);
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(pci_set_cacheline_size);
+
+/**
+ * pci_set_mwi - enables memory-write-invalidate PCI transaction
+ * @dev: the PCI device for which MWI is enabled
+ *
+ * Enables the Memory-Write-Invalidate transaction in %PCI_COMMAND.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int pci_set_mwi(struct pci_dev *dev)
+{
+#ifdef PCI_DISABLE_MWI
+	return 0;
+#else
+	int rc;
+	u16 cmd;
+
+	rc = pci_set_cacheline_size(dev);
+	if (rc)
+		return rc;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	if (!(cmd & PCI_COMMAND_INVALIDATE)) {
+		pci_dbg(dev, "enabling Mem-Wr-Inval\n");
+		cmd |= PCI_COMMAND_INVALIDATE;
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+	return 0;
+#endif
+}
+EXPORT_SYMBOL(pci_set_mwi);
+
+/**
+ * pcim_set_mwi - a device-managed pci_set_mwi()
+ * @dev: the PCI device for which MWI is enabled
+ *
+ * Managed pci_set_mwi().
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int pcim_set_mwi(struct pci_dev *dev)
+{
+	struct pci_devres *dr;
+
+	dr = find_pci_dr(dev);
+	if (!dr)
+		return -ENOMEM;
+
+	dr->mwi = 1;
+	return pci_set_mwi(dev);
+}
+EXPORT_SYMBOL(pcim_set_mwi);
+
+/**
+ * pci_try_set_mwi - enables memory-write-invalidate PCI transaction
+ * @dev: the PCI device for which MWI is enabled
+ *
+ * Enables the Memory-Write-Invalidate transaction in %PCI_COMMAND.
+ * Callers are not required to check the return value.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int pci_try_set_mwi(struct pci_dev *dev)
+{
+#ifdef PCI_DISABLE_MWI
+	return 0;
+#else
+	return pci_set_mwi(dev);
+#endif
+}
+EXPORT_SYMBOL(pci_try_set_mwi);
+
+/**
+ * pci_clear_mwi - disables Memory-Write-Invalidate for device dev
+ * @dev: the PCI device to disable
+ *
+ * Disables PCI Memory-Write-Invalidate transaction on the device
+ */
+void pci_clear_mwi(struct pci_dev *dev)
+{
+#ifndef PCI_DISABLE_MWI
+	u16 cmd;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	if (cmd & PCI_COMMAND_INVALIDATE) {
+		cmd &= ~PCI_COMMAND_INVALIDATE;
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+#endif
+}
+EXPORT_SYMBOL(pci_clear_mwi);
+
+/**
+ * pci_disable_parity - disable parity checking for device
+ * @dev: the PCI device to operate on
+ *
+ * Disable parity checking for device @dev
+ */
+void pci_disable_parity(struct pci_dev *dev)
+{
+	u16 cmd;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	if (cmd & PCI_COMMAND_PARITY) {
+		cmd &= ~PCI_COMMAND_PARITY;
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+}
+
+/**
+ * pci_intx - enables/disables PCI INTx for device dev
+ * @pdev: the PCI device to operate on
+ * @enable: boolean: whether to enable or disable PCI INTx
+ *
+ * Enables/disables PCI INTx for device @pdev
+ */
+void pci_intx(struct pci_dev *pdev, int enable)
+{
+	u16 pci_command, new;
+
+	pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
+
+	if (enable)
+		new = pci_command & ~PCI_COMMAND_INTX_DISABLE;
+	else
+		new = pci_command | PCI_COMMAND_INTX_DISABLE;
+
+	if (new != pci_command) {
+		struct pci_devres *dr;
+
+		pci_write_config_word(pdev, PCI_COMMAND, new);
+
+		dr = find_pci_dr(pdev);
+		if (dr && !dr->restore_intx) {
+			dr->restore_intx = 1;
+			dr->orig_intx = !enable;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(pci_intx);
+
+static bool pci_check_and_set_intx_mask(struct pci_dev *dev, bool mask)
+{
+	struct pci_bus *bus = dev->bus;
+	bool mask_updated = true;
+	u32 cmd_status_dword;
+	u16 origcmd, newcmd;
+	unsigned long flags;
+	bool irq_pending;
+
+	/*
+	 * We do a single dword read to retrieve both command and status.
+	 * Document assumptions that make this possible.
+	 */
+	BUILD_BUG_ON(PCI_COMMAND % 4);
+	BUILD_BUG_ON(PCI_COMMAND + 2 != PCI_STATUS);
+
+	raw_spin_lock_irqsave(&pci_lock, flags);
+
+	bus->ops->read(bus, dev->devfn, PCI_COMMAND, 4, &cmd_status_dword);
+
+	irq_pending = (cmd_status_dword >> 16) & PCI_STATUS_INTERRUPT;
+
+	/*
+	 * Check interrupt status register to see whether our device
+	 * triggered the interrupt (when masking) or the next IRQ is
+	 * already pending (when unmasking).
+	 */
+	if (mask != irq_pending) {
+		mask_updated = false;
+		goto done;
+	}
+
+	origcmd = cmd_status_dword;
+	newcmd = origcmd & ~PCI_COMMAND_INTX_DISABLE;
+	if (mask)
+		newcmd |= PCI_COMMAND_INTX_DISABLE;
+	if (newcmd != origcmd)
+		bus->ops->write(bus, dev->devfn, PCI_COMMAND, 2, newcmd);
+
+done:
+	raw_spin_unlock_irqrestore(&pci_lock, flags);
+
+	return mask_updated;
+}
+
+/**
+ * pci_check_and_mask_intx - mask INTx on pending interrupt
+ * @dev: the PCI device to operate on
+ *
+ * Check if the device dev has its INTx line asserted, mask it and return
+ * true in that case. False is returned if no interrupt was pending.
+ */
+bool pci_check_and_mask_intx(struct pci_dev *dev)
+{
+	return pci_check_and_set_intx_mask(dev, true);
+}
+EXPORT_SYMBOL_GPL(pci_check_and_mask_intx);
+
+/**
+ * pci_check_and_unmask_intx - unmask INTx if no interrupt is pending
+ * @dev: the PCI device to operate on
+ *
+ * Check if the device dev has its INTx line asserted, unmask it if not and
+ * return true. False is returned and the mask remains active if there was
+ * still an interrupt pending.
+ */
+bool pci_check_and_unmask_intx(struct pci_dev *dev)
+{
+	return pci_check_and_set_intx_mask(dev, false);
+}
+EXPORT_SYMBOL_GPL(pci_check_and_unmask_intx);
+
+/**
+ * pci_wait_for_pending_transaction - wait for pending transaction
+ * @dev: the PCI device to operate on
+ *
+ * Return 0 if transaction is pending 1 otherwise.
+ */
+int pci_wait_for_pending_transaction(struct pci_dev *dev)
+{
+	if (!pci_is_pcie(dev))
+		return 1;
+
+	return pci_wait_for_pending(dev, pci_pcie_cap(dev) + PCI_EXP_DEVSTA,
+				    PCI_EXP_DEVSTA_TRPND);
+}
+EXPORT_SYMBOL(pci_wait_for_pending_transaction);
+
+/**
+ * pcie_flr - initiate a PCIe function level reset
+ * @dev: device to reset
+ *
+ * Initiate a function level reset unconditionally on @dev without
+ * checking any flags and DEVCAP
+ */
+int pcie_flr(struct pci_dev *dev)
+{
+	if (!pci_wait_for_pending_transaction(dev))
+		pci_err(dev, "timed out waiting for pending transaction; performing function level reset anyway\n");
+
+	pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);
+
+	if (dev->imm_ready)
+		return 0;
+
+	/*
+	 * Per PCIe r4.0, sec 6.6.2, a device must complete an FLR within
+	 * 100ms, but may silently discard requests while the FLR is in
+	 * progress.  Wait 100ms before trying to access the device.
+	 */
+	msleep(100);
+
+	return pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS);
+}
+EXPORT_SYMBOL_GPL(pcie_flr);
+
+/**
+ * pcie_reset_flr - initiate a PCIe function level reset
+ * @dev: device to reset
+ * @probe: if true, return 0 if device can be reset this way
+ *
+ * Initiate a function level reset on @dev.
+ */
+int pcie_reset_flr(struct pci_dev *dev, bool probe)
+{
+	if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET)
+		return -ENOTTY;
+
+	if (!(dev->devcap & PCI_EXP_DEVCAP_FLR))
+		return -ENOTTY;
+
+	if (probe)
+		return 0;
+
+	return pcie_flr(dev);
+}
+EXPORT_SYMBOL_GPL(pcie_reset_flr);
+
+static int pci_af_flr(struct pci_dev *dev, bool probe)
+{
+	int pos;
+	u8 cap;
+
+	pos = pci_find_capability(dev, PCI_CAP_ID_AF);
+	if (!pos)
+		return -ENOTTY;
+
+	if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET)
+		return -ENOTTY;
+
+	pci_read_config_byte(dev, pos + PCI_AF_CAP, &cap);
+	if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR))
+		return -ENOTTY;
+
+	if (probe)
+		return 0;
+
+	/*
+	 * Wait for Transaction Pending bit to clear.  A word-aligned test
+	 * is used, so we use the control offset rather than status and shift
+	 * the test bit to match.
+	 */
+	if (!pci_wait_for_pending(dev, pos + PCI_AF_CTRL,
+				 PCI_AF_STATUS_TP << 8))
+		pci_err(dev, "timed out waiting for pending transaction; performing AF function level reset anyway\n");
+
+	pci_write_config_byte(dev, pos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);
+
+	if (dev->imm_ready)
+		return 0;
+
+	/*
+	 * Per Advanced Capabilities for Conventional PCI ECN, 13 April 2006,
+	 * updated 27 July 2006; a device must complete an FLR within
+	 * 100ms, but may silently discard requests while the FLR is in
+	 * progress.  Wait 100ms before trying to access the device.
+	 */
+	msleep(100);
+
+	return pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS);
+}
+
+/**
+ * pci_pm_reset - Put device into PCI_D3 and back into PCI_D0.
+ * @dev: Device to reset.
+ * @probe: if true, return 0 if the device can be reset this way.
+ *
+ * If @dev supports native PCI PM and its PCI_PM_CTRL_NO_SOFT_RESET flag is
+ * unset, it will be reinitialized internally when going from PCI_D3hot to
+ * PCI_D0.  If that's the case and the device is not in a low-power state
+ * already, force it into PCI_D3hot and back to PCI_D0, causing it to be reset.
+ *
+ * NOTE: This causes the caller to sleep for twice the device power transition
+ * cooldown period, which for the D0->D3hot and D3hot->D0 transitions is 10 ms
+ * by default (i.e. unless the @dev's d3hot_delay field has a different value).
+ * Moreover, only devices in D0 can be reset by this function.
+ */
+static int pci_pm_reset(struct pci_dev *dev, bool probe)
+{
+	u16 csr;
+
+	if (!dev->pm_cap || dev->dev_flags & PCI_DEV_FLAGS_NO_PM_RESET)
+		return -ENOTTY;
+
+	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &csr);
+	if (csr & PCI_PM_CTRL_NO_SOFT_RESET)
+		return -ENOTTY;
+
+	if (probe)
+		return 0;
+
+	if (dev->current_state != PCI_D0)
+		return -EINVAL;
+
+	csr &= ~PCI_PM_CTRL_STATE_MASK;
+	csr |= PCI_D3hot;
+	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
+	pci_dev_d3_sleep(dev);
+
+	csr &= ~PCI_PM_CTRL_STATE_MASK;
+	csr |= PCI_D0;
+	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
+	pci_dev_d3_sleep(dev);
+
+	return pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS);
+}
+
+/**
+ * pcie_wait_for_link_status - Wait for link status change
+ * @pdev: Device whose link to wait for.
+ * @use_lt: Use the LT bit if TRUE, or the DLLLA bit if FALSE.
+ * @active: Waiting for active or inactive?
+ *
+ * Return 0 if successful, or -ETIMEDOUT if status has not changed within
+ * PCIE_LINK_RETRAIN_TIMEOUT_MS milliseconds.
+ */
+static int pcie_wait_for_link_status(struct pci_dev *pdev,
+				     bool use_lt, bool active)
+{
+	u16 lnksta_mask, lnksta_match;
+	unsigned long end_jiffies;
+	u16 lnksta;
+
+	lnksta_mask = use_lt ? PCI_EXP_LNKSTA_LT : PCI_EXP_LNKSTA_DLLLA;
+	lnksta_match = active ? lnksta_mask : 0;
+
+	end_jiffies = jiffies + msecs_to_jiffies(PCIE_LINK_RETRAIN_TIMEOUT_MS);
+	do {
+		pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta);
+		if ((lnksta & lnksta_mask) == lnksta_match)
+			return 0;
+		msleep(1);
+	} while (time_before(jiffies, end_jiffies));
+
+	return -ETIMEDOUT;
+}
+
+/**
+ * pcie_retrain_link - Request a link retrain and wait for it to complete
+ * @pdev: Device whose link to retrain.
+ * @use_lt: Use the LT bit if TRUE, or the DLLLA bit if FALSE, for status.
+ *
+ * Retrain completion status is retrieved from the Link Status Register
+ * according to @use_lt.  It is not verified whether the use of the DLLLA
+ * bit is valid.
+ *
+ * Return 0 if successful, or -ETIMEDOUT if training has not completed
+ * within PCIE_LINK_RETRAIN_TIMEOUT_MS milliseconds.
+ */
+int pcie_retrain_link(struct pci_dev *pdev, bool use_lt)
+{
+	int rc;
+
+	/*
+	 * Ensure the updated LNKCTL parameters are used during link
+	 * training by checking that there is no ongoing link training to
+	 * avoid LTSSM race as recommended in Implementation Note at the
+	 * end of PCIe r6.0.1 sec 7.5.3.7.
+	 */
+	rc = pcie_wait_for_link_status(pdev, use_lt, !use_lt);
+	if (rc)
+		return rc;
+
+	pcie_capability_set_word(pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_RL);
+	if (pdev->clear_retrain_link) {
+		/*
+		 * Due to an erratum in some devices the Retrain Link bit
+		 * needs to be cleared again manually to allow the link
+		 * training to succeed.
+		 */
+		pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_RL);
+	}
+
+	return pcie_wait_for_link_status(pdev, use_lt, !use_lt);
+}
+
+/**
+ * pcie_wait_for_link_delay - Wait until link is active or inactive
+ * @pdev: Bridge device
+ * @active: waiting for active or inactive?
+ * @delay: Delay to wait after link has become active (in ms)
+ *
+ * Use this to wait till link becomes active or inactive.
+ */
+static bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active,
+				     int delay)
+{
+	int rc;
+
+	/*
+	 * Some controllers might not implement link active reporting. In this
+	 * case, we wait for 1000 ms + any delay requested by the caller.
+	 */
+	if (!pdev->link_active_reporting) {
+		msleep(PCIE_LINK_RETRAIN_TIMEOUT_MS + delay);
+		return true;
+	}
+
+	/*
+	 * PCIe r4.0 sec 6.6.1, a component must enter LTSSM Detect within 20ms,
+	 * after which we should expect an link active if the reset was
+	 * successful. If so, software must wait a minimum 100ms before sending
+	 * configuration requests to devices downstream this port.
+	 *
+	 * If the link fails to activate, either the device was physically
+	 * removed or the link is permanently failed.
+	 */
+	if (active)
+		msleep(20);
+	rc = pcie_wait_for_link_status(pdev, false, active);
+	if (active) {
+		if (rc)
+			rc = pcie_failed_link_retrain(pdev);
+		if (rc)
+			return false;
+
+		msleep(delay);
+		return true;
+	}
+
+	if (rc)
+		return false;
+
+	return true;
+}
+
+/**
+ * pcie_wait_for_link - Wait until link is active or inactive
+ * @pdev: Bridge device
+ * @active: waiting for active or inactive?
+ *
+ * Use this to wait till link becomes active or inactive.
+ */
+bool pcie_wait_for_link(struct pci_dev *pdev, bool active)
+{
+	return pcie_wait_for_link_delay(pdev, active, 100);
+}
+
+/*
+ * Find maximum D3cold delay required by all the devices on the bus.  The
+ * spec says 100 ms, but firmware can lower it and we allow drivers to
+ * increase it as well.
+ *
+ * Called with @pci_bus_sem locked for reading.
+ */
+static int pci_bus_max_d3cold_delay(const struct pci_bus *bus)
+{
+	const struct pci_dev *pdev;
+	int min_delay = 100;
+	int max_delay = 0;
+
+	list_for_each_entry(pdev, &bus->devices, bus_list) {
+		if (pdev->d3cold_delay < min_delay)
+			min_delay = pdev->d3cold_delay;
+		if (pdev->d3cold_delay > max_delay)
+			max_delay = pdev->d3cold_delay;
+	}
+
+	return max(min_delay, max_delay);
+}
+
+/**
+ * pci_bridge_wait_for_secondary_bus - Wait for secondary bus to be accessible
+ * @dev: PCI bridge
+ * @reset_type: reset type in human-readable form
+ *
+ * Handle necessary delays before access to the devices on the secondary
+ * side of the bridge are permitted after D3cold to D0 transition
+ * or Conventional Reset.
+ *
+ * For PCIe this means the delays in PCIe 5.0 section 6.6.1. For
+ * conventional PCI it means Tpvrh + Trhfa specified in PCI 3.0 section
+ * 4.3.2.
+ *
+ * Return 0 on success or -ENOTTY if the first device on the secondary bus
+ * failed to become accessible.
+ */
+int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
+{
+	struct pci_dev *child;
+	int delay;
+
+	if (pci_dev_is_disconnected(dev))
+		return 0;
+
+	if (!pci_is_bridge(dev))
+		return 0;
+
+	down_read(&pci_bus_sem);
+
+	/*
+	 * We only deal with devices that are present currently on the bus.
+	 * For any hot-added devices the access delay is handled in pciehp
+	 * board_added(). In case of ACPI hotplug the firmware is expected
+	 * to configure the devices before OS is notified.
+	 */
+	if (!dev->subordinate || list_empty(&dev->subordinate->devices)) {
+		up_read(&pci_bus_sem);
+		return 0;
+	}
+
+	/* Take d3cold_delay requirements into account */
+	delay = pci_bus_max_d3cold_delay(dev->subordinate);
+	if (!delay) {
+		up_read(&pci_bus_sem);
+		return 0;
+	}
+
+	child = list_first_entry(&dev->subordinate->devices, struct pci_dev,
+				 bus_list);
+	up_read(&pci_bus_sem);
+
+	/*
+	 * Conventional PCI and PCI-X we need to wait Tpvrh + Trhfa before
+	 * accessing the device after reset (that is 1000 ms + 100 ms).
+	 */
+	if (!pci_is_pcie(dev)) {
+		pci_dbg(dev, "waiting %d ms for secondary bus\n", 1000 + delay);
+		msleep(1000 + delay);
+		return 0;
+	}
+
+	/*
+	 * For PCIe downstream and root ports that do not support speeds
+	 * greater than 5 GT/s need to wait minimum 100 ms. For higher
+	 * speeds (gen3) we need to wait first for the data link layer to
+	 * become active.
+	 *
+	 * However, 100 ms is the minimum and the PCIe spec says the
+	 * software must allow at least 1s before it can determine that the
+	 * device that did not respond is a broken device. Also device can
+	 * take longer than that to respond if it indicates so through Request
+	 * Retry Status completions.
+	 *
+	 * Therefore we wait for 100 ms and check for the device presence
+	 * until the timeout expires.
+	 */
+	if (!pcie_downstream_port(dev))
+		return 0;
+
+	if (pcie_get_speed_cap(dev) <= PCIE_SPEED_5_0GT) {
+		u16 status;
+
+		pci_dbg(dev, "waiting %d ms for downstream link\n", delay);
+		msleep(delay);
+
+		if (!pci_dev_wait(child, reset_type, PCI_RESET_WAIT - delay))
+			return 0;
+
+		/*
+		 * If the port supports active link reporting we now check
+		 * whether the link is active and if not bail out early with
+		 * the assumption that the device is not present anymore.
+		 */
+		if (!dev->link_active_reporting)
+			return -ENOTTY;
+
+		pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &status);
+		if (!(status & PCI_EXP_LNKSTA_DLLLA))
+			return -ENOTTY;
+
+		return pci_dev_wait(child, reset_type,
+				    PCIE_RESET_READY_POLL_MS - PCI_RESET_WAIT);
+	}
+
+	pci_dbg(dev, "waiting %d ms for downstream link, after activation\n",
+		delay);
+	if (!pcie_wait_for_link_delay(dev, true, delay)) {
+		/* Did not train, no need to wait any further */
+		pci_info(dev, "Data Link Layer Link Active not set in 1000 msec\n");
+		return -ENOTTY;
+	}
+
+	return pci_dev_wait(child, reset_type,
+			    PCIE_RESET_READY_POLL_MS - delay);
+}
+
+void pci_reset_secondary_bus(struct pci_dev *dev)
+{
+	u16 ctrl;
+
+	pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
+	ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
+	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+
+	/*
+	 * PCI spec v3.0 7.6.4.2 requires minimum Trst of 1ms.  Double
+	 * this to 2ms to ensure that we meet the minimum requirement.
+	 */
+	msleep(2);
+
+	ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+}
+
+void __weak pcibios_reset_secondary_bus(struct pci_dev *dev)
+{
+	pci_reset_secondary_bus(dev);
+}
+
+/**
+ * pci_bridge_secondary_bus_reset - Reset the secondary bus on a PCI bridge.
+ * @dev: Bridge device
+ *
+ * Use the bridge control register to assert reset on the secondary bus.
+ * Devices on the secondary bus are left in power-on state.
+ */
+int pci_bridge_secondary_bus_reset(struct pci_dev *dev)
+{
+	pcibios_reset_secondary_bus(dev);
+
+	return pci_bridge_wait_for_secondary_bus(dev, "bus reset");
+}
+EXPORT_SYMBOL_GPL(pci_bridge_secondary_bus_reset);
+
+static int pci_parent_bus_reset(struct pci_dev *dev, bool probe)
+{
+	struct pci_dev *pdev;
+
+	if (pci_is_root_bus(dev->bus) || dev->subordinate ||
+	    !dev->bus->self || dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET)
+		return -ENOTTY;
+
+	list_for_each_entry(pdev, &dev->bus->devices, bus_list)
+		if (pdev != dev)
+			return -ENOTTY;
+
+	if (probe)
+		return 0;
+
+	return pci_bridge_secondary_bus_reset(dev->bus->self);
+}
+
+static int pci_reset_hotplug_slot(struct hotplug_slot *hotplug, bool probe)
+{
+	int rc = -ENOTTY;
+
+	if (!hotplug || !try_module_get(hotplug->owner))
+		return rc;
+
+	if (hotplug->ops->reset_slot)
+		rc = hotplug->ops->reset_slot(hotplug, probe);
+
+	module_put(hotplug->owner);
+
+	return rc;
+}
+
+static int pci_dev_reset_slot_function(struct pci_dev *dev, bool probe)
+{
+	if (dev->multifunction || dev->subordinate || !dev->slot ||
+	    dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET)
+		return -ENOTTY;
+
+	return pci_reset_hotplug_slot(dev->slot->hotplug, probe);
+}
+
+static int pci_reset_bus_function(struct pci_dev *dev, bool probe)
+{
+	int rc;
+
+	rc = pci_dev_reset_slot_function(dev, probe);
+	if (rc != -ENOTTY)
+		return rc;
+	return pci_parent_bus_reset(dev, probe);
+}
+
+void pci_dev_lock(struct pci_dev *dev)
+{
+	/* block PM suspend, driver probe, etc. */
+	device_lock(&dev->dev);
+	pci_cfg_access_lock(dev);
+}
+EXPORT_SYMBOL_GPL(pci_dev_lock);
+
+/* Return 1 on successful lock, 0 on contention */
+int pci_dev_trylock(struct pci_dev *dev)
+{
+	if (device_trylock(&dev->dev)) {
+		if (pci_cfg_access_trylock(dev))
+			return 1;
+		device_unlock(&dev->dev);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_dev_trylock);
+
+void pci_dev_unlock(struct pci_dev *dev)
+{
+	pci_cfg_access_unlock(dev);
+	device_unlock(&dev->dev);
+}
+EXPORT_SYMBOL_GPL(pci_dev_unlock);
+
+static void pci_dev_save_and_disable(struct pci_dev *dev)
+{
+	const struct pci_error_handlers *err_handler =
+			dev->driver ? dev->driver->err_handler : NULL;
+
+	/*
+	 * dev->driver->err_handler->reset_prepare() is protected against
+	 * races with ->remove() by the device lock, which must be held by
+	 * the caller.
+	 */
+	if (err_handler && err_handler->reset_prepare)
+		err_handler->reset_prepare(dev);
+
+	/*
+	 * Wake-up device prior to save.  PM registers default to D0 after
+	 * reset and a simple register restore doesn't reliably return
+	 * to a non-D0 state anyway.
+	 */
+	pci_set_power_state(dev, PCI_D0);
+
+	pci_save_state(dev);
+	/*
+	 * Disable the device by clearing the Command register, except for
+	 * INTx-disable which is set.  This not only disables MMIO and I/O port
+	 * BARs, but also prevents the device from being Bus Master, preventing
+	 * DMA from the device including MSI/MSI-X interrupts.  For PCI 2.3
+	 * compliant devices, INTx-disable prevents legacy interrupts.
+	 */
+	pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
+}
+
+static void pci_dev_restore(struct pci_dev *dev)
+{
+	const struct pci_error_handlers *err_handler =
+			dev->driver ? dev->driver->err_handler : NULL;
+
+	pci_restore_state(dev);
+
+	/*
+	 * dev->driver->err_handler->reset_done() is protected against
+	 * races with ->remove() by the device lock, which must be held by
+	 * the caller.
+	 */
+	if (err_handler && err_handler->reset_done)
+		err_handler->reset_done(dev);
+}
+
+/* dev->reset_methods[] is a 0-terminated list of indices into this array */
+static const struct pci_reset_fn_method pci_reset_fn_methods[] = {
+	{ },
+	{ pci_dev_specific_reset, .name = "device_specific" },
+	{ pci_dev_acpi_reset, .name = "acpi" },
+	{ pcie_reset_flr, .name = "flr" },
+	{ pci_af_flr, .name = "af_flr" },
+	{ pci_pm_reset, .name = "pm" },
+	{ pci_reset_bus_function, .name = "bus" },
+};
+
+static ssize_t reset_method_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	ssize_t len = 0;
+	int i, m;
+
+	for (i = 0; i < PCI_NUM_RESET_METHODS; i++) {
+		m = pdev->reset_methods[i];
+		if (!m)
+			break;
+
+		len += sysfs_emit_at(buf, len, "%s%s", len ? " " : "",
+				     pci_reset_fn_methods[m].name);
+	}
+
+	if (len)
+		len += sysfs_emit_at(buf, len, "\n");
+
+	return len;
+}
+
+static int reset_method_lookup(const char *name)
+{
+	int m;
+
+	for (m = 1; m < PCI_NUM_RESET_METHODS; m++) {
+		if (sysfs_streq(name, pci_reset_fn_methods[m].name))
+			return m;
+	}
+
+	return 0;	/* not found */
+}
+
+static ssize_t reset_method_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	char *options, *name;
+	int m, n;
+	u8 reset_methods[PCI_NUM_RESET_METHODS] = { 0 };
+
+	if (sysfs_streq(buf, "")) {
+		pdev->reset_methods[0] = 0;
+		pci_warn(pdev, "All device reset methods disabled by user");
+		return count;
+	}
+
+	if (sysfs_streq(buf, "default")) {
+		pci_init_reset_methods(pdev);
+		return count;
+	}
+
+	options = kstrndup(buf, count, GFP_KERNEL);
+	if (!options)
+		return -ENOMEM;
+
+	n = 0;
+	while ((name = strsep(&options, " ")) != NULL) {
+		if (sysfs_streq(name, ""))
+			continue;
+
+		name = strim(name);
+
+		m = reset_method_lookup(name);
+		if (!m) {
+			pci_err(pdev, "Invalid reset method '%s'", name);
+			goto error;
+		}
+
+		if (pci_reset_fn_methods[m].reset_fn(pdev, PCI_RESET_PROBE)) {
+			pci_err(pdev, "Unsupported reset method '%s'", name);
+			goto error;
+		}
+
+		if (n == PCI_NUM_RESET_METHODS - 1) {
+			pci_err(pdev, "Too many reset methods\n");
+			goto error;
+		}
+
+		reset_methods[n++] = m;
+	}
+
+	reset_methods[n] = 0;
+
+	/* Warn if dev-specific supported but not highest priority */
+	if (pci_reset_fn_methods[1].reset_fn(pdev, PCI_RESET_PROBE) == 0 &&
+	    reset_methods[0] != 1)
+		pci_warn(pdev, "Device-specific reset disabled/de-prioritized by user");
+	memcpy(pdev->reset_methods, reset_methods, sizeof(pdev->reset_methods));
+	kfree(options);
+	return count;
+
+error:
+	/* Leave previous methods unchanged */
+	kfree(options);
+	return -EINVAL;
+}
+static DEVICE_ATTR_RW(reset_method);
+
+static struct attribute *pci_dev_reset_method_attrs[] = {
+	&dev_attr_reset_method.attr,
+	NULL,
+};
+
+static umode_t pci_dev_reset_method_attr_is_visible(struct kobject *kobj,
+						    struct attribute *a, int n)
+{
+	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+
+	if (!pci_reset_supported(pdev))
+		return 0;
+
+	return a->mode;
+}
+
+const struct attribute_group pci_dev_reset_method_attr_group = {
+	.attrs = pci_dev_reset_method_attrs,
+	.is_visible = pci_dev_reset_method_attr_is_visible,
+};
+
+/**
+ * __pci_reset_function_locked - reset a PCI device function while holding
+ * the @dev mutex lock.
+ * @dev: PCI device to reset
+ *
+ * Some devices allow an individual function to be reset without affecting
+ * other functions in the same device.  The PCI device must be responsive
+ * to PCI config space in order to use this function.
+ *
+ * The device function is presumed to be unused and the caller is holding
+ * the device mutex lock when this function is called.
+ *
+ * Resetting the device will make the contents of PCI configuration space
+ * random, so any caller of this must be prepared to reinitialise the
+ * device including MSI, bus mastering, BARs, decoding IO and memory spaces,
+ * etc.
+ *
+ * Returns 0 if the device function was successfully reset or negative if the
+ * device doesn't support resetting a single function.
+ */
+int __pci_reset_function_locked(struct pci_dev *dev)
+{
+	int i, m, rc;
+
+	might_sleep();
+
+	/*
+	 * A reset method returns -ENOTTY if it doesn't support this device and
+	 * we should try the next method.
+	 *
+	 * If it returns 0 (success), we're finished.  If it returns any other
+	 * error, we're also finished: this indicates that further reset
+	 * mechanisms might be broken on the device.
+	 */
+	for (i = 0; i < PCI_NUM_RESET_METHODS; i++) {
+		m = dev->reset_methods[i];
+		if (!m)
+			return -ENOTTY;
+
+		rc = pci_reset_fn_methods[m].reset_fn(dev, PCI_RESET_DO_RESET);
+		if (!rc)
+			return 0;
+		if (rc != -ENOTTY)
+			return rc;
+	}
+
+	return -ENOTTY;
+}
+EXPORT_SYMBOL_GPL(__pci_reset_function_locked);
+
+/**
+ * pci_init_reset_methods - check whether device can be safely reset
+ * and store supported reset mechanisms.
+ * @dev: PCI device to check for reset mechanisms
+ *
+ * Some devices allow an individual function to be reset without affecting
+ * other functions in the same device.  The PCI device must be in D0-D3hot
+ * state.
+ *
+ * Stores reset mechanisms supported by device in reset_methods byte array
+ * which is a member of struct pci_dev.
+ */
+void pci_init_reset_methods(struct pci_dev *dev)
+{
+	int m, i, rc;
+
+	BUILD_BUG_ON(ARRAY_SIZE(pci_reset_fn_methods) != PCI_NUM_RESET_METHODS);
+
+	might_sleep();
+
+	i = 0;
+	for (m = 1; m < PCI_NUM_RESET_METHODS; m++) {
+		rc = pci_reset_fn_methods[m].reset_fn(dev, PCI_RESET_PROBE);
+		if (!rc)
+			dev->reset_methods[i++] = m;
+		else if (rc != -ENOTTY)
+			break;
+	}
+
+	dev->reset_methods[i] = 0;
+}
+
+/**
+ * pci_reset_function - quiesce and reset a PCI device function
+ * @dev: PCI device to reset
+ *
+ * Some devices allow an individual function to be reset without affecting
+ * other functions in the same device.  The PCI device must be responsive
+ * to PCI config space in order to use this function.
+ *
+ * This function does not just reset the PCI portion of a device, but
+ * clears all the state associated with the device.  This function differs
+ * from __pci_reset_function_locked() in that it saves and restores device state
+ * over the reset and takes the PCI device lock.
+ *
+ * Returns 0 if the device function was successfully reset or negative if the
+ * device doesn't support resetting a single function.
+ */
+int pci_reset_function(struct pci_dev *dev)
+{
+	int rc;
+
+	if (!pci_reset_supported(dev))
+		return -ENOTTY;
+
+	pci_dev_lock(dev);
+	pci_dev_save_and_disable(dev);
+
+	rc = __pci_reset_function_locked(dev);
+
+	pci_dev_restore(dev);
+	pci_dev_unlock(dev);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pci_reset_function);
+
+/**
+ * pci_reset_function_locked - quiesce and reset a PCI device function
+ * @dev: PCI device to reset
+ *
+ * Some devices allow an individual function to be reset without affecting
+ * other functions in the same device.  The PCI device must be responsive
+ * to PCI config space in order to use this function.
+ *
+ * This function does not just reset the PCI portion of a device, but
+ * clears all the state associated with the device.  This function differs
+ * from __pci_reset_function_locked() in that it saves and restores device state
+ * over the reset.  It also differs from pci_reset_function() in that it
+ * requires the PCI device lock to be held.
+ *
+ * Returns 0 if the device function was successfully reset or negative if the
+ * device doesn't support resetting a single function.
+ */
+int pci_reset_function_locked(struct pci_dev *dev)
+{
+	int rc;
+
+	if (!pci_reset_supported(dev))
+		return -ENOTTY;
+
+	pci_dev_save_and_disable(dev);
+
+	rc = __pci_reset_function_locked(dev);
+
+	pci_dev_restore(dev);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pci_reset_function_locked);
+
+/**
+ * pci_try_reset_function - quiesce and reset a PCI device function
+ * @dev: PCI device to reset
+ *
+ * Same as above, except return -EAGAIN if unable to lock device.
+ */
+int pci_try_reset_function(struct pci_dev *dev)
+{
+	int rc;
+
+	if (!pci_reset_supported(dev))
+		return -ENOTTY;
+
+	if (!pci_dev_trylock(dev))
+		return -EAGAIN;
+
+	pci_dev_save_and_disable(dev);
+	rc = __pci_reset_function_locked(dev);
+	pci_dev_restore(dev);
+	pci_dev_unlock(dev);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pci_try_reset_function);
+
+/* Do any devices on or below this bus prevent a bus reset? */
+static bool pci_bus_resettable(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+
+	if (bus->self && (bus->self->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET))
+		return false;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET ||
+		    (dev->subordinate && !pci_bus_resettable(dev->subordinate)))
+			return false;
+	}
+
+	return true;
+}
+
+/* Lock devices from the top of the tree down */
+static void pci_bus_lock(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		pci_dev_lock(dev);
+		if (dev->subordinate)
+			pci_bus_lock(dev->subordinate);
+	}
+}
+
+/* Unlock devices from the bottom of the tree up */
+static void pci_bus_unlock(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		if (dev->subordinate)
+			pci_bus_unlock(dev->subordinate);
+		pci_dev_unlock(dev);
+	}
+}
+
+/* Return 1 on successful lock, 0 on contention */
+static int pci_bus_trylock(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		if (!pci_dev_trylock(dev))
+			goto unlock;
+		if (dev->subordinate) {
+			if (!pci_bus_trylock(dev->subordinate)) {
+				pci_dev_unlock(dev);
+				goto unlock;
+			}
+		}
+	}
+	return 1;
+
+unlock:
+	list_for_each_entry_continue_reverse(dev, &bus->devices, bus_list) {
+		if (dev->subordinate)
+			pci_bus_unlock(dev->subordinate);
+		pci_dev_unlock(dev);
+	}
+	return 0;
+}
+
+/* Do any devices on or below this slot prevent a bus reset? */
+static bool pci_slot_resettable(struct pci_slot *slot)
+{
+	struct pci_dev *dev;
+
+	if (slot->bus->self &&
+	    (slot->bus->self->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET))
+		return false;
+
+	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
+		if (!dev->slot || dev->slot != slot)
+			continue;
+		if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET ||
+		    (dev->subordinate && !pci_bus_resettable(dev->subordinate)))
+			return false;
+	}
+
+	return true;
+}
+
+/* Lock devices from the top of the tree down */
+static void pci_slot_lock(struct pci_slot *slot)
+{
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
+		if (!dev->slot || dev->slot != slot)
+			continue;
+		pci_dev_lock(dev);
+		if (dev->subordinate)
+			pci_bus_lock(dev->subordinate);
+	}
+}
+
+/* Unlock devices from the bottom of the tree up */
+static void pci_slot_unlock(struct pci_slot *slot)
+{
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
+		if (!dev->slot || dev->slot != slot)
+			continue;
+		if (dev->subordinate)
+			pci_bus_unlock(dev->subordinate);
+		pci_dev_unlock(dev);
+	}
+}
+
+/* Return 1 on successful lock, 0 on contention */
+static int pci_slot_trylock(struct pci_slot *slot)
+{
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
+		if (!dev->slot || dev->slot != slot)
+			continue;
+		if (!pci_dev_trylock(dev))
+			goto unlock;
+		if (dev->subordinate) {
+			if (!pci_bus_trylock(dev->subordinate)) {
+				pci_dev_unlock(dev);
+				goto unlock;
+			}
+		}
+	}
+	return 1;
+
+unlock:
+	list_for_each_entry_continue_reverse(dev,
+					     &slot->bus->devices, bus_list) {
+		if (!dev->slot || dev->slot != slot)
+			continue;
+		if (dev->subordinate)
+			pci_bus_unlock(dev->subordinate);
+		pci_dev_unlock(dev);
+	}
+	return 0;
+}
+
+/*
+ * Save and disable devices from the top of the tree down while holding
+ * the @dev mutex lock for the entire tree.
+ */
+static void pci_bus_save_and_disable_locked(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		pci_dev_save_and_disable(dev);
+		if (dev->subordinate)
+			pci_bus_save_and_disable_locked(dev->subordinate);
+	}
+}
+
+/*
+ * Restore devices from top of the tree down while holding @dev mutex lock
+ * for the entire tree.  Parent bridges need to be restored before we can
+ * get to subordinate devices.
+ */
+static void pci_bus_restore_locked(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		pci_dev_restore(dev);
+		if (dev->subordinate)
+			pci_bus_restore_locked(dev->subordinate);
+	}
+}
+
+/*
+ * Save and disable devices from the top of the tree down while holding
+ * the @dev mutex lock for the entire tree.
+ */
+static void pci_slot_save_and_disable_locked(struct pci_slot *slot)
+{
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
+		if (!dev->slot || dev->slot != slot)
+			continue;
+		pci_dev_save_and_disable(dev);
+		if (dev->subordinate)
+			pci_bus_save_and_disable_locked(dev->subordinate);
+	}
+}
+
+/*
+ * Restore devices from top of the tree down while holding @dev mutex lock
+ * for the entire tree.  Parent bridges need to be restored before we can
+ * get to subordinate devices.
+ */
+static void pci_slot_restore_locked(struct pci_slot *slot)
+{
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
+		if (!dev->slot || dev->slot != slot)
+			continue;
+		pci_dev_restore(dev);
+		if (dev->subordinate)
+			pci_bus_restore_locked(dev->subordinate);
+	}
+}
+
+static int pci_slot_reset(struct pci_slot *slot, bool probe)
+{
+	int rc;
+
+	if (!slot || !pci_slot_resettable(slot))
+		return -ENOTTY;
+
+	if (!probe)
+		pci_slot_lock(slot);
+
+	might_sleep();
+
+	rc = pci_reset_hotplug_slot(slot->hotplug, probe);
+
+	if (!probe)
+		pci_slot_unlock(slot);
+
+	return rc;
+}
+
+/**
+ * pci_probe_reset_slot - probe whether a PCI slot can be reset
+ * @slot: PCI slot to probe
+ *
+ * Return 0 if slot can be reset, negative if a slot reset is not supported.
+ */
+int pci_probe_reset_slot(struct pci_slot *slot)
+{
+	return pci_slot_reset(slot, PCI_RESET_PROBE);
+}
+EXPORT_SYMBOL_GPL(pci_probe_reset_slot);
+
+/**
+ * __pci_reset_slot - Try to reset a PCI slot
+ * @slot: PCI slot to reset
+ *
+ * A PCI bus may host multiple slots, each slot may support a reset mechanism
+ * independent of other slots.  For instance, some slots may support slot power
+ * control.  In the case of a 1:1 bus to slot architecture, this function may
+ * wrap the bus reset to avoid spurious slot related events such as hotplug.
+ * Generally a slot reset should be attempted before a bus reset.  All of the
+ * function of the slot and any subordinate buses behind the slot are reset
+ * through this function.  PCI config space of all devices in the slot and
+ * behind the slot is saved before and restored after reset.
+ *
+ * Same as above except return -EAGAIN if the slot cannot be locked
+ */
+static int __pci_reset_slot(struct pci_slot *slot)
+{
+	int rc;
+
+	rc = pci_slot_reset(slot, PCI_RESET_PROBE);
+	if (rc)
+		return rc;
+
+	if (pci_slot_trylock(slot)) {
+		pci_slot_save_and_disable_locked(slot);
+		might_sleep();
+		rc = pci_reset_hotplug_slot(slot->hotplug, PCI_RESET_DO_RESET);
+		pci_slot_restore_locked(slot);
+		pci_slot_unlock(slot);
+	} else
+		rc = -EAGAIN;
+
+	return rc;
+}
+
+static int pci_bus_reset(struct pci_bus *bus, bool probe)
+{
+	int ret;
+
+	if (!bus->self || !pci_bus_resettable(bus))
+		return -ENOTTY;
+
+	if (probe)
+		return 0;
+
+	pci_bus_lock(bus);
+
+	might_sleep();
+
+	ret = pci_bridge_secondary_bus_reset(bus->self);
+
+	pci_bus_unlock(bus);
+
+	return ret;
+}
+
+/**
+ * pci_bus_error_reset - reset the bridge's subordinate bus
+ * @bridge: The parent device that connects to the bus to reset
+ *
+ * This function will first try to reset the slots on this bus if the method is
+ * available. If slot reset fails or is not available, this will fall back to a
+ * secondary bus reset.
+ */
+int pci_bus_error_reset(struct pci_dev *bridge)
+{
+	struct pci_bus *bus = bridge->subordinate;
+	struct pci_slot *slot;
+
+	if (!bus)
+		return -ENOTTY;
+
+	mutex_lock(&pci_slot_mutex);
+	if (list_empty(&bus->slots))
+		goto bus_reset;
+
+	list_for_each_entry(slot, &bus->slots, list)
+		if (pci_probe_reset_slot(slot))
+			goto bus_reset;
+
+	list_for_each_entry(slot, &bus->slots, list)
+		if (pci_slot_reset(slot, PCI_RESET_DO_RESET))
+			goto bus_reset;
+
+	mutex_unlock(&pci_slot_mutex);
+	return 0;
+bus_reset:
+	mutex_unlock(&pci_slot_mutex);
+	return pci_bus_reset(bridge->subordinate, PCI_RESET_DO_RESET);
+}
+
+/**
+ * pci_probe_reset_bus - probe whether a PCI bus can be reset
+ * @bus: PCI bus to probe
+ *
+ * Return 0 if bus can be reset, negative if a bus reset is not supported.
+ */
+int pci_probe_reset_bus(struct pci_bus *bus)
+{
+	return pci_bus_reset(bus, PCI_RESET_PROBE);
+}
+EXPORT_SYMBOL_GPL(pci_probe_reset_bus);
+
+/**
+ * __pci_reset_bus - Try to reset a PCI bus
+ * @bus: top level PCI bus to reset
+ *
+ * Same as above except return -EAGAIN if the bus cannot be locked
+ */
+static int __pci_reset_bus(struct pci_bus *bus)
+{
+	int rc;
+
+	rc = pci_bus_reset(bus, PCI_RESET_PROBE);
+	if (rc)
+		return rc;
+
+	if (pci_bus_trylock(bus)) {
+		pci_bus_save_and_disable_locked(bus);
+		might_sleep();
+		rc = pci_bridge_secondary_bus_reset(bus->self);
+		pci_bus_restore_locked(bus);
+		pci_bus_unlock(bus);
+	} else
+		rc = -EAGAIN;
+
+	return rc;
+}
+
+/**
+ * pci_reset_bus - Try to reset a PCI bus
+ * @pdev: top level PCI device to reset via slot/bus
+ *
+ * Same as above except return -EAGAIN if the bus cannot be locked
+ */
+int pci_reset_bus(struct pci_dev *pdev)
+{
+	return (!pci_probe_reset_slot(pdev->slot)) ?
+	    __pci_reset_slot(pdev->slot) : __pci_reset_bus(pdev->bus);
+}
+EXPORT_SYMBOL_GPL(pci_reset_bus);
+
+/**
+ * pcix_get_max_mmrbc - get PCI-X maximum designed memory read byte count
+ * @dev: PCI device to query
+ *
+ * Returns mmrbc: maximum designed memory read count in bytes or
+ * appropriate error value.
+ */
+int pcix_get_max_mmrbc(struct pci_dev *dev)
+{
+	int cap;
+	u32 stat;
+
+	cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+	if (!cap)
+		return -EINVAL;
+
+	if (pci_read_config_dword(dev, cap + PCI_X_STATUS, &stat))
+		return -EINVAL;
+
+	return 512 << ((stat & PCI_X_STATUS_MAX_READ) >> 21);
+}
+EXPORT_SYMBOL(pcix_get_max_mmrbc);
+
+/**
+ * pcix_get_mmrbc - get PCI-X maximum memory read byte count
+ * @dev: PCI device to query
+ *
+ * Returns mmrbc: maximum memory read count in bytes or appropriate error
+ * value.
+ */
+int pcix_get_mmrbc(struct pci_dev *dev)
+{
+	int cap;
+	u16 cmd;
+
+	cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+	if (!cap)
+		return -EINVAL;
+
+	if (pci_read_config_word(dev, cap + PCI_X_CMD, &cmd))
+		return -EINVAL;
+
+	return 512 << ((cmd & PCI_X_CMD_MAX_READ) >> 2);
+}
+EXPORT_SYMBOL(pcix_get_mmrbc);
+
+/**
+ * pcix_set_mmrbc - set PCI-X maximum memory read byte count
+ * @dev: PCI device to query
+ * @mmrbc: maximum memory read count in bytes
+ *    valid values are 512, 1024, 2048, 4096
+ *
+ * If possible sets maximum memory read byte count, some bridges have errata
+ * that prevent this.
+ */
+int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc)
+{
+	int cap;
+	u32 stat, v, o;
+	u16 cmd;
+
+	if (mmrbc < 512 || mmrbc > 4096 || !is_power_of_2(mmrbc))
+		return -EINVAL;
+
+	v = ffs(mmrbc) - 10;
+
+	cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+	if (!cap)
+		return -EINVAL;
+
+	if (pci_read_config_dword(dev, cap + PCI_X_STATUS, &stat))
+		return -EINVAL;
+
+	if (v > (stat & PCI_X_STATUS_MAX_READ) >> 21)
+		return -E2BIG;
+
+	if (pci_read_config_word(dev, cap + PCI_X_CMD, &cmd))
+		return -EINVAL;
+
+	o = (cmd & PCI_X_CMD_MAX_READ) >> 2;
+	if (o != v) {
+		if (v > o && (dev->bus->bus_flags & PCI_BUS_FLAGS_NO_MMRBC))
+			return -EIO;
+
+		cmd &= ~PCI_X_CMD_MAX_READ;
+		cmd |= v << 2;
+		if (pci_write_config_word(dev, cap + PCI_X_CMD, cmd))
+			return -EIO;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(pcix_set_mmrbc);
+
+/**
+ * pcie_get_readrq - get PCI Express read request size
+ * @dev: PCI device to query
+ *
+ * Returns maximum memory read request in bytes or appropriate error value.
+ */
+int pcie_get_readrq(struct pci_dev *dev)
+{
+	u16 ctl;
+
+	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &ctl);
+
+	return 128 << ((ctl & PCI_EXP_DEVCTL_READRQ) >> 12);
+}
+EXPORT_SYMBOL(pcie_get_readrq);
+
+/**
+ * pcie_set_readrq - set PCI Express maximum memory read request
+ * @dev: PCI device to query
+ * @rq: maximum memory read count in bytes
+ *    valid values are 128, 256, 512, 1024, 2048, 4096
+ *
+ * If possible sets maximum memory read request in bytes
+ */
+int pcie_set_readrq(struct pci_dev *dev, int rq)
+{
+	u16 v;
+	int ret;
+	struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
+
+	if (rq < 128 || rq > 4096 || !is_power_of_2(rq))
+		return -EINVAL;
+
+	/*
+	 * If using the "performance" PCIe config, we clamp the read rq
+	 * size to the max packet size to keep the host bridge from
+	 * generating requests larger than we can cope with.
+	 */
+	if (pcie_bus_config == PCIE_BUS_PERFORMANCE) {
+		int mps = pcie_get_mps(dev);
+
+		if (mps < rq)
+			rq = mps;
+	}
+
+	v = (ffs(rq) - 8) << 12;
+
+	if (bridge->no_inc_mrrs) {
+		int max_mrrs = pcie_get_readrq(dev);
+
+		if (rq > max_mrrs) {
+			pci_info(dev, "can't set Max_Read_Request_Size to %d; max is %d\n", rq, max_mrrs);
+			return -EINVAL;
+		}
+	}
+
+	ret = pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
+						  PCI_EXP_DEVCTL_READRQ, v);
+
+	return pcibios_err_to_errno(ret);
+}
+EXPORT_SYMBOL(pcie_set_readrq);
+
+/**
+ * pcie_get_mps - get PCI Express maximum payload size
+ * @dev: PCI device to query
+ *
+ * Returns maximum payload size in bytes
+ */
+int pcie_get_mps(struct pci_dev *dev)
+{
+	u16 ctl;
+
+	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &ctl);
+
+	return 128 << ((ctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5);
+}
+EXPORT_SYMBOL(pcie_get_mps);
+
+/**
+ * pcie_set_mps - set PCI Express maximum payload size
+ * @dev: PCI device to query
+ * @mps: maximum payload size in bytes
+ *    valid values are 128, 256, 512, 1024, 2048, 4096
+ *
+ * If possible sets maximum payload size
+ */
+int pcie_set_mps(struct pci_dev *dev, int mps)
+{
+	u16 v;
+	int ret;
+
+	if (mps < 128 || mps > 4096 || !is_power_of_2(mps))
+		return -EINVAL;
+
+	v = ffs(mps) - 8;
+	if (v > dev->pcie_mpss)
+		return -EINVAL;
+	v <<= 5;
+
+	ret = pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
+						  PCI_EXP_DEVCTL_PAYLOAD, v);
+
+	return pcibios_err_to_errno(ret);
+}
+EXPORT_SYMBOL(pcie_set_mps);
+
+/**
+ * pcie_bandwidth_available - determine minimum link settings of a PCIe
+ *			      device and its bandwidth limitation
+ * @dev: PCI device to query
+ * @limiting_dev: storage for device causing the bandwidth limitation
+ * @speed: storage for speed of limiting device
+ * @width: storage for width of limiting device
+ *
+ * Walk up the PCI device chain and find the point where the minimum
+ * bandwidth is available.  Return the bandwidth available there and (if
+ * limiting_dev, speed, and width pointers are supplied) information about
+ * that point.  The bandwidth returned is in Mb/s, i.e., megabits/second of
+ * raw bandwidth.
+ */
+u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev,
+			     enum pci_bus_speed *speed,
+			     enum pcie_link_width *width)
+{
+	u16 lnksta;
+	enum pci_bus_speed next_speed;
+	enum pcie_link_width next_width;
+	u32 bw, next_bw;
+
+	if (speed)
+		*speed = PCI_SPEED_UNKNOWN;
+	if (width)
+		*width = PCIE_LNK_WIDTH_UNKNOWN;
+
+	bw = 0;
+
+	while (dev) {
+		pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta);
+
+		next_speed = pcie_link_speed[lnksta & PCI_EXP_LNKSTA_CLS];
+		next_width = FIELD_GET(PCI_EXP_LNKSTA_NLW, lnksta);
+
+		next_bw = next_width * PCIE_SPEED2MBS_ENC(next_speed);
+
+		/* Check if current device limits the total bandwidth */
+		if (!bw || next_bw <= bw) {
+			bw = next_bw;
+
+			if (limiting_dev)
+				*limiting_dev = dev;
+			if (speed)
+				*speed = next_speed;
+			if (width)
+				*width = next_width;
+		}
+
+		dev = pci_upstream_bridge(dev);
+	}
+
+	return bw;
+}
+EXPORT_SYMBOL(pcie_bandwidth_available);
+
+/**
+ * pcie_get_speed_cap - query for the PCI device's link speed capability
+ * @dev: PCI device to query
+ *
+ * Query the PCI device speed capability.  Return the maximum link speed
+ * supported by the device.
+ */
+enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev)
+{
+	u32 lnkcap2, lnkcap;
+
+	/*
+	 * Link Capabilities 2 was added in PCIe r3.0, sec 7.8.18.  The
+	 * implementation note there recommends using the Supported Link
+	 * Speeds Vector in Link Capabilities 2 when supported.
+	 *
+	 * Without Link Capabilities 2, i.e., prior to PCIe r3.0, software
+	 * should use the Supported Link Speeds field in Link Capabilities,
+	 * where only 2.5 GT/s and 5.0 GT/s speeds were defined.
+	 */
+	pcie_capability_read_dword(dev, PCI_EXP_LNKCAP2, &lnkcap2);
+
+	/* PCIe r3.0-compliant */
+	if (lnkcap2)
+		return PCIE_LNKCAP2_SLS2SPEED(lnkcap2);
+
+	pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap);
+	if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_5_0GB)
+		return PCIE_SPEED_5_0GT;
+	else if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_2_5GB)
+		return PCIE_SPEED_2_5GT;
+
+	return PCI_SPEED_UNKNOWN;
+}
+EXPORT_SYMBOL(pcie_get_speed_cap);
+
+/**
+ * pcie_get_width_cap - query for the PCI device's link width capability
+ * @dev: PCI device to query
+ *
+ * Query the PCI device width capability.  Return the maximum link width
+ * supported by the device.
+ */
+enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev)
+{
+	u32 lnkcap;
+
+	pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap);
+	if (lnkcap)
+		return FIELD_GET(PCI_EXP_LNKCAP_MLW, lnkcap);
+
+	return PCIE_LNK_WIDTH_UNKNOWN;
+}
+EXPORT_SYMBOL(pcie_get_width_cap);
+
+/**
+ * pcie_bandwidth_capable - calculate a PCI device's link bandwidth capability
+ * @dev: PCI device
+ * @speed: storage for link speed
+ * @width: storage for link width
+ *
+ * Calculate a PCI device's link bandwidth by querying for its link speed
+ * and width, multiplying them, and applying encoding overhead.  The result
+ * is in Mb/s, i.e., megabits/second of raw bandwidth.
+ */
+u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed *speed,
+			   enum pcie_link_width *width)
+{
+	*speed = pcie_get_speed_cap(dev);
+	*width = pcie_get_width_cap(dev);
+
+	if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN)
+		return 0;
+
+	return *width * PCIE_SPEED2MBS_ENC(*speed);
+}
+
+/**
+ * __pcie_print_link_status - Report the PCI device's link speed and width
+ * @dev: PCI device to query
+ * @verbose: Print info even when enough bandwidth is available
+ *
+ * If the available bandwidth at the device is less than the device is
+ * capable of, report the device's maximum possible bandwidth and the
+ * upstream link that limits its performance.  If @verbose, always print
+ * the available bandwidth, even if the device isn't constrained.
+ */
+void __pcie_print_link_status(struct pci_dev *dev, bool verbose)
+{
+	enum pcie_link_width width, width_cap;
+	enum pci_bus_speed speed, speed_cap;
+	struct pci_dev *limiting_dev = NULL;
+	u32 bw_avail, bw_cap;
+
+	bw_cap = pcie_bandwidth_capable(dev, &speed_cap, &width_cap);
+	bw_avail = pcie_bandwidth_available(dev, &limiting_dev, &speed, &width);
+
+	if (bw_avail >= bw_cap && verbose)
+		pci_info(dev, "%u.%03u Gb/s available PCIe bandwidth (%s x%d link)\n",
+			 bw_cap / 1000, bw_cap % 1000,
+			 pci_speed_string(speed_cap), width_cap);
+	else if (bw_avail < bw_cap)
+		pci_info(dev, "%u.%03u Gb/s available PCIe bandwidth, limited by %s x%d link at %s (capable of %u.%03u Gb/s with %s x%d link)\n",
+			 bw_avail / 1000, bw_avail % 1000,
+			 pci_speed_string(speed), width,
+			 limiting_dev ? pci_name(limiting_dev) : "<unknown>",
+			 bw_cap / 1000, bw_cap % 1000,
+			 pci_speed_string(speed_cap), width_cap);
+}
+
+/**
+ * pcie_print_link_status - Report the PCI device's link speed and width
+ * @dev: PCI device to query
+ *
+ * Report the available bandwidth at the device.
+ */
+void pcie_print_link_status(struct pci_dev *dev)
+{
+	__pcie_print_link_status(dev, true);
+}
+EXPORT_SYMBOL(pcie_print_link_status);
+
+/**
+ * pci_select_bars - Make BAR mask from the type of resource
+ * @dev: the PCI device for which BAR mask is made
+ * @flags: resource type mask to be selected
+ *
+ * This helper routine makes bar mask from the type of resource.
+ */
+int pci_select_bars(struct pci_dev *dev, unsigned long flags)
+{
+	int i, bars = 0;
+	for (i = 0; i < PCI_NUM_RESOURCES; i++)
+		if (pci_resource_flags(dev, i) & flags)
+			bars |= (1 << i);
+	return bars;
+}
+EXPORT_SYMBOL(pci_select_bars);
+
+/* Some architectures require additional programming to enable VGA */
+static arch_set_vga_state_t arch_set_vga_state;
+
+void __init pci_register_set_vga_state(arch_set_vga_state_t func)
+{
+	arch_set_vga_state = func;	/* NULL disables */
+}
+
+static int pci_set_vga_state_arch(struct pci_dev *dev, bool decode,
+				  unsigned int command_bits, u32 flags)
+{
+	if (arch_set_vga_state)
+		return arch_set_vga_state(dev, decode, command_bits,
+						flags);
+	return 0;
+}
+
+/**
+ * pci_set_vga_state - set VGA decode state on device and parents if requested
+ * @dev: the PCI device
+ * @decode: true = enable decoding, false = disable decoding
+ * @command_bits: PCI_COMMAND_IO and/or PCI_COMMAND_MEMORY
+ * @flags: traverse ancestors and change bridges
+ * CHANGE_BRIDGE_ONLY / CHANGE_BRIDGE
+ */
+int pci_set_vga_state(struct pci_dev *dev, bool decode,
+		      unsigned int command_bits, u32 flags)
+{
+	struct pci_bus *bus;
+	struct pci_dev *bridge;
+	u16 cmd;
+	int rc;
+
+	WARN_ON((flags & PCI_VGA_STATE_CHANGE_DECODES) && (command_bits & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY)));
+
+	/* ARCH specific VGA enables */
+	rc = pci_set_vga_state_arch(dev, decode, command_bits, flags);
+	if (rc)
+		return rc;
+
+	if (flags & PCI_VGA_STATE_CHANGE_DECODES) {
+		pci_read_config_word(dev, PCI_COMMAND, &cmd);
+		if (decode)
+			cmd |= command_bits;
+		else
+			cmd &= ~command_bits;
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+
+	if (!(flags & PCI_VGA_STATE_CHANGE_BRIDGE))
+		return 0;
+
+	bus = dev->bus;
+	while (bus) {
+		bridge = bus->self;
+		if (bridge) {
+			pci_read_config_word(bridge, PCI_BRIDGE_CONTROL,
+					     &cmd);
+			if (decode)
+				cmd |= PCI_BRIDGE_CTL_VGA;
+			else
+				cmd &= ~PCI_BRIDGE_CTL_VGA;
+			pci_write_config_word(bridge, PCI_BRIDGE_CONTROL,
+					      cmd);
+		}
+		bus = bus->parent;
+	}
+	return 0;
+}
+
+#ifdef CONFIG_ACPI
+bool pci_pr3_present(struct pci_dev *pdev)
+{
+	struct acpi_device *adev;
+
+	if (acpi_disabled)
+		return false;
+
+	adev = ACPI_COMPANION(&pdev->dev);
+	if (!adev)
+		return false;
+
+	return adev->power.flags.power_resources &&
+		acpi_has_method(adev->handle, "_PR3");
+}
+EXPORT_SYMBOL_GPL(pci_pr3_present);
+#endif
+
+/**
+ * pci_add_dma_alias - Add a DMA devfn alias for a device
+ * @dev: the PCI device for which alias is added
+ * @devfn_from: alias slot and function
+ * @nr_devfns: number of subsequent devfns to alias
+ *
+ * This helper encodes an 8-bit devfn as a bit number in dma_alias_mask
+ * which is used to program permissible bus-devfn source addresses for DMA
+ * requests in an IOMMU.  These aliases factor into IOMMU group creation
+ * and are useful for devices generating DMA requests beyond or different
+ * from their logical bus-devfn.  Examples include device quirks where the
+ * device simply uses the wrong devfn, as well as non-transparent bridges
+ * where the alias may be a proxy for devices in another domain.
+ *
+ * IOMMU group creation is performed during device discovery or addition,
+ * prior to any potential DMA mapping and therefore prior to driver probing
+ * (especially for userspace assigned devices where IOMMU group definition
+ * cannot be left as a userspace activity).  DMA aliases should therefore
+ * be configured via quirks, such as the PCI fixup header quirk.
+ */
+void pci_add_dma_alias(struct pci_dev *dev, u8 devfn_from,
+		       unsigned int nr_devfns)
+{
+	int devfn_to;
+
+	nr_devfns = min(nr_devfns, (unsigned int)MAX_NR_DEVFNS - devfn_from);
+	devfn_to = devfn_from + nr_devfns - 1;
+
+	if (!dev->dma_alias_mask)
+		dev->dma_alias_mask = bitmap_zalloc(MAX_NR_DEVFNS, GFP_KERNEL);
+	if (!dev->dma_alias_mask) {
+		pci_warn(dev, "Unable to allocate DMA alias mask\n");
+		return;
+	}
+
+	bitmap_set(dev->dma_alias_mask, devfn_from, nr_devfns);
+
+	if (nr_devfns == 1)
+		pci_info(dev, "Enabling fixed DMA alias to %02x.%d\n",
+				PCI_SLOT(devfn_from), PCI_FUNC(devfn_from));
+	else if (nr_devfns > 1)
+		pci_info(dev, "Enabling fixed DMA alias for devfn range from %02x.%d to %02x.%d\n",
+				PCI_SLOT(devfn_from), PCI_FUNC(devfn_from),
+				PCI_SLOT(devfn_to), PCI_FUNC(devfn_to));
+}
+
+bool pci_devs_are_dma_aliases(struct pci_dev *dev1, struct pci_dev *dev2)
+{
+	return (dev1->dma_alias_mask &&
+		test_bit(dev2->devfn, dev1->dma_alias_mask)) ||
+	       (dev2->dma_alias_mask &&
+		test_bit(dev1->devfn, dev2->dma_alias_mask)) ||
+	       pci_real_dma_dev(dev1) == dev2 ||
+	       pci_real_dma_dev(dev2) == dev1;
+}
+
+bool pci_device_is_present(struct pci_dev *pdev)
+{
+	u32 v;
+
+	/* Check PF if pdev is a VF, since VF Vendor/Device IDs are 0xffff */
+	pdev = pci_physfn(pdev);
+	if (pci_dev_is_disconnected(pdev))
+		return false;
+	return pci_bus_read_dev_vendor_id(pdev->bus, pdev->devfn, &v, 0);
+}
+EXPORT_SYMBOL_GPL(pci_device_is_present);
+
+void pci_ignore_hotplug(struct pci_dev *dev)
+{
+	struct pci_dev *bridge = dev->bus->self;
+
+	dev->ignore_hotplug = 1;
+	/* Propagate the "ignore hotplug" setting to the parent bridge. */
+	if (bridge)
+		bridge->ignore_hotplug = 1;
+}
+EXPORT_SYMBOL_GPL(pci_ignore_hotplug);
+
+/**
+ * pci_real_dma_dev - Get PCI DMA device for PCI device
+ * @dev: the PCI device that may have a PCI DMA alias
+ *
+ * Permits the platform to provide architecture-specific functionality to
+ * devices needing to alias DMA to another PCI device on another PCI bus. If
+ * the PCI device is on the same bus, it is recommended to use
+ * pci_add_dma_alias(). This is the default implementation. Architecture
+ * implementations can override this.
+ */
+struct pci_dev __weak *pci_real_dma_dev(struct pci_dev *dev)
+{
+	return dev;
+}
+
+resource_size_t __weak pcibios_default_alignment(void)
+{
+	return 0;
+}
+
+/*
+ * Arches that don't want to expose struct resource to userland as-is in
+ * sysfs and /proc can implement their own pci_resource_to_user().
+ */
+void __weak pci_resource_to_user(const struct pci_dev *dev, int bar,
+				 const struct resource *rsrc,
+				 resource_size_t *start, resource_size_t *end)
+{
+	*start = rsrc->start;
+	*end = rsrc->end;
+}
+
+static char *resource_alignment_param;
+static DEFINE_SPINLOCK(resource_alignment_lock);
+
+/**
+ * pci_specified_resource_alignment - get resource alignment specified by user.
+ * @dev: the PCI device to get
+ * @resize: whether or not to change resources' size when reassigning alignment
+ *
+ * RETURNS: Resource alignment if it is specified.
+ *          Zero if it is not specified.
+ */
+static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev,
+							bool *resize)
+{
+	int align_order, count;
+	resource_size_t align = pcibios_default_alignment();
+	const char *p;
+	int ret;
+
+	spin_lock(&resource_alignment_lock);
+	p = resource_alignment_param;
+	if (!p || !*p)
+		goto out;
+	if (pci_has_flag(PCI_PROBE_ONLY)) {
+		align = 0;
+		pr_info_once("PCI: Ignoring requested alignments (PCI_PROBE_ONLY)\n");
+		goto out;
+	}
+
+	while (*p) {
+		count = 0;
+		if (sscanf(p, "%d%n", &align_order, &count) == 1 &&
+		    p[count] == '@') {
+			p += count + 1;
+			if (align_order > 63) {
+				pr_err("PCI: Invalid requested alignment (order %d)\n",
+				       align_order);
+				align_order = PAGE_SHIFT;
+			}
+		} else {
+			align_order = PAGE_SHIFT;
+		}
+
+		ret = pci_dev_str_match(dev, p, &p);
+		if (ret == 1) {
+			*resize = true;
+			align = 1ULL << align_order;
+			break;
+		} else if (ret < 0) {
+			pr_err("PCI: Can't parse resource_alignment parameter: %s\n",
+			       p);
+			break;
+		}
+
+		if (*p != ';' && *p != ',') {
+			/* End of param or invalid format */
+			break;
+		}
+		p++;
+	}
+out:
+	spin_unlock(&resource_alignment_lock);
+	return align;
+}
+
+static void pci_request_resource_alignment(struct pci_dev *dev, int bar,
+					   resource_size_t align, bool resize)
+{
+	struct resource *r = &dev->resource[bar];
+	resource_size_t size;
+
+	if (!(r->flags & IORESOURCE_MEM))
+		return;
+
+	if (r->flags & IORESOURCE_PCI_FIXED) {
+		pci_info(dev, "BAR%d %pR: ignoring requested alignment %#llx\n",
+			 bar, r, (unsigned long long)align);
+		return;
+	}
+
+	size = resource_size(r);
+	if (size >= align)
+		return;
+
+	/*
+	 * Increase the alignment of the resource.  There are two ways we
+	 * can do this:
+	 *
+	 * 1) Increase the size of the resource.  BARs are aligned on their
+	 *    size, so when we reallocate space for this resource, we'll
+	 *    allocate it with the larger alignment.  This also prevents
+	 *    assignment of any other BARs inside the alignment region, so
+	 *    if we're requesting page alignment, this means no other BARs
+	 *    will share the page.
+	 *
+	 *    The disadvantage is that this makes the resource larger than
+	 *    the hardware BAR, which may break drivers that compute things
+	 *    based on the resource size, e.g., to find registers at a
+	 *    fixed offset before the end of the BAR.
+	 *
+	 * 2) Retain the resource size, but use IORESOURCE_STARTALIGN and
+	 *    set r->start to the desired alignment.  By itself this
+	 *    doesn't prevent other BARs being put inside the alignment
+	 *    region, but if we realign *every* resource of every device in
+	 *    the system, none of them will share an alignment region.
+	 *
+	 * When the user has requested alignment for only some devices via
+	 * the "pci=resource_alignment" argument, "resize" is true and we
+	 * use the first method.  Otherwise we assume we're aligning all
+	 * devices and we use the second.
+	 */
+
+	pci_info(dev, "BAR%d %pR: requesting alignment to %#llx\n",
+		 bar, r, (unsigned long long)align);
+
+	if (resize) {
+		r->start = 0;
+		r->end = align - 1;
+	} else {
+		r->flags &= ~IORESOURCE_SIZEALIGN;
+		r->flags |= IORESOURCE_STARTALIGN;
+		r->start = align;
+		r->end = r->start + size - 1;
+	}
+	r->flags |= IORESOURCE_UNSET;
+}
+
+/*
+ * This function disables memory decoding and releases memory resources
+ * of the device specified by kernel's boot parameter 'pci=resource_alignment='.
+ * It also rounds up size to specified alignment.
+ * Later on, the kernel will assign page-aligned memory resource back
+ * to the device.
+ */
+void pci_reassigndev_resource_alignment(struct pci_dev *dev)
+{
+	int i;
+	struct resource *r;
+	resource_size_t align;
+	u16 command;
+	bool resize = false;
+
+	/*
+	 * VF BARs are read-only zero according to SR-IOV spec r1.1, sec
+	 * 3.4.1.11.  Their resources are allocated from the space
+	 * described by the VF BARx register in the PF's SR-IOV capability.
+	 * We can't influence their alignment here.
+	 */
+	if (dev->is_virtfn)
+		return;
+
+	/* check if specified PCI is target device to reassign */
+	align = pci_specified_resource_alignment(dev, &resize);
+	if (!align)
+		return;
+
+	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
+	    (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) {
+		pci_warn(dev, "Can't reassign resources to host bridge\n");
+		return;
+	}
+
+	pci_read_config_word(dev, PCI_COMMAND, &command);
+	command &= ~PCI_COMMAND_MEMORY;
+	pci_write_config_word(dev, PCI_COMMAND, command);
+
+	for (i = 0; i <= PCI_ROM_RESOURCE; i++)
+		pci_request_resource_alignment(dev, i, align, resize);
+
+	/*
+	 * Need to disable bridge's resource window,
+	 * to enable the kernel to reassign new resource
+	 * window later on.
+	 */
+	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+		for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) {
+			r = &dev->resource[i];
+			if (!(r->flags & IORESOURCE_MEM))
+				continue;
+			r->flags |= IORESOURCE_UNSET;
+			r->end = resource_size(r) - 1;
+			r->start = 0;
+		}
+		pci_disable_bridge_window(dev);
+	}
+}
+
+static ssize_t resource_alignment_show(const struct bus_type *bus, char *buf)
+{
+	size_t count = 0;
+
+	spin_lock(&resource_alignment_lock);
+	if (resource_alignment_param)
+		count = sysfs_emit(buf, "%s\n", resource_alignment_param);
+	spin_unlock(&resource_alignment_lock);
+
+	return count;
+}
+
+static ssize_t resource_alignment_store(const struct bus_type *bus,
+					const char *buf, size_t count)
+{
+	char *param, *old, *end;
+
+	if (count >= (PAGE_SIZE - 1))
+		return -EINVAL;
+
+	param = kstrndup(buf, count, GFP_KERNEL);
+	if (!param)
+		return -ENOMEM;
+
+	end = strchr(param, '\n');
+	if (end)
+		*end = '\0';
+
+	spin_lock(&resource_alignment_lock);
+	old = resource_alignment_param;
+	if (strlen(param)) {
+		resource_alignment_param = param;
+	} else {
+		kfree(param);
+		resource_alignment_param = NULL;
+	}
+	spin_unlock(&resource_alignment_lock);
+
+	kfree(old);
+
+	return count;
+}
+
+static BUS_ATTR_RW(resource_alignment);
+
+static int __init pci_resource_alignment_sysfs_init(void)
+{
+	return bus_create_file(&pci_bus_type,
+					&bus_attr_resource_alignment);
+}
+late_initcall(pci_resource_alignment_sysfs_init);
+
+static void pci_no_domains(void)
+{
+#ifdef CONFIG_PCI_DOMAINS
+	pci_domains_supported = 0;
+#endif
+}
+
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+static DEFINE_IDA(pci_domain_nr_static_ida);
+static DEFINE_IDA(pci_domain_nr_dynamic_ida);
+
+static void of_pci_reserve_static_domain_nr(void)
+{
+	struct device_node *np;
+	int domain_nr;
+
+	for_each_node_by_type(np, "pci") {
+		domain_nr = of_get_pci_domain_nr(np);
+		if (domain_nr < 0)
+			continue;
+		/*
+		 * Permanently allocate domain_nr in dynamic_ida
+		 * to prevent it from dynamic allocation.
+		 */
+		ida_alloc_range(&pci_domain_nr_dynamic_ida,
+				domain_nr, domain_nr, GFP_KERNEL);
+	}
+}
+
+static int of_pci_bus_find_domain_nr(struct device *parent)
+{
+	static bool static_domains_reserved = false;
+	int domain_nr;
+
+	/* On the first call scan device tree for static allocations. */
+	if (!static_domains_reserved) {
+		of_pci_reserve_static_domain_nr();
+		static_domains_reserved = true;
+	}
+
+	if (parent) {
+		/*
+		 * If domain is in DT, allocate it in static IDA.  This
+		 * prevents duplicate static allocations in case of errors
+		 * in DT.
+		 */
+		domain_nr = of_get_pci_domain_nr(parent->of_node);
+		if (domain_nr >= 0)
+			return ida_alloc_range(&pci_domain_nr_static_ida,
+					       domain_nr, domain_nr,
+					       GFP_KERNEL);
+	}
+
+	/*
+	 * If domain was not specified in DT, choose a free ID from dynamic
+	 * allocations. All domain numbers from DT are permanently in
+	 * dynamic allocations to prevent assigning them to other DT nodes
+	 * without static domain.
+	 */
+	return ida_alloc(&pci_domain_nr_dynamic_ida, GFP_KERNEL);
+}
+
+static void of_pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
+{
+	if (bus->domain_nr < 0)
+		return;
+
+	/* Release domain from IDA where it was allocated. */
+	if (of_get_pci_domain_nr(parent->of_node) == bus->domain_nr)
+		ida_free(&pci_domain_nr_static_ida, bus->domain_nr);
+	else
+		ida_free(&pci_domain_nr_dynamic_ida, bus->domain_nr);
+}
+
+int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
+{
+	return acpi_disabled ? of_pci_bus_find_domain_nr(parent) :
+			       acpi_pci_bus_find_domain_nr(bus);
+}
+
+void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
+{
+	if (!acpi_disabled)
+		return;
+	of_pci_bus_release_domain_nr(bus, parent);
+}
+#endif
+
+/**
+ * pci_ext_cfg_avail - can we access extended PCI config space?
+ *
+ * Returns 1 if we can access PCI extended config space (offsets
+ * greater than 0xff). This is the default implementation. Architecture
+ * implementations can override this.
+ */
+int __weak pci_ext_cfg_avail(void)
+{
+	return 1;
+}
+
+void __weak pci_fixup_cardbus(struct pci_bus *bus)
+{
+}
+EXPORT_SYMBOL(pci_fixup_cardbus);
+
+static int __init pci_setup(char *str)
+{
+	while (str) {
+		char *k = strchr(str, ',');
+		if (k)
+			*k++ = 0;
+		if (*str && (str = pcibios_setup(str)) && *str) {
+			if (!strcmp(str, "nomsi")) {
+				pci_no_msi();
+			} else if (!strncmp(str, "noats", 5)) {
+				pr_info("PCIe: ATS is disabled\n");
+				pcie_ats_disabled = true;
+			} else if (!strcmp(str, "noaer")) {
+				pci_no_aer();
+			} else if (!strcmp(str, "earlydump")) {
+				pci_early_dump = true;
+			} else if (!strncmp(str, "realloc=", 8)) {
+				pci_realloc_get_opt(str + 8);
+			} else if (!strncmp(str, "realloc", 7)) {
+				pci_realloc_get_opt("on");
+			} else if (!strcmp(str, "nodomains")) {
+				pci_no_domains();
+			} else if (!strncmp(str, "noari", 5)) {
+				pcie_ari_disabled = true;
+			} else if (!strncmp(str, "cbiosize=", 9)) {
+				pci_cardbus_io_size = memparse(str + 9, &str);
+			} else if (!strncmp(str, "cbmemsize=", 10)) {
+				pci_cardbus_mem_size = memparse(str + 10, &str);
+			} else if (!strncmp(str, "resource_alignment=", 19)) {
+				resource_alignment_param = str + 19;
+			} else if (!strncmp(str, "ecrc=", 5)) {
+				pcie_ecrc_get_policy(str + 5);
+			} else if (!strncmp(str, "hpiosize=", 9)) {
+				pci_hotplug_io_size = memparse(str + 9, &str);
+			} else if (!strncmp(str, "hpmmiosize=", 11)) {
+				pci_hotplug_mmio_size = memparse(str + 11, &str);
+			} else if (!strncmp(str, "hpmmioprefsize=", 15)) {
+				pci_hotplug_mmio_pref_size = memparse(str + 15, &str);
+			} else if (!strncmp(str, "hpmemsize=", 10)) {
+				pci_hotplug_mmio_size = memparse(str + 10, &str);
+				pci_hotplug_mmio_pref_size = pci_hotplug_mmio_size;
+			} else if (!strncmp(str, "hpbussize=", 10)) {
+				pci_hotplug_bus_size =
+					simple_strtoul(str + 10, &str, 0);
+				if (pci_hotplug_bus_size > 0xff)
+					pci_hotplug_bus_size = DEFAULT_HOTPLUG_BUS_SIZE;
+			} else if (!strncmp(str, "pcie_bus_tune_off", 17)) {
+				pcie_bus_config = PCIE_BUS_TUNE_OFF;
+			} else if (!strncmp(str, "pcie_bus_safe", 13)) {
+				pcie_bus_config = PCIE_BUS_SAFE;
+			} else if (!strncmp(str, "pcie_bus_perf", 13)) {
+				pcie_bus_config = PCIE_BUS_PERFORMANCE;
+			} else if (!strncmp(str, "pcie_bus_peer2peer", 18)) {
+				pcie_bus_config = PCIE_BUS_PEER2PEER;
+			} else if (!strncmp(str, "pcie_scan_all", 13)) {
+				pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS);
+			} else if (!strncmp(str, "disable_acs_redir=", 18)) {
+				disable_acs_redir_param = str + 18;
+			} else {
+				pr_err("PCI: Unknown option `%s'\n", str);
+			}
+		}
+		str = k;
+	}
+	return 0;
+}
+early_param("pci", pci_setup);
+
+/*
+ * 'resource_alignment_param' and 'disable_acs_redir_param' are initialized
+ * in pci_setup(), above, to point to data in the __initdata section which
+ * will be freed after the init sequence is complete. We can't allocate memory
+ * in pci_setup() because some architectures do not have any memory allocation
+ * service available during an early_param() call. So we allocate memory and
+ * copy the variable here before the init section is freed.
+ *
+ */
+static int __init pci_realloc_setup_params(void)
+{
+	resource_alignment_param = kstrdup(resource_alignment_param,
+					   GFP_KERNEL);
+	disable_acs_redir_param = kstrdup(disable_acs_redir_param, GFP_KERNEL);
+
+	return 0;
+}
+pure_initcall(pci_realloc_setup_params);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
new file mode 100644
index 000000000..5484048f4
--- /dev/null
+++ b/drivers/pci/pci.h
@@ -0,0 +1,840 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef DRIVERS_PCI_H
+#define DRIVERS_PCI_H
+
+#include <linux/pci.h>
+
+/* Number of possible devfns: 0.0 to 1f.7 inclusive */
+#define MAX_NR_DEVFNS 256
+
+#define PCI_FIND_CAP_TTL	48
+
+#define PCI_VSEC_ID_INTEL_TBT	0x1234	/* Thunderbolt */
+
+#define PCIE_LINK_RETRAIN_TIMEOUT_MS	1000
+
+/*
+ * PCIe r6.0, sec 5.3.3.2.1 <PME Synchronization>
+ * Recommends 1ms to 10ms timeout to check L2 ready.
+ */
+#define PCIE_PME_TO_L2_TIMEOUT_US	10000
+
+extern const unsigned char pcie_link_speed[];
+extern bool pci_early_dump;
+
+bool pcie_cap_has_lnkctl(const struct pci_dev *dev);
+bool pcie_cap_has_lnkctl2(const struct pci_dev *dev);
+bool pcie_cap_has_rtctl(const struct pci_dev *dev);
+
+/* Functions internal to the PCI core code */
+
+int pci_create_sysfs_dev_files(struct pci_dev *pdev);
+void pci_remove_sysfs_dev_files(struct pci_dev *pdev);
+void pci_cleanup_rom(struct pci_dev *dev);
+#ifdef CONFIG_DMI
+extern const struct attribute_group pci_dev_smbios_attr_group;
+#endif
+
+enum pci_mmap_api {
+	PCI_MMAP_SYSFS,	/* mmap on /sys/bus/pci/devices/<BDF>/resource<N> */
+	PCI_MMAP_PROCFS	/* mmap on /proc/bus/pci/<BDF> */
+};
+int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vmai,
+		  enum pci_mmap_api mmap_api);
+
+bool pci_reset_supported(struct pci_dev *dev);
+void pci_init_reset_methods(struct pci_dev *dev);
+int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
+int pci_bus_error_reset(struct pci_dev *dev);
+
+struct pci_cap_saved_data {
+	u16		cap_nr;
+	bool		cap_extended;
+	unsigned int	size;
+	u32		data[];
+};
+
+struct pci_cap_saved_state {
+	struct hlist_node		next;
+	struct pci_cap_saved_data	cap;
+};
+
+void pci_allocate_cap_save_buffers(struct pci_dev *dev);
+void pci_free_cap_save_buffers(struct pci_dev *dev);
+int pci_add_cap_save_buffer(struct pci_dev *dev, char cap, unsigned int size);
+int pci_add_ext_cap_save_buffer(struct pci_dev *dev,
+				u16 cap, unsigned int size);
+struct pci_cap_saved_state *pci_find_saved_cap(struct pci_dev *dev, char cap);
+struct pci_cap_saved_state *pci_find_saved_ext_cap(struct pci_dev *dev,
+						   u16 cap);
+
+#define PCI_PM_D2_DELAY         200	/* usec; see PCIe r4.0, sec 5.9.1 */
+#define PCI_PM_D3HOT_WAIT       10	/* msec */
+#define PCI_PM_D3COLD_WAIT      100	/* msec */
+
+void pci_update_current_state(struct pci_dev *dev, pci_power_t state);
+void pci_refresh_power_state(struct pci_dev *dev);
+int pci_power_up(struct pci_dev *dev);
+void pci_disable_enabled_device(struct pci_dev *dev);
+int pci_finish_runtime_suspend(struct pci_dev *dev);
+void pcie_clear_device_status(struct pci_dev *dev);
+void pcie_clear_root_pme_status(struct pci_dev *dev);
+bool pci_check_pme_status(struct pci_dev *dev);
+void pci_pme_wakeup_bus(struct pci_bus *bus);
+int __pci_pme_wakeup(struct pci_dev *dev, void *ign);
+void pci_pme_restore(struct pci_dev *dev);
+bool pci_dev_need_resume(struct pci_dev *dev);
+void pci_dev_adjust_pme(struct pci_dev *dev);
+void pci_dev_complete_resume(struct pci_dev *pci_dev);
+void pci_config_pm_runtime_get(struct pci_dev *dev);
+void pci_config_pm_runtime_put(struct pci_dev *dev);
+void pci_pm_init(struct pci_dev *dev);
+void pci_ea_init(struct pci_dev *dev);
+void pci_msi_init(struct pci_dev *dev);
+void pci_msix_init(struct pci_dev *dev);
+bool pci_bridge_d3_possible(struct pci_dev *dev);
+void pci_bridge_d3_update(struct pci_dev *dev);
+void pci_bridge_reconfigure_ltr(struct pci_dev *dev);
+int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type);
+
+static inline void pci_wakeup_event(struct pci_dev *dev)
+{
+	/* Wait 100 ms before the system can be put into a sleep state. */
+	pm_wakeup_event(&dev->dev, 100);
+}
+
+static inline bool pci_has_subordinate(struct pci_dev *pci_dev)
+{
+	return !!(pci_dev->subordinate);
+}
+
+static inline bool pci_power_manageable(struct pci_dev *pci_dev)
+{
+	/*
+	 * Currently we allow normal PCI devices and PCI bridges transition
+	 * into D3 if their bridge_d3 is set.
+	 */
+	return !pci_has_subordinate(pci_dev) || pci_dev->bridge_d3;
+}
+
+static inline bool pcie_downstream_port(const struct pci_dev *dev)
+{
+	int type = pci_pcie_type(dev);
+
+	return type == PCI_EXP_TYPE_ROOT_PORT ||
+	       type == PCI_EXP_TYPE_DOWNSTREAM ||
+	       type == PCI_EXP_TYPE_PCIE_BRIDGE;
+}
+
+void pci_vpd_init(struct pci_dev *dev);
+void pci_vpd_release(struct pci_dev *dev);
+extern const struct attribute_group pci_dev_vpd_attr_group;
+
+/* PCI Virtual Channel */
+int pci_save_vc_state(struct pci_dev *dev);
+void pci_restore_vc_state(struct pci_dev *dev);
+void pci_allocate_vc_save_buffers(struct pci_dev *dev);
+
+/* PCI /proc functions */
+#ifdef CONFIG_PROC_FS
+int pci_proc_attach_device(struct pci_dev *dev);
+int pci_proc_detach_device(struct pci_dev *dev);
+int pci_proc_detach_bus(struct pci_bus *bus);
+#else
+static inline int pci_proc_attach_device(struct pci_dev *dev) { return 0; }
+static inline int pci_proc_detach_device(struct pci_dev *dev) { return 0; }
+static inline int pci_proc_detach_bus(struct pci_bus *bus) { return 0; }
+#endif
+
+/* Functions for PCI Hotplug drivers to use */
+int pci_hp_add_bridge(struct pci_dev *dev);
+
+#ifdef HAVE_PCI_LEGACY
+void pci_create_legacy_files(struct pci_bus *bus);
+void pci_remove_legacy_files(struct pci_bus *bus);
+#else
+static inline void pci_create_legacy_files(struct pci_bus *bus) { }
+static inline void pci_remove_legacy_files(struct pci_bus *bus) { }
+#endif
+
+/* Lock for read/write access to pci device and bus lists */
+extern struct rw_semaphore pci_bus_sem;
+extern struct mutex pci_slot_mutex;
+
+extern raw_spinlock_t pci_lock;
+
+extern unsigned int pci_pm_d3hot_delay;
+
+#ifdef CONFIG_PCI_MSI
+void pci_no_msi(void);
+#else
+static inline void pci_no_msi(void) { }
+#endif
+
+void pci_realloc_get_opt(char *);
+
+static inline int pci_no_d1d2(struct pci_dev *dev)
+{
+	unsigned int parent_dstates = 0;
+
+	if (dev->bus->self)
+		parent_dstates = dev->bus->self->no_d1d2;
+	return (dev->no_d1d2 || parent_dstates);
+
+}
+extern const struct attribute_group *pci_dev_groups[];
+extern const struct attribute_group *pcibus_groups[];
+extern const struct device_type pci_dev_type;
+extern const struct attribute_group *pci_bus_groups[];
+
+extern unsigned long pci_hotplug_io_size;
+extern unsigned long pci_hotplug_mmio_size;
+extern unsigned long pci_hotplug_mmio_pref_size;
+extern unsigned long pci_hotplug_bus_size;
+
+/**
+ * pci_match_one_device - Tell if a PCI device structure has a matching
+ *			  PCI device id structure
+ * @id: single PCI device id structure to match
+ * @dev: the PCI device structure to match against
+ *
+ * Returns the matching pci_device_id structure or %NULL if there is no match.
+ */
+static inline const struct pci_device_id *
+pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
+{
+	if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
+	    (id->device == PCI_ANY_ID || id->device == dev->device) &&
+	    (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) &&
+	    (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) &&
+	    !((id->class ^ dev->class) & id->class_mask))
+		return id;
+	return NULL;
+}
+
+/* PCI slot sysfs helper code */
+#define to_pci_slot(s) container_of(s, struct pci_slot, kobj)
+
+extern struct kset *pci_slots_kset;
+
+struct pci_slot_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct pci_slot *, char *);
+	ssize_t (*store)(struct pci_slot *, const char *, size_t);
+};
+#define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr)
+
+enum pci_bar_type {
+	pci_bar_unknown,	/* Standard PCI BAR probe */
+	pci_bar_io,		/* An I/O port BAR */
+	pci_bar_mem32,		/* A 32-bit memory BAR */
+	pci_bar_mem64,		/* A 64-bit memory BAR */
+};
+
+struct device *pci_get_host_bridge_device(struct pci_dev *dev);
+void pci_put_host_bridge_device(struct device *dev);
+
+int pci_configure_extended_tags(struct pci_dev *dev, void *ign);
+bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl,
+				int crs_timeout);
+bool pci_bus_generic_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl,
+					int crs_timeout);
+int pci_idt_bus_quirk(struct pci_bus *bus, int devfn, u32 *pl, int crs_timeout);
+
+int pci_setup_device(struct pci_dev *dev);
+int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
+		    struct resource *res, unsigned int reg);
+void pci_configure_ari(struct pci_dev *dev);
+void __pci_bus_size_bridges(struct pci_bus *bus,
+			struct list_head *realloc_head);
+void __pci_bus_assign_resources(const struct pci_bus *bus,
+				struct list_head *realloc_head,
+				struct list_head *fail_head);
+bool pci_bus_clip_resource(struct pci_dev *dev, int idx);
+
+void pci_reassigndev_resource_alignment(struct pci_dev *dev);
+void pci_disable_bridge_window(struct pci_dev *dev);
+struct pci_bus *pci_bus_get(struct pci_bus *bus);
+void pci_bus_put(struct pci_bus *bus);
+
+/* PCIe link information from Link Capabilities 2 */
+#define PCIE_LNKCAP2_SLS2SPEED(lnkcap2) \
+	((lnkcap2) & PCI_EXP_LNKCAP2_SLS_64_0GB ? PCIE_SPEED_64_0GT : \
+	 (lnkcap2) & PCI_EXP_LNKCAP2_SLS_32_0GB ? PCIE_SPEED_32_0GT : \
+	 (lnkcap2) & PCI_EXP_LNKCAP2_SLS_16_0GB ? PCIE_SPEED_16_0GT : \
+	 (lnkcap2) & PCI_EXP_LNKCAP2_SLS_8_0GB ? PCIE_SPEED_8_0GT : \
+	 (lnkcap2) & PCI_EXP_LNKCAP2_SLS_5_0GB ? PCIE_SPEED_5_0GT : \
+	 (lnkcap2) & PCI_EXP_LNKCAP2_SLS_2_5GB ? PCIE_SPEED_2_5GT : \
+	 PCI_SPEED_UNKNOWN)
+
+/* PCIe speed to Mb/s reduced by encoding overhead */
+#define PCIE_SPEED2MBS_ENC(speed) \
+	((speed) == PCIE_SPEED_64_0GT ? 64000*128/130 : \
+	 (speed) == PCIE_SPEED_32_0GT ? 32000*128/130 : \
+	 (speed) == PCIE_SPEED_16_0GT ? 16000*128/130 : \
+	 (speed) == PCIE_SPEED_8_0GT  ?  8000*128/130 : \
+	 (speed) == PCIE_SPEED_5_0GT  ?  5000*8/10 : \
+	 (speed) == PCIE_SPEED_2_5GT  ?  2500*8/10 : \
+	 0)
+
+const char *pci_speed_string(enum pci_bus_speed speed);
+enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev);
+enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev);
+u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed *speed,
+			   enum pcie_link_width *width);
+void __pcie_print_link_status(struct pci_dev *dev, bool verbose);
+void pcie_report_downtraining(struct pci_dev *dev);
+void pcie_update_link_speed(struct pci_bus *bus, u16 link_status);
+
+/* Single Root I/O Virtualization */
+struct pci_sriov {
+	int		pos;		/* Capability position */
+	int		nres;		/* Number of resources */
+	u32		cap;		/* SR-IOV Capabilities */
+	u16		ctrl;		/* SR-IOV Control */
+	u16		total_VFs;	/* Total VFs associated with the PF */
+	u16		initial_VFs;	/* Initial VFs associated with the PF */
+	u16		num_VFs;	/* Number of VFs available */
+	u16		offset;		/* First VF Routing ID offset */
+	u16		stride;		/* Following VF stride */
+	u16		vf_device;	/* VF device ID */
+	u32		pgsz;		/* Page size for BAR alignment */
+	u8		link;		/* Function Dependency Link */
+	u8		max_VF_buses;	/* Max buses consumed by VFs */
+	u16		driver_max_VFs;	/* Max num VFs driver supports */
+	struct pci_dev	*dev;		/* Lowest numbered PF */
+	struct pci_dev	*self;		/* This PF */
+	u32		class;		/* VF device */
+	u8		hdr_type;	/* VF header type */
+	u16		subsystem_vendor; /* VF subsystem vendor */
+	u16		subsystem_device; /* VF subsystem device */
+	resource_size_t	barsz[PCI_SRIOV_NUM_BARS];	/* VF BAR size */
+	bool		drivers_autoprobe; /* Auto probing of VFs by driver */
+};
+
+#ifdef CONFIG_PCI_DOE
+void pci_doe_init(struct pci_dev *pdev);
+void pci_doe_destroy(struct pci_dev *pdev);
+void pci_doe_disconnected(struct pci_dev *pdev);
+#else
+static inline void pci_doe_init(struct pci_dev *pdev) { }
+static inline void pci_doe_destroy(struct pci_dev *pdev) { }
+static inline void pci_doe_disconnected(struct pci_dev *pdev) { }
+#endif
+
+/**
+ * pci_dev_set_io_state - Set the new error state if possible.
+ *
+ * @dev: PCI device to set new error_state
+ * @new: the state we want dev to be in
+ *
+ * If the device is experiencing perm_failure, it has to remain in that state.
+ * Any other transition is allowed.
+ *
+ * Returns true if state has been changed to the requested state.
+ */
+static inline bool pci_dev_set_io_state(struct pci_dev *dev,
+					pci_channel_state_t new)
+{
+	pci_channel_state_t old;
+
+	switch (new) {
+	case pci_channel_io_perm_failure:
+		xchg(&dev->error_state, pci_channel_io_perm_failure);
+		return true;
+	case pci_channel_io_frozen:
+		old = cmpxchg(&dev->error_state, pci_channel_io_normal,
+			      pci_channel_io_frozen);
+		return old != pci_channel_io_perm_failure;
+	case pci_channel_io_normal:
+		old = cmpxchg(&dev->error_state, pci_channel_io_frozen,
+			      pci_channel_io_normal);
+		return old != pci_channel_io_perm_failure;
+	default:
+		return false;
+	}
+}
+
+static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused)
+{
+	pci_dev_set_io_state(dev, pci_channel_io_perm_failure);
+	pci_doe_disconnected(dev);
+
+	return 0;
+}
+
+static inline bool pci_dev_is_disconnected(const struct pci_dev *dev)
+{
+	return dev->error_state == pci_channel_io_perm_failure;
+}
+
+/* pci_dev priv_flags */
+#define PCI_DEV_ADDED 0
+#define PCI_DPC_RECOVERED 1
+#define PCI_DPC_RECOVERING 2
+
+static inline void pci_dev_assign_added(struct pci_dev *dev, bool added)
+{
+	assign_bit(PCI_DEV_ADDED, &dev->priv_flags, added);
+}
+
+static inline bool pci_dev_is_added(const struct pci_dev *dev)
+{
+	return test_bit(PCI_DEV_ADDED, &dev->priv_flags);
+}
+
+#ifdef CONFIG_PCIEAER
+#include <linux/aer.h>
+
+#define AER_MAX_MULTI_ERR_DEVICES	5	/* Not likely to have more */
+
+struct aer_err_info {
+	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
+	int error_dev_num;
+
+	unsigned int id:16;
+
+	unsigned int severity:2;	/* 0:NONFATAL | 1:FATAL | 2:COR */
+	unsigned int __pad1:5;
+	unsigned int multi_error_valid:1;
+
+	unsigned int first_error:5;
+	unsigned int __pad2:2;
+	unsigned int tlp_header_valid:1;
+
+	unsigned int status;		/* COR/UNCOR Error Status */
+	unsigned int mask;		/* COR/UNCOR Error Mask */
+	struct aer_header_log_regs tlp;	/* TLP Header */
+};
+
+int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
+void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
+#endif	/* CONFIG_PCIEAER */
+
+#ifdef CONFIG_PCIEPORTBUS
+/* Cached RCEC Endpoint Association */
+struct rcec_ea {
+	u8		nextbusn;
+	u8		lastbusn;
+	u32		bitmap;
+};
+#endif
+
+#ifdef CONFIG_PCIE_DPC
+void pci_save_dpc_state(struct pci_dev *dev);
+void pci_restore_dpc_state(struct pci_dev *dev);
+void pci_dpc_init(struct pci_dev *pdev);
+void dpc_process_error(struct pci_dev *pdev);
+pci_ers_result_t dpc_reset_link(struct pci_dev *pdev);
+bool pci_dpc_recovered(struct pci_dev *pdev);
+#else
+static inline void pci_save_dpc_state(struct pci_dev *dev) { }
+static inline void pci_restore_dpc_state(struct pci_dev *dev) { }
+static inline void pci_dpc_init(struct pci_dev *pdev) { }
+static inline bool pci_dpc_recovered(struct pci_dev *pdev) { return false; }
+#endif
+
+#ifdef CONFIG_PCIEPORTBUS
+void pci_rcec_init(struct pci_dev *dev);
+void pci_rcec_exit(struct pci_dev *dev);
+void pcie_link_rcec(struct pci_dev *rcec);
+void pcie_walk_rcec(struct pci_dev *rcec,
+		    int (*cb)(struct pci_dev *, void *),
+		    void *userdata);
+#else
+static inline void pci_rcec_init(struct pci_dev *dev) { }
+static inline void pci_rcec_exit(struct pci_dev *dev) { }
+static inline void pcie_link_rcec(struct pci_dev *rcec) { }
+static inline void pcie_walk_rcec(struct pci_dev *rcec,
+				  int (*cb)(struct pci_dev *, void *),
+				  void *userdata) { }
+#endif
+
+#ifdef CONFIG_PCI_ATS
+/* Address Translation Service */
+void pci_ats_init(struct pci_dev *dev);
+void pci_restore_ats_state(struct pci_dev *dev);
+#else
+static inline void pci_ats_init(struct pci_dev *d) { }
+static inline void pci_restore_ats_state(struct pci_dev *dev) { }
+#endif /* CONFIG_PCI_ATS */
+
+#ifdef CONFIG_PCI_PRI
+void pci_pri_init(struct pci_dev *dev);
+void pci_restore_pri_state(struct pci_dev *pdev);
+#else
+static inline void pci_pri_init(struct pci_dev *dev) { }
+static inline void pci_restore_pri_state(struct pci_dev *pdev) { }
+#endif
+
+#ifdef CONFIG_PCI_PASID
+void pci_pasid_init(struct pci_dev *dev);
+void pci_restore_pasid_state(struct pci_dev *pdev);
+#else
+static inline void pci_pasid_init(struct pci_dev *dev) { }
+static inline void pci_restore_pasid_state(struct pci_dev *pdev) { }
+#endif
+
+#ifdef CONFIG_PCI_IOV
+int pci_iov_init(struct pci_dev *dev);
+void pci_iov_release(struct pci_dev *dev);
+void pci_iov_remove(struct pci_dev *dev);
+void pci_iov_update_resource(struct pci_dev *dev, int resno);
+resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno);
+void pci_restore_iov_state(struct pci_dev *dev);
+int pci_iov_bus_range(struct pci_bus *bus);
+extern const struct attribute_group sriov_pf_dev_attr_group;
+extern const struct attribute_group sriov_vf_dev_attr_group;
+#else
+static inline int pci_iov_init(struct pci_dev *dev)
+{
+	return -ENODEV;
+}
+static inline void pci_iov_release(struct pci_dev *dev) { }
+static inline void pci_iov_remove(struct pci_dev *dev) { }
+static inline void pci_restore_iov_state(struct pci_dev *dev) { }
+static inline int pci_iov_bus_range(struct pci_bus *bus)
+{
+	return 0;
+}
+
+#endif /* CONFIG_PCI_IOV */
+
+#ifdef CONFIG_PCIE_PTM
+void pci_ptm_init(struct pci_dev *dev);
+void pci_save_ptm_state(struct pci_dev *dev);
+void pci_restore_ptm_state(struct pci_dev *dev);
+void pci_suspend_ptm(struct pci_dev *dev);
+void pci_resume_ptm(struct pci_dev *dev);
+#else
+static inline void pci_ptm_init(struct pci_dev *dev) { }
+static inline void pci_save_ptm_state(struct pci_dev *dev) { }
+static inline void pci_restore_ptm_state(struct pci_dev *dev) { }
+static inline void pci_suspend_ptm(struct pci_dev *dev) { }
+static inline void pci_resume_ptm(struct pci_dev *dev) { }
+#endif
+
+unsigned long pci_cardbus_resource_alignment(struct resource *);
+
+static inline resource_size_t pci_resource_alignment(struct pci_dev *dev,
+						     struct resource *res)
+{
+#ifdef CONFIG_PCI_IOV
+	int resno = res - dev->resource;
+
+	if (resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END)
+		return pci_sriov_resource_alignment(dev, resno);
+#endif
+	if (dev->class >> 8 == PCI_CLASS_BRIDGE_CARDBUS)
+		return pci_cardbus_resource_alignment(res);
+	return resource_alignment(res);
+}
+
+void pci_acs_init(struct pci_dev *dev);
+#ifdef CONFIG_PCI_QUIRKS
+int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags);
+int pci_dev_specific_enable_acs(struct pci_dev *dev);
+int pci_dev_specific_disable_acs_redir(struct pci_dev *dev);
+bool pcie_failed_link_retrain(struct pci_dev *dev);
+#else
+static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev,
+					       u16 acs_flags)
+{
+	return -ENOTTY;
+}
+static inline int pci_dev_specific_enable_acs(struct pci_dev *dev)
+{
+	return -ENOTTY;
+}
+static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev)
+{
+	return -ENOTTY;
+}
+static inline bool pcie_failed_link_retrain(struct pci_dev *dev)
+{
+	return false;
+}
+#endif
+
+/* PCI error reporting and recovery */
+pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
+		pci_channel_state_t state,
+		pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev));
+
+bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
+int pcie_retrain_link(struct pci_dev *pdev, bool use_lt);
+#ifdef CONFIG_PCIEASPM
+void pcie_aspm_init_link_state(struct pci_dev *pdev);
+void pcie_aspm_exit_link_state(struct pci_dev *pdev);
+void pcie_aspm_pm_state_change(struct pci_dev *pdev);
+void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
+#else
+static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
+static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
+static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { }
+static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
+#endif
+
+#ifdef CONFIG_PCIE_ECRC
+void pcie_set_ecrc_checking(struct pci_dev *dev);
+void pcie_ecrc_get_policy(char *str);
+#else
+static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { }
+static inline void pcie_ecrc_get_policy(char *str) { }
+#endif
+
+struct pci_dev_reset_methods {
+	u16 vendor;
+	u16 device;
+	int (*reset)(struct pci_dev *dev, bool probe);
+};
+
+struct pci_reset_fn_method {
+	int (*reset_fn)(struct pci_dev *pdev, bool probe);
+	char *name;
+};
+
+#ifdef CONFIG_PCI_QUIRKS
+int pci_dev_specific_reset(struct pci_dev *dev, bool probe);
+#else
+static inline int pci_dev_specific_reset(struct pci_dev *dev, bool probe)
+{
+	return -ENOTTY;
+}
+#endif
+
+#if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64)
+int acpi_get_rc_resources(struct device *dev, const char *hid, u16 segment,
+			  struct resource *res);
+#else
+static inline int acpi_get_rc_resources(struct device *dev, const char *hid,
+					u16 segment, struct resource *res)
+{
+	return -ENODEV;
+}
+#endif
+
+int pci_rebar_get_current_size(struct pci_dev *pdev, int bar);
+int pci_rebar_set_size(struct pci_dev *pdev, int bar, int size);
+static inline u64 pci_rebar_size_to_bytes(int size)
+{
+	return 1ULL << (size + 20);
+}
+
+struct device_node;
+
+#ifdef CONFIG_OF
+int of_pci_parse_bus_range(struct device_node *node, struct resource *res);
+int of_get_pci_domain_nr(struct device_node *node);
+int of_pci_get_max_link_speed(struct device_node *node);
+u32 of_pci_get_slot_power_limit(struct device_node *node,
+				u8 *slot_power_limit_value,
+				u8 *slot_power_limit_scale);
+int pci_set_of_node(struct pci_dev *dev);
+void pci_release_of_node(struct pci_dev *dev);
+void pci_set_bus_of_node(struct pci_bus *bus);
+void pci_release_bus_of_node(struct pci_bus *bus);
+
+int devm_of_pci_bridge_init(struct device *dev, struct pci_host_bridge *bridge);
+
+#else
+static inline int
+of_pci_parse_bus_range(struct device_node *node, struct resource *res)
+{
+	return -EINVAL;
+}
+
+static inline int
+of_get_pci_domain_nr(struct device_node *node)
+{
+	return -1;
+}
+
+static inline int
+of_pci_get_max_link_speed(struct device_node *node)
+{
+	return -EINVAL;
+}
+
+static inline u32
+of_pci_get_slot_power_limit(struct device_node *node,
+			    u8 *slot_power_limit_value,
+			    u8 *slot_power_limit_scale)
+{
+	if (slot_power_limit_value)
+		*slot_power_limit_value = 0;
+	if (slot_power_limit_scale)
+		*slot_power_limit_scale = 0;
+	return 0;
+}
+
+static inline int pci_set_of_node(struct pci_dev *dev) { return 0; }
+static inline void pci_release_of_node(struct pci_dev *dev) { }
+static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
+static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
+
+static inline int devm_of_pci_bridge_init(struct device *dev, struct pci_host_bridge *bridge)
+{
+	return 0;
+}
+
+#endif /* CONFIG_OF */
+
+struct of_changeset;
+
+#ifdef CONFIG_PCI_DYNAMIC_OF_NODES
+void of_pci_make_dev_node(struct pci_dev *pdev);
+void of_pci_remove_node(struct pci_dev *pdev);
+int of_pci_add_properties(struct pci_dev *pdev, struct of_changeset *ocs,
+			  struct device_node *np);
+#else
+static inline void of_pci_make_dev_node(struct pci_dev *pdev) { }
+static inline void of_pci_remove_node(struct pci_dev *pdev) { }
+#endif
+
+#ifdef CONFIG_PCIEAER
+void pci_no_aer(void);
+void pci_aer_init(struct pci_dev *dev);
+void pci_aer_exit(struct pci_dev *dev);
+extern const struct attribute_group aer_stats_attr_group;
+void pci_aer_clear_fatal_status(struct pci_dev *dev);
+int pci_aer_clear_status(struct pci_dev *dev);
+int pci_aer_raw_clear_status(struct pci_dev *dev);
+void pci_save_aer_state(struct pci_dev *dev);
+void pci_restore_aer_state(struct pci_dev *dev);
+#else
+static inline void pci_no_aer(void) { }
+static inline void pci_aer_init(struct pci_dev *d) { }
+static inline void pci_aer_exit(struct pci_dev *d) { }
+static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { }
+static inline int pci_aer_clear_status(struct pci_dev *dev) { return -EINVAL; }
+static inline int pci_aer_raw_clear_status(struct pci_dev *dev) { return -EINVAL; }
+static inline void pci_save_aer_state(struct pci_dev *dev) { }
+static inline void pci_restore_aer_state(struct pci_dev *dev) { }
+#endif
+
+#ifdef CONFIG_ACPI
+int pci_acpi_program_hp_params(struct pci_dev *dev);
+extern const struct attribute_group pci_dev_acpi_attr_group;
+void pci_set_acpi_fwnode(struct pci_dev *dev);
+int pci_dev_acpi_reset(struct pci_dev *dev, bool probe);
+bool acpi_pci_power_manageable(struct pci_dev *dev);
+bool acpi_pci_bridge_d3(struct pci_dev *dev);
+int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state);
+pci_power_t acpi_pci_get_power_state(struct pci_dev *dev);
+void acpi_pci_refresh_power_state(struct pci_dev *dev);
+int acpi_pci_wakeup(struct pci_dev *dev, bool enable);
+bool acpi_pci_need_resume(struct pci_dev *dev);
+pci_power_t acpi_pci_choose_state(struct pci_dev *pdev);
+#else
+static inline int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
+{
+	return -ENOTTY;
+}
+static inline void pci_set_acpi_fwnode(struct pci_dev *dev) { }
+static inline int pci_acpi_program_hp_params(struct pci_dev *dev)
+{
+	return -ENODEV;
+}
+static inline bool acpi_pci_power_manageable(struct pci_dev *dev)
+{
+	return false;
+}
+static inline bool acpi_pci_bridge_d3(struct pci_dev *dev)
+{
+	return false;
+}
+static inline int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state)
+{
+	return -ENODEV;
+}
+static inline pci_power_t acpi_pci_get_power_state(struct pci_dev *dev)
+{
+	return PCI_UNKNOWN;
+}
+static inline void acpi_pci_refresh_power_state(struct pci_dev *dev) { }
+static inline int acpi_pci_wakeup(struct pci_dev *dev, bool enable)
+{
+	return -ENODEV;
+}
+static inline bool acpi_pci_need_resume(struct pci_dev *dev)
+{
+	return false;
+}
+static inline pci_power_t acpi_pci_choose_state(struct pci_dev *pdev)
+{
+	return PCI_POWER_ERROR;
+}
+#endif
+
+#ifdef CONFIG_PCIEASPM
+extern const struct attribute_group aspm_ctrl_attr_group;
+#endif
+
+extern const struct attribute_group pci_dev_reset_method_attr_group;
+
+#ifdef CONFIG_X86_INTEL_MID
+bool pci_use_mid_pm(void);
+int mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state);
+pci_power_t mid_pci_get_power_state(struct pci_dev *pdev);
+#else
+static inline bool pci_use_mid_pm(void)
+{
+	return false;
+}
+static inline int mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state)
+{
+	return -ENODEV;
+}
+static inline pci_power_t mid_pci_get_power_state(struct pci_dev *pdev)
+{
+	return PCI_UNKNOWN;
+}
+#endif
+
+/*
+ * Config Address for PCI Configuration Mechanism #1
+ *
+ * See PCI Local Bus Specification, Revision 3.0,
+ * Section 3.2.2.3.2, Figure 3-2, p. 50.
+ */
+
+#define PCI_CONF1_BUS_SHIFT	16 /* Bus number */
+#define PCI_CONF1_DEV_SHIFT	11 /* Device number */
+#define PCI_CONF1_FUNC_SHIFT	8  /* Function number */
+
+#define PCI_CONF1_BUS_MASK	0xff
+#define PCI_CONF1_DEV_MASK	0x1f
+#define PCI_CONF1_FUNC_MASK	0x7
+#define PCI_CONF1_REG_MASK	0xfc /* Limit aligned offset to a maximum of 256B */
+
+#define PCI_CONF1_ENABLE	BIT(31)
+#define PCI_CONF1_BUS(x)	(((x) & PCI_CONF1_BUS_MASK) << PCI_CONF1_BUS_SHIFT)
+#define PCI_CONF1_DEV(x)	(((x) & PCI_CONF1_DEV_MASK) << PCI_CONF1_DEV_SHIFT)
+#define PCI_CONF1_FUNC(x)	(((x) & PCI_CONF1_FUNC_MASK) << PCI_CONF1_FUNC_SHIFT)
+#define PCI_CONF1_REG(x)	((x) & PCI_CONF1_REG_MASK)
+
+#define PCI_CONF1_ADDRESS(bus, dev, func, reg) \
+	(PCI_CONF1_ENABLE | \
+	 PCI_CONF1_BUS(bus) | \
+	 PCI_CONF1_DEV(dev) | \
+	 PCI_CONF1_FUNC(func) | \
+	 PCI_CONF1_REG(reg))
+
+/*
+ * Extension of PCI Config Address for accessing extended PCIe registers
+ *
+ * No standardized specification, but used on lot of non-ECAM-compliant ARM SoCs
+ * or on AMD Barcelona and new CPUs. Reserved bits [27:24] of PCI Config Address
+ * are used for specifying additional 4 high bits of PCI Express register.
+ */
+
+#define PCI_CONF1_EXT_REG_SHIFT	16
+#define PCI_CONF1_EXT_REG_MASK	0xf00
+#define PCI_CONF1_EXT_REG(x)	(((x) & PCI_CONF1_EXT_REG_MASK) << PCI_CONF1_EXT_REG_SHIFT)
+
+#define PCI_CONF1_EXT_ADDRESS(bus, dev, func, reg) \
+	(PCI_CONF1_ADDRESS(bus, dev, func, reg) | \
+	 PCI_CONF1_EXT_REG(reg))
+
+#endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
new file mode 100644
index 000000000..228652a59
--- /dev/null
+++ b/drivers/pci/pcie/Kconfig
@@ -0,0 +1,148 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# PCI Express Port Bus Configuration
+#
+config PCIEPORTBUS
+	bool "PCI Express Port Bus support"
+	default y if USB4
+	help
+	  This enables PCI Express Port Bus support. Users can then enable
+	  support for Native Hot-Plug, Advanced Error Reporting, Power
+	  Management Events, and Downstream Port Containment.
+
+#
+# Include service Kconfig here
+#
+config HOTPLUG_PCI_PCIE
+	bool "PCI Express Hotplug driver"
+	depends on HOTPLUG_PCI && PCIEPORTBUS
+	default y if USB4
+	help
+	  Say Y here if you have a motherboard that supports PCIe native
+	  hotplug.
+
+	  Thunderbolt/USB4 PCIe tunneling depends on native PCIe hotplug.
+
+	  When in doubt, say N.
+
+config PCIEAER
+	bool "PCI Express Advanced Error Reporting support"
+	depends on PCIEPORTBUS
+	select RAS
+	help
+	  This enables PCI Express Root Port Advanced Error Reporting
+	  (AER) driver support. Error reporting messages sent to Root
+	  Port will be handled by PCI Express AER driver.
+
+config PCIEAER_INJECT
+	tristate "PCI Express error injection support"
+	depends on PCIEAER
+	select GENERIC_IRQ_INJECTION
+	help
+	  This enables PCI Express Root Port Advanced Error Reporting
+	  (AER) software error injector.
+
+	  Debugging AER code is quite difficult because it is hard
+	  to trigger various real hardware errors. Software-based
+	  error injection can fake almost all kinds of errors with the
+	  help of a user space helper tool aer-inject, which can be
+	  gotten from:
+	     https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/
+
+#
+# PCI Express ECRC
+#
+config PCIE_ECRC
+	bool "PCI Express ECRC settings control"
+	depends on PCIEAER
+	help
+	  Used to override firmware/bios settings for PCI Express ECRC
+	  (transaction layer end-to-end CRC checking).
+
+	  When in doubt, say N.
+
+#
+# PCI Express ASPM
+#
+config PCIEASPM
+	bool "PCI Express ASPM control" if EXPERT
+	default y
+	help
+	  This enables OS control over PCI Express ASPM (Active State
+	  Power Management) and Clock Power Management. ASPM supports
+	  state L0/L0s/L1.
+
+	  ASPM is initially set up by the firmware. With this option enabled,
+	  Linux can modify this state in order to disable ASPM on known-bad
+	  hardware or configurations and enable it when known-safe.
+
+	  ASPM can be disabled or enabled at runtime via
+	  /sys/module/pcie_aspm/parameters/policy
+
+	  When in doubt, say Y.
+
+choice
+	prompt "Default ASPM policy"
+	default PCIEASPM_DEFAULT
+	depends on PCIEASPM
+
+config PCIEASPM_DEFAULT
+	bool "BIOS default"
+	depends on PCIEASPM
+	help
+	  Use the BIOS defaults for PCI Express ASPM.
+
+config PCIEASPM_POWERSAVE
+	bool "Powersave"
+	depends on PCIEASPM
+	help
+	  Enable PCI Express ASPM L0s and L1 where possible, even if the
+	  BIOS did not.
+
+config PCIEASPM_POWER_SUPERSAVE
+	bool "Power Supersave"
+	depends on PCIEASPM
+	help
+	  Same as PCIEASPM_POWERSAVE, except it also enables L1 substates where
+	  possible. This would result in higher power savings while staying in L1
+	  where the components support it.
+
+config PCIEASPM_PERFORMANCE
+	bool "Performance"
+	depends on PCIEASPM
+	help
+	  Disable PCI Express ASPM L0s and L1, even if the BIOS enabled them.
+endchoice
+
+config PCIE_PME
+	def_bool y
+	depends on PCIEPORTBUS && PM
+
+config PCIE_DPC
+	bool "PCI Express Downstream Port Containment support"
+	depends on PCIEPORTBUS && PCIEAER
+	help
+	  This enables PCI Express Downstream Port Containment (DPC)
+	  driver support.  DPC events from Root and Downstream ports
+	  will be handled by the DPC driver.  If your system doesn't
+	  have this capability or you do not want to use this feature,
+	  it is safe to answer N.
+
+config PCIE_PTM
+	bool "PCI Express Precision Time Measurement support"
+	help
+	  This enables PCI Express Precision Time Measurement (PTM)
+	  support.
+
+	  This is only useful if you have devices that support PTM, but it
+	  is safe to enable even if you don't.
+
+config PCIE_EDR
+	bool "PCI Express Error Disconnect Recover support"
+	depends on PCIE_DPC && ACPI
+	help
+	  This option adds Error Disconnect Recover support as specified
+	  in the Downstream Port Containment Related Enhancements ECN to
+	  the PCI Firmware Specification r3.2.  Enable this if you want to
+	  support hybrid DPC model which uses both firmware and OS to
+	  implement DPC.
diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
new file mode 100644
index 000000000..8de4ed5f9
--- /dev/null
+++ b/drivers/pci/pcie/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for PCI Express features and port driver
+
+pcieportdrv-y			:= portdrv.o rcec.o
+
+obj-$(CONFIG_PCIEPORTBUS)	+= pcieportdrv.o
+
+obj-$(CONFIG_PCIEASPM)		+= aspm.o
+obj-$(CONFIG_PCIEAER)		+= aer.o err.o
+obj-$(CONFIG_PCIEAER_INJECT)	+= aer_inject.o
+obj-$(CONFIG_PCIE_PME)		+= pme.o
+obj-$(CONFIG_PCIE_DPC)		+= dpc.o
+obj-$(CONFIG_PCIE_PTM)		+= ptm.o
+obj-$(CONFIG_PCIE_EDR)		+= edr.o
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
new file mode 100644
index 000000000..40d84cb0c
--- /dev/null
+++ b/drivers/pci/pcie/aer.c
@@ -0,0 +1,1424 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Implement the AER root port service driver. The driver registers an IRQ
+ * handler. When a root port triggers an AER interrupt, the IRQ handler
+ * collects root port status and schedules work.
+ *
+ * Copyright (C) 2006 Intel Corp.
+ *	Tom Long Nguyen (tom.l.nguyen@intel.com)
+ *	Zhang Yanmin (yanmin.zhang@intel.com)
+ *
+ * (C) Copyright 2009 Hewlett-Packard Development Company, L.P.
+ *    Andrew Patterson <andrew.patterson@hp.com>
+ */
+
+#define pr_fmt(fmt) "AER: " fmt
+#define dev_fmt pr_fmt
+
+#include <linux/bitops.h>
+#include <linux/cper.h>
+#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/pm.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/kfifo.h>
+#include <linux/slab.h>
+#include <acpi/apei.h>
+#include <acpi/ghes.h>
+#include <ras/ras_event.h>
+
+#include "../pci.h"
+#include "portdrv.h"
+
+#define AER_ERROR_SOURCES_MAX		128
+
+#define AER_MAX_TYPEOF_COR_ERRS		16	/* as per PCI_ERR_COR_STATUS */
+#define AER_MAX_TYPEOF_UNCOR_ERRS	27	/* as per PCI_ERR_UNCOR_STATUS*/
+
+struct aer_err_source {
+	unsigned int status;
+	unsigned int id;
+};
+
+struct aer_rpc {
+	struct pci_dev *rpd;		/* Root Port device */
+	DECLARE_KFIFO(aer_fifo, struct aer_err_source, AER_ERROR_SOURCES_MAX);
+};
+
+/* AER stats for the device */
+struct aer_stats {
+
+	/*
+	 * Fields for all AER capable devices. They indicate the errors
+	 * "as seen by this device". Note that this may mean that if an
+	 * end point is causing problems, the AER counters may increment
+	 * at its link partner (e.g. root port) because the errors will be
+	 * "seen" by the link partner and not the problematic end point
+	 * itself (which may report all counters as 0 as it never saw any
+	 * problems).
+	 */
+	/* Counters for different type of correctable errors */
+	u64 dev_cor_errs[AER_MAX_TYPEOF_COR_ERRS];
+	/* Counters for different type of fatal uncorrectable errors */
+	u64 dev_fatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
+	/* Counters for different type of nonfatal uncorrectable errors */
+	u64 dev_nonfatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
+	/* Total number of ERR_COR sent by this device */
+	u64 dev_total_cor_errs;
+	/* Total number of ERR_FATAL sent by this device */
+	u64 dev_total_fatal_errs;
+	/* Total number of ERR_NONFATAL sent by this device */
+	u64 dev_total_nonfatal_errs;
+
+	/*
+	 * Fields for Root ports & root complex event collectors only, these
+	 * indicate the total number of ERR_COR, ERR_FATAL, and ERR_NONFATAL
+	 * messages received by the root port / event collector, INCLUDING the
+	 * ones that are generated internally (by the rootport itself)
+	 */
+	u64 rootport_total_cor_errs;
+	u64 rootport_total_fatal_errs;
+	u64 rootport_total_nonfatal_errs;
+};
+
+#define AER_LOG_TLP_MASKS		(PCI_ERR_UNC_POISON_TLP|	\
+					PCI_ERR_UNC_ECRC|		\
+					PCI_ERR_UNC_UNSUP|		\
+					PCI_ERR_UNC_COMP_ABORT|		\
+					PCI_ERR_UNC_UNX_COMP|		\
+					PCI_ERR_UNC_MALF_TLP)
+
+#define SYSTEM_ERROR_INTR_ON_MESG_MASK	(PCI_EXP_RTCTL_SECEE|	\
+					PCI_EXP_RTCTL_SENFEE|	\
+					PCI_EXP_RTCTL_SEFEE)
+#define ROOT_PORT_INTR_ON_MESG_MASK	(PCI_ERR_ROOT_CMD_COR_EN|	\
+					PCI_ERR_ROOT_CMD_NONFATAL_EN|	\
+					PCI_ERR_ROOT_CMD_FATAL_EN)
+#define ERR_COR_ID(d)			(d & 0xffff)
+#define ERR_UNCOR_ID(d)			(d >> 16)
+
+#define AER_ERR_STATUS_MASK		(PCI_ERR_ROOT_UNCOR_RCV |	\
+					PCI_ERR_ROOT_COR_RCV |		\
+					PCI_ERR_ROOT_MULTI_COR_RCV |	\
+					PCI_ERR_ROOT_MULTI_UNCOR_RCV)
+
+static int pcie_aer_disable;
+static pci_ers_result_t aer_root_reset(struct pci_dev *dev);
+
+void pci_no_aer(void)
+{
+	pcie_aer_disable = 1;
+}
+
+bool pci_aer_available(void)
+{
+	return !pcie_aer_disable && pci_msi_enabled();
+}
+
+#ifdef CONFIG_PCIE_ECRC
+
+#define ECRC_POLICY_DEFAULT 0		/* ECRC set by BIOS */
+#define ECRC_POLICY_OFF     1		/* ECRC off for performance */
+#define ECRC_POLICY_ON      2		/* ECRC on for data integrity */
+
+static int ecrc_policy = ECRC_POLICY_DEFAULT;
+
+static const char * const ecrc_policy_str[] = {
+	[ECRC_POLICY_DEFAULT] = "bios",
+	[ECRC_POLICY_OFF] = "off",
+	[ECRC_POLICY_ON] = "on"
+};
+
+/**
+ * enable_ecrc_checking - enable PCIe ECRC checking for a device
+ * @dev: the PCI device
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+static int enable_ecrc_checking(struct pci_dev *dev)
+{
+	int aer = dev->aer_cap;
+	u32 reg32;
+
+	if (!aer)
+		return -ENODEV;
+
+	pci_read_config_dword(dev, aer + PCI_ERR_CAP, &reg32);
+	if (reg32 & PCI_ERR_CAP_ECRC_GENC)
+		reg32 |= PCI_ERR_CAP_ECRC_GENE;
+	if (reg32 & PCI_ERR_CAP_ECRC_CHKC)
+		reg32 |= PCI_ERR_CAP_ECRC_CHKE;
+	pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32);
+
+	return 0;
+}
+
+/**
+ * disable_ecrc_checking - disables PCIe ECRC checking for a device
+ * @dev: the PCI device
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+static int disable_ecrc_checking(struct pci_dev *dev)
+{
+	int aer = dev->aer_cap;
+	u32 reg32;
+
+	if (!aer)
+		return -ENODEV;
+
+	pci_read_config_dword(dev, aer + PCI_ERR_CAP, &reg32);
+	reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
+	pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32);
+
+	return 0;
+}
+
+/**
+ * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based on global policy
+ * @dev: the PCI device
+ */
+void pcie_set_ecrc_checking(struct pci_dev *dev)
+{
+	if (!pcie_aer_is_native(dev))
+		return;
+
+	switch (ecrc_policy) {
+	case ECRC_POLICY_DEFAULT:
+		return;
+	case ECRC_POLICY_OFF:
+		disable_ecrc_checking(dev);
+		break;
+	case ECRC_POLICY_ON:
+		enable_ecrc_checking(dev);
+		break;
+	default:
+		return;
+	}
+}
+
+/**
+ * pcie_ecrc_get_policy - parse kernel command-line ecrc option
+ * @str: ECRC policy from kernel command line to use
+ */
+void pcie_ecrc_get_policy(char *str)
+{
+	int i;
+
+	i = match_string(ecrc_policy_str, ARRAY_SIZE(ecrc_policy_str), str);
+	if (i < 0)
+		return;
+
+	ecrc_policy = i;
+}
+#endif	/* CONFIG_PCIE_ECRC */
+
+#define	PCI_EXP_AER_FLAGS	(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
+				 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
+
+int pcie_aer_is_native(struct pci_dev *dev)
+{
+	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
+
+	if (!dev->aer_cap)
+		return 0;
+
+	return pcie_ports_native || host->native_aer;
+}
+EXPORT_SYMBOL_NS_GPL(pcie_aer_is_native, CXL);
+
+static int pci_enable_pcie_error_reporting(struct pci_dev *dev)
+{
+	int rc;
+
+	if (!pcie_aer_is_native(dev))
+		return -EIO;
+
+	rc = pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
+	return pcibios_err_to_errno(rc);
+}
+
+int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
+{
+	int aer = dev->aer_cap;
+	u32 status, sev;
+
+	if (!pcie_aer_is_native(dev))
+		return -EIO;
+
+	/* Clear status bits for ERR_NONFATAL errors only */
+	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
+	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev);
+	status &= ~sev;
+	if (status)
+		pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_aer_clear_nonfatal_status);
+
+void pci_aer_clear_fatal_status(struct pci_dev *dev)
+{
+	int aer = dev->aer_cap;
+	u32 status, sev;
+
+	if (!pcie_aer_is_native(dev))
+		return;
+
+	/* Clear status bits for ERR_FATAL errors only */
+	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
+	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev);
+	status &= sev;
+	if (status)
+		pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status);
+}
+
+/**
+ * pci_aer_raw_clear_status - Clear AER error registers.
+ * @dev: the PCI device
+ *
+ * Clearing AER error status registers unconditionally, regardless of
+ * whether they're owned by firmware or the OS.
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+int pci_aer_raw_clear_status(struct pci_dev *dev)
+{
+	int aer = dev->aer_cap;
+	u32 status;
+	int port_type;
+
+	if (!aer)
+		return -EIO;
+
+	port_type = pci_pcie_type(dev);
+	if (port_type == PCI_EXP_TYPE_ROOT_PORT ||
+	    port_type == PCI_EXP_TYPE_RC_EC) {
+		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &status);
+		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, status);
+	}
+
+	pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status);
+	pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, status);
+
+	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
+	pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status);
+
+	return 0;
+}
+
+int pci_aer_clear_status(struct pci_dev *dev)
+{
+	if (!pcie_aer_is_native(dev))
+		return -EIO;
+
+	return pci_aer_raw_clear_status(dev);
+}
+
+void pci_save_aer_state(struct pci_dev *dev)
+{
+	int aer = dev->aer_cap;
+	struct pci_cap_saved_state *save_state;
+	u32 *cap;
+
+	if (!aer)
+		return;
+
+	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR);
+	if (!save_state)
+		return;
+
+	cap = &save_state->cap.data[0];
+	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, cap++);
+	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, cap++);
+	pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, cap++);
+	pci_read_config_dword(dev, aer + PCI_ERR_CAP, cap++);
+	if (pcie_cap_has_rtctl(dev))
+		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, cap++);
+}
+
+void pci_restore_aer_state(struct pci_dev *dev)
+{
+	int aer = dev->aer_cap;
+	struct pci_cap_saved_state *save_state;
+	u32 *cap;
+
+	if (!aer)
+		return;
+
+	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR);
+	if (!save_state)
+		return;
+
+	cap = &save_state->cap.data[0];
+	pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, *cap++);
+	pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, *cap++);
+	pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, *cap++);
+	pci_write_config_dword(dev, aer + PCI_ERR_CAP, *cap++);
+	if (pcie_cap_has_rtctl(dev))
+		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, *cap++);
+}
+
+void pci_aer_init(struct pci_dev *dev)
+{
+	int n;
+
+	dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	if (!dev->aer_cap)
+		return;
+
+	dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL);
+
+	/*
+	 * We save/restore PCI_ERR_UNCOR_MASK, PCI_ERR_UNCOR_SEVER,
+	 * PCI_ERR_COR_MASK, and PCI_ERR_CAP.  Root and Root Complex Event
+	 * Collectors also implement PCI_ERR_ROOT_COMMAND (PCIe r5.0, sec
+	 * 7.8.4).
+	 */
+	n = pcie_cap_has_rtctl(dev) ? 5 : 4;
+	pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_ERR, sizeof(u32) * n);
+
+	pci_aer_clear_status(dev);
+
+	if (pci_aer_available())
+		pci_enable_pcie_error_reporting(dev);
+
+	pcie_set_ecrc_checking(dev);
+}
+
+void pci_aer_exit(struct pci_dev *dev)
+{
+	kfree(dev->aer_stats);
+	dev->aer_stats = NULL;
+}
+
+#define AER_AGENT_RECEIVER		0
+#define AER_AGENT_REQUESTER		1
+#define AER_AGENT_COMPLETER		2
+#define AER_AGENT_TRANSMITTER		3
+
+#define AER_AGENT_REQUESTER_MASK(t)	((t == AER_CORRECTABLE) ?	\
+	0 : (PCI_ERR_UNC_COMP_TIME|PCI_ERR_UNC_UNSUP))
+#define AER_AGENT_COMPLETER_MASK(t)	((t == AER_CORRECTABLE) ?	\
+	0 : PCI_ERR_UNC_COMP_ABORT)
+#define AER_AGENT_TRANSMITTER_MASK(t)	((t == AER_CORRECTABLE) ?	\
+	(PCI_ERR_COR_REP_ROLL|PCI_ERR_COR_REP_TIMER) : 0)
+
+#define AER_GET_AGENT(t, e)						\
+	((e & AER_AGENT_COMPLETER_MASK(t)) ? AER_AGENT_COMPLETER :	\
+	(e & AER_AGENT_REQUESTER_MASK(t)) ? AER_AGENT_REQUESTER :	\
+	(e & AER_AGENT_TRANSMITTER_MASK(t)) ? AER_AGENT_TRANSMITTER :	\
+	AER_AGENT_RECEIVER)
+
+#define AER_PHYSICAL_LAYER_ERROR	0
+#define AER_DATA_LINK_LAYER_ERROR	1
+#define AER_TRANSACTION_LAYER_ERROR	2
+
+#define AER_PHYSICAL_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ?	\
+	PCI_ERR_COR_RCVR : 0)
+#define AER_DATA_LINK_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ?	\
+	(PCI_ERR_COR_BAD_TLP|						\
+	PCI_ERR_COR_BAD_DLLP|						\
+	PCI_ERR_COR_REP_ROLL|						\
+	PCI_ERR_COR_REP_TIMER) : PCI_ERR_UNC_DLP)
+
+#define AER_GET_LAYER_ERROR(t, e)					\
+	((e & AER_PHYSICAL_LAYER_ERROR_MASK(t)) ? AER_PHYSICAL_LAYER_ERROR : \
+	(e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \
+	AER_TRANSACTION_LAYER_ERROR)
+
+/*
+ * AER error strings
+ */
+static const char *aer_error_severity_string[] = {
+	"Uncorrected (Non-Fatal)",
+	"Uncorrected (Fatal)",
+	"Corrected"
+};
+
+static const char *aer_error_layer[] = {
+	"Physical Layer",
+	"Data Link Layer",
+	"Transaction Layer"
+};
+
+static const char *aer_correctable_error_string[] = {
+	"RxErr",			/* Bit Position 0	*/
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	"BadTLP",			/* Bit Position 6	*/
+	"BadDLLP",			/* Bit Position 7	*/
+	"Rollover",			/* Bit Position 8	*/
+	NULL,
+	NULL,
+	NULL,
+	"Timeout",			/* Bit Position 12	*/
+	"NonFatalErr",			/* Bit Position 13	*/
+	"CorrIntErr",			/* Bit Position 14	*/
+	"HeaderOF",			/* Bit Position 15	*/
+	NULL,				/* Bit Position 16	*/
+	NULL,				/* Bit Position 17	*/
+	NULL,				/* Bit Position 18	*/
+	NULL,				/* Bit Position 19	*/
+	NULL,				/* Bit Position 20	*/
+	NULL,				/* Bit Position 21	*/
+	NULL,				/* Bit Position 22	*/
+	NULL,				/* Bit Position 23	*/
+	NULL,				/* Bit Position 24	*/
+	NULL,				/* Bit Position 25	*/
+	NULL,				/* Bit Position 26	*/
+	NULL,				/* Bit Position 27	*/
+	NULL,				/* Bit Position 28	*/
+	NULL,				/* Bit Position 29	*/
+	NULL,				/* Bit Position 30	*/
+	NULL,				/* Bit Position 31	*/
+};
+
+static const char *aer_uncorrectable_error_string[] = {
+	"Undefined",			/* Bit Position 0	*/
+	NULL,
+	NULL,
+	NULL,
+	"DLP",				/* Bit Position 4	*/
+	"SDES",				/* Bit Position 5	*/
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	"TLP",				/* Bit Position 12	*/
+	"FCP",				/* Bit Position 13	*/
+	"CmpltTO",			/* Bit Position 14	*/
+	"CmpltAbrt",			/* Bit Position 15	*/
+	"UnxCmplt",			/* Bit Position 16	*/
+	"RxOF",				/* Bit Position 17	*/
+	"MalfTLP",			/* Bit Position 18	*/
+	"ECRC",				/* Bit Position 19	*/
+	"UnsupReq",			/* Bit Position 20	*/
+	"ACSViol",			/* Bit Position 21	*/
+	"UncorrIntErr",			/* Bit Position 22	*/
+	"BlockedTLP",			/* Bit Position 23	*/
+	"AtomicOpBlocked",		/* Bit Position 24	*/
+	"TLPBlockedErr",		/* Bit Position 25	*/
+	"PoisonTLPBlocked",		/* Bit Position 26	*/
+	NULL,				/* Bit Position 27	*/
+	NULL,				/* Bit Position 28	*/
+	NULL,				/* Bit Position 29	*/
+	NULL,				/* Bit Position 30	*/
+	NULL,				/* Bit Position 31	*/
+};
+
+static const char *aer_agent_string[] = {
+	"Receiver ID",
+	"Requester ID",
+	"Completer ID",
+	"Transmitter ID"
+};
+
+#define aer_stats_dev_attr(name, stats_array, strings_array,		\
+			   total_string, total_field)			\
+	static ssize_t							\
+	name##_show(struct device *dev, struct device_attribute *attr,	\
+		     char *buf)						\
+{									\
+	unsigned int i;							\
+	struct pci_dev *pdev = to_pci_dev(dev);				\
+	u64 *stats = pdev->aer_stats->stats_array;			\
+	size_t len = 0;							\
+									\
+	for (i = 0; i < ARRAY_SIZE(pdev->aer_stats->stats_array); i++) {\
+		if (strings_array[i])					\
+			len += sysfs_emit_at(buf, len, "%s %llu\n",	\
+					     strings_array[i],		\
+					     stats[i]);			\
+		else if (stats[i])					\
+			len += sysfs_emit_at(buf, len,			\
+					     #stats_array "_bit[%d] %llu\n",\
+					     i, stats[i]);		\
+	}								\
+	len += sysfs_emit_at(buf, len, "TOTAL_%s %llu\n", total_string,	\
+			     pdev->aer_stats->total_field);		\
+	return len;							\
+}									\
+static DEVICE_ATTR_RO(name)
+
+aer_stats_dev_attr(aer_dev_correctable, dev_cor_errs,
+		   aer_correctable_error_string, "ERR_COR",
+		   dev_total_cor_errs);
+aer_stats_dev_attr(aer_dev_fatal, dev_fatal_errs,
+		   aer_uncorrectable_error_string, "ERR_FATAL",
+		   dev_total_fatal_errs);
+aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs,
+		   aer_uncorrectable_error_string, "ERR_NONFATAL",
+		   dev_total_nonfatal_errs);
+
+#define aer_stats_rootport_attr(name, field)				\
+	static ssize_t							\
+	name##_show(struct device *dev, struct device_attribute *attr,	\
+		     char *buf)						\
+{									\
+	struct pci_dev *pdev = to_pci_dev(dev);				\
+	return sysfs_emit(buf, "%llu\n", pdev->aer_stats->field);	\
+}									\
+static DEVICE_ATTR_RO(name)
+
+aer_stats_rootport_attr(aer_rootport_total_err_cor,
+			 rootport_total_cor_errs);
+aer_stats_rootport_attr(aer_rootport_total_err_fatal,
+			 rootport_total_fatal_errs);
+aer_stats_rootport_attr(aer_rootport_total_err_nonfatal,
+			 rootport_total_nonfatal_errs);
+
+static struct attribute *aer_stats_attrs[] __ro_after_init = {
+	&dev_attr_aer_dev_correctable.attr,
+	&dev_attr_aer_dev_fatal.attr,
+	&dev_attr_aer_dev_nonfatal.attr,
+	&dev_attr_aer_rootport_total_err_cor.attr,
+	&dev_attr_aer_rootport_total_err_fatal.attr,
+	&dev_attr_aer_rootport_total_err_nonfatal.attr,
+	NULL
+};
+
+static umode_t aer_stats_attrs_are_visible(struct kobject *kobj,
+					   struct attribute *a, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (!pdev->aer_stats)
+		return 0;
+
+	if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
+	     a == &dev_attr_aer_rootport_total_err_fatal.attr ||
+	     a == &dev_attr_aer_rootport_total_err_nonfatal.attr) &&
+	    ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) &&
+	     (pci_pcie_type(pdev) != PCI_EXP_TYPE_RC_EC)))
+		return 0;
+
+	return a->mode;
+}
+
+const struct attribute_group aer_stats_attr_group = {
+	.attrs  = aer_stats_attrs,
+	.is_visible = aer_stats_attrs_are_visible,
+};
+
+static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
+				   struct aer_err_info *info)
+{
+	unsigned long status = info->status & ~info->mask;
+	int i, max = -1;
+	u64 *counter = NULL;
+	struct aer_stats *aer_stats = pdev->aer_stats;
+
+	if (!aer_stats)
+		return;
+
+	switch (info->severity) {
+	case AER_CORRECTABLE:
+		aer_stats->dev_total_cor_errs++;
+		counter = &aer_stats->dev_cor_errs[0];
+		max = AER_MAX_TYPEOF_COR_ERRS;
+		break;
+	case AER_NONFATAL:
+		aer_stats->dev_total_nonfatal_errs++;
+		counter = &aer_stats->dev_nonfatal_errs[0];
+		max = AER_MAX_TYPEOF_UNCOR_ERRS;
+		break;
+	case AER_FATAL:
+		aer_stats->dev_total_fatal_errs++;
+		counter = &aer_stats->dev_fatal_errs[0];
+		max = AER_MAX_TYPEOF_UNCOR_ERRS;
+		break;
+	}
+
+	for_each_set_bit(i, &status, max)
+		counter[i]++;
+}
+
+static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
+				 struct aer_err_source *e_src)
+{
+	struct aer_stats *aer_stats = pdev->aer_stats;
+
+	if (!aer_stats)
+		return;
+
+	if (e_src->status & PCI_ERR_ROOT_COR_RCV)
+		aer_stats->rootport_total_cor_errs++;
+
+	if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
+		if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
+			aer_stats->rootport_total_fatal_errs++;
+		else
+			aer_stats->rootport_total_nonfatal_errs++;
+	}
+}
+
+static void __print_tlp_header(struct pci_dev *dev,
+			       struct aer_header_log_regs *t)
+{
+	pci_err(dev, "  TLP Header: %08x %08x %08x %08x\n",
+		t->dw0, t->dw1, t->dw2, t->dw3);
+}
+
+static void __aer_print_error(struct pci_dev *dev,
+			      struct aer_err_info *info)
+{
+	const char **strings;
+	unsigned long status = info->status & ~info->mask;
+	const char *level, *errmsg;
+	int i;
+
+	if (info->severity == AER_CORRECTABLE) {
+		strings = aer_correctable_error_string;
+		level = KERN_WARNING;
+	} else {
+		strings = aer_uncorrectable_error_string;
+		level = KERN_ERR;
+	}
+
+	for_each_set_bit(i, &status, 32) {
+		errmsg = strings[i];
+		if (!errmsg)
+			errmsg = "Unknown Error Bit";
+
+		pci_printk(level, dev, "   [%2d] %-22s%s\n", i, errmsg,
+				info->first_error == i ? " (First)" : "");
+	}
+	pci_dev_aer_stats_incr(dev, info);
+}
+
+void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
+{
+	int layer, agent;
+	int id = pci_dev_id(dev);
+	const char *level;
+
+	if (!info->status) {
+		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
+			aer_error_severity_string[info->severity]);
+		goto out;
+	}
+
+	layer = AER_GET_LAYER_ERROR(info->severity, info->status);
+	agent = AER_GET_AGENT(info->severity, info->status);
+
+	level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
+
+	pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
+		   aer_error_severity_string[info->severity],
+		   aer_error_layer[layer], aer_agent_string[agent]);
+
+	pci_printk(level, dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
+		   dev->vendor, dev->device, info->status, info->mask);
+
+	__aer_print_error(dev, info);
+
+	if (info->tlp_header_valid)
+		__print_tlp_header(dev, &info->tlp);
+
+out:
+	if (info->id && info->error_dev_num > 1 && info->id == id)
+		pci_err(dev, "  Error of this Agent is reported first\n");
+
+	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
+			info->severity, info->tlp_header_valid, &info->tlp);
+}
+
+static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
+{
+	u8 bus = info->id >> 8;
+	u8 devfn = info->id & 0xff;
+
+	pci_info(dev, "%s%s error received: %04x:%02x:%02x.%d\n",
+		 info->multi_error_valid ? "Multiple " : "",
+		 aer_error_severity_string[info->severity],
+		 pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn),
+		 PCI_FUNC(devfn));
+}
+
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+int cper_severity_to_aer(int cper_severity)
+{
+	switch (cper_severity) {
+	case CPER_SEV_RECOVERABLE:
+		return AER_NONFATAL;
+	case CPER_SEV_FATAL:
+		return AER_FATAL;
+	default:
+		return AER_CORRECTABLE;
+	}
+}
+EXPORT_SYMBOL_GPL(cper_severity_to_aer);
+
+void cper_print_aer(struct pci_dev *dev, int aer_severity,
+		    struct aer_capability_regs *aer)
+{
+	int layer, agent, tlp_header_valid = 0;
+	u32 status, mask;
+	struct aer_err_info info;
+
+	if (aer_severity == AER_CORRECTABLE) {
+		status = aer->cor_status;
+		mask = aer->cor_mask;
+	} else {
+		status = aer->uncor_status;
+		mask = aer->uncor_mask;
+		tlp_header_valid = status & AER_LOG_TLP_MASKS;
+	}
+
+	layer = AER_GET_LAYER_ERROR(aer_severity, status);
+	agent = AER_GET_AGENT(aer_severity, status);
+
+	memset(&info, 0, sizeof(info));
+	info.severity = aer_severity;
+	info.status = status;
+	info.mask = mask;
+	info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
+
+	pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
+	__aer_print_error(dev, &info);
+	pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
+		aer_error_layer[layer], aer_agent_string[agent]);
+
+	if (aer_severity != AER_CORRECTABLE)
+		pci_err(dev, "aer_uncor_severity: 0x%08x\n",
+			aer->uncor_severity);
+
+	if (tlp_header_valid)
+		__print_tlp_header(dev, &aer->header_log);
+
+	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
+			aer_severity, tlp_header_valid, &aer->header_log);
+}
+#endif
+
+/**
+ * add_error_device - list device to be handled
+ * @e_info: pointer to error info
+ * @dev: pointer to pci_dev to be added
+ */
+static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev)
+{
+	if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) {
+		e_info->dev[e_info->error_dev_num] = pci_dev_get(dev);
+		e_info->error_dev_num++;
+		return 0;
+	}
+	return -ENOSPC;
+}
+
+/**
+ * is_error_source - check whether the device is source of reported error
+ * @dev: pointer to pci_dev to be checked
+ * @e_info: pointer to reported error info
+ */
+static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)
+{
+	int aer = dev->aer_cap;
+	u32 status, mask;
+	u16 reg16;
+
+	/*
+	 * When bus id is equal to 0, it might be a bad id
+	 * reported by root port.
+	 */
+	if ((PCI_BUS_NUM(e_info->id) != 0) &&
+	    !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) {
+		/* Device ID match? */
+		if (e_info->id == pci_dev_id(dev))
+			return true;
+
+		/* Continue id comparing if there is no multiple error */
+		if (!e_info->multi_error_valid)
+			return false;
+	}
+
+	/*
+	 * When either
+	 *      1) bus id is equal to 0. Some ports might lose the bus
+	 *              id of error source id;
+	 *      2) bus flag PCI_BUS_FLAGS_NO_AERSID is set
+	 *      3) There are multiple errors and prior ID comparing fails;
+	 * We check AER status registers to find possible reporter.
+	 */
+	if (atomic_read(&dev->enable_cnt) == 0)
+		return false;
+
+	/* Check if AER is enabled */
+	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &reg16);
+	if (!(reg16 & PCI_EXP_AER_FLAGS))
+		return false;
+
+	if (!aer)
+		return false;
+
+	/* Check if error is recorded */
+	if (e_info->severity == AER_CORRECTABLE) {
+		pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status);
+		pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask);
+	} else {
+		pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
+		pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask);
+	}
+	if (status & ~mask)
+		return true;
+
+	return false;
+}
+
+static int find_device_iter(struct pci_dev *dev, void *data)
+{
+	struct aer_err_info *e_info = (struct aer_err_info *)data;
+
+	if (is_error_source(dev, e_info)) {
+		/* List this device */
+		if (add_error_device(e_info, dev)) {
+			/* We cannot handle more... Stop iteration */
+			/* TODO: Should print error message here? */
+			return 1;
+		}
+
+		/* If there is only a single error, stop iteration */
+		if (!e_info->multi_error_valid)
+			return 1;
+	}
+	return 0;
+}
+
+/**
+ * find_source_device - search through device hierarchy for source device
+ * @parent: pointer to Root Port pci_dev data structure
+ * @e_info: including detailed error information such like id
+ *
+ * Return true if found.
+ *
+ * Invoked by DPC when error is detected at the Root Port.
+ * Caller of this function must set id, severity, and multi_error_valid of
+ * struct aer_err_info pointed by @e_info properly.  This function must fill
+ * e_info->error_dev_num and e_info->dev[], based on the given information.
+ */
+static bool find_source_device(struct pci_dev *parent,
+		struct aer_err_info *e_info)
+{
+	struct pci_dev *dev = parent;
+	int result;
+
+	/* Must reset in this function */
+	e_info->error_dev_num = 0;
+
+	/* Is Root Port an agent that sends error message? */
+	result = find_device_iter(dev, e_info);
+	if (result)
+		return true;
+
+	if (pci_pcie_type(parent) == PCI_EXP_TYPE_RC_EC)
+		pcie_walk_rcec(parent, find_device_iter, e_info);
+	else
+		pci_walk_bus(parent->subordinate, find_device_iter, e_info);
+
+	if (!e_info->error_dev_num) {
+		pci_info(parent, "can't find device of ID%04x\n", e_info->id);
+		return false;
+	}
+	return true;
+}
+
+/**
+ * handle_error_source - handle logging error into an event log
+ * @dev: pointer to pci_dev data structure of error source device
+ * @info: comprehensive error information
+ *
+ * Invoked when an error being detected by Root Port.
+ */
+static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
+{
+	int aer = dev->aer_cap;
+
+	if (info->severity == AER_CORRECTABLE) {
+		/*
+		 * Correctable error does not need software intervention.
+		 * No need to go through error recovery process.
+		 */
+		if (aer)
+			pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS,
+					info->status);
+		if (pcie_aer_is_native(dev)) {
+			struct pci_driver *pdrv = dev->driver;
+
+			if (pdrv && pdrv->err_handler &&
+			    pdrv->err_handler->cor_error_detected)
+				pdrv->err_handler->cor_error_detected(dev);
+			pcie_clear_device_status(dev);
+		}
+	} else if (info->severity == AER_NONFATAL)
+		pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset);
+	else if (info->severity == AER_FATAL)
+		pcie_do_recovery(dev, pci_channel_io_frozen, aer_root_reset);
+	pci_dev_put(dev);
+}
+
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+
+#define AER_RECOVER_RING_SIZE		16
+
+struct aer_recover_entry {
+	u8	bus;
+	u8	devfn;
+	u16	domain;
+	int	severity;
+	struct aer_capability_regs *regs;
+};
+
+static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry,
+		    AER_RECOVER_RING_SIZE);
+
+static void aer_recover_work_func(struct work_struct *work)
+{
+	struct aer_recover_entry entry;
+	struct pci_dev *pdev;
+
+	while (kfifo_get(&aer_recover_ring, &entry)) {
+		pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
+						   entry.devfn);
+		if (!pdev) {
+			pr_err("no pci_dev for %04x:%02x:%02x.%x\n",
+			       entry.domain, entry.bus,
+			       PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
+			continue;
+		}
+		cper_print_aer(pdev, entry.severity, entry.regs);
+		/*
+		 * Memory for aer_capability_regs(entry.regs) is being allocated from the
+		 * ghes_estatus_pool to protect it from overwriting when multiple sections
+		 * are present in the error status. Thus free the same after processing
+		 * the data.
+		 */
+		ghes_estatus_pool_region_free((unsigned long)entry.regs,
+					      sizeof(struct aer_capability_regs));
+
+		if (entry.severity == AER_NONFATAL)
+			pcie_do_recovery(pdev, pci_channel_io_normal,
+					 aer_root_reset);
+		else if (entry.severity == AER_FATAL)
+			pcie_do_recovery(pdev, pci_channel_io_frozen,
+					 aer_root_reset);
+		pci_dev_put(pdev);
+	}
+}
+
+/*
+ * Mutual exclusion for writers of aer_recover_ring, reader side don't
+ * need lock, because there is only one reader and lock is not needed
+ * between reader and writer.
+ */
+static DEFINE_SPINLOCK(aer_recover_ring_lock);
+static DECLARE_WORK(aer_recover_work, aer_recover_work_func);
+
+void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
+		       int severity, struct aer_capability_regs *aer_regs)
+{
+	struct aer_recover_entry entry = {
+		.bus		= bus,
+		.devfn		= devfn,
+		.domain		= domain,
+		.severity	= severity,
+		.regs		= aer_regs,
+	};
+
+	if (kfifo_in_spinlocked(&aer_recover_ring, &entry, 1,
+				 &aer_recover_ring_lock))
+		schedule_work(&aer_recover_work);
+	else
+		pr_err("buffer overflow in recovery for %04x:%02x:%02x.%x\n",
+		       domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+}
+EXPORT_SYMBOL_GPL(aer_recover_queue);
+#endif
+
+/**
+ * aer_get_device_error_info - read error status from dev and store it to info
+ * @dev: pointer to the device expected to have a error record
+ * @info: pointer to structure to store the error record
+ *
+ * Return 1 on success, 0 on error.
+ *
+ * Note that @info is reused among all error devices. Clear fields properly.
+ */
+int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
+{
+	int type = pci_pcie_type(dev);
+	int aer = dev->aer_cap;
+	int temp;
+
+	/* Must reset in this function */
+	info->status = 0;
+	info->tlp_header_valid = 0;
+
+	/* The device might not support AER */
+	if (!aer)
+		return 0;
+
+	if (info->severity == AER_CORRECTABLE) {
+		pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS,
+			&info->status);
+		pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK,
+			&info->mask);
+		if (!(info->status & ~info->mask))
+			return 0;
+	} else if (type == PCI_EXP_TYPE_ROOT_PORT ||
+		   type == PCI_EXP_TYPE_RC_EC ||
+		   type == PCI_EXP_TYPE_DOWNSTREAM ||
+		   info->severity == AER_NONFATAL) {
+
+		/* Link is still healthy for IO reads */
+		pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS,
+			&info->status);
+		pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK,
+			&info->mask);
+		if (!(info->status & ~info->mask))
+			return 0;
+
+		/* Get First Error Pointer */
+		pci_read_config_dword(dev, aer + PCI_ERR_CAP, &temp);
+		info->first_error = PCI_ERR_CAP_FEP(temp);
+
+		if (info->status & AER_LOG_TLP_MASKS) {
+			info->tlp_header_valid = 1;
+			pci_read_config_dword(dev,
+				aer + PCI_ERR_HEADER_LOG, &info->tlp.dw0);
+			pci_read_config_dword(dev,
+				aer + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1);
+			pci_read_config_dword(dev,
+				aer + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2);
+			pci_read_config_dword(dev,
+				aer + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3);
+		}
+	}
+
+	return 1;
+}
+
+static inline void aer_process_err_devices(struct aer_err_info *e_info)
+{
+	int i;
+
+	/* Report all before handle them, not to lost records by reset etc. */
+	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
+		if (aer_get_device_error_info(e_info->dev[i], e_info))
+			aer_print_error(e_info->dev[i], e_info);
+	}
+	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
+		if (aer_get_device_error_info(e_info->dev[i], e_info))
+			handle_error_source(e_info->dev[i], e_info);
+	}
+}
+
+/**
+ * aer_isr_one_error - consume an error detected by root port
+ * @rpc: pointer to the root port which holds an error
+ * @e_src: pointer to an error source
+ */
+static void aer_isr_one_error(struct aer_rpc *rpc,
+		struct aer_err_source *e_src)
+{
+	struct pci_dev *pdev = rpc->rpd;
+	struct aer_err_info e_info;
+
+	pci_rootport_aer_stats_incr(pdev, e_src);
+
+	/*
+	 * There is a possibility that both correctable error and
+	 * uncorrectable error being logged. Report correctable error first.
+	 */
+	if (e_src->status & PCI_ERR_ROOT_COR_RCV) {
+		e_info.id = ERR_COR_ID(e_src->id);
+		e_info.severity = AER_CORRECTABLE;
+
+		if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV)
+			e_info.multi_error_valid = 1;
+		else
+			e_info.multi_error_valid = 0;
+		aer_print_port_info(pdev, &e_info);
+
+		if (find_source_device(pdev, &e_info))
+			aer_process_err_devices(&e_info);
+	}
+
+	if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
+		e_info.id = ERR_UNCOR_ID(e_src->id);
+
+		if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
+			e_info.severity = AER_FATAL;
+		else
+			e_info.severity = AER_NONFATAL;
+
+		if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV)
+			e_info.multi_error_valid = 1;
+		else
+			e_info.multi_error_valid = 0;
+
+		aer_print_port_info(pdev, &e_info);
+
+		if (find_source_device(pdev, &e_info))
+			aer_process_err_devices(&e_info);
+	}
+}
+
+/**
+ * aer_isr - consume errors detected by root port
+ * @irq: IRQ assigned to Root Port
+ * @context: pointer to Root Port data structure
+ *
+ * Invoked, as DPC, when root port records new detected error
+ */
+static irqreturn_t aer_isr(int irq, void *context)
+{
+	struct pcie_device *dev = (struct pcie_device *)context;
+	struct aer_rpc *rpc = get_service_data(dev);
+	struct aer_err_source e_src;
+
+	if (kfifo_is_empty(&rpc->aer_fifo))
+		return IRQ_NONE;
+
+	while (kfifo_get(&rpc->aer_fifo, &e_src))
+		aer_isr_one_error(rpc, &e_src);
+	return IRQ_HANDLED;
+}
+
+/**
+ * aer_irq - Root Port's ISR
+ * @irq: IRQ assigned to Root Port
+ * @context: pointer to Root Port data structure
+ *
+ * Invoked when Root Port detects AER messages.
+ */
+static irqreturn_t aer_irq(int irq, void *context)
+{
+	struct pcie_device *pdev = (struct pcie_device *)context;
+	struct aer_rpc *rpc = get_service_data(pdev);
+	struct pci_dev *rp = rpc->rpd;
+	int aer = rp->aer_cap;
+	struct aer_err_source e_src = {};
+
+	pci_read_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, &e_src.status);
+	if (!(e_src.status & AER_ERR_STATUS_MASK))
+		return IRQ_NONE;
+
+	pci_read_config_dword(rp, aer + PCI_ERR_ROOT_ERR_SRC, &e_src.id);
+	pci_write_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, e_src.status);
+
+	if (!kfifo_put(&rpc->aer_fifo, e_src))
+		return IRQ_HANDLED;
+
+	return IRQ_WAKE_THREAD;
+}
+
+/**
+ * aer_enable_rootport - enable Root Port's interrupts when receiving messages
+ * @rpc: pointer to a Root Port data structure
+ *
+ * Invoked when PCIe bus loads AER service driver.
+ */
+static void aer_enable_rootport(struct aer_rpc *rpc)
+{
+	struct pci_dev *pdev = rpc->rpd;
+	int aer = pdev->aer_cap;
+	u16 reg16;
+	u32 reg32;
+
+	/* Clear PCIe Capability's Device Status */
+	pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, &reg16);
+	pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, reg16);
+
+	/* Disable system error generation in response to error messages */
+	pcie_capability_clear_word(pdev, PCI_EXP_RTCTL,
+				   SYSTEM_ERROR_INTR_ON_MESG_MASK);
+
+	/* Clear error status */
+	pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, &reg32);
+	pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32);
+	pci_read_config_dword(pdev, aer + PCI_ERR_COR_STATUS, &reg32);
+	pci_write_config_dword(pdev, aer + PCI_ERR_COR_STATUS, reg32);
+	pci_read_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, &reg32);
+	pci_write_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, reg32);
+
+	/* Enable Root Port's interrupt in response to error messages */
+	pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
+	pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+}
+
+/**
+ * aer_disable_rootport - disable Root Port's interrupts when receiving messages
+ * @rpc: pointer to a Root Port data structure
+ *
+ * Invoked when PCIe bus unloads AER service driver.
+ */
+static void aer_disable_rootport(struct aer_rpc *rpc)
+{
+	struct pci_dev *pdev = rpc->rpd;
+	int aer = pdev->aer_cap;
+	u32 reg32;
+
+	/* Disable Root's interrupt in response to error messages */
+	pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
+	pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+
+	/* Clear Root's error status reg */
+	pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, &reg32);
+	pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32);
+}
+
+/**
+ * aer_remove - clean up resources
+ * @dev: pointer to the pcie_dev data structure
+ *
+ * Invoked when PCI Express bus unloads or AER probe fails.
+ */
+static void aer_remove(struct pcie_device *dev)
+{
+	struct aer_rpc *rpc = get_service_data(dev);
+
+	aer_disable_rootport(rpc);
+}
+
+/**
+ * aer_probe - initialize resources
+ * @dev: pointer to the pcie_dev data structure
+ *
+ * Invoked when PCI Express bus loads AER service driver.
+ */
+static int aer_probe(struct pcie_device *dev)
+{
+	int status;
+	struct aer_rpc *rpc;
+	struct device *device = &dev->device;
+	struct pci_dev *port = dev->port;
+
+	BUILD_BUG_ON(ARRAY_SIZE(aer_correctable_error_string) <
+		     AER_MAX_TYPEOF_COR_ERRS);
+	BUILD_BUG_ON(ARRAY_SIZE(aer_uncorrectable_error_string) <
+		     AER_MAX_TYPEOF_UNCOR_ERRS);
+
+	/* Limit to Root Ports or Root Complex Event Collectors */
+	if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) &&
+	    (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT))
+		return -ENODEV;
+
+	rpc = devm_kzalloc(device, sizeof(struct aer_rpc), GFP_KERNEL);
+	if (!rpc)
+		return -ENOMEM;
+
+	rpc->rpd = port;
+	INIT_KFIFO(rpc->aer_fifo);
+	set_service_data(dev, rpc);
+
+	status = devm_request_threaded_irq(device, dev->irq, aer_irq, aer_isr,
+					   IRQF_SHARED, "aerdrv", dev);
+	if (status) {
+		pci_err(port, "request AER IRQ %d failed\n", dev->irq);
+		return status;
+	}
+
+	aer_enable_rootport(rpc);
+	pci_info(port, "enabled with IRQ %d\n", dev->irq);
+	return 0;
+}
+
+/**
+ * aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP
+ * @dev: pointer to Root Port, RCEC, or RCiEP
+ *
+ * Invoked by Port Bus driver when performing reset.
+ */
+static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
+{
+	int type = pci_pcie_type(dev);
+	struct pci_dev *root;
+	int aer;
+	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
+	u32 reg32;
+	int rc;
+
+	/*
+	 * Only Root Ports and RCECs have AER Root Command and Root Status
+	 * registers.  If "dev" is an RCiEP, the relevant registers are in
+	 * the RCEC.
+	 */
+	if (type == PCI_EXP_TYPE_RC_END)
+		root = dev->rcec;
+	else
+		root = pcie_find_root_port(dev);
+
+	/*
+	 * If the platform retained control of AER, an RCiEP may not have
+	 * an RCEC visible to us, so dev->rcec ("root") may be NULL.  In
+	 * that case, firmware is responsible for these registers.
+	 */
+	aer = root ? root->aer_cap : 0;
+
+	if ((host->native_aer || pcie_ports_native) && aer) {
+		/* Disable Root's interrupt in response to error messages */
+		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+		reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
+		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
+	}
+
+	if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
+		rc = pcie_reset_flr(dev, PCI_RESET_DO_RESET);
+		if (!rc)
+			pci_info(dev, "has been reset\n");
+		else
+			pci_info(dev, "not reset (no FLR support: %d)\n", rc);
+	} else {
+		rc = pci_bus_error_reset(dev);
+		pci_info(dev, "%s Port link has been reset (%d)\n",
+			pci_is_root_bus(dev->bus) ? "Root" : "Downstream", rc);
+	}
+
+	if ((host->native_aer || pcie_ports_native) && aer) {
+		/* Clear Root Error Status */
+		pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32);
+		pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);
+
+		/* Enable Root Port's interrupt in response to error messages */
+		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+		reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
+		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
+	}
+
+	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
+}
+
+static struct pcie_port_service_driver aerdriver = {
+	.name		= "aer",
+	.port_type	= PCIE_ANY_PORT,
+	.service	= PCIE_PORT_SERVICE_AER,
+
+	.probe		= aer_probe,
+	.remove		= aer_remove,
+};
+
+/**
+ * pcie_aer_init - register AER root service driver
+ *
+ * Invoked when AER root service driver is loaded.
+ */
+int __init pcie_aer_init(void)
+{
+	if (!pci_aer_available())
+		return -ENXIO;
+	return pcie_port_service_register(&aerdriver);
+}
diff --git a/drivers/pci/pcie/aer_inject.c b/drivers/pci/pcie/aer_inject.c
new file mode 100644
index 000000000..2dab275d2
--- /dev/null
+++ b/drivers/pci/pcie/aer_inject.c
@@ -0,0 +1,548 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe AER software error injection support.
+ *
+ * Debugging PCIe AER code is quite difficult because it is hard to
+ * trigger various real hardware errors. Software based error
+ * injection can fake almost all kinds of errors with the help of a
+ * user space helper tool aer-inject, which can be gotten from:
+ *   https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/
+ *
+ * Copyright 2009 Intel Corporation.
+ *     Huang Ying <ying.huang@intel.com>
+ */
+
+#define dev_fmt(fmt) "aer_inject: " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/miscdevice.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/stddef.h>
+#include <linux/device.h>
+
+#include "portdrv.h"
+
+/* Override the existing corrected and uncorrected error masks */
+static bool aer_mask_override;
+module_param(aer_mask_override, bool, 0);
+
+struct aer_error_inj {
+	u8 bus;
+	u8 dev;
+	u8 fn;
+	u32 uncor_status;
+	u32 cor_status;
+	u32 header_log0;
+	u32 header_log1;
+	u32 header_log2;
+	u32 header_log3;
+	u32 domain;
+};
+
+struct aer_error {
+	struct list_head list;
+	u32 domain;
+	unsigned int bus;
+	unsigned int devfn;
+	int pos_cap_err;
+
+	u32 uncor_status;
+	u32 cor_status;
+	u32 header_log0;
+	u32 header_log1;
+	u32 header_log2;
+	u32 header_log3;
+	u32 root_status;
+	u32 source_id;
+};
+
+struct pci_bus_ops {
+	struct list_head list;
+	struct pci_bus *bus;
+	struct pci_ops *ops;
+};
+
+static LIST_HEAD(einjected);
+
+static LIST_HEAD(pci_bus_ops_list);
+
+/* Protect einjected and pci_bus_ops_list */
+static DEFINE_SPINLOCK(inject_lock);
+
+static void aer_error_init(struct aer_error *err, u32 domain,
+			   unsigned int bus, unsigned int devfn,
+			   int pos_cap_err)
+{
+	INIT_LIST_HEAD(&err->list);
+	err->domain = domain;
+	err->bus = bus;
+	err->devfn = devfn;
+	err->pos_cap_err = pos_cap_err;
+}
+
+/* inject_lock must be held before calling */
+static struct aer_error *__find_aer_error(u32 domain, unsigned int bus,
+					  unsigned int devfn)
+{
+	struct aer_error *err;
+
+	list_for_each_entry(err, &einjected, list) {
+		if (domain == err->domain &&
+		    bus == err->bus &&
+		    devfn == err->devfn)
+			return err;
+	}
+	return NULL;
+}
+
+/* inject_lock must be held before calling */
+static struct aer_error *__find_aer_error_by_dev(struct pci_dev *dev)
+{
+	int domain = pci_domain_nr(dev->bus);
+	if (domain < 0)
+		return NULL;
+	return __find_aer_error(domain, dev->bus->number, dev->devfn);
+}
+
+/* inject_lock must be held before calling */
+static struct pci_ops *__find_pci_bus_ops(struct pci_bus *bus)
+{
+	struct pci_bus_ops *bus_ops;
+
+	list_for_each_entry(bus_ops, &pci_bus_ops_list, list) {
+		if (bus_ops->bus == bus)
+			return bus_ops->ops;
+	}
+	return NULL;
+}
+
+static struct pci_bus_ops *pci_bus_ops_pop(void)
+{
+	unsigned long flags;
+	struct pci_bus_ops *bus_ops;
+
+	spin_lock_irqsave(&inject_lock, flags);
+	bus_ops = list_first_entry_or_null(&pci_bus_ops_list,
+					   struct pci_bus_ops, list);
+	if (bus_ops)
+		list_del(&bus_ops->list);
+	spin_unlock_irqrestore(&inject_lock, flags);
+	return bus_ops;
+}
+
+static u32 *find_pci_config_dword(struct aer_error *err, int where,
+				  int *prw1cs)
+{
+	int rw1cs = 0;
+	u32 *target = NULL;
+
+	if (err->pos_cap_err == -1)
+		return NULL;
+
+	switch (where - err->pos_cap_err) {
+	case PCI_ERR_UNCOR_STATUS:
+		target = &err->uncor_status;
+		rw1cs = 1;
+		break;
+	case PCI_ERR_COR_STATUS:
+		target = &err->cor_status;
+		rw1cs = 1;
+		break;
+	case PCI_ERR_HEADER_LOG:
+		target = &err->header_log0;
+		break;
+	case PCI_ERR_HEADER_LOG+4:
+		target = &err->header_log1;
+		break;
+	case PCI_ERR_HEADER_LOG+8:
+		target = &err->header_log2;
+		break;
+	case PCI_ERR_HEADER_LOG+12:
+		target = &err->header_log3;
+		break;
+	case PCI_ERR_ROOT_STATUS:
+		target = &err->root_status;
+		rw1cs = 1;
+		break;
+	case PCI_ERR_ROOT_ERR_SRC:
+		target = &err->source_id;
+		break;
+	}
+	if (prw1cs)
+		*prw1cs = rw1cs;
+	return target;
+}
+
+static int aer_inj_read(struct pci_bus *bus, unsigned int devfn, int where,
+			int size, u32 *val)
+{
+	struct pci_ops *ops, *my_ops;
+	int rv;
+
+	ops = __find_pci_bus_ops(bus);
+	if (!ops)
+		return -1;
+
+	my_ops = bus->ops;
+	bus->ops = ops;
+	rv = ops->read(bus, devfn, where, size, val);
+	bus->ops = my_ops;
+
+	return rv;
+}
+
+static int aer_inj_write(struct pci_bus *bus, unsigned int devfn, int where,
+			 int size, u32 val)
+{
+	struct pci_ops *ops, *my_ops;
+	int rv;
+
+	ops = __find_pci_bus_ops(bus);
+	if (!ops)
+		return -1;
+
+	my_ops = bus->ops;
+	bus->ops = ops;
+	rv = ops->write(bus, devfn, where, size, val);
+	bus->ops = my_ops;
+
+	return rv;
+}
+
+static int aer_inj_read_config(struct pci_bus *bus, unsigned int devfn,
+			       int where, int size, u32 *val)
+{
+	u32 *sim;
+	struct aer_error *err;
+	unsigned long flags;
+	int domain;
+	int rv;
+
+	spin_lock_irqsave(&inject_lock, flags);
+	if (size != sizeof(u32))
+		goto out;
+	domain = pci_domain_nr(bus);
+	if (domain < 0)
+		goto out;
+	err = __find_aer_error(domain, bus->number, devfn);
+	if (!err)
+		goto out;
+
+	sim = find_pci_config_dword(err, where, NULL);
+	if (sim) {
+		*val = *sim;
+		spin_unlock_irqrestore(&inject_lock, flags);
+		return 0;
+	}
+out:
+	rv = aer_inj_read(bus, devfn, where, size, val);
+	spin_unlock_irqrestore(&inject_lock, flags);
+	return rv;
+}
+
+static int aer_inj_write_config(struct pci_bus *bus, unsigned int devfn,
+				int where, int size, u32 val)
+{
+	u32 *sim;
+	struct aer_error *err;
+	unsigned long flags;
+	int rw1cs;
+	int domain;
+	int rv;
+
+	spin_lock_irqsave(&inject_lock, flags);
+	if (size != sizeof(u32))
+		goto out;
+	domain = pci_domain_nr(bus);
+	if (domain < 0)
+		goto out;
+	err = __find_aer_error(domain, bus->number, devfn);
+	if (!err)
+		goto out;
+
+	sim = find_pci_config_dword(err, where, &rw1cs);
+	if (sim) {
+		if (rw1cs)
+			*sim ^= val;
+		else
+			*sim = val;
+		spin_unlock_irqrestore(&inject_lock, flags);
+		return 0;
+	}
+out:
+	rv = aer_inj_write(bus, devfn, where, size, val);
+	spin_unlock_irqrestore(&inject_lock, flags);
+	return rv;
+}
+
+static struct pci_ops aer_inj_pci_ops = {
+	.read = aer_inj_read_config,
+	.write = aer_inj_write_config,
+};
+
+static void pci_bus_ops_init(struct pci_bus_ops *bus_ops,
+			     struct pci_bus *bus,
+			     struct pci_ops *ops)
+{
+	INIT_LIST_HEAD(&bus_ops->list);
+	bus_ops->bus = bus;
+	bus_ops->ops = ops;
+}
+
+static int pci_bus_set_aer_ops(struct pci_bus *bus)
+{
+	struct pci_ops *ops;
+	struct pci_bus_ops *bus_ops;
+	unsigned long flags;
+
+	bus_ops = kmalloc(sizeof(*bus_ops), GFP_KERNEL);
+	if (!bus_ops)
+		return -ENOMEM;
+	ops = pci_bus_set_ops(bus, &aer_inj_pci_ops);
+	spin_lock_irqsave(&inject_lock, flags);
+	if (ops == &aer_inj_pci_ops)
+		goto out;
+	pci_bus_ops_init(bus_ops, bus, ops);
+	list_add(&bus_ops->list, &pci_bus_ops_list);
+	bus_ops = NULL;
+out:
+	spin_unlock_irqrestore(&inject_lock, flags);
+	kfree(bus_ops);
+	return 0;
+}
+
+static int aer_inject(struct aer_error_inj *einj)
+{
+	struct aer_error *err, *rperr;
+	struct aer_error *err_alloc = NULL, *rperr_alloc = NULL;
+	struct pci_dev *dev, *rpdev;
+	struct pcie_device *edev;
+	struct device *device;
+	unsigned long flags;
+	unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn);
+	int pos_cap_err, rp_pos_cap_err;
+	u32 sever, cor_mask, uncor_mask, cor_mask_orig = 0, uncor_mask_orig = 0;
+	int ret = 0;
+
+	dev = pci_get_domain_bus_and_slot(einj->domain, einj->bus, devfn);
+	if (!dev)
+		return -ENODEV;
+	rpdev = pcie_find_root_port(dev);
+	/* If Root Port not found, try to find an RCEC */
+	if (!rpdev)
+		rpdev = dev->rcec;
+	if (!rpdev) {
+		pci_err(dev, "Neither Root Port nor RCEC found\n");
+		ret = -ENODEV;
+		goto out_put;
+	}
+
+	pos_cap_err = dev->aer_cap;
+	if (!pos_cap_err) {
+		pci_err(dev, "Device doesn't support AER\n");
+		ret = -EPROTONOSUPPORT;
+		goto out_put;
+	}
+	pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_SEVER, &sever);
+	pci_read_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, &cor_mask);
+	pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK,
+			      &uncor_mask);
+
+	rp_pos_cap_err = rpdev->aer_cap;
+	if (!rp_pos_cap_err) {
+		pci_err(rpdev, "Root port doesn't support AER\n");
+		ret = -EPROTONOSUPPORT;
+		goto out_put;
+	}
+
+	err_alloc =  kzalloc(sizeof(struct aer_error), GFP_KERNEL);
+	if (!err_alloc) {
+		ret = -ENOMEM;
+		goto out_put;
+	}
+	rperr_alloc =  kzalloc(sizeof(struct aer_error), GFP_KERNEL);
+	if (!rperr_alloc) {
+		ret = -ENOMEM;
+		goto out_put;
+	}
+
+	if (aer_mask_override) {
+		cor_mask_orig = cor_mask;
+		cor_mask &= !(einj->cor_status);
+		pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK,
+				       cor_mask);
+
+		uncor_mask_orig = uncor_mask;
+		uncor_mask &= !(einj->uncor_status);
+		pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK,
+				       uncor_mask);
+	}
+
+	spin_lock_irqsave(&inject_lock, flags);
+
+	err = __find_aer_error_by_dev(dev);
+	if (!err) {
+		err = err_alloc;
+		err_alloc = NULL;
+		aer_error_init(err, einj->domain, einj->bus, devfn,
+			       pos_cap_err);
+		list_add(&err->list, &einjected);
+	}
+	err->uncor_status |= einj->uncor_status;
+	err->cor_status |= einj->cor_status;
+	err->header_log0 = einj->header_log0;
+	err->header_log1 = einj->header_log1;
+	err->header_log2 = einj->header_log2;
+	err->header_log3 = einj->header_log3;
+
+	if (!aer_mask_override && einj->cor_status &&
+	    !(einj->cor_status & ~cor_mask)) {
+		ret = -EINVAL;
+		pci_warn(dev, "The correctable error(s) is masked by device\n");
+		spin_unlock_irqrestore(&inject_lock, flags);
+		goto out_put;
+	}
+	if (!aer_mask_override && einj->uncor_status &&
+	    !(einj->uncor_status & ~uncor_mask)) {
+		ret = -EINVAL;
+		pci_warn(dev, "The uncorrectable error(s) is masked by device\n");
+		spin_unlock_irqrestore(&inject_lock, flags);
+		goto out_put;
+	}
+
+	rperr = __find_aer_error_by_dev(rpdev);
+	if (!rperr) {
+		rperr = rperr_alloc;
+		rperr_alloc = NULL;
+		aer_error_init(rperr, pci_domain_nr(rpdev->bus),
+			       rpdev->bus->number, rpdev->devfn,
+			       rp_pos_cap_err);
+		list_add(&rperr->list, &einjected);
+	}
+	if (einj->cor_status) {
+		if (rperr->root_status & PCI_ERR_ROOT_COR_RCV)
+			rperr->root_status |= PCI_ERR_ROOT_MULTI_COR_RCV;
+		else
+			rperr->root_status |= PCI_ERR_ROOT_COR_RCV;
+		rperr->source_id &= 0xffff0000;
+		rperr->source_id |= (einj->bus << 8) | devfn;
+	}
+	if (einj->uncor_status) {
+		if (rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV)
+			rperr->root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
+		if (sever & einj->uncor_status) {
+			rperr->root_status |= PCI_ERR_ROOT_FATAL_RCV;
+			if (!(rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV))
+				rperr->root_status |= PCI_ERR_ROOT_FIRST_FATAL;
+		} else
+			rperr->root_status |= PCI_ERR_ROOT_NONFATAL_RCV;
+		rperr->root_status |= PCI_ERR_ROOT_UNCOR_RCV;
+		rperr->source_id &= 0x0000ffff;
+		rperr->source_id |= ((einj->bus << 8) | devfn) << 16;
+	}
+	spin_unlock_irqrestore(&inject_lock, flags);
+
+	if (aer_mask_override) {
+		pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK,
+				       cor_mask_orig);
+		pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK,
+				       uncor_mask_orig);
+	}
+
+	ret = pci_bus_set_aer_ops(dev->bus);
+	if (ret)
+		goto out_put;
+	ret = pci_bus_set_aer_ops(rpdev->bus);
+	if (ret)
+		goto out_put;
+
+	device = pcie_port_find_device(rpdev, PCIE_PORT_SERVICE_AER);
+	if (device) {
+		edev = to_pcie_device(device);
+		if (!get_service_data(edev)) {
+			pci_warn(edev->port, "AER service is not initialized\n");
+			ret = -EPROTONOSUPPORT;
+			goto out_put;
+		}
+		pci_info(edev->port, "Injecting errors %08x/%08x into device %s\n",
+			 einj->cor_status, einj->uncor_status, pci_name(dev));
+		ret = irq_inject_interrupt(edev->irq);
+	} else {
+		pci_err(rpdev, "AER device not found\n");
+		ret = -ENODEV;
+	}
+out_put:
+	kfree(err_alloc);
+	kfree(rperr_alloc);
+	pci_dev_put(dev);
+	return ret;
+}
+
+static ssize_t aer_inject_write(struct file *filp, const char __user *ubuf,
+				size_t usize, loff_t *off)
+{
+	struct aer_error_inj einj;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (usize < offsetof(struct aer_error_inj, domain) ||
+	    usize > sizeof(einj))
+		return -EINVAL;
+
+	memset(&einj, 0, sizeof(einj));
+	if (copy_from_user(&einj, ubuf, usize))
+		return -EFAULT;
+
+	ret = aer_inject(&einj);
+	return ret ? ret : usize;
+}
+
+static const struct file_operations aer_inject_fops = {
+	.write = aer_inject_write,
+	.owner = THIS_MODULE,
+	.llseek = noop_llseek,
+};
+
+static struct miscdevice aer_inject_device = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "aer_inject",
+	.fops = &aer_inject_fops,
+};
+
+static int __init aer_inject_init(void)
+{
+	return misc_register(&aer_inject_device);
+}
+
+static void __exit aer_inject_exit(void)
+{
+	struct aer_error *err, *err_next;
+	unsigned long flags;
+	struct pci_bus_ops *bus_ops;
+
+	misc_deregister(&aer_inject_device);
+
+	while ((bus_ops = pci_bus_ops_pop())) {
+		pci_bus_set_ops(bus_ops->bus, bus_ops->ops);
+		kfree(bus_ops);
+	}
+
+	spin_lock_irqsave(&inject_lock, flags);
+	list_for_each_entry_safe(err, err_next, &einjected, list) {
+		list_del(&err->list);
+		kfree(err);
+	}
+	spin_unlock_irqrestore(&inject_lock, flags);
+}
+
+module_init(aer_inject_init);
+module_exit(aer_inject_exit);
+
+MODULE_DESCRIPTION("PCIe AER software error injector");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
new file mode 100644
index 000000000..7e3b34221
--- /dev/null
+++ b/drivers/pci/pcie/aspm.c
@@ -0,0 +1,1440 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Enable PCIe link L0s/L1 state and Clock Power Management
+ *
+ * Copyright (C) 2007 Intel
+ * Copyright (C) Zhang Yanmin (yanmin.zhang@intel.com)
+ * Copyright (C) Shaohua Li (shaohua.li@intel.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/math.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/pci_regs.h>
+#include <linux/errno.h>
+#include <linux/pm.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/delay.h>
+#include "../pci.h"
+
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "pcie_aspm."
+
+/* Note: those are not register definitions */
+#define ASPM_STATE_L0S_UP	(1)	/* Upstream direction L0s state */
+#define ASPM_STATE_L0S_DW	(2)	/* Downstream direction L0s state */
+#define ASPM_STATE_L1		(4)	/* L1 state */
+#define ASPM_STATE_L1_1		(8)	/* ASPM L1.1 state */
+#define ASPM_STATE_L1_2		(0x10)	/* ASPM L1.2 state */
+#define ASPM_STATE_L1_1_PCIPM	(0x20)	/* PCI PM L1.1 state */
+#define ASPM_STATE_L1_2_PCIPM	(0x40)	/* PCI PM L1.2 state */
+#define ASPM_STATE_L1_SS_PCIPM	(ASPM_STATE_L1_1_PCIPM | ASPM_STATE_L1_2_PCIPM)
+#define ASPM_STATE_L1_2_MASK	(ASPM_STATE_L1_2 | ASPM_STATE_L1_2_PCIPM)
+#define ASPM_STATE_L1SS		(ASPM_STATE_L1_1 | ASPM_STATE_L1_1_PCIPM |\
+				 ASPM_STATE_L1_2_MASK)
+#define ASPM_STATE_L0S		(ASPM_STATE_L0S_UP | ASPM_STATE_L0S_DW)
+#define ASPM_STATE_ALL		(ASPM_STATE_L0S | ASPM_STATE_L1 |	\
+				 ASPM_STATE_L1SS)
+
+struct pcie_link_state {
+	struct pci_dev *pdev;		/* Upstream component of the Link */
+	struct pci_dev *downstream;	/* Downstream component, function 0 */
+	struct pcie_link_state *root;	/* pointer to the root port link */
+	struct pcie_link_state *parent;	/* pointer to the parent Link state */
+	struct list_head sibling;	/* node in link_list */
+
+	/* ASPM state */
+	u32 aspm_support:7;		/* Supported ASPM state */
+	u32 aspm_enabled:7;		/* Enabled ASPM state */
+	u32 aspm_capable:7;		/* Capable ASPM state with latency */
+	u32 aspm_default:7;		/* Default ASPM state by BIOS */
+	u32 aspm_disable:7;		/* Disabled ASPM state */
+
+	/* Clock PM state */
+	u32 clkpm_capable:1;		/* Clock PM capable? */
+	u32 clkpm_enabled:1;		/* Current Clock PM state */
+	u32 clkpm_default:1;		/* Default Clock PM state by BIOS */
+	u32 clkpm_disable:1;		/* Clock PM disabled */
+};
+
+static int aspm_disabled, aspm_force;
+static bool aspm_support_enabled = true;
+static DEFINE_MUTEX(aspm_lock);
+static LIST_HEAD(link_list);
+
+#define POLICY_DEFAULT 0	/* BIOS default setting */
+#define POLICY_PERFORMANCE 1	/* high performance */
+#define POLICY_POWERSAVE 2	/* high power saving */
+#define POLICY_POWER_SUPERSAVE 3 /* possibly even more power saving */
+
+#ifdef CONFIG_PCIEASPM_PERFORMANCE
+static int aspm_policy = POLICY_PERFORMANCE;
+#elif defined CONFIG_PCIEASPM_POWERSAVE
+static int aspm_policy = POLICY_POWERSAVE;
+#elif defined CONFIG_PCIEASPM_POWER_SUPERSAVE
+static int aspm_policy = POLICY_POWER_SUPERSAVE;
+#else
+static int aspm_policy;
+#endif
+
+static const char *policy_str[] = {
+	[POLICY_DEFAULT] = "default",
+	[POLICY_PERFORMANCE] = "performance",
+	[POLICY_POWERSAVE] = "powersave",
+	[POLICY_POWER_SUPERSAVE] = "powersupersave"
+};
+
+/*
+ * The L1 PM substate capability is only implemented in function 0 in a
+ * multi function device.
+ */
+static struct pci_dev *pci_function_0(struct pci_bus *linkbus)
+{
+	struct pci_dev *child;
+
+	list_for_each_entry(child, &linkbus->devices, bus_list)
+		if (PCI_FUNC(child->devfn) == 0)
+			return child;
+	return NULL;
+}
+
+static int policy_to_aspm_state(struct pcie_link_state *link)
+{
+	switch (aspm_policy) {
+	case POLICY_PERFORMANCE:
+		/* Disable ASPM and Clock PM */
+		return 0;
+	case POLICY_POWERSAVE:
+		/* Enable ASPM L0s/L1 */
+		return (ASPM_STATE_L0S | ASPM_STATE_L1);
+	case POLICY_POWER_SUPERSAVE:
+		/* Enable Everything */
+		return ASPM_STATE_ALL;
+	case POLICY_DEFAULT:
+		return link->aspm_default;
+	}
+	return 0;
+}
+
+static int policy_to_clkpm_state(struct pcie_link_state *link)
+{
+	switch (aspm_policy) {
+	case POLICY_PERFORMANCE:
+		/* Disable ASPM and Clock PM */
+		return 0;
+	case POLICY_POWERSAVE:
+	case POLICY_POWER_SUPERSAVE:
+		/* Enable Clock PM */
+		return 1;
+	case POLICY_DEFAULT:
+		return link->clkpm_default;
+	}
+	return 0;
+}
+
+static void pcie_set_clkpm_nocheck(struct pcie_link_state *link, int enable)
+{
+	struct pci_dev *child;
+	struct pci_bus *linkbus = link->pdev->subordinate;
+	u32 val = enable ? PCI_EXP_LNKCTL_CLKREQ_EN : 0;
+
+	list_for_each_entry(child, &linkbus->devices, bus_list)
+		pcie_capability_clear_and_set_word(child, PCI_EXP_LNKCTL,
+						   PCI_EXP_LNKCTL_CLKREQ_EN,
+						   val);
+	link->clkpm_enabled = !!enable;
+}
+
+static void pcie_set_clkpm(struct pcie_link_state *link, int enable)
+{
+	/*
+	 * Don't enable Clock PM if the link is not Clock PM capable
+	 * or Clock PM is disabled
+	 */
+	if (!link->clkpm_capable || link->clkpm_disable)
+		enable = 0;
+	/* Need nothing if the specified equals to current state */
+	if (link->clkpm_enabled == enable)
+		return;
+	pcie_set_clkpm_nocheck(link, enable);
+}
+
+static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist)
+{
+	int capable = 1, enabled = 1;
+	u32 reg32;
+	u16 reg16;
+	struct pci_dev *child;
+	struct pci_bus *linkbus = link->pdev->subordinate;
+
+	/* All functions should have the same cap and state, take the worst */
+	list_for_each_entry(child, &linkbus->devices, bus_list) {
+		pcie_capability_read_dword(child, PCI_EXP_LNKCAP, &reg32);
+		if (!(reg32 & PCI_EXP_LNKCAP_CLKPM)) {
+			capable = 0;
+			enabled = 0;
+			break;
+		}
+		pcie_capability_read_word(child, PCI_EXP_LNKCTL, &reg16);
+		if (!(reg16 & PCI_EXP_LNKCTL_CLKREQ_EN))
+			enabled = 0;
+	}
+	link->clkpm_enabled = enabled;
+	link->clkpm_default = enabled;
+	link->clkpm_capable = capable;
+	link->clkpm_disable = blacklist ? 1 : 0;
+}
+
+/*
+ * pcie_aspm_configure_common_clock: check if the 2 ends of a link
+ *   could use common clock. If they are, configure them to use the
+ *   common clock. That will reduce the ASPM state exit latency.
+ */
+static void pcie_aspm_configure_common_clock(struct pcie_link_state *link)
+{
+	int same_clock = 1;
+	u16 reg16, ccc, parent_old_ccc, child_old_ccc[8];
+	struct pci_dev *child, *parent = link->pdev;
+	struct pci_bus *linkbus = parent->subordinate;
+	/*
+	 * All functions of a slot should have the same Slot Clock
+	 * Configuration, so just check one function
+	 */
+	child = list_entry(linkbus->devices.next, struct pci_dev, bus_list);
+	BUG_ON(!pci_is_pcie(child));
+
+	/* Check downstream component if bit Slot Clock Configuration is 1 */
+	pcie_capability_read_word(child, PCI_EXP_LNKSTA, &reg16);
+	if (!(reg16 & PCI_EXP_LNKSTA_SLC))
+		same_clock = 0;
+
+	/* Check upstream component if bit Slot Clock Configuration is 1 */
+	pcie_capability_read_word(parent, PCI_EXP_LNKSTA, &reg16);
+	if (!(reg16 & PCI_EXP_LNKSTA_SLC))
+		same_clock = 0;
+
+	/* Port might be already in common clock mode */
+	pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &reg16);
+	parent_old_ccc = reg16 & PCI_EXP_LNKCTL_CCC;
+	if (same_clock && (reg16 & PCI_EXP_LNKCTL_CCC)) {
+		bool consistent = true;
+
+		list_for_each_entry(child, &linkbus->devices, bus_list) {
+			pcie_capability_read_word(child, PCI_EXP_LNKCTL,
+						  &reg16);
+			if (!(reg16 & PCI_EXP_LNKCTL_CCC)) {
+				consistent = false;
+				break;
+			}
+		}
+		if (consistent)
+			return;
+		pci_info(parent, "ASPM: current common clock configuration is inconsistent, reconfiguring\n");
+	}
+
+	ccc = same_clock ? PCI_EXP_LNKCTL_CCC : 0;
+	/* Configure downstream component, all functions */
+	list_for_each_entry(child, &linkbus->devices, bus_list) {
+		pcie_capability_read_word(child, PCI_EXP_LNKCTL, &reg16);
+		child_old_ccc[PCI_FUNC(child->devfn)] = reg16 & PCI_EXP_LNKCTL_CCC;
+		pcie_capability_clear_and_set_word(child, PCI_EXP_LNKCTL,
+						   PCI_EXP_LNKCTL_CCC, ccc);
+	}
+
+	/* Configure upstream component */
+	pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
+					   PCI_EXP_LNKCTL_CCC, ccc);
+
+	if (pcie_retrain_link(link->pdev, true)) {
+
+		/* Training failed. Restore common clock configurations */
+		pci_err(parent, "ASPM: Could not configure common clock\n");
+		list_for_each_entry(child, &linkbus->devices, bus_list)
+			pcie_capability_clear_and_set_word(child, PCI_EXP_LNKCTL,
+							   PCI_EXP_LNKCTL_CCC,
+							   child_old_ccc[PCI_FUNC(child->devfn)]);
+		pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
+						   PCI_EXP_LNKCTL_CCC, parent_old_ccc);
+	}
+}
+
+/* Convert L0s latency encoding to ns */
+static u32 calc_l0s_latency(u32 lnkcap)
+{
+	u32 encoding = (lnkcap & PCI_EXP_LNKCAP_L0SEL) >> 12;
+
+	if (encoding == 0x7)
+		return (5 * 1000);	/* > 4us */
+	return (64 << encoding);
+}
+
+/* Convert L0s acceptable latency encoding to ns */
+static u32 calc_l0s_acceptable(u32 encoding)
+{
+	if (encoding == 0x7)
+		return -1U;
+	return (64 << encoding);
+}
+
+/* Convert L1 latency encoding to ns */
+static u32 calc_l1_latency(u32 lnkcap)
+{
+	u32 encoding = (lnkcap & PCI_EXP_LNKCAP_L1EL) >> 15;
+
+	if (encoding == 0x7)
+		return (65 * 1000);	/* > 64us */
+	return (1000 << encoding);
+}
+
+/* Convert L1 acceptable latency encoding to ns */
+static u32 calc_l1_acceptable(u32 encoding)
+{
+	if (encoding == 0x7)
+		return -1U;
+	return (1000 << encoding);
+}
+
+/* Convert L1SS T_pwr encoding to usec */
+static u32 calc_l12_pwron(struct pci_dev *pdev, u32 scale, u32 val)
+{
+	switch (scale) {
+	case 0:
+		return val * 2;
+	case 1:
+		return val * 10;
+	case 2:
+		return val * 100;
+	}
+	pci_err(pdev, "%s: Invalid T_PwrOn scale: %u\n", __func__, scale);
+	return 0;
+}
+
+/*
+ * Encode an LTR_L1.2_THRESHOLD value for the L1 PM Substates Control 1
+ * register.  Ports enter L1.2 when the most recent LTR value is greater
+ * than or equal to LTR_L1.2_THRESHOLD, so we round up to make sure we
+ * don't enter L1.2 too aggressively.
+ *
+ * See PCIe r6.0, sec 5.5.1, 6.18, 7.8.3.3.
+ */
+static void encode_l12_threshold(u32 threshold_us, u32 *scale, u32 *value)
+{
+	u64 threshold_ns = (u64) threshold_us * 1000;
+
+	/*
+	 * LTR_L1.2_THRESHOLD_Value ("value") is a 10-bit field with max
+	 * value of 0x3ff.
+	 */
+	if (threshold_ns <= 0x3ff * 1) {
+		*scale = 0;		/* Value times 1ns */
+		*value = threshold_ns;
+	} else if (threshold_ns <= 0x3ff * 32) {
+		*scale = 1;		/* Value times 32ns */
+		*value = roundup(threshold_ns, 32) / 32;
+	} else if (threshold_ns <= 0x3ff * 1024) {
+		*scale = 2;		/* Value times 1024ns */
+		*value = roundup(threshold_ns, 1024) / 1024;
+	} else if (threshold_ns <= 0x3ff * 32768) {
+		*scale = 3;		/* Value times 32768ns */
+		*value = roundup(threshold_ns, 32768) / 32768;
+	} else if (threshold_ns <= 0x3ff * 1048576) {
+		*scale = 4;		/* Value times 1048576ns */
+		*value = roundup(threshold_ns, 1048576) / 1048576;
+	} else if (threshold_ns <= 0x3ff * (u64) 33554432) {
+		*scale = 5;		/* Value times 33554432ns */
+		*value = roundup(threshold_ns, 33554432) / 33554432;
+	} else {
+		*scale = 5;
+		*value = 0x3ff;		/* Max representable value */
+	}
+}
+
+static void pcie_aspm_check_latency(struct pci_dev *endpoint)
+{
+	u32 latency, encoding, lnkcap_up, lnkcap_dw;
+	u32 l1_switch_latency = 0, latency_up_l0s;
+	u32 latency_up_l1, latency_dw_l0s, latency_dw_l1;
+	u32 acceptable_l0s, acceptable_l1;
+	struct pcie_link_state *link;
+
+	/* Device not in D0 doesn't need latency check */
+	if ((endpoint->current_state != PCI_D0) &&
+	    (endpoint->current_state != PCI_UNKNOWN))
+		return;
+
+	link = endpoint->bus->self->link_state;
+
+	/* Calculate endpoint L0s acceptable latency */
+	encoding = (endpoint->devcap & PCI_EXP_DEVCAP_L0S) >> 6;
+	acceptable_l0s = calc_l0s_acceptable(encoding);
+
+	/* Calculate endpoint L1 acceptable latency */
+	encoding = (endpoint->devcap & PCI_EXP_DEVCAP_L1) >> 9;
+	acceptable_l1 = calc_l1_acceptable(encoding);
+
+	while (link) {
+		struct pci_dev *dev = pci_function_0(link->pdev->subordinate);
+
+		/* Read direction exit latencies */
+		pcie_capability_read_dword(link->pdev, PCI_EXP_LNKCAP,
+					   &lnkcap_up);
+		pcie_capability_read_dword(dev, PCI_EXP_LNKCAP,
+					   &lnkcap_dw);
+		latency_up_l0s = calc_l0s_latency(lnkcap_up);
+		latency_up_l1 = calc_l1_latency(lnkcap_up);
+		latency_dw_l0s = calc_l0s_latency(lnkcap_dw);
+		latency_dw_l1 = calc_l1_latency(lnkcap_dw);
+
+		/* Check upstream direction L0s latency */
+		if ((link->aspm_capable & ASPM_STATE_L0S_UP) &&
+		    (latency_up_l0s > acceptable_l0s))
+			link->aspm_capable &= ~ASPM_STATE_L0S_UP;
+
+		/* Check downstream direction L0s latency */
+		if ((link->aspm_capable & ASPM_STATE_L0S_DW) &&
+		    (latency_dw_l0s > acceptable_l0s))
+			link->aspm_capable &= ~ASPM_STATE_L0S_DW;
+		/*
+		 * Check L1 latency.
+		 * Every switch on the path to root complex need 1
+		 * more microsecond for L1. Spec doesn't mention L0s.
+		 *
+		 * The exit latencies for L1 substates are not advertised
+		 * by a device.  Since the spec also doesn't mention a way
+		 * to determine max latencies introduced by enabling L1
+		 * substates on the components, it is not clear how to do
+		 * a L1 substate exit latency check.  We assume that the
+		 * L1 exit latencies advertised by a device include L1
+		 * substate latencies (and hence do not do any check).
+		 */
+		latency = max_t(u32, latency_up_l1, latency_dw_l1);
+		if ((link->aspm_capable & ASPM_STATE_L1) &&
+		    (latency + l1_switch_latency > acceptable_l1))
+			link->aspm_capable &= ~ASPM_STATE_L1;
+		l1_switch_latency += 1000;
+
+		link = link->parent;
+	}
+}
+
+static void pci_clear_and_set_dword(struct pci_dev *pdev, int pos,
+				    u32 clear, u32 set)
+{
+	u32 val;
+
+	pci_read_config_dword(pdev, pos, &val);
+	val &= ~clear;
+	val |= set;
+	pci_write_config_dword(pdev, pos, val);
+}
+
+/* Calculate L1.2 PM substate timing parameters */
+static void aspm_calc_l12_info(struct pcie_link_state *link,
+				u32 parent_l1ss_cap, u32 child_l1ss_cap)
+{
+	struct pci_dev *child = link->downstream, *parent = link->pdev;
+	u32 val1, val2, scale1, scale2;
+	u32 t_common_mode, t_power_on, l1_2_threshold, scale, value;
+	u32 ctl1 = 0, ctl2 = 0;
+	u32 pctl1, pctl2, cctl1, cctl2;
+	u32 pl1_2_enables, cl1_2_enables;
+
+	/* Choose the greater of the two Port Common_Mode_Restore_Times */
+	val1 = (parent_l1ss_cap & PCI_L1SS_CAP_CM_RESTORE_TIME) >> 8;
+	val2 = (child_l1ss_cap & PCI_L1SS_CAP_CM_RESTORE_TIME) >> 8;
+	t_common_mode = max(val1, val2);
+
+	/* Choose the greater of the two Port T_POWER_ON times */
+	val1   = (parent_l1ss_cap & PCI_L1SS_CAP_P_PWR_ON_VALUE) >> 19;
+	scale1 = (parent_l1ss_cap & PCI_L1SS_CAP_P_PWR_ON_SCALE) >> 16;
+	val2   = (child_l1ss_cap & PCI_L1SS_CAP_P_PWR_ON_VALUE) >> 19;
+	scale2 = (child_l1ss_cap & PCI_L1SS_CAP_P_PWR_ON_SCALE) >> 16;
+
+	if (calc_l12_pwron(parent, scale1, val1) >
+	    calc_l12_pwron(child, scale2, val2)) {
+		ctl2 |= scale1 | (val1 << 3);
+		t_power_on = calc_l12_pwron(parent, scale1, val1);
+	} else {
+		ctl2 |= scale2 | (val2 << 3);
+		t_power_on = calc_l12_pwron(child, scale2, val2);
+	}
+
+	/*
+	 * Set LTR_L1.2_THRESHOLD to the time required to transition the
+	 * Link from L0 to L1.2 and back to L0 so we enter L1.2 only if
+	 * downstream devices report (via LTR) that they can tolerate at
+	 * least that much latency.
+	 *
+	 * Based on PCIe r3.1, sec 5.5.3.3.1, Figures 5-16 and 5-17, and
+	 * Table 5-11.  T(POWER_OFF) is at most 2us and T(L1.2) is at
+	 * least 4us.
+	 */
+	l1_2_threshold = 2 + 4 + t_common_mode + t_power_on;
+	encode_l12_threshold(l1_2_threshold, &scale, &value);
+	ctl1 |= t_common_mode << 8 | scale << 29 | value << 16;
+
+	/* Some broken devices only support dword access to L1 SS */
+	pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, &pctl1);
+	pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, &pctl2);
+	pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL1, &cctl1);
+	pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL2, &cctl2);
+
+	if (ctl1 == pctl1 && ctl1 == cctl1 &&
+	    ctl2 == pctl2 && ctl2 == cctl2)
+		return;
+
+	/* Disable L1.2 while updating.  See PCIe r5.0, sec 5.5.4, 7.8.3.3 */
+	pl1_2_enables = pctl1 & PCI_L1SS_CTL1_L1_2_MASK;
+	cl1_2_enables = cctl1 & PCI_L1SS_CTL1_L1_2_MASK;
+
+	if (pl1_2_enables || cl1_2_enables) {
+		pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
+					PCI_L1SS_CTL1_L1_2_MASK, 0);
+		pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
+					PCI_L1SS_CTL1_L1_2_MASK, 0);
+	}
+
+	/* Program T_POWER_ON times in both ports */
+	pci_write_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, ctl2);
+	pci_write_config_dword(child, child->l1ss + PCI_L1SS_CTL2, ctl2);
+
+	/* Program Common_Mode_Restore_Time in upstream device */
+	pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
+				PCI_L1SS_CTL1_CM_RESTORE_TIME, ctl1);
+
+	/* Program LTR_L1.2_THRESHOLD time in both ports */
+	pci_clear_and_set_dword(parent,	parent->l1ss + PCI_L1SS_CTL1,
+				PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
+				PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1);
+	pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
+				PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
+				PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1);
+
+	if (pl1_2_enables || cl1_2_enables) {
+		pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, 0,
+					pl1_2_enables);
+		pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, 0,
+					cl1_2_enables);
+	}
+}
+
+static void aspm_l1ss_init(struct pcie_link_state *link)
+{
+	struct pci_dev *child = link->downstream, *parent = link->pdev;
+	u32 parent_l1ss_cap, child_l1ss_cap;
+	u32 parent_l1ss_ctl1 = 0, child_l1ss_ctl1 = 0;
+
+	if (!parent->l1ss || !child->l1ss)
+		return;
+
+	/* Setup L1 substate */
+	pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CAP,
+			      &parent_l1ss_cap);
+	pci_read_config_dword(child, child->l1ss + PCI_L1SS_CAP,
+			      &child_l1ss_cap);
+
+	if (!(parent_l1ss_cap & PCI_L1SS_CAP_L1_PM_SS))
+		parent_l1ss_cap = 0;
+	if (!(child_l1ss_cap & PCI_L1SS_CAP_L1_PM_SS))
+		child_l1ss_cap = 0;
+
+	/*
+	 * If we don't have LTR for the entire path from the Root Complex
+	 * to this device, we can't use ASPM L1.2 because it relies on the
+	 * LTR_L1.2_THRESHOLD.  See PCIe r4.0, secs 5.5.4, 6.18.
+	 */
+	if (!child->ltr_path)
+		child_l1ss_cap &= ~PCI_L1SS_CAP_ASPM_L1_2;
+
+	if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_ASPM_L1_1)
+		link->aspm_support |= ASPM_STATE_L1_1;
+	if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_ASPM_L1_2)
+		link->aspm_support |= ASPM_STATE_L1_2;
+	if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_PCIPM_L1_1)
+		link->aspm_support |= ASPM_STATE_L1_1_PCIPM;
+	if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_PCIPM_L1_2)
+		link->aspm_support |= ASPM_STATE_L1_2_PCIPM;
+
+	if (parent_l1ss_cap)
+		pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
+				      &parent_l1ss_ctl1);
+	if (child_l1ss_cap)
+		pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL1,
+				      &child_l1ss_ctl1);
+
+	if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_ASPM_L1_1)
+		link->aspm_enabled |= ASPM_STATE_L1_1;
+	if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_ASPM_L1_2)
+		link->aspm_enabled |= ASPM_STATE_L1_2;
+	if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_PCIPM_L1_1)
+		link->aspm_enabled |= ASPM_STATE_L1_1_PCIPM;
+	if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_PCIPM_L1_2)
+		link->aspm_enabled |= ASPM_STATE_L1_2_PCIPM;
+
+	if (link->aspm_support & ASPM_STATE_L1_2_MASK)
+		aspm_calc_l12_info(link, parent_l1ss_cap, child_l1ss_cap);
+}
+
+static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
+{
+	struct pci_dev *child = link->downstream, *parent = link->pdev;
+	u32 parent_lnkcap, child_lnkcap;
+	u16 parent_lnkctl, child_lnkctl;
+	struct pci_bus *linkbus = parent->subordinate;
+
+	if (blacklist) {
+		/* Set enabled/disable so that we will disable ASPM later */
+		link->aspm_enabled = ASPM_STATE_ALL;
+		link->aspm_disable = ASPM_STATE_ALL;
+		return;
+	}
+
+	/*
+	 * If ASPM not supported, don't mess with the clocks and link,
+	 * bail out now.
+	 */
+	pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &parent_lnkcap);
+	pcie_capability_read_dword(child, PCI_EXP_LNKCAP, &child_lnkcap);
+	if (!(parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPMS))
+		return;
+
+	/* Configure common clock before checking latencies */
+	pcie_aspm_configure_common_clock(link);
+
+	/*
+	 * Re-read upstream/downstream components' register state after
+	 * clock configuration.  L0s & L1 exit latencies in the otherwise
+	 * read-only Link Capabilities may change depending on common clock
+	 * configuration (PCIe r5.0, sec 7.5.3.6).
+	 */
+	pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &parent_lnkcap);
+	pcie_capability_read_dword(child, PCI_EXP_LNKCAP, &child_lnkcap);
+	pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &parent_lnkctl);
+	pcie_capability_read_word(child, PCI_EXP_LNKCTL, &child_lnkctl);
+
+	/*
+	 * Setup L0s state
+	 *
+	 * Note that we must not enable L0s in either direction on a
+	 * given link unless components on both sides of the link each
+	 * support L0s.
+	 */
+	if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L0S)
+		link->aspm_support |= ASPM_STATE_L0S;
+
+	if (child_lnkctl & PCI_EXP_LNKCTL_ASPM_L0S)
+		link->aspm_enabled |= ASPM_STATE_L0S_UP;
+	if (parent_lnkctl & PCI_EXP_LNKCTL_ASPM_L0S)
+		link->aspm_enabled |= ASPM_STATE_L0S_DW;
+
+	/* Setup L1 state */
+	if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L1)
+		link->aspm_support |= ASPM_STATE_L1;
+
+	if (parent_lnkctl & child_lnkctl & PCI_EXP_LNKCTL_ASPM_L1)
+		link->aspm_enabled |= ASPM_STATE_L1;
+
+	aspm_l1ss_init(link);
+
+	/* Save default state */
+	link->aspm_default = link->aspm_enabled;
+
+	/* Setup initial capable state. Will be updated later */
+	link->aspm_capable = link->aspm_support;
+
+	/* Get and check endpoint acceptable latencies */
+	list_for_each_entry(child, &linkbus->devices, bus_list) {
+		if (pci_pcie_type(child) != PCI_EXP_TYPE_ENDPOINT &&
+		    pci_pcie_type(child) != PCI_EXP_TYPE_LEG_END)
+			continue;
+
+		pcie_aspm_check_latency(child);
+	}
+}
+
+/* Configure the ASPM L1 substates */
+static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state)
+{
+	u32 val, enable_req;
+	struct pci_dev *child = link->downstream, *parent = link->pdev;
+
+	enable_req = (link->aspm_enabled ^ state) & state;
+
+	/*
+	 * Here are the rules specified in the PCIe spec for enabling L1SS:
+	 * - When enabling L1.x, enable bit at parent first, then at child
+	 * - When disabling L1.x, disable bit at child first, then at parent
+	 * - When enabling ASPM L1.x, need to disable L1
+	 *   (at child followed by parent).
+	 * - The ASPM/PCIPM L1.2 must be disabled while programming timing
+	 *   parameters
+	 *
+	 * To keep it simple, disable all L1SS bits first, and later enable
+	 * what is needed.
+	 */
+
+	/* Disable all L1 substates */
+	pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
+				PCI_L1SS_CTL1_L1SS_MASK, 0);
+	pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
+				PCI_L1SS_CTL1_L1SS_MASK, 0);
+	/*
+	 * If needed, disable L1, and it gets enabled later
+	 * in pcie_config_aspm_link().
+	 */
+	if (enable_req & (ASPM_STATE_L1_1 | ASPM_STATE_L1_2)) {
+		pcie_capability_clear_and_set_word(child, PCI_EXP_LNKCTL,
+						   PCI_EXP_LNKCTL_ASPM_L1, 0);
+		pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
+						   PCI_EXP_LNKCTL_ASPM_L1, 0);
+	}
+
+	val = 0;
+	if (state & ASPM_STATE_L1_1)
+		val |= PCI_L1SS_CTL1_ASPM_L1_1;
+	if (state & ASPM_STATE_L1_2)
+		val |= PCI_L1SS_CTL1_ASPM_L1_2;
+	if (state & ASPM_STATE_L1_1_PCIPM)
+		val |= PCI_L1SS_CTL1_PCIPM_L1_1;
+	if (state & ASPM_STATE_L1_2_PCIPM)
+		val |= PCI_L1SS_CTL1_PCIPM_L1_2;
+
+	/* Enable what we need to enable */
+	pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
+				PCI_L1SS_CTL1_L1SS_MASK, val);
+	pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
+				PCI_L1SS_CTL1_L1SS_MASK, val);
+}
+
+static void pcie_config_aspm_dev(struct pci_dev *pdev, u32 val)
+{
+	pcie_capability_clear_and_set_word(pdev, PCI_EXP_LNKCTL,
+					   PCI_EXP_LNKCTL_ASPMC, val);
+}
+
+static void pcie_config_aspm_link(struct pcie_link_state *link, u32 state)
+{
+	u32 upstream = 0, dwstream = 0;
+	struct pci_dev *child = link->downstream, *parent = link->pdev;
+	struct pci_bus *linkbus = parent->subordinate;
+
+	/* Enable only the states that were not explicitly disabled */
+	state &= (link->aspm_capable & ~link->aspm_disable);
+
+	/* Can't enable any substates if L1 is not enabled */
+	if (!(state & ASPM_STATE_L1))
+		state &= ~ASPM_STATE_L1SS;
+
+	/* Spec says both ports must be in D0 before enabling PCI PM substates*/
+	if (parent->current_state != PCI_D0 || child->current_state != PCI_D0) {
+		state &= ~ASPM_STATE_L1_SS_PCIPM;
+		state |= (link->aspm_enabled & ASPM_STATE_L1_SS_PCIPM);
+	}
+
+	/* Nothing to do if the link is already in the requested state */
+	if (link->aspm_enabled == state)
+		return;
+	/* Convert ASPM state to upstream/downstream ASPM register state */
+	if (state & ASPM_STATE_L0S_UP)
+		dwstream |= PCI_EXP_LNKCTL_ASPM_L0S;
+	if (state & ASPM_STATE_L0S_DW)
+		upstream |= PCI_EXP_LNKCTL_ASPM_L0S;
+	if (state & ASPM_STATE_L1) {
+		upstream |= PCI_EXP_LNKCTL_ASPM_L1;
+		dwstream |= PCI_EXP_LNKCTL_ASPM_L1;
+	}
+
+	if (link->aspm_capable & ASPM_STATE_L1SS)
+		pcie_config_aspm_l1ss(link, state);
+
+	/*
+	 * Spec 2.0 suggests all functions should be configured the
+	 * same setting for ASPM. Enabling ASPM L1 should be done in
+	 * upstream component first and then downstream, and vice
+	 * versa for disabling ASPM L1. Spec doesn't mention L0S.
+	 */
+	if (state & ASPM_STATE_L1)
+		pcie_config_aspm_dev(parent, upstream);
+	list_for_each_entry(child, &linkbus->devices, bus_list)
+		pcie_config_aspm_dev(child, dwstream);
+	if (!(state & ASPM_STATE_L1))
+		pcie_config_aspm_dev(parent, upstream);
+
+	link->aspm_enabled = state;
+}
+
+static void pcie_config_aspm_path(struct pcie_link_state *link)
+{
+	while (link) {
+		pcie_config_aspm_link(link, policy_to_aspm_state(link));
+		link = link->parent;
+	}
+}
+
+static void free_link_state(struct pcie_link_state *link)
+{
+	link->pdev->link_state = NULL;
+	kfree(link);
+}
+
+static int pcie_aspm_sanity_check(struct pci_dev *pdev)
+{
+	struct pci_dev *child;
+	u32 reg32;
+
+	/*
+	 * Some functions in a slot might not all be PCIe functions,
+	 * very strange. Disable ASPM for the whole slot
+	 */
+	list_for_each_entry(child, &pdev->subordinate->devices, bus_list) {
+		if (!pci_is_pcie(child))
+			return -EINVAL;
+
+		/*
+		 * If ASPM is disabled then we're not going to change
+		 * the BIOS state. It's safe to continue even if it's a
+		 * pre-1.1 device
+		 */
+
+		if (aspm_disabled)
+			continue;
+
+		/*
+		 * Disable ASPM for pre-1.1 PCIe device, we follow MS to use
+		 * RBER bit to determine if a function is 1.1 version device
+		 */
+		pcie_capability_read_dword(child, PCI_EXP_DEVCAP, &reg32);
+		if (!(reg32 & PCI_EXP_DEVCAP_RBER) && !aspm_force) {
+			pci_info(child, "disabling ASPM on pre-1.1 PCIe device.  You can enable it with 'pcie_aspm=force'\n");
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static struct pcie_link_state *alloc_pcie_link_state(struct pci_dev *pdev)
+{
+	struct pcie_link_state *link;
+
+	link = kzalloc(sizeof(*link), GFP_KERNEL);
+	if (!link)
+		return NULL;
+
+	INIT_LIST_HEAD(&link->sibling);
+	link->pdev = pdev;
+	link->downstream = pci_function_0(pdev->subordinate);
+
+	/*
+	 * Root Ports and PCI/PCI-X to PCIe Bridges are roots of PCIe
+	 * hierarchies.  Note that some PCIe host implementations omit
+	 * the root ports entirely, in which case a downstream port on
+	 * a switch may become the root of the link state chain for all
+	 * its subordinate endpoints.
+	 */
+	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT ||
+	    pci_pcie_type(pdev) == PCI_EXP_TYPE_PCIE_BRIDGE ||
+	    !pdev->bus->parent->self) {
+		link->root = link;
+	} else {
+		struct pcie_link_state *parent;
+
+		parent = pdev->bus->parent->self->link_state;
+		if (!parent) {
+			kfree(link);
+			return NULL;
+		}
+
+		link->parent = parent;
+		link->root = link->parent->root;
+	}
+
+	list_add(&link->sibling, &link_list);
+	pdev->link_state = link;
+	return link;
+}
+
+static void pcie_aspm_update_sysfs_visibility(struct pci_dev *pdev)
+{
+	struct pci_dev *child;
+
+	list_for_each_entry(child, &pdev->subordinate->devices, bus_list)
+		sysfs_update_group(&child->dev.kobj, &aspm_ctrl_attr_group);
+}
+
+/*
+ * pcie_aspm_init_link_state: Initiate PCI express link state.
+ * It is called after the pcie and its children devices are scanned.
+ * @pdev: the root port or switch downstream port
+ */
+void pcie_aspm_init_link_state(struct pci_dev *pdev)
+{
+	struct pcie_link_state *link;
+	int blacklist = !!pcie_aspm_sanity_check(pdev);
+
+	if (!aspm_support_enabled)
+		return;
+
+	if (pdev->link_state)
+		return;
+
+	/*
+	 * We allocate pcie_link_state for the component on the upstream
+	 * end of a Link, so there's nothing to do unless this device is
+	 * downstream port.
+	 */
+	if (!pcie_downstream_port(pdev))
+		return;
+
+	/* VIA has a strange chipset, root port is under a bridge */
+	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT &&
+	    pdev->bus->self)
+		return;
+
+	down_read(&pci_bus_sem);
+	if (list_empty(&pdev->subordinate->devices))
+		goto out;
+
+	mutex_lock(&aspm_lock);
+	link = alloc_pcie_link_state(pdev);
+	if (!link)
+		goto unlock;
+	/*
+	 * Setup initial ASPM state. Note that we need to configure
+	 * upstream links also because capable state of them can be
+	 * update through pcie_aspm_cap_init().
+	 */
+	pcie_aspm_cap_init(link, blacklist);
+
+	/* Setup initial Clock PM state */
+	pcie_clkpm_cap_init(link, blacklist);
+
+	/*
+	 * At this stage drivers haven't had an opportunity to change the
+	 * link policy setting. Enabling ASPM on broken hardware can cripple
+	 * it even before the driver has had a chance to disable ASPM, so
+	 * default to a safe level right now. If we're enabling ASPM beyond
+	 * the BIOS's expectation, we'll do so once pci_enable_device() is
+	 * called.
+	 */
+	if (aspm_policy != POLICY_POWERSAVE &&
+	    aspm_policy != POLICY_POWER_SUPERSAVE) {
+		pcie_config_aspm_path(link);
+		pcie_set_clkpm(link, policy_to_clkpm_state(link));
+	}
+
+	pcie_aspm_update_sysfs_visibility(pdev);
+
+unlock:
+	mutex_unlock(&aspm_lock);
+out:
+	up_read(&pci_bus_sem);
+}
+
+/* Recheck latencies and update aspm_capable for links under the root */
+static void pcie_update_aspm_capable(struct pcie_link_state *root)
+{
+	struct pcie_link_state *link;
+	BUG_ON(root->parent);
+	list_for_each_entry(link, &link_list, sibling) {
+		if (link->root != root)
+			continue;
+		link->aspm_capable = link->aspm_support;
+	}
+	list_for_each_entry(link, &link_list, sibling) {
+		struct pci_dev *child;
+		struct pci_bus *linkbus = link->pdev->subordinate;
+		if (link->root != root)
+			continue;
+		list_for_each_entry(child, &linkbus->devices, bus_list) {
+			if ((pci_pcie_type(child) != PCI_EXP_TYPE_ENDPOINT) &&
+			    (pci_pcie_type(child) != PCI_EXP_TYPE_LEG_END))
+				continue;
+			pcie_aspm_check_latency(child);
+		}
+	}
+}
+
+/* @pdev: the endpoint device */
+void pcie_aspm_exit_link_state(struct pci_dev *pdev)
+{
+	struct pci_dev *parent = pdev->bus->self;
+	struct pcie_link_state *link, *root, *parent_link;
+
+	if (!parent || !parent->link_state)
+		return;
+
+	down_read(&pci_bus_sem);
+	mutex_lock(&aspm_lock);
+
+	link = parent->link_state;
+	root = link->root;
+	parent_link = link->parent;
+
+	/*
+	 * link->downstream is a pointer to the pci_dev of function 0.  If
+	 * we remove that function, the pci_dev is about to be deallocated,
+	 * so we can't use link->downstream again.  Free the link state to
+	 * avoid this.
+	 *
+	 * If we're removing a non-0 function, it's possible we could
+	 * retain the link state, but PCIe r6.0, sec 7.5.3.7, recommends
+	 * programming the same ASPM Control value for all functions of
+	 * multi-function devices, so disable ASPM for all of them.
+	 */
+	pcie_config_aspm_link(link, 0);
+	list_del(&link->sibling);
+	free_link_state(link);
+
+	/* Recheck latencies and configure upstream links */
+	if (parent_link) {
+		pcie_update_aspm_capable(root);
+		pcie_config_aspm_path(parent_link);
+	}
+
+	mutex_unlock(&aspm_lock);
+	up_read(&pci_bus_sem);
+}
+
+/* @pdev: the root port or switch downstream port */
+void pcie_aspm_pm_state_change(struct pci_dev *pdev)
+{
+	struct pcie_link_state *link = pdev->link_state;
+
+	if (aspm_disabled || !link)
+		return;
+	/*
+	 * Devices changed PM state, we should recheck if latency
+	 * meets all functions' requirement
+	 */
+	down_read(&pci_bus_sem);
+	mutex_lock(&aspm_lock);
+	pcie_update_aspm_capable(link->root);
+	pcie_config_aspm_path(link);
+	mutex_unlock(&aspm_lock);
+	up_read(&pci_bus_sem);
+}
+
+void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
+{
+	struct pcie_link_state *link = pdev->link_state;
+
+	if (aspm_disabled || !link)
+		return;
+
+	if (aspm_policy != POLICY_POWERSAVE &&
+	    aspm_policy != POLICY_POWER_SUPERSAVE)
+		return;
+
+	down_read(&pci_bus_sem);
+	mutex_lock(&aspm_lock);
+	pcie_config_aspm_path(link);
+	pcie_set_clkpm(link, policy_to_clkpm_state(link));
+	mutex_unlock(&aspm_lock);
+	up_read(&pci_bus_sem);
+}
+
+static struct pcie_link_state *pcie_aspm_get_link(struct pci_dev *pdev)
+{
+	struct pci_dev *bridge;
+
+	if (!pci_is_pcie(pdev))
+		return NULL;
+
+	bridge = pci_upstream_bridge(pdev);
+	if (!bridge || !pci_is_pcie(bridge))
+		return NULL;
+
+	return bridge->link_state;
+}
+
+static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
+{
+	struct pcie_link_state *link = pcie_aspm_get_link(pdev);
+
+	if (!link)
+		return -EINVAL;
+	/*
+	 * A driver requested that ASPM be disabled on this device, but
+	 * if we don't have permission to manage ASPM (e.g., on ACPI
+	 * systems we have to observe the FADT ACPI_FADT_NO_ASPM bit and
+	 * the _OSC method), we can't honor that request.  Windows has
+	 * a similar mechanism using "PciASPMOptOut", which is also
+	 * ignored in this situation.
+	 */
+	if (aspm_disabled) {
+		pci_warn(pdev, "can't disable ASPM; OS doesn't have ASPM control\n");
+		return -EPERM;
+	}
+
+	if (sem)
+		down_read(&pci_bus_sem);
+	mutex_lock(&aspm_lock);
+	if (state & PCIE_LINK_STATE_L0S)
+		link->aspm_disable |= ASPM_STATE_L0S;
+	if (state & PCIE_LINK_STATE_L1)
+		/* L1 PM substates require L1 */
+		link->aspm_disable |= ASPM_STATE_L1 | ASPM_STATE_L1SS;
+	if (state & PCIE_LINK_STATE_L1_1)
+		link->aspm_disable |= ASPM_STATE_L1_1;
+	if (state & PCIE_LINK_STATE_L1_2)
+		link->aspm_disable |= ASPM_STATE_L1_2;
+	if (state & PCIE_LINK_STATE_L1_1_PCIPM)
+		link->aspm_disable |= ASPM_STATE_L1_1_PCIPM;
+	if (state & PCIE_LINK_STATE_L1_2_PCIPM)
+		link->aspm_disable |= ASPM_STATE_L1_2_PCIPM;
+	pcie_config_aspm_link(link, policy_to_aspm_state(link));
+
+	if (state & PCIE_LINK_STATE_CLKPM)
+		link->clkpm_disable = 1;
+	pcie_set_clkpm(link, policy_to_clkpm_state(link));
+	mutex_unlock(&aspm_lock);
+	if (sem)
+		up_read(&pci_bus_sem);
+
+	return 0;
+}
+
+int pci_disable_link_state_locked(struct pci_dev *pdev, int state)
+{
+	return __pci_disable_link_state(pdev, state, false);
+}
+EXPORT_SYMBOL(pci_disable_link_state_locked);
+
+/**
+ * pci_disable_link_state - Disable device's link state, so the link will
+ * never enter specific states.  Note that if the BIOS didn't grant ASPM
+ * control to the OS, this does nothing because we can't touch the LNKCTL
+ * register. Returns 0 or a negative errno.
+ *
+ * @pdev: PCI device
+ * @state: ASPM link state to disable
+ */
+int pci_disable_link_state(struct pci_dev *pdev, int state)
+{
+	return __pci_disable_link_state(pdev, state, true);
+}
+EXPORT_SYMBOL(pci_disable_link_state);
+
+static int __pci_enable_link_state(struct pci_dev *pdev, int state, bool locked)
+{
+	struct pcie_link_state *link = pcie_aspm_get_link(pdev);
+
+	if (!link)
+		return -EINVAL;
+	/*
+	 * A driver requested that ASPM be enabled on this device, but
+	 * if we don't have permission to manage ASPM (e.g., on ACPI
+	 * systems we have to observe the FADT ACPI_FADT_NO_ASPM bit and
+	 * the _OSC method), we can't honor that request.
+	 */
+	if (aspm_disabled) {
+		pci_warn(pdev, "can't override BIOS ASPM; OS doesn't have ASPM control\n");
+		return -EPERM;
+	}
+
+	if (!locked)
+		down_read(&pci_bus_sem);
+	mutex_lock(&aspm_lock);
+	link->aspm_default = 0;
+	if (state & PCIE_LINK_STATE_L0S)
+		link->aspm_default |= ASPM_STATE_L0S;
+	if (state & PCIE_LINK_STATE_L1)
+		link->aspm_default |= ASPM_STATE_L1;
+	/* L1 PM substates require L1 */
+	if (state & PCIE_LINK_STATE_L1_1)
+		link->aspm_default |= ASPM_STATE_L1_1 | ASPM_STATE_L1;
+	if (state & PCIE_LINK_STATE_L1_2)
+		link->aspm_default |= ASPM_STATE_L1_2 | ASPM_STATE_L1;
+	if (state & PCIE_LINK_STATE_L1_1_PCIPM)
+		link->aspm_default |= ASPM_STATE_L1_1_PCIPM | ASPM_STATE_L1;
+	if (state & PCIE_LINK_STATE_L1_2_PCIPM)
+		link->aspm_default |= ASPM_STATE_L1_2_PCIPM | ASPM_STATE_L1;
+	pcie_config_aspm_link(link, policy_to_aspm_state(link));
+
+	link->clkpm_default = (state & PCIE_LINK_STATE_CLKPM) ? 1 : 0;
+	pcie_set_clkpm(link, policy_to_clkpm_state(link));
+	mutex_unlock(&aspm_lock);
+	if (!locked)
+		up_read(&pci_bus_sem);
+
+	return 0;
+}
+
+/**
+ * pci_enable_link_state - Clear and set the default device link state so that
+ * the link may be allowed to enter the specified states. Note that if the
+ * BIOS didn't grant ASPM control to the OS, this does nothing because we can't
+ * touch the LNKCTL register. Also note that this does not enable states
+ * disabled by pci_disable_link_state(). Return 0 or a negative errno.
+ *
+ * @pdev: PCI device
+ * @state: Mask of ASPM link states to enable
+ */
+int pci_enable_link_state(struct pci_dev *pdev, int state)
+{
+	return __pci_enable_link_state(pdev, state, false);
+}
+EXPORT_SYMBOL(pci_enable_link_state);
+
+/**
+ * pci_enable_link_state_locked - Clear and set the default device link state
+ * so that the link may be allowed to enter the specified states. Note that if
+ * the BIOS didn't grant ASPM control to the OS, this does nothing because we
+ * can't touch the LNKCTL register. Also note that this does not enable states
+ * disabled by pci_disable_link_state(). Return 0 or a negative errno.
+ *
+ * @pdev: PCI device
+ * @state: Mask of ASPM link states to enable
+ *
+ * Context: Caller holds pci_bus_sem read lock.
+ */
+int pci_enable_link_state_locked(struct pci_dev *pdev, int state)
+{
+	lockdep_assert_held_read(&pci_bus_sem);
+
+	return __pci_enable_link_state(pdev, state, true);
+}
+EXPORT_SYMBOL(pci_enable_link_state_locked);
+
+static int pcie_aspm_set_policy(const char *val,
+				const struct kernel_param *kp)
+{
+	int i;
+	struct pcie_link_state *link;
+
+	if (aspm_disabled)
+		return -EPERM;
+	i = sysfs_match_string(policy_str, val);
+	if (i < 0)
+		return i;
+	if (i == aspm_policy)
+		return 0;
+
+	down_read(&pci_bus_sem);
+	mutex_lock(&aspm_lock);
+	aspm_policy = i;
+	list_for_each_entry(link, &link_list, sibling) {
+		pcie_config_aspm_link(link, policy_to_aspm_state(link));
+		pcie_set_clkpm(link, policy_to_clkpm_state(link));
+	}
+	mutex_unlock(&aspm_lock);
+	up_read(&pci_bus_sem);
+	return 0;
+}
+
+static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp)
+{
+	int i, cnt = 0;
+	for (i = 0; i < ARRAY_SIZE(policy_str); i++)
+		if (i == aspm_policy)
+			cnt += sprintf(buffer + cnt, "[%s] ", policy_str[i]);
+		else
+			cnt += sprintf(buffer + cnt, "%s ", policy_str[i]);
+	cnt += sprintf(buffer + cnt, "\n");
+	return cnt;
+}
+
+module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy,
+	NULL, 0644);
+
+/**
+ * pcie_aspm_enabled - Check if PCIe ASPM has been enabled for a device.
+ * @pdev: Target device.
+ *
+ * Relies on the upstream bridge's link_state being valid.  The link_state
+ * is deallocated only when the last child of the bridge (i.e., @pdev or a
+ * sibling) is removed, and the caller should be holding a reference to
+ * @pdev, so this should be safe.
+ */
+bool pcie_aspm_enabled(struct pci_dev *pdev)
+{
+	struct pcie_link_state *link = pcie_aspm_get_link(pdev);
+
+	if (!link)
+		return false;
+
+	return link->aspm_enabled;
+}
+EXPORT_SYMBOL_GPL(pcie_aspm_enabled);
+
+static ssize_t aspm_attr_show_common(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf, u8 state)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pcie_link_state *link = pcie_aspm_get_link(pdev);
+
+	return sysfs_emit(buf, "%d\n", (link->aspm_enabled & state) ? 1 : 0);
+}
+
+static ssize_t aspm_attr_store_common(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t len, u8 state)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pcie_link_state *link = pcie_aspm_get_link(pdev);
+	bool state_enable;
+
+	if (kstrtobool(buf, &state_enable) < 0)
+		return -EINVAL;
+
+	down_read(&pci_bus_sem);
+	mutex_lock(&aspm_lock);
+
+	if (state_enable) {
+		link->aspm_disable &= ~state;
+		/* need to enable L1 for substates */
+		if (state & ASPM_STATE_L1SS)
+			link->aspm_disable &= ~ASPM_STATE_L1;
+	} else {
+		link->aspm_disable |= state;
+		if (state & ASPM_STATE_L1)
+			link->aspm_disable |= ASPM_STATE_L1SS;
+	}
+
+	pcie_config_aspm_link(link, policy_to_aspm_state(link));
+
+	mutex_unlock(&aspm_lock);
+	up_read(&pci_bus_sem);
+
+	return len;
+}
+
+#define ASPM_ATTR(_f, _s)						\
+static ssize_t _f##_show(struct device *dev,				\
+			 struct device_attribute *attr, char *buf)	\
+{ return aspm_attr_show_common(dev, attr, buf, ASPM_STATE_##_s); }	\
+									\
+static ssize_t _f##_store(struct device *dev,				\
+			  struct device_attribute *attr,		\
+			  const char *buf, size_t len)			\
+{ return aspm_attr_store_common(dev, attr, buf, len, ASPM_STATE_##_s); }
+
+ASPM_ATTR(l0s_aspm, L0S)
+ASPM_ATTR(l1_aspm, L1)
+ASPM_ATTR(l1_1_aspm, L1_1)
+ASPM_ATTR(l1_2_aspm, L1_2)
+ASPM_ATTR(l1_1_pcipm, L1_1_PCIPM)
+ASPM_ATTR(l1_2_pcipm, L1_2_PCIPM)
+
+static ssize_t clkpm_show(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pcie_link_state *link = pcie_aspm_get_link(pdev);
+
+	return sysfs_emit(buf, "%d\n", link->clkpm_enabled);
+}
+
+static ssize_t clkpm_store(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t len)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pcie_link_state *link = pcie_aspm_get_link(pdev);
+	bool state_enable;
+
+	if (kstrtobool(buf, &state_enable) < 0)
+		return -EINVAL;
+
+	down_read(&pci_bus_sem);
+	mutex_lock(&aspm_lock);
+
+	link->clkpm_disable = !state_enable;
+	pcie_set_clkpm(link, policy_to_clkpm_state(link));
+
+	mutex_unlock(&aspm_lock);
+	up_read(&pci_bus_sem);
+
+	return len;
+}
+
+static DEVICE_ATTR_RW(clkpm);
+static DEVICE_ATTR_RW(l0s_aspm);
+static DEVICE_ATTR_RW(l1_aspm);
+static DEVICE_ATTR_RW(l1_1_aspm);
+static DEVICE_ATTR_RW(l1_2_aspm);
+static DEVICE_ATTR_RW(l1_1_pcipm);
+static DEVICE_ATTR_RW(l1_2_pcipm);
+
+static struct attribute *aspm_ctrl_attrs[] = {
+	&dev_attr_clkpm.attr,
+	&dev_attr_l0s_aspm.attr,
+	&dev_attr_l1_aspm.attr,
+	&dev_attr_l1_1_aspm.attr,
+	&dev_attr_l1_2_aspm.attr,
+	&dev_attr_l1_1_pcipm.attr,
+	&dev_attr_l1_2_pcipm.attr,
+	NULL
+};
+
+static umode_t aspm_ctrl_attrs_are_visible(struct kobject *kobj,
+					   struct attribute *a, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pcie_link_state *link = pcie_aspm_get_link(pdev);
+	static const u8 aspm_state_map[] = {
+		ASPM_STATE_L0S,
+		ASPM_STATE_L1,
+		ASPM_STATE_L1_1,
+		ASPM_STATE_L1_2,
+		ASPM_STATE_L1_1_PCIPM,
+		ASPM_STATE_L1_2_PCIPM,
+	};
+
+	if (aspm_disabled || !link)
+		return 0;
+
+	if (n == 0)
+		return link->clkpm_capable ? a->mode : 0;
+
+	return link->aspm_capable & aspm_state_map[n - 1] ? a->mode : 0;
+}
+
+const struct attribute_group aspm_ctrl_attr_group = {
+	.name = "link",
+	.attrs = aspm_ctrl_attrs,
+	.is_visible = aspm_ctrl_attrs_are_visible,
+};
+
+static int __init pcie_aspm_disable(char *str)
+{
+	if (!strcmp(str, "off")) {
+		aspm_policy = POLICY_DEFAULT;
+		aspm_disabled = 1;
+		aspm_support_enabled = false;
+		printk(KERN_INFO "PCIe ASPM is disabled\n");
+	} else if (!strcmp(str, "force")) {
+		aspm_force = 1;
+		printk(KERN_INFO "PCIe ASPM is forcibly enabled\n");
+	}
+	return 1;
+}
+
+__setup("pcie_aspm=", pcie_aspm_disable);
+
+void pcie_no_aspm(void)
+{
+	/*
+	 * Disabling ASPM is intended to prevent the kernel from modifying
+	 * existing hardware state, not to clear existing state. To that end:
+	 * (a) set policy to POLICY_DEFAULT in order to avoid changing state
+	 * (b) prevent userspace from changing policy
+	 */
+	if (!aspm_force) {
+		aspm_policy = POLICY_DEFAULT;
+		aspm_disabled = 1;
+	}
+}
+
+bool pcie_aspm_support_enabled(void)
+{
+	return aspm_support_enabled;
+}
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
new file mode 100644
index 000000000..3ceed8e3d
--- /dev/null
+++ b/drivers/pci/pcie/dpc.c
@@ -0,0 +1,408 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Express Downstream Port Containment services driver
+ * Author: Keith Busch <keith.busch@intel.com>
+ *
+ * Copyright (C) 2016 Intel Corp.
+ */
+
+#define dev_fmt(fmt) "DPC: " fmt
+
+#include <linux/aer.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+
+#include "portdrv.h"
+#include "../pci.h"
+
+static const char * const rp_pio_error_string[] = {
+	"Configuration Request received UR Completion",	 /* Bit Position 0  */
+	"Configuration Request received CA Completion",	 /* Bit Position 1  */
+	"Configuration Request Completion Timeout",	 /* Bit Position 2  */
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	"I/O Request received UR Completion",		 /* Bit Position 8  */
+	"I/O Request received CA Completion",		 /* Bit Position 9  */
+	"I/O Request Completion Timeout",		 /* Bit Position 10 */
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	"Memory Request received UR Completion",	 /* Bit Position 16 */
+	"Memory Request received CA Completion",	 /* Bit Position 17 */
+	"Memory Request Completion Timeout",		 /* Bit Position 18 */
+};
+
+void pci_save_dpc_state(struct pci_dev *dev)
+{
+	struct pci_cap_saved_state *save_state;
+	u16 *cap;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
+	if (!save_state)
+		return;
+
+	cap = (u16 *)&save_state->cap.data[0];
+	pci_read_config_word(dev, dev->dpc_cap + PCI_EXP_DPC_CTL, cap);
+}
+
+void pci_restore_dpc_state(struct pci_dev *dev)
+{
+	struct pci_cap_saved_state *save_state;
+	u16 *cap;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
+	if (!save_state)
+		return;
+
+	cap = (u16 *)&save_state->cap.data[0];
+	pci_write_config_word(dev, dev->dpc_cap + PCI_EXP_DPC_CTL, *cap);
+}
+
+static DECLARE_WAIT_QUEUE_HEAD(dpc_completed_waitqueue);
+
+#ifdef CONFIG_HOTPLUG_PCI_PCIE
+static bool dpc_completed(struct pci_dev *pdev)
+{
+	u16 status;
+
+	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_STATUS, &status);
+	if ((!PCI_POSSIBLE_ERROR(status)) && (status & PCI_EXP_DPC_STATUS_TRIGGER))
+		return false;
+
+	if (test_bit(PCI_DPC_RECOVERING, &pdev->priv_flags))
+		return false;
+
+	return true;
+}
+
+/**
+ * pci_dpc_recovered - whether DPC triggered and has recovered successfully
+ * @pdev: PCI device
+ *
+ * Return true if DPC was triggered for @pdev and has recovered successfully.
+ * Wait for recovery if it hasn't completed yet.  Called from the PCIe hotplug
+ * driver to recognize and ignore Link Down/Up events caused by DPC.
+ */
+bool pci_dpc_recovered(struct pci_dev *pdev)
+{
+	struct pci_host_bridge *host;
+
+	if (!pdev->dpc_cap)
+		return false;
+
+	/*
+	 * Synchronization between hotplug and DPC is not supported
+	 * if DPC is owned by firmware and EDR is not enabled.
+	 */
+	host = pci_find_host_bridge(pdev->bus);
+	if (!host->native_dpc && !IS_ENABLED(CONFIG_PCIE_EDR))
+		return false;
+
+	/*
+	 * Need a timeout in case DPC never completes due to failure of
+	 * dpc_wait_rp_inactive().  The spec doesn't mandate a time limit,
+	 * but reports indicate that DPC completes within 4 seconds.
+	 */
+	wait_event_timeout(dpc_completed_waitqueue, dpc_completed(pdev),
+			   msecs_to_jiffies(4000));
+
+	return test_and_clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
+}
+#endif /* CONFIG_HOTPLUG_PCI_PCIE */
+
+static int dpc_wait_rp_inactive(struct pci_dev *pdev)
+{
+	unsigned long timeout = jiffies + HZ;
+	u16 cap = pdev->dpc_cap, status;
+
+	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
+	while (status & PCI_EXP_DPC_RP_BUSY &&
+					!time_after(jiffies, timeout)) {
+		msleep(10);
+		pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
+	}
+	if (status & PCI_EXP_DPC_RP_BUSY) {
+		pci_warn(pdev, "root port still busy\n");
+		return -EBUSY;
+	}
+	return 0;
+}
+
+pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
+{
+	pci_ers_result_t ret;
+	u16 cap;
+
+	set_bit(PCI_DPC_RECOVERING, &pdev->priv_flags);
+
+	/*
+	 * DPC disables the Link automatically in hardware, so it has
+	 * already been reset by the time we get here.
+	 */
+	cap = pdev->dpc_cap;
+
+	/*
+	 * Wait until the Link is inactive, then clear DPC Trigger Status
+	 * to allow the Port to leave DPC.
+	 */
+	if (!pcie_wait_for_link(pdev, false))
+		pci_info(pdev, "Data Link Layer Link Active not cleared in 1000 msec\n");
+
+	if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev)) {
+		clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
+		ret = PCI_ERS_RESULT_DISCONNECT;
+		goto out;
+	}
+
+	pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
+			      PCI_EXP_DPC_STATUS_TRIGGER);
+
+	if (pci_bridge_wait_for_secondary_bus(pdev, "DPC")) {
+		clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
+		ret = PCI_ERS_RESULT_DISCONNECT;
+	} else {
+		set_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
+		ret = PCI_ERS_RESULT_RECOVERED;
+	}
+out:
+	clear_bit(PCI_DPC_RECOVERING, &pdev->priv_flags);
+	wake_up_all(&dpc_completed_waitqueue);
+	return ret;
+}
+
+static void dpc_process_rp_pio_error(struct pci_dev *pdev)
+{
+	u16 cap = pdev->dpc_cap, dpc_status, first_error;
+	u32 status, mask, sev, syserr, exc, dw0, dw1, dw2, dw3, log, prefix;
+	int i;
+
+	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, &status);
+	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_MASK, &mask);
+	pci_err(pdev, "rp_pio_status: %#010x, rp_pio_mask: %#010x\n",
+		status, mask);
+
+	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_SEVERITY, &sev);
+	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_SYSERROR, &syserr);
+	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_EXCEPTION, &exc);
+	pci_err(pdev, "RP PIO severity=%#010x, syserror=%#010x, exception=%#010x\n",
+		sev, syserr, exc);
+
+	/* Get First Error Pointer */
+	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &dpc_status);
+	first_error = (dpc_status & 0x1f00) >> 8;
+
+	for (i = 0; i < ARRAY_SIZE(rp_pio_error_string); i++) {
+		if ((status & ~mask) & (1 << i))
+			pci_err(pdev, "[%2d] %s%s\n", i, rp_pio_error_string[i],
+				first_error == i ? " (First)" : "");
+	}
+
+	if (pdev->dpc_rp_log_size < 4)
+		goto clear_status;
+	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG,
+			      &dw0);
+	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 4,
+			      &dw1);
+	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 8,
+			      &dw2);
+	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 12,
+			      &dw3);
+	pci_err(pdev, "TLP Header: %#010x %#010x %#010x %#010x\n",
+		dw0, dw1, dw2, dw3);
+
+	if (pdev->dpc_rp_log_size < 5)
+		goto clear_status;
+	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG, &log);
+	pci_err(pdev, "RP PIO ImpSpec Log %#010x\n", log);
+
+	for (i = 0; i < pdev->dpc_rp_log_size - 5; i++) {
+		pci_read_config_dword(pdev,
+			cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG, &prefix);
+		pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix);
+	}
+ clear_status:
+	pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, status);
+}
+
+static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
+					  struct aer_err_info *info)
+{
+	int pos = dev->aer_cap;
+	u32 status, mask, sev;
+
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
+	status &= ~mask;
+	if (!status)
+		return 0;
+
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
+	status &= sev;
+	if (status)
+		info->severity = AER_FATAL;
+	else
+		info->severity = AER_NONFATAL;
+
+	return 1;
+}
+
+void dpc_process_error(struct pci_dev *pdev)
+{
+	u16 cap = pdev->dpc_cap, status, source, reason, ext_reason;
+	struct aer_err_info info;
+
+	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
+	pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source);
+
+	pci_info(pdev, "containment event, status:%#06x source:%#06x\n",
+		 status, source);
+
+	reason = (status & PCI_EXP_DPC_STATUS_TRIGGER_RSN) >> 1;
+	ext_reason = (status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT) >> 5;
+	pci_warn(pdev, "%s detected\n",
+		 (reason == 0) ? "unmasked uncorrectable error" :
+		 (reason == 1) ? "ERR_NONFATAL" :
+		 (reason == 2) ? "ERR_FATAL" :
+		 (ext_reason == 0) ? "RP PIO error" :
+		 (ext_reason == 1) ? "software trigger" :
+				     "reserved error");
+
+	/* show RP PIO error detail information */
+	if (pdev->dpc_rp_extensions && reason == 3 && ext_reason == 0)
+		dpc_process_rp_pio_error(pdev);
+	else if (reason == 0 &&
+		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
+		 aer_get_device_error_info(pdev, &info)) {
+		aer_print_error(pdev, &info);
+		pci_aer_clear_nonfatal_status(pdev);
+		pci_aer_clear_fatal_status(pdev);
+	}
+}
+
+static irqreturn_t dpc_handler(int irq, void *context)
+{
+	struct pci_dev *pdev = context;
+
+	dpc_process_error(pdev);
+
+	/* We configure DPC so it only triggers on ERR_FATAL */
+	pcie_do_recovery(pdev, pci_channel_io_frozen, dpc_reset_link);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t dpc_irq(int irq, void *context)
+{
+	struct pci_dev *pdev = context;
+	u16 cap = pdev->dpc_cap, status;
+
+	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
+
+	if (!(status & PCI_EXP_DPC_STATUS_INTERRUPT) || PCI_POSSIBLE_ERROR(status))
+		return IRQ_NONE;
+
+	pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
+			      PCI_EXP_DPC_STATUS_INTERRUPT);
+	if (status & PCI_EXP_DPC_STATUS_TRIGGER)
+		return IRQ_WAKE_THREAD;
+	return IRQ_HANDLED;
+}
+
+void pci_dpc_init(struct pci_dev *pdev)
+{
+	u16 cap;
+
+	pdev->dpc_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
+	if (!pdev->dpc_cap)
+		return;
+
+	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CAP, &cap);
+	if (!(cap & PCI_EXP_DPC_CAP_RP_EXT))
+		return;
+
+	pdev->dpc_rp_extensions = true;
+
+	/* Quirks may set dpc_rp_log_size if device or firmware is buggy */
+	if (!pdev->dpc_rp_log_size) {
+		pdev->dpc_rp_log_size =
+			(cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8;
+		if (pdev->dpc_rp_log_size < 4 || pdev->dpc_rp_log_size > 9) {
+			pci_err(pdev, "RP PIO log size %u is invalid\n",
+				pdev->dpc_rp_log_size);
+			pdev->dpc_rp_log_size = 0;
+		}
+	}
+}
+
+#define FLAG(x, y) (((x) & (y)) ? '+' : '-')
+static int dpc_probe(struct pcie_device *dev)
+{
+	struct pci_dev *pdev = dev->port;
+	struct device *device = &dev->device;
+	int status;
+	u16 ctl, cap;
+
+	if (!pcie_aer_is_native(pdev) && !pcie_ports_dpc_native)
+		return -ENOTSUPP;
+
+	status = devm_request_threaded_irq(device, dev->irq, dpc_irq,
+					   dpc_handler, IRQF_SHARED,
+					   "pcie-dpc", pdev);
+	if (status) {
+		pci_warn(pdev, "request IRQ%d failed: %d\n", dev->irq,
+			 status);
+		return status;
+	}
+
+	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CAP, &cap);
+	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl);
+
+	ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN;
+	pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl);
+	pci_info(pdev, "enabled with IRQ %d\n", dev->irq);
+
+	pci_info(pdev, "error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n",
+		 cap & PCI_EXP_DPC_IRQ, FLAG(cap, PCI_EXP_DPC_CAP_RP_EXT),
+		 FLAG(cap, PCI_EXP_DPC_CAP_POISONED_TLP),
+		 FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), pdev->dpc_rp_log_size,
+		 FLAG(cap, PCI_EXP_DPC_CAP_DL_ACTIVE));
+
+	pci_add_ext_cap_save_buffer(pdev, PCI_EXT_CAP_ID_DPC, sizeof(u16));
+	return status;
+}
+
+static void dpc_remove(struct pcie_device *dev)
+{
+	struct pci_dev *pdev = dev->port;
+	u16 ctl;
+
+	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl);
+	ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN);
+	pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl);
+}
+
+static struct pcie_port_service_driver dpcdriver = {
+	.name		= "dpc",
+	.port_type	= PCIE_ANY_PORT,
+	.service	= PCIE_PORT_SERVICE_DPC,
+	.probe		= dpc_probe,
+	.remove		= dpc_remove,
+};
+
+int __init pcie_dpc_init(void)
+{
+	return pcie_port_service_register(&dpcdriver);
+}
diff --git a/drivers/pci/pcie/edr.c b/drivers/pci/pcie/edr.c
new file mode 100644
index 000000000..5f4914d31
--- /dev/null
+++ b/drivers/pci/pcie/edr.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Error Disconnect Recover support
+ * Author: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
+ *
+ * Copyright (C) 2020 Intel Corp.
+ */
+
+#define dev_fmt(fmt) "EDR: " fmt
+
+#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+
+#include "portdrv.h"
+#include "../pci.h"
+
+#define EDR_PORT_DPC_ENABLE_DSM		0x0C
+#define EDR_PORT_LOCATE_DSM		0x0D
+#define EDR_OST_SUCCESS			0x80
+#define EDR_OST_FAILED			0x81
+
+/*
+ * _DSM wrapper function to enable/disable DPC
+ * @pdev   : PCI device structure
+ *
+ * returns 0 on success or errno on failure.
+ */
+static int acpi_enable_dpc(struct pci_dev *pdev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+	union acpi_object *obj, argv4, req;
+	int status = 0;
+
+	/*
+	 * Behavior when calling unsupported _DSM functions is undefined,
+	 * so check whether EDR_PORT_DPC_ENABLE_DSM is supported.
+	 */
+	if (!acpi_check_dsm(adev->handle, &pci_acpi_dsm_guid, 5,
+			    1ULL << EDR_PORT_DPC_ENABLE_DSM))
+		return 0;
+
+	req.type = ACPI_TYPE_INTEGER;
+	req.integer.value = 1;
+
+	argv4.type = ACPI_TYPE_PACKAGE;
+	argv4.package.count = 1;
+	argv4.package.elements = &req;
+
+	/*
+	 * Per Downstream Port Containment Related Enhancements ECN to PCI
+	 * Firmware Specification r3.2, sec 4.6.12, EDR_PORT_DPC_ENABLE_DSM is
+	 * optional.  Return success if it's not implemented.
+	 */
+	obj = acpi_evaluate_dsm(adev->handle, &pci_acpi_dsm_guid, 5,
+				EDR_PORT_DPC_ENABLE_DSM, &argv4);
+	if (!obj)
+		return 0;
+
+	if (obj->type != ACPI_TYPE_INTEGER) {
+		pci_err(pdev, FW_BUG "Enable DPC _DSM returned non integer\n");
+		status = -EIO;
+	}
+
+	if (obj->integer.value != 1) {
+		pci_err(pdev, "Enable DPC _DSM failed to enable DPC\n");
+		status = -EIO;
+	}
+
+	ACPI_FREE(obj);
+
+	return status;
+}
+
+/*
+ * _DSM wrapper function to locate DPC port
+ * @pdev   : Device which received EDR event
+ *
+ * Returns pci_dev or NULL.  Caller is responsible for dropping a reference
+ * on the returned pci_dev with pci_dev_put().
+ */
+static struct pci_dev *acpi_dpc_port_get(struct pci_dev *pdev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+	union acpi_object *obj;
+	u16 port;
+
+	/*
+	 * Behavior when calling unsupported _DSM functions is undefined,
+	 * so check whether EDR_PORT_DPC_ENABLE_DSM is supported.
+	 */
+	if (!acpi_check_dsm(adev->handle, &pci_acpi_dsm_guid, 5,
+			    1ULL << EDR_PORT_LOCATE_DSM))
+		return pci_dev_get(pdev);
+
+	obj = acpi_evaluate_dsm(adev->handle, &pci_acpi_dsm_guid, 5,
+				EDR_PORT_LOCATE_DSM, NULL);
+	if (!obj)
+		return pci_dev_get(pdev);
+
+	if (obj->type != ACPI_TYPE_INTEGER) {
+		ACPI_FREE(obj);
+		pci_err(pdev, FW_BUG "Locate Port _DSM returned non integer\n");
+		return NULL;
+	}
+
+	/*
+	 * Firmware returns DPC port BDF details in following format:
+	 *	15:8 = bus
+	 *	 7:3 = device
+	 *	 2:0 = function
+	 */
+	port = obj->integer.value;
+
+	ACPI_FREE(obj);
+
+	return pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus),
+					   PCI_BUS_NUM(port), port & 0xff);
+}
+
+/*
+ * _OST wrapper function to let firmware know the status of EDR event
+ * @pdev   : Device used to send _OST
+ * @edev   : Device which experienced EDR event
+ * @status : Status of EDR event
+ */
+static int acpi_send_edr_status(struct pci_dev *pdev, struct pci_dev *edev,
+				u16 status)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+	u32 ost_status;
+
+	pci_dbg(pdev, "Status for %s: %#x\n", pci_name(edev), status);
+
+	ost_status = PCI_DEVID(edev->bus->number, edev->devfn) << 16;
+	ost_status |= status;
+
+	status = acpi_evaluate_ost(adev->handle, ACPI_NOTIFY_DISCONNECT_RECOVER,
+				   ost_status, NULL);
+	if (ACPI_FAILURE(status))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void edr_handle_event(acpi_handle handle, u32 event, void *data)
+{
+	struct pci_dev *pdev = data, *edev;
+	pci_ers_result_t estate = PCI_ERS_RESULT_DISCONNECT;
+	u16 status;
+
+	if (event != ACPI_NOTIFY_DISCONNECT_RECOVER)
+		return;
+
+	/*
+	 * pdev is a Root Port or Downstream Port that is still present and
+	 * has triggered a containment event, e.g., DPC, so its child
+	 * devices have been disconnected (ACPI r6.5, sec 5.6.6).
+	 */
+	pci_info(pdev, "EDR event received\n");
+
+	/*
+	 * Locate the port that experienced the containment event.  pdev
+	 * may be that port or a parent of it (PCI Firmware r3.3, sec
+	 * 4.6.13).
+	 */
+	edev = acpi_dpc_port_get(pdev);
+	if (!edev) {
+		pci_err(pdev, "Firmware failed to locate DPC port\n");
+		return;
+	}
+
+	pci_dbg(pdev, "Reported EDR dev: %s\n", pci_name(edev));
+
+	/* If port does not support DPC, just send the OST */
+	if (!edev->dpc_cap) {
+		pci_err(edev, FW_BUG "This device doesn't support DPC\n");
+		goto send_ost;
+	}
+
+	/* Check if there is a valid DPC trigger */
+	pci_read_config_word(edev, edev->dpc_cap + PCI_EXP_DPC_STATUS, &status);
+	if (!(status & PCI_EXP_DPC_STATUS_TRIGGER)) {
+		pci_err(edev, "Invalid DPC trigger %#010x\n", status);
+		goto send_ost;
+	}
+
+	dpc_process_error(edev);
+	pci_aer_raw_clear_status(edev);
+
+	/*
+	 * Irrespective of whether the DPC event is triggered by ERR_FATAL
+	 * or ERR_NONFATAL, since the link is already down, use the FATAL
+	 * error recovery path for both cases.
+	 */
+	estate = pcie_do_recovery(edev, pci_channel_io_frozen, dpc_reset_link);
+
+send_ost:
+
+	/*
+	 * If recovery is successful, send _OST(0xF, BDF << 16 | 0x80)
+	 * to firmware. If not successful, send _OST(0xF, BDF << 16 | 0x81).
+	 */
+	if (estate == PCI_ERS_RESULT_RECOVERED) {
+		pci_dbg(edev, "DPC port successfully recovered\n");
+		pcie_clear_device_status(edev);
+		acpi_send_edr_status(pdev, edev, EDR_OST_SUCCESS);
+	} else {
+		pci_dbg(edev, "DPC port recovery failed\n");
+		acpi_send_edr_status(pdev, edev, EDR_OST_FAILED);
+	}
+
+	pci_dev_put(edev);
+}
+
+void pci_acpi_add_edr_notifier(struct pci_dev *pdev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+	acpi_status status;
+
+	if (!adev) {
+		pci_dbg(pdev, "No valid ACPI node, skipping EDR init\n");
+		return;
+	}
+
+	status = acpi_install_notify_handler(adev->handle, ACPI_SYSTEM_NOTIFY,
+					     edr_handle_event, pdev);
+	if (ACPI_FAILURE(status)) {
+		pci_err(pdev, "Failed to install notify handler\n");
+		return;
+	}
+
+	if (acpi_enable_dpc(pdev))
+		acpi_remove_notify_handler(adev->handle, ACPI_SYSTEM_NOTIFY,
+					   edr_handle_event);
+	else
+		pci_dbg(pdev, "Notify handler installed\n");
+}
+
+void pci_acpi_remove_edr_notifier(struct pci_dev *pdev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+
+	if (!adev)
+		return;
+
+	acpi_remove_notify_handler(adev->handle, ACPI_SYSTEM_NOTIFY,
+				   edr_handle_event);
+	pci_dbg(pdev, "Notify handler removed\n");
+}
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
new file mode 100644
index 000000000..59c90d04a
--- /dev/null
+++ b/drivers/pci/pcie/err.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file implements the error recovery as a core part of PCIe error
+ * reporting. When a PCIe error is delivered, an error message will be
+ * collected and printed to console, then, an error recovery procedure
+ * will be executed by following the PCI error recovery rules.
+ *
+ * Copyright (C) 2006 Intel Corp.
+ *	Tom Long Nguyen (tom.l.nguyen@intel.com)
+ *	Zhang Yanmin (yanmin.zhang@intel.com)
+ */
+
+#define dev_fmt(fmt) "AER: " fmt
+
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/aer.h>
+#include "portdrv.h"
+#include "../pci.h"
+
+static pci_ers_result_t merge_result(enum pci_ers_result orig,
+				  enum pci_ers_result new)
+{
+	if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
+		return PCI_ERS_RESULT_NO_AER_DRIVER;
+
+	if (new == PCI_ERS_RESULT_NONE)
+		return orig;
+
+	switch (orig) {
+	case PCI_ERS_RESULT_CAN_RECOVER:
+	case PCI_ERS_RESULT_RECOVERED:
+		orig = new;
+		break;
+	case PCI_ERS_RESULT_DISCONNECT:
+		if (new == PCI_ERS_RESULT_NEED_RESET)
+			orig = PCI_ERS_RESULT_NEED_RESET;
+		break;
+	default:
+		break;
+	}
+
+	return orig;
+}
+
+static int report_error_detected(struct pci_dev *dev,
+				 pci_channel_state_t state,
+				 enum pci_ers_result *result)
+{
+	struct pci_driver *pdrv;
+	pci_ers_result_t vote;
+	const struct pci_error_handlers *err_handler;
+
+	device_lock(&dev->dev);
+	pdrv = dev->driver;
+	if (pci_dev_is_disconnected(dev)) {
+		vote = PCI_ERS_RESULT_DISCONNECT;
+	} else if (!pci_dev_set_io_state(dev, state)) {
+		pci_info(dev, "can't recover (state transition %u -> %u invalid)\n",
+			dev->error_state, state);
+		vote = PCI_ERS_RESULT_NONE;
+	} else if (!pdrv || !pdrv->err_handler ||
+		   !pdrv->err_handler->error_detected) {
+		/*
+		 * If any device in the subtree does not have an error_detected
+		 * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent
+		 * error callbacks of "any" device in the subtree, and will
+		 * exit in the disconnected error state.
+		 */
+		if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
+			vote = PCI_ERS_RESULT_NO_AER_DRIVER;
+			pci_info(dev, "can't recover (no error_detected callback)\n");
+		} else {
+			vote = PCI_ERS_RESULT_NONE;
+		}
+	} else {
+		err_handler = pdrv->err_handler;
+		vote = err_handler->error_detected(dev, state);
+	}
+	pci_uevent_ers(dev, vote);
+	*result = merge_result(*result, vote);
+	device_unlock(&dev->dev);
+	return 0;
+}
+
+static int report_frozen_detected(struct pci_dev *dev, void *data)
+{
+	return report_error_detected(dev, pci_channel_io_frozen, data);
+}
+
+static int report_normal_detected(struct pci_dev *dev, void *data)
+{
+	return report_error_detected(dev, pci_channel_io_normal, data);
+}
+
+static int report_mmio_enabled(struct pci_dev *dev, void *data)
+{
+	struct pci_driver *pdrv;
+	pci_ers_result_t vote, *result = data;
+	const struct pci_error_handlers *err_handler;
+
+	device_lock(&dev->dev);
+	pdrv = dev->driver;
+	if (!pdrv ||
+		!pdrv->err_handler ||
+		!pdrv->err_handler->mmio_enabled)
+		goto out;
+
+	err_handler = pdrv->err_handler;
+	vote = err_handler->mmio_enabled(dev);
+	*result = merge_result(*result, vote);
+out:
+	device_unlock(&dev->dev);
+	return 0;
+}
+
+static int report_slot_reset(struct pci_dev *dev, void *data)
+{
+	struct pci_driver *pdrv;
+	pci_ers_result_t vote, *result = data;
+	const struct pci_error_handlers *err_handler;
+
+	device_lock(&dev->dev);
+	pdrv = dev->driver;
+	if (!pdrv ||
+		!pdrv->err_handler ||
+		!pdrv->err_handler->slot_reset)
+		goto out;
+
+	err_handler = pdrv->err_handler;
+	vote = err_handler->slot_reset(dev);
+	*result = merge_result(*result, vote);
+out:
+	device_unlock(&dev->dev);
+	return 0;
+}
+
+static int report_resume(struct pci_dev *dev, void *data)
+{
+	struct pci_driver *pdrv;
+	const struct pci_error_handlers *err_handler;
+
+	device_lock(&dev->dev);
+	pdrv = dev->driver;
+	if (!pci_dev_set_io_state(dev, pci_channel_io_normal) ||
+		!pdrv ||
+		!pdrv->err_handler ||
+		!pdrv->err_handler->resume)
+		goto out;
+
+	err_handler = pdrv->err_handler;
+	err_handler->resume(dev);
+out:
+	pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
+	device_unlock(&dev->dev);
+	return 0;
+}
+
+/**
+ * pci_walk_bridge - walk bridges potentially AER affected
+ * @bridge:	bridge which may be a Port, an RCEC, or an RCiEP
+ * @cb:		callback to be called for each device found
+ * @userdata:	arbitrary pointer to be passed to callback
+ *
+ * If the device provided is a bridge, walk the subordinate bus, including
+ * any bridged devices on buses under this bus.  Call the provided callback
+ * on each device found.
+ *
+ * If the device provided has no subordinate bus, e.g., an RCEC or RCiEP,
+ * call the callback on the device itself.
+ */
+static void pci_walk_bridge(struct pci_dev *bridge,
+			    int (*cb)(struct pci_dev *, void *),
+			    void *userdata)
+{
+	if (bridge->subordinate)
+		pci_walk_bus(bridge->subordinate, cb, userdata);
+	else
+		cb(bridge, userdata);
+}
+
+pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
+		pci_channel_state_t state,
+		pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev))
+{
+	int type = pci_pcie_type(dev);
+	struct pci_dev *bridge;
+	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
+	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
+
+	/*
+	 * If the error was detected by a Root Port, Downstream Port, RCEC,
+	 * or RCiEP, recovery runs on the device itself.  For Ports, that
+	 * also includes any subordinate devices.
+	 *
+	 * If it was detected by another device (Endpoint, etc), recovery
+	 * runs on the device and anything else under the same Port, i.e.,
+	 * everything under "bridge".
+	 */
+	if (type == PCI_EXP_TYPE_ROOT_PORT ||
+	    type == PCI_EXP_TYPE_DOWNSTREAM ||
+	    type == PCI_EXP_TYPE_RC_EC ||
+	    type == PCI_EXP_TYPE_RC_END)
+		bridge = dev;
+	else
+		bridge = pci_upstream_bridge(dev);
+
+	pci_dbg(bridge, "broadcast error_detected message\n");
+	if (state == pci_channel_io_frozen) {
+		pci_walk_bridge(bridge, report_frozen_detected, &status);
+		if (reset_subordinates(bridge) != PCI_ERS_RESULT_RECOVERED) {
+			pci_warn(bridge, "subordinate device reset failed\n");
+			goto failed;
+		}
+	} else {
+		pci_walk_bridge(bridge, report_normal_detected, &status);
+	}
+
+	if (status == PCI_ERS_RESULT_CAN_RECOVER) {
+		status = PCI_ERS_RESULT_RECOVERED;
+		pci_dbg(bridge, "broadcast mmio_enabled message\n");
+		pci_walk_bridge(bridge, report_mmio_enabled, &status);
+	}
+
+	if (status == PCI_ERS_RESULT_NEED_RESET) {
+		/*
+		 * TODO: Should call platform-specific
+		 * functions to reset slot before calling
+		 * drivers' slot_reset callbacks?
+		 */
+		status = PCI_ERS_RESULT_RECOVERED;
+		pci_dbg(bridge, "broadcast slot_reset message\n");
+		pci_walk_bridge(bridge, report_slot_reset, &status);
+	}
+
+	if (status != PCI_ERS_RESULT_RECOVERED)
+		goto failed;
+
+	pci_dbg(bridge, "broadcast resume message\n");
+	pci_walk_bridge(bridge, report_resume, &status);
+
+	/*
+	 * If we have native control of AER, clear error status in the device
+	 * that detected the error.  If the platform retained control of AER,
+	 * it is responsible for clearing this status.  In that case, the
+	 * signaling device may not even be visible to the OS.
+	 */
+	if (host->native_aer || pcie_ports_native) {
+		pcie_clear_device_status(dev);
+		pci_aer_clear_nonfatal_status(dev);
+	}
+	pci_info(bridge, "device recovery successful\n");
+	return status;
+
+failed:
+	pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT);
+
+	/* TODO: Should kernel panic here? */
+	pci_info(bridge, "device recovery failed\n");
+
+	return status;
+}
diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c
new file mode 100644
index 000000000..ef8ce436e
--- /dev/null
+++ b/drivers/pci/pcie/pme.c
@@ -0,0 +1,471 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe Native PME support
+ *
+ * Copyright (C) 2007 - 2009 Intel Corp
+ * Copyright (C) 2007 - 2009 Shaohua Li <shaohua.li@intel.com>
+ * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ */
+
+#define dev_fmt(fmt) "PME: " fmt
+
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/pm_runtime.h>
+
+#include "../pci.h"
+#include "portdrv.h"
+
+/*
+ * If this switch is set, MSI will not be used for PCIe PME signaling.  This
+ * causes the PCIe port driver to use INTx interrupts only, but it turns out
+ * that using MSI for PCIe PME signaling doesn't play well with PCIe PME-based
+ * wake-up from system sleep states.
+ */
+bool pcie_pme_msi_disabled;
+
+static int __init pcie_pme_setup(char *str)
+{
+	if (!strncmp(str, "nomsi", 5))
+		pcie_pme_msi_disabled = true;
+
+	return 1;
+}
+__setup("pcie_pme=", pcie_pme_setup);
+
+struct pcie_pme_service_data {
+	spinlock_t lock;
+	struct pcie_device *srv;
+	struct work_struct work;
+	bool noirq; /* If set, keep the PME interrupt disabled. */
+};
+
+/**
+ * pcie_pme_interrupt_enable - Enable/disable PCIe PME interrupt generation.
+ * @dev: PCIe root port or event collector.
+ * @enable: Enable or disable the interrupt.
+ */
+void pcie_pme_interrupt_enable(struct pci_dev *dev, bool enable)
+{
+	if (enable)
+		pcie_capability_set_word(dev, PCI_EXP_RTCTL,
+					 PCI_EXP_RTCTL_PMEIE);
+	else
+		pcie_capability_clear_word(dev, PCI_EXP_RTCTL,
+					   PCI_EXP_RTCTL_PMEIE);
+}
+
+/**
+ * pcie_pme_walk_bus - Scan a PCI bus for devices asserting PME#.
+ * @bus: PCI bus to scan.
+ *
+ * Scan given PCI bus and all buses under it for devices asserting PME#.
+ */
+static bool pcie_pme_walk_bus(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+	bool ret = false;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		/* Skip PCIe devices in case we started from a root port. */
+		if (!pci_is_pcie(dev) && pci_check_pme_status(dev)) {
+			if (dev->pme_poll)
+				dev->pme_poll = false;
+
+			pci_wakeup_event(dev);
+			pm_request_resume(&dev->dev);
+			ret = true;
+		}
+
+		if (dev->subordinate && pcie_pme_walk_bus(dev->subordinate))
+			ret = true;
+	}
+
+	return ret;
+}
+
+/**
+ * pcie_pme_from_pci_bridge - Check if PCIe-PCI bridge generated a PME.
+ * @bus: Secondary bus of the bridge.
+ * @devfn: Device/function number to check.
+ *
+ * PME from PCI devices under a PCIe-PCI bridge may be converted to an in-band
+ * PCIe PME message.  In such that case the bridge should use the Requester ID
+ * of device/function number 0 on its secondary bus.
+ */
+static bool pcie_pme_from_pci_bridge(struct pci_bus *bus, u8 devfn)
+{
+	struct pci_dev *dev;
+	bool found = false;
+
+	if (devfn)
+		return false;
+
+	dev = pci_dev_get(bus->self);
+	if (!dev)
+		return false;
+
+	if (pci_is_pcie(dev) && pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) {
+		down_read(&pci_bus_sem);
+		if (pcie_pme_walk_bus(bus))
+			found = true;
+		up_read(&pci_bus_sem);
+	}
+
+	pci_dev_put(dev);
+	return found;
+}
+
+/**
+ * pcie_pme_handle_request - Find device that generated PME and handle it.
+ * @port: Root port or event collector that generated the PME interrupt.
+ * @req_id: PCIe Requester ID of the device that generated the PME.
+ */
+static void pcie_pme_handle_request(struct pci_dev *port, u16 req_id)
+{
+	u8 busnr = req_id >> 8, devfn = req_id & 0xff;
+	struct pci_bus *bus;
+	struct pci_dev *dev;
+	bool found = false;
+
+	/* First, check if the PME is from the root port itself. */
+	if (port->devfn == devfn && port->bus->number == busnr) {
+		if (port->pme_poll)
+			port->pme_poll = false;
+
+		if (pci_check_pme_status(port)) {
+			pm_request_resume(&port->dev);
+			found = true;
+		} else {
+			/*
+			 * Apparently, the root port generated the PME on behalf
+			 * of a non-PCIe device downstream.  If this is done by
+			 * a root port, the Requester ID field in its status
+			 * register may contain either the root port's, or the
+			 * source device's information (PCI Express Base
+			 * Specification, Rev. 2.0, Section 6.1.9).
+			 */
+			down_read(&pci_bus_sem);
+			found = pcie_pme_walk_bus(port->subordinate);
+			up_read(&pci_bus_sem);
+		}
+		goto out;
+	}
+
+	/* Second, find the bus the source device is on. */
+	bus = pci_find_bus(pci_domain_nr(port->bus), busnr);
+	if (!bus)
+		goto out;
+
+	/* Next, check if the PME is from a PCIe-PCI bridge. */
+	found = pcie_pme_from_pci_bridge(bus, devfn);
+	if (found)
+		goto out;
+
+	/* Finally, try to find the PME source on the bus. */
+	down_read(&pci_bus_sem);
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		pci_dev_get(dev);
+		if (dev->devfn == devfn) {
+			found = true;
+			break;
+		}
+		pci_dev_put(dev);
+	}
+	up_read(&pci_bus_sem);
+
+	if (found) {
+		/* The device is there, but we have to check its PME status. */
+		found = pci_check_pme_status(dev);
+		if (found) {
+			if (dev->pme_poll)
+				dev->pme_poll = false;
+
+			pci_wakeup_event(dev);
+			pm_request_resume(&dev->dev);
+		}
+		pci_dev_put(dev);
+	} else if (devfn) {
+		/*
+		 * The device is not there, but we can still try to recover by
+		 * assuming that the PME was reported by a PCIe-PCI bridge that
+		 * used devfn different from zero.
+		 */
+		pci_info(port, "interrupt generated for non-existent device %02x:%02x.%d\n",
+			 busnr, PCI_SLOT(devfn), PCI_FUNC(devfn));
+		found = pcie_pme_from_pci_bridge(bus, 0);
+	}
+
+ out:
+	if (!found)
+		pci_info(port, "Spurious native interrupt!\n");
+}
+
+/**
+ * pcie_pme_work_fn - Work handler for PCIe PME interrupt.
+ * @work: Work structure giving access to service data.
+ */
+static void pcie_pme_work_fn(struct work_struct *work)
+{
+	struct pcie_pme_service_data *data =
+			container_of(work, struct pcie_pme_service_data, work);
+	struct pci_dev *port = data->srv->port;
+	u32 rtsta;
+
+	spin_lock_irq(&data->lock);
+
+	for (;;) {
+		if (data->noirq)
+			break;
+
+		pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta);
+		if (PCI_POSSIBLE_ERROR(rtsta))
+			break;
+
+		if (rtsta & PCI_EXP_RTSTA_PME) {
+			/*
+			 * Clear PME status of the port.  If there are other
+			 * pending PMEs, the status will be set again.
+			 */
+			pcie_clear_root_pme_status(port);
+
+			spin_unlock_irq(&data->lock);
+			pcie_pme_handle_request(port, rtsta & 0xffff);
+			spin_lock_irq(&data->lock);
+
+			continue;
+		}
+
+		/* No need to loop if there are no more PMEs pending. */
+		if (!(rtsta & PCI_EXP_RTSTA_PENDING))
+			break;
+
+		spin_unlock_irq(&data->lock);
+		cpu_relax();
+		spin_lock_irq(&data->lock);
+	}
+
+	if (!data->noirq)
+		pcie_pme_interrupt_enable(port, true);
+
+	spin_unlock_irq(&data->lock);
+}
+
+/**
+ * pcie_pme_irq - Interrupt handler for PCIe root port PME interrupt.
+ * @irq: Interrupt vector.
+ * @context: Interrupt context pointer.
+ */
+static irqreturn_t pcie_pme_irq(int irq, void *context)
+{
+	struct pci_dev *port;
+	struct pcie_pme_service_data *data;
+	u32 rtsta;
+	unsigned long flags;
+
+	port = ((struct pcie_device *)context)->port;
+	data = get_service_data((struct pcie_device *)context);
+
+	spin_lock_irqsave(&data->lock, flags);
+	pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta);
+
+	if (PCI_POSSIBLE_ERROR(rtsta) || !(rtsta & PCI_EXP_RTSTA_PME)) {
+		spin_unlock_irqrestore(&data->lock, flags);
+		return IRQ_NONE;
+	}
+
+	pcie_pme_interrupt_enable(port, false);
+	spin_unlock_irqrestore(&data->lock, flags);
+
+	/* We don't use pm_wq, because it's freezable. */
+	schedule_work(&data->work);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * pcie_pme_can_wakeup - Set the wakeup capability flag.
+ * @dev: PCI device to handle.
+ * @ign: Ignored.
+ */
+static int pcie_pme_can_wakeup(struct pci_dev *dev, void *ign)
+{
+	device_set_wakeup_capable(&dev->dev, true);
+	return 0;
+}
+
+/**
+ * pcie_pme_mark_devices - Set the wakeup flag for devices below a port.
+ * @port: PCIe root port or event collector to handle.
+ *
+ * For each device below given root port, including the port itself (or for each
+ * root complex integrated endpoint if @port is a root complex event collector)
+ * set the flag indicating that it can signal run-time wake-up events.
+ */
+static void pcie_pme_mark_devices(struct pci_dev *port)
+{
+	pcie_pme_can_wakeup(port, NULL);
+
+	if (pci_pcie_type(port) == PCI_EXP_TYPE_RC_EC)
+		pcie_walk_rcec(port, pcie_pme_can_wakeup, NULL);
+	else if (port->subordinate)
+		pci_walk_bus(port->subordinate, pcie_pme_can_wakeup, NULL);
+}
+
+/**
+ * pcie_pme_probe - Initialize PCIe PME service for given root port.
+ * @srv: PCIe service to initialize.
+ */
+static int pcie_pme_probe(struct pcie_device *srv)
+{
+	struct pci_dev *port = srv->port;
+	struct pcie_pme_service_data *data;
+	int type = pci_pcie_type(port);
+	int ret;
+
+	/* Limit to Root Ports or Root Complex Event Collectors */
+	if (type != PCI_EXP_TYPE_RC_EC &&
+	    type != PCI_EXP_TYPE_ROOT_PORT)
+		return -ENODEV;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	spin_lock_init(&data->lock);
+	INIT_WORK(&data->work, pcie_pme_work_fn);
+	data->srv = srv;
+	set_service_data(srv, data);
+
+	pcie_pme_interrupt_enable(port, false);
+	pcie_clear_root_pme_status(port);
+
+	ret = request_irq(srv->irq, pcie_pme_irq, IRQF_SHARED, "PCIe PME", srv);
+	if (ret) {
+		kfree(data);
+		return ret;
+	}
+
+	pci_info(port, "Signaling with IRQ %d\n", srv->irq);
+
+	pcie_pme_mark_devices(port);
+	pcie_pme_interrupt_enable(port, true);
+	return 0;
+}
+
+static bool pcie_pme_check_wakeup(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	if (!bus)
+		return false;
+
+	list_for_each_entry(dev, &bus->devices, bus_list)
+		if (device_may_wakeup(&dev->dev)
+		    || pcie_pme_check_wakeup(dev->subordinate))
+			return true;
+
+	return false;
+}
+
+static void pcie_pme_disable_interrupt(struct pci_dev *port,
+				       struct pcie_pme_service_data *data)
+{
+	spin_lock_irq(&data->lock);
+	pcie_pme_interrupt_enable(port, false);
+	pcie_clear_root_pme_status(port);
+	data->noirq = true;
+	spin_unlock_irq(&data->lock);
+}
+
+/**
+ * pcie_pme_suspend - Suspend PCIe PME service device.
+ * @srv: PCIe service device to suspend.
+ */
+static int pcie_pme_suspend(struct pcie_device *srv)
+{
+	struct pcie_pme_service_data *data = get_service_data(srv);
+	struct pci_dev *port = srv->port;
+	bool wakeup;
+	int ret;
+
+	if (device_may_wakeup(&port->dev)) {
+		wakeup = true;
+	} else {
+		down_read(&pci_bus_sem);
+		wakeup = pcie_pme_check_wakeup(port->subordinate);
+		up_read(&pci_bus_sem);
+	}
+	if (wakeup) {
+		ret = enable_irq_wake(srv->irq);
+		if (!ret)
+			return 0;
+	}
+
+	pcie_pme_disable_interrupt(port, data);
+
+	synchronize_irq(srv->irq);
+
+	return 0;
+}
+
+/**
+ * pcie_pme_resume - Resume PCIe PME service device.
+ * @srv: PCIe service device to resume.
+ */
+static int pcie_pme_resume(struct pcie_device *srv)
+{
+	struct pcie_pme_service_data *data = get_service_data(srv);
+
+	spin_lock_irq(&data->lock);
+	if (data->noirq) {
+		struct pci_dev *port = srv->port;
+
+		pcie_clear_root_pme_status(port);
+		pcie_pme_interrupt_enable(port, true);
+		data->noirq = false;
+	} else {
+		disable_irq_wake(srv->irq);
+	}
+	spin_unlock_irq(&data->lock);
+
+	return 0;
+}
+
+/**
+ * pcie_pme_remove - Prepare PCIe PME service device for removal.
+ * @srv: PCIe service device to remove.
+ */
+static void pcie_pme_remove(struct pcie_device *srv)
+{
+	struct pcie_pme_service_data *data = get_service_data(srv);
+
+	pcie_pme_disable_interrupt(srv->port, data);
+	free_irq(srv->irq, srv);
+	cancel_work_sync(&data->work);
+	kfree(data);
+}
+
+static struct pcie_port_service_driver pcie_pme_driver = {
+	.name		= "pcie_pme",
+	.port_type	= PCIE_ANY_PORT,
+	.service	= PCIE_PORT_SERVICE_PME,
+
+	.probe		= pcie_pme_probe,
+	.suspend	= pcie_pme_suspend,
+	.resume		= pcie_pme_resume,
+	.remove		= pcie_pme_remove,
+};
+
+/**
+ * pcie_pme_init - Register the PCIe PME service driver.
+ */
+int __init pcie_pme_init(void)
+{
+	return pcie_port_service_register(&pcie_pme_driver);
+}
diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c
new file mode 100644
index 000000000..46fad0d81
--- /dev/null
+++ b/drivers/pci/pcie/portdrv.c
@@ -0,0 +1,843 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Purpose:	PCI Express Port Bus Driver
+ *
+ * Copyright (C) 2004 Intel
+ * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
+ */
+
+#include <linux/dmi.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/aer.h>
+
+#include "../pci.h"
+#include "portdrv.h"
+
+/*
+ * The PCIe Capability Interrupt Message Number (PCIe r3.1, sec 7.8.2) must
+ * be one of the first 32 MSI-X entries.  Per PCI r3.0, sec 6.8.3.1, MSI
+ * supports a maximum of 32 vectors per function.
+ */
+#define PCIE_PORT_MAX_MSI_ENTRIES	32
+
+#define get_descriptor_id(type, service) (((type - 4) << 8) | service)
+
+struct portdrv_service_data {
+	struct pcie_port_service_driver *drv;
+	struct device *dev;
+	u32 service;
+};
+
+/**
+ * release_pcie_device - free PCI Express port service device structure
+ * @dev: Port service device to release
+ *
+ * Invoked automatically when device is being removed in response to
+ * device_unregister(dev).  Release all resources being claimed.
+ */
+static void release_pcie_device(struct device *dev)
+{
+	kfree(to_pcie_device(dev));
+}
+
+/*
+ * Fill in *pme, *aer, *dpc with the relevant Interrupt Message Numbers if
+ * services are enabled in "mask".  Return the number of MSI/MSI-X vectors
+ * required to accommodate the largest Message Number.
+ */
+static int pcie_message_numbers(struct pci_dev *dev, int mask,
+				u32 *pme, u32 *aer, u32 *dpc)
+{
+	u32 nvec = 0, pos;
+	u16 reg16;
+
+	/*
+	 * The Interrupt Message Number indicates which vector is used, i.e.,
+	 * the MSI-X table entry or the MSI offset between the base Message
+	 * Data and the generated interrupt message.  See PCIe r3.1, sec
+	 * 7.8.2, 7.10.10, 7.31.2.
+	 */
+
+	if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP |
+		    PCIE_PORT_SERVICE_BWNOTIF)) {
+		pcie_capability_read_word(dev, PCI_EXP_FLAGS, &reg16);
+		*pme = (reg16 & PCI_EXP_FLAGS_IRQ) >> 9;
+		nvec = *pme + 1;
+	}
+
+#ifdef CONFIG_PCIEAER
+	if (mask & PCIE_PORT_SERVICE_AER) {
+		u32 reg32;
+
+		pos = dev->aer_cap;
+		if (pos) {
+			pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS,
+					      &reg32);
+			*aer = (reg32 & PCI_ERR_ROOT_AER_IRQ) >> 27;
+			nvec = max(nvec, *aer + 1);
+		}
+	}
+#endif
+
+	if (mask & PCIE_PORT_SERVICE_DPC) {
+		pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DPC);
+		if (pos) {
+			pci_read_config_word(dev, pos + PCI_EXP_DPC_CAP,
+					     &reg16);
+			*dpc = reg16 & PCI_EXP_DPC_IRQ;
+			nvec = max(nvec, *dpc + 1);
+		}
+	}
+
+	return nvec;
+}
+
+/**
+ * pcie_port_enable_irq_vec - try to set up MSI-X or MSI as interrupt mode
+ * for given port
+ * @dev: PCI Express port to handle
+ * @irqs: Array of interrupt vectors to populate
+ * @mask: Bitmask of port capabilities returned by get_port_device_capability()
+ *
+ * Return value: 0 on success, error code on failure
+ */
+static int pcie_port_enable_irq_vec(struct pci_dev *dev, int *irqs, int mask)
+{
+	int nr_entries, nvec, pcie_irq;
+	u32 pme = 0, aer = 0, dpc = 0;
+
+	/* Allocate the maximum possible number of MSI/MSI-X vectors */
+	nr_entries = pci_alloc_irq_vectors(dev, 1, PCIE_PORT_MAX_MSI_ENTRIES,
+			PCI_IRQ_MSIX | PCI_IRQ_MSI);
+	if (nr_entries < 0)
+		return nr_entries;
+
+	/* See how many and which Interrupt Message Numbers we actually use */
+	nvec = pcie_message_numbers(dev, mask, &pme, &aer, &dpc);
+	if (nvec > nr_entries) {
+		pci_free_irq_vectors(dev);
+		return -EIO;
+	}
+
+	/*
+	 * If we allocated more than we need, free them and reallocate fewer.
+	 *
+	 * Reallocating may change the specific vectors we get, so
+	 * pci_irq_vector() must be done *after* the reallocation.
+	 *
+	 * If we're using MSI, hardware is *allowed* to change the Interrupt
+	 * Message Numbers when we free and reallocate the vectors, but we
+	 * assume it won't because we allocate enough vectors for the
+	 * biggest Message Number we found.
+	 */
+	if (nvec != nr_entries) {
+		pci_free_irq_vectors(dev);
+
+		nr_entries = pci_alloc_irq_vectors(dev, nvec, nvec,
+				PCI_IRQ_MSIX | PCI_IRQ_MSI);
+		if (nr_entries < 0)
+			return nr_entries;
+	}
+
+	/* PME, hotplug and bandwidth notification share an MSI/MSI-X vector */
+	if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP |
+		    PCIE_PORT_SERVICE_BWNOTIF)) {
+		pcie_irq = pci_irq_vector(dev, pme);
+		irqs[PCIE_PORT_SERVICE_PME_SHIFT] = pcie_irq;
+		irqs[PCIE_PORT_SERVICE_HP_SHIFT] = pcie_irq;
+		irqs[PCIE_PORT_SERVICE_BWNOTIF_SHIFT] = pcie_irq;
+	}
+
+	if (mask & PCIE_PORT_SERVICE_AER)
+		irqs[PCIE_PORT_SERVICE_AER_SHIFT] = pci_irq_vector(dev, aer);
+
+	if (mask & PCIE_PORT_SERVICE_DPC)
+		irqs[PCIE_PORT_SERVICE_DPC_SHIFT] = pci_irq_vector(dev, dpc);
+
+	return 0;
+}
+
+/**
+ * pcie_init_service_irqs - initialize irqs for PCI Express port services
+ * @dev: PCI Express port to handle
+ * @irqs: Array of irqs to populate
+ * @mask: Bitmask of port capabilities returned by get_port_device_capability()
+ *
+ * Return value: Interrupt mode associated with the port
+ */
+static int pcie_init_service_irqs(struct pci_dev *dev, int *irqs, int mask)
+{
+	int ret, i;
+
+	for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++)
+		irqs[i] = -1;
+
+	/*
+	 * If we support PME but can't use MSI/MSI-X for it, we have to
+	 * fall back to INTx or other interrupts, e.g., a system shared
+	 * interrupt.
+	 */
+	if ((mask & PCIE_PORT_SERVICE_PME) && pcie_pme_no_msi())
+		goto legacy_irq;
+
+	/* Try to use MSI-X or MSI if supported */
+	if (pcie_port_enable_irq_vec(dev, irqs, mask) == 0)
+		return 0;
+
+legacy_irq:
+	/* fall back to legacy IRQ */
+	ret = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_LEGACY);
+	if (ret < 0)
+		return -ENODEV;
+
+	for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++)
+		irqs[i] = pci_irq_vector(dev, 0);
+
+	return 0;
+}
+
+/**
+ * get_port_device_capability - discover capabilities of a PCI Express port
+ * @dev: PCI Express port to examine
+ *
+ * The capabilities are read from the port's PCI Express configuration registers
+ * as described in PCI Express Base Specification 1.0a sections 7.8.2, 7.8.9 and
+ * 7.9 - 7.11.
+ *
+ * Return value: Bitmask of discovered port capabilities
+ */
+static int get_port_device_capability(struct pci_dev *dev)
+{
+	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
+	int services = 0;
+
+	if (dev->is_hotplug_bridge &&
+	    (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
+	     pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) &&
+	    (pcie_ports_native || host->native_pcie_hotplug)) {
+		services |= PCIE_PORT_SERVICE_HP;
+
+		/*
+		 * Disable hot-plug interrupts in case they have been enabled
+		 * by the BIOS and the hot-plug service driver is not loaded.
+		 */
+		pcie_capability_clear_word(dev, PCI_EXP_SLTCTL,
+			  PCI_EXP_SLTCTL_CCIE | PCI_EXP_SLTCTL_HPIE);
+	}
+
+#ifdef CONFIG_PCIEAER
+	if ((pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
+             pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC) &&
+	    dev->aer_cap && pci_aer_available() &&
+	    (pcie_ports_native || host->native_aer))
+		services |= PCIE_PORT_SERVICE_AER;
+#endif
+
+	/* Root Ports and Root Complex Event Collectors may generate PMEs */
+	if ((pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
+	     pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC) &&
+	    (pcie_ports_native || host->native_pme)) {
+		services |= PCIE_PORT_SERVICE_PME;
+
+		/*
+		 * Disable PME interrupt on this port in case it's been enabled
+		 * by the BIOS (the PME service driver will enable it when
+		 * necessary).
+		 */
+		pcie_pme_interrupt_enable(dev, false);
+	}
+
+	/*
+	 * With dpc-native, allow Linux to use DPC even if it doesn't have
+	 * permission to use AER.
+	 */
+	if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DPC) &&
+	    pci_aer_available() &&
+	    (pcie_ports_dpc_native || (services & PCIE_PORT_SERVICE_AER)))
+		services |= PCIE_PORT_SERVICE_DPC;
+
+	if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM ||
+	    pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
+		u32 linkcap;
+
+		pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &linkcap);
+		if (linkcap & PCI_EXP_LNKCAP_LBNC)
+			services |= PCIE_PORT_SERVICE_BWNOTIF;
+	}
+
+	return services;
+}
+
+/**
+ * pcie_device_init - allocate and initialize PCI Express port service device
+ * @pdev: PCI Express port to associate the service device with
+ * @service: Type of service to associate with the service device
+ * @irq: Interrupt vector to associate with the service device
+ */
+static int pcie_device_init(struct pci_dev *pdev, int service, int irq)
+{
+	int retval;
+	struct pcie_device *pcie;
+	struct device *device;
+
+	pcie = kzalloc(sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+	pcie->port = pdev;
+	pcie->irq = irq;
+	pcie->service = service;
+
+	/* Initialize generic device interface */
+	device = &pcie->device;
+	device->bus = &pcie_port_bus_type;
+	device->release = release_pcie_device;	/* callback to free pcie dev */
+	dev_set_name(device, "%s:pcie%03x",
+		     pci_name(pdev),
+		     get_descriptor_id(pci_pcie_type(pdev), service));
+	device->parent = &pdev->dev;
+	device_enable_async_suspend(device);
+
+	retval = device_register(device);
+	if (retval) {
+		put_device(device);
+		return retval;
+	}
+
+	pm_runtime_no_callbacks(device);
+
+	return 0;
+}
+
+/**
+ * pcie_port_device_register - register PCI Express port
+ * @dev: PCI Express port to register
+ *
+ * Allocate the port extension structure and register services associated with
+ * the port.
+ */
+static int pcie_port_device_register(struct pci_dev *dev)
+{
+	int status, capabilities, i, nr_service;
+	int irqs[PCIE_PORT_DEVICE_MAXSERVICES];
+
+	/* Enable PCI Express port device */
+	status = pci_enable_device(dev);
+	if (status)
+		return status;
+
+	/* Get and check PCI Express port services */
+	capabilities = get_port_device_capability(dev);
+	if (!capabilities)
+		return 0;
+
+	pci_set_master(dev);
+	/*
+	 * Initialize service irqs. Don't use service devices that
+	 * require interrupts if there is no way to generate them.
+	 * However, some drivers may have a polling mode (e.g. pciehp_poll_mode)
+	 * that can be used in the absence of irqs.  Allow them to determine
+	 * if that is to be used.
+	 */
+	status = pcie_init_service_irqs(dev, irqs, capabilities);
+	if (status) {
+		capabilities &= PCIE_PORT_SERVICE_HP;
+		if (!capabilities)
+			goto error_disable;
+	}
+
+	/* Allocate child services if any */
+	status = -ENODEV;
+	nr_service = 0;
+	for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) {
+		int service = 1 << i;
+		if (!(capabilities & service))
+			continue;
+		if (!pcie_device_init(dev, service, irqs[i]))
+			nr_service++;
+	}
+	if (!nr_service)
+		goto error_cleanup_irqs;
+
+	return 0;
+
+error_cleanup_irqs:
+	pci_free_irq_vectors(dev);
+error_disable:
+	pci_disable_device(dev);
+	return status;
+}
+
+typedef int (*pcie_callback_t)(struct pcie_device *);
+
+static int pcie_port_device_iter(struct device *dev, void *data)
+{
+	struct pcie_port_service_driver *service_driver;
+	size_t offset = *(size_t *)data;
+	pcie_callback_t cb;
+
+	if ((dev->bus == &pcie_port_bus_type) && dev->driver) {
+		service_driver = to_service_driver(dev->driver);
+		cb = *(pcie_callback_t *)((void *)service_driver + offset);
+		if (cb)
+			return cb(to_pcie_device(dev));
+	}
+	return 0;
+}
+
+#ifdef CONFIG_PM
+/**
+ * pcie_port_device_suspend - suspend port services associated with a PCIe port
+ * @dev: PCI Express port to handle
+ */
+static int pcie_port_device_suspend(struct device *dev)
+{
+	size_t off = offsetof(struct pcie_port_service_driver, suspend);
+	return device_for_each_child(dev, &off, pcie_port_device_iter);
+}
+
+static int pcie_port_device_resume_noirq(struct device *dev)
+{
+	size_t off = offsetof(struct pcie_port_service_driver, resume_noirq);
+	return device_for_each_child(dev, &off, pcie_port_device_iter);
+}
+
+/**
+ * pcie_port_device_resume - resume port services associated with a PCIe port
+ * @dev: PCI Express port to handle
+ */
+static int pcie_port_device_resume(struct device *dev)
+{
+	size_t off = offsetof(struct pcie_port_service_driver, resume);
+	return device_for_each_child(dev, &off, pcie_port_device_iter);
+}
+
+/**
+ * pcie_port_device_runtime_suspend - runtime suspend port services
+ * @dev: PCI Express port to handle
+ */
+static int pcie_port_device_runtime_suspend(struct device *dev)
+{
+	size_t off = offsetof(struct pcie_port_service_driver, runtime_suspend);
+	return device_for_each_child(dev, &off, pcie_port_device_iter);
+}
+
+/**
+ * pcie_port_device_runtime_resume - runtime resume port services
+ * @dev: PCI Express port to handle
+ */
+static int pcie_port_device_runtime_resume(struct device *dev)
+{
+	size_t off = offsetof(struct pcie_port_service_driver, runtime_resume);
+	return device_for_each_child(dev, &off, pcie_port_device_iter);
+}
+#endif /* PM */
+
+static int remove_iter(struct device *dev, void *data)
+{
+	if (dev->bus == &pcie_port_bus_type)
+		device_unregister(dev);
+	return 0;
+}
+
+static int find_service_iter(struct device *device, void *data)
+{
+	struct pcie_port_service_driver *service_driver;
+	struct portdrv_service_data *pdrvs;
+	u32 service;
+
+	pdrvs = (struct portdrv_service_data *) data;
+	service = pdrvs->service;
+
+	if (device->bus == &pcie_port_bus_type && device->driver) {
+		service_driver = to_service_driver(device->driver);
+		if (service_driver->service == service) {
+			pdrvs->drv = service_driver;
+			pdrvs->dev = device;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * pcie_port_find_device - find the struct device
+ * @dev: PCI Express port the service is associated with
+ * @service: For the service to find
+ *
+ * Find the struct device associated with given service on a pci_dev
+ */
+struct device *pcie_port_find_device(struct pci_dev *dev,
+				      u32 service)
+{
+	struct device *device;
+	struct portdrv_service_data pdrvs;
+
+	pdrvs.dev = NULL;
+	pdrvs.service = service;
+	device_for_each_child(&dev->dev, &pdrvs, find_service_iter);
+
+	device = pdrvs.dev;
+	return device;
+}
+EXPORT_SYMBOL_GPL(pcie_port_find_device);
+
+/**
+ * pcie_port_device_remove - unregister PCI Express port service devices
+ * @dev: PCI Express port the service devices to unregister are associated with
+ *
+ * Remove PCI Express port service devices associated with given port and
+ * disable MSI-X or MSI for the port.
+ */
+static void pcie_port_device_remove(struct pci_dev *dev)
+{
+	device_for_each_child(&dev->dev, NULL, remove_iter);
+	pci_free_irq_vectors(dev);
+}
+
+/**
+ * pcie_port_probe_service - probe driver for given PCI Express port service
+ * @dev: PCI Express port service device to probe against
+ *
+ * If PCI Express port service driver is registered with
+ * pcie_port_service_register(), this function will be called by the driver core
+ * whenever match is found between the driver and a port service device.
+ */
+static int pcie_port_probe_service(struct device *dev)
+{
+	struct pcie_device *pciedev;
+	struct pcie_port_service_driver *driver;
+	int status;
+
+	if (!dev || !dev->driver)
+		return -ENODEV;
+
+	driver = to_service_driver(dev->driver);
+	if (!driver || !driver->probe)
+		return -ENODEV;
+
+	pciedev = to_pcie_device(dev);
+	status = driver->probe(pciedev);
+	if (status)
+		return status;
+
+	get_device(dev);
+	return 0;
+}
+
+/**
+ * pcie_port_remove_service - detach driver from given PCI Express port service
+ * @dev: PCI Express port service device to handle
+ *
+ * If PCI Express port service driver is registered with
+ * pcie_port_service_register(), this function will be called by the driver core
+ * when device_unregister() is called for the port service device associated
+ * with the driver.
+ */
+static int pcie_port_remove_service(struct device *dev)
+{
+	struct pcie_device *pciedev;
+	struct pcie_port_service_driver *driver;
+
+	if (!dev || !dev->driver)
+		return 0;
+
+	pciedev = to_pcie_device(dev);
+	driver = to_service_driver(dev->driver);
+	if (driver && driver->remove) {
+		driver->remove(pciedev);
+		put_device(dev);
+	}
+	return 0;
+}
+
+/**
+ * pcie_port_shutdown_service - shut down given PCI Express port service
+ * @dev: PCI Express port service device to handle
+ *
+ * If PCI Express port service driver is registered with
+ * pcie_port_service_register(), this function will be called by the driver core
+ * when device_shutdown() is called for the port service device associated
+ * with the driver.
+ */
+static void pcie_port_shutdown_service(struct device *dev) {}
+
+/**
+ * pcie_port_service_register - register PCI Express port service driver
+ * @new: PCI Express port service driver to register
+ */
+int pcie_port_service_register(struct pcie_port_service_driver *new)
+{
+	if (pcie_ports_disabled)
+		return -ENODEV;
+
+	new->driver.name = new->name;
+	new->driver.bus = &pcie_port_bus_type;
+	new->driver.probe = pcie_port_probe_service;
+	new->driver.remove = pcie_port_remove_service;
+	new->driver.shutdown = pcie_port_shutdown_service;
+
+	return driver_register(&new->driver);
+}
+
+/**
+ * pcie_port_service_unregister - unregister PCI Express port service driver
+ * @drv: PCI Express port service driver to unregister
+ */
+void pcie_port_service_unregister(struct pcie_port_service_driver *drv)
+{
+	driver_unregister(&drv->driver);
+}
+
+/* If this switch is set, PCIe port native services should not be enabled. */
+bool pcie_ports_disabled;
+
+/*
+ * If the user specified "pcie_ports=native", use the PCIe services regardless
+ * of whether the platform has given us permission.  On ACPI systems, this
+ * means we ignore _OSC.
+ */
+bool pcie_ports_native;
+
+/*
+ * If the user specified "pcie_ports=dpc-native", use the Linux DPC PCIe
+ * service even if the platform hasn't given us permission.
+ */
+bool pcie_ports_dpc_native;
+
+static int __init pcie_port_setup(char *str)
+{
+	if (!strncmp(str, "compat", 6))
+		pcie_ports_disabled = true;
+	else if (!strncmp(str, "native", 6))
+		pcie_ports_native = true;
+	else if (!strncmp(str, "dpc-native", 10))
+		pcie_ports_dpc_native = true;
+
+	return 1;
+}
+__setup("pcie_ports=", pcie_port_setup);
+
+/* global data */
+
+#ifdef CONFIG_PM
+static int pcie_port_runtime_suspend(struct device *dev)
+{
+	if (!to_pci_dev(dev)->bridge_d3)
+		return -EBUSY;
+
+	return pcie_port_device_runtime_suspend(dev);
+}
+
+static int pcie_port_runtime_idle(struct device *dev)
+{
+	/*
+	 * Assume the PCI core has set bridge_d3 whenever it thinks the port
+	 * should be good to go to D3.  Everything else, including moving
+	 * the port to D3, is handled by the PCI core.
+	 */
+	return to_pci_dev(dev)->bridge_d3 ? 0 : -EBUSY;
+}
+
+static const struct dev_pm_ops pcie_portdrv_pm_ops = {
+	.suspend	= pcie_port_device_suspend,
+	.resume_noirq	= pcie_port_device_resume_noirq,
+	.resume		= pcie_port_device_resume,
+	.freeze		= pcie_port_device_suspend,
+	.thaw		= pcie_port_device_resume,
+	.poweroff	= pcie_port_device_suspend,
+	.restore_noirq	= pcie_port_device_resume_noirq,
+	.restore	= pcie_port_device_resume,
+	.runtime_suspend = pcie_port_runtime_suspend,
+	.runtime_resume	= pcie_port_device_runtime_resume,
+	.runtime_idle	= pcie_port_runtime_idle,
+};
+
+#define PCIE_PORTDRV_PM_OPS	(&pcie_portdrv_pm_ops)
+
+#else /* !PM */
+
+#define PCIE_PORTDRV_PM_OPS	NULL
+#endif /* !PM */
+
+/*
+ * pcie_portdrv_probe - Probe PCI-Express port devices
+ * @dev: PCI-Express port device being probed
+ *
+ * If detected invokes the pcie_port_device_register() method for
+ * this port device.
+ *
+ */
+static int pcie_portdrv_probe(struct pci_dev *dev,
+					const struct pci_device_id *id)
+{
+	int type = pci_pcie_type(dev);
+	int status;
+
+	if (!pci_is_pcie(dev) ||
+	    ((type != PCI_EXP_TYPE_ROOT_PORT) &&
+	     (type != PCI_EXP_TYPE_UPSTREAM) &&
+	     (type != PCI_EXP_TYPE_DOWNSTREAM) &&
+	     (type != PCI_EXP_TYPE_RC_EC)))
+		return -ENODEV;
+
+	if (type == PCI_EXP_TYPE_RC_EC)
+		pcie_link_rcec(dev);
+
+	status = pcie_port_device_register(dev);
+	if (status)
+		return status;
+
+	pci_save_state(dev);
+
+	dev_pm_set_driver_flags(&dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE |
+					   DPM_FLAG_SMART_SUSPEND);
+
+	if (pci_bridge_d3_possible(dev)) {
+		/*
+		 * Keep the port resumed 100ms to make sure things like
+		 * config space accesses from userspace (lspci) will not
+		 * cause the port to repeatedly suspend and resume.
+		 */
+		pm_runtime_set_autosuspend_delay(&dev->dev, 100);
+		pm_runtime_use_autosuspend(&dev->dev);
+		pm_runtime_mark_last_busy(&dev->dev);
+		pm_runtime_put_autosuspend(&dev->dev);
+		pm_runtime_allow(&dev->dev);
+	}
+
+	return 0;
+}
+
+static void pcie_portdrv_remove(struct pci_dev *dev)
+{
+	if (pci_bridge_d3_possible(dev)) {
+		pm_runtime_forbid(&dev->dev);
+		pm_runtime_get_noresume(&dev->dev);
+		pm_runtime_dont_use_autosuspend(&dev->dev);
+	}
+
+	pcie_port_device_remove(dev);
+
+	pci_disable_device(dev);
+}
+
+static void pcie_portdrv_shutdown(struct pci_dev *dev)
+{
+	if (pci_bridge_d3_possible(dev)) {
+		pm_runtime_forbid(&dev->dev);
+		pm_runtime_get_noresume(&dev->dev);
+		pm_runtime_dont_use_autosuspend(&dev->dev);
+	}
+
+	pcie_port_device_remove(dev);
+}
+
+static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev,
+					pci_channel_state_t error)
+{
+	if (error == pci_channel_io_frozen)
+		return PCI_ERS_RESULT_NEED_RESET;
+	return PCI_ERS_RESULT_CAN_RECOVER;
+}
+
+static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev)
+{
+	size_t off = offsetof(struct pcie_port_service_driver, slot_reset);
+	device_for_each_child(&dev->dev, &off, pcie_port_device_iter);
+
+	pci_restore_state(dev);
+	pci_save_state(dev);
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+static pci_ers_result_t pcie_portdrv_mmio_enabled(struct pci_dev *dev)
+{
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/*
+ * LINUX Device Driver Model
+ */
+static const struct pci_device_id port_pci_ids[] = {
+	/* handle any PCI-Express port */
+	{ PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_NORMAL, ~0) },
+	/* subtractive decode PCI-to-PCI bridge, class type is 060401h */
+	{ PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE, ~0) },
+	/* handle any Root Complex Event Collector */
+	{ PCI_DEVICE_CLASS(((PCI_CLASS_SYSTEM_RCEC << 8) | 0x00), ~0) },
+	{ },
+};
+
+static const struct pci_error_handlers pcie_portdrv_err_handler = {
+	.error_detected = pcie_portdrv_error_detected,
+	.slot_reset = pcie_portdrv_slot_reset,
+	.mmio_enabled = pcie_portdrv_mmio_enabled,
+};
+
+static struct pci_driver pcie_portdriver = {
+	.name		= "pcieport",
+	.id_table	= &port_pci_ids[0],
+
+	.probe		= pcie_portdrv_probe,
+	.remove		= pcie_portdrv_remove,
+	.shutdown	= pcie_portdrv_shutdown,
+
+	.err_handler	= &pcie_portdrv_err_handler,
+
+	.driver_managed_dma = true,
+
+	.driver.pm	= PCIE_PORTDRV_PM_OPS,
+};
+
+static int __init dmi_pcie_pme_disable_msi(const struct dmi_system_id *d)
+{
+	pr_notice("%s detected: will not use MSI for PCIe PME signaling\n",
+		  d->ident);
+	pcie_pme_disable_msi();
+	return 0;
+}
+
+static const struct dmi_system_id pcie_portdrv_dmi_table[] __initconst = {
+	/*
+	 * Boxes that should not use MSI for PCIe PME signaling.
+	 */
+	{
+	 .callback = dmi_pcie_pme_disable_msi,
+	 .ident = "MSI Wind U-100",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR,
+				"MICRO-STAR INTERNATIONAL CO., LTD"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "U-100"),
+		     },
+	 },
+	 {}
+};
+
+static void __init pcie_init_services(void)
+{
+	pcie_aer_init();
+	pcie_pme_init();
+	pcie_dpc_init();
+	pcie_hp_init();
+}
+
+static int __init pcie_portdrv_init(void)
+{
+	if (pcie_ports_disabled)
+		return -EACCES;
+
+	pcie_init_services();
+	dmi_check_system(pcie_portdrv_dmi_table);
+
+	return pci_register_driver(&pcie_portdriver);
+}
+device_initcall(pcie_portdrv_init);
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
new file mode 100644
index 000000000..1f3803bde
--- /dev/null
+++ b/drivers/pci/pcie/portdrv.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Purpose:	PCI Express Port Bus Driver's Internal Data Structures
+ *
+ * Copyright (C) 2004 Intel
+ * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
+ */
+
+#ifndef _PORTDRV_H_
+#define _PORTDRV_H_
+
+#include <linux/compiler.h>
+
+/* Service Type */
+#define PCIE_PORT_SERVICE_PME_SHIFT	0	/* Power Management Event */
+#define PCIE_PORT_SERVICE_PME		(1 << PCIE_PORT_SERVICE_PME_SHIFT)
+#define PCIE_PORT_SERVICE_AER_SHIFT	1	/* Advanced Error Reporting */
+#define PCIE_PORT_SERVICE_AER		(1 << PCIE_PORT_SERVICE_AER_SHIFT)
+#define PCIE_PORT_SERVICE_HP_SHIFT	2	/* Native Hotplug */
+#define PCIE_PORT_SERVICE_HP		(1 << PCIE_PORT_SERVICE_HP_SHIFT)
+#define PCIE_PORT_SERVICE_DPC_SHIFT	3	/* Downstream Port Containment */
+#define PCIE_PORT_SERVICE_DPC		(1 << PCIE_PORT_SERVICE_DPC_SHIFT)
+#define PCIE_PORT_SERVICE_BWNOTIF_SHIFT	4	/* Bandwidth notification */
+#define PCIE_PORT_SERVICE_BWNOTIF	(1 << PCIE_PORT_SERVICE_BWNOTIF_SHIFT)
+
+#define PCIE_PORT_DEVICE_MAXSERVICES   5
+
+extern bool pcie_ports_dpc_native;
+
+#ifdef CONFIG_PCIEAER
+int pcie_aer_init(void);
+#else
+static inline int pcie_aer_init(void) { return 0; }
+#endif
+
+#ifdef CONFIG_HOTPLUG_PCI_PCIE
+int pcie_hp_init(void);
+#else
+static inline int pcie_hp_init(void) { return 0; }
+#endif
+
+#ifdef CONFIG_PCIE_PME
+int pcie_pme_init(void);
+#else
+static inline int pcie_pme_init(void) { return 0; }
+#endif
+
+#ifdef CONFIG_PCIE_DPC
+int pcie_dpc_init(void);
+#else
+static inline int pcie_dpc_init(void) { return 0; }
+#endif
+
+/* Port Type */
+#define PCIE_ANY_PORT			(~0)
+
+struct pcie_device {
+	int		irq;	    /* Service IRQ/MSI/MSI-X Vector */
+	struct pci_dev *port;	    /* Root/Upstream/Downstream Port */
+	u32		service;    /* Port service this device represents */
+	void		*priv_data; /* Service Private Data */
+	struct device	device;     /* Generic Device Interface */
+};
+#define to_pcie_device(d) container_of(d, struct pcie_device, device)
+
+static inline void set_service_data(struct pcie_device *dev, void *data)
+{
+	dev->priv_data = data;
+}
+
+static inline void *get_service_data(struct pcie_device *dev)
+{
+	return dev->priv_data;
+}
+
+struct pcie_port_service_driver {
+	const char *name;
+	int (*probe)(struct pcie_device *dev);
+	void (*remove)(struct pcie_device *dev);
+	int (*suspend)(struct pcie_device *dev);
+	int (*resume_noirq)(struct pcie_device *dev);
+	int (*resume)(struct pcie_device *dev);
+	int (*runtime_suspend)(struct pcie_device *dev);
+	int (*runtime_resume)(struct pcie_device *dev);
+
+	int (*slot_reset)(struct pcie_device *dev);
+
+	int port_type;  /* Type of the port this driver can handle */
+	u32 service;    /* Port service this device represents */
+
+	struct device_driver driver;
+};
+#define to_service_driver(d) \
+	container_of(d, struct pcie_port_service_driver, driver)
+
+int pcie_port_service_register(struct pcie_port_service_driver *new);
+void pcie_port_service_unregister(struct pcie_port_service_driver *new);
+
+extern struct bus_type pcie_port_bus_type;
+
+struct pci_dev;
+
+#ifdef CONFIG_PCIE_PME
+extern bool pcie_pme_msi_disabled;
+
+static inline void pcie_pme_disable_msi(void)
+{
+	pcie_pme_msi_disabled = true;
+}
+
+static inline bool pcie_pme_no_msi(void)
+{
+	return pcie_pme_msi_disabled;
+}
+
+void pcie_pme_interrupt_enable(struct pci_dev *dev, bool enable);
+#else /* !CONFIG_PCIE_PME */
+static inline void pcie_pme_disable_msi(void) {}
+static inline bool pcie_pme_no_msi(void) { return false; }
+static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {}
+#endif /* !CONFIG_PCIE_PME */
+
+struct device *pcie_port_find_device(struct pci_dev *dev, u32 service);
+#endif /* _PORTDRV_H_ */
diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c
new file mode 100644
index 000000000..b4e5f5534
--- /dev/null
+++ b/drivers/pci/pcie/ptm.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Express Precision Time Measurement
+ * Copyright (c) 2016, Intel Corporation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include "../pci.h"
+
+/*
+ * If the next upstream device supports PTM, return it; otherwise return
+ * NULL.  PTM Messages are local, so both link partners must support it.
+ */
+static struct pci_dev *pci_upstream_ptm(struct pci_dev *dev)
+{
+	struct pci_dev *ups = pci_upstream_bridge(dev);
+
+	/*
+	 * Switch Downstream Ports are not permitted to have a PTM
+	 * capability; their PTM behavior is controlled by the Upstream
+	 * Port (PCIe r5.0, sec 7.9.16), so if the upstream bridge is a
+	 * Switch Downstream Port, look up one more level.
+	 */
+	if (ups && pci_pcie_type(ups) == PCI_EXP_TYPE_DOWNSTREAM)
+		ups = pci_upstream_bridge(ups);
+
+	if (ups && ups->ptm_cap)
+		return ups;
+
+	return NULL;
+}
+
+/*
+ * Find the PTM Capability (if present) and extract the information we need
+ * to use it.
+ */
+void pci_ptm_init(struct pci_dev *dev)
+{
+	u16 ptm;
+	u32 cap;
+	struct pci_dev *ups;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	ptm = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PTM);
+	if (!ptm)
+		return;
+
+	dev->ptm_cap = ptm;
+	pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_PTM, sizeof(u32));
+
+	pci_read_config_dword(dev, ptm + PCI_PTM_CAP, &cap);
+	dev->ptm_granularity = (cap & PCI_PTM_GRANULARITY_MASK) >> 8;
+
+	/*
+	 * Per the spec recommendation (PCIe r6.0, sec 7.9.15.3), select the
+	 * furthest upstream Time Source as the PTM Root.  For Endpoints,
+	 * "the Effective Granularity is the maximum Local Clock Granularity
+	 * reported by the PTM Root and all intervening PTM Time Sources."
+	 */
+	ups = pci_upstream_ptm(dev);
+	if (ups) {
+		if (ups->ptm_granularity == 0)
+			dev->ptm_granularity = 0;
+		else if (ups->ptm_granularity > dev->ptm_granularity)
+			dev->ptm_granularity = ups->ptm_granularity;
+	} else if (cap & PCI_PTM_CAP_ROOT) {
+		dev->ptm_root = 1;
+	} else if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) {
+
+		/*
+		 * Per sec 7.9.15.3, this should be the Local Clock
+		 * Granularity of the associated Time Source.  But it
+		 * doesn't say how to find that Time Source.
+		 */
+		dev->ptm_granularity = 0;
+	}
+
+	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
+	    pci_pcie_type(dev) == PCI_EXP_TYPE_UPSTREAM)
+		pci_enable_ptm(dev, NULL);
+}
+
+void pci_save_ptm_state(struct pci_dev *dev)
+{
+	u16 ptm = dev->ptm_cap;
+	struct pci_cap_saved_state *save_state;
+	u32 *cap;
+
+	if (!ptm)
+		return;
+
+	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_PTM);
+	if (!save_state)
+		return;
+
+	cap = (u32 *)&save_state->cap.data[0];
+	pci_read_config_dword(dev, ptm + PCI_PTM_CTRL, cap);
+}
+
+void pci_restore_ptm_state(struct pci_dev *dev)
+{
+	u16 ptm = dev->ptm_cap;
+	struct pci_cap_saved_state *save_state;
+	u32 *cap;
+
+	if (!ptm)
+		return;
+
+	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_PTM);
+	if (!save_state)
+		return;
+
+	cap = (u32 *)&save_state->cap.data[0];
+	pci_write_config_dword(dev, ptm + PCI_PTM_CTRL, *cap);
+}
+
+/* Enable PTM in the Control register if possible */
+static int __pci_enable_ptm(struct pci_dev *dev)
+{
+	u16 ptm = dev->ptm_cap;
+	struct pci_dev *ups;
+	u32 ctrl;
+
+	if (!ptm)
+		return -EINVAL;
+
+	/*
+	 * A device uses local PTM Messages to request time information
+	 * from a PTM Root that's farther upstream.  Every device along the
+	 * path must support PTM and have it enabled so it can handle the
+	 * messages.  Therefore, if this device is not a PTM Root, the
+	 * upstream link partner must have PTM enabled before we can enable
+	 * PTM.
+	 */
+	if (!dev->ptm_root) {
+		ups = pci_upstream_ptm(dev);
+		if (!ups || !ups->ptm_enabled)
+			return -EINVAL;
+	}
+
+	pci_read_config_dword(dev, ptm + PCI_PTM_CTRL, &ctrl);
+
+	ctrl |= PCI_PTM_CTRL_ENABLE;
+	ctrl &= ~PCI_PTM_GRANULARITY_MASK;
+	ctrl |= dev->ptm_granularity << 8;
+	if (dev->ptm_root)
+		ctrl |= PCI_PTM_CTRL_ROOT;
+
+	pci_write_config_dword(dev, ptm + PCI_PTM_CTRL, ctrl);
+	return 0;
+}
+
+/**
+ * pci_enable_ptm() - Enable Precision Time Measurement
+ * @dev: PCI device
+ * @granularity: pointer to return granularity
+ *
+ * Enable Precision Time Measurement for @dev.  If successful and
+ * @granularity is non-NULL, return the Effective Granularity.
+ *
+ * Return: zero if successful, or -EINVAL if @dev lacks a PTM Capability or
+ * is not a PTM Root and lacks an upstream path of PTM-enabled devices.
+ */
+int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
+{
+	int rc;
+	char clock_desc[8];
+
+	rc = __pci_enable_ptm(dev);
+	if (rc)
+		return rc;
+
+	dev->ptm_enabled = 1;
+
+	if (granularity)
+		*granularity = dev->ptm_granularity;
+
+	switch (dev->ptm_granularity) {
+	case 0:
+		snprintf(clock_desc, sizeof(clock_desc), "unknown");
+		break;
+	case 255:
+		snprintf(clock_desc, sizeof(clock_desc), ">254ns");
+		break;
+	default:
+		snprintf(clock_desc, sizeof(clock_desc), "%uns",
+			 dev->ptm_granularity);
+		break;
+	}
+	pci_info(dev, "PTM enabled%s, %s granularity\n",
+		 dev->ptm_root ? " (root)" : "", clock_desc);
+
+	return 0;
+}
+EXPORT_SYMBOL(pci_enable_ptm);
+
+static void __pci_disable_ptm(struct pci_dev *dev)
+{
+	u16 ptm = dev->ptm_cap;
+	u32 ctrl;
+
+	if (!ptm)
+		return;
+
+	pci_read_config_dword(dev, ptm + PCI_PTM_CTRL, &ctrl);
+	ctrl &= ~(PCI_PTM_CTRL_ENABLE | PCI_PTM_CTRL_ROOT);
+	pci_write_config_dword(dev, ptm + PCI_PTM_CTRL, ctrl);
+}
+
+/**
+ * pci_disable_ptm() - Disable Precision Time Measurement
+ * @dev: PCI device
+ *
+ * Disable Precision Time Measurement for @dev.
+ */
+void pci_disable_ptm(struct pci_dev *dev)
+{
+	if (dev->ptm_enabled) {
+		__pci_disable_ptm(dev);
+		dev->ptm_enabled = 0;
+	}
+}
+EXPORT_SYMBOL(pci_disable_ptm);
+
+/*
+ * Disable PTM, but preserve dev->ptm_enabled so we silently re-enable it on
+ * resume if necessary.
+ */
+void pci_suspend_ptm(struct pci_dev *dev)
+{
+	if (dev->ptm_enabled)
+		__pci_disable_ptm(dev);
+}
+
+/* If PTM was enabled before suspend, re-enable it when resuming */
+void pci_resume_ptm(struct pci_dev *dev)
+{
+	if (dev->ptm_enabled)
+		__pci_enable_ptm(dev);
+}
+
+bool pcie_ptm_enabled(struct pci_dev *dev)
+{
+	if (!dev)
+		return false;
+
+	return dev->ptm_enabled;
+}
+EXPORT_SYMBOL(pcie_ptm_enabled);
diff --git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c
new file mode 100644
index 000000000..d0bcd141a
--- /dev/null
+++ b/drivers/pci/pcie/rcec.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Root Complex Event Collector Support
+ *
+ * Authors:
+ *  Sean V Kelley <sean.v.kelley@intel.com>
+ *  Qiuxu Zhuo <qiuxu.zhuo@intel.com>
+ *
+ * Copyright (C) 2020 Intel Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/pci_regs.h>
+
+#include "../pci.h"
+
+struct walk_rcec_data {
+	struct pci_dev *rcec;
+	int (*user_callback)(struct pci_dev *dev, void *data);
+	void *user_data;
+};
+
+static bool rcec_assoc_rciep(struct pci_dev *rcec, struct pci_dev *rciep)
+{
+	unsigned long bitmap = rcec->rcec_ea->bitmap;
+	unsigned int devn;
+
+	/* An RCiEP found on a different bus in range */
+	if (rcec->bus->number != rciep->bus->number)
+		return true;
+
+	/* Same bus, so check bitmap */
+	for_each_set_bit(devn, &bitmap, 32)
+		if (devn == PCI_SLOT(rciep->devfn))
+			return true;
+
+	return false;
+}
+
+static int link_rcec_helper(struct pci_dev *dev, void *data)
+{
+	struct walk_rcec_data *rcec_data = data;
+	struct pci_dev *rcec = rcec_data->rcec;
+
+	if ((pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) &&
+	    rcec_assoc_rciep(rcec, dev)) {
+		dev->rcec = rcec;
+		pci_dbg(dev, "PME & error events signaled via %s\n",
+			pci_name(rcec));
+	}
+
+	return 0;
+}
+
+static int walk_rcec_helper(struct pci_dev *dev, void *data)
+{
+	struct walk_rcec_data *rcec_data = data;
+	struct pci_dev *rcec = rcec_data->rcec;
+
+	if ((pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) &&
+	    rcec_assoc_rciep(rcec, dev))
+		rcec_data->user_callback(dev, rcec_data->user_data);
+
+	return 0;
+}
+
+static void walk_rcec(int (*cb)(struct pci_dev *dev, void *data),
+		      void *userdata)
+{
+	struct walk_rcec_data *rcec_data = userdata;
+	struct pci_dev *rcec = rcec_data->rcec;
+	u8 nextbusn, lastbusn;
+	struct pci_bus *bus;
+	unsigned int bnr;
+
+	if (!rcec->rcec_ea)
+		return;
+
+	/* Walk own bus for bitmap based association */
+	pci_walk_bus(rcec->bus, cb, rcec_data);
+
+	nextbusn = rcec->rcec_ea->nextbusn;
+	lastbusn = rcec->rcec_ea->lastbusn;
+
+	/* All RCiEP devices are on the same bus as the RCEC */
+	if (nextbusn == 0xff && lastbusn == 0x00)
+		return;
+
+	for (bnr = nextbusn; bnr <= lastbusn; bnr++) {
+		/* No association indicated (PCIe 5.0-1, 7.9.10.3) */
+		if (bnr == rcec->bus->number)
+			continue;
+
+		bus = pci_find_bus(pci_domain_nr(rcec->bus), bnr);
+		if (!bus)
+			continue;
+
+		/* Find RCiEP devices on the given bus ranges */
+		pci_walk_bus(bus, cb, rcec_data);
+	}
+}
+
+/**
+ * pcie_link_rcec - Link RCiEP devices associated with RCEC.
+ * @rcec: RCEC whose RCiEP devices should be linked.
+ *
+ * Link the given RCEC to each RCiEP device found.
+ */
+void pcie_link_rcec(struct pci_dev *rcec)
+{
+	struct walk_rcec_data rcec_data;
+
+	if (!rcec->rcec_ea)
+		return;
+
+	rcec_data.rcec = rcec;
+	rcec_data.user_callback = NULL;
+	rcec_data.user_data = NULL;
+
+	walk_rcec(link_rcec_helper, &rcec_data);
+}
+
+/**
+ * pcie_walk_rcec - Walk RCiEP devices associating with RCEC and call callback.
+ * @rcec:	RCEC whose RCiEP devices should be walked
+ * @cb:		Callback to be called for each RCiEP device found
+ * @userdata:	Arbitrary pointer to be passed to callback
+ *
+ * Walk the given RCEC. Call the callback on each RCiEP found.
+ *
+ * If @cb returns anything other than 0, break out.
+ */
+void pcie_walk_rcec(struct pci_dev *rcec, int (*cb)(struct pci_dev *, void *),
+		    void *userdata)
+{
+	struct walk_rcec_data rcec_data;
+
+	if (!rcec->rcec_ea)
+		return;
+
+	rcec_data.rcec = rcec;
+	rcec_data.user_callback = cb;
+	rcec_data.user_data = userdata;
+
+	walk_rcec(walk_rcec_helper, &rcec_data);
+}
+
+void pci_rcec_init(struct pci_dev *dev)
+{
+	struct rcec_ea *rcec_ea;
+	u32 rcec, hdr, busn;
+	u8 ver;
+
+	/* Only for Root Complex Event Collectors */
+	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC)
+		return;
+
+	rcec = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_RCEC);
+	if (!rcec)
+		return;
+
+	rcec_ea = kzalloc(sizeof(*rcec_ea), GFP_KERNEL);
+	if (!rcec_ea)
+		return;
+
+	pci_read_config_dword(dev, rcec + PCI_RCEC_RCIEP_BITMAP,
+			      &rcec_ea->bitmap);
+
+	/* Check whether RCEC BUSN register is present */
+	pci_read_config_dword(dev, rcec, &hdr);
+	ver = PCI_EXT_CAP_VER(hdr);
+	if (ver >= PCI_RCEC_BUSN_REG_VER) {
+		pci_read_config_dword(dev, rcec + PCI_RCEC_BUSN, &busn);
+		rcec_ea->nextbusn = PCI_RCEC_BUSN_NEXT(busn);
+		rcec_ea->lastbusn = PCI_RCEC_BUSN_LAST(busn);
+	} else {
+		/* Avoid later ver check by setting nextbusn */
+		rcec_ea->nextbusn = 0xff;
+		rcec_ea->lastbusn = 0x00;
+	}
+
+	dev->rcec_ea = rcec_ea;
+}
+
+void pci_rcec_exit(struct pci_dev *dev)
+{
+	kfree(dev->rcec_ea);
+	dev->rcec_ea = NULL;
+}
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
new file mode 100644
index 000000000..43159965e
--- /dev/null
+++ b/drivers/pci/probe.c
@@ -0,0 +1,3377 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI detection and setup code
+ */
+
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/of_pci.h>
+#include <linux/pci_hotplug.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/cpumask.h>
+#include <linux/aer.h>
+#include <linux/acpi.h>
+#include <linux/hypervisor.h>
+#include <linux/irqdomain.h>
+#include <linux/pm_runtime.h>
+#include <linux/bitfield.h>
+#include "pci.h"
+
+#define CARDBUS_LATENCY_TIMER	176	/* secondary latency timer */
+#define CARDBUS_RESERVE_BUSNR	3
+
+static struct resource busn_resource = {
+	.name	= "PCI busn",
+	.start	= 0,
+	.end	= 255,
+	.flags	= IORESOURCE_BUS,
+};
+
+/* Ugh.  Need to stop exporting this to modules. */
+LIST_HEAD(pci_root_buses);
+EXPORT_SYMBOL(pci_root_buses);
+
+static LIST_HEAD(pci_domain_busn_res_list);
+
+struct pci_domain_busn_res {
+	struct list_head list;
+	struct resource res;
+	int domain_nr;
+};
+
+static struct resource *get_pci_domain_busn_res(int domain_nr)
+{
+	struct pci_domain_busn_res *r;
+
+	list_for_each_entry(r, &pci_domain_busn_res_list, list)
+		if (r->domain_nr == domain_nr)
+			return &r->res;
+
+	r = kzalloc(sizeof(*r), GFP_KERNEL);
+	if (!r)
+		return NULL;
+
+	r->domain_nr = domain_nr;
+	r->res.start = 0;
+	r->res.end = 0xff;
+	r->res.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED;
+
+	list_add_tail(&r->list, &pci_domain_busn_res_list);
+
+	return &r->res;
+}
+
+/*
+ * Some device drivers need know if PCI is initiated.
+ * Basically, we think PCI is not initiated when there
+ * is no device to be found on the pci_bus_type.
+ */
+int no_pci_devices(void)
+{
+	struct device *dev;
+	int no_devices;
+
+	dev = bus_find_next_device(&pci_bus_type, NULL);
+	no_devices = (dev == NULL);
+	put_device(dev);
+	return no_devices;
+}
+EXPORT_SYMBOL(no_pci_devices);
+
+/*
+ * PCI Bus Class
+ */
+static void release_pcibus_dev(struct device *dev)
+{
+	struct pci_bus *pci_bus = to_pci_bus(dev);
+
+	put_device(pci_bus->bridge);
+	pci_bus_remove_resources(pci_bus);
+	pci_release_bus_of_node(pci_bus);
+	kfree(pci_bus);
+}
+
+static struct class pcibus_class = {
+	.name		= "pci_bus",
+	.dev_release	= &release_pcibus_dev,
+	.dev_groups	= pcibus_groups,
+};
+
+static int __init pcibus_class_init(void)
+{
+	return class_register(&pcibus_class);
+}
+postcore_initcall(pcibus_class_init);
+
+static u64 pci_size(u64 base, u64 maxbase, u64 mask)
+{
+	u64 size = mask & maxbase;	/* Find the significant bits */
+	if (!size)
+		return 0;
+
+	/*
+	 * Get the lowest of them to find the decode size, and from that
+	 * the extent.
+	 */
+	size = size & ~(size-1);
+
+	/*
+	 * base == maxbase can be valid only if the BAR has already been
+	 * programmed with all 1s.
+	 */
+	if (base == maxbase && ((base | (size - 1)) & mask) != mask)
+		return 0;
+
+	return size;
+}
+
+static inline unsigned long decode_bar(struct pci_dev *dev, u32 bar)
+{
+	u32 mem_type;
+	unsigned long flags;
+
+	if ((bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) {
+		flags = bar & ~PCI_BASE_ADDRESS_IO_MASK;
+		flags |= IORESOURCE_IO;
+		return flags;
+	}
+
+	flags = bar & ~PCI_BASE_ADDRESS_MEM_MASK;
+	flags |= IORESOURCE_MEM;
+	if (flags & PCI_BASE_ADDRESS_MEM_PREFETCH)
+		flags |= IORESOURCE_PREFETCH;
+
+	mem_type = bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK;
+	switch (mem_type) {
+	case PCI_BASE_ADDRESS_MEM_TYPE_32:
+		break;
+	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
+		/* 1M mem BAR treated as 32-bit BAR */
+		break;
+	case PCI_BASE_ADDRESS_MEM_TYPE_64:
+		flags |= IORESOURCE_MEM_64;
+		break;
+	default:
+		/* mem unknown type treated as 32-bit BAR */
+		break;
+	}
+	return flags;
+}
+
+#define PCI_COMMAND_DECODE_ENABLE	(PCI_COMMAND_MEMORY | PCI_COMMAND_IO)
+
+/**
+ * __pci_read_base - Read a PCI BAR
+ * @dev: the PCI device
+ * @type: type of the BAR
+ * @res: resource buffer to be filled in
+ * @pos: BAR position in the config space
+ *
+ * Returns 1 if the BAR is 64-bit, or 0 if 32-bit.
+ */
+int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
+		    struct resource *res, unsigned int pos)
+{
+	u32 l = 0, sz = 0, mask;
+	u64 l64, sz64, mask64;
+	u16 orig_cmd;
+	struct pci_bus_region region, inverted_region;
+
+	mask = type ? PCI_ROM_ADDRESS_MASK : ~0;
+
+	/* No printks while decoding is disabled! */
+	if (!dev->mmio_always_on) {
+		pci_read_config_word(dev, PCI_COMMAND, &orig_cmd);
+		if (orig_cmd & PCI_COMMAND_DECODE_ENABLE) {
+			pci_write_config_word(dev, PCI_COMMAND,
+				orig_cmd & ~PCI_COMMAND_DECODE_ENABLE);
+		}
+	}
+
+	res->name = pci_name(dev);
+
+	pci_read_config_dword(dev, pos, &l);
+	pci_write_config_dword(dev, pos, l | mask);
+	pci_read_config_dword(dev, pos, &sz);
+	pci_write_config_dword(dev, pos, l);
+
+	/*
+	 * All bits set in sz means the device isn't working properly.
+	 * If the BAR isn't implemented, all bits must be 0.  If it's a
+	 * memory BAR or a ROM, bit 0 must be clear; if it's an io BAR, bit
+	 * 1 must be clear.
+	 */
+	if (PCI_POSSIBLE_ERROR(sz))
+		sz = 0;
+
+	/*
+	 * I don't know how l can have all bits set.  Copied from old code.
+	 * Maybe it fixes a bug on some ancient platform.
+	 */
+	if (PCI_POSSIBLE_ERROR(l))
+		l = 0;
+
+	if (type == pci_bar_unknown) {
+		res->flags = decode_bar(dev, l);
+		res->flags |= IORESOURCE_SIZEALIGN;
+		if (res->flags & IORESOURCE_IO) {
+			l64 = l & PCI_BASE_ADDRESS_IO_MASK;
+			sz64 = sz & PCI_BASE_ADDRESS_IO_MASK;
+			mask64 = PCI_BASE_ADDRESS_IO_MASK & (u32)IO_SPACE_LIMIT;
+		} else {
+			l64 = l & PCI_BASE_ADDRESS_MEM_MASK;
+			sz64 = sz & PCI_BASE_ADDRESS_MEM_MASK;
+			mask64 = (u32)PCI_BASE_ADDRESS_MEM_MASK;
+		}
+	} else {
+		if (l & PCI_ROM_ADDRESS_ENABLE)
+			res->flags |= IORESOURCE_ROM_ENABLE;
+		l64 = l & PCI_ROM_ADDRESS_MASK;
+		sz64 = sz & PCI_ROM_ADDRESS_MASK;
+		mask64 = PCI_ROM_ADDRESS_MASK;
+	}
+
+	if (res->flags & IORESOURCE_MEM_64) {
+		pci_read_config_dword(dev, pos + 4, &l);
+		pci_write_config_dword(dev, pos + 4, ~0);
+		pci_read_config_dword(dev, pos + 4, &sz);
+		pci_write_config_dword(dev, pos + 4, l);
+
+		l64 |= ((u64)l << 32);
+		sz64 |= ((u64)sz << 32);
+		mask64 |= ((u64)~0 << 32);
+	}
+
+	if (!dev->mmio_always_on && (orig_cmd & PCI_COMMAND_DECODE_ENABLE))
+		pci_write_config_word(dev, PCI_COMMAND, orig_cmd);
+
+	if (!sz64)
+		goto fail;
+
+	sz64 = pci_size(l64, sz64, mask64);
+	if (!sz64) {
+		pci_info(dev, FW_BUG "reg 0x%x: invalid BAR (can't size)\n",
+			 pos);
+		goto fail;
+	}
+
+	if (res->flags & IORESOURCE_MEM_64) {
+		if ((sizeof(pci_bus_addr_t) < 8 || sizeof(resource_size_t) < 8)
+		    && sz64 > 0x100000000ULL) {
+			res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
+			res->start = 0;
+			res->end = 0;
+			pci_err(dev, "reg 0x%x: can't handle BAR larger than 4GB (size %#010llx)\n",
+				pos, (unsigned long long)sz64);
+			goto out;
+		}
+
+		if ((sizeof(pci_bus_addr_t) < 8) && l) {
+			/* Above 32-bit boundary; try to reallocate */
+			res->flags |= IORESOURCE_UNSET;
+			res->start = 0;
+			res->end = sz64 - 1;
+			pci_info(dev, "reg 0x%x: can't handle BAR above 4GB (bus address %#010llx)\n",
+				 pos, (unsigned long long)l64);
+			goto out;
+		}
+	}
+
+	region.start = l64;
+	region.end = l64 + sz64 - 1;
+
+	pcibios_bus_to_resource(dev->bus, res, &region);
+	pcibios_resource_to_bus(dev->bus, &inverted_region, res);
+
+	/*
+	 * If "A" is a BAR value (a bus address), "bus_to_resource(A)" is
+	 * the corresponding resource address (the physical address used by
+	 * the CPU.  Converting that resource address back to a bus address
+	 * should yield the original BAR value:
+	 *
+	 *     resource_to_bus(bus_to_resource(A)) == A
+	 *
+	 * If it doesn't, CPU accesses to "bus_to_resource(A)" will not
+	 * be claimed by the device.
+	 */
+	if (inverted_region.start != region.start) {
+		res->flags |= IORESOURCE_UNSET;
+		res->start = 0;
+		res->end = region.end - region.start;
+		pci_info(dev, "reg 0x%x: initial BAR value %#010llx invalid\n",
+			 pos, (unsigned long long)region.start);
+	}
+
+	goto out;
+
+
+fail:
+	res->flags = 0;
+out:
+	if (res->flags)
+		pci_info(dev, "reg 0x%x: %pR\n", pos, res);
+
+	return (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
+}
+
+static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
+{
+	unsigned int pos, reg;
+
+	if (dev->non_compliant_bars)
+		return;
+
+	/* Per PCIe r4.0, sec 9.3.4.1.11, the VF BARs are all RO Zero */
+	if (dev->is_virtfn)
+		return;
+
+	for (pos = 0; pos < howmany; pos++) {
+		struct resource *res = &dev->resource[pos];
+		reg = PCI_BASE_ADDRESS_0 + (pos << 2);
+		pos += __pci_read_base(dev, pci_bar_unknown, res, reg);
+	}
+
+	if (rom) {
+		struct resource *res = &dev->resource[PCI_ROM_RESOURCE];
+		dev->rom_base_reg = rom;
+		res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH |
+				IORESOURCE_READONLY | IORESOURCE_SIZEALIGN;
+		__pci_read_base(dev, pci_bar_mem32, res, rom);
+	}
+}
+
+static void pci_read_bridge_windows(struct pci_dev *bridge)
+{
+	u16 io;
+	u32 pmem, tmp;
+
+	pci_read_config_word(bridge, PCI_IO_BASE, &io);
+	if (!io) {
+		pci_write_config_word(bridge, PCI_IO_BASE, 0xe0f0);
+		pci_read_config_word(bridge, PCI_IO_BASE, &io);
+		pci_write_config_word(bridge, PCI_IO_BASE, 0x0);
+	}
+	if (io)
+		bridge->io_window = 1;
+
+	/*
+	 * DECchip 21050 pass 2 errata: the bridge may miss an address
+	 * disconnect boundary by one PCI data phase.  Workaround: do not
+	 * use prefetching on this device.
+	 */
+	if (bridge->vendor == PCI_VENDOR_ID_DEC && bridge->device == 0x0001)
+		return;
+
+	pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
+	if (!pmem) {
+		pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE,
+					       0xffe0fff0);
+		pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
+		pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0);
+	}
+	if (!pmem)
+		return;
+
+	bridge->pref_window = 1;
+
+	if ((pmem & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) {
+
+		/*
+		 * Bridge claims to have a 64-bit prefetchable memory
+		 * window; verify that the upper bits are actually
+		 * writable.
+		 */
+		pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &pmem);
+		pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32,
+				       0xffffffff);
+		pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &tmp);
+		pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, pmem);
+		if (tmp)
+			bridge->pref_64_window = 1;
+	}
+}
+
+static void pci_read_bridge_io(struct pci_bus *child)
+{
+	struct pci_dev *dev = child->self;
+	u8 io_base_lo, io_limit_lo;
+	unsigned long io_mask, io_granularity, base, limit;
+	struct pci_bus_region region;
+	struct resource *res;
+
+	io_mask = PCI_IO_RANGE_MASK;
+	io_granularity = 0x1000;
+	if (dev->io_window_1k) {
+		/* Support 1K I/O space granularity */
+		io_mask = PCI_IO_1K_RANGE_MASK;
+		io_granularity = 0x400;
+	}
+
+	res = child->resource[0];
+	pci_read_config_byte(dev, PCI_IO_BASE, &io_base_lo);
+	pci_read_config_byte(dev, PCI_IO_LIMIT, &io_limit_lo);
+	base = (io_base_lo & io_mask) << 8;
+	limit = (io_limit_lo & io_mask) << 8;
+
+	if ((io_base_lo & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) {
+		u16 io_base_hi, io_limit_hi;
+
+		pci_read_config_word(dev, PCI_IO_BASE_UPPER16, &io_base_hi);
+		pci_read_config_word(dev, PCI_IO_LIMIT_UPPER16, &io_limit_hi);
+		base |= ((unsigned long) io_base_hi << 16);
+		limit |= ((unsigned long) io_limit_hi << 16);
+	}
+
+	if (base <= limit) {
+		res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO;
+		region.start = base;
+		region.end = limit + io_granularity - 1;
+		pcibios_bus_to_resource(dev->bus, res, &region);
+		pci_info(dev, "  bridge window %pR\n", res);
+	}
+}
+
+static void pci_read_bridge_mmio(struct pci_bus *child)
+{
+	struct pci_dev *dev = child->self;
+	u16 mem_base_lo, mem_limit_lo;
+	unsigned long base, limit;
+	struct pci_bus_region region;
+	struct resource *res;
+
+	res = child->resource[1];
+	pci_read_config_word(dev, PCI_MEMORY_BASE, &mem_base_lo);
+	pci_read_config_word(dev, PCI_MEMORY_LIMIT, &mem_limit_lo);
+	base = ((unsigned long) mem_base_lo & PCI_MEMORY_RANGE_MASK) << 16;
+	limit = ((unsigned long) mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16;
+	if (base <= limit) {
+		res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
+		region.start = base;
+		region.end = limit + 0xfffff;
+		pcibios_bus_to_resource(dev->bus, res, &region);
+		pci_info(dev, "  bridge window %pR\n", res);
+	}
+}
+
+static void pci_read_bridge_mmio_pref(struct pci_bus *child)
+{
+	struct pci_dev *dev = child->self;
+	u16 mem_base_lo, mem_limit_lo;
+	u64 base64, limit64;
+	pci_bus_addr_t base, limit;
+	struct pci_bus_region region;
+	struct resource *res;
+
+	res = child->resource[2];
+	pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo);
+	pci_read_config_word(dev, PCI_PREF_MEMORY_LIMIT, &mem_limit_lo);
+	base64 = (mem_base_lo & PCI_PREF_RANGE_MASK) << 16;
+	limit64 = (mem_limit_lo & PCI_PREF_RANGE_MASK) << 16;
+
+	if ((mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) {
+		u32 mem_base_hi, mem_limit_hi;
+
+		pci_read_config_dword(dev, PCI_PREF_BASE_UPPER32, &mem_base_hi);
+		pci_read_config_dword(dev, PCI_PREF_LIMIT_UPPER32, &mem_limit_hi);
+
+		/*
+		 * Some bridges set the base > limit by default, and some
+		 * (broken) BIOSes do not initialize them.  If we find
+		 * this, just assume they are not being used.
+		 */
+		if (mem_base_hi <= mem_limit_hi) {
+			base64 |= (u64) mem_base_hi << 32;
+			limit64 |= (u64) mem_limit_hi << 32;
+		}
+	}
+
+	base = (pci_bus_addr_t) base64;
+	limit = (pci_bus_addr_t) limit64;
+
+	if (base != base64) {
+		pci_err(dev, "can't handle bridge window above 4GB (bus address %#010llx)\n",
+			(unsigned long long) base64);
+		return;
+	}
+
+	if (base <= limit) {
+		res->flags = (mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) |
+					 IORESOURCE_MEM | IORESOURCE_PREFETCH;
+		if (res->flags & PCI_PREF_RANGE_TYPE_64)
+			res->flags |= IORESOURCE_MEM_64;
+		region.start = base;
+		region.end = limit + 0xfffff;
+		pcibios_bus_to_resource(dev->bus, res, &region);
+		pci_info(dev, "  bridge window %pR\n", res);
+	}
+}
+
+void pci_read_bridge_bases(struct pci_bus *child)
+{
+	struct pci_dev *dev = child->self;
+	struct resource *res;
+	int i;
+
+	if (pci_is_root_bus(child))	/* It's a host bus, nothing to read */
+		return;
+
+	pci_info(dev, "PCI bridge to %pR%s\n",
+		 &child->busn_res,
+		 dev->transparent ? " (subtractive decode)" : "");
+
+	pci_bus_remove_resources(child);
+	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++)
+		child->resource[i] = &dev->resource[PCI_BRIDGE_RESOURCES+i];
+
+	pci_read_bridge_io(child);
+	pci_read_bridge_mmio(child);
+	pci_read_bridge_mmio_pref(child);
+
+	if (dev->transparent) {
+		pci_bus_for_each_resource(child->parent, res) {
+			if (res && res->flags) {
+				pci_bus_add_resource(child, res,
+						     PCI_SUBTRACTIVE_DECODE);
+				pci_info(dev, "  bridge window %pR (subtractive decode)\n",
+					   res);
+			}
+		}
+	}
+}
+
+static struct pci_bus *pci_alloc_bus(struct pci_bus *parent)
+{
+	struct pci_bus *b;
+
+	b = kzalloc(sizeof(*b), GFP_KERNEL);
+	if (!b)
+		return NULL;
+
+	INIT_LIST_HEAD(&b->node);
+	INIT_LIST_HEAD(&b->children);
+	INIT_LIST_HEAD(&b->devices);
+	INIT_LIST_HEAD(&b->slots);
+	INIT_LIST_HEAD(&b->resources);
+	b->max_bus_speed = PCI_SPEED_UNKNOWN;
+	b->cur_bus_speed = PCI_SPEED_UNKNOWN;
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+	if (parent)
+		b->domain_nr = parent->domain_nr;
+#endif
+	return b;
+}
+
+static void pci_release_host_bridge_dev(struct device *dev)
+{
+	struct pci_host_bridge *bridge = to_pci_host_bridge(dev);
+
+	if (bridge->release_fn)
+		bridge->release_fn(bridge);
+
+	pci_free_resource_list(&bridge->windows);
+	pci_free_resource_list(&bridge->dma_ranges);
+	kfree(bridge);
+}
+
+static void pci_init_host_bridge(struct pci_host_bridge *bridge)
+{
+	INIT_LIST_HEAD(&bridge->windows);
+	INIT_LIST_HEAD(&bridge->dma_ranges);
+
+	/*
+	 * We assume we can manage these PCIe features.  Some systems may
+	 * reserve these for use by the platform itself, e.g., an ACPI BIOS
+	 * may implement its own AER handling and use _OSC to prevent the
+	 * OS from interfering.
+	 */
+	bridge->native_aer = 1;
+	bridge->native_pcie_hotplug = 1;
+	bridge->native_shpc_hotplug = 1;
+	bridge->native_pme = 1;
+	bridge->native_ltr = 1;
+	bridge->native_dpc = 1;
+	bridge->domain_nr = PCI_DOMAIN_NR_NOT_SET;
+	bridge->native_cxl_error = 1;
+
+	device_initialize(&bridge->dev);
+}
+
+struct pci_host_bridge *pci_alloc_host_bridge(size_t priv)
+{
+	struct pci_host_bridge *bridge;
+
+	bridge = kzalloc(sizeof(*bridge) + priv, GFP_KERNEL);
+	if (!bridge)
+		return NULL;
+
+	pci_init_host_bridge(bridge);
+	bridge->dev.release = pci_release_host_bridge_dev;
+
+	return bridge;
+}
+EXPORT_SYMBOL(pci_alloc_host_bridge);
+
+static void devm_pci_alloc_host_bridge_release(void *data)
+{
+	pci_free_host_bridge(data);
+}
+
+struct pci_host_bridge *devm_pci_alloc_host_bridge(struct device *dev,
+						   size_t priv)
+{
+	int ret;
+	struct pci_host_bridge *bridge;
+
+	bridge = pci_alloc_host_bridge(priv);
+	if (!bridge)
+		return NULL;
+
+	bridge->dev.parent = dev;
+
+	ret = devm_add_action_or_reset(dev, devm_pci_alloc_host_bridge_release,
+				       bridge);
+	if (ret)
+		return NULL;
+
+	ret = devm_of_pci_bridge_init(dev, bridge);
+	if (ret)
+		return NULL;
+
+	return bridge;
+}
+EXPORT_SYMBOL(devm_pci_alloc_host_bridge);
+
+void pci_free_host_bridge(struct pci_host_bridge *bridge)
+{
+	put_device(&bridge->dev);
+}
+EXPORT_SYMBOL(pci_free_host_bridge);
+
+/* Indexed by PCI_X_SSTATUS_FREQ (secondary bus mode and frequency) */
+static const unsigned char pcix_bus_speed[] = {
+	PCI_SPEED_UNKNOWN,		/* 0 */
+	PCI_SPEED_66MHz_PCIX,		/* 1 */
+	PCI_SPEED_100MHz_PCIX,		/* 2 */
+	PCI_SPEED_133MHz_PCIX,		/* 3 */
+	PCI_SPEED_UNKNOWN,		/* 4 */
+	PCI_SPEED_66MHz_PCIX_ECC,	/* 5 */
+	PCI_SPEED_100MHz_PCIX_ECC,	/* 6 */
+	PCI_SPEED_133MHz_PCIX_ECC,	/* 7 */
+	PCI_SPEED_UNKNOWN,		/* 8 */
+	PCI_SPEED_66MHz_PCIX_266,	/* 9 */
+	PCI_SPEED_100MHz_PCIX_266,	/* A */
+	PCI_SPEED_133MHz_PCIX_266,	/* B */
+	PCI_SPEED_UNKNOWN,		/* C */
+	PCI_SPEED_66MHz_PCIX_533,	/* D */
+	PCI_SPEED_100MHz_PCIX_533,	/* E */
+	PCI_SPEED_133MHz_PCIX_533	/* F */
+};
+
+/* Indexed by PCI_EXP_LNKCAP_SLS, PCI_EXP_LNKSTA_CLS */
+const unsigned char pcie_link_speed[] = {
+	PCI_SPEED_UNKNOWN,		/* 0 */
+	PCIE_SPEED_2_5GT,		/* 1 */
+	PCIE_SPEED_5_0GT,		/* 2 */
+	PCIE_SPEED_8_0GT,		/* 3 */
+	PCIE_SPEED_16_0GT,		/* 4 */
+	PCIE_SPEED_32_0GT,		/* 5 */
+	PCIE_SPEED_64_0GT,		/* 6 */
+	PCI_SPEED_UNKNOWN,		/* 7 */
+	PCI_SPEED_UNKNOWN,		/* 8 */
+	PCI_SPEED_UNKNOWN,		/* 9 */
+	PCI_SPEED_UNKNOWN,		/* A */
+	PCI_SPEED_UNKNOWN,		/* B */
+	PCI_SPEED_UNKNOWN,		/* C */
+	PCI_SPEED_UNKNOWN,		/* D */
+	PCI_SPEED_UNKNOWN,		/* E */
+	PCI_SPEED_UNKNOWN		/* F */
+};
+EXPORT_SYMBOL_GPL(pcie_link_speed);
+
+const char *pci_speed_string(enum pci_bus_speed speed)
+{
+	/* Indexed by the pci_bus_speed enum */
+	static const char *speed_strings[] = {
+	    "33 MHz PCI",		/* 0x00 */
+	    "66 MHz PCI",		/* 0x01 */
+	    "66 MHz PCI-X",		/* 0x02 */
+	    "100 MHz PCI-X",		/* 0x03 */
+	    "133 MHz PCI-X",		/* 0x04 */
+	    NULL,			/* 0x05 */
+	    NULL,			/* 0x06 */
+	    NULL,			/* 0x07 */
+	    NULL,			/* 0x08 */
+	    "66 MHz PCI-X 266",		/* 0x09 */
+	    "100 MHz PCI-X 266",	/* 0x0a */
+	    "133 MHz PCI-X 266",	/* 0x0b */
+	    "Unknown AGP",		/* 0x0c */
+	    "1x AGP",			/* 0x0d */
+	    "2x AGP",			/* 0x0e */
+	    "4x AGP",			/* 0x0f */
+	    "8x AGP",			/* 0x10 */
+	    "66 MHz PCI-X 533",		/* 0x11 */
+	    "100 MHz PCI-X 533",	/* 0x12 */
+	    "133 MHz PCI-X 533",	/* 0x13 */
+	    "2.5 GT/s PCIe",		/* 0x14 */
+	    "5.0 GT/s PCIe",		/* 0x15 */
+	    "8.0 GT/s PCIe",		/* 0x16 */
+	    "16.0 GT/s PCIe",		/* 0x17 */
+	    "32.0 GT/s PCIe",		/* 0x18 */
+	    "64.0 GT/s PCIe",		/* 0x19 */
+	};
+
+	if (speed < ARRAY_SIZE(speed_strings))
+		return speed_strings[speed];
+	return "Unknown";
+}
+EXPORT_SYMBOL_GPL(pci_speed_string);
+
+void pcie_update_link_speed(struct pci_bus *bus, u16 linksta)
+{
+	bus->cur_bus_speed = pcie_link_speed[linksta & PCI_EXP_LNKSTA_CLS];
+}
+EXPORT_SYMBOL_GPL(pcie_update_link_speed);
+
+static unsigned char agp_speeds[] = {
+	AGP_UNKNOWN,
+	AGP_1X,
+	AGP_2X,
+	AGP_4X,
+	AGP_8X
+};
+
+static enum pci_bus_speed agp_speed(int agp3, int agpstat)
+{
+	int index = 0;
+
+	if (agpstat & 4)
+		index = 3;
+	else if (agpstat & 2)
+		index = 2;
+	else if (agpstat & 1)
+		index = 1;
+	else
+		goto out;
+
+	if (agp3) {
+		index += 2;
+		if (index == 5)
+			index = 0;
+	}
+
+ out:
+	return agp_speeds[index];
+}
+
+static void pci_set_bus_speed(struct pci_bus *bus)
+{
+	struct pci_dev *bridge = bus->self;
+	int pos;
+
+	pos = pci_find_capability(bridge, PCI_CAP_ID_AGP);
+	if (!pos)
+		pos = pci_find_capability(bridge, PCI_CAP_ID_AGP3);
+	if (pos) {
+		u32 agpstat, agpcmd;
+
+		pci_read_config_dword(bridge, pos + PCI_AGP_STATUS, &agpstat);
+		bus->max_bus_speed = agp_speed(agpstat & 8, agpstat & 7);
+
+		pci_read_config_dword(bridge, pos + PCI_AGP_COMMAND, &agpcmd);
+		bus->cur_bus_speed = agp_speed(agpstat & 8, agpcmd & 7);
+	}
+
+	pos = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
+	if (pos) {
+		u16 status;
+		enum pci_bus_speed max;
+
+		pci_read_config_word(bridge, pos + PCI_X_BRIDGE_SSTATUS,
+				     &status);
+
+		if (status & PCI_X_SSTATUS_533MHZ) {
+			max = PCI_SPEED_133MHz_PCIX_533;
+		} else if (status & PCI_X_SSTATUS_266MHZ) {
+			max = PCI_SPEED_133MHz_PCIX_266;
+		} else if (status & PCI_X_SSTATUS_133MHZ) {
+			if ((status & PCI_X_SSTATUS_VERS) == PCI_X_SSTATUS_V2)
+				max = PCI_SPEED_133MHz_PCIX_ECC;
+			else
+				max = PCI_SPEED_133MHz_PCIX;
+		} else {
+			max = PCI_SPEED_66MHz_PCIX;
+		}
+
+		bus->max_bus_speed = max;
+		bus->cur_bus_speed = pcix_bus_speed[
+			(status & PCI_X_SSTATUS_FREQ) >> 6];
+
+		return;
+	}
+
+	if (pci_is_pcie(bridge)) {
+		u32 linkcap;
+		u16 linksta;
+
+		pcie_capability_read_dword(bridge, PCI_EXP_LNKCAP, &linkcap);
+		bus->max_bus_speed = pcie_link_speed[linkcap & PCI_EXP_LNKCAP_SLS];
+
+		pcie_capability_read_word(bridge, PCI_EXP_LNKSTA, &linksta);
+		pcie_update_link_speed(bus, linksta);
+	}
+}
+
+static struct irq_domain *pci_host_bridge_msi_domain(struct pci_bus *bus)
+{
+	struct irq_domain *d;
+
+	/* If the host bridge driver sets a MSI domain of the bridge, use it */
+	d = dev_get_msi_domain(bus->bridge);
+
+	/*
+	 * Any firmware interface that can resolve the msi_domain
+	 * should be called from here.
+	 */
+	if (!d)
+		d = pci_host_bridge_of_msi_domain(bus);
+	if (!d)
+		d = pci_host_bridge_acpi_msi_domain(bus);
+
+	/*
+	 * If no IRQ domain was found via the OF tree, try looking it up
+	 * directly through the fwnode_handle.
+	 */
+	if (!d) {
+		struct fwnode_handle *fwnode = pci_root_bus_fwnode(bus);
+
+		if (fwnode)
+			d = irq_find_matching_fwnode(fwnode,
+						     DOMAIN_BUS_PCI_MSI);
+	}
+
+	return d;
+}
+
+static void pci_set_bus_msi_domain(struct pci_bus *bus)
+{
+	struct irq_domain *d;
+	struct pci_bus *b;
+
+	/*
+	 * The bus can be a root bus, a subordinate bus, or a virtual bus
+	 * created by an SR-IOV device.  Walk up to the first bridge device
+	 * found or derive the domain from the host bridge.
+	 */
+	for (b = bus, d = NULL; !d && !pci_is_root_bus(b); b = b->parent) {
+		if (b->self)
+			d = dev_get_msi_domain(&b->self->dev);
+	}
+
+	if (!d)
+		d = pci_host_bridge_msi_domain(b);
+
+	dev_set_msi_domain(&bus->dev, d);
+}
+
+static int pci_register_host_bridge(struct pci_host_bridge *bridge)
+{
+	struct device *parent = bridge->dev.parent;
+	struct resource_entry *window, *next, *n;
+	struct pci_bus *bus, *b;
+	resource_size_t offset, next_offset;
+	LIST_HEAD(resources);
+	struct resource *res, *next_res;
+	char addr[64], *fmt;
+	const char *name;
+	int err;
+
+	bus = pci_alloc_bus(NULL);
+	if (!bus)
+		return -ENOMEM;
+
+	bridge->bus = bus;
+
+	bus->sysdata = bridge->sysdata;
+	bus->ops = bridge->ops;
+	bus->number = bus->busn_res.start = bridge->busnr;
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+	if (bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
+		bus->domain_nr = pci_bus_find_domain_nr(bus, parent);
+	else
+		bus->domain_nr = bridge->domain_nr;
+	if (bus->domain_nr < 0) {
+		err = bus->domain_nr;
+		goto free;
+	}
+#endif
+
+	b = pci_find_bus(pci_domain_nr(bus), bridge->busnr);
+	if (b) {
+		/* Ignore it if we already got here via a different bridge */
+		dev_dbg(&b->dev, "bus already known\n");
+		err = -EEXIST;
+		goto free;
+	}
+
+	dev_set_name(&bridge->dev, "pci%04x:%02x", pci_domain_nr(bus),
+		     bridge->busnr);
+
+	err = pcibios_root_bridge_prepare(bridge);
+	if (err)
+		goto free;
+
+	/* Temporarily move resources off the list */
+	list_splice_init(&bridge->windows, &resources);
+	err = device_add(&bridge->dev);
+	if (err) {
+		put_device(&bridge->dev);
+		goto free;
+	}
+	bus->bridge = get_device(&bridge->dev);
+	device_enable_async_suspend(bus->bridge);
+	pci_set_bus_of_node(bus);
+	pci_set_bus_msi_domain(bus);
+	if (bridge->msi_domain && !dev_get_msi_domain(&bus->dev) &&
+	    !pci_host_of_has_msi_map(parent))
+		bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
+
+	if (!parent)
+		set_dev_node(bus->bridge, pcibus_to_node(bus));
+
+	bus->dev.class = &pcibus_class;
+	bus->dev.parent = bus->bridge;
+
+	dev_set_name(&bus->dev, "%04x:%02x", pci_domain_nr(bus), bus->number);
+	name = dev_name(&bus->dev);
+
+	err = device_register(&bus->dev);
+	if (err)
+		goto unregister;
+
+	pcibios_add_bus(bus);
+
+	if (bus->ops->add_bus) {
+		err = bus->ops->add_bus(bus);
+		if (WARN_ON(err < 0))
+			dev_err(&bus->dev, "failed to add bus: %d\n", err);
+	}
+
+	/* Create legacy_io and legacy_mem files for this bus */
+	pci_create_legacy_files(bus);
+
+	if (parent)
+		dev_info(parent, "PCI host bridge to bus %s\n", name);
+	else
+		pr_info("PCI host bridge to bus %s\n", name);
+
+	if (nr_node_ids > 1 && pcibus_to_node(bus) == NUMA_NO_NODE)
+		dev_warn(&bus->dev, "Unknown NUMA node; performance will be reduced\n");
+
+	/* Coalesce contiguous windows */
+	resource_list_for_each_entry_safe(window, n, &resources) {
+		if (list_is_last(&window->node, &resources))
+			break;
+
+		next = list_next_entry(window, node);
+		offset = window->offset;
+		res = window->res;
+		next_offset = next->offset;
+		next_res = next->res;
+
+		if (res->flags != next_res->flags || offset != next_offset)
+			continue;
+
+		if (res->end + 1 == next_res->start) {
+			next_res->start = res->start;
+			res->flags = res->start = res->end = 0;
+		}
+	}
+
+	/* Add initial resources to the bus */
+	resource_list_for_each_entry_safe(window, n, &resources) {
+		offset = window->offset;
+		res = window->res;
+		if (!res->flags && !res->start && !res->end) {
+			release_resource(res);
+			resource_list_destroy_entry(window);
+			continue;
+		}
+
+		list_move_tail(&window->node, &bridge->windows);
+
+		if (res->flags & IORESOURCE_BUS)
+			pci_bus_insert_busn_res(bus, bus->number, res->end);
+		else
+			pci_bus_add_resource(bus, res, 0);
+
+		if (offset) {
+			if (resource_type(res) == IORESOURCE_IO)
+				fmt = " (bus address [%#06llx-%#06llx])";
+			else
+				fmt = " (bus address [%#010llx-%#010llx])";
+
+			snprintf(addr, sizeof(addr), fmt,
+				 (unsigned long long)(res->start - offset),
+				 (unsigned long long)(res->end - offset));
+		} else
+			addr[0] = '\0';
+
+		dev_info(&bus->dev, "root bus resource %pR%s\n", res, addr);
+	}
+
+	down_write(&pci_bus_sem);
+	list_add_tail(&bus->node, &pci_root_buses);
+	up_write(&pci_bus_sem);
+
+	return 0;
+
+unregister:
+	put_device(&bridge->dev);
+	device_del(&bridge->dev);
+
+free:
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+	pci_bus_release_domain_nr(bus, parent);
+#endif
+	kfree(bus);
+	return err;
+}
+
+static bool pci_bridge_child_ext_cfg_accessible(struct pci_dev *bridge)
+{
+	int pos;
+	u32 status;
+
+	/*
+	 * If extended config space isn't accessible on a bridge's primary
+	 * bus, we certainly can't access it on the secondary bus.
+	 */
+	if (bridge->bus->bus_flags & PCI_BUS_FLAGS_NO_EXTCFG)
+		return false;
+
+	/*
+	 * PCIe Root Ports and switch ports are PCIe on both sides, so if
+	 * extended config space is accessible on the primary, it's also
+	 * accessible on the secondary.
+	 */
+	if (pci_is_pcie(bridge) &&
+	    (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT ||
+	     pci_pcie_type(bridge) == PCI_EXP_TYPE_UPSTREAM ||
+	     pci_pcie_type(bridge) == PCI_EXP_TYPE_DOWNSTREAM))
+		return true;
+
+	/*
+	 * For the other bridge types:
+	 *   - PCI-to-PCI bridges
+	 *   - PCIe-to-PCI/PCI-X forward bridges
+	 *   - PCI/PCI-X-to-PCIe reverse bridges
+	 * extended config space on the secondary side is only accessible
+	 * if the bridge supports PCI-X Mode 2.
+	 */
+	pos = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
+	if (!pos)
+		return false;
+
+	pci_read_config_dword(bridge, pos + PCI_X_STATUS, &status);
+	return status & (PCI_X_STATUS_266MHZ | PCI_X_STATUS_533MHZ);
+}
+
+static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent,
+					   struct pci_dev *bridge, int busnr)
+{
+	struct pci_bus *child;
+	struct pci_host_bridge *host;
+	int i;
+	int ret;
+
+	/* Allocate a new bus and inherit stuff from the parent */
+	child = pci_alloc_bus(parent);
+	if (!child)
+		return NULL;
+
+	child->parent = parent;
+	child->sysdata = parent->sysdata;
+	child->bus_flags = parent->bus_flags;
+
+	host = pci_find_host_bridge(parent);
+	if (host->child_ops)
+		child->ops = host->child_ops;
+	else
+		child->ops = parent->ops;
+
+	/*
+	 * Initialize some portions of the bus device, but don't register
+	 * it now as the parent is not properly set up yet.
+	 */
+	child->dev.class = &pcibus_class;
+	dev_set_name(&child->dev, "%04x:%02x", pci_domain_nr(child), busnr);
+
+	/* Set up the primary, secondary and subordinate bus numbers */
+	child->number = child->busn_res.start = busnr;
+	child->primary = parent->busn_res.start;
+	child->busn_res.end = 0xff;
+
+	if (!bridge) {
+		child->dev.parent = parent->bridge;
+		goto add_dev;
+	}
+
+	child->self = bridge;
+	child->bridge = get_device(&bridge->dev);
+	child->dev.parent = child->bridge;
+	pci_set_bus_of_node(child);
+	pci_set_bus_speed(child);
+
+	/*
+	 * Check whether extended config space is accessible on the child
+	 * bus.  Note that we currently assume it is always accessible on
+	 * the root bus.
+	 */
+	if (!pci_bridge_child_ext_cfg_accessible(bridge)) {
+		child->bus_flags |= PCI_BUS_FLAGS_NO_EXTCFG;
+		pci_info(child, "extended config space not accessible\n");
+	}
+
+	/* Set up default resource pointers and names */
+	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
+		child->resource[i] = &bridge->resource[PCI_BRIDGE_RESOURCES+i];
+		child->resource[i]->name = child->name;
+	}
+	bridge->subordinate = child;
+
+add_dev:
+	pci_set_bus_msi_domain(child);
+	ret = device_register(&child->dev);
+	WARN_ON(ret < 0);
+
+	pcibios_add_bus(child);
+
+	if (child->ops->add_bus) {
+		ret = child->ops->add_bus(child);
+		if (WARN_ON(ret < 0))
+			dev_err(&child->dev, "failed to add bus: %d\n", ret);
+	}
+
+	/* Create legacy_io and legacy_mem files for this bus */
+	pci_create_legacy_files(child);
+
+	return child;
+}
+
+struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
+				int busnr)
+{
+	struct pci_bus *child;
+
+	child = pci_alloc_child_bus(parent, dev, busnr);
+	if (child) {
+		down_write(&pci_bus_sem);
+		list_add_tail(&child->node, &parent->children);
+		up_write(&pci_bus_sem);
+	}
+	return child;
+}
+EXPORT_SYMBOL(pci_add_new_bus);
+
+static void pci_enable_crs(struct pci_dev *pdev)
+{
+	u16 root_cap = 0;
+
+	/* Enable CRS Software Visibility if supported */
+	pcie_capability_read_word(pdev, PCI_EXP_RTCAP, &root_cap);
+	if (root_cap & PCI_EXP_RTCAP_CRSVIS)
+		pcie_capability_set_word(pdev, PCI_EXP_RTCTL,
+					 PCI_EXP_RTCTL_CRSSVE);
+}
+
+static unsigned int pci_scan_child_bus_extend(struct pci_bus *bus,
+					      unsigned int available_buses);
+/**
+ * pci_ea_fixed_busnrs() - Read fixed Secondary and Subordinate bus
+ * numbers from EA capability.
+ * @dev: Bridge
+ * @sec: updated with secondary bus number from EA
+ * @sub: updated with subordinate bus number from EA
+ *
+ * If @dev is a bridge with EA capability that specifies valid secondary
+ * and subordinate bus numbers, return true with the bus numbers in @sec
+ * and @sub.  Otherwise return false.
+ */
+static bool pci_ea_fixed_busnrs(struct pci_dev *dev, u8 *sec, u8 *sub)
+{
+	int ea, offset;
+	u32 dw;
+	u8 ea_sec, ea_sub;
+
+	if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
+		return false;
+
+	/* find PCI EA capability in list */
+	ea = pci_find_capability(dev, PCI_CAP_ID_EA);
+	if (!ea)
+		return false;
+
+	offset = ea + PCI_EA_FIRST_ENT;
+	pci_read_config_dword(dev, offset, &dw);
+	ea_sec =  dw & PCI_EA_SEC_BUS_MASK;
+	ea_sub = (dw & PCI_EA_SUB_BUS_MASK) >> PCI_EA_SUB_BUS_SHIFT;
+	if (ea_sec  == 0 || ea_sub < ea_sec)
+		return false;
+
+	*sec = ea_sec;
+	*sub = ea_sub;
+	return true;
+}
+
+/*
+ * pci_scan_bridge_extend() - Scan buses behind a bridge
+ * @bus: Parent bus the bridge is on
+ * @dev: Bridge itself
+ * @max: Starting subordinate number of buses behind this bridge
+ * @available_buses: Total number of buses available for this bridge and
+ *		     the devices below. After the minimal bus space has
+ *		     been allocated the remaining buses will be
+ *		     distributed equally between hotplug-capable bridges.
+ * @pass: Either %0 (scan already configured bridges) or %1 (scan bridges
+ *        that need to be reconfigured.
+ *
+ * If it's a bridge, configure it and scan the bus behind it.
+ * For CardBus bridges, we don't scan behind as the devices will
+ * be handled by the bridge driver itself.
+ *
+ * We need to process bridges in two passes -- first we scan those
+ * already configured by the BIOS and after we are done with all of
+ * them, we proceed to assigning numbers to the remaining buses in
+ * order to avoid overlaps between old and new bus numbers.
+ *
+ * Return: New subordinate number covering all buses behind this bridge.
+ */
+static int pci_scan_bridge_extend(struct pci_bus *bus, struct pci_dev *dev,
+				  int max, unsigned int available_buses,
+				  int pass)
+{
+	struct pci_bus *child;
+	int is_cardbus = (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS);
+	u32 buses, i, j = 0;
+	u16 bctl;
+	u8 primary, secondary, subordinate;
+	int broken = 0;
+	bool fixed_buses;
+	u8 fixed_sec, fixed_sub;
+	int next_busnr;
+
+	/*
+	 * Make sure the bridge is powered on to be able to access config
+	 * space of devices below it.
+	 */
+	pm_runtime_get_sync(&dev->dev);
+
+	pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses);
+	primary = buses & 0xFF;
+	secondary = (buses >> 8) & 0xFF;
+	subordinate = (buses >> 16) & 0xFF;
+
+	pci_dbg(dev, "scanning [bus %02x-%02x] behind bridge, pass %d\n",
+		secondary, subordinate, pass);
+
+	if (!primary && (primary != bus->number) && secondary && subordinate) {
+		pci_warn(dev, "Primary bus is hard wired to 0\n");
+		primary = bus->number;
+	}
+
+	/* Check if setup is sensible at all */
+	if (!pass &&
+	    (primary != bus->number || secondary <= bus->number ||
+	     secondary > subordinate)) {
+		pci_info(dev, "bridge configuration invalid ([bus %02x-%02x]), reconfiguring\n",
+			 secondary, subordinate);
+		broken = 1;
+	}
+
+	/*
+	 * Disable Master-Abort Mode during probing to avoid reporting of
+	 * bus errors in some architectures.
+	 */
+	pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &bctl);
+	pci_write_config_word(dev, PCI_BRIDGE_CONTROL,
+			      bctl & ~PCI_BRIDGE_CTL_MASTER_ABORT);
+
+	pci_enable_crs(dev);
+
+	if ((secondary || subordinate) && !pcibios_assign_all_busses() &&
+	    !is_cardbus && !broken) {
+		unsigned int cmax, buses;
+
+		/*
+		 * Bus already configured by firmware, process it in the
+		 * first pass and just note the configuration.
+		 */
+		if (pass)
+			goto out;
+
+		/*
+		 * The bus might already exist for two reasons: Either we
+		 * are rescanning the bus or the bus is reachable through
+		 * more than one bridge. The second case can happen with
+		 * the i450NX chipset.
+		 */
+		child = pci_find_bus(pci_domain_nr(bus), secondary);
+		if (!child) {
+			child = pci_add_new_bus(bus, dev, secondary);
+			if (!child)
+				goto out;
+			child->primary = primary;
+			pci_bus_insert_busn_res(child, secondary, subordinate);
+			child->bridge_ctl = bctl;
+		}
+
+		buses = subordinate - secondary;
+		cmax = pci_scan_child_bus_extend(child, buses);
+		if (cmax > subordinate)
+			pci_warn(dev, "bridge has subordinate %02x but max busn %02x\n",
+				 subordinate, cmax);
+
+		/* Subordinate should equal child->busn_res.end */
+		if (subordinate > max)
+			max = subordinate;
+	} else {
+
+		/*
+		 * We need to assign a number to this bus which we always
+		 * do in the second pass.
+		 */
+		if (!pass) {
+			if (pcibios_assign_all_busses() || broken || is_cardbus)
+
+				/*
+				 * Temporarily disable forwarding of the
+				 * configuration cycles on all bridges in
+				 * this bus segment to avoid possible
+				 * conflicts in the second pass between two
+				 * bridges programmed with overlapping bus
+				 * ranges.
+				 */
+				pci_write_config_dword(dev, PCI_PRIMARY_BUS,
+						       buses & ~0xffffff);
+			goto out;
+		}
+
+		/* Clear errors */
+		pci_write_config_word(dev, PCI_STATUS, 0xffff);
+
+		/* Read bus numbers from EA Capability (if present) */
+		fixed_buses = pci_ea_fixed_busnrs(dev, &fixed_sec, &fixed_sub);
+		if (fixed_buses)
+			next_busnr = fixed_sec;
+		else
+			next_busnr = max + 1;
+
+		/*
+		 * Prevent assigning a bus number that already exists.
+		 * This can happen when a bridge is hot-plugged, so in this
+		 * case we only re-scan this bus.
+		 */
+		child = pci_find_bus(pci_domain_nr(bus), next_busnr);
+		if (!child) {
+			child = pci_add_new_bus(bus, dev, next_busnr);
+			if (!child)
+				goto out;
+			pci_bus_insert_busn_res(child, next_busnr,
+						bus->busn_res.end);
+		}
+		max++;
+		if (available_buses)
+			available_buses--;
+
+		buses = (buses & 0xff000000)
+		      | ((unsigned int)(child->primary)     <<  0)
+		      | ((unsigned int)(child->busn_res.start)   <<  8)
+		      | ((unsigned int)(child->busn_res.end) << 16);
+
+		/*
+		 * yenta.c forces a secondary latency timer of 176.
+		 * Copy that behaviour here.
+		 */
+		if (is_cardbus) {
+			buses &= ~0xff000000;
+			buses |= CARDBUS_LATENCY_TIMER << 24;
+		}
+
+		/* We need to blast all three values with a single write */
+		pci_write_config_dword(dev, PCI_PRIMARY_BUS, buses);
+
+		if (!is_cardbus) {
+			child->bridge_ctl = bctl;
+			max = pci_scan_child_bus_extend(child, available_buses);
+		} else {
+
+			/*
+			 * For CardBus bridges, we leave 4 bus numbers as
+			 * cards with a PCI-to-PCI bridge can be inserted
+			 * later.
+			 */
+			for (i = 0; i < CARDBUS_RESERVE_BUSNR; i++) {
+				struct pci_bus *parent = bus;
+				if (pci_find_bus(pci_domain_nr(bus),
+							max+i+1))
+					break;
+				while (parent->parent) {
+					if ((!pcibios_assign_all_busses()) &&
+					    (parent->busn_res.end > max) &&
+					    (parent->busn_res.end <= max+i)) {
+						j = 1;
+					}
+					parent = parent->parent;
+				}
+				if (j) {
+
+					/*
+					 * Often, there are two CardBus
+					 * bridges -- try to leave one
+					 * valid bus number for each one.
+					 */
+					i /= 2;
+					break;
+				}
+			}
+			max += i;
+		}
+
+		/*
+		 * Set subordinate bus number to its real value.
+		 * If fixed subordinate bus number exists from EA
+		 * capability then use it.
+		 */
+		if (fixed_buses)
+			max = fixed_sub;
+		pci_bus_update_busn_res_end(child, max);
+		pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, max);
+	}
+
+	sprintf(child->name,
+		(is_cardbus ? "PCI CardBus %04x:%02x" : "PCI Bus %04x:%02x"),
+		pci_domain_nr(bus), child->number);
+
+	/* Check that all devices are accessible */
+	while (bus->parent) {
+		if ((child->busn_res.end > bus->busn_res.end) ||
+		    (child->number > bus->busn_res.end) ||
+		    (child->number < bus->number) ||
+		    (child->busn_res.end < bus->number)) {
+			dev_info(&dev->dev, "devices behind bridge are unusable because %pR cannot be assigned for them\n",
+				 &child->busn_res);
+			break;
+		}
+		bus = bus->parent;
+	}
+
+out:
+	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, bctl);
+
+	pm_runtime_put(&dev->dev);
+
+	return max;
+}
+
+/*
+ * pci_scan_bridge() - Scan buses behind a bridge
+ * @bus: Parent bus the bridge is on
+ * @dev: Bridge itself
+ * @max: Starting subordinate number of buses behind this bridge
+ * @pass: Either %0 (scan already configured bridges) or %1 (scan bridges
+ *        that need to be reconfigured.
+ *
+ * If it's a bridge, configure it and scan the bus behind it.
+ * For CardBus bridges, we don't scan behind as the devices will
+ * be handled by the bridge driver itself.
+ *
+ * We need to process bridges in two passes -- first we scan those
+ * already configured by the BIOS and after we are done with all of
+ * them, we proceed to assigning numbers to the remaining buses in
+ * order to avoid overlaps between old and new bus numbers.
+ *
+ * Return: New subordinate number covering all buses behind this bridge.
+ */
+int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass)
+{
+	return pci_scan_bridge_extend(bus, dev, max, 0, pass);
+}
+EXPORT_SYMBOL(pci_scan_bridge);
+
+/*
+ * Read interrupt line and base address registers.
+ * The architecture-dependent code can tweak these, of course.
+ */
+static void pci_read_irq(struct pci_dev *dev)
+{
+	unsigned char irq;
+
+	/* VFs are not allowed to use INTx, so skip the config reads */
+	if (dev->is_virtfn) {
+		dev->pin = 0;
+		dev->irq = 0;
+		return;
+	}
+
+	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &irq);
+	dev->pin = irq;
+	if (irq)
+		pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
+	dev->irq = irq;
+}
+
+void set_pcie_port_type(struct pci_dev *pdev)
+{
+	int pos;
+	u16 reg16;
+	u32 reg32;
+	int type;
+	struct pci_dev *parent;
+
+	pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+	if (!pos)
+		return;
+
+	pdev->pcie_cap = pos;
+	pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
+	pdev->pcie_flags_reg = reg16;
+	pci_read_config_dword(pdev, pos + PCI_EXP_DEVCAP, &pdev->devcap);
+	pdev->pcie_mpss = FIELD_GET(PCI_EXP_DEVCAP_PAYLOAD, pdev->devcap);
+
+	pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &reg32);
+	if (reg32 & PCI_EXP_LNKCAP_DLLLARC)
+		pdev->link_active_reporting = 1;
+
+	parent = pci_upstream_bridge(pdev);
+	if (!parent)
+		return;
+
+	/*
+	 * Some systems do not identify their upstream/downstream ports
+	 * correctly so detect impossible configurations here and correct
+	 * the port type accordingly.
+	 */
+	type = pci_pcie_type(pdev);
+	if (type == PCI_EXP_TYPE_DOWNSTREAM) {
+		/*
+		 * If pdev claims to be downstream port but the parent
+		 * device is also downstream port assume pdev is actually
+		 * upstream port.
+		 */
+		if (pcie_downstream_port(parent)) {
+			pci_info(pdev, "claims to be downstream port but is acting as upstream port, correcting type\n");
+			pdev->pcie_flags_reg &= ~PCI_EXP_FLAGS_TYPE;
+			pdev->pcie_flags_reg |= PCI_EXP_TYPE_UPSTREAM;
+		}
+	} else if (type == PCI_EXP_TYPE_UPSTREAM) {
+		/*
+		 * If pdev claims to be upstream port but the parent
+		 * device is also upstream port assume pdev is actually
+		 * downstream port.
+		 */
+		if (pci_pcie_type(parent) == PCI_EXP_TYPE_UPSTREAM) {
+			pci_info(pdev, "claims to be upstream port but is acting as downstream port, correcting type\n");
+			pdev->pcie_flags_reg &= ~PCI_EXP_FLAGS_TYPE;
+			pdev->pcie_flags_reg |= PCI_EXP_TYPE_DOWNSTREAM;
+		}
+	}
+}
+
+void set_pcie_hotplug_bridge(struct pci_dev *pdev)
+{
+	u32 reg32;
+
+	pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &reg32);
+	if (reg32 & PCI_EXP_SLTCAP_HPC)
+		pdev->is_hotplug_bridge = 1;
+}
+
+static void set_pcie_thunderbolt(struct pci_dev *dev)
+{
+	u16 vsec;
+
+	/* Is the device part of a Thunderbolt controller? */
+	vsec = pci_find_vsec_capability(dev, PCI_VENDOR_ID_INTEL, PCI_VSEC_ID_INTEL_TBT);
+	if (vsec)
+		dev->is_thunderbolt = 1;
+}
+
+static void set_pcie_untrusted(struct pci_dev *dev)
+{
+	struct pci_dev *parent;
+
+	/*
+	 * If the upstream bridge is untrusted we treat this device
+	 * untrusted as well.
+	 */
+	parent = pci_upstream_bridge(dev);
+	if (parent && (parent->untrusted || parent->external_facing))
+		dev->untrusted = true;
+}
+
+static void pci_set_removable(struct pci_dev *dev)
+{
+	struct pci_dev *parent = pci_upstream_bridge(dev);
+
+	/*
+	 * We (only) consider everything downstream from an external_facing
+	 * device to be removable by the user. We're mainly concerned with
+	 * consumer platforms with user accessible thunderbolt ports that are
+	 * vulnerable to DMA attacks, and we expect those ports to be marked by
+	 * the firmware as external_facing. Devices in traditional hotplug
+	 * slots can technically be removed, but the expectation is that unless
+	 * the port is marked with external_facing, such devices are less
+	 * accessible to user / may not be removed by end user, and thus not
+	 * exposed as "removable" to userspace.
+	 */
+	if (parent &&
+	    (parent->external_facing || dev_is_removable(&parent->dev)))
+		dev_set_removable(&dev->dev, DEVICE_REMOVABLE);
+}
+
+/**
+ * pci_ext_cfg_is_aliased - Is ext config space just an alias of std config?
+ * @dev: PCI device
+ *
+ * PCI Express to PCI/PCI-X Bridge Specification, rev 1.0, 4.1.4 says that
+ * when forwarding a type1 configuration request the bridge must check that
+ * the extended register address field is zero.  The bridge is not permitted
+ * to forward the transactions and must handle it as an Unsupported Request.
+ * Some bridges do not follow this rule and simply drop the extended register
+ * bits, resulting in the standard config space being aliased, every 256
+ * bytes across the entire configuration space.  Test for this condition by
+ * comparing the first dword of each potential alias to the vendor/device ID.
+ * Known offenders:
+ *   ASM1083/1085 PCIe-to-PCI Reversible Bridge (1b21:1080, rev 01 & 03)
+ *   AMD/ATI SBx00 PCI to PCI Bridge (1002:4384, rev 40)
+ */
+static bool pci_ext_cfg_is_aliased(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_QUIRKS
+	int pos, ret;
+	u32 header, tmp;
+
+	pci_read_config_dword(dev, PCI_VENDOR_ID, &header);
+
+	for (pos = PCI_CFG_SPACE_SIZE;
+	     pos < PCI_CFG_SPACE_EXP_SIZE; pos += PCI_CFG_SPACE_SIZE) {
+		ret = pci_read_config_dword(dev, pos, &tmp);
+		if ((ret != PCIBIOS_SUCCESSFUL) || (header != tmp))
+			return false;
+	}
+
+	return true;
+#else
+	return false;
+#endif
+}
+
+/**
+ * pci_cfg_space_size_ext - Get the configuration space size of the PCI device
+ * @dev: PCI device
+ *
+ * Regular PCI devices have 256 bytes, but PCI-X 2 and PCI Express devices
+ * have 4096 bytes.  Even if the device is capable, that doesn't mean we can
+ * access it.  Maybe we don't have a way to generate extended config space
+ * accesses, or the device is behind a reverse Express bridge.  So we try
+ * reading the dword at 0x100 which must either be 0 or a valid extended
+ * capability header.
+ */
+static int pci_cfg_space_size_ext(struct pci_dev *dev)
+{
+	u32 status;
+	int pos = PCI_CFG_SPACE_SIZE;
+
+	if (pci_read_config_dword(dev, pos, &status) != PCIBIOS_SUCCESSFUL)
+		return PCI_CFG_SPACE_SIZE;
+	if (PCI_POSSIBLE_ERROR(status) || pci_ext_cfg_is_aliased(dev))
+		return PCI_CFG_SPACE_SIZE;
+
+	return PCI_CFG_SPACE_EXP_SIZE;
+}
+
+int pci_cfg_space_size(struct pci_dev *dev)
+{
+	int pos;
+	u32 status;
+	u16 class;
+
+#ifdef CONFIG_PCI_IOV
+	/*
+	 * Per the SR-IOV specification (rev 1.1, sec 3.5), VFs are required to
+	 * implement a PCIe capability and therefore must implement extended
+	 * config space.  We can skip the NO_EXTCFG test below and the
+	 * reachability/aliasing test in pci_cfg_space_size_ext() by virtue of
+	 * the fact that the SR-IOV capability on the PF resides in extended
+	 * config space and must be accessible and non-aliased to have enabled
+	 * support for this VF.  This is a micro performance optimization for
+	 * systems supporting many VFs.
+	 */
+	if (dev->is_virtfn)
+		return PCI_CFG_SPACE_EXP_SIZE;
+#endif
+
+	if (dev->bus->bus_flags & PCI_BUS_FLAGS_NO_EXTCFG)
+		return PCI_CFG_SPACE_SIZE;
+
+	class = dev->class >> 8;
+	if (class == PCI_CLASS_BRIDGE_HOST)
+		return pci_cfg_space_size_ext(dev);
+
+	if (pci_is_pcie(dev))
+		return pci_cfg_space_size_ext(dev);
+
+	pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+	if (!pos)
+		return PCI_CFG_SPACE_SIZE;
+
+	pci_read_config_dword(dev, pos + PCI_X_STATUS, &status);
+	if (status & (PCI_X_STATUS_266MHZ | PCI_X_STATUS_533MHZ))
+		return pci_cfg_space_size_ext(dev);
+
+	return PCI_CFG_SPACE_SIZE;
+}
+
+static u32 pci_class(struct pci_dev *dev)
+{
+	u32 class;
+
+#ifdef CONFIG_PCI_IOV
+	if (dev->is_virtfn)
+		return dev->physfn->sriov->class;
+#endif
+	pci_read_config_dword(dev, PCI_CLASS_REVISION, &class);
+	return class;
+}
+
+static void pci_subsystem_ids(struct pci_dev *dev, u16 *vendor, u16 *device)
+{
+#ifdef CONFIG_PCI_IOV
+	if (dev->is_virtfn) {
+		*vendor = dev->physfn->sriov->subsystem_vendor;
+		*device = dev->physfn->sriov->subsystem_device;
+		return;
+	}
+#endif
+	pci_read_config_word(dev, PCI_SUBSYSTEM_VENDOR_ID, vendor);
+	pci_read_config_word(dev, PCI_SUBSYSTEM_ID, device);
+}
+
+static u8 pci_hdr_type(struct pci_dev *dev)
+{
+	u8 hdr_type;
+
+#ifdef CONFIG_PCI_IOV
+	if (dev->is_virtfn)
+		return dev->physfn->sriov->hdr_type;
+#endif
+	pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type);
+	return hdr_type;
+}
+
+#define LEGACY_IO_RESOURCE	(IORESOURCE_IO | IORESOURCE_PCI_FIXED)
+
+/**
+ * pci_intx_mask_broken - Test PCI_COMMAND_INTX_DISABLE writability
+ * @dev: PCI device
+ *
+ * Test whether PCI_COMMAND_INTX_DISABLE is writable for @dev.  Check this
+ * at enumeration-time to avoid modifying PCI_COMMAND at run-time.
+ */
+static int pci_intx_mask_broken(struct pci_dev *dev)
+{
+	u16 orig, toggle, new;
+
+	pci_read_config_word(dev, PCI_COMMAND, &orig);
+	toggle = orig ^ PCI_COMMAND_INTX_DISABLE;
+	pci_write_config_word(dev, PCI_COMMAND, toggle);
+	pci_read_config_word(dev, PCI_COMMAND, &new);
+
+	pci_write_config_word(dev, PCI_COMMAND, orig);
+
+	/*
+	 * PCI_COMMAND_INTX_DISABLE was reserved and read-only prior to PCI
+	 * r2.3, so strictly speaking, a device is not *broken* if it's not
+	 * writable.  But we'll live with the misnomer for now.
+	 */
+	if (new != toggle)
+		return 1;
+	return 0;
+}
+
+static void early_dump_pci_device(struct pci_dev *pdev)
+{
+	u32 value[256 / 4];
+	int i;
+
+	pci_info(pdev, "config space:\n");
+
+	for (i = 0; i < 256; i += 4)
+		pci_read_config_dword(pdev, i, &value[i / 4]);
+
+	print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1,
+		       value, 256, false);
+}
+
+/**
+ * pci_setup_device - Fill in class and map information of a device
+ * @dev: the device structure to fill
+ *
+ * Initialize the device structure with information about the device's
+ * vendor,class,memory and IO-space addresses, IRQ lines etc.
+ * Called at initialisation of the PCI subsystem and by CardBus services.
+ * Returns 0 on success and negative if unknown type of device (not normal,
+ * bridge or CardBus).
+ */
+int pci_setup_device(struct pci_dev *dev)
+{
+	u32 class;
+	u16 cmd;
+	u8 hdr_type;
+	int err, pos = 0;
+	struct pci_bus_region region;
+	struct resource *res;
+
+	hdr_type = pci_hdr_type(dev);
+
+	dev->sysdata = dev->bus->sysdata;
+	dev->dev.parent = dev->bus->bridge;
+	dev->dev.bus = &pci_bus_type;
+	dev->hdr_type = hdr_type & 0x7f;
+	dev->multifunction = !!(hdr_type & 0x80);
+	dev->error_state = pci_channel_io_normal;
+	set_pcie_port_type(dev);
+
+	err = pci_set_of_node(dev);
+	if (err)
+		return err;
+	pci_set_acpi_fwnode(dev);
+
+	pci_dev_assign_slot(dev);
+
+	/*
+	 * Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
+	 * set this higher, assuming the system even supports it.
+	 */
+	dev->dma_mask = 0xffffffff;
+
+	dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus),
+		     dev->bus->number, PCI_SLOT(dev->devfn),
+		     PCI_FUNC(dev->devfn));
+
+	class = pci_class(dev);
+
+	dev->revision = class & 0xff;
+	dev->class = class >> 8;		    /* upper 3 bytes */
+
+	if (pci_early_dump)
+		early_dump_pci_device(dev);
+
+	/* Need to have dev->class ready */
+	dev->cfg_size = pci_cfg_space_size(dev);
+
+	/* Need to have dev->cfg_size ready */
+	set_pcie_thunderbolt(dev);
+
+	set_pcie_untrusted(dev);
+
+	/* "Unknown power state" */
+	dev->current_state = PCI_UNKNOWN;
+
+	/* Early fixups, before probing the BARs */
+	pci_fixup_device(pci_fixup_early, dev);
+
+	pci_set_removable(dev);
+
+	pci_info(dev, "[%04x:%04x] type %02x class %#08x\n",
+		 dev->vendor, dev->device, dev->hdr_type, dev->class);
+
+	/* Device class may be changed after fixup */
+	class = dev->class >> 8;
+
+	if (dev->non_compliant_bars && !dev->mmio_always_on) {
+		pci_read_config_word(dev, PCI_COMMAND, &cmd);
+		if (cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
+			pci_info(dev, "device has non-compliant BARs; disabling IO/MEM decoding\n");
+			cmd &= ~PCI_COMMAND_IO;
+			cmd &= ~PCI_COMMAND_MEMORY;
+			pci_write_config_word(dev, PCI_COMMAND, cmd);
+		}
+	}
+
+	dev->broken_intx_masking = pci_intx_mask_broken(dev);
+
+	switch (dev->hdr_type) {		    /* header type */
+	case PCI_HEADER_TYPE_NORMAL:		    /* standard header */
+		if (class == PCI_CLASS_BRIDGE_PCI)
+			goto bad;
+		pci_read_irq(dev);
+		pci_read_bases(dev, 6, PCI_ROM_ADDRESS);
+
+		pci_subsystem_ids(dev, &dev->subsystem_vendor, &dev->subsystem_device);
+
+		/*
+		 * Do the ugly legacy mode stuff here rather than broken chip
+		 * quirk code. Legacy mode ATA controllers have fixed
+		 * addresses. These are not always echoed in BAR0-3, and
+		 * BAR0-3 in a few cases contain junk!
+		 */
+		if (class == PCI_CLASS_STORAGE_IDE) {
+			u8 progif;
+			pci_read_config_byte(dev, PCI_CLASS_PROG, &progif);
+			if ((progif & 1) == 0) {
+				region.start = 0x1F0;
+				region.end = 0x1F7;
+				res = &dev->resource[0];
+				res->flags = LEGACY_IO_RESOURCE;
+				pcibios_bus_to_resource(dev->bus, res, &region);
+				pci_info(dev, "legacy IDE quirk: reg 0x10: %pR\n",
+					 res);
+				region.start = 0x3F6;
+				region.end = 0x3F6;
+				res = &dev->resource[1];
+				res->flags = LEGACY_IO_RESOURCE;
+				pcibios_bus_to_resource(dev->bus, res, &region);
+				pci_info(dev, "legacy IDE quirk: reg 0x14: %pR\n",
+					 res);
+			}
+			if ((progif & 4) == 0) {
+				region.start = 0x170;
+				region.end = 0x177;
+				res = &dev->resource[2];
+				res->flags = LEGACY_IO_RESOURCE;
+				pcibios_bus_to_resource(dev->bus, res, &region);
+				pci_info(dev, "legacy IDE quirk: reg 0x18: %pR\n",
+					 res);
+				region.start = 0x376;
+				region.end = 0x376;
+				res = &dev->resource[3];
+				res->flags = LEGACY_IO_RESOURCE;
+				pcibios_bus_to_resource(dev->bus, res, &region);
+				pci_info(dev, "legacy IDE quirk: reg 0x1c: %pR\n",
+					 res);
+			}
+		}
+		break;
+
+	case PCI_HEADER_TYPE_BRIDGE:		    /* bridge header */
+		/*
+		 * The PCI-to-PCI bridge spec requires that subtractive
+		 * decoding (i.e. transparent) bridge must have programming
+		 * interface code of 0x01.
+		 */
+		pci_read_irq(dev);
+		dev->transparent = ((dev->class & 0xff) == 1);
+		pci_read_bases(dev, 2, PCI_ROM_ADDRESS1);
+		pci_read_bridge_windows(dev);
+		set_pcie_hotplug_bridge(dev);
+		pos = pci_find_capability(dev, PCI_CAP_ID_SSVID);
+		if (pos) {
+			pci_read_config_word(dev, pos + PCI_SSVID_VENDOR_ID, &dev->subsystem_vendor);
+			pci_read_config_word(dev, pos + PCI_SSVID_DEVICE_ID, &dev->subsystem_device);
+		}
+		break;
+
+	case PCI_HEADER_TYPE_CARDBUS:		    /* CardBus bridge header */
+		if (class != PCI_CLASS_BRIDGE_CARDBUS)
+			goto bad;
+		pci_read_irq(dev);
+		pci_read_bases(dev, 1, 0);
+		pci_read_config_word(dev, PCI_CB_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
+		pci_read_config_word(dev, PCI_CB_SUBSYSTEM_ID, &dev->subsystem_device);
+		break;
+
+	default:				    /* unknown header */
+		pci_err(dev, "unknown header type %02x, ignoring device\n",
+			dev->hdr_type);
+		pci_release_of_node(dev);
+		return -EIO;
+
+	bad:
+		pci_err(dev, "ignoring class %#08x (doesn't match header type %02x)\n",
+			dev->class, dev->hdr_type);
+		dev->class = PCI_CLASS_NOT_DEFINED << 8;
+	}
+
+	/* We found a fine healthy device, go go go... */
+	return 0;
+}
+
+static void pci_configure_mps(struct pci_dev *dev)
+{
+	struct pci_dev *bridge = pci_upstream_bridge(dev);
+	int mps, mpss, p_mps, rc;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	/* MPS and MRRS fields are of type 'RsvdP' for VFs, short-circuit out */
+	if (dev->is_virtfn)
+		return;
+
+	/*
+	 * For Root Complex Integrated Endpoints, program the maximum
+	 * supported value unless limited by the PCIE_BUS_PEER2PEER case.
+	 */
+	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) {
+		if (pcie_bus_config == PCIE_BUS_PEER2PEER)
+			mps = 128;
+		else
+			mps = 128 << dev->pcie_mpss;
+		rc = pcie_set_mps(dev, mps);
+		if (rc) {
+			pci_warn(dev, "can't set Max Payload Size to %d; if necessary, use \"pci=pcie_bus_safe\" and report a bug\n",
+				 mps);
+		}
+		return;
+	}
+
+	if (!bridge || !pci_is_pcie(bridge))
+		return;
+
+	mps = pcie_get_mps(dev);
+	p_mps = pcie_get_mps(bridge);
+
+	if (mps == p_mps)
+		return;
+
+	if (pcie_bus_config == PCIE_BUS_TUNE_OFF) {
+		pci_warn(dev, "Max Payload Size %d, but upstream %s set to %d; if necessary, use \"pci=pcie_bus_safe\" and report a bug\n",
+			 mps, pci_name(bridge), p_mps);
+		return;
+	}
+
+	/*
+	 * Fancier MPS configuration is done later by
+	 * pcie_bus_configure_settings()
+	 */
+	if (pcie_bus_config != PCIE_BUS_DEFAULT)
+		return;
+
+	mpss = 128 << dev->pcie_mpss;
+	if (mpss < p_mps && pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) {
+		pcie_set_mps(bridge, mpss);
+		pci_info(dev, "Upstream bridge's Max Payload Size set to %d (was %d, max %d)\n",
+			 mpss, p_mps, 128 << bridge->pcie_mpss);
+		p_mps = pcie_get_mps(bridge);
+	}
+
+	rc = pcie_set_mps(dev, p_mps);
+	if (rc) {
+		pci_warn(dev, "can't set Max Payload Size to %d; if necessary, use \"pci=pcie_bus_safe\" and report a bug\n",
+			 p_mps);
+		return;
+	}
+
+	pci_info(dev, "Max Payload Size set to %d (was %d, max %d)\n",
+		 p_mps, mps, mpss);
+}
+
+int pci_configure_extended_tags(struct pci_dev *dev, void *ign)
+{
+	struct pci_host_bridge *host;
+	u32 cap;
+	u16 ctl;
+	int ret;
+
+	if (!pci_is_pcie(dev))
+		return 0;
+
+	ret = pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap);
+	if (ret)
+		return 0;
+
+	if (!(cap & PCI_EXP_DEVCAP_EXT_TAG))
+		return 0;
+
+	ret = pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &ctl);
+	if (ret)
+		return 0;
+
+	host = pci_find_host_bridge(dev->bus);
+	if (!host)
+		return 0;
+
+	/*
+	 * If some device in the hierarchy doesn't handle Extended Tags
+	 * correctly, make sure they're disabled.
+	 */
+	if (host->no_ext_tags) {
+		if (ctl & PCI_EXP_DEVCTL_EXT_TAG) {
+			pci_info(dev, "disabling Extended Tags\n");
+			pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
+						   PCI_EXP_DEVCTL_EXT_TAG);
+		}
+		return 0;
+	}
+
+	if (!(ctl & PCI_EXP_DEVCTL_EXT_TAG)) {
+		pci_info(dev, "enabling Extended Tags\n");
+		pcie_capability_set_word(dev, PCI_EXP_DEVCTL,
+					 PCI_EXP_DEVCTL_EXT_TAG);
+	}
+	return 0;
+}
+
+/**
+ * pcie_relaxed_ordering_enabled - Probe for PCIe relaxed ordering enable
+ * @dev: PCI device to query
+ *
+ * Returns true if the device has enabled relaxed ordering attribute.
+ */
+bool pcie_relaxed_ordering_enabled(struct pci_dev *dev)
+{
+	u16 v;
+
+	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &v);
+
+	return !!(v & PCI_EXP_DEVCTL_RELAX_EN);
+}
+EXPORT_SYMBOL(pcie_relaxed_ordering_enabled);
+
+static void pci_configure_relaxed_ordering(struct pci_dev *dev)
+{
+	struct pci_dev *root;
+
+	/* PCI_EXP_DEVCTL_RELAX_EN is RsvdP in VFs */
+	if (dev->is_virtfn)
+		return;
+
+	if (!pcie_relaxed_ordering_enabled(dev))
+		return;
+
+	/*
+	 * For now, we only deal with Relaxed Ordering issues with Root
+	 * Ports. Peer-to-Peer DMA is another can of worms.
+	 */
+	root = pcie_find_root_port(dev);
+	if (!root)
+		return;
+
+	if (root->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING) {
+		pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
+					   PCI_EXP_DEVCTL_RELAX_EN);
+		pci_info(dev, "Relaxed Ordering disabled because the Root Port didn't support it\n");
+	}
+}
+
+static void pci_configure_ltr(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCIEASPM
+	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
+	struct pci_dev *bridge;
+	u32 cap, ctl;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	/* Read L1 PM substate capabilities */
+	dev->l1ss = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_L1SS);
+
+	pcie_capability_read_dword(dev, PCI_EXP_DEVCAP2, &cap);
+	if (!(cap & PCI_EXP_DEVCAP2_LTR))
+		return;
+
+	pcie_capability_read_dword(dev, PCI_EXP_DEVCTL2, &ctl);
+	if (ctl & PCI_EXP_DEVCTL2_LTR_EN) {
+		if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
+			dev->ltr_path = 1;
+			return;
+		}
+
+		bridge = pci_upstream_bridge(dev);
+		if (bridge && bridge->ltr_path)
+			dev->ltr_path = 1;
+
+		return;
+	}
+
+	if (!host->native_ltr)
+		return;
+
+	/*
+	 * Software must not enable LTR in an Endpoint unless the Root
+	 * Complex and all intermediate Switches indicate support for LTR.
+	 * PCIe r4.0, sec 6.18.
+	 */
+	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
+		pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
+					 PCI_EXP_DEVCTL2_LTR_EN);
+		dev->ltr_path = 1;
+		return;
+	}
+
+	/*
+	 * If we're configuring a hot-added device, LTR was likely
+	 * disabled in the upstream bridge, so re-enable it before enabling
+	 * it in the new device.
+	 */
+	bridge = pci_upstream_bridge(dev);
+	if (bridge && bridge->ltr_path) {
+		pci_bridge_reconfigure_ltr(dev);
+		pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
+					 PCI_EXP_DEVCTL2_LTR_EN);
+		dev->ltr_path = 1;
+	}
+#endif
+}
+
+static void pci_configure_eetlp_prefix(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_PASID
+	struct pci_dev *bridge;
+	int pcie_type;
+	u32 cap;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	pcie_capability_read_dword(dev, PCI_EXP_DEVCAP2, &cap);
+	if (!(cap & PCI_EXP_DEVCAP2_EE_PREFIX))
+		return;
+
+	pcie_type = pci_pcie_type(dev);
+	if (pcie_type == PCI_EXP_TYPE_ROOT_PORT ||
+	    pcie_type == PCI_EXP_TYPE_RC_END)
+		dev->eetlp_prefix_path = 1;
+	else {
+		bridge = pci_upstream_bridge(dev);
+		if (bridge && bridge->eetlp_prefix_path)
+			dev->eetlp_prefix_path = 1;
+	}
+#endif
+}
+
+static void pci_configure_serr(struct pci_dev *dev)
+{
+	u16 control;
+
+	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+
+		/*
+		 * A bridge will not forward ERR_ messages coming from an
+		 * endpoint unless SERR# forwarding is enabled.
+		 */
+		pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &control);
+		if (!(control & PCI_BRIDGE_CTL_SERR)) {
+			control |= PCI_BRIDGE_CTL_SERR;
+			pci_write_config_word(dev, PCI_BRIDGE_CONTROL, control);
+		}
+	}
+}
+
+static void pci_configure_device(struct pci_dev *dev)
+{
+	pci_configure_mps(dev);
+	pci_configure_extended_tags(dev, NULL);
+	pci_configure_relaxed_ordering(dev);
+	pci_configure_ltr(dev);
+	pci_configure_eetlp_prefix(dev);
+	pci_configure_serr(dev);
+
+	pci_acpi_program_hp_params(dev);
+}
+
+static void pci_release_capabilities(struct pci_dev *dev)
+{
+	pci_aer_exit(dev);
+	pci_rcec_exit(dev);
+	pci_iov_release(dev);
+	pci_free_cap_save_buffers(dev);
+}
+
+/**
+ * pci_release_dev - Free a PCI device structure when all users of it are
+ *		     finished
+ * @dev: device that's been disconnected
+ *
+ * Will be called only by the device core when all users of this PCI device are
+ * done.
+ */
+static void pci_release_dev(struct device *dev)
+{
+	struct pci_dev *pci_dev;
+
+	pci_dev = to_pci_dev(dev);
+	pci_release_capabilities(pci_dev);
+	pci_release_of_node(pci_dev);
+	pcibios_release_device(pci_dev);
+	pci_bus_put(pci_dev->bus);
+	kfree(pci_dev->driver_override);
+	bitmap_free(pci_dev->dma_alias_mask);
+	dev_dbg(dev, "device released\n");
+	kfree(pci_dev);
+}
+
+struct pci_dev *pci_alloc_dev(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	dev = kzalloc(sizeof(struct pci_dev), GFP_KERNEL);
+	if (!dev)
+		return NULL;
+
+	INIT_LIST_HEAD(&dev->bus_list);
+	dev->dev.type = &pci_dev_type;
+	dev->bus = pci_bus_get(bus);
+	dev->driver_exclusive_resource = (struct resource) {
+		.name = "PCI Exclusive",
+		.start = 0,
+		.end = -1,
+	};
+
+	spin_lock_init(&dev->pcie_cap_lock);
+#ifdef CONFIG_PCI_MSI
+	raw_spin_lock_init(&dev->msi_lock);
+#endif
+	return dev;
+}
+EXPORT_SYMBOL(pci_alloc_dev);
+
+static bool pci_bus_crs_vendor_id(u32 l)
+{
+	return (l & 0xffff) == PCI_VENDOR_ID_PCI_SIG;
+}
+
+static bool pci_bus_wait_crs(struct pci_bus *bus, int devfn, u32 *l,
+			     int timeout)
+{
+	int delay = 1;
+
+	if (!pci_bus_crs_vendor_id(*l))
+		return true;	/* not a CRS completion */
+
+	if (!timeout)
+		return false;	/* CRS, but caller doesn't want to wait */
+
+	/*
+	 * We got the reserved Vendor ID that indicates a completion with
+	 * Configuration Request Retry Status (CRS).  Retry until we get a
+	 * valid Vendor ID or we time out.
+	 */
+	while (pci_bus_crs_vendor_id(*l)) {
+		if (delay > timeout) {
+			pr_warn("pci %04x:%02x:%02x.%d: not ready after %dms; giving up\n",
+				pci_domain_nr(bus), bus->number,
+				PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1);
+
+			return false;
+		}
+		if (delay >= 1000)
+			pr_info("pci %04x:%02x:%02x.%d: not ready after %dms; waiting\n",
+				pci_domain_nr(bus), bus->number,
+				PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1);
+
+		msleep(delay);
+		delay *= 2;
+
+		if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
+			return false;
+	}
+
+	if (delay >= 1000)
+		pr_info("pci %04x:%02x:%02x.%d: ready after %dms\n",
+			pci_domain_nr(bus), bus->number,
+			PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1);
+
+	return true;
+}
+
+bool pci_bus_generic_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
+					int timeout)
+{
+	if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
+		return false;
+
+	/* Some broken boards return 0 or ~0 (PCI_ERROR_RESPONSE) if a slot is empty: */
+	if (PCI_POSSIBLE_ERROR(*l) || *l == 0x00000000 ||
+	    *l == 0x0000ffff || *l == 0xffff0000)
+		return false;
+
+	if (pci_bus_crs_vendor_id(*l))
+		return pci_bus_wait_crs(bus, devfn, l, timeout);
+
+	return true;
+}
+
+bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
+				int timeout)
+{
+#ifdef CONFIG_PCI_QUIRKS
+	struct pci_dev *bridge = bus->self;
+
+	/*
+	 * Certain IDT switches have an issue where they improperly trigger
+	 * ACS Source Validation errors on completions for config reads.
+	 */
+	if (bridge && bridge->vendor == PCI_VENDOR_ID_IDT &&
+	    bridge->device == 0x80b5)
+		return pci_idt_bus_quirk(bus, devfn, l, timeout);
+#endif
+
+	return pci_bus_generic_read_dev_vendor_id(bus, devfn, l, timeout);
+}
+EXPORT_SYMBOL(pci_bus_read_dev_vendor_id);
+
+/*
+ * Read the config data for a PCI device, sanity-check it,
+ * and fill in the dev structure.
+ */
+static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
+{
+	struct pci_dev *dev;
+	u32 l;
+
+	if (!pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000))
+		return NULL;
+
+	dev = pci_alloc_dev(bus);
+	if (!dev)
+		return NULL;
+
+	dev->devfn = devfn;
+	dev->vendor = l & 0xffff;
+	dev->device = (l >> 16) & 0xffff;
+
+	if (pci_setup_device(dev)) {
+		pci_bus_put(dev->bus);
+		kfree(dev);
+		return NULL;
+	}
+
+	return dev;
+}
+
+void pcie_report_downtraining(struct pci_dev *dev)
+{
+	if (!pci_is_pcie(dev))
+		return;
+
+	/* Look from the device up to avoid downstream ports with no devices */
+	if ((pci_pcie_type(dev) != PCI_EXP_TYPE_ENDPOINT) &&
+	    (pci_pcie_type(dev) != PCI_EXP_TYPE_LEG_END) &&
+	    (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM))
+		return;
+
+	/* Multi-function PCIe devices share the same link/status */
+	if (PCI_FUNC(dev->devfn) != 0 || dev->is_virtfn)
+		return;
+
+	/* Print link status only if the device is constrained by the fabric */
+	__pcie_print_link_status(dev, false);
+}
+
+static void pci_init_capabilities(struct pci_dev *dev)
+{
+	pci_ea_init(dev);		/* Enhanced Allocation */
+	pci_msi_init(dev);		/* Disable MSI */
+	pci_msix_init(dev);		/* Disable MSI-X */
+
+	/* Buffers for saving PCIe and PCI-X capabilities */
+	pci_allocate_cap_save_buffers(dev);
+
+	pci_pm_init(dev);		/* Power Management */
+	pci_vpd_init(dev);		/* Vital Product Data */
+	pci_configure_ari(dev);		/* Alternative Routing-ID Forwarding */
+	pci_iov_init(dev);		/* Single Root I/O Virtualization */
+	pci_ats_init(dev);		/* Address Translation Services */
+	pci_pri_init(dev);		/* Page Request Interface */
+	pci_pasid_init(dev);		/* Process Address Space ID */
+	pci_acs_init(dev);		/* Access Control Services */
+	pci_ptm_init(dev);		/* Precision Time Measurement */
+	pci_aer_init(dev);		/* Advanced Error Reporting */
+	pci_dpc_init(dev);		/* Downstream Port Containment */
+	pci_rcec_init(dev);		/* Root Complex Event Collector */
+	pci_doe_init(dev);		/* Data Object Exchange */
+
+	pcie_report_downtraining(dev);
+	pci_init_reset_methods(dev);
+}
+
+/*
+ * This is the equivalent of pci_host_bridge_msi_domain() that acts on
+ * devices. Firmware interfaces that can select the MSI domain on a
+ * per-device basis should be called from here.
+ */
+static struct irq_domain *pci_dev_msi_domain(struct pci_dev *dev)
+{
+	struct irq_domain *d;
+
+	/*
+	 * If a domain has been set through the pcibios_device_add()
+	 * callback, then this is the one (platform code knows best).
+	 */
+	d = dev_get_msi_domain(&dev->dev);
+	if (d)
+		return d;
+
+	/*
+	 * Let's see if we have a firmware interface able to provide
+	 * the domain.
+	 */
+	d = pci_msi_get_device_domain(dev);
+	if (d)
+		return d;
+
+	return NULL;
+}
+
+static void pci_set_msi_domain(struct pci_dev *dev)
+{
+	struct irq_domain *d;
+
+	/*
+	 * If the platform or firmware interfaces cannot supply a
+	 * device-specific MSI domain, then inherit the default domain
+	 * from the host bridge itself.
+	 */
+	d = pci_dev_msi_domain(dev);
+	if (!d)
+		d = dev_get_msi_domain(&dev->bus->dev);
+
+	dev_set_msi_domain(&dev->dev, d);
+}
+
+void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
+{
+	int ret;
+
+	pci_configure_device(dev);
+
+	device_initialize(&dev->dev);
+	dev->dev.release = pci_release_dev;
+
+	set_dev_node(&dev->dev, pcibus_to_node(bus));
+	dev->dev.dma_mask = &dev->dma_mask;
+	dev->dev.dma_parms = &dev->dma_parms;
+	dev->dev.coherent_dma_mask = 0xffffffffull;
+
+	dma_set_max_seg_size(&dev->dev, 65536);
+	dma_set_seg_boundary(&dev->dev, 0xffffffff);
+
+	pcie_failed_link_retrain(dev);
+
+	/* Fix up broken headers */
+	pci_fixup_device(pci_fixup_header, dev);
+
+	pci_reassigndev_resource_alignment(dev);
+
+	dev->state_saved = false;
+
+	pci_init_capabilities(dev);
+
+	/*
+	 * Add the device to our list of discovered devices
+	 * and the bus list for fixup functions, etc.
+	 */
+	down_write(&pci_bus_sem);
+	list_add_tail(&dev->bus_list, &bus->devices);
+	up_write(&pci_bus_sem);
+
+	ret = pcibios_device_add(dev);
+	WARN_ON(ret < 0);
+
+	/* Set up MSI IRQ domain */
+	pci_set_msi_domain(dev);
+
+	/* Notifier could use PCI capabilities */
+	dev->match_driver = false;
+	ret = device_add(&dev->dev);
+	WARN_ON(ret < 0);
+}
+
+struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn)
+{
+	struct pci_dev *dev;
+
+	dev = pci_get_slot(bus, devfn);
+	if (dev) {
+		pci_dev_put(dev);
+		return dev;
+	}
+
+	dev = pci_scan_device(bus, devfn);
+	if (!dev)
+		return NULL;
+
+	pci_device_add(dev, bus);
+
+	return dev;
+}
+EXPORT_SYMBOL(pci_scan_single_device);
+
+static int next_ari_fn(struct pci_bus *bus, struct pci_dev *dev, int fn)
+{
+	int pos;
+	u16 cap = 0;
+	unsigned int next_fn;
+
+	if (!dev)
+		return -ENODEV;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ARI);
+	if (!pos)
+		return -ENODEV;
+
+	pci_read_config_word(dev, pos + PCI_ARI_CAP, &cap);
+	next_fn = PCI_ARI_CAP_NFN(cap);
+	if (next_fn <= fn)
+		return -ENODEV;	/* protect against malformed list */
+
+	return next_fn;
+}
+
+static int next_fn(struct pci_bus *bus, struct pci_dev *dev, int fn)
+{
+	if (pci_ari_enabled(bus))
+		return next_ari_fn(bus, dev, fn);
+
+	if (fn >= 7)
+		return -ENODEV;
+	/* only multifunction devices may have more functions */
+	if (dev && !dev->multifunction)
+		return -ENODEV;
+
+	return fn + 1;
+}
+
+static int only_one_child(struct pci_bus *bus)
+{
+	struct pci_dev *bridge = bus->self;
+
+	/*
+	 * Systems with unusual topologies set PCI_SCAN_ALL_PCIE_DEVS so
+	 * we scan for all possible devices, not just Device 0.
+	 */
+	if (pci_has_flag(PCI_SCAN_ALL_PCIE_DEVS))
+		return 0;
+
+	/*
+	 * A PCIe Downstream Port normally leads to a Link with only Device
+	 * 0 on it (PCIe spec r3.1, sec 7.3.1).  As an optimization, scan
+	 * only for Device 0 in that situation.
+	 */
+	if (bridge && pci_is_pcie(bridge) && pcie_downstream_port(bridge))
+		return 1;
+
+	return 0;
+}
+
+/**
+ * pci_scan_slot - Scan a PCI slot on a bus for devices
+ * @bus: PCI bus to scan
+ * @devfn: slot number to scan (must have zero function)
+ *
+ * Scan a PCI slot on the specified PCI bus for devices, adding
+ * discovered devices to the @bus->devices list.  New devices
+ * will not have is_added set.
+ *
+ * Returns the number of new devices found.
+ */
+int pci_scan_slot(struct pci_bus *bus, int devfn)
+{
+	struct pci_dev *dev;
+	int fn = 0, nr = 0;
+
+	if (only_one_child(bus) && (devfn > 0))
+		return 0; /* Already scanned the entire slot */
+
+	do {
+		dev = pci_scan_single_device(bus, devfn + fn);
+		if (dev) {
+			if (!pci_dev_is_added(dev))
+				nr++;
+			if (fn > 0)
+				dev->multifunction = 1;
+		} else if (fn == 0) {
+			/*
+			 * Function 0 is required unless we are running on
+			 * a hypervisor that passes through individual PCI
+			 * functions.
+			 */
+			if (!hypervisor_isolated_pci_functions())
+				break;
+		}
+		fn = next_fn(bus, dev, fn);
+	} while (fn >= 0);
+
+	/* Only one slot has PCIe device */
+	if (bus->self && nr)
+		pcie_aspm_init_link_state(bus->self);
+
+	return nr;
+}
+EXPORT_SYMBOL(pci_scan_slot);
+
+static int pcie_find_smpss(struct pci_dev *dev, void *data)
+{
+	u8 *smpss = data;
+
+	if (!pci_is_pcie(dev))
+		return 0;
+
+	/*
+	 * We don't have a way to change MPS settings on devices that have
+	 * drivers attached.  A hot-added device might support only the minimum
+	 * MPS setting (MPS=128).  Therefore, if the fabric contains a bridge
+	 * where devices may be hot-added, we limit the fabric MPS to 128 so
+	 * hot-added devices will work correctly.
+	 *
+	 * However, if we hot-add a device to a slot directly below a Root
+	 * Port, it's impossible for there to be other existing devices below
+	 * the port.  We don't limit the MPS in this case because we can
+	 * reconfigure MPS on both the Root Port and the hot-added device,
+	 * and there are no other devices involved.
+	 *
+	 * Note that this PCIE_BUS_SAFE path assumes no peer-to-peer DMA.
+	 */
+	if (dev->is_hotplug_bridge &&
+	    pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT)
+		*smpss = 0;
+
+	if (*smpss > dev->pcie_mpss)
+		*smpss = dev->pcie_mpss;
+
+	return 0;
+}
+
+static void pcie_write_mps(struct pci_dev *dev, int mps)
+{
+	int rc;
+
+	if (pcie_bus_config == PCIE_BUS_PERFORMANCE) {
+		mps = 128 << dev->pcie_mpss;
+
+		if (pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT &&
+		    dev->bus->self)
+
+			/*
+			 * For "Performance", the assumption is made that
+			 * downstream communication will never be larger than
+			 * the MRRS.  So, the MPS only needs to be configured
+			 * for the upstream communication.  This being the case,
+			 * walk from the top down and set the MPS of the child
+			 * to that of the parent bus.
+			 *
+			 * Configure the device MPS with the smaller of the
+			 * device MPSS or the bridge MPS (which is assumed to be
+			 * properly configured at this point to the largest
+			 * allowable MPS based on its parent bus).
+			 */
+			mps = min(mps, pcie_get_mps(dev->bus->self));
+	}
+
+	rc = pcie_set_mps(dev, mps);
+	if (rc)
+		pci_err(dev, "Failed attempting to set the MPS\n");
+}
+
+static void pcie_write_mrrs(struct pci_dev *dev)
+{
+	int rc, mrrs;
+
+	/*
+	 * In the "safe" case, do not configure the MRRS.  There appear to be
+	 * issues with setting MRRS to 0 on a number of devices.
+	 */
+	if (pcie_bus_config != PCIE_BUS_PERFORMANCE)
+		return;
+
+	/*
+	 * For max performance, the MRRS must be set to the largest supported
+	 * value.  However, it cannot be configured larger than the MPS the
+	 * device or the bus can support.  This should already be properly
+	 * configured by a prior call to pcie_write_mps().
+	 */
+	mrrs = pcie_get_mps(dev);
+
+	/*
+	 * MRRS is a R/W register.  Invalid values can be written, but a
+	 * subsequent read will verify if the value is acceptable or not.
+	 * If the MRRS value provided is not acceptable (e.g., too large),
+	 * shrink the value until it is acceptable to the HW.
+	 */
+	while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) {
+		rc = pcie_set_readrq(dev, mrrs);
+		if (!rc)
+			break;
+
+		pci_warn(dev, "Failed attempting to set the MRRS\n");
+		mrrs /= 2;
+	}
+
+	if (mrrs < 128)
+		pci_err(dev, "MRRS was unable to be configured with a safe value.  If problems are experienced, try running with pci=pcie_bus_safe\n");
+}
+
+static int pcie_bus_configure_set(struct pci_dev *dev, void *data)
+{
+	int mps, orig_mps;
+
+	if (!pci_is_pcie(dev))
+		return 0;
+
+	if (pcie_bus_config == PCIE_BUS_TUNE_OFF ||
+	    pcie_bus_config == PCIE_BUS_DEFAULT)
+		return 0;
+
+	mps = 128 << *(u8 *)data;
+	orig_mps = pcie_get_mps(dev);
+
+	pcie_write_mps(dev, mps);
+	pcie_write_mrrs(dev);
+
+	pci_info(dev, "Max Payload Size set to %4d/%4d (was %4d), Max Read Rq %4d\n",
+		 pcie_get_mps(dev), 128 << dev->pcie_mpss,
+		 orig_mps, pcie_get_readrq(dev));
+
+	return 0;
+}
+
+/*
+ * pcie_bus_configure_settings() requires that pci_walk_bus work in a top-down,
+ * parents then children fashion.  If this changes, then this code will not
+ * work as designed.
+ */
+void pcie_bus_configure_settings(struct pci_bus *bus)
+{
+	u8 smpss = 0;
+
+	if (!bus->self)
+		return;
+
+	if (!pci_is_pcie(bus->self))
+		return;
+
+	/*
+	 * FIXME - Peer to peer DMA is possible, though the endpoint would need
+	 * to be aware of the MPS of the destination.  To work around this,
+	 * simply force the MPS of the entire system to the smallest possible.
+	 */
+	if (pcie_bus_config == PCIE_BUS_PEER2PEER)
+		smpss = 0;
+
+	if (pcie_bus_config == PCIE_BUS_SAFE) {
+		smpss = bus->self->pcie_mpss;
+
+		pcie_find_smpss(bus->self, &smpss);
+		pci_walk_bus(bus, pcie_find_smpss, &smpss);
+	}
+
+	pcie_bus_configure_set(bus->self, &smpss);
+	pci_walk_bus(bus, pcie_bus_configure_set, &smpss);
+}
+EXPORT_SYMBOL_GPL(pcie_bus_configure_settings);
+
+/*
+ * Called after each bus is probed, but before its children are examined.  This
+ * is marked as __weak because multiple architectures define it.
+ */
+void __weak pcibios_fixup_bus(struct pci_bus *bus)
+{
+       /* nothing to do, expected to be removed in the future */
+}
+
+/**
+ * pci_scan_child_bus_extend() - Scan devices below a bus
+ * @bus: Bus to scan for devices
+ * @available_buses: Total number of buses available (%0 does not try to
+ *		     extend beyond the minimal)
+ *
+ * Scans devices below @bus including subordinate buses. Returns new
+ * subordinate number including all the found devices. Passing
+ * @available_buses causes the remaining bus space to be distributed
+ * equally between hotplug-capable bridges to allow future extension of the
+ * hierarchy.
+ */
+static unsigned int pci_scan_child_bus_extend(struct pci_bus *bus,
+					      unsigned int available_buses)
+{
+	unsigned int used_buses, normal_bridges = 0, hotplug_bridges = 0;
+	unsigned int start = bus->busn_res.start;
+	unsigned int devfn, cmax, max = start;
+	struct pci_dev *dev;
+
+	dev_dbg(&bus->dev, "scanning bus\n");
+
+	/* Go find them, Rover! */
+	for (devfn = 0; devfn < 256; devfn += 8)
+		pci_scan_slot(bus, devfn);
+
+	/* Reserve buses for SR-IOV capability */
+	used_buses = pci_iov_bus_range(bus);
+	max += used_buses;
+
+	/*
+	 * After performing arch-dependent fixup of the bus, look behind
+	 * all PCI-to-PCI bridges on this bus.
+	 */
+	if (!bus->is_added) {
+		dev_dbg(&bus->dev, "fixups for bus\n");
+		pcibios_fixup_bus(bus);
+		bus->is_added = 1;
+	}
+
+	/*
+	 * Calculate how many hotplug bridges and normal bridges there
+	 * are on this bus. We will distribute the additional available
+	 * buses between hotplug bridges.
+	 */
+	for_each_pci_bridge(dev, bus) {
+		if (dev->is_hotplug_bridge)
+			hotplug_bridges++;
+		else
+			normal_bridges++;
+	}
+
+	/*
+	 * Scan bridges that are already configured. We don't touch them
+	 * unless they are misconfigured (which will be done in the second
+	 * scan below).
+	 */
+	for_each_pci_bridge(dev, bus) {
+		cmax = max;
+		max = pci_scan_bridge_extend(bus, dev, max, 0, 0);
+
+		/*
+		 * Reserve one bus for each bridge now to avoid extending
+		 * hotplug bridges too much during the second scan below.
+		 */
+		used_buses++;
+		if (max - cmax > 1)
+			used_buses += max - cmax - 1;
+	}
+
+	/* Scan bridges that need to be reconfigured */
+	for_each_pci_bridge(dev, bus) {
+		unsigned int buses = 0;
+
+		if (!hotplug_bridges && normal_bridges == 1) {
+			/*
+			 * There is only one bridge on the bus (upstream
+			 * port) so it gets all available buses which it
+			 * can then distribute to the possible hotplug
+			 * bridges below.
+			 */
+			buses = available_buses;
+		} else if (dev->is_hotplug_bridge) {
+			/*
+			 * Distribute the extra buses between hotplug
+			 * bridges if any.
+			 */
+			buses = available_buses / hotplug_bridges;
+			buses = min(buses, available_buses - used_buses + 1);
+		}
+
+		cmax = max;
+		max = pci_scan_bridge_extend(bus, dev, cmax, buses, 1);
+		/* One bus is already accounted so don't add it again */
+		if (max - cmax > 1)
+			used_buses += max - cmax - 1;
+	}
+
+	/*
+	 * Make sure a hotplug bridge has at least the minimum requested
+	 * number of buses but allow it to grow up to the maximum available
+	 * bus number if there is room.
+	 */
+	if (bus->self && bus->self->is_hotplug_bridge) {
+		used_buses = max_t(unsigned int, available_buses,
+				   pci_hotplug_bus_size - 1);
+		if (max - start < used_buses) {
+			max = start + used_buses;
+
+			/* Do not allocate more buses than we have room left */
+			if (max > bus->busn_res.end)
+				max = bus->busn_res.end;
+
+			dev_dbg(&bus->dev, "%pR extended by %#02x\n",
+				&bus->busn_res, max - start);
+		}
+	}
+
+	/*
+	 * We've scanned the bus and so we know all about what's on
+	 * the other side of any bridges that may be on this bus plus
+	 * any devices.
+	 *
+	 * Return how far we've got finding sub-buses.
+	 */
+	dev_dbg(&bus->dev, "bus scan returning with max=%02x\n", max);
+	return max;
+}
+
+/**
+ * pci_scan_child_bus() - Scan devices below a bus
+ * @bus: Bus to scan for devices
+ *
+ * Scans devices below @bus including subordinate buses. Returns new
+ * subordinate number including all the found devices.
+ */
+unsigned int pci_scan_child_bus(struct pci_bus *bus)
+{
+	return pci_scan_child_bus_extend(bus, 0);
+}
+EXPORT_SYMBOL_GPL(pci_scan_child_bus);
+
+/**
+ * pcibios_root_bridge_prepare - Platform-specific host bridge setup
+ * @bridge: Host bridge to set up
+ *
+ * Default empty implementation.  Replace with an architecture-specific setup
+ * routine, if necessary.
+ */
+int __weak pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+	return 0;
+}
+
+void __weak pcibios_add_bus(struct pci_bus *bus)
+{
+}
+
+void __weak pcibios_remove_bus(struct pci_bus *bus)
+{
+}
+
+struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
+		struct pci_ops *ops, void *sysdata, struct list_head *resources)
+{
+	int error;
+	struct pci_host_bridge *bridge;
+
+	bridge = pci_alloc_host_bridge(0);
+	if (!bridge)
+		return NULL;
+
+	bridge->dev.parent = parent;
+
+	list_splice_init(resources, &bridge->windows);
+	bridge->sysdata = sysdata;
+	bridge->busnr = bus;
+	bridge->ops = ops;
+
+	error = pci_register_host_bridge(bridge);
+	if (error < 0)
+		goto err_out;
+
+	return bridge->bus;
+
+err_out:
+	put_device(&bridge->dev);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(pci_create_root_bus);
+
+int pci_host_probe(struct pci_host_bridge *bridge)
+{
+	struct pci_bus *bus, *child;
+	int ret;
+
+	ret = pci_scan_root_bus_bridge(bridge);
+	if (ret < 0) {
+		dev_err(bridge->dev.parent, "Scanning root bridge failed");
+		return ret;
+	}
+
+	bus = bridge->bus;
+
+	/*
+	 * We insert PCI resources into the iomem_resource and
+	 * ioport_resource trees in either pci_bus_claim_resources()
+	 * or pci_bus_assign_resources().
+	 */
+	if (pci_has_flag(PCI_PROBE_ONLY)) {
+		pci_bus_claim_resources(bus);
+	} else {
+		pci_bus_size_bridges(bus);
+		pci_bus_assign_resources(bus);
+
+		list_for_each_entry(child, &bus->children, node)
+			pcie_bus_configure_settings(child);
+	}
+
+	pci_bus_add_devices(bus);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_host_probe);
+
+int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int bus_max)
+{
+	struct resource *res = &b->busn_res;
+	struct resource *parent_res, *conflict;
+
+	res->start = bus;
+	res->end = bus_max;
+	res->flags = IORESOURCE_BUS;
+
+	if (!pci_is_root_bus(b))
+		parent_res = &b->parent->busn_res;
+	else {
+		parent_res = get_pci_domain_busn_res(pci_domain_nr(b));
+		res->flags |= IORESOURCE_PCI_FIXED;
+	}
+
+	conflict = request_resource_conflict(parent_res, res);
+
+	if (conflict)
+		dev_info(&b->dev,
+			   "busn_res: can not insert %pR under %s%pR (conflicts with %s %pR)\n",
+			    res, pci_is_root_bus(b) ? "domain " : "",
+			    parent_res, conflict->name, conflict);
+
+	return conflict == NULL;
+}
+
+int pci_bus_update_busn_res_end(struct pci_bus *b, int bus_max)
+{
+	struct resource *res = &b->busn_res;
+	struct resource old_res = *res;
+	resource_size_t size;
+	int ret;
+
+	if (res->start > bus_max)
+		return -EINVAL;
+
+	size = bus_max - res->start + 1;
+	ret = adjust_resource(res, res->start, size);
+	dev_info(&b->dev, "busn_res: %pR end %s updated to %02x\n",
+			&old_res, ret ? "can not be" : "is", bus_max);
+
+	if (!ret && !res->parent)
+		pci_bus_insert_busn_res(b, res->start, res->end);
+
+	return ret;
+}
+
+void pci_bus_release_busn_res(struct pci_bus *b)
+{
+	struct resource *res = &b->busn_res;
+	int ret;
+
+	if (!res->flags || !res->parent)
+		return;
+
+	ret = release_resource(res);
+	dev_info(&b->dev, "busn_res: %pR %s released\n",
+			res, ret ? "can not be" : "is");
+}
+
+int pci_scan_root_bus_bridge(struct pci_host_bridge *bridge)
+{
+	struct resource_entry *window;
+	bool found = false;
+	struct pci_bus *b;
+	int max, bus, ret;
+
+	if (!bridge)
+		return -EINVAL;
+
+	resource_list_for_each_entry(window, &bridge->windows)
+		if (window->res->flags & IORESOURCE_BUS) {
+			bridge->busnr = window->res->start;
+			found = true;
+			break;
+		}
+
+	ret = pci_register_host_bridge(bridge);
+	if (ret < 0)
+		return ret;
+
+	b = bridge->bus;
+	bus = bridge->busnr;
+
+	if (!found) {
+		dev_info(&b->dev,
+		 "No busn resource found for root bus, will use [bus %02x-ff]\n",
+			bus);
+		pci_bus_insert_busn_res(b, bus, 255);
+	}
+
+	max = pci_scan_child_bus(b);
+
+	if (!found)
+		pci_bus_update_busn_res_end(b, max);
+
+	return 0;
+}
+EXPORT_SYMBOL(pci_scan_root_bus_bridge);
+
+struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
+		struct pci_ops *ops, void *sysdata, struct list_head *resources)
+{
+	struct resource_entry *window;
+	bool found = false;
+	struct pci_bus *b;
+	int max;
+
+	resource_list_for_each_entry(window, resources)
+		if (window->res->flags & IORESOURCE_BUS) {
+			found = true;
+			break;
+		}
+
+	b = pci_create_root_bus(parent, bus, ops, sysdata, resources);
+	if (!b)
+		return NULL;
+
+	if (!found) {
+		dev_info(&b->dev,
+		 "No busn resource found for root bus, will use [bus %02x-ff]\n",
+			bus);
+		pci_bus_insert_busn_res(b, bus, 255);
+	}
+
+	max = pci_scan_child_bus(b);
+
+	if (!found)
+		pci_bus_update_busn_res_end(b, max);
+
+	return b;
+}
+EXPORT_SYMBOL(pci_scan_root_bus);
+
+struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops,
+					void *sysdata)
+{
+	LIST_HEAD(resources);
+	struct pci_bus *b;
+
+	pci_add_resource(&resources, &ioport_resource);
+	pci_add_resource(&resources, &iomem_resource);
+	pci_add_resource(&resources, &busn_resource);
+	b = pci_create_root_bus(NULL, bus, ops, sysdata, &resources);
+	if (b) {
+		pci_scan_child_bus(b);
+	} else {
+		pci_free_resource_list(&resources);
+	}
+	return b;
+}
+EXPORT_SYMBOL(pci_scan_bus);
+
+/**
+ * pci_rescan_bus_bridge_resize - Scan a PCI bus for devices
+ * @bridge: PCI bridge for the bus to scan
+ *
+ * Scan a PCI bus and child buses for new devices, add them,
+ * and enable them, resizing bridge mmio/io resource if necessary
+ * and possible.  The caller must ensure the child devices are already
+ * removed for resizing to occur.
+ *
+ * Returns the max number of subordinate bus discovered.
+ */
+unsigned int pci_rescan_bus_bridge_resize(struct pci_dev *bridge)
+{
+	unsigned int max;
+	struct pci_bus *bus = bridge->subordinate;
+
+	max = pci_scan_child_bus(bus);
+
+	pci_assign_unassigned_bridge_resources(bridge);
+
+	pci_bus_add_devices(bus);
+
+	return max;
+}
+
+/**
+ * pci_rescan_bus - Scan a PCI bus for devices
+ * @bus: PCI bus to scan
+ *
+ * Scan a PCI bus and child buses for new devices, add them,
+ * and enable them.
+ *
+ * Returns the max number of subordinate bus discovered.
+ */
+unsigned int pci_rescan_bus(struct pci_bus *bus)
+{
+	unsigned int max;
+
+	max = pci_scan_child_bus(bus);
+	pci_assign_unassigned_bus_resources(bus);
+	pci_bus_add_devices(bus);
+
+	return max;
+}
+EXPORT_SYMBOL_GPL(pci_rescan_bus);
+
+/*
+ * pci_rescan_bus(), pci_rescan_bus_bridge_resize() and PCI device removal
+ * routines should always be executed under this mutex.
+ */
+static DEFINE_MUTEX(pci_rescan_remove_lock);
+
+void pci_lock_rescan_remove(void)
+{
+	mutex_lock(&pci_rescan_remove_lock);
+}
+EXPORT_SYMBOL_GPL(pci_lock_rescan_remove);
+
+void pci_unlock_rescan_remove(void)
+{
+	mutex_unlock(&pci_rescan_remove_lock);
+}
+EXPORT_SYMBOL_GPL(pci_unlock_rescan_remove);
+
+static int __init pci_sort_bf_cmp(const struct device *d_a,
+				  const struct device *d_b)
+{
+	const struct pci_dev *a = to_pci_dev(d_a);
+	const struct pci_dev *b = to_pci_dev(d_b);
+
+	if      (pci_domain_nr(a->bus) < pci_domain_nr(b->bus)) return -1;
+	else if (pci_domain_nr(a->bus) > pci_domain_nr(b->bus)) return  1;
+
+	if      (a->bus->number < b->bus->number) return -1;
+	else if (a->bus->number > b->bus->number) return  1;
+
+	if      (a->devfn < b->devfn) return -1;
+	else if (a->devfn > b->devfn) return  1;
+
+	return 0;
+}
+
+void __init pci_sort_breadthfirst(void)
+{
+	bus_sort_breadthfirst(&pci_bus_type, &pci_sort_bf_cmp);
+}
+
+int pci_hp_add_bridge(struct pci_dev *dev)
+{
+	struct pci_bus *parent = dev->bus;
+	int busnr, start = parent->busn_res.start;
+	unsigned int available_buses = 0;
+	int end = parent->busn_res.end;
+
+	for (busnr = start; busnr <= end; busnr++) {
+		if (!pci_find_bus(pci_domain_nr(parent), busnr))
+			break;
+	}
+	if (busnr-- > end) {
+		pci_err(dev, "No bus number available for hot-added bridge\n");
+		return -1;
+	}
+
+	/* Scan bridges that are already configured */
+	busnr = pci_scan_bridge(parent, dev, busnr, 0);
+
+	/*
+	 * Distribute the available bus numbers between hotplug-capable
+	 * bridges to make extending the chain later possible.
+	 */
+	available_buses = end - busnr;
+
+	/* Scan bridges that need to be reconfigured */
+	pci_scan_bridge_extend(parent, dev, busnr, available_buses, 1);
+
+	if (!dev->subordinate)
+		return -1;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_hp_add_bridge);
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
new file mode 100644
index 000000000..f96770908
--- /dev/null
+++ b/drivers/pci/proc.c
@@ -0,0 +1,472 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Procfs interface for the PCI bus
+ *
+ * Copyright (c) 1997--1999 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/capability.h>
+#include <linux/uaccess.h>
+#include <linux/security.h>
+#include <asm/byteorder.h>
+#include "pci.h"
+
+static int proc_initialized;	/* = 0 */
+
+static loff_t proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
+{
+	struct pci_dev *dev = pde_data(file_inode(file));
+	return fixed_size_llseek(file, off, whence, dev->cfg_size);
+}
+
+static ssize_t proc_bus_pci_read(struct file *file, char __user *buf,
+				 size_t nbytes, loff_t *ppos)
+{
+	struct pci_dev *dev = pde_data(file_inode(file));
+	unsigned int pos = *ppos;
+	unsigned int cnt, size;
+
+	/*
+	 * Normal users can read only the standardized portion of the
+	 * configuration space as several chips lock up when trying to read
+	 * undefined locations (think of Intel PIIX4 as a typical example).
+	 */
+
+	if (capable(CAP_SYS_ADMIN))
+		size = dev->cfg_size;
+	else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+		size = 128;
+	else
+		size = 64;
+
+	if (pos >= size)
+		return 0;
+	if (nbytes >= size)
+		nbytes = size;
+	if (pos + nbytes > size)
+		nbytes = size - pos;
+	cnt = nbytes;
+
+	if (!access_ok(buf, cnt))
+		return -EINVAL;
+
+	pci_config_pm_runtime_get(dev);
+
+	if ((pos & 1) && cnt) {
+		unsigned char val;
+		pci_user_read_config_byte(dev, pos, &val);
+		__put_user(val, buf);
+		buf++;
+		pos++;
+		cnt--;
+	}
+
+	if ((pos & 3) && cnt > 2) {
+		unsigned short val;
+		pci_user_read_config_word(dev, pos, &val);
+		__put_user(cpu_to_le16(val), (__le16 __user *) buf);
+		buf += 2;
+		pos += 2;
+		cnt -= 2;
+	}
+
+	while (cnt >= 4) {
+		unsigned int val;
+		pci_user_read_config_dword(dev, pos, &val);
+		__put_user(cpu_to_le32(val), (__le32 __user *) buf);
+		buf += 4;
+		pos += 4;
+		cnt -= 4;
+		cond_resched();
+	}
+
+	if (cnt >= 2) {
+		unsigned short val;
+		pci_user_read_config_word(dev, pos, &val);
+		__put_user(cpu_to_le16(val), (__le16 __user *) buf);
+		buf += 2;
+		pos += 2;
+		cnt -= 2;
+	}
+
+	if (cnt) {
+		unsigned char val;
+		pci_user_read_config_byte(dev, pos, &val);
+		__put_user(val, buf);
+		pos++;
+	}
+
+	pci_config_pm_runtime_put(dev);
+
+	*ppos = pos;
+	return nbytes;
+}
+
+static ssize_t proc_bus_pci_write(struct file *file, const char __user *buf,
+				  size_t nbytes, loff_t *ppos)
+{
+	struct inode *ino = file_inode(file);
+	struct pci_dev *dev = pde_data(ino);
+	int pos = *ppos;
+	int size = dev->cfg_size;
+	int cnt, ret;
+
+	ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+	if (ret)
+		return ret;
+
+	if (pos >= size)
+		return 0;
+	if (nbytes >= size)
+		nbytes = size;
+	if (pos + nbytes > size)
+		nbytes = size - pos;
+	cnt = nbytes;
+
+	if (!access_ok(buf, cnt))
+		return -EINVAL;
+
+	pci_config_pm_runtime_get(dev);
+
+	if ((pos & 1) && cnt) {
+		unsigned char val;
+		__get_user(val, buf);
+		pci_user_write_config_byte(dev, pos, val);
+		buf++;
+		pos++;
+		cnt--;
+	}
+
+	if ((pos & 3) && cnt > 2) {
+		__le16 val;
+		__get_user(val, (__le16 __user *) buf);
+		pci_user_write_config_word(dev, pos, le16_to_cpu(val));
+		buf += 2;
+		pos += 2;
+		cnt -= 2;
+	}
+
+	while (cnt >= 4) {
+		__le32 val;
+		__get_user(val, (__le32 __user *) buf);
+		pci_user_write_config_dword(dev, pos, le32_to_cpu(val));
+		buf += 4;
+		pos += 4;
+		cnt -= 4;
+	}
+
+	if (cnt >= 2) {
+		__le16 val;
+		__get_user(val, (__le16 __user *) buf);
+		pci_user_write_config_word(dev, pos, le16_to_cpu(val));
+		buf += 2;
+		pos += 2;
+		cnt -= 2;
+	}
+
+	if (cnt) {
+		unsigned char val;
+		__get_user(val, buf);
+		pci_user_write_config_byte(dev, pos, val);
+		pos++;
+	}
+
+	pci_config_pm_runtime_put(dev);
+
+	*ppos = pos;
+	i_size_write(ino, dev->cfg_size);
+	return nbytes;
+}
+
+#ifdef HAVE_PCI_MMAP
+struct pci_filp_private {
+	enum pci_mmap_state mmap_state;
+	int write_combine;
+};
+#endif /* HAVE_PCI_MMAP */
+
+static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd,
+			       unsigned long arg)
+{
+	struct pci_dev *dev = pde_data(file_inode(file));
+#ifdef HAVE_PCI_MMAP
+	struct pci_filp_private *fpriv = file->private_data;
+#endif /* HAVE_PCI_MMAP */
+	int ret = 0;
+
+	ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+	if (ret)
+		return ret;
+
+	switch (cmd) {
+	case PCIIOC_CONTROLLER:
+		ret = pci_domain_nr(dev->bus);
+		break;
+
+#ifdef HAVE_PCI_MMAP
+	case PCIIOC_MMAP_IS_IO:
+		if (!arch_can_pci_mmap_io())
+			return -EINVAL;
+		fpriv->mmap_state = pci_mmap_io;
+		break;
+
+	case PCIIOC_MMAP_IS_MEM:
+		fpriv->mmap_state = pci_mmap_mem;
+		break;
+
+	case PCIIOC_WRITE_COMBINE:
+		if (arch_can_pci_mmap_wc()) {
+			if (arg)
+				fpriv->write_combine = 1;
+			else
+				fpriv->write_combine = 0;
+			break;
+		}
+		/* If arch decided it can't, fall through... */
+		fallthrough;
+#endif /* HAVE_PCI_MMAP */
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+#ifdef HAVE_PCI_MMAP
+static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct pci_dev *dev = pde_data(file_inode(file));
+	struct pci_filp_private *fpriv = file->private_data;
+	resource_size_t start, end;
+	int i, ret, write_combine = 0, res_bit = IORESOURCE_MEM;
+
+	if (!capable(CAP_SYS_RAWIO) ||
+	    security_locked_down(LOCKDOWN_PCI_ACCESS))
+		return -EPERM;
+
+	if (fpriv->mmap_state == pci_mmap_io) {
+		if (!arch_can_pci_mmap_io())
+			return -EINVAL;
+		res_bit = IORESOURCE_IO;
+	}
+
+	/* Make sure the caller is mapping a real resource for this device */
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		if (dev->resource[i].flags & res_bit &&
+		    pci_mmap_fits(dev, i, vma,  PCI_MMAP_PROCFS))
+			break;
+	}
+
+	if (i >= PCI_STD_NUM_BARS)
+		return -ENODEV;
+
+	if (fpriv->mmap_state == pci_mmap_mem &&
+	    fpriv->write_combine) {
+		if (dev->resource[i].flags & IORESOURCE_PREFETCH)
+			write_combine = 1;
+		else
+			return -EINVAL;
+	}
+
+	if (dev->resource[i].flags & IORESOURCE_MEM &&
+	    iomem_is_exclusive(dev->resource[i].start))
+		return -EINVAL;
+
+	pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
+
+	/* Adjust vm_pgoff to be the offset within the resource */
+	vma->vm_pgoff -= start >> PAGE_SHIFT;
+	ret = pci_mmap_resource_range(dev, i, vma,
+				  fpriv->mmap_state, write_combine);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int proc_bus_pci_open(struct inode *inode, struct file *file)
+{
+	struct pci_filp_private *fpriv = kmalloc(sizeof(*fpriv), GFP_KERNEL);
+
+	if (!fpriv)
+		return -ENOMEM;
+
+	fpriv->mmap_state = pci_mmap_io;
+	fpriv->write_combine = 0;
+
+	file->private_data = fpriv;
+	file->f_mapping = iomem_get_mapping();
+
+	return 0;
+}
+
+static int proc_bus_pci_release(struct inode *inode, struct file *file)
+{
+	kfree(file->private_data);
+	file->private_data = NULL;
+
+	return 0;
+}
+#endif /* HAVE_PCI_MMAP */
+
+static const struct proc_ops proc_bus_pci_ops = {
+	.proc_lseek	= proc_bus_pci_lseek,
+	.proc_read	= proc_bus_pci_read,
+	.proc_write	= proc_bus_pci_write,
+	.proc_ioctl	= proc_bus_pci_ioctl,
+#ifdef CONFIG_COMPAT
+	.proc_compat_ioctl = proc_bus_pci_ioctl,
+#endif
+#ifdef HAVE_PCI_MMAP
+	.proc_open	= proc_bus_pci_open,
+	.proc_release	= proc_bus_pci_release,
+	.proc_mmap	= proc_bus_pci_mmap,
+#ifdef HAVE_ARCH_PCI_GET_UNMAPPED_AREA
+	.proc_get_unmapped_area = get_pci_unmapped_area,
+#endif /* HAVE_ARCH_PCI_GET_UNMAPPED_AREA */
+#endif /* HAVE_PCI_MMAP */
+};
+
+/* iterator */
+static void *pci_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct pci_dev *dev = NULL;
+	loff_t n = *pos;
+
+	for_each_pci_dev(dev) {
+		if (!n--)
+			break;
+	}
+	return dev;
+}
+
+static void *pci_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct pci_dev *dev = v;
+
+	(*pos)++;
+	dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
+	return dev;
+}
+
+static void pci_seq_stop(struct seq_file *m, void *v)
+{
+	if (v) {
+		struct pci_dev *dev = v;
+		pci_dev_put(dev);
+	}
+}
+
+static int show_device(struct seq_file *m, void *v)
+{
+	const struct pci_dev *dev = v;
+	const struct pci_driver *drv;
+	int i;
+
+	if (dev == NULL)
+		return 0;
+
+	drv = pci_dev_driver(dev);
+	seq_printf(m, "%02x%02x\t%04x%04x\t%x",
+			dev->bus->number,
+			dev->devfn,
+			dev->vendor,
+			dev->device,
+			dev->irq);
+
+	/* only print standard and ROM resources to preserve compatibility */
+	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+		resource_size_t start, end;
+		pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
+		seq_printf(m, "\t%16llx",
+			(unsigned long long)(start |
+			(dev->resource[i].flags & PCI_REGION_FLAG_MASK)));
+	}
+	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+		resource_size_t start, end;
+		pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
+		seq_printf(m, "\t%16llx",
+			dev->resource[i].start < dev->resource[i].end ?
+			(unsigned long long)(end - start) + 1 : 0);
+	}
+	seq_putc(m, '\t');
+	if (drv)
+		seq_puts(m, drv->name);
+	seq_putc(m, '\n');
+	return 0;
+}
+
+static const struct seq_operations proc_bus_pci_devices_op = {
+	.start	= pci_seq_start,
+	.next	= pci_seq_next,
+	.stop	= pci_seq_stop,
+	.show	= show_device
+};
+
+static struct proc_dir_entry *proc_bus_pci_dir;
+
+int pci_proc_attach_device(struct pci_dev *dev)
+{
+	struct pci_bus *bus = dev->bus;
+	struct proc_dir_entry *e;
+	char name[16];
+
+	if (!proc_initialized)
+		return -EACCES;
+
+	if (!bus->procdir) {
+		if (pci_proc_domain(bus)) {
+			sprintf(name, "%04x:%02x", pci_domain_nr(bus),
+					bus->number);
+		} else {
+			sprintf(name, "%02x", bus->number);
+		}
+		bus->procdir = proc_mkdir(name, proc_bus_pci_dir);
+		if (!bus->procdir)
+			return -ENOMEM;
+	}
+
+	sprintf(name, "%02x.%x", PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+	e = proc_create_data(name, S_IFREG | S_IRUGO | S_IWUSR, bus->procdir,
+			     &proc_bus_pci_ops, dev);
+	if (!e)
+		return -ENOMEM;
+	proc_set_size(e, dev->cfg_size);
+	dev->procent = e;
+
+	return 0;
+}
+
+int pci_proc_detach_device(struct pci_dev *dev)
+{
+	proc_remove(dev->procent);
+	dev->procent = NULL;
+	return 0;
+}
+
+int pci_proc_detach_bus(struct pci_bus *bus)
+{
+	proc_remove(bus->procdir);
+	return 0;
+}
+
+static int __init pci_proc_init(void)
+{
+	struct pci_dev *dev = NULL;
+	proc_bus_pci_dir = proc_mkdir("bus/pci", NULL);
+	proc_create_seq("devices", 0, proc_bus_pci_dir,
+		    &proc_bus_pci_devices_op);
+	proc_initialized = 1;
+	for_each_pci_dev(dev)
+		pci_proc_attach_device(dev);
+
+	return 0;
+}
+device_initcall(pci_proc_init);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
new file mode 100644
index 000000000..e00819140
--- /dev/null
+++ b/drivers/pci/quirks.c
@@ -0,0 +1,6229 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file contains work-arounds for many known PCI hardware bugs.
+ * Devices present only on certain architectures (host bridges et cetera)
+ * should be handled in arch-specific code.
+ *
+ * Note: any quirks for hotpluggable devices must _NOT_ be declared __init.
+ *
+ * Copyright (c) 1999 Martin Mares <mj@ucw.cz>
+ *
+ * Init/reset quirks for USB host controllers should be in the USB quirks
+ * file, where their drivers can use them.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/isa-dma.h> /* isa_dma_bridge_buggy */
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/acpi.h>
+#include <linux/dmi.h>
+#include <linux/ioport.h>
+#include <linux/sched.h>
+#include <linux/ktime.h>
+#include <linux/mm.h>
+#include <linux/nvme.h>
+#include <linux/platform_data/x86/apple.h>
+#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
+#include <linux/switchtec.h>
+#include "pci.h"
+
+/*
+ * Retrain the link of a downstream PCIe port by hand if necessary.
+ *
+ * This is needed at least where a downstream port of the ASMedia ASM2824
+ * Gen 3 switch is wired to the upstream port of the Pericom PI7C9X2G304
+ * Gen 2 switch, and observed with the Delock Riser Card PCI Express x1 >
+ * 2 x PCIe x1 device, P/N 41433, plugged into the SiFive HiFive Unmatched
+ * board.
+ *
+ * In such a configuration the switches are supposed to negotiate the link
+ * speed of preferably 5.0GT/s, falling back to 2.5GT/s.  However the link
+ * continues switching between the two speeds indefinitely and the data
+ * link layer never reaches the active state, with link training reported
+ * repeatedly active ~84% of the time.  Forcing the target link speed to
+ * 2.5GT/s with the upstream ASM2824 device makes the two switches talk to
+ * each other correctly however.  And more interestingly retraining with a
+ * higher target link speed afterwards lets the two successfully negotiate
+ * 5.0GT/s.
+ *
+ * With the ASM2824 we can rely on the otherwise optional Data Link Layer
+ * Link Active status bit and in the failed link training scenario it will
+ * be off along with the Link Bandwidth Management Status indicating that
+ * hardware has changed the link speed or width in an attempt to correct
+ * unreliable link operation.  For a port that has been left unconnected
+ * both bits will be clear.  So use this information to detect the problem
+ * rather than polling the Link Training bit and watching out for flips or
+ * at least the active status.
+ *
+ * Since the exact nature of the problem isn't known and in principle this
+ * could trigger where an ASM2824 device is downstream rather upstream,
+ * apply this erratum workaround to any downstream ports as long as they
+ * support Link Active reporting and have the Link Control 2 register.
+ * Restrict the speed to 2.5GT/s then with the Target Link Speed field,
+ * request a retrain and wait 200ms for the data link to go up.
+ *
+ * If this turns out successful and we know by the Vendor:Device ID it is
+ * safe to do so, then lift the restriction, letting the devices negotiate
+ * a higher speed.  Also check for a similar 2.5GT/s speed restriction the
+ * firmware may have already arranged and lift it with ports that already
+ * report their data link being up.
+ *
+ * Return TRUE if the link has been successfully retrained, otherwise FALSE.
+ */
+bool pcie_failed_link_retrain(struct pci_dev *dev)
+{
+	static const struct pci_device_id ids[] = {
+		{ PCI_VDEVICE(ASMEDIA, 0x2824) }, /* ASMedia ASM2824 */
+		{}
+	};
+	u16 lnksta, lnkctl2;
+
+	if (!pci_is_pcie(dev) || !pcie_downstream_port(dev) ||
+	    !pcie_cap_has_lnkctl2(dev) || !dev->link_active_reporting)
+		return false;
+
+	pcie_capability_read_word(dev, PCI_EXP_LNKCTL2, &lnkctl2);
+	pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta);
+	if ((lnksta & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_DLLLA)) ==
+	    PCI_EXP_LNKSTA_LBMS) {
+		pci_info(dev, "broken device, retraining non-functional downstream link at 2.5GT/s\n");
+
+		lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;
+		lnkctl2 |= PCI_EXP_LNKCTL2_TLS_2_5GT;
+		pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, lnkctl2);
+
+		if (pcie_retrain_link(dev, false)) {
+			pci_info(dev, "retraining failed\n");
+			return false;
+		}
+
+		pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta);
+	}
+
+	if ((lnksta & PCI_EXP_LNKSTA_DLLLA) &&
+	    (lnkctl2 & PCI_EXP_LNKCTL2_TLS) == PCI_EXP_LNKCTL2_TLS_2_5GT &&
+	    pci_match_id(ids, dev)) {
+		u32 lnkcap;
+
+		pci_info(dev, "removing 2.5GT/s downstream link speed restriction\n");
+		pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap);
+		lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;
+		lnkctl2 |= lnkcap & PCI_EXP_LNKCAP_SLS;
+		pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, lnkctl2);
+
+		if (pcie_retrain_link(dev, false)) {
+			pci_info(dev, "retraining failed\n");
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static ktime_t fixup_debug_start(struct pci_dev *dev,
+				 void (*fn)(struct pci_dev *dev))
+{
+	if (initcall_debug)
+		pci_info(dev, "calling  %pS @ %i\n", fn, task_pid_nr(current));
+
+	return ktime_get();
+}
+
+static void fixup_debug_report(struct pci_dev *dev, ktime_t calltime,
+			       void (*fn)(struct pci_dev *dev))
+{
+	ktime_t delta, rettime;
+	unsigned long long duration;
+
+	rettime = ktime_get();
+	delta = ktime_sub(rettime, calltime);
+	duration = (unsigned long long) ktime_to_ns(delta) >> 10;
+	if (initcall_debug || duration > 10000)
+		pci_info(dev, "%pS took %lld usecs\n", fn, duration);
+}
+
+static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
+			  struct pci_fixup *end)
+{
+	ktime_t calltime;
+
+	for (; f < end; f++)
+		if ((f->class == (u32) (dev->class >> f->class_shift) ||
+		     f->class == (u32) PCI_ANY_ID) &&
+		    (f->vendor == dev->vendor ||
+		     f->vendor == (u16) PCI_ANY_ID) &&
+		    (f->device == dev->device ||
+		     f->device == (u16) PCI_ANY_ID)) {
+			void (*hook)(struct pci_dev *dev);
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+			hook = offset_to_ptr(&f->hook_offset);
+#else
+			hook = f->hook;
+#endif
+			calltime = fixup_debug_start(dev, hook);
+			hook(dev);
+			fixup_debug_report(dev, calltime, hook);
+		}
+}
+
+extern struct pci_fixup __start_pci_fixups_early[];
+extern struct pci_fixup __end_pci_fixups_early[];
+extern struct pci_fixup __start_pci_fixups_header[];
+extern struct pci_fixup __end_pci_fixups_header[];
+extern struct pci_fixup __start_pci_fixups_final[];
+extern struct pci_fixup __end_pci_fixups_final[];
+extern struct pci_fixup __start_pci_fixups_enable[];
+extern struct pci_fixup __end_pci_fixups_enable[];
+extern struct pci_fixup __start_pci_fixups_resume[];
+extern struct pci_fixup __end_pci_fixups_resume[];
+extern struct pci_fixup __start_pci_fixups_resume_early[];
+extern struct pci_fixup __end_pci_fixups_resume_early[];
+extern struct pci_fixup __start_pci_fixups_suspend[];
+extern struct pci_fixup __end_pci_fixups_suspend[];
+extern struct pci_fixup __start_pci_fixups_suspend_late[];
+extern struct pci_fixup __end_pci_fixups_suspend_late[];
+
+static bool pci_apply_fixup_final_quirks;
+
+void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev)
+{
+	struct pci_fixup *start, *end;
+
+	switch (pass) {
+	case pci_fixup_early:
+		start = __start_pci_fixups_early;
+		end = __end_pci_fixups_early;
+		break;
+
+	case pci_fixup_header:
+		start = __start_pci_fixups_header;
+		end = __end_pci_fixups_header;
+		break;
+
+	case pci_fixup_final:
+		if (!pci_apply_fixup_final_quirks)
+			return;
+		start = __start_pci_fixups_final;
+		end = __end_pci_fixups_final;
+		break;
+
+	case pci_fixup_enable:
+		start = __start_pci_fixups_enable;
+		end = __end_pci_fixups_enable;
+		break;
+
+	case pci_fixup_resume:
+		start = __start_pci_fixups_resume;
+		end = __end_pci_fixups_resume;
+		break;
+
+	case pci_fixup_resume_early:
+		start = __start_pci_fixups_resume_early;
+		end = __end_pci_fixups_resume_early;
+		break;
+
+	case pci_fixup_suspend:
+		start = __start_pci_fixups_suspend;
+		end = __end_pci_fixups_suspend;
+		break;
+
+	case pci_fixup_suspend_late:
+		start = __start_pci_fixups_suspend_late;
+		end = __end_pci_fixups_suspend_late;
+		break;
+
+	default:
+		/* stupid compiler warning, you would think with an enum... */
+		return;
+	}
+	pci_do_fixups(dev, start, end);
+}
+EXPORT_SYMBOL(pci_fixup_device);
+
+static int __init pci_apply_final_quirks(void)
+{
+	struct pci_dev *dev = NULL;
+	u8 cls = 0;
+	u8 tmp;
+
+	if (pci_cache_line_size)
+		pr_info("PCI: CLS %u bytes\n", pci_cache_line_size << 2);
+
+	pci_apply_fixup_final_quirks = true;
+	for_each_pci_dev(dev) {
+		pci_fixup_device(pci_fixup_final, dev);
+		/*
+		 * If arch hasn't set it explicitly yet, use the CLS
+		 * value shared by all PCI devices.  If there's a
+		 * mismatch, fall back to the default value.
+		 */
+		if (!pci_cache_line_size) {
+			pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &tmp);
+			if (!cls)
+				cls = tmp;
+			if (!tmp || cls == tmp)
+				continue;
+
+			pci_info(dev, "CLS mismatch (%u != %u), using %u bytes\n",
+			         cls << 2, tmp << 2,
+				 pci_dfl_cache_line_size << 2);
+			pci_cache_line_size = pci_dfl_cache_line_size;
+		}
+	}
+
+	if (!pci_cache_line_size) {
+		pr_info("PCI: CLS %u bytes, default %u\n", cls << 2,
+			pci_dfl_cache_line_size << 2);
+		pci_cache_line_size = cls ? cls : pci_dfl_cache_line_size;
+	}
+
+	return 0;
+}
+fs_initcall_sync(pci_apply_final_quirks);
+
+/*
+ * Decoding should be disabled for a PCI device during BAR sizing to avoid
+ * conflict. But doing so may cause problems on host bridge and perhaps other
+ * key system devices. For devices that need to have mmio decoding always-on,
+ * we need to set the dev->mmio_always_on bit.
+ */
+static void quirk_mmio_always_on(struct pci_dev *dev)
+{
+	dev->mmio_always_on = 1;
+}
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_ANY_ID, PCI_ANY_ID,
+				PCI_CLASS_BRIDGE_HOST, 8, quirk_mmio_always_on);
+
+/*
+ * The Mellanox Tavor device gives false positive parity errors.  Disable
+ * parity error reporting.
+ */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR, pci_disable_parity);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE, pci_disable_parity);
+
+/*
+ * Deal with broken BIOSes that neglect to enable passive release,
+ * which can cause problems in combination with the 82441FX/PPro MTRRs
+ */
+static void quirk_passive_release(struct pci_dev *dev)
+{
+	struct pci_dev *d = NULL;
+	unsigned char dlc;
+
+	/*
+	 * We have to make sure a particular bit is set in the PIIX3
+	 * ISA bridge, so we have to go out and find it.
+	 */
+	while ((d = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, d))) {
+		pci_read_config_byte(d, 0x82, &dlc);
+		if (!(dlc & 1<<1)) {
+			pci_info(d, "PIIX3: Enabling Passive Release\n");
+			dlc |= 1<<1;
+			pci_write_config_byte(d, 0x82, dlc);
+		}
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82441,	quirk_passive_release);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82441,	quirk_passive_release);
+
+#ifdef CONFIG_X86_32
+/*
+ * The VIA VP2/VP3/MVP3 seem to have some 'features'. There may be a
+ * workaround but VIA don't answer queries. If you happen to have good
+ * contacts at VIA ask them for me please -- Alan
+ *
+ * This appears to be BIOS not version dependent. So presumably there is a
+ * chipset level fix.
+ */
+static void quirk_isa_dma_hangs(struct pci_dev *dev)
+{
+	if (!isa_dma_bridge_buggy) {
+		isa_dma_bridge_buggy = 1;
+		pci_info(dev, "Activating ISA DMA hang workarounds\n");
+	}
+}
+/*
+ * It's not totally clear which chipsets are the problematic ones.  We know
+ * 82C586 and 82C596 variants are affected.
+ */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C586_0,	quirk_isa_dma_hangs);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C596,	quirk_isa_dma_hangs);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82371SB_0,  quirk_isa_dma_hangs);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL,	PCI_DEVICE_ID_AL_M1533,		quirk_isa_dma_hangs);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,	PCI_DEVICE_ID_NEC_CBUS_1,	quirk_isa_dma_hangs);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,	PCI_DEVICE_ID_NEC_CBUS_2,	quirk_isa_dma_hangs);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,	PCI_DEVICE_ID_NEC_CBUS_3,	quirk_isa_dma_hangs);
+#endif
+
+#ifdef CONFIG_HAS_IOPORT
+/*
+ * Intel NM10 "Tiger Point" LPC PM1a_STS.BM_STS must be clear
+ * for some HT machines to use C4 w/o hanging.
+ */
+static void quirk_tigerpoint_bm_sts(struct pci_dev *dev)
+{
+	u32 pmbase;
+	u16 pm1a;
+
+	pci_read_config_dword(dev, 0x40, &pmbase);
+	pmbase = pmbase & 0xff80;
+	pm1a = inw(pmbase);
+
+	if (pm1a & 0x10) {
+		pci_info(dev, FW_BUG "Tiger Point LPC.BM_STS cleared\n");
+		outw(0x10, pmbase);
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGP_LPC, quirk_tigerpoint_bm_sts);
+#endif
+
+/* Chipsets where PCI->PCI transfers vanish or hang */
+static void quirk_nopcipci(struct pci_dev *dev)
+{
+	if ((pci_pci_problems & PCIPCI_FAIL) == 0) {
+		pci_info(dev, "Disabling direct PCI/PCI transfers\n");
+		pci_pci_problems |= PCIPCI_FAIL;
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SI,	PCI_DEVICE_ID_SI_5597,		quirk_nopcipci);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SI,	PCI_DEVICE_ID_SI_496,		quirk_nopcipci);
+
+static void quirk_nopciamd(struct pci_dev *dev)
+{
+	u8 rev;
+	pci_read_config_byte(dev, 0x08, &rev);
+	if (rev == 0x13) {
+		/* Erratum 24 */
+		pci_info(dev, "Chipset erratum: Disabling direct PCI/AGP transfers\n");
+		pci_pci_problems |= PCIAGP_FAIL;
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD,	PCI_DEVICE_ID_AMD_8151_0,	quirk_nopciamd);
+
+/* Triton requires workarounds to be used by the drivers */
+static void quirk_triton(struct pci_dev *dev)
+{
+	if ((pci_pci_problems&PCIPCI_TRITON) == 0) {
+		pci_info(dev, "Limiting direct PCI/PCI transfers\n");
+		pci_pci_problems |= PCIPCI_TRITON;
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82437,	quirk_triton);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82437VX,	quirk_triton);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82439,	quirk_triton);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82439TX,	quirk_triton);
+
+/*
+ * VIA Apollo KT133 needs PCI latency patch
+ * Made according to a Windows driver-based patch by George E. Breese;
+ * see PCI Latency Adjust on http://www.viahardware.com/download/viatweak.shtm
+ * Also see http://www.au-ja.org/review-kt133a-1-en.phtml for the info on
+ * which Mr Breese based his work.
+ *
+ * Updated based on further information from the site and also on
+ * information provided by VIA
+ */
+static void quirk_vialatency(struct pci_dev *dev)
+{
+	struct pci_dev *p;
+	u8 busarb;
+
+	/*
+	 * Ok, we have a potential problem chipset here. Now see if we have
+	 * a buggy southbridge.
+	 */
+	p = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, NULL);
+	if (p != NULL) {
+
+		/*
+		 * 0x40 - 0x4f == 686B, 0x10 - 0x2f == 686A;
+		 * thanks Dan Hollis.
+		 * Check for buggy part revisions
+		 */
+		if (p->revision < 0x40 || p->revision > 0x42)
+			goto exit;
+	} else {
+		p = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8231, NULL);
+		if (p == NULL)	/* No problem parts */
+			goto exit;
+
+		/* Check for buggy part revisions */
+		if (p->revision < 0x10 || p->revision > 0x12)
+			goto exit;
+	}
+
+	/*
+	 * Ok we have the problem. Now set the PCI master grant to occur
+	 * every master grant. The apparent bug is that under high PCI load
+	 * (quite common in Linux of course) you can get data loss when the
+	 * CPU is held off the bus for 3 bus master requests.  This happens
+	 * to include the IDE controllers....
+	 *
+	 * VIA only apply this fix when an SB Live! is present but under
+	 * both Linux and Windows this isn't enough, and we have seen
+	 * corruption without SB Live! but with things like 3 UDMA IDE
+	 * controllers. So we ignore that bit of the VIA recommendation..
+	 */
+	pci_read_config_byte(dev, 0x76, &busarb);
+
+	/*
+	 * Set bit 4 and bit 5 of byte 76 to 0x01
+	 * "Master priority rotation on every PCI master grant"
+	 */
+	busarb &= ~(1<<5);
+	busarb |= (1<<4);
+	pci_write_config_byte(dev, 0x76, busarb);
+	pci_info(dev, "Applying VIA southbridge workaround\n");
+exit:
+	pci_dev_put(p);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8363_0,	quirk_vialatency);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8371_1,	quirk_vialatency);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8361,		quirk_vialatency);
+/* Must restore this on a resume from RAM */
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8363_0,	quirk_vialatency);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8371_1,	quirk_vialatency);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8361,		quirk_vialatency);
+
+/* VIA Apollo VP3 needs ETBF on BT848/878 */
+static void quirk_viaetbf(struct pci_dev *dev)
+{
+	if ((pci_pci_problems&PCIPCI_VIAETBF) == 0) {
+		pci_info(dev, "Limiting direct PCI/PCI transfers\n");
+		pci_pci_problems |= PCIPCI_VIAETBF;
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C597_0,	quirk_viaetbf);
+
+static void quirk_vsfx(struct pci_dev *dev)
+{
+	if ((pci_pci_problems&PCIPCI_VSFX) == 0) {
+		pci_info(dev, "Limiting direct PCI/PCI transfers\n");
+		pci_pci_problems |= PCIPCI_VSFX;
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C576,	quirk_vsfx);
+
+/*
+ * ALi Magik requires workarounds to be used by the drivers that DMA to AGP
+ * space. Latency must be set to 0xA and Triton workaround applied too.
+ * [Info kindly provided by ALi]
+ */
+static void quirk_alimagik(struct pci_dev *dev)
+{
+	if ((pci_pci_problems&PCIPCI_ALIMAGIK) == 0) {
+		pci_info(dev, "Limiting direct PCI/PCI transfers\n");
+		pci_pci_problems |= PCIPCI_ALIMAGIK|PCIPCI_TRITON;
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL,	PCI_DEVICE_ID_AL_M1647,		quirk_alimagik);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL,	PCI_DEVICE_ID_AL_M1651,		quirk_alimagik);
+
+/* Natoma has some interesting boundary conditions with Zoran stuff at least */
+static void quirk_natoma(struct pci_dev *dev)
+{
+	if ((pci_pci_problems&PCIPCI_NATOMA) == 0) {
+		pci_info(dev, "Limiting direct PCI/PCI transfers\n");
+		pci_pci_problems |= PCIPCI_NATOMA;
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82441,	quirk_natoma);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82443LX_0,	quirk_natoma);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82443LX_1,	quirk_natoma);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82443BX_0,	quirk_natoma);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82443BX_1,	quirk_natoma);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82443BX_2,	quirk_natoma);
+
+/*
+ * This chip can cause PCI parity errors if config register 0xA0 is read
+ * while DMAs are occurring.
+ */
+static void quirk_citrine(struct pci_dev *dev)
+{
+	dev->cfg_size = 0xA0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM,	PCI_DEVICE_ID_IBM_CITRINE,	quirk_citrine);
+
+/*
+ * This chip can cause bus lockups if config addresses above 0x600
+ * are read or written.
+ */
+static void quirk_nfp6000(struct pci_dev *dev)
+{
+	dev->cfg_size = 0x600;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NETRONOME,	PCI_DEVICE_ID_NETRONOME_NFP4000,	quirk_nfp6000);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NETRONOME,	PCI_DEVICE_ID_NETRONOME_NFP6000,	quirk_nfp6000);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NETRONOME,	PCI_DEVICE_ID_NETRONOME_NFP5000,	quirk_nfp6000);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NETRONOME,	PCI_DEVICE_ID_NETRONOME_NFP6000_VF,	quirk_nfp6000);
+
+/*  On IBM Crocodile ipr SAS adapters, expand BAR to system page size */
+static void quirk_extend_bar_to_page(struct pci_dev *dev)
+{
+	int i;
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		struct resource *r = &dev->resource[i];
+
+		if (r->flags & IORESOURCE_MEM && resource_size(r) < PAGE_SIZE) {
+			r->end = PAGE_SIZE - 1;
+			r->start = 0;
+			r->flags |= IORESOURCE_UNSET;
+			pci_info(dev, "expanded BAR %d to page size: %pR\n",
+				 i, r);
+		}
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, 0x034a, quirk_extend_bar_to_page);
+
+/*
+ * S3 868 and 968 chips report region size equal to 32M, but they decode 64M.
+ * If it's needed, re-allocate the region.
+ */
+static void quirk_s3_64M(struct pci_dev *dev)
+{
+	struct resource *r = &dev->resource[0];
+
+	if ((r->start & 0x3ffffff) || r->end != r->start + 0x3ffffff) {
+		r->flags |= IORESOURCE_UNSET;
+		r->start = 0;
+		r->end = 0x3ffffff;
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_S3,	PCI_DEVICE_ID_S3_868,		quirk_s3_64M);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_S3,	PCI_DEVICE_ID_S3_968,		quirk_s3_64M);
+
+static void quirk_io(struct pci_dev *dev, int pos, unsigned int size,
+		     const char *name)
+{
+	u32 region;
+	struct pci_bus_region bus_region;
+	struct resource *res = dev->resource + pos;
+
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_0 + (pos << 2), &region);
+
+	if (!region)
+		return;
+
+	res->name = pci_name(dev);
+	res->flags = region & ~PCI_BASE_ADDRESS_IO_MASK;
+	res->flags |=
+		(IORESOURCE_IO | IORESOURCE_PCI_FIXED | IORESOURCE_SIZEALIGN);
+	region &= ~(size - 1);
+
+	/* Convert from PCI bus to resource space */
+	bus_region.start = region;
+	bus_region.end = region + size - 1;
+	pcibios_bus_to_resource(dev->bus, res, &bus_region);
+
+	pci_info(dev, FW_BUG "%s quirk: reg 0x%x: %pR\n",
+		 name, PCI_BASE_ADDRESS_0 + (pos << 2), res);
+}
+
+/*
+ * Some CS5536 BIOSes (for example, the Soekris NET5501 board w/ comBIOS
+ * ver. 1.33  20070103) don't set the correct ISA PCI region header info.
+ * BAR0 should be 8 bytes; instead, it may be set to something like 8k
+ * (which conflicts w/ BAR1's memory range).
+ *
+ * CS553x's ISA PCI BARs may also be read-only (ref:
+ * https://bugzilla.kernel.org/show_bug.cgi?id=85991 - Comment #4 forward).
+ */
+static void quirk_cs5536_vsa(struct pci_dev *dev)
+{
+	static char *name = "CS5536 ISA bridge";
+
+	if (pci_resource_len(dev, 0) != 8) {
+		quirk_io(dev, 0,   8, name);	/* SMB */
+		quirk_io(dev, 1, 256, name);	/* GPIO */
+		quirk_io(dev, 2,  64, name);	/* MFGPT */
+		pci_info(dev, "%s bug detected (incorrect header); workaround applied\n",
+			 name);
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, quirk_cs5536_vsa);
+
+static void quirk_io_region(struct pci_dev *dev, int port,
+			    unsigned int size, int nr, const char *name)
+{
+	u16 region;
+	struct pci_bus_region bus_region;
+	struct resource *res = dev->resource + nr;
+
+	pci_read_config_word(dev, port, &region);
+	region &= ~(size - 1);
+
+	if (!region)
+		return;
+
+	res->name = pci_name(dev);
+	res->flags = IORESOURCE_IO;
+
+	/* Convert from PCI bus to resource space */
+	bus_region.start = region;
+	bus_region.end = region + size - 1;
+	pcibios_bus_to_resource(dev->bus, res, &bus_region);
+
+	if (!pci_claim_resource(dev, nr))
+		pci_info(dev, "quirk: %pR claimed by %s\n", res, name);
+}
+
+/*
+ * ATI Northbridge setups MCE the processor if you even read somewhere
+ * between 0x3b0->0x3bb or read 0x3d3
+ */
+static void quirk_ati_exploding_mce(struct pci_dev *dev)
+{
+	pci_info(dev, "ATI Northbridge, reserving I/O ports 0x3b0 to 0x3bb\n");
+	/* Mae rhaid i ni beidio ag edrych ar y lleoliadiau I/O hyn */
+	request_region(0x3b0, 0x0C, "RadeonIGP");
+	request_region(0x3d3, 0x01, "RadeonIGP");
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI,	PCI_DEVICE_ID_ATI_RS100,   quirk_ati_exploding_mce);
+
+/*
+ * In the AMD NL platform, this device ([1022:7912]) has a class code of
+ * PCI_CLASS_SERIAL_USB_XHCI (0x0c0330), which means the xhci driver will
+ * claim it. The same applies on the VanGogh platform device ([1022:163a]).
+ *
+ * But the dwc3 driver is a more specific driver for this device, and we'd
+ * prefer to use it instead of xhci. To prevent xhci from claiming the
+ * device, change the class code to 0x0c03fe, which the PCI r3.0 spec
+ * defines as "USB device (not host controller)". The dwc3 driver can then
+ * claim it based on its Vendor and Device ID.
+ */
+static void quirk_amd_dwc_class(struct pci_dev *pdev)
+{
+	u32 class = pdev->class;
+
+	/* Use "USB Device (not host controller)" class */
+	pdev->class = PCI_CLASS_SERIAL_USB_DEVICE;
+	pci_info(pdev, "PCI class overridden (%#08x -> %#08x) so dwc3 driver can claim this instead of xhci\n",
+		 class, pdev->class);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB,
+		quirk_amd_dwc_class);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VANGOGH_USB,
+		quirk_amd_dwc_class);
+
+/*
+ * Synopsys USB 3.x host HAPS platform has a class code of
+ * PCI_CLASS_SERIAL_USB_XHCI, and xhci driver can claim it.  However, these
+ * devices should use dwc3-haps driver.  Change these devices' class code to
+ * PCI_CLASS_SERIAL_USB_DEVICE to prevent the xhci-pci driver from claiming
+ * them.
+ */
+static void quirk_synopsys_haps(struct pci_dev *pdev)
+{
+	u32 class = pdev->class;
+
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3:
+	case PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI:
+	case PCI_DEVICE_ID_SYNOPSYS_HAPSUSB31:
+		pdev->class = PCI_CLASS_SERIAL_USB_DEVICE;
+		pci_info(pdev, "PCI class overridden (%#08x -> %#08x) so dwc3 driver can claim this instead of xhci\n",
+			 class, pdev->class);
+		break;
+	}
+}
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_SYNOPSYS, PCI_ANY_ID,
+			       PCI_CLASS_SERIAL_USB_XHCI, 0,
+			       quirk_synopsys_haps);
+
+/*
+ * Let's make the southbridge information explicit instead of having to
+ * worry about people probing the ACPI areas, for example.. (Yes, it
+ * happens, and if you read the wrong ACPI register it will put the machine
+ * to sleep with no way of waking it up again. Bummer).
+ *
+ * ALI M7101: Two IO regions pointed to by words at
+ *	0xE0 (64 bytes of ACPI registers)
+ *	0xE2 (32 bytes of SMB registers)
+ */
+static void quirk_ali7101_acpi(struct pci_dev *dev)
+{
+	quirk_io_region(dev, 0xE0, 64, PCI_BRIDGE_RESOURCES, "ali7101 ACPI");
+	quirk_io_region(dev, 0xE2, 32, PCI_BRIDGE_RESOURCES+1, "ali7101 SMB");
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL,	PCI_DEVICE_ID_AL_M7101,		quirk_ali7101_acpi);
+
+static void piix4_io_quirk(struct pci_dev *dev, const char *name, unsigned int port, unsigned int enable)
+{
+	u32 devres;
+	u32 mask, size, base;
+
+	pci_read_config_dword(dev, port, &devres);
+	if ((devres & enable) != enable)
+		return;
+	mask = (devres >> 16) & 15;
+	base = devres & 0xffff;
+	size = 16;
+	for (;;) {
+		unsigned int bit = size >> 1;
+		if ((bit & mask) == bit)
+			break;
+		size = bit;
+	}
+	/*
+	 * For now we only print it out. Eventually we'll want to
+	 * reserve it (at least if it's in the 0x1000+ range), but
+	 * let's get enough confirmation reports first.
+	 */
+	base &= -size;
+	pci_info(dev, "%s PIO at %04x-%04x\n", name, base, base + size - 1);
+}
+
+static void piix4_mem_quirk(struct pci_dev *dev, const char *name, unsigned int port, unsigned int enable)
+{
+	u32 devres;
+	u32 mask, size, base;
+
+	pci_read_config_dword(dev, port, &devres);
+	if ((devres & enable) != enable)
+		return;
+	base = devres & 0xffff0000;
+	mask = (devres & 0x3f) << 16;
+	size = 128 << 16;
+	for (;;) {
+		unsigned int bit = size >> 1;
+		if ((bit & mask) == bit)
+			break;
+		size = bit;
+	}
+
+	/*
+	 * For now we only print it out. Eventually we'll want to
+	 * reserve it, but let's get enough confirmation reports first.
+	 */
+	base &= -size;
+	pci_info(dev, "%s MMIO at %04x-%04x\n", name, base, base + size - 1);
+}
+
+/*
+ * PIIX4 ACPI: Two IO regions pointed to by longwords at
+ *	0x40 (64 bytes of ACPI registers)
+ *	0x90 (16 bytes of SMB registers)
+ * and a few strange programmable PIIX4 device resources.
+ */
+static void quirk_piix4_acpi(struct pci_dev *dev)
+{
+	u32 res_a;
+
+	quirk_io_region(dev, 0x40, 64, PCI_BRIDGE_RESOURCES, "PIIX4 ACPI");
+	quirk_io_region(dev, 0x90, 16, PCI_BRIDGE_RESOURCES+1, "PIIX4 SMB");
+
+	/* Device resource A has enables for some of the other ones */
+	pci_read_config_dword(dev, 0x5c, &res_a);
+
+	piix4_io_quirk(dev, "PIIX4 devres B", 0x60, 3 << 21);
+	piix4_io_quirk(dev, "PIIX4 devres C", 0x64, 3 << 21);
+
+	/* Device resource D is just bitfields for static resources */
+
+	/* Device 12 enabled? */
+	if (res_a & (1 << 29)) {
+		piix4_io_quirk(dev, "PIIX4 devres E", 0x68, 1 << 20);
+		piix4_mem_quirk(dev, "PIIX4 devres F", 0x6c, 1 << 7);
+	}
+	/* Device 13 enabled? */
+	if (res_a & (1 << 30)) {
+		piix4_io_quirk(dev, "PIIX4 devres G", 0x70, 1 << 20);
+		piix4_mem_quirk(dev, "PIIX4 devres H", 0x74, 1 << 7);
+	}
+	piix4_io_quirk(dev, "PIIX4 devres I", 0x78, 1 << 20);
+	piix4_io_quirk(dev, "PIIX4 devres J", 0x7c, 1 << 20);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82371AB_3,	quirk_piix4_acpi);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82443MX_3,	quirk_piix4_acpi);
+
+#define ICH_PMBASE	0x40
+#define ICH_ACPI_CNTL	0x44
+#define  ICH4_ACPI_EN	0x10
+#define  ICH6_ACPI_EN	0x80
+#define ICH4_GPIOBASE	0x58
+#define ICH4_GPIO_CNTL	0x5c
+#define  ICH4_GPIO_EN	0x10
+#define ICH6_GPIOBASE	0x48
+#define ICH6_GPIO_CNTL	0x4c
+#define  ICH6_GPIO_EN	0x10
+
+/*
+ * ICH4, ICH4-M, ICH5, ICH5-M ACPI: Three IO regions pointed to by longwords at
+ *	0x40 (128 bytes of ACPI, GPIO & TCO registers)
+ *	0x58 (64 bytes of GPIO I/O space)
+ */
+static void quirk_ich4_lpc_acpi(struct pci_dev *dev)
+{
+	u8 enable;
+
+	/*
+	 * The check for PCIBIOS_MIN_IO is to ensure we won't create a conflict
+	 * with low legacy (and fixed) ports. We don't know the decoding
+	 * priority and can't tell whether the legacy device or the one created
+	 * here is really at that address.  This happens on boards with broken
+	 * BIOSes.
+	 */
+	pci_read_config_byte(dev, ICH_ACPI_CNTL, &enable);
+	if (enable & ICH4_ACPI_EN)
+		quirk_io_region(dev, ICH_PMBASE, 128, PCI_BRIDGE_RESOURCES,
+				 "ICH4 ACPI/GPIO/TCO");
+
+	pci_read_config_byte(dev, ICH4_GPIO_CNTL, &enable);
+	if (enable & ICH4_GPIO_EN)
+		quirk_io_region(dev, ICH4_GPIOBASE, 64, PCI_BRIDGE_RESOURCES+1,
+				"ICH4 GPIO");
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801AA_0,		quirk_ich4_lpc_acpi);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801AB_0,		quirk_ich4_lpc_acpi);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801BA_0,		quirk_ich4_lpc_acpi);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801BA_10,	quirk_ich4_lpc_acpi);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801CA_0,		quirk_ich4_lpc_acpi);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801CA_12,	quirk_ich4_lpc_acpi);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801DB_0,		quirk_ich4_lpc_acpi);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801DB_12,	quirk_ich4_lpc_acpi);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801EB_0,		quirk_ich4_lpc_acpi);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_ESB_1,		quirk_ich4_lpc_acpi);
+
+static void ich6_lpc_acpi_gpio(struct pci_dev *dev)
+{
+	u8 enable;
+
+	pci_read_config_byte(dev, ICH_ACPI_CNTL, &enable);
+	if (enable & ICH6_ACPI_EN)
+		quirk_io_region(dev, ICH_PMBASE, 128, PCI_BRIDGE_RESOURCES,
+				 "ICH6 ACPI/GPIO/TCO");
+
+	pci_read_config_byte(dev, ICH6_GPIO_CNTL, &enable);
+	if (enable & ICH6_GPIO_EN)
+		quirk_io_region(dev, ICH6_GPIOBASE, 64, PCI_BRIDGE_RESOURCES+1,
+				"ICH6 GPIO");
+}
+
+static void ich6_lpc_generic_decode(struct pci_dev *dev, unsigned int reg,
+				    const char *name, int dynsize)
+{
+	u32 val;
+	u32 size, base;
+
+	pci_read_config_dword(dev, reg, &val);
+
+	/* Enabled? */
+	if (!(val & 1))
+		return;
+	base = val & 0xfffc;
+	if (dynsize) {
+		/*
+		 * This is not correct. It is 16, 32 or 64 bytes depending on
+		 * register D31:F0:ADh bits 5:4.
+		 *
+		 * But this gets us at least _part_ of it.
+		 */
+		size = 16;
+	} else {
+		size = 128;
+	}
+	base &= ~(size-1);
+
+	/*
+	 * Just print it out for now. We should reserve it after more
+	 * debugging.
+	 */
+	pci_info(dev, "%s PIO at %04x-%04x\n", name, base, base+size-1);
+}
+
+static void quirk_ich6_lpc(struct pci_dev *dev)
+{
+	/* Shared ACPI/GPIO decode with all ICH6+ */
+	ich6_lpc_acpi_gpio(dev);
+
+	/* ICH6-specific generic IO decode */
+	ich6_lpc_generic_decode(dev, 0x84, "LPC Generic IO decode 1", 0);
+	ich6_lpc_generic_decode(dev, 0x88, "LPC Generic IO decode 2", 1);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH6_0, quirk_ich6_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH6_1, quirk_ich6_lpc);
+
+static void ich7_lpc_generic_decode(struct pci_dev *dev, unsigned int reg,
+				    const char *name)
+{
+	u32 val;
+	u32 mask, base;
+
+	pci_read_config_dword(dev, reg, &val);
+
+	/* Enabled? */
+	if (!(val & 1))
+		return;
+
+	/* IO base in bits 15:2, mask in bits 23:18, both are dword-based */
+	base = val & 0xfffc;
+	mask = (val >> 16) & 0xfc;
+	mask |= 3;
+
+	/*
+	 * Just print it out for now. We should reserve it after more
+	 * debugging.
+	 */
+	pci_info(dev, "%s PIO at %04x (mask %04x)\n", name, base, mask);
+}
+
+/* ICH7-10 has the same common LPC generic IO decode registers */
+static void quirk_ich7_lpc(struct pci_dev *dev)
+{
+	/* We share the common ACPI/GPIO decode with ICH6 */
+	ich6_lpc_acpi_gpio(dev);
+
+	/* And have 4 ICH7+ generic decodes */
+	ich7_lpc_generic_decode(dev, 0x84, "ICH7 LPC Generic IO decode 1");
+	ich7_lpc_generic_decode(dev, 0x88, "ICH7 LPC Generic IO decode 2");
+	ich7_lpc_generic_decode(dev, 0x8c, "ICH7 LPC Generic IO decode 3");
+	ich7_lpc_generic_decode(dev, 0x90, "ICH7 LPC Generic IO decode 4");
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH7_0, quirk_ich7_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH7_1, quirk_ich7_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH7_31, quirk_ich7_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH8_0, quirk_ich7_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH8_2, quirk_ich7_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH8_3, quirk_ich7_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH8_1, quirk_ich7_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH8_4, quirk_ich7_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH9_2, quirk_ich7_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH9_4, quirk_ich7_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH9_7, quirk_ich7_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH9_8, quirk_ich7_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_ICH10_1, quirk_ich7_lpc);
+
+/*
+ * VIA ACPI: One IO region pointed to by longword at
+ *	0x48 or 0x20 (256 bytes of ACPI registers)
+ */
+static void quirk_vt82c586_acpi(struct pci_dev *dev)
+{
+	if (dev->revision & 0x10)
+		quirk_io_region(dev, 0x48, 256, PCI_BRIDGE_RESOURCES,
+				"vt82c586 ACPI");
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C586_3,	quirk_vt82c586_acpi);
+
+/*
+ * VIA VT82C686 ACPI: Three IO region pointed to by (long)words at
+ *	0x48 (256 bytes of ACPI registers)
+ *	0x70 (128 bytes of hardware monitoring register)
+ *	0x90 (16 bytes of SMB registers)
+ */
+static void quirk_vt82c686_acpi(struct pci_dev *dev)
+{
+	quirk_vt82c586_acpi(dev);
+
+	quirk_io_region(dev, 0x70, 128, PCI_BRIDGE_RESOURCES+1,
+				 "vt82c686 HW-mon");
+
+	quirk_io_region(dev, 0x90, 16, PCI_BRIDGE_RESOURCES+2, "vt82c686 SMB");
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C686_4,	quirk_vt82c686_acpi);
+
+/*
+ * VIA VT8235 ISA Bridge: Two IO regions pointed to by words at
+ *	0x88 (128 bytes of power management registers)
+ *	0xd0 (16 bytes of SMB registers)
+ */
+static void quirk_vt8235_acpi(struct pci_dev *dev)
+{
+	quirk_io_region(dev, 0x88, 128, PCI_BRIDGE_RESOURCES, "vt8235 PM");
+	quirk_io_region(dev, 0xd0, 16, PCI_BRIDGE_RESOURCES+1, "vt8235 SMB");
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8235,	quirk_vt8235_acpi);
+
+/*
+ * TI XIO2000a PCIe-PCI Bridge erroneously reports it supports fast
+ * back-to-back: Disable fast back-to-back on the secondary bus segment
+ */
+static void quirk_xio2000a(struct pci_dev *dev)
+{
+	struct pci_dev *pdev;
+	u16 command;
+
+	pci_warn(dev, "TI XIO2000a quirk detected; secondary bus fast back-to-back transfers disabled\n");
+	list_for_each_entry(pdev, &dev->subordinate->devices, bus_list) {
+		pci_read_config_word(pdev, PCI_COMMAND, &command);
+		if (command & PCI_COMMAND_FAST_BACK)
+			pci_write_config_word(pdev, PCI_COMMAND, command & ~PCI_COMMAND_FAST_BACK);
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XIO2000A,
+			quirk_xio2000a);
+
+#ifdef CONFIG_X86_IO_APIC
+
+#include <asm/io_apic.h>
+
+/*
+ * VIA 686A/B: If an IO-APIC is active, we need to route all on-chip
+ * devices to the external APIC.
+ *
+ * TODO: When we have device-specific interrupt routers, this code will go
+ * away from quirks.
+ */
+static void quirk_via_ioapic(struct pci_dev *dev)
+{
+	u8 tmp;
+
+	if (nr_ioapics < 1)
+		tmp = 0;    /* nothing routed to external APIC */
+	else
+		tmp = 0x1f; /* all known bits (4-0) routed to external APIC */
+
+	pci_info(dev, "%s VIA external APIC routing\n",
+		 tmp ? "Enabling" : "Disabling");
+
+	/* Offset 0x58: External APIC IRQ output control */
+	pci_write_config_byte(dev, 0x58, tmp);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C686,	quirk_via_ioapic);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C686,	quirk_via_ioapic);
+
+/*
+ * VIA 8237: Some BIOSes don't set the 'Bypass APIC De-Assert Message' Bit.
+ * This leads to doubled level interrupt rates.
+ * Set this bit to get rid of cycle wastage.
+ * Otherwise uncritical.
+ */
+static void quirk_via_vt8237_bypass_apic_deassert(struct pci_dev *dev)
+{
+	u8 misc_control2;
+#define BYPASS_APIC_DEASSERT 8
+
+	pci_read_config_byte(dev, 0x5B, &misc_control2);
+	if (!(misc_control2 & BYPASS_APIC_DEASSERT)) {
+		pci_info(dev, "Bypassing VIA 8237 APIC De-Assert Message\n");
+		pci_write_config_byte(dev, 0x5B, misc_control2|BYPASS_APIC_DEASSERT);
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8237,		quirk_via_vt8237_bypass_apic_deassert);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8237,		quirk_via_vt8237_bypass_apic_deassert);
+
+/*
+ * The AMD IO-APIC can hang the box when an APIC IRQ is masked.
+ * We check all revs >= B0 (yet not in the pre production!) as the bug
+ * is currently marked NoFix
+ *
+ * We have multiple reports of hangs with this chipset that went away with
+ * noapic specified. For the moment we assume it's the erratum. We may be wrong
+ * of course. However the advice is demonstrably good even if so.
+ */
+static void quirk_amd_ioapic(struct pci_dev *dev)
+{
+	if (dev->revision >= 0x02) {
+		pci_warn(dev, "I/O APIC: AMD Erratum #22 may be present. In the event of instability try\n");
+		pci_warn(dev, "        : booting with the \"noapic\" option\n");
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD,	PCI_DEVICE_ID_AMD_VIPER_7410,	quirk_amd_ioapic);
+#endif /* CONFIG_X86_IO_APIC */
+
+#if defined(CONFIG_ARM64) && defined(CONFIG_PCI_ATS)
+
+static void quirk_cavium_sriov_rnm_link(struct pci_dev *dev)
+{
+	/* Fix for improper SR-IOV configuration on Cavium cn88xx RNM device */
+	if (dev->subsystem_device == 0xa118)
+		dev->sriov->link = dev->devfn;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CAVIUM, 0xa018, quirk_cavium_sriov_rnm_link);
+#endif
+
+/*
+ * Some settings of MMRBC can lead to data corruption so block changes.
+ * See AMD 8131 HyperTransport PCI-X Tunnel Revision Guide
+ */
+static void quirk_amd_8131_mmrbc(struct pci_dev *dev)
+{
+	if (dev->subordinate && dev->revision <= 0x12) {
+		pci_info(dev, "AMD8131 rev %x detected; disabling PCI-X MMRBC\n",
+			 dev->revision);
+		dev->subordinate->bus_flags |= PCI_BUS_FLAGS_NO_MMRBC;
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_amd_8131_mmrbc);
+
+/*
+ * FIXME: it is questionable that quirk_via_acpi() is needed.  It shows up
+ * as an ISA bridge, and does not support the PCI_INTERRUPT_LINE register
+ * at all.  Therefore it seems like setting the pci_dev's IRQ to the value
+ * of the ACPI SCI interrupt is only done for convenience.
+ *	-jgarzik
+ */
+static void quirk_via_acpi(struct pci_dev *d)
+{
+	u8 irq;
+
+	/* VIA ACPI device: SCI IRQ line in PCI config byte 0x42 */
+	pci_read_config_byte(d, 0x42, &irq);
+	irq &= 0xf;
+	if (irq && (irq != 2))
+		d->irq = irq;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C586_3,	quirk_via_acpi);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C686_4,	quirk_via_acpi);
+
+/* VIA bridges which have VLink */
+static int via_vlink_dev_lo = -1, via_vlink_dev_hi = 18;
+
+static void quirk_via_bridge(struct pci_dev *dev)
+{
+	/* See what bridge we have and find the device ranges */
+	switch (dev->device) {
+	case PCI_DEVICE_ID_VIA_82C686:
+		/*
+		 * The VT82C686 is special; it attaches to PCI and can have
+		 * any device number. All its subdevices are functions of
+		 * that single device.
+		 */
+		via_vlink_dev_lo = PCI_SLOT(dev->devfn);
+		via_vlink_dev_hi = PCI_SLOT(dev->devfn);
+		break;
+	case PCI_DEVICE_ID_VIA_8237:
+	case PCI_DEVICE_ID_VIA_8237A:
+		via_vlink_dev_lo = 15;
+		break;
+	case PCI_DEVICE_ID_VIA_8235:
+		via_vlink_dev_lo = 16;
+		break;
+	case PCI_DEVICE_ID_VIA_8231:
+	case PCI_DEVICE_ID_VIA_8233_0:
+	case PCI_DEVICE_ID_VIA_8233A:
+	case PCI_DEVICE_ID_VIA_8233C_0:
+		via_vlink_dev_lo = 17;
+		break;
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C686,	quirk_via_bridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8231,		quirk_via_bridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8233_0,	quirk_via_bridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8233A,	quirk_via_bridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8233C_0,	quirk_via_bridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8235,		quirk_via_bridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8237,		quirk_via_bridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8237A,	quirk_via_bridge);
+
+/*
+ * quirk_via_vlink		-	VIA VLink IRQ number update
+ * @dev: PCI device
+ *
+ * If the device we are dealing with is on a PIC IRQ we need to ensure that
+ * the IRQ line register which usually is not relevant for PCI cards, is
+ * actually written so that interrupts get sent to the right place.
+ *
+ * We only do this on systems where a VIA south bridge was detected, and
+ * only for VIA devices on the motherboard (see quirk_via_bridge above).
+ */
+static void quirk_via_vlink(struct pci_dev *dev)
+{
+	u8 irq, new_irq;
+
+	/* Check if we have VLink at all */
+	if (via_vlink_dev_lo == -1)
+		return;
+
+	new_irq = dev->irq;
+
+	/* Don't quirk interrupts outside the legacy IRQ range */
+	if (!new_irq || new_irq > 15)
+		return;
+
+	/* Internal device ? */
+	if (dev->bus->number != 0 || PCI_SLOT(dev->devfn) > via_vlink_dev_hi ||
+	    PCI_SLOT(dev->devfn) < via_vlink_dev_lo)
+		return;
+
+	/*
+	 * This is an internal VLink device on a PIC interrupt. The BIOS
+	 * ought to have set this but may not have, so we redo it.
+	 */
+	pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
+	if (new_irq != irq) {
+		pci_info(dev, "VIA VLink IRQ fixup, from %d to %d\n",
+			irq, new_irq);
+		udelay(15);	/* unknown if delay really needed */
+		pci_write_config_byte(dev, PCI_INTERRUPT_LINE, new_irq);
+	}
+}
+DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_VIA, PCI_ANY_ID, quirk_via_vlink);
+
+/*
+ * VIA VT82C598 has its device ID settable and many BIOSes set it to the ID
+ * of VT82C597 for backward compatibility.  We need to switch it off to be
+ * able to recognize the real type of the chip.
+ */
+static void quirk_vt82c598_id(struct pci_dev *dev)
+{
+	pci_write_config_byte(dev, 0xfc, 0);
+	pci_read_config_word(dev, PCI_DEVICE_ID, &dev->device);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_82C597_0,	quirk_vt82c598_id);
+
+/*
+ * CardBus controllers have a legacy base address that enables them to
+ * respond as i82365 pcmcia controllers.  We don't want them to do this
+ * even if the Linux CardBus driver is not loaded, because the Linux i82365
+ * driver does not (and should not) handle CardBus.
+ */
+static void quirk_cardbus_legacy(struct pci_dev *dev)
+{
+	pci_write_config_dword(dev, PCI_CB_LEGACY_MODE_BASE, 0);
+}
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
+			PCI_CLASS_BRIDGE_CARDBUS, 8, quirk_cardbus_legacy);
+DECLARE_PCI_FIXUP_CLASS_RESUME_EARLY(PCI_ANY_ID, PCI_ANY_ID,
+			PCI_CLASS_BRIDGE_CARDBUS, 8, quirk_cardbus_legacy);
+
+/*
+ * Following the PCI ordering rules is optional on the AMD762. I'm not sure
+ * what the designers were smoking but let's not inhale...
+ *
+ * To be fair to AMD, it follows the spec by default, it's BIOS people who
+ * turn it off!
+ */
+static void quirk_amd_ordering(struct pci_dev *dev)
+{
+	u32 pcic;
+	pci_read_config_dword(dev, 0x4C, &pcic);
+	if ((pcic & 6) != 6) {
+		pcic |= 6;
+		pci_warn(dev, "BIOS failed to enable PCI standards compliance; fixing this error\n");
+		pci_write_config_dword(dev, 0x4C, pcic);
+		pci_read_config_dword(dev, 0x84, &pcic);
+		pcic |= (1 << 23);	/* Required in this mode */
+		pci_write_config_dword(dev, 0x84, pcic);
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD,	PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD,	PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering);
+
+/*
+ * DreamWorks-provided workaround for Dunord I-3000 problem
+ *
+ * This card decodes and responds to addresses not apparently assigned to
+ * it.  We force a larger allocation to ensure that nothing gets put too
+ * close to it.
+ */
+static void quirk_dunord(struct pci_dev *dev)
+{
+	struct resource *r = &dev->resource[1];
+
+	r->flags |= IORESOURCE_UNSET;
+	r->start = 0;
+	r->end = 0xffffff;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_DUNORD,	PCI_DEVICE_ID_DUNORD_I3000,	quirk_dunord);
+
+/*
+ * i82380FB mobile docking controller: its PCI-to-PCI bridge is subtractive
+ * decoding (transparent), and does indicate this in the ProgIf.
+ * Unfortunately, the ProgIf value is wrong - 0x80 instead of 0x01.
+ */
+static void quirk_transparent_bridge(struct pci_dev *dev)
+{
+	dev->transparent = 1;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82380FB,	quirk_transparent_bridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TOSHIBA,	0x605,	quirk_transparent_bridge);
+
+/*
+ * Common misconfiguration of the MediaGX/Geode PCI master that will reduce
+ * PCI bandwidth from 70MB/s to 25MB/s.  See the GXM/GXLV/GX1 datasheets
+ * found at http://www.national.com/analog for info on what these bits do.
+ * <christer@weinigel.se>
+ */
+static void quirk_mediagx_master(struct pci_dev *dev)
+{
+	u8 reg;
+
+	pci_read_config_byte(dev, 0x41, &reg);
+	if (reg & 2) {
+		reg &= ~2;
+		pci_info(dev, "Fixup for MediaGX/Geode Slave Disconnect Boundary (0x41=0x%02x)\n",
+			 reg);
+		pci_write_config_byte(dev, 0x41, reg);
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CYRIX,	PCI_DEVICE_ID_CYRIX_PCI_MASTER, quirk_mediagx_master);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_CYRIX,	PCI_DEVICE_ID_CYRIX_PCI_MASTER, quirk_mediagx_master);
+
+/*
+ * Ensure C0 rev restreaming is off. This is normally done by the BIOS but
+ * in the odd case it is not the results are corruption hence the presence
+ * of a Linux check.
+ */
+static void quirk_disable_pxb(struct pci_dev *pdev)
+{
+	u16 config;
+
+	if (pdev->revision != 0x04)		/* Only C0 requires this */
+		return;
+	pci_read_config_word(pdev, 0x40, &config);
+	if (config & (1<<6)) {
+		config &= ~(1<<6);
+		pci_write_config_word(pdev, 0x40, config);
+		pci_info(pdev, "C0 revision 450NX. Disabling PCI restreaming\n");
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82454NX,	quirk_disable_pxb);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82454NX,	quirk_disable_pxb);
+
+static void quirk_amd_ide_mode(struct pci_dev *pdev)
+{
+	/* set SBX00/Hudson-2 SATA in IDE mode to AHCI mode */
+	u8 tmp;
+
+	pci_read_config_byte(pdev, PCI_CLASS_DEVICE, &tmp);
+	if (tmp == 0x01) {
+		pci_read_config_byte(pdev, 0x40, &tmp);
+		pci_write_config_byte(pdev, 0x40, tmp|1);
+		pci_write_config_byte(pdev, 0x9, 1);
+		pci_write_config_byte(pdev, 0xa, 6);
+		pci_write_config_byte(pdev, 0x40, tmp);
+
+		pdev->class = PCI_CLASS_STORAGE_SATA_AHCI;
+		pci_info(pdev, "set SATA to AHCI mode\n");
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP600_SATA, quirk_amd_ide_mode);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP600_SATA, quirk_amd_ide_mode);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SATA, quirk_amd_ide_mode);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SATA, quirk_amd_ide_mode);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_HUDSON2_SATA_IDE, quirk_amd_ide_mode);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_HUDSON2_SATA_IDE, quirk_amd_ide_mode);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x7900, quirk_amd_ide_mode);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD, 0x7900, quirk_amd_ide_mode);
+
+/* Serverworks CSB5 IDE does not fully support native mode */
+static void quirk_svwks_csb5ide(struct pci_dev *pdev)
+{
+	u8 prog;
+	pci_read_config_byte(pdev, PCI_CLASS_PROG, &prog);
+	if (prog & 5) {
+		prog &= ~5;
+		pdev->class &= ~5;
+		pci_write_config_byte(pdev, PCI_CLASS_PROG, prog);
+		/* PCI layer will sort out resources */
+	}
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB5IDE, quirk_svwks_csb5ide);
+
+/* Intel 82801CAM ICH3-M datasheet says IDE modes must be the same */
+static void quirk_ide_samemode(struct pci_dev *pdev)
+{
+	u8 prog;
+
+	pci_read_config_byte(pdev, PCI_CLASS_PROG, &prog);
+
+	if (((prog & 1) && !(prog & 4)) || ((prog & 4) && !(prog & 1))) {
+		pci_info(pdev, "IDE mode mismatch; forcing legacy mode\n");
+		prog &= ~5;
+		pdev->class &= ~5;
+		pci_write_config_byte(pdev, PCI_CLASS_PROG, prog);
+	}
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_10, quirk_ide_samemode);
+
+/* Some ATA devices break if put into D3 */
+static void quirk_no_ata_d3(struct pci_dev *pdev)
+{
+	pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3;
+}
+/* Quirk the legacy ATA devices only. The AHCI ones are ok */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_ANY_ID,
+				PCI_CLASS_STORAGE_IDE, 8, quirk_no_ata_d3);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_ATI, PCI_ANY_ID,
+				PCI_CLASS_STORAGE_IDE, 8, quirk_no_ata_d3);
+/* ALi loses some register settings that we cannot then restore */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AL, PCI_ANY_ID,
+				PCI_CLASS_STORAGE_IDE, 8, quirk_no_ata_d3);
+/* VIA comes back fine but we need to keep it alive or ACPI GTM failures
+   occur when mode detecting */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_VIA, PCI_ANY_ID,
+				PCI_CLASS_STORAGE_IDE, 8, quirk_no_ata_d3);
+
+/*
+ * This was originally an Alpha-specific thing, but it really fits here.
+ * The i82375 PCI/EISA bridge appears as non-classified. Fix that.
+ */
+static void quirk_eisa_bridge(struct pci_dev *dev)
+{
+	dev->class = PCI_CLASS_BRIDGE_EISA << 8;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82375,	quirk_eisa_bridge);
+
+/*
+ * On ASUS P4B boards, the SMBus PCI Device within the ICH2/4 southbridge
+ * is not activated. The myth is that Asus said that they do not want the
+ * users to be irritated by just another PCI Device in the Win98 device
+ * manager. (see the file prog/hotplug/README.p4b in the lm_sensors
+ * package 2.7.0 for details)
+ *
+ * The SMBus PCI Device can be activated by setting a bit in the ICH LPC
+ * bridge. Unfortunately, this device has no subvendor/subdevice ID. So it
+ * becomes necessary to do this tweak in two steps -- the chosen trigger
+ * is either the Host bridge (preferred) or on-board VGA controller.
+ *
+ * Note that we used to unhide the SMBus that way on Toshiba laptops
+ * (Satellite A40 and Tecra M2) but then found that the thermal management
+ * was done by SMM code, which could cause unsynchronized concurrent
+ * accesses to the SMBus registers, with potentially bad effects. Thus you
+ * should be very careful when adding new entries: if SMM is accessing the
+ * Intel SMBus, this is a very good reason to leave it hidden.
+ *
+ * Likewise, many recent laptops use ACPI for thermal management. If the
+ * ACPI DSDT code accesses the SMBus, then Linux should not access it
+ * natively, and keeping the SMBus hidden is the right thing to do. If you
+ * are about to add an entry in the table below, please first disassemble
+ * the DSDT and double-check that there is no code accessing the SMBus.
+ */
+static int asus_hides_smbus;
+
+static void asus_hides_smbus_hostbridge(struct pci_dev *dev)
+{
+	if (unlikely(dev->subsystem_vendor == PCI_VENDOR_ID_ASUSTEK)) {
+		if (dev->device == PCI_DEVICE_ID_INTEL_82845_HB)
+			switch (dev->subsystem_device) {
+			case 0x8025: /* P4B-LX */
+			case 0x8070: /* P4B */
+			case 0x8088: /* P4B533 */
+			case 0x1626: /* L3C notebook */
+				asus_hides_smbus = 1;
+			}
+		else if (dev->device == PCI_DEVICE_ID_INTEL_82845G_HB)
+			switch (dev->subsystem_device) {
+			case 0x80b1: /* P4GE-V */
+			case 0x80b2: /* P4PE */
+			case 0x8093: /* P4B533-V */
+				asus_hides_smbus = 1;
+			}
+		else if (dev->device == PCI_DEVICE_ID_INTEL_82850_HB)
+			switch (dev->subsystem_device) {
+			case 0x8030: /* P4T533 */
+				asus_hides_smbus = 1;
+			}
+		else if (dev->device == PCI_DEVICE_ID_INTEL_7205_0)
+			switch (dev->subsystem_device) {
+			case 0x8070: /* P4G8X Deluxe */
+				asus_hides_smbus = 1;
+			}
+		else if (dev->device == PCI_DEVICE_ID_INTEL_E7501_MCH)
+			switch (dev->subsystem_device) {
+			case 0x80c9: /* PU-DLS */
+				asus_hides_smbus = 1;
+			}
+		else if (dev->device == PCI_DEVICE_ID_INTEL_82855GM_HB)
+			switch (dev->subsystem_device) {
+			case 0x1751: /* M2N notebook */
+			case 0x1821: /* M5N notebook */
+			case 0x1897: /* A6L notebook */
+				asus_hides_smbus = 1;
+			}
+		else if (dev->device == PCI_DEVICE_ID_INTEL_82855PM_HB)
+			switch (dev->subsystem_device) {
+			case 0x184b: /* W1N notebook */
+			case 0x186a: /* M6Ne notebook */
+				asus_hides_smbus = 1;
+			}
+		else if (dev->device == PCI_DEVICE_ID_INTEL_82865_HB)
+			switch (dev->subsystem_device) {
+			case 0x80f2: /* P4P800-X */
+				asus_hides_smbus = 1;
+			}
+		else if (dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB)
+			switch (dev->subsystem_device) {
+			case 0x1882: /* M6V notebook */
+			case 0x1977: /* A6VA notebook */
+				asus_hides_smbus = 1;
+			}
+	} else if (unlikely(dev->subsystem_vendor == PCI_VENDOR_ID_HP)) {
+		if (dev->device ==  PCI_DEVICE_ID_INTEL_82855PM_HB)
+			switch (dev->subsystem_device) {
+			case 0x088C: /* HP Compaq nc8000 */
+			case 0x0890: /* HP Compaq nc6000 */
+				asus_hides_smbus = 1;
+			}
+		else if (dev->device == PCI_DEVICE_ID_INTEL_82865_HB)
+			switch (dev->subsystem_device) {
+			case 0x12bc: /* HP D330L */
+			case 0x12bd: /* HP D530 */
+			case 0x006a: /* HP Compaq nx9500 */
+				asus_hides_smbus = 1;
+			}
+		else if (dev->device == PCI_DEVICE_ID_INTEL_82875_HB)
+			switch (dev->subsystem_device) {
+			case 0x12bf: /* HP xw4100 */
+				asus_hides_smbus = 1;
+			}
+	} else if (unlikely(dev->subsystem_vendor == PCI_VENDOR_ID_SAMSUNG)) {
+		if (dev->device ==  PCI_DEVICE_ID_INTEL_82855PM_HB)
+			switch (dev->subsystem_device) {
+			case 0xC00C: /* Samsung P35 notebook */
+				asus_hides_smbus = 1;
+		}
+	} else if (unlikely(dev->subsystem_vendor == PCI_VENDOR_ID_COMPAQ)) {
+		if (dev->device == PCI_DEVICE_ID_INTEL_82855PM_HB)
+			switch (dev->subsystem_device) {
+			case 0x0058: /* Compaq Evo N620c */
+				asus_hides_smbus = 1;
+			}
+		else if (dev->device == PCI_DEVICE_ID_INTEL_82810_IG3)
+			switch (dev->subsystem_device) {
+			case 0xB16C: /* Compaq Deskpro EP 401963-001 (PCA# 010174) */
+				/* Motherboard doesn't have Host bridge
+				 * subvendor/subdevice IDs, therefore checking
+				 * its on-board VGA controller */
+				asus_hides_smbus = 1;
+			}
+		else if (dev->device == PCI_DEVICE_ID_INTEL_82801DB_2)
+			switch (dev->subsystem_device) {
+			case 0x00b8: /* Compaq Evo D510 CMT */
+			case 0x00b9: /* Compaq Evo D510 SFF */
+			case 0x00ba: /* Compaq Evo D510 USDT */
+				/* Motherboard doesn't have Host bridge
+				 * subvendor/subdevice IDs and on-board VGA
+				 * controller is disabled if an AGP card is
+				 * inserted, therefore checking USB UHCI
+				 * Controller #1 */
+				asus_hides_smbus = 1;
+			}
+		else if (dev->device == PCI_DEVICE_ID_INTEL_82815_CGC)
+			switch (dev->subsystem_device) {
+			case 0x001A: /* Compaq Deskpro EN SSF P667 815E */
+				/* Motherboard doesn't have host bridge
+				 * subvendor/subdevice IDs, therefore checking
+				 * its on-board VGA controller */
+				asus_hides_smbus = 1;
+			}
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82845_HB,	asus_hides_smbus_hostbridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82845G_HB,	asus_hides_smbus_hostbridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82850_HB,	asus_hides_smbus_hostbridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82865_HB,	asus_hides_smbus_hostbridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82875_HB,	asus_hides_smbus_hostbridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_7205_0,	asus_hides_smbus_hostbridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7501_MCH,	asus_hides_smbus_hostbridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82855PM_HB,	asus_hides_smbus_hostbridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82855GM_HB,	asus_hides_smbus_hostbridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82915GM_HB, asus_hides_smbus_hostbridge);
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82810_IG3,	asus_hides_smbus_hostbridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801DB_2,	asus_hides_smbus_hostbridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82815_CGC,	asus_hides_smbus_hostbridge);
+
+static void asus_hides_smbus_lpc(struct pci_dev *dev)
+{
+	u16 val;
+
+	if (likely(!asus_hides_smbus))
+		return;
+
+	pci_read_config_word(dev, 0xF2, &val);
+	if (val & 0x8) {
+		pci_write_config_word(dev, 0xF2, val & (~0x8));
+		pci_read_config_word(dev, 0xF2, &val);
+		if (val & 0x8)
+			pci_info(dev, "i801 SMBus device continues to play 'hide and seek'! 0x%x\n",
+				 val);
+		else
+			pci_info(dev, "Enabled i801 SMBus device\n");
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801AA_0,	asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801DB_0,	asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801BA_0,	asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801CA_0,	asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801CA_12,	asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801DB_12,	asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801EB_0,	asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801AA_0,	asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801DB_0,	asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801BA_0,	asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801CA_0,	asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801CA_12,	asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801DB_12,	asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801EB_0,	asus_hides_smbus_lpc);
+
+/* It appears we just have one such device. If not, we have a warning */
+static void __iomem *asus_rcba_base;
+static void asus_hides_smbus_lpc_ich6_suspend(struct pci_dev *dev)
+{
+	u32 rcba;
+
+	if (likely(!asus_hides_smbus))
+		return;
+	WARN_ON(asus_rcba_base);
+
+	pci_read_config_dword(dev, 0xF0, &rcba);
+	/* use bits 31:14, 16 kB aligned */
+	asus_rcba_base = ioremap(rcba & 0xFFFFC000, 0x4000);
+	if (asus_rcba_base == NULL)
+		return;
+}
+
+static void asus_hides_smbus_lpc_ich6_resume_early(struct pci_dev *dev)
+{
+	u32 val;
+
+	if (likely(!asus_hides_smbus || !asus_rcba_base))
+		return;
+
+	/* read the Function Disable register, dword mode only */
+	val = readl(asus_rcba_base + 0x3418);
+
+	/* enable the SMBus device */
+	writel(val & 0xFFFFFFF7, asus_rcba_base + 0x3418);
+}
+
+static void asus_hides_smbus_lpc_ich6_resume(struct pci_dev *dev)
+{
+	if (likely(!asus_hides_smbus || !asus_rcba_base))
+		return;
+
+	iounmap(asus_rcba_base);
+	asus_rcba_base = NULL;
+	pci_info(dev, "Enabled ICH6/i801 SMBus device\n");
+}
+
+static void asus_hides_smbus_lpc_ich6(struct pci_dev *dev)
+{
+	asus_hides_smbus_lpc_ich6_suspend(dev);
+	asus_hides_smbus_lpc_ich6_resume_early(dev);
+	asus_hides_smbus_lpc_ich6_resume(dev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH6_1,	asus_hides_smbus_lpc_ich6);
+DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH6_1,	asus_hides_smbus_lpc_ich6_suspend);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH6_1,	asus_hides_smbus_lpc_ich6_resume);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH6_1,	asus_hides_smbus_lpc_ich6_resume_early);
+
+/* SiS 96x south bridge: BIOS typically hides SMBus device...  */
+static void quirk_sis_96x_smbus(struct pci_dev *dev)
+{
+	u8 val = 0;
+	pci_read_config_byte(dev, 0x77, &val);
+	if (val & 0x10) {
+		pci_info(dev, "Enabling SiS 96x SMBus\n");
+		pci_write_config_byte(dev, 0x77, val & ~0x10);
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI,	PCI_DEVICE_ID_SI_961,		quirk_sis_96x_smbus);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI,	PCI_DEVICE_ID_SI_962,		quirk_sis_96x_smbus);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI,	PCI_DEVICE_ID_SI_963,		quirk_sis_96x_smbus);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI,	PCI_DEVICE_ID_SI_LPC,		quirk_sis_96x_smbus);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI,	PCI_DEVICE_ID_SI_961,		quirk_sis_96x_smbus);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI,	PCI_DEVICE_ID_SI_962,		quirk_sis_96x_smbus);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI,	PCI_DEVICE_ID_SI_963,		quirk_sis_96x_smbus);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI,	PCI_DEVICE_ID_SI_LPC,		quirk_sis_96x_smbus);
+
+/*
+ * ... This is further complicated by the fact that some SiS96x south
+ * bridges pretend to be 85C503/5513 instead.  In that case see if we
+ * spotted a compatible north bridge to make sure.
+ * (pci_find_device() doesn't work yet)
+ *
+ * We can also enable the sis96x bit in the discovery register..
+ */
+#define SIS_DETECT_REGISTER 0x40
+
+static void quirk_sis_503(struct pci_dev *dev)
+{
+	u8 reg;
+	u16 devid;
+
+	pci_read_config_byte(dev, SIS_DETECT_REGISTER, &reg);
+	pci_write_config_byte(dev, SIS_DETECT_REGISTER, reg | (1 << 6));
+	pci_read_config_word(dev, PCI_DEVICE_ID, &devid);
+	if (((devid & 0xfff0) != 0x0960) && (devid != 0x0018)) {
+		pci_write_config_byte(dev, SIS_DETECT_REGISTER, reg);
+		return;
+	}
+
+	/*
+	 * Ok, it now shows up as a 96x.  Run the 96x quirk by hand in case
+	 * it has already been processed.  (Depends on link order, which is
+	 * apparently not guaranteed)
+	 */
+	dev->device = devid;
+	quirk_sis_96x_smbus(dev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI,	PCI_DEVICE_ID_SI_503,		quirk_sis_503);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI,	PCI_DEVICE_ID_SI_503,		quirk_sis_503);
+
+/*
+ * On ASUS A8V and A8V Deluxe boards, the onboard AC97 audio controller
+ * and MC97 modem controller are disabled when a second PCI soundcard is
+ * present. This patch, tweaking the VT8237 ISA bridge, enables them.
+ * -- bjd
+ */
+static void asus_hides_ac97_lpc(struct pci_dev *dev)
+{
+	u8 val;
+	int asus_hides_ac97 = 0;
+
+	if (likely(dev->subsystem_vendor == PCI_VENDOR_ID_ASUSTEK)) {
+		if (dev->device == PCI_DEVICE_ID_VIA_8237)
+			asus_hides_ac97 = 1;
+	}
+
+	if (!asus_hides_ac97)
+		return;
+
+	pci_read_config_byte(dev, 0x50, &val);
+	if (val & 0xc0) {
+		pci_write_config_byte(dev, 0x50, val & (~0xc0));
+		pci_read_config_byte(dev, 0x50, &val);
+		if (val & 0xc0)
+			pci_info(dev, "Onboard AC97/MC97 devices continue to play 'hide and seek'! 0x%x\n",
+				 val);
+		else
+			pci_info(dev, "Enabled onboard AC97/MC97 devices\n");
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8237, asus_hides_ac97_lpc);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8237, asus_hides_ac97_lpc);
+
+#if defined(CONFIG_ATA) || defined(CONFIG_ATA_MODULE)
+
+/*
+ * If we are using libata we can drive this chip properly but must do this
+ * early on to make the additional device appear during the PCI scanning.
+ */
+static void quirk_jmicron_ata(struct pci_dev *pdev)
+{
+	u32 conf1, conf5, class;
+	u8 hdr;
+
+	/* Only poke fn 0 */
+	if (PCI_FUNC(pdev->devfn))
+		return;
+
+	pci_read_config_dword(pdev, 0x40, &conf1);
+	pci_read_config_dword(pdev, 0x80, &conf5);
+
+	conf1 &= ~0x00CFF302; /* Clear bit 1, 8, 9, 12-19, 22, 23 */
+	conf5 &= ~(1 << 24);  /* Clear bit 24 */
+
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_JMICRON_JMB360: /* SATA single port */
+	case PCI_DEVICE_ID_JMICRON_JMB362: /* SATA dual ports */
+	case PCI_DEVICE_ID_JMICRON_JMB364: /* SATA dual ports */
+		/* The controller should be in single function ahci mode */
+		conf1 |= 0x0002A100; /* Set 8, 13, 15, 17 */
+		break;
+
+	case PCI_DEVICE_ID_JMICRON_JMB365:
+	case PCI_DEVICE_ID_JMICRON_JMB366:
+		/* Redirect IDE second PATA port to the right spot */
+		conf5 |= (1 << 24);
+		fallthrough;
+	case PCI_DEVICE_ID_JMICRON_JMB361:
+	case PCI_DEVICE_ID_JMICRON_JMB363:
+	case PCI_DEVICE_ID_JMICRON_JMB369:
+		/* Enable dual function mode, AHCI on fn 0, IDE fn1 */
+		/* Set the class codes correctly and then direct IDE 0 */
+		conf1 |= 0x00C2A1B3; /* Set 0, 1, 4, 5, 7, 8, 13, 15, 17, 22, 23 */
+		break;
+
+	case PCI_DEVICE_ID_JMICRON_JMB368:
+		/* The controller should be in single function IDE mode */
+		conf1 |= 0x00C00000; /* Set 22, 23 */
+		break;
+	}
+
+	pci_write_config_dword(pdev, 0x40, conf1);
+	pci_write_config_dword(pdev, 0x80, conf5);
+
+	/* Update pdev accordingly */
+	pci_read_config_byte(pdev, PCI_HEADER_TYPE, &hdr);
+	pdev->hdr_type = hdr & 0x7f;
+	pdev->multifunction = !!(hdr & 0x80);
+
+	pci_read_config_dword(pdev, PCI_CLASS_REVISION, &class);
+	pdev->class = class >> 8;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB360, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB361, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB362, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB363, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB364, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB365, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB366, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB368, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB369, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB360, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB361, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB362, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB363, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB364, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB365, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB366, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB368, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB369, quirk_jmicron_ata);
+
+#endif
+
+static void quirk_jmicron_async_suspend(struct pci_dev *dev)
+{
+	if (dev->multifunction) {
+		device_disable_async_suspend(&dev->dev);
+		pci_info(dev, "async suspend disabled to avoid multi-function power-on ordering issue\n");
+	}
+}
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE, 8, quirk_jmicron_async_suspend);
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_CLASS_STORAGE_SATA_AHCI, 0, quirk_jmicron_async_suspend);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_JMICRON, 0x2362, quirk_jmicron_async_suspend);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_JMICRON, 0x236f, quirk_jmicron_async_suspend);
+
+#ifdef CONFIG_X86_IO_APIC
+static void quirk_alder_ioapic(struct pci_dev *pdev)
+{
+	int i;
+
+	if ((pdev->class >> 8) != 0xff00)
+		return;
+
+	/*
+	 * The first BAR is the location of the IO-APIC... we must
+	 * not touch this (and it's already covered by the fixmap), so
+	 * forcibly insert it into the resource tree.
+	 */
+	if (pci_resource_start(pdev, 0) && pci_resource_len(pdev, 0))
+		insert_resource(&iomem_resource, &pdev->resource[0]);
+
+	/*
+	 * The next five BARs all seem to be rubbish, so just clean
+	 * them out.
+	 */
+	for (i = 1; i < PCI_STD_NUM_BARS; i++)
+		memset(&pdev->resource[i], 0, sizeof(pdev->resource[i]));
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_EESSC,	quirk_alder_ioapic);
+#endif
+
+static void quirk_no_msi(struct pci_dev *dev)
+{
+	pci_info(dev, "avoiding MSI to work around a hardware defect\n");
+	dev->no_msi = 1;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4386, quirk_no_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4387, quirk_no_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4388, quirk_no_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4389, quirk_no_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x438a, quirk_no_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x438b, quirk_no_msi);
+
+static void quirk_pcie_mch(struct pci_dev *pdev)
+{
+	pdev->no_msi = 1;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7520_MCH,	quirk_pcie_mch);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7320_MCH,	quirk_pcie_mch);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7525_MCH,	quirk_pcie_mch);
+
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_HUAWEI, 0x1610, PCI_CLASS_BRIDGE_PCI, 8, quirk_pcie_mch);
+
+/*
+ * HiSilicon KunPeng920 and KunPeng930 have devices appear as PCI but are
+ * actually on the AMBA bus. These fake PCI devices can support SVA via
+ * SMMU stall feature, by setting dma-can-stall for ACPI platforms.
+ *
+ * Normally stalling must not be enabled for PCI devices, since it would
+ * break the PCI requirement for free-flowing writes and may lead to
+ * deadlock.  We expect PCI devices to support ATS and PRI if they want to
+ * be fault-tolerant, so there's no ACPI binding to describe anything else,
+ * even when a "PCI" device turns out to be a regular old SoC device
+ * dressed up as a RCiEP and normal rules don't apply.
+ */
+static void quirk_huawei_pcie_sva(struct pci_dev *pdev)
+{
+	struct property_entry properties[] = {
+		PROPERTY_ENTRY_BOOL("dma-can-stall"),
+		{},
+	};
+
+	if (pdev->revision != 0x21 && pdev->revision != 0x30)
+		return;
+
+	pdev->pasid_no_tlp = 1;
+
+	/*
+	 * Set the dma-can-stall property on ACPI platforms. Device tree
+	 * can set it directly.
+	 */
+	if (!pdev->dev.of_node &&
+	    device_create_managed_software_node(&pdev->dev, properties, NULL))
+		pci_warn(pdev, "could not add stall property");
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa250, quirk_huawei_pcie_sva);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa251, quirk_huawei_pcie_sva);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa255, quirk_huawei_pcie_sva);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa256, quirk_huawei_pcie_sva);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa258, quirk_huawei_pcie_sva);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa259, quirk_huawei_pcie_sva);
+
+/*
+ * It's possible for the MSI to get corrupted if SHPC and ACPI are used
+ * together on certain PXH-based systems.
+ */
+static void quirk_pcie_pxh(struct pci_dev *dev)
+{
+	dev->no_msi = 1;
+	pci_warn(dev, "PXH quirk detected; SHPC device MSI disabled\n");
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_PXHD_0,	quirk_pcie_pxh);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_PXHD_1,	quirk_pcie_pxh);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_PXH_0,	quirk_pcie_pxh);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_PXH_1,	quirk_pcie_pxh);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_PXHV,	quirk_pcie_pxh);
+
+/*
+ * Some Intel PCI Express chipsets have trouble with downstream device
+ * power management.
+ */
+static void quirk_intel_pcie_pm(struct pci_dev *dev)
+{
+	pci_pm_d3hot_delay = 120;
+	dev->no_d1d2 = 1;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x25e2, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x25e3, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x25e4, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x25e5, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x25e6, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x25e7, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x25f7, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x25f8, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x25f9, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x25fa, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x2601, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x2602, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x2603, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x2604, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x2605, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x2606, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x2607, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x2608, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x2609, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x260a, quirk_intel_pcie_pm);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x260b, quirk_intel_pcie_pm);
+
+static void quirk_d3hot_delay(struct pci_dev *dev, unsigned int delay)
+{
+	if (dev->d3hot_delay >= delay)
+		return;
+
+	dev->d3hot_delay = delay;
+	pci_info(dev, "extending delay after power-on from D3hot to %d msec\n",
+		 dev->d3hot_delay);
+}
+
+static void quirk_radeon_pm(struct pci_dev *dev)
+{
+	if (dev->subsystem_vendor == PCI_VENDOR_ID_APPLE &&
+	    dev->subsystem_device == 0x00e2)
+		quirk_d3hot_delay(dev, 20);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x6741, quirk_radeon_pm);
+
+/*
+ * NVIDIA Ampere-based HDA controllers can wedge the whole device if a bus
+ * reset is performed too soon after transition to D0, extend d3hot_delay
+ * to previous effective default for all NVIDIA HDA controllers.
+ */
+static void quirk_nvidia_hda_pm(struct pci_dev *dev)
+{
+	quirk_d3hot_delay(dev, 20);
+}
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
+			      PCI_CLASS_MULTIMEDIA_HD_AUDIO, 8,
+			      quirk_nvidia_hda_pm);
+
+/*
+ * Ryzen5/7 XHCI controllers fail upon resume from runtime suspend or s2idle.
+ * https://bugzilla.kernel.org/show_bug.cgi?id=205587
+ *
+ * The kernel attempts to transition these devices to D3cold, but that seems
+ * to be ineffective on the platforms in question; the PCI device appears to
+ * remain on in D3hot state. The D3hot-to-D0 transition then requires an
+ * extended delay in order to succeed.
+ */
+static void quirk_ryzen_xhci_d3hot(struct pci_dev *dev)
+{
+	quirk_d3hot_delay(dev, 20);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15e0, quirk_ryzen_xhci_d3hot);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15e1, quirk_ryzen_xhci_d3hot);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1639, quirk_ryzen_xhci_d3hot);
+
+#ifdef CONFIG_X86_IO_APIC
+static int dmi_disable_ioapicreroute(const struct dmi_system_id *d)
+{
+	noioapicreroute = 1;
+	pr_info("%s detected: disable boot interrupt reroute\n", d->ident);
+
+	return 0;
+}
+
+static const struct dmi_system_id boot_interrupt_dmi_table[] = {
+	/*
+	 * Systems to exclude from boot interrupt reroute quirks
+	 */
+	{
+		.callback = dmi_disable_ioapicreroute,
+		.ident = "ASUSTek Computer INC. M2N-LR",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTek Computer INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "M2N-LR"),
+		},
+	},
+	{}
+};
+
+/*
+ * Boot interrupts on some chipsets cannot be turned off. For these chipsets,
+ * remap the original interrupt in the Linux kernel to the boot interrupt, so
+ * that a PCI device's interrupt handler is installed on the boot interrupt
+ * line instead.
+ */
+static void quirk_reroute_to_boot_interrupts_intel(struct pci_dev *dev)
+{
+	dmi_check_system(boot_interrupt_dmi_table);
+	if (noioapicquirk || noioapicreroute)
+		return;
+
+	dev->irq_reroute_variant = INTEL_IRQ_REROUTE_VARIANT;
+	pci_info(dev, "rerouting interrupts for [%04x:%04x]\n",
+		 dev->vendor, dev->device);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_80333_0,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_80333_1,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ESB2_0,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_PXH_0,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_PXH_1,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_PXHV,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_80332_0,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_80332_1,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_80333_0,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_80333_1,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ESB2_0,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_PXH_0,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_PXH_1,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_PXHV,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_80332_0,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_80332_1,	quirk_reroute_to_boot_interrupts_intel);
+
+/*
+ * On some chipsets we can disable the generation of legacy INTx boot
+ * interrupts.
+ */
+
+/*
+ * IO-APIC1 on 6300ESB generates boot interrupts, see Intel order no
+ * 300641-004US, section 5.7.3.
+ *
+ * Core IO on Xeon E5 1600/2600/4600, see Intel order no 326509-003.
+ * Core IO on Xeon E5 v2, see Intel order no 329188-003.
+ * Core IO on Xeon E7 v2, see Intel order no 329595-002.
+ * Core IO on Xeon E5 v3, see Intel order no 330784-003.
+ * Core IO on Xeon E7 v3, see Intel order no 332315-001US.
+ * Core IO on Xeon E5 v4, see Intel order no 333810-002US.
+ * Core IO on Xeon E7 v4, see Intel order no 332315-001US.
+ * Core IO on Xeon D-1500, see Intel order no 332051-001.
+ * Core IO on Xeon Scalable, see Intel order no 610950.
+ */
+#define INTEL_6300_IOAPIC_ABAR		0x40	/* Bus 0, Dev 29, Func 5 */
+#define INTEL_6300_DISABLE_BOOT_IRQ	(1<<14)
+
+#define INTEL_CIPINTRC_CFG_OFFSET	0x14C	/* Bus 0, Dev 5, Func 0 */
+#define INTEL_CIPINTRC_DIS_INTX_ICH	(1<<25)
+
+static void quirk_disable_intel_boot_interrupt(struct pci_dev *dev)
+{
+	u16 pci_config_word;
+	u32 pci_config_dword;
+
+	if (noioapicquirk)
+		return;
+
+	switch (dev->device) {
+	case PCI_DEVICE_ID_INTEL_ESB_10:
+		pci_read_config_word(dev, INTEL_6300_IOAPIC_ABAR,
+				     &pci_config_word);
+		pci_config_word |= INTEL_6300_DISABLE_BOOT_IRQ;
+		pci_write_config_word(dev, INTEL_6300_IOAPIC_ABAR,
+				      pci_config_word);
+		break;
+	case 0x3c28:	/* Xeon E5 1600/2600/4600	*/
+	case 0x0e28:	/* Xeon E5/E7 V2		*/
+	case 0x2f28:	/* Xeon E5/E7 V3,V4		*/
+	case 0x6f28:	/* Xeon D-1500			*/
+	case 0x2034:	/* Xeon Scalable Family		*/
+		pci_read_config_dword(dev, INTEL_CIPINTRC_CFG_OFFSET,
+				      &pci_config_dword);
+		pci_config_dword |= INTEL_CIPINTRC_DIS_INTX_ICH;
+		pci_write_config_dword(dev, INTEL_CIPINTRC_CFG_OFFSET,
+				       pci_config_dword);
+		break;
+	default:
+		return;
+	}
+	pci_info(dev, "disabled boot interrupts on device [%04x:%04x]\n",
+		 dev->vendor, dev->device);
+}
+/*
+ * Device 29 Func 5 Device IDs of IO-APIC
+ * containing ABAR—APIC1 Alternate Base Address Register
+ */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ESB_10,
+		quirk_disable_intel_boot_interrupt);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ESB_10,
+		quirk_disable_intel_boot_interrupt);
+
+/*
+ * Device 5 Func 0 Device IDs of Core IO modules/hubs
+ * containing Coherent Interface Protocol Interrupt Control
+ *
+ * Device IDs obtained from volume 2 datasheets of commented
+ * families above.
+ */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x3c28,
+		quirk_disable_intel_boot_interrupt);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x0e28,
+		quirk_disable_intel_boot_interrupt);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x2f28,
+		quirk_disable_intel_boot_interrupt);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x6f28,
+		quirk_disable_intel_boot_interrupt);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x2034,
+		quirk_disable_intel_boot_interrupt);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	0x3c28,
+		quirk_disable_intel_boot_interrupt);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	0x0e28,
+		quirk_disable_intel_boot_interrupt);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	0x2f28,
+		quirk_disable_intel_boot_interrupt);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	0x6f28,
+		quirk_disable_intel_boot_interrupt);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,	0x2034,
+		quirk_disable_intel_boot_interrupt);
+
+/* Disable boot interrupts on HT-1000 */
+#define BC_HT1000_FEATURE_REG		0x64
+#define BC_HT1000_PIC_REGS_ENABLE	(1<<0)
+#define BC_HT1000_MAP_IDX		0xC00
+#define BC_HT1000_MAP_DATA		0xC01
+
+static void quirk_disable_broadcom_boot_interrupt(struct pci_dev *dev)
+{
+	u32 pci_config_dword;
+	u8 irq;
+
+	if (noioapicquirk)
+		return;
+
+	pci_read_config_dword(dev, BC_HT1000_FEATURE_REG, &pci_config_dword);
+	pci_write_config_dword(dev, BC_HT1000_FEATURE_REG, pci_config_dword |
+			BC_HT1000_PIC_REGS_ENABLE);
+
+	for (irq = 0x10; irq < 0x10 + 32; irq++) {
+		outb(irq, BC_HT1000_MAP_IDX);
+		outb(0x00, BC_HT1000_MAP_DATA);
+	}
+
+	pci_write_config_dword(dev, BC_HT1000_FEATURE_REG, pci_config_dword);
+
+	pci_info(dev, "disabled boot interrupts on device [%04x:%04x]\n",
+		 dev->vendor, dev->device);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SERVERWORKS,   PCI_DEVICE_ID_SERVERWORKS_HT1000SB,	quirk_disable_broadcom_boot_interrupt);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SERVERWORKS,   PCI_DEVICE_ID_SERVERWORKS_HT1000SB,	quirk_disable_broadcom_boot_interrupt);
+
+/* Disable boot interrupts on AMD and ATI chipsets */
+
+/*
+ * NOIOAMODE needs to be disabled to disable "boot interrupts". For AMD 8131
+ * rev. A0 and B0, NOIOAMODE needs to be disabled anyway to fix IO-APIC mode
+ * (due to an erratum).
+ */
+#define AMD_813X_MISC			0x40
+#define AMD_813X_NOIOAMODE		(1<<0)
+#define AMD_813X_REV_B1			0x12
+#define AMD_813X_REV_B2			0x13
+
+static void quirk_disable_amd_813x_boot_interrupt(struct pci_dev *dev)
+{
+	u32 pci_config_dword;
+
+	if (noioapicquirk)
+		return;
+	if ((dev->revision == AMD_813X_REV_B1) ||
+	    (dev->revision == AMD_813X_REV_B2))
+		return;
+
+	pci_read_config_dword(dev, AMD_813X_MISC, &pci_config_dword);
+	pci_config_dword &= ~AMD_813X_NOIOAMODE;
+	pci_write_config_dword(dev, AMD_813X_MISC, pci_config_dword);
+
+	pci_info(dev, "disabled boot interrupts on device [%04x:%04x]\n",
+		 dev->vendor, dev->device);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD,	PCI_DEVICE_ID_AMD_8131_BRIDGE,	quirk_disable_amd_813x_boot_interrupt);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD,	PCI_DEVICE_ID_AMD_8131_BRIDGE,	quirk_disable_amd_813x_boot_interrupt);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD,	PCI_DEVICE_ID_AMD_8132_BRIDGE,	quirk_disable_amd_813x_boot_interrupt);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD,	PCI_DEVICE_ID_AMD_8132_BRIDGE,	quirk_disable_amd_813x_boot_interrupt);
+
+#define AMD_8111_PCI_IRQ_ROUTING	0x56
+
+static void quirk_disable_amd_8111_boot_interrupt(struct pci_dev *dev)
+{
+	u16 pci_config_word;
+
+	if (noioapicquirk)
+		return;
+
+	pci_read_config_word(dev, AMD_8111_PCI_IRQ_ROUTING, &pci_config_word);
+	if (!pci_config_word) {
+		pci_info(dev, "boot interrupts on device [%04x:%04x] already disabled\n",
+			 dev->vendor, dev->device);
+		return;
+	}
+	pci_write_config_word(dev, AMD_8111_PCI_IRQ_ROUTING, 0);
+	pci_info(dev, "disabled boot interrupts on device [%04x:%04x]\n",
+		 dev->vendor, dev->device);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD,   PCI_DEVICE_ID_AMD_8111_SMBUS,	quirk_disable_amd_8111_boot_interrupt);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD,   PCI_DEVICE_ID_AMD_8111_SMBUS,	quirk_disable_amd_8111_boot_interrupt);
+#endif /* CONFIG_X86_IO_APIC */
+
+/*
+ * Toshiba TC86C001 IDE controller reports the standard 8-byte BAR0 size
+ * but the PIO transfers won't work if BAR0 falls at the odd 8 bytes.
+ * Re-allocate the region if needed...
+ */
+static void quirk_tc86c001_ide(struct pci_dev *dev)
+{
+	struct resource *r = &dev->resource[0];
+
+	if (r->start & 0x8) {
+		r->flags |= IORESOURCE_UNSET;
+		r->start = 0;
+		r->end = 0xf;
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TOSHIBA_2,
+			 PCI_DEVICE_ID_TOSHIBA_TC86C001_IDE,
+			 quirk_tc86c001_ide);
+
+/*
+ * PLX PCI 9050 PCI Target bridge controller has an erratum that prevents the
+ * local configuration registers accessible via BAR0 (memory) or BAR1 (i/o)
+ * being read correctly if bit 7 of the base address is set.
+ * The BAR0 or BAR1 region may be disabled (size 0) or enabled (size 128).
+ * Re-allocate the regions to a 256-byte boundary if necessary.
+ */
+static void quirk_plx_pci9050(struct pci_dev *dev)
+{
+	unsigned int bar;
+
+	/* Fixed in revision 2 (PCI 9052). */
+	if (dev->revision >= 2)
+		return;
+	for (bar = 0; bar <= 1; bar++)
+		if (pci_resource_len(dev, bar) == 0x80 &&
+		    (pci_resource_start(dev, bar) & 0x80)) {
+			struct resource *r = &dev->resource[bar];
+			pci_info(dev, "Re-allocating PLX PCI 9050 BAR %u to length 256 to avoid bit 7 bug\n",
+				 bar);
+			r->flags |= IORESOURCE_UNSET;
+			r->start = 0;
+			r->end = 0xff;
+		}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050,
+			 quirk_plx_pci9050);
+/*
+ * The following Meilhaus (vendor ID 0x1402) device IDs (amongst others)
+ * may be using the PLX PCI 9050: 0x0630, 0x0940, 0x0950, 0x0960, 0x100b,
+ * 0x1400, 0x140a, 0x140b, 0x14e0, 0x14ea, 0x14eb, 0x1604, 0x1608, 0x160c,
+ * 0x168f, 0x2000, 0x2600, 0x3000, 0x810a, 0x810b.
+ *
+ * Currently, device IDs 0x2000 and 0x2600 are used by the Comedi "me_daq"
+ * driver.
+ */
+DECLARE_PCI_FIXUP_HEADER(0x1402, 0x2000, quirk_plx_pci9050);
+DECLARE_PCI_FIXUP_HEADER(0x1402, 0x2600, quirk_plx_pci9050);
+
+static void quirk_netmos(struct pci_dev *dev)
+{
+	unsigned int num_parallel = (dev->subsystem_device & 0xf0) >> 4;
+	unsigned int num_serial = dev->subsystem_device & 0xf;
+
+	/*
+	 * These Netmos parts are multiport serial devices with optional
+	 * parallel ports.  Even when parallel ports are present, they
+	 * are identified as class SERIAL, which means the serial driver
+	 * will claim them.  To prevent this, mark them as class OTHER.
+	 * These combo devices should be claimed by parport_serial.
+	 *
+	 * The subdevice ID is of the form 0x00PS, where <P> is the number
+	 * of parallel ports and <S> is the number of serial ports.
+	 */
+	switch (dev->device) {
+	case PCI_DEVICE_ID_NETMOS_9835:
+		/* Well, this rule doesn't hold for the following 9835 device */
+		if (dev->subsystem_vendor == PCI_VENDOR_ID_IBM &&
+				dev->subsystem_device == 0x0299)
+			return;
+		fallthrough;
+	case PCI_DEVICE_ID_NETMOS_9735:
+	case PCI_DEVICE_ID_NETMOS_9745:
+	case PCI_DEVICE_ID_NETMOS_9845:
+	case PCI_DEVICE_ID_NETMOS_9855:
+		if (num_parallel) {
+			pci_info(dev, "Netmos %04x (%u parallel, %u serial); changing class SERIAL to OTHER (use parport_serial)\n",
+				dev->device, num_parallel, num_serial);
+			dev->class = (PCI_CLASS_COMMUNICATION_OTHER << 8) |
+			    (dev->class & 0xff);
+		}
+	}
+}
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_NETMOS, PCI_ANY_ID,
+			 PCI_CLASS_COMMUNICATION_SERIAL, 8, quirk_netmos);
+
+static void quirk_e100_interrupt(struct pci_dev *dev)
+{
+	u16 command, pmcsr;
+	u8 __iomem *csr;
+	u8 cmd_hi;
+
+	switch (dev->device) {
+	/* PCI IDs taken from drivers/net/e100.c */
+	case 0x1029:
+	case 0x1030 ... 0x1034:
+	case 0x1038 ... 0x103E:
+	case 0x1050 ... 0x1057:
+	case 0x1059:
+	case 0x1064 ... 0x106B:
+	case 0x1091 ... 0x1095:
+	case 0x1209:
+	case 0x1229:
+	case 0x2449:
+	case 0x2459:
+	case 0x245D:
+	case 0x27DC:
+		break;
+	default:
+		return;
+	}
+
+	/*
+	 * Some firmware hands off the e100 with interrupts enabled,
+	 * which can cause a flood of interrupts if packets are
+	 * received before the driver attaches to the device.  So
+	 * disable all e100 interrupts here.  The driver will
+	 * re-enable them when it's ready.
+	 */
+	pci_read_config_word(dev, PCI_COMMAND, &command);
+
+	if (!(command & PCI_COMMAND_MEMORY) || !pci_resource_start(dev, 0))
+		return;
+
+	/*
+	 * Check that the device is in the D0 power state. If it's not,
+	 * there is no point to look any further.
+	 */
+	if (dev->pm_cap) {
+		pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+		if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0)
+			return;
+	}
+
+	/* Convert from PCI bus to resource space.  */
+	csr = ioremap(pci_resource_start(dev, 0), 8);
+	if (!csr) {
+		pci_warn(dev, "Can't map e100 registers\n");
+		return;
+	}
+
+	cmd_hi = readb(csr + 3);
+	if (cmd_hi == 0) {
+		pci_warn(dev, "Firmware left e100 interrupts enabled; disabling\n");
+		writeb(1, csr + 3);
+	}
+
+	iounmap(csr);
+}
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
+			PCI_CLASS_NETWORK_ETHERNET, 8, quirk_e100_interrupt);
+
+/*
+ * The 82575 and 82598 may experience data corruption issues when transitioning
+ * out of L0S.  To prevent this we need to disable L0S on the PCIe link.
+ */
+static void quirk_disable_aspm_l0s(struct pci_dev *dev)
+{
+	pci_info(dev, "Disabling L0s\n");
+	pci_disable_link_state(dev, PCIE_LINK_STATE_L0S);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10a7, quirk_disable_aspm_l0s);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10a9, quirk_disable_aspm_l0s);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10b6, quirk_disable_aspm_l0s);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10c6, quirk_disable_aspm_l0s);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10c7, quirk_disable_aspm_l0s);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10c8, quirk_disable_aspm_l0s);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10d6, quirk_disable_aspm_l0s);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10db, quirk_disable_aspm_l0s);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10dd, quirk_disable_aspm_l0s);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10e1, quirk_disable_aspm_l0s);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10ec, quirk_disable_aspm_l0s);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10f1, quirk_disable_aspm_l0s);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10f4, quirk_disable_aspm_l0s);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1508, quirk_disable_aspm_l0s);
+
+static void quirk_disable_aspm_l0s_l1(struct pci_dev *dev)
+{
+	pci_info(dev, "Disabling ASPM L0s/L1\n");
+	pci_disable_link_state(dev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1);
+}
+
+/*
+ * ASM1083/1085 PCIe-PCI bridge devices cause AER timeout errors on the
+ * upstream PCIe root port when ASPM is enabled. At least L0s mode is affected;
+ * disable both L0s and L1 for now to be safe.
+ */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ASMEDIA, 0x1080, quirk_disable_aspm_l0s_l1);
+
+/*
+ * Some Pericom PCIe-to-PCI bridges in reverse mode need the PCIe Retrain
+ * Link bit cleared after starting the link retrain process to allow this
+ * process to finish.
+ *
+ * Affected devices: PI7C9X110, PI7C9X111SL, PI7C9X130.  See also the
+ * Pericom Errata Sheet PI7C9X111SLB_errata_rev1.2_102711.pdf.
+ */
+static void quirk_enable_clear_retrain_link(struct pci_dev *dev)
+{
+	dev->clear_retrain_link = 1;
+	pci_info(dev, "Enable PCIe Retrain Link quirk\n");
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PERICOM, 0xe110, quirk_enable_clear_retrain_link);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PERICOM, 0xe111, quirk_enable_clear_retrain_link);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PERICOM, 0xe130, quirk_enable_clear_retrain_link);
+
+static void fixup_rev1_53c810(struct pci_dev *dev)
+{
+	u32 class = dev->class;
+
+	/*
+	 * rev 1 ncr53c810 chips don't set the class at all which means
+	 * they don't get their resources remapped. Fix that here.
+	 */
+	if (class)
+		return;
+
+	dev->class = PCI_CLASS_STORAGE_SCSI << 8;
+	pci_info(dev, "NCR 53c810 rev 1 PCI class overridden (%#08x -> %#08x)\n",
+		 class, dev->class);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, fixup_rev1_53c810);
+
+/* Enable 1k I/O space granularity on the Intel P64H2 */
+static void quirk_p64h2_1k_io(struct pci_dev *dev)
+{
+	u16 en1k;
+
+	pci_read_config_word(dev, 0x40, &en1k);
+
+	if (en1k & 0x200) {
+		pci_info(dev, "Enable I/O Space to 1KB granularity\n");
+		dev->io_window_1k = 1;
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1460, quirk_p64h2_1k_io);
+
+/*
+ * Under some circumstances, AER is not linked with extended capabilities.
+ * Force it to be linked by setting the corresponding control bit in the
+ * config space.
+ */
+static void quirk_nvidia_ck804_pcie_aer_ext_cap(struct pci_dev *dev)
+{
+	uint8_t b;
+
+	if (pci_read_config_byte(dev, 0xf41, &b) == 0) {
+		if (!(b & 0x20)) {
+			pci_write_config_byte(dev, 0xf41, b | 0x20);
+			pci_info(dev, "Linking AER extended capability\n");
+		}
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA,  PCI_DEVICE_ID_NVIDIA_CK804_PCIE,
+			quirk_nvidia_ck804_pcie_aer_ext_cap);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_NVIDIA,  PCI_DEVICE_ID_NVIDIA_CK804_PCIE,
+			quirk_nvidia_ck804_pcie_aer_ext_cap);
+
+static void quirk_via_cx700_pci_parking_caching(struct pci_dev *dev)
+{
+	/*
+	 * Disable PCI Bus Parking and PCI Master read caching on CX700
+	 * which causes unspecified timing errors with a VT6212L on the PCI
+	 * bus leading to USB2.0 packet loss.
+	 *
+	 * This quirk is only enabled if a second (on the external PCI bus)
+	 * VT6212L is found -- the CX700 core itself also contains a USB
+	 * host controller with the same PCI ID as the VT6212L.
+	 */
+
+	/* Count VT6212L instances */
+	struct pci_dev *p = pci_get_device(PCI_VENDOR_ID_VIA,
+		PCI_DEVICE_ID_VIA_8235_USB_2, NULL);
+	uint8_t b;
+
+	/*
+	 * p should contain the first (internal) VT6212L -- see if we have
+	 * an external one by searching again.
+	 */
+	p = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235_USB_2, p);
+	if (!p)
+		return;
+	pci_dev_put(p);
+
+	if (pci_read_config_byte(dev, 0x76, &b) == 0) {
+		if (b & 0x40) {
+			/* Turn off PCI Bus Parking */
+			pci_write_config_byte(dev, 0x76, b ^ 0x40);
+
+			pci_info(dev, "Disabling VIA CX700 PCI parking\n");
+		}
+	}
+
+	if (pci_read_config_byte(dev, 0x72, &b) == 0) {
+		if (b != 0) {
+			/* Turn off PCI Master read caching */
+			pci_write_config_byte(dev, 0x72, 0x0);
+
+			/* Set PCI Master Bus time-out to "1x16 PCLK" */
+			pci_write_config_byte(dev, 0x75, 0x1);
+
+			/* Disable "Read FIFO Timer" */
+			pci_write_config_byte(dev, 0x77, 0x0);
+
+			pci_info(dev, "Disabling VIA CX700 PCI caching\n");
+		}
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0x324e, quirk_via_cx700_pci_parking_caching);
+
+static void quirk_brcm_5719_limit_mrrs(struct pci_dev *dev)
+{
+	u32 rev;
+
+	pci_read_config_dword(dev, 0xf4, &rev);
+
+	/* Only CAP the MRRS if the device is a 5719 A0 */
+	if (rev == 0x05719000) {
+		int readrq = pcie_get_readrq(dev);
+		if (readrq > 2048)
+			pcie_set_readrq(dev, 2048);
+	}
+}
+DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_BROADCOM,
+			 PCI_DEVICE_ID_TIGON3_5719,
+			 quirk_brcm_5719_limit_mrrs);
+
+/*
+ * Originally in EDAC sources for i82875P: Intel tells BIOS developers to
+ * hide device 6 which configures the overflow device access containing the
+ * DRBs - this is where we expose device 6.
+ * http://www.x86-secret.com/articles/tweak/pat/patsecrets-2.htm
+ */
+static void quirk_unhide_mch_dev6(struct pci_dev *dev)
+{
+	u8 reg;
+
+	if (pci_read_config_byte(dev, 0xF4, &reg) == 0 && !(reg & 0x02)) {
+		pci_info(dev, "Enabling MCH 'Overflow' Device\n");
+		pci_write_config_byte(dev, 0xF4, reg | 0x02);
+	}
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82865_HB,
+			quirk_unhide_mch_dev6);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82875_HB,
+			quirk_unhide_mch_dev6);
+
+#ifdef CONFIG_PCI_MSI
+/*
+ * Some chipsets do not support MSI. We cannot easily rely on setting
+ * PCI_BUS_FLAGS_NO_MSI in its bus flags because there are actually some
+ * other buses controlled by the chipset even if Linux is not aware of it.
+ * Instead of setting the flag on all buses in the machine, simply disable
+ * MSI globally.
+ */
+static void quirk_disable_all_msi(struct pci_dev *dev)
+{
+	pci_no_msi();
+	pci_warn(dev, "MSI quirk detected; MSI disabled\n");
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_GCNB_LE, quirk_disable_all_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RS400_200, quirk_disable_all_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RS480, quirk_disable_all_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3336, quirk_disable_all_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3351, quirk_disable_all_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3364, quirk_disable_all_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8380_0, quirk_disable_all_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SI, 0x0761, quirk_disable_all_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SAMSUNG, 0xa5e3, quirk_disable_all_msi);
+
+/* Disable MSI on chipsets that are known to not support it */
+static void quirk_disable_msi(struct pci_dev *dev)
+{
+	if (dev->subordinate) {
+		pci_warn(dev, "MSI quirk detected; subordinate MSI disabled\n");
+		dev->subordinate->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_disable_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0xa238, quirk_disable_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x5a3f, quirk_disable_msi);
+
+/*
+ * The APC bridge device in AMD 780 family northbridges has some random
+ * OEM subsystem ID in its vendor ID register (erratum 18), so instead
+ * we use the possible vendor/device IDs of the host bridge for the
+ * declared quirk, and search for the APC bridge by slot number.
+ */
+static void quirk_amd_780_apc_msi(struct pci_dev *host_bridge)
+{
+	struct pci_dev *apc_bridge;
+
+	apc_bridge = pci_get_slot(host_bridge->bus, PCI_DEVFN(1, 0));
+	if (apc_bridge) {
+		if (apc_bridge->device == 0x9602)
+			quirk_disable_msi(apc_bridge);
+		pci_dev_put(apc_bridge);
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x9600, quirk_amd_780_apc_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x9601, quirk_amd_780_apc_msi);
+
+/*
+ * Go through the list of HyperTransport capabilities and return 1 if a HT
+ * MSI capability is found and enabled.
+ */
+static int msi_ht_cap_enabled(struct pci_dev *dev)
+{
+	int pos, ttl = PCI_FIND_CAP_TTL;
+
+	pos = pci_find_ht_capability(dev, HT_CAPTYPE_MSI_MAPPING);
+	while (pos && ttl--) {
+		u8 flags;
+
+		if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS,
+					 &flags) == 0) {
+			pci_info(dev, "Found %s HT MSI Mapping\n",
+				flags & HT_MSI_FLAGS_ENABLE ?
+				"enabled" : "disabled");
+			return (flags & HT_MSI_FLAGS_ENABLE) != 0;
+		}
+
+		pos = pci_find_next_ht_capability(dev, pos,
+						  HT_CAPTYPE_MSI_MAPPING);
+	}
+	return 0;
+}
+
+/* Check the HyperTransport MSI mapping to know whether MSI is enabled or not */
+static void quirk_msi_ht_cap(struct pci_dev *dev)
+{
+	if (!msi_ht_cap_enabled(dev))
+		quirk_disable_msi(dev);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT2000_PCIE,
+			quirk_msi_ht_cap);
+
+/*
+ * The nVidia CK804 chipset may have 2 HT MSI mappings.  MSI is supported
+ * if the MSI capability is set in any of these mappings.
+ */
+static void quirk_nvidia_ck804_msi_ht_cap(struct pci_dev *dev)
+{
+	struct pci_dev *pdev;
+
+	/*
+	 * Check HT MSI cap on this chipset and the root one.  A single one
+	 * having MSI is enough to be sure that MSI is supported.
+	 */
+	pdev = pci_get_slot(dev->bus, 0);
+	if (!pdev)
+		return;
+	if (!msi_ht_cap_enabled(pdev))
+		quirk_msi_ht_cap(dev);
+	pci_dev_put(pdev);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_CK804_PCIE,
+			quirk_nvidia_ck804_msi_ht_cap);
+
+/* Force enable MSI mapping capability on HT bridges */
+static void ht_enable_msi_mapping(struct pci_dev *dev)
+{
+	int pos, ttl = PCI_FIND_CAP_TTL;
+
+	pos = pci_find_ht_capability(dev, HT_CAPTYPE_MSI_MAPPING);
+	while (pos && ttl--) {
+		u8 flags;
+
+		if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS,
+					 &flags) == 0) {
+			pci_info(dev, "Enabling HT MSI Mapping\n");
+
+			pci_write_config_byte(dev, pos + HT_MSI_FLAGS,
+					      flags | HT_MSI_FLAGS_ENABLE);
+		}
+		pos = pci_find_next_ht_capability(dev, pos,
+						  HT_CAPTYPE_MSI_MAPPING);
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SERVERWORKS,
+			 PCI_DEVICE_ID_SERVERWORKS_HT1000_PXB,
+			 ht_enable_msi_mapping);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8132_BRIDGE,
+			 ht_enable_msi_mapping);
+
+/*
+ * The P5N32-SLI motherboards from Asus have a problem with MSI
+ * for the MCP55 NIC. It is not yet determined whether the MSI problem
+ * also affects other devices. As for now, turn off MSI for this device.
+ */
+static void nvenet_msi_disable(struct pci_dev *dev)
+{
+	const char *board_name = dmi_get_system_info(DMI_BOARD_NAME);
+
+	if (board_name &&
+	    (strstr(board_name, "P5N32-SLI PREMIUM") ||
+	     strstr(board_name, "P5N32-E SLI"))) {
+		pci_info(dev, "Disabling MSI for MCP55 NIC on P5N32-SLI\n");
+		dev->no_msi = 1;
+	}
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA,
+			PCI_DEVICE_ID_NVIDIA_NVENET_15,
+			nvenet_msi_disable);
+
+/*
+ * PCIe spec r6.0 sec 6.1.4.3 says that if MSI/MSI-X is enabled, the device
+ * can't use INTx interrupts. Tegra's PCIe Root Ports don't generate MSI
+ * interrupts for PME and AER events; instead only INTx interrupts are
+ * generated. Though Tegra's PCIe Root Ports can generate MSI interrupts
+ * for other events, since PCIe specification doesn't support using a mix of
+ * INTx and MSI/MSI-X, it is required to disable MSI interrupts to avoid port
+ * service drivers registering their respective ISRs for MSIs.
+ */
+static void pci_quirk_nvidia_tegra_disable_rp_msi(struct pci_dev *dev)
+{
+	dev->no_msi = 1;
+}
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x1ad0,
+			      PCI_CLASS_BRIDGE_PCI, 8,
+			      pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x1ad1,
+			      PCI_CLASS_BRIDGE_PCI, 8,
+			      pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x1ad2,
+			      PCI_CLASS_BRIDGE_PCI, 8,
+			      pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0bf0,
+			      PCI_CLASS_BRIDGE_PCI, 8,
+			      pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0bf1,
+			      PCI_CLASS_BRIDGE_PCI, 8,
+			      pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1c,
+			      PCI_CLASS_BRIDGE_PCI, 8,
+			      pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1d,
+			      PCI_CLASS_BRIDGE_PCI, 8,
+			      pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e12,
+			      PCI_CLASS_BRIDGE_PCI, 8,
+			      pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e13,
+			      PCI_CLASS_BRIDGE_PCI, 8,
+			      pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0fae,
+			      PCI_CLASS_BRIDGE_PCI, 8,
+			      pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0faf,
+			      PCI_CLASS_BRIDGE_PCI, 8,
+			      pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x10e5,
+			      PCI_CLASS_BRIDGE_PCI, 8,
+			      pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x10e6,
+			      PCI_CLASS_BRIDGE_PCI, 8,
+			      pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x229a,
+			      PCI_CLASS_BRIDGE_PCI, 8,
+			      pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x229c,
+			      PCI_CLASS_BRIDGE_PCI, 8,
+			      pci_quirk_nvidia_tegra_disable_rp_msi);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_NVIDIA, 0x229e,
+			      PCI_CLASS_BRIDGE_PCI, 8,
+			      pci_quirk_nvidia_tegra_disable_rp_msi);
+
+/*
+ * Some versions of the MCP55 bridge from Nvidia have a legacy IRQ routing
+ * config register.  This register controls the routing of legacy
+ * interrupts from devices that route through the MCP55.  If this register
+ * is misprogrammed, interrupts are only sent to the BSP, unlike
+ * conventional systems where the IRQ is broadcast to all online CPUs.  Not
+ * having this register set properly prevents kdump from booting up
+ * properly, so let's make sure that we have it set correctly.
+ * Note that this is an undocumented register.
+ */
+static void nvbridge_check_legacy_irq_routing(struct pci_dev *dev)
+{
+	u32 cfg;
+
+	if (!pci_find_capability(dev, PCI_CAP_ID_HT))
+		return;
+
+	pci_read_config_dword(dev, 0x74, &cfg);
+
+	if (cfg & ((1 << 2) | (1 << 15))) {
+		pr_info("Rewriting IRQ routing register on MCP55\n");
+		cfg &= ~((1 << 2) | (1 << 15));
+		pci_write_config_dword(dev, 0x74, cfg);
+	}
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA,
+			PCI_DEVICE_ID_NVIDIA_MCP55_BRIDGE_V0,
+			nvbridge_check_legacy_irq_routing);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA,
+			PCI_DEVICE_ID_NVIDIA_MCP55_BRIDGE_V4,
+			nvbridge_check_legacy_irq_routing);
+
+static int ht_check_msi_mapping(struct pci_dev *dev)
+{
+	int pos, ttl = PCI_FIND_CAP_TTL;
+	int found = 0;
+
+	/* Check if there is HT MSI cap or enabled on this device */
+	pos = pci_find_ht_capability(dev, HT_CAPTYPE_MSI_MAPPING);
+	while (pos && ttl--) {
+		u8 flags;
+
+		if (found < 1)
+			found = 1;
+		if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS,
+					 &flags) == 0) {
+			if (flags & HT_MSI_FLAGS_ENABLE) {
+				if (found < 2) {
+					found = 2;
+					break;
+				}
+			}
+		}
+		pos = pci_find_next_ht_capability(dev, pos,
+						  HT_CAPTYPE_MSI_MAPPING);
+	}
+
+	return found;
+}
+
+static int host_bridge_with_leaf(struct pci_dev *host_bridge)
+{
+	struct pci_dev *dev;
+	int pos;
+	int i, dev_no;
+	int found = 0;
+
+	dev_no = host_bridge->devfn >> 3;
+	for (i = dev_no + 1; i < 0x20; i++) {
+		dev = pci_get_slot(host_bridge->bus, PCI_DEVFN(i, 0));
+		if (!dev)
+			continue;
+
+		/* found next host bridge? */
+		pos = pci_find_ht_capability(dev, HT_CAPTYPE_SLAVE);
+		if (pos != 0) {
+			pci_dev_put(dev);
+			break;
+		}
+
+		if (ht_check_msi_mapping(dev)) {
+			found = 1;
+			pci_dev_put(dev);
+			break;
+		}
+		pci_dev_put(dev);
+	}
+
+	return found;
+}
+
+#define PCI_HT_CAP_SLAVE_CTRL0     4    /* link control */
+#define PCI_HT_CAP_SLAVE_CTRL1     8    /* link control to */
+
+static int is_end_of_ht_chain(struct pci_dev *dev)
+{
+	int pos, ctrl_off;
+	int end = 0;
+	u16 flags, ctrl;
+
+	pos = pci_find_ht_capability(dev, HT_CAPTYPE_SLAVE);
+
+	if (!pos)
+		goto out;
+
+	pci_read_config_word(dev, pos + PCI_CAP_FLAGS, &flags);
+
+	ctrl_off = ((flags >> 10) & 1) ?
+			PCI_HT_CAP_SLAVE_CTRL0 : PCI_HT_CAP_SLAVE_CTRL1;
+	pci_read_config_word(dev, pos + ctrl_off, &ctrl);
+
+	if (ctrl & (1 << 6))
+		end = 1;
+
+out:
+	return end;
+}
+
+static void nv_ht_enable_msi_mapping(struct pci_dev *dev)
+{
+	struct pci_dev *host_bridge;
+	int pos;
+	int i, dev_no;
+	int found = 0;
+
+	dev_no = dev->devfn >> 3;
+	for (i = dev_no; i >= 0; i--) {
+		host_bridge = pci_get_slot(dev->bus, PCI_DEVFN(i, 0));
+		if (!host_bridge)
+			continue;
+
+		pos = pci_find_ht_capability(host_bridge, HT_CAPTYPE_SLAVE);
+		if (pos != 0) {
+			found = 1;
+			break;
+		}
+		pci_dev_put(host_bridge);
+	}
+
+	if (!found)
+		return;
+
+	/* don't enable end_device/host_bridge with leaf directly here */
+	if (host_bridge == dev && is_end_of_ht_chain(host_bridge) &&
+	    host_bridge_with_leaf(host_bridge))
+		goto out;
+
+	/* root did that ! */
+	if (msi_ht_cap_enabled(host_bridge))
+		goto out;
+
+	ht_enable_msi_mapping(dev);
+
+out:
+	pci_dev_put(host_bridge);
+}
+
+static void ht_disable_msi_mapping(struct pci_dev *dev)
+{
+	int pos, ttl = PCI_FIND_CAP_TTL;
+
+	pos = pci_find_ht_capability(dev, HT_CAPTYPE_MSI_MAPPING);
+	while (pos && ttl--) {
+		u8 flags;
+
+		if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS,
+					 &flags) == 0) {
+			pci_info(dev, "Disabling HT MSI Mapping\n");
+
+			pci_write_config_byte(dev, pos + HT_MSI_FLAGS,
+					      flags & ~HT_MSI_FLAGS_ENABLE);
+		}
+		pos = pci_find_next_ht_capability(dev, pos,
+						  HT_CAPTYPE_MSI_MAPPING);
+	}
+}
+
+static void __nv_msi_ht_cap_quirk(struct pci_dev *dev, int all)
+{
+	struct pci_dev *host_bridge;
+	int pos;
+	int found;
+
+	if (!pci_msi_enabled())
+		return;
+
+	/* check if there is HT MSI cap or enabled on this device */
+	found = ht_check_msi_mapping(dev);
+
+	/* no HT MSI CAP */
+	if (found == 0)
+		return;
+
+	/*
+	 * HT MSI mapping should be disabled on devices that are below
+	 * a non-HyperTransport host bridge. Locate the host bridge.
+	 */
+	host_bridge = pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus), 0,
+						  PCI_DEVFN(0, 0));
+	if (host_bridge == NULL) {
+		pci_warn(dev, "nv_msi_ht_cap_quirk didn't locate host bridge\n");
+		return;
+	}
+
+	pos = pci_find_ht_capability(host_bridge, HT_CAPTYPE_SLAVE);
+	if (pos != 0) {
+		/* Host bridge is to HT */
+		if (found == 1) {
+			/* it is not enabled, try to enable it */
+			if (all)
+				ht_enable_msi_mapping(dev);
+			else
+				nv_ht_enable_msi_mapping(dev);
+		}
+		goto out;
+	}
+
+	/* HT MSI is not enabled */
+	if (found == 1)
+		goto out;
+
+	/* Host bridge is not to HT, disable HT MSI mapping on this device */
+	ht_disable_msi_mapping(dev);
+
+out:
+	pci_dev_put(host_bridge);
+}
+
+static void nv_msi_ht_cap_quirk_all(struct pci_dev *dev)
+{
+	return __nv_msi_ht_cap_quirk(dev, 1);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk_all);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk_all);
+
+static void nv_msi_ht_cap_quirk_leaf(struct pci_dev *dev)
+{
+	return __nv_msi_ht_cap_quirk(dev, 0);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk_leaf);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk_leaf);
+
+static void quirk_msi_intx_disable_bug(struct pci_dev *dev)
+{
+	dev->dev_flags |= PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG;
+}
+
+static void quirk_msi_intx_disable_ati_bug(struct pci_dev *dev)
+{
+	struct pci_dev *p;
+
+	/*
+	 * SB700 MSI issue will be fixed at HW level from revision A21;
+	 * we need check PCI REVISION ID of SMBus controller to get SB700
+	 * revision.
+	 */
+	p = pci_get_device(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
+			   NULL);
+	if (!p)
+		return;
+
+	if ((p->revision < 0x3B) && (p->revision >= 0x30))
+		dev->dev_flags |= PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG;
+	pci_dev_put(p);
+}
+
+static void quirk_msi_intx_disable_qca_bug(struct pci_dev *dev)
+{
+	/* AR816X/AR817X/E210X MSI is fixed at HW level from revision 0x18 */
+	if (dev->revision < 0x18) {
+		pci_info(dev, "set MSI_INTX_DISABLE_BUG flag\n");
+		dev->dev_flags |= PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG;
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
+			PCI_DEVICE_ID_TIGON3_5780,
+			quirk_msi_intx_disable_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
+			PCI_DEVICE_ID_TIGON3_5780S,
+			quirk_msi_intx_disable_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
+			PCI_DEVICE_ID_TIGON3_5714,
+			quirk_msi_intx_disable_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
+			PCI_DEVICE_ID_TIGON3_5714S,
+			quirk_msi_intx_disable_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
+			PCI_DEVICE_ID_TIGON3_5715,
+			quirk_msi_intx_disable_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
+			PCI_DEVICE_ID_TIGON3_5715S,
+			quirk_msi_intx_disable_bug);
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4390,
+			quirk_msi_intx_disable_ati_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4391,
+			quirk_msi_intx_disable_ati_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4392,
+			quirk_msi_intx_disable_ati_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4393,
+			quirk_msi_intx_disable_ati_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4394,
+			quirk_msi_intx_disable_ati_bug);
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4373,
+			quirk_msi_intx_disable_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4374,
+			quirk_msi_intx_disable_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4375,
+			quirk_msi_intx_disable_bug);
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0x1062,
+			quirk_msi_intx_disable_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0x1063,
+			quirk_msi_intx_disable_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0x2060,
+			quirk_msi_intx_disable_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0x2062,
+			quirk_msi_intx_disable_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0x1073,
+			quirk_msi_intx_disable_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0x1083,
+			quirk_msi_intx_disable_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0x1090,
+			quirk_msi_intx_disable_qca_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0x1091,
+			quirk_msi_intx_disable_qca_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0x10a0,
+			quirk_msi_intx_disable_qca_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0x10a1,
+			quirk_msi_intx_disable_qca_bug);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0xe091,
+			quirk_msi_intx_disable_qca_bug);
+
+/*
+ * Amazon's Annapurna Labs 1c36:0031 Root Ports don't support MSI-X, so it
+ * should be disabled on platforms where the device (mistakenly) advertises it.
+ *
+ * Notice that this quirk also disables MSI (which may work, but hasn't been
+ * tested), since currently there is no standard way to disable only MSI-X.
+ *
+ * The 0031 device id is reused for other non Root Port device types,
+ * therefore the quirk is registered for the PCI_CLASS_BRIDGE_PCI class.
+ */
+static void quirk_al_msi_disable(struct pci_dev *dev)
+{
+	dev->no_msi = 1;
+	pci_warn(dev, "Disabling MSI/MSI-X\n");
+}
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031,
+			      PCI_CLASS_BRIDGE_PCI, 8, quirk_al_msi_disable);
+#endif /* CONFIG_PCI_MSI */
+
+/*
+ * Allow manual resource allocation for PCI hotplug bridges via
+ * pci=hpmemsize=nnM and pci=hpiosize=nnM parameters. For some PCI-PCI
+ * hotplug bridges, like PLX 6254 (former HINT HB6), kernel fails to
+ * allocate resources when hotplug device is inserted and PCI bus is
+ * rescanned.
+ */
+static void quirk_hotplug_bridge(struct pci_dev *dev)
+{
+	dev->is_hotplug_bridge = 1;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HINT, 0x0020, quirk_hotplug_bridge);
+
+/*
+ * This is a quirk for the Ricoh MMC controller found as a part of some
+ * multifunction chips.
+ *
+ * This is very similar and based on the ricoh_mmc driver written by
+ * Philip Langdale. Thank you for these magic sequences.
+ *
+ * These chips implement the four main memory card controllers (SD, MMC,
+ * MS, xD) and one or both of CardBus or FireWire.
+ *
+ * It happens that they implement SD and MMC support as separate
+ * controllers (and PCI functions). The Linux SDHCI driver supports MMC
+ * cards but the chip detects MMC cards in hardware and directs them to the
+ * MMC controller - so the SDHCI driver never sees them.
+ *
+ * To get around this, we must disable the useless MMC controller.  At that
+ * point, the SDHCI controller will start seeing them.  It seems to be the
+ * case that the relevant PCI registers to deactivate the MMC controller
+ * live on PCI function 0, which might be the CardBus controller or the
+ * FireWire controller, depending on the particular chip in question
+ *
+ * This has to be done early, because as soon as we disable the MMC controller
+ * other PCI functions shift up one level, e.g. function #2 becomes function
+ * #1, and this will confuse the PCI core.
+ */
+#ifdef CONFIG_MMC_RICOH_MMC
+static void ricoh_mmc_fixup_rl5c476(struct pci_dev *dev)
+{
+	u8 write_enable;
+	u8 write_target;
+	u8 disable;
+
+	/*
+	 * Disable via CardBus interface
+	 *
+	 * This must be done via function #0
+	 */
+	if (PCI_FUNC(dev->devfn))
+		return;
+
+	pci_read_config_byte(dev, 0xB7, &disable);
+	if (disable & 0x02)
+		return;
+
+	pci_read_config_byte(dev, 0x8E, &write_enable);
+	pci_write_config_byte(dev, 0x8E, 0xAA);
+	pci_read_config_byte(dev, 0x8D, &write_target);
+	pci_write_config_byte(dev, 0x8D, 0xB7);
+	pci_write_config_byte(dev, 0xB7, disable | 0x02);
+	pci_write_config_byte(dev, 0x8E, write_enable);
+	pci_write_config_byte(dev, 0x8D, write_target);
+
+	pci_notice(dev, "proprietary Ricoh MMC controller disabled (via CardBus function)\n");
+	pci_notice(dev, "MMC cards are now supported by standard SDHCI controller\n");
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_RL5C476, ricoh_mmc_fixup_rl5c476);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_RL5C476, ricoh_mmc_fixup_rl5c476);
+
+static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev)
+{
+	u8 write_enable;
+	u8 disable;
+
+	/*
+	 * Disable via FireWire interface
+	 *
+	 * This must be done via function #0
+	 */
+	if (PCI_FUNC(dev->devfn))
+		return;
+	/*
+	 * RICOH 0xe822 and 0xe823 SD/MMC card readers fail to recognize
+	 * certain types of SD/MMC cards. Lowering the SD base clock
+	 * frequency from 200Mhz to 50Mhz fixes this issue.
+	 *
+	 * 0x150 - SD2.0 mode enable for changing base clock
+	 *	   frequency to 50Mhz
+	 * 0xe1  - Base clock frequency
+	 * 0x32  - 50Mhz new clock frequency
+	 * 0xf9  - Key register for 0x150
+	 * 0xfc  - key register for 0xe1
+	 */
+	if (dev->device == PCI_DEVICE_ID_RICOH_R5CE822 ||
+	    dev->device == PCI_DEVICE_ID_RICOH_R5CE823) {
+		pci_write_config_byte(dev, 0xf9, 0xfc);
+		pci_write_config_byte(dev, 0x150, 0x10);
+		pci_write_config_byte(dev, 0xf9, 0x00);
+		pci_write_config_byte(dev, 0xfc, 0x01);
+		pci_write_config_byte(dev, 0xe1, 0x32);
+		pci_write_config_byte(dev, 0xfc, 0x00);
+
+		pci_notice(dev, "MMC controller base frequency changed to 50Mhz.\n");
+	}
+
+	pci_read_config_byte(dev, 0xCB, &disable);
+
+	if (disable & 0x02)
+		return;
+
+	pci_read_config_byte(dev, 0xCA, &write_enable);
+	pci_write_config_byte(dev, 0xCA, 0x57);
+	pci_write_config_byte(dev, 0xCB, disable | 0x02);
+	pci_write_config_byte(dev, 0xCA, write_enable);
+
+	pci_notice(dev, "proprietary Ricoh MMC controller disabled (via FireWire function)\n");
+	pci_notice(dev, "MMC cards are now supported by standard SDHCI controller\n");
+
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE822, ricoh_mmc_fixup_r5c832);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE822, ricoh_mmc_fixup_r5c832);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832);
+#endif /*CONFIG_MMC_RICOH_MMC*/
+
+#ifdef CONFIG_DMAR_TABLE
+#define VTUNCERRMSK_REG	0x1ac
+#define VTD_MSK_SPEC_ERRORS	(1 << 31)
+/*
+ * This is a quirk for masking VT-d spec-defined errors to platform error
+ * handling logic. Without this, platforms using Intel 7500, 5500 chipsets
+ * (and the derivative chipsets like X58 etc) seem to generate NMI/SMI (based
+ * on the RAS config settings of the platform) when a VT-d fault happens.
+ * The resulting SMI caused the system to hang.
+ *
+ * VT-d spec-related errors are already handled by the VT-d OS code, so no
+ * need to report the same error through other channels.
+ */
+static void vtd_mask_spec_errors(struct pci_dev *dev)
+{
+	u32 word;
+
+	pci_read_config_dword(dev, VTUNCERRMSK_REG, &word);
+	pci_write_config_dword(dev, VTUNCERRMSK_REG, word | VTD_MSK_SPEC_ERRORS);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, vtd_mask_spec_errors);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x3c28, vtd_mask_spec_errors);
+#endif
+
+static void fixup_ti816x_class(struct pci_dev *dev)
+{
+	u32 class = dev->class;
+
+	/* TI 816x devices do not have class code set when in PCIe boot mode */
+	dev->class = PCI_CLASS_MULTIMEDIA_VIDEO << 8;
+	pci_info(dev, "PCI class overridden (%#08x -> %#08x)\n",
+		 class, dev->class);
+}
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_TI, 0xb800,
+			      PCI_CLASS_NOT_DEFINED, 8, fixup_ti816x_class);
+
+/*
+ * Some PCIe devices do not work reliably with the claimed maximum
+ * payload size supported.
+ */
+static void fixup_mpss_256(struct pci_dev *dev)
+{
+	dev->pcie_mpss = 1; /* 256 bytes */
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SOLARFLARE,
+			PCI_DEVICE_ID_SOLARFLARE_SFC4000A_0, fixup_mpss_256);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SOLARFLARE,
+			PCI_DEVICE_ID_SOLARFLARE_SFC4000A_1, fixup_mpss_256);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SOLARFLARE,
+			PCI_DEVICE_ID_SOLARFLARE_SFC4000B, fixup_mpss_256);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ASMEDIA, 0x0612, fixup_mpss_256);
+
+/*
+ * Intel 5000 and 5100 Memory controllers have an erratum with read completion
+ * coalescing (which is enabled by default on some BIOSes) and MPS of 256B.
+ * Since there is no way of knowing what the PCIe MPS on each fabric will be
+ * until all of the devices are discovered and buses walked, read completion
+ * coalescing must be disabled.  Unfortunately, it cannot be re-enabled because
+ * it is possible to hotplug a device with MPS of 256B.
+ */
+static void quirk_intel_mc_errata(struct pci_dev *dev)
+{
+	int err;
+	u16 rcc;
+
+	if (pcie_bus_config == PCIE_BUS_TUNE_OFF ||
+	    pcie_bus_config == PCIE_BUS_DEFAULT)
+		return;
+
+	/*
+	 * Intel erratum specifies bits to change but does not say what
+	 * they are.  Keeping them magical until such time as the registers
+	 * and values can be explained.
+	 */
+	err = pci_read_config_word(dev, 0x48, &rcc);
+	if (err) {
+		pci_err(dev, "Error attempting to read the read completion coalescing register\n");
+		return;
+	}
+
+	if (!(rcc & (1 << 10)))
+		return;
+
+	rcc &= ~(1 << 10);
+
+	err = pci_write_config_word(dev, 0x48, rcc);
+	if (err) {
+		pci_err(dev, "Error attempting to write the read completion coalescing register\n");
+		return;
+	}
+
+	pr_info_once("Read completion coalescing disabled due to hardware erratum relating to 256B MPS\n");
+}
+/* Intel 5000 series memory controllers and ports 2-7 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25c0, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25d0, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25d4, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25d8, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25e2, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25e3, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25e4, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25e5, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25e6, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25e7, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25f7, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25f8, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25f9, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x25fa, quirk_intel_mc_errata);
+/* Intel 5100 series memory controllers and ports 2-7 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65c0, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65e2, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65e3, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65e4, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65e5, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65e6, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65e7, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65f7, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65f8, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65f9, quirk_intel_mc_errata);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65fa, quirk_intel_mc_errata);
+
+/*
+ * Ivytown NTB BAR sizes are misreported by the hardware due to an erratum.
+ * To work around this, query the size it should be configured to by the
+ * device and modify the resource end to correspond to this new size.
+ */
+static void quirk_intel_ntb(struct pci_dev *dev)
+{
+	int rc;
+	u8 val;
+
+	rc = pci_read_config_byte(dev, 0x00D0, &val);
+	if (rc)
+		return;
+
+	dev->resource[2].end = dev->resource[2].start + ((u64) 1 << val) - 1;
+
+	rc = pci_read_config_byte(dev, 0x00D1, &val);
+	if (rc)
+		return;
+
+	dev->resource[4].end = dev->resource[4].start + ((u64) 1 << val) - 1;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e08, quirk_intel_ntb);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e0d, quirk_intel_ntb);
+
+/*
+ * Some BIOS implementations leave the Intel GPU interrupts enabled, even
+ * though no one is handling them (e.g., if the i915 driver is never
+ * loaded).  Additionally the interrupt destination is not set up properly
+ * and the interrupt ends up -somewhere-.
+ *
+ * These spurious interrupts are "sticky" and the kernel disables the
+ * (shared) interrupt line after 100,000+ generated interrupts.
+ *
+ * Fix it by disabling the still enabled interrupts.  This resolves crashes
+ * often seen on monitor unplug.
+ */
+#define I915_DEIER_REG 0x4400c
+static void disable_igfx_irq(struct pci_dev *dev)
+{
+	void __iomem *regs = pci_iomap(dev, 0, 0);
+	if (regs == NULL) {
+		pci_warn(dev, "igfx quirk: Can't iomap PCI device\n");
+		return;
+	}
+
+	/* Check if any interrupt line is still enabled */
+	if (readl(regs + I915_DEIER_REG) != 0) {
+		pci_warn(dev, "BIOS left Intel GPU interrupts enabled; disabling\n");
+
+		writel(0, regs + I915_DEIER_REG);
+	}
+
+	pci_iounmap(dev, regs);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0042, disable_igfx_irq);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0046, disable_igfx_irq);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x004a, disable_igfx_irq);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0106, disable_igfx_irq);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0152, disable_igfx_irq);
+
+/*
+ * PCI devices which are on Intel chips can skip the 10ms delay
+ * before entering D3 mode.
+ */
+static void quirk_remove_d3hot_delay(struct pci_dev *dev)
+{
+	dev->d3hot_delay = 0;
+}
+/* C600 Series devices do not need 10ms d3hot_delay */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0412, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0c00, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0c0c, quirk_remove_d3hot_delay);
+/* Lynxpoint-H PCH devices do not need 10ms d3hot_delay */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c02, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c18, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c1c, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c20, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c22, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c26, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c2d, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c31, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c3a, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c3d, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c4e, quirk_remove_d3hot_delay);
+/* Intel Cherrytrail devices do not need 10ms d3hot_delay */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x2280, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x2298, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x229c, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22b0, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22b5, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22b7, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22b8, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22d8, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22dc, quirk_remove_d3hot_delay);
+
+/*
+ * Some devices may pass our check in pci_intx_mask_supported() if
+ * PCI_COMMAND_INTX_DISABLE works though they actually do not properly
+ * support this feature.
+ */
+static void quirk_broken_intx_masking(struct pci_dev *dev)
+{
+	dev->broken_intx_masking = 1;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x0030,
+			quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(0x1814, 0x0601, /* Ralink RT2800 802.11n PCI */
+			quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(0x1b7c, 0x0004, /* Ceton InfiniTV4 */
+			quirk_broken_intx_masking);
+
+/*
+ * Realtek RTL8169 PCI Gigabit Ethernet Controller (rev 10)
+ * Subsystem: Realtek RTL8169/8110 Family PCI Gigabit Ethernet NIC
+ *
+ * RTL8110SC - Fails under PCI device assignment using DisINTx masking.
+ */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_REALTEK, 0x8169,
+			quirk_broken_intx_masking);
+
+/*
+ * Intel i40e (XL710/X710) 10/20/40GbE NICs all have broken INTx masking,
+ * DisINTx can be set but the interrupt status bit is non-functional.
+ */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1572, quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1574, quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1580, quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1581, quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1583, quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1584, quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1585, quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1586, quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1587, quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1588, quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1589, quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x158a, quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x158b, quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x37d0, quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x37d1, quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x37d2, quirk_broken_intx_masking);
+
+static u16 mellanox_broken_intx_devs[] = {
+	PCI_DEVICE_ID_MELLANOX_HERMON_SDR,
+	PCI_DEVICE_ID_MELLANOX_HERMON_DDR,
+	PCI_DEVICE_ID_MELLANOX_HERMON_QDR,
+	PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2,
+	PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2,
+	PCI_DEVICE_ID_MELLANOX_HERMON_EN,
+	PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2,
+	PCI_DEVICE_ID_MELLANOX_CONNECTX_EN,
+	PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2,
+	PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2,
+	PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2,
+	PCI_DEVICE_ID_MELLANOX_CONNECTX2,
+	PCI_DEVICE_ID_MELLANOX_CONNECTX3,
+	PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO,
+};
+
+#define CONNECTX_4_CURR_MAX_MINOR 99
+#define CONNECTX_4_INTX_SUPPORT_MINOR 14
+
+/*
+ * Check ConnectX-4/LX FW version to see if it supports legacy interrupts.
+ * If so, don't mark it as broken.
+ * FW minor > 99 means older FW version format and no INTx masking support.
+ * FW minor < 14 means new FW version format and no INTx masking support.
+ */
+static void mellanox_check_broken_intx_masking(struct pci_dev *pdev)
+{
+	__be32 __iomem *fw_ver;
+	u16 fw_major;
+	u16 fw_minor;
+	u16 fw_subminor;
+	u32 fw_maj_min;
+	u32 fw_sub_min;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mellanox_broken_intx_devs); i++) {
+		if (pdev->device == mellanox_broken_intx_devs[i]) {
+			pdev->broken_intx_masking = 1;
+			return;
+		}
+	}
+
+	/*
+	 * Getting here means Connect-IB cards and up. Connect-IB has no INTx
+	 * support so shouldn't be checked further
+	 */
+	if (pdev->device == PCI_DEVICE_ID_MELLANOX_CONNECTIB)
+		return;
+
+	if (pdev->device != PCI_DEVICE_ID_MELLANOX_CONNECTX4 &&
+	    pdev->device != PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX)
+		return;
+
+	/* For ConnectX-4 and ConnectX-4LX, need to check FW support */
+	if (pci_enable_device_mem(pdev)) {
+		pci_warn(pdev, "Can't enable device memory\n");
+		return;
+	}
+
+	fw_ver = ioremap(pci_resource_start(pdev, 0), 4);
+	if (!fw_ver) {
+		pci_warn(pdev, "Can't map ConnectX-4 initialization segment\n");
+		goto out;
+	}
+
+	/* Reading from resource space should be 32b aligned */
+	fw_maj_min = ioread32be(fw_ver);
+	fw_sub_min = ioread32be(fw_ver + 1);
+	fw_major = fw_maj_min & 0xffff;
+	fw_minor = fw_maj_min >> 16;
+	fw_subminor = fw_sub_min & 0xffff;
+	if (fw_minor > CONNECTX_4_CURR_MAX_MINOR ||
+	    fw_minor < CONNECTX_4_INTX_SUPPORT_MINOR) {
+		pci_warn(pdev, "ConnectX-4: FW %u.%u.%u doesn't support INTx masking, disabling. Please upgrade FW to %d.14.1100 and up for INTx support\n",
+			 fw_major, fw_minor, fw_subminor, pdev->device ==
+			 PCI_DEVICE_ID_MELLANOX_CONNECTX4 ? 12 : 14);
+		pdev->broken_intx_masking = 1;
+	}
+
+	iounmap(fw_ver);
+
+out:
+	pci_disable_device(pdev);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID,
+			mellanox_check_broken_intx_masking);
+
+static void quirk_no_bus_reset(struct pci_dev *dev)
+{
+	dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
+}
+
+/*
+ * Some NVIDIA GPU devices do not work with bus reset, SBR needs to be
+ * prevented for those affected devices.
+ */
+static void quirk_nvidia_no_bus_reset(struct pci_dev *dev)
+{
+	if ((dev->device & 0xffc0) == 0x2340)
+		quirk_no_bus_reset(dev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
+			 quirk_nvidia_no_bus_reset);
+
+/*
+ * Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset.
+ * The device will throw a Link Down error on AER-capable systems and
+ * regardless of AER, config space of the device is never accessible again
+ * and typically causes the system to hang or reset when access is attempted.
+ * https://lore.kernel.org/r/20140923210318.498dacbd@dualc.maya.org/
+ */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0030, quirk_no_bus_reset);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0032, quirk_no_bus_reset);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x003c, quirk_no_bus_reset);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0033, quirk_no_bus_reset);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0034, quirk_no_bus_reset);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x003e, quirk_no_bus_reset);
+
+/*
+ * Root port on some Cavium CN8xxx chips do not successfully complete a bus
+ * reset when used with certain child devices.  After the reset, config
+ * accesses to the child may fail.
+ */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CAVIUM, 0xa100, quirk_no_bus_reset);
+
+/*
+ * Some TI KeyStone C667X devices do not support bus/hot reset.  The PCIESS
+ * automatically disables LTSSM when Secondary Bus Reset is received and
+ * the device stops working.  Prevent bus reset for these devices.  With
+ * this change, the device can be assigned to VMs with VFIO, but it will
+ * leak state between VMs.  Reference
+ * https://e2e.ti.com/support/processors/f/791/t/954382
+ */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, 0xb005, quirk_no_bus_reset);
+
+static void quirk_no_pm_reset(struct pci_dev *dev)
+{
+	/*
+	 * We can't do a bus reset on root bus devices, but an ineffective
+	 * PM reset may be better than nothing.
+	 */
+	if (!pci_is_root_bus(dev->bus))
+		dev->dev_flags |= PCI_DEV_FLAGS_NO_PM_RESET;
+}
+
+/*
+ * Some AMD/ATI GPUS (HD8570 - Oland) report that a D3hot->D0 transition
+ * causes a reset (i.e., they advertise NoSoftRst-).  This transition seems
+ * to have no effect on the device: it retains the framebuffer contents and
+ * monitor sync.  Advertising this support makes other layers, like VFIO,
+ * assume pci_reset_function() is viable for this device.  Mark it as
+ * unavailable to skip it when testing reset methods.
+ */
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_ATI, PCI_ANY_ID,
+			       PCI_CLASS_DISPLAY_VGA, 8, quirk_no_pm_reset);
+
+/*
+ * Thunderbolt controllers with broken MSI hotplug signaling:
+ * Entire 1st generation (Light Ridge, Eagle Ridge, Light Peak) and part
+ * of the 2nd generation (Cactus Ridge 4C up to revision 1, Port Ridge).
+ */
+static void quirk_thunderbolt_hotplug_msi(struct pci_dev *pdev)
+{
+	if (pdev->is_hotplug_bridge &&
+	    (pdev->device != PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C ||
+	     pdev->revision <= 1))
+		pdev->no_msi = 1;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LIGHT_RIDGE,
+			quirk_thunderbolt_hotplug_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EAGLE_RIDGE,
+			quirk_thunderbolt_hotplug_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LIGHT_PEAK,
+			quirk_thunderbolt_hotplug_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C,
+			quirk_thunderbolt_hotplug_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PORT_RIDGE,
+			quirk_thunderbolt_hotplug_msi);
+
+#ifdef CONFIG_ACPI
+/*
+ * Apple: Shutdown Cactus Ridge Thunderbolt controller.
+ *
+ * On Apple hardware the Cactus Ridge Thunderbolt controller needs to be
+ * shutdown before suspend. Otherwise the native host interface (NHI) will not
+ * be present after resume if a device was plugged in before suspend.
+ *
+ * The Thunderbolt controller consists of a PCIe switch with downstream
+ * bridges leading to the NHI and to the tunnel PCI bridges.
+ *
+ * This quirk cuts power to the whole chip. Therefore we have to apply it
+ * during suspend_noirq of the upstream bridge.
+ *
+ * Power is automagically restored before resume. No action is needed.
+ */
+static void quirk_apple_poweroff_thunderbolt(struct pci_dev *dev)
+{
+	acpi_handle bridge, SXIO, SXFP, SXLV;
+
+	if (!x86_apple_machine)
+		return;
+	if (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM)
+		return;
+
+	/*
+	 * SXIO/SXFP/SXLF turns off power to the Thunderbolt controller.
+	 * We don't know how to turn it back on again, but firmware does,
+	 * so we can only use SXIO/SXFP/SXLF if we're suspending via
+	 * firmware.
+	 */
+	if (!pm_suspend_via_firmware())
+		return;
+
+	bridge = ACPI_HANDLE(&dev->dev);
+	if (!bridge)
+		return;
+
+	/*
+	 * SXIO and SXLV are present only on machines requiring this quirk.
+	 * Thunderbolt bridges in external devices might have the same
+	 * device ID as those on the host, but they will not have the
+	 * associated ACPI methods. This implicitly checks that we are at
+	 * the right bridge.
+	 */
+	if (ACPI_FAILURE(acpi_get_handle(bridge, "DSB0.NHI0.SXIO", &SXIO))
+	    || ACPI_FAILURE(acpi_get_handle(bridge, "DSB0.NHI0.SXFP", &SXFP))
+	    || ACPI_FAILURE(acpi_get_handle(bridge, "DSB0.NHI0.SXLV", &SXLV)))
+		return;
+	pci_info(dev, "quirk: cutting power to Thunderbolt controller...\n");
+
+	/* magic sequence */
+	acpi_execute_simple_method(SXIO, NULL, 1);
+	acpi_execute_simple_method(SXFP, NULL, 0);
+	msleep(300);
+	acpi_execute_simple_method(SXLV, NULL, 0);
+	acpi_execute_simple_method(SXIO, NULL, 0);
+	acpi_execute_simple_method(SXLV, NULL, 0);
+}
+DECLARE_PCI_FIXUP_SUSPEND_LATE(PCI_VENDOR_ID_INTEL,
+			       PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C,
+			       quirk_apple_poweroff_thunderbolt);
+#endif
+
+/*
+ * Following are device-specific reset methods which can be used to
+ * reset a single function if other methods (e.g. FLR, PM D0->D3) are
+ * not available.
+ */
+static int reset_intel_82599_sfp_virtfn(struct pci_dev *dev, bool probe)
+{
+	/*
+	 * http://www.intel.com/content/dam/doc/datasheet/82599-10-gbe-controller-datasheet.pdf
+	 *
+	 * The 82599 supports FLR on VFs, but FLR support is reported only
+	 * in the PF DEVCAP (sec 9.3.10.4), not in the VF DEVCAP (sec 9.5).
+	 * Thus we must call pcie_flr() directly without first checking if it is
+	 * supported.
+	 */
+	if (!probe)
+		pcie_flr(dev);
+	return 0;
+}
+
+#define SOUTH_CHICKEN2		0xc2004
+#define PCH_PP_STATUS		0xc7200
+#define PCH_PP_CONTROL		0xc7204
+#define MSG_CTL			0x45010
+#define NSDE_PWR_STATE		0xd0100
+#define IGD_OPERATION_TIMEOUT	10000     /* set timeout 10 seconds */
+
+static int reset_ivb_igd(struct pci_dev *dev, bool probe)
+{
+	void __iomem *mmio_base;
+	unsigned long timeout;
+	u32 val;
+
+	if (probe)
+		return 0;
+
+	mmio_base = pci_iomap(dev, 0, 0);
+	if (!mmio_base)
+		return -ENOMEM;
+
+	iowrite32(0x00000002, mmio_base + MSG_CTL);
+
+	/*
+	 * Clobbering SOUTH_CHICKEN2 register is fine only if the next
+	 * driver loaded sets the right bits. However, this's a reset and
+	 * the bits have been set by i915 previously, so we clobber
+	 * SOUTH_CHICKEN2 register directly here.
+	 */
+	iowrite32(0x00000005, mmio_base + SOUTH_CHICKEN2);
+
+	val = ioread32(mmio_base + PCH_PP_CONTROL) & 0xfffffffe;
+	iowrite32(val, mmio_base + PCH_PP_CONTROL);
+
+	timeout = jiffies + msecs_to_jiffies(IGD_OPERATION_TIMEOUT);
+	do {
+		val = ioread32(mmio_base + PCH_PP_STATUS);
+		if ((val & 0xb0000000) == 0)
+			goto reset_complete;
+		msleep(10);
+	} while (time_before(jiffies, timeout));
+	pci_warn(dev, "timeout during reset\n");
+
+reset_complete:
+	iowrite32(0x00000002, mmio_base + NSDE_PWR_STATE);
+
+	pci_iounmap(dev, mmio_base);
+	return 0;
+}
+
+/* Device-specific reset method for Chelsio T4-based adapters */
+static int reset_chelsio_generic_dev(struct pci_dev *dev, bool probe)
+{
+	u16 old_command;
+	u16 msix_flags;
+
+	/*
+	 * If this isn't a Chelsio T4-based device, return -ENOTTY indicating
+	 * that we have no device-specific reset method.
+	 */
+	if ((dev->device & 0xf000) != 0x4000)
+		return -ENOTTY;
+
+	/*
+	 * If this is the "probe" phase, return 0 indicating that we can
+	 * reset this device.
+	 */
+	if (probe)
+		return 0;
+
+	/*
+	 * T4 can wedge if there are DMAs in flight within the chip and Bus
+	 * Master has been disabled.  We need to have it on till the Function
+	 * Level Reset completes.  (BUS_MASTER is disabled in
+	 * pci_reset_function()).
+	 */
+	pci_read_config_word(dev, PCI_COMMAND, &old_command);
+	pci_write_config_word(dev, PCI_COMMAND,
+			      old_command | PCI_COMMAND_MASTER);
+
+	/*
+	 * Perform the actual device function reset, saving and restoring
+	 * configuration information around the reset.
+	 */
+	pci_save_state(dev);
+
+	/*
+	 * T4 also suffers a Head-Of-Line blocking problem if MSI-X interrupts
+	 * are disabled when an MSI-X interrupt message needs to be delivered.
+	 * So we briefly re-enable MSI-X interrupts for the duration of the
+	 * FLR.  The pci_restore_state() below will restore the original
+	 * MSI-X state.
+	 */
+	pci_read_config_word(dev, dev->msix_cap+PCI_MSIX_FLAGS, &msix_flags);
+	if ((msix_flags & PCI_MSIX_FLAGS_ENABLE) == 0)
+		pci_write_config_word(dev, dev->msix_cap+PCI_MSIX_FLAGS,
+				      msix_flags |
+				      PCI_MSIX_FLAGS_ENABLE |
+				      PCI_MSIX_FLAGS_MASKALL);
+
+	pcie_flr(dev);
+
+	/*
+	 * Restore the configuration information (BAR values, etc.) including
+	 * the original PCI Configuration Space Command word, and return
+	 * success.
+	 */
+	pci_restore_state(dev);
+	pci_write_config_word(dev, PCI_COMMAND, old_command);
+	return 0;
+}
+
+#define PCI_DEVICE_ID_INTEL_82599_SFP_VF   0x10ed
+#define PCI_DEVICE_ID_INTEL_IVB_M_VGA      0x0156
+#define PCI_DEVICE_ID_INTEL_IVB_M2_VGA     0x0166
+
+/*
+ * The Samsung SM961/PM961 controller can sometimes enter a fatal state after
+ * FLR where config space reads from the device return -1.  We seem to be
+ * able to avoid this condition if we disable the NVMe controller prior to
+ * FLR.  This quirk is generic for any NVMe class device requiring similar
+ * assistance to quiesce the device prior to FLR.
+ *
+ * NVMe specification: https://nvmexpress.org/resources/specifications/
+ * Revision 1.0e:
+ *    Chapter 2: Required and optional PCI config registers
+ *    Chapter 3: NVMe control registers
+ *    Chapter 7.3: Reset behavior
+ */
+static int nvme_disable_and_flr(struct pci_dev *dev, bool probe)
+{
+	void __iomem *bar;
+	u16 cmd;
+	u32 cfg;
+
+	if (dev->class != PCI_CLASS_STORAGE_EXPRESS ||
+	    pcie_reset_flr(dev, PCI_RESET_PROBE) || !pci_resource_start(dev, 0))
+		return -ENOTTY;
+
+	if (probe)
+		return 0;
+
+	bar = pci_iomap(dev, 0, NVME_REG_CC + sizeof(cfg));
+	if (!bar)
+		return -ENOTTY;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	pci_write_config_word(dev, PCI_COMMAND, cmd | PCI_COMMAND_MEMORY);
+
+	cfg = readl(bar + NVME_REG_CC);
+
+	/* Disable controller if enabled */
+	if (cfg & NVME_CC_ENABLE) {
+		u32 cap = readl(bar + NVME_REG_CAP);
+		unsigned long timeout;
+
+		/*
+		 * Per nvme_disable_ctrl() skip shutdown notification as it
+		 * could complete commands to the admin queue.  We only intend
+		 * to quiesce the device before reset.
+		 */
+		cfg &= ~(NVME_CC_SHN_MASK | NVME_CC_ENABLE);
+
+		writel(cfg, bar + NVME_REG_CC);
+
+		/*
+		 * Some controllers require an additional delay here, see
+		 * NVME_QUIRK_DELAY_BEFORE_CHK_RDY.  None of those are yet
+		 * supported by this quirk.
+		 */
+
+		/* Cap register provides max timeout in 500ms increments */
+		timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
+
+		for (;;) {
+			u32 status = readl(bar + NVME_REG_CSTS);
+
+			/* Ready status becomes zero on disable complete */
+			if (!(status & NVME_CSTS_RDY))
+				break;
+
+			msleep(100);
+
+			if (time_after(jiffies, timeout)) {
+				pci_warn(dev, "Timeout waiting for NVMe ready status to clear after disable\n");
+				break;
+			}
+		}
+	}
+
+	pci_iounmap(dev, bar);
+
+	pcie_flr(dev);
+
+	return 0;
+}
+
+/*
+ * Some NVMe controllers such as Intel DC P3700 and Solidigm P44 Pro will
+ * timeout waiting for ready status to change after NVMe enable if the driver
+ * starts interacting with the device too soon after FLR.  A 250ms delay after
+ * FLR has heuristically proven to produce reliably working results for device
+ * assignment cases.
+ */
+static int delay_250ms_after_flr(struct pci_dev *dev, bool probe)
+{
+	if (probe)
+		return pcie_reset_flr(dev, PCI_RESET_PROBE);
+
+	pcie_reset_flr(dev, PCI_RESET_DO_RESET);
+
+	msleep(250);
+
+	return 0;
+}
+
+#define PCI_DEVICE_ID_HINIC_VF      0x375E
+#define HINIC_VF_FLR_TYPE           0x1000
+#define HINIC_VF_FLR_CAP_BIT        (1UL << 30)
+#define HINIC_VF_OP                 0xE80
+#define HINIC_VF_FLR_PROC_BIT       (1UL << 18)
+#define HINIC_OPERATION_TIMEOUT     15000	/* 15 seconds */
+
+/* Device-specific reset method for Huawei Intelligent NIC virtual functions */
+static int reset_hinic_vf_dev(struct pci_dev *pdev, bool probe)
+{
+	unsigned long timeout;
+	void __iomem *bar;
+	u32 val;
+
+	if (probe)
+		return 0;
+
+	bar = pci_iomap(pdev, 0, 0);
+	if (!bar)
+		return -ENOTTY;
+
+	/* Get and check firmware capabilities */
+	val = ioread32be(bar + HINIC_VF_FLR_TYPE);
+	if (!(val & HINIC_VF_FLR_CAP_BIT)) {
+		pci_iounmap(pdev, bar);
+		return -ENOTTY;
+	}
+
+	/* Set HINIC_VF_FLR_PROC_BIT for the start of FLR */
+	val = ioread32be(bar + HINIC_VF_OP);
+	val = val | HINIC_VF_FLR_PROC_BIT;
+	iowrite32be(val, bar + HINIC_VF_OP);
+
+	pcie_flr(pdev);
+
+	/*
+	 * The device must recapture its Bus and Device Numbers after FLR
+	 * in order generate Completions.  Issue a config write to let the
+	 * device capture this information.
+	 */
+	pci_write_config_word(pdev, PCI_VENDOR_ID, 0);
+
+	/* Firmware clears HINIC_VF_FLR_PROC_BIT when reset is complete */
+	timeout = jiffies + msecs_to_jiffies(HINIC_OPERATION_TIMEOUT);
+	do {
+		val = ioread32be(bar + HINIC_VF_OP);
+		if (!(val & HINIC_VF_FLR_PROC_BIT))
+			goto reset_complete;
+		msleep(20);
+	} while (time_before(jiffies, timeout));
+
+	val = ioread32be(bar + HINIC_VF_OP);
+	if (!(val & HINIC_VF_FLR_PROC_BIT))
+		goto reset_complete;
+
+	pci_warn(pdev, "Reset dev timeout, FLR ack reg: %#010x\n", val);
+
+reset_complete:
+	pci_iounmap(pdev, bar);
+
+	return 0;
+}
+
+static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
+	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82599_SFP_VF,
+		 reset_intel_82599_sfp_virtfn },
+	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_M_VGA,
+		reset_ivb_igd },
+	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_M2_VGA,
+		reset_ivb_igd },
+	{ PCI_VENDOR_ID_SAMSUNG, 0xa804, nvme_disable_and_flr },
+	{ PCI_VENDOR_ID_INTEL, 0x0953, delay_250ms_after_flr },
+	{ PCI_VENDOR_ID_INTEL, 0x0a54, delay_250ms_after_flr },
+	{ PCI_VENDOR_ID_SOLIDIGM, 0xf1ac, delay_250ms_after_flr },
+	{ PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
+		reset_chelsio_generic_dev },
+	{ PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HINIC_VF,
+		reset_hinic_vf_dev },
+	{ 0 }
+};
+
+/*
+ * These device-specific reset methods are here rather than in a driver
+ * because when a host assigns a device to a guest VM, the host may need
+ * to reset the device but probably doesn't have a driver for it.
+ */
+int pci_dev_specific_reset(struct pci_dev *dev, bool probe)
+{
+	const struct pci_dev_reset_methods *i;
+
+	for (i = pci_dev_reset_methods; i->reset; i++) {
+		if ((i->vendor == dev->vendor ||
+		     i->vendor == (u16)PCI_ANY_ID) &&
+		    (i->device == dev->device ||
+		     i->device == (u16)PCI_ANY_ID))
+			return i->reset(dev, probe);
+	}
+
+	return -ENOTTY;
+}
+
+static void quirk_dma_func0_alias(struct pci_dev *dev)
+{
+	if (PCI_FUNC(dev->devfn) != 0)
+		pci_add_dma_alias(dev, PCI_DEVFN(PCI_SLOT(dev->devfn), 0), 1);
+}
+
+/*
+ * https://bugzilla.redhat.com/show_bug.cgi?id=605888
+ *
+ * Some Ricoh devices use function 0 as the PCIe requester ID for DMA.
+ */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RICOH, 0xe832, quirk_dma_func0_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RICOH, 0xe476, quirk_dma_func0_alias);
+
+static void quirk_dma_func1_alias(struct pci_dev *dev)
+{
+	if (PCI_FUNC(dev->devfn) != 1)
+		pci_add_dma_alias(dev, PCI_DEVFN(PCI_SLOT(dev->devfn), 1), 1);
+}
+
+/*
+ * Marvell 88SE9123 uses function 1 as the requester ID for DMA.  In some
+ * SKUs function 1 is present and is a legacy IDE controller, in other
+ * SKUs this function is not present, making this a ghost requester.
+ * https://bugzilla.kernel.org/show_bug.cgi?id=42679
+ */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9120,
+			 quirk_dma_func1_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9123,
+			 quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c136 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9125,
+			 quirk_dma_func1_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9128,
+			 quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c14 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9130,
+			 quirk_dma_func1_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9170,
+			 quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c47 + c57 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9172,
+			 quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c59 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x917a,
+			 quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c78 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9182,
+			 quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c134 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9183,
+			 quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c46 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x91a0,
+			 quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c135 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9215,
+			 quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c127 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9220,
+			 quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c49 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9230,
+			 quirk_dma_func1_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9235,
+			 quirk_dma_func1_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0642,
+			 quirk_dma_func1_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0645,
+			 quirk_dma_func1_alias);
+/* https://bugs.gentoo.org/show_bug.cgi?id=497630 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_JMICRON,
+			 PCI_DEVICE_ID_JMICRON_JMB388_ESD,
+			 quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c117 */
+DECLARE_PCI_FIXUP_HEADER(0x1c28, /* Lite-On */
+			 0x0122, /* Plextor M6E (Marvell 88SS9183)*/
+			 quirk_dma_func1_alias);
+
+/*
+ * Some devices DMA with the wrong devfn, not just the wrong function.
+ * quirk_fixed_dma_alias() uses this table to create fixed aliases, where
+ * the alias is "fixed" and independent of the device devfn.
+ *
+ * For example, the Adaptec 3405 is a PCIe card with an Intel 80333 I/O
+ * processor.  To software, this appears as a PCIe-to-PCI/X bridge with a
+ * single device on the secondary bus.  In reality, the single exposed
+ * device at 0e.0 is the Address Translation Unit (ATU) of the controller
+ * that provides a bridge to the internal bus of the I/O processor.  The
+ * controller supports private devices, which can be hidden from PCI config
+ * space.  In the case of the Adaptec 3405, a private device at 01.0
+ * appears to be the DMA engine, which therefore needs to become a DMA
+ * alias for the device.
+ */
+static const struct pci_device_id fixed_dma_alias_tbl[] = {
+	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x0285,
+			 PCI_VENDOR_ID_ADAPTEC2, 0x02bb), /* Adaptec 3405 */
+	  .driver_data = PCI_DEVFN(1, 0) },
+	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x0285,
+			 PCI_VENDOR_ID_ADAPTEC2, 0x02bc), /* Adaptec 3805 */
+	  .driver_data = PCI_DEVFN(1, 0) },
+	{ 0 }
+};
+
+static void quirk_fixed_dma_alias(struct pci_dev *dev)
+{
+	const struct pci_device_id *id;
+
+	id = pci_match_id(fixed_dma_alias_tbl, dev);
+	if (id)
+		pci_add_dma_alias(dev, id->driver_data, 1);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ADAPTEC2, 0x0285, quirk_fixed_dma_alias);
+
+/*
+ * A few PCIe-to-PCI bridges fail to expose a PCIe capability, resulting in
+ * using the wrong DMA alias for the device.  Some of these devices can be
+ * used as either forward or reverse bridges, so we need to test whether the
+ * device is operating in the correct mode.  We could probably apply this
+ * quirk to PCI_ANY_ID, but for now we'll just use known offenders.  The test
+ * is for a non-root, non-PCIe bridge where the upstream device is PCIe and
+ * is not a PCIe-to-PCI bridge, then @pdev is actually a PCIe-to-PCI bridge.
+ */
+static void quirk_use_pcie_bridge_dma_alias(struct pci_dev *pdev)
+{
+	if (!pci_is_root_bus(pdev->bus) &&
+	    pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
+	    !pci_is_pcie(pdev) && pci_is_pcie(pdev->bus->self) &&
+	    pci_pcie_type(pdev->bus->self) != PCI_EXP_TYPE_PCI_BRIDGE)
+		pdev->dev_flags |= PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS;
+}
+/* ASM1083/1085, https://bugzilla.kernel.org/show_bug.cgi?id=44881#c46 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ASMEDIA, 0x1080,
+			 quirk_use_pcie_bridge_dma_alias);
+/* Tundra 8113, https://bugzilla.kernel.org/show_bug.cgi?id=44881#c43 */
+DECLARE_PCI_FIXUP_HEADER(0x10e3, 0x8113, quirk_use_pcie_bridge_dma_alias);
+/* ITE 8892, https://bugzilla.kernel.org/show_bug.cgi?id=73551 */
+DECLARE_PCI_FIXUP_HEADER(0x1283, 0x8892, quirk_use_pcie_bridge_dma_alias);
+/* ITE 8893 has the same problem as the 8892 */
+DECLARE_PCI_FIXUP_HEADER(0x1283, 0x8893, quirk_use_pcie_bridge_dma_alias);
+/* Intel 82801, https://bugzilla.kernel.org/show_bug.cgi?id=44881#c49 */
+DECLARE_PCI_FIXUP_HEADER(0x8086, 0x244e, quirk_use_pcie_bridge_dma_alias);
+
+/*
+ * MIC x200 NTB forwards PCIe traffic using multiple alien RIDs. They have to
+ * be added as aliases to the DMA device in order to allow buffer access
+ * when IOMMU is enabled. Following devfns have to match RIT-LUT table
+ * programmed in the EEPROM.
+ */
+static void quirk_mic_x200_dma_alias(struct pci_dev *pdev)
+{
+	pci_add_dma_alias(pdev, PCI_DEVFN(0x10, 0x0), 1);
+	pci_add_dma_alias(pdev, PCI_DEVFN(0x11, 0x0), 1);
+	pci_add_dma_alias(pdev, PCI_DEVFN(0x12, 0x3), 1);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2260, quirk_mic_x200_dma_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2264, quirk_mic_x200_dma_alias);
+
+/*
+ * Intel Visual Compute Accelerator (VCA) is a family of PCIe add-in devices
+ * exposing computational units via Non Transparent Bridges (NTB, PEX 87xx).
+ *
+ * Similarly to MIC x200, we need to add DMA aliases to allow buffer access
+ * when IOMMU is enabled.  These aliases allow computational unit access to
+ * host memory.  These aliases mark the whole VCA device as one IOMMU
+ * group.
+ *
+ * All possible slot numbers (0x20) are used, since we are unable to tell
+ * what slot is used on other side.  This quirk is intended for both host
+ * and computational unit sides.  The VCA devices have up to five functions
+ * (four for DMA channels and one additional).
+ */
+static void quirk_pex_vca_alias(struct pci_dev *pdev)
+{
+	const unsigned int num_pci_slots = 0x20;
+	unsigned int slot;
+
+	for (slot = 0; slot < num_pci_slots; slot++)
+		pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x0), 5);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2954, quirk_pex_vca_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2955, quirk_pex_vca_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2956, quirk_pex_vca_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2958, quirk_pex_vca_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2959, quirk_pex_vca_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x295A, quirk_pex_vca_alias);
+
+/*
+ * The IOMMU and interrupt controller on Broadcom Vulcan/Cavium ThunderX2 are
+ * associated not at the root bus, but at a bridge below. This quirk avoids
+ * generating invalid DMA aliases.
+ */
+static void quirk_bridge_cavm_thrx2_pcie_root(struct pci_dev *pdev)
+{
+	pdev->dev_flags |= PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9000,
+				quirk_bridge_cavm_thrx2_pcie_root);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9084,
+				quirk_bridge_cavm_thrx2_pcie_root);
+
+/*
+ * Intersil/Techwell TW686[4589]-based video capture cards have an empty (zero)
+ * class code.  Fix it.
+ */
+static void quirk_tw686x_class(struct pci_dev *pdev)
+{
+	u32 class = pdev->class;
+
+	/* Use "Multimedia controller" class */
+	pdev->class = (PCI_CLASS_MULTIMEDIA_OTHER << 8) | 0x01;
+	pci_info(pdev, "TW686x PCI class overridden (%#08x -> %#08x)\n",
+		 class, pdev->class);
+}
+DECLARE_PCI_FIXUP_CLASS_EARLY(0x1797, 0x6864, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_tw686x_class);
+DECLARE_PCI_FIXUP_CLASS_EARLY(0x1797, 0x6865, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_tw686x_class);
+DECLARE_PCI_FIXUP_CLASS_EARLY(0x1797, 0x6868, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_tw686x_class);
+DECLARE_PCI_FIXUP_CLASS_EARLY(0x1797, 0x6869, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_tw686x_class);
+
+/*
+ * Some devices have problems with Transaction Layer Packets with the Relaxed
+ * Ordering Attribute set.  Such devices should mark themselves and other
+ * device drivers should check before sending TLPs with RO set.
+ */
+static void quirk_relaxedordering_disable(struct pci_dev *dev)
+{
+	dev->dev_flags |= PCI_DEV_FLAGS_NO_RELAXED_ORDERING;
+	pci_info(dev, "Disable Relaxed Ordering Attributes to avoid PCIe Completion erratum\n");
+}
+
+/*
+ * Intel Xeon processors based on Broadwell/Haswell microarchitecture Root
+ * Complex have a Flow Control Credit issue which can cause performance
+ * problems with Upstream Transaction Layer Packets with Relaxed Ordering set.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f01, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f02, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f03, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f04, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f05, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f06, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f07, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f08, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f09, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0a, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0b, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0c, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0d, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0e, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f01, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f02, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f03, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f04, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f05, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f06, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f07, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f08, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f09, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0a, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0b, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0c, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0d, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0e, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+
+/*
+ * The AMD ARM A1100 (aka "SEATTLE") SoC has a bug in its PCIe Root Complex
+ * where Upstream Transaction Layer Packets with the Relaxed Ordering
+ * Attribute clear are allowed to bypass earlier TLPs with Relaxed Ordering
+ * set.  This is a violation of the PCIe 3.0 Transaction Ordering Rules
+ * outlined in Section 2.4.1 (PCI Express(r) Base Specification Revision 3.0
+ * November 10, 2010).  As a result, on this platform we can't use Relaxed
+ * Ordering for Upstream TLPs.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a00, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a01, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a02, PCI_CLASS_NOT_DEFINED, 8,
+			      quirk_relaxedordering_disable);
+
+/*
+ * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same
+ * values for the Attribute as were supplied in the header of the
+ * corresponding Request, except as explicitly allowed when IDO is used."
+ *
+ * If a non-compliant device generates a completion with a different
+ * attribute than the request, the receiver may accept it (which itself
+ * seems non-compliant based on sec 2.3.2), or it may handle it as a
+ * Malformed TLP or an Unexpected Completion, which will probably lead to a
+ * device access timeout.
+ *
+ * If the non-compliant device generates completions with zero attributes
+ * (instead of copying the attributes from the request), we can work around
+ * this by disabling the "Relaxed Ordering" and "No Snoop" attributes in
+ * upstream devices so they always generate requests with zero attributes.
+ *
+ * This affects other devices under the same Root Port, but since these
+ * attributes are performance hints, there should be no functional problem.
+ *
+ * Note that Configuration Space accesses are never supposed to have TLP
+ * Attributes, so we're safe waiting till after any Configuration Space
+ * accesses to do the Root Port fixup.
+ */
+static void quirk_disable_root_port_attributes(struct pci_dev *pdev)
+{
+	struct pci_dev *root_port = pcie_find_root_port(pdev);
+
+	if (!root_port) {
+		pci_warn(pdev, "PCIe Completion erratum may cause device errors\n");
+		return;
+	}
+
+	pci_info(root_port, "Disabling No Snoop/Relaxed Ordering Attributes to avoid PCIe Completion erratum in %s\n",
+		 dev_name(&pdev->dev));
+	pcie_capability_clear_and_set_word(root_port, PCI_EXP_DEVCTL,
+					   PCI_EXP_DEVCTL_RELAX_EN |
+					   PCI_EXP_DEVCTL_NOSNOOP_EN, 0);
+}
+
+/*
+ * The Chelsio T5 chip fails to copy TLP Attributes from a Request to the
+ * Completion it generates.
+ */
+static void quirk_chelsio_T5_disable_root_port_attributes(struct pci_dev *pdev)
+{
+	/*
+	 * This mask/compare operation selects for Physical Function 4 on a
+	 * T5.  We only need to fix up the Root Port once for any of the
+	 * PFs.  PF[0..3] have PCI Device IDs of 0x50xx, but PF4 is uniquely
+	 * 0x54xx so we use that one.
+	 */
+	if ((pdev->device & 0xff00) == 0x5400)
+		quirk_disable_root_port_attributes(pdev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
+			 quirk_chelsio_T5_disable_root_port_attributes);
+
+/*
+ * pci_acs_ctrl_enabled - compare desired ACS controls with those provided
+ *			  by a device
+ * @acs_ctrl_req: Bitmask of desired ACS controls
+ * @acs_ctrl_ena: Bitmask of ACS controls enabled or provided implicitly by
+ *		  the hardware design
+ *
+ * Return 1 if all ACS controls in the @acs_ctrl_req bitmask are included
+ * in @acs_ctrl_ena, i.e., the device provides all the access controls the
+ * caller desires.  Return 0 otherwise.
+ */
+static int pci_acs_ctrl_enabled(u16 acs_ctrl_req, u16 acs_ctrl_ena)
+{
+	if ((acs_ctrl_req & acs_ctrl_ena) == acs_ctrl_req)
+		return 1;
+	return 0;
+}
+
+/*
+ * AMD has indicated that the devices below do not support peer-to-peer
+ * in any system where they are found in the southbridge with an AMD
+ * IOMMU in the system.  Multifunction devices that do not support
+ * peer-to-peer between functions can claim to support a subset of ACS.
+ * Such devices effectively enable request redirect (RR) and completion
+ * redirect (CR) since all transactions are redirected to the upstream
+ * root complex.
+ *
+ * https://lore.kernel.org/r/201207111426.q6BEQTbh002928@mail.maya.org/
+ * https://lore.kernel.org/r/20120711165854.GM25282@amd.com/
+ * https://lore.kernel.org/r/20121005130857.GX4009@amd.com/
+ *
+ * 1002:4385 SBx00 SMBus Controller
+ * 1002:439c SB7x0/SB8x0/SB9x0 IDE Controller
+ * 1002:4383 SBx00 Azalia (Intel HDA)
+ * 1002:439d SB7x0/SB8x0/SB9x0 LPC host controller
+ * 1002:4384 SBx00 PCI to PCI Bridge
+ * 1002:4399 SB7x0/SB8x0/SB9x0 USB OHCI2 Controller
+ *
+ * https://bugzilla.kernel.org/show_bug.cgi?id=81841#c15
+ *
+ * 1022:780f [AMD] FCH PCI Bridge
+ * 1022:7809 [AMD] FCH USB OHCI Controller
+ */
+static int pci_quirk_amd_sb_acs(struct pci_dev *dev, u16 acs_flags)
+{
+#ifdef CONFIG_ACPI
+	struct acpi_table_header *header = NULL;
+	acpi_status status;
+
+	/* Targeting multifunction devices on the SB (appears on root bus) */
+	if (!dev->multifunction || !pci_is_root_bus(dev->bus))
+		return -ENODEV;
+
+	/* The IVRS table describes the AMD IOMMU */
+	status = acpi_get_table("IVRS", 0, &header);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	acpi_put_table(header);
+
+	/* Filter out flags not applicable to multifunction */
+	acs_flags &= (PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC | PCI_ACS_DT);
+
+	return pci_acs_ctrl_enabled(acs_flags, PCI_ACS_RR | PCI_ACS_CR);
+#else
+	return -ENODEV;
+#endif
+}
+
+static bool pci_quirk_cavium_acs_match(struct pci_dev *dev)
+{
+	if (!pci_is_pcie(dev) || pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT)
+		return false;
+
+	switch (dev->device) {
+	/*
+	 * Effectively selects all downstream ports for whole ThunderX1
+	 * (which represents 8 SoCs).
+	 */
+	case 0xa000 ... 0xa7ff: /* ThunderX1 */
+	case 0xaf84:  /* ThunderX2 */
+	case 0xb884:  /* ThunderX3 */
+		return true;
+	default:
+		return false;
+	}
+}
+
+static int pci_quirk_cavium_acs(struct pci_dev *dev, u16 acs_flags)
+{
+	if (!pci_quirk_cavium_acs_match(dev))
+		return -ENOTTY;
+
+	/*
+	 * Cavium Root Ports don't advertise an ACS capability.  However,
+	 * the RTL internally implements similar protection as if ACS had
+	 * Source Validation, Request Redirection, Completion Redirection,
+	 * and Upstream Forwarding features enabled.  Assert that the
+	 * hardware implements and enables equivalent ACS functionality for
+	 * these flags.
+	 */
+	return pci_acs_ctrl_enabled(acs_flags,
+		PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+}
+
+static int pci_quirk_xgene_acs(struct pci_dev *dev, u16 acs_flags)
+{
+	/*
+	 * X-Gene Root Ports matching this quirk do not allow peer-to-peer
+	 * transactions with others, allowing masking out these bits as if they
+	 * were unimplemented in the ACS capability.
+	 */
+	return pci_acs_ctrl_enabled(acs_flags,
+		PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+}
+
+/*
+ * Many Zhaoxin Root Ports and Switch Downstream Ports have no ACS capability.
+ * But the implementation could block peer-to-peer transactions between them
+ * and provide ACS-like functionality.
+ */
+static int pci_quirk_zhaoxin_pcie_ports_acs(struct pci_dev *dev, u16 acs_flags)
+{
+	if (!pci_is_pcie(dev) ||
+	    ((pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) &&
+	     (pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM)))
+		return -ENOTTY;
+
+	/*
+	 * Future Zhaoxin Root Ports and Switch Downstream Ports will
+	 * implement ACS capability in accordance with the PCIe Spec.
+	 */
+	switch (dev->device) {
+	case 0x0710 ... 0x071e:
+	case 0x0721:
+	case 0x0723 ... 0x0752:
+		return pci_acs_ctrl_enabled(acs_flags,
+			PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+	}
+
+	return false;
+}
+
+/*
+ * Many Intel PCH Root Ports do provide ACS-like features to disable peer
+ * transactions and validate bus numbers in requests, but do not provide an
+ * actual PCIe ACS capability.  This is the list of device IDs known to fall
+ * into that category as provided by Intel in Red Hat bugzilla 1037684.
+ */
+static const u16 pci_quirk_intel_pch_acs_ids[] = {
+	/* Ibexpeak PCH */
+	0x3b42, 0x3b43, 0x3b44, 0x3b45, 0x3b46, 0x3b47, 0x3b48, 0x3b49,
+	0x3b4a, 0x3b4b, 0x3b4c, 0x3b4d, 0x3b4e, 0x3b4f, 0x3b50, 0x3b51,
+	/* Cougarpoint PCH */
+	0x1c10, 0x1c11, 0x1c12, 0x1c13, 0x1c14, 0x1c15, 0x1c16, 0x1c17,
+	0x1c18, 0x1c19, 0x1c1a, 0x1c1b, 0x1c1c, 0x1c1d, 0x1c1e, 0x1c1f,
+	/* Pantherpoint PCH */
+	0x1e10, 0x1e11, 0x1e12, 0x1e13, 0x1e14, 0x1e15, 0x1e16, 0x1e17,
+	0x1e18, 0x1e19, 0x1e1a, 0x1e1b, 0x1e1c, 0x1e1d, 0x1e1e, 0x1e1f,
+	/* Lynxpoint-H PCH */
+	0x8c10, 0x8c11, 0x8c12, 0x8c13, 0x8c14, 0x8c15, 0x8c16, 0x8c17,
+	0x8c18, 0x8c19, 0x8c1a, 0x8c1b, 0x8c1c, 0x8c1d, 0x8c1e, 0x8c1f,
+	/* Lynxpoint-LP PCH */
+	0x9c10, 0x9c11, 0x9c12, 0x9c13, 0x9c14, 0x9c15, 0x9c16, 0x9c17,
+	0x9c18, 0x9c19, 0x9c1a, 0x9c1b,
+	/* Wildcat PCH */
+	0x9c90, 0x9c91, 0x9c92, 0x9c93, 0x9c94, 0x9c95, 0x9c96, 0x9c97,
+	0x9c98, 0x9c99, 0x9c9a, 0x9c9b,
+	/* Patsburg (X79) PCH */
+	0x1d10, 0x1d12, 0x1d14, 0x1d16, 0x1d18, 0x1d1a, 0x1d1c, 0x1d1e,
+	/* Wellsburg (X99) PCH */
+	0x8d10, 0x8d11, 0x8d12, 0x8d13, 0x8d14, 0x8d15, 0x8d16, 0x8d17,
+	0x8d18, 0x8d19, 0x8d1a, 0x8d1b, 0x8d1c, 0x8d1d, 0x8d1e,
+	/* Lynx Point (9 series) PCH */
+	0x8c90, 0x8c92, 0x8c94, 0x8c96, 0x8c98, 0x8c9a, 0x8c9c, 0x8c9e,
+};
+
+static bool pci_quirk_intel_pch_acs_match(struct pci_dev *dev)
+{
+	int i;
+
+	/* Filter out a few obvious non-matches first */
+	if (!pci_is_pcie(dev) || pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT)
+		return false;
+
+	for (i = 0; i < ARRAY_SIZE(pci_quirk_intel_pch_acs_ids); i++)
+		if (pci_quirk_intel_pch_acs_ids[i] == dev->device)
+			return true;
+
+	return false;
+}
+
+static int pci_quirk_intel_pch_acs(struct pci_dev *dev, u16 acs_flags)
+{
+	if (!pci_quirk_intel_pch_acs_match(dev))
+		return -ENOTTY;
+
+	if (dev->dev_flags & PCI_DEV_FLAGS_ACS_ENABLED_QUIRK)
+		return pci_acs_ctrl_enabled(acs_flags,
+			PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+
+	return pci_acs_ctrl_enabled(acs_flags, 0);
+}
+
+/*
+ * These QCOM Root Ports do provide ACS-like features to disable peer
+ * transactions and validate bus numbers in requests, but do not provide an
+ * actual PCIe ACS capability.  Hardware supports source validation but it
+ * will report the issue as Completer Abort instead of ACS Violation.
+ * Hardware doesn't support peer-to-peer and each Root Port is a Root
+ * Complex with unique segment numbers.  It is not possible for one Root
+ * Port to pass traffic to another Root Port.  All PCIe transactions are
+ * terminated inside the Root Port.
+ */
+static int pci_quirk_qcom_rp_acs(struct pci_dev *dev, u16 acs_flags)
+{
+	return pci_acs_ctrl_enabled(acs_flags,
+		PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+}
+
+/*
+ * Each of these NXP Root Ports is in a Root Complex with a unique segment
+ * number and does provide isolation features to disable peer transactions
+ * and validate bus numbers in requests, but does not provide an ACS
+ * capability.
+ */
+static int pci_quirk_nxp_rp_acs(struct pci_dev *dev, u16 acs_flags)
+{
+	return pci_acs_ctrl_enabled(acs_flags,
+		PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+}
+
+static int pci_quirk_al_acs(struct pci_dev *dev, u16 acs_flags)
+{
+	if (pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT)
+		return -ENOTTY;
+
+	/*
+	 * Amazon's Annapurna Labs root ports don't include an ACS capability,
+	 * but do include ACS-like functionality. The hardware doesn't support
+	 * peer-to-peer transactions via the root port and each has a unique
+	 * segment number.
+	 *
+	 * Additionally, the root ports cannot send traffic to each other.
+	 */
+	acs_flags &= ~(PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+
+	return acs_flags ? 0 : 1;
+}
+
+/*
+ * Sunrise Point PCH root ports implement ACS, but unfortunately as shown in
+ * the datasheet (Intel 100 Series Chipset Family PCH Datasheet, Vol. 2,
+ * 12.1.46, 12.1.47)[1] this chipset uses dwords for the ACS capability and
+ * control registers whereas the PCIe spec packs them into words (Rev 3.0,
+ * 7.16 ACS Extended Capability).  The bit definitions are correct, but the
+ * control register is at offset 8 instead of 6 and we should probably use
+ * dword accesses to them.  This applies to the following PCI Device IDs, as
+ * found in volume 1 of the datasheet[2]:
+ *
+ * 0xa110-0xa11f Sunrise Point-H PCI Express Root Port #{0-16}
+ * 0xa167-0xa16a Sunrise Point-H PCI Express Root Port #{17-20}
+ *
+ * N.B. This doesn't fix what lspci shows.
+ *
+ * The 100 series chipset specification update includes this as errata #23[3].
+ *
+ * The 200 series chipset (Union Point) has the same bug according to the
+ * specification update (Intel 200 Series Chipset Family Platform Controller
+ * Hub, Specification Update, January 2017, Revision 001, Document# 335194-001,
+ * Errata 22)[4].  Per the datasheet[5], root port PCI Device IDs for this
+ * chipset include:
+ *
+ * 0xa290-0xa29f PCI Express Root port #{0-16}
+ * 0xa2e7-0xa2ee PCI Express Root port #{17-24}
+ *
+ * Mobile chipsets are also affected, 7th & 8th Generation
+ * Specification update confirms ACS errata 22, status no fix: (7th Generation
+ * Intel Processor Family I/O for U/Y Platforms and 8th Generation Intel
+ * Processor Family I/O for U Quad Core Platforms Specification Update,
+ * August 2017, Revision 002, Document#: 334660-002)[6]
+ * Device IDs from I/O datasheet: (7th Generation Intel Processor Family I/O
+ * for U/Y Platforms and 8th Generation Intel ® Processor Family I/O for U
+ * Quad Core Platforms, Vol 1 of 2, August 2017, Document#: 334658-003)[7]
+ *
+ * 0x9d10-0x9d1b PCI Express Root port #{1-12}
+ *
+ * [1] https://www.intel.com/content/www/us/en/chipsets/100-series-chipset-datasheet-vol-2.html
+ * [2] https://www.intel.com/content/www/us/en/chipsets/100-series-chipset-datasheet-vol-1.html
+ * [3] https://www.intel.com/content/www/us/en/chipsets/100-series-chipset-spec-update.html
+ * [4] https://www.intel.com/content/www/us/en/chipsets/200-series-chipset-pch-spec-update.html
+ * [5] https://www.intel.com/content/www/us/en/chipsets/200-series-chipset-pch-datasheet-vol-1.html
+ * [6] https://www.intel.com/content/www/us/en/processors/core/7th-gen-core-family-mobile-u-y-processor-lines-i-o-spec-update.html
+ * [7] https://www.intel.com/content/www/us/en/processors/core/7th-gen-core-family-mobile-u-y-processor-lines-i-o-datasheet-vol-1.html
+ */
+static bool pci_quirk_intel_spt_pch_acs_match(struct pci_dev *dev)
+{
+	if (!pci_is_pcie(dev) || pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT)
+		return false;
+
+	switch (dev->device) {
+	case 0xa110 ... 0xa11f: case 0xa167 ... 0xa16a: /* Sunrise Point */
+	case 0xa290 ... 0xa29f: case 0xa2e7 ... 0xa2ee: /* Union Point */
+	case 0x9d10 ... 0x9d1b: /* 7th & 8th Gen Mobile */
+		return true;
+	}
+
+	return false;
+}
+
+#define INTEL_SPT_ACS_CTRL (PCI_ACS_CAP + 4)
+
+static int pci_quirk_intel_spt_pch_acs(struct pci_dev *dev, u16 acs_flags)
+{
+	int pos;
+	u32 cap, ctrl;
+
+	if (!pci_quirk_intel_spt_pch_acs_match(dev))
+		return -ENOTTY;
+
+	pos = dev->acs_cap;
+	if (!pos)
+		return -ENOTTY;
+
+	/* see pci_acs_flags_enabled() */
+	pci_read_config_dword(dev, pos + PCI_ACS_CAP, &cap);
+	acs_flags &= (cap | PCI_ACS_EC);
+
+	pci_read_config_dword(dev, pos + INTEL_SPT_ACS_CTRL, &ctrl);
+
+	return pci_acs_ctrl_enabled(acs_flags, ctrl);
+}
+
+static int pci_quirk_mf_endpoint_acs(struct pci_dev *dev, u16 acs_flags)
+{
+	/*
+	 * SV, TB, and UF are not relevant to multifunction endpoints.
+	 *
+	 * Multifunction devices are only required to implement RR, CR, and DT
+	 * in their ACS capability if they support peer-to-peer transactions.
+	 * Devices matching this quirk have been verified by the vendor to not
+	 * perform peer-to-peer with other functions, allowing us to mask out
+	 * these bits as if they were unimplemented in the ACS capability.
+	 */
+	return pci_acs_ctrl_enabled(acs_flags,
+		PCI_ACS_SV | PCI_ACS_TB | PCI_ACS_RR |
+		PCI_ACS_CR | PCI_ACS_UF | PCI_ACS_DT);
+}
+
+static int pci_quirk_rciep_acs(struct pci_dev *dev, u16 acs_flags)
+{
+	/*
+	 * Intel RCiEP's are required to allow p2p only on translated
+	 * addresses.  Refer to Intel VT-d specification, r3.1, sec 3.16,
+	 * "Root-Complex Peer to Peer Considerations".
+	 */
+	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_END)
+		return -ENOTTY;
+
+	return pci_acs_ctrl_enabled(acs_flags,
+		PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+}
+
+static int pci_quirk_brcm_acs(struct pci_dev *dev, u16 acs_flags)
+{
+	/*
+	 * iProc PAXB Root Ports don't advertise an ACS capability, but
+	 * they do not allow peer-to-peer transactions between Root Ports.
+	 * Allow each Root Port to be in a separate IOMMU group by masking
+	 * SV/RR/CR/UF bits.
+	 */
+	return pci_acs_ctrl_enabled(acs_flags,
+		PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+}
+
+/*
+ * Wangxun 10G/1G NICs have no ACS capability, and on multi-function
+ * devices, peer-to-peer transactions are not be used between the functions.
+ * So add an ACS quirk for below devices to isolate functions.
+ * SFxxx 1G NICs(em).
+ * RP1000/RP2000 10G NICs(sp).
+ */
+static int  pci_quirk_wangxun_nic_acs(struct pci_dev *dev, u16 acs_flags)
+{
+	switch (dev->device) {
+	case 0x0100 ... 0x010F:
+	case 0x1001:
+	case 0x2001:
+		return pci_acs_ctrl_enabled(acs_flags,
+			PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+	}
+
+	return false;
+}
+
+static const struct pci_dev_acs_enabled {
+	u16 vendor;
+	u16 device;
+	int (*acs_enabled)(struct pci_dev *dev, u16 acs_flags);
+} pci_dev_acs_enabled[] = {
+	{ PCI_VENDOR_ID_ATI, 0x4385, pci_quirk_amd_sb_acs },
+	{ PCI_VENDOR_ID_ATI, 0x439c, pci_quirk_amd_sb_acs },
+	{ PCI_VENDOR_ID_ATI, 0x4383, pci_quirk_amd_sb_acs },
+	{ PCI_VENDOR_ID_ATI, 0x439d, pci_quirk_amd_sb_acs },
+	{ PCI_VENDOR_ID_ATI, 0x4384, pci_quirk_amd_sb_acs },
+	{ PCI_VENDOR_ID_ATI, 0x4399, pci_quirk_amd_sb_acs },
+	{ PCI_VENDOR_ID_AMD, 0x780f, pci_quirk_amd_sb_acs },
+	{ PCI_VENDOR_ID_AMD, 0x7809, pci_quirk_amd_sb_acs },
+	{ PCI_VENDOR_ID_SOLARFLARE, 0x0903, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_SOLARFLARE, 0x0923, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_SOLARFLARE, 0x0A03, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10C6, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10DB, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10DD, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10E1, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10F1, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10F7, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10F8, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10F9, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10FA, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10FB, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10FC, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1507, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1514, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x151C, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1529, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x152A, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x154D, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x154F, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1551, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1558, pci_quirk_mf_endpoint_acs },
+	/* 82580 */
+	{ PCI_VENDOR_ID_INTEL, 0x1509, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x150E, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x150F, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1510, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1511, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1516, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1527, pci_quirk_mf_endpoint_acs },
+	/* 82576 */
+	{ PCI_VENDOR_ID_INTEL, 0x10C9, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10E6, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10E7, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10E8, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x150A, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x150D, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1518, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1526, pci_quirk_mf_endpoint_acs },
+	/* 82575 */
+	{ PCI_VENDOR_ID_INTEL, 0x10A7, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10A9, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10D6, pci_quirk_mf_endpoint_acs },
+	/* I350 */
+	{ PCI_VENDOR_ID_INTEL, 0x1521, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1522, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1523, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1524, pci_quirk_mf_endpoint_acs },
+	/* 82571 (Quads omitted due to non-ACS switch) */
+	{ PCI_VENDOR_ID_INTEL, 0x105E, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x105F, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1060, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10D9, pci_quirk_mf_endpoint_acs },
+	/* I219 */
+	{ PCI_VENDOR_ID_INTEL, 0x15b7, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x15b8, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_quirk_rciep_acs },
+	/* QCOM QDF2xxx root ports */
+	{ PCI_VENDOR_ID_QCOM, 0x0400, pci_quirk_qcom_rp_acs },
+	{ PCI_VENDOR_ID_QCOM, 0x0401, pci_quirk_qcom_rp_acs },
+	/* HXT SD4800 root ports. The ACS design is same as QCOM QDF2xxx */
+	{ PCI_VENDOR_ID_HXT, 0x0401, pci_quirk_qcom_rp_acs },
+	/* Intel PCH root ports */
+	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_quirk_intel_pch_acs },
+	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_quirk_intel_spt_pch_acs },
+	{ 0x19a2, 0x710, pci_quirk_mf_endpoint_acs }, /* Emulex BE3-R */
+	{ 0x10df, 0x720, pci_quirk_mf_endpoint_acs }, /* Emulex Skyhawk-R */
+	/* Cavium ThunderX */
+	{ PCI_VENDOR_ID_CAVIUM, PCI_ANY_ID, pci_quirk_cavium_acs },
+	/* Cavium multi-function devices */
+	{ PCI_VENDOR_ID_CAVIUM, 0xA026, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_CAVIUM, 0xA059, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_CAVIUM, 0xA060, pci_quirk_mf_endpoint_acs },
+	/* APM X-Gene */
+	{ PCI_VENDOR_ID_AMCC, 0xE004, pci_quirk_xgene_acs },
+	/* Ampere Computing */
+	{ PCI_VENDOR_ID_AMPERE, 0xE005, pci_quirk_xgene_acs },
+	{ PCI_VENDOR_ID_AMPERE, 0xE006, pci_quirk_xgene_acs },
+	{ PCI_VENDOR_ID_AMPERE, 0xE007, pci_quirk_xgene_acs },
+	{ PCI_VENDOR_ID_AMPERE, 0xE008, pci_quirk_xgene_acs },
+	{ PCI_VENDOR_ID_AMPERE, 0xE009, pci_quirk_xgene_acs },
+	{ PCI_VENDOR_ID_AMPERE, 0xE00A, pci_quirk_xgene_acs },
+	{ PCI_VENDOR_ID_AMPERE, 0xE00B, pci_quirk_xgene_acs },
+	{ PCI_VENDOR_ID_AMPERE, 0xE00C, pci_quirk_xgene_acs },
+	/* Broadcom multi-function device */
+	{ PCI_VENDOR_ID_BROADCOM, 0x16D7, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_BROADCOM, 0x1750, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_BROADCOM, 0x1751, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_BROADCOM, 0x1752, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_BROADCOM, 0xD714, pci_quirk_brcm_acs },
+	/* Amazon Annapurna Labs */
+	{ PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031, pci_quirk_al_acs },
+	/* Zhaoxin multi-function devices */
+	{ PCI_VENDOR_ID_ZHAOXIN, 0x3038, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_ZHAOXIN, 0x3104, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs },
+	/* NXP root ports, xx=16, 12, or 08 cores */
+	/* LX2xx0A : without security features + CAN-FD */
+	{ PCI_VENDOR_ID_NXP, 0x8d81, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8da1, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8d83, pci_quirk_nxp_rp_acs },
+	/* LX2xx0C : security features + CAN-FD */
+	{ PCI_VENDOR_ID_NXP, 0x8d80, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8da0, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8d82, pci_quirk_nxp_rp_acs },
+	/* LX2xx0E : security features + CAN */
+	{ PCI_VENDOR_ID_NXP, 0x8d90, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8db0, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8d92, pci_quirk_nxp_rp_acs },
+	/* LX2xx0N : without security features + CAN */
+	{ PCI_VENDOR_ID_NXP, 0x8d91, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8db1, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8d93, pci_quirk_nxp_rp_acs },
+	/* LX2xx2A : without security features + CAN-FD */
+	{ PCI_VENDOR_ID_NXP, 0x8d89, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8da9, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8d8b, pci_quirk_nxp_rp_acs },
+	/* LX2xx2C : security features + CAN-FD */
+	{ PCI_VENDOR_ID_NXP, 0x8d88, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8da8, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8d8a, pci_quirk_nxp_rp_acs },
+	/* LX2xx2E : security features + CAN */
+	{ PCI_VENDOR_ID_NXP, 0x8d98, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8db8, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8d9a, pci_quirk_nxp_rp_acs },
+	/* LX2xx2N : without security features + CAN */
+	{ PCI_VENDOR_ID_NXP, 0x8d99, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8db9, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8d9b, pci_quirk_nxp_rp_acs },
+	/* Zhaoxin Root/Downstream Ports */
+	{ PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
+	/* Wangxun nics */
+	{ PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs },
+	{ 0 }
+};
+
+/*
+ * pci_dev_specific_acs_enabled - check whether device provides ACS controls
+ * @dev:	PCI device
+ * @acs_flags:	Bitmask of desired ACS controls
+ *
+ * Returns:
+ *   -ENOTTY:	No quirk applies to this device; we can't tell whether the
+ *		device provides the desired controls
+ *   0:		Device does not provide all the desired controls
+ *   >0:	Device provides all the controls in @acs_flags
+ */
+int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags)
+{
+	const struct pci_dev_acs_enabled *i;
+	int ret;
+
+	/*
+	 * Allow devices that do not expose standard PCIe ACS capabilities
+	 * or control to indicate their support here.  Multi-function express
+	 * devices which do not allow internal peer-to-peer between functions,
+	 * but do not implement PCIe ACS may wish to return true here.
+	 */
+	for (i = pci_dev_acs_enabled; i->acs_enabled; i++) {
+		if ((i->vendor == dev->vendor ||
+		     i->vendor == (u16)PCI_ANY_ID) &&
+		    (i->device == dev->device ||
+		     i->device == (u16)PCI_ANY_ID)) {
+			ret = i->acs_enabled(dev, acs_flags);
+			if (ret >= 0)
+				return ret;
+		}
+	}
+
+	return -ENOTTY;
+}
+
+/* Config space offset of Root Complex Base Address register */
+#define INTEL_LPC_RCBA_REG 0xf0
+/* 31:14 RCBA address */
+#define INTEL_LPC_RCBA_MASK 0xffffc000
+/* RCBA Enable */
+#define INTEL_LPC_RCBA_ENABLE (1 << 0)
+
+/* Backbone Scratch Pad Register */
+#define INTEL_BSPR_REG 0x1104
+/* Backbone Peer Non-Posted Disable */
+#define INTEL_BSPR_REG_BPNPD (1 << 8)
+/* Backbone Peer Posted Disable */
+#define INTEL_BSPR_REG_BPPD  (1 << 9)
+
+/* Upstream Peer Decode Configuration Register */
+#define INTEL_UPDCR_REG 0x1014
+/* 5:0 Peer Decode Enable bits */
+#define INTEL_UPDCR_REG_MASK 0x3f
+
+static int pci_quirk_enable_intel_lpc_acs(struct pci_dev *dev)
+{
+	u32 rcba, bspr, updcr;
+	void __iomem *rcba_mem;
+
+	/*
+	 * Read the RCBA register from the LPC (D31:F0).  PCH root ports
+	 * are D28:F* and therefore get probed before LPC, thus we can't
+	 * use pci_get_slot()/pci_read_config_dword() here.
+	 */
+	pci_bus_read_config_dword(dev->bus, PCI_DEVFN(31, 0),
+				  INTEL_LPC_RCBA_REG, &rcba);
+	if (!(rcba & INTEL_LPC_RCBA_ENABLE))
+		return -EINVAL;
+
+	rcba_mem = ioremap(rcba & INTEL_LPC_RCBA_MASK,
+				   PAGE_ALIGN(INTEL_UPDCR_REG));
+	if (!rcba_mem)
+		return -ENOMEM;
+
+	/*
+	 * The BSPR can disallow peer cycles, but it's set by soft strap and
+	 * therefore read-only.  If both posted and non-posted peer cycles are
+	 * disallowed, we're ok.  If either are allowed, then we need to use
+	 * the UPDCR to disable peer decodes for each port.  This provides the
+	 * PCIe ACS equivalent of PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF
+	 */
+	bspr = readl(rcba_mem + INTEL_BSPR_REG);
+	bspr &= INTEL_BSPR_REG_BPNPD | INTEL_BSPR_REG_BPPD;
+	if (bspr != (INTEL_BSPR_REG_BPNPD | INTEL_BSPR_REG_BPPD)) {
+		updcr = readl(rcba_mem + INTEL_UPDCR_REG);
+		if (updcr & INTEL_UPDCR_REG_MASK) {
+			pci_info(dev, "Disabling UPDCR peer decodes\n");
+			updcr &= ~INTEL_UPDCR_REG_MASK;
+			writel(updcr, rcba_mem + INTEL_UPDCR_REG);
+		}
+	}
+
+	iounmap(rcba_mem);
+	return 0;
+}
+
+/* Miscellaneous Port Configuration register */
+#define INTEL_MPC_REG 0xd8
+/* MPC: Invalid Receive Bus Number Check Enable */
+#define INTEL_MPC_REG_IRBNCE (1 << 26)
+
+static void pci_quirk_enable_intel_rp_mpc_acs(struct pci_dev *dev)
+{
+	u32 mpc;
+
+	/*
+	 * When enabled, the IRBNCE bit of the MPC register enables the
+	 * equivalent of PCI ACS Source Validation (PCI_ACS_SV), which
+	 * ensures that requester IDs fall within the bus number range
+	 * of the bridge.  Enable if not already.
+	 */
+	pci_read_config_dword(dev, INTEL_MPC_REG, &mpc);
+	if (!(mpc & INTEL_MPC_REG_IRBNCE)) {
+		pci_info(dev, "Enabling MPC IRBNCE\n");
+		mpc |= INTEL_MPC_REG_IRBNCE;
+		pci_write_config_word(dev, INTEL_MPC_REG, mpc);
+	}
+}
+
+/*
+ * Currently this quirk does the equivalent of
+ * PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF
+ *
+ * TODO: This quirk also needs to do equivalent of PCI_ACS_TB,
+ * if dev->external_facing || dev->untrusted
+ */
+static int pci_quirk_enable_intel_pch_acs(struct pci_dev *dev)
+{
+	if (!pci_quirk_intel_pch_acs_match(dev))
+		return -ENOTTY;
+
+	if (pci_quirk_enable_intel_lpc_acs(dev)) {
+		pci_warn(dev, "Failed to enable Intel PCH ACS quirk\n");
+		return 0;
+	}
+
+	pci_quirk_enable_intel_rp_mpc_acs(dev);
+
+	dev->dev_flags |= PCI_DEV_FLAGS_ACS_ENABLED_QUIRK;
+
+	pci_info(dev, "Intel PCH root port ACS workaround enabled\n");
+
+	return 0;
+}
+
+static int pci_quirk_enable_intel_spt_pch_acs(struct pci_dev *dev)
+{
+	int pos;
+	u32 cap, ctrl;
+
+	if (!pci_quirk_intel_spt_pch_acs_match(dev))
+		return -ENOTTY;
+
+	pos = dev->acs_cap;
+	if (!pos)
+		return -ENOTTY;
+
+	pci_read_config_dword(dev, pos + PCI_ACS_CAP, &cap);
+	pci_read_config_dword(dev, pos + INTEL_SPT_ACS_CTRL, &ctrl);
+
+	ctrl |= (cap & PCI_ACS_SV);
+	ctrl |= (cap & PCI_ACS_RR);
+	ctrl |= (cap & PCI_ACS_CR);
+	ctrl |= (cap & PCI_ACS_UF);
+
+	if (pci_ats_disabled() || dev->external_facing || dev->untrusted)
+		ctrl |= (cap & PCI_ACS_TB);
+
+	pci_write_config_dword(dev, pos + INTEL_SPT_ACS_CTRL, ctrl);
+
+	pci_info(dev, "Intel SPT PCH root port ACS workaround enabled\n");
+
+	return 0;
+}
+
+static int pci_quirk_disable_intel_spt_pch_acs_redir(struct pci_dev *dev)
+{
+	int pos;
+	u32 cap, ctrl;
+
+	if (!pci_quirk_intel_spt_pch_acs_match(dev))
+		return -ENOTTY;
+
+	pos = dev->acs_cap;
+	if (!pos)
+		return -ENOTTY;
+
+	pci_read_config_dword(dev, pos + PCI_ACS_CAP, &cap);
+	pci_read_config_dword(dev, pos + INTEL_SPT_ACS_CTRL, &ctrl);
+
+	ctrl &= ~(PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC);
+
+	pci_write_config_dword(dev, pos + INTEL_SPT_ACS_CTRL, ctrl);
+
+	pci_info(dev, "Intel SPT PCH root port workaround: disabled ACS redirect\n");
+
+	return 0;
+}
+
+static const struct pci_dev_acs_ops {
+	u16 vendor;
+	u16 device;
+	int (*enable_acs)(struct pci_dev *dev);
+	int (*disable_acs_redir)(struct pci_dev *dev);
+} pci_dev_acs_ops[] = {
+	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
+	    .enable_acs = pci_quirk_enable_intel_pch_acs,
+	},
+	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
+	    .enable_acs = pci_quirk_enable_intel_spt_pch_acs,
+	    .disable_acs_redir = pci_quirk_disable_intel_spt_pch_acs_redir,
+	},
+};
+
+int pci_dev_specific_enable_acs(struct pci_dev *dev)
+{
+	const struct pci_dev_acs_ops *p;
+	int i, ret;
+
+	for (i = 0; i < ARRAY_SIZE(pci_dev_acs_ops); i++) {
+		p = &pci_dev_acs_ops[i];
+		if ((p->vendor == dev->vendor ||
+		     p->vendor == (u16)PCI_ANY_ID) &&
+		    (p->device == dev->device ||
+		     p->device == (u16)PCI_ANY_ID) &&
+		    p->enable_acs) {
+			ret = p->enable_acs(dev);
+			if (ret >= 0)
+				return ret;
+		}
+	}
+
+	return -ENOTTY;
+}
+
+int pci_dev_specific_disable_acs_redir(struct pci_dev *dev)
+{
+	const struct pci_dev_acs_ops *p;
+	int i, ret;
+
+	for (i = 0; i < ARRAY_SIZE(pci_dev_acs_ops); i++) {
+		p = &pci_dev_acs_ops[i];
+		if ((p->vendor == dev->vendor ||
+		     p->vendor == (u16)PCI_ANY_ID) &&
+		    (p->device == dev->device ||
+		     p->device == (u16)PCI_ANY_ID) &&
+		    p->disable_acs_redir) {
+			ret = p->disable_acs_redir(dev);
+			if (ret >= 0)
+				return ret;
+		}
+	}
+
+	return -ENOTTY;
+}
+
+/*
+ * The PCI capabilities list for Intel DH895xCC VFs (device ID 0x0443) with
+ * QuickAssist Technology (QAT) is prematurely terminated in hardware.  The
+ * Next Capability pointer in the MSI Capability Structure should point to
+ * the PCIe Capability Structure but is incorrectly hardwired as 0 terminating
+ * the list.
+ */
+static void quirk_intel_qat_vf_cap(struct pci_dev *pdev)
+{
+	int pos, i = 0, ret;
+	u8 next_cap;
+	u16 reg16, *cap;
+	struct pci_cap_saved_state *state;
+
+	/* Bail if the hardware bug is fixed */
+	if (pdev->pcie_cap || pci_find_capability(pdev, PCI_CAP_ID_EXP))
+		return;
+
+	/* Bail if MSI Capability Structure is not found for some reason */
+	pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+	if (!pos)
+		return;
+
+	/*
+	 * Bail if Next Capability pointer in the MSI Capability Structure
+	 * is not the expected incorrect 0x00.
+	 */
+	pci_read_config_byte(pdev, pos + 1, &next_cap);
+	if (next_cap)
+		return;
+
+	/*
+	 * PCIe Capability Structure is expected to be at 0x50 and should
+	 * terminate the list (Next Capability pointer is 0x00).  Verify
+	 * Capability Id and Next Capability pointer is as expected.
+	 * Open-code some of set_pcie_port_type() and pci_cfg_space_size_ext()
+	 * to correctly set kernel data structures which have already been
+	 * set incorrectly due to the hardware bug.
+	 */
+	pos = 0x50;
+	pci_read_config_word(pdev, pos, &reg16);
+	if (reg16 == (0x0000 | PCI_CAP_ID_EXP)) {
+		u32 status;
+#ifndef PCI_EXP_SAVE_REGS
+#define PCI_EXP_SAVE_REGS     7
+#endif
+		int size = PCI_EXP_SAVE_REGS * sizeof(u16);
+
+		pdev->pcie_cap = pos;
+		pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
+		pdev->pcie_flags_reg = reg16;
+		pci_read_config_word(pdev, pos + PCI_EXP_DEVCAP, &reg16);
+		pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD;
+
+		pdev->cfg_size = PCI_CFG_SPACE_EXP_SIZE;
+		ret = pci_read_config_dword(pdev, PCI_CFG_SPACE_SIZE, &status);
+		if ((ret != PCIBIOS_SUCCESSFUL) || (PCI_POSSIBLE_ERROR(status)))
+			pdev->cfg_size = PCI_CFG_SPACE_SIZE;
+
+		if (pci_find_saved_cap(pdev, PCI_CAP_ID_EXP))
+			return;
+
+		/* Save PCIe cap */
+		state = kzalloc(sizeof(*state) + size, GFP_KERNEL);
+		if (!state)
+			return;
+
+		state->cap.cap_nr = PCI_CAP_ID_EXP;
+		state->cap.cap_extended = 0;
+		state->cap.size = size;
+		cap = (u16 *)&state->cap.data[0];
+		pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap[i++]);
+		pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &cap[i++]);
+		pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &cap[i++]);
+		pcie_capability_read_word(pdev, PCI_EXP_RTCTL,  &cap[i++]);
+		pcie_capability_read_word(pdev, PCI_EXP_DEVCTL2, &cap[i++]);
+		pcie_capability_read_word(pdev, PCI_EXP_LNKCTL2, &cap[i++]);
+		pcie_capability_read_word(pdev, PCI_EXP_SLTCTL2, &cap[i++]);
+		hlist_add_head(&state->next, &pdev->saved_cap_space);
+	}
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x443, quirk_intel_qat_vf_cap);
+
+/*
+ * FLR may cause the following to devices to hang:
+ *
+ * AMD Starship/Matisse HD Audio Controller 0x1487
+ * AMD Starship USB 3.0 Host Controller 0x148c
+ * AMD Matisse USB 3.0 Host Controller 0x149c
+ * Intel 82579LM Gigabit Ethernet Controller 0x1502
+ * Intel 82579V Gigabit Ethernet Controller 0x1503
+ *
+ */
+static void quirk_no_flr(struct pci_dev *dev)
+{
+	dev->dev_flags |= PCI_DEV_FLAGS_NO_FLR_RESET;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1487, quirk_no_flr);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x148c, quirk_no_flr);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x149c, quirk_no_flr);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x7901, quirk_no_flr);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1502, quirk_no_flr);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1503, quirk_no_flr);
+
+/* FLR may cause the SolidRun SNET DPU (rev 0x1) to hang */
+static void quirk_no_flr_snet(struct pci_dev *dev)
+{
+	if (dev->revision == 0x1)
+		quirk_no_flr(dev);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SOLIDRUN, 0x1000, quirk_no_flr_snet);
+
+static void quirk_no_ext_tags(struct pci_dev *pdev)
+{
+	struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus);
+
+	if (!bridge)
+		return;
+
+	bridge->no_ext_tags = 1;
+	pci_info(pdev, "disabling Extended Tags (this device can't handle them)\n");
+
+	pci_walk_bus(bridge->bus, pci_configure_extended_tags, NULL);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0132, quirk_no_ext_tags);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0140, quirk_no_ext_tags);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0141, quirk_no_ext_tags);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0142, quirk_no_ext_tags);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0144, quirk_no_ext_tags);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0420, quirk_no_ext_tags);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0422, quirk_no_ext_tags);
+
+#ifdef CONFIG_PCI_ATS
+static void quirk_no_ats(struct pci_dev *pdev)
+{
+	pci_info(pdev, "disabling ATS\n");
+	pdev->ats_cap = 0;
+}
+
+/*
+ * Some devices require additional driver setup to enable ATS.  Don't use
+ * ATS for those devices as ATS will be enabled before the driver has had a
+ * chance to load and configure the device.
+ */
+static void quirk_amd_harvest_no_ats(struct pci_dev *pdev)
+{
+	if (pdev->device == 0x15d8) {
+		if (pdev->revision == 0xcf &&
+		    pdev->subsystem_vendor == 0xea50 &&
+		    (pdev->subsystem_device == 0xce19 ||
+		     pdev->subsystem_device == 0xcc10 ||
+		     pdev->subsystem_device == 0xcc08))
+			quirk_no_ats(pdev);
+	} else {
+		quirk_no_ats(pdev);
+	}
+}
+
+/* AMD Stoney platform GPU */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x98e4, quirk_amd_harvest_no_ats);
+/* AMD Iceland dGPU */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x6900, quirk_amd_harvest_no_ats);
+/* AMD Navi10 dGPU */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7310, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7312, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7318, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7319, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731a, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731b, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731e, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731f, quirk_amd_harvest_no_ats);
+/* AMD Navi14 dGPU */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7340, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7341, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7347, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x734f, quirk_amd_harvest_no_ats);
+/* AMD Raven platform iGPU */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x15d8, quirk_amd_harvest_no_ats);
+
+/*
+ * Intel IPU E2000 revisions before C0 implement incorrect endianness
+ * in ATS Invalidate Request message body. Disable ATS for those devices.
+ */
+static void quirk_intel_e2000_no_ats(struct pci_dev *pdev)
+{
+	if (pdev->revision < 0x20)
+		quirk_no_ats(pdev);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1451, quirk_intel_e2000_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1452, quirk_intel_e2000_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1453, quirk_intel_e2000_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1454, quirk_intel_e2000_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1455, quirk_intel_e2000_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1457, quirk_intel_e2000_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1459, quirk_intel_e2000_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x145a, quirk_intel_e2000_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x145c, quirk_intel_e2000_no_ats);
+#endif /* CONFIG_PCI_ATS */
+
+/* Freescale PCIe doesn't support MSI in RC mode */
+static void quirk_fsl_no_msi(struct pci_dev *pdev)
+{
+	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT)
+		pdev->no_msi = 1;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, quirk_fsl_no_msi);
+
+/*
+ * Although not allowed by the spec, some multi-function devices have
+ * dependencies of one function (consumer) on another (supplier).  For the
+ * consumer to work in D0, the supplier must also be in D0.  Create a
+ * device link from the consumer to the supplier to enforce this
+ * dependency.  Runtime PM is allowed by default on the consumer to prevent
+ * it from permanently keeping the supplier awake.
+ */
+static void pci_create_device_link(struct pci_dev *pdev, unsigned int consumer,
+				   unsigned int supplier, unsigned int class,
+				   unsigned int class_shift)
+{
+	struct pci_dev *supplier_pdev;
+
+	if (PCI_FUNC(pdev->devfn) != consumer)
+		return;
+
+	supplier_pdev = pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus),
+				pdev->bus->number,
+				PCI_DEVFN(PCI_SLOT(pdev->devfn), supplier));
+	if (!supplier_pdev || (supplier_pdev->class >> class_shift) != class) {
+		pci_dev_put(supplier_pdev);
+		return;
+	}
+
+	if (device_link_add(&pdev->dev, &supplier_pdev->dev,
+			    DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME))
+		pci_info(pdev, "D0 power state depends on %s\n",
+			 pci_name(supplier_pdev));
+	else
+		pci_err(pdev, "Cannot enforce power dependency on %s\n",
+			pci_name(supplier_pdev));
+
+	pm_runtime_allow(&pdev->dev);
+	pci_dev_put(supplier_pdev);
+}
+
+/*
+ * Create device link for GPUs with integrated HDA controller for streaming
+ * audio to attached displays.
+ */
+static void quirk_gpu_hda(struct pci_dev *hda)
+{
+	pci_create_device_link(hda, 1, 0, PCI_BASE_CLASS_DISPLAY, 16);
+}
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, PCI_ANY_ID,
+			      PCI_CLASS_MULTIMEDIA_HD_AUDIO, 8, quirk_gpu_hda);
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_AMD, PCI_ANY_ID,
+			      PCI_CLASS_MULTIMEDIA_HD_AUDIO, 8, quirk_gpu_hda);
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
+			      PCI_CLASS_MULTIMEDIA_HD_AUDIO, 8, quirk_gpu_hda);
+
+/*
+ * Create device link for GPUs with integrated USB xHCI Host
+ * controller to VGA.
+ */
+static void quirk_gpu_usb(struct pci_dev *usb)
+{
+	pci_create_device_link(usb, 2, 0, PCI_BASE_CLASS_DISPLAY, 16);
+}
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
+			      PCI_CLASS_SERIAL_USB, 8, quirk_gpu_usb);
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, PCI_ANY_ID,
+			      PCI_CLASS_SERIAL_USB, 8, quirk_gpu_usb);
+
+/*
+ * Create device link for GPUs with integrated Type-C UCSI controller
+ * to VGA. Currently there is no class code defined for UCSI device over PCI
+ * so using UNKNOWN class for now and it will be updated when UCSI
+ * over PCI gets a class code.
+ */
+#define PCI_CLASS_SERIAL_UNKNOWN	0x0c80
+static void quirk_gpu_usb_typec_ucsi(struct pci_dev *ucsi)
+{
+	pci_create_device_link(ucsi, 3, 0, PCI_BASE_CLASS_DISPLAY, 16);
+}
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
+			      PCI_CLASS_SERIAL_UNKNOWN, 8,
+			      quirk_gpu_usb_typec_ucsi);
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, PCI_ANY_ID,
+			      PCI_CLASS_SERIAL_UNKNOWN, 8,
+			      quirk_gpu_usb_typec_ucsi);
+
+/*
+ * Enable the NVIDIA GPU integrated HDA controller if the BIOS left it
+ * disabled.  https://devtalk.nvidia.com/default/topic/1024022
+ */
+static void quirk_nvidia_hda(struct pci_dev *gpu)
+{
+	u8 hdr_type;
+	u32 val;
+
+	/* There was no integrated HDA controller before MCP89 */
+	if (gpu->device < PCI_DEVICE_ID_NVIDIA_GEFORCE_320M)
+		return;
+
+	/* Bit 25 at offset 0x488 enables the HDA controller */
+	pci_read_config_dword(gpu, 0x488, &val);
+	if (val & BIT(25))
+		return;
+
+	pci_info(gpu, "Enabling HDA controller\n");
+	pci_write_config_dword(gpu, 0x488, val | BIT(25));
+
+	/* The GPU becomes a multi-function device when the HDA is enabled */
+	pci_read_config_byte(gpu, PCI_HEADER_TYPE, &hdr_type);
+	gpu->multifunction = !!(hdr_type & 0x80);
+}
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
+			       PCI_BASE_CLASS_DISPLAY, 16, quirk_nvidia_hda);
+DECLARE_PCI_FIXUP_CLASS_RESUME_EARLY(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
+			       PCI_BASE_CLASS_DISPLAY, 16, quirk_nvidia_hda);
+
+/*
+ * Some IDT switches incorrectly flag an ACS Source Validation error on
+ * completions for config read requests even though PCIe r4.0, sec
+ * 6.12.1.1, says that completions are never affected by ACS Source
+ * Validation.  Here's the text of IDT 89H32H8G3-YC, erratum #36:
+ *
+ *   Item #36 - Downstream port applies ACS Source Validation to Completions
+ *   Section 6.12.1.1 of the PCI Express Base Specification 3.1 states that
+ *   completions are never affected by ACS Source Validation.  However,
+ *   completions received by a downstream port of the PCIe switch from a
+ *   device that has not yet captured a PCIe bus number are incorrectly
+ *   dropped by ACS Source Validation by the switch downstream port.
+ *
+ * The workaround suggested by IDT is to issue a config write to the
+ * downstream device before issuing the first config read.  This allows the
+ * downstream device to capture its bus and device numbers (see PCIe r4.0,
+ * sec 2.2.9), thus avoiding the ACS error on the completion.
+ *
+ * However, we don't know when the device is ready to accept the config
+ * write, so we do config reads until we receive a non-Config Request Retry
+ * Status, then do the config write.
+ *
+ * To avoid hitting the erratum when doing the config reads, we disable ACS
+ * SV around this process.
+ */
+int pci_idt_bus_quirk(struct pci_bus *bus, int devfn, u32 *l, int timeout)
+{
+	int pos;
+	u16 ctrl = 0;
+	bool found;
+	struct pci_dev *bridge = bus->self;
+
+	pos = bridge->acs_cap;
+
+	/* Disable ACS SV before initial config reads */
+	if (pos) {
+		pci_read_config_word(bridge, pos + PCI_ACS_CTRL, &ctrl);
+		if (ctrl & PCI_ACS_SV)
+			pci_write_config_word(bridge, pos + PCI_ACS_CTRL,
+					      ctrl & ~PCI_ACS_SV);
+	}
+
+	found = pci_bus_generic_read_dev_vendor_id(bus, devfn, l, timeout);
+
+	/* Write Vendor ID (read-only) so the endpoint latches its bus/dev */
+	if (found)
+		pci_bus_write_config_word(bus, devfn, PCI_VENDOR_ID, 0);
+
+	/* Re-enable ACS_SV if it was previously enabled */
+	if (ctrl & PCI_ACS_SV)
+		pci_write_config_word(bridge, pos + PCI_ACS_CTRL, ctrl);
+
+	return found;
+}
+
+/*
+ * Microsemi Switchtec NTB uses devfn proxy IDs to move TLPs between
+ * NT endpoints via the internal switch fabric. These IDs replace the
+ * originating Requester ID TLPs which access host memory on peer NTB
+ * ports. Therefore, all proxy IDs must be aliased to the NTB device
+ * to permit access when the IOMMU is turned on.
+ */
+static void quirk_switchtec_ntb_dma_alias(struct pci_dev *pdev)
+{
+	void __iomem *mmio;
+	struct ntb_info_regs __iomem *mmio_ntb;
+	struct ntb_ctrl_regs __iomem *mmio_ctrl;
+	u64 partition_map;
+	u8 partition;
+	int pp;
+
+	if (pci_enable_device(pdev)) {
+		pci_err(pdev, "Cannot enable Switchtec device\n");
+		return;
+	}
+
+	mmio = pci_iomap(pdev, 0, 0);
+	if (mmio == NULL) {
+		pci_disable_device(pdev);
+		pci_err(pdev, "Cannot iomap Switchtec device\n");
+		return;
+	}
+
+	pci_info(pdev, "Setting Switchtec proxy ID aliases\n");
+
+	mmio_ntb = mmio + SWITCHTEC_GAS_NTB_OFFSET;
+	mmio_ctrl = (void __iomem *) mmio_ntb + SWITCHTEC_NTB_REG_CTRL_OFFSET;
+
+	partition = ioread8(&mmio_ntb->partition_id);
+
+	partition_map = ioread32(&mmio_ntb->ep_map);
+	partition_map |= ((u64) ioread32(&mmio_ntb->ep_map + 4)) << 32;
+	partition_map &= ~(1ULL << partition);
+
+	for (pp = 0; pp < (sizeof(partition_map) * 8); pp++) {
+		struct ntb_ctrl_regs __iomem *mmio_peer_ctrl;
+		u32 table_sz = 0;
+		int te;
+
+		if (!(partition_map & (1ULL << pp)))
+			continue;
+
+		pci_dbg(pdev, "Processing partition %d\n", pp);
+
+		mmio_peer_ctrl = &mmio_ctrl[pp];
+
+		table_sz = ioread16(&mmio_peer_ctrl->req_id_table_size);
+		if (!table_sz) {
+			pci_warn(pdev, "Partition %d table_sz 0\n", pp);
+			continue;
+		}
+
+		if (table_sz > 512) {
+			pci_warn(pdev,
+				 "Invalid Switchtec partition %d table_sz %d\n",
+				 pp, table_sz);
+			continue;
+		}
+
+		for (te = 0; te < table_sz; te++) {
+			u32 rid_entry;
+			u8 devfn;
+
+			rid_entry = ioread32(&mmio_peer_ctrl->req_id_table[te]);
+			devfn = (rid_entry >> 1) & 0xFF;
+			pci_dbg(pdev,
+				"Aliasing Partition %d Proxy ID %02x.%d\n",
+				pp, PCI_SLOT(devfn), PCI_FUNC(devfn));
+			pci_add_dma_alias(pdev, devfn, 1);
+		}
+	}
+
+	pci_iounmap(pdev, mmio);
+	pci_disable_device(pdev);
+}
+#define SWITCHTEC_QUIRK(vid) \
+	DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_MICROSEMI, vid, \
+		PCI_CLASS_BRIDGE_OTHER, 8, quirk_switchtec_ntb_dma_alias)
+
+SWITCHTEC_QUIRK(0x8531);  /* PFX 24xG3 */
+SWITCHTEC_QUIRK(0x8532);  /* PFX 32xG3 */
+SWITCHTEC_QUIRK(0x8533);  /* PFX 48xG3 */
+SWITCHTEC_QUIRK(0x8534);  /* PFX 64xG3 */
+SWITCHTEC_QUIRK(0x8535);  /* PFX 80xG3 */
+SWITCHTEC_QUIRK(0x8536);  /* PFX 96xG3 */
+SWITCHTEC_QUIRK(0x8541);  /* PSX 24xG3 */
+SWITCHTEC_QUIRK(0x8542);  /* PSX 32xG3 */
+SWITCHTEC_QUIRK(0x8543);  /* PSX 48xG3 */
+SWITCHTEC_QUIRK(0x8544);  /* PSX 64xG3 */
+SWITCHTEC_QUIRK(0x8545);  /* PSX 80xG3 */
+SWITCHTEC_QUIRK(0x8546);  /* PSX 96xG3 */
+SWITCHTEC_QUIRK(0x8551);  /* PAX 24XG3 */
+SWITCHTEC_QUIRK(0x8552);  /* PAX 32XG3 */
+SWITCHTEC_QUIRK(0x8553);  /* PAX 48XG3 */
+SWITCHTEC_QUIRK(0x8554);  /* PAX 64XG3 */
+SWITCHTEC_QUIRK(0x8555);  /* PAX 80XG3 */
+SWITCHTEC_QUIRK(0x8556);  /* PAX 96XG3 */
+SWITCHTEC_QUIRK(0x8561);  /* PFXL 24XG3 */
+SWITCHTEC_QUIRK(0x8562);  /* PFXL 32XG3 */
+SWITCHTEC_QUIRK(0x8563);  /* PFXL 48XG3 */
+SWITCHTEC_QUIRK(0x8564);  /* PFXL 64XG3 */
+SWITCHTEC_QUIRK(0x8565);  /* PFXL 80XG3 */
+SWITCHTEC_QUIRK(0x8566);  /* PFXL 96XG3 */
+SWITCHTEC_QUIRK(0x8571);  /* PFXI 24XG3 */
+SWITCHTEC_QUIRK(0x8572);  /* PFXI 32XG3 */
+SWITCHTEC_QUIRK(0x8573);  /* PFXI 48XG3 */
+SWITCHTEC_QUIRK(0x8574);  /* PFXI 64XG3 */
+SWITCHTEC_QUIRK(0x8575);  /* PFXI 80XG3 */
+SWITCHTEC_QUIRK(0x8576);  /* PFXI 96XG3 */
+SWITCHTEC_QUIRK(0x4000);  /* PFX 100XG4 */
+SWITCHTEC_QUIRK(0x4084);  /* PFX 84XG4  */
+SWITCHTEC_QUIRK(0x4068);  /* PFX 68XG4  */
+SWITCHTEC_QUIRK(0x4052);  /* PFX 52XG4  */
+SWITCHTEC_QUIRK(0x4036);  /* PFX 36XG4  */
+SWITCHTEC_QUIRK(0x4028);  /* PFX 28XG4  */
+SWITCHTEC_QUIRK(0x4100);  /* PSX 100XG4 */
+SWITCHTEC_QUIRK(0x4184);  /* PSX 84XG4  */
+SWITCHTEC_QUIRK(0x4168);  /* PSX 68XG4  */
+SWITCHTEC_QUIRK(0x4152);  /* PSX 52XG4  */
+SWITCHTEC_QUIRK(0x4136);  /* PSX 36XG4  */
+SWITCHTEC_QUIRK(0x4128);  /* PSX 28XG4  */
+SWITCHTEC_QUIRK(0x4200);  /* PAX 100XG4 */
+SWITCHTEC_QUIRK(0x4284);  /* PAX 84XG4  */
+SWITCHTEC_QUIRK(0x4268);  /* PAX 68XG4  */
+SWITCHTEC_QUIRK(0x4252);  /* PAX 52XG4  */
+SWITCHTEC_QUIRK(0x4236);  /* PAX 36XG4  */
+SWITCHTEC_QUIRK(0x4228);  /* PAX 28XG4  */
+SWITCHTEC_QUIRK(0x4352);  /* PFXA 52XG4 */
+SWITCHTEC_QUIRK(0x4336);  /* PFXA 36XG4 */
+SWITCHTEC_QUIRK(0x4328);  /* PFXA 28XG4 */
+SWITCHTEC_QUIRK(0x4452);  /* PSXA 52XG4 */
+SWITCHTEC_QUIRK(0x4436);  /* PSXA 36XG4 */
+SWITCHTEC_QUIRK(0x4428);  /* PSXA 28XG4 */
+SWITCHTEC_QUIRK(0x4552);  /* PAXA 52XG4 */
+SWITCHTEC_QUIRK(0x4536);  /* PAXA 36XG4 */
+SWITCHTEC_QUIRK(0x4528);  /* PAXA 28XG4 */
+SWITCHTEC_QUIRK(0x5000);  /* PFX 100XG5 */
+SWITCHTEC_QUIRK(0x5084);  /* PFX 84XG5 */
+SWITCHTEC_QUIRK(0x5068);  /* PFX 68XG5 */
+SWITCHTEC_QUIRK(0x5052);  /* PFX 52XG5 */
+SWITCHTEC_QUIRK(0x5036);  /* PFX 36XG5 */
+SWITCHTEC_QUIRK(0x5028);  /* PFX 28XG5 */
+SWITCHTEC_QUIRK(0x5100);  /* PSX 100XG5 */
+SWITCHTEC_QUIRK(0x5184);  /* PSX 84XG5 */
+SWITCHTEC_QUIRK(0x5168);  /* PSX 68XG5 */
+SWITCHTEC_QUIRK(0x5152);  /* PSX 52XG5 */
+SWITCHTEC_QUIRK(0x5136);  /* PSX 36XG5 */
+SWITCHTEC_QUIRK(0x5128);  /* PSX 28XG5 */
+SWITCHTEC_QUIRK(0x5200);  /* PAX 100XG5 */
+SWITCHTEC_QUIRK(0x5284);  /* PAX 84XG5 */
+SWITCHTEC_QUIRK(0x5268);  /* PAX 68XG5 */
+SWITCHTEC_QUIRK(0x5252);  /* PAX 52XG5 */
+SWITCHTEC_QUIRK(0x5236);  /* PAX 36XG5 */
+SWITCHTEC_QUIRK(0x5228);  /* PAX 28XG5 */
+SWITCHTEC_QUIRK(0x5300);  /* PFXA 100XG5 */
+SWITCHTEC_QUIRK(0x5384);  /* PFXA 84XG5 */
+SWITCHTEC_QUIRK(0x5368);  /* PFXA 68XG5 */
+SWITCHTEC_QUIRK(0x5352);  /* PFXA 52XG5 */
+SWITCHTEC_QUIRK(0x5336);  /* PFXA 36XG5 */
+SWITCHTEC_QUIRK(0x5328);  /* PFXA 28XG5 */
+SWITCHTEC_QUIRK(0x5400);  /* PSXA 100XG5 */
+SWITCHTEC_QUIRK(0x5484);  /* PSXA 84XG5 */
+SWITCHTEC_QUIRK(0x5468);  /* PSXA 68XG5 */
+SWITCHTEC_QUIRK(0x5452);  /* PSXA 52XG5 */
+SWITCHTEC_QUIRK(0x5436);  /* PSXA 36XG5 */
+SWITCHTEC_QUIRK(0x5428);  /* PSXA 28XG5 */
+SWITCHTEC_QUIRK(0x5500);  /* PAXA 100XG5 */
+SWITCHTEC_QUIRK(0x5584);  /* PAXA 84XG5 */
+SWITCHTEC_QUIRK(0x5568);  /* PAXA 68XG5 */
+SWITCHTEC_QUIRK(0x5552);  /* PAXA 52XG5 */
+SWITCHTEC_QUIRK(0x5536);  /* PAXA 36XG5 */
+SWITCHTEC_QUIRK(0x5528);  /* PAXA 28XG5 */
+
+/*
+ * The PLX NTB uses devfn proxy IDs to move TLPs between NT endpoints.
+ * These IDs are used to forward responses to the originator on the other
+ * side of the NTB.  Alias all possible IDs to the NTB to permit access when
+ * the IOMMU is turned on.
+ */
+static void quirk_plx_ntb_dma_alias(struct pci_dev *pdev)
+{
+	pci_info(pdev, "Setting PLX NTB proxy ID aliases\n");
+	/* PLX NTB may use all 256 devfns */
+	pci_add_dma_alias(pdev, 0, 256);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PLX, 0x87b0, quirk_plx_ntb_dma_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PLX, 0x87b1, quirk_plx_ntb_dma_alias);
+
+/*
+ * On Lenovo Thinkpad P50 SKUs with a Nvidia Quadro M1000M, the BIOS does
+ * not always reset the secondary Nvidia GPU between reboots if the system
+ * is configured to use Hybrid Graphics mode.  This results in the GPU
+ * being left in whatever state it was in during the *previous* boot, which
+ * causes spurious interrupts from the GPU, which in turn causes us to
+ * disable the wrong IRQ and end up breaking the touchpad.  Unsurprisingly,
+ * this also completely breaks nouveau.
+ *
+ * Luckily, it seems a simple reset of the Nvidia GPU brings it back to a
+ * clean state and fixes all these issues.
+ *
+ * When the machine is configured in Dedicated display mode, the issue
+ * doesn't occur.  Fortunately the GPU advertises NoReset+ when in this
+ * mode, so we can detect that and avoid resetting it.
+ */
+static void quirk_reset_lenovo_thinkpad_p50_nvgpu(struct pci_dev *pdev)
+{
+	void __iomem *map;
+	int ret;
+
+	if (pdev->subsystem_vendor != PCI_VENDOR_ID_LENOVO ||
+	    pdev->subsystem_device != 0x222e ||
+	    !pci_reset_supported(pdev))
+		return;
+
+	if (pci_enable_device_mem(pdev))
+		return;
+
+	/*
+	 * Based on nvkm_device_ctor() in
+	 * drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+	 */
+	map = pci_iomap(pdev, 0, 0x23000);
+	if (!map) {
+		pci_err(pdev, "Can't map MMIO space\n");
+		goto out_disable;
+	}
+
+	/*
+	 * Make sure the GPU looks like it's been POSTed before resetting
+	 * it.
+	 */
+	if (ioread32(map + 0x2240c) & 0x2) {
+		pci_info(pdev, FW_BUG "GPU left initialized by EFI, resetting\n");
+		ret = pci_reset_bus(pdev);
+		if (ret < 0)
+			pci_err(pdev, "Failed to reset GPU: %d\n", ret);
+	}
+
+	iounmap(map);
+out_disable:
+	pci_disable_device(pdev);
+}
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, 0x13b1,
+			      PCI_CLASS_DISPLAY_VGA, 8,
+			      quirk_reset_lenovo_thinkpad_p50_nvgpu);
+
+/*
+ * Device [1b21:2142]
+ * When in D0, PME# doesn't get asserted when plugging USB 3.0 device.
+ */
+static void pci_fixup_no_d0_pme(struct pci_dev *dev)
+{
+	pci_info(dev, "PME# does not work under D0, disabling it\n");
+	dev->pme_support &= ~(PCI_PM_CAP_PME_D0 >> PCI_PM_CAP_PME_SHIFT);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ASMEDIA, 0x2142, pci_fixup_no_d0_pme);
+
+/*
+ * Device 12d8:0x400e [OHCI] and 12d8:0x400f [EHCI]
+ *
+ * These devices advertise PME# support in all power states but don't
+ * reliably assert it.
+ *
+ * These devices also advertise MSI, but documentation (PI7C9X440SL.pdf)
+ * says "The MSI Function is not implemented on this device" in chapters
+ * 7.3.27, 7.3.29-7.3.31.
+ */
+static void pci_fixup_no_msi_no_pme(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_MSI
+	pci_info(dev, "MSI is not implemented on this device, disabling it\n");
+	dev->no_msi = 1;
+#endif
+	pci_info(dev, "PME# is unreliable, disabling it\n");
+	dev->pme_support = 0;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_PERICOM, 0x400e, pci_fixup_no_msi_no_pme);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_PERICOM, 0x400f, pci_fixup_no_msi_no_pme);
+
+static void apex_pci_fixup_class(struct pci_dev *pdev)
+{
+	pdev->class = (PCI_CLASS_SYSTEM_OTHER << 8) | pdev->class;
+}
+DECLARE_PCI_FIXUP_CLASS_HEADER(0x1ac1, 0x089a,
+			       PCI_CLASS_NOT_DEFINED, 8, apex_pci_fixup_class);
+
+/*
+ * Pericom PI7C9X2G404/PI7C9X2G304/PI7C9X2G303 switch erratum E5 -
+ * ACS P2P Request Redirect is not functional
+ *
+ * When ACS P2P Request Redirect is enabled and bandwidth is not balanced
+ * between upstream and downstream ports, packets are queued in an internal
+ * buffer until CPLD packet. The workaround is to use the switch in store and
+ * forward mode.
+ */
+#define PI7C9X2Gxxx_MODE_REG		0x74
+#define PI7C9X2Gxxx_STORE_FORWARD_MODE	BIT(0)
+static void pci_fixup_pericom_acs_store_forward(struct pci_dev *pdev)
+{
+	struct pci_dev *upstream;
+	u16 val;
+
+	/* Downstream ports only */
+	if (pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM)
+		return;
+
+	/* Check for ACS P2P Request Redirect use */
+	if (!pdev->acs_cap)
+		return;
+	pci_read_config_word(pdev, pdev->acs_cap + PCI_ACS_CTRL, &val);
+	if (!(val & PCI_ACS_RR))
+		return;
+
+	upstream = pci_upstream_bridge(pdev);
+	if (!upstream)
+		return;
+
+	pci_read_config_word(upstream, PI7C9X2Gxxx_MODE_REG, &val);
+	if (!(val & PI7C9X2Gxxx_STORE_FORWARD_MODE)) {
+		pci_info(upstream, "Setting PI7C9X2Gxxx store-forward mode to avoid ACS erratum\n");
+		pci_write_config_word(upstream, PI7C9X2Gxxx_MODE_REG, val |
+				      PI7C9X2Gxxx_STORE_FORWARD_MODE);
+	}
+}
+/*
+ * Apply fixup on enable and on resume, in order to apply the fix up whenever
+ * ACS configuration changes or switch mode is reset
+ */
+DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_PERICOM, 0x2404,
+			 pci_fixup_pericom_acs_store_forward);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_PERICOM, 0x2404,
+			 pci_fixup_pericom_acs_store_forward);
+DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_PERICOM, 0x2304,
+			 pci_fixup_pericom_acs_store_forward);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_PERICOM, 0x2304,
+			 pci_fixup_pericom_acs_store_forward);
+DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_PERICOM, 0x2303,
+			 pci_fixup_pericom_acs_store_forward);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_PERICOM, 0x2303,
+			 pci_fixup_pericom_acs_store_forward);
+
+static void nvidia_ion_ahci_fixup(struct pci_dev *pdev)
+{
+	pdev->dev_flags |= PCI_DEV_FLAGS_HAS_MSI_MASKING;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0ab8, nvidia_ion_ahci_fixup);
+
+static void rom_bar_overlap_defect(struct pci_dev *dev)
+{
+	pci_info(dev, "working around ROM BAR overlap defect\n");
+	dev->rom_bar_overlap = 1;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1533, rom_bar_overlap_defect);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1536, rom_bar_overlap_defect);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1537, rom_bar_overlap_defect);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1538, rom_bar_overlap_defect);
+
+#ifdef CONFIG_PCIEASPM
+/*
+ * Several Intel DG2 graphics devices advertise that they can only tolerate
+ * 1us latency when transitioning from L1 to L0, which may prevent ASPM L1
+ * from being enabled.  But in fact these devices can tolerate unlimited
+ * latency.  Override their Device Capabilities value to allow ASPM L1 to
+ * be enabled.
+ */
+static void aspm_l1_acceptable_latency(struct pci_dev *dev)
+{
+	u32 l1_lat = FIELD_GET(PCI_EXP_DEVCAP_L1, dev->devcap);
+
+	if (l1_lat < 7) {
+		dev->devcap |= FIELD_PREP(PCI_EXP_DEVCAP_L1, 7);
+		pci_info(dev, "ASPM: overriding L1 acceptable latency from %#x to 0x7\n",
+			 l1_lat);
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f80, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f81, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f82, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f83, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f84, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f85, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f86, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f87, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f88, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5690, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5691, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5692, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5693, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5694, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5695, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56a0, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56a1, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56a2, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56a3, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56a4, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56a5, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56a6, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56b0, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56b1, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56c0, aspm_l1_acceptable_latency);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56c1, aspm_l1_acceptable_latency);
+#endif
+
+#ifdef CONFIG_PCIE_DPC
+/*
+ * Intel Ice Lake, Tiger Lake and Alder Lake BIOS has a bug that clears
+ * the DPC RP PIO Log Size of the integrated Thunderbolt PCIe Root
+ * Ports.
+ */
+static void dpc_log_size(struct pci_dev *dev)
+{
+	u16 dpc, val;
+
+	dpc = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DPC);
+	if (!dpc)
+		return;
+
+	pci_read_config_word(dev, dpc + PCI_EXP_DPC_CAP, &val);
+	if (!(val & PCI_EXP_DPC_CAP_RP_EXT))
+		return;
+
+	if (!((val & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8)) {
+		pci_info(dev, "Overriding RP PIO Log Size to 4\n");
+		dev->dpc_rp_log_size = 4;
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x461f, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x462f, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x463f, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x466e, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8a1d, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8a1f, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8a21, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8a23, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a23, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a25, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a27, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a29, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2b, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2d, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2f, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a31, dpc_log_size);
+#endif
+
+/*
+ * For a PCI device with multiple downstream devices, its driver may use
+ * a flattened device tree to describe the downstream devices.
+ * To overlay the flattened device tree, the PCI device and all its ancestor
+ * devices need to have device tree nodes on system base device tree. Thus,
+ * before driver probing, it might need to add a device tree node as the final
+ * fixup.
+ */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5020, of_pci_make_dev_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5021, of_pci_make_dev_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_REDHAT, 0x0005, of_pci_make_dev_node);
+
+/*
+ * Devices known to require a longer delay before first config space access
+ * after reset recovery or resume from D3cold:
+ *
+ * VideoPropulsion (aka Genroco) Torrent QN16e MPEG QAM Modulator
+ */
+static void pci_fixup_d3cold_delay_1sec(struct pci_dev *pdev)
+{
+	pdev->d3cold_delay = 1000;
+}
+DECLARE_PCI_FIXUP_FINAL(0x5555, 0x0004, pci_fixup_d3cold_delay_1sec);
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
new file mode 100644
index 000000000..d749ea825
--- /dev/null
+++ b/drivers/pci/remove.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/pci.h>
+#include <linux/module.h>
+#include "pci.h"
+
+static void pci_free_resources(struct pci_dev *dev)
+{
+	struct resource *res;
+
+	pci_dev_for_each_resource(dev, res) {
+		if (res->parent)
+			release_resource(res);
+	}
+}
+
+static void pci_stop_dev(struct pci_dev *dev)
+{
+	pci_pme_active(dev, false);
+
+	if (pci_dev_is_added(dev)) {
+
+		device_release_driver(&dev->dev);
+		pci_proc_detach_device(dev);
+		pci_remove_sysfs_dev_files(dev);
+		of_pci_remove_node(dev);
+
+		pci_dev_assign_added(dev, false);
+	}
+}
+
+static void pci_destroy_dev(struct pci_dev *dev)
+{
+	if (!dev->dev.kobj.parent)
+		return;
+
+	device_del(&dev->dev);
+
+	down_write(&pci_bus_sem);
+	list_del(&dev->bus_list);
+	up_write(&pci_bus_sem);
+
+	pci_doe_destroy(dev);
+	pcie_aspm_exit_link_state(dev);
+	pci_bridge_d3_update(dev);
+	pci_free_resources(dev);
+	put_device(&dev->dev);
+}
+
+void pci_remove_bus(struct pci_bus *bus)
+{
+	pci_proc_detach_bus(bus);
+
+	down_write(&pci_bus_sem);
+	list_del(&bus->node);
+	pci_bus_release_busn_res(bus);
+	up_write(&pci_bus_sem);
+	pci_remove_legacy_files(bus);
+
+	if (bus->ops->remove_bus)
+		bus->ops->remove_bus(bus);
+
+	pcibios_remove_bus(bus);
+	device_unregister(&bus->dev);
+}
+EXPORT_SYMBOL(pci_remove_bus);
+
+static void pci_stop_bus_device(struct pci_dev *dev)
+{
+	struct pci_bus *bus = dev->subordinate;
+	struct pci_dev *child, *tmp;
+
+	/*
+	 * Stopping an SR-IOV PF device removes all the associated VFs,
+	 * which will update the bus->devices list and confuse the
+	 * iterator.  Therefore, iterate in reverse so we remove the VFs
+	 * first, then the PF.
+	 */
+	if (bus) {
+		list_for_each_entry_safe_reverse(child, tmp,
+						 &bus->devices, bus_list)
+			pci_stop_bus_device(child);
+	}
+
+	pci_stop_dev(dev);
+}
+
+static void pci_remove_bus_device(struct pci_dev *dev)
+{
+	struct pci_bus *bus = dev->subordinate;
+	struct pci_dev *child, *tmp;
+
+	if (bus) {
+		list_for_each_entry_safe(child, tmp,
+					 &bus->devices, bus_list)
+			pci_remove_bus_device(child);
+
+		pci_remove_bus(bus);
+		dev->subordinate = NULL;
+	}
+
+	pci_destroy_dev(dev);
+}
+
+/**
+ * pci_stop_and_remove_bus_device - remove a PCI device and any children
+ * @dev: the device to remove
+ *
+ * Remove a PCI device from the device lists, informing the drivers
+ * that the device has been removed.  We also remove any subordinate
+ * buses and children in a depth-first manner.
+ *
+ * For each device we remove, delete the device structure from the
+ * device lists, remove the /proc entry, and notify userspace
+ * (/sbin/hotplug).
+ */
+void pci_stop_and_remove_bus_device(struct pci_dev *dev)
+{
+	pci_stop_bus_device(dev);
+	pci_remove_bus_device(dev);
+}
+EXPORT_SYMBOL(pci_stop_and_remove_bus_device);
+
+void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev)
+{
+	pci_lock_rescan_remove();
+	pci_stop_and_remove_bus_device(dev);
+	pci_unlock_rescan_remove();
+}
+EXPORT_SYMBOL_GPL(pci_stop_and_remove_bus_device_locked);
+
+void pci_stop_root_bus(struct pci_bus *bus)
+{
+	struct pci_dev *child, *tmp;
+	struct pci_host_bridge *host_bridge;
+
+	if (!pci_is_root_bus(bus))
+		return;
+
+	host_bridge = to_pci_host_bridge(bus->bridge);
+	list_for_each_entry_safe_reverse(child, tmp,
+					 &bus->devices, bus_list)
+		pci_stop_bus_device(child);
+
+	/* stop the host bridge */
+	device_release_driver(&host_bridge->dev);
+}
+EXPORT_SYMBOL_GPL(pci_stop_root_bus);
+
+void pci_remove_root_bus(struct pci_bus *bus)
+{
+	struct pci_dev *child, *tmp;
+	struct pci_host_bridge *host_bridge;
+
+	if (!pci_is_root_bus(bus))
+		return;
+
+	host_bridge = to_pci_host_bridge(bus->bridge);
+	list_for_each_entry_safe(child, tmp,
+				 &bus->devices, bus_list)
+		pci_remove_bus_device(child);
+
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+	/* Release domain_nr if it was dynamically allocated */
+	if (host_bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
+		pci_bus_release_domain_nr(bus, host_bridge->dev.parent);
+#endif
+
+	pci_remove_bus(bus);
+	host_bridge->bus = NULL;
+
+	/* remove the host bridge */
+	device_del(&host_bridge->dev);
+}
+EXPORT_SYMBOL_GPL(pci_remove_root_bus);
diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c
new file mode 100644
index 000000000..e18d3a438
--- /dev/null
+++ b/drivers/pci/rom.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI ROM access routines
+ *
+ * (C) Copyright 2004 Jon Smirl <jonsmirl@yahoo.com>
+ * (C) Copyright 2004 Silicon Graphics, Inc. Jesse Barnes <jbarnes@sgi.com>
+ */
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+
+#include "pci.h"
+
+/**
+ * pci_enable_rom - enable ROM decoding for a PCI device
+ * @pdev: PCI device to enable
+ *
+ * Enable ROM decoding on @dev.  This involves simply turning on the last
+ * bit of the PCI ROM BAR.  Note that some cards may share address decoders
+ * between the ROM and other resources, so enabling it may disable access
+ * to MMIO registers or other card memory.
+ */
+int pci_enable_rom(struct pci_dev *pdev)
+{
+	struct resource *res = &pdev->resource[PCI_ROM_RESOURCE];
+	struct pci_bus_region region;
+	u32 rom_addr;
+
+	if (!res->flags)
+		return -1;
+
+	/* Nothing to enable if we're using a shadow copy in RAM */
+	if (res->flags & IORESOURCE_ROM_SHADOW)
+		return 0;
+
+	/*
+	 * Ideally pci_update_resource() would update the ROM BAR address,
+	 * and we would only set the enable bit here.  But apparently some
+	 * devices have buggy ROM BARs that read as zero when disabled.
+	 */
+	pcibios_resource_to_bus(pdev->bus, &region, res);
+	pci_read_config_dword(pdev, pdev->rom_base_reg, &rom_addr);
+	rom_addr &= ~PCI_ROM_ADDRESS_MASK;
+	rom_addr |= region.start | PCI_ROM_ADDRESS_ENABLE;
+	pci_write_config_dword(pdev, pdev->rom_base_reg, rom_addr);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_enable_rom);
+
+/**
+ * pci_disable_rom - disable ROM decoding for a PCI device
+ * @pdev: PCI device to disable
+ *
+ * Disable ROM decoding on a PCI device by turning off the last bit in the
+ * ROM BAR.
+ */
+void pci_disable_rom(struct pci_dev *pdev)
+{
+	struct resource *res = &pdev->resource[PCI_ROM_RESOURCE];
+	u32 rom_addr;
+
+	if (res->flags & IORESOURCE_ROM_SHADOW)
+		return;
+
+	pci_read_config_dword(pdev, pdev->rom_base_reg, &rom_addr);
+	rom_addr &= ~PCI_ROM_ADDRESS_ENABLE;
+	pci_write_config_dword(pdev, pdev->rom_base_reg, rom_addr);
+}
+EXPORT_SYMBOL_GPL(pci_disable_rom);
+
+/**
+ * pci_get_rom_size - obtain the actual size of the ROM image
+ * @pdev: target PCI device
+ * @rom: kernel virtual pointer to image of ROM
+ * @size: size of PCI window
+ *  return: size of actual ROM image
+ *
+ * Determine the actual length of the ROM image.
+ * The PCI window size could be much larger than the
+ * actual image size.
+ */
+static size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom,
+			       size_t size)
+{
+	void __iomem *image;
+	int last_image;
+	unsigned int length;
+
+	image = rom;
+	do {
+		void __iomem *pds;
+		/* Standard PCI ROMs start out with these bytes 55 AA */
+		if (readw(image) != 0xAA55) {
+			pci_info(pdev, "Invalid PCI ROM header signature: expecting 0xaa55, got %#06x\n",
+				 readw(image));
+			break;
+		}
+		/* get the PCI data structure and check its "PCIR" signature */
+		pds = image + readw(image + 24);
+		if (readl(pds) != 0x52494350) {
+			pci_info(pdev, "Invalid PCI ROM data signature: expecting 0x52494350, got %#010x\n",
+				 readl(pds));
+			break;
+		}
+		last_image = readb(pds + 21) & 0x80;
+		length = readw(pds + 16);
+		image += length * 512;
+		/* Avoid iterating through memory outside the resource window */
+		if (image >= rom + size)
+			break;
+		if (!last_image) {
+			if (readw(image) != 0xAA55) {
+				pci_info(pdev, "No more image in the PCI ROM\n");
+				break;
+			}
+		}
+	} while (length && !last_image);
+
+	/* never return a size larger than the PCI resource window */
+	/* there are known ROMs that get the size wrong */
+	return min((size_t)(image - rom), size);
+}
+
+/**
+ * pci_map_rom - map a PCI ROM to kernel space
+ * @pdev: pointer to pci device struct
+ * @size: pointer to receive size of pci window over ROM
+ *
+ * Return: kernel virtual pointer to image of ROM
+ *
+ * Map a PCI ROM into kernel space. If ROM is boot video ROM,
+ * the shadow BIOS copy will be returned instead of the
+ * actual ROM.
+ */
+void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size)
+{
+	struct resource *res = &pdev->resource[PCI_ROM_RESOURCE];
+	loff_t start;
+	void __iomem *rom;
+
+	/* assign the ROM an address if it doesn't have one */
+	if (res->parent == NULL && pci_assign_resource(pdev, PCI_ROM_RESOURCE))
+		return NULL;
+
+	start = pci_resource_start(pdev, PCI_ROM_RESOURCE);
+	*size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
+	if (*size == 0)
+		return NULL;
+
+	/* Enable ROM space decodes */
+	if (pci_enable_rom(pdev))
+		return NULL;
+
+	rom = ioremap(start, *size);
+	if (!rom)
+		goto err_ioremap;
+
+	/*
+	 * Try to find the true size of the ROM since sometimes the PCI window
+	 * size is much larger than the actual size of the ROM.
+	 * True size is important if the ROM is going to be copied.
+	 */
+	*size = pci_get_rom_size(pdev, rom, *size);
+	if (!*size)
+		goto invalid_rom;
+
+	return rom;
+
+invalid_rom:
+	iounmap(rom);
+err_ioremap:
+	/* restore enable if ioremap fails */
+	if (!(res->flags & IORESOURCE_ROM_ENABLE))
+		pci_disable_rom(pdev);
+	return NULL;
+}
+EXPORT_SYMBOL(pci_map_rom);
+
+/**
+ * pci_unmap_rom - unmap the ROM from kernel space
+ * @pdev: pointer to pci device struct
+ * @rom: virtual address of the previous mapping
+ *
+ * Remove a mapping of a previously mapped ROM
+ */
+void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom)
+{
+	struct resource *res = &pdev->resource[PCI_ROM_RESOURCE];
+
+	iounmap(rom);
+
+	/* Disable again before continuing */
+	if (!(res->flags & IORESOURCE_ROM_ENABLE))
+		pci_disable_rom(pdev);
+}
+EXPORT_SYMBOL(pci_unmap_rom);
diff --git a/drivers/pci/search.c b/drivers/pci/search.c
new file mode 100644
index 000000000..b4c138a6e
--- /dev/null
+++ b/drivers/pci/search.c
@@ -0,0 +1,392 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI searching functions
+ *
+ * Copyright (C) 1993 -- 1997 Drew Eckhardt, Frederic Potter,
+ *					David Mosberger-Tang
+ * Copyright (C) 1997 -- 2000 Martin Mares <mj@ucw.cz>
+ * Copyright (C) 2003 -- 2004 Greg Kroah-Hartman <greg@kroah.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include "pci.h"
+
+DECLARE_RWSEM(pci_bus_sem);
+
+/*
+ * pci_for_each_dma_alias - Iterate over DMA aliases for a device
+ * @pdev: starting downstream device
+ * @fn: function to call for each alias
+ * @data: opaque data to pass to @fn
+ *
+ * Starting @pdev, walk up the bus calling @fn for each possible alias
+ * of @pdev at the root bus.
+ */
+int pci_for_each_dma_alias(struct pci_dev *pdev,
+			   int (*fn)(struct pci_dev *pdev,
+				     u16 alias, void *data), void *data)
+{
+	struct pci_bus *bus;
+	int ret;
+
+	/*
+	 * The device may have an explicit alias requester ID for DMA where the
+	 * requester is on another PCI bus.
+	 */
+	pdev = pci_real_dma_dev(pdev);
+
+	ret = fn(pdev, pci_dev_id(pdev), data);
+	if (ret)
+		return ret;
+
+	/*
+	 * If the device is broken and uses an alias requester ID for
+	 * DMA, iterate over that too.
+	 */
+	if (unlikely(pdev->dma_alias_mask)) {
+		unsigned int devfn;
+
+		for_each_set_bit(devfn, pdev->dma_alias_mask, MAX_NR_DEVFNS) {
+			ret = fn(pdev, PCI_DEVID(pdev->bus->number, devfn),
+				 data);
+			if (ret)
+				return ret;
+		}
+	}
+
+	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
+		struct pci_dev *tmp;
+
+		/* Skip virtual buses */
+		if (!bus->self)
+			continue;
+
+		tmp = bus->self;
+
+		/* stop at bridge where translation unit is associated */
+		if (tmp->dev_flags & PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT)
+			return ret;
+
+		/*
+		 * PCIe-to-PCI/X bridges alias transactions from downstream
+		 * devices using the subordinate bus number (PCI Express to
+		 * PCI/PCI-X Bridge Spec, rev 1.0, sec 2.3).  For all cases
+		 * where the upstream bus is PCI/X we alias to the bridge
+		 * (there are various conditions in the previous reference
+		 * where the bridge may take ownership of transactions, even
+		 * when the secondary interface is PCI-X).
+		 */
+		if (pci_is_pcie(tmp)) {
+			switch (pci_pcie_type(tmp)) {
+			case PCI_EXP_TYPE_ROOT_PORT:
+			case PCI_EXP_TYPE_UPSTREAM:
+			case PCI_EXP_TYPE_DOWNSTREAM:
+				continue;
+			case PCI_EXP_TYPE_PCI_BRIDGE:
+				ret = fn(tmp,
+					 PCI_DEVID(tmp->subordinate->number,
+						   PCI_DEVFN(0, 0)), data);
+				if (ret)
+					return ret;
+				continue;
+			case PCI_EXP_TYPE_PCIE_BRIDGE:
+				ret = fn(tmp, pci_dev_id(tmp), data);
+				if (ret)
+					return ret;
+				continue;
+			}
+		} else {
+			if (tmp->dev_flags & PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS)
+				ret = fn(tmp,
+					 PCI_DEVID(tmp->subordinate->number,
+						   PCI_DEVFN(0, 0)), data);
+			else
+				ret = fn(tmp, pci_dev_id(tmp), data);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return ret;
+}
+
+static struct pci_bus *pci_do_find_bus(struct pci_bus *bus, unsigned char busnr)
+{
+	struct pci_bus *child;
+	struct pci_bus *tmp;
+
+	if (bus->number == busnr)
+		return bus;
+
+	list_for_each_entry(tmp, &bus->children, node) {
+		child = pci_do_find_bus(tmp, busnr);
+		if (child)
+			return child;
+	}
+	return NULL;
+}
+
+/**
+ * pci_find_bus - locate PCI bus from a given domain and bus number
+ * @domain: number of PCI domain to search
+ * @busnr: number of desired PCI bus
+ *
+ * Given a PCI bus number and domain number, the desired PCI bus is located
+ * in the global list of PCI buses.  If the bus is found, a pointer to its
+ * data structure is returned.  If no bus is found, %NULL is returned.
+ */
+struct pci_bus *pci_find_bus(int domain, int busnr)
+{
+	struct pci_bus *bus = NULL;
+	struct pci_bus *tmp_bus;
+
+	while ((bus = pci_find_next_bus(bus)) != NULL)  {
+		if (pci_domain_nr(bus) != domain)
+			continue;
+		tmp_bus = pci_do_find_bus(bus, busnr);
+		if (tmp_bus)
+			return tmp_bus;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(pci_find_bus);
+
+/**
+ * pci_find_next_bus - begin or continue searching for a PCI bus
+ * @from: Previous PCI bus found, or %NULL for new search.
+ *
+ * Iterates through the list of known PCI buses.  A new search is
+ * initiated by passing %NULL as the @from argument.  Otherwise if
+ * @from is not %NULL, searches continue from next device on the
+ * global list.
+ */
+struct pci_bus *pci_find_next_bus(const struct pci_bus *from)
+{
+	struct list_head *n;
+	struct pci_bus *b = NULL;
+
+	down_read(&pci_bus_sem);
+	n = from ? from->node.next : pci_root_buses.next;
+	if (n != &pci_root_buses)
+		b = list_entry(n, struct pci_bus, node);
+	up_read(&pci_bus_sem);
+	return b;
+}
+EXPORT_SYMBOL(pci_find_next_bus);
+
+/**
+ * pci_get_slot - locate PCI device for a given PCI slot
+ * @bus: PCI bus on which desired PCI device resides
+ * @devfn: encodes number of PCI slot in which the desired PCI
+ * device resides and the logical device number within that slot
+ * in case of multi-function devices.
+ *
+ * Given a PCI bus and slot/function number, the desired PCI device
+ * is located in the list of PCI devices.
+ * If the device is found, its reference count is increased and this
+ * function returns a pointer to its data structure.  The caller must
+ * decrement the reference count by calling pci_dev_put().
+ * If no device is found, %NULL is returned.
+ */
+struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn)
+{
+	struct pci_dev *dev;
+
+	down_read(&pci_bus_sem);
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		if (dev->devfn == devfn)
+			goto out;
+	}
+
+	dev = NULL;
+ out:
+	pci_dev_get(dev);
+	up_read(&pci_bus_sem);
+	return dev;
+}
+EXPORT_SYMBOL(pci_get_slot);
+
+/**
+ * pci_get_domain_bus_and_slot - locate PCI device for a given PCI domain (segment), bus, and slot
+ * @domain: PCI domain/segment on which the PCI device resides.
+ * @bus: PCI bus on which desired PCI device resides
+ * @devfn: encodes number of PCI slot in which the desired PCI device
+ * resides and the logical device number within that slot in case of
+ * multi-function devices.
+ *
+ * Given a PCI domain, bus, and slot/function number, the desired PCI
+ * device is located in the list of PCI devices. If the device is
+ * found, its reference count is increased and this function returns a
+ * pointer to its data structure.  The caller must decrement the
+ * reference count by calling pci_dev_put().  If no device is found,
+ * %NULL is returned.
+ */
+struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus,
+					    unsigned int devfn)
+{
+	struct pci_dev *dev = NULL;
+
+	for_each_pci_dev(dev) {
+		if (pci_domain_nr(dev->bus) == domain &&
+		    (dev->bus->number == bus && dev->devfn == devfn))
+			return dev;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(pci_get_domain_bus_and_slot);
+
+static int match_pci_dev_by_id(struct device *dev, const void *data)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	const struct pci_device_id *id = data;
+
+	if (pci_match_one_device(id, pdev))
+		return 1;
+	return 0;
+}
+
+/*
+ * pci_get_dev_by_id - begin or continue searching for a PCI device by id
+ * @id: pointer to struct pci_device_id to match for the device
+ * @from: Previous PCI device found in search, or %NULL for new search.
+ *
+ * Iterates through the list of known PCI devices.  If a PCI device is found
+ * with a matching id a pointer to its device structure is returned, and the
+ * reference count to the device is incremented.  Otherwise, %NULL is returned.
+ * A new search is initiated by passing %NULL as the @from argument.  Otherwise
+ * if @from is not %NULL, searches continue from next device on the global
+ * list.  The reference count for @from is always decremented if it is not
+ * %NULL.
+ *
+ * This is an internal function for use by the other search functions in
+ * this file.
+ */
+static struct pci_dev *pci_get_dev_by_id(const struct pci_device_id *id,
+					 struct pci_dev *from)
+{
+	struct device *dev;
+	struct device *dev_start = NULL;
+	struct pci_dev *pdev = NULL;
+
+	if (from)
+		dev_start = &from->dev;
+	dev = bus_find_device(&pci_bus_type, dev_start, (void *)id,
+			      match_pci_dev_by_id);
+	if (dev)
+		pdev = to_pci_dev(dev);
+	pci_dev_put(from);
+	return pdev;
+}
+
+/**
+ * pci_get_subsys - begin or continue searching for a PCI device by vendor/subvendor/device/subdevice id
+ * @vendor: PCI vendor id to match, or %PCI_ANY_ID to match all vendor ids
+ * @device: PCI device id to match, or %PCI_ANY_ID to match all device ids
+ * @ss_vendor: PCI subsystem vendor id to match, or %PCI_ANY_ID to match all vendor ids
+ * @ss_device: PCI subsystem device id to match, or %PCI_ANY_ID to match all device ids
+ * @from: Previous PCI device found in search, or %NULL for new search.
+ *
+ * Iterates through the list of known PCI devices.  If a PCI device is found
+ * with a matching @vendor, @device, @ss_vendor and @ss_device, a pointer to its
+ * device structure is returned, and the reference count to the device is
+ * incremented.  Otherwise, %NULL is returned.  A new search is initiated by
+ * passing %NULL as the @from argument.  Otherwise if @from is not %NULL,
+ * searches continue from next device on the global list.
+ * The reference count for @from is always decremented if it is not %NULL.
+ */
+struct pci_dev *pci_get_subsys(unsigned int vendor, unsigned int device,
+			       unsigned int ss_vendor, unsigned int ss_device,
+			       struct pci_dev *from)
+{
+	struct pci_device_id id = {
+		.vendor = vendor,
+		.device = device,
+		.subvendor = ss_vendor,
+		.subdevice = ss_device,
+	};
+
+	return pci_get_dev_by_id(&id, from);
+}
+EXPORT_SYMBOL(pci_get_subsys);
+
+/**
+ * pci_get_device - begin or continue searching for a PCI device by vendor/device id
+ * @vendor: PCI vendor id to match, or %PCI_ANY_ID to match all vendor ids
+ * @device: PCI device id to match, or %PCI_ANY_ID to match all device ids
+ * @from: Previous PCI device found in search, or %NULL for new search.
+ *
+ * Iterates through the list of known PCI devices.  If a PCI device is
+ * found with a matching @vendor and @device, the reference count to the
+ * device is incremented and a pointer to its device structure is returned.
+ * Otherwise, %NULL is returned.  A new search is initiated by passing %NULL
+ * as the @from argument.  Otherwise if @from is not %NULL, searches continue
+ * from next device on the global list.  The reference count for @from is
+ * always decremented if it is not %NULL.
+ */
+struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device,
+			       struct pci_dev *from)
+{
+	return pci_get_subsys(vendor, device, PCI_ANY_ID, PCI_ANY_ID, from);
+}
+EXPORT_SYMBOL(pci_get_device);
+
+/**
+ * pci_get_class - begin or continue searching for a PCI device by class
+ * @class: search for a PCI device with this class designation
+ * @from: Previous PCI device found in search, or %NULL for new search.
+ *
+ * Iterates through the list of known PCI devices.  If a PCI device is
+ * found with a matching @class, the reference count to the device is
+ * incremented and a pointer to its device structure is returned.
+ * Otherwise, %NULL is returned.
+ * A new search is initiated by passing %NULL as the @from argument.
+ * Otherwise if @from is not %NULL, searches continue from next device
+ * on the global list.  The reference count for @from is always decremented
+ * if it is not %NULL.
+ */
+struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from)
+{
+	struct pci_device_id id = {
+		.vendor = PCI_ANY_ID,
+		.device = PCI_ANY_ID,
+		.subvendor = PCI_ANY_ID,
+		.subdevice = PCI_ANY_ID,
+		.class_mask = PCI_ANY_ID,
+		.class = class,
+	};
+
+	return pci_get_dev_by_id(&id, from);
+}
+EXPORT_SYMBOL(pci_get_class);
+
+/**
+ * pci_dev_present - Returns 1 if device matching the device list is present, 0 if not.
+ * @ids: A pointer to a null terminated list of struct pci_device_id structures
+ * that describe the type of PCI device the caller is trying to find.
+ *
+ * Obvious fact: You do not have a reference to any device that might be found
+ * by this function, so if that device is removed from the system right after
+ * this function is finished, the value will be stale.  Use this function to
+ * find devices that are usually built into a system, or for a general hint as
+ * to if another device happens to be present at this specific moment in time.
+ */
+int pci_dev_present(const struct pci_device_id *ids)
+{
+	struct pci_dev *found = NULL;
+
+	while (ids->vendor || ids->subvendor || ids->class_mask) {
+		found = pci_get_dev_by_id(ids, NULL);
+		if (found) {
+			pci_dev_put(found);
+			return 1;
+		}
+		ids++;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(pci_dev_present);
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
new file mode 100644
index 000000000..dae490f25
--- /dev/null
+++ b/drivers/pci/setup-bus.c
@@ -0,0 +1,2337 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support routines for initializing a PCI subsystem
+ *
+ * Extruded from code written by
+ *      Dave Rusling (david.rusling@reo.mts.dec.com)
+ *      David Mosberger (davidm@cs.arizona.edu)
+ *	David Miller (davem@redhat.com)
+ *
+ * Nov 2000, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
+ *	     PCI-PCI bridges cleanup, sorted resource allocation.
+ * Feb 2002, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
+ *	     Converted to allocation in 3 passes, which gives
+ *	     tighter packing. Prefetchable range support.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/cache.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+#include "pci.h"
+
+unsigned int pci_flags;
+EXPORT_SYMBOL_GPL(pci_flags);
+
+struct pci_dev_resource {
+	struct list_head list;
+	struct resource *res;
+	struct pci_dev *dev;
+	resource_size_t start;
+	resource_size_t end;
+	resource_size_t add_size;
+	resource_size_t min_align;
+	unsigned long flags;
+};
+
+static void free_list(struct list_head *head)
+{
+	struct pci_dev_resource *dev_res, *tmp;
+
+	list_for_each_entry_safe(dev_res, tmp, head, list) {
+		list_del(&dev_res->list);
+		kfree(dev_res);
+	}
+}
+
+/**
+ * add_to_list() - Add a new resource tracker to the list
+ * @head:	Head of the list
+ * @dev:	Device to which the resource belongs
+ * @res:	Resource to be tracked
+ * @add_size:	Additional size to be optionally added to the resource
+ * @min_align:	Minimum memory window alignment
+ */
+static int add_to_list(struct list_head *head, struct pci_dev *dev,
+		       struct resource *res, resource_size_t add_size,
+		       resource_size_t min_align)
+{
+	struct pci_dev_resource *tmp;
+
+	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	tmp->res = res;
+	tmp->dev = dev;
+	tmp->start = res->start;
+	tmp->end = res->end;
+	tmp->flags = res->flags;
+	tmp->add_size = add_size;
+	tmp->min_align = min_align;
+
+	list_add(&tmp->list, head);
+
+	return 0;
+}
+
+static void remove_from_list(struct list_head *head, struct resource *res)
+{
+	struct pci_dev_resource *dev_res, *tmp;
+
+	list_for_each_entry_safe(dev_res, tmp, head, list) {
+		if (dev_res->res == res) {
+			list_del(&dev_res->list);
+			kfree(dev_res);
+			break;
+		}
+	}
+}
+
+static struct pci_dev_resource *res_to_dev_res(struct list_head *head,
+					       struct resource *res)
+{
+	struct pci_dev_resource *dev_res;
+
+	list_for_each_entry(dev_res, head, list) {
+		if (dev_res->res == res)
+			return dev_res;
+	}
+
+	return NULL;
+}
+
+static resource_size_t get_res_add_size(struct list_head *head,
+					struct resource *res)
+{
+	struct pci_dev_resource *dev_res;
+
+	dev_res = res_to_dev_res(head, res);
+	return dev_res ? dev_res->add_size : 0;
+}
+
+static resource_size_t get_res_add_align(struct list_head *head,
+					 struct resource *res)
+{
+	struct pci_dev_resource *dev_res;
+
+	dev_res = res_to_dev_res(head, res);
+	return dev_res ? dev_res->min_align : 0;
+}
+
+/* Sort resources by alignment */
+static void pdev_sort_resources(struct pci_dev *dev, struct list_head *head)
+{
+	struct resource *r;
+	int i;
+
+	pci_dev_for_each_resource(dev, r, i) {
+		struct pci_dev_resource *dev_res, *tmp;
+		resource_size_t r_align;
+		struct list_head *n;
+
+		if (r->flags & IORESOURCE_PCI_FIXED)
+			continue;
+
+		if (!(r->flags) || r->parent)
+			continue;
+
+		r_align = pci_resource_alignment(dev, r);
+		if (!r_align) {
+			pci_warn(dev, "BAR %d: %pR has bogus alignment\n",
+				 i, r);
+			continue;
+		}
+
+		tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+		if (!tmp)
+			panic("%s: kzalloc() failed!\n", __func__);
+		tmp->res = r;
+		tmp->dev = dev;
+
+		/* Fallback is smallest one or list is empty */
+		n = head;
+		list_for_each_entry(dev_res, head, list) {
+			resource_size_t align;
+
+			align = pci_resource_alignment(dev_res->dev,
+							 dev_res->res);
+
+			if (r_align > align) {
+				n = &dev_res->list;
+				break;
+			}
+		}
+		/* Insert it just before n */
+		list_add_tail(&tmp->list, n);
+	}
+}
+
+static void __dev_sort_resources(struct pci_dev *dev, struct list_head *head)
+{
+	u16 class = dev->class >> 8;
+
+	/* Don't touch classless devices or host bridges or IOAPICs */
+	if (class == PCI_CLASS_NOT_DEFINED || class == PCI_CLASS_BRIDGE_HOST)
+		return;
+
+	/* Don't touch IOAPIC devices already enabled by firmware */
+	if (class == PCI_CLASS_SYSTEM_PIC) {
+		u16 command;
+		pci_read_config_word(dev, PCI_COMMAND, &command);
+		if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))
+			return;
+	}
+
+	pdev_sort_resources(dev, head);
+}
+
+static inline void reset_resource(struct resource *res)
+{
+	res->start = 0;
+	res->end = 0;
+	res->flags = 0;
+}
+
+/**
+ * reassign_resources_sorted() - Satisfy any additional resource requests
+ *
+ * @realloc_head:	Head of the list tracking requests requiring
+ *			additional resources
+ * @head:		Head of the list tracking requests with allocated
+ *			resources
+ *
+ * Walk through each element of the realloc_head and try to procure additional
+ * resources for the element, provided the element is in the head list.
+ */
+static void reassign_resources_sorted(struct list_head *realloc_head,
+				      struct list_head *head)
+{
+	struct resource *res;
+	struct pci_dev_resource *add_res, *tmp;
+	struct pci_dev_resource *dev_res;
+	resource_size_t add_size, align;
+	int idx;
+
+	list_for_each_entry_safe(add_res, tmp, realloc_head, list) {
+		bool found_match = false;
+
+		res = add_res->res;
+		/* Skip resource that has been reset */
+		if (!res->flags)
+			goto out;
+
+		/* Skip this resource if not found in head list */
+		list_for_each_entry(dev_res, head, list) {
+			if (dev_res->res == res) {
+				found_match = true;
+				break;
+			}
+		}
+		if (!found_match) /* Just skip */
+			continue;
+
+		idx = res - &add_res->dev->resource[0];
+		add_size = add_res->add_size;
+		align = add_res->min_align;
+		if (!resource_size(res)) {
+			res->start = align;
+			res->end = res->start + add_size - 1;
+			if (pci_assign_resource(add_res->dev, idx))
+				reset_resource(res);
+		} else {
+			res->flags |= add_res->flags &
+				 (IORESOURCE_STARTALIGN|IORESOURCE_SIZEALIGN);
+			if (pci_reassign_resource(add_res->dev, idx,
+						  add_size, align))
+				pci_info(add_res->dev, "failed to add %llx res[%d]=%pR\n",
+					 (unsigned long long) add_size, idx,
+					 res);
+		}
+out:
+		list_del(&add_res->list);
+		kfree(add_res);
+	}
+}
+
+/**
+ * assign_requested_resources_sorted() - Satisfy resource requests
+ *
+ * @head:	Head of the list tracking requests for resources
+ * @fail_head:	Head of the list tracking requests that could not be
+ *		allocated
+ *
+ * Satisfy resource requests of each element in the list.  Add requests that
+ * could not be satisfied to the failed_list.
+ */
+static void assign_requested_resources_sorted(struct list_head *head,
+				 struct list_head *fail_head)
+{
+	struct resource *res;
+	struct pci_dev_resource *dev_res;
+	int idx;
+
+	list_for_each_entry(dev_res, head, list) {
+		res = dev_res->res;
+		idx = res - &dev_res->dev->resource[0];
+		if (resource_size(res) &&
+		    pci_assign_resource(dev_res->dev, idx)) {
+			if (fail_head) {
+				/*
+				 * If the failed resource is a ROM BAR and
+				 * it will be enabled later, don't add it
+				 * to the list.
+				 */
+				if (!((idx == PCI_ROM_RESOURCE) &&
+				      (!(res->flags & IORESOURCE_ROM_ENABLE))))
+					add_to_list(fail_head,
+						    dev_res->dev, res,
+						    0 /* don't care */,
+						    0 /* don't care */);
+			}
+			reset_resource(res);
+		}
+	}
+}
+
+static unsigned long pci_fail_res_type_mask(struct list_head *fail_head)
+{
+	struct pci_dev_resource *fail_res;
+	unsigned long mask = 0;
+
+	/* Check failed type */
+	list_for_each_entry(fail_res, fail_head, list)
+		mask |= fail_res->flags;
+
+	/*
+	 * One pref failed resource will set IORESOURCE_MEM, as we can
+	 * allocate pref in non-pref range.  Will release all assigned
+	 * non-pref sibling resources according to that bit.
+	 */
+	return mask & (IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH);
+}
+
+static bool pci_need_to_release(unsigned long mask, struct resource *res)
+{
+	if (res->flags & IORESOURCE_IO)
+		return !!(mask & IORESOURCE_IO);
+
+	/* Check pref at first */
+	if (res->flags & IORESOURCE_PREFETCH) {
+		if (mask & IORESOURCE_PREFETCH)
+			return true;
+		/* Count pref if its parent is non-pref */
+		else if ((mask & IORESOURCE_MEM) &&
+			 !(res->parent->flags & IORESOURCE_PREFETCH))
+			return true;
+		else
+			return false;
+	}
+
+	if (res->flags & IORESOURCE_MEM)
+		return !!(mask & IORESOURCE_MEM);
+
+	return false;	/* Should not get here */
+}
+
+static void __assign_resources_sorted(struct list_head *head,
+				      struct list_head *realloc_head,
+				      struct list_head *fail_head)
+{
+	/*
+	 * Should not assign requested resources at first.  They could be
+	 * adjacent, so later reassign can not reallocate them one by one in
+	 * parent resource window.
+	 *
+	 * Try to assign requested + add_size at beginning.  If could do that,
+	 * could get out early.  If could not do that, we still try to assign
+	 * requested at first, then try to reassign add_size for some resources.
+	 *
+	 * Separate three resource type checking if we need to release
+	 * assigned resource after requested + add_size try.
+	 *
+	 *	1. If IO port assignment fails, will release assigned IO
+	 *	   port.
+	 *	2. If pref MMIO assignment fails, release assigned pref
+	 *	   MMIO.  If assigned pref MMIO's parent is non-pref MMIO
+	 *	   and non-pref MMIO assignment fails, will release that
+	 *	   assigned pref MMIO.
+	 *	3. If non-pref MMIO assignment fails or pref MMIO
+	 *	   assignment fails, will release assigned non-pref MMIO.
+	 */
+	LIST_HEAD(save_head);
+	LIST_HEAD(local_fail_head);
+	struct pci_dev_resource *save_res;
+	struct pci_dev_resource *dev_res, *tmp_res, *dev_res2;
+	unsigned long fail_type;
+	resource_size_t add_align, align;
+
+	/* Check if optional add_size is there */
+	if (!realloc_head || list_empty(realloc_head))
+		goto requested_and_reassign;
+
+	/* Save original start, end, flags etc at first */
+	list_for_each_entry(dev_res, head, list) {
+		if (add_to_list(&save_head, dev_res->dev, dev_res->res, 0, 0)) {
+			free_list(&save_head);
+			goto requested_and_reassign;
+		}
+	}
+
+	/* Update res in head list with add_size in realloc_head list */
+	list_for_each_entry_safe(dev_res, tmp_res, head, list) {
+		dev_res->res->end += get_res_add_size(realloc_head,
+							dev_res->res);
+
+		/*
+		 * There are two kinds of additional resources in the list:
+		 * 1. bridge resource  -- IORESOURCE_STARTALIGN
+		 * 2. SR-IOV resource  -- IORESOURCE_SIZEALIGN
+		 * Here just fix the additional alignment for bridge
+		 */
+		if (!(dev_res->res->flags & IORESOURCE_STARTALIGN))
+			continue;
+
+		add_align = get_res_add_align(realloc_head, dev_res->res);
+
+		/*
+		 * The "head" list is sorted by alignment so resources with
+		 * bigger alignment will be assigned first.  After we
+		 * change the alignment of a dev_res in "head" list, we
+		 * need to reorder the list by alignment to make it
+		 * consistent.
+		 */
+		if (add_align > dev_res->res->start) {
+			resource_size_t r_size = resource_size(dev_res->res);
+
+			dev_res->res->start = add_align;
+			dev_res->res->end = add_align + r_size - 1;
+
+			list_for_each_entry(dev_res2, head, list) {
+				align = pci_resource_alignment(dev_res2->dev,
+							       dev_res2->res);
+				if (add_align > align) {
+					list_move_tail(&dev_res->list,
+						       &dev_res2->list);
+					break;
+				}
+			}
+		}
+
+	}
+
+	/* Try updated head list with add_size added */
+	assign_requested_resources_sorted(head, &local_fail_head);
+
+	/* All assigned with add_size? */
+	if (list_empty(&local_fail_head)) {
+		/* Remove head list from realloc_head list */
+		list_for_each_entry(dev_res, head, list)
+			remove_from_list(realloc_head, dev_res->res);
+		free_list(&save_head);
+		free_list(head);
+		return;
+	}
+
+	/* Check failed type */
+	fail_type = pci_fail_res_type_mask(&local_fail_head);
+	/* Remove not need to be released assigned res from head list etc */
+	list_for_each_entry_safe(dev_res, tmp_res, head, list)
+		if (dev_res->res->parent &&
+		    !pci_need_to_release(fail_type, dev_res->res)) {
+			/* Remove it from realloc_head list */
+			remove_from_list(realloc_head, dev_res->res);
+			remove_from_list(&save_head, dev_res->res);
+			list_del(&dev_res->list);
+			kfree(dev_res);
+		}
+
+	free_list(&local_fail_head);
+	/* Release assigned resource */
+	list_for_each_entry(dev_res, head, list)
+		if (dev_res->res->parent)
+			release_resource(dev_res->res);
+	/* Restore start/end/flags from saved list */
+	list_for_each_entry(save_res, &save_head, list) {
+		struct resource *res = save_res->res;
+
+		res->start = save_res->start;
+		res->end = save_res->end;
+		res->flags = save_res->flags;
+	}
+	free_list(&save_head);
+
+requested_and_reassign:
+	/* Satisfy the must-have resource requests */
+	assign_requested_resources_sorted(head, fail_head);
+
+	/* Try to satisfy any additional optional resource requests */
+	if (realloc_head)
+		reassign_resources_sorted(realloc_head, head);
+	free_list(head);
+}
+
+static void pdev_assign_resources_sorted(struct pci_dev *dev,
+					 struct list_head *add_head,
+					 struct list_head *fail_head)
+{
+	LIST_HEAD(head);
+
+	__dev_sort_resources(dev, &head);
+	__assign_resources_sorted(&head, add_head, fail_head);
+
+}
+
+static void pbus_assign_resources_sorted(const struct pci_bus *bus,
+					 struct list_head *realloc_head,
+					 struct list_head *fail_head)
+{
+	struct pci_dev *dev;
+	LIST_HEAD(head);
+
+	list_for_each_entry(dev, &bus->devices, bus_list)
+		__dev_sort_resources(dev, &head);
+
+	__assign_resources_sorted(&head, realloc_head, fail_head);
+}
+
+void pci_setup_cardbus(struct pci_bus *bus)
+{
+	struct pci_dev *bridge = bus->self;
+	struct resource *res;
+	struct pci_bus_region region;
+
+	pci_info(bridge, "CardBus bridge to %pR\n",
+		 &bus->busn_res);
+
+	res = bus->resource[0];
+	pcibios_resource_to_bus(bridge->bus, &region, res);
+	if (res->flags & IORESOURCE_IO) {
+		/*
+		 * The IO resource is allocated a range twice as large as it
+		 * would normally need.  This allows us to set both IO regs.
+		 */
+		pci_info(bridge, "  bridge window %pR\n", res);
+		pci_write_config_dword(bridge, PCI_CB_IO_BASE_0,
+					region.start);
+		pci_write_config_dword(bridge, PCI_CB_IO_LIMIT_0,
+					region.end);
+	}
+
+	res = bus->resource[1];
+	pcibios_resource_to_bus(bridge->bus, &region, res);
+	if (res->flags & IORESOURCE_IO) {
+		pci_info(bridge, "  bridge window %pR\n", res);
+		pci_write_config_dword(bridge, PCI_CB_IO_BASE_1,
+					region.start);
+		pci_write_config_dword(bridge, PCI_CB_IO_LIMIT_1,
+					region.end);
+	}
+
+	res = bus->resource[2];
+	pcibios_resource_to_bus(bridge->bus, &region, res);
+	if (res->flags & IORESOURCE_MEM) {
+		pci_info(bridge, "  bridge window %pR\n", res);
+		pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_0,
+					region.start);
+		pci_write_config_dword(bridge, PCI_CB_MEMORY_LIMIT_0,
+					region.end);
+	}
+
+	res = bus->resource[3];
+	pcibios_resource_to_bus(bridge->bus, &region, res);
+	if (res->flags & IORESOURCE_MEM) {
+		pci_info(bridge, "  bridge window %pR\n", res);
+		pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_1,
+					region.start);
+		pci_write_config_dword(bridge, PCI_CB_MEMORY_LIMIT_1,
+					region.end);
+	}
+}
+EXPORT_SYMBOL(pci_setup_cardbus);
+
+/*
+ * Initialize bridges with base/limit values we have collected.  PCI-to-PCI
+ * Bridge Architecture Specification rev. 1.1 (1998) requires that if there
+ * are no I/O ports or memory behind the bridge, the corresponding range
+ * must be turned off by writing base value greater than limit to the
+ * bridge's base/limit registers.
+ *
+ * Note: care must be taken when updating I/O base/limit registers of
+ * bridges which support 32-bit I/O.  This update requires two config space
+ * writes, so it's quite possible that an I/O window of the bridge will
+ * have some undesirable address (e.g. 0) after the first write.  Ditto
+ * 64-bit prefetchable MMIO.
+ */
+static void pci_setup_bridge_io(struct pci_dev *bridge)
+{
+	struct resource *res;
+	struct pci_bus_region region;
+	unsigned long io_mask;
+	u8 io_base_lo, io_limit_lo;
+	u16 l;
+	u32 io_upper16;
+
+	io_mask = PCI_IO_RANGE_MASK;
+	if (bridge->io_window_1k)
+		io_mask = PCI_IO_1K_RANGE_MASK;
+
+	/* Set up the top and bottom of the PCI I/O segment for this bus */
+	res = &bridge->resource[PCI_BRIDGE_IO_WINDOW];
+	pcibios_resource_to_bus(bridge->bus, &region, res);
+	if (res->flags & IORESOURCE_IO) {
+		pci_read_config_word(bridge, PCI_IO_BASE, &l);
+		io_base_lo = (region.start >> 8) & io_mask;
+		io_limit_lo = (region.end >> 8) & io_mask;
+		l = ((u16) io_limit_lo << 8) | io_base_lo;
+		/* Set up upper 16 bits of I/O base/limit */
+		io_upper16 = (region.end & 0xffff0000) | (region.start >> 16);
+		pci_info(bridge, "  bridge window %pR\n", res);
+	} else {
+		/* Clear upper 16 bits of I/O base/limit */
+		io_upper16 = 0;
+		l = 0x00f0;
+	}
+	/* Temporarily disable the I/O range before updating PCI_IO_BASE */
+	pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, 0x0000ffff);
+	/* Update lower 16 bits of I/O base/limit */
+	pci_write_config_word(bridge, PCI_IO_BASE, l);
+	/* Update upper 16 bits of I/O base/limit */
+	pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, io_upper16);
+}
+
+static void pci_setup_bridge_mmio(struct pci_dev *bridge)
+{
+	struct resource *res;
+	struct pci_bus_region region;
+	u32 l;
+
+	/* Set up the top and bottom of the PCI Memory segment for this bus */
+	res = &bridge->resource[PCI_BRIDGE_MEM_WINDOW];
+	pcibios_resource_to_bus(bridge->bus, &region, res);
+	if (res->flags & IORESOURCE_MEM) {
+		l = (region.start >> 16) & 0xfff0;
+		l |= region.end & 0xfff00000;
+		pci_info(bridge, "  bridge window %pR\n", res);
+	} else {
+		l = 0x0000fff0;
+	}
+	pci_write_config_dword(bridge, PCI_MEMORY_BASE, l);
+}
+
+static void pci_setup_bridge_mmio_pref(struct pci_dev *bridge)
+{
+	struct resource *res;
+	struct pci_bus_region region;
+	u32 l, bu, lu;
+
+	/*
+	 * Clear out the upper 32 bits of PREF limit.  If
+	 * PCI_PREF_BASE_UPPER32 was non-zero, this temporarily disables
+	 * PREF range, which is ok.
+	 */
+	pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, 0);
+
+	/* Set up PREF base/limit */
+	bu = lu = 0;
+	res = &bridge->resource[PCI_BRIDGE_PREF_MEM_WINDOW];
+	pcibios_resource_to_bus(bridge->bus, &region, res);
+	if (res->flags & IORESOURCE_PREFETCH) {
+		l = (region.start >> 16) & 0xfff0;
+		l |= region.end & 0xfff00000;
+		if (res->flags & IORESOURCE_MEM_64) {
+			bu = upper_32_bits(region.start);
+			lu = upper_32_bits(region.end);
+		}
+		pci_info(bridge, "  bridge window %pR\n", res);
+	} else {
+		l = 0x0000fff0;
+	}
+	pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l);
+
+	/* Set the upper 32 bits of PREF base & limit */
+	pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu);
+	pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu);
+}
+
+static void __pci_setup_bridge(struct pci_bus *bus, unsigned long type)
+{
+	struct pci_dev *bridge = bus->self;
+
+	pci_info(bridge, "PCI bridge to %pR\n",
+		 &bus->busn_res);
+
+	if (type & IORESOURCE_IO)
+		pci_setup_bridge_io(bridge);
+
+	if (type & IORESOURCE_MEM)
+		pci_setup_bridge_mmio(bridge);
+
+	if (type & IORESOURCE_PREFETCH)
+		pci_setup_bridge_mmio_pref(bridge);
+
+	pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl);
+}
+
+void __weak pcibios_setup_bridge(struct pci_bus *bus, unsigned long type)
+{
+}
+
+void pci_setup_bridge(struct pci_bus *bus)
+{
+	unsigned long type = IORESOURCE_IO | IORESOURCE_MEM |
+				  IORESOURCE_PREFETCH;
+
+	pcibios_setup_bridge(bus, type);
+	__pci_setup_bridge(bus, type);
+}
+
+
+int pci_claim_bridge_resource(struct pci_dev *bridge, int i)
+{
+	if (i < PCI_BRIDGE_RESOURCES || i > PCI_BRIDGE_RESOURCE_END)
+		return 0;
+
+	if (pci_claim_resource(bridge, i) == 0)
+		return 0;	/* Claimed the window */
+
+	if ((bridge->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+		return 0;
+
+	if (!pci_bus_clip_resource(bridge, i))
+		return -EINVAL;	/* Clipping didn't change anything */
+
+	switch (i) {
+	case PCI_BRIDGE_IO_WINDOW:
+		pci_setup_bridge_io(bridge);
+		break;
+	case PCI_BRIDGE_MEM_WINDOW:
+		pci_setup_bridge_mmio(bridge);
+		break;
+	case PCI_BRIDGE_PREF_MEM_WINDOW:
+		pci_setup_bridge_mmio_pref(bridge);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (pci_claim_resource(bridge, i) == 0)
+		return 0;	/* Claimed a smaller window */
+
+	return -EINVAL;
+}
+
+/*
+ * Check whether the bridge supports optional I/O and prefetchable memory
+ * ranges.  If not, the respective base/limit registers must be read-only
+ * and read as 0.
+ */
+static void pci_bridge_check_ranges(struct pci_bus *bus)
+{
+	struct pci_dev *bridge = bus->self;
+	struct resource *b_res;
+
+	b_res = &bridge->resource[PCI_BRIDGE_MEM_WINDOW];
+	b_res->flags |= IORESOURCE_MEM;
+
+	if (bridge->io_window) {
+		b_res = &bridge->resource[PCI_BRIDGE_IO_WINDOW];
+		b_res->flags |= IORESOURCE_IO;
+	}
+
+	if (bridge->pref_window) {
+		b_res = &bridge->resource[PCI_BRIDGE_PREF_MEM_WINDOW];
+		b_res->flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH;
+		if (bridge->pref_64_window) {
+			b_res->flags |= IORESOURCE_MEM_64 |
+					PCI_PREF_RANGE_TYPE_64;
+		}
+	}
+}
+
+/*
+ * Helper function for sizing routines.  Assigned resources have non-NULL
+ * parent resource.
+ *
+ * Return first unassigned resource of the correct type.  If there is none,
+ * return first assigned resource of the correct type.  If none of the
+ * above, return NULL.
+ *
+ * Returning an assigned resource of the correct type allows the caller to
+ * distinguish between already assigned and no resource of the correct type.
+ */
+static struct resource *find_bus_resource_of_type(struct pci_bus *bus,
+						  unsigned long type_mask,
+						  unsigned long type)
+{
+	struct resource *r, *r_assigned = NULL;
+
+	pci_bus_for_each_resource(bus, r) {
+		if (r == &ioport_resource || r == &iomem_resource)
+			continue;
+		if (r && (r->flags & type_mask) == type && !r->parent)
+			return r;
+		if (r && (r->flags & type_mask) == type && !r_assigned)
+			r_assigned = r;
+	}
+	return r_assigned;
+}
+
+static resource_size_t calculate_iosize(resource_size_t size,
+					resource_size_t min_size,
+					resource_size_t size1,
+					resource_size_t add_size,
+					resource_size_t children_add_size,
+					resource_size_t old_size,
+					resource_size_t align)
+{
+	if (size < min_size)
+		size = min_size;
+	if (old_size == 1)
+		old_size = 0;
+	/*
+	 * To be fixed in 2.5: we should have sort of HAVE_ISA flag in the
+	 * struct pci_bus.
+	 */
+#if defined(CONFIG_ISA) || defined(CONFIG_EISA)
+	size = (size & 0xff) + ((size & ~0xffUL) << 2);
+#endif
+	size = size + size1;
+	if (size < old_size)
+		size = old_size;
+
+	size = ALIGN(max(size, add_size) + children_add_size, align);
+	return size;
+}
+
+static resource_size_t calculate_memsize(resource_size_t size,
+					 resource_size_t min_size,
+					 resource_size_t add_size,
+					 resource_size_t children_add_size,
+					 resource_size_t old_size,
+					 resource_size_t align)
+{
+	if (size < min_size)
+		size = min_size;
+	if (old_size == 1)
+		old_size = 0;
+	if (size < old_size)
+		size = old_size;
+
+	size = ALIGN(max(size, add_size) + children_add_size, align);
+	return size;
+}
+
+resource_size_t __weak pcibios_window_alignment(struct pci_bus *bus,
+						unsigned long type)
+{
+	return 1;
+}
+
+#define PCI_P2P_DEFAULT_MEM_ALIGN	0x100000	/* 1MiB */
+#define PCI_P2P_DEFAULT_IO_ALIGN	0x1000		/* 4KiB */
+#define PCI_P2P_DEFAULT_IO_ALIGN_1K	0x400		/* 1KiB */
+
+static resource_size_t window_alignment(struct pci_bus *bus, unsigned long type)
+{
+	resource_size_t align = 1, arch_align;
+
+	if (type & IORESOURCE_MEM)
+		align = PCI_P2P_DEFAULT_MEM_ALIGN;
+	else if (type & IORESOURCE_IO) {
+		/*
+		 * Per spec, I/O windows are 4K-aligned, but some bridges have
+		 * an extension to support 1K alignment.
+		 */
+		if (bus->self && bus->self->io_window_1k)
+			align = PCI_P2P_DEFAULT_IO_ALIGN_1K;
+		else
+			align = PCI_P2P_DEFAULT_IO_ALIGN;
+	}
+
+	arch_align = pcibios_window_alignment(bus, type);
+	return max(align, arch_align);
+}
+
+/**
+ * pbus_size_io() - Size the I/O window of a given bus
+ *
+ * @bus:		The bus
+ * @min_size:		The minimum I/O window that must be allocated
+ * @add_size:		Additional optional I/O window
+ * @realloc_head:	Track the additional I/O window on this list
+ *
+ * Sizing the I/O windows of the PCI-PCI bridge is trivial, since these
+ * windows have 1K or 4K granularity and the I/O ranges of non-bridge PCI
+ * devices are limited to 256 bytes.  We must be careful with the ISA
+ * aliasing though.
+ */
+static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
+			 resource_size_t add_size,
+			 struct list_head *realloc_head)
+{
+	struct pci_dev *dev;
+	struct resource *b_res = find_bus_resource_of_type(bus, IORESOURCE_IO,
+							   IORESOURCE_IO);
+	resource_size_t size = 0, size0 = 0, size1 = 0;
+	resource_size_t children_add_size = 0;
+	resource_size_t min_align, align;
+
+	if (!b_res)
+		return;
+
+	/* If resource is already assigned, nothing more to do */
+	if (b_res->parent)
+		return;
+
+	min_align = window_alignment(bus, IORESOURCE_IO);
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		struct resource *r;
+
+		pci_dev_for_each_resource(dev, r) {
+			unsigned long r_size;
+
+			if (r->parent || !(r->flags & IORESOURCE_IO))
+				continue;
+			r_size = resource_size(r);
+
+			if (r_size < 0x400)
+				/* Might be re-aligned for ISA */
+				size += r_size;
+			else
+				size1 += r_size;
+
+			align = pci_resource_alignment(dev, r);
+			if (align > min_align)
+				min_align = align;
+
+			if (realloc_head)
+				children_add_size += get_res_add_size(realloc_head, r);
+		}
+	}
+
+	size0 = calculate_iosize(size, min_size, size1, 0, 0,
+			resource_size(b_res), min_align);
+	size1 = (!realloc_head || (realloc_head && !add_size && !children_add_size)) ? size0 :
+		calculate_iosize(size, min_size, size1, add_size, children_add_size,
+			resource_size(b_res), min_align);
+	if (!size0 && !size1) {
+		if (bus->self && (b_res->start || b_res->end))
+			pci_info(bus->self, "disabling bridge window %pR to %pR (unused)\n",
+				 b_res, &bus->busn_res);
+		b_res->flags = 0;
+		return;
+	}
+
+	b_res->start = min_align;
+	b_res->end = b_res->start + size0 - 1;
+	b_res->flags |= IORESOURCE_STARTALIGN;
+	if (bus->self && size1 > size0 && realloc_head) {
+		add_to_list(realloc_head, bus->self, b_res, size1-size0,
+			    min_align);
+		pci_info(bus->self, "bridge window %pR to %pR add_size %llx\n",
+			 b_res, &bus->busn_res,
+			 (unsigned long long) size1 - size0);
+	}
+}
+
+static inline resource_size_t calculate_mem_align(resource_size_t *aligns,
+						  int max_order)
+{
+	resource_size_t align = 0;
+	resource_size_t min_align = 0;
+	int order;
+
+	for (order = 0; order <= max_order; order++) {
+		resource_size_t align1 = 1;
+
+		align1 <<= (order + 20);
+
+		if (!align)
+			min_align = align1;
+		else if (ALIGN(align + min_align, min_align) < align1)
+			min_align = align1 >> 1;
+		align += aligns[order];
+	}
+
+	return min_align;
+}
+
+/**
+ * pbus_size_mem() - Size the memory window of a given bus
+ *
+ * @bus:		The bus
+ * @mask:		Mask the resource flag, then compare it with type
+ * @type:		The type of free resource from bridge
+ * @type2:		Second match type
+ * @type3:		Third match type
+ * @min_size:		The minimum memory window that must be allocated
+ * @add_size:		Additional optional memory window
+ * @realloc_head:	Track the additional memory window on this list
+ *
+ * Calculate the size of the bus and minimal alignment which guarantees
+ * that all child resources fit in this size.
+ *
+ * Return -ENOSPC if there's no available bus resource of the desired
+ * type.  Otherwise, set the bus resource start/end to indicate the
+ * required size, add things to realloc_head (if supplied), and return 0.
+ */
+static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
+			 unsigned long type, unsigned long type2,
+			 unsigned long type3, resource_size_t min_size,
+			 resource_size_t add_size,
+			 struct list_head *realloc_head)
+{
+	struct pci_dev *dev;
+	resource_size_t min_align, align, size, size0, size1;
+	resource_size_t aligns[24]; /* Alignments from 1MB to 8TB */
+	int order, max_order;
+	struct resource *b_res = find_bus_resource_of_type(bus,
+					mask | IORESOURCE_PREFETCH, type);
+	resource_size_t children_add_size = 0;
+	resource_size_t children_add_align = 0;
+	resource_size_t add_align = 0;
+
+	if (!b_res)
+		return -ENOSPC;
+
+	/* If resource is already assigned, nothing more to do */
+	if (b_res->parent)
+		return 0;
+
+	memset(aligns, 0, sizeof(aligns));
+	max_order = 0;
+	size = 0;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		struct resource *r;
+		int i;
+
+		pci_dev_for_each_resource(dev, r, i) {
+			resource_size_t r_size;
+
+			if (r->parent || (r->flags & IORESOURCE_PCI_FIXED) ||
+			    ((r->flags & mask) != type &&
+			     (r->flags & mask) != type2 &&
+			     (r->flags & mask) != type3))
+				continue;
+			r_size = resource_size(r);
+#ifdef CONFIG_PCI_IOV
+			/* Put SRIOV requested res to the optional list */
+			if (realloc_head && i >= PCI_IOV_RESOURCES &&
+					i <= PCI_IOV_RESOURCE_END) {
+				add_align = max(pci_resource_alignment(dev, r), add_align);
+				r->end = r->start - 1;
+				add_to_list(realloc_head, dev, r, r_size, 0 /* Don't care */);
+				children_add_size += r_size;
+				continue;
+			}
+#endif
+			/*
+			 * aligns[0] is for 1MB (since bridge memory
+			 * windows are always at least 1MB aligned), so
+			 * keep "order" from being negative for smaller
+			 * resources.
+			 */
+			align = pci_resource_alignment(dev, r);
+			order = __ffs(align) - 20;
+			if (order < 0)
+				order = 0;
+			if (order >= ARRAY_SIZE(aligns)) {
+				pci_warn(dev, "disabling BAR %d: %pR (bad alignment %#llx)\n",
+					 i, r, (unsigned long long) align);
+				r->flags = 0;
+				continue;
+			}
+			size += max(r_size, align);
+			/*
+			 * Exclude ranges with size > align from calculation of
+			 * the alignment.
+			 */
+			if (r_size <= align)
+				aligns[order] += align;
+			if (order > max_order)
+				max_order = order;
+
+			if (realloc_head) {
+				children_add_size += get_res_add_size(realloc_head, r);
+				children_add_align = get_res_add_align(realloc_head, r);
+				add_align = max(add_align, children_add_align);
+			}
+		}
+	}
+
+	min_align = calculate_mem_align(aligns, max_order);
+	min_align = max(min_align, window_alignment(bus, b_res->flags));
+	size0 = calculate_memsize(size, min_size, 0, 0, resource_size(b_res), min_align);
+	add_align = max(min_align, add_align);
+	size1 = (!realloc_head || (realloc_head && !add_size && !children_add_size)) ? size0 :
+		calculate_memsize(size, min_size, add_size, children_add_size,
+				resource_size(b_res), add_align);
+	if (!size0 && !size1) {
+		if (bus->self && (b_res->start || b_res->end))
+			pci_info(bus->self, "disabling bridge window %pR to %pR (unused)\n",
+				 b_res, &bus->busn_res);
+		b_res->flags = 0;
+		return 0;
+	}
+	b_res->start = min_align;
+	b_res->end = size0 + min_align - 1;
+	b_res->flags |= IORESOURCE_STARTALIGN;
+	if (bus->self && size1 > size0 && realloc_head) {
+		add_to_list(realloc_head, bus->self, b_res, size1-size0, add_align);
+		pci_info(bus->self, "bridge window %pR to %pR add_size %llx add_align %llx\n",
+			   b_res, &bus->busn_res,
+			   (unsigned long long) (size1 - size0),
+			   (unsigned long long) add_align);
+	}
+	return 0;
+}
+
+unsigned long pci_cardbus_resource_alignment(struct resource *res)
+{
+	if (res->flags & IORESOURCE_IO)
+		return pci_cardbus_io_size;
+	if (res->flags & IORESOURCE_MEM)
+		return pci_cardbus_mem_size;
+	return 0;
+}
+
+static void pci_bus_size_cardbus(struct pci_bus *bus,
+				 struct list_head *realloc_head)
+{
+	struct pci_dev *bridge = bus->self;
+	struct resource *b_res;
+	resource_size_t b_res_3_size = pci_cardbus_mem_size * 2;
+	u16 ctrl;
+
+	b_res = &bridge->resource[PCI_CB_BRIDGE_IO_0_WINDOW];
+	if (b_res->parent)
+		goto handle_b_res_1;
+	/*
+	 * Reserve some resources for CardBus.  We reserve a fixed amount
+	 * of bus space for CardBus bridges.
+	 */
+	b_res->start = pci_cardbus_io_size;
+	b_res->end = b_res->start + pci_cardbus_io_size - 1;
+	b_res->flags |= IORESOURCE_IO | IORESOURCE_STARTALIGN;
+	if (realloc_head) {
+		b_res->end -= pci_cardbus_io_size;
+		add_to_list(realloc_head, bridge, b_res, pci_cardbus_io_size,
+			    pci_cardbus_io_size);
+	}
+
+handle_b_res_1:
+	b_res = &bridge->resource[PCI_CB_BRIDGE_IO_1_WINDOW];
+	if (b_res->parent)
+		goto handle_b_res_2;
+	b_res->start = pci_cardbus_io_size;
+	b_res->end = b_res->start + pci_cardbus_io_size - 1;
+	b_res->flags |= IORESOURCE_IO | IORESOURCE_STARTALIGN;
+	if (realloc_head) {
+		b_res->end -= pci_cardbus_io_size;
+		add_to_list(realloc_head, bridge, b_res, pci_cardbus_io_size,
+			    pci_cardbus_io_size);
+	}
+
+handle_b_res_2:
+	/* MEM1 must not be pref MMIO */
+	pci_read_config_word(bridge, PCI_CB_BRIDGE_CONTROL, &ctrl);
+	if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM1) {
+		ctrl &= ~PCI_CB_BRIDGE_CTL_PREFETCH_MEM1;
+		pci_write_config_word(bridge, PCI_CB_BRIDGE_CONTROL, ctrl);
+		pci_read_config_word(bridge, PCI_CB_BRIDGE_CONTROL, &ctrl);
+	}
+
+	/* Check whether prefetchable memory is supported by this bridge. */
+	pci_read_config_word(bridge, PCI_CB_BRIDGE_CONTROL, &ctrl);
+	if (!(ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0)) {
+		ctrl |= PCI_CB_BRIDGE_CTL_PREFETCH_MEM0;
+		pci_write_config_word(bridge, PCI_CB_BRIDGE_CONTROL, ctrl);
+		pci_read_config_word(bridge, PCI_CB_BRIDGE_CONTROL, &ctrl);
+	}
+
+	b_res = &bridge->resource[PCI_CB_BRIDGE_MEM_0_WINDOW];
+	if (b_res->parent)
+		goto handle_b_res_3;
+	/*
+	 * If we have prefetchable memory support, allocate two regions.
+	 * Otherwise, allocate one region of twice the size.
+	 */
+	if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0) {
+		b_res->start = pci_cardbus_mem_size;
+		b_res->end = b_res->start + pci_cardbus_mem_size - 1;
+		b_res->flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH |
+				    IORESOURCE_STARTALIGN;
+		if (realloc_head) {
+			b_res->end -= pci_cardbus_mem_size;
+			add_to_list(realloc_head, bridge, b_res,
+				    pci_cardbus_mem_size, pci_cardbus_mem_size);
+		}
+
+		/* Reduce that to half */
+		b_res_3_size = pci_cardbus_mem_size;
+	}
+
+handle_b_res_3:
+	b_res = &bridge->resource[PCI_CB_BRIDGE_MEM_1_WINDOW];
+	if (b_res->parent)
+		goto handle_done;
+	b_res->start = pci_cardbus_mem_size;
+	b_res->end = b_res->start + b_res_3_size - 1;
+	b_res->flags |= IORESOURCE_MEM | IORESOURCE_STARTALIGN;
+	if (realloc_head) {
+		b_res->end -= b_res_3_size;
+		add_to_list(realloc_head, bridge, b_res, b_res_3_size,
+			    pci_cardbus_mem_size);
+	}
+
+handle_done:
+	;
+}
+
+void __pci_bus_size_bridges(struct pci_bus *bus, struct list_head *realloc_head)
+{
+	struct pci_dev *dev;
+	unsigned long mask, prefmask, type2 = 0, type3 = 0;
+	resource_size_t additional_io_size = 0, additional_mmio_size = 0,
+			additional_mmio_pref_size = 0;
+	struct resource *pref;
+	struct pci_host_bridge *host;
+	int hdr_type, ret;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		struct pci_bus *b = dev->subordinate;
+		if (!b)
+			continue;
+
+		switch (dev->hdr_type) {
+		case PCI_HEADER_TYPE_CARDBUS:
+			pci_bus_size_cardbus(b, realloc_head);
+			break;
+
+		case PCI_HEADER_TYPE_BRIDGE:
+		default:
+			__pci_bus_size_bridges(b, realloc_head);
+			break;
+		}
+	}
+
+	/* The root bus? */
+	if (pci_is_root_bus(bus)) {
+		host = to_pci_host_bridge(bus->bridge);
+		if (!host->size_windows)
+			return;
+		pci_bus_for_each_resource(bus, pref)
+			if (pref && (pref->flags & IORESOURCE_PREFETCH))
+				break;
+		hdr_type = -1;	/* Intentionally invalid - not a PCI device. */
+	} else {
+		pref = &bus->self->resource[PCI_BRIDGE_PREF_MEM_WINDOW];
+		hdr_type = bus->self->hdr_type;
+	}
+
+	switch (hdr_type) {
+	case PCI_HEADER_TYPE_CARDBUS:
+		/* Don't size CardBuses yet */
+		break;
+
+	case PCI_HEADER_TYPE_BRIDGE:
+		pci_bridge_check_ranges(bus);
+		if (bus->self->is_hotplug_bridge) {
+			additional_io_size  = pci_hotplug_io_size;
+			additional_mmio_size = pci_hotplug_mmio_size;
+			additional_mmio_pref_size = pci_hotplug_mmio_pref_size;
+		}
+		fallthrough;
+	default:
+		pbus_size_io(bus, realloc_head ? 0 : additional_io_size,
+			     additional_io_size, realloc_head);
+
+		/*
+		 * If there's a 64-bit prefetchable MMIO window, compute
+		 * the size required to put all 64-bit prefetchable
+		 * resources in it.
+		 */
+		mask = IORESOURCE_MEM;
+		prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
+		if (pref && (pref->flags & IORESOURCE_MEM_64)) {
+			prefmask |= IORESOURCE_MEM_64;
+			ret = pbus_size_mem(bus, prefmask, prefmask,
+				prefmask, prefmask,
+				realloc_head ? 0 : additional_mmio_pref_size,
+				additional_mmio_pref_size, realloc_head);
+
+			/*
+			 * If successful, all non-prefetchable resources
+			 * and any 32-bit prefetchable resources will go in
+			 * the non-prefetchable window.
+			 */
+			if (ret == 0) {
+				mask = prefmask;
+				type2 = prefmask & ~IORESOURCE_MEM_64;
+				type3 = prefmask & ~IORESOURCE_PREFETCH;
+			}
+		}
+
+		/*
+		 * If there is no 64-bit prefetchable window, compute the
+		 * size required to put all prefetchable resources in the
+		 * 32-bit prefetchable window (if there is one).
+		 */
+		if (!type2) {
+			prefmask &= ~IORESOURCE_MEM_64;
+			ret = pbus_size_mem(bus, prefmask, prefmask,
+				prefmask, prefmask,
+				realloc_head ? 0 : additional_mmio_pref_size,
+				additional_mmio_pref_size, realloc_head);
+
+			/*
+			 * If successful, only non-prefetchable resources
+			 * will go in the non-prefetchable window.
+			 */
+			if (ret == 0)
+				mask = prefmask;
+			else
+				additional_mmio_size += additional_mmio_pref_size;
+
+			type2 = type3 = IORESOURCE_MEM;
+		}
+
+		/*
+		 * Compute the size required to put everything else in the
+		 * non-prefetchable window. This includes:
+		 *
+		 *   - all non-prefetchable resources
+		 *   - 32-bit prefetchable resources if there's a 64-bit
+		 *     prefetchable window or no prefetchable window at all
+		 *   - 64-bit prefetchable resources if there's no prefetchable
+		 *     window at all
+		 *
+		 * Note that the strategy in __pci_assign_resource() must match
+		 * that used here. Specifically, we cannot put a 32-bit
+		 * prefetchable resource in a 64-bit prefetchable window.
+		 */
+		pbus_size_mem(bus, mask, IORESOURCE_MEM, type2, type3,
+			      realloc_head ? 0 : additional_mmio_size,
+			      additional_mmio_size, realloc_head);
+		break;
+	}
+}
+
+void pci_bus_size_bridges(struct pci_bus *bus)
+{
+	__pci_bus_size_bridges(bus, NULL);
+}
+EXPORT_SYMBOL(pci_bus_size_bridges);
+
+static void assign_fixed_resource_on_bus(struct pci_bus *b, struct resource *r)
+{
+	struct resource *parent_r;
+	unsigned long mask = IORESOURCE_IO | IORESOURCE_MEM |
+			     IORESOURCE_PREFETCH;
+
+	pci_bus_for_each_resource(b, parent_r) {
+		if (!parent_r)
+			continue;
+
+		if ((r->flags & mask) == (parent_r->flags & mask) &&
+		    resource_contains(parent_r, r))
+			request_resource(parent_r, r);
+	}
+}
+
+/*
+ * Try to assign any resources marked as IORESOURCE_PCI_FIXED, as they are
+ * skipped by pbus_assign_resources_sorted().
+ */
+static void pdev_assign_fixed_resources(struct pci_dev *dev)
+{
+	struct resource *r;
+
+	pci_dev_for_each_resource(dev, r) {
+		struct pci_bus *b;
+
+		if (r->parent || !(r->flags & IORESOURCE_PCI_FIXED) ||
+		    !(r->flags & (IORESOURCE_IO | IORESOURCE_MEM)))
+			continue;
+
+		b = dev->bus;
+		while (b && !r->parent) {
+			assign_fixed_resource_on_bus(b, r);
+			b = b->parent;
+		}
+	}
+}
+
+void __pci_bus_assign_resources(const struct pci_bus *bus,
+				struct list_head *realloc_head,
+				struct list_head *fail_head)
+{
+	struct pci_bus *b;
+	struct pci_dev *dev;
+
+	pbus_assign_resources_sorted(bus, realloc_head, fail_head);
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		pdev_assign_fixed_resources(dev);
+
+		b = dev->subordinate;
+		if (!b)
+			continue;
+
+		__pci_bus_assign_resources(b, realloc_head, fail_head);
+
+		switch (dev->hdr_type) {
+		case PCI_HEADER_TYPE_BRIDGE:
+			if (!pci_is_enabled(dev))
+				pci_setup_bridge(b);
+			break;
+
+		case PCI_HEADER_TYPE_CARDBUS:
+			pci_setup_cardbus(b);
+			break;
+
+		default:
+			pci_info(dev, "not setting up bridge for bus %04x:%02x\n",
+				 pci_domain_nr(b), b->number);
+			break;
+		}
+	}
+}
+
+void pci_bus_assign_resources(const struct pci_bus *bus)
+{
+	__pci_bus_assign_resources(bus, NULL, NULL);
+}
+EXPORT_SYMBOL(pci_bus_assign_resources);
+
+static void pci_claim_device_resources(struct pci_dev *dev)
+{
+	int i;
+
+	for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) {
+		struct resource *r = &dev->resource[i];
+
+		if (!r->flags || r->parent)
+			continue;
+
+		pci_claim_resource(dev, i);
+	}
+}
+
+static void pci_claim_bridge_resources(struct pci_dev *dev)
+{
+	int i;
+
+	for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) {
+		struct resource *r = &dev->resource[i];
+
+		if (!r->flags || r->parent)
+			continue;
+
+		pci_claim_bridge_resource(dev, i);
+	}
+}
+
+static void pci_bus_allocate_dev_resources(struct pci_bus *b)
+{
+	struct pci_dev *dev;
+	struct pci_bus *child;
+
+	list_for_each_entry(dev, &b->devices, bus_list) {
+		pci_claim_device_resources(dev);
+
+		child = dev->subordinate;
+		if (child)
+			pci_bus_allocate_dev_resources(child);
+	}
+}
+
+static void pci_bus_allocate_resources(struct pci_bus *b)
+{
+	struct pci_bus *child;
+
+	/*
+	 * Carry out a depth-first search on the PCI bus tree to allocate
+	 * bridge apertures.  Read the programmed bridge bases and
+	 * recursively claim the respective bridge resources.
+	 */
+	if (b->self) {
+		pci_read_bridge_bases(b);
+		pci_claim_bridge_resources(b->self);
+	}
+
+	list_for_each_entry(child, &b->children, node)
+		pci_bus_allocate_resources(child);
+}
+
+void pci_bus_claim_resources(struct pci_bus *b)
+{
+	pci_bus_allocate_resources(b);
+	pci_bus_allocate_dev_resources(b);
+}
+EXPORT_SYMBOL(pci_bus_claim_resources);
+
+static void __pci_bridge_assign_resources(const struct pci_dev *bridge,
+					  struct list_head *add_head,
+					  struct list_head *fail_head)
+{
+	struct pci_bus *b;
+
+	pdev_assign_resources_sorted((struct pci_dev *)bridge,
+					 add_head, fail_head);
+
+	b = bridge->subordinate;
+	if (!b)
+		return;
+
+	__pci_bus_assign_resources(b, add_head, fail_head);
+
+	switch (bridge->class >> 8) {
+	case PCI_CLASS_BRIDGE_PCI:
+		pci_setup_bridge(b);
+		break;
+
+	case PCI_CLASS_BRIDGE_CARDBUS:
+		pci_setup_cardbus(b);
+		break;
+
+	default:
+		pci_info(bridge, "not setting up bridge for bus %04x:%02x\n",
+			 pci_domain_nr(b), b->number);
+		break;
+	}
+}
+
+#define PCI_RES_TYPE_MASK \
+	(IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH |\
+	 IORESOURCE_MEM_64)
+
+static void pci_bridge_release_resources(struct pci_bus *bus,
+					 unsigned long type)
+{
+	struct pci_dev *dev = bus->self;
+	struct resource *r;
+	unsigned int old_flags;
+	struct resource *b_res;
+	int idx = 1;
+
+	b_res = &dev->resource[PCI_BRIDGE_RESOURCES];
+
+	/*
+	 * 1. If IO port assignment fails, release bridge IO port.
+	 * 2. If non pref MMIO assignment fails, release bridge nonpref MMIO.
+	 * 3. If 64bit pref MMIO assignment fails, and bridge pref is 64bit,
+	 *    release bridge pref MMIO.
+	 * 4. If pref MMIO assignment fails, and bridge pref is 32bit,
+	 *    release bridge pref MMIO.
+	 * 5. If pref MMIO assignment fails, and bridge pref is not
+	 *    assigned, release bridge nonpref MMIO.
+	 */
+	if (type & IORESOURCE_IO)
+		idx = 0;
+	else if (!(type & IORESOURCE_PREFETCH))
+		idx = 1;
+	else if ((type & IORESOURCE_MEM_64) &&
+		 (b_res[2].flags & IORESOURCE_MEM_64))
+		idx = 2;
+	else if (!(b_res[2].flags & IORESOURCE_MEM_64) &&
+		 (b_res[2].flags & IORESOURCE_PREFETCH))
+		idx = 2;
+	else
+		idx = 1;
+
+	r = &b_res[idx];
+
+	if (!r->parent)
+		return;
+
+	/* If there are children, release them all */
+	release_child_resources(r);
+	if (!release_resource(r)) {
+		type = old_flags = r->flags & PCI_RES_TYPE_MASK;
+		pci_info(dev, "resource %d %pR released\n",
+			 PCI_BRIDGE_RESOURCES + idx, r);
+		/* Keep the old size */
+		r->end = resource_size(r) - 1;
+		r->start = 0;
+		r->flags = 0;
+
+		/* Avoiding touch the one without PREF */
+		if (type & IORESOURCE_PREFETCH)
+			type = IORESOURCE_PREFETCH;
+		__pci_setup_bridge(bus, type);
+		/* For next child res under same bridge */
+		r->flags = old_flags;
+	}
+}
+
+enum release_type {
+	leaf_only,
+	whole_subtree,
+};
+
+/*
+ * Try to release PCI bridge resources from leaf bridge, so we can allocate
+ * a larger window later.
+ */
+static void pci_bus_release_bridge_resources(struct pci_bus *bus,
+					     unsigned long type,
+					     enum release_type rel_type)
+{
+	struct pci_dev *dev;
+	bool is_leaf_bridge = true;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		struct pci_bus *b = dev->subordinate;
+		if (!b)
+			continue;
+
+		is_leaf_bridge = false;
+
+		if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+			continue;
+
+		if (rel_type == whole_subtree)
+			pci_bus_release_bridge_resources(b, type,
+						 whole_subtree);
+	}
+
+	if (pci_is_root_bus(bus))
+		return;
+
+	if ((bus->self->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+		return;
+
+	if ((rel_type == whole_subtree) || is_leaf_bridge)
+		pci_bridge_release_resources(bus, type);
+}
+
+static void pci_bus_dump_res(struct pci_bus *bus)
+{
+	struct resource *res;
+	int i;
+
+	pci_bus_for_each_resource(bus, res, i) {
+		if (!res || !res->end || !res->flags)
+			continue;
+
+		dev_info(&bus->dev, "resource %d %pR\n", i, res);
+	}
+}
+
+static void pci_bus_dump_resources(struct pci_bus *bus)
+{
+	struct pci_bus *b;
+	struct pci_dev *dev;
+
+
+	pci_bus_dump_res(bus);
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		b = dev->subordinate;
+		if (!b)
+			continue;
+
+		pci_bus_dump_resources(b);
+	}
+}
+
+static int pci_bus_get_depth(struct pci_bus *bus)
+{
+	int depth = 0;
+	struct pci_bus *child_bus;
+
+	list_for_each_entry(child_bus, &bus->children, node) {
+		int ret;
+
+		ret = pci_bus_get_depth(child_bus);
+		if (ret + 1 > depth)
+			depth = ret + 1;
+	}
+
+	return depth;
+}
+
+/*
+ * -1: undefined, will auto detect later
+ *  0: disabled by user
+ *  1: disabled by auto detect
+ *  2: enabled by user
+ *  3: enabled by auto detect
+ */
+enum enable_type {
+	undefined = -1,
+	user_disabled,
+	auto_disabled,
+	user_enabled,
+	auto_enabled,
+};
+
+static enum enable_type pci_realloc_enable = undefined;
+void __init pci_realloc_get_opt(char *str)
+{
+	if (!strncmp(str, "off", 3))
+		pci_realloc_enable = user_disabled;
+	else if (!strncmp(str, "on", 2))
+		pci_realloc_enable = user_enabled;
+}
+static bool pci_realloc_enabled(enum enable_type enable)
+{
+	return enable >= user_enabled;
+}
+
+#if defined(CONFIG_PCI_IOV) && defined(CONFIG_PCI_REALLOC_ENABLE_AUTO)
+static int iov_resources_unassigned(struct pci_dev *dev, void *data)
+{
+	int i;
+	bool *unassigned = data;
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		struct resource *r = &dev->resource[i + PCI_IOV_RESOURCES];
+		struct pci_bus_region region;
+
+		/* Not assigned or rejected by kernel? */
+		if (!r->flags)
+			continue;
+
+		pcibios_resource_to_bus(dev->bus, &region, r);
+		if (!region.start) {
+			*unassigned = true;
+			return 1; /* Return early from pci_walk_bus() */
+		}
+	}
+
+	return 0;
+}
+
+static enum enable_type pci_realloc_detect(struct pci_bus *bus,
+					   enum enable_type enable_local)
+{
+	bool unassigned = false;
+	struct pci_host_bridge *host;
+
+	if (enable_local != undefined)
+		return enable_local;
+
+	host = pci_find_host_bridge(bus);
+	if (host->preserve_config)
+		return auto_disabled;
+
+	pci_walk_bus(bus, iov_resources_unassigned, &unassigned);
+	if (unassigned)
+		return auto_enabled;
+
+	return enable_local;
+}
+#else
+static enum enable_type pci_realloc_detect(struct pci_bus *bus,
+					   enum enable_type enable_local)
+{
+	return enable_local;
+}
+#endif
+
+static void adjust_bridge_window(struct pci_dev *bridge, struct resource *res,
+				 struct list_head *add_list,
+				 resource_size_t new_size)
+{
+	resource_size_t add_size, size = resource_size(res);
+
+	if (res->parent)
+		return;
+
+	if (!new_size)
+		return;
+
+	if (new_size > size) {
+		add_size = new_size - size;
+		pci_dbg(bridge, "bridge window %pR extended by %pa\n", res,
+			&add_size);
+	} else if (new_size < size) {
+		add_size = size - new_size;
+		pci_dbg(bridge, "bridge window %pR shrunken by %pa\n", res,
+			&add_size);
+	} else {
+		return;
+	}
+
+	res->end = res->start + new_size - 1;
+
+	/* If the resource is part of the add_list, remove it now */
+	if (add_list)
+		remove_from_list(add_list, res);
+}
+
+static void remove_dev_resource(struct resource *avail, struct pci_dev *dev,
+				struct resource *res)
+{
+	resource_size_t size, align, tmp;
+
+	size = resource_size(res);
+	if (!size)
+		return;
+
+	align = pci_resource_alignment(dev, res);
+	align = align ? ALIGN(avail->start, align) - avail->start : 0;
+	tmp = align + size;
+	avail->start = min(avail->start + tmp, avail->end + 1);
+}
+
+static void remove_dev_resources(struct pci_dev *dev, struct resource *io,
+				 struct resource *mmio,
+				 struct resource *mmio_pref)
+{
+	struct resource *res;
+
+	pci_dev_for_each_resource(dev, res) {
+		if (resource_type(res) == IORESOURCE_IO) {
+			remove_dev_resource(io, dev, res);
+		} else if (resource_type(res) == IORESOURCE_MEM) {
+
+			/*
+			 * Make sure prefetchable memory is reduced from
+			 * the correct resource. Specifically we put 32-bit
+			 * prefetchable memory in non-prefetchable window
+			 * if there is an 64-bit prefetchable window.
+			 *
+			 * See comments in __pci_bus_size_bridges() for
+			 * more information.
+			 */
+			if ((res->flags & IORESOURCE_PREFETCH) &&
+			    ((res->flags & IORESOURCE_MEM_64) ==
+			     (mmio_pref->flags & IORESOURCE_MEM_64)))
+				remove_dev_resource(mmio_pref, dev, res);
+			else
+				remove_dev_resource(mmio, dev, res);
+		}
+	}
+}
+
+/*
+ * io, mmio and mmio_pref contain the total amount of bridge window space
+ * available. This includes the minimal space needed to cover all the
+ * existing devices on the bus and the possible extra space that can be
+ * shared with the bridges.
+ */
+static void pci_bus_distribute_available_resources(struct pci_bus *bus,
+					    struct list_head *add_list,
+					    struct resource io,
+					    struct resource mmio,
+					    struct resource mmio_pref)
+{
+	unsigned int normal_bridges = 0, hotplug_bridges = 0;
+	struct resource *io_res, *mmio_res, *mmio_pref_res;
+	struct pci_dev *dev, *bridge = bus->self;
+	resource_size_t io_per_b, mmio_per_b, mmio_pref_per_b, align;
+
+	io_res = &bridge->resource[PCI_BRIDGE_IO_WINDOW];
+	mmio_res = &bridge->resource[PCI_BRIDGE_MEM_WINDOW];
+	mmio_pref_res = &bridge->resource[PCI_BRIDGE_PREF_MEM_WINDOW];
+
+	/*
+	 * The alignment of this bridge is yet to be considered, hence it must
+	 * be done now before extending its bridge window.
+	 */
+	align = pci_resource_alignment(bridge, io_res);
+	if (!io_res->parent && align)
+		io.start = min(ALIGN(io.start, align), io.end + 1);
+
+	align = pci_resource_alignment(bridge, mmio_res);
+	if (!mmio_res->parent && align)
+		mmio.start = min(ALIGN(mmio.start, align), mmio.end + 1);
+
+	align = pci_resource_alignment(bridge, mmio_pref_res);
+	if (!mmio_pref_res->parent && align)
+		mmio_pref.start = min(ALIGN(mmio_pref.start, align),
+			mmio_pref.end + 1);
+
+	/*
+	 * Now that we have adjusted for alignment, update the bridge window
+	 * resources to fill as much remaining resource space as possible.
+	 */
+	adjust_bridge_window(bridge, io_res, add_list, resource_size(&io));
+	adjust_bridge_window(bridge, mmio_res, add_list, resource_size(&mmio));
+	adjust_bridge_window(bridge, mmio_pref_res, add_list,
+			     resource_size(&mmio_pref));
+
+	/*
+	 * Calculate how many hotplug bridges and normal bridges there
+	 * are on this bus.  We will distribute the additional available
+	 * resources between hotplug bridges.
+	 */
+	for_each_pci_bridge(dev, bus) {
+		if (dev->is_hotplug_bridge)
+			hotplug_bridges++;
+		else
+			normal_bridges++;
+	}
+
+	if (!(hotplug_bridges + normal_bridges))
+		return;
+
+	/*
+	 * Calculate the amount of space we can forward from "bus" to any
+	 * downstream buses, i.e., the space left over after assigning the
+	 * BARs and windows on "bus".
+	 */
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		if (!dev->is_virtfn)
+			remove_dev_resources(dev, &io, &mmio, &mmio_pref);
+	}
+
+	/*
+	 * If there is at least one hotplug bridge on this bus it gets all
+	 * the extra resource space that was left after the reductions
+	 * above.
+	 *
+	 * If there are no hotplug bridges the extra resource space is
+	 * split between non-hotplug bridges. This is to allow possible
+	 * hotplug bridges below them to get the extra space as well.
+	 */
+	if (hotplug_bridges) {
+		io_per_b = div64_ul(resource_size(&io), hotplug_bridges);
+		mmio_per_b = div64_ul(resource_size(&mmio), hotplug_bridges);
+		mmio_pref_per_b = div64_ul(resource_size(&mmio_pref),
+					   hotplug_bridges);
+	} else {
+		io_per_b = div64_ul(resource_size(&io), normal_bridges);
+		mmio_per_b = div64_ul(resource_size(&mmio), normal_bridges);
+		mmio_pref_per_b = div64_ul(resource_size(&mmio_pref),
+					   normal_bridges);
+	}
+
+	for_each_pci_bridge(dev, bus) {
+		struct resource *res;
+		struct pci_bus *b;
+
+		b = dev->subordinate;
+		if (!b)
+			continue;
+		if (hotplug_bridges && !dev->is_hotplug_bridge)
+			continue;
+
+		res = &dev->resource[PCI_BRIDGE_IO_WINDOW];
+
+		/*
+		 * Make sure the split resource space is properly aligned
+		 * for bridge windows (align it down to avoid going above
+		 * what is available).
+		 */
+		align = pci_resource_alignment(dev, res);
+		io.end = align ? io.start + ALIGN_DOWN(io_per_b, align) - 1
+			       : io.start + io_per_b - 1;
+
+		/*
+		 * The x_per_b holds the extra resource space that can be
+		 * added for each bridge but there is the minimal already
+		 * reserved as well so adjust x.start down accordingly to
+		 * cover the whole space.
+		 */
+		io.start -= resource_size(res);
+
+		res = &dev->resource[PCI_BRIDGE_MEM_WINDOW];
+		align = pci_resource_alignment(dev, res);
+		mmio.end = align ? mmio.start + ALIGN_DOWN(mmio_per_b, align) - 1
+				 : mmio.start + mmio_per_b - 1;
+		mmio.start -= resource_size(res);
+
+		res = &dev->resource[PCI_BRIDGE_PREF_MEM_WINDOW];
+		align = pci_resource_alignment(dev, res);
+		mmio_pref.end = align ? mmio_pref.start +
+					ALIGN_DOWN(mmio_pref_per_b, align) - 1
+				      : mmio_pref.start + mmio_pref_per_b - 1;
+		mmio_pref.start -= resource_size(res);
+
+		pci_bus_distribute_available_resources(b, add_list, io, mmio,
+						       mmio_pref);
+
+		io.start += io.end + 1;
+		mmio.start += mmio.end + 1;
+		mmio_pref.start += mmio_pref.end + 1;
+	}
+}
+
+static void pci_bridge_distribute_available_resources(struct pci_dev *bridge,
+						      struct list_head *add_list)
+{
+	struct resource available_io, available_mmio, available_mmio_pref;
+
+	if (!bridge->is_hotplug_bridge)
+		return;
+
+	pci_dbg(bridge, "distributing available resources\n");
+
+	/* Take the initial extra resources from the hotplug port */
+	available_io = bridge->resource[PCI_BRIDGE_IO_WINDOW];
+	available_mmio = bridge->resource[PCI_BRIDGE_MEM_WINDOW];
+	available_mmio_pref = bridge->resource[PCI_BRIDGE_PREF_MEM_WINDOW];
+
+	pci_bus_distribute_available_resources(bridge->subordinate,
+					       add_list, available_io,
+					       available_mmio,
+					       available_mmio_pref);
+}
+
+static bool pci_bridge_resources_not_assigned(struct pci_dev *dev)
+{
+	const struct resource *r;
+
+	/*
+	 * If the child device's resources are not yet assigned it means we
+	 * are configuring them (not the boot firmware), so we should be
+	 * able to extend the upstream bridge resources in the same way we
+	 * do with the normal hotplug case.
+	 */
+	r = &dev->resource[PCI_BRIDGE_IO_WINDOW];
+	if (r->flags && !(r->flags & IORESOURCE_STARTALIGN))
+		return false;
+	r = &dev->resource[PCI_BRIDGE_MEM_WINDOW];
+	if (r->flags && !(r->flags & IORESOURCE_STARTALIGN))
+		return false;
+	r = &dev->resource[PCI_BRIDGE_PREF_MEM_WINDOW];
+	if (r->flags && !(r->flags & IORESOURCE_STARTALIGN))
+		return false;
+
+	return true;
+}
+
+static void
+pci_root_bus_distribute_available_resources(struct pci_bus *bus,
+					    struct list_head *add_list)
+{
+	struct pci_dev *dev, *bridge = bus->self;
+
+	for_each_pci_bridge(dev, bus) {
+		struct pci_bus *b;
+
+		b = dev->subordinate;
+		if (!b)
+			continue;
+
+		/*
+		 * Need to check "bridge" here too because it is NULL
+		 * in case of root bus.
+		 */
+		if (bridge && pci_bridge_resources_not_assigned(dev))
+			pci_bridge_distribute_available_resources(bridge,
+								  add_list);
+		else
+			pci_root_bus_distribute_available_resources(b, add_list);
+	}
+}
+
+/*
+ * First try will not touch PCI bridge res.
+ * Second and later try will clear small leaf bridge res.
+ * Will stop till to the max depth if can not find good one.
+ */
+void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus)
+{
+	LIST_HEAD(realloc_head);
+	/* List of resources that want additional resources */
+	struct list_head *add_list = NULL;
+	int tried_times = 0;
+	enum release_type rel_type = leaf_only;
+	LIST_HEAD(fail_head);
+	struct pci_dev_resource *fail_res;
+	int pci_try_num = 1;
+	enum enable_type enable_local;
+
+	/* Don't realloc if asked to do so */
+	enable_local = pci_realloc_detect(bus, pci_realloc_enable);
+	if (pci_realloc_enabled(enable_local)) {
+		int max_depth = pci_bus_get_depth(bus);
+
+		pci_try_num = max_depth + 1;
+		dev_info(&bus->dev, "max bus depth: %d pci_try_num: %d\n",
+			 max_depth, pci_try_num);
+	}
+
+again:
+	/*
+	 * Last try will use add_list, otherwise will try good to have as must
+	 * have, so can realloc parent bridge resource
+	 */
+	if (tried_times + 1 == pci_try_num)
+		add_list = &realloc_head;
+	/*
+	 * Depth first, calculate sizes and alignments of all subordinate buses.
+	 */
+	__pci_bus_size_bridges(bus, add_list);
+
+	pci_root_bus_distribute_available_resources(bus, add_list);
+
+	/* Depth last, allocate resources and update the hardware. */
+	__pci_bus_assign_resources(bus, add_list, &fail_head);
+	if (add_list)
+		BUG_ON(!list_empty(add_list));
+	tried_times++;
+
+	/* Any device complain? */
+	if (list_empty(&fail_head))
+		goto dump;
+
+	if (tried_times >= pci_try_num) {
+		if (enable_local == undefined)
+			dev_info(&bus->dev, "Some PCI device resources are unassigned, try booting with pci=realloc\n");
+		else if (enable_local == auto_enabled)
+			dev_info(&bus->dev, "Automatically enabled pci realloc, if you have problem, try booting with pci=realloc=off\n");
+
+		free_list(&fail_head);
+		goto dump;
+	}
+
+	dev_info(&bus->dev, "No. %d try to assign unassigned res\n",
+		 tried_times + 1);
+
+	/* Third times and later will not check if it is leaf */
+	if ((tried_times + 1) > 2)
+		rel_type = whole_subtree;
+
+	/*
+	 * Try to release leaf bridge's resources that doesn't fit resource of
+	 * child device under that bridge.
+	 */
+	list_for_each_entry(fail_res, &fail_head, list)
+		pci_bus_release_bridge_resources(fail_res->dev->bus,
+						 fail_res->flags & PCI_RES_TYPE_MASK,
+						 rel_type);
+
+	/* Restore size and flags */
+	list_for_each_entry(fail_res, &fail_head, list) {
+		struct resource *res = fail_res->res;
+		int idx;
+
+		res->start = fail_res->start;
+		res->end = fail_res->end;
+		res->flags = fail_res->flags;
+
+		if (pci_is_bridge(fail_res->dev)) {
+			idx = res - &fail_res->dev->resource[0];
+			if (idx >= PCI_BRIDGE_RESOURCES &&
+			    idx <= PCI_BRIDGE_RESOURCE_END)
+				res->flags = 0;
+		}
+	}
+	free_list(&fail_head);
+
+	goto again;
+
+dump:
+	/* Dump the resource on buses */
+	pci_bus_dump_resources(bus);
+}
+
+void __init pci_assign_unassigned_resources(void)
+{
+	struct pci_bus *root_bus;
+
+	list_for_each_entry(root_bus, &pci_root_buses, node) {
+		pci_assign_unassigned_root_bus_resources(root_bus);
+
+		/* Make sure the root bridge has a companion ACPI device */
+		if (ACPI_HANDLE(root_bus->bridge))
+			acpi_ioapic_add(ACPI_HANDLE(root_bus->bridge));
+	}
+}
+
+void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge)
+{
+	struct pci_bus *parent = bridge->subordinate;
+	/* List of resources that want additional resources */
+	LIST_HEAD(add_list);
+
+	int tried_times = 0;
+	LIST_HEAD(fail_head);
+	struct pci_dev_resource *fail_res;
+	int retval;
+
+again:
+	__pci_bus_size_bridges(parent, &add_list);
+
+	/*
+	 * Distribute remaining resources (if any) equally between hotplug
+	 * bridges below.  This makes it possible to extend the hierarchy
+	 * later without running out of resources.
+	 */
+	pci_bridge_distribute_available_resources(bridge, &add_list);
+
+	__pci_bridge_assign_resources(bridge, &add_list, &fail_head);
+	BUG_ON(!list_empty(&add_list));
+	tried_times++;
+
+	if (list_empty(&fail_head))
+		goto enable_all;
+
+	if (tried_times >= 2) {
+		/* Still fail, don't need to try more */
+		free_list(&fail_head);
+		goto enable_all;
+	}
+
+	printk(KERN_DEBUG "PCI: No. %d try to assign unassigned res\n",
+			 tried_times + 1);
+
+	/*
+	 * Try to release leaf bridge's resources that aren't big enough
+	 * to contain child device resources.
+	 */
+	list_for_each_entry(fail_res, &fail_head, list)
+		pci_bus_release_bridge_resources(fail_res->dev->bus,
+						 fail_res->flags & PCI_RES_TYPE_MASK,
+						 whole_subtree);
+
+	/* Restore size and flags */
+	list_for_each_entry(fail_res, &fail_head, list) {
+		struct resource *res = fail_res->res;
+		int idx;
+
+		res->start = fail_res->start;
+		res->end = fail_res->end;
+		res->flags = fail_res->flags;
+
+		if (pci_is_bridge(fail_res->dev)) {
+			idx = res - &fail_res->dev->resource[0];
+			if (idx >= PCI_BRIDGE_RESOURCES &&
+			    idx <= PCI_BRIDGE_RESOURCE_END)
+				res->flags = 0;
+		}
+	}
+	free_list(&fail_head);
+
+	goto again;
+
+enable_all:
+	retval = pci_reenable_device(bridge);
+	if (retval)
+		pci_err(bridge, "Error reenabling bridge (%d)\n", retval);
+	pci_set_master(bridge);
+}
+EXPORT_SYMBOL_GPL(pci_assign_unassigned_bridge_resources);
+
+int pci_reassign_bridge_resources(struct pci_dev *bridge, unsigned long type)
+{
+	struct pci_dev_resource *dev_res;
+	struct pci_dev *next;
+	LIST_HEAD(saved);
+	LIST_HEAD(added);
+	LIST_HEAD(failed);
+	unsigned int i;
+	int ret;
+
+	down_read(&pci_bus_sem);
+
+	/* Walk to the root hub, releasing bridge BARs when possible */
+	next = bridge;
+	do {
+		bridge = next;
+		for (i = PCI_BRIDGE_RESOURCES; i < PCI_BRIDGE_RESOURCE_END;
+		     i++) {
+			struct resource *res = &bridge->resource[i];
+
+			if ((res->flags ^ type) & PCI_RES_TYPE_MASK)
+				continue;
+
+			/* Ignore BARs which are still in use */
+			if (res->child)
+				continue;
+
+			ret = add_to_list(&saved, bridge, res, 0, 0);
+			if (ret)
+				goto cleanup;
+
+			pci_info(bridge, "BAR %d: releasing %pR\n",
+				 i, res);
+
+			if (res->parent)
+				release_resource(res);
+			res->start = 0;
+			res->end = 0;
+			break;
+		}
+		if (i == PCI_BRIDGE_RESOURCE_END)
+			break;
+
+		next = bridge->bus ? bridge->bus->self : NULL;
+	} while (next);
+
+	if (list_empty(&saved)) {
+		up_read(&pci_bus_sem);
+		return -ENOENT;
+	}
+
+	__pci_bus_size_bridges(bridge->subordinate, &added);
+	__pci_bridge_assign_resources(bridge, &added, &failed);
+	BUG_ON(!list_empty(&added));
+
+	if (!list_empty(&failed)) {
+		ret = -ENOSPC;
+		goto cleanup;
+	}
+
+	list_for_each_entry(dev_res, &saved, list) {
+		/* Skip the bridge we just assigned resources for */
+		if (bridge == dev_res->dev)
+			continue;
+
+		bridge = dev_res->dev;
+		pci_setup_bridge(bridge->subordinate);
+	}
+
+	free_list(&saved);
+	up_read(&pci_bus_sem);
+	return 0;
+
+cleanup:
+	/* Restore size and flags */
+	list_for_each_entry(dev_res, &failed, list) {
+		struct resource *res = dev_res->res;
+
+		res->start = dev_res->start;
+		res->end = dev_res->end;
+		res->flags = dev_res->flags;
+	}
+	free_list(&failed);
+
+	/* Revert to the old configuration */
+	list_for_each_entry(dev_res, &saved, list) {
+		struct resource *res = dev_res->res;
+
+		bridge = dev_res->dev;
+		i = res - bridge->resource;
+
+		res->start = dev_res->start;
+		res->end = dev_res->end;
+		res->flags = dev_res->flags;
+
+		pci_claim_resource(bridge, i);
+		pci_setup_bridge(bridge->subordinate);
+	}
+	free_list(&saved);
+	up_read(&pci_bus_sem);
+
+	return ret;
+}
+
+void pci_assign_unassigned_bus_resources(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+	/* List of resources that want additional resources */
+	LIST_HEAD(add_list);
+
+	down_read(&pci_bus_sem);
+	for_each_pci_bridge(dev, bus)
+		if (pci_has_subordinate(dev))
+			__pci_bus_size_bridges(dev->subordinate, &add_list);
+	up_read(&pci_bus_sem);
+	__pci_bus_assign_resources(bus, &add_list, NULL);
+	BUG_ON(!list_empty(&add_list));
+}
+EXPORT_SYMBOL_GPL(pci_assign_unassigned_bus_resources);
diff --git a/drivers/pci/setup-irq.c b/drivers/pci/setup-irq.c
new file mode 100644
index 000000000..cc7d26b01
--- /dev/null
+++ b/drivers/pci/setup-irq.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support routines for initializing a PCI subsystem
+ *
+ * Extruded from code written by
+ *      Dave Rusling (david.rusling@reo.mts.dec.com)
+ *      David Mosberger (davidm@cs.arizona.edu)
+ *	David Miller (davem@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/cache.h>
+#include "pci.h"
+
+void pci_assign_irq(struct pci_dev *dev)
+{
+	u8 pin;
+	u8 slot = -1;
+	int irq = 0;
+	struct pci_host_bridge *hbrg = pci_find_host_bridge(dev->bus);
+
+	if (!(hbrg->map_irq)) {
+		pci_dbg(dev, "runtime IRQ mapping not provided by arch\n");
+		return;
+	}
+
+	/*
+	 * If this device is not on the primary bus, we need to figure out
+	 * which interrupt pin it will come in on. We know which slot it
+	 * will come in on because that slot is where the bridge is. Each
+	 * time the interrupt line passes through a PCI-PCI bridge we must
+	 * apply the swizzle function.
+	 */
+	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+	/* Cope with illegal. */
+	if (pin > 4)
+		pin = 1;
+
+	if (pin) {
+		/* Follow the chain of bridges, swizzling as we go. */
+		if (hbrg->swizzle_irq)
+			slot = (*(hbrg->swizzle_irq))(dev, &pin);
+
+		/*
+		 * If a swizzling function is not used, map_irq() must
+		 * ignore slot.
+		 */
+		irq = (*(hbrg->map_irq))(dev, slot, pin);
+		if (irq == -1)
+			irq = 0;
+	}
+	dev->irq = irq;
+
+	pci_dbg(dev, "assign IRQ: got %d\n", dev->irq);
+
+	/*
+	 * Always tell the device, so the driver knows what is the real IRQ
+	 * to use; the device does not use it.
+	 */
+	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
+}
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
new file mode 100644
index 000000000..ceaa69491
--- /dev/null
+++ b/drivers/pci/setup-res.c
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support routines for initializing a PCI subsystem
+ *
+ * Extruded from code written by
+ *      Dave Rusling (david.rusling@reo.mts.dec.com)
+ *      David Mosberger (davidm@cs.arizona.edu)
+ *	David Miller (davem@redhat.com)
+ *
+ * Fixed for multiple PCI buses, 1999 Andrea Arcangeli <andrea@suse.de>
+ *
+ * Nov 2000, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
+ *	     Resource sorting
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/cache.h>
+#include <linux/slab.h>
+#include "pci.h"
+
+static void pci_std_update_resource(struct pci_dev *dev, int resno)
+{
+	struct pci_bus_region region;
+	bool disable;
+	u16 cmd;
+	u32 new, check, mask;
+	int reg;
+	struct resource *res = dev->resource + resno;
+
+	/* Per SR-IOV spec 3.4.1.11, VF BARs are RO zero */
+	if (dev->is_virtfn)
+		return;
+
+	/*
+	 * Ignore resources for unimplemented BARs and unused resource slots
+	 * for 64 bit BARs.
+	 */
+	if (!res->flags)
+		return;
+
+	if (res->flags & IORESOURCE_UNSET)
+		return;
+
+	/*
+	 * Ignore non-moveable resources.  This might be legacy resources for
+	 * which no functional BAR register exists or another important
+	 * system resource we shouldn't move around.
+	 */
+	if (res->flags & IORESOURCE_PCI_FIXED)
+		return;
+
+	pcibios_resource_to_bus(dev->bus, &region, res);
+	new = region.start;
+
+	if (res->flags & IORESOURCE_IO) {
+		mask = (u32)PCI_BASE_ADDRESS_IO_MASK;
+		new |= res->flags & ~PCI_BASE_ADDRESS_IO_MASK;
+	} else if (resno == PCI_ROM_RESOURCE) {
+		mask = PCI_ROM_ADDRESS_MASK;
+	} else {
+		mask = (u32)PCI_BASE_ADDRESS_MEM_MASK;
+		new |= res->flags & ~PCI_BASE_ADDRESS_MEM_MASK;
+	}
+
+	if (resno < PCI_ROM_RESOURCE) {
+		reg = PCI_BASE_ADDRESS_0 + 4 * resno;
+	} else if (resno == PCI_ROM_RESOURCE) {
+
+		/*
+		 * Apparently some Matrox devices have ROM BARs that read
+		 * as zero when disabled, so don't update ROM BARs unless
+		 * they're enabled.  See
+		 * https://lore.kernel.org/r/43147B3D.1030309@vc.cvut.cz/
+		 * But we must update ROM BAR for buggy devices where even a
+		 * disabled ROM can conflict with other BARs.
+		 */
+		if (!(res->flags & IORESOURCE_ROM_ENABLE) &&
+		    !dev->rom_bar_overlap)
+			return;
+
+		reg = dev->rom_base_reg;
+		if (res->flags & IORESOURCE_ROM_ENABLE)
+			new |= PCI_ROM_ADDRESS_ENABLE;
+	} else
+		return;
+
+	/*
+	 * We can't update a 64-bit BAR atomically, so when possible,
+	 * disable decoding so that a half-updated BAR won't conflict
+	 * with another device.
+	 */
+	disable = (res->flags & IORESOURCE_MEM_64) && !dev->mmio_always_on;
+	if (disable) {
+		pci_read_config_word(dev, PCI_COMMAND, &cmd);
+		pci_write_config_word(dev, PCI_COMMAND,
+				      cmd & ~PCI_COMMAND_MEMORY);
+	}
+
+	pci_write_config_dword(dev, reg, new);
+	pci_read_config_dword(dev, reg, &check);
+
+	if ((new ^ check) & mask) {
+		pci_err(dev, "BAR %d: error updating (%#010x != %#010x)\n",
+			resno, new, check);
+	}
+
+	if (res->flags & IORESOURCE_MEM_64) {
+		new = region.start >> 16 >> 16;
+		pci_write_config_dword(dev, reg + 4, new);
+		pci_read_config_dword(dev, reg + 4, &check);
+		if (check != new) {
+			pci_err(dev, "BAR %d: error updating (high %#010x != %#010x)\n",
+				resno, new, check);
+		}
+	}
+
+	if (disable)
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+}
+
+void pci_update_resource(struct pci_dev *dev, int resno)
+{
+	if (resno <= PCI_ROM_RESOURCE)
+		pci_std_update_resource(dev, resno);
+#ifdef CONFIG_PCI_IOV
+	else if (resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END)
+		pci_iov_update_resource(dev, resno);
+#endif
+}
+
+int pci_claim_resource(struct pci_dev *dev, int resource)
+{
+	struct resource *res = &dev->resource[resource];
+	struct resource *root, *conflict;
+
+	if (res->flags & IORESOURCE_UNSET) {
+		pci_info(dev, "can't claim BAR %d %pR: no address assigned\n",
+			 resource, res);
+		return -EINVAL;
+	}
+
+	/*
+	 * If we have a shadow copy in RAM, the PCI device doesn't respond
+	 * to the shadow range, so we don't need to claim it, and upstream
+	 * bridges don't need to route the range to the device.
+	 */
+	if (res->flags & IORESOURCE_ROM_SHADOW)
+		return 0;
+
+	root = pci_find_parent_resource(dev, res);
+	if (!root) {
+		pci_info(dev, "can't claim BAR %d %pR: no compatible bridge window\n",
+			 resource, res);
+		res->flags |= IORESOURCE_UNSET;
+		return -EINVAL;
+	}
+
+	conflict = request_resource_conflict(root, res);
+	if (conflict) {
+		pci_info(dev, "can't claim BAR %d %pR: address conflict with %s %pR\n",
+			 resource, res, conflict->name, conflict);
+		res->flags |= IORESOURCE_UNSET;
+		return -EBUSY;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(pci_claim_resource);
+
+void pci_disable_bridge_window(struct pci_dev *dev)
+{
+	/* MMIO Base/Limit */
+	pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0);
+
+	/* Prefetchable MMIO Base/Limit */
+	pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0);
+	pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0x0000fff0);
+	pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff);
+}
+
+/*
+ * Generic function that returns a value indicating that the device's
+ * original BIOS BAR address was not saved and so is not available for
+ * reinstatement.
+ *
+ * Can be over-ridden by architecture specific code that implements
+ * reinstatement functionality rather than leaving it disabled when
+ * normal allocation attempts fail.
+ */
+resource_size_t __weak pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx)
+{
+	return 0;
+}
+
+static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev,
+		int resno, resource_size_t size)
+{
+	struct resource *root, *conflict;
+	resource_size_t fw_addr, start, end;
+
+	fw_addr = pcibios_retrieve_fw_addr(dev, resno);
+	if (!fw_addr)
+		return -ENOMEM;
+
+	start = res->start;
+	end = res->end;
+	res->start = fw_addr;
+	res->end = res->start + size - 1;
+	res->flags &= ~IORESOURCE_UNSET;
+
+	root = pci_find_parent_resource(dev, res);
+	if (!root) {
+		/*
+		 * If dev is behind a bridge, accesses will only reach it
+		 * if res is inside the relevant bridge window.
+		 */
+		if (pci_upstream_bridge(dev))
+			return -ENXIO;
+
+		/*
+		 * On the root bus, assume the host bridge will forward
+		 * everything.
+		 */
+		if (res->flags & IORESOURCE_IO)
+			root = &ioport_resource;
+		else
+			root = &iomem_resource;
+	}
+
+	pci_info(dev, "BAR %d: trying firmware assignment %pR\n",
+		 resno, res);
+	conflict = request_resource_conflict(root, res);
+	if (conflict) {
+		pci_info(dev, "BAR %d: %pR conflicts with %s %pR\n",
+			 resno, res, conflict->name, conflict);
+		res->start = start;
+		res->end = end;
+		res->flags |= IORESOURCE_UNSET;
+		return -EBUSY;
+	}
+	return 0;
+}
+
+/*
+ * We don't have to worry about legacy ISA devices, so nothing to do here.
+ * This is marked as __weak because multiple architectures define it; it should
+ * eventually go away.
+ */
+resource_size_t __weak pcibios_align_resource(void *data,
+					      const struct resource *res,
+					      resource_size_t size,
+					      resource_size_t align)
+{
+       return res->start;
+}
+
+static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
+		int resno, resource_size_t size, resource_size_t align)
+{
+	struct resource *res = dev->resource + resno;
+	resource_size_t min;
+	int ret;
+
+	min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
+
+	/*
+	 * First, try exact prefetching match.  Even if a 64-bit
+	 * prefetchable bridge window is below 4GB, we can't put a 32-bit
+	 * prefetchable resource in it because pbus_size_mem() assumes a
+	 * 64-bit window will contain no 32-bit resources.  If we assign
+	 * things differently than they were sized, not everything will fit.
+	 */
+	ret = pci_bus_alloc_resource(bus, res, size, align, min,
+				     IORESOURCE_PREFETCH | IORESOURCE_MEM_64,
+				     pcibios_align_resource, dev);
+	if (ret == 0)
+		return 0;
+
+	/*
+	 * If the prefetchable window is only 32 bits wide, we can put
+	 * 64-bit prefetchable resources in it.
+	 */
+	if ((res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) ==
+	     (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) {
+		ret = pci_bus_alloc_resource(bus, res, size, align, min,
+					     IORESOURCE_PREFETCH,
+					     pcibios_align_resource, dev);
+		if (ret == 0)
+			return 0;
+	}
+
+	/*
+	 * If we didn't find a better match, we can put any memory resource
+	 * in a non-prefetchable window.  If this resource is 32 bits and
+	 * non-prefetchable, the first call already tried the only possibility
+	 * so we don't need to try again.
+	 */
+	if (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64))
+		ret = pci_bus_alloc_resource(bus, res, size, align, min, 0,
+					     pcibios_align_resource, dev);
+
+	return ret;
+}
+
+static int _pci_assign_resource(struct pci_dev *dev, int resno,
+				resource_size_t size, resource_size_t min_align)
+{
+	struct pci_bus *bus;
+	int ret;
+
+	bus = dev->bus;
+	while ((ret = __pci_assign_resource(bus, dev, resno, size, min_align))) {
+		if (!bus->parent || !bus->self->transparent)
+			break;
+		bus = bus->parent;
+	}
+
+	return ret;
+}
+
+int pci_assign_resource(struct pci_dev *dev, int resno)
+{
+	struct resource *res = dev->resource + resno;
+	resource_size_t align, size;
+	int ret;
+
+	if (res->flags & IORESOURCE_PCI_FIXED)
+		return 0;
+
+	res->flags |= IORESOURCE_UNSET;
+	align = pci_resource_alignment(dev, res);
+	if (!align) {
+		pci_info(dev, "BAR %d: can't assign %pR (bogus alignment)\n",
+			 resno, res);
+		return -EINVAL;
+	}
+
+	size = resource_size(res);
+	ret = _pci_assign_resource(dev, resno, size, align);
+
+	/*
+	 * If we failed to assign anything, let's try the address
+	 * where firmware left it.  That at least has a chance of
+	 * working, which is better than just leaving it disabled.
+	 */
+	if (ret < 0) {
+		pci_info(dev, "BAR %d: no space for %pR\n", resno, res);
+		ret = pci_revert_fw_address(res, dev, resno, size);
+	}
+
+	if (ret < 0) {
+		pci_info(dev, "BAR %d: failed to assign %pR\n", resno, res);
+		return ret;
+	}
+
+	res->flags &= ~IORESOURCE_UNSET;
+	res->flags &= ~IORESOURCE_STARTALIGN;
+	pci_info(dev, "BAR %d: assigned %pR\n", resno, res);
+	if (resno < PCI_BRIDGE_RESOURCES)
+		pci_update_resource(dev, resno);
+
+	return 0;
+}
+EXPORT_SYMBOL(pci_assign_resource);
+
+int pci_reassign_resource(struct pci_dev *dev, int resno, resource_size_t addsize,
+			resource_size_t min_align)
+{
+	struct resource *res = dev->resource + resno;
+	unsigned long flags;
+	resource_size_t new_size;
+	int ret;
+
+	if (res->flags & IORESOURCE_PCI_FIXED)
+		return 0;
+
+	flags = res->flags;
+	res->flags |= IORESOURCE_UNSET;
+	if (!res->parent) {
+		pci_info(dev, "BAR %d: can't reassign an unassigned resource %pR\n",
+			 resno, res);
+		return -EINVAL;
+	}
+
+	/* already aligned with min_align */
+	new_size = resource_size(res) + addsize;
+	ret = _pci_assign_resource(dev, resno, new_size, min_align);
+	if (ret) {
+		res->flags = flags;
+		pci_info(dev, "BAR %d: %pR (failed to expand by %#llx)\n",
+			 resno, res, (unsigned long long) addsize);
+		return ret;
+	}
+
+	res->flags &= ~IORESOURCE_UNSET;
+	res->flags &= ~IORESOURCE_STARTALIGN;
+	pci_info(dev, "BAR %d: reassigned %pR (expanded by %#llx)\n",
+		 resno, res, (unsigned long long) addsize);
+	if (resno < PCI_BRIDGE_RESOURCES)
+		pci_update_resource(dev, resno);
+
+	return 0;
+}
+
+void pci_release_resource(struct pci_dev *dev, int resno)
+{
+	struct resource *res = dev->resource + resno;
+
+	pci_info(dev, "BAR %d: releasing %pR\n", resno, res);
+
+	if (!res->parent)
+		return;
+
+	release_resource(res);
+	res->end = resource_size(res) - 1;
+	res->start = 0;
+	res->flags |= IORESOURCE_UNSET;
+}
+EXPORT_SYMBOL(pci_release_resource);
+
+int pci_resize_resource(struct pci_dev *dev, int resno, int size)
+{
+	struct resource *res = dev->resource + resno;
+	struct pci_host_bridge *host;
+	int old, ret;
+	u32 sizes;
+	u16 cmd;
+
+	/* Check if we must preserve the firmware's resource assignment */
+	host = pci_find_host_bridge(dev->bus);
+	if (host->preserve_config)
+		return -ENOTSUPP;
+
+	/* Make sure the resource isn't assigned before resizing it. */
+	if (!(res->flags & IORESOURCE_UNSET))
+		return -EBUSY;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	if (cmd & PCI_COMMAND_MEMORY)
+		return -EBUSY;
+
+	sizes = pci_rebar_get_possible_sizes(dev, resno);
+	if (!sizes)
+		return -ENOTSUPP;
+
+	if (!(sizes & BIT(size)))
+		return -EINVAL;
+
+	old = pci_rebar_get_current_size(dev, resno);
+	if (old < 0)
+		return old;
+
+	ret = pci_rebar_set_size(dev, resno, size);
+	if (ret)
+		return ret;
+
+	res->end = res->start + pci_rebar_size_to_bytes(size) - 1;
+
+	/* Check if the new config works by trying to assign everything. */
+	if (dev->bus->self) {
+		ret = pci_reassign_bridge_resources(dev->bus->self, res->flags);
+		if (ret)
+			goto error_resize;
+	}
+	return 0;
+
+error_resize:
+	pci_rebar_set_size(dev, resno, old);
+	res->end = res->start + pci_rebar_size_to_bytes(old) - 1;
+	return ret;
+}
+EXPORT_SYMBOL(pci_resize_resource);
+
+int pci_enable_resources(struct pci_dev *dev, int mask)
+{
+	u16 cmd, old_cmd;
+	int i;
+	struct resource *r;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	old_cmd = cmd;
+
+	pci_dev_for_each_resource(dev, r, i) {
+		if (!(mask & (1 << i)))
+			continue;
+
+		if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM)))
+			continue;
+		if ((i == PCI_ROM_RESOURCE) &&
+				(!(r->flags & IORESOURCE_ROM_ENABLE)))
+			continue;
+
+		if (r->flags & IORESOURCE_UNSET) {
+			pci_err(dev, "can't enable device: BAR %d %pR not assigned\n",
+				i, r);
+			return -EINVAL;
+		}
+
+		if (!r->parent) {
+			pci_err(dev, "can't enable device: BAR %d %pR not claimed\n",
+				i, r);
+			return -EINVAL;
+		}
+
+		if (r->flags & IORESOURCE_IO)
+			cmd |= PCI_COMMAND_IO;
+		if (r->flags & IORESOURCE_MEM)
+			cmd |= PCI_COMMAND_MEMORY;
+	}
+
+	if (cmd != old_cmd) {
+		pci_info(dev, "enabling device (%04x -> %04x)\n", old_cmd, cmd);
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+	return 0;
+}
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
new file mode 100644
index 000000000..0f87cade1
--- /dev/null
+++ b/drivers/pci/slot.c
@@ -0,0 +1,381 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2006 Matthew Wilcox <matthew@wil.cx>
+ * Copyright (C) 2006-2009 Hewlett-Packard Development Company, L.P.
+ *	Alex Chiang <achiang@hp.com>
+ */
+
+#include <linux/kobject.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/err.h>
+#include "pci.h"
+
+struct kset *pci_slots_kset;
+EXPORT_SYMBOL_GPL(pci_slots_kset);
+
+static ssize_t pci_slot_attr_show(struct kobject *kobj,
+					struct attribute *attr, char *buf)
+{
+	struct pci_slot *slot = to_pci_slot(kobj);
+	struct pci_slot_attribute *attribute = to_pci_slot_attr(attr);
+	return attribute->show ? attribute->show(slot, buf) : -EIO;
+}
+
+static ssize_t pci_slot_attr_store(struct kobject *kobj,
+			struct attribute *attr, const char *buf, size_t len)
+{
+	struct pci_slot *slot = to_pci_slot(kobj);
+	struct pci_slot_attribute *attribute = to_pci_slot_attr(attr);
+	return attribute->store ? attribute->store(slot, buf, len) : -EIO;
+}
+
+static const struct sysfs_ops pci_slot_sysfs_ops = {
+	.show = pci_slot_attr_show,
+	.store = pci_slot_attr_store,
+};
+
+static ssize_t address_read_file(struct pci_slot *slot, char *buf)
+{
+	if (slot->number == 0xff)
+		return sysfs_emit(buf, "%04x:%02x\n",
+				  pci_domain_nr(slot->bus),
+				  slot->bus->number);
+
+	return sysfs_emit(buf, "%04x:%02x:%02x\n",
+			  pci_domain_nr(slot->bus),
+			  slot->bus->number,
+			  slot->number);
+}
+
+static ssize_t bus_speed_read(enum pci_bus_speed speed, char *buf)
+{
+	return sysfs_emit(buf, "%s\n", pci_speed_string(speed));
+}
+
+static ssize_t max_speed_read_file(struct pci_slot *slot, char *buf)
+{
+	return bus_speed_read(slot->bus->max_bus_speed, buf);
+}
+
+static ssize_t cur_speed_read_file(struct pci_slot *slot, char *buf)
+{
+	return bus_speed_read(slot->bus->cur_bus_speed, buf);
+}
+
+static void pci_slot_release(struct kobject *kobj)
+{
+	struct pci_dev *dev;
+	struct pci_slot *slot = to_pci_slot(kobj);
+
+	dev_dbg(&slot->bus->dev, "dev %02x, released physical slot %s\n",
+		slot->number, pci_slot_name(slot));
+
+	down_read(&pci_bus_sem);
+	list_for_each_entry(dev, &slot->bus->devices, bus_list)
+		if (PCI_SLOT(dev->devfn) == slot->number)
+			dev->slot = NULL;
+	up_read(&pci_bus_sem);
+
+	list_del(&slot->list);
+
+	kfree(slot);
+}
+
+static struct pci_slot_attribute pci_slot_attr_address =
+	__ATTR(address, S_IRUGO, address_read_file, NULL);
+static struct pci_slot_attribute pci_slot_attr_max_speed =
+	__ATTR(max_bus_speed, S_IRUGO, max_speed_read_file, NULL);
+static struct pci_slot_attribute pci_slot_attr_cur_speed =
+	__ATTR(cur_bus_speed, S_IRUGO, cur_speed_read_file, NULL);
+
+static struct attribute *pci_slot_default_attrs[] = {
+	&pci_slot_attr_address.attr,
+	&pci_slot_attr_max_speed.attr,
+	&pci_slot_attr_cur_speed.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(pci_slot_default);
+
+static const struct kobj_type pci_slot_ktype = {
+	.sysfs_ops = &pci_slot_sysfs_ops,
+	.release = &pci_slot_release,
+	.default_groups = pci_slot_default_groups,
+};
+
+static char *make_slot_name(const char *name)
+{
+	char *new_name;
+	int len, max, dup;
+
+	new_name = kstrdup(name, GFP_KERNEL);
+	if (!new_name)
+		return NULL;
+
+	/*
+	 * Make sure we hit the realloc case the first time through the
+	 * loop.  'len' will be strlen(name) + 3 at that point which is
+	 * enough space for "name-X" and the trailing NUL.
+	 */
+	len = strlen(name) + 2;
+	max = 1;
+	dup = 1;
+
+	for (;;) {
+		struct kobject *dup_slot;
+		dup_slot = kset_find_obj(pci_slots_kset, new_name);
+		if (!dup_slot)
+			break;
+		kobject_put(dup_slot);
+		if (dup == max) {
+			len++;
+			max *= 10;
+			kfree(new_name);
+			new_name = kmalloc(len, GFP_KERNEL);
+			if (!new_name)
+				break;
+		}
+		sprintf(new_name, "%s-%d", name, dup++);
+	}
+
+	return new_name;
+}
+
+static int rename_slot(struct pci_slot *slot, const char *name)
+{
+	int result = 0;
+	char *slot_name;
+
+	if (strcmp(pci_slot_name(slot), name) == 0)
+		return result;
+
+	slot_name = make_slot_name(name);
+	if (!slot_name)
+		return -ENOMEM;
+
+	result = kobject_rename(&slot->kobj, slot_name);
+	kfree(slot_name);
+
+	return result;
+}
+
+void pci_dev_assign_slot(struct pci_dev *dev)
+{
+	struct pci_slot *slot;
+
+	mutex_lock(&pci_slot_mutex);
+	list_for_each_entry(slot, &dev->bus->slots, list)
+		if (PCI_SLOT(dev->devfn) == slot->number)
+			dev->slot = slot;
+	mutex_unlock(&pci_slot_mutex);
+}
+
+static struct pci_slot *get_slot(struct pci_bus *parent, int slot_nr)
+{
+	struct pci_slot *slot;
+
+	/* We already hold pci_slot_mutex */
+	list_for_each_entry(slot, &parent->slots, list)
+		if (slot->number == slot_nr) {
+			kobject_get(&slot->kobj);
+			return slot;
+		}
+
+	return NULL;
+}
+
+/**
+ * pci_create_slot - create or increment refcount for physical PCI slot
+ * @parent: struct pci_bus of parent bridge
+ * @slot_nr: PCI_SLOT(pci_dev->devfn) or -1 for placeholder
+ * @name: user visible string presented in /sys/bus/pci/slots/<name>
+ * @hotplug: set if caller is hotplug driver, NULL otherwise
+ *
+ * PCI slots have first class attributes such as address, speed, width,
+ * and a &struct pci_slot is used to manage them. This interface will
+ * either return a new &struct pci_slot to the caller, or if the pci_slot
+ * already exists, its refcount will be incremented.
+ *
+ * Slots are uniquely identified by a @pci_bus, @slot_nr tuple.
+ *
+ * There are known platforms with broken firmware that assign the same
+ * name to multiple slots. Workaround these broken platforms by renaming
+ * the slots on behalf of the caller. If firmware assigns name N to
+ * multiple slots:
+ *
+ * The first slot is assigned N
+ * The second slot is assigned N-1
+ * The third slot is assigned N-2
+ * etc.
+ *
+ * Placeholder slots:
+ * In most cases, @pci_bus, @slot_nr will be sufficient to uniquely identify
+ * a slot. There is one notable exception - pSeries (rpaphp), where the
+ * @slot_nr cannot be determined until a device is actually inserted into
+ * the slot. In this scenario, the caller may pass -1 for @slot_nr.
+ *
+ * The following semantics are imposed when the caller passes @slot_nr ==
+ * -1. First, we no longer check for an existing %struct pci_slot, as there
+ * may be many slots with @slot_nr of -1.  The other change in semantics is
+ * user-visible, which is the 'address' parameter presented in sysfs will
+ * consist solely of a dddd:bb tuple, where dddd is the PCI domain of the
+ * %struct pci_bus and bb is the bus number. In other words, the devfn of
+ * the 'placeholder' slot will not be displayed.
+ */
+struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
+				 const char *name,
+				 struct hotplug_slot *hotplug)
+{
+	struct pci_dev *dev;
+	struct pci_slot *slot;
+	int err = 0;
+	char *slot_name = NULL;
+
+	mutex_lock(&pci_slot_mutex);
+
+	if (slot_nr == -1)
+		goto placeholder;
+
+	/*
+	 * Hotplug drivers are allowed to rename an existing slot,
+	 * but only if not already claimed.
+	 */
+	slot = get_slot(parent, slot_nr);
+	if (slot) {
+		if (hotplug) {
+			if ((err = slot->hotplug ? -EBUSY : 0)
+			     || (err = rename_slot(slot, name))) {
+				kobject_put(&slot->kobj);
+				slot = NULL;
+				goto err;
+			}
+		}
+		goto out;
+	}
+
+placeholder:
+	slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+	if (!slot) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	slot->bus = parent;
+	slot->number = slot_nr;
+
+	slot->kobj.kset = pci_slots_kset;
+
+	slot_name = make_slot_name(name);
+	if (!slot_name) {
+		err = -ENOMEM;
+		kfree(slot);
+		goto err;
+	}
+
+	INIT_LIST_HEAD(&slot->list);
+	list_add(&slot->list, &parent->slots);
+
+	err = kobject_init_and_add(&slot->kobj, &pci_slot_ktype, NULL,
+				   "%s", slot_name);
+	if (err) {
+		kobject_put(&slot->kobj);
+		goto err;
+	}
+
+	down_read(&pci_bus_sem);
+	list_for_each_entry(dev, &parent->devices, bus_list)
+		if (PCI_SLOT(dev->devfn) == slot_nr)
+			dev->slot = slot;
+	up_read(&pci_bus_sem);
+
+	dev_dbg(&parent->dev, "dev %02x, created physical slot %s\n",
+		slot_nr, pci_slot_name(slot));
+
+out:
+	kfree(slot_name);
+	mutex_unlock(&pci_slot_mutex);
+	return slot;
+err:
+	slot = ERR_PTR(err);
+	goto out;
+}
+EXPORT_SYMBOL_GPL(pci_create_slot);
+
+/**
+ * pci_destroy_slot - decrement refcount for physical PCI slot
+ * @slot: struct pci_slot to decrement
+ *
+ * %struct pci_slot is refcounted, so destroying them is really easy; we
+ * just call kobject_put on its kobj and let our release methods do the
+ * rest.
+ */
+void pci_destroy_slot(struct pci_slot *slot)
+{
+	dev_dbg(&slot->bus->dev, "dev %02x, dec refcount to %d\n",
+		slot->number, kref_read(&slot->kobj.kref) - 1);
+
+	mutex_lock(&pci_slot_mutex);
+	kobject_put(&slot->kobj);
+	mutex_unlock(&pci_slot_mutex);
+}
+EXPORT_SYMBOL_GPL(pci_destroy_slot);
+
+#if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE)
+#include <linux/pci_hotplug.h>
+/**
+ * pci_hp_create_module_link - create symbolic link to hotplug driver module
+ * @pci_slot: struct pci_slot
+ *
+ * Helper function for pci_hotplug_core.c to create symbolic link to
+ * the hotplug driver module.
+ */
+void pci_hp_create_module_link(struct pci_slot *pci_slot)
+{
+	struct hotplug_slot *slot = pci_slot->hotplug;
+	struct kobject *kobj = NULL;
+	int ret;
+
+	if (!slot || !slot->ops)
+		return;
+	kobj = kset_find_obj(module_kset, slot->mod_name);
+	if (!kobj)
+		return;
+	ret = sysfs_create_link(&pci_slot->kobj, kobj, "module");
+	if (ret)
+		dev_err(&pci_slot->bus->dev, "Error creating sysfs link (%d)\n",
+			ret);
+	kobject_put(kobj);
+}
+EXPORT_SYMBOL_GPL(pci_hp_create_module_link);
+
+/**
+ * pci_hp_remove_module_link - remove symbolic link to the hotplug driver
+ * 	module.
+ * @pci_slot: struct pci_slot
+ *
+ * Helper function for pci_hotplug_core.c to remove symbolic link to
+ * the hotplug driver module.
+ */
+void pci_hp_remove_module_link(struct pci_slot *pci_slot)
+{
+	sysfs_remove_link(&pci_slot->kobj, "module");
+}
+EXPORT_SYMBOL_GPL(pci_hp_remove_module_link);
+#endif
+
+static int pci_slot_init(void)
+{
+	struct kset *pci_bus_kset;
+
+	pci_bus_kset = bus_get_kset(&pci_bus_type);
+	pci_slots_kset = kset_create_and_add("slots", NULL,
+						&pci_bus_kset->kobj);
+	if (!pci_slots_kset) {
+		pr_err("PCI: Slot initialization failure\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+subsys_initcall(pci_slot_init);
diff --git a/drivers/pci/switch/Kconfig b/drivers/pci/switch/Kconfig
new file mode 100644
index 000000000..d370f4ce0
--- /dev/null
+++ b/drivers/pci/switch/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "PCI switch controller drivers"
+	depends on PCI
+
+config PCI_SW_SWITCHTEC
+	tristate "MicroSemi Switchtec PCIe Switch Management Driver"
+	help
+	 Enables support for the management interface for the MicroSemi
+	 Switchtec series of PCIe switches. Supports userspace access
+	 to submit MRPC commands to the switch via /dev/switchtecX
+	 devices. See <file:Documentation/driver-api/switchtec.rst> for more
+	 information.
+
+endmenu
diff --git a/drivers/pci/switch/Makefile b/drivers/pci/switch/Makefile
new file mode 100644
index 000000000..acd56d3b4
--- /dev/null
+++ b/drivers/pci/switch/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_PCI_SW_SWITCHTEC) += switchtec.o
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
new file mode 100644
index 000000000..5b921387e
--- /dev/null
+++ b/drivers/pci/switch/switchtec.c
@@ -0,0 +1,1876 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Microsemi Switchtec(tm) PCIe Management Driver
+ * Copyright (c) 2017, Microsemi Corporation
+ */
+
+#include <linux/switchtec.h>
+#include <linux/switchtec_ioctl.h>
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/poll.h>
+#include <linux/wait.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/nospec.h>
+
+MODULE_DESCRIPTION("Microsemi Switchtec(tm) PCIe Management Driver");
+MODULE_VERSION("0.1");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Microsemi Corporation");
+
+static int max_devices = 16;
+module_param(max_devices, int, 0644);
+MODULE_PARM_DESC(max_devices, "max number of switchtec device instances");
+
+static bool use_dma_mrpc = true;
+module_param(use_dma_mrpc, bool, 0644);
+MODULE_PARM_DESC(use_dma_mrpc,
+		 "Enable the use of the DMA MRPC feature");
+
+static int nirqs = 32;
+module_param(nirqs, int, 0644);
+MODULE_PARM_DESC(nirqs, "number of interrupts to allocate (more may be useful for NTB applications)");
+
+static dev_t switchtec_devt;
+static DEFINE_IDA(switchtec_minor_ida);
+
+struct class *switchtec_class;
+EXPORT_SYMBOL_GPL(switchtec_class);
+
+enum mrpc_state {
+	MRPC_IDLE = 0,
+	MRPC_QUEUED,
+	MRPC_RUNNING,
+	MRPC_DONE,
+	MRPC_IO_ERROR,
+};
+
+struct switchtec_user {
+	struct switchtec_dev *stdev;
+
+	enum mrpc_state state;
+
+	wait_queue_head_t cmd_comp;
+	struct kref kref;
+	struct list_head list;
+
+	bool cmd_done;
+	u32 cmd;
+	u32 status;
+	u32 return_code;
+	size_t data_len;
+	size_t read_len;
+	unsigned char data[SWITCHTEC_MRPC_PAYLOAD_SIZE];
+	int event_cnt;
+};
+
+/*
+ * The MMIO reads to the device_id register should always return the device ID
+ * of the device, otherwise the firmware is probably stuck or unreachable
+ * due to a firmware reset which clears PCI state including the BARs and Memory
+ * Space Enable bits.
+ */
+static int is_firmware_running(struct switchtec_dev *stdev)
+{
+	u32 device = ioread32(&stdev->mmio_sys_info->device_id);
+
+	return stdev->pdev->device == device;
+}
+
+static struct switchtec_user *stuser_create(struct switchtec_dev *stdev)
+{
+	struct switchtec_user *stuser;
+
+	stuser = kzalloc(sizeof(*stuser), GFP_KERNEL);
+	if (!stuser)
+		return ERR_PTR(-ENOMEM);
+
+	get_device(&stdev->dev);
+	stuser->stdev = stdev;
+	kref_init(&stuser->kref);
+	INIT_LIST_HEAD(&stuser->list);
+	init_waitqueue_head(&stuser->cmd_comp);
+	stuser->event_cnt = atomic_read(&stdev->event_cnt);
+
+	dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser);
+
+	return stuser;
+}
+
+static void stuser_free(struct kref *kref)
+{
+	struct switchtec_user *stuser;
+
+	stuser = container_of(kref, struct switchtec_user, kref);
+
+	dev_dbg(&stuser->stdev->dev, "%s: %p\n", __func__, stuser);
+
+	put_device(&stuser->stdev->dev);
+	kfree(stuser);
+}
+
+static void stuser_put(struct switchtec_user *stuser)
+{
+	kref_put(&stuser->kref, stuser_free);
+}
+
+static void stuser_set_state(struct switchtec_user *stuser,
+			     enum mrpc_state state)
+{
+	/* requires the mrpc_mutex to already be held when called */
+
+	static const char * const state_names[] = {
+		[MRPC_IDLE] = "IDLE",
+		[MRPC_QUEUED] = "QUEUED",
+		[MRPC_RUNNING] = "RUNNING",
+		[MRPC_DONE] = "DONE",
+		[MRPC_IO_ERROR] = "IO_ERROR",
+	};
+
+	stuser->state = state;
+
+	dev_dbg(&stuser->stdev->dev, "stuser state %p -> %s",
+		stuser, state_names[state]);
+}
+
+static void mrpc_complete_cmd(struct switchtec_dev *stdev);
+
+static void flush_wc_buf(struct switchtec_dev *stdev)
+{
+	struct ntb_dbmsg_regs __iomem *mmio_dbmsg;
+
+	/*
+	 * odb (outbound doorbell) register is processed by low latency
+	 * hardware and w/o side effect
+	 */
+	mmio_dbmsg = (void __iomem *)stdev->mmio_ntb +
+		SWITCHTEC_NTB_REG_DBMSG_OFFSET;
+	ioread32(&mmio_dbmsg->odb);
+}
+
+static void mrpc_cmd_submit(struct switchtec_dev *stdev)
+{
+	/* requires the mrpc_mutex to already be held when called */
+
+	struct switchtec_user *stuser;
+
+	if (stdev->mrpc_busy)
+		return;
+
+	if (list_empty(&stdev->mrpc_queue))
+		return;
+
+	stuser = list_entry(stdev->mrpc_queue.next, struct switchtec_user,
+			    list);
+
+	if (stdev->dma_mrpc) {
+		stdev->dma_mrpc->status = SWITCHTEC_MRPC_STATUS_INPROGRESS;
+		memset(stdev->dma_mrpc->data, 0xFF, SWITCHTEC_MRPC_PAYLOAD_SIZE);
+	}
+
+	stuser_set_state(stuser, MRPC_RUNNING);
+	stdev->mrpc_busy = 1;
+	memcpy_toio(&stdev->mmio_mrpc->input_data,
+		    stuser->data, stuser->data_len);
+	flush_wc_buf(stdev);
+	iowrite32(stuser->cmd, &stdev->mmio_mrpc->cmd);
+
+	schedule_delayed_work(&stdev->mrpc_timeout,
+			      msecs_to_jiffies(500));
+}
+
+static int mrpc_queue_cmd(struct switchtec_user *stuser)
+{
+	/* requires the mrpc_mutex to already be held when called */
+
+	struct switchtec_dev *stdev = stuser->stdev;
+
+	kref_get(&stuser->kref);
+	stuser->read_len = sizeof(stuser->data);
+	stuser_set_state(stuser, MRPC_QUEUED);
+	stuser->cmd_done = false;
+	list_add_tail(&stuser->list, &stdev->mrpc_queue);
+
+	mrpc_cmd_submit(stdev);
+
+	return 0;
+}
+
+static void mrpc_cleanup_cmd(struct switchtec_dev *stdev)
+{
+	/* requires the mrpc_mutex to already be held when called */
+
+	struct switchtec_user *stuser = list_entry(stdev->mrpc_queue.next,
+						   struct switchtec_user, list);
+
+	stuser->cmd_done = true;
+	wake_up_interruptible(&stuser->cmd_comp);
+	list_del_init(&stuser->list);
+	stuser_put(stuser);
+	stdev->mrpc_busy = 0;
+
+	mrpc_cmd_submit(stdev);
+}
+
+static void mrpc_complete_cmd(struct switchtec_dev *stdev)
+{
+	/* requires the mrpc_mutex to already be held when called */
+
+	struct switchtec_user *stuser;
+
+	if (list_empty(&stdev->mrpc_queue))
+		return;
+
+	stuser = list_entry(stdev->mrpc_queue.next, struct switchtec_user,
+			    list);
+
+	if (stdev->dma_mrpc)
+		stuser->status = stdev->dma_mrpc->status;
+	else
+		stuser->status = ioread32(&stdev->mmio_mrpc->status);
+
+	if (stuser->status == SWITCHTEC_MRPC_STATUS_INPROGRESS)
+		return;
+
+	stuser_set_state(stuser, MRPC_DONE);
+	stuser->return_code = 0;
+
+	if (stuser->status != SWITCHTEC_MRPC_STATUS_DONE &&
+	    stuser->status != SWITCHTEC_MRPC_STATUS_ERROR)
+		goto out;
+
+	if (stdev->dma_mrpc)
+		stuser->return_code = stdev->dma_mrpc->rtn_code;
+	else
+		stuser->return_code = ioread32(&stdev->mmio_mrpc->ret_value);
+	if (stuser->return_code != 0)
+		goto out;
+
+	if (stdev->dma_mrpc)
+		memcpy(stuser->data, &stdev->dma_mrpc->data,
+			      stuser->read_len);
+	else
+		memcpy_fromio(stuser->data, &stdev->mmio_mrpc->output_data,
+			      stuser->read_len);
+out:
+	mrpc_cleanup_cmd(stdev);
+}
+
+static void mrpc_event_work(struct work_struct *work)
+{
+	struct switchtec_dev *stdev;
+
+	stdev = container_of(work, struct switchtec_dev, mrpc_work);
+
+	dev_dbg(&stdev->dev, "%s\n", __func__);
+
+	mutex_lock(&stdev->mrpc_mutex);
+	cancel_delayed_work(&stdev->mrpc_timeout);
+	mrpc_complete_cmd(stdev);
+	mutex_unlock(&stdev->mrpc_mutex);
+}
+
+static void mrpc_error_complete_cmd(struct switchtec_dev *stdev)
+{
+	/* requires the mrpc_mutex to already be held when called */
+
+	struct switchtec_user *stuser;
+
+	if (list_empty(&stdev->mrpc_queue))
+		return;
+
+	stuser = list_entry(stdev->mrpc_queue.next,
+			    struct switchtec_user, list);
+
+	stuser_set_state(stuser, MRPC_IO_ERROR);
+
+	mrpc_cleanup_cmd(stdev);
+}
+
+static void mrpc_timeout_work(struct work_struct *work)
+{
+	struct switchtec_dev *stdev;
+	u32 status;
+
+	stdev = container_of(work, struct switchtec_dev, mrpc_timeout.work);
+
+	dev_dbg(&stdev->dev, "%s\n", __func__);
+
+	mutex_lock(&stdev->mrpc_mutex);
+
+	if (!is_firmware_running(stdev)) {
+		mrpc_error_complete_cmd(stdev);
+		goto out;
+	}
+
+	if (stdev->dma_mrpc)
+		status = stdev->dma_mrpc->status;
+	else
+		status = ioread32(&stdev->mmio_mrpc->status);
+	if (status == SWITCHTEC_MRPC_STATUS_INPROGRESS) {
+		schedule_delayed_work(&stdev->mrpc_timeout,
+				      msecs_to_jiffies(500));
+		goto out;
+	}
+
+	mrpc_complete_cmd(stdev);
+out:
+	mutex_unlock(&stdev->mrpc_mutex);
+}
+
+static ssize_t device_version_show(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	struct switchtec_dev *stdev = to_stdev(dev);
+	u32 ver;
+
+	ver = ioread32(&stdev->mmio_sys_info->device_version);
+
+	return sysfs_emit(buf, "%x\n", ver);
+}
+static DEVICE_ATTR_RO(device_version);
+
+static ssize_t fw_version_show(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	struct switchtec_dev *stdev = to_stdev(dev);
+	u32 ver;
+
+	ver = ioread32(&stdev->mmio_sys_info->firmware_version);
+
+	return sysfs_emit(buf, "%08x\n", ver);
+}
+static DEVICE_ATTR_RO(fw_version);
+
+static ssize_t io_string_show(char *buf, void __iomem *attr, size_t len)
+{
+	int i;
+
+	memcpy_fromio(buf, attr, len);
+	buf[len] = '\n';
+	buf[len + 1] = 0;
+
+	for (i = len - 1; i > 0; i--) {
+		if (buf[i] != ' ')
+			break;
+		buf[i] = '\n';
+		buf[i + 1] = 0;
+	}
+
+	return strlen(buf);
+}
+
+#define DEVICE_ATTR_SYS_INFO_STR(field) \
+static ssize_t field ## _show(struct device *dev, \
+	struct device_attribute *attr, char *buf) \
+{ \
+	struct switchtec_dev *stdev = to_stdev(dev); \
+	struct sys_info_regs __iomem *si = stdev->mmio_sys_info; \
+	if (stdev->gen == SWITCHTEC_GEN3) \
+		return io_string_show(buf, &si->gen3.field, \
+				      sizeof(si->gen3.field)); \
+	else if (stdev->gen >= SWITCHTEC_GEN4) \
+		return io_string_show(buf, &si->gen4.field, \
+				      sizeof(si->gen4.field)); \
+	else \
+		return -EOPNOTSUPP; \
+} \
+\
+static DEVICE_ATTR_RO(field)
+
+DEVICE_ATTR_SYS_INFO_STR(vendor_id);
+DEVICE_ATTR_SYS_INFO_STR(product_id);
+DEVICE_ATTR_SYS_INFO_STR(product_revision);
+
+static ssize_t component_vendor_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct switchtec_dev *stdev = to_stdev(dev);
+	struct sys_info_regs __iomem *si = stdev->mmio_sys_info;
+
+	/* component_vendor field not supported after gen3 */
+	if (stdev->gen != SWITCHTEC_GEN3)
+		return sysfs_emit(buf, "none\n");
+
+	return io_string_show(buf, &si->gen3.component_vendor,
+			      sizeof(si->gen3.component_vendor));
+}
+static DEVICE_ATTR_RO(component_vendor);
+
+static ssize_t component_id_show(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	struct switchtec_dev *stdev = to_stdev(dev);
+	int id = ioread16(&stdev->mmio_sys_info->gen3.component_id);
+
+	/* component_id field not supported after gen3 */
+	if (stdev->gen != SWITCHTEC_GEN3)
+		return sysfs_emit(buf, "none\n");
+
+	return sysfs_emit(buf, "PM%04X\n", id);
+}
+static DEVICE_ATTR_RO(component_id);
+
+static ssize_t component_revision_show(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	struct switchtec_dev *stdev = to_stdev(dev);
+	int rev = ioread8(&stdev->mmio_sys_info->gen3.component_revision);
+
+	/* component_revision field not supported after gen3 */
+	if (stdev->gen != SWITCHTEC_GEN3)
+		return sysfs_emit(buf, "255\n");
+
+	return sysfs_emit(buf, "%d\n", rev);
+}
+static DEVICE_ATTR_RO(component_revision);
+
+static ssize_t partition_show(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	struct switchtec_dev *stdev = to_stdev(dev);
+
+	return sysfs_emit(buf, "%d\n", stdev->partition);
+}
+static DEVICE_ATTR_RO(partition);
+
+static ssize_t partition_count_show(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	struct switchtec_dev *stdev = to_stdev(dev);
+
+	return sysfs_emit(buf, "%d\n", stdev->partition_count);
+}
+static DEVICE_ATTR_RO(partition_count);
+
+static struct attribute *switchtec_device_attrs[] = {
+	&dev_attr_device_version.attr,
+	&dev_attr_fw_version.attr,
+	&dev_attr_vendor_id.attr,
+	&dev_attr_product_id.attr,
+	&dev_attr_product_revision.attr,
+	&dev_attr_component_vendor.attr,
+	&dev_attr_component_id.attr,
+	&dev_attr_component_revision.attr,
+	&dev_attr_partition.attr,
+	&dev_attr_partition_count.attr,
+	NULL,
+};
+
+ATTRIBUTE_GROUPS(switchtec_device);
+
+static int switchtec_dev_open(struct inode *inode, struct file *filp)
+{
+	struct switchtec_dev *stdev;
+	struct switchtec_user *stuser;
+
+	stdev = container_of(inode->i_cdev, struct switchtec_dev, cdev);
+
+	stuser = stuser_create(stdev);
+	if (IS_ERR(stuser))
+		return PTR_ERR(stuser);
+
+	filp->private_data = stuser;
+	stream_open(inode, filp);
+
+	dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser);
+
+	return 0;
+}
+
+static int switchtec_dev_release(struct inode *inode, struct file *filp)
+{
+	struct switchtec_user *stuser = filp->private_data;
+
+	stuser_put(stuser);
+
+	return 0;
+}
+
+static int lock_mutex_and_test_alive(struct switchtec_dev *stdev)
+{
+	if (mutex_lock_interruptible(&stdev->mrpc_mutex))
+		return -EINTR;
+
+	if (!stdev->alive) {
+		mutex_unlock(&stdev->mrpc_mutex);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static ssize_t switchtec_dev_write(struct file *filp, const char __user *data,
+				   size_t size, loff_t *off)
+{
+	struct switchtec_user *stuser = filp->private_data;
+	struct switchtec_dev *stdev = stuser->stdev;
+	int rc;
+
+	if (size < sizeof(stuser->cmd) ||
+	    size > sizeof(stuser->cmd) + sizeof(stuser->data))
+		return -EINVAL;
+
+	stuser->data_len = size - sizeof(stuser->cmd);
+
+	rc = lock_mutex_and_test_alive(stdev);
+	if (rc)
+		return rc;
+
+	if (stuser->state != MRPC_IDLE) {
+		rc = -EBADE;
+		goto out;
+	}
+
+	rc = copy_from_user(&stuser->cmd, data, sizeof(stuser->cmd));
+	if (rc) {
+		rc = -EFAULT;
+		goto out;
+	}
+	if (((MRPC_CMD_ID(stuser->cmd) == MRPC_GAS_WRITE) ||
+	     (MRPC_CMD_ID(stuser->cmd) == MRPC_GAS_READ)) &&
+	    !capable(CAP_SYS_ADMIN)) {
+		rc = -EPERM;
+		goto out;
+	}
+
+	data += sizeof(stuser->cmd);
+	rc = copy_from_user(&stuser->data, data, size - sizeof(stuser->cmd));
+	if (rc) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	rc = mrpc_queue_cmd(stuser);
+
+out:
+	mutex_unlock(&stdev->mrpc_mutex);
+
+	if (rc)
+		return rc;
+
+	return size;
+}
+
+static ssize_t switchtec_dev_read(struct file *filp, char __user *data,
+				  size_t size, loff_t *off)
+{
+	struct switchtec_user *stuser = filp->private_data;
+	struct switchtec_dev *stdev = stuser->stdev;
+	int rc;
+
+	if (size < sizeof(stuser->cmd) ||
+	    size > sizeof(stuser->cmd) + sizeof(stuser->data))
+		return -EINVAL;
+
+	rc = lock_mutex_and_test_alive(stdev);
+	if (rc)
+		return rc;
+
+	if (stuser->state == MRPC_IDLE) {
+		mutex_unlock(&stdev->mrpc_mutex);
+		return -EBADE;
+	}
+
+	stuser->read_len = size - sizeof(stuser->return_code);
+
+	mutex_unlock(&stdev->mrpc_mutex);
+
+	if (filp->f_flags & O_NONBLOCK) {
+		if (!stuser->cmd_done)
+			return -EAGAIN;
+	} else {
+		rc = wait_event_interruptible(stuser->cmd_comp,
+					      stuser->cmd_done);
+		if (rc < 0)
+			return rc;
+	}
+
+	rc = lock_mutex_and_test_alive(stdev);
+	if (rc)
+		return rc;
+
+	if (stuser->state == MRPC_IO_ERROR) {
+		mutex_unlock(&stdev->mrpc_mutex);
+		return -EIO;
+	}
+
+	if (stuser->state != MRPC_DONE) {
+		mutex_unlock(&stdev->mrpc_mutex);
+		return -EBADE;
+	}
+
+	rc = copy_to_user(data, &stuser->return_code,
+			  sizeof(stuser->return_code));
+	if (rc) {
+		mutex_unlock(&stdev->mrpc_mutex);
+		return -EFAULT;
+	}
+
+	data += sizeof(stuser->return_code);
+	rc = copy_to_user(data, &stuser->data,
+			  size - sizeof(stuser->return_code));
+	if (rc) {
+		mutex_unlock(&stdev->mrpc_mutex);
+		return -EFAULT;
+	}
+
+	stuser_set_state(stuser, MRPC_IDLE);
+
+	mutex_unlock(&stdev->mrpc_mutex);
+
+	if (stuser->status == SWITCHTEC_MRPC_STATUS_DONE ||
+	    stuser->status == SWITCHTEC_MRPC_STATUS_ERROR)
+		return size;
+	else if (stuser->status == SWITCHTEC_MRPC_STATUS_INTERRUPTED)
+		return -ENXIO;
+	else
+		return -EBADMSG;
+}
+
+static __poll_t switchtec_dev_poll(struct file *filp, poll_table *wait)
+{
+	struct switchtec_user *stuser = filp->private_data;
+	struct switchtec_dev *stdev = stuser->stdev;
+	__poll_t ret = 0;
+
+	poll_wait(filp, &stuser->cmd_comp, wait);
+	poll_wait(filp, &stdev->event_wq, wait);
+
+	if (lock_mutex_and_test_alive(stdev))
+		return EPOLLIN | EPOLLRDHUP | EPOLLOUT | EPOLLERR | EPOLLHUP;
+
+	mutex_unlock(&stdev->mrpc_mutex);
+
+	if (stuser->cmd_done)
+		ret |= EPOLLIN | EPOLLRDNORM;
+
+	if (stuser->event_cnt != atomic_read(&stdev->event_cnt))
+		ret |= EPOLLPRI | EPOLLRDBAND;
+
+	return ret;
+}
+
+static int ioctl_flash_info(struct switchtec_dev *stdev,
+			    struct switchtec_ioctl_flash_info __user *uinfo)
+{
+	struct switchtec_ioctl_flash_info info = {0};
+	struct flash_info_regs __iomem *fi = stdev->mmio_flash_info;
+
+	if (stdev->gen == SWITCHTEC_GEN3) {
+		info.flash_length = ioread32(&fi->gen3.flash_length);
+		info.num_partitions = SWITCHTEC_NUM_PARTITIONS_GEN3;
+	} else if (stdev->gen >= SWITCHTEC_GEN4) {
+		info.flash_length = ioread32(&fi->gen4.flash_length);
+		info.num_partitions = SWITCHTEC_NUM_PARTITIONS_GEN4;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	if (copy_to_user(uinfo, &info, sizeof(info)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static void set_fw_info_part(struct switchtec_ioctl_flash_part_info *info,
+			     struct partition_info __iomem *pi)
+{
+	info->address = ioread32(&pi->address);
+	info->length = ioread32(&pi->length);
+}
+
+static int flash_part_info_gen3(struct switchtec_dev *stdev,
+		struct switchtec_ioctl_flash_part_info *info)
+{
+	struct flash_info_regs_gen3 __iomem *fi =
+		&stdev->mmio_flash_info->gen3;
+	struct sys_info_regs_gen3 __iomem *si = &stdev->mmio_sys_info->gen3;
+	u32 active_addr = -1;
+
+	switch (info->flash_partition) {
+	case SWITCHTEC_IOCTL_PART_CFG0:
+		active_addr = ioread32(&fi->active_cfg);
+		set_fw_info_part(info, &fi->cfg0);
+		if (ioread16(&si->cfg_running) == SWITCHTEC_GEN3_CFG0_RUNNING)
+			info->active |= SWITCHTEC_IOCTL_PART_RUNNING;
+		break;
+	case SWITCHTEC_IOCTL_PART_CFG1:
+		active_addr = ioread32(&fi->active_cfg);
+		set_fw_info_part(info, &fi->cfg1);
+		if (ioread16(&si->cfg_running) == SWITCHTEC_GEN3_CFG1_RUNNING)
+			info->active |= SWITCHTEC_IOCTL_PART_RUNNING;
+		break;
+	case SWITCHTEC_IOCTL_PART_IMG0:
+		active_addr = ioread32(&fi->active_img);
+		set_fw_info_part(info, &fi->img0);
+		if (ioread16(&si->img_running) == SWITCHTEC_GEN3_IMG0_RUNNING)
+			info->active |= SWITCHTEC_IOCTL_PART_RUNNING;
+		break;
+	case SWITCHTEC_IOCTL_PART_IMG1:
+		active_addr = ioread32(&fi->active_img);
+		set_fw_info_part(info, &fi->img1);
+		if (ioread16(&si->img_running) == SWITCHTEC_GEN3_IMG1_RUNNING)
+			info->active |= SWITCHTEC_IOCTL_PART_RUNNING;
+		break;
+	case SWITCHTEC_IOCTL_PART_NVLOG:
+		set_fw_info_part(info, &fi->nvlog);
+		break;
+	case SWITCHTEC_IOCTL_PART_VENDOR0:
+		set_fw_info_part(info, &fi->vendor[0]);
+		break;
+	case SWITCHTEC_IOCTL_PART_VENDOR1:
+		set_fw_info_part(info, &fi->vendor[1]);
+		break;
+	case SWITCHTEC_IOCTL_PART_VENDOR2:
+		set_fw_info_part(info, &fi->vendor[2]);
+		break;
+	case SWITCHTEC_IOCTL_PART_VENDOR3:
+		set_fw_info_part(info, &fi->vendor[3]);
+		break;
+	case SWITCHTEC_IOCTL_PART_VENDOR4:
+		set_fw_info_part(info, &fi->vendor[4]);
+		break;
+	case SWITCHTEC_IOCTL_PART_VENDOR5:
+		set_fw_info_part(info, &fi->vendor[5]);
+		break;
+	case SWITCHTEC_IOCTL_PART_VENDOR6:
+		set_fw_info_part(info, &fi->vendor[6]);
+		break;
+	case SWITCHTEC_IOCTL_PART_VENDOR7:
+		set_fw_info_part(info, &fi->vendor[7]);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (info->address == active_addr)
+		info->active |= SWITCHTEC_IOCTL_PART_ACTIVE;
+
+	return 0;
+}
+
+static int flash_part_info_gen4(struct switchtec_dev *stdev,
+		struct switchtec_ioctl_flash_part_info *info)
+{
+	struct flash_info_regs_gen4 __iomem *fi = &stdev->mmio_flash_info->gen4;
+	struct sys_info_regs_gen4 __iomem *si = &stdev->mmio_sys_info->gen4;
+	struct active_partition_info_gen4 __iomem *af = &fi->active_flag;
+
+	switch (info->flash_partition) {
+	case SWITCHTEC_IOCTL_PART_MAP_0:
+		set_fw_info_part(info, &fi->map0);
+		break;
+	case SWITCHTEC_IOCTL_PART_MAP_1:
+		set_fw_info_part(info, &fi->map1);
+		break;
+	case SWITCHTEC_IOCTL_PART_KEY_0:
+		set_fw_info_part(info, &fi->key0);
+		if (ioread8(&af->key) == SWITCHTEC_GEN4_KEY0_ACTIVE)
+			info->active |= SWITCHTEC_IOCTL_PART_ACTIVE;
+		if (ioread16(&si->key_running) == SWITCHTEC_GEN4_KEY0_RUNNING)
+			info->active |= SWITCHTEC_IOCTL_PART_RUNNING;
+		break;
+	case SWITCHTEC_IOCTL_PART_KEY_1:
+		set_fw_info_part(info, &fi->key1);
+		if (ioread8(&af->key) == SWITCHTEC_GEN4_KEY1_ACTIVE)
+			info->active |= SWITCHTEC_IOCTL_PART_ACTIVE;
+		if (ioread16(&si->key_running) == SWITCHTEC_GEN4_KEY1_RUNNING)
+			info->active |= SWITCHTEC_IOCTL_PART_RUNNING;
+		break;
+	case SWITCHTEC_IOCTL_PART_BL2_0:
+		set_fw_info_part(info, &fi->bl2_0);
+		if (ioread8(&af->bl2) == SWITCHTEC_GEN4_BL2_0_ACTIVE)
+			info->active |= SWITCHTEC_IOCTL_PART_ACTIVE;
+		if (ioread16(&si->bl2_running) == SWITCHTEC_GEN4_BL2_0_RUNNING)
+			info->active |= SWITCHTEC_IOCTL_PART_RUNNING;
+		break;
+	case SWITCHTEC_IOCTL_PART_BL2_1:
+		set_fw_info_part(info, &fi->bl2_1);
+		if (ioread8(&af->bl2) == SWITCHTEC_GEN4_BL2_1_ACTIVE)
+			info->active |= SWITCHTEC_IOCTL_PART_ACTIVE;
+		if (ioread16(&si->bl2_running) == SWITCHTEC_GEN4_BL2_1_RUNNING)
+			info->active |= SWITCHTEC_IOCTL_PART_RUNNING;
+		break;
+	case SWITCHTEC_IOCTL_PART_CFG0:
+		set_fw_info_part(info, &fi->cfg0);
+		if (ioread8(&af->cfg) == SWITCHTEC_GEN4_CFG0_ACTIVE)
+			info->active |= SWITCHTEC_IOCTL_PART_ACTIVE;
+		if (ioread16(&si->cfg_running) == SWITCHTEC_GEN4_CFG0_RUNNING)
+			info->active |= SWITCHTEC_IOCTL_PART_RUNNING;
+		break;
+	case SWITCHTEC_IOCTL_PART_CFG1:
+		set_fw_info_part(info, &fi->cfg1);
+		if (ioread8(&af->cfg) == SWITCHTEC_GEN4_CFG1_ACTIVE)
+			info->active |= SWITCHTEC_IOCTL_PART_ACTIVE;
+		if (ioread16(&si->cfg_running) == SWITCHTEC_GEN4_CFG1_RUNNING)
+			info->active |= SWITCHTEC_IOCTL_PART_RUNNING;
+		break;
+	case SWITCHTEC_IOCTL_PART_IMG0:
+		set_fw_info_part(info, &fi->img0);
+		if (ioread8(&af->img) == SWITCHTEC_GEN4_IMG0_ACTIVE)
+			info->active |= SWITCHTEC_IOCTL_PART_ACTIVE;
+		if (ioread16(&si->img_running) == SWITCHTEC_GEN4_IMG0_RUNNING)
+			info->active |= SWITCHTEC_IOCTL_PART_RUNNING;
+		break;
+	case SWITCHTEC_IOCTL_PART_IMG1:
+		set_fw_info_part(info, &fi->img1);
+		if (ioread8(&af->img) == SWITCHTEC_GEN4_IMG1_ACTIVE)
+			info->active |= SWITCHTEC_IOCTL_PART_ACTIVE;
+		if (ioread16(&si->img_running) == SWITCHTEC_GEN4_IMG1_RUNNING)
+			info->active |= SWITCHTEC_IOCTL_PART_RUNNING;
+		break;
+	case SWITCHTEC_IOCTL_PART_NVLOG:
+		set_fw_info_part(info, &fi->nvlog);
+		break;
+	case SWITCHTEC_IOCTL_PART_VENDOR0:
+		set_fw_info_part(info, &fi->vendor[0]);
+		break;
+	case SWITCHTEC_IOCTL_PART_VENDOR1:
+		set_fw_info_part(info, &fi->vendor[1]);
+		break;
+	case SWITCHTEC_IOCTL_PART_VENDOR2:
+		set_fw_info_part(info, &fi->vendor[2]);
+		break;
+	case SWITCHTEC_IOCTL_PART_VENDOR3:
+		set_fw_info_part(info, &fi->vendor[3]);
+		break;
+	case SWITCHTEC_IOCTL_PART_VENDOR4:
+		set_fw_info_part(info, &fi->vendor[4]);
+		break;
+	case SWITCHTEC_IOCTL_PART_VENDOR5:
+		set_fw_info_part(info, &fi->vendor[5]);
+		break;
+	case SWITCHTEC_IOCTL_PART_VENDOR6:
+		set_fw_info_part(info, &fi->vendor[6]);
+		break;
+	case SWITCHTEC_IOCTL_PART_VENDOR7:
+		set_fw_info_part(info, &fi->vendor[7]);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ioctl_flash_part_info(struct switchtec_dev *stdev,
+		struct switchtec_ioctl_flash_part_info __user *uinfo)
+{
+	int ret;
+	struct switchtec_ioctl_flash_part_info info = {0};
+
+	if (copy_from_user(&info, uinfo, sizeof(info)))
+		return -EFAULT;
+
+	if (stdev->gen == SWITCHTEC_GEN3) {
+		ret = flash_part_info_gen3(stdev, &info);
+		if (ret)
+			return ret;
+	} else if (stdev->gen >= SWITCHTEC_GEN4) {
+		ret = flash_part_info_gen4(stdev, &info);
+		if (ret)
+			return ret;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	if (copy_to_user(uinfo, &info, sizeof(info)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int ioctl_event_summary(struct switchtec_dev *stdev,
+	struct switchtec_user *stuser,
+	struct switchtec_ioctl_event_summary __user *usum,
+	size_t size)
+{
+	struct switchtec_ioctl_event_summary *s;
+	int i;
+	u32 reg;
+	int ret = 0;
+
+	s = kzalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+
+	s->global = ioread32(&stdev->mmio_sw_event->global_summary);
+	s->part_bitmap = ioread64(&stdev->mmio_sw_event->part_event_bitmap);
+	s->local_part = ioread32(&stdev->mmio_part_cfg->part_event_summary);
+
+	for (i = 0; i < stdev->partition_count; i++) {
+		reg = ioread32(&stdev->mmio_part_cfg_all[i].part_event_summary);
+		s->part[i] = reg;
+	}
+
+	for (i = 0; i < stdev->pff_csr_count; i++) {
+		reg = ioread32(&stdev->mmio_pff_csr[i].pff_event_summary);
+		s->pff[i] = reg;
+	}
+
+	if (copy_to_user(usum, s, size)) {
+		ret = -EFAULT;
+		goto error_case;
+	}
+
+	stuser->event_cnt = atomic_read(&stdev->event_cnt);
+
+error_case:
+	kfree(s);
+	return ret;
+}
+
+static u32 __iomem *global_ev_reg(struct switchtec_dev *stdev,
+				  size_t offset, int index)
+{
+	return (void __iomem *)stdev->mmio_sw_event + offset;
+}
+
+static u32 __iomem *part_ev_reg(struct switchtec_dev *stdev,
+				size_t offset, int index)
+{
+	return (void __iomem *)&stdev->mmio_part_cfg_all[index] + offset;
+}
+
+static u32 __iomem *pff_ev_reg(struct switchtec_dev *stdev,
+			       size_t offset, int index)
+{
+	return (void __iomem *)&stdev->mmio_pff_csr[index] + offset;
+}
+
+#define EV_GLB(i, r)[i] = {offsetof(struct sw_event_regs, r), global_ev_reg}
+#define EV_PAR(i, r)[i] = {offsetof(struct part_cfg_regs, r), part_ev_reg}
+#define EV_PFF(i, r)[i] = {offsetof(struct pff_csr_regs, r), pff_ev_reg}
+
+static const struct event_reg {
+	size_t offset;
+	u32 __iomem *(*map_reg)(struct switchtec_dev *stdev,
+				size_t offset, int index);
+} event_regs[] = {
+	EV_GLB(SWITCHTEC_IOCTL_EVENT_STACK_ERROR, stack_error_event_hdr),
+	EV_GLB(SWITCHTEC_IOCTL_EVENT_PPU_ERROR, ppu_error_event_hdr),
+	EV_GLB(SWITCHTEC_IOCTL_EVENT_ISP_ERROR, isp_error_event_hdr),
+	EV_GLB(SWITCHTEC_IOCTL_EVENT_SYS_RESET, sys_reset_event_hdr),
+	EV_GLB(SWITCHTEC_IOCTL_EVENT_FW_EXC, fw_exception_hdr),
+	EV_GLB(SWITCHTEC_IOCTL_EVENT_FW_NMI, fw_nmi_hdr),
+	EV_GLB(SWITCHTEC_IOCTL_EVENT_FW_NON_FATAL, fw_non_fatal_hdr),
+	EV_GLB(SWITCHTEC_IOCTL_EVENT_FW_FATAL, fw_fatal_hdr),
+	EV_GLB(SWITCHTEC_IOCTL_EVENT_TWI_MRPC_COMP, twi_mrpc_comp_hdr),
+	EV_GLB(SWITCHTEC_IOCTL_EVENT_TWI_MRPC_COMP_ASYNC,
+	       twi_mrpc_comp_async_hdr),
+	EV_GLB(SWITCHTEC_IOCTL_EVENT_CLI_MRPC_COMP, cli_mrpc_comp_hdr),
+	EV_GLB(SWITCHTEC_IOCTL_EVENT_CLI_MRPC_COMP_ASYNC,
+	       cli_mrpc_comp_async_hdr),
+	EV_GLB(SWITCHTEC_IOCTL_EVENT_GPIO_INT, gpio_interrupt_hdr),
+	EV_GLB(SWITCHTEC_IOCTL_EVENT_GFMS, gfms_event_hdr),
+	EV_PAR(SWITCHTEC_IOCTL_EVENT_PART_RESET, part_reset_hdr),
+	EV_PAR(SWITCHTEC_IOCTL_EVENT_MRPC_COMP, mrpc_comp_hdr),
+	EV_PAR(SWITCHTEC_IOCTL_EVENT_MRPC_COMP_ASYNC, mrpc_comp_async_hdr),
+	EV_PAR(SWITCHTEC_IOCTL_EVENT_DYN_PART_BIND_COMP, dyn_binding_hdr),
+	EV_PAR(SWITCHTEC_IOCTL_EVENT_INTERCOMM_REQ_NOTIFY,
+	       intercomm_notify_hdr),
+	EV_PFF(SWITCHTEC_IOCTL_EVENT_AER_IN_P2P, aer_in_p2p_hdr),
+	EV_PFF(SWITCHTEC_IOCTL_EVENT_AER_IN_VEP, aer_in_vep_hdr),
+	EV_PFF(SWITCHTEC_IOCTL_EVENT_DPC, dpc_hdr),
+	EV_PFF(SWITCHTEC_IOCTL_EVENT_CTS, cts_hdr),
+	EV_PFF(SWITCHTEC_IOCTL_EVENT_UEC, uec_hdr),
+	EV_PFF(SWITCHTEC_IOCTL_EVENT_HOTPLUG, hotplug_hdr),
+	EV_PFF(SWITCHTEC_IOCTL_EVENT_IER, ier_hdr),
+	EV_PFF(SWITCHTEC_IOCTL_EVENT_THRESH, threshold_hdr),
+	EV_PFF(SWITCHTEC_IOCTL_EVENT_POWER_MGMT, power_mgmt_hdr),
+	EV_PFF(SWITCHTEC_IOCTL_EVENT_TLP_THROTTLING, tlp_throttling_hdr),
+	EV_PFF(SWITCHTEC_IOCTL_EVENT_FORCE_SPEED, force_speed_hdr),
+	EV_PFF(SWITCHTEC_IOCTL_EVENT_CREDIT_TIMEOUT, credit_timeout_hdr),
+	EV_PFF(SWITCHTEC_IOCTL_EVENT_LINK_STATE, link_state_hdr),
+};
+
+static u32 __iomem *event_hdr_addr(struct switchtec_dev *stdev,
+				   int event_id, int index)
+{
+	size_t off;
+
+	if (event_id < 0 || event_id >= SWITCHTEC_IOCTL_MAX_EVENTS)
+		return (u32 __iomem *)ERR_PTR(-EINVAL);
+
+	off = event_regs[event_id].offset;
+
+	if (event_regs[event_id].map_reg == part_ev_reg) {
+		if (index == SWITCHTEC_IOCTL_EVENT_LOCAL_PART_IDX)
+			index = stdev->partition;
+		else if (index < 0 || index >= stdev->partition_count)
+			return (u32 __iomem *)ERR_PTR(-EINVAL);
+	} else if (event_regs[event_id].map_reg == pff_ev_reg) {
+		if (index < 0 || index >= stdev->pff_csr_count)
+			return (u32 __iomem *)ERR_PTR(-EINVAL);
+	}
+
+	return event_regs[event_id].map_reg(stdev, off, index);
+}
+
+static int event_ctl(struct switchtec_dev *stdev,
+		     struct switchtec_ioctl_event_ctl *ctl)
+{
+	int i;
+	u32 __iomem *reg;
+	u32 hdr;
+
+	reg = event_hdr_addr(stdev, ctl->event_id, ctl->index);
+	if (IS_ERR(reg))
+		return PTR_ERR(reg);
+
+	hdr = ioread32(reg);
+	if (hdr & SWITCHTEC_EVENT_NOT_SUPP)
+		return -EOPNOTSUPP;
+
+	for (i = 0; i < ARRAY_SIZE(ctl->data); i++)
+		ctl->data[i] = ioread32(&reg[i + 1]);
+
+	ctl->occurred = hdr & SWITCHTEC_EVENT_OCCURRED;
+	ctl->count = (hdr >> 5) & 0xFF;
+
+	if (!(ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_CLEAR))
+		hdr &= ~SWITCHTEC_EVENT_CLEAR;
+	if (ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_EN_POLL)
+		hdr |= SWITCHTEC_EVENT_EN_IRQ;
+	if (ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_DIS_POLL)
+		hdr &= ~SWITCHTEC_EVENT_EN_IRQ;
+	if (ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_EN_LOG)
+		hdr |= SWITCHTEC_EVENT_EN_LOG;
+	if (ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_DIS_LOG)
+		hdr &= ~SWITCHTEC_EVENT_EN_LOG;
+	if (ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_EN_CLI)
+		hdr |= SWITCHTEC_EVENT_EN_CLI;
+	if (ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_DIS_CLI)
+		hdr &= ~SWITCHTEC_EVENT_EN_CLI;
+	if (ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_EN_FATAL)
+		hdr |= SWITCHTEC_EVENT_FATAL;
+	if (ctl->flags & SWITCHTEC_IOCTL_EVENT_FLAG_DIS_FATAL)
+		hdr &= ~SWITCHTEC_EVENT_FATAL;
+
+	if (ctl->flags)
+		iowrite32(hdr, reg);
+
+	ctl->flags = 0;
+	if (hdr & SWITCHTEC_EVENT_EN_IRQ)
+		ctl->flags |= SWITCHTEC_IOCTL_EVENT_FLAG_EN_POLL;
+	if (hdr & SWITCHTEC_EVENT_EN_LOG)
+		ctl->flags |= SWITCHTEC_IOCTL_EVENT_FLAG_EN_LOG;
+	if (hdr & SWITCHTEC_EVENT_EN_CLI)
+		ctl->flags |= SWITCHTEC_IOCTL_EVENT_FLAG_EN_CLI;
+	if (hdr & SWITCHTEC_EVENT_FATAL)
+		ctl->flags |= SWITCHTEC_IOCTL_EVENT_FLAG_EN_FATAL;
+
+	return 0;
+}
+
+static int ioctl_event_ctl(struct switchtec_dev *stdev,
+	struct switchtec_ioctl_event_ctl __user *uctl)
+{
+	int ret;
+	int nr_idxs;
+	unsigned int event_flags;
+	struct switchtec_ioctl_event_ctl ctl;
+
+	if (copy_from_user(&ctl, uctl, sizeof(ctl)))
+		return -EFAULT;
+
+	if (ctl.event_id >= SWITCHTEC_IOCTL_MAX_EVENTS)
+		return -EINVAL;
+
+	if (ctl.flags & SWITCHTEC_IOCTL_EVENT_FLAG_UNUSED)
+		return -EINVAL;
+
+	if (ctl.index == SWITCHTEC_IOCTL_EVENT_IDX_ALL) {
+		if (event_regs[ctl.event_id].map_reg == global_ev_reg)
+			nr_idxs = 1;
+		else if (event_regs[ctl.event_id].map_reg == part_ev_reg)
+			nr_idxs = stdev->partition_count;
+		else if (event_regs[ctl.event_id].map_reg == pff_ev_reg)
+			nr_idxs = stdev->pff_csr_count;
+		else
+			return -EINVAL;
+
+		event_flags = ctl.flags;
+		for (ctl.index = 0; ctl.index < nr_idxs; ctl.index++) {
+			ctl.flags = event_flags;
+			ret = event_ctl(stdev, &ctl);
+			if (ret < 0 && ret != -EOPNOTSUPP)
+				return ret;
+		}
+	} else {
+		ret = event_ctl(stdev, &ctl);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (copy_to_user(uctl, &ctl, sizeof(ctl)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int ioctl_pff_to_port(struct switchtec_dev *stdev,
+			     struct switchtec_ioctl_pff_port __user *up)
+{
+	int i, part;
+	u32 reg;
+	struct part_cfg_regs __iomem *pcfg;
+	struct switchtec_ioctl_pff_port p;
+
+	if (copy_from_user(&p, up, sizeof(p)))
+		return -EFAULT;
+
+	p.port = -1;
+	for (part = 0; part < stdev->partition_count; part++) {
+		pcfg = &stdev->mmio_part_cfg_all[part];
+		p.partition = part;
+
+		reg = ioread32(&pcfg->usp_pff_inst_id);
+		if (reg == p.pff) {
+			p.port = 0;
+			break;
+		}
+
+		reg = ioread32(&pcfg->vep_pff_inst_id) & 0xFF;
+		if (reg == p.pff) {
+			p.port = SWITCHTEC_IOCTL_PFF_VEP;
+			break;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(pcfg->dsp_pff_inst_id); i++) {
+			reg = ioread32(&pcfg->dsp_pff_inst_id[i]);
+			if (reg != p.pff)
+				continue;
+
+			p.port = i + 1;
+			break;
+		}
+
+		if (p.port != -1)
+			break;
+	}
+
+	if (copy_to_user(up, &p, sizeof(p)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int ioctl_port_to_pff(struct switchtec_dev *stdev,
+			     struct switchtec_ioctl_pff_port __user *up)
+{
+	struct switchtec_ioctl_pff_port p;
+	struct part_cfg_regs __iomem *pcfg;
+
+	if (copy_from_user(&p, up, sizeof(p)))
+		return -EFAULT;
+
+	if (p.partition == SWITCHTEC_IOCTL_EVENT_LOCAL_PART_IDX)
+		pcfg = stdev->mmio_part_cfg;
+	else if (p.partition < stdev->partition_count)
+		pcfg = &stdev->mmio_part_cfg_all[p.partition];
+	else
+		return -EINVAL;
+
+	switch (p.port) {
+	case 0:
+		p.pff = ioread32(&pcfg->usp_pff_inst_id);
+		break;
+	case SWITCHTEC_IOCTL_PFF_VEP:
+		p.pff = ioread32(&pcfg->vep_pff_inst_id) & 0xFF;
+		break;
+	default:
+		if (p.port > ARRAY_SIZE(pcfg->dsp_pff_inst_id))
+			return -EINVAL;
+		p.port = array_index_nospec(p.port,
+					ARRAY_SIZE(pcfg->dsp_pff_inst_id) + 1);
+		p.pff = ioread32(&pcfg->dsp_pff_inst_id[p.port - 1]);
+		break;
+	}
+
+	if (copy_to_user(up, &p, sizeof(p)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static long switchtec_dev_ioctl(struct file *filp, unsigned int cmd,
+				unsigned long arg)
+{
+	struct switchtec_user *stuser = filp->private_data;
+	struct switchtec_dev *stdev = stuser->stdev;
+	int rc;
+	void __user *argp = (void __user *)arg;
+
+	rc = lock_mutex_and_test_alive(stdev);
+	if (rc)
+		return rc;
+
+	switch (cmd) {
+	case SWITCHTEC_IOCTL_FLASH_INFO:
+		rc = ioctl_flash_info(stdev, argp);
+		break;
+	case SWITCHTEC_IOCTL_FLASH_PART_INFO:
+		rc = ioctl_flash_part_info(stdev, argp);
+		break;
+	case SWITCHTEC_IOCTL_EVENT_SUMMARY_LEGACY:
+		rc = ioctl_event_summary(stdev, stuser, argp,
+					 sizeof(struct switchtec_ioctl_event_summary_legacy));
+		break;
+	case SWITCHTEC_IOCTL_EVENT_CTL:
+		rc = ioctl_event_ctl(stdev, argp);
+		break;
+	case SWITCHTEC_IOCTL_PFF_TO_PORT:
+		rc = ioctl_pff_to_port(stdev, argp);
+		break;
+	case SWITCHTEC_IOCTL_PORT_TO_PFF:
+		rc = ioctl_port_to_pff(stdev, argp);
+		break;
+	case SWITCHTEC_IOCTL_EVENT_SUMMARY:
+		rc = ioctl_event_summary(stdev, stuser, argp,
+					 sizeof(struct switchtec_ioctl_event_summary));
+		break;
+	default:
+		rc = -ENOTTY;
+		break;
+	}
+
+	mutex_unlock(&stdev->mrpc_mutex);
+	return rc;
+}
+
+static const struct file_operations switchtec_fops = {
+	.owner = THIS_MODULE,
+	.open = switchtec_dev_open,
+	.release = switchtec_dev_release,
+	.write = switchtec_dev_write,
+	.read = switchtec_dev_read,
+	.poll = switchtec_dev_poll,
+	.unlocked_ioctl = switchtec_dev_ioctl,
+	.compat_ioctl = compat_ptr_ioctl,
+};
+
+static void link_event_work(struct work_struct *work)
+{
+	struct switchtec_dev *stdev;
+
+	stdev = container_of(work, struct switchtec_dev, link_event_work);
+
+	if (stdev->link_notifier)
+		stdev->link_notifier(stdev);
+}
+
+static void check_link_state_events(struct switchtec_dev *stdev)
+{
+	int idx;
+	u32 reg;
+	int count;
+	int occurred = 0;
+
+	for (idx = 0; idx < stdev->pff_csr_count; idx++) {
+		reg = ioread32(&stdev->mmio_pff_csr[idx].link_state_hdr);
+		dev_dbg(&stdev->dev, "link_state: %d->%08x\n", idx, reg);
+		count = (reg >> 5) & 0xFF;
+
+		if (count != stdev->link_event_count[idx]) {
+			occurred = 1;
+			stdev->link_event_count[idx] = count;
+		}
+	}
+
+	if (occurred)
+		schedule_work(&stdev->link_event_work);
+}
+
+static void enable_link_state_events(struct switchtec_dev *stdev)
+{
+	int idx;
+
+	for (idx = 0; idx < stdev->pff_csr_count; idx++) {
+		iowrite32(SWITCHTEC_EVENT_CLEAR |
+			  SWITCHTEC_EVENT_EN_IRQ,
+			  &stdev->mmio_pff_csr[idx].link_state_hdr);
+	}
+}
+
+static void enable_dma_mrpc(struct switchtec_dev *stdev)
+{
+	writeq(stdev->dma_mrpc_dma_addr, &stdev->mmio_mrpc->dma_addr);
+	flush_wc_buf(stdev);
+	iowrite32(SWITCHTEC_DMA_MRPC_EN, &stdev->mmio_mrpc->dma_en);
+}
+
+static void stdev_release(struct device *dev)
+{
+	struct switchtec_dev *stdev = to_stdev(dev);
+
+	if (stdev->dma_mrpc) {
+		iowrite32(0, &stdev->mmio_mrpc->dma_en);
+		flush_wc_buf(stdev);
+		writeq(0, &stdev->mmio_mrpc->dma_addr);
+		dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc),
+				stdev->dma_mrpc, stdev->dma_mrpc_dma_addr);
+	}
+	kfree(stdev);
+}
+
+static void stdev_kill(struct switchtec_dev *stdev)
+{
+	struct switchtec_user *stuser, *tmpuser;
+
+	pci_clear_master(stdev->pdev);
+
+	cancel_delayed_work_sync(&stdev->mrpc_timeout);
+
+	/* Mark the hardware as unavailable and complete all completions */
+	mutex_lock(&stdev->mrpc_mutex);
+	stdev->alive = false;
+
+	/* Wake up and kill any users waiting on an MRPC request */
+	list_for_each_entry_safe(stuser, tmpuser, &stdev->mrpc_queue, list) {
+		stuser->cmd_done = true;
+		wake_up_interruptible(&stuser->cmd_comp);
+		list_del_init(&stuser->list);
+		stuser_put(stuser);
+	}
+
+	mutex_unlock(&stdev->mrpc_mutex);
+
+	/* Wake up any users waiting on event_wq */
+	wake_up_interruptible(&stdev->event_wq);
+}
+
+static struct switchtec_dev *stdev_create(struct pci_dev *pdev)
+{
+	struct switchtec_dev *stdev;
+	int minor;
+	struct device *dev;
+	struct cdev *cdev;
+	int rc;
+
+	stdev = kzalloc_node(sizeof(*stdev), GFP_KERNEL,
+			     dev_to_node(&pdev->dev));
+	if (!stdev)
+		return ERR_PTR(-ENOMEM);
+
+	stdev->alive = true;
+	stdev->pdev = pdev;
+	INIT_LIST_HEAD(&stdev->mrpc_queue);
+	mutex_init(&stdev->mrpc_mutex);
+	stdev->mrpc_busy = 0;
+	INIT_WORK(&stdev->mrpc_work, mrpc_event_work);
+	INIT_DELAYED_WORK(&stdev->mrpc_timeout, mrpc_timeout_work);
+	INIT_WORK(&stdev->link_event_work, link_event_work);
+	init_waitqueue_head(&stdev->event_wq);
+	atomic_set(&stdev->event_cnt, 0);
+
+	dev = &stdev->dev;
+	device_initialize(dev);
+	dev->class = switchtec_class;
+	dev->parent = &pdev->dev;
+	dev->groups = switchtec_device_groups;
+	dev->release = stdev_release;
+
+	minor = ida_alloc(&switchtec_minor_ida, GFP_KERNEL);
+	if (minor < 0) {
+		rc = minor;
+		goto err_put;
+	}
+
+	dev->devt = MKDEV(MAJOR(switchtec_devt), minor);
+	dev_set_name(dev, "switchtec%d", minor);
+
+	cdev = &stdev->cdev;
+	cdev_init(cdev, &switchtec_fops);
+	cdev->owner = THIS_MODULE;
+
+	return stdev;
+
+err_put:
+	put_device(&stdev->dev);
+	return ERR_PTR(rc);
+}
+
+static int mask_event(struct switchtec_dev *stdev, int eid, int idx)
+{
+	size_t off = event_regs[eid].offset;
+	u32 __iomem *hdr_reg;
+	u32 hdr;
+
+	hdr_reg = event_regs[eid].map_reg(stdev, off, idx);
+	hdr = ioread32(hdr_reg);
+
+	if (hdr & SWITCHTEC_EVENT_NOT_SUPP)
+		return 0;
+
+	if (!(hdr & SWITCHTEC_EVENT_OCCURRED && hdr & SWITCHTEC_EVENT_EN_IRQ))
+		return 0;
+
+	dev_dbg(&stdev->dev, "%s: %d %d %x\n", __func__, eid, idx, hdr);
+	hdr &= ~(SWITCHTEC_EVENT_EN_IRQ | SWITCHTEC_EVENT_OCCURRED);
+	iowrite32(hdr, hdr_reg);
+
+	return 1;
+}
+
+static int mask_all_events(struct switchtec_dev *stdev, int eid)
+{
+	int idx;
+	int count = 0;
+
+	if (event_regs[eid].map_reg == part_ev_reg) {
+		for (idx = 0; idx < stdev->partition_count; idx++)
+			count += mask_event(stdev, eid, idx);
+	} else if (event_regs[eid].map_reg == pff_ev_reg) {
+		for (idx = 0; idx < stdev->pff_csr_count; idx++) {
+			if (!stdev->pff_local[idx])
+				continue;
+
+			count += mask_event(stdev, eid, idx);
+		}
+	} else {
+		count += mask_event(stdev, eid, 0);
+	}
+
+	return count;
+}
+
+static irqreturn_t switchtec_event_isr(int irq, void *dev)
+{
+	struct switchtec_dev *stdev = dev;
+	u32 reg;
+	irqreturn_t ret = IRQ_NONE;
+	int eid, event_count = 0;
+
+	reg = ioread32(&stdev->mmio_part_cfg->mrpc_comp_hdr);
+	if (reg & SWITCHTEC_EVENT_OCCURRED) {
+		dev_dbg(&stdev->dev, "%s: mrpc comp\n", __func__);
+		ret = IRQ_HANDLED;
+		schedule_work(&stdev->mrpc_work);
+		iowrite32(reg, &stdev->mmio_part_cfg->mrpc_comp_hdr);
+	}
+
+	check_link_state_events(stdev);
+
+	for (eid = 0; eid < SWITCHTEC_IOCTL_MAX_EVENTS; eid++) {
+		if (eid == SWITCHTEC_IOCTL_EVENT_LINK_STATE ||
+		    eid == SWITCHTEC_IOCTL_EVENT_MRPC_COMP)
+			continue;
+
+		event_count += mask_all_events(stdev, eid);
+	}
+
+	if (event_count) {
+		atomic_inc(&stdev->event_cnt);
+		wake_up_interruptible(&stdev->event_wq);
+		dev_dbg(&stdev->dev, "%s: %d events\n", __func__,
+			event_count);
+		return IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+
+static irqreturn_t switchtec_dma_mrpc_isr(int irq, void *dev)
+{
+	struct switchtec_dev *stdev = dev;
+
+	iowrite32(SWITCHTEC_EVENT_CLEAR |
+		  SWITCHTEC_EVENT_EN_IRQ,
+		  &stdev->mmio_part_cfg->mrpc_comp_hdr);
+	schedule_work(&stdev->mrpc_work);
+
+	return IRQ_HANDLED;
+}
+
+static int switchtec_init_isr(struct switchtec_dev *stdev)
+{
+	int nvecs;
+	int event_irq;
+	int dma_mrpc_irq;
+	int rc;
+
+	if (nirqs < 4)
+		nirqs = 4;
+
+	nvecs = pci_alloc_irq_vectors(stdev->pdev, 1, nirqs,
+				      PCI_IRQ_MSIX | PCI_IRQ_MSI |
+				      PCI_IRQ_VIRTUAL);
+	if (nvecs < 0)
+		return nvecs;
+
+	event_irq = ioread16(&stdev->mmio_part_cfg->vep_vector_number);
+	if (event_irq < 0 || event_irq >= nvecs)
+		return -EFAULT;
+
+	event_irq = pci_irq_vector(stdev->pdev, event_irq);
+	if (event_irq < 0)
+		return event_irq;
+
+	rc = devm_request_irq(&stdev->pdev->dev, event_irq,
+				switchtec_event_isr, 0,
+				KBUILD_MODNAME, stdev);
+
+	if (rc)
+		return rc;
+
+	if (!stdev->dma_mrpc)
+		return rc;
+
+	dma_mrpc_irq = ioread32(&stdev->mmio_mrpc->dma_vector);
+	if (dma_mrpc_irq < 0 || dma_mrpc_irq >= nvecs)
+		return -EFAULT;
+
+	dma_mrpc_irq  = pci_irq_vector(stdev->pdev, dma_mrpc_irq);
+	if (dma_mrpc_irq < 0)
+		return dma_mrpc_irq;
+
+	rc = devm_request_irq(&stdev->pdev->dev, dma_mrpc_irq,
+				switchtec_dma_mrpc_isr, 0,
+				KBUILD_MODNAME, stdev);
+
+	return rc;
+}
+
+static void init_pff(struct switchtec_dev *stdev)
+{
+	int i;
+	u32 reg;
+	struct part_cfg_regs __iomem *pcfg = stdev->mmio_part_cfg;
+
+	for (i = 0; i < SWITCHTEC_MAX_PFF_CSR; i++) {
+		reg = ioread16(&stdev->mmio_pff_csr[i].vendor_id);
+		if (reg != PCI_VENDOR_ID_MICROSEMI)
+			break;
+	}
+
+	stdev->pff_csr_count = i;
+
+	reg = ioread32(&pcfg->usp_pff_inst_id);
+	if (reg < stdev->pff_csr_count)
+		stdev->pff_local[reg] = 1;
+
+	reg = ioread32(&pcfg->vep_pff_inst_id) & 0xFF;
+	if (reg < stdev->pff_csr_count)
+		stdev->pff_local[reg] = 1;
+
+	for (i = 0; i < ARRAY_SIZE(pcfg->dsp_pff_inst_id); i++) {
+		reg = ioread32(&pcfg->dsp_pff_inst_id[i]);
+		if (reg < stdev->pff_csr_count)
+			stdev->pff_local[reg] = 1;
+	}
+}
+
+static int switchtec_init_pci(struct switchtec_dev *stdev,
+			      struct pci_dev *pdev)
+{
+	int rc;
+	void __iomem *map;
+	unsigned long res_start, res_len;
+	u32 __iomem *part_id;
+
+	rc = pcim_enable_device(pdev);
+	if (rc)
+		return rc;
+
+	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (rc)
+		return rc;
+
+	pci_set_master(pdev);
+
+	res_start = pci_resource_start(pdev, 0);
+	res_len = pci_resource_len(pdev, 0);
+
+	if (!devm_request_mem_region(&pdev->dev, res_start,
+				     res_len, KBUILD_MODNAME))
+		return -EBUSY;
+
+	stdev->mmio_mrpc = devm_ioremap_wc(&pdev->dev, res_start,
+					   SWITCHTEC_GAS_TOP_CFG_OFFSET);
+	if (!stdev->mmio_mrpc)
+		return -ENOMEM;
+
+	map = devm_ioremap(&pdev->dev,
+			   res_start + SWITCHTEC_GAS_TOP_CFG_OFFSET,
+			   res_len - SWITCHTEC_GAS_TOP_CFG_OFFSET);
+	if (!map)
+		return -ENOMEM;
+
+	stdev->mmio = map - SWITCHTEC_GAS_TOP_CFG_OFFSET;
+	stdev->mmio_sw_event = stdev->mmio + SWITCHTEC_GAS_SW_EVENT_OFFSET;
+	stdev->mmio_sys_info = stdev->mmio + SWITCHTEC_GAS_SYS_INFO_OFFSET;
+	stdev->mmio_flash_info = stdev->mmio + SWITCHTEC_GAS_FLASH_INFO_OFFSET;
+	stdev->mmio_ntb = stdev->mmio + SWITCHTEC_GAS_NTB_OFFSET;
+
+	if (stdev->gen == SWITCHTEC_GEN3)
+		part_id = &stdev->mmio_sys_info->gen3.partition_id;
+	else if (stdev->gen >= SWITCHTEC_GEN4)
+		part_id = &stdev->mmio_sys_info->gen4.partition_id;
+	else
+		return -EOPNOTSUPP;
+
+	stdev->partition = ioread8(part_id);
+	stdev->partition_count = ioread8(&stdev->mmio_ntb->partition_count);
+	stdev->mmio_part_cfg_all = stdev->mmio + SWITCHTEC_GAS_PART_CFG_OFFSET;
+	stdev->mmio_part_cfg = &stdev->mmio_part_cfg_all[stdev->partition];
+	stdev->mmio_pff_csr = stdev->mmio + SWITCHTEC_GAS_PFF_CSR_OFFSET;
+
+	if (stdev->partition_count < 1)
+		stdev->partition_count = 1;
+
+	init_pff(stdev);
+
+	pci_set_drvdata(pdev, stdev);
+
+	if (!use_dma_mrpc)
+		return 0;
+
+	if (ioread32(&stdev->mmio_mrpc->dma_ver) == 0)
+		return 0;
+
+	stdev->dma_mrpc = dma_alloc_coherent(&stdev->pdev->dev,
+					     sizeof(*stdev->dma_mrpc),
+					     &stdev->dma_mrpc_dma_addr,
+					     GFP_KERNEL);
+	if (stdev->dma_mrpc == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int switchtec_pci_probe(struct pci_dev *pdev,
+			       const struct pci_device_id *id)
+{
+	struct switchtec_dev *stdev;
+	int rc;
+
+	if (pdev->class == (PCI_CLASS_BRIDGE_OTHER << 8))
+		request_module_nowait("ntb_hw_switchtec");
+
+	stdev = stdev_create(pdev);
+	if (IS_ERR(stdev))
+		return PTR_ERR(stdev);
+
+	stdev->gen = id->driver_data;
+
+	rc = switchtec_init_pci(stdev, pdev);
+	if (rc)
+		goto err_put;
+
+	rc = switchtec_init_isr(stdev);
+	if (rc) {
+		dev_err(&stdev->dev, "failed to init isr.\n");
+		goto err_put;
+	}
+
+	iowrite32(SWITCHTEC_EVENT_CLEAR |
+		  SWITCHTEC_EVENT_EN_IRQ,
+		  &stdev->mmio_part_cfg->mrpc_comp_hdr);
+	enable_link_state_events(stdev);
+
+	if (stdev->dma_mrpc)
+		enable_dma_mrpc(stdev);
+
+	rc = cdev_device_add(&stdev->cdev, &stdev->dev);
+	if (rc)
+		goto err_devadd;
+
+	dev_info(&stdev->dev, "Management device registered.\n");
+
+	return 0;
+
+err_devadd:
+	stdev_kill(stdev);
+err_put:
+	ida_free(&switchtec_minor_ida, MINOR(stdev->dev.devt));
+	put_device(&stdev->dev);
+	return rc;
+}
+
+static void switchtec_pci_remove(struct pci_dev *pdev)
+{
+	struct switchtec_dev *stdev = pci_get_drvdata(pdev);
+
+	pci_set_drvdata(pdev, NULL);
+
+	cdev_device_del(&stdev->cdev, &stdev->dev);
+	ida_free(&switchtec_minor_ida, MINOR(stdev->dev.devt));
+	dev_info(&stdev->dev, "unregistered.\n");
+	stdev_kill(stdev);
+	put_device(&stdev->dev);
+}
+
+#define SWITCHTEC_PCI_DEVICE(device_id, gen) \
+	{ \
+		.vendor     = PCI_VENDOR_ID_MICROSEMI, \
+		.device     = device_id, \
+		.subvendor  = PCI_ANY_ID, \
+		.subdevice  = PCI_ANY_ID, \
+		.class      = (PCI_CLASS_MEMORY_OTHER << 8), \
+		.class_mask = 0xFFFFFFFF, \
+		.driver_data = gen, \
+	}, \
+	{ \
+		.vendor     = PCI_VENDOR_ID_MICROSEMI, \
+		.device     = device_id, \
+		.subvendor  = PCI_ANY_ID, \
+		.subdevice  = PCI_ANY_ID, \
+		.class      = (PCI_CLASS_BRIDGE_OTHER << 8), \
+		.class_mask = 0xFFFFFFFF, \
+		.driver_data = gen, \
+	}
+
+static const struct pci_device_id switchtec_pci_tbl[] = {
+	SWITCHTEC_PCI_DEVICE(0x8531, SWITCHTEC_GEN3),  /* PFX 24xG3 */
+	SWITCHTEC_PCI_DEVICE(0x8532, SWITCHTEC_GEN3),  /* PFX 32xG3 */
+	SWITCHTEC_PCI_DEVICE(0x8533, SWITCHTEC_GEN3),  /* PFX 48xG3 */
+	SWITCHTEC_PCI_DEVICE(0x8534, SWITCHTEC_GEN3),  /* PFX 64xG3 */
+	SWITCHTEC_PCI_DEVICE(0x8535, SWITCHTEC_GEN3),  /* PFX 80xG3 */
+	SWITCHTEC_PCI_DEVICE(0x8536, SWITCHTEC_GEN3),  /* PFX 96xG3 */
+	SWITCHTEC_PCI_DEVICE(0x8541, SWITCHTEC_GEN3),  /* PSX 24xG3 */
+	SWITCHTEC_PCI_DEVICE(0x8542, SWITCHTEC_GEN3),  /* PSX 32xG3 */
+	SWITCHTEC_PCI_DEVICE(0x8543, SWITCHTEC_GEN3),  /* PSX 48xG3 */
+	SWITCHTEC_PCI_DEVICE(0x8544, SWITCHTEC_GEN3),  /* PSX 64xG3 */
+	SWITCHTEC_PCI_DEVICE(0x8545, SWITCHTEC_GEN3),  /* PSX 80xG3 */
+	SWITCHTEC_PCI_DEVICE(0x8546, SWITCHTEC_GEN3),  /* PSX 96xG3 */
+	SWITCHTEC_PCI_DEVICE(0x8551, SWITCHTEC_GEN3),  /* PAX 24XG3 */
+	SWITCHTEC_PCI_DEVICE(0x8552, SWITCHTEC_GEN3),  /* PAX 32XG3 */
+	SWITCHTEC_PCI_DEVICE(0x8553, SWITCHTEC_GEN3),  /* PAX 48XG3 */
+	SWITCHTEC_PCI_DEVICE(0x8554, SWITCHTEC_GEN3),  /* PAX 64XG3 */
+	SWITCHTEC_PCI_DEVICE(0x8555, SWITCHTEC_GEN3),  /* PAX 80XG3 */
+	SWITCHTEC_PCI_DEVICE(0x8556, SWITCHTEC_GEN3),  /* PAX 96XG3 */
+	SWITCHTEC_PCI_DEVICE(0x8561, SWITCHTEC_GEN3),  /* PFXL 24XG3 */
+	SWITCHTEC_PCI_DEVICE(0x8562, SWITCHTEC_GEN3),  /* PFXL 32XG3 */
+	SWITCHTEC_PCI_DEVICE(0x8563, SWITCHTEC_GEN3),  /* PFXL 48XG3 */
+	SWITCHTEC_PCI_DEVICE(0x8564, SWITCHTEC_GEN3),  /* PFXL 64XG3 */
+	SWITCHTEC_PCI_DEVICE(0x8565, SWITCHTEC_GEN3),  /* PFXL 80XG3 */
+	SWITCHTEC_PCI_DEVICE(0x8566, SWITCHTEC_GEN3),  /* PFXL 96XG3 */
+	SWITCHTEC_PCI_DEVICE(0x8571, SWITCHTEC_GEN3),  /* PFXI 24XG3 */
+	SWITCHTEC_PCI_DEVICE(0x8572, SWITCHTEC_GEN3),  /* PFXI 32XG3 */
+	SWITCHTEC_PCI_DEVICE(0x8573, SWITCHTEC_GEN3),  /* PFXI 48XG3 */
+	SWITCHTEC_PCI_DEVICE(0x8574, SWITCHTEC_GEN3),  /* PFXI 64XG3 */
+	SWITCHTEC_PCI_DEVICE(0x8575, SWITCHTEC_GEN3),  /* PFXI 80XG3 */
+	SWITCHTEC_PCI_DEVICE(0x8576, SWITCHTEC_GEN3),  /* PFXI 96XG3 */
+	SWITCHTEC_PCI_DEVICE(0x4000, SWITCHTEC_GEN4),  /* PFX 100XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4084, SWITCHTEC_GEN4),  /* PFX 84XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4068, SWITCHTEC_GEN4),  /* PFX 68XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4052, SWITCHTEC_GEN4),  /* PFX 52XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4036, SWITCHTEC_GEN4),  /* PFX 36XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4028, SWITCHTEC_GEN4),  /* PFX 28XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4100, SWITCHTEC_GEN4),  /* PSX 100XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4184, SWITCHTEC_GEN4),  /* PSX 84XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4168, SWITCHTEC_GEN4),  /* PSX 68XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4152, SWITCHTEC_GEN4),  /* PSX 52XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4136, SWITCHTEC_GEN4),  /* PSX 36XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4128, SWITCHTEC_GEN4),  /* PSX 28XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4200, SWITCHTEC_GEN4),  /* PAX 100XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4284, SWITCHTEC_GEN4),  /* PAX 84XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4268, SWITCHTEC_GEN4),  /* PAX 68XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4252, SWITCHTEC_GEN4),  /* PAX 52XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4236, SWITCHTEC_GEN4),  /* PAX 36XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4228, SWITCHTEC_GEN4),  /* PAX 28XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4352, SWITCHTEC_GEN4),  /* PFXA 52XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4336, SWITCHTEC_GEN4),  /* PFXA 36XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4328, SWITCHTEC_GEN4),  /* PFXA 28XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4452, SWITCHTEC_GEN4),  /* PSXA 52XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4436, SWITCHTEC_GEN4),  /* PSXA 36XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4428, SWITCHTEC_GEN4),  /* PSXA 28XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4552, SWITCHTEC_GEN4),  /* PAXA 52XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4536, SWITCHTEC_GEN4),  /* PAXA 36XG4 */
+	SWITCHTEC_PCI_DEVICE(0x4528, SWITCHTEC_GEN4),  /* PAXA 28XG4 */
+	SWITCHTEC_PCI_DEVICE(0x5000, SWITCHTEC_GEN5),  /* PFX 100XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5084, SWITCHTEC_GEN5),  /* PFX 84XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5068, SWITCHTEC_GEN5),  /* PFX 68XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5052, SWITCHTEC_GEN5),  /* PFX 52XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5036, SWITCHTEC_GEN5),  /* PFX 36XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5028, SWITCHTEC_GEN5),  /* PFX 28XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5100, SWITCHTEC_GEN5),  /* PSX 100XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5184, SWITCHTEC_GEN5),  /* PSX 84XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5168, SWITCHTEC_GEN5),  /* PSX 68XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5152, SWITCHTEC_GEN5),  /* PSX 52XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5136, SWITCHTEC_GEN5),  /* PSX 36XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5128, SWITCHTEC_GEN5),  /* PSX 28XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5200, SWITCHTEC_GEN5),  /* PAX 100XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5284, SWITCHTEC_GEN5),  /* PAX 84XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5268, SWITCHTEC_GEN5),  /* PAX 68XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5252, SWITCHTEC_GEN5),  /* PAX 52XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5236, SWITCHTEC_GEN5),  /* PAX 36XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5228, SWITCHTEC_GEN5),  /* PAX 28XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5300, SWITCHTEC_GEN5),  /* PFXA 100XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5384, SWITCHTEC_GEN5),  /* PFXA 84XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5368, SWITCHTEC_GEN5),  /* PFXA 68XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5352, SWITCHTEC_GEN5),  /* PFXA 52XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5336, SWITCHTEC_GEN5),  /* PFXA 36XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5328, SWITCHTEC_GEN5),  /* PFXA 28XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5400, SWITCHTEC_GEN5),  /* PSXA 100XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5484, SWITCHTEC_GEN5),  /* PSXA 84XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5468, SWITCHTEC_GEN5),  /* PSXA 68XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5452, SWITCHTEC_GEN5),  /* PSXA 52XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5436, SWITCHTEC_GEN5),  /* PSXA 36XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5428, SWITCHTEC_GEN5),  /* PSXA 28XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5500, SWITCHTEC_GEN5),  /* PAXA 100XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5584, SWITCHTEC_GEN5),  /* PAXA 84XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5568, SWITCHTEC_GEN5),  /* PAXA 68XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5552, SWITCHTEC_GEN5),  /* PAXA 52XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5536, SWITCHTEC_GEN5),  /* PAXA 36XG5 */
+	SWITCHTEC_PCI_DEVICE(0x5528, SWITCHTEC_GEN5),  /* PAXA 28XG5 */
+	{0}
+};
+MODULE_DEVICE_TABLE(pci, switchtec_pci_tbl);
+
+static struct pci_driver switchtec_pci_driver = {
+	.name		= KBUILD_MODNAME,
+	.id_table	= switchtec_pci_tbl,
+	.probe		= switchtec_pci_probe,
+	.remove		= switchtec_pci_remove,
+};
+
+static int __init switchtec_init(void)
+{
+	int rc;
+
+	rc = alloc_chrdev_region(&switchtec_devt, 0, max_devices,
+				 "switchtec");
+	if (rc)
+		return rc;
+
+	switchtec_class = class_create("switchtec");
+	if (IS_ERR(switchtec_class)) {
+		rc = PTR_ERR(switchtec_class);
+		goto err_create_class;
+	}
+
+	rc = pci_register_driver(&switchtec_pci_driver);
+	if (rc)
+		goto err_pci_register;
+
+	pr_info(KBUILD_MODNAME ": loaded.\n");
+
+	return 0;
+
+err_pci_register:
+	class_destroy(switchtec_class);
+
+err_create_class:
+	unregister_chrdev_region(switchtec_devt, max_devices);
+
+	return rc;
+}
+module_init(switchtec_init);
+
+static void __exit switchtec_exit(void)
+{
+	pci_unregister_driver(&switchtec_pci_driver);
+	class_destroy(switchtec_class);
+	unregister_chrdev_region(switchtec_devt, max_devices);
+	ida_destroy(&switchtec_minor_ida);
+
+	pr_info(KBUILD_MODNAME ": unloaded.\n");
+}
+module_exit(switchtec_exit);
diff --git a/drivers/pci/syscall.c b/drivers/pci/syscall.c
new file mode 100644
index 000000000..803acbf33
--- /dev/null
+++ b/drivers/pci/syscall.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * For architectures where we want to allow direct access to the PCI config
+ * stuff - it would probably be preferable on PCs too, but there people
+ * just do it by hand with the magic northbridge registers.
+ */
+
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include "pci.h"
+
+SYSCALL_DEFINE5(pciconfig_read, unsigned long, bus, unsigned long, dfn,
+		unsigned long, off, unsigned long, len, void __user *, buf)
+{
+	struct pci_dev *dev;
+	u8 byte;
+	u16 word;
+	u32 dword;
+	int err, cfg_ret;
+
+	err = -EPERM;
+	dev = NULL;
+	if (!capable(CAP_SYS_ADMIN))
+		goto error;
+
+	err = -ENODEV;
+	dev = pci_get_domain_bus_and_slot(0, bus, dfn);
+	if (!dev)
+		goto error;
+
+	switch (len) {
+	case 1:
+		cfg_ret = pci_user_read_config_byte(dev, off, &byte);
+		break;
+	case 2:
+		cfg_ret = pci_user_read_config_word(dev, off, &word);
+		break;
+	case 4:
+		cfg_ret = pci_user_read_config_dword(dev, off, &dword);
+		break;
+	default:
+		err = -EINVAL;
+		goto error;
+	}
+
+	err = -EIO;
+	if (cfg_ret)
+		goto error;
+
+	switch (len) {
+	case 1:
+		err = put_user(byte, (u8 __user *)buf);
+		break;
+	case 2:
+		err = put_user(word, (u16 __user *)buf);
+		break;
+	case 4:
+		err = put_user(dword, (u32 __user *)buf);
+		break;
+	}
+	pci_dev_put(dev);
+	return err;
+
+error:
+	/* ??? XFree86 doesn't even check the return value.  They
+	   just look for 0xffffffff in the output, since that's what
+	   they get instead of a machine check on x86.  */
+	switch (len) {
+	case 1:
+		put_user(-1, (u8 __user *)buf);
+		break;
+	case 2:
+		put_user(-1, (u16 __user *)buf);
+		break;
+	case 4:
+		put_user(-1, (u32 __user *)buf);
+		break;
+	}
+	pci_dev_put(dev);
+	return err;
+}
+
+SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn,
+		unsigned long, off, unsigned long, len, void __user *, buf)
+{
+	struct pci_dev *dev;
+	u8 byte;
+	u16 word;
+	u32 dword;
+	int err = 0;
+
+	if (!capable(CAP_SYS_ADMIN) ||
+	    security_locked_down(LOCKDOWN_PCI_ACCESS))
+		return -EPERM;
+
+	dev = pci_get_domain_bus_and_slot(0, bus, dfn);
+	if (!dev)
+		return -ENODEV;
+
+	switch (len) {
+	case 1:
+		err = get_user(byte, (u8 __user *)buf);
+		if (err)
+			break;
+		err = pci_user_write_config_byte(dev, off, byte);
+		if (err)
+			err = -EIO;
+		break;
+
+	case 2:
+		err = get_user(word, (u16 __user *)buf);
+		if (err)
+			break;
+		err = pci_user_write_config_word(dev, off, word);
+		if (err)
+			err = -EIO;
+		break;
+
+	case 4:
+		err = get_user(dword, (u32 __user *)buf);
+		if (err)
+			break;
+		err = pci_user_write_config_dword(dev, off, dword);
+		if (err)
+			err = -EIO;
+		break;
+
+	default:
+		err = -EINVAL;
+		break;
+	}
+	pci_dev_put(dev);
+	return err;
+}
diff --git a/drivers/pci/vc.c b/drivers/pci/vc.c
new file mode 100644
index 000000000..5fc59ac31
--- /dev/null
+++ b/drivers/pci/vc.c
@@ -0,0 +1,428 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Virtual Channel support
+ *
+ * Copyright (C) 2013 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci_regs.h>
+#include <linux/types.h>
+
+#include "pci.h"
+
+/**
+ * pci_vc_save_restore_dwords - Save or restore a series of dwords
+ * @dev: device
+ * @pos: starting config space position
+ * @buf: buffer to save to or restore from
+ * @dwords: number of dwords to save/restore
+ * @save: whether to save or restore
+ */
+static void pci_vc_save_restore_dwords(struct pci_dev *dev, int pos,
+				       u32 *buf, int dwords, bool save)
+{
+	int i;
+
+	for (i = 0; i < dwords; i++, buf++) {
+		if (save)
+			pci_read_config_dword(dev, pos + (i * 4), buf);
+		else
+			pci_write_config_dword(dev, pos + (i * 4), *buf);
+	}
+}
+
+/**
+ * pci_vc_load_arb_table - load and wait for VC arbitration table
+ * @dev: device
+ * @pos: starting position of VC capability (VC/VC9/MFVC)
+ *
+ * Set Load VC Arbitration Table bit requesting hardware to apply the VC
+ * Arbitration Table (previously loaded).  When the VC Arbitration Table
+ * Status clears, hardware has latched the table into VC arbitration logic.
+ */
+static void pci_vc_load_arb_table(struct pci_dev *dev, int pos)
+{
+	u16 ctrl;
+
+	pci_read_config_word(dev, pos + PCI_VC_PORT_CTRL, &ctrl);
+	pci_write_config_word(dev, pos + PCI_VC_PORT_CTRL,
+			      ctrl | PCI_VC_PORT_CTRL_LOAD_TABLE);
+	if (pci_wait_for_pending(dev, pos + PCI_VC_PORT_STATUS,
+				 PCI_VC_PORT_STATUS_TABLE))
+		return;
+
+	pci_err(dev, "VC arbitration table failed to load\n");
+}
+
+/**
+ * pci_vc_load_port_arb_table - Load and wait for VC port arbitration table
+ * @dev: device
+ * @pos: starting position of VC capability (VC/VC9/MFVC)
+ * @res: VC resource number, ie. VCn (0-7)
+ *
+ * Set Load Port Arbitration Table bit requesting hardware to apply the Port
+ * Arbitration Table (previously loaded).  When the Port Arbitration Table
+ * Status clears, hardware has latched the table into port arbitration logic.
+ */
+static void pci_vc_load_port_arb_table(struct pci_dev *dev, int pos, int res)
+{
+	int ctrl_pos, status_pos;
+	u32 ctrl;
+
+	ctrl_pos = pos + PCI_VC_RES_CTRL + (res * PCI_CAP_VC_PER_VC_SIZEOF);
+	status_pos = pos + PCI_VC_RES_STATUS + (res * PCI_CAP_VC_PER_VC_SIZEOF);
+
+	pci_read_config_dword(dev, ctrl_pos, &ctrl);
+	pci_write_config_dword(dev, ctrl_pos,
+			       ctrl | PCI_VC_RES_CTRL_LOAD_TABLE);
+
+	if (pci_wait_for_pending(dev, status_pos, PCI_VC_RES_STATUS_TABLE))
+		return;
+
+	pci_err(dev, "VC%d port arbitration table failed to load\n", res);
+}
+
+/**
+ * pci_vc_enable - Enable virtual channel
+ * @dev: device
+ * @pos: starting position of VC capability (VC/VC9/MFVC)
+ * @res: VC res number, ie. VCn (0-7)
+ *
+ * A VC is enabled by setting the enable bit in matching resource control
+ * registers on both sides of a link.  We therefore need to find the opposite
+ * end of the link.  To keep this simple we enable from the downstream device.
+ * RC devices do not have an upstream device, nor does it seem that VC9 do
+ * (spec is unclear).  Once we find the upstream device, match the VC ID to
+ * get the correct resource, disable and enable on both ends.
+ */
+static void pci_vc_enable(struct pci_dev *dev, int pos, int res)
+{
+	int ctrl_pos, status_pos, id, pos2, evcc, i, ctrl_pos2, status_pos2;
+	u32 ctrl, header, cap1, ctrl2;
+	struct pci_dev *link = NULL;
+
+	/* Enable VCs from the downstream device */
+	if (!pci_is_pcie(dev) || !pcie_downstream_port(dev))
+		return;
+
+	ctrl_pos = pos + PCI_VC_RES_CTRL + (res * PCI_CAP_VC_PER_VC_SIZEOF);
+	status_pos = pos + PCI_VC_RES_STATUS + (res * PCI_CAP_VC_PER_VC_SIZEOF);
+
+	pci_read_config_dword(dev, ctrl_pos, &ctrl);
+	id = ctrl & PCI_VC_RES_CTRL_ID;
+
+	pci_read_config_dword(dev, pos, &header);
+
+	/* If there is no opposite end of the link, skip to enable */
+	if (PCI_EXT_CAP_ID(header) == PCI_EXT_CAP_ID_VC9 ||
+	    pci_is_root_bus(dev->bus))
+		goto enable;
+
+	pos2 = pci_find_ext_capability(dev->bus->self, PCI_EXT_CAP_ID_VC);
+	if (!pos2)
+		goto enable;
+
+	pci_read_config_dword(dev->bus->self, pos2 + PCI_VC_PORT_CAP1, &cap1);
+	evcc = cap1 & PCI_VC_CAP1_EVCC;
+
+	/* VC0 is hardwired enabled, so we can start with 1 */
+	for (i = 1; i < evcc + 1; i++) {
+		ctrl_pos2 = pos2 + PCI_VC_RES_CTRL +
+				(i * PCI_CAP_VC_PER_VC_SIZEOF);
+		status_pos2 = pos2 + PCI_VC_RES_STATUS +
+				(i * PCI_CAP_VC_PER_VC_SIZEOF);
+		pci_read_config_dword(dev->bus->self, ctrl_pos2, &ctrl2);
+		if ((ctrl2 & PCI_VC_RES_CTRL_ID) == id) {
+			link = dev->bus->self;
+			break;
+		}
+	}
+
+	if (!link)
+		goto enable;
+
+	/* Disable if enabled */
+	if (ctrl2 & PCI_VC_RES_CTRL_ENABLE) {
+		ctrl2 &= ~PCI_VC_RES_CTRL_ENABLE;
+		pci_write_config_dword(link, ctrl_pos2, ctrl2);
+	}
+
+	/* Enable on both ends */
+	ctrl2 |= PCI_VC_RES_CTRL_ENABLE;
+	pci_write_config_dword(link, ctrl_pos2, ctrl2);
+enable:
+	ctrl |= PCI_VC_RES_CTRL_ENABLE;
+	pci_write_config_dword(dev, ctrl_pos, ctrl);
+
+	if (!pci_wait_for_pending(dev, status_pos, PCI_VC_RES_STATUS_NEGO))
+		pci_err(dev, "VC%d negotiation stuck pending\n", id);
+
+	if (link && !pci_wait_for_pending(link, status_pos2,
+					  PCI_VC_RES_STATUS_NEGO))
+		pci_err(link, "VC%d negotiation stuck pending\n", id);
+}
+
+/**
+ * pci_vc_do_save_buffer - Size, save, or restore VC state
+ * @dev: device
+ * @pos: starting position of VC capability (VC/VC9/MFVC)
+ * @save_state: buffer for save/restore
+ * @save: if provided a buffer, this indicates what to do with it
+ *
+ * Walking Virtual Channel config space to size, save, or restore it
+ * is complicated, so we do it all from one function to reduce code and
+ * guarantee ordering matches in the buffer.  When called with NULL
+ * @save_state, return the size of the necessary save buffer.  When called
+ * with a non-NULL @save_state, @save determines whether we save to the
+ * buffer or restore from it.
+ */
+static int pci_vc_do_save_buffer(struct pci_dev *dev, int pos,
+				 struct pci_cap_saved_state *save_state,
+				 bool save)
+{
+	u32 cap1;
+	char evcc, lpevcc, parb_size;
+	int i, len = 0;
+	u8 *buf = save_state ? (u8 *)save_state->cap.data : NULL;
+
+	/* Sanity check buffer size for save/restore */
+	if (buf && save_state->cap.size !=
+	    pci_vc_do_save_buffer(dev, pos, NULL, save)) {
+		pci_err(dev, "VC save buffer size does not match @0x%x\n", pos);
+		return -ENOMEM;
+	}
+
+	pci_read_config_dword(dev, pos + PCI_VC_PORT_CAP1, &cap1);
+	/* Extended VC Count (not counting VC0) */
+	evcc = cap1 & PCI_VC_CAP1_EVCC;
+	/* Low Priority Extended VC Count (not counting VC0) */
+	lpevcc = (cap1 & PCI_VC_CAP1_LPEVCC) >> 4;
+	/* Port Arbitration Table Entry Size (bits) */
+	parb_size = 1 << ((cap1 & PCI_VC_CAP1_ARB_SIZE) >> 10);
+
+	/*
+	 * Port VC Control Register contains VC Arbitration Select, which
+	 * cannot be modified when more than one LPVC is in operation.  We
+	 * therefore save/restore it first, as only VC0 should be enabled
+	 * after device reset.
+	 */
+	if (buf) {
+		if (save)
+			pci_read_config_word(dev, pos + PCI_VC_PORT_CTRL,
+					     (u16 *)buf);
+		else
+			pci_write_config_word(dev, pos + PCI_VC_PORT_CTRL,
+					      *(u16 *)buf);
+		buf += 4;
+	}
+	len += 4;
+
+	/*
+	 * If we have any Low Priority VCs and a VC Arbitration Table Offset
+	 * in Port VC Capability Register 2 then save/restore it next.
+	 */
+	if (lpevcc) {
+		u32 cap2;
+		int vcarb_offset;
+
+		pci_read_config_dword(dev, pos + PCI_VC_PORT_CAP2, &cap2);
+		vcarb_offset = ((cap2 & PCI_VC_CAP2_ARB_OFF) >> 24) * 16;
+
+		if (vcarb_offset) {
+			int size, vcarb_phases = 0;
+
+			if (cap2 & PCI_VC_CAP2_128_PHASE)
+				vcarb_phases = 128;
+			else if (cap2 & PCI_VC_CAP2_64_PHASE)
+				vcarb_phases = 64;
+			else if (cap2 & PCI_VC_CAP2_32_PHASE)
+				vcarb_phases = 32;
+
+			/* Fixed 4 bits per phase per lpevcc (plus VC0) */
+			size = ((lpevcc + 1) * vcarb_phases * 4) / 8;
+
+			if (size && buf) {
+				pci_vc_save_restore_dwords(dev,
+							   pos + vcarb_offset,
+							   (u32 *)buf,
+							   size / 4, save);
+				/*
+				 * On restore, we need to signal hardware to
+				 * re-load the VC Arbitration Table.
+				 */
+				if (!save)
+					pci_vc_load_arb_table(dev, pos);
+
+				buf += size;
+			}
+			len += size;
+		}
+	}
+
+	/*
+	 * In addition to each VC Resource Control Register, we may have a
+	 * Port Arbitration Table attached to each VC.  The Port Arbitration
+	 * Table Offset in each VC Resource Capability Register tells us if
+	 * it exists.  The entry size is global from the Port VC Capability
+	 * Register1 above.  The number of phases is determined per VC.
+	 */
+	for (i = 0; i < evcc + 1; i++) {
+		u32 cap;
+		int parb_offset;
+
+		pci_read_config_dword(dev, pos + PCI_VC_RES_CAP +
+				      (i * PCI_CAP_VC_PER_VC_SIZEOF), &cap);
+		parb_offset = ((cap & PCI_VC_RES_CAP_ARB_OFF) >> 24) * 16;
+		if (parb_offset) {
+			int size, parb_phases = 0;
+
+			if (cap & PCI_VC_RES_CAP_256_PHASE)
+				parb_phases = 256;
+			else if (cap & (PCI_VC_RES_CAP_128_PHASE |
+					PCI_VC_RES_CAP_128_PHASE_TB))
+				parb_phases = 128;
+			else if (cap & PCI_VC_RES_CAP_64_PHASE)
+				parb_phases = 64;
+			else if (cap & PCI_VC_RES_CAP_32_PHASE)
+				parb_phases = 32;
+
+			size = (parb_size * parb_phases) / 8;
+
+			if (size && buf) {
+				pci_vc_save_restore_dwords(dev,
+							   pos + parb_offset,
+							   (u32 *)buf,
+							   size / 4, save);
+				buf += size;
+			}
+			len += size;
+		}
+
+		/* VC Resource Control Register */
+		if (buf) {
+			int ctrl_pos = pos + PCI_VC_RES_CTRL +
+						(i * PCI_CAP_VC_PER_VC_SIZEOF);
+			if (save)
+				pci_read_config_dword(dev, ctrl_pos,
+						      (u32 *)buf);
+			else {
+				u32 tmp, ctrl = *(u32 *)buf;
+				/*
+				 * For an FLR case, the VC config may remain.
+				 * Preserve enable bit, restore the rest.
+				 */
+				pci_read_config_dword(dev, ctrl_pos, &tmp);
+				tmp &= PCI_VC_RES_CTRL_ENABLE;
+				tmp |= ctrl & ~PCI_VC_RES_CTRL_ENABLE;
+				pci_write_config_dword(dev, ctrl_pos, tmp);
+				/* Load port arbitration table if used */
+				if (ctrl & PCI_VC_RES_CTRL_ARB_SELECT)
+					pci_vc_load_port_arb_table(dev, pos, i);
+				/* Re-enable if needed */
+				if ((ctrl ^ tmp) & PCI_VC_RES_CTRL_ENABLE)
+					pci_vc_enable(dev, pos, i);
+			}
+			buf += 4;
+		}
+		len += 4;
+	}
+
+	return buf ? 0 : len;
+}
+
+static struct {
+	u16 id;
+	const char *name;
+} vc_caps[] = { { PCI_EXT_CAP_ID_MFVC, "MFVC" },
+		{ PCI_EXT_CAP_ID_VC, "VC" },
+		{ PCI_EXT_CAP_ID_VC9, "VC9" } };
+
+/**
+ * pci_save_vc_state - Save VC state to pre-allocate save buffer
+ * @dev: device
+ *
+ * For each type of VC capability, VC/VC9/MFVC, find the capability and
+ * save it to the pre-allocated save buffer.
+ */
+int pci_save_vc_state(struct pci_dev *dev)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(vc_caps); i++) {
+		int pos, ret;
+		struct pci_cap_saved_state *save_state;
+
+		pos = pci_find_ext_capability(dev, vc_caps[i].id);
+		if (!pos)
+			continue;
+
+		save_state = pci_find_saved_ext_cap(dev, vc_caps[i].id);
+		if (!save_state) {
+			pci_err(dev, "%s buffer not found in %s\n",
+				vc_caps[i].name, __func__);
+			return -ENOMEM;
+		}
+
+		ret = pci_vc_do_save_buffer(dev, pos, save_state, true);
+		if (ret) {
+			pci_err(dev, "%s save unsuccessful %s\n",
+				vc_caps[i].name, __func__);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * pci_restore_vc_state - Restore VC state from save buffer
+ * @dev: device
+ *
+ * For each type of VC capability, VC/VC9/MFVC, find the capability and
+ * restore it from the previously saved buffer.
+ */
+void pci_restore_vc_state(struct pci_dev *dev)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(vc_caps); i++) {
+		int pos;
+		struct pci_cap_saved_state *save_state;
+
+		pos = pci_find_ext_capability(dev, vc_caps[i].id);
+		save_state = pci_find_saved_ext_cap(dev, vc_caps[i].id);
+		if (!save_state || !pos)
+			continue;
+
+		pci_vc_do_save_buffer(dev, pos, save_state, false);
+	}
+}
+
+/**
+ * pci_allocate_vc_save_buffers - Allocate save buffers for VC caps
+ * @dev: device
+ *
+ * For each type of VC capability, VC/VC9/MFVC, find the capability, size
+ * it, and allocate a buffer for save/restore.
+ */
+void pci_allocate_vc_save_buffers(struct pci_dev *dev)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(vc_caps); i++) {
+		int len, pos = pci_find_ext_capability(dev, vc_caps[i].id);
+
+		if (!pos)
+			continue;
+
+		len = pci_vc_do_save_buffer(dev, pos, NULL, false);
+		if (pci_add_ext_cap_save_buffer(dev, vc_caps[i].id, len))
+			pci_err(dev, "unable to preallocate %s save buffer\n",
+				vc_caps[i].name);
+	}
+}
diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
new file mode 100644
index 000000000..5e6b1eb54
--- /dev/null
+++ b/drivers/pci/vgaarb.c
@@ -0,0 +1,1559 @@
+// SPDX-License-Identifier: MIT
+/*
+ * vgaarb.c: Implements VGA arbitration. For details refer to
+ * Documentation/gpu/vgaarbiter.rst
+ *
+ * (C) Copyright 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ * (C) Copyright 2007 Paulo R. Zanoni <przanoni@gmail.com>
+ * (C) Copyright 2007, 2009 Tiago Vignatti <vignatti@freedesktop.org>
+ */
+
+#define pr_fmt(fmt) "vgaarb: " fmt
+
+#define vgaarb_dbg(dev, fmt, arg...)	dev_dbg(dev, "vgaarb: " fmt, ##arg)
+#define vgaarb_info(dev, fmt, arg...)	dev_info(dev, "vgaarb: " fmt, ##arg)
+#define vgaarb_err(dev, fmt, arg...)	dev_err(dev, "vgaarb: " fmt, ##arg)
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/sched/signal.h>
+#include <linux/wait.h>
+#include <linux/spinlock.h>
+#include <linux/poll.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/screen_info.h>
+#include <linux/vt.h>
+#include <linux/console.h>
+#include <linux/acpi.h>
+#include <linux/uaccess.h>
+#include <linux/vgaarb.h>
+
+static void vga_arbiter_notify_clients(void);
+
+/*
+ * We keep a list of all VGA devices in the system to speed
+ * up the various operations of the arbiter
+ */
+struct vga_device {
+	struct list_head list;
+	struct pci_dev *pdev;
+	unsigned int decodes;		/* what it decodes */
+	unsigned int owns;		/* what it owns */
+	unsigned int locks;		/* what it locks */
+	unsigned int io_lock_cnt;	/* legacy IO lock count */
+	unsigned int mem_lock_cnt;	/* legacy MEM lock count */
+	unsigned int io_norm_cnt;	/* normal IO count */
+	unsigned int mem_norm_cnt;	/* normal MEM count */
+	bool bridge_has_one_vga;
+	bool is_firmware_default;	/* device selected by firmware */
+	unsigned int (*set_decode)(struct pci_dev *pdev, bool decode);
+};
+
+static LIST_HEAD(vga_list);
+static int vga_count, vga_decode_count;
+static bool vga_arbiter_used;
+static DEFINE_SPINLOCK(vga_lock);
+static DECLARE_WAIT_QUEUE_HEAD(vga_wait_queue);
+
+static const char *vga_iostate_to_str(unsigned int iostate)
+{
+	/* Ignore VGA_RSRC_IO and VGA_RSRC_MEM */
+	iostate &= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
+	switch (iostate) {
+	case VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM:
+		return "io+mem";
+	case VGA_RSRC_LEGACY_IO:
+		return "io";
+	case VGA_RSRC_LEGACY_MEM:
+		return "mem";
+	}
+	return "none";
+}
+
+static int vga_str_to_iostate(char *buf, int str_size, unsigned int *io_state)
+{
+	/*
+	 * In theory, we could hand out locks on IO and MEM separately to
+	 * userspace, but this can cause deadlocks.
+	 */
+	if (strncmp(buf, "none", 4) == 0) {
+		*io_state = VGA_RSRC_NONE;
+		return 1;
+	}
+
+	/* XXX We're not checking the str_size! */
+	if (strncmp(buf, "io+mem", 6) == 0)
+		goto both;
+	else if (strncmp(buf, "io", 2) == 0)
+		goto both;
+	else if (strncmp(buf, "mem", 3) == 0)
+		goto both;
+	return 0;
+both:
+	*io_state = VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
+	return 1;
+}
+
+/* This is only used as a cookie, it should not be dereferenced */
+static struct pci_dev *vga_default;
+
+/* Find somebody in our list */
+static struct vga_device *vgadev_find(struct pci_dev *pdev)
+{
+	struct vga_device *vgadev;
+
+	list_for_each_entry(vgadev, &vga_list, list)
+		if (pdev == vgadev->pdev)
+			return vgadev;
+	return NULL;
+}
+
+/**
+ * vga_default_device - return the default VGA device, for vgacon
+ *
+ * This can be defined by the platform. The default implementation is
+ * rather dumb and will probably only work properly on single VGA card
+ * setups and/or x86 platforms.
+ *
+ * If your VGA default device is not PCI, you'll have to return NULL here.
+ * In this case, I assume it will not conflict with any PCI card. If this
+ * is not true, I'll have to define two arch hooks for enabling/disabling
+ * the VGA default device if that is possible. This may be a problem with
+ * real _ISA_ VGA cards, in addition to a PCI one. I don't know at this
+ * point how to deal with that card. Can their IOs be disabled at all? If
+ * not, then I suppose it's a matter of having the proper arch hook telling
+ * us about it, so we basically never allow anybody to succeed a vga_get().
+ */
+struct pci_dev *vga_default_device(void)
+{
+	return vga_default;
+}
+EXPORT_SYMBOL_GPL(vga_default_device);
+
+void vga_set_default_device(struct pci_dev *pdev)
+{
+	if (vga_default == pdev)
+		return;
+
+	pci_dev_put(vga_default);
+	vga_default = pci_dev_get(pdev);
+}
+
+/**
+ * vga_remove_vgacon - deactivate VGA console
+ *
+ * Unbind and unregister vgacon in case pdev is the default VGA device.
+ * Can be called by GPU drivers on initialization to make sure VGA register
+ * access done by vgacon will not disturb the device.
+ *
+ * @pdev: PCI device.
+ */
+#if !defined(CONFIG_VGA_CONSOLE)
+int vga_remove_vgacon(struct pci_dev *pdev)
+{
+	return 0;
+}
+#elif !defined(CONFIG_DUMMY_CONSOLE)
+int vga_remove_vgacon(struct pci_dev *pdev)
+{
+	return -ENODEV;
+}
+#else
+int vga_remove_vgacon(struct pci_dev *pdev)
+{
+	int ret = 0;
+
+	if (pdev != vga_default)
+		return 0;
+	vgaarb_info(&pdev->dev, "deactivate vga console\n");
+
+	console_lock();
+	if (con_is_bound(&vga_con))
+		ret = do_take_over_console(&dummy_con, 0,
+					   MAX_NR_CONSOLES - 1, 1);
+	if (ret == 0) {
+		ret = do_unregister_con_driver(&vga_con);
+
+		/* Ignore "already unregistered". */
+		if (ret == -ENODEV)
+			ret = 0;
+	}
+	console_unlock();
+
+	return ret;
+}
+#endif
+EXPORT_SYMBOL(vga_remove_vgacon);
+
+/*
+ * If we don't ever use VGA arbitration, we should avoid turning off
+ * anything anywhere due to old X servers getting confused about the boot
+ * device not being VGA.
+ */
+static void vga_check_first_use(void)
+{
+	/*
+	 * Inform all GPUs in the system that VGA arbitration has occurred
+	 * so they can disable resources if possible.
+	 */
+	if (!vga_arbiter_used) {
+		vga_arbiter_used = true;
+		vga_arbiter_notify_clients();
+	}
+}
+
+static struct vga_device *__vga_tryget(struct vga_device *vgadev,
+				       unsigned int rsrc)
+{
+	struct device *dev = &vgadev->pdev->dev;
+	unsigned int wants, legacy_wants, match;
+	struct vga_device *conflict;
+	unsigned int pci_bits;
+	u32 flags = 0;
+
+	/*
+	 * Account for "normal" resources to lock. If we decode the legacy,
+	 * counterpart, we need to request it as well
+	 */
+	if ((rsrc & VGA_RSRC_NORMAL_IO) &&
+	    (vgadev->decodes & VGA_RSRC_LEGACY_IO))
+		rsrc |= VGA_RSRC_LEGACY_IO;
+	if ((rsrc & VGA_RSRC_NORMAL_MEM) &&
+	    (vgadev->decodes & VGA_RSRC_LEGACY_MEM))
+		rsrc |= VGA_RSRC_LEGACY_MEM;
+
+	vgaarb_dbg(dev, "%s: %d\n", __func__, rsrc);
+	vgaarb_dbg(dev, "%s: owns: %d\n", __func__, vgadev->owns);
+
+	/* Check what resources we need to acquire */
+	wants = rsrc & ~vgadev->owns;
+
+	/* We already own everything, just mark locked & bye bye */
+	if (wants == 0)
+		goto lock_them;
+
+	/*
+	 * We don't need to request a legacy resource, we just enable
+	 * appropriate decoding and go.
+	 */
+	legacy_wants = wants & VGA_RSRC_LEGACY_MASK;
+	if (legacy_wants == 0)
+		goto enable_them;
+
+	/* Ok, we don't, let's find out who we need to kick off */
+	list_for_each_entry(conflict, &vga_list, list) {
+		unsigned int lwants = legacy_wants;
+		unsigned int change_bridge = 0;
+
+		/* Don't conflict with myself */
+		if (vgadev == conflict)
+			continue;
+
+		/*
+		 * We have a possible conflict. Before we go further, we must
+		 * check if we sit on the same bus as the conflicting device.
+		 * If we don't, then we must tie both IO and MEM resources
+		 * together since there is only a single bit controlling
+		 * VGA forwarding on P2P bridges.
+		 */
+		if (vgadev->pdev->bus != conflict->pdev->bus) {
+			change_bridge = 1;
+			lwants = VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
+		}
+
+		/*
+		 * Check if the guy has a lock on the resource. If he does,
+		 * return the conflicting entry.
+		 */
+		if (conflict->locks & lwants)
+			return conflict;
+
+		/*
+		 * Ok, now check if it owns the resource we want.  We can
+		 * lock resources that are not decoded; therefore a device
+		 * can own resources it doesn't decode.
+		 */
+		match = lwants & conflict->owns;
+		if (!match)
+			continue;
+
+		/*
+		 * Looks like he doesn't have a lock, we can steal them
+		 * from him.
+		 */
+
+		flags = 0;
+		pci_bits = 0;
+
+		/*
+		 * If we can't control legacy resources via the bridge, we
+		 * also need to disable normal decoding.
+		 */
+		if (!conflict->bridge_has_one_vga) {
+			if ((match & conflict->decodes) & VGA_RSRC_LEGACY_MEM)
+				pci_bits |= PCI_COMMAND_MEMORY;
+			if ((match & conflict->decodes) & VGA_RSRC_LEGACY_IO)
+				pci_bits |= PCI_COMMAND_IO;
+
+			if (pci_bits)
+				flags |= PCI_VGA_STATE_CHANGE_DECODES;
+		}
+
+		if (change_bridge)
+			flags |= PCI_VGA_STATE_CHANGE_BRIDGE;
+
+		pci_set_vga_state(conflict->pdev, false, pci_bits, flags);
+		conflict->owns &= ~match;
+
+		/* If we disabled normal decoding, reflect it in owns */
+		if (pci_bits & PCI_COMMAND_MEMORY)
+			conflict->owns &= ~VGA_RSRC_NORMAL_MEM;
+		if (pci_bits & PCI_COMMAND_IO)
+			conflict->owns &= ~VGA_RSRC_NORMAL_IO;
+	}
+
+enable_them:
+	/*
+	 * Ok, we got it, everybody conflicting has been disabled, let's
+	 * enable us.  Mark any bits in "owns" regardless of whether we
+	 * decoded them.  We can lock resources we don't decode, therefore
+	 * we must track them via "owns".
+	 */
+	flags = 0;
+	pci_bits = 0;
+
+	if (!vgadev->bridge_has_one_vga) {
+		flags |= PCI_VGA_STATE_CHANGE_DECODES;
+		if (wants & (VGA_RSRC_LEGACY_MEM|VGA_RSRC_NORMAL_MEM))
+			pci_bits |= PCI_COMMAND_MEMORY;
+		if (wants & (VGA_RSRC_LEGACY_IO|VGA_RSRC_NORMAL_IO))
+			pci_bits |= PCI_COMMAND_IO;
+	}
+	if (wants & VGA_RSRC_LEGACY_MASK)
+		flags |= PCI_VGA_STATE_CHANGE_BRIDGE;
+
+	pci_set_vga_state(vgadev->pdev, true, pci_bits, flags);
+
+	vgadev->owns |= wants;
+lock_them:
+	vgadev->locks |= (rsrc & VGA_RSRC_LEGACY_MASK);
+	if (rsrc & VGA_RSRC_LEGACY_IO)
+		vgadev->io_lock_cnt++;
+	if (rsrc & VGA_RSRC_LEGACY_MEM)
+		vgadev->mem_lock_cnt++;
+	if (rsrc & VGA_RSRC_NORMAL_IO)
+		vgadev->io_norm_cnt++;
+	if (rsrc & VGA_RSRC_NORMAL_MEM)
+		vgadev->mem_norm_cnt++;
+
+	return NULL;
+}
+
+static void __vga_put(struct vga_device *vgadev, unsigned int rsrc)
+{
+	struct device *dev = &vgadev->pdev->dev;
+	unsigned int old_locks = vgadev->locks;
+
+	vgaarb_dbg(dev, "%s\n", __func__);
+
+	/*
+	 * Update our counters and account for equivalent legacy resources
+	 * if we decode them.
+	 */
+	if ((rsrc & VGA_RSRC_NORMAL_IO) && vgadev->io_norm_cnt > 0) {
+		vgadev->io_norm_cnt--;
+		if (vgadev->decodes & VGA_RSRC_LEGACY_IO)
+			rsrc |= VGA_RSRC_LEGACY_IO;
+	}
+	if ((rsrc & VGA_RSRC_NORMAL_MEM) && vgadev->mem_norm_cnt > 0) {
+		vgadev->mem_norm_cnt--;
+		if (vgadev->decodes & VGA_RSRC_LEGACY_MEM)
+			rsrc |= VGA_RSRC_LEGACY_MEM;
+	}
+	if ((rsrc & VGA_RSRC_LEGACY_IO) && vgadev->io_lock_cnt > 0)
+		vgadev->io_lock_cnt--;
+	if ((rsrc & VGA_RSRC_LEGACY_MEM) && vgadev->mem_lock_cnt > 0)
+		vgadev->mem_lock_cnt--;
+
+	/*
+	 * Just clear lock bits, we do lazy operations so we don't really
+	 * have to bother about anything else at this point.
+	 */
+	if (vgadev->io_lock_cnt == 0)
+		vgadev->locks &= ~VGA_RSRC_LEGACY_IO;
+	if (vgadev->mem_lock_cnt == 0)
+		vgadev->locks &= ~VGA_RSRC_LEGACY_MEM;
+
+	/*
+	 * Kick the wait queue in case somebody was waiting if we actually
+	 * released something.
+	 */
+	if (old_locks != vgadev->locks)
+		wake_up_all(&vga_wait_queue);
+}
+
+/**
+ * vga_get - acquire & lock VGA resources
+ * @pdev: PCI device of the VGA card or NULL for the system default
+ * @rsrc: bit mask of resources to acquire and lock
+ * @interruptible: blocking should be interruptible by signals ?
+ *
+ * Acquire VGA resources for the given card and mark those resources
+ * locked. If the resources requested are "normal" (and not legacy)
+ * resources, the arbiter will first check whether the card is doing legacy
+ * decoding for that type of resource. If yes, the lock is "converted" into
+ * a legacy resource lock.
+ *
+ * The arbiter will first look for all VGA cards that might conflict and disable
+ * their IOs and/or Memory access, including VGA forwarding on P2P bridges if
+ * necessary, so that the requested resources can be used. Then, the card is
+ * marked as locking these resources and the IO and/or Memory accesses are
+ * enabled on the card (including VGA forwarding on parent P2P bridges if any).
+ *
+ * This function will block if some conflicting card is already locking one of
+ * the required resources (or any resource on a different bus segment, since P2P
+ * bridges don't differentiate VGA memory and IO afaik). You can indicate
+ * whether this blocking should be interruptible by a signal (for userland
+ * interface) or not.
+ *
+ * Must not be called at interrupt time or in atomic context.  If the card
+ * already owns the resources, the function succeeds.  Nested calls are
+ * supported (a per-resource counter is maintained)
+ *
+ * On success, release the VGA resource again with vga_put().
+ *
+ * Returns:
+ *
+ * 0 on success, negative error code on failure.
+ */
+int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible)
+{
+	struct vga_device *vgadev, *conflict;
+	unsigned long flags;
+	wait_queue_entry_t wait;
+	int rc = 0;
+
+	vga_check_first_use();
+	/* The caller should check for this, but let's be sure */
+	if (pdev == NULL)
+		pdev = vga_default_device();
+	if (pdev == NULL)
+		return 0;
+
+	for (;;) {
+		spin_lock_irqsave(&vga_lock, flags);
+		vgadev = vgadev_find(pdev);
+		if (vgadev == NULL) {
+			spin_unlock_irqrestore(&vga_lock, flags);
+			rc = -ENODEV;
+			break;
+		}
+		conflict = __vga_tryget(vgadev, rsrc);
+		spin_unlock_irqrestore(&vga_lock, flags);
+		if (conflict == NULL)
+			break;
+
+		/*
+		 * We have a conflict; we wait until somebody kicks the
+		 * work queue. Currently we have one work queue that we
+		 * kick each time some resources are released, but it would
+		 * be fairly easy to have a per-device one so that we only
+		 * need to attach to the conflicting device.
+		 */
+		init_waitqueue_entry(&wait, current);
+		add_wait_queue(&vga_wait_queue, &wait);
+		set_current_state(interruptible ?
+				  TASK_INTERRUPTIBLE :
+				  TASK_UNINTERRUPTIBLE);
+		if (interruptible && signal_pending(current)) {
+			__set_current_state(TASK_RUNNING);
+			remove_wait_queue(&vga_wait_queue, &wait);
+			rc = -ERESTARTSYS;
+			break;
+		}
+		schedule();
+		remove_wait_queue(&vga_wait_queue, &wait);
+	}
+	return rc;
+}
+EXPORT_SYMBOL(vga_get);
+
+/**
+ * vga_tryget - try to acquire & lock legacy VGA resources
+ * @pdev: PCI device of VGA card or NULL for system default
+ * @rsrc: bit mask of resources to acquire and lock
+ *
+ * Perform the same operation as vga_get(), but return an error (-EBUSY)
+ * instead of blocking if the resources are already locked by another card.
+ * Can be called in any context.
+ *
+ * On success, release the VGA resource again with vga_put().
+ *
+ * Returns:
+ *
+ * 0 on success, negative error code on failure.
+ */
+static int vga_tryget(struct pci_dev *pdev, unsigned int rsrc)
+{
+	struct vga_device *vgadev;
+	unsigned long flags;
+	int rc = 0;
+
+	vga_check_first_use();
+
+	/* The caller should check for this, but let's be sure */
+	if (pdev == NULL)
+		pdev = vga_default_device();
+	if (pdev == NULL)
+		return 0;
+	spin_lock_irqsave(&vga_lock, flags);
+	vgadev = vgadev_find(pdev);
+	if (vgadev == NULL) {
+		rc = -ENODEV;
+		goto bail;
+	}
+	if (__vga_tryget(vgadev, rsrc))
+		rc = -EBUSY;
+bail:
+	spin_unlock_irqrestore(&vga_lock, flags);
+	return rc;
+}
+
+/**
+ * vga_put - release lock on legacy VGA resources
+ * @pdev: PCI device of VGA card or NULL for system default
+ * @rsrc: bit mask of resource to release
+ *
+ * Release resources previously locked by vga_get() or vga_tryget().  The
+ * resources aren't disabled right away, so that a subsequent vga_get() on
+ * the same card will succeed immediately.  Resources have a counter, so
+ * locks are only released if the counter reaches 0.
+ */
+void vga_put(struct pci_dev *pdev, unsigned int rsrc)
+{
+	struct vga_device *vgadev;
+	unsigned long flags;
+
+	/* The caller should check for this, but let's be sure */
+	if (pdev == NULL)
+		pdev = vga_default_device();
+	if (pdev == NULL)
+		return;
+	spin_lock_irqsave(&vga_lock, flags);
+	vgadev = vgadev_find(pdev);
+	if (vgadev == NULL)
+		goto bail;
+	__vga_put(vgadev, rsrc);
+bail:
+	spin_unlock_irqrestore(&vga_lock, flags);
+}
+EXPORT_SYMBOL(vga_put);
+
+static bool vga_is_firmware_default(struct pci_dev *pdev)
+{
+#if defined(CONFIG_X86) || defined(CONFIG_IA64)
+	u64 base = screen_info.lfb_base;
+	u64 size = screen_info.lfb_size;
+	struct resource *r;
+	u64 limit;
+
+	/* Select the device owning the boot framebuffer if there is one */
+
+	if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE)
+		base |= (u64)screen_info.ext_lfb_base << 32;
+
+	limit = base + size;
+
+	/* Does firmware framebuffer belong to us? */
+	pci_dev_for_each_resource(pdev, r) {
+		if (resource_type(r) != IORESOURCE_MEM)
+			continue;
+
+		if (!r->start || !r->end)
+			continue;
+
+		if (base < r->start || limit >= r->end)
+			continue;
+
+		return true;
+	}
+#endif
+	return false;
+}
+
+static bool vga_arb_integrated_gpu(struct device *dev)
+{
+#if defined(CONFIG_ACPI)
+	struct acpi_device *adev = ACPI_COMPANION(dev);
+
+	return adev && !strcmp(acpi_device_hid(adev), ACPI_VIDEO_HID);
+#else
+	return false;
+#endif
+}
+
+/*
+ * Return true if vgadev is a better default VGA device than the best one
+ * we've seen so far.
+ */
+static bool vga_is_boot_device(struct vga_device *vgadev)
+{
+	struct vga_device *boot_vga = vgadev_find(vga_default_device());
+	struct pci_dev *pdev = vgadev->pdev;
+	u16 cmd, boot_cmd;
+
+	/*
+	 * We select the default VGA device in this order:
+	 *   Firmware framebuffer (see vga_arb_select_default_device())
+	 *   Legacy VGA device (owns VGA_RSRC_LEGACY_MASK)
+	 *   Non-legacy integrated device (see vga_arb_select_default_device())
+	 *   Non-legacy discrete device (see vga_arb_select_default_device())
+	 *   Other device (see vga_arb_select_default_device())
+	 */
+
+	/*
+	 * We always prefer a firmware default device, so if we've already
+	 * found one, there's no need to consider vgadev.
+	 */
+	if (boot_vga && boot_vga->is_firmware_default)
+		return false;
+
+	if (vga_is_firmware_default(pdev)) {
+		vgadev->is_firmware_default = true;
+		return true;
+	}
+
+	/*
+	 * A legacy VGA device has MEM and IO enabled and any bridges
+	 * leading to it have PCI_BRIDGE_CTL_VGA enabled so the legacy
+	 * resources ([mem 0xa0000-0xbffff], [io 0x3b0-0x3bb], etc) are
+	 * routed to it.
+	 *
+	 * We use the first one we find, so if we've already found one,
+	 * vgadev is no better.
+	 */
+	if (boot_vga &&
+	    (boot_vga->owns & VGA_RSRC_LEGACY_MASK) == VGA_RSRC_LEGACY_MASK)
+		return false;
+
+	if ((vgadev->owns & VGA_RSRC_LEGACY_MASK) == VGA_RSRC_LEGACY_MASK)
+		return true;
+
+	/*
+	 * If we haven't found a legacy VGA device, accept a non-legacy
+	 * device.  It may have either IO or MEM enabled, and bridges may
+	 * not have PCI_BRIDGE_CTL_VGA enabled, so it may not be able to
+	 * use legacy VGA resources.  Prefer an integrated GPU over others.
+	 */
+	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+	if (cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
+
+		/*
+		 * An integrated GPU overrides a previous non-legacy
+		 * device.  We expect only a single integrated GPU, but if
+		 * there are more, we use the *last* because that was the
+		 * previous behavior.
+		 */
+		if (vga_arb_integrated_gpu(&pdev->dev))
+			return true;
+
+		/*
+		 * We prefer the first non-legacy discrete device we find.
+		 * If we already found one, vgadev is no better.
+		 */
+		if (boot_vga) {
+			pci_read_config_word(boot_vga->pdev, PCI_COMMAND,
+					     &boot_cmd);
+			if (boot_cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))
+				return false;
+		}
+		return true;
+	}
+
+	/*
+	 * Vgadev has neither IO nor MEM enabled.  If we haven't found any
+	 * other VGA devices, it is the best candidate so far.
+	 */
+	if (!boot_vga)
+		return true;
+
+	return false;
+}
+
+/*
+ * Rules for using a bridge to control a VGA descendant decoding: if a bridge
+ * has only one VGA descendant then it can be used to control the VGA routing
+ * for that device. It should always use the bridge closest to the device to
+ * control it. If a bridge has a direct VGA descendant, but also have a sub-
+ * bridge VGA descendant then we cannot use that bridge to control the direct
+ * VGA descendant. So for every device we register, we need to iterate all
+ * its parent bridges so we can invalidate any devices using them properly.
+ */
+static void vga_arbiter_check_bridge_sharing(struct vga_device *vgadev)
+{
+	struct vga_device *same_bridge_vgadev;
+	struct pci_bus *new_bus, *bus;
+	struct pci_dev *new_bridge, *bridge;
+
+	vgadev->bridge_has_one_vga = true;
+
+	if (list_empty(&vga_list)) {
+		vgaarb_info(&vgadev->pdev->dev, "bridge control possible\n");
+		return;
+	}
+
+	/* Iterate the new device's bridge hierarchy */
+	new_bus = vgadev->pdev->bus;
+	while (new_bus) {
+		new_bridge = new_bus->self;
+
+		/* Go through list of devices already registered */
+		list_for_each_entry(same_bridge_vgadev, &vga_list, list) {
+			bus = same_bridge_vgadev->pdev->bus;
+			bridge = bus->self;
+
+			/* See if it shares a bridge with this device */
+			if (new_bridge == bridge) {
+				/*
+				 * If its direct parent bridge is the same
+				 * as any bridge of this device then it can't
+				 * be used for that device.
+				 */
+				same_bridge_vgadev->bridge_has_one_vga = false;
+			}
+
+			/*
+			 * Now iterate the previous device's bridge hierarchy.
+			 * If the new device's parent bridge is in the other
+			 * device's hierarchy, we can't use it to control this
+			 * device.
+			 */
+			while (bus) {
+				bridge = bus->self;
+
+				if (bridge && bridge == vgadev->pdev->bus->self)
+					vgadev->bridge_has_one_vga = false;
+
+				bus = bus->parent;
+			}
+		}
+		new_bus = new_bus->parent;
+	}
+
+	if (vgadev->bridge_has_one_vga)
+		vgaarb_info(&vgadev->pdev->dev, "bridge control possible\n");
+	else
+		vgaarb_info(&vgadev->pdev->dev, "no bridge control possible\n");
+}
+
+/*
+ * Currently, we assume that the "initial" setup of the system is not sane,
+ * that is, we come up with conflicting devices and let the arbiter's
+ * client decide if devices decodes legacy things or not.
+ */
+static bool vga_arbiter_add_pci_device(struct pci_dev *pdev)
+{
+	struct vga_device *vgadev;
+	unsigned long flags;
+	struct pci_bus *bus;
+	struct pci_dev *bridge;
+	u16 cmd;
+
+	/* Only deal with VGA class devices */
+	if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
+		return false;
+
+	/* Allocate structure */
+	vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL);
+	if (vgadev == NULL) {
+		vgaarb_err(&pdev->dev, "failed to allocate VGA arbiter data\n");
+		/*
+		 * What to do on allocation failure? For now, let's just do
+		 * nothing, I'm not sure there is anything saner to be done.
+		 */
+		return false;
+	}
+
+	/* Take lock & check for duplicates */
+	spin_lock_irqsave(&vga_lock, flags);
+	if (vgadev_find(pdev) != NULL) {
+		BUG_ON(1);
+		goto fail;
+	}
+	vgadev->pdev = pdev;
+
+	/* By default, assume we decode everything */
+	vgadev->decodes = VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
+			  VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
+
+	/* By default, mark it as decoding */
+	vga_decode_count++;
+
+	/*
+	 * Mark that we "own" resources based on our enables, we will
+	 * clear that below if the bridge isn't forwarding.
+	 */
+	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+	if (cmd & PCI_COMMAND_IO)
+		vgadev->owns |= VGA_RSRC_LEGACY_IO;
+	if (cmd & PCI_COMMAND_MEMORY)
+		vgadev->owns |= VGA_RSRC_LEGACY_MEM;
+
+	/* Check if VGA cycles can get down to us */
+	bus = pdev->bus;
+	while (bus) {
+		bridge = bus->self;
+		if (bridge) {
+			u16 l;
+
+			pci_read_config_word(bridge, PCI_BRIDGE_CONTROL, &l);
+			if (!(l & PCI_BRIDGE_CTL_VGA)) {
+				vgadev->owns = 0;
+				break;
+			}
+		}
+		bus = bus->parent;
+	}
+
+	if (vga_is_boot_device(vgadev)) {
+		vgaarb_info(&pdev->dev, "setting as boot VGA device%s\n",
+			    vga_default_device() ?
+			    " (overriding previous)" : "");
+		vga_set_default_device(pdev);
+	}
+
+	vga_arbiter_check_bridge_sharing(vgadev);
+
+	/* Add to the list */
+	list_add_tail(&vgadev->list, &vga_list);
+	vga_count++;
+	vgaarb_info(&pdev->dev, "VGA device added: decodes=%s,owns=%s,locks=%s\n",
+		vga_iostate_to_str(vgadev->decodes),
+		vga_iostate_to_str(vgadev->owns),
+		vga_iostate_to_str(vgadev->locks));
+
+	spin_unlock_irqrestore(&vga_lock, flags);
+	return true;
+fail:
+	spin_unlock_irqrestore(&vga_lock, flags);
+	kfree(vgadev);
+	return false;
+}
+
+static bool vga_arbiter_del_pci_device(struct pci_dev *pdev)
+{
+	struct vga_device *vgadev;
+	unsigned long flags;
+	bool ret = true;
+
+	spin_lock_irqsave(&vga_lock, flags);
+	vgadev = vgadev_find(pdev);
+	if (vgadev == NULL) {
+		ret = false;
+		goto bail;
+	}
+
+	if (vga_default == pdev)
+		vga_set_default_device(NULL);
+
+	if (vgadev->decodes & (VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM))
+		vga_decode_count--;
+
+	/* Remove entry from list */
+	list_del(&vgadev->list);
+	vga_count--;
+
+	/* Wake up all possible waiters */
+	wake_up_all(&vga_wait_queue);
+bail:
+	spin_unlock_irqrestore(&vga_lock, flags);
+	kfree(vgadev);
+	return ret;
+}
+
+/* Called with the lock */
+static void vga_update_device_decodes(struct vga_device *vgadev,
+				      unsigned int new_decodes)
+{
+	struct device *dev = &vgadev->pdev->dev;
+	unsigned int old_decodes = vgadev->decodes;
+	unsigned int decodes_removed = ~new_decodes & old_decodes;
+	unsigned int decodes_unlocked = vgadev->locks & decodes_removed;
+
+	vgadev->decodes = new_decodes;
+
+	vgaarb_info(dev, "VGA decodes changed: olddecodes=%s,decodes=%s:owns=%s\n",
+		    vga_iostate_to_str(old_decodes),
+		    vga_iostate_to_str(vgadev->decodes),
+		    vga_iostate_to_str(vgadev->owns));
+
+	/* If we removed locked decodes, lock count goes to zero, and release */
+	if (decodes_unlocked) {
+		if (decodes_unlocked & VGA_RSRC_LEGACY_IO)
+			vgadev->io_lock_cnt = 0;
+		if (decodes_unlocked & VGA_RSRC_LEGACY_MEM)
+			vgadev->mem_lock_cnt = 0;
+		__vga_put(vgadev, decodes_unlocked);
+	}
+
+	/* Change decodes counter */
+	if (old_decodes & VGA_RSRC_LEGACY_MASK &&
+	    !(new_decodes & VGA_RSRC_LEGACY_MASK))
+		vga_decode_count--;
+	if (!(old_decodes & VGA_RSRC_LEGACY_MASK) &&
+	    new_decodes & VGA_RSRC_LEGACY_MASK)
+		vga_decode_count++;
+	vgaarb_dbg(dev, "decoding count now is: %d\n", vga_decode_count);
+}
+
+static void __vga_set_legacy_decoding(struct pci_dev *pdev,
+				      unsigned int decodes,
+				      bool userspace)
+{
+	struct vga_device *vgadev;
+	unsigned long flags;
+
+	decodes &= VGA_RSRC_LEGACY_MASK;
+
+	spin_lock_irqsave(&vga_lock, flags);
+	vgadev = vgadev_find(pdev);
+	if (vgadev == NULL)
+		goto bail;
+
+	/* Don't let userspace futz with kernel driver decodes */
+	if (userspace && vgadev->set_decode)
+		goto bail;
+
+	/* Update the device decodes + counter */
+	vga_update_device_decodes(vgadev, decodes);
+
+	/*
+	 * XXX If somebody is going from "doesn't decode" to "decodes"
+	 * state here, additional care must be taken as we may have pending
+	 * ownership of non-legacy region.
+	 */
+bail:
+	spin_unlock_irqrestore(&vga_lock, flags);
+}
+
+/**
+ * vga_set_legacy_decoding
+ * @pdev: PCI device of the VGA card
+ * @decodes: bit mask of what legacy regions the card decodes
+ *
+ * Indicate to the arbiter if the card decodes legacy VGA IOs, legacy VGA
+ * Memory, both, or none. All cards default to both, the card driver (fbdev for
+ * example) should tell the arbiter if it has disabled legacy decoding, so the
+ * card can be left out of the arbitration process (and can be safe to take
+ * interrupts at any time.
+ */
+void vga_set_legacy_decoding(struct pci_dev *pdev, unsigned int decodes)
+{
+	__vga_set_legacy_decoding(pdev, decodes, false);
+}
+EXPORT_SYMBOL(vga_set_legacy_decoding);
+
+/**
+ * vga_client_register - register or unregister a VGA arbitration client
+ * @pdev: PCI device of the VGA client
+ * @set_decode: VGA decode change callback
+ *
+ * Clients have two callback mechanisms they can use.
+ *
+ * @set_decode callback: If a client can disable its GPU VGA resource, it
+ * will get a callback from this to set the encode/decode state.
+ *
+ * Rationale: we cannot disable VGA decode resources unconditionally
+ * because some single GPU laptops seem to require ACPI or BIOS access to
+ * the VGA registers to control things like backlights etc. Hopefully newer
+ * multi-GPU laptops do something saner, and desktops won't have any
+ * special ACPI for this. The driver will get a callback when VGA
+ * arbitration is first used by userspace since some older X servers have
+ * issues.
+ *
+ * Does not check whether a client for @pdev has been registered already.
+ *
+ * To unregister, call vga_client_unregister().
+ *
+ * Returns: 0 on success, -ENODEV on failure
+ */
+int vga_client_register(struct pci_dev *pdev,
+		unsigned int (*set_decode)(struct pci_dev *pdev, bool decode))
+{
+	unsigned long flags;
+	struct vga_device *vgadev;
+
+	spin_lock_irqsave(&vga_lock, flags);
+	vgadev = vgadev_find(pdev);
+	if (vgadev)
+		vgadev->set_decode = set_decode;
+	spin_unlock_irqrestore(&vga_lock, flags);
+	if (!vgadev)
+		return -ENODEV;
+	return 0;
+}
+EXPORT_SYMBOL(vga_client_register);
+
+/*
+ * Char driver implementation
+ *
+ * Semantics is:
+ *
+ *  open       : Open user instance of the arbiter. By default, it's
+ *                attached to the default VGA device of the system.
+ *
+ *  close      : Close user instance, release locks
+ *
+ *  read       : Return a string indicating the status of the target.
+ *                An IO state string is of the form {io,mem,io+mem,none},
+ *                mc and ic are respectively mem and io lock counts (for
+ *                debugging/diagnostic only). "decodes" indicate what the
+ *                card currently decodes, "owns" indicates what is currently
+ *                enabled on it, and "locks" indicates what is locked by this
+ *                card. If the card is unplugged, we get "invalid" then for
+ *                card_ID and an -ENODEV error is returned for any command
+ *                until a new card is targeted
+ *
+ *   "<card_ID>,decodes=<io_state>,owns=<io_state>,locks=<io_state> (ic,mc)"
+ *
+ * write       : write a command to the arbiter. List of commands is:
+ *
+ *   target <card_ID>   : switch target to card <card_ID> (see below)
+ *   lock <io_state>    : acquire locks on target ("none" is invalid io_state)
+ *   trylock <io_state> : non-blocking acquire locks on target
+ *   unlock <io_state>  : release locks on target
+ *   unlock all         : release all locks on target held by this user
+ *   decodes <io_state> : set the legacy decoding attributes for the card
+ *
+ * poll         : event if something change on any card (not just the target)
+ *
+ * card_ID is of the form "PCI:domain:bus:dev.fn". It can be set to "default"
+ * to go back to the system default card (TODO: not implemented yet).
+ * Currently, only PCI is supported as a prefix, but the userland API may
+ * support other bus types in the future, even if the current kernel
+ * implementation doesn't.
+ *
+ * Note about locks:
+ *
+ * The driver keeps track of which user has what locks on which card. It
+ * supports stacking, like the kernel one. This complicates the implementation
+ * a bit, but makes the arbiter more tolerant to userspace problems and able
+ * to properly cleanup in all cases when a process dies.
+ * Currently, a max of 16 cards simultaneously can have locks issued from
+ * userspace for a given user (file descriptor instance) of the arbiter.
+ *
+ * If the device is hot-unplugged, there is a hook inside the module to notify
+ * it being added/removed in the system and automatically added/removed in
+ * the arbiter.
+ */
+
+#define MAX_USER_CARDS         CONFIG_VGA_ARB_MAX_GPUS
+#define PCI_INVALID_CARD       ((struct pci_dev *)-1UL)
+
+/* Each user has an array of these, tracking which cards have locks */
+struct vga_arb_user_card {
+	struct pci_dev *pdev;
+	unsigned int mem_cnt;
+	unsigned int io_cnt;
+};
+
+struct vga_arb_private {
+	struct list_head list;
+	struct pci_dev *target;
+	struct vga_arb_user_card cards[MAX_USER_CARDS];
+	spinlock_t lock;
+};
+
+static LIST_HEAD(vga_user_list);
+static DEFINE_SPINLOCK(vga_user_lock);
+
+
+/*
+ * Take a string in the format: "PCI:domain:bus:dev.fn" and return the
+ * respective values. If the string is not in this format, return 0.
+ */
+static int vga_pci_str_to_vars(char *buf, int count, unsigned int *domain,
+			       unsigned int *bus, unsigned int *devfn)
+{
+	int n;
+	unsigned int slot, func;
+
+	n = sscanf(buf, "PCI:%x:%x:%x.%x", domain, bus, &slot, &func);
+	if (n != 4)
+		return 0;
+
+	*devfn = PCI_DEVFN(slot, func);
+
+	return 1;
+}
+
+static ssize_t vga_arb_read(struct file *file, char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	struct vga_arb_private *priv = file->private_data;
+	struct vga_device *vgadev;
+	struct pci_dev *pdev;
+	unsigned long flags;
+	size_t len;
+	int rc;
+	char *lbuf;
+
+	lbuf = kmalloc(1024, GFP_KERNEL);
+	if (lbuf == NULL)
+		return -ENOMEM;
+
+	/* Protect vga_list */
+	spin_lock_irqsave(&vga_lock, flags);
+
+	/* If we are targeting the default, use it */
+	pdev = priv->target;
+	if (pdev == NULL || pdev == PCI_INVALID_CARD) {
+		spin_unlock_irqrestore(&vga_lock, flags);
+		len = sprintf(lbuf, "invalid");
+		goto done;
+	}
+
+	/* Find card vgadev structure */
+	vgadev = vgadev_find(pdev);
+	if (vgadev == NULL) {
+		/*
+		 * Wow, it's not in the list, that shouldn't happen, let's
+		 * fix us up and return invalid card.
+		 */
+		spin_unlock_irqrestore(&vga_lock, flags);
+		len = sprintf(lbuf, "invalid");
+		goto done;
+	}
+
+	/* Fill the buffer with info */
+	len = snprintf(lbuf, 1024,
+		       "count:%d,PCI:%s,decodes=%s,owns=%s,locks=%s(%u:%u)\n",
+		       vga_decode_count, pci_name(pdev),
+		       vga_iostate_to_str(vgadev->decodes),
+		       vga_iostate_to_str(vgadev->owns),
+		       vga_iostate_to_str(vgadev->locks),
+		       vgadev->io_lock_cnt, vgadev->mem_lock_cnt);
+
+	spin_unlock_irqrestore(&vga_lock, flags);
+done:
+
+	/* Copy that to user */
+	if (len > count)
+		len = count;
+	rc = copy_to_user(buf, lbuf, len);
+	kfree(lbuf);
+	if (rc)
+		return -EFAULT;
+	return len;
+}
+
+/*
+ * TODO: To avoid parsing inside kernel and to improve the speed we may
+ * consider use ioctl here
+ */
+static ssize_t vga_arb_write(struct file *file, const char __user *buf,
+			     size_t count, loff_t *ppos)
+{
+	struct vga_arb_private *priv = file->private_data;
+	struct vga_arb_user_card *uc = NULL;
+	struct pci_dev *pdev;
+
+	unsigned int io_state;
+
+	char kbuf[64], *curr_pos;
+	size_t remaining = count;
+
+	int ret_val;
+	int i;
+
+	if (count >= sizeof(kbuf))
+		return -EINVAL;
+	if (copy_from_user(kbuf, buf, count))
+		return -EFAULT;
+	curr_pos = kbuf;
+	kbuf[count] = '\0';
+
+	if (strncmp(curr_pos, "lock ", 5) == 0) {
+		curr_pos += 5;
+		remaining -= 5;
+
+		pr_debug("client 0x%p called 'lock'\n", priv);
+
+		if (!vga_str_to_iostate(curr_pos, remaining, &io_state)) {
+			ret_val = -EPROTO;
+			goto done;
+		}
+		if (io_state == VGA_RSRC_NONE) {
+			ret_val = -EPROTO;
+			goto done;
+		}
+
+		pdev = priv->target;
+		if (priv->target == NULL) {
+			ret_val = -ENODEV;
+			goto done;
+		}
+
+		vga_get_uninterruptible(pdev, io_state);
+
+		/* Update the client's locks lists */
+		for (i = 0; i < MAX_USER_CARDS; i++) {
+			if (priv->cards[i].pdev == pdev) {
+				if (io_state & VGA_RSRC_LEGACY_IO)
+					priv->cards[i].io_cnt++;
+				if (io_state & VGA_RSRC_LEGACY_MEM)
+					priv->cards[i].mem_cnt++;
+				break;
+			}
+		}
+
+		ret_val = count;
+		goto done;
+	} else if (strncmp(curr_pos, "unlock ", 7) == 0) {
+		curr_pos += 7;
+		remaining -= 7;
+
+		pr_debug("client 0x%p called 'unlock'\n", priv);
+
+		if (strncmp(curr_pos, "all", 3) == 0)
+			io_state = VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
+		else {
+			if (!vga_str_to_iostate
+			    (curr_pos, remaining, &io_state)) {
+				ret_val = -EPROTO;
+				goto done;
+			}
+			/* TODO: Add this?
+			   if (io_state == VGA_RSRC_NONE) {
+			   ret_val = -EPROTO;
+			   goto done;
+			   }
+			  */
+		}
+
+		pdev = priv->target;
+		if (priv->target == NULL) {
+			ret_val = -ENODEV;
+			goto done;
+		}
+		for (i = 0; i < MAX_USER_CARDS; i++) {
+			if (priv->cards[i].pdev == pdev)
+				uc = &priv->cards[i];
+		}
+
+		if (!uc) {
+			ret_val = -EINVAL;
+			goto done;
+		}
+
+		if (io_state & VGA_RSRC_LEGACY_IO && uc->io_cnt == 0) {
+			ret_val = -EINVAL;
+			goto done;
+		}
+
+		if (io_state & VGA_RSRC_LEGACY_MEM && uc->mem_cnt == 0) {
+			ret_val = -EINVAL;
+			goto done;
+		}
+
+		vga_put(pdev, io_state);
+
+		if (io_state & VGA_RSRC_LEGACY_IO)
+			uc->io_cnt--;
+		if (io_state & VGA_RSRC_LEGACY_MEM)
+			uc->mem_cnt--;
+
+		ret_val = count;
+		goto done;
+	} else if (strncmp(curr_pos, "trylock ", 8) == 0) {
+		curr_pos += 8;
+		remaining -= 8;
+
+		pr_debug("client 0x%p called 'trylock'\n", priv);
+
+		if (!vga_str_to_iostate(curr_pos, remaining, &io_state)) {
+			ret_val = -EPROTO;
+			goto done;
+		}
+		/* TODO: Add this?
+		   if (io_state == VGA_RSRC_NONE) {
+		   ret_val = -EPROTO;
+		   goto done;
+		   }
+		 */
+
+		pdev = priv->target;
+		if (priv->target == NULL) {
+			ret_val = -ENODEV;
+			goto done;
+		}
+
+		if (vga_tryget(pdev, io_state)) {
+			/* Update the client's locks lists... */
+			for (i = 0; i < MAX_USER_CARDS; i++) {
+				if (priv->cards[i].pdev == pdev) {
+					if (io_state & VGA_RSRC_LEGACY_IO)
+						priv->cards[i].io_cnt++;
+					if (io_state & VGA_RSRC_LEGACY_MEM)
+						priv->cards[i].mem_cnt++;
+					break;
+				}
+			}
+			ret_val = count;
+			goto done;
+		} else {
+			ret_val = -EBUSY;
+			goto done;
+		}
+
+	} else if (strncmp(curr_pos, "target ", 7) == 0) {
+		unsigned int domain, bus, devfn;
+		struct vga_device *vgadev;
+
+		curr_pos += 7;
+		remaining -= 7;
+		pr_debug("client 0x%p called 'target'\n", priv);
+		/* If target is default */
+		if (!strncmp(curr_pos, "default", 7))
+			pdev = pci_dev_get(vga_default_device());
+		else {
+			if (!vga_pci_str_to_vars(curr_pos, remaining,
+						 &domain, &bus, &devfn)) {
+				ret_val = -EPROTO;
+				goto done;
+			}
+			pdev = pci_get_domain_bus_and_slot(domain, bus, devfn);
+			if (!pdev) {
+				pr_debug("invalid PCI address %04x:%02x:%02x.%x\n",
+					 domain, bus, PCI_SLOT(devfn),
+					 PCI_FUNC(devfn));
+				ret_val = -ENODEV;
+				goto done;
+			}
+
+			pr_debug("%s ==> %04x:%02x:%02x.%x pdev %p\n", curr_pos,
+				domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+				pdev);
+		}
+
+		vgadev = vgadev_find(pdev);
+		pr_debug("vgadev %p\n", vgadev);
+		if (vgadev == NULL) {
+			if (pdev) {
+				vgaarb_dbg(&pdev->dev, "not a VGA device\n");
+				pci_dev_put(pdev);
+			}
+
+			ret_val = -ENODEV;
+			goto done;
+		}
+
+		priv->target = pdev;
+		for (i = 0; i < MAX_USER_CARDS; i++) {
+			if (priv->cards[i].pdev == pdev)
+				break;
+			if (priv->cards[i].pdev == NULL) {
+				priv->cards[i].pdev = pdev;
+				priv->cards[i].io_cnt = 0;
+				priv->cards[i].mem_cnt = 0;
+				break;
+			}
+		}
+		if (i == MAX_USER_CARDS) {
+			vgaarb_dbg(&pdev->dev, "maximum user cards (%d) number reached, ignoring this one!\n",
+				MAX_USER_CARDS);
+			pci_dev_put(pdev);
+			/* XXX: Which value to return? */
+			ret_val =  -ENOMEM;
+			goto done;
+		}
+
+		ret_val = count;
+		pci_dev_put(pdev);
+		goto done;
+
+
+	} else if (strncmp(curr_pos, "decodes ", 8) == 0) {
+		curr_pos += 8;
+		remaining -= 8;
+		pr_debug("client 0x%p called 'decodes'\n", priv);
+
+		if (!vga_str_to_iostate(curr_pos, remaining, &io_state)) {
+			ret_val = -EPROTO;
+			goto done;
+		}
+		pdev = priv->target;
+		if (priv->target == NULL) {
+			ret_val = -ENODEV;
+			goto done;
+		}
+
+		__vga_set_legacy_decoding(pdev, io_state, true);
+		ret_val = count;
+		goto done;
+	}
+	/* If we got here, the message written is not part of the protocol! */
+	return -EPROTO;
+
+done:
+	return ret_val;
+}
+
+static __poll_t vga_arb_fpoll(struct file *file, poll_table *wait)
+{
+	pr_debug("%s\n", __func__);
+
+	poll_wait(file, &vga_wait_queue, wait);
+	return EPOLLIN;
+}
+
+static int vga_arb_open(struct inode *inode, struct file *file)
+{
+	struct vga_arb_private *priv;
+	unsigned long flags;
+
+	pr_debug("%s\n", __func__);
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (priv == NULL)
+		return -ENOMEM;
+	spin_lock_init(&priv->lock);
+	file->private_data = priv;
+
+	spin_lock_irqsave(&vga_user_lock, flags);
+	list_add(&priv->list, &vga_user_list);
+	spin_unlock_irqrestore(&vga_user_lock, flags);
+
+	/* Set the client's lists of locks */
+	priv->target = vga_default_device(); /* Maybe this is still null! */
+	priv->cards[0].pdev = priv->target;
+	priv->cards[0].io_cnt = 0;
+	priv->cards[0].mem_cnt = 0;
+
+	return 0;
+}
+
+static int vga_arb_release(struct inode *inode, struct file *file)
+{
+	struct vga_arb_private *priv = file->private_data;
+	struct vga_arb_user_card *uc;
+	unsigned long flags;
+	int i;
+
+	pr_debug("%s\n", __func__);
+
+	spin_lock_irqsave(&vga_user_lock, flags);
+	list_del(&priv->list);
+	for (i = 0; i < MAX_USER_CARDS; i++) {
+		uc = &priv->cards[i];
+		if (uc->pdev == NULL)
+			continue;
+		vgaarb_dbg(&uc->pdev->dev, "uc->io_cnt == %d, uc->mem_cnt == %d\n",
+			uc->io_cnt, uc->mem_cnt);
+		while (uc->io_cnt--)
+			vga_put(uc->pdev, VGA_RSRC_LEGACY_IO);
+		while (uc->mem_cnt--)
+			vga_put(uc->pdev, VGA_RSRC_LEGACY_MEM);
+	}
+	spin_unlock_irqrestore(&vga_user_lock, flags);
+
+	kfree(priv);
+
+	return 0;
+}
+
+/*
+ * Callback any registered clients to let them know we have a change in VGA
+ * cards.
+ */
+static void vga_arbiter_notify_clients(void)
+{
+	struct vga_device *vgadev;
+	unsigned long flags;
+	unsigned int new_decodes;
+	bool new_state;
+
+	if (!vga_arbiter_used)
+		return;
+
+	new_state = (vga_count > 1) ? false : true;
+
+	spin_lock_irqsave(&vga_lock, flags);
+	list_for_each_entry(vgadev, &vga_list, list) {
+		if (vgadev->set_decode) {
+			new_decodes = vgadev->set_decode(vgadev->pdev,
+							 new_state);
+			vga_update_device_decodes(vgadev, new_decodes);
+		}
+	}
+	spin_unlock_irqrestore(&vga_lock, flags);
+}
+
+static int pci_notify(struct notifier_block *nb, unsigned long action,
+		      void *data)
+{
+	struct device *dev = data;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	bool notify = false;
+
+	vgaarb_dbg(dev, "%s\n", __func__);
+
+	/*
+	 * For now, we're only interested in devices added and removed.
+	 * I didn't test this thing here, so someone needs to double check
+	 * for the cases of hot-pluggable VGA cards.
+	 */
+	if (action == BUS_NOTIFY_ADD_DEVICE)
+		notify = vga_arbiter_add_pci_device(pdev);
+	else if (action == BUS_NOTIFY_DEL_DEVICE)
+		notify = vga_arbiter_del_pci_device(pdev);
+
+	if (notify)
+		vga_arbiter_notify_clients();
+	return 0;
+}
+
+static struct notifier_block pci_notifier = {
+	.notifier_call = pci_notify,
+};
+
+static const struct file_operations vga_arb_device_fops = {
+	.read = vga_arb_read,
+	.write = vga_arb_write,
+	.poll = vga_arb_fpoll,
+	.open = vga_arb_open,
+	.release = vga_arb_release,
+	.llseek = noop_llseek,
+};
+
+static struct miscdevice vga_arb_device = {
+	MISC_DYNAMIC_MINOR, "vga_arbiter", &vga_arb_device_fops
+};
+
+static int __init vga_arb_device_init(void)
+{
+	int rc;
+	struct pci_dev *pdev;
+
+	rc = misc_register(&vga_arb_device);
+	if (rc < 0)
+		pr_err("error %d registering device\n", rc);
+
+	bus_register_notifier(&pci_bus_type, &pci_notifier);
+
+	/* Add all VGA class PCI devices by default */
+	pdev = NULL;
+	while ((pdev =
+		pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
+			       PCI_ANY_ID, pdev)) != NULL)
+		vga_arbiter_add_pci_device(pdev);
+
+	pr_info("loaded\n");
+	return rc;
+}
+subsys_initcall_sync(vga_arb_device_init);
diff --git a/drivers/pci/vpd.c b/drivers/pci/vpd.c
new file mode 100644
index 000000000..485a642b9
--- /dev/null
+++ b/drivers/pci/vpd.c
@@ -0,0 +1,626 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI VPD support
+ *
+ * Copyright (C) 2010 Broadcom Corporation.
+ */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/sched/signal.h>
+#include <asm/unaligned.h>
+#include "pci.h"
+
+#define PCI_VPD_LRDT_TAG_SIZE		3
+#define PCI_VPD_SRDT_LEN_MASK		0x07
+#define PCI_VPD_SRDT_TAG_SIZE		1
+#define PCI_VPD_STIN_END		0x0f
+#define PCI_VPD_INFO_FLD_HDR_SIZE	3
+
+static u16 pci_vpd_lrdt_size(const u8 *lrdt)
+{
+	return get_unaligned_le16(lrdt + 1);
+}
+
+static u8 pci_vpd_srdt_tag(const u8 *srdt)
+{
+	return *srdt >> 3;
+}
+
+static u8 pci_vpd_srdt_size(const u8 *srdt)
+{
+	return *srdt & PCI_VPD_SRDT_LEN_MASK;
+}
+
+static u8 pci_vpd_info_field_size(const u8 *info_field)
+{
+	return info_field[2];
+}
+
+/* VPD access through PCI 2.2+ VPD capability */
+
+static struct pci_dev *pci_get_func0_dev(struct pci_dev *dev)
+{
+	return pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
+}
+
+#define PCI_VPD_MAX_SIZE	(PCI_VPD_ADDR_MASK + 1)
+#define PCI_VPD_SZ_INVALID	UINT_MAX
+
+/**
+ * pci_vpd_size - determine actual size of Vital Product Data
+ * @dev:	pci device struct
+ */
+static size_t pci_vpd_size(struct pci_dev *dev)
+{
+	size_t off = 0, size;
+	unsigned char tag, header[1+2];	/* 1 byte tag, 2 bytes length */
+
+	while (pci_read_vpd_any(dev, off, 1, header) == 1) {
+		size = 0;
+
+		if (off == 0 && (header[0] == 0x00 || header[0] == 0xff))
+			goto error;
+
+		if (header[0] & PCI_VPD_LRDT) {
+			/* Large Resource Data Type Tag */
+			if (pci_read_vpd_any(dev, off + 1, 2, &header[1]) != 2) {
+				pci_warn(dev, "failed VPD read at offset %zu\n",
+					 off + 1);
+				return off ?: PCI_VPD_SZ_INVALID;
+			}
+			size = pci_vpd_lrdt_size(header);
+			if (off + size > PCI_VPD_MAX_SIZE)
+				goto error;
+
+			off += PCI_VPD_LRDT_TAG_SIZE + size;
+		} else {
+			/* Short Resource Data Type Tag */
+			tag = pci_vpd_srdt_tag(header);
+			size = pci_vpd_srdt_size(header);
+			if (off + size > PCI_VPD_MAX_SIZE)
+				goto error;
+
+			off += PCI_VPD_SRDT_TAG_SIZE + size;
+			if (tag == PCI_VPD_STIN_END)	/* End tag descriptor */
+				return off;
+		}
+	}
+	return off;
+
+error:
+	pci_info(dev, "invalid VPD tag %#04x (size %zu) at offset %zu%s\n",
+		 header[0], size, off, off == 0 ?
+		 "; assume missing optional EEPROM" : "");
+	return off ?: PCI_VPD_SZ_INVALID;
+}
+
+static bool pci_vpd_available(struct pci_dev *dev, bool check_size)
+{
+	struct pci_vpd *vpd = &dev->vpd;
+
+	if (!vpd->cap)
+		return false;
+
+	if (vpd->len == 0 && check_size) {
+		vpd->len = pci_vpd_size(dev);
+		if (vpd->len == PCI_VPD_SZ_INVALID) {
+			vpd->cap = 0;
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Wait for last operation to complete.
+ * This code has to spin since there is no other notification from the PCI
+ * hardware. Since the VPD is often implemented by serial attachment to an
+ * EEPROM, it may take many milliseconds to complete.
+ * @set: if true wait for flag to be set, else wait for it to be cleared
+ *
+ * Returns 0 on success, negative values indicate error.
+ */
+static int pci_vpd_wait(struct pci_dev *dev, bool set)
+{
+	struct pci_vpd *vpd = &dev->vpd;
+	unsigned long timeout = jiffies + msecs_to_jiffies(125);
+	unsigned long max_sleep = 16;
+	u16 status;
+	int ret;
+
+	do {
+		ret = pci_user_read_config_word(dev, vpd->cap + PCI_VPD_ADDR,
+						&status);
+		if (ret < 0)
+			return ret;
+
+		if (!!(status & PCI_VPD_ADDR_F) == set)
+			return 0;
+
+		if (time_after(jiffies, timeout))
+			break;
+
+		usleep_range(10, max_sleep);
+		if (max_sleep < 1024)
+			max_sleep *= 2;
+	} while (true);
+
+	pci_warn(dev, "VPD access failed.  This is likely a firmware bug on this device.  Contact the card vendor for a firmware update\n");
+	return -ETIMEDOUT;
+}
+
+static ssize_t pci_vpd_read(struct pci_dev *dev, loff_t pos, size_t count,
+			    void *arg, bool check_size)
+{
+	struct pci_vpd *vpd = &dev->vpd;
+	unsigned int max_len;
+	int ret = 0;
+	loff_t end = pos + count;
+	u8 *buf = arg;
+
+	if (!pci_vpd_available(dev, check_size))
+		return -ENODEV;
+
+	if (pos < 0)
+		return -EINVAL;
+
+	max_len = check_size ? vpd->len : PCI_VPD_MAX_SIZE;
+
+	if (pos >= max_len)
+		return 0;
+
+	if (end > max_len) {
+		end = max_len;
+		count = end - pos;
+	}
+
+	if (mutex_lock_killable(&vpd->lock))
+		return -EINTR;
+
+	while (pos < end) {
+		u32 val;
+		unsigned int i, skip;
+
+		if (fatal_signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
+		ret = pci_user_write_config_word(dev, vpd->cap + PCI_VPD_ADDR,
+						 pos & ~3);
+		if (ret < 0)
+			break;
+		ret = pci_vpd_wait(dev, true);
+		if (ret < 0)
+			break;
+
+		ret = pci_user_read_config_dword(dev, vpd->cap + PCI_VPD_DATA, &val);
+		if (ret < 0)
+			break;
+
+		skip = pos & 3;
+		for (i = 0;  i < sizeof(u32); i++) {
+			if (i >= skip) {
+				*buf++ = val;
+				if (++pos == end)
+					break;
+			}
+			val >>= 8;
+		}
+	}
+
+	mutex_unlock(&vpd->lock);
+	return ret ? ret : count;
+}
+
+static ssize_t pci_vpd_write(struct pci_dev *dev, loff_t pos, size_t count,
+			     const void *arg, bool check_size)
+{
+	struct pci_vpd *vpd = &dev->vpd;
+	unsigned int max_len;
+	const u8 *buf = arg;
+	loff_t end = pos + count;
+	int ret = 0;
+
+	if (!pci_vpd_available(dev, check_size))
+		return -ENODEV;
+
+	if (pos < 0 || (pos & 3) || (count & 3))
+		return -EINVAL;
+
+	max_len = check_size ? vpd->len : PCI_VPD_MAX_SIZE;
+
+	if (end > max_len)
+		return -EINVAL;
+
+	if (mutex_lock_killable(&vpd->lock))
+		return -EINTR;
+
+	while (pos < end) {
+		ret = pci_user_write_config_dword(dev, vpd->cap + PCI_VPD_DATA,
+						  get_unaligned_le32(buf));
+		if (ret < 0)
+			break;
+		ret = pci_user_write_config_word(dev, vpd->cap + PCI_VPD_ADDR,
+						 pos | PCI_VPD_ADDR_F);
+		if (ret < 0)
+			break;
+
+		ret = pci_vpd_wait(dev, false);
+		if (ret < 0)
+			break;
+
+		buf += sizeof(u32);
+		pos += sizeof(u32);
+	}
+
+	mutex_unlock(&vpd->lock);
+	return ret ? ret : count;
+}
+
+void pci_vpd_init(struct pci_dev *dev)
+{
+	if (dev->vpd.len == PCI_VPD_SZ_INVALID)
+		return;
+
+	dev->vpd.cap = pci_find_capability(dev, PCI_CAP_ID_VPD);
+	mutex_init(&dev->vpd.lock);
+}
+
+static ssize_t vpd_read(struct file *filp, struct kobject *kobj,
+			struct bin_attribute *bin_attr, char *buf, loff_t off,
+			size_t count)
+{
+	struct pci_dev *dev = to_pci_dev(kobj_to_dev(kobj));
+	struct pci_dev *vpd_dev = dev;
+	ssize_t ret;
+
+	if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) {
+		vpd_dev = pci_get_func0_dev(dev);
+		if (!vpd_dev)
+			return -ENODEV;
+	}
+
+	pci_config_pm_runtime_get(vpd_dev);
+	ret = pci_read_vpd(vpd_dev, off, count, buf);
+	pci_config_pm_runtime_put(vpd_dev);
+
+	if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0)
+		pci_dev_put(vpd_dev);
+
+	return ret;
+}
+
+static ssize_t vpd_write(struct file *filp, struct kobject *kobj,
+			 struct bin_attribute *bin_attr, char *buf, loff_t off,
+			 size_t count)
+{
+	struct pci_dev *dev = to_pci_dev(kobj_to_dev(kobj));
+	struct pci_dev *vpd_dev = dev;
+	ssize_t ret;
+
+	if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) {
+		vpd_dev = pci_get_func0_dev(dev);
+		if (!vpd_dev)
+			return -ENODEV;
+	}
+
+	pci_config_pm_runtime_get(vpd_dev);
+	ret = pci_write_vpd(vpd_dev, off, count, buf);
+	pci_config_pm_runtime_put(vpd_dev);
+
+	if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0)
+		pci_dev_put(vpd_dev);
+
+	return ret;
+}
+static BIN_ATTR(vpd, 0600, vpd_read, vpd_write, 0);
+
+static struct bin_attribute *vpd_attrs[] = {
+	&bin_attr_vpd,
+	NULL,
+};
+
+static umode_t vpd_attr_is_visible(struct kobject *kobj,
+				   struct bin_attribute *a, int n)
+{
+	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+
+	if (!pdev->vpd.cap)
+		return 0;
+
+	return a->attr.mode;
+}
+
+const struct attribute_group pci_dev_vpd_attr_group = {
+	.bin_attrs = vpd_attrs,
+	.is_bin_visible = vpd_attr_is_visible,
+};
+
+void *pci_vpd_alloc(struct pci_dev *dev, unsigned int *size)
+{
+	unsigned int len;
+	void *buf;
+	int cnt;
+
+	if (!pci_vpd_available(dev, true))
+		return ERR_PTR(-ENODEV);
+
+	len = dev->vpd.len;
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	cnt = pci_read_vpd(dev, 0, len, buf);
+	if (cnt != len) {
+		kfree(buf);
+		return ERR_PTR(-EIO);
+	}
+
+	if (size)
+		*size = len;
+
+	return buf;
+}
+EXPORT_SYMBOL_GPL(pci_vpd_alloc);
+
+static int pci_vpd_find_tag(const u8 *buf, unsigned int len, u8 rdt, unsigned int *size)
+{
+	int i = 0;
+
+	/* look for LRDT tags only, end tag is the only SRDT tag */
+	while (i + PCI_VPD_LRDT_TAG_SIZE <= len && buf[i] & PCI_VPD_LRDT) {
+		unsigned int lrdt_len = pci_vpd_lrdt_size(buf + i);
+		u8 tag = buf[i];
+
+		i += PCI_VPD_LRDT_TAG_SIZE;
+		if (tag == rdt) {
+			if (i + lrdt_len > len)
+				lrdt_len = len - i;
+			if (size)
+				*size = lrdt_len;
+			return i;
+		}
+
+		i += lrdt_len;
+	}
+
+	return -ENOENT;
+}
+
+int pci_vpd_find_id_string(const u8 *buf, unsigned int len, unsigned int *size)
+{
+	return pci_vpd_find_tag(buf, len, PCI_VPD_LRDT_ID_STRING, size);
+}
+EXPORT_SYMBOL_GPL(pci_vpd_find_id_string);
+
+static int pci_vpd_find_info_keyword(const u8 *buf, unsigned int off,
+			      unsigned int len, const char *kw)
+{
+	int i;
+
+	for (i = off; i + PCI_VPD_INFO_FLD_HDR_SIZE <= off + len;) {
+		if (buf[i + 0] == kw[0] &&
+		    buf[i + 1] == kw[1])
+			return i;
+
+		i += PCI_VPD_INFO_FLD_HDR_SIZE +
+		     pci_vpd_info_field_size(&buf[i]);
+	}
+
+	return -ENOENT;
+}
+
+static ssize_t __pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf,
+			      bool check_size)
+{
+	ssize_t ret;
+
+	if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) {
+		dev = pci_get_func0_dev(dev);
+		if (!dev)
+			return -ENODEV;
+
+		ret = pci_vpd_read(dev, pos, count, buf, check_size);
+		pci_dev_put(dev);
+		return ret;
+	}
+
+	return pci_vpd_read(dev, pos, count, buf, check_size);
+}
+
+/**
+ * pci_read_vpd - Read one entry from Vital Product Data
+ * @dev:	PCI device struct
+ * @pos:	offset in VPD space
+ * @count:	number of bytes to read
+ * @buf:	pointer to where to store result
+ */
+ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf)
+{
+	return __pci_read_vpd(dev, pos, count, buf, true);
+}
+EXPORT_SYMBOL(pci_read_vpd);
+
+/* Same, but allow to access any address */
+ssize_t pci_read_vpd_any(struct pci_dev *dev, loff_t pos, size_t count, void *buf)
+{
+	return __pci_read_vpd(dev, pos, count, buf, false);
+}
+EXPORT_SYMBOL(pci_read_vpd_any);
+
+static ssize_t __pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count,
+			       const void *buf, bool check_size)
+{
+	ssize_t ret;
+
+	if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) {
+		dev = pci_get_func0_dev(dev);
+		if (!dev)
+			return -ENODEV;
+
+		ret = pci_vpd_write(dev, pos, count, buf, check_size);
+		pci_dev_put(dev);
+		return ret;
+	}
+
+	return pci_vpd_write(dev, pos, count, buf, check_size);
+}
+
+/**
+ * pci_write_vpd - Write entry to Vital Product Data
+ * @dev:	PCI device struct
+ * @pos:	offset in VPD space
+ * @count:	number of bytes to write
+ * @buf:	buffer containing write data
+ */
+ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf)
+{
+	return __pci_write_vpd(dev, pos, count, buf, true);
+}
+EXPORT_SYMBOL(pci_write_vpd);
+
+/* Same, but allow to access any address */
+ssize_t pci_write_vpd_any(struct pci_dev *dev, loff_t pos, size_t count, const void *buf)
+{
+	return __pci_write_vpd(dev, pos, count, buf, false);
+}
+EXPORT_SYMBOL(pci_write_vpd_any);
+
+int pci_vpd_find_ro_info_keyword(const void *buf, unsigned int len,
+				 const char *kw, unsigned int *size)
+{
+	int ro_start, infokw_start;
+	unsigned int ro_len, infokw_size;
+
+	ro_start = pci_vpd_find_tag(buf, len, PCI_VPD_LRDT_RO_DATA, &ro_len);
+	if (ro_start < 0)
+		return ro_start;
+
+	infokw_start = pci_vpd_find_info_keyword(buf, ro_start, ro_len, kw);
+	if (infokw_start < 0)
+		return infokw_start;
+
+	infokw_size = pci_vpd_info_field_size(buf + infokw_start);
+	infokw_start += PCI_VPD_INFO_FLD_HDR_SIZE;
+
+	if (infokw_start + infokw_size > len)
+		return -EINVAL;
+
+	if (size)
+		*size = infokw_size;
+
+	return infokw_start;
+}
+EXPORT_SYMBOL_GPL(pci_vpd_find_ro_info_keyword);
+
+int pci_vpd_check_csum(const void *buf, unsigned int len)
+{
+	const u8 *vpd = buf;
+	unsigned int size;
+	u8 csum = 0;
+	int rv_start;
+
+	rv_start = pci_vpd_find_ro_info_keyword(buf, len, PCI_VPD_RO_KEYWORD_CHKSUM, &size);
+	if (rv_start == -ENOENT) /* no checksum in VPD */
+		return 1;
+	else if (rv_start < 0)
+		return rv_start;
+
+	if (!size)
+		return -EINVAL;
+
+	while (rv_start >= 0)
+		csum += vpd[rv_start--];
+
+	return csum ? -EILSEQ : 0;
+}
+EXPORT_SYMBOL_GPL(pci_vpd_check_csum);
+
+#ifdef CONFIG_PCI_QUIRKS
+/*
+ * Quirk non-zero PCI functions to route VPD access through function 0 for
+ * devices that share VPD resources between functions.  The functions are
+ * expected to be identical devices.
+ */
+static void quirk_f0_vpd_link(struct pci_dev *dev)
+{
+	struct pci_dev *f0;
+
+	if (!PCI_FUNC(dev->devfn))
+		return;
+
+	f0 = pci_get_func0_dev(dev);
+	if (!f0)
+		return;
+
+	if (f0->vpd.cap && dev->class == f0->class &&
+	    dev->vendor == f0->vendor && dev->device == f0->device)
+		dev->dev_flags |= PCI_DEV_FLAGS_VPD_REF_F0;
+
+	pci_dev_put(f0);
+}
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
+			      PCI_CLASS_NETWORK_ETHERNET, 8, quirk_f0_vpd_link);
+
+/*
+ * If a device follows the VPD format spec, the PCI core will not read or
+ * write past the VPD End Tag.  But some vendors do not follow the VPD
+ * format spec, so we can't tell how much data is safe to access.  Devices
+ * may behave unpredictably if we access too much.  Blacklist these devices
+ * so we don't touch VPD at all.
+ */
+static void quirk_blacklist_vpd(struct pci_dev *dev)
+{
+	dev->vpd.len = PCI_VPD_SZ_INVALID;
+	pci_warn(dev, FW_BUG "disabling VPD access (can't determine size of non-standard VPD format)\n");
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x0060, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x007c, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x0413, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x0078, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x0079, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x0073, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x0071, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x005b, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x002f, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATTANSIC, PCI_ANY_ID, quirk_blacklist_vpd);
+/*
+ * The Amazon Annapurna Labs 0x0031 device id is reused for other non Root Port
+ * device types, so the quirk is registered for the PCI_CLASS_BRIDGE_PCI class.
+ */
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031,
+			       PCI_CLASS_BRIDGE_PCI, 8, quirk_blacklist_vpd);
+
+static void quirk_chelsio_extend_vpd(struct pci_dev *dev)
+{
+	int chip = (dev->device & 0xf000) >> 12;
+	int func = (dev->device & 0x0f00) >>  8;
+	int prod = (dev->device & 0x00ff) >>  0;
+
+	/*
+	 * If this is a T3-based adapter, there's a 1KB VPD area at offset
+	 * 0xc00 which contains the preferred VPD values.  If this is a T4 or
+	 * later based adapter, the special VPD is at offset 0x400 for the
+	 * Physical Functions (the SR-IOV Virtual Functions have no VPD
+	 * Capabilities).  The PCI VPD Access core routines will normally
+	 * compute the size of the VPD by parsing the VPD Data Structure at
+	 * offset 0x000.  This will result in silent failures when attempting
+	 * to accesses these other VPD areas which are beyond those computed
+	 * limits.
+	 */
+	if (chip == 0x0 && prod >= 0x20)
+		dev->vpd.len = 8192;
+	else if (chip >= 0x4 && func < 0x8)
+		dev->vpd.len = 2048;
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
+			 quirk_chelsio_extend_vpd);
+
+#endif
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
new file mode 100644
index 000000000..11636634a
--- /dev/null
+++ b/drivers/pci/xen-pcifront.c
@@ -0,0 +1,1094 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xen PCI Frontend
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <xen/grant_table.h>
+#include <xen/page.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <xen/interface/io/pciif.h>
+#include <asm/xen/pci.h>
+#include <linux/interrupt.h>
+#include <linux/atomic.h>
+#include <linux/workqueue.h>
+#include <linux/bitops.h>
+#include <linux/time.h>
+#include <linux/ktime.h>
+#include <xen/platform_pci.h>
+
+#include <asm/xen/swiotlb-xen.h>
+
+#define INVALID_EVTCHN    (-1)
+
+struct pci_bus_entry {
+	struct list_head list;
+	struct pci_bus *bus;
+};
+
+#define _PDEVB_op_active		(0)
+#define PDEVB_op_active			(1 << (_PDEVB_op_active))
+
+struct pcifront_device {
+	struct xenbus_device *xdev;
+	struct list_head root_buses;
+
+	int evtchn;
+	grant_ref_t gnt_ref;
+
+	int irq;
+
+	/* Lock this when doing any operations in sh_info */
+	spinlock_t sh_info_lock;
+	struct xen_pci_sharedinfo *sh_info;
+	struct work_struct op_work;
+	unsigned long flags;
+
+};
+
+struct pcifront_sd {
+	struct pci_sysdata sd;
+	struct pcifront_device *pdev;
+};
+
+static inline struct pcifront_device *
+pcifront_get_pdev(struct pcifront_sd *sd)
+{
+	return sd->pdev;
+}
+
+static inline void pcifront_init_sd(struct pcifront_sd *sd,
+				    unsigned int domain, unsigned int bus,
+				    struct pcifront_device *pdev)
+{
+	/* Because we do not expose that information via XenBus. */
+	sd->sd.node = first_online_node;
+	sd->sd.domain = domain;
+	sd->pdev = pdev;
+}
+
+static DEFINE_SPINLOCK(pcifront_dev_lock);
+static struct pcifront_device *pcifront_dev;
+
+static int errno_to_pcibios_err(int errno)
+{
+	switch (errno) {
+	case XEN_PCI_ERR_success:
+		return PCIBIOS_SUCCESSFUL;
+
+	case XEN_PCI_ERR_dev_not_found:
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	case XEN_PCI_ERR_invalid_offset:
+	case XEN_PCI_ERR_op_failed:
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	case XEN_PCI_ERR_not_implemented:
+		return PCIBIOS_FUNC_NOT_SUPPORTED;
+
+	case XEN_PCI_ERR_access_denied:
+		return PCIBIOS_SET_FAILED;
+	}
+	return errno;
+}
+
+static inline void schedule_pcifront_aer_op(struct pcifront_device *pdev)
+{
+	if (test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
+		&& !test_and_set_bit(_PDEVB_op_active, &pdev->flags)) {
+		dev_dbg(&pdev->xdev->dev, "schedule aer frontend job\n");
+		schedule_work(&pdev->op_work);
+	}
+}
+
+static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
+{
+	int err = 0;
+	struct xen_pci_op *active_op = &pdev->sh_info->op;
+	unsigned long irq_flags;
+	evtchn_port_t port = pdev->evtchn;
+	unsigned int irq = pdev->irq;
+	s64 ns, ns_timeout;
+
+	spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
+
+	memcpy(active_op, op, sizeof(struct xen_pci_op));
+
+	/* Go */
+	wmb();
+	set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
+	notify_remote_via_evtchn(port);
+
+	/*
+	 * We set a poll timeout of 3 seconds but give up on return after
+	 * 2 seconds. It is better to time out too late rather than too early
+	 * (in the latter case we end up continually re-executing poll() with a
+	 * timeout in the past). 1s difference gives plenty of slack for error.
+	 */
+	ns_timeout = ktime_get_ns() + 2 * (s64)NSEC_PER_SEC;
+
+	xen_clear_irq_pending(irq);
+
+	while (test_bit(_XEN_PCIF_active,
+			(unsigned long *)&pdev->sh_info->flags)) {
+		xen_poll_irq_timeout(irq, jiffies + 3*HZ);
+		xen_clear_irq_pending(irq);
+		ns = ktime_get_ns();
+		if (ns > ns_timeout) {
+			dev_err(&pdev->xdev->dev,
+				"pciback not responding!!!\n");
+			clear_bit(_XEN_PCIF_active,
+				  (unsigned long *)&pdev->sh_info->flags);
+			err = XEN_PCI_ERR_dev_not_found;
+			goto out;
+		}
+	}
+
+	/*
+	 * We might lose backend service request since we
+	 * reuse same evtchn with pci_conf backend response. So re-schedule
+	 * aer pcifront service.
+	 */
+	if (test_bit(_XEN_PCIB_active,
+			(unsigned long *)&pdev->sh_info->flags)) {
+		dev_err(&pdev->xdev->dev,
+			"schedule aer pcifront service\n");
+		schedule_pcifront_aer_op(pdev);
+	}
+
+	memcpy(op, active_op, sizeof(struct xen_pci_op));
+
+	err = op->err;
+out:
+	spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags);
+	return err;
+}
+
+/* Access to this function is spinlocked in drivers/pci/access.c */
+static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn,
+			     int where, int size, u32 *val)
+{
+	int err = 0;
+	struct xen_pci_op op = {
+		.cmd    = XEN_PCI_OP_conf_read,
+		.domain = pci_domain_nr(bus),
+		.bus    = bus->number,
+		.devfn  = devfn,
+		.offset = where,
+		.size   = size,
+	};
+	struct pcifront_sd *sd = bus->sysdata;
+	struct pcifront_device *pdev = pcifront_get_pdev(sd);
+
+	dev_dbg(&pdev->xdev->dev,
+		"read dev=%04x:%02x:%02x.%d - offset %x size %d\n",
+		pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
+		PCI_FUNC(devfn), where, size);
+
+	err = do_pci_op(pdev, &op);
+
+	if (likely(!err)) {
+		dev_dbg(&pdev->xdev->dev, "read got back value %x\n",
+			op.value);
+
+		*val = op.value;
+	} else if (err == -ENODEV) {
+		/* No device here, pretend that it just returned 0 */
+		err = 0;
+		*val = 0;
+	}
+
+	return errno_to_pcibios_err(err);
+}
+
+/* Access to this function is spinlocked in drivers/pci/access.c */
+static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn,
+			      int where, int size, u32 val)
+{
+	struct xen_pci_op op = {
+		.cmd    = XEN_PCI_OP_conf_write,
+		.domain = pci_domain_nr(bus),
+		.bus    = bus->number,
+		.devfn  = devfn,
+		.offset = where,
+		.size   = size,
+		.value  = val,
+	};
+	struct pcifront_sd *sd = bus->sysdata;
+	struct pcifront_device *pdev = pcifront_get_pdev(sd);
+
+	dev_dbg(&pdev->xdev->dev,
+		"write dev=%04x:%02x:%02x.%d - offset %x size %d val %x\n",
+		pci_domain_nr(bus), bus->number,
+		PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
+
+	return errno_to_pcibios_err(do_pci_op(pdev, &op));
+}
+
+static struct pci_ops pcifront_bus_ops = {
+	.read = pcifront_bus_read,
+	.write = pcifront_bus_write,
+};
+
+#ifdef CONFIG_PCI_MSI
+static int pci_frontend_enable_msix(struct pci_dev *dev,
+				    int vector[], int nvec)
+{
+	int err;
+	int i;
+	struct xen_pci_op op = {
+		.cmd    = XEN_PCI_OP_enable_msix,
+		.domain = pci_domain_nr(dev->bus),
+		.bus = dev->bus->number,
+		.devfn = dev->devfn,
+		.value = nvec,
+	};
+	struct pcifront_sd *sd = dev->bus->sysdata;
+	struct pcifront_device *pdev = pcifront_get_pdev(sd);
+	struct msi_desc *entry;
+
+	if (nvec > SH_INFO_MAX_VEC) {
+		pci_err(dev, "too many vectors (0x%x) for PCI frontend:"
+				   " Increase SH_INFO_MAX_VEC\n", nvec);
+		return -EINVAL;
+	}
+
+	i = 0;
+	msi_for_each_desc(entry, &dev->dev, MSI_DESC_NOTASSOCIATED) {
+		op.msix_entries[i].entry = entry->msi_index;
+		/* Vector is useless at this point. */
+		op.msix_entries[i].vector = -1;
+		i++;
+	}
+
+	err = do_pci_op(pdev, &op);
+
+	if (likely(!err)) {
+		if (likely(!op.value)) {
+			/* we get the result */
+			for (i = 0; i < nvec; i++) {
+				if (op.msix_entries[i].vector <= 0) {
+					pci_warn(dev, "MSI-X entry %d is invalid: %d!\n",
+						i, op.msix_entries[i].vector);
+					err = -EINVAL;
+					vector[i] = -1;
+					continue;
+				}
+				vector[i] = op.msix_entries[i].vector;
+			}
+		} else {
+			pr_info("enable msix get value %x\n", op.value);
+			err = op.value;
+		}
+	} else {
+		pci_err(dev, "enable msix get err %x\n", err);
+	}
+	return err;
+}
+
+static void pci_frontend_disable_msix(struct pci_dev *dev)
+{
+	int err;
+	struct xen_pci_op op = {
+		.cmd    = XEN_PCI_OP_disable_msix,
+		.domain = pci_domain_nr(dev->bus),
+		.bus = dev->bus->number,
+		.devfn = dev->devfn,
+	};
+	struct pcifront_sd *sd = dev->bus->sysdata;
+	struct pcifront_device *pdev = pcifront_get_pdev(sd);
+
+	err = do_pci_op(pdev, &op);
+
+	/* What should do for error ? */
+	if (err)
+		pci_err(dev, "pci_disable_msix get err %x\n", err);
+}
+
+static int pci_frontend_enable_msi(struct pci_dev *dev, int vector[])
+{
+	int err;
+	struct xen_pci_op op = {
+		.cmd    = XEN_PCI_OP_enable_msi,
+		.domain = pci_domain_nr(dev->bus),
+		.bus = dev->bus->number,
+		.devfn = dev->devfn,
+	};
+	struct pcifront_sd *sd = dev->bus->sysdata;
+	struct pcifront_device *pdev = pcifront_get_pdev(sd);
+
+	err = do_pci_op(pdev, &op);
+	if (likely(!err)) {
+		vector[0] = op.value;
+		if (op.value <= 0) {
+			pci_warn(dev, "MSI entry is invalid: %d!\n",
+				op.value);
+			err = -EINVAL;
+			vector[0] = -1;
+		}
+	} else {
+		pci_err(dev, "pci frontend enable msi failed for dev "
+				    "%x:%x\n", op.bus, op.devfn);
+		err = -EINVAL;
+	}
+	return err;
+}
+
+static void pci_frontend_disable_msi(struct pci_dev *dev)
+{
+	int err;
+	struct xen_pci_op op = {
+		.cmd    = XEN_PCI_OP_disable_msi,
+		.domain = pci_domain_nr(dev->bus),
+		.bus = dev->bus->number,
+		.devfn = dev->devfn,
+	};
+	struct pcifront_sd *sd = dev->bus->sysdata;
+	struct pcifront_device *pdev = pcifront_get_pdev(sd);
+
+	err = do_pci_op(pdev, &op);
+	if (err == XEN_PCI_ERR_dev_not_found) {
+		/* XXX No response from backend, what shall we do? */
+		pr_info("get no response from backend for disable MSI\n");
+		return;
+	}
+	if (err)
+		/* how can pciback notify us fail? */
+		pr_info("get fake response from backend\n");
+}
+
+static struct xen_pci_frontend_ops pci_frontend_ops = {
+	.enable_msi = pci_frontend_enable_msi,
+	.disable_msi = pci_frontend_disable_msi,
+	.enable_msix = pci_frontend_enable_msix,
+	.disable_msix = pci_frontend_disable_msix,
+};
+
+static void pci_frontend_registrar(int enable)
+{
+	if (enable)
+		xen_pci_frontend = &pci_frontend_ops;
+	else
+		xen_pci_frontend = NULL;
+};
+#else
+static inline void pci_frontend_registrar(int enable) { };
+#endif /* CONFIG_PCI_MSI */
+
+/* Claim resources for the PCI frontend as-is, backend won't allow changes */
+static int pcifront_claim_resource(struct pci_dev *dev, void *data)
+{
+	struct pcifront_device *pdev = data;
+	int i;
+	struct resource *r;
+
+	pci_dev_for_each_resource(dev, r, i) {
+		if (!r->parent && r->start && r->flags) {
+			dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n",
+				pci_name(dev), i);
+			if (pci_claim_resource(dev, i)) {
+				dev_err(&pdev->xdev->dev, "Could not claim resource %s/%d! "
+					"Device offline. Try using e820_host=1 in the guest config.\n",
+					pci_name(dev), i);
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int pcifront_scan_bus(struct pcifront_device *pdev,
+				unsigned int domain, unsigned int bus,
+				struct pci_bus *b)
+{
+	struct pci_dev *d;
+	unsigned int devfn;
+
+	/*
+	 * Scan the bus for functions and add.
+	 * We omit handling of PCI bridge attachment because pciback prevents
+	 * bridges from being exported.
+	 */
+	for (devfn = 0; devfn < 0x100; devfn++) {
+		d = pci_get_slot(b, devfn);
+		if (d) {
+			/* Device is already known. */
+			pci_dev_put(d);
+			continue;
+		}
+
+		d = pci_scan_single_device(b, devfn);
+		if (d)
+			dev_info(&pdev->xdev->dev, "New device on "
+				 "%04x:%02x:%02x.%d found.\n", domain, bus,
+				 PCI_SLOT(devfn), PCI_FUNC(devfn));
+	}
+
+	return 0;
+}
+
+static int pcifront_scan_root(struct pcifront_device *pdev,
+				 unsigned int domain, unsigned int bus)
+{
+	struct pci_bus *b;
+	LIST_HEAD(resources);
+	struct pcifront_sd *sd = NULL;
+	struct pci_bus_entry *bus_entry = NULL;
+	int err = 0;
+	static struct resource busn_res = {
+		.start = 0,
+		.end = 255,
+		.flags = IORESOURCE_BUS,
+	};
+
+#ifndef CONFIG_PCI_DOMAINS
+	if (domain != 0) {
+		dev_err(&pdev->xdev->dev,
+			"PCI Root in non-zero PCI Domain! domain=%d\n", domain);
+		dev_err(&pdev->xdev->dev,
+			"Please compile with CONFIG_PCI_DOMAINS\n");
+		err = -EINVAL;
+		goto err_out;
+	}
+#endif
+
+	dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n",
+		 domain, bus);
+
+	bus_entry = kzalloc(sizeof(*bus_entry), GFP_KERNEL);
+	sd = kzalloc(sizeof(*sd), GFP_KERNEL);
+	if (!bus_entry || !sd) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	pci_add_resource(&resources, &ioport_resource);
+	pci_add_resource(&resources, &iomem_resource);
+	pci_add_resource(&resources, &busn_res);
+	pcifront_init_sd(sd, domain, bus, pdev);
+
+	pci_lock_rescan_remove();
+
+	b = pci_scan_root_bus(&pdev->xdev->dev, bus,
+				  &pcifront_bus_ops, sd, &resources);
+	if (!b) {
+		dev_err(&pdev->xdev->dev,
+			"Error creating PCI Frontend Bus!\n");
+		err = -ENOMEM;
+		pci_unlock_rescan_remove();
+		pci_free_resource_list(&resources);
+		goto err_out;
+	}
+
+	bus_entry->bus = b;
+
+	list_add(&bus_entry->list, &pdev->root_buses);
+
+	/*
+	 * pci_scan_root_bus skips devices which do not have a
+	 * devfn==0. The pcifront_scan_bus enumerates all devfn.
+	 */
+	err = pcifront_scan_bus(pdev, domain, bus, b);
+
+	/* Claim resources before going "live" with our devices */
+	pci_walk_bus(b, pcifront_claim_resource, pdev);
+
+	/* Create SysFS and notify udev of the devices. Aka: "going live" */
+	pci_bus_add_devices(b);
+
+	pci_unlock_rescan_remove();
+	return err;
+
+err_out:
+	kfree(bus_entry);
+	kfree(sd);
+
+	return err;
+}
+
+static int pcifront_rescan_root(struct pcifront_device *pdev,
+				   unsigned int domain, unsigned int bus)
+{
+	int err;
+	struct pci_bus *b;
+
+	b = pci_find_bus(domain, bus);
+	if (!b)
+		/* If the bus is unknown, create it. */
+		return pcifront_scan_root(pdev, domain, bus);
+
+	dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n",
+		 domain, bus);
+
+	err = pcifront_scan_bus(pdev, domain, bus, b);
+
+	/* Claim resources before going "live" with our devices */
+	pci_walk_bus(b, pcifront_claim_resource, pdev);
+
+	/* Create SysFS and notify udev of the devices. Aka: "going live" */
+	pci_bus_add_devices(b);
+
+	return err;
+}
+
+static void free_root_bus_devs(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	while (!list_empty(&bus->devices)) {
+		dev = container_of(bus->devices.next, struct pci_dev,
+				   bus_list);
+		pci_dbg(dev, "removing device\n");
+		pci_stop_and_remove_bus_device(dev);
+	}
+}
+
+static void pcifront_free_roots(struct pcifront_device *pdev)
+{
+	struct pci_bus_entry *bus_entry, *t;
+
+	dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n");
+
+	pci_lock_rescan_remove();
+	list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) {
+		list_del(&bus_entry->list);
+
+		free_root_bus_devs(bus_entry->bus);
+
+		kfree(bus_entry->bus->sysdata);
+
+		device_unregister(bus_entry->bus->bridge);
+		pci_remove_bus(bus_entry->bus);
+
+		kfree(bus_entry);
+	}
+	pci_unlock_rescan_remove();
+}
+
+static pci_ers_result_t pcifront_common_process(int cmd,
+						struct pcifront_device *pdev,
+						pci_channel_state_t state)
+{
+	struct pci_driver *pdrv;
+	int bus = pdev->sh_info->aer_op.bus;
+	int devfn = pdev->sh_info->aer_op.devfn;
+	int domain = pdev->sh_info->aer_op.domain;
+	struct pci_dev *pcidev;
+
+	dev_dbg(&pdev->xdev->dev,
+		"pcifront AER process: cmd %x (bus:%x, devfn%x)",
+		cmd, bus, devfn);
+
+	pcidev = pci_get_domain_bus_and_slot(domain, bus, devfn);
+	if (!pcidev || !pcidev->dev.driver) {
+		dev_err(&pdev->xdev->dev, "device or AER driver is NULL\n");
+		pci_dev_put(pcidev);
+		return PCI_ERS_RESULT_NONE;
+	}
+	pdrv = to_pci_driver(pcidev->dev.driver);
+
+	if (pdrv->err_handler && pdrv->err_handler->error_detected) {
+		pci_dbg(pcidev, "trying to call AER service\n");
+		switch (cmd) {
+		case XEN_PCI_OP_aer_detected:
+			return pdrv->err_handler->error_detected(pcidev, state);
+		case XEN_PCI_OP_aer_mmio:
+			return pdrv->err_handler->mmio_enabled(pcidev);
+		case XEN_PCI_OP_aer_slotreset:
+			return pdrv->err_handler->slot_reset(pcidev);
+		case XEN_PCI_OP_aer_resume:
+			pdrv->err_handler->resume(pcidev);
+			return PCI_ERS_RESULT_NONE;
+		default:
+			dev_err(&pdev->xdev->dev,
+				"bad request in aer recovery operation!\n");
+		}
+	}
+
+	return PCI_ERS_RESULT_NONE;
+}
+
+
+static void pcifront_do_aer(struct work_struct *data)
+{
+	struct pcifront_device *pdev =
+		container_of(data, struct pcifront_device, op_work);
+	int cmd = pdev->sh_info->aer_op.cmd;
+	pci_channel_state_t state =
+		(pci_channel_state_t)pdev->sh_info->aer_op.err;
+
+	/*
+	 * If a pci_conf op is in progress, we have to wait until it is done
+	 * before service aer op
+	 */
+	dev_dbg(&pdev->xdev->dev,
+		"pcifront service aer bus %x devfn %x\n",
+		pdev->sh_info->aer_op.bus, pdev->sh_info->aer_op.devfn);
+
+	pdev->sh_info->aer_op.err = pcifront_common_process(cmd, pdev, state);
+
+	/* Post the operation to the guest. */
+	wmb();
+	clear_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags);
+	notify_remote_via_evtchn(pdev->evtchn);
+
+	/*in case of we lost an aer request in four lines time_window*/
+	smp_mb__before_atomic();
+	clear_bit(_PDEVB_op_active, &pdev->flags);
+	smp_mb__after_atomic();
+
+	schedule_pcifront_aer_op(pdev);
+
+}
+
+static irqreturn_t pcifront_handler_aer(int irq, void *dev)
+{
+	struct pcifront_device *pdev = dev;
+
+	schedule_pcifront_aer_op(pdev);
+	return IRQ_HANDLED;
+}
+static int pcifront_connect_and_init_dma(struct pcifront_device *pdev)
+{
+	int err = 0;
+
+	spin_lock(&pcifront_dev_lock);
+
+	if (!pcifront_dev) {
+		dev_info(&pdev->xdev->dev, "Installing PCI frontend\n");
+		pcifront_dev = pdev;
+	} else
+		err = -EEXIST;
+
+	spin_unlock(&pcifront_dev_lock);
+
+	return err;
+}
+
+static void pcifront_disconnect(struct pcifront_device *pdev)
+{
+	spin_lock(&pcifront_dev_lock);
+
+	if (pdev == pcifront_dev) {
+		dev_info(&pdev->xdev->dev,
+			 "Disconnecting PCI Frontend Buses\n");
+		pcifront_dev = NULL;
+	}
+
+	spin_unlock(&pcifront_dev_lock);
+}
+static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev)
+{
+	struct pcifront_device *pdev;
+
+	pdev = kzalloc(sizeof(struct pcifront_device), GFP_KERNEL);
+	if (pdev == NULL)
+		goto out;
+
+	if (xenbus_setup_ring(xdev, GFP_KERNEL, (void **)&pdev->sh_info, 1,
+			      &pdev->gnt_ref)) {
+		kfree(pdev);
+		pdev = NULL;
+		goto out;
+	}
+	pdev->sh_info->flags = 0;
+
+	/*Flag for registering PV AER handler*/
+	set_bit(_XEN_PCIB_AERHANDLER, (void *)&pdev->sh_info->flags);
+
+	dev_set_drvdata(&xdev->dev, pdev);
+	pdev->xdev = xdev;
+
+	INIT_LIST_HEAD(&pdev->root_buses);
+
+	spin_lock_init(&pdev->sh_info_lock);
+
+	pdev->evtchn = INVALID_EVTCHN;
+	pdev->irq = -1;
+
+	INIT_WORK(&pdev->op_work, pcifront_do_aer);
+
+	dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n",
+		pdev, pdev->sh_info);
+out:
+	return pdev;
+}
+
+static void free_pdev(struct pcifront_device *pdev)
+{
+	dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev);
+
+	pcifront_free_roots(pdev);
+
+	cancel_work_sync(&pdev->op_work);
+
+	if (pdev->irq >= 0)
+		unbind_from_irqhandler(pdev->irq, pdev);
+
+	if (pdev->evtchn != INVALID_EVTCHN)
+		xenbus_free_evtchn(pdev->xdev, pdev->evtchn);
+
+	xenbus_teardown_ring((void **)&pdev->sh_info, 1, &pdev->gnt_ref);
+
+	dev_set_drvdata(&pdev->xdev->dev, NULL);
+
+	kfree(pdev);
+}
+
+static int pcifront_publish_info(struct pcifront_device *pdev)
+{
+	int err = 0;
+	struct xenbus_transaction trans;
+
+	err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
+	if (err)
+		goto out;
+
+	err = bind_evtchn_to_irqhandler(pdev->evtchn, pcifront_handler_aer,
+		0, "pcifront", pdev);
+
+	if (err < 0)
+		return err;
+
+	pdev->irq = err;
+
+do_publish:
+	err = xenbus_transaction_start(&trans);
+	if (err) {
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error writing configuration for backend "
+				 "(start transaction)");
+		goto out;
+	}
+
+	err = xenbus_printf(trans, pdev->xdev->nodename,
+			    "pci-op-ref", "%u", pdev->gnt_ref);
+	if (!err)
+		err = xenbus_printf(trans, pdev->xdev->nodename,
+				    "event-channel", "%u", pdev->evtchn);
+	if (!err)
+		err = xenbus_printf(trans, pdev->xdev->nodename,
+				    "magic", XEN_PCI_MAGIC);
+
+	if (err) {
+		xenbus_transaction_end(trans, 1);
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error writing configuration for backend");
+		goto out;
+	} else {
+		err = xenbus_transaction_end(trans, 0);
+		if (err == -EAGAIN)
+			goto do_publish;
+		else if (err) {
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error completing transaction "
+					 "for backend");
+			goto out;
+		}
+	}
+
+	xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
+
+	dev_dbg(&pdev->xdev->dev, "publishing successful!\n");
+
+out:
+	return err;
+}
+
+static void pcifront_connect(struct pcifront_device *pdev)
+{
+	int err;
+	int i, num_roots, len;
+	char str[64];
+	unsigned int domain, bus;
+
+	err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
+			   "root_num", "%d", &num_roots);
+	if (err == -ENOENT) {
+		xenbus_dev_error(pdev->xdev, err,
+				 "No PCI Roots found, trying 0000:00");
+		err = pcifront_rescan_root(pdev, 0, 0);
+		if (err) {
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error scanning PCI root 0000:00");
+			return;
+		}
+		num_roots = 0;
+	} else if (err != 1) {
+		xenbus_dev_fatal(pdev->xdev, err >= 0 ? -EINVAL : err,
+				 "Error reading number of PCI roots");
+		return;
+	}
+
+	for (i = 0; i < num_roots; i++) {
+		len = snprintf(str, sizeof(str), "root-%d", i);
+		if (unlikely(len >= (sizeof(str) - 1)))
+			return;
+
+		err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
+				   "%x:%x", &domain, &bus);
+		if (err != 2) {
+			xenbus_dev_fatal(pdev->xdev, err >= 0 ? -EINVAL : err,
+					 "Error reading PCI root %d", i);
+			return;
+		}
+
+		err = pcifront_rescan_root(pdev, domain, bus);
+		if (err) {
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error scanning PCI root %04x:%02x",
+					 domain, bus);
+			return;
+		}
+	}
+
+	xenbus_switch_state(pdev->xdev, XenbusStateConnected);
+}
+
+static void pcifront_try_connect(struct pcifront_device *pdev)
+{
+	int err;
+
+	/* Only connect once */
+	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+	    XenbusStateInitialised)
+		return;
+
+	err = pcifront_connect_and_init_dma(pdev);
+	if (err && err != -EEXIST) {
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error setting up PCI Frontend");
+		return;
+	}
+
+	pcifront_connect(pdev);
+}
+
+static int pcifront_try_disconnect(struct pcifront_device *pdev)
+{
+	int err = 0;
+	enum xenbus_state prev_state;
+
+
+	prev_state = xenbus_read_driver_state(pdev->xdev->nodename);
+
+	if (prev_state >= XenbusStateClosing)
+		goto out;
+
+	if (prev_state == XenbusStateConnected) {
+		pcifront_free_roots(pdev);
+		pcifront_disconnect(pdev);
+	}
+
+	err = xenbus_switch_state(pdev->xdev, XenbusStateClosed);
+
+out:
+
+	return err;
+}
+
+static void pcifront_attach_devices(struct pcifront_device *pdev)
+{
+	if (xenbus_read_driver_state(pdev->xdev->nodename) ==
+	    XenbusStateReconfiguring)
+		pcifront_connect(pdev);
+}
+
+static int pcifront_detach_devices(struct pcifront_device *pdev)
+{
+	int err = 0;
+	int i, num_devs;
+	enum xenbus_state state;
+	unsigned int domain, bus, slot, func;
+	struct pci_dev *pci_dev;
+	char str[64];
+
+	state = xenbus_read_driver_state(pdev->xdev->nodename);
+	if (state == XenbusStateInitialised) {
+		dev_dbg(&pdev->xdev->dev, "Handle skipped connect.\n");
+		/* We missed Connected and need to initialize. */
+		err = pcifront_connect_and_init_dma(pdev);
+		if (err && err != -EEXIST) {
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error setting up PCI Frontend");
+			goto out;
+		}
+
+		goto out_switch_state;
+	} else if (state != XenbusStateConnected) {
+		goto out;
+	}
+
+	err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d",
+			   &num_devs);
+	if (err != 1) {
+		if (err >= 0)
+			err = -EINVAL;
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error reading number of PCI devices");
+		goto out;
+	}
+
+	/* Find devices being detached and remove them. */
+	for (i = 0; i < num_devs; i++) {
+		int l, state;
+
+		l = snprintf(str, sizeof(str), "state-%d", i);
+		if (unlikely(l >= (sizeof(str) - 1))) {
+			err = -ENOMEM;
+			goto out;
+		}
+		state = xenbus_read_unsigned(pdev->xdev->otherend, str,
+					     XenbusStateUnknown);
+
+		if (state != XenbusStateClosing)
+			continue;
+
+		/* Remove device. */
+		l = snprintf(str, sizeof(str), "vdev-%d", i);
+		if (unlikely(l >= (sizeof(str) - 1))) {
+			err = -ENOMEM;
+			goto out;
+		}
+		err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
+				   "%x:%x:%x.%x", &domain, &bus, &slot, &func);
+		if (err != 4) {
+			if (err >= 0)
+				err = -EINVAL;
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error reading PCI device %d", i);
+			goto out;
+		}
+
+		pci_dev = pci_get_domain_bus_and_slot(domain, bus,
+				PCI_DEVFN(slot, func));
+		if (!pci_dev) {
+			dev_dbg(&pdev->xdev->dev,
+				"Cannot get PCI device %04x:%02x:%02x.%d\n",
+				domain, bus, slot, func);
+			continue;
+		}
+		pci_lock_rescan_remove();
+		pci_stop_and_remove_bus_device(pci_dev);
+		pci_dev_put(pci_dev);
+		pci_unlock_rescan_remove();
+
+		dev_dbg(&pdev->xdev->dev,
+			"PCI device %04x:%02x:%02x.%d removed.\n",
+			domain, bus, slot, func);
+	}
+
+ out_switch_state:
+	err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring);
+
+out:
+	return err;
+}
+
+static void pcifront_backend_changed(struct xenbus_device *xdev,
+						  enum xenbus_state be_state)
+{
+	struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
+
+	switch (be_state) {
+	case XenbusStateUnknown:
+	case XenbusStateInitialising:
+	case XenbusStateInitWait:
+	case XenbusStateInitialised:
+		break;
+
+	case XenbusStateConnected:
+		pcifront_try_connect(pdev);
+		break;
+
+	case XenbusStateClosed:
+		if (xdev->state == XenbusStateClosed)
+			break;
+		fallthrough;	/* Missed the backend's CLOSING state */
+	case XenbusStateClosing:
+		dev_warn(&xdev->dev, "backend going away!\n");
+		pcifront_try_disconnect(pdev);
+		break;
+
+	case XenbusStateReconfiguring:
+		pcifront_detach_devices(pdev);
+		break;
+
+	case XenbusStateReconfigured:
+		pcifront_attach_devices(pdev);
+		break;
+	}
+}
+
+static int pcifront_xenbus_probe(struct xenbus_device *xdev,
+				 const struct xenbus_device_id *id)
+{
+	int err = 0;
+	struct pcifront_device *pdev = alloc_pdev(xdev);
+
+	if (pdev == NULL) {
+		err = -ENOMEM;
+		xenbus_dev_fatal(xdev, err,
+				 "Error allocating pcifront_device struct");
+		goto out;
+	}
+
+	err = pcifront_publish_info(pdev);
+	if (err)
+		free_pdev(pdev);
+
+out:
+	return err;
+}
+
+static void pcifront_xenbus_remove(struct xenbus_device *xdev)
+{
+	struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
+
+	if (pdev)
+		free_pdev(pdev);
+}
+
+static const struct xenbus_device_id xenpci_ids[] = {
+	{"pci"},
+	{""},
+};
+
+static struct xenbus_driver xenpci_driver = {
+	.name			= "pcifront",
+	.ids			= xenpci_ids,
+	.probe			= pcifront_xenbus_probe,
+	.remove			= pcifront_xenbus_remove,
+	.otherend_changed	= pcifront_backend_changed,
+};
+
+static int __init pcifront_init(void)
+{
+	if (!xen_pv_domain() || xen_initial_domain())
+		return -ENODEV;
+
+	if (!xen_has_pv_devices())
+		return -ENODEV;
+
+	pci_frontend_registrar(1 /* enable */);
+
+	return xenbus_register_frontend(&xenpci_driver);
+}
+
+static void __exit pcifront_cleanup(void)
+{
+	xenbus_unregister_driver(&xenpci_driver);
+	pci_frontend_registrar(0 /* disable */);
+}
+module_init(pcifront_init);
+module_exit(pcifront_cleanup);
+
+MODULE_DESCRIPTION("Xen PCI passthrough frontend.");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("xen:pci");
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
commit	ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree	b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/pci
parent	Initial commit. (diff)
download	linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip