Adding upstream version 6.6.15.upstream/6.6.15

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
commit: ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree: b2d64bc10158fdd5497876388cd68142ca374ed3 /arch/powerpc/sysdev
parent: Initial commit. (diff)
download: linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
66 files changed, 20406 insertions, 0 deletions
diff --git a/arch/powerpc/sysdev/6xx-suspend.S b/arch/powerpc/sysdev/6xx-suspend.S
new file mode 100644
index 0000000000..e882524fff
--- /dev/null
+++ b/arch/powerpc/sysdev/6xx-suspend.S
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Enter and leave sleep state on chips with 6xx-style HID0
+ * power management bits, which don't leave sleep state via reset.
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2006-2007 Freescale Semiconductor, Inc.
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/reg.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+
+_GLOBAL(mpc6xx_enter_standby)
+	mflr	r4
+
+	mfspr	r5, SPRN_HID0
+	rlwinm	r5, r5, 0, ~(HID0_DOZE | HID0_NAP)
+	oris	r5, r5, HID0_SLEEP@h
+	mtspr	SPRN_HID0, r5
+	isync
+
+	lis	r5, ret_from_standby@h
+	ori	r5, r5, ret_from_standby@l
+	mtlr	r5
+
+	lwz	r6, TI_LOCAL_FLAGS(r2)
+	ori	r6, r6, _TLF_SLEEPING
+	stw	r6, TI_LOCAL_FLAGS(r2)
+
+	mfmsr	r5
+	ori	r5, r5, MSR_EE
+	oris	r5, r5, MSR_POW@h
+	sync
+	mtmsr	r5
+	isync
+
+1:	b	1b
+
+ret_from_standby:
+	mfspr	r5, SPRN_HID0
+	rlwinm	r5, r5, 0, ~HID0_SLEEP
+	mtspr	SPRN_HID0, r5
+
+	mtlr	r4
+	blr
diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig
new file mode 100644
index 0000000000..5aa92ff362
--- /dev/null
+++ b/arch/powerpc/sysdev/Kconfig
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/kconfig-language.rst.
+#
+
+config PPC4xx_PCI_EXPRESS
+	bool
+	depends on PCI && 4xx
+
+config PPC4xx_HSTA_MSI
+	bool
+	depends on PCI_MSI
+	depends on PCI && 4xx
+
+config PPC_MSI_BITMAP
+	bool
+	depends on PCI_MSI
+	default y if MPIC
+	default y if FSL_PCI
+	default y if PPC_POWERNV
+
+source "arch/powerpc/sysdev/xics/Kconfig"
+source "arch/powerpc/sysdev/xive/Kconfig"
+
+config GE_FPGA
+	bool
+
+config FSL_CORENET_RCPM
+	bool
+	help
+	  This option enables support for RCPM (Run Control/Power Management).
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
new file mode 100644
index 0000000000..9cb1d02951
--- /dev/null
+++ b/arch/powerpc/sysdev/Makefile
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+
+ccflags-$(CONFIG_PPC64)		:= $(NO_MINIMAL_TOC)
+
+mpic-msi-obj-$(CONFIG_PCI_MSI)	+= mpic_msi.o mpic_u3msi.o
+obj-$(CONFIG_MPIC)		+= mpic.o $(mpic-msi-obj-y)
+obj-$(CONFIG_MPIC_TIMER)        += mpic_timer.o
+obj-$(CONFIG_FSL_MPIC_TIMER_WAKEUP)	+= fsl_mpic_timer_wakeup.o
+mpic-msgr-obj-$(CONFIG_MPIC_MSGR)	+= mpic_msgr.o
+obj-$(CONFIG_MPIC)		+= mpic.o $(mpic-msi-obj-y) $(mpic-msgr-obj-y)
+obj-$(CONFIG_PPC_EPAPR_HV_PIC)	+= ehv_pic.o
+fsl-msi-obj-$(CONFIG_PCI_MSI)	+= fsl_msi.o
+obj-$(CONFIG_PPC_MSI_BITMAP)	+= msi_bitmap.o
+
+obj-$(CONFIG_PPC_MPC106)	+= grackle.o
+obj-$(CONFIG_PPC_DCR_NATIVE)	+= dcr-low.o
+obj-$(CONFIG_PPC_PMI)		+= pmi.o
+obj-$(CONFIG_U3_DART)		+= dart_iommu.o
+obj-$(CONFIG_MMIO_NVRAM)	+= mmio_nvram.o
+obj-$(CONFIG_FSL_SOC)		+= fsl_soc.o fsl_mpic_err.o
+obj-$(CONFIG_FSL_PCI)		+= fsl_pci.o $(fsl-msi-obj-y)
+obj-$(CONFIG_FSL_PMC)		+= fsl_pmc.o
+obj-$(CONFIG_FSL_CORENET_RCPM)	+= fsl_rcpm.o
+obj-$(CONFIG_FSL_LBC)		+= fsl_lbc.o
+obj-$(CONFIG_FSL_GTM)		+= fsl_gtm.o
+obj-$(CONFIG_FSL_RIO)		+= fsl_rio.o fsl_rmu.o
+obj-$(CONFIG_TSI108_BRIDGE)	+= tsi108_pci.o tsi108_dev.o
+obj-$(CONFIG_RTC_DRV_CMOS)	+= rtc_cmos_setup.o
+
+obj-$(CONFIG_PPC_INDIRECT_PCI)	+= indirect_pci.o
+obj-$(CONFIG_PPC_I8259)		+= i8259.o
+obj-$(CONFIG_IPIC)		+= ipic.o
+obj-$(CONFIG_OF_RTC)		+= of_rtc.o
+
+obj-$(CONFIG_CPM)		+= cpm_common.o
+obj-$(CONFIG_CPM2)		+= cpm2.o cpm2_pic.o cpm_gpio.o
+obj-$(CONFIG_8xx_GPIO)		+= cpm_gpio.o
+obj-$(CONFIG_QUICC_ENGINE)	+= cpm_common.o
+obj-$(CONFIG_PPC_DCR)		+= dcr.o
+
+obj-$(CONFIG_PPC_MPC512x)	+= mpc5xxx_clocks.o
+obj-$(CONFIG_PPC_MPC52xx)	+= mpc5xxx_clocks.o
+
+ifdef CONFIG_SUSPEND
+obj-$(CONFIG_PPC_BOOK3S_32)	+= 6xx-suspend.o
+endif
+
+obj-$(CONFIG_PPC_EARLY_DEBUG_MEMCONS)	+= udbg_memcons.o
+
+obj-$(CONFIG_PPC_XICS)		+= xics/
+obj-$(CONFIG_PPC_XIVE)		+= xive/
+
+obj-$(CONFIG_GE_FPGA)		+= ge/
diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c
new file mode 100644
index 0000000000..14cc5ea936
--- /dev/null
+++ b/arch/powerpc/sysdev/cpm2.c
@@ -0,0 +1,344 @@
+/*
+ * General Purpose functions for the global management of the
+ * 8260 Communication Processor Module.
+ * Copyright (c) 1999-2001 Dan Malek <dan@embeddedalley.com>
+ * Copyright (c) 2000 MontaVista Software, Inc (source@mvista.com)
+ *	2.3.99 Updates
+ *
+ * 2006 (c) MontaVista Software, Inc.
+ * Vitaly Bordug <vbordug@ru.mvista.com>
+ * 	Merged to arch/powerpc from arch/ppc/syslib/cpm2_common.c
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+/*
+ *
+ * In addition to the individual control of the communication
+ * channels, there are a few functions that globally affect the
+ * communication processor.
+ *
+ * Buffer descriptors must be allocated from the dual ported memory
+ * space.  The allocator for that is here.  When the communication
+ * process is reset, we reclaim the memory available.  There is
+ * currently no deallocator for this memory.
+ */
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/cpm2.h>
+#include <asm/rheap.h>
+
+#include <sysdev/fsl_soc.h>
+
+cpm_cpm2_t __iomem *cpmp; /* Pointer to comm processor space */
+
+/* We allocate this here because it is used almost exclusively for
+ * the communication processor devices.
+ */
+cpm2_map_t __iomem *cpm2_immr;
+EXPORT_SYMBOL(cpm2_immr);
+
+#define CPM_MAP_SIZE	(0x40000)	/* 256k - the PQ3 reserve this amount
+					   of space for CPM as it is larger
+					   than on PQ2 */
+
+void __init cpm2_reset(void)
+{
+#ifdef CONFIG_PPC_85xx
+	cpm2_immr = ioremap(get_immrbase() + 0x80000, CPM_MAP_SIZE);
+#else
+	cpm2_immr = ioremap(get_immrbase(), CPM_MAP_SIZE);
+#endif
+
+	/* Tell everyone where the comm processor resides.
+	 */
+	cpmp = &cpm2_immr->im_cpm;
+
+#ifndef CONFIG_PPC_EARLY_DEBUG_CPM
+	/* Reset the CPM.
+	 */
+	cpm_command(CPM_CR_RST, 0);
+#endif
+}
+
+static DEFINE_SPINLOCK(cmd_lock);
+
+#define MAX_CR_CMD_LOOPS        10000
+
+int cpm_command(u32 command, u8 opcode)
+{
+	int i, ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cmd_lock, flags);
+
+	ret = 0;
+	out_be32(&cpmp->cp_cpcr, command | opcode | CPM_CR_FLG);
+	for (i = 0; i < MAX_CR_CMD_LOOPS; i++)
+		if ((in_be32(&cpmp->cp_cpcr) & CPM_CR_FLG) == 0)
+			goto out;
+
+	printk(KERN_ERR "%s(): Not able to issue CPM command\n", __func__);
+	ret = -EIO;
+out:
+	spin_unlock_irqrestore(&cmd_lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(cpm_command);
+
+/* Set a baud rate generator.  This needs lots of work.  There are
+ * eight BRGs, which can be connected to the CPM channels or output
+ * as clocks.  The BRGs are in two different block of internal
+ * memory mapped space.
+ * The baud rate clock is the system clock divided by something.
+ * It was set up long ago during the initial boot phase and is
+ * given to us.
+ * Baud rate clocks are zero-based in the driver code (as that maps
+ * to port numbers).  Documentation uses 1-based numbering.
+ */
+void __cpm2_setbrg(uint brg, uint rate, uint clk, int div16, int src)
+{
+	u32 __iomem *bp;
+	u32 val;
+
+	/* This is good enough to get SMCs running.....
+	*/
+	if (brg < 4) {
+		bp = &cpm2_immr->im_brgc1;
+	} else {
+		bp = &cpm2_immr->im_brgc5;
+		brg -= 4;
+	}
+	bp += brg;
+	/* Round the clock divider to the nearest integer. */
+	val = (((clk * 2 / rate) - 1) & ~1) | CPM_BRG_EN | src;
+	if (div16)
+		val |= CPM_BRG_DIV16;
+
+	out_be32(bp, val);
+}
+EXPORT_SYMBOL(__cpm2_setbrg);
+
+int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode)
+{
+	int ret = 0;
+	int shift;
+	int i, bits = 0;
+	u32 __iomem *reg;
+	u32 mask = 7;
+
+	u8 clk_map[][3] = {
+		{CPM_CLK_FCC1, CPM_BRG5, 0},
+		{CPM_CLK_FCC1, CPM_BRG6, 1},
+		{CPM_CLK_FCC1, CPM_BRG7, 2},
+		{CPM_CLK_FCC1, CPM_BRG8, 3},
+		{CPM_CLK_FCC1, CPM_CLK9, 4},
+		{CPM_CLK_FCC1, CPM_CLK10, 5},
+		{CPM_CLK_FCC1, CPM_CLK11, 6},
+		{CPM_CLK_FCC1, CPM_CLK12, 7},
+		{CPM_CLK_FCC2, CPM_BRG5, 0},
+		{CPM_CLK_FCC2, CPM_BRG6, 1},
+		{CPM_CLK_FCC2, CPM_BRG7, 2},
+		{CPM_CLK_FCC2, CPM_BRG8, 3},
+		{CPM_CLK_FCC2, CPM_CLK13, 4},
+		{CPM_CLK_FCC2, CPM_CLK14, 5},
+		{CPM_CLK_FCC2, CPM_CLK15, 6},
+		{CPM_CLK_FCC2, CPM_CLK16, 7},
+		{CPM_CLK_FCC3, CPM_BRG5, 0},
+		{CPM_CLK_FCC3, CPM_BRG6, 1},
+		{CPM_CLK_FCC3, CPM_BRG7, 2},
+		{CPM_CLK_FCC3, CPM_BRG8, 3},
+		{CPM_CLK_FCC3, CPM_CLK13, 4},
+		{CPM_CLK_FCC3, CPM_CLK14, 5},
+		{CPM_CLK_FCC3, CPM_CLK15, 6},
+		{CPM_CLK_FCC3, CPM_CLK16, 7},
+		{CPM_CLK_SCC1, CPM_BRG1, 0},
+		{CPM_CLK_SCC1, CPM_BRG2, 1},
+		{CPM_CLK_SCC1, CPM_BRG3, 2},
+		{CPM_CLK_SCC1, CPM_BRG4, 3},
+		{CPM_CLK_SCC1, CPM_CLK11, 4},
+		{CPM_CLK_SCC1, CPM_CLK12, 5},
+		{CPM_CLK_SCC1, CPM_CLK3, 6},
+		{CPM_CLK_SCC1, CPM_CLK4, 7},
+		{CPM_CLK_SCC2, CPM_BRG1, 0},
+		{CPM_CLK_SCC2, CPM_BRG2, 1},
+		{CPM_CLK_SCC2, CPM_BRG3, 2},
+		{CPM_CLK_SCC2, CPM_BRG4, 3},
+		{CPM_CLK_SCC2, CPM_CLK11, 4},
+		{CPM_CLK_SCC2, CPM_CLK12, 5},
+		{CPM_CLK_SCC2, CPM_CLK3, 6},
+		{CPM_CLK_SCC2, CPM_CLK4, 7},
+		{CPM_CLK_SCC3, CPM_BRG1, 0},
+		{CPM_CLK_SCC3, CPM_BRG2, 1},
+		{CPM_CLK_SCC3, CPM_BRG3, 2},
+		{CPM_CLK_SCC3, CPM_BRG4, 3},
+		{CPM_CLK_SCC3, CPM_CLK5, 4},
+		{CPM_CLK_SCC3, CPM_CLK6, 5},
+		{CPM_CLK_SCC3, CPM_CLK7, 6},
+		{CPM_CLK_SCC3, CPM_CLK8, 7},
+		{CPM_CLK_SCC4, CPM_BRG1, 0},
+		{CPM_CLK_SCC4, CPM_BRG2, 1},
+		{CPM_CLK_SCC4, CPM_BRG3, 2},
+		{CPM_CLK_SCC4, CPM_BRG4, 3},
+		{CPM_CLK_SCC4, CPM_CLK5, 4},
+		{CPM_CLK_SCC4, CPM_CLK6, 5},
+		{CPM_CLK_SCC4, CPM_CLK7, 6},
+		{CPM_CLK_SCC4, CPM_CLK8, 7},
+	};
+
+	switch (target) {
+	case CPM_CLK_SCC1:
+		reg = &cpm2_immr->im_cpmux.cmx_scr;
+		shift = 24;
+		break;
+	case CPM_CLK_SCC2:
+		reg = &cpm2_immr->im_cpmux.cmx_scr;
+		shift = 16;
+		break;
+	case CPM_CLK_SCC3:
+		reg = &cpm2_immr->im_cpmux.cmx_scr;
+		shift = 8;
+		break;
+	case CPM_CLK_SCC4:
+		reg = &cpm2_immr->im_cpmux.cmx_scr;
+		shift = 0;
+		break;
+	case CPM_CLK_FCC1:
+		reg = &cpm2_immr->im_cpmux.cmx_fcr;
+		shift = 24;
+		break;
+	case CPM_CLK_FCC2:
+		reg = &cpm2_immr->im_cpmux.cmx_fcr;
+		shift = 16;
+		break;
+	case CPM_CLK_FCC3:
+		reg = &cpm2_immr->im_cpmux.cmx_fcr;
+		shift = 8;
+		break;
+	default:
+		printk(KERN_ERR "cpm2_clock_setup: invalid clock target\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(clk_map); i++) {
+		if (clk_map[i][0] == target && clk_map[i][1] == clock) {
+			bits = clk_map[i][2];
+			break;
+		}
+	}
+	if (i == ARRAY_SIZE(clk_map))
+	    ret = -EINVAL;
+
+	bits <<= shift;
+	mask <<= shift;
+
+	if (mode == CPM_CLK_RTX) {
+		bits |= bits << 3;
+		mask |= mask << 3;
+	} else if (mode == CPM_CLK_RX) {
+		bits <<= 3;
+		mask <<= 3;
+	}
+
+	out_be32(reg, (in_be32(reg) & ~mask) | bits);
+
+	return ret;
+}
+
+int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock)
+{
+	int ret = 0;
+	int shift;
+	int i, bits = 0;
+	u8 __iomem *reg;
+	u8 mask = 3;
+
+	u8 clk_map[][3] = {
+		{CPM_CLK_SMC1, CPM_BRG1, 0},
+		{CPM_CLK_SMC1, CPM_BRG7, 1},
+		{CPM_CLK_SMC1, CPM_CLK7, 2},
+		{CPM_CLK_SMC1, CPM_CLK9, 3},
+		{CPM_CLK_SMC2, CPM_BRG2, 0},
+		{CPM_CLK_SMC2, CPM_BRG8, 1},
+		{CPM_CLK_SMC2, CPM_CLK4, 2},
+		{CPM_CLK_SMC2, CPM_CLK15, 3},
+	};
+
+	switch (target) {
+	case CPM_CLK_SMC1:
+		reg = &cpm2_immr->im_cpmux.cmx_smr;
+		mask = 3;
+		shift = 4;
+		break;
+	case CPM_CLK_SMC2:
+		reg = &cpm2_immr->im_cpmux.cmx_smr;
+		mask = 3;
+		shift = 0;
+		break;
+	default:
+		printk(KERN_ERR "cpm2_smc_clock_setup: invalid clock target\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(clk_map); i++) {
+		if (clk_map[i][0] == target && clk_map[i][1] == clock) {
+			bits = clk_map[i][2];
+			break;
+		}
+	}
+	if (i == ARRAY_SIZE(clk_map))
+	    ret = -EINVAL;
+
+	bits <<= shift;
+	mask <<= shift;
+
+	out_8(reg, (in_8(reg) & ~mask) | bits);
+
+	return ret;
+}
+
+struct cpm2_ioports {
+	u32 dir, par, sor, odr, dat;
+	u32 res[3];
+};
+
+void __init cpm2_set_pin(int port, int pin, int flags)
+{
+	struct cpm2_ioports __iomem *iop =
+		(struct cpm2_ioports __iomem *)&cpm2_immr->im_ioport;
+
+	pin = 1 << (31 - pin);
+
+	if (flags & CPM_PIN_OUTPUT)
+		setbits32(&iop[port].dir, pin);
+	else
+		clrbits32(&iop[port].dir, pin);
+
+	if (!(flags & CPM_PIN_GPIO))
+		setbits32(&iop[port].par, pin);
+	else
+		clrbits32(&iop[port].par, pin);
+
+	if (flags & CPM_PIN_SECONDARY)
+		setbits32(&iop[port].sor, pin);
+	else
+		clrbits32(&iop[port].sor, pin);
+
+	if (flags & CPM_PIN_OPENDRAIN)
+		setbits32(&iop[port].odr, pin);
+	else
+		clrbits32(&iop[port].odr, pin);
+}
diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c
new file mode 100644
index 0000000000..e14493685f
--- /dev/null
+++ b/arch/powerpc/sysdev/cpm2_pic.c
@@ -0,0 +1,267 @@
+/*
+ * Platform information definitions.
+ *
+ * Copied from arch/ppc/syslib/cpm2_pic.c with minor subsequent updates
+ * to make in work in arch/powerpc/. Original (c) belongs to Dan Malek.
+ *
+ * Author:  Vitaly Bordug <vbordug@ru.mvista.com>
+ *
+ * 1999-2001 (c) Dan Malek <dan@embeddedalley.com>
+ * 2006 (c) MontaVista Software, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+/* The CPM2 internal interrupt controller.  It is usually
+ * the only interrupt controller.
+ * There are two 32-bit registers (high/low) for up to 64
+ * possible interrupts.
+ *
+ * Now, the fun starts.....Interrupt Numbers DO NOT MAP
+ * in a simple arithmetic fashion to mask or pending registers.
+ * That is, interrupt 4 does not map to bit position 4.
+ * We create two tables, indexed by vector number, to indicate
+ * which register to use and which bit in the register to use.
+ */
+
+#include <linux/stddef.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+
+#include <asm/immap_cpm2.h>
+#include <asm/io.h>
+
+#include "cpm2_pic.h"
+
+/* External IRQS */
+#define CPM2_IRQ_EXT1		19
+#define CPM2_IRQ_EXT7		25
+
+/* Port C IRQS */
+#define CPM2_IRQ_PORTC15	48
+#define CPM2_IRQ_PORTC0		63
+
+static intctl_cpm2_t __iomem *cpm2_intctl;
+
+static struct irq_domain *cpm2_pic_host;
+static unsigned long ppc_cached_irq_mask[2]; /* 2 32-bit registers */
+
+static const u_char irq_to_siureg[] = {
+	1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* bit numbers do not match the docs, these are precomputed so the bit for
+ * a given irq is (1 << irq_to_siubit[irq]) */
+static const u_char irq_to_siubit[] = {
+	 0, 15, 14, 13, 12, 11, 10,  9,
+	 8,  7,  6,  5,  4,  3,  2,  1,
+	 2,  1,  0, 14, 13, 12, 11, 10,
+	 9,  8,  7,  6,  5,  4,  3,  0,
+	31, 30, 29, 28, 27, 26, 25, 24,
+	23, 22, 21, 20, 19, 18, 17, 16,
+	16, 17, 18, 19, 20, 21, 22, 23,
+	24, 25, 26, 27, 28, 29, 30, 31,
+};
+
+static void cpm2_mask_irq(struct irq_data *d)
+{
+	int	bit, word;
+	unsigned int irq_nr = irqd_to_hwirq(d);
+
+	bit = irq_to_siubit[irq_nr];
+	word = irq_to_siureg[irq_nr];
+
+	ppc_cached_irq_mask[word] &= ~(1 << bit);
+	out_be32(&cpm2_intctl->ic_simrh + word, ppc_cached_irq_mask[word]);
+}
+
+static void cpm2_unmask_irq(struct irq_data *d)
+{
+	int	bit, word;
+	unsigned int irq_nr = irqd_to_hwirq(d);
+
+	bit = irq_to_siubit[irq_nr];
+	word = irq_to_siureg[irq_nr];
+
+	ppc_cached_irq_mask[word] |= 1 << bit;
+	out_be32(&cpm2_intctl->ic_simrh + word, ppc_cached_irq_mask[word]);
+}
+
+static void cpm2_ack(struct irq_data *d)
+{
+	int	bit, word;
+	unsigned int irq_nr = irqd_to_hwirq(d);
+
+	bit = irq_to_siubit[irq_nr];
+	word = irq_to_siureg[irq_nr];
+
+	out_be32(&cpm2_intctl->ic_sipnrh + word, 1 << bit);
+}
+
+static void cpm2_end_irq(struct irq_data *d)
+{
+	int	bit, word;
+	unsigned int irq_nr = irqd_to_hwirq(d);
+
+	bit = irq_to_siubit[irq_nr];
+	word = irq_to_siureg[irq_nr];
+
+	ppc_cached_irq_mask[word] |= 1 << bit;
+	out_be32(&cpm2_intctl->ic_simrh + word, ppc_cached_irq_mask[word]);
+
+	/*
+	 * Work around large numbers of spurious IRQs on PowerPC 82xx
+	 * systems.
+	 */
+	mb();
+}
+
+static int cpm2_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned int vold, vnew, edibit;
+
+	/* Port C interrupts are either IRQ_TYPE_EDGE_FALLING or
+	 * IRQ_TYPE_EDGE_BOTH (default).  All others are IRQ_TYPE_EDGE_FALLING
+	 * or IRQ_TYPE_LEVEL_LOW (default)
+	 */
+	if (src >= CPM2_IRQ_PORTC15 && src <= CPM2_IRQ_PORTC0) {
+		if (flow_type == IRQ_TYPE_NONE)
+			flow_type = IRQ_TYPE_EDGE_BOTH;
+
+		if (flow_type != IRQ_TYPE_EDGE_BOTH &&
+		    flow_type != IRQ_TYPE_EDGE_FALLING)
+			goto err_sense;
+	} else {
+		if (flow_type == IRQ_TYPE_NONE)
+			flow_type = IRQ_TYPE_LEVEL_LOW;
+
+		if (flow_type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_LEVEL_HIGH))
+			goto err_sense;
+	}
+
+	irqd_set_trigger_type(d, flow_type);
+	if (flow_type & IRQ_TYPE_LEVEL_LOW)
+		irq_set_handler_locked(d, handle_level_irq);
+	else
+		irq_set_handler_locked(d, handle_edge_irq);
+
+	/* internal IRQ senses are LEVEL_LOW
+	 * EXT IRQ and Port C IRQ senses are programmable
+	 */
+	if (src >= CPM2_IRQ_EXT1 && src <= CPM2_IRQ_EXT7)
+			edibit = (14 - (src - CPM2_IRQ_EXT1));
+	else
+		if (src >= CPM2_IRQ_PORTC15 && src <= CPM2_IRQ_PORTC0)
+			edibit = (31 - (CPM2_IRQ_PORTC0 - src));
+		else
+			return (flow_type & IRQ_TYPE_LEVEL_LOW) ?
+				IRQ_SET_MASK_OK_NOCOPY : -EINVAL;
+
+	vold = in_be32(&cpm2_intctl->ic_siexr);
+
+	if ((flow_type & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_EDGE_FALLING)
+		vnew = vold | (1 << edibit);
+	else
+		vnew = vold & ~(1 << edibit);
+
+	if (vold != vnew)
+		out_be32(&cpm2_intctl->ic_siexr, vnew);
+	return IRQ_SET_MASK_OK_NOCOPY;
+
+err_sense:
+	pr_err("CPM2 PIC: sense type 0x%x not supported\n", flow_type);
+	return -EINVAL;
+}
+
+static struct irq_chip cpm2_pic = {
+	.name = "CPM2 SIU",
+	.irq_mask = cpm2_mask_irq,
+	.irq_unmask = cpm2_unmask_irq,
+	.irq_ack = cpm2_ack,
+	.irq_eoi = cpm2_end_irq,
+	.irq_set_type = cpm2_set_irq_type,
+	.flags = IRQCHIP_EOI_IF_HANDLED,
+};
+
+unsigned int cpm2_get_irq(void)
+{
+	int irq;
+	unsigned long bits;
+
+       /* For CPM2, read the SIVEC register and shift the bits down
+         * to get the irq number.         */
+        bits = in_be32(&cpm2_intctl->ic_sivec);
+        irq = bits >> 26;
+
+	if (irq == 0)
+		return(-1);
+	return irq_linear_revmap(cpm2_pic_host, irq);
+}
+
+static int cpm2_pic_host_map(struct irq_domain *h, unsigned int virq,
+			  irq_hw_number_t hw)
+{
+	pr_debug("cpm2_pic_host_map(%d, 0x%lx)\n", virq, hw);
+
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_chip_and_handler(virq, &cpm2_pic, handle_level_irq);
+	return 0;
+}
+
+static const struct irq_domain_ops cpm2_pic_host_ops = {
+	.map = cpm2_pic_host_map,
+	.xlate = irq_domain_xlate_onetwocell,
+};
+
+void cpm2_pic_init(struct device_node *node)
+{
+	int i;
+
+	cpm2_intctl = &cpm2_immr->im_intctl;
+
+	/* Clear the CPM IRQ controller, in case it has any bits set
+	 * from the bootloader
+	 */
+
+	/* Mask out everything */
+
+	out_be32(&cpm2_intctl->ic_simrh, 0x00000000);
+	out_be32(&cpm2_intctl->ic_simrl, 0x00000000);
+
+	wmb();
+
+	/* Ack everything */
+	out_be32(&cpm2_intctl->ic_sipnrh, 0xffffffff);
+	out_be32(&cpm2_intctl->ic_sipnrl, 0xffffffff);
+	wmb();
+
+	/* Dummy read of the vector */
+	i = in_be32(&cpm2_intctl->ic_sivec);
+	rmb();
+
+	/* Initialize the default interrupt mapping priorities,
+	 * in case the boot rom changed something on us.
+	 */
+	out_be16(&cpm2_intctl->ic_sicr, 0);
+	out_be32(&cpm2_intctl->ic_scprrh, 0x05309770);
+	out_be32(&cpm2_intctl->ic_scprrl, 0x05309770);
+
+	/* create a legacy host */
+	cpm2_pic_host = irq_domain_add_linear(node, 64, &cpm2_pic_host_ops, NULL);
+	if (cpm2_pic_host == NULL) {
+		printk(KERN_ERR "CPM2 PIC: failed to allocate irq host!\n");
+		return;
+	}
+}
diff --git a/arch/powerpc/sysdev/cpm2_pic.h b/arch/powerpc/sysdev/cpm2_pic.h
new file mode 100644
index 0000000000..a8a26951f3
--- /dev/null
+++ b/arch/powerpc/sysdev/cpm2_pic.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_KERNEL_CPM2_H
+#define _PPC_KERNEL_CPM2_H
+
+extern unsigned int cpm2_get_irq(void);
+
+extern void cpm2_pic_init(struct device_node *);
+
+#endif /* _PPC_KERNEL_CPM2_H */
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
new file mode 100644
index 0000000000..47db732981
--- /dev/null
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Common CPM code
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright 2007-2008,2010 Freescale Semiconductor, Inc.
+ *
+ * Some parts derived from commproc.c/cpm2_common.c, which is:
+ * Copyright (c) 1997 Dan error_act (dmalek@jlc.net)
+ * Copyright (c) 1999-2001 Dan Malek <dan@embeddedalley.com>
+ * Copyright (c) 2000 MontaVista Software, Inc (source@mvista.com)
+ * 2006 (c) MontaVista Software, Inc.
+ * Vitaly Bordug <vbordug@ru.mvista.com>
+ */
+
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/export.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+
+#include <asm/udbg.h>
+#include <asm/io.h>
+#include <asm/cpm.h>
+#include <asm/fixmap.h>
+#include <soc/fsl/qe/qe.h>
+
+#include <mm/mmu_decl.h>
+
+#if defined(CONFIG_CPM2) || defined(CONFIG_8xx_GPIO)
+#include <linux/gpio/legacy-of-mm-gpiochip.h>
+#endif
+
+static int __init cpm_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,cpm1");
+	if (!np)
+		np = of_find_compatible_node(NULL, NULL, "fsl,cpm2");
+	if (!np)
+		return -ENODEV;
+	cpm_muram_init();
+	of_node_put(np);
+	return 0;
+}
+subsys_initcall(cpm_init);
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_CPM
+static u32 __iomem *cpm_udbg_txdesc;
+static u8 __iomem *cpm_udbg_txbuf;
+
+static void udbg_putc_cpm(char c)
+{
+	if (c == '\n')
+		udbg_putc_cpm('\r');
+
+	while (in_be32(&cpm_udbg_txdesc[0]) & 0x80000000)
+		;
+
+	out_8(cpm_udbg_txbuf, c);
+	out_be32(&cpm_udbg_txdesc[0], 0xa0000001);
+}
+
+void __init udbg_init_cpm(void)
+{
+#ifdef CONFIG_PPC_8xx
+	mmu_mapin_immr();
+
+	cpm_udbg_txdesc = (u32 __iomem __force *)
+			  (CONFIG_PPC_EARLY_DEBUG_CPM_ADDR - PHYS_IMMR_BASE +
+			   VIRT_IMMR_BASE);
+	cpm_udbg_txbuf = (u8 __iomem __force *)
+			 (in_be32(&cpm_udbg_txdesc[1]) - PHYS_IMMR_BASE +
+			  VIRT_IMMR_BASE);
+#else
+	cpm_udbg_txdesc = (u32 __iomem __force *)
+			  CONFIG_PPC_EARLY_DEBUG_CPM_ADDR;
+	cpm_udbg_txbuf = (u8 __iomem __force *)in_be32(&cpm_udbg_txdesc[1]);
+#endif
+
+	if (cpm_udbg_txdesc) {
+#ifdef CONFIG_CPM2
+		setbat(1, 0xf0000000, 0xf0000000, 1024*1024, PAGE_KERNEL_NCG);
+#endif
+		udbg_putc = udbg_putc_cpm;
+	}
+}
+#endif
+
+#if defined(CONFIG_CPM2) || defined(CONFIG_8xx_GPIO)
+
+struct cpm2_ioports {
+	u32 dir, par, sor, odr, dat;
+	u32 res[3];
+};
+
+struct cpm2_gpio32_chip {
+	struct of_mm_gpio_chip mm_gc;
+	spinlock_t lock;
+
+	/* shadowed data register to clear/set bits safely */
+	u32 cpdata;
+};
+
+static void cpm2_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc)
+{
+	struct cpm2_gpio32_chip *cpm2_gc =
+		container_of(mm_gc, struct cpm2_gpio32_chip, mm_gc);
+	struct cpm2_ioports __iomem *iop = mm_gc->regs;
+
+	cpm2_gc->cpdata = in_be32(&iop->dat);
+}
+
+static int cpm2_gpio32_get(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct cpm2_ioports __iomem *iop = mm_gc->regs;
+	u32 pin_mask;
+
+	pin_mask = 1 << (31 - gpio);
+
+	return !!(in_be32(&iop->dat) & pin_mask);
+}
+
+static void __cpm2_gpio32_set(struct of_mm_gpio_chip *mm_gc, u32 pin_mask,
+	int value)
+{
+	struct cpm2_gpio32_chip *cpm2_gc = gpiochip_get_data(&mm_gc->gc);
+	struct cpm2_ioports __iomem *iop = mm_gc->regs;
+
+	if (value)
+		cpm2_gc->cpdata |= pin_mask;
+	else
+		cpm2_gc->cpdata &= ~pin_mask;
+
+	out_be32(&iop->dat, cpm2_gc->cpdata);
+}
+
+static void cpm2_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct cpm2_gpio32_chip *cpm2_gc = gpiochip_get_data(gc);
+	unsigned long flags;
+	u32 pin_mask = 1 << (31 - gpio);
+
+	spin_lock_irqsave(&cpm2_gc->lock, flags);
+
+	__cpm2_gpio32_set(mm_gc, pin_mask, value);
+
+	spin_unlock_irqrestore(&cpm2_gc->lock, flags);
+}
+
+static int cpm2_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct cpm2_gpio32_chip *cpm2_gc = gpiochip_get_data(gc);
+	struct cpm2_ioports __iomem *iop = mm_gc->regs;
+	unsigned long flags;
+	u32 pin_mask = 1 << (31 - gpio);
+
+	spin_lock_irqsave(&cpm2_gc->lock, flags);
+
+	setbits32(&iop->dir, pin_mask);
+	__cpm2_gpio32_set(mm_gc, pin_mask, val);
+
+	spin_unlock_irqrestore(&cpm2_gc->lock, flags);
+
+	return 0;
+}
+
+static int cpm2_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct cpm2_gpio32_chip *cpm2_gc = gpiochip_get_data(gc);
+	struct cpm2_ioports __iomem *iop = mm_gc->regs;
+	unsigned long flags;
+	u32 pin_mask = 1 << (31 - gpio);
+
+	spin_lock_irqsave(&cpm2_gc->lock, flags);
+
+	clrbits32(&iop->dir, pin_mask);
+
+	spin_unlock_irqrestore(&cpm2_gc->lock, flags);
+
+	return 0;
+}
+
+int cpm2_gpiochip_add32(struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+	struct cpm2_gpio32_chip *cpm2_gc;
+	struct of_mm_gpio_chip *mm_gc;
+	struct gpio_chip *gc;
+
+	cpm2_gc = kzalloc(sizeof(*cpm2_gc), GFP_KERNEL);
+	if (!cpm2_gc)
+		return -ENOMEM;
+
+	spin_lock_init(&cpm2_gc->lock);
+
+	mm_gc = &cpm2_gc->mm_gc;
+	gc = &mm_gc->gc;
+
+	mm_gc->save_regs = cpm2_gpio32_save_regs;
+	gc->ngpio = 32;
+	gc->direction_input = cpm2_gpio32_dir_in;
+	gc->direction_output = cpm2_gpio32_dir_out;
+	gc->get = cpm2_gpio32_get;
+	gc->set = cpm2_gpio32_set;
+	gc->parent = dev;
+	gc->owner = THIS_MODULE;
+
+	return of_mm_gpiochip_add_data(np, mm_gc, cpm2_gc);
+}
+#endif /* CONFIG_CPM2 || CONFIG_8xx_GPIO */
diff --git a/arch/powerpc/sysdev/cpm_gpio.c b/arch/powerpc/sysdev/cpm_gpio.c
new file mode 100644
index 0000000000..40f57111e9
--- /dev/null
+++ b/arch/powerpc/sysdev/cpm_gpio.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common CPM GPIO wrapper for the CPM GPIO ports
+ *
+ * Author: Christophe Leroy <christophe.leroy@c-s.fr>
+ *
+ * Copyright 2017 CS Systemes d'Information.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include <asm/cpm.h>
+#ifdef CONFIG_8xx_GPIO
+#include <asm/cpm1.h>
+#endif
+
+static int cpm_gpio_probe(struct platform_device *ofdev)
+{
+	struct device *dev = &ofdev->dev;
+	int (*gp_add)(struct device *dev) = of_device_get_match_data(dev);
+
+	if (!gp_add)
+		return -ENODEV;
+
+	return gp_add(dev);
+}
+
+static const struct of_device_id cpm_gpio_match[] = {
+#ifdef CONFIG_8xx_GPIO
+	{
+		.compatible = "fsl,cpm1-pario-bank-a",
+		.data = cpm1_gpiochip_add16,
+	},
+	{
+		.compatible = "fsl,cpm1-pario-bank-b",
+		.data = cpm1_gpiochip_add32,
+	},
+	{
+		.compatible = "fsl,cpm1-pario-bank-c",
+		.data = cpm1_gpiochip_add16,
+	},
+	{
+		.compatible = "fsl,cpm1-pario-bank-d",
+		.data = cpm1_gpiochip_add16,
+	},
+	/* Port E uses CPM2 layout */
+	{
+		.compatible = "fsl,cpm1-pario-bank-e",
+		.data = cpm2_gpiochip_add32,
+	},
+#endif
+	{
+		.compatible = "fsl,cpm2-pario-bank",
+		.data = cpm2_gpiochip_add32,
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, cpm_gpio_match);
+
+static struct platform_driver cpm_gpio_driver = {
+	.probe		= cpm_gpio_probe,
+	.driver		= {
+		.name	= "cpm-gpio",
+		.of_match_table	= cpm_gpio_match,
+	},
+};
+
+static int __init cpm_gpio_init(void)
+{
+	return platform_driver_register(&cpm_gpio_driver);
+}
+arch_initcall(cpm_gpio_init);
+
+MODULE_AUTHOR("Christophe Leroy <christophe.leroy@c-s.fr>");
+MODULE_DESCRIPTION("Driver for CPM GPIO");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:cpm-gpio");
diff --git a/arch/powerpc/sysdev/dart.h b/arch/powerpc/sysdev/dart.h
new file mode 100644
index 0000000000..7f5df03d33
--- /dev/null
+++ b/arch/powerpc/sysdev/dart.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
+ */
+
+#ifndef _POWERPC_SYSDEV_DART_H
+#define _POWERPC_SYSDEV_DART_H
+
+
+/* Offset from base to control register */
+#define DART_CNTL	0
+
+/* Offset from base to exception register */
+#define DART_EXCP_U3	0x10
+/* Offset from base to TLB tag registers */
+#define DART_TAGS_U3	0x1000
+
+/* U4 registers */
+#define DART_BASE_U4	0x10
+#define DART_SIZE_U4	0x20
+#define DART_EXCP_U4	0x30
+#define DART_TAGS_U4	0x1000
+
+/* Control Register fields */
+
+/* U3 registers */
+#define DART_CNTL_U3_BASE_MASK	0xfffff
+#define DART_CNTL_U3_BASE_SHIFT	12
+#define DART_CNTL_U3_FLUSHTLB	0x400
+#define DART_CNTL_U3_ENABLE	0x200
+#define DART_CNTL_U3_SIZE_MASK	0x1ff
+#define DART_CNTL_U3_SIZE_SHIFT	0
+
+/* U4 registers */
+#define DART_BASE_U4_BASE_MASK	0xffffff
+#define DART_BASE_U4_BASE_SHIFT	0
+#define DART_CNTL_U4_ENABLE	0x80000000
+#define DART_CNTL_U4_IONE	0x40000000
+#define DART_CNTL_U4_FLUSHTLB	0x20000000
+#define DART_CNTL_U4_IDLE	0x10000000
+#define DART_CNTL_U4_PAR_EN	0x08000000
+#define DART_CNTL_U4_IONE_MASK	0x07ffffff
+#define DART_SIZE_U4_SIZE_MASK	0x1fff
+#define DART_SIZE_U4_SIZE_SHIFT	0
+
+#define DART_REG(r)	(dart + ((r) >> 2))
+#define DART_IN(r)	(in_be32(DART_REG(r)))
+#define DART_OUT(r,v)	(out_be32(DART_REG(r), (v)))
+
+
+/* size of table in pages */
+
+
+/* DART table fields */
+
+#define DARTMAP_VALID   0x80000000
+#define DARTMAP_RPNMASK 0x00ffffff
+
+
+#define DART_PAGE_SHIFT		12
+#define DART_PAGE_SIZE		(1 << DART_PAGE_SHIFT)
+
+
+#endif /* _POWERPC_SYSDEV_DART_H */
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
new file mode 100644
index 0000000000..98096bbfd6
--- /dev/null
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/sysdev/dart_iommu.c
+ *
+ * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
+ * Copyright (C) 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>,
+ *                    IBM Corporation
+ *
+ * Based on pSeries_iommu.c:
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
+ *
+ * Dynamic DMA mapping support, Apple U3, U4 & IBM CPC925 "DART" iommu.
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
+#include <linux/suspend.h>
+#include <linux/memblock.h>
+#include <linux/gfp.h>
+#include <linux/kmemleak.h>
+#include <linux/of_address.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/cacheflush.h>
+#include <asm/ppc-pci.h>
+
+#include "dart.h"
+
+/* DART table address and size */
+static u32 *dart_tablebase;
+static unsigned long dart_tablesize;
+
+/* Mapped base address for the dart */
+static unsigned int __iomem *dart;
+
+/* Dummy val that entries are set to when unused */
+static unsigned int dart_emptyval;
+
+static struct iommu_table iommu_table_dart;
+static int iommu_table_dart_inited;
+static int dart_dirty;
+static int dart_is_u4;
+
+#define DART_U4_BYPASS_BASE	0x8000000000ull
+
+#define DBG(...)
+
+static DEFINE_SPINLOCK(invalidate_lock);
+
+static inline void dart_tlb_invalidate_all(void)
+{
+	unsigned long l = 0;
+	unsigned int reg, inv_bit;
+	unsigned long limit;
+	unsigned long flags;
+
+	spin_lock_irqsave(&invalidate_lock, flags);
+
+	DBG("dart: flush\n");
+
+	/* To invalidate the DART, set the DARTCNTL_FLUSHTLB bit in the
+	 * control register and wait for it to clear.
+	 *
+	 * Gotcha: Sometimes, the DART won't detect that the bit gets
+	 * set. If so, clear it and set it again.
+	 */
+
+	limit = 0;
+
+	inv_bit = dart_is_u4 ? DART_CNTL_U4_FLUSHTLB : DART_CNTL_U3_FLUSHTLB;
+retry:
+	l = 0;
+	reg = DART_IN(DART_CNTL);
+	reg |= inv_bit;
+	DART_OUT(DART_CNTL, reg);
+
+	while ((DART_IN(DART_CNTL) & inv_bit) && l < (1L << limit))
+		l++;
+	if (l == (1L << limit)) {
+		if (limit < 4) {
+			limit++;
+			reg = DART_IN(DART_CNTL);
+			reg &= ~inv_bit;
+			DART_OUT(DART_CNTL, reg);
+			goto retry;
+		} else
+			panic("DART: TLB did not flush after waiting a long "
+			      "time. Buggy U3 ?");
+	}
+
+	spin_unlock_irqrestore(&invalidate_lock, flags);
+}
+
+static inline void dart_tlb_invalidate_one(unsigned long bus_rpn)
+{
+	unsigned int reg;
+	unsigned int l, limit;
+	unsigned long flags;
+
+	spin_lock_irqsave(&invalidate_lock, flags);
+
+	reg = DART_CNTL_U4_ENABLE | DART_CNTL_U4_IONE |
+		(bus_rpn & DART_CNTL_U4_IONE_MASK);
+	DART_OUT(DART_CNTL, reg);
+
+	limit = 0;
+wait_more:
+	l = 0;
+	while ((DART_IN(DART_CNTL) & DART_CNTL_U4_IONE) && l < (1L << limit)) {
+		rmb();
+		l++;
+	}
+
+	if (l == (1L << limit)) {
+		if (limit < 4) {
+			limit++;
+			goto wait_more;
+		} else
+			panic("DART: TLB did not flush after waiting a long "
+			      "time. Buggy U4 ?");
+	}
+
+	spin_unlock_irqrestore(&invalidate_lock, flags);
+}
+
+static void dart_cache_sync(unsigned int *base, unsigned int count)
+{
+	/*
+	 * We add 1 to the number of entries to flush, following a
+	 * comment in Darwin indicating that the memory controller
+	 * can prefetch unmapped memory under some circumstances.
+	 */
+	unsigned long start = (unsigned long)base;
+	unsigned long end = start + (count + 1) * sizeof(unsigned int);
+	unsigned int tmp;
+
+	/* Perform a standard cache flush */
+	flush_dcache_range(start, end);
+
+	/*
+	 * Perform the sequence described in the CPC925 manual to
+	 * ensure all the data gets to a point the cache incoherent
+	 * DART hardware will see.
+	 */
+	asm volatile(" sync;"
+		     " isync;"
+		     " dcbf 0,%1;"
+		     " sync;"
+		     " isync;"
+		     " lwz %0,0(%1);"
+		     " isync" : "=r" (tmp) : "r" (end) : "memory");
+}
+
+static void dart_flush(struct iommu_table *tbl)
+{
+	mb();
+	if (dart_dirty) {
+		dart_tlb_invalidate_all();
+		dart_dirty = 0;
+	}
+}
+
+static int dart_build(struct iommu_table *tbl, long index,
+		       long npages, unsigned long uaddr,
+		       enum dma_data_direction direction,
+		       unsigned long attrs)
+{
+	unsigned int *dp, *orig_dp;
+	unsigned int rpn;
+	long l;
+
+	DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr);
+
+	orig_dp = dp = ((unsigned int*)tbl->it_base) + index;
+
+	/* On U3, all memory is contiguous, so we can move this
+	 * out of the loop.
+	 */
+	l = npages;
+	while (l--) {
+		rpn = __pa(uaddr) >> DART_PAGE_SHIFT;
+
+		*(dp++) = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK);
+
+		uaddr += DART_PAGE_SIZE;
+	}
+	dart_cache_sync(orig_dp, npages);
+
+	if (dart_is_u4) {
+		rpn = index;
+		while (npages--)
+			dart_tlb_invalidate_one(rpn++);
+	} else {
+		dart_dirty = 1;
+	}
+	return 0;
+}
+
+
+static void dart_free(struct iommu_table *tbl, long index, long npages)
+{
+	unsigned int *dp, *orig_dp;
+	long orig_npages = npages;
+
+	/* We don't worry about flushing the TLB cache. The only drawback of
+	 * not doing it is that we won't catch buggy device drivers doing
+	 * bad DMAs, but then no 32-bit architecture ever does either.
+	 */
+
+	DBG("dart: free at: %lx, %lx\n", index, npages);
+
+	orig_dp = dp  = ((unsigned int *)tbl->it_base) + index;
+
+	while (npages--)
+		*(dp++) = dart_emptyval;
+
+	dart_cache_sync(orig_dp, orig_npages);
+}
+
+static void __init allocate_dart(void)
+{
+	unsigned long tmp;
+
+	/* 512 pages (2MB) is max DART tablesize. */
+	dart_tablesize = 1UL << 21;
+
+	/*
+	 * 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we
+	 * will blow up an entire large page anyway in the kernel mapping.
+	 */
+	dart_tablebase = memblock_alloc_try_nid_raw(SZ_16M, SZ_16M,
+					MEMBLOCK_LOW_LIMIT, SZ_2G,
+					NUMA_NO_NODE);
+	if (!dart_tablebase)
+		panic("Failed to allocate 16MB below 2GB for DART table\n");
+
+	/* There is no point scanning the DART space for leaks*/
+	kmemleak_no_scan((void *)dart_tablebase);
+
+	/* Allocate a spare page to map all invalid DART pages. We need to do
+	 * that to work around what looks like a problem with the HT bridge
+	 * prefetching into invalid pages and corrupting data
+	 */
+	tmp = memblock_phys_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);
+	if (!tmp)
+		panic("DART: table allocation failed\n");
+
+	dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &
+					 DARTMAP_RPNMASK);
+
+	printk(KERN_INFO "DART table allocated at: %p\n", dart_tablebase);
+}
+
+static int __init dart_init(struct device_node *dart_node)
+{
+	unsigned int i;
+	unsigned long base, size;
+	struct resource r;
+
+	/* IOMMU disabled by the user ? bail out */
+	if (iommu_is_off)
+		return -ENODEV;
+
+	/*
+	 * Only use the DART if the machine has more than 1GB of RAM
+	 * or if requested with iommu=on on cmdline.
+	 *
+	 * 1GB of RAM is picked as limit because some default devices
+	 * (i.e. Airport Extreme) have 30 bit address range limits.
+	 */
+
+	if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull)
+		return -ENODEV;
+
+	/* Get DART registers */
+	if (of_address_to_resource(dart_node, 0, &r))
+		panic("DART: can't get register base ! ");
+
+	/* Map in DART registers */
+	dart = ioremap(r.start, resource_size(&r));
+	if (dart == NULL)
+		panic("DART: Cannot map registers!");
+
+	/* Allocate the DART and dummy page */
+	allocate_dart();
+
+	/* Fill initial table */
+	for (i = 0; i < dart_tablesize/4; i++)
+		dart_tablebase[i] = dart_emptyval;
+
+	/* Push to memory */
+	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));
+
+	/* Initialize DART with table base and enable it. */
+	base = ((unsigned long)dart_tablebase) >> DART_PAGE_SHIFT;
+	size = dart_tablesize >> DART_PAGE_SHIFT;
+	if (dart_is_u4) {
+		size &= DART_SIZE_U4_SIZE_MASK;
+		DART_OUT(DART_BASE_U4, base);
+		DART_OUT(DART_SIZE_U4, size);
+		DART_OUT(DART_CNTL, DART_CNTL_U4_ENABLE);
+	} else {
+		size &= DART_CNTL_U3_SIZE_MASK;
+		DART_OUT(DART_CNTL,
+			 DART_CNTL_U3_ENABLE |
+			 (base << DART_CNTL_U3_BASE_SHIFT) |
+			 (size << DART_CNTL_U3_SIZE_SHIFT));
+	}
+
+	/* Invalidate DART to get rid of possible stale TLBs */
+	dart_tlb_invalidate_all();
+
+	printk(KERN_INFO "DART IOMMU initialized for %s type chipset\n",
+	       dart_is_u4 ? "U4" : "U3");
+
+	return 0;
+}
+
+static struct iommu_table_ops iommu_dart_ops = {
+	.set = dart_build,
+	.clear = dart_free,
+	.flush = dart_flush,
+};
+
+static void iommu_table_dart_setup(void)
+{
+	iommu_table_dart.it_busno = 0;
+	iommu_table_dart.it_offset = 0;
+	/* it_size is in number of entries */
+	iommu_table_dart.it_size = dart_tablesize / sizeof(u32);
+	iommu_table_dart.it_page_shift = IOMMU_PAGE_SHIFT_4K;
+
+	/* Initialize the common IOMMU code */
+	iommu_table_dart.it_base = (unsigned long)dart_tablebase;
+	iommu_table_dart.it_index = 0;
+	iommu_table_dart.it_blocksize = 1;
+	iommu_table_dart.it_ops = &iommu_dart_ops;
+	if (!iommu_init_table(&iommu_table_dart, -1, 0, 0))
+		panic("Failed to initialize iommu table");
+
+	/* Reserve the last page of the DART to avoid possible prefetch
+	 * past the DART mapped area
+	 */
+	set_bit(iommu_table_dart.it_size - 1, iommu_table_dart.it_map);
+}
+
+static void pci_dma_bus_setup_dart(struct pci_bus *bus)
+{
+	if (!iommu_table_dart_inited) {
+		iommu_table_dart_inited = 1;
+		iommu_table_dart_setup();
+	}
+}
+
+static bool dart_device_on_pcie(struct device *dev)
+{
+	struct device_node *np = of_node_get(dev->of_node);
+
+	while(np) {
+		if (of_device_is_compatible(np, "U4-pcie") ||
+		    of_device_is_compatible(np, "u4-pcie")) {
+			of_node_put(np);
+			return true;
+		}
+		np = of_get_next_parent(np);
+	}
+	return false;
+}
+
+static void pci_dma_dev_setup_dart(struct pci_dev *dev)
+{
+	if (dart_is_u4 && dart_device_on_pcie(&dev->dev))
+		dev->dev.archdata.dma_offset = DART_U4_BYPASS_BASE;
+	set_iommu_table_base(&dev->dev, &iommu_table_dart);
+}
+
+static bool iommu_bypass_supported_dart(struct pci_dev *dev, u64 mask)
+{
+	return dart_is_u4 &&
+		dart_device_on_pcie(&dev->dev) &&
+		mask >= DMA_BIT_MASK(40);
+}
+
+void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
+{
+	struct device_node *dn;
+
+	/* Find the DART in the device-tree */
+	dn = of_find_compatible_node(NULL, "dart", "u3-dart");
+	if (dn == NULL) {
+		dn = of_find_compatible_node(NULL, "dart", "u4-dart");
+		if (dn == NULL)
+			return;	/* use default direct_dma_ops */
+		dart_is_u4 = 1;
+	}
+
+	/* Initialize the DART HW */
+	if (dart_init(dn) != 0) {
+		of_node_put(dn);
+		return;
+	}
+	/*
+	 * U4 supports a DART bypass, we use it for 64-bit capable devices to
+	 * improve performance.  However, that only works for devices connected
+	 * to the U4 own PCIe interface, not bridged through hypertransport.
+	 * We need the device to support at least 40 bits of addresses.
+	 */
+	controller_ops->dma_dev_setup = pci_dma_dev_setup_dart;
+	controller_ops->dma_bus_setup = pci_dma_bus_setup_dart;
+	controller_ops->iommu_bypass_supported = iommu_bypass_supported_dart;
+
+	/* Setup pci_dma ops */
+	set_pci_dma_ops(&dma_iommu_ops);
+	of_node_put(dn);
+}
+
+#ifdef CONFIG_PM
+static void iommu_dart_restore(void)
+{
+	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));
+	dart_tlb_invalidate_all();
+}
+
+static int __init iommu_init_late_dart(void)
+{
+	if (!dart_tablebase)
+		return 0;
+
+	ppc_md.iommu_restore = iommu_dart_restore;
+
+	return 0;
+}
+
+late_initcall(iommu_init_late_dart);
+#endif /* CONFIG_PM */
diff --git a/arch/powerpc/sysdev/dcr-low.S b/arch/powerpc/sysdev/dcr-low.S
new file mode 100644
index 0000000000..e8401b205d
--- /dev/null
+++ b/arch/powerpc/sysdev/dcr-low.S
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * "Indirect" DCR access
+ *
+ * Copyright (c) 2004 Eugene Surovegin <ebs@ebshome.net>
+ */
+
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/processor.h>
+#include <asm/bug.h>
+
+#define DCR_ACCESS_PROLOG(table) \
+	cmplwi	cr0,r3,1024;	 \
+	rlwinm  r3,r3,4,18,27;   \
+	lis     r5,table@h;      \
+	ori     r5,r5,table@l;   \
+	add     r3,r3,r5;        \
+	bge-	1f;		 \
+	mtctr   r3;              \
+	bctr;			 \
+1:	trap;			 \
+	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0;	\
+	blr
+
+_GLOBAL(__mfdcr)
+	DCR_ACCESS_PROLOG(__mfdcr_table)
+EXPORT_SYMBOL(__mfdcr)
+
+_GLOBAL(__mtdcr)
+	DCR_ACCESS_PROLOG(__mtdcr_table)
+EXPORT_SYMBOL(__mtdcr)
+
+__mfdcr_table:
+	mfdcr  r3,0; blr
+__mtdcr_table:
+	mtdcr  0,r4; blr
+
+dcr     = 1
+        .rept   1023
+	mfdcr   r3,dcr; blr
+	mtdcr   dcr,r4; blr
+	dcr     = dcr + 1
+	.endr
diff --git a/arch/powerpc/sysdev/dcr.c b/arch/powerpc/sysdev/dcr.c
new file mode 100644
index 0000000000..70ce66eadf
--- /dev/null
+++ b/arch/powerpc/sysdev/dcr.c
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * (c) Copyright 2006 Benjamin Herrenschmidt, IBM Corp.
+ *                    <benh@kernel.crashing.org>
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/of_address.h>
+#include <asm/dcr.h>
+
+#ifdef CONFIG_PPC_DCR_MMIO
+static struct device_node *find_dcr_parent(struct device_node *node)
+{
+	struct device_node *par, *tmp;
+	const u32 *p;
+
+	for (par = of_node_get(node); par;) {
+		if (of_property_read_bool(par, "dcr-controller"))
+			break;
+		p = of_get_property(par, "dcr-parent", NULL);
+		tmp = par;
+		if (p == NULL)
+			par = of_get_parent(par);
+		else
+			par = of_find_node_by_phandle(*p);
+		of_node_put(tmp);
+	}
+	return par;
+}
+#endif
+
+#if defined(CONFIG_PPC_DCR_NATIVE) && defined(CONFIG_PPC_DCR_MMIO)
+
+bool dcr_map_ok_generic(dcr_host_t host)
+{
+	if (host.type == DCR_HOST_NATIVE)
+		return dcr_map_ok_native(host.host.native);
+	else if (host.type == DCR_HOST_MMIO)
+		return dcr_map_ok_mmio(host.host.mmio);
+	else
+		return false;
+}
+EXPORT_SYMBOL_GPL(dcr_map_ok_generic);
+
+dcr_host_t dcr_map_generic(struct device_node *dev,
+			   unsigned int dcr_n,
+			   unsigned int dcr_c)
+{
+	dcr_host_t host;
+	struct device_node *dp;
+	const char *prop;
+
+	host.type = DCR_HOST_INVALID;
+
+	dp = find_dcr_parent(dev);
+	if (dp == NULL)
+		return host;
+
+	prop = of_get_property(dp, "dcr-access-method", NULL);
+
+	pr_debug("dcr_map_generic(dcr-access-method = %s)\n", prop);
+
+	if (!strcmp(prop, "native")) {
+		host.type = DCR_HOST_NATIVE;
+		host.host.native = dcr_map_native(dev, dcr_n, dcr_c);
+	} else if (!strcmp(prop, "mmio")) {
+		host.type = DCR_HOST_MMIO;
+		host.host.mmio = dcr_map_mmio(dev, dcr_n, dcr_c);
+	}
+
+	of_node_put(dp);
+	return host;
+}
+EXPORT_SYMBOL_GPL(dcr_map_generic);
+
+void dcr_unmap_generic(dcr_host_t host, unsigned int dcr_c)
+{
+	if (host.type == DCR_HOST_NATIVE)
+		dcr_unmap_native(host.host.native, dcr_c);
+	else if (host.type == DCR_HOST_MMIO)
+		dcr_unmap_mmio(host.host.mmio, dcr_c);
+	else /* host.type == DCR_HOST_INVALID */
+		WARN_ON(true);
+}
+EXPORT_SYMBOL_GPL(dcr_unmap_generic);
+
+u32 dcr_read_generic(dcr_host_t host, unsigned int dcr_n)
+{
+	if (host.type == DCR_HOST_NATIVE)
+		return dcr_read_native(host.host.native, dcr_n);
+	else if (host.type == DCR_HOST_MMIO)
+		return dcr_read_mmio(host.host.mmio, dcr_n);
+	else /* host.type == DCR_HOST_INVALID */
+		WARN_ON(true);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dcr_read_generic);
+
+void dcr_write_generic(dcr_host_t host, unsigned int dcr_n, u32 value)
+{
+	if (host.type == DCR_HOST_NATIVE)
+		dcr_write_native(host.host.native, dcr_n, value);
+	else if (host.type == DCR_HOST_MMIO)
+		dcr_write_mmio(host.host.mmio, dcr_n, value);
+	else /* host.type == DCR_HOST_INVALID */
+		WARN_ON(true);
+}
+EXPORT_SYMBOL_GPL(dcr_write_generic);
+
+#endif /* defined(CONFIG_PPC_DCR_NATIVE) && defined(CONFIG_PPC_DCR_MMIO) */
+
+unsigned int dcr_resource_start(const struct device_node *np,
+				unsigned int index)
+{
+	unsigned int ds;
+	const u32 *dr = of_get_property(np, "dcr-reg", &ds);
+
+	if (dr == NULL || ds & 1 || index >= (ds / 8))
+		return 0;
+
+	return dr[index * 2];
+}
+EXPORT_SYMBOL_GPL(dcr_resource_start);
+
+unsigned int dcr_resource_len(const struct device_node *np, unsigned int index)
+{
+	unsigned int ds;
+	const u32 *dr = of_get_property(np, "dcr-reg", &ds);
+
+	if (dr == NULL || ds & 1 || index >= (ds / 8))
+		return 0;
+
+	return dr[index * 2 + 1];
+}
+EXPORT_SYMBOL_GPL(dcr_resource_len);
+
+#ifdef CONFIG_PPC_DCR_MMIO
+
+static u64 of_translate_dcr_address(struct device_node *dev,
+				    unsigned int dcr_n,
+				    unsigned int *out_stride)
+{
+	struct device_node *dp;
+	const u32 *p;
+	unsigned int stride;
+	u64 ret = OF_BAD_ADDR;
+
+	dp = find_dcr_parent(dev);
+	if (dp == NULL)
+		return OF_BAD_ADDR;
+
+	/* Stride is not properly defined yet, default to 0x10 for Axon */
+	p = of_get_property(dp, "dcr-mmio-stride", NULL);
+	stride = (p == NULL) ? 0x10 : *p;
+
+	/* XXX FIXME: Which property name is to use of the 2 following ? */
+	p = of_get_property(dp, "dcr-mmio-range", NULL);
+	if (p == NULL)
+		p = of_get_property(dp, "dcr-mmio-space", NULL);
+	if (p == NULL)
+		goto done;
+
+	/* Maybe could do some better range checking here */
+	ret = of_translate_address(dp, p);
+	if (ret != OF_BAD_ADDR)
+		ret += (u64)(stride) * (u64)dcr_n;
+	if (out_stride)
+		*out_stride = stride;
+
+ done:
+	of_node_put(dp);
+	return ret;
+}
+
+dcr_host_mmio_t dcr_map_mmio(struct device_node *dev,
+			     unsigned int dcr_n,
+			     unsigned int dcr_c)
+{
+	dcr_host_mmio_t ret = { .token = NULL, .stride = 0, .base = dcr_n };
+	u64 addr;
+
+	pr_debug("dcr_map(%pOF, 0x%x, 0x%x)\n",
+		 dev, dcr_n, dcr_c);
+
+	addr = of_translate_dcr_address(dev, dcr_n, &ret.stride);
+	pr_debug("translates to addr: 0x%llx, stride: 0x%x\n",
+		 (unsigned long long) addr, ret.stride);
+	if (addr == OF_BAD_ADDR)
+		return ret;
+	pr_debug("mapping 0x%x bytes\n", dcr_c * ret.stride);
+	ret.token = ioremap(addr, dcr_c * ret.stride);
+	if (ret.token == NULL)
+		return ret;
+	pr_debug("mapped at 0x%p -> base is 0x%p\n",
+		 ret.token, ret.token - dcr_n * ret.stride);
+	ret.token -= dcr_n * ret.stride;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dcr_map_mmio);
+
+void dcr_unmap_mmio(dcr_host_mmio_t host, unsigned int dcr_c)
+{
+	dcr_host_mmio_t h = host;
+
+	if (h.token == NULL)
+		return;
+	h.token += host.base * h.stride;
+	iounmap(h.token);
+	h.token = NULL;
+}
+EXPORT_SYMBOL_GPL(dcr_unmap_mmio);
+
+#endif /* defined(CONFIG_PPC_DCR_MMIO) */
+
+#ifdef CONFIG_PPC_DCR_NATIVE
+DEFINE_SPINLOCK(dcr_ind_lock);
+EXPORT_SYMBOL_GPL(dcr_ind_lock);
+#endif	/* defined(CONFIG_PPC_DCR_NATIVE) */
+
diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c
new file mode 100644
index 0000000000..040827671d
--- /dev/null
+++ b/arch/powerpc/sysdev/ehv_pic.c
@@ -0,0 +1,295 @@
+/*
+ *  Driver for ePAPR Embedded Hypervisor PIC
+ *
+ *  Copyright 2008-2011 Freescale Semiconductor, Inc.
+ *
+ *  Author: Ashish Kalra <ashish.kalra@freescale.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/ehv_pic.h>
+#include <asm/fsl_hcalls.h>
+
+static struct ehv_pic *global_ehv_pic;
+static DEFINE_SPINLOCK(ehv_pic_lock);
+
+static u32 hwirq_intspec[NR_EHV_PIC_INTS];
+static u32 __iomem *mpic_percpu_base_vaddr;
+
+#define IRQ_TYPE_MPIC_DIRECT 4
+#define MPIC_EOI  0x00B0
+
+/*
+ * Linux descriptor level callbacks
+ */
+
+static void ehv_pic_unmask_irq(struct irq_data *d)
+{
+	unsigned int src = virq_to_hw(d->irq);
+
+	ev_int_set_mask(src, 0);
+}
+
+static void ehv_pic_mask_irq(struct irq_data *d)
+{
+	unsigned int src = virq_to_hw(d->irq);
+
+	ev_int_set_mask(src, 1);
+}
+
+static void ehv_pic_end_irq(struct irq_data *d)
+{
+	unsigned int src = virq_to_hw(d->irq);
+
+	ev_int_eoi(src);
+}
+
+static void ehv_pic_direct_end_irq(struct irq_data *d)
+{
+	out_be32(mpic_percpu_base_vaddr + MPIC_EOI / 4, 0);
+}
+
+static int ehv_pic_set_affinity(struct irq_data *d, const struct cpumask *dest,
+			 bool force)
+{
+	unsigned int src = virq_to_hw(d->irq);
+	unsigned int config, prio, cpu_dest;
+	int cpuid = irq_choose_cpu(dest);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ehv_pic_lock, flags);
+	ev_int_get_config(src, &config, &prio, &cpu_dest);
+	ev_int_set_config(src, config, prio, cpuid);
+	spin_unlock_irqrestore(&ehv_pic_lock, flags);
+
+	return IRQ_SET_MASK_OK;
+}
+
+static unsigned int ehv_pic_type_to_vecpri(unsigned int type)
+{
+	/* Now convert sense value */
+
+	switch (type & IRQ_TYPE_SENSE_MASK) {
+	case IRQ_TYPE_EDGE_RISING:
+		return EHV_PIC_INFO(VECPRI_SENSE_EDGE) |
+		       EHV_PIC_INFO(VECPRI_POLARITY_POSITIVE);
+
+	case IRQ_TYPE_EDGE_FALLING:
+	case IRQ_TYPE_EDGE_BOTH:
+		return EHV_PIC_INFO(VECPRI_SENSE_EDGE) |
+		       EHV_PIC_INFO(VECPRI_POLARITY_NEGATIVE);
+
+	case IRQ_TYPE_LEVEL_HIGH:
+		return EHV_PIC_INFO(VECPRI_SENSE_LEVEL) |
+		       EHV_PIC_INFO(VECPRI_POLARITY_POSITIVE);
+
+	case IRQ_TYPE_LEVEL_LOW:
+	default:
+		return EHV_PIC_INFO(VECPRI_SENSE_LEVEL) |
+		       EHV_PIC_INFO(VECPRI_POLARITY_NEGATIVE);
+	}
+}
+
+static int ehv_pic_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+	unsigned int src = virq_to_hw(d->irq);
+	unsigned int vecpri, vold, vnew, prio, cpu_dest;
+	unsigned long flags;
+
+	if (flow_type == IRQ_TYPE_NONE)
+		flow_type = IRQ_TYPE_LEVEL_LOW;
+
+	irqd_set_trigger_type(d, flow_type);
+
+	vecpri = ehv_pic_type_to_vecpri(flow_type);
+
+	spin_lock_irqsave(&ehv_pic_lock, flags);
+	ev_int_get_config(src, &vold, &prio, &cpu_dest);
+	vnew = vold & ~(EHV_PIC_INFO(VECPRI_POLARITY_MASK) |
+			EHV_PIC_INFO(VECPRI_SENSE_MASK));
+	vnew |= vecpri;
+
+	/*
+	 * TODO : Add specific interface call for platform to set
+	 * individual interrupt priorities.
+	 * platform currently using static/default priority for all ints
+	 */
+
+	prio = 8;
+
+	ev_int_set_config(src, vecpri, prio, cpu_dest);
+
+	spin_unlock_irqrestore(&ehv_pic_lock, flags);
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static struct irq_chip ehv_pic_irq_chip = {
+	.irq_mask	= ehv_pic_mask_irq,
+	.irq_unmask	= ehv_pic_unmask_irq,
+	.irq_eoi	= ehv_pic_end_irq,
+	.irq_set_type	= ehv_pic_set_irq_type,
+};
+
+static struct irq_chip ehv_pic_direct_eoi_irq_chip = {
+	.irq_mask	= ehv_pic_mask_irq,
+	.irq_unmask	= ehv_pic_unmask_irq,
+	.irq_eoi	= ehv_pic_direct_end_irq,
+	.irq_set_type	= ehv_pic_set_irq_type,
+};
+
+/* Return an interrupt vector or 0 if no interrupt is pending. */
+unsigned int ehv_pic_get_irq(void)
+{
+	int irq;
+
+	BUG_ON(global_ehv_pic == NULL);
+
+	if (global_ehv_pic->coreint_flag)
+		irq = mfspr(SPRN_EPR); /* if core int mode */
+	else
+		ev_int_iack(0, &irq); /* legacy mode */
+
+	if (irq == 0xFFFF)    /* 0xFFFF --> no irq is pending */
+		return 0;
+
+	/*
+	 * this will also setup revmap[] in the slow path for the first
+	 * time, next calls will always use fast path by indexing revmap
+	 */
+	return irq_linear_revmap(global_ehv_pic->irqhost, irq);
+}
+
+static int ehv_pic_host_match(struct irq_domain *h, struct device_node *node,
+			      enum irq_domain_bus_token bus_token)
+{
+	/* Exact match, unless ehv_pic node is NULL */
+	struct device_node *of_node = irq_domain_get_of_node(h);
+	return of_node == NULL || of_node == node;
+}
+
+static int ehv_pic_host_map(struct irq_domain *h, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	struct ehv_pic *ehv_pic = h->host_data;
+	struct irq_chip *chip;
+
+	/* Default chip */
+	chip = &ehv_pic->hc_irq;
+
+	if (mpic_percpu_base_vaddr)
+		if (hwirq_intspec[hw] & IRQ_TYPE_MPIC_DIRECT)
+			chip = &ehv_pic_direct_eoi_irq_chip;
+
+	irq_set_chip_data(virq, chip);
+	/*
+	 * using handle_fasteoi_irq as our irq handler, this will
+	 * only call the eoi callback and suitable for the MPIC
+	 * controller which set ISR/IPR automatically and clear the
+	 * highest priority active interrupt in ISR/IPR when we do
+	 * a specific eoi
+	 */
+	irq_set_chip_and_handler(virq, chip, handle_fasteoi_irq);
+
+	/* Set default irq type */
+	irq_set_irq_type(virq, IRQ_TYPE_NONE);
+
+	return 0;
+}
+
+static int ehv_pic_host_xlate(struct irq_domain *h, struct device_node *ct,
+			   const u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	/*
+	 * interrupt sense values coming from the guest device tree
+	 * interrupt specifiers can have four possible sense and
+	 * level encoding information and they need to
+	 * be translated between firmware type & linux type.
+	 */
+
+	static unsigned char map_of_senses_to_linux_irqtype[4] = {
+		IRQ_TYPE_EDGE_FALLING,
+		IRQ_TYPE_EDGE_RISING,
+		IRQ_TYPE_LEVEL_LOW,
+		IRQ_TYPE_LEVEL_HIGH,
+	};
+
+	*out_hwirq = intspec[0];
+	if (intsize > 1) {
+		hwirq_intspec[intspec[0]] = intspec[1];
+		*out_flags = map_of_senses_to_linux_irqtype[intspec[1] &
+							~IRQ_TYPE_MPIC_DIRECT];
+	} else {
+		*out_flags = IRQ_TYPE_NONE;
+	}
+
+	return 0;
+}
+
+static const struct irq_domain_ops ehv_pic_host_ops = {
+	.match = ehv_pic_host_match,
+	.map = ehv_pic_host_map,
+	.xlate = ehv_pic_host_xlate,
+};
+
+void __init ehv_pic_init(void)
+{
+	struct device_node *np, *np2;
+	struct ehv_pic *ehv_pic;
+
+	np = of_find_compatible_node(NULL, NULL, "epapr,hv-pic");
+	if (!np) {
+		pr_err("ehv_pic_init: could not find epapr,hv-pic node\n");
+		return;
+	}
+
+	ehv_pic = kzalloc(sizeof(struct ehv_pic), GFP_KERNEL);
+	if (!ehv_pic) {
+		of_node_put(np);
+		return;
+	}
+
+	ehv_pic->irqhost = irq_domain_add_linear(np, NR_EHV_PIC_INTS,
+						 &ehv_pic_host_ops, ehv_pic);
+	if (!ehv_pic->irqhost) {
+		of_node_put(np);
+		kfree(ehv_pic);
+		return;
+	}
+
+	np2 = of_find_compatible_node(NULL, NULL, "fsl,hv-mpic-per-cpu");
+	if (np2) {
+		mpic_percpu_base_vaddr = of_iomap(np2, 0);
+		if (!mpic_percpu_base_vaddr)
+			pr_err("ehv_pic_init: of_iomap failed\n");
+
+		of_node_put(np2);
+	}
+
+	ehv_pic->hc_irq = ehv_pic_irq_chip;
+	ehv_pic->hc_irq.irq_set_affinity = ehv_pic_set_affinity;
+	ehv_pic->coreint_flag = of_property_read_bool(np, "has-external-proxy");
+
+	global_ehv_pic = ehv_pic;
+	irq_set_default_host(global_ehv_pic->irqhost);
+}
diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c
new file mode 100644
index 0000000000..39186ad6b3
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_gtm.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale General-purpose Timers Module
+ *
+ * Copyright (c) Freescale Semiconductor, Inc. 2006.
+ *               Shlomi Gridish <gridish@freescale.com>
+ *               Jerry Huang <Chang-Ming.Huang@freescale.com>
+ * Copyright (c) MontaVista Software, Inc. 2008.
+ *               Anton Vorontsov <avorontsov@ru.mvista.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/spinlock.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <asm/fsl_gtm.h>
+
+#define GTCFR_STP(x)		((x) & 1 ? 1 << 5 : 1 << 1)
+#define GTCFR_RST(x)		((x) & 1 ? 1 << 4 : 1 << 0)
+
+#define GTMDR_ICLK_MASK		(3 << 1)
+#define GTMDR_ICLK_ICAS		(0 << 1)
+#define GTMDR_ICLK_ICLK		(1 << 1)
+#define GTMDR_ICLK_SLGO		(2 << 1)
+#define GTMDR_FRR		(1 << 3)
+#define GTMDR_ORI		(1 << 4)
+#define GTMDR_SPS(x)		((x) << 8)
+
+struct gtm_timers_regs {
+	u8	gtcfr1;		/* Timer 1, Timer 2 global config register */
+	u8	res0[0x3];
+	u8	gtcfr2;		/* Timer 3, timer 4 global config register */
+	u8	res1[0xB];
+	__be16	gtmdr1;		/* Timer 1 mode register */
+	__be16	gtmdr2;		/* Timer 2 mode register */
+	__be16	gtrfr1;		/* Timer 1 reference register */
+	__be16	gtrfr2;		/* Timer 2 reference register */
+	__be16	gtcpr1;		/* Timer 1 capture register */
+	__be16	gtcpr2;		/* Timer 2 capture register */
+	__be16	gtcnr1;		/* Timer 1 counter */
+	__be16	gtcnr2;		/* Timer 2 counter */
+	__be16	gtmdr3;		/* Timer 3 mode register */
+	__be16	gtmdr4;		/* Timer 4 mode register */
+	__be16	gtrfr3;		/* Timer 3 reference register */
+	__be16	gtrfr4;		/* Timer 4 reference register */
+	__be16	gtcpr3;		/* Timer 3 capture register */
+	__be16	gtcpr4;		/* Timer 4 capture register */
+	__be16	gtcnr3;		/* Timer 3 counter */
+	__be16	gtcnr4;		/* Timer 4 counter */
+	__be16	gtevr1;		/* Timer 1 event register */
+	__be16	gtevr2;		/* Timer 2 event register */
+	__be16	gtevr3;		/* Timer 3 event register */
+	__be16	gtevr4;		/* Timer 4 event register */
+	__be16	gtpsr1;		/* Timer 1 prescale register */
+	__be16	gtpsr2;		/* Timer 2 prescale register */
+	__be16	gtpsr3;		/* Timer 3 prescale register */
+	__be16	gtpsr4;		/* Timer 4 prescale register */
+	u8 res2[0x40];
+} __attribute__ ((packed));
+
+struct gtm {
+	unsigned int clock;
+	struct gtm_timers_regs __iomem *regs;
+	struct gtm_timer timers[4];
+	spinlock_t lock;
+	struct list_head list_node;
+};
+
+static LIST_HEAD(gtms);
+
+/**
+ * gtm_get_timer - request GTM timer to use it with the rest of GTM API
+ * Context:	non-IRQ
+ *
+ * This function reserves GTM timer for later use. It returns gtm_timer
+ * structure to use with the rest of GTM API, you should use timer->irq
+ * to manage timer interrupt.
+ */
+struct gtm_timer *gtm_get_timer16(void)
+{
+	struct gtm *gtm;
+	int i;
+
+	list_for_each_entry(gtm, &gtms, list_node) {
+		spin_lock_irq(&gtm->lock);
+
+		for (i = 0; i < ARRAY_SIZE(gtm->timers); i++) {
+			if (!gtm->timers[i].requested) {
+				gtm->timers[i].requested = true;
+				spin_unlock_irq(&gtm->lock);
+				return &gtm->timers[i];
+			}
+		}
+
+		spin_unlock_irq(&gtm->lock);
+	}
+
+	if (!list_empty(&gtms))
+		return ERR_PTR(-EBUSY);
+	return ERR_PTR(-ENODEV);
+}
+EXPORT_SYMBOL(gtm_get_timer16);
+
+/**
+ * gtm_get_specific_timer - request specific GTM timer
+ * @gtm:	specific GTM, pass here GTM's device_node->data
+ * @timer:	specific timer number, Timer1 is 0.
+ * Context:	non-IRQ
+ *
+ * This function reserves GTM timer for later use. It returns gtm_timer
+ * structure to use with the rest of GTM API, you should use timer->irq
+ * to manage timer interrupt.
+ */
+struct gtm_timer *gtm_get_specific_timer16(struct gtm *gtm,
+					   unsigned int timer)
+{
+	struct gtm_timer *ret = ERR_PTR(-EBUSY);
+
+	if (timer > 3)
+		return ERR_PTR(-EINVAL);
+
+	spin_lock_irq(&gtm->lock);
+
+	if (gtm->timers[timer].requested)
+		goto out;
+
+	ret = &gtm->timers[timer];
+	ret->requested = true;
+
+out:
+	spin_unlock_irq(&gtm->lock);
+	return ret;
+}
+EXPORT_SYMBOL(gtm_get_specific_timer16);
+
+/**
+ * gtm_put_timer16 - release 16 bits GTM timer
+ * @tmr:	pointer to the gtm_timer structure obtained from gtm_get_timer
+ * Context:	any
+ *
+ * This function releases GTM timer so others may request it.
+ */
+void gtm_put_timer16(struct gtm_timer *tmr)
+{
+	gtm_stop_timer16(tmr);
+
+	spin_lock_irq(&tmr->gtm->lock);
+	tmr->requested = false;
+	spin_unlock_irq(&tmr->gtm->lock);
+}
+EXPORT_SYMBOL(gtm_put_timer16);
+
+/*
+ * This is back-end for the exported functions, it's used to reset single
+ * timer in reference mode.
+ */
+static int gtm_set_ref_timer16(struct gtm_timer *tmr, int frequency,
+			       int reference_value, bool free_run)
+{
+	struct gtm *gtm = tmr->gtm;
+	int num = tmr - &gtm->timers[0];
+	unsigned int prescaler;
+	u8 iclk = GTMDR_ICLK_ICLK;
+	u8 psr;
+	u8 sps;
+	unsigned long flags;
+	int max_prescaler = 256 * 256 * 16;
+
+	/* CPM2 doesn't have primary prescaler */
+	if (!tmr->gtpsr)
+		max_prescaler /= 256;
+
+	prescaler = gtm->clock / frequency;
+	/*
+	 * We have two 8 bit prescalers -- primary and secondary (psr, sps),
+	 * plus "slow go" mode (clk / 16). So, total prescale value is
+	 * 16 * (psr + 1) * (sps + 1). Though, for CPM2 GTMs we losing psr.
+	 */
+	if (prescaler > max_prescaler)
+		return -EINVAL;
+
+	if (prescaler > max_prescaler / 16) {
+		iclk = GTMDR_ICLK_SLGO;
+		prescaler /= 16;
+	}
+
+	if (prescaler <= 256) {
+		psr = 0;
+		sps = prescaler - 1;
+	} else {
+		psr = 256 - 1;
+		sps = prescaler / 256 - 1;
+	}
+
+	spin_lock_irqsave(&gtm->lock, flags);
+
+	/*
+	 * Properly reset timers: stop, reset, set up prescalers, reference
+	 * value and clear event register.
+	 */
+	clrsetbits_8(tmr->gtcfr, ~(GTCFR_STP(num) | GTCFR_RST(num)),
+				 GTCFR_STP(num) | GTCFR_RST(num));
+
+	setbits8(tmr->gtcfr, GTCFR_STP(num));
+
+	if (tmr->gtpsr)
+		out_be16(tmr->gtpsr, psr);
+	clrsetbits_be16(tmr->gtmdr, 0xFFFF, iclk | GTMDR_SPS(sps) |
+			GTMDR_ORI | (free_run ? GTMDR_FRR : 0));
+	out_be16(tmr->gtcnr, 0);
+	out_be16(tmr->gtrfr, reference_value);
+	out_be16(tmr->gtevr, 0xFFFF);
+
+	/* Let it be. */
+	clrbits8(tmr->gtcfr, GTCFR_STP(num));
+
+	spin_unlock_irqrestore(&gtm->lock, flags);
+
+	return 0;
+}
+
+/**
+ * gtm_set_timer16 - (re)set 16 bit timer with arbitrary precision
+ * @tmr:	pointer to the gtm_timer structure obtained from gtm_get_timer
+ * @usec:	timer interval in microseconds
+ * @reload:	if set, the timer will reset upon expiry rather than
+ *         	continue running free.
+ * Context:	any
+ *
+ * This function (re)sets the GTM timer so that it counts up to the requested
+ * interval value, and fires the interrupt when the value is reached. This
+ * function will reduce the precision of the timer as needed in order for the
+ * requested timeout to fit in a 16-bit register.
+ */
+int gtm_set_timer16(struct gtm_timer *tmr, unsigned long usec, bool reload)
+{
+	/* quite obvious, frequency which is enough for µSec precision */
+	int freq = 1000000;
+	unsigned int bit;
+
+	bit = fls_long(usec);
+	if (bit > 15) {
+		freq >>= bit - 15;
+		usec >>= bit - 15;
+	}
+
+	if (!freq)
+		return -EINVAL;
+
+	return gtm_set_ref_timer16(tmr, freq, usec, reload);
+}
+EXPORT_SYMBOL(gtm_set_timer16);
+
+/**
+ * gtm_set_exact_utimer16 - (re)set 16 bits timer
+ * @tmr:	pointer to the gtm_timer structure obtained from gtm_get_timer
+ * @usec:	timer interval in microseconds
+ * @reload:	if set, the timer will reset upon expiry rather than
+ *         	continue running free.
+ * Context:	any
+ *
+ * This function (re)sets GTM timer so that it counts up to the requested
+ * interval value, and fires the interrupt when the value is reached. If reload
+ * flag was set, timer will also reset itself upon reference value, otherwise
+ * it continues to increment.
+ *
+ * The _exact_ bit in the function name states that this function will not
+ * crop precision of the "usec" argument, thus usec is limited to 16 bits
+ * (single timer width).
+ */
+int gtm_set_exact_timer16(struct gtm_timer *tmr, u16 usec, bool reload)
+{
+	/* quite obvious, frequency which is enough for µSec precision */
+	const int freq = 1000000;
+
+	/*
+	 * We can lower the frequency (and probably power consumption) by
+	 * dividing both frequency and usec by 2 until there is no remainder.
+	 * But we won't bother with this unless savings are measured, so just
+	 * run the timer as is.
+	 */
+
+	return gtm_set_ref_timer16(tmr, freq, usec, reload);
+}
+EXPORT_SYMBOL(gtm_set_exact_timer16);
+
+/**
+ * gtm_stop_timer16 - stop single timer
+ * @tmr:	pointer to the gtm_timer structure obtained from gtm_get_timer
+ * Context:	any
+ *
+ * This function simply stops the GTM timer.
+ */
+void gtm_stop_timer16(struct gtm_timer *tmr)
+{
+	struct gtm *gtm = tmr->gtm;
+	int num = tmr - &gtm->timers[0];
+	unsigned long flags;
+
+	spin_lock_irqsave(&gtm->lock, flags);
+
+	setbits8(tmr->gtcfr, GTCFR_STP(num));
+	out_be16(tmr->gtevr, 0xFFFF);
+
+	spin_unlock_irqrestore(&gtm->lock, flags);
+}
+EXPORT_SYMBOL(gtm_stop_timer16);
+
+/**
+ * gtm_ack_timer16 - acknowledge timer event (free-run timers only)
+ * @tmr:	pointer to the gtm_timer structure obtained from gtm_get_timer
+ * @events:	events mask to ack
+ * Context:	any
+ *
+ * Thus function used to acknowledge timer interrupt event, use it inside the
+ * interrupt handler.
+ */
+void gtm_ack_timer16(struct gtm_timer *tmr, u16 events)
+{
+	out_be16(tmr->gtevr, events);
+}
+EXPORT_SYMBOL(gtm_ack_timer16);
+
+static void __init gtm_set_shortcuts(struct device_node *np,
+				     struct gtm_timer *timers,
+				     struct gtm_timers_regs __iomem *regs)
+{
+	/*
+	 * Yeah, I don't like this either, but timers' registers a bit messed,
+	 * so we have to provide shortcuts to write timer independent code.
+	 * Alternative option is to create gt*() accessors, but that will be
+	 * even uglier and cryptic.
+	 */
+	timers[0].gtcfr = &regs->gtcfr1;
+	timers[0].gtmdr = &regs->gtmdr1;
+	timers[0].gtcnr = &regs->gtcnr1;
+	timers[0].gtrfr = &regs->gtrfr1;
+	timers[0].gtevr = &regs->gtevr1;
+
+	timers[1].gtcfr = &regs->gtcfr1;
+	timers[1].gtmdr = &regs->gtmdr2;
+	timers[1].gtcnr = &regs->gtcnr2;
+	timers[1].gtrfr = &regs->gtrfr2;
+	timers[1].gtevr = &regs->gtevr2;
+
+	timers[2].gtcfr = &regs->gtcfr2;
+	timers[2].gtmdr = &regs->gtmdr3;
+	timers[2].gtcnr = &regs->gtcnr3;
+	timers[2].gtrfr = &regs->gtrfr3;
+	timers[2].gtevr = &regs->gtevr3;
+
+	timers[3].gtcfr = &regs->gtcfr2;
+	timers[3].gtmdr = &regs->gtmdr4;
+	timers[3].gtcnr = &regs->gtcnr4;
+	timers[3].gtrfr = &regs->gtrfr4;
+	timers[3].gtevr = &regs->gtevr4;
+
+	/* CPM2 doesn't have primary prescaler */
+	if (!of_device_is_compatible(np, "fsl,cpm2-gtm")) {
+		timers[0].gtpsr = &regs->gtpsr1;
+		timers[1].gtpsr = &regs->gtpsr2;
+		timers[2].gtpsr = &regs->gtpsr3;
+		timers[3].gtpsr = &regs->gtpsr4;
+	}
+}
+
+static int __init fsl_gtm_init(void)
+{
+	struct device_node *np;
+
+	for_each_compatible_node(np, NULL, "fsl,gtm") {
+		int i;
+		struct gtm *gtm;
+		const u32 *clock;
+		int size;
+
+		gtm = kzalloc(sizeof(*gtm), GFP_KERNEL);
+		if (!gtm) {
+			pr_err("%pOF: unable to allocate memory\n",
+				np);
+			continue;
+		}
+
+		spin_lock_init(&gtm->lock);
+
+		clock = of_get_property(np, "clock-frequency", &size);
+		if (!clock || size != sizeof(*clock)) {
+			pr_err("%pOF: no clock-frequency\n", np);
+			goto err;
+		}
+		gtm->clock = *clock;
+
+		for (i = 0; i < ARRAY_SIZE(gtm->timers); i++) {
+			unsigned int irq;
+
+			irq = irq_of_parse_and_map(np, i);
+			if (!irq) {
+				pr_err("%pOF: not enough interrupts specified\n",
+				       np);
+				goto err;
+			}
+			gtm->timers[i].irq = irq;
+			gtm->timers[i].gtm = gtm;
+		}
+
+		gtm->regs = of_iomap(np, 0);
+		if (!gtm->regs) {
+			pr_err("%pOF: unable to iomap registers\n",
+			       np);
+			goto err;
+		}
+
+		gtm_set_shortcuts(np, gtm->timers, gtm->regs);
+		list_add(&gtm->list_node, &gtms);
+
+		/* We don't want to lose the node and its ->data */
+		np->data = gtm;
+		of_node_get(np);
+
+		continue;
+err:
+		kfree(gtm);
+	}
+	return 0;
+}
+arch_initcall(fsl_gtm_init);
diff --git a/arch/powerpc/sysdev/fsl_lbc.c b/arch/powerpc/sysdev/fsl_lbc.c
new file mode 100644
index 0000000000..217cea1509
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_lbc.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale LBC and UPM routines.
+ *
+ * Copyright © 2007-2008  MontaVista Software, Inc.
+ * Copyright © 2010 Freescale Semiconductor
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ * Author: Jack Lan <Jack.Lan@freescale.com>
+ * Author: Roy Zang <tie-fei.zang@freescale.com>
+ */
+
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/compiler.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
+#include <linux/syscore_ops.h>
+#include <asm/fsl_lbc.h>
+
+static DEFINE_SPINLOCK(fsl_lbc_lock);
+struct fsl_lbc_ctrl *fsl_lbc_ctrl_dev;
+EXPORT_SYMBOL(fsl_lbc_ctrl_dev);
+
+/**
+ * fsl_lbc_addr - convert the base address
+ * @addr_base:	base address of the memory bank
+ *
+ * This function converts a base address of lbc into the right format for the
+ * BR register. If the SOC has eLBC then it returns 32bit physical address
+ * else it converts a 34bit local bus physical address to correct format of
+ * 32bit address for BR register (Example: MPC8641).
+ */
+u32 fsl_lbc_addr(phys_addr_t addr_base)
+{
+	struct device_node *np = fsl_lbc_ctrl_dev->dev->of_node;
+	u32 addr = addr_base & 0xffff8000;
+
+	if (of_device_is_compatible(np, "fsl,elbc"))
+		return addr;
+
+	return addr | ((addr_base & 0x300000000ull) >> 19);
+}
+EXPORT_SYMBOL(fsl_lbc_addr);
+
+/**
+ * fsl_lbc_find - find Localbus bank
+ * @addr_base:	base address of the memory bank
+ *
+ * This function walks LBC banks comparing "Base address" field of the BR
+ * registers with the supplied addr_base argument. When bases match this
+ * function returns bank number (starting with 0), otherwise it returns
+ * appropriate errno value.
+ */
+int fsl_lbc_find(phys_addr_t addr_base)
+{
+	int i;
+	struct fsl_lbc_regs __iomem *lbc;
+
+	if (!fsl_lbc_ctrl_dev || !fsl_lbc_ctrl_dev->regs)
+		return -ENODEV;
+
+	lbc = fsl_lbc_ctrl_dev->regs;
+	for (i = 0; i < ARRAY_SIZE(lbc->bank); i++) {
+		u32 br = in_be32(&lbc->bank[i].br);
+		u32 or = in_be32(&lbc->bank[i].or);
+
+		if (br & BR_V && (br & or & BR_BA) == fsl_lbc_addr(addr_base))
+			return i;
+	}
+
+	return -ENOENT;
+}
+EXPORT_SYMBOL(fsl_lbc_find);
+
+/**
+ * fsl_upm_find - find pre-programmed UPM via base address
+ * @addr_base:	base address of the memory bank controlled by the UPM
+ * @upm:	pointer to the allocated fsl_upm structure
+ *
+ * This function fills fsl_upm structure so you can use it with the rest of
+ * UPM API. On success this function returns 0, otherwise it returns
+ * appropriate errno value.
+ */
+int fsl_upm_find(phys_addr_t addr_base, struct fsl_upm *upm)
+{
+	int bank;
+	u32 br;
+	struct fsl_lbc_regs __iomem *lbc;
+
+	bank = fsl_lbc_find(addr_base);
+	if (bank < 0)
+		return bank;
+
+	if (!fsl_lbc_ctrl_dev || !fsl_lbc_ctrl_dev->regs)
+		return -ENODEV;
+
+	lbc = fsl_lbc_ctrl_dev->regs;
+	br = in_be32(&lbc->bank[bank].br);
+
+	switch (br & BR_MSEL) {
+	case BR_MS_UPMA:
+		upm->mxmr = &lbc->mamr;
+		break;
+	case BR_MS_UPMB:
+		upm->mxmr = &lbc->mbmr;
+		break;
+	case BR_MS_UPMC:
+		upm->mxmr = &lbc->mcmr;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (br & BR_PS) {
+	case BR_PS_8:
+		upm->width = 8;
+		break;
+	case BR_PS_16:
+		upm->width = 16;
+		break;
+	case BR_PS_32:
+		upm->width = 32;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(fsl_upm_find);
+
+/**
+ * fsl_upm_run_pattern - actually run an UPM pattern
+ * @upm:	pointer to the fsl_upm structure obtained via fsl_upm_find
+ * @io_base:	remapped pointer to where memory access should happen
+ * @mar:	MAR register content during pattern execution
+ *
+ * This function triggers dummy write to the memory specified by the io_base,
+ * thus UPM pattern actually executed. Note that mar usage depends on the
+ * pre-programmed AMX bits in the UPM RAM.
+ */
+int fsl_upm_run_pattern(struct fsl_upm *upm, void __iomem *io_base, u32 mar)
+{
+	int ret = 0;
+	unsigned long flags;
+
+	if (!fsl_lbc_ctrl_dev || !fsl_lbc_ctrl_dev->regs)
+		return -ENODEV;
+
+	spin_lock_irqsave(&fsl_lbc_lock, flags);
+
+	out_be32(&fsl_lbc_ctrl_dev->regs->mar, mar);
+
+	switch (upm->width) {
+	case 8:
+		out_8(io_base, 0x0);
+		break;
+	case 16:
+		out_be16(io_base, 0x0);
+		break;
+	case 32:
+		out_be32(io_base, 0x0);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	spin_unlock_irqrestore(&fsl_lbc_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(fsl_upm_run_pattern);
+
+static int fsl_lbc_ctrl_init(struct fsl_lbc_ctrl *ctrl,
+			     struct device_node *node)
+{
+	struct fsl_lbc_regs __iomem *lbc = ctrl->regs;
+
+	/* clear event registers */
+	setbits32(&lbc->ltesr, LTESR_CLEAR);
+	out_be32(&lbc->lteatr, 0);
+	out_be32(&lbc->ltear, 0);
+	out_be32(&lbc->lteccr, LTECCR_CLEAR);
+	out_be32(&lbc->ltedr, LTEDR_ENABLE);
+
+	/* Set the monitor timeout value to the maximum for erratum A001 */
+	if (of_device_is_compatible(node, "fsl,elbc"))
+		clrsetbits_be32(&lbc->lbcr, LBCR_BMT, LBCR_BMTPS);
+
+	return 0;
+}
+
+/*
+ * NOTE: This interrupt is used to report localbus events of various kinds,
+ * such as transaction errors on the chipselects.
+ */
+
+static irqreturn_t fsl_lbc_ctrl_irq(int irqno, void *data)
+{
+	struct fsl_lbc_ctrl *ctrl = data;
+	struct fsl_lbc_regs __iomem *lbc = ctrl->regs;
+	u32 status;
+	unsigned long flags;
+
+	spin_lock_irqsave(&fsl_lbc_lock, flags);
+	status = in_be32(&lbc->ltesr);
+	if (!status) {
+		spin_unlock_irqrestore(&fsl_lbc_lock, flags);
+		return IRQ_NONE;
+	}
+
+	out_be32(&lbc->ltesr, LTESR_CLEAR);
+	out_be32(&lbc->lteatr, 0);
+	out_be32(&lbc->ltear, 0);
+	ctrl->irq_status = status;
+
+	if (status & LTESR_BM)
+		dev_err(ctrl->dev, "Local bus monitor time-out: "
+			"LTESR 0x%08X\n", status);
+	if (status & LTESR_WP)
+		dev_err(ctrl->dev, "Write protect error: "
+			"LTESR 0x%08X\n", status);
+	if (status & LTESR_ATMW)
+		dev_err(ctrl->dev, "Atomic write error: "
+			"LTESR 0x%08X\n", status);
+	if (status & LTESR_ATMR)
+		dev_err(ctrl->dev, "Atomic read error: "
+			"LTESR 0x%08X\n", status);
+	if (status & LTESR_CS)
+		dev_err(ctrl->dev, "Chip select error: "
+			"LTESR 0x%08X\n", status);
+	if (status & LTESR_FCT) {
+		dev_err(ctrl->dev, "FCM command time-out: "
+			"LTESR 0x%08X\n", status);
+		smp_wmb();
+		wake_up(&ctrl->irq_wait);
+	}
+	if (status & LTESR_PAR) {
+		dev_err(ctrl->dev, "Parity or Uncorrectable ECC error: "
+			"LTESR 0x%08X\n", status);
+		smp_wmb();
+		wake_up(&ctrl->irq_wait);
+	}
+	if (status & LTESR_CC) {
+		smp_wmb();
+		wake_up(&ctrl->irq_wait);
+	}
+	if (status & ~LTESR_MASK)
+		dev_err(ctrl->dev, "Unknown error: "
+			"LTESR 0x%08X\n", status);
+	spin_unlock_irqrestore(&fsl_lbc_lock, flags);
+	return IRQ_HANDLED;
+}
+
+/*
+ * fsl_lbc_ctrl_probe
+ *
+ * called by device layer when it finds a device matching
+ * one our driver can handled. This code allocates all of
+ * the resources needed for the controller only.  The
+ * resources for the NAND banks themselves are allocated
+ * in the chip probe function.
+*/
+
+static int fsl_lbc_ctrl_probe(struct platform_device *dev)
+{
+	int ret;
+
+	if (!dev->dev.of_node) {
+		dev_err(&dev->dev, "Device OF-Node is NULL");
+		return -EFAULT;
+	}
+
+	fsl_lbc_ctrl_dev = kzalloc(sizeof(*fsl_lbc_ctrl_dev), GFP_KERNEL);
+	if (!fsl_lbc_ctrl_dev)
+		return -ENOMEM;
+
+	dev_set_drvdata(&dev->dev, fsl_lbc_ctrl_dev);
+
+	spin_lock_init(&fsl_lbc_ctrl_dev->lock);
+	init_waitqueue_head(&fsl_lbc_ctrl_dev->irq_wait);
+
+	fsl_lbc_ctrl_dev->regs = of_iomap(dev->dev.of_node, 0);
+	if (!fsl_lbc_ctrl_dev->regs) {
+		dev_err(&dev->dev, "failed to get memory region\n");
+		ret = -ENODEV;
+		goto err;
+	}
+
+	fsl_lbc_ctrl_dev->irq[0] = irq_of_parse_and_map(dev->dev.of_node, 0);
+	if (!fsl_lbc_ctrl_dev->irq[0]) {
+		dev_err(&dev->dev, "failed to get irq resource\n");
+		ret = -ENODEV;
+		goto err;
+	}
+
+	fsl_lbc_ctrl_dev->dev = &dev->dev;
+
+	ret = fsl_lbc_ctrl_init(fsl_lbc_ctrl_dev, dev->dev.of_node);
+	if (ret < 0)
+		goto err;
+
+	ret = request_irq(fsl_lbc_ctrl_dev->irq[0], fsl_lbc_ctrl_irq, 0,
+				"fsl-lbc", fsl_lbc_ctrl_dev);
+	if (ret != 0) {
+		dev_err(&dev->dev, "failed to install irq (%d)\n",
+			fsl_lbc_ctrl_dev->irq[0]);
+		ret = fsl_lbc_ctrl_dev->irq[0];
+		goto err;
+	}
+
+	fsl_lbc_ctrl_dev->irq[1] = irq_of_parse_and_map(dev->dev.of_node, 1);
+	if (fsl_lbc_ctrl_dev->irq[1]) {
+		ret = request_irq(fsl_lbc_ctrl_dev->irq[1], fsl_lbc_ctrl_irq,
+				IRQF_SHARED, "fsl-lbc-err", fsl_lbc_ctrl_dev);
+		if (ret) {
+			dev_err(&dev->dev, "failed to install irq (%d)\n",
+					fsl_lbc_ctrl_dev->irq[1]);
+			ret = fsl_lbc_ctrl_dev->irq[1];
+			goto err1;
+		}
+	}
+
+	/* Enable interrupts for any detected events */
+	out_be32(&fsl_lbc_ctrl_dev->regs->lteir, LTEIR_ENABLE);
+
+	return 0;
+
+err1:
+	free_irq(fsl_lbc_ctrl_dev->irq[0], fsl_lbc_ctrl_dev);
+err:
+	iounmap(fsl_lbc_ctrl_dev->regs);
+	kfree(fsl_lbc_ctrl_dev);
+	fsl_lbc_ctrl_dev = NULL;
+	return ret;
+}
+
+#ifdef CONFIG_SUSPEND
+
+/* save lbc registers */
+static int fsl_lbc_syscore_suspend(void)
+{
+	struct fsl_lbc_ctrl *ctrl;
+	struct fsl_lbc_regs __iomem *lbc;
+
+	ctrl = fsl_lbc_ctrl_dev;
+	if (!ctrl)
+		goto out;
+
+	lbc = ctrl->regs;
+	if (!lbc)
+		goto out;
+
+	ctrl->saved_regs = kmalloc(sizeof(struct fsl_lbc_regs), GFP_KERNEL);
+	if (!ctrl->saved_regs)
+		return -ENOMEM;
+
+	_memcpy_fromio(ctrl->saved_regs, lbc, sizeof(struct fsl_lbc_regs));
+
+out:
+	return 0;
+}
+
+/* restore lbc registers */
+static void fsl_lbc_syscore_resume(void)
+{
+	struct fsl_lbc_ctrl *ctrl;
+	struct fsl_lbc_regs __iomem *lbc;
+
+	ctrl = fsl_lbc_ctrl_dev;
+	if (!ctrl)
+		goto out;
+
+	lbc = ctrl->regs;
+	if (!lbc)
+		goto out;
+
+	if (ctrl->saved_regs) {
+		_memcpy_toio(lbc, ctrl->saved_regs,
+				sizeof(struct fsl_lbc_regs));
+		kfree(ctrl->saved_regs);
+		ctrl->saved_regs = NULL;
+	}
+
+out:
+	return;
+}
+#endif /* CONFIG_SUSPEND */
+
+static const struct of_device_id fsl_lbc_match[] = {
+	{ .compatible = "fsl,elbc", },
+	{ .compatible = "fsl,pq3-localbus", },
+	{ .compatible = "fsl,pq2-localbus", },
+	{ .compatible = "fsl,pq2pro-localbus", },
+	{},
+};
+
+#ifdef CONFIG_SUSPEND
+static struct syscore_ops lbc_syscore_pm_ops = {
+	.suspend = fsl_lbc_syscore_suspend,
+	.resume = fsl_lbc_syscore_resume,
+};
+#endif
+
+static struct platform_driver fsl_lbc_ctrl_driver = {
+	.driver = {
+		.name = "fsl-lbc",
+		.of_match_table = fsl_lbc_match,
+	},
+	.probe = fsl_lbc_ctrl_probe,
+};
+
+static int __init fsl_lbc_init(void)
+{
+#ifdef CONFIG_SUSPEND
+	register_syscore_ops(&lbc_syscore_pm_ops);
+#endif
+	return platform_driver_register(&fsl_lbc_ctrl_driver);
+}
+subsys_initcall(fsl_lbc_init);
diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c
new file mode 100644
index 0000000000..df06bb6b83
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_mpic_err.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ *
+ * Author: Varun Sethi <varun.sethi@freescale.com>
+ */
+
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/mpic.h>
+
+#include "mpic.h"
+
+#define MPIC_ERR_INT_BASE	0x3900
+#define MPIC_ERR_INT_EISR	0x0000
+#define MPIC_ERR_INT_EIMR	0x0010
+
+static inline u32 mpic_fsl_err_read(u32 __iomem *base, unsigned int err_reg)
+{
+	return in_be32(base + (err_reg >> 2));
+}
+
+static inline void mpic_fsl_err_write(u32 __iomem *base, u32 value)
+{
+	out_be32(base + (MPIC_ERR_INT_EIMR >> 2), value);
+}
+
+static void fsl_mpic_mask_err(struct irq_data *d)
+{
+	u32 eimr;
+	struct mpic *mpic = irq_data_get_irq_chip_data(d);
+	unsigned int src = virq_to_hw(d->irq) - mpic->err_int_vecs[0];
+
+	eimr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EIMR);
+	eimr |= (1 << (31 - src));
+	mpic_fsl_err_write(mpic->err_regs, eimr);
+}
+
+static void fsl_mpic_unmask_err(struct irq_data *d)
+{
+	u32 eimr;
+	struct mpic *mpic = irq_data_get_irq_chip_data(d);
+	unsigned int src = virq_to_hw(d->irq) - mpic->err_int_vecs[0];
+
+	eimr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EIMR);
+	eimr &= ~(1 << (31 - src));
+	mpic_fsl_err_write(mpic->err_regs, eimr);
+}
+
+static struct irq_chip fsl_mpic_err_chip = {
+	.irq_disable	= fsl_mpic_mask_err,
+	.irq_mask	= fsl_mpic_mask_err,
+	.irq_unmask	= fsl_mpic_unmask_err,
+};
+
+int __init mpic_setup_error_int(struct mpic *mpic, int intvec)
+{
+	int i;
+
+	mpic->err_regs = ioremap(mpic->paddr + MPIC_ERR_INT_BASE, 0x1000);
+	if (!mpic->err_regs) {
+		pr_err("could not map mpic error registers\n");
+		return -ENOMEM;
+	}
+	mpic->hc_err = fsl_mpic_err_chip;
+	mpic->hc_err.name = mpic->name;
+	mpic->flags |= MPIC_FSL_HAS_EIMR;
+	/* allocate interrupt vectors for error interrupts */
+	for (i = MPIC_MAX_ERR - 1; i >= 0; i--)
+		mpic->err_int_vecs[i] = intvec--;
+
+	return 0;
+}
+
+int mpic_map_error_int(struct mpic *mpic, unsigned int virq, irq_hw_number_t  hw)
+{
+	if ((mpic->flags & MPIC_FSL_HAS_EIMR) &&
+	    (hw >= mpic->err_int_vecs[0] &&
+	     hw <= mpic->err_int_vecs[MPIC_MAX_ERR - 1])) {
+		WARN_ON(mpic->flags & MPIC_SECONDARY);
+
+		pr_debug("mpic: mapping as Error Interrupt\n");
+		irq_set_chip_data(virq, mpic);
+		irq_set_chip_and_handler(virq, &mpic->hc_err,
+					 handle_level_irq);
+		return 1;
+	}
+
+	return 0;
+}
+
+static irqreturn_t fsl_error_int_handler(int irq, void *data)
+{
+	struct mpic *mpic = (struct mpic *) data;
+	u32 eisr, eimr;
+	int errint;
+
+	eisr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EISR);
+	eimr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EIMR);
+
+	if (!(eisr & ~eimr))
+		return IRQ_NONE;
+
+	while (eisr) {
+		int ret;
+		errint = __builtin_clz(eisr);
+		ret = generic_handle_domain_irq(mpic->irqhost,
+						mpic->err_int_vecs[errint]);
+		if (WARN_ON(ret)) {
+			eimr |=  1 << (31 - errint);
+			mpic_fsl_err_write(mpic->err_regs, eimr);
+		}
+		eisr &= ~(1 << (31 - errint));
+	}
+
+	return IRQ_HANDLED;
+}
+
+void __init mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum)
+{
+	unsigned int virq;
+	int ret;
+
+	virq = irq_create_mapping(mpic->irqhost, irqnum);
+	if (!virq) {
+		pr_err("Error interrupt setup failed\n");
+		return;
+	}
+
+	/* Mask all error interrupts */
+	mpic_fsl_err_write(mpic->err_regs, ~0);
+
+	ret = request_irq(virq, fsl_error_int_handler, IRQF_NO_THREAD,
+		    "mpic-error-int", mpic);
+	if (ret)
+		pr_err("Failed to register error interrupt handler\n");
+}
diff --git a/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c
new file mode 100644
index 0000000000..ce6c739c51
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPIC timer wakeup driver
+ *
+ * Copyright 2013 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+
+#include <asm/mpic_timer.h>
+#include <asm/mpic.h>
+
+struct fsl_mpic_timer_wakeup {
+	struct mpic_timer *timer;
+	struct work_struct free_work;
+};
+
+static struct fsl_mpic_timer_wakeup *fsl_wakeup;
+static DEFINE_MUTEX(sysfs_lock);
+
+static void fsl_free_resource(struct work_struct *ws)
+{
+	struct fsl_mpic_timer_wakeup *wakeup =
+		container_of(ws, struct fsl_mpic_timer_wakeup, free_work);
+
+	mutex_lock(&sysfs_lock);
+
+	if (wakeup->timer) {
+		disable_irq_wake(wakeup->timer->irq);
+		mpic_free_timer(wakeup->timer);
+	}
+
+	wakeup->timer = NULL;
+	mutex_unlock(&sysfs_lock);
+}
+
+static irqreturn_t fsl_mpic_timer_irq(int irq, void *dev_id)
+{
+	struct fsl_mpic_timer_wakeup *wakeup = dev_id;
+
+	schedule_work(&wakeup->free_work);
+
+	return wakeup->timer ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static ssize_t fsl_timer_wakeup_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	time64_t interval = 0;
+
+	mutex_lock(&sysfs_lock);
+	if (fsl_wakeup->timer) {
+		mpic_get_remain_time(fsl_wakeup->timer, &interval);
+		interval++;
+	}
+	mutex_unlock(&sysfs_lock);
+
+	return sprintf(buf, "%lld\n", interval);
+}
+
+static ssize_t fsl_timer_wakeup_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf,
+				size_t count)
+{
+	time64_t interval;
+	int ret;
+
+	if (kstrtoll(buf, 0, &interval))
+		return -EINVAL;
+
+	mutex_lock(&sysfs_lock);
+
+	if (fsl_wakeup->timer) {
+		disable_irq_wake(fsl_wakeup->timer->irq);
+		mpic_free_timer(fsl_wakeup->timer);
+		fsl_wakeup->timer = NULL;
+	}
+
+	if (!interval) {
+		mutex_unlock(&sysfs_lock);
+		return count;
+	}
+
+	fsl_wakeup->timer = mpic_request_timer(fsl_mpic_timer_irq,
+						fsl_wakeup, interval);
+	if (!fsl_wakeup->timer) {
+		mutex_unlock(&sysfs_lock);
+		return -EINVAL;
+	}
+
+	ret = enable_irq_wake(fsl_wakeup->timer->irq);
+	if (ret) {
+		mpic_free_timer(fsl_wakeup->timer);
+		fsl_wakeup->timer = NULL;
+		mutex_unlock(&sysfs_lock);
+
+		return ret;
+	}
+
+	mpic_start_timer(fsl_wakeup->timer);
+
+	mutex_unlock(&sysfs_lock);
+
+	return count;
+}
+
+static struct device_attribute mpic_attributes = __ATTR(timer_wakeup, 0644,
+			fsl_timer_wakeup_show, fsl_timer_wakeup_store);
+
+static int __init fsl_wakeup_sys_init(void)
+{
+	struct device *dev_root;
+	int ret = -EINVAL;
+
+	fsl_wakeup = kzalloc(sizeof(struct fsl_mpic_timer_wakeup), GFP_KERNEL);
+	if (!fsl_wakeup)
+		return -ENOMEM;
+
+	INIT_WORK(&fsl_wakeup->free_work, fsl_free_resource);
+
+	dev_root = bus_get_dev_root(&mpic_subsys);
+	if (dev_root) {
+		ret = device_create_file(dev_root, &mpic_attributes);
+		put_device(dev_root);
+		if (ret)
+			kfree(fsl_wakeup);
+	}
+
+	return ret;
+}
+
+static void __exit fsl_wakeup_sys_exit(void)
+{
+	struct device *dev_root;
+
+	dev_root = bus_get_dev_root(&mpic_subsys);
+	if (dev_root) {
+		device_remove_file(dev_root, &mpic_attributes);
+		put_device(dev_root);
+	}
+
+	mutex_lock(&sysfs_lock);
+
+	if (fsl_wakeup->timer) {
+		disable_irq_wake(fsl_wakeup->timer->irq);
+		mpic_free_timer(fsl_wakeup->timer);
+	}
+
+	kfree(fsl_wakeup);
+
+	mutex_unlock(&sysfs_lock);
+}
+
+module_init(fsl_wakeup_sys_init);
+module_exit(fsl_wakeup_sys_exit);
+
+MODULE_DESCRIPTION("Freescale MPIC global timer wakeup driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Wang Dongsheng <dongsheng.wang@freescale.com>");
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
new file mode 100644
index 0000000000..57978a44d5
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -0,0 +1,616 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2007-2011 Freescale Semiconductor, Inc.
+ *
+ * Author: Tony Li <tony.li@freescale.com>
+ *	   Jason Jin <Jason.jin@freescale.com>
+ *
+ * The hwirq alloc and free code reuse from sysdev/mpic_msi.c
+ */
+#include <linux/irq.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/seq_file.h>
+#include <sysdev/fsl_soc.h>
+#include <asm/hw_irq.h>
+#include <asm/ppc-pci.h>
+#include <asm/mpic.h>
+#include <asm/fsl_hcalls.h>
+
+#include "fsl_msi.h"
+#include "fsl_pci.h"
+
+#define MSIIR_OFFSET_MASK	0xfffff
+#define MSIIR_IBS_SHIFT		0
+#define MSIIR_SRS_SHIFT		5
+#define MSIIR1_IBS_SHIFT	4
+#define MSIIR1_SRS_SHIFT	0
+#define MSI_SRS_MASK		0xf
+#define MSI_IBS_MASK		0x1f
+
+#define msi_hwirq(msi, msir_index, intr_index) \
+		((msir_index) << (msi)->srs_shift | \
+		 ((intr_index) << (msi)->ibs_shift))
+
+static LIST_HEAD(msi_head);
+
+struct fsl_msi_feature {
+	u32 fsl_pic_ip;
+	u32 msiir_offset; /* Offset of MSIIR, relative to start of MSIR bank */
+};
+
+struct fsl_msi_cascade_data {
+	struct fsl_msi *msi_data;
+	int index;
+	int virq;
+};
+
+static inline u32 fsl_msi_read(u32 __iomem *base, unsigned int reg)
+{
+	return in_be32(base + (reg >> 2));
+}
+
+/*
+ * We do not need this actually. The MSIR register has been read once
+ * in the cascade interrupt. So, this MSI interrupt has been acked
+*/
+static void fsl_msi_end_irq(struct irq_data *d)
+{
+}
+
+static void fsl_msi_print_chip(struct irq_data *irqd, struct seq_file *p)
+{
+	struct fsl_msi *msi_data = irqd->domain->host_data;
+	irq_hw_number_t hwirq = irqd_to_hwirq(irqd);
+	int cascade_virq, srs;
+
+	srs = (hwirq >> msi_data->srs_shift) & MSI_SRS_MASK;
+	cascade_virq = msi_data->cascade_array[srs]->virq;
+
+	seq_printf(p, " fsl-msi-%d", cascade_virq);
+}
+
+
+static struct irq_chip fsl_msi_chip = {
+	.irq_mask	= pci_msi_mask_irq,
+	.irq_unmask	= pci_msi_unmask_irq,
+	.irq_ack	= fsl_msi_end_irq,
+	.irq_print_chip = fsl_msi_print_chip,
+};
+
+static int fsl_msi_host_map(struct irq_domain *h, unsigned int virq,
+				irq_hw_number_t hw)
+{
+	struct fsl_msi *msi_data = h->host_data;
+	struct irq_chip *chip = &fsl_msi_chip;
+
+	irq_set_status_flags(virq, IRQ_TYPE_EDGE_FALLING);
+
+	irq_set_chip_data(virq, msi_data);
+	irq_set_chip_and_handler(virq, chip, handle_edge_irq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops fsl_msi_host_ops = {
+	.map = fsl_msi_host_map,
+};
+
+static int fsl_msi_init_allocator(struct fsl_msi *msi_data)
+{
+	int rc, hwirq;
+
+	rc = msi_bitmap_alloc(&msi_data->bitmap, NR_MSI_IRQS_MAX,
+			      irq_domain_get_of_node(msi_data->irqhost));
+	if (rc)
+		return rc;
+
+	/*
+	 * Reserve all the hwirqs
+	 * The available hwirqs will be released in fsl_msi_setup_hwirq()
+	 */
+	for (hwirq = 0; hwirq < NR_MSI_IRQS_MAX; hwirq++)
+		msi_bitmap_reserve_hwirq(&msi_data->bitmap, hwirq);
+
+	return 0;
+}
+
+static void fsl_teardown_msi_irqs(struct pci_dev *pdev)
+{
+	struct msi_desc *entry;
+	struct fsl_msi *msi_data;
+	irq_hw_number_t hwirq;
+
+	msi_for_each_desc(entry, &pdev->dev, MSI_DESC_ASSOCIATED) {
+		hwirq = virq_to_hw(entry->irq);
+		msi_data = irq_get_chip_data(entry->irq);
+		irq_set_msi_desc(entry->irq, NULL);
+		irq_dispose_mapping(entry->irq);
+		entry->irq = 0;
+		msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1);
+	}
+}
+
+static void fsl_compose_msi_msg(struct pci_dev *pdev, int hwirq,
+				struct msi_msg *msg,
+				struct fsl_msi *fsl_msi_data)
+{
+	struct fsl_msi *msi_data = fsl_msi_data;
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	u64 address; /* Physical address of the MSIIR */
+	int len;
+	const __be64 *reg;
+
+	/* If the msi-address-64 property exists, then use it */
+	reg = of_get_property(hose->dn, "msi-address-64", &len);
+	if (reg && (len == sizeof(u64)))
+		address = be64_to_cpup(reg);
+	else
+		address = fsl_pci_immrbar_base(hose) + msi_data->msiir_offset;
+
+	msg->address_lo = lower_32_bits(address);
+	msg->address_hi = upper_32_bits(address);
+
+	/*
+	 * MPIC version 2.0 has erratum PIC1. It causes
+	 * that neither MSI nor MSI-X can work fine.
+	 * This is a workaround to allow MSI-X to function
+	 * properly. It only works for MSI-X, we prevent
+	 * MSI on buggy chips in fsl_setup_msi_irqs().
+	 */
+	if (msi_data->feature & MSI_HW_ERRATA_ENDIAN)
+		msg->data = __swab32(hwirq);
+	else
+		msg->data = hwirq;
+
+	pr_debug("%s: allocated srs: %d, ibs: %d\n", __func__,
+		 (hwirq >> msi_data->srs_shift) & MSI_SRS_MASK,
+		 (hwirq >> msi_data->ibs_shift) & MSI_IBS_MASK);
+}
+
+static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	struct device_node *np;
+	phandle phandle = 0;
+	int rc, hwirq = -ENOMEM;
+	unsigned int virq;
+	struct msi_desc *entry;
+	struct msi_msg msg;
+	struct fsl_msi *msi_data;
+
+	if (type == PCI_CAP_ID_MSI) {
+		/*
+		 * MPIC version 2.0 has erratum PIC1. For now MSI
+		 * could not work. So check to prevent MSI from
+		 * being used on the board with this erratum.
+		 */
+		list_for_each_entry(msi_data, &msi_head, list)
+			if (msi_data->feature & MSI_HW_ERRATA_ENDIAN)
+				return -EINVAL;
+	}
+
+	/*
+	 * If the PCI node has an fsl,msi property, then we need to use it
+	 * to find the specific MSI.
+	 */
+	np = of_parse_phandle(hose->dn, "fsl,msi", 0);
+	if (np) {
+		if (of_device_is_compatible(np, "fsl,mpic-msi") ||
+		    of_device_is_compatible(np, "fsl,vmpic-msi") ||
+		    of_device_is_compatible(np, "fsl,vmpic-msi-v4.3"))
+			phandle = np->phandle;
+		else {
+			dev_err(&pdev->dev,
+				"node %pOF has an invalid fsl,msi phandle %u\n",
+				hose->dn, np->phandle);
+			of_node_put(np);
+			return -EINVAL;
+		}
+		of_node_put(np);
+	}
+
+	msi_for_each_desc(entry, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
+		/*
+		 * Loop over all the MSI devices until we find one that has an
+		 * available interrupt.
+		 */
+		list_for_each_entry(msi_data, &msi_head, list) {
+			/*
+			 * If the PCI node has an fsl,msi property, then we
+			 * restrict our search to the corresponding MSI node.
+			 * The simplest way is to skip over MSI nodes with the
+			 * wrong phandle. Under the Freescale hypervisor, this
+			 * has the additional benefit of skipping over MSI
+			 * nodes that are not mapped in the PAMU.
+			 */
+			if (phandle && (phandle != msi_data->phandle))
+				continue;
+
+			hwirq = msi_bitmap_alloc_hwirqs(&msi_data->bitmap, 1);
+			if (hwirq >= 0)
+				break;
+		}
+
+		if (hwirq < 0) {
+			rc = hwirq;
+			dev_err(&pdev->dev, "could not allocate MSI interrupt\n");
+			goto out_free;
+		}
+
+		virq = irq_create_mapping(msi_data->irqhost, hwirq);
+
+		if (!virq) {
+			dev_err(&pdev->dev, "fail mapping hwirq %i\n", hwirq);
+			msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1);
+			rc = -ENOSPC;
+			goto out_free;
+		}
+		/* chip_data is msi_data via host->hostdata in host->map() */
+		irq_set_msi_desc(virq, entry);
+
+		fsl_compose_msi_msg(pdev, hwirq, &msg, msi_data);
+		pci_write_msi_msg(virq, &msg);
+	}
+	return 0;
+
+out_free:
+	/* free by the caller of this function */
+	return rc;
+}
+
+static irqreturn_t fsl_msi_cascade(int irq, void *data)
+{
+	struct fsl_msi *msi_data;
+	int msir_index = -1;
+	u32 msir_value = 0;
+	u32 intr_index;
+	u32 have_shift = 0;
+	struct fsl_msi_cascade_data *cascade_data = data;
+	irqreturn_t ret = IRQ_NONE;
+
+	msi_data = cascade_data->msi_data;
+
+	msir_index = cascade_data->index;
+
+	switch (msi_data->feature & FSL_PIC_IP_MASK) {
+	case FSL_PIC_IP_MPIC:
+		msir_value = fsl_msi_read(msi_data->msi_regs,
+			msir_index * 0x10);
+		break;
+	case FSL_PIC_IP_IPIC:
+		msir_value = fsl_msi_read(msi_data->msi_regs, msir_index * 0x4);
+		break;
+#ifdef CONFIG_EPAPR_PARAVIRT
+	case FSL_PIC_IP_VMPIC: {
+		unsigned int ret;
+		ret = fh_vmpic_get_msir(virq_to_hw(irq), &msir_value);
+		if (ret) {
+			pr_err("fsl-msi: fh_vmpic_get_msir() failed for "
+			       "irq %u (ret=%u)\n", irq, ret);
+			msir_value = 0;
+		}
+		break;
+	}
+#endif
+	}
+
+	while (msir_value) {
+		int err;
+		intr_index = ffs(msir_value) - 1;
+
+		err = generic_handle_domain_irq(msi_data->irqhost,
+				msi_hwirq(msi_data, msir_index,
+					  intr_index + have_shift));
+		if (!err)
+			ret = IRQ_HANDLED;
+
+		have_shift += intr_index + 1;
+		msir_value = msir_value >> (intr_index + 1);
+	}
+
+	return ret;
+}
+
+static int fsl_of_msi_remove(struct platform_device *ofdev)
+{
+	struct fsl_msi *msi = platform_get_drvdata(ofdev);
+	int virq, i;
+
+	if (msi->list.prev != NULL)
+		list_del(&msi->list);
+	for (i = 0; i < NR_MSI_REG_MAX; i++) {
+		if (msi->cascade_array[i]) {
+			virq = msi->cascade_array[i]->virq;
+
+			BUG_ON(!virq);
+
+			free_irq(virq, msi->cascade_array[i]);
+			kfree(msi->cascade_array[i]);
+			irq_dispose_mapping(virq);
+		}
+	}
+	if (msi->bitmap.bitmap)
+		msi_bitmap_free(&msi->bitmap);
+	if ((msi->feature & FSL_PIC_IP_MASK) != FSL_PIC_IP_VMPIC)
+		iounmap(msi->msi_regs);
+	kfree(msi);
+
+	return 0;
+}
+
+static struct lock_class_key fsl_msi_irq_class;
+static struct lock_class_key fsl_msi_irq_request_class;
+
+static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
+			       int offset, int irq_index)
+{
+	struct fsl_msi_cascade_data *cascade_data = NULL;
+	int virt_msir, i, ret;
+
+	virt_msir = irq_of_parse_and_map(dev->dev.of_node, irq_index);
+	if (!virt_msir) {
+		dev_err(&dev->dev, "%s: Cannot translate IRQ index %d\n",
+			__func__, irq_index);
+		return 0;
+	}
+
+	cascade_data = kzalloc(sizeof(struct fsl_msi_cascade_data), GFP_KERNEL);
+	if (!cascade_data) {
+		dev_err(&dev->dev, "No memory for MSI cascade data\n");
+		return -ENOMEM;
+	}
+	irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class,
+			      &fsl_msi_irq_request_class);
+	cascade_data->index = offset;
+	cascade_data->msi_data = msi;
+	cascade_data->virq = virt_msir;
+	msi->cascade_array[irq_index] = cascade_data;
+
+	ret = request_irq(virt_msir, fsl_msi_cascade, IRQF_NO_THREAD,
+			  "fsl-msi-cascade", cascade_data);
+	if (ret) {
+		dev_err(&dev->dev, "failed to request_irq(%d), ret = %d\n",
+			virt_msir, ret);
+		return ret;
+	}
+
+	/* Release the hwirqs corresponding to this MSI register */
+	for (i = 0; i < IRQS_PER_MSI_REG; i++)
+		msi_bitmap_free_hwirqs(&msi->bitmap,
+				       msi_hwirq(msi, offset, i), 1);
+
+	return 0;
+}
+
+static const struct of_device_id fsl_of_msi_ids[];
+static int fsl_of_msi_probe(struct platform_device *dev)
+{
+	const struct of_device_id *match;
+	struct fsl_msi *msi;
+	struct resource res, msiir;
+	int err, i, j, irq_index, count;
+	const u32 *p;
+	const struct fsl_msi_feature *features;
+	int len;
+	u32 offset;
+	struct pci_controller *phb;
+
+	match = of_match_device(fsl_of_msi_ids, &dev->dev);
+	if (!match)
+		return -EINVAL;
+	features = match->data;
+
+	printk(KERN_DEBUG "Setting up Freescale MSI support\n");
+
+	msi = kzalloc(sizeof(struct fsl_msi), GFP_KERNEL);
+	if (!msi) {
+		dev_err(&dev->dev, "No memory for MSI structure\n");
+		return -ENOMEM;
+	}
+	platform_set_drvdata(dev, msi);
+
+	msi->irqhost = irq_domain_add_linear(dev->dev.of_node,
+				      NR_MSI_IRQS_MAX, &fsl_msi_host_ops, msi);
+
+	if (msi->irqhost == NULL) {
+		dev_err(&dev->dev, "No memory for MSI irqhost\n");
+		err = -ENOMEM;
+		goto error_out;
+	}
+
+	/*
+	 * Under the Freescale hypervisor, the msi nodes don't have a 'reg'
+	 * property.  Instead, we use hypercalls to access the MSI.
+	 */
+	if ((features->fsl_pic_ip & FSL_PIC_IP_MASK) != FSL_PIC_IP_VMPIC) {
+		err = of_address_to_resource(dev->dev.of_node, 0, &res);
+		if (err) {
+			dev_err(&dev->dev, "invalid resource for node %pOF\n",
+				dev->dev.of_node);
+			goto error_out;
+		}
+
+		msi->msi_regs = ioremap(res.start, resource_size(&res));
+		if (!msi->msi_regs) {
+			err = -ENOMEM;
+			dev_err(&dev->dev, "could not map node %pOF\n",
+				dev->dev.of_node);
+			goto error_out;
+		}
+		msi->msiir_offset =
+			features->msiir_offset + (res.start & 0xfffff);
+
+		/*
+		 * First read the MSIIR/MSIIR1 offset from dts
+		 * On failure use the hardcode MSIIR offset
+		 */
+		if (of_address_to_resource(dev->dev.of_node, 1, &msiir))
+			msi->msiir_offset = features->msiir_offset +
+					    (res.start & MSIIR_OFFSET_MASK);
+		else
+			msi->msiir_offset = msiir.start & MSIIR_OFFSET_MASK;
+	}
+
+	msi->feature = features->fsl_pic_ip;
+
+	/* For erratum PIC1 on MPIC version 2.0*/
+	if ((features->fsl_pic_ip & FSL_PIC_IP_MASK) == FSL_PIC_IP_MPIC
+			&& (fsl_mpic_primary_get_version() == 0x0200))
+		msi->feature |= MSI_HW_ERRATA_ENDIAN;
+
+	/*
+	 * Remember the phandle, so that we can match with any PCI nodes
+	 * that have an "fsl,msi" property.
+	 */
+	msi->phandle = dev->dev.of_node->phandle;
+
+	err = fsl_msi_init_allocator(msi);
+	if (err) {
+		dev_err(&dev->dev, "Error allocating MSI bitmap\n");
+		goto error_out;
+	}
+
+	p = of_get_property(dev->dev.of_node, "msi-available-ranges", &len);
+
+	if (of_device_is_compatible(dev->dev.of_node, "fsl,mpic-msi-v4.3") ||
+	    of_device_is_compatible(dev->dev.of_node, "fsl,vmpic-msi-v4.3")) {
+		msi->srs_shift = MSIIR1_SRS_SHIFT;
+		msi->ibs_shift = MSIIR1_IBS_SHIFT;
+		if (p)
+			dev_warn(&dev->dev, "%s: dose not support msi-available-ranges property\n",
+				__func__);
+
+		for (irq_index = 0; irq_index < NR_MSI_REG_MSIIR1;
+		     irq_index++) {
+			err = fsl_msi_setup_hwirq(msi, dev,
+						  irq_index, irq_index);
+			if (err)
+				goto error_out;
+		}
+	} else {
+		static const u32 all_avail[] =
+			{ 0, NR_MSI_REG_MSIIR * IRQS_PER_MSI_REG };
+
+		msi->srs_shift = MSIIR_SRS_SHIFT;
+		msi->ibs_shift = MSIIR_IBS_SHIFT;
+
+		if (p && len % (2 * sizeof(u32)) != 0) {
+			dev_err(&dev->dev, "%s: Malformed msi-available-ranges property\n",
+				__func__);
+			err = -EINVAL;
+			goto error_out;
+		}
+
+		if (!p) {
+			p = all_avail;
+			len = sizeof(all_avail);
+		}
+
+		for (irq_index = 0, i = 0; i < len / (2 * sizeof(u32)); i++) {
+			if (p[i * 2] % IRQS_PER_MSI_REG ||
+			    p[i * 2 + 1] % IRQS_PER_MSI_REG) {
+				pr_warn("%s: %pOF: msi available range of %u at %u is not IRQ-aligned\n",
+				       __func__, dev->dev.of_node,
+				       p[i * 2 + 1], p[i * 2]);
+				err = -EINVAL;
+				goto error_out;
+			}
+
+			offset = p[i * 2] / IRQS_PER_MSI_REG;
+			count = p[i * 2 + 1] / IRQS_PER_MSI_REG;
+
+			for (j = 0; j < count; j++, irq_index++) {
+				err = fsl_msi_setup_hwirq(msi, dev, offset + j,
+							  irq_index);
+				if (err)
+					goto error_out;
+			}
+		}
+	}
+
+	list_add_tail(&msi->list, &msi_head);
+
+	/*
+	 * Apply the MSI ops to all the controllers.
+	 * It doesn't hurt to reassign the same ops,
+	 * but bail out if we find another MSI driver.
+	 */
+	list_for_each_entry(phb, &hose_list, list_node) {
+		if (!phb->controller_ops.setup_msi_irqs) {
+			phb->controller_ops.setup_msi_irqs = fsl_setup_msi_irqs;
+			phb->controller_ops.teardown_msi_irqs = fsl_teardown_msi_irqs;
+		} else if (phb->controller_ops.setup_msi_irqs != fsl_setup_msi_irqs) {
+			dev_err(&dev->dev, "Different MSI driver already installed!\n");
+			err = -ENODEV;
+			goto error_out;
+		}
+	}
+	return 0;
+error_out:
+	fsl_of_msi_remove(dev);
+	return err;
+}
+
+static const struct fsl_msi_feature mpic_msi_feature = {
+	.fsl_pic_ip = FSL_PIC_IP_MPIC,
+	.msiir_offset = 0x140,
+};
+
+static const struct fsl_msi_feature ipic_msi_feature = {
+	.fsl_pic_ip = FSL_PIC_IP_IPIC,
+	.msiir_offset = 0x38,
+};
+
+static const struct fsl_msi_feature vmpic_msi_feature = {
+	.fsl_pic_ip = FSL_PIC_IP_VMPIC,
+	.msiir_offset = 0,
+};
+
+static const struct of_device_id fsl_of_msi_ids[] = {
+	{
+		.compatible = "fsl,mpic-msi",
+		.data = &mpic_msi_feature,
+	},
+	{
+		.compatible = "fsl,mpic-msi-v4.3",
+		.data = &mpic_msi_feature,
+	},
+	{
+		.compatible = "fsl,ipic-msi",
+		.data = &ipic_msi_feature,
+	},
+#ifdef CONFIG_EPAPR_PARAVIRT
+	{
+		.compatible = "fsl,vmpic-msi",
+		.data = &vmpic_msi_feature,
+	},
+	{
+		.compatible = "fsl,vmpic-msi-v4.3",
+		.data = &vmpic_msi_feature,
+	},
+#endif
+	{}
+};
+
+static struct platform_driver fsl_of_msi_driver = {
+	.driver = {
+		.name = "fsl-msi",
+		.of_match_table = fsl_of_msi_ids,
+	},
+	.probe = fsl_of_msi_probe,
+	.remove = fsl_of_msi_remove,
+};
+
+static __init int fsl_of_msi_init(void)
+{
+	return platform_driver_register(&fsl_of_msi_driver);
+}
+
+subsys_initcall(fsl_of_msi_init);
diff --git a/arch/powerpc/sysdev/fsl_msi.h b/arch/powerpc/sysdev/fsl_msi.h
new file mode 100644
index 0000000000..e2a1bfc7c2
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_msi.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2007-2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Tony Li <tony.li@freescale.com>
+ *	   Jason Jin <Jason.jin@freescale.com>
+ */
+#ifndef _POWERPC_SYSDEV_FSL_MSI_H
+#define _POWERPC_SYSDEV_FSL_MSI_H
+
+#include <linux/of.h>
+#include <asm/msi_bitmap.h>
+
+#define NR_MSI_REG_MSIIR	8  /* MSIIR can index 8 MSI registers */
+#define NR_MSI_REG_MSIIR1	16 /* MSIIR1 can index 16 MSI registers */
+#define NR_MSI_REG_MAX		NR_MSI_REG_MSIIR1
+#define IRQS_PER_MSI_REG	32
+#define NR_MSI_IRQS_MAX	(NR_MSI_REG_MAX * IRQS_PER_MSI_REG)
+
+#define FSL_PIC_IP_MASK   0x0000000F
+#define FSL_PIC_IP_MPIC   0x00000001
+#define FSL_PIC_IP_IPIC   0x00000002
+#define FSL_PIC_IP_VMPIC  0x00000003
+
+#define MSI_HW_ERRATA_ENDIAN 0x00000010
+
+struct fsl_msi_cascade_data;
+
+struct fsl_msi {
+	struct irq_domain *irqhost;
+
+	unsigned long cascade_irq;
+
+	u32 msiir_offset; /* Offset of MSIIR, relative to start of CCSR */
+	u32 ibs_shift; /* Shift of interrupt bit select */
+	u32 srs_shift; /* Shift of the shared interrupt register select */
+	void __iomem *msi_regs;
+	u32 feature;
+	struct fsl_msi_cascade_data *cascade_array[NR_MSI_REG_MAX];
+
+	struct msi_bitmap bitmap;
+
+	struct list_head list;          /* support multiple MSI banks */
+
+	phandle phandle;
+};
+
+#endif /* _POWERPC_SYSDEV_FSL_MSI_H */
+
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
new file mode 100644
index 0000000000..3868483fbe
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -0,0 +1,1367 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC83xx/85xx/86xx PCI/PCIE support routing.
+ *
+ * Copyright 2007-2012 Freescale Semiconductor, Inc.
+ * Copyright 2008-2009 MontaVista Software, Inc.
+ *
+ * Initial author: Xianghua Xiao <x.xiao@freescale.com>
+ * Recode: ZHANG WEI <wei.zhang@freescale.com>
+ * Rewrite the routing for Frescale PCI and PCI Express
+ * 	Roy Zang <tie-fei.zang@freescale.com>
+ * MPC83xx PCI-Express support:
+ * 	Tony Li <tony.li@freescale.com>
+ * 	Anton Vorontsov <avorontsov@ru.mvista.com>
+ */
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/fsl/edac.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/memblock.h>
+#include <linux/log2.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/suspend.h>
+#include <linux/syscore_ops.h>
+#include <linux/uaccess.h>
+
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/machdep.h>
+#include <asm/mpc85xx.h>
+#include <asm/disassemble.h>
+#include <asm/ppc-opcode.h>
+#include <asm/swiotlb.h>
+#include <asm/setup.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+static int fsl_pcie_bus_fixup, is_mpc83xx_pci;
+
+static void quirk_fsl_pcie_early(struct pci_dev *dev)
+{
+	u8 hdr_type;
+
+	/* if we aren't a PCIe don't bother */
+	if (!pci_is_pcie(dev))
+		return;
+
+	/* if we aren't in host mode don't bother */
+	pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type);
+	if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE)
+		return;
+
+	dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL;
+	fsl_pcie_bus_fixup = 1;
+	return;
+}
+
+static int fsl_indirect_read_config(struct pci_bus *, unsigned int,
+				    int, int, u32 *);
+
+static int fsl_pcie_check_link(struct pci_controller *hose)
+{
+	u32 val = 0;
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_FSL_CFG_REG_LINK) {
+		if (hose->ops->read == fsl_indirect_read_config)
+			__indirect_read_config(hose, hose->first_busno, 0,
+					       PCIE_LTSSM, 4, &val);
+		else
+			early_read_config_dword(hose, 0, 0, PCIE_LTSSM, &val);
+		if (val < PCIE_LTSSM_L0)
+			return 1;
+	} else {
+		struct ccsr_pci __iomem *pci = hose->private_data;
+		/* for PCIe IP rev 3.0 or greater use CSR0 for link state */
+		val = (in_be32(&pci->pex_csr0) & PEX_CSR0_LTSSM_MASK)
+				>> PEX_CSR0_LTSSM_SHIFT;
+		if (val != PEX_CSR0_LTSSM_L0)
+			return 1;
+	}
+
+	return 0;
+}
+
+static int fsl_indirect_read_config(struct pci_bus *bus, unsigned int devfn,
+				    int offset, int len, u32 *val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+
+	if (fsl_pcie_check_link(hose))
+		hose->indirect_type |= PPC_INDIRECT_TYPE_NO_PCIE_LINK;
+	else
+		hose->indirect_type &= ~PPC_INDIRECT_TYPE_NO_PCIE_LINK;
+
+	return indirect_read_config(bus, devfn, offset, len, val);
+}
+
+#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx)
+
+static struct pci_ops fsl_indirect_pcie_ops =
+{
+	.read = fsl_indirect_read_config,
+	.write = indirect_write_config,
+};
+
+static u64 pci64_dma_offset;
+
+#ifdef CONFIG_SWIOTLB
+static void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
+{
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+
+	pdev->dev.bus_dma_limit =
+		hose->dma_window_base_cur + hose->dma_window_size - 1;
+}
+
+static void setup_swiotlb_ops(struct pci_controller *hose)
+{
+	if (ppc_swiotlb_enable)
+		hose->controller_ops.dma_dev_setup = pci_dma_dev_setup_swiotlb;
+}
+#else
+static inline void setup_swiotlb_ops(struct pci_controller *hose) {}
+#endif
+
+static void fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask)
+{
+	/*
+	 * Fix up PCI devices that are able to DMA to the large inbound
+	 * mapping that allows addressing any RAM address from across PCI.
+	 */
+	if (dev_is_pci(dev) && dma_mask >= pci64_dma_offset * 2 - 1) {
+		dev->bus_dma_limit = 0;
+		dev->archdata.dma_offset = pci64_dma_offset;
+	}
+}
+
+static int setup_one_atmu(struct ccsr_pci __iomem *pci,
+	unsigned int index, const struct resource *res,
+	resource_size_t offset)
+{
+	resource_size_t pci_addr = res->start - offset;
+	resource_size_t phys_addr = res->start;
+	resource_size_t size = resource_size(res);
+	u32 flags = 0x80044000; /* enable & mem R/W */
+	unsigned int i;
+
+	pr_debug("PCI MEM resource start 0x%016llx, size 0x%016llx.\n",
+		(u64)res->start, (u64)size);
+
+	if (res->flags & IORESOURCE_PREFETCH)
+		flags |= 0x10000000; /* enable relaxed ordering */
+
+	for (i = 0; size > 0; i++) {
+		unsigned int bits = min_t(u32, ilog2(size),
+					__ffs(pci_addr | phys_addr));
+
+		if (index + i >= 5)
+			return -1;
+
+		out_be32(&pci->pow[index + i].potar, pci_addr >> 12);
+		out_be32(&pci->pow[index + i].potear, (u64)pci_addr >> 44);
+		out_be32(&pci->pow[index + i].powbar, phys_addr >> 12);
+		out_be32(&pci->pow[index + i].powar, flags | (bits - 1));
+
+		pci_addr += (resource_size_t)1U << bits;
+		phys_addr += (resource_size_t)1U << bits;
+		size -= (resource_size_t)1U << bits;
+	}
+
+	return i;
+}
+
+static bool is_kdump(void)
+{
+	struct device_node *node;
+	bool ret;
+
+	node = of_find_node_by_type(NULL, "memory");
+	if (!node) {
+		WARN_ON_ONCE(1);
+		return false;
+	}
+
+	ret = of_property_read_bool(node, "linux,usable-memory");
+	of_node_put(node);
+
+	return ret;
+}
+
+/* atmu setup for fsl pci/pcie controller */
+static void setup_pci_atmu(struct pci_controller *hose)
+{
+	struct ccsr_pci __iomem *pci = hose->private_data;
+	int i, j, n, mem_log, win_idx = 3, start_idx = 1, end_idx = 4;
+	u64 mem, sz, paddr_hi = 0;
+	u64 offset = 0, paddr_lo = ULLONG_MAX;
+	u32 pcicsrbar = 0, pcicsrbar_sz;
+	u32 piwar = PIWAR_EN | PIWAR_PF | PIWAR_TGI_LOCAL |
+			PIWAR_READ_SNOOP | PIWAR_WRITE_SNOOP;
+	const u64 *reg;
+	int len;
+	bool setup_inbound;
+
+	/*
+	 * If this is kdump, we don't want to trigger a bunch of PCI
+	 * errors by closing the window on in-flight DMA.
+	 *
+	 * We still run most of the function's logic so that things like
+	 * hose->dma_window_size still get set.
+	 */
+	setup_inbound = !is_kdump();
+
+	if (of_device_is_compatible(hose->dn, "fsl,bsc9132-pcie")) {
+		/*
+		 * BSC9132 Rev1.0 has an issue where all the PEX inbound
+		 * windows have implemented the default target value as 0xf
+		 * for CCSR space.In all Freescale legacy devices the target
+		 * of 0xf is reserved for local memory space. 9132 Rev1.0
+		 * now has local memory space mapped to target 0x0 instead of
+		 * 0xf. Hence adding a workaround to remove the target 0xf
+		 * defined for memory space from Inbound window attributes.
+		 */
+		piwar &= ~PIWAR_TGI_LOCAL;
+	}
+
+	if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) {
+		if (in_be32(&pci->block_rev1) >= PCIE_IP_REV_2_2) {
+			win_idx = 2;
+			start_idx = 0;
+			end_idx = 3;
+		}
+	}
+
+	/* Disable all windows (except powar0 since it's ignored) */
+	for(i = 1; i < 5; i++)
+		out_be32(&pci->pow[i].powar, 0);
+
+	if (setup_inbound) {
+		for (i = start_idx; i < end_idx; i++)
+			out_be32(&pci->piw[i].piwar, 0);
+	}
+
+	/* Setup outbound MEM window */
+	for(i = 0, j = 1; i < 3; i++) {
+		if (!(hose->mem_resources[i].flags & IORESOURCE_MEM))
+			continue;
+
+		paddr_lo = min(paddr_lo, (u64)hose->mem_resources[i].start);
+		paddr_hi = max(paddr_hi, (u64)hose->mem_resources[i].end);
+
+		/* We assume all memory resources have the same offset */
+		offset = hose->mem_offset[i];
+		n = setup_one_atmu(pci, j, &hose->mem_resources[i], offset);
+
+		if (n < 0 || j >= 5) {
+			pr_err("Ran out of outbound PCI ATMUs for resource %d!\n", i);
+			hose->mem_resources[i].flags |= IORESOURCE_DISABLED;
+		} else
+			j += n;
+	}
+
+	/* Setup outbound IO window */
+	if (hose->io_resource.flags & IORESOURCE_IO) {
+		if (j >= 5) {
+			pr_err("Ran out of outbound PCI ATMUs for IO resource\n");
+		} else {
+			pr_debug("PCI IO resource start 0x%016llx, size 0x%016llx, "
+				 "phy base 0x%016llx.\n",
+				 (u64)hose->io_resource.start,
+				 (u64)resource_size(&hose->io_resource),
+				 (u64)hose->io_base_phys);
+			out_be32(&pci->pow[j].potar, (hose->io_resource.start >> 12));
+			out_be32(&pci->pow[j].potear, 0);
+			out_be32(&pci->pow[j].powbar, (hose->io_base_phys >> 12));
+			/* Enable, IO R/W */
+			out_be32(&pci->pow[j].powar, 0x80088000
+				| (ilog2(hose->io_resource.end
+				- hose->io_resource.start + 1) - 1));
+		}
+	}
+
+	/* convert to pci address space */
+	paddr_hi -= offset;
+	paddr_lo -= offset;
+
+	if (paddr_hi == paddr_lo) {
+		pr_err("%pOF: No outbound window space\n", hose->dn);
+		return;
+	}
+
+	if (paddr_lo == 0) {
+		pr_err("%pOF: No space for inbound window\n", hose->dn);
+		return;
+	}
+
+	/* setup PCSRBAR/PEXCSRBAR */
+	early_write_config_dword(hose, 0, 0, PCI_BASE_ADDRESS_0, 0xffffffff);
+	early_read_config_dword(hose, 0, 0, PCI_BASE_ADDRESS_0, &pcicsrbar_sz);
+	pcicsrbar_sz = ~pcicsrbar_sz + 1;
+
+	if (paddr_hi < (0x100000000ull - pcicsrbar_sz) ||
+		(paddr_lo > 0x100000000ull))
+		pcicsrbar = 0x100000000ull - pcicsrbar_sz;
+	else
+		pcicsrbar = (paddr_lo - pcicsrbar_sz) & -pcicsrbar_sz;
+	early_write_config_dword(hose, 0, 0, PCI_BASE_ADDRESS_0, pcicsrbar);
+
+	paddr_lo = min(paddr_lo, (u64)pcicsrbar);
+
+	pr_info("%pOF: PCICSRBAR @ 0x%x\n", hose->dn, pcicsrbar);
+
+	/* Setup inbound mem window */
+	mem = memblock_end_of_DRAM();
+	pr_info("%s: end of DRAM %llx\n", __func__, mem);
+
+	/*
+	 * The msi-address-64 property, if it exists, indicates the physical
+	 * address of the MSIIR register.  Normally, this register is located
+	 * inside CCSR, so the ATMU that covers all of CCSR is used. But if
+	 * this property exists, then we normally need to create a new ATMU
+	 * for it.  For now, however, we cheat.  The only entity that creates
+	 * this property is the Freescale hypervisor, and the address is
+	 * specified in the partition configuration.  Typically, the address
+	 * is located in the page immediately after the end of DDR.  If so, we
+	 * can avoid allocating a new ATMU by extending the DDR ATMU by one
+	 * page.
+	 */
+	reg = of_get_property(hose->dn, "msi-address-64", &len);
+	if (reg && (len == sizeof(u64))) {
+		u64 address = be64_to_cpup(reg);
+
+		if ((address >= mem) && (address < (mem + PAGE_SIZE))) {
+			pr_info("%pOF: extending DDR ATMU to cover MSIIR", hose->dn);
+			mem += PAGE_SIZE;
+		} else {
+			/* TODO: Create a new ATMU for MSIIR */
+			pr_warn("%pOF: msi-address-64 address of %llx is "
+				"unsupported\n", hose->dn, address);
+		}
+	}
+
+	sz = min(mem, paddr_lo);
+	mem_log = ilog2(sz);
+
+	/* PCIe can overmap inbound & outbound since RX & TX are separated */
+	if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) {
+		/* Size window to exact size if power-of-two or one size up */
+		if ((1ull << mem_log) != mem) {
+			mem_log++;
+			if ((1ull << mem_log) > mem)
+				pr_info("%pOF: Setting PCI inbound window "
+					"greater than memory size\n", hose->dn);
+		}
+
+		piwar |= ((mem_log - 1) & PIWAR_SZ_MASK);
+
+		if (setup_inbound) {
+			/* Setup inbound memory window */
+			out_be32(&pci->piw[win_idx].pitar,  0x00000000);
+			out_be32(&pci->piw[win_idx].piwbar, 0x00000000);
+			out_be32(&pci->piw[win_idx].piwar,  piwar);
+		}
+
+		win_idx--;
+		hose->dma_window_base_cur = 0x00000000;
+		hose->dma_window_size = (resource_size_t)sz;
+
+		/*
+		 * if we have >4G of memory setup second PCI inbound window to
+		 * let devices that are 64-bit address capable to work w/o
+		 * SWIOTLB and access the full range of memory
+		 */
+		if (sz != mem) {
+			mem_log = ilog2(mem);
+
+			/* Size window up if we dont fit in exact power-of-2 */
+			if ((1ull << mem_log) != mem)
+				mem_log++;
+
+			piwar = (piwar & ~PIWAR_SZ_MASK) | (mem_log - 1);
+			pci64_dma_offset = 1ULL << mem_log;
+
+			if (setup_inbound) {
+				/* Setup inbound memory window */
+				out_be32(&pci->piw[win_idx].pitar,  0x00000000);
+				out_be32(&pci->piw[win_idx].piwbear,
+						pci64_dma_offset >> 44);
+				out_be32(&pci->piw[win_idx].piwbar,
+						pci64_dma_offset >> 12);
+				out_be32(&pci->piw[win_idx].piwar,  piwar);
+			}
+
+			/*
+			 * install our own dma_set_mask handler to fixup dma_ops
+			 * and dma_offset
+			 */
+			ppc_md.dma_set_mask = fsl_pci_dma_set_mask;
+
+			pr_info("%pOF: Setup 64-bit PCI DMA window\n", hose->dn);
+		}
+	} else {
+		u64 paddr = 0;
+
+		if (setup_inbound) {
+			/* Setup inbound memory window */
+			out_be32(&pci->piw[win_idx].pitar,  paddr >> 12);
+			out_be32(&pci->piw[win_idx].piwbar, paddr >> 12);
+			out_be32(&pci->piw[win_idx].piwar,
+				 (piwar | (mem_log - 1)));
+		}
+
+		win_idx--;
+		paddr += 1ull << mem_log;
+		sz -= 1ull << mem_log;
+
+		if (sz) {
+			mem_log = ilog2(sz);
+			piwar |= (mem_log - 1);
+
+			if (setup_inbound) {
+				out_be32(&pci->piw[win_idx].pitar,
+					 paddr >> 12);
+				out_be32(&pci->piw[win_idx].piwbar,
+					 paddr >> 12);
+				out_be32(&pci->piw[win_idx].piwar, piwar);
+			}
+
+			win_idx--;
+			paddr += 1ull << mem_log;
+		}
+
+		hose->dma_window_base_cur = 0x00000000;
+		hose->dma_window_size = (resource_size_t)paddr;
+	}
+
+	if (hose->dma_window_size < mem) {
+#ifdef CONFIG_SWIOTLB
+		ppc_swiotlb_enable = 1;
+#else
+		pr_err("%pOF: ERROR: Memory size exceeds PCI ATMU ability to "
+			"map - enable CONFIG_SWIOTLB to avoid dma errors.\n",
+			 hose->dn);
+#endif
+		/* adjusting outbound windows could reclaim space in mem map */
+		if (paddr_hi < 0xffffffffull)
+			pr_warn("%pOF: WARNING: Outbound window cfg leaves "
+				"gaps in memory map. Adjusting the memory map "
+				"could reduce unnecessary bounce buffering.\n",
+				hose->dn);
+
+		pr_info("%pOF: DMA window size is 0x%llx\n", hose->dn,
+			(u64)hose->dma_window_size);
+	}
+}
+
+static void setup_pci_cmd(struct pci_controller *hose)
+{
+	u16 cmd;
+	int cap_x;
+
+	early_read_config_word(hose, 0, 0, PCI_COMMAND, &cmd);
+	cmd |= PCI_COMMAND_SERR | PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY
+		| PCI_COMMAND_IO;
+	early_write_config_word(hose, 0, 0, PCI_COMMAND, cmd);
+
+	cap_x = early_find_capability(hose, 0, 0, PCI_CAP_ID_PCIX);
+	if (cap_x) {
+		int pci_x_cmd = cap_x + PCI_X_CMD;
+		cmd = PCI_X_CMD_MAX_SPLIT | PCI_X_CMD_MAX_READ
+			| PCI_X_CMD_ERO | PCI_X_CMD_DPERR_E;
+		early_write_config_word(hose, 0, 0, pci_x_cmd, cmd);
+	} else {
+		early_write_config_byte(hose, 0, 0, PCI_LATENCY_TIMER, 0x80);
+	}
+}
+
+void fsl_pcibios_fixup_bus(struct pci_bus *bus)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	int i, is_pcie = 0, no_link;
+
+	/* The root complex bridge comes up with bogus resources,
+	 * we copy the PHB ones in.
+	 *
+	 * With the current generic PCI code, the PHB bus no longer
+	 * has bus->resource[0..4] set, so things are a bit more
+	 * tricky.
+	 */
+
+	if (fsl_pcie_bus_fixup)
+		is_pcie = early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP);
+	no_link = !!(hose->indirect_type & PPC_INDIRECT_TYPE_NO_PCIE_LINK);
+
+	if (bus->parent == hose->bus && (is_pcie || no_link)) {
+		for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; ++i) {
+			struct resource *res = bus->resource[i];
+			struct resource *par;
+
+			if (!res)
+				continue;
+			if (i == 0)
+				par = &hose->io_resource;
+			else if (i < 4)
+				par = &hose->mem_resources[i-1];
+			else par = NULL;
+
+			res->start = par ? par->start : 0;
+			res->end   = par ? par->end   : 0;
+			res->flags = par ? par->flags : 0;
+		}
+	}
+}
+
+static int fsl_add_bridge(struct platform_device *pdev, int is_primary)
+{
+	int len;
+	struct pci_controller *hose;
+	struct resource rsrc;
+	const int *bus_range;
+	u8 hdr_type, progif;
+	u32 class_code;
+	struct device_node *dev;
+	struct ccsr_pci __iomem *pci;
+	u16 temp;
+	u32 svr = mfspr(SPRN_SVR);
+
+	dev = pdev->dev.of_node;
+
+	if (!of_device_is_available(dev)) {
+		pr_warn("%pOF: disabled\n", dev);
+		return -ENODEV;
+	}
+
+	pr_debug("Adding PCI host bridge %pOF\n", dev);
+
+	/* Fetch host bridge registers address */
+	if (of_address_to_resource(dev, 0, &rsrc)) {
+		printk(KERN_WARNING "Can't get pci register base!");
+		return -ENOMEM;
+	}
+
+	/* Get bus range if any */
+	bus_range = of_get_property(dev, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int))
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+			" bus 0\n", dev);
+
+	pci_add_flags(PCI_REASSIGN_ALL_BUS);
+	hose = pcibios_alloc_controller(dev);
+	if (!hose)
+		return -ENOMEM;
+
+	/* set platform device as the parent */
+	hose->parent = &pdev->dev;
+	hose->first_busno = bus_range ? bus_range[0] : 0x0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	pr_debug("PCI memory map start 0x%016llx, size 0x%016llx\n",
+		 (u64)rsrc.start, (u64)resource_size(&rsrc));
+
+	pci = hose->private_data = ioremap(rsrc.start, resource_size(&rsrc));
+	if (!hose->private_data)
+		goto no_bridge;
+
+	setup_indirect_pci(hose, rsrc.start, rsrc.start + 0x4,
+			   PPC_INDIRECT_TYPE_BIG_ENDIAN);
+
+	if (in_be32(&pci->block_rev1) < PCIE_IP_REV_3_0)
+		hose->indirect_type |= PPC_INDIRECT_TYPE_FSL_CFG_REG_LINK;
+
+	if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) {
+		/* use fsl_indirect_read_config for PCIe */
+		hose->ops = &fsl_indirect_pcie_ops;
+		/* For PCIE read HEADER_TYPE to identify controller mode */
+		early_read_config_byte(hose, 0, 0, PCI_HEADER_TYPE, &hdr_type);
+		if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE)
+			goto no_bridge;
+
+	} else {
+		/* For PCI read PROG to identify controller mode */
+		early_read_config_byte(hose, 0, 0, PCI_CLASS_PROG, &progif);
+		if ((progif & 1) &&
+		    !of_property_read_bool(dev, "fsl,pci-agent-force-enum"))
+			goto no_bridge;
+	}
+
+	setup_pci_cmd(hose);
+
+	/* check PCI express link status */
+	if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) {
+		hose->indirect_type |= PPC_INDIRECT_TYPE_EXT_REG |
+			PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS;
+		if (fsl_pcie_check_link(hose))
+			hose->indirect_type |= PPC_INDIRECT_TYPE_NO_PCIE_LINK;
+		/* Fix Class Code to PCI_CLASS_BRIDGE_PCI_NORMAL for pre-3.0 controller */
+		if (in_be32(&pci->block_rev1) < PCIE_IP_REV_3_0) {
+			early_read_config_dword(hose, 0, 0, PCIE_FSL_CSR_CLASSCODE, &class_code);
+			class_code &= 0xff;
+			class_code |= PCI_CLASS_BRIDGE_PCI_NORMAL << 8;
+			early_write_config_dword(hose, 0, 0, PCIE_FSL_CSR_CLASSCODE, class_code);
+		}
+	} else {
+		/*
+		 * Set PBFR(PCI Bus Function Register)[10] = 1 to
+		 * disable the combining of crossing cacheline
+		 * boundary requests into one burst transaction.
+		 * PCI-X operation is not affected.
+		 * Fix erratum PCI 5 on MPC8548
+		 */
+#define PCI_BUS_FUNCTION 0x44
+#define PCI_BUS_FUNCTION_MDS 0x400	/* Master disable streaming */
+		if (((SVR_SOC_VER(svr) == SVR_8543) ||
+		     (SVR_SOC_VER(svr) == SVR_8545) ||
+		     (SVR_SOC_VER(svr) == SVR_8547) ||
+		     (SVR_SOC_VER(svr) == SVR_8548)) &&
+		    !early_find_capability(hose, 0, 0, PCI_CAP_ID_PCIX)) {
+			early_read_config_word(hose, 0, 0,
+					PCI_BUS_FUNCTION, &temp);
+			temp |= PCI_BUS_FUNCTION_MDS;
+			early_write_config_word(hose, 0, 0,
+					PCI_BUS_FUNCTION, temp);
+		}
+	}
+
+	printk(KERN_INFO "Found FSL PCI host bridge at 0x%016llx. "
+		"Firmware bus number: %d->%d\n",
+		(unsigned long long)rsrc.start, hose->first_busno,
+		hose->last_busno);
+
+	pr_debug(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n",
+		hose, hose->cfg_addr, hose->cfg_data);
+
+	/* Interpret the "ranges" property */
+	/* This also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, is_primary);
+
+	/* Setup PEX window registers */
+	setup_pci_atmu(hose);
+
+	/* Set up controller operations */
+	setup_swiotlb_ops(hose);
+
+	return 0;
+
+no_bridge:
+	iounmap(hose->private_data);
+	/* unmap cfg_data & cfg_addr separately if not on same page */
+	if (((unsigned long)hose->cfg_data & PAGE_MASK) !=
+	    ((unsigned long)hose->cfg_addr & PAGE_MASK))
+		iounmap(hose->cfg_data);
+	iounmap(hose->cfg_addr);
+	pcibios_free_controller(hose);
+	return -ENODEV;
+}
+#endif /* CONFIG_FSL_SOC_BOOKE || CONFIG_PPC_86xx */
+
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID,
+			quirk_fsl_pcie_early);
+
+#if defined(CONFIG_PPC_83xx) || defined(CONFIG_PPC_MPC512x)
+struct mpc83xx_pcie_priv {
+	void __iomem *cfg_type0;
+	void __iomem *cfg_type1;
+	u32 dev_base;
+};
+
+struct pex_inbound_window {
+	u32 ar;
+	u32 tar;
+	u32 barl;
+	u32 barh;
+};
+
+/*
+ * With the convention of u-boot, the PCIE outbound window 0 serves
+ * as configuration transactions outbound.
+ */
+#define PEX_OUTWIN0_BAR		0xCA4
+#define PEX_OUTWIN0_TAL		0xCA8
+#define PEX_OUTWIN0_TAH		0xCAC
+#define PEX_RC_INWIN_BASE	0xE60
+#define PEX_RCIWARn_EN		0x1
+
+static int mpc83xx_pcie_exclude_device(struct pci_bus *bus, unsigned int devfn)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_NO_PCIE_LINK)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	/*
+	 * Workaround for the HW bug: for Type 0 configure transactions the
+	 * PCI-E controller does not check the device number bits and just
+	 * assumes that the device number bits are 0.
+	 */
+	if (bus->number == hose->first_busno ||
+			bus->primary == hose->first_busno) {
+		if (devfn & 0xf8)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	if (ppc_md.pci_exclude_device) {
+		if (ppc_md.pci_exclude_device(hose, bus->number, devfn))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static void __iomem *mpc83xx_pcie_remap_cfg(struct pci_bus *bus,
+					    unsigned int devfn, int offset)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct mpc83xx_pcie_priv *pcie = hose->dn->data;
+	u32 dev_base = bus->number << 24 | devfn << 16;
+	int ret;
+
+	ret = mpc83xx_pcie_exclude_device(bus, devfn);
+	if (ret)
+		return NULL;
+
+	offset &= 0xfff;
+
+	/* Type 0 */
+	if (bus->number == hose->first_busno)
+		return pcie->cfg_type0 + offset;
+
+	if (pcie->dev_base == dev_base)
+		goto mapped;
+
+	out_le32(pcie->cfg_type0 + PEX_OUTWIN0_TAL, dev_base);
+
+	pcie->dev_base = dev_base;
+mapped:
+	return pcie->cfg_type1 + offset;
+}
+
+static int mpc83xx_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
+				     int offset, int len, u32 val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+
+	/* PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS */
+	if (offset == PCI_PRIMARY_BUS && bus->number == hose->first_busno)
+		val &= 0xffffff00;
+
+	return pci_generic_config_write(bus, devfn, offset, len, val);
+}
+
+static struct pci_ops mpc83xx_pcie_ops = {
+	.map_bus = mpc83xx_pcie_remap_cfg,
+	.read = pci_generic_config_read,
+	.write = mpc83xx_pcie_write_config,
+};
+
+static int __init mpc83xx_pcie_setup(struct pci_controller *hose,
+				     struct resource *reg)
+{
+	struct mpc83xx_pcie_priv *pcie;
+	u32 cfg_bar;
+	int ret = -ENOMEM;
+
+	pcie = kzalloc(sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return ret;
+
+	pcie->cfg_type0 = ioremap(reg->start, resource_size(reg));
+	if (!pcie->cfg_type0)
+		goto err0;
+
+	cfg_bar = in_le32(pcie->cfg_type0 + PEX_OUTWIN0_BAR);
+	if (!cfg_bar) {
+		/* PCI-E isn't configured. */
+		ret = -ENODEV;
+		goto err1;
+	}
+
+	pcie->cfg_type1 = ioremap(cfg_bar, 0x1000);
+	if (!pcie->cfg_type1)
+		goto err1;
+
+	WARN_ON(hose->dn->data);
+	hose->dn->data = pcie;
+	hose->ops = &mpc83xx_pcie_ops;
+	hose->indirect_type |= PPC_INDIRECT_TYPE_FSL_CFG_REG_LINK;
+
+	out_le32(pcie->cfg_type0 + PEX_OUTWIN0_TAH, 0);
+	out_le32(pcie->cfg_type0 + PEX_OUTWIN0_TAL, 0);
+
+	if (fsl_pcie_check_link(hose))
+		hose->indirect_type |= PPC_INDIRECT_TYPE_NO_PCIE_LINK;
+
+	return 0;
+err1:
+	iounmap(pcie->cfg_type0);
+err0:
+	kfree(pcie);
+	return ret;
+
+}
+
+int __init mpc83xx_add_bridge(struct device_node *dev)
+{
+	int ret;
+	int len;
+	struct pci_controller *hose;
+	struct resource rsrc_reg;
+	struct resource rsrc_cfg;
+	const int *bus_range;
+	int primary;
+
+	is_mpc83xx_pci = 1;
+
+	if (!of_device_is_available(dev)) {
+		pr_warn("%pOF: disabled by the firmware.\n",
+			dev);
+		return -ENODEV;
+	}
+	pr_debug("Adding PCI host bridge %pOF\n", dev);
+
+	/* Fetch host bridge registers address */
+	if (of_address_to_resource(dev, 0, &rsrc_reg)) {
+		printk(KERN_WARNING "Can't get pci register base!\n");
+		return -ENOMEM;
+	}
+
+	memset(&rsrc_cfg, 0, sizeof(rsrc_cfg));
+
+	if (of_address_to_resource(dev, 1, &rsrc_cfg)) {
+		printk(KERN_WARNING
+			"No pci config register base in dev tree, "
+			"using default\n");
+		/*
+		 * MPC83xx supports up to two host controllers
+		 * 	one at 0x8500 has config space registers at 0x8300
+		 * 	one at 0x8600 has config space registers at 0x8380
+		 */
+		if ((rsrc_reg.start & 0xfffff) == 0x8500)
+			rsrc_cfg.start = (rsrc_reg.start & 0xfff00000) + 0x8300;
+		else if ((rsrc_reg.start & 0xfffff) == 0x8600)
+			rsrc_cfg.start = (rsrc_reg.start & 0xfff00000) + 0x8380;
+	}
+	/*
+	 * Controller at offset 0x8500 is primary
+	 */
+	if ((rsrc_reg.start & 0xfffff) == 0x8500)
+		primary = 1;
+	else
+		primary = 0;
+
+	/* Get bus range if any */
+	bus_range = of_get_property(dev, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+		       " bus 0\n", dev);
+	}
+
+	pci_add_flags(PCI_REASSIGN_ALL_BUS);
+	hose = pcibios_alloc_controller(dev);
+	if (!hose)
+		return -ENOMEM;
+
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	if (of_device_is_compatible(dev, "fsl,mpc8314-pcie")) {
+		ret = mpc83xx_pcie_setup(hose, &rsrc_reg);
+		if (ret)
+			goto err0;
+	} else {
+		setup_indirect_pci(hose, rsrc_cfg.start,
+				   rsrc_cfg.start + 4, 0);
+	}
+
+	printk(KERN_INFO "Found FSL PCI host bridge at 0x%016llx. "
+	       "Firmware bus number: %d->%d\n",
+	       (unsigned long long)rsrc_reg.start, hose->first_busno,
+	       hose->last_busno);
+
+	pr_debug(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n",
+	    hose, hose->cfg_addr, hose->cfg_data);
+
+	/* Interpret the "ranges" property */
+	/* This also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, primary);
+
+	return 0;
+err0:
+	pcibios_free_controller(hose);
+	return ret;
+}
+#endif /* CONFIG_PPC_83xx */
+
+u64 fsl_pci_immrbar_base(struct pci_controller *hose)
+{
+#ifdef CONFIG_PPC_83xx
+	if (is_mpc83xx_pci) {
+		struct mpc83xx_pcie_priv *pcie = hose->dn->data;
+		struct pex_inbound_window *in;
+		int i;
+
+		/* Walk the Root Complex Inbound windows to match IMMR base */
+		in = pcie->cfg_type0 + PEX_RC_INWIN_BASE;
+		for (i = 0; i < 4; i++) {
+			/* not enabled, skip */
+			if (!(in_le32(&in[i].ar) & PEX_RCIWARn_EN))
+				continue;
+
+			if (get_immrbase() == in_le32(&in[i].tar))
+				return (u64)in_le32(&in[i].barh) << 32 |
+					    in_le32(&in[i].barl);
+		}
+
+		printk(KERN_WARNING "could not find PCI BAR matching IMMR\n");
+	}
+#endif
+
+#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx)
+	if (!is_mpc83xx_pci) {
+		u32 base;
+
+		pci_bus_read_config_dword(hose->bus,
+			PCI_DEVFN(0, 0), PCI_BASE_ADDRESS_0, &base);
+
+		/*
+		 * For PEXCSRBAR, bit 3-0 indicate prefetchable and
+		 * address type. So when getting base address, these
+		 * bits should be masked
+		 */
+		base &= PCI_BASE_ADDRESS_MEM_MASK;
+
+		return base;
+	}
+#endif
+
+	return 0;
+}
+
+#ifdef CONFIG_PPC_E500
+static int mcheck_handle_load(struct pt_regs *regs, u32 inst)
+{
+	unsigned int rd, ra, rb, d;
+
+	rd = get_rt(inst);
+	ra = get_ra(inst);
+	rb = get_rb(inst);
+	d = get_d(inst);
+
+	switch (get_op(inst)) {
+	case 31:
+		switch (get_xop(inst)) {
+		case OP_31_XOP_LWZX:
+		case OP_31_XOP_LWBRX:
+			regs->gpr[rd] = 0xffffffff;
+			break;
+
+		case OP_31_XOP_LWZUX:
+			regs->gpr[rd] = 0xffffffff;
+			regs->gpr[ra] += regs->gpr[rb];
+			break;
+
+		case OP_31_XOP_LBZX:
+			regs->gpr[rd] = 0xff;
+			break;
+
+		case OP_31_XOP_LBZUX:
+			regs->gpr[rd] = 0xff;
+			regs->gpr[ra] += regs->gpr[rb];
+			break;
+
+		case OP_31_XOP_LHZX:
+		case OP_31_XOP_LHBRX:
+			regs->gpr[rd] = 0xffff;
+			break;
+
+		case OP_31_XOP_LHZUX:
+			regs->gpr[rd] = 0xffff;
+			regs->gpr[ra] += regs->gpr[rb];
+			break;
+
+		case OP_31_XOP_LHAX:
+			regs->gpr[rd] = ~0UL;
+			break;
+
+		case OP_31_XOP_LHAUX:
+			regs->gpr[rd] = ~0UL;
+			regs->gpr[ra] += regs->gpr[rb];
+			break;
+
+		default:
+			return 0;
+		}
+		break;
+
+	case OP_LWZ:
+		regs->gpr[rd] = 0xffffffff;
+		break;
+
+	case OP_LWZU:
+		regs->gpr[rd] = 0xffffffff;
+		regs->gpr[ra] += (s16)d;
+		break;
+
+	case OP_LBZ:
+		regs->gpr[rd] = 0xff;
+		break;
+
+	case OP_LBZU:
+		regs->gpr[rd] = 0xff;
+		regs->gpr[ra] += (s16)d;
+		break;
+
+	case OP_LHZ:
+		regs->gpr[rd] = 0xffff;
+		break;
+
+	case OP_LHZU:
+		regs->gpr[rd] = 0xffff;
+		regs->gpr[ra] += (s16)d;
+		break;
+
+	case OP_LHA:
+		regs->gpr[rd] = ~0UL;
+		break;
+
+	case OP_LHAU:
+		regs->gpr[rd] = ~0UL;
+		regs->gpr[ra] += (s16)d;
+		break;
+
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
+static int is_in_pci_mem_space(phys_addr_t addr)
+{
+	struct pci_controller *hose;
+	struct resource *res;
+	int i;
+
+	list_for_each_entry(hose, &hose_list, list_node) {
+		if (!(hose->indirect_type & PPC_INDIRECT_TYPE_EXT_REG))
+			continue;
+
+		for (i = 0; i < 3; i++) {
+			res = &hose->mem_resources[i];
+			if ((res->flags & IORESOURCE_MEM) &&
+				addr >= res->start && addr <= res->end)
+				return 1;
+		}
+	}
+	return 0;
+}
+
+int fsl_pci_mcheck_exception(struct pt_regs *regs)
+{
+	u32 inst;
+	int ret;
+	phys_addr_t addr = 0;
+
+	/* Let KVM/QEMU deal with the exception */
+	if (regs->msr & MSR_GS)
+		return 0;
+
+#ifdef CONFIG_PHYS_64BIT
+	addr = mfspr(SPRN_MCARU);
+	addr <<= 32;
+#endif
+	addr += mfspr(SPRN_MCAR);
+
+	if (is_in_pci_mem_space(addr)) {
+		if (user_mode(regs))
+			ret = copy_from_user_nofault(&inst,
+					(void __user *)regs->nip, sizeof(inst));
+		else
+			ret = get_kernel_nofault(inst, (void *)regs->nip);
+
+		if (!ret && mcheck_handle_load(regs, inst)) {
+			regs_add_return_ip(regs, 4);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+#endif
+
+#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx)
+static const struct of_device_id pci_ids[] = {
+	{ .compatible = "fsl,mpc8540-pci", },
+	{ .compatible = "fsl,mpc8548-pcie", },
+	{ .compatible = "fsl,mpc8610-pci", },
+	{ .compatible = "fsl,mpc8641-pcie", },
+	{ .compatible = "fsl,qoriq-pcie", },
+	{ .compatible = "fsl,qoriq-pcie-v2.1", },
+	{ .compatible = "fsl,qoriq-pcie-v2.2", },
+	{ .compatible = "fsl,qoriq-pcie-v2.3", },
+	{ .compatible = "fsl,qoriq-pcie-v2.4", },
+	{ .compatible = "fsl,qoriq-pcie-v3.0", },
+
+	/*
+	 * The following entries are for compatibility with older device
+	 * trees.
+	 */
+	{ .compatible = "fsl,p1022-pcie", },
+	{ .compatible = "fsl,p4080-pcie", },
+
+	{},
+};
+
+struct device_node *fsl_pci_primary;
+
+void __init fsl_pci_assign_primary(void)
+{
+	struct device_node *np;
+
+	/* Callers can specify the primary bus using other means. */
+	if (fsl_pci_primary)
+		return;
+
+	/* If a PCI host bridge contains an ISA node, it's primary. */
+	np = of_find_node_by_type(NULL, "isa");
+	while ((fsl_pci_primary = of_get_parent(np))) {
+		of_node_put(np);
+		np = fsl_pci_primary;
+
+		if (of_match_node(pci_ids, np) && of_device_is_available(np))
+			return;
+	}
+
+	/*
+	 * If there's no PCI host bridge with ISA then check for
+	 * PCI host bridge with alias "pci0" (first PCI host bridge).
+	 */
+	np = of_find_node_by_path("pci0");
+	if (np && of_match_node(pci_ids, np) && of_device_is_available(np)) {
+		fsl_pci_primary = np;
+		of_node_put(np);
+		return;
+	}
+	if (np)
+		of_node_put(np);
+
+	/*
+	 * If there's no PCI host bridge with ISA, arbitrarily
+	 * designate one as primary.  This can go away once
+	 * various bugs with primary-less systems are fixed.
+	 */
+	for_each_matching_node(np, pci_ids) {
+		if (of_device_is_available(np)) {
+			fsl_pci_primary = np;
+			return;
+		}
+	}
+}
+
+#ifdef CONFIG_PM_SLEEP
+static irqreturn_t fsl_pci_pme_handle(int irq, void *dev_id)
+{
+	struct pci_controller *hose = dev_id;
+	struct ccsr_pci __iomem *pci = hose->private_data;
+	u32 dr;
+
+	dr = in_be32(&pci->pex_pme_mes_dr);
+	if (!dr)
+		return IRQ_NONE;
+
+	out_be32(&pci->pex_pme_mes_dr, dr);
+
+	return IRQ_HANDLED;
+}
+
+static int fsl_pci_pme_probe(struct pci_controller *hose)
+{
+	struct ccsr_pci __iomem *pci;
+	struct pci_dev *dev;
+	int pme_irq;
+	int res;
+	u16 pms;
+
+	/* Get hose's pci_dev */
+	dev = list_first_entry(&hose->bus->devices, typeof(*dev), bus_list);
+
+	/* PME Disable */
+	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pms);
+	pms &= ~PCI_PM_CTRL_PME_ENABLE;
+	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pms);
+
+	pme_irq = irq_of_parse_and_map(hose->dn, 0);
+	if (!pme_irq) {
+		dev_err(&dev->dev, "Failed to map PME interrupt.\n");
+
+		return -ENXIO;
+	}
+
+	res = devm_request_irq(hose->parent, pme_irq,
+			fsl_pci_pme_handle,
+			IRQF_SHARED,
+			"[PCI] PME", hose);
+	if (res < 0) {
+		dev_err(&dev->dev, "Unable to request irq %d for PME\n", pme_irq);
+		irq_dispose_mapping(pme_irq);
+
+		return -ENODEV;
+	}
+
+	pci = hose->private_data;
+
+	/* Enable PTOD, ENL23D & EXL23D */
+	clrbits32(&pci->pex_pme_mes_disr,
+		  PME_DISR_EN_PTOD | PME_DISR_EN_ENL23D | PME_DISR_EN_EXL23D);
+
+	out_be32(&pci->pex_pme_mes_ier, 0);
+	setbits32(&pci->pex_pme_mes_ier,
+		  PME_DISR_EN_PTOD | PME_DISR_EN_ENL23D | PME_DISR_EN_EXL23D);
+
+	/* PME Enable */
+	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pms);
+	pms |= PCI_PM_CTRL_PME_ENABLE;
+	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pms);
+
+	return 0;
+}
+
+static void send_pme_turnoff_message(struct pci_controller *hose)
+{
+	struct ccsr_pci __iomem *pci = hose->private_data;
+	u32 dr;
+	int i;
+
+	/* Send PME_Turn_Off Message Request */
+	setbits32(&pci->pex_pmcr, PEX_PMCR_PTOMR);
+
+	/* Wait trun off done */
+	for (i = 0; i < 150; i++) {
+		dr = in_be32(&pci->pex_pme_mes_dr);
+		if (dr) {
+			out_be32(&pci->pex_pme_mes_dr, dr);
+			break;
+		}
+
+		udelay(1000);
+	}
+}
+
+static void fsl_pci_syscore_do_suspend(struct pci_controller *hose)
+{
+	send_pme_turnoff_message(hose);
+}
+
+static int fsl_pci_syscore_suspend(void)
+{
+	struct pci_controller *hose, *tmp;
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+		fsl_pci_syscore_do_suspend(hose);
+
+	return 0;
+}
+
+static void fsl_pci_syscore_do_resume(struct pci_controller *hose)
+{
+	struct ccsr_pci __iomem *pci = hose->private_data;
+	u32 dr;
+	int i;
+
+	/* Send Exit L2 State Message */
+	setbits32(&pci->pex_pmcr, PEX_PMCR_EXL2S);
+
+	/* Wait exit done */
+	for (i = 0; i < 150; i++) {
+		dr = in_be32(&pci->pex_pme_mes_dr);
+		if (dr) {
+			out_be32(&pci->pex_pme_mes_dr, dr);
+			break;
+		}
+
+		udelay(1000);
+	}
+
+	setup_pci_atmu(hose);
+}
+
+static void fsl_pci_syscore_resume(void)
+{
+	struct pci_controller *hose, *tmp;
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+		fsl_pci_syscore_do_resume(hose);
+}
+
+static struct syscore_ops pci_syscore_pm_ops = {
+	.suspend = fsl_pci_syscore_suspend,
+	.resume = fsl_pci_syscore_resume,
+};
+#endif
+
+void fsl_pcibios_fixup_phb(struct pci_controller *phb)
+{
+#ifdef CONFIG_PM_SLEEP
+	fsl_pci_pme_probe(phb);
+#endif
+}
+
+static int add_err_dev(struct platform_device *pdev)
+{
+	struct platform_device *errdev;
+	struct mpc85xx_edac_pci_plat_data pd = {
+		.of_node = pdev->dev.of_node
+	};
+
+	errdev = platform_device_register_resndata(&pdev->dev,
+						   "mpc85xx-pci-edac",
+						   PLATFORM_DEVID_AUTO,
+						   pdev->resource,
+						   pdev->num_resources,
+						   &pd, sizeof(pd));
+
+	return PTR_ERR_OR_ZERO(errdev);
+}
+
+static int fsl_pci_probe(struct platform_device *pdev)
+{
+	struct device_node *node;
+	int ret;
+
+	node = pdev->dev.of_node;
+	ret = fsl_add_bridge(pdev, fsl_pci_primary == node);
+	if (ret)
+		return ret;
+
+	ret = add_err_dev(pdev);
+	if (ret)
+		dev_err(&pdev->dev, "couldn't register error device: %d\n",
+			ret);
+
+	return 0;
+}
+
+static struct platform_driver fsl_pci_driver = {
+	.driver = {
+		.name = "fsl-pci",
+		.of_match_table = pci_ids,
+	},
+	.probe = fsl_pci_probe,
+	.driver_managed_dma = true,
+};
+
+static int __init fsl_pci_init(void)
+{
+#ifdef CONFIG_PM_SLEEP
+	register_syscore_ops(&pci_syscore_pm_ops);
+#endif
+	return platform_driver_register(&fsl_pci_driver);
+}
+arch_initcall(fsl_pci_init);
+#endif
diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h
new file mode 100644
index 0000000000..3bc4ab9d83
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_pci.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * MPC85xx/86xx PCI Express structure define
+ *
+ * Copyright 2007,2011 Freescale Semiconductor, Inc
+ */
+
+#ifdef __KERNEL__
+#ifndef __POWERPC_FSL_PCI_H
+#define __POWERPC_FSL_PCI_H
+
+struct platform_device;
+
+
+/* FSL PCI controller BRR1 register */
+#define PCI_FSL_BRR1      0xbf8
+#define PCI_FSL_BRR1_VER 0xffff
+
+#define PCIE_LTSSM	0x0404		/* PCIE Link Training and Status */
+#define PCIE_LTSSM_L0	0x16		/* L0 state */
+#define PCIE_FSL_CSR_CLASSCODE	0x474	/* FSL GPEX CSR */
+#define PCIE_IP_REV_2_2		0x02080202 /* PCIE IP block version Rev2.2 */
+#define PCIE_IP_REV_3_0		0x02080300 /* PCIE IP block version Rev3.0 */
+#define PIWAR_EN		0x80000000	/* Enable */
+#define PIWAR_PF		0x20000000	/* prefetch */
+#define PIWAR_TGI_LOCAL		0x00f00000	/* target - local memory */
+#define PIWAR_READ_SNOOP	0x00050000
+#define PIWAR_WRITE_SNOOP	0x00005000
+#define PIWAR_SZ_MASK          0x0000003f
+
+#define PEX_PMCR_PTOMR		0x1
+#define PEX_PMCR_EXL2S		0x2
+
+#define PME_DISR_EN_PTOD	0x00008000
+#define PME_DISR_EN_ENL23D	0x00002000
+#define PME_DISR_EN_EXL23D	0x00001000
+
+/* PCI/PCI Express outbound window reg */
+struct pci_outbound_window_regs {
+	__be32	potar;	/* 0x.0 - Outbound translation address register */
+	__be32	potear;	/* 0x.4 - Outbound translation extended address register */
+	__be32	powbar;	/* 0x.8 - Outbound window base address register */
+	u8	res1[4];
+	__be32	powar;	/* 0x.10 - Outbound window attributes register */
+	u8	res2[12];
+};
+
+/* PCI/PCI Express inbound window reg */
+struct pci_inbound_window_regs {
+	__be32	pitar;	/* 0x.0 - Inbound translation address register */
+	u8	res1[4];
+	__be32	piwbar;	/* 0x.8 - Inbound window base address register */
+	__be32	piwbear;	/* 0x.c - Inbound window base extended address register */
+	__be32	piwar;	/* 0x.10 - Inbound window attributes register */
+	u8	res2[12];
+};
+
+/* PCI/PCI Express IO block registers for 85xx/86xx */
+struct ccsr_pci {
+	__be32	config_addr;		/* 0x.000 - PCI/PCIE Configuration Address Register */
+	__be32	config_data;		/* 0x.004 - PCI/PCIE Configuration Data Register */
+	__be32	int_ack;		/* 0x.008 - PCI Interrupt Acknowledge Register */
+	__be32	pex_otb_cpl_tor;	/* 0x.00c - PCIE Outbound completion timeout register */
+	__be32	pex_conf_tor;		/* 0x.010 - PCIE configuration timeout register */
+	__be32	pex_config;		/* 0x.014 - PCIE CONFIG Register */
+	__be32	pex_int_status;		/* 0x.018 - PCIE interrupt status */
+	u8	res2[4];
+	__be32	pex_pme_mes_dr;		/* 0x.020 - PCIE PME and message detect register */
+	__be32	pex_pme_mes_disr;	/* 0x.024 - PCIE PME and message disable register */
+	__be32	pex_pme_mes_ier;	/* 0x.028 - PCIE PME and message interrupt enable register */
+	__be32	pex_pmcr;		/* 0x.02c - PCIE power management command register */
+	u8	res3[3016];
+	__be32	block_rev1;	/* 0x.bf8 - PCIE Block Revision register 1 */
+	__be32	block_rev2;	/* 0x.bfc - PCIE Block Revision register 2 */
+
+/* PCI/PCI Express outbound window 0-4
+ * Window 0 is the default window and is the only window enabled upon reset.
+ * The default outbound register set is used when a transaction misses
+ * in all of the other outbound windows.
+ */
+	struct pci_outbound_window_regs pow[5];
+	u8	res14[96];
+	struct pci_inbound_window_regs	pmit;	/* 0xd00 - 0xd9c Inbound MSI */
+	u8	res6[96];
+/* PCI/PCI Express inbound window 3-0
+ * inbound window 1 supports only a 32-bit base address and does not
+ * define an inbound window base extended address register.
+ */
+	struct pci_inbound_window_regs piw[4];
+
+	__be32	pex_err_dr;		/* 0x.e00 - PCI/PCIE error detect register */
+	u8	res21[4];
+	__be32	pex_err_en;		/* 0x.e08 - PCI/PCIE error interrupt enable register */
+	u8	res22[4];
+	__be32	pex_err_disr;		/* 0x.e10 - PCI/PCIE error disable register */
+	u8	res23[12];
+	__be32	pex_err_cap_stat;	/* 0x.e20 - PCI/PCIE error capture status register */
+	u8	res24[4];
+	__be32	pex_err_cap_r0;		/* 0x.e28 - PCIE error capture register 0 */
+	__be32	pex_err_cap_r1;		/* 0x.e2c - PCIE error capture register 0 */
+	__be32	pex_err_cap_r2;		/* 0x.e30 - PCIE error capture register 0 */
+	__be32	pex_err_cap_r3;		/* 0x.e34 - PCIE error capture register 0 */
+	u8	res_e38[200];
+	__be32	pdb_stat;		/* 0x.f00 - PCIE Debug Status */
+	u8	res_f04[16];
+	__be32	pex_csr0;		/* 0x.f14 - PEX Control/Status register 0*/
+#define PEX_CSR0_LTSSM_MASK	0xFC
+#define PEX_CSR0_LTSSM_SHIFT	2
+#define PEX_CSR0_LTSSM_L0	0x11
+	__be32	pex_csr1;		/* 0x.f18 - PEX Control/Status register 1*/
+	u8	res_f1c[228];
+
+};
+
+extern void fsl_pcibios_fixup_bus(struct pci_bus *bus);
+extern void fsl_pcibios_fixup_phb(struct pci_controller *phb);
+extern int mpc83xx_add_bridge(struct device_node *dev);
+u64 fsl_pci_immrbar_base(struct pci_controller *hose);
+
+extern struct device_node *fsl_pci_primary;
+
+#ifdef CONFIG_PCI
+void __init fsl_pci_assign_primary(void);
+#else
+static inline void fsl_pci_assign_primary(void) {}
+#endif
+
+#ifdef CONFIG_FSL_PCI
+extern int fsl_pci_mcheck_exception(struct pt_regs *);
+#else
+static inline int fsl_pci_mcheck_exception(struct pt_regs *regs) {return 0; }
+#endif
+
+#endif /* __POWERPC_FSL_PCI_H */
+#endif /* __KERNEL__ */
diff --git a/arch/powerpc/sysdev/fsl_pmc.c b/arch/powerpc/sysdev/fsl_pmc.c
new file mode 100644
index 0000000000..9f6dd11c13
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_pmc.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Suspend/resume support
+ *
+ * Copyright 2009  MontaVista Software, Inc.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/suspend.h>
+#include <linux/delay.h>
+#include <linux/mod_devicetable.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+
+struct pmc_regs {
+	__be32 devdisr;
+	__be32 devdisr2;
+	__be32 :32;
+	__be32 :32;
+	__be32 pmcsr;
+#define PMCSR_SLP	(1 << 17)
+};
+
+static struct device *pmc_dev;
+static struct pmc_regs __iomem *pmc_regs;
+
+static int pmc_suspend_enter(suspend_state_t state)
+{
+	int ret;
+
+	setbits32(&pmc_regs->pmcsr, PMCSR_SLP);
+	/* At this point, the CPU is asleep. */
+
+	/* Upon resume, wait for SLP bit to be clear. */
+	ret = spin_event_timeout((in_be32(&pmc_regs->pmcsr) & PMCSR_SLP) == 0,
+				 10000, 10) ? 0 : -ETIMEDOUT;
+	if (ret)
+		dev_err(pmc_dev, "tired waiting for SLP bit to clear\n");
+	return ret;
+}
+
+static int pmc_suspend_valid(suspend_state_t state)
+{
+	if (state != PM_SUSPEND_STANDBY)
+		return 0;
+	return 1;
+}
+
+static const struct platform_suspend_ops pmc_suspend_ops = {
+	.valid = pmc_suspend_valid,
+	.enter = pmc_suspend_enter,
+};
+
+static int pmc_probe(struct platform_device *ofdev)
+{
+	pmc_regs = of_iomap(ofdev->dev.of_node, 0);
+	if (!pmc_regs)
+		return -ENOMEM;
+
+	pmc_dev = &ofdev->dev;
+	suspend_set_ops(&pmc_suspend_ops);
+	return 0;
+}
+
+static const struct of_device_id pmc_ids[] = {
+	{ .compatible = "fsl,mpc8548-pmc", },
+	{ .compatible = "fsl,mpc8641d-pmc", },
+	{ },
+};
+
+static struct platform_driver pmc_driver = {
+	.driver = {
+		.name = "fsl-pmc",
+		.of_match_table = pmc_ids,
+	},
+	.probe = pmc_probe,
+};
+
+builtin_platform_driver(pmc_driver);
diff --git a/arch/powerpc/sysdev/fsl_rcpm.c b/arch/powerpc/sysdev/fsl_rcpm.c
new file mode 100644
index 0000000000..aacd0be613
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_rcpm.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RCPM(Run Control/Power Management) support
+ *
+ * Copyright 2012-2015 Freescale Semiconductor Inc.
+ *
+ * Author: Chenhui Zhao <chenhui.zhao@freescale.com>
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/of_address.h>
+#include <linux/export.h>
+
+#include <asm/io.h>
+#include <linux/fsl/guts.h>
+#include <asm/cputhreads.h>
+#include <asm/fsl_pm.h>
+#include <asm/smp.h>
+
+static struct ccsr_rcpm_v1 __iomem *rcpm_v1_regs;
+static struct ccsr_rcpm_v2 __iomem *rcpm_v2_regs;
+static unsigned int fsl_supported_pm_modes;
+
+static void rcpm_v1_irq_mask(int cpu)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	unsigned int mask = 1 << hw_cpu;
+
+	setbits32(&rcpm_v1_regs->cpmimr, mask);
+	setbits32(&rcpm_v1_regs->cpmcimr, mask);
+	setbits32(&rcpm_v1_regs->cpmmcmr, mask);
+	setbits32(&rcpm_v1_regs->cpmnmimr, mask);
+}
+
+static void rcpm_v2_irq_mask(int cpu)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	unsigned int mask = 1 << hw_cpu;
+
+	setbits32(&rcpm_v2_regs->tpmimr0, mask);
+	setbits32(&rcpm_v2_regs->tpmcimr0, mask);
+	setbits32(&rcpm_v2_regs->tpmmcmr0, mask);
+	setbits32(&rcpm_v2_regs->tpmnmimr0, mask);
+}
+
+static void rcpm_v1_irq_unmask(int cpu)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	unsigned int mask = 1 << hw_cpu;
+
+	clrbits32(&rcpm_v1_regs->cpmimr, mask);
+	clrbits32(&rcpm_v1_regs->cpmcimr, mask);
+	clrbits32(&rcpm_v1_regs->cpmmcmr, mask);
+	clrbits32(&rcpm_v1_regs->cpmnmimr, mask);
+}
+
+static void rcpm_v2_irq_unmask(int cpu)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	unsigned int mask = 1 << hw_cpu;
+
+	clrbits32(&rcpm_v2_regs->tpmimr0, mask);
+	clrbits32(&rcpm_v2_regs->tpmcimr0, mask);
+	clrbits32(&rcpm_v2_regs->tpmmcmr0, mask);
+	clrbits32(&rcpm_v2_regs->tpmnmimr0, mask);
+}
+
+static void rcpm_v1_set_ip_power(bool enable, u32 mask)
+{
+	if (enable)
+		setbits32(&rcpm_v1_regs->ippdexpcr, mask);
+	else
+		clrbits32(&rcpm_v1_regs->ippdexpcr, mask);
+}
+
+static void rcpm_v2_set_ip_power(bool enable, u32 mask)
+{
+	if (enable)
+		setbits32(&rcpm_v2_regs->ippdexpcr[0], mask);
+	else
+		clrbits32(&rcpm_v2_regs->ippdexpcr[0], mask);
+}
+
+static void rcpm_v1_cpu_enter_state(int cpu, int state)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	unsigned int mask = 1 << hw_cpu;
+
+	switch (state) {
+	case E500_PM_PH10:
+		setbits32(&rcpm_v1_regs->cdozcr, mask);
+		break;
+	case E500_PM_PH15:
+		setbits32(&rcpm_v1_regs->cnapcr, mask);
+		break;
+	default:
+		pr_warn("Unknown cpu PM state (%d)\n", state);
+		break;
+	}
+}
+
+static void rcpm_v2_cpu_enter_state(int cpu, int state)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	u32 mask = 1 << cpu_core_index_of_thread(cpu);
+
+	switch (state) {
+	case E500_PM_PH10:
+		/* one bit corresponds to one thread for PH10 of 6500 */
+		setbits32(&rcpm_v2_regs->tph10setr0, 1 << hw_cpu);
+		break;
+	case E500_PM_PH15:
+		setbits32(&rcpm_v2_regs->pcph15setr, mask);
+		break;
+	case E500_PM_PH20:
+		setbits32(&rcpm_v2_regs->pcph20setr, mask);
+		break;
+	case E500_PM_PH30:
+		setbits32(&rcpm_v2_regs->pcph30setr, mask);
+		break;
+	default:
+		pr_warn("Unknown cpu PM state (%d)\n", state);
+	}
+}
+
+static void rcpm_v1_cpu_die(int cpu)
+{
+	rcpm_v1_cpu_enter_state(cpu, E500_PM_PH15);
+}
+
+#ifdef CONFIG_PPC64
+static void qoriq_disable_thread(int cpu)
+{
+	int thread = cpu_thread_in_core(cpu);
+
+	book3e_stop_thread(thread);
+}
+#endif
+
+static void rcpm_v2_cpu_die(int cpu)
+{
+#ifdef CONFIG_PPC64
+	int primary;
+
+	if (threads_per_core == 2) {
+		primary = cpu_first_thread_sibling(cpu);
+		if (cpu_is_offline(primary) && cpu_is_offline(primary + 1)) {
+			/* if both threads are offline, put the cpu in PH20 */
+			rcpm_v2_cpu_enter_state(cpu, E500_PM_PH20);
+		} else {
+			/* if only one thread is offline, disable the thread */
+			qoriq_disable_thread(cpu);
+		}
+	}
+#endif
+
+	if (threads_per_core == 1)
+		rcpm_v2_cpu_enter_state(cpu, E500_PM_PH20);
+}
+
+static void rcpm_v1_cpu_exit_state(int cpu, int state)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	unsigned int mask = 1 << hw_cpu;
+
+	switch (state) {
+	case E500_PM_PH10:
+		clrbits32(&rcpm_v1_regs->cdozcr, mask);
+		break;
+	case E500_PM_PH15:
+		clrbits32(&rcpm_v1_regs->cnapcr, mask);
+		break;
+	default:
+		pr_warn("Unknown cpu PM state (%d)\n", state);
+		break;
+	}
+}
+
+static void rcpm_v1_cpu_up_prepare(int cpu)
+{
+	rcpm_v1_cpu_exit_state(cpu, E500_PM_PH15);
+	rcpm_v1_irq_unmask(cpu);
+}
+
+static void rcpm_v2_cpu_exit_state(int cpu, int state)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	u32 mask = 1 << cpu_core_index_of_thread(cpu);
+
+	switch (state) {
+	case E500_PM_PH10:
+		setbits32(&rcpm_v2_regs->tph10clrr0, 1 << hw_cpu);
+		break;
+	case E500_PM_PH15:
+		setbits32(&rcpm_v2_regs->pcph15clrr, mask);
+		break;
+	case E500_PM_PH20:
+		setbits32(&rcpm_v2_regs->pcph20clrr, mask);
+		break;
+	case E500_PM_PH30:
+		setbits32(&rcpm_v2_regs->pcph30clrr, mask);
+		break;
+	default:
+		pr_warn("Unknown cpu PM state (%d)\n", state);
+	}
+}
+
+static void rcpm_v2_cpu_up_prepare(int cpu)
+{
+	rcpm_v2_cpu_exit_state(cpu, E500_PM_PH20);
+	rcpm_v2_irq_unmask(cpu);
+}
+
+static int rcpm_v1_plat_enter_state(int state)
+{
+	u32 *pmcsr_reg = &rcpm_v1_regs->powmgtcsr;
+	int ret = 0;
+	int result;
+
+	switch (state) {
+	case PLAT_PM_SLEEP:
+		setbits32(pmcsr_reg, RCPM_POWMGTCSR_SLP);
+
+		/* Upon resume, wait for RCPM_POWMGTCSR_SLP bit to be clear. */
+		result = spin_event_timeout(
+		  !(in_be32(pmcsr_reg) & RCPM_POWMGTCSR_SLP), 10000, 10);
+		if (!result) {
+			pr_err("timeout waiting for SLP bit to be cleared\n");
+			ret = -ETIMEDOUT;
+		}
+		break;
+	default:
+		pr_warn("Unknown platform PM state (%d)", state);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static int rcpm_v2_plat_enter_state(int state)
+{
+	u32 *pmcsr_reg = &rcpm_v2_regs->powmgtcsr;
+	int ret = 0;
+	int result;
+
+	switch (state) {
+	case PLAT_PM_LPM20:
+		/* clear previous LPM20 status */
+		setbits32(pmcsr_reg, RCPM_POWMGTCSR_P_LPM20_ST);
+		/* enter LPM20 status */
+		setbits32(pmcsr_reg, RCPM_POWMGTCSR_LPM20_RQ);
+
+		/* At this point, the device is in LPM20 status. */
+
+		/* resume ... */
+		result = spin_event_timeout(
+		  !(in_be32(pmcsr_reg) & RCPM_POWMGTCSR_LPM20_ST), 10000, 10);
+		if (!result) {
+			pr_err("timeout waiting for LPM20 bit to be cleared\n");
+			ret = -ETIMEDOUT;
+		}
+		break;
+	default:
+		pr_warn("Unknown platform PM state (%d)\n", state);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static int rcpm_v1_plat_enter_sleep(void)
+{
+	return rcpm_v1_plat_enter_state(PLAT_PM_SLEEP);
+}
+
+static int rcpm_v2_plat_enter_sleep(void)
+{
+	return rcpm_v2_plat_enter_state(PLAT_PM_LPM20);
+}
+
+static void rcpm_common_freeze_time_base(u32 *tben_reg, int freeze)
+{
+	static u32 mask;
+
+	if (freeze) {
+		mask = in_be32(tben_reg);
+		clrbits32(tben_reg, mask);
+	} else {
+		setbits32(tben_reg, mask);
+	}
+
+	/* read back to push the previous write */
+	in_be32(tben_reg);
+}
+
+static void rcpm_v1_freeze_time_base(bool freeze)
+{
+	rcpm_common_freeze_time_base(&rcpm_v1_regs->ctbenr, freeze);
+}
+
+static void rcpm_v2_freeze_time_base(bool freeze)
+{
+	rcpm_common_freeze_time_base(&rcpm_v2_regs->pctbenr, freeze);
+}
+
+static unsigned int rcpm_get_pm_modes(void)
+{
+	return fsl_supported_pm_modes;
+}
+
+static const struct fsl_pm_ops qoriq_rcpm_v1_ops = {
+	.irq_mask = rcpm_v1_irq_mask,
+	.irq_unmask = rcpm_v1_irq_unmask,
+	.cpu_enter_state = rcpm_v1_cpu_enter_state,
+	.cpu_exit_state = rcpm_v1_cpu_exit_state,
+	.cpu_up_prepare = rcpm_v1_cpu_up_prepare,
+	.cpu_die = rcpm_v1_cpu_die,
+	.plat_enter_sleep = rcpm_v1_plat_enter_sleep,
+	.set_ip_power = rcpm_v1_set_ip_power,
+	.freeze_time_base = rcpm_v1_freeze_time_base,
+	.get_pm_modes = rcpm_get_pm_modes,
+};
+
+static const struct fsl_pm_ops qoriq_rcpm_v2_ops = {
+	.irq_mask = rcpm_v2_irq_mask,
+	.irq_unmask = rcpm_v2_irq_unmask,
+	.cpu_enter_state = rcpm_v2_cpu_enter_state,
+	.cpu_exit_state = rcpm_v2_cpu_exit_state,
+	.cpu_up_prepare = rcpm_v2_cpu_up_prepare,
+	.cpu_die = rcpm_v2_cpu_die,
+	.plat_enter_sleep = rcpm_v2_plat_enter_sleep,
+	.set_ip_power = rcpm_v2_set_ip_power,
+	.freeze_time_base = rcpm_v2_freeze_time_base,
+	.get_pm_modes = rcpm_get_pm_modes,
+};
+
+static const struct of_device_id rcpm_matches[] = {
+	{
+		.compatible = "fsl,qoriq-rcpm-1.0",
+		.data = &qoriq_rcpm_v1_ops,
+	},
+	{
+		.compatible = "fsl,qoriq-rcpm-2.0",
+		.data = &qoriq_rcpm_v2_ops,
+	},
+	{
+		.compatible = "fsl,qoriq-rcpm-2.1",
+		.data = &qoriq_rcpm_v2_ops,
+	},
+	{},
+};
+
+int __init fsl_rcpm_init(void)
+{
+	struct device_node *np;
+	const struct of_device_id *match;
+	void __iomem *base;
+
+	np = of_find_matching_node_and_match(NULL, rcpm_matches, &match);
+	if (!np)
+		return 0;
+
+	base = of_iomap(np, 0);
+	of_node_put(np);
+	if (!base) {
+		pr_err("of_iomap() error.\n");
+		return -ENOMEM;
+	}
+
+	rcpm_v1_regs = base;
+	rcpm_v2_regs = base;
+
+	/* support sleep by default */
+	fsl_supported_pm_modes = FSL_PM_SLEEP;
+
+	qoriq_pm_ops = match->data;
+
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
new file mode 100644
index 0000000000..f9b214b299
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -0,0 +1,763 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale MPC85xx/MPC86xx RapidIO support
+ *
+ * Copyright 2009 Sysgo AG
+ * Thomas Moll <thomas.moll@sysgo.com>
+ * - fixed maintenance access routines, check for aligned access
+ *
+ * Copyright 2009 Integrated Device Technology, Inc.
+ * Alex Bounine <alexandre.bounine@idt.com>
+ * - Added Port-Write message handling
+ * - Added Machine Check exception handling
+ *
+ * Copyright (C) 2007, 2008, 2010, 2011 Freescale Semiconductor, Inc.
+ * Zhang Wei <wei.zhang@freescale.com>
+ *
+ * Copyright 2005 MontaVista Software, Inc.
+ * Matt Porter <mporter@kernel.crashing.org>
+ */
+
+#include <linux/init.h>
+#include <linux/extable.h>
+#include <linux/types.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <asm/machdep.h>
+#include <asm/rio.h>
+
+#include "fsl_rio.h"
+
+#undef DEBUG_PW	/* Port-Write debugging */
+
+#define RIO_PORT1_EDCSR		0x0640
+#define RIO_PORT2_EDCSR		0x0680
+#define RIO_PORT1_IECSR		0x10130
+#define RIO_PORT2_IECSR		0x101B0
+
+#define RIO_GCCSR		0x13c
+#define RIO_ESCSR		0x158
+#define ESCSR_CLEAR		0x07120204
+#define RIO_PORT2_ESCSR		0x178
+#define RIO_CCSR		0x15c
+#define RIO_LTLEDCSR_IER	0x80000000
+#define RIO_LTLEDCSR_PRT	0x01000000
+#define IECSR_CLEAR		0x80000000
+#define RIO_ISR_AACR		0x10120
+#define RIO_ISR_AACR_AA		0x1	/* Accept All ID */
+
+#define RIWTAR_TRAD_VAL_SHIFT	12
+#define RIWTAR_TRAD_MASK	0x00FFFFFF
+#define RIWBAR_BADD_VAL_SHIFT	12
+#define RIWBAR_BADD_MASK	0x003FFFFF
+#define RIWAR_ENABLE		0x80000000
+#define RIWAR_TGINT_LOCAL	0x00F00000
+#define RIWAR_RDTYP_NO_SNOOP	0x00040000
+#define RIWAR_RDTYP_SNOOP	0x00050000
+#define RIWAR_WRTYP_NO_SNOOP	0x00004000
+#define RIWAR_WRTYP_SNOOP	0x00005000
+#define RIWAR_WRTYP_ALLOC	0x00006000
+#define RIWAR_SIZE_MASK		0x0000003F
+
+static DEFINE_SPINLOCK(fsl_rio_config_lock);
+
+#define ___fsl_read_rio_config(x, addr, err, op, barrier)	\
+	__asm__ __volatile__(				\
+		"1:	"op" %1,0(%2)\n"		\
+		"	"barrier"\n"			\
+		"2:\n"					\
+		".section .fixup,\"ax\"\n"		\
+		"3:	li %1,-1\n"			\
+		"	li %0,%3\n"			\
+		"	b 2b\n"				\
+		".previous\n"				\
+		EX_TABLE(1b, 3b)			\
+		: "=r" (err), "=r" (x)			\
+		: "b" (addr), "i" (-EFAULT), "0" (err))
+
+#ifdef CONFIG_BOOKE
+#define __fsl_read_rio_config(x, addr, err, op)	\
+	___fsl_read_rio_config(x, addr, err, op, "mbar")
+#else
+#define __fsl_read_rio_config(x, addr, err, op)	\
+	___fsl_read_rio_config(x, addr, err, op, "eieio")
+#endif
+
+void __iomem *rio_regs_win;
+void __iomem *rmu_regs_win;
+resource_size_t rio_law_start;
+
+struct fsl_rio_dbell *dbell;
+struct fsl_rio_pw *pw;
+
+#ifdef CONFIG_PPC_E500
+int fsl_rio_mcheck_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *entry;
+	unsigned long reason;
+
+	if (!rio_regs_win)
+		return 0;
+
+	reason = in_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR));
+	if (reason & (RIO_LTLEDCSR_IER | RIO_LTLEDCSR_PRT)) {
+		/* Check if we are prepared to handle this fault */
+		entry = search_exception_tables(regs->nip);
+		if (entry) {
+			pr_debug("RIO: %s - MC Exception handled\n",
+				 __func__);
+			out_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR),
+				 0);
+			regs_set_recoverable(regs);
+			regs_set_return_ip(regs, extable_fixup(entry));
+			return 1;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fsl_rio_mcheck_exception);
+#endif
+
+/**
+ * fsl_local_config_read - Generate a MPC85xx local config space read
+ * @mport: RapidIO master port info
+ * @index: ID of RapdiIO interface
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @data: Value to be read into
+ *
+ * Generates a MPC85xx local configuration space read. Returns %0 on
+ * success or %-EINVAL on failure.
+ */
+static int fsl_local_config_read(struct rio_mport *mport,
+				int index, u32 offset, int len, u32 *data)
+{
+	struct rio_priv *priv = mport->priv;
+	pr_debug("fsl_local_config_read: index %d offset %8.8x\n", index,
+		 offset);
+	*data = in_be32(priv->regs_win + offset);
+
+	return 0;
+}
+
+/**
+ * fsl_local_config_write - Generate a MPC85xx local config space write
+ * @mport: RapidIO master port info
+ * @index: ID of RapdiIO interface
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @data: Value to be written
+ *
+ * Generates a MPC85xx local configuration space write. Returns %0 on
+ * success or %-EINVAL on failure.
+ */
+static int fsl_local_config_write(struct rio_mport *mport,
+				int index, u32 offset, int len, u32 data)
+{
+	struct rio_priv *priv = mport->priv;
+	pr_debug
+		("fsl_local_config_write: index %d offset %8.8x data %8.8x\n",
+		index, offset, data);
+	out_be32(priv->regs_win + offset, data);
+
+	return 0;
+}
+
+/**
+ * fsl_rio_config_read - Generate a MPC85xx read maintenance transaction
+ * @mport: RapidIO master port info
+ * @index: ID of RapdiIO interface
+ * @destid: Destination ID of transaction
+ * @hopcount: Number of hops to target device
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @val: Location to be read into
+ *
+ * Generates a MPC85xx read maintenance transaction. Returns %0 on
+ * success or %-EINVAL on failure.
+ */
+static int
+fsl_rio_config_read(struct rio_mport *mport, int index, u16 destid,
+			u8 hopcount, u32 offset, int len, u32 *val)
+{
+	struct rio_priv *priv = mport->priv;
+	unsigned long flags;
+	u8 *data;
+	u32 rval, err = 0;
+
+	pr_debug
+		("fsl_rio_config_read:"
+		" index %d destid %d hopcount %d offset %8.8x len %d\n",
+		index, destid, hopcount, offset, len);
+
+	/* 16MB maintenance window possible */
+	/* allow only aligned access to maintenance registers */
+	if (offset > (0x1000000 - len) || !IS_ALIGNED(offset, len))
+		return -EINVAL;
+
+	spin_lock_irqsave(&fsl_rio_config_lock, flags);
+
+	out_be32(&priv->maint_atmu_regs->rowtar,
+		 (destid << 22) | (hopcount << 12) | (offset >> 12));
+	out_be32(&priv->maint_atmu_regs->rowtear, (destid >> 10));
+
+	data = (u8 *) priv->maint_win + (offset & (RIO_MAINT_WIN_SIZE - 1));
+	switch (len) {
+	case 1:
+		__fsl_read_rio_config(rval, data, err, "lbz");
+		break;
+	case 2:
+		__fsl_read_rio_config(rval, data, err, "lhz");
+		break;
+	case 4:
+		__fsl_read_rio_config(rval, data, err, "lwz");
+		break;
+	default:
+		spin_unlock_irqrestore(&fsl_rio_config_lock, flags);
+		return -EINVAL;
+	}
+
+	if (err) {
+		pr_debug("RIO: cfg_read error %d for %x:%x:%x\n",
+			 err, destid, hopcount, offset);
+	}
+
+	spin_unlock_irqrestore(&fsl_rio_config_lock, flags);
+	*val = rval;
+
+	return err;
+}
+
+/**
+ * fsl_rio_config_write - Generate a MPC85xx write maintenance transaction
+ * @mport: RapidIO master port info
+ * @index: ID of RapdiIO interface
+ * @destid: Destination ID of transaction
+ * @hopcount: Number of hops to target device
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @val: Value to be written
+ *
+ * Generates an MPC85xx write maintenance transaction. Returns %0 on
+ * success or %-EINVAL on failure.
+ */
+static int
+fsl_rio_config_write(struct rio_mport *mport, int index, u16 destid,
+			u8 hopcount, u32 offset, int len, u32 val)
+{
+	struct rio_priv *priv = mport->priv;
+	unsigned long flags;
+	u8 *data;
+	int ret = 0;
+
+	pr_debug
+		("fsl_rio_config_write:"
+		" index %d destid %d hopcount %d offset %8.8x len %d val %8.8x\n",
+		index, destid, hopcount, offset, len, val);
+
+	/* 16MB maintenance windows possible */
+	/* allow only aligned access to maintenance registers */
+	if (offset > (0x1000000 - len) || !IS_ALIGNED(offset, len))
+		return -EINVAL;
+
+	spin_lock_irqsave(&fsl_rio_config_lock, flags);
+
+	out_be32(&priv->maint_atmu_regs->rowtar,
+		 (destid << 22) | (hopcount << 12) | (offset >> 12));
+	out_be32(&priv->maint_atmu_regs->rowtear, (destid >> 10));
+
+	data = (u8 *) priv->maint_win + (offset & (RIO_MAINT_WIN_SIZE - 1));
+	switch (len) {
+	case 1:
+		out_8((u8 *) data, val);
+		break;
+	case 2:
+		out_be16((u16 *) data, val);
+		break;
+	case 4:
+		out_be32((u32 *) data, val);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	spin_unlock_irqrestore(&fsl_rio_config_lock, flags);
+
+	return ret;
+}
+
+static void fsl_rio_inbound_mem_init(struct rio_priv *priv)
+{
+	int i;
+
+	/* close inbound windows */
+	for (i = 0; i < RIO_INB_ATMU_COUNT; i++)
+		out_be32(&priv->inb_atmu_regs[i].riwar, 0);
+}
+
+static int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart,
+			   u64 rstart, u64 size, u32 flags)
+{
+	struct rio_priv *priv = mport->priv;
+	u32 base_size;
+	unsigned int base_size_log;
+	u64 win_start, win_end;
+	u32 riwar;
+	int i;
+
+	if ((size & (size - 1)) != 0 || size > 0x400000000ULL)
+		return -EINVAL;
+
+	base_size_log = ilog2(size);
+	base_size = 1 << base_size_log;
+
+	/* check if addresses are aligned with the window size */
+	if (lstart & (base_size - 1))
+		return -EINVAL;
+	if (rstart & (base_size - 1))
+		return -EINVAL;
+
+	/* check for conflicting ranges */
+	for (i = 0; i < RIO_INB_ATMU_COUNT; i++) {
+		riwar = in_be32(&priv->inb_atmu_regs[i].riwar);
+		if ((riwar & RIWAR_ENABLE) == 0)
+			continue;
+		win_start = ((u64)(in_be32(&priv->inb_atmu_regs[i].riwbar) & RIWBAR_BADD_MASK))
+			<< RIWBAR_BADD_VAL_SHIFT;
+		win_end = win_start + ((1 << ((riwar & RIWAR_SIZE_MASK) + 1)) - 1);
+		if (rstart < win_end && (rstart + size) > win_start)
+			return -EINVAL;
+	}
+
+	/* find unused atmu */
+	for (i = 0; i < RIO_INB_ATMU_COUNT; i++) {
+		riwar = in_be32(&priv->inb_atmu_regs[i].riwar);
+		if ((riwar & RIWAR_ENABLE) == 0)
+			break;
+	}
+	if (i >= RIO_INB_ATMU_COUNT)
+		return -ENOMEM;
+
+	out_be32(&priv->inb_atmu_regs[i].riwtar, lstart >> RIWTAR_TRAD_VAL_SHIFT);
+	out_be32(&priv->inb_atmu_regs[i].riwbar, rstart >> RIWBAR_BADD_VAL_SHIFT);
+	out_be32(&priv->inb_atmu_regs[i].riwar, RIWAR_ENABLE | RIWAR_TGINT_LOCAL |
+		RIWAR_RDTYP_SNOOP | RIWAR_WRTYP_SNOOP | (base_size_log - 1));
+
+	return 0;
+}
+
+static void fsl_unmap_inb_mem(struct rio_mport *mport, dma_addr_t lstart)
+{
+	u32 win_start_shift, base_start_shift;
+	struct rio_priv *priv = mport->priv;
+	u32 riwar, riwtar;
+	int i;
+
+	/* skip default window */
+	base_start_shift = lstart >> RIWTAR_TRAD_VAL_SHIFT;
+	for (i = 0; i < RIO_INB_ATMU_COUNT; i++) {
+		riwar = in_be32(&priv->inb_atmu_regs[i].riwar);
+		if ((riwar & RIWAR_ENABLE) == 0)
+			continue;
+
+		riwtar = in_be32(&priv->inb_atmu_regs[i].riwtar);
+		win_start_shift = riwtar & RIWTAR_TRAD_MASK;
+		if (win_start_shift == base_start_shift) {
+			out_be32(&priv->inb_atmu_regs[i].riwar, riwar & ~RIWAR_ENABLE);
+			return;
+		}
+	}
+}
+
+void fsl_rio_port_error_handler(int offset)
+{
+	/*XXX: Error recovery is not implemented, we just clear errors */
+	out_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR), 0);
+
+	if (offset == 0) {
+		out_be32((u32 *)(rio_regs_win + RIO_PORT1_EDCSR), 0);
+		out_be32((u32 *)(rio_regs_win + RIO_PORT1_IECSR), IECSR_CLEAR);
+		out_be32((u32 *)(rio_regs_win + RIO_ESCSR), ESCSR_CLEAR);
+	} else {
+		out_be32((u32 *)(rio_regs_win + RIO_PORT2_EDCSR), 0);
+		out_be32((u32 *)(rio_regs_win + RIO_PORT2_IECSR), IECSR_CLEAR);
+		out_be32((u32 *)(rio_regs_win + RIO_PORT2_ESCSR), ESCSR_CLEAR);
+	}
+}
+static inline void fsl_rio_info(struct device *dev, u32 ccsr)
+{
+	const char *str;
+	if (ccsr & 1) {
+		/* Serial phy */
+		switch (ccsr >> 30) {
+		case 0:
+			str = "1";
+			break;
+		case 1:
+			str = "4";
+			break;
+		default:
+			str = "Unknown";
+			break;
+		}
+		dev_info(dev, "Hardware port width: %s\n", str);
+
+		switch ((ccsr >> 27) & 7) {
+		case 0:
+			str = "Single-lane 0";
+			break;
+		case 1:
+			str = "Single-lane 2";
+			break;
+		case 2:
+			str = "Four-lane";
+			break;
+		default:
+			str = "Unknown";
+			break;
+		}
+		dev_info(dev, "Training connection status: %s\n", str);
+	} else {
+		/* Parallel phy */
+		if (!(ccsr & 0x80000000))
+			dev_info(dev, "Output port operating in 8-bit mode\n");
+		if (!(ccsr & 0x08000000))
+			dev_info(dev, "Input port operating in 8-bit mode\n");
+	}
+}
+
+/**
+ * fsl_rio_setup - Setup Freescale PowerPC RapidIO interface
+ * @dev: platform_device pointer
+ *
+ * Initializes MPC85xx RapidIO hardware interface, configures
+ * master port with system-specific info, and registers the
+ * master port with the RapidIO subsystem.
+ */
+static int fsl_rio_setup(struct platform_device *dev)
+{
+	struct rio_ops *ops;
+	struct rio_mport *port;
+	struct rio_priv *priv;
+	int rc = 0;
+	const u32 *port_index;
+	u32 active_ports = 0;
+	struct device_node *np, *rmu_node;
+	u32 ccsr;
+	u64 range_start;
+	u32 i;
+	static int tmp;
+	struct device_node *rmu_np[MAX_MSG_UNIT_NUM] = {NULL};
+
+	if (!dev->dev.of_node) {
+		dev_err(&dev->dev, "Device OF-Node is NULL");
+		return -ENODEV;
+	}
+
+	rio_regs_win = of_iomap(dev->dev.of_node, 0);
+	if (!rio_regs_win) {
+		dev_err(&dev->dev, "Unable to map rio register window\n");
+		rc = -ENOMEM;
+		goto err_rio_regs;
+	}
+
+	ops = kzalloc(sizeof(struct rio_ops), GFP_KERNEL);
+	if (!ops) {
+		rc = -ENOMEM;
+		goto err_ops;
+	}
+	ops->lcread = fsl_local_config_read;
+	ops->lcwrite = fsl_local_config_write;
+	ops->cread = fsl_rio_config_read;
+	ops->cwrite = fsl_rio_config_write;
+	ops->dsend = fsl_rio_doorbell_send;
+	ops->pwenable = fsl_rio_pw_enable;
+	ops->open_outb_mbox = fsl_open_outb_mbox;
+	ops->open_inb_mbox = fsl_open_inb_mbox;
+	ops->close_outb_mbox = fsl_close_outb_mbox;
+	ops->close_inb_mbox = fsl_close_inb_mbox;
+	ops->add_outb_message = fsl_add_outb_message;
+	ops->add_inb_buffer = fsl_add_inb_buffer;
+	ops->get_inb_message = fsl_get_inb_message;
+	ops->map_inb = fsl_map_inb_mem;
+	ops->unmap_inb = fsl_unmap_inb_mem;
+
+	rmu_node = of_parse_phandle(dev->dev.of_node, "fsl,srio-rmu-handle", 0);
+	if (!rmu_node) {
+		dev_err(&dev->dev, "No valid fsl,srio-rmu-handle property\n");
+		rc = -ENOENT;
+		goto err_rmu;
+	}
+	rmu_regs_win = of_iomap(rmu_node, 0);
+
+	of_node_put(rmu_node);
+	if (!rmu_regs_win) {
+		dev_err(&dev->dev, "Unable to map rmu register window\n");
+		rc = -ENOMEM;
+		goto err_rmu;
+	}
+	for_each_compatible_node(np, NULL, "fsl,srio-msg-unit") {
+		rmu_np[tmp] = np;
+		tmp++;
+	}
+
+	/*set up doobell node*/
+	np = of_find_compatible_node(NULL, NULL, "fsl,srio-dbell-unit");
+	if (!np) {
+		dev_err(&dev->dev, "No fsl,srio-dbell-unit node\n");
+		rc = -ENODEV;
+		goto err_dbell;
+	}
+	dbell = kzalloc(sizeof(struct fsl_rio_dbell), GFP_KERNEL);
+	if (!(dbell)) {
+		dev_err(&dev->dev, "Can't alloc memory for 'fsl_rio_dbell'\n");
+		rc = -ENOMEM;
+		goto err_dbell;
+	}
+	dbell->dev = &dev->dev;
+	dbell->bellirq = irq_of_parse_and_map(np, 1);
+	dev_info(&dev->dev, "bellirq: %d\n", dbell->bellirq);
+
+	if (of_property_read_reg(np, 0, &range_start, NULL)) {
+		pr_err("%pOF: unable to find 'reg' property\n",
+			np);
+		rc = -ENOMEM;
+		goto err_pw;
+	}
+	dbell->dbell_regs = (struct rio_dbell_regs *)(rmu_regs_win +
+				(u32)range_start);
+
+	/*set up port write node*/
+	np = of_find_compatible_node(NULL, NULL, "fsl,srio-port-write-unit");
+	if (!np) {
+		dev_err(&dev->dev, "No fsl,srio-port-write-unit node\n");
+		rc = -ENODEV;
+		goto err_pw;
+	}
+	pw = kzalloc(sizeof(struct fsl_rio_pw), GFP_KERNEL);
+	if (!(pw)) {
+		dev_err(&dev->dev, "Can't alloc memory for 'fsl_rio_pw'\n");
+		rc = -ENOMEM;
+		goto err_pw;
+	}
+	pw->dev = &dev->dev;
+	pw->pwirq = irq_of_parse_and_map(np, 0);
+	dev_info(&dev->dev, "pwirq: %d\n", pw->pwirq);
+	if (of_property_read_reg(np, 0, &range_start, NULL)) {
+		pr_err("%pOF: unable to find 'reg' property\n",
+			np);
+		rc = -ENOMEM;
+		goto err;
+	}
+	pw->pw_regs = (struct rio_pw_regs *)(rmu_regs_win + (u32)range_start);
+
+	/*set up ports node*/
+	for_each_child_of_node(dev->dev.of_node, np) {
+		struct resource res;
+
+		port_index = of_get_property(np, "cell-index", NULL);
+		if (!port_index) {
+			dev_err(&dev->dev, "Can't get %pOF property 'cell-index'\n",
+					np);
+			continue;
+		}
+
+		if (of_range_to_resource(np, 0, &res)) {
+			dev_err(&dev->dev, "Can't get %pOF property 'ranges'\n",
+					np);
+			continue;
+		}
+
+		dev_info(&dev->dev, "%pOF: LAW %pR\n",
+				np, &res);
+
+		port = kzalloc(sizeof(struct rio_mport), GFP_KERNEL);
+		if (!port)
+			continue;
+
+		rc = rio_mport_initialize(port);
+		if (rc) {
+			kfree(port);
+			continue;
+		}
+
+		i = *port_index - 1;
+		port->index = (unsigned char)i;
+
+		priv = kzalloc(sizeof(struct rio_priv), GFP_KERNEL);
+		if (!priv) {
+			dev_err(&dev->dev, "Can't alloc memory for 'priv'\n");
+			kfree(port);
+			continue;
+		}
+
+		INIT_LIST_HEAD(&port->dbells);
+		port->iores = res;	/* struct copy */
+		port->iores.name = "rio_io_win";
+
+		if (request_resource(&iomem_resource, &port->iores) < 0) {
+			dev_err(&dev->dev, "RIO: Error requesting master port region"
+				" 0x%016llx-0x%016llx\n",
+				(u64)port->iores.start, (u64)port->iores.end);
+				kfree(priv);
+				kfree(port);
+				continue;
+		}
+		sprintf(port->name, "RIO mport %d", i);
+
+		priv->dev = &dev->dev;
+		port->dev.parent = &dev->dev;
+		port->ops = ops;
+		port->priv = priv;
+		port->phys_efptr = 0x100;
+		port->phys_rmap = 1;
+		priv->regs_win = rio_regs_win;
+
+		ccsr = in_be32(priv->regs_win + RIO_CCSR + i*0x20);
+
+		/* Checking the port training status */
+		if (in_be32((priv->regs_win + RIO_ESCSR + i*0x20)) & 1) {
+			dev_err(&dev->dev, "Port %d is not ready. "
+			"Try to restart connection...\n", i);
+			/* Disable ports */
+			out_be32(priv->regs_win
+				+ RIO_CCSR + i*0x20, 0);
+			/* Set 1x lane */
+			setbits32(priv->regs_win
+				+ RIO_CCSR + i*0x20, 0x02000000);
+			/* Enable ports */
+			setbits32(priv->regs_win
+				+ RIO_CCSR + i*0x20, 0x00600000);
+			msleep(100);
+			if (in_be32((priv->regs_win
+					+ RIO_ESCSR + i*0x20)) & 1) {
+				dev_err(&dev->dev,
+					"Port %d restart failed.\n", i);
+				release_resource(&port->iores);
+				kfree(priv);
+				kfree(port);
+				continue;
+			}
+			dev_info(&dev->dev, "Port %d restart success!\n", i);
+		}
+		fsl_rio_info(&dev->dev, ccsr);
+
+		port->sys_size = (in_be32((priv->regs_win + RIO_PEF_CAR))
+					& RIO_PEF_CTLS) >> 4;
+		dev_info(&dev->dev, "RapidIO Common Transport System size: %d\n",
+				port->sys_size ? 65536 : 256);
+
+		if (port->host_deviceid >= 0)
+			out_be32(priv->regs_win + RIO_GCCSR, RIO_PORT_GEN_HOST |
+				RIO_PORT_GEN_MASTER | RIO_PORT_GEN_DISCOVERED);
+		else
+			out_be32(priv->regs_win + RIO_GCCSR,
+				RIO_PORT_GEN_MASTER);
+
+		priv->atmu_regs = (struct rio_atmu_regs *)(priv->regs_win
+			+ ((i == 0) ? RIO_ATMU_REGS_PORT1_OFFSET :
+			RIO_ATMU_REGS_PORT2_OFFSET));
+
+		priv->maint_atmu_regs = priv->atmu_regs + 1;
+		priv->inb_atmu_regs = (struct rio_inb_atmu_regs __iomem *)
+			(priv->regs_win +
+			((i == 0) ? RIO_INB_ATMU_REGS_PORT1_OFFSET :
+			RIO_INB_ATMU_REGS_PORT2_OFFSET));
+
+		/* Set to receive packets with any dest ID */
+		out_be32((priv->regs_win + RIO_ISR_AACR + i*0x80),
+			 RIO_ISR_AACR_AA);
+
+		/* Configure maintenance transaction window */
+		out_be32(&priv->maint_atmu_regs->rowbar,
+			port->iores.start >> 12);
+		out_be32(&priv->maint_atmu_regs->rowar,
+			 0x80077000 | (ilog2(RIO_MAINT_WIN_SIZE) - 1));
+
+		priv->maint_win = ioremap(port->iores.start,
+				RIO_MAINT_WIN_SIZE);
+
+		rio_law_start = range_start;
+
+		fsl_rio_setup_rmu(port, rmu_np[i]);
+		fsl_rio_inbound_mem_init(priv);
+
+		dbell->mport[i] = port;
+		pw->mport[i] = port;
+
+		if (rio_register_mport(port)) {
+			release_resource(&port->iores);
+			kfree(priv);
+			kfree(port);
+			continue;
+		}
+		active_ports++;
+	}
+
+	if (!active_ports) {
+		rc = -ENOLINK;
+		goto err;
+	}
+
+	fsl_rio_doorbell_init(dbell);
+	fsl_rio_port_write_init(pw);
+
+	return 0;
+err:
+	kfree(pw);
+	pw = NULL;
+err_pw:
+	kfree(dbell);
+	dbell = NULL;
+err_dbell:
+	iounmap(rmu_regs_win);
+	rmu_regs_win = NULL;
+err_rmu:
+	kfree(ops);
+err_ops:
+	iounmap(rio_regs_win);
+	rio_regs_win = NULL;
+err_rio_regs:
+	return rc;
+}
+
+/* The probe function for RapidIO peer-to-peer network.
+ */
+static int fsl_of_rio_rpn_probe(struct platform_device *dev)
+{
+	printk(KERN_INFO "Setting up RapidIO peer-to-peer network %pOF\n",
+			dev->dev.of_node);
+
+	return fsl_rio_setup(dev);
+};
+
+static const struct of_device_id fsl_of_rio_rpn_ids[] = {
+	{
+		.compatible = "fsl,srio",
+	},
+	{},
+};
+
+static struct platform_driver fsl_of_rio_rpn_driver = {
+	.driver = {
+		.name = "fsl-of-rio",
+		.of_match_table = fsl_of_rio_rpn_ids,
+	},
+	.probe = fsl_of_rio_rpn_probe,
+};
+
+static __init int fsl_of_rio_rpn_init(void)
+{
+	return platform_driver_register(&fsl_of_rio_rpn_driver);
+}
+
+subsys_initcall(fsl_of_rio_rpn_init);
diff --git a/arch/powerpc/sysdev/fsl_rio.h b/arch/powerpc/sysdev/fsl_rio.h
new file mode 100644
index 0000000000..c526b7237a
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_rio.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Freescale MPC85xx/MPC86xx RapidIO support
+ *
+ * Copyright 2009 Sysgo AG
+ * Thomas Moll <thomas.moll@sysgo.com>
+ * - fixed maintenance access routines, check for aligned access
+ *
+ * Copyright 2009 Integrated Device Technology, Inc.
+ * Alex Bounine <alexandre.bounine@idt.com>
+ * - Added Port-Write message handling
+ * - Added Machine Check exception handling
+ *
+ * Copyright (C) 2007, 2008, 2010, 2011 Freescale Semiconductor, Inc.
+ * Zhang Wei <wei.zhang@freescale.com>
+ * Lian Minghuan-B31939 <Minghuan.Lian@freescale.com>
+ * Liu Gang <Gang.Liu@freescale.com>
+ *
+ * Copyright 2005 MontaVista Software, Inc.
+ * Matt Porter <mporter@kernel.crashing.org>
+ */
+
+#ifndef __FSL_RIO_H
+#define __FSL_RIO_H
+
+#include <linux/rio.h>
+#include <linux/rio_drv.h>
+#include <linux/kfifo.h>
+
+#define RIO_REGS_WIN(mport)	(((struct rio_priv *)(mport->priv))->regs_win)
+
+#define RIO_MAINT_WIN_SIZE	0x400000
+#define RIO_LTLEDCSR		0x0608
+
+#define DOORBELL_ROWAR_EN	0x80000000
+#define DOORBELL_ROWAR_TFLOWLV	0x08000000 /* highest priority level */
+#define DOORBELL_ROWAR_PCI	0x02000000 /* PCI window */
+#define DOORBELL_ROWAR_NREAD	0x00040000 /* NREAD */
+#define DOORBELL_ROWAR_MAINTRD	0x00070000  /* maintenance read */
+#define DOORBELL_ROWAR_RES	0x00002000 /* wrtpy: reserved */
+#define DOORBELL_ROWAR_MAINTWD	0x00007000
+#define DOORBELL_ROWAR_SIZE	0x0000000b /* window size is 4k */
+
+#define RIO_ATMU_REGS_PORT1_OFFSET	0x10c00
+#define RIO_ATMU_REGS_PORT2_OFFSET	0x10e00
+#define RIO_S_DBELL_REGS_OFFSET	0x13400
+#define RIO_S_PW_REGS_OFFSET	0x134e0
+#define RIO_ATMU_REGS_DBELL_OFFSET	0x10C40
+#define RIO_INB_ATMU_REGS_PORT1_OFFSET 0x10d60
+#define RIO_INB_ATMU_REGS_PORT2_OFFSET 0x10f60
+
+#define MAX_MSG_UNIT_NUM	2
+#define MAX_PORT_NUM		4
+#define RIO_INB_ATMU_COUNT	4
+
+struct rio_atmu_regs {
+	 u32 rowtar;
+	 u32 rowtear;
+	 u32 rowbar;
+	 u32 pad1;
+	 u32 rowar;
+	 u32 pad2[3];
+};
+
+struct rio_inb_atmu_regs {
+	u32 riwtar;
+	u32 pad1;
+	u32 riwbar;
+	u32 pad2;
+	u32 riwar;
+	u32 pad3[3];
+};
+
+struct rio_dbell_ring {
+	void *virt;
+	dma_addr_t phys;
+};
+
+struct rio_port_write_msg {
+	 void *virt;
+	 dma_addr_t phys;
+	 u32 msg_count;
+	 u32 err_count;
+	 u32 discard_count;
+};
+
+struct fsl_rio_dbell {
+	struct rio_mport *mport[MAX_PORT_NUM];
+	struct device *dev;
+	struct rio_dbell_regs __iomem *dbell_regs;
+	struct rio_dbell_ring dbell_ring;
+	int bellirq;
+};
+
+struct fsl_rio_pw {
+	struct rio_mport *mport[MAX_PORT_NUM];
+	struct device *dev;
+	struct rio_pw_regs __iomem *pw_regs;
+	struct rio_port_write_msg port_write_msg;
+	int pwirq;
+	struct work_struct pw_work;
+	struct kfifo pw_fifo;
+	spinlock_t pw_fifo_lock;
+};
+
+struct rio_priv {
+	struct device *dev;
+	void __iomem *regs_win;
+	struct rio_atmu_regs __iomem *atmu_regs;
+	struct rio_atmu_regs __iomem *maint_atmu_regs;
+	struct rio_inb_atmu_regs __iomem *inb_atmu_regs;
+	void __iomem *maint_win;
+	void *rmm_handle; /* RapidIO message manager(unit) Handle */
+};
+
+extern void __iomem *rio_regs_win;
+extern void __iomem *rmu_regs_win;
+
+extern resource_size_t rio_law_start;
+
+extern struct fsl_rio_dbell *dbell;
+extern struct fsl_rio_pw *pw;
+
+extern int fsl_rio_setup_rmu(struct rio_mport *mport,
+	struct device_node *node);
+extern int fsl_rio_port_write_init(struct fsl_rio_pw *pw);
+extern int fsl_rio_pw_enable(struct rio_mport *mport, int enable);
+extern void fsl_rio_port_error_handler(int offset);
+extern int fsl_rio_doorbell_init(struct fsl_rio_dbell *dbell);
+
+extern int fsl_rio_doorbell_send(struct rio_mport *mport,
+				int index, u16 destid, u16 data);
+extern int fsl_add_outb_message(struct rio_mport *mport,
+	struct rio_dev *rdev,
+	int mbox, void *buffer, size_t len);
+extern int fsl_open_outb_mbox(struct rio_mport *mport,
+	void *dev_id, int mbox, int entries);
+extern void fsl_close_outb_mbox(struct rio_mport *mport, int mbox);
+extern int fsl_open_inb_mbox(struct rio_mport *mport,
+	void *dev_id, int mbox, int entries);
+extern void fsl_close_inb_mbox(struct rio_mport *mport, int mbox);
+extern int fsl_add_inb_buffer(struct rio_mport *mport, int mbox, void *buf);
+extern void *fsl_get_inb_message(struct rio_mport *mport, int mbox);
+
+#endif
diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c
new file mode 100644
index 0000000000..f956591cb6
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_rmu.c
@@ -0,0 +1,1107 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale MPC85xx/MPC86xx RapidIO RMU support
+ *
+ * Copyright 2009 Sysgo AG
+ * Thomas Moll <thomas.moll@sysgo.com>
+ * - fixed maintenance access routines, check for aligned access
+ *
+ * Copyright 2009 Integrated Device Technology, Inc.
+ * Alex Bounine <alexandre.bounine@idt.com>
+ * - Added Port-Write message handling
+ * - Added Machine Check exception handling
+ *
+ * Copyright (C) 2007, 2008, 2010, 2011 Freescale Semiconductor, Inc.
+ * Zhang Wei <wei.zhang@freescale.com>
+ * Lian Minghuan-B31939 <Minghuan.Lian@freescale.com>
+ * Liu Gang <Gang.Liu@freescale.com>
+ *
+ * Copyright 2005 MontaVista Software, Inc.
+ * Matt Porter <mporter@kernel.crashing.org>
+ */
+
+#include <linux/types.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+
+#include "fsl_rio.h"
+
+#define GET_RMM_HANDLE(mport) \
+		(((struct rio_priv *)(mport->priv))->rmm_handle)
+
+/* RapidIO definition irq, which read from OF-tree */
+#define IRQ_RIO_PW(m)		(((struct fsl_rio_pw *)(m))->pwirq)
+#define IRQ_RIO_BELL(m) (((struct fsl_rio_dbell *)(m))->bellirq)
+#define IRQ_RIO_TX(m) (((struct fsl_rmu *)(GET_RMM_HANDLE(m)))->txirq)
+#define IRQ_RIO_RX(m) (((struct fsl_rmu *)(GET_RMM_HANDLE(m)))->rxirq)
+
+#define RIO_MIN_TX_RING_SIZE	2
+#define RIO_MAX_TX_RING_SIZE	2048
+#define RIO_MIN_RX_RING_SIZE	2
+#define RIO_MAX_RX_RING_SIZE	2048
+
+#define RIO_IPWMR_SEN		0x00100000
+#define RIO_IPWMR_QFIE		0x00000100
+#define RIO_IPWMR_EIE		0x00000020
+#define RIO_IPWMR_CQ		0x00000002
+#define RIO_IPWMR_PWE		0x00000001
+
+#define RIO_IPWSR_QF		0x00100000
+#define RIO_IPWSR_TE		0x00000080
+#define RIO_IPWSR_QFI		0x00000010
+#define RIO_IPWSR_PWD		0x00000008
+#define RIO_IPWSR_PWB		0x00000004
+
+#define RIO_EPWISR		0x10010
+/* EPWISR Error match value */
+#define RIO_EPWISR_PINT1	0x80000000
+#define RIO_EPWISR_PINT2	0x40000000
+#define RIO_EPWISR_MU		0x00000002
+#define RIO_EPWISR_PW		0x00000001
+
+#define IPWSR_CLEAR		0x98
+#define OMSR_CLEAR		0x1cb3
+#define IMSR_CLEAR		0x491
+#define IDSR_CLEAR		0x91
+#define ODSR_CLEAR		0x1c00
+#define LTLEECSR_ENABLE_ALL	0xFFC000FC
+#define RIO_LTLEECSR		0x060c
+
+#define RIO_IM0SR		0x64
+#define RIO_IM1SR		0x164
+#define RIO_OM0SR		0x4
+#define RIO_OM1SR		0x104
+
+#define RIO_DBELL_WIN_SIZE	0x1000
+
+#define RIO_MSG_OMR_MUI		0x00000002
+#define RIO_MSG_OSR_TE		0x00000080
+#define RIO_MSG_OSR_QOI		0x00000020
+#define RIO_MSG_OSR_QFI		0x00000010
+#define RIO_MSG_OSR_MUB		0x00000004
+#define RIO_MSG_OSR_EOMI	0x00000002
+#define RIO_MSG_OSR_QEI		0x00000001
+
+#define RIO_MSG_IMR_MI		0x00000002
+#define RIO_MSG_ISR_TE		0x00000080
+#define RIO_MSG_ISR_QFI		0x00000010
+#define RIO_MSG_ISR_DIQI	0x00000001
+
+#define RIO_MSG_DESC_SIZE	32
+#define RIO_MSG_BUFFER_SIZE	4096
+
+#define DOORBELL_DMR_DI		0x00000002
+#define DOORBELL_DSR_TE		0x00000080
+#define DOORBELL_DSR_QFI	0x00000010
+#define DOORBELL_DSR_DIQI	0x00000001
+
+#define DOORBELL_MESSAGE_SIZE	0x08
+
+static DEFINE_SPINLOCK(fsl_rio_doorbell_lock);
+
+struct rio_msg_regs {
+	u32 omr;
+	u32 osr;
+	u32 pad1;
+	u32 odqdpar;
+	u32 pad2;
+	u32 osar;
+	u32 odpr;
+	u32 odatr;
+	u32 odcr;
+	u32 pad3;
+	u32 odqepar;
+	u32 pad4[13];
+	u32 imr;
+	u32 isr;
+	u32 pad5;
+	u32 ifqdpar;
+	u32 pad6;
+	u32 ifqepar;
+};
+
+struct rio_dbell_regs {
+	u32 odmr;
+	u32 odsr;
+	u32 pad1[4];
+	u32 oddpr;
+	u32 oddatr;
+	u32 pad2[3];
+	u32 odretcr;
+	u32 pad3[12];
+	u32 dmr;
+	u32 dsr;
+	u32 pad4;
+	u32 dqdpar;
+	u32 pad5;
+	u32 dqepar;
+};
+
+struct rio_pw_regs {
+	u32 pwmr;
+	u32 pwsr;
+	u32 epwqbar;
+	u32 pwqbar;
+};
+
+
+struct rio_tx_desc {
+	u32 pad1;
+	u32 saddr;
+	u32 dport;
+	u32 dattr;
+	u32 pad2;
+	u32 pad3;
+	u32 dwcnt;
+	u32 pad4;
+};
+
+struct rio_msg_tx_ring {
+	void *virt;
+	dma_addr_t phys;
+	void *virt_buffer[RIO_MAX_TX_RING_SIZE];
+	dma_addr_t phys_buffer[RIO_MAX_TX_RING_SIZE];
+	int tx_slot;
+	int size;
+	void *dev_id;
+};
+
+struct rio_msg_rx_ring {
+	void *virt;
+	dma_addr_t phys;
+	void *virt_buffer[RIO_MAX_RX_RING_SIZE];
+	int rx_slot;
+	int size;
+	void *dev_id;
+};
+
+struct fsl_rmu {
+	struct rio_msg_regs __iomem *msg_regs;
+	struct rio_msg_tx_ring msg_tx_ring;
+	struct rio_msg_rx_ring msg_rx_ring;
+	int txirq;
+	int rxirq;
+};
+
+struct rio_dbell_msg {
+	u16 pad1;
+	u16 tid;
+	u16 sid;
+	u16 info;
+};
+
+/**
+ * fsl_rio_tx_handler - MPC85xx outbound message interrupt handler
+ * @irq: Linux interrupt number
+ * @dev_instance: Pointer to interrupt-specific data
+ *
+ * Handles outbound message interrupts. Executes a register outbound
+ * mailbox event handler and acks the interrupt occurrence.
+ */
+static irqreturn_t
+fsl_rio_tx_handler(int irq, void *dev_instance)
+{
+	int osr;
+	struct rio_mport *port = (struct rio_mport *)dev_instance;
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(port);
+
+	osr = in_be32(&rmu->msg_regs->osr);
+
+	if (osr & RIO_MSG_OSR_TE) {
+		pr_info("RIO: outbound message transmission error\n");
+		out_be32(&rmu->msg_regs->osr, RIO_MSG_OSR_TE);
+		goto out;
+	}
+
+	if (osr & RIO_MSG_OSR_QOI) {
+		pr_info("RIO: outbound message queue overflow\n");
+		out_be32(&rmu->msg_regs->osr, RIO_MSG_OSR_QOI);
+		goto out;
+	}
+
+	if (osr & RIO_MSG_OSR_EOMI) {
+		u32 dqp = in_be32(&rmu->msg_regs->odqdpar);
+		int slot = (dqp - rmu->msg_tx_ring.phys) >> 5;
+		if (port->outb_msg[0].mcback != NULL) {
+			port->outb_msg[0].mcback(port, rmu->msg_tx_ring.dev_id,
+					-1,
+					slot);
+		}
+		/* Ack the end-of-message interrupt */
+		out_be32(&rmu->msg_regs->osr, RIO_MSG_OSR_EOMI);
+	}
+
+out:
+	return IRQ_HANDLED;
+}
+
+/**
+ * fsl_rio_rx_handler - MPC85xx inbound message interrupt handler
+ * @irq: Linux interrupt number
+ * @dev_instance: Pointer to interrupt-specific data
+ *
+ * Handles inbound message interrupts. Executes a registered inbound
+ * mailbox event handler and acks the interrupt occurrence.
+ */
+static irqreturn_t
+fsl_rio_rx_handler(int irq, void *dev_instance)
+{
+	int isr;
+	struct rio_mport *port = (struct rio_mport *)dev_instance;
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(port);
+
+	isr = in_be32(&rmu->msg_regs->isr);
+
+	if (isr & RIO_MSG_ISR_TE) {
+		pr_info("RIO: inbound message reception error\n");
+		out_be32((void *)&rmu->msg_regs->isr, RIO_MSG_ISR_TE);
+		goto out;
+	}
+
+	/* XXX Need to check/dispatch until queue empty */
+	if (isr & RIO_MSG_ISR_DIQI) {
+		/*
+		* Can receive messages for any mailbox/letter to that
+		* mailbox destination. So, make the callback with an
+		* unknown/invalid mailbox number argument.
+		*/
+		if (port->inb_msg[0].mcback != NULL)
+			port->inb_msg[0].mcback(port, rmu->msg_rx_ring.dev_id,
+				-1,
+				-1);
+
+		/* Ack the queueing interrupt */
+		out_be32(&rmu->msg_regs->isr, RIO_MSG_ISR_DIQI);
+	}
+
+out:
+	return IRQ_HANDLED;
+}
+
+/**
+ * fsl_rio_dbell_handler - MPC85xx doorbell interrupt handler
+ * @irq: Linux interrupt number
+ * @dev_instance: Pointer to interrupt-specific data
+ *
+ * Handles doorbell interrupts. Parses a list of registered
+ * doorbell event handlers and executes a matching event handler.
+ */
+static irqreturn_t
+fsl_rio_dbell_handler(int irq, void *dev_instance)
+{
+	int dsr;
+	struct fsl_rio_dbell *fsl_dbell = (struct fsl_rio_dbell *)dev_instance;
+	int i;
+
+	dsr = in_be32(&fsl_dbell->dbell_regs->dsr);
+
+	if (dsr & DOORBELL_DSR_TE) {
+		pr_info("RIO: doorbell reception error\n");
+		out_be32(&fsl_dbell->dbell_regs->dsr, DOORBELL_DSR_TE);
+		goto out;
+	}
+
+	if (dsr & DOORBELL_DSR_QFI) {
+		pr_info("RIO: doorbell queue full\n");
+		out_be32(&fsl_dbell->dbell_regs->dsr, DOORBELL_DSR_QFI);
+	}
+
+	/* XXX Need to check/dispatch until queue empty */
+	if (dsr & DOORBELL_DSR_DIQI) {
+		struct rio_dbell_msg *dmsg =
+			fsl_dbell->dbell_ring.virt +
+			(in_be32(&fsl_dbell->dbell_regs->dqdpar) & 0xfff);
+		struct rio_dbell *dbell;
+		int found = 0;
+
+		pr_debug
+			("RIO: processing doorbell,"
+			" sid %2.2x tid %2.2x info %4.4x\n",
+			dmsg->sid, dmsg->tid, dmsg->info);
+
+		for (i = 0; i < MAX_PORT_NUM; i++) {
+			if (fsl_dbell->mport[i]) {
+				list_for_each_entry(dbell,
+					&fsl_dbell->mport[i]->dbells, node) {
+					if ((dbell->res->start
+						<= dmsg->info)
+						&& (dbell->res->end
+						>= dmsg->info)) {
+						found = 1;
+						break;
+					}
+				}
+				if (found && dbell->dinb) {
+					dbell->dinb(fsl_dbell->mport[i],
+						dbell->dev_id, dmsg->sid,
+						dmsg->tid,
+						dmsg->info);
+					break;
+				}
+			}
+		}
+
+		if (!found) {
+			pr_debug
+				("RIO: spurious doorbell,"
+				" sid %2.2x tid %2.2x info %4.4x\n",
+				dmsg->sid, dmsg->tid,
+				dmsg->info);
+		}
+		setbits32(&fsl_dbell->dbell_regs->dmr, DOORBELL_DMR_DI);
+		out_be32(&fsl_dbell->dbell_regs->dsr, DOORBELL_DSR_DIQI);
+	}
+
+out:
+	return IRQ_HANDLED;
+}
+
+static void msg_unit_error_handler(void)
+{
+
+	/*XXX: Error recovery is not implemented, we just clear errors */
+	out_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR), 0);
+
+	out_be32((u32 *)(rmu_regs_win + RIO_IM0SR), IMSR_CLEAR);
+	out_be32((u32 *)(rmu_regs_win + RIO_IM1SR), IMSR_CLEAR);
+	out_be32((u32 *)(rmu_regs_win + RIO_OM0SR), OMSR_CLEAR);
+	out_be32((u32 *)(rmu_regs_win + RIO_OM1SR), OMSR_CLEAR);
+
+	out_be32(&dbell->dbell_regs->odsr, ODSR_CLEAR);
+	out_be32(&dbell->dbell_regs->dsr, IDSR_CLEAR);
+
+	out_be32(&pw->pw_regs->pwsr, IPWSR_CLEAR);
+}
+
+/**
+ * fsl_rio_port_write_handler - MPC85xx port write interrupt handler
+ * @irq: Linux interrupt number
+ * @dev_instance: Pointer to interrupt-specific data
+ *
+ * Handles port write interrupts. Parses a list of registered
+ * port write event handlers and executes a matching event handler.
+ */
+static irqreturn_t
+fsl_rio_port_write_handler(int irq, void *dev_instance)
+{
+	u32 ipwmr, ipwsr;
+	struct fsl_rio_pw *pw = (struct fsl_rio_pw *)dev_instance;
+	u32 epwisr, tmp;
+
+	epwisr = in_be32(rio_regs_win + RIO_EPWISR);
+	if (!(epwisr & RIO_EPWISR_PW))
+		goto pw_done;
+
+	ipwmr = in_be32(&pw->pw_regs->pwmr);
+	ipwsr = in_be32(&pw->pw_regs->pwsr);
+
+#ifdef DEBUG_PW
+	pr_debug("PW Int->IPWMR: 0x%08x IPWSR: 0x%08x (", ipwmr, ipwsr);
+	if (ipwsr & RIO_IPWSR_QF)
+		pr_debug(" QF");
+	if (ipwsr & RIO_IPWSR_TE)
+		pr_debug(" TE");
+	if (ipwsr & RIO_IPWSR_QFI)
+		pr_debug(" QFI");
+	if (ipwsr & RIO_IPWSR_PWD)
+		pr_debug(" PWD");
+	if (ipwsr & RIO_IPWSR_PWB)
+		pr_debug(" PWB");
+	pr_debug(" )\n");
+#endif
+	/* Schedule deferred processing if PW was received */
+	if (ipwsr & RIO_IPWSR_QFI) {
+		/* Save PW message (if there is room in FIFO),
+		 * otherwise discard it.
+		 */
+		if (kfifo_avail(&pw->pw_fifo) >= RIO_PW_MSG_SIZE) {
+			pw->port_write_msg.msg_count++;
+			kfifo_in(&pw->pw_fifo, pw->port_write_msg.virt,
+				 RIO_PW_MSG_SIZE);
+		} else {
+			pw->port_write_msg.discard_count++;
+			pr_debug("RIO: ISR Discarded Port-Write Msg(s) (%d)\n",
+				 pw->port_write_msg.discard_count);
+		}
+		/* Clear interrupt and issue Clear Queue command. This allows
+		 * another port-write to be received.
+		 */
+		out_be32(&pw->pw_regs->pwsr,	RIO_IPWSR_QFI);
+		out_be32(&pw->pw_regs->pwmr, ipwmr | RIO_IPWMR_CQ);
+
+		schedule_work(&pw->pw_work);
+	}
+
+	if ((ipwmr & RIO_IPWMR_EIE) && (ipwsr & RIO_IPWSR_TE)) {
+		pw->port_write_msg.err_count++;
+		pr_debug("RIO: Port-Write Transaction Err (%d)\n",
+			 pw->port_write_msg.err_count);
+		/* Clear Transaction Error: port-write controller should be
+		 * disabled when clearing this error
+		 */
+		out_be32(&pw->pw_regs->pwmr, ipwmr & ~RIO_IPWMR_PWE);
+		out_be32(&pw->pw_regs->pwsr,	RIO_IPWSR_TE);
+		out_be32(&pw->pw_regs->pwmr, ipwmr);
+	}
+
+	if (ipwsr & RIO_IPWSR_PWD) {
+		pw->port_write_msg.discard_count++;
+		pr_debug("RIO: Port Discarded Port-Write Msg(s) (%d)\n",
+			 pw->port_write_msg.discard_count);
+		out_be32(&pw->pw_regs->pwsr, RIO_IPWSR_PWD);
+	}
+
+pw_done:
+	if (epwisr & RIO_EPWISR_PINT1) {
+		tmp = in_be32(rio_regs_win + RIO_LTLEDCSR);
+		pr_debug("RIO_LTLEDCSR = 0x%x\n", tmp);
+		fsl_rio_port_error_handler(0);
+	}
+
+	if (epwisr & RIO_EPWISR_PINT2) {
+		tmp = in_be32(rio_regs_win + RIO_LTLEDCSR);
+		pr_debug("RIO_LTLEDCSR = 0x%x\n", tmp);
+		fsl_rio_port_error_handler(1);
+	}
+
+	if (epwisr & RIO_EPWISR_MU) {
+		tmp = in_be32(rio_regs_win + RIO_LTLEDCSR);
+		pr_debug("RIO_LTLEDCSR = 0x%x\n", tmp);
+		msg_unit_error_handler();
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void fsl_pw_dpc(struct work_struct *work)
+{
+	struct fsl_rio_pw *pw = container_of(work, struct fsl_rio_pw, pw_work);
+	union rio_pw_msg msg_buffer;
+	int i;
+
+	/*
+	 * Process port-write messages
+	 */
+	while (kfifo_out_spinlocked(&pw->pw_fifo, (unsigned char *)&msg_buffer,
+			 RIO_PW_MSG_SIZE, &pw->pw_fifo_lock)) {
+#ifdef DEBUG_PW
+		{
+		u32 i;
+		pr_debug("%s : Port-Write Message:", __func__);
+		for (i = 0; i < RIO_PW_MSG_SIZE/sizeof(u32); i++) {
+			if ((i%4) == 0)
+				pr_debug("\n0x%02x: 0x%08x", i*4,
+					 msg_buffer.raw[i]);
+			else
+				pr_debug(" 0x%08x", msg_buffer.raw[i]);
+		}
+		pr_debug("\n");
+		}
+#endif
+		/* Pass the port-write message to RIO core for processing */
+		for (i = 0; i < MAX_PORT_NUM; i++) {
+			if (pw->mport[i])
+				rio_inb_pwrite_handler(pw->mport[i],
+						       &msg_buffer);
+		}
+	}
+}
+
+/**
+ * fsl_rio_pw_enable - enable/disable port-write interface init
+ * @mport: Master port implementing the port write unit
+ * @enable:    1=enable; 0=disable port-write message handling
+ */
+int fsl_rio_pw_enable(struct rio_mport *mport, int enable)
+{
+	u32 rval;
+
+	rval = in_be32(&pw->pw_regs->pwmr);
+
+	if (enable)
+		rval |= RIO_IPWMR_PWE;
+	else
+		rval &= ~RIO_IPWMR_PWE;
+
+	out_be32(&pw->pw_regs->pwmr, rval);
+
+	return 0;
+}
+
+/**
+ * fsl_rio_port_write_init - MPC85xx port write interface init
+ * @mport: Master port implementing the port write unit
+ *
+ * Initializes port write unit hardware and DMA buffer
+ * ring. Called from fsl_rio_setup(). Returns %0 on success
+ * or %-ENOMEM on failure.
+ */
+
+int fsl_rio_port_write_init(struct fsl_rio_pw *pw)
+{
+	int rc = 0;
+
+	/* Following configurations require a disabled port write controller */
+	out_be32(&pw->pw_regs->pwmr,
+		 in_be32(&pw->pw_regs->pwmr) & ~RIO_IPWMR_PWE);
+
+	/* Initialize port write */
+	pw->port_write_msg.virt = dma_alloc_coherent(pw->dev,
+					RIO_PW_MSG_SIZE,
+					&pw->port_write_msg.phys, GFP_KERNEL);
+	if (!pw->port_write_msg.virt) {
+		pr_err("RIO: unable allocate port write queue\n");
+		return -ENOMEM;
+	}
+
+	pw->port_write_msg.err_count = 0;
+	pw->port_write_msg.discard_count = 0;
+
+	/* Point dequeue/enqueue pointers at first entry */
+	out_be32(&pw->pw_regs->epwqbar, 0);
+	out_be32(&pw->pw_regs->pwqbar, (u32) pw->port_write_msg.phys);
+
+	pr_debug("EIPWQBAR: 0x%08x IPWQBAR: 0x%08x\n",
+		 in_be32(&pw->pw_regs->epwqbar),
+		 in_be32(&pw->pw_regs->pwqbar));
+
+	/* Clear interrupt status IPWSR */
+	out_be32(&pw->pw_regs->pwsr,
+		 (RIO_IPWSR_TE | RIO_IPWSR_QFI | RIO_IPWSR_PWD));
+
+	/* Configure port write controller for snooping enable all reporting,
+	   clear queue full */
+	out_be32(&pw->pw_regs->pwmr,
+		 RIO_IPWMR_SEN | RIO_IPWMR_QFIE | RIO_IPWMR_EIE | RIO_IPWMR_CQ);
+
+
+	/* Hook up port-write handler */
+	rc = request_irq(IRQ_RIO_PW(pw), fsl_rio_port_write_handler,
+			IRQF_SHARED, "port-write", (void *)pw);
+	if (rc < 0) {
+		pr_err("MPC85xx RIO: unable to request inbound doorbell irq");
+		goto err_out;
+	}
+	/* Enable Error Interrupt */
+	out_be32((u32 *)(rio_regs_win + RIO_LTLEECSR), LTLEECSR_ENABLE_ALL);
+
+	INIT_WORK(&pw->pw_work, fsl_pw_dpc);
+	spin_lock_init(&pw->pw_fifo_lock);
+	if (kfifo_alloc(&pw->pw_fifo, RIO_PW_MSG_SIZE * 32, GFP_KERNEL)) {
+		pr_err("FIFO allocation failed\n");
+		rc = -ENOMEM;
+		goto err_out_irq;
+	}
+
+	pr_debug("IPWMR: 0x%08x IPWSR: 0x%08x\n",
+		 in_be32(&pw->pw_regs->pwmr),
+		 in_be32(&pw->pw_regs->pwsr));
+
+	return rc;
+
+err_out_irq:
+	free_irq(IRQ_RIO_PW(pw), (void *)pw);
+err_out:
+	dma_free_coherent(pw->dev, RIO_PW_MSG_SIZE,
+		pw->port_write_msg.virt,
+		pw->port_write_msg.phys);
+	return rc;
+}
+
+/**
+ * fsl_rio_doorbell_send - Send a MPC85xx doorbell message
+ * @mport: RapidIO master port info
+ * @index: ID of RapidIO interface
+ * @destid: Destination ID of target device
+ * @data: 16-bit info field of RapidIO doorbell message
+ *
+ * Sends a MPC85xx doorbell message. Returns %0 on success or
+ * %-EINVAL on failure.
+ */
+int fsl_rio_doorbell_send(struct rio_mport *mport,
+				int index, u16 destid, u16 data)
+{
+	unsigned long flags;
+
+	pr_debug("fsl_doorbell_send: index %d destid %4.4x data %4.4x\n",
+		 index, destid, data);
+
+	spin_lock_irqsave(&fsl_rio_doorbell_lock, flags);
+
+	/* In the serial version silicons, such as MPC8548, MPC8641,
+	 * below operations is must be.
+	 */
+	out_be32(&dbell->dbell_regs->odmr, 0x00000000);
+	out_be32(&dbell->dbell_regs->odretcr, 0x00000004);
+	out_be32(&dbell->dbell_regs->oddpr, destid << 16);
+	out_be32(&dbell->dbell_regs->oddatr, (index << 20) | data);
+	out_be32(&dbell->dbell_regs->odmr, 0x00000001);
+
+	spin_unlock_irqrestore(&fsl_rio_doorbell_lock, flags);
+
+	return 0;
+}
+
+/**
+ * fsl_add_outb_message - Add message to the MPC85xx outbound message queue
+ * @mport: Master port with outbound message queue
+ * @rdev: Target of outbound message
+ * @mbox: Outbound mailbox
+ * @buffer: Message to add to outbound queue
+ * @len: Length of message
+ *
+ * Adds the @buffer message to the MPC85xx outbound message queue. Returns
+ * %0 on success or %-EINVAL on failure.
+ */
+int
+fsl_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox,
+			void *buffer, size_t len)
+{
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(mport);
+	u32 omr;
+	struct rio_tx_desc *desc = (struct rio_tx_desc *)rmu->msg_tx_ring.virt
+					+ rmu->msg_tx_ring.tx_slot;
+	int ret = 0;
+
+	pr_debug("RIO: fsl_add_outb_message(): destid %4.4x mbox %d buffer " \
+		 "%p len %8.8zx\n", rdev->destid, mbox, buffer, len);
+	if ((len < 8) || (len > RIO_MAX_MSG_SIZE)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Copy and clear rest of buffer */
+	memcpy(rmu->msg_tx_ring.virt_buffer[rmu->msg_tx_ring.tx_slot], buffer,
+			len);
+	if (len < (RIO_MAX_MSG_SIZE - 4))
+		memset(rmu->msg_tx_ring.virt_buffer[rmu->msg_tx_ring.tx_slot]
+				+ len, 0, RIO_MAX_MSG_SIZE - len);
+
+	/* Set mbox field for message, and set destid */
+	desc->dport = (rdev->destid << 16) | (mbox & 0x3);
+
+	/* Enable EOMI interrupt and priority */
+	desc->dattr = 0x28000000 | ((mport->index) << 20);
+
+	/* Set transfer size aligned to next power of 2 (in double words) */
+	desc->dwcnt = is_power_of_2(len) ? len : 1 << get_bitmask_order(len);
+
+	/* Set snooping and source buffer address */
+	desc->saddr = 0x00000004
+		| rmu->msg_tx_ring.phys_buffer[rmu->msg_tx_ring.tx_slot];
+
+	/* Increment enqueue pointer */
+	omr = in_be32(&rmu->msg_regs->omr);
+	out_be32(&rmu->msg_regs->omr, omr | RIO_MSG_OMR_MUI);
+
+	/* Go to next descriptor */
+	if (++rmu->msg_tx_ring.tx_slot == rmu->msg_tx_ring.size)
+		rmu->msg_tx_ring.tx_slot = 0;
+
+out:
+	return ret;
+}
+
+/**
+ * fsl_open_outb_mbox - Initialize MPC85xx outbound mailbox
+ * @mport: Master port implementing the outbound message unit
+ * @dev_id: Device specific pointer to pass on event
+ * @mbox: Mailbox to open
+ * @entries: Number of entries in the outbound mailbox ring
+ *
+ * Initializes buffer ring, request the outbound message interrupt,
+ * and enables the outbound message unit. Returns %0 on success and
+ * %-EINVAL or %-ENOMEM on failure.
+ */
+int
+fsl_open_outb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entries)
+{
+	int i, j, rc = 0;
+	struct rio_priv *priv = mport->priv;
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(mport);
+
+	if ((entries < RIO_MIN_TX_RING_SIZE) ||
+		(entries > RIO_MAX_TX_RING_SIZE) || (!is_power_of_2(entries))) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* Initialize shadow copy ring */
+	rmu->msg_tx_ring.dev_id = dev_id;
+	rmu->msg_tx_ring.size = entries;
+
+	for (i = 0; i < rmu->msg_tx_ring.size; i++) {
+		rmu->msg_tx_ring.virt_buffer[i] =
+			dma_alloc_coherent(priv->dev, RIO_MSG_BUFFER_SIZE,
+				&rmu->msg_tx_ring.phys_buffer[i], GFP_KERNEL);
+		if (!rmu->msg_tx_ring.virt_buffer[i]) {
+			rc = -ENOMEM;
+			for (j = 0; j < rmu->msg_tx_ring.size; j++)
+				if (rmu->msg_tx_ring.virt_buffer[j])
+					dma_free_coherent(priv->dev,
+							RIO_MSG_BUFFER_SIZE,
+							rmu->msg_tx_ring.
+							virt_buffer[j],
+							rmu->msg_tx_ring.
+							phys_buffer[j]);
+			goto out;
+		}
+	}
+
+	/* Initialize outbound message descriptor ring */
+	rmu->msg_tx_ring.virt = dma_alloc_coherent(priv->dev,
+						   rmu->msg_tx_ring.size * RIO_MSG_DESC_SIZE,
+						   &rmu->msg_tx_ring.phys,
+						   GFP_KERNEL);
+	if (!rmu->msg_tx_ring.virt) {
+		rc = -ENOMEM;
+		goto out_dma;
+	}
+	rmu->msg_tx_ring.tx_slot = 0;
+
+	/* Point dequeue/enqueue pointers at first entry in ring */
+	out_be32(&rmu->msg_regs->odqdpar, rmu->msg_tx_ring.phys);
+	out_be32(&rmu->msg_regs->odqepar, rmu->msg_tx_ring.phys);
+
+	/* Configure for snooping */
+	out_be32(&rmu->msg_regs->osar, 0x00000004);
+
+	/* Clear interrupt status */
+	out_be32(&rmu->msg_regs->osr, 0x000000b3);
+
+	/* Hook up outbound message handler */
+	rc = request_irq(IRQ_RIO_TX(mport), fsl_rio_tx_handler, 0,
+			 "msg_tx", (void *)mport);
+	if (rc < 0)
+		goto out_irq;
+
+	/*
+	 * Configure outbound message unit
+	 *      Snooping
+	 *      Interrupts (all enabled, except QEIE)
+	 *      Chaining mode
+	 *      Disable
+	 */
+	out_be32(&rmu->msg_regs->omr, 0x00100220);
+
+	/* Set number of entries */
+	out_be32(&rmu->msg_regs->omr,
+		 in_be32(&rmu->msg_regs->omr) |
+		 ((get_bitmask_order(entries) - 2) << 12));
+
+	/* Now enable the unit */
+	out_be32(&rmu->msg_regs->omr, in_be32(&rmu->msg_regs->omr) | 0x1);
+
+out:
+	return rc;
+
+out_irq:
+	dma_free_coherent(priv->dev,
+		rmu->msg_tx_ring.size * RIO_MSG_DESC_SIZE,
+		rmu->msg_tx_ring.virt, rmu->msg_tx_ring.phys);
+
+out_dma:
+	for (i = 0; i < rmu->msg_tx_ring.size; i++)
+		dma_free_coherent(priv->dev, RIO_MSG_BUFFER_SIZE,
+		rmu->msg_tx_ring.virt_buffer[i],
+		rmu->msg_tx_ring.phys_buffer[i]);
+
+	return rc;
+}
+
+/**
+ * fsl_close_outb_mbox - Shut down MPC85xx outbound mailbox
+ * @mport: Master port implementing the outbound message unit
+ * @mbox: Mailbox to close
+ *
+ * Disables the outbound message unit, free all buffers, and
+ * frees the outbound message interrupt.
+ */
+void fsl_close_outb_mbox(struct rio_mport *mport, int mbox)
+{
+	struct rio_priv *priv = mport->priv;
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(mport);
+
+	/* Disable inbound message unit */
+	out_be32(&rmu->msg_regs->omr, 0);
+
+	/* Free ring */
+	dma_free_coherent(priv->dev,
+	rmu->msg_tx_ring.size * RIO_MSG_DESC_SIZE,
+	rmu->msg_tx_ring.virt, rmu->msg_tx_ring.phys);
+
+	/* Free interrupt */
+	free_irq(IRQ_RIO_TX(mport), (void *)mport);
+}
+
+/**
+ * fsl_open_inb_mbox - Initialize MPC85xx inbound mailbox
+ * @mport: Master port implementing the inbound message unit
+ * @dev_id: Device specific pointer to pass on event
+ * @mbox: Mailbox to open
+ * @entries: Number of entries in the inbound mailbox ring
+ *
+ * Initializes buffer ring, request the inbound message interrupt,
+ * and enables the inbound message unit. Returns %0 on success
+ * and %-EINVAL or %-ENOMEM on failure.
+ */
+int
+fsl_open_inb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entries)
+{
+	int i, rc = 0;
+	struct rio_priv *priv = mport->priv;
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(mport);
+
+	if ((entries < RIO_MIN_RX_RING_SIZE) ||
+		(entries > RIO_MAX_RX_RING_SIZE) || (!is_power_of_2(entries))) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* Initialize client buffer ring */
+	rmu->msg_rx_ring.dev_id = dev_id;
+	rmu->msg_rx_ring.size = entries;
+	rmu->msg_rx_ring.rx_slot = 0;
+	for (i = 0; i < rmu->msg_rx_ring.size; i++)
+		rmu->msg_rx_ring.virt_buffer[i] = NULL;
+
+	/* Initialize inbound message ring */
+	rmu->msg_rx_ring.virt = dma_alloc_coherent(priv->dev,
+				rmu->msg_rx_ring.size * RIO_MAX_MSG_SIZE,
+				&rmu->msg_rx_ring.phys, GFP_KERNEL);
+	if (!rmu->msg_rx_ring.virt) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Point dequeue/enqueue pointers at first entry in ring */
+	out_be32(&rmu->msg_regs->ifqdpar, (u32) rmu->msg_rx_ring.phys);
+	out_be32(&rmu->msg_regs->ifqepar, (u32) rmu->msg_rx_ring.phys);
+
+	/* Clear interrupt status */
+	out_be32(&rmu->msg_regs->isr, 0x00000091);
+
+	/* Hook up inbound message handler */
+	rc = request_irq(IRQ_RIO_RX(mport), fsl_rio_rx_handler, 0,
+			 "msg_rx", (void *)mport);
+	if (rc < 0) {
+		dma_free_coherent(priv->dev,
+			rmu->msg_rx_ring.size * RIO_MAX_MSG_SIZE,
+			rmu->msg_rx_ring.virt, rmu->msg_rx_ring.phys);
+		goto out;
+	}
+
+	/*
+	 * Configure inbound message unit:
+	 *      Snooping
+	 *      4KB max message size
+	 *      Unmask all interrupt sources
+	 *      Disable
+	 */
+	out_be32(&rmu->msg_regs->imr, 0x001b0060);
+
+	/* Set number of queue entries */
+	setbits32(&rmu->msg_regs->imr, (get_bitmask_order(entries) - 2) << 12);
+
+	/* Now enable the unit */
+	setbits32(&rmu->msg_regs->imr, 0x1);
+
+out:
+	return rc;
+}
+
+/**
+ * fsl_close_inb_mbox - Shut down MPC85xx inbound mailbox
+ * @mport: Master port implementing the inbound message unit
+ * @mbox: Mailbox to close
+ *
+ * Disables the inbound message unit, free all buffers, and
+ * frees the inbound message interrupt.
+ */
+void fsl_close_inb_mbox(struct rio_mport *mport, int mbox)
+{
+	struct rio_priv *priv = mport->priv;
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(mport);
+
+	/* Disable inbound message unit */
+	out_be32(&rmu->msg_regs->imr, 0);
+
+	/* Free ring */
+	dma_free_coherent(priv->dev, rmu->msg_rx_ring.size * RIO_MAX_MSG_SIZE,
+	rmu->msg_rx_ring.virt, rmu->msg_rx_ring.phys);
+
+	/* Free interrupt */
+	free_irq(IRQ_RIO_RX(mport), (void *)mport);
+}
+
+/**
+ * fsl_add_inb_buffer - Add buffer to the MPC85xx inbound message queue
+ * @mport: Master port implementing the inbound message unit
+ * @mbox: Inbound mailbox number
+ * @buf: Buffer to add to inbound queue
+ *
+ * Adds the @buf buffer to the MPC85xx inbound message queue. Returns
+ * %0 on success or %-EINVAL on failure.
+ */
+int fsl_add_inb_buffer(struct rio_mport *mport, int mbox, void *buf)
+{
+	int rc = 0;
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(mport);
+
+	pr_debug("RIO: fsl_add_inb_buffer(), msg_rx_ring.rx_slot %d\n",
+		 rmu->msg_rx_ring.rx_slot);
+
+	if (rmu->msg_rx_ring.virt_buffer[rmu->msg_rx_ring.rx_slot]) {
+		printk(KERN_ERR
+			"RIO: error adding inbound buffer %d, buffer exists\n",
+			rmu->msg_rx_ring.rx_slot);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	rmu->msg_rx_ring.virt_buffer[rmu->msg_rx_ring.rx_slot] = buf;
+	if (++rmu->msg_rx_ring.rx_slot == rmu->msg_rx_ring.size)
+		rmu->msg_rx_ring.rx_slot = 0;
+
+out:
+	return rc;
+}
+
+/**
+ * fsl_get_inb_message - Fetch inbound message from the MPC85xx message unit
+ * @mport: Master port implementing the inbound message unit
+ * @mbox: Inbound mailbox number
+ *
+ * Gets the next available inbound message from the inbound message queue.
+ * A pointer to the message is returned on success or NULL on failure.
+ */
+void *fsl_get_inb_message(struct rio_mport *mport, int mbox)
+{
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(mport);
+	u32 phys_buf;
+	void *virt_buf;
+	void *buf = NULL;
+	int buf_idx;
+
+	phys_buf = in_be32(&rmu->msg_regs->ifqdpar);
+
+	/* If no more messages, then bail out */
+	if (phys_buf == in_be32(&rmu->msg_regs->ifqepar))
+		goto out2;
+
+	virt_buf = rmu->msg_rx_ring.virt + (phys_buf
+						- rmu->msg_rx_ring.phys);
+	buf_idx = (phys_buf - rmu->msg_rx_ring.phys) / RIO_MAX_MSG_SIZE;
+	buf = rmu->msg_rx_ring.virt_buffer[buf_idx];
+
+	if (!buf) {
+		printk(KERN_ERR
+			"RIO: inbound message copy failed, no buffers\n");
+		goto out1;
+	}
+
+	/* Copy max message size, caller is expected to allocate that big */
+	memcpy(buf, virt_buf, RIO_MAX_MSG_SIZE);
+
+	/* Clear the available buffer */
+	rmu->msg_rx_ring.virt_buffer[buf_idx] = NULL;
+
+out1:
+	setbits32(&rmu->msg_regs->imr, RIO_MSG_IMR_MI);
+
+out2:
+	return buf;
+}
+
+/**
+ * fsl_rio_doorbell_init - MPC85xx doorbell interface init
+ * @mport: Master port implementing the inbound doorbell unit
+ *
+ * Initializes doorbell unit hardware and inbound DMA buffer
+ * ring. Called from fsl_rio_setup(). Returns %0 on success
+ * or %-ENOMEM on failure.
+ */
+int fsl_rio_doorbell_init(struct fsl_rio_dbell *dbell)
+{
+	int rc = 0;
+
+	/* Initialize inbound doorbells */
+	dbell->dbell_ring.virt = dma_alloc_coherent(dbell->dev, 512 *
+		DOORBELL_MESSAGE_SIZE, &dbell->dbell_ring.phys, GFP_KERNEL);
+	if (!dbell->dbell_ring.virt) {
+		printk(KERN_ERR "RIO: unable allocate inbound doorbell ring\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Point dequeue/enqueue pointers at first entry in ring */
+	out_be32(&dbell->dbell_regs->dqdpar, (u32) dbell->dbell_ring.phys);
+	out_be32(&dbell->dbell_regs->dqepar, (u32) dbell->dbell_ring.phys);
+
+	/* Clear interrupt status */
+	out_be32(&dbell->dbell_regs->dsr, 0x00000091);
+
+	/* Hook up doorbell handler */
+	rc = request_irq(IRQ_RIO_BELL(dbell), fsl_rio_dbell_handler, 0,
+			 "dbell_rx", (void *)dbell);
+	if (rc < 0) {
+		dma_free_coherent(dbell->dev, 512 * DOORBELL_MESSAGE_SIZE,
+			 dbell->dbell_ring.virt, dbell->dbell_ring.phys);
+		printk(KERN_ERR
+			"MPC85xx RIO: unable to request inbound doorbell irq");
+		goto out;
+	}
+
+	/* Configure doorbells for snooping, 512 entries, and enable */
+	out_be32(&dbell->dbell_regs->dmr, 0x00108161);
+
+out:
+	return rc;
+}
+
+int fsl_rio_setup_rmu(struct rio_mport *mport, struct device_node *node)
+{
+	struct rio_priv *priv;
+	struct fsl_rmu *rmu;
+	u64 msg_start;
+
+	if (!mport || !mport->priv)
+		return -EINVAL;
+
+	priv = mport->priv;
+
+	if (!node) {
+		dev_warn(priv->dev, "Can't get %pOF property 'fsl,rmu'\n",
+			priv->dev->of_node);
+		return -EINVAL;
+	}
+
+	rmu = kzalloc(sizeof(struct fsl_rmu), GFP_KERNEL);
+	if (!rmu)
+		return -ENOMEM;
+
+	if (of_property_read_reg(node, 0, &msg_start, NULL)) {
+		pr_err("%pOF: unable to find 'reg' property of message-unit\n",
+			node);
+		kfree(rmu);
+		return -ENOMEM;
+	}
+	rmu->msg_regs = (struct rio_msg_regs *)
+			(rmu_regs_win + (u32)msg_start);
+
+	rmu->txirq = irq_of_parse_and_map(node, 0);
+	rmu->rxirq = irq_of_parse_and_map(node, 1);
+	printk(KERN_INFO "%pOF: txirq: %d, rxirq %d\n",
+		node, rmu->txirq, rmu->rxirq);
+
+	priv->rmm_handle = rmu;
+
+	rio_init_dbell_res(&mport->riores[RIO_DOORBELL_RESOURCE], 0, 0xffff);
+	rio_init_mbox_res(&mport->riores[RIO_INB_MBOX_RESOURCE], 0, 0);
+	rio_init_mbox_res(&mport->riores[RIO_OUTB_MBOX_RESOURCE], 0, 0);
+
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
new file mode 100644
index 0000000000..3949ceb79e
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * FSL SoC setup code
+ *
+ * Maintained by Kumar Gala (see MAINTAINERS for contact information)
+ *
+ * 2006 (c) MontaVista Software, Inc.
+ * Vitaly Bordug <vbordug@ru.mvista.com>
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/export.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/phy.h>
+#include <linux/spi/spi.h>
+#include <linux/fsl_devices.h>
+#include <linux/reboot.h>
+
+#include <linux/atomic.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <sysdev/fsl_soc.h>
+#include <mm/mmu_decl.h>
+#include <asm/cpm2.h>
+#include <asm/fsl_hcalls.h>	/* For the Freescale hypervisor */
+
+static phys_addr_t immrbase = -1;
+
+phys_addr_t get_immrbase(void)
+{
+	struct device_node *soc;
+
+	if (immrbase != -1)
+		return immrbase;
+
+	soc = of_find_node_by_type(NULL, "soc");
+	if (soc) {
+		struct resource res;
+
+		if (!of_range_to_resource(soc, 0, &res))
+			immrbase = res.start;
+
+		of_node_put(soc);
+	}
+
+	return immrbase;
+}
+
+EXPORT_SYMBOL(get_immrbase);
+
+u32 fsl_get_sys_freq(void)
+{
+	static u32 sysfreq = -1;
+	struct device_node *soc;
+
+	if (sysfreq != -1)
+		return sysfreq;
+
+	soc = of_find_node_by_type(NULL, "soc");
+	if (!soc)
+		return -1;
+
+	of_property_read_u32(soc, "clock-frequency", &sysfreq);
+	if (sysfreq == -1 || !sysfreq)
+		of_property_read_u32(soc, "bus-frequency", &sysfreq);
+
+	of_node_put(soc);
+	return sysfreq;
+}
+EXPORT_SYMBOL(fsl_get_sys_freq);
+
+#if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE)
+
+u32 get_brgfreq(void)
+{
+	static u32 brgfreq = -1;
+	struct device_node *node;
+
+	if (brgfreq != -1)
+		return brgfreq;
+
+	node = of_find_compatible_node(NULL, NULL, "fsl,cpm-brg");
+	if (node) {
+		of_property_read_u32(node, "clock-frequency", &brgfreq);
+		of_node_put(node);
+		return brgfreq;
+	}
+
+	/* Legacy device binding -- will go away when no users are left. */
+	node = of_find_node_by_type(NULL, "cpm");
+	if (!node)
+		node = of_find_compatible_node(NULL, NULL, "fsl,qe");
+	if (!node)
+		node = of_find_node_by_type(NULL, "qe");
+
+	if (node) {
+		of_property_read_u32(node, "brg-frequency", &brgfreq);
+		if (brgfreq == -1 || !brgfreq)
+			if (!of_property_read_u32(node, "bus-frequency",
+						  &brgfreq))
+				brgfreq /= 2;
+		of_node_put(node);
+	}
+
+	return brgfreq;
+}
+
+EXPORT_SYMBOL(get_brgfreq);
+
+u32 get_baudrate(void)
+{
+	static u32 fs_baudrate = -1;
+	struct device_node *node;
+
+	if (fs_baudrate != -1)
+		return fs_baudrate;
+
+	node = of_find_node_by_type(NULL, "serial");
+	if (node) {
+		of_property_read_u32(node, "current-speed", &fs_baudrate);
+		of_node_put(node);
+	}
+
+	return fs_baudrate;
+}
+
+EXPORT_SYMBOL(get_baudrate);
+#endif /* CONFIG_CPM2 */
+
+#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx)
+static __be32 __iomem *rstcr;
+
+static int fsl_rstcr_restart(struct notifier_block *this,
+			     unsigned long mode, void *cmd)
+{
+	local_irq_disable();
+	/* set reset control register */
+	out_be32(rstcr, 0x2);	/* HRESET_REQ */
+
+	return NOTIFY_DONE;
+}
+
+static int __init setup_rstcr(void)
+{
+	struct device_node *np;
+
+	static struct notifier_block restart_handler = {
+		.notifier_call = fsl_rstcr_restart,
+		.priority = 128,
+	};
+
+	for_each_node_by_name(np, "global-utilities") {
+		if (of_property_read_bool(np, "fsl,has-rstcr")) {
+			rstcr = of_iomap(np, 0) + 0xb0;
+			if (!rstcr) {
+				printk (KERN_ERR "Error: reset control "
+						"register not mapped!\n");
+			} else {
+				register_restart_handler(&restart_handler);
+			}
+			break;
+		}
+	}
+
+	of_node_put(np);
+
+	return 0;
+}
+
+arch_initcall(setup_rstcr);
+
+#endif
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+struct platform_diu_data_ops diu_ops;
+EXPORT_SYMBOL(diu_ops);
+#endif
+
+#ifdef CONFIG_EPAPR_PARAVIRT
+/*
+ * Restart the current partition
+ *
+ * This function should be assigned to the ppc_md.restart function pointer,
+ * to initiate a partition restart when we're running under the Freescale
+ * hypervisor.
+ */
+void __noreturn fsl_hv_restart(char *cmd)
+{
+	pr_info("hv restart\n");
+	fh_partition_restart(-1);
+	while (1) ;
+}
+
+/*
+ * Halt the current partition
+ *
+ * This function should be assigned to the pm_power_off and ppc_md.halt
+ * function pointers, to shut down the partition when we're running under
+ * the Freescale hypervisor.
+ */
+void __noreturn fsl_hv_halt(void)
+{
+	pr_info("hv exit\n");
+	fh_partition_stop(-1);
+	while (1) ;
+}
+#endif
diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h
new file mode 100644
index 0000000000..db11b06eb3
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_soc.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PPC_FSL_SOC_H
+#define __PPC_FSL_SOC_H
+#ifdef __KERNEL__
+
+#include <asm/mmu.h>
+
+struct spi_device;
+
+extern phys_addr_t get_immrbase(void);
+#if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE)
+extern u32 get_brgfreq(void);
+extern u32 get_baudrate(void);
+#else
+static inline u32 get_brgfreq(void) { return -1; }
+static inline u32 get_baudrate(void) { return -1; }
+#endif
+extern u32 fsl_get_sys_freq(void);
+
+struct spi_board_info;
+struct device_node;
+
+/* The different ports that the DIU can be connected to */
+enum fsl_diu_monitor_port {
+	FSL_DIU_PORT_DVI,	/* DVI */
+	FSL_DIU_PORT_LVDS,	/* Single-link LVDS */
+	FSL_DIU_PORT_DLVDS	/* Dual-link LVDS */
+};
+
+struct platform_diu_data_ops {
+	u32 (*get_pixel_format)(enum fsl_diu_monitor_port port,
+		unsigned int bpp);
+	void (*set_gamma_table)(enum fsl_diu_monitor_port port,
+		char *gamma_table_base);
+	void (*set_monitor_port)(enum fsl_diu_monitor_port port);
+	void (*set_pixel_clock)(unsigned int pixclock);
+	enum fsl_diu_monitor_port (*valid_monitor_port)
+		(enum fsl_diu_monitor_port port);
+	void (*release_bootmem)(void);
+};
+
+extern struct platform_diu_data_ops diu_ops;
+
+void __noreturn fsl_hv_restart(char *cmd);
+void __noreturn fsl_hv_halt(void);
+
+#endif
+#endif
diff --git a/arch/powerpc/sysdev/ge/Makefile b/arch/powerpc/sysdev/ge/Makefile
new file mode 100644
index 0000000000..a63fdb3790
--- /dev/null
+++ b/arch/powerpc/sysdev/ge/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_GE_FPGA)		+= ge_pic.o
diff --git a/arch/powerpc/sysdev/ge/ge_pic.c b/arch/powerpc/sysdev/ge/ge_pic.c
new file mode 100644
index 0000000000..a6c424680c
--- /dev/null
+++ b/arch/powerpc/sysdev/ge/ge_pic.c
@@ -0,0 +1,253 @@
+/*
+ * Interrupt handling for GE FPGA based PIC
+ *
+ * Author: Martyn Welch <martyn.welch@ge.com>
+ *
+ * 2008 (c) GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/interrupt.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/spinlock.h>
+
+#include <asm/byteorder.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+#include "ge_pic.h"
+
+#define DEBUG
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) do { printk(KERN_DEBUG "gef_pic: " fmt); } while (0)
+#else
+#define DBG(fmt...) do { } while (0)
+#endif
+
+#define GEF_PIC_NUM_IRQS	32
+
+/* Interrupt Controller Interface Registers */
+#define GEF_PIC_INTR_STATUS	0x0000
+
+#define GEF_PIC_INTR_MASK(cpu)	(0x0010 + (0x4 * cpu))
+#define GEF_PIC_CPU0_INTR_MASK	GEF_PIC_INTR_MASK(0)
+#define GEF_PIC_CPU1_INTR_MASK	GEF_PIC_INTR_MASK(1)
+
+#define GEF_PIC_MCP_MASK(cpu)	(0x0018 + (0x4 * cpu))
+#define GEF_PIC_CPU0_MCP_MASK	GEF_PIC_MCP_MASK(0)
+#define GEF_PIC_CPU1_MCP_MASK	GEF_PIC_MCP_MASK(1)
+
+
+static DEFINE_RAW_SPINLOCK(gef_pic_lock);
+
+static void __iomem *gef_pic_irq_reg_base;
+static struct irq_domain *gef_pic_irq_host;
+static int gef_pic_cascade_irq;
+
+/*
+ * Interrupt Controller Handling
+ *
+ * The interrupt controller handles interrupts for most on board interrupts,
+ * apart from PCI interrupts. For example on SBC610:
+ *
+ * 17:31 RO Reserved
+ * 16    RO PCI Express Doorbell 3 Status
+ * 15    RO PCI Express Doorbell 2 Status
+ * 14    RO PCI Express Doorbell 1 Status
+ * 13    RO PCI Express Doorbell 0 Status
+ * 12    RO Real Time Clock Interrupt Status
+ * 11    RO Temperature Interrupt Status
+ * 10    RO Temperature Critical Interrupt Status
+ * 9     RO Ethernet PHY1 Interrupt Status
+ * 8     RO Ethernet PHY3 Interrupt Status
+ * 7     RO PEX8548 Interrupt Status
+ * 6     RO Reserved
+ * 5     RO Watchdog 0 Interrupt Status
+ * 4     RO Watchdog 1 Interrupt Status
+ * 3     RO AXIS Message FIFO A Interrupt Status
+ * 2     RO AXIS Message FIFO B Interrupt Status
+ * 1     RO AXIS Message FIFO C Interrupt Status
+ * 0     RO AXIS Message FIFO D Interrupt Status
+ *
+ * Interrupts can be forwarded to one of two output lines. Nothing
+ * clever is done, so if the masks are incorrectly set, a single input
+ * interrupt could generate interrupts on both output lines!
+ *
+ * The dual lines are there to allow the chained interrupts to be easily
+ * passed into two different cores. We currently do not use this functionality
+ * in this driver.
+ *
+ * Controller can also be configured to generate Machine checks (MCP), again on
+ * two lines, to be attached to two different cores. It is suggested that these
+ * should be masked out.
+ */
+
+static void gef_pic_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned int cascade_irq;
+
+	/*
+	 * See if we actually have an interrupt, call generic handling code if
+	 * we do.
+	 */
+	cascade_irq = gef_pic_get_irq();
+
+	if (cascade_irq)
+		generic_handle_irq(cascade_irq);
+
+	chip->irq_eoi(&desc->irq_data);
+}
+
+static void gef_pic_mask(struct irq_data *d)
+{
+	unsigned long flags;
+	unsigned int hwirq = irqd_to_hwirq(d);
+	u32 mask;
+
+	raw_spin_lock_irqsave(&gef_pic_lock, flags);
+	mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0));
+	mask &= ~(1 << hwirq);
+	out_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0), mask);
+	raw_spin_unlock_irqrestore(&gef_pic_lock, flags);
+}
+
+static void gef_pic_mask_ack(struct irq_data *d)
+{
+	/* Don't think we actually have to do anything to ack an interrupt,
+	 * we just need to clear down the devices interrupt and it will go away
+	 */
+	gef_pic_mask(d);
+}
+
+static void gef_pic_unmask(struct irq_data *d)
+{
+	unsigned long flags;
+	unsigned int hwirq = irqd_to_hwirq(d);
+	u32 mask;
+
+	raw_spin_lock_irqsave(&gef_pic_lock, flags);
+	mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0));
+	mask |= (1 << hwirq);
+	out_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0), mask);
+	raw_spin_unlock_irqrestore(&gef_pic_lock, flags);
+}
+
+static struct irq_chip gef_pic_chip = {
+	.name		= "gefp",
+	.irq_mask	= gef_pic_mask,
+	.irq_mask_ack	= gef_pic_mask_ack,
+	.irq_unmask	= gef_pic_unmask,
+};
+
+
+/* When an interrupt is being configured, this call allows some flexibility
+ * in deciding which irq_chip structure is used
+ */
+static int gef_pic_host_map(struct irq_domain *h, unsigned int virq,
+			  irq_hw_number_t hwirq)
+{
+	/* All interrupts are LEVEL sensitive */
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_chip_and_handler(virq, &gef_pic_chip, handle_level_irq);
+
+	return 0;
+}
+
+static int gef_pic_host_xlate(struct irq_domain *h, struct device_node *ct,
+			    const u32 *intspec, unsigned int intsize,
+			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+
+	*out_hwirq = intspec[0];
+	if (intsize > 1)
+		*out_flags = intspec[1];
+	else
+		*out_flags = IRQ_TYPE_LEVEL_HIGH;
+
+	return 0;
+}
+
+static const struct irq_domain_ops gef_pic_host_ops = {
+	.map	= gef_pic_host_map,
+	.xlate	= gef_pic_host_xlate,
+};
+
+
+/*
+ * Initialisation of PIC, this should be called in BSP
+ */
+void __init gef_pic_init(struct device_node *np)
+{
+	unsigned long flags;
+
+	/* Map the devices registers into memory */
+	gef_pic_irq_reg_base = of_iomap(np, 0);
+
+	raw_spin_lock_irqsave(&gef_pic_lock, flags);
+
+	/* Initialise everything as masked. */
+	out_be32(gef_pic_irq_reg_base + GEF_PIC_CPU0_INTR_MASK, 0);
+	out_be32(gef_pic_irq_reg_base + GEF_PIC_CPU1_INTR_MASK, 0);
+
+	out_be32(gef_pic_irq_reg_base + GEF_PIC_CPU0_MCP_MASK, 0);
+	out_be32(gef_pic_irq_reg_base + GEF_PIC_CPU1_MCP_MASK, 0);
+
+	raw_spin_unlock_irqrestore(&gef_pic_lock, flags);
+
+	/* Map controller */
+	gef_pic_cascade_irq = irq_of_parse_and_map(np, 0);
+	if (!gef_pic_cascade_irq) {
+		printk(KERN_ERR "SBC610: failed to map cascade interrupt");
+		return;
+	}
+
+	/* Setup an irq_domain structure */
+	gef_pic_irq_host = irq_domain_add_linear(np, GEF_PIC_NUM_IRQS,
+					  &gef_pic_host_ops, NULL);
+	if (gef_pic_irq_host == NULL)
+		return;
+
+	/* Chain with parent controller */
+	irq_set_chained_handler(gef_pic_cascade_irq, gef_pic_cascade);
+}
+
+/*
+ * This is called when we receive an interrupt with apparently comes from this
+ * chip - check, returning the highest interrupt generated or return 0.
+ */
+unsigned int gef_pic_get_irq(void)
+{
+	u32 cause, mask, active;
+	unsigned int virq = 0;
+	int hwirq;
+
+	cause = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_STATUS);
+
+	mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0));
+
+	active = cause & mask;
+
+	if (active) {
+		for (hwirq = GEF_PIC_NUM_IRQS - 1; hwirq > -1; hwirq--) {
+			if (active & (0x1 << hwirq))
+				break;
+		}
+		virq = irq_linear_revmap(gef_pic_irq_host,
+			(irq_hw_number_t)hwirq);
+	}
+
+	return virq;
+}
+
diff --git a/arch/powerpc/sysdev/ge/ge_pic.h b/arch/powerpc/sysdev/ge/ge_pic.h
new file mode 100644
index 0000000000..923dedba3c
--- /dev/null
+++ b/arch/powerpc/sysdev/ge/ge_pic.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __GEF_PIC_H__
+#define __GEF_PIC_H__
+
+unsigned int gef_pic_get_irq(void);
+void gef_pic_init(struct device_node *);
+
+#endif /* __GEF_PIC_H__ */
+
diff --git a/arch/powerpc/sysdev/grackle.c b/arch/powerpc/sysdev/grackle.c
new file mode 100644
index 0000000000..fd2f94a884
--- /dev/null
+++ b/arch/powerpc/sysdev/grackle.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Functions for setting up and using a MPC106 northbridge
+ * Extracted from arch/powerpc/platforms/powermac/pci.c.
+ *
+ * Copyright (C) 2003 Benjamin Herrenschmuidt (benh@kernel.crashing.org)
+ * Copyright (C) 1997 Paul Mackerras (paulus@samba.org)
+ */
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/of.h>
+
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/grackle.h>
+
+#define GRACKLE_CFA(b, d, o)	(0x80 | ((b) << 8) | ((d) << 16) \
+				 | (((o) & ~3) << 24))
+
+#define GRACKLE_PICR1_STG		0x00000040
+#define GRACKLE_PICR1_LOOPSNOOP		0x00000010
+
+/* N.B. this is called before bridges is initialized, so we can't
+   use grackle_pcibios_{read,write}_config_dword. */
+static inline void grackle_set_stg(struct pci_controller* bp, int enable)
+{
+	unsigned int val;
+
+	out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8));
+	val = in_le32(bp->cfg_data);
+	val = enable? (val | GRACKLE_PICR1_STG) :
+		(val & ~GRACKLE_PICR1_STG);
+	out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8));
+	out_le32(bp->cfg_data, val);
+	(void)in_le32(bp->cfg_data);
+}
+
+static inline void grackle_set_loop_snoop(struct pci_controller *bp, int enable)
+{
+	unsigned int val;
+
+	out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8));
+	val = in_le32(bp->cfg_data);
+	val = enable? (val | GRACKLE_PICR1_LOOPSNOOP) :
+		(val & ~GRACKLE_PICR1_LOOPSNOOP);
+	out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8));
+	out_le32(bp->cfg_data, val);
+	(void)in_le32(bp->cfg_data);
+}
+
+void __init setup_grackle(struct pci_controller *hose)
+{
+	setup_indirect_pci(hose, 0xfec00000, 0xfee00000, 0);
+	if (of_machine_is_compatible("PowerMac1,1"))
+		pci_add_flags(PCI_REASSIGN_ALL_BUS);
+	if (of_machine_is_compatible("AAPL,PowerBook1998"))
+		grackle_set_loop_snoop(hose, 1);
+#if 0	/* Disabled for now, HW problems ??? */
+	grackle_set_stg(hose, 1);
+#endif
+}
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
new file mode 100644
index 0000000000..06e391485d
--- /dev/null
+++ b/arch/powerpc/sysdev/i8259.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * i8259 interrupt controller driver.
+ */
+#undef DEBUG
+
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <asm/io.h>
+#include <asm/i8259.h>
+
+static volatile void __iomem *pci_intack; /* RO, gives us the irq vector */
+
+static unsigned char cached_8259[2] = { 0xff, 0xff };
+#define cached_A1 (cached_8259[0])
+#define cached_21 (cached_8259[1])
+
+static DEFINE_RAW_SPINLOCK(i8259_lock);
+
+static struct irq_domain *i8259_host;
+
+/*
+ * Acknowledge the IRQ using either the PCI host bridge's interrupt
+ * acknowledge feature or poll.  How i8259_init() is called determines
+ * which is called.  It should be noted that polling is broken on some
+ * IBM and Motorola PReP boxes so we must use the int-ack feature on them.
+ */
+unsigned int i8259_irq(void)
+{
+	int irq;
+	int lock = 0;
+
+	/* Either int-ack or poll for the IRQ */
+	if (pci_intack)
+		irq = readb(pci_intack);
+	else {
+		raw_spin_lock(&i8259_lock);
+		lock = 1;
+
+		/* Perform an interrupt acknowledge cycle on controller 1. */
+		outb(0x0C, 0x20);		/* prepare for poll */
+		irq = inb(0x20) & 7;
+		if (irq == 2 ) {
+			/*
+			 * Interrupt is cascaded so perform interrupt
+			 * acknowledge on controller 2.
+			 */
+			outb(0x0C, 0xA0);	/* prepare for poll */
+			irq = (inb(0xA0) & 7) + 8;
+		}
+	}
+
+	if (irq == 7) {
+		/*
+		 * This may be a spurious interrupt.
+		 *
+		 * Read the interrupt status register (ISR). If the most
+		 * significant bit is not set then there is no valid
+		 * interrupt.
+		 */
+		if (!pci_intack)
+			outb(0x0B, 0x20);	/* ISR register */
+		if(~inb(0x20) & 0x80)
+			irq = 0;
+	} else if (irq == 0xff)
+		irq = 0;
+
+	if (lock)
+		raw_spin_unlock(&i8259_lock);
+	return irq;
+}
+
+static void i8259_mask_and_ack_irq(struct irq_data *d)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&i8259_lock, flags);
+	if (d->irq > 7) {
+		cached_A1 |= 1 << (d->irq-8);
+		inb(0xA1); 	/* DUMMY */
+		outb(cached_A1, 0xA1);
+		outb(0x20, 0xA0);	/* Non-specific EOI */
+		outb(0x20, 0x20);	/* Non-specific EOI to cascade */
+	} else {
+		cached_21 |= 1 << d->irq;
+		inb(0x21); 	/* DUMMY */
+		outb(cached_21, 0x21);
+		outb(0x20, 0x20);	/* Non-specific EOI */
+	}
+	raw_spin_unlock_irqrestore(&i8259_lock, flags);
+}
+
+static void i8259_set_irq_mask(int irq_nr)
+{
+	outb(cached_A1,0xA1);
+	outb(cached_21,0x21);
+}
+
+static void i8259_mask_irq(struct irq_data *d)
+{
+	unsigned long flags;
+
+	pr_debug("i8259_mask_irq(%d)\n", d->irq);
+
+	raw_spin_lock_irqsave(&i8259_lock, flags);
+	if (d->irq < 8)
+		cached_21 |= 1 << d->irq;
+	else
+		cached_A1 |= 1 << (d->irq-8);
+	i8259_set_irq_mask(d->irq);
+	raw_spin_unlock_irqrestore(&i8259_lock, flags);
+}
+
+static void i8259_unmask_irq(struct irq_data *d)
+{
+	unsigned long flags;
+
+	pr_debug("i8259_unmask_irq(%d)\n", d->irq);
+
+	raw_spin_lock_irqsave(&i8259_lock, flags);
+	if (d->irq < 8)
+		cached_21 &= ~(1 << d->irq);
+	else
+		cached_A1 &= ~(1 << (d->irq-8));
+	i8259_set_irq_mask(d->irq);
+	raw_spin_unlock_irqrestore(&i8259_lock, flags);
+}
+
+static struct irq_chip i8259_pic = {
+	.name		= "i8259",
+	.irq_mask	= i8259_mask_irq,
+	.irq_disable	= i8259_mask_irq,
+	.irq_unmask	= i8259_unmask_irq,
+	.irq_mask_ack	= i8259_mask_and_ack_irq,
+};
+
+static struct resource pic1_iores = {
+	.name = "8259 (master)",
+	.start = 0x20,
+	.end = 0x21,
+	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
+};
+
+static struct resource pic2_iores = {
+	.name = "8259 (slave)",
+	.start = 0xa0,
+	.end = 0xa1,
+	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
+};
+
+static struct resource pic_edgectrl_iores = {
+	.name = "8259 edge control",
+	.start = 0x4d0,
+	.end = 0x4d1,
+	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
+};
+
+static int i8259_host_match(struct irq_domain *h, struct device_node *node,
+			    enum irq_domain_bus_token bus_token)
+{
+	struct device_node *of_node = irq_domain_get_of_node(h);
+	return of_node == NULL || of_node == node;
+}
+
+static int i8259_host_map(struct irq_domain *h, unsigned int virq,
+			  irq_hw_number_t hw)
+{
+	pr_debug("i8259_host_map(%d, 0x%lx)\n", virq, hw);
+
+	/* We block the internal cascade */
+	if (hw == 2)
+		irq_set_status_flags(virq, IRQ_NOREQUEST);
+
+	/* We use the level handler only for now, we might want to
+	 * be more cautious here but that works for now
+	 */
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_chip_and_handler(virq, &i8259_pic, handle_level_irq);
+	return 0;
+}
+
+static int i8259_host_xlate(struct irq_domain *h, struct device_node *ct,
+			    const u32 *intspec, unsigned int intsize,
+			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+	static unsigned char map_isa_senses[4] = {
+		IRQ_TYPE_LEVEL_LOW,
+		IRQ_TYPE_LEVEL_HIGH,
+		IRQ_TYPE_EDGE_FALLING,
+		IRQ_TYPE_EDGE_RISING,
+	};
+
+	*out_hwirq = intspec[0];
+	if (intsize > 1 && intspec[1] < 4)
+		*out_flags = map_isa_senses[intspec[1]];
+	else
+		*out_flags = IRQ_TYPE_NONE;
+
+	return 0;
+}
+
+static const struct irq_domain_ops i8259_host_ops = {
+	.match = i8259_host_match,
+	.map = i8259_host_map,
+	.xlate = i8259_host_xlate,
+};
+
+struct irq_domain *__init i8259_get_host(void)
+{
+	return i8259_host;
+}
+
+/**
+ * i8259_init - Initialize the legacy controller
+ * @node: device node of the legacy PIC (can be NULL, but then, it will match
+ *        all interrupts, so beware)
+ * @intack_addr: PCI interrupt acknowledge (real) address which will return
+ *             	 the active irq from the 8259
+ */
+void i8259_init(struct device_node *node, unsigned long intack_addr)
+{
+	unsigned long flags;
+
+	/* initialize the controller */
+	raw_spin_lock_irqsave(&i8259_lock, flags);
+
+	/* Mask all first */
+	outb(0xff, 0xA1);
+	outb(0xff, 0x21);
+
+	/* init master interrupt controller */
+	outb(0x11, 0x20); /* Start init sequence */
+	outb(0x00, 0x21); /* Vector base */
+	outb(0x04, 0x21); /* edge triggered, Cascade (slave) on IRQ2 */
+	outb(0x01, 0x21); /* Select 8086 mode */
+
+	/* init slave interrupt controller */
+	outb(0x11, 0xA0); /* Start init sequence */
+	outb(0x08, 0xA1); /* Vector base */
+	outb(0x02, 0xA1); /* edge triggered, Cascade (slave) on IRQ2 */
+	outb(0x01, 0xA1); /* Select 8086 mode */
+
+	/* That thing is slow */
+	udelay(100);
+
+	/* always read ISR */
+	outb(0x0B, 0x20);
+	outb(0x0B, 0xA0);
+
+	/* Unmask the internal cascade */
+	cached_21 &= ~(1 << 2);
+
+	/* Set interrupt masks */
+	outb(cached_A1, 0xA1);
+	outb(cached_21, 0x21);
+
+	raw_spin_unlock_irqrestore(&i8259_lock, flags);
+
+	/* create a legacy host */
+	i8259_host = irq_domain_add_legacy(node, NR_IRQS_LEGACY, 0, 0,
+					   &i8259_host_ops, NULL);
+	if (i8259_host == NULL) {
+		printk(KERN_ERR "i8259: failed to allocate irq host !\n");
+		return;
+	}
+
+	/* reserve our resources */
+	/* XXX should we continue doing that ? it seems to cause problems
+	 * with further requesting of PCI IO resources for that range...
+	 * need to look into it.
+	 */
+	request_resource(&ioport_resource, &pic1_iores);
+	request_resource(&ioport_resource, &pic2_iores);
+	request_resource(&ioport_resource, &pic_edgectrl_iores);
+
+	if (intack_addr != 0)
+		pci_intack = ioremap(intack_addr, 1);
+
+	printk(KERN_INFO "i8259 legacy interrupt controller initialized\n");
+}
diff --git a/arch/powerpc/sysdev/indirect_pci.c b/arch/powerpc/sysdev/indirect_pci.c
new file mode 100644
index 0000000000..1aacb403a0
--- /dev/null
+++ b/arch/powerpc/sysdev/indirect_pci.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Support for indirect PCI bridges.
+ *
+ * Copyright (C) 1998 Gabriel Paubert.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+
+int __indirect_read_config(struct pci_controller *hose,
+			   unsigned char bus_number, unsigned int devfn,
+			   int offset, int len, u32 *val)
+{
+	volatile void __iomem *cfg_data;
+	u8 cfg_type = 0;
+	u32 bus_no, reg;
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_NO_PCIE_LINK) {
+		if (bus_number != hose->first_busno)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+		if (devfn != 0)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	if (ppc_md.pci_exclude_device)
+		if (ppc_md.pci_exclude_device(hose, bus_number, devfn))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_SET_CFG_TYPE)
+		if (bus_number != hose->first_busno)
+			cfg_type = 1;
+
+	bus_no = (bus_number == hose->first_busno) ?
+			hose->self_busno : bus_number;
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_EXT_REG)
+		reg = ((offset & 0xf00) << 16) | (offset & 0xfc);
+	else
+		reg = offset & 0xfc;
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_BIG_ENDIAN)
+		out_be32(hose->cfg_addr, (0x80000000 | (bus_no << 16) |
+			 (devfn << 8) | reg | cfg_type));
+	else
+		out_le32(hose->cfg_addr, (0x80000000 | (bus_no << 16) |
+			 (devfn << 8) | reg | cfg_type));
+
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	cfg_data = hose->cfg_data + (offset & 3);
+	switch (len) {
+	case 1:
+		*val = in_8(cfg_data);
+		break;
+	case 2:
+		*val = in_le16(cfg_data);
+		break;
+	default:
+		*val = in_le32(cfg_data);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+int indirect_read_config(struct pci_bus *bus, unsigned int devfn,
+			 int offset, int len, u32 *val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+
+	return __indirect_read_config(hose, bus->number, devfn, offset, len,
+				      val);
+}
+
+int indirect_write_config(struct pci_bus *bus, unsigned int devfn,
+			  int offset, int len, u32 val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	volatile void __iomem *cfg_data;
+	u8 cfg_type = 0;
+	u32 bus_no, reg;
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_NO_PCIE_LINK) {
+		if (bus->number != hose->first_busno)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+		if (devfn != 0)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	if (ppc_md.pci_exclude_device)
+		if (ppc_md.pci_exclude_device(hose, bus->number, devfn))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_SET_CFG_TYPE)
+		if (bus->number != hose->first_busno)
+			cfg_type = 1;
+
+	bus_no = (bus->number == hose->first_busno) ?
+			hose->self_busno : bus->number;
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_EXT_REG)
+		reg = ((offset & 0xf00) << 16) | (offset & 0xfc);
+	else
+		reg = offset & 0xfc;
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_BIG_ENDIAN)
+		out_be32(hose->cfg_addr, (0x80000000 | (bus_no << 16) |
+			 (devfn << 8) | reg | cfg_type));
+	else
+		out_le32(hose->cfg_addr, (0x80000000 | (bus_no << 16) |
+			 (devfn << 8) | reg | cfg_type));
+
+	/* suppress setting of PCI_PRIMARY_BUS */
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS)
+		if ((offset == PCI_PRIMARY_BUS) &&
+			(bus->number == hose->first_busno))
+		val &= 0xffffff00;
+
+	/* Workaround for PCI_28 Errata in 440EPx/GRx */
+	if ((hose->indirect_type & PPC_INDIRECT_TYPE_BROKEN_MRM) &&
+			offset == PCI_CACHE_LINE_SIZE) {
+		val = 0;
+	}
+
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	cfg_data = hose->cfg_data + (offset & 3);
+	switch (len) {
+	case 1:
+		out_8(cfg_data, val);
+		break;
+	case 2:
+		out_le16(cfg_data, val);
+		break;
+	default:
+		out_le32(cfg_data, val);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops indirect_pci_ops =
+{
+	.read = indirect_read_config,
+	.write = indirect_write_config,
+};
+
+void setup_indirect_pci(struct pci_controller *hose, resource_size_t cfg_addr,
+			resource_size_t cfg_data, u32 flags)
+{
+	resource_size_t base = cfg_addr & PAGE_MASK;
+	void __iomem *mbase;
+
+	mbase = ioremap(base, PAGE_SIZE);
+	hose->cfg_addr = mbase + (cfg_addr & ~PAGE_MASK);
+	if ((cfg_data & PAGE_MASK) != base)
+		mbase = ioremap(cfg_data & PAGE_MASK, PAGE_SIZE);
+	hose->cfg_data = mbase + (cfg_data & ~PAGE_MASK);
+	hose->ops = &indirect_pci_ops;
+	hose->indirect_type = flags;
+}
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
new file mode 100644
index 0000000000..5f69e2d50f
--- /dev/null
+++ b/arch/powerpc/sysdev/ipic.c
@@ -0,0 +1,889 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/sysdev/ipic.c
+ *
+ * IPIC routines implementations.
+ *
+ * Copyright 2005 Freescale Semiconductor, Inc.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/stddef.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/syscore_ops.h>
+#include <linux/device.h>
+#include <linux/spinlock.h>
+#include <linux/fsl_devices.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/ipic.h>
+
+#include "ipic.h"
+
+static struct ipic * primary_ipic;
+static struct irq_chip ipic_level_irq_chip, ipic_edge_irq_chip;
+static DEFINE_RAW_SPINLOCK(ipic_lock);
+
+static struct ipic_info ipic_info[] = {
+	[1] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_C,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 16,
+		.prio_mask = 0,
+	},
+	[2] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_C,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 17,
+		.prio_mask = 1,
+	},
+	[3] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_C,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 18,
+		.prio_mask = 2,
+	},
+	[4] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_C,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 19,
+		.prio_mask = 3,
+	},
+	[5] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_C,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 20,
+		.prio_mask = 4,
+	},
+	[6] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_C,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 21,
+		.prio_mask = 5,
+	},
+	[7] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_C,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 22,
+		.prio_mask = 6,
+	},
+	[8] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_C,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 23,
+		.prio_mask = 7,
+	},
+	[9] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_D,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 24,
+		.prio_mask = 0,
+	},
+	[10] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_D,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 25,
+		.prio_mask = 1,
+	},
+	[11] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_D,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 26,
+		.prio_mask = 2,
+	},
+	[12] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_D,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 27,
+		.prio_mask = 3,
+	},
+	[13] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_D,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 28,
+		.prio_mask = 4,
+	},
+	[14] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_D,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 29,
+		.prio_mask = 5,
+	},
+	[15] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_D,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 30,
+		.prio_mask = 6,
+	},
+	[16] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_D,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 31,
+		.prio_mask = 7,
+	},
+	[17] = {
+		.ack	= IPIC_SEPNR,
+		.mask	= IPIC_SEMSR,
+		.prio	= IPIC_SMPRR_A,
+		.force	= IPIC_SEFCR,
+		.bit	= 1,
+		.prio_mask = 5,
+	},
+	[18] = {
+		.ack	= IPIC_SEPNR,
+		.mask	= IPIC_SEMSR,
+		.prio	= IPIC_SMPRR_A,
+		.force	= IPIC_SEFCR,
+		.bit	= 2,
+		.prio_mask = 6,
+	},
+	[19] = {
+		.ack	= IPIC_SEPNR,
+		.mask	= IPIC_SEMSR,
+		.prio	= IPIC_SMPRR_A,
+		.force	= IPIC_SEFCR,
+		.bit	= 3,
+		.prio_mask = 7,
+	},
+	[20] = {
+		.ack	= IPIC_SEPNR,
+		.mask	= IPIC_SEMSR,
+		.prio	= IPIC_SMPRR_B,
+		.force	= IPIC_SEFCR,
+		.bit	= 4,
+		.prio_mask = 4,
+	},
+	[21] = {
+		.ack	= IPIC_SEPNR,
+		.mask	= IPIC_SEMSR,
+		.prio	= IPIC_SMPRR_B,
+		.force	= IPIC_SEFCR,
+		.bit	= 5,
+		.prio_mask = 5,
+	},
+	[22] = {
+		.ack	= IPIC_SEPNR,
+		.mask	= IPIC_SEMSR,
+		.prio	= IPIC_SMPRR_B,
+		.force	= IPIC_SEFCR,
+		.bit	= 6,
+		.prio_mask = 6,
+	},
+	[23] = {
+		.ack	= IPIC_SEPNR,
+		.mask	= IPIC_SEMSR,
+		.prio	= IPIC_SMPRR_B,
+		.force	= IPIC_SEFCR,
+		.bit	= 7,
+		.prio_mask = 7,
+	},
+	[32] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_A,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 0,
+		.prio_mask = 0,
+	},
+	[33] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_A,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 1,
+		.prio_mask = 1,
+	},
+	[34] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_A,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 2,
+		.prio_mask = 2,
+	},
+	[35] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_A,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 3,
+		.prio_mask = 3,
+	},
+	[36] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_A,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 4,
+		.prio_mask = 4,
+	},
+	[37] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_A,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 5,
+		.prio_mask = 5,
+	},
+	[38] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_A,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 6,
+		.prio_mask = 6,
+	},
+	[39] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_A,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 7,
+		.prio_mask = 7,
+	},
+	[40] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_B,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 8,
+		.prio_mask = 0,
+	},
+	[41] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_B,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 9,
+		.prio_mask = 1,
+	},
+	[42] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_B,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 10,
+		.prio_mask = 2,
+	},
+	[43] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_B,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 11,
+		.prio_mask = 3,
+	},
+	[44] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_B,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 12,
+		.prio_mask = 4,
+	},
+	[45] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_B,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 13,
+		.prio_mask = 5,
+	},
+	[46] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_B,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 14,
+		.prio_mask = 6,
+	},
+	[47] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_B,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 15,
+		.prio_mask = 7,
+	},
+	[48] = {
+		.ack	= IPIC_SEPNR,
+		.mask	= IPIC_SEMSR,
+		.prio	= IPIC_SMPRR_A,
+		.force	= IPIC_SEFCR,
+		.bit	= 0,
+		.prio_mask = 4,
+	},
+	[64] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= IPIC_SMPRR_A,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 0,
+		.prio_mask = 0,
+	},
+	[65] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= IPIC_SMPRR_A,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 1,
+		.prio_mask = 1,
+	},
+	[66] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= IPIC_SMPRR_A,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 2,
+		.prio_mask = 2,
+	},
+	[67] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= IPIC_SMPRR_A,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 3,
+		.prio_mask = 3,
+	},
+	[68] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= IPIC_SMPRR_B,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 4,
+		.prio_mask = 0,
+	},
+	[69] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= IPIC_SMPRR_B,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 5,
+		.prio_mask = 1,
+	},
+	[70] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= IPIC_SMPRR_B,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 6,
+		.prio_mask = 2,
+	},
+	[71] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= IPIC_SMPRR_B,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 7,
+		.prio_mask = 3,
+	},
+	[72] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 8,
+	},
+	[73] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 9,
+	},
+	[74] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 10,
+	},
+	[75] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 11,
+	},
+	[76] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 12,
+	},
+	[77] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 13,
+	},
+	[78] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 14,
+	},
+	[79] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 15,
+	},
+	[80] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 16,
+	},
+	[81] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 17,
+	},
+	[82] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 18,
+	},
+	[83] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 19,
+	},
+	[84] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 20,
+	},
+	[85] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 21,
+	},
+	[86] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 22,
+	},
+	[87] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 23,
+	},
+	[88] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 24,
+	},
+	[89] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 25,
+	},
+	[90] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 26,
+	},
+	[91] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 27,
+	},
+	[94] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 30,
+	},
+};
+
+static inline u32 ipic_read(volatile u32 __iomem *base, unsigned int reg)
+{
+	return in_be32(base + (reg >> 2));
+}
+
+static inline void ipic_write(volatile u32 __iomem *base, unsigned int reg, u32 value)
+{
+	out_be32(base + (reg >> 2), value);
+}
+
+static inline struct ipic * ipic_from_irq(unsigned int virq)
+{
+	return primary_ipic;
+}
+
+static void ipic_unmask_irq(struct irq_data *d)
+{
+	struct ipic *ipic = ipic_from_irq(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 temp;
+
+	raw_spin_lock_irqsave(&ipic_lock, flags);
+
+	temp = ipic_read(ipic->regs, ipic_info[src].mask);
+	temp |= (1 << (31 - ipic_info[src].bit));
+	ipic_write(ipic->regs, ipic_info[src].mask, temp);
+
+	raw_spin_unlock_irqrestore(&ipic_lock, flags);
+}
+
+static void ipic_mask_irq(struct irq_data *d)
+{
+	struct ipic *ipic = ipic_from_irq(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 temp;
+
+	raw_spin_lock_irqsave(&ipic_lock, flags);
+
+	temp = ipic_read(ipic->regs, ipic_info[src].mask);
+	temp &= ~(1 << (31 - ipic_info[src].bit));
+	ipic_write(ipic->regs, ipic_info[src].mask, temp);
+
+	/* mb() can't guarantee that masking is finished.  But it does finish
+	 * for nearly all cases. */
+	mb();
+
+	raw_spin_unlock_irqrestore(&ipic_lock, flags);
+}
+
+static void ipic_ack_irq(struct irq_data *d)
+{
+	struct ipic *ipic = ipic_from_irq(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 temp;
+
+	raw_spin_lock_irqsave(&ipic_lock, flags);
+
+	temp = 1 << (31 - ipic_info[src].bit);
+	ipic_write(ipic->regs, ipic_info[src].ack, temp);
+
+	/* mb() can't guarantee that ack is finished.  But it does finish
+	 * for nearly all cases. */
+	mb();
+
+	raw_spin_unlock_irqrestore(&ipic_lock, flags);
+}
+
+static void ipic_mask_irq_and_ack(struct irq_data *d)
+{
+	struct ipic *ipic = ipic_from_irq(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 temp;
+
+	raw_spin_lock_irqsave(&ipic_lock, flags);
+
+	temp = ipic_read(ipic->regs, ipic_info[src].mask);
+	temp &= ~(1 << (31 - ipic_info[src].bit));
+	ipic_write(ipic->regs, ipic_info[src].mask, temp);
+
+	temp = 1 << (31 - ipic_info[src].bit);
+	ipic_write(ipic->regs, ipic_info[src].ack, temp);
+
+	/* mb() can't guarantee that ack is finished.  But it does finish
+	 * for nearly all cases. */
+	mb();
+
+	raw_spin_unlock_irqrestore(&ipic_lock, flags);
+}
+
+static int ipic_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+	struct ipic *ipic = ipic_from_irq(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned int vold, vnew, edibit;
+
+	if (flow_type == IRQ_TYPE_NONE)
+		flow_type = IRQ_TYPE_LEVEL_LOW;
+
+	/* ipic supports only low assertion and high-to-low change senses
+	 */
+	if (!(flow_type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_EDGE_FALLING))) {
+		printk(KERN_ERR "ipic: sense type 0x%x not supported\n",
+			flow_type);
+		return -EINVAL;
+	}
+	/* ipic supports only edge mode on external interrupts */
+	if ((flow_type & IRQ_TYPE_EDGE_FALLING) && !ipic_info[src].ack) {
+		printk(KERN_ERR "ipic: edge sense not supported on internal "
+				"interrupts\n");
+		return -EINVAL;
+
+	}
+
+	irqd_set_trigger_type(d, flow_type);
+	if (flow_type & IRQ_TYPE_LEVEL_LOW)  {
+		irq_set_handler_locked(d, handle_level_irq);
+		d->chip = &ipic_level_irq_chip;
+	} else {
+		irq_set_handler_locked(d, handle_edge_irq);
+		d->chip = &ipic_edge_irq_chip;
+	}
+
+	/* only EXT IRQ senses are programmable on ipic
+	 * internal IRQ senses are LEVEL_LOW
+	 */
+	if (src == IPIC_IRQ_EXT0)
+		edibit = 15;
+	else
+		if (src >= IPIC_IRQ_EXT1 && src <= IPIC_IRQ_EXT7)
+			edibit = (14 - (src - IPIC_IRQ_EXT1));
+		else
+			return (flow_type & IRQ_TYPE_LEVEL_LOW) ? 0 : -EINVAL;
+
+	vold = ipic_read(ipic->regs, IPIC_SECNR);
+	if ((flow_type & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_EDGE_FALLING) {
+		vnew = vold | (1 << edibit);
+	} else {
+		vnew = vold & ~(1 << edibit);
+	}
+	if (vold != vnew)
+		ipic_write(ipic->regs, IPIC_SECNR, vnew);
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+/* level interrupts and edge interrupts have different ack operations */
+static struct irq_chip ipic_level_irq_chip = {
+	.name		= "IPIC",
+	.irq_unmask	= ipic_unmask_irq,
+	.irq_mask	= ipic_mask_irq,
+	.irq_mask_ack	= ipic_mask_irq,
+	.irq_set_type	= ipic_set_irq_type,
+};
+
+static struct irq_chip ipic_edge_irq_chip = {
+	.name		= "IPIC",
+	.irq_unmask	= ipic_unmask_irq,
+	.irq_mask	= ipic_mask_irq,
+	.irq_mask_ack	= ipic_mask_irq_and_ack,
+	.irq_ack	= ipic_ack_irq,
+	.irq_set_type	= ipic_set_irq_type,
+};
+
+static int ipic_host_match(struct irq_domain *h, struct device_node *node,
+			   enum irq_domain_bus_token bus_token)
+{
+	/* Exact match, unless ipic node is NULL */
+	struct device_node *of_node = irq_domain_get_of_node(h);
+	return of_node == NULL || of_node == node;
+}
+
+static int ipic_host_map(struct irq_domain *h, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	struct ipic *ipic = h->host_data;
+
+	irq_set_chip_data(virq, ipic);
+	irq_set_chip_and_handler(virq, &ipic_level_irq_chip, handle_level_irq);
+
+	/* Set default irq type */
+	irq_set_irq_type(virq, IRQ_TYPE_NONE);
+
+	return 0;
+}
+
+static const struct irq_domain_ops ipic_host_ops = {
+	.match	= ipic_host_match,
+	.map	= ipic_host_map,
+	.xlate	= irq_domain_xlate_onetwocell,
+};
+
+struct ipic * __init ipic_init(struct device_node *node, unsigned int flags)
+{
+	struct ipic	*ipic;
+	struct resource res;
+	u32 temp = 0, ret;
+
+	ret = of_address_to_resource(node, 0, &res);
+	if (ret)
+		return NULL;
+
+	ipic = kzalloc(sizeof(*ipic), GFP_KERNEL);
+	if (ipic == NULL)
+		return NULL;
+
+	ipic->irqhost = irq_domain_add_linear(node, NR_IPIC_INTS,
+					      &ipic_host_ops, ipic);
+	if (ipic->irqhost == NULL) {
+		kfree(ipic);
+		return NULL;
+	}
+
+	ipic->regs = ioremap(res.start, resource_size(&res));
+
+	/* init hw */
+	ipic_write(ipic->regs, IPIC_SICNR, 0x0);
+
+	/* default priority scheme is grouped. If spread mode is required
+	 * configure SICFR accordingly */
+	if (flags & IPIC_SPREADMODE_GRP_A)
+		temp |= SICFR_IPSA;
+	if (flags & IPIC_SPREADMODE_GRP_B)
+		temp |= SICFR_IPSB;
+	if (flags & IPIC_SPREADMODE_GRP_C)
+		temp |= SICFR_IPSC;
+	if (flags & IPIC_SPREADMODE_GRP_D)
+		temp |= SICFR_IPSD;
+	if (flags & IPIC_SPREADMODE_MIX_A)
+		temp |= SICFR_MPSA;
+	if (flags & IPIC_SPREADMODE_MIX_B)
+		temp |= SICFR_MPSB;
+
+	ipic_write(ipic->regs, IPIC_SICFR, temp);
+
+	/* handle MCP route */
+	temp = 0;
+	if (flags & IPIC_DISABLE_MCP_OUT)
+		temp = SERCR_MCPR;
+	ipic_write(ipic->regs, IPIC_SERCR, temp);
+
+	/* handle routing of IRQ0 to MCP */
+	temp = ipic_read(ipic->regs, IPIC_SEMSR);
+
+	if (flags & IPIC_IRQ0_MCP)
+		temp |= SEMSR_SIRQ0;
+	else
+		temp &= ~SEMSR_SIRQ0;
+
+	ipic_write(ipic->regs, IPIC_SEMSR, temp);
+
+	primary_ipic = ipic;
+	irq_set_default_host(primary_ipic->irqhost);
+
+	ipic_write(ipic->regs, IPIC_SIMSR_H, 0);
+	ipic_write(ipic->regs, IPIC_SIMSR_L, 0);
+
+	printk ("IPIC (%d IRQ sources) at %p\n", NR_IPIC_INTS,
+			primary_ipic->regs);
+
+	return ipic;
+}
+
+void __init ipic_set_default_priority(void)
+{
+	ipic_write(primary_ipic->regs, IPIC_SIPRR_A, IPIC_PRIORITY_DEFAULT);
+	ipic_write(primary_ipic->regs, IPIC_SIPRR_B, IPIC_PRIORITY_DEFAULT);
+	ipic_write(primary_ipic->regs, IPIC_SIPRR_C, IPIC_PRIORITY_DEFAULT);
+	ipic_write(primary_ipic->regs, IPIC_SIPRR_D, IPIC_PRIORITY_DEFAULT);
+	ipic_write(primary_ipic->regs, IPIC_SMPRR_A, IPIC_PRIORITY_DEFAULT);
+	ipic_write(primary_ipic->regs, IPIC_SMPRR_B, IPIC_PRIORITY_DEFAULT);
+}
+
+u32 ipic_get_mcp_status(void)
+{
+	return primary_ipic ? ipic_read(primary_ipic->regs, IPIC_SERSR) : 0;
+}
+
+void ipic_clear_mcp_status(u32 mask)
+{
+	ipic_write(primary_ipic->regs, IPIC_SERSR, mask);
+}
+
+/* Return an interrupt vector or 0 if no interrupt is pending. */
+unsigned int ipic_get_irq(void)
+{
+	int irq;
+
+	BUG_ON(primary_ipic == NULL);
+
+#define IPIC_SIVCR_VECTOR_MASK	0x7f
+	irq = ipic_read(primary_ipic->regs, IPIC_SIVCR) & IPIC_SIVCR_VECTOR_MASK;
+
+	if (irq == 0)    /* 0 --> no irq is pending */
+		return 0;
+
+	return irq_linear_revmap(primary_ipic->irqhost, irq);
+}
+
+#ifdef CONFIG_SUSPEND
+static struct {
+	u32 sicfr;
+	u32 siprr[2];
+	u32 simsr[2];
+	u32 sicnr;
+	u32 smprr[2];
+	u32 semsr;
+	u32 secnr;
+	u32 sermr;
+	u32 sercr;
+} ipic_saved_state;
+
+static int ipic_suspend(void)
+{
+	struct ipic *ipic = primary_ipic;
+
+	ipic_saved_state.sicfr = ipic_read(ipic->regs, IPIC_SICFR);
+	ipic_saved_state.siprr[0] = ipic_read(ipic->regs, IPIC_SIPRR_A);
+	ipic_saved_state.siprr[1] = ipic_read(ipic->regs, IPIC_SIPRR_D);
+	ipic_saved_state.simsr[0] = ipic_read(ipic->regs, IPIC_SIMSR_H);
+	ipic_saved_state.simsr[1] = ipic_read(ipic->regs, IPIC_SIMSR_L);
+	ipic_saved_state.sicnr = ipic_read(ipic->regs, IPIC_SICNR);
+	ipic_saved_state.smprr[0] = ipic_read(ipic->regs, IPIC_SMPRR_A);
+	ipic_saved_state.smprr[1] = ipic_read(ipic->regs, IPIC_SMPRR_B);
+	ipic_saved_state.semsr = ipic_read(ipic->regs, IPIC_SEMSR);
+	ipic_saved_state.secnr = ipic_read(ipic->regs, IPIC_SECNR);
+	ipic_saved_state.sermr = ipic_read(ipic->regs, IPIC_SERMR);
+	ipic_saved_state.sercr = ipic_read(ipic->regs, IPIC_SERCR);
+
+	if (fsl_deep_sleep()) {
+		/* In deep sleep, make sure there can be no
+		 * pending interrupts, as this can cause
+		 * problems on 831x.
+		 */
+		ipic_write(ipic->regs, IPIC_SIMSR_H, 0);
+		ipic_write(ipic->regs, IPIC_SIMSR_L, 0);
+		ipic_write(ipic->regs, IPIC_SEMSR, 0);
+		ipic_write(ipic->regs, IPIC_SERMR, 0);
+	}
+
+	return 0;
+}
+
+static void ipic_resume(void)
+{
+	struct ipic *ipic = primary_ipic;
+
+	ipic_write(ipic->regs, IPIC_SICFR, ipic_saved_state.sicfr);
+	ipic_write(ipic->regs, IPIC_SIPRR_A, ipic_saved_state.siprr[0]);
+	ipic_write(ipic->regs, IPIC_SIPRR_D, ipic_saved_state.siprr[1]);
+	ipic_write(ipic->regs, IPIC_SIMSR_H, ipic_saved_state.simsr[0]);
+	ipic_write(ipic->regs, IPIC_SIMSR_L, ipic_saved_state.simsr[1]);
+	ipic_write(ipic->regs, IPIC_SICNR, ipic_saved_state.sicnr);
+	ipic_write(ipic->regs, IPIC_SMPRR_A, ipic_saved_state.smprr[0]);
+	ipic_write(ipic->regs, IPIC_SMPRR_B, ipic_saved_state.smprr[1]);
+	ipic_write(ipic->regs, IPIC_SEMSR, ipic_saved_state.semsr);
+	ipic_write(ipic->regs, IPIC_SECNR, ipic_saved_state.secnr);
+	ipic_write(ipic->regs, IPIC_SERMR, ipic_saved_state.sermr);
+	ipic_write(ipic->regs, IPIC_SERCR, ipic_saved_state.sercr);
+}
+#else
+#define ipic_suspend NULL
+#define ipic_resume NULL
+#endif
+
+static struct syscore_ops ipic_syscore_ops = {
+	.suspend = ipic_suspend,
+	.resume = ipic_resume,
+};
+
+static int __init init_ipic_syscore(void)
+{
+	if (!primary_ipic || !primary_ipic->regs)
+		return -ENODEV;
+
+	printk(KERN_DEBUG "Registering ipic system core operations\n");
+	register_syscore_ops(&ipic_syscore_ops);
+
+	return 0;
+}
+
+subsys_initcall(init_ipic_syscore);
diff --git a/arch/powerpc/sysdev/ipic.h b/arch/powerpc/sysdev/ipic.h
new file mode 100644
index 0000000000..45ab614068
--- /dev/null
+++ b/arch/powerpc/sysdev/ipic.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * IPIC private definitions and structure.
+ *
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
+ *
+ * Copyright 2005 Freescale Semiconductor, Inc
+ */
+#ifndef __IPIC_H__
+#define __IPIC_H__
+
+#include <asm/ipic.h>
+
+#define NR_IPIC_INTS 128
+
+/* External IRQS */
+#define IPIC_IRQ_EXT0 48
+#define IPIC_IRQ_EXT1 17
+#define IPIC_IRQ_EXT7 23
+
+/* Default Priority Registers */
+#define IPIC_PRIORITY_DEFAULT 0x05309770
+
+/* System Global Interrupt Configuration Register */
+#define	SICFR_IPSA	0x00010000
+#define	SICFR_IPSB	0x00020000
+#define	SICFR_IPSC	0x00040000
+#define	SICFR_IPSD	0x00080000
+#define	SICFR_MPSA	0x00200000
+#define	SICFR_MPSB	0x00400000
+
+/* System External Interrupt Mask Register */
+#define	SEMSR_SIRQ0	0x00008000
+
+/* System Error Control Register */
+#define SERCR_MCPR	0x00000001
+
+struct ipic {
+	volatile u32 __iomem	*regs;
+
+	/* The remapper for this IPIC */
+	struct irq_domain		*irqhost;
+};
+
+struct ipic_info {
+	u8	ack;		/* pending register offset from base if the irq
+				   supports ack operation */
+	u8	mask;		/* mask register offset from base */
+	u8	prio;		/* priority register offset from base */
+	u8	force;		/* force register offset from base */
+	u8	bit;		/* register bit position (as per doc)
+				   bit mask = 1 << (31 - bit) */
+	u8	prio_mask;	/* priority mask value */
+};
+
+#endif /* __IPIC_H__ */
diff --git a/arch/powerpc/sysdev/mmio_nvram.c b/arch/powerpc/sysdev/mmio_nvram.c
new file mode 100644
index 0000000000..eb48210ef9
--- /dev/null
+++ b/arch/powerpc/sysdev/mmio_nvram.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * memory mapped NVRAM
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * Authors : Utz Bacher <utz.bacher@de.ibm.com>
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/machdep.h>
+#include <asm/nvram.h>
+
+static void __iomem *mmio_nvram_start;
+static long mmio_nvram_len;
+static DEFINE_SPINLOCK(mmio_nvram_lock);
+
+static ssize_t mmio_nvram_read(char *buf, size_t count, loff_t *index)
+{
+	unsigned long flags;
+
+	if (*index >= mmio_nvram_len)
+		return 0;
+	if (*index + count > mmio_nvram_len)
+		count = mmio_nvram_len - *index;
+
+	spin_lock_irqsave(&mmio_nvram_lock, flags);
+
+	memcpy_fromio(buf, mmio_nvram_start + *index, count);
+
+	spin_unlock_irqrestore(&mmio_nvram_lock, flags);
+	
+	*index += count;
+	return count;
+}
+
+static unsigned char mmio_nvram_read_val(int addr)
+{
+	unsigned long flags;
+	unsigned char val;
+
+	if (addr >= mmio_nvram_len)
+		return 0xff;
+
+	spin_lock_irqsave(&mmio_nvram_lock, flags);
+
+	val = ioread8(mmio_nvram_start + addr);
+
+	spin_unlock_irqrestore(&mmio_nvram_lock, flags);
+
+	return val;
+}
+
+static ssize_t mmio_nvram_write(char *buf, size_t count, loff_t *index)
+{
+	unsigned long flags;
+
+	if (*index >= mmio_nvram_len)
+		return 0;
+	if (*index + count > mmio_nvram_len)
+		count = mmio_nvram_len - *index;
+
+	spin_lock_irqsave(&mmio_nvram_lock, flags);
+
+	memcpy_toio(mmio_nvram_start + *index, buf, count);
+
+	spin_unlock_irqrestore(&mmio_nvram_lock, flags);
+	
+	*index += count;
+	return count;
+}
+
+static void mmio_nvram_write_val(int addr, unsigned char val)
+{
+	unsigned long flags;
+
+	if (addr < mmio_nvram_len) {
+		spin_lock_irqsave(&mmio_nvram_lock, flags);
+
+		iowrite8(val, mmio_nvram_start + addr);
+
+		spin_unlock_irqrestore(&mmio_nvram_lock, flags);
+	}
+}
+
+static ssize_t mmio_nvram_get_size(void)
+{
+	return mmio_nvram_len;
+}
+
+int __init mmio_nvram_init(void)
+{
+	struct device_node *nvram_node;
+	unsigned long nvram_addr;
+	struct resource r;
+	int ret;
+
+	nvram_node = of_find_node_by_type(NULL, "nvram");
+	if (!nvram_node)
+		nvram_node = of_find_compatible_node(NULL, NULL, "nvram");
+	if (!nvram_node) {
+		printk(KERN_WARNING "nvram: no node found in device-tree\n");
+		return -ENODEV;
+	}
+
+	ret = of_address_to_resource(nvram_node, 0, &r);
+	if (ret) {
+		printk(KERN_WARNING "nvram: failed to get address (err %d)\n",
+		       ret);
+		goto out;
+	}
+	nvram_addr = r.start;
+	mmio_nvram_len = resource_size(&r);
+	if ( (!mmio_nvram_len) || (!nvram_addr) ) {
+		printk(KERN_WARNING "nvram: address or length is 0\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	mmio_nvram_start = ioremap(nvram_addr, mmio_nvram_len);
+	if (!mmio_nvram_start) {
+		printk(KERN_WARNING "nvram: failed to ioremap\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	printk(KERN_INFO "mmio NVRAM, %luk at 0x%lx mapped to %p\n",
+	       mmio_nvram_len >> 10, nvram_addr, mmio_nvram_start);
+
+	ppc_md.nvram_read_val	= mmio_nvram_read_val;
+	ppc_md.nvram_write_val	= mmio_nvram_write_val;
+	ppc_md.nvram_read	= mmio_nvram_read;
+	ppc_md.nvram_write	= mmio_nvram_write;
+	ppc_md.nvram_size	= mmio_nvram_get_size;
+
+out:
+	of_node_put(nvram_node);
+	return ret;
+}
diff --git a/arch/powerpc/sysdev/mpc5xxx_clocks.c b/arch/powerpc/sysdev/mpc5xxx_clocks.c
new file mode 100644
index 0000000000..58cee28e23
--- /dev/null
+++ b/arch/powerpc/sysdev/mpc5xxx_clocks.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/property.h>
+
+#include <asm/mpc5xxx.h>
+
+/**
+ * mpc5xxx_fwnode_get_bus_frequency - Find the bus frequency for a firmware node
+ * @fwnode:	firmware node
+ *
+ * Returns bus frequency (IPS on MPC512x, IPB on MPC52xx),
+ * or 0 if the bus frequency cannot be found.
+ */
+unsigned long mpc5xxx_fwnode_get_bus_frequency(struct fwnode_handle *fwnode)
+{
+	struct fwnode_handle *parent;
+	u32 bus_freq;
+	int ret;
+
+	ret = fwnode_property_read_u32(fwnode, "bus-frequency", &bus_freq);
+	if (!ret)
+		return bus_freq;
+
+	fwnode_for_each_parent_node(fwnode, parent) {
+		ret = fwnode_property_read_u32(parent, "bus-frequency", &bus_freq);
+		if (!ret) {
+			fwnode_handle_put(parent);
+			return bus_freq;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(mpc5xxx_fwnode_get_bus_frequency);
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
new file mode 100644
index 0000000000..ba287abcb0
--- /dev/null
+++ b/arch/powerpc/sysdev/mpic.c
@@ -0,0 +1,2019 @@
+/*
+ *  arch/powerpc/kernel/mpic.c
+ *
+ *  Driver for interrupt controllers following the OpenPIC standard, the
+ *  common implementation being IBM's MPIC. This driver also can deal
+ *  with various broken implementations of this HW.
+ *
+ *  Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
+ *  Copyright 2010-2012 Freescale Semiconductor, Inc.
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of this archive
+ *  for more details.
+ */
+
+#undef DEBUG
+#undef DEBUG_IPI
+#undef DEBUG_IRQ
+#undef DEBUG_LOW
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/syscore_ops.h>
+#include <linux/ratelimit.h>
+#include <linux/pgtable.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <asm/ptrace.h>
+#include <asm/signal.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/machdep.h>
+#include <asm/mpic.h>
+#include <asm/smp.h>
+
+#include "mpic.h"
+
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+struct bus_type mpic_subsys = {
+	.name = "mpic",
+	.dev_name = "mpic",
+};
+EXPORT_SYMBOL_GPL(mpic_subsys);
+
+static struct mpic *mpics;
+static struct mpic *mpic_primary;
+static DEFINE_RAW_SPINLOCK(mpic_lock);
+
+#ifdef CONFIG_PPC32	/* XXX for now */
+#ifdef CONFIG_IRQ_ALL_CPUS
+#define distribute_irqs	(1)
+#else
+#define distribute_irqs	(0)
+#endif
+#endif
+
+#ifdef CONFIG_MPIC_WEIRD
+static u32 mpic_infos[][MPIC_IDX_END] = {
+	[0] = {	/* Original OpenPIC compatible MPIC */
+		MPIC_GREG_BASE,
+		MPIC_GREG_FEATURE_0,
+		MPIC_GREG_GLOBAL_CONF_0,
+		MPIC_GREG_VENDOR_ID,
+		MPIC_GREG_IPI_VECTOR_PRI_0,
+		MPIC_GREG_IPI_STRIDE,
+		MPIC_GREG_SPURIOUS,
+		MPIC_GREG_TIMER_FREQ,
+
+		MPIC_TIMER_BASE,
+		MPIC_TIMER_STRIDE,
+		MPIC_TIMER_CURRENT_CNT,
+		MPIC_TIMER_BASE_CNT,
+		MPIC_TIMER_VECTOR_PRI,
+		MPIC_TIMER_DESTINATION,
+
+		MPIC_CPU_BASE,
+		MPIC_CPU_STRIDE,
+		MPIC_CPU_IPI_DISPATCH_0,
+		MPIC_CPU_IPI_DISPATCH_STRIDE,
+		MPIC_CPU_CURRENT_TASK_PRI,
+		MPIC_CPU_WHOAMI,
+		MPIC_CPU_INTACK,
+		MPIC_CPU_EOI,
+		MPIC_CPU_MCACK,
+
+		MPIC_IRQ_BASE,
+		MPIC_IRQ_STRIDE,
+		MPIC_IRQ_VECTOR_PRI,
+		MPIC_VECPRI_VECTOR_MASK,
+		MPIC_VECPRI_POLARITY_POSITIVE,
+		MPIC_VECPRI_POLARITY_NEGATIVE,
+		MPIC_VECPRI_SENSE_LEVEL,
+		MPIC_VECPRI_SENSE_EDGE,
+		MPIC_VECPRI_POLARITY_MASK,
+		MPIC_VECPRI_SENSE_MASK,
+		MPIC_IRQ_DESTINATION
+	},
+	[1] = {	/* Tsi108/109 PIC */
+		TSI108_GREG_BASE,
+		TSI108_GREG_FEATURE_0,
+		TSI108_GREG_GLOBAL_CONF_0,
+		TSI108_GREG_VENDOR_ID,
+		TSI108_GREG_IPI_VECTOR_PRI_0,
+		TSI108_GREG_IPI_STRIDE,
+		TSI108_GREG_SPURIOUS,
+		TSI108_GREG_TIMER_FREQ,
+
+		TSI108_TIMER_BASE,
+		TSI108_TIMER_STRIDE,
+		TSI108_TIMER_CURRENT_CNT,
+		TSI108_TIMER_BASE_CNT,
+		TSI108_TIMER_VECTOR_PRI,
+		TSI108_TIMER_DESTINATION,
+
+		TSI108_CPU_BASE,
+		TSI108_CPU_STRIDE,
+		TSI108_CPU_IPI_DISPATCH_0,
+		TSI108_CPU_IPI_DISPATCH_STRIDE,
+		TSI108_CPU_CURRENT_TASK_PRI,
+		TSI108_CPU_WHOAMI,
+		TSI108_CPU_INTACK,
+		TSI108_CPU_EOI,
+		TSI108_CPU_MCACK,
+
+		TSI108_IRQ_BASE,
+		TSI108_IRQ_STRIDE,
+		TSI108_IRQ_VECTOR_PRI,
+		TSI108_VECPRI_VECTOR_MASK,
+		TSI108_VECPRI_POLARITY_POSITIVE,
+		TSI108_VECPRI_POLARITY_NEGATIVE,
+		TSI108_VECPRI_SENSE_LEVEL,
+		TSI108_VECPRI_SENSE_EDGE,
+		TSI108_VECPRI_POLARITY_MASK,
+		TSI108_VECPRI_SENSE_MASK,
+		TSI108_IRQ_DESTINATION
+	},
+};
+
+#define MPIC_INFO(name) mpic->hw_set[MPIC_IDX_##name]
+
+#else /* CONFIG_MPIC_WEIRD */
+
+#define MPIC_INFO(name) MPIC_##name
+
+#endif /* CONFIG_MPIC_WEIRD */
+
+static inline unsigned int mpic_processor_id(struct mpic *mpic)
+{
+	unsigned int cpu = 0;
+
+	if (!(mpic->flags & MPIC_SECONDARY))
+		cpu = hard_smp_processor_id();
+
+	return cpu;
+}
+
+/*
+ * Register accessor functions
+ */
+
+
+static inline u32 _mpic_read(enum mpic_reg_type type,
+			     struct mpic_reg_bank *rb,
+			     unsigned int reg)
+{
+	switch(type) {
+#ifdef CONFIG_PPC_DCR
+	case mpic_access_dcr:
+		return dcr_read(rb->dhost, reg);
+#endif
+	case mpic_access_mmio_be:
+		return in_be32(rb->base + (reg >> 2));
+	case mpic_access_mmio_le:
+	default:
+		return in_le32(rb->base + (reg >> 2));
+	}
+}
+
+static inline void _mpic_write(enum mpic_reg_type type,
+			       struct mpic_reg_bank *rb,
+ 			       unsigned int reg, u32 value)
+{
+	switch(type) {
+#ifdef CONFIG_PPC_DCR
+	case mpic_access_dcr:
+		dcr_write(rb->dhost, reg, value);
+		break;
+#endif
+	case mpic_access_mmio_be:
+		out_be32(rb->base + (reg >> 2), value);
+		break;
+	case mpic_access_mmio_le:
+	default:
+		out_le32(rb->base + (reg >> 2), value);
+		break;
+	}
+}
+
+static inline u32 _mpic_ipi_read(struct mpic *mpic, unsigned int ipi)
+{
+	enum mpic_reg_type type = mpic->reg_type;
+	unsigned int offset = MPIC_INFO(GREG_IPI_VECTOR_PRI_0) +
+			      (ipi * MPIC_INFO(GREG_IPI_STRIDE));
+
+	if ((mpic->flags & MPIC_BROKEN_IPI) && type == mpic_access_mmio_le)
+		type = mpic_access_mmio_be;
+	return _mpic_read(type, &mpic->gregs, offset);
+}
+
+static inline void _mpic_ipi_write(struct mpic *mpic, unsigned int ipi, u32 value)
+{
+	unsigned int offset = MPIC_INFO(GREG_IPI_VECTOR_PRI_0) +
+			      (ipi * MPIC_INFO(GREG_IPI_STRIDE));
+
+	_mpic_write(mpic->reg_type, &mpic->gregs, offset, value);
+}
+
+static inline unsigned int mpic_tm_offset(struct mpic *mpic, unsigned int tm)
+{
+	return (tm >> 2) * MPIC_TIMER_GROUP_STRIDE +
+	       (tm & 3) * MPIC_INFO(TIMER_STRIDE);
+}
+
+static inline u32 _mpic_tm_read(struct mpic *mpic, unsigned int tm)
+{
+	unsigned int offset = mpic_tm_offset(mpic, tm) +
+			      MPIC_INFO(TIMER_VECTOR_PRI);
+
+	return _mpic_read(mpic->reg_type, &mpic->tmregs, offset);
+}
+
+static inline void _mpic_tm_write(struct mpic *mpic, unsigned int tm, u32 value)
+{
+	unsigned int offset = mpic_tm_offset(mpic, tm) +
+			      MPIC_INFO(TIMER_VECTOR_PRI);
+
+	_mpic_write(mpic->reg_type, &mpic->tmregs, offset, value);
+}
+
+static inline u32 _mpic_cpu_read(struct mpic *mpic, unsigned int reg)
+{
+	unsigned int cpu = mpic_processor_id(mpic);
+
+	return _mpic_read(mpic->reg_type, &mpic->cpuregs[cpu], reg);
+}
+
+static inline void _mpic_cpu_write(struct mpic *mpic, unsigned int reg, u32 value)
+{
+	unsigned int cpu = mpic_processor_id(mpic);
+
+	_mpic_write(mpic->reg_type, &mpic->cpuregs[cpu], reg, value);
+}
+
+static inline u32 _mpic_irq_read(struct mpic *mpic, unsigned int src_no, unsigned int reg)
+{
+	unsigned int	isu = src_no >> mpic->isu_shift;
+	unsigned int	idx = src_no & mpic->isu_mask;
+	unsigned int	val;
+
+	val = _mpic_read(mpic->reg_type, &mpic->isus[isu],
+			 reg + (idx * MPIC_INFO(IRQ_STRIDE)));
+#ifdef CONFIG_MPIC_BROKEN_REGREAD
+	if (reg == 0)
+		val = (val & (MPIC_VECPRI_MASK | MPIC_VECPRI_ACTIVITY)) |
+			mpic->isu_reg0_shadow[src_no];
+#endif
+	return val;
+}
+
+static inline void _mpic_irq_write(struct mpic *mpic, unsigned int src_no,
+				   unsigned int reg, u32 value)
+{
+	unsigned int	isu = src_no >> mpic->isu_shift;
+	unsigned int	idx = src_no & mpic->isu_mask;
+
+	_mpic_write(mpic->reg_type, &mpic->isus[isu],
+		    reg + (idx * MPIC_INFO(IRQ_STRIDE)), value);
+
+#ifdef CONFIG_MPIC_BROKEN_REGREAD
+	if (reg == 0)
+		mpic->isu_reg0_shadow[src_no] =
+			value & ~(MPIC_VECPRI_MASK | MPIC_VECPRI_ACTIVITY);
+#endif
+}
+
+#define mpic_read(b,r)		_mpic_read(mpic->reg_type,&(b),(r))
+#define mpic_write(b,r,v)	_mpic_write(mpic->reg_type,&(b),(r),(v))
+#define mpic_ipi_read(i)	_mpic_ipi_read(mpic,(i))
+#define mpic_ipi_write(i,v)	_mpic_ipi_write(mpic,(i),(v))
+#define mpic_tm_read(i)		_mpic_tm_read(mpic,(i))
+#define mpic_tm_write(i,v)	_mpic_tm_write(mpic,(i),(v))
+#define mpic_cpu_read(i)	_mpic_cpu_read(mpic,(i))
+#define mpic_cpu_write(i,v)	_mpic_cpu_write(mpic,(i),(v))
+#define mpic_irq_read(s,r)	_mpic_irq_read(mpic,(s),(r))
+#define mpic_irq_write(s,r,v)	_mpic_irq_write(mpic,(s),(r),(v))
+
+
+/*
+ * Low level utility functions
+ */
+
+
+static void _mpic_map_mmio(struct mpic *mpic, phys_addr_t phys_addr,
+			   struct mpic_reg_bank *rb, unsigned int offset,
+			   unsigned int size)
+{
+	rb->base = ioremap(phys_addr + offset, size);
+	BUG_ON(rb->base == NULL);
+}
+
+#ifdef CONFIG_PPC_DCR
+static void _mpic_map_dcr(struct mpic *mpic, struct mpic_reg_bank *rb,
+			  unsigned int offset, unsigned int size)
+{
+	phys_addr_t phys_addr = dcr_resource_start(mpic->node, 0);
+	rb->dhost = dcr_map(mpic->node, phys_addr + offset, size);
+	BUG_ON(!DCR_MAP_OK(rb->dhost));
+}
+
+static inline void mpic_map(struct mpic *mpic,
+			    phys_addr_t phys_addr, struct mpic_reg_bank *rb,
+			    unsigned int offset, unsigned int size)
+{
+	if (mpic->flags & MPIC_USES_DCR)
+		_mpic_map_dcr(mpic, rb, offset, size);
+	else
+		_mpic_map_mmio(mpic, phys_addr, rb, offset, size);
+}
+#else /* CONFIG_PPC_DCR */
+#define mpic_map(m,p,b,o,s)	_mpic_map_mmio(m,p,b,o,s)
+#endif /* !CONFIG_PPC_DCR */
+
+
+
+/* Check if we have one of those nice broken MPICs with a flipped endian on
+ * reads from IPI registers
+ */
+static void __init mpic_test_broken_ipi(struct mpic *mpic)
+{
+	u32 r;
+
+	mpic_write(mpic->gregs, MPIC_INFO(GREG_IPI_VECTOR_PRI_0), MPIC_VECPRI_MASK);
+	r = mpic_read(mpic->gregs, MPIC_INFO(GREG_IPI_VECTOR_PRI_0));
+
+	if (r == le32_to_cpu(MPIC_VECPRI_MASK)) {
+		printk(KERN_INFO "mpic: Detected reversed IPI registers\n");
+		mpic->flags |= MPIC_BROKEN_IPI;
+	}
+}
+
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+
+/* Test if an interrupt is sourced from HyperTransport (used on broken U3s)
+ * to force the edge setting on the MPIC and do the ack workaround.
+ */
+static inline int mpic_is_ht_interrupt(struct mpic *mpic, unsigned int source)
+{
+	if (source >= 128 || !mpic->fixups)
+		return 0;
+	return mpic->fixups[source].base != NULL;
+}
+
+
+static inline void mpic_ht_end_irq(struct mpic *mpic, unsigned int source)
+{
+	struct mpic_irq_fixup *fixup = &mpic->fixups[source];
+
+	if (fixup->applebase) {
+		unsigned int soff = (fixup->index >> 3) & ~3;
+		unsigned int mask = 1U << (fixup->index & 0x1f);
+		writel(mask, fixup->applebase + soff);
+	} else {
+		raw_spin_lock(&mpic->fixup_lock);
+		writeb(0x11 + 2 * fixup->index, fixup->base + 2);
+		writel(fixup->data, fixup->base + 4);
+		raw_spin_unlock(&mpic->fixup_lock);
+	}
+}
+
+static void mpic_startup_ht_interrupt(struct mpic *mpic, unsigned int source,
+				      bool level)
+{
+	struct mpic_irq_fixup *fixup = &mpic->fixups[source];
+	unsigned long flags;
+	u32 tmp;
+
+	if (fixup->base == NULL)
+		return;
+
+	DBG("startup_ht_interrupt(0x%x) index: %d\n",
+	    source, fixup->index);
+	raw_spin_lock_irqsave(&mpic->fixup_lock, flags);
+	/* Enable and configure */
+	writeb(0x10 + 2 * fixup->index, fixup->base + 2);
+	tmp = readl(fixup->base + 4);
+	tmp &= ~(0x23U);
+	if (level)
+		tmp |= 0x22;
+	writel(tmp, fixup->base + 4);
+	raw_spin_unlock_irqrestore(&mpic->fixup_lock, flags);
+
+#ifdef CONFIG_PM
+	/* use the lowest bit inverted to the actual HW,
+	 * set if this fixup was enabled, clear otherwise */
+	mpic->save_data[source].fixup_data = tmp | 1;
+#endif
+}
+
+static void mpic_shutdown_ht_interrupt(struct mpic *mpic, unsigned int source)
+{
+	struct mpic_irq_fixup *fixup = &mpic->fixups[source];
+	unsigned long flags;
+	u32 tmp;
+
+	if (fixup->base == NULL)
+		return;
+
+	DBG("shutdown_ht_interrupt(0x%x)\n", source);
+
+	/* Disable */
+	raw_spin_lock_irqsave(&mpic->fixup_lock, flags);
+	writeb(0x10 + 2 * fixup->index, fixup->base + 2);
+	tmp = readl(fixup->base + 4);
+	tmp |= 1;
+	writel(tmp, fixup->base + 4);
+	raw_spin_unlock_irqrestore(&mpic->fixup_lock, flags);
+
+#ifdef CONFIG_PM
+	/* use the lowest bit inverted to the actual HW,
+	 * set if this fixup was enabled, clear otherwise */
+	mpic->save_data[source].fixup_data = tmp & ~1;
+#endif
+}
+
+#ifdef CONFIG_PCI_MSI
+static void __init mpic_scan_ht_msi(struct mpic *mpic, u8 __iomem *devbase,
+				    unsigned int devfn)
+{
+	u8 __iomem *base;
+	u8 pos, flags;
+	u64 addr = 0;
+
+	for (pos = readb(devbase + PCI_CAPABILITY_LIST); pos != 0;
+	     pos = readb(devbase + pos + PCI_CAP_LIST_NEXT)) {
+		u8 id = readb(devbase + pos + PCI_CAP_LIST_ID);
+		if (id == PCI_CAP_ID_HT) {
+			id = readb(devbase + pos + 3);
+			if ((id & HT_5BIT_CAP_MASK) == HT_CAPTYPE_MSI_MAPPING)
+				break;
+		}
+	}
+
+	if (pos == 0)
+		return;
+
+	base = devbase + pos;
+
+	flags = readb(base + HT_MSI_FLAGS);
+	if (!(flags & HT_MSI_FLAGS_FIXED)) {
+		addr = readl(base + HT_MSI_ADDR_LO) & HT_MSI_ADDR_LO_MASK;
+		addr = addr | ((u64)readl(base + HT_MSI_ADDR_HI) << 32);
+	}
+
+	printk(KERN_DEBUG "mpic:   - HT:%02x.%x %s MSI mapping found @ 0x%llx\n",
+		PCI_SLOT(devfn), PCI_FUNC(devfn),
+		flags & HT_MSI_FLAGS_ENABLE ? "enabled" : "disabled", addr);
+
+	if (!(flags & HT_MSI_FLAGS_ENABLE))
+		writeb(flags | HT_MSI_FLAGS_ENABLE, base + HT_MSI_FLAGS);
+}
+#else
+static void __init mpic_scan_ht_msi(struct mpic *mpic, u8 __iomem *devbase,
+				    unsigned int devfn)
+{
+	return;
+}
+#endif
+
+static void __init mpic_scan_ht_pic(struct mpic *mpic, u8 __iomem *devbase,
+				    unsigned int devfn, u32 vdid)
+{
+	int i, irq, n;
+	u8 __iomem *base;
+	u32 tmp;
+	u8 pos;
+
+	for (pos = readb(devbase + PCI_CAPABILITY_LIST); pos != 0;
+	     pos = readb(devbase + pos + PCI_CAP_LIST_NEXT)) {
+		u8 id = readb(devbase + pos + PCI_CAP_LIST_ID);
+		if (id == PCI_CAP_ID_HT) {
+			id = readb(devbase + pos + 3);
+			if ((id & HT_5BIT_CAP_MASK) == HT_CAPTYPE_IRQ)
+				break;
+		}
+	}
+	if (pos == 0)
+		return;
+
+	base = devbase + pos;
+	writeb(0x01, base + 2);
+	n = (readl(base + 4) >> 16) & 0xff;
+
+	printk(KERN_INFO "mpic:   - HT:%02x.%x [0x%02x] vendor %04x device %04x"
+	       " has %d irqs\n",
+	       devfn >> 3, devfn & 0x7, pos, vdid & 0xffff, vdid >> 16, n + 1);
+
+	for (i = 0; i <= n; i++) {
+		writeb(0x10 + 2 * i, base + 2);
+		tmp = readl(base + 4);
+		irq = (tmp >> 16) & 0xff;
+		DBG("HT PIC index 0x%x, irq 0x%x, tmp: %08x\n", i, irq, tmp);
+		/* mask it , will be unmasked later */
+		tmp |= 0x1;
+		writel(tmp, base + 4);
+		mpic->fixups[irq].index = i;
+		mpic->fixups[irq].base = base;
+		/* Apple HT PIC has a non-standard way of doing EOIs */
+		if ((vdid & 0xffff) == 0x106b)
+			mpic->fixups[irq].applebase = devbase + 0x60;
+		else
+			mpic->fixups[irq].applebase = NULL;
+		writeb(0x11 + 2 * i, base + 2);
+		mpic->fixups[irq].data = readl(base + 4) | 0x80000000;
+	}
+}
+
+
+static void __init mpic_scan_ht_pics(struct mpic *mpic)
+{
+	unsigned int devfn;
+	u8 __iomem *cfgspace;
+
+	printk(KERN_INFO "mpic: Setting up HT PICs workarounds for U3/U4\n");
+
+	/* Allocate fixups array */
+	mpic->fixups = kcalloc(128, sizeof(*mpic->fixups), GFP_KERNEL);
+	BUG_ON(mpic->fixups == NULL);
+
+	/* Init spinlock */
+	raw_spin_lock_init(&mpic->fixup_lock);
+
+	/* Map U3 config space. We assume all IO-APICs are on the primary bus
+	 * so we only need to map 64kB.
+	 */
+	cfgspace = ioremap(0xf2000000, 0x10000);
+	BUG_ON(cfgspace == NULL);
+
+	/* Now we scan all slots. We do a very quick scan, we read the header
+	 * type, vendor ID and device ID only, that's plenty enough
+	 */
+	for (devfn = 0; devfn < 0x100; devfn++) {
+		u8 __iomem *devbase = cfgspace + (devfn << 8);
+		u8 hdr_type = readb(devbase + PCI_HEADER_TYPE);
+		u32 l = readl(devbase + PCI_VENDOR_ID);
+		u16 s;
+
+		DBG("devfn %x, l: %x\n", devfn, l);
+
+		/* If no device, skip */
+		if (l == 0xffffffff || l == 0x00000000 ||
+		    l == 0x0000ffff || l == 0xffff0000)
+			goto next;
+		/* Check if is supports capability lists */
+		s = readw(devbase + PCI_STATUS);
+		if (!(s & PCI_STATUS_CAP_LIST))
+			goto next;
+
+		mpic_scan_ht_pic(mpic, devbase, devfn, l);
+		mpic_scan_ht_msi(mpic, devbase, devfn);
+
+	next:
+		/* next device, if function 0 */
+		if (PCI_FUNC(devfn) == 0 && (hdr_type & 0x80) == 0)
+			devfn += 7;
+	}
+}
+
+#else /* CONFIG_MPIC_U3_HT_IRQS */
+
+static inline int mpic_is_ht_interrupt(struct mpic *mpic, unsigned int source)
+{
+	return 0;
+}
+
+static void __init mpic_scan_ht_pics(struct mpic *mpic)
+{
+}
+
+#endif /* CONFIG_MPIC_U3_HT_IRQS */
+
+/* Find an mpic associated with a given linux interrupt */
+static struct mpic *mpic_find(unsigned int irq)
+{
+	if (irq < NR_IRQS_LEGACY)
+		return NULL;
+
+	return irq_get_chip_data(irq);
+}
+
+/* Determine if the linux irq is an IPI */
+static unsigned int mpic_is_ipi(struct mpic *mpic, unsigned int src)
+{
+	return (src >= mpic->ipi_vecs[0] && src <= mpic->ipi_vecs[3]);
+}
+
+/* Determine if the linux irq is a timer */
+static unsigned int mpic_is_tm(struct mpic *mpic, unsigned int src)
+{
+	return (src >= mpic->timer_vecs[0] && src <= mpic->timer_vecs[7]);
+}
+
+/* Convert a cpu mask from logical to physical cpu numbers. */
+static inline u32 mpic_physmask(u32 cpumask)
+{
+	int i;
+	u32 mask = 0;
+
+	for (i = 0; i < min(32, NR_CPUS) && cpu_possible(i); ++i, cpumask >>= 1)
+		mask |= (cpumask & 1) << get_hard_smp_processor_id(i);
+	return mask;
+}
+
+#ifdef CONFIG_SMP
+/* Get the mpic structure from the IPI number */
+static inline struct mpic * mpic_from_ipi(struct irq_data *d)
+{
+	return irq_data_get_irq_chip_data(d);
+}
+#endif
+
+/* Get the mpic structure from the irq number */
+static inline struct mpic * mpic_from_irq(unsigned int irq)
+{
+	return irq_get_chip_data(irq);
+}
+
+/* Get the mpic structure from the irq data */
+static inline struct mpic * mpic_from_irq_data(struct irq_data *d)
+{
+	return irq_data_get_irq_chip_data(d);
+}
+
+/* Send an EOI */
+static inline void mpic_eoi(struct mpic *mpic)
+{
+	mpic_cpu_write(MPIC_INFO(CPU_EOI), 0);
+}
+
+/*
+ * Linux descriptor level callbacks
+ */
+
+
+void mpic_unmask_irq(struct irq_data *d)
+{
+	unsigned int loops = 100000;
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+
+	DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, d->irq, src);
+
+	mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI),
+		       mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI)) &
+		       ~MPIC_VECPRI_MASK);
+	/* make sure mask gets to controller before we return to user */
+	do {
+		if (!loops--) {
+			printk(KERN_ERR "%s: timeout on hwirq %u\n",
+			       __func__, src);
+			break;
+		}
+	} while(mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI)) & MPIC_VECPRI_MASK);
+}
+
+void mpic_mask_irq(struct irq_data *d)
+{
+	unsigned int loops = 100000;
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+
+	DBG("%s: disable_irq: %d (src %d)\n", mpic->name, d->irq, src);
+
+	mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI),
+		       mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI)) |
+		       MPIC_VECPRI_MASK);
+
+	/* make sure mask gets to controller before we return to user */
+	do {
+		if (!loops--) {
+			printk(KERN_ERR "%s: timeout on hwirq %u\n",
+			       __func__, src);
+			break;
+		}
+	} while(!(mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI)) & MPIC_VECPRI_MASK));
+}
+
+void mpic_end_irq(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+
+#ifdef DEBUG_IRQ
+	DBG("%s: end_irq: %d\n", mpic->name, d->irq);
+#endif
+	/* We always EOI on end_irq() even for edge interrupts since that
+	 * should only lower the priority, the MPIC should have properly
+	 * latched another edge interrupt coming in anyway
+	 */
+
+	mpic_eoi(mpic);
+}
+
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+
+static void mpic_unmask_ht_irq(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+
+	mpic_unmask_irq(d);
+
+	if (irqd_is_level_type(d))
+		mpic_ht_end_irq(mpic, src);
+}
+
+static unsigned int mpic_startup_ht_irq(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+
+	mpic_unmask_irq(d);
+	mpic_startup_ht_interrupt(mpic, src, irqd_is_level_type(d));
+
+	return 0;
+}
+
+static void mpic_shutdown_ht_irq(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+
+	mpic_shutdown_ht_interrupt(mpic, src);
+	mpic_mask_irq(d);
+}
+
+static void mpic_end_ht_irq(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+
+#ifdef DEBUG_IRQ
+	DBG("%s: end_irq: %d\n", mpic->name, d->irq);
+#endif
+	/* We always EOI on end_irq() even for edge interrupts since that
+	 * should only lower the priority, the MPIC should have properly
+	 * latched another edge interrupt coming in anyway
+	 */
+
+	if (irqd_is_level_type(d))
+		mpic_ht_end_irq(mpic, src);
+	mpic_eoi(mpic);
+}
+#endif /* !CONFIG_MPIC_U3_HT_IRQS */
+
+#ifdef CONFIG_SMP
+
+static void mpic_unmask_ipi(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_ipi(d);
+	unsigned int src = virq_to_hw(d->irq) - mpic->ipi_vecs[0];
+
+	DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, d->irq, src);
+	mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK);
+}
+
+static void mpic_mask_ipi(struct irq_data *d)
+{
+	/* NEVER disable an IPI... that's just plain wrong! */
+}
+
+static void mpic_end_ipi(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_ipi(d);
+
+	/*
+	 * IPIs are marked IRQ_PER_CPU. This has the side effect of
+	 * preventing the IRQ_PENDING/IRQ_INPROGRESS logic from
+	 * applying to them. We EOI them late to avoid re-entering.
+	 */
+	mpic_eoi(mpic);
+}
+
+#endif /* CONFIG_SMP */
+
+static void mpic_unmask_tm(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = virq_to_hw(d->irq) - mpic->timer_vecs[0];
+
+	DBG("%s: enable_tm: %d (tm %d)\n", mpic->name, d->irq, src);
+	mpic_tm_write(src, mpic_tm_read(src) & ~MPIC_VECPRI_MASK);
+	mpic_tm_read(src);
+}
+
+static void mpic_mask_tm(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = virq_to_hw(d->irq) - mpic->timer_vecs[0];
+
+	mpic_tm_write(src, mpic_tm_read(src) | MPIC_VECPRI_MASK);
+	mpic_tm_read(src);
+}
+
+int mpic_set_affinity(struct irq_data *d, const struct cpumask *cpumask,
+		      bool force)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+
+	if (mpic->flags & MPIC_SINGLE_DEST_CPU) {
+		int cpuid = irq_choose_cpu(cpumask);
+
+		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), 1 << cpuid);
+	} else {
+		u32 mask = cpumask_bits(cpumask)[0];
+
+		mask &= cpumask_bits(cpu_online_mask)[0];
+
+		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
+			       mpic_physmask(mask));
+	}
+
+	return IRQ_SET_MASK_OK;
+}
+
+static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type)
+{
+	/* Now convert sense value */
+	switch(type & IRQ_TYPE_SENSE_MASK) {
+	case IRQ_TYPE_EDGE_RISING:
+		return MPIC_INFO(VECPRI_SENSE_EDGE) |
+		       MPIC_INFO(VECPRI_POLARITY_POSITIVE);
+	case IRQ_TYPE_EDGE_FALLING:
+	case IRQ_TYPE_EDGE_BOTH:
+		return MPIC_INFO(VECPRI_SENSE_EDGE) |
+		       MPIC_INFO(VECPRI_POLARITY_NEGATIVE);
+	case IRQ_TYPE_LEVEL_HIGH:
+		return MPIC_INFO(VECPRI_SENSE_LEVEL) |
+		       MPIC_INFO(VECPRI_POLARITY_POSITIVE);
+	case IRQ_TYPE_LEVEL_LOW:
+	default:
+		return MPIC_INFO(VECPRI_SENSE_LEVEL) |
+		       MPIC_INFO(VECPRI_POLARITY_NEGATIVE);
+	}
+}
+
+int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned int vecpri, vold, vnew;
+
+	DBG("mpic: set_irq_type(mpic:@%p,virq:%d,src:0x%x,type:0x%x)\n",
+	    mpic, d->irq, src, flow_type);
+
+	if (src >= mpic->num_sources)
+		return -EINVAL;
+
+	vold = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI));
+
+	/* We don't support "none" type */
+	if (flow_type == IRQ_TYPE_NONE)
+		flow_type = IRQ_TYPE_DEFAULT;
+
+	/* Default: read HW settings */
+	if (flow_type == IRQ_TYPE_DEFAULT) {
+		int vold_ps;
+
+		vold_ps = vold & (MPIC_INFO(VECPRI_POLARITY_MASK) |
+				  MPIC_INFO(VECPRI_SENSE_MASK));
+
+		if (vold_ps == (MPIC_INFO(VECPRI_SENSE_EDGE) |
+				MPIC_INFO(VECPRI_POLARITY_POSITIVE)))
+			flow_type = IRQ_TYPE_EDGE_RISING;
+		else if	(vold_ps == (MPIC_INFO(VECPRI_SENSE_EDGE) |
+				     MPIC_INFO(VECPRI_POLARITY_NEGATIVE)))
+			flow_type = IRQ_TYPE_EDGE_FALLING;
+		else if (vold_ps == (MPIC_INFO(VECPRI_SENSE_LEVEL) |
+				     MPIC_INFO(VECPRI_POLARITY_POSITIVE)))
+			flow_type = IRQ_TYPE_LEVEL_HIGH;
+		else if (vold_ps == (MPIC_INFO(VECPRI_SENSE_LEVEL) |
+				     MPIC_INFO(VECPRI_POLARITY_NEGATIVE)))
+			flow_type = IRQ_TYPE_LEVEL_LOW;
+		else
+			WARN_ONCE(1, "mpic: unknown IRQ type %d\n", vold);
+	}
+
+	/* Apply to irq desc */
+	irqd_set_trigger_type(d, flow_type);
+
+	/* Apply to HW */
+	if (mpic_is_ht_interrupt(mpic, src))
+		vecpri = MPIC_VECPRI_POLARITY_POSITIVE |
+			MPIC_VECPRI_SENSE_EDGE;
+	else
+		vecpri = mpic_type_to_vecpri(mpic, flow_type);
+
+	vnew = vold & ~(MPIC_INFO(VECPRI_POLARITY_MASK) |
+			MPIC_INFO(VECPRI_SENSE_MASK));
+	vnew |= vecpri;
+	if (vold != vnew)
+		mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), vnew);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+void mpic_set_vector(unsigned int virq, unsigned int vector)
+{
+	struct mpic *mpic = mpic_from_irq(virq);
+	unsigned int src = virq_to_hw(virq);
+	unsigned int vecpri;
+
+	DBG("mpic: set_vector(mpic:@%p,virq:%d,src:%d,vector:0x%x)\n",
+	    mpic, virq, src, vector);
+
+	if (src >= mpic->num_sources)
+		return;
+
+	vecpri = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI));
+	vecpri = vecpri & ~MPIC_INFO(VECPRI_VECTOR_MASK);
+	vecpri |= vector;
+	mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), vecpri);
+}
+
+static void mpic_set_destination(unsigned int virq, unsigned int cpuid)
+{
+	struct mpic *mpic = mpic_from_irq(virq);
+	unsigned int src = virq_to_hw(virq);
+
+	DBG("mpic: set_destination(mpic:@%p,virq:%d,src:%d,cpuid:0x%x)\n",
+	    mpic, virq, src, cpuid);
+
+	if (src >= mpic->num_sources)
+		return;
+
+	mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), 1 << cpuid);
+}
+
+static struct irq_chip mpic_irq_chip = {
+	.irq_mask	= mpic_mask_irq,
+	.irq_unmask	= mpic_unmask_irq,
+	.irq_eoi	= mpic_end_irq,
+	.irq_set_type	= mpic_set_irq_type,
+};
+
+#ifdef CONFIG_SMP
+static const struct irq_chip mpic_ipi_chip = {
+	.irq_mask	= mpic_mask_ipi,
+	.irq_unmask	= mpic_unmask_ipi,
+	.irq_eoi	= mpic_end_ipi,
+};
+#endif /* CONFIG_SMP */
+
+static struct irq_chip mpic_tm_chip = {
+	.irq_mask	= mpic_mask_tm,
+	.irq_unmask	= mpic_unmask_tm,
+	.irq_eoi	= mpic_end_irq,
+};
+
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+static const struct irq_chip mpic_irq_ht_chip = {
+	.irq_startup	= mpic_startup_ht_irq,
+	.irq_shutdown	= mpic_shutdown_ht_irq,
+	.irq_mask	= mpic_mask_irq,
+	.irq_unmask	= mpic_unmask_ht_irq,
+	.irq_eoi	= mpic_end_ht_irq,
+	.irq_set_type	= mpic_set_irq_type,
+};
+#endif /* CONFIG_MPIC_U3_HT_IRQS */
+
+
+static int mpic_host_match(struct irq_domain *h, struct device_node *node,
+			   enum irq_domain_bus_token bus_token)
+{
+	/* Exact match, unless mpic node is NULL */
+	struct device_node *of_node = irq_domain_get_of_node(h);
+	return of_node == NULL || of_node == node;
+}
+
+static int mpic_host_map(struct irq_domain *h, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	struct mpic *mpic = h->host_data;
+	struct irq_chip *chip;
+
+	DBG("mpic: map virq %d, hwirq 0x%lx\n", virq, hw);
+
+	if (hw == mpic->spurious_vec)
+		return -EINVAL;
+	if (mpic->protected && test_bit(hw, mpic->protected)) {
+		pr_warn("mpic: Mapping of source 0x%x failed, source protected by firmware !\n",
+			(unsigned int)hw);
+		return -EPERM;
+	}
+
+#ifdef CONFIG_SMP
+	else if (hw >= mpic->ipi_vecs[0]) {
+		WARN_ON(mpic->flags & MPIC_SECONDARY);
+
+		DBG("mpic: mapping as IPI\n");
+		irq_set_chip_data(virq, mpic);
+		irq_set_chip_and_handler(virq, &mpic->hc_ipi,
+					 handle_percpu_irq);
+		return 0;
+	}
+#endif /* CONFIG_SMP */
+
+	if (hw >= mpic->timer_vecs[0] && hw <= mpic->timer_vecs[7]) {
+		WARN_ON(mpic->flags & MPIC_SECONDARY);
+
+		DBG("mpic: mapping as timer\n");
+		irq_set_chip_data(virq, mpic);
+		irq_set_chip_and_handler(virq, &mpic->hc_tm,
+					 handle_fasteoi_irq);
+		return 0;
+	}
+
+	if (mpic_map_error_int(mpic, virq, hw))
+		return 0;
+
+	if (hw >= mpic->num_sources) {
+		pr_warn("mpic: Mapping of source 0x%x failed, source out of range !\n",
+			(unsigned int)hw);
+		return -EINVAL;
+	}
+
+	mpic_msi_reserve_hwirq(mpic, hw);
+
+	/* Default chip */
+	chip = &mpic->hc_irq;
+
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+	/* Check for HT interrupts, override vecpri */
+	if (mpic_is_ht_interrupt(mpic, hw))
+		chip = &mpic->hc_ht_irq;
+#endif /* CONFIG_MPIC_U3_HT_IRQS */
+
+	DBG("mpic: mapping to irq chip @%p\n", chip);
+
+	irq_set_chip_data(virq, mpic);
+	irq_set_chip_and_handler(virq, chip, handle_fasteoi_irq);
+
+	/* Set default irq type */
+	irq_set_irq_type(virq, IRQ_TYPE_DEFAULT);
+
+	/* If the MPIC was reset, then all vectors have already been
+	 * initialized.  Otherwise, a per source lazy initialization
+	 * is done here.
+	 */
+	if (!mpic_is_ipi(mpic, hw) && (mpic->flags & MPIC_NO_RESET)) {
+		int cpu;
+
+		preempt_disable();
+		cpu = mpic_processor_id(mpic);
+		preempt_enable();
+
+		mpic_set_vector(virq, hw);
+		mpic_set_destination(virq, cpu);
+		mpic_irq_set_priority(virq, 8);
+	}
+
+	return 0;
+}
+
+static int mpic_host_xlate(struct irq_domain *h, struct device_node *ct,
+			   const u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	struct mpic *mpic = h->host_data;
+	static unsigned char map_mpic_senses[4] = {
+		IRQ_TYPE_EDGE_RISING,
+		IRQ_TYPE_LEVEL_LOW,
+		IRQ_TYPE_LEVEL_HIGH,
+		IRQ_TYPE_EDGE_FALLING,
+	};
+
+	*out_hwirq = intspec[0];
+	if (intsize >= 4 && (mpic->flags & MPIC_FSL)) {
+		/*
+		 * Freescale MPIC with extended intspec:
+		 * First two cells are as usual.  Third specifies
+		 * an "interrupt type".  Fourth is type-specific data.
+		 *
+		 * See Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
+		 */
+		switch (intspec[2]) {
+		case 0:
+			break;
+		case 1:
+			if (!(mpic->flags & MPIC_FSL_HAS_EIMR))
+				break;
+
+			if (intspec[3] >= ARRAY_SIZE(mpic->err_int_vecs))
+				return -EINVAL;
+
+			*out_hwirq = mpic->err_int_vecs[intspec[3]];
+
+			break;
+		case 2:
+			if (intspec[0] >= ARRAY_SIZE(mpic->ipi_vecs))
+				return -EINVAL;
+
+			*out_hwirq = mpic->ipi_vecs[intspec[0]];
+			break;
+		case 3:
+			if (intspec[0] >= ARRAY_SIZE(mpic->timer_vecs))
+				return -EINVAL;
+
+			*out_hwirq = mpic->timer_vecs[intspec[0]];
+			break;
+		default:
+			pr_debug("%s: unknown irq type %u\n",
+				 __func__, intspec[2]);
+			return -EINVAL;
+		}
+
+		*out_flags = map_mpic_senses[intspec[1] & 3];
+	} else if (intsize > 1) {
+		u32 mask = 0x3;
+
+		/* Apple invented a new race of encoding on machines with
+		 * an HT APIC. They encode, among others, the index within
+		 * the HT APIC. We don't care about it here since thankfully,
+		 * it appears that they have the APIC already properly
+		 * configured, and thus our current fixup code that reads the
+		 * APIC config works fine. However, we still need to mask out
+		 * bits in the specifier to make sure we only get bit 0 which
+		 * is the level/edge bit (the only sense bit exposed by Apple),
+		 * as their bit 1 means something else.
+		 */
+		if (machine_is(powermac))
+			mask = 0x1;
+		*out_flags = map_mpic_senses[intspec[1] & mask];
+	} else
+		*out_flags = IRQ_TYPE_NONE;
+
+	DBG("mpic: xlate (%d cells: 0x%08x 0x%08x) to line 0x%lx sense 0x%x\n",
+	    intsize, intspec[0], intspec[1], *out_hwirq, *out_flags);
+
+	return 0;
+}
+
+/* IRQ handler for a secondary MPIC cascaded from another IRQ controller */
+static void mpic_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct mpic *mpic = irq_desc_get_handler_data(desc);
+	unsigned int virq;
+
+	BUG_ON(!(mpic->flags & MPIC_SECONDARY));
+
+	virq = mpic_get_one_irq(mpic);
+	if (virq)
+		generic_handle_irq(virq);
+
+	chip->irq_eoi(&desc->irq_data);
+}
+
+static const struct irq_domain_ops mpic_host_ops = {
+	.match = mpic_host_match,
+	.map = mpic_host_map,
+	.xlate = mpic_host_xlate,
+};
+
+static u32 fsl_mpic_get_version(struct mpic *mpic)
+{
+	u32 brr1;
+
+	if (!(mpic->flags & MPIC_FSL))
+		return 0;
+
+	brr1 = _mpic_read(mpic->reg_type, &mpic->thiscpuregs,
+			MPIC_FSL_BRR1);
+
+	return brr1 & MPIC_FSL_BRR1_VER;
+}
+
+/*
+ * Exported functions
+ */
+
+u32 fsl_mpic_primary_get_version(void)
+{
+	struct mpic *mpic = mpic_primary;
+
+	if (mpic)
+		return fsl_mpic_get_version(mpic);
+
+	return 0;
+}
+
+struct mpic * __init mpic_alloc(struct device_node *node,
+				phys_addr_t phys_addr,
+				unsigned int flags,
+				unsigned int isu_size,
+				unsigned int irq_count,
+				const char *name)
+{
+	int i, psize, intvec_top;
+	struct mpic *mpic;
+	u32 greg_feature;
+	const char *vers;
+	const u32 *psrc;
+	u32 last_irq;
+	u32 fsl_version = 0;
+
+	/* Default MPIC search parameters */
+	static const struct of_device_id __initconst mpic_device_id[] = {
+		{ .type	      = "open-pic", },
+		{ .compatible = "open-pic", },
+		{},
+	};
+
+	/*
+	 * If we were not passed a device-tree node, then perform the default
+	 * search for standardized a standardized OpenPIC.
+	 */
+	if (node) {
+		node = of_node_get(node);
+	} else {
+		node = of_find_matching_node(NULL, mpic_device_id);
+		if (!node)
+			return NULL;
+	}
+
+	/* Pick the physical address from the device tree if unspecified */
+	if (!phys_addr) {
+		/* Check if it is DCR-based */
+		if (of_property_read_bool(node, "dcr-reg")) {
+			flags |= MPIC_USES_DCR;
+		} else {
+			struct resource r;
+			if (of_address_to_resource(node, 0, &r))
+				goto err_of_node_put;
+			phys_addr = r.start;
+		}
+	}
+
+	/* Read extra device-tree properties into the flags variable */
+	if (of_property_read_bool(node, "big-endian"))
+		flags |= MPIC_BIG_ENDIAN;
+	if (of_property_read_bool(node, "pic-no-reset"))
+		flags |= MPIC_NO_RESET;
+	if (of_property_read_bool(node, "single-cpu-affinity"))
+		flags |= MPIC_SINGLE_DEST_CPU;
+	if (of_device_is_compatible(node, "fsl,mpic")) {
+		flags |= MPIC_FSL | MPIC_LARGE_VECTORS;
+		mpic_irq_chip.flags |= IRQCHIP_SKIP_SET_WAKE;
+		mpic_tm_chip.flags |= IRQCHIP_SKIP_SET_WAKE;
+	}
+
+	mpic = kzalloc(sizeof(struct mpic), GFP_KERNEL);
+	if (mpic == NULL)
+		goto err_of_node_put;
+
+	mpic->name = name;
+	mpic->node = node;
+	mpic->paddr = phys_addr;
+	mpic->flags = flags;
+
+	mpic->hc_irq = mpic_irq_chip;
+	mpic->hc_irq.name = name;
+	if (!(mpic->flags & MPIC_SECONDARY))
+		mpic->hc_irq.irq_set_affinity = mpic_set_affinity;
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+	mpic->hc_ht_irq = mpic_irq_ht_chip;
+	mpic->hc_ht_irq.name = name;
+	if (!(mpic->flags & MPIC_SECONDARY))
+		mpic->hc_ht_irq.irq_set_affinity = mpic_set_affinity;
+#endif /* CONFIG_MPIC_U3_HT_IRQS */
+
+#ifdef CONFIG_SMP
+	mpic->hc_ipi = mpic_ipi_chip;
+	mpic->hc_ipi.name = name;
+#endif /* CONFIG_SMP */
+
+	mpic->hc_tm = mpic_tm_chip;
+	mpic->hc_tm.name = name;
+
+	mpic->num_sources = 0; /* so far */
+
+	if (mpic->flags & MPIC_LARGE_VECTORS)
+		intvec_top = 2047;
+	else
+		intvec_top = 255;
+
+	mpic->timer_vecs[0] = intvec_top - 12;
+	mpic->timer_vecs[1] = intvec_top - 11;
+	mpic->timer_vecs[2] = intvec_top - 10;
+	mpic->timer_vecs[3] = intvec_top - 9;
+	mpic->timer_vecs[4] = intvec_top - 8;
+	mpic->timer_vecs[5] = intvec_top - 7;
+	mpic->timer_vecs[6] = intvec_top - 6;
+	mpic->timer_vecs[7] = intvec_top - 5;
+	mpic->ipi_vecs[0]   = intvec_top - 4;
+	mpic->ipi_vecs[1]   = intvec_top - 3;
+	mpic->ipi_vecs[2]   = intvec_top - 2;
+	mpic->ipi_vecs[3]   = intvec_top - 1;
+	mpic->spurious_vec  = intvec_top;
+
+	/* Look for protected sources */
+	psrc = of_get_property(mpic->node, "protected-sources", &psize);
+	if (psrc) {
+		/* Allocate a bitmap with one bit per interrupt */
+		mpic->protected = bitmap_zalloc(intvec_top + 1, GFP_KERNEL);
+		BUG_ON(mpic->protected == NULL);
+		for (i = 0; i < psize/sizeof(u32); i++) {
+			if (psrc[i] > intvec_top)
+				continue;
+			__set_bit(psrc[i], mpic->protected);
+		}
+	}
+
+#ifdef CONFIG_MPIC_WEIRD
+	mpic->hw_set = mpic_infos[MPIC_GET_REGSET(mpic->flags)];
+#endif
+
+	/* default register type */
+	if (mpic->flags & MPIC_BIG_ENDIAN)
+		mpic->reg_type = mpic_access_mmio_be;
+	else
+		mpic->reg_type = mpic_access_mmio_le;
+
+	/*
+	 * An MPIC with a "dcr-reg" property must be accessed that way, but
+	 * only if the kernel includes DCR support.
+	 */
+#ifdef CONFIG_PPC_DCR
+	if (mpic->flags & MPIC_USES_DCR)
+		mpic->reg_type = mpic_access_dcr;
+#else
+	BUG_ON(mpic->flags & MPIC_USES_DCR);
+#endif
+
+	/* Map the global registers */
+	mpic_map(mpic, mpic->paddr, &mpic->gregs, MPIC_INFO(GREG_BASE), 0x1000);
+	mpic_map(mpic, mpic->paddr, &mpic->tmregs, MPIC_INFO(TIMER_BASE), 0x1000);
+
+	if (mpic->flags & MPIC_FSL) {
+		int ret;
+
+		/*
+		 * Yes, Freescale really did put global registers in the
+		 * magic per-cpu area -- and they don't even show up in the
+		 * non-magic per-cpu copies that this driver normally uses.
+		 */
+		mpic_map(mpic, mpic->paddr, &mpic->thiscpuregs,
+			 MPIC_CPU_THISBASE, 0x1000);
+
+		fsl_version = fsl_mpic_get_version(mpic);
+
+		/* Error interrupt mask register (EIMR) is required for
+		 * handling individual device error interrupts. EIMR
+		 * was added in MPIC version 4.1.
+		 *
+		 * Over here we reserve vector number space for error
+		 * interrupt vectors. This space is stolen from the
+		 * global vector number space, as in case of ipis
+		 * and timer interrupts.
+		 *
+		 * Available vector space = intvec_top - 13, where 13
+		 * is the number of vectors which have been consumed by
+		 * ipis, timer interrupts and spurious.
+		 */
+		if (fsl_version >= 0x401) {
+			ret = mpic_setup_error_int(mpic, intvec_top - 13);
+			if (ret)
+				return NULL;
+		}
+
+	}
+
+	/*
+	 * EPR is only available starting with v4.0.  To support
+	 * platforms that don't know the MPIC version at compile-time,
+	 * such as qemu-e500, turn off coreint if this MPIC doesn't
+	 * support it.  Note that we never enable it if it wasn't
+	 * requested in the first place.
+	 *
+	 * This is done outside the MPIC_FSL check, so that we
+	 * also disable coreint if the MPIC node doesn't have
+	 * an "fsl,mpic" compatible at all.  This will be the case
+	 * with device trees generated by older versions of QEMU.
+	 * fsl_version will be zero if MPIC_FSL is not set.
+	 */
+	if (fsl_version < 0x400 && (flags & MPIC_ENABLE_COREINT))
+		ppc_md.get_irq = mpic_get_irq;
+
+	/* Reset */
+
+	/* When using a device-node, reset requests are only honored if the MPIC
+	 * is allowed to reset.
+	 */
+	if (!(mpic->flags & MPIC_NO_RESET)) {
+		printk(KERN_DEBUG "mpic: Resetting\n");
+		mpic_write(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0),
+			   mpic_read(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0))
+			   | MPIC_GREG_GCONF_RESET);
+		while( mpic_read(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0))
+		       & MPIC_GREG_GCONF_RESET)
+			mb();
+	}
+
+	/* CoreInt */
+	if (mpic->flags & MPIC_ENABLE_COREINT)
+		mpic_write(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0),
+			   mpic_read(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0))
+			   | MPIC_GREG_GCONF_COREINT);
+
+	if (mpic->flags & MPIC_ENABLE_MCK)
+		mpic_write(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0),
+			   mpic_read(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0))
+			   | MPIC_GREG_GCONF_MCK);
+
+	/*
+	 * The MPIC driver will crash if there are more cores than we
+	 * can initialize, so we may as well catch that problem here.
+	 */
+	BUG_ON(num_possible_cpus() > MPIC_MAX_CPUS);
+
+	/* Map the per-CPU registers */
+	for_each_possible_cpu(i) {
+		unsigned int cpu = get_hard_smp_processor_id(i);
+
+		mpic_map(mpic, mpic->paddr, &mpic->cpuregs[cpu],
+			 MPIC_INFO(CPU_BASE) + cpu * MPIC_INFO(CPU_STRIDE),
+			 0x1000);
+	}
+
+	/*
+	 * Read feature register.  For non-ISU MPICs, num sources as well. On
+	 * ISU MPICs, sources are counted as ISUs are added
+	 */
+	greg_feature = mpic_read(mpic->gregs, MPIC_INFO(GREG_FEATURE_0));
+
+	/*
+	 * By default, the last source number comes from the MPIC, but the
+	 * device-tree and board support code can override it on buggy hw.
+	 * If we get passed an isu_size (multi-isu MPIC) then we use that
+	 * as a default instead of the value read from the HW.
+	 */
+	last_irq = (greg_feature & MPIC_GREG_FEATURE_LAST_SRC_MASK)
+				>> MPIC_GREG_FEATURE_LAST_SRC_SHIFT;
+	if (isu_size)
+		last_irq = isu_size  * MPIC_MAX_ISU - 1;
+	of_property_read_u32(mpic->node, "last-interrupt-source", &last_irq);
+	if (irq_count)
+		last_irq = irq_count - 1;
+
+	/* Initialize main ISU if none provided */
+	if (!isu_size) {
+		isu_size = last_irq + 1;
+		mpic->num_sources = isu_size;
+		mpic_map(mpic, mpic->paddr, &mpic->isus[0],
+				MPIC_INFO(IRQ_BASE),
+				MPIC_INFO(IRQ_STRIDE) * isu_size);
+	}
+
+	mpic->isu_size = isu_size;
+	mpic->isu_shift = 1 + __ilog2(mpic->isu_size - 1);
+	mpic->isu_mask = (1 << mpic->isu_shift) - 1;
+
+	mpic->irqhost = irq_domain_add_linear(mpic->node,
+				       intvec_top,
+				       &mpic_host_ops, mpic);
+
+	/*
+	 * FIXME: The code leaks the MPIC object and mappings here; this
+	 * is very unlikely to fail but it ought to be fixed anyways.
+	 */
+	if (mpic->irqhost == NULL)
+		return NULL;
+
+	/* Display version */
+	switch (greg_feature & MPIC_GREG_FEATURE_VERSION_MASK) {
+	case 1:
+		vers = "1.0";
+		break;
+	case 2:
+		vers = "1.2";
+		break;
+	case 3:
+		vers = "1.3";
+		break;
+	default:
+		vers = "<unknown>";
+		break;
+	}
+	printk(KERN_INFO "mpic: Setting up MPIC \"%s\" version %s at %llx,"
+	       " max %d CPUs\n",
+	       name, vers, (unsigned long long)mpic->paddr, num_possible_cpus());
+	printk(KERN_INFO "mpic: ISU size: %d, shift: %d, mask: %x\n",
+	       mpic->isu_size, mpic->isu_shift, mpic->isu_mask);
+
+	mpic->next = mpics;
+	mpics = mpic;
+
+	if (!(mpic->flags & MPIC_SECONDARY)) {
+		mpic_primary = mpic;
+		irq_set_default_host(mpic->irqhost);
+	}
+
+	return mpic;
+
+err_of_node_put:
+	of_node_put(node);
+	return NULL;
+}
+
+void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
+			    phys_addr_t paddr)
+{
+	unsigned int isu_first = isu_num * mpic->isu_size;
+
+	BUG_ON(isu_num >= MPIC_MAX_ISU);
+
+	mpic_map(mpic,
+		 paddr, &mpic->isus[isu_num], 0,
+		 MPIC_INFO(IRQ_STRIDE) * mpic->isu_size);
+
+	if ((isu_first + mpic->isu_size) > mpic->num_sources)
+		mpic->num_sources = isu_first + mpic->isu_size;
+}
+
+void __init mpic_init(struct mpic *mpic)
+{
+	int i, cpu;
+	int num_timers = 4;
+
+	BUG_ON(mpic->num_sources == 0);
+
+	printk(KERN_INFO "mpic: Initializing for %d sources\n", mpic->num_sources);
+
+	/* Set current processor priority to max */
+	mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0xf);
+
+	if (mpic->flags & MPIC_FSL) {
+		u32 version = fsl_mpic_get_version(mpic);
+
+		/*
+		 * Timer group B is present at the latest in MPIC 3.1 (e.g.
+		 * mpc8536).  It is not present in MPIC 2.0 (e.g. mpc8544).
+		 * I don't know about the status of intermediate versions (or
+		 * whether they even exist).
+		 */
+		if (version >= 0x0301)
+			num_timers = 8;
+	}
+
+	/* Initialize timers to our reserved vectors and mask them for now */
+	for (i = 0; i < num_timers; i++) {
+		unsigned int offset = mpic_tm_offset(mpic, i);
+
+		mpic_write(mpic->tmregs,
+			   offset + MPIC_INFO(TIMER_DESTINATION),
+			   1 << hard_smp_processor_id());
+		mpic_write(mpic->tmregs,
+			   offset + MPIC_INFO(TIMER_VECTOR_PRI),
+			   MPIC_VECPRI_MASK |
+			   (9 << MPIC_VECPRI_PRIORITY_SHIFT) |
+			   (mpic->timer_vecs[0] + i));
+	}
+
+	/* Initialize IPIs to our reserved vectors and mark them disabled for now */
+	mpic_test_broken_ipi(mpic);
+	for (i = 0; i < 4; i++) {
+		mpic_ipi_write(i,
+			       MPIC_VECPRI_MASK |
+			       (10 << MPIC_VECPRI_PRIORITY_SHIFT) |
+			       (mpic->ipi_vecs[0] + i));
+	}
+
+	/* Do the HT PIC fixups on U3 broken mpic */
+	DBG("MPIC flags: %x\n", mpic->flags);
+	if ((mpic->flags & MPIC_U3_HT_IRQS) && !(mpic->flags & MPIC_SECONDARY)) {
+		mpic_scan_ht_pics(mpic);
+		mpic_u3msi_init(mpic);
+	}
+
+	mpic_pasemi_msi_init(mpic);
+
+	cpu = mpic_processor_id(mpic);
+
+	if (!(mpic->flags & MPIC_NO_RESET)) {
+		for (i = 0; i < mpic->num_sources; i++) {
+			/* start with vector = source number, and masked */
+			u32 vecpri = MPIC_VECPRI_MASK | i |
+				(8 << MPIC_VECPRI_PRIORITY_SHIFT);
+
+			/* check if protected */
+			if (mpic->protected && test_bit(i, mpic->protected))
+				continue;
+			/* init hw */
+			mpic_irq_write(i, MPIC_INFO(IRQ_VECTOR_PRI), vecpri);
+			mpic_irq_write(i, MPIC_INFO(IRQ_DESTINATION), 1 << cpu);
+		}
+	}
+
+	/* Init spurious vector */
+	mpic_write(mpic->gregs, MPIC_INFO(GREG_SPURIOUS), mpic->spurious_vec);
+
+	/* Disable 8259 passthrough, if supported */
+	if (!(mpic->flags & MPIC_NO_PTHROU_DIS))
+		mpic_write(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0),
+			   mpic_read(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0))
+			   | MPIC_GREG_GCONF_8259_PTHROU_DIS);
+
+	if (mpic->flags & MPIC_NO_BIAS)
+		mpic_write(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0),
+			mpic_read(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0))
+			| MPIC_GREG_GCONF_NO_BIAS);
+
+	/* Set current processor priority to 0 */
+	mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0);
+
+#ifdef CONFIG_PM
+	/* allocate memory to save mpic state */
+	mpic->save_data = kmalloc_array(mpic->num_sources,
+				        sizeof(*mpic->save_data),
+				        GFP_KERNEL);
+	BUG_ON(mpic->save_data == NULL);
+#endif
+
+	/* Check if this MPIC is chained from a parent interrupt controller */
+	if (mpic->flags & MPIC_SECONDARY) {
+		int virq = irq_of_parse_and_map(mpic->node, 0);
+		if (virq) {
+			printk(KERN_INFO "%pOF: hooking up to IRQ %d\n",
+					mpic->node, virq);
+			irq_set_handler_data(virq, mpic);
+			irq_set_chained_handler(virq, &mpic_cascade);
+		}
+	}
+
+	/* FSL mpic error interrupt initialization */
+	if (mpic->flags & MPIC_FSL_HAS_EIMR)
+		mpic_err_int_init(mpic, MPIC_FSL_ERR_INT);
+}
+
+void mpic_irq_set_priority(unsigned int irq, unsigned int pri)
+{
+	struct mpic *mpic = mpic_find(irq);
+	unsigned int src = virq_to_hw(irq);
+	unsigned long flags;
+	u32 reg;
+
+	if (!mpic)
+		return;
+
+	raw_spin_lock_irqsave(&mpic_lock, flags);
+	if (mpic_is_ipi(mpic, src)) {
+		reg = mpic_ipi_read(src - mpic->ipi_vecs[0]) &
+			~MPIC_VECPRI_PRIORITY_MASK;
+		mpic_ipi_write(src - mpic->ipi_vecs[0],
+			       reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
+	} else if (mpic_is_tm(mpic, src)) {
+		reg = mpic_tm_read(src - mpic->timer_vecs[0]) &
+			~MPIC_VECPRI_PRIORITY_MASK;
+		mpic_tm_write(src - mpic->timer_vecs[0],
+			      reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
+	} else {
+		reg = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI))
+			& ~MPIC_VECPRI_PRIORITY_MASK;
+		mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI),
+			       reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
+	}
+	raw_spin_unlock_irqrestore(&mpic_lock, flags);
+}
+
+void mpic_setup_this_cpu(void)
+{
+#ifdef CONFIG_SMP
+	struct mpic *mpic = mpic_primary;
+	unsigned long flags;
+	u32 msk = 1 << hard_smp_processor_id();
+	unsigned int i;
+
+	BUG_ON(mpic == NULL);
+
+	DBG("%s: setup_this_cpu(%d)\n", mpic->name, hard_smp_processor_id());
+
+	raw_spin_lock_irqsave(&mpic_lock, flags);
+
+ 	/* let the mpic know we want intrs. default affinity is 0xffffffff
+	 * until changed via /proc. That's how it's done on x86. If we want
+	 * it differently, then we should make sure we also change the default
+	 * values of irq_desc[].affinity in irq.c.
+ 	 */
+	if (distribute_irqs && !(mpic->flags & MPIC_SINGLE_DEST_CPU)) {
+	 	for (i = 0; i < mpic->num_sources ; i++)
+			mpic_irq_write(i, MPIC_INFO(IRQ_DESTINATION),
+				mpic_irq_read(i, MPIC_INFO(IRQ_DESTINATION)) | msk);
+	}
+
+	/* Set current processor priority to 0 */
+	mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0);
+
+	raw_spin_unlock_irqrestore(&mpic_lock, flags);
+#endif /* CONFIG_SMP */
+}
+
+int mpic_cpu_get_priority(void)
+{
+	struct mpic *mpic = mpic_primary;
+
+	return mpic_cpu_read(MPIC_INFO(CPU_CURRENT_TASK_PRI));
+}
+
+void mpic_cpu_set_priority(int prio)
+{
+	struct mpic *mpic = mpic_primary;
+
+	prio &= MPIC_CPU_TASKPRI_MASK;
+	mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), prio);
+}
+
+void mpic_teardown_this_cpu(int secondary)
+{
+	struct mpic *mpic = mpic_primary;
+	unsigned long flags;
+	u32 msk = 1 << hard_smp_processor_id();
+	unsigned int i;
+
+	BUG_ON(mpic == NULL);
+
+	DBG("%s: teardown_this_cpu(%d)\n", mpic->name, hard_smp_processor_id());
+	raw_spin_lock_irqsave(&mpic_lock, flags);
+
+	/* let the mpic know we don't want intrs.  */
+	for (i = 0; i < mpic->num_sources ; i++)
+		mpic_irq_write(i, MPIC_INFO(IRQ_DESTINATION),
+			mpic_irq_read(i, MPIC_INFO(IRQ_DESTINATION)) & ~msk);
+
+	/* Set current processor priority to max */
+	mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0xf);
+	/* We need to EOI the IPI since not all platforms reset the MPIC
+	 * on boot and new interrupts wouldn't get delivered otherwise.
+	 */
+	mpic_eoi(mpic);
+
+	raw_spin_unlock_irqrestore(&mpic_lock, flags);
+}
+
+
+static unsigned int _mpic_get_one_irq(struct mpic *mpic, int reg)
+{
+	u32 src;
+
+	src = mpic_cpu_read(reg) & MPIC_INFO(VECPRI_VECTOR_MASK);
+#ifdef DEBUG_LOW
+	DBG("%s: get_one_irq(reg 0x%x): %d\n", mpic->name, reg, src);
+#endif
+	if (unlikely(src == mpic->spurious_vec)) {
+		if (mpic->flags & MPIC_SPV_EOI)
+			mpic_eoi(mpic);
+		return 0;
+	}
+	if (unlikely(mpic->protected && test_bit(src, mpic->protected))) {
+		printk_ratelimited(KERN_WARNING "%s: Got protected source %d !\n",
+				   mpic->name, (int)src);
+		mpic_eoi(mpic);
+		return 0;
+	}
+
+	return irq_linear_revmap(mpic->irqhost, src);
+}
+
+unsigned int mpic_get_one_irq(struct mpic *mpic)
+{
+	return _mpic_get_one_irq(mpic, MPIC_INFO(CPU_INTACK));
+}
+
+unsigned int mpic_get_irq(void)
+{
+	struct mpic *mpic = mpic_primary;
+
+	BUG_ON(mpic == NULL);
+
+	return mpic_get_one_irq(mpic);
+}
+
+unsigned int mpic_get_coreint_irq(void)
+{
+#ifdef CONFIG_BOOKE
+	struct mpic *mpic = mpic_primary;
+	u32 src;
+
+	BUG_ON(mpic == NULL);
+
+	src = mfspr(SPRN_EPR);
+
+	if (unlikely(src == mpic->spurious_vec)) {
+		if (mpic->flags & MPIC_SPV_EOI)
+			mpic_eoi(mpic);
+		return 0;
+	}
+	if (unlikely(mpic->protected && test_bit(src, mpic->protected))) {
+		printk_ratelimited(KERN_WARNING "%s: Got protected source %d !\n",
+				   mpic->name, (int)src);
+		return 0;
+	}
+
+	return irq_linear_revmap(mpic->irqhost, src);
+#else
+	return 0;
+#endif
+}
+
+unsigned int mpic_get_mcirq(void)
+{
+	struct mpic *mpic = mpic_primary;
+
+	BUG_ON(mpic == NULL);
+
+	return _mpic_get_one_irq(mpic, MPIC_INFO(CPU_MCACK));
+}
+
+#ifdef CONFIG_SMP
+void __init mpic_request_ipis(void)
+{
+	struct mpic *mpic = mpic_primary;
+	int i;
+	BUG_ON(mpic == NULL);
+
+	printk(KERN_INFO "mpic: requesting IPIs...\n");
+
+	for (i = 0; i < 4; i++) {
+		unsigned int vipi = irq_create_mapping(mpic->irqhost,
+						       mpic->ipi_vecs[0] + i);
+		if (!vipi) {
+			printk(KERN_ERR "Failed to map %s\n", smp_ipi_name[i]);
+			continue;
+		}
+		smp_request_message_ipi(vipi, i);
+	}
+}
+
+void smp_mpic_message_pass(int cpu, int msg)
+{
+	struct mpic *mpic = mpic_primary;
+	u32 physmask;
+
+	BUG_ON(mpic == NULL);
+
+	/* make sure we're sending something that translates to an IPI */
+	if ((unsigned int)msg > 3) {
+		printk("SMP %d: smp_message_pass: unknown msg %d\n",
+		       smp_processor_id(), msg);
+		return;
+	}
+
+#ifdef DEBUG_IPI
+	DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, msg);
+#endif
+
+	physmask = 1 << get_hard_smp_processor_id(cpu);
+
+	mpic_cpu_write(MPIC_INFO(CPU_IPI_DISPATCH_0) +
+		       msg * MPIC_INFO(CPU_IPI_DISPATCH_STRIDE), physmask);
+}
+
+void __init smp_mpic_probe(void)
+{
+	int nr_cpus;
+
+	DBG("smp_mpic_probe()...\n");
+
+	nr_cpus = num_possible_cpus();
+
+	DBG("nr_cpus: %d\n", nr_cpus);
+
+	if (nr_cpus > 1)
+		mpic_request_ipis();
+}
+
+void smp_mpic_setup_cpu(int cpu)
+{
+	mpic_setup_this_cpu();
+}
+
+void mpic_reset_core(int cpu)
+{
+	struct mpic *mpic = mpic_primary;
+	u32 pir;
+	int cpuid = get_hard_smp_processor_id(cpu);
+	int i;
+
+	/* Set target bit for core reset */
+	pir = mpic_read(mpic->gregs, MPIC_INFO(GREG_PROCESSOR_INIT));
+	pir |= (1 << cpuid);
+	mpic_write(mpic->gregs, MPIC_INFO(GREG_PROCESSOR_INIT), pir);
+	mpic_read(mpic->gregs, MPIC_INFO(GREG_PROCESSOR_INIT));
+
+	/* Restore target bit after reset complete */
+	pir &= ~(1 << cpuid);
+	mpic_write(mpic->gregs, MPIC_INFO(GREG_PROCESSOR_INIT), pir);
+	mpic_read(mpic->gregs, MPIC_INFO(GREG_PROCESSOR_INIT));
+
+	/* Perform 15 EOI on each reset core to clear pending interrupts.
+	 * This is required for FSL CoreNet based devices */
+	if (mpic->flags & MPIC_FSL) {
+		for (i = 0; i < 15; i++) {
+			_mpic_write(mpic->reg_type, &mpic->cpuregs[cpuid],
+				      MPIC_CPU_EOI, 0);
+		}
+	}
+}
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_PM
+static void mpic_suspend_one(struct mpic *mpic)
+{
+	int i;
+
+	for (i = 0; i < mpic->num_sources; i++) {
+		mpic->save_data[i].vecprio =
+			mpic_irq_read(i, MPIC_INFO(IRQ_VECTOR_PRI));
+		mpic->save_data[i].dest =
+			mpic_irq_read(i, MPIC_INFO(IRQ_DESTINATION));
+	}
+}
+
+static int mpic_suspend(void)
+{
+	struct mpic *mpic = mpics;
+
+	while (mpic) {
+		mpic_suspend_one(mpic);
+		mpic = mpic->next;
+	}
+
+	return 0;
+}
+
+static void mpic_resume_one(struct mpic *mpic)
+{
+	int i;
+
+	for (i = 0; i < mpic->num_sources; i++) {
+		mpic_irq_write(i, MPIC_INFO(IRQ_VECTOR_PRI),
+			       mpic->save_data[i].vecprio);
+		mpic_irq_write(i, MPIC_INFO(IRQ_DESTINATION),
+			       mpic->save_data[i].dest);
+
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+	if (mpic->fixups) {
+		struct mpic_irq_fixup *fixup = &mpic->fixups[i];
+
+		if (fixup->base) {
+			/* we use the lowest bit in an inverted meaning */
+			if ((mpic->save_data[i].fixup_data & 1) == 0)
+				continue;
+
+			/* Enable and configure */
+			writeb(0x10 + 2 * fixup->index, fixup->base + 2);
+
+			writel(mpic->save_data[i].fixup_data & ~1,
+			       fixup->base + 4);
+		}
+	}
+#endif
+	} /* end for loop */
+}
+
+static void mpic_resume(void)
+{
+	struct mpic *mpic = mpics;
+
+	while (mpic) {
+		mpic_resume_one(mpic);
+		mpic = mpic->next;
+	}
+}
+
+static struct syscore_ops mpic_syscore_ops = {
+	.resume = mpic_resume,
+	.suspend = mpic_suspend,
+};
+
+static int mpic_init_sys(void)
+{
+	int rc;
+
+	register_syscore_ops(&mpic_syscore_ops);
+	rc = subsys_system_register(&mpic_subsys, NULL);
+	if (rc) {
+		unregister_syscore_ops(&mpic_syscore_ops);
+		pr_err("mpic: Failed to register subsystem!\n");
+		return rc;
+	}
+
+	return 0;
+}
+
+device_initcall(mpic_init_sys);
+#endif
diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h
new file mode 100644
index 0000000000..bb460ff57a
--- /dev/null
+++ b/arch/powerpc/sysdev/mpic.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _POWERPC_SYSDEV_MPIC_H
+#define _POWERPC_SYSDEV_MPIC_H
+
+/*
+ * Copyright 2006-2007, Michael Ellerman, IBM Corporation.
+ */
+
+#ifdef CONFIG_PCI_MSI
+extern void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq);
+int __init mpic_msi_init_allocator(struct mpic *mpic);
+int __init mpic_u3msi_init(struct mpic *mpic);
+#else
+static inline void mpic_msi_reserve_hwirq(struct mpic *mpic,
+					  irq_hw_number_t hwirq)
+{
+	return;
+}
+
+static inline int mpic_u3msi_init(struct mpic *mpic)
+{
+	return -1;
+}
+#endif
+
+#if defined(CONFIG_PCI_MSI) && defined(CONFIG_PPC_PASEMI)
+int __init mpic_pasemi_msi_init(struct mpic *mpic);
+#else
+static inline int mpic_pasemi_msi_init(struct mpic *mpic) { return -1; }
+#endif
+
+extern int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type);
+extern void mpic_set_vector(unsigned int virq, unsigned int vector);
+extern int mpic_set_affinity(struct irq_data *d,
+			     const struct cpumask *cpumask, bool force);
+extern void mpic_reset_core(int cpu);
+
+#ifdef CONFIG_FSL_SOC
+extern int mpic_map_error_int(struct mpic *mpic, unsigned int virq, irq_hw_number_t  hw);
+void __init mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum);
+int __init mpic_setup_error_int(struct mpic *mpic, int intvec);
+#else
+static inline int mpic_map_error_int(struct mpic *mpic, unsigned int virq, irq_hw_number_t  hw)
+{
+	return 0;
+}
+
+
+static inline void mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum)
+{
+	return;
+}
+
+static inline int mpic_setup_error_int(struct mpic *mpic, int intvec)
+{
+	return -1;
+}
+#endif
+
+#endif /* _POWERPC_SYSDEV_MPIC_H */
diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c
new file mode 100644
index 0000000000..7b449cc51a
--- /dev/null
+++ b/arch/powerpc/sysdev/mpic_msgr.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2011-2012, Meador Inge, Mentor Graphics Corporation.
+ *
+ * Some ideas based on un-pushed work done by Vivek Mahajan, Jason Jin, and
+ * Mingkai Hu from Freescale Semiconductor, Inc.
+ */
+
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <asm/hw_irq.h>
+#include <asm/ppc-pci.h>
+#include <asm/mpic_msgr.h>
+
+#define MPIC_MSGR_REGISTERS_PER_BLOCK	4
+#define MPIC_MSGR_STRIDE		0x10
+#define MPIC_MSGR_MER_OFFSET		(0x100 / sizeof(u32))
+#define MSGR_INUSE			0
+#define MSGR_FREE			1
+
+static struct mpic_msgr **mpic_msgrs;
+static unsigned int mpic_msgr_count;
+static DEFINE_RAW_SPINLOCK(msgrs_lock);
+
+static inline void _mpic_msgr_mer_write(struct mpic_msgr *msgr, u32 value)
+{
+	out_be32(msgr->mer, value);
+}
+
+static inline u32 _mpic_msgr_mer_read(struct mpic_msgr *msgr)
+{
+	return in_be32(msgr->mer);
+}
+
+static inline void _mpic_msgr_disable(struct mpic_msgr *msgr)
+{
+	u32 mer = _mpic_msgr_mer_read(msgr);
+
+	_mpic_msgr_mer_write(msgr, mer & ~(1 << msgr->num));
+}
+
+struct mpic_msgr *mpic_msgr_get(unsigned int reg_num)
+{
+	unsigned long flags;
+	struct mpic_msgr *msgr;
+
+	/* Assume busy until proven otherwise.  */
+	msgr = ERR_PTR(-EBUSY);
+
+	if (reg_num >= mpic_msgr_count)
+		return ERR_PTR(-ENODEV);
+
+	raw_spin_lock_irqsave(&msgrs_lock, flags);
+	msgr = mpic_msgrs[reg_num];
+	if (msgr->in_use == MSGR_FREE)
+		msgr->in_use = MSGR_INUSE;
+	raw_spin_unlock_irqrestore(&msgrs_lock, flags);
+
+	return msgr;
+}
+EXPORT_SYMBOL_GPL(mpic_msgr_get);
+
+void mpic_msgr_put(struct mpic_msgr *msgr)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&msgr->lock, flags);
+	msgr->in_use = MSGR_FREE;
+	_mpic_msgr_disable(msgr);
+	raw_spin_unlock_irqrestore(&msgr->lock, flags);
+}
+EXPORT_SYMBOL_GPL(mpic_msgr_put);
+
+void mpic_msgr_enable(struct mpic_msgr *msgr)
+{
+	unsigned long flags;
+	u32 mer;
+
+	raw_spin_lock_irqsave(&msgr->lock, flags);
+	mer = _mpic_msgr_mer_read(msgr);
+	_mpic_msgr_mer_write(msgr, mer | (1 << msgr->num));
+	raw_spin_unlock_irqrestore(&msgr->lock, flags);
+}
+EXPORT_SYMBOL_GPL(mpic_msgr_enable);
+
+void mpic_msgr_disable(struct mpic_msgr *msgr)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&msgr->lock, flags);
+	_mpic_msgr_disable(msgr);
+	raw_spin_unlock_irqrestore(&msgr->lock, flags);
+}
+EXPORT_SYMBOL_GPL(mpic_msgr_disable);
+
+/* The following three functions are used to compute the order and number of
+ * the message register blocks.  They are clearly very inefficient.  However,
+ * they are called *only* a few times during device initialization.
+ */
+static unsigned int mpic_msgr_number_of_blocks(void)
+{
+	unsigned int count;
+	struct device_node *aliases;
+
+	count = 0;
+	aliases = of_find_node_by_name(NULL, "aliases");
+
+	if (aliases) {
+		char buf[32];
+
+		for (;;) {
+			snprintf(buf, sizeof(buf), "mpic-msgr-block%d", count);
+			if (!of_property_present(aliases, buf))
+				break;
+
+			count += 1;
+		}
+		of_node_put(aliases);
+	}
+
+	return count;
+}
+
+static unsigned int mpic_msgr_number_of_registers(void)
+{
+	return mpic_msgr_number_of_blocks() * MPIC_MSGR_REGISTERS_PER_BLOCK;
+}
+
+static int mpic_msgr_block_number(struct device_node *node)
+{
+	struct device_node *aliases;
+	unsigned int index, number_of_blocks;
+	char buf[64];
+
+	number_of_blocks = mpic_msgr_number_of_blocks();
+	aliases = of_find_node_by_name(NULL, "aliases");
+	if (!aliases)
+		return -1;
+
+	for (index = 0; index < number_of_blocks; ++index) {
+		struct property *prop;
+		struct device_node *tn;
+
+		snprintf(buf, sizeof(buf), "mpic-msgr-block%d", index);
+		prop = of_find_property(aliases, buf, NULL);
+		tn = of_find_node_by_path(prop->value);
+		if (node == tn) {
+			of_node_put(tn);
+			break;
+		}
+		of_node_put(tn);
+	}
+	of_node_put(aliases);
+
+	return index == number_of_blocks ? -1 : index;
+}
+
+/* The probe function for a single message register block.
+ */
+static int mpic_msgr_probe(struct platform_device *dev)
+{
+	void __iomem *msgr_block_addr;
+	int block_number;
+	struct resource rsrc;
+	unsigned int i;
+	unsigned int irq_index;
+	struct device_node *np = dev->dev.of_node;
+	unsigned int receive_mask;
+	const unsigned int *prop;
+
+	if (!np) {
+		dev_err(&dev->dev, "Device OF-Node is NULL");
+		return -EFAULT;
+	}
+
+	/* Allocate the message register array upon the first device
+	 * registered.
+	 */
+	if (!mpic_msgrs) {
+		mpic_msgr_count = mpic_msgr_number_of_registers();
+		dev_info(&dev->dev, "Found %d message registers\n",
+				mpic_msgr_count);
+
+		mpic_msgrs = kcalloc(mpic_msgr_count, sizeof(*mpic_msgrs),
+							 GFP_KERNEL);
+		if (!mpic_msgrs) {
+			dev_err(&dev->dev,
+				"No memory for message register blocks\n");
+			return -ENOMEM;
+		}
+	}
+	dev_info(&dev->dev, "Of-device full name %pOF\n", np);
+
+	/* IO map the message register block. */
+	of_address_to_resource(np, 0, &rsrc);
+	msgr_block_addr = devm_ioremap(&dev->dev, rsrc.start, resource_size(&rsrc));
+	if (!msgr_block_addr) {
+		dev_err(&dev->dev, "Failed to iomap MPIC message registers");
+		return -EFAULT;
+	}
+
+	/* Ensure the block has a defined order. */
+	block_number = mpic_msgr_block_number(np);
+	if (block_number < 0) {
+		dev_err(&dev->dev,
+			"Failed to find message register block alias\n");
+		return -ENODEV;
+	}
+	dev_info(&dev->dev, "Setting up message register block %d\n",
+			block_number);
+
+	/* Grab the receive mask which specifies what registers can receive
+	 * interrupts.
+	 */
+	prop = of_get_property(np, "mpic-msgr-receive-mask", NULL);
+	receive_mask = (prop) ? *prop : 0xF;
+
+	/* Build up the appropriate message register data structures. */
+	for (i = 0, irq_index = 0; i < MPIC_MSGR_REGISTERS_PER_BLOCK; ++i) {
+		struct mpic_msgr *msgr;
+		unsigned int reg_number;
+
+		msgr = kzalloc(sizeof(struct mpic_msgr), GFP_KERNEL);
+		if (!msgr) {
+			dev_err(&dev->dev, "No memory for message register\n");
+			return -ENOMEM;
+		}
+
+		reg_number = block_number * MPIC_MSGR_REGISTERS_PER_BLOCK + i;
+		msgr->base = msgr_block_addr + i * MPIC_MSGR_STRIDE;
+		msgr->mer = msgr->base + MPIC_MSGR_MER_OFFSET;
+		msgr->in_use = MSGR_FREE;
+		msgr->num = i;
+		raw_spin_lock_init(&msgr->lock);
+
+		if (receive_mask & (1 << i)) {
+			msgr->irq = irq_of_parse_and_map(np, irq_index);
+			if (!msgr->irq) {
+				dev_err(&dev->dev,
+						"Missing interrupt specifier");
+				kfree(msgr);
+				return -EFAULT;
+			}
+			irq_index += 1;
+		} else {
+			msgr->irq = 0;
+		}
+
+		mpic_msgrs[reg_number] = msgr;
+		mpic_msgr_disable(msgr);
+		dev_info(&dev->dev, "Register %d initialized: irq %d\n",
+				reg_number, msgr->irq);
+
+	}
+
+	return 0;
+}
+
+static const struct of_device_id mpic_msgr_ids[] = {
+	{
+		.compatible = "fsl,mpic-v3.1-msgr",
+		.data = NULL,
+	},
+	{}
+};
+
+static struct platform_driver mpic_msgr_driver = {
+	.driver = {
+		.name = "mpic-msgr",
+		.of_match_table = mpic_msgr_ids,
+	},
+	.probe = mpic_msgr_probe,
+};
+
+static __init int mpic_msgr_init(void)
+{
+	return platform_driver_register(&mpic_msgr_driver);
+}
+subsys_initcall(mpic_msgr_init);
diff --git a/arch/powerpc/sysdev/mpic_msi.c b/arch/powerpc/sysdev/mpic_msi.c
new file mode 100644
index 0000000000..34246c8e01
--- /dev/null
+++ b/arch/powerpc/sysdev/mpic_msi.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2006-2007, Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_irq.h>
+#include <linux/bitmap.h>
+#include <linux/msi.h>
+#include <asm/mpic.h>
+#include <asm/hw_irq.h>
+#include <asm/ppc-pci.h>
+#include <asm/msi_bitmap.h>
+
+#include <sysdev/mpic.h>
+
+void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq)
+{
+	/* The mpic calls this even when there is no allocator setup */
+	if (!mpic->msi_bitmap.bitmap)
+		return;
+
+	msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, hwirq);
+}
+
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+static int __init mpic_msi_reserve_u3_hwirqs(struct mpic *mpic)
+{
+	irq_hw_number_t hwirq;
+	const struct irq_domain_ops *ops = mpic->irqhost->ops;
+	struct device_node *np;
+	int flags, index, i;
+	struct of_phandle_args oirq;
+
+	pr_debug("mpic: found U3, guessing msi allocator setup\n");
+
+	/* Reserve source numbers we know are reserved in the HW.
+	 *
+	 * This is a bit of a mix of U3 and U4 reserves but that's going
+	 * to work fine, we have plenty enough numbers left so let's just
+	 * mark anything we don't like reserved.
+	 */
+	for (i = 0;   i < 8;   i++)
+		msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, i);
+
+	for (i = 42;  i < 46;  i++)
+		msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, i);
+
+	for (i = 100; i < 105; i++)
+		msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, i);
+
+	for (i = 124; i < mpic->num_sources; i++)
+		msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, i);
+
+
+	np = NULL;
+	while ((np = of_find_all_nodes(np))) {
+		pr_debug("mpic: mapping hwirqs for %pOF\n", np);
+
+		index = 0;
+		while (of_irq_parse_one(np, index++, &oirq) == 0) {
+			ops->xlate(mpic->irqhost, NULL, oirq.args,
+						oirq.args_count, &hwirq, &flags);
+			msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, hwirq);
+		}
+	}
+
+	return 0;
+}
+#else
+static int __init mpic_msi_reserve_u3_hwirqs(struct mpic *mpic)
+{
+	return -1;
+}
+#endif
+
+int __init mpic_msi_init_allocator(struct mpic *mpic)
+{
+	int rc;
+
+	rc = msi_bitmap_alloc(&mpic->msi_bitmap, mpic->num_sources,
+			      irq_domain_get_of_node(mpic->irqhost));
+	if (rc)
+		return rc;
+
+	rc = msi_bitmap_reserve_dt_hwirqs(&mpic->msi_bitmap);
+	if (rc > 0) {
+		if (mpic->flags & MPIC_U3_HT_IRQS)
+			rc = mpic_msi_reserve_u3_hwirqs(mpic);
+
+		if (rc) {
+			msi_bitmap_free(&mpic->msi_bitmap);
+			return rc;
+		}
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c
new file mode 100644
index 0000000000..7166e2e0ba
--- /dev/null
+++ b/arch/powerpc/sysdev/mpic_timer.c
@@ -0,0 +1,556 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPIC timer driver
+ *
+ * Copyright 2013 Freescale Semiconductor, Inc.
+ * Author: Dongsheng Wang <Dongsheng.Wang@freescale.com>
+ *	   Li Yang <leoli@freescale.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/syscore_ops.h>
+#include <sysdev/fsl_soc.h>
+#include <asm/io.h>
+
+#include <asm/mpic_timer.h>
+
+#define FSL_GLOBAL_TIMER		0x1
+
+/* Clock Ratio
+ * Divide by 64 0x00000300
+ * Divide by 32 0x00000200
+ * Divide by 16 0x00000100
+ * Divide by  8 0x00000000 (Hardware default div)
+ */
+#define MPIC_TIMER_TCR_CLKDIV		0x00000300
+
+#define MPIC_TIMER_TCR_ROVR_OFFSET	24
+
+#define TIMER_STOP			0x80000000
+#define GTCCR_TOG			0x80000000
+#define TIMERS_PER_GROUP		4
+#define MAX_TICKS			(~0U >> 1)
+#define MAX_TICKS_CASCADE		(~0U)
+#define TIMER_OFFSET(num)		(1 << (TIMERS_PER_GROUP - 1 - num))
+
+struct timer_regs {
+	u32	gtccr;
+	u32	res0[3];
+	u32	gtbcr;
+	u32	res1[3];
+	u32	gtvpr;
+	u32	res2[3];
+	u32	gtdr;
+	u32	res3[3];
+};
+
+struct cascade_priv {
+	u32 tcr_value;			/* TCR register: CASC & ROVR value */
+	unsigned int cascade_map;	/* cascade map */
+	unsigned int timer_num;		/* cascade control timer */
+};
+
+struct timer_group_priv {
+	struct timer_regs __iomem	*regs;
+	struct mpic_timer		timer[TIMERS_PER_GROUP];
+	struct list_head		node;
+	unsigned int			timerfreq;
+	unsigned int			idle;
+	unsigned int			flags;
+	spinlock_t			lock;
+	void __iomem			*group_tcr;
+};
+
+static struct cascade_priv cascade_timer[] = {
+	/* cascade timer 0 and 1 */
+	{0x1, 0xc, 0x1},
+	/* cascade timer 1 and 2 */
+	{0x2, 0x6, 0x2},
+	/* cascade timer 2 and 3 */
+	{0x4, 0x3, 0x3}
+};
+
+static LIST_HEAD(timer_group_list);
+
+static void convert_ticks_to_time(struct timer_group_priv *priv,
+		const u64 ticks, time64_t *time)
+{
+	*time = (u64)div_u64(ticks, priv->timerfreq);
+}
+
+/* the time set by the user is converted to "ticks" */
+static int convert_time_to_ticks(struct timer_group_priv *priv,
+		time64_t time, u64 *ticks)
+{
+	u64 max_value;		/* prevent u64 overflow */
+
+	max_value = div_u64(ULLONG_MAX, priv->timerfreq);
+
+	if (time > max_value)
+		return -EINVAL;
+
+	*ticks = (u64)time * (u64)priv->timerfreq;
+
+	return 0;
+}
+
+/* detect whether there is a cascade timer available */
+static struct mpic_timer *detect_idle_cascade_timer(
+					struct timer_group_priv *priv)
+{
+	struct cascade_priv *casc_priv;
+	unsigned int map;
+	unsigned int array_size = ARRAY_SIZE(cascade_timer);
+	unsigned int num;
+	unsigned int i;
+	unsigned long flags;
+
+	casc_priv = cascade_timer;
+	for (i = 0; i < array_size; i++) {
+		spin_lock_irqsave(&priv->lock, flags);
+		map = casc_priv->cascade_map & priv->idle;
+		if (map == casc_priv->cascade_map) {
+			num = casc_priv->timer_num;
+			priv->timer[num].cascade_handle = casc_priv;
+
+			/* set timer busy */
+			priv->idle &= ~casc_priv->cascade_map;
+			spin_unlock_irqrestore(&priv->lock, flags);
+			return &priv->timer[num];
+		}
+		spin_unlock_irqrestore(&priv->lock, flags);
+		casc_priv++;
+	}
+
+	return NULL;
+}
+
+static int set_cascade_timer(struct timer_group_priv *priv, u64 ticks,
+		unsigned int num)
+{
+	struct cascade_priv *casc_priv;
+	u32 tcr;
+	u32 tmp_ticks;
+	u32 rem_ticks;
+
+	/* set group tcr reg for cascade */
+	casc_priv = priv->timer[num].cascade_handle;
+	if (!casc_priv)
+		return -EINVAL;
+
+	tcr = casc_priv->tcr_value |
+		(casc_priv->tcr_value << MPIC_TIMER_TCR_ROVR_OFFSET);
+	setbits32(priv->group_tcr, tcr);
+
+	tmp_ticks = div_u64_rem(ticks, MAX_TICKS_CASCADE, &rem_ticks);
+
+	out_be32(&priv->regs[num].gtccr, 0);
+	out_be32(&priv->regs[num].gtbcr, tmp_ticks | TIMER_STOP);
+
+	out_be32(&priv->regs[num - 1].gtccr, 0);
+	out_be32(&priv->regs[num - 1].gtbcr, rem_ticks);
+
+	return 0;
+}
+
+static struct mpic_timer *get_cascade_timer(struct timer_group_priv *priv,
+					u64 ticks)
+{
+	struct mpic_timer *allocated_timer;
+
+	/* Two cascade timers: Support the maximum time */
+	const u64 max_ticks = (u64)MAX_TICKS * (u64)MAX_TICKS_CASCADE;
+	int ret;
+
+	if (ticks > max_ticks)
+		return NULL;
+
+	/* detect idle timer */
+	allocated_timer = detect_idle_cascade_timer(priv);
+	if (!allocated_timer)
+		return NULL;
+
+	/* set ticks to timer */
+	ret = set_cascade_timer(priv, ticks, allocated_timer->num);
+	if (ret < 0)
+		return NULL;
+
+	return allocated_timer;
+}
+
+static struct mpic_timer *get_timer(time64_t time)
+{
+	struct timer_group_priv *priv;
+	struct mpic_timer *timer;
+
+	u64 ticks;
+	unsigned int num;
+	unsigned int i;
+	unsigned long flags;
+	int ret;
+
+	list_for_each_entry(priv, &timer_group_list, node) {
+		ret = convert_time_to_ticks(priv, time, &ticks);
+		if (ret < 0)
+			return NULL;
+
+		if (ticks > MAX_TICKS) {
+			if (!(priv->flags & FSL_GLOBAL_TIMER))
+				return NULL;
+
+			timer = get_cascade_timer(priv, ticks);
+			if (!timer)
+				continue;
+
+			return timer;
+		}
+
+		for (i = 0; i < TIMERS_PER_GROUP; i++) {
+			/* one timer: Reverse allocation */
+			num = TIMERS_PER_GROUP - 1 - i;
+			spin_lock_irqsave(&priv->lock, flags);
+			if (priv->idle & (1 << i)) {
+				/* set timer busy */
+				priv->idle &= ~(1 << i);
+				/* set ticks & stop timer */
+				out_be32(&priv->regs[num].gtbcr,
+					ticks | TIMER_STOP);
+				out_be32(&priv->regs[num].gtccr, 0);
+				priv->timer[num].cascade_handle = NULL;
+				spin_unlock_irqrestore(&priv->lock, flags);
+				return &priv->timer[num];
+			}
+			spin_unlock_irqrestore(&priv->lock, flags);
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * mpic_start_timer - start hardware timer
+ * @handle: the timer to be started.
+ *
+ * It will do ->fn(->dev) callback from the hardware interrupt at
+ * the 'time64_t' point in the future.
+ */
+void mpic_start_timer(struct mpic_timer *handle)
+{
+	struct timer_group_priv *priv = container_of(handle,
+			struct timer_group_priv, timer[handle->num]);
+
+	clrbits32(&priv->regs[handle->num].gtbcr, TIMER_STOP);
+}
+EXPORT_SYMBOL(mpic_start_timer);
+
+/**
+ * mpic_stop_timer - stop hardware timer
+ * @handle: the timer to be stopped
+ *
+ * The timer periodically generates an interrupt. Unless user stops the timer.
+ */
+void mpic_stop_timer(struct mpic_timer *handle)
+{
+	struct timer_group_priv *priv = container_of(handle,
+			struct timer_group_priv, timer[handle->num]);
+	struct cascade_priv *casc_priv;
+
+	setbits32(&priv->regs[handle->num].gtbcr, TIMER_STOP);
+
+	casc_priv = priv->timer[handle->num].cascade_handle;
+	if (casc_priv) {
+		out_be32(&priv->regs[handle->num].gtccr, 0);
+		out_be32(&priv->regs[handle->num - 1].gtccr, 0);
+	} else {
+		out_be32(&priv->regs[handle->num].gtccr, 0);
+	}
+}
+EXPORT_SYMBOL(mpic_stop_timer);
+
+/**
+ * mpic_get_remain_time - get timer time
+ * @handle: the timer to be selected.
+ * @time: time for timer
+ *
+ * Query timer remaining time.
+ */
+void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time)
+{
+	struct timer_group_priv *priv = container_of(handle,
+			struct timer_group_priv, timer[handle->num]);
+	struct cascade_priv *casc_priv;
+
+	u64 ticks;
+	u32 tmp_ticks;
+
+	casc_priv = priv->timer[handle->num].cascade_handle;
+	if (casc_priv) {
+		tmp_ticks = in_be32(&priv->regs[handle->num].gtccr);
+		tmp_ticks &= ~GTCCR_TOG;
+		ticks = ((u64)tmp_ticks & UINT_MAX) * (u64)MAX_TICKS_CASCADE;
+		tmp_ticks = in_be32(&priv->regs[handle->num - 1].gtccr);
+		ticks += tmp_ticks;
+	} else {
+		ticks = in_be32(&priv->regs[handle->num].gtccr);
+		ticks &= ~GTCCR_TOG;
+	}
+
+	convert_ticks_to_time(priv, ticks, time);
+}
+EXPORT_SYMBOL(mpic_get_remain_time);
+
+/**
+ * mpic_free_timer - free hardware timer
+ * @handle: the timer to be removed.
+ *
+ * Free the timer.
+ *
+ * Note: can not be used in interrupt context.
+ */
+void mpic_free_timer(struct mpic_timer *handle)
+{
+	struct timer_group_priv *priv = container_of(handle,
+			struct timer_group_priv, timer[handle->num]);
+
+	struct cascade_priv *casc_priv;
+	unsigned long flags;
+
+	mpic_stop_timer(handle);
+
+	casc_priv = priv->timer[handle->num].cascade_handle;
+
+	free_irq(priv->timer[handle->num].irq, priv->timer[handle->num].dev);
+
+	spin_lock_irqsave(&priv->lock, flags);
+	if (casc_priv) {
+		u32 tcr;
+		tcr = casc_priv->tcr_value | (casc_priv->tcr_value <<
+					MPIC_TIMER_TCR_ROVR_OFFSET);
+		clrbits32(priv->group_tcr, tcr);
+		priv->idle |= casc_priv->cascade_map;
+		priv->timer[handle->num].cascade_handle = NULL;
+	} else {
+		priv->idle |= TIMER_OFFSET(handle->num);
+	}
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+EXPORT_SYMBOL(mpic_free_timer);
+
+/**
+ * mpic_request_timer - get a hardware timer
+ * @fn: interrupt handler function
+ * @dev: callback function of the data
+ * @time: time for timer
+ *
+ * This executes the "request_irq", returning NULL
+ * else "handle" on success.
+ */
+struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
+				      time64_t time)
+{
+	struct mpic_timer *allocated_timer;
+	int ret;
+
+	if (list_empty(&timer_group_list))
+		return NULL;
+
+	if (time < 0)
+		return NULL;
+
+	allocated_timer = get_timer(time);
+	if (!allocated_timer)
+		return NULL;
+
+	ret = request_irq(allocated_timer->irq, fn,
+			IRQF_TRIGGER_LOW, "global-timer", dev);
+	if (ret) {
+		mpic_free_timer(allocated_timer);
+		return NULL;
+	}
+
+	allocated_timer->dev = dev;
+
+	return allocated_timer;
+}
+EXPORT_SYMBOL(mpic_request_timer);
+
+static int __init timer_group_get_freq(struct device_node *np,
+			struct timer_group_priv *priv)
+{
+	u32 div;
+
+	if (priv->flags & FSL_GLOBAL_TIMER) {
+		struct device_node *dn;
+
+		dn = of_find_compatible_node(NULL, NULL, "fsl,mpic");
+		if (dn) {
+			of_property_read_u32(dn, "clock-frequency",
+					&priv->timerfreq);
+			of_node_put(dn);
+		}
+	}
+
+	if (priv->timerfreq <= 0)
+		return -EINVAL;
+
+	if (priv->flags & FSL_GLOBAL_TIMER) {
+		div = (1 << (MPIC_TIMER_TCR_CLKDIV >> 8)) * 8;
+		priv->timerfreq /= div;
+	}
+
+	return 0;
+}
+
+static int __init timer_group_get_irq(struct device_node *np,
+		struct timer_group_priv *priv)
+{
+	const u32 all_timer[] = { 0, TIMERS_PER_GROUP };
+	const u32 *p;
+	u32 offset;
+	u32 count;
+
+	unsigned int i;
+	unsigned int j;
+	unsigned int irq_index = 0;
+	unsigned int irq;
+	int len;
+
+	p = of_get_property(np, "fsl,available-ranges", &len);
+	if (p && len % (2 * sizeof(u32)) != 0) {
+		pr_err("%pOF: malformed available-ranges property.\n", np);
+		return -EINVAL;
+	}
+
+	if (!p) {
+		p = all_timer;
+		len = sizeof(all_timer);
+	}
+
+	len /= 2 * sizeof(u32);
+
+	for (i = 0; i < len; i++) {
+		offset = p[i * 2];
+		count = p[i * 2 + 1];
+		for (j = 0; j < count; j++) {
+			irq = irq_of_parse_and_map(np, irq_index);
+			if (!irq) {
+				pr_err("%pOF: irq parse and map failed.\n", np);
+				return -EINVAL;
+			}
+
+			/* Set timer idle */
+			priv->idle |= TIMER_OFFSET((offset + j));
+			priv->timer[offset + j].irq = irq;
+			priv->timer[offset + j].num = offset + j;
+			irq_index++;
+		}
+	}
+
+	return 0;
+}
+
+static void __init timer_group_init(struct device_node *np)
+{
+	struct timer_group_priv *priv;
+	unsigned int i = 0;
+	int ret;
+
+	priv = kzalloc(sizeof(struct timer_group_priv), GFP_KERNEL);
+	if (!priv) {
+		pr_err("%pOF: cannot allocate memory for group.\n", np);
+		return;
+	}
+
+	if (of_device_is_compatible(np, "fsl,mpic-global-timer"))
+		priv->flags |= FSL_GLOBAL_TIMER;
+
+	priv->regs = of_iomap(np, i++);
+	if (!priv->regs) {
+		pr_err("%pOF: cannot ioremap timer register address.\n", np);
+		goto out;
+	}
+
+	if (priv->flags & FSL_GLOBAL_TIMER) {
+		priv->group_tcr = of_iomap(np, i++);
+		if (!priv->group_tcr) {
+			pr_err("%pOF: cannot ioremap tcr address.\n", np);
+			goto out;
+		}
+	}
+
+	ret = timer_group_get_freq(np, priv);
+	if (ret < 0) {
+		pr_err("%pOF: cannot get timer frequency.\n", np);
+		goto out;
+	}
+
+	ret = timer_group_get_irq(np, priv);
+	if (ret < 0) {
+		pr_err("%pOF: cannot get timer irqs.\n", np);
+		goto out;
+	}
+
+	spin_lock_init(&priv->lock);
+
+	/* Init FSL timer hardware */
+	if (priv->flags & FSL_GLOBAL_TIMER)
+		setbits32(priv->group_tcr, MPIC_TIMER_TCR_CLKDIV);
+
+	list_add_tail(&priv->node, &timer_group_list);
+
+	return;
+
+out:
+	if (priv->regs)
+		iounmap(priv->regs);
+
+	if (priv->group_tcr)
+		iounmap(priv->group_tcr);
+
+	kfree(priv);
+}
+
+static void mpic_timer_resume(void)
+{
+	struct timer_group_priv *priv;
+
+	list_for_each_entry(priv, &timer_group_list, node) {
+		/* Init FSL timer hardware */
+		if (priv->flags & FSL_GLOBAL_TIMER)
+			setbits32(priv->group_tcr, MPIC_TIMER_TCR_CLKDIV);
+	}
+}
+
+static const struct of_device_id mpic_timer_ids[] = {
+	{ .compatible = "fsl,mpic-global-timer", },
+	{},
+};
+
+static struct syscore_ops mpic_timer_syscore_ops = {
+	.resume = mpic_timer_resume,
+};
+
+static int __init mpic_timer_init(void)
+{
+	struct device_node *np = NULL;
+
+	for_each_matching_node(np, mpic_timer_ids)
+		timer_group_init(np);
+
+	register_syscore_ops(&mpic_timer_syscore_ops);
+
+	if (list_empty(&timer_group_list))
+		return -ENODEV;
+
+	return 0;
+}
+subsys_initcall(mpic_timer_init);
diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c
new file mode 100644
index 0000000000..492cb03c0b
--- /dev/null
+++ b/arch/powerpc/sysdev/mpic_u3msi.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2006, Segher Boessenkool, IBM Corporation.
+ * Copyright 2006-2007, Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/msi.h>
+#include <asm/mpic.h>
+#include <asm/hw_irq.h>
+#include <asm/ppc-pci.h>
+#include <asm/msi_bitmap.h>
+
+#include "mpic.h"
+
+/* A bit ugly, can we get this from the pci_dev somehow? */
+static struct mpic *msi_mpic;
+
+static void mpic_u3msi_mask_irq(struct irq_data *data)
+{
+	pci_msi_mask_irq(data);
+	mpic_mask_irq(data);
+}
+
+static void mpic_u3msi_unmask_irq(struct irq_data *data)
+{
+	mpic_unmask_irq(data);
+	pci_msi_unmask_irq(data);
+}
+
+static struct irq_chip mpic_u3msi_chip = {
+	.irq_shutdown		= mpic_u3msi_mask_irq,
+	.irq_mask		= mpic_u3msi_mask_irq,
+	.irq_unmask		= mpic_u3msi_unmask_irq,
+	.irq_eoi		= mpic_end_irq,
+	.irq_set_type		= mpic_set_irq_type,
+	.irq_set_affinity	= mpic_set_affinity,
+	.name			= "MPIC-U3MSI",
+};
+
+static u64 read_ht_magic_addr(struct pci_dev *pdev, unsigned int pos)
+{
+	u8 flags;
+	u32 tmp;
+	u64 addr;
+
+	pci_read_config_byte(pdev, pos + HT_MSI_FLAGS, &flags);
+
+	if (flags & HT_MSI_FLAGS_FIXED)
+		return HT_MSI_FIXED_ADDR;
+
+	pci_read_config_dword(pdev, pos + HT_MSI_ADDR_LO, &tmp);
+	addr = tmp & HT_MSI_ADDR_LO_MASK;
+	pci_read_config_dword(pdev, pos + HT_MSI_ADDR_HI, &tmp);
+	addr = addr | ((u64)tmp << 32);
+
+	return addr;
+}
+
+static u64 find_ht_magic_addr(struct pci_dev *pdev, unsigned int hwirq)
+{
+	struct pci_bus *bus;
+	unsigned int pos;
+
+	for (bus = pdev->bus; bus && bus->self; bus = bus->parent) {
+		pos = pci_find_ht_capability(bus->self, HT_CAPTYPE_MSI_MAPPING);
+		if (pos)
+			return read_ht_magic_addr(bus->self, pos);
+	}
+
+	return 0;
+}
+
+static u64 find_u4_magic_addr(struct pci_dev *pdev, unsigned int hwirq)
+{
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+
+	/* U4 PCIe MSIs need to write to the special register in
+	 * the bridge that generates interrupts. There should be
+	 * theoretically a register at 0xf8005000 where you just write
+	 * the MSI number and that triggers the right interrupt, but
+	 * unfortunately, this is busted in HW, the bridge endian swaps
+	 * the value and hits the wrong nibble in the register.
+	 *
+	 * So instead we use another register set which is used normally
+	 * for converting HT interrupts to MPIC interrupts, which decodes
+	 * the interrupt number as part of the low address bits
+	 *
+	 * This will not work if we ever use more than one legacy MSI in
+	 * a block but we never do. For one MSI or multiple MSI-X where
+	 * each interrupt address can be specified separately, it works
+	 * just fine.
+	 */
+	if (of_device_is_compatible(hose->dn, "u4-pcie") ||
+	    of_device_is_compatible(hose->dn, "U4-pcie"))
+		return 0xf8004000 | (hwirq << 4);
+
+	return 0;
+}
+
+static void u3msi_teardown_msi_irqs(struct pci_dev *pdev)
+{
+	struct msi_desc *entry;
+	irq_hw_number_t hwirq;
+
+	msi_for_each_desc(entry, &pdev->dev, MSI_DESC_ASSOCIATED) {
+		hwirq = virq_to_hw(entry->irq);
+		irq_set_msi_desc(entry->irq, NULL);
+		irq_dispose_mapping(entry->irq);
+		entry->irq = 0;
+		msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1);
+	}
+}
+
+static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+	unsigned int virq;
+	struct msi_desc *entry;
+	struct msi_msg msg;
+	u64 addr;
+	int hwirq;
+
+	if (type == PCI_CAP_ID_MSIX)
+		pr_debug("u3msi: MSI-X untested, trying anyway.\n");
+
+	/* If we can't find a magic address then MSI ain't gonna work */
+	if (find_ht_magic_addr(pdev, 0) == 0 &&
+	    find_u4_magic_addr(pdev, 0) == 0) {
+		pr_debug("u3msi: no magic address found for %s\n",
+			 pci_name(pdev));
+		return -ENXIO;
+	}
+
+	msi_for_each_desc(entry, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
+		hwirq = msi_bitmap_alloc_hwirqs(&msi_mpic->msi_bitmap, 1);
+		if (hwirq < 0) {
+			pr_debug("u3msi: failed allocating hwirq\n");
+			return hwirq;
+		}
+
+		addr = find_ht_magic_addr(pdev, hwirq);
+		if (addr == 0)
+			addr = find_u4_magic_addr(pdev, hwirq);
+		msg.address_lo = addr & 0xFFFFFFFF;
+		msg.address_hi = addr >> 32;
+
+		virq = irq_create_mapping(msi_mpic->irqhost, hwirq);
+		if (!virq) {
+			pr_debug("u3msi: failed mapping hwirq 0x%x\n", hwirq);
+			msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1);
+			return -ENOSPC;
+		}
+
+		irq_set_msi_desc(virq, entry);
+		irq_set_chip(virq, &mpic_u3msi_chip);
+		irq_set_irq_type(virq, IRQ_TYPE_EDGE_RISING);
+
+		pr_debug("u3msi: allocated virq 0x%x (hw 0x%x) addr 0x%lx\n",
+			  virq, hwirq, (unsigned long)addr);
+
+		printk("u3msi: allocated virq 0x%x (hw 0x%x) addr 0x%lx\n",
+			  virq, hwirq, (unsigned long)addr);
+		msg.data = hwirq;
+		pci_write_msi_msg(virq, &msg);
+
+		hwirq++;
+	}
+
+	return 0;
+}
+
+int __init mpic_u3msi_init(struct mpic *mpic)
+{
+	int rc;
+	struct pci_controller *phb;
+
+	rc = mpic_msi_init_allocator(mpic);
+	if (rc) {
+		pr_debug("u3msi: Error allocating bitmap!\n");
+		return rc;
+	}
+
+	pr_debug("u3msi: Registering MPIC U3 MSI callbacks.\n");
+
+	BUG_ON(msi_mpic);
+	msi_mpic = mpic;
+
+	list_for_each_entry(phb, &hose_list, list_node) {
+		WARN_ON(phb->controller_ops.setup_msi_irqs);
+		phb->controller_ops.setup_msi_irqs = u3msi_setup_msi_irqs;
+		phb->controller_ops.teardown_msi_irqs = u3msi_teardown_msi_irqs;
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c
new file mode 100644
index 0000000000..0b6e37f3ff
--- /dev/null
+++ b/arch/powerpc/sysdev/msi_bitmap.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2006-2008, Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/kmemleak.h>
+#include <linux/bitmap.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <asm/msi_bitmap.h>
+#include <asm/setup.h>
+
+int msi_bitmap_alloc_hwirqs(struct msi_bitmap *bmp, int num)
+{
+	unsigned long flags;
+	int offset, order = get_count_order(num);
+
+	spin_lock_irqsave(&bmp->lock, flags);
+
+	offset = bitmap_find_next_zero_area(bmp->bitmap, bmp->irq_count, 0,
+					    num, (1 << order) - 1);
+	if (offset > bmp->irq_count)
+		goto err;
+
+	bitmap_set(bmp->bitmap, offset, num);
+	spin_unlock_irqrestore(&bmp->lock, flags);
+
+	pr_debug("msi_bitmap: allocated 0x%x at offset 0x%x\n", num, offset);
+
+	return offset;
+err:
+	spin_unlock_irqrestore(&bmp->lock, flags);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(msi_bitmap_alloc_hwirqs);
+
+void msi_bitmap_free_hwirqs(struct msi_bitmap *bmp, unsigned int offset,
+			    unsigned int num)
+{
+	unsigned long flags;
+
+	pr_debug("msi_bitmap: freeing 0x%x at offset 0x%x\n",
+		 num, offset);
+
+	spin_lock_irqsave(&bmp->lock, flags);
+	bitmap_clear(bmp->bitmap, offset, num);
+	spin_unlock_irqrestore(&bmp->lock, flags);
+}
+EXPORT_SYMBOL(msi_bitmap_free_hwirqs);
+
+void msi_bitmap_reserve_hwirq(struct msi_bitmap *bmp, unsigned int hwirq)
+{
+	unsigned long flags;
+
+	pr_debug("msi_bitmap: reserving hwirq 0x%x\n", hwirq);
+
+	spin_lock_irqsave(&bmp->lock, flags);
+	bitmap_allocate_region(bmp->bitmap, hwirq, 0);
+	spin_unlock_irqrestore(&bmp->lock, flags);
+}
+
+/**
+ * msi_bitmap_reserve_dt_hwirqs - Reserve irqs specified in the device tree.
+ * @bmp: pointer to the MSI bitmap.
+ *
+ * Looks in the device tree to see if there is a property specifying which
+ * irqs can be used for MSI. If found those irqs reserved in the device tree
+ * are reserved in the bitmap.
+ *
+ * Returns 0 for success, < 0 if there was an error, and > 0 if no property
+ * was found in the device tree.
+ **/
+int msi_bitmap_reserve_dt_hwirqs(struct msi_bitmap *bmp)
+{
+	int i, j, len;
+	const u32 *p;
+
+	if (!bmp->of_node)
+		return 1;
+
+	p = of_get_property(bmp->of_node, "msi-available-ranges", &len);
+	if (!p) {
+		pr_debug("msi_bitmap: no msi-available-ranges property " \
+			 "found on %pOF\n", bmp->of_node);
+		return 1;
+	}
+
+	if (len % (2 * sizeof(u32)) != 0) {
+		printk(KERN_WARNING "msi_bitmap: Malformed msi-available-ranges"
+		       " property on %pOF\n", bmp->of_node);
+		return -EINVAL;
+	}
+
+	bitmap_allocate_region(bmp->bitmap, 0, get_count_order(bmp->irq_count));
+
+	spin_lock(&bmp->lock);
+
+	/* Format is: (<u32 start> <u32 count>)+ */
+	len /= 2 * sizeof(u32);
+	for (i = 0; i < len; i++, p += 2) {
+		for (j = 0; j < *(p + 1); j++)
+			bitmap_release_region(bmp->bitmap, *p + j, 0);
+	}
+
+	spin_unlock(&bmp->lock);
+
+	return 0;
+}
+
+int __ref msi_bitmap_alloc(struct msi_bitmap *bmp, unsigned int irq_count,
+		     struct device_node *of_node)
+{
+	int size;
+
+	if (!irq_count)
+		return -EINVAL;
+
+	size = BITS_TO_LONGS(irq_count) * sizeof(long);
+	pr_debug("msi_bitmap: allocator bitmap size is 0x%x bytes\n", size);
+
+	bmp->bitmap_from_slab = slab_is_available();
+	if (bmp->bitmap_from_slab)
+		bmp->bitmap = kzalloc(size, GFP_KERNEL);
+	else {
+		bmp->bitmap = memblock_alloc(size, SMP_CACHE_BYTES);
+		if (!bmp->bitmap)
+			panic("%s: Failed to allocate %u bytes\n", __func__,
+			      size);
+		/* the bitmap won't be freed from memblock allocator */
+		kmemleak_not_leak(bmp->bitmap);
+	}
+
+	if (!bmp->bitmap) {
+		pr_debug("msi_bitmap: ENOMEM allocating allocator bitmap!\n");
+		return -ENOMEM;
+	}
+
+	/* We zalloc'ed the bitmap, so all irqs are free by default */
+	spin_lock_init(&bmp->lock);
+	bmp->of_node = of_node_get(of_node);
+	bmp->irq_count = irq_count;
+
+	return 0;
+}
+
+void msi_bitmap_free(struct msi_bitmap *bmp)
+{
+	if (bmp->bitmap_from_slab)
+		kfree(bmp->bitmap);
+	of_node_put(bmp->of_node);
+	bmp->bitmap = NULL;
+}
+
+#ifdef CONFIG_MSI_BITMAP_SELFTEST
+
+static void __init test_basics(void)
+{
+	struct msi_bitmap bmp;
+	int rc, i, size = 512;
+
+	/* Can't allocate a bitmap of 0 irqs */
+	WARN_ON(msi_bitmap_alloc(&bmp, 0, NULL) == 0);
+
+	/* of_node may be NULL */
+	WARN_ON(msi_bitmap_alloc(&bmp, size, NULL));
+
+	/* Should all be free by default */
+	WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size)));
+	bitmap_release_region(bmp.bitmap, 0, get_count_order(size));
+
+	/* With no node, there's no msi-available-ranges, so expect > 0 */
+	WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp) <= 0);
+
+	/* Should all still be free */
+	WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size)));
+	bitmap_release_region(bmp.bitmap, 0, get_count_order(size));
+
+	/* Check we can fill it up and then no more */
+	for (i = 0; i < size; i++)
+		WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) < 0);
+
+	WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) >= 0);
+
+	/* Should all be allocated */
+	WARN_ON(bitmap_find_free_region(bmp.bitmap, size, 0) >= 0);
+
+	/* And if we free one we can then allocate another */
+	msi_bitmap_free_hwirqs(&bmp, size / 2, 1);
+	WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) != size / 2);
+
+	/* Free most of them for the alignment tests */
+	msi_bitmap_free_hwirqs(&bmp, 3, size - 3);
+
+	/* Check we get a naturally aligned offset */
+	rc = msi_bitmap_alloc_hwirqs(&bmp, 2);
+	WARN_ON(rc < 0 && rc % 2 != 0);
+	rc = msi_bitmap_alloc_hwirqs(&bmp, 4);
+	WARN_ON(rc < 0 && rc % 4 != 0);
+	rc = msi_bitmap_alloc_hwirqs(&bmp, 8);
+	WARN_ON(rc < 0 && rc % 8 != 0);
+	rc = msi_bitmap_alloc_hwirqs(&bmp, 9);
+	WARN_ON(rc < 0 && rc % 16 != 0);
+	rc = msi_bitmap_alloc_hwirqs(&bmp, 3);
+	WARN_ON(rc < 0 && rc % 4 != 0);
+	rc = msi_bitmap_alloc_hwirqs(&bmp, 7);
+	WARN_ON(rc < 0 && rc % 8 != 0);
+	rc = msi_bitmap_alloc_hwirqs(&bmp, 121);
+	WARN_ON(rc < 0 && rc % 128 != 0);
+
+	msi_bitmap_free(&bmp);
+
+	/* Clients may WARN_ON bitmap == NULL for "not-allocated" */
+	WARN_ON(bmp.bitmap != NULL);
+}
+
+static void __init test_of_node(void)
+{
+	u32 prop_data[] = { 10, 10, 25, 3, 40, 1, 100, 100, 200, 20 };
+	const char *expected_str = "0-9,20-24,28-39,41-99,220-255";
+	char *prop_name = "msi-available-ranges";
+	char *node_name = "/fakenode";
+	struct device_node of_node;
+	struct property prop;
+	struct msi_bitmap bmp;
+#define SIZE_EXPECTED 256
+	DECLARE_BITMAP(expected, SIZE_EXPECTED);
+
+	/* There should really be a struct device_node allocator */
+	memset(&of_node, 0, sizeof(of_node));
+	of_node_init(&of_node);
+	of_node.full_name = node_name;
+
+	WARN_ON(msi_bitmap_alloc(&bmp, SIZE_EXPECTED, &of_node));
+
+	/* No msi-available-ranges, so expect > 0 */
+	WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp) <= 0);
+
+	/* Should all still be free */
+	WARN_ON(bitmap_find_free_region(bmp.bitmap, SIZE_EXPECTED,
+					get_count_order(SIZE_EXPECTED)));
+	bitmap_release_region(bmp.bitmap, 0, get_count_order(SIZE_EXPECTED));
+
+	/* Now create a fake msi-available-ranges property */
+
+	/* There should really .. oh whatever */
+	memset(&prop, 0, sizeof(prop));
+	prop.name = prop_name;
+	prop.value = &prop_data;
+	prop.length = sizeof(prop_data);
+
+	of_node.properties = &prop;
+
+	/* msi-available-ranges, so expect == 0 */
+	WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp));
+
+	/* Check we got the expected result */
+	WARN_ON(bitmap_parselist(expected_str, expected, SIZE_EXPECTED));
+	WARN_ON(!bitmap_equal(expected, bmp.bitmap, SIZE_EXPECTED));
+
+	msi_bitmap_free(&bmp);
+	kfree(bmp.bitmap);
+}
+
+static int __init msi_bitmap_selftest(void)
+{
+	printk(KERN_DEBUG "Running MSI bitmap self-tests ...\n");
+
+	test_basics();
+	test_of_node();
+
+	return 0;
+}
+late_initcall(msi_bitmap_selftest);
+#endif /* CONFIG_MSI_BITMAP_SELFTEST */
diff --git a/arch/powerpc/sysdev/of_rtc.c b/arch/powerpc/sysdev/of_rtc.c
new file mode 100644
index 0000000000..2211937d37
--- /dev/null
+++ b/arch/powerpc/sysdev/of_rtc.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Instantiate mmio-mapped RTC chips based on device tree information
+ *
+ * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <asm/prom.h>
+
+static __initdata struct {
+	const char *compatible;
+	char *plat_name;
+} of_rtc_table[] = {
+	{ "ds1743-nvram", "rtc-ds1742" },
+};
+
+void __init of_instantiate_rtc(void)
+{
+	struct device_node *node;
+	int err;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(of_rtc_table); i++) {
+		char *plat_name = of_rtc_table[i].plat_name;
+
+		for_each_compatible_node(node, NULL,
+					 of_rtc_table[i].compatible) {
+			struct resource *res;
+
+			res = kmalloc(sizeof(*res), GFP_KERNEL);
+			if (!res) {
+				printk(KERN_ERR "OF RTC: Out of memory "
+				       "allocating resource structure for %pOF\n",
+				       node);
+				continue;
+			}
+
+			err = of_address_to_resource(node, 0, res);
+			if (err) {
+				printk(KERN_ERR "OF RTC: Error "
+				       "translating resources for %pOF\n",
+				       node);
+				continue;
+			}
+
+			printk(KERN_INFO "OF_RTC: %pOF is a %s @ 0x%llx-0x%llx\n",
+			       node, plat_name,
+			       (unsigned long long)res->start,
+			       (unsigned long long)res->end);
+			platform_device_register_simple(plat_name, -1, res, 1);
+		}
+	}
+}
diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c
new file mode 100644
index 0000000000..fcf8d15162
--- /dev/null
+++ b/arch/powerpc/sysdev/pmi.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * pmi driver
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * PMI (Platform Management Interrupt) is a way to communicate
+ * with the BMC (Baseboard Management Controller) via interrupts.
+ * Unlike IPMI it is bidirectional and has a low latency.
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ */
+
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/completion.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/workqueue.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+
+#include <asm/io.h>
+#include <asm/pmi.h>
+
+struct pmi_data {
+	struct list_head	handler;
+	spinlock_t		handler_spinlock;
+	spinlock_t		pmi_spinlock;
+	struct mutex		msg_mutex;
+	pmi_message_t		msg;
+	struct completion	*completion;
+	struct platform_device	*dev;
+	int			irq;
+	u8 __iomem		*pmi_reg;
+	struct work_struct	work;
+};
+
+static struct pmi_data *data;
+
+static irqreturn_t pmi_irq_handler(int irq, void *dev_id)
+{
+	u8 type;
+	int rc;
+
+	spin_lock(&data->pmi_spinlock);
+
+	type = ioread8(data->pmi_reg + PMI_READ_TYPE);
+	pr_debug("pmi: got message of type %d\n", type);
+
+	if (type & PMI_ACK && !data->completion) {
+		printk(KERN_WARNING "pmi: got unexpected ACK message.\n");
+		rc = -EIO;
+		goto unlock;
+	}
+
+	if (data->completion && !(type & PMI_ACK)) {
+		printk(KERN_WARNING "pmi: expected ACK, but got %d\n", type);
+		rc = -EIO;
+		goto unlock;
+	}
+
+	data->msg.type = type;
+	data->msg.data0 = ioread8(data->pmi_reg + PMI_READ_DATA0);
+	data->msg.data1 = ioread8(data->pmi_reg + PMI_READ_DATA1);
+	data->msg.data2 = ioread8(data->pmi_reg + PMI_READ_DATA2);
+	rc = 0;
+unlock:
+	spin_unlock(&data->pmi_spinlock);
+
+	if (rc == -EIO) {
+		rc = IRQ_HANDLED;
+		goto out;
+	}
+
+	if (data->msg.type & PMI_ACK) {
+		complete(data->completion);
+		rc = IRQ_HANDLED;
+		goto out;
+	}
+
+	schedule_work(&data->work);
+
+	rc = IRQ_HANDLED;
+out:
+	return rc;
+}
+
+
+static const struct of_device_id pmi_match[] = {
+	{ .type = "ibm,pmi", .name = "ibm,pmi" },
+	{ .type = "ibm,pmi" },
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, pmi_match);
+
+static void pmi_notify_handlers(struct work_struct *work)
+{
+	struct pmi_handler *handler;
+
+	spin_lock(&data->handler_spinlock);
+	list_for_each_entry(handler, &data->handler, node) {
+		pr_debug("pmi: notifying handler %p\n", handler);
+		if (handler->type == data->msg.type)
+			handler->handle_pmi_message(data->msg);
+	}
+	spin_unlock(&data->handler_spinlock);
+}
+
+static int pmi_of_probe(struct platform_device *dev)
+{
+	struct device_node *np = dev->dev.of_node;
+	int rc;
+
+	if (data) {
+		printk(KERN_ERR "pmi: driver has already been initialized.\n");
+		rc = -EBUSY;
+		goto out;
+	}
+
+	data = kzalloc(sizeof(struct pmi_data), GFP_KERNEL);
+	if (!data) {
+		printk(KERN_ERR "pmi: could not allocate memory.\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	data->pmi_reg = of_iomap(np, 0);
+	if (!data->pmi_reg) {
+		printk(KERN_ERR "pmi: invalid register address.\n");
+		rc = -EFAULT;
+		goto error_cleanup_data;
+	}
+
+	INIT_LIST_HEAD(&data->handler);
+
+	mutex_init(&data->msg_mutex);
+	spin_lock_init(&data->pmi_spinlock);
+	spin_lock_init(&data->handler_spinlock);
+
+	INIT_WORK(&data->work, pmi_notify_handlers);
+
+	data->dev = dev;
+
+	data->irq = irq_of_parse_and_map(np, 0);
+	if (!data->irq) {
+		printk(KERN_ERR "pmi: invalid interrupt.\n");
+		rc = -EFAULT;
+		goto error_cleanup_iomap;
+	}
+
+	rc = request_irq(data->irq, pmi_irq_handler, 0, "pmi", NULL);
+	if (rc) {
+		printk(KERN_ERR "pmi: can't request IRQ %d: returned %d\n",
+				data->irq, rc);
+		goto error_cleanup_iomap;
+	}
+
+	printk(KERN_INFO "pmi: found pmi device at addr %p.\n", data->pmi_reg);
+
+	goto out;
+
+error_cleanup_iomap:
+	iounmap(data->pmi_reg);
+
+error_cleanup_data:
+	kfree(data);
+
+out:
+	return rc;
+}
+
+static int pmi_of_remove(struct platform_device *dev)
+{
+	struct pmi_handler *handler, *tmp;
+
+	free_irq(data->irq, NULL);
+	iounmap(data->pmi_reg);
+
+	spin_lock(&data->handler_spinlock);
+
+	list_for_each_entry_safe(handler, tmp, &data->handler, node)
+		list_del(&handler->node);
+
+	spin_unlock(&data->handler_spinlock);
+
+	kfree(data);
+	data = NULL;
+
+	return 0;
+}
+
+static struct platform_driver pmi_of_platform_driver = {
+	.probe		= pmi_of_probe,
+	.remove		= pmi_of_remove,
+	.driver = {
+		.name = "pmi",
+		.of_match_table = pmi_match,
+	},
+};
+module_platform_driver(pmi_of_platform_driver);
+
+int pmi_send_message(pmi_message_t msg)
+{
+	unsigned long flags;
+	DECLARE_COMPLETION_ONSTACK(completion);
+
+	if (!data)
+		return -ENODEV;
+
+	mutex_lock(&data->msg_mutex);
+
+	data->msg = msg;
+	pr_debug("pmi_send_message: msg is %08x\n", *(u32*)&msg);
+
+	data->completion = &completion;
+
+	spin_lock_irqsave(&data->pmi_spinlock, flags);
+	iowrite8(msg.data0, data->pmi_reg + PMI_WRITE_DATA0);
+	iowrite8(msg.data1, data->pmi_reg + PMI_WRITE_DATA1);
+	iowrite8(msg.data2, data->pmi_reg + PMI_WRITE_DATA2);
+	iowrite8(msg.type, data->pmi_reg + PMI_WRITE_TYPE);
+	spin_unlock_irqrestore(&data->pmi_spinlock, flags);
+
+	pr_debug("pmi_send_message: wait for completion\n");
+
+	wait_for_completion_interruptible_timeout(data->completion,
+						  PMI_TIMEOUT);
+
+	data->completion = NULL;
+
+	mutex_unlock(&data->msg_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pmi_send_message);
+
+int pmi_register_handler(struct pmi_handler *handler)
+{
+	if (!data)
+		return -ENODEV;
+
+	spin_lock(&data->handler_spinlock);
+	list_add_tail(&handler->node, &data->handler);
+	spin_unlock(&data->handler_spinlock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pmi_register_handler);
+
+void pmi_unregister_handler(struct pmi_handler *handler)
+{
+	if (!data)
+		return;
+
+	pr_debug("pmi: unregistering handler %p\n", handler);
+
+	spin_lock(&data->handler_spinlock);
+	list_del(&handler->node);
+	spin_unlock(&data->handler_spinlock);
+}
+EXPORT_SYMBOL_GPL(pmi_unregister_handler);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
+MODULE_DESCRIPTION("IBM Platform Management Interrupt driver");
diff --git a/arch/powerpc/sysdev/rtc_cmos_setup.c b/arch/powerpc/sysdev/rtc_cmos_setup.c
new file mode 100644
index 0000000000..47cc87bd6a
--- /dev/null
+++ b/arch/powerpc/sysdev/rtc_cmos_setup.c
@@ -0,0 +1,69 @@
+/*
+ * Setup code for PC-style Real-Time Clock.
+ *
+ * Author: Wade Farnsworth <wfarnsworth@mvista.com>
+ *
+ * 2007 (c) MontaVista Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mc146818rtc.h>
+#include <linux/of_address.h>
+
+
+static int  __init add_rtc(void)
+{
+	struct device_node *np;
+	struct platform_device *pd;
+	struct resource res[2];
+	unsigned int num_res = 1;
+	int ret;
+
+	memset(&res, 0, sizeof(res));
+
+	np = of_find_compatible_node(NULL, NULL, "pnpPNP,b00");
+	if (!np)
+		return -ENODEV;
+
+	ret = of_address_to_resource(np, 0, &res[0]);
+	of_node_put(np);
+	if (ret)
+		return ret;
+
+	/*
+	 * RTC_PORT(x) is hardcoded in asm/mc146818rtc.h.  Verify that the
+	 * address provided by the device node matches.
+	 */
+	if (res[0].start != RTC_PORT(0))
+		return -EINVAL;
+
+	np = of_find_compatible_node(NULL, NULL, "chrp,iic");
+	if (!np)
+		np = of_find_compatible_node(NULL, NULL, "pnpPNP,000");
+	if (np) {
+		of_node_put(np);
+		/*
+		 * Use a fixed interrupt value of 8 since on PPC if we are
+		 * using this its off an i8259 which we ensure has interrupt
+		 * numbers 0..15.
+		 */
+		res[1].start = 8;
+		res[1].end = 8;
+		res[1].flags = IORESOURCE_IRQ;
+		num_res++;
+	}
+
+	pd = platform_device_register_simple("rtc_cmos", -1,
+					     &res[0], num_res);
+
+	return PTR_ERR_OR_ZERO(pd);
+}
+fs_initcall(add_rtc);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/sysdev/tsi108_dev.c b/arch/powerpc/sysdev/tsi108_dev.c
new file mode 100644
index 0000000000..db520c40cb
--- /dev/null
+++ b/arch/powerpc/sysdev/tsi108_dev.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * tsi108/109 device setup code
+ *
+ * Maintained by Roy Zang < tie-fei.zang@freescale.com >
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/export.h>
+#include <linux/device.h>
+#include <linux/etherdevice.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_net.h>
+#include <asm/tsi108.h>
+
+#include <linux/atomic.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <mm/mmu_decl.h>
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) do { printk(fmt); } while(0)
+#else
+#define DBG(fmt...) do { } while(0)
+#endif
+
+static phys_addr_t tsi108_csr_base = -1;
+
+phys_addr_t get_csrbase(void)
+{
+	struct device_node *tsi;
+
+	if (tsi108_csr_base != -1)
+		return tsi108_csr_base;
+
+	tsi = of_find_node_by_type(NULL, "tsi-bridge");
+	if (tsi) {
+		struct resource res;
+		of_address_to_resource(tsi, 0, &res);
+		tsi108_csr_base = res.start;
+		of_node_put(tsi);
+	}
+	return tsi108_csr_base;
+}
+EXPORT_SYMBOL(get_csrbase);
+
+u32 get_vir_csrbase(void)
+{
+	return (u32) (ioremap(get_csrbase(), 0x10000));
+}
+EXPORT_SYMBOL(get_vir_csrbase);
+
+static int __init tsi108_eth_of_init(void)
+{
+	struct device_node *np;
+	unsigned int i = 0;
+	struct platform_device *tsi_eth_dev;
+	struct resource res;
+	int ret;
+
+	for_each_compatible_node(np, "network", "tsi108-ethernet") {
+		struct resource r[2];
+		struct device_node *phy, *mdio;
+		hw_info tsi_eth_data;
+		const unsigned int *phy_id;
+		const phandle *ph;
+
+		memset(r, 0, sizeof(r));
+		memset(&tsi_eth_data, 0, sizeof(tsi_eth_data));
+
+		ret = of_address_to_resource(np, 0, &r[0]);
+		DBG("%s: name:start->end = %s:%pR\n",
+		    __func__, r[0].name, &r[0]);
+		if (ret)
+			goto err;
+
+		r[1].name = "tx";
+		r[1].start = irq_of_parse_and_map(np, 0);
+		r[1].end = irq_of_parse_and_map(np, 0);
+		r[1].flags = IORESOURCE_IRQ;
+		DBG("%s: name:start->end = %s:%pR\n",
+			__func__, r[1].name, &r[1]);
+
+		tsi_eth_dev =
+		    platform_device_register_simple("tsi-ethernet", i++, &r[0],
+						    1);
+
+		if (IS_ERR(tsi_eth_dev)) {
+			ret = PTR_ERR(tsi_eth_dev);
+			goto err;
+		}
+
+		of_get_mac_address(np, tsi_eth_data.mac_addr);
+
+		ph = of_get_property(np, "mdio-handle", NULL);
+		mdio = of_find_node_by_phandle(*ph);
+		ret = of_address_to_resource(mdio, 0, &res);
+		of_node_put(mdio);
+		if (ret)
+			goto unreg;
+
+		ph = of_get_property(np, "phy-handle", NULL);
+		phy = of_find_node_by_phandle(*ph);
+
+		if (phy == NULL) {
+			ret = -ENODEV;
+			goto unreg;
+		}
+
+		phy_id = of_get_property(phy, "reg", NULL);
+
+		tsi_eth_data.regs = r[0].start;
+		tsi_eth_data.phyregs = res.start;
+		tsi_eth_data.phy = *phy_id;
+		tsi_eth_data.irq_num = irq_of_parse_and_map(np, 0);
+
+		/* Some boards with the TSI108 bridge (e.g. Holly)
+		 * have a miswiring of the ethernet PHYs which
+		 * requires a workaround.  The special
+		 * "txc-rxc-delay-disable" property enables this
+		 * workaround.  FIXME: Need to port the tsi108_eth
+		 * driver itself to phylib and use a non-misleading
+		 * name for the workaround flag - it's not actually to
+		 * do with the model of PHY in use */
+		if (of_property_read_bool(phy, "txc-rxc-delay-disable"))
+			tsi_eth_data.phy_type = TSI108_PHY_BCM54XX;
+		of_node_put(phy);
+
+		ret =
+		    platform_device_add_data(tsi_eth_dev, &tsi_eth_data,
+					     sizeof(hw_info));
+		if (ret)
+			goto unreg;
+	}
+	return 0;
+unreg:
+	platform_device_unregister(tsi_eth_dev);
+err:
+	of_node_put(np);
+	return ret;
+}
+
+arch_initcall(tsi108_eth_of_init);
diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
new file mode 100644
index 0000000000..0e42f7bad7
--- /dev/null
+++ b/arch/powerpc/sysdev/tsi108_pci.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Common routines for Tundra Semiconductor TSI108 host bridge.
+ *
+ * 2004-2005 (c) Tundra Semiconductor Corp.
+ * Author: Alex Bounine (alexandreb@tundra.com)
+ * Author: Roy Zang (tie-fei.zang@freescale.com)
+ * 	   Add pci interrupt router host
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/interrupt.h>
+#include <linux/of_address.h>
+
+#include <asm/byteorder.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <linux/uaccess.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/tsi108.h>
+#include <asm/tsi108_pci.h>
+#include <asm/tsi108_irq.h>
+
+#undef DEBUG
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+#define tsi_mk_config_addr(bus, devfunc, offset) \
+	((((bus)<<16) | ((devfunc)<<8) | (offset & 0xfc)) + tsi108_pci_cfg_base)
+
+u32 tsi108_pci_cfg_base;
+static u32 tsi108_pci_cfg_phys;
+u32 tsi108_csr_vir_base;
+static struct irq_domain *pci_irq_host;
+
+extern u32 get_vir_csrbase(void);
+extern u32 tsi108_read_reg(u32 reg_offset);
+extern void tsi108_write_reg(u32 reg_offset, u32 val);
+
+int
+tsi108_direct_write_config(struct pci_bus *bus, unsigned int devfunc,
+			   int offset, int len, u32 val)
+{
+	volatile unsigned char *cfg_addr;
+	struct pci_controller *hose = pci_bus_to_host(bus);
+
+	if (ppc_md.pci_exclude_device)
+		if (ppc_md.pci_exclude_device(hose, bus->number, devfunc))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+	cfg_addr = (unsigned char *)(tsi_mk_config_addr(bus->number,
+							devfunc, offset) |
+							(offset & 0x03));
+
+#ifdef DEBUG
+	printk("PCI CFG write : ");
+	printk("%d:0x%x:0x%x ", bus->number, devfunc, offset);
+	printk("%d ADDR=0x%08x ", len, (uint) cfg_addr);
+	printk("data = 0x%08x\n", val);
+#endif
+
+	switch (len) {
+	case 1:
+		out_8((u8 *) cfg_addr, val);
+		break;
+	case 2:
+		out_le16((u16 *) cfg_addr, val);
+		break;
+	default:
+		out_le32((u32 *) cfg_addr, val);
+		break;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+void tsi108_clear_pci_error(u32 pci_cfg_base)
+{
+	u32 err_stat, err_addr, pci_stat;
+
+	/*
+	 * Quietly clear PB and PCI error flags set as result
+	 * of PCI/X configuration read requests.
+	 */
+
+	/* Read PB Error Log Registers */
+
+	err_stat = tsi108_read_reg(TSI108_PB_OFFSET + TSI108_PB_ERRCS);
+	err_addr = tsi108_read_reg(TSI108_PB_OFFSET + TSI108_PB_AERR);
+
+	if (err_stat & TSI108_PB_ERRCS_ES) {
+		/* Clear error flag */
+		tsi108_write_reg(TSI108_PB_OFFSET + TSI108_PB_ERRCS,
+				 TSI108_PB_ERRCS_ES);
+
+		/* Clear read error reported in PB_ISR */
+		tsi108_write_reg(TSI108_PB_OFFSET + TSI108_PB_ISR,
+				 TSI108_PB_ISR_PBS_RD_ERR);
+
+		/* Clear PCI/X bus cfg errors if applicable */
+		if ((err_addr & 0xFF000000) == pci_cfg_base) {
+			pci_stat =
+			    tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_CSR);
+			tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_CSR,
+					 pci_stat);
+		}
+	}
+
+	return;
+}
+
+#define __tsi108_read_pci_config(x, addr, op)		\
+	__asm__ __volatile__(				\
+		"	"op" %0,0,%1\n"		\
+		"1:	eieio\n"			\
+		"2:\n"					\
+		".section .fixup,\"ax\"\n"		\
+		"3:	li %0,-1\n"			\
+		"	b 2b\n"				\
+		".previous\n"				\
+		EX_TABLE(1b, 3b)			\
+		: "=r"(x) : "r"(addr))
+
+int
+tsi108_direct_read_config(struct pci_bus *bus, unsigned int devfn, int offset,
+			  int len, u32 * val)
+{
+	volatile unsigned char *cfg_addr;
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	u32 temp;
+
+	if (ppc_md.pci_exclude_device)
+		if (ppc_md.pci_exclude_device(hose, bus->number, devfn))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+	cfg_addr = (unsigned char *)(tsi_mk_config_addr(bus->number,
+							devfn,
+							offset) | (offset &
+								   0x03));
+
+	switch (len) {
+	case 1:
+		__tsi108_read_pci_config(temp, cfg_addr, "lbzx");
+		break;
+	case 2:
+		__tsi108_read_pci_config(temp, cfg_addr, "lhbrx");
+		break;
+	default:
+		__tsi108_read_pci_config(temp, cfg_addr, "lwbrx");
+		break;
+	}
+
+	*val = temp;
+
+#ifdef DEBUG
+	if ((0xFFFFFFFF != temp) && (0xFFFF != temp) && (0xFF != temp)) {
+		printk("PCI CFG read : ");
+		printk("%d:0x%x:0x%x ", bus->number, devfn, offset);
+		printk("%d ADDR=0x%08x ", len, (uint) cfg_addr);
+		printk("data = 0x%x\n", *val);
+	}
+#endif
+	return PCIBIOS_SUCCESSFUL;
+}
+
+void tsi108_clear_pci_cfg_error(void)
+{
+	tsi108_clear_pci_error(tsi108_pci_cfg_phys);
+}
+
+static struct pci_ops tsi108_direct_pci_ops = {
+	.read = tsi108_direct_read_config,
+	.write = tsi108_direct_write_config,
+};
+
+int __init tsi108_setup_pci(struct device_node *dev, u32 cfg_phys, int primary)
+{
+	int len;
+	struct pci_controller *hose;
+	struct resource rsrc;
+	const int *bus_range;
+	int has_address = 0;
+
+	/* PCI Config mapping */
+	tsi108_pci_cfg_base = (u32)ioremap(cfg_phys, TSI108_PCI_CFG_SIZE);
+	tsi108_pci_cfg_phys = cfg_phys;
+	DBG("TSI_PCI: %s tsi108_pci_cfg_base=0x%x\n", __func__,
+	    tsi108_pci_cfg_base);
+
+	/* Fetch host bridge registers address */
+	has_address = (of_address_to_resource(dev, 0, &rsrc) == 0);
+
+	/* Get bus range if any */
+	bus_range = of_get_property(dev, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+		       " bus 0\n", dev);
+	}
+
+	hose = pcibios_alloc_controller(dev);
+
+	if (!hose) {
+		printk("PCI Host bridge init failed\n");
+		return -ENOMEM;
+	}
+
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	(hose)->ops = &tsi108_direct_pci_ops;
+
+	pr_info("Found tsi108 PCI host bridge at 0x%pa. Firmware bus number: %d->%d\n",
+		&rsrc.start, hose->first_busno, hose->last_busno);
+
+	/* Interpret the "ranges" property */
+	/* This also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, primary);
+	return 0;
+}
+
+/*
+ * Low level utility functions
+ */
+
+static void tsi108_pci_int_mask(u_int irq)
+{
+	u_int irp_cfg;
+	int int_line = (irq - IRQ_PCI_INTAD_BASE);
+
+	irp_cfg = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL);
+	mb();
+	irp_cfg |= (1 << int_line);	/* INTx_DIR = output */
+	irp_cfg &= ~(3 << (8 + (int_line * 2)));	/* INTx_TYPE = unused */
+	tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL, irp_cfg);
+	mb();
+	irp_cfg = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL);
+}
+
+static void tsi108_pci_int_unmask(u_int irq)
+{
+	u_int irp_cfg;
+	int int_line = (irq - IRQ_PCI_INTAD_BASE);
+
+	irp_cfg = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL);
+	mb();
+	irp_cfg &= ~(1 << int_line);
+	irp_cfg |= (3 << (8 + (int_line * 2)));
+	tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL, irp_cfg);
+	mb();
+}
+
+static void __init init_pci_source(void)
+{
+	tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL,
+			0x0000ff00);
+	tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_ENABLE,
+			TSI108_PCI_IRP_ENABLE_P_INT);
+	mb();
+}
+
+static inline unsigned int get_pci_source(void)
+{
+	u_int temp = 0;
+	int irq = -1;
+	int i;
+	u_int pci_irp_stat;
+	static int mask = 0;
+
+	/* Read PCI/X block interrupt status register */
+	pci_irp_stat = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_STAT);
+	mb();
+
+	if (pci_irp_stat & TSI108_PCI_IRP_STAT_P_INT) {
+		/* Process Interrupt from PCI bus INTA# - INTD# lines */
+		temp =
+		    tsi108_read_reg(TSI108_PCI_OFFSET +
+				    TSI108_PCI_IRP_INTAD) & 0xf;
+		mb();
+		for (i = 0; i < 4; i++, mask++) {
+			if (temp & (1 << mask % 4)) {
+				irq = IRQ_PCI_INTA + mask % 4;
+				mask++;
+				break;
+			}
+		}
+
+		/* Disable interrupts from PCI block */
+		temp = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_ENABLE);
+		tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_ENABLE,
+				temp & ~TSI108_PCI_IRP_ENABLE_P_INT);
+		mb();
+		(void)tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_ENABLE);
+		mb();
+	}
+#ifdef DEBUG
+	else {
+		printk("TSI108_PIC: error in TSI108_PCI_IRP_STAT\n");
+		pci_irp_stat =
+		    tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_STAT);
+		temp =
+		    tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_INTAD);
+		mb();
+		printk(">> stat=0x%08x intad=0x%08x ", pci_irp_stat, temp);
+		temp =
+		    tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL);
+		mb();
+		printk("cfg_ctl=0x%08x ", temp);
+		temp =
+		    tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_ENABLE);
+		mb();
+		printk("irp_enable=0x%08x\n", temp);
+	}
+#endif	/* end of DEBUG */
+
+	return irq;
+}
+
+
+/*
+ * Linux descriptor level callbacks
+ */
+
+static void tsi108_pci_irq_unmask(struct irq_data *d)
+{
+	tsi108_pci_int_unmask(d->irq);
+
+	/* Enable interrupts from PCI block */
+	tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_ENABLE,
+			 tsi108_read_reg(TSI108_PCI_OFFSET +
+					 TSI108_PCI_IRP_ENABLE) |
+			 TSI108_PCI_IRP_ENABLE_P_INT);
+	mb();
+}
+
+static void tsi108_pci_irq_mask(struct irq_data *d)
+{
+	tsi108_pci_int_mask(d->irq);
+}
+
+static void tsi108_pci_irq_ack(struct irq_data *d)
+{
+	tsi108_pci_int_mask(d->irq);
+}
+
+/*
+ * Interrupt controller descriptor for cascaded PCI interrupt controller.
+ */
+
+static struct irq_chip tsi108_pci_irq = {
+	.name = "tsi108_PCI_int",
+	.irq_mask = tsi108_pci_irq_mask,
+	.irq_ack = tsi108_pci_irq_ack,
+	.irq_unmask = tsi108_pci_irq_unmask,
+};
+
+static int pci_irq_host_xlate(struct irq_domain *h, struct device_node *ct,
+			    const u32 *intspec, unsigned int intsize,
+			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+	*out_hwirq = intspec[0];
+	*out_flags = IRQ_TYPE_LEVEL_HIGH;
+	return 0;
+}
+
+static int pci_irq_host_map(struct irq_domain *h, unsigned int virq,
+			  irq_hw_number_t hw)
+{	unsigned int irq;
+	DBG("%s(%d, 0x%lx)\n", __func__, virq, hw);
+	if ((virq >= 1) && (virq <= 4)){
+		irq = virq + IRQ_PCI_INTAD_BASE - 1;
+		irq_set_status_flags(irq, IRQ_LEVEL);
+		irq_set_chip(irq, &tsi108_pci_irq);
+	}
+	return 0;
+}
+
+static const struct irq_domain_ops pci_irq_domain_ops = {
+	.map = pci_irq_host_map,
+	.xlate = pci_irq_host_xlate,
+};
+
+/*
+ * Exported functions
+ */
+
+/*
+ * The Tsi108 PCI interrupts initialization routine.
+ *
+ * The INTA# - INTD# interrupts on the PCI bus are reported by the PCI block
+ * to the MPIC using single interrupt source (IRQ_TSI108_PCI). Therefore the
+ * PCI block has to be treated as a cascaded interrupt controller connected
+ * to the MPIC.
+ */
+
+void __init tsi108_pci_int_init(struct device_node *node)
+{
+	DBG("Tsi108_pci_int_init: initializing PCI interrupts\n");
+
+	pci_irq_host = irq_domain_add_legacy(node, NR_IRQS_LEGACY, 0, 0,
+					     &pci_irq_domain_ops, NULL);
+	if (pci_irq_host == NULL) {
+		printk(KERN_ERR "pci_irq_host: failed to allocate irq domain!\n");
+		return;
+	}
+
+	init_pci_source();
+}
+
+void tsi108_irq_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned int cascade_irq = get_pci_source();
+
+	if (cascade_irq)
+		generic_handle_irq(cascade_irq);
+
+	chip->irq_eoi(&desc->irq_data);
+}
diff --git a/arch/powerpc/sysdev/udbg_memcons.c b/arch/powerpc/sysdev/udbg_memcons.c
new file mode 100644
index 0000000000..5020044400
--- /dev/null
+++ b/arch/powerpc/sysdev/udbg_memcons.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * A udbg backend which logs messages and reads input from in memory
+ * buffers.
+ *
+ * The console output can be read from memcons_output which is a
+ * circular buffer whose next write position is stored in memcons.output_pos.
+ *
+ * Input may be passed by writing into the memcons_input buffer when it is
+ * empty. The input buffer is empty when both input_pos == input_start and
+ * *input_start == '\0'.
+ *
+ * Copyright (C) 2003-2005 Anton Blanchard and Milton Miller, IBM Corp
+ * Copyright (C) 2013 Alistair Popple, IBM Corp
+ */
+
+#include <linux/kernel.h>
+#include <asm/barrier.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/udbg.h>
+
+struct memcons {
+	char *output_start;
+	char *output_pos;
+	char *output_end;
+	char *input_start;
+	char *input_pos;
+	char *input_end;
+};
+
+static char memcons_output[CONFIG_PPC_MEMCONS_OUTPUT_SIZE];
+static char memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE];
+
+struct memcons memcons = {
+	.output_start = memcons_output,
+	.output_pos = memcons_output,
+	.output_end = &memcons_output[CONFIG_PPC_MEMCONS_OUTPUT_SIZE],
+	.input_start = memcons_input,
+	.input_pos = memcons_input,
+	.input_end = &memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE],
+};
+
+void memcons_putc(char c)
+{
+	char *new_output_pos;
+
+	*memcons.output_pos = c;
+	wmb();
+	new_output_pos = memcons.output_pos + 1;
+	if (new_output_pos >= memcons.output_end)
+		new_output_pos = memcons.output_start;
+
+	memcons.output_pos = new_output_pos;
+}
+
+int memcons_getc_poll(void)
+{
+	char c;
+	char *new_input_pos;
+
+	if (*memcons.input_pos) {
+		c = *memcons.input_pos;
+
+		new_input_pos = memcons.input_pos + 1;
+		if (new_input_pos >= memcons.input_end)
+			new_input_pos = memcons.input_start;
+		else if (*new_input_pos == '\0')
+			new_input_pos = memcons.input_start;
+
+		*memcons.input_pos = '\0';
+		wmb();
+		memcons.input_pos = new_input_pos;
+		return c;
+	}
+
+	return -1;
+}
+
+int memcons_getc(void)
+{
+	int c;
+
+	while (1) {
+		c = memcons_getc_poll();
+		if (c == -1)
+			cpu_relax();
+		else
+			break;
+	}
+
+	return c;
+}
+
+void __init udbg_init_memcons(void)
+{
+	udbg_putc = memcons_putc;
+	udbg_getc = memcons_getc;
+	udbg_getc_poll = memcons_getc_poll;
+}
diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig
new file mode 100644
index 0000000000..063d919589
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/Kconfig
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_XICS
+	def_bool n
+	select PPC_SMP_MUXED_IPI
+	select HARDIRQS_SW_RESEND
+
+config PPC_ICP_NATIVE
+	def_bool n
+
+config PPC_ICP_HV
+	def_bool n
+
+config PPC_ICS_RTAS
+	def_bool n
+
+config PPC_ICS_NATIVE
+	def_bool n
diff --git a/arch/powerpc/sysdev/xics/Makefile b/arch/powerpc/sysdev/xics/Makefile
new file mode 100644
index 0000000000..747063927c
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y				+= xics-common.o
+obj-$(CONFIG_PPC_ICP_NATIVE)	+= icp-native.o
+obj-$(CONFIG_PPC_ICP_HV)	+= icp-hv.o
+obj-$(CONFIG_PPC_ICS_RTAS)	+= ics-rtas.o
+obj-$(CONFIG_PPC_ICS_NATIVE)	+= ics-native.o
+obj-$(CONFIG_PPC_POWERNV)	+= ics-opal.o icp-opal.o
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
new file mode 100644
index 0000000000..cf8db19a4f
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2011 IBM Corporation.
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/io.h>
+#include <asm/hvcall.h>
+
+static inline unsigned int icp_hv_get_xirr(unsigned char cppr)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+	unsigned int ret = XICS_IRQ_SPURIOUS;
+
+	rc = plpar_hcall(H_XIRR, retbuf, cppr);
+	if (rc == H_SUCCESS) {
+		ret = (unsigned int)retbuf[0];
+	} else {
+		pr_err("%s: bad return code xirr cppr=0x%x returned %ld\n",
+			__func__, cppr, rc);
+		WARN_ON_ONCE(1);
+	}
+
+	return ret;
+}
+
+static inline void icp_hv_set_cppr(u8 value)
+{
+	long rc = plpar_hcall_norets(H_CPPR, value);
+	if (rc != H_SUCCESS) {
+		pr_err("%s: bad return code cppr cppr=0x%x returned %ld\n",
+			__func__, value, rc);
+		WARN_ON_ONCE(1);
+	}
+}
+
+static inline void icp_hv_set_xirr(unsigned int value)
+{
+	long rc = plpar_hcall_norets(H_EOI, value);
+	if (rc != H_SUCCESS) {
+		pr_err("%s: bad return code eoi xirr=0x%x returned %ld\n",
+			__func__, value, rc);
+		WARN_ON_ONCE(1);
+		icp_hv_set_cppr(value >> 24);
+	}
+}
+
+static inline void icp_hv_set_qirr(int n_cpu , u8 value)
+{
+	int hw_cpu = get_hard_smp_processor_id(n_cpu);
+	long rc;
+
+	/* Make sure all previous accesses are ordered before IPI sending */
+	mb();
+	rc = plpar_hcall_norets(H_IPI, hw_cpu, value);
+	if (rc != H_SUCCESS) {
+		pr_err("%s: bad return code qirr cpu=%d hw_cpu=%d mfrr=0x%x "
+			"returned %ld\n", __func__, n_cpu, hw_cpu, value, rc);
+		WARN_ON_ONCE(1);
+	}
+}
+
+static void icp_hv_eoi(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	iosync();
+	icp_hv_set_xirr((xics_pop_cppr() << 24) | hw_irq);
+}
+
+static void icp_hv_teardown_cpu(void)
+{
+	int cpu = smp_processor_id();
+
+	/* Clear any pending IPI */
+	icp_hv_set_qirr(cpu, 0xff);
+}
+
+static void icp_hv_flush_ipi(void)
+{
+	/* We take the ipi irq but and never return so we
+	 * need to EOI the IPI, but want to leave our priority 0
+	 *
+	 * should we check all the other interrupts too?
+	 * should we be flagging idle loop instead?
+	 * or creating some task to be scheduled?
+	 */
+
+	icp_hv_set_xirr((0x00 << 24) | XICS_IPI);
+}
+
+static unsigned int icp_hv_get_irq(void)
+{
+	unsigned int xirr = icp_hv_get_xirr(xics_cppr_top());
+	unsigned int vec = xirr & 0x00ffffff;
+	unsigned int irq;
+
+	if (vec == XICS_IRQ_SPURIOUS)
+		return 0;
+
+	irq = irq_find_mapping(xics_host, vec);
+	if (likely(irq)) {
+		xics_push_cppr(vec);
+		return irq;
+	}
+
+	/* We don't have a linux mapping, so have rtas mask it. */
+	xics_mask_unknown_vec(vec);
+
+	/* We might learn about it later, so EOI it */
+	icp_hv_set_xirr(xirr);
+
+	return 0;
+}
+
+static void icp_hv_set_cpu_priority(unsigned char cppr)
+{
+	xics_set_base_cppr(cppr);
+	icp_hv_set_cppr(cppr);
+	iosync();
+}
+
+#ifdef CONFIG_SMP
+
+static void icp_hv_cause_ipi(int cpu)
+{
+	icp_hv_set_qirr(cpu, IPI_PRIORITY);
+}
+
+static irqreturn_t icp_hv_ipi_action(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	icp_hv_set_qirr(cpu, 0xff);
+
+	return smp_ipi_demux();
+}
+
+#endif /* CONFIG_SMP */
+
+static const struct icp_ops icp_hv_ops = {
+	.get_irq	= icp_hv_get_irq,
+	.eoi		= icp_hv_eoi,
+	.set_priority	= icp_hv_set_cpu_priority,
+	.teardown_cpu	= icp_hv_teardown_cpu,
+	.flush_ipi	= icp_hv_flush_ipi,
+#ifdef CONFIG_SMP
+	.ipi_action	= icp_hv_ipi_action,
+	.cause_ipi	= icp_hv_cause_ipi,
+#endif
+};
+
+int __init icp_hv_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xicp");
+	if (!np)
+		np = of_find_node_by_type(NULL,
+				    "PowerPC-External-Interrupt-Presentation");
+	if (!np)
+		return -ENODEV;
+
+	icp_ops = &icp_hv_ops;
+
+	of_node_put(np);
+	return 0;
+}
+
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
new file mode 100644
index 0000000000..f6ec6dba92
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2011 IBM Corporation.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/kvm_ppc.h>
+#include <asm/dbell.h>
+
+struct icp_ipl {
+	union {
+		u32 word;
+		u8 bytes[4];
+	} xirr_poll;
+	union {
+		u32 word;
+		u8 bytes[4];
+	} xirr;
+	u32 dummy;
+	union {
+		u32 word;
+		u8 bytes[4];
+	} qirr;
+	u32 link_a;
+	u32 link_b;
+	u32 link_c;
+};
+
+static struct icp_ipl __iomem *icp_native_regs[NR_CPUS];
+
+static inline unsigned int icp_native_get_xirr(void)
+{
+	int cpu = smp_processor_id();
+	unsigned int xirr;
+
+	/* Handled an interrupt latched by KVM */
+	xirr = kvmppc_get_xics_latch();
+	if (xirr)
+		return xirr;
+
+	return in_be32(&icp_native_regs[cpu]->xirr.word);
+}
+
+static inline void icp_native_set_xirr(unsigned int value)
+{
+	int cpu = smp_processor_id();
+
+	out_be32(&icp_native_regs[cpu]->xirr.word, value);
+}
+
+static inline void icp_native_set_cppr(u8 value)
+{
+	int cpu = smp_processor_id();
+
+	out_8(&icp_native_regs[cpu]->xirr.bytes[0], value);
+}
+
+static inline void icp_native_set_qirr(int n_cpu, u8 value)
+{
+	out_8(&icp_native_regs[n_cpu]->qirr.bytes[0], value);
+}
+
+static void icp_native_set_cpu_priority(unsigned char cppr)
+{
+	xics_set_base_cppr(cppr);
+	icp_native_set_cppr(cppr);
+	iosync();
+}
+
+void icp_native_eoi(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	iosync();
+	icp_native_set_xirr((xics_pop_cppr() << 24) | hw_irq);
+}
+
+static void icp_native_teardown_cpu(void)
+{
+	int cpu = smp_processor_id();
+
+	/* Clear any pending IPI */
+	icp_native_set_qirr(cpu, 0xff);
+}
+
+static void icp_native_flush_ipi(void)
+{
+	/* We take the ipi irq but and never return so we
+	 * need to EOI the IPI, but want to leave our priority 0
+	 *
+	 * should we check all the other interrupts too?
+	 * should we be flagging idle loop instead?
+	 * or creating some task to be scheduled?
+	 */
+
+	icp_native_set_xirr((0x00 << 24) | XICS_IPI);
+}
+
+static unsigned int icp_native_get_irq(void)
+{
+	unsigned int xirr = icp_native_get_xirr();
+	unsigned int vec = xirr & 0x00ffffff;
+	unsigned int irq;
+
+	if (vec == XICS_IRQ_SPURIOUS)
+		return 0;
+
+	irq = irq_find_mapping(xics_host, vec);
+	if (likely(irq)) {
+		xics_push_cppr(vec);
+		return irq;
+	}
+
+	/* We don't have a linux mapping, so have rtas mask it. */
+	xics_mask_unknown_vec(vec);
+
+	/* We might learn about it later, so EOI it */
+	icp_native_set_xirr(xirr);
+
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+
+static void icp_native_cause_ipi(int cpu)
+{
+	kvmppc_set_host_ipi(cpu);
+	icp_native_set_qirr(cpu, IPI_PRIORITY);
+}
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void icp_native_cause_ipi_rm(int cpu)
+{
+	/*
+	 * Currently not used to send IPIs to another CPU
+	 * on the same core. Only caller is KVM real mode.
+	 * Need the physical address of the XICS to be
+	 * previously saved in kvm_hstate in the paca.
+	 */
+	void __iomem *xics_phys;
+
+	/*
+	 * Just like the cause_ipi functions, it is required to
+	 * include a full barrier before causing the IPI.
+	 */
+	xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys;
+	mb();
+	__raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR);
+}
+#endif
+
+/*
+ * Called when an interrupt is received on an off-line CPU to
+ * clear the interrupt, so that the CPU can go back to nap mode.
+ */
+void icp_native_flush_interrupt(void)
+{
+	unsigned int xirr = icp_native_get_xirr();
+	unsigned int vec = xirr & 0x00ffffff;
+
+	if (vec == XICS_IRQ_SPURIOUS)
+		return;
+	if (vec == XICS_IPI) {
+		/* Clear pending IPI */
+		int cpu = smp_processor_id();
+		kvmppc_clear_host_ipi(cpu);
+		icp_native_set_qirr(cpu, 0xff);
+	} else {
+		pr_err("XICS: hw interrupt 0x%x to offline cpu, disabling\n",
+		       vec);
+		xics_mask_unknown_vec(vec);
+	}
+	/* EOI the interrupt */
+	icp_native_set_xirr(xirr);
+}
+
+void xics_wake_cpu(int cpu)
+{
+	icp_native_set_qirr(cpu, IPI_PRIORITY);
+}
+EXPORT_SYMBOL_GPL(xics_wake_cpu);
+
+static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	kvmppc_clear_host_ipi(cpu);
+	icp_native_set_qirr(cpu, 0xff);
+
+	return smp_ipi_demux();
+}
+
+#endif /* CONFIG_SMP */
+
+static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr,
+					 unsigned long size)
+{
+	char *rname;
+	int i, cpu = -1;
+
+	/* This may look gross but it's good enough for now, we don't quite
+	 * have a hard -> linux processor id matching.
+	 */
+	for_each_possible_cpu(i) {
+		if (!cpu_present(i))
+			continue;
+		if (hw_id == get_hard_smp_processor_id(i)) {
+			cpu = i;
+			break;
+		}
+	}
+
+	/* Fail, skip that CPU. Don't print, it's normal, some XICS come up
+	 * with way more entries in there than you have CPUs
+	 */
+	if (cpu == -1)
+		return 0;
+
+	rname = kasprintf(GFP_KERNEL, "CPU %d [0x%x] Interrupt Presentation",
+			  cpu, hw_id);
+
+	if (!request_mem_region(addr, size, rname)) {
+		pr_warn("icp_native: Could not reserve ICP MMIO for CPU %d, interrupt server #0x%x\n",
+			cpu, hw_id);
+		return -EBUSY;
+	}
+
+	icp_native_regs[cpu] = ioremap(addr, size);
+	kvmppc_set_xics_phys(cpu, addr);
+	if (!icp_native_regs[cpu]) {
+		pr_warn("icp_native: Failed ioremap for CPU %d, interrupt server #0x%x, addr %#lx\n",
+			cpu, hw_id, addr);
+		release_mem_region(addr, size);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static int __init icp_native_init_one_node(struct device_node *np,
+					   unsigned int *indx)
+{
+	unsigned int ilen;
+	const __be32 *ireg;
+	int i;
+	int num_reg;
+	int num_servers = 0;
+
+	/* This code does the theorically broken assumption that the interrupt
+	 * server numbers are the same as the hard CPU numbers.
+	 * This happens to be the case so far but we are playing with fire...
+	 * should be fixed one of these days. -BenH.
+	 */
+	ireg = of_get_property(np, "ibm,interrupt-server-ranges", &ilen);
+
+	/* Do that ever happen ? we'll know soon enough... but even good'old
+	 * f80 does have that property ..
+	 */
+	WARN_ON((ireg == NULL) || (ilen != 2*sizeof(u32)));
+
+	if (ireg) {
+		*indx = of_read_number(ireg, 1);
+		if (ilen >= 2*sizeof(u32))
+			num_servers = of_read_number(ireg + 1, 1);
+	}
+
+	num_reg = of_address_count(np);
+	if (num_servers && (num_servers != num_reg)) {
+		pr_err("icp_native: ICP reg len (%d) != num servers (%d)",
+		       num_reg, num_servers);
+		return -1;
+	}
+
+	for (i = 0; i < num_reg; i++) {
+		struct resource r;
+		int err;
+
+		err = of_address_to_resource(np, i, &r);
+		if (err) {
+			pr_err("icp_native: Could not translate ICP MMIO"
+			       " for interrupt server 0x%x (%d)\n", *indx, err);
+			return -1;
+		}
+
+		if (icp_native_map_one_cpu(*indx, r.start, resource_size(&r)))
+			return -1;
+
+		(*indx)++;
+	}
+	return 0;
+}
+
+static const struct icp_ops icp_native_ops = {
+	.get_irq	= icp_native_get_irq,
+	.eoi		= icp_native_eoi,
+	.set_priority	= icp_native_set_cpu_priority,
+	.teardown_cpu	= icp_native_teardown_cpu,
+	.flush_ipi	= icp_native_flush_ipi,
+#ifdef CONFIG_SMP
+	.ipi_action	= icp_native_ipi_action,
+	.cause_ipi	= icp_native_cause_ipi,
+#endif
+};
+
+int __init icp_native_init(void)
+{
+	struct device_node *np;
+	u32 indx = 0;
+	int found = 0;
+
+	for_each_compatible_node(np, NULL, "ibm,ppc-xicp")
+		if (icp_native_init_one_node(np, &indx) == 0)
+			found = 1;
+	if (!found) {
+		for_each_node_by_type(np,
+			"PowerPC-External-Interrupt-Presentation") {
+				if (icp_native_init_one_node(np, &indx) == 0)
+					found = 1;
+		}
+	}
+
+	if (found == 0)
+		return -ENODEV;
+
+	icp_ops = &icp_native_ops;
+
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
new file mode 100644
index 0000000000..4dae624b9f
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016 IBM Corporation.
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/io.h>
+#include <asm/opal.h>
+#include <asm/kvm_ppc.h>
+
+static void icp_opal_teardown_cpu(void)
+{
+	int hw_cpu = hard_smp_processor_id();
+
+	/* Clear any pending IPI */
+	opal_int_set_mfrr(hw_cpu, 0xff);
+}
+
+static void icp_opal_flush_ipi(void)
+{
+	/*
+	 * We take the ipi irq but and never return so we need to EOI the IPI,
+	 * but want to leave our priority 0.
+	 *
+	 * Should we check all the other interrupts too?
+	 * Should we be flagging idle loop instead?
+	 * Or creating some task to be scheduled?
+	 */
+	if (opal_int_eoi((0x00 << 24) | XICS_IPI) > 0)
+		force_external_irq_replay();
+}
+
+static unsigned int icp_opal_get_xirr(void)
+{
+	unsigned int kvm_xirr;
+	__be32 hw_xirr;
+	int64_t rc;
+
+	/* Handle an interrupt latched by KVM first */
+	kvm_xirr = kvmppc_get_xics_latch();
+	if (kvm_xirr)
+		return kvm_xirr;
+
+	/* Then ask OPAL */
+	rc = opal_int_get_xirr(&hw_xirr, false);
+	if (rc < 0)
+		return 0;
+	return be32_to_cpu(hw_xirr);
+}
+
+static unsigned int icp_opal_get_irq(void)
+{
+	unsigned int xirr;
+	unsigned int vec;
+	unsigned int irq;
+
+	xirr = icp_opal_get_xirr();
+	vec = xirr & 0x00ffffff;
+	if (vec == XICS_IRQ_SPURIOUS)
+		return 0;
+
+	irq = irq_find_mapping(xics_host, vec);
+	if (likely(irq)) {
+		xics_push_cppr(vec);
+		return irq;
+	}
+
+	/* We don't have a linux mapping, so have rtas mask it. */
+	xics_mask_unknown_vec(vec);
+
+	/* We might learn about it later, so EOI it */
+	if (opal_int_eoi(xirr) > 0)
+		force_external_irq_replay();
+
+	return 0;
+}
+
+static void icp_opal_set_cpu_priority(unsigned char cppr)
+{
+	/*
+	 * Here be dragons. The caller has asked to allow only IPI's and not
+	 * external interrupts. But OPAL XIVE doesn't support that. So instead
+	 * of allowing no interrupts allow all. That's still not right, but
+	 * currently the only caller who does this is xics_migrate_irqs_away()
+	 * and it works in that case.
+	 */
+	if (cppr >= DEFAULT_PRIORITY)
+		cppr = LOWEST_PRIORITY;
+
+	xics_set_base_cppr(cppr);
+	opal_int_set_cppr(cppr);
+	iosync();
+}
+
+static void icp_opal_eoi(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int64_t rc;
+
+	iosync();
+	rc = opal_int_eoi((xics_pop_cppr() << 24) | hw_irq);
+
+	/*
+	 * EOI tells us whether there are more interrupts to fetch.
+	 *
+	 * Some HW implementations might not be able to send us another
+	 * external interrupt in that case, so we force a replay.
+	 */
+	if (rc > 0)
+		force_external_irq_replay();
+}
+
+#ifdef CONFIG_SMP
+
+static void icp_opal_cause_ipi(int cpu)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+
+	kvmppc_set_host_ipi(cpu);
+	opal_int_set_mfrr(hw_cpu, IPI_PRIORITY);
+}
+
+static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	kvmppc_clear_host_ipi(cpu);
+	opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
+
+	return smp_ipi_demux();
+}
+
+/*
+ * Called when an interrupt is received on an off-line CPU to
+ * clear the interrupt, so that the CPU can go back to nap mode.
+ */
+void icp_opal_flush_interrupt(void)
+{
+	unsigned int xirr;
+	unsigned int vec;
+
+	do {
+		xirr = icp_opal_get_xirr();
+		vec = xirr & 0x00ffffff;
+		if (vec == XICS_IRQ_SPURIOUS)
+			break;
+		if (vec == XICS_IPI) {
+			/* Clear pending IPI */
+			int cpu = smp_processor_id();
+			kvmppc_clear_host_ipi(cpu);
+			opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
+		} else {
+			pr_err("XICS: hw interrupt 0x%x to offline cpu, "
+			       "disabling\n", vec);
+			xics_mask_unknown_vec(vec);
+		}
+
+		/* EOI the interrupt */
+	} while (opal_int_eoi(xirr) > 0);
+}
+
+#endif /* CONFIG_SMP */
+
+static const struct icp_ops icp_opal_ops = {
+	.get_irq	= icp_opal_get_irq,
+	.eoi		= icp_opal_eoi,
+	.set_priority	= icp_opal_set_cpu_priority,
+	.teardown_cpu	= icp_opal_teardown_cpu,
+	.flush_ipi	= icp_opal_flush_ipi,
+#ifdef CONFIG_SMP
+	.ipi_action	= icp_opal_ipi_action,
+	.cause_ipi	= icp_opal_cause_ipi,
+#endif
+};
+
+int __init icp_opal_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
+	if (!np)
+		return -ENODEV;
+
+	icp_ops = &icp_opal_ops;
+
+	printk("XICS: Using OPAL ICP fallbacks\n");
+
+	of_node_put(np);
+	return 0;
+}
+
diff --git a/arch/powerpc/sysdev/xics/ics-native.c b/arch/powerpc/sysdev/xics/ics-native.c
new file mode 100644
index 0000000000..112c8a1e81
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/ics-native.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ICS backend for OPAL managed interrupts.
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+//#define DEBUG
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/spinlock.h>
+#include <linux/msi.h>
+#include <linux/list.h>
+
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/opal.h>
+#include <asm/firmware.h>
+
+struct ics_native {
+	struct ics		ics;
+	struct device_node	*node;
+	void __iomem    	*base;
+	u32             	ibase;
+	u32             	icount;
+};
+#define to_ics_native(_ics)     container_of(_ics, struct ics_native, ics)
+
+static void __iomem *ics_native_xive(struct ics_native *in, unsigned int vec)
+{
+	return in->base + 0x800 + ((vec - in->ibase) << 2);
+}
+
+static void ics_native_unmask_irq(struct irq_data *d)
+{
+	unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+	struct ics *ics = irq_data_get_irq_chip_data(d);
+	struct ics_native *in = to_ics_native(ics);
+	unsigned int server;
+
+	pr_devel("ics-native: unmask virq %d [hw 0x%x]\n", d->irq, vec);
+
+	if (vec < in->ibase || vec >= (in->ibase + in->icount))
+		return;
+
+	server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0);
+	out_be32(ics_native_xive(in, vec), (server << 8) | DEFAULT_PRIORITY);
+}
+
+static unsigned int ics_native_startup(struct irq_data *d)
+{
+#ifdef CONFIG_PCI_MSI
+	/*
+	 * The generic MSI code returns with the interrupt disabled on the
+	 * card, using the MSI mask bits. Firmware doesn't appear to unmask
+	 * at that level, so we do it here by hand.
+	 */
+	if (irq_data_get_msi_desc(d))
+		pci_msi_unmask_irq(d);
+#endif
+
+	/* unmask it */
+	ics_native_unmask_irq(d);
+	return 0;
+}
+
+static void ics_native_do_mask(struct ics_native *in, unsigned int vec)
+{
+	out_be32(ics_native_xive(in, vec), 0xff);
+}
+
+static void ics_native_mask_irq(struct irq_data *d)
+{
+	unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+	struct ics *ics = irq_data_get_irq_chip_data(d);
+	struct ics_native *in = to_ics_native(ics);
+
+	pr_devel("ics-native: mask virq %d [hw 0x%x]\n", d->irq, vec);
+
+	if (vec < in->ibase || vec >= (in->ibase + in->icount))
+		return;
+	ics_native_do_mask(in, vec);
+}
+
+static int ics_native_set_affinity(struct irq_data *d,
+				   const struct cpumask *cpumask,
+				   bool force)
+{
+	unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+	struct ics *ics = irq_data_get_irq_chip_data(d);
+	struct ics_native *in = to_ics_native(ics);
+	int server;
+	u32 xive;
+
+	if (vec < in->ibase || vec >= (in->ibase + in->icount))
+		return -EINVAL;
+
+	server = xics_get_irq_server(d->irq, cpumask, 1);
+	if (server == -1) {
+		pr_warn("%s: No online cpus in the mask %*pb for irq %d\n",
+			__func__, cpumask_pr_args(cpumask), d->irq);
+		return -1;
+	}
+
+	xive = in_be32(ics_native_xive(in, vec));
+	xive = (xive & 0xff) | (server << 8);
+	out_be32(ics_native_xive(in, vec), xive);
+
+	return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip ics_native_irq_chip = {
+	.name = "ICS",
+	.irq_startup		= ics_native_startup,
+	.irq_mask		= ics_native_mask_irq,
+	.irq_unmask		= ics_native_unmask_irq,
+	.irq_eoi		= NULL, /* Patched at init time */
+	.irq_set_affinity 	= ics_native_set_affinity,
+	.irq_set_type		= xics_set_irq_type,
+	.irq_retrigger		= xics_retrigger,
+};
+
+static int ics_native_check(struct ics *ics, unsigned int hw_irq)
+{
+	struct ics_native *in = to_ics_native(ics);
+
+	pr_devel("%s: hw_irq=0x%x\n", __func__, hw_irq);
+
+	if (hw_irq < in->ibase || hw_irq >= (in->ibase + in->icount))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void ics_native_mask_unknown(struct ics *ics, unsigned long vec)
+{
+	struct ics_native *in = to_ics_native(ics);
+
+	if (vec < in->ibase || vec >= (in->ibase + in->icount))
+		return;
+
+	ics_native_do_mask(in, vec);
+}
+
+static long ics_native_get_server(struct ics *ics, unsigned long vec)
+{
+	struct ics_native *in = to_ics_native(ics);
+	u32 xive;
+
+	if (vec < in->ibase || vec >= (in->ibase + in->icount))
+		return -EINVAL;
+
+	xive = in_be32(ics_native_xive(in, vec));
+	return (xive >> 8) & 0xfff;
+}
+
+static int ics_native_host_match(struct ics *ics, struct device_node *node)
+{
+	struct ics_native *in = to_ics_native(ics);
+
+	return in->node == node;
+}
+
+static struct ics ics_native_template = {
+	.check		= ics_native_check,
+	.mask_unknown	= ics_native_mask_unknown,
+	.get_server	= ics_native_get_server,
+	.host_match	= ics_native_host_match,
+	.chip = &ics_native_irq_chip,
+};
+
+static int __init ics_native_add_one(struct device_node *np)
+{
+	struct ics_native *ics;
+	u32 ranges[2];
+	int rc, count;
+
+	ics = kzalloc(sizeof(struct ics_native), GFP_KERNEL);
+	if (!ics)
+		return -ENOMEM;
+	ics->node = of_node_get(np);
+	memcpy(&ics->ics, &ics_native_template, sizeof(struct ics));
+
+	ics->base = of_iomap(np, 0);
+	if (!ics->base) {
+		pr_err("Failed to map %pOFP\n", np);
+		rc = -ENOMEM;
+		goto fail;
+	}
+
+	count = of_property_count_u32_elems(np, "interrupt-ranges");
+	if (count < 2 || count & 1) {
+		pr_err("Failed to read interrupt-ranges of %pOFP\n", np);
+		rc = -EINVAL;
+		goto fail;
+	}
+	if (count > 2) {
+		pr_warn("ICS %pOFP has %d ranges, only one supported\n",
+			np, count >> 1);
+	}
+	rc = of_property_read_u32_array(np, "interrupt-ranges",
+					ranges, 2);
+	if (rc) {
+		pr_err("Failed to read interrupt-ranges of %pOFP\n", np);
+		goto fail;
+	}
+	ics->ibase = ranges[0];
+	ics->icount = ranges[1];
+
+	pr_info("ICS native initialized for sources %d..%d\n",
+		ics->ibase, ics->ibase + ics->icount - 1);
+
+	/* Register ourselves */
+	xics_register_ics(&ics->ics);
+
+	return 0;
+fail:
+	of_node_put(ics->node);
+	kfree(ics);
+	return rc;
+}
+
+int __init ics_native_init(void)
+{
+	struct device_node *ics;
+	bool found_one = false;
+
+	/* We need to patch our irq chip's EOI to point to the
+	 * right ICP
+	 */
+	ics_native_irq_chip.irq_eoi = icp_ops->eoi;
+
+	/* Find native ICS in the device-tree */
+	for_each_compatible_node(ics, NULL, "openpower,xics-sources") {
+		if (ics_native_add_one(ics) == 0)
+			found_one = true;
+	}
+
+	if (found_one)
+		pr_info("ICS native backend registered\n");
+
+	return found_one ? 0 : -ENODEV;
+}
diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c
new file mode 100644
index 0000000000..5fe73dabab
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/ics-opal.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ICS backend for OPAL managed interrupts.
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/msi.h>
+
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/opal.h>
+#include <asm/firmware.h>
+
+static int ics_opal_mangle_server(int server)
+{
+	/* No link for now */
+	return server << 2;
+}
+
+static int ics_opal_unmangle_server(int server)
+{
+	/* No link for now */
+	return server >> 2;
+}
+
+static void ics_opal_unmask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int64_t rc;
+	int server;
+
+	pr_devel("ics-hal: unmask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+
+	server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0);
+	server = ics_opal_mangle_server(server);
+
+	rc = opal_set_xive(hw_irq, server, DEFAULT_PRIORITY);
+	if (rc != OPAL_SUCCESS)
+		pr_err("%s: opal_set_xive(irq=%d [hw 0x%x] server=%x)"
+		       " error %lld\n",
+		       __func__, d->irq, hw_irq, server, rc);
+}
+
+static unsigned int ics_opal_startup(struct irq_data *d)
+{
+	ics_opal_unmask_irq(d);
+	return 0;
+}
+
+static void ics_opal_mask_real_irq(unsigned int hw_irq)
+{
+	int server = ics_opal_mangle_server(xics_default_server);
+	int64_t rc;
+
+	if (hw_irq == XICS_IPI)
+		return;
+
+	/* Have to set XIVE to 0xff to be able to remove a slot */
+	rc = opal_set_xive(hw_irq, server, 0xff);
+	if (rc != OPAL_SUCCESS)
+		pr_err("%s: opal_set_xive(0xff) irq=%u returned %lld\n",
+		       __func__, hw_irq, rc);
+}
+
+static void ics_opal_mask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	pr_devel("ics-hal: mask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+	ics_opal_mask_real_irq(hw_irq);
+}
+
+static int ics_opal_set_affinity(struct irq_data *d,
+				 const struct cpumask *cpumask,
+				 bool force)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	__be16 oserver;
+	int16_t server;
+	int8_t priority;
+	int64_t rc;
+	int wanted_server;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return -1;
+
+	rc = opal_get_xive(hw_irq, &oserver, &priority);
+	if (rc != OPAL_SUCCESS) {
+		pr_err("%s: opal_get_xive(irq=%d [hw 0x%x]) error %lld\n",
+		       __func__, d->irq, hw_irq, rc);
+		return -1;
+	}
+
+	wanted_server = xics_get_irq_server(d->irq, cpumask, 1);
+	if (wanted_server < 0) {
+		pr_warn("%s: No online cpus in the mask %*pb for irq %d\n",
+			__func__, cpumask_pr_args(cpumask), d->irq);
+		return -1;
+	}
+	server = ics_opal_mangle_server(wanted_server);
+
+	pr_debug("ics-hal: set-affinity irq %d [hw 0x%x] server: 0x%x/0x%x\n",
+		 d->irq, hw_irq, wanted_server, server);
+
+	rc = opal_set_xive(hw_irq, server, priority);
+	if (rc != OPAL_SUCCESS) {
+		pr_err("%s: opal_set_xive(irq=%d [hw 0x%x] server=%x)"
+		       " error %lld\n",
+		       __func__, d->irq, hw_irq, server, rc);
+		return -1;
+	}
+	return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip ics_opal_irq_chip = {
+	.name = "OPAL ICS",
+	.irq_startup = ics_opal_startup,
+	.irq_mask = ics_opal_mask_irq,
+	.irq_unmask = ics_opal_unmask_irq,
+	.irq_eoi = NULL, /* Patched at init time */
+	.irq_set_affinity = ics_opal_set_affinity,
+	.irq_set_type = xics_set_irq_type,
+	.irq_retrigger = xics_retrigger,
+};
+
+static int ics_opal_host_match(struct ics *ics, struct device_node *node)
+{
+	return 1;
+}
+
+static int ics_opal_check(struct ics *ics, unsigned int hw_irq)
+{
+	int64_t rc;
+	__be16 server;
+	int8_t priority;
+
+	if (WARN_ON(hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS))
+		return -EINVAL;
+
+	/* Check if HAL knows about this interrupt */
+	rc = opal_get_xive(hw_irq, &server, &priority);
+	if (rc != OPAL_SUCCESS)
+		return -ENXIO;
+
+	return 0;
+}
+
+static void ics_opal_mask_unknown(struct ics *ics, unsigned long vec)
+{
+	int64_t rc;
+	__be16 server;
+	int8_t priority;
+
+	/* Check if HAL knows about this interrupt */
+	rc = opal_get_xive(vec, &server, &priority);
+	if (rc != OPAL_SUCCESS)
+		return;
+
+	ics_opal_mask_real_irq(vec);
+}
+
+static long ics_opal_get_server(struct ics *ics, unsigned long vec)
+{
+	int64_t rc;
+	__be16 server;
+	int8_t priority;
+
+	/* Check if HAL knows about this interrupt */
+	rc = opal_get_xive(vec, &server, &priority);
+	if (rc != OPAL_SUCCESS)
+		return -1;
+	return ics_opal_unmangle_server(be16_to_cpu(server));
+}
+
+/* Only one global & state struct ics */
+static struct ics ics_hal = {
+	.check		= ics_opal_check,
+	.mask_unknown	= ics_opal_mask_unknown,
+	.get_server	= ics_opal_get_server,
+	.host_match	= ics_opal_host_match,
+	.chip		= &ics_opal_irq_chip,
+};
+
+int __init ics_opal_init(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_OPAL))
+		return -ENODEV;
+
+	/* We need to patch our irq chip's EOI to point to the
+	 * right ICP
+	 */
+	ics_opal_irq_chip.irq_eoi = icp_ops->eoi;
+
+	/* Register ourselves */
+	xics_register_ics(&ics_hal);
+
+	pr_info("ICS OPAL backend registered\n");
+
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
new file mode 100644
index 0000000000..b772a833d9
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/msi.h>
+
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/rtas.h>
+
+/* RTAS service tokens */
+static int ibm_get_xive;
+static int ibm_set_xive;
+static int ibm_int_on;
+static int ibm_int_off;
+
+static void ics_rtas_unmask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int call_status;
+	int server;
+
+	pr_devel("xics: unmask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+
+	server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0);
+
+	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server,
+				DEFAULT_PRIORITY);
+	if (call_status != 0) {
+		printk(KERN_ERR
+			"%s: ibm_set_xive irq %u server %x returned %d\n",
+			__func__, hw_irq, server, call_status);
+		return;
+	}
+
+	/* Now unmask the interrupt (often a no-op) */
+	call_status = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq);
+	if (call_status != 0) {
+		printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n",
+			__func__, hw_irq, call_status);
+		return;
+	}
+}
+
+static unsigned int ics_rtas_startup(struct irq_data *d)
+{
+	/* unmask it */
+	ics_rtas_unmask_irq(d);
+	return 0;
+}
+
+static void ics_rtas_mask_real_irq(unsigned int hw_irq)
+{
+	int call_status;
+
+	if (hw_irq == XICS_IPI)
+		return;
+
+	call_status = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq);
+	if (call_status != 0) {
+		printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n",
+			__func__, hw_irq, call_status);
+		return;
+	}
+
+	/* Have to set XIVE to 0xff to be able to remove a slot */
+	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq,
+				xics_default_server, 0xff);
+	if (call_status != 0) {
+		printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n",
+			__func__, hw_irq, call_status);
+		return;
+	}
+}
+
+static void ics_rtas_mask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	pr_devel("xics: mask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+	ics_rtas_mask_real_irq(hw_irq);
+}
+
+static int ics_rtas_set_affinity(struct irq_data *d,
+				 const struct cpumask *cpumask,
+				 bool force)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int status;
+	int xics_status[2];
+	int irq_server;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return -1;
+
+	status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq);
+
+	if (status) {
+		printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
+			__func__, hw_irq, status);
+		return -1;
+	}
+
+	irq_server = xics_get_irq_server(d->irq, cpumask, 1);
+	if (irq_server == -1) {
+		pr_warn("%s: No online cpus in the mask %*pb for irq %d\n",
+			__func__, cpumask_pr_args(cpumask), d->irq);
+		return -1;
+	}
+
+	pr_debug("%s: irq %d [hw 0x%x] server: 0x%x\n", __func__, d->irq,
+		 hw_irq, irq_server);
+
+	status = rtas_call(ibm_set_xive, 3, 1, NULL,
+			   hw_irq, irq_server, xics_status[1]);
+
+	if (status) {
+		printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
+			__func__, hw_irq, status);
+		return -1;
+	}
+
+	return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip ics_rtas_irq_chip = {
+	.name = "XICS",
+	.irq_startup = ics_rtas_startup,
+	.irq_mask = ics_rtas_mask_irq,
+	.irq_unmask = ics_rtas_unmask_irq,
+	.irq_eoi = NULL, /* Patched at init time */
+	.irq_set_affinity = ics_rtas_set_affinity,
+	.irq_set_type = xics_set_irq_type,
+	.irq_retrigger = xics_retrigger,
+};
+
+static int ics_rtas_check(struct ics *ics, unsigned int hw_irq)
+{
+	int status[2];
+	int rc;
+
+	if (WARN_ON(hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS))
+		return -EINVAL;
+
+	/* Check if RTAS knows about this interrupt */
+	rc = rtas_call(ibm_get_xive, 1, 3, status, hw_irq);
+	if (rc)
+		return -ENXIO;
+
+	return 0;
+}
+
+static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec)
+{
+	ics_rtas_mask_real_irq(vec);
+}
+
+static long ics_rtas_get_server(struct ics *ics, unsigned long vec)
+{
+	int rc, status[2];
+
+	rc = rtas_call(ibm_get_xive, 1, 3, status, vec);
+	if (rc)
+		return -1;
+	return status[0];
+}
+
+static int ics_rtas_host_match(struct ics *ics, struct device_node *node)
+{
+	/* IBM machines have interrupt parents of various funky types for things
+	 * like vdevices, events, etc... The trick we use here is to match
+	 * everything here except the legacy 8259 which is compatible "chrp,iic"
+	 */
+	return !of_device_is_compatible(node, "chrp,iic");
+}
+
+/* Only one global & state struct ics */
+static struct ics ics_rtas = {
+	.check		= ics_rtas_check,
+	.mask_unknown	= ics_rtas_mask_unknown,
+	.get_server	= ics_rtas_get_server,
+	.host_match	= ics_rtas_host_match,
+	.chip = &ics_rtas_irq_chip,
+};
+
+__init int ics_rtas_init(void)
+{
+	ibm_get_xive = rtas_function_token(RTAS_FN_IBM_GET_XIVE);
+	ibm_set_xive = rtas_function_token(RTAS_FN_IBM_SET_XIVE);
+	ibm_int_on  = rtas_function_token(RTAS_FN_IBM_INT_ON);
+	ibm_int_off = rtas_function_token(RTAS_FN_IBM_INT_OFF);
+
+	/* We enable the RTAS "ICS" if RTAS is present with the
+	 * appropriate tokens
+	 */
+	if (ibm_get_xive == RTAS_UNKNOWN_SERVICE ||
+	    ibm_set_xive == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	/* We need to patch our irq chip's EOI to point to the
+	 * right ICP
+	 */
+	ics_rtas_irq_chip.irq_eoi = icp_ops->eoi;
+
+	/* Register ourselves */
+	xics_register_ics(&ics_rtas);
+
+	return 0;
+}
+
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
new file mode 100644
index 0000000000..d3a4156e87
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -0,0 +1,544 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2011 IBM Corporation.
+ */
+#include <linux/types.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/debugfs.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/rtas.h>
+#include <asm/xics.h>
+#include <asm/firmware.h>
+
+/* Globals common to all ICP/ICS implementations */
+const struct icp_ops	*icp_ops;
+
+unsigned int xics_default_server		= 0xff;
+unsigned int xics_default_distrib_server	= 0;
+unsigned int xics_interrupt_server_size		= 8;
+
+DEFINE_PER_CPU(struct xics_cppr, xics_cppr);
+
+struct irq_domain *xics_host;
+
+static struct ics *xics_ics;
+
+void xics_update_irq_servers(void)
+{
+	int i, j;
+	struct device_node *np;
+	u32 ilen;
+	const __be32 *ireg;
+	u32 hcpuid;
+
+	/* Find the server numbers for the boot cpu. */
+	np = of_get_cpu_node(boot_cpuid, NULL);
+	BUG_ON(!np);
+
+	hcpuid = get_hard_smp_processor_id(boot_cpuid);
+	xics_default_server = xics_default_distrib_server = hcpuid;
+
+	pr_devel("xics: xics_default_server = 0x%x\n", xics_default_server);
+
+	ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen);
+	if (!ireg) {
+		of_node_put(np);
+		return;
+	}
+
+	i = ilen / sizeof(int);
+
+	/* Global interrupt distribution server is specified in the last
+	 * entry of "ibm,ppc-interrupt-gserver#s" property. Get the last
+	 * entry fom this property for current boot cpu id and use it as
+	 * default distribution server
+	 */
+	for (j = 0; j < i; j += 2) {
+		if (be32_to_cpu(ireg[j]) == hcpuid) {
+			xics_default_distrib_server = be32_to_cpu(ireg[j+1]);
+			break;
+		}
+	}
+	pr_devel("xics: xics_default_distrib_server = 0x%x\n",
+		 xics_default_distrib_server);
+	of_node_put(np);
+}
+
+/* GIQ stuff, currently only supported on RTAS setups, will have
+ * to be sorted properly for bare metal
+ */
+void xics_set_cpu_giq(unsigned int gserver, unsigned int join)
+{
+#ifdef CONFIG_PPC_RTAS
+	int index;
+	int status;
+
+	if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL))
+		return;
+
+	index = (1UL << xics_interrupt_server_size) - 1 - gserver;
+
+	status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join);
+
+	WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n",
+	     GLOBAL_INTERRUPT_QUEUE, index, join, status);
+#endif
+}
+
+void xics_setup_cpu(void)
+{
+	icp_ops->set_priority(LOWEST_PRIORITY);
+
+	xics_set_cpu_giq(xics_default_distrib_server, 1);
+}
+
+void xics_mask_unknown_vec(unsigned int vec)
+{
+	pr_err("Interrupt 0x%x (real) is invalid, disabling it.\n", vec);
+
+	if (WARN_ON(!xics_ics))
+		return;
+	xics_ics->mask_unknown(xics_ics, vec);
+}
+
+
+#ifdef CONFIG_SMP
+
+static void __init xics_request_ipi(void)
+{
+	unsigned int ipi;
+
+	ipi = irq_create_mapping(xics_host, XICS_IPI);
+	BUG_ON(!ipi);
+
+	/*
+	 * IPIs are marked IRQF_PERCPU. The handler was set in map.
+	 */
+	BUG_ON(request_irq(ipi, icp_ops->ipi_action,
+			   IRQF_NO_DEBUG | IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL));
+}
+
+void __init xics_smp_probe(void)
+{
+	/* Register all the IPIs */
+	xics_request_ipi();
+
+	/* Setup cause_ipi callback based on which ICP is used */
+	smp_ops->cause_ipi = icp_ops->cause_ipi;
+}
+
+#endif /* CONFIG_SMP */
+
+noinstr void xics_teardown_cpu(void)
+{
+	struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr);
+
+	/*
+	 * we have to reset the cppr index to 0 because we're
+	 * not going to return from the IPI
+	 */
+	os_cppr->index = 0;
+	icp_ops->set_priority(0);
+	icp_ops->teardown_cpu();
+}
+
+noinstr void xics_kexec_teardown_cpu(int secondary)
+{
+	xics_teardown_cpu();
+
+	icp_ops->flush_ipi();
+
+	/*
+	 * Some machines need to have at least one cpu in the GIQ,
+	 * so leave the master cpu in the group.
+	 */
+	if (secondary)
+		xics_set_cpu_giq(xics_default_distrib_server, 0);
+}
+
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* Interrupts are disabled. */
+void xics_migrate_irqs_away(void)
+{
+	int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id();
+	unsigned int irq, virq;
+	struct irq_desc *desc;
+
+	pr_debug("%s: CPU %u\n", __func__, cpu);
+
+	/* If we used to be the default server, move to the new "boot_cpuid" */
+	if (hw_cpu == xics_default_server)
+		xics_update_irq_servers();
+
+	/* Reject any interrupt that was queued to us... */
+	icp_ops->set_priority(0);
+
+	/* Remove ourselves from the global interrupt queue */
+	xics_set_cpu_giq(xics_default_distrib_server, 0);
+
+	for_each_irq_desc(virq, desc) {
+		struct irq_chip *chip;
+		long server;
+		unsigned long flags;
+		struct irq_data *irqd;
+
+		/* We can't set affinity on ISA interrupts */
+		if (virq < NR_IRQS_LEGACY)
+			continue;
+		/* We only need to migrate enabled IRQS */
+		if (!desc->action)
+			continue;
+		/* We need a mapping in the XICS IRQ domain */
+		irqd = irq_domain_get_irq_data(xics_host, virq);
+		if (!irqd)
+			continue;
+		irq = irqd_to_hwirq(irqd);
+		/* We need to get IPIs still. */
+		if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
+			continue;
+		chip = irq_desc_get_chip(desc);
+		if (!chip || !chip->irq_set_affinity)
+			continue;
+
+		raw_spin_lock_irqsave(&desc->lock, flags);
+
+		/* Locate interrupt server */
+		server = xics_ics->get_server(xics_ics, irq);
+		if (server < 0) {
+			pr_err("%s: Can't find server for irq %d/%x\n",
+			       __func__, virq, irq);
+			goto unlock;
+		}
+
+		/* We only support delivery to all cpus or to one cpu.
+		 * The irq has to be migrated only in the single cpu
+		 * case.
+		 */
+		if (server != hw_cpu)
+			goto unlock;
+
+		/* This is expected during cpu offline. */
+		if (cpu_online(cpu))
+			pr_warn("IRQ %u affinity broken off cpu %u\n",
+				virq, cpu);
+
+		/* Reset affinity to all cpus */
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+		irq_set_affinity(virq, cpu_all_mask);
+		continue;
+unlock:
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+	}
+
+	/* Allow "sufficient" time to drop any inflight IRQ's */
+	mdelay(5);
+
+	/*
+	 * Allow IPIs again. This is done at the very end, after migrating all
+	 * interrupts, the expectation is that we'll only get woken up by an IPI
+	 * interrupt beyond this point, but leave externals masked just to be
+	 * safe. If we're using icp-opal this may actually allow all
+	 * interrupts anyway, but that should be OK.
+	 */
+	icp_ops->set_priority(DEFAULT_PRIORITY);
+
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+#ifdef CONFIG_SMP
+/*
+ * For the moment we only implement delivery to all cpus or one cpu.
+ *
+ * If the requested affinity is cpu_all_mask, we set global affinity.
+ * If not we set it to the first cpu in the mask, even if multiple cpus
+ * are set. This is so things like irqbalance (which set core and package
+ * wide affinities) do the right thing.
+ *
+ * We need to fix this to implement support for the links
+ */
+int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
+			unsigned int strict_check)
+{
+
+	if (!distribute_irqs)
+		return xics_default_server;
+
+	if (!cpumask_subset(cpu_possible_mask, cpumask)) {
+		int server = cpumask_first_and(cpu_online_mask, cpumask);
+
+		if (server < nr_cpu_ids)
+			return get_hard_smp_processor_id(server);
+
+		if (strict_check)
+			return -1;
+	}
+
+	/*
+	 * Workaround issue with some versions of JS20 firmware that
+	 * deliver interrupts to cpus which haven't been started. This
+	 * happens when using the maxcpus= boot option.
+	 */
+	if (cpumask_equal(cpu_online_mask, cpu_present_mask))
+		return xics_default_distrib_server;
+
+	return xics_default_server;
+}
+#endif /* CONFIG_SMP */
+
+static int xics_host_match(struct irq_domain *h, struct device_node *node,
+			   enum irq_domain_bus_token bus_token)
+{
+	if (WARN_ON(!xics_ics))
+		return 0;
+	return xics_ics->host_match(xics_ics, node) ? 1 : 0;
+}
+
+/* Dummies */
+static void xics_ipi_unmask(struct irq_data *d) { }
+static void xics_ipi_mask(struct irq_data *d) { }
+
+static struct irq_chip xics_ipi_chip = {
+	.name = "XICS",
+	.irq_eoi = NULL, /* Patched at init time */
+	.irq_mask = xics_ipi_mask,
+	.irq_unmask = xics_ipi_unmask,
+};
+
+static int xics_host_map(struct irq_domain *domain, unsigned int virq,
+			 irq_hw_number_t hwirq)
+{
+	pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hwirq);
+
+	/*
+	 * Mark interrupts as edge sensitive by default so that resend
+	 * actually works. The device-tree parsing will turn the LSIs
+	 * back to level.
+	 */
+	irq_clear_status_flags(virq, IRQ_LEVEL);
+
+	/* Don't call into ICS for IPIs */
+	if (hwirq == XICS_IPI) {
+		irq_set_chip_and_handler(virq, &xics_ipi_chip,
+					 handle_percpu_irq);
+		return 0;
+	}
+
+	if (WARN_ON(!xics_ics))
+		return -EINVAL;
+
+	if (xics_ics->check(xics_ics, hwirq))
+		return -EINVAL;
+
+	/* Let the ICS be the chip data for the XICS domain. For ICS native */
+	irq_domain_set_info(domain, virq, hwirq, xics_ics->chip,
+			    xics_ics, handle_fasteoi_irq, NULL, NULL);
+
+	return 0;
+}
+
+static int xics_host_xlate(struct irq_domain *h, struct device_node *ct,
+			   const u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	*out_hwirq = intspec[0];
+
+	/*
+	 * If intsize is at least 2, we look for the type in the second cell,
+	 * we assume the LSB indicates a level interrupt.
+	 */
+	if (intsize > 1) {
+		if (intspec[1] & 1)
+			*out_flags = IRQ_TYPE_LEVEL_LOW;
+		else
+			*out_flags = IRQ_TYPE_EDGE_RISING;
+	} else
+		*out_flags = IRQ_TYPE_LEVEL_LOW;
+
+	return 0;
+}
+
+int xics_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+	/*
+	 * We only support these. This has really no effect other than setting
+	 * the corresponding descriptor bits mind you but those will in turn
+	 * affect the resend function when re-enabling an edge interrupt.
+	 *
+	 * Set set the default to edge as explained in map().
+	 */
+	if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
+		flow_type = IRQ_TYPE_EDGE_RISING;
+
+	if (flow_type != IRQ_TYPE_EDGE_RISING &&
+	    flow_type != IRQ_TYPE_LEVEL_LOW)
+		return -EINVAL;
+
+	irqd_set_trigger_type(d, flow_type);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+int xics_retrigger(struct irq_data *data)
+{
+	/*
+	 * We need to push a dummy CPPR when retriggering, since the subsequent
+	 * EOI will try to pop it. Passing 0 works, as the function hard codes
+	 * the priority value anyway.
+	 */
+	xics_push_cppr(0);
+
+	/* Tell the core to do a soft retrigger */
+	return 0;
+}
+
+#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+static int xics_host_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec,
+				      unsigned long *hwirq, unsigned int *type)
+{
+	return xics_host_xlate(d, to_of_node(fwspec->fwnode), fwspec->param,
+			       fwspec->param_count, hwirq, type);
+}
+
+static int xics_host_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs, void *arg)
+{
+	struct irq_fwspec *fwspec = arg;
+	irq_hw_number_t hwirq;
+	unsigned int type = IRQ_TYPE_NONE;
+	int i, rc;
+
+	rc = xics_host_domain_translate(domain, fwspec, &hwirq, &type);
+	if (rc)
+		return rc;
+
+	pr_debug("%s %d/%lx #%d\n", __func__, virq, hwirq, nr_irqs);
+
+	for (i = 0; i < nr_irqs; i++)
+		irq_domain_set_info(domain, virq + i, hwirq + i, xics_ics->chip,
+				    xics_ics, handle_fasteoi_irq, NULL, NULL);
+
+	return 0;
+}
+
+static void xics_host_domain_free(struct irq_domain *domain,
+				  unsigned int virq, unsigned int nr_irqs)
+{
+	pr_debug("%s %d #%d\n", __func__, virq, nr_irqs);
+}
+#endif
+
+static const struct irq_domain_ops xics_host_ops = {
+#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+	.alloc	= xics_host_domain_alloc,
+	.free	= xics_host_domain_free,
+	.translate = xics_host_domain_translate,
+#endif
+	.match = xics_host_match,
+	.map = xics_host_map,
+	.xlate = xics_host_xlate,
+};
+
+static int __init xics_allocate_domain(void)
+{
+	struct fwnode_handle *fn;
+
+	fn = irq_domain_alloc_named_fwnode("XICS");
+	if (!fn)
+		return -ENOMEM;
+
+	xics_host = irq_domain_create_tree(fn, &xics_host_ops, NULL);
+	if (!xics_host) {
+		irq_domain_free_fwnode(fn);
+		return -ENOMEM;
+	}
+
+	irq_set_default_host(xics_host);
+	return 0;
+}
+
+void __init xics_register_ics(struct ics *ics)
+{
+	if (WARN_ONCE(xics_ics, "XICS: Source Controller is already defined !"))
+		return;
+	xics_ics = ics;
+}
+
+static void __init xics_get_server_size(void)
+{
+	struct device_node *np;
+	const __be32 *isize;
+
+	/* We fetch the interrupt server size from the first ICS node
+	 * we find if any
+	 */
+	np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xics");
+	if (!np)
+		return;
+
+	isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
+	if (isize)
+		xics_interrupt_server_size = be32_to_cpu(*isize);
+
+	of_node_put(np);
+}
+
+void __init xics_init(void)
+{
+	int rc = -1;
+
+	/* Fist locate ICP */
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		rc = icp_hv_init();
+	if (rc < 0) {
+		rc = icp_native_init();
+		if (rc == -ENODEV)
+		    rc = icp_opal_init();
+	}
+	if (rc < 0) {
+		pr_warn("XICS: Cannot find a Presentation Controller !\n");
+		return;
+	}
+
+	/* Copy get_irq callback over to ppc_md */
+	ppc_md.get_irq = icp_ops->get_irq;
+
+	/* Patch up IPI chip EOI */
+	xics_ipi_chip.irq_eoi = icp_ops->eoi;
+
+	/* Now locate ICS */
+	rc = ics_rtas_init();
+	if (rc < 0)
+		rc = ics_opal_init();
+	if (rc < 0)
+		rc = ics_native_init();
+	if (rc < 0)
+		pr_warn("XICS: Cannot find a Source Controller !\n");
+
+	/* Initialize common bits */
+	xics_get_server_size();
+	xics_update_irq_servers();
+	rc = xics_allocate_domain();
+	if (rc < 0)
+		pr_err("XICS: Failed to create IRQ domain");
+	xics_setup_cpu();
+}
diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig
new file mode 100644
index 0000000000..785c292d10
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/Kconfig
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_XIVE
+	bool
+	select PPC_SMP_MUXED_IPI
+	select HARDIRQS_SW_RESEND
+
+config PPC_XIVE_NATIVE
+	bool
+	select PPC_XIVE
+	depends on PPC_POWERNV
+
+config PPC_XIVE_SPAPR
+	bool
+	select PPC_XIVE
diff --git a/arch/powerpc/sysdev/xive/Makefile b/arch/powerpc/sysdev/xive/Makefile
new file mode 100644
index 0000000000..e510888389
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-y				+= common.o
+obj-$(CONFIG_PPC_XIVE_NATIVE)	+= native.o
+obj-$(CONFIG_PPC_XIVE_SPAPR)	+= spapr.o
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
new file mode 100644
index 0000000000..a289cb97c1
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -0,0 +1,1864 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "xive: " fmt
+
+#include <linux/types.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/debugfs.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/msi.h>
+#include <linux/vmalloc.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/xmon.h>
+
+#include "xive-internal.h"
+
+#undef DEBUG_FLUSH
+#undef DEBUG_ALL
+
+#ifdef DEBUG_ALL
+#define DBG_VERBOSE(fmt, ...)	pr_devel("cpu %d - " fmt, \
+					 smp_processor_id(), ## __VA_ARGS__)
+#else
+#define DBG_VERBOSE(fmt...)	do { } while(0)
+#endif
+
+bool __xive_enabled;
+EXPORT_SYMBOL_GPL(__xive_enabled);
+bool xive_cmdline_disabled;
+
+/* We use only one priority for now */
+static u8 xive_irq_priority;
+
+/* TIMA exported to KVM */
+void __iomem *xive_tima;
+EXPORT_SYMBOL_GPL(xive_tima);
+u32 xive_tima_offset;
+
+/* Backend ops */
+static const struct xive_ops *xive_ops;
+
+/* Our global interrupt domain */
+static struct irq_domain *xive_irq_domain;
+
+#ifdef CONFIG_SMP
+/* The IPIs use the same logical irq number when on the same chip */
+static struct xive_ipi_desc {
+	unsigned int irq;
+	char name[16];
+	atomic_t started;
+} *xive_ipis;
+
+/*
+ * Use early_cpu_to_node() for hot-plugged CPUs
+ */
+static unsigned int xive_ipi_cpu_to_irq(unsigned int cpu)
+{
+	return xive_ipis[early_cpu_to_node(cpu)].irq;
+}
+#endif
+
+/* Xive state for each CPU */
+static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu);
+
+/* An invalid CPU target */
+#define XIVE_INVALID_TARGET	(-1)
+
+/*
+ * Global toggle to switch on/off StoreEOI
+ */
+static bool xive_store_eoi = true;
+
+static bool xive_is_store_eoi(struct xive_irq_data *xd)
+{
+	return xd->flags & XIVE_IRQ_FLAG_STORE_EOI && xive_store_eoi;
+}
+
+/*
+ * Read the next entry in a queue, return its content if it's valid
+ * or 0 if there is no new entry.
+ *
+ * The queue pointer is moved forward unless "just_peek" is set
+ */
+static u32 xive_read_eq(struct xive_q *q, bool just_peek)
+{
+	u32 cur;
+
+	if (!q->qpage)
+		return 0;
+	cur = be32_to_cpup(q->qpage + q->idx);
+
+	/* Check valid bit (31) vs current toggle polarity */
+	if ((cur >> 31) == q->toggle)
+		return 0;
+
+	/* If consuming from the queue ... */
+	if (!just_peek) {
+		/* Next entry */
+		q->idx = (q->idx + 1) & q->msk;
+
+		/* Wrap around: flip valid toggle */
+		if (q->idx == 0)
+			q->toggle ^= 1;
+	}
+	/* Mask out the valid bit (31) */
+	return cur & 0x7fffffff;
+}
+
+/*
+ * Scans all the queue that may have interrupts in them
+ * (based on "pending_prio") in priority order until an
+ * interrupt is found or all the queues are empty.
+ *
+ * Then updates the CPPR (Current Processor Priority
+ * Register) based on the most favored interrupt found
+ * (0xff if none) and return what was found (0 if none).
+ *
+ * If just_peek is set, return the most favored pending
+ * interrupt if any but don't update the queue pointers.
+ *
+ * Note: This function can operate generically on any number
+ * of queues (up to 8). The current implementation of the XIVE
+ * driver only uses a single queue however.
+ *
+ * Note2: This will also "flush" "the pending_count" of a queue
+ * into the "count" when that queue is observed to be empty.
+ * This is used to keep track of the amount of interrupts
+ * targetting a queue. When an interrupt is moved away from
+ * a queue, we only decrement that queue count once the queue
+ * has been observed empty to avoid races.
+ */
+static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
+{
+	u32 irq = 0;
+	u8 prio = 0;
+
+	/* Find highest pending priority */
+	while (xc->pending_prio != 0) {
+		struct xive_q *q;
+
+		prio = ffs(xc->pending_prio) - 1;
+		DBG_VERBOSE("scan_irq: trying prio %d\n", prio);
+
+		/* Try to fetch */
+		irq = xive_read_eq(&xc->queue[prio], just_peek);
+
+		/* Found something ? That's it */
+		if (irq) {
+			if (just_peek || irq_to_desc(irq))
+				break;
+			/*
+			 * We should never get here; if we do then we must
+			 * have failed to synchronize the interrupt properly
+			 * when shutting it down.
+			 */
+			pr_crit("xive: got interrupt %d without descriptor, dropping\n",
+				irq);
+			WARN_ON(1);
+			continue;
+		}
+
+		/* Clear pending bits */
+		xc->pending_prio &= ~(1 << prio);
+
+		/*
+		 * Check if the queue count needs adjusting due to
+		 * interrupts being moved away. See description of
+		 * xive_dec_target_count()
+		 */
+		q = &xc->queue[prio];
+		if (atomic_read(&q->pending_count)) {
+			int p = atomic_xchg(&q->pending_count, 0);
+			if (p) {
+				WARN_ON(p > atomic_read(&q->count));
+				atomic_sub(p, &q->count);
+			}
+		}
+	}
+
+	/* If nothing was found, set CPPR to 0xff */
+	if (irq == 0)
+		prio = 0xff;
+
+	/* Update HW CPPR to match if necessary */
+	if (prio != xc->cppr) {
+		DBG_VERBOSE("scan_irq: adjusting CPPR to %d\n", prio);
+		xc->cppr = prio;
+		out_8(xive_tima + xive_tima_offset + TM_CPPR, prio);
+	}
+
+	return irq;
+}
+
+/*
+ * This is used to perform the magic loads from an ESB
+ * described in xive-regs.h
+ */
+static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset)
+{
+	u64 val;
+
+	if (offset == XIVE_ESB_SET_PQ_10 && xive_is_store_eoi(xd))
+		offset |= XIVE_ESB_LD_ST_MO;
+
+	if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
+		val = xive_ops->esb_rw(xd->hw_irq, offset, 0, 0);
+	else
+		val = in_be64(xd->eoi_mmio + offset);
+
+	return (u8)val;
+}
+
+static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data)
+{
+	if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
+		xive_ops->esb_rw(xd->hw_irq, offset, data, 1);
+	else
+		out_be64(xd->eoi_mmio + offset, data);
+}
+
+#if defined(CONFIG_XMON) || defined(CONFIG_DEBUG_FS)
+static void xive_irq_data_dump(struct xive_irq_data *xd, char *buffer, size_t size)
+{
+	u64 val = xive_esb_read(xd, XIVE_ESB_GET);
+
+	snprintf(buffer, size, "flags=%c%c%c PQ=%c%c 0x%016llx 0x%016llx",
+		 xive_is_store_eoi(xd) ? 'S' : ' ',
+		 xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
+		 xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
+		 val & XIVE_ESB_VAL_P ? 'P' : '-',
+		 val & XIVE_ESB_VAL_Q ? 'Q' : '-',
+		 xd->trig_page, xd->eoi_page);
+}
+#endif
+
+#ifdef CONFIG_XMON
+static notrace void xive_dump_eq(const char *name, struct xive_q *q)
+{
+	u32 i0, i1, idx;
+
+	if (!q->qpage)
+		return;
+	idx = q->idx;
+	i0 = be32_to_cpup(q->qpage + idx);
+	idx = (idx + 1) & q->msk;
+	i1 = be32_to_cpup(q->qpage + idx);
+	xmon_printf("%s idx=%d T=%d %08x %08x ...", name,
+		     q->idx, q->toggle, i0, i1);
+}
+
+notrace void xmon_xive_do_dump(int cpu)
+{
+	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
+
+	xmon_printf("CPU %d:", cpu);
+	if (xc) {
+		xmon_printf("pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr);
+
+#ifdef CONFIG_SMP
+		{
+			char buffer[128];
+
+			xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer));
+			xmon_printf("IPI=0x%08x %s", xc->hw_ipi, buffer);
+		}
+#endif
+		xive_dump_eq("EQ", &xc->queue[xive_irq_priority]);
+	}
+	xmon_printf("\n");
+}
+
+static struct irq_data *xive_get_irq_data(u32 hw_irq)
+{
+	unsigned int irq = irq_find_mapping(xive_irq_domain, hw_irq);
+
+	return irq ? irq_get_irq_data(irq) : NULL;
+}
+
+int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d)
+{
+	int rc;
+	u32 target;
+	u8 prio;
+	u32 lirq;
+
+	rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
+	if (rc) {
+		xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
+		return rc;
+	}
+
+	xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
+		    hw_irq, target, prio, lirq);
+
+	if (!d)
+		d = xive_get_irq_data(hw_irq);
+
+	if (d) {
+		char buffer[128];
+
+		xive_irq_data_dump(irq_data_get_irq_handler_data(d),
+				   buffer, sizeof(buffer));
+		xmon_printf("%s", buffer);
+	}
+
+	xmon_printf("\n");
+	return 0;
+}
+
+void xmon_xive_get_irq_all(void)
+{
+	unsigned int i;
+	struct irq_desc *desc;
+
+	for_each_irq_desc(i, desc) {
+		struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i);
+
+		if (d)
+			xmon_xive_get_irq_config(irqd_to_hwirq(d), d);
+	}
+}
+
+#endif /* CONFIG_XMON */
+
+static unsigned int xive_get_irq(void)
+{
+	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+	u32 irq;
+
+	/*
+	 * This can be called either as a result of a HW interrupt or
+	 * as a "replay" because EOI decided there was still something
+	 * in one of the queues.
+	 *
+	 * First we perform an ACK cycle in order to update our mask
+	 * of pending priorities. This will also have the effect of
+	 * updating the CPPR to the most favored pending interrupts.
+	 *
+	 * In the future, if we have a way to differentiate a first
+	 * entry (on HW interrupt) from a replay triggered by EOI,
+	 * we could skip this on replays unless we soft-mask tells us
+	 * that a new HW interrupt occurred.
+	 */
+	xive_ops->update_pending(xc);
+
+	DBG_VERBOSE("get_irq: pending=%02x\n", xc->pending_prio);
+
+	/* Scan our queue(s) for interrupts */
+	irq = xive_scan_interrupts(xc, false);
+
+	DBG_VERBOSE("get_irq: got irq 0x%x, new pending=0x%02x\n",
+	    irq, xc->pending_prio);
+
+	/* Return pending interrupt if any */
+	if (irq == XIVE_BAD_IRQ)
+		return 0;
+	return irq;
+}
+
+/*
+ * After EOI'ing an interrupt, we need to re-check the queue
+ * to see if another interrupt is pending since multiple
+ * interrupts can coalesce into a single notification to the
+ * CPU.
+ *
+ * If we find that there is indeed more in there, we call
+ * force_external_irq_replay() to make Linux synthetize an
+ * external interrupt on the next call to local_irq_restore().
+ */
+static void xive_do_queue_eoi(struct xive_cpu *xc)
+{
+	if (xive_scan_interrupts(xc, true) != 0) {
+		DBG_VERBOSE("eoi: pending=0x%02x\n", xc->pending_prio);
+		force_external_irq_replay();
+	}
+}
+
+/*
+ * EOI an interrupt at the source. There are several methods
+ * to do this depending on the HW version and source type
+ */
+static void xive_do_source_eoi(struct xive_irq_data *xd)
+{
+	u8 eoi_val;
+
+	xd->stale_p = false;
+
+	/* If the XIVE supports the new "store EOI facility, use it */
+	if (xive_is_store_eoi(xd)) {
+		xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
+		return;
+	}
+
+	/*
+	 * For LSIs, we use the "EOI cycle" special load rather than
+	 * PQ bits, as they are automatically re-triggered in HW when
+	 * still pending.
+	 */
+	if (xd->flags & XIVE_IRQ_FLAG_LSI) {
+		xive_esb_read(xd, XIVE_ESB_LOAD_EOI);
+		return;
+	}
+
+	/*
+	 * Otherwise, we use the special MMIO that does a clear of
+	 * both P and Q and returns the old Q. This allows us to then
+	 * do a re-trigger if Q was set rather than synthesizing an
+	 * interrupt in software
+	 */
+	eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
+	DBG_VERBOSE("eoi_val=%x\n", eoi_val);
+
+	/* Re-trigger if needed */
+	if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio)
+		out_be64(xd->trig_mmio, 0);
+}
+
+/* irq_chip eoi callback, called with irq descriptor lock held */
+static void xive_irq_eoi(struct irq_data *d)
+{
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+
+	DBG_VERBOSE("eoi_irq: irq=%d [0x%lx] pending=%02x\n",
+		    d->irq, irqd_to_hwirq(d), xc->pending_prio);
+
+	/*
+	 * EOI the source if it hasn't been disabled and hasn't
+	 * been passed-through to a KVM guest
+	 */
+	if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
+	    !(xd->flags & XIVE_IRQ_FLAG_NO_EOI))
+		xive_do_source_eoi(xd);
+	else
+		xd->stale_p = true;
+
+	/*
+	 * Clear saved_p to indicate that it's no longer occupying
+	 * a queue slot on the target queue
+	 */
+	xd->saved_p = false;
+
+	/* Check for more work in the queue */
+	xive_do_queue_eoi(xc);
+}
+
+/*
+ * Helper used to mask and unmask an interrupt source.
+ */
+static void xive_do_source_set_mask(struct xive_irq_data *xd,
+				    bool mask)
+{
+	u64 val;
+
+	pr_debug("%s: HW 0x%x %smask\n", __func__, xd->hw_irq, mask ? "" : "un");
+
+	/*
+	 * If the interrupt had P set, it may be in a queue.
+	 *
+	 * We need to make sure we don't re-enable it until it
+	 * has been fetched from that queue and EOId. We keep
+	 * a copy of that P state and use it to restore the
+	 * ESB accordingly on unmask.
+	 */
+	if (mask) {
+		val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
+		if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P))
+			xd->saved_p = true;
+		xd->stale_p = false;
+	} else if (xd->saved_p) {
+		xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
+		xd->saved_p = false;
+	} else {
+		xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
+		xd->stale_p = false;
+	}
+}
+
+/*
+ * Try to chose "cpu" as a new interrupt target. Increments
+ * the queue accounting for that target if it's not already
+ * full.
+ */
+static bool xive_try_pick_target(int cpu)
+{
+	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
+	struct xive_q *q = &xc->queue[xive_irq_priority];
+	int max;
+
+	/*
+	 * Calculate max number of interrupts in that queue.
+	 *
+	 * We leave a gap of 1 just in case...
+	 */
+	max = (q->msk + 1) - 1;
+	return !!atomic_add_unless(&q->count, 1, max);
+}
+
+/*
+ * Un-account an interrupt for a target CPU. We don't directly
+ * decrement q->count since the interrupt might still be present
+ * in the queue.
+ *
+ * Instead increment a separate counter "pending_count" which
+ * will be substracted from "count" later when that CPU observes
+ * the queue to be empty.
+ */
+static void xive_dec_target_count(int cpu)
+{
+	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
+	struct xive_q *q = &xc->queue[xive_irq_priority];
+
+	if (WARN_ON(cpu < 0 || !xc)) {
+		pr_err("%s: cpu=%d xc=%p\n", __func__, cpu, xc);
+		return;
+	}
+
+	/*
+	 * We increment the "pending count" which will be used
+	 * to decrement the target queue count whenever it's next
+	 * processed and found empty. This ensure that we don't
+	 * decrement while we still have the interrupt there
+	 * occupying a slot.
+	 */
+	atomic_inc(&q->pending_count);
+}
+
+/* Find a tentative CPU target in a CPU mask */
+static int xive_find_target_in_mask(const struct cpumask *mask,
+				    unsigned int fuzz)
+{
+	int cpu, first, num, i;
+
+	/* Pick up a starting point CPU in the mask based on  fuzz */
+	num = min_t(int, cpumask_weight(mask), nr_cpu_ids);
+	first = fuzz % num;
+
+	/* Locate it */
+	cpu = cpumask_first(mask);
+	for (i = 0; i < first && cpu < nr_cpu_ids; i++)
+		cpu = cpumask_next(cpu, mask);
+
+	/* Sanity check */
+	if (WARN_ON(cpu >= nr_cpu_ids))
+		cpu = cpumask_first(cpu_online_mask);
+
+	/* Remember first one to handle wrap-around */
+	first = cpu;
+
+	/*
+	 * Now go through the entire mask until we find a valid
+	 * target.
+	 */
+	do {
+		/*
+		 * We re-check online as the fallback case passes us
+		 * an untested affinity mask
+		 */
+		if (cpu_online(cpu) && xive_try_pick_target(cpu))
+			return cpu;
+		cpu = cpumask_next(cpu, mask);
+		/* Wrap around */
+		if (cpu >= nr_cpu_ids)
+			cpu = cpumask_first(mask);
+	} while (cpu != first);
+
+	return -1;
+}
+
+/*
+ * Pick a target CPU for an interrupt. This is done at
+ * startup or if the affinity is changed in a way that
+ * invalidates the current target.
+ */
+static int xive_pick_irq_target(struct irq_data *d,
+				const struct cpumask *affinity)
+{
+	static unsigned int fuzz;
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+	cpumask_var_t mask;
+	int cpu = -1;
+
+	/*
+	 * If we have chip IDs, first we try to build a mask of
+	 * CPUs matching the CPU and find a target in there
+	 */
+	if (xd->src_chip != XIVE_INVALID_CHIP_ID &&
+		zalloc_cpumask_var(&mask, GFP_ATOMIC)) {
+		/* Build a mask of matching chip IDs */
+		for_each_cpu_and(cpu, affinity, cpu_online_mask) {
+			struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
+			if (xc->chip_id == xd->src_chip)
+				cpumask_set_cpu(cpu, mask);
+		}
+		/* Try to find a target */
+		if (cpumask_empty(mask))
+			cpu = -1;
+		else
+			cpu = xive_find_target_in_mask(mask, fuzz++);
+		free_cpumask_var(mask);
+		if (cpu >= 0)
+			return cpu;
+		fuzz--;
+	}
+
+	/* No chip IDs, fallback to using the affinity mask */
+	return xive_find_target_in_mask(affinity, fuzz++);
+}
+
+static unsigned int xive_irq_startup(struct irq_data *d)
+{
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int target, rc;
+
+	xd->saved_p = false;
+	xd->stale_p = false;
+
+	pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d);
+
+	/* Pick a target */
+	target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d));
+	if (target == XIVE_INVALID_TARGET) {
+		/* Try again breaking affinity */
+		target = xive_pick_irq_target(d, cpu_online_mask);
+		if (target == XIVE_INVALID_TARGET)
+			return -ENXIO;
+		pr_warn("irq %d started with broken affinity\n", d->irq);
+	}
+
+	/* Sanity check */
+	if (WARN_ON(target == XIVE_INVALID_TARGET ||
+		    target >= nr_cpu_ids))
+		target = smp_processor_id();
+
+	xd->target = target;
+
+	/*
+	 * Configure the logical number to be the Linux IRQ number
+	 * and set the target queue
+	 */
+	rc = xive_ops->configure_irq(hw_irq,
+				     get_hard_smp_processor_id(target),
+				     xive_irq_priority, d->irq);
+	if (rc)
+		return rc;
+
+	/* Unmask the ESB */
+	xive_do_source_set_mask(xd, false);
+
+	return 0;
+}
+
+/* called with irq descriptor lock held */
+static void xive_irq_shutdown(struct irq_data *d)
+{
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d);
+
+	if (WARN_ON(xd->target == XIVE_INVALID_TARGET))
+		return;
+
+	/* Mask the interrupt at the source */
+	xive_do_source_set_mask(xd, true);
+
+	/*
+	 * Mask the interrupt in HW in the IVT/EAS and set the number
+	 * to be the "bad" IRQ number
+	 */
+	xive_ops->configure_irq(hw_irq,
+				get_hard_smp_processor_id(xd->target),
+				0xff, XIVE_BAD_IRQ);
+
+	xive_dec_target_count(xd->target);
+	xd->target = XIVE_INVALID_TARGET;
+}
+
+static void xive_irq_unmask(struct irq_data *d)
+{
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+	pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd);
+
+	xive_do_source_set_mask(xd, false);
+}
+
+static void xive_irq_mask(struct irq_data *d)
+{
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+	pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd);
+
+	xive_do_source_set_mask(xd, true);
+}
+
+static int xive_irq_set_affinity(struct irq_data *d,
+				 const struct cpumask *cpumask,
+				 bool force)
+{
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	u32 target, old_target;
+	int rc = 0;
+
+	pr_debug("%s: irq %d/0x%x\n", __func__, d->irq, hw_irq);
+
+	/* Is this valid ? */
+	if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids)
+		return -EINVAL;
+
+	/*
+	 * If existing target is already in the new mask, and is
+	 * online then do nothing.
+	 */
+	if (xd->target != XIVE_INVALID_TARGET &&
+	    cpu_online(xd->target) &&
+	    cpumask_test_cpu(xd->target, cpumask))
+		return IRQ_SET_MASK_OK;
+
+	/* Pick a new target */
+	target = xive_pick_irq_target(d, cpumask);
+
+	/* No target found */
+	if (target == XIVE_INVALID_TARGET)
+		return -ENXIO;
+
+	/* Sanity check */
+	if (WARN_ON(target >= nr_cpu_ids))
+		target = smp_processor_id();
+
+	old_target = xd->target;
+
+	/*
+	 * Only configure the irq if it's not currently passed-through to
+	 * a KVM guest
+	 */
+	if (!irqd_is_forwarded_to_vcpu(d))
+		rc = xive_ops->configure_irq(hw_irq,
+					     get_hard_smp_processor_id(target),
+					     xive_irq_priority, d->irq);
+	if (rc < 0) {
+		pr_err("Error %d reconfiguring irq %d\n", rc, d->irq);
+		return rc;
+	}
+
+	pr_debug("  target: 0x%x\n", target);
+	xd->target = target;
+
+	/* Give up previous target */
+	if (old_target != XIVE_INVALID_TARGET)
+	    xive_dec_target_count(old_target);
+
+	return IRQ_SET_MASK_OK;
+}
+
+static int xive_irq_set_type(struct irq_data *d, unsigned int flow_type)
+{
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+	/*
+	 * We only support these. This has really no effect other than setting
+	 * the corresponding descriptor bits mind you but those will in turn
+	 * affect the resend function when re-enabling an edge interrupt.
+	 *
+	 * Set the default to edge as explained in map().
+	 */
+	if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
+		flow_type = IRQ_TYPE_EDGE_RISING;
+
+	if (flow_type != IRQ_TYPE_EDGE_RISING &&
+	    flow_type != IRQ_TYPE_LEVEL_LOW)
+		return -EINVAL;
+
+	irqd_set_trigger_type(d, flow_type);
+
+	/*
+	 * Double check it matches what the FW thinks
+	 *
+	 * NOTE: We don't know yet if the PAPR interface will provide
+	 * the LSI vs MSI information apart from the device-tree so
+	 * this check might have to move into an optional backend call
+	 * that is specific to the native backend
+	 */
+	if ((flow_type == IRQ_TYPE_LEVEL_LOW) !=
+	    !!(xd->flags & XIVE_IRQ_FLAG_LSI)) {
+		pr_warn("Interrupt %d (HW 0x%x) type mismatch, Linux says %s, FW says %s\n",
+			d->irq, (u32)irqd_to_hwirq(d),
+			(flow_type == IRQ_TYPE_LEVEL_LOW) ? "Level" : "Edge",
+			(xd->flags & XIVE_IRQ_FLAG_LSI) ? "Level" : "Edge");
+	}
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static int xive_irq_retrigger(struct irq_data *d)
+{
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+	/* This should be only for MSIs */
+	if (WARN_ON(xd->flags & XIVE_IRQ_FLAG_LSI))
+		return 0;
+
+	/*
+	 * To perform a retrigger, we first set the PQ bits to
+	 * 11, then perform an EOI.
+	 */
+	xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
+	xive_do_source_eoi(xd);
+
+	return 1;
+}
+
+/*
+ * Caller holds the irq descriptor lock, so this won't be called
+ * concurrently with xive_get_irqchip_state on the same interrupt.
+ */
+static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
+{
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int rc;
+	u8 pq;
+
+	/*
+	 * This is called by KVM with state non-NULL for enabling
+	 * pass-through or NULL for disabling it
+	 */
+	if (state) {
+		irqd_set_forwarded_to_vcpu(d);
+
+		/* Set it to PQ=10 state to prevent further sends */
+		pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
+		if (!xd->stale_p) {
+			xd->saved_p = !!(pq & XIVE_ESB_VAL_P);
+			xd->stale_p = !xd->saved_p;
+		}
+
+		/* No target ? nothing to do */
+		if (xd->target == XIVE_INVALID_TARGET) {
+			/*
+			 * An untargetted interrupt should have been
+			 * also masked at the source
+			 */
+			WARN_ON(xd->saved_p);
+
+			return 0;
+		}
+
+		/*
+		 * If P was set, adjust state to PQ=11 to indicate
+		 * that a resend is needed for the interrupt to reach
+		 * the guest. Also remember the value of P.
+		 *
+		 * This also tells us that it's in flight to a host queue
+		 * or has already been fetched but hasn't been EOIed yet
+		 * by the host. This it's potentially using up a host
+		 * queue slot. This is important to know because as long
+		 * as this is the case, we must not hard-unmask it when
+		 * "returning" that interrupt to the host.
+		 *
+		 * This saved_p is cleared by the host EOI, when we know
+		 * for sure the queue slot is no longer in use.
+		 */
+		if (xd->saved_p) {
+			xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
+
+			/*
+			 * Sync the XIVE source HW to ensure the interrupt
+			 * has gone through the EAS before we change its
+			 * target to the guest. That should guarantee us
+			 * that we *will* eventually get an EOI for it on
+			 * the host. Otherwise there would be a small window
+			 * for P to be seen here but the interrupt going
+			 * to the guest queue.
+			 */
+			if (xive_ops->sync_source)
+				xive_ops->sync_source(hw_irq);
+		}
+	} else {
+		irqd_clr_forwarded_to_vcpu(d);
+
+		/* No host target ? hard mask and return */
+		if (xd->target == XIVE_INVALID_TARGET) {
+			xive_do_source_set_mask(xd, true);
+			return 0;
+		}
+
+		/*
+		 * Sync the XIVE source HW to ensure the interrupt
+		 * has gone through the EAS before we change its
+		 * target to the host.
+		 */
+		if (xive_ops->sync_source)
+			xive_ops->sync_source(hw_irq);
+
+		/*
+		 * By convention we are called with the interrupt in
+		 * a PQ=10 or PQ=11 state, ie, it won't fire and will
+		 * have latched in Q whether there's a pending HW
+		 * interrupt or not.
+		 *
+		 * First reconfigure the target.
+		 */
+		rc = xive_ops->configure_irq(hw_irq,
+					     get_hard_smp_processor_id(xd->target),
+					     xive_irq_priority, d->irq);
+		if (rc)
+			return rc;
+
+		/*
+		 * Then if saved_p is not set, effectively re-enable the
+		 * interrupt with an EOI. If it is set, we know there is
+		 * still a message in a host queue somewhere that will be
+		 * EOId eventually.
+		 *
+		 * Note: We don't check irqd_irq_disabled(). Effectively,
+		 * we *will* let the irq get through even if masked if the
+		 * HW is still firing it in order to deal with the whole
+		 * saved_p business properly. If the interrupt triggers
+		 * while masked, the generic code will re-mask it anyway.
+		 */
+		if (!xd->saved_p)
+			xive_do_source_eoi(xd);
+
+	}
+	return 0;
+}
+
+/* Called with irq descriptor lock held. */
+static int xive_get_irqchip_state(struct irq_data *data,
+				  enum irqchip_irq_state which, bool *state)
+{
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(data);
+	u8 pq;
+
+	switch (which) {
+	case IRQCHIP_STATE_ACTIVE:
+		pq = xive_esb_read(xd, XIVE_ESB_GET);
+
+		/*
+		 * The esb value being all 1's means we couldn't get
+		 * the PQ state of the interrupt through mmio. It may
+		 * happen, for example when querying a PHB interrupt
+		 * while the PHB is in an error state. We consider the
+		 * interrupt to be inactive in that case.
+		 */
+		*state = (pq != XIVE_ESB_INVALID) && !xd->stale_p &&
+			(xd->saved_p || (!!(pq & XIVE_ESB_VAL_P) &&
+			 !irqd_irq_disabled(data)));
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+static struct irq_chip xive_irq_chip = {
+	.name = "XIVE-IRQ",
+	.irq_startup = xive_irq_startup,
+	.irq_shutdown = xive_irq_shutdown,
+	.irq_eoi = xive_irq_eoi,
+	.irq_mask = xive_irq_mask,
+	.irq_unmask = xive_irq_unmask,
+	.irq_set_affinity = xive_irq_set_affinity,
+	.irq_set_type = xive_irq_set_type,
+	.irq_retrigger = xive_irq_retrigger,
+	.irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
+	.irq_get_irqchip_state = xive_get_irqchip_state,
+};
+
+bool is_xive_irq(struct irq_chip *chip)
+{
+	return chip == &xive_irq_chip;
+}
+EXPORT_SYMBOL_GPL(is_xive_irq);
+
+void xive_cleanup_irq_data(struct xive_irq_data *xd)
+{
+	pr_debug("%s for HW 0x%x\n", __func__, xd->hw_irq);
+
+	if (xd->eoi_mmio) {
+		iounmap(xd->eoi_mmio);
+		if (xd->eoi_mmio == xd->trig_mmio)
+			xd->trig_mmio = NULL;
+		xd->eoi_mmio = NULL;
+	}
+	if (xd->trig_mmio) {
+		iounmap(xd->trig_mmio);
+		xd->trig_mmio = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(xive_cleanup_irq_data);
+
+static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw)
+{
+	struct xive_irq_data *xd;
+	int rc;
+
+	xd = kzalloc(sizeof(struct xive_irq_data), GFP_KERNEL);
+	if (!xd)
+		return -ENOMEM;
+	rc = xive_ops->populate_irq_data(hw, xd);
+	if (rc) {
+		kfree(xd);
+		return rc;
+	}
+	xd->target = XIVE_INVALID_TARGET;
+	irq_set_handler_data(virq, xd);
+
+	/*
+	 * Turn OFF by default the interrupt being mapped. A side
+	 * effect of this check is the mapping the ESB page of the
+	 * interrupt in the Linux address space. This prevents page
+	 * fault issues in the crash handler which masks all
+	 * interrupts.
+	 */
+	xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
+
+	return 0;
+}
+
+void xive_irq_free_data(unsigned int virq)
+{
+	struct xive_irq_data *xd = irq_get_handler_data(virq);
+
+	if (!xd)
+		return;
+	irq_set_handler_data(virq, NULL);
+	xive_cleanup_irq_data(xd);
+	kfree(xd);
+}
+EXPORT_SYMBOL_GPL(xive_irq_free_data);
+
+#ifdef CONFIG_SMP
+
+static void xive_cause_ipi(int cpu)
+{
+	struct xive_cpu *xc;
+	struct xive_irq_data *xd;
+
+	xc = per_cpu(xive_cpu, cpu);
+
+	DBG_VERBOSE("IPI CPU %d -> %d (HW IRQ 0x%x)\n",
+		    smp_processor_id(), cpu, xc->hw_ipi);
+
+	xd = &xc->ipi_data;
+	if (WARN_ON(!xd->trig_mmio))
+		return;
+	out_be64(xd->trig_mmio, 0);
+}
+
+static irqreturn_t xive_muxed_ipi_action(int irq, void *dev_id)
+{
+	return smp_ipi_demux();
+}
+
+static void xive_ipi_eoi(struct irq_data *d)
+{
+	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+
+	/* Handle possible race with unplug and drop stale IPIs */
+	if (!xc)
+		return;
+
+	DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n",
+		    d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio);
+
+	xive_do_source_eoi(&xc->ipi_data);
+	xive_do_queue_eoi(xc);
+}
+
+static void xive_ipi_do_nothing(struct irq_data *d)
+{
+	/*
+	 * Nothing to do, we never mask/unmask IPIs, but the callback
+	 * has to exist for the struct irq_chip.
+	 */
+}
+
+static struct irq_chip xive_ipi_chip = {
+	.name = "XIVE-IPI",
+	.irq_eoi = xive_ipi_eoi,
+	.irq_mask = xive_ipi_do_nothing,
+	.irq_unmask = xive_ipi_do_nothing,
+};
+
+/*
+ * IPIs are marked per-cpu. We use separate HW interrupts under the
+ * hood but associated with the same "linux" interrupt
+ */
+struct xive_ipi_alloc_info {
+	irq_hw_number_t hwirq;
+};
+
+static int xive_ipi_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				     unsigned int nr_irqs, void *arg)
+{
+	struct xive_ipi_alloc_info *info = arg;
+	int i;
+
+	for (i = 0; i < nr_irqs; i++) {
+		irq_domain_set_info(domain, virq + i, info->hwirq + i, &xive_ipi_chip,
+				    domain->host_data, handle_percpu_irq,
+				    NULL, NULL);
+	}
+	return 0;
+}
+
+static const struct irq_domain_ops xive_ipi_irq_domain_ops = {
+	.alloc  = xive_ipi_irq_domain_alloc,
+};
+
+static int __init xive_init_ipis(void)
+{
+	struct fwnode_handle *fwnode;
+	struct irq_domain *ipi_domain;
+	unsigned int node;
+	int ret = -ENOMEM;
+
+	fwnode = irq_domain_alloc_named_fwnode("XIVE-IPI");
+	if (!fwnode)
+		goto out;
+
+	ipi_domain = irq_domain_create_linear(fwnode, nr_node_ids,
+					      &xive_ipi_irq_domain_ops, NULL);
+	if (!ipi_domain)
+		goto out_free_fwnode;
+
+	xive_ipis = kcalloc(nr_node_ids, sizeof(*xive_ipis), GFP_KERNEL | __GFP_NOFAIL);
+	if (!xive_ipis)
+		goto out_free_domain;
+
+	for_each_node(node) {
+		struct xive_ipi_desc *xid = &xive_ipis[node];
+		struct xive_ipi_alloc_info info = { node };
+
+		/*
+		 * Map one IPI interrupt per node for all cpus of that node.
+		 * Since the HW interrupt number doesn't have any meaning,
+		 * simply use the node number.
+		 */
+		ret = irq_domain_alloc_irqs(ipi_domain, 1, node, &info);
+		if (ret < 0)
+			goto out_free_xive_ipis;
+		xid->irq = ret;
+
+		snprintf(xid->name, sizeof(xid->name), "IPI-%d", node);
+	}
+
+	return ret;
+
+out_free_xive_ipis:
+	kfree(xive_ipis);
+out_free_domain:
+	irq_domain_remove(ipi_domain);
+out_free_fwnode:
+	irq_domain_free_fwnode(fwnode);
+out:
+	return ret;
+}
+
+static int xive_request_ipi(unsigned int cpu)
+{
+	struct xive_ipi_desc *xid = &xive_ipis[early_cpu_to_node(cpu)];
+	int ret;
+
+	if (atomic_inc_return(&xid->started) > 1)
+		return 0;
+
+	ret = request_irq(xid->irq, xive_muxed_ipi_action,
+			  IRQF_NO_DEBUG | IRQF_PERCPU | IRQF_NO_THREAD,
+			  xid->name, NULL);
+
+	WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
+	return ret;
+}
+
+static int xive_setup_cpu_ipi(unsigned int cpu)
+{
+	unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
+	struct xive_cpu *xc;
+	int rc;
+
+	pr_debug("Setting up IPI for CPU %d\n", cpu);
+
+	xc = per_cpu(xive_cpu, cpu);
+
+	/* Check if we are already setup */
+	if (xc->hw_ipi != XIVE_BAD_IRQ)
+		return 0;
+
+	/* Register the IPI */
+	xive_request_ipi(cpu);
+
+	/* Grab an IPI from the backend, this will populate xc->hw_ipi */
+	if (xive_ops->get_ipi(cpu, xc))
+		return -EIO;
+
+	/*
+	 * Populate the IRQ data in the xive_cpu structure and
+	 * configure the HW / enable the IPIs.
+	 */
+	rc = xive_ops->populate_irq_data(xc->hw_ipi, &xc->ipi_data);
+	if (rc) {
+		pr_err("Failed to populate IPI data on CPU %d\n", cpu);
+		return -EIO;
+	}
+	rc = xive_ops->configure_irq(xc->hw_ipi,
+				     get_hard_smp_processor_id(cpu),
+				     xive_irq_priority, xive_ipi_irq);
+	if (rc) {
+		pr_err("Failed to map IPI CPU %d\n", cpu);
+		return -EIO;
+	}
+	pr_debug("CPU %d HW IPI 0x%x, virq %d, trig_mmio=%p\n", cpu,
+		 xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio);
+
+	/* Unmask it */
+	xive_do_source_set_mask(&xc->ipi_data, false);
+
+	return 0;
+}
+
+noinstr static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+	unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
+
+	/* Disable the IPI and free the IRQ data */
+
+	/* Already cleaned up ? */
+	if (xc->hw_ipi == XIVE_BAD_IRQ)
+		return;
+
+	/* TODO: clear IPI mapping */
+
+	/* Mask the IPI */
+	xive_do_source_set_mask(&xc->ipi_data, true);
+
+	/*
+	 * Note: We don't call xive_cleanup_irq_data() to free
+	 * the mappings as this is called from an IPI on kexec
+	 * which is not a safe environment to call iounmap()
+	 */
+
+	/* Deconfigure/mask in the backend */
+	xive_ops->configure_irq(xc->hw_ipi, hard_smp_processor_id(),
+				0xff, xive_ipi_irq);
+
+	/* Free the IPIs in the backend */
+	xive_ops->put_ipi(cpu, xc);
+}
+
+void __init xive_smp_probe(void)
+{
+	smp_ops->cause_ipi = xive_cause_ipi;
+
+	/* Register the IPI */
+	xive_init_ipis();
+
+	/* Allocate and setup IPI for the boot CPU */
+	xive_setup_cpu_ipi(smp_processor_id());
+}
+
+#endif /* CONFIG_SMP */
+
+static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq,
+			       irq_hw_number_t hw)
+{
+	int rc;
+
+	/*
+	 * Mark interrupts as edge sensitive by default so that resend
+	 * actually works. Will fix that up below if needed.
+	 */
+	irq_clear_status_flags(virq, IRQ_LEVEL);
+
+	rc = xive_irq_alloc_data(virq, hw);
+	if (rc)
+		return rc;
+
+	irq_set_chip_and_handler(virq, &xive_irq_chip, handle_fasteoi_irq);
+
+	return 0;
+}
+
+static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq)
+{
+	xive_irq_free_data(virq);
+}
+
+static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct,
+				 const u32 *intspec, unsigned int intsize,
+				 irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	*out_hwirq = intspec[0];
+
+	/*
+	 * If intsize is at least 2, we look for the type in the second cell,
+	 * we assume the LSB indicates a level interrupt.
+	 */
+	if (intsize > 1) {
+		if (intspec[1] & 1)
+			*out_flags = IRQ_TYPE_LEVEL_LOW;
+		else
+			*out_flags = IRQ_TYPE_EDGE_RISING;
+	} else
+		*out_flags = IRQ_TYPE_LEVEL_LOW;
+
+	return 0;
+}
+
+static int xive_irq_domain_match(struct irq_domain *h, struct device_node *node,
+				 enum irq_domain_bus_token bus_token)
+{
+	return xive_ops->match(node);
+}
+
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+static const char * const esb_names[] = { "RESET", "OFF", "PENDING", "QUEUED" };
+
+static const struct {
+	u64  mask;
+	char *name;
+} xive_irq_flags[] = {
+	{ XIVE_IRQ_FLAG_STORE_EOI, "STORE_EOI" },
+	{ XIVE_IRQ_FLAG_LSI,       "LSI"       },
+	{ XIVE_IRQ_FLAG_H_INT_ESB, "H_INT_ESB" },
+	{ XIVE_IRQ_FLAG_NO_EOI,    "NO_EOI"    },
+};
+
+static void xive_irq_domain_debug_show(struct seq_file *m, struct irq_domain *d,
+				       struct irq_data *irqd, int ind)
+{
+	struct xive_irq_data *xd;
+	u64 val;
+	int i;
+
+	/* No IRQ domain level information. To be done */
+	if (!irqd)
+		return;
+
+	if (!is_xive_irq(irq_data_get_irq_chip(irqd)))
+		return;
+
+	seq_printf(m, "%*sXIVE:\n", ind, "");
+	ind++;
+
+	xd = irq_data_get_irq_handler_data(irqd);
+	if (!xd) {
+		seq_printf(m, "%*snot assigned\n", ind, "");
+		return;
+	}
+
+	val = xive_esb_read(xd, XIVE_ESB_GET);
+	seq_printf(m, "%*sESB:      %s\n", ind, "", esb_names[val & 0x3]);
+	seq_printf(m, "%*sPstate:   %s %s\n", ind, "", xd->stale_p ? "stale" : "",
+		   xd->saved_p ? "saved" : "");
+	seq_printf(m, "%*sTarget:   %d\n", ind, "", xd->target);
+	seq_printf(m, "%*sChip:     %d\n", ind, "", xd->src_chip);
+	seq_printf(m, "%*sTrigger:  0x%016llx\n", ind, "", xd->trig_page);
+	seq_printf(m, "%*sEOI:      0x%016llx\n", ind, "", xd->eoi_page);
+	seq_printf(m, "%*sFlags:    0x%llx\n", ind, "", xd->flags);
+	for (i = 0; i < ARRAY_SIZE(xive_irq_flags); i++) {
+		if (xd->flags & xive_irq_flags[i].mask)
+			seq_printf(m, "%*s%s\n", ind + 12, "", xive_irq_flags[i].name);
+	}
+}
+#endif
+
+#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+static int xive_irq_domain_translate(struct irq_domain *d,
+				     struct irq_fwspec *fwspec,
+				     unsigned long *hwirq,
+				     unsigned int *type)
+{
+	return xive_irq_domain_xlate(d, to_of_node(fwspec->fwnode),
+				     fwspec->param, fwspec->param_count,
+				     hwirq, type);
+}
+
+static int xive_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				 unsigned int nr_irqs, void *arg)
+{
+	struct irq_fwspec *fwspec = arg;
+	irq_hw_number_t hwirq;
+	unsigned int type = IRQ_TYPE_NONE;
+	int i, rc;
+
+	rc = xive_irq_domain_translate(domain, fwspec, &hwirq, &type);
+	if (rc)
+		return rc;
+
+	pr_debug("%s %d/0x%lx #%d\n", __func__, virq, hwirq, nr_irqs);
+
+	for (i = 0; i < nr_irqs; i++) {
+		/* TODO: call xive_irq_domain_map() */
+
+		/*
+		 * Mark interrupts as edge sensitive by default so that resend
+		 * actually works. Will fix that up below if needed.
+		 */
+		irq_clear_status_flags(virq, IRQ_LEVEL);
+
+		/* allocates and sets handler data */
+		rc = xive_irq_alloc_data(virq + i, hwirq + i);
+		if (rc)
+			return rc;
+
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &xive_irq_chip, domain->host_data);
+		irq_set_handler(virq + i, handle_fasteoi_irq);
+	}
+
+	return 0;
+}
+
+static void xive_irq_domain_free(struct irq_domain *domain,
+				 unsigned int virq, unsigned int nr_irqs)
+{
+	int i;
+
+	pr_debug("%s %d #%d\n", __func__, virq, nr_irqs);
+
+	for (i = 0; i < nr_irqs; i++)
+		xive_irq_free_data(virq + i);
+}
+#endif
+
+static const struct irq_domain_ops xive_irq_domain_ops = {
+#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+	.alloc	= xive_irq_domain_alloc,
+	.free	= xive_irq_domain_free,
+	.translate = xive_irq_domain_translate,
+#endif
+	.match = xive_irq_domain_match,
+	.map = xive_irq_domain_map,
+	.unmap = xive_irq_domain_unmap,
+	.xlate = xive_irq_domain_xlate,
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+	.debug_show = xive_irq_domain_debug_show,
+#endif
+};
+
+static void __init xive_init_host(struct device_node *np)
+{
+	xive_irq_domain = irq_domain_add_tree(np, &xive_irq_domain_ops, NULL);
+	if (WARN_ON(xive_irq_domain == NULL))
+		return;
+	irq_set_default_host(xive_irq_domain);
+}
+
+static void xive_cleanup_cpu_queues(unsigned int cpu, struct xive_cpu *xc)
+{
+	if (xc->queue[xive_irq_priority].qpage)
+		xive_ops->cleanup_queue(cpu, xc, xive_irq_priority);
+}
+
+static int xive_setup_cpu_queues(unsigned int cpu, struct xive_cpu *xc)
+{
+	int rc = 0;
+
+	/* We setup 1 queues for now with a 64k page */
+	if (!xc->queue[xive_irq_priority].qpage)
+		rc = xive_ops->setup_queue(cpu, xc, xive_irq_priority);
+
+	return rc;
+}
+
+static int xive_prepare_cpu(unsigned int cpu)
+{
+	struct xive_cpu *xc;
+
+	xc = per_cpu(xive_cpu, cpu);
+	if (!xc) {
+		xc = kzalloc_node(sizeof(struct xive_cpu),
+				  GFP_KERNEL, cpu_to_node(cpu));
+		if (!xc)
+			return -ENOMEM;
+		xc->hw_ipi = XIVE_BAD_IRQ;
+		xc->chip_id = XIVE_INVALID_CHIP_ID;
+		if (xive_ops->prepare_cpu)
+			xive_ops->prepare_cpu(cpu, xc);
+
+		per_cpu(xive_cpu, cpu) = xc;
+	}
+
+	/* Setup EQs if not already */
+	return xive_setup_cpu_queues(cpu, xc);
+}
+
+static void xive_setup_cpu(void)
+{
+	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+
+	/* The backend might have additional things to do */
+	if (xive_ops->setup_cpu)
+		xive_ops->setup_cpu(smp_processor_id(), xc);
+
+	/* Set CPPR to 0xff to enable flow of interrupts */
+	xc->cppr = 0xff;
+	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
+}
+
+#ifdef CONFIG_SMP
+void xive_smp_setup_cpu(void)
+{
+	pr_debug("SMP setup CPU %d\n", smp_processor_id());
+
+	/* This will have already been done on the boot CPU */
+	if (smp_processor_id() != boot_cpuid)
+		xive_setup_cpu();
+
+}
+
+int xive_smp_prepare_cpu(unsigned int cpu)
+{
+	int rc;
+
+	/* Allocate per-CPU data and queues */
+	rc = xive_prepare_cpu(cpu);
+	if (rc)
+		return rc;
+
+	/* Allocate and setup IPI for the new CPU */
+	return xive_setup_cpu_ipi(cpu);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
+{
+	u32 irq;
+
+	/* We assume local irqs are disabled */
+	WARN_ON(!irqs_disabled());
+
+	/* Check what's already in the CPU queue */
+	while ((irq = xive_scan_interrupts(xc, false)) != 0) {
+		/*
+		 * We need to re-route that interrupt to its new destination.
+		 * First get and lock the descriptor
+		 */
+		struct irq_desc *desc = irq_to_desc(irq);
+		struct irq_data *d = irq_desc_get_irq_data(desc);
+		struct xive_irq_data *xd;
+
+		/*
+		 * Ignore anything that isn't a XIVE irq and ignore
+		 * IPIs, so can just be dropped.
+		 */
+		if (d->domain != xive_irq_domain)
+			continue;
+
+		/*
+		 * The IRQ should have already been re-routed, it's just a
+		 * stale in the old queue, so re-trigger it in order to make
+		 * it reach is new destination.
+		 */
+#ifdef DEBUG_FLUSH
+		pr_info("CPU %d: Got irq %d while offline, re-sending...\n",
+			cpu, irq);
+#endif
+		raw_spin_lock(&desc->lock);
+		xd = irq_desc_get_handler_data(desc);
+
+		/*
+		 * Clear saved_p to indicate that it's no longer pending
+		 */
+		xd->saved_p = false;
+
+		/*
+		 * For LSIs, we EOI, this will cause a resend if it's
+		 * still asserted. Otherwise do an MSI retrigger.
+		 */
+		if (xd->flags & XIVE_IRQ_FLAG_LSI)
+			xive_do_source_eoi(xd);
+		else
+			xive_irq_retrigger(d);
+
+		raw_spin_unlock(&desc->lock);
+	}
+}
+
+void xive_smp_disable_cpu(void)
+{
+	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+	unsigned int cpu = smp_processor_id();
+
+	/* Migrate interrupts away from the CPU */
+	irq_migrate_all_off_this_cpu();
+
+	/* Set CPPR to 0 to disable flow of interrupts */
+	xc->cppr = 0;
+	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);
+
+	/* Flush everything still in the queue */
+	xive_flush_cpu_queue(cpu, xc);
+
+	/* Re-enable CPPR  */
+	xc->cppr = 0xff;
+	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
+}
+
+void xive_flush_interrupt(void)
+{
+	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+	unsigned int cpu = smp_processor_id();
+
+	/* Called if an interrupt occurs while the CPU is hot unplugged */
+	xive_flush_cpu_queue(cpu, xc);
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+#endif /* CONFIG_SMP */
+
+noinstr void xive_teardown_cpu(void)
+{
+	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+	unsigned int cpu = smp_processor_id();
+
+	/* Set CPPR to 0 to disable flow of interrupts */
+	xc->cppr = 0;
+	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);
+
+	if (xive_ops->teardown_cpu)
+		xive_ops->teardown_cpu(cpu, xc);
+
+#ifdef CONFIG_SMP
+	/* Get rid of IPI */
+	xive_cleanup_cpu_ipi(cpu, xc);
+#endif
+
+	/* Disable and free the queues */
+	xive_cleanup_cpu_queues(cpu, xc);
+}
+
+void xive_shutdown(void)
+{
+	xive_ops->shutdown();
+}
+
+bool __init xive_core_init(struct device_node *np, const struct xive_ops *ops,
+			   void __iomem *area, u32 offset, u8 max_prio)
+{
+	xive_tima = area;
+	xive_tima_offset = offset;
+	xive_ops = ops;
+	xive_irq_priority = max_prio;
+
+	ppc_md.get_irq = xive_get_irq;
+	__xive_enabled = true;
+
+	pr_debug("Initializing host..\n");
+	xive_init_host(np);
+
+	pr_debug("Initializing boot CPU..\n");
+
+	/* Allocate per-CPU data and queues */
+	xive_prepare_cpu(smp_processor_id());
+
+	/* Get ready for interrupts */
+	xive_setup_cpu();
+
+	pr_info("Interrupt handling initialized with %s backend\n",
+		xive_ops->name);
+	pr_info("Using priority %d for all interrupts\n", max_prio);
+
+	return true;
+}
+
+__be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift)
+{
+	unsigned int alloc_order;
+	struct page *pages;
+	__be32 *qpage;
+
+	alloc_order = xive_alloc_order(queue_shift);
+	pages = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, alloc_order);
+	if (!pages)
+		return ERR_PTR(-ENOMEM);
+	qpage = (__be32 *)page_address(pages);
+	memset(qpage, 0, 1 << queue_shift);
+
+	return qpage;
+}
+
+static int __init xive_off(char *arg)
+{
+	xive_cmdline_disabled = true;
+	return 1;
+}
+__setup("xive=off", xive_off);
+
+static int __init xive_store_eoi_cmdline(char *arg)
+{
+	if (!arg)
+		return 1;
+
+	if (strncmp(arg, "off", 3) == 0) {
+		pr_info("StoreEOI disabled on kernel command line\n");
+		xive_store_eoi = false;
+	}
+	return 1;
+}
+__setup("xive.store-eoi=", xive_store_eoi_cmdline);
+
+#ifdef CONFIG_DEBUG_FS
+static void xive_debug_show_ipi(struct seq_file *m, int cpu)
+{
+	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
+
+	seq_printf(m, "CPU %d: ", cpu);
+	if (xc) {
+		seq_printf(m, "pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr);
+
+#ifdef CONFIG_SMP
+		{
+			char buffer[128];
+
+			xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer));
+			seq_printf(m, "IPI=0x%08x %s", xc->hw_ipi, buffer);
+		}
+#endif
+	}
+	seq_puts(m, "\n");
+}
+
+static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int rc;
+	u32 target;
+	u8 prio;
+	u32 lirq;
+	char buffer[128];
+
+	rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
+	if (rc) {
+		seq_printf(m, "IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
+		return;
+	}
+
+	seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
+		   hw_irq, target, prio, lirq);
+
+	xive_irq_data_dump(irq_data_get_irq_handler_data(d), buffer, sizeof(buffer));
+	seq_puts(m, buffer);
+	seq_puts(m, "\n");
+}
+
+static int xive_irq_debug_show(struct seq_file *m, void *private)
+{
+	unsigned int i;
+	struct irq_desc *desc;
+
+	for_each_irq_desc(i, desc) {
+		struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i);
+
+		if (d)
+			xive_debug_show_irq(m, d);
+	}
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(xive_irq_debug);
+
+static int xive_ipi_debug_show(struct seq_file *m, void *private)
+{
+	int cpu;
+
+	if (xive_ops->debug_show)
+		xive_ops->debug_show(m, private);
+
+	for_each_online_cpu(cpu)
+		xive_debug_show_ipi(m, cpu);
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(xive_ipi_debug);
+
+static void xive_eq_debug_show_one(struct seq_file *m, struct xive_q *q, u8 prio)
+{
+	int i;
+
+	seq_printf(m, "EQ%d idx=%d T=%d\n", prio, q->idx, q->toggle);
+	if (q->qpage) {
+		for (i = 0; i < q->msk + 1; i++) {
+			if (!(i % 8))
+				seq_printf(m, "%05d ", i);
+			seq_printf(m, "%08x%s", be32_to_cpup(q->qpage + i),
+				   (i + 1) % 8 ? " " : "\n");
+		}
+	}
+	seq_puts(m, "\n");
+}
+
+static int xive_eq_debug_show(struct seq_file *m, void *private)
+{
+	int cpu = (long)m->private;
+	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
+
+	if (xc)
+		xive_eq_debug_show_one(m, &xc->queue[xive_irq_priority],
+				       xive_irq_priority);
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(xive_eq_debug);
+
+static void xive_core_debugfs_create(void)
+{
+	struct dentry *xive_dir;
+	struct dentry *xive_eq_dir;
+	long cpu;
+	char name[16];
+
+	xive_dir = debugfs_create_dir("xive", arch_debugfs_dir);
+	if (IS_ERR(xive_dir))
+		return;
+
+	debugfs_create_file("ipis", 0400, xive_dir,
+			    NULL, &xive_ipi_debug_fops);
+	debugfs_create_file("interrupts", 0400, xive_dir,
+			    NULL, &xive_irq_debug_fops);
+	xive_eq_dir = debugfs_create_dir("eqs", xive_dir);
+	for_each_possible_cpu(cpu) {
+		snprintf(name, sizeof(name), "cpu%ld", cpu);
+		debugfs_create_file(name, 0400, xive_eq_dir, (void *)cpu,
+				    &xive_eq_debug_fops);
+	}
+	debugfs_create_bool("store-eoi", 0600, xive_dir, &xive_store_eoi);
+
+	if (xive_ops->debug_create)
+		xive_ops->debug_create(xive_dir);
+}
+#else
+static inline void xive_core_debugfs_create(void) { }
+#endif /* CONFIG_DEBUG_FS */
+
+int xive_core_debug_init(void)
+{
+	if (xive_enabled() && IS_ENABLED(CONFIG_DEBUG_FS))
+		xive_core_debugfs_create();
+
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
new file mode 100644
index 0000000000..f1c0fa6ece
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -0,0 +1,877 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "xive: " fmt
+
+#include <linux/types.h>
+#include <linux/irq.h>
+#include <linux/debugfs.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/cpumask.h>
+#include <linux/mm.h>
+#include <linux/kmemleak.h>
+
+#include <asm/machdep.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/opal.h>
+#include <asm/kvm_ppc.h>
+
+#include "xive-internal.h"
+
+
+static u32 xive_provision_size;
+static u32 *xive_provision_chips;
+static u32 xive_provision_chip_count;
+static u32 xive_queue_shift;
+static u32 xive_pool_vps = XIVE_INVALID_VP;
+static struct kmem_cache *xive_provision_cache;
+static bool xive_has_single_esc;
+bool xive_has_save_restore;
+
+int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
+{
+	__be64 flags, eoi_page, trig_page;
+	__be32 esb_shift, src_chip;
+	u64 opal_flags;
+	s64 rc;
+
+	memset(data, 0, sizeof(*data));
+
+	rc = opal_xive_get_irq_info(hw_irq, &flags, &eoi_page, &trig_page,
+				    &esb_shift, &src_chip);
+	if (rc) {
+		pr_err("opal_xive_get_irq_info(0x%x) returned %lld\n",
+		       hw_irq, rc);
+		return -EINVAL;
+	}
+
+	opal_flags = be64_to_cpu(flags);
+	if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI)
+		data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
+	if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI2)
+		data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
+	if (opal_flags & OPAL_XIVE_IRQ_LSI)
+		data->flags |= XIVE_IRQ_FLAG_LSI;
+	data->eoi_page = be64_to_cpu(eoi_page);
+	data->trig_page = be64_to_cpu(trig_page);
+	data->esb_shift = be32_to_cpu(esb_shift);
+	data->src_chip = be32_to_cpu(src_chip);
+
+	data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift);
+	if (!data->eoi_mmio) {
+		pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq);
+		return -ENOMEM;
+	}
+
+	data->hw_irq = hw_irq;
+
+	if (!data->trig_page)
+		return 0;
+	if (data->trig_page == data->eoi_page) {
+		data->trig_mmio = data->eoi_mmio;
+		return 0;
+	}
+
+	data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift);
+	if (!data->trig_mmio) {
+		pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq);
+		return -ENOMEM;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_populate_irq_data);
+
+int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
+{
+	s64 rc;
+
+	for (;;) {
+		rc = opal_xive_set_irq_config(hw_irq, target, prio, sw_irq);
+		if (rc != OPAL_BUSY)
+			break;
+		msleep(OPAL_BUSY_DELAY_MS);
+	}
+	return rc == 0 ? 0 : -ENXIO;
+}
+EXPORT_SYMBOL_GPL(xive_native_configure_irq);
+
+static int xive_native_get_irq_config(u32 hw_irq, u32 *target, u8 *prio,
+				      u32 *sw_irq)
+{
+	s64 rc;
+	__be64 vp;
+	__be32 lirq;
+
+	rc = opal_xive_get_irq_config(hw_irq, &vp, prio, &lirq);
+
+	*target = be64_to_cpu(vp);
+	*sw_irq = be32_to_cpu(lirq);
+
+	return rc == 0 ? 0 : -ENXIO;
+}
+
+#define vp_err(vp, fmt, ...) pr_err("VP[0x%x]: " fmt, vp, ##__VA_ARGS__)
+
+/* This can be called multiple time to change a queue configuration */
+int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
+				__be32 *qpage, u32 order, bool can_escalate)
+{
+	s64 rc = 0;
+	__be64 qeoi_page_be;
+	__be32 esc_irq_be;
+	u64 flags, qpage_phys;
+
+	/* If there's an actual queue page, clean it */
+	if (order) {
+		if (WARN_ON(!qpage))
+			return -EINVAL;
+		qpage_phys = __pa(qpage);
+	} else
+		qpage_phys = 0;
+
+	/* Initialize the rest of the fields */
+	q->msk = order ? ((1u << (order - 2)) - 1) : 0;
+	q->idx = 0;
+	q->toggle = 0;
+
+	rc = opal_xive_get_queue_info(vp_id, prio, NULL, NULL,
+				      &qeoi_page_be,
+				      &esc_irq_be,
+				      NULL);
+	if (rc) {
+		vp_err(vp_id, "Failed to get queue %d info : %lld\n", prio, rc);
+		rc = -EIO;
+		goto fail;
+	}
+	q->eoi_phys = be64_to_cpu(qeoi_page_be);
+
+	/* Default flags */
+	flags = OPAL_XIVE_EQ_ALWAYS_NOTIFY | OPAL_XIVE_EQ_ENABLED;
+
+	/* Escalation needed ? */
+	if (can_escalate) {
+		q->esc_irq = be32_to_cpu(esc_irq_be);
+		flags |= OPAL_XIVE_EQ_ESCALATE;
+	}
+
+	/* Configure and enable the queue in HW */
+	for (;;) {
+		rc = opal_xive_set_queue_info(vp_id, prio, qpage_phys, order, flags);
+		if (rc != OPAL_BUSY)
+			break;
+		msleep(OPAL_BUSY_DELAY_MS);
+	}
+	if (rc) {
+		vp_err(vp_id, "Failed to set queue %d info: %lld\n", prio, rc);
+		rc = -EIO;
+	} else {
+		/*
+		 * KVM code requires all of the above to be visible before
+		 * q->qpage is set due to how it manages IPI EOIs
+		 */
+		wmb();
+		q->qpage = qpage;
+	}
+fail:
+	return rc;
+}
+EXPORT_SYMBOL_GPL(xive_native_configure_queue);
+
+static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
+{
+	s64 rc;
+
+	/* Disable the queue in HW */
+	for (;;) {
+		rc = opal_xive_set_queue_info(vp_id, prio, 0, 0, 0);
+		if (rc != OPAL_BUSY)
+			break;
+		msleep(OPAL_BUSY_DELAY_MS);
+	}
+	if (rc)
+		vp_err(vp_id, "Failed to disable queue %d : %lld\n", prio, rc);
+}
+
+void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
+{
+	__xive_native_disable_queue(vp_id, q, prio);
+}
+EXPORT_SYMBOL_GPL(xive_native_disable_queue);
+
+static int xive_native_setup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
+{
+	struct xive_q *q = &xc->queue[prio];
+	__be32 *qpage;
+
+	qpage = xive_queue_page_alloc(cpu, xive_queue_shift);
+	if (IS_ERR(qpage))
+		return PTR_ERR(qpage);
+
+	return xive_native_configure_queue(get_hard_smp_processor_id(cpu),
+					   q, prio, qpage, xive_queue_shift, false);
+}
+
+static void xive_native_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
+{
+	struct xive_q *q = &xc->queue[prio];
+	unsigned int alloc_order;
+
+	/*
+	 * We use the variant with no iounmap as this is called on exec
+	 * from an IPI and iounmap isn't safe
+	 */
+	__xive_native_disable_queue(get_hard_smp_processor_id(cpu), q, prio);
+	alloc_order = xive_alloc_order(xive_queue_shift);
+	free_pages((unsigned long)q->qpage, alloc_order);
+	q->qpage = NULL;
+}
+
+static bool xive_native_match(struct device_node *node)
+{
+	return of_device_is_compatible(node, "ibm,opal-xive-vc");
+}
+
+static s64 opal_xive_allocate_irq(u32 chip_id)
+{
+	s64 irq = opal_xive_allocate_irq_raw(chip_id);
+
+	/*
+	 * Old versions of skiboot can incorrectly return 0xffffffff to
+	 * indicate no space, fix it up here.
+	 */
+	return irq == 0xffffffff ? OPAL_RESOURCE : irq;
+}
+
+#ifdef CONFIG_SMP
+static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+	s64 irq;
+
+	/* Allocate an IPI and populate info about it */
+	for (;;) {
+		irq = opal_xive_allocate_irq(xc->chip_id);
+		if (irq == OPAL_BUSY) {
+			msleep(OPAL_BUSY_DELAY_MS);
+			continue;
+		}
+		if (irq < 0) {
+			pr_err("Failed to allocate IPI on CPU %d\n", cpu);
+			return -ENXIO;
+		}
+		xc->hw_ipi = irq;
+		break;
+	}
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+u32 xive_native_alloc_irq_on_chip(u32 chip_id)
+{
+	s64 rc;
+
+	for (;;) {
+		rc = opal_xive_allocate_irq(chip_id);
+		if (rc != OPAL_BUSY)
+			break;
+		msleep(OPAL_BUSY_DELAY_MS);
+	}
+	if (rc < 0)
+		return 0;
+	return rc;
+}
+EXPORT_SYMBOL_GPL(xive_native_alloc_irq_on_chip);
+
+void xive_native_free_irq(u32 irq)
+{
+	for (;;) {
+		s64 rc = opal_xive_free_irq(irq);
+		if (rc != OPAL_BUSY)
+			break;
+		msleep(OPAL_BUSY_DELAY_MS);
+	}
+}
+EXPORT_SYMBOL_GPL(xive_native_free_irq);
+
+#ifdef CONFIG_SMP
+static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+	s64 rc;
+
+	/* Free the IPI */
+	if (xc->hw_ipi == XIVE_BAD_IRQ)
+		return;
+	for (;;) {
+		rc = opal_xive_free_irq(xc->hw_ipi);
+		if (rc == OPAL_BUSY) {
+			msleep(OPAL_BUSY_DELAY_MS);
+			continue;
+		}
+		xc->hw_ipi = XIVE_BAD_IRQ;
+		break;
+	}
+}
+#endif /* CONFIG_SMP */
+
+static void xive_native_shutdown(void)
+{
+	/* Switch the XIVE to emulation mode */
+	opal_xive_reset(OPAL_XIVE_MODE_EMU);
+}
+
+/*
+ * Perform an "ack" cycle on the current thread, thus
+ * grabbing the pending active priorities and updating
+ * the CPPR to the most favored one.
+ */
+static void xive_native_update_pending(struct xive_cpu *xc)
+{
+	u8 he, cppr;
+	u16 ack;
+
+	/* Perform the acknowledge hypervisor to register cycle */
+	ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_HV_REG));
+
+	/* Synchronize subsequent queue accesses */
+	mb();
+
+	/*
+	 * Grab the CPPR and the "HE" field which indicates the source
+	 * of the hypervisor interrupt (if any)
+	 */
+	cppr = ack & 0xff;
+	he = (ack >> 8) >> 6;
+	switch(he) {
+	case TM_QW3_NSR_HE_NONE: /* Nothing to see here */
+		break;
+	case TM_QW3_NSR_HE_PHYS: /* Physical thread interrupt */
+		if (cppr == 0xff)
+			return;
+		/* Mark the priority pending */
+		xc->pending_prio |= 1 << cppr;
+
+		/*
+		 * A new interrupt should never have a CPPR less favored
+		 * than our current one.
+		 */
+		if (cppr >= xc->cppr)
+			pr_err("CPU %d odd ack CPPR, got %d at %d\n",
+			       smp_processor_id(), cppr, xc->cppr);
+
+		/* Update our idea of what the CPPR is */
+		xc->cppr = cppr;
+		break;
+	case TM_QW3_NSR_HE_POOL: /* HV Pool interrupt (unused) */
+	case TM_QW3_NSR_HE_LSI:  /* Legacy FW LSI (unused) */
+		pr_err("CPU %d got unexpected interrupt type HE=%d\n",
+		       smp_processor_id(), he);
+		return;
+	}
+}
+
+static void xive_native_prepare_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+	xc->chip_id = cpu_to_chip_id(cpu);
+}
+
+static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+	s64 rc;
+	u32 vp;
+	__be64 vp_cam_be;
+	u64 vp_cam;
+
+	if (xive_pool_vps == XIVE_INVALID_VP)
+		return;
+
+	/* Check if pool VP already active, if it is, pull it */
+	if (in_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2) & TM_QW2W2_VP)
+		in_be64(xive_tima + TM_SPC_PULL_POOL_CTX);
+
+	/* Enable the pool VP */
+	vp = xive_pool_vps + cpu;
+	for (;;) {
+		rc = opal_xive_set_vp_info(vp, OPAL_XIVE_VP_ENABLED, 0);
+		if (rc != OPAL_BUSY)
+			break;
+		msleep(OPAL_BUSY_DELAY_MS);
+	}
+	if (rc) {
+		pr_err("Failed to enable pool VP on CPU %d\n", cpu);
+		return;
+	}
+
+	/* Grab it's CAM value */
+	rc = opal_xive_get_vp_info(vp, NULL, &vp_cam_be, NULL, NULL);
+	if (rc) {
+		pr_err("Failed to get pool VP info CPU %d\n", cpu);
+		return;
+	}
+	vp_cam = be64_to_cpu(vp_cam_be);
+
+	/* Push it on the CPU (set LSMFB to 0xff to skip backlog scan) */
+	out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD0, 0xff);
+	out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2, TM_QW2W2_VP | vp_cam);
+}
+
+static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+	s64 rc;
+	u32 vp;
+
+	if (xive_pool_vps == XIVE_INVALID_VP)
+		return;
+
+	/* Pull the pool VP from the CPU */
+	in_be64(xive_tima + TM_SPC_PULL_POOL_CTX);
+
+	/* Disable it */
+	vp = xive_pool_vps + cpu;
+	for (;;) {
+		rc = opal_xive_set_vp_info(vp, 0, 0);
+		if (rc != OPAL_BUSY)
+			break;
+		msleep(OPAL_BUSY_DELAY_MS);
+	}
+}
+
+void xive_native_sync_source(u32 hw_irq)
+{
+	opal_xive_sync(XIVE_SYNC_EAS, hw_irq);
+}
+EXPORT_SYMBOL_GPL(xive_native_sync_source);
+
+void xive_native_sync_queue(u32 hw_irq)
+{
+	opal_xive_sync(XIVE_SYNC_QUEUE, hw_irq);
+}
+EXPORT_SYMBOL_GPL(xive_native_sync_queue);
+
+#ifdef CONFIG_DEBUG_FS
+static int xive_native_debug_create(struct dentry *xive_dir)
+{
+	debugfs_create_bool("save-restore", 0600, xive_dir, &xive_has_save_restore);
+	return 0;
+}
+#endif
+
+static const struct xive_ops xive_native_ops = {
+	.populate_irq_data	= xive_native_populate_irq_data,
+	.configure_irq		= xive_native_configure_irq,
+	.get_irq_config		= xive_native_get_irq_config,
+	.setup_queue		= xive_native_setup_queue,
+	.cleanup_queue		= xive_native_cleanup_queue,
+	.match			= xive_native_match,
+	.shutdown		= xive_native_shutdown,
+	.update_pending		= xive_native_update_pending,
+	.prepare_cpu		= xive_native_prepare_cpu,
+	.setup_cpu		= xive_native_setup_cpu,
+	.teardown_cpu		= xive_native_teardown_cpu,
+	.sync_source		= xive_native_sync_source,
+#ifdef CONFIG_SMP
+	.get_ipi		= xive_native_get_ipi,
+	.put_ipi		= xive_native_put_ipi,
+#endif /* CONFIG_SMP */
+#ifdef CONFIG_DEBUG_FS
+	.debug_create		= xive_native_debug_create,
+#endif /* CONFIG_DEBUG_FS */
+	.name			= "native",
+};
+
+static bool __init xive_parse_provisioning(struct device_node *np)
+{
+	int rc;
+
+	if (of_property_read_u32(np, "ibm,xive-provision-page-size",
+				 &xive_provision_size) < 0)
+		return true;
+	rc = of_property_count_elems_of_size(np, "ibm,xive-provision-chips", 4);
+	if (rc < 0) {
+		pr_err("Error %d getting provision chips array\n", rc);
+		return false;
+	}
+	xive_provision_chip_count = rc;
+	if (rc == 0)
+		return true;
+
+	xive_provision_chips = kcalloc(4, xive_provision_chip_count,
+				       GFP_KERNEL);
+	if (WARN_ON(!xive_provision_chips))
+		return false;
+
+	rc = of_property_read_u32_array(np, "ibm,xive-provision-chips",
+					xive_provision_chips,
+					xive_provision_chip_count);
+	if (rc < 0) {
+		pr_err("Error %d reading provision chips array\n", rc);
+		return false;
+	}
+
+	xive_provision_cache = kmem_cache_create("xive-provision",
+						 xive_provision_size,
+						 xive_provision_size,
+						 0, NULL);
+	if (!xive_provision_cache) {
+		pr_err("Failed to allocate provision cache\n");
+		return false;
+	}
+	return true;
+}
+
+static void __init xive_native_setup_pools(void)
+{
+	/* Allocate a pool big enough */
+	pr_debug("Allocating VP block for pool size %u\n", nr_cpu_ids);
+
+	xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids);
+	if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP))
+		pr_err("Failed to allocate pool VP, KVM might not function\n");
+
+	pr_debug("Pool VPs allocated at 0x%x for %u max CPUs\n",
+		 xive_pool_vps, nr_cpu_ids);
+}
+
+u32 xive_native_default_eq_shift(void)
+{
+	return xive_queue_shift;
+}
+EXPORT_SYMBOL_GPL(xive_native_default_eq_shift);
+
+unsigned long xive_tima_os;
+EXPORT_SYMBOL_GPL(xive_tima_os);
+
+bool __init xive_native_init(void)
+{
+	struct device_node *np;
+	struct resource r;
+	void __iomem *tima;
+	struct property *prop;
+	u8 max_prio = 7;
+	const __be32 *p;
+	u32 val, cpu;
+	s64 rc;
+
+	if (xive_cmdline_disabled)
+		return false;
+
+	pr_devel("xive_native_init()\n");
+	np = of_find_compatible_node(NULL, NULL, "ibm,opal-xive-pe");
+	if (!np) {
+		pr_devel("not found !\n");
+		return false;
+	}
+	pr_devel("Found %pOF\n", np);
+
+	/* Resource 1 is HV window */
+	if (of_address_to_resource(np, 1, &r)) {
+		pr_err("Failed to get thread mgmnt area resource\n");
+		goto err_put;
+	}
+	tima = ioremap(r.start, resource_size(&r));
+	if (!tima) {
+		pr_err("Failed to map thread mgmnt area\n");
+		goto err_put;
+	}
+
+	/* Read number of priorities */
+	if (of_property_read_u32(np, "ibm,xive-#priorities", &val) == 0)
+		max_prio = val - 1;
+
+	/* Iterate the EQ sizes and pick one */
+	of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, p, val) {
+		xive_queue_shift = val;
+		if (val == PAGE_SHIFT)
+			break;
+	}
+
+	/* Do we support single escalation */
+	xive_has_single_esc = of_property_read_bool(np, "single-escalation-support");
+
+	xive_has_save_restore = of_property_read_bool(np, "vp-save-restore");
+
+	/* Configure Thread Management areas for KVM */
+	for_each_possible_cpu(cpu)
+		kvmppc_set_xive_tima(cpu, r.start, tima);
+
+	/* Resource 2 is OS window */
+	if (of_address_to_resource(np, 2, &r)) {
+		pr_err("Failed to get thread mgmnt area resource\n");
+		goto err_put;
+	}
+
+	xive_tima_os = r.start;
+
+	/* Grab size of provisioning pages */
+	xive_parse_provisioning(np);
+
+	/* Switch the XIVE to exploitation mode */
+	rc = opal_xive_reset(OPAL_XIVE_MODE_EXPL);
+	if (rc) {
+		pr_err("Switch to exploitation mode failed with error %lld\n", rc);
+		goto err_put;
+	}
+
+	/* Setup some dummy HV pool VPs */
+	xive_native_setup_pools();
+
+	/* Initialize XIVE core with our backend */
+	if (!xive_core_init(np, &xive_native_ops, tima, TM_QW3_HV_PHYS,
+			    max_prio)) {
+		opal_xive_reset(OPAL_XIVE_MODE_EMU);
+		goto err_put;
+	}
+	of_node_put(np);
+	pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10));
+	return true;
+
+err_put:
+	of_node_put(np);
+	return false;
+}
+
+static bool xive_native_provision_pages(void)
+{
+	u32 i;
+	void *p;
+
+	for (i = 0; i < xive_provision_chip_count; i++) {
+		u32 chip = xive_provision_chips[i];
+
+		/*
+		 * XXX TODO: Try to make the allocation local to the node where
+		 * the chip resides.
+		 */
+		p = kmem_cache_alloc(xive_provision_cache, GFP_KERNEL);
+		if (!p) {
+			pr_err("Failed to allocate provisioning page\n");
+			return false;
+		}
+		kmemleak_ignore(p);
+		opal_xive_donate_page(chip, __pa(p));
+	}
+	return true;
+}
+
+u32 xive_native_alloc_vp_block(u32 max_vcpus)
+{
+	s64 rc;
+	u32 order;
+
+	order = fls(max_vcpus) - 1;
+	if (max_vcpus > (1 << order))
+		order++;
+
+	pr_debug("VP block alloc, for max VCPUs %d use order %d\n",
+		 max_vcpus, order);
+
+	for (;;) {
+		rc = opal_xive_alloc_vp_block(order);
+		switch (rc) {
+		case OPAL_BUSY:
+			msleep(OPAL_BUSY_DELAY_MS);
+			break;
+		case OPAL_XIVE_PROVISIONING:
+			if (!xive_native_provision_pages())
+				return XIVE_INVALID_VP;
+			break;
+		default:
+			if (rc < 0) {
+				pr_err("OPAL failed to allocate VCPUs order %d, err %lld\n",
+				       order, rc);
+				return XIVE_INVALID_VP;
+			}
+			return rc;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(xive_native_alloc_vp_block);
+
+void xive_native_free_vp_block(u32 vp_base)
+{
+	s64 rc;
+
+	if (vp_base == XIVE_INVALID_VP)
+		return;
+
+	rc = opal_xive_free_vp_block(vp_base);
+	if (rc < 0)
+		pr_warn("OPAL error %lld freeing VP block\n", rc);
+}
+EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
+
+int xive_native_enable_vp(u32 vp_id, bool single_escalation)
+{
+	s64 rc;
+	u64 flags = OPAL_XIVE_VP_ENABLED;
+
+	if (single_escalation)
+		flags |= OPAL_XIVE_VP_SINGLE_ESCALATION;
+	for (;;) {
+		rc = opal_xive_set_vp_info(vp_id, flags, 0);
+		if (rc != OPAL_BUSY)
+			break;
+		msleep(OPAL_BUSY_DELAY_MS);
+	}
+	if (rc)
+		vp_err(vp_id, "Failed to enable VP : %lld\n", rc);
+	return rc ? -EIO : 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_enable_vp);
+
+int xive_native_disable_vp(u32 vp_id)
+{
+	s64 rc;
+
+	for (;;) {
+		rc = opal_xive_set_vp_info(vp_id, 0, 0);
+		if (rc != OPAL_BUSY)
+			break;
+		msleep(OPAL_BUSY_DELAY_MS);
+	}
+	if (rc)
+		vp_err(vp_id, "Failed to disable VP : %lld\n", rc);
+	return rc ? -EIO : 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_disable_vp);
+
+int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
+{
+	__be64 vp_cam_be;
+	__be32 vp_chip_id_be;
+	s64 rc;
+
+	rc = opal_xive_get_vp_info(vp_id, NULL, &vp_cam_be, NULL, &vp_chip_id_be);
+	if (rc) {
+		vp_err(vp_id, "Failed to get VP info : %lld\n", rc);
+		return -EIO;
+	}
+	*out_cam_id = be64_to_cpu(vp_cam_be) & 0xffffffffu;
+	*out_chip_id = be32_to_cpu(vp_chip_id_be);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
+
+bool xive_native_has_single_escalation(void)
+{
+	return xive_has_single_esc;
+}
+EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);
+
+bool xive_native_has_save_restore(void)
+{
+	return xive_has_save_restore;
+}
+EXPORT_SYMBOL_GPL(xive_native_has_save_restore);
+
+int xive_native_get_queue_info(u32 vp_id, u32 prio,
+			       u64 *out_qpage,
+			       u64 *out_qsize,
+			       u64 *out_qeoi_page,
+			       u32 *out_escalate_irq,
+			       u64 *out_qflags)
+{
+	__be64 qpage;
+	__be64 qsize;
+	__be64 qeoi_page;
+	__be32 escalate_irq;
+	__be64 qflags;
+	s64 rc;
+
+	rc = opal_xive_get_queue_info(vp_id, prio, &qpage, &qsize,
+				      &qeoi_page, &escalate_irq, &qflags);
+	if (rc) {
+		vp_err(vp_id, "failed to get queue %d info : %lld\n", prio, rc);
+		return -EIO;
+	}
+
+	if (out_qpage)
+		*out_qpage = be64_to_cpu(qpage);
+	if (out_qsize)
+		*out_qsize = be64_to_cpu(qsize);
+	if (out_qeoi_page)
+		*out_qeoi_page = be64_to_cpu(qeoi_page);
+	if (out_escalate_irq)
+		*out_escalate_irq = be32_to_cpu(escalate_irq);
+	if (out_qflags)
+		*out_qflags = be64_to_cpu(qflags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_queue_info);
+
+int xive_native_get_queue_state(u32 vp_id, u32 prio, u32 *qtoggle, u32 *qindex)
+{
+	__be32 opal_qtoggle;
+	__be32 opal_qindex;
+	s64 rc;
+
+	rc = opal_xive_get_queue_state(vp_id, prio, &opal_qtoggle,
+				       &opal_qindex);
+	if (rc) {
+		vp_err(vp_id, "failed to get queue %d state : %lld\n", prio, rc);
+		return -EIO;
+	}
+
+	if (qtoggle)
+		*qtoggle = be32_to_cpu(opal_qtoggle);
+	if (qindex)
+		*qindex = be32_to_cpu(opal_qindex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_queue_state);
+
+int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex)
+{
+	s64 rc;
+
+	rc = opal_xive_set_queue_state(vp_id, prio, qtoggle, qindex);
+	if (rc) {
+		vp_err(vp_id, "failed to set queue %d state : %lld\n", prio, rc);
+		return -EIO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_set_queue_state);
+
+bool xive_native_has_queue_state_support(void)
+{
+	return opal_check_token(OPAL_XIVE_GET_QUEUE_STATE) &&
+		opal_check_token(OPAL_XIVE_SET_QUEUE_STATE);
+}
+EXPORT_SYMBOL_GPL(xive_native_has_queue_state_support);
+
+int xive_native_get_vp_state(u32 vp_id, u64 *out_state)
+{
+	__be64 state;
+	s64 rc;
+
+	rc = opal_xive_get_vp_state(vp_id, &state);
+	if (rc) {
+		vp_err(vp_id, "failed to get vp state : %lld\n", rc);
+		return -EIO;
+	}
+
+	if (out_state)
+		*out_state = be64_to_cpu(state);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_vp_state);
+
+machine_arch_initcall(powernv, xive_core_debug_init);
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
new file mode 100644
index 0000000000..e454192643
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -0,0 +1,892 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "xive: " fmt
+
+#include <linux/types.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/bitmap.h>
+#include <linux/cpumask.h>
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/libfdt.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/hvcall.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
+
+#include "xive-internal.h"
+
+static u32 xive_queue_shift;
+
+struct xive_irq_bitmap {
+	unsigned long		*bitmap;
+	unsigned int		base;
+	unsigned int		count;
+	spinlock_t		lock;
+	struct list_head	list;
+};
+
+static LIST_HEAD(xive_irq_bitmaps);
+
+static int __init xive_irq_bitmap_add(int base, int count)
+{
+	struct xive_irq_bitmap *xibm;
+
+	xibm = kzalloc(sizeof(*xibm), GFP_KERNEL);
+	if (!xibm)
+		return -ENOMEM;
+
+	spin_lock_init(&xibm->lock);
+	xibm->base = base;
+	xibm->count = count;
+	xibm->bitmap = bitmap_zalloc(xibm->count, GFP_KERNEL);
+	if (!xibm->bitmap) {
+		kfree(xibm);
+		return -ENOMEM;
+	}
+	list_add(&xibm->list, &xive_irq_bitmaps);
+
+	pr_info("Using IRQ range [%x-%x]", xibm->base,
+		xibm->base + xibm->count - 1);
+	return 0;
+}
+
+static void xive_irq_bitmap_remove_all(void)
+{
+	struct xive_irq_bitmap *xibm, *tmp;
+
+	list_for_each_entry_safe(xibm, tmp, &xive_irq_bitmaps, list) {
+		list_del(&xibm->list);
+		bitmap_free(xibm->bitmap);
+		kfree(xibm);
+	}
+}
+
+static int __xive_irq_bitmap_alloc(struct xive_irq_bitmap *xibm)
+{
+	int irq;
+
+	irq = find_first_zero_bit(xibm->bitmap, xibm->count);
+	if (irq != xibm->count) {
+		set_bit(irq, xibm->bitmap);
+		irq += xibm->base;
+	} else {
+		irq = -ENOMEM;
+	}
+
+	return irq;
+}
+
+static int xive_irq_bitmap_alloc(void)
+{
+	struct xive_irq_bitmap *xibm;
+	unsigned long flags;
+	int irq = -ENOENT;
+
+	list_for_each_entry(xibm, &xive_irq_bitmaps, list) {
+		spin_lock_irqsave(&xibm->lock, flags);
+		irq = __xive_irq_bitmap_alloc(xibm);
+		spin_unlock_irqrestore(&xibm->lock, flags);
+		if (irq >= 0)
+			break;
+	}
+	return irq;
+}
+
+static void xive_irq_bitmap_free(int irq)
+{
+	unsigned long flags;
+	struct xive_irq_bitmap *xibm;
+
+	list_for_each_entry(xibm, &xive_irq_bitmaps, list) {
+		if ((irq >= xibm->base) && (irq < xibm->base + xibm->count)) {
+			spin_lock_irqsave(&xibm->lock, flags);
+			clear_bit(irq - xibm->base, xibm->bitmap);
+			spin_unlock_irqrestore(&xibm->lock, flags);
+			break;
+		}
+	}
+}
+
+
+/* Based on the similar routines in RTAS */
+static unsigned int plpar_busy_delay_time(long rc)
+{
+	unsigned int ms = 0;
+
+	if (H_IS_LONG_BUSY(rc)) {
+		ms = get_longbusy_msecs(rc);
+	} else if (rc == H_BUSY) {
+		ms = 10; /* seems appropriate for XIVE hcalls */
+	}
+
+	return ms;
+}
+
+static unsigned int plpar_busy_delay(int rc)
+{
+	unsigned int ms;
+
+	ms = plpar_busy_delay_time(rc);
+	if (ms)
+		mdelay(ms);
+
+	return ms;
+}
+
+/*
+ * Note: this call has a partition wide scope and can take a while to
+ * complete. If it returns H_LONG_BUSY_* it should be retried
+ * periodically.
+ */
+static long plpar_int_reset(unsigned long flags)
+{
+	long rc;
+
+	do {
+		rc = plpar_hcall_norets(H_INT_RESET, flags);
+	} while (plpar_busy_delay(rc));
+
+	if (rc)
+		pr_err("H_INT_RESET failed %ld\n", rc);
+
+	return rc;
+}
+
+static long plpar_int_get_source_info(unsigned long flags,
+				      unsigned long lisn,
+				      unsigned long *src_flags,
+				      unsigned long *eoi_page,
+				      unsigned long *trig_page,
+				      unsigned long *esb_shift)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	do {
+		rc = plpar_hcall(H_INT_GET_SOURCE_INFO, retbuf, flags, lisn);
+	} while (plpar_busy_delay(rc));
+
+	if (rc) {
+		pr_err("H_INT_GET_SOURCE_INFO lisn=0x%lx failed %ld\n", lisn, rc);
+		return rc;
+	}
+
+	*src_flags = retbuf[0];
+	*eoi_page  = retbuf[1];
+	*trig_page = retbuf[2];
+	*esb_shift = retbuf[3];
+
+	pr_debug("H_INT_GET_SOURCE_INFO lisn=0x%lx flags=0x%lx eoi=0x%lx trig=0x%lx shift=0x%lx\n",
+		 lisn, retbuf[0], retbuf[1], retbuf[2], retbuf[3]);
+
+	return 0;
+}
+
+#define XIVE_SRC_SET_EISN (1ull << (63 - 62))
+#define XIVE_SRC_MASK     (1ull << (63 - 63)) /* unused */
+
+static long plpar_int_set_source_config(unsigned long flags,
+					unsigned long lisn,
+					unsigned long target,
+					unsigned long prio,
+					unsigned long sw_irq)
+{
+	long rc;
+
+
+	pr_debug("H_INT_SET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx target=%ld prio=%ld sw_irq=%ld\n",
+		 flags, lisn, target, prio, sw_irq);
+
+
+	do {
+		rc = plpar_hcall_norets(H_INT_SET_SOURCE_CONFIG, flags, lisn,
+					target, prio, sw_irq);
+	} while (plpar_busy_delay(rc));
+
+	if (rc) {
+		pr_err("H_INT_SET_SOURCE_CONFIG lisn=0x%lx target=%ld prio=%ld failed %ld\n",
+		       lisn, target, prio, rc);
+		return rc;
+	}
+
+	return 0;
+}
+
+static long plpar_int_get_source_config(unsigned long flags,
+					unsigned long lisn,
+					unsigned long *target,
+					unsigned long *prio,
+					unsigned long *sw_irq)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	pr_debug("H_INT_GET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx\n", flags, lisn);
+
+	do {
+		rc = plpar_hcall(H_INT_GET_SOURCE_CONFIG, retbuf, flags, lisn,
+				 target, prio, sw_irq);
+	} while (plpar_busy_delay(rc));
+
+	if (rc) {
+		pr_err("H_INT_GET_SOURCE_CONFIG lisn=0x%lx failed %ld\n",
+		       lisn, rc);
+		return rc;
+	}
+
+	*target = retbuf[0];
+	*prio   = retbuf[1];
+	*sw_irq = retbuf[2];
+
+	pr_debug("H_INT_GET_SOURCE_CONFIG target=%ld prio=%ld sw_irq=%ld\n",
+		 retbuf[0], retbuf[1], retbuf[2]);
+
+	return 0;
+}
+
+static long plpar_int_get_queue_info(unsigned long flags,
+				     unsigned long target,
+				     unsigned long priority,
+				     unsigned long *esn_page,
+				     unsigned long *esn_size)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	do {
+		rc = plpar_hcall(H_INT_GET_QUEUE_INFO, retbuf, flags, target,
+				 priority);
+	} while (plpar_busy_delay(rc));
+
+	if (rc) {
+		pr_err("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld failed %ld\n",
+		       target, priority, rc);
+		return rc;
+	}
+
+	*esn_page = retbuf[0];
+	*esn_size = retbuf[1];
+
+	pr_debug("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld page=0x%lx size=0x%lx\n",
+		 target, priority, retbuf[0], retbuf[1]);
+
+	return 0;
+}
+
+#define XIVE_EQ_ALWAYS_NOTIFY (1ull << (63 - 63))
+
+static long plpar_int_set_queue_config(unsigned long flags,
+				       unsigned long target,
+				       unsigned long priority,
+				       unsigned long qpage,
+				       unsigned long qsize)
+{
+	long rc;
+
+	pr_debug("H_INT_SET_QUEUE_CONFIG flags=0x%lx target=%ld priority=0x%lx qpage=0x%lx qsize=0x%lx\n",
+		 flags,  target, priority, qpage, qsize);
+
+	do {
+		rc = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target,
+					priority, qpage, qsize);
+	} while (plpar_busy_delay(rc));
+
+	if (rc) {
+		pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=0x%lx returned %ld\n",
+		       target, priority, qpage, rc);
+		return  rc;
+	}
+
+	return 0;
+}
+
+static long plpar_int_sync(unsigned long flags, unsigned long lisn)
+{
+	long rc;
+
+	do {
+		rc = plpar_hcall_norets(H_INT_SYNC, flags, lisn);
+	} while (plpar_busy_delay(rc));
+
+	if (rc) {
+		pr_err("H_INT_SYNC lisn=0x%lx returned %ld\n", lisn, rc);
+		return  rc;
+	}
+
+	return 0;
+}
+
+#define XIVE_ESB_FLAG_STORE (1ull << (63 - 63))
+
+static long plpar_int_esb(unsigned long flags,
+			  unsigned long lisn,
+			  unsigned long offset,
+			  unsigned long in_data,
+			  unsigned long *out_data)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	pr_debug("H_INT_ESB flags=0x%lx lisn=0x%lx offset=0x%lx in=0x%lx\n",
+		 flags,  lisn, offset, in_data);
+
+	do {
+		rc = plpar_hcall(H_INT_ESB, retbuf, flags, lisn, offset,
+				 in_data);
+	} while (plpar_busy_delay(rc));
+
+	if (rc) {
+		pr_err("H_INT_ESB lisn=0x%lx offset=0x%lx returned %ld\n",
+		       lisn, offset, rc);
+		return  rc;
+	}
+
+	*out_data = retbuf[0];
+
+	return 0;
+}
+
+static u64 xive_spapr_esb_rw(u32 lisn, u32 offset, u64 data, bool write)
+{
+	unsigned long read_data;
+	long rc;
+
+	rc = plpar_int_esb(write ? XIVE_ESB_FLAG_STORE : 0,
+			   lisn, offset, data, &read_data);
+	if (rc)
+		return -1;
+
+	return write ? 0 : read_data;
+}
+
+#define XIVE_SRC_H_INT_ESB     (1ull << (63 - 60))
+#define XIVE_SRC_LSI           (1ull << (63 - 61))
+#define XIVE_SRC_TRIGGER       (1ull << (63 - 62))
+#define XIVE_SRC_STORE_EOI     (1ull << (63 - 63))
+
+static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
+{
+	long rc;
+	unsigned long flags;
+	unsigned long eoi_page;
+	unsigned long trig_page;
+	unsigned long esb_shift;
+
+	memset(data, 0, sizeof(*data));
+
+	rc = plpar_int_get_source_info(0, hw_irq, &flags, &eoi_page, &trig_page,
+				       &esb_shift);
+	if (rc)
+		return  -EINVAL;
+
+	if (flags & XIVE_SRC_H_INT_ESB)
+		data->flags  |= XIVE_IRQ_FLAG_H_INT_ESB;
+	if (flags & XIVE_SRC_STORE_EOI)
+		data->flags  |= XIVE_IRQ_FLAG_STORE_EOI;
+	if (flags & XIVE_SRC_LSI)
+		data->flags  |= XIVE_IRQ_FLAG_LSI;
+	data->eoi_page  = eoi_page;
+	data->esb_shift = esb_shift;
+	data->trig_page = trig_page;
+
+	data->hw_irq = hw_irq;
+
+	/*
+	 * No chip-id for the sPAPR backend. This has an impact how we
+	 * pick a target. See xive_pick_irq_target().
+	 */
+	data->src_chip = XIVE_INVALID_CHIP_ID;
+
+	/*
+	 * When the H_INT_ESB flag is set, the H_INT_ESB hcall should
+	 * be used for interrupt management. Skip the remapping of the
+	 * ESB pages which are not available.
+	 */
+	if (data->flags & XIVE_IRQ_FLAG_H_INT_ESB)
+		return 0;
+
+	data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift);
+	if (!data->eoi_mmio) {
+		pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq);
+		return -ENOMEM;
+	}
+
+	/* Full function page supports trigger */
+	if (flags & XIVE_SRC_TRIGGER) {
+		data->trig_mmio = data->eoi_mmio;
+		return 0;
+	}
+
+	data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift);
+	if (!data->trig_mmio) {
+		iounmap(data->eoi_mmio);
+		pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static int xive_spapr_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
+{
+	long rc;
+
+	rc = plpar_int_set_source_config(XIVE_SRC_SET_EISN, hw_irq, target,
+					 prio, sw_irq);
+
+	return rc == 0 ? 0 : -ENXIO;
+}
+
+static int xive_spapr_get_irq_config(u32 hw_irq, u32 *target, u8 *prio,
+				     u32 *sw_irq)
+{
+	long rc;
+	unsigned long h_target;
+	unsigned long h_prio;
+	unsigned long h_sw_irq;
+
+	rc = plpar_int_get_source_config(0, hw_irq, &h_target, &h_prio,
+					 &h_sw_irq);
+
+	*target = h_target;
+	*prio = h_prio;
+	*sw_irq = h_sw_irq;
+
+	return rc == 0 ? 0 : -ENXIO;
+}
+
+/* This can be called multiple time to change a queue configuration */
+static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
+				   __be32 *qpage, u32 order)
+{
+	s64 rc = 0;
+	unsigned long esn_page;
+	unsigned long esn_size;
+	u64 flags, qpage_phys;
+
+	/* If there's an actual queue page, clean it */
+	if (order) {
+		if (WARN_ON(!qpage))
+			return -EINVAL;
+		qpage_phys = __pa(qpage);
+	} else {
+		qpage_phys = 0;
+	}
+
+	/* Initialize the rest of the fields */
+	q->msk = order ? ((1u << (order - 2)) - 1) : 0;
+	q->idx = 0;
+	q->toggle = 0;
+
+	rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);
+	if (rc) {
+		pr_err("Error %lld getting queue info CPU %d prio %d\n", rc,
+		       target, prio);
+		rc = -EIO;
+		goto fail;
+	}
+
+	/* TODO: add support for the notification page */
+	q->eoi_phys = esn_page;
+
+	/* Default is to always notify */
+	flags = XIVE_EQ_ALWAYS_NOTIFY;
+
+	/* Configure and enable the queue in HW */
+	rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order);
+	if (rc) {
+		pr_err("Error %lld setting queue for CPU %d prio %d\n", rc,
+		       target, prio);
+		rc = -EIO;
+	} else {
+		q->qpage = qpage;
+		if (is_secure_guest())
+			uv_share_page(PHYS_PFN(qpage_phys),
+					1 << xive_alloc_order(order));
+	}
+fail:
+	return rc;
+}
+
+static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc,
+				  u8 prio)
+{
+	struct xive_q *q = &xc->queue[prio];
+	__be32 *qpage;
+
+	qpage = xive_queue_page_alloc(cpu, xive_queue_shift);
+	if (IS_ERR(qpage))
+		return PTR_ERR(qpage);
+
+	return xive_spapr_configure_queue(get_hard_smp_processor_id(cpu),
+					  q, prio, qpage, xive_queue_shift);
+}
+
+static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
+				  u8 prio)
+{
+	struct xive_q *q = &xc->queue[prio];
+	unsigned int alloc_order;
+	long rc;
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+
+	rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0);
+	if (rc)
+		pr_err("Error %ld setting queue for CPU %d prio %d\n", rc,
+		       hw_cpu, prio);
+
+	alloc_order = xive_alloc_order(xive_queue_shift);
+	if (is_secure_guest())
+		uv_unshare_page(PHYS_PFN(__pa(q->qpage)), 1 << alloc_order);
+	free_pages((unsigned long)q->qpage, alloc_order);
+	q->qpage = NULL;
+}
+
+static bool xive_spapr_match(struct device_node *node)
+{
+	/* Ignore cascaded controllers for the moment */
+	return true;
+}
+
+#ifdef CONFIG_SMP
+static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+	int irq = xive_irq_bitmap_alloc();
+
+	if (irq < 0) {
+		pr_err("Failed to allocate IPI on CPU %d\n", cpu);
+		return -ENXIO;
+	}
+
+	xc->hw_ipi = irq;
+	return 0;
+}
+
+static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+	if (xc->hw_ipi == XIVE_BAD_IRQ)
+		return;
+
+	xive_irq_bitmap_free(xc->hw_ipi);
+	xc->hw_ipi = XIVE_BAD_IRQ;
+}
+#endif /* CONFIG_SMP */
+
+static void xive_spapr_shutdown(void)
+{
+	plpar_int_reset(0);
+}
+
+/*
+ * Perform an "ack" cycle on the current thread. Grab the pending
+ * active priorities and update the CPPR to the most favored one.
+ */
+static void xive_spapr_update_pending(struct xive_cpu *xc)
+{
+	u8 nsr, cppr;
+	u16 ack;
+
+	/*
+	 * Perform the "Acknowledge O/S to Register" cycle.
+	 *
+	 * Let's speedup the access to the TIMA using the raw I/O
+	 * accessor as we don't need the synchronisation routine of
+	 * the higher level ones
+	 */
+	ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG));
+
+	/* Synchronize subsequent queue accesses */
+	mb();
+
+	/*
+	 * Grab the CPPR and the "NSR" field which indicates the source
+	 * of the interrupt (if any)
+	 */
+	cppr = ack & 0xff;
+	nsr = ack >> 8;
+
+	if (nsr & TM_QW1_NSR_EO) {
+		if (cppr == 0xff)
+			return;
+		/* Mark the priority pending */
+		xc->pending_prio |= 1 << cppr;
+
+		/*
+		 * A new interrupt should never have a CPPR less favored
+		 * than our current one.
+		 */
+		if (cppr >= xc->cppr)
+			pr_err("CPU %d odd ack CPPR, got %d at %d\n",
+			       smp_processor_id(), cppr, xc->cppr);
+
+		/* Update our idea of what the CPPR is */
+		xc->cppr = cppr;
+	}
+}
+
+static void xive_spapr_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+	/* Only some debug on the TIMA settings */
+	pr_debug("(HW value: %08x %08x %08x)\n",
+		 in_be32(xive_tima + TM_QW1_OS + TM_WORD0),
+		 in_be32(xive_tima + TM_QW1_OS + TM_WORD1),
+		 in_be32(xive_tima + TM_QW1_OS + TM_WORD2));
+}
+
+static void xive_spapr_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+	/* Nothing to do */;
+}
+
+static void xive_spapr_sync_source(u32 hw_irq)
+{
+	/* Specs are unclear on what this is doing */
+	plpar_int_sync(0, hw_irq);
+}
+
+static int xive_spapr_debug_show(struct seq_file *m, void *private)
+{
+	struct xive_irq_bitmap *xibm;
+	char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+
+	if (!buf)
+		return -ENOMEM;
+
+	list_for_each_entry(xibm, &xive_irq_bitmaps, list) {
+		memset(buf, 0, PAGE_SIZE);
+		bitmap_print_to_pagebuf(true, buf, xibm->bitmap, xibm->count);
+		seq_printf(m, "bitmap #%d: %s", xibm->count, buf);
+	}
+	kfree(buf);
+
+	return 0;
+}
+
+static const struct xive_ops xive_spapr_ops = {
+	.populate_irq_data	= xive_spapr_populate_irq_data,
+	.configure_irq		= xive_spapr_configure_irq,
+	.get_irq_config		= xive_spapr_get_irq_config,
+	.setup_queue		= xive_spapr_setup_queue,
+	.cleanup_queue		= xive_spapr_cleanup_queue,
+	.match			= xive_spapr_match,
+	.shutdown		= xive_spapr_shutdown,
+	.update_pending		= xive_spapr_update_pending,
+	.setup_cpu		= xive_spapr_setup_cpu,
+	.teardown_cpu		= xive_spapr_teardown_cpu,
+	.sync_source		= xive_spapr_sync_source,
+	.esb_rw			= xive_spapr_esb_rw,
+#ifdef CONFIG_SMP
+	.get_ipi		= xive_spapr_get_ipi,
+	.put_ipi		= xive_spapr_put_ipi,
+	.debug_show		= xive_spapr_debug_show,
+#endif /* CONFIG_SMP */
+	.name			= "spapr",
+};
+
+/*
+ * get max priority from "/ibm,plat-res-int-priorities"
+ */
+static bool __init xive_get_max_prio(u8 *max_prio)
+{
+	struct device_node *rootdn;
+	const __be32 *reg;
+	u32 len;
+	int prio, found;
+
+	rootdn = of_find_node_by_path("/");
+	if (!rootdn) {
+		pr_err("not root node found !\n");
+		return false;
+	}
+
+	reg = of_get_property(rootdn, "ibm,plat-res-int-priorities", &len);
+	of_node_put(rootdn);
+	if (!reg) {
+		pr_err("Failed to read 'ibm,plat-res-int-priorities' property\n");
+		return false;
+	}
+
+	if (len % (2 * sizeof(u32)) != 0) {
+		pr_err("invalid 'ibm,plat-res-int-priorities' property\n");
+		return false;
+	}
+
+	/* HW supports priorities in the range [0-7] and 0xFF is a
+	 * wildcard priority used to mask. We scan the ranges reserved
+	 * by the hypervisor to find the lowest priority we can use.
+	 */
+	found = 0xFF;
+	for (prio = 0; prio < 8; prio++) {
+		int reserved = 0;
+		int i;
+
+		for (i = 0; i < len / (2 * sizeof(u32)); i++) {
+			int base  = be32_to_cpu(reg[2 * i]);
+			int range = be32_to_cpu(reg[2 * i + 1]);
+
+			if (prio >= base && prio < base + range)
+				reserved++;
+		}
+
+		if (!reserved)
+			found = prio;
+	}
+
+	if (found == 0xFF) {
+		pr_err("no valid priority found in 'ibm,plat-res-int-priorities'\n");
+		return false;
+	}
+
+	*max_prio = found;
+	return true;
+}
+
+static const u8 *__init get_vec5_feature(unsigned int index)
+{
+	unsigned long root, chosen;
+	int size;
+	const u8 *vec5;
+
+	root = of_get_flat_dt_root();
+	chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
+	if (chosen == -FDT_ERR_NOTFOUND)
+		return NULL;
+
+	vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
+	if (!vec5)
+		return NULL;
+
+	if (size <= index)
+		return NULL;
+
+	return vec5 + index;
+}
+
+static bool __init xive_spapr_disabled(void)
+{
+	const u8 *vec5_xive;
+
+	vec5_xive = get_vec5_feature(OV5_INDX(OV5_XIVE_SUPPORT));
+	if (vec5_xive) {
+		u8 val;
+
+		val = *vec5_xive & OV5_FEAT(OV5_XIVE_SUPPORT);
+		switch (val) {
+		case OV5_FEAT(OV5_XIVE_EITHER):
+		case OV5_FEAT(OV5_XIVE_LEGACY):
+			break;
+		case OV5_FEAT(OV5_XIVE_EXPLOIT):
+			/* Hypervisor only supports XIVE */
+			if (xive_cmdline_disabled)
+				pr_warn("WARNING: Ignoring cmdline option xive=off\n");
+			return false;
+		default:
+			pr_warn("%s: Unknown xive support option: 0x%x\n",
+				__func__, val);
+			break;
+		}
+	}
+
+	return xive_cmdline_disabled;
+}
+
+bool __init xive_spapr_init(void)
+{
+	struct device_node *np;
+	struct resource r;
+	void __iomem *tima;
+	struct property *prop;
+	u8 max_prio;
+	u32 val;
+	u32 len;
+	const __be32 *reg;
+	int i, err;
+
+	if (xive_spapr_disabled())
+		return false;
+
+	pr_devel("%s()\n", __func__);
+	np = of_find_compatible_node(NULL, NULL, "ibm,power-ivpe");
+	if (!np) {
+		pr_devel("not found !\n");
+		return false;
+	}
+	pr_devel("Found %s\n", np->full_name);
+
+	/* Resource 1 is the OS ring TIMA */
+	if (of_address_to_resource(np, 1, &r)) {
+		pr_err("Failed to get thread mgmnt area resource\n");
+		goto err_put;
+	}
+	tima = ioremap(r.start, resource_size(&r));
+	if (!tima) {
+		pr_err("Failed to map thread mgmnt area\n");
+		goto err_put;
+	}
+
+	if (!xive_get_max_prio(&max_prio))
+		goto err_unmap;
+
+	/* Feed the IRQ number allocator with the ranges given in the DT */
+	reg = of_get_property(np, "ibm,xive-lisn-ranges", &len);
+	if (!reg) {
+		pr_err("Failed to read 'ibm,xive-lisn-ranges' property\n");
+		goto err_unmap;
+	}
+
+	if (len % (2 * sizeof(u32)) != 0) {
+		pr_err("invalid 'ibm,xive-lisn-ranges' property\n");
+		goto err_unmap;
+	}
+
+	for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2) {
+		err = xive_irq_bitmap_add(be32_to_cpu(reg[0]),
+					  be32_to_cpu(reg[1]));
+		if (err < 0)
+			goto err_mem_free;
+	}
+
+	/* Iterate the EQ sizes and pick one */
+	of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, reg, val) {
+		xive_queue_shift = val;
+		if (val == PAGE_SHIFT)
+			break;
+	}
+
+	/* Initialize XIVE core with our backend */
+	if (!xive_core_init(np, &xive_spapr_ops, tima, TM_QW1_OS, max_prio))
+		goto err_mem_free;
+
+	of_node_put(np);
+	pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10));
+	return true;
+
+err_mem_free:
+	xive_irq_bitmap_remove_all();
+err_unmap:
+	iounmap(tima);
+err_put:
+	of_node_put(np);
+	return false;
+}
+
+machine_arch_initcall(pseries, xive_core_debug_init);
diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h
new file mode 100644
index 0000000000..fe6d95d54a
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/xive-internal.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ */
+#ifndef __XIVE_INTERNAL_H
+#define __XIVE_INTERNAL_H
+
+/*
+ * A "disabled" interrupt should never fire, to catch problems
+ * we set its logical number to this
+ */
+#define XIVE_BAD_IRQ		0x7fffffff
+#define XIVE_MAX_IRQ		(XIVE_BAD_IRQ - 1)
+
+/* Each CPU carry one of these with various per-CPU state */
+struct xive_cpu {
+#ifdef CONFIG_SMP
+	/* HW irq number and data of IPI */
+	u32 hw_ipi;
+	struct xive_irq_data ipi_data;
+#endif /* CONFIG_SMP */
+
+	int chip_id;
+
+	/* Queue datas. Only one is populated */
+#define XIVE_MAX_QUEUES	8
+	struct xive_q queue[XIVE_MAX_QUEUES];
+
+	/*
+	 * Pending mask. Each bit corresponds to a priority that
+	 * potentially has pending interrupts.
+	 */
+	u8 pending_prio;
+
+	/* Cache of HW CPPR */
+	u8 cppr;
+};
+
+/* Backend ops */
+struct xive_ops {
+	int	(*populate_irq_data)(u32 hw_irq, struct xive_irq_data *data);
+	int 	(*configure_irq)(u32 hw_irq, u32 target, u8 prio, u32 sw_irq);
+	int	(*get_irq_config)(u32 hw_irq, u32 *target, u8 *prio,
+				  u32 *sw_irq);
+	int	(*setup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio);
+	void	(*cleanup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio);
+	void	(*prepare_cpu)(unsigned int cpu, struct xive_cpu *xc);
+	void	(*setup_cpu)(unsigned int cpu, struct xive_cpu *xc);
+	void	(*teardown_cpu)(unsigned int cpu, struct xive_cpu *xc);
+	bool	(*match)(struct device_node *np);
+	void	(*shutdown)(void);
+
+	void	(*update_pending)(struct xive_cpu *xc);
+	void	(*sync_source)(u32 hw_irq);
+	u64	(*esb_rw)(u32 hw_irq, u32 offset, u64 data, bool write);
+#ifdef CONFIG_SMP
+	int	(*get_ipi)(unsigned int cpu, struct xive_cpu *xc);
+	void	(*put_ipi)(unsigned int cpu, struct xive_cpu *xc);
+#endif
+	int	(*debug_show)(struct seq_file *m, void *private);
+	int	(*debug_create)(struct dentry *xive_dir);
+	const char *name;
+};
+
+bool xive_core_init(struct device_node *np, const struct xive_ops *ops,
+		    void __iomem *area, u32 offset, u8 max_prio);
+__be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift);
+int xive_core_debug_init(void);
+
+static inline u32 xive_alloc_order(u32 queue_shift)
+{
+	return (queue_shift > PAGE_SHIFT) ? (queue_shift - PAGE_SHIFT) : 0;
+}
+
+extern bool xive_cmdline_disabled;
+extern bool xive_has_save_restore;
+
+#endif /*  __XIVE_INTERNAL_H */
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
commit	ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree	b2d64bc10158fdd5497876388cd68142ca374ed3 /arch/powerpc/sysdev
parent	Initial commit. (diff)
download	linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip