8 files changed, 1745 insertions, 0 deletions
diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig
new file mode 100644
index 000000000..86fee428f
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_XICS
+       def_bool n
+       select PPC_SMP_MUXED_IPI
+       select HARDIRQS_SW_RESEND
+
+config PPC_ICP_NATIVE
+       def_bool n
+
+config PPC_ICP_HV
+       def_bool n
+
+config PPC_ICS_RTAS
+       def_bool n
+
diff --git a/arch/powerpc/sysdev/xics/Makefile b/arch/powerpc/sysdev/xics/Makefile
new file mode 100644
index 000000000..5d438d924
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+obj-y				+= xics-common.o
+obj-$(CONFIG_PPC_ICP_NATIVE)	+= icp-native.o
+obj-$(CONFIG_PPC_ICP_HV)	+= icp-hv.o
+obj-$(CONFIG_PPC_ICS_RTAS)	+= ics-rtas.o
+obj-$(CONFIG_PPC_POWERNV)	+= ics-opal.o icp-opal.o
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
new file mode 100644
index 000000000..003deaabb
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright 2011 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/io.h>
+#include <asm/hvcall.h>
+
+static inline unsigned int icp_hv_get_xirr(unsigned char cppr)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+	unsigned int ret = XICS_IRQ_SPURIOUS;
+
+	rc = plpar_hcall(H_XIRR, retbuf, cppr);
+	if (rc == H_SUCCESS) {
+		ret = (unsigned int)retbuf[0];
+	} else {
+		pr_err("%s: bad return code xirr cppr=0x%x returned %ld\n",
+			__func__, cppr, rc);
+		WARN_ON_ONCE(1);
+	}
+
+	return ret;
+}
+
+static inline void icp_hv_set_cppr(u8 value)
+{
+	long rc = plpar_hcall_norets(H_CPPR, value);
+	if (rc != H_SUCCESS) {
+		pr_err("%s: bad return code cppr cppr=0x%x returned %ld\n",
+			__func__, value, rc);
+		WARN_ON_ONCE(1);
+	}
+}
+
+static inline void icp_hv_set_xirr(unsigned int value)
+{
+	long rc = plpar_hcall_norets(H_EOI, value);
+	if (rc != H_SUCCESS) {
+		pr_err("%s: bad return code eoi xirr=0x%x returned %ld\n",
+			__func__, value, rc);
+		WARN_ON_ONCE(1);
+		icp_hv_set_cppr(value >> 24);
+	}
+}
+
+static inline void icp_hv_set_qirr(int n_cpu , u8 value)
+{
+	int hw_cpu = get_hard_smp_processor_id(n_cpu);
+	long rc;
+
+	/* Make sure all previous accesses are ordered before IPI sending */
+	mb();
+	rc = plpar_hcall_norets(H_IPI, hw_cpu, value);
+	if (rc != H_SUCCESS) {
+		pr_err("%s: bad return code qirr cpu=%d hw_cpu=%d mfrr=0x%x "
+			"returned %ld\n", __func__, n_cpu, hw_cpu, value, rc);
+		WARN_ON_ONCE(1);
+	}
+}
+
+static void icp_hv_eoi(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	iosync();
+	icp_hv_set_xirr((xics_pop_cppr() << 24) | hw_irq);
+}
+
+static void icp_hv_teardown_cpu(void)
+{
+	int cpu = smp_processor_id();
+
+	/* Clear any pending IPI */
+	icp_hv_set_qirr(cpu, 0xff);
+}
+
+static void icp_hv_flush_ipi(void)
+{
+	/* We take the ipi irq but and never return so we
+	 * need to EOI the IPI, but want to leave our priority 0
+	 *
+	 * should we check all the other interrupts too?
+	 * should we be flagging idle loop instead?
+	 * or creating some task to be scheduled?
+	 */
+
+	icp_hv_set_xirr((0x00 << 24) | XICS_IPI);
+}
+
+static unsigned int icp_hv_get_irq(void)
+{
+	unsigned int xirr = icp_hv_get_xirr(xics_cppr_top());
+	unsigned int vec = xirr & 0x00ffffff;
+	unsigned int irq;
+
+	if (vec == XICS_IRQ_SPURIOUS)
+		return 0;
+
+	irq = irq_find_mapping(xics_host, vec);
+	if (likely(irq)) {
+		xics_push_cppr(vec);
+		return irq;
+	}
+
+	/* We don't have a linux mapping, so have rtas mask it. */
+	xics_mask_unknown_vec(vec);
+
+	/* We might learn about it later, so EOI it */
+	icp_hv_set_xirr(xirr);
+
+	return 0;
+}
+
+static void icp_hv_set_cpu_priority(unsigned char cppr)
+{
+	xics_set_base_cppr(cppr);
+	icp_hv_set_cppr(cppr);
+	iosync();
+}
+
+#ifdef CONFIG_SMP
+
+static void icp_hv_cause_ipi(int cpu)
+{
+	icp_hv_set_qirr(cpu, IPI_PRIORITY);
+}
+
+static irqreturn_t icp_hv_ipi_action(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	icp_hv_set_qirr(cpu, 0xff);
+
+	return smp_ipi_demux();
+}
+
+#endif /* CONFIG_SMP */
+
+static const struct icp_ops icp_hv_ops = {
+	.get_irq	= icp_hv_get_irq,
+	.eoi		= icp_hv_eoi,
+	.set_priority	= icp_hv_set_cpu_priority,
+	.teardown_cpu	= icp_hv_teardown_cpu,
+	.flush_ipi	= icp_hv_flush_ipi,
+#ifdef CONFIG_SMP
+	.ipi_action	= icp_hv_ipi_action,
+	.cause_ipi	= icp_hv_cause_ipi,
+#endif
+};
+
+int icp_hv_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xicp");
+	if (!np)
+		np = of_find_node_by_type(NULL,
+				    "PowerPC-External-Interrupt-Presentation");
+	if (!np)
+		return -ENODEV;
+
+	icp_ops = &icp_hv_ops;
+
+	of_node_put(np);
+	return 0;
+}
+
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
new file mode 100644
index 000000000..340de58a1
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright 2011 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/kvm_ppc.h>
+#include <asm/dbell.h>
+
+struct icp_ipl {
+	union {
+		u32 word;
+		u8 bytes[4];
+	} xirr_poll;
+	union {
+		u32 word;
+		u8 bytes[4];
+	} xirr;
+	u32 dummy;
+	union {
+		u32 word;
+		u8 bytes[4];
+	} qirr;
+	u32 link_a;
+	u32 link_b;
+	u32 link_c;
+};
+
+static struct icp_ipl __iomem *icp_native_regs[NR_CPUS];
+
+static inline unsigned int icp_native_get_xirr(void)
+{
+	int cpu = smp_processor_id();
+	unsigned int xirr;
+
+	/* Handled an interrupt latched by KVM */
+	xirr = kvmppc_get_xics_latch();
+	if (xirr)
+		return xirr;
+
+	return in_be32(&icp_native_regs[cpu]->xirr.word);
+}
+
+static inline void icp_native_set_xirr(unsigned int value)
+{
+	int cpu = smp_processor_id();
+
+	out_be32(&icp_native_regs[cpu]->xirr.word, value);
+}
+
+static inline void icp_native_set_cppr(u8 value)
+{
+	int cpu = smp_processor_id();
+
+	out_8(&icp_native_regs[cpu]->xirr.bytes[0], value);
+}
+
+static inline void icp_native_set_qirr(int n_cpu, u8 value)
+{
+	out_8(&icp_native_regs[n_cpu]->qirr.bytes[0], value);
+}
+
+static void icp_native_set_cpu_priority(unsigned char cppr)
+{
+	xics_set_base_cppr(cppr);
+	icp_native_set_cppr(cppr);
+	iosync();
+}
+
+void icp_native_eoi(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	iosync();
+	icp_native_set_xirr((xics_pop_cppr() << 24) | hw_irq);
+}
+
+static void icp_native_teardown_cpu(void)
+{
+	int cpu = smp_processor_id();
+
+	/* Clear any pending IPI */
+	icp_native_set_qirr(cpu, 0xff);
+}
+
+static void icp_native_flush_ipi(void)
+{
+	/* We take the ipi irq but and never return so we
+	 * need to EOI the IPI, but want to leave our priority 0
+	 *
+	 * should we check all the other interrupts too?
+	 * should we be flagging idle loop instead?
+	 * or creating some task to be scheduled?
+	 */
+
+	icp_native_set_xirr((0x00 << 24) | XICS_IPI);
+}
+
+static unsigned int icp_native_get_irq(void)
+{
+	unsigned int xirr = icp_native_get_xirr();
+	unsigned int vec = xirr & 0x00ffffff;
+	unsigned int irq;
+
+	if (vec == XICS_IRQ_SPURIOUS)
+		return 0;
+
+	irq = irq_find_mapping(xics_host, vec);
+	if (likely(irq)) {
+		xics_push_cppr(vec);
+		return irq;
+	}
+
+	/* We don't have a linux mapping, so have rtas mask it. */
+	xics_mask_unknown_vec(vec);
+
+	/* We might learn about it later, so EOI it */
+	icp_native_set_xirr(xirr);
+
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+
+static void icp_native_cause_ipi(int cpu)
+{
+	kvmppc_set_host_ipi(cpu);
+	icp_native_set_qirr(cpu, IPI_PRIORITY);
+}
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void icp_native_cause_ipi_rm(int cpu)
+{
+	/*
+	 * Currently not used to send IPIs to another CPU
+	 * on the same core. Only caller is KVM real mode.
+	 * Need the physical address of the XICS to be
+	 * previously saved in kvm_hstate in the paca.
+	 */
+	void __iomem *xics_phys;
+
+	/*
+	 * Just like the cause_ipi functions, it is required to
+	 * include a full barrier before causing the IPI.
+	 */
+	xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys;
+	mb();
+	__raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR);
+}
+#endif
+
+/*
+ * Called when an interrupt is received on an off-line CPU to
+ * clear the interrupt, so that the CPU can go back to nap mode.
+ */
+void icp_native_flush_interrupt(void)
+{
+	unsigned int xirr = icp_native_get_xirr();
+	unsigned int vec = xirr & 0x00ffffff;
+
+	if (vec == XICS_IRQ_SPURIOUS)
+		return;
+	if (vec == XICS_IPI) {
+		/* Clear pending IPI */
+		int cpu = smp_processor_id();
+		kvmppc_clear_host_ipi(cpu);
+		icp_native_set_qirr(cpu, 0xff);
+	} else {
+		pr_err("XICS: hw interrupt 0x%x to offline cpu, disabling\n",
+		       vec);
+		xics_mask_unknown_vec(vec);
+	}
+	/* EOI the interrupt */
+	icp_native_set_xirr(xirr);
+}
+
+void xics_wake_cpu(int cpu)
+{
+	icp_native_set_qirr(cpu, IPI_PRIORITY);
+}
+EXPORT_SYMBOL_GPL(xics_wake_cpu);
+
+static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	kvmppc_clear_host_ipi(cpu);
+	icp_native_set_qirr(cpu, 0xff);
+
+	return smp_ipi_demux();
+}
+
+#endif /* CONFIG_SMP */
+
+static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr,
+					 unsigned long size)
+{
+	char *rname;
+	int i, cpu = -1;
+
+	/* This may look gross but it's good enough for now, we don't quite
+	 * have a hard -> linux processor id matching.
+	 */
+	for_each_possible_cpu(i) {
+		if (!cpu_present(i))
+			continue;
+		if (hw_id == get_hard_smp_processor_id(i)) {
+			cpu = i;
+			break;
+		}
+	}
+
+	/* Fail, skip that CPU. Don't print, it's normal, some XICS come up
+	 * with way more entries in there than you have CPUs
+	 */
+	if (cpu == -1)
+		return 0;
+
+	rname = kasprintf(GFP_KERNEL, "CPU %d [0x%x] Interrupt Presentation",
+			  cpu, hw_id);
+
+	if (!request_mem_region(addr, size, rname)) {
+		pr_warn("icp_native: Could not reserve ICP MMIO for CPU %d, interrupt server #0x%x\n",
+			cpu, hw_id);
+		return -EBUSY;
+	}
+
+	icp_native_regs[cpu] = ioremap(addr, size);
+	kvmppc_set_xics_phys(cpu, addr);
+	if (!icp_native_regs[cpu]) {
+		pr_warn("icp_native: Failed ioremap for CPU %d, interrupt server #0x%x, addr %#lx\n",
+			cpu, hw_id, addr);
+		release_mem_region(addr, size);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static int __init icp_native_init_one_node(struct device_node *np,
+					   unsigned int *indx)
+{
+	unsigned int ilen;
+	const __be32 *ireg;
+	int i;
+	int reg_tuple_size;
+	int num_servers = 0;
+
+	/* This code does the theorically broken assumption that the interrupt
+	 * server numbers are the same as the hard CPU numbers.
+	 * This happens to be the case so far but we are playing with fire...
+	 * should be fixed one of these days. -BenH.
+	 */
+	ireg = of_get_property(np, "ibm,interrupt-server-ranges", &ilen);
+
+	/* Do that ever happen ? we'll know soon enough... but even good'old
+	 * f80 does have that property ..
+	 */
+	WARN_ON((ireg == NULL) || (ilen != 2*sizeof(u32)));
+
+	if (ireg) {
+		*indx = of_read_number(ireg, 1);
+		if (ilen >= 2*sizeof(u32))
+			num_servers = of_read_number(ireg + 1, 1);
+	}
+
+	ireg = of_get_property(np, "reg", &ilen);
+	if (!ireg) {
+		pr_err("icp_native: Can't find interrupt reg property");
+		return -1;
+	}
+
+	reg_tuple_size = (of_n_addr_cells(np) + of_n_size_cells(np)) * 4;
+	if (((ilen % reg_tuple_size) != 0)
+	    || (num_servers && (num_servers != (ilen / reg_tuple_size)))) {
+		pr_err("icp_native: ICP reg len (%d) != num servers (%d)",
+		       ilen / reg_tuple_size, num_servers);
+		return -1;
+	}
+
+	for (i = 0; i < (ilen / reg_tuple_size); i++) {
+		struct resource r;
+		int err;
+
+		err = of_address_to_resource(np, i, &r);
+		if (err) {
+			pr_err("icp_native: Could not translate ICP MMIO"
+			       " for interrupt server 0x%x (%d)\n", *indx, err);
+			return -1;
+		}
+
+		if (icp_native_map_one_cpu(*indx, r.start, resource_size(&r)))
+			return -1;
+
+		(*indx)++;
+	}
+	return 0;
+}
+
+static const struct icp_ops icp_native_ops = {
+	.get_irq	= icp_native_get_irq,
+	.eoi		= icp_native_eoi,
+	.set_priority	= icp_native_set_cpu_priority,
+	.teardown_cpu	= icp_native_teardown_cpu,
+	.flush_ipi	= icp_native_flush_ipi,
+#ifdef CONFIG_SMP
+	.ipi_action	= icp_native_ipi_action,
+	.cause_ipi	= icp_native_cause_ipi,
+#endif
+};
+
+int __init icp_native_init(void)
+{
+	struct device_node *np;
+	u32 indx = 0;
+	int found = 0;
+
+	for_each_compatible_node(np, NULL, "ibm,ppc-xicp")
+		if (icp_native_init_one_node(np, &indx) == 0)
+			found = 1;
+	if (!found) {
+		for_each_node_by_type(np,
+			"PowerPC-External-Interrupt-Presentation") {
+				if (icp_native_init_one_node(np, &indx) == 0)
+					found = 1;
+		}
+	}
+
+	if (found == 0)
+		return -ENODEV;
+
+	icp_ops = &icp_native_ops;
+
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
new file mode 100644
index 000000000..672d8aeda
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright 2016 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/io.h>
+#include <asm/opal.h>
+#include <asm/kvm_ppc.h>
+
+static void icp_opal_teardown_cpu(void)
+{
+	int hw_cpu = hard_smp_processor_id();
+
+	/* Clear any pending IPI */
+	opal_int_set_mfrr(hw_cpu, 0xff);
+}
+
+static void icp_opal_flush_ipi(void)
+{
+	/*
+	 * We take the ipi irq but and never return so we need to EOI the IPI,
+	 * but want to leave our priority 0.
+	 *
+	 * Should we check all the other interrupts too?
+	 * Should we be flagging idle loop instead?
+	 * Or creating some task to be scheduled?
+	 */
+	if (opal_int_eoi((0x00 << 24) | XICS_IPI) > 0)
+		force_external_irq_replay();
+}
+
+static unsigned int icp_opal_get_xirr(void)
+{
+	unsigned int kvm_xirr;
+	__be32 hw_xirr;
+	int64_t rc;
+
+	/* Handle an interrupt latched by KVM first */
+	kvm_xirr = kvmppc_get_xics_latch();
+	if (kvm_xirr)
+		return kvm_xirr;
+
+	/* Then ask OPAL */
+	rc = opal_int_get_xirr(&hw_xirr, false);
+	if (rc < 0)
+		return 0;
+	return be32_to_cpu(hw_xirr);
+}
+
+static unsigned int icp_opal_get_irq(void)
+{
+	unsigned int xirr;
+	unsigned int vec;
+	unsigned int irq;
+
+	xirr = icp_opal_get_xirr();
+	vec = xirr & 0x00ffffff;
+	if (vec == XICS_IRQ_SPURIOUS)
+		return 0;
+
+	irq = irq_find_mapping(xics_host, vec);
+	if (likely(irq)) {
+		xics_push_cppr(vec);
+		return irq;
+	}
+
+	/* We don't have a linux mapping, so have rtas mask it. */
+	xics_mask_unknown_vec(vec);
+
+	/* We might learn about it later, so EOI it */
+	if (opal_int_eoi(xirr) > 0)
+		force_external_irq_replay();
+
+	return 0;
+}
+
+static void icp_opal_set_cpu_priority(unsigned char cppr)
+{
+	/*
+	 * Here be dragons. The caller has asked to allow only IPI's and not
+	 * external interrupts. But OPAL XIVE doesn't support that. So instead
+	 * of allowing no interrupts allow all. That's still not right, but
+	 * currently the only caller who does this is xics_migrate_irqs_away()
+	 * and it works in that case.
+	 */
+	if (cppr >= DEFAULT_PRIORITY)
+		cppr = LOWEST_PRIORITY;
+
+	xics_set_base_cppr(cppr);
+	opal_int_set_cppr(cppr);
+	iosync();
+}
+
+static void icp_opal_eoi(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int64_t rc;
+
+	iosync();
+	rc = opal_int_eoi((xics_pop_cppr() << 24) | hw_irq);
+
+	/*
+	 * EOI tells us whether there are more interrupts to fetch.
+	 *
+	 * Some HW implementations might not be able to send us another
+	 * external interrupt in that case, so we force a replay.
+	 */
+	if (rc > 0)
+		force_external_irq_replay();
+}
+
+#ifdef CONFIG_SMP
+
+static void icp_opal_cause_ipi(int cpu)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+
+	kvmppc_set_host_ipi(cpu);
+	opal_int_set_mfrr(hw_cpu, IPI_PRIORITY);
+}
+
+static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	kvmppc_clear_host_ipi(cpu);
+	opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
+
+	return smp_ipi_demux();
+}
+
+/*
+ * Called when an interrupt is received on an off-line CPU to
+ * clear the interrupt, so that the CPU can go back to nap mode.
+ */
+void icp_opal_flush_interrupt(void)
+{
+	unsigned int xirr;
+	unsigned int vec;
+
+	do {
+		xirr = icp_opal_get_xirr();
+		vec = xirr & 0x00ffffff;
+		if (vec == XICS_IRQ_SPURIOUS)
+			break;
+		if (vec == XICS_IPI) {
+			/* Clear pending IPI */
+			int cpu = smp_processor_id();
+			kvmppc_clear_host_ipi(cpu);
+			opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
+		} else {
+			pr_err("XICS: hw interrupt 0x%x to offline cpu, "
+			       "disabling\n", vec);
+			xics_mask_unknown_vec(vec);
+		}
+
+		/* EOI the interrupt */
+	} while (opal_int_eoi(xirr) > 0);
+}
+
+#endif /* CONFIG_SMP */
+
+static const struct icp_ops icp_opal_ops = {
+	.get_irq	= icp_opal_get_irq,
+	.eoi		= icp_opal_eoi,
+	.set_priority	= icp_opal_set_cpu_priority,
+	.teardown_cpu	= icp_opal_teardown_cpu,
+	.flush_ipi	= icp_opal_flush_ipi,
+#ifdef CONFIG_SMP
+	.ipi_action	= icp_opal_ipi_action,
+	.cause_ipi	= icp_opal_cause_ipi,
+#endif
+};
+
+int icp_opal_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
+	if (!np)
+		return -ENODEV;
+
+	icp_ops = &icp_opal_ops;
+
+	printk("XICS: Using OPAL ICP fallbacks\n");
+
+	of_node_put(np);
+	return 0;
+}
+
diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c
new file mode 100644
index 000000000..f85f916ba
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/ics-opal.c
@@ -0,0 +1,245 @@
+/*
+ * ICS backend for OPAL managed interrupts.
+ *
+ * Copyright 2011 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/msi.h>
+
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/opal.h>
+#include <asm/firmware.h>
+
+static int ics_opal_mangle_server(int server)
+{
+	/* No link for now */
+	return server << 2;
+}
+
+static int ics_opal_unmangle_server(int server)
+{
+	/* No link for now */
+	return server >> 2;
+}
+
+static void ics_opal_unmask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int64_t rc;
+	int server;
+
+	pr_devel("ics-hal: unmask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+
+	server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0);
+	server = ics_opal_mangle_server(server);
+
+	rc = opal_set_xive(hw_irq, server, DEFAULT_PRIORITY);
+	if (rc != OPAL_SUCCESS)
+		pr_err("%s: opal_set_xive(irq=%d [hw 0x%x] server=%x)"
+		       " error %lld\n",
+		       __func__, d->irq, hw_irq, server, rc);
+}
+
+static unsigned int ics_opal_startup(struct irq_data *d)
+{
+#ifdef CONFIG_PCI_MSI
+	/*
+	 * The generic MSI code returns with the interrupt disabled on the
+	 * card, using the MSI mask bits. Firmware doesn't appear to unmask
+	 * at that level, so we do it here by hand.
+	 */
+	if (irq_data_get_msi_desc(d))
+		pci_msi_unmask_irq(d);
+#endif
+
+	/* unmask it */
+	ics_opal_unmask_irq(d);
+	return 0;
+}
+
+static void ics_opal_mask_real_irq(unsigned int hw_irq)
+{
+	int server = ics_opal_mangle_server(xics_default_server);
+	int64_t rc;
+
+	if (hw_irq == XICS_IPI)
+		return;
+
+	/* Have to set XIVE to 0xff to be able to remove a slot */
+	rc = opal_set_xive(hw_irq, server, 0xff);
+	if (rc != OPAL_SUCCESS)
+		pr_err("%s: opal_set_xive(0xff) irq=%u returned %lld\n",
+		       __func__, hw_irq, rc);
+}
+
+static void ics_opal_mask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	pr_devel("ics-hal: mask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+	ics_opal_mask_real_irq(hw_irq);
+}
+
+static int ics_opal_set_affinity(struct irq_data *d,
+				 const struct cpumask *cpumask,
+				 bool force)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	__be16 oserver;
+	int16_t server;
+	int8_t priority;
+	int64_t rc;
+	int wanted_server;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return -1;
+
+	rc = opal_get_xive(hw_irq, &oserver, &priority);
+	if (rc != OPAL_SUCCESS) {
+		pr_err("%s: opal_get_xive(irq=%d [hw 0x%x]) error %lld\n",
+		       __func__, d->irq, hw_irq, rc);
+		return -1;
+	}
+	server = be16_to_cpu(oserver);
+
+	wanted_server = xics_get_irq_server(d->irq, cpumask, 1);
+	if (wanted_server < 0) {
+		pr_warn("%s: No online cpus in the mask %*pb for irq %d\n",
+			__func__, cpumask_pr_args(cpumask), d->irq);
+		return -1;
+	}
+	server = ics_opal_mangle_server(wanted_server);
+
+	pr_devel("ics-hal: set-affinity irq %d [hw 0x%x] server: 0x%x/0x%x\n",
+		 d->irq, hw_irq, wanted_server, server);
+
+	rc = opal_set_xive(hw_irq, server, priority);
+	if (rc != OPAL_SUCCESS) {
+		pr_err("%s: opal_set_xive(irq=%d [hw 0x%x] server=%x)"
+		       " error %lld\n",
+		       __func__, d->irq, hw_irq, server, rc);
+		return -1;
+	}
+	return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip ics_opal_irq_chip = {
+	.name = "OPAL ICS",
+	.irq_startup = ics_opal_startup,
+	.irq_mask = ics_opal_mask_irq,
+	.irq_unmask = ics_opal_unmask_irq,
+	.irq_eoi = NULL, /* Patched at init time */
+	.irq_set_affinity = ics_opal_set_affinity,
+	.irq_set_type = xics_set_irq_type,
+	.irq_retrigger = xics_retrigger,
+};
+
+static int ics_opal_map(struct ics *ics, unsigned int virq);
+static void ics_opal_mask_unknown(struct ics *ics, unsigned long vec);
+static long ics_opal_get_server(struct ics *ics, unsigned long vec);
+
+static int ics_opal_host_match(struct ics *ics, struct device_node *node)
+{
+	return 1;
+}
+
+/* Only one global & state struct ics */
+static struct ics ics_hal = {
+	.map		= ics_opal_map,
+	.mask_unknown	= ics_opal_mask_unknown,
+	.get_server	= ics_opal_get_server,
+	.host_match	= ics_opal_host_match,
+};
+
+static int ics_opal_map(struct ics *ics, unsigned int virq)
+{
+	unsigned int hw_irq = (unsigned int)virq_to_hw(virq);
+	int64_t rc;
+	__be16 server;
+	int8_t priority;
+
+	if (WARN_ON(hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS))
+		return -EINVAL;
+
+	/* Check if HAL knows about this interrupt */
+	rc = opal_get_xive(hw_irq, &server, &priority);
+	if (rc != OPAL_SUCCESS)
+		return -ENXIO;
+
+	irq_set_chip_and_handler(virq, &ics_opal_irq_chip, handle_fasteoi_irq);
+	irq_set_chip_data(virq, &ics_hal);
+
+	return 0;
+}
+
+static void ics_opal_mask_unknown(struct ics *ics, unsigned long vec)
+{
+	int64_t rc;
+	__be16 server;
+	int8_t priority;
+
+	/* Check if HAL knows about this interrupt */
+	rc = opal_get_xive(vec, &server, &priority);
+	if (rc != OPAL_SUCCESS)
+		return;
+
+	ics_opal_mask_real_irq(vec);
+}
+
+static long ics_opal_get_server(struct ics *ics, unsigned long vec)
+{
+	int64_t rc;
+	__be16 server;
+	int8_t priority;
+
+	/* Check if HAL knows about this interrupt */
+	rc = opal_get_xive(vec, &server, &priority);
+	if (rc != OPAL_SUCCESS)
+		return -1;
+	return ics_opal_unmangle_server(be16_to_cpu(server));
+}
+
+int __init ics_opal_init(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_OPAL))
+		return -ENODEV;
+
+	/* We need to patch our irq chip's EOI to point to the
+	 * right ICP
+	 */
+	ics_opal_irq_chip.irq_eoi = icp_ops->eoi;
+
+	/* Register ourselves */
+	xics_register_ics(&ics_hal);
+
+	pr_info("ICS OPAL backend registered\n");
+
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
new file mode 100644
index 000000000..6aabc7468
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/msi.h>
+
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/rtas.h>
+
+/* RTAS service tokens */
+static int ibm_get_xive;
+static int ibm_set_xive;
+static int ibm_int_on;
+static int ibm_int_off;
+
+static int ics_rtas_map(struct ics *ics, unsigned int virq);
+static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec);
+static long ics_rtas_get_server(struct ics *ics, unsigned long vec);
+static int ics_rtas_host_match(struct ics *ics, struct device_node *node);
+
+/* Only one global & state struct ics */
+static struct ics ics_rtas = {
+	.map		= ics_rtas_map,
+	.mask_unknown	= ics_rtas_mask_unknown,
+	.get_server	= ics_rtas_get_server,
+	.host_match	= ics_rtas_host_match,
+};
+
+static void ics_rtas_unmask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int call_status;
+	int server;
+
+	pr_devel("xics: unmask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+
+	server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0);
+
+	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server,
+				DEFAULT_PRIORITY);
+	if (call_status != 0) {
+		printk(KERN_ERR
+			"%s: ibm_set_xive irq %u server %x returned %d\n",
+			__func__, hw_irq, server, call_status);
+		return;
+	}
+
+	/* Now unmask the interrupt (often a no-op) */
+	call_status = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq);
+	if (call_status != 0) {
+		printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n",
+			__func__, hw_irq, call_status);
+		return;
+	}
+}
+
+static unsigned int ics_rtas_startup(struct irq_data *d)
+{
+#ifdef CONFIG_PCI_MSI
+	/*
+	 * The generic MSI code returns with the interrupt disabled on the
+	 * card, using the MSI mask bits. Firmware doesn't appear to unmask
+	 * at that level, so we do it here by hand.
+	 */
+	if (irq_data_get_msi_desc(d))
+		pci_msi_unmask_irq(d);
+#endif
+	/* unmask it */
+	ics_rtas_unmask_irq(d);
+	return 0;
+}
+
+static void ics_rtas_mask_real_irq(unsigned int hw_irq)
+{
+	int call_status;
+
+	if (hw_irq == XICS_IPI)
+		return;
+
+	call_status = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq);
+	if (call_status != 0) {
+		printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n",
+			__func__, hw_irq, call_status);
+		return;
+	}
+
+	/* Have to set XIVE to 0xff to be able to remove a slot */
+	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq,
+				xics_default_server, 0xff);
+	if (call_status != 0) {
+		printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n",
+			__func__, hw_irq, call_status);
+		return;
+	}
+}
+
+static void ics_rtas_mask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	pr_devel("xics: mask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+	ics_rtas_mask_real_irq(hw_irq);
+}
+
+static int ics_rtas_set_affinity(struct irq_data *d,
+				 const struct cpumask *cpumask,
+				 bool force)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int status;
+	int xics_status[2];
+	int irq_server;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return -1;
+
+	status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq);
+
+	if (status) {
+		printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
+			__func__, hw_irq, status);
+		return -1;
+	}
+
+	irq_server = xics_get_irq_server(d->irq, cpumask, 1);
+	if (irq_server == -1) {
+		pr_warn("%s: No online cpus in the mask %*pb for irq %d\n",
+			__func__, cpumask_pr_args(cpumask), d->irq);
+		return -1;
+	}
+
+	status = rtas_call(ibm_set_xive, 3, 1, NULL,
+			   hw_irq, irq_server, xics_status[1]);
+
+	if (status) {
+		printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
+			__func__, hw_irq, status);
+		return -1;
+	}
+
+	return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip ics_rtas_irq_chip = {
+	.name = "XICS",
+	.irq_startup = ics_rtas_startup,
+	.irq_mask = ics_rtas_mask_irq,
+	.irq_unmask = ics_rtas_unmask_irq,
+	.irq_eoi = NULL, /* Patched at init time */
+	.irq_set_affinity = ics_rtas_set_affinity,
+	.irq_set_type = xics_set_irq_type,
+	.irq_retrigger = xics_retrigger,
+};
+
+static int ics_rtas_map(struct ics *ics, unsigned int virq)
+{
+	unsigned int hw_irq = (unsigned int)virq_to_hw(virq);
+	int status[2];
+	int rc;
+
+	if (WARN_ON(hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS))
+		return -EINVAL;
+
+	/* Check if RTAS knows about this interrupt */
+	rc = rtas_call(ibm_get_xive, 1, 3, status, hw_irq);
+	if (rc)
+		return -ENXIO;
+
+	irq_set_chip_and_handler(virq, &ics_rtas_irq_chip, handle_fasteoi_irq);
+	irq_set_chip_data(virq, &ics_rtas);
+
+	return 0;
+}
+
+static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec)
+{
+	ics_rtas_mask_real_irq(vec);
+}
+
+static long ics_rtas_get_server(struct ics *ics, unsigned long vec)
+{
+	int rc, status[2];
+
+	rc = rtas_call(ibm_get_xive, 1, 3, status, vec);
+	if (rc)
+		return -1;
+	return status[0];
+}
+
+static int ics_rtas_host_match(struct ics *ics, struct device_node *node)
+{
+	/* IBM machines have interrupt parents of various funky types for things
+	 * like vdevices, events, etc... The trick we use here is to match
+	 * everything here except the legacy 8259 which is compatible "chrp,iic"
+	 */
+	return !of_device_is_compatible(node, "chrp,iic");
+}
+
+__init int ics_rtas_init(void)
+{
+	ibm_get_xive = rtas_token("ibm,get-xive");
+	ibm_set_xive = rtas_token("ibm,set-xive");
+	ibm_int_on  = rtas_token("ibm,int-on");
+	ibm_int_off = rtas_token("ibm,int-off");
+
+	/* We enable the RTAS "ICS" if RTAS is present with the
+	 * appropriate tokens
+	 */
+	if (ibm_get_xive == RTAS_UNKNOWN_SERVICE ||
+	    ibm_set_xive == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	/* We need to patch our irq chip's EOI to point to the
+	 * right ICP
+	 */
+	ics_rtas_irq_chip.irq_eoi = icp_ops->eoi;
+
+	/* Register ourselves */
+	xics_register_ics(&ics_rtas);
+
+	return 0;
+}
+
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
new file mode 100644
index 000000000..f87470319
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -0,0 +1,492 @@
+/*
+ * Copyright 2011 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+#include <linux/types.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/debugfs.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/rtas.h>
+#include <asm/xics.h>
+#include <asm/firmware.h>
+
+/* Globals common to all ICP/ICS implementations */
+const struct icp_ops	*icp_ops;
+
+unsigned int xics_default_server		= 0xff;
+unsigned int xics_default_distrib_server	= 0;
+unsigned int xics_interrupt_server_size		= 8;
+
+DEFINE_PER_CPU(struct xics_cppr, xics_cppr);
+
+struct irq_domain *xics_host;
+
+static LIST_HEAD(ics_list);
+
+void xics_update_irq_servers(void)
+{
+	int i, j;
+	struct device_node *np;
+	u32 ilen;
+	const __be32 *ireg;
+	u32 hcpuid;
+
+	/* Find the server numbers for the boot cpu. */
+	np = of_get_cpu_node(boot_cpuid, NULL);
+	BUG_ON(!np);
+
+	hcpuid = get_hard_smp_processor_id(boot_cpuid);
+	xics_default_server = xics_default_distrib_server = hcpuid;
+
+	pr_devel("xics: xics_default_server = 0x%x\n", xics_default_server);
+
+	ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen);
+	if (!ireg) {
+		of_node_put(np);
+		return;
+	}
+
+	i = ilen / sizeof(int);
+
+	/* Global interrupt distribution server is specified in the last
+	 * entry of "ibm,ppc-interrupt-gserver#s" property. Get the last
+	 * entry fom this property for current boot cpu id and use it as
+	 * default distribution server
+	 */
+	for (j = 0; j < i; j += 2) {
+		if (be32_to_cpu(ireg[j]) == hcpuid) {
+			xics_default_distrib_server = be32_to_cpu(ireg[j+1]);
+			break;
+		}
+	}
+	pr_devel("xics: xics_default_distrib_server = 0x%x\n",
+		 xics_default_distrib_server);
+	of_node_put(np);
+}
+
+/* GIQ stuff, currently only supported on RTAS setups, will have
+ * to be sorted properly for bare metal
+ */
+void xics_set_cpu_giq(unsigned int gserver, unsigned int join)
+{
+#ifdef CONFIG_PPC_RTAS
+	int index;
+	int status;
+
+	if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL))
+		return;
+
+	index = (1UL << xics_interrupt_server_size) - 1 - gserver;
+
+	status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join);
+
+	WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n",
+	     GLOBAL_INTERRUPT_QUEUE, index, join, status);
+#endif
+}
+
+void xics_setup_cpu(void)
+{
+	icp_ops->set_priority(LOWEST_PRIORITY);
+
+	xics_set_cpu_giq(xics_default_distrib_server, 1);
+}
+
+void xics_mask_unknown_vec(unsigned int vec)
+{
+	struct ics *ics;
+
+	pr_err("Interrupt 0x%x (real) is invalid, disabling it.\n", vec);
+
+	list_for_each_entry(ics, &ics_list, link)
+		ics->mask_unknown(ics, vec);
+}
+
+
+#ifdef CONFIG_SMP
+
+static void xics_request_ipi(void)
+{
+	unsigned int ipi;
+
+	ipi = irq_create_mapping(xics_host, XICS_IPI);
+	BUG_ON(!ipi);
+
+	/*
+	 * IPIs are marked IRQF_PERCPU. The handler was set in map.
+	 */
+	BUG_ON(request_irq(ipi, icp_ops->ipi_action,
+			   IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL));
+}
+
+void __init xics_smp_probe(void)
+{
+	/* Register all the IPIs */
+	xics_request_ipi();
+
+	/* Setup cause_ipi callback based on which ICP is used */
+	smp_ops->cause_ipi = icp_ops->cause_ipi;
+}
+
+#endif /* CONFIG_SMP */
+
+void xics_teardown_cpu(void)
+{
+	struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr);
+
+	/*
+	 * we have to reset the cppr index to 0 because we're
+	 * not going to return from the IPI
+	 */
+	os_cppr->index = 0;
+	icp_ops->set_priority(0);
+	icp_ops->teardown_cpu();
+}
+
+void xics_kexec_teardown_cpu(int secondary)
+{
+	xics_teardown_cpu();
+
+	icp_ops->flush_ipi();
+
+	/*
+	 * Some machines need to have at least one cpu in the GIQ,
+	 * so leave the master cpu in the group.
+	 */
+	if (secondary)
+		xics_set_cpu_giq(xics_default_distrib_server, 0);
+}
+
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* Interrupts are disabled. */
+void xics_migrate_irqs_away(void)
+{
+	int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id();
+	unsigned int irq, virq;
+	struct irq_desc *desc;
+
+	/* If we used to be the default server, move to the new "boot_cpuid" */
+	if (hw_cpu == xics_default_server)
+		xics_update_irq_servers();
+
+	/* Reject any interrupt that was queued to us... */
+	icp_ops->set_priority(0);
+
+	/* Remove ourselves from the global interrupt queue */
+	xics_set_cpu_giq(xics_default_distrib_server, 0);
+
+	for_each_irq_desc(virq, desc) {
+		struct irq_chip *chip;
+		long server;
+		unsigned long flags;
+		struct ics *ics;
+
+		/* We can't set affinity on ISA interrupts */
+		if (virq < NUM_ISA_INTERRUPTS)
+			continue;
+		/* We only need to migrate enabled IRQS */
+		if (!desc->action)
+			continue;
+		if (desc->irq_data.domain != xics_host)
+			continue;
+		irq = desc->irq_data.hwirq;
+		/* We need to get IPIs still. */
+		if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
+			continue;
+		chip = irq_desc_get_chip(desc);
+		if (!chip || !chip->irq_set_affinity)
+			continue;
+
+		raw_spin_lock_irqsave(&desc->lock, flags);
+
+		/* Locate interrupt server */
+		server = -1;
+		ics = irq_desc_get_chip_data(desc);
+		if (ics)
+			server = ics->get_server(ics, irq);
+		if (server < 0) {
+			printk(KERN_ERR "%s: Can't find server for irq %d\n",
+			       __func__, irq);
+			goto unlock;
+		}
+
+		/* We only support delivery to all cpus or to one cpu.
+		 * The irq has to be migrated only in the single cpu
+		 * case.
+		 */
+		if (server != hw_cpu)
+			goto unlock;
+
+		/* This is expected during cpu offline. */
+		if (cpu_online(cpu))
+			pr_warn("IRQ %u affinity broken off cpu %u\n",
+				virq, cpu);
+
+		/* Reset affinity to all cpus */
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+		irq_set_affinity(virq, cpu_all_mask);
+		continue;
+unlock:
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+	}
+
+	/* Allow "sufficient" time to drop any inflight IRQ's */
+	mdelay(5);
+
+	/*
+	 * Allow IPIs again. This is done at the very end, after migrating all
+	 * interrupts, the expectation is that we'll only get woken up by an IPI
+	 * interrupt beyond this point, but leave externals masked just to be
+	 * safe. If we're using icp-opal this may actually allow all
+	 * interrupts anyway, but that should be OK.
+	 */
+	icp_ops->set_priority(DEFAULT_PRIORITY);
+
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+#ifdef CONFIG_SMP
+/*
+ * For the moment we only implement delivery to all cpus or one cpu.
+ *
+ * If the requested affinity is cpu_all_mask, we set global affinity.
+ * If not we set it to the first cpu in the mask, even if multiple cpus
+ * are set. This is so things like irqbalance (which set core and package
+ * wide affinities) do the right thing.
+ *
+ * We need to fix this to implement support for the links
+ */
+int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
+			unsigned int strict_check)
+{
+
+	if (!distribute_irqs)
+		return xics_default_server;
+
+	if (!cpumask_subset(cpu_possible_mask, cpumask)) {
+		int server = cpumask_first_and(cpu_online_mask, cpumask);
+
+		if (server < nr_cpu_ids)
+			return get_hard_smp_processor_id(server);
+
+		if (strict_check)
+			return -1;
+	}
+
+	/*
+	 * Workaround issue with some versions of JS20 firmware that
+	 * deliver interrupts to cpus which haven't been started. This
+	 * happens when using the maxcpus= boot option.
+	 */
+	if (cpumask_equal(cpu_online_mask, cpu_present_mask))
+		return xics_default_distrib_server;
+
+	return xics_default_server;
+}
+#endif /* CONFIG_SMP */
+
+static int xics_host_match(struct irq_domain *h, struct device_node *node,
+			   enum irq_domain_bus_token bus_token)
+{
+	struct ics *ics;
+
+	list_for_each_entry(ics, &ics_list, link)
+		if (ics->host_match(ics, node))
+			return 1;
+
+	return 0;
+}
+
+/* Dummies */
+static void xics_ipi_unmask(struct irq_data *d) { }
+static void xics_ipi_mask(struct irq_data *d) { }
+
+static struct irq_chip xics_ipi_chip = {
+	.name = "XICS",
+	.irq_eoi = NULL, /* Patched at init time */
+	.irq_mask = xics_ipi_mask,
+	.irq_unmask = xics_ipi_unmask,
+};
+
+static int xics_host_map(struct irq_domain *h, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	struct ics *ics;
+
+	pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
+
+	/*
+	 * Mark interrupts as edge sensitive by default so that resend
+	 * actually works. The device-tree parsing will turn the LSIs
+	 * back to level.
+	 */
+	irq_clear_status_flags(virq, IRQ_LEVEL);
+
+	/* Don't call into ICS for IPIs */
+	if (hw == XICS_IPI) {
+		irq_set_chip_and_handler(virq, &xics_ipi_chip,
+					 handle_percpu_irq);
+		return 0;
+	}
+
+	/* Let the ICS setup the chip data */
+	list_for_each_entry(ics, &ics_list, link)
+		if (ics->map(ics, virq) == 0)
+			return 0;
+
+	return -EINVAL;
+}
+
+static int xics_host_xlate(struct irq_domain *h, struct device_node *ct,
+			   const u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	*out_hwirq = intspec[0];
+
+	/*
+	 * If intsize is at least 2, we look for the type in the second cell,
+	 * we assume the LSB indicates a level interrupt.
+	 */
+	if (intsize > 1) {
+		if (intspec[1] & 1)
+			*out_flags = IRQ_TYPE_LEVEL_LOW;
+		else
+			*out_flags = IRQ_TYPE_EDGE_RISING;
+	} else
+		*out_flags = IRQ_TYPE_LEVEL_LOW;
+
+	return 0;
+}
+
+int xics_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+	/*
+	 * We only support these. This has really no effect other than setting
+	 * the corresponding descriptor bits mind you but those will in turn
+	 * affect the resend function when re-enabling an edge interrupt.
+	 *
+	 * Set set the default to edge as explained in map().
+	 */
+	if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
+		flow_type = IRQ_TYPE_EDGE_RISING;
+
+	if (flow_type != IRQ_TYPE_EDGE_RISING &&
+	    flow_type != IRQ_TYPE_LEVEL_LOW)
+		return -EINVAL;
+
+	irqd_set_trigger_type(d, flow_type);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+int xics_retrigger(struct irq_data *data)
+{
+	/*
+	 * We need to push a dummy CPPR when retriggering, since the subsequent
+	 * EOI will try to pop it. Passing 0 works, as the function hard codes
+	 * the priority value anyway.
+	 */
+	xics_push_cppr(0);
+
+	/* Tell the core to do a soft retrigger */
+	return 0;
+}
+
+static const struct irq_domain_ops xics_host_ops = {
+	.match = xics_host_match,
+	.map = xics_host_map,
+	.xlate = xics_host_xlate,
+};
+
+static void __init xics_init_host(void)
+{
+	xics_host = irq_domain_add_tree(NULL, &xics_host_ops, NULL);
+	BUG_ON(xics_host == NULL);
+	irq_set_default_host(xics_host);
+}
+
+void __init xics_register_ics(struct ics *ics)
+{
+	list_add(&ics->link, &ics_list);
+}
+
+static void __init xics_get_server_size(void)
+{
+	struct device_node *np;
+	const __be32 *isize;
+
+	/* We fetch the interrupt server size from the first ICS node
+	 * we find if any
+	 */
+	np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xics");
+	if (!np)
+		return;
+
+	isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
+	if (isize)
+		xics_interrupt_server_size = be32_to_cpu(*isize);
+
+	of_node_put(np);
+}
+
+void __init xics_init(void)
+{
+	int rc = -1;
+
+	/* Fist locate ICP */
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		rc = icp_hv_init();
+	if (rc < 0) {
+		rc = icp_native_init();
+		if (rc == -ENODEV)
+		    rc = icp_opal_init();
+	}
+	if (rc < 0) {
+		pr_warn("XICS: Cannot find a Presentation Controller !\n");
+		return;
+	}
+
+	/* Copy get_irq callback over to ppc_md */
+	ppc_md.get_irq = icp_ops->get_irq;
+
+	/* Patch up IPI chip EOI */
+	xics_ipi_chip.irq_eoi = icp_ops->eoi;
+
+	/* Now locate ICS */
+	rc = ics_rtas_init();
+	if (rc < 0)
+		rc = ics_opal_init();
+	if (rc < 0)
+		pr_warn("XICS: Cannot find a Source Controller !\n");
+
+	/* Initialize common bits */
+	xics_get_server_size();
+	xics_update_irq_servers();
+	xics_init_host();
+	xics_setup_cpu();
+}